From 64a27470363ed398110dc688869953ec7ab20bd5 Mon Sep 17 00:00:00 2001
From: Luka Macan
Date: Wed, 24 Sep 2025 10:06:45 +0200
Subject: [PATCH 01/80] Add OperatorDescriptor

---
 Deeploy/DeeployTypes.py | 155 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 155 insertions(+)

diff --git a/Deeploy/DeeployTypes.py b/Deeploy/DeeployTypes.py
index 8c2f5d2485..b1e11679d0 100644
--- a/Deeploy/DeeployTypes.py
+++ b/Deeploy/DeeployTypes.py
@@ -1020,6 +1020,152 @@ def copy(self) -> NetworkContext:
         return copy.copy(self)
 
 
+class IoDesc:
+
+    def __init__(self, required: Union[str, List[str]], optional: Union[str, List[str]] = []) -> None:
+        if isinstance(required, str):
+            required = [required]
+        self.required = required
+        if isinstance(optional, str):
+            optional = [optional]
+        self.optional = optional
+
+    def symbolicName(self, idx: int) -> str:
+        return (self.required + self.optional)[idx]
+
+    def checkTensors(self, tensors: Sequence[gs.Tensor]) -> bool:
+        return len(tensors) >= len(self.required) and \
+               len(tensors) <= len(self.required) + len(self.optional)
+
+
+class VariadicIoDesc(IoDesc):
+
+    def __init__(self, baseName: str, minNumTensors: int = 0) -> None:
+        self.baseName = baseName
+        self.minNumTensors = minNumTensors
+
+    def symbolicName(self, idx: int) -> str:
+        return f"{self.baseName}_{idx}"
+
+    def checkTensors(self, tensors: Sequence[gs.Tensor]) -> bool:
+        return len(tensors) >= self.minNumTensors
+
+
+@dataclass
+class AttrDesc:
+    name: str
+    unpacker: Callable[[Any], Any]
+    default: Optional[Union[Any, Callable[[gs.Node], Any]]] = None
+
+    @staticmethod
+    def _constUnpack(value: Any) -> Any:
+        if isinstance(value, gs.Constant):
+            return value.values.tolist()
+        elif isinstance(value, np.ndarray):
+            return value.tolist()
+        else:
+            return value
+
+    def unpack(self, value: Any) -> Union[int, float, List[int], List[float]]:
+        return self.unpacker(self._constUnpack(value))
+
+    def getDefault(self, node: gs.Node) -> Any:
+        if callable(self.default):
+            return self.default(node)
+        else:
+            return self.default
+
+
+@dataclass
+class OperatorDescriptor:
+    inputDescriptor: IoDesc
+    outputDescriptor: IoDesc
+    attrDescriptors: List[AttrDesc]
+
+    def check(self, node: gs.Node) -> bool:
+        """This method checks whether the node is valid.
+
+        Parameters
+        ----------
+        node : gs.Node
+            Graphsurgeon node to be validated
+
+        Returns
+        -------
+        bool : node validity
+
+        """
+        valid = True
+
+        if not self.inputDescriptor.checkTensors(node.inputs):
+            # TODO: Change to logging
+            print(f"[ERROR OP {node.op}] Invalid input tensors: {node.inputs}")
+            valid = False
+
+        if not self.outputDescriptor.checkTensors(node.outputs):
+            # TODO: Change to logging
+            print(f"[ERROR OP {node.op}] Invalid output tensors: {node.outputs}")
+            valid = False
+
+        for attrDesc in self.attrDescriptors:
+            if attrDesc.default is None and attrDesc.name not in node.attrs:
+                # TODO: Change to logging
+                print(f"[ERROR OP {node.op}] Missing attribute {attrDesc.name}")
+                valid = False
+
+        return valid
+
+    def canonicalize(self, node: gs.Node, opset: int) -> bool:
+        _ = opset
+        for desc in self.attrDescriptors:
+            if desc.default is None:
+                value = node.attrs[desc.name]
+            else:
+                value = node.attrs.get(desc.name, desc.getDefault(node))
+            try:
+                node.attrs[desc.name] = desc.unpack(value)
+            except ValueError as e:
+                raise ValueError(f"[ERROR OP {node.op}] Error unpacking the attribute {desc.name}. {e}") from e
+        return True
+
+    def parseTensors(self, ctxt: NetworkContext, tensors: Sequence[gs.Tensor],
+                     ioDesc: IoDesc) -> OperatorRepresentation:
+        opRepr = {}
+        for i, tensor in enumerate(tensors):
+            symName = ioDesc.symbolicName(i)
+            buffer = ctxt.lookup(tensor.name)
+            assert isinstance(buffer, VariableBuffer)
+            opRepr[symName] = buffer.name
+            opRepr[f"{symName}_shape"] = buffer.shape
+            opRepr[f"{symName}_size"] = math.prod(buffer.shape)
+            opRepr[f"{symName}_type"] = buffer._type
+        return opRepr
+
+    def parseAttrs(self, node: gs.Node) -> OperatorRepresentation:
+        return node.attrs.copy()
+
+    def parse(self, ctxt: NetworkContext, node: gs.Node) -> OperatorRepresentation:
+        opReprs = {
+            "input tensors": self.parseTensors(ctxt, node.inputs, self.inputDescriptor),
+            "output tensors": self.parseTensors(ctxt, node.outputs, self.outputDescriptor),
+            "attributes": self.parseAttrs(node),
+        }
+
+        for (firstName, firstOpRepr), (secondName, secondOpRepr) in itertools.combinations(opReprs.items(), 2):
+            firstKeySet = set(firstOpRepr.keys())
+            secondKeySet = set(secondOpRepr.keys())
+            assert firstKeySet.isdisjoint(secondKeySet), \
+                f"[PARSE ERROR] (Node: {node.name}, Op: {node.op}) " \
+                f"Keys from parsing {firstName} clash with the keys from parsing {secondName}. "\
+                f"Overlapping keys: {firstKeySet & secondKeySet}"
+
+        resultOpRepr = {}
+        for opRepr in opReprs.values():
+            resultOpRepr.update(opRepr)
+
+        return resultOpRepr
+
+
 class NodeParser():
     """Deeploy's core Parser class. Analyzes network nodes and evaluates whether they can be mapped by it.
 
@@ -2429,6 +2575,7 @@ def __init__(self,
                  graph: gs.Graph,
                  platform: DeploymentPlatform,
                  inputTypes: Dict[str, Type[Pointer]],
+                 operatorDescriptors: Dict[str, OperatorDescriptor],
                  scheduler: Callable[[gs.Graph], Schedule] = lambda graph: list(graph.nodes),
                  name: str = 'DeeployNetwork',
                  deeployStateDir: str = "DeeployState"):
@@ -2453,6 +2600,7 @@
 
         """
         self.graph = graph
+        self.operatorDescriptors = operatorDescriptors
        self.scheduler = scheduler
         self.layerBinding: 'OrderedDict[str, ONNXLayer]' = OrderedDict()
         self.parsed = False
@@ -2582,6 +2730,13 @@ def _bindLayers(self):
             flatSchedule += subGraph
 
         for node in flatSchedule:
+            assert node.op in self.operatorDescriptors, \
+                f"[ERROR] Error parsing node {node.name}. There is no descriptor for operator {node.op}."
+            desc = self.operatorDescriptors[node.op]
+            desc.canonicalize(node, self.graph.opset)
+            assert desc.check(node), \
+                f"[ERROR] Node {node.name} is not a valid instance of {node.op} operator"
+
             layer = self._mapNode(node)
             if isinstance(layer, ONNXLayer):
                 log.debug(f"    {SUCCESS_MARK} Bind {node.name} to layer {layer.__class__.__name__}")

From c5a0c71e2bae852ede4966586143d0a6b2766ea1 Mon Sep 17 00:00:00 2001
From: Luka Macan
Date: Wed, 24 Sep 2025 09:57:15 +0200
Subject: [PATCH 02/80] Add OperatorDescriptor.py

---
 Deeploy/DeeployTypes.py       |   9 +-
 Deeploy/OperatorDescriptor.py | 366 ++++++++++++++++++++++++++++++++++
 2 files changed, 372 insertions(+), 3 deletions(-)
 create mode 100644 Deeploy/OperatorDescriptor.py

diff --git a/Deeploy/DeeployTypes.py b/Deeploy/DeeployTypes.py
index b1e11679d0..3282c56ec3 100644
--- a/Deeploy/DeeployTypes.py
+++ b/Deeploy/DeeployTypes.py
@@ -1063,6 +1063,9 @@ def _constUnpack(value: Any) -> Any:
             return value.values.tolist()
         elif isinstance(value, np.ndarray):
             return value.tolist()
+        # LMACAN: hacky way to detect a 0-dim numpy array
+        elif hasattr(value, "ndim") and value.ndim == 0 and hasattr(value, "item"):
+            return value.item()
         else:
             return value
 
@@ -1099,12 +1102,12 @@ def check(self, node: gs.Node) -> bool:
 
         if not self.inputDescriptor.checkTensors(node.inputs):
             # TODO: Change to logging
-            print(f"[ERROR OP {node.op}] Invalid input tensors: {node.inputs}")
+            print(f"[ERROR OP {node.op}] Invalid input tensors: {[t.name for t in node.inputs]}")
             valid = False
 
         if not self.outputDescriptor.checkTensors(node.outputs):
             # TODO: Change to logging
-            print(f"[ERROR OP {node.op}] Invalid output tensors: {node.outputs}")
+            print(f"[ERROR OP {node.op}] Invalid output tensors: {[t.name for t in node.outputs]}")
             valid = False
 
         for attrDesc in self.attrDescriptors:
@@ -1124,7 +1127,7 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool:
                 value = node.attrs.get(desc.name, desc.getDefault(node))
             try:
                 node.attrs[desc.name] = desc.unpack(value)
-            except ValueError as e:
+            except Exception as e:
                 raise ValueError(f"[ERROR OP {node.op}] Error unpacking the attribute {desc.name}. {e}") from e
         return True
 
diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py
new file mode 100644
index 0000000000..f6bd478184
--- /dev/null
+++ b/Deeploy/OperatorDescriptor.py
@@ -0,0 +1,366 @@
+# SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna
+#
+# SPDX-License-Identifier: Apache-2.0
+
+from enum import Enum, IntEnum
+from typing import Any, Dict, Tuple
+
+import numpy as np
+import onnx_graphsurgeon as gs
+
+from Deeploy.DeeployTypes import AttrDesc, IoDesc, OperatorDescriptor, VariadicIoDesc
+
+
+def IntUnpack(value: Any) -> int:
+    if isinstance(value, (list, tuple)) and len(value) == 1:
+        value = value[0]
+
+    if isinstance(value, int):
+        return value
+    elif isinstance(value, float):
+        assert value.is_integer(), f"Received a non-integer value {value}"
+        return int(value)
+    raise ValueError(f"Unsupported value type {type(value)}")
+
+
+def BoolUnpack(value: Any) -> bool:
+    value = IntUnpack(value)
+    assert value in [0, 1], f"Casting to bool only supported from 0, 1. 
Received {value}" + return bool(value) + + +def FloatUnpack(value: Any) -> float: + if isinstance(value, (list, tuple)) and len(value) == 1: + value = value[0] + + assert isinstance(value, (int, float)), f"Unsupported value type {type(value)}" + return float(value) + + +def IntTupleUnpack(value: Any) -> Tuple[int, ...]: + try: + return tuple(IntUnpack(item) for item in value) + except TypeError: + return (IntUnpack(value),) + + +def FloatTupleUnpack(value: Any) -> Tuple[float, ...]: + try: + return tuple(FloatUnpack(item) for item in value) + except TypeError: + return (FloatUnpack(value),) + + +def attrToTensor(node: gs.Node, attr: str) -> None: + values = node.attrs[attr] + if isinstance(values, (int, float)): + values = np.array([values]) + elif isinstance(values, (list, tuple)): + values = np.array(values) + assert isinstance(values, np.ndarray), f"Unsupported values type {type(values)}" + tensor = gs.Constant(f"{node.name}_{attr}", values) + node.inputs.append(tensor) + node.attrs.pop(attr) + + +concatDesc = OperatorDescriptor( + inputDescriptor = VariadicIoDesc("data_in", minNumTensors = 2), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [], +) + +iRMSNormDesc = OperatorDescriptor( + inputDescriptor = IoDesc(["data_in", "weight"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("D", IntUnpack), + AttrDesc("n_levels", IntUnpack), + ], +) + + +class SliceDescriptor(OperatorDescriptor): + + def canonicalize(self, node: gs.Node, opset: int) -> bool: + if opset < 10: + attrToTensor(node, "starts") + attrToTensor(node, "ends") + if "axes" in node.attrs: + attrToTensor(node, "axes") + + return super().canonicalize(node, opset) + + +# Opset: 13 +sliceDesc = SliceDescriptor( + inputDescriptor = IoDesc(["data_in", "starts", "ends"], ["axes", "steps"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [], +) + +# Opset: 1 +sliceDescOld = OperatorDescriptor( + inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("axes", IntTupleUnpack, lambda n: range(len(n.attrs["starts"]))), + AttrDesc("ends", IntTupleUnpack), + AttrDesc("starts", IntTupleUnpack), + ], +) + +transposeDesc = OperatorDescriptor( + inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [AttrDesc("perm", IntTupleUnpack)], +) + + +class CeilMode(IntEnum): + floor = 0 + ceil = 1 + + +maxPoolDesc = OperatorDescriptor(inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("ceil_mode", unpacker = CeilMode, default = CeilMode.floor), + AttrDesc("kernel_shape", IntTupleUnpack), + AttrDesc("pads", IntTupleUnpack), + AttrDesc("strides", IntTupleUnpack), + ]) + + +class PadMode(str, Enum): + constant = "constant" + reflect = "reflect" + edge = "edge" + wrap = "wrap" + + +# Opset 24 +padDesc = OperatorDescriptor( + inputDescriptor = IoDesc(["data_in", "pads"], ["constant_value", "axes"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc('mode', unpacker = PadMode, default = PadMode.constant), + ], +) + + +class PadModeOld(str, Enum): + constant = "constant" + reflect = "reflect" + edge = "edge" + + +padDescOld = OperatorDescriptor( + inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("mode", unpacker = PadModeOld, default = PadModeOld.constant), + AttrDesc("pads", IntTupleUnpack), + AttrDesc("value", FloatUnpack), + ], +) + +addDesc = 
OperatorDescriptor( + inputDescriptor = VariadicIoDesc("data_in", minNumTensors = 2), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [], +) + + +class ReduceMeanDescriptor(OperatorDescriptor): + + def canonicalize(self, node: gs.Node, opset: int) -> bool: + if opset < 18: + if "axes" in node.attrs: + attrToTensor(node, "axes") + return super().canonicalize(node, opset) + + +# Opset 18 +reduceMeanDesc = ReduceMeanDescriptor( + inputDescriptor = IoDesc("data_in", optional = "axes"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("keepdims", unpacker = BoolUnpack, default = True), + AttrDesc("noop_with_empty_axes", unpacker = BoolUnpack, default = False), + ], +) + +reduceSumDesc = OperatorDescriptor( + inputDescriptor = IoDesc("data_in", optional = "axes"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("keepdims", unpacker = BoolUnpack, default = True), + AttrDesc("noop_with_empty_axes", unpacker = BoolUnpack, default = False), + ], +) + +softmaxDesc = OperatorDescriptor( + inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [AttrDesc("axis", IntUnpack, default = -1)], +) + +softmaxGradDesc = OperatorDescriptor( + inputDescriptor = IoDesc(["upstream_grad", "softmax_output"]), + outputDescriptor = IoDesc("softmax_grad"), + attrDescriptors = [AttrDesc("axis", IntUnpack, default = -1)], +) + +iSoftmaxDesc = OperatorDescriptor( + inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("axis", IntUnpack, default = -1), + AttrDesc("coeffA", IntUnpack), + AttrDesc("coeffB", IntUnpack), + AttrDesc("coeffC", IntUnpack), + AttrDesc("log2", IntUnpack), + AttrDesc("n_levels", IntUnpack), + ], +) + +itaMaxDesc = OperatorDescriptor( + inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("axis", IntUnpack, default = -1), + AttrDesc("n_levels", IntUnpack), + ], +) + +itaPartialMaxDesc = OperatorDescriptor( + inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("axis", IntUnpack, default = -1), + AttrDesc("n_levels", IntUnpack), + AttrDesc("group_width", IntUnpack), + ], +) + + +class GeluApprox(str, Enum): + tanh = "tanh" + none = "none" + + +geluDesc = OperatorDescriptor(inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("approximate", GeluApprox, default = GeluApprox.none), + ]) + +rqsIGeluDesc = OperatorDescriptor(inputDescriptor = IoDesc(["data_in", "mul", "add", "shift"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("b", IntUnpack), + AttrDesc("one", IntUnpack), + ]) + +iHardswishDesc = OperatorDescriptor(inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("one_over_six", IntUnpack), + AttrDesc("six", IntUnpack), + AttrDesc("three", IntUnpack), + ]) + +iNoNormDesc = OperatorDescriptor(inputDescriptor = IoDesc(["data_in", "weights", "bias"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("D", IntUnpack), + AttrDesc("mul", IntUnpack), + AttrDesc("n_levels", IntUnpack), + ]) + +quantDesc = OperatorDescriptor( + inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("scale", FloatUnpack), + AttrDesc("zero_point", FloatUnpack), + AttrDesc("bit_width", IntUnpack), + AttrDesc("signed", BoolUnpack, 
default = True), + AttrDesc("min_val", + IntUnpack, + default = lambda node: -(2**(node.attrs["bit_width"] - 1)) if node.attrs["signed"] else 0), + AttrDesc("max_val", + IntUnpack, + default = lambda node: 2**(node.attrs["bit_width"] - 1) - 1 + if node.attrs["signed"] else 2**node.attrs["bit_width"] - 1), + ], +) + + +class AutoPad(str, Enum): + NOTSET = "NOTSET" + SAME_UPPER = "SAME_UPPER" + SAME_LOWER = "SAME_LOWER" + VALID = "VALID" + + +def _dilationsDefault(node: gs.Node) -> Tuple[int, ...]: + # Remove 2 dims for input and output channels + nSpatialDims = len(node.inputs[1].shape) - 2 + return tuple([1] * nSpatialDims) + + +def _kernelShapeDefault(node: gs.Node) -> Tuple[int, ...]: + # Remove 2 dims for input and output channels + nSpatialDims = len(node.inputs[1].shape) - 2 + return node.inputs[1].shape[-nSpatialDims:] + + +def _stridesDefault(node: gs.Node) -> Tuple[int, ...]: + # Remove 2 dims for input and output channels + nSpatialDims = len(node.inputs[1].shape) - 2 + return tuple([1] * nSpatialDims) + + +def _padsDefault(node: gs.Node) -> Tuple[int, ...]: + # Remove 2 dims for input and output channels + nSpatialDims = len(node.inputs[1].shape) - 2 + # Two 0's per dimension for begin and end + return tuple([0] * (2 * nSpatialDims)) + + +convDesc = OperatorDescriptor( + inputDescriptor = IoDesc(["data_in", "weight"], optional = "bias"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("auto_pad", AutoPad, default = AutoPad.NOTSET), + AttrDesc("dilations", IntTupleUnpack, default = _dilationsDefault), + AttrDesc("group", IntUnpack, default = 1), + AttrDesc("kernel_shape", IntTupleUnpack, default = _kernelShapeDefault), + AttrDesc("pads", IntTupleUnpack, default = _padsDefault), + AttrDesc("strides", IntTupleUnpack, default = _stridesDefault), + ], +) + +defaultOperatorDescriptors: Dict[str, OperatorDescriptor] = { + "Concat": concatDesc, + "Conv": convDesc, + "iRMSNorm": iRMSNormDesc, + "Slice": sliceDesc, + "Transpose": transposeDesc, + "MaxPool": maxPoolDesc, + "Pad": padDescOld, + "Add": addDesc, + "ReduceMean": reduceMeanDesc, + "ReduceSum": reduceSumDesc, + "Softmax": softmaxDesc, + "iSoftmax": iSoftmaxDesc, + "SoftmaxGrad": softmaxGradDesc, + "Gelu": geluDesc, + "RequantizediGELU": rqsIGeluDesc, + "iHardswish": iHardswishDesc, + "Quant": quantDesc, + "iNoNorm": iNoNormDesc, + "ITAMax": itaMaxDesc, + "ITAPartialMax": itaPartialMaxDesc, +} From e31ea13a65cb7748dabe4fdb0c1134d3ba2fb1aa Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Wed, 24 Sep 2025 15:37:26 +0200 Subject: [PATCH 03/80] Add operatorDescriptors to NetworkDeployers --- .../NetworkDeployers/SignPropDeployer.py | 5 +++-- Deeploy/DeeployTypes.py | 9 ++++++++- .../NetworkDeployers/EngineColoringDeployer.py | 6 ++++-- .../NetworkDeployers/MemoryLevelDeployer.py | 10 ++++++---- Deeploy/Targets/Chimera/Deployer.py | 4 +++- Deeploy/Targets/CortexM/Deployer.py | 4 +++- Deeploy/Targets/Generic/Deployer.py | 4 +++- Deeploy/Targets/MemPool/Deployer.py | 5 +++-- Deeploy/Targets/Neureka/Deployer.py | 5 +++-- Deeploy/Targets/PULPOpen/Deployer.py | 5 ++++- Deeploy/Targets/Snitch/Deployer.py | 4 +++- Deeploy/Targets/SoftHier/Deployer.py | 5 +++-- DeeployTest/testMemoryLevelExtension.py | 5 +++++ DeeployTest/testUtils/dmaUtils.py | 3 +++ DeeployTest/testUtils/platformMapping.py | 15 ++++++++++++++- 15 files changed, 68 insertions(+), 21 deletions(-) diff --git a/Deeploy/CommonExtensions/NetworkDeployers/SignPropDeployer.py b/Deeploy/CommonExtensions/NetworkDeployers/SignPropDeployer.py index 
7a9fbea1ae..e576ff865b 100644 --- a/Deeploy/CommonExtensions/NetworkDeployers/SignPropDeployer.py +++ b/Deeploy/CommonExtensions/NetworkDeployers/SignPropDeployer.py @@ -7,7 +7,7 @@ import onnx_graphsurgeon as gs from Deeploy.AbstractDataTypes import Pointer -from Deeploy.DeeployTypes import DeploymentPlatform, NetworkDeployer, TopologyOptimizer +from Deeploy.DeeployTypes import DeploymentPlatform, NetworkDeployer, OperatorDescriptor, TopologyOptimizer from Deeploy.Logging import DEFAULT_LOGGER as log @@ -18,12 +18,13 @@ def __init__(self, deploymentPlatform: DeploymentPlatform, inputTypes: Dict[str, Type[Pointer]], loweringOptimizer: TopologyOptimizer, + operatorDescriptors: Dict[str, OperatorDescriptor], scheduler: Callable = lambda x: x, name: str = 'DeeployNetwork', default_channels_first: bool = True, deeployStateDir: str = "DeeployState", inputOffsets: Dict[str, int] = {}): - super().__init__(graph, deploymentPlatform, inputTypes, loweringOptimizer, scheduler, name, + super().__init__(graph, deploymentPlatform, inputTypes, loweringOptimizer, operatorDescriptors, scheduler, name, default_channels_first, deeployStateDir) if inputOffsets == {}: diff --git a/Deeploy/DeeployTypes.py b/Deeploy/DeeployTypes.py index 3282c56ec3..91d22d55ad 100644 --- a/Deeploy/DeeployTypes.py +++ b/Deeploy/DeeployTypes.py @@ -3339,6 +3339,7 @@ def __init__(self, deploymentPlatform: DeploymentPlatform, inputTypes: Dict[str, Type[Pointer]], loweringOptimizer: TopologyOptimizer, + operatorDescriptors: Dict[str, OperatorDescriptor], scheduler: Callable[[gs.Graph], Schedule] = lambda graph: list(graph.nodes), name: str = 'DeeployNetwork', default_channels_first: bool = True, @@ -3371,7 +3372,13 @@ def __init__(self, """ - super().__init__(graph, deploymentPlatform, inputTypes, scheduler, name, deeployStateDir = deeployStateDir) + super().__init__(graph, + deploymentPlatform, + inputTypes, + operatorDescriptors, + scheduler, + name, + deeployStateDir = deeployStateDir) self.loweringOptimizer = loweringOptimizer self.default_channels_first = default_channels_first diff --git a/Deeploy/EngineExtension/NetworkDeployers/EngineColoringDeployer.py b/Deeploy/EngineExtension/NetworkDeployers/EngineColoringDeployer.py index 4b05ab5be4..eb7175f613 100644 --- a/Deeploy/EngineExtension/NetworkDeployers/EngineColoringDeployer.py +++ b/Deeploy/EngineExtension/NetworkDeployers/EngineColoringDeployer.py @@ -8,7 +8,8 @@ from Deeploy.AbstractDataTypes import Pointer from Deeploy.CommonExtensions.NetworkDeployers.NetworkDeployerWrapper import NetworkDeployerWrapper -from Deeploy.DeeployTypes import DeploymentPlatform, NetworkDeployer, ONNXLayer, Schedule, TopologyOptimizer +from Deeploy.DeeployTypes import DeploymentPlatform, NetworkDeployer, ONNXLayer, OperatorDescriptor, Schedule, \ + TopologyOptimizer from Deeploy.EngineExtension.OptimizationPasses.TopologyOptimizationPasses.EngineColoringPasses import \ EngineColoringPass, EngineMapper @@ -20,12 +21,13 @@ def __init__(self, deploymentPlatform: DeploymentPlatform, inputTypes: Dict[str, Type[Pointer]], loweringOptimizer: TopologyOptimizer, + operatorDescriptors: Dict[str, OperatorDescriptor], scheduler: Callable[[gs.Graph], Schedule] = lambda graph: list(graph.nodes), name: str = 'DeeployNetwork', default_channels_first: bool = True, deeployStateDir: str = "DeeployState", engineMapperCls: Type[EngineMapper] = EngineMapper): - super().__init__(graph, deploymentPlatform, inputTypes, loweringOptimizer, scheduler, name, + super().__init__(graph, deploymentPlatform, inputTypes, 
loweringOptimizer, operatorDescriptors, scheduler, name, default_channels_first, deeployStateDir) self._initEngineColoringDeployer(engineMapperCls) diff --git a/Deeploy/MemoryLevelExtension/NetworkDeployers/MemoryLevelDeployer.py b/Deeploy/MemoryLevelExtension/NetworkDeployers/MemoryLevelDeployer.py index 2599f9e819..d75b28433e 100644 --- a/Deeploy/MemoryLevelExtension/NetworkDeployers/MemoryLevelDeployer.py +++ b/Deeploy/MemoryLevelExtension/NetworkDeployers/MemoryLevelDeployer.py @@ -11,8 +11,8 @@ from Deeploy.CommonExtensions.NetworkDeployers.NetworkDeployerWrapper import NetworkDeployerWrapper from Deeploy.CommonExtensions.NetworkDeployers.SignPropDeployer import SignPropDeployer from Deeploy.DeeployTypes import CodeGenVerbosity, ConstantBuffer, DeploymentEngine, DeploymentPlatform, \ - NetworkContext, NetworkDeployer, NetworkOptimizationPass, NetworkOptimizer, Schedule, StructBuffer, \ - TopologyOptimizer, TransientBuffer, VariableBuffer, _NoVerbosity + NetworkContext, NetworkDeployer, NetworkOptimizationPass, NetworkOptimizer, OperatorDescriptor, Schedule, \ + StructBuffer, TopologyOptimizer, TransientBuffer, VariableBuffer, _NoVerbosity from Deeploy.Logging import DEFAULT_LOGGER as log from Deeploy.MemoryLevelExtension.MemoryLevels import MemoryHierarchy, MemoryLevel from Deeploy.MemoryLevelExtension.OptimizationPasses.MemoryLevelAnnotationPasses import AnnotateDefaultMemoryLevel @@ -112,12 +112,13 @@ def __init__(self, deploymentPlatform: Union[MemoryPlatform, MemoryPlatformWrapper], inputTypes: Dict[str, Type[Pointer]], loweringOptimizer: TopologyOptimizer, + operatorDescriptors: Dict[str, OperatorDescriptor], scheduler: Callable[[gs.Graph], Schedule] = lambda graph: list(graph.nodes), name: str = 'DeeployNetwork', default_channels_first: bool = True, deeployStateDir: str = "DeeployState", memoryLevelAnnotationPasses: List[NetworkOptimizationPass] = []): - super().__init__(graph, deploymentPlatform, inputTypes, loweringOptimizer, scheduler, name, + super().__init__(graph, deploymentPlatform, inputTypes, loweringOptimizer, operatorDescriptors, scheduler, name, default_channels_first, deeployStateDir) if len(memoryLevelAnnotationPasses) == 0: memoryLevelAnnotationPasses.append(AnnotateDefaultMemoryLevel(self.Platform.memoryHierarchy)) @@ -155,13 +156,14 @@ def __init__(self, deploymentPlatform: Union[MemoryPlatform, MemoryPlatformWrapper], inputTypes: Dict[str, Type[Pointer]], loweringOptimizer: TopologyOptimizer, + operatorDescriptors: Dict[str, OperatorDescriptor], scheduler: Callable = lambda x: x, name: str = 'DeeployNetwork', default_channels_first: bool = True, deeployStateDir: str = "DeeployState", inputOffsets: Dict[str, int] = {}, memoryLevelAnnotationPasses: List[NetworkOptimizationPass] = []): - super().__init__(graph, deploymentPlatform, inputTypes, loweringOptimizer, scheduler, name, + super().__init__(graph, deploymentPlatform, inputTypes, loweringOptimizer, operatorDescriptors, scheduler, name, default_channels_first, deeployStateDir, inputOffsets) if len(memoryLevelAnnotationPasses) == 0: memoryLevelAnnotationPasses.append(AnnotateDefaultMemoryLevel(self.Platform.memoryHierarchy)) diff --git a/Deeploy/Targets/Chimera/Deployer.py b/Deeploy/Targets/Chimera/Deployer.py index ba28279b66..85b0496e39 100644 --- a/Deeploy/Targets/Chimera/Deployer.py +++ b/Deeploy/Targets/Chimera/Deployer.py @@ -8,7 +8,7 @@ from Deeploy.AbstractDataTypes import Pointer from Deeploy.CommonExtensions.NetworkDeployers.SignPropDeployer import SignPropDeployer -from Deeploy.DeeployTypes import 
DeploymentPlatform, TopologyOptimizer +from Deeploy.DeeployTypes import DeploymentPlatform, OperatorDescriptor, TopologyOptimizer class ChimeraDeployer(SignPropDeployer): @@ -18,6 +18,7 @@ def __init__(self, deploymentPlatform: DeploymentPlatform, inputTypes: Dict[str, Type[Pointer]], loweringOptimizer: TopologyOptimizer, + operatorDescriptors: Dict[str, OperatorDescriptor], scheduler: Callable = lambda x: x, name: str = 'DeeployNetwork', default_channels_first = False, @@ -27,6 +28,7 @@ def __init__(self, deploymentPlatform, inputTypes, loweringOptimizer, + operatorDescriptors, scheduler, name, default_channels_first = default_channels_first, diff --git a/Deeploy/Targets/CortexM/Deployer.py b/Deeploy/Targets/CortexM/Deployer.py index bef8fdcf36..9a4f27b061 100644 --- a/Deeploy/Targets/CortexM/Deployer.py +++ b/Deeploy/Targets/CortexM/Deployer.py @@ -11,7 +11,7 @@ from Deeploy.CommonExtensions.OptimizationPasses.TopologyOptimizationPasses.DebugPasses import DebugPrintMergePass from Deeploy.CommonExtensions.OptimizationPasses.TopologyOptimizationPasses.LoweringOptimizationPasses import \ NCHWtoNHWCPass, TransposeMatmulInputsPass -from Deeploy.DeeployTypes import DeploymentPlatform, TopologyOptimizer +from Deeploy.DeeployTypes import DeploymentPlatform, OperatorDescriptor, TopologyOptimizer from Deeploy.Targets.Generic.TopologyOptimizationPasses.Passes import TransposeConstOptPass, TransposeMergePass @@ -22,6 +22,7 @@ def __init__(self, deploymentPlatform: DeploymentPlatform, inputTypes: Dict[str, Type[Pointer]], loweringOptimizer: TopologyOptimizer, + operatorDescriptors: Dict[str, OperatorDescriptor], scheduler: Callable = lambda x: x, name: str = 'DeeployNetwork', default_channels_first = False, @@ -32,6 +33,7 @@ def __init__(self, deploymentPlatform, inputTypes, loweringOptimizer, + operatorDescriptors, scheduler, name, default_channels_first = default_channels_first, diff --git a/Deeploy/Targets/Generic/Deployer.py b/Deeploy/Targets/Generic/Deployer.py index 3cef57a2ea..9bf89a8a0e 100644 --- a/Deeploy/Targets/Generic/Deployer.py +++ b/Deeploy/Targets/Generic/Deployer.py @@ -11,7 +11,7 @@ from Deeploy.CommonExtensions.OptimizationPasses.TopologyOptimizationPasses.DebugPasses import DebugPrintMergePass from Deeploy.CommonExtensions.OptimizationPasses.TopologyOptimizationPasses.LoweringOptimizationPasses import \ NCHWtoNHWCPass, TransposeMatmulInputsPass -from Deeploy.DeeployTypes import DeploymentPlatform, TopologyOptimizer +from Deeploy.DeeployTypes import DeploymentPlatform, OperatorDescriptor, TopologyOptimizer from Deeploy.Targets.Generic.TopologyOptimizationPasses.Passes import TransposeConstOptPass, TransposeMergePass @@ -22,6 +22,7 @@ def __init__(self, deploymentPlatform: DeploymentPlatform, inputTypes: Dict[str, Type[Pointer]], loweringOptimizer: TopologyOptimizer, + operatorDescriptors: Dict[str, OperatorDescriptor], scheduler: Callable = lambda x: x, name: str = 'DeeployNetwork', default_channels_first = False, @@ -32,6 +33,7 @@ def __init__(self, deploymentPlatform, inputTypes, loweringOptimizer, + operatorDescriptors, scheduler, name, default_channels_first = default_channels_first, diff --git a/Deeploy/Targets/MemPool/Deployer.py b/Deeploy/Targets/MemPool/Deployer.py index 5431320978..968787972a 100644 --- a/Deeploy/Targets/MemPool/Deployer.py +++ b/Deeploy/Targets/MemPool/Deployer.py @@ -11,7 +11,7 @@ from Deeploy.CommonExtensions.OptimizationPasses.TopologyOptimizationPasses.DebugPasses import DebugPrintMergePass from 
Deeploy.CommonExtensions.OptimizationPasses.TopologyOptimizationPasses.LoweringOptimizationPasses import \ NCHWtoNHWCPass, TransposeMatmulInputsPass -from Deeploy.DeeployTypes import DeploymentPlatform, TopologyOptimizer +from Deeploy.DeeployTypes import DeploymentPlatform, OperatorDescriptor, TopologyOptimizer from Deeploy.Targets.Generic.TopologyOptimizationPasses.Passes import TransposeConstOptPass, TransposeMergePass @@ -22,12 +22,13 @@ def __init__(self, deploymentPlatform: DeploymentPlatform, inputTypes: Dict[str, Type[Pointer]], loweringOptimizer: TopologyOptimizer, + operatorDescriptors: Dict[str, OperatorDescriptor], scheduler: Callable = lambda x: x, name: str = 'DeeployNetwork', default_channels_first: bool = True, deeployStateDir: str = "DeeployState", inputOffsets: Dict[str, int] = {}): - super().__init__(graph, deploymentPlatform, inputTypes, loweringOptimizer, scheduler, name, + super().__init__(graph, deploymentPlatform, inputTypes, loweringOptimizer, operatorDescriptors, scheduler, name, default_channels_first, deeployStateDir) self.inputOffsets = inputOffsets diff --git a/Deeploy/Targets/Neureka/Deployer.py b/Deeploy/Targets/Neureka/Deployer.py index be34e1f4d3..6d96f8d097 100644 --- a/Deeploy/Targets/Neureka/Deployer.py +++ b/Deeploy/Targets/Neureka/Deployer.py @@ -9,7 +9,7 @@ from Deeploy.AbstractDataTypes import Pointer from Deeploy.CommonExtensions.OptimizationPasses.TopologyOptimizationPasses.LoweringOptimizationPasses import \ NCHWtoNHWCPass, PULPNCHWtoNHWCPass -from Deeploy.DeeployTypes import DeploymentPlatform, TopologyOptimizer +from Deeploy.DeeployTypes import DeploymentPlatform, OperatorDescriptor, TopologyOptimizer from Deeploy.Targets.Neureka.TopologyOptimizationPasses.Passes import ConvEngineDiscolorationPass, \ NeurekaOptimizationPass from Deeploy.Targets.PULPOpen.Deployer import PULPDeployer @@ -22,12 +22,13 @@ def __init__(self, deploymentPlatform: DeploymentPlatform, inputTypes: Dict[str, Type[Pointer]], loweringOptimizer: TopologyOptimizer, + operatorDescriptors: Dict[str, OperatorDescriptor], scheduler: Callable = lambda graph: list(graph.nodes), name: str = 'DeeployNetwork', default_channels_first = False, deeployStateDir: str = "DeeployStateDir", inputOffsets = {}): - super().__init__(graph, deploymentPlatform, inputTypes, loweringOptimizer, scheduler, name, + super().__init__(graph, deploymentPlatform, inputTypes, loweringOptimizer, operatorDescriptors, scheduler, name, default_channels_first, deeployStateDir, inputOffsets) if self.Platform.engines[0].enable3x3: diff --git a/Deeploy/Targets/PULPOpen/Deployer.py b/Deeploy/Targets/PULPOpen/Deployer.py index 86bf02e578..17412c8da4 100644 --- a/Deeploy/Targets/PULPOpen/Deployer.py +++ b/Deeploy/Targets/PULPOpen/Deployer.py @@ -12,7 +12,8 @@ from Deeploy.CommonExtensions.OptimizationPasses.BindingsOptimizationPasses.AutoTranspose import AutoTransposeMergePass from Deeploy.CommonExtensions.OptimizationPasses.TopologyOptimizationPasses.LoweringOptimizationPasses import \ PULPNCHWtoNHWCPass, RemoveGlobalOutputReshapePass, TransposeMatmulInputsPass -from Deeploy.DeeployTypes import ConstantBuffer, DeploymentPlatform, NodeTemplate, TopologyOptimizer, VariableBuffer +from Deeploy.DeeployTypes import ConstantBuffer, DeploymentPlatform, NodeTemplate, OperatorDescriptor, \ + TopologyOptimizer, VariableBuffer from Deeploy.Targets.Generic.TopologyOptimizationPasses.Passes import ReshapeConstOptPass, TransposeConstOptPass, \ TransposeMergePass, TransposeNoPermOptPass, TransposeSplitPass from 
Deeploy.Targets.PULPOpen.TopologyOptimizationPasses.Passes import RQAddTransposeSquashPass @@ -33,6 +34,7 @@ def __init__(self, deploymentPlatform: DeploymentPlatform, inputTypes: Dict[str, Type[Pointer]], loweringOptimizer: TopologyOptimizer, + operatorDescriptors: Dict[str, OperatorDescriptor], scheduler: Callable = lambda x: x, name: str = 'DeeployNetwork', default_channels_first = False, @@ -42,6 +44,7 @@ def __init__(self, deploymentPlatform, inputTypes, loweringOptimizer, + operatorDescriptors, scheduler, name, default_channels_first = default_channels_first, diff --git a/Deeploy/Targets/Snitch/Deployer.py b/Deeploy/Targets/Snitch/Deployer.py index 7c3922a6bb..4daab3b9f5 100644 --- a/Deeploy/Targets/Snitch/Deployer.py +++ b/Deeploy/Targets/Snitch/Deployer.py @@ -10,7 +10,7 @@ from Deeploy.CommonExtensions.NetworkDeployers.SignPropDeployer import SignPropDeployer from Deeploy.CommonExtensions.OptimizationPasses.TopologyOptimizationPasses.LoweringOptimizationPasses import \ NCHWtoNHWCPass, RemoveGlobalOutputReshapePass, TransposeMatmulInputsPass -from Deeploy.DeeployTypes import DeploymentPlatform, TopologyOptimizer +from Deeploy.DeeployTypes import DeploymentPlatform, OperatorDescriptor, TopologyOptimizer from Deeploy.Targets.Generic.TopologyOptimizationPasses.Passes import ReshapeConstOptPass, TransposeConstOptPass, \ TransposeMergePass, TransposeSplitPass @@ -22,6 +22,7 @@ def __init__(self, deploymentPlatform: DeploymentPlatform, inputTypes: Dict[str, Type[Pointer]], loweringOptimizer: TopologyOptimizer, + operatorDescriptors: Dict[str, OperatorDescriptor], scheduler: Callable = lambda x: x, name: str = 'DeeployNetwork', default_channels_first = False, @@ -31,6 +32,7 @@ def __init__(self, deploymentPlatform, inputTypes, loweringOptimizer, + operatorDescriptors, scheduler, name, default_channels_first = default_channels_first, diff --git a/Deeploy/Targets/SoftHier/Deployer.py b/Deeploy/Targets/SoftHier/Deployer.py index e4ab37f299..4827ba83b9 100644 --- a/Deeploy/Targets/SoftHier/Deployer.py +++ b/Deeploy/Targets/SoftHier/Deployer.py @@ -8,7 +8,7 @@ from Deeploy.AbstractDataTypes import Pointer from Deeploy.CommonExtensions.NetworkDeployers.SignPropDeployer import SignPropDeployer -from Deeploy.DeeployTypes import DeploymentPlatform, TopologyOptimizer +from Deeploy.DeeployTypes import DeploymentPlatform, OperatorDescriptor, TopologyOptimizer class SoftHierDeployer(SignPropDeployer): @@ -18,12 +18,13 @@ def __init__(self, deploymentPlatform: DeploymentPlatform, inputTypes: Dict[str, Type[Pointer]], loweringOptimizer: TopologyOptimizer, + operatorDescriptors: Dict[str, OperatorDescriptor], scheduler: Callable = lambda x: x, name: str = 'DeeployNetwork', default_channels_first: bool = True, deeployStateDir: str = "DeeployState", inputOffsets: Dict[str, int] = {}): - super().__init__(graph, deploymentPlatform, inputTypes, loweringOptimizer, scheduler, name, + super().__init__(graph, deploymentPlatform, inputTypes, loweringOptimizer, operatorDescriptors, scheduler, name, default_channels_first, deeployStateDir) self.inputOffsets = inputOffsets diff --git a/DeeployTest/testMemoryLevelExtension.py b/DeeployTest/testMemoryLevelExtension.py index 0e1ed6cc43..a6a1cf37d2 100644 --- a/DeeployTest/testMemoryLevelExtension.py +++ b/DeeployTest/testMemoryLevelExtension.py @@ -18,6 +18,7 @@ from Deeploy.MemoryLevelExtension.MemoryLevels import MemoryHierarchy, MemoryLevel from Deeploy.MemoryLevelExtension.NetworkDeployers.MemoryLevelDeployer import MemoryDeployerWrapper, \ 
MemoryLevelAwareSignPropDeployer +from Deeploy.OperatorDescriptor import defaultOperatorDescriptors from Deeploy.Targets.CortexM.Platform import CMSISEngine, CMSISMapping, CMSISOptimizer, CMSISPlatform from Deeploy.Targets.Generic.Platform import GenericEngine, GenericMapping, GenericOptimizer, GenericPlatform from Deeploy.Targets.Generic.TopologyOptimizationPasses.Passes import TransposeConstOptPass, TransposeMergePass @@ -83,6 +84,7 @@ MockPlatform, inputTypes, CMSISOptimizer, + defaultOperatorDescriptors, defaultScheduler, name = "DeeployNetwork", deeployStateDir = _DEEPLOYSTATEDIR, @@ -106,6 +108,7 @@ MockPlatform, inputTypes, MemPoolOptimizer, + defaultOperatorDescriptors, defaultScheduler, name = "DeeployNetwork", deeployStateDir = _DEEPLOYSTATEDIR, @@ -121,6 +124,7 @@ MockPlatform, inputTypes, GenericOptimizer, + defaultOperatorDescriptors, defaultScheduler, name = "DeeployNetworkMock", deeployStateDir = _DEEPLOYSTATEDIRMOCK, @@ -136,6 +140,7 @@ MockPlatform, inputTypes, PULPOptimizer, + defaultOperatorDescriptors, defaultScheduler, name = "DeeployNetworkMock", deeployStateDir = _DEEPLOYSTATEDIRMOCK, diff --git a/DeeployTest/testUtils/dmaUtils.py b/DeeployTest/testUtils/dmaUtils.py index 3266ce5129..3205275fda 100644 --- a/DeeployTest/testUtils/dmaUtils.py +++ b/DeeployTest/testUtils/dmaUtils.py @@ -17,6 +17,7 @@ MemoryPlatformWrapper from Deeploy.MemoryLevelExtension.OptimizationPasses.MemoryLevelAnnotationPasses import AnnotateDefaultMemoryLevel, \ AnnotateIOMemoryLevel +from Deeploy.OperatorDescriptor import defaultOperatorDescriptors from Deeploy.Targets.PULPOpen.Deployer import PULPDeployer from Deeploy.Targets.PULPOpen.Platform import MemoryPULPPlatform, PULPOptimizer from Deeploy.Targets.Snitch.Deployer import SnitchDeployer @@ -299,6 +300,7 @@ def setup_pulp_deployer(defaultMemory: str, targetMemory: str, graph: gs.Graph, platform, inputTypes, PULPOptimizer, + defaultOperatorDescriptors, defaultScheduler, default_channels_first = True, deeployStateDir = deeployStateDir) @@ -340,6 +342,7 @@ def setup_snitch_deployer(defaultMemory: str, targetMemory: str, graph: gs.Graph platform, inputTypes, SnitchOptimizer, + defaultOperatorDescriptors, defaultScheduler, deeployStateDir = deeployStateDir) memoryLevelAnnotationPasses = [AnnotateIOMemoryLevel(defaultMemory), AnnotateDefaultMemoryLevel(memoryHierarchy)] diff --git a/DeeployTest/testUtils/platformMapping.py b/DeeployTest/testUtils/platformMapping.py index 48c5777905..d02c3da64e 100644 --- a/DeeployTest/testUtils/platformMapping.py +++ b/DeeployTest/testUtils/platformMapping.py @@ -7,9 +7,10 @@ import onnx_graphsurgeon as gs from Deeploy.AbstractDataTypes import Pointer -from Deeploy.DeeployTypes import DeploymentPlatform, NetworkDeployer, TopologyOptimizer +from Deeploy.DeeployTypes import DeploymentPlatform, NetworkDeployer, OperatorDescriptor, TopologyOptimizer from Deeploy.MemoryLevelExtension.MemoryLevels import MemoryHierarchy, MemoryLevel from Deeploy.MemoryLevelExtension.NetworkDeployers.MemoryLevelDeployer import MemoryPlatform, MemoryPlatformWrapper +from Deeploy.OperatorDescriptor import defaultOperatorDescriptors from Deeploy.Targets.Chimera.Deployer import ChimeraDeployer from Deeploy.Targets.Chimera.Platform import ChimeraOptimizer, ChimeraPlatform from Deeploy.Targets.CortexM.Deployer import CMSISDeployer @@ -93,6 +94,7 @@ def mapDeployer(platform: DeploymentPlatform, graph: gs.Graph, inputTypes: Dict[str, Type[Pointer]], loweringOptimizer: Optional[TopologyOptimizer] = None, + operatorDescriptors: 
Optional[Dict[str, OperatorDescriptor]] = None, scheduler: Optional[Callable] = None, name: Optional[str] = None, default_channels_first: Optional[bool] = None, @@ -108,6 +110,9 @@ def mapDeployer(platform: DeploymentPlatform, if name is None: name = "DeeployNetwork" + if operatorDescriptors is None: + operatorDescriptors = defaultOperatorDescriptors + if isinstance(platform, CMSISPlatform): if loweringOptimizer is None: @@ -120,6 +125,7 @@ def mapDeployer(platform: DeploymentPlatform, platform, inputTypes, loweringOptimizer, + operatorDescriptors, scheduler, name = name, default_channels_first = default_channels_first, @@ -138,6 +144,7 @@ def mapDeployer(platform: DeploymentPlatform, platform, inputTypes, loweringOptimizer, + operatorDescriptors, scheduler, name = name, default_channels_first = default_channels_first, @@ -156,6 +163,7 @@ def mapDeployer(platform: DeploymentPlatform, platform, inputTypes, loweringOptimizer, + operatorDescriptors, scheduler, name = name, default_channels_first = default_channels_first, @@ -177,6 +185,7 @@ def mapDeployer(platform: DeploymentPlatform, platform, inputTypes, loweringOptimizer, + operatorDescriptors, scheduler, name = name, default_channels_first = default_channels_first, @@ -195,6 +204,7 @@ def mapDeployer(platform: DeploymentPlatform, platform, inputTypes, loweringOptimizer, + operatorDescriptors, scheduler, name = name, default_channels_first = default_channels_first, @@ -212,6 +222,7 @@ def mapDeployer(platform: DeploymentPlatform, platform, inputTypes, loweringOptimizer, + operatorDescriptors, scheduler, name = name, default_channels_first = default_channels_first, @@ -228,6 +239,7 @@ def mapDeployer(platform: DeploymentPlatform, platform, inputTypes, loweringOptimizer, + operatorDescriptors, scheduler, name = name, default_channels_first = default_channels_first, @@ -244,6 +256,7 @@ def mapDeployer(platform: DeploymentPlatform, platform, inputTypes, loweringOptimizer, + operatorDescriptors, scheduler, name = name, default_channels_first = default_channels_first, From 550b559d8fcb083e01ee6b566bd190a5e96ceccb Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Wed, 24 Sep 2025 15:37:52 +0200 Subject: [PATCH 04/80] Fix extract padding pass --- .../TopologyOptimizationPasses/Passes.py | 69 ++++++++++--------- 1 file changed, 37 insertions(+), 32 deletions(-) diff --git a/Deeploy/Targets/Generic/TopologyOptimizationPasses/Passes.py b/Deeploy/Targets/Generic/TopologyOptimizationPasses/Passes.py index b881529f7e..09ed0b6c7d 100644 --- a/Deeploy/Targets/Generic/TopologyOptimizationPasses/Passes.py +++ b/Deeploy/Targets/Generic/TopologyOptimizationPasses/Passes.py @@ -353,44 +353,49 @@ def __init__(self): super().__init__(graph, _split_add_fun, name) -def _extract_padding_fun_conv(graph: gs.Graph, match: Match, name: str, value = 0): +def _extract_padding_fun_conv(graph: gs.Graph, match: Match, name: str, value = 0) -> gs.Graph: + conv = list(match.nodes_map.values())[0] - matched_nodes = [m for k, m in match.nodes_map.items()] - conv = matched_nodes[0] - if 'pads' in conv.attrs and np.sum(conv.attrs['pads']) > 1: - pads = copy.deepcopy(conv.attrs['pads']) - shape = copy.deepcopy(conv.inputs[0].shape) - newPads = np.zeros(2 * len(shape)) - assert len(shape) - 2 == len(pads) / 2, "Conv padding dims do not match!" 
- newShape = shape + if 'pads' not in conv.attrs: + return graph - beginPads = pads[0:len(pads) // 2] - endPads = pads[len(pads) // 2:] - for idx, i in enumerate(beginPads): - newShape[2 + idx] = newShape[2 + idx] + i - newPads[2 + idx] = i + convPads = conv.attrs['pads'] - for idx, i in enumerate(endPads): - newShape[2 + idx] = newShape[2 + idx] + i - newPads[len(newPads) // 2 + 2 + idx] = i + if all(p == 0 for p in convPads): + return graph - newConvInput = gs.Variable(name + '_padded_input', dtype = np.float32, shape = newShape) - #valConst = gs.Constant('value', np.array(0)) - conv.attrs['pads'] = [0 for pad in conv.attrs['pads']] - newPad = gs.Node(op = 'Pad', - name = name + '_pad', - attrs = { - 'pads': newPads, - 'mode': 'constant', - 'value': value - }, - inputs = [conv.inputs[0]], - outputs = [newConvInput]) + inTensor = conv.inputs[0] + assert isinstance(inTensor, gs.Variable) + convShape = inTensor.shape - conv.inputs[0] = newConvInput - graph.nodes.append(newPad) - graph.cleanup().toposort() + beginConvPads = convPads[0:len(convPads) // 2] + endConvPads = convPads[len(convPads) // 2:] + + nonSpatialDimCount = len(convShape) - (len(convPads) // 2) + pads = [0] * nonSpatialDimCount + beginConvPads + [0] * nonSpatialDimCount + endConvPads + shape = [] + for dim, begin, end in zip(convShape, pads[:len(pads) // 2], pads[len(pads) // 2:]): + shape.append(begin + dim + end) + + paddedInput = gs.Variable(f"{name}_{inTensor.name}", dtype = np.float32, shape = shape) + + newPad = gs.Node(op = 'Pad', + name = name + '_pad', + attrs = { + 'pads': pads, + 'mode': 'constant', + 'value': value + }, + inputs = [conv.inputs[0]], + outputs = [paddedInput]) + + graph.nodes.append(newPad) + + conv.attrs['pads'] = [0] * len(convPads) + conv.inputs[0] = paddedInput + + graph.cleanup().toposort() return graph From ab9fdfece7ccaa09085978de0d39c55d089f45bb Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Wed, 24 Sep 2025 21:52:36 +0200 Subject: [PATCH 05/80] Fix isoftmax parser --- Deeploy/Targets/Generic/Parsers.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Deeploy/Targets/Generic/Parsers.py b/Deeploy/Targets/Generic/Parsers.py index 7752834c50..8b1ddf8f73 100644 --- a/Deeploy/Targets/Generic/Parsers.py +++ b/Deeploy/Targets/Generic/Parsers.py @@ -669,11 +669,11 @@ def parseNode(self, node: gs.Node) -> bool: ]) if wellFormed: - self.operatorRepresentation['coeffA'] = int(node.attrs['coeffA'].values) - self.operatorRepresentation['coeffB'] = int(node.attrs['coeffB'].values) - self.operatorRepresentation['coeffC'] = int(node.attrs['coeffC'].values) - self.operatorRepresentation['log2'] = int(node.attrs['log2'].values) - self.operatorRepresentation['n_levels'] = int(node.attrs['n_levels'].values) + self.operatorRepresentation['coeffA'] = node.attrs['coeffA'] + self.operatorRepresentation['coeffB'] = node.attrs['coeffB'] + self.operatorRepresentation['coeffC'] = node.attrs['coeffC'] + self.operatorRepresentation['log2'] = node.attrs['log2'] + self.operatorRepresentation['n_levels'] = node.attrs['n_levels'] return wellFormed From a410763f4745af05bea9491f2293c53c64f1faaf Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Wed, 24 Sep 2025 22:01:26 +0200 Subject: [PATCH 06/80] Fix iRMSNorm and iNoNorm parsers --- Deeploy/Targets/Generic/Parsers.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Deeploy/Targets/Generic/Parsers.py b/Deeploy/Targets/Generic/Parsers.py index 8b1ddf8f73..ab12a09d3c 100644 --- a/Deeploy/Targets/Generic/Parsers.py +++ 
b/Deeploy/Targets/Generic/Parsers.py @@ -52,7 +52,7 @@ def parseNode(self, node: gs.Node) -> (bool): if ret: - self.operatorRepresentation['n_levels'] = int(node.attrs['n_levels']) + self.operatorRepresentation['n_levels'] = node.attrs['n_levels'] self.operatorRepresentation['log2D'] = int(math.log2(node.attrs['D'])) return ret @@ -848,8 +848,8 @@ def parseNode(self, node: gs.Node) -> bool: if ret: self.operatorRepresentation['D'] = node.attrs['D'] - self.operatorRepresentation['log2D'] = int(np.log2(node.attrs['D'].values).tolist()[0]) - self.operatorRepresentation['mul'] = int(node.attrs['mul'].values.tolist()[0]) + self.operatorRepresentation['log2D'] = int(math.log2(node.attrs['D'])) + self.operatorRepresentation['mul'] = node.attrs['mul'] self.operatorRepresentation['n_levels'] = node.attrs['n_levels'] return ret From f6027fb72ca6a199732f577fa1ed6db003946f08 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Wed, 24 Sep 2025 22:21:04 +0200 Subject: [PATCH 07/80] Fix ReduceMean type signature --- Deeploy/Targets/Generic/Bindings.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/Deeploy/Targets/Generic/Bindings.py b/Deeploy/Targets/Generic/Bindings.py index 6bfe805b39..b29e403d55 100644 --- a/Deeploy/Targets/Generic/Bindings.py +++ b/Deeploy/Targets/Generic/Bindings.py @@ -8,7 +8,7 @@ from Deeploy.CommonExtensions.CodeTransformationPasses.MemoryAllocation import ArgumentStructGeneration, \ MemoryManagementGeneration, MemoryPassthroughGeneration from Deeploy.CommonExtensions.DataTypes import FloatDataTypes, IntegerDataTypes, SignedIntegerDataTypes, float32_t, \ - int8_t, int32_t, uint8_t + int8_t, int32_t, int64_t, uint8_t from Deeploy.DeeployTypes import CodeTransformation, NodeBinding from Deeploy.FutureExtension.CodeTransformationPasses.FutureCodeTransformation import FutureGeneration from Deeploy.Targets.Generic.Templates import AddTemplate, BatchNormalizationTemplate, ConcatTemplate, ConvTemplate, \ @@ -195,13 +195,11 @@ ] BasicReduceMeanBindings = [ - NodeBinding(ReduceMeanChecker([PointerClass(type)], [PointerClass(type)]), ReduceMeanTemplate.referenceTemplate, - BasicTransformer) for type in SignedIntegerDataTypes + NodeBinding(ReduceMeanChecker([PointerClass(ty), PointerClass(int64_t)], [PointerClass(ty)]), + ReduceMeanTemplate.referenceTemplate, BasicTransformer) for ty in SignedIntegerDataTypes ] + [ - NodeBinding(ReduceMeanChecker([PointerClass(float_type), PointerClass(integer_type)], [PointerClass(float_type)]), - FloatReduceMeanTemplate.referenceTemplate, BasicTransformer) - for integer_type in SignedIntegerDataTypes - for float_type in FloatDataTypes + NodeBinding(ReduceMeanChecker([PointerClass(ty), PointerClass(int64_t)], [PointerClass(ty)]), + FloatReduceMeanTemplate.referenceTemplate, BasicTransformer) for ty in FloatDataTypes ] BasicReduceSumBindings = [ From 475b337cc99b4282529cce8a9d1e213858672687 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Wed, 24 Sep 2025 22:23:49 +0200 Subject: [PATCH 08/80] Fix itamax and itapartialmax parsers --- Deeploy/Targets/Generic/Parsers.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Deeploy/Targets/Generic/Parsers.py b/Deeploy/Targets/Generic/Parsers.py index ab12a09d3c..c8ecf9e83e 100644 --- a/Deeploy/Targets/Generic/Parsers.py +++ b/Deeploy/Targets/Generic/Parsers.py @@ -698,7 +698,7 @@ def parseNode(self, node: gs.Node) -> bool: ret = all(['n_levels' in node.attrs]) if ret and wellFormed: - self.operatorRepresentation['n_levels'] = int(node.attrs['n_levels'].values) + 
self.operatorRepresentation['n_levels'] = node.attrs['n_levels'] return True return False @@ -725,8 +725,8 @@ def parseNode(self, node: gs.Node) -> bool: ret = all(['group_width' in node.attrs, 'n_levels' in node.attrs]) if ret and wellFormed: - self.operatorRepresentation['group_width'] = int(node.attrs['group_width']) - self.operatorRepresentation['n_levels'] = int(node.attrs['n_levels'].values) + self.operatorRepresentation['group_width'] = node.attrs['group_width'] + self.operatorRepresentation['n_levels'] = node.attrs['n_levels'] return True return False From c6c310912315be33bb71805a07bcf4889c336a1f Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Wed, 24 Sep 2025 22:52:17 +0200 Subject: [PATCH 09/80] Fix attr comparison to compare with tuple in neureka --- Deeploy/Targets/Neureka/Parsers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Deeploy/Targets/Neureka/Parsers.py b/Deeploy/Targets/Neureka/Parsers.py index 3c564c10b2..1d3db0d882 100644 --- a/Deeploy/Targets/Neureka/Parsers.py +++ b/Deeploy/Targets/Neureka/Parsers.py @@ -18,7 +18,7 @@ def parseNode(self, node: gs.Node) -> bool: if not all([ # No dilation support - self.operatorRepresentation['dilations'] == [1, 1], + self.operatorRepresentation['dilations'] == (1, 1), # Channels have to be last 'channels_first' in self.operatorRepresentation and not self.operatorRepresentation['channels_first'], # Expect "weight_offset" attribute in the node @@ -129,7 +129,7 @@ def parseNode(self, node: gs.Node) -> bool: return False if not all([ - self.operatorRepresentation['kernel_shape'] == [1, 1], + self.operatorRepresentation['kernel_shape'] == (1, 1), self.operatorRepresentation['group'] == 1, ]): return False From cd2270c540f51d92090f438af58f4eae9077c217 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Wed, 24 Sep 2025 23:03:24 +0200 Subject: [PATCH 10/80] Fix keepdims type in fuse mhsa pass --- Deeploy/Targets/MemPool/TopologyOptimizationPasses/Passes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Deeploy/Targets/MemPool/TopologyOptimizationPasses/Passes.py b/Deeploy/Targets/MemPool/TopologyOptimizationPasses/Passes.py index 49f317caa4..46bad04cea 100644 --- a/Deeploy/Targets/MemPool/TopologyOptimizationPasses/Passes.py +++ b/Deeploy/Targets/MemPool/TopologyOptimizationPasses/Passes.py @@ -289,7 +289,7 @@ def get_constant_input_or_zeros(n: gs.Node, shape): name = name + "_sum", attrs = { 'axes': [1], - "keepdims": "0" + "keepdims": 0 }) mhsa_out[0].shape = [_output.shape[0]] + [int(H)] + _output.shape[1:] From 2e62e8451b4c05e2f580f999b3c2237e5922c9fc Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Thu, 25 Sep 2025 11:56:49 +0200 Subject: [PATCH 11/80] Fix old _unpack_const to pass Python literals --- Deeploy/DeeployTypes.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Deeploy/DeeployTypes.py b/Deeploy/DeeployTypes.py index 91d22d55ad..aecb112b57 100644 --- a/Deeploy/DeeployTypes.py +++ b/Deeploy/DeeployTypes.py @@ -1292,7 +1292,9 @@ def _unpack_const(attr) -> Union[int, float]: The attributes can either be a numpy scalar value or a Constant tensor. This expects the numpy value to be of size 1. 
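    Plain Python literals (int, float, bool, str) are returned unchanged.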
""" - if isinstance(attr, gs.Constant): + if isinstance(attr, (int, float, bool, str)): + return attr + elif isinstance(attr, gs.Constant): value = attr.values elif isinstance(attr, np.ndarray): value = attr From 587d6deea3140243d4910a540bfc17a1577a4a17 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Thu, 25 Sep 2025 11:57:42 +0200 Subject: [PATCH 12/80] Add RequantizedConv desc --- Deeploy/OperatorDescriptor.py | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index f6bd478184..a3fe275366 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -342,9 +342,44 @@ def _padsDefault(node: gs.Node) -> Tuple[int, ...]: ], ) + +class RequantizedConvDescriptor(OperatorDescriptor): + + def canonicalize(self, node: gs.Node, opset: int) -> bool: + if "n_levels_out" in node.attrs and "n_levels" in node.attrs: + # TODO: Change to log + print("[WARNING] RequantizedConv cannot have n_levels_out and n_levels in it's attributes") + return False + + if "n_levels_out" in node.attrs: + node.attrs["n_levels"] = node.attrs["n_levels_out"] + node.attrs.pop("n_levels_out") + + return super().canonicalize(node, opset) + + +requantizedConvDesc = RequantizedConvDescriptor( + inputDescriptor = IoDesc(["data_in", "weight", "mul", "add"], optional = ["shift"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + # Conv attrs + AttrDesc("auto_pad", AutoPad, default = AutoPad.NOTSET), + AttrDesc("dilations", IntTupleUnpack, default = _dilationsDefault), + AttrDesc("group", IntUnpack, default = 1), + AttrDesc("kernel_shape", IntTupleUnpack, default = _kernelShapeDefault), + AttrDesc("pads", IntTupleUnpack, default = _padsDefault), + AttrDesc("strides", IntTupleUnpack, default = _stridesDefault), + # RequantizedShift attrs + AttrDesc("n_levels", IntUnpack), + AttrDesc("signed", BoolUnpack), + AttrDesc("div", IntUnpack), + ], +) + defaultOperatorDescriptors: Dict[str, OperatorDescriptor] = { "Concat": concatDesc, "Conv": convDesc, + "RequantizedConv": requantizedConvDesc, "iRMSNorm": iRMSNormDesc, "Slice": sliceDesc, "Transpose": transposeDesc, From 0ccd3b8330a3e96cb5d069afc1465131e80c2c31 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Sun, 28 Sep 2025 13:50:17 +0200 Subject: [PATCH 13/80] Fix DW parser --- Deeploy/Targets/PULPOpen/Parsers.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/Deeploy/Targets/PULPOpen/Parsers.py b/Deeploy/Targets/PULPOpen/Parsers.py index e94af6e420..eebe3ad406 100644 --- a/Deeploy/Targets/PULPOpen/Parsers.py +++ b/Deeploy/Targets/PULPOpen/Parsers.py @@ -206,12 +206,9 @@ def parseNode(self, node: gs.Node) -> (bool): self.operatorRepresentation['stride_x'] = int(self.operatorRepresentation['strides'][0]) self.operatorRepresentation['stride_y'] = int(self.operatorRepresentation['strides'][1]) - if 'n_levels' in node.attrs: - self.operatorRepresentation['n_levels'] = int(node.attrs['n_levels'].values) - else: - self.operatorRepresentation['n_levels'] = int(node.attrs['n_levels_out'].values) - self.operatorRepresentation['signed'] = int(node.attrs['signed'].values) - self.operatorRepresentation['log2D'] = int(math.log2(node.attrs['div'].values)) + self.operatorRepresentation['n_levels'] = node.attrs['n_levels'] + self.operatorRepresentation['signed'] = node.attrs['signed'] + self.operatorRepresentation['log2D'] = int(math.log2(node.attrs['div'])) return ret return False From c2f2bb2c0430f6e94cd416608e01e65dd6da8b3c Mon Sep 17 00:00:00 
2001 From: Luka Macan Date: Sun, 28 Sep 2025 13:52:57 +0200 Subject: [PATCH 14/80] Fix pulp 1D conv --- Deeploy/Targets/PULPOpen/Parsers.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/Deeploy/Targets/PULPOpen/Parsers.py b/Deeploy/Targets/PULPOpen/Parsers.py index eebe3ad406..51b26ae546 100644 --- a/Deeploy/Targets/PULPOpen/Parsers.py +++ b/Deeploy/Targets/PULPOpen/Parsers.py @@ -133,13 +133,9 @@ def parseNode(self, node: gs.Node) -> (bool): self.operatorRepresentation['padding_y_bottom'] = int(self.operatorRepresentation['pads'][1]) self.operatorRepresentation['stride_y'] = int(self.operatorRepresentation['strides'][0]) - if 'n_levels' in node.attrs: - self.operatorRepresentation['n_levels'] = int(node.attrs['n_levels'].values) - else: - self.operatorRepresentation['n_levels'] = int(node.attrs['n_levels_out'].values) - - self.operatorRepresentation['signed'] = int(node.attrs['signed'].values) - self.operatorRepresentation['log2D'] = int(math.log2(node.attrs['div'].values)) + self.operatorRepresentation['n_levels'] = node.attrs['n_levels'] + self.operatorRepresentation['signed'] = node.attrs['signed'] + self.operatorRepresentation['log2D'] = int(math.log2(node.attrs['div'])) return ret def parseNodeCtxt(self, From 0b6032972a4a1e971550188bd6b08b4e32ad0651 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Sun, 28 Sep 2025 14:08:36 +0200 Subject: [PATCH 15/80] Sort operator descriptors alphabetically --- Deeploy/OperatorDescriptor.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index a3fe275366..3dc0f5fd7f 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -377,25 +377,25 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: ) defaultOperatorDescriptors: Dict[str, OperatorDescriptor] = { + "Add": addDesc, "Concat": concatDesc, "Conv": convDesc, - "RequantizedConv": requantizedConvDesc, - "iRMSNorm": iRMSNormDesc, - "Slice": sliceDesc, - "Transpose": transposeDesc, + "Gelu": geluDesc, + "ITAMax": itaMaxDesc, + "ITAPartialMax": itaPartialMaxDesc, "MaxPool": maxPoolDesc, "Pad": padDescOld, - "Add": addDesc, + "Quant": quantDesc, "ReduceMean": reduceMeanDesc, "ReduceSum": reduceSumDesc, + "RequantizedConv": requantizedConvDesc, + "RequantizediGELU": rqsIGeluDesc, + "Slice": sliceDesc, "Softmax": softmaxDesc, - "iSoftmax": iSoftmaxDesc, "SoftmaxGrad": softmaxGradDesc, - "Gelu": geluDesc, - "RequantizediGELU": rqsIGeluDesc, + "Transpose": transposeDesc, "iHardswish": iHardswishDesc, - "Quant": quantDesc, "iNoNorm": iNoNormDesc, - "ITAMax": itaMaxDesc, - "ITAPartialMax": itaPartialMaxDesc, + "iRMSNorm": iRMSNormDesc, + "iSoftmax": iSoftmaxDesc, } From a19f98a080dd8a3d0daf56fe5e32a0304c038630 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Sun, 28 Sep 2025 14:16:45 +0200 Subject: [PATCH 16/80] Add DequantDescriptor --- Deeploy/OperatorDescriptor.py | 12 ++++++++++++ Deeploy/Targets/Generic/Parsers.py | 10 ++++------ 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index 3dc0f5fd7f..cbb5d723ba 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -376,10 +376,22 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: ], ) +dequantDesc = OperatorDescriptor( + inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("scale", FloatUnpack), + AttrDesc("zero_point", 
FloatUnpack), + AttrDesc("bit_width", IntUnpack), + AttrDesc("signed", BoolUnpack), + ], +) + defaultOperatorDescriptors: Dict[str, OperatorDescriptor] = { "Add": addDesc, "Concat": concatDesc, "Conv": convDesc, + "Dequant": dequantDesc, "Gelu": geluDesc, "ITAMax": itaMaxDesc, "ITAPartialMax": itaPartialMaxDesc, diff --git a/Deeploy/Targets/Generic/Parsers.py b/Deeploy/Targets/Generic/Parsers.py index c8ecf9e83e..b43672d9c9 100644 --- a/Deeploy/Targets/Generic/Parsers.py +++ b/Deeploy/Targets/Generic/Parsers.py @@ -2488,12 +2488,10 @@ def parseNode(self, node: gs.Node) -> bool: ]) if ret: - self.operatorRepresentation['scale'] = float(node.attrs['scale']) - self.operatorRepresentation['zero_point'] = float(node.attrs['zero_point']) - self.operatorRepresentation['bit_width'] = int(node.attrs['bit_width']) - - self.operatorRepresentation['signed'] = bool(node.attrs['signed']) - + self.operatorRepresentation['scale'] = node.attrs['scale'] + self.operatorRepresentation['zero_point'] = node.attrs['zero_point'] + self.operatorRepresentation['bit_width'] = node.attrs['bit_width'] + self.operatorRepresentation['signed'] = node.attrs['signed'] return ret def parseNodeCtxt(self, From 4af65525156222cef52333bdfabcabe87c4afe69 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Sun, 28 Sep 2025 14:26:47 +0200 Subject: [PATCH 17/80] Add Div, IntegerDiv, RQIntegerDiv --- Deeploy/OperatorDescriptor.py | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index cbb5d723ba..ac486f4af9 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -387,12 +387,44 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: ], ) +divDesc = OperatorDescriptor( + inputDescriptor = IoDesc(["input1", "input2"]), + outputDescriptor = IoDesc("output"), + attrDescriptors = [], +) + +integerDivDescriptor = OperatorDescriptor( + inputDescriptor = IoDesc(["A", "B"]), + outputDescriptor = IoDesc("C"), + attrDescriptors = [ + AttrDesc("Delta", IntUnpack), + AttrDesc("eps", IntUnpack), + AttrDesc("eta", IntUnpack), + ], +) + +requantizedIntegerDivDescriptor = OperatorDescriptor( + inputDescriptor = IoDesc(["A", "B", "requant_mul", "requant_add", "requant_div"]), + outputDescriptor = IoDesc("C"), + attrDescriptors = [ + # IntegerDiv attrs + AttrDesc("Delta", IntUnpack), + AttrDesc("eps", IntUnpack), + AttrDesc("eta", IntUnpack), + # RequantizedShift attrs + AttrDesc("n_levels", IntUnpack), + AttrDesc("signed", BoolUnpack), + AttrDesc("div", IntUnpack), + ]) + defaultOperatorDescriptors: Dict[str, OperatorDescriptor] = { "Add": addDesc, "Concat": concatDesc, "Conv": convDesc, "Dequant": dequantDesc, + "Div": divDesc, "Gelu": geluDesc, + "IntegerDiv": integerDivDescriptor, "ITAMax": itaMaxDesc, "ITAPartialMax": itaPartialMaxDesc, "MaxPool": maxPoolDesc, @@ -402,6 +434,7 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: "ReduceSum": reduceSumDesc, "RequantizedConv": requantizedConvDesc, "RequantizediGELU": rqsIGeluDesc, + "RQIntegerDiv": requantizedIntegerDivDescriptor, "Slice": sliceDesc, "Softmax": softmaxDesc, "SoftmaxGrad": softmaxGradDesc, From 2e2e3dfbb74c45d7e9cb63624e63ffc48d31ccbb Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Sun, 28 Sep 2025 14:37:24 +0200 Subject: [PATCH 18/80] Add DebugPrint, LayerNormalization, iLayerNorm --- Deeploy/OperatorDescriptor.py | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/Deeploy/OperatorDescriptor.py 
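All of these descriptors lean on the same unpacker convention: constants and numpy values are first lowered to plain Python objects, then the named unpacker coerces the result. A small sketch of that normalization (import paths are assumptions; illustrative only):

    import numpy as np
    import onnx_graphsurgeon as gs

    from Deeploy.DeeployTypes import AttrDesc          # assumed location
    from Deeploy.OperatorDescriptor import IntUnpack   # assumed location

    desc = AttrDesc("eps", IntUnpack)

    # The same logical value may arrive as a Python int, a 0-d numpy array,
    # or a gs.Constant, depending on the exporter; unpacking maps all three
    # to the same plain int.
    for raw in (3, np.array(3), gs.Constant("eps", values = np.array(3))):
        assert desc.unpack(raw) == 3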
b/Deeploy/OperatorDescriptor.py index ac486f4af9..c6f9aa49cd 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -417,29 +417,50 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: AttrDesc("div", IntUnpack), ]) +debugPrintDesc = OperatorDescriptor( + inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [], +) + +layerNormalizationDesc = OperatorDescriptor( + inputDescriptor = IoDesc(["data_in", "weight", "bias"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [AttrDesc("epsilon", FloatUnpack)], +) + +iLayerNormDesc = OperatorDescriptor( + inputDescriptor = IoDesc(["data_in", "weight", "bias"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [AttrDesc("D", IntUnpack), AttrDesc("n_levels", IntUnpack)], +) + defaultOperatorDescriptors: Dict[str, OperatorDescriptor] = { "Add": addDesc, "Concat": concatDesc, "Conv": convDesc, + "DebugPrint": debugPrintDesc, "Dequant": dequantDesc, "Div": divDesc, "Gelu": geluDesc, - "IntegerDiv": integerDivDescriptor, "ITAMax": itaMaxDesc, "ITAPartialMax": itaPartialMaxDesc, + "IntegerDiv": integerDivDescriptor, + "LayerNormalization": layerNormalizationDesc, "MaxPool": maxPoolDesc, "Pad": padDescOld, "Quant": quantDesc, + "RQIntegerDiv": requantizedIntegerDivDescriptor, "ReduceMean": reduceMeanDesc, "ReduceSum": reduceSumDesc, "RequantizedConv": requantizedConvDesc, "RequantizediGELU": rqsIGeluDesc, - "RQIntegerDiv": requantizedIntegerDivDescriptor, "Slice": sliceDesc, "Softmax": softmaxDesc, "SoftmaxGrad": softmaxGradDesc, "Transpose": transposeDesc, "iHardswish": iHardswishDesc, + "iLayerNorm": iLayerNormDesc, "iNoNorm": iNoNormDesc, "iRMSNorm": iRMSNormDesc, "iSoftmax": iSoftmaxDesc, From 9ac9a62ae0713220b4f4693f8565c78781ae1539 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Sun, 28 Sep 2025 14:44:13 +0200 Subject: [PATCH 19/80] Add RequantizedOperatorDescriptor --- Deeploy/OperatorDescriptor.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index c6f9aa49cd..bd57ae822c 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -255,12 +255,12 @@ class GeluApprox(str, Enum): AttrDesc("approximate", GeluApprox, default = GeluApprox.none), ]) -rqsIGeluDesc = OperatorDescriptor(inputDescriptor = IoDesc(["data_in", "mul", "add", "shift"]), - outputDescriptor = IoDesc("data_out"), - attrDescriptors = [ - AttrDesc("b", IntUnpack), - AttrDesc("one", IntUnpack), - ]) +requantizedIGeluDesc = OperatorDescriptor(inputDescriptor = IoDesc(["data_in", "mul", "add", "shift"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("b", IntUnpack), + AttrDesc("one", IntUnpack), + ]) iHardswishDesc = OperatorDescriptor(inputDescriptor = IoDesc("data_in"), outputDescriptor = IoDesc("data_out"), @@ -343,7 +343,7 @@ def _padsDefault(node: gs.Node) -> Tuple[int, ...]: ) -class RequantizedConvDescriptor(OperatorDescriptor): +class RequantizedOperatorDescriptor(OperatorDescriptor): def canonicalize(self, node: gs.Node, opset: int) -> bool: if "n_levels_out" in node.attrs and "n_levels" in node.attrs: @@ -358,7 +358,7 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: return super().canonicalize(node, opset) -requantizedConvDesc = RequantizedConvDescriptor( +requantizedConvDesc = RequantizedOperatorDescriptor( inputDescriptor = IoDesc(["data_in", "weight", "mul", "add"], optional = ["shift"]), outputDescriptor = 
IoDesc("data_out"), attrDescriptors = [ @@ -403,7 +403,7 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: ], ) -requantizedIntegerDivDescriptor = OperatorDescriptor( +requantizedIntegerDivDescriptor = RequantizedOperatorDescriptor( inputDescriptor = IoDesc(["A", "B", "requant_mul", "requant_add", "requant_div"]), outputDescriptor = IoDesc("C"), attrDescriptors = [ @@ -454,7 +454,7 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: "ReduceMean": reduceMeanDesc, "ReduceSum": reduceSumDesc, "RequantizedConv": requantizedConvDesc, - "RequantizediGELU": rqsIGeluDesc, + "RequantizediGELU": requantizedIGeluDesc, "Slice": sliceDesc, "Softmax": softmaxDesc, "SoftmaxGrad": softmaxGradDesc, From e01fdb034426ad24ce969525e19b82f9ea9692ad Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Sun, 28 Sep 2025 14:48:48 +0200 Subject: [PATCH 20/80] Add flatten and gather --- Deeploy/OperatorDescriptor.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index bd57ae822c..462b4c4fc7 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -435,6 +435,18 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: attrDescriptors = [AttrDesc("D", IntUnpack), AttrDesc("n_levels", IntUnpack)], ) +flattenDesc = OperatorDescriptor( + inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [AttrDesc("axis", IntUnpack, default = 1)], +) + +gatherDesc = OperatorDescriptor( + inputDescriptor = IoDesc(["data_in", "indices"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [AttrDesc("axis", IntUnpack, default = 0)], +) + defaultOperatorDescriptors: Dict[str, OperatorDescriptor] = { "Add": addDesc, "Concat": concatDesc, @@ -442,6 +454,8 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: "DebugPrint": debugPrintDesc, "Dequant": dequantDesc, "Div": divDesc, + "Flatten": flattenDesc, + "Gather": gatherDesc, "Gelu": geluDesc, "ITAMax": itaMaxDesc, "ITAPartialMax": itaPartialMaxDesc, From 1db3ae7e951be7c41f50d22c0b2b0d933dc808bd Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Sun, 28 Sep 2025 14:55:09 +0200 Subject: [PATCH 21/80] Add Squeeze and Unsqueeze --- Deeploy/OperatorDescriptor.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index 462b4c4fc7..0b5836fa2d 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -447,6 +447,20 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: attrDescriptors = [AttrDesc("axis", IntUnpack, default = 0)], ) +# Opset <= 11 +unsqueezeDesc = OperatorDescriptor( + inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [AttrDesc("axes", IntTupleUnpack)], +) + +# Opset <= 11 +squeezeDesc = OperatorDescriptor( + inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [AttrDesc("axes", IntTupleUnpack)], +) + defaultOperatorDescriptors: Dict[str, OperatorDescriptor] = { "Add": addDesc, "Concat": concatDesc, @@ -472,7 +486,9 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: "Slice": sliceDesc, "Softmax": softmaxDesc, "SoftmaxGrad": softmaxGradDesc, + "Squeeze": squeezeDesc, "Transpose": transposeDesc, + "Unsqueeze": unsqueezeDesc, "iHardswish": iHardswishDesc, "iLayerNorm": iLayerNormDesc, "iNoNorm": iNoNormDesc, From fd30dc727c161c2e4d4e63e5083b5ea518d45062 Mon Sep 17 00:00:00 2001 From: Luka 
Macan Date: Sun, 28 Sep 2025 16:46:06 +0200 Subject: [PATCH 22/80] Add Mul --- Deeploy/OperatorDescriptor.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index 0b5836fa2d..7f36e9a4bf 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -461,6 +461,12 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: attrDescriptors = [AttrDesc("axes", IntTupleUnpack)], ) +mulDesc = OperatorDescriptor( + inputDescriptor = IoDesc(["A", "B"]), + outputDescriptor = IoDesc("C"), + attrDescriptors = [], +) + defaultOperatorDescriptors: Dict[str, OperatorDescriptor] = { "Add": addDesc, "Concat": concatDesc, @@ -476,6 +482,7 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: "IntegerDiv": integerDivDescriptor, "LayerNormalization": layerNormalizationDesc, "MaxPool": maxPoolDesc, + "Mul": mulDesc, "Pad": padDescOld, "Quant": quantDesc, "RQIntegerDiv": requantizedIntegerDivDescriptor, From a3309edf2fd30eed2bf9a65fc89494fc00ff76a4 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Sun, 28 Sep 2025 16:53:01 +0200 Subject: [PATCH 23/80] Add MatMul, RQMatMul, MatMulInteger --- Deeploy/OperatorDescriptor.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index 7f36e9a4bf..edbaf6a530 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -467,6 +467,23 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: attrDescriptors = [], ) +matMulDesc = OperatorDescriptor( + inputDescriptor = IoDesc(["A", "B"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [], +) + +rqMatMulDesc = RequantizedOperatorDescriptor( + inputDescriptor = IoDesc(["A", "B", "add", "mul"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + # RequantizedShift attrs + AttrDesc("n_levels", IntUnpack), + AttrDesc("signed", BoolUnpack), + AttrDesc("div", IntUnpack), + ], +) + defaultOperatorDescriptors: Dict[str, OperatorDescriptor] = { "Add": addDesc, "Concat": concatDesc, @@ -481,11 +498,14 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: "ITAPartialMax": itaPartialMaxDesc, "IntegerDiv": integerDivDescriptor, "LayerNormalization": layerNormalizationDesc, + "MatMul": matMulDesc, + "MatMulInteger": matMulDesc, "MaxPool": maxPoolDesc, "Mul": mulDesc, "Pad": padDescOld, "Quant": quantDesc, "RQIntegerDiv": requantizedIntegerDivDescriptor, + "RQMatMul": rqMatMulDesc, "ReduceMean": reduceMeanDesc, "ReduceSum": reduceSumDesc, "RequantizedConv": requantizedConvDesc, From c758fccb89e67ceb676e89bcb89320723b24422f Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Sun, 28 Sep 2025 17:15:12 +0200 Subject: [PATCH 24/80] Add Gemm and RQGemm --- Deeploy/OperatorDescriptor.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index edbaf6a530..327c7b442d 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -484,6 +484,31 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: ], ) +gemmDesc = OperatorDescriptor( + inputDescriptor = IoDesc(["A", "B"], optional = ["C"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("alpha", FloatUnpack, default = 1.0), + AttrDesc("beta", FloatUnpack, default = 1.0), + AttrDesc("transA", BoolUnpack, default = False), + AttrDesc("transB", BoolUnpack, default = False), + ], +) + +rqGemmDesc = 
RequantizedOperatorDescriptor( + inputDescriptor = IoDesc(["A", "B", "C", "add", "mul"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("alpha", FloatUnpack, default = 1.0), + AttrDesc("beta", FloatUnpack, default = 1.0), + AttrDesc("transA", BoolUnpack, default = False), + AttrDesc("transB", BoolUnpack, default = False), + # RequantizedShift attrs + AttrDesc("n_levels", IntUnpack), + AttrDesc("signed", BoolUnpack), + AttrDesc("div", IntUnpack), + ]) + defaultOperatorDescriptors: Dict[str, OperatorDescriptor] = { "Add": addDesc, "Concat": concatDesc, @@ -494,6 +519,7 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: "Flatten": flattenDesc, "Gather": gatherDesc, "Gelu": geluDesc, + "Gemm": gemmDesc, "ITAMax": itaMaxDesc, "ITAPartialMax": itaPartialMaxDesc, "IntegerDiv": integerDivDescriptor, @@ -504,6 +530,7 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: "Mul": mulDesc, "Pad": padDescOld, "Quant": quantDesc, + "RQGemm": rqGemmDesc, "RQIntegerDiv": requantizedIntegerDivDescriptor, "RQMatMul": rqMatMulDesc, "ReduceMean": reduceMeanDesc, From 7e951d83f68eafd8f0241a6a17fbee6cdfa49516 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Sun, 28 Sep 2025 22:06:59 +0200 Subject: [PATCH 25/80] Add RequantizedGemm --- Deeploy/OperatorDescriptor.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index 327c7b442d..e347dad8a6 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -509,6 +509,20 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: AttrDesc("div", IntUnpack), ]) +requantizedGemmDesc = RequantizedOperatorDescriptor( + inputDescriptor = IoDesc(["A", "B", "add", "mul"]), # Important diff to RQGemm + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("alpha", FloatUnpack, default = 1.0), + AttrDesc("beta", FloatUnpack, default = 1.0), + AttrDesc("transA", BoolUnpack, default = False), + AttrDesc("transB", BoolUnpack, default = False), + # RequantizedShift attrs + AttrDesc("n_levels", IntUnpack), + AttrDesc("signed", BoolUnpack), + AttrDesc("div", IntUnpack), + ]) + defaultOperatorDescriptors: Dict[str, OperatorDescriptor] = { "Add": addDesc, "Concat": concatDesc, @@ -536,6 +550,7 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: "ReduceMean": reduceMeanDesc, "ReduceSum": reduceSumDesc, "RequantizedConv": requantizedConvDesc, + "RequantizedGemm": requantizedGemmDesc, "RequantizediGELU": requantizedIGeluDesc, "Slice": sliceDesc, "Softmax": softmaxDesc, From 1ab763e2d2f7abb81b8939ecb8741cf65dab402f Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Mon, 29 Sep 2025 08:24:27 +0200 Subject: [PATCH 26/80] Fix transA and transB being treated like ints --- Deeploy/Targets/Generic/Parsers.py | 47 ++++++++++++------- .../Generic/Templates/FloatGemmTemplate.py | 4 +- .../Targets/Generic/Templates/GemmTemplate.py | 4 +- Deeploy/Targets/Generic/TypeCheckers.py | 6 +-- .../Targets/MemPool/Templates/GemmTemplate.py | 4 +- .../MemPool/Templates/RQGemmTemplate.py | 8 ++-- .../PULPOpen/Templates/FloatGemmTemplate.py | 4 +- .../TileConstraints/MatMulTileConstraint.py | 8 ++-- Deeploy/Targets/Snitch/Parsers.py | 8 +--- 9 files changed, 50 insertions(+), 43 deletions(-) diff --git a/Deeploy/Targets/Generic/Parsers.py b/Deeploy/Targets/Generic/Parsers.py index b43672d9c9..f6c2ee9784 100644 --- a/Deeploy/Targets/Generic/Parsers.py +++ b/Deeploy/Targets/Generic/Parsers.py @@ -1690,27 +1690,40 @@ def parseNodeCtxt(self, 
node.inputs.append(zeroTensor) self.operatorRepresentation['C'] = f'{node.name}_C_Tensor' + buffA = ctxt.lookup(node.inputs[0].name) + assert isinstance(buffA, VariableBuffer) + buffB = ctxt.lookup(node.inputs[1].name) + assert isinstance(buffB, VariableBuffer) + buffOut = ctxt.lookup(node.outputs[0].name) + assert isinstance(buffOut, VariableBuffer) + # Store the input and output shapes in the operator representation - self.operatorRepresentation['size'] = np.prod(ctxt.lookup(node.inputs[0].name).shape) - self.operatorRepresentation['A_shape'] = ctxt.lookup(node.inputs[0].name).shape - self.operatorRepresentation['B_shape'] = ctxt.lookup(node.inputs[1].name).shape - self.operatorRepresentation['data_out_shape'] = ctxt.lookup(node.outputs[0].name).shape + self.operatorRepresentation['size'] = np.prod(buffA.shape) + self.operatorRepresentation['A_shape'] = buffA.shape + self.operatorRepresentation['B_shape'] = buffB.shape + self.operatorRepresentation['data_out_shape'] = buffOut.shape + + if self.operatorRepresentation['transA']: + N_A, M = buffA.shape[-2:] + else: + M, N_A = buffA.shape[-2:] + + if self.operatorRepresentation['transB']: + O, N_B = buffB.shape[-2:] + else: + N_B, O = buffB.shape[-2:] # Store the matrix dimensions in the operator representation - self.operatorRepresentation['M'] = ctxt.lookup( - node.inputs[0].name).shape[(-2 + self.operatorRepresentation['transA'])] - self.operatorRepresentation['N'] = ctxt.lookup( - node.inputs[0].name).shape[(-1 - self.operatorRepresentation['transA'])] - self.operatorRepresentation['O'] = ctxt.lookup( - node.inputs[1].name).shape[(-1 - self.operatorRepresentation['transB'])] + self.operatorRepresentation['M'] = M + self.operatorRepresentation['N'] = N_A + self.operatorRepresentation['O'] = O # SCHEREMO: Assert that reduction dimension is the same on both matrices - ret = ret and (self.operatorRepresentation['N'] == ctxt.lookup( - node.inputs[1].name).shape[-2 + self.operatorRepresentation['transB']]) + ret = ret and N_A == N_B # Check if the batch dimensions are compatible - self.operatorRepresentation['batch_A'] = np.prod(ctxt.lookup(node.inputs[0].name).shape[:-2]) - self.operatorRepresentation['batch_B'] = np.prod(ctxt.lookup(node.inputs[1].name).shape[:-2]) + self.operatorRepresentation['batch_A'] = np.prod(buffA.shape[:-2]) + self.operatorRepresentation['batch_B'] = np.prod(buffB.shape[:-2]) self.operatorRepresentation['batch'] = max(self.operatorRepresentation['batch_A'], self.operatorRepresentation['batch_B']) @@ -1722,10 +1735,10 @@ def parseNodeCtxt(self, ), "Incompatible dimensions for input matrices. Broadcasting not yet supported for dimensions larger than 1 on one of the inputs, or equal dimensions between the 2." 
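The reworked dimension bookkeeping is easy to sanity-check with plain numpy shapes. The sketch below (illustrative, not the parser itself) mirrors the convention that op(A) is (M, N) and op(B) is (N, O), with the trans* flags marking a stored transposed operand:

    import numpy as np

    def gemm_dims(a_shape, b_shape, transA, transB):
        if transA:
            n_a, m = a_shape[-2:]
        else:
            m, n_a = a_shape[-2:]
        if transB:
            o, n_b = b_shape[-2:]
        else:
            n_b, o = b_shape[-2:]
        assert n_a == n_b, "reduction dimensions must match"
        return m, n_a, o

    A = np.random.rand(8, 4)           # stored as (N, M): transA = True
    B = np.random.rand(8, 16)          # stored as (N, O): transB = False
    M, N, O = gemm_dims(A.shape, B.shape, transA = True, transB = False)
    assert (A.T @ B).shape == (M, O)   # (4, 16)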
# Create flags for same dimension between each input matrix and the final batch dimension - self.operatorRepresentation['A_batched'] = (self.operatorRepresentation['batch'] == np.prod( - ctxt.lookup(node.inputs[0].name).shape[:-2])) + self.operatorRepresentation['A_batched'] = ( + self.operatorRepresentation['batch'] == self.operatorRepresentation['batch_A']) self.operatorRepresentation['W_batched'] = self.operatorRepresentation['B_batched'] = ( - self.operatorRepresentation['batch'] == np.prod(ctxt.lookup(node.inputs[1].name).shape[:-2])) + self.operatorRepresentation['batch'] == self.operatorRepresentation['batch_B']) return ctxt, ret diff --git a/Deeploy/Targets/Generic/Templates/FloatGemmTemplate.py b/Deeploy/Targets/Generic/Templates/FloatGemmTemplate.py index 69bea8484e..ab78e742d0 100644 --- a/Deeploy/Targets/Generic/Templates/FloatGemmTemplate.py +++ b/Deeploy/Targets/Generic/Templates/FloatGemmTemplate.py @@ -21,8 +21,8 @@ ${M}, ${N}, ${O}, - ${transA}, - ${transB} + ${int(transA)}, + ${int(transB)} ); % if A_batched: diff --git a/Deeploy/Targets/Generic/Templates/GemmTemplate.py b/Deeploy/Targets/Generic/Templates/GemmTemplate.py index 62d760d15c..371004a8e7 100644 --- a/Deeploy/Targets/Generic/Templates/GemmTemplate.py +++ b/Deeploy/Targets/Generic/Templates/GemmTemplate.py @@ -56,8 +56,8 @@ def alignToContext(self, ctxt: NetworkContext, ${O}, ${alpha}, ${beta}, - ${transA}, - ${transB}, + ${int(transA)}, + ${int(transB)}, ${A_offset}, ${B_offset}, ${C_offset}, diff --git a/Deeploy/Targets/Generic/TypeCheckers.py b/Deeploy/Targets/Generic/TypeCheckers.py index c2c8d436f8..1907a0aea0 100644 --- a/Deeploy/Targets/Generic/TypeCheckers.py +++ b/Deeploy/Targets/Generic/TypeCheckers.py @@ -185,10 +185,8 @@ def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[ def _inferNumLevels(self, inputs: List[VariableBuffer], operatorRepresentation: OperatorRepresentation) -> List[int]: - return [ - 2**((self.input_types[0].referencedType.typeWidth) * 2) * - inputs[0].shape[-1 - operatorRepresentation['transA']] - ] + O = inputs[0].shape[-1] if not operatorRepresentation['transA'] else inputs[0].shape[-2] + return [2**((self.input_types[0].referencedType.typeWidth) * 2) * O] def _inferSignedness(self, inputs: List[VariableBuffer], operatorRepresentation: OperatorRepresentation) -> List[bool]: diff --git a/Deeploy/Targets/MemPool/Templates/GemmTemplate.py b/Deeploy/Targets/MemPool/Templates/GemmTemplate.py index e5d53bd255..54cc86f6af 100644 --- a/Deeploy/Targets/MemPool/Templates/GemmTemplate.py +++ b/Deeploy/Targets/MemPool/Templates/GemmTemplate.py @@ -127,8 +127,8 @@ def hoistTransientBuffers(self, ctxt: NetworkContext, ${O}, ${alpha}, ${beta}, - ${transA}, - ${transB}, + ${int(transA)}, + ${int(transB)}, ${A_offset}, ${B_offset}, ${C_offset}, diff --git a/Deeploy/Targets/MemPool/Templates/RQGemmTemplate.py b/Deeploy/Targets/MemPool/Templates/RQGemmTemplate.py index e6a42768e8..f544841acf 100644 --- a/Deeploy/Targets/MemPool/Templates/RQGemmTemplate.py +++ b/Deeploy/Targets/MemPool/Templates/RQGemmTemplate.py @@ -145,8 +145,8 @@ def hoistTransientBuffers(self, ctxt: NetworkContext, ${O}, ${alpha}, ${beta}, - ${transA}, - ${transB}, + ${int(transA)}, + ${int(transB)}, ${mul}, ${add}, ${log2Dstring}, @@ -170,8 +170,8 @@ def hoistTransientBuffers(self, ctxt: NetworkContext, ${O}, ${alpha}, ${beta}, - ${transA}, - ${transB}, + ${int(transA)}, + ${int(transB)}, ${mul}, ${add}, ${log2Dstring}, diff --git a/Deeploy/Targets/PULPOpen/Templates/FloatGemmTemplate.py 
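The int() casts in these templates are not cosmetic. Once transA/transB are genuine Python bools, the Mako-style ${...} placeholders would otherwise interpolate them verbatim, and str(True) is not a C literal:

    transA = True
    print(f"gemm(..., {transA}, ...)")       # gemm(..., True, ...)  -> does not compile as C
    print(f"gemm(..., {int(transA)}, ...)")  # gemm(..., 1, ...)     -> valid C integer literal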
b/Deeploy/Targets/PULPOpen/Templates/FloatGemmTemplate.py index f4c22b2c22..21044a5eca 100644 --- a/Deeploy/Targets/PULPOpen/Templates/FloatGemmTemplate.py +++ b/Deeploy/Targets/PULPOpen/Templates/FloatGemmTemplate.py @@ -20,8 +20,8 @@ ${M}, ${N}, ${O}, - ${transA}, - ${transB} + ${int(transA)}, + ${int(transB)} ); ref_${data_out}_${A} += ${M} * ${N}; diff --git a/Deeploy/Targets/PULPOpen/TileConstraints/MatMulTileConstraint.py b/Deeploy/Targets/PULPOpen/TileConstraints/MatMulTileConstraint.py index 8b795be88e..a9259a15cf 100644 --- a/Deeploy/Targets/PULPOpen/TileConstraints/MatMulTileConstraint.py +++ b/Deeploy/Targets/PULPOpen/TileConstraints/MatMulTileConstraint.py @@ -32,13 +32,13 @@ def addGeometricalConstraint(tilerModel: TilerModel, parseDict: Dict, ctxt: Netw tensorsShapeLen = len(bufferA.shape) AFirstDimVar = tilerModel.getTensorDimVar(tensorName = bufferA.name, - dimIdx = (tensorsShapeLen - 2) + parseDict['transA']) + dimIdx = (tensorsShapeLen - 2) + int(parseDict['transA'])) ASecondDimVar = tilerModel.getTensorDimVar(tensorName = bufferA.name, - dimIdx = (tensorsShapeLen - 1) - parseDict['transA']) + dimIdx = (tensorsShapeLen - 1) - int(parseDict['transA'])) BFirstDimVar = tilerModel.getTensorDimVar(tensorName = bufferB.name, - dimIdx = (tensorsShapeLen - 2) + parseDict['transB']) + dimIdx = (tensorsShapeLen - 2) + int(parseDict['transB'])) BSecondDimVar = tilerModel.getTensorDimVar(tensorName = bufferB.name, - dimIdx = (tensorsShapeLen - 1) - parseDict['transB']) + dimIdx = (tensorsShapeLen - 1) - int(parseDict['transB'])) outputFirstDimVar = tilerModel.getTensorDimVar(tensorName = outputBuffer.name, dimIdx = (tensorsShapeLen - 2)) outputSecondDimVar = tilerModel.getTensorDimVar(tensorName = outputBuffer.name, dimIdx = (tensorsShapeLen - 1)) diff --git a/Deeploy/Targets/Snitch/Parsers.py b/Deeploy/Targets/Snitch/Parsers.py index 0051994686..51b32db210 100644 --- a/Deeploy/Targets/Snitch/Parsers.py +++ b/Deeploy/Targets/Snitch/Parsers.py @@ -18,9 +18,7 @@ def parseNode(self, node: gs.Node) -> bool: if not ret: return False - if not all([ - self.operatorRepresentation['transA'] == 0, - ]): + if self.operatorRepresentation['transA']: return False return True @@ -50,9 +48,7 @@ def parseNode(self, node: gs.Node) -> bool: if not ret: return False - if not all([ - self.operatorRepresentation['transA'] == 0, - ]): + if self.operatorRepresentation['transA']: return False return True From 1ec6cde925b9da692b80e306d12af590f8fa9446 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Sun, 28 Sep 2025 22:02:10 +0200 Subject: [PATCH 27/80] Add LinearAttention --- Deeploy/OperatorDescriptor.py | 37 ++++++++++++++++++++++ Deeploy/Targets/Generic/Parsers.py | 51 ++++++++++++------------------ 2 files changed, 57 insertions(+), 31 deletions(-) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index e347dad8a6..93ad2643d7 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -523,6 +523,42 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: AttrDesc("div", IntUnpack), ]) +linearAttentionDesc = OperatorDescriptor( + inputDescriptor = IoDesc( + ["q", "k", "v", "wq_weight", "wq_bias", "wk_weight", "wk_bias", "wv_weight", "wv_bias", "wo_weight", + "wo_bias"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("preattn_requant_mul", IntTupleUnpack), + AttrDesc("preattn_requant_shift", IntTupleUnpack), + AttrDesc("preattn_requant_div", IntTupleUnpack), + AttrDesc("normalizer_requant_mul", IntTupleUnpack), + AttrDesc("normalizer_requant_shift", IntTupleUnpack),
AttrDesc("normalizer_requant_div", IntTupleUnpack), + AttrDesc("postattn_requant_mul", IntTupleUnpack), + AttrDesc("postattn_requant_shift", IntTupleUnpack), + AttrDesc("postattn_requant_div", IntTupleUnpack), + AttrDesc("wo_requant_mul", IntTupleUnpack), + AttrDesc("wo_requant_shift", IntTupleUnpack), + AttrDesc("wo_requant_div", IntTupleUnpack), + AttrDesc("wq_requant_mul", IntTupleUnpack), + AttrDesc("wq_requant_shift", IntTupleUnpack), + AttrDesc("wq_requant_div", IntTupleUnpack), + AttrDesc("wk_requant_mul", IntTupleUnpack), + AttrDesc("wk_requant_shift", IntTupleUnpack), + AttrDesc("wk_requant_div", IntTupleUnpack), + AttrDesc("wv_requant_mul", IntTupleUnpack), + AttrDesc("wv_requant_shift", IntTupleUnpack), + AttrDesc("wv_requant_div", IntTupleUnpack), + AttrDesc("Delta", IntUnpack), + AttrDesc("eps", IntUnpack), + AttrDesc("act_type", IntUnpack), + AttrDesc("n_levels", IntUnpack), + AttrDesc("dim", IntUnpack), + AttrDesc("dim_head", IntUnpack), + AttrDesc("heads", IntUnpack), + ], +) + defaultOperatorDescriptors: Dict[str, OperatorDescriptor] = { "Add": addDesc, "Concat": concatDesc, @@ -538,6 +574,7 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: "ITAPartialMax": itaPartialMaxDesc, "IntegerDiv": integerDivDescriptor, "LayerNormalization": layerNormalizationDesc, + "LinearAttention": linearAttentionDesc, "MatMul": matMulDesc, "MatMulInteger": matMulDesc, "MaxPool": maxPoolDesc, diff --git a/Deeploy/Targets/Generic/Parsers.py b/Deeploy/Targets/Generic/Parsers.py index f6c2ee9784..9389034969 100644 --- a/Deeploy/Targets/Generic/Parsers.py +++ b/Deeploy/Targets/Generic/Parsers.py @@ -1472,37 +1472,26 @@ def parseNode(self, node: gs.Node) -> (bool): ]) if ret: - self.operatorRepresentation['preattn_requant_mul'] = int(node.attrs['preattn_requant_mul'].values) - self.operatorRepresentation['preattn_requant_shift'] = int(node.attrs['preattn_requant_shift'].values) - self.operatorRepresentation['preattn_requant_div'] = int( - math.log2(int(node.attrs['preattn_requant_div'].values))) - self.operatorRepresentation['normalizer_requant_mul'] = int(node.attrs['normalizer_requant_mul'].values) - self.operatorRepresentation['normalizer_requant_shift'] = int(node.attrs['normalizer_requant_shift'].values) - self.operatorRepresentation['normalizer_requant_div'] = int( - math.log2(int(node.attrs['normalizer_requant_div'].values))) - self.operatorRepresentation['postattn_requant_mul'] = int(node.attrs['postattn_requant_mul'].values) - self.operatorRepresentation['postattn_requant_shift'] = int(node.attrs['postattn_requant_shift'].values) - self.operatorRepresentation['postattn_requant_div'] = int( - math.log2(int(node.attrs['postattn_requant_div'].values))) - self.operatorRepresentation['wo_requant_mul'] = int(node.attrs['wo_requant_mul'].values) - self.operatorRepresentation['wo_requant_shift'] = int(node.attrs['wo_requant_shift'].values) - self.operatorRepresentation['wo_requant_div'] = int(math.log2(int(node.attrs['wo_requant_div'].values))) - self.operatorRepresentation['wq_requant_mul'] = int(node.attrs['wq_requant_mul'].values) - self.operatorRepresentation['wq_requant_shift'] = int(node.attrs['wq_requant_shift'].values) - self.operatorRepresentation['wq_requant_div'] = int(math.log2(int(node.attrs['wq_requant_div'].values))) - self.operatorRepresentation['wk_requant_mul'] = int(node.attrs['wk_requant_mul'].values) - self.operatorRepresentation['wk_requant_shift'] = int(node.attrs['wk_requant_shift'].values) - self.operatorRepresentation['wk_requant_div'] = 
int(math.log2(int(node.attrs['wk_requant_div'].values))) - self.operatorRepresentation['wv_requant_mul'] = int(node.attrs['wv_requant_mul'].values) - self.operatorRepresentation['wv_requant_shift'] = int(node.attrs['wv_requant_shift'].values) - self.operatorRepresentation['wv_requant_div'] = int(math.log2(int(node.attrs['wv_requant_div'].values))) - self.operatorRepresentation['Delta'] = int(node.attrs['Delta']) - self.operatorRepresentation['eps'] = int(node.attrs['eps']) - self.operatorRepresentation['act_type'] = int(node.attrs['act_type']) - self.operatorRepresentation['n_levels'] = int(node.attrs['n_levels'].values) - self.operatorRepresentation['dim'] = int(node.attrs['dim'].values) - self.operatorRepresentation['dim_head'] = int(node.attrs['dim_head'].values) - self.operatorRepresentation['heads'] = int(node.attrs['heads'].values) + self.operatorRepresentation.update(node.attrs) + + # All *_div attrs are power-of-two divisors and are replaced by their log2 + log2Attrs = [ + "preattn_requant_div", + "normalizer_requant_div", + "postattn_requant_div", + "wo_requant_div", + "wq_requant_div", + "wk_requant_div", + "wv_requant_div", + ] + + for attr in log2Attrs: + value = self.operatorRepresentation[attr] + assert isinstance(value, int) + self.operatorRepresentation[attr] = int(math.log2(value)) return ret From 565cd95b167a41554b66a75a79f558b38f80c1b2 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Sun, 28 Sep 2025 22:11:23 +0200 Subject: [PATCH 28/80] Add CLCA --- Deeploy/OperatorDescriptor.py | 23 +++++++++++++++++++++ Deeploy/Targets/Generic/Parsers.py | 10 +--------- 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index 93ad2643d7..f25926c1cf 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -559,8 +559,31 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: ], ) +clcaDesc = OperatorDescriptor( + inputDescriptor = IoDesc([ + "q", "k", "wq_weight", "wq_bias", "wk_weight", "wk_bias", "wo_weight", "wo_bias", "wq_requant_mul", + "wq_requant_add", "wq_requant_div", "wk_requant_mul", "wk_requant_add", "wk_requant_div", "wv_requant_mul", + "wv_requant_add", "wv_requant_div", "kdiv_requant_mul", "kdiv_requant_add", "kdiv_requant_div", + "preattn_requant_mul", "preattn_requant_add", "preattn_requant_div", "postattn_requant_mul", + "postattn_requant_add", "postattn_requant_div", "wo_requant_mul", "wo_requant_add", "wo_requant_div" + ]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("Delta", IntUnpack), + AttrDesc("eps", IntUnpack), + AttrDesc("eta", IntUnpack), + AttrDesc("act_type", IntUnpack), + AttrDesc("n_levels", IntUnpack), + AttrDesc("dim", IntUnpack), + AttrDesc("dim_head", IntUnpack), + AttrDesc("out_dim", IntUnpack), + AttrDesc("heads", IntUnpack), + ], +) + defaultOperatorDescriptors: Dict[str, OperatorDescriptor] = { "Add": addDesc, + "CLCA": clcaDesc, "Concat": concatDesc, "Conv": convDesc, "DebugPrint": debugPrintDesc, diff --git a/Deeploy/Targets/Generic/Parsers.py b/Deeploy/Targets/Generic/Parsers.py index 9389034969..d0f499b93e 100644 --- a/Deeploy/Targets/Generic/Parsers.py +++ b/Deeploy/Targets/Generic/Parsers.py @@ -1533,15 +1533,7 @@ def parseNode(self, node: gs.Node) -> (bool): ]) if ret: - self.operatorRepresentation['Delta'] = int(node.attrs['Delta']) - self.operatorRepresentation['eps'] = int(node.attrs['eps']) - self.operatorRepresentation['eta'] = int(node.attrs['eta']) -
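The divisor attributes are powers of two, so they are stored as shift amounts; the conversion must run exactly once per attribute, which is why the list above holds each name once. A quick illustration:

    import math

    div = 256
    log2D = int(math.log2(div))
    assert log2D == 8                    # x / 256 == x >> 8 for non-negative x
    assert (1000 >> log2D) == 1000 // div

    # Running the conversion a second time would silently corrupt the shift:
    assert int(math.log2(log2D)) == 3    # 8 -> 3, no longer related to 256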
self.operatorRepresentation['act_type'] = int(node.attrs['act_type']) - self.operatorRepresentation['n_levels'] = int(node.attrs['n_levels'].values) - self.operatorRepresentation['dim'] = int(node.attrs['dim'].values) - self.operatorRepresentation['dim_head'] = int(node.attrs['dim_head'].values) - self.operatorRepresentation['out_dim'] = int(node.attrs['out_dim'].values) - self.operatorRepresentation['heads'] = int(node.attrs['heads'].values) + self.operatorRepresentation.update(node.attrs) return ret From 26cf6486a54089b5177522a5e1cfb76920cd1da6 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Sun, 28 Sep 2025 22:13:11 +0200 Subject: [PATCH 29/80] Add IntegerMean --- Deeploy/OperatorDescriptor.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index f25926c1cf..9818601193 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -596,6 +596,7 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: "ITAMax": itaMaxDesc, "ITAPartialMax": itaPartialMaxDesc, "IntegerDiv": integerDivDescriptor, + "IntegerMean": reduceMeanDesc, "LayerNormalization": layerNormalizationDesc, From 8b00f48d013c141a6c8a9373dff0573c186534f2 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Sun, 28 Sep 2025 22:18:31 +0200 Subject: [PATCH 30/80] Add MHSA --- Deeploy/OperatorDescriptor.py | 36 +++++++++++++++++++++++++++++- Deeploy/Targets/Generic/Parsers.py | 18 +-------------- 2 files changed, 36 insertions(+), 18 deletions(-) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index 9818601193..925a1ac0e3 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -3,7 +3,7 @@ # SPDX-License-Identifier: Apache-2.0 from enum import Enum, IntEnum -from typing import Any, Dict, Tuple +from typing import Any, Dict, Tuple, Union import numpy as np import onnx_graphsurgeon as gs @@ -51,6 +51,13 @@ def FloatTupleUnpack(value: Any) -> Tuple[float, ...]: return (FloatUnpack(value),) +def IntTupleIfNotSingleItemUnpack(value: Any) -> Union[int, Tuple[int, ...]]: + try: + return IntUnpack(value) + except Exception: + return IntTupleUnpack(value) + + def attrToTensor(node: gs.Node, attr: str) -> None: values = node.attrs[attr] if isinstance(values, (int, float)): @@ -581,6 +588,32 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: ], ) +mhsaDesc = OperatorDescriptor( + inputDescriptor = IoDesc( + ["q", "k", "v", "wq_weight", "wq_bias", "wk_weight", "wk_bias", "wv_weight", "wv_bias", "wo_weight", + "wo_bias"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("preattn_requant_mul", IntTupleIfNotSingleItemUnpack), + AttrDesc("preattn_requant_div", IntTupleIfNotSingleItemUnpack), + AttrDesc("postattn_requant_mul", IntTupleIfNotSingleItemUnpack), + AttrDesc("postattn_requant_div", IntTupleIfNotSingleItemUnpack), + AttrDesc("wo_requant_mul", IntTupleIfNotSingleItemUnpack), + AttrDesc("wo_requant_div", IntTupleIfNotSingleItemUnpack), + AttrDesc("wq_requant_mul", IntTupleIfNotSingleItemUnpack), + AttrDesc("wq_requant_div", IntTupleIfNotSingleItemUnpack), + AttrDesc("wk_requant_mul", IntTupleIfNotSingleItemUnpack), + AttrDesc("wk_requant_div", IntTupleIfNotSingleItemUnpack), + AttrDesc("wv_requant_mul", IntTupleIfNotSingleItemUnpack), + AttrDesc("wv_requant_div", IntTupleIfNotSingleItemUnpack), + AttrDesc("n_levels", IntUnpack), + AttrDesc("dim", IntUnpack), + AttrDesc("dim_head", IntUnpack), + AttrDesc("heads",
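IntTupleIfNotSingleItemUnpack exists because MHSA requant parameters may be exported either as per-tensor scalars or as per-head vectors; it keeps the scalar form when IntUnpack accepts the value and falls back to a tuple otherwise. A sketch of the intended behavior (assuming IntUnpack rejects sequences, which is what the try/except relies on):

    from Deeploy.OperatorDescriptor import IntTupleIfNotSingleItemUnpack

    assert IntTupleIfNotSingleItemUnpack(5) == 5                  # scalar stays scalar
    assert IntTupleIfNotSingleItemUnpack([1, 2, 3]) == (1, 2, 3)  # vector becomes a tuple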
IntUnpack), + AttrDesc("signed", BoolUnpack), + ], +) + defaultOperatorDescriptors: Dict[str, OperatorDescriptor] = { "Add": addDesc, "CLCA": clcaDesc, @@ -599,6 +632,7 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: "IntegerMean": reduceMeanDesc, "LayerNormalization": layerNormalizationDesc, "LinearAttention": linearAttentionDesc, + "MHSA": mhsaDesc, "MatMul": matMulDesc, "MatMulInteger": matMulDesc, "MaxPool": maxPoolDesc, diff --git a/Deeploy/Targets/Generic/Parsers.py b/Deeploy/Targets/Generic/Parsers.py index d0f499b93e..8a3e0662d6 100644 --- a/Deeploy/Targets/Generic/Parsers.py +++ b/Deeploy/Targets/Generic/Parsers.py @@ -1408,23 +1408,7 @@ def parseNode(self, node: gs.Node) -> (bool): ]) if ret: - self.operatorRepresentation['preattn_requant_mul'] = node.attrs['preattn_requant_mul'] - self.operatorRepresentation['preattn_requant_div'] = node.attrs['preattn_requant_div'] - self.operatorRepresentation['postattn_requant_mul'] = node.attrs['postattn_requant_mul'] - self.operatorRepresentation['postattn_requant_div'] = node.attrs['postattn_requant_div'] - self.operatorRepresentation['wo_requant_mul'] = node.attrs['wo_requant_mul'] - self.operatorRepresentation['wo_requant_div'] = node.attrs['wo_requant_div'] - self.operatorRepresentation['wq_requant_mul'] = node.attrs['wq_requant_mul'] - self.operatorRepresentation['wq_requant_div'] = node.attrs['wq_requant_div'] - self.operatorRepresentation['wk_requant_mul'] = node.attrs['wk_requant_mul'] - self.operatorRepresentation['wk_requant_div'] = node.attrs['wk_requant_div'] - self.operatorRepresentation['wv_requant_mul'] = node.attrs['wv_requant_mul'] - self.operatorRepresentation['wv_requant_div'] = node.attrs['wv_requant_div'] - self.operatorRepresentation['n_levels'] = int(node.attrs['n_levels']) - self.operatorRepresentation['dim'] = int(node.attrs['dim']) # Sequence Length - self.operatorRepresentation['dim_head'] = int(node.attrs['dim_head']) # Projection Size - self.operatorRepresentation['heads'] = int(node.attrs['heads']) - self.operatorRepresentation['signed'] = int(node.attrs['signed']) + self.operatorRepresentation.update(node.attrs) return ret From 6ecf95db33066d0fec6c32551473c45b27ccebf2 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Sun, 28 Sep 2025 22:28:14 +0200 Subject: [PATCH 31/80] Add Relu, Reshape, RequantShift --- Deeploy/OperatorDescriptor.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index 925a1ac0e3..daa6c41f9b 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -614,6 +614,28 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: ], ) +reluDesc = OperatorDescriptor( + inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [], +) + +reshapeDesc = OperatorDescriptor( + inputDescriptor = IoDesc(["data_in", "shape"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [], +) + +requantShiftDesc = RequantizedOperatorDescriptor( + inputDescriptor = IoDesc(["data_in", "mul", "add"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("n_levels", IntUnpack), + AttrDesc("signed", BoolUnpack), + AttrDesc("div", IntUnpack), + ], +) + defaultOperatorDescriptors: Dict[str, OperatorDescriptor] = { "Add": addDesc, "CLCA": clcaDesc, @@ -644,9 +666,12 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: "RQMatMul": rqMatMulDesc, "ReduceMean": reduceMeanDesc, "ReduceSum": reduceSumDesc, + "Relu": reluDesc, 
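RequantShift's three inputs and three attributes parameterize the usual scale, add, shift, clamp pipeline; below is a plausible numpy reference model of that arithmetic (the deployed kernels and templates remain the authority on rounding and clamping details):

    import numpy as np

    def requant_shift(x, mul, add, div, n_levels, signed):
        # Multiply, add the rounding bias, divide by a power of two via an
        # arithmetic shift, then clamp to n_levels output levels.
        y = (x * mul + add) >> int(np.log2(div))
        lo, hi = (-n_levels // 2, n_levels // 2 - 1) if signed else (0, n_levels - 1)
        return np.clip(y, lo, hi)

    x = np.arange(-4, 5, dtype = np.int64)
    print(requant_shift(x, mul = 3, add = 8, div = 16, n_levels = 256, signed = True))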
"RequantizedConv": requantizedConvDesc, "RequantizedGemm": requantizedGemmDesc, "RequantizediGELU": requantizedIGeluDesc, + "RequantShift": requantShiftDesc, + "Reshape": reshapeDesc, "Slice": sliceDesc, "Softmax": softmaxDesc, "SoftmaxGrad": softmaxGradDesc, From 9a577a39e8c198a38ef8dc1e717a6be216c8df91 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Sun, 28 Sep 2025 22:56:46 +0200 Subject: [PATCH 32/80] Add RequantizedAdd --- Deeploy/OperatorDescriptor.py | 47 +++++++++++++++++++++++++++++- Deeploy/Targets/Generic/Parsers.py | 32 ++++---------------- 2 files changed, 52 insertions(+), 27 deletions(-) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index daa6c41f9b..f9f5532bfa 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -355,7 +355,7 @@ class RequantizedOperatorDescriptor(OperatorDescriptor): def canonicalize(self, node: gs.Node, opset: int) -> bool: if "n_levels_out" in node.attrs and "n_levels" in node.attrs: # TODO: Change to log - print("[WARNING] RequantizedConv cannot have n_levels_out and n_levels in it's attributes") + print("[WARNING] Requantized operator cannot have n_levels_out and n_levels in its attributes") return False if "n_levels_out" in node.attrs: @@ -636,6 +636,50 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: ], ) + +class RequantizedAddDescriptor(OperatorDescriptor): + + def canonicalize(self, node: gs.Node, opset: int) -> bool: + for tensor in ["rqs1", "rqs2", "rqsOut"]: + n_levels = f"{tensor}_n_levels" + n_levels_out = f"{tensor}_n_levels_out" + if n_levels_out in node.attrs and n_levels in node.attrs: + # TODO: Change to log + print( + f"[WARNING] RequantizedAdd tensor {tensor} cannot have {n_levels_out} and {n_levels} in its attributes" + ) + return False + + if n_levels_out in node.attrs: + node.attrs[n_levels] = node.attrs[n_levels_out] + node.attrs.pop(n_levels_out) + + return super().canonicalize(node, opset) + + +requantizedAddDesc = RequantizedAddDescriptor( + inputDescriptor = IoDesc(["data_in_0", "data_in_1"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("rqs1_n_levels", IntUnpack), + AttrDesc("rqs1_mul", IntUnpack), + AttrDesc("rqs1_add", IntUnpack), + AttrDesc("rqs1_div", IntUnpack), + AttrDesc("rqs1_signed", BoolUnpack), + AttrDesc("rqs1_n_levels", IntUnpack), + AttrDesc("rqs2_mul", IntUnpack), + AttrDesc("rqs2_add", IntUnpack), + AttrDesc("rqs2_div", IntUnpack), + AttrDesc("rqs2_signed", BoolUnpack), + AttrDesc("rqs2_n_levels", IntUnpack), + AttrDesc("rqsOut_mul", IntUnpack), + AttrDesc("rqsOut_add", IntUnpack), + AttrDesc("rqsOut_div", IntUnpack), + AttrDesc("rqsOut_signed", BoolUnpack), + AttrDesc("rqsOut_n_levels", IntUnpack), + ], +) + defaultOperatorDescriptors: Dict[str, OperatorDescriptor] = { "Add": addDesc, "CLCA": clcaDesc, @@ -667,6 +711,7 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: "ReduceMean": reduceMeanDesc, "ReduceSum": reduceSumDesc, "Relu": reluDesc, + "RequantizedAdd": requantizedAddDesc, "RequantizedConv": requantizedConvDesc, "RequantizedGemm": requantizedGemmDesc, "RequantizediGELU": requantizedIGeluDesc, diff --git a/Deeploy/Targets/Generic/Parsers.py b/Deeploy/Targets/Generic/Parsers.py index 8a3e0662d6..ad3bad549d 100644 --- a/Deeploy/Targets/Generic/Parsers.py +++ b/Deeploy/Targets/Generic/Parsers.py @@ -2373,32 +2373,12 @@ def parseNode(self, node: gs.Node) -> bool: ]) if ret: - if 'rqs1_n_levels' in node.attrs: - self.operatorRepresentation['rqs1_n_levels'] = 
int(node.attrs['rqs1_n_levels'].values) - else: - self.operatorRepresentation['rqs1_n_levels'] = int(node.attrs['rqs1_n_levels_out'].values) - self.operatorRepresentation['rqs1_mul'] = int(node.attrs['rqs1_mul']) - self.operatorRepresentation['rqs1_add'] = int(node.attrs['rqs1_add']) - self.operatorRepresentation['rqs1_signed'] = int(node.attrs['rqs1_signed'].values) - self.operatorRepresentation['rqs1_log2D'] = int(math.log2(node.attrs['rqs1_div'].values)) - - if 'rqs2_n_levels' in node.attrs: - self.operatorRepresentation['rqs2_n_levels'] = int(node.attrs['rqs2_n_levels'].values) - else: - self.operatorRepresentation['rqs2_n_levels'] = int(node.attrs['rqs2_n_levels_out'].values) - self.operatorRepresentation['rqs2_mul'] = int(node.attrs['rqs2_mul']) - self.operatorRepresentation['rqs2_add'] = int(node.attrs['rqs2_add']) - self.operatorRepresentation['rqs2_signed'] = int(node.attrs['rqs2_signed'].values) - self.operatorRepresentation['rqs2_log2D'] = int(math.log2(node.attrs['rqs2_div'].values)) - - if 'rqsOut_n_levels' in node.attrs: - self.operatorRepresentation['rqsOut_n_levels'] = int(node.attrs['rqsOut_n_levels'].values) - else: - self.operatorRepresentation['rqsOut_n_levels'] = int(node.attrs['rqsOut_n_levels_out'].values) - self.operatorRepresentation['rqsOut_mul'] = int(node.attrs['rqsOut_mul']) - self.operatorRepresentation['rqsOut_add'] = int(node.attrs['rqsOut_add']) - self.operatorRepresentation['rqsOut_signed'] = int(node.attrs['rqsOut_signed'].values) - self.operatorRepresentation['rqsOut_log2D'] = int(math.log2(node.attrs['rqsOut_div'].values)) + self.operatorRepresentation.update(node.attrs) + + for tensor in ["rqs1", "rqs2", "rqsOut"]: + value = self.operatorRepresentation[f"{tensor}_div"] + assert isinstance(value, int) + self.operatorRepresentation[f"{tensor}_log2D"] = int(math.log2(value)) return ret From 8ae808a26466acd85fc5ed00caf27682719f1c81 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Sun, 28 Sep 2025 23:07:48 +0200 Subject: [PATCH 33/80] Add RequantizediHardswish --- Deeploy/OperatorDescriptor.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index f9f5532bfa..ecf077a480 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -277,6 +277,17 @@ class GeluApprox(str, Enum): AttrDesc("three", IntUnpack), ]) +requantizedIHardswishDesc = OperatorDescriptor(inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("one_over_six", IntUnpack), + AttrDesc("six", IntUnpack), + AttrDesc("three", IntUnpack), + AttrDesc("mul", IntUnpack), + AttrDesc("add", IntUnpack), + AttrDesc("shift", IntUnpack), + ]) + iNoNormDesc = OperatorDescriptor(inputDescriptor = IoDesc(["data_in", "weights", "bias"]), outputDescriptor = IoDesc("data_out"), attrDescriptors = [ @@ -715,6 +726,7 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: "RequantizedConv": requantizedConvDesc, "RequantizedGemm": requantizedGemmDesc, "RequantizediGELU": requantizedIGeluDesc, + "RequantizediHardswish": requantizedIHardswishDesc, "RequantShift": requantShiftDesc, "Reshape": reshapeDesc, "Slice": sliceDesc, From 5eece92c257bc1ab50143c446a09361ea61b64a4 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Sun, 28 Sep 2025 23:19:30 +0200 Subject: [PATCH 34/80] Add iGELU --- Deeploy/OperatorDescriptor.py | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/Deeploy/OperatorDescriptor.py 
b/Deeploy/OperatorDescriptor.py index ecf077a480..01c5fbeb08 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -256,11 +256,22 @@ class GeluApprox(str, Enum): none = "none" -geluDesc = OperatorDescriptor(inputDescriptor = IoDesc("data_in"), - outputDescriptor = IoDesc("data_out"), - attrDescriptors = [ - AttrDesc("approximate", GeluApprox, default = GeluApprox.none), - ]) +geluDesc = OperatorDescriptor( + inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("approximate", GeluApprox, default = GeluApprox.none), + ], +) + +iGeluDesc = OperatorDescriptor( + inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("b", IntUnpack), + AttrDesc("one", IntUnpack), + ], +) requantizedIGeluDesc = OperatorDescriptor(inputDescriptor = IoDesc(["data_in", "mul", "add", "shift"]), outputDescriptor = IoDesc("data_out"), @@ -691,6 +702,12 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: ], ) +sgdDesc = OperatorDescriptor( + inputDescriptor = IoDesc(["weight", "grad"]), + outputDescriptor = IoDesc("weight_updated"), + attrDescriptors = [AttrDesc("lr", FloatUnpack)], +) + defaultOperatorDescriptors: Dict[str, OperatorDescriptor] = { "Add": addDesc, "CLCA": clcaDesc, @@ -729,12 +746,14 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: "RequantizediHardswish": requantizedIHardswishDesc, "RequantShift": requantShiftDesc, "Reshape": reshapeDesc, + "SGD": sgdDesc, "Slice": sliceDesc, "Softmax": softmaxDesc, "SoftmaxGrad": softmaxGradDesc, "Squeeze": squeezeDesc, "Transpose": transposeDesc, "Unsqueeze": unsqueezeDesc, + "iGELU": iGeluDesc, "iHardswish": iHardswishDesc, "iLayerNorm": iLayerNormDesc, "iNoNorm": iNoNormDesc, From 75983038183da7eabe9ba9f76619f77c4f2aa46b Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Sun, 28 Sep 2025 23:23:24 +0200 Subject: [PATCH 35/80] Add SoftmaxCrossEntropyLoss(Grad) --- Deeploy/OperatorDescriptor.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index 01c5fbeb08..ff1fbcf3fd 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -708,6 +708,18 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: attrDescriptors = [AttrDesc("lr", FloatUnpack)], ) +softmaxCrossEntropyLossDesc = OperatorDescriptor( + inputDescriptor = IoDesc(["logits", "labels"]), + outputDescriptor = IoDesc("log_prob"), + attrDescriptors = [], +) + +softmaxCrossEntropyLossGradDesc = OperatorDescriptor( + inputDescriptor = IoDesc(["log_prob", "labels"]), + outputDescriptor = IoDesc("grad"), + attrDescriptors = [], +) + defaultOperatorDescriptors: Dict[str, OperatorDescriptor] = { "Add": addDesc, "CLCA": clcaDesc, @@ -749,6 +761,8 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: "SGD": sgdDesc, "Slice": sliceDesc, "Softmax": softmaxDesc, + "SoftmaxCrossEntropyLoss": softmaxCrossEntropyLossDesc, + "SoftmaxCrossEntropyLossGrad": softmaxCrossEntropyLossGradDesc, "SoftmaxGrad": softmaxGradDesc, "Squeeze": squeezeDesc, "Transpose": transposeDesc, From 72c8d21ca66bbfdf61f2c1b734acc98a34915361 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Sun, 28 Sep 2025 23:50:41 +0200 Subject: [PATCH 36/80] Add Memcopy for dma tests --- DeeployTest/testUtils/dmaUtils.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/DeeployTest/testUtils/dmaUtils.py b/DeeployTest/testUtils/dmaUtils.py index 
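The SGD descriptor's single attribute is the learning rate; the op it describes is the plain update rule weight_updated = weight - lr * grad, sketched here in numpy (illustrative, not the deployed kernel):

    import numpy as np

    def sgd_step(weight: np.ndarray, grad: np.ndarray, lr: float) -> np.ndarray:
        return weight - lr * grad

    w = np.array([1.0, -2.0])
    g = np.array([0.5, 0.5])
    assert np.allclose(sgd_step(w, g, lr = 0.1), [0.95, -2.05])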
3205275fda..ba2f6e176f 100644 --- a/DeeployTest/testUtils/dmaUtils.py +++ b/DeeployTest/testUtils/dmaUtils.py @@ -10,14 +10,13 @@ from Deeploy.AbstractDataTypes import BaseType, Pointer, PointerClass from Deeploy.CommonExtensions.DataTypes import minimalIntegerType -from Deeploy.DeeployTypes import NetworkContext, NetworkDeployer, NodeParser, NodeTemplate, NodeTypeChecker, \ - ONNXLayer, OperatorRepresentation, VariableBuffer +from Deeploy.DeeployTypes import IoDesc, NetworkContext, NetworkDeployer, NodeParser, NodeTemplate, NodeTypeChecker, \ + ONNXLayer, OperatorDescriptor, OperatorRepresentation, VariableBuffer from Deeploy.MemoryLevelExtension.MemoryLevels import MemoryHierarchy, MemoryLevel from Deeploy.MemoryLevelExtension.NetworkDeployers.MemoryLevelDeployer import MemoryDeployerWrapper, \ MemoryPlatformWrapper from Deeploy.MemoryLevelExtension.OptimizationPasses.MemoryLevelAnnotationPasses import AnnotateDefaultMemoryLevel, \ AnnotateIOMemoryLevel -from Deeploy.OperatorDescriptor import defaultOperatorDescriptors from Deeploy.Targets.PULPOpen.Deployer import PULPDeployer from Deeploy.Targets.PULPOpen.Platform import MemoryPULPPlatform, PULPOptimizer from Deeploy.Targets.Snitch.Deployer import SnitchDeployer @@ -280,6 +279,17 @@ def defaultScheduler(graph: gs.Graph) -> List[List[gs.Node]]: return [[node] for node in graph.nodes] +memcpyDesc = OperatorDescriptor( + inputDescriptor = IoDesc("src"), + outputDescriptor = IoDesc("dest"), + attrDescriptors = [], +) + +dmaTestOperatorDescriptors = { + "Memcpy": memcpyDesc, +} + + def setup_pulp_deployer(defaultMemory: str, targetMemory: str, graph: gs.Graph, inputTypes: Dict[str, Type[Pointer]], doublebuffer: bool, deeployStateDir: str) -> NetworkDeployer: L3 = MemoryLevel(name = "L3", neighbourNames = ["L2"], size = 64000000) @@ -300,7 +310,7 @@ def setup_pulp_deployer(defaultMemory: str, targetMemory: str, graph: gs.Graph, platform, inputTypes, PULPOptimizer, - defaultOperatorDescriptors, + dmaTestOperatorDescriptors, defaultScheduler, default_channels_first = True, deeployStateDir = deeployStateDir) @@ -342,7 +352,7 @@ def setup_snitch_deployer(defaultMemory: str, targetMemory: str, graph: gs.Graph platform, inputTypes, SnitchOptimizer, - defaultOperatorDescriptors, + dmaTestOperatorDescriptors, defaultScheduler, deeployStateDir = deeployStateDir) memoryLevelAnnotationPasses = [AnnotateIOMemoryLevel(defaultMemory), AnnotateDefaultMemoryLevel(memoryHierarchy)] From bff86683b022bbee34b0a75919be874dcbb0c94f Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Mon, 27 Oct 2025 13:30:30 +0100 Subject: [PATCH 37/80] Remove some trailing white space in CHANGELOG.md --- CHANGELOG.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a567305e2b..6b6ee83f60 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -177,9 +177,9 @@ This release containing major architectural changes, new platform support, enhan ### Added -- BatchNorm kernel -- ConvTranspose kernel -- MaxPool1D kernel +- BatchNorm kernel +- ConvTranspose kernel +- MaxPool1D kernel - Template for 1D Convolution - Support for float32 data type in the previous kernels - Float binding for Pad1D kernel @@ -318,7 +318,7 @@ This release containing major architectural changes, new platform support, enhan ### Changed - FloatConvTemplate file -- Platform.py file +- Platform.py file - Bump the CMake version to 3.24 as required for the chimera-sdk - Bump GVSoC's version and add chimera simulation target - Rename the generic source util to utils to 
avoid name collision with chimera-sdk From 5ac4e316398e333e5573f8cff07bfd64cd5d76a7 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Mon, 27 Oct 2025 13:55:49 +0100 Subject: [PATCH 38/80] Add try canonicalization exceptions --- Deeploy/DeeployTypes.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Deeploy/DeeployTypes.py b/Deeploy/DeeployTypes.py index aecb112b57..2bf9452ade 100644 --- a/Deeploy/DeeployTypes.py +++ b/Deeploy/DeeployTypes.py @@ -2738,7 +2738,10 @@ def _bindLayers(self): assert node.op in self.operatorDescriptors, \ f"[ERROR] Error parsing node {node.name}. There is no descriptor for operator {node.op}." desc = self.operatorDescriptors[node.op] - desc.canonicalize(node, self.graph.opset) + try: + desc.canonicalize(node, self.graph.opset) + except BaseException as e: + raise ValueError(f"[ERROR] Node {node.name} of op {node.op} could not be canonicalized.") from e assert desc.check(node), \ f"[ERROR] Node {node.name} is not a valid instance of {node.op} operator" From 2f871d476ea5019ba9e9884e2626dffe1b286324 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Mon, 27 Oct 2025 13:56:10 +0100 Subject: [PATCH 39/80] Make IntegerDataTypes a tuple --- Deeploy/CommonExtensions/DataTypes.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Deeploy/CommonExtensions/DataTypes.py b/Deeploy/CommonExtensions/DataTypes.py index 4f6dba3827..c05ea3b9d9 100644 --- a/Deeploy/CommonExtensions/DataTypes.py +++ b/Deeploy/CommonExtensions/DataTypes.py @@ -87,11 +87,11 @@ class float64_t(FloatImmediate): SignedIntegerDataTypes: Tuple[Type[IntegerImmediate], ...] = (int8_t, int16_t, int32_t, int64_t) UnsignedIntegerDataTypes: Tuple[Type[IntegerImmediate], ...] = (uint8_t, uint16_t, uint32_t, uint64_t) -IntegerDataTypes: Tuple[Type[IntegerImmediate], ...] = (sorted(( - *SignedIntegerDataTypes, - *UnsignedIntegerDataTypes, -), - key = lambda _type: _type.typeWidth)) +IntegerDataTypes: Tuple[Type[IntegerImmediate], ...] = tuple( + sorted(( + *SignedIntegerDataTypes, + *UnsignedIntegerDataTypes, + ), key = lambda _type: _type.typeWidth)) FloatDataTypes: Tuple[Type[FloatImmediate], ...] 
= (bfloat16_t, float16_t, float32_t, float64_t) From 31577c31c95e36decbdd12dfe616521df2795a70 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Mon, 27 Oct 2025 13:57:00 +0100 Subject: [PATCH 40/80] Fix reshape bindings (which are used for squeeze/unsqueeze too) to map axes to int64_t as per ONNX --- Deeploy/Targets/PULPOpen/Bindings.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/Deeploy/Targets/PULPOpen/Bindings.py b/Deeploy/Targets/PULPOpen/Bindings.py index 9ff940b2f0..57fdf90a57 100644 --- a/Deeploy/Targets/PULPOpen/Bindings.py +++ b/Deeploy/Targets/PULPOpen/Bindings.py @@ -9,8 +9,8 @@ from Deeploy.CommonExtensions.CodeTransformationPasses.Closure import ClosureGeneration, MemoryAwareClosureGeneration from Deeploy.CommonExtensions.CodeTransformationPasses.MemoryAllocation import ArgumentStructGeneration, \ MemoryManagementGeneration, MemoryPassthroughGeneration -from Deeploy.CommonExtensions.DataTypes import IntegerDataTypes, SignedIntegerDataTypes, float32_t, int8_t, int32_t, \ - uint8_t +from Deeploy.CommonExtensions.DataTypes import FloatDataTypes, IntegerDataTypes, SignedIntegerDataTypes, float32_t, \ + int8_t, int32_t, int64_t, uint8_t from Deeploy.DeeployTypes import CodeTransformation, NodeBinding, NodeTemplate from Deeploy.FutureExtension.Bindings.AutoFutureBinding import AutoFutureBinding from Deeploy.FutureExtension.CodeTransformationPasses.FutureCodeTransformation import FutureGeneration @@ -153,11 +153,8 @@ ] PULPReshapeBindings = [ - NodeBinding(ReshapeChecker([PointerClass(type), PointerClass(int32_t)], [PointerClass(type)]), - ReshapeTemplate.referenceTemplate, SkipTransformer) for type in IntegerDataTypes -] + [ - NodeBinding(ReshapeChecker([PointerClass(float32_t), PointerClass(type)], [PointerClass(float32_t)]), - ReshapeTemplate.referenceTemplate, SkipTransformer) for type in IntegerDataTypes + NodeBinding(ReshapeChecker([PointerClass(type), PointerClass(int64_t)], [PointerClass(type)]), + ReshapeTemplate.referenceTemplate, SkipTransformer) for type in IntegerDataTypes + FloatDataTypes ] PULPRQAddBindings = [ From 90102f5eac819838252446fc5a0ab5513f09903e Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Mon, 27 Oct 2025 14:15:48 +0100 Subject: [PATCH 41/80] Canonicalize (un)squeeze operations as pre-opset-13, i.e., put axes into node attributes to avoid creating a buffer for it --- Deeploy/OperatorDescriptor.py | 18 +++++++++++-- Deeploy/Targets/Generic/Parsers.py | 43 +++++++----------------------- 2 files changed, 25 insertions(+), 36 deletions(-) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index ff1fbcf3fd..a0cb483589 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -476,15 +476,29 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: attrDescriptors = [AttrDesc("axis", IntUnpack, default = 0)], ) + +class SqueezeDescriptor(OperatorDescriptor): + + def canonicalize(self, node: gs.Node, opset: int) -> bool: + if opset >= 13: + assert len(node.inputs) == 2, f"Expected 2 inputs but received {len(node.inputs)}" + axes = node.inputs[1] + assert isinstance(axes, + gs.Constant), f"Expected axes to be a constant but received axes of type {type(axes)}" + node.attrs["axes"] = axes.values + axes.outputs.clear() + return super().canonicalize(node, opset) + + # Opset <= 11 -unsqueezeDesc = OperatorDescriptor( +unsqueezeDesc = SqueezeDescriptor( inputDescriptor = IoDesc("data_in"), outputDescriptor = IoDesc("data_out"), attrDescriptors = [AttrDesc("axes", IntTupleUnpack)],
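    # NOTE: SqueezeDescriptor.canonicalize (above) folds a constant opset-13
    # `axes` input back into this attribute, e.g. a node with inputs
    # [data, axes = Constant([0])] becomes a node with attrs {"axes": [0]},
    # so this attribute-based descriptor covers both opset variants.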
) # Opset <= 11 -squeezeDesc = OperatorDescriptor( +squeezeDesc = SqueezeDescriptor( inputDescriptor = IoDesc("data_in"), outputDescriptor = IoDesc("data_out"), attrDescriptors = [AttrDesc("axes", IntTupleUnpack)], diff --git a/Deeploy/Targets/Generic/Parsers.py b/Deeploy/Targets/Generic/Parsers.py index ad3bad549d..ddd08a8551 100644 --- a/Deeploy/Targets/Generic/Parsers.py +++ b/Deeploy/Targets/Generic/Parsers.py @@ -986,48 +986,23 @@ def __init__(self): super().__init__() def parseNode(self, node: gs.Node) -> (bool): + if not all(['axes' in node.attrs, len(node.inputs) == 1, len(node.outputs) == 1]): + return False - # ONNX v11: 'axes' is a node attribute - if 'axes' in node.attrs: - ret = all(['axes' in node.attrs, len(node.inputs) == 1, len(node.outputs) == 1]) - # ONNX v13+: 'axes' becomes an input with the data - # Source: https://onnx.ai/onnx/operators/onnx__Unsqueeze.html - else: - ret = all([len(node.inputs) == 2, len(node.outputs) == 1]) - - if ret and 'axes' in node.attrs: - axes_attr = node.attrs['axes'] - self.operatorRepresentation['axes'] = [int(axes_attr)] if isinstance(axes_attr, int) \ - else [int(a) for a in axes_attr] - # For opset 13+, axes will be extracted from the second input in parseNodeCtxt - - return ret + self.operatorRepresentation['axes'] = node.attrs['axes'] + return True def parseNodeCtxt(self, ctxt: NetworkContext, node: gs.Node, channels_first: bool = True) -> Tuple[NetworkContext, bool]: + inputs = ['data_in'] + for idx, inputNode in enumerate(node.inputs): + self.operatorRepresentation[inputs[idx]] = ctxt.lookup(inputNode.name).name outputs = ['data_out'] - if len(node.inputs) == 1: - inputs = ['data_in'] - for idx, inputNode in enumerate(node.inputs): - self.operatorRepresentation[inputs[idx]] = ctxt.lookup(inputNode.name).name - for idx, outputNode in enumerate(node.outputs): - self.operatorRepresentation[outputs[idx]] = ctxt.lookup(outputNode.name).name - else: - data_in = ctxt.lookup(node.inputs[0].name) - data_out = ctxt.lookup(node.outputs[0].name) - self.operatorRepresentation['data_in'] = data_in.name - self.operatorRepresentation['data_out'] = data_out.name - # axes must be a constant; extract values - axes_buf = ctxt.lookup(node.inputs[1].name) - assert hasattr(axes_buf, 'values'), "Unsqueeze: expected constant 'axes' input for opset 13+" - axes_vals = np.array(axes_buf.values).astype(int).flatten().tolist() - self.operatorRepresentation['axes'] = axes_vals - # Do not deploy the axes tensor - axes_buf._live = False - axes_buf._deploy = False + for idx, outputNode in enumerate(node.outputs): + self.operatorRepresentation[outputs[idx]] = ctxt.lookup(outputNode.name).name return ctxt, True From 7bd7353df3ca04e1414164f2025ea9a892eff2e7 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Mon, 27 Oct 2025 16:37:50 +0100 Subject: [PATCH 42/80] Add BatchNormalization descriptor --- Deeploy/OperatorDescriptor.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index a0cb483589..9d74a32aec 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -734,8 +734,19 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: attrDescriptors = [], ) +batchNormalizationDesc = OperatorDescriptor( + inputDescriptor = IoDesc(["data_in", "scale", "bias", "mean", "variance"]), + outputDescriptor = IoDesc(["data_out"], optional = ["running_mean", "running_var"]), + attrDescriptors = [ + AttrDesc("epsilon", FloatUnpack, default = 1e-5), + AttrDesc("momentum", 
FloatUnpack, default = 0.9), + AttrDesc("training_mode", BoolUnpack, default = False), + ], +) + defaultOperatorDescriptors: Dict[str, OperatorDescriptor] = { "Add": addDesc, + "BatchNormalization": batchNormalizationDesc, "CLCA": clcaDesc, "Concat": concatDesc, "Conv": convDesc, From 16bc4630c2e4ac4f71f2a798c8c7f5bae25d3023 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Mon, 27 Oct 2025 16:38:11 +0100 Subject: [PATCH 43/80] Add ConvTranspose descriptor --- Deeploy/OperatorDescriptor.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index 9d74a32aec..fbf333cf1f 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -371,6 +371,25 @@ def _padsDefault(node: gs.Node) -> Tuple[int, ...]: ], ) +convTransposeDesc = OperatorDescriptor( + inputDescriptor = IoDesc(["data_in", "weight"], optional = "bias"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("auto_pad", AutoPad, default = AutoPad.NOTSET), + AttrDesc("dilations", IntTupleUnpack, default = _dilationsDefault), + AttrDesc("group", IntUnpack, default = 1), + AttrDesc("kernel_shape", IntTupleUnpack, default = _kernelShapeDefault), + # TODO: Add output_shape and output_padding default functions. + # Docs: + # - ONNX: https://onnx.ai/onnx/operators/onnx__ConvTranspose.html + # - PyTorch: https://docs.pytorch.org/docs/stable/generated/torch.nn.ConvTranspose2d.html + # AttrDesc("output_shape", IntTupleUnpack, default = _outputShapeDefault), + # AttrDesc("output_padding", IntTupleUnpack, default = _outputPaddingDefault), + AttrDesc("pads", IntTupleUnpack, default = _padsDefault), + AttrDesc("strides", IntTupleUnpack, default = _stridesDefault), + ], +) + class RequantizedOperatorDescriptor(OperatorDescriptor): @@ -750,6 +769,7 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: "CLCA": clcaDesc, "Concat": concatDesc, "Conv": convDesc, + "ConvTranspose": convTransposeDesc, "DebugPrint": debugPrintDesc, "Dequant": dequantDesc, "Div": divDesc, From d865898cefe487c83676282f61e7d2740e24f16e Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Mon, 27 Oct 2025 16:50:43 +0100 Subject: [PATCH 44/80] Relax opset check on squeeze operations to a warning --- Deeploy/OperatorDescriptor.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index fbf333cf1f..0e93a07ed4 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -9,6 +9,7 @@ import onnx_graphsurgeon as gs from Deeploy.DeeployTypes import AttrDesc, IoDesc, OperatorDescriptor, VariadicIoDesc +from Deeploy.Logging import DEFAULT_LOGGER as log def IntUnpack(value: Any) -> int: @@ -499,13 +500,24 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: class SqueezeDescriptor(OperatorDescriptor): def canonicalize(self, node: gs.Node, opset: int) -> bool: - if opset >= 13: - assert len(node.inputs) == 2, f"Expected 2 inputs but received {len(node.inputs)}" + if len(node.inputs) == 2: axes = node.inputs[1] - assert isinstance(axes, - gs.Constant), f"Expected axes to be a constant but received axes of type {type(axes)}" + assert isinstance(axes, gs.Constant), \ + f"Expected axes to be a constant but received axes of type {type(axes)}" node.attrs["axes"] = axes.values axes.outputs.clear() + + if opset >= 13 and len(node.inputs) != 2: + log.warning( + "Squeeze operation expects 2 inputs for opset >= 13. 
" + f"Received node {node.name} with {len(node.inputs)} input{'s' if len(node.inputs) > 1 else ''} and opset {opset}" + ) + elif opset < 13 and len(node.inputs) != 1: + log.warning( + "Squeeze operation expects 1 input for opset < 13. " + f"Received node {node.name} with {len(node.inputs)} input{'s' if len(node.inputs) > 1 else ''} and opset {opset}" + ) + return super().canonicalize(node, opset) From cd62a695cb523db2dc207b6c876b10424c3b32d3 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Mon, 27 Oct 2025 16:56:21 +0100 Subject: [PATCH 45/80] Replace prints with logging --- Deeploy/DeeployTypes.py | 13 +++++-------- Deeploy/OperatorDescriptor.py | 9 +++------ 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/Deeploy/DeeployTypes.py b/Deeploy/DeeployTypes.py index 2bf9452ade..30c06548c0 100644 --- a/Deeploy/DeeployTypes.py +++ b/Deeploy/DeeployTypes.py @@ -1101,19 +1101,16 @@ def check(self, node: gs.Node) -> bool: valid = True if not self.inputDescriptor.checkTensors(node.inputs): - # TODO: Change to logging - print(f"[ERROR OP {node.op}] Invalid input tensors: {[t.name for t in node.inputs]}") + log.error(f"[OP {node.op}] Invalid input tensors: {[t.name for t in node.inputs]}") valid = False if not self.outputDescriptor.checkTensors(node.outputs): - # TODO: Change to logging - print(f"[ERROR OP {node.op}] Invalid output tensors: {[t.name for t in node.outputs]}") + log.error(f"[OP {node.op}] Invalid output tensors: {[t.name for t in node.outputs]}") valid = False for attrDesc in self.attrDescriptors: if attrDesc.default is None and not attrDesc.name in node.attrs: - # TODO: Change to logging - print(f"[ERROR OP {node.op}] Missing attribute {attrDesc.name}") + log.error(f"[OP {node.op}] Missing attribute {attrDesc.name}") valid = False return valid @@ -1128,7 +1125,7 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: try: node.attrs[desc.name] = desc.unpack(value) except Exception as e: - raise ValueError(f"[ERROR OP {node.op}] Error unpacking the attribute {desc.name}. {e}") from e + raise ValueError(f"[OP {node.op}] Error unpacking the attribute {desc.name}. {e}") from e return True def parseTensors(self, ctxt: NetworkContext, tensors: Sequence[gs.Tensor], @@ -1158,7 +1155,7 @@ def parse(self, ctxt: NetworkContext, node: gs.Node) -> OperatorRepresentation: firstKeySet = set(firstOpRepr.keys()) secondKeySet = set(secondOpRepr.keys()) assert firstKeySet.isdisjoint(secondKeySet), \ - f"[PARSE ERROR] (Node: {node.name}, Op: {node.op}) " \ + f"[OP {node.op}] Encourntered error while parsing node {node.name}. " \ f"Keys from parsing {firstName} clash with the keys from parsing {secondName}. 
"\ f"Overlapping keys: {firstKeySet ^ secondKeySet}" diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index 0e93a07ed4..4ebab580a7 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -396,8 +396,7 @@ class RequantizedOperatorDescriptor(OperatorDescriptor): def canonicalize(self, node: gs.Node, opset: int) -> bool: if "n_levels_out" in node.attrs and "n_levels" in node.attrs: - # TODO: Change to log - print("[WARNING] Requantized operator cannot have n_levels_out and n_levels in its attributes") + log.warning("Requantized operator cannot have n_levels_out and n_levels in its attributes") return False if "n_levels_out" in node.attrs: @@ -711,10 +710,8 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: n_levels = f"{tensor}_n_levels" n_levels_out = f"{tensor}_n_levels_out" if n_levels_out in node.attrs and n_levels in node.attrs: - # TODO: Change to log - print( - f"[WARNING] RequantizedAdd tensor {tensor} cannot have {n_levels_out} and {n_levels} in its attributes" - ) + log.warning( + f"RequantizedAdd tensor {tensor} cannot have {n_levels_out} and {n_levels} in its attributes") return False if n_levels_out in node.attrs: From 91bdeb7f4573dbee5a97a650ef464d3b62f60c7c Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Mon, 27 Oct 2025 17:02:59 +0100 Subject: [PATCH 46/80] Add missing itertools import --- Deeploy/DeeployTypes.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Deeploy/DeeployTypes.py b/Deeploy/DeeployTypes.py index 30c06548c0..b2afde7410 100644 --- a/Deeploy/DeeployTypes.py +++ b/Deeploy/DeeployTypes.py @@ -5,6 +5,7 @@ from __future__ import annotations import copy +import itertools import math import os import pickle From 238d3affd422e61cc65774a698bac093a8ce370c Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Mon, 27 Oct 2025 17:03:33 +0100 Subject: [PATCH 47/80] Initialize optional value with None --- Deeploy/DeeployTypes.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Deeploy/DeeployTypes.py b/Deeploy/DeeployTypes.py index b2afde7410..166c91289c 100644 --- a/Deeploy/DeeployTypes.py +++ b/Deeploy/DeeployTypes.py @@ -1023,10 +1023,11 @@ def copy(self) -> NetworkContext: class IoDesc: - def __init__(self, required: Union[str, List[str]], optional: Union[str, List[str]] = []) -> None: + def __init__(self, required: Union[str, List[str]], optional: Optional[Union[str, List[str]]] = None) -> None: if isinstance(required, str): required = [required] self.required = required + optional = optional if optional is not None else [] if isinstance(optional, str): optional = [optional] self.optional = optional From a4198b433517eeb5e0068063e04e8722da203f55 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Mon, 27 Oct 2025 17:04:34 +0100 Subject: [PATCH 48/80] Fix typo --- Deeploy/DeeployTypes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Deeploy/DeeployTypes.py b/Deeploy/DeeployTypes.py index 166c91289c..6b63697d2f 100644 --- a/Deeploy/DeeployTypes.py +++ b/Deeploy/DeeployTypes.py @@ -1149,7 +1149,7 @@ def parseAttrs(self, node: gs.Node) -> OperatorRepresentation: def parse(self, ctxt: NetworkContext, node: gs.Node) -> OperatorRepresentation: opReprs = { "input tensors": self.parseTensors(ctxt, node.inputs, self.inputDescriptor), - "output tesnors": self.parseTensors(ctxt, node.outputs, self.outputDescriptor), + "output tensors": self.parseTensors(ctxt, node.outputs, self.outputDescriptor), "attributes": self.parseAttrs(node), } From e8f1721bcf45c6c05efb921b1b54f65b9c1c5678 Mon Sep 
17 00:00:00 2001 From: Luka Macan Date: Mon, 27 Oct 2025 17:07:08 +0100 Subject: [PATCH 49/80] Explicit exception coverage --- Deeploy/OperatorDescriptor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index 4ebab580a7..3af145cd43 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -55,7 +55,7 @@ def FloatTupleUnpack(value: Any) -> Tuple[float, ...]: def IntTupleIfNotSingleItemUnpack(value: Any) -> Union[int, Tuple[int, ...]]: try: return IntUnpack(value) - except: + except ValueError: return IntTupleUnpack(value) From f180f85348c3f74c90cc97823069722b7d332d19 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Mon, 27 Oct 2025 17:11:31 +0100 Subject: [PATCH 50/80] Rename attrToTensor to attrToInputTensor and add inputTensorToAttr --- Deeploy/OperatorDescriptor.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index 3af145cd43..7f283708c0 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -59,7 +59,7 @@ def IntTupleIfNotSingleItemUnpack(value: Any) -> Union[int, Tuple[int, ...]]: return IntTupleUnpack(value) -def attrToTensor(node: gs.Node, attr: str) -> None: +def attrToInputTensor(node: gs.Node, attr: str) -> None: values = node.attrs[attr] if isinstance(values, (int, float)): values = np.array([values]) @@ -71,6 +71,14 @@ def attrToTensor(node: gs.Node, attr: str) -> None: node.attrs.pop(attr) +def inputTensorToAttr(node: gs.Node, tensorIdx: int, attr: str) -> None: + tensor = node.inputs[tensorIdx] + assert isinstance(tensor, gs.Constant), \ + f"Can only convert constant tensors to attributes. Received tensor of type {type(tensor)}" + node.attrs[attr] = tensor.values + tensor.outputs.clear() + + concatDesc = OperatorDescriptor( inputDescriptor = VariadicIoDesc("data_in", minNumTensors = 2), outputDescriptor = IoDesc("data_out"), @@ -91,10 +99,10 @@ class SliceDescriptor(OperatorDescriptor): def canonicalize(self, node: gs.Node, opset: int) -> bool: if opset < 10: - attrToTensor(node, "starts") - attrToTensor(node, "ends") + attrToInputTensor(node, "starts") + attrToInputTensor(node, "ends") if "axes" in node.attrs: - attrToTensor(node, "axes") + attrToInputTensor(node, "axes") return super().canonicalize(node, opset) @@ -184,7 +192,7 @@ class ReduceMeanDescriptor(OperatorDescriptor): def canonicalize(self, node: gs.Node, opset: int) -> bool: if opset < 18: if "axes" in node.attrs: - attrToTensor(node, "axes") + attrToInputTensor(node, "axes") return super().canonicalize(node, opset) From bc75e85564ea1401eeae2f4f2ecb1bfab9f82f22 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Mon, 27 Oct 2025 17:17:17 +0100 Subject: [PATCH 51/80] Use inputTensorToAttr in squeeze canonicalization --- Deeploy/OperatorDescriptor.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index 7f283708c0..6bf6b6ca30 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -508,11 +508,7 @@ class SqueezeDescriptor(OperatorDescriptor): def canonicalize(self, node: gs.Node, opset: int) -> bool: if len(node.inputs) == 2: - axes = node.inputs[1] - assert isinstance(axes, gs.Constant), \ - f"Expected axes to be a constant but received axes of type {type(axes)}" - node.attrs["axes"] = axes.values - axes.outputs.clear() + inputTensorToAttr(node, tensorIdx = 1, attr = "axes") if opset
>= 13 and len(node.inputs) != 2: log.warning( From 6976c52dc8368620e69164d43ca32eb03b2b851d Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Mon, 27 Oct 2025 17:12:52 +0100 Subject: [PATCH 52/80] Remove duplicate attribute --- Deeploy/OperatorDescriptor.py | 1 - 1 file changed, 1 deletion(-) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index 6bf6b6ca30..288a9de505 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -729,7 +729,6 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: inputDescriptor = IoDesc(["data_in_0", "data_in_1"]), outputDescriptor = IoDesc("data_out"), attrDescriptors = [ - AttrDesc("rqs1_n_levels", IntUnpack), AttrDesc("rqs1_mul", IntUnpack), AttrDesc("rqs1_add", IntUnpack), AttrDesc("rqs1_div", IntUnpack), From 97da07c29915bd7e2c3661212e02bb3a4f1a94df Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Mon, 27 Oct 2025 17:37:08 +0100 Subject: [PATCH 53/80] Refactor MatMulTileConstraint --- .../TileConstraints/MatMulTileConstraint.py | 87 +++++++++++-------- 1 file changed, 50 insertions(+), 37 deletions(-) diff --git a/Deeploy/Targets/PULPOpen/TileConstraints/MatMulTileConstraint.py b/Deeploy/Targets/PULPOpen/TileConstraints/MatMulTileConstraint.py index a9259a15cf..c0f3b70461 100644 --- a/Deeploy/Targets/PULPOpen/TileConstraints/MatMulTileConstraint.py +++ b/Deeploy/Targets/PULPOpen/TileConstraints/MatMulTileConstraint.py @@ -19,42 +19,50 @@ class MatMulTileConstraint(TileConstraint): @staticmethod def addGeometricalConstraint(tilerModel: TilerModel, parseDict: Dict, ctxt: NetworkContext) -> TilerModel: - - # Get to-be-tiled tensor's buffers bufferA = ctxt.lookup(name = parseDict['A']) bufferB = ctxt.lookup(name = parseDict['B']) - outputBuffer = ctxt.lookup(name = parseDict['data_out']) + bufferOut = ctxt.lookup(name = parseDict['data_out']) # Add I/O dimensions to the model as variables - for _buffer in [bufferA, bufferB, outputBuffer]: - tilerModel.addTensorDimToModel(ctxt, _buffer.name) - - tensorsShapeLen = len(bufferA.shape) - - AFirstDimVar = tilerModel.getTensorDimVar(tensorName = bufferA.name, - dimIdx = (tensorsShapeLen - 2) + int(parseDict['transA'])) - ASecondDimVar = tilerModel.getTensorDimVar(tensorName = bufferA.name, - dimIdx = (tensorsShapeLen - 1) - int(parseDict['transA'])) - BFirstDimVar = tilerModel.getTensorDimVar(tensorName = bufferB.name, - dimIdx = (tensorsShapeLen - 2) + int(parseDict['transB'])) - BSecondDimVar = tilerModel.getTensorDimVar(tensorName = bufferB.name, - dimIdx = (tensorsShapeLen - 1) - int(parseDict['transB'])) - outputFirstDimVar = tilerModel.getTensorDimVar(tensorName = outputBuffer.name, dimIdx = (tensorsShapeLen - 2)) - outputSecondDimVar = tilerModel.getTensorDimVar(tensorName = outputBuffer.name, dimIdx = (tensorsShapeLen - 1)) - - # Map output dims to inputs dims - for idx in range(tensorsShapeLen - 2): - tilerModel.addConstraint( - tilerModel.getTensorDimVar(tensorName = outputBuffer.name, dimIdx = idx) == tilerModel.getTensorDimVar( - tensorName = bufferA.name, dimIdx = idx)) - tilerModel.addConstraint( - tilerModel.getTensorDimVar(tensorName = outputBuffer.name, dimIdx = idx) == tilerModel.getTensorDimVar( - tensorName = bufferB.name, dimIdx = idx)) + for buff in [bufferA, bufferB, bufferOut]: + tilerModel.addTensorDimToModel(ctxt, buff.name) + + rankA = len(bufferA.shape) + if not parseDict['transA']: + firstDimIdxA, secondDimIdxA = rankA - 2, rankA - 1 + else: + firstDimIdxA, secondDimIdxA = rankA - 1, rankA - 2 + AFirstDimVar = 
tilerModel.getTensorDimVar(tensorName = bufferA.name, dimIdx = firstDimIdxA) + ASecondDimVar = tilerModel.getTensorDimVar(tensorName = bufferA.name, dimIdx = secondDimIdxA) + + rankB = len(bufferB.shape) + if not parseDict['transB']: + firstDimIdxB, secondDimIdxB = rankB - 2, rankB - 1 + else: + firstDimIdxB, secondDimIdxB = rankB - 1, rankB - 2 + BFirstDimVar = tilerModel.getTensorDimVar(tensorName = bufferB.name, dimIdx = firstDimIdxB) + BSecondDimVar = tilerModel.getTensorDimVar(tensorName = bufferB.name, dimIdx = secondDimIdxB) + + rankOut = len(bufferOut.shape) + outputFirstDimVar = tilerModel.getTensorDimVar(tensorName = bufferOut.name, dimIdx = rankOut - 2) + outputSecondDimVar = tilerModel.getTensorDimVar(tensorName = bufferOut.name, dimIdx = rankOut - 1) + + # Map batch dims between A and output + batchDimsA = rankA - 2 + for dimIdx in range(batchDimsA): + varA = tilerModel.getTensorDimVar(tensorName = bufferA.name, dimIdx = dimIdx) + varOut = tilerModel.getTensorDimVar(tensorName = bufferOut.name, dimIdx = (rankOut - rankA) + dimIdx) + tilerModel.addConstraint(varOut == varA) + + # Map batch dims between B and output + batchDimsB = rankB - 2 + for dimIdx in range(batchDimsB): + varB = tilerModel.getTensorDimVar(tensorName = bufferB.name, dimIdx = dimIdx) + varOut = tilerModel.getTensorDimVar(tensorName = bufferOut.name, dimIdx = (rankOut - rankB) + dimIdx) + tilerModel.addConstraint(varOut == varB) tilerModel.addConstraint(outputFirstDimVar == AFirstDimVar) tilerModel.addConstraint(outputSecondDimVar == BSecondDimVar) - - # Add GEMM Geometrical constraints tilerModel.addConstraint(ASecondDimVar == BFirstDimVar) return tilerModel @@ -65,14 +73,19 @@ def addPolicyConstraint(tilerModel: TilerModel, parseDict: Dict, ctxt: NetworkCo bufferA = ctxt.lookup(name = parseDict['A']) bufferB = ctxt.lookup(name = parseDict['B']) - tensorsShapeLen = len(bufferA.shape) - - ASecondDimVar = tilerModel.getTensorDimVar(tensorName = bufferA.name, - dimIdx = (tensorsShapeLen - 1) - parseDict['transA']) - BFirstDimVar = tilerModel.getTensorDimVar(tensorName = bufferB.name, - dimIdx = (tensorsShapeLen - 2) + parseDict['transB']) - BSecondDimVar = tilerModel.getTensorDimVar(tensorName = bufferB.name, - dimIdx = (tensorsShapeLen - 1) - parseDict['transB']) + rankA = len(bufferA.shape) + if not parseDict['transA']: + _, secondDimIdxA = rankA - 2, rankA - 1 + else: + _, secondDimIdxA = rankA - 1, rankA - 2 + ASecondDimVar = tilerModel.getTensorDimVar(tensorName = bufferA.name, dimIdx = secondDimIdxA) + + rankB = len(bufferB.shape) + if not parseDict['transB']: + firstDimIdxB, _ = rankB - 2, rankB - 1 + else: + firstDimIdxB, _ = rankB - 1, rankB - 2 + BFirstDimVar = tilerModel.getTensorDimVar(tensorName = bufferB.name, dimIdx = firstDimIdxB) # VIC: We don't want to deal with intermediate results between kernel calls tilerModel.addConstraint(ASecondDimVar == parseDict['N']) From 0c64a3eb587aeeb703a63512057e32e182b4f0e3 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Mon, 27 Oct 2025 19:16:30 +0100 Subject: [PATCH 54/80] Remove duplicate attributes and check that the value is positive --- Deeploy/Targets/Generic/Parsers.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Deeploy/Targets/Generic/Parsers.py b/Deeploy/Targets/Generic/Parsers.py index ddd08a8551..edbb2bc917 100644 --- a/Deeploy/Targets/Generic/Parsers.py +++ b/Deeploy/Targets/Generic/Parsers.py @@ -1436,11 +1436,8 @@ def parseNode(self, node: gs.Node) -> (bool): # All *_div attrs are log2d-ified log2Attrs = [ 
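            # Assumption: every divisor listed here is a power of two.
            # int(math.log2(value)) floors its argument, so a non-power-of-two
            # divisor would be silently rounded down; the assert below only
            # guarantees a positive integer.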
"preattn_requant_div", - "preattn_requant_div", - "normalizer_requant_div", "normalizer_requant_div", "postattn_requant_div", - "postattn_requant_div", "wo_requant_div", "wq_requant_div", "wk_requant_div", @@ -1449,7 +1446,8 @@ def parseNode(self, node: gs.Node) -> (bool): for attr in log2Attrs: value = self.operatorRepresentation[attr] - assert isinstance(value, int) + assert isinstance( + value, int) and value > 0, f"Attribute {attr} must be a positive integer. Received value {value}" self.operatorRepresentation[attr] = int(math.log2(value)) return ret From fe3bce631409a22b683b54a270868d5d67bf5bd9 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Wed, 29 Oct 2025 13:35:48 +0100 Subject: [PATCH 55/80] Rename ref in GemmTemplate and check for batching --- .../Targets/Generic/Templates/GemmTemplate.py | 33 ++++++++++++------- 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/Deeploy/Targets/Generic/Templates/GemmTemplate.py b/Deeploy/Targets/Generic/Templates/GemmTemplate.py index 371004a8e7..4f42236780 100644 --- a/Deeploy/Targets/Generic/Templates/GemmTemplate.py +++ b/Deeploy/Targets/Generic/Templates/GemmTemplate.py @@ -40,17 +40,17 @@ def alignToContext(self, ctxt: NetworkContext, referenceTemplate = _GemmTemplate(""" // GEMM (Name: ${nodeName}, Op: ${nodeOp}) BEGIN_SINGLE_CORE - ${A_type.typeName} ref_${data_out}_${A} = ${A}; - ${B_type.typeName} ref_${data_out}_${B} = ${B}; - ${C_type.typeName} ref_${data_out}_${C} = ${C}; - ${data_out_type.typeName} ref_${data_out}_${data_out} = ${data_out}; + ${A_type.typeName} ref_${nodeName}_${A} = ${A}; + ${B_type.typeName} ref_${nodeName}_${B} = ${B}; + ${C_type.typeName} ref_${nodeName}_${C} = ${C}; + ${data_out_type.typeName} ref_${nodeName}_${data_out} = ${data_out}; for(uint32_t i=0;i<${batch};i++){ Gemm_s${A_type.referencedType.typeWidth}_s${B_type.referencedType.typeWidth}_s${C_type.referencedType.typeWidth}_s${data_out_type.referencedType.typeWidth}( - ref_${data_out}_${A}, - ref_${data_out}_${B}, - ref_${data_out}_${C}, - ref_${data_out}_${data_out}, + ref_${nodeName}_${A}, + ref_${nodeName}_${B}, + ref_${nodeName}_${C}, + ref_${nodeName}_${data_out}, ${M}, ${N}, ${O}, @@ -64,10 +64,19 @@ def alignToContext(self, ctxt: NetworkContext, ${Y_offset} ); - ref_${data_out}_${A} += ${M} * ${N}; - ref_${data_out}_${B} += ${N} * ${O}; - ref_${data_out}_${C} += ${M} * ${O}; - ref_${data_out}_${data_out} += ${M} * ${O}; + % if A_batched: + ref_${nodeName}_${A} += ${M} * ${N}; + % endif + + % if B_batched: + ref_${nodeName}_${B} += ${N} * ${O}; + % endif + + % if C_batched: + ref_${nodeName}_${C} += ${M} * ${O}; + % endif + + ref_${nodeName}_${data_out} += ${M} * ${O}; } END_SINGLE_CORE """) From bb88795783c5588b49a34b46cb9069acbd8e96c3 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Wed, 29 Oct 2025 13:33:30 +0100 Subject: [PATCH 56/80] Rename ref in FloatGemmTemplate --- .../Generic/Templates/FloatGemmTemplate.py | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/Deeploy/Targets/Generic/Templates/FloatGemmTemplate.py b/Deeploy/Targets/Generic/Templates/FloatGemmTemplate.py index ab78e742d0..30ae6a3177 100644 --- a/Deeploy/Targets/Generic/Templates/FloatGemmTemplate.py +++ b/Deeploy/Targets/Generic/Templates/FloatGemmTemplate.py @@ -7,17 +7,17 @@ referenceTemplate = NodeTemplate(""" // GEMM (Name: ${nodeName}, Op: ${nodeOp}) BEGIN_SINGLE_CORE - ${A_type.typeName} ref_${data_out}_${A} = ${A}; - ${B_type.typeName} ref_${data_out}_${B} = ${B}; - ${C_type.typeName} ref_${data_out}_${C} = ${C}; - 
${data_out_type.typeName} ref_${data_out}_${data_out} = ${data_out}; + ${A_type.typeName} ref_${nodeName}_${A} = ${A}; + ${B_type.typeName} ref_${nodeName}_${B} = ${B}; + ${C_type.typeName} ref_${nodeName}_${C} = ${C}; + ${data_out_type.typeName} ref_${nodeName}_${data_out} = ${data_out}; for(uint32_t i=0; i<${batch}; i++){ Gemm_fp${A_type.referencedType.typeWidth}_fp${B_type.referencedType.typeWidth}_fp${C_type.referencedType.typeWidth}_fp${data_out_type.referencedType.typeWidth}( - ref_${data_out}_${A}, - ref_${data_out}_${B}, - ref_${data_out}_${C}, - ref_${data_out}_${data_out}, + ref_${nodeName}_${A}, + ref_${nodeName}_${B}, + ref_${nodeName}_${C}, + ref_${nodeName}_${data_out}, ${M}, ${N}, ${O}, @@ -26,18 +26,18 @@ ); % if A_batched: - ref_${data_out}_${A} += ${M} * ${N}; + ref_${nodeName}_${A} += ${M} * ${N}; % endif % if B_batched: - ref_${data_out}_${B} += ${N} * ${O}; + ref_${nodeName}_${B} += ${N} * ${O}; % endif % if C_batched: - ref_${data_out}_${C} += ${M} * ${O}; + ref_${nodeName}_${C} += ${M} * ${O}; % endif - ref_${data_out}_${data_out} += ${M} * ${O}; + ref_${nodeName}_${data_out} += ${M} * ${O}; } END_SINGLE_CORE """) \ No newline at end of file From d078a3c36d2ab5d4f2f44055f5e30aad0adea65d Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Tue, 28 Oct 2025 17:50:09 +0100 Subject: [PATCH 57/80] Add min, max for single-item numpy numbers --- Deeploy/AbstractDataTypes.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Deeploy/AbstractDataTypes.py b/Deeploy/AbstractDataTypes.py index feeebe939b..0e8d4a0715 100644 --- a/Deeploy/AbstractDataTypes.py +++ b/Deeploy/AbstractDataTypes.py @@ -206,12 +206,20 @@ def checkValue(cls, value: Union[int, Iterable[int], np.ndarray], ctxt: Optional if isinstance(value, int): _max, _min = (value, value) + elif isinstance(value, np.number): + value = value.item() + if isinstance(value, float): + assert value.is_integer(), f"Floating-point value {value} is not an integer." 
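+                # Integral numpy floats are accepted, e.g. np.float64(3.0)
+                # unpacks to 3, while np.float32(3.5) trips the assert above.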
+ value = int(value) + _max, _min = (value, value) elif isinstance(value, np.ndarray): _max = value.max() _min = value.min() elif isinstance(value, Iterable): _max = max(value) _min = min(value) + else: + raise ValueError(f"Unsupported value of type {type(value)} with value {value}") if _max > cls.typeMax: return False From 8e81f94d274906539946b51157c94f9ec577e3cc Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Wed, 29 Oct 2025 13:58:05 +0100 Subject: [PATCH 58/80] Make SignPropTypeChecker an abstract class and refactor --- .../TypeCheckers/SignPropTypeChecker.py | 59 +++++++++---------- Deeploy/Targets/Generic/TypeCheckers.py | 4 ++ 2 files changed, 33 insertions(+), 30 deletions(-) diff --git a/Deeploy/CommonExtensions/TypeCheckers/SignPropTypeChecker.py b/Deeploy/CommonExtensions/TypeCheckers/SignPropTypeChecker.py index c70628729b..2e7eafa664 100644 --- a/Deeploy/CommonExtensions/TypeCheckers/SignPropTypeChecker.py +++ b/Deeploy/CommonExtensions/TypeCheckers/SignPropTypeChecker.py @@ -2,7 +2,8 @@ # # SPDX-License-Identifier: Apache-2.0 -from typing import List, Optional +from abc import ABC, abstractmethod +from typing import List import onnx_graphsurgeon as gs @@ -11,27 +12,30 @@ from Deeploy.Logging import DEFAULT_LOGGER as log -class SignPropTypeChecker(NodeTypeChecker): +class SignPropTypeChecker(NodeTypeChecker, ABC): + @abstractmethod def _inferNumLevels(self, inputs: List[VariableBuffer], - operatorRepresentation: OperatorRepresentation) -> Optional[List[int]]: - return None + operatorRepresentation: OperatorRepresentation) -> List[int]: + pass + @abstractmethod def _inferSignedness(self, inputs: List[VariableBuffer], - operatorRepresentation: OperatorRepresentation) -> Optional[List[int]]: - return None + operatorRepresentation: OperatorRepresentation) -> List[bool]: + pass def typeInferGlobalCtxt(self, ctxt: NetworkContext, node: gs.Node) -> NetworkContext: ctxt = super().typeInferGlobalCtxt(ctxt, node) - for inputNode, _type in zip(node.inputs, self.input_types): - if isinstance(ctxt.lookup(inputNode.name), ConstantBuffer): - reference = ctxt.lookup(inputNode.name) - if not _type.referencedType.checkPromotion(reference.values): - raise Exception(f"Can't cast {reference} to {_type}!") - - reference.nLevels = reference.values.max() - reference.values.min() - reference._signed = _type.referencedType.typeMin < 0 + for tensor, _type in zip(node.inputs, self.input_types): + buffer = ctxt.lookup(tensor.name) + if isinstance(buffer, ConstantBuffer): + refTy = _type.referencedType + assert issubclass(refTy, IntegerImmediate) + if not refTy.checkPromotion(buffer.values): + raise ValueError(f"Can't cast {buffer} to {refTy}!") + buffer.nLevels = buffer.values.max() - buffer.values.min() + buffer._signed = refTy.typeMin < 0 return ctxt @@ -42,21 +46,16 @@ def typeInferOutput(self, ctxt: NetworkContext, node: gs.Node, inputs = [ctxt.lookup(inputNode.name) for inputNode in node.inputs] outputs = [ctxt.lookup(outputNode.name) for outputNode in node.outputs] - signProp = all([hasattr(_input, "_signed") and hasattr(_input, "nLevels") for _input in inputs]) - - if signProp: - nLevels = self._inferNumLevels(inputs, operatorRepresentation) - signedness = self._inferSignedness(inputs, operatorRepresentation) - - if nLevels is None or signedness is None: - return ctxt - for obj, nLevel, sign in zip(outputs, nLevels, signedness): - obj.nLevels = nLevel - obj._signed = sign - - if issubclass(obj._type.referencedType, IntegerImmediate) and not obj._type.fitsNumLevels(nLevel): - log.warning( - 
f"{obj.name} has {nLevel} levels, but {obj._type.referencedType.typeName} only supports {obj._type.referencedType.nLevels} levels." - ) + nLevels = self._inferNumLevels(inputs, operatorRepresentation) + signedness = self._inferSignedness(inputs, operatorRepresentation) + + for obj, nLevels, sign in zip(outputs, nLevels, signedness): + assert isinstance(obj, VariableBuffer) + obj.nLevels = nLevels + obj._signed = sign + refTy = obj._type.referencedType + if issubclass(refTy, IntegerImmediate) and not refTy.fitsNumLevels(nLevels): + log.warning( + f"{obj.name} has {nLevels} levels, but {refTy.typeName} only supports {refTy.nLevels} levels.") return ctxt diff --git a/Deeploy/Targets/Generic/TypeCheckers.py b/Deeploy/Targets/Generic/TypeCheckers.py index 1907a0aea0..2fa2a06d17 100644 --- a/Deeploy/Targets/Generic/TypeCheckers.py +++ b/Deeploy/Targets/Generic/TypeCheckers.py @@ -491,6 +491,10 @@ class DummyChecker(SignPropTypeChecker): def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[Type[Pointer]]): super().__init__(input_types, output_types) + def _inferSignedness(self, inputs: List[VariableBuffer], + operatorRepresentation: OperatorRepresentation) -> List[int]: + return [] + def _inferNumLevels(self, inputs: List[VariableBuffer], operatorRepresentation: OperatorRepresentation) -> List[int]: return [2**(self.input_types[0].referencedType.typeWidth)] From b6ed382831587b3ff22afa5c40598a8d460e0e43 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Tue, 30 Sep 2025 22:55:53 +0200 Subject: [PATCH 59/80] DeeployTypes small refactors --- Deeploy/DeeployTypes.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/Deeploy/DeeployTypes.py b/Deeploy/DeeployTypes.py index 6b63697d2f..e926e36043 100644 --- a/Deeploy/DeeployTypes.py +++ b/Deeploy/DeeployTypes.py @@ -252,8 +252,8 @@ def __init__(self, name: str = '', shape = [1], aliases: Optional[List[str]] = N self._live: bool = False #: bool: DO NOT OVERRIDE - this variable is true if a previous Memory allocation pass has allocated the buffer, and false if this buffer has been deallocated or has not been allocated yet. self._deploy: bool = True #: bool: MAY OVERRIDE - this variable is a global switch to deactivate the buffer for all purposes without deleting it outright. 
- self._signed = None - self.nLevels = None + self._signed: bool = None + self.nLevels: int = None self.is_input: bool = False self.is_output: bool = False @@ -1010,9 +1010,10 @@ def annotateType(self, name: str, _type: Type[Pointer]): VariableBuffer with """ - obj = self.lookup(name) - obj._type = _type - obj._instance = _type(name, ctxt = self) + buffer = self.lookup(name) + assert isinstance(buffer, VariableBuffer) + buffer._type = _type + buffer._instance = _type(name, ctxt = self) def copy(self) -> NetworkContext: """Return a shallow copy of this NetworkContext @@ -1462,14 +1463,12 @@ def typeCheckNodeInputs(self, ctxt: NetworkContext, node: gs.Node) -> bool: return retCheck def typeInferGlobalCtxt(self, ctxt: NetworkContext, node: gs.Node) -> NetworkContext: - for inputNode, _type in zip(node.inputs, self.input_types): - if isinstance(ctxt.lookup(inputNode.name), ConstantBuffer): - reference = ctxt.lookup(inputNode.name) - if not _type.referencedType.checkPromotion(reference.values): - raise Exception(f"Can't cast {reference} to {_type}!") - - ctxt.annotateType(inputNode.name, _type) - + for tensor, ty in zip(node.inputs, self.input_types): + buffer = ctxt.lookup(tensor.name) + if isinstance(buffer, ConstantBuffer): + if not ty.referencedType.checkPromotion(buffer.values): + raise Exception(f"Can't cast {buffer} to {ty}!") + ctxt.annotateType(tensor.name, ty) return ctxt def annotateDict(self, ctxt: NetworkContext, node: gs.Node, operatorRepresentation: OperatorRepresentation): From 77f3339c0d97ba3d9f2ea034ca2d635c4182c5d0 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Wed, 29 Oct 2025 14:07:52 +0100 Subject: [PATCH 60/80] Move condition checking for PULPMatrixVecParser and PULPTallGemmParser to parseNode from parseNodeCtxt --- Deeploy/Targets/PULPOpen/Parsers.py | 43 ++++++----------------------- 1 file changed, 9 insertions(+), 34 deletions(-) diff --git a/Deeploy/Targets/PULPOpen/Parsers.py b/Deeploy/Targets/PULPOpen/Parsers.py index 51b26ae546..b28fb86a28 100644 --- a/Deeploy/Targets/PULPOpen/Parsers.py +++ b/Deeploy/Targets/PULPOpen/Parsers.py @@ -342,41 +342,16 @@ def parseNodeCtxt(self, class PULPMatrixVecParser(PULPGEMMParser): - def parseNodeCtxt(self, - ctxt: NetworkContext, - node: gs.Node, - channels_first: bool = True) -> Tuple[NetworkContext, bool]: - - newCtxt, ret = super().parseNodeCtxt(ctxt, node, channels_first) - - if not ret: - return ctxt, False - - if not (self.operatorRepresentation['M'] == 1 and self.operatorRepresentation['batch'] >= 8): - return ctxt, False - - return newCtxt, True + def parseNode(self, node: gs.Node) -> bool: + M = node.inputs[0].shape[-1 if node.attrs["transA"] else -2] + batch = math.prod(node.inputs[0].shape[:-2]) + return super().parseNode(node) and M == 1 and batch >= 8 class PULPTallGEMMParser(PULPGEMMParser): - def parseNodeCtxt(self, - ctxt: NetworkContext, - node: gs.Node, - channels_first: bool = True) -> Tuple[NetworkContext, bool]: - - newCtxt, ret = super().parseNodeCtxt(ctxt, node, channels_first) - - if not ret: - return ctxt, False - - ret = all([ - self.operatorRepresentation['batch'] < 8, - self.operatorRepresentation['M'] >= 8, - self.operatorRepresentation['M'] % 8 < self.operatorRepresentation['O'] % 8, - ]) - - if not ret: - return ctxt, False - - return newCtxt, True + def parseNode(self, node: gs.Node) -> bool: + M = node.inputs[0].shape[-1 if node.attrs["transA"] else -2] + N = node.inputs[1].shape[-2 if node.attrs["transB"] else -1] + batch = math.prod(node.inputs[0].shape[:-2]) + return super().parseNode(node) 
and M >= 8 and (M % 8) < (N % 8) and batch < 8 From bc1f46fca223687ac4225ab7bc45dbceb84ae173 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Wed, 29 Oct 2025 14:04:08 +0100 Subject: [PATCH 61/80] Add node name and op to comment --- Deeploy/Targets/PULPOpen/Templates/GEMMTemplate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Deeploy/Targets/PULPOpen/Templates/GEMMTemplate.py b/Deeploy/Targets/PULPOpen/Templates/GEMMTemplate.py index 1f7149e1e8..4140101bd3 100644 --- a/Deeploy/Targets/PULPOpen/Templates/GEMMTemplate.py +++ b/Deeploy/Targets/PULPOpen/Templates/GEMMTemplate.py @@ -41,7 +41,7 @@ def alignToContext(self, ctxt: NetworkContext, else: signatureString += '_u8' %> -// PULP NN GEMM +// PULP NN GEMM (Name: ${nodeName}, Op: ${nodeOp}) int8_t* ref_${data_out}_${A} = ${A}; int8_t* ref_${data_out}_${B} = ${B}; int8_t* ref_${data_out}_${data_out} = ${data_out}; From 631466528db2d549486f88059adf086740e3c87f Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Wed, 1 Oct 2025 10:14:30 +0200 Subject: [PATCH 62/80] Fix wrong formatting of integer arrays and refactor test io generation --- DeeployTest/testUtils/codeGenerate.py | 108 +++++++++++++++----------- 1 file changed, 62 insertions(+), 46 deletions(-) diff --git a/DeeployTest/testUtils/codeGenerate.py b/DeeployTest/testUtils/codeGenerate.py index 878bc42014..d5c2e24cc0 100644 --- a/DeeployTest/testUtils/codeGenerate.py +++ b/DeeployTest/testUtils/codeGenerate.py @@ -2,11 +2,13 @@ # # SPDX-License-Identifier: Apache-2.0 +import math import os from typing import List, Tuple import numpy as np +from Deeploy.AbstractDataTypes import FloatImmediate, IntegerImmediate from Deeploy.DeeployTypes import CodeGenVerbosity, ConstantBuffer, NetworkDeployer, VariableBuffer from Deeploy.Targets.MemPool.Platform import MemPoolPlatform from Deeploy.Targets.PULPOpen.Platform import MemoryPULPPlatform, MemoryPULPPlatformWrapper, PULPPlatform @@ -30,6 +32,45 @@ def _shapeBroadcast(ctxt, value, name): return broadcastNum +def generateArray(name: str, buffer: VariableBuffer, values: np.ndarray) -> str: + assert math.prod(buffer.shape) == math.prod( + values.shape + ), f"Buffer size ({math.prod(buffer.shape)}) and values size ({math.prod(values.shape)}) are not equal." 
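+    # Renders `values` as a C array definition, e.g.
+    #   float32_t testInputVector0[] = { 1.0f,2.0f };
+    # with the element type name and literal suffix taken from the buffer's
+    # referenced type.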
+ refTy = buffer._type.referencedType + + if issubclass(refTy, FloatImmediate): + if refTy.typeWidth == 32: + suffix = "f" + elif refTy.typeWidth == 64: + suffix = "" + else: + raise RuntimeError( + f"Unimplemented floating-point literal suffix for type {refTy.typeName} of typeWidth {refTy.typeWidth}") + + def formatFloat(x: float, suffix: str = "") -> str: + if np.isinf(x) or np.isnan(x): + return str(x) + else: + return str(x) + suffix + + list_str = ",".join(formatFloat(x) for x in values.flatten()) + elif issubclass(refTy, IntegerImmediate): + suffix = "u" if refTy.typeMin >= 0 else "" + suffix += "l" if refTy.typeWidth >= 64 else "" + list_str = ",".join(str(int(x)) + suffix for x in values.flatten()) + else: + list_str = ",".join(str(x) for x in values.flatten()) + + # WIESEP: Arrays have to be 4 byte aligned (at least in banshee) + total_bytes = (values.size * refTy.typeWidth) // 8 + pad_bytes = (-total_bytes) % 4 + if pad_bytes: + paddingElements = (pad_bytes * 8 + refTy.typeWidth - 1) // refTy.typeWidth + list_str += ", " + (", ").join("0" for _ in range(paddingElements)) + + return f"{refTy.typeName} {name}[] = {{ {list_str} }};\n" + + def generateTestInputsHeader(deployer: NetworkDeployer, test_inputs: List) -> str: vectors = [] retStr = "" @@ -44,69 +85,44 @@ def generateTestInputsHeader(deployer: NetworkDeployer, test_inputs: List) -> st if not deployer.ctxt.is_buffer(bufferName): continue - values = _shapeBroadcast(deployer.ctxt, values, bufferName) - buffer = deployer.ctxt.lookup(bufferName) - typeName = buffer._type.referencedType.typeName - typeWidth = buffer._type.referencedType.typeWidth + buffer = deployer.ctxt.lookup(bufferName) + assert isinstance(buffer, VariableBuffer) + + bufferSize = math.prod(buffer.shape) + valuesSize = math.prod(values.shape) + assert bufferSize % valuesSize == 0, \ f"Values shape {values.shape} of size {valuesSize} cannot be repeated into buffer of shape {buffer.shape} and size {bufferSize}."
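+        # Stimuli smaller than the target buffer are tiled to fill it, e.g.
+        # 4 values tiled into a 12-element buffer are repeated 3 times;
+        # the assert above guarantees the repetition divides evenly.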
+ repeat = bufferSize // valuesSize + values = np.tile(values, repeat) vectorName = f"testInputVector{index}" + retStr += generateArray(vectorName, buffer, values) vectors.append(vectorName) - retStr += f"{typeName} {vectorName}[] =" - retStr += "{" - if typeName == 'float32_t': - list_str = (", ").join([f'{x}f' if not (np.isinf(x) or np.isnan(x)) else str(x) for x in values]) - else: - list_str = (", ").join([str(x) for x in values]) - - # WIESEP: Arrays have to be 4 byte aligned (at least in banshee) - total_bytes = (values.size * typeWidth) // 8 - pad_bytes = (-total_bytes) % 4 - if pad_bytes: - paddingElements = (pad_bytes * 8 + typeWidth - 1) // typeWidth - list_str += ", " + (", ").join("0" for _ in range(paddingElements)) - - retStr += list_str - retStr += "};\n" - retStr += f"void* testInputVector[{len(vectors)}] = {{" - retStr += ", ".join(vectors) + retStr += ",".join(vectors) retStr += "};\n" return retStr def generateTestOutputsHeader(deployer: NetworkDeployer, test_outputs: List[np.ndarray]) -> str: + vectors = [] retStr = "" for index, values in enumerate(test_outputs): - typeName = deployer.ctxt.lookup(f'output_{index}')._type.referencedType.typeName - typeWidth = deployer.ctxt.lookup(f'output_{index}')._type.referencedType.typeWidth + buffer = deployer.ctxt.lookup(f"output_{index}") + assert isinstance(buffer, VariableBuffer) + refTy = buffer._type.referencedType - retStr += f"#define OUTPUTTYPE {typeName}\n" - retStr += f"#define ISOUTPUTFLOAT {int(typeName == 'float32_t')}\n" - retStr += f"{typeName} testOutputVector{index}[] =" - retStr += "{" + retStr += f"#define OUTPUTTYPE {refTy.typeName}\n" + retStr += f"#define ISOUTPUTFLOAT {int(refTy.typeName == 'float32_t')}\n" - values = values.flatten() - - if typeName == "float32_t": - list_str = (", ").join([f'{x}f' if not (np.isinf(x) or np.isnan(x)) else str(x) for x in values]) - else: - list_str = (", ").join([str(x) for x in values]) - - # WIESEP: Arrays have to be 4 byte aligned (at least in banshee) - total_bytes = (len(values) * typeWidth) // 8 - pad_bytes = (-total_bytes) % 4 - if pad_bytes: - paddingElements = (pad_bytes * 8 + typeWidth - 1) // typeWidth - list_str += ", " + (", ").join("0" for _ in range(paddingElements)) - - retStr += list_str - retStr += "};\n" + vectorName = f"testOutputVector{index}" + retStr += generateArray(vectorName, buffer, values) + vectors.append(vectorName) - retStr += f"void* testOutputVector[{len(test_outputs)}] = " + "{" - retStr += ", ".join([f"testOutputVector{idx}" for idx, _ in enumerate(test_outputs)]) + retStr += f"void* testOutputVector[{len(vectors)}] = {{" + retStr += ",".join(vectors) retStr += "};\n" return retStr From 38de2389aaef1347269f19144084872440e9b3d6 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Wed, 29 Oct 2025 11:45:49 +0100 Subject: [PATCH 63/80] Move iNoNorm from Generic to Snitch since it's only used there --- Deeploy/Targets/Generic/Layers.py | 19 +--------- Deeploy/Targets/Generic/TypeCheckers.py | 17 --------- Deeploy/Targets/Snitch/Bindings.py | 7 ++-- Deeploy/Targets/Snitch/Layers.py | 24 +++++++++++++ Deeploy/Targets/Snitch/Parsers.py | 36 ++++++++++++++++++- Deeploy/Targets/Snitch/Platform.py | 7 ++-- .../Templates/iNoNormTemplate.py | 11 ++---- Deeploy/Targets/Snitch/TypeCheckers.py | 25 +++++++++++++ 8 files changed, 95 insertions(+), 51 deletions(-) create mode 100644 Deeploy/Targets/Snitch/Layers.py rename Deeploy/Targets/{Generic => Snitch}/Templates/iNoNormTemplate.py (62%) create mode 100644 Deeploy/Targets/Snitch/TypeCheckers.py diff --git 
a/Deeploy/Targets/Generic/Layers.py b/Deeploy/Targets/Generic/Layers.py index c924895c13..97e833f489 100644 --- a/Deeploy/Targets/Generic/Layers.py +++ b/Deeploy/Targets/Generic/Layers.py @@ -7,7 +7,7 @@ import numpy as np -from Deeploy.DeeployTypes import NodeMapper, ONNXLayer, OperatorRepresentation, Shape +from Deeploy.DeeployTypes import NodeMapper, ONNXLayer, Shape class ConcatLayer(ONNXLayer): @@ -64,23 +64,6 @@ def __init__(self, maps: List[NodeMapper]): super().__init__(maps) -class iNoNormLayer(ONNXLayer): - - def __init__(self, maps: List[NodeMapper]): - super().__init__(maps) - - def computeOps(self): - return self.mapper.parser.operatorRepresentation['size'] * 4 # 2 mul, 1 add, 1 right shift - - def computeShapes(self, inputShapes: Shape, outputShapes: Shape, operatorRepresentation: OperatorRepresentation, - channels_first: bool) -> Tuple[Shape]: - - # JUNGVI: Broadcast the weights and bias to have as many dimensions as the inputs - inputShapes[1] = [1] * (len(inputShapes[0]) - len(inputShapes[1])) + list(inputShapes[1]) - inputShapes[2] = inputShapes[1] - return (inputShapes, outputShapes) - - class RQSiGELULayer(GELULayer): def __init__(self, maps: List[NodeMapper]): diff --git a/Deeploy/Targets/Generic/TypeCheckers.py b/Deeploy/Targets/Generic/TypeCheckers.py index 2fa2a06d17..2e81f259f3 100644 --- a/Deeploy/Targets/Generic/TypeCheckers.py +++ b/Deeploy/Targets/Generic/TypeCheckers.py @@ -366,23 +366,6 @@ def _inferSignedness(self, inputs: List[VariableBuffer], return [False] -class iNoNormChecker(SignPropTypeChecker): - - def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[Type[Pointer]]): - super().__init__(input_types, output_types) - - def _inferNumLevels(self, inputs: List[VariableBuffer], - operatorRepresentation: OperatorRepresentation) -> List[int]: - return [2**(4 * self.input_types[0].referencedType.typeWidth)] - - def _inferSignedness(self, inputs: List[VariableBuffer], - operatorRepresentation: OperatorRepresentation) -> List[bool]: - if inputs[0]._signed: - return [True] - else: - return [False] - - class GELUChecker(SignPropTypeChecker): def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[Type[Pointer]]): diff --git a/Deeploy/Targets/Snitch/Bindings.py b/Deeploy/Targets/Snitch/Bindings.py index e9be18a535..1d1af32a36 100644 --- a/Deeploy/Targets/Snitch/Bindings.py +++ b/Deeploy/Targets/Snitch/Bindings.py @@ -11,15 +11,16 @@ from Deeploy.CommonExtensions.DataTypes import float32_t, int8_t, int32_t, uint8_t from Deeploy.DeeployTypes import CodeTransformation, NodeBinding from Deeploy.FutureExtension.CodeTransformationPasses.FutureCodeTransformation import FutureGeneration -from Deeploy.Targets.Generic.Templates import iNoNormTemplate -from Deeploy.Targets.Generic.TypeCheckers import AddChecker, GEMMChecker, RQAddChecker, SoftmaxChecker, iNoNormChecker +from Deeploy.Targets.Generic.TypeCheckers import AddChecker, GEMMChecker, RQAddChecker, SoftmaxChecker from Deeploy.Targets.Snitch.CodeTransformationPasses import SnitchClusterTiling, SnitchCoreFilterPass, \ SnitchProfileExecutionBlockPass, SnitchSynchCoresPass from Deeploy.Targets.Snitch.DMA.SnitchDma import SnitchDma -from Deeploy.Targets.Snitch.Templates import AddTemplate, FloatGemmTemplate, RQAddTemplate, iSoftmaxTemplate +from Deeploy.Targets.Snitch.Templates import AddTemplate, FloatGemmTemplate, RQAddTemplate, iNoNormTemplate, \ + iSoftmaxTemplate from Deeploy.Targets.Snitch.Templates.FloatSoftmaxTemplate import FloatSoftmax_Template from 
Deeploy.Targets.Snitch.Templates.GemmTemplate import SnitchGemm_Template from Deeploy.Targets.Snitch.Templates.RqGemmTemplate import SnitchRqGemm_Template +from Deeploy.Targets.Snitch.TypeCheckers import iNoNormChecker from Deeploy.TilingExtension.CodeTransformationPasses.TilingVariableReplacement import TilingVariableReplacement, \ TilingVariableReplacementUpdate diff --git a/Deeploy/Targets/Snitch/Layers.py b/Deeploy/Targets/Snitch/Layers.py new file mode 100644 index 0000000000..017d279c38 --- /dev/null +++ b/Deeploy/Targets/Snitch/Layers.py @@ -0,0 +1,24 @@ +# SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 + +from typing import List, Tuple + +import numpy as np + +from Deeploy.DeeployTypes import NodeMapper, ONNXLayer, OperatorRepresentation, Shape + + +class iNoNormLayer(ONNXLayer): + + def __init__(self, maps: List[NodeMapper]): + super().__init__(maps) + + def computeOps(self): + return self.mapper.parser.operatorRepresentation['size'] * 4 # 2 mul, 1 add, 1 right shift + + def computeShapes(self, inputShapes: Shape, outputShapes: Shape, operatorRepresentation: OperatorRepresentation, + channels_first: bool) -> Tuple[Shape]: + # JUNGVI: Broadcast the weights and bias to have as many dimensions as the inputs + shape = np.broadcast_shapes(*inputShapes) + return ([shape] * len(inputShapes), outputShapes) diff --git a/Deeploy/Targets/Snitch/Parsers.py b/Deeploy/Targets/Snitch/Parsers.py index 51b32db210..6f70156096 100644 --- a/Deeploy/Targets/Snitch/Parsers.py +++ b/Deeploy/Targets/Snitch/Parsers.py @@ -2,11 +2,12 @@ # # SPDX-License-Identifier: Apache-2.0 +import math from typing import Tuple import onnx_graphsurgeon as gs -from Deeploy.DeeployTypes import NetworkContext +from Deeploy.DeeployTypes import NetworkContext, NodeParser from Deeploy.Targets.Generic.Parsers import GEMMParser, RQGEMMParser @@ -68,3 +69,36 @@ def parseNodeCtxt(self, return ctxt, False return newCtxt, True + + +class iNoNormParser(NodeParser): + + def __init__(self): + super().__init__() + + def parseNode(self, node: gs.Node) -> bool: + + ret = all(['D' in node.attrs, 'mul' in node.attrs, 'n_levels' in node.attrs]) + + if ret: + self.operatorRepresentation.update(node.attrs) + self.operatorRepresentation['log2D'] = int(math.log2(node.attrs['D'])) + + return ret + + def parseNodeCtxt(self, + ctxt: NetworkContext, + node: gs.Node, + channels_first: bool = True) -> Tuple[NetworkContext, bool]: + + data_in = ctxt.lookup(node.inputs[0].name) + weights = ctxt.lookup(node.inputs[1].name) + bias = ctxt.lookup(node.inputs[2].name) + data_out = ctxt.lookup(node.outputs[0].name) + self.operatorRepresentation['data_in'] = data_in.name + self.operatorRepresentation['weights'] = weights.name + self.operatorRepresentation['bias'] = bias.name + self.operatorRepresentation['data_out'] = data_out.name + self.operatorRepresentation['size'] = math.prod(data_in.shape) + + return ctxt, True diff --git a/Deeploy/Targets/Snitch/Platform.py b/Deeploy/Targets/Snitch/Platform.py index d62d1c3802..bb570b588f 100644 --- a/Deeploy/Targets/Snitch/Platform.py +++ b/Deeploy/Targets/Snitch/Platform.py @@ -11,15 +11,16 @@ from Deeploy.Targets.Generic.Bindings import BasicGatherBindings, BasicLayerNormBindings, BasicMatMulBindings, \ BasicPad1DBindings, BasicPad2DBindings, BasicReshapeBindings, BasicRQIntegerDivBinding from Deeploy.Targets.Generic.Layers import AddLayer, GatherLayer, GEMMLayer, LayerNormLayer, MatMulLayer, PadLayer, \ - ReshapeLayer, RQGEMMLayer, RQIntegerDivLayer, 
SoftmaxLayer, iNoNormLayer + ReshapeLayer, RQGEMMLayer, RQIntegerDivLayer, SoftmaxLayer from Deeploy.Targets.Generic.Parsers import AddParser, GatherParser, MatMulParser, Pad1DParser, Pad2DParser, \ - RQAddParser, RQIntegerDivParser, SoftmaxParser, UnsqueezeParser, iLayerNormParser, iNoNormParser, iSoftmaxParser + RQAddParser, RQIntegerDivParser, SoftmaxParser, UnsqueezeParser, iLayerNormParser, iSoftmaxParser from Deeploy.Targets.Generic.Templates import AllocateTemplate as BasicAllocateTemplate from Deeploy.Targets.Generic.TopologyOptimizationPasses.Passes import AddRequantMergePass, GEMMRequantMergePass, \ IntegerDivRequantMergePass, MergeConstAddAndRequantPass, MergeTrueIntegerDivRequantShiftPass, RQSSplitPass, \ SkipEmptyConcatPass, SkipUnityRequantPass, iGELURequantMergePass, iHardswishRequantMergePass from Deeploy.Targets.PULPOpen.Platform import RQAddMapper -from Deeploy.Targets.Snitch.Parsers import SnitchGEMMParser, SnitchRQGEMMParser +from Deeploy.Targets.Snitch.Layers import iNoNormLayer +from Deeploy.Targets.Snitch.Parsers import SnitchGEMMParser, SnitchRQGEMMParser, iNoNormParser from Deeploy.Targets.Snitch.Templates import AllocateTemplate, FreeTemplate from Deeploy.Targets.Snitch.Tiler import SnitchAddTileReadyBindings, SnitchGemmTilingReadyBindings, \ SnitchiNoNormTilingReadyBindings, SnitchiSoftmaxTilingReadyBindings, SnitchRQAddTilingReadyBindings, \ diff --git a/Deeploy/Targets/Generic/Templates/iNoNormTemplate.py b/Deeploy/Targets/Snitch/Templates/iNoNormTemplate.py similarity index 62% rename from Deeploy/Targets/Generic/Templates/iNoNormTemplate.py rename to Deeploy/Targets/Snitch/Templates/iNoNormTemplate.py index 562b3168a9..f99ffba3dc 100644 --- a/Deeploy/Targets/Generic/Templates/iNoNormTemplate.py +++ b/Deeploy/Targets/Snitch/Templates/iNoNormTemplate.py @@ -2,16 +2,9 @@ # # SPDX-License-Identifier: Apache-2.0 -from Deeploy.DeeployTypes import NodeTemplate +from Deeploy.CommonExtensions.NodeTemplate import ElementwiseTemplate - -class _iNoNormTemplate(NodeTemplate): - - def __init__(self, templateStr): - super().__init__(templateStr) - - -referenceTemplate = _iNoNormTemplate(""" +referenceTemplate = ElementwiseTemplate(""" // iNoNorm (Name: ${nodeName}, Op: ${nodeOp}) SnitchiNoNorm_s${data_in_type.referencedType.typeWidth}_s${data_out_type.referencedType.typeWidth}(${data_in}, ${data_out}, ${weights}, ${bias}, ${size}, ${mul}, ${log2D}); """) diff --git a/Deeploy/Targets/Snitch/TypeCheckers.py b/Deeploy/Targets/Snitch/TypeCheckers.py new file mode 100644 index 0000000000..09ef3bc3c5 --- /dev/null +++ b/Deeploy/Targets/Snitch/TypeCheckers.py @@ -0,0 +1,25 @@ +# SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 + +from typing import List, Sequence, Type + +from Deeploy.CommonExtensions.TypeCheckers.SignPropTypeChecker import SignPropTypeChecker +from Deeploy.DeeployTypes import OperatorRepresentation, Pointer, VariableBuffer + + +class iNoNormChecker(SignPropTypeChecker): + + def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[Type[Pointer]]): + super().__init__(input_types, output_types) + + def _inferNumLevels(self, inputs: List[VariableBuffer], + operatorRepresentation: OperatorRepresentation) -> List[int]: + return [2**(4 * self.input_types[0].referencedType.typeWidth)] + + def _inferSignedness(self, inputs: List[VariableBuffer], + operatorRepresentation: OperatorRepresentation) -> List[bool]: + if inputs[0]._signed: + return [True] + else: + return [False] From 
c5a5cfd00854a51159759dc30807ddb5384e242b Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Wed, 1 Oct 2025 10:29:35 +0200 Subject: [PATCH 64/80] Fix wrong path to generated sources --- DeeployTest/Platforms/Generic/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DeeployTest/Platforms/Generic/CMakeLists.txt b/DeeployTest/Platforms/Generic/CMakeLists.txt index f97f1cdf1b..b2e68b257e 100644 --- a/DeeployTest/Platforms/Generic/CMakeLists.txt +++ b/DeeployTest/Platforms/Generic/CMakeLists.txt @@ -8,7 +8,7 @@ file(GLOB_RECURSE SOURCES main.c ) -link_directories(${ProjectId}/../../${GENERATED_SOURCE}) +link_directories(${GENERATED_SOURCE}) add_deeploy_executable(${ProjectId} EXCLUDE_FROM_ALL ${SOURCES} ) target_link_libraries(${ProjectId} PRIVATE network deeploylib) From 264cf2afdacc2c0cc97c9c7ccefd94caea3ed0f2 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Thu, 2 Oct 2025 14:36:05 +0200 Subject: [PATCH 65/80] Refactor merge_conv_rq_fun --- .../TopologyOptimizationPasses/Passes.py | 47 ++++++++++++------- 1 file changed, 30 insertions(+), 17 deletions(-) diff --git a/Deeploy/Targets/PULPOpen/TopologyOptimizationPasses/Passes.py b/Deeploy/Targets/PULPOpen/TopologyOptimizationPasses/Passes.py index 43d490e80b..9ea1eda573 100644 --- a/Deeploy/Targets/PULPOpen/TopologyOptimizationPasses/Passes.py +++ b/Deeploy/Targets/PULPOpen/TopologyOptimizationPasses/Passes.py @@ -3,6 +3,7 @@ # SPDX-License-Identifier: Apache-2.0 import copy +import math from collections import OrderedDict import numpy as np @@ -164,23 +165,35 @@ def __init__(self): def _merge_conv_rq_fun(graph: gs.Graph, match: Match, name: str): - matched_nodes = [m for k, m in match.nodes_map.items()] - conv = matched_nodes[0] - rqs = matched_nodes[1] - - totalShift = int(np.log2(rqs.attrs['div'].values)) - - # Artifically add half the shift division value to implement rounding - rounding = 2**(totalShift - 1) if totalShift > 0 else 0 - - rqs.inputs[-1].values = copy.deepcopy(rqs.inputs[-1].values) + rounding - - _inputs = list(conv.inputs) + list(rqs.inputs[1:]) - - _outputs = rqs.outputs - - rqsConv = gs.Node(op = 'RequantizedConv', name = name, attrs = {**conv.attrs, **rqs.attrs, "shift": totalShift}) - graph.replaceInsertNode(_inputs, _outputs, rqsConv) + conv, rqs = list(match.nodes_map.values()) + + mul, add = rqs.inputs[1:] + + div_attr = rqs.attrs['div'] + if isinstance(div_attr, gs.Constant): + assert div_attr.values.size == 1 + div = div_attr.values.item() + elif isinstance(div_attr, int): + div = div_attr + elif isinstance(div_attr, float) and div_attr.is_integer(): + div = int(div_attr) + else: + raise ValueError(f"Cannot convert div to integer. 
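# A small numeric sketch (toy values, plain Python) of the rounding trick
# applied right after this check: folding 2**(shift - 1) into the requant `add`
# tensor turns the truncating right shift into a round-to-nearest division.
def requant(acc: int, mul: int, add: int, shift: int) -> int:
    return (acc * mul + add) >> shift

mul, shift = 3, 4                                   # divide by 2**4 = 16 after scaling
assert requant(9, mul, 0, shift) == 1               # 27/16 = 1.6875 truncates to 1
assert requant(9, mul, 2**(shift - 1), shift) == 2  # with rounding bias: rounds to 2
assert requant(7, mul, 2**(shift - 1), shift) == 1  # 21/16 = 1.3125 still rounds to 1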
Received {div_attr}") + shift = int(math.log2(div)) + # Artifically add half the division value as rounding + if shift > 0: + add.values += 2**(shift - 1) + + rqsConv = gs.Node( + op = 'RequantizedConv', + name = name, + attrs = { + **conv.attrs, + **rqs.attrs, + "shift": shift, + }, + ) + graph.replaceInsertNode(list(conv.inputs) + [mul, add], rqs.outputs, rqsConv) return graph From a3bdc515f04f9d8d957aa60b23068dc64b5db11b Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Fri, 17 Oct 2025 11:06:11 +0200 Subject: [PATCH 66/80] Fix flatten values before generating the array --- DeeployTest/testUtils/codeGenerate.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/DeeployTest/testUtils/codeGenerate.py b/DeeployTest/testUtils/codeGenerate.py index d5c2e24cc0..5a4774a447 100644 --- a/DeeployTest/testUtils/codeGenerate.py +++ b/DeeployTest/testUtils/codeGenerate.py @@ -33,11 +33,12 @@ def _shapeBroadcast(ctxt, value, name): def generateArray(name: str, buffer: VariableBuffer, values: np.ndarray) -> str: - assert math.prod(buffer.shape) == math.prod( - values.shape - ), f"Buffer size ({math.prod(buffer.shape)}) and values size ({math.prod(values.shape)}) are not equal." + assert math.prod(buffer.shape) == math.prod(values.shape), \ + f"Buffer size ({math.prod(buffer.shape)}) and values size ({math.prod(values.shape)}) are not equal." refTy = buffer._type.referencedType + values = values.flatten() + if issubclass(refTy, FloatImmediate): if refTy.typeWidth == 32: suffix = "f" @@ -53,7 +54,7 @@ def formatFloat(x: float, suffix: str = "") -> str: else: return str(x) + suffix - list_str = ",".join(formatFloat(x) for x in values.flatten()) + list_str = ",".join(formatFloat(x) for x in values) elif issubclass(refTy, IntegerImmediate): suffix = "u" if refTy.typeMin >= 0 else "" suffix += "l" if refTy.typeWidth >= 64 else "" From a3b86c188773eadd3ab4494d2194b1c24bc91c95 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Wed, 29 Oct 2025 12:09:04 +0100 Subject: [PATCH 67/80] Fix MaxPool parseNode --- Deeploy/Targets/Generic/Parsers.py | 74 +++++++++++++++++------------- 1 file changed, 41 insertions(+), 33 deletions(-) diff --git a/Deeploy/Targets/Generic/Parsers.py b/Deeploy/Targets/Generic/Parsers.py index edbb2bc917..c1e28047c4 100644 --- a/Deeploy/Targets/Generic/Parsers.py +++ b/Deeploy/Targets/Generic/Parsers.py @@ -227,20 +227,25 @@ def __init__(self): super().__init__() def parseNode(self, node: gs.Node) -> bool: - ret = super().parseNode(node) - wellFormed = False - if ret: - pads = self.operatorRepresentation['pads'] - kernel_shape = self.operatorRepresentation['kernel_shape'] - strides = self.operatorRepresentation['strides'] - # 1D: pads should be length 2, kernel_shape length 1, strides length 1 - if len(pads) == 2 and len(kernel_shape) == 1 and len(strides) == 1: - wellFormed = True - self.operatorRepresentation['padding_y'] = int(pads[0]) - self.operatorRepresentation['padding_y_right'] = int(pads[1]) - self.operatorRepresentation['stride_y'] = int(strides[0]) - self.operatorRepresentation['dim_kernel_y'] = int(kernel_shape[0]) - return wellFormed + if not super().parseNode(node): + return False + + pads = self.operatorRepresentation['pads'] + kernel_shape = self.operatorRepresentation['kernel_shape'] + strides = self.operatorRepresentation['strides'] + + if not all([ + len(pads) == 2, + len(kernel_shape) == 1, + len(strides) == 1, + ]): + return False + + self.operatorRepresentation['padding_y'] = pads[0] + self.operatorRepresentation['padding_y_right'] = pads[1] + 
self.operatorRepresentation['stride_y'] = strides[0] + self.operatorRepresentation['dim_kernel_y'] = kernel_shape[0] + return True def parseNodeCtxt(self, ctxt, node, channels_first = True): newCtxt, ret = super().parseNodeCtxt(ctxt, node, channels_first) @@ -269,28 +274,31 @@ def __init__(self): super().__init__() def parseNode(self, node: gs.Node) -> bool: + if not super().parseNode(node): + return False - ret = super().parseNode(node) - wellFormed = False - if ret: - pads = self.operatorRepresentation['pads'] - kernel_shape = self.operatorRepresentation['kernel_shape'] - strides = self.operatorRepresentation['strides'] - if len(pads) == 4 and len(kernel_shape) == 2 and len(strides) == 2: - wellFormed = True + pads = self.operatorRepresentation['pads'] + kernel_shape = self.operatorRepresentation['kernel_shape'] + strides = self.operatorRepresentation['strides'] - self.operatorRepresentation['padding_x'] = int(self.operatorRepresentation['pads'][0]) - self.operatorRepresentation['padding_y'] = int(self.operatorRepresentation['pads'][1]) - self.operatorRepresentation['padding_x_left'] = int(self.operatorRepresentation['pads'][0]) - self.operatorRepresentation['padding_y_top'] = int(self.operatorRepresentation['pads'][1]) - self.operatorRepresentation['padding_x_right'] = int(self.operatorRepresentation['pads'][2]) - self.operatorRepresentation['padding_y_bottom'] = int(self.operatorRepresentation['pads'][3]) - self.operatorRepresentation['stride_x'] = int(self.operatorRepresentation['strides'][0]) - self.operatorRepresentation['stride_y'] = int(self.operatorRepresentation['strides'][1]) - self.operatorRepresentation['dim_kernel_x'] = int(self.operatorRepresentation['kernel_shape'][0]) - self.operatorRepresentation['dim_kernel_y'] = int(self.operatorRepresentation['kernel_shape'][1]) + if not all([ + len(pads) == 4, + len(kernel_shape) == 2, + len(strides) == 2, + ]): + return False - return wellFormed + self.operatorRepresentation['padding_x'] = pads[0] + self.operatorRepresentation['padding_y'] = pads[1] + self.operatorRepresentation['padding_x_left'] = pads[0] + self.operatorRepresentation['padding_y_top'] = pads[1] + self.operatorRepresentation['padding_x_right'] = pads[2] + self.operatorRepresentation['padding_y_bottom'] = pads[3] + self.operatorRepresentation['stride_x'] = strides[0] + self.operatorRepresentation['stride_y'] = strides[1] + self.operatorRepresentation['dim_kernel_x'] = kernel_shape[0] + self.operatorRepresentation['dim_kernel_y'] = kernel_shape[1] + return True def parseNodeCtxt(self, ctxt: NetworkContext, From 5f15b1156ef75cdd67e90d7baf97c732660174c4 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Fri, 17 Oct 2025 11:06:46 +0200 Subject: [PATCH 68/80] Remove MemcpyTypeChecker and do the usual type listing --- DeeployTest/testRunner_siracusa_l3dma.py | 14 +++++++---- DeeployTest/testRunner_siracusa_mchandma.py | 14 +++++++---- DeeployTest/testRunner_snitch_dma.py | 14 +++++++---- DeeployTest/testUtils/dmaUtils.py | 26 ++------------------- 4 files changed, 29 insertions(+), 39 deletions(-) diff --git a/DeeployTest/testRunner_siracusa_l3dma.py b/DeeployTest/testRunner_siracusa_l3dma.py index b70d8dda22..937f7e9b29 100644 --- a/DeeployTest/testRunner_siracusa_l3dma.py +++ b/DeeployTest/testRunner_siracusa_l3dma.py @@ -6,15 +6,16 @@ import numpy as np from testUtils.codeGenerate import generateTestNetwork -from testUtils.dmaUtils import MemcpyLayer, MemcpyParser, MemcpyTileConstraint, MemcpyTypeChecker, generate_graph, \ - memcpyTemplate, 
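# An illustrative restatement of the ONNX `pads` layout assumed by the
# rewritten 2D parseNode above: pads = [begin_axis0, begin_axis1, end_axis0,
# end_axis1], split into left/top/right/bottom fields (toy values only).
pads = [1, 2, 3, 4]
fields = {
    'padding_x_left': pads[0],
    'padding_y_top': pads[1],
    'padding_x_right': pads[2],
    'padding_y_bottom': pads[3],
}
assert fields == {'padding_x_left': 1, 'padding_y_top': 2,
                  'padding_x_right': 3, 'padding_y_bottom': 4}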
prepare_deployer_with_custom_tiling, setup_pulp_deployer +from testUtils.dmaUtils import MemcpyLayer, MemcpyParser, MemcpyTileConstraint, generate_graph, memcpyTemplate, \ + prepare_deployer_with_custom_tiling, setup_pulp_deployer from testUtils.testRunner import TestRunner, TestRunnerArgumentParser from testUtils.typeMapping import baseTypeFromName, dtypeFromDeeployType from Deeploy.AbstractDataTypes import PointerClass from Deeploy.CommonExtensions.CodeTransformationPasses.MemoryAllocation import ArgumentStructGeneration, \ MemoryManagementGeneration -from Deeploy.DeeployTypes import CodeTransformation, NodeBinding, NodeMapper, _NoVerbosity +from Deeploy.CommonExtensions.DataTypes import FloatDataTypes, IntegerDataTypes +from Deeploy.DeeployTypes import CodeTransformation, NodeBinding, NodeMapper, NodeTypeChecker, _NoVerbosity from Deeploy.Targets.PULPOpen.Bindings import L3MemoryAwareFunctionCallClosure, TilingCallClosure from Deeploy.Targets.PULPOpen.CodeTransformationPasses.PULPL3Tiling import PULPL3Tiling from Deeploy.Targets.PULPOpen.DMA.L3Dma import l3DmaHack @@ -74,8 +75,11 @@ MemoryManagementGeneration(), ]) -binding = NodeBinding(MemcpyTypeChecker(), memcpyTemplate, transformer) -tilingReadyBindings = TilingReadyNodeBindings([binding], MemcpyTileConstraint()) +bindings = [ + NodeBinding(NodeTypeChecker([PointerClass(ty)], [PointerClass(ty)]), memcpyTemplate, transformer) + for ty in IntegerDataTypes + FloatDataTypes +] +tilingReadyBindings = TilingReadyNodeBindings(bindings, MemcpyTileConstraint()) memcpyMapper = NodeMapper(MemcpyParser(), tilingReadyBindings) memcpyMapping = {"Memcpy": MemcpyLayer([memcpyMapper])} deployer.Platform.engines[0].Mapping.update(memcpyMapping) diff --git a/DeeployTest/testRunner_siracusa_mchandma.py b/DeeployTest/testRunner_siracusa_mchandma.py index 56ed6f5a14..aeb407d7e6 100644 --- a/DeeployTest/testRunner_siracusa_mchandma.py +++ b/DeeployTest/testRunner_siracusa_mchandma.py @@ -6,15 +6,16 @@ import numpy as np from testUtils.codeGenerate import generateTestNetwork -from testUtils.dmaUtils import MemcpyLayer, MemcpyParser, MemcpyTileConstraint, MemcpyTypeChecker, generate_graph, \ - memcpyTemplate, prepare_deployer_with_custom_tiling, setup_pulp_deployer +from testUtils.dmaUtils import MemcpyLayer, MemcpyParser, MemcpyTileConstraint, generate_graph, memcpyTemplate, \ + prepare_deployer_with_custom_tiling, setup_pulp_deployer from testUtils.testRunner import TestRunner, TestRunnerArgumentParser from testUtils.typeMapping import baseTypeFromName, dtypeFromDeeployType from Deeploy.AbstractDataTypes import PointerClass from Deeploy.CommonExtensions.CodeTransformationPasses.MemoryAllocation import ArgumentStructGeneration, \ MemoryManagementGeneration -from Deeploy.DeeployTypes import CodeTransformation, NodeBinding, NodeMapper, _NoVerbosity +from Deeploy.CommonExtensions.DataTypes import FloatDataTypes, IntegerDataTypes +from Deeploy.DeeployTypes import CodeTransformation, NodeBinding, NodeMapper, NodeTypeChecker, _NoVerbosity from Deeploy.Targets.PULPOpen.Bindings import MemoryAwareFunctionCallClosure, TilingCallClosure from Deeploy.Targets.PULPOpen.CodeTransformationPasses.PULPClusterTiling import PULPClusterTiling from Deeploy.Targets.PULPOpen.DMA.MchanDma import MchanDma @@ -75,8 +76,11 @@ MemoryManagementGeneration(), ]) -binding = NodeBinding(MemcpyTypeChecker(), memcpyTemplate, transformer) -tilingReadyBindings = TilingReadyNodeBindings([binding], MemcpyTileConstraint()) +bindings = [ + NodeBinding(NodeTypeChecker([PointerClass(ty)], 
[PointerClass(ty)]), memcpyTemplate, transformer) + for ty in IntegerDataTypes + FloatDataTypes +] +tilingReadyBindings = TilingReadyNodeBindings(bindings, MemcpyTileConstraint()) memcpyMapper = NodeMapper(MemcpyParser(), tilingReadyBindings) memcpyMapping = {"Memcpy": MemcpyLayer([memcpyMapper])} deployer.Platform.engines[0].Mapping.update(memcpyMapping) diff --git a/DeeployTest/testRunner_snitch_dma.py b/DeeployTest/testRunner_snitch_dma.py index 80073ac5ed..ba42b433fe 100644 --- a/DeeployTest/testRunner_snitch_dma.py +++ b/DeeployTest/testRunner_snitch_dma.py @@ -6,15 +6,16 @@ import numpy as np from testUtils.codeGenerate import generateTestNetwork -from testUtils.dmaUtils import MemcpyLayer, MemcpyParser, MemcpyTileConstraint, MemcpyTypeChecker, generate_graph, \ - memcpyTemplate, prepare_deployer_with_custom_tiling, setup_snitch_deployer +from testUtils.dmaUtils import MemcpyLayer, MemcpyParser, MemcpyTileConstraint, generate_graph, memcpyTemplate, \ + prepare_deployer_with_custom_tiling, setup_snitch_deployer from testUtils.testRunner import TestRunner, TestRunnerArgumentParser from testUtils.typeMapping import baseTypeFromName, dtypeFromDeeployType from Deeploy.AbstractDataTypes import PointerClass from Deeploy.CommonExtensions.CodeTransformationPasses.MemoryAllocation import ArgumentStructGeneration, \ MemoryManagementGeneration -from Deeploy.DeeployTypes import CodeTransformation, NodeBinding, NodeMapper, _NoVerbosity +from Deeploy.CommonExtensions.DataTypes import FloatDataTypes, IntegerDataTypes +from Deeploy.DeeployTypes import CodeTransformation, NodeBinding, NodeMapper, NodeTypeChecker, _NoVerbosity from Deeploy.Targets.Snitch.Bindings import MemoryAwareFunctionCallClosure, TilingCallClosure from Deeploy.Targets.Snitch.CodeTransformationPasses import SnitchClusterTiling from Deeploy.Targets.Snitch.CodeTransformationPasses.SnitchClusterSynch import SnitchSynchCoresPass @@ -80,8 +81,11 @@ MemoryManagementGeneration(), ]) -binding = NodeBinding(MemcpyTypeChecker(), memcpyTemplate, transformer) -tilingReadyBindings = TilingReadyNodeBindings([binding], MemcpyTileConstraint()) +bindings = [ + NodeBinding(NodeTypeChecker([PointerClass(ty)], [PointerClass(ty)]), memcpyTemplate, transformer) + for ty in IntegerDataTypes + FloatDataTypes +] +tilingReadyBindings = TilingReadyNodeBindings(bindings, MemcpyTileConstraint()) memcpyMapper = NodeMapper(MemcpyParser(), tilingReadyBindings) memcpyMapping = {"Memcpy": MemcpyLayer([memcpyMapper])} deployer.Platform.engines[0].Mapping.update(memcpyMapping) diff --git a/DeeployTest/testUtils/dmaUtils.py b/DeeployTest/testUtils/dmaUtils.py index ba2f6e176f..09ce4ef025 100644 --- a/DeeployTest/testUtils/dmaUtils.py +++ b/DeeployTest/testUtils/dmaUtils.py @@ -10,8 +10,8 @@ from Deeploy.AbstractDataTypes import BaseType, Pointer, PointerClass from Deeploy.CommonExtensions.DataTypes import minimalIntegerType -from Deeploy.DeeployTypes import IoDesc, NetworkContext, NetworkDeployer, NodeParser, NodeTemplate, NodeTypeChecker, \ - ONNXLayer, OperatorDescriptor, OperatorRepresentation, VariableBuffer +from Deeploy.DeeployTypes import IoDesc, NetworkContext, NetworkDeployer, NodeParser, NodeTemplate, ONNXLayer, \ + OperatorDescriptor, OperatorRepresentation, VariableBuffer from Deeploy.MemoryLevelExtension.MemoryLevels import MemoryHierarchy, MemoryLevel from Deeploy.MemoryLevelExtension.NetworkDeployers.MemoryLevelDeployer import MemoryDeployerWrapper, \ MemoryPlatformWrapper @@ -35,28 +35,6 @@ """) -# Same interface as NodeTypeChecker but allow any input 
type and the -# output type matches the input type. -class MemcpyTypeChecker(NodeTypeChecker): - - def __init__(self): - super().__init__([], []) - - def typeInferOutput(self, ctxt: NetworkContext, node: gs.Node, - operatorRepresentation: OperatorRepresentation) -> NetworkContext: - assert len(node.inputs) == 1 and len(node.outputs) == 1 - buffer_in = ctxt.lookup(node.inputs[0].name) - ctxt.annotateType(node.outputs[0].name, buffer_in._type) - return ctxt - - def typeCheckNodeInputs(self, ctxt: NetworkContext, node: gs.Node) -> bool: - return True - - def typeInferGlobalCtxt(self, ctxt: NetworkContext, node: gs.Node) -> NetworkContext: - # Whatever it has already annotated, it's good - return ctxt - - class MemcpyTileConstraint(TileConstraint): @classmethod From 558bfccd54ec81883987484b0a8eff984efcc6f9 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Wed, 29 Oct 2025 13:38:55 +0100 Subject: [PATCH 69/80] Fix signprop checks --- .../Targets/Generic/Templates/FloatReduceMeanTemplate.py | 4 ++-- Deeploy/Targets/PULPOpen/Templates/GEMMTemplate.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Deeploy/Targets/Generic/Templates/FloatReduceMeanTemplate.py b/Deeploy/Targets/Generic/Templates/FloatReduceMeanTemplate.py index 005b0b8893..7dbcaed269 100644 --- a/Deeploy/Targets/Generic/Templates/FloatReduceMeanTemplate.py +++ b/Deeploy/Targets/Generic/Templates/FloatReduceMeanTemplate.py @@ -18,10 +18,10 @@ def alignToContext(self, ctxt: NetworkContext, data_in = ctxt.lookup(operatorRepresentation['data_in']) data_out = ctxt.lookup(operatorRepresentation['data_out']) operatorRepresentation['input_offset'] = 0 - if hasattr(data_in, "_signed") and hasattr(data_in, "nLevels"): + if data_in._signed is not None and data_in.nLevels is not None: operatorRepresentation['input_offset'] = (data_in._signed == 0) * int(data_in.nLevels / 2) operatorRepresentation['output_offset'] = 0 - if hasattr(data_out, "_signed") and hasattr(data_out, "nLevels"): + if data_out._signed is not None and data_out.nLevels is not None: operatorRepresentation['output_offset'] = -(data_out._signed == 0) * int(data_in.nLevels / 2) return ctxt, operatorRepresentation, [] diff --git a/Deeploy/Targets/PULPOpen/Templates/GEMMTemplate.py b/Deeploy/Targets/PULPOpen/Templates/GEMMTemplate.py index 4140101bd3..c65404c096 100644 --- a/Deeploy/Targets/PULPOpen/Templates/GEMMTemplate.py +++ b/Deeploy/Targets/PULPOpen/Templates/GEMMTemplate.py @@ -82,11 +82,11 @@ def alignToContext(self, ctxt: NetworkContext, operatorRepresentation['B_offset'] = 0 operatorRepresentation['C_offset'] = 0 - if hasattr(A, "nLevels"): + if A.nLevels is not None: operatorRepresentation['A_offset'] = (A._type.referencedType.typeMin == 0) * int(A.nLevels / 2) - if hasattr(B, "nLevels"): + if B.nLevels is not None: operatorRepresentation['B_offset'] = (B._type.referencedType.typeMin == 0) * int(B.nLevels / 2) - if hasattr(C, "nLevels"): + if C.nLevels is not None: operatorRepresentation['C_offset'] = -(C._type.referencedType.typeMin == 0) * int(C.nLevels / 2) return ctxt, operatorRepresentation, [] From 4868f215d789c0daff3e37a2734587e6c6041f23 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Tue, 14 Oct 2025 23:23:14 +0200 Subject: [PATCH 70/80] Add Gemm helper function to get the matrix dimensions --- .../TileConstraints/MatMulTileConstraint.py | 71 ++++++++----------- 1 file changed, 30 insertions(+), 41 deletions(-) diff --git a/Deeploy/Targets/PULPOpen/TileConstraints/MatMulTileConstraint.py 
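# A short demonstration (pure Python, no Deeploy imports) of the bug class
# fixed by the `is not None` checks in the signprop patch above: `hasattr` is
# True even when the attribute exists but was merely initialized to None, so
# the old guard never filtered out buffers that sign propagation had not
# annotated yet.
class Buffer:
    def __init__(self):
        self.nLevels = None  # present on every buffer, populated only by sign prop

b = Buffer()
assert hasattr(b, "nLevels")  # old check: always passes
assert b.nLevels is None      # new check: correctly rejects this buffer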
b/Deeploy/Targets/PULPOpen/TileConstraints/MatMulTileConstraint.py index c0f3b70461..1df898bab2 100644 --- a/Deeploy/Targets/PULPOpen/TileConstraints/MatMulTileConstraint.py +++ b/Deeploy/Targets/PULPOpen/TileConstraints/MatMulTileConstraint.py @@ -17,6 +17,14 @@ class MatMulTileConstraint(TileConstraint): + @staticmethod + def _getIdxMapping(rank: int, isTrans: bool) -> Tuple[int, int]: + if isTrans: + idxSecondDim, idxFirstDim = rank - 2, rank - 1 + else: + idxFirstDim, idxSecondDim = rank - 2, rank - 1 + return idxFirstDim, idxSecondDim + @staticmethod def addGeometricalConstraint(tilerModel: TilerModel, parseDict: Dict, ctxt: NetworkContext) -> TilerModel: bufferA = ctxt.lookup(name = parseDict['A']) @@ -24,42 +32,32 @@ def addGeometricalConstraint(tilerModel: TilerModel, parseDict: Dict, ctxt: Netw bufferOut = ctxt.lookup(name = parseDict['data_out']) # Add I/O dimensions to the model as variables - for buff in [bufferA, bufferB, bufferOut]: - tilerModel.addTensorDimToModel(ctxt, buff.name) + for _buffer in [bufferA, bufferB, bufferOut]: + tilerModel.addTensorDimToModel(ctxt, _buffer.name) - rankA = len(bufferA.shape) - if not parseDict['transA']: - firstDimIdxA, secondDimIdxA = rankA - 2, rankA - 1 - else: - firstDimIdxA, secondDimIdxA = rankA - 1, rankA - 2 - AFirstDimVar = tilerModel.getTensorDimVar(tensorName = bufferA.name, dimIdx = firstDimIdxA) - ASecondDimVar = tilerModel.getTensorDimVar(tensorName = bufferA.name, dimIdx = secondDimIdxA) + idxFirstDimA, idxSecondDimA = MatMulTileConstraint._getIdxMapping(len(bufferA.shape), parseDict['transA']) + AFirstDimVar = tilerModel.getTensorDimVar(tensorName = bufferA.name, dimIdx = idxFirstDimA) + ASecondDimVar = tilerModel.getTensorDimVar(tensorName = bufferA.name, dimIdx = idxSecondDimA) - rankB = len(bufferB.shape) - if not parseDict['transB']: - firstDimIdxB, secondDimIdxB = rankB - 2, rankB - 1 - else: - firstDimIdxB, secondDimIdxB = rankB - 1, rankB - 2 - BFirstDimVar = tilerModel.getTensorDimVar(tensorName = bufferB.name, dimIdx = firstDimIdxB) - BSecondDimVar = tilerModel.getTensorDimVar(tensorName = bufferB.name, dimIdx = secondDimIdxB) + idxFirstDimB, idxSecondDimB = MatMulTileConstraint._getIdxMapping(len(bufferB.shape), parseDict['transB']) + BFirstDimVar = tilerModel.getTensorDimVar(tensorName = bufferB.name, dimIdx = idxFirstDimB) + BSecondDimVar = tilerModel.getTensorDimVar(tensorName = bufferB.name, dimIdx = idxSecondDimB) rankOut = len(bufferOut.shape) outputFirstDimVar = tilerModel.getTensorDimVar(tensorName = bufferOut.name, dimIdx = rankOut - 2) outputSecondDimVar = tilerModel.getTensorDimVar(tensorName = bufferOut.name, dimIdx = rankOut - 1) - # Map batch dims between A and output - batchDimsA = rankA - 2 - for dimIdx in range(batchDimsA): - varA = tilerModel.getTensorDimVar(tensorName = bufferA.name, dimIdx = dimIdx) - varOut = tilerModel.getTensorDimVar(tensorName = bufferOut.name, dimIdx = (rankOut - rankA) + dimIdx) - tilerModel.addConstraint(varOut == varA) + # Map input A's batch dims to output batch dims if present + for idx in range(len(bufferA.shape) - 2): + varA = tilerModel.getTensorDimVar(tensorName = bufferA.name, dimIdx = idx) + varOut = tilerModel.getTensorDimVar(tensorName = bufferOut.name, dimIdx = idx) + tilerModel.addConstraint(varA == varOut) - # Map batch dims between B and output - batchDimsB = rankB - 2 - for dimIdx in range(batchDimsB): - varB = tilerModel.getTensorDimVar(tensorName = bufferB.name, dimIdx = dimIdx) - varOut = tilerModel.getTensorDimVar(tensorName = bufferOut.name, dimIdx = 
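# A standalone restatement of the _getIdxMapping helper introduced above: the
# rows of an untransposed operand sit at rank-2 and the columns at rank-1, and
# the transpose flag simply swaps the two indices.
from typing import Tuple

def getIdxMapping(rank: int, isTrans: bool) -> Tuple[int, int]:
    if isTrans:
        return rank - 1, rank - 2
    return rank - 2, rank - 1

assert getIdxMapping(3, isTrans = False) == (1, 2)  # (B, M, N): rows at 1, cols at 2
assert getIdxMapping(3, isTrans = True) == (2, 1)   # stored as (B, N, M)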
(rankOut - rankB) + dimIdx) - tilerModel.addConstraint(varOut == varB) + # Map input B's batch dims to output batch dims if present + for idx in range(len(bufferB.shape) - 2): + varB = tilerModel.getTensorDimVar(tensorName = bufferB.name, dimIdx = idx) + varOut = tilerModel.getTensorDimVar(tensorName = bufferOut.name, dimIdx = idx) + tilerModel.addConstraint(varB == varOut) tilerModel.addConstraint(outputFirstDimVar == AFirstDimVar) tilerModel.addConstraint(outputSecondDimVar == BSecondDimVar) @@ -69,23 +67,14 @@ def addGeometricalConstraint(tilerModel: TilerModel, parseDict: Dict, ctxt: Netw @staticmethod def addPolicyConstraint(tilerModel: TilerModel, parseDict: Dict, ctxt: NetworkContext) -> TilerModel: - bufferA = ctxt.lookup(name = parseDict['A']) bufferB = ctxt.lookup(name = parseDict['B']) - rankA = len(bufferA.shape) - if not parseDict['transA']: - _, secondDimIdxA = rankA - 2, rankA - 1 - else: - _, secondDimIdxA = rankA - 1, rankA - 2 - ASecondDimVar = tilerModel.getTensorDimVar(tensorName = bufferA.name, dimIdx = secondDimIdxA) + _, idxSecondDimA = MatMulTileConstraint._getIdxMapping(len(bufferA.shape), parseDict['transA']) + ASecondDimVar = tilerModel.getTensorDimVar(tensorName = bufferA.name, dimIdx = idxSecondDimA) - rankB = len(bufferB.shape) - if not parseDict['transB']: - firstDimIdxB, _ = rankB - 2, rankB - 1 - else: - firstDimIdxB, _ = rankB - 1, rankB - 2 - BFirstDimVar = tilerModel.getTensorDimVar(tensorName = bufferB.name, dimIdx = firstDimIdxB) + idxFirstDimB, _ = MatMulTileConstraint._getIdxMapping(len(bufferB.shape), parseDict['transB']) + BFirstDimVar = tilerModel.getTensorDimVar(tensorName = bufferB.name, dimIdx = idxFirstDimB) # VIC: We don't want to deal with intermediate results between kernel calls tilerModel.addConstraint(ASecondDimVar == parseDict['N']) From d27ae03d0f3e90a71ea7137180840a743491572d Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Wed, 29 Oct 2025 14:41:07 +0100 Subject: [PATCH 71/80] Fix PULP Requantized Convolution tile constraints to properly handle rqs add and mul tensors --- .../TileConstraints/ConvTileConstraint.py | 19 +++++++++++-------- .../TileConstraints/DWConvTileConstraint.py | 15 ++++++++++----- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/Deeploy/Targets/PULPOpen/TileConstraints/ConvTileConstraint.py b/Deeploy/Targets/PULPOpen/TileConstraints/ConvTileConstraint.py index c69760df59..156271417a 100644 --- a/Deeploy/Targets/PULPOpen/TileConstraints/ConvTileConstraint.py +++ b/Deeploy/Targets/PULPOpen/TileConstraints/ConvTileConstraint.py @@ -54,12 +54,15 @@ def addGeometricalConstraint(tilerModel: TilerModel, parseDict: Dict, ctxt: Netw outputWidthVar = tilerModel.getTensorDimVar(tensorName = outputBufferName, dimIdx = 2) outputChannelVar = tilerModel.getTensorDimVar(tensorName = outputBufferName, dimIdx = 3) - addChannelVar = tilerModel.getTensorDimVar(tensorName = addBufferName, dimIdx = 0) - mulChannelVar = tilerModel.getTensorDimVar(tensorName = mulBufferName, dimIdx = 0) + addBuffer = ctxt.lookup(addBufferName) + addChannelVar = tilerModel.getTensorDimVar(tensorName = addBufferName, dimIdx = len(addBuffer.shape) - 1) + mulBuffer = ctxt.lookup(mulBufferName) + mulChannelVar = tilerModel.getTensorDimVar(tensorName = mulBufferName, dimIdx = len(mulBuffer.shape) - 1) # Map output dims to inputs dims tilerModel.addConstraint(outputBatchVar == inputBatchVar) # Batch tilerModel.addConstraint(outputChannelVar == weightOutChannelVar) # Output Channel + tilerModel.addConstraint(inputChannelVar == weightInChannelVar) 
# Input channel tilerModel.addConstraint(outputChannelVar == addChannelVar) tilerModel.addConstraint(outputChannelVar == mulChannelVar) @@ -88,10 +91,8 @@ def addPolicyConstraint(tilerModel: TilerModel, parseDict: Dict, ctxt: NetworkCo outputChannelVar = tilerModel.getTensorDimVar(tensorName = weightBuffer.name, dimIdx = 0) weightHeightVar = tilerModel.getTensorDimVar(tensorName = weightBuffer.name, dimIdx = 1) weightWidthVar = tilerModel.getTensorDimVar(tensorName = weightBuffer.name, dimIdx = 2) - weightInChannelVar = tilerModel.getTensorDimVar(tensorName = weightBuffer.name, dimIdx = 3) strides = parseDict["strides"] - padding = parseDict["pads"] # VIC: Force at least one row of A and one col of B in the GEMM (since it's a im2col Conv) to avoid partial results tilerModel.addConstraint(inputChannelVar == parseDict['ch_im_in']) @@ -101,7 +102,6 @@ def addPolicyConstraint(tilerModel: TilerModel, parseDict: Dict, ctxt: NetworkCo tilerModel.addConstraint(inputHeightVar >= parseDict['dim_kernel_x']) tilerModel.addConstraint(inputWidthVar >= parseDict['dim_kernel_y']) - tilerModel.addConstraint(weightInChannelVar == parseDict['ch_im_in']) # VIC: Constraint the minimum tile size such that we can apply at least one kernel on it tilerModel.addConstraint(inputHeightVar >= parseDict['dim_kernel_x']) @@ -174,6 +174,8 @@ def serializeTilingSolution( weightH = ctxt.lookup(varWeight).shape[1] weightW = ctxt.lookup(varWeight).shape[2] weightC = ctxt.lookup(varWeight).shape[3] + shapeMul = ctxt.lookup(operatorRepresentation["mul"]).shape + shapeAdd = ctxt.lookup(operatorRepresentation["add"]).shape pads = operatorRepresentation['pads'] strides = operatorRepresentation['strides'] @@ -200,12 +202,13 @@ def serializeTilingSolution( inputInCubes.append(InCube) - RequantCube = HyperRectangle((COffset,), (CSize,)) + MulCube = HyperRectangle((0,) * (len(shapeMul) - 1) + (COffset,), (1,) * (len(shapeMul) - 1) + (CSize,)) + AddCube = HyperRectangle((0,) * (len(shapeAdd) - 1) + (COffset,), (1,) * (len(shapeAdd) - 1) + (CSize,)) WeightCube = HyperRectangle((COffset, 0, 0, 0), (CSize, weightH, weightW, weightC)) inputWeightCubes.append(WeightCube) - inputAddCubes.append(RequantCube) - inputMulCubes.append(RequantCube) + inputMulCubes.append(MulCube) + inputAddCubes.append(AddCube) inputLoadSchedule = [] outputLoadSchedule = [] diff --git a/Deeploy/Targets/PULPOpen/TileConstraints/DWConvTileConstraint.py b/Deeploy/Targets/PULPOpen/TileConstraints/DWConvTileConstraint.py index 8d54eea437..2d6ea07a0f 100644 --- a/Deeploy/Targets/PULPOpen/TileConstraints/DWConvTileConstraint.py +++ b/Deeploy/Targets/PULPOpen/TileConstraints/DWConvTileConstraint.py @@ -60,8 +60,10 @@ def addGeometricalConstraint(tilerModel: TilerModel, parseDict: Dict, ctxt: Netw outputWidthVar = tilerModel.getTensorDimVar(tensorName = outputBufferName, dimIdx = 2) outputChannelVar = tilerModel.getTensorDimVar(tensorName = outputBufferName, dimIdx = 3) - addChannelVar = tilerModel.getTensorDimVar(tensorName = addBufferName, dimIdx = 0) - mulChannelVar = tilerModel.getTensorDimVar(tensorName = mulBufferName, dimIdx = 0) + addBuffer = ctxt.lookup(addBufferName) + addChannelVar = tilerModel.getTensorDimVar(tensorName = addBufferName, dimIdx = len(addBuffer.shape) - 1) + mulBuffer = ctxt.lookup(mulBufferName) + mulChannelVar = tilerModel.getTensorDimVar(tensorName = mulBufferName, dimIdx = len(mulBuffer.shape) - 1) # map output dims to inputs dims tilerModel.addConstraint(outputBatchVar == inputBatchVar) # Batch @@ -183,6 +185,8 @@ def 
serializeTilingSolution( weightH = ctxt.lookup(varWeight).shape[1] weightW = ctxt.lookup(varWeight).shape[2] + shapeMul = ctxt.lookup(operatorRepresentation["mul"]).shape + shapeAdd = ctxt.lookup(operatorRepresentation["add"]).shape pads = operatorRepresentation['pads'] strides = operatorRepresentation['strides'] @@ -200,7 +204,8 @@ def serializeTilingSolution( NCHWInCube = HyperRectangle((NHWCInCube.offset[0], COffset, NHWCInCube.offset[1], NHWCInCube.offset[2]), (NHWCInCube.dims[0], CSize, NHWCInCube.dims[1], NHWCInCube.dims[2])) - RequantCube = HyperRectangle((COffset,), (CSize,)) + MulCube = HyperRectangle((0,) * (len(shapeMul) - 1) + (COffset,), (1,) * (len(shapeMul) - 1) + (CSize,)) + AddCube = HyperRectangle((0,) * (len(shapeAdd) - 1) + (COffset,), (1,) * (len(shapeAdd) - 1) + (CSize,)) WeightCube = HyperRectangle((COffset, 0, 0, 0), (CSize, weightH, weightW, 1)) replacements['dim_im_in_x'].append(NCHWInCube.dims[2]) @@ -216,8 +221,8 @@ def serializeTilingSolution( replacements['padding_x_right'].append(padding_right) inputInCubes.append(NCHWInCube) - inputAddCubes.append(RequantCube) - inputMulCubes.append(RequantCube) + inputMulCubes.append(MulCube) + inputAddCubes.append(AddCube) inputWeightCubes.append(WeightCube) inputLoadSchedule = [] From 1d953bd855ac65a4febedfa04612bea5ce5bd3d6 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Thu, 25 Sep 2025 15:34:12 +0200 Subject: [PATCH 72/80] DeeployTypes.py changes --- Deeploy/DeeployTypes.py | 366 +++++++++++++++++++++++++++++++--------- 1 file changed, 286 insertions(+), 80 deletions(-) diff --git a/Deeploy/DeeployTypes.py b/Deeploy/DeeployTypes.py index e926e36043..14d536cf77 100644 --- a/Deeploy/DeeployTypes.py +++ b/Deeploy/DeeployTypes.py @@ -105,6 +105,27 @@ def __init__(self, templateStr: str): Tuple[NetworkContext, OperatorRepresentation]]]] = {} self.subTemplateGenerators = {} + def alignShapes(self, node: gs.Node) -> Tuple[List[Sequence[int]], List[Sequence[int]]]: + return [t.shape for t in node.inputs], [t.shape for t in node.outputs] + + def _alignShapes(self, node: gs.Node) -> Tuple[List[Sequence[int]], List[Sequence[int]]]: + _in, out = self.alignShapes(node) + for tensor, shape in zip(node.inputs + node.outputs, _in + out): + assert shape is not None, f"Aligned shape for tensor {tensor.name} is None" + return _in, out + + def _tensorShapesBroadcastable(self, node: gs.Node) -> bool: + minShapesIn, minShapesOut = self._alignShapes(node) + for tensor, minShape in zip(node.inputs, minShapesIn, strict = True): + try: + np.broadcast_shapes(tensor.shape, minShape) + except ValueError: + return False + for tensor, minShape in zip(node.outputs, minShapesOut, strict = True): + if not all(dim == other for dim, other in zip(tensor.shape, minShape)): + return False + return True + def internalSize(self) -> int: """Return the byte size of internal memory buffers used by this template @@ -2651,6 +2672,30 @@ def _createIOBindings(self, ctxt: NetworkContext, graph: gs.Graph): return ctxt + def hoistGraphTensors(self, typeMap: Dict[str, Type[Pointer]]): + for name, tensor in self.graph.tensors().items(): + if isinstance(tensor, gs.Constant): + buffer = self.ctxt.ConstantBuffer(name, tensor.shape, tensor.values) + self.ctxt.add(buffer, "global") + else: + buffer = self.ctxt.VariableBuffer(name, tensor.shape) + if tensor in self.graph.inputs: + buffer.is_input = True + self.ctxt.add(buffer, "global") + elif tensor in self.graph.outputs: + buffer.is_output = True + self.ctxt.add(buffer, "global") + else: + self.ctxt.add(buffer, "local") + 
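# A minimal sketch of the per-channel requant tile cubes built in the Conv and
# DWConv serializeTilingSolution methods above, assuming only that
# HyperRectangle is constructed from an (offset, dims) pair of tuples: the
# channel axis is the last dimension, and all leading dims are pinned to one
# element at offset 0 (toy shape and tile values).
shapeMul = (1, 1, 32)    # illustrative per-channel mul tensor shape
COffset, CSize = 16, 8   # current channel tile

offset = (0,) * (len(shapeMul) - 1) + (COffset,)
dims = (1,) * (len(shapeMul) - 1) + (CSize,)
assert offset == (0, 0, 16) and dims == (1, 1, 8)
# MulCube = HyperRectangle(offset, dims)  # as used in the tiling solution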
self.ctxt.annotateType(name, typeMap[name]) + + # Users have to be annotated in order of the schedule + for layer in self.layerBinding.values(): + for tensor in layer.node.inputs: + buffer = self.ctxt.lookup(tensor.name) + isinstance(buffer, VariableBuffer) + buffer._users.append(layer.node.name) + def inputs(self) -> List[VariableBuffer]: """Return a list of all VariableBuffers that are also global inputs of the network @@ -2765,6 +2810,114 @@ def _typeCheckNode(self, node: ONNXLayer, ctxt: NetworkContext) -> Tuple[Network return newCtxt, True + def typeCheckInputs(self, types: Sequence[Optional[Type[Pointer]]], supportedTypes: Sequence[Type[Pointer]], + tensors: Sequence[gs.Tensor]) -> bool: + assert len(types) == len(tensors) + + valid = True + for ty, tensor, suppTy in zip(types, tensors, supportedTypes): + if isinstance(tensor, gs.Constant): + if not suppTy.referencedType.checkValue(tensor.values): + # TODO: Log + valid = False + elif isinstance(tensor, gs.Variable): + if ty is None: + # TODO: Log + valid = False + continue + if tensor in self.graph.inputs: + # TODO: Why do we do this for graph inputs?? + refTy = ty.referencedType + suppRefTy = suppTy.referencedType + if not suppRefTy.partialOrderUpcast(refTy): + # TODO: Log + valid = False + else: + if ty != suppTy: + # TODO: Log + valid = False + else: + raise ValueError(f"Unsupported tensor type {type(tensor)}") + + return valid + + def selectTemplate( + self, schedule: Sequence[ONNXLayer], + candidates: Dict[str, List[NodeBinding]]) -> Tuple[Dict[str, NodeBinding], Dict[str, Type[Pointer]]]: + selection: Dict[str, Optional[NodeBinding]] = dict.fromkeys(candidates.keys()) + discard: Dict[str, List[NodeBinding]] = {k: [] for k in candidates.keys()} + typeMap: Dict[str, Optional[Type[Pointer]]] = dict.fromkeys(self.graph.tensors().keys()) + + typeMap.update(self.inputTypes) + + idx: int = 0 + deepestIdx = 0 + + while (idx < len(schedule)): + layer = schedule[idx] + node = layer.node + deepestIdx = max(idx, deepestIdx) + + log.debug(31 * "-" + f" TRYING NODE {node.name} OP {node.op} AT IDX {idx} " + 31 * "-") + + inputTypes = [typeMap[t.name] for t in node.inputs] + + viable = [] + for binding in candidates[node.name]: + if binding in discard[node.name]: + # TODO: Log + continue + if not self.typeCheckInputs(inputTypes, binding.typeChecker.input_types, node.inputs): + # TODO: Log + continue + viable.append(binding) + + if len(viable) > 0: + selectedBinding = viable[0] + # Update inputs types because we might have casted constant tensors + typeMap.update(zip([t.name for t in node.inputs], selectedBinding.typeChecker.input_types)) + # Update output types + typeMap.update(zip([t.name for t in node.outputs], selectedBinding.typeChecker.output_types)) + selection[node.name] = selectedBinding + idx += 1 + elif idx == 0: + # SCHEREMO: If we can't find a mapping for the root, we must exit + layer = schedule[deepestIdx] + node = layer.node + log.debug("-" * 80) + log.error("💥 PARSING FAILED - Backtracking exhausted at root!") + log.error("=" * 80) + log.error(f"🔍 Diagnosis:") + log.error(f" - Deepest successful exploration: Layer {deepestIdx} '{node.name}'") + log.error(f" - Candidates: {[type(binding).__name__ for binding in candidates[node.name]]}") + log.error("=" * 80) + raise RuntimeError( + f'Did not find adequate mapping for graph! Explored until layer {layer} of node {node.name} ' + f'Candidates: {[type(binding).__name__ for binding in candidates[node.name]]}. Exhausted backtracking.' 
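# A compact, generic sketch of the depth-first selection-with-rollback loop in
# selectTemplate above (toy candidates, no Deeploy types): each position keeps
# a discard set; on failure the previous node's choice is discarded and retried.
def select(candidates, compatible):
    n = len(candidates)
    choice, discard = [None] * n, [set() for _ in range(n)]
    idx = 0
    while idx < n:
        viable = [c for c in candidates[idx]
                  if c not in discard[idx] and compatible(choice[:idx], c)]
        if viable:
            choice[idx] = viable[0]
            idx += 1
        elif idx == 0:
            raise RuntimeError("Exhausted backtracking at the root")
        else:
            discard[idx - 1].add(choice[idx - 1])
            choice[idx - 1] = None
            idx -= 1
    return choice

# The second node only matches the first node's choice, forcing one rollback.
chain = lambda prefix, c: not prefix or prefix[-1] == c
assert select([["a", "b"], ["b"]], chain) == ["b", "b"]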
+ ) + else: + # SCHEREMO: Rollback one step + prev = schedule[idx - 1] + node = prev.node + prevSelection = selection[node.name] + assert prevSelection is not None, f"Previous node doesn't have a selection" + discard[node.name].append(prevSelection) + selection[node.name] = None + idx = idx - 1 + log.debug(31 * "-" + f" ROLLBACK TO IDX {idx} " + 31 * "-") + + finalSelection: Dict[str, NodeBinding] = {} + for name, binding in selection.items(): + assert binding is not None + finalSelection[name] = binding + + finalTypeMap: Dict[str, Type[Pointer]] = {} + for name, ty in typeMap.items(): + assert ty is not None + finalTypeMap[name] = ty + + return finalSelection, finalTypeMap + # Don't override this def parse(self, default_channels_first: bool = True) -> bool: """Parses the full network by iteratively exploring mapping and binding options with backtracking @@ -2792,97 +2945,141 @@ def parse(self, default_channels_first: bool = True) -> bool: constantBuffer = self.Platform.ConstantBuffer, structBuffer = self.Platform.StructBuffer, transientBuffer = self.Platform.TransientBuffer) + # Create schedule, binding, then parse resulting program for correctness + schedule = self.scheduler(self.graph) + flatSchedule = [] - log.debug(" - Create IO Bindings") - self.ctxt = self._createIOBindings(self.ctxt, self.graph) - - log.debug(" - Bind Nodes to Layers") - self._bindLayers() - - ctxt = self.ctxt.copy() + for subGraph in schedule: + if isinstance(subGraph, gs.Node): + flatSchedule.append(subGraph) + else: + flatSchedule += subGraph - ctxtStack = deque() - scheduledLayerList = list(self.layerBinding.values()) - idx: int = 0 + self.layerBinding: 'OrderedDict[str, ONNXLayer]' = OrderedDict() + templateCandidates: Dict[str, List[NodeBinding]] = {} + for node in flatSchedule: + assert node.op in self.operatorDescriptors, \ + f"[ERROR] Error parsing node {node.name}. There is no descriptor for operator {node.op}." + desc = self.operatorDescriptors[node.op] + desc.canonicalize(node, self.graph.opset) + assert desc.check(node), \ + f"[ERROR] Node {node.name} is not a valid instance of {node.op} operator" - deepestIdx = 0 + layer = self._mapNode(node) + if isinstance(layer, ONNXLayer): + self.layerBinding[node.name] = layer + + candidates = [] + discardedMaps = [] + discardedBindings = [] + for map in layer.maps: + if not map.parser.parseNode(node): + discardedMaps.append(map) + continue + + # NOTE: We count a map to be _true_ SignProp if all the integer bindings support only signed output + outRefTys = [binding.typeChecker.output_types[0].referencedType for binding in map.bindings] + intRefTys = [ty for ty in outRefTys if issubclass(ty, IntegerImmediate)] + trueSignProp = all(ty.signed for ty in intRefTys) + + for binding in map.bindings: + if not binding.template._tensorShapesBroadcastable(node): + discardedBindings.append((binding, "Shapes are not broadcastable")) + continue + # NOTE: will this even be needed once I can infer the outtype from a template + # immediately and not by looking at a bunch of bindings? This only makes sense here now + # because we have to sift through the bindings, but if we can deduce the out type straight + # from the input types + node attrs, we don't need that. 
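# A simplified, dependency-free sketch of the input rule that the viable-
# binding check above relies on (see typeCheckInputs): constant tensors only
# need their VALUES to fit the candidate type, while already-typed variables
# need an exact type match. `Int8` and `fits` are illustrative stand-ins, not
# Deeploy names.
Int8 = (-128, 127)

def fits(values, bounds):
    lo, hi = bounds
    return all(lo <= v <= hi for v in values)

assert fits([0, 64, -12], Int8)          # constant: range check passes -> castable
assert not fits([300], Int8)             # constant out of range -> binding rejected
variableType, candidateType = "int32_t*", "int8_t*"
assert variableType != candidateType     # variable: exact match fails -> rejected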
+ if not trueSignProp and "signed" in node.attrs or "rqsOut_signed" in node.attrs: + signed = node.attrs["signed"] if "signed" in node.attrs else node.attrs["rqsOut_signed"] + assert len(binding.typeChecker.output_types) == 1, f"Assume 1 output" + refTy = binding.typeChecker.output_types[0].referencedType + if issubclass(refTy, IntegerImmediate) and signed != refTy.signed: + discardedBindings.append( + (binding, f"Out type is not {'signed' if signed else 'unsigned'}")) + continue + candidates.append(binding) + assert len(candidates) > 0, ( + f"Node {node.name} of op {node.op} has no template candidate.\n" \ + f"Tried these maps: {discardedMaps}\n" \ + f"Tried these bindings:\n" + + "\n".join(f" - Binding {binding}: {msg}" for binding, msg in discardedBindings) + ) + templateCandidates[node.name] = candidates - log.debug(" - Parse and Type Check Network") + log.debug(" - Template selection") start_time = time.perf_counter() + selection, typeMap = self.selectTemplate(list(self.layerBinding.values()), templateCandidates) + end_time = time.perf_counter() + log.info( + f" {SUCCESS_MARK} Template selection succeded with {len(self.layerBinding)} layers in {(end_time-start_time)*1E3:.3f} ms" + ) - iteration_main = 0 - iteration_sub = 0 - iteration_tot = 0 - while (idx < len(scheduledLayerList)): - currentLayer = scheduledLayerList[idx] - - # Log current exploration state - if idx == 0: - iteration_main += 1 - iteration_tot += 1 - iteration_sub = 0 - log.debug(31 * "-" + f" MAIN ITERATION {iteration_main:<2} " + 31 * "-") - - log.debug(f"[Layer {idx}] Trying '{currentLayer.node.name}' (op: {currentLayer.node.op})") + # TODO: Remove after refactor + # Fixup the choice to old way + for layer in self.layerBinding.values(): + binding = selection[layer.node.name] + + # Find map + selectedMap = None + for map in layer.maps: + if binding in map.bindings: + selectedMap = map + break + assert selectedMap is not None, f"Cannot find binding {binding} in any map" + + selectedMap.binder = binding + selectedMap.bound = True + layer.mapper = selectedMap + + # Align shapes + for layer in self.layerBinding.values(): + node = layer.node + newInputShapes, _ = layer.mapper.binder.template._alignShapes(node) + for tensor, shape in zip(node.inputs, newInputShapes): + # TODO: This needs to be investigated because it assumes that if the shape is + # broadcastable, it is also executable, but that might not be the case. + # E.g., just because a kernel can implement a requant shift with per-channel + # rqs params, doesn't mean it can do it for per-layer params. + # There needs to be a mechanism for the kernel (template) to say which + # shapes it can execute, and which shapes it can execute if they get broadcasted. + # Current vision is 2 functions `checkShapes` and `negotiateBroadcasts`, but + # it's a wip. + shape = np.broadcast_shapes(tensor.shape, shape) + if isinstance(tensor, gs.Variable): + if tensor in self.graph.inputs: + tensor.shape = shape + elif any(dim != other for dim, other in zip(tensor.shape, shape)): + raise RuntimeError( + "Non-graph-input shape change is forbidden for now until someone adds automatic Expand node insertion." 
+ f"Node {node.name}'s alignShape tried to change tensor {tensor.name}'s shape {tensor.shape} to {shape}" + ) + elif isinstance(tensor, gs.Constant): + if math.prod(tensor.shape) == math.prod(shape): + tensor.values = tensor.values.reshape(shape) + else: + tensor.values = np.broadcast_to(tensor.values, shape) - stCtxt = copy.deepcopy(ctxt) + self.hoistGraphTensors(typeMap) - newCtxt, parseSuccess = self._parseNode(currentLayer, ctxt, default_channels_first) + for layer in self.layerBinding.values(): + node = layer.node + parser = layer.mapper.parser - typeCheckSuccess = False - if parseSuccess: - newCtxt, typeCheckSuccess = self._typeCheckNode(currentLayer, newCtxt) + parser.parseNode(node) + parser.parseNodeCtxt(self.ctxt, node, default_channels_first) - if parseSuccess and typeCheckSuccess: - # SCHEREMO: Continue depth-first exploration - ctxtStack.append(stCtxt) - ctxt = newCtxt - idx = idx + 1 - if idx > deepestIdx: - deepestIdx = max(idx, deepestIdx) - deepestCtxt = stCtxt + opRepr = parser.operatorRepresentation + opRepr["nodeName"] = node.name + opRepr["nodeOp"] = node.op + opRepr["channels_first"] = node.attrs.get("channels_first", default_channels_first) - else: - # SCHEREMO: If we can't find a mapping for the root, we must exit - if idx == 0: - deepestLayer = scheduledLayerList[deepestIdx] - deepestNodeName = deepestLayer.node.name - log.debug("-" * 80) - log.error("💥 PARSING FAILED - Backtracking exhausted at root!") - log.error("=" * 80) - log.error(f"🔍 Diagnosis:") - log.error(f" - Deepest successful exploration: Layer {deepestIdx} '{deepestNodeName}'") - log.error( - f" - Deepest layer available mappers: {[type(x.parser).__name__ for x in deepestLayer.maps]}") - log.error("=" * 80) - raise RuntimeError( - f'Did not find adequate mapping for graph! Explored until layer {deepestLayer.__class__.__name__} of node {deepestNodeName}' - f'Candidates: {[type(x.parser).__name__ for x in deepestLayer.maps]}. 
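# A short numpy sketch of the constant-alignment rule applied above: when the
# element count is unchanged the constant is merely reshaped, otherwise its
# values are materialized by broadcasting (toy arrays, not model weights).
import math
import numpy as np

def alignConstant(values: np.ndarray, shape) -> np.ndarray:
    if math.prod(values.shape) == math.prod(shape):
        return values.reshape(shape)       # same size: a free reinterpretation
    return np.broadcast_to(values, shape)  # size grows: replicate values

assert alignConstant(np.arange(16), (1, 16)).shape == (1, 16)
assert alignConstant(np.array([5]), (4, 1)).tolist() == [[5], [5], [5], [5]]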
Exhausted backtracking.') - - previousLayer = scheduledLayerList[idx - 1] - ctxt = ctxtStack.pop() - - # Keep options of current layer open - the upstream mapping will change, so we don't know which options are feasible here - currentLayer.resetDiscardedMappers() - - # Update the previous layer, by discarding the current mapper or binder - if previousLayer.mapper.bindingsExhausted(): - previousLayer.discardCurrentMapper() - else: - previousLayer.mapper.discardCurrentBinder() - - # SCHEREMO: Rollback one step - idx = idx - 1 - if idx != 0: - iteration_sub += 1 - iteration_tot += 1 - log.debug(31 * "-" + f" SUB ITERATION {iteration_main}.{iteration_sub:<2} " + 31 * "-") + for tensor in node.inputs + node.outputs: + for key, value in opRepr.items(): + if isinstance(value, str) and value == tensor.name: + opRepr[f"{key}_type"] = typeMap[value] + break - end_time = time.perf_counter() - log.info( - f" {SUCCESS_MARK} Parsed network with {len(self.layerBinding)} layers after {iteration_tot} iterations in {(end_time-start_time)*1E3:.3f} ms" - ) - self.ctxt = ctxt self.parsed = True return True @@ -3485,8 +3682,10 @@ def _duplicateConstants(self, graph: gs.Graph) -> None: graph.cleanup().toposort() def _foldConstants(self, graph: gs.Graph): + graph.toposort() # fold_constants requires the graph to be topologically sorted graph.fold_constants() - graph.cleanup().toposort() + graph.cleanup() # fold_constants doesn't remove dangling Constant nodes so we need a cleanup + graph.toposort() # toposort for good measure def _sanitizeGraphNames(self, graph: gs.Graph): @@ -3545,6 +3744,10 @@ def _assertTensorsHaveShape(self) -> None: assert len(missingShapes) == 0, \ f"Shape inference is not supported.\nFound tensors with missing shape annotation: {missingShapes}" + def _annotateChannelsFirst(self, graph: gs.Graph, default: bool) -> None: + for node in graph.nodes: + node.attrs["channels_first"] = node.attrs.get("channels_first", default) + def frontEnd(self): """API hook to prepare the graph to be deployed and build the initial NetworkContext @@ -3595,6 +3798,9 @@ def frontEnd(self): log.info(" - Assert all tensors have a shape annotation") self._assertTensorsHaveShape() + log.info("- Annotate node's with channel layout info") + self._annotateChannelsFirst(self.graph, self.default_channels_first) + log.info("- Perform Graph Parsing") try: self.parse(self.default_channels_first) # This reparses the lowered graph From 8c1b4e864a893fc9cb746c5b83a65e47b040dae6 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Tue, 28 Oct 2025 17:48:32 +0100 Subject: [PATCH 73/80] Add NodeTemplate.py --- Deeploy/CommonExtensions/NodeTemplate.py | 84 ++++++++++++++++++++++++ 1 file changed, 84 insertions(+) create mode 100644 Deeploy/CommonExtensions/NodeTemplate.py diff --git a/Deeploy/CommonExtensions/NodeTemplate.py b/Deeploy/CommonExtensions/NodeTemplate.py new file mode 100644 index 0000000000..a94619f7a6 --- /dev/null +++ b/Deeploy/CommonExtensions/NodeTemplate.py @@ -0,0 +1,84 @@ +# SPDX-FileCopyrightText: 2021 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 + +from typing import List, Sequence, Tuple + +import numpy as np +import onnx_graphsurgeon as gs + +from Deeploy.DeeployTypes import NodeTemplate + + +class ElementwiseTemplate(NodeTemplate): + + def alignShapes(self, node: gs.Node) -> Tuple[List[Sequence[int]], List[Sequence[int]]]: + assert len(node.outputs) == 1, f"Expected only one output. 
Received {len(node.outputs)}" + shape = tuple(np.broadcast_shapes(*[t.shape for t in node.inputs])) + return [shape] * len(node.inputs), [shape] + + +class ElementwiseScalarTemplate(NodeTemplate): + + def alignShapes(self, node: gs.Node) -> Tuple[List[Sequence[int]], List[Sequence[int]]]: + assert len(node.inputs) == 2, f"Expected only two inputs. Received {len(node.inputs)}" + assert len(node.outputs) == 1, f"Expected only one output. Received {len(node.outputs)}" + shape = tuple(node.inputs[0].shape) + return [shape, (1,)], [shape] + + +class RequantShiftTemplate(NodeTemplate): + + def alignShapes(self, node: gs.Node) -> Tuple[List[Sequence[int]], List[Sequence[int]]]: + inShapes, outShapes = [t.shape for t in node.inputs], [t.shape for t in node.outputs] + batch, ch = inShapes[0][:2] + # TODO: Copied from old computeShape. Should probably be investigated + inShapes[1] = (batch, ch, *inShapes[1][1:]) + inShapes[2] = (batch, ch, *inShapes[2][1:]) + return inShapes, outShapes + + +class ConvTemplate(NodeTemplate): + + @staticmethod + def minPerChannelTensorShape(node: gs.Node, channels: int) -> Tuple[int, ...]: + spatialDims = len(node.attrs["kernel_shape"]) + if node.attrs["channels_first"]: + return (channels,) + (1,) * (spatialDims) + else: + return (channels,) + + def alignShapes(self, node: gs.Node) -> Tuple[List[Sequence[int]], List[Sequence[int]]]: + inShapes, outShapes = [t.shape for t in node.inputs], [t.shape for t in node.outputs] + if len(node.inputs) == 3: + minBiasShape = self.minPerChannelTensorShape(node, inShapes[1][0]) + inShapes[2] = minBiasShape + return inShapes, outShapes + + +class RequantizedConvTemplate(ConvTemplate): + + def alignShapes(self, node: gs.Node) -> Tuple[List[Sequence[int]], List[Sequence[int]]]: + inShapes, outShapes = [t.shape for t in node.inputs[:2]], [t.shape for t in node.outputs] + minRqsShape = self.minPerChannelTensorShape(node, inShapes[1][0]) + rqsShapes = [minRqsShape] * len(node.inputs[2:]) + return inShapes + rqsShapes, outShapes + + +class GemmTemplate(NodeTemplate): + + def alignShapes(self, node: gs.Node) -> Tuple[List[Sequence[int]], List[Sequence[int]]]: + biasShape = node.outputs[0].shape[-2:] + return [node.inputs[0].shape, node.inputs[1].shape, biasShape], [node.outputs[0].shape] + + +class RequantizedGemmTemplate(NodeTemplate): + + def alignShapes(self, node: gs.Node) -> Tuple[List[Sequence[int]], List[Sequence[int]]]: + inShapes, outShapes = [t.shape for t in node.inputs[:2]], [t.shape for t in node.outputs] + if node.attrs["transB"]: + N = inShapes[1][-2] + else: + N = inShapes[1][-1] + rqsShapes = [(N,)] * len(node.inputs[2:]) + return inShapes + rqsShapes, outShapes From c2415252a297c1daca944113fba08ccd81fae1eb Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Wed, 29 Oct 2025 10:58:58 +0100 Subject: [PATCH 74/80] Target Template changes --- .../Targets/CortexM/Templates/ConvTemplate.py | 7 +++--- .../Targets/Generic/Templates/AddTemplate.py | 5 +++-- .../Targets/Generic/Templates/ConvTemplate.py | 5 +++-- .../Generic/Templates/FloatGemmTemplate.py | 4 ++-- .../Generic/Templates/FloatMulTemplate.py | 4 ++-- .../Targets/Generic/Templates/GemmTemplate.py | 5 +++-- .../Targets/Generic/Templates/MulTemplate.py | 5 +++-- .../Generic/Templates/RQAddTemplate.py | 5 +++-- .../Generic/Templates/RequantShiftTemplate.py | 5 +++-- .../MemPool/Templates/RQGemmTemplate.py | 22 ++++++++++++++----- .../MemPool/Templates/RQMatMulTemplate.py | 20 ++++++++++++----- .../MemPool/Templates/RequantShiftTemplate.py | 5 +++-- 
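# A dependency-free restatement of ConvTemplate.minPerChannelTensorShape from
# the new NodeTemplate module above, showing why the layout flag matters for
# per-channel bias/requant tensors (the real method reads the spatial rank off
# the node's kernel_shape attribute; values here are illustrative).
def minPerChannelTensorShape(kernel_shape, channels_first: bool, channels: int):
    spatialDims = len(kernel_shape)
    if channels_first:
        # channels-first: trailing unit dims let (C, 1, 1) broadcast over H x W
        return (channels,) + (1,) * spatialDims
    # channels-last: channels are innermost, so a bare (C,) already broadcasts
    return (channels,)

assert minPerChannelTensorShape([3, 3], True, 8) == (8, 1, 1)
assert minPerChannelTensorShape([3, 3], False, 8) == (8,)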
.../Targets/Neureka/Templates/ConvTemplate.py | 5 +++-- .../PULPOpen/Templates/ConvTemplate.py | 7 +++--- .../PULPOpen/Templates/FloatAddTemplate.py | 4 ++-- .../PULPOpen/Templates/FloatGemmTemplate.py | 4 ++-- .../PULPOpen/Templates/FloatMulTemplate.py | 4 ++-- .../PULPOpen/Templates/GEMMTemplate.py | 3 ++- .../Templates/MatrixVectorTemplate.py | 5 +++-- .../Targets/PULPOpen/Templates/MulTemplate.py | 9 ++------ .../PULPOpen/Templates/TallGEMMTemplate.py | 5 +++-- 21 files changed, 84 insertions(+), 54 deletions(-) diff --git a/Deeploy/Targets/CortexM/Templates/ConvTemplate.py b/Deeploy/Targets/CortexM/Templates/ConvTemplate.py index d5e05c8343..6d9984a11a 100644 --- a/Deeploy/Targets/CortexM/Templates/ConvTemplate.py +++ b/Deeploy/Targets/CortexM/Templates/ConvTemplate.py @@ -6,12 +6,13 @@ from ortools.constraint_solver.pywrapcp import IntVar -from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation +from Deeploy.CommonExtensions.NodeTemplate import RequantizedConvTemplate +from Deeploy.DeeployTypes import NetworkContext, OperatorRepresentation from Deeploy.Targets.CortexM.DataTypes import cmsis_nn_context, cmsis_nn_conv_params, cmsis_nn_dims, \ cmsis_nn_per_channel_quant_params -class _Conv2D_8_Template(NodeTemplate): +class _Conv2D_8_Template(RequantizedConvTemplate): def __init__(self, templateStr): super().__init__(templateStr) @@ -128,7 +129,7 @@ def hoistTransientBuffers(self, ctxt: NetworkContext, ") -class _Conv1D_16_Template(NodeTemplate): +class _Conv1D_16_Template(RequantizedConvTemplate): def __init__(self, templateStr): super().__init__(templateStr) diff --git a/Deeploy/Targets/Generic/Templates/AddTemplate.py b/Deeploy/Targets/Generic/Templates/AddTemplate.py index 75c16ac429..2376e7b6b8 100644 --- a/Deeploy/Targets/Generic/Templates/AddTemplate.py +++ b/Deeploy/Targets/Generic/Templates/AddTemplate.py @@ -4,10 +4,11 @@ from typing import Dict, List, Tuple -from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation +from Deeploy.CommonExtensions.NodeTemplate import ElementwiseTemplate +from Deeploy.DeeployTypes import NetworkContext, OperatorRepresentation -class _AddTemplate(NodeTemplate): +class _AddTemplate(ElementwiseTemplate): def alignToContext(self, ctxt: NetworkContext, operatorRepresentation: OperatorRepresentation) -> Tuple[NetworkContext, Dict, List[str]]: diff --git a/Deeploy/Targets/Generic/Templates/ConvTemplate.py b/Deeploy/Targets/Generic/Templates/ConvTemplate.py index 51f292dcae..1966e48892 100644 --- a/Deeploy/Targets/Generic/Templates/ConvTemplate.py +++ b/Deeploy/Targets/Generic/Templates/ConvTemplate.py @@ -4,10 +4,11 @@ from typing import Dict, List, Tuple -from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation +from Deeploy.CommonExtensions.NodeTemplate import ConvTemplate +from Deeploy.DeeployTypes import NetworkContext, OperatorRepresentation -class _Conv2D_Template(NodeTemplate): +class _Conv2D_Template(ConvTemplate): def __init__(self, templateStr): super().__init__(templateStr) diff --git a/Deeploy/Targets/Generic/Templates/FloatGemmTemplate.py b/Deeploy/Targets/Generic/Templates/FloatGemmTemplate.py index 30ae6a3177..b5537ff831 100644 --- a/Deeploy/Targets/Generic/Templates/FloatGemmTemplate.py +++ b/Deeploy/Targets/Generic/Templates/FloatGemmTemplate.py @@ -2,9 +2,9 @@ # # SPDX-License-Identifier: Apache-2.0 -from Deeploy.DeeployTypes import NodeTemplate +from Deeploy.CommonExtensions.NodeTemplate import GemmTemplate -referenceTemplate = NodeTemplate(""" 
+referenceTemplate = GemmTemplate(""" // GEMM (Name: ${nodeName}, Op: ${nodeOp}) BEGIN_SINGLE_CORE ${A_type.typeName} ref_${nodeName}_${A} = ${A}; diff --git a/Deeploy/Targets/Generic/Templates/FloatMulTemplate.py b/Deeploy/Targets/Generic/Templates/FloatMulTemplate.py index 3c8c2da501..03aea61c4a 100644 --- a/Deeploy/Targets/Generic/Templates/FloatMulTemplate.py +++ b/Deeploy/Targets/Generic/Templates/FloatMulTemplate.py @@ -2,9 +2,9 @@ # # SPDX-License-Identifier: Apache-2.0 -from Deeploy.DeeployTypes import NodeTemplate +from Deeploy.CommonExtensions.NodeTemplate import ElementwiseScalarTemplate -referenceTemplate = NodeTemplate(""" +referenceTemplate = ElementwiseScalarTemplate(""" // Float Mul (Name: ${nodeName}, Op: ${nodeOp}) BEGIN_SINGLE_CORE for (uint32_t i=0;i<${size};i++){ diff --git a/Deeploy/Targets/Generic/Templates/GemmTemplate.py b/Deeploy/Targets/Generic/Templates/GemmTemplate.py index 4f42236780..eae375e555 100644 --- a/Deeploy/Targets/Generic/Templates/GemmTemplate.py +++ b/Deeploy/Targets/Generic/Templates/GemmTemplate.py @@ -4,10 +4,11 @@ from typing import Dict, List, Tuple -from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation +from Deeploy.CommonExtensions.NodeTemplate import GemmTemplate +from Deeploy.DeeployTypes import NetworkContext, OperatorRepresentation -class _GemmTemplate(NodeTemplate): +class _GemmTemplate(GemmTemplate): def __init__(self, templateStr): super().__init__(templateStr) diff --git a/Deeploy/Targets/Generic/Templates/MulTemplate.py b/Deeploy/Targets/Generic/Templates/MulTemplate.py index 5709eef4bf..0db4c6ce64 100644 --- a/Deeploy/Targets/Generic/Templates/MulTemplate.py +++ b/Deeploy/Targets/Generic/Templates/MulTemplate.py @@ -4,10 +4,11 @@ from typing import Dict, List, Tuple -from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation +from Deeploy.CommonExtensions.NodeTemplate import ElementwiseTemplate +from Deeploy.DeeployTypes import NetworkContext, OperatorRepresentation -class _MulTemplate(NodeTemplate): +class _MulTemplate(ElementwiseTemplate): def __init__(self, templateStr): super().__init__(templateStr) diff --git a/Deeploy/Targets/Generic/Templates/RQAddTemplate.py b/Deeploy/Targets/Generic/Templates/RQAddTemplate.py index 35593ad133..bf4e9d0a07 100644 --- a/Deeploy/Targets/Generic/Templates/RQAddTemplate.py +++ b/Deeploy/Targets/Generic/Templates/RQAddTemplate.py @@ -4,10 +4,11 @@ from typing import Dict, List, Tuple -from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation +from Deeploy.CommonExtensions.NodeTemplate import ElementwiseTemplate +from Deeploy.DeeployTypes import NetworkContext, OperatorRepresentation -class RQAddTemplate(NodeTemplate): +class RQAddTemplate(ElementwiseTemplate): def __init__(self, templateStr): super().__init__(templateStr) diff --git a/Deeploy/Targets/Generic/Templates/RequantShiftTemplate.py b/Deeploy/Targets/Generic/Templates/RequantShiftTemplate.py index 2fca2e0eb1..5518c6300a 100644 --- a/Deeploy/Targets/Generic/Templates/RequantShiftTemplate.py +++ b/Deeploy/Targets/Generic/Templates/RequantShiftTemplate.py @@ -4,10 +4,11 @@ from typing import Dict, List, Tuple -from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation +from Deeploy.CommonExtensions.NodeTemplate import RequantShiftTemplate +from Deeploy.DeeployTypes import NetworkContext, OperatorRepresentation -class _RequantShiftTemplate(NodeTemplate): +class _RequantShiftTemplate(RequantShiftTemplate): def __init__(self, 
templateStr): super().__init__(templateStr) diff --git a/Deeploy/Targets/MemPool/Templates/RQGemmTemplate.py b/Deeploy/Targets/MemPool/Templates/RQGemmTemplate.py index f544841acf..45f6a1e77a 100644 --- a/Deeploy/Targets/MemPool/Templates/RQGemmTemplate.py +++ b/Deeploy/Targets/MemPool/Templates/RQGemmTemplate.py @@ -2,19 +2,21 @@ # # SPDX-License-Identifier: Apache-2.0 -from typing import Dict, List, Tuple +from typing import Dict, List, Sequence, Tuple + +import onnx_graphsurgeon as gs from Deeploy.DeeployTypes import ConstantBuffer, NetworkContext, NodeTemplate, OperatorRepresentation -class _RQGemmTemplate(NodeTemplate, OperatorRepresentation): +class _RQGemmTemplate(NodeTemplate): def __init__(self, templateStr): super().__init__(templateStr) - def alignToContext(self, ctxt: NetworkContext, - operatorRepresentation: OperatorRepresentation) -> Tuple[NetworkContext, Dict]: - + def alignToContext( + self, ctxt: NetworkContext, + operatorRepresentation: OperatorRepresentation) -> Tuple[NetworkContext, OperatorRepresentation, List[str]]: A = ctxt.lookup(operatorRepresentation['A']) B = ctxt.lookup(operatorRepresentation['B']) C = ctxt.lookup(operatorRepresentation['C']) @@ -79,6 +81,16 @@ def hoistTransientBuffers(self, ctxt: NetworkContext, return ctxt, operatorRepresentation, names + def alignShapes(self, node: gs.Node) -> Tuple[List[Sequence[int]], List[Sequence[int]]]: + inShapes, outShapes = [t.shape for t in node.inputs], [t.shape for t in node.outputs] + # rqs bias + inShapes[2] = outShapes[0][-2:] + # rqs add + inShapes[3] = (1,) + # rqs mul + inShapes[4] = (1,) + return inShapes, outShapes + MemPoolParallelTemplate = _RQGemmTemplate(""" <% diff --git a/Deeploy/Targets/MemPool/Templates/RQMatMulTemplate.py b/Deeploy/Targets/MemPool/Templates/RQMatMulTemplate.py index 76ad029fb4..b384089528 100644 --- a/Deeploy/Targets/MemPool/Templates/RQMatMulTemplate.py +++ b/Deeploy/Targets/MemPool/Templates/RQMatMulTemplate.py @@ -2,19 +2,21 @@ # # SPDX-License-Identifier: Apache-2.0 -from typing import Dict, List, Tuple +from typing import Dict, List, Sequence, Tuple + +import onnx_graphsurgeon as gs from Deeploy.DeeployTypes import ConstantBuffer, NetworkContext, NodeTemplate, OperatorRepresentation -class _RQMatMulTemplate(NodeTemplate, OperatorRepresentation): +class _RQMatMulTemplate(NodeTemplate): def __init__(self, templateStr): super().__init__(templateStr) - def alignToContext(self, ctxt: NetworkContext, - operatorRepresentation: OperatorRepresentation) -> Tuple[NetworkContext, Dict]: - + def alignToContext( + self, ctxt: NetworkContext, + operatorRepresentation: OperatorRepresentation) -> Tuple[NetworkContext, OperatorRepresentation, List[str]]: A = ctxt.lookup(operatorRepresentation['A']) B = ctxt.lookup(operatorRepresentation['B']) data_out = ctxt.lookup(operatorRepresentation['data_out']) @@ -74,6 +76,14 @@ def hoistTransientBuffers(self, ctxt: NetworkContext, return ctxt, operatorRepresentation, names + def alignShapes(self, node: gs.Node) -> Tuple[List[Sequence[int]], List[Sequence[int]]]: + inShapes, outShapes = [t.shape for t in node.inputs], [t.shape for t in node.outputs] + # rqs mul + inShapes[2] = (1,) + # rqs add + inShapes[3] = (1,) + return inShapes, outShapes + MemPoolParallelTemplate = _RQMatMulTemplate(""" <% diff --git a/Deeploy/Targets/MemPool/Templates/RequantShiftTemplate.py b/Deeploy/Targets/MemPool/Templates/RequantShiftTemplate.py index 7898790af0..a43fe77551 100644 --- a/Deeploy/Targets/MemPool/Templates/RequantShiftTemplate.py +++ 
b/Deeploy/Targets/MemPool/Templates/RequantShiftTemplate.py @@ -4,10 +4,11 @@ from typing import Dict, Tuple -from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation +from Deeploy.CommonExtensions.NodeTemplate import RequantShiftTemplate +from Deeploy.DeeployTypes import NetworkContext, OperatorRepresentation -class _RequantShiftTemplate(NodeTemplate): +class _RequantShiftTemplate(RequantShiftTemplate): def __init__(self, templateStr): super().__init__(templateStr) diff --git a/Deeploy/Targets/Neureka/Templates/ConvTemplate.py b/Deeploy/Targets/Neureka/Templates/ConvTemplate.py index 97253d6e12..aebe884ca1 100644 --- a/Deeploy/Targets/Neureka/Templates/ConvTemplate.py +++ b/Deeploy/Targets/Neureka/Templates/ConvTemplate.py @@ -7,7 +7,8 @@ import numpy as np -from Deeploy.DeeployTypes import ConstantBuffer, NetworkContext, NodeTemplate, OperatorRepresentation +from Deeploy.CommonExtensions.NodeTemplate import RequantizedConvTemplate +from Deeploy.DeeployTypes import ConstantBuffer, NetworkContext, OperatorRepresentation def _getNumTiles(fullDim: int, tileDim: int) -> int: @@ -47,7 +48,7 @@ def getInputAddrOffset(width_in: int, width_in_stride: int, padding_top: int, pa return (padding_top * width_in + padding_left) * width_in_stride -class NeurekaConvTemplate(NodeTemplate): +class NeurekaConvTemplate(RequantizedConvTemplate): def __init__(self, templateStr: str): super().__init__(templateStr) diff --git a/Deeploy/Targets/PULPOpen/Templates/ConvTemplate.py b/Deeploy/Targets/PULPOpen/Templates/ConvTemplate.py index ebc614f479..85414c86e0 100644 --- a/Deeploy/Targets/PULPOpen/Templates/ConvTemplate.py +++ b/Deeploy/Targets/PULPOpen/Templates/ConvTemplate.py @@ -6,10 +6,11 @@ from ortools.constraint_solver.pywrapcp import IntVar -from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation +from Deeploy.CommonExtensions.NodeTemplate import RequantizedConvTemplate +from Deeploy.DeeployTypes import NetworkContext, OperatorRepresentation -class PULP2DConvTemplate(NodeTemplate): +class PULP2DConvTemplate(RequantizedConvTemplate): def __init__(self, templateStr): super().__init__(templateStr) @@ -63,7 +64,7 @@ def alignToContext(self, ctxt: NetworkContext, return ctxt, operatorRepresentation, [] -class PULP1DConvTemplate(NodeTemplate): +class PULP1DConvTemplate(RequantizedConvTemplate): def __init__(self, templateStr): super().__init__(templateStr) diff --git a/Deeploy/Targets/PULPOpen/Templates/FloatAddTemplate.py b/Deeploy/Targets/PULPOpen/Templates/FloatAddTemplate.py index 7f1c2e21c6..a6dd731dd9 100644 --- a/Deeploy/Targets/PULPOpen/Templates/FloatAddTemplate.py +++ b/Deeploy/Targets/PULPOpen/Templates/FloatAddTemplate.py @@ -2,9 +2,9 @@ # # SPDX-License-Identifier: Apache-2.0 -from Deeploy.DeeployTypes import NodeTemplate +from Deeploy.CommonExtensions.NodeTemplate import ElementwiseTemplate -referenceTemplate = NodeTemplate(""" +referenceTemplate = ElementwiseTemplate(""" // Add Parallel with 1x6 unrolling (Name: ${nodeName}, Op: ${nodeOp}) int8_t ${nodeName}_core_id = pi_core_id(); int8_t ${nodeName}_log2Core = log2(NUM_CORES); diff --git a/Deeploy/Targets/PULPOpen/Templates/FloatGemmTemplate.py b/Deeploy/Targets/PULPOpen/Templates/FloatGemmTemplate.py index 21044a5eca..17b8ec7366 100644 --- a/Deeploy/Targets/PULPOpen/Templates/FloatGemmTemplate.py +++ b/Deeploy/Targets/PULPOpen/Templates/FloatGemmTemplate.py @@ -2,9 +2,9 @@ # # SPDX-License-Identifier: Apache-2.0 -from Deeploy.DeeployTypes import NodeTemplate +from 
Deeploy.CommonExtensions.NodeTemplate import GemmTemplate -referenceTemplate = NodeTemplate(""" +referenceTemplate = GemmTemplate(""" // GEMM (Name: ${nodeName}, Op: ${nodeOp}) ${A_type.typeName} ref_${data_out}_${A} = ${A}; ${B_type.typeName} ref_${data_out}_${B} = ${B}; diff --git a/Deeploy/Targets/PULPOpen/Templates/FloatMulTemplate.py b/Deeploy/Targets/PULPOpen/Templates/FloatMulTemplate.py index 2f202b24d2..e9927981e2 100644 --- a/Deeploy/Targets/PULPOpen/Templates/FloatMulTemplate.py +++ b/Deeploy/Targets/PULPOpen/Templates/FloatMulTemplate.py @@ -2,9 +2,9 @@ # # SPDX-License-Identifier: Apache-2.0 -from Deeploy.DeeployTypes import NodeTemplate +from Deeploy.CommonExtensions.NodeTemplate import ElementwiseScalarTemplate -referenceTemplate = NodeTemplate(""" +referenceTemplate = ElementwiseScalarTemplate(""" // Float Mul with parallelism and 6x unrolling (Name: ${nodeName}, Op: ${nodeOp}) int8_t ${nodeName}_core_id = pi_core_id(); diff --git a/Deeploy/Targets/PULPOpen/Templates/GEMMTemplate.py b/Deeploy/Targets/PULPOpen/Templates/GEMMTemplate.py index c65404c096..26ea28ec6f 100644 --- a/Deeploy/Targets/PULPOpen/Templates/GEMMTemplate.py +++ b/Deeploy/Targets/PULPOpen/Templates/GEMMTemplate.py @@ -4,10 +4,11 @@ from typing import Dict, List, Tuple +from Deeploy.CommonExtensions.NodeTemplate import RequantizedGemmTemplate from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation -class PULPGEMMTemplate(NodeTemplate): +class PULPGEMMTemplate(RequantizedGemmTemplate): def __init__(self, templateStr): super().__init__(templateStr) diff --git a/Deeploy/Targets/PULPOpen/Templates/MatrixVectorTemplate.py b/Deeploy/Targets/PULPOpen/Templates/MatrixVectorTemplate.py index e4b8348614..d5f3164217 100644 --- a/Deeploy/Targets/PULPOpen/Templates/MatrixVectorTemplate.py +++ b/Deeploy/Targets/PULPOpen/Templates/MatrixVectorTemplate.py @@ -4,10 +4,11 @@ from typing import Dict, List, Tuple -from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation +from Deeploy.CommonExtensions.NodeTemplate import RequantizedGemmTemplate +from Deeploy.DeeployTypes import NetworkContext, OperatorRepresentation -class _PULPMatrixVectorTemplate(NodeTemplate): +class _PULPMatrixVectorTemplate(RequantizedGemmTemplate): def __init__(self, templateStr): super().__init__(templateStr) diff --git a/Deeploy/Targets/PULPOpen/Templates/MulTemplate.py b/Deeploy/Targets/PULPOpen/Templates/MulTemplate.py index 1dbefa3287..03a5f7219d 100644 --- a/Deeploy/Targets/PULPOpen/Templates/MulTemplate.py +++ b/Deeploy/Targets/PULPOpen/Templates/MulTemplate.py @@ -2,14 +2,9 @@ # # SPDX-License-Identifier: Apache-2.0 -from Deeploy.DeeployTypes import NodeTemplate, OperatorRepresentation +from Deeploy.CommonExtensions.NodeTemplate import ElementwiseTemplate - -class _MulTemplate(NodeTemplate, OperatorRepresentation): - pass - - -referenceTemplate = _MulTemplate(""" +referenceTemplate = ElementwiseTemplate(""" // Mul (Name: ${nodeName}, Op: ${nodeOp}) int8_t ${nodeName}_core_id = pi_core_id(); diff --git a/Deeploy/Targets/PULPOpen/Templates/TallGEMMTemplate.py b/Deeploy/Targets/PULPOpen/Templates/TallGEMMTemplate.py index 76fd47cfb6..2fd75cb165 100644 --- a/Deeploy/Targets/PULPOpen/Templates/TallGEMMTemplate.py +++ b/Deeploy/Targets/PULPOpen/Templates/TallGEMMTemplate.py @@ -4,10 +4,11 @@ from typing import Dict, List, Tuple -from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation +from Deeploy.CommonExtensions.NodeTemplate import RequantizedGemmTemplate +from 
Deeploy.DeeployTypes import NetworkContext, OperatorRepresentation -class _PULPTallGEMMTemplate(NodeTemplate): +class _PULPTallGEMMTemplate(RequantizedGemmTemplate): def __init__(self, templateStr): super().__init__(templateStr) From 444064d9ae04d3828aebbb913c027df744d22067 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Tue, 28 Oct 2025 17:48:10 +0100 Subject: [PATCH 75/80] SignPropDeployer changes --- .../NetworkDeployers/SignPropDeployer.py | 53 ++++++++++++++----- 1 file changed, 40 insertions(+), 13 deletions(-) diff --git a/Deeploy/CommonExtensions/NetworkDeployers/SignPropDeployer.py b/Deeploy/CommonExtensions/NetworkDeployers/SignPropDeployer.py index e576ff865b..6f3498db6d 100644 --- a/Deeploy/CommonExtensions/NetworkDeployers/SignPropDeployer.py +++ b/Deeploy/CommonExtensions/NetworkDeployers/SignPropDeployer.py @@ -6,8 +6,10 @@ import onnx_graphsurgeon as gs -from Deeploy.AbstractDataTypes import Pointer -from Deeploy.DeeployTypes import DeploymentPlatform, NetworkDeployer, OperatorDescriptor, TopologyOptimizer +from Deeploy.AbstractDataTypes import IntegerImmediate, Pointer +from Deeploy.CommonExtensions.TypeCheckers.SignPropTypeChecker import SignPropTypeChecker +from Deeploy.DeeployTypes import ConstantBuffer, DeploymentPlatform, NetworkDeployer, OperatorDescriptor, \ + TopologyOptimizer, VariableBuffer from Deeploy.Logging import DEFAULT_LOGGER as log @@ -33,17 +35,6 @@ def __init__(self, self.inputOffsets = inputOffsets - def _createIOBindings(self, ctxt, graph): - ctxt = super()._createIOBindings(ctxt, graph) - for node in graph.inputs: - data_name = node.name - nb = ctxt.lookup(data_name) - data_type = self.inputTypes[data_name] - nb._signed = (self.inputOffsets[data_name] == 0) - nb.nLevels = (2**data_type.referencedType.typeWidth) - - return ctxt - def _printInputOutputSummary(self): log.info('Input:') for buf in self.inputs(): @@ -56,3 +47,39 @@ def _printInputOutputSummary(self): log.info( f" - '{buf.name}': Type: {buf._type.referencedType.typeName}, nLevels: {buf.nLevels}, Signed: {buf._signed}" ) + + def parse(self, default_channels_first: bool = True) -> bool: + parsable = super().parse(default_channels_first) + if not parsable: + return False + + # Annotate global buffers + for obj in self.ctxt.globalObjects.values(): + assert isinstance(obj, VariableBuffer) + refTy = obj._type.referencedType + if isinstance(obj, ConstantBuffer): + assert refTy.checkPromotion(obj.values), f"Can't cast {obj} to {refTy}" + if issubclass(refTy, IntegerImmediate): + obj.nLevels = obj.values.max() - obj.values.min() + obj._signed = refTy.typeMin < 0 + elif obj.name in self.inputOffsets: + obj._signed = (self.inputOffsets[obj.name] == 0) + obj.nLevels = (2**refTy.typeWidth) + + # Annotate rest + for layer in self.layerBinding.values(): + node = layer.node + opRepr = layer.mapper.parser.operatorRepresentation + typeChecker = layer.mapper.binder.typeChecker + outTy = self.ctxt.lookup(node.outputs[0].name)._type.referencedType + if issubclass(outTy, IntegerImmediate) and isinstance(typeChecker, SignPropTypeChecker): + inputs = [self.ctxt.lookup(t.name) for t in node.inputs] + outputNLevels = typeChecker._inferNumLevels(inputs, opRepr) + outputSigned = typeChecker._inferSignedness(inputs, opRepr) + + outputs = [self.ctxt.lookup(t.name) for t in node.outputs] + for buffer, nLevels, signed in zip(outputs, outputNLevels, outputSigned): + buffer.nLevels = nLevels + buffer._signed = signed + + return True From 344dff9e7f17ccaa74f58d180ffde2470ae186e9 Mon Sep 17 00:00:00 2001 From: Luka Macan 
Date: Tue, 14 Oct 2025 11:52:05 +0200
Subject: [PATCH 76/80] Remove ioBinding and add parse to the methods that the
 NetworkDeployerWrapper should call from the _innerObject

---
 .../NetworkDeployers/NetworkDeployerWrapper.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Deeploy/CommonExtensions/NetworkDeployers/NetworkDeployerWrapper.py b/Deeploy/CommonExtensions/NetworkDeployers/NetworkDeployerWrapper.py
index f07fe57c96..476128b3d3 100644
--- a/Deeploy/CommonExtensions/NetworkDeployers/NetworkDeployerWrapper.py
+++ b/Deeploy/CommonExtensions/NetworkDeployers/NetworkDeployerWrapper.py
@@ -6,7 +6,7 @@
 
 import onnx_graphsurgeon as gs
 
-from Deeploy.DeeployTypes import CodeGenVerbosity, NetworkContext, NetworkDeployer, ONNXLayer, _NoVerbosity
+from Deeploy.DeeployTypes import CodeGenVerbosity, NetworkDeployer, ONNXLayer, _NoVerbosity
 
 
 class NetworkDeployerWrapper(NetworkDeployer):
@@ -48,8 +48,8 @@ def prepared(self):
         """
 
     # SignPropDeployer augment
-    def _createIOBindings(self, ctxt: NetworkContext, graph: gs.Graph):
-        return self._innerObject._createIOBindings(ctxt, graph)
+    def parse(self, default_channels_first: bool = True) -> bool:
+        return self._innerObject.parse(default_channels_first)
 
     # MemoryAwareDeployer, TilerAwareDeployer, and PULPDeployer augments
     def bind(self) -> bool:

From 21b0b9ca300cdbf27f699d2dd09c7f9ce2958a96 Mon Sep 17 00:00:00 2001
From: Luka Macan
Date: Thu, 16 Oct 2025 18:45:35 +0200
Subject: [PATCH 77/80] Engine deployer for some reason is not overriding the
 _mapNode function

I suspect some interaction between the new parse function and all the other
deployers.
---
 Deeploy/DeeployTypes.py | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/Deeploy/DeeployTypes.py b/Deeploy/DeeployTypes.py
index 14d536cf77..576a29970a 100644
--- a/Deeploy/DeeployTypes.py
+++ b/Deeploy/DeeployTypes.py
@@ -2759,10 +2759,17 @@ def codeTransform(self, verbose: CodeGenVerbosity = _NoVerbosity):
         self.transformed = True
 
     def _mapNode(self, node: gs.Node) -> Union[ONNXLayer, Any]:
-        for engine in self.Platform.engines:
-            if node.op in engine.Mapping:
-                return engine.Mapping[node.op](node)
-        raise RuntimeError(f"No mapping found for node {node.name} with op type {node.op}")
+        engine = None
+        if "engine" in node.attrs:
+            engineName = node.attrs["engine"]
+            engine = next((e for e in self.Platform.engines if e.name == engineName), None)
+        else:
+            for candidateEngine in self.Platform.engines:
+                if node.op in candidateEngine.Mapping:
+                    engine = candidateEngine
+                    break
+        assert engine is not None, f"No mapping found for node {node.name} with op type {node.op}"
+        return engine.Mapping[node.op](node)

From 9b4cbdb99b437b3fb966835df007707e6da45dd1 Mon Sep 17 00:00:00 2001
From: Luka Macan
Date: Tue, 28 Oct 2025 17:48:52 +0100
Subject: [PATCH 78/80] Neureka parser changes due to canonicalization

---
 Deeploy/Targets/Neureka/Parsers.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Deeploy/Targets/Neureka/Parsers.py b/Deeploy/Targets/Neureka/Parsers.py
index 1d3db0d882..d0d0d7e918 100644
--- a/Deeploy/Targets/Neureka/Parsers.py
+++ b/Deeploy/Targets/Neureka/Parsers.py
@@ -20,7 +20,7 @@ def parseNode(self, node: gs.Node) -> bool:
                 # No dilation support
                 self.operatorRepresentation['dilations'] == (1, 1),
                 # Channels have to be last
-                'channels_first' in self.operatorRepresentation and not
self.operatorRepresentation['channels_first'], + 'channels_first' in node.attrs and not node.attrs['channels_first'], # Expect "weight_offset" attribute in the node "weight_offset" in node.attrs, ]): @@ -87,7 +87,7 @@ def parseNode(self, node: gs.Node) -> bool: ch_im_in = node.inputs[1].shape[1] if not all([ - self.operatorRepresentation['kernel_shape'] == [3, 3], + self.operatorRepresentation['kernel_shape'] == (3, 3), self.operatorRepresentation['group'] == ch_im_out, self.operatorRepresentation['group'] == ch_im_in, ]): @@ -169,7 +169,7 @@ def parseNode(self, node: gs.Node) -> bool: return False if not all([ - self.operatorRepresentation['kernel_shape'] == [3, 3], + self.operatorRepresentation['kernel_shape'] == (3, 3), self.operatorRepresentation['group'] == 1, ]): return False From 43bd7e59976b533eff177e737a6fa05a1f3cf9a1 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Fri, 17 Oct 2025 10:50:13 +0200 Subject: [PATCH 79/80] Reduce the L2 memory size so that the test still fails --- .github/workflows/ci-deeploy.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci-deeploy.yml b/.github/workflows/ci-deeploy.yml index 429e9c2027..a04229afe3 100644 --- a/.github/workflows/ci-deeploy.yml +++ b/.github/workflows/ci-deeploy.yml @@ -61,9 +61,9 @@ jobs: run: | cd DeeployTest python testMVP.py -t Tests/CCT/CCT_1_16_16_8 -p Siracusa --defaultMemLevel=L2 --l1=64000 --l2=75000 --memAllocStrategy=MiniMalloc - python testMVP.py -t Tests/CCT/CCT_1_16_16_8 -p Siracusa --defaultMemLevel=L2 --l1=64000 --l2=60000 --memAllocStrategy=MiniMalloc --shouldFail + python testMVP.py -t Tests/CCT/CCT_1_16_16_8 -p Siracusa --defaultMemLevel=L2 --l1=64000 --l2=50000 --memAllocStrategy=MiniMalloc --shouldFail python testMVP.py -t Tests/CCT/CCT_1_16_16_8 -p Siracusa --defaultMemLevel=L2 --l1=64000 --l2=90000 --memAllocStrategy=TetrisRandom - python testMVP.py -t Tests/CCT/CCT_1_16_16_8 -p Siracusa --defaultMemLevel=L2 --l1=64000 --l2=75000 --memAllocStrategy=TetrisRandom --shouldFail + python testMVP.py -t Tests/CCT/CCT_1_16_16_8 -p Siracusa --defaultMemLevel=L2 --l1=64000 --l2=69000 --memAllocStrategy=TetrisRandom --shouldFail deeploy-state-serialization: needs: select-env From b832d696b112553cd8f92fc497e3c50279cce755 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Wed, 29 Oct 2025 13:45:57 +0100 Subject: [PATCH 80/80] Rename Mul to MulScalar to reflect better what the kernel implements --- Deeploy/Targets/CortexM/Platform.py | 15 ++-- Deeploy/Targets/Generic/Bindings.py | 8 +- Deeploy/Targets/Generic/Layers.py | 10 +-- Deeploy/Targets/Generic/Parsers.py | 33 +++----- Deeploy/Targets/Generic/Platform.py | 15 ++-- ...lTemplate.py => FloatMulScalarTemplate.py} | 0 .../TileConstraints/AddTileConstraint.py | 4 +- .../TileConstraints/BOPTileConstraint.py | 84 ++++++++++++++++++- .../TileConstraints/MulTileConstraint.py | 8 +- Deeploy/Targets/MemPool/Platform.py | 16 ++-- Deeploy/Targets/PULPOpen/Bindings.py | 4 +- Deeploy/Targets/PULPOpen/Platform.py | 24 +++--- Deeploy/Targets/PULPOpen/Tiler.py | 9 +- 13 files changed, 159 insertions(+), 71 deletions(-) rename Deeploy/Targets/Generic/Templates/{FloatMulTemplate.py => FloatMulScalarTemplate.py} (100%) diff --git a/Deeploy/Targets/CortexM/Platform.py b/Deeploy/Targets/CortexM/Platform.py index 25caeed60f..abcddee64e 100644 --- a/Deeploy/Targets/CortexM/Platform.py +++ b/Deeploy/Targets/CortexM/Platform.py @@ -14,17 +14,17 @@ LinearAttentionAlignmentPass, MatMulRequantMergePass, MHSAAlignmentPass from Deeploy.Targets.Generic.Bindings 
import BasicAddBindings, BasicDebugPrintBindings, BasicDivBindings, \ BasicGatherBindings, BasicGELUBindings, BasicLayerNormBindings, BasicMatMulBindings, BasicMulBindings, \ - BasicPad1DBindings, BasicPad2DBindings, BasicReduceMeanBindings, BasicReduceSumBindings, BasicReshapeBindings, \ - BasicRQIntegerDivBinding, BasicRQSBindings, BasicRQSGELUBinding, BasicSliceBindings, BasicSoftmaxBindings, \ - BasicTransposeBindings, DummyBinding + BasicMulScalarBindings, BasicPad1DBindings, BasicPad2DBindings, BasicReduceMeanBindings, BasicReduceSumBindings, \ + BasicReshapeBindings, BasicRQIntegerDivBinding, BasicRQSBindings, BasicRQSGELUBinding, BasicSliceBindings, \ + BasicSoftmaxBindings, BasicTransposeBindings, DummyBinding from Deeploy.Targets.Generic.Layers import AddLayer, CLCALayer, DebugPrintLayer, DivLayer, GatherLayer, GELULayer, \ LayerNormLayer, LinearAttentionLayer, MatMulLayer, MaxPoolLayer, MulLayer, PadLayer, ReduceMeanLayer, \ ReduceSumLayer, RequantShiftLayer, ReshapeLayer, RQIntegerDivLayer, RQSiGELULayer, SliceLayer, SoftmaxLayer, \ TransposeLayer from Deeploy.Targets.Generic.Parsers import AddParser, DebugParser, DummyParser, FlattenParser, GatherParser, \ - GELUParser, IntegerDivParser, MatMulParser, MulParser, Pad1DParser, Pad2DParser, ReduceMeanParser, \ - ReduceSumParser, RequantShiftParser, ReshapeParser, RQIntegerDivParser, RQSiGELUParser, SliceParser, \ - TransposeParser, UnsqueezeParser, iLayerNormParser, iSoftmaxParser + GELUParser, IntegerDivParser, MatMulParser, MulParser, MulScalarParser, Pad1DParser, Pad2DParser, \ + ReduceMeanParser, ReduceSumParser, RequantShiftParser, ReshapeParser, RQIntegerDivParser, RQSiGELUParser, \ + SliceParser, TransposeParser, UnsqueezeParser, iLayerNormParser, iSoftmaxParser from Deeploy.Targets.Generic.Templates import AllocateTemplate, FreeTemplate from Deeploy.Targets.Generic.TopologyOptimizationPasses.Passes import IntegerDivRequantMergePass, \ MergeConstAddAndRequantPass, iGELURequantMergePass @@ -46,6 +46,7 @@ MatMulMapper = NodeMapper(MatMulParser(), BasicMatMulBindings) MaxPool2DMapper = NodeMapper(CMSISMaxPool2DParser(), [CMSISMaxPool2DBinding]) MulMapper = NodeMapper(MulParser(), BasicMulBindings) +MulScalarMapper = NodeMapper(MulScalarParser(), BasicMulScalarBindings) Pad1DMapper = NodeMapper(Pad1DParser(), BasicPad1DBindings) Pad2DMapper = NodeMapper(Pad2DParser(), BasicPad2DBindings) ReduceMeanMapper = NodeMapper(ReduceMeanParser(), BasicReduceMeanBindings) @@ -78,7 +79,7 @@ 'LinearAttention': LinearAttentionLayer([LinearAttention_int16_Mapper]), 'MatMul': MatMulLayer([MatMulMapper]), 'MaxPool': MaxPoolLayer([MaxPool2DMapper]), - 'Mul': MulLayer([MulMapper]), + 'Mul': MulLayer([MulMapper, MulScalarMapper]), 'Pad': PadLayer([Pad1DMapper, Pad2DMapper]), 'ReduceMean': ReduceMeanLayer([ReduceMeanMapper]), 'ReduceSum': ReduceSumLayer([ReduceSumMapper]), diff --git a/Deeploy/Targets/Generic/Bindings.py b/Deeploy/Targets/Generic/Bindings.py index b29e403d55..b6855f352e 100644 --- a/Deeploy/Targets/Generic/Bindings.py +++ b/Deeploy/Targets/Generic/Bindings.py @@ -14,7 +14,7 @@ from Deeploy.Targets.Generic.Templates import AddTemplate, BatchNormalizationTemplate, ConcatTemplate, ConvTemplate, \ ConvTransposeTemplate, DebugPrintTemplate, DequantTemplate, DummyTemplate, DWConvTemplate, FloatAddTemplate, \ FloatConvTemplate, FloatDivTemplate, FloatDWConvTemplate, FloatGELUTemplate, FloatGemmTemplate, \ - FloatLayernormTemplate, FloatMatMulTemplate, FloatMaxPoolTemplate, FloatMulTemplate, FloatPadTemplate, \ + FloatLayernormTemplate, 
FloatMatMulTemplate, FloatMaxPoolTemplate, FloatMulScalarTemplate, FloatPadTemplate, \ FloatReduceMeanTemplate, FloatReluTemplate, FloatSoftmaxTemplate, GatherTemplate, GemmTemplate, \ IntegerDivTemplate, ITAMaxTemplate, ITAPartialMaxTemplate, MatMulTemplate, MaxPoolTemplate, MulTemplate, \ PadTemplate, QuantTemplate, ReduceMeanTemplate, ReduceSumTemplate, RequantShiftTemplate, ReshapeTemplate, \ @@ -171,9 +171,11 @@ NodeBinding(MulChecker([PointerClass(typeA), PointerClass(typeB)], [PointerClass(int32_t)]), MulTemplate.referenceTemplate, BasicTransformer) for typeA, typeB in itertools.product(SignedIntegerDataTypes, SignedIntegerDataTypes) -] + [ +] + +BasicMulScalarBindings = [ NodeBinding(MulChecker([PointerClass(float32_t), PointerClass(float32_t)], [PointerClass(float32_t)]), - FloatMulTemplate.referenceTemplate, BasicTransformer) + FloatMulScalarTemplate.referenceTemplate, BasicTransformer) ] BasicPad1DBindings = [ diff --git a/Deeploy/Targets/Generic/Layers.py b/Deeploy/Targets/Generic/Layers.py index 97e833f489..1beb876a88 100644 --- a/Deeploy/Targets/Generic/Layers.py +++ b/Deeploy/Targets/Generic/Layers.py @@ -3,6 +3,7 @@ # SPDX-License-Identifier: Apache-2.0 import copy +import math from typing import List, Tuple import numpy as np @@ -295,15 +296,12 @@ def __init__(self, maps: List[NodeMapper]): def computeShapes(self, inputShapes: Shape, outputShapes: Shape, operatorRepresentation, channels_first) -> Tuple[Shape, Shape]: - if inputShapes[1] == () or inputShapes[1] == []: inputShapes[1] = (1,) - - if len(inputShapes[0]) > len(inputShapes[1]): - inputShapes[1] = inputShapes[0] + if math.prod(inputShapes[1]) == 1: + return inputShapes, outputShapes else: - inputShapes[0] = inputShapes[1] - return (inputShapes, outputShapes) + return [np.broadcast_shapes(*inputShapes)] * len(inputShapes), outputShapes def computeOps(self): return self.mapper.parser.operatorRepresentation['size'] diff --git a/Deeploy/Targets/Generic/Parsers.py b/Deeploy/Targets/Generic/Parsers.py index c1e28047c4..e0200b1336 100644 --- a/Deeploy/Targets/Generic/Parsers.py +++ b/Deeploy/Targets/Generic/Parsers.py @@ -1114,35 +1114,28 @@ def parseNode(self, node: gs.Node) -> (bool): class MulParser(NodeParser): - def __init__(self): - super().__init__() - - def parseNode(self, node: gs.Node) -> (bool): - - wellFormed = all([ - len(node.inputs) == 2, - len(node.outputs) == 1, - ]) - - return wellFormed + def parseNode(self, node: gs.Node) -> bool: + return len(node.inputs) == 2 and len(node.outputs) == 1 def parseNodeCtxt(self, ctxt: NetworkContext, node: gs.Node, channels_first: bool = True) -> Tuple[NetworkContext, bool]: + inBuffers = [ctxt.lookup(t.name) for t in node.inputs] + outBuffers = [ctxt.lookup(t.name) for t in node.outputs] - inputs = ['A', 'B'] - outputs = ['C'] + self.operatorRepresentation.update(dict(zip(['A', 'B'], [b.name for b in inBuffers]))) + self.operatorRepresentation.update(dict(zip(['C'], [b.name for b in outBuffers]))) - for idx, inputNode in enumerate(node.inputs): - self.operatorRepresentation[inputs[idx]] = ctxt.lookup(inputNode.name).name - for idx, outputNode in enumerate(node.outputs): - self.operatorRepresentation[outputs[idx]] = ctxt.lookup(outputNode.name).name + self.operatorRepresentation['size'] = math.prod(inBuffers[0].shape) + self.operatorRepresentation['sizeB'] = math.prod(inBuffers[1].shape) + return ctxt, True - self.operatorRepresentation['size'] = np.prod(ctxt.lookup(node.inputs[0].name).shape) - self.operatorRepresentation['sizeB'] = 
np.prod(ctxt.lookup(node.inputs[1].name).shape) - return ctxt, True +class MulScalarParser(MulParser): + + def parseNode(self, node: gs.Node) -> bool: + return super().parseNode(node) and math.prod(node.inputs[1].shape) == 1 class ConvParser(NodeParser): diff --git a/Deeploy/Targets/Generic/Platform.py b/Deeploy/Targets/Generic/Platform.py index a15b3db2e6..69bf41a839 100644 --- a/Deeploy/Targets/Generic/Platform.py +++ b/Deeploy/Targets/Generic/Platform.py @@ -11,9 +11,9 @@ BasicDequantBindings, BasicDivBindings, BasicDWConv1DBinding, BasicDWConv2DBindings, BasicGatherBindings, \ BasicGELUBindings, BasicGEMMBindings, BasicITAPartialSoftmaxBinding, BasicITASoftmaxBinding, \ BasicLayerNormBindings, BasicMatMulBindings, BasicMaxPool1DBindings, BasicMaxPool2DBindings, BasicMulBindings, \ - BasicPad1DBindings, BasicPad2DBindings, BasicQuantBindings, BasicReduceMeanBindings, BasicReduceSumBindings, \ - BasicReluBinding, BasicReshapeBindings, BasicRQIntegerDivBinding, BasicRQSBindings, BasicRQSGELUBinding, \ - BasicSliceBindings, BasicSoftmaxBindings, BasicTransposeBindings, DummyBinding + BasicMulScalarBindings, BasicPad1DBindings, BasicPad2DBindings, BasicQuantBindings, BasicReduceMeanBindings, \ + BasicReduceSumBindings, BasicReluBinding, BasicReshapeBindings, BasicRQIntegerDivBinding, BasicRQSBindings, \ + BasicRQSGELUBinding, BasicSliceBindings, BasicSoftmaxBindings, BasicTransposeBindings, DummyBinding from Deeploy.Targets.Generic.Layers import AddLayer, BatchNormalizationLayer, ConcatLayer, ConvLayer, \ ConvTransposeLayer, DebugPrintLayer, DequantLayer, DivLayer, GatherLayer, GELULayer, GEMMLayer, ITAMaxLayer, \ LayerNormLayer, MatMulLayer, MaxPoolLayer, MulLayer, PadLayer, QuantLayer, ReduceMeanLayer, ReduceSumLayer, \ @@ -23,9 +23,9 @@ DebugParser, DequantParser, DivParser, DummyParser, FlattenParser, GatherParser, GELUParser, GenericConv1DParser, \ GenericConv2DParser, GenericDWConv1DParser, GenericDWConv2DParser, GenericGEMMParser, GenericMaxPool2DParser, \ IntegerDivParser, ITAMaxParser, ITAPartialMaxParser, LayerNormParser, MatMulParser, MaxPool1DParser, MulParser, \ - Pad1DParser, Pad2DParser, QuantParser, ReduceMeanParser, ReduceSumParser, ReluParser, RequantShiftParser, \ - ReshapeParser, RQIntegerDivParser, RQSiGELUParser, SliceParser, SoftmaxParser, TransposeParser, UnsqueezeParser, \ - iLayerNormParser, iSoftmaxParser + MulScalarParser, Pad1DParser, Pad2DParser, QuantParser, ReduceMeanParser, ReduceSumParser, ReluParser, \ + RequantShiftParser, ReshapeParser, RQIntegerDivParser, RQSiGELUParser, SliceParser, SoftmaxParser, \ + TransposeParser, UnsqueezeParser, iLayerNormParser, iSoftmaxParser from Deeploy.Targets.Generic.Templates import AllocateTemplate, FreeTemplate from Deeploy.Targets.Generic.TopologyOptimizationPasses.Passes import DequantPatternPass, ExtractPaddingFromConvPass, \ ExtractPaddingFromPoolPass, MatMulAddMergePass, MergeConstAddAndRequantPass, QuantPatternPass, \ @@ -52,6 +52,7 @@ MaxPoolMapper = NodeMapper(GenericMaxPool2DParser(), BasicMaxPool2DBindings) MaxPool1DMapper = NodeMapper(MaxPool1DParser(), BasicMaxPool1DBindings) MulMapper = NodeMapper(MulParser(), BasicMulBindings) +MulScalarMapper = NodeMapper(MulScalarParser(), BasicMulScalarBindings) Pad1DMapper = NodeMapper(Pad1DParser(), BasicPad1DBindings) Pad2DMapper = NodeMapper(Pad2DParser(), BasicPad2DBindings) ReduceMeanMapper = NodeMapper(ReduceMeanParser(), BasicReduceMeanBindings) @@ -97,7 +98,7 @@ 'MatMul': GEMMLayer([MatMulMapper]), 'MatMulInteger': MatMulLayer([MatMulMapper]), 'MaxPool': 
MaxPoolLayer([MaxPool1DMapper, MaxPoolMapper]),
-    'Mul': MulLayer([MulMapper]),
+    'Mul': MulLayer([MulScalarMapper, MulMapper]),
     'Pad': PadLayer([Pad1DMapper, Pad2DMapper]),
     'ReduceMean': ReduceMeanLayer([ReduceMeanMapper]),
     'ReduceSum': ReduceSumLayer([ReduceSumMapper]),
diff --git a/Deeploy/Targets/Generic/Templates/FloatMulTemplate.py b/Deeploy/Targets/Generic/Templates/FloatMulScalarTemplate.py
similarity index 100%
rename from Deeploy/Targets/Generic/Templates/FloatMulTemplate.py
rename to Deeploy/Targets/Generic/Templates/FloatMulScalarTemplate.py
diff --git a/Deeploy/Targets/Generic/TileConstraints/AddTileConstraint.py b/Deeploy/Targets/Generic/TileConstraints/AddTileConstraint.py
index e87f9abb62..487c18e11d 100644
--- a/Deeploy/Targets/Generic/TileConstraints/AddTileConstraint.py
+++ b/Deeploy/Targets/Generic/TileConstraints/AddTileConstraint.py
@@ -6,4 +6,6 @@
 
 
 class AddTileConstraint(BOPTileConstraint):
-    pass
+    dataIn1Name = "data_in_1"
+    dataIn2Name = "data_in_2"
+    dataOutName = "data_out"
diff --git a/Deeploy/Targets/Generic/TileConstraints/BOPTileConstraint.py b/Deeploy/Targets/Generic/TileConstraints/BOPTileConstraint.py
index e1f6f0e71c..a2d7da1a92 100644
--- a/Deeploy/Targets/Generic/TileConstraints/BOPTileConstraint.py
+++ b/Deeploy/Targets/Generic/TileConstraints/BOPTileConstraint.py
@@ -2,6 +2,7 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
+import math
 from typing import Dict, List, Tuple
 
 import numpy as np
@@ -12,16 +13,17 @@
 from Deeploy.TilingExtension.MemoryConstraints import NodeMemoryConstraint
 from Deeploy.TilingExtension.TileConstraint import TileConstraint
 from Deeploy.TilingExtension.TilerModel import TilerModel
-from Deeploy.TilingExtension.TilingCodegen import AbsoluteHyperRectangle, TilingSchedule, VariableReplacementScheme
+from Deeploy.TilingExtension.TilingCodegen import AbsoluteHyperRectangle, HyperRectangle, TilingSchedule, \
+    VariableReplacementScheme
 
 
 class BOPTileConstraint(TileConstraint):
     """Tile constraint class for binary operators, i.e. operators that use two input tensors of equal dimensions
     """
 
-    dataIn1Name = 'data_in_1' #: str: Name of the first input tensor as defined by the operator's parser
-    dataIn2Name = 'data_in_2' #: str: Name of the second input tensor as defined by the operator's parser
-    dataOutName = 'data_out' #: str: Name of the output tensor as defined by the operator's parser
+    dataIn1Name: str  # Name of the first input tensor as defined by the operator's descriptor
+    dataIn2Name: str  # Name of the second input tensor as defined by the operator's descriptor
+    dataOutName: str  # Name of the output tensor as defined by the operator's descriptor
 
     @classmethod
     def addGeometricalConstraint(cls, tilerModel: TilerModel, parseDict: Dict, ctxt: NetworkContext) -> TilerModel:
@@ -34,6 +36,15 @@ def addGeometricalConstraint(cls, tilerModel: TilerModel, parseDict: Dict, ctxt:
             tilerModel.addTensorDimToModel(ctxt, bufferName)
 
         input1Shape = ctxt.lookup(inputBuffer1Name).shape
+        input2Shape = ctxt.lookup(inputBuffer2Name).shape
+        outputShape = ctxt.lookup(outputBufferName).shape
+
+        assert len(input1Shape) == len(
+            input2Shape
+        ), f"[{cls.__name__}] Input shape ranks differ. Shape input1: {input1Shape} vs. input2: {input2Shape}"
+        assert len(input1Shape) == len(
+            outputShape
+        ), f"[{cls.__name__}] Input and output shape ranks differ. Shape input: {input1Shape} vs. output: {outputShape}"
 
         for dim in range(len(input1Shape)):
             inputDim1Var = tilerModel.getTensorDimVar(tensorName = inputBuffer1Name, dimIdx = dim)
@@ -77,3 +88,68 @@ def serializeTilingSolution(
         variableReplacementSchedule = VariableReplacementScheme(replacements, replacementTypes)
 
         return variableReplacementSchedule, tilingSchedule
+
+
+class BOPScalarTileConstraint(TileConstraint):
+    """Tile constraint class for binary operators whose second operand is a scalar, i.e. a single-element tensor
+    """
+
+    dataIn1Name: str  # Name of the first input tensor as defined by the operator's descriptor
+    dataIn2Name: str  # Name of the second input tensor as defined by the operator's descriptor
+    dataOutName: str  # Name of the output tensor as defined by the operator's descriptor
+
+    @classmethod
+    def addGeometricalConstraint(cls, tilerModel: TilerModel, parseDict: Dict, ctxt: NetworkContext) -> TilerModel:
+
+        inputBuffer1Name = parseDict[cls.dataIn1Name]
+        inputBuffer2Name = parseDict[cls.dataIn2Name]
+        outputBufferName = parseDict[cls.dataOutName]
+
+        for bufferName in [inputBuffer1Name, inputBuffer2Name, outputBufferName]:
+            tilerModel.addTensorDimToModel(ctxt, bufferName)
+
+        input1Shape = ctxt.lookup(inputBuffer1Name).shape
+        input2Shape = ctxt.lookup(inputBuffer2Name).shape
+        assert math.prod(input2Shape) == 1, f"Expected the second operand to be a scalar, got shape {input2Shape}"
+
+        for dim in range(len(input1Shape)):
+            inputDim1Var = tilerModel.getTensorDimVar(tensorName = inputBuffer1Name, dimIdx = dim)
+            outputDimVar = tilerModel.getTensorDimVar(tensorName = outputBufferName, dimIdx = dim)
+            tilerModel.addConstraint(inputDim1Var == outputDimVar)
+
+        return tilerModel
+
+    @classmethod
+    def serializeTilingSolution(
+            cls, tilingSolution: NodeMemoryConstraint, absoluteOutputCubes: List[AbsoluteHyperRectangle],
+            targetMemLevel: str, ctxt: NetworkContext,
+            operatorRepresentation: OperatorRepresentation) -> Tuple[VariableReplacementScheme, TilingSchedule]:
+        outputCubes = [cube.rectangle for cube in absoluteOutputCubes]
+
+        addrNames = [cls.dataIn1Name, cls.dataIn2Name, cls.dataOutName]
+        inputBaseOffsets, outputBaseOffsets = cls.extractBaseAddr(tilingSolution, targetMemLevel,
+                                                                  operatorRepresentation, addrNames)
+
+        replacements = {"size": []}
+
+        replacementTypes = {"size": PointerClass(uint16_t)}
+
+        for cube in outputCubes:
+            newSize = np.prod(cube.dims)
+            replacements["size"].append(newSize)
+
+        inputLoadSchedule = []
+        outputLoadSchedule = []
+
+        # TODO: Optimize to not fetch dataIn2
+        scalarCube = HyperRectangle((0,), (1,))
+        for cube in outputCubes:
+            inputLoadSchedule.append({cls.dataIn1Name: cube, cls.dataIn2Name: scalarCube})
+
+        for out in outputCubes:
+            outputLoadSchedule.append({cls.dataOutName: out})
+
+        tilingSchedule = TilingSchedule(inputBaseOffsets, outputBaseOffsets, inputLoadSchedule, outputLoadSchedule)
+        variableReplacementSchedule = VariableReplacementScheme(replacements, replacementTypes)
+
+        return variableReplacementSchedule, tilingSchedule
diff --git a/Deeploy/Targets/Generic/TileConstraints/MulTileConstraint.py b/Deeploy/Targets/Generic/TileConstraints/MulTileConstraint.py
index 9f71012ffe..1788909549 100644
--- a/Deeploy/Targets/Generic/TileConstraints/MulTileConstraint.py
+++ b/Deeploy/Targets/Generic/TileConstraints/MulTileConstraint.py
@@ -2,10 +2,16 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
-from .BOPTileConstraint import BOPTileConstraint
+from .BOPTileConstraint import BOPScalarTileConstraint, BOPTileConstraint
 
 
 class MulTileConstraint(BOPTileConstraint):
     dataIn1Name = "A"
dataIn2Name = "B" dataOutName = "C" + + +class MulScalarTileConstraint(BOPScalarTileConstraint): + dataIn1Name = "A" + dataIn2Name = "B" + dataOutName = "C" diff --git a/Deeploy/Targets/MemPool/Platform.py b/Deeploy/Targets/MemPool/Platform.py index 48599736f4..478d51422e 100644 --- a/Deeploy/Targets/MemPool/Platform.py +++ b/Deeploy/Targets/MemPool/Platform.py @@ -10,18 +10,19 @@ StructBuffer, TopologyOptimizer, TransientBuffer, VariableBuffer from Deeploy.Targets.Generic.Bindings import BasicAddBindings, BasicConv1DBindings, BasicConv2DBindings, \ BasicDebugPrintBindings, BasicDivBindings, BasicDWConv1DBinding, BasicDWConv2DBindings, BasicGatherBindings, \ - BasicGELUBindings, BasicLayerNormBindings, BasicMulBindings, BasicPad1DBindings, BasicPad2DBindings, \ - BasicReduceMeanBindings, BasicReduceSumBindings, BasicReshapeBindings, BasicRQIntegerDivBinding, \ - BasicRQSGELUBinding, BasicSliceBindings, BasicSoftmaxBindings, BasicTransposeBindings, DummyBinding + BasicGELUBindings, BasicLayerNormBindings, BasicMulBindings, BasicMulScalarBindings, BasicPad1DBindings, \ + BasicPad2DBindings, BasicReduceMeanBindings, BasicReduceSumBindings, BasicReshapeBindings, \ + BasicRQIntegerDivBinding, BasicRQSGELUBinding, BasicSliceBindings, BasicSoftmaxBindings, BasicTransposeBindings, \ + DummyBinding from Deeploy.Targets.Generic.Layers import AddLayer, ConvLayer, DebugPrintLayer, DivLayer, GatherLayer, GELULayer, \ GEMMLayer, ITAMaxLayer, LayerNormLayer, MatMulLayer, MaxPoolLayer, MHSALayer, MulLayer, PadLayer, ReduceMeanLayer, \ ReduceSumLayer, RequantShiftLayer, ReshapeLayer, RQGEMMLayer, RQIntegerDivLayer, RQMatMulLayer, RQSiGELULayer, \ SliceLayer, SoftmaxLayer, TransposeLayer from Deeploy.Targets.Generic.Parsers import AddParser, DebugParser, DummyParser, FlattenParser, GatherParser, \ GELUParser, GenericConv1DParser, GenericConv2DParser, GenericDWConv1DParser, GenericDWConv2DParser, \ - GenericGEMMParser, GenericMaxPool2DParser, IntegerDivParser, ITAMaxParser, MatMulParser, MulParser, Pad1DParser, \ - Pad2DParser, ReduceMeanParser, ReduceSumParser, RequantShiftParser, ReshapeParser, RQGEMMParser, \ - RQIntegerDivParser, RQMatMulParser, RQSiGELUParser, SliceParser, TransposeParser, UnsqueezeParser, \ + GenericGEMMParser, GenericMaxPool2DParser, IntegerDivParser, ITAMaxParser, MatMulParser, MulParser, \ + MulScalarParser, Pad1DParser, Pad2DParser, ReduceMeanParser, ReduceSumParser, RequantShiftParser, ReshapeParser, \ + RQGEMMParser, RQIntegerDivParser, RQMatMulParser, RQSiGELUParser, SliceParser, TransposeParser, UnsqueezeParser, \ iLayerNormParser, iSoftmaxParser from Deeploy.Targets.Generic.TopologyOptimizationPasses.Passes import ExtractPaddingFromConvPass, \ ExtractPaddingFromPoolPass, MatMulAddMergePass, MergeConstAddAndRequantPass, SplitAddPass, iGELURequantMergePass @@ -54,6 +55,7 @@ IntegerDiv_Mapper = NodeMapper(IntegerDivParser(), BasicDivBindings) ITAMaxMapper = NodeMapper(ITAMaxParser(), [MemPoolITASoftmaxBinding_8_8]) Mul_Mapper = NodeMapper(MulParser(), BasicMulBindings) +MulScalar_Mapper = NodeMapper(MulScalarParser(), BasicMulScalarBindings) Pad1D_Mapper = NodeMapper(Pad1DParser(), BasicPad1DBindings) Pad2D_Mapper = NodeMapper(Pad2DParser(), BasicPad2DBindings) ReduceMean_Mapper = NodeMapper(ReduceMeanParser(), BasicReduceMeanBindings) @@ -108,7 +110,7 @@ 'MatMulInteger': MatMulLayer([MatMul_Mapper]), 'MaxPool': MaxPoolLayer([MaxPool_Mapper]), 'MHSA': MHSALayer(MHSA_Mappers), - 'Mul': MulLayer([Mul_Mapper]), + 'Mul': MulLayer([MulScalar_Mapper, Mul_Mapper]), 'Pad': 
PadLayer([Pad1D_Mapper, Pad2D_Mapper]), 'ReduceMean': ReduceMeanLayer([ReduceMean_Mapper]), 'ReduceSum': ReduceSumLayer([ReduceSum_Mapper]), diff --git a/Deeploy/Targets/PULPOpen/Bindings.py b/Deeploy/Targets/PULPOpen/Bindings.py index 57fdf90a57..c7d463a3f2 100644 --- a/Deeploy/Targets/PULPOpen/Bindings.py +++ b/Deeploy/Targets/PULPOpen/Bindings.py @@ -373,7 +373,9 @@ NodeBinding(MulChecker([PointerClass(typeA), PointerClass(typeB)], [PointerClass(int32_t)]), MulTemplate.referenceTemplate, ForkTransformer) for typeA, typeB in itertools.product(SignedIntegerDataTypes, SignedIntegerDataTypes) -] + [ +] + +PULPMulScalarBindings = [ NodeBinding(MulChecker([PointerClass(float32_t), PointerClass(float32_t)], [PointerClass(float32_t)]), FloatMulTemplate.referenceTemplate, ForkTransformer) ] diff --git a/Deeploy/Targets/PULPOpen/Platform.py b/Deeploy/Targets/PULPOpen/Platform.py index 99c1c93351..293f40debe 100644 --- a/Deeploy/Targets/PULPOpen/Platform.py +++ b/Deeploy/Targets/PULPOpen/Platform.py @@ -17,9 +17,9 @@ SliceLayer, SoftmaxCrossEntropyLossGradLayer, SoftmaxCrossEntropyLossLayer, SoftmaxGradLayer, SoftmaxLayer, \ TransposeLayer, iHardswishLayer, iRMSNormLayer from Deeploy.Targets.Generic.Parsers import AddParser, ConcatParser, DequantParser, FlattenParser, GatherParser, \ - GELUParser, GEMMParser, LayerNormParser, MatMulParser, MaxPool2DParser, MulParser, Pad1DParser, Pad2DParser, \ - QuantParser, ReduceMeanParser, ReduceSumParser, ReluParser, RequantShiftParser, ReshapeParser, RQAddParser, \ - RQIntegerDivParser, RQSiGELUParser, RQSiHardswishParser, SGDParser, SliceParser, \ + GELUParser, GEMMParser, LayerNormParser, MatMulParser, MaxPool2DParser, MulParser, MulScalarParser, Pad1DParser, \ + Pad2DParser, QuantParser, ReduceMeanParser, ReduceSumParser, ReluParser, RequantShiftParser, ReshapeParser, \ + RQAddParser, RQIntegerDivParser, RQSiGELUParser, RQSiHardswishParser, SGDParser, SliceParser, \ SoftmaxCrossEntropyLossGradParser, SoftmaxCrossEntropyLossParser, SoftmaxGradParser, SoftmaxParser, \ TransposeParser, UniformRequantShiftParser, UnsqueezeParser, iHardswishParser, iRMSNormParser, iSoftmaxParser from Deeploy.Targets.Generic.Templates import AllocateTemplate as BasicAllocateTemplate @@ -36,13 +36,14 @@ PULPConv2DTilingReadyBindings, PULPFlattenTilingReadyBindings, PULPFPGELUTilingReadyBindings, \ PULPFPGEMMTilingReadyBindings, PULPGatherTilingReadyBindings, PULPiHardswishTilingReadyBindings, \ PULPiRMSNormTilingReadyBindings, PULPiRQSGELUTilingReadyBindings, PULPLayernormTilingReadyBindings, \ - PULPMatMulTilingReadyBindings, PULPMaxPool2DTilingReadyBindings, PULPMulTilingReadyBindings, \ - PULPReduceSumTilingReadyBindings, PULPReluTilingReadyBindings, PULPRQAddTilingReadyBindings, \ - PULPRQSConv2DTilingReadyBindings, PULPRQSDWConv2DTilingReadyBindings, PULPRQSGEMMTilingReadyBindings, \ - PULPRQSiHardswishTilingReadyBindings, PULPRQSMatrixVecTilingReadyBindings, PULPRQSTallGEMMTilingReadyBindings, \ - PULPRQSTilingReadyBindings, PULPSGDTilingReadyBindings, PULPSoftmaxCrossEntropyGradTilingReadyBindings, \ - PULPSoftmaxCrossEntropyTilingReadyBindings, PULPSoftmaxGradTilingReadyBindings, PULPSoftmaxTilingReadyBindings, \ - PULPTransposeTilingReadyBindings, PULPUniformRQSTilingReadyBindings + PULPMatMulTilingReadyBindings, PULPMaxPool2DTilingReadyBindings, PULPMulScalarTilingReadyBindings, \ + PULPMulTilingReadyBindings, PULPReduceSumTilingReadyBindings, PULPReluTilingReadyBindings, \ + PULPRQAddTilingReadyBindings, PULPRQSConv2DTilingReadyBindings, 
PULPRQSDWConv2DTilingReadyBindings, \ + PULPRQSGEMMTilingReadyBindings, PULPRQSiHardswishTilingReadyBindings, PULPRQSMatrixVecTilingReadyBindings, \ + PULPRQSTallGEMMTilingReadyBindings, PULPRQSTilingReadyBindings, PULPSGDTilingReadyBindings, \ + PULPSoftmaxCrossEntropyGradTilingReadyBindings, PULPSoftmaxCrossEntropyTilingReadyBindings, \ + PULPSoftmaxGradTilingReadyBindings, PULPSoftmaxTilingReadyBindings, PULPTransposeTilingReadyBindings, \ + PULPUniformRQSTilingReadyBindings from Deeploy.Targets.PULPOpen.TopologyOptimizationPasses.Passes import PULPAddRequantMergePass, \ PULPConvRequantMergePass, PULPGEMMRequantMergePass, PULPMatMulRequantMergePass @@ -52,6 +53,7 @@ GELUMapper = NodeMapper(GELUParser(), PULPFPGELUTilingReadyBindings) GatherMapper = NodeMapper(GatherParser(), PULPGatherTilingReadyBindings) MulMapper = NodeMapper(MulParser(), PULPMulTilingReadyBindings) +MulScalarMapper = NodeMapper(MulScalarParser(), PULPMulScalarTilingReadyBindings) Pad1DMapper = NodeMapper(Pad1DParser(), BasicPad1DBindings) Pad2DMapper = NodeMapper(Pad2DParser(), BasicPad2DBindings) ReshapeMapper = NodeMapper(ReshapeParser(), PULPFlattenTilingReadyBindings) @@ -118,7 +120,7 @@ 'Add': AddLayer([AddMapper]), 'Flatten': ReshapeLayer([FlattenMapper]), 'Gather': GatherLayer([GatherMapper]), - 'Mul': MulLayer([MulMapper]), + 'Mul': MulLayer([MulMapper, MulScalarMapper]), 'Pad': PadLayer([Pad1DMapper, Pad2DMapper]), 'Relu': ReluLayer([ReluMapper]), 'Reshape': ReshapeLayer([ReshapeMapper]), diff --git a/Deeploy/Targets/PULPOpen/Tiler.py b/Deeploy/Targets/PULPOpen/Tiler.py index a6dbaa4e87..1ae43099d0 100644 --- a/Deeploy/Targets/PULPOpen/Tiler.py +++ b/Deeploy/Targets/PULPOpen/Tiler.py @@ -8,7 +8,7 @@ from Deeploy.Targets.Generic.TileConstraints.ConcatTileConstraint import ConcatTileConstraint from Deeploy.Targets.Generic.TileConstraints.iHardswishTileConstraint import iHardswishTileConstraint from Deeploy.Targets.Generic.TileConstraints.iRMSNormTileConstraint import iRMSNormTileConstraint -from Deeploy.Targets.Generic.TileConstraints.MulTileConstraint import MulTileConstraint +from Deeploy.Targets.Generic.TileConstraints.MulTileConstraint import MulScalarTileConstraint, MulTileConstraint from Deeploy.Targets.Generic.TileConstraints.NOPTileConstraint import NOPTileConstraint from Deeploy.Targets.Generic.TileConstraints.RQSiGELUTileConstraint import RQSiGELUTileConstraint from Deeploy.Targets.Generic.TileConstraints.RQSiHardswishTileConstraint import RQSiHardswishTileConstraint @@ -18,8 +18,8 @@ from Deeploy.Targets.PULPOpen.Bindings import PULPAddBindings, PULPConcatBindings, PULPFloatConv2DBindings, \ PULPFloatGELUBinding, PULPFloatGEMMBindings, PULPGatherBindings, PULPiHardswishBindings, PULPiRMSNormBindings, \ PULPiRQSGELUBindings, PULPLayernormBinding, PULPMatMulBindings, PULPMaxPool2DBindings, PULPMulBindings, \ - PULPReduceSumBindings, PULPReluBinding, PULPReshapeBindings, PULPRQAddBindings, PULPRQSBindings, \ - PULPRQSConv2DBindings, PULPRQSDWConv2DBindings, PULPRQSGEMMBindings, PULPRQSiHardswishBindings, \ + PULPMulScalarBindings, PULPReduceSumBindings, PULPReluBinding, PULPReshapeBindings, PULPRQAddBindings, \ + PULPRQSBindings, PULPRQSConv2DBindings, PULPRQSDWConv2DBindings, PULPRQSGEMMBindings, PULPRQSiHardswishBindings, \ PULPRQSMatrixVecBindings, PULPRQSTallGEMMBindings, PULPSGDBindings, PULPSoftmaxBindings, \ PULPSoftmaxCrossEntropyLossBindings, PULPSoftmaxCrossEntropyLossGradBindings, PULPSoftmaxGradBindings, \ PULPTransposeBindings, PULPUniformRQSBindings @@ -105,6 +105,9 @@ 
PULPMulTilingReadyBindings = TilingReadyNodeBindings(nodeBindings = PULPMulBindings, tileConstraint = MulTileConstraint()) +PULPMulScalarTilingReadyBindings = TilingReadyNodeBindings(nodeBindings = PULPMulScalarBindings, + tileConstraint = MulScalarTileConstraint()) + PULPReluTilingReadyBindings = TilingReadyNodeBindings(nodeBindings = [PULPReluBinding], tileConstraint = UnaryTileConstraint())
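
Note on the alignShapes helpers introduced in PATCH 73: a minimal, self-contained sketch (not Deeploy code; the shapes are made up) of the alignment ElementwiseTemplate.alignShapes performs via numpy broadcasting:

    import numpy as np

    in_shapes = [(1, 16, 1), (8,)]
    # Every input and the single output are aligned to the common broadcast
    # shape, mirroring ElementwiseTemplate.alignShapes' return value.
    aligned = tuple(np.broadcast_shapes(*in_shapes))
    print([aligned] * len(in_shapes), [aligned])  # [(1, 16, 8), (1, 16, 8)] [(1, 16, 8)]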
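
The _mapNode change in PATCH 77 lets an explicit per-node "engine" attribute take precedence over the Mapping-based lookup. A rough stand-alone sketch of that dispatch rule, with hypothetical engine names and ops:

    from typing import Optional

    engines = {"neureka": {"Conv"}, "cluster": {"Conv", "Mul"}}  # hypothetical

    def pick_engine(op: str, requested: Optional[str] = None) -> str:
        if requested is not None:  # explicit "engine" attribute wins
            assert requested in engines, f"Unknown engine {requested}"
            return requested
        # otherwise fall back to the first engine whose mapping covers the op
        candidates = [name for name, ops in engines.items() if op in ops]
        assert candidates, f"No mapping found for op {op}"
        return candidates[0]

    print(pick_engine("Mul"))              # cluster
    print(pick_engine("Conv", "neureka"))  # neureka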
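
On the Mul/MulScalar split in PATCH 80: MulScalarParser only claims a Mul node whose second operand has exactly one element, so the order of mappers in each platform's MulLayer decides between the scalar and the elementwise kernel. A small sketch of that acceptance test, with hypothetical shapes:

    import math
    from typing import List, Sequence

    def is_scalar_mul(input_shapes: List[Sequence[int]]) -> bool:
        # Mirrors MulScalarParser.parseNode: two inputs, second single-element
        return len(input_shapes) == 2 and math.prod(input_shapes[1]) == 1

    print(is_scalar_mul([(1, 16, 8), (1,)]))        # True  -> MulScalar kernel
    print(is_scalar_mul([(1, 16, 8), (1, 16, 8)]))  # False -> elementwise Mul kernel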