From 64a27470363ed398110dc688869953ec7ab20bd5 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Wed, 24 Sep 2025 10:06:45 +0200 Subject: [PATCH 01/54] Add OperatorDescriptor --- Deeploy/DeeployTypes.py | 155 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 155 insertions(+) diff --git a/Deeploy/DeeployTypes.py b/Deeploy/DeeployTypes.py index 8c2f5d2485..b1e11679d0 100644 --- a/Deeploy/DeeployTypes.py +++ b/Deeploy/DeeployTypes.py @@ -1020,6 +1020,152 @@ def copy(self) -> NetworkContext: return copy.copy(self) +class IoDesc: + + def __init__(self, required: Union[str, List[str]], optional: Union[str, List[str]] = []) -> None: + if isinstance(required, str): + required = [required] + self.required = required + if isinstance(optional, str): + optional = [optional] + self.optional = optional + + def symbolicName(self, idx: int) -> str: + return (self.required + self.optional)[idx] + + def checkTensors(self, tensors: Sequence[gs.Tensor]) -> bool: + return len(tensors) >= len(self.required) and \ + len(tensors) <= len(self.required) + len(self.optional) + + +class VariadicIoDesc(IoDesc): + + def __init__(self, baseName: str, minNumTensors: int = 0) -> None: + self.baseName = baseName + self.minNumTensors = minNumTensors + + def symbolicName(self, idx: int) -> str: + return f"{self.baseName}_{idx}" + + def checkTensors(self, tensors: Sequence[gs.Tensor]) -> bool: + return len(tensors) >= self.minNumTensors + + +@dataclass +class AttrDesc: + name: str + unpacker: Callable[[Any], Any] + default: Optional[Union[Any, Callable[[gs.Node], Any]]] = None + + @staticmethod + def _constUnpack(value: Any) -> Any: + if isinstance(value, gs.Constant): + return value.values.tolist() + elif isinstance(value, np.ndarray): + return value.tolist() + else: + return value + + def unpack(self, value: Any) -> Union[int, float, List[int], List[float]]: + return self.unpacker(self._constUnpack(value)) + + def getDefault(self, node: gs.Node) -> Any: + if callable(self.default): + return self.default(node) + else: + return self.default + + +@dataclass +class OperatorDescriptor: + inputDescriptor: IoDesc + outputDescriptor: IoDesc + attrDescriptors: List[AttrDesc] + + def check(self, node: gs.Node) -> bool: + """This method checks whether the node is valid. + + Parameters + ---------- + node : gs.Node + Graphsurgeon node to be validated + + Returns + ------- + bool : node validity + + """ + valid = True + + if not self.inputDescriptor.checkTensors(node.inputs): + # TODO: Change to logging + print(f"[ERROR OP {node.op}] Invalid input tensors: {node.inputs}") + valid = False + + if not self.outputDescriptor.checkTensors(node.outputs): + # TODO: Change to logging + print(f"[ERROR OP {node.op}] Invalid output tensors: {node.outputs}") + valid = False + + for attrDesc in self.attrDescriptors: + if attrDesc.default is None and not attrDesc.name in node.attrs: + # TODO: Change to logging + print(f"[ERROR OP {node.op}] Missing attribute {attrDesc.name}") + valid = False + + return valid + + def canonicalize(self, node: gs.Node, opset: int) -> bool: + _ = opset + for desc in self.attrDescriptors: + if desc.default is None: + value = node.attrs[desc.name] + else: + value = node.attrs.get(desc.name, desc.getDefault(node)) + try: + node.attrs[desc.name] = desc.unpack(value) + except ValueError as e: + raise ValueError(f"[ERROR OP {node.op}] Error unpacking the attribute {desc.name}. 
{e}") from e + return True + + def parseTensors(self, ctxt: NetworkContext, tensors: Sequence[gs.Tensor], + ioDesc: IoDesc) -> OperatorRepresentation: + opRepr = {} + for i, tensor in enumerate(tensors): + symName = ioDesc.symbolicName(i) + buffer = ctxt.lookup(tensor.name) + assert isinstance(buffer, VariableBuffer) + opRepr[symName] = buffer.name + opRepr[f"{symName}_shape"] = buffer.shape + opRepr[f"{symName}_size"] = math.prod(buffer.shape) + opRepr[f"{symName}_type"] = buffer._type + return opRepr + + def parseAttrs(self, node: gs.Node) -> OperatorRepresentation: + return node.attrs.copy() + + def parse(self, ctxt: NetworkContext, node: gs.Node) -> OperatorRepresentation: + opReprs = { + "input tensors": self.parseTensors(ctxt, node.inputs, self.inputDescriptor), + "output tensors": self.parseTensors(ctxt, node.outputs, self.outputDescriptor), + "attributes": self.parseAttrs(node), + } + + for (firstName, firstOpRepr), (secondName, secondOpRepr) in itertools.combinations(opReprs.items(), 2): + firstKeySet = set(firstOpRepr.keys()) + secondKeySet = set(secondOpRepr.keys()) + assert firstKeySet.isdisjoint(secondKeySet), \ + f"[PARSE ERROR] (Node: {node.name}, Op: {node.op}) " \ + f"Keys from parsing {firstName} clash with the keys from parsing {secondName}. "\ + f"Overlapping keys: {firstKeySet & secondKeySet}" + + resultOpRepr = {} + for opRepr in opReprs.values(): + resultOpRepr.update(opRepr) + + return resultOpRepr + + class NodeParser(): """Deeploy's core Parser class. Analyzes network nodes and evaluates whether they can be mapped by it. @@ -2429,6 +2575,7 @@ def __init__(self, graph: gs.Graph, platform: DeploymentPlatform, inputTypes: Dict[str, Type[Pointer]], + operatorDescriptors: Dict[str, OperatorDescriptor], scheduler: Callable[[gs.Graph], Schedule] = lambda graph: list(graph.nodes), name: str = 'DeeployNetwork', deeployStateDir: str = "DeeployState"): @@ -2453,6 +2600,7 @@ def __init__(self, """ self.graph = graph + self.operatorDescriptors = operatorDescriptors self.scheduler = scheduler self.layerBinding: 'OrderedDict[str, ONNXLayer]' = OrderedDict() self.parsed = False @@ -2582,6 +2730,13 @@ def _bindLayers(self): flatSchedule += subGraph for node in flatSchedule: + assert node.op in self.operatorDescriptors, \ f"[ERROR] Error parsing node {node.name}. There is no descriptor for operator {node.op}."
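+            # The descriptor drives the normalization and validation below:
+            # canonicalize() rewrites the attributes in place (gs.Constant and
+            # numpy values are unpacked into Python literals, missing attributes
+            # are filled from their defaults), then check() validates the tensor
+            # counts and required attributes. As a sketch: a MaxPool node
+            # arriving without "ceil_mode" leaves this step with
+            # ceil_mode = CeilMode.floor, and a kernel_shape list [3, 3] is
+            # unpacked to the tuple (3, 3).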
+ desc = self.operatorDescriptors[node.op] + desc.canonicalize(node, self.graph.opset) + assert desc.check(node), \ + f"[ERROR] Node {node.name} is not a valid instance of {node.op} operator" + layer = self._mapNode(node) if isinstance(layer, ONNXLayer): log.debug(f" {SUCCESS_MARK} Bind {node.name} to layer {layer.__class__.__name__}") From c5a0c71e2bae852ede4966586143d0a6b2766ea1 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Wed, 24 Sep 2025 09:57:15 +0200 Subject: [PATCH 02/54] Add OperatorDescriptor.py --- Deeploy/DeeployTypes.py | 9 +- Deeploy/OperatorDescriptor.py | 366 ++++++++++++++++++++++++++++++++++ 2 files changed, 372 insertions(+), 3 deletions(-) create mode 100644 Deeploy/OperatorDescriptor.py diff --git a/Deeploy/DeeployTypes.py b/Deeploy/DeeployTypes.py index b1e11679d0..3282c56ec3 100644 --- a/Deeploy/DeeployTypes.py +++ b/Deeploy/DeeployTypes.py @@ -1063,6 +1063,9 @@ def _constUnpack(value: Any) -> Any: return value.values.tolist() elif isinstance(value, np.ndarray): return value.tolist() + # LMACAN: hacky way to detect a 0-dim numpy array + elif hasattr(value, "ndim") and value.ndim == 0 and hasattr(value, "item"): + return value.item() else: return value @@ -1099,12 +1102,12 @@ def check(self, node: gs.Node) -> bool: if not self.inputDescriptor.checkTensors(node.inputs): # TODO: Change to logging - print(f"[ERROR OP {node.op}] Invalid input tensors: {node.inputs}") + print(f"[ERROR OP {node.op}] Invalid input tensors: {[t.name for t in node.inputs]}") valid = False if not self.outputDescriptor.checkTensors(node.outputs): # TODO: Change to logging - print(f"[ERROR OP {node.op}] Invalid output tensors: {node.outputs}") + print(f"[ERROR OP {node.op}] Invalid output tensors: {[t.name for t in node.outputs]}") valid = False for attrDesc in self.attrDescriptors: @@ -1124,7 +1127,7 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: value = node.attrs.get(desc.name, desc.getDefault(node)) try: node.attrs[desc.name] = desc.unpack(value) - except ValueError as e: + except Exception as e: raise ValueError(f"[ERROR OP {node.op}] Error unpacking the attribute {desc.name}. {e}") from e return True diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py new file mode 100644 index 0000000000..f6bd478184 --- /dev/null +++ b/Deeploy/OperatorDescriptor.py @@ -0,0 +1,366 @@ +# SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 + +from enum import Enum, IntEnum +from typing import Any, Dict, Tuple + +import numpy as np +import onnx_graphsurgeon as gs + +from Deeploy.DeeployTypes import AttrDesc, IoDesc, OperatorDescriptor, VariadicIoDesc + + +def IntUnpack(value: Any) -> int: + if isinstance(value, (list, tuple)) and len(value) == 1: + value = value[0] + + if isinstance(value, int): + return value + elif isinstance(value, float): + assert value.is_integer(), f"Received a non-integer value {value}" + return int(value) + raise ValueError(f"Unsupported value type {type(value)}") + + +def BoolUnpack(value: Any) -> bool: + value = IntUnpack(value) + assert value in [0, 1], f"Casting to bool only supported from 0, 1. 
Received {value}" + return bool(value) + + +def FloatUnpack(value: Any) -> float: + if isinstance(value, (list, tuple)) and len(value) == 1: + value = value[0] + + assert isinstance(value, (int, float)), f"Unsupported value type {type(value)}" + return float(value) + + +def IntTupleUnpack(value: Any) -> Tuple[int, ...]: + try: + return tuple(IntUnpack(item) for item in value) + except TypeError: + return (IntUnpack(value),) + + +def FloatTupleUnpack(value: Any) -> Tuple[float, ...]: + try: + return tuple(FloatUnpack(item) for item in value) + except TypeError: + return (FloatUnpack(value),) + + +def attrToTensor(node: gs.Node, attr: str) -> None: + values = node.attrs[attr] + if isinstance(values, (int, float)): + values = np.array([values]) + elif isinstance(values, (list, tuple)): + values = np.array(values) + assert isinstance(values, np.ndarray), f"Unsupported values type {type(values)}" + tensor = gs.Constant(f"{node.name}_{attr}", values) + node.inputs.append(tensor) + node.attrs.pop(attr) + + +concatDesc = OperatorDescriptor( + inputDescriptor = VariadicIoDesc("data_in", minNumTensors = 2), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [], +) + +iRMSNormDesc = OperatorDescriptor( + inputDescriptor = IoDesc(["data_in", "weight"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("D", IntUnpack), + AttrDesc("n_levels", IntUnpack), + ], +) + + +class SliceDescriptor(OperatorDescriptor): + + def canonicalize(self, node: gs.Node, opset: int) -> bool: + if opset < 10: + attrToTensor(node, "starts") + attrToTensor(node, "ends") + if "axes" in node.attrs: + attrToTensor(node, "axes") + + return super().canonicalize(node, opset) + + +# Opset: 13 +sliceDesc = SliceDescriptor( + inputDescriptor = IoDesc(["data_in", "starts", "ends"], ["axes", "steps"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [], +) + +# Opset: 1 +sliceDescOld = OperatorDescriptor( + inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("axes", IntTupleUnpack, lambda n: range(len(n.attrs["starts"]))), + AttrDesc("ends", IntTupleUnpack), + AttrDesc("starts", IntTupleUnpack), + ], +) + +transposeDesc = OperatorDescriptor( + inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [AttrDesc("perm", IntTupleUnpack)], +) + + +class CeilMode(IntEnum): + floor = 0 + ceil = 1 + + +maxPoolDesc = OperatorDescriptor(inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("ceil_mode", unpacker = CeilMode, default = CeilMode.floor), + AttrDesc("kernel_shape", IntTupleUnpack), + AttrDesc("pads", IntTupleUnpack), + AttrDesc("strides", IntTupleUnpack), + ]) + + +class PadMode(str, Enum): + constant = "constant" + reflect = "reflect" + edge = "edge" + wrap = "wrap" + + +# Opset 24 +padDesc = OperatorDescriptor( + inputDescriptor = IoDesc(["data_in", "pads"], ["constant_value", "axes"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc('mode', unpacker = PadMode, default = PadMode.constant), + ], +) + + +class PadModeOld(str, Enum): + constant = "constant" + reflect = "reflect" + edge = "edge" + + +padDescOld = OperatorDescriptor( + inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("mode", unpacker = PadModeOld, default = PadModeOld.constant), + AttrDesc("pads", IntTupleUnpack), + AttrDesc("value", FloatUnpack), + ], +) + +addDesc = 
OperatorDescriptor( + inputDescriptor = VariadicIoDesc("data_in", minNumTensors = 2), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [], +) + + +class ReduceMeanDescriptor(OperatorDescriptor): + + def canonicalize(self, node: gs.Node, opset: int) -> bool: + if opset < 18: + if "axes" in node.attrs: + attrToTensor(node, "axes") + return super().canonicalize(node, opset) + + +# Opset 18 +reduceMeanDesc = ReduceMeanDescriptor( + inputDescriptor = IoDesc("data_in", optional = "axes"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("keepdims", unpacker = BoolUnpack, default = True), + AttrDesc("noop_with_empty_axes", unpacker = BoolUnpack, default = False), + ], +) + +reduceSumDesc = OperatorDescriptor( + inputDescriptor = IoDesc("data_in", optional = "axes"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("keepdims", unpacker = BoolUnpack, default = True), + AttrDesc("noop_with_empty_axes", unpacker = BoolUnpack, default = False), + ], +) + +softmaxDesc = OperatorDescriptor( + inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [AttrDesc("axis", IntUnpack, default = -1)], +) + +softmaxGradDesc = OperatorDescriptor( + inputDescriptor = IoDesc(["upstream_grad", "softmax_output"]), + outputDescriptor = IoDesc("softmax_grad"), + attrDescriptors = [AttrDesc("axis", IntUnpack, default = -1)], +) + +iSoftmaxDesc = OperatorDescriptor( + inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("axis", IntUnpack, default = -1), + AttrDesc("coeffA", IntUnpack), + AttrDesc("coeffB", IntUnpack), + AttrDesc("coeffC", IntUnpack), + AttrDesc("log2", IntUnpack), + AttrDesc("n_levels", IntUnpack), + ], +) + +itaMaxDesc = OperatorDescriptor( + inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("axis", IntUnpack, default = -1), + AttrDesc("n_levels", IntUnpack), + ], +) + +itaPartialMaxDesc = OperatorDescriptor( + inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("axis", IntUnpack, default = -1), + AttrDesc("n_levels", IntUnpack), + AttrDesc("group_width", IntUnpack), + ], +) + + +class GeluApprox(str, Enum): + tanh = "tanh" + none = "none" + + +geluDesc = OperatorDescriptor(inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("approximate", GeluApprox, default = GeluApprox.none), + ]) + +rqsIGeluDesc = OperatorDescriptor(inputDescriptor = IoDesc(["data_in", "mul", "add", "shift"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("b", IntUnpack), + AttrDesc("one", IntUnpack), + ]) + +iHardswishDesc = OperatorDescriptor(inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("one_over_six", IntUnpack), + AttrDesc("six", IntUnpack), + AttrDesc("three", IntUnpack), + ]) + +iNoNormDesc = OperatorDescriptor(inputDescriptor = IoDesc(["data_in", "weights", "bias"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("D", IntUnpack), + AttrDesc("mul", IntUnpack), + AttrDesc("n_levels", IntUnpack), + ]) + +quantDesc = OperatorDescriptor( + inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("scale", FloatUnpack), + AttrDesc("zero_point", FloatUnpack), + AttrDesc("bit_width", IntUnpack), + AttrDesc("signed", BoolUnpack, 
default = True), + AttrDesc("min_val", + IntUnpack, + default = lambda node: -(2**(node.attrs["bit_width"] - 1)) if node.attrs["signed"] else 0), + AttrDesc("max_val", + IntUnpack, + default = lambda node: 2**(node.attrs["bit_width"] - 1) - 1 + if node.attrs["signed"] else 2**node.attrs["bit_width"] - 1), + ], +) + + +class AutoPad(str, Enum): + NOTSET = "NOTSET" + SAME_UPPER = "SAME_UPPER" + SAME_LOWER = "SAME_LOWER" + VALID = "VALID" + + +def _dilationsDefault(node: gs.Node) -> Tuple[int, ...]: + # Remove 2 dims for input and output channels + nSpatialDims = len(node.inputs[1].shape) - 2 + return tuple([1] * nSpatialDims) + + +def _kernelShapeDefault(node: gs.Node) -> Tuple[int, ...]: + # Remove 2 dims for input and output channels + nSpatialDims = len(node.inputs[1].shape) - 2 + return node.inputs[1].shape[-nSpatialDims:] + + +def _stridesDefault(node: gs.Node) -> Tuple[int, ...]: + # Remove 2 dims for input and output channels + nSpatialDims = len(node.inputs[1].shape) - 2 + return tuple([1] * nSpatialDims) + + +def _padsDefault(node: gs.Node) -> Tuple[int, ...]: + # Remove 2 dims for input and output channels + nSpatialDims = len(node.inputs[1].shape) - 2 + # Two 0's per dimension for begin and end + return tuple([0] * (2 * nSpatialDims)) + + +convDesc = OperatorDescriptor( + inputDescriptor = IoDesc(["data_in", "weight"], optional = "bias"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("auto_pad", AutoPad, default = AutoPad.NOTSET), + AttrDesc("dilations", IntTupleUnpack, default = _dilationsDefault), + AttrDesc("group", IntUnpack, default = 1), + AttrDesc("kernel_shape", IntTupleUnpack, default = _kernelShapeDefault), + AttrDesc("pads", IntTupleUnpack, default = _padsDefault), + AttrDesc("strides", IntTupleUnpack, default = _stridesDefault), + ], +) + +defaultOperatorDescriptors: Dict[str, OperatorDescriptor] = { + "Concat": concatDesc, + "Conv": convDesc, + "iRMSNorm": iRMSNormDesc, + "Slice": sliceDesc, + "Transpose": transposeDesc, + "MaxPool": maxPoolDesc, + "Pad": padDescOld, + "Add": addDesc, + "ReduceMean": reduceMeanDesc, + "ReduceSum": reduceSumDesc, + "Softmax": softmaxDesc, + "iSoftmax": iSoftmaxDesc, + "SoftmaxGrad": softmaxGradDesc, + "Gelu": geluDesc, + "RequantizediGELU": rqsIGeluDesc, + "iHardswish": iHardswishDesc, + "Quant": quantDesc, + "iNoNorm": iNoNormDesc, + "ITAMax": itaMaxDesc, + "ITAPartialMax": itaPartialMaxDesc, +} From e31ea13a65cb7748dabe4fdb0c1134d3ba2fb1aa Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Wed, 24 Sep 2025 15:37:26 +0200 Subject: [PATCH 03/54] Add operatorDescriptors to NetworkDeployers --- .../NetworkDeployers/SignPropDeployer.py | 5 +++-- Deeploy/DeeployTypes.py | 9 ++++++++- .../NetworkDeployers/EngineColoringDeployer.py | 6 ++++-- .../NetworkDeployers/MemoryLevelDeployer.py | 10 ++++++---- Deeploy/Targets/Chimera/Deployer.py | 4 +++- Deeploy/Targets/CortexM/Deployer.py | 4 +++- Deeploy/Targets/Generic/Deployer.py | 4 +++- Deeploy/Targets/MemPool/Deployer.py | 5 +++-- Deeploy/Targets/Neureka/Deployer.py | 5 +++-- Deeploy/Targets/PULPOpen/Deployer.py | 5 ++++- Deeploy/Targets/Snitch/Deployer.py | 4 +++- Deeploy/Targets/SoftHier/Deployer.py | 5 +++-- DeeployTest/testMemoryLevelExtension.py | 5 +++++ DeeployTest/testUtils/dmaUtils.py | 3 +++ DeeployTest/testUtils/platformMapping.py | 15 ++++++++++++++- 15 files changed, 68 insertions(+), 21 deletions(-) diff --git a/Deeploy/CommonExtensions/NetworkDeployers/SignPropDeployer.py b/Deeploy/CommonExtensions/NetworkDeployers/SignPropDeployer.py index 
7a9fbea1ae..e576ff865b 100644 --- a/Deeploy/CommonExtensions/NetworkDeployers/SignPropDeployer.py +++ b/Deeploy/CommonExtensions/NetworkDeployers/SignPropDeployer.py @@ -7,7 +7,7 @@ import onnx_graphsurgeon as gs from Deeploy.AbstractDataTypes import Pointer -from Deeploy.DeeployTypes import DeploymentPlatform, NetworkDeployer, TopologyOptimizer +from Deeploy.DeeployTypes import DeploymentPlatform, NetworkDeployer, OperatorDescriptor, TopologyOptimizer from Deeploy.Logging import DEFAULT_LOGGER as log @@ -18,12 +18,13 @@ def __init__(self, deploymentPlatform: DeploymentPlatform, inputTypes: Dict[str, Type[Pointer]], loweringOptimizer: TopologyOptimizer, + operatorDescriptors: Dict[str, OperatorDescriptor], scheduler: Callable = lambda x: x, name: str = 'DeeployNetwork', default_channels_first: bool = True, deeployStateDir: str = "DeeployState", inputOffsets: Dict[str, int] = {}): - super().__init__(graph, deploymentPlatform, inputTypes, loweringOptimizer, scheduler, name, + super().__init__(graph, deploymentPlatform, inputTypes, loweringOptimizer, operatorDescriptors, scheduler, name, default_channels_first, deeployStateDir) if inputOffsets == {}: diff --git a/Deeploy/DeeployTypes.py b/Deeploy/DeeployTypes.py index 3282c56ec3..91d22d55ad 100644 --- a/Deeploy/DeeployTypes.py +++ b/Deeploy/DeeployTypes.py @@ -3339,6 +3339,7 @@ def __init__(self, deploymentPlatform: DeploymentPlatform, inputTypes: Dict[str, Type[Pointer]], loweringOptimizer: TopologyOptimizer, + operatorDescriptors: Dict[str, OperatorDescriptor], scheduler: Callable[[gs.Graph], Schedule] = lambda graph: list(graph.nodes), name: str = 'DeeployNetwork', default_channels_first: bool = True, @@ -3371,7 +3372,13 @@ def __init__(self, """ - super().__init__(graph, deploymentPlatform, inputTypes, scheduler, name, deeployStateDir = deeployStateDir) + super().__init__(graph, + deploymentPlatform, + inputTypes, + operatorDescriptors, + scheduler, + name, + deeployStateDir = deeployStateDir) self.loweringOptimizer = loweringOptimizer self.default_channels_first = default_channels_first diff --git a/Deeploy/EngineExtension/NetworkDeployers/EngineColoringDeployer.py b/Deeploy/EngineExtension/NetworkDeployers/EngineColoringDeployer.py index 4b05ab5be4..eb7175f613 100644 --- a/Deeploy/EngineExtension/NetworkDeployers/EngineColoringDeployer.py +++ b/Deeploy/EngineExtension/NetworkDeployers/EngineColoringDeployer.py @@ -8,7 +8,8 @@ from Deeploy.AbstractDataTypes import Pointer from Deeploy.CommonExtensions.NetworkDeployers.NetworkDeployerWrapper import NetworkDeployerWrapper -from Deeploy.DeeployTypes import DeploymentPlatform, NetworkDeployer, ONNXLayer, Schedule, TopologyOptimizer +from Deeploy.DeeployTypes import DeploymentPlatform, NetworkDeployer, ONNXLayer, OperatorDescriptor, Schedule, \ + TopologyOptimizer from Deeploy.EngineExtension.OptimizationPasses.TopologyOptimizationPasses.EngineColoringPasses import \ EngineColoringPass, EngineMapper @@ -20,12 +21,13 @@ def __init__(self, deploymentPlatform: DeploymentPlatform, inputTypes: Dict[str, Type[Pointer]], loweringOptimizer: TopologyOptimizer, + operatorDescriptors: Dict[str, OperatorDescriptor], scheduler: Callable[[gs.Graph], Schedule] = lambda graph: list(graph.nodes), name: str = 'DeeployNetwork', default_channels_first: bool = True, deeployStateDir: str = "DeeployState", engineMapperCls: Type[EngineMapper] = EngineMapper): - super().__init__(graph, deploymentPlatform, inputTypes, loweringOptimizer, scheduler, name, + super().__init__(graph, deploymentPlatform, inputTypes, 
loweringOptimizer, operatorDescriptors, scheduler, name, default_channels_first, deeployStateDir) self._initEngineColoringDeployer(engineMapperCls) diff --git a/Deeploy/MemoryLevelExtension/NetworkDeployers/MemoryLevelDeployer.py b/Deeploy/MemoryLevelExtension/NetworkDeployers/MemoryLevelDeployer.py index 2599f9e819..d75b28433e 100644 --- a/Deeploy/MemoryLevelExtension/NetworkDeployers/MemoryLevelDeployer.py +++ b/Deeploy/MemoryLevelExtension/NetworkDeployers/MemoryLevelDeployer.py @@ -11,8 +11,8 @@ from Deeploy.CommonExtensions.NetworkDeployers.NetworkDeployerWrapper import NetworkDeployerWrapper from Deeploy.CommonExtensions.NetworkDeployers.SignPropDeployer import SignPropDeployer from Deeploy.DeeployTypes import CodeGenVerbosity, ConstantBuffer, DeploymentEngine, DeploymentPlatform, \ - NetworkContext, NetworkDeployer, NetworkOptimizationPass, NetworkOptimizer, Schedule, StructBuffer, \ - TopologyOptimizer, TransientBuffer, VariableBuffer, _NoVerbosity + NetworkContext, NetworkDeployer, NetworkOptimizationPass, NetworkOptimizer, OperatorDescriptor, Schedule, \ + StructBuffer, TopologyOptimizer, TransientBuffer, VariableBuffer, _NoVerbosity from Deeploy.Logging import DEFAULT_LOGGER as log from Deeploy.MemoryLevelExtension.MemoryLevels import MemoryHierarchy, MemoryLevel from Deeploy.MemoryLevelExtension.OptimizationPasses.MemoryLevelAnnotationPasses import AnnotateDefaultMemoryLevel @@ -112,12 +112,13 @@ def __init__(self, deploymentPlatform: Union[MemoryPlatform, MemoryPlatformWrapper], inputTypes: Dict[str, Type[Pointer]], loweringOptimizer: TopologyOptimizer, + operatorDescriptors: Dict[str, OperatorDescriptor], scheduler: Callable[[gs.Graph], Schedule] = lambda graph: list(graph.nodes), name: str = 'DeeployNetwork', default_channels_first: bool = True, deeployStateDir: str = "DeeployState", memoryLevelAnnotationPasses: List[NetworkOptimizationPass] = []): - super().__init__(graph, deploymentPlatform, inputTypes, loweringOptimizer, scheduler, name, + super().__init__(graph, deploymentPlatform, inputTypes, loweringOptimizer, operatorDescriptors, scheduler, name, default_channels_first, deeployStateDir) if len(memoryLevelAnnotationPasses) == 0: memoryLevelAnnotationPasses.append(AnnotateDefaultMemoryLevel(self.Platform.memoryHierarchy)) @@ -155,13 +156,14 @@ def __init__(self, deploymentPlatform: Union[MemoryPlatform, MemoryPlatformWrapper], inputTypes: Dict[str, Type[Pointer]], loweringOptimizer: TopologyOptimizer, + operatorDescriptors: Dict[str, OperatorDescriptor], scheduler: Callable = lambda x: x, name: str = 'DeeployNetwork', default_channels_first: bool = True, deeployStateDir: str = "DeeployState", inputOffsets: Dict[str, int] = {}, memoryLevelAnnotationPasses: List[NetworkOptimizationPass] = []): - super().__init__(graph, deploymentPlatform, inputTypes, loweringOptimizer, scheduler, name, + super().__init__(graph, deploymentPlatform, inputTypes, loweringOptimizer, operatorDescriptors, scheduler, name, default_channels_first, deeployStateDir, inputOffsets) if len(memoryLevelAnnotationPasses) == 0: memoryLevelAnnotationPasses.append(AnnotateDefaultMemoryLevel(self.Platform.memoryHierarchy)) diff --git a/Deeploy/Targets/Chimera/Deployer.py b/Deeploy/Targets/Chimera/Deployer.py index ba28279b66..85b0496e39 100644 --- a/Deeploy/Targets/Chimera/Deployer.py +++ b/Deeploy/Targets/Chimera/Deployer.py @@ -8,7 +8,7 @@ from Deeploy.AbstractDataTypes import Pointer from Deeploy.CommonExtensions.NetworkDeployers.SignPropDeployer import SignPropDeployer -from Deeploy.DeeployTypes import 
DeploymentPlatform, TopologyOptimizer +from Deeploy.DeeployTypes import DeploymentPlatform, OperatorDescriptor, TopologyOptimizer class ChimeraDeployer(SignPropDeployer): @@ -18,6 +18,7 @@ def __init__(self, deploymentPlatform: DeploymentPlatform, inputTypes: Dict[str, Type[Pointer]], loweringOptimizer: TopologyOptimizer, + operatorDescriptors: Dict[str, OperatorDescriptor], scheduler: Callable = lambda x: x, name: str = 'DeeployNetwork', default_channels_first = False, @@ -27,6 +28,7 @@ def __init__(self, deploymentPlatform, inputTypes, loweringOptimizer, + operatorDescriptors, scheduler, name, default_channels_first = default_channels_first, diff --git a/Deeploy/Targets/CortexM/Deployer.py b/Deeploy/Targets/CortexM/Deployer.py index bef8fdcf36..9a4f27b061 100644 --- a/Deeploy/Targets/CortexM/Deployer.py +++ b/Deeploy/Targets/CortexM/Deployer.py @@ -11,7 +11,7 @@ from Deeploy.CommonExtensions.OptimizationPasses.TopologyOptimizationPasses.DebugPasses import DebugPrintMergePass from Deeploy.CommonExtensions.OptimizationPasses.TopologyOptimizationPasses.LoweringOptimizationPasses import \ NCHWtoNHWCPass, TransposeMatmulInputsPass -from Deeploy.DeeployTypes import DeploymentPlatform, TopologyOptimizer +from Deeploy.DeeployTypes import DeploymentPlatform, OperatorDescriptor, TopologyOptimizer from Deeploy.Targets.Generic.TopologyOptimizationPasses.Passes import TransposeConstOptPass, TransposeMergePass @@ -22,6 +22,7 @@ def __init__(self, deploymentPlatform: DeploymentPlatform, inputTypes: Dict[str, Type[Pointer]], loweringOptimizer: TopologyOptimizer, + operatorDescriptors: Dict[str, OperatorDescriptor], scheduler: Callable = lambda x: x, name: str = 'DeeployNetwork', default_channels_first = False, @@ -32,6 +33,7 @@ def __init__(self, deploymentPlatform, inputTypes, loweringOptimizer, + operatorDescriptors, scheduler, name, default_channels_first = default_channels_first, diff --git a/Deeploy/Targets/Generic/Deployer.py b/Deeploy/Targets/Generic/Deployer.py index 3cef57a2ea..9bf89a8a0e 100644 --- a/Deeploy/Targets/Generic/Deployer.py +++ b/Deeploy/Targets/Generic/Deployer.py @@ -11,7 +11,7 @@ from Deeploy.CommonExtensions.OptimizationPasses.TopologyOptimizationPasses.DebugPasses import DebugPrintMergePass from Deeploy.CommonExtensions.OptimizationPasses.TopologyOptimizationPasses.LoweringOptimizationPasses import \ NCHWtoNHWCPass, TransposeMatmulInputsPass -from Deeploy.DeeployTypes import DeploymentPlatform, TopologyOptimizer +from Deeploy.DeeployTypes import DeploymentPlatform, OperatorDescriptor, TopologyOptimizer from Deeploy.Targets.Generic.TopologyOptimizationPasses.Passes import TransposeConstOptPass, TransposeMergePass @@ -22,6 +22,7 @@ def __init__(self, deploymentPlatform: DeploymentPlatform, inputTypes: Dict[str, Type[Pointer]], loweringOptimizer: TopologyOptimizer, + operatorDescriptors: Dict[str, OperatorDescriptor], scheduler: Callable = lambda x: x, name: str = 'DeeployNetwork', default_channels_first = False, @@ -32,6 +33,7 @@ def __init__(self, deploymentPlatform, inputTypes, loweringOptimizer, + operatorDescriptors, scheduler, name, default_channels_first = default_channels_first, diff --git a/Deeploy/Targets/MemPool/Deployer.py b/Deeploy/Targets/MemPool/Deployer.py index 5431320978..968787972a 100644 --- a/Deeploy/Targets/MemPool/Deployer.py +++ b/Deeploy/Targets/MemPool/Deployer.py @@ -11,7 +11,7 @@ from Deeploy.CommonExtensions.OptimizationPasses.TopologyOptimizationPasses.DebugPasses import DebugPrintMergePass from 
Deeploy.CommonExtensions.OptimizationPasses.TopologyOptimizationPasses.LoweringOptimizationPasses import \ NCHWtoNHWCPass, TransposeMatmulInputsPass -from Deeploy.DeeployTypes import DeploymentPlatform, TopologyOptimizer +from Deeploy.DeeployTypes import DeploymentPlatform, OperatorDescriptor, TopologyOptimizer from Deeploy.Targets.Generic.TopologyOptimizationPasses.Passes import TransposeConstOptPass, TransposeMergePass @@ -22,12 +22,13 @@ def __init__(self, deploymentPlatform: DeploymentPlatform, inputTypes: Dict[str, Type[Pointer]], loweringOptimizer: TopologyOptimizer, + operatorDescriptors: Dict[str, OperatorDescriptor], scheduler: Callable = lambda x: x, name: str = 'DeeployNetwork', default_channels_first: bool = True, deeployStateDir: str = "DeeployState", inputOffsets: Dict[str, int] = {}): - super().__init__(graph, deploymentPlatform, inputTypes, loweringOptimizer, scheduler, name, + super().__init__(graph, deploymentPlatform, inputTypes, loweringOptimizer, operatorDescriptors, scheduler, name, default_channels_first, deeployStateDir) self.inputOffsets = inputOffsets diff --git a/Deeploy/Targets/Neureka/Deployer.py b/Deeploy/Targets/Neureka/Deployer.py index be34e1f4d3..6d96f8d097 100644 --- a/Deeploy/Targets/Neureka/Deployer.py +++ b/Deeploy/Targets/Neureka/Deployer.py @@ -9,7 +9,7 @@ from Deeploy.AbstractDataTypes import Pointer from Deeploy.CommonExtensions.OptimizationPasses.TopologyOptimizationPasses.LoweringOptimizationPasses import \ NCHWtoNHWCPass, PULPNCHWtoNHWCPass -from Deeploy.DeeployTypes import DeploymentPlatform, TopologyOptimizer +from Deeploy.DeeployTypes import DeploymentPlatform, OperatorDescriptor, TopologyOptimizer from Deeploy.Targets.Neureka.TopologyOptimizationPasses.Passes import ConvEngineDiscolorationPass, \ NeurekaOptimizationPass from Deeploy.Targets.PULPOpen.Deployer import PULPDeployer @@ -22,12 +22,13 @@ def __init__(self, deploymentPlatform: DeploymentPlatform, inputTypes: Dict[str, Type[Pointer]], loweringOptimizer: TopologyOptimizer, + operatorDescriptors: Dict[str, OperatorDescriptor], scheduler: Callable = lambda graph: list(graph.nodes), name: str = 'DeeployNetwork', default_channels_first = False, deeployStateDir: str = "DeeployStateDir", inputOffsets = {}): - super().__init__(graph, deploymentPlatform, inputTypes, loweringOptimizer, scheduler, name, + super().__init__(graph, deploymentPlatform, inputTypes, loweringOptimizer, operatorDescriptors, scheduler, name, default_channels_first, deeployStateDir, inputOffsets) if self.Platform.engines[0].enable3x3: diff --git a/Deeploy/Targets/PULPOpen/Deployer.py b/Deeploy/Targets/PULPOpen/Deployer.py index 86bf02e578..17412c8da4 100644 --- a/Deeploy/Targets/PULPOpen/Deployer.py +++ b/Deeploy/Targets/PULPOpen/Deployer.py @@ -12,7 +12,8 @@ from Deeploy.CommonExtensions.OptimizationPasses.BindingsOptimizationPasses.AutoTranspose import AutoTransposeMergePass from Deeploy.CommonExtensions.OptimizationPasses.TopologyOptimizationPasses.LoweringOptimizationPasses import \ PULPNCHWtoNHWCPass, RemoveGlobalOutputReshapePass, TransposeMatmulInputsPass -from Deeploy.DeeployTypes import ConstantBuffer, DeploymentPlatform, NodeTemplate, TopologyOptimizer, VariableBuffer +from Deeploy.DeeployTypes import ConstantBuffer, DeploymentPlatform, NodeTemplate, OperatorDescriptor, \ + TopologyOptimizer, VariableBuffer from Deeploy.Targets.Generic.TopologyOptimizationPasses.Passes import ReshapeConstOptPass, TransposeConstOptPass, \ TransposeMergePass, TransposeNoPermOptPass, TransposeSplitPass from 
Deeploy.Targets.PULPOpen.TopologyOptimizationPasses.Passes import RQAddTransposeSquashPass @@ -33,6 +34,7 @@ def __init__(self, deploymentPlatform: DeploymentPlatform, inputTypes: Dict[str, Type[Pointer]], loweringOptimizer: TopologyOptimizer, + operatorDescriptors: Dict[str, OperatorDescriptor], scheduler: Callable = lambda x: x, name: str = 'DeeployNetwork', default_channels_first = False, @@ -42,6 +44,7 @@ def __init__(self, deploymentPlatform, inputTypes, loweringOptimizer, + operatorDescriptors, scheduler, name, default_channels_first = default_channels_first, diff --git a/Deeploy/Targets/Snitch/Deployer.py b/Deeploy/Targets/Snitch/Deployer.py index 7c3922a6bb..4daab3b9f5 100644 --- a/Deeploy/Targets/Snitch/Deployer.py +++ b/Deeploy/Targets/Snitch/Deployer.py @@ -10,7 +10,7 @@ from Deeploy.CommonExtensions.NetworkDeployers.SignPropDeployer import SignPropDeployer from Deeploy.CommonExtensions.OptimizationPasses.TopologyOptimizationPasses.LoweringOptimizationPasses import \ NCHWtoNHWCPass, RemoveGlobalOutputReshapePass, TransposeMatmulInputsPass -from Deeploy.DeeployTypes import DeploymentPlatform, TopologyOptimizer +from Deeploy.DeeployTypes import DeploymentPlatform, OperatorDescriptor, TopologyOptimizer from Deeploy.Targets.Generic.TopologyOptimizationPasses.Passes import ReshapeConstOptPass, TransposeConstOptPass, \ TransposeMergePass, TransposeSplitPass @@ -22,6 +22,7 @@ def __init__(self, deploymentPlatform: DeploymentPlatform, inputTypes: Dict[str, Type[Pointer]], loweringOptimizer: TopologyOptimizer, + operatorDescriptors: Dict[str, OperatorDescriptor], scheduler: Callable = lambda x: x, name: str = 'DeeployNetwork', default_channels_first = False, @@ -31,6 +32,7 @@ def __init__(self, deploymentPlatform, inputTypes, loweringOptimizer, + operatorDescriptors, scheduler, name, default_channels_first = default_channels_first, diff --git a/Deeploy/Targets/SoftHier/Deployer.py b/Deeploy/Targets/SoftHier/Deployer.py index e4ab37f299..4827ba83b9 100644 --- a/Deeploy/Targets/SoftHier/Deployer.py +++ b/Deeploy/Targets/SoftHier/Deployer.py @@ -8,7 +8,7 @@ from Deeploy.AbstractDataTypes import Pointer from Deeploy.CommonExtensions.NetworkDeployers.SignPropDeployer import SignPropDeployer -from Deeploy.DeeployTypes import DeploymentPlatform, TopologyOptimizer +from Deeploy.DeeployTypes import DeploymentPlatform, OperatorDescriptor, TopologyOptimizer class SoftHierDeployer(SignPropDeployer): @@ -18,12 +18,13 @@ def __init__(self, deploymentPlatform: DeploymentPlatform, inputTypes: Dict[str, Type[Pointer]], loweringOptimizer: TopologyOptimizer, + operatorDescriptors: Dict[str, OperatorDescriptor], scheduler: Callable = lambda x: x, name: str = 'DeeployNetwork', default_channels_first: bool = True, deeployStateDir: str = "DeeployState", inputOffsets: Dict[str, int] = {}): - super().__init__(graph, deploymentPlatform, inputTypes, loweringOptimizer, scheduler, name, + super().__init__(graph, deploymentPlatform, inputTypes, loweringOptimizer, operatorDescriptors, scheduler, name, default_channels_first, deeployStateDir) self.inputOffsets = inputOffsets diff --git a/DeeployTest/testMemoryLevelExtension.py b/DeeployTest/testMemoryLevelExtension.py index 0e1ed6cc43..a6a1cf37d2 100644 --- a/DeeployTest/testMemoryLevelExtension.py +++ b/DeeployTest/testMemoryLevelExtension.py @@ -18,6 +18,7 @@ from Deeploy.MemoryLevelExtension.MemoryLevels import MemoryHierarchy, MemoryLevel from Deeploy.MemoryLevelExtension.NetworkDeployers.MemoryLevelDeployer import MemoryDeployerWrapper, \ 
MemoryLevelAwareSignPropDeployer +from Deeploy.OperatorDescriptor import defaultOperatorDescriptors from Deeploy.Targets.CortexM.Platform import CMSISEngine, CMSISMapping, CMSISOptimizer, CMSISPlatform from Deeploy.Targets.Generic.Platform import GenericEngine, GenericMapping, GenericOptimizer, GenericPlatform from Deeploy.Targets.Generic.TopologyOptimizationPasses.Passes import TransposeConstOptPass, TransposeMergePass @@ -83,6 +84,7 @@ MockPlatform, inputTypes, CMSISOptimizer, + defaultOperatorDescriptors, defaultScheduler, name = "DeeployNetwork", deeployStateDir = _DEEPLOYSTATEDIR, @@ -106,6 +108,7 @@ MockPlatform, inputTypes, MemPoolOptimizer, + defaultOperatorDescriptors, defaultScheduler, name = "DeeployNetwork", deeployStateDir = _DEEPLOYSTATEDIR, @@ -121,6 +124,7 @@ MockPlatform, inputTypes, GenericOptimizer, + defaultOperatorDescriptors, defaultScheduler, name = "DeeployNetworkMock", deeployStateDir = _DEEPLOYSTATEDIRMOCK, @@ -136,6 +140,7 @@ MockPlatform, inputTypes, PULPOptimizer, + defaultOperatorDescriptors, defaultScheduler, name = "DeeployNetworkMock", deeployStateDir = _DEEPLOYSTATEDIRMOCK, diff --git a/DeeployTest/testUtils/dmaUtils.py b/DeeployTest/testUtils/dmaUtils.py index 3266ce5129..3205275fda 100644 --- a/DeeployTest/testUtils/dmaUtils.py +++ b/DeeployTest/testUtils/dmaUtils.py @@ -17,6 +17,7 @@ MemoryPlatformWrapper from Deeploy.MemoryLevelExtension.OptimizationPasses.MemoryLevelAnnotationPasses import AnnotateDefaultMemoryLevel, \ AnnotateIOMemoryLevel +from Deeploy.OperatorDescriptor import defaultOperatorDescriptors from Deeploy.Targets.PULPOpen.Deployer import PULPDeployer from Deeploy.Targets.PULPOpen.Platform import MemoryPULPPlatform, PULPOptimizer from Deeploy.Targets.Snitch.Deployer import SnitchDeployer @@ -299,6 +300,7 @@ def setup_pulp_deployer(defaultMemory: str, targetMemory: str, graph: gs.Graph, platform, inputTypes, PULPOptimizer, + defaultOperatorDescriptors, defaultScheduler, default_channels_first = True, deeployStateDir = deeployStateDir) @@ -340,6 +342,7 @@ def setup_snitch_deployer(defaultMemory: str, targetMemory: str, graph: gs.Graph platform, inputTypes, SnitchOptimizer, + defaultOperatorDescriptors, defaultScheduler, deeployStateDir = deeployStateDir) memoryLevelAnnotationPasses = [AnnotateIOMemoryLevel(defaultMemory), AnnotateDefaultMemoryLevel(memoryHierarchy)] diff --git a/DeeployTest/testUtils/platformMapping.py b/DeeployTest/testUtils/platformMapping.py index 48c5777905..d02c3da64e 100644 --- a/DeeployTest/testUtils/platformMapping.py +++ b/DeeployTest/testUtils/platformMapping.py @@ -7,9 +7,10 @@ import onnx_graphsurgeon as gs from Deeploy.AbstractDataTypes import Pointer -from Deeploy.DeeployTypes import DeploymentPlatform, NetworkDeployer, TopologyOptimizer +from Deeploy.DeeployTypes import DeploymentPlatform, NetworkDeployer, OperatorDescriptor, TopologyOptimizer from Deeploy.MemoryLevelExtension.MemoryLevels import MemoryHierarchy, MemoryLevel from Deeploy.MemoryLevelExtension.NetworkDeployers.MemoryLevelDeployer import MemoryPlatform, MemoryPlatformWrapper +from Deeploy.OperatorDescriptor import defaultOperatorDescriptors from Deeploy.Targets.Chimera.Deployer import ChimeraDeployer from Deeploy.Targets.Chimera.Platform import ChimeraOptimizer, ChimeraPlatform from Deeploy.Targets.CortexM.Deployer import CMSISDeployer @@ -93,6 +94,7 @@ def mapDeployer(platform: DeploymentPlatform, graph: gs.Graph, inputTypes: Dict[str, Type[Pointer]], loweringOptimizer: Optional[TopologyOptimizer] = None, + operatorDescriptors: 
Optional[Dict[str, OperatorDescriptor]] = None, scheduler: Optional[Callable] = None, name: Optional[str] = None, default_channels_first: Optional[bool] = None, @@ -108,6 +110,9 @@ def mapDeployer(platform: DeploymentPlatform, if name is None: name = "DeeployNetwork" + if operatorDescriptors is None: + operatorDescriptors = defaultOperatorDescriptors + if isinstance(platform, CMSISPlatform): if loweringOptimizer is None: @@ -120,6 +125,7 @@ def mapDeployer(platform: DeploymentPlatform, platform, inputTypes, loweringOptimizer, + operatorDescriptors, scheduler, name = name, default_channels_first = default_channels_first, @@ -138,6 +144,7 @@ def mapDeployer(platform: DeploymentPlatform, platform, inputTypes, loweringOptimizer, + operatorDescriptors, scheduler, name = name, default_channels_first = default_channels_first, @@ -156,6 +163,7 @@ def mapDeployer(platform: DeploymentPlatform, platform, inputTypes, loweringOptimizer, + operatorDescriptors, scheduler, name = name, default_channels_first = default_channels_first, @@ -177,6 +185,7 @@ def mapDeployer(platform: DeploymentPlatform, platform, inputTypes, loweringOptimizer, + operatorDescriptors, scheduler, name = name, default_channels_first = default_channels_first, @@ -195,6 +204,7 @@ def mapDeployer(platform: DeploymentPlatform, platform, inputTypes, loweringOptimizer, + operatorDescriptors, scheduler, name = name, default_channels_first = default_channels_first, @@ -212,6 +222,7 @@ def mapDeployer(platform: DeploymentPlatform, platform, inputTypes, loweringOptimizer, + operatorDescriptors, scheduler, name = name, default_channels_first = default_channels_first, @@ -228,6 +239,7 @@ def mapDeployer(platform: DeploymentPlatform, platform, inputTypes, loweringOptimizer, + operatorDescriptors, scheduler, name = name, default_channels_first = default_channels_first, @@ -244,6 +256,7 @@ def mapDeployer(platform: DeploymentPlatform, platform, inputTypes, loweringOptimizer, + operatorDescriptors, scheduler, name = name, default_channels_first = default_channels_first, From 550b559d8fcb083e01ee6b566bd190a5e96ceccb Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Wed, 24 Sep 2025 15:37:52 +0200 Subject: [PATCH 04/54] Fix extract padding pass --- .../TopologyOptimizationPasses/Passes.py | 69 ++++++++++--------- 1 file changed, 37 insertions(+), 32 deletions(-) diff --git a/Deeploy/Targets/Generic/TopologyOptimizationPasses/Passes.py b/Deeploy/Targets/Generic/TopologyOptimizationPasses/Passes.py index b881529f7e..09ed0b6c7d 100644 --- a/Deeploy/Targets/Generic/TopologyOptimizationPasses/Passes.py +++ b/Deeploy/Targets/Generic/TopologyOptimizationPasses/Passes.py @@ -353,44 +353,49 @@ def __init__(self): super().__init__(graph, _split_add_fun, name) -def _extract_padding_fun_conv(graph: gs.Graph, match: Match, name: str, value = 0): +def _extract_padding_fun_conv(graph: gs.Graph, match: Match, name: str, value = 0) -> gs.Graph: + conv = list(match.nodes_map.values())[0] - matched_nodes = [m for k, m in match.nodes_map.items()] - conv = matched_nodes[0] - if 'pads' in conv.attrs and np.sum(conv.attrs['pads']) > 1: - pads = copy.deepcopy(conv.attrs['pads']) - shape = copy.deepcopy(conv.inputs[0].shape) - newPads = np.zeros(2 * len(shape)) - assert len(shape) - 2 == len(pads) / 2, "Conv padding dims do not match!" 
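The replacement code further below maps the Conv's spatial-only pads attribute onto the full-rank pads of an explicit Pad node; a worked sketch of that mapping, assuming an NCHW input (shapes and pad values are illustrative, not taken from the patch):

convShape = [1, 8, 16, 16]    # assumed NCHW input shape of the Conv
convPads = [1, 2, 1, 2]       # spatial pads only: H_begin, W_begin, H_end, W_end
nonSpatialDimCount = len(convShape) - len(convPads) // 2    # 2 (batch and channel dims)
beginConvPads = convPads[:len(convPads) // 2]    # [1, 2]
endConvPads = convPads[len(convPads) // 2:]      # [1, 2]
pads = [0] * nonSpatialDimCount + beginConvPads + [0] * nonSpatialDimCount + endConvPads
# pads == [0, 0, 1, 2, 0, 0, 1, 2]
shape = [begin + dim + end for dim, begin, end in zip(convShape, pads[:len(pads) // 2], pads[len(pads) // 2:])]
# shape == [1, 8, 18, 20], the shape of the padded intermediate tensor

The zero entries cover the batch and channel dimensions, matching the ONNX Pad layout [x1_begin, x2_begin, ..., x1_end, x2_end, ...].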
- newShape = shape + if 'pads' not in conv.attrs: + return graph - beginPads = pads[0:len(pads) // 2] - endPads = pads[len(pads) // 2:] - for idx, i in enumerate(beginPads): - newShape[2 + idx] = newShape[2 + idx] + i - newPads[2 + idx] = i + convPads = conv.attrs['pads'] - for idx, i in enumerate(endPads): - newShape[2 + idx] = newShape[2 + idx] + i - newPads[len(newPads) // 2 + 2 + idx] = i + if all(p == 0 for p in convPads): + return graph - newConvInput = gs.Variable(name + '_padded_input', dtype = np.float32, shape = newShape) - #valConst = gs.Constant('value', np.array(0)) - conv.attrs['pads'] = [0 for pad in conv.attrs['pads']] - newPad = gs.Node(op = 'Pad', - name = name + '_pad', - attrs = { - 'pads': newPads, - 'mode': 'constant', - 'value': value - }, - inputs = [conv.inputs[0]], - outputs = [newConvInput]) + inTensor = conv.inputs[0] + assert isinstance(inTensor, gs.Variable) + convShape = inTensor.shape - conv.inputs[0] = newConvInput - graph.nodes.append(newPad) - graph.cleanup().toposort() + beginConvPads = convPads[0:len(convPads) // 2] + endConvPads = convPads[len(convPads) // 2:] + + nonSpatialDimCount = len(convShape) - (len(convPads) // 2) + pads = [0] * nonSpatialDimCount + beginConvPads + [0] * nonSpatialDimCount + endConvPads + shape = [] + for dim, begin, end in zip(convShape, pads[:len(pads) // 2], pads[len(pads) // 2:]): + shape.append(begin + dim + end) + + paddedInput = gs.Variable(f"{name}_{inTensor.name}", dtype = np.float32, shape = shape) + + newPad = gs.Node(op = 'Pad', + name = name + '_pad', + attrs = { + 'pads': pads, + 'mode': 'constant', + 'value': value + }, + inputs = [conv.inputs[0]], + outputs = [paddedInput]) + + graph.nodes.append(newPad) + + conv.attrs['pads'] = [0] * len(convPads) + conv.inputs[0] = paddedInput + + graph.cleanup().toposort() return graph From ab9fdfece7ccaa09085978de0d39c55d089f45bb Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Wed, 24 Sep 2025 21:52:36 +0200 Subject: [PATCH 05/54] Fix isoftmax parser --- Deeploy/Targets/Generic/Parsers.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Deeploy/Targets/Generic/Parsers.py b/Deeploy/Targets/Generic/Parsers.py index 7752834c50..8b1ddf8f73 100644 --- a/Deeploy/Targets/Generic/Parsers.py +++ b/Deeploy/Targets/Generic/Parsers.py @@ -669,11 +669,11 @@ def parseNode(self, node: gs.Node) -> bool: ]) if wellFormed: - self.operatorRepresentation['coeffA'] = int(node.attrs['coeffA'].values) - self.operatorRepresentation['coeffB'] = int(node.attrs['coeffB'].values) - self.operatorRepresentation['coeffC'] = int(node.attrs['coeffC'].values) - self.operatorRepresentation['log2'] = int(node.attrs['log2'].values) - self.operatorRepresentation['n_levels'] = int(node.attrs['n_levels'].values) + self.operatorRepresentation['coeffA'] = node.attrs['coeffA'] + self.operatorRepresentation['coeffB'] = node.attrs['coeffB'] + self.operatorRepresentation['coeffC'] = node.attrs['coeffC'] + self.operatorRepresentation['log2'] = node.attrs['log2'] + self.operatorRepresentation['n_levels'] = node.attrs['n_levels'] return wellFormed From a410763f4745af05bea9491f2293c53c64f1faaf Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Wed, 24 Sep 2025 22:01:26 +0200 Subject: [PATCH 06/54] Fix iRMSNorm and iNoNorm parsers --- Deeploy/Targets/Generic/Parsers.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Deeploy/Targets/Generic/Parsers.py b/Deeploy/Targets/Generic/Parsers.py index 8b1ddf8f73..ab12a09d3c 100644 --- a/Deeploy/Targets/Generic/Parsers.py +++ 
b/Deeploy/Targets/Generic/Parsers.py @@ -52,7 +52,7 @@ def parseNode(self, node: gs.Node) -> (bool): if ret: - self.operatorRepresentation['n_levels'] = int(node.attrs['n_levels']) + self.operatorRepresentation['n_levels'] = node.attrs['n_levels'] self.operatorRepresentation['log2D'] = int(math.log2(node.attrs['D'])) return ret @@ -848,8 +848,8 @@ def parseNode(self, node: gs.Node) -> bool: if ret: self.operatorRepresentation['D'] = node.attrs['D'] - self.operatorRepresentation['log2D'] = int(np.log2(node.attrs['D'].values).tolist()[0]) - self.operatorRepresentation['mul'] = int(node.attrs['mul'].values.tolist()[0]) + self.operatorRepresentation['log2D'] = int(math.log2(node.attrs['D'])) + self.operatorRepresentation['mul'] = node.attrs['mul'] self.operatorRepresentation['n_levels'] = node.attrs['n_levels'] return ret From f6027fb72ca6a199732f577fa1ed6db003946f08 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Wed, 24 Sep 2025 22:21:04 +0200 Subject: [PATCH 07/54] Fix ReduceMean type signature --- Deeploy/Targets/Generic/Bindings.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/Deeploy/Targets/Generic/Bindings.py b/Deeploy/Targets/Generic/Bindings.py index 6bfe805b39..b29e403d55 100644 --- a/Deeploy/Targets/Generic/Bindings.py +++ b/Deeploy/Targets/Generic/Bindings.py @@ -8,7 +8,7 @@ from Deeploy.CommonExtensions.CodeTransformationPasses.MemoryAllocation import ArgumentStructGeneration, \ MemoryManagementGeneration, MemoryPassthroughGeneration from Deeploy.CommonExtensions.DataTypes import FloatDataTypes, IntegerDataTypes, SignedIntegerDataTypes, float32_t, \ - int8_t, int32_t, uint8_t + int8_t, int32_t, int64_t, uint8_t from Deeploy.DeeployTypes import CodeTransformation, NodeBinding from Deeploy.FutureExtension.CodeTransformationPasses.FutureCodeTransformation import FutureGeneration from Deeploy.Targets.Generic.Templates import AddTemplate, BatchNormalizationTemplate, ConcatTemplate, ConvTemplate, \ @@ -195,13 +195,11 @@ ] BasicReduceMeanBindings = [ - NodeBinding(ReduceMeanChecker([PointerClass(type)], [PointerClass(type)]), ReduceMeanTemplate.referenceTemplate, - BasicTransformer) for type in SignedIntegerDataTypes + NodeBinding(ReduceMeanChecker([PointerClass(ty), PointerClass(int64_t)], [PointerClass(ty)]), + ReduceMeanTemplate.referenceTemplate, BasicTransformer) for ty in SignedIntegerDataTypes ] + [ - NodeBinding(ReduceMeanChecker([PointerClass(float_type), PointerClass(integer_type)], [PointerClass(float_type)]), - FloatReduceMeanTemplate.referenceTemplate, BasicTransformer) - for integer_type in SignedIntegerDataTypes - for float_type in FloatDataTypes + NodeBinding(ReduceMeanChecker([PointerClass(ty), PointerClass(int64_t)], [PointerClass(ty)]), + FloatReduceMeanTemplate.referenceTemplate, BasicTransformer) for ty in FloatDataTypes ] BasicReduceSumBindings = [ From 475b337cc99b4282529cce8a9d1e213858672687 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Wed, 24 Sep 2025 22:23:49 +0200 Subject: [PATCH 08/54] Fix itamax and itapartialmax parsers --- Deeploy/Targets/Generic/Parsers.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Deeploy/Targets/Generic/Parsers.py b/Deeploy/Targets/Generic/Parsers.py index ab12a09d3c..c8ecf9e83e 100644 --- a/Deeploy/Targets/Generic/Parsers.py +++ b/Deeploy/Targets/Generic/Parsers.py @@ -698,7 +698,7 @@ def parseNode(self, node: gs.Node) -> bool: ret = all(['n_levels' in node.attrs]) if ret and wellFormed: - self.operatorRepresentation['n_levels'] = int(node.attrs['n_levels'].values) + 
self.operatorRepresentation['n_levels'] = node.attrs['n_levels'] return True return False @@ -725,8 +725,8 @@ def parseNode(self, node: gs.Node) -> bool: ret = all(['group_width' in node.attrs, 'n_levels' in node.attrs]) if ret and wellFormed: - self.operatorRepresentation['group_width'] = int(node.attrs['group_width']) - self.operatorRepresentation['n_levels'] = int(node.attrs['n_levels'].values) + self.operatorRepresentation['group_width'] = node.attrs['group_width'] + self.operatorRepresentation['n_levels'] = node.attrs['n_levels'] return True return False From c6c310912315be33bb71805a07bcf4889c336a1f Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Wed, 24 Sep 2025 22:52:17 +0200 Subject: [PATCH 09/54] Fix attr comparison to compare with tuple in neureka --- Deeploy/Targets/Neureka/Parsers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Deeploy/Targets/Neureka/Parsers.py b/Deeploy/Targets/Neureka/Parsers.py index 3c564c10b2..1d3db0d882 100644 --- a/Deeploy/Targets/Neureka/Parsers.py +++ b/Deeploy/Targets/Neureka/Parsers.py @@ -18,7 +18,7 @@ def parseNode(self, node: gs.Node) -> bool: if not all([ # No dilation support - self.operatorRepresentation['dilations'] == [1, 1], + self.operatorRepresentation['dilations'] == (1, 1), # Channels have to be last 'channels_first' in self.operatorRepresentation and not self.operatorRepresentation['channels_first'], # Expect "weight_offset" attribute in the node @@ -129,7 +129,7 @@ def parseNode(self, node: gs.Node) -> bool: return False if not all([ - self.operatorRepresentation['kernel_shape'] == [1, 1], + self.operatorRepresentation['kernel_shape'] == (1, 1), self.operatorRepresentation['group'] == 1, ]): return False From cd2270c540f51d92090f438af58f4eae9077c217 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Wed, 24 Sep 2025 23:03:24 +0200 Subject: [PATCH 10/54] Fix keepdims type in fuse mhsa pass --- Deeploy/Targets/MemPool/TopologyOptimizationPasses/Passes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Deeploy/Targets/MemPool/TopologyOptimizationPasses/Passes.py b/Deeploy/Targets/MemPool/TopologyOptimizationPasses/Passes.py index 49f317caa4..46bad04cea 100644 --- a/Deeploy/Targets/MemPool/TopologyOptimizationPasses/Passes.py +++ b/Deeploy/Targets/MemPool/TopologyOptimizationPasses/Passes.py @@ -289,7 +289,7 @@ def get_constant_input_or_zeros(n: gs.Node, shape): name = name + "_sum", attrs = { 'axes': [1], - "keepdims": "0" + "keepdims": 0 }) mhsa_out[0].shape = [_output.shape[0]] + [int(H)] + _output.shape[1:] From 2e62e8451b4c05e2f580f999b3c2237e5922c9fc Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Thu, 25 Sep 2025 11:56:49 +0200 Subject: [PATCH 11/54] Fix old _unpack_const to pass Python literals --- Deeploy/DeeployTypes.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Deeploy/DeeployTypes.py b/Deeploy/DeeployTypes.py index 91d22d55ad..aecb112b57 100644 --- a/Deeploy/DeeployTypes.py +++ b/Deeploy/DeeployTypes.py @@ -1292,7 +1292,9 @@ def _unpack_const(attr) -> Union[int, float]: The attributes can either be a numpy scalar value or a Constant tensor. This expects the numpy value to be of size 1. 
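    For example (illustrative): a gs.Constant wrapping np.array([5]) unpacks to
    the Python int 5, while plain Python literals such as 5, 2.0 or True are
    returned unchanged.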
""" - if isinstance(attr, gs.Constant): + if isinstance(attr, (int, float, bool, str)): + return attr + elif isinstance(attr, gs.Constant): value = attr.values elif isinstance(attr, np.ndarray): value = attr From 587d6deea3140243d4910a540bfc17a1577a4a17 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Thu, 25 Sep 2025 11:57:42 +0200 Subject: [PATCH 12/54] Add RequantizedConv desc --- Deeploy/OperatorDescriptor.py | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index f6bd478184..a3fe275366 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -342,9 +342,44 @@ def _padsDefault(node: gs.Node) -> Tuple[int, ...]: ], ) + +class RequantizedConvDescriptor(OperatorDescriptor): + + def canonicalize(self, node: gs.Node, opset: int) -> bool: + if "n_levels_out" in node.attrs and "n_levels" in node.attrs: + # TODO: Change to log + print("[WARNING] RequantizedConv cannot have n_levels_out and n_levels in it's attributes") + return False + + if "n_levels_out" in node.attrs: + node.attrs["n_levels"] = node.attrs["n_levels_out"] + node.attrs.pop("n_levels_out") + + return super().canonicalize(node, opset) + + +requantizedConvDesc = RequantizedConvDescriptor( + inputDescriptor = IoDesc(["data_in", "weight", "mul", "add"], optional = ["shift"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + # Conv attrs + AttrDesc("auto_pad", AutoPad, default = AutoPad.NOTSET), + AttrDesc("dilations", IntTupleUnpack, default = _dilationsDefault), + AttrDesc("group", IntUnpack, default = 1), + AttrDesc("kernel_shape", IntTupleUnpack, default = _kernelShapeDefault), + AttrDesc("pads", IntTupleUnpack, default = _padsDefault), + AttrDesc("strides", IntTupleUnpack, default = _stridesDefault), + # RequantizedShift attrs + AttrDesc("n_levels", IntUnpack), + AttrDesc("signed", BoolUnpack), + AttrDesc("div", IntUnpack), + ], +) + defaultOperatorDescriptors: Dict[str, OperatorDescriptor] = { "Concat": concatDesc, "Conv": convDesc, + "RequantizedConv": requantizedConvDesc, "iRMSNorm": iRMSNormDesc, "Slice": sliceDesc, "Transpose": transposeDesc, From 0ccd3b8330a3e96cb5d069afc1465131e80c2c31 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Sun, 28 Sep 2025 13:50:17 +0200 Subject: [PATCH 13/54] Fix DW parser --- Deeploy/Targets/PULPOpen/Parsers.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/Deeploy/Targets/PULPOpen/Parsers.py b/Deeploy/Targets/PULPOpen/Parsers.py index e94af6e420..eebe3ad406 100644 --- a/Deeploy/Targets/PULPOpen/Parsers.py +++ b/Deeploy/Targets/PULPOpen/Parsers.py @@ -206,12 +206,9 @@ def parseNode(self, node: gs.Node) -> (bool): self.operatorRepresentation['stride_x'] = int(self.operatorRepresentation['strides'][0]) self.operatorRepresentation['stride_y'] = int(self.operatorRepresentation['strides'][1]) - if 'n_levels' in node.attrs: - self.operatorRepresentation['n_levels'] = int(node.attrs['n_levels'].values) - else: - self.operatorRepresentation['n_levels'] = int(node.attrs['n_levels_out'].values) - self.operatorRepresentation['signed'] = int(node.attrs['signed'].values) - self.operatorRepresentation['log2D'] = int(math.log2(node.attrs['div'].values)) + self.operatorRepresentation['n_levels'] = node.attrs['n_levels'] + self.operatorRepresentation['signed'] = node.attrs['signed'] + self.operatorRepresentation['log2D'] = int(math.log2(node.attrs['div'])) return ret return False From c2f2bb2c0430f6e94cd416608e01e65dd6da8b3c Mon Sep 17 00:00:00 
2001 From: Luka Macan Date: Sun, 28 Sep 2025 13:52:57 +0200 Subject: [PATCH 14/54] Fix pulp 1D conv --- Deeploy/Targets/PULPOpen/Parsers.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/Deeploy/Targets/PULPOpen/Parsers.py b/Deeploy/Targets/PULPOpen/Parsers.py index eebe3ad406..51b26ae546 100644 --- a/Deeploy/Targets/PULPOpen/Parsers.py +++ b/Deeploy/Targets/PULPOpen/Parsers.py @@ -133,13 +133,9 @@ def parseNode(self, node: gs.Node) -> (bool): self.operatorRepresentation['padding_y_bottom'] = int(self.operatorRepresentation['pads'][1]) self.operatorRepresentation['stride_y'] = int(self.operatorRepresentation['strides'][0]) - if 'n_levels' in node.attrs: - self.operatorRepresentation['n_levels'] = int(node.attrs['n_levels'].values) - else: - self.operatorRepresentation['n_levels'] = int(node.attrs['n_levels_out'].values) - - self.operatorRepresentation['signed'] = int(node.attrs['signed'].values) - self.operatorRepresentation['log2D'] = int(math.log2(node.attrs['div'].values)) + self.operatorRepresentation['n_levels'] = node.attrs['n_levels'] + self.operatorRepresentation['signed'] = node.attrs['signed'] + self.operatorRepresentation['log2D'] = int(math.log2(node.attrs['div'])) return ret def parseNodeCtxt(self, From 0b6032972a4a1e971550188bd6b08b4e32ad0651 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Sun, 28 Sep 2025 14:08:36 +0200 Subject: [PATCH 15/54] Sort operator descriptors alphabetically --- Deeploy/OperatorDescriptor.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index a3fe275366..3dc0f5fd7f 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -377,25 +377,25 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: ) defaultOperatorDescriptors: Dict[str, OperatorDescriptor] = { + "Add": addDesc, "Concat": concatDesc, "Conv": convDesc, - "RequantizedConv": requantizedConvDesc, - "iRMSNorm": iRMSNormDesc, - "Slice": sliceDesc, - "Transpose": transposeDesc, + "Gelu": geluDesc, + "ITAMax": itaMaxDesc, + "ITAPartialMax": itaPartialMaxDesc, "MaxPool": maxPoolDesc, "Pad": padDescOld, - "Add": addDesc, + "Quant": quantDesc, "ReduceMean": reduceMeanDesc, "ReduceSum": reduceSumDesc, + "RequantizedConv": requantizedConvDesc, + "RequantizediGELU": rqsIGeluDesc, + "Slice": sliceDesc, "Softmax": softmaxDesc, - "iSoftmax": iSoftmaxDesc, "SoftmaxGrad": softmaxGradDesc, - "Gelu": geluDesc, - "RequantizediGELU": rqsIGeluDesc, + "Transpose": transposeDesc, "iHardswish": iHardswishDesc, - "Quant": quantDesc, "iNoNorm": iNoNormDesc, - "ITAMax": itaMaxDesc, - "ITAPartialMax": itaPartialMaxDesc, + "iRMSNorm": iRMSNormDesc, + "iSoftmax": iSoftmaxDesc, } From a19f98a080dd8a3d0daf56fe5e32a0304c038630 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Sun, 28 Sep 2025 14:16:45 +0200 Subject: [PATCH 16/54] Add DequantDescriptor --- Deeploy/OperatorDescriptor.py | 12 ++++++++++++ Deeploy/Targets/Generic/Parsers.py | 10 ++++------ 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index 3dc0f5fd7f..cbb5d723ba 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -376,10 +376,22 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: ], ) +dequantDesc = OperatorDescriptor( + inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("scale", FloatUnpack), + AttrDesc("zero_point", 
FloatUnpack), + AttrDesc("bit_width", IntUnpack), + AttrDesc("signed", BoolUnpack), + ], +) + defaultOperatorDescriptors: Dict[str, OperatorDescriptor] = { "Add": addDesc, "Concat": concatDesc, "Conv": convDesc, + "Dequant": dequantDesc, "Gelu": geluDesc, "ITAMax": itaMaxDesc, "ITAPartialMax": itaPartialMaxDesc, diff --git a/Deeploy/Targets/Generic/Parsers.py b/Deeploy/Targets/Generic/Parsers.py index c8ecf9e83e..b43672d9c9 100644 --- a/Deeploy/Targets/Generic/Parsers.py +++ b/Deeploy/Targets/Generic/Parsers.py @@ -2488,12 +2488,10 @@ def parseNode(self, node: gs.Node) -> bool: ]) if ret: - self.operatorRepresentation['scale'] = float(node.attrs['scale']) - self.operatorRepresentation['zero_point'] = float(node.attrs['zero_point']) - self.operatorRepresentation['bit_width'] = int(node.attrs['bit_width']) - - self.operatorRepresentation['signed'] = bool(node.attrs['signed']) - + self.operatorRepresentation['scale'] = node.attrs['scale'] + self.operatorRepresentation['zero_point'] = node.attrs['zero_point'] + self.operatorRepresentation['bit_width'] = node.attrs['bit_width'] + self.operatorRepresentation['signed'] = node.attrs['signed'] return ret def parseNodeCtxt(self, From 4af65525156222cef52333bdfabcabe87c4afe69 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Sun, 28 Sep 2025 14:26:47 +0200 Subject: [PATCH 17/54] Add Div, IntegerDiv, RQIntegerDiv --- Deeploy/OperatorDescriptor.py | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index cbb5d723ba..ac486f4af9 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -387,12 +387,44 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: ], ) +divDesc = OperatorDescriptor( + inputDescriptor = IoDesc(["input1", "input2"]), + outputDescriptor = IoDesc("output"), + attrDescriptors = [], +) + +integerDivDescriptor = OperatorDescriptor( + inputDescriptor = IoDesc(["A", "B"]), + outputDescriptor = IoDesc("C"), + attrDescriptors = [ + AttrDesc("Delta", IntUnpack), + AttrDesc("eps", IntUnpack), + AttrDesc("eta", IntUnpack), + ], +) + +requantizedIntegerDivDescriptor = OperatorDescriptor( + inputDescriptor = IoDesc(["A", "B", "requant_mul", "requant_add", "requant_div"]), + outputDescriptor = IoDesc("C"), + attrDescriptors = [ + # IntegerDiv attrs + AttrDesc("Delta", IntUnpack), + AttrDesc("eps", IntUnpack), + AttrDesc("eta", IntUnpack), + # RequantizedShift attrs + AttrDesc("n_levels", IntUnpack), + AttrDesc("signed", BoolUnpack), + AttrDesc("div", IntUnpack), + ]) + defaultOperatorDescriptors: Dict[str, OperatorDescriptor] = { "Add": addDesc, "Concat": concatDesc, "Conv": convDesc, "Dequant": dequantDesc, + "Div": divDesc, "Gelu": geluDesc, + "IntegerDiv": integerDivDescriptor, "ITAMax": itaMaxDesc, "ITAPartialMax": itaPartialMaxDesc, "MaxPool": maxPoolDesc, @@ -402,6 +434,7 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: "ReduceSum": reduceSumDesc, "RequantizedConv": requantizedConvDesc, "RequantizediGELU": rqsIGeluDesc, + "RQIntegerDiv": requantizedIntegerDivDescriptor, "Slice": sliceDesc, "Softmax": softmaxDesc, "SoftmaxGrad": softmaxGradDesc, From 2e2e3dfbb74c45d7e9cb63624e63ffc48d31ccbb Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Sun, 28 Sep 2025 14:37:24 +0200 Subject: [PATCH 18/54] Add DebugPrint, LayerNormalization, iLayerNorm --- Deeploy/OperatorDescriptor.py | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/Deeploy/OperatorDescriptor.py 
b/Deeploy/OperatorDescriptor.py index ac486f4af9..c6f9aa49cd 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -417,29 +417,50 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: AttrDesc("div", IntUnpack), ]) +debugPrintDesc = OperatorDescriptor( + inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [], +) + +layerNormalizationDesc = OperatorDescriptor( + inputDescriptor = IoDesc(["data_in", "weight", "bias"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [AttrDesc("epsilon", FloatUnpack)], +) + +iLayerNormDesc = OperatorDescriptor( + inputDescriptor = IoDesc(["data_in", "weight", "bias"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [AttrDesc("D", IntUnpack), AttrDesc("n_levels", IntUnpack)], +) + defaultOperatorDescriptors: Dict[str, OperatorDescriptor] = { "Add": addDesc, "Concat": concatDesc, "Conv": convDesc, + "DebugPrint": debugPrintDesc, "Dequant": dequantDesc, "Div": divDesc, "Gelu": geluDesc, - "IntegerDiv": integerDivDescriptor, "ITAMax": itaMaxDesc, "ITAPartialMax": itaPartialMaxDesc, + "IntegerDiv": integerDivDescriptor, + "LayerNormalization": layerNormalizationDesc, "MaxPool": maxPoolDesc, "Pad": padDescOld, "Quant": quantDesc, + "RQIntegerDiv": requantizedIntegerDivDescriptor, "ReduceMean": reduceMeanDesc, "ReduceSum": reduceSumDesc, "RequantizedConv": requantizedConvDesc, "RequantizediGELU": rqsIGeluDesc, - "RQIntegerDiv": requantizedIntegerDivDescriptor, "Slice": sliceDesc, "Softmax": softmaxDesc, "SoftmaxGrad": softmaxGradDesc, "Transpose": transposeDesc, "iHardswish": iHardswishDesc, + "iLayerNorm": iLayerNormDesc, "iNoNorm": iNoNormDesc, "iRMSNorm": iRMSNormDesc, "iSoftmax": iSoftmaxDesc, From 9ac9a62ae0713220b4f4693f8565c78781ae1539 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Sun, 28 Sep 2025 14:44:13 +0200 Subject: [PATCH 19/54] Add RequantizedOperatorDescriptor --- Deeploy/OperatorDescriptor.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index c6f9aa49cd..bd57ae822c 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -255,12 +255,12 @@ class GeluApprox(str, Enum): AttrDesc("approximate", GeluApprox, default = GeluApprox.none), ]) -rqsIGeluDesc = OperatorDescriptor(inputDescriptor = IoDesc(["data_in", "mul", "add", "shift"]), - outputDescriptor = IoDesc("data_out"), - attrDescriptors = [ - AttrDesc("b", IntUnpack), - AttrDesc("one", IntUnpack), - ]) +requantizedIGeluDesc = OperatorDescriptor(inputDescriptor = IoDesc(["data_in", "mul", "add", "shift"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("b", IntUnpack), + AttrDesc("one", IntUnpack), + ]) iHardswishDesc = OperatorDescriptor(inputDescriptor = IoDesc("data_in"), outputDescriptor = IoDesc("data_out"), @@ -343,7 +343,7 @@ def _padsDefault(node: gs.Node) -> Tuple[int, ...]: ) -class RequantizedConvDescriptor(OperatorDescriptor): +class RequantizedOperatorDescriptor(OperatorDescriptor): def canonicalize(self, node: gs.Node, opset: int) -> bool: if "n_levels_out" in node.attrs and "n_levels" in node.attrs: @@ -358,7 +358,7 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: return super().canonicalize(node, opset) -requantizedConvDesc = RequantizedConvDescriptor( +requantizedConvDesc = RequantizedOperatorDescriptor( inputDescriptor = IoDesc(["data_in", "weight", "mul", "add"], optional = ["shift"]), outputDescriptor = 
IoDesc("data_out"), attrDescriptors = [ @@ -403,7 +403,7 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: ], ) -requantizedIntegerDivDescriptor = OperatorDescriptor( +requantizedIntegerDivDescriptor = RequantizedOperatorDescriptor( inputDescriptor = IoDesc(["A", "B", "requant_mul", "requant_add", "requant_div"]), outputDescriptor = IoDesc("C"), attrDescriptors = [ @@ -454,7 +454,7 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: "ReduceMean": reduceMeanDesc, "ReduceSum": reduceSumDesc, "RequantizedConv": requantizedConvDesc, - "RequantizediGELU": rqsIGeluDesc, + "RequantizediGELU": requantizedIGeluDesc, "Slice": sliceDesc, "Softmax": softmaxDesc, "SoftmaxGrad": softmaxGradDesc, From e01fdb034426ad24ce969525e19b82f9ea9692ad Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Sun, 28 Sep 2025 14:48:48 +0200 Subject: [PATCH 20/54] Add flatten and gather --- Deeploy/OperatorDescriptor.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index bd57ae822c..462b4c4fc7 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -435,6 +435,18 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: attrDescriptors = [AttrDesc("D", IntUnpack), AttrDesc("n_levels", IntUnpack)], ) +flattenDesc = OperatorDescriptor( + inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [AttrDesc("axis", IntUnpack, default = 1)], +) + +gatherDesc = OperatorDescriptor( + inputDescriptor = IoDesc(["data_in", "indices"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [AttrDesc("axis", IntUnpack, default = 0)], +) + defaultOperatorDescriptors: Dict[str, OperatorDescriptor] = { "Add": addDesc, "Concat": concatDesc, @@ -442,6 +454,8 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: "DebugPrint": debugPrintDesc, "Dequant": dequantDesc, "Div": divDesc, + "Flatten": flattenDesc, + "Gather": gatherDesc, "Gelu": geluDesc, "ITAMax": itaMaxDesc, "ITAPartialMax": itaPartialMaxDesc, From 1db3ae7e951be7c41f50d22c0b2b0d933dc808bd Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Sun, 28 Sep 2025 14:55:09 +0200 Subject: [PATCH 21/54] Add Squeeze and Unsqueeze --- Deeploy/OperatorDescriptor.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index 462b4c4fc7..0b5836fa2d 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -447,6 +447,20 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: attrDescriptors = [AttrDesc("axis", IntUnpack, default = 0)], ) +# Opset <= 11 +unsqueezeDesc = OperatorDescriptor( + inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [AttrDesc("axes", IntTupleUnpack)], +) + +# Opset <= 11 +squeezeDesc = OperatorDescriptor( + inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [AttrDesc("axes", IntTupleUnpack)], +) + defaultOperatorDescriptors: Dict[str, OperatorDescriptor] = { "Add": addDesc, "Concat": concatDesc, @@ -472,7 +486,9 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: "Slice": sliceDesc, "Softmax": softmaxDesc, "SoftmaxGrad": softmaxGradDesc, + "Squeeze": squeezeDesc, "Transpose": transposeDesc, + "Unsqueeze": unsqueezeDesc, "iHardswish": iHardswishDesc, "iLayerNorm": iLayerNormDesc, "iNoNorm": iNoNormDesc, From fd30dc727c161c2e4d4e63e5083b5ea518d45062 Mon Sep 17 00:00:00 2001 From: Luka 
Macan Date: Sun, 28 Sep 2025 16:46:06 +0200 Subject: [PATCH 22/54] Add Mul --- Deeploy/OperatorDescriptor.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index 0b5836fa2d..7f36e9a4bf 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -461,6 +461,12 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: attrDescriptors = [AttrDesc("axes", IntTupleUnpack)], ) +mulDesc = OperatorDescriptor( + inputDescriptor = IoDesc(["A", "B"]), + outputDescriptor = IoDesc("C"), + attrDescriptors = [], +) + defaultOperatorDescriptors: Dict[str, OperatorDescriptor] = { "Add": addDesc, "Concat": concatDesc, @@ -476,6 +482,7 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: "IntegerDiv": integerDivDescriptor, "LayerNormalization": layerNormalizationDesc, "MaxPool": maxPoolDesc, + "Mul": mulDesc, "Pad": padDescOld, "Quant": quantDesc, "RQIntegerDiv": requantizedIntegerDivDescriptor, From a3309edf2fd30eed2bf9a65fc89494fc00ff76a4 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Sun, 28 Sep 2025 16:53:01 +0200 Subject: [PATCH 23/54] Add MatMul, RQMatMul, MatMulInteger --- Deeploy/OperatorDescriptor.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index 7f36e9a4bf..edbaf6a530 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -467,6 +467,23 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: attrDescriptors = [], ) +matMulDesc = OperatorDescriptor( + inputDescriptor = IoDesc(["A", "B"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [], +) + +rqMatMulDesc = RequantizedOperatorDescriptor( + inputDescriptor = IoDesc(["A", "B", "add", "mul"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + # RequantizedShift attrs + AttrDesc("n_levels", IntUnpack), + AttrDesc("signed", BoolUnpack), + AttrDesc("div", IntUnpack), + ], +) + defaultOperatorDescriptors: Dict[str, OperatorDescriptor] = { "Add": addDesc, "Concat": concatDesc, @@ -481,11 +498,14 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: "ITAPartialMax": itaPartialMaxDesc, "IntegerDiv": integerDivDescriptor, "LayerNormalization": layerNormalizationDesc, + "MatMul": matMulDesc, + "MatMulInteger": matMulDesc, "MaxPool": maxPoolDesc, "Mul": mulDesc, "Pad": padDescOld, "Quant": quantDesc, "RQIntegerDiv": requantizedIntegerDivDescriptor, + "RQMatMul": rqMatMulDesc, "ReduceMean": reduceMeanDesc, "ReduceSum": reduceSumDesc, "RequantizedConv": requantizedConvDesc, From c758fccb89e67ceb676e89bcb89320723b24422f Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Sun, 28 Sep 2025 17:15:12 +0200 Subject: [PATCH 24/54] Add Gemm and RQGemm --- Deeploy/OperatorDescriptor.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index edbaf6a530..327c7b442d 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -484,6 +484,31 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: ], ) +gemmDesc = OperatorDescriptor( + inputDescriptor = IoDesc(["A", "B"], optional = ["C"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("alpha", FloatUnpack, default = 1.0), + AttrDesc("beta", FloatUnpack, default = 1.0), + AttrDesc("transA", BoolUnpack, default = False), + AttrDesc("transB", BoolUnpack, default = False), + ], +) + +rqGemmDesc = 
RequantizedOperatorDescriptor( + inputDescriptor = IoDesc(["A", "B", "C", "add", "mul"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("alpha", FloatUnpack, default = 1.0), + AttrDesc("beta", FloatUnpack, default = 1.0), + AttrDesc("transA", BoolUnpack, default = False), + AttrDesc("transB", BoolUnpack, default = False), + # RequantizedShift attrs + AttrDesc("n_levels", IntUnpack), + AttrDesc("signed", BoolUnpack), + AttrDesc("div", IntUnpack), + ]) + defaultOperatorDescriptors: Dict[str, OperatorDescriptor] = { "Add": addDesc, "Concat": concatDesc, @@ -494,6 +519,7 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: "Flatten": flattenDesc, "Gather": gatherDesc, "Gelu": geluDesc, + "Gemm": gemmDesc, "ITAMax": itaMaxDesc, "ITAPartialMax": itaPartialMaxDesc, "IntegerDiv": integerDivDescriptor, @@ -504,6 +530,7 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: "Mul": mulDesc, "Pad": padDescOld, "Quant": quantDesc, + "RQGemm": rqGemmDesc, "RQIntegerDiv": requantizedIntegerDivDescriptor, "RQMatMul": rqMatMulDesc, "ReduceMean": reduceMeanDesc, From 7e951d83f68eafd8f0241a6a17fbee6cdfa49516 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Sun, 28 Sep 2025 22:06:59 +0200 Subject: [PATCH 25/54] Add RequantizedGemm --- Deeploy/OperatorDescriptor.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index 327c7b442d..e347dad8a6 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -509,6 +509,20 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: AttrDesc("div", IntUnpack), ]) +requantizedGemmDesc = RequantizedOperatorDescriptor( + inputDescriptor = IoDesc(["A", "B", "add", "mul"]), # Important diff to RQGemm + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("alpha", FloatUnpack, default = 1.0), + AttrDesc("beta", FloatUnpack, default = 1.0), + AttrDesc("transA", BoolUnpack, default = False), + AttrDesc("transB", BoolUnpack, default = False), + # RequantizedShift attrs + AttrDesc("n_levels", IntUnpack), + AttrDesc("signed", BoolUnpack), + AttrDesc("div", IntUnpack), + ]) + defaultOperatorDescriptors: Dict[str, OperatorDescriptor] = { "Add": addDesc, "Concat": concatDesc, @@ -536,6 +550,7 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: "ReduceMean": reduceMeanDesc, "ReduceSum": reduceSumDesc, "RequantizedConv": requantizedConvDesc, + "RequantizedGemm": requantizedGemmDesc, "RequantizediGELU": requantizedIGeluDesc, "Slice": sliceDesc, "Softmax": softmaxDesc, From 1ab763e2d2f7abb81b8939ecb8741cf65dab402f Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Mon, 29 Sep 2025 08:24:27 +0200 Subject: [PATCH 26/54] Fix transA and transB being treated like ints --- Deeploy/Targets/Generic/Parsers.py | 47 ++++++++++++------- .../Generic/Templates/FloatGemmTemplate.py | 4 +- .../Targets/Generic/Templates/GemmTemplate.py | 4 +- Deeploy/Targets/Generic/TypeCheckers.py | 6 +-- .../Targets/MemPool/Templates/GemmTemplate.py | 4 +- .../MemPool/Templates/RQGemmTemplate.py | 8 ++-- .../PULPOpen/Templates/FloatGemmTemplate.py | 4 +- .../TileConstraints/MatMulTileConstraint.py | 8 ++-- Deeploy/Targets/Snitch/Parsers.py | 8 +--- 9 files changed, 50 insertions(+), 43 deletions(-) diff --git a/Deeploy/Targets/Generic/Parsers.py b/Deeploy/Targets/Generic/Parsers.py index b43672d9c9..f6c2ee9784 100644 --- a/Deeploy/Targets/Generic/Parsers.py +++ b/Deeploy/Targets/Generic/Parsers.py @@ -1690,27 +1690,40 @@ def parseNodeCtxt(self, 
node.inputs.append(zeroTensor) self.operatorRepresentation['C'] = f'{node.name}_C_Tensor' + buffA = ctxt.lookup(node.inputs[0].name) + assert isinstance(buffA, VariableBuffer) + buffB = ctxt.lookup(node.inputs[1].name) + assert isinstance(buffB, VariableBuffer) + buffOut = ctxt.lookup(node.outputs[0].name) + assert isinstance(buffOut, VariableBuffer) + # Store the input and output shapes in the operator representation - self.operatorRepresentation['size'] = np.prod(ctxt.lookup(node.inputs[0].name).shape) - self.operatorRepresentation['A_shape'] = ctxt.lookup(node.inputs[0].name).shape - self.operatorRepresentation['B_shape'] = ctxt.lookup(node.inputs[1].name).shape - self.operatorRepresentation['data_out_shape'] = ctxt.lookup(node.outputs[0].name).shape + self.operatorRepresentation['size'] = np.prod(buffA.shape) + self.operatorRepresentation['A_shape'] = buffA.shape + self.operatorRepresentation['B_shape'] = buffB.shape + self.operatorRepresentation['data_out_shape'] = buffOut.shape + + if self.operatorRepresentation['transA']: + N_A, M = buffA.shape[-2:] + else: + M, N_A = buffA.shape[-2:] + + if self.operatorRepresentation['transB']: + O, N_B = buffB.shape[-2:] + else: + N_B, O = buffB.shape[-2:] # Store the matrix dimensions in the operator representation - self.operatorRepresentation['M'] = ctxt.lookup( - node.inputs[0].name).shape[(-2 + self.operatorRepresentation['transA'])] - self.operatorRepresentation['N'] = ctxt.lookup( - node.inputs[0].name).shape[(-1 - self.operatorRepresentation['transA'])] - self.operatorRepresentation['O'] = ctxt.lookup( - node.inputs[1].name).shape[(-1 - self.operatorRepresentation['transB'])] + self.operatorRepresentation['M'] = M + self.operatorRepresentation['N'] = N_A + self.operatorRepresentation['O'] = O # SCHEREMO: Assert that reduction dimension is the same on both matrices - ret = ret and (self.operatorRepresentation['N'] == ctxt.lookup( - node.inputs[1].name).shape[-2 + self.operatorRepresentation['transB']]) + ret = ret and N_A == N_B # Check if the batch dimensions are compatible - self.operatorRepresentation['batch_A'] = np.prod(ctxt.lookup(node.inputs[0].name).shape[:-2]) - self.operatorRepresentation['batch_B'] = np.prod(ctxt.lookup(node.inputs[1].name).shape[:-2]) + self.operatorRepresentation['batch_A'] = np.prod(buffA.shape[:-2]) + self.operatorRepresentation['batch_B'] = np.prod(buffB.shape[:-2]) self.operatorRepresentation['batch'] = max(self.operatorRepresentation['batch_A'], self.operatorRepresentation['batch_B']) @@ -1722,10 +1735,10 @@ def parseNodeCtxt(self, ), "Incompatible dimensions for input matrices. Broadcasting not yet supported for dimensions larger than 1 on one of the inputs, or equal dimensions between the 2." 
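The shape handling above is the core of this fix: with transA and transB kept as plain bools, the reduction dimension is read from the correct axis instead of being derived by adding a bool to an index. A minimal standalone sketch of the same shape logic, assuming plain tuple shapes (gemm_dims is an illustrative helper, not part of the patch):

    def gemm_dims(shapeA, shapeB, transA: bool, transB: bool):
        # ONNX Gemm computes A' @ B' with A' = A^T if transA and B' = B^T if transB,
        # so A contributes (M, N) and B contributes (N, O) after the optional transpose.
        if transA:
            N_A, M = shapeA[-2:]
        else:
            M, N_A = shapeA[-2:]
        if transB:
            O, N_B = shapeB[-2:]
        else:
            N_B, O = shapeB[-2:]
        assert N_A == N_B, "reduction dimensions must match"
        return M, N_A, O

    assert gemm_dims((8, 16), (16, 32), False, False) == (8, 16, 32)
    assert gemm_dims((16, 8), (32, 16), True, True) == (8, 16, 32)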
# Create flags for same dimension between each input matrix and the final batch dimension - self.operatorRepresentation['A_batched'] = (self.operatorRepresentation['batch'] == np.prod( - ctxt.lookup(node.inputs[0].name).shape[:-2])) + self.operatorRepresentation['A_batched'] = ( + self.operatorRepresentation['batch'] == self.operatorRepresentation['batch_A']) self.operatorRepresentation['W_batched'] = self.operatorRepresentation['B_batched'] = ( - self.operatorRepresentation['batch'] == np.prod(ctxt.lookup(node.inputs[1].name).shape[:-2])) + self.operatorRepresentation['batch'] == self.operatorRepresentation['batch_B']) return ctxt, ret diff --git a/Deeploy/Targets/Generic/Templates/FloatGemmTemplate.py b/Deeploy/Targets/Generic/Templates/FloatGemmTemplate.py index 69bea8484e..ab78e742d0 100644 --- a/Deeploy/Targets/Generic/Templates/FloatGemmTemplate.py +++ b/Deeploy/Targets/Generic/Templates/FloatGemmTemplate.py @@ -21,8 +21,8 @@ ${M}, ${N}, ${O}, - ${transA}, - ${transB} + ${int(transA)}, + ${int(transB)} ); % if A_batched: diff --git a/Deeploy/Targets/Generic/Templates/GemmTemplate.py b/Deeploy/Targets/Generic/Templates/GemmTemplate.py index 62d760d15c..371004a8e7 100644 --- a/Deeploy/Targets/Generic/Templates/GemmTemplate.py +++ b/Deeploy/Targets/Generic/Templates/GemmTemplate.py @@ -56,8 +56,8 @@ def alignToContext(self, ctxt: NetworkContext, ${O}, ${alpha}, ${beta}, - ${transA}, - ${transB}, + ${int(transA)}, + ${int(transB)}, ${A_offset}, ${B_offset}, ${C_offset}, diff --git a/Deeploy/Targets/Generic/TypeCheckers.py b/Deeploy/Targets/Generic/TypeCheckers.py index c2c8d436f8..1907a0aea0 100644 --- a/Deeploy/Targets/Generic/TypeCheckers.py +++ b/Deeploy/Targets/Generic/TypeCheckers.py @@ -185,10 +185,8 @@ def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[ def _inferNumLevels(self, inputs: List[VariableBuffer], operatorRepresentation: OperatorRepresentation) -> List[int]: - return [ - 2**((self.input_types[0].referencedType.typeWidth) * 2) * - inputs[0].shape[-1 - operatorRepresentation['transA']] - ] + O = inputs[0].shape[-1] if not operatorRepresentation['transA'] else inputs[0].shape[-2] + return [2**((self.input_types[0].referencedType.typeWidth) * 2) * O] def _inferSignedness(self, inputs: List[VariableBuffer], operatorRepresentation: OperatorRepresentation) -> List[bool]: diff --git a/Deeploy/Targets/MemPool/Templates/GemmTemplate.py b/Deeploy/Targets/MemPool/Templates/GemmTemplate.py index e5d53bd255..54cc86f6af 100644 --- a/Deeploy/Targets/MemPool/Templates/GemmTemplate.py +++ b/Deeploy/Targets/MemPool/Templates/GemmTemplate.py @@ -127,8 +127,8 @@ def hoistTransientBuffers(self, ctxt: NetworkContext, ${O}, ${alpha}, ${beta}, - ${transA}, - ${transB}, + ${int(transA)}, + ${int(transB)}, ${A_offset}, ${B_offset}, ${C_offset}, diff --git a/Deeploy/Targets/MemPool/Templates/RQGemmTemplate.py b/Deeploy/Targets/MemPool/Templates/RQGemmTemplate.py index e6a42768e8..f544841acf 100644 --- a/Deeploy/Targets/MemPool/Templates/RQGemmTemplate.py +++ b/Deeploy/Targets/MemPool/Templates/RQGemmTemplate.py @@ -145,8 +145,8 @@ def hoistTransientBuffers(self, ctxt: NetworkContext, ${O}, ${alpha}, ${beta}, - ${transA}, - ${transB}, + ${int(transA)}, + ${int(transB)}, ${mul}, ${add}, ${log2Dstring}, @@ -170,8 +170,8 @@ def hoistTransientBuffers(self, ctxt: NetworkContext, ${O}, ${alpha}, ${beta}, - ${transA}, - ${transB}, + ${int(transA)}, + ${int(transB)}, ${mul}, ${add}, ${log2Dstring}, diff --git a/Deeploy/Targets/PULPOpen/Templates/FloatGemmTemplate.py 
b/Deeploy/Targets/PULPOpen/Templates/FloatGemmTemplate.py index f4c22b2c22..21044a5eca 100644 --- a/Deeploy/Targets/PULPOpen/Templates/FloatGemmTemplate.py +++ b/Deeploy/Targets/PULPOpen/Templates/FloatGemmTemplate.py @@ -20,8 +20,8 @@ ${M}, ${N}, ${O}, - ${transA}, - ${transB} + ${int(transA)}, + ${int(transB)} ); ref_${data_out}_${A} += ${M} * ${N}; diff --git a/Deeploy/Targets/PULPOpen/TileConstraints/MatMulTileConstraint.py b/Deeploy/Targets/PULPOpen/TileConstraints/MatMulTileConstraint.py index 8b795be88e..a9259a15cf 100644 --- a/Deeploy/Targets/PULPOpen/TileConstraints/MatMulTileConstraint.py +++ b/Deeploy/Targets/PULPOpen/TileConstraints/MatMulTileConstraint.py @@ -32,13 +32,13 @@ def addGeometricalConstraint(tilerModel: TilerModel, parseDict: Dict, ctxt: Netw tensorsShapeLen = len(bufferA.shape) AFirstDimVar = tilerModel.getTensorDimVar(tensorName = bufferA.name, - dimIdx = (tensorsShapeLen - 2) + parseDict['transA']) + dimIdx = (tensorsShapeLen - 2) + int(parseDict['transA'])) ASecondDimVar = tilerModel.getTensorDimVar(tensorName = bufferA.name, - dimIdx = (tensorsShapeLen - 1) - parseDict['transA']) + dimIdx = (tensorsShapeLen - 1) - int(parseDict['transA'])) BFirstDimVar = tilerModel.getTensorDimVar(tensorName = bufferB.name, - dimIdx = (tensorsShapeLen - 2) + parseDict['transB']) + dimIdx = (tensorsShapeLen - 2) + int(parseDict['transB'])) BSecondDimVar = tilerModel.getTensorDimVar(tensorName = bufferB.name, - dimIdx = (tensorsShapeLen - 1) - parseDict['transB']) + dimIdx = (tensorsShapeLen - 1) - int(parseDict['transB'])) outputFirstDimVar = tilerModel.getTensorDimVar(tensorName = outputBuffer.name, dimIdx = (tensorsShapeLen - 2)) outputSecondDimVar = tilerModel.getTensorDimVar(tensorName = outputBuffer.name, dimIdx = (tensorsShapeLen - 1)) diff --git a/Deeploy/Targets/Snitch/Parsers.py b/Deeploy/Targets/Snitch/Parsers.py index 0051994686..51b32db210 100644 --- a/Deeploy/Targets/Snitch/Parsers.py +++ b/Deeploy/Targets/Snitch/Parsers.py @@ -18,9 +18,7 @@ def parseNode(self, node: gs.Node) -> bool: if not ret: return False - if not all([ - self.operatorRepresentation['transA'] == 0, - ]): + if self.operatorRepresentation['transA']: return False return True @@ -50,9 +48,7 @@ def parseNode(self, node: gs.Node) -> bool: if not ret: return False - if not all([ - self.operatorRepresentation['transA'] == 0, - ]): + if self.operatorRepresentation['transA']: return False return True From 1ec6cde925b9da692b80e306d12af590f8fa9446 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Sun, 28 Sep 2025 22:02:10 +0200 Subject: [PATCH 27/54] Add LinearAttention --- Deeploy/OperatorDescriptor.py | 37 ++++++++++++++++++++++ Deeploy/Targets/Generic/Parsers.py | 51 ++++++++++++------------------ 2 files changed, 57 insertions(+), 31 deletions(-) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index e347dad8a6..93ad2643d7 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -523,6 +523,42 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: AttrDesc("div", IntUnpack), ]) +linearAttentionDesc = OperatorDescriptor( + inputDescriptor = IoDesc( + ["q", "k", "v", "wq_weight", "wq_bias", "wk_weight", "wk_bias", "wv_weight", "wv_bias", "wo_weight", + "wo_bias"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("preattn_requant_mul", IntTupleUnpack), + AttrDesc("preattn_requant_div", IntTupleUnpack), + AttrDesc("normalizer_requant_mul", IntTupleUnpack), + AttrDesc("normalizer_requant_shift", IntTupleUnpack), + 
AttrDesc("normalizer_requant_div", IntTupleUnpack), + AttrDesc("postattn_requant_mul", IntTupleUnpack), + AttrDesc("postattn_requant_shift", IntTupleUnpack), + AttrDesc("postattn_requant_div", IntTupleUnpack), + AttrDesc("wo_requant_mul", IntTupleUnpack), + AttrDesc("wo_requant_shift", IntTupleUnpack), + AttrDesc("wo_requant_div", IntTupleUnpack), + AttrDesc("wq_requant_mul", IntTupleUnpack), + AttrDesc("wq_requant_shift", IntTupleUnpack), + AttrDesc("wq_requant_div", IntTupleUnpack), + AttrDesc("wk_requant_mul", IntTupleUnpack), + AttrDesc("wk_requant_shift", IntTupleUnpack), + AttrDesc("wk_requant_div", IntTupleUnpack), + AttrDesc("wv_requant_mul", IntTupleUnpack), + AttrDesc("wv_requant_shift", IntTupleUnpack), + AttrDesc("wv_requant_div", IntTupleUnpack), + AttrDesc("Delta", IntUnpack), + AttrDesc("eps", IntUnpack), + AttrDesc("act_type", IntUnpack), + AttrDesc("n_levels", IntUnpack), + AttrDesc("dim", IntUnpack), + AttrDesc("dim_head", IntUnpack), + AttrDesc("heads", IntUnpack), + ], +) + defaultOperatorDescriptors: Dict[str, OperatorDescriptor] = { "Add": addDesc, "Concat": concatDesc, @@ -538,6 +574,7 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: "ITAPartialMax": itaPartialMaxDesc, "IntegerDiv": integerDivDescriptor, "LayerNormalization": layerNormalizationDesc, + "LinearAttention": linearAttentionDesc, "MatMul": matMulDesc, "MatMulInteger": matMulDesc, "MaxPool": maxPoolDesc, diff --git a/Deeploy/Targets/Generic/Parsers.py b/Deeploy/Targets/Generic/Parsers.py index f6c2ee9784..9389034969 100644 --- a/Deeploy/Targets/Generic/Parsers.py +++ b/Deeploy/Targets/Generic/Parsers.py @@ -1472,37 +1472,26 @@ def parseNode(self, node: gs.Node) -> (bool): ]) if ret: - self.operatorRepresentation['preattn_requant_mul'] = int(node.attrs['preattn_requant_mul'].values) - self.operatorRepresentation['preattn_requant_shift'] = int(node.attrs['preattn_requant_shift'].values) - self.operatorRepresentation['preattn_requant_div'] = int( - math.log2(int(node.attrs['preattn_requant_div'].values))) - self.operatorRepresentation['normalizer_requant_mul'] = int(node.attrs['normalizer_requant_mul'].values) - self.operatorRepresentation['normalizer_requant_shift'] = int(node.attrs['normalizer_requant_shift'].values) - self.operatorRepresentation['normalizer_requant_div'] = int( - math.log2(int(node.attrs['normalizer_requant_div'].values))) - self.operatorRepresentation['postattn_requant_mul'] = int(node.attrs['postattn_requant_mul'].values) - self.operatorRepresentation['postattn_requant_shift'] = int(node.attrs['postattn_requant_shift'].values) - self.operatorRepresentation['postattn_requant_div'] = int( - math.log2(int(node.attrs['postattn_requant_div'].values))) - self.operatorRepresentation['wo_requant_mul'] = int(node.attrs['wo_requant_mul'].values) - self.operatorRepresentation['wo_requant_shift'] = int(node.attrs['wo_requant_shift'].values) - self.operatorRepresentation['wo_requant_div'] = int(math.log2(int(node.attrs['wo_requant_div'].values))) - self.operatorRepresentation['wq_requant_mul'] = int(node.attrs['wq_requant_mul'].values) - self.operatorRepresentation['wq_requant_shift'] = int(node.attrs['wq_requant_shift'].values) - self.operatorRepresentation['wq_requant_div'] = int(math.log2(int(node.attrs['wq_requant_div'].values))) - self.operatorRepresentation['wk_requant_mul'] = int(node.attrs['wk_requant_mul'].values) - self.operatorRepresentation['wk_requant_shift'] = int(node.attrs['wk_requant_shift'].values) - self.operatorRepresentation['wk_requant_div'] = 
int(math.log2(int(node.attrs['wk_requant_div'].values)))
-            self.operatorRepresentation['wv_requant_mul'] = int(node.attrs['wv_requant_mul'].values)
-            self.operatorRepresentation['wv_requant_shift'] = int(node.attrs['wv_requant_shift'].values)
-            self.operatorRepresentation['wv_requant_div'] = int(math.log2(int(node.attrs['wv_requant_div'].values)))
-            self.operatorRepresentation['Delta'] = int(node.attrs['Delta'])
-            self.operatorRepresentation['eps'] = int(node.attrs['eps'])
-            self.operatorRepresentation['act_type'] = int(node.attrs['act_type'])
-            self.operatorRepresentation['n_levels'] = int(node.attrs['n_levels'].values)
-            self.operatorRepresentation['dim'] = int(node.attrs['dim'].values)
-            self.operatorRepresentation['dim_head'] = int(node.attrs['dim_head'].values)
-            self.operatorRepresentation['heads'] = int(node.attrs['heads'].values)
+            self.operatorRepresentation.update(node.attrs)
+
+            # All *_div attrs are stored as their log2
+            log2Attrs = [
+                "preattn_requant_div",
+                "normalizer_requant_div",
+                "postattn_requant_div",
+                "wo_requant_div",
+                "wq_requant_div",
+                "wk_requant_div",
+                "wv_requant_div",
+            ]
+
+            for attr in log2Attrs:
+                value = self.operatorRepresentation[attr]
+                assert isinstance(value, int)
+                self.operatorRepresentation[attr] = int(math.log2(value))
         return ret

From 565cd95b167a41554b66a75a79f558b38f80c1b2 Mon Sep 17 00:00:00 2001
From: Luka Macan
Date: Sun, 28 Sep 2025 22:11:23 +0200
Subject: [PATCH 28/54] Add CLCA

---
 Deeploy/OperatorDescriptor.py      | 23 +++++++++++++++++++++
 Deeploy/Targets/Generic/Parsers.py | 10 +---------
 2 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py
index 93ad2643d7..f25926c1cf 100644
--- a/Deeploy/OperatorDescriptor.py
+++ b/Deeploy/OperatorDescriptor.py
@@ -559,8 +559,31 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool:
     ],
 )
 
+clcaDesc = OperatorDescriptor(
+    inputDescriptor = IoDesc([
+        "q", "k", "wq_weight", "wq_bias", "wk_weight", "wk_bias", "wo_weight", "wo_bias", "wq_requant_mul",
+        "wq_requant_add", "wq_requant_div", "wk_requant_mul", "wk_requant_add", "wk_requant_div", "wv_requant_mul",
+        "wv_requant_add", "wv_requant_div", "kdiv_requant_mul", "kdiv_requant_add", "kdiv_requant_div",
+        "preattn_requant_mul", "preattn_requant_add", "preattn_requant_div", "postattn_requant_mul",
+        "postattn_requant_add", "postattn_requant_div", "wo_requant_mul", "wo_requant_add", "wo_requant_div"
+    ]),
+    outputDescriptor = IoDesc("data_out"),
+    attrDescriptors = [
+        AttrDesc("Delta", IntUnpack),
+        AttrDesc("eps", IntUnpack),
+        AttrDesc("eta", IntUnpack),
+        AttrDesc("act_type", IntUnpack),
+        AttrDesc("n_levels", IntUnpack),
+        AttrDesc("dim", IntUnpack),
+        AttrDesc("dim_head", IntUnpack),
+        AttrDesc("out_dim", IntUnpack),
+        AttrDesc("heads", IntUnpack),
+    ],
+)
+
 defaultOperatorDescriptors: Dict[str, OperatorDescriptor] = {
     "Add": addDesc,
+    "CLCA": clcaDesc,
     "Concat": concatDesc,
     "Conv": convDesc,
     "DebugPrint": debugPrintDesc,
diff --git a/Deeploy/Targets/Generic/Parsers.py b/Deeploy/Targets/Generic/Parsers.py
index 9389034969..d0f499b93e 100644
--- a/Deeploy/Targets/Generic/Parsers.py
+++ b/Deeploy/Targets/Generic/Parsers.py
@@ -1533,15 +1533,7 @@ def parseNode(self, node: gs.Node) -> (bool):
         ])
 
         if ret:
-            self.operatorRepresentation['Delta'] = int(node.attrs['Delta'])
-            self.operatorRepresentation['eps'] = int(node.attrs['eps'])
-            self.operatorRepresentation['eta'] = int(node.attrs['eta'])
-
self.operatorRepresentation['act_type'] = int(node.attrs['act_type']) - self.operatorRepresentation['n_levels'] = int(node.attrs['n_levels'].values) - self.operatorRepresentation['dim'] = int(node.attrs['dim'].values) - self.operatorRepresentation['dim_head'] = int(node.attrs['dim_head'].values) - self.operatorRepresentation['out_dim'] = int(node.attrs['out_dim'].values) - self.operatorRepresentation['heads'] = int(node.attrs['heads'].values) + self.operatorRepresentation.update(node.attrs) return ret From 26cf6486a54089b5177522a5e1cfb76920cd1da6 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Sun, 28 Sep 2025 22:13:11 +0200 Subject: [PATCH 29/54] Add IntegerMean --- Deeploy/OperatorDescriptor.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index f25926c1cf..9818601193 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -596,6 +596,7 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: "ITAMax": itaMaxDesc, "ITAPartialMax": itaPartialMaxDesc, "IntegerDiv": integerDivDescriptor, + "IntegerMean": reduceMeanDesc, "LayerNormalization": layerNormalizationDesc, "LinearAttention": linearAttentionDesc, "MatMul": matMulDesc, From 8b00f48d013c141a6c8a9373dff0573c186534f2 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Sun, 28 Sep 2025 22:18:31 +0200 Subject: [PATCH 30/54] Add MHSA --- Deeploy/OperatorDescriptor.py | 36 +++++++++++++++++++++++++++++- Deeploy/Targets/Generic/Parsers.py | 18 +-------------- 2 files changed, 36 insertions(+), 18 deletions(-) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index 9818601193..925a1ac0e3 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -3,7 +3,7 @@ # SPDX-License-Identifier: Apache-2.0 from enum import Enum, IntEnum -from typing import Any, Dict, Tuple +from typing import Any, Dict, Tuple, Union import numpy as np import onnx_graphsurgeon as gs @@ -51,6 +51,13 @@ def FloatTupleUnpack(value: Any) -> Tuple[float, ...]: return (FloatUnpack(value),) +def IntTupleIfNotSingleItemUnpack(value: Any) -> Union[int, Tuple[int, ...]]: + try: + return IntUnpack(value) + except: + return IntTupleUnpack(value) + + def attrToTensor(node: gs.Node, attr: str) -> None: values = node.attrs[attr] if isinstance(values, (int, float)): @@ -581,6 +588,32 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: ], ) +mhsaDesc = OperatorDescriptor( + inputDescriptor = IoDesc( + ["q", "k", "v", "wq_weight", "wq_bias", "wk_weight", "wk_bias", "wv_weight", "wv_bias", "wo_weight", + "wo_bias"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("preattn_requant_mul", IntTupleIfNotSingleItemUnpack), + AttrDesc("preattn_requant_div", IntTupleIfNotSingleItemUnpack), + AttrDesc("postattn_requant_mul", IntTupleIfNotSingleItemUnpack), + AttrDesc("postattn_requant_div", IntTupleIfNotSingleItemUnpack), + AttrDesc("wo_requant_mul", IntTupleIfNotSingleItemUnpack), + AttrDesc("wo_requant_div", IntTupleIfNotSingleItemUnpack), + AttrDesc("wq_requant_mul", IntTupleIfNotSingleItemUnpack), + AttrDesc("wq_requant_div", IntTupleIfNotSingleItemUnpack), + AttrDesc("wk_requant_mul", IntTupleIfNotSingleItemUnpack), + AttrDesc("wk_requant_div", IntTupleIfNotSingleItemUnpack), + AttrDesc("wv_requant_mul", IntTupleIfNotSingleItemUnpack), + AttrDesc("wv_requant_div", IntTupleIfNotSingleItemUnpack), + AttrDesc("n_levels", IntUnpack), + AttrDesc("dim", IntUnpack), + AttrDesc("dim_head", IntUnpack), + AttrDesc("heads", 
IntUnpack), + AttrDesc("signed", BoolUnpack), + ], +) + defaultOperatorDescriptors: Dict[str, OperatorDescriptor] = { "Add": addDesc, "CLCA": clcaDesc, @@ -599,6 +632,7 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: "IntegerMean": reduceMeanDesc, "LayerNormalization": layerNormalizationDesc, "LinearAttention": linearAttentionDesc, + "MHSA": mhsaDesc, "MatMul": matMulDesc, "MatMulInteger": matMulDesc, "MaxPool": maxPoolDesc, diff --git a/Deeploy/Targets/Generic/Parsers.py b/Deeploy/Targets/Generic/Parsers.py index d0f499b93e..8a3e0662d6 100644 --- a/Deeploy/Targets/Generic/Parsers.py +++ b/Deeploy/Targets/Generic/Parsers.py @@ -1408,23 +1408,7 @@ def parseNode(self, node: gs.Node) -> (bool): ]) if ret: - self.operatorRepresentation['preattn_requant_mul'] = node.attrs['preattn_requant_mul'] - self.operatorRepresentation['preattn_requant_div'] = node.attrs['preattn_requant_div'] - self.operatorRepresentation['postattn_requant_mul'] = node.attrs['postattn_requant_mul'] - self.operatorRepresentation['postattn_requant_div'] = node.attrs['postattn_requant_div'] - self.operatorRepresentation['wo_requant_mul'] = node.attrs['wo_requant_mul'] - self.operatorRepresentation['wo_requant_div'] = node.attrs['wo_requant_div'] - self.operatorRepresentation['wq_requant_mul'] = node.attrs['wq_requant_mul'] - self.operatorRepresentation['wq_requant_div'] = node.attrs['wq_requant_div'] - self.operatorRepresentation['wk_requant_mul'] = node.attrs['wk_requant_mul'] - self.operatorRepresentation['wk_requant_div'] = node.attrs['wk_requant_div'] - self.operatorRepresentation['wv_requant_mul'] = node.attrs['wv_requant_mul'] - self.operatorRepresentation['wv_requant_div'] = node.attrs['wv_requant_div'] - self.operatorRepresentation['n_levels'] = int(node.attrs['n_levels']) - self.operatorRepresentation['dim'] = int(node.attrs['dim']) # Sequence Length - self.operatorRepresentation['dim_head'] = int(node.attrs['dim_head']) # Projection Size - self.operatorRepresentation['heads'] = int(node.attrs['heads']) - self.operatorRepresentation['signed'] = int(node.attrs['signed']) + self.operatorRepresentation.update(node.attrs) return ret From 6ecf95db33066d0fec6c32551473c45b27ccebf2 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Sun, 28 Sep 2025 22:28:14 +0200 Subject: [PATCH 31/54] Add Relu, Reshape, RequantShift --- Deeploy/OperatorDescriptor.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index 925a1ac0e3..daa6c41f9b 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -614,6 +614,28 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: ], ) +reluDesc = OperatorDescriptor( + inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [], +) + +reshapeDesc = OperatorDescriptor( + inputDescriptor = IoDesc(["data_in", "shape"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [], +) + +requantShiftDesc = RequantizedOperatorDescriptor( + inputDescriptor = IoDesc(["data_in", "mul", "add"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("n_levels", IntUnpack), + AttrDesc("signed", BoolUnpack), + AttrDesc("div", IntUnpack), + ], +) + defaultOperatorDescriptors: Dict[str, OperatorDescriptor] = { "Add": addDesc, "CLCA": clcaDesc, @@ -644,9 +666,12 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: "RQMatMul": rqMatMulDesc, "ReduceMean": reduceMeanDesc, "ReduceSum": reduceSumDesc, + "Relu": reluDesc, 
"RequantizedConv": requantizedConvDesc, "RequantizedGemm": requantizedGemmDesc, "RequantizediGELU": requantizedIGeluDesc, + "RequantShift": requantShiftDesc, + "Reshape": reshapeDesc, "Slice": sliceDesc, "Softmax": softmaxDesc, "SoftmaxGrad": softmaxGradDesc, From 9a577a39e8c198a38ef8dc1e717a6be216c8df91 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Sun, 28 Sep 2025 22:56:46 +0200 Subject: [PATCH 32/54] Add RequantizedAdd --- Deeploy/OperatorDescriptor.py | 47 +++++++++++++++++++++++++++++- Deeploy/Targets/Generic/Parsers.py | 32 ++++---------------- 2 files changed, 52 insertions(+), 27 deletions(-) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index daa6c41f9b..f9f5532bfa 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -355,7 +355,7 @@ class RequantizedOperatorDescriptor(OperatorDescriptor): def canonicalize(self, node: gs.Node, opset: int) -> bool: if "n_levels_out" in node.attrs and "n_levels" in node.attrs: # TODO: Change to log - print("[WARNING] RequantizedConv cannot have n_levels_out and n_levels in it's attributes") + print("[WARNING] Requantized operator cannot have n_levels_out and n_levels in its attributes") return False if "n_levels_out" in node.attrs: @@ -636,6 +636,50 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: ], ) + +class RequantizedAddDescriptor(OperatorDescriptor): + + def canonicalize(self, node: gs.Node, opset: int) -> bool: + for tensor in ["rqs1", "rqs2", "rqsOut"]: + n_levels = f"{tensor}_n_levels" + n_levels_out = f"{tensor}_n_levels_out" + if n_levels_out in node.attrs and n_levels in node.attrs: + # TODO: Change to log + print( + f"[WARNING] RequantizedAdd tensor {tensor} cannot have {n_levels_out} and {n_levels} in its attributes" + ) + return False + + if n_levels_out in node.attrs: + node.attrs[n_levels] = node.attrs[n_levels_out] + node.attrs.pop(n_levels_out) + + return super().canonicalize(node, opset) + + +requantizedAddDesc = RequantizedAddDescriptor( + inputDescriptor = IoDesc(["data_in_0", "data_in_1"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("rqs1_n_levels", IntUnpack), + AttrDesc("rqs1_mul", IntUnpack), + AttrDesc("rqs1_add", IntUnpack), + AttrDesc("rqs1_div", IntUnpack), + AttrDesc("rqs1_signed", BoolUnpack), + AttrDesc("rqs1_n_levels", IntUnpack), + AttrDesc("rqs2_mul", IntUnpack), + AttrDesc("rqs2_add", IntUnpack), + AttrDesc("rqs2_div", IntUnpack), + AttrDesc("rqs2_signed", BoolUnpack), + AttrDesc("rqs2_n_levels", IntUnpack), + AttrDesc("rqsOut_mul", IntUnpack), + AttrDesc("rqsOut_add", IntUnpack), + AttrDesc("rqsOut_div", IntUnpack), + AttrDesc("rqsOut_signed", BoolUnpack), + AttrDesc("rqsOut_n_levels", IntUnpack), + ], +) + defaultOperatorDescriptors: Dict[str, OperatorDescriptor] = { "Add": addDesc, "CLCA": clcaDesc, @@ -667,6 +711,7 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: "ReduceMean": reduceMeanDesc, "ReduceSum": reduceSumDesc, "Relu": reluDesc, + "RequantizedAdd": requantizedAddDesc, "RequantizedConv": requantizedConvDesc, "RequantizedGemm": requantizedGemmDesc, "RequantizediGELU": requantizedIGeluDesc, diff --git a/Deeploy/Targets/Generic/Parsers.py b/Deeploy/Targets/Generic/Parsers.py index 8a3e0662d6..ad3bad549d 100644 --- a/Deeploy/Targets/Generic/Parsers.py +++ b/Deeploy/Targets/Generic/Parsers.py @@ -2373,32 +2373,12 @@ def parseNode(self, node: gs.Node) -> bool: ]) if ret: - if 'rqs1_n_levels' in node.attrs: - self.operatorRepresentation['rqs1_n_levels'] = 
int(node.attrs['rqs1_n_levels'].values) - else: - self.operatorRepresentation['rqs1_n_levels'] = int(node.attrs['rqs1_n_levels_out'].values) - self.operatorRepresentation['rqs1_mul'] = int(node.attrs['rqs1_mul']) - self.operatorRepresentation['rqs1_add'] = int(node.attrs['rqs1_add']) - self.operatorRepresentation['rqs1_signed'] = int(node.attrs['rqs1_signed'].values) - self.operatorRepresentation['rqs1_log2D'] = int(math.log2(node.attrs['rqs1_div'].values)) - - if 'rqs2_n_levels' in node.attrs: - self.operatorRepresentation['rqs2_n_levels'] = int(node.attrs['rqs2_n_levels'].values) - else: - self.operatorRepresentation['rqs2_n_levels'] = int(node.attrs['rqs2_n_levels_out'].values) - self.operatorRepresentation['rqs2_mul'] = int(node.attrs['rqs2_mul']) - self.operatorRepresentation['rqs2_add'] = int(node.attrs['rqs2_add']) - self.operatorRepresentation['rqs2_signed'] = int(node.attrs['rqs2_signed'].values) - self.operatorRepresentation['rqs2_log2D'] = int(math.log2(node.attrs['rqs2_div'].values)) - - if 'rqsOut_n_levels' in node.attrs: - self.operatorRepresentation['rqsOut_n_levels'] = int(node.attrs['rqsOut_n_levels'].values) - else: - self.operatorRepresentation['rqsOut_n_levels'] = int(node.attrs['rqsOut_n_levels_out'].values) - self.operatorRepresentation['rqsOut_mul'] = int(node.attrs['rqsOut_mul']) - self.operatorRepresentation['rqsOut_add'] = int(node.attrs['rqsOut_add']) - self.operatorRepresentation['rqsOut_signed'] = int(node.attrs['rqsOut_signed'].values) - self.operatorRepresentation['rqsOut_log2D'] = int(math.log2(node.attrs['rqsOut_div'].values)) + self.operatorRepresentation.update(node.attrs) + + for tensor in ["rqs1", "rqs2", "rqsOut"]: + value = self.operatorRepresentation[f"{tensor}_div"] + assert isinstance(value, int) + self.operatorRepresentation[f"{tensor}_log2D"] = int(math.log2(value)) return ret From 8ae808a26466acd85fc5ed00caf27682719f1c81 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Sun, 28 Sep 2025 23:07:48 +0200 Subject: [PATCH 33/54] Add RequantizediHardswish --- Deeploy/OperatorDescriptor.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index f9f5532bfa..ecf077a480 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -277,6 +277,17 @@ class GeluApprox(str, Enum): AttrDesc("three", IntUnpack), ]) +requantizedIHardswishDesc = OperatorDescriptor(inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("one_over_six", IntUnpack), + AttrDesc("six", IntUnpack), + AttrDesc("three", IntUnpack), + AttrDesc("mul", IntUnpack), + AttrDesc("add", IntUnpack), + AttrDesc("shift", IntUnpack), + ]) + iNoNormDesc = OperatorDescriptor(inputDescriptor = IoDesc(["data_in", "weights", "bias"]), outputDescriptor = IoDesc("data_out"), attrDescriptors = [ @@ -715,6 +726,7 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: "RequantizedConv": requantizedConvDesc, "RequantizedGemm": requantizedGemmDesc, "RequantizediGELU": requantizedIGeluDesc, + "RequantizediHardswish": requantizedIHardswishDesc, "RequantShift": requantShiftDesc, "Reshape": reshapeDesc, "Slice": sliceDesc, From 5eece92c257bc1ab50143c446a09361ea61b64a4 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Sun, 28 Sep 2025 23:19:30 +0200 Subject: [PATCH 34/54] Add iGELU --- Deeploy/OperatorDescriptor.py | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/Deeploy/OperatorDescriptor.py 
b/Deeploy/OperatorDescriptor.py index ecf077a480..01c5fbeb08 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -256,11 +256,22 @@ class GeluApprox(str, Enum): none = "none" -geluDesc = OperatorDescriptor(inputDescriptor = IoDesc("data_in"), - outputDescriptor = IoDesc("data_out"), - attrDescriptors = [ - AttrDesc("approximate", GeluApprox, default = GeluApprox.none), - ]) +geluDesc = OperatorDescriptor( + inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("approximate", GeluApprox, default = GeluApprox.none), + ], +) + +iGeluDesc = OperatorDescriptor( + inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("b", IntUnpack), + AttrDesc("one", IntUnpack), + ], +) requantizedIGeluDesc = OperatorDescriptor(inputDescriptor = IoDesc(["data_in", "mul", "add", "shift"]), outputDescriptor = IoDesc("data_out"), @@ -691,6 +702,12 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: ], ) +sgdDesc = OperatorDescriptor( + inputDescriptor = IoDesc(["weight", "grad"]), + outputDescriptor = IoDesc("weight_updated"), + attrDescriptors = [AttrDesc("lr", FloatUnpack)], +) + defaultOperatorDescriptors: Dict[str, OperatorDescriptor] = { "Add": addDesc, "CLCA": clcaDesc, @@ -729,12 +746,14 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: "RequantizediHardswish": requantizedIHardswishDesc, "RequantShift": requantShiftDesc, "Reshape": reshapeDesc, + "SGD": sgdDesc, "Slice": sliceDesc, "Softmax": softmaxDesc, "SoftmaxGrad": softmaxGradDesc, "Squeeze": squeezeDesc, "Transpose": transposeDesc, "Unsqueeze": unsqueezeDesc, + "iGELU": iGeluDesc, "iHardswish": iHardswishDesc, "iLayerNorm": iLayerNormDesc, "iNoNorm": iNoNormDesc, From 75983038183da7eabe9ba9f76619f77c4f2aa46b Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Sun, 28 Sep 2025 23:23:24 +0200 Subject: [PATCH 35/54] Add SoftmaxCrossEntropyLoss(Grad) --- Deeploy/OperatorDescriptor.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index 01c5fbeb08..ff1fbcf3fd 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -708,6 +708,18 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: attrDescriptors = [AttrDesc("lr", FloatUnpack)], ) +softmaxCrossEntropyLossDesc = OperatorDescriptor( + inputDescriptor = IoDesc(["logits", "labels"]), + outputDescriptor = IoDesc("log_prob"), + attrDescriptors = [], +) + +softmaxCrossEntropyLossGradDesc = OperatorDescriptor( + inputDescriptor = IoDesc(["log_prob", "labels"]), + outputDescriptor = IoDesc("grad"), + attrDescriptors = [], +) + defaultOperatorDescriptors: Dict[str, OperatorDescriptor] = { "Add": addDesc, "CLCA": clcaDesc, @@ -749,6 +761,8 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: "SGD": sgdDesc, "Slice": sliceDesc, "Softmax": softmaxDesc, + "SoftmaxCrossEntropyLoss": softmaxCrossEntropyLossDesc, + "SoftmaxCrossEntropyLossGrad": softmaxCrossEntropyLossGradDesc, "SoftmaxGrad": softmaxGradDesc, "Squeeze": squeezeDesc, "Transpose": transposeDesc, From 72c8d21ca66bbfdf61f2c1b734acc98a34915361 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Sun, 28 Sep 2025 23:50:41 +0200 Subject: [PATCH 36/54] Add Memcopy for dma tests --- DeeployTest/testUtils/dmaUtils.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/DeeployTest/testUtils/dmaUtils.py b/DeeployTest/testUtils/dmaUtils.py index 
3205275fda..ba2f6e176f 100644 --- a/DeeployTest/testUtils/dmaUtils.py +++ b/DeeployTest/testUtils/dmaUtils.py @@ -10,14 +10,13 @@ from Deeploy.AbstractDataTypes import BaseType, Pointer, PointerClass from Deeploy.CommonExtensions.DataTypes import minimalIntegerType -from Deeploy.DeeployTypes import NetworkContext, NetworkDeployer, NodeParser, NodeTemplate, NodeTypeChecker, \ - ONNXLayer, OperatorRepresentation, VariableBuffer +from Deeploy.DeeployTypes import IoDesc, NetworkContext, NetworkDeployer, NodeParser, NodeTemplate, NodeTypeChecker, \ + ONNXLayer, OperatorDescriptor, OperatorRepresentation, VariableBuffer from Deeploy.MemoryLevelExtension.MemoryLevels import MemoryHierarchy, MemoryLevel from Deeploy.MemoryLevelExtension.NetworkDeployers.MemoryLevelDeployer import MemoryDeployerWrapper, \ MemoryPlatformWrapper from Deeploy.MemoryLevelExtension.OptimizationPasses.MemoryLevelAnnotationPasses import AnnotateDefaultMemoryLevel, \ AnnotateIOMemoryLevel -from Deeploy.OperatorDescriptor import defaultOperatorDescriptors from Deeploy.Targets.PULPOpen.Deployer import PULPDeployer from Deeploy.Targets.PULPOpen.Platform import MemoryPULPPlatform, PULPOptimizer from Deeploy.Targets.Snitch.Deployer import SnitchDeployer @@ -280,6 +279,17 @@ def defaultScheduler(graph: gs.Graph) -> List[List[gs.Node]]: return [[node] for node in graph.nodes] +memcpyDesc = OperatorDescriptor( + inputDescriptor = IoDesc("src"), + outputDescriptor = IoDesc("dest"), + attrDescriptors = [], +) + +dmaTestOperatorDescriptors = { + "Memcpy": memcpyDesc, +} + + def setup_pulp_deployer(defaultMemory: str, targetMemory: str, graph: gs.Graph, inputTypes: Dict[str, Type[Pointer]], doublebuffer: bool, deeployStateDir: str) -> NetworkDeployer: L3 = MemoryLevel(name = "L3", neighbourNames = ["L2"], size = 64000000) @@ -300,7 +310,7 @@ def setup_pulp_deployer(defaultMemory: str, targetMemory: str, graph: gs.Graph, platform, inputTypes, PULPOptimizer, - defaultOperatorDescriptors, + dmaTestOperatorDescriptors, defaultScheduler, default_channels_first = True, deeployStateDir = deeployStateDir) @@ -342,7 +352,7 @@ def setup_snitch_deployer(defaultMemory: str, targetMemory: str, graph: gs.Graph platform, inputTypes, SnitchOptimizer, - defaultOperatorDescriptors, + dmaTestOperatorDescriptors, defaultScheduler, deeployStateDir = deeployStateDir) memoryLevelAnnotationPasses = [AnnotateIOMemoryLevel(defaultMemory), AnnotateDefaultMemoryLevel(memoryHierarchy)] From bff86683b022bbee34b0a75919be874dcbb0c94f Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Mon, 27 Oct 2025 13:30:30 +0100 Subject: [PATCH 37/54] Remove some trailing white space in CHANGELOG.md --- CHANGELOG.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a567305e2b..6b6ee83f60 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -177,9 +177,9 @@ This release containing major architectural changes, new platform support, enhan ### Added -- BatchNorm kernel -- ConvTranspose kernel -- MaxPool1D kernel +- BatchNorm kernel +- ConvTranspose kernel +- MaxPool1D kernel - Template for 1D Convolution - Support for float32 data type in the previous kernels - Float binding for Pad1D kernel @@ -318,7 +318,7 @@ This release containing major architectural changes, new platform support, enhan ### Changed - FloatConvTemplate file -- Platform.py file +- Platform.py file - Bump the CMake version to 3.24 as required for the chimera-sdk - Bump GVSoC's version and add chimera simulation target - Rename the generic source util to utils to 
avoid name collision with chimera-sdk From 5ac4e316398e333e5573f8cff07bfd64cd5d76a7 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Mon, 27 Oct 2025 13:55:49 +0100 Subject: [PATCH 38/54] Catch canonicalization exceptions and re-raise with node context --- Deeploy/DeeployTypes.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Deeploy/DeeployTypes.py b/Deeploy/DeeployTypes.py index aecb112b57..2bf9452ade 100644 --- a/Deeploy/DeeployTypes.py +++ b/Deeploy/DeeployTypes.py @@ -2738,7 +2738,10 @@ def _bindLayers(self): assert node.op in self.operatorDescriptors, \ f"[ERROR] Error parsing node {node.name}. There is no descriptor for operator {node.op}." desc = self.operatorDescriptors[node.op] - desc.canonicalize(node, self.graph.opset) + try: + desc.canonicalize(node, self.graph.opset) + except BaseException as e: + raise ValueError(f"[ERROR] Node {node.name} of op {node.op} could not be canonicalized.") from e assert desc.check(node), \ f"[ERROR] Node {node.name} is not a valid instance of {node.op} operator" From 2f871d476ea5019ba9e9884e2626dffe1b286324 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Mon, 27 Oct 2025 13:56:10 +0100 Subject: [PATCH 39/54] Make IntegerDataTypes a tuple --- Deeploy/CommonExtensions/DataTypes.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Deeploy/CommonExtensions/DataTypes.py b/Deeploy/CommonExtensions/DataTypes.py index 4f6dba3827..c05ea3b9d9 100644 --- a/Deeploy/CommonExtensions/DataTypes.py +++ b/Deeploy/CommonExtensions/DataTypes.py @@ -87,11 +87,11 @@ class float64_t(FloatImmediate): SignedIntegerDataTypes: Tuple[Type[IntegerImmediate], ...] = (int8_t, int16_t, int32_t, int64_t) UnsignedIntegerDataTypes: Tuple[Type[IntegerImmediate], ...] = (uint8_t, uint16_t, uint32_t, uint64_t) -IntegerDataTypes: Tuple[Type[IntegerImmediate], ...] = (sorted(( - *SignedIntegerDataTypes, - *UnsignedIntegerDataTypes, -), - key = lambda _type: _type.typeWidth)) +IntegerDataTypes: Tuple[Type[IntegerImmediate], ...] = tuple( + sorted(( + *SignedIntegerDataTypes, + *UnsignedIntegerDataTypes, + ), key = lambda _type: _type.typeWidth)) FloatDataTypes: Tuple[Type[FloatImmediate], ...]
= (bfloat16_t, float16_t, float32_t, float64_t) From 31577c31c95e36decbdd12dfe616521df2795a70 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Mon, 27 Oct 2025 13:57:00 +0100 Subject: [PATCH 40/54] Fix reshape bindings (which are used for squeeze/unsqueeze too) to type-map axes to int64_t as per ONNX --- Deeploy/Targets/PULPOpen/Bindings.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/Deeploy/Targets/PULPOpen/Bindings.py b/Deeploy/Targets/PULPOpen/Bindings.py index 9ff940b2f0..57fdf90a57 100644 --- a/Deeploy/Targets/PULPOpen/Bindings.py +++ b/Deeploy/Targets/PULPOpen/Bindings.py @@ -9,8 +9,8 @@ from Deeploy.CommonExtensions.CodeTransformationPasses.Closure import ClosureGeneration, MemoryAwareClosureGeneration from Deeploy.CommonExtensions.CodeTransformationPasses.MemoryAllocation import ArgumentStructGeneration, \ MemoryManagementGeneration, MemoryPassthroughGeneration -from Deeploy.CommonExtensions.DataTypes import IntegerDataTypes, SignedIntegerDataTypes, float32_t, int8_t, int32_t, \ - uint8_t +from Deeploy.CommonExtensions.DataTypes import FloatDataTypes, IntegerDataTypes, SignedIntegerDataTypes, float32_t, \ + int8_t, int32_t, int64_t, uint8_t from Deeploy.DeeployTypes import CodeTransformation, NodeBinding, NodeTemplate from Deeploy.FutureExtension.Bindings.AutoFutureBinding import AutoFutureBinding from Deeploy.FutureExtension.CodeTransformationPasses.FutureCodeTransformation import FutureGeneration @@ -153,11 +153,8 @@ ] PULPReshapeBindings = [ - NodeBinding(ReshapeChecker([PointerClass(type), PointerClass(int32_t)], [PointerClass(type)]), - ReshapeTemplate.referenceTemplate, SkipTransformer) for type in IntegerDataTypes -] + [ - NodeBinding(ReshapeChecker([PointerClass(float32_t), PointerClass(type)], [PointerClass(float32_t)]), - ReshapeTemplate.referenceTemplate, SkipTransformer) for type in IntegerDataTypes + NodeBinding(ReshapeChecker([PointerClass(type), PointerClass(int64_t)], [PointerClass(type)]), + ReshapeTemplate.referenceTemplate, SkipTransformer) for type in IntegerDataTypes + FloatDataTypes ] PULPRQAddBindings = [ From 90102f5eac819838252446fc5a0ab5513f09903e Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Mon, 27 Oct 2025 14:15:48 +0100 Subject: [PATCH 41/54] Canonicalize (un)squeeze operations as pre-opset-13, i.e., put axes into node attributes to avoid creating a buffer for them --- Deeploy/OperatorDescriptor.py | 18 +++++++++++-- Deeploy/Targets/Generic/Parsers.py | 43 +++++++----------------------- 2 files changed, 25 insertions(+), 36 deletions(-) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index ff1fbcf3fd..a0cb483589 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -476,15 +476,29 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: attrDescriptors = [AttrDesc("axis", IntUnpack, default = 0)], ) + +class SqueezeDescriptor(OperatorDescriptor): + + def canonicalize(self, node: gs.Node, opset: int) -> bool: + if opset >= 13: + assert len(node.inputs) == 2, f"Expected 2 inputs but received {len(node.inputs)}" + axes = node.inputs[1] + assert isinstance(axes, + gs.Constant), f"Expected axes to be a constant but received axes of type {type(axes)}" + node.attrs["axes"] = axes.values + axes.outputs.clear() + return super().canonicalize(node, opset) + + # Opset <= 11 -unsqueezeDesc = OperatorDescriptor( +unsqueezeDesc = SqueezeDescriptor( inputDescriptor = IoDesc("data_in"), outputDescriptor = IoDesc("data_out"), attrDescriptors = [AttrDesc("axes", IntTupleUnpack)],
) # Opset <= 11 -squeezeDesc = OperatorDescriptor( +squeezeDesc = SqueezeDescriptor( inputDescriptor = IoDesc("data_in"), outputDescriptor = IoDesc("data_out"), attrDescriptors = [AttrDesc("axes", IntTupleUnpack)], diff --git a/Deeploy/Targets/Generic/Parsers.py b/Deeploy/Targets/Generic/Parsers.py index ad3bad549d..ddd08a8551 100644 --- a/Deeploy/Targets/Generic/Parsers.py +++ b/Deeploy/Targets/Generic/Parsers.py @@ -986,48 +986,23 @@ def __init__(self): super().__init__() def parseNode(self, node: gs.Node) -> (bool): + if not all(['axes' in node.attrs, len(node.inputs) == 1, len(node.outputs) == 1]): + return False - # ONNX v11: 'axes' is a node attribute - if 'axes' in node.attrs: - ret = all(['axes' in node.attrs, len(node.inputs) == 1, len(node.outputs) == 1]) - # ONNX v13+: 'axes' becomes an input with the data - # Source: https://onnx.ai/onnx/operators/onnx__Unsqueeze.html - else: - ret = all([len(node.inputs) == 2, len(node.outputs) == 1]) - - if ret and 'axes' in node.attrs: - axes_attr = node.attrs['axes'] - self.operatorRepresentation['axes'] = [int(axes_attr)] if isinstance(axes_attr, int) \ - else [int(a) for a in axes_attr] - # For opset 13+, axes will be extracted from the second input in parseNodeCtxt - - return ret + self.operatorRepresentation['axes'] = node.attrs['axes'] + return True def parseNodeCtxt(self, ctxt: NetworkContext, node: gs.Node, channels_first: bool = True) -> Tuple[NetworkContext, bool]: + inputs = ['data_in'] + for idx, inputNode in enumerate(node.inputs): + self.operatorRepresentation[inputs[idx]] = ctxt.lookup(inputNode.name).name outputs = ['data_out'] - if len(node.inputs) == 1: - inputs = ['data_in'] - for idx, inputNode in enumerate(node.inputs): - self.operatorRepresentation[inputs[idx]] = ctxt.lookup(inputNode.name).name - for idx, outputNode in enumerate(node.outputs): - self.operatorRepresentation[outputs[idx]] = ctxt.lookup(outputNode.name).name - else: - data_in = ctxt.lookup(node.inputs[0].name) - data_out = ctxt.lookup(node.outputs[0].name) - self.operatorRepresentation['data_in'] = data_in.name - self.operatorRepresentation['data_out'] = data_out.name - # axes must be a constant; extract values - axes_buf = ctxt.lookup(node.inputs[1].name) - assert hasattr(axes_buf, 'values'), "Unsqueeze: expected constant 'axes' input for opset 13+" - axes_vals = np.array(axes_buf.values).astype(int).flatten().tolist() - self.operatorRepresentation['axes'] = axes_vals - # Do not deploy the axes tensor - axes_buf._live = False - axes_buf._deploy = False + for idx, outputNode in enumerate(node.outputs): + self.operatorRepresentation[outputs[idx]] = ctxt.lookup(outputNode.name).name return ctxt, True From 7bd7353df3ca04e1414164f2025ea9a892eff2e7 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Mon, 27 Oct 2025 16:37:50 +0100 Subject: [PATCH 42/54] Add BatchNormalization descriptor --- Deeploy/OperatorDescriptor.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index a0cb483589..9d74a32aec 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -734,8 +734,19 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: attrDescriptors = [], ) +batchNormalizationDesc = OperatorDescriptor( + inputDescriptor = IoDesc(["data_in", "scale", "bias", "mean", "variance"]), + outputDescriptor = IoDesc(["data_out"], optional = ["running_mean", "running_var"]), + attrDescriptors = [ + AttrDesc("epsilon", FloatUnpack, default = 1e-5), + AttrDesc("momentum", 
FloatUnpack, default = 0.9), + AttrDesc("training_mode", BoolUnpack, default = False), + ], +) + defaultOperatorDescriptors: Dict[str, OperatorDescriptor] = { "Add": addDesc, + "BatchNormalization": batchNormalizationDesc, "CLCA": clcaDesc, "Concat": concatDesc, "Conv": convDesc, From 16bc4630c2e4ac4f71f2a798c8c7f5bae25d3023 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Mon, 27 Oct 2025 16:38:11 +0100 Subject: [PATCH 43/54] Add ConvTranspose descriptor --- Deeploy/OperatorDescriptor.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index 9d74a32aec..fbf333cf1f 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -371,6 +371,25 @@ def _padsDefault(node: gs.Node) -> Tuple[int, ...]: ], ) +convTransposeDesc = OperatorDescriptor( + inputDescriptor = IoDesc(["data_in", "weight"], optional = "bias"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("auto_pad", AutoPad, default = AutoPad.NOTSET), + AttrDesc("dilations", IntTupleUnpack, default = _dilationsDefault), + AttrDesc("group", IntUnpack, default = 1), + AttrDesc("kernel_shape", IntTupleUnpack, default = _kernelShapeDefault), + # TODO: Add output_shape and output_padding default functions. + # Docs: + # - ONNX: https://onnx.ai/onnx/operators/onnx__ConvTranspose.html + # - PyTorch: https://docs.pytorch.org/docs/stable/generated/torch.nn.ConvTranspose2d.html + # AttrDesc("output_shape", IntTupleUnpack, default = _outputShapeDefault), + # AttrDesc("output_padding", IntTupleUnpack, default = _outputPaddingDefault), + AttrDesc("pads", IntTupleUnpack, default = _padsDefault), + AttrDesc("strides", IntTupleUnpack, default = _stridesDefault), + ], +) + class RequantizedOperatorDescriptor(OperatorDescriptor): @@ -750,6 +769,7 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: "CLCA": clcaDesc, "Concat": concatDesc, "Conv": convDesc, + "ConvTranspose": convTransposeDesc, "DebugPrint": debugPrintDesc, "Dequant": dequantDesc, "Div": divDesc, From d865898cefe487c83676282f61e7d2740e24f16e Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Mon, 27 Oct 2025 16:50:43 +0100 Subject: [PATCH 44/54] Relax opset check on squeeze operations to a warning --- Deeploy/OperatorDescriptor.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index fbf333cf1f..0e93a07ed4 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -9,6 +9,7 @@ import onnx_graphsurgeon as gs from Deeploy.DeeployTypes import AttrDesc, IoDesc, OperatorDescriptor, VariadicIoDesc +from Deeploy.Logging import DEFAULT_LOGGER as log def IntUnpack(value: Any) -> int: @@ -499,13 +500,24 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: class SqueezeDescriptor(OperatorDescriptor): def canonicalize(self, node: gs.Node, opset: int) -> bool: - if opset >= 13: - assert len(node.inputs) == 2, f"Expected 2 inputs but received {len(node.inputs)}" + if len(node.inputs) == 2: axes = node.inputs[1] - assert isinstance(axes, - gs.Constant), f"Expected axes to be a constant but received axes of type {type(axes)}" + assert isinstance(axes, gs.Constant), \ + f"Expected axes to be a constant but received axes of type {type(axes)}" node.attrs["axes"] = axes.values axes.outputs.clear() + + if opset >= 13 and len(node.inputs) != 2: + log.warning( + "Squeeze operation expects 2 inputs for opset >= 13. 
" + f"Received node {node.name} with {len(node.inputs)} input{'s' if len(node.inputs) > 1 else ''} and opset {opset}" + ) + elif opset < 13 and len(node.inputs) != 1: + log.warning( + "Squeeze operation expects 1 input for opset < 13. " + f"Received node {node.name} with {len(node.inputs)} input{'s' if len(node.inputs) > 1 else ''} and opset {opset}" + ) + return super().canonicalize(node, opset) From cd62a695cb523db2dc207b6c876b10424c3b32d3 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Mon, 27 Oct 2025 16:56:21 +0100 Subject: [PATCH 45/54] Replace prints with logging --- Deeploy/DeeployTypes.py | 13 +++++-------- Deeploy/OperatorDescriptor.py | 9 +++------ 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/Deeploy/DeeployTypes.py b/Deeploy/DeeployTypes.py index 2bf9452ade..30c06548c0 100644 --- a/Deeploy/DeeployTypes.py +++ b/Deeploy/DeeployTypes.py @@ -1101,19 +1101,16 @@ def check(self, node: gs.Node) -> bool: valid = True if not self.inputDescriptor.checkTensors(node.inputs): - # TODO: Change to logging - print(f"[ERROR OP {node.op}] Invalid input tensors: {[t.name for t in node.inputs]}") + log.error(f"[OP {node.op}] Invalid input tensors: {[t.name for t in node.inputs]}") valid = False if not self.outputDescriptor.checkTensors(node.outputs): - # TODO: Change to logging - print(f"[ERROR OP {node.op}] Invalid output tensors: {[t.name for t in node.outputs]}") + log.error(f"[OP {node.op}] Invalid output tensors: {[t.name for t in node.outputs]}") valid = False for attrDesc in self.attrDescriptors: if attrDesc.default is None and not attrDesc.name in node.attrs: - # TODO: Change to logging - print(f"[ERROR OP {node.op}] Missing attribute {attrDesc.name}") + log.error(f"[OP {node.op}] Missing attribute {attrDesc.name}") valid = False return valid @@ -1128,7 +1125,7 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: try: node.attrs[desc.name] = desc.unpack(value) except Exception as e: - raise ValueError(f"[ERROR OP {node.op}] Error unpacking the attribute {desc.name}. {e}") from e + raise ValueError(f"[OP {node.op}] Error unpacking the attribute {desc.name}. {e}") from e return True def parseTensors(self, ctxt: NetworkContext, tensors: Sequence[gs.Tensor], @@ -1158,7 +1155,7 @@ def parse(self, ctxt: NetworkContext, node: gs.Node) -> OperatorRepresentation: firstKeySet = set(firstOpRepr.keys()) secondKeySet = set(secondOpRepr.keys()) assert firstKeySet.isdisjoint(secondKeySet), \ - f"[PARSE ERROR] (Node: {node.name}, Op: {node.op}) " \ + f"[OP {node.op}] Encourntered error while parsing node {node.name}. " \ f"Keys from parsing {firstName} clash with the keys from parsing {secondName}. 
"\ f"Overlapping keys: {firstKeySet ^ secondKeySet}" diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index 0e93a07ed4..4ebab580a7 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -396,8 +396,7 @@ class RequantizedOperatorDescriptor(OperatorDescriptor): def canonicalize(self, node: gs.Node, opset: int) -> bool: if "n_levels_out" in node.attrs and "n_levels" in node.attrs: - # TODO: Change to log - print("[WARNING] Requantized operator cannot have n_levels_out and n_levels in its attributes") + log.warning("Requantized operator cannot have n_levels_out and n_levels in its attributes") return False if "n_levels_out" in node.attrs: @@ -711,10 +710,8 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: n_levels = f"{tensor}_n_levels" n_levels_out = f"{tensor}_n_levels_out" if n_levels_out in node.attrs and n_levels in node.attrs: - # TODO: Change to log - print( - f"[WARNING] RequantizedAdd tensor {tensor} cannot have {n_levels_out} and {n_levels} in its attributes" - ) + log.warning( + f"RequantizedAdd tensor {tensor} cannot have {n_levels_out} and {n_levels} in its attributes") return False if n_levels_out in node.attrs: From 91bdeb7f4573dbee5a97a650ef464d3b62f60c7c Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Mon, 27 Oct 2025 17:02:59 +0100 Subject: [PATCH 46/54] Add missing itertools import --- Deeploy/DeeployTypes.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Deeploy/DeeployTypes.py b/Deeploy/DeeployTypes.py index 30c06548c0..b2afde7410 100644 --- a/Deeploy/DeeployTypes.py +++ b/Deeploy/DeeployTypes.py @@ -5,6 +5,7 @@ from __future__ import annotations import copy +import itertools import math import os import pickle From 238d3affd422e61cc65774a698bac093a8ce370c Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Mon, 27 Oct 2025 17:03:33 +0100 Subject: [PATCH 47/54] Initialize optional value with None --- Deeploy/DeeployTypes.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Deeploy/DeeployTypes.py b/Deeploy/DeeployTypes.py index b2afde7410..166c91289c 100644 --- a/Deeploy/DeeployTypes.py +++ b/Deeploy/DeeployTypes.py @@ -1023,10 +1023,11 @@ def copy(self) -> NetworkContext: class IoDesc: - def __init__(self, required: Union[str, List[str]], optional: Union[str, List[str]] = []) -> None: + def __init__(self, required: Union[str, List[str]], optional: Optional[Union[str, List[str]]] = None) -> None: if isinstance(required, str): required = [required] self.required = required + optional = optional if optional is not None else [] if isinstance(optional, str): optional = [optional] self.optional = optional From a4198b433517eeb5e0068063e04e8722da203f55 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Mon, 27 Oct 2025 17:04:34 +0100 Subject: [PATCH 48/54] Fix typo --- Deeploy/DeeployTypes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Deeploy/DeeployTypes.py b/Deeploy/DeeployTypes.py index 166c91289c..6b63697d2f 100644 --- a/Deeploy/DeeployTypes.py +++ b/Deeploy/DeeployTypes.py @@ -1149,7 +1149,7 @@ def parseAttrs(self, node: gs.Node) -> OperatorRepresentation: def parse(self, ctxt: NetworkContext, node: gs.Node) -> OperatorRepresentation: opReprs = { "input tensors": self.parseTensors(ctxt, node.inputs, self.inputDescriptor), - "output tesnors": self.parseTensors(ctxt, node.outputs, self.outputDescriptor), + "output tensors": self.parseTensors(ctxt, node.outputs, self.outputDescriptor), "attributes": self.parseAttrs(node), } From e8f1721bcf45c6c05efb921b1b54f65b9c1c5678 Mon Sep 
17 00:00:00 2001 From: Luka Macan Date: Mon, 27 Oct 2025 17:07:08 +0100 Subject: [PATCH 49/54] Explicit exception coverage --- Deeploy/OperatorDescriptor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index 4ebab580a7..3af145cd43 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -55,7 +55,7 @@ def FloatTupleUnpack(value: Any) -> Tuple[float, ...]: def IntTupleIfNotSingleItemUnpack(value: Any) -> Union[int, Tuple[int, ...]]: try: return IntUnpack(value) - except: + except ValueError: return IntTupleUnpack(value) From f180f85348c3f74c90cc97823069722b7d332d19 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Mon, 27 Oct 2025 17:11:31 +0100 Subject: [PATCH 50/54] Rename attrToTensor to attrToInputTensor and add inputTensorToAttr --- Deeploy/OperatorDescriptor.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index 3af145cd43..7f283708c0 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -59,7 +59,7 @@ def IntTupleIfNotSingleItemUnpack(value: Any) -> Union[int, Tuple[int, ...]]: return IntTupleUnpack(value) -def attrToTensor(node: gs.Node, attr: str) -> None: +def attrToInputTensor(node: gs.Node, attr: str) -> None: values = node.attrs[attr] if isinstance(values, (int, float)): values = np.array([values]) @@ -71,6 +71,14 @@ def attrToInputTensor(node: gs.Node, attr: str) -> None: node.attrs.pop(attr) +def inputTensorToAttr(node: gs.Node, tensorIdx: int, attr: str) -> None: + tensor = node.inputs[tensorIdx] + assert isinstance(tensor, gs.Constant), \ + f"Can convert only constant tensors to attributes. Received tensor of type {type(tensor)}" + node.attrs[attr] = tensor.values + tensor.outputs.clear() + + concatDesc = OperatorDescriptor( inputDescriptor = VariadicIoDesc("data_in", minNumTensors = 2), outputDescriptor = IoDesc("data_out"), @@ -91,10 +99,10 @@ class SliceDescriptor(OperatorDescriptor): def canonicalize(self, node: gs.Node, opset: int) -> bool: if opset < 10: - attrToTensor(node, "starts") - attrToTensor(node, "ends") + attrToInputTensor(node, "starts") + attrToInputTensor(node, "ends") if "axes" in node.attrs: - attrToTensor(node, "axes") + attrToInputTensor(node, "axes") return super().canonicalize(node, opset) @@ -184,7 +192,7 @@ class ReduceMeanDescriptor(OperatorDescriptor): def canonicalize(self, node: gs.Node, opset: int) -> bool: if opset < 18: if "axes" in node.attrs: - attrToTensor(node, "axes") + attrToInputTensor(node, "axes") return super().canonicalize(node, opset) From bc75e85564ea1401eeae2f4f2ecb1bfab9f82f22 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Mon, 27 Oct 2025 17:17:17 +0100 Subject: [PATCH 51/54] Use inputTensorToAttr in squeeze canonicalization --- Deeploy/OperatorDescriptor.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index 7f283708c0..6bf6b6ca30 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -508,11 +508,7 @@ class SqueezeDescriptor(OperatorDescriptor): def canonicalize(self, node: gs.Node, opset: int) -> bool: if len(node.inputs) == 2: - axes = node.inputs[1] - assert isinstance(axes, gs.Constant), \ - f"Expected axes to be a constant but received axes of type {type(axes)}" - node.attrs["axes"] = axes.values - axes.outputs.clear() + inputTensorToAttr(node, tensorIdx = 1, attr = "axes") if opset
>= 13 and len(node.inputs) != 2: log.warning( From 6976c52dc8368620e69164d43ca32eb03b2b851d Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Mon, 27 Oct 2025 17:12:52 +0100 Subject: [PATCH 52/54] Remove duplicate attribute --- Deeploy/OperatorDescriptor.py | 1 - 1 file changed, 1 deletion(-) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index 6bf6b6ca30..288a9de505 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -729,7 +729,6 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: inputDescriptor = IoDesc(["data_in_0", "data_in_1"]), outputDescriptor = IoDesc("data_out"), attrDescriptors = [ - AttrDesc("rqs1_n_levels", IntUnpack), AttrDesc("rqs1_mul", IntUnpack), AttrDesc("rqs1_add", IntUnpack), AttrDesc("rqs1_div", IntUnpack), From 97da07c29915bd7e2c3661212e02bb3a4f1a94df Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Mon, 27 Oct 2025 17:37:08 +0100 Subject: [PATCH 53/54] Refactor MatMulTileConstraint --- .../TileConstraints/MatMulTileConstraint.py | 87 +++++++++++-------- 1 file changed, 50 insertions(+), 37 deletions(-) diff --git a/Deeploy/Targets/PULPOpen/TileConstraints/MatMulTileConstraint.py b/Deeploy/Targets/PULPOpen/TileConstraints/MatMulTileConstraint.py index a9259a15cf..c0f3b70461 100644 --- a/Deeploy/Targets/PULPOpen/TileConstraints/MatMulTileConstraint.py +++ b/Deeploy/Targets/PULPOpen/TileConstraints/MatMulTileConstraint.py @@ -19,42 +19,50 @@ class MatMulTileConstraint(TileConstraint): @staticmethod def addGeometricalConstraint(tilerModel: TilerModel, parseDict: Dict, ctxt: NetworkContext) -> TilerModel: - - # Get to-be-tiled tensor's buffers bufferA = ctxt.lookup(name = parseDict['A']) bufferB = ctxt.lookup(name = parseDict['B']) - outputBuffer = ctxt.lookup(name = parseDict['data_out']) + bufferOut = ctxt.lookup(name = parseDict['data_out']) # Add I/O dimensions to the model as variables - for _buffer in [bufferA, bufferB, outputBuffer]: - tilerModel.addTensorDimToModel(ctxt, _buffer.name) - - tensorsShapeLen = len(bufferA.shape) - - AFirstDimVar = tilerModel.getTensorDimVar(tensorName = bufferA.name, - dimIdx = (tensorsShapeLen - 2) + int(parseDict['transA'])) - ASecondDimVar = tilerModel.getTensorDimVar(tensorName = bufferA.name, - dimIdx = (tensorsShapeLen - 1) - int(parseDict['transA'])) - BFirstDimVar = tilerModel.getTensorDimVar(tensorName = bufferB.name, - dimIdx = (tensorsShapeLen - 2) + int(parseDict['transB'])) - BSecondDimVar = tilerModel.getTensorDimVar(tensorName = bufferB.name, - dimIdx = (tensorsShapeLen - 1) - int(parseDict['transB'])) - outputFirstDimVar = tilerModel.getTensorDimVar(tensorName = outputBuffer.name, dimIdx = (tensorsShapeLen - 2)) - outputSecondDimVar = tilerModel.getTensorDimVar(tensorName = outputBuffer.name, dimIdx = (tensorsShapeLen - 1)) - - # Map output dims to inputs dims - for idx in range(tensorsShapeLen - 2): - tilerModel.addConstraint( - tilerModel.getTensorDimVar(tensorName = outputBuffer.name, dimIdx = idx) == tilerModel.getTensorDimVar( - tensorName = bufferA.name, dimIdx = idx)) - tilerModel.addConstraint( - tilerModel.getTensorDimVar(tensorName = outputBuffer.name, dimIdx = idx) == tilerModel.getTensorDimVar( - tensorName = bufferB.name, dimIdx = idx)) + for buff in [bufferA, bufferB, bufferOut]: + tilerModel.addTensorDimToModel(ctxt, buff.name) + + rankA = len(bufferA.shape) + if not parseDict['transA']: + firstDimIdxA, secondDimIdxA = rankA - 2, rankA - 1 + else: + firstDimIdxA, secondDimIdxA = rankA - 1, rankA - 2 + AFirstDimVar = 
tilerModel.getTensorDimVar(tensorName = bufferA.name, dimIdx = firstDimIdxA) + ASecondDimVar = tilerModel.getTensorDimVar(tensorName = bufferA.name, dimIdx = secondDimIdxA) + + rankB = len(bufferB.shape) + if not parseDict['transB']: + firstDimIdxB, secondDimIdxB = rankB - 2, rankB - 1 + else: + firstDimIdxB, secondDimIdxB = rankB - 1, rankB - 2 + BFirstDimVar = tilerModel.getTensorDimVar(tensorName = bufferB.name, dimIdx = firstDimIdxB) + BSecondDimVar = tilerModel.getTensorDimVar(tensorName = bufferB.name, dimIdx = secondDimIdxB) + + rankOut = len(bufferOut.shape) + outputFirstDimVar = tilerModel.getTensorDimVar(tensorName = bufferOut.name, dimIdx = rankOut - 2) + outputSecondDimVar = tilerModel.getTensorDimVar(tensorName = bufferOut.name, dimIdx = rankOut - 1) + + # Map batch dims between A and output + batchDimsA = rankA - 2 + for dimIdx in range(batchDimsA): + varA = tilerModel.getTensorDimVar(tensorName = bufferA.name, dimIdx = dimIdx) + varOut = tilerModel.getTensorDimVar(tensorName = bufferOut.name, dimIdx = (rankOut - rankA) + dimIdx) + tilerModel.addConstraint(varOut == varA) + + # Map batch dims between B and output + batchDimsB = rankB - 2 + for dimIdx in range(batchDimsB): + varB = tilerModel.getTensorDimVar(tensorName = bufferB.name, dimIdx = dimIdx) + varOut = tilerModel.getTensorDimVar(tensorName = bufferOut.name, dimIdx = (rankOut - rankB) + dimIdx) + tilerModel.addConstraint(varOut == varB) tilerModel.addConstraint(outputFirstDimVar == AFirstDimVar) tilerModel.addConstraint(outputSecondDimVar == BSecondDimVar) - - # Add GEMM Geometrical constraints tilerModel.addConstraint(ASecondDimVar == BFirstDimVar) return tilerModel @@ -65,14 +73,19 @@ def addPolicyConstraint(tilerModel: TilerModel, parseDict: Dict, ctxt: NetworkCo bufferA = ctxt.lookup(name = parseDict['A']) bufferB = ctxt.lookup(name = parseDict['B']) - tensorsShapeLen = len(bufferA.shape) - - ASecondDimVar = tilerModel.getTensorDimVar(tensorName = bufferA.name, - dimIdx = (tensorsShapeLen - 1) - parseDict['transA']) - BFirstDimVar = tilerModel.getTensorDimVar(tensorName = bufferB.name, - dimIdx = (tensorsShapeLen - 2) + parseDict['transB']) - BSecondDimVar = tilerModel.getTensorDimVar(tensorName = bufferB.name, - dimIdx = (tensorsShapeLen - 1) - parseDict['transB']) + rankA = len(bufferA.shape) + if not parseDict['transA']: + _, secondDimIdxA = rankA - 2, rankA - 1 + else: + _, secondDimIdxA = rankA - 1, rankA - 2 + ASecondDimVar = tilerModel.getTensorDimVar(tensorName = bufferA.name, dimIdx = secondDimIdxA) + + rankB = len(bufferB.shape) + if not parseDict['transB']: + firstDimIdxB, _ = rankB - 2, rankB - 1 + else: + firstDimIdxB, _ = rankB - 1, rankB - 2 + BFirstDimVar = tilerModel.getTensorDimVar(tensorName = bufferB.name, dimIdx = firstDimIdxB) # VIC: We don't want to deal with intermediate results between kernel calls tilerModel.addConstraint(ASecondDimVar == parseDict['N']) From 0c64a3eb587aeeb703a63512057e32e182b4f0e3 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Mon, 27 Oct 2025 19:16:30 +0100 Subject: [PATCH 54/54] Remove duplicate attributes and check that the value is positive --- Deeploy/Targets/Generic/Parsers.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Deeploy/Targets/Generic/Parsers.py b/Deeploy/Targets/Generic/Parsers.py index ddd08a8551..edbb2bc917 100644 --- a/Deeploy/Targets/Generic/Parsers.py +++ b/Deeploy/Targets/Generic/Parsers.py @@ -1436,11 +1436,8 @@ def parseNode(self, node: gs.Node) -> (bool): # All *_div attrs are log2d-ified log2Attrs = [ 
"preattn_requant_div", - "preattn_requant_div", - "normalizer_requant_div", "normalizer_requant_div", "postattn_requant_div", - "postattn_requant_div", "wo_requant_div", "wq_requant_div", "wk_requant_div", @@ -1449,7 +1446,8 @@ def parseNode(self, node: gs.Node) -> (bool): for attr in log2Attrs: value = self.operatorRepresentation[attr] - assert isinstance(value, int) + assert isinstance( + value, int) and value > 0, f"Attribute {attr} must be a positive integer. Received value {value}" self.operatorRepresentation[attr] = int(math.log2(value)) return ret