From fc26fa2fb09c812a91fe46eb1898b2811417b52d Mon Sep 17 00:00:00 2001 From: Alex Marchioni Date: Wed, 13 May 2026 11:53:30 +0200 Subject: [PATCH 01/16] add support for Ceil operator for Generic target --- Deeploy/Targets/Generic/Bindings.py | 7 ++++- Deeploy/Targets/Generic/Layers.py | 6 ++++ Deeploy/Targets/Generic/Parsers.py | 22 ++++++++++++++ Deeploy/Targets/Generic/Platform.py | 28 ++++++++++-------- .../Generic/Templates/FloatCeilTemplate.py | 14 +++++++++ .../Tests/Kernels/FP32/Ceil/inputs.npz | Bin 0 -> 776 bytes .../Tests/Kernels/FP32/Ceil/network.onnx | Bin 0 -> 121 bytes .../Tests/Kernels/FP32/Ceil/outputs.npz | Bin 0 -> 778 bytes DeeployTest/test_generic_config.py | 1 + .../Generic/inc/DeeployBasicMath.h | 1 + 10 files changed, 65 insertions(+), 14 deletions(-) create mode 100644 Deeploy/Targets/Generic/Templates/FloatCeilTemplate.py create mode 100644 DeeployTest/Tests/Kernels/FP32/Ceil/inputs.npz create mode 100644 DeeployTest/Tests/Kernels/FP32/Ceil/network.onnx create mode 100644 DeeployTest/Tests/Kernels/FP32/Ceil/outputs.npz diff --git a/Deeploy/Targets/Generic/Bindings.py b/Deeploy/Targets/Generic/Bindings.py index 308b179aef..e34c1ec5f4 100644 --- a/Deeploy/Targets/Generic/Bindings.py +++ b/Deeploy/Targets/Generic/Bindings.py @@ -13,7 +13,7 @@ from Deeploy.FutureExtension.CodeTransformationPasses.FutureCodeTransformation import FutureGeneration from Deeploy.Targets.Generic.Templates import AddTemplate, BatchNormalizationTemplate, ConcatTemplate, ConvTemplate, \ ConvTransposeTemplate, DebugPrintTemplate, DequantTemplate, DummyTemplate, DWConvTemplate, FloatAddTemplate, \ - FloatConvTemplate, FloatDivTemplate, FloatDWConvTemplate, FloatGELUTemplate, FloatGemmTemplate, \ + FloatCeilTemplate, FloatConvTemplate, FloatDivTemplate, FloatDWConvTemplate, FloatGELUTemplate, FloatGemmTemplate, \ FloatLayernormTemplate, FloatMatMulTemplate, FloatMaxPoolTemplate, FloatMulTemplate, FloatPadTemplate, \ FloatPowTemplate, FloatReduceMeanTemplate, 
FloatReluTemplate, FloatSoftmaxTemplate, FloatSqrtTemplate, \ GatherTemplate, GemmTemplate, IntegerDivTemplate, ITAMaxTemplate, ITAPartialMaxTemplate, MatMulTemplate, \ @@ -327,3 +327,8 @@ ConvTransposeTemplate.referenceTemplate, BasicTransformer) for type in FloatDataTypes ] + +BasicCeilBindings = [ + NodeBinding(DummyChecker([PointerClass(float32_t)], [PointerClass(float32_t)]), FloatCeilTemplate.referenceTemplate, + BasicTransformer), +] diff --git a/Deeploy/Targets/Generic/Layers.py b/Deeploy/Targets/Generic/Layers.py index cc733937cc..e0ef386276 100644 --- a/Deeploy/Targets/Generic/Layers.py +++ b/Deeploy/Targets/Generic/Layers.py @@ -709,3 +709,9 @@ def computeOps(self): numPx = opRep['dim_im_out_x'] return numPx * opsPerPx + + +class CeilLayer(ONNXLayer): + + def __init__(self, maps: List[NodeMapper]): + super().__init__(maps) diff --git a/Deeploy/Targets/Generic/Parsers.py b/Deeploy/Targets/Generic/Parsers.py index ad787d9e4b..92a6ec56a8 100644 --- a/Deeploy/Targets/Generic/Parsers.py +++ b/Deeploy/Targets/Generic/Parsers.py @@ -2886,3 +2886,25 @@ def parseNodeCtxt(self, self.operatorRepresentation['size'] = int(np.prod(data_in.shape)) return ctxt, True + + +class CeilParser(NodeParser): + + def __init__(self): + super().__init__() + + def parseNode(self, node: gs.Node) -> bool: + return node.op == 'Ceil' and len(node.inputs) == 1 and len(node.outputs) == 1 + + def parseNodeCtxt(self, + ctxt: NetworkContext, + node: gs.Node, + channels_first: bool = True) -> Tuple[NetworkContext, bool]: + + data_in = ctxt.lookup(node.inputs[0].name) + data_out = ctxt.lookup(node.outputs[0].name) + + self.operatorRepresentation['data_in'] = data_in.name + self.operatorRepresentation['data_out'] = data_out.name + self.operatorRepresentation['size'] = int(np.prod(data_in.shape)) + return ctxt, True diff --git a/Deeploy/Targets/Generic/Platform.py b/Deeploy/Targets/Generic/Platform.py index e05e897270..c8db89e327 100644 --- a/Deeploy/Targets/Generic/Platform.py +++ 
b/Deeploy/Targets/Generic/Platform.py @@ -6,27 +6,27 @@ RemoveEmptyConvBiasPass, RemoveOnlySingletonReduceMeanPass from Deeploy.DeeployTypes import ConstantBuffer, DeploymentEngine, DeploymentPlatform, NodeMapper, NodeTemplate, \ StructBuffer, TopologyOptimizer, TransientBuffer, VariableBuffer -from Deeploy.Targets.Generic.Bindings import BasicAddBindings, BasicBatchNormBindings, BasicConcatBindings, \ - BasicConv1DBindings, BasicConv2DBindings, BasicConvTransposeBindings, BasicDebugPrintBindings, \ - BasicDequantBindings, BasicDivBindings, BasicDWConv1DBinding, BasicDWConv2DBindings, BasicGatherBindings, \ - BasicGELUBindings, BasicGEMMBindings, BasicITAPartialSoftmaxBinding, BasicITASoftmaxBinding, \ +from Deeploy.Targets.Generic.Bindings import BasicAddBindings, BasicBatchNormBindings, BasicCeilBindings, \ + BasicConcatBindings, BasicConv1DBindings, BasicConv2DBindings, BasicConvTransposeBindings, \ + BasicDebugPrintBindings, BasicDequantBindings, BasicDivBindings, BasicDWConv1DBinding, BasicDWConv2DBindings, \ + BasicGatherBindings, BasicGELUBindings, BasicGEMMBindings, BasicITAPartialSoftmaxBinding, BasicITASoftmaxBinding, \ BasicLayerNormBindings, BasicMatMulBindings, BasicMaxPool1DBindings, BasicMaxPool2DBindings, BasicMulBindings, \ BasicPad1DBindings, BasicPad2DBindings, BasicPowBindings, BasicQuantBindings, BasicReduceMeanBindings, \ BasicReduceSumBindings, BasicReluBinding, BasicReshapeBindings, BasicRQIntegerDivBinding, BasicRQSBindings, \ BasicRQSGELUBinding, BasicSliceBindings, BasicSoftmaxBindings, BasicSqrtBindings, BasicTransposeBindings, \ DummyBinding -from Deeploy.Targets.Generic.Layers import AddLayer, BatchNormalizationLayer, ConcatLayer, ConvLayer, \ +from Deeploy.Targets.Generic.Layers import AddLayer, BatchNormalizationLayer, CeilLayer, ConcatLayer, ConvLayer, \ ConvTransposeLayer, DebugPrintLayer, DequantLayer, DivLayer, GatherLayer, GELULayer, GEMMLayer, ITAMaxLayer, \ LayerNormLayer, MatMulLayer, MaxPoolLayer, MulLayer, PadLayer, 
PowLayer, QuantLayer, ReduceMeanLayer, \ ReduceSumLayer, ReluLayer, RequantShiftLayer, ReshapeLayer, RQIntegerDivLayer, RQSiGELULayer, SliceLayer, \ SoftmaxLayer, SqrtLayer, TransposeLayer -from Deeploy.Targets.Generic.Parsers import AddParser, BatchNormParser, ConcatParser, ConvTranspose1DParser, \ - DebugParser, DequantParser, DivParser, DummyParser, FlattenParser, GatherParser, GELUParser, GenericConv1DParser, \ - GenericConv2DParser, GenericDWConv1DParser, GenericDWConv2DParser, GenericGEMMParser, GenericMaxPool2DParser, \ - IntegerDivParser, ITAMaxParser, ITAPartialMaxParser, LayerNormParser, MatMulParser, MaxPool1DParser, MulParser, \ - Pad1DParser, Pad2DParser, PowParser, QuantParser, ReduceMeanParser, ReduceSumParser, ReluParser, \ - RequantShiftParser, ReshapeParser, RQIntegerDivParser, RQSiGELUParser, SliceParser, SoftmaxParser, SqrtParser, \ - TransposeParser, UnsqueezeParser, iLayerNormParser, iSoftmaxParser +from Deeploy.Targets.Generic.Parsers import AddParser, BatchNormParser, CeilParser, ConcatParser, \ + ConvTranspose1DParser, DebugParser, DequantParser, DivParser, DummyParser, FlattenParser, GatherParser, \ + GELUParser, GenericConv1DParser, GenericConv2DParser, GenericDWConv1DParser, GenericDWConv2DParser, \ + GenericGEMMParser, GenericMaxPool2DParser, IntegerDivParser, ITAMaxParser, ITAPartialMaxParser, LayerNormParser, \ + MatMulParser, MaxPool1DParser, MulParser, Pad1DParser, Pad2DParser, PowParser, QuantParser, ReduceMeanParser, \ + ReduceSumParser, ReluParser, RequantShiftParser, ReshapeParser, RQIntegerDivParser, RQSiGELUParser, SliceParser, \ + SoftmaxParser, SqrtParser, TransposeParser, UnsqueezeParser, iLayerNormParser, iSoftmaxParser from Deeploy.Targets.Generic.Templates import AllocateTemplate, FreeTemplate from Deeploy.Targets.Generic.TopologyOptimizationPasses.Passes import DequantPatternPass, ExtractPaddingFromConvPass, \ ExtractPaddingFromPoolPass, MatMulAddMergePass, MergeConstAddAndRequantPass, QuantPatternPass, \ @@ -73,6 +73,7 
@@ BatchNormalizationMapper = NodeMapper(BatchNormParser(), BasicBatchNormBindings) ConvTransposeMapper = NodeMapper(ConvTranspose1DParser(), BasicConvTransposeBindings) SliceMapper = NodeMapper(SliceParser(), BasicSliceBindings) +CeilMapper = NodeMapper(CeilParser(), BasicCeilBindings) # Dummy nodes are intended for development purposes only! # They should always generate compiler errors to not accidentally end up in production code @@ -118,7 +119,8 @@ 'Quant': QuantLayer([QuantMapper]), 'Dequant': DequantLayer([DequantMapper]), 'BatchNormalization': BatchNormalizationLayer([BatchNormalizationMapper]), - 'ConvTranspose': ConvTransposeLayer([ConvTransposeMapper]) + 'ConvTranspose': ConvTransposeLayer([ConvTransposeMapper]), + 'Ceil': CeilLayer([CeilMapper]), # # For example, you can use the DummpyMapper, in case you want to test # # deployment or optimizations with GlobalAveragePool nodes but did not yet # # implement the corresponding kernel diff --git a/Deeploy/Targets/Generic/Templates/FloatCeilTemplate.py b/Deeploy/Targets/Generic/Templates/FloatCeilTemplate.py new file mode 100644 index 0000000000..bb0145c9ec --- /dev/null +++ b/Deeploy/Targets/Generic/Templates/FloatCeilTemplate.py @@ -0,0 +1,14 @@ +# SPDX-FileCopyrightText: 2021 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 + +from Deeploy.DeeployTypes import NodeTemplate + +referenceTemplate = NodeTemplate(""" +// Add (Name: ${nodeName}, Op: ${nodeOp}) +BEGIN_SINGLE_CORE + for (uint32_t i = 0; i < ${size}; i++) { + ${data_out}[i] = ceilf(${data_in}[i]); + } +END_SINGLE_CORE +""") diff --git a/DeeployTest/Tests/Kernels/FP32/Ceil/inputs.npz b/DeeployTest/Tests/Kernels/FP32/Ceil/inputs.npz new file mode 100644 index 0000000000000000000000000000000000000000..ac58fc00e29cdc6487f6248edf903d0dedaa1f12 GIT binary patch literal 776 zcmbV~e=O8-7{|X~7lrbpI$Jl{k?qhiBFwMv=Oc;T#ICYk$1)pd?8wr70~{Ay6pQxvU_9FNn$u5eWH$ 
zyb`vQl@}i=VHHYi9xpFaD=oSDm|Uf_{60ZKez=ey6zVJFpW^?Q9A1BEN9;%neN$YZCb>}GQ;Cs&31vh!LrwcV=T?&X<05JKJOB+_yhx%) zBD$O4N|wLL$#-2L4m{%m2__-ax0Fl9gYUyma~;#~6Hc;O6-o^O;Ppnp*kypO06q2# zbuf%a`jQiE96T!7M?!mSk>j)LM0YlgoHm=8faH0!ES+L*ZPXjz3?hhaC?cKGS*Xo# zgvYylanVN^a&fF9^99N1gIWuvHzuHF#Pyls#-Rp2X4!nSS2YGsqI|T%)#AD14-I1(bWz0tHj#;DS@k~pX}lm+sbJLgM*(c@^>LMTrF&#id2*srh*j nPLwoEy%3iG2cr-V7ZV2~5OV;rK#~j>)D$5svQ8{q3o zr>pSw;_^JY^5k9~gJC?qNrOWb9F6vaPz9q&I!g!R>q$C{**_iJO=I+@w}T``z0(d= zCsgf&-B3NLf31c{rbW!VNKWixI_sjyQ5)1&J7lhrFIFPYhzzv?8uWZ%ACYvW@G!@& zrbfWGN}XkmATFpi&cjEpp{e{MfxL$zhcwjOb cslfz+$MLg_$Mp9tN4@n8c07C?;2ZQJ-=@=~761SM literal 0 HcmV?d00001 diff --git a/DeeployTest/test_generic_config.py b/DeeployTest/test_generic_config.py index b0d8c659ca..e8ce08234b 100644 --- a/DeeployTest/test_generic_config.py +++ b/DeeployTest/test_generic_config.py @@ -8,6 +8,7 @@ "Kernels/FP32/ReLU", "Kernels/FP32/Softmax/Regular", "Kernels/FP32/Add/Regular", + "Kernels/FP32/Ceil", "Kernels/FP32/Conv/DW_2D_Bias", "Kernels/FP32/Conv/DW_2D_NoBias", "Kernels/FP32/Conv/DW_2D_ZeroValuedBias", diff --git a/TargetLibraries/Generic/inc/DeeployBasicMath.h b/TargetLibraries/Generic/inc/DeeployBasicMath.h index 22081701a3..0b23a382c5 100644 --- a/TargetLibraries/Generic/inc/DeeployBasicMath.h +++ b/TargetLibraries/Generic/inc/DeeployBasicMath.h @@ -22,6 +22,7 @@ #include #include +#include #include #include From f1929a4cb3044e7626f74058b88d96a369744609 Mon Sep 17 00:00:00 2001 From: Alex Marchioni Date: Wed, 13 May 2026 20:14:55 +0200 Subject: [PATCH 02/16] add support for Clip operator for Generic target --- Deeploy/Targets/Generic/Bindings.py | 18 ++++++--- Deeploy/Targets/Generic/Layers.py | 6 +++ Deeploy/Targets/Generic/Parsers.py | 36 ++++++++++++++++++ Deeploy/Targets/Generic/Platform.py | 12 +++--- .../Generic/Templates/FloatClipTemplate.py | 26 +++++++++++++ .../Tests/Kernels/FP32/Clip/inputs.npz | Bin 0 -> 776 bytes .../Tests/Kernels/FP32/Clip/network.onnx | Bin 0 -> 162 bytes 
.../Tests/Kernels/FP32/Clip/outputs.npz | Bin 0 -> 778 bytes DeeployTest/test_generic_config.py | 1 + 9 files changed, 89 insertions(+), 10 deletions(-) create mode 100644 Deeploy/Targets/Generic/Templates/FloatClipTemplate.py create mode 100644 DeeployTest/Tests/Kernels/FP32/Clip/inputs.npz create mode 100644 DeeployTest/Tests/Kernels/FP32/Clip/network.onnx create mode 100644 DeeployTest/Tests/Kernels/FP32/Clip/outputs.npz diff --git a/Deeploy/Targets/Generic/Bindings.py b/Deeploy/Targets/Generic/Bindings.py index e34c1ec5f4..6bcc9de0d2 100644 --- a/Deeploy/Targets/Generic/Bindings.py +++ b/Deeploy/Targets/Generic/Bindings.py @@ -13,11 +13,11 @@ from Deeploy.FutureExtension.CodeTransformationPasses.FutureCodeTransformation import FutureGeneration from Deeploy.Targets.Generic.Templates import AddTemplate, BatchNormalizationTemplate, ConcatTemplate, ConvTemplate, \ ConvTransposeTemplate, DebugPrintTemplate, DequantTemplate, DummyTemplate, DWConvTemplate, FloatAddTemplate, \ - FloatCeilTemplate, FloatConvTemplate, FloatDivTemplate, FloatDWConvTemplate, FloatGELUTemplate, FloatGemmTemplate, \ - FloatLayernormTemplate, FloatMatMulTemplate, FloatMaxPoolTemplate, FloatMulTemplate, FloatPadTemplate, \ - FloatPowTemplate, FloatReduceMeanTemplate, FloatReluTemplate, FloatSoftmaxTemplate, FloatSqrtTemplate, \ - GatherTemplate, GemmTemplate, IntegerDivTemplate, ITAMaxTemplate, ITAPartialMaxTemplate, MatMulTemplate, \ - MaxPoolTemplate, MulTemplate, PadTemplate, QuantTemplate, ReduceMeanTemplate, ReduceSumTemplate, \ + FloatCeilTemplate, FloatClipTemplate, FloatConvTemplate, FloatDivTemplate, FloatDWConvTemplate, FloatGELUTemplate, \ + FloatGemmTemplate, FloatLayernormTemplate, FloatMatMulTemplate, FloatMaxPoolTemplate, FloatMulTemplate, \ + FloatPadTemplate, FloatPowTemplate, FloatReduceMeanTemplate, FloatReluTemplate, FloatSoftmaxTemplate, \ + FloatSqrtTemplate, GatherTemplate, GemmTemplate, IntegerDivTemplate, ITAMaxTemplate, ITAPartialMaxTemplate, \ + MatMulTemplate, 
MaxPoolTemplate, MulTemplate, PadTemplate, QuantTemplate, ReduceMeanTemplate, ReduceSumTemplate, \ RequantShiftTemplate, ReshapeTemplate, RQIntegerDivTemplate, RQSiGELUTemplate, SliceTemplate, TransposeTemplate, \ iGELUTemplate, iLayernormTemplate, iRMSNormTemplate, iSoftmaxTemplate from Deeploy.Targets.Generic.TypeCheckers import AddChecker, BatchNormChecker, ConcatChecker, ConvChecker, \ @@ -332,3 +332,11 @@ NodeBinding(DummyChecker([PointerClass(float32_t)], [PointerClass(float32_t)]), FloatCeilTemplate.referenceTemplate, BasicTransformer), ] + +BasicClipBindings = [ + NodeBinding( + DummyChecker( + [PointerClass(float32_t), PointerClass(float32_t), + PointerClass(float32_t)], [PointerClass(float32_t)]), FloatClipTemplate.referenceTemplate, + BasicTransformer), +] diff --git a/Deeploy/Targets/Generic/Layers.py b/Deeploy/Targets/Generic/Layers.py index e0ef386276..5dc76e77b7 100644 --- a/Deeploy/Targets/Generic/Layers.py +++ b/Deeploy/Targets/Generic/Layers.py @@ -715,3 +715,9 @@ class CeilLayer(ONNXLayer): def __init__(self, maps: List[NodeMapper]): super().__init__(maps) + + +class ClipLayer(ONNXLayer): + + def __init__(self, maps: List[NodeMapper]): + super().__init__(maps) diff --git a/Deeploy/Targets/Generic/Parsers.py b/Deeploy/Targets/Generic/Parsers.py index 92a6ec56a8..361dc11f29 100644 --- a/Deeploy/Targets/Generic/Parsers.py +++ b/Deeploy/Targets/Generic/Parsers.py @@ -2908,3 +2908,39 @@ def parseNodeCtxt(self, self.operatorRepresentation['data_out'] = data_out.name self.operatorRepresentation['size'] = int(np.prod(data_in.shape)) return ctxt, True + + +class ClipParser(NodeParser): + + def __init__(self): + super().__init__() + + def parseNode(self, node: gs.Node) -> bool: + if node.op != 'Clip' \ + or len(node.outputs) != 1 \ + or (not (1 <= len(node.inputs) <= 3)): + return False + return True + + def parseNodeCtxt(self, + ctxt: NetworkContext, + node: gs.Node, + channels_first: bool = True) -> Tuple[NetworkContext, bool]: + + data_in = 
ctxt.lookup(node.inputs[0].name) + data_out = ctxt.lookup(node.outputs[0].name) + self.operatorRepresentation['data_in'] = data_in.name + self.operatorRepresentation['data_out'] = data_out.name + self.operatorRepresentation['size'] = int(np.prod(data_in.shape)) + + # min_val and max_val only handled as constants + # Defaults: full float32 range + self.operatorRepresentation['min_val'] = -np.finfo(np.float32).max + self.operatorRepresentation['max_val'] = np.finfo(np.float32).max + + if len(node.inputs) > 1 and node.inputs[1].name != '': + self.operatorRepresentation['min_val'] = float(node.inputs[1].values.item()) + if len(node.inputs) > 2 and node.inputs[2].name != '': + self.operatorRepresentation['max_val'] = float(node.inputs[2].values.item()) + + return ctxt, True \ No newline at end of file diff --git a/Deeploy/Targets/Generic/Platform.py b/Deeploy/Targets/Generic/Platform.py index c8db89e327..35d6d99d6f 100644 --- a/Deeploy/Targets/Generic/Platform.py +++ b/Deeploy/Targets/Generic/Platform.py @@ -7,7 +7,7 @@ from Deeploy.DeeployTypes import ConstantBuffer, DeploymentEngine, DeploymentPlatform, NodeMapper, NodeTemplate, \ StructBuffer, TopologyOptimizer, TransientBuffer, VariableBuffer from Deeploy.Targets.Generic.Bindings import BasicAddBindings, BasicBatchNormBindings, BasicCeilBindings, \ - BasicConcatBindings, BasicConv1DBindings, BasicConv2DBindings, BasicConvTransposeBindings, \ + BasicClipBindings, BasicConcatBindings, BasicConv1DBindings, BasicConv2DBindings, BasicConvTransposeBindings, \ BasicDebugPrintBindings, BasicDequantBindings, BasicDivBindings, BasicDWConv1DBinding, BasicDWConv2DBindings, \ BasicGatherBindings, BasicGELUBindings, BasicGEMMBindings, BasicITAPartialSoftmaxBinding, BasicITASoftmaxBinding, \ BasicLayerNormBindings, BasicMatMulBindings, BasicMaxPool1DBindings, BasicMaxPool2DBindings, BasicMulBindings, \ @@ -15,12 +15,12 @@ BasicReduceSumBindings, BasicReluBinding, BasicReshapeBindings, BasicRQIntegerDivBinding, BasicRQSBindings, \ 
BasicRQSGELUBinding, BasicSliceBindings, BasicSoftmaxBindings, BasicSqrtBindings, BasicTransposeBindings, \ DummyBinding -from Deeploy.Targets.Generic.Layers import AddLayer, BatchNormalizationLayer, CeilLayer, ConcatLayer, ConvLayer, \ - ConvTransposeLayer, DebugPrintLayer, DequantLayer, DivLayer, GatherLayer, GELULayer, GEMMLayer, ITAMaxLayer, \ - LayerNormLayer, MatMulLayer, MaxPoolLayer, MulLayer, PadLayer, PowLayer, QuantLayer, ReduceMeanLayer, \ +from Deeploy.Targets.Generic.Layers import AddLayer, BatchNormalizationLayer, CeilLayer, ClipLayer, ConcatLayer, \ + ConvLayer, ConvTransposeLayer, DebugPrintLayer, DequantLayer, DivLayer, GatherLayer, GELULayer, GEMMLayer, \ + ITAMaxLayer, LayerNormLayer, MatMulLayer, MaxPoolLayer, MulLayer, PadLayer, PowLayer, QuantLayer, ReduceMeanLayer, \ ReduceSumLayer, ReluLayer, RequantShiftLayer, ReshapeLayer, RQIntegerDivLayer, RQSiGELULayer, SliceLayer, \ SoftmaxLayer, SqrtLayer, TransposeLayer -from Deeploy.Targets.Generic.Parsers import AddParser, BatchNormParser, CeilParser, ConcatParser, \ +from Deeploy.Targets.Generic.Parsers import AddParser, BatchNormParser, CeilParser, ClipParser, ConcatParser, \ ConvTranspose1DParser, DebugParser, DequantParser, DivParser, DummyParser, FlattenParser, GatherParser, \ GELUParser, GenericConv1DParser, GenericConv2DParser, GenericDWConv1DParser, GenericDWConv2DParser, \ GenericGEMMParser, GenericMaxPool2DParser, IntegerDivParser, ITAMaxParser, ITAPartialMaxParser, LayerNormParser, \ @@ -74,6 +74,7 @@ ConvTransposeMapper = NodeMapper(ConvTranspose1DParser(), BasicConvTransposeBindings) SliceMapper = NodeMapper(SliceParser(), BasicSliceBindings) CeilMapper = NodeMapper(CeilParser(), BasicCeilBindings) +ClipMapper = NodeMapper(ClipParser(), BasicClipBindings) # Dummy nodes are intended for development purposes only! 
# They should always generate compiler errors to not accidentally end up in production code @@ -121,6 +122,7 @@ 'BatchNormalization': BatchNormalizationLayer([BatchNormalizationMapper]), 'ConvTranspose': ConvTransposeLayer([ConvTransposeMapper]), 'Ceil': CeilLayer([CeilMapper]), + 'Clip': ClipLayer([ClipMapper]), # # For example, you can use the DummpyMapper, in case you want to test # # deployment or optimizations with GlobalAveragePool nodes but did not yet # # implement the corresponding kernel diff --git a/Deeploy/Targets/Generic/Templates/FloatClipTemplate.py b/Deeploy/Targets/Generic/Templates/FloatClipTemplate.py new file mode 100644 index 0000000000..868f5135c4 --- /dev/null +++ b/Deeploy/Targets/Generic/Templates/FloatClipTemplate.py @@ -0,0 +1,26 @@ +# SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 +from typing import Dict, List, Tuple + +import numpy as np + +from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation + + +class _ClipTemplate(NodeTemplate): + + def alignToContext(self, ctxt, operatorRepresentation): + data_in = ctxt.lookup(operatorRepresentation['data_in']) + operatorRepresentation['size'] = int(np.prod(data_in.shape)) + return ctxt, operatorRepresentation, [] + + +referenceTemplate = _ClipTemplate(""" +// Clip (Name: ${nodeName}, Op: ${nodeOp}) +BEGIN_SINGLE_CORE + for (uint32_t i = 0; i < ${size}; i++){ + ${data_out}[i] = fmaxf(${min_val}f, fminf(${max_val}f, ${data_in}[i])); + } +END_SINGLE_CORE +""") diff --git a/DeeployTest/Tests/Kernels/FP32/Clip/inputs.npz b/DeeployTest/Tests/Kernels/FP32/Clip/inputs.npz new file mode 100644 index 0000000000000000000000000000000000000000..ac58fc00e29cdc6487f6248edf903d0dedaa1f12 GIT binary patch literal 776 zcmbV~e=O8-7{|X~7lrbpI$Jl{k?qhiBFwMv=Oc;T#ICYk$1)pd?8wr70~{Ay6pQxvU_9FNn$u5eWH$ zyb`vQl@}i=VHHYi9xpFaD=oSDm|Uf_{60ZKez=ey6zVJFpW^?Q9A1BEN9;%neN$YZCb>}GQ;Cs&31vh!LrwcV=T?&X<05JKJOB+_yhx%) 
zBD$O4N|wLL$#-2L4m{%m2__-ax0Fl9gYUyma~;#~6Hc;O6-o^O;Ppnp*kypO06q2# zbuf%a`jQiE96T!7M?!mSk>j)LM0YlgoHm=8faH0!ES+L*ZPXjz3?hhaC?cKGS*Xo# zgvYylanVN^a&fF9^99N1gIWuvHzuHF#Pyls#-Rp2X4!nSS2YGsqI|T%)#AD14-I1(bWz0tHj#;DS@k~pX}lm+sbJncOG|(vN-WMfnFT`P`FVL2@##g0 z1sTPqMd_*ec@T~kuK=SHNRt-}1B1drC^xYJ%yo>ChMFrR#3jJND8$3X#K8!}96&6P RB*O)@1DmW93m1a`HvqivA-Dhl literal 0 HcmV?d00001 diff --git a/DeeployTest/Tests/Kernels/FP32/Clip/outputs.npz b/DeeployTest/Tests/Kernels/FP32/Clip/outputs.npz new file mode 100644 index 0000000000000000000000000000000000000000..aba055ba032de1ac46c55c46f126b44388f6743f GIT binary patch literal 778 zcmWIWW@gc4fB;2?b-RP@|3d*6g9t-@X-Pq8iC$hoB_o3XLjw~;9gLpr7wQ`j$;eQ~ zP_3SlTAW;@Zl$1ZlV+l>qoAIaUsO_*m=~X4l#&V(cT3DEP6dh=XCxM+0{I$-Itms# z3WjEyItsN4WC5#1dD<68Pp3L@VKvI50blPyzYSP?_MxF zuBgBv$mGBQsr6O|&UGIE+o5H~wGX7Oa5l3eh&Efp3OB#*_iuYW|2GcbB5&I-WS_GC z$`%K(eM=&r9cWLV4A!6XDjKZ*O{TxY!{V|7C39~aXx(!dB<5(tDC!vH#&$5~sp|n_ zrVxh^F&W1%p)(KcS@L#2$Sx4(XJi7|>)01z0TOd8DoS@SEjV|8&pRD##*5tTAoqdY zrqIajsC_-oJ}%b&K*WnIu%Ec6^Bf53O#!RjbYtm(XVHuYKlCp+aKB8*acWwS!VVZZsel%v6l2M4sa1Rh}cTy+3w?!m|wTL*7pb%*MrK-)t~Vqh_s*=+|R z!=ewk$1)t0Dmu6SrgtOQ{FSBm?fI^%I6RnQ2X?>9f%bhB22vn*INq@Mw!h`CgG1KN zjo|PEiAB%*;1H(x*P*!q95-M;gr43FHY=`1?x2kM)&l|Fj7+-BsHqo}HbIz+0Rb8q XL2N`?4)A7W1BoyKp#_lM15Vcf2gUs- literal 0 HcmV?d00001 diff --git a/DeeployTest/test_generic_config.py b/DeeployTest/test_generic_config.py index e8ce08234b..8c56613c5b 100644 --- a/DeeployTest/test_generic_config.py +++ b/DeeployTest/test_generic_config.py @@ -9,6 +9,7 @@ "Kernels/FP32/Softmax/Regular", "Kernels/FP32/Add/Regular", "Kernels/FP32/Ceil", + "Kernels/FP32/Clip", "Kernels/FP32/Conv/DW_2D_Bias", "Kernels/FP32/Conv/DW_2D_NoBias", "Kernels/FP32/Conv/DW_2D_ZeroValuedBias", From 3b58bf92f5fa70162a8a4b24089035ff49d3170b Mon Sep 17 00:00:00 2001 From: Alex Marchioni Date: Thu, 14 May 2026 10:45:16 +0200 Subject: [PATCH 03/16] move math.h out from Generic DeeployBasicMath.h to avoid conflicts with other targets --- 
.../Generic/Templates/FloatCeilTemplate.py | 25 +++++++++++++------ .../Generic/Templates/FloatClipTemplate.py | 12 +++------ .../Generic/inc/DeeployBasicMath.h | 3 ++- TargetLibraries/Generic/inc/kernel/Ceil.h | 21 ++++++++++++++++ TargetLibraries/Generic/inc/kernel/Clip.h | 22 ++++++++++++++++ TargetLibraries/Generic/src/Ceil_fp32.c | 14 +++++++++++ TargetLibraries/Generic/src/Clip_fp32.c | 15 +++++++++++ 7 files changed, 95 insertions(+), 17 deletions(-) create mode 100644 TargetLibraries/Generic/inc/kernel/Ceil.h create mode 100644 TargetLibraries/Generic/inc/kernel/Clip.h create mode 100644 TargetLibraries/Generic/src/Ceil_fp32.c create mode 100644 TargetLibraries/Generic/src/Clip_fp32.c diff --git a/Deeploy/Targets/Generic/Templates/FloatCeilTemplate.py b/Deeploy/Targets/Generic/Templates/FloatCeilTemplate.py index bb0145c9ec..198bb3d9c8 100644 --- a/Deeploy/Targets/Generic/Templates/FloatCeilTemplate.py +++ b/Deeploy/Targets/Generic/Templates/FloatCeilTemplate.py @@ -1,14 +1,23 @@ # SPDX-FileCopyrightText: 2021 ETH Zurich and University of Bologna # # SPDX-License-Identifier: Apache-2.0 +import numpy as np -from Deeploy.DeeployTypes import NodeTemplate +from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation -referenceTemplate = NodeTemplate(""" -// Add (Name: ${nodeName}, Op: ${nodeOp}) -BEGIN_SINGLE_CORE - for (uint32_t i = 0; i < ${size}; i++) { - ${data_out}[i] = ceilf(${data_in}[i]); - } -END_SINGLE_CORE + +class _CeilTemplate(NodeTemplate): + + def alignToContext(self, ctxt: NetworkContext, + operatorRepresentation: OperatorRepresentation) -> tuple[NetworkContext, dict, list[str]]: + + data_in = ctxt.lookup(operatorRepresentation['data_in']) + operatorRepresentation['size'] = int(np.prod(data_in.shape)) + operatorRepresentation['type_width'] = data_in._type.referencedType.typeWidth + return ctxt, operatorRepresentation, [] + + +referenceTemplate = _CeilTemplate(""" +// Ceil (Name: ${nodeName}, Op: ${nodeOp}) 
+Ceil_fp${type_width}_fp${type_width}(${data_in}, ${data_out}, ${size}); """) diff --git a/Deeploy/Targets/Generic/Templates/FloatClipTemplate.py b/Deeploy/Targets/Generic/Templates/FloatClipTemplate.py index 868f5135c4..c61b421755 100644 --- a/Deeploy/Targets/Generic/Templates/FloatClipTemplate.py +++ b/Deeploy/Targets/Generic/Templates/FloatClipTemplate.py @@ -1,8 +1,6 @@ # SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna # # SPDX-License-Identifier: Apache-2.0 -from typing import Dict, List, Tuple - import numpy as np from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation @@ -10,17 +8,15 @@ class _ClipTemplate(NodeTemplate): - def alignToContext(self, ctxt, operatorRepresentation): + def alignToContext(self, ctxt: NetworkContext, + operatorRepresentation: OperatorRepresentation) -> tuple[NetworkContext, dict, list[str]]: data_in = ctxt.lookup(operatorRepresentation['data_in']) operatorRepresentation['size'] = int(np.prod(data_in.shape)) + operatorRepresentation['type_width'] = data_in._type.referencedType.typeWidth return ctxt, operatorRepresentation, [] referenceTemplate = _ClipTemplate(""" // Clip (Name: ${nodeName}, Op: ${nodeOp}) -BEGIN_SINGLE_CORE - for (uint32_t i = 0; i < ${size}; i++){ - ${data_out}[i] = fmaxf(${min_val}f, fminf(${max_val}f, ${data_in}[i])); - } -END_SINGLE_CORE +Clip_fp${type_width}_fp${type_width}(${data_in}, ${data_out}, ${min_val}, ${max_val}, ${size}); """) diff --git a/TargetLibraries/Generic/inc/DeeployBasicMath.h b/TargetLibraries/Generic/inc/DeeployBasicMath.h index 0b23a382c5..37b9aef753 100644 --- a/TargetLibraries/Generic/inc/DeeployBasicMath.h +++ b/TargetLibraries/Generic/inc/DeeployBasicMath.h @@ -22,7 +22,6 @@ #include #include -#include #include #include @@ -34,6 +33,8 @@ #include "utils.h" #include "kernel/BatchNorm.h" +#include "kernel/Ceil.h" +#include "kernel/Clip.h" #include "kernel/ConvTranspose1d_fp32.h" #include "kernel/Convolution.h" #include 
"kernel/DWConvolution.h" diff --git a/TargetLibraries/Generic/inc/kernel/Ceil.h b/TargetLibraries/Generic/inc/kernel/Ceil.h new file mode 100644 index 0000000000..941b90c75d --- /dev/null +++ b/TargetLibraries/Generic/inc/kernel/Ceil.h @@ -0,0 +1,21 @@ +/* + * SPDX-FileCopyrightText: 2020 ETH Zurich and University of Bologna + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef __DEEPLOY_BASIC_MATH_CEIL_KERNEL_HEADER_ +#define __DEEPLOY_BASIC_MATH_CEIL_KERNEL_HEADER_ + +#include "DeeployBasicMath.h" + +/* + * element wise ceil operation + */ + +/******************************************************************************/ +/* Ceil */ +/******************************************************************************/ +void Ceil_fp32_fp32(float32_t *data_in, float32_t *data_out, int32_t size); + +#endif //__DEEPLOY_BASIC_MATH_CEIL_KERNEL_HEADER_ diff --git a/TargetLibraries/Generic/inc/kernel/Clip.h b/TargetLibraries/Generic/inc/kernel/Clip.h new file mode 100644 index 0000000000..751c338c03 --- /dev/null +++ b/TargetLibraries/Generic/inc/kernel/Clip.h @@ -0,0 +1,22 @@ +/* + * SPDX-FileCopyrightText: 2020 ETH Zurich and University of Bologna + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef __DEEPLOY_BASIC_MATH_CLIP_KERNEL_HEADER_ +#define __DEEPLOY_BASIC_MATH_CLIP_KERNEL_HEADER_ + +#include "DeeployBasicMath.h" + +/* + * element wise clip operation + */ + +/******************************************************************************/ +/* Clip */ +/******************************************************************************/ +void Clip_fp32_fp32(float32_t *data_in, float32_t *data_out, float32_t min_val, + float32_t max_val, int32_t size); + +#endif //__DEEPLOY_BASIC_MATH_CLIP_KERNEL_HEADER_ diff --git a/TargetLibraries/Generic/src/Ceil_fp32.c b/TargetLibraries/Generic/src/Ceil_fp32.c new file mode 100644 index 0000000000..fe73e20637 --- /dev/null +++ b/TargetLibraries/Generic/src/Ceil_fp32.c @@ -0,0 +1,14 @@ +/* + * SPDX-FileCopyrightText: 2025 
ETH Zurich and University of Bologna + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "DeeployBasicMath.h" +#include <math.h> + +void Ceil_fp32_fp32(float32_t *data_in, float32_t *data_out, int32_t size) { + for (int i = 0; i < size; i++) { + data_out[i] = ceilf(data_in[i]); + } +} diff --git a/TargetLibraries/Generic/src/Clip_fp32.c b/TargetLibraries/Generic/src/Clip_fp32.c new file mode 100644 index 0000000000..092fbf7b60 --- /dev/null +++ b/TargetLibraries/Generic/src/Clip_fp32.c @@ -0,0 +1,15 @@ +/* + * SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "DeeployBasicMath.h" +#include <math.h> + +void Clip_fp32_fp32(float32_t *data_in, float32_t *data_out, float32_t min_val, + float32_t max_val, int32_t size) { + for (int i = 0; i < size; i++) { + data_out[i] = fmaxf(min_val, fminf(max_val, data_in[i])); + } +} From ecdbe8861fc55d4cb6fb50b478db3b26fac6b0d9 Mon Sep 17 00:00:00 2001 From: Alex Marchioni Date: Thu, 14 May 2026 10:45:34 +0000 Subject: [PATCH 04/16] add support for Floor operator for Generic target --- Deeploy/Targets/Generic/Bindings.py | 20 +++++++++----- Deeploy/Targets/Generic/Layers.py | 6 ++++ Deeploy/Targets/Generic/Parsers.py | 22 +++++++++++++++ Deeploy/Targets/Generic/Platform.py | 26 ++++++++++-------- .../Generic/Templates/FloatFloorTemplate.py | 23 ++++++++++++++++ .../Tests/Kernels/FP32/Floor/inputs.npz | Bin 0 -> 776 bytes .../Tests/Kernels/FP32/Floor/network.onnx | Bin 0 -> 122 bytes .../Tests/Kernels/FP32/Floor/outputs.npz | Bin 0 -> 778 bytes DeeployTest/test_generic_config.py | 1 + .../Generic/inc/DeeployBasicMath.h | 1 + TargetLibraries/Generic/inc/kernel/Floor.h | 21 ++++++++++++++ TargetLibraries/Generic/src/Floor_fp32.c | 14 ++++++++++ 12 files changed, 115 insertions(+), 19 deletions(-) create mode 100644 Deeploy/Targets/Generic/Templates/FloatFloorTemplate.py create mode 100644 DeeployTest/Tests/Kernels/FP32/Floor/inputs.npz create mode 100644 
DeeployTest/Tests/Kernels/FP32/Floor/network.onnx create mode 100644 DeeployTest/Tests/Kernels/FP32/Floor/outputs.npz create mode 100644 TargetLibraries/Generic/inc/kernel/Floor.h create mode 100644 TargetLibraries/Generic/src/Floor_fp32.c diff --git a/Deeploy/Targets/Generic/Bindings.py b/Deeploy/Targets/Generic/Bindings.py index 6bcc9de0d2..6062332300 100644 --- a/Deeploy/Targets/Generic/Bindings.py +++ b/Deeploy/Targets/Generic/Bindings.py @@ -13,13 +13,14 @@ from Deeploy.FutureExtension.CodeTransformationPasses.FutureCodeTransformation import FutureGeneration from Deeploy.Targets.Generic.Templates import AddTemplate, BatchNormalizationTemplate, ConcatTemplate, ConvTemplate, \ ConvTransposeTemplate, DebugPrintTemplate, DequantTemplate, DummyTemplate, DWConvTemplate, FloatAddTemplate, \ - FloatCeilTemplate, FloatClipTemplate, FloatConvTemplate, FloatDivTemplate, FloatDWConvTemplate, FloatGELUTemplate, \ - FloatGemmTemplate, FloatLayernormTemplate, FloatMatMulTemplate, FloatMaxPoolTemplate, FloatMulTemplate, \ - FloatPadTemplate, FloatPowTemplate, FloatReduceMeanTemplate, FloatReluTemplate, FloatSoftmaxTemplate, \ - FloatSqrtTemplate, GatherTemplate, GemmTemplate, IntegerDivTemplate, ITAMaxTemplate, ITAPartialMaxTemplate, \ - MatMulTemplate, MaxPoolTemplate, MulTemplate, PadTemplate, QuantTemplate, ReduceMeanTemplate, ReduceSumTemplate, \ - RequantShiftTemplate, ReshapeTemplate, RQIntegerDivTemplate, RQSiGELUTemplate, SliceTemplate, TransposeTemplate, \ - iGELUTemplate, iLayernormTemplate, iRMSNormTemplate, iSoftmaxTemplate + FloatCeilTemplate, FloatClipTemplate, FloatConvTemplate, FloatDivTemplate, FloatDWConvTemplate, \ + FloatFloorTemplate, FloatGELUTemplate, FloatGemmTemplate, FloatLayernormTemplate, FloatMatMulTemplate, \ + FloatMaxPoolTemplate, FloatMulTemplate, FloatPadTemplate, FloatPowTemplate, FloatReduceMeanTemplate, \ + FloatReluTemplate, FloatSoftmaxTemplate, FloatSqrtTemplate, GatherTemplate, GemmTemplate, IntegerDivTemplate, \ + ITAMaxTemplate, 
ITAPartialMaxTemplate, MatMulTemplate, MaxPoolTemplate, MulTemplate, PadTemplate, QuantTemplate, \ + ReduceMeanTemplate, ReduceSumTemplate, RequantShiftTemplate, ReshapeTemplate, RQIntegerDivTemplate, \ + RQSiGELUTemplate, SliceTemplate, TransposeTemplate, iGELUTemplate, iLayernormTemplate, iRMSNormTemplate, \ + iSoftmaxTemplate from Deeploy.Targets.Generic.TypeCheckers import AddChecker, BatchNormChecker, ConcatChecker, ConvChecker, \ DebugPrintChecker, DequantChecker, DivChecker, DummyChecker, GatherChecker, GELUChecker, GEMMChecker, \ LayerNormChecker, MatMulChecker, MaxPoolChecker, MulChecker, PadChecker, QuantChecker, ReduceMeanChecker, \ @@ -333,6 +334,11 @@ BasicTransformer), ] +BasicFloorBindings = [ + NodeBinding(DummyChecker([PointerClass(float32_t)], [PointerClass(float32_t)]), + FloatFloorTemplate.referenceTemplate, BasicTransformer), +] + BasicClipBindings = [ NodeBinding( DummyChecker( diff --git a/Deeploy/Targets/Generic/Layers.py b/Deeploy/Targets/Generic/Layers.py index 5dc76e77b7..9987fdc383 100644 --- a/Deeploy/Targets/Generic/Layers.py +++ b/Deeploy/Targets/Generic/Layers.py @@ -717,6 +717,12 @@ def __init__(self, maps: List[NodeMapper]): super().__init__(maps) +class FloorLayer(ONNXLayer): + + def __init__(self, maps: List[NodeMapper]): + super().__init__(maps) + + class ClipLayer(ONNXLayer): def __init__(self, maps: List[NodeMapper]): diff --git a/Deeploy/Targets/Generic/Parsers.py b/Deeploy/Targets/Generic/Parsers.py index 361dc11f29..3e888905c1 100644 --- a/Deeploy/Targets/Generic/Parsers.py +++ b/Deeploy/Targets/Generic/Parsers.py @@ -2910,6 +2910,28 @@ def parseNodeCtxt(self, return ctxt, True +class FloorParser(NodeParser): + + def __init__(self): + super().__init__() + + def parseNode(self, node: gs.Node) -> bool: + return node.op == 'Floor' and len(node.inputs) == 1 and len(node.outputs) == 1 + + def parseNodeCtxt(self, + ctxt: NetworkContext, + node: gs.Node, + channels_first: bool = True) -> Tuple[NetworkContext, bool]: + + data_in = 
ctxt.lookup(node.inputs[0].name) + data_out = ctxt.lookup(node.outputs[0].name) + + self.operatorRepresentation['data_in'] = data_in.name + self.operatorRepresentation['data_out'] = data_out.name + self.operatorRepresentation['size'] = int(np.prod(data_in.shape)) + return ctxt, True + + class ClipParser(NodeParser): def __init__(self): diff --git a/Deeploy/Targets/Generic/Platform.py b/Deeploy/Targets/Generic/Platform.py index 35d6d99d6f..6c997bb10e 100644 --- a/Deeploy/Targets/Generic/Platform.py +++ b/Deeploy/Targets/Generic/Platform.py @@ -9,20 +9,20 @@ from Deeploy.Targets.Generic.Bindings import BasicAddBindings, BasicBatchNormBindings, BasicCeilBindings, \ BasicClipBindings, BasicConcatBindings, BasicConv1DBindings, BasicConv2DBindings, BasicConvTransposeBindings, \ BasicDebugPrintBindings, BasicDequantBindings, BasicDivBindings, BasicDWConv1DBinding, BasicDWConv2DBindings, \ - BasicGatherBindings, BasicGELUBindings, BasicGEMMBindings, BasicITAPartialSoftmaxBinding, BasicITASoftmaxBinding, \ - BasicLayerNormBindings, BasicMatMulBindings, BasicMaxPool1DBindings, BasicMaxPool2DBindings, BasicMulBindings, \ - BasicPad1DBindings, BasicPad2DBindings, BasicPowBindings, BasicQuantBindings, BasicReduceMeanBindings, \ - BasicReduceSumBindings, BasicReluBinding, BasicReshapeBindings, BasicRQIntegerDivBinding, BasicRQSBindings, \ - BasicRQSGELUBinding, BasicSliceBindings, BasicSoftmaxBindings, BasicSqrtBindings, BasicTransposeBindings, \ - DummyBinding + BasicFloorBindings, BasicGatherBindings, BasicGELUBindings, BasicGEMMBindings, BasicITAPartialSoftmaxBinding, \ + BasicITASoftmaxBinding, BasicLayerNormBindings, BasicMatMulBindings, BasicMaxPool1DBindings, \ + BasicMaxPool2DBindings, BasicMulBindings, BasicPad1DBindings, BasicPad2DBindings, BasicPowBindings, \ + BasicQuantBindings, BasicReduceMeanBindings, BasicReduceSumBindings, BasicReluBinding, BasicReshapeBindings, \ + BasicRQIntegerDivBinding, BasicRQSBindings, BasicRQSGELUBinding, BasicSliceBindings, 
BasicSoftmaxBindings, \ + BasicSqrtBindings, BasicTransposeBindings, DummyBinding from Deeploy.Targets.Generic.Layers import AddLayer, BatchNormalizationLayer, CeilLayer, ClipLayer, ConcatLayer, \ - ConvLayer, ConvTransposeLayer, DebugPrintLayer, DequantLayer, DivLayer, GatherLayer, GELULayer, GEMMLayer, \ - ITAMaxLayer, LayerNormLayer, MatMulLayer, MaxPoolLayer, MulLayer, PadLayer, PowLayer, QuantLayer, ReduceMeanLayer, \ - ReduceSumLayer, ReluLayer, RequantShiftLayer, ReshapeLayer, RQIntegerDivLayer, RQSiGELULayer, SliceLayer, \ - SoftmaxLayer, SqrtLayer, TransposeLayer + ConvLayer, ConvTransposeLayer, DebugPrintLayer, DequantLayer, DivLayer, FloorLayer, GatherLayer, GELULayer, \ + GEMMLayer, ITAMaxLayer, LayerNormLayer, MatMulLayer, MaxPoolLayer, MulLayer, PadLayer, PowLayer, QuantLayer, \ + ReduceMeanLayer, ReduceSumLayer, ReluLayer, RequantShiftLayer, ReshapeLayer, RQIntegerDivLayer, RQSiGELULayer, \ + SliceLayer, SoftmaxLayer, SqrtLayer, TransposeLayer from Deeploy.Targets.Generic.Parsers import AddParser, BatchNormParser, CeilParser, ClipParser, ConcatParser, \ - ConvTranspose1DParser, DebugParser, DequantParser, DivParser, DummyParser, FlattenParser, GatherParser, \ - GELUParser, GenericConv1DParser, GenericConv2DParser, GenericDWConv1DParser, GenericDWConv2DParser, \ + ConvTranspose1DParser, DebugParser, DequantParser, DivParser, DummyParser, FlattenParser, FloorParser, \ + GatherParser, GELUParser, GenericConv1DParser, GenericConv2DParser, GenericDWConv1DParser, GenericDWConv2DParser, \ GenericGEMMParser, GenericMaxPool2DParser, IntegerDivParser, ITAMaxParser, ITAPartialMaxParser, LayerNormParser, \ MatMulParser, MaxPool1DParser, MulParser, Pad1DParser, Pad2DParser, PowParser, QuantParser, ReduceMeanParser, \ ReduceSumParser, ReluParser, RequantShiftParser, ReshapeParser, RQIntegerDivParser, RQSiGELUParser, SliceParser, \ @@ -74,6 +74,7 @@ ConvTransposeMapper = NodeMapper(ConvTranspose1DParser(), BasicConvTransposeBindings) SliceMapper = 
NodeMapper(SliceParser(), BasicSliceBindings) CeilMapper = NodeMapper(CeilParser(), BasicCeilBindings) +FloorMapper = NodeMapper(FloorParser(), BasicFloorBindings) ClipMapper = NodeMapper(ClipParser(), BasicClipBindings) # Dummy nodes are intended for development purposes only! @@ -122,6 +123,7 @@ 'BatchNormalization': BatchNormalizationLayer([BatchNormalizationMapper]), 'ConvTranspose': ConvTransposeLayer([ConvTransposeMapper]), 'Ceil': CeilLayer([CeilMapper]), + 'Floor': FloorLayer([FloorMapper]), 'Clip': ClipLayer([ClipMapper]), # # For example, you can use the DummpyMapper, in case you want to test # # deployment or optimizations with GlobalAveragePool nodes but did not yet diff --git a/Deeploy/Targets/Generic/Templates/FloatFloorTemplate.py b/Deeploy/Targets/Generic/Templates/FloatFloorTemplate.py new file mode 100644 index 0000000000..2d9768c1f4 --- /dev/null +++ b/Deeploy/Targets/Generic/Templates/FloatFloorTemplate.py @@ -0,0 +1,23 @@ +# SPDX-FileCopyrightText: 2021 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 +import numpy as np + +from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation + + +class _FloorTemplate(NodeTemplate): + + def alignToContext(self, ctxt: NetworkContext, + operatorRepresentation: OperatorRepresentation) -> tuple[NetworkContext, dict, list[str]]: + + data_in = ctxt.lookup(operatorRepresentation['data_in']) + operatorRepresentation['size'] = int(np.prod(data_in.shape)) + operatorRepresentation['type_width'] = data_in._type.referencedType.typeWidth + return ctxt, operatorRepresentation, [] + + +referenceTemplate = _FloorTemplate(""" +// Floor (Name: ${nodeName}, Op: ${nodeOp}) +Floor_fp${type_width}_fp${type_width}(${data_in}, ${data_out}, ${size}); +""") diff --git a/DeeployTest/Tests/Kernels/FP32/Floor/inputs.npz b/DeeployTest/Tests/Kernels/FP32/Floor/inputs.npz new file mode 100644 index 0000000000000000000000000000000000000000..ac58fc00e29cdc6487f6248edf903d0dedaa1f12 
GIT binary patch literal 776 zcmbV~e=O8-7{|X~7lrbpI$Jl{k?qhiBFwMv=Oc;T#ICYk$1)pd?8wr70~{Ay6pQxvU_9FNn$u5eWH$ zyb`vQl@}i=VHHYi9xpFaD=oSDm|Uf_{60ZKez=ey6zVJFpW^?Q9A1BEN9;%neN$YZCb>}GQ;Cs&31vh!LrwcV=T?&X<05JKJOB+_yhx%) zBD$O4N|wLL$#-2L4m{%m2__-ax0Fl9gYUyma~;#~6Hc;O6-o^O;Ppnp*kypO06q2# zbuf%a`jQiE96T!7M?!mSk>j)LM0YlgoHm=8faH0!ES+L*ZPXjz3?hhaC?cKGS*Xo# zgvYylanVN^a&fF9^99N1gIWuvHzuHF#Pyls#-Rp2X4!nSS2YGsqI|T%)#AD14-I1(bWz0tHj#;DS@k~pX}lm+sbJ8bg7 o5KfddOoI@Y00*NG4;K>$BM@@{u|Sdx7t|ObEV524Tnqx-0D0#b+yDRo literal 0 HcmV?d00001 diff --git a/DeeployTest/Tests/Kernels/FP32/Floor/outputs.npz b/DeeployTest/Tests/Kernels/FP32/Floor/outputs.npz new file mode 100644 index 0000000000000000000000000000000000000000..93c0cb3bd5a23f8defd3a545f8efa5bdfce34341 GIT binary patch literal 778 zcmbVK!AiqG5S_H@p{E}Ffb}3zNTmglB7&QXm!jn2K@e=QDITN^X)A~b`w0bqV2^%; z;QA4Y$NY)DmrWF}PCL9eFK=fiOOiozts_3RTqUY|3o8$d-$}X?7n-TTH|I zYJ567KTR$Ixs!)*oX)TF@KA+Ell?GK;bfLC^3n8amXA~FC!^bW3VnVv%2L!jy-4*V z)jQaY)T8=Wv_y=?Mh<^q7K>$Ssh;`P|(&pXIw~XiS85 z{{1YF9`+J)UELM#gI?~$*+hn8U+3ph<9l_X722ml&HXsfLVE_M=3Fue8lC3fuh%rt a8HtbMCrm*4`z + +void Floor_fp32_fp32(float32_t *data_in, float32_t *data_out, int32_t size) { + for (int i = 0; i < size; i++) { + data_out[i] = floorf(data_in[i]); + } +} From 52c1e733fc9a9c80a193f41f4c819a56ebfe20d2 Mon Sep 17 00:00:00 2001 From: Alex Marchioni Date: Thu, 14 May 2026 14:15:41 +0000 Subject: [PATCH 05/16] add support for Sub operator for Generic target --- Deeploy/Targets/Generic/Bindings.py | 21 +++++++--- Deeploy/Targets/Generic/Layers.py | 3 ++ Deeploy/Targets/Generic/Parsers.py | 5 ++- Deeploy/Targets/Generic/Platform.py | 8 ++-- .../Generic/Templates/FloatSubTemplate.py | 14 +++++++ .../Targets/Generic/Templates/SubTemplate.py | 39 ++++++++++++++++++ DeeployTest/Tests/Kernels/FP32/Sub/inputs.npz | Bin 0 -> 1534 bytes .../Tests/Kernels/FP32/Sub/network.onnx | Bin 0 -> 161 bytes .../Tests/Kernels/FP32/Sub/outputs.npz | Bin 0 -> 778 bytes .../Tests/Kernels/Integer/Sub/inputs.npz | Bin 
0 -> 1534 bytes .../Tests/Kernels/Integer/Sub/network.onnx | Bin 0 -> 161 bytes .../Tests/Kernels/Integer/Sub/outputs.npz | Bin 0 -> 778 bytes DeeployTest/test_generic_config.py | 2 + 13 files changed, 83 insertions(+), 9 deletions(-) create mode 100644 Deeploy/Targets/Generic/Templates/FloatSubTemplate.py create mode 100644 Deeploy/Targets/Generic/Templates/SubTemplate.py create mode 100644 DeeployTest/Tests/Kernels/FP32/Sub/inputs.npz create mode 100644 DeeployTest/Tests/Kernels/FP32/Sub/network.onnx create mode 100644 DeeployTest/Tests/Kernels/FP32/Sub/outputs.npz create mode 100644 DeeployTest/Tests/Kernels/Integer/Sub/inputs.npz create mode 100644 DeeployTest/Tests/Kernels/Integer/Sub/network.onnx create mode 100644 DeeployTest/Tests/Kernels/Integer/Sub/outputs.npz diff --git a/Deeploy/Targets/Generic/Bindings.py b/Deeploy/Targets/Generic/Bindings.py index 6062332300..35d927df62 100644 --- a/Deeploy/Targets/Generic/Bindings.py +++ b/Deeploy/Targets/Generic/Bindings.py @@ -16,11 +16,11 @@ FloatCeilTemplate, FloatClipTemplate, FloatConvTemplate, FloatDivTemplate, FloatDWConvTemplate, \ FloatFloorTemplate, FloatGELUTemplate, FloatGemmTemplate, FloatLayernormTemplate, FloatMatMulTemplate, \ FloatMaxPoolTemplate, FloatMulTemplate, FloatPadTemplate, FloatPowTemplate, FloatReduceMeanTemplate, \ - FloatReluTemplate, FloatSoftmaxTemplate, FloatSqrtTemplate, GatherTemplate, GemmTemplate, IntegerDivTemplate, \ - ITAMaxTemplate, ITAPartialMaxTemplate, MatMulTemplate, MaxPoolTemplate, MulTemplate, PadTemplate, QuantTemplate, \ - ReduceMeanTemplate, ReduceSumTemplate, RequantShiftTemplate, ReshapeTemplate, RQIntegerDivTemplate, \ - RQSiGELUTemplate, SliceTemplate, TransposeTemplate, iGELUTemplate, iLayernormTemplate, iRMSNormTemplate, \ - iSoftmaxTemplate + FloatReluTemplate, FloatSoftmaxTemplate, FloatSqrtTemplate, FloatSubTemplate, GatherTemplate, GemmTemplate, \ + IntegerDivTemplate, ITAMaxTemplate, ITAPartialMaxTemplate, MatMulTemplate, MaxPoolTemplate, MulTemplate, \ + 
PadTemplate, QuantTemplate, ReduceMeanTemplate, ReduceSumTemplate, RequantShiftTemplate, ReshapeTemplate, \ + RQIntegerDivTemplate, RQSiGELUTemplate, SliceTemplate, SubTemplate, TransposeTemplate, iGELUTemplate, \ + iLayernormTemplate, iRMSNormTemplate, iSoftmaxTemplate from Deeploy.Targets.Generic.TypeCheckers import AddChecker, BatchNormChecker, ConcatChecker, ConvChecker, \ DebugPrintChecker, DequantChecker, DivChecker, DummyChecker, GatherChecker, GELUChecker, GEMMChecker, \ LayerNormChecker, MatMulChecker, MaxPoolChecker, MulChecker, PadChecker, QuantChecker, ReduceMeanChecker, \ @@ -55,6 +55,17 @@ FloatAddTemplate.referenceTemplate, BasicTransformer) ] +# using AddChecker since they are exactly the same +BasicSubBindings = [ + NodeBinding(AddChecker([PointerClass(type1), PointerClass(type2)], [PointerClass(int32_t)]), + SubTemplate.referenceTemplate, BasicTransformer) + for type1 in IntegerDataTypes + for type2 in IntegerDataTypes +] + [ + NodeBinding(AddChecker([PointerClass(float32_t), PointerClass(float32_t)], [PointerClass(float32_t)]), + FloatSubTemplate.referenceTemplate, BasicTransformer) +] + BasicConv1DBindings = [ NodeBinding(ConvChecker( [PointerClass(type), PointerClass(type), PointerClass(type)], [PointerClass(type)]), diff --git a/Deeploy/Targets/Generic/Layers.py b/Deeploy/Targets/Generic/Layers.py index 9987fdc383..9e9a4ec283 100644 --- a/Deeploy/Targets/Generic/Layers.py +++ b/Deeploy/Targets/Generic/Layers.py @@ -188,6 +188,9 @@ def computeOps(self): return self.mapper.parser.operatorRepresentation['size'] +SubLayer = AddLayer + + class MatMulLayer(ONNXLayer): def __init__(self, maps: List[NodeMapper]): diff --git a/Deeploy/Targets/Generic/Parsers.py b/Deeploy/Targets/Generic/Parsers.py index 3e888905c1..a50564851b 100644 --- a/Deeploy/Targets/Generic/Parsers.py +++ b/Deeploy/Targets/Generic/Parsers.py @@ -492,6 +492,9 @@ def parseNodeCtxt(self, return ctxt, True +SubParser = AddParser + + class ReduceParser(NodeParser): def __init__(self): 
@@ -2965,4 +2968,4 @@ def parseNodeCtxt(self, if len(node.inputs) > 2 and node.inputs[2].name != '': self.operatorRepresentation['max_val'] = float(node.inputs[2].values.item()) - return ctxt, True \ No newline at end of file + return ctxt, True diff --git a/Deeploy/Targets/Generic/Platform.py b/Deeploy/Targets/Generic/Platform.py index 6c997bb10e..dbc0895757 100644 --- a/Deeploy/Targets/Generic/Platform.py +++ b/Deeploy/Targets/Generic/Platform.py @@ -14,25 +14,26 @@ BasicMaxPool2DBindings, BasicMulBindings, BasicPad1DBindings, BasicPad2DBindings, BasicPowBindings, \ BasicQuantBindings, BasicReduceMeanBindings, BasicReduceSumBindings, BasicReluBinding, BasicReshapeBindings, \ BasicRQIntegerDivBinding, BasicRQSBindings, BasicRQSGELUBinding, BasicSliceBindings, BasicSoftmaxBindings, \ - BasicSqrtBindings, BasicTransposeBindings, DummyBinding + BasicSqrtBindings, BasicSubBindings, BasicTransposeBindings, DummyBinding from Deeploy.Targets.Generic.Layers import AddLayer, BatchNormalizationLayer, CeilLayer, ClipLayer, ConcatLayer, \ ConvLayer, ConvTransposeLayer, DebugPrintLayer, DequantLayer, DivLayer, FloorLayer, GatherLayer, GELULayer, \ GEMMLayer, ITAMaxLayer, LayerNormLayer, MatMulLayer, MaxPoolLayer, MulLayer, PadLayer, PowLayer, QuantLayer, \ ReduceMeanLayer, ReduceSumLayer, ReluLayer, RequantShiftLayer, ReshapeLayer, RQIntegerDivLayer, RQSiGELULayer, \ - SliceLayer, SoftmaxLayer, SqrtLayer, TransposeLayer + SliceLayer, SoftmaxLayer, SqrtLayer, SubLayer, TransposeLayer from Deeploy.Targets.Generic.Parsers import AddParser, BatchNormParser, CeilParser, ClipParser, ConcatParser, \ ConvTranspose1DParser, DebugParser, DequantParser, DivParser, DummyParser, FlattenParser, FloorParser, \ GatherParser, GELUParser, GenericConv1DParser, GenericConv2DParser, GenericDWConv1DParser, GenericDWConv2DParser, \ GenericGEMMParser, GenericMaxPool2DParser, IntegerDivParser, ITAMaxParser, ITAPartialMaxParser, LayerNormParser, \ MatMulParser, MaxPool1DParser, MulParser, Pad1DParser, 
Pad2DParser, PowParser, QuantParser, ReduceMeanParser, \ ReduceSumParser, ReluParser, RequantShiftParser, ReshapeParser, RQIntegerDivParser, RQSiGELUParser, SliceParser, \ - SoftmaxParser, SqrtParser, TransposeParser, UnsqueezeParser, iLayerNormParser, iSoftmaxParser + SoftmaxParser, SqrtParser, SubParser, TransposeParser, UnsqueezeParser, iLayerNormParser, iSoftmaxParser from Deeploy.Targets.Generic.Templates import AllocateTemplate, FreeTemplate from Deeploy.Targets.Generic.TopologyOptimizationPasses.Passes import DequantPatternPass, ExtractPaddingFromConvPass, \ ExtractPaddingFromPoolPass, MatMulAddMergePass, MergeConstAddAndRequantPass, QuantPatternPass, \ iGELURequantMergePass AddMapper = NodeMapper(AddParser(), BasicAddBindings) +SubMapper = NodeMapper(SubParser(), BasicSubBindings) Conv1DMapper = NodeMapper(GenericConv1DParser(), BasicConv1DBindings) Conv2DMapper = NodeMapper(GenericConv2DParser(), BasicConv2DBindings) ConcatMapper = NodeMapper(ConcatParser(), BasicConcatBindings) @@ -83,6 +84,7 @@ GenericMapping = { 'Add': AddLayer([AddMapper]), + 'Sub': SubLayer([SubMapper]), 'Conv': ConvLayer([Conv2DMapper, DWConv2DMapper, Conv1DMapper, DWConv1DMapper]), 'Concat': ConcatLayer([ConcatMapper]), 'DebugPrint': DebugPrintLayer([DebugMapper]), diff --git a/Deeploy/Targets/Generic/Templates/FloatSubTemplate.py b/Deeploy/Targets/Generic/Templates/FloatSubTemplate.py new file mode 100644 index 0000000000..fcae7e1c0d --- /dev/null +++ b/Deeploy/Targets/Generic/Templates/FloatSubTemplate.py @@ -0,0 +1,14 @@ +# SPDX-FileCopyrightText: 2021 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 + +from Deeploy.DeeployTypes import NodeTemplate + +referenceTemplate = NodeTemplate(""" +// Add (Name: ${nodeName}, Op: ${nodeOp}) +BEGIN_SINGLE_CORE + for (uint32_t i=0;i<${size};i++){ + ${data_out}[i] = ${data_in_1}[i] - ${data_in_2}[i]; + } +END_SINGLE_CORE +""") diff --git a/Deeploy/Targets/Generic/Templates/SubTemplate.py 
b/Deeploy/Targets/Generic/Templates/SubTemplate.py new file mode 100644 index 0000000000..6fa24e54aa --- /dev/null +++ b/Deeploy/Targets/Generic/Templates/SubTemplate.py @@ -0,0 +1,39 @@ +# SPDX-FileCopyrightText: 2021 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 +from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation + + +class _SubTemplate(NodeTemplate): + + def alignToContext( + self, ctxt: NetworkContext, + operatorRepresentation: OperatorRepresentation) -> tuple[NetworkContext, OperatorRepresentation, list[str]]: + + data_in_1 = ctxt.lookup(operatorRepresentation['data_in_1']) + data_in_2 = ctxt.lookup(operatorRepresentation['data_in_2']) + data_out = ctxt.lookup(operatorRepresentation['data_out']) + + input_1_offset = 0 + if hasattr(data_in_1, "_signed") and hasattr(data_in_1, "nLevels"): + input_1_offset = (data_in_1._signed == 0) * int(data_in_1.nLevels / 2) + input_2_offset = 0 + if hasattr(data_in_2, "_signed") and hasattr(data_in_2, "nLevels"): + input_2_offset = -(data_in_2._signed == 0) * int(data_in_2.nLevels / 2) + output_offset = 0 + if hasattr(data_out, "_signed") and hasattr(data_out, "nLevels"): + output_offset = -(data_out._signed == 0) * int(data_out.nLevels // 2) + + operatorRepresentation['offset'] = input_1_offset + input_2_offset + output_offset + + return ctxt, operatorRepresentation, [] + + +referenceTemplate = _SubTemplate(""" +// Sub (Name: ${nodeName}, Op: ${nodeOp}) +BEGIN_SINGLE_CORE + for (uint32_t i = 0; i < ${size}; i++){ + ${data_out}[i] = ${data_in_1}[i] - ${data_in_2}[i] + ${offset}; + } +END_SINGLE_CORE +""") diff --git a/DeeployTest/Tests/Kernels/FP32/Sub/inputs.npz b/DeeployTest/Tests/Kernels/FP32/Sub/inputs.npz new file mode 100644 index 0000000000000000000000000000000000000000..c4bfb1f89b958c65b885ff259a415f5b408b2105 GIT binary patch literal 1534 zcmd5+YfzI%6wL!rP$7t*24tdw5Dip(q!f_vE`rKr(15%mN=XeU6eT8*QlvEqDYg}) 
z@(7TMSfdrOg6~mjau>yR>=a+E1*wHvwd$Z_t)P-Yo2Z?RV}JEm&(7|dd*`0}>+F$A zxOM@o7nsZ{tX&ZO>?NF7<5-z%m3g^AfoqlTvspZrmh)onZ?0UN5F?ebSsK=QKSjEF zrOGcv;1`xL$4?~i%g9mXs^n|m&QU4SZT$uEY<0S|uU;)zrdw3m z{|Qc)9yEYzsMWkX;vB@Zs7Y&&JF>dnv3y@4fvN#W3avq1!&eX>slw`!)i|+-4;Gg? z8u#>LT6^yhR*acKqrIbH?;=+^_()ElXtT)74Ln@bD`Hv)R?^F}_v0i>8FSD-n5L?f z5H|;r-x7kIpCaxFtRmCJPNvJDLb}etCG83Gsd%3~j5_l<)ozTZA6hI-;F5kAl=Uz> zMpT)9I1Ct3kxh-Vn^?N45>HGOlI%Mvz#COY`!koouWAkEZtFr_+bJ*&_>eHRJ6X2C zfiAxhg|j*F5bxnd9wyXboAIu>fEz@&D;Vm+=8)QF$)r3v*nFfyKvb$#Fjv`$&XQFy z&+u5 zebu||%S^9%ch>*ayE@Me%n7d@7~q*tb98So1CNCCz-B!(c)C&P@O#9|ID}ig7vo+b zhs?PjNgXR);WM9D5>cn6m*V@G*-NLvy}%(Tx*bJ-k6KD|?Z1S_uHj&o3c&5e3TQNR zqsllAo>qiHv;I2r=2w&SMm?N0gwwtW7h&*$Cw+X?0&ZzkllBbEf)I4o#CsnUfNifrEPxtdZG~{ZsA3+bdR+G`SFY+qy}!_*)S9 zI#QZ6gZe1y!Pm|O`wOl^V~!sD*M*q(6^@|3X)3z$S~*T|a3tRLj-ZKMNbfdZWiskD z@O{=8YCL_K*~@ijczt4e&7y#O^&$8sI)I!s_>;sn%c*pX2W>rPgyc*k96oP^$DaHM zS<6xAXq`*$?3_TJtUC`!JEHN_FAtzA<`FKh7>^dI0UhgN8L{X%sg+mLocbQfwkx7d zc6xj_G?0{+`!Lx`5#4{|I_}QzhMmW5W923%=wEs}oL_t%pAI#?G8xLIT4@HKwHKoqRyhnHr@p+3M(W0+9K%uI+`~2j3wLMTrF&#id2* wsrh*jPLvExpO6rj00*NG4;K>$BM@@{u>f4w2%BsYP%p#|Y_d))Tnqx-04KB`sQ>@~ literal 0 HcmV?d00001 diff --git a/DeeployTest/Tests/Kernels/FP32/Sub/outputs.npz b/DeeployTest/Tests/Kernels/FP32/Sub/outputs.npz new file mode 100644 index 0000000000000000000000000000000000000000..805378eb887e39cb9f627dc16c160c9d304c42c2 GIT binary patch literal 778 zcmbV~e=O8-7{|Zo?vz85F3FGEg*mokZd2)4_j#;2`+Y=0evpta+T$zapXdLVI3`{J3O1x7H2N66 zx2eFUhy|j&aqKkF4N1xsTsjbrKE-hW9gZY}!wZNi;_mktpp4X!n{+?(8$MAVL3(Se zaN@-nG|Sh}l2s?T5q=+!h`!J+Pp=Z6+7?N7)NOj+?ld{0@&_-OiOer8;I3u^8k0%D zy>}<1R#lSAj`29iz7RK`ox^>vJBcm(A-T0$OrEISLCWF+@9q+M+1h}F;v}Ln`V!9k zG!FFLhD6mz(eozCcu!$KWtwH;pq7zzrUtP01Bk7uK=m9G?r-xXE*;NkWv2jo!ZfJ7 zITmzYgE(<_1F5(`qhg=8;4&)1T6N=dRLs0uUA3N6Yq27&aOjC35 ztK({z)AXT6T`{Izdx+R=8hZ}f8b^6sp(6ScnKGA=g~A{bIduSXn@ey{g)6e(G>a+nj$D0 zPJ;L;S8^>fhIlx?hxW>LGSa9ghot*pi?|7)?J=}0f??&y{Fhz}o0jElFm16yhUNZc 
TSr);zwj_+T30iZ*Z?1mHlZ!(s%D4WLpkRP8sdmKE^*lT-RY_@h^_KNqN;>PyfqU#j5cY6Ka zR`5s z*U%sO>C6rE6L^m}&2xaR9@gU$ZLq1pf#w1SPnAqLhU2;dG~Z3x{y0 zU|NT5^5wt~zwT4-H3F)wx1f9jP{URq1!P0Or+F2l|3?R(hu%QfY?tVY_doy_BE|J5 zyiKHPs!zp~fO3tA(>b0a1LQo2JwU#ypZ1=Iiyovkb=I2G6pz1yPkzD0*31W$^c$c! z2fYbj2WRLpnD(d~aGJY;)QpOkUIUuTLF#N2Kk|ajOm=PV>X$dlk^%5x`?;cw)L-A> f-}mX&|9#&lGEMhQ{{zA1!Zl^e@(YoJ1zFz#Y^hh& literal 0 HcmV?d00001 diff --git a/DeeployTest/Tests/Kernels/Integer/Sub/network.onnx b/DeeployTest/Tests/Kernels/Integer/Sub/network.onnx new file mode 100644 index 0000000000000000000000000000000000000000..b82f4c7c13c6654a9d9e5e54721282d25cddb236 GIT binary patch literal 161 zcmdLMTrF&#id2* wsrh*jPLvExpO6rj00*NG4;K>$BM@@{u>f4w2%BsYP%p#|Y_d))Tnqx-04KB`sQ>@~ literal 0 HcmV?d00001 diff --git a/DeeployTest/Tests/Kernels/Integer/Sub/outputs.npz b/DeeployTest/Tests/Kernels/Integer/Sub/outputs.npz new file mode 100644 index 0000000000000000000000000000000000000000..2b1dc905cc37d6a31ae5b333589c61879a89ea7a GIT binary patch literal 778 zcmbV~KS/{=d=sfaja2-qRR3Bp-8C8Crd8PW_zh?N@@EEF^}5eG3xa{i+&+*CRg zIZ(vG;V2GHjv__cGearj;3N)G#3?vAy4m0BX-n6B;o*6o=Y8Jq?z?N8 zq3h+lht(-*sV#c$qBpSMt{RdOKN061d3}8HmGjz=C3$C6s*SgGYs9qjv%{8cTC=sf zS1&J2*Xot3`Y+4#jVgX)uIyIPeS@}{v(3Th&usI9`CqG3#4d^iO%eY{

b9E5Lst zlEt5a$Iu(1xz#%!E(f5kTuhDP8=jZ@Vqep>P6gfa&!S6u}>^0-e7OZ>hz&ZRI zWY{uCv z{gC<;Hu0ZQr>Jj1R1o=r-o?K^C%OY2&{M7dQIg&{e1@L~`t2lQhWLO_y}f>dCjl Date: Thu, 14 May 2026 15:53:16 +0000 Subject: [PATCH 06/16] add support for Exp operator for Generic target --- Deeploy/Targets/Generic/Bindings.py | 7 +- Deeploy/Targets/Generic/Layers.py | 14 +- Deeploy/Targets/Generic/Parsers.py | 123 ++++++------------ Deeploy/Targets/Generic/Platform.py | 24 ++-- .../Generic/Templates/FloatExpTemplate.py | 23 ++++ DeeployTest/Tests/Kernels/FP32/Exp/inputs.npz | Bin 0 -> 776 bytes .../Tests/Kernels/FP32/Exp/network.onnx | Bin 0 -> 120 bytes .../Tests/Kernels/FP32/Exp/outputs.npz | Bin 0 -> 778 bytes .../Generic/inc/DeeployBasicMath.h | 1 + TargetLibraries/Generic/inc/kernel/Exp.h | 21 +++ TargetLibraries/Generic/src/Exp_fp32.c | 14 ++ 11 files changed, 121 insertions(+), 106 deletions(-) create mode 100644 Deeploy/Targets/Generic/Templates/FloatExpTemplate.py create mode 100644 DeeployTest/Tests/Kernels/FP32/Exp/inputs.npz create mode 100644 DeeployTest/Tests/Kernels/FP32/Exp/network.onnx create mode 100644 DeeployTest/Tests/Kernels/FP32/Exp/outputs.npz create mode 100644 TargetLibraries/Generic/inc/kernel/Exp.h create mode 100644 TargetLibraries/Generic/src/Exp_fp32.c diff --git a/Deeploy/Targets/Generic/Bindings.py b/Deeploy/Targets/Generic/Bindings.py index 35d927df62..c25369ff9e 100644 --- a/Deeploy/Targets/Generic/Bindings.py +++ b/Deeploy/Targets/Generic/Bindings.py @@ -13,7 +13,7 @@ from Deeploy.FutureExtension.CodeTransformationPasses.FutureCodeTransformation import FutureGeneration from Deeploy.Targets.Generic.Templates import AddTemplate, BatchNormalizationTemplate, ConcatTemplate, ConvTemplate, \ ConvTransposeTemplate, DebugPrintTemplate, DequantTemplate, DummyTemplate, DWConvTemplate, FloatAddTemplate, \ - FloatCeilTemplate, FloatClipTemplate, FloatConvTemplate, FloatDivTemplate, FloatDWConvTemplate, \ + FloatCeilTemplate, FloatClipTemplate, FloatConvTemplate, 
FloatDivTemplate, FloatDWConvTemplate, FloatExpTemplate, \ FloatFloorTemplate, FloatGELUTemplate, FloatGemmTemplate, FloatLayernormTemplate, FloatMatMulTemplate, \ FloatMaxPoolTemplate, FloatMulTemplate, FloatPadTemplate, FloatPowTemplate, FloatReduceMeanTemplate, \ FloatReluTemplate, FloatSoftmaxTemplate, FloatSqrtTemplate, FloatSubTemplate, GatherTemplate, GemmTemplate, \ @@ -357,3 +357,8 @@ PointerClass(float32_t)], [PointerClass(float32_t)]), FloatClipTemplate.referenceTemplate, BasicTransformer), ] + +BasicExpBindings = [ + NodeBinding(DummyChecker([PointerClass(float32_t)], [PointerClass(float32_t)]), FloatExpTemplate.referenceTemplate, + BasicTransformer), +] diff --git a/Deeploy/Targets/Generic/Layers.py b/Deeploy/Targets/Generic/Layers.py index 9e9a4ec283..2005bea1d1 100644 --- a/Deeploy/Targets/Generic/Layers.py +++ b/Deeploy/Targets/Generic/Layers.py @@ -715,18 +715,16 @@ def computeOps(self): class CeilLayer(ONNXLayer): - - def __init__(self, maps: List[NodeMapper]): - super().__init__(maps) + pass class FloorLayer(ONNXLayer): - - def __init__(self, maps: List[NodeMapper]): - super().__init__(maps) + pass class ClipLayer(ONNXLayer): + pass - def __init__(self, maps: List[NodeMapper]): - super().__init__(maps) + +class ExpLayer(ONNXLayer): + pass diff --git a/Deeploy/Targets/Generic/Parsers.py b/Deeploy/Targets/Generic/Parsers.py index a50564851b..d89f216811 100644 --- a/Deeploy/Targets/Generic/Parsers.py +++ b/Deeploy/Targets/Generic/Parsers.py @@ -11,6 +11,23 @@ from Deeploy.DeeployTypes import ConstantBuffer, NetworkContext, NodeParser, VariableBuffer +class UnaryElementWiseParser(NodeParser): + + def parseNode(self, node: gs.Node) -> bool: + return len(node.inputs) == 1 and len(node.outputs) == 1 + + def parseNodeCtxt(self, + ctxt: NetworkContext, + node: gs.Node, + channels_first: bool = True) -> Tuple[NetworkContext, bool]: + data_in = ctxt.lookup(node.inputs[0].name) + data_out = ctxt.lookup(node.outputs[0].name) + 
self.operatorRepresentation['data_in'] = data_in.name + self.operatorRepresentation['data_out'] = data_out.name + self.operatorRepresentation['size'] = int(np.prod(data_in.shape)) + return ctxt, True + + class ConcatParser(NodeParser): def __init__(self): @@ -1095,29 +1112,10 @@ def parseNodeCtxt(self, return ctxt, True -class ReluParser(NodeParser): - - def __init__(self): - super().__init__() - - def parseNode(self, node: gs.Node) -> (bool): - - ret = all([len(node.inputs) == 1, len(node.outputs) == 1]) - - return ret - - def parseNodeCtxt(self, - ctxt: NetworkContext, - node: gs.Node, - channels_first: bool = True) -> Tuple[NetworkContext, bool]: - - data_in = ctxt.lookup(node.inputs[0].name) - data_out = ctxt.lookup(node.outputs[0].name) - self.operatorRepresentation['data_in'] = data_in.name - self.operatorRepresentation['data_out'] = data_out.name - self.operatorRepresentation['size'] = np.prod(data_in.shape) +class ReluParser(UnaryElementWiseParser): - return ctxt, True + def parseNode(self, node: gs.Node) -> bool: + return super().parseNode(node) and node.op == 'Relu' class ReshapeParser(NodeParser): @@ -2868,79 +2866,28 @@ def parseNodeCtxt(self, return ctxt, False -class SqrtParser(NodeParser): - - def __init__(self): - super().__init__() +class SqrtParser(UnaryElementWiseParser): def parseNode(self, node: gs.Node) -> bool: - return node.op == 'Sqrt' and len(node.inputs) == 1 and len(node.outputs) == 1 - - def parseNodeCtxt(self, - ctxt: NetworkContext, - node: gs.Node, - channels_first: bool = True) -> Tuple[NetworkContext, bool]: + return super().parseNode(node) and node.op == 'Sqrt' - data_in = ctxt.lookup(node.inputs[0].name) - data_out = ctxt.lookup(node.outputs[0].name) - self.operatorRepresentation['data_in'] = data_in.name - self.operatorRepresentation['data_out'] = data_out.name - self.operatorRepresentation['size'] = int(np.prod(data_in.shape)) - - return ctxt, True - - -class CeilParser(NodeParser): - - def __init__(self): - super().__init__() 
+class CeilParser(UnaryElementWiseParser): def parseNode(self, node: gs.Node) -> bool: - return node.op == 'Ceil' and len(node.inputs) == 1 and len(node.outputs) == 1 + return super().parseNode(node) and node.op == 'Ceil' - def parseNodeCtxt(self, - ctxt: NetworkContext, - node: gs.Node, - channels_first: bool = True) -> Tuple[NetworkContext, bool]: - data_in = ctxt.lookup(node.inputs[0].name) - data_out = ctxt.lookup(node.outputs[0].name) - - self.operatorRepresentation['data_in'] = data_in.name - self.operatorRepresentation['data_out'] = data_out.name - self.operatorRepresentation['size'] = int(np.prod(data_in.shape)) - return ctxt, True - - -class FloorParser(NodeParser): - - def __init__(self): - super().__init__() +class FloorParser(UnaryElementWiseParser): def parseNode(self, node: gs.Node) -> bool: - return node.op == 'Floor' and len(node.inputs) == 1 and len(node.outputs) == 1 + return super().parseNode(node) and node.op == 'Floor' - def parseNodeCtxt(self, - ctxt: NetworkContext, - node: gs.Node, - channels_first: bool = True) -> Tuple[NetworkContext, bool]: - - data_in = ctxt.lookup(node.inputs[0].name) - data_out = ctxt.lookup(node.outputs[0].name) - - self.operatorRepresentation['data_in'] = data_in.name - self.operatorRepresentation['data_out'] = data_out.name - self.operatorRepresentation['size'] = int(np.prod(data_in.shape)) - return ctxt, True - - -class ClipParser(NodeParser): - def __init__(self): - super().__init__() +class ClipParser(UnaryElementWiseParser): def parseNode(self, node: gs.Node) -> bool: + # Clip allows 1–3 inputs (optional min/max constants), so we can't use super() if node.op != 'Clip' \ or len(node.outputs) != 1 \ or (not (1 <= len(node.inputs) <= 3)): @@ -2952,11 +2899,9 @@ def parseNodeCtxt(self, node: gs.Node, channels_first: bool = True) -> Tuple[NetworkContext, bool]: - data_in = ctxt.lookup(node.inputs[0].name) - data_out = ctxt.lookup(node.outputs[0].name) - self.operatorRepresentation['data_in'] = data_in.name - 
self.operatorRepresentation['data_out'] = data_out.name - self.operatorRepresentation['size'] = int(np.prod(data_in.shape)) + ctxt, ok = super().parseNodeCtxt(ctxt, node, channels_first) + if not ok: + return ctxt, False # min_val and max_val only handled as constants # Defaults: full float32 range @@ -2969,3 +2914,9 @@ def parseNodeCtxt(self, self.operatorRepresentation['max_val'] = float(node.inputs[2].values.item()) return ctxt, True + + +class ExpParser(UnaryElementWiseParser): + + def parseNode(self, node: gs.Node) -> bool: + return super().parseNode(node) and node.op == 'Exp' diff --git a/Deeploy/Targets/Generic/Platform.py b/Deeploy/Targets/Generic/Platform.py index dbc0895757..c31f7c5971 100644 --- a/Deeploy/Targets/Generic/Platform.py +++ b/Deeploy/Targets/Generic/Platform.py @@ -9,19 +9,19 @@ from Deeploy.Targets.Generic.Bindings import BasicAddBindings, BasicBatchNormBindings, BasicCeilBindings, \ BasicClipBindings, BasicConcatBindings, BasicConv1DBindings, BasicConv2DBindings, BasicConvTransposeBindings, \ BasicDebugPrintBindings, BasicDequantBindings, BasicDivBindings, BasicDWConv1DBinding, BasicDWConv2DBindings, \ - BasicFloorBindings, BasicGatherBindings, BasicGELUBindings, BasicGEMMBindings, BasicITAPartialSoftmaxBinding, \ - BasicITASoftmaxBinding, BasicLayerNormBindings, BasicMatMulBindings, BasicMaxPool1DBindings, \ - BasicMaxPool2DBindings, BasicMulBindings, BasicPad1DBindings, BasicPad2DBindings, BasicPowBindings, \ - BasicQuantBindings, BasicReduceMeanBindings, BasicReduceSumBindings, BasicReluBinding, BasicReshapeBindings, \ - BasicRQIntegerDivBinding, BasicRQSBindings, BasicRQSGELUBinding, BasicSliceBindings, BasicSoftmaxBindings, \ - BasicSqrtBindings, BasicSubBindings, BasicTransposeBindings, DummyBinding + BasicExpBindings, BasicFloorBindings, BasicGatherBindings, BasicGELUBindings, BasicGEMMBindings, \ + BasicITAPartialSoftmaxBinding, BasicITASoftmaxBinding, BasicLayerNormBindings, BasicMatMulBindings, \ + BasicMaxPool1DBindings, 
BasicMaxPool2DBindings, BasicMulBindings, BasicPad1DBindings, BasicPad2DBindings, \ + BasicPowBindings, BasicQuantBindings, BasicReduceMeanBindings, BasicReduceSumBindings, BasicReluBinding, \ + BasicReshapeBindings, BasicRQIntegerDivBinding, BasicRQSBindings, BasicRQSGELUBinding, BasicSliceBindings, \ + BasicSoftmaxBindings, BasicSqrtBindings, BasicSubBindings, BasicTransposeBindings, DummyBinding from Deeploy.Targets.Generic.Layers import AddLayer, BatchNormalizationLayer, CeilLayer, ClipLayer, ConcatLayer, \ - ConvLayer, ConvTransposeLayer, DebugPrintLayer, DequantLayer, DivLayer, FloorLayer, GatherLayer, GELULayer, \ - GEMMLayer, ITAMaxLayer, LayerNormLayer, MatMulLayer, MaxPoolLayer, MulLayer, PadLayer, PowLayer, QuantLayer, \ - ReduceMeanLayer, ReduceSumLayer, ReluLayer, RequantShiftLayer, ReshapeLayer, RQIntegerDivLayer, RQSiGELULayer, \ - SliceLayer, SoftmaxLayer, SqrtLayer, SubLayer, TransposeLayer + ConvLayer, ConvTransposeLayer, DebugPrintLayer, DequantLayer, DivLayer, ExpLayer, FloorLayer, GatherLayer, \ + GELULayer, GEMMLayer, ITAMaxLayer, LayerNormLayer, MatMulLayer, MaxPoolLayer, MulLayer, PadLayer, PowLayer, \ + QuantLayer, ReduceMeanLayer, ReduceSumLayer, ReluLayer, RequantShiftLayer, ReshapeLayer, RQIntegerDivLayer, \ + RQSiGELULayer, SliceLayer, SoftmaxLayer, SqrtLayer, SubLayer, TransposeLayer from Deeploy.Targets.Generic.Parsers import AddParser, BatchNormParser, CeilParser, ClipParser, ConcatParser, \ - ConvTranspose1DParser, DebugParser, DequantParser, DivParser, DummyParser, FlattenParser, FloorParser, \ + ConvTranspose1DParser, DebugParser, DequantParser, DivParser, DummyParser, ExpParser, FlattenParser, FloorParser, \ GatherParser, GELUParser, GenericConv1DParser, GenericConv2DParser, GenericDWConv1DParser, GenericDWConv2DParser, \ GenericGEMMParser, GenericMaxPool2DParser, IntegerDivParser, ITAMaxParser, ITAPartialMaxParser, LayerNormParser, \ MatMulParser, MaxPool1DParser, MulParser, Pad1DParser, Pad2DParser, PowParser, QuantParser, 
ReduceMeanParser, \ @@ -77,6 +77,7 @@ CeilMapper = NodeMapper(CeilParser(), BasicCeilBindings) FloorMapper = NodeMapper(FloorParser(), BasicFloorBindings) ClipMapper = NodeMapper(ClipParser(), BasicClipBindings) +ExpMapper = NodeMapper(ExpParser(), BasicExpBindings) # Dummy nodes are intended for development purposes only! # They should always generate compiler errors to not accidentally end up in production code @@ -127,6 +128,7 @@ 'Ceil': CeilLayer([CeilMapper]), 'Floor': FloorLayer([FloorMapper]), 'Clip': ClipLayer([ClipMapper]), + 'Exp': ExpLayer([ExpMapper]), # # For example, you can use the DummpyMapper, in case you want to test # # deployment or optimizations with GlobalAveragePool nodes but did not yet # # implement the corresponding kernel diff --git a/Deeploy/Targets/Generic/Templates/FloatExpTemplate.py b/Deeploy/Targets/Generic/Templates/FloatExpTemplate.py new file mode 100644 index 0000000000..734d7e0fea --- /dev/null +++ b/Deeploy/Targets/Generic/Templates/FloatExpTemplate.py @@ -0,0 +1,23 @@ +# SPDX-FileCopyrightText: 2021 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 +import numpy as np + +from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation + + +class _ExpTemplate(NodeTemplate): + + def alignToContext(self, ctxt: NetworkContext, + operatorRepresentation: OperatorRepresentation) -> tuple[NetworkContext, dict, list[str]]: + + data_in = ctxt.lookup(operatorRepresentation['data_in']) + operatorRepresentation['size'] = int(np.prod(data_in.shape)) + operatorRepresentation['type_width'] = data_in._type.referencedType.typeWidth + return ctxt, operatorRepresentation, [] + + +referenceTemplate = _ExpTemplate(""" +// Exp (Name: ${nodeName}, Op: ${nodeOp}) +Exp_fp${type_width}_fp${type_width}(${data_in}, ${data_out}, ${size}); +""") diff --git a/DeeployTest/Tests/Kernels/FP32/Exp/inputs.npz b/DeeployTest/Tests/Kernels/FP32/Exp/inputs.npz new file mode 100644 index 
0000000000000000000000000000000000000000..ac58fc00e29cdc6487f6248edf903d0dedaa1f12 GIT binary patch literal 776 zcmbV~e=O8-7{|X~7lrbpI$Jl{k?qhiBFwMv=Oc;T#ICYk$1)pd?8wr70~{Ay6pQxvU_9FNn$u5eWH$ zyb`vQl@}i=VHHYi9xpFaD=oSDm|Uf_{60ZKez=ey6zVJFpW^?Q9A1BEN9;%neN$YZCb>}GQ;Cs&31vh!LrwcV=T?&X<05JKJOB+_yhx%) zBD$O4N|wLL$#-2L4m{%m2__-ax0Fl9gYUyma~;#~6Hc;O6-o^O;Ppnp*kypO06q2# zbuf%a`jQiE96T!7M?!mSk>j)LM0YlgoHm=8faH0!ES+L*ZPXjz3?hhaC?cKGS*Xo# zgvYylanVN^a&fF9^99N1gIWuvHzuHF#Pyls#-Rp2X4!nSS2YGsqI|T%)#AD14-I1(bWz0tHj#;DS@k~pX}lm+sbJT$k@a zPu1mZR<%gmth{UNcy{6n!_?_q#;5P(SWm81w|M5c)3Nc^JB#OWuBLNZc)e^4uGvm4 z|7}0rP0v(gd9QUTkCTI!n}+#xH*KeLnTpn2(_?M#ZIZWsW^>sme{H<|)|*$&&*o&h z+9xbGTcgM5xR4>lF#OIF)7>gh9j+&xbcvs`(ScKFpT$Ii8u+rI%nlkEaGJOexg^~ zD2fMY4k7hSiU4KU?xSefcc!wg2^Y>-BvL zEqg!Sux{8W?ABp=%~9~{Vr!QQP3JjZH5`7oSG%3k&$61oRLb^3%p#lUt~VwZCeCtf z*b?cevAxayi2M=TgBkXwxzZ&zj-0HvZm(Bc-jMXMJDHvA)bAkTD;2-p;kQYUBd5`4 zJJAXA4HqT2*q=G_(q3Y>W#EdbqIL@n7CY{HZDPOaKaY8P@n-jBS + +void Exp_fp32_fp32(float32_t *data_in, float32_t *data_out, int32_t size) { + for (int i = 0; i < size; i++) { + data_out[i] = expf(data_in[i]); + } +} From 801df4432fa693850b7ed2574cfc2d91fe691f12 Mon Sep 17 00:00:00 2001 From: Alex Marchioni Date: Thu, 14 May 2026 16:08:42 +0000 Subject: [PATCH 07/16] add support for Sigmoid operator for Generic target --- Deeploy/Targets/Generic/Bindings.py | 15 ++++++++---- Deeploy/Targets/Generic/Layers.py | 4 +++ Deeploy/Targets/Generic/Parsers.py | 6 +++++ Deeploy/Targets/Generic/Platform.py | 14 +++++++---- .../Generic/Templates/FloatSigmoidTemplate.py | 23 ++++++++++++++++++ .../Tests/Kernels/FP32/Sigmoid/inputs.npz | Bin 0 -> 776 bytes .../Tests/Kernels/FP32/Sigmoid/network.onnx | Bin 0 -> 124 bytes .../Tests/Kernels/FP32/Sigmoid/outputs.npz | Bin 0 -> 778 bytes .../Generic/inc/DeeployBasicMath.h | 1 + TargetLibraries/Generic/inc/kernel/Sigmoid.h | 21 ++++++++++++++++ TargetLibraries/Generic/src/Sigmoid_fp32.c | 14 +++++++++++ 11 files changed, 88 insertions(+), 10 deletions(-) create 
mode 100644 Deeploy/Targets/Generic/Templates/FloatSigmoidTemplate.py create mode 100644 DeeployTest/Tests/Kernels/FP32/Sigmoid/inputs.npz create mode 100644 DeeployTest/Tests/Kernels/FP32/Sigmoid/network.onnx create mode 100644 DeeployTest/Tests/Kernels/FP32/Sigmoid/outputs.npz create mode 100644 TargetLibraries/Generic/inc/kernel/Sigmoid.h create mode 100644 TargetLibraries/Generic/src/Sigmoid_fp32.c diff --git a/Deeploy/Targets/Generic/Bindings.py b/Deeploy/Targets/Generic/Bindings.py index c25369ff9e..195cf8bf71 100644 --- a/Deeploy/Targets/Generic/Bindings.py +++ b/Deeploy/Targets/Generic/Bindings.py @@ -16,11 +16,11 @@ FloatCeilTemplate, FloatClipTemplate, FloatConvTemplate, FloatDivTemplate, FloatDWConvTemplate, FloatExpTemplate, \ FloatFloorTemplate, FloatGELUTemplate, FloatGemmTemplate, FloatLayernormTemplate, FloatMatMulTemplate, \ FloatMaxPoolTemplate, FloatMulTemplate, FloatPadTemplate, FloatPowTemplate, FloatReduceMeanTemplate, \ - FloatReluTemplate, FloatSoftmaxTemplate, FloatSqrtTemplate, FloatSubTemplate, GatherTemplate, GemmTemplate, \ - IntegerDivTemplate, ITAMaxTemplate, ITAPartialMaxTemplate, MatMulTemplate, MaxPoolTemplate, MulTemplate, \ - PadTemplate, QuantTemplate, ReduceMeanTemplate, ReduceSumTemplate, RequantShiftTemplate, ReshapeTemplate, \ - RQIntegerDivTemplate, RQSiGELUTemplate, SliceTemplate, SubTemplate, TransposeTemplate, iGELUTemplate, \ - iLayernormTemplate, iRMSNormTemplate, iSoftmaxTemplate + FloatReluTemplate, FloatSigmoidTemplate, FloatSoftmaxTemplate, FloatSqrtTemplate, FloatSubTemplate, \ + GatherTemplate, GemmTemplate, IntegerDivTemplate, ITAMaxTemplate, ITAPartialMaxTemplate, MatMulTemplate, \ + MaxPoolTemplate, MulTemplate, PadTemplate, QuantTemplate, ReduceMeanTemplate, ReduceSumTemplate, \ + RequantShiftTemplate, ReshapeTemplate, RQIntegerDivTemplate, RQSiGELUTemplate, SliceTemplate, SubTemplate, \ + TransposeTemplate, iGELUTemplate, iLayernormTemplate, iRMSNormTemplate, iSoftmaxTemplate from 
Deeploy.Targets.Generic.TypeCheckers import AddChecker, BatchNormChecker, ConcatChecker, ConvChecker, \ DebugPrintChecker, DequantChecker, DivChecker, DummyChecker, GatherChecker, GELUChecker, GEMMChecker, \ LayerNormChecker, MatMulChecker, MaxPoolChecker, MulChecker, PadChecker, QuantChecker, ReduceMeanChecker, \ @@ -362,3 +362,8 @@ NodeBinding(DummyChecker([PointerClass(float32_t)], [PointerClass(float32_t)]), FloatExpTemplate.referenceTemplate, BasicTransformer), ] + +BasicSigmoidBindings = [ + NodeBinding(DummyChecker([PointerClass(float32_t)], [PointerClass(float32_t)]), + FloatSigmoidTemplate.referenceTemplate, BasicTransformer), +] diff --git a/Deeploy/Targets/Generic/Layers.py b/Deeploy/Targets/Generic/Layers.py index 2005bea1d1..ffe8bd5492 100644 --- a/Deeploy/Targets/Generic/Layers.py +++ b/Deeploy/Targets/Generic/Layers.py @@ -728,3 +728,7 @@ class ClipLayer(ONNXLayer): class ExpLayer(ONNXLayer): pass + + +class SigmoidLayer(ONNXLayer): + pass diff --git a/Deeploy/Targets/Generic/Parsers.py b/Deeploy/Targets/Generic/Parsers.py index d89f216811..b37051115d 100644 --- a/Deeploy/Targets/Generic/Parsers.py +++ b/Deeploy/Targets/Generic/Parsers.py @@ -2920,3 +2920,9 @@ class ExpParser(UnaryElementWiseParser): def parseNode(self, node: gs.Node) -> bool: return super().parseNode(node) and node.op == 'Exp' + + +class SigmoidParser(UnaryElementWiseParser): + + def parseNode(self, node: gs.Node) -> bool: + return super().parseNode(node) and node.op == 'Sigmoid' diff --git a/Deeploy/Targets/Generic/Platform.py b/Deeploy/Targets/Generic/Platform.py index c31f7c5971..cce84f2c82 100644 --- a/Deeploy/Targets/Generic/Platform.py +++ b/Deeploy/Targets/Generic/Platform.py @@ -13,20 +13,22 @@ BasicITAPartialSoftmaxBinding, BasicITASoftmaxBinding, BasicLayerNormBindings, BasicMatMulBindings, \ BasicMaxPool1DBindings, BasicMaxPool2DBindings, BasicMulBindings, BasicPad1DBindings, BasicPad2DBindings, \ BasicPowBindings, BasicQuantBindings, BasicReduceMeanBindings, 
BasicReduceSumBindings, BasicReluBinding, \ - BasicReshapeBindings, BasicRQIntegerDivBinding, BasicRQSBindings, BasicRQSGELUBinding, BasicSliceBindings, \ - BasicSoftmaxBindings, BasicSqrtBindings, BasicSubBindings, BasicTransposeBindings, DummyBinding + BasicReshapeBindings, BasicRQIntegerDivBinding, BasicRQSBindings, BasicRQSGELUBinding, BasicSigmoidBindings, \ + BasicSliceBindings, BasicSoftmaxBindings, BasicSqrtBindings, BasicSubBindings, BasicTransposeBindings, \ + DummyBinding from Deeploy.Targets.Generic.Layers import AddLayer, BatchNormalizationLayer, CeilLayer, ClipLayer, ConcatLayer, \ ConvLayer, ConvTransposeLayer, DebugPrintLayer, DequantLayer, DivLayer, ExpLayer, FloorLayer, GatherLayer, \ GELULayer, GEMMLayer, ITAMaxLayer, LayerNormLayer, MatMulLayer, MaxPoolLayer, MulLayer, PadLayer, PowLayer, \ QuantLayer, ReduceMeanLayer, ReduceSumLayer, ReluLayer, RequantShiftLayer, ReshapeLayer, RQIntegerDivLayer, \ - RQSiGELULayer, SliceLayer, SoftmaxLayer, SqrtLayer, SubLayer, TransposeLayer + RQSiGELULayer, SigmoidLayer, SliceLayer, SoftmaxLayer, SqrtLayer, SubLayer, TransposeLayer from Deeploy.Targets.Generic.Parsers import AddParser, BatchNormParser, CeilParser, ClipParser, ConcatParser, \ ConvTranspose1DParser, DebugParser, DequantParser, DivParser, DummyParser, ExpParser, FlattenParser, FloorParser, \ GatherParser, GELUParser, GenericConv1DParser, GenericConv2DParser, GenericDWConv1DParser, GenericDWConv2DParser, \ GenericGEMMParser, GenericMaxPool2DParser, IntegerDivParser, ITAMaxParser, ITAPartialMaxParser, LayerNormParser, \ MatMulParser, MaxPool1DParser, MulParser, Pad1DParser, Pad2DParser, PowParser, QuantParser, ReduceMeanParser, \ - ReduceSumParser, ReluParser, RequantShiftParser, ReshapeParser, RQIntegerDivParser, RQSiGELUParser, SliceParser, \ - SoftmaxParser, SqrtParser, SubParser, TransposeParser, UnsqueezeParser, iLayerNormParser, iSoftmaxParser + ReduceSumParser, ReluParser, RequantShiftParser, ReshapeParser, RQIntegerDivParser, 
RQSiGELUParser, SigmoidParser, \ + SliceParser, SoftmaxParser, SqrtParser, SubParser, TransposeParser, UnsqueezeParser, iLayerNormParser, \ + iSoftmaxParser from Deeploy.Targets.Generic.Templates import AllocateTemplate, FreeTemplate from Deeploy.Targets.Generic.TopologyOptimizationPasses.Passes import DequantPatternPass, ExtractPaddingFromConvPass, \ ExtractPaddingFromPoolPass, MatMulAddMergePass, MergeConstAddAndRequantPass, QuantPatternPass, \ @@ -78,6 +80,7 @@ FloorMapper = NodeMapper(FloorParser(), BasicFloorBindings) ClipMapper = NodeMapper(ClipParser(), BasicClipBindings) ExpMapper = NodeMapper(ExpParser(), BasicExpBindings) +SigmoidMapper = NodeMapper(SigmoidParser(), BasicSigmoidBindings) # Dummy nodes are intended for development purposes only! # They should always generate compiler errors to not accidentally end up in production code @@ -129,6 +132,7 @@ 'Floor': FloorLayer([FloorMapper]), 'Clip': ClipLayer([ClipMapper]), 'Exp': ExpLayer([ExpMapper]), + 'Sigmoid': SigmoidLayer([SigmoidMapper]), # # For example, you can use the DummpyMapper, in case you want to test # # deployment or optimizations with GlobalAveragePool nodes but did not yet # # implement the corresponding kernel diff --git a/Deeploy/Targets/Generic/Templates/FloatSigmoidTemplate.py b/Deeploy/Targets/Generic/Templates/FloatSigmoidTemplate.py new file mode 100644 index 0000000000..a25bf411e5 --- /dev/null +++ b/Deeploy/Targets/Generic/Templates/FloatSigmoidTemplate.py @@ -0,0 +1,23 @@ +# SPDX-FileCopyrightText: 2021 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 +import numpy as np + +from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation + + +class _SigmoidTemplate(NodeTemplate): + + def alignToContext(self, ctxt: NetworkContext, + operatorRepresentation: OperatorRepresentation) -> tuple[NetworkContext, dict, list[str]]: + + data_in = ctxt.lookup(operatorRepresentation['data_in']) + operatorRepresentation['size'] = 
int(np.prod(data_in.shape)) + operatorRepresentation['type_width'] = data_in._type.referencedType.typeWidth + return ctxt, operatorRepresentation, [] + + +referenceTemplate = _SigmoidTemplate(""" +// Sigmoid (Name: ${nodeName}, Op: ${nodeOp}) +Sigmoid_fp${type_width}_fp${type_width}(${data_in}, ${data_out}, ${size}); +""") diff --git a/DeeployTest/Tests/Kernels/FP32/Sigmoid/inputs.npz b/DeeployTest/Tests/Kernels/FP32/Sigmoid/inputs.npz new file mode 100644 index 0000000000000000000000000000000000000000..ac58fc00e29cdc6487f6248edf903d0dedaa1f12 GIT binary patch literal 776 zcmbV~e=O8-7{|X~7lrbpI$Jl{k?qhiBFwMv=Oc;T#ICYk$1)pd?8wr70~{Ay6pQxvU_9FNn$u5eWH$ zyb`vQl@}i=VHHYi9xpFaD=oSDm|Uf_{60ZKez=ey6zVJFpW^?Q9A1BEN9;%neN$YZCb>}GQ;Cs&31vh!LrwcV=T?&X<05JKJOB+_yhx%) zBD$O4N|wLL$#-2L4m{%m2__-ax0Fl9gYUyma~;#~6Hc;O6-o^O;Ppnp*kypO06q2# zbuf%a`jQiE96T!7M?!mSk>j)LM0YlgoHm=8faH0!ES+L*ZPXjz3?hhaC?cKGS*Xo# zgvYylanVN^a&fF9^99N1gIWuvHzuHF#Pyls#-Rp2X4!nSS2YGsqI|T%)#AD14-I1(bWz0tHj#;DS@k~pX}lm+sbJ5SIW4qYw`l69*#@a{#eGk_;ErAR#QWPApsu0^9)AHyW$} literal 0 HcmV?d00001 diff --git a/DeeployTest/Tests/Kernels/FP32/Sigmoid/outputs.npz b/DeeployTest/Tests/Kernels/FP32/Sigmoid/outputs.npz new file mode 100644 index 0000000000000000000000000000000000000000..9bb1aebe6708860127a1f99ebebe6cb688efa157 GIT binary patch literal 778 zcmWIWW@gc4fB;2?i7)R<|AzuD1`&q*(vpJG61}{FN=60&h6W~xIv73KFVr_6l98c| zp;|p9wK%y*-AX~-Ce1`$M?pO;zo?`rF)u#9C?ypn?v|KSoC*{#&PXgs1@bivbrdXg z6b#KYbrfn9$O2qftm^FL(tPcLX4l*Q`Co58W#eYsnjM>M{{h((9@g7GC={_d@Sx8A z!F*X82Ra`Ybal-gxjI@mHWJTPH+;B57PRmmpkU%h=aO9fDUgRxjlnf-y@ zRW=NX)2xo=*V|wGTW>G=3uxwoTuX)LhPFJ6<@P3jfc$H0HgCeY>=+uF%^4W3naz1q zZ~qGDW~I5y?G%A}YJ4i~8K#}HD)XoWheyMLOLm~J0Qor*=*C3_CiXuhU2M7Dnt{cZ zJUL_Q_P@gZ7f_AE({0ua4GS$BTCQ6Q{se~K380&l7hCt$*V*^4wzPlu8)(j-Oe+so zQCk7i`8Gf!P5-48+dEX|+0S-mwtw(@6VQJirVMQbHV*(z996K~ffgpa6wVU+dZsHtIbL%wpnJ;h)Z2f14hnIJ1u5at3|0tF3>AO<2W2Y9oxfkYUA M5E$_cd%)=$0HLY-A^-pY literal 0 HcmV?d00001 diff --git a/TargetLibraries/Generic/inc/DeeployBasicMath.h 
b/TargetLibraries/Generic/inc/DeeployBasicMath.h index 9bd7f042a8..5a7f15d073 100644 --- a/TargetLibraries/Generic/inc/DeeployBasicMath.h +++ b/TargetLibraries/Generic/inc/DeeployBasicMath.h @@ -54,6 +54,7 @@ #include "kernel/RQHardswish.h" #include "kernel/Relu.h" #include "kernel/RequantShift.h" +#include "kernel/Sigmoid.h" #include "kernel/Softmax.h" #include "kernel/Sqrt.h" diff --git a/TargetLibraries/Generic/inc/kernel/Sigmoid.h b/TargetLibraries/Generic/inc/kernel/Sigmoid.h new file mode 100644 index 0000000000..d9a960cab3 --- /dev/null +++ b/TargetLibraries/Generic/inc/kernel/Sigmoid.h @@ -0,0 +1,21 @@ +/* + * SPDX-FileCopyrightText: 2020 ETH Zurich and University of Bologna + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef __DEEPLOY_BASIC_MATH_SIGMOID_KERNEL_HEADER_ +#define __DEEPLOY_BASIC_MATH_SIGMOID_KERNEL_HEADER_ + +#include "DeeployBasicMath.h" + +/* + * element wise sigmoid + */ + +/******************************************************************************/ +/* Sigmoid */ +/******************************************************************************/ +void Sigmoid_fp32_fp32(float32_t *data_in, float32_t *data_out, int32_t size); + +#endif //__DEEPLOY_BASIC_MATH_SIGMOID_KERNEL_HEADER_ diff --git a/TargetLibraries/Generic/src/Sigmoid_fp32.c b/TargetLibraries/Generic/src/Sigmoid_fp32.c new file mode 100644 index 0000000000..1c98bdfc6f --- /dev/null +++ b/TargetLibraries/Generic/src/Sigmoid_fp32.c @@ -0,0 +1,14 @@ +/* + * SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "DeeployBasicMath.h" +#include + +void Sigmoid_fp32_fp32(float32_t *data_in, float32_t *data_out, int32_t size) { + for (int i = 0; i < size; i++) { + data_out[i] = 1 / (1 + expf(-data_in[i])); + } +} From e3f0ba62fdb6dcc92cf84d058a8b9a571d5041fa Mon Sep 17 00:00:00 2001 From: Alex Marchioni Date: Thu, 14 May 2026 16:54:40 +0000 Subject: [PATCH 08/16] add support for Swish operator for Generic 
target --- Deeploy/Targets/Generic/Bindings.py | 9 +++++-- Deeploy/Targets/Generic/Layers.py | 4 +++ Deeploy/Targets/Generic/Parsers.py | 10 ++++++++ Deeploy/Targets/Generic/Platform.py | 12 +++++---- .../Generic/Templates/FloatSwishTemplate.py | 23 ++++++++++++++++++ .../Tests/Kernels/FP32/Swish/inputs.npz | Bin 0 -> 776 bytes .../Tests/Kernels/FP32/Swish/network.onnx | Bin 0 -> 139 bytes .../Tests/Kernels/FP32/Swish/outputs.npz | Bin 0 -> 778 bytes .../Generic/inc/DeeployBasicMath.h | 1 + TargetLibraries/Generic/inc/kernel/Swish.h | 22 +++++++++++++++++ TargetLibraries/Generic/src/Swish_fp32.c | 16 ++++++++++++ 11 files changed, 90 insertions(+), 7 deletions(-) create mode 100644 Deeploy/Targets/Generic/Templates/FloatSwishTemplate.py create mode 100644 DeeployTest/Tests/Kernels/FP32/Swish/inputs.npz create mode 100644 DeeployTest/Tests/Kernels/FP32/Swish/network.onnx create mode 100644 DeeployTest/Tests/Kernels/FP32/Swish/outputs.npz create mode 100644 TargetLibraries/Generic/inc/kernel/Swish.h create mode 100644 TargetLibraries/Generic/src/Swish_fp32.c diff --git a/Deeploy/Targets/Generic/Bindings.py b/Deeploy/Targets/Generic/Bindings.py index 195cf8bf71..a8bdd9205b 100644 --- a/Deeploy/Targets/Generic/Bindings.py +++ b/Deeploy/Targets/Generic/Bindings.py @@ -17,8 +17,8 @@ FloatFloorTemplate, FloatGELUTemplate, FloatGemmTemplate, FloatLayernormTemplate, FloatMatMulTemplate, \ FloatMaxPoolTemplate, FloatMulTemplate, FloatPadTemplate, FloatPowTemplate, FloatReduceMeanTemplate, \ FloatReluTemplate, FloatSigmoidTemplate, FloatSoftmaxTemplate, FloatSqrtTemplate, FloatSubTemplate, \ - GatherTemplate, GemmTemplate, IntegerDivTemplate, ITAMaxTemplate, ITAPartialMaxTemplate, MatMulTemplate, \ - MaxPoolTemplate, MulTemplate, PadTemplate, QuantTemplate, ReduceMeanTemplate, ReduceSumTemplate, \ + FloatSwishTemplate, GatherTemplate, GemmTemplate, IntegerDivTemplate, ITAMaxTemplate, ITAPartialMaxTemplate, \ + MatMulTemplate, MaxPoolTemplate, MulTemplate, PadTemplate, 
QuantTemplate, ReduceMeanTemplate, ReduceSumTemplate, \ RequantShiftTemplate, ReshapeTemplate, RQIntegerDivTemplate, RQSiGELUTemplate, SliceTemplate, SubTemplate, \ TransposeTemplate, iGELUTemplate, iLayernormTemplate, iRMSNormTemplate, iSoftmaxTemplate from Deeploy.Targets.Generic.TypeCheckers import AddChecker, BatchNormChecker, ConcatChecker, ConvChecker, \ @@ -367,3 +367,8 @@ NodeBinding(DummyChecker([PointerClass(float32_t)], [PointerClass(float32_t)]), FloatSigmoidTemplate.referenceTemplate, BasicTransformer), ] + +BasicSwishBindings = [ + NodeBinding(DummyChecker([PointerClass(float32_t)], [PointerClass(float32_t)]), + FloatSwishTemplate.referenceTemplate, BasicTransformer), +] diff --git a/Deeploy/Targets/Generic/Layers.py b/Deeploy/Targets/Generic/Layers.py index ffe8bd5492..6393087199 100644 --- a/Deeploy/Targets/Generic/Layers.py +++ b/Deeploy/Targets/Generic/Layers.py @@ -732,3 +732,7 @@ class ExpLayer(ONNXLayer): class SigmoidLayer(ONNXLayer): pass + + +class SwishLayer(ONNXLayer): + pass diff --git a/Deeploy/Targets/Generic/Parsers.py b/Deeploy/Targets/Generic/Parsers.py index b37051115d..d8e0d72e7b 100644 --- a/Deeploy/Targets/Generic/Parsers.py +++ b/Deeploy/Targets/Generic/Parsers.py @@ -2926,3 +2926,13 @@ class SigmoidParser(UnaryElementWiseParser): def parseNode(self, node: gs.Node) -> bool: return super().parseNode(node) and node.op == 'Sigmoid' + + +class SwishParser(UnaryElementWiseParser): + + def parseNode(self, node: gs.Node) -> bool: + ret = all([super().parseNode(node), node.op == 'Swish', 'alpha' in node.attrs]) + if ret: + self.operatorRepresentation['alpha'] = node.attrs['alpha'] + return True + return False diff --git a/Deeploy/Targets/Generic/Platform.py b/Deeploy/Targets/Generic/Platform.py index cce84f2c82..9e257d0923 100644 --- a/Deeploy/Targets/Generic/Platform.py +++ b/Deeploy/Targets/Generic/Platform.py @@ -14,21 +14,21 @@ BasicMaxPool1DBindings, BasicMaxPool2DBindings, BasicMulBindings, BasicPad1DBindings, BasicPad2DBindings, 
\ BasicPowBindings, BasicQuantBindings, BasicReduceMeanBindings, BasicReduceSumBindings, BasicReluBinding, \ BasicReshapeBindings, BasicRQIntegerDivBinding, BasicRQSBindings, BasicRQSGELUBinding, BasicSigmoidBindings, \ - BasicSliceBindings, BasicSoftmaxBindings, BasicSqrtBindings, BasicSubBindings, BasicTransposeBindings, \ - DummyBinding + BasicSliceBindings, BasicSoftmaxBindings, BasicSqrtBindings, BasicSubBindings, BasicSwishBindings, \ + BasicTransposeBindings, DummyBinding from Deeploy.Targets.Generic.Layers import AddLayer, BatchNormalizationLayer, CeilLayer, ClipLayer, ConcatLayer, \ ConvLayer, ConvTransposeLayer, DebugPrintLayer, DequantLayer, DivLayer, ExpLayer, FloorLayer, GatherLayer, \ GELULayer, GEMMLayer, ITAMaxLayer, LayerNormLayer, MatMulLayer, MaxPoolLayer, MulLayer, PadLayer, PowLayer, \ QuantLayer, ReduceMeanLayer, ReduceSumLayer, ReluLayer, RequantShiftLayer, ReshapeLayer, RQIntegerDivLayer, \ - RQSiGELULayer, SigmoidLayer, SliceLayer, SoftmaxLayer, SqrtLayer, SubLayer, TransposeLayer + RQSiGELULayer, SigmoidLayer, SliceLayer, SoftmaxLayer, SqrtLayer, SubLayer, SwishLayer, TransposeLayer from Deeploy.Targets.Generic.Parsers import AddParser, BatchNormParser, CeilParser, ClipParser, ConcatParser, \ ConvTranspose1DParser, DebugParser, DequantParser, DivParser, DummyParser, ExpParser, FlattenParser, FloorParser, \ GatherParser, GELUParser, GenericConv1DParser, GenericConv2DParser, GenericDWConv1DParser, GenericDWConv2DParser, \ GenericGEMMParser, GenericMaxPool2DParser, IntegerDivParser, ITAMaxParser, ITAPartialMaxParser, LayerNormParser, \ MatMulParser, MaxPool1DParser, MulParser, Pad1DParser, Pad2DParser, PowParser, QuantParser, ReduceMeanParser, \ ReduceSumParser, ReluParser, RequantShiftParser, ReshapeParser, RQIntegerDivParser, RQSiGELUParser, SigmoidParser, \ - SliceParser, SoftmaxParser, SqrtParser, SubParser, TransposeParser, UnsqueezeParser, iLayerNormParser, \ - iSoftmaxParser + SliceParser, SoftmaxParser, SqrtParser, SubParser, 
SwishParser, TransposeParser, UnsqueezeParser, \ + iLayerNormParser, iSoftmaxParser from Deeploy.Targets.Generic.Templates import AllocateTemplate, FreeTemplate from Deeploy.Targets.Generic.TopologyOptimizationPasses.Passes import DequantPatternPass, ExtractPaddingFromConvPass, \ ExtractPaddingFromPoolPass, MatMulAddMergePass, MergeConstAddAndRequantPass, QuantPatternPass, \ @@ -81,6 +81,7 @@ ClipMapper = NodeMapper(ClipParser(), BasicClipBindings) ExpMapper = NodeMapper(ExpParser(), BasicExpBindings) SigmoidMapper = NodeMapper(SigmoidParser(), BasicSigmoidBindings) +SwishMapper = NodeMapper(SwishParser(), BasicSwishBindings) # Dummy nodes are intended for development purposes only! # They should always generate compiler errors to not accidentally end up in production code @@ -133,6 +134,7 @@ 'Clip': ClipLayer([ClipMapper]), 'Exp': ExpLayer([ExpMapper]), 'Sigmoid': SigmoidLayer([SigmoidMapper]), + 'Swish': SwishLayer([SwishMapper]), # # For example, you can use the DummpyMapper, in case you want to test # # deployment or optimizations with GlobalAveragePool nodes but did not yet # # implement the corresponding kernel diff --git a/Deeploy/Targets/Generic/Templates/FloatSwishTemplate.py b/Deeploy/Targets/Generic/Templates/FloatSwishTemplate.py new file mode 100644 index 0000000000..244e19ee0b --- /dev/null +++ b/Deeploy/Targets/Generic/Templates/FloatSwishTemplate.py @@ -0,0 +1,23 @@ +# SPDX-FileCopyrightText: 2021 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 +import numpy as np + +from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation + + +class _SigmoidTemplate(NodeTemplate): + + def alignToContext(self, ctxt: NetworkContext, + operatorRepresentation: OperatorRepresentation) -> tuple[NetworkContext, dict, list[str]]: + + data_in = ctxt.lookup(operatorRepresentation['data_in']) + operatorRepresentation['size'] = int(np.prod(data_in.shape)) + operatorRepresentation['type_width'] = 
data_in._type.referencedType.typeWidth + return ctxt, operatorRepresentation, [] + + +referenceTemplate = _SigmoidTemplate(""" +// Swish (Name: ${nodeName}, Op: ${nodeOp}) +Swish_fp${type_width}_fp${type_width}(${data_in}, ${data_out}, ${alpha}, ${size}); +""") diff --git a/DeeployTest/Tests/Kernels/FP32/Swish/inputs.npz b/DeeployTest/Tests/Kernels/FP32/Swish/inputs.npz new file mode 100644 index 0000000000000000000000000000000000000000..ac58fc00e29cdc6487f6248edf903d0dedaa1f12 GIT binary patch literal 776 zcmbV~e=O8-7{|X~7lrbpI$Jl{k?qhiBFwMv=Oc;T#ICYk$1)pd?8wr70~{Ay6pQxvU_9FNn$u5eWH$ zyb`vQl@}i=VHHYi9xpFaD=oSDm|Uf_{60ZKez=ey6zVJFpW^?Q9A1BEN9;%neN$YZCb>}GQ;Cs&31vh!LrwcV=T?&X<05JKJOB+_yhx%) zBD$O4N|wLL$#-2L4m{%m2__-ax0Fl9gYUyma~;#~6Hc;O6-o^O;Ppnp*kypO06q2# zbuf%a`jQiE96T!7M?!mSk>j)LM0YlgoHm=8faH0!ES+L*ZPXjz3?hhaC?cKGS*Xo# zgvYylanVN^a&fF9^99N1gIWuvHzuHF#Pyls#-Rp2X4!nSS2YGsqI|T%)#AD14-I1(bWz0tHj#;DS@k~pX}lm+sbJElN+#&x3HHq+uoqaS3oR3h{6;aWDcg2M`M+$#6l<6~ZFx#KOfO GAOQd`86SWE literal 0 HcmV?d00001 diff --git a/DeeployTest/Tests/Kernels/FP32/Swish/outputs.npz b/DeeployTest/Tests/Kernels/FP32/Swish/outputs.npz new file mode 100644 index 0000000000000000000000000000000000000000..cfd41c40cd11cdb2f817f5dc1cc36f7c2e6ccd70 GIT binary patch literal 778 zcmWIWW@gc4fB;2?`Fj-=|3d*6g9t-@X-Pq8iC$hoB_o3XLjw~;9gLpr7wQ`j$;eQ~ zP_3SlTAW;@Zl$1ZlV+l>qoAIaUsO_*m=~X4l#&V(cT3DEP6dh=XCxM+0{I$-Itms# z3WjEyItsN4WC1Q^_jwK*pVsdCE`Hv@cYU!V??lag-cvRA^{uIK+}QTjp>D~oJ;J(c z9p=q^wny>%!#$4s1ss4rXaM1$INLpMOQd$D{cv)4bL`g6AC;QhzBgn!$_g6oTk8JG z&gF;JwsR`+d&=yh?O8ub?pU}i$6@h7mOXDdU+n!*z`SRL-W11EAL{pJhaK5bJ)PN+ zVfKve``&)C2btZ_bY&YK-y?fF+cyru1^ev%<}~h`bLPfgqnwRf#n)-?j`n=BCqh}% z@dsnL14#YyEuIcv-X`p=O>W%1q~WlG-B%UIhyO(!6AQz4bvvBdD>mEDp+r>1@$iNd zd)@{`?c24WV25GSq3s*~Fgi~DAK^IbR_Z?92?~xihRF``uh;GV!=CK$L$u6Me82F% zbKzBvAUjlQnH|@&E84GDyt8+QrViMh>wPTt>8d0+be82fJ}K(mV_rUSSDo~dJ)WgP zj*U-z98})d?RmOtvBRa07WRjKNjbio?Y(>Eo{f7oEJOBA(tNd5rq0aalDwP)Z`gGk zPDRaqkJa3^S-n-;o2@f%Z_I*OyPmh^?@O9q?;!Scnq&LZL-sjqq#Z81WbOmGN8jMZ 
zUX7~Lc1HY19Vg!o-nYxf%E8xIVBZ&JrX6>0nmNwy`{2+o_Sa!i@AaL9MG20Vyc4!P ztl#a>VYA+m|Cr;>Ukzq^0=yZSbeU08FDPw-Fc;7g7-(Pwu@GrFz?+o~B*F-U7C?Fr HI9&q(i&rLG literal 0 HcmV?d00001 diff --git a/TargetLibraries/Generic/inc/DeeployBasicMath.h b/TargetLibraries/Generic/inc/DeeployBasicMath.h index 5a7f15d073..ed130c90c4 100644 --- a/TargetLibraries/Generic/inc/DeeployBasicMath.h +++ b/TargetLibraries/Generic/inc/DeeployBasicMath.h @@ -57,5 +57,6 @@ #include "kernel/Sigmoid.h" #include "kernel/Softmax.h" #include "kernel/Sqrt.h" +#include "kernel/Swish.h" #endif //__DEEPLOY_BASIC_MATH_HEADER_ diff --git a/TargetLibraries/Generic/inc/kernel/Swish.h b/TargetLibraries/Generic/inc/kernel/Swish.h new file mode 100644 index 0000000000..326f7822c8 --- /dev/null +++ b/TargetLibraries/Generic/inc/kernel/Swish.h @@ -0,0 +1,22 @@ +/* + * SPDX-FileCopyrightText: 2020 ETH Zurich and University of Bologna + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef __DEEPLOY_BASIC_MATH_SWISH_KERNEL_HEADER_ +#define __DEEPLOY_BASIC_MATH_SWISH_KERNEL_HEADER_ + +#include "DeeployBasicMath.h" + +/* + * element wise swish + */ + +/******************************************************************************/ +/* Swish */ +/******************************************************************************/ +void Swish_fp32_fp32(float32_t *data_in, float32_t *data_out, float alpha, + int32_t size); + +#endif //__DEEPLOY_BASIC_MATH_SWISH_KERNEL_HEADER_ diff --git a/TargetLibraries/Generic/src/Swish_fp32.c b/TargetLibraries/Generic/src/Swish_fp32.c new file mode 100644 index 0000000000..5447de4c6a --- /dev/null +++ b/TargetLibraries/Generic/src/Swish_fp32.c @@ -0,0 +1,16 @@ +/* + * SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "DeeployBasicMath.h" +#include + +void Swish_fp32_fp32(float32_t *data_in, float32_t *data_out, float alpha, + int32_t size) { + for (int i = 0; i < size; i++) { + float32_t x = data_in[i]; + data_out[i] = 
x / (1 + expf(-alpha * x)); + } +} From bc73051d2b9025d5053451c124064e159543f907 Mon Sep 17 00:00:00 2001 From: Alex Marchioni Date: Fri, 15 May 2026 08:26:29 +0000 Subject: [PATCH 09/16] add support for HardSigmoid and HardSwish operator for Generic target --- Deeploy/Targets/Generic/Bindings.py | 25 ++++++++++++----- Deeploy/Targets/Generic/Layers.py | 8 ++++++ Deeploy/Targets/Generic/Parsers.py | 17 ++++++++++++ Deeploy/Targets/Generic/Platform.py | 26 ++++++++++-------- .../Templates/FloatHardSigmoidTemplate.py | 23 ++++++++++++++++ .../Templates/FloatHardSwishTemplate.py | 23 ++++++++++++++++ .../Tests/Kernels/FP32/HardSigmoid/inputs.npz | Bin 0 -> 776 bytes .../Kernels/FP32/HardSigmoid/network.onnx | Bin 0 -> 162 bytes .../Kernels/FP32/HardSigmoid/outputs.npz | Bin 0 -> 778 bytes .../Tests/Kernels/FP32/HardSwish/inputs.npz | Bin 0 -> 776 bytes .../Tests/Kernels/FP32/HardSwish/network.onnx | Bin 0 -> 126 bytes .../Tests/Kernels/FP32/HardSwish/outputs.npz | Bin 0 -> 778 bytes .../Generic/inc/DeeployBasicMath.h | 3 +- .../Generic/inc/kernel/HardSigmoid.h | 22 +++++++++++++++ .../inc/kernel/{Hardswish.h => HardSwish.h} | 6 ++++ .../Generic/src/HardSigmoid_fp32.c | 15 ++++++++++ TargetLibraries/Generic/src/HardSwish_fp32.c | 16 +++++++++++ 17 files changed, 165 insertions(+), 19 deletions(-) create mode 100644 Deeploy/Targets/Generic/Templates/FloatHardSigmoidTemplate.py create mode 100644 Deeploy/Targets/Generic/Templates/FloatHardSwishTemplate.py create mode 100644 DeeployTest/Tests/Kernels/FP32/HardSigmoid/inputs.npz create mode 100644 DeeployTest/Tests/Kernels/FP32/HardSigmoid/network.onnx create mode 100644 DeeployTest/Tests/Kernels/FP32/HardSigmoid/outputs.npz create mode 100644 DeeployTest/Tests/Kernels/FP32/HardSwish/inputs.npz create mode 100644 DeeployTest/Tests/Kernels/FP32/HardSwish/network.onnx create mode 100644 DeeployTest/Tests/Kernels/FP32/HardSwish/outputs.npz create mode 100644 TargetLibraries/Generic/inc/kernel/HardSigmoid.h rename 
TargetLibraries/Generic/inc/kernel/{Hardswish.h => HardSwish.h} (69%) create mode 100644 TargetLibraries/Generic/src/HardSigmoid_fp32.c create mode 100644 TargetLibraries/Generic/src/HardSwish_fp32.c diff --git a/Deeploy/Targets/Generic/Bindings.py b/Deeploy/Targets/Generic/Bindings.py index a8bdd9205b..e81233fa31 100644 --- a/Deeploy/Targets/Generic/Bindings.py +++ b/Deeploy/Targets/Generic/Bindings.py @@ -14,13 +14,14 @@ from Deeploy.Targets.Generic.Templates import AddTemplate, BatchNormalizationTemplate, ConcatTemplate, ConvTemplate, \ ConvTransposeTemplate, DebugPrintTemplate, DequantTemplate, DummyTemplate, DWConvTemplate, FloatAddTemplate, \ FloatCeilTemplate, FloatClipTemplate, FloatConvTemplate, FloatDivTemplate, FloatDWConvTemplate, FloatExpTemplate, \ - FloatFloorTemplate, FloatGELUTemplate, FloatGemmTemplate, FloatLayernormTemplate, FloatMatMulTemplate, \ - FloatMaxPoolTemplate, FloatMulTemplate, FloatPadTemplate, FloatPowTemplate, FloatReduceMeanTemplate, \ - FloatReluTemplate, FloatSigmoidTemplate, FloatSoftmaxTemplate, FloatSqrtTemplate, FloatSubTemplate, \ - FloatSwishTemplate, GatherTemplate, GemmTemplate, IntegerDivTemplate, ITAMaxTemplate, ITAPartialMaxTemplate, \ - MatMulTemplate, MaxPoolTemplate, MulTemplate, PadTemplate, QuantTemplate, ReduceMeanTemplate, ReduceSumTemplate, \ - RequantShiftTemplate, ReshapeTemplate, RQIntegerDivTemplate, RQSiGELUTemplate, SliceTemplate, SubTemplate, \ - TransposeTemplate, iGELUTemplate, iLayernormTemplate, iRMSNormTemplate, iSoftmaxTemplate + FloatFloorTemplate, FloatGELUTemplate, FloatGemmTemplate, FloatHardSigmoidTemplate, FloatHardSwishTemplate, \ + FloatLayernormTemplate, FloatMatMulTemplate, FloatMaxPoolTemplate, FloatMulTemplate, FloatPadTemplate, \ + FloatPowTemplate, FloatReduceMeanTemplate, FloatReluTemplate, FloatSigmoidTemplate, FloatSoftmaxTemplate, \ + FloatSqrtTemplate, FloatSubTemplate, FloatSwishTemplate, GatherTemplate, GemmTemplate, IntegerDivTemplate, \ + ITAMaxTemplate, 
ITAPartialMaxTemplate, MatMulTemplate, MaxPoolTemplate, MulTemplate, PadTemplate, QuantTemplate, \ + ReduceMeanTemplate, ReduceSumTemplate, RequantShiftTemplate, ReshapeTemplate, RQIntegerDivTemplate, \ + RQSiGELUTemplate, SliceTemplate, SubTemplate, TransposeTemplate, iGELUTemplate, iLayernormTemplate, \ + iRMSNormTemplate, iSoftmaxTemplate from Deeploy.Targets.Generic.TypeCheckers import AddChecker, BatchNormChecker, ConcatChecker, ConvChecker, \ DebugPrintChecker, DequantChecker, DivChecker, DummyChecker, GatherChecker, GELUChecker, GEMMChecker, \ LayerNormChecker, MatMulChecker, MaxPoolChecker, MulChecker, PadChecker, QuantChecker, ReduceMeanChecker, \ @@ -372,3 +373,13 @@ NodeBinding(DummyChecker([PointerClass(float32_t)], [PointerClass(float32_t)]), FloatSwishTemplate.referenceTemplate, BasicTransformer), ] + +BasicHardSigmoidBindings = [ + NodeBinding(DummyChecker([PointerClass(float32_t)], [PointerClass(float32_t)]), + FloatHardSigmoidTemplate.referenceTemplate, BasicTransformer), +] + +BasicHardSwishBindings = [ + NodeBinding(DummyChecker([PointerClass(float32_t)], [PointerClass(float32_t)]), + FloatHardSwishTemplate.referenceTemplate, BasicTransformer), +] diff --git a/Deeploy/Targets/Generic/Layers.py b/Deeploy/Targets/Generic/Layers.py index 6393087199..36dfa161d4 100644 --- a/Deeploy/Targets/Generic/Layers.py +++ b/Deeploy/Targets/Generic/Layers.py @@ -736,3 +736,11 @@ class SigmoidLayer(ONNXLayer): class SwishLayer(ONNXLayer): pass + + +class HardSigmoidLayer(ONNXLayer): + pass + + +class HardSwishLayer(ONNXLayer): + pass diff --git a/Deeploy/Targets/Generic/Parsers.py b/Deeploy/Targets/Generic/Parsers.py index d8e0d72e7b..361ee35de3 100644 --- a/Deeploy/Targets/Generic/Parsers.py +++ b/Deeploy/Targets/Generic/Parsers.py @@ -2930,9 +2930,26 @@ def parseNode(self, node: gs.Node) -> bool: class SwishParser(UnaryElementWiseParser): + # TODO: make alpha optional (as in ONNX standard) def parseNode(self, node: gs.Node) -> bool: ret = 
all([super().parseNode(node), node.op == 'Swish', 'alpha' in node.attrs]) if ret: self.operatorRepresentation['alpha'] = node.attrs['alpha'] return True return False + + +class HardSigmoidParser(UnaryElementWiseParser): + + def parseNode(self, node: gs.Node) -> bool: + if not (super().parseNode(node) and node.op == 'HardSigmoid'): + return False + self.operatorRepresentation['alpha'] = node.attrs.get('alpha', 0.2) + self.operatorRepresentation['beta'] = node.attrs.get('beta', 0.5) + return True + + +class HardSwishParser(UnaryElementWiseParser): + + def parseNode(self, node: gs.Node) -> bool: + return super().parseNode(node) and node.op == 'HardSwish' diff --git a/Deeploy/Targets/Generic/Platform.py b/Deeploy/Targets/Generic/Platform.py index 9e257d0923..2976b8b0db 100644 --- a/Deeploy/Targets/Generic/Platform.py +++ b/Deeploy/Targets/Generic/Platform.py @@ -10,12 +10,12 @@ BasicClipBindings, BasicConcatBindings, BasicConv1DBindings, BasicConv2DBindings, BasicConvTransposeBindings, \ BasicDebugPrintBindings, BasicDequantBindings, BasicDivBindings, BasicDWConv1DBinding, BasicDWConv2DBindings, \ BasicExpBindings, BasicFloorBindings, BasicGatherBindings, BasicGELUBindings, BasicGEMMBindings, \ - BasicITAPartialSoftmaxBinding, BasicITASoftmaxBinding, BasicLayerNormBindings, BasicMatMulBindings, \ - BasicMaxPool1DBindings, BasicMaxPool2DBindings, BasicMulBindings, BasicPad1DBindings, BasicPad2DBindings, \ - BasicPowBindings, BasicQuantBindings, BasicReduceMeanBindings, BasicReduceSumBindings, BasicReluBinding, \ - BasicReshapeBindings, BasicRQIntegerDivBinding, BasicRQSBindings, BasicRQSGELUBinding, BasicSigmoidBindings, \ - BasicSliceBindings, BasicSoftmaxBindings, BasicSqrtBindings, BasicSubBindings, BasicSwishBindings, \ - BasicTransposeBindings, DummyBinding + BasicHardSigmoidBindings, BasicHardSwishBindings, BasicITAPartialSoftmaxBinding, BasicITASoftmaxBinding, \ + BasicLayerNormBindings, BasicMatMulBindings, BasicMaxPool1DBindings, BasicMaxPool2DBindings, 
BasicMulBindings, \ + BasicPad1DBindings, BasicPad2DBindings, BasicPowBindings, BasicQuantBindings, BasicReduceMeanBindings, \ + BasicReduceSumBindings, BasicReluBinding, BasicReshapeBindings, BasicRQIntegerDivBinding, BasicRQSBindings, \ + BasicRQSGELUBinding, BasicSigmoidBindings, BasicSliceBindings, BasicSoftmaxBindings, BasicSqrtBindings, \ + BasicSubBindings, BasicSwishBindings, BasicTransposeBindings, DummyBinding from Deeploy.Targets.Generic.Layers import AddLayer, BatchNormalizationLayer, CeilLayer, ClipLayer, ConcatLayer, \ ConvLayer, ConvTransposeLayer, DebugPrintLayer, DequantLayer, DivLayer, ExpLayer, FloorLayer, GatherLayer, \ GELULayer, GEMMLayer, ITAMaxLayer, LayerNormLayer, MatMulLayer, MaxPoolLayer, MulLayer, PadLayer, PowLayer, \ @@ -24,11 +24,11 @@ from Deeploy.Targets.Generic.Parsers import AddParser, BatchNormParser, CeilParser, ClipParser, ConcatParser, \ ConvTranspose1DParser, DebugParser, DequantParser, DivParser, DummyParser, ExpParser, FlattenParser, FloorParser, \ GatherParser, GELUParser, GenericConv1DParser, GenericConv2DParser, GenericDWConv1DParser, GenericDWConv2DParser, \ - GenericGEMMParser, GenericMaxPool2DParser, IntegerDivParser, ITAMaxParser, ITAPartialMaxParser, LayerNormParser, \ - MatMulParser, MaxPool1DParser, MulParser, Pad1DParser, Pad2DParser, PowParser, QuantParser, ReduceMeanParser, \ - ReduceSumParser, ReluParser, RequantShiftParser, ReshapeParser, RQIntegerDivParser, RQSiGELUParser, SigmoidParser, \ - SliceParser, SoftmaxParser, SqrtParser, SubParser, SwishParser, TransposeParser, UnsqueezeParser, \ - iLayerNormParser, iSoftmaxParser + GenericGEMMParser, GenericMaxPool2DParser, HardSigmoidParser, HardSwishParser, IntegerDivParser, ITAMaxParser, \ + ITAPartialMaxParser, LayerNormParser, MatMulParser, MaxPool1DParser, MulParser, Pad1DParser, Pad2DParser, \ + PowParser, QuantParser, ReduceMeanParser, ReduceSumParser, ReluParser, RequantShiftParser, ReshapeParser, \ + RQIntegerDivParser, RQSiGELUParser, SigmoidParser, 
SliceParser, SoftmaxParser, SqrtParser, SubParser, SwishParser, \ + TransposeParser, UnsqueezeParser, iLayerNormParser, iSoftmaxParser from Deeploy.Targets.Generic.Templates import AllocateTemplate, FreeTemplate from Deeploy.Targets.Generic.TopologyOptimizationPasses.Passes import DequantPatternPass, ExtractPaddingFromConvPass, \ ExtractPaddingFromPoolPass, MatMulAddMergePass, MergeConstAddAndRequantPass, QuantPatternPass, \ @@ -82,6 +82,8 @@ ExpMapper = NodeMapper(ExpParser(), BasicExpBindings) SigmoidMapper = NodeMapper(SigmoidParser(), BasicSigmoidBindings) SwishMapper = NodeMapper(SwishParser(), BasicSwishBindings) +HardSigmoidMapper = NodeMapper(HardSigmoidParser(), BasicHardSigmoidBindings) +HardSwishMapper = NodeMapper(HardSwishParser(), BasicHardSwishBindings) # Dummy nodes are intended for development purposes only! # They should always generate compiler errors to not accidentally end up in production code @@ -135,6 +137,8 @@ 'Exp': ExpLayer([ExpMapper]), 'Sigmoid': SigmoidLayer([SigmoidMapper]), 'Swish': SwishLayer([SwishMapper]), + 'HardSigmoid': SigmoidLayer([HardSigmoidMapper]), + 'HardSwish': SwishLayer([HardSwishMapper]), # # For example, you can use the DummpyMapper, in case you want to test # # deployment or optimizations with GlobalAveragePool nodes but did not yet # # implement the corresponding kernel diff --git a/Deeploy/Targets/Generic/Templates/FloatHardSigmoidTemplate.py b/Deeploy/Targets/Generic/Templates/FloatHardSigmoidTemplate.py new file mode 100644 index 0000000000..135f168c3f --- /dev/null +++ b/Deeploy/Targets/Generic/Templates/FloatHardSigmoidTemplate.py @@ -0,0 +1,23 @@ +# SPDX-FileCopyrightText: 2021 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 +import numpy as np + +from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation + + +class _hardSigmoidTemplate(NodeTemplate): + + def alignToContext(self, ctxt: NetworkContext, + operatorRepresentation: OperatorRepresentation) 
-> tuple[NetworkContext, dict, list[str]]: + + data_in = ctxt.lookup(operatorRepresentation['data_in']) + operatorRepresentation['size'] = int(np.prod(data_in.shape)) + operatorRepresentation['type_width'] = data_in._type.referencedType.typeWidth + return ctxt, operatorRepresentation, [] + + +referenceTemplate = _hardSigmoidTemplate(""" +// HardSigmoid (Name: ${nodeName}, Op: ${nodeOp}) +HardSigmoid_fp${type_width}_fp${type_width}(${data_in}, ${data_out}, ${alpha}, ${beta}, ${size}); +""") diff --git a/Deeploy/Targets/Generic/Templates/FloatHardSwishTemplate.py b/Deeploy/Targets/Generic/Templates/FloatHardSwishTemplate.py new file mode 100644 index 0000000000..6ff5c11c77 --- /dev/null +++ b/Deeploy/Targets/Generic/Templates/FloatHardSwishTemplate.py @@ -0,0 +1,23 @@ +# SPDX-FileCopyrightText: 2021 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 +import numpy as np + +from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation + + +class _hardSwishTemplate(NodeTemplate): + + def alignToContext(self, ctxt: NetworkContext, + operatorRepresentation: OperatorRepresentation) -> tuple[NetworkContext, dict, list[str]]: + + data_in = ctxt.lookup(operatorRepresentation['data_in']) + operatorRepresentation['size'] = int(np.prod(data_in.shape)) + operatorRepresentation['type_width'] = data_in._type.referencedType.typeWidth + return ctxt, operatorRepresentation, [] + + +referenceTemplate = _hardSwishTemplate(""" +// HardSwish (Name: ${nodeName}, Op: ${nodeOp}) +HardSwish_fp${type_width}_fp${type_width}(${data_in}, ${data_out}, ${size}); +""") diff --git a/DeeployTest/Tests/Kernels/FP32/HardSigmoid/inputs.npz b/DeeployTest/Tests/Kernels/FP32/HardSigmoid/inputs.npz new file mode 100644 index 0000000000000000000000000000000000000000..ac58fc00e29cdc6487f6248edf903d0dedaa1f12 GIT binary patch literal 776 zcmbV~e=O8-7{|X~7lrbpI$Jl{k?qhiBFwMv=Oc;T#ICYk$1)pd?8wr70~{Ay6pQxvU_9FNn$u5eWH$ 
zyb`vQl@}i=VHHYi9xpFaD=oSDm|Uf_{60ZKez=ey6zVJFpW^?Q9A1BEN9;%neN$YZCb>}GQ;Cs&31vh!LrwcV=T?&X<05JKJOB+_yhx%) zBD$O4N|wLL$#-2L4m{%m2__-ax0Fl9gYUyma~;#~6Hc;O6-o^O;Ppnp*kypO06q2# zbuf%a`jQiE96T!7M?!mSk>j)LM0YlgoHm=8faH0!ES+L*ZPXjz3?hhaC?cKGS*Xo# zgvYylanVN^a&fF9^99N1gIWuvHzuHF#Pyls#-Rp2X4!nSS2YGsqI|T%)#AD14-I1(bWz0tHj#;DS@k~pX}lm+sbJVi1r30CXB8hX4Qo literal 0 HcmV?d00001 diff --git a/DeeployTest/Tests/Kernels/FP32/HardSigmoid/outputs.npz b/DeeployTest/Tests/Kernels/FP32/HardSigmoid/outputs.npz new file mode 100644 index 0000000000000000000000000000000000000000..2e63fd2da17c3b2a01df0ce0b06e294d0dc10b9d GIT binary patch literal 778 zcmWIWW@gc4fB;2?ve5Oy|Dk}3L4+Z{w4|W4L@%$Pl954xp@9ja4n|M*3-t|%WMn8~ zs8&x&Elw^{w^C5INi$K`QBY6IFDfZY%!|)2N=XHYyCvonrvk-`GZG6@fqV@^9R&*= z1w%7U9fev2vH%wYLxcT>t0{IM8iY?=V6-*(%xDYaLtP7$gMg|>)o?aU45Ajs1Da9b zX3IBO39jzODGMkYWCt>6us^B$31~M6K;(2jWFgd?P64tT?5FJwx38bv2~`JC4Iv?J zMzCRWFmYu2U}6jmR!5nU%z}vja@qzD1B7`E_Spfqq2eIX2K)SpI$-y?{?-H2D~c-6 z!V29D5PfP}48eYZ$s%Zo{nqoC>}8G4vI?v literal 0 HcmV?d00001 diff --git a/DeeployTest/Tests/Kernels/FP32/HardSwish/inputs.npz b/DeeployTest/Tests/Kernels/FP32/HardSwish/inputs.npz new file mode 100644 index 0000000000000000000000000000000000000000..ac58fc00e29cdc6487f6248edf903d0dedaa1f12 GIT binary patch literal 776 zcmbV~e=O8-7{|X~7lrbpI$Jl{k?qhiBFwMv=Oc;T#ICYk$1)pd?8wr70~{Ay6pQxvU_9FNn$u5eWH$ zyb`vQl@}i=VHHYi9xpFaD=oSDm|Uf_{60ZKez=ey6zVJFpW^?Q9A1BEN9;%neN$YZCb>}GQ;Cs&31vh!LrwcV=T?&X<05JKJOB+_yhx%) zBD$O4N|wLL$#-2L4m{%m2__-ax0Fl9gYUyma~;#~6Hc;O6-o^O;Ppnp*kypO06q2# zbuf%a`jQiE96T!7M?!mSk>j)LM0YlgoHm=8faH0!ES+L*ZPXjz3?hhaC?cKGS*Xo# zgvYylanVN^a&fF9^99N1gIWuvHzuHF#Pyls#-Rp2X4!nSS2YGsqI|T%)#AD14-I1(bWz0tHj#;DS@k~pX}lm+sbJ8bg75KfddOp_3o00*NG4;K>$BM@@{u|Sdx7t|;rEV524Tnqvd04`h`fB*mh literal 0 HcmV?d00001 diff --git a/DeeployTest/Tests/Kernels/FP32/HardSwish/outputs.npz b/DeeployTest/Tests/Kernels/FP32/HardSwish/outputs.npz new file mode 100644 index 
0000000000000000000000000000000000000000..d46d07aefedeb895bcaa061eadb1e45dab9a8eae GIT binary patch literal 778 zcmWIWW@gc4fB;1X_O~lm{f7cB1`&q*(vpJG61}{FN=60&h6W~xIv73KFVr_6l98c| zp;|p9wK%y*-AX~-Ce1`$M?pO;zo?`rF)u#9C?ypn?v|KSoC*{#&PXgs1@bivbrdXg z6b#KYbrfn9$O2rKKh1Mcy1Z}SGNdzjt*14IsjeU&^m7| zmnvtNWLxx);P>p*O|R z`(HDdT{xTB5hS)}&m*u}5c|}s3-)^cZyZ8`*Vt>m+qw@VhKzS9YdYqjVYIqM~%L zIegyf4!=ap91YqO_nix`0_#&~WOjVEpWoh2V$p8tL`kqapH7~(FQ_-gp{pzhZWqYD z`(;9oQ`3ULX5F`2;_#_l#eVZ|DHJszb&I@g93-t}9MV~?+HCsRgyF}-FZb=c>D}lc z_H>%#g#8=rmuO2m9P#W2t2@y6eXpbHPCFz1qmEzJZ`jxJ*TF$TQx+`8cFW8$dfo?z zFvY)MH + +void HardSigmoid_fp32_fp32(float32_t *data_in, float32_t *data_out, + float32_t alpha, float32_t beta, int32_t size) { + for (int i = 0; i < size; i++) { + data_out[i] = fmaxf(0, fminf(1, alpha * data_in[i] + beta)); + } +} diff --git a/TargetLibraries/Generic/src/HardSwish_fp32.c b/TargetLibraries/Generic/src/HardSwish_fp32.c new file mode 100644 index 0000000000..41e4f424b4 --- /dev/null +++ b/TargetLibraries/Generic/src/HardSwish_fp32.c @@ -0,0 +1,16 @@ +/* + * SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "DeeployBasicMath.h" +#include + +void HardSwish_fp32_fp32(float32_t *data_in, float32_t *data_out, + int32_t size) { + for (int i = 0; i < size; i++) { + float32_t x = data_in[i]; + data_out[i] = x * fmaxf(0, fminf(1, x / 6 + 0.5)); + } +} From 6eaa19fee5a3c46bd353124c04b317caca5fa28c Mon Sep 17 00:00:00 2001 From: Alex Marchioni Date: Fri, 15 May 2026 08:30:43 +0000 Subject: [PATCH 10/16] in Swish parser make alpha an optional attribute --- Deeploy/Targets/Generic/Parsers.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/Deeploy/Targets/Generic/Parsers.py b/Deeploy/Targets/Generic/Parsers.py index 361ee35de3..400dad93f0 100644 --- a/Deeploy/Targets/Generic/Parsers.py +++ b/Deeploy/Targets/Generic/Parsers.py @@ -2930,13 +2930,11 @@ def parseNode(self, node: gs.Node) -> 
bool: class SwishParser(UnaryElementWiseParser): - # TODO: make alpha optional (as in ONNX standard) def parseNode(self, node: gs.Node) -> bool: - ret = all([super().parseNode(node), node.op == 'Swish', 'alpha' in node.attrs]) - if ret: - self.operatorRepresentation['alpha'] = node.attrs['alpha'] - return True - return False + if not (super().parseNode(node) and node.op == 'Swish'): + return False + self.operatorRepresentation['alpha'] = node.attrs.get('alpha', 1.0) + return True class HardSigmoidParser(UnaryElementWiseParser): From cf7d2ca6fcbc000a252e67a3d12d7b5a5429db0d Mon Sep 17 00:00:00 2001 From: Alex Marchioni Date: Fri, 15 May 2026 13:32:35 +0000 Subject: [PATCH 11/16] add support for Instance and Group Normalization operators for Generic target --- Deeploy/Targets/Generic/Bindings.py | 32 +++++++--- Deeploy/Targets/Generic/Layers.py | 8 +++ Deeploy/Targets/Generic/Parsers.py | 49 +++++++++++++++ Deeploy/Targets/Generic/Platform.py | 35 ++++++----- .../Templates/FloatGroupNormTemplate.py | 22 +++++++ .../Templates/FloatInstanceNormTemplate.py | 21 +++++++ .../Tests/Kernels/FP32/GroupNorm/inputs.npz | Bin 0 -> 776 bytes .../Tests/Kernels/FP32/GroupNorm/network.onnx | Bin 0 -> 288 bytes .../Tests/Kernels/FP32/GroupNorm/outputs.npz | Bin 0 -> 778 bytes .../Kernels/FP32/InstanceNorm/inputs.npz | Bin 0 -> 776 bytes .../Kernels/FP32/InstanceNorm/network.onnx | Bin 0 -> 272 bytes .../Kernels/FP32/InstanceNorm/outputs.npz | Bin 0 -> 778 bytes .../Generic/inc/DeeployBasicMath.h | 2 + .../Generic/inc/kernel/GroupNorm.h | 23 +++++++ .../Generic/inc/kernel/InstanceNorm.h | 22 +++++++ .../Generic/src/GroupNormalization_fp32.c | 58 ++++++++++++++++++ .../Generic/src/InstanceNormalization_fp32.c | 50 +++++++++++++++ 17 files changed, 300 insertions(+), 22 deletions(-) create mode 100644 Deeploy/Targets/Generic/Templates/FloatGroupNormTemplate.py create mode 100644 Deeploy/Targets/Generic/Templates/FloatInstanceNormTemplate.py create mode 100644 
DeeployTest/Tests/Kernels/FP32/GroupNorm/inputs.npz create mode 100644 DeeployTest/Tests/Kernels/FP32/GroupNorm/network.onnx create mode 100644 DeeployTest/Tests/Kernels/FP32/GroupNorm/outputs.npz create mode 100644 DeeployTest/Tests/Kernels/FP32/InstanceNorm/inputs.npz create mode 100644 DeeployTest/Tests/Kernels/FP32/InstanceNorm/network.onnx create mode 100644 DeeployTest/Tests/Kernels/FP32/InstanceNorm/outputs.npz create mode 100644 TargetLibraries/Generic/inc/kernel/GroupNorm.h create mode 100644 TargetLibraries/Generic/inc/kernel/InstanceNorm.h create mode 100644 TargetLibraries/Generic/src/GroupNormalization_fp32.c create mode 100644 TargetLibraries/Generic/src/InstanceNormalization_fp32.c diff --git a/Deeploy/Targets/Generic/Bindings.py b/Deeploy/Targets/Generic/Bindings.py index e81233fa31..b68295a76c 100644 --- a/Deeploy/Targets/Generic/Bindings.py +++ b/Deeploy/Targets/Generic/Bindings.py @@ -14,14 +14,14 @@ from Deeploy.Targets.Generic.Templates import AddTemplate, BatchNormalizationTemplate, ConcatTemplate, ConvTemplate, \ ConvTransposeTemplate, DebugPrintTemplate, DequantTemplate, DummyTemplate, DWConvTemplate, FloatAddTemplate, \ FloatCeilTemplate, FloatClipTemplate, FloatConvTemplate, FloatDivTemplate, FloatDWConvTemplate, FloatExpTemplate, \ - FloatFloorTemplate, FloatGELUTemplate, FloatGemmTemplate, FloatHardSigmoidTemplate, FloatHardSwishTemplate, \ - FloatLayernormTemplate, FloatMatMulTemplate, FloatMaxPoolTemplate, FloatMulTemplate, FloatPadTemplate, \ - FloatPowTemplate, FloatReduceMeanTemplate, FloatReluTemplate, FloatSigmoidTemplate, FloatSoftmaxTemplate, \ - FloatSqrtTemplate, FloatSubTemplate, FloatSwishTemplate, GatherTemplate, GemmTemplate, IntegerDivTemplate, \ - ITAMaxTemplate, ITAPartialMaxTemplate, MatMulTemplate, MaxPoolTemplate, MulTemplate, PadTemplate, QuantTemplate, \ - ReduceMeanTemplate, ReduceSumTemplate, RequantShiftTemplate, ReshapeTemplate, RQIntegerDivTemplate, \ - RQSiGELUTemplate, SliceTemplate, SubTemplate, 
TransposeTemplate, iGELUTemplate, iLayernormTemplate, \ - iRMSNormTemplate, iSoftmaxTemplate + FloatFloorTemplate, FloatGELUTemplate, FloatGemmTemplate, FloatGroupNormTemplate, FloatHardSigmoidTemplate, \ + FloatHardSwishTemplate, FloatInstanceNormTemplate, FloatLayernormTemplate, FloatMatMulTemplate, \ + FloatMaxPoolTemplate, FloatMulTemplate, FloatPadTemplate, FloatPowTemplate, FloatReduceMeanTemplate, \ + FloatReluTemplate, FloatSigmoidTemplate, FloatSoftmaxTemplate, FloatSqrtTemplate, FloatSubTemplate, \ + FloatSwishTemplate, GatherTemplate, GemmTemplate, IntegerDivTemplate, ITAMaxTemplate, ITAPartialMaxTemplate, \ + MatMulTemplate, MaxPoolTemplate, MulTemplate, PadTemplate, QuantTemplate, ReduceMeanTemplate, ReduceSumTemplate, \ + RequantShiftTemplate, ReshapeTemplate, RQIntegerDivTemplate, RQSiGELUTemplate, SliceTemplate, SubTemplate, \ + TransposeTemplate, iGELUTemplate, iLayernormTemplate, iRMSNormTemplate, iSoftmaxTemplate from Deeploy.Targets.Generic.TypeCheckers import AddChecker, BatchNormChecker, ConcatChecker, ConvChecker, \ DebugPrintChecker, DequantChecker, DivChecker, DummyChecker, GatherChecker, GELUChecker, GEMMChecker, \ LayerNormChecker, MatMulChecker, MaxPoolChecker, MulChecker, PadChecker, QuantChecker, ReduceMeanChecker, \ @@ -383,3 +383,19 @@ NodeBinding(DummyChecker([PointerClass(float32_t)], [PointerClass(float32_t)]), FloatHardSwishTemplate.referenceTemplate, BasicTransformer), ] + +BasicInstanceNormBindings = [ + NodeBinding( + DummyChecker( + [PointerClass(float32_t), PointerClass(float32_t), + PointerClass(float32_t)], [PointerClass(float32_t)]), FloatInstanceNormTemplate.referenceTemplate, + BasicTransformer), +] + +BasicGroupNormBindings = [ + NodeBinding( + DummyChecker( + [PointerClass(float32_t), PointerClass(float32_t), + PointerClass(float32_t)], [PointerClass(float32_t)]), FloatGroupNormTemplate.referenceTemplate, + BasicTransformer), +] diff --git a/Deeploy/Targets/Generic/Layers.py b/Deeploy/Targets/Generic/Layers.py index 
36dfa161d4..0c420d6c4b 100644 --- a/Deeploy/Targets/Generic/Layers.py +++ b/Deeploy/Targets/Generic/Layers.py @@ -744,3 +744,11 @@ class HardSigmoidLayer(ONNXLayer): class HardSwishLayer(ONNXLayer): pass + + +class InstanceNormLayer(ONNXLayer): + pass + + +class GroupNormLayer(ONNXLayer): + pass diff --git a/Deeploy/Targets/Generic/Parsers.py b/Deeploy/Targets/Generic/Parsers.py index 400dad93f0..536c217ae3 100644 --- a/Deeploy/Targets/Generic/Parsers.py +++ b/Deeploy/Targets/Generic/Parsers.py @@ -2951,3 +2951,52 @@ class HardSwishParser(UnaryElementWiseParser): def parseNode(self, node: gs.Node) -> bool: return super().parseNode(node) and node.op == 'HardSwish' + + +class NormalizationParser(NodeParser): + + def parseNode(self, node: gs.Node) -> bool: + if not all([ + len(node.inputs) == 3, + len(node.outputs) == 1, + ]): + return False + + self.operatorRepresentation['epsilon'] = node.attrs.get('epsilon', 1e-5) + + return True + + def parseNodeCtxt(self, + ctxt: NetworkContext, + node: gs.Node, + channels_first: bool = True) -> Tuple[NetworkContext, bool]: + data_in = ctxt.lookup(node.inputs[0].name) + self.operatorRepresentation['data_in'] = data_in.name + self.operatorRepresentation['scale'] = ctxt.lookup(node.inputs[1].name).name + self.operatorRepresentation['bias'] = ctxt.lookup(node.inputs[2].name).name + self.operatorRepresentation['data_out'] = ctxt.lookup(node.outputs[0].name).name + # (N, C, *spatial) layout; int() because np.prod yields a NumPy scalar (float 1.0 if no spatial dims) + self.operatorRepresentation['batch_size'] = data_in.shape[0] + self.operatorRepresentation['num_channels'] = data_in.shape[1] + self.operatorRepresentation['spatial'] = int(np.prod(data_in.shape[2:])) + return ctxt, True + + +class InstanceNormParser(NormalizationParser): + + def parseNode(self, node: gs.Node) -> bool: + return super().parseNode(node) and node.op == 'InstanceNormalization' + + +class GroupNormParser(NormalizationParser): + + # TODO: attribute stash_type not handled + def parseNode(self, node: gs.Node) ->
bool: + if not all([ + super().parseNode(node), + node.op == 'GroupNormalization', + 'num_groups' in node.attrs, + ]): + return False + self.operatorRepresentation['num_groups'] = node.attrs['num_groups'] + return True diff --git a/Deeploy/Targets/Generic/Platform.py b/Deeploy/Targets/Generic/Platform.py index 2976b8b0db..c314af6ad9 100644 --- a/Deeploy/Targets/Generic/Platform.py +++ b/Deeploy/Targets/Generic/Platform.py @@ -10,25 +10,28 @@ BasicClipBindings, BasicConcatBindings, BasicConv1DBindings, BasicConv2DBindings, BasicConvTransposeBindings, \ BasicDebugPrintBindings, BasicDequantBindings, BasicDivBindings, BasicDWConv1DBinding, BasicDWConv2DBindings, \ BasicExpBindings, BasicFloorBindings, BasicGatherBindings, BasicGELUBindings, BasicGEMMBindings, \ - BasicHardSigmoidBindings, BasicHardSwishBindings, BasicITAPartialSoftmaxBinding, BasicITASoftmaxBinding, \ - BasicLayerNormBindings, BasicMatMulBindings, BasicMaxPool1DBindings, BasicMaxPool2DBindings, BasicMulBindings, \ - BasicPad1DBindings, BasicPad2DBindings, BasicPowBindings, BasicQuantBindings, BasicReduceMeanBindings, \ - BasicReduceSumBindings, BasicReluBinding, BasicReshapeBindings, BasicRQIntegerDivBinding, BasicRQSBindings, \ - BasicRQSGELUBinding, BasicSigmoidBindings, BasicSliceBindings, BasicSoftmaxBindings, BasicSqrtBindings, \ - BasicSubBindings, BasicSwishBindings, BasicTransposeBindings, DummyBinding + BasicGroupNormBindings, BasicHardSigmoidBindings, BasicHardSwishBindings, BasicInstanceNormBindings, \ + BasicITAPartialSoftmaxBinding, BasicITASoftmaxBinding, BasicLayerNormBindings, BasicMatMulBindings, \ + BasicMaxPool1DBindings, BasicMaxPool2DBindings, BasicMulBindings, BasicPad1DBindings, BasicPad2DBindings, \ + BasicPowBindings, BasicQuantBindings, BasicReduceMeanBindings, BasicReduceSumBindings, BasicReluBinding, \ + BasicReshapeBindings, BasicRQIntegerDivBinding, BasicRQSBindings, BasicRQSGELUBinding, BasicSigmoidBindings, \ + BasicSliceBindings, BasicSoftmaxBindings, 
BasicSqrtBindings, BasicSubBindings, BasicSwishBindings, \ + BasicTransposeBindings, DummyBinding from Deeploy.Targets.Generic.Layers import AddLayer, BatchNormalizationLayer, CeilLayer, ClipLayer, ConcatLayer, \ ConvLayer, ConvTransposeLayer, DebugPrintLayer, DequantLayer, DivLayer, ExpLayer, FloorLayer, GatherLayer, \ - GELULayer, GEMMLayer, ITAMaxLayer, LayerNormLayer, MatMulLayer, MaxPoolLayer, MulLayer, PadLayer, PowLayer, \ - QuantLayer, ReduceMeanLayer, ReduceSumLayer, ReluLayer, RequantShiftLayer, ReshapeLayer, RQIntegerDivLayer, \ - RQSiGELULayer, SigmoidLayer, SliceLayer, SoftmaxLayer, SqrtLayer, SubLayer, SwishLayer, TransposeLayer + GELULayer, GEMMLayer, GroupNormLayer, InstanceNormLayer, ITAMaxLayer, LayerNormLayer, MatMulLayer, MaxPoolLayer, \ + MulLayer, PadLayer, PowLayer, QuantLayer, ReduceMeanLayer, ReduceSumLayer, ReluLayer, RequantShiftLayer, \ + ReshapeLayer, RQIntegerDivLayer, RQSiGELULayer, SigmoidLayer, SliceLayer, SoftmaxLayer, SqrtLayer, SubLayer, \ + SwishLayer, TransposeLayer from Deeploy.Targets.Generic.Parsers import AddParser, BatchNormParser, CeilParser, ClipParser, ConcatParser, \ ConvTranspose1DParser, DebugParser, DequantParser, DivParser, DummyParser, ExpParser, FlattenParser, FloorParser, \ GatherParser, GELUParser, GenericConv1DParser, GenericConv2DParser, GenericDWConv1DParser, GenericDWConv2DParser, \ - GenericGEMMParser, GenericMaxPool2DParser, HardSigmoidParser, HardSwishParser, IntegerDivParser, ITAMaxParser, \ - ITAPartialMaxParser, LayerNormParser, MatMulParser, MaxPool1DParser, MulParser, Pad1DParser, Pad2DParser, \ - PowParser, QuantParser, ReduceMeanParser, ReduceSumParser, ReluParser, RequantShiftParser, ReshapeParser, \ - RQIntegerDivParser, RQSiGELUParser, SigmoidParser, SliceParser, SoftmaxParser, SqrtParser, SubParser, SwishParser, \ - TransposeParser, UnsqueezeParser, iLayerNormParser, iSoftmaxParser + GenericGEMMParser, GenericMaxPool2DParser, GroupNormParser, HardSigmoidParser, HardSwishParser, \ + 
InstanceNormParser, IntegerDivParser, ITAMaxParser, ITAPartialMaxParser, LayerNormParser, MatMulParser, \ + MaxPool1DParser, MulParser, Pad1DParser, Pad2DParser, PowParser, QuantParser, ReduceMeanParser, ReduceSumParser, \ + ReluParser, RequantShiftParser, ReshapeParser, RQIntegerDivParser, RQSiGELUParser, SigmoidParser, SliceParser, \ + SoftmaxParser, SqrtParser, SubParser, SwishParser, TransposeParser, UnsqueezeParser, iLayerNormParser, \ + iSoftmaxParser from Deeploy.Targets.Generic.Templates import AllocateTemplate, FreeTemplate from Deeploy.Targets.Generic.TopologyOptimizationPasses.Passes import DequantPatternPass, ExtractPaddingFromConvPass, \ ExtractPaddingFromPoolPass, MatMulAddMergePass, MergeConstAddAndRequantPass, QuantPatternPass, \ @@ -84,6 +87,8 @@ SwishMapper = NodeMapper(SwishParser(), BasicSwishBindings) HardSigmoidMapper = NodeMapper(HardSigmoidParser(), BasicHardSigmoidBindings) HardSwishMapper = NodeMapper(HardSwishParser(), BasicHardSwishBindings) +InstanceNormMapper = NodeMapper(InstanceNormParser(), BasicInstanceNormBindings) +GroupNormMapper = NodeMapper(GroupNormParser(), BasicGroupNormBindings) # Dummy nodes are intended for development purposes only! 
# They should always generate compiler errors to not accidentally end up in production code @@ -139,6 +144,8 @@ 'Swish': SwishLayer([SwishMapper]), 'HardSigmoid': SigmoidLayer([HardSigmoidMapper]), 'HardSwish': SwishLayer([HardSwishMapper]), + 'InstanceNormalization': InstanceNormLayer([InstanceNormMapper]), + 'GroupNormalization': GroupNormLayer([GroupNormMapper]), # # For example, you can use the DummpyMapper, in case you want to test # # deployment or optimizations with GlobalAveragePool nodes but did not yet # # implement the corresponding kernel diff --git a/Deeploy/Targets/Generic/Templates/FloatGroupNormTemplate.py b/Deeploy/Targets/Generic/Templates/FloatGroupNormTemplate.py new file mode 100644 index 0000000000..9c42d8011c --- /dev/null +++ b/Deeploy/Targets/Generic/Templates/FloatGroupNormTemplate.py @@ -0,0 +1,22 @@ +# SPDX-FileCopyrightText: 2023 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 + +from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation + + +class _GroupNormTemplate(NodeTemplate): + + def alignToContext(self, ctxt: NetworkContext, + operatorRepresentation: OperatorRepresentation) -> tuple[NetworkContext, dict, list[str]]: + data_in = ctxt.lookup(operatorRepresentation['data_in']) + operatorRepresentation['type_width'] = data_in._type.referencedType.typeWidth + return ctxt, operatorRepresentation, [] + + +referenceTemplate = _GroupNormTemplate(""" +// Group Normalization (Name: ${nodeName}, Op: ${nodeOp}) +GroupNormalization_fp${type_width}_fp${type_width}( + ${data_in}, ${data_out}, ${scale}, ${bias}, + ${batch_size}, ${num_channels}, ${spatial}, ${num_groups}, ${epsilon}); +""") \ No newline at end of file diff --git a/Deeploy/Targets/Generic/Templates/FloatInstanceNormTemplate.py b/Deeploy/Targets/Generic/Templates/FloatInstanceNormTemplate.py new file mode 100644 index 0000000000..efcfce5f86 --- /dev/null +++ b/Deeploy/Targets/Generic/Templates/FloatInstanceNormTemplate.py @@ 
-0,0 +1,21 @@ +# SPDX-FileCopyrightText: 2023 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 + +from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation + + +class _InstanceNormTemplate(NodeTemplate): + + def alignToContext(self, ctxt: NetworkContext, + operatorRepresentation: OperatorRepresentation) -> tuple[NetworkContext, dict, list[str]]: + data_in = ctxt.lookup(operatorRepresentation['data_in']) + operatorRepresentation['type_width'] = data_in._type.referencedType.typeWidth + return ctxt, operatorRepresentation, [] + + +referenceTemplate = _InstanceNormTemplate(""" +// Instance Normalization (Name: ${nodeName}, Op: ${nodeOp}) +InstanceNormalization_fp${type_width}_fp${type_width}( + ${data_in}, ${data_out}, ${scale}, ${bias}, ${batch_size}, ${num_channels}, ${spatial}, ${epsilon}); +""") \ No newline at end of file diff --git a/DeeployTest/Tests/Kernels/FP32/GroupNorm/inputs.npz b/DeeployTest/Tests/Kernels/FP32/GroupNorm/inputs.npz new file mode 100644 index 0000000000000000000000000000000000000000..b80b42275ce9a9543893f7cff207a6d872804fb5 GIT binary patch literal 776 zcmbV~eJs>*9LIk@7lrarv2~Lj*$y2e;^yi1{YqjtIp*oQTrP*$#q~RGLUoGtFtQvm zMQv&{MIL6w?ml0w{%O;C68Yow2eYPTg_fIi7q#_o@6W?~pZ7k0zQiKVCO>Lj9+b0? 
zKeV%YqkFN#~Ti(CWs|8rJ>6CiX3$|%Mau6BXWcJ0vm1#6&UH3P^ApiWwFZiBO+fX?Q>0(=!V$C!J`!s~j?Nr}AgdIV;_8MMQ(Iu9 z3~{8baPwLQuFDV_2J3m4W%JN})fhO4^3aNl2G5-w(vPK=VY%)V;Z<_* z-uE&_mdQZITmh3$w-fstTOqktfv#wjxNqVv%t~ppaP2l2HQRAxZZcY|?gC>}2hvxS zGW{kl4(OZ)-+?5czLmjsp8_JOibSFvd{oXRMUHy9LQ~0c64QDSBxzDOlt~dTJ7$;^ zRU+4!CfrqGWE|dkp=n)|L8WaWF;Jr~4d62S9G~F{;j*D>6`_L1kMs_!VZ`P`2$r@j zpfrzFbV2J(O4uy?T9idnUksy55j|K|;ZEE_2SF^Nt^7Fu!fRpEGPWB?TTDw^_Ft66 RB5NB3t9G&epSq_|juX#pdXka&JxUIkDj zu^^+kv?x6_KM%su(&gX~V041m=B26eXE=T?7#KQ-S6}|!>%M?xjpltv-?*H*xE(OLwzG8 j#wEhRC?vqe#K8!}96-zh!bu8TP_N-q;l#qlARr0=gbq~$ literal 0 HcmV?d00001 diff --git a/DeeployTest/Tests/Kernels/FP32/GroupNorm/outputs.npz b/DeeployTest/Tests/Kernels/FP32/GroupNorm/outputs.npz new file mode 100644 index 0000000000000000000000000000000000000000..c1d73d6d6730222cdd98be07f53a0d468786219f GIT binary patch literal 778 zcmbV~Ye-XZ7{<@ERMaG6X|b@QiCeG@O_?U{{7Ki-&`n;|+%!?c*5Su9mal+1XXnNSkLWaG?| zGNoE6mKT;Oi>2;dATCu&UAamoR!ChpD~gTBvGEvJ?`7ks@&Ax-;L{iwT@irvWF$n2 zgCT=T1+HtBN4CRD9}Yazi=e+E8jfX&AosWk!geIU!$}?3wQ3j&&jvNNjVSJ7z=*kr zPu(6QN+UZUeYVy1X6magH_cA;SeB?|`Ld1YRYW8j!-1=_QnD9yHbcS#Dv3+Mfq4vV z>7tWIKB<7^h#=3!&hgE$I`ZTAK60|wpU9VGLjUT1!dS&cM5_S}nGTZL#VJJRTt!id zw_reE28UlSc$S-CR&yE7=5<1NXA_K8U4(MI2_l>pkPlpf4^lILp9xM#u0V5TI~+D$ z1CHi8yz&l)4!i|&7AfK#GacGi#wfa^65UjfQIzZsv^ctnB}PB|oK7K6S$0CeY8sKW zI|LaanfT${MT!=hkkx8MEZ!Qz)o0MXFGYw~QHxsgCQ$rq48a2}QR1)_P@XD~RYK@j!?^eH`nrHgI#?;Bw(sT~$)gb>8&gNBF&sCb-X zpQ)RG>e^{=7%4~@s3sm|4?{pSpBQPYB2&i;Ye2(rGTf;9_Dp1Xk# z1@(f~n@%#See4s48iYF~WMMr8+HaX464E?4p8wM8X45^s8%(>+Ky&ZEEQ^Gjz1#`S LRdFsK{LS?*<`g~n literal 0 HcmV?d00001 diff --git a/DeeployTest/Tests/Kernels/FP32/InstanceNorm/inputs.npz b/DeeployTest/Tests/Kernels/FP32/InstanceNorm/inputs.npz new file mode 100644 index 0000000000000000000000000000000000000000..b80b42275ce9a9543893f7cff207a6d872804fb5 GIT binary patch literal 776 zcmbV~eJs>*9LIk@7lrarv2~Lj*$y2e;^yi1{YqjtIp*oQTrP*$#q~RGLUoGtFtQvm zMQv&{MIL6w?ml0w{%O;C68Yow2eYPTg_fIi7q#_o@6W?~pZ7k0zQiKVCO>Lj9+b0? 
zKeV%YqkFN#~Ti(CWs|8rJ>6CiX3$|%Mau6BXWcJ0vm1#6&UH3P^ApiWwFZiBO+fX?Q>0(=!V$C!J`!s~j?Nr}AgdIV;_8MMQ(Iu9 z3~{8baPwLQuFDV_2J3m4W%JN})fhO4^3aNl2G5-w(vPK=VY%)V;Z<_* z-uE&_mdQZITmh3$w-fstTOqktfv#wjxNqVv%t~ppaP2l2HQRAxZZcY|?gC>}2hvxS zGW{kl4(OZ)-+?5czLmjsp8_JOibSFvd{oXRMUHy9LQ~0c64QDSBxzDOlt~dTJ7$;^ zRU+4!CfrqGWE|dkp=n)|L8WaWF;Jr~4d62S9G~F{;j*D>6`_L1kMs_!VZ`P`2$r@j zpfrzFbV2J(O4uy?T9idnUksy55j|K|;ZEE_2SF^Nt^7Fu!fRpEGPWB?TTDw^_Ft66 RB5NB6eXE=T?7#KQ-S6}|!>%M?xjpltv-?*H*xE(OLtQT<#wEhRC?vqe#K8!} X96-zh!bu8TP_N)p;l#qlAixa(nI=%F literal 0 HcmV?d00001 diff --git a/DeeployTest/Tests/Kernels/FP32/InstanceNorm/outputs.npz b/DeeployTest/Tests/Kernels/FP32/InstanceNorm/outputs.npz new file mode 100644 index 0000000000000000000000000000000000000000..ace60623d022cf7d2497c70176aaeef257757cba GIT binary patch literal 778 zcmbV~TS${}7{W@zY~l1_2v0jJyAzW*nw7uj^kVw18OI@1oD%uUp3u0m5R zbw#omqN$mrDH3%N`n|i*wLzkIy0FA%R31_+!)V29OC!4Nc@Gco!+ZIYWYD}~upNbB za(fkTdslRffvI)cVx9J=s<@nj9l#9KoxeYRH}jNOl0(5tF+Dq9S(2w=3s`JnK_Z*O zVi%}2T8%<=POZsTdh&F|B_h$u`mn* z-o}X>qKEj@#}1Iz_mZdWE;88KgST_TNa1a75QHb;BdlnUq*KZ6DHF-;rH~;q5~}O- za4(w=;WT$aUuz&i2$IRg6%~;~jUZC@hmzCrM!?qI4L@d1;C|0UXf!vO=xzYS8N7i^ z);U;3Ch&jX4}Cfl#Q3*E8m|*l&)$PGZ<@inZUo0x7bsKPU`1hqEv*p_2Rwj+jaK+P z^9WMLdVteDM%M8!LiU$=T;^oJ1{xI>RFxwv=%sLz(EvT#SNNfmjc{v#OBf<2NrSHu zCuYM@f~^sstn?*fuii&{jW#sr0%AYoHc{|Mgngg4PFMI#On+eug&K1dQBo$#D804E$Wn9$oH&9%>R z>_~;>X0;Rg#57A# + +void GroupNormalization_fp32_fp32( + const float32_t *__restrict__ src, float32_t *__restrict__ dst, + const float32_t *__restrict__ scale, const float32_t *__restrict__ bias, + uint32_t batch_size, uint32_t num_channels, + uint32_t spatial, // spatial dimension (L or H*W or D*H*W, etc.) 
+ uint32_t num_groups, float32_t epsilon) { + + uint32_t channels_per_group = num_channels / num_groups; + uint32_t group_elements = channels_per_group * spatial; + uint32_t slice = num_channels * spatial; // elements per batch + + for (uint32_t n = 0; n < batch_size; ++n) { + for (uint32_t g = 0; g < num_groups; ++g) { + uint32_t group_offset = n * slice + g * group_elements; + const float32_t *x_group = src + group_offset; + + /* --- mean --- */ + float64_t sum = 0.0; + for (uint32_t i = 0; i < group_elements; ++i) { + sum += x_group[i]; + } + float64_t mean = sum / (float64_t)group_elements; + + /* --- variance --- */ + float64_t var = 0.0; + for (uint32_t i = 0; i < group_elements; ++i) { + float64_t d = (float64_t)x_group[i] - mean; + var += d * d; + } + var /= (float64_t)group_elements; + + /* --- normalize + affine --- */ + float32_t inv_std = (float32_t)(1.0 / sqrt(var + (float64_t)epsilon)); + float32_t m = (float32_t)mean; + + for (uint32_t lc = 0; lc < channels_per_group; ++lc) { + const float32_t *x_channel = x_group + lc * spatial; + float32_t *y_channel = dst + group_offset + lc * spatial; + uint32_t c = g * channels_per_group + lc; // global channel + float32_t s = scale[c]; + float32_t b = bias[c]; + + for (uint32_t i = 0; i < spatial; ++i) { + y_channel[i] = s * (x_channel[i] - m) * inv_std + b; + } + } + } + } +} diff --git a/TargetLibraries/Generic/src/InstanceNormalization_fp32.c b/TargetLibraries/Generic/src/InstanceNormalization_fp32.c new file mode 100644 index 0000000000..ac3c5269b9 --- /dev/null +++ b/TargetLibraries/Generic/src/InstanceNormalization_fp32.c @@ -0,0 +1,50 @@ +/* + * SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "DeeployBasicMath.h" +#include <math.h> + +void InstanceNormalization_fp32_fp32( + const float32_t *__restrict__ src, float32_t *__restrict__ dst, + const float32_t *__restrict__ scale, const float32_t *__restrict__ bias, + uint32_t batch_size,
uint32_t num_channels, + uint32_t spatial, // spatial dimension (L or H*W or D*H*W, etc.) + float32_t epsilon) { + + uint32_t slice = num_channels * spatial; // elements per batch + + for (uint32_t n = 0; n < batch_size; ++n) { + for (uint32_t c = 0; c < num_channels; ++c) { + uint32_t channel_offset = n * slice + c * spatial; + const float32_t *x = src + channel_offset; + float32_t *y = dst + channel_offset; + + /* --- mean --- */ + float64_t sum = 0.0; + for (uint32_t i = 0; i < spatial; ++i) + sum += x[i]; + float64_t mean = sum / (float64_t)spatial; + + /* --- variance --- */ + float64_t var = 0.0; + for (uint32_t i = 0; i < spatial; ++i) { + float64_t d = (float64_t)x[i] - mean; + var += d * d; + } + var /= (float64_t)spatial; + + /* --- normalize + affine --- */ + float32_t inv_std = (float32_t)(1.0 / sqrt(var + (float64_t)epsilon)); + float32_t g = scale[c]; + float32_t b = bias[c]; + float32_t m = (float32_t)mean; + + for (size_t i = 0; i < spatial; ++i) { + y[i] = g * (x[i] - m) * inv_std + b; + } + } + } +} From f83550f6d9a0f7e4d8198c9c99325dd9cf1931d2 Mon Sep 17 00:00:00 2001 From: Alex Marchioni Date: Fri, 15 May 2026 21:08:36 +0000 Subject: [PATCH 12/16] add support for AveragePool, GlobalAveragePool, and GlobalMaxPool operators for Generic target --- Deeploy/Targets/Generic/Bindings.py | 25 +++- Deeploy/Targets/Generic/Layers.py | 12 ++ Deeploy/Targets/Generic/Parsers.py | 117 ++++++++++++++++++ Deeploy/Targets/Generic/Platform.py | 46 ++++--- .../Templates/FloatAveragePoolTemplate.py | 31 +++++ .../FloatGlobalAveragePoolTemplate.py | 22 ++++ .../Templates/FloatGlobalMaxPoolTemplate.py | 22 ++++ .../FP32/AveragePool/Regular_1D/inputs.npz | Bin 0 -> 776 bytes .../FP32/AveragePool/Regular_1D/network.onnx | Bin 0 -> 181 bytes .../FP32/AveragePool/Regular_1D/outputs.npz | Bin 0 -> 746 bytes .../FP32/AveragePool/Regular_2D/inputs.npz | Bin 0 -> 776 bytes .../FP32/AveragePool/Regular_2D/network.onnx | Bin 0 -> 197 bytes
.../FP32/AveragePool/Regular_2D/outputs.npz | Bin 0 -> 554 bytes .../Kernels/FP32/GlobalAveragePool/inputs.npz | Bin 0 -> 776 bytes .../FP32/GlobalAveragePool/network.onnx | Bin 0 -> 143 bytes .../FP32/GlobalAveragePool/outputs.npz | Bin 0 -> 298 bytes .../Kernels/FP32/GlobalMaxPool/inputs.npz | Bin 0 -> 776 bytes .../Kernels/FP32/GlobalMaxPool/network.onnx | Bin 0 -> 138 bytes .../Kernels/FP32/GlobalMaxPool/outputs.npz | Bin 0 -> 298 bytes DeeployTest/test_generic_config.py | 11 ++ .../Generic/inc/DeeployBasicMath.h | 3 + .../Generic/inc/kernel/AveragePool.h | 29 +++++ .../Generic/inc/kernel/GlobalAveragePool.h | 19 +++ .../Generic/inc/kernel/GlobalMaxPool.h | 19 +++ .../Generic/src/AveragePool_fp32.c | 85 +++++++++++++ .../Generic/src/GlobalAveragePool_fp32.c | 25 ++++ .../Generic/src/GlobalMaxPool_fp32.c | 29 +++++ 27 files changed, 474 insertions(+), 21 deletions(-) create mode 100644 Deeploy/Targets/Generic/Templates/FloatAveragePoolTemplate.py create mode 100644 Deeploy/Targets/Generic/Templates/FloatGlobalAveragePoolTemplate.py create mode 100644 Deeploy/Targets/Generic/Templates/FloatGlobalMaxPoolTemplate.py create mode 100644 DeeployTest/Tests/Kernels/FP32/AveragePool/Regular_1D/inputs.npz create mode 100644 DeeployTest/Tests/Kernels/FP32/AveragePool/Regular_1D/network.onnx create mode 100644 DeeployTest/Tests/Kernels/FP32/AveragePool/Regular_1D/outputs.npz create mode 100644 DeeployTest/Tests/Kernels/FP32/AveragePool/Regular_2D/inputs.npz create mode 100644 DeeployTest/Tests/Kernels/FP32/AveragePool/Regular_2D/network.onnx create mode 100644 DeeployTest/Tests/Kernels/FP32/AveragePool/Regular_2D/outputs.npz create mode 100644 DeeployTest/Tests/Kernels/FP32/GlobalAveragePool/inputs.npz create mode 100644 DeeployTest/Tests/Kernels/FP32/GlobalAveragePool/network.onnx create mode 100644 DeeployTest/Tests/Kernels/FP32/GlobalAveragePool/outputs.npz create mode 100644 DeeployTest/Tests/Kernels/FP32/GlobalMaxPool/inputs.npz create mode 100644 
DeeployTest/Tests/Kernels/FP32/GlobalMaxPool/network.onnx create mode 100644 DeeployTest/Tests/Kernels/FP32/GlobalMaxPool/outputs.npz create mode 100644 TargetLibraries/Generic/inc/kernel/AveragePool.h create mode 100644 TargetLibraries/Generic/inc/kernel/GlobalAveragePool.h create mode 100644 TargetLibraries/Generic/inc/kernel/GlobalMaxPool.h create mode 100644 TargetLibraries/Generic/src/AveragePool_fp32.c create mode 100644 TargetLibraries/Generic/src/GlobalAveragePool_fp32.c create mode 100644 TargetLibraries/Generic/src/GlobalMaxPool_fp32.c diff --git a/Deeploy/Targets/Generic/Bindings.py b/Deeploy/Targets/Generic/Bindings.py index b68295a76c..21cf01e52a 100644 --- a/Deeploy/Targets/Generic/Bindings.py +++ b/Deeploy/Targets/Generic/Bindings.py @@ -13,8 +13,9 @@ from Deeploy.FutureExtension.CodeTransformationPasses.FutureCodeTransformation import FutureGeneration from Deeploy.Targets.Generic.Templates import AddTemplate, BatchNormalizationTemplate, ConcatTemplate, ConvTemplate, \ ConvTransposeTemplate, DebugPrintTemplate, DequantTemplate, DummyTemplate, DWConvTemplate, FloatAddTemplate, \ - FloatCeilTemplate, FloatClipTemplate, FloatConvTemplate, FloatDivTemplate, FloatDWConvTemplate, FloatExpTemplate, \ - FloatFloorTemplate, FloatGELUTemplate, FloatGemmTemplate, FloatGroupNormTemplate, FloatHardSigmoidTemplate, \ + FloatAveragePoolTemplate, FloatCeilTemplate, FloatClipTemplate, FloatConvTemplate, FloatDivTemplate, \ + FloatDWConvTemplate, FloatExpTemplate, FloatFloorTemplate, FloatGELUTemplate, FloatGemmTemplate, \ + FloatGlobalAveragePoolTemplate, FloatGlobalMaxPoolTemplate, FloatGroupNormTemplate, FloatHardSigmoidTemplate, \ FloatHardSwishTemplate, FloatInstanceNormTemplate, FloatLayernormTemplate, FloatMatMulTemplate, \ FloatMaxPoolTemplate, FloatMulTemplate, FloatPadTemplate, FloatPowTemplate, FloatReduceMeanTemplate, \ FloatReluTemplate, FloatSigmoidTemplate, FloatSoftmaxTemplate, FloatSqrtTemplate, FloatSubTemplate, \ @@ -399,3 +400,23 @@ 
PointerClass(float32_t)], [PointerClass(float32_t)]), FloatGroupNormTemplate.referenceTemplate, BasicTransformer), ] + +BasicAveragePool1DBindings = [ + NodeBinding(DummyChecker([PointerClass(float32_t)], [PointerClass(float32_t)]), + FloatAveragePoolTemplate.referenceTemplate1d, BasicTransformer) +] + +BasicAveragePool2DBindings = [ + NodeBinding(DummyChecker([PointerClass(float32_t)], [PointerClass(float32_t)]), + FloatAveragePoolTemplate.referenceTemplate2d, BasicTransformer) +] + +BasicGlobalAveragePoolBindings = [ + NodeBinding(DummyChecker([PointerClass(float32_t)], [PointerClass(float32_t)]), + FloatGlobalAveragePoolTemplate.referenceTemplate, BasicTransformer) +] + +BasicGlobalMaxPoolBindings = [ + NodeBinding(DummyChecker([PointerClass(float32_t)], [PointerClass(float32_t)]), + FloatGlobalMaxPoolTemplate.referenceTemplate, BasicTransformer) +] diff --git a/Deeploy/Targets/Generic/Layers.py b/Deeploy/Targets/Generic/Layers.py index 0c420d6c4b..e625ad7bc5 100644 --- a/Deeploy/Targets/Generic/Layers.py +++ b/Deeploy/Targets/Generic/Layers.py @@ -752,3 +752,15 @@ class InstanceNormLayer(ONNXLayer): class GroupNormLayer(ONNXLayer): pass + + +class AveragePoolLayer(ONNXLayer): + pass + + +class GlobalAveragePoolLayer(ONNXLayer): + pass + + +class GlobalMaxPoolLayer(ONNXLayer): + pass diff --git a/Deeploy/Targets/Generic/Parsers.py b/Deeploy/Targets/Generic/Parsers.py index 536c217ae3..fd56be4a0a 100644 --- a/Deeploy/Targets/Generic/Parsers.py +++ b/Deeploy/Targets/Generic/Parsers.py @@ -3000,3 +3000,120 @@ def parseNode(self, node: gs.Node) -> bool: return False self.operatorRepresentation['num_groups'] = node.attrs['num_groups'] return True + + +class AveragePoolParser(NodeParser): + + def parseNode(self, node: gs.Node) -> bool: + + if not all([ + node.op == 'AveragePool', + len(node.inputs) == 1, + len(node.outputs) == 1, + 'kernel_shape' in node.attrs, + ]): + return False + + kernel_shape = node.attrs['kernel_shape'] + spatial_ndim = len(kernel_shape) + + 
auto_pad = node.attrs.get('auto_pad', 'NOTSET') + ceil_mode = node.attrs.get('ceil_mode', 0) + count_include_pad = node.attrs.get('count_include_pad', 0) + dilations = node.attrs.get('dilations', (1,) * spatial_ndim) + strides = node.attrs.get('strides', (1,) * spatial_ndim) + pads = node.attrs.get('pads', (0,) * (2 * spatial_ndim)) + + if not all([ + auto_pad == 'NOTSET', # TODO: implement other values + ceil_mode == 0, # TODO: implement other values + count_include_pad == 0, # TODO: implement other values + all([d == 1 for d in dilations]), # TODO: implement other values + len(dilations) == spatial_ndim, + len(strides) == spatial_ndim, + len(pads) == 2 * spatial_ndim, + all([s > 0 for s in strides]), + ]): + return False + + self.operatorRepresentation['kernel_shape'] = kernel_shape + self.operatorRepresentation['auto_pad'] = auto_pad + self.operatorRepresentation['ceil_mode'] = ceil_mode + self.operatorRepresentation['count_include_pad'] = count_include_pad + self.operatorRepresentation['dilations'] = dilations + self.operatorRepresentation['strides'] = strides + self.operatorRepresentation['pads'] = pads + + return True + + def parseNodeCtxt(self, + ctxt: NetworkContext, + node: gs.Node, + channels_first: bool = True) -> Tuple[NetworkContext, bool]: + + data_in = ctxt.lookup(node.inputs[0].name) + data_out = ctxt.lookup(node.outputs[0].name) + self.operatorRepresentation['data_in'] = data_in.name + self.operatorRepresentation['data_out'] = data_out.name + + self.operatorRepresentation['batch_size'] = data_in.shape[0] + self.operatorRepresentation['num_channels'] = data_in.shape[1] + + spatial_shape = data_in.shape[2:] + if len(self.operatorRepresentation['kernel_shape']) != len(spatial_shape): + return ctxt, False + + if len(spatial_shape) == 1: + self.operatorRepresentation['length'] = spatial_shape[0] + elif len(spatial_shape) == 2: + self.operatorRepresentation['height'] = spatial_shape[0] + self.operatorRepresentation['width'] = spatial_shape[1] + else: +
return ctxt, False + + return ctxt, True + + +class AveragePool1DParser(AveragePoolParser): + + def parseNode(self, node: gs.Node) -> bool: + return super().parseNode(node) and len(node.attrs['kernel_shape']) == 1 + + +class AveragePool2DParser(AveragePoolParser): + + def parseNode(self, node: gs.Node) -> bool: + return super().parseNode(node) and len(node.attrs['kernel_shape']) == 2 + + +class GlobalPoolParser(NodeParser): + + def parseNode(self, node: gs.Node) -> bool: + return len(node.inputs) == 1 and len(node.outputs) == 1 + + def parseNodeCtxt(self, + ctxt: NetworkContext, + node: gs.Node, + channels_first: bool = True) -> Tuple[NetworkContext, bool]: + + data_in = ctxt.lookup(node.inputs[0].name) + data_out = ctxt.lookup(node.outputs[0].name) + self.operatorRepresentation['data_in'] = data_in.name + self.operatorRepresentation['data_out'] = data_out.name + self.operatorRepresentation['batch_size'] = data_in.shape[0] + self.operatorRepresentation['num_channels'] = data_in.shape[1] + self.operatorRepresentation['spatial_size'] = int(np.prod(data_in.shape[2:])) + + return ctxt, True + + +class GlobalAveragePoolParser(GlobalPoolParser): + + def parseNode(self, node: gs.Node) -> bool: + return super().parseNode(node) and node.op == 'GlobalAveragePool' + + +class GlobalMaxPoolParser(GlobalPoolParser): + + def parseNode(self, node: gs.Node) -> bool: + return super().parseNode(node) and node.op == 'GlobalMaxPool' diff --git a/Deeploy/Targets/Generic/Platform.py b/Deeploy/Targets/Generic/Platform.py index c314af6ad9..2aa1ef1c38 100644 --- a/Deeploy/Targets/Generic/Platform.py +++ b/Deeploy/Targets/Generic/Platform.py @@ -6,10 +6,11 @@ RemoveEmptyConvBiasPass, RemoveOnlySingletonReduceMeanPass from Deeploy.DeeployTypes import ConstantBuffer, DeploymentEngine, DeploymentPlatform, NodeMapper, NodeTemplate, \ StructBuffer, TopologyOptimizer, TransientBuffer, VariableBuffer -from Deeploy.Targets.Generic.Bindings import BasicAddBindings, BasicBatchNormBindings,
BasicCeilBindings, \ - BasicClipBindings, BasicConcatBindings, BasicConv1DBindings, BasicConv2DBindings, BasicConvTransposeBindings, \ - BasicDebugPrintBindings, BasicDequantBindings, BasicDivBindings, BasicDWConv1DBinding, BasicDWConv2DBindings, \ - BasicExpBindings, BasicFloorBindings, BasicGatherBindings, BasicGELUBindings, BasicGEMMBindings, \ +from Deeploy.Targets.Generic.Bindings import BasicAddBindings, BasicAveragePool1DBindings, BasicAveragePool2DBindings, \ + BasicBatchNormBindings, BasicCeilBindings, BasicClipBindings, BasicConcatBindings, BasicConv1DBindings, \ + BasicConv2DBindings, BasicConvTransposeBindings, BasicDebugPrintBindings, BasicDequantBindings, BasicDivBindings, \ + BasicDWConv1DBinding, BasicDWConv2DBindings, BasicExpBindings, BasicFloorBindings, BasicGatherBindings, \ + BasicGELUBindings, BasicGEMMBindings, BasicGlobalAveragePoolBindings, BasicGlobalMaxPoolBindings, \ BasicGroupNormBindings, BasicHardSigmoidBindings, BasicHardSwishBindings, BasicInstanceNormBindings, \ BasicITAPartialSoftmaxBinding, BasicITASoftmaxBinding, BasicLayerNormBindings, BasicMatMulBindings, \ BasicMaxPool1DBindings, BasicMaxPool2DBindings, BasicMulBindings, BasicPad1DBindings, BasicPad2DBindings, \ @@ -17,21 +18,21 @@ BasicReshapeBindings, BasicRQIntegerDivBinding, BasicRQSBindings, BasicRQSGELUBinding, BasicSigmoidBindings, \ BasicSliceBindings, BasicSoftmaxBindings, BasicSqrtBindings, BasicSubBindings, BasicSwishBindings, \ BasicTransposeBindings, DummyBinding -from Deeploy.Targets.Generic.Layers import AddLayer, BatchNormalizationLayer, CeilLayer, ClipLayer, ConcatLayer, \ - ConvLayer, ConvTransposeLayer, DebugPrintLayer, DequantLayer, DivLayer, ExpLayer, FloorLayer, GatherLayer, \ - GELULayer, GEMMLayer, GroupNormLayer, InstanceNormLayer, ITAMaxLayer, LayerNormLayer, MatMulLayer, MaxPoolLayer, \ - MulLayer, PadLayer, PowLayer, QuantLayer, ReduceMeanLayer, ReduceSumLayer, ReluLayer, RequantShiftLayer, \ - ReshapeLayer, RQIntegerDivLayer, RQSiGELULayer, 
SigmoidLayer, SliceLayer, SoftmaxLayer, SqrtLayer, SubLayer, \ - SwishLayer, TransposeLayer -from Deeploy.Targets.Generic.Parsers import AddParser, BatchNormParser, CeilParser, ClipParser, ConcatParser, \ - ConvTranspose1DParser, DebugParser, DequantParser, DivParser, DummyParser, ExpParser, FlattenParser, FloorParser, \ - GatherParser, GELUParser, GenericConv1DParser, GenericConv2DParser, GenericDWConv1DParser, GenericDWConv2DParser, \ - GenericGEMMParser, GenericMaxPool2DParser, GroupNormParser, HardSigmoidParser, HardSwishParser, \ - InstanceNormParser, IntegerDivParser, ITAMaxParser, ITAPartialMaxParser, LayerNormParser, MatMulParser, \ - MaxPool1DParser, MulParser, Pad1DParser, Pad2DParser, PowParser, QuantParser, ReduceMeanParser, ReduceSumParser, \ - ReluParser, RequantShiftParser, ReshapeParser, RQIntegerDivParser, RQSiGELUParser, SigmoidParser, SliceParser, \ - SoftmaxParser, SqrtParser, SubParser, SwishParser, TransposeParser, UnsqueezeParser, iLayerNormParser, \ - iSoftmaxParser +from Deeploy.Targets.Generic.Layers import AddLayer, AveragePoolLayer, BatchNormalizationLayer, CeilLayer, ClipLayer, \ + ConcatLayer, ConvLayer, ConvTransposeLayer, DebugPrintLayer, DequantLayer, DivLayer, ExpLayer, FloorLayer, \ + GatherLayer, GELULayer, GEMMLayer, GlobalAveragePoolLayer, GlobalMaxPoolLayer, GroupNormLayer, InstanceNormLayer, \ + ITAMaxLayer, LayerNormLayer, MatMulLayer, MaxPoolLayer, MulLayer, PadLayer, PowLayer, QuantLayer, ReduceMeanLayer, \ + ReduceSumLayer, ReluLayer, RequantShiftLayer, ReshapeLayer, RQIntegerDivLayer, RQSiGELULayer, SigmoidLayer, \ + SliceLayer, SoftmaxLayer, SqrtLayer, SubLayer, SwishLayer, TransposeLayer +from Deeploy.Targets.Generic.Parsers import AddParser, AveragePool1DParser, AveragePool2DParser, BatchNormParser, \ + CeilParser, ClipParser, ConcatParser, ConvTranspose1DParser, DebugParser, DequantParser, DivParser, DummyParser, \ + ExpParser, FlattenParser, FloorParser, GatherParser, GELUParser, GenericConv1DParser, 
GenericConv2DParser, \ + GenericDWConv1DParser, GenericDWConv2DParser, GenericGEMMParser, GenericMaxPool2DParser, GlobalAveragePoolParser, \ + GlobalMaxPoolParser, GroupNormParser, HardSigmoidParser, HardSwishParser, InstanceNormParser, IntegerDivParser, \ + ITAMaxParser, ITAPartialMaxParser, LayerNormParser, MatMulParser, MaxPool1DParser, MulParser, Pad1DParser, \ + Pad2DParser, PowParser, QuantParser, ReduceMeanParser, ReduceSumParser, ReluParser, RequantShiftParser, \ + ReshapeParser, RQIntegerDivParser, RQSiGELUParser, SigmoidParser, SliceParser, SoftmaxParser, SqrtParser, \ + SubParser, SwishParser, TransposeParser, UnsqueezeParser, iLayerNormParser, iSoftmaxParser from Deeploy.Targets.Generic.Templates import AllocateTemplate, FreeTemplate from Deeploy.Targets.Generic.TopologyOptimizationPasses.Passes import DequantPatternPass, ExtractPaddingFromConvPass, \ ExtractPaddingFromPoolPass, MatMulAddMergePass, MergeConstAddAndRequantPass, QuantPatternPass, \ @@ -89,6 +90,10 @@ HardSwishMapper = NodeMapper(HardSwishParser(), BasicHardSwishBindings) InstanceNormMapper = NodeMapper(InstanceNormParser(), BasicInstanceNormBindings) GroupNormMapper = NodeMapper(GroupNormParser(), BasicGroupNormBindings) +AveragePool1DMapper = NodeMapper(AveragePool1DParser(), BasicAveragePool1DBindings) +AveragePool2DMapper = NodeMapper(AveragePool2DParser(), BasicAveragePool2DBindings) +GlobalAveragePoolMapper = NodeMapper(GlobalAveragePoolParser(), BasicGlobalAveragePoolBindings) +GlobalMaxPoolMapper = NodeMapper(GlobalMaxPoolParser(), BasicGlobalMaxPoolBindings) # Dummy nodes are intended for development purposes only! 
# They should always generate compiler errors to not accidentally end up in production code @@ -146,6 +151,9 @@ 'HardSwish': SwishLayer([HardSwishMapper]), 'InstanceNormalization': InstanceNormLayer([InstanceNormMapper]), 'GroupNormalization': GroupNormLayer([GroupNormMapper]), + 'AveragePool': AveragePoolLayer([AveragePool1DMapper, AveragePool2DMapper]), + 'GlobalAveragePool': GlobalAveragePoolLayer([GlobalAveragePoolMapper]), + 'GlobalMaxPool': GlobalMaxPoolLayer([GlobalMaxPoolMapper]), # # For example, you can use the DummpyMapper, in case you want to test # # deployment or optimizations with GlobalAveragePool nodes but did not yet # # implement the corresponding kernel diff --git a/Deeploy/Targets/Generic/Templates/FloatAveragePoolTemplate.py b/Deeploy/Targets/Generic/Templates/FloatAveragePoolTemplate.py new file mode 100644 index 0000000000..36519dacc2 --- /dev/null +++ b/Deeploy/Targets/Generic/Templates/FloatAveragePoolTemplate.py @@ -0,0 +1,31 @@ +# SPDX-FileCopyrightText: 2023 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 + +from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation + + +class _AveragePoolTemplate(NodeTemplate): + + def alignToContext(self, ctxt: NetworkContext, + operatorRepresentation: OperatorRepresentation) -> tuple[NetworkContext, dict, list[str]]: + + data_in = ctxt.lookup(operatorRepresentation['data_in']) + operatorRepresentation['type_width'] = data_in._type.referencedType.typeWidth + return ctxt, operatorRepresentation, [] + + +referenceTemplate1d = _AveragePoolTemplate(""" +// Average Pool 1D (Name: ${nodeName}, Op: ${nodeOp}) +AveragePool1d_fp${type_width}_fp${type_width}( + ${data_in}, ${data_out}, ${batch_size}, ${num_channels}, ${length}, ${kernel_shape[0]}, + ${strides[0]}, ${pads[0]}, ${pads[1]}); +""") + +referenceTemplate2d = _AveragePoolTemplate(""" +// Average Pool 2D (Name: ${nodeName}, Op: ${nodeOp}) +AveragePool2d_fp${type_width}_fp${type_width}( + 
${data_in}, ${data_out}, ${batch_size}, ${num_channels}, ${height}, ${width}, + ${kernel_shape[0]}, ${kernel_shape[1]}, ${strides[0]}, ${strides[1]}, + ${pads[0]}, ${pads[1]}, ${pads[2]}, ${pads[3]}); +""") diff --git a/Deeploy/Targets/Generic/Templates/FloatGlobalAveragePoolTemplate.py b/Deeploy/Targets/Generic/Templates/FloatGlobalAveragePoolTemplate.py new file mode 100644 index 0000000000..519fd8e82b --- /dev/null +++ b/Deeploy/Targets/Generic/Templates/FloatGlobalAveragePoolTemplate.py @@ -0,0 +1,22 @@ +# SPDX-FileCopyrightText: 2023 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 + +from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation + + +class _GlobalAveragePoolTemplate(NodeTemplate): + + def alignToContext(self, ctxt: NetworkContext, + operatorRepresentation: OperatorRepresentation) -> tuple[NetworkContext, dict, list[str]]: + + data_in = ctxt.lookup(operatorRepresentation['data_in']) + operatorRepresentation['type_width'] = data_in._type.referencedType.typeWidth + return ctxt, operatorRepresentation, [] + + +referenceTemplate = _GlobalAveragePoolTemplate(""" +// Global Average Pool (Name: ${nodeName}, Op: ${nodeOp}) +GlobalAveragePool_fp${type_width}_fp${type_width}( + ${data_in}, ${data_out}, ${batch_size}, ${num_channels}, ${spatial_size}); +""") \ No newline at end of file diff --git a/Deeploy/Targets/Generic/Templates/FloatGlobalMaxPoolTemplate.py b/Deeploy/Targets/Generic/Templates/FloatGlobalMaxPoolTemplate.py new file mode 100644 index 0000000000..c41743a898 --- /dev/null +++ b/Deeploy/Targets/Generic/Templates/FloatGlobalMaxPoolTemplate.py @@ -0,0 +1,22 @@ +# SPDX-FileCopyrightText: 2023 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 + +from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation + + +class _GlobalMaxPoolTemplate(NodeTemplate): + + def alignToContext(self, ctxt: NetworkContext, + operatorRepresentation:
OperatorRepresentation) -> tuple[NetworkContext, dict, list[str]]: + + data_in = ctxt.lookup(operatorRepresentation['data_in']) + operatorRepresentation['type_width'] = data_in._type.referencedType.typeWidth + return ctxt, operatorRepresentation, [] + + +referenceTemplate = _GlobalMaxPoolTemplate(""" +// Global Max Pool 1D (Name: ${nodeName}, Op: ${nodeOp}) +GlobalMaxPool_fp${type_width}_fp${type_width}( + ${data_in}, ${data_out}, ${batch_size}, ${num_channels}, ${spatial_size}); +""") \ No newline at end of file diff --git a/DeeployTest/Tests/Kernels/FP32/AveragePool/Regular_1D/inputs.npz b/DeeployTest/Tests/Kernels/FP32/AveragePool/Regular_1D/inputs.npz new file mode 100644 index 0000000000000000000000000000000000000000..ac58fc00e29cdc6487f6248edf903d0dedaa1f12 GIT binary patch literal 776 zcmbV~e=O8-7{|X~7lrbpI$Jl{k?qhiBFwMv=Oc;T#ICYk$1)pd?8wr70~{Ay6pQxvU_9FNn$u5eWH$ zyb`vQl@}i=VHHYi9xpFaD=oSDm|Uf_{60ZKez=ey6zVJFpW^?Q9A1BEN9;%neN$YZCb>}GQ;Cs&31vh!LrwcV=T?&X<05JKJOB+_yhx%) zBD$O4N|wLL$#-2L4m{%m2__-ax0Fl9gYUyma~;#~6Hc;O6-o^O;Ppnp*kypO06q2# zbuf%a`jQiE96T!7M?!mSk>j)LM0YlgoHm=8faH0!ES+L*ZPXjz3?hhaC?cKGS*Xo# zgvYylanVN^a&fF9^99N1gIWuvHzuHF#Pyls#-Rp2X4!nSS2YGsqI|T%)#AD14-I1(bWz0tHj#;DS@k~pX}lm+sbJ58w1l~MvQvxlQgh;q zGZG6@9hep{vTN~iu@{#VWu~MSJ1~Oyyj&~=i7CYn3=RxHu8??sUS36fdQoCQMsaCT tdTM?igcBtVvr33dfP+zphl`1W5r{c}SRhG;3+ezNOj&*>7A^(>ZU8eBDjWa+ literal 0 HcmV?d00001 diff --git a/DeeployTest/Tests/Kernels/FP32/AveragePool/Regular_1D/outputs.npz b/DeeployTest/Tests/Kernels/FP32/AveragePool/Regular_1D/outputs.npz new file mode 100644 index 0000000000000000000000000000000000000000..ca18db8983e44e1c155551c86d1a062116442241 GIT binary patch literal 746 zcmWIWW@gc4fB;1XNw#fw{zCy5g9t-@X-Pq8iC$hoB_o3XLjn^-9gLpr7wQ`j$;eQ~ zP_3SlTAW;@Zl$1ZlV+l>qoAIaUsO_*m=~X4l#&V(cT3DEP6dh=XCxM+0{I$-Itms# z3WlbdItsN4WC5<+jHmWoEXoc6D|8(bEzUVa+!8(DXLj8oe7CjZrH%XS3nX{j9hh~> z?%~QQ2L#Rf_s`hEdQh!>;=vz3%?|$Qjz3_)WOpE7f#pGi2_1I2iN_rF^m*;yqki+i 
zxpvuu{9CyW_+>6TU@B2@!0lWA{trj{4y^mwcOX;zgv0uj)d!|DPTTMD^56a~uNEF? z5#TwvthxE10Q1%Z?8~P*T$?n}Vbzj8yPz1>1AF554tCwsKRE5do&#_1+c~sKKR&S3 z`q_ai>T~vOdcE>M1^e=S3|Y*MwZCN?71q7B+qi%Gfr7ISZ6AtVbl7rD&Ov#9$N`}p z=MM0gSR4##&~@amI_sdrJ=Gy7_m;zt+^zd#rf%II|3%!fJL-c&{G2ZiUoNaTaPcz# z!O3|O9VRbt-|w-wWxv`g@q>MD^zGlD5pp!_n(1Kgapl0h#TO5pi)C?W5jgG;ytHBe zf=4U%mis$9{BK_6kTY+Uz0$!Y2c`z69ndTmIe5w2$U!u6mHm%9llCkBXFfRRq0qqs zw~_;QjkX`?(w=o-Y20jk9t(5F!aFLCAKkee*2HJoiY5Q#{u0p6@^AQ46&v;fj`!08qMD{Chu literal 0 HcmV?d00001 diff --git a/DeeployTest/Tests/Kernels/FP32/AveragePool/Regular_2D/inputs.npz b/DeeployTest/Tests/Kernels/FP32/AveragePool/Regular_2D/inputs.npz new file mode 100644 index 0000000000000000000000000000000000000000..b80b42275ce9a9543893f7cff207a6d872804fb5 GIT binary patch literal 776 zcmbV~eJs>*9LIk@7lrarv2~Lj*$y2e;^yi1{YqjtIp*oQTrP*$#q~RGLUoGtFtQvm zMQv&{MIL6w?ml0w{%O;C68Yow2eYPTg_fIi7q#_o@6W?~pZ7k0zQiKVCO>Lj9+b0? zKeV%YqkFN#~Ti(CWs|8rJ>6CiX3$|%Mau6BXWcJ0vm1#6&UH3P^ApiWwFZiBO+fX?Q>0(=!V$C!J`!s~j?Nr}AgdIV;_8MMQ(Iu9 z3~{8baPwLQuFDV_2J3m4W%JN})fhO4^3aNl2G5-w(vPK=VY%)V;Z<_* z-uE&_mdQZITmh3$w-fstTOqktfv#wjxNqVv%t~ppaP2l2HQRAxZZcY|?gC>}2hvxS zGW{kl4(OZ)-+?5czLmjsp8_JOibSFvd{oXRMUHy9LQ~0c64QDSBxzDOlt~dTJ7$;^ zRU+4!CfrqGWE|dkp=n)|L8WaWF;Jr~4d62S9G~F{;j*D>6`_L1kMs_!VZ`P`2$r@j zpfrzFbV2J(O4uy?T9idnUksy55j|K|;ZEE_2SF^Nt^7Fu!fRpEGPWB?TTDw^_Ft66 RB5NB58v_!dhvQvxlQgh;q zGZG6@9he-L7BI4F32?C&mlS2Dq!v3cIxvDn1i4rW5>tvD7$6WRBqW}nmsb&=UX)mn zQCwP-o|>Nr;Y7*9tQ6xC;b0UJ;9}xn1Y!;#W&zjOzo zHakT5Zg9wO4>_>tnzDn_sT~e$Q`8Ss2=we*cI3}~|G7Q~o=MK%FY_w;K;P@S0}p&k z99Y_f9PHdX>{p4~+5b2{*=~|qu!F*^%?^747C3CE3O;aS?o|6X%cnaOZNK6WRNn63 t`LW_afHxzPE;DNEg5nN@xu9_lq8h;Qiiqm~Z&o&tI3o~R0O>nmc>p5V!+`(* literal 0 HcmV?d00001 diff --git a/DeeployTest/Tests/Kernels/FP32/GlobalAveragePool/inputs.npz b/DeeployTest/Tests/Kernels/FP32/GlobalAveragePool/inputs.npz new file mode 100644 index 0000000000000000000000000000000000000000..b80b42275ce9a9543893f7cff207a6d872804fb5 GIT binary patch literal 776 
zcmbV~eJs>*9LIk@7lrarv2~Lj*$y2e;^yi1{YqjtIp*oQTrP*$#q~RGLUoGtFtQvm zMQv&{MIL6w?ml0w{%O;C68Yow2eYPTg_fIi7q#_o@6W?~pZ7k0zQiKVCO>Lj9+b0? zKeV%YqkFN#~Ti(CWs|8rJ>6CiX3$|%Mau6BXWcJ0vm1#6&UH3P^ApiWwFZiBO+fX?Q>0(=!V$C!J`!s~j?Nr}AgdIV;_8MMQ(Iu9 z3~{8baPwLQuFDV_2J3m4W%JN})fhO4^3aNl2G5-w(vPK=VY%)V;Z<_* z-uE&_mdQZITmh3$w-fstTOqktfv#wjxNqVv%t~ppaP2l2HQRAxZZcY|?gC>}2hvxS zGW{kl4(OZ)-+?5czLmjsp8_JOibSFvd{oXRMUHy9LQ~0c64QDSBxzDOlt~dTJ7$;^ zRU+4!CfrqGWE|dkp=n)|L8WaWF;Jr~4d62S9G~F{;j*D>6`_L1kMs_!VZ`P`2$r@j zpfrzFbV2J(O4uy?T9idnUksy55j|K|;ZEE_2SF^Nt^7Fu!fRpEGPWB?TTDw^_Ft66 RB5NBg6=u_Nr^d*WvNAp>8Szv`8h)3`FVL2 z@##g01sTPqMd_*ec@R#NJWR6~mk0-=kN_7G2O|)305J;)Cn<12jTORF0aD56#KOfO GzzqPKq#oY@ literal 0 HcmV?d00001 diff --git a/DeeployTest/Tests/Kernels/FP32/GlobalAveragePool/outputs.npz b/DeeployTest/Tests/Kernels/FP32/GlobalAveragePool/outputs.npz new file mode 100644 index 0000000000000000000000000000000000000000..2b68d327d028da8d3a1e56506c17f5053f3f97da GIT binary patch literal 298 zcmWIWW@gc4fB;1Xrn-H<|3d*6g9t-@X-Pq8iC$hoB_o3X!vd%}7(Ll9)HfiKk)e#C zT0JGTIJrpONm*4g&yENl4!S literal 0 HcmV?d00001 diff --git a/DeeployTest/Tests/Kernels/FP32/GlobalMaxPool/inputs.npz b/DeeployTest/Tests/Kernels/FP32/GlobalMaxPool/inputs.npz new file mode 100644 index 0000000000000000000000000000000000000000..b80b42275ce9a9543893f7cff207a6d872804fb5 GIT binary patch literal 776 zcmbV~eJs>*9LIk@7lrarv2~Lj*$y2e;^yi1{YqjtIp*oQTrP*$#q~RGLUoGtFtQvm zMQv&{MIL6w?ml0w{%O;C68Yow2eYPTg_fIi7q#_o@6W?~pZ7k0zQiKVCO>Lj9+b0? 
zKeV%YqkFN#~Ti(CWs|8rJ>6CiX3$|%Mau6BXWcJ0vm1#6&UH3P^ApiWwFZiBO+fX?Q>0(=!V$C!J`!s~j?Nr}AgdIV;_8MMQ(Iu9 z3~{8baPwLQuFDV_2J3m4W%JN})fhO4^3aNl2G5-w(vPK=VY%)V;Z<_* z-uE&_mdQZITmh3$w-fstTOqktfv#wjxNqVv%t~ppaP2l2HQRAxZZcY|?gC>}2hvxS zGW{kl4(OZ)-+?5czLmjsp8_JOibSFvd{oXRMUHy9LQ~0c64QDSBxzDOlt~dTJ7$;^ zRU+4!CfrqGWE|dkp=n)|L8WaWF;Jr~4d62S9G~F{;j*D>6`_L1kMs_!VZ`P`2$r@j zpfrzFbV2J(O4uy?T9idnUksy55j|K|;ZEE_2SF^Nt^7Fu!fRpEGPWB?TTDw^_Ft66 RB5NB8bg75KfdlOrsc=2nVB(02dPnBM@@{F$)MMDR4oJ6v9*iQpxDV!o?uK4FJcJ B9X$X5 literal 0 HcmV?d00001 diff --git a/DeeployTest/Tests/Kernels/FP32/GlobalMaxPool/outputs.npz b/DeeployTest/Tests/Kernels/FP32/GlobalMaxPool/outputs.npz new file mode 100644 index 0000000000000000000000000000000000000000..5c74873cb54adb61d1b35d8b2209fde826d18f55 GIT binary patch literal 298 zcmWIWW@gc4fB;2?y1y%*{)YlC1`&q*(vpJG61}{FN=60&h6PY{FnY3IsBb_dBSRTO zwR%cwadMHmm4doWnu)rOf_hqhQAtr^UVMI0N-9X)EitD!6)0YukywxlNu2F1*UIt1QP+?CCT|BmSd~0p5&Ey3DA) h1Gyc9xqui38W=$=gkJ-^S=m4$j6i4sr0;+@3;+^CM#%sG literal 0 HcmV?d00001 diff --git a/DeeployTest/test_generic_config.py b/DeeployTest/test_generic_config.py index 121d4e84d2..eaea3d6400 100644 --- a/DeeployTest/test_generic_config.py +++ b/DeeployTest/test_generic_config.py @@ -8,6 +8,8 @@ "Kernels/FP32/ReLU", "Kernels/FP32/Softmax/Regular", "Kernels/FP32/Add/Regular", + "Kernels/FP32/AveragePool/Regular_1D", + "Kernels/FP32/AveragePool/Regular_2D", "Kernels/FP32/Ceil", "Kernels/FP32/Clip", "Kernels/FP32/Conv/DW_2D_Bias", @@ -17,8 +19,15 @@ "Kernels/FP32/Conv/Regular_2D_NoBias", "Kernels/FP32/Conv/Regular_2D_ZeroValuedBias", "Kernels/FP32/Div", + "Kernels/FP32/Exp", "Kernels/FP32/Floor", "Kernels/FP32/GEMM/Regular", + "Kernels/FP32/GlobalAveragePool", + "Kernels/FP32/GlobalMaxPool", + "Kernels/FP32/GroupNorm", + "Kernels/FP32/HardSigmoid", + "Kernels/FP32/HardSwish", + "Kernels/FP32/InstanceNorm", "Kernels/FP32/MatMul", "Kernels/FP32/MaxPool/Regular_1D", "Kernels/FP32/MaxPool/Regular_2D", @@ -46,8 +55,10 @@ 
"Kernels/FP32/ReduceMean/NoKeepDims/Axis2", "Kernels/FP32/ReduceMean/NoKeepDims/ReduceMean_Add", "Kernels/FP32/Reshape/SkipConnection", + "Kernels/FP32/Sigmoid", "Kernels/FP32/Sqrt", "Kernels/FP32/Sub", + "Kernels/FP32/Swish", "Kernels/FP32/Transpose", # Integer Kernels "Kernels/Integer/Softmax/Regular", diff --git a/TargetLibraries/Generic/inc/DeeployBasicMath.h b/TargetLibraries/Generic/inc/DeeployBasicMath.h index 0aa4449321..2023b9e725 100644 --- a/TargetLibraries/Generic/inc/DeeployBasicMath.h +++ b/TargetLibraries/Generic/inc/DeeployBasicMath.h @@ -32,6 +32,7 @@ #include "types.h" #include "utils.h" +#include "kernel/AveragePool.h" #include "kernel/BatchNorm.h" #include "kernel/Ceil.h" #include "kernel/Clip.h" @@ -43,6 +44,8 @@ #include "kernel/Floor.h" #include "kernel/GELU.h" #include "kernel/Gemm.h" +#include "kernel/GlobalAveragePool.h" +#include "kernel/GlobalMaxPool.h" #include "kernel/GroupNorm.h" #include "kernel/HardSigmoid.h" #include "kernel/HardSwish.h" diff --git a/TargetLibraries/Generic/inc/kernel/AveragePool.h b/TargetLibraries/Generic/inc/kernel/AveragePool.h new file mode 100644 index 0000000000..2e0c786ffc --- /dev/null +++ b/TargetLibraries/Generic/inc/kernel/AveragePool.h @@ -0,0 +1,29 @@ +/* + * SPDX-FileCopyrightText: 2023 ETH Zurich and University of Bologna + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef __DEEPLOY_BASIC_MATH_AVERAGEPOOL_KERNEL_HEADER_ +#define __DEEPLOY_BASIC_MATH_AVERAGEPOOL_KERNEL_HEADER_ + +#include "DeeployBasicMath.h" + +/******************************************************************************/ +/* Average Pool */ +/******************************************************************************/ +void AveragePool2d_fp32_fp32(float32_t const *__restrict__ src, + float32_t *__restrict__ dst, uint32_t N, + uint32_t C, uint32_t H, uint32_t W, + uint32_t kernel_h, uint32_t kernel_w, + uint32_t stride_h, uint32_t stride_w, + uint32_t pad_top, uint32_t pad_left, + uint32_t pad_bottom, uint32_t 
pad_right); + +void AveragePool1d_fp32_fp32(float32_t const *__restrict__ src, + float32_t *__restrict__ dst, uint32_t N, + uint32_t C, uint32_t L, uint32_t kernel_len, + uint32_t stride, uint32_t pad_left, + uint32_t pad_right); + +#endif //__DEEPLOY_BASIC_MATH_AVERAGEPOOL_KERNEL_HEADER_ diff --git a/TargetLibraries/Generic/inc/kernel/GlobalAveragePool.h b/TargetLibraries/Generic/inc/kernel/GlobalAveragePool.h new file mode 100644 index 0000000000..a64484189e --- /dev/null +++ b/TargetLibraries/Generic/inc/kernel/GlobalAveragePool.h @@ -0,0 +1,19 @@ +/* + * SPDX-FileCopyrightText: 2023 ETH Zurich and University of Bologna + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef __DEEPLOY_BASIC_MATH_GLOBALAVERAGEPOOL_KERNEL_HEADER_ +#define __DEEPLOY_BASIC_MATH_GLOBALAVERAGEPOOL_KERNEL_HEADER_ + +#include "DeeployBasicMath.h" + +/******************************************************************************/ +/* Average Pool */ +/******************************************************************************/ +void GlobalAveragePool_fp32_fp32(float32_t const *__restrict__ src, + float32_t *__restrict__ dst, uint32_t N, + uint32_t C, uint32_t spatial_size); + +#endif //__DEEPLOY_BASIC_MATH_GLOBALAVERAGEPOOL_KERNEL_HEADER_ diff --git a/TargetLibraries/Generic/inc/kernel/GlobalMaxPool.h b/TargetLibraries/Generic/inc/kernel/GlobalMaxPool.h new file mode 100644 index 0000000000..030b26211e --- /dev/null +++ b/TargetLibraries/Generic/inc/kernel/GlobalMaxPool.h @@ -0,0 +1,19 @@ +/* + * SPDX-FileCopyrightText: 2023 ETH Zurich and University of Bologna + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef __DEEPLOY_BASIC_MATH_GLOBALMAXPOOL_KERNEL_HEADER_ +#define __DEEPLOY_BASIC_MATH_GLOBALMAXPOOL_KERNEL_HEADER_ + +#include "DeeployBasicMath.h" + +/******************************************************************************/ +/* Global Max Pool */ +/******************************************************************************/ +void 
GlobalMaxPool_fp32_fp32(float32_t const *__restrict__ src, + float32_t *__restrict__ dst, uint32_t N, + uint32_t C, uint32_t spatial_size); + +#endif //__DEEPLOY_BASIC_MATH_GLOBALMAXPOOL_KERNEL_HEADER_ diff --git a/TargetLibraries/Generic/src/AveragePool_fp32.c b/TargetLibraries/Generic/src/AveragePool_fp32.c new file mode 100644 index 0000000000..044200767a --- /dev/null +++ b/TargetLibraries/Generic/src/AveragePool_fp32.c @@ -0,0 +1,85 @@ +/* + * SPDX-FileCopyrightText: 2022 ETH Zurich and University of Bologna + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "DeeployBasicMath.h" + +void AveragePool2d_fp32_fp32(float32_t const *__restrict__ src, + float32_t *__restrict__ dst, uint32_t N, + uint32_t C, uint32_t H, uint32_t W, + uint32_t kernel_h, uint32_t kernel_w, + uint32_t stride_h, uint32_t stride_w, + uint32_t pad_top, uint32_t pad_left, + uint32_t pad_bottom, uint32_t pad_right) { + + if (N == 0 || C == 0 || H < kernel_h || W < kernel_w || stride_h == 0 || + stride_w == 0) { + return; + } + + uint32_t H_out = (H + pad_top + pad_bottom - kernel_h) / stride_h + 1; + uint32_t W_out = (W + pad_left + pad_right - kernel_w) / stride_w + 1; + + for (uint32_t n = 0; n < N; ++n) { + for (uint32_t c = 0; c < C; ++c) { + for (uint32_t h_out = 0; h_out < H_out; h_out++) { + for (uint32_t w_out = 0; w_out < W_out; w_out++) { + + float32_t sum = 0.0f; + uint32_t count = 0; + + for (uint32_t kh = 0; kh < kernel_h; kh++) { + for (uint32_t kw = 0; kw < kernel_w; kw++) { + + uint32_t h_in = h_out * stride_h + kh - pad_top; + uint32_t w_in = w_out * stride_w + kw - pad_left; + + if (h_in >= 0 && h_in < H && w_in >= 0 && w_in < W) { + sum += src[((n * C + c) * H + h_in) * W + w_in]; + count++; + } + } + } + uint32_t idx = ((n * C + c) * H_out + h_out) * W_out + w_out; + dst[idx] = sum / (float32_t)count; + } + } + } + } +} + +void AveragePool1d_fp32_fp32(float32_t const *__restrict__ src, + float32_t *__restrict__ dst, uint32_t N, + uint32_t C, uint32_t L, 
uint32_t kernel_len, + uint32_t stride, uint32_t pad_left, + uint32_t pad_right) { + + if (N == 0 || C == 0 || L < kernel_len || stride == 0) { + return; + } + + uint32_t L_out = (L + pad_left + pad_right - kernel_len) / stride + 1; + + for (uint32_t n = 0; n < N; ++n) { + for (uint32_t c = 0; c < C; ++c) { + for (uint32_t l_out = 0; l_out < L_out; l_out++) { + + float32_t sum = 0.0f; + uint32_t count = 0; + + for (uint32_t k = 0; k < kernel_len; k++) { + + uint32_t l_in = l_out * stride + k - pad_left; + + if (l_in >= 0 && l_in < L) { + sum += src[(n * C + c) * L + l_in]; + count++; + } + } + dst[(n * C + c) * L_out + l_out] = sum / (float32_t)count; + } + } + } +} \ No newline at end of file diff --git a/TargetLibraries/Generic/src/GlobalAveragePool_fp32.c b/TargetLibraries/Generic/src/GlobalAveragePool_fp32.c new file mode 100644 index 0000000000..d7da38d459 --- /dev/null +++ b/TargetLibraries/Generic/src/GlobalAveragePool_fp32.c @@ -0,0 +1,25 @@ +/* + * SPDX-FileCopyrightText: 2022 ETH Zurich and University of Bologna + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "DeeployBasicMath.h" + +void GlobalAveragePool_fp32_fp32(float32_t const *__restrict__ src, + float32_t *__restrict__ dst, uint32_t N, + uint32_t C, uint32_t spatial_size) { + + for (uint32_t n = 0; n < N; ++n) { + for (uint32_t c = 0; c < C; ++c) { + + float32_t sum = 0.0f; + const float32_t *x = src + (n * C + c) * spatial_size; + + for (uint32_t i = 0; i < spatial_size; ++i) { + sum += x[i]; + } + dst[n * C + c] = sum / spatial_size; + } + } +} \ No newline at end of file diff --git a/TargetLibraries/Generic/src/GlobalMaxPool_fp32.c b/TargetLibraries/Generic/src/GlobalMaxPool_fp32.c new file mode 100644 index 0000000000..db4fddac43 --- /dev/null +++ b/TargetLibraries/Generic/src/GlobalMaxPool_fp32.c @@ -0,0 +1,29 @@ +/* + * SPDX-FileCopyrightText: 2022 ETH Zurich and University of Bologna + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "DeeployBasicMath.h" + +void 
GlobalMaxPool_fp32_fp32(float32_t const *__restrict__ src, + float32_t *__restrict__ dst, uint32_t N, + uint32_t C, uint32_t spatial_size) { + + for (uint32_t n = 0; n < N; n++) { + for (uint32_t c = 0; c < C; c++) { + + float32_t sum = 0.0f; + const float32_t *x = src + (n * C + c) * spatial_size; + + float32_t max = x[0]; + for (uint32_t i = 1; i < spatial_size; i++) { + if (x[i] > max) { + max = x[i]; + } + } + + dst[n * C + c] = max; + } + } +} \ No newline at end of file From aac6eb4fb79f993eda357144a7ffc237068f660c Mon Sep 17 00:00:00 2001 From: Alex Marchioni Date: Fri, 15 May 2026 21:37:31 +0000 Subject: [PATCH 13/16] add computation of operation to new supported layers --- Deeploy/Targets/Generic/Layers.py | 70 +++++++++++++++---- Deeploy/Targets/Generic/Parsers.py | 1 + .../Generic/src/AveragePool_fp32.c | 9 +-- 3 files changed, 63 insertions(+), 17 deletions(-) diff --git a/Deeploy/Targets/Generic/Layers.py b/Deeploy/Targets/Generic/Layers.py index e625ad7bc5..f1cecced94 100644 --- a/Deeploy/Targets/Generic/Layers.py +++ b/Deeploy/Targets/Generic/Layers.py @@ -715,52 +715,96 @@ def computeOps(self): class CeilLayer(ONNXLayer): - pass + + def computeOps(self): + return self.mapper.parser.operatorRepresentation['size'] class FloorLayer(ONNXLayer): - pass + + def computeOps(self): + return self.mapper.parser.operatorRepresentation['size'] class ClipLayer(ONNXLayer): - pass + + def computeOps(self): + return self.mapper.parser.operatorRepresentation['size'] * 2 # compare vs min and max class ExpLayer(ONNXLayer): - pass + + def computeOps(self): + return self.mapper.parser.operatorRepresentation['size'] class SigmoidLayer(ONNXLayer): - pass + + def computeOps(self): + # σ(x) = 1 / (1 + exp(-x)): neg, exp, add, div + return self.mapper.parser.operatorRepresentation['size'] * 4 class SwishLayer(ONNXLayer): - pass + + def computeOps(self): + # x * σ(x): 4 ops for sigmoid + 1 mul + return self.mapper.parser.operatorRepresentation['size'] * 5 class 
HardSigmoidLayer(ONNXLayer): - pass + + def computeOps(self): + # max(0, min(1, α·x + β)): mul, add, clip(min), clip(max) + return self.mapper.parser.operatorRepresentation['size'] * 4 class HardSwishLayer(ONNXLayer): - pass + + def computeOps(self): + # x * HardSigmoid(x): 4 ops for hard sigmoid + 1 mul + return self.mapper.parser.operatorRepresentation['size'] * 5 class InstanceNormLayer(ONNXLayer): - pass + + def computeOps(self): + # per element: mean-sum(1) + variance(sub+sq+add=3) + normalize(sub+div=2) + affine(mul+add=2) = 8 + # per (batch, channel): mean(div=1) + variance(sqrt+div=2) = 3 + opRep = self.mapper.parser.operatorRepresentation + B, C, S = int(opRep['batch_size']), int(opRep['num_channels']), int(opRep['spatial']) + return B * C * (S * 8 + 3) class GroupNormLayer(ONNXLayer): - pass + + def computeOps(self): + # same structure as InstanceNorm: 8 ops/element + 3 ops per (batch, channel) + opRep = self.mapper.parser.operatorRepresentation + B, C, S = int(opRep['batch_size']), int(opRep['num_channels']), int(opRep['spatial']) + return B * C * (S * 8 + 3) class AveragePoolLayer(ONNXLayer): - pass + + def computeOps(self): + opRep = self.mapper.parser.operatorRepresentation + kernel_elements = int(np.prod(opRep['kernel_shape'])) + # (kernel_elements - 1) additions + 1 division per output element + return opRep['data_out_size'] * kernel_elements class GlobalAveragePoolLayer(ONNXLayer): - pass + + def computeOps(self): + opRep = self.mapper.parser.operatorRepresentation + # (spatial_size - 1) additions + 1 division per output channel + return int(opRep['batch_size'] * opRep['num_channels'] * opRep['spatial_size']) class GlobalMaxPoolLayer(ONNXLayer): - pass + + def computeOps(self): + opRep = self.mapper.parser.operatorRepresentation + # (spatial_size - 1) comparisons per output channel + return int(opRep['batch_size'] * opRep['num_channels'] * (opRep['spatial_size'] - 1)) diff --git a/Deeploy/Targets/Generic/Parsers.py 
b/Deeploy/Targets/Generic/Parsers.py index fd56be4a0a..fc43cbf0cd 100644 --- a/Deeploy/Targets/Generic/Parsers.py +++ b/Deeploy/Targets/Generic/Parsers.py @@ -3058,6 +3058,7 @@ def parseNodeCtxt(self, self.operatorRepresentation['batch_size'] = data_in.shape[0] self.operatorRepresentation['num_channels'] = data_in.shape[1] + self.operatorRepresentation['data_out_size'] = int(np.prod(data_out.shape)) spatial_shape = data_in.shape[2:] if len(self.operatorRepresentation['kernel_shape']) != len(spatial_shape): diff --git a/TargetLibraries/Generic/src/AveragePool_fp32.c b/TargetLibraries/Generic/src/AveragePool_fp32.c index 044200767a..9cfde2e8cb 100644 --- a/TargetLibraries/Generic/src/AveragePool_fp32.c +++ b/TargetLibraries/Generic/src/AveragePool_fp32.c @@ -14,8 +14,9 @@ void AveragePool2d_fp32_fp32(float32_t const *__restrict__ src, uint32_t pad_top, uint32_t pad_left, uint32_t pad_bottom, uint32_t pad_right) { - if (N == 0 || C == 0 || H < kernel_h || W < kernel_w || stride_h == 0 || - stride_w == 0) { + if (N == 0 || C == 0 || stride_h == 0 || stride_w == 0 || + (H + pad_top + pad_bottom) < kernel_h || + (W + pad_left + pad_right) < kernel_w) { return; } @@ -33,8 +34,8 @@ void AveragePool2d_fp32_fp32(float32_t const *__restrict__ src, for (uint32_t kh = 0; kh < kernel_h; kh++) { for (uint32_t kw = 0; kw < kernel_w; kw++) { - uint32_t h_in = h_out * stride_h + kh - pad_top; - uint32_t w_in = w_out * stride_w + kw - pad_left; + int32_t h_in = h_out * stride_h + kh - pad_top; + int32_t w_in = w_out * stride_w + kw - pad_left; if (h_in >= 0 && h_in < H && w_in >= 0 && w_in < W) { sum += src[((n * C + c) * H + h_in) * W + w_in]; From e0d0e73e38994d67b1122cd5af2154a9d0989b6f Mon Sep 17 00:00:00 2001 From: Alex Marchioni Date: Fri, 15 May 2026 21:44:34 +0000 Subject: [PATCH 14/16] fix minor type issue in AveragePool kernel --- TargetLibraries/Generic/src/AveragePool_fp32.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git 
a/TargetLibraries/Generic/src/AveragePool_fp32.c b/TargetLibraries/Generic/src/AveragePool_fp32.c index 9cfde2e8cb..ccc9f60ed3 100644 --- a/TargetLibraries/Generic/src/AveragePool_fp32.c +++ b/TargetLibraries/Generic/src/AveragePool_fp32.c @@ -34,10 +34,11 @@ void AveragePool2d_fp32_fp32(float32_t const *__restrict__ src, for (uint32_t kh = 0; kh < kernel_h; kh++) { for (uint32_t kw = 0; kw < kernel_w; kw++) { - int32_t h_in = h_out * stride_h + kh - pad_top; - int32_t w_in = w_out * stride_w + kw - pad_left; + int32_t h_in = (int32_t)(h_out * stride_h + kh) - pad_top; + int32_t w_in = (int32_t)(w_out * stride_w + kw) - pad_left; - if (h_in >= 0 && h_in < H && w_in >= 0 && w_in < W) { + if (h_in >= 0 && h_in < (int32_t)H && w_in >= 0 && + w_in < (int32_t)W) { sum += src[((n * C + c) * H + h_in) * W + w_in]; count++; } @@ -72,9 +73,9 @@ void AveragePool1d_fp32_fp32(float32_t const *__restrict__ src, for (uint32_t k = 0; k < kernel_len; k++) { - uint32_t l_in = l_out * stride + k - pad_left; + int32_t l_in = (int32_t)(l_out * stride + k) - (int32_t)pad_left; - if (l_in >= 0 && l_in < L) { + if (l_in >= 0 && l_in < (int32_t)L) { sum += src[(n * C + c) * L + l_in]; count++; } From bbaffb7c93d85e643bcdb86568b1a7deadb65290 Mon Sep 17 00:00:00 2001 From: Alex Marchioni Date: Sat, 16 May 2026 08:35:42 +0000 Subject: [PATCH 15/16] update CHANGELOG --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 49f0972a59..64388e74cb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ This file contains the changelog for the Deeploy project. 
The changelog is divid - Deeploy-GAP9 Platform [#143](https://github.com/pulp-platform/Deeploy/pull/143) - Update CLI interface Across Project, Fix Tutorial, and Remove Legacy Test [#157](https://github.com/pulp-platform/Deeploy/pull/157) - Fix for python error when using python 3.12.11 [#189]( https://github.com/pulp-platform/Deeploy/pull/189) +- Add support for Operators for Generic target needed in MAGIA [#193]( https://github.com/pulp-platform/Deeploy/pull/189) ### Added - Add many missing docstrings @@ -26,6 +27,7 @@ This file contains the changelog for the Deeploy project. The changelog is divid - Add integer MaxPool1D for Generic platform and RQSConv1D support for PULPOpen, with corresponding kernel tests. - Added GAP9 Platform Support: Deployer, Bindings, Templates, Tiler, DMA (L3Dma/MchanDma), target library, CI workflows - Per-layer microbenchmarking on PULPOpen via `--profileMicrobenchmark`: new `PULPMicrobenchmark` code-transformation pass + `perf_utils.h` helpers report cycles, instructions, stalls and cache misses per layer in `RunNetwork` +- Add support for the Generic target for the following operators [Ceil](https://onnx.ai/onnx/operators/onnx__Ceil.html), [Floor](https://onnx.ai/onnx/operators/onnx__Floor.html), [Clip](https://onnx.ai/onnx/operators/onnx__Clip.html), [Sub](https://onnx.ai/onnx/operators/onnx__Sub.html), [Exp](https://onnx.ai/onnx/operators/onnx__Exp.html), [Sigmoid](https://onnx.ai/onnx/operators/onnx__Sigmoid.html), [Swish](https://onnx.ai/onnx/operators/onnx__Swish.html), [HardSigmoid](https://onnx.ai/onnx/operators/onnx__HardSigmoid.html), [HardSwish](https://onnx.ai/onnx/operators/onnx__HardSwish.html), [InstanceNormalization](https://onnx.ai/onnx/operators/onnx__InstanceNormalization.html), [GroupNormalization](https://onnx.ai/onnx/operators/onnx__GroupNormalization.html), [AveragePool](https://onnx.ai/onnx/operators/onnx__AveragePool.html), [GlobalAveragePool](https://onnx.ai/onnx/operators/onnx__GlobalAveragePool.html), 
[GlobalMaxPool](https://onnx.ai/onnx/operators/onnx__GlobalMaxPool.html). ### Changed - Use by default `devel` container for GAP9 CI From 5678b476f2fc0038365dac8bf266d196965791a7 Mon Sep 17 00:00:00 2001 From: Alex Marchioni Date: Sat, 16 May 2026 11:01:18 +0000 Subject: [PATCH 16/16] minor fixes (implementing CodeRabbit suggestions) --- CHANGELOG.md | 2 +- Deeploy/Targets/Generic/Layers.py | 6 +++--- Deeploy/Targets/Generic/Parsers.py | 6 +++--- Deeploy/Targets/Generic/Templates/SubTemplate.py | 6 +++--- TargetLibraries/Generic/src/AveragePool_fp32.c | 6 ++++-- TargetLibraries/Generic/src/GlobalAveragePool_fp32.c | 3 +++ TargetLibraries/Generic/src/GroupNormalization_fp32.c | 6 ++++++ TargetLibraries/Generic/src/InstanceNormalization_fp32.c | 4 ++++ 8 files changed, 27 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 64388e74cb..877bb91915 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,7 +16,7 @@ This file contains the changelog for the Deeploy project. 
The changelog is divid - Deeploy-GAP9 Platform [#143](https://github.com/pulp-platform/Deeploy/pull/143) - Update CLI interface Across Project, Fix Tutorial, and Remove Legacy Test [#157](https://github.com/pulp-platform/Deeploy/pull/157) - Fix for python error when using python 3.12.11 [#189]( https://github.com/pulp-platform/Deeploy/pull/189) -- Add support for Operators for Generic target needed in MAGIA [#193]( https://github.com/pulp-platform/Deeploy/pull/189) +- Add support for Operators for Generic target needed in MAGIA [#193]( https://github.com/pulp-platform/Deeploy/pull/193) ### Added - Add many missing docstrings diff --git a/Deeploy/Targets/Generic/Layers.py b/Deeploy/Targets/Generic/Layers.py index f1cecced94..605b8cf782 100644 --- a/Deeploy/Targets/Generic/Layers.py +++ b/Deeploy/Targets/Generic/Layers.py @@ -741,21 +741,21 @@ def computeOps(self): class SigmoidLayer(ONNXLayer): def computeOps(self): - # σ(x) = 1 / (1 + exp(-x)): neg, exp, add, div + # sigmoid(x) = 1 / (1 + exp(-x)): neg, exp, add, div return self.mapper.parser.operatorRepresentation['size'] * 4 class SwishLayer(ONNXLayer): def computeOps(self): - # x * σ(x): 4 ops for sigmoid + 1 mul + # x * sigmoid(x): 4 ops for sigmoid + 1 mul return self.mapper.parser.operatorRepresentation['size'] * 5 class HardSigmoidLayer(ONNXLayer): def computeOps(self): - # max(0, min(1, α·x + β)): mul, add, clip(min), clip(max) + # max(0, min(1, alpha*x + beta)): mul, add, clip(min), clip(max) return self.mapper.parser.operatorRepresentation['size'] * 4 diff --git a/Deeploy/Targets/Generic/Parsers.py b/Deeploy/Targets/Generic/Parsers.py index fc43cbf0cd..c750402198 100644 --- a/Deeploy/Targets/Generic/Parsers.py +++ b/Deeploy/Targets/Generic/Parsers.py @@ -2908,9 +2908,9 @@ def parseNodeCtxt(self, self.operatorRepresentation['min_val'] = -np.finfo(np.float32).max self.operatorRepresentation['max_val'] = np.finfo(np.float32).max - if len(node.inputs) > 1 and node.inputs[1].name != '': + if len(node.inputs) > 
1 and isinstance(node.inputs[1], gs.Constant) and node.inputs[1].name != '': self.operatorRepresentation['min_val'] = float(node.inputs[1].values.item()) - if len(node.inputs) > 2 and node.inputs[2].name != '': + if len(node.inputs) > 2 and isinstance(node.inputs[2], gs.Constant) and node.inputs[2].name != '': self.operatorRepresentation['max_val'] = float(node.inputs[2].values.item()) return ctxt, True @@ -3019,7 +3019,7 @@ def parseNode(self, node: gs.Node) -> bool: auto_pad = node.attrs.get('auto_pad', 'NOTSET') ceil_mode = node.attrs.get('ceil_mode', 0) - count_include_pad = node.attrs.get('count_include_pad ', 0) + count_include_pad = node.attrs.get('count_include_pad', 0) dilations = node.attrs.get('dilations', (1,) * spatial_ndim) strides = node.attrs.get('strides', (1,) * spatial_ndim) pads = node.attrs.get('pads', (0,) * (2 * spatial_ndim)) diff --git a/Deeploy/Targets/Generic/Templates/SubTemplate.py b/Deeploy/Targets/Generic/Templates/SubTemplate.py index 6fa24e54aa..e5fade91ef 100644 --- a/Deeploy/Targets/Generic/Templates/SubTemplate.py +++ b/Deeploy/Targets/Generic/Templates/SubTemplate.py @@ -16,13 +16,13 @@ def alignToContext( input_1_offset = 0 if hasattr(data_in_1, "_signed") and hasattr(data_in_1, "nLevels"): - input_1_offset = (data_in_1._signed == 0) * int(data_in_1.nLevels / 2) + input_1_offset = -(data_in_1._signed == 0) * int(data_in_1.nLevels / 2) input_2_offset = 0 if hasattr(data_in_2, "_signed") and hasattr(data_in_2, "nLevels"): - input_2_offset = -(data_in_2._signed == 0) * int(data_in_2.nLevels / 2) + input_2_offset = (data_in_2._signed == 0) * int(data_in_2.nLevels / 2) output_offset = 0 if hasattr(data_out, "_signed") and hasattr(data_out, "nLevels"): - output_offset = -(data_out._signed == 0) * int(data_out.nLevels // 2) + output_offset = (data_out._signed == 0) * int(data_out.nLevels // 2) operatorRepresentation['offset'] = input_1_offset + input_2_offset + output_offset diff --git a/TargetLibraries/Generic/src/AveragePool_fp32.c 
b/TargetLibraries/Generic/src/AveragePool_fp32.c index ccc9f60ed3..6c17a8a49e 100644 --- a/TargetLibraries/Generic/src/AveragePool_fp32.c +++ b/TargetLibraries/Generic/src/AveragePool_fp32.c @@ -58,7 +58,8 @@ void AveragePool1d_fp32_fp32(float32_t const *__restrict__ src, uint32_t stride, uint32_t pad_left, uint32_t pad_right) { - if (N == 0 || C == 0 || L < kernel_len || stride == 0) { + if (N == 0 || C == 0 || stride == 0 || + (L + pad_left + pad_right) < kernel_len) { return; } @@ -80,7 +81,8 @@ void AveragePool1d_fp32_fp32(float32_t const *__restrict__ src, count++; } } - dst[(n * C + c) * L_out + l_out] = sum / (float32_t)count; + uint32_t i = (n * C + c) * L_out + l_out; + dst[i] = (count == 0) ? 0.0f : (sum / (float32_t)count); } } } diff --git a/TargetLibraries/Generic/src/GlobalAveragePool_fp32.c b/TargetLibraries/Generic/src/GlobalAveragePool_fp32.c index d7da38d459..72c2c08aa0 100644 --- a/TargetLibraries/Generic/src/GlobalAveragePool_fp32.c +++ b/TargetLibraries/Generic/src/GlobalAveragePool_fp32.c @@ -10,6 +10,9 @@ void GlobalAveragePool_fp32_fp32(float32_t const *__restrict__ src, float32_t *__restrict__ dst, uint32_t N, uint32_t C, uint32_t spatial_size) { + if (spatial_size == 0) { + return; // invalid shape for average pooling; avoid divide-by-zero + } for (uint32_t n = 0; n < N; ++n) { for (uint32_t c = 0; c < C; ++c) { diff --git a/TargetLibraries/Generic/src/GroupNormalization_fp32.c b/TargetLibraries/Generic/src/GroupNormalization_fp32.c index e2469db253..24fbf66a72 100644 --- a/TargetLibraries/Generic/src/GroupNormalization_fp32.c +++ b/TargetLibraries/Generic/src/GroupNormalization_fp32.c @@ -14,8 +14,14 @@ void GroupNormalization_fp32_fp32( uint32_t spatial, // spatial dimension (L or H*W or D*H*W, etc.) 
uint32_t num_groups, float32_t epsilon) { + if (num_groups == 0 || spatial == 0 || (num_channels % num_groups) != 0) { + return; + } uint32_t channels_per_group = num_channels / num_groups; uint32_t group_elements = channels_per_group * spatial; + if (group_elements == 0) { + return; + } uint32_t slice = num_channels * spatial; // elements per batch for (uint32_t n = 0; n < batch_size; ++n) { diff --git a/TargetLibraries/Generic/src/InstanceNormalization_fp32.c b/TargetLibraries/Generic/src/InstanceNormalization_fp32.c index ac3c5269b9..23405dc11d 100644 --- a/TargetLibraries/Generic/src/InstanceNormalization_fp32.c +++ b/TargetLibraries/Generic/src/InstanceNormalization_fp32.c @@ -14,6 +14,10 @@ void InstanceNormalization_fp32_fp32( uint32_t spatial, // spatial dimension (L or H*W or D*H*W, etc.) float32_t epsilon) { + if (spatial == 0) { + return; + } + uint32_t slice = num_channels * spatial; // elements per batch for (uint32_t n = 0; n < batch_size; ++n) {