diff --git a/CHANGELOG.md b/CHANGELOG.md index 49f0972a59..877bb91915 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ This file contains the changelog for the Deeploy project. The changelog is divid - Deeploy-GAP9 Platform [#143](https://github.com/pulp-platform/Deeploy/pull/143) - Update CLI interface Across Project, Fix Tutorial, and Remove Legacy Test [#157](https://github.com/pulp-platform/Deeploy/pull/157) - Fix for python error when using python 3.12.11 [#189]( https://github.com/pulp-platform/Deeploy/pull/189) +- Add Generic target support for the operators needed by MAGIA [#193](https://github.com/pulp-platform/Deeploy/pull/193) ### Added - Add many missing docstrings @@ -26,6 +27,7 @@ This file contains the changelog for the Deeploy project. The changelog is divid - Add integer MaxPool1D for Generic platform and RQSConv1D support for PULPOpen, with corresponding kernel tests. - Added GAP9 Platform Support: Deployer, Bindings, Templates, Tiler, DMA (L3Dma/MchanDma), target library, CI workflows - Per-layer microbenchmarking on PULPOpen via `--profileMicrobenchmark`: new `PULPMicrobenchmark` code-transformation pass + `perf_utils.h` helpers report cycles, instructions, stalls and cache misses per layer in `RunNetwork` +- Add support on the Generic target for the following operators: [Ceil](https://onnx.ai/onnx/operators/onnx__Ceil.html), [Floor](https://onnx.ai/onnx/operators/onnx__Floor.html), [Clip](https://onnx.ai/onnx/operators/onnx__Clip.html), [Sub](https://onnx.ai/onnx/operators/onnx__Sub.html), [Exp](https://onnx.ai/onnx/operators/onnx__Exp.html), [Sigmoid](https://onnx.ai/onnx/operators/onnx__Sigmoid.html), [Swish](https://onnx.ai/onnx/operators/onnx__Swish.html), [HardSigmoid](https://onnx.ai/onnx/operators/onnx__HardSigmoid.html), [HardSwish](https://onnx.ai/onnx/operators/onnx__HardSwish.html), [InstanceNormalization](https://onnx.ai/onnx/operators/onnx__InstanceNormalization.html), 
[GroupNormalization](https://onnx.ai/onnx/operators/onnx__GroupNormalization.html), [AveragePool](https://onnx.ai/onnx/operators/onnx__AveragePool.html), [GlobalAveragePool](https://onnx.ai/onnx/operators/onnx__GlobalAveragePool.html), [GlobalMaxPool](https://onnx.ai/onnx/operators/onnx__GlobalMaxPool.html). ### Changed - Use by default `devel` container for GAP9 CI diff --git a/Deeploy/Targets/Generic/Bindings.py b/Deeploy/Targets/Generic/Bindings.py index 308b179aef..21cf01e52a 100644 --- a/Deeploy/Targets/Generic/Bindings.py +++ b/Deeploy/Targets/Generic/Bindings.py @@ -13,13 +13,16 @@ from Deeploy.FutureExtension.CodeTransformationPasses.FutureCodeTransformation import FutureGeneration from Deeploy.Targets.Generic.Templates import AddTemplate, BatchNormalizationTemplate, ConcatTemplate, ConvTemplate, \ ConvTransposeTemplate, DebugPrintTemplate, DequantTemplate, DummyTemplate, DWConvTemplate, FloatAddTemplate, \ - FloatConvTemplate, FloatDivTemplate, FloatDWConvTemplate, FloatGELUTemplate, FloatGemmTemplate, \ - FloatLayernormTemplate, FloatMatMulTemplate, FloatMaxPoolTemplate, FloatMulTemplate, FloatPadTemplate, \ - FloatPowTemplate, FloatReduceMeanTemplate, FloatReluTemplate, FloatSoftmaxTemplate, FloatSqrtTemplate, \ - GatherTemplate, GemmTemplate, IntegerDivTemplate, ITAMaxTemplate, ITAPartialMaxTemplate, MatMulTemplate, \ - MaxPoolTemplate, MulTemplate, PadTemplate, QuantTemplate, ReduceMeanTemplate, ReduceSumTemplate, \ - RequantShiftTemplate, ReshapeTemplate, RQIntegerDivTemplate, RQSiGELUTemplate, SliceTemplate, TransposeTemplate, \ - iGELUTemplate, iLayernormTemplate, iRMSNormTemplate, iSoftmaxTemplate + FloatAveragePoolTemplate, FloatCeilTemplate, FloatClipTemplate, FloatConvTemplate, FloatDivTemplate, \ + FloatDWConvTemplate, FloatExpTemplate, FloatFloorTemplate, FloatGELUTemplate, FloatGemmTemplate, \ + FloatGlobalAveragePoolTemplate, FloatGlobalMaxPoolTemplate, FloatGroupNormTemplate, FloatHardSigmoidTemplate, \ + FloatHardSwishTemplate, 
FloatInstanceNormTemplate, FloatLayernormTemplate, FloatMatMulTemplate, \ + FloatMaxPoolTemplate, FloatMulTemplate, FloatPadTemplate, FloatPowTemplate, FloatReduceMeanTemplate, \ + FloatReluTemplate, FloatSigmoidTemplate, FloatSoftmaxTemplate, FloatSqrtTemplate, FloatSubTemplate, \ + FloatSwishTemplate, GatherTemplate, GemmTemplate, IntegerDivTemplate, ITAMaxTemplate, ITAPartialMaxTemplate, \ + MatMulTemplate, MaxPoolTemplate, MulTemplate, PadTemplate, QuantTemplate, ReduceMeanTemplate, ReduceSumTemplate, \ + RequantShiftTemplate, ReshapeTemplate, RQIntegerDivTemplate, RQSiGELUTemplate, SliceTemplate, SubTemplate, \ + TransposeTemplate, iGELUTemplate, iLayernormTemplate, iRMSNormTemplate, iSoftmaxTemplate from Deeploy.Targets.Generic.TypeCheckers import AddChecker, BatchNormChecker, ConcatChecker, ConvChecker, \ DebugPrintChecker, DequantChecker, DivChecker, DummyChecker, GatherChecker, GELUChecker, GEMMChecker, \ LayerNormChecker, MatMulChecker, MaxPoolChecker, MulChecker, PadChecker, QuantChecker, ReduceMeanChecker, \ @@ -54,6 +57,17 @@ FloatAddTemplate.referenceTemplate, BasicTransformer) ] +# using AddChecker since they are exactly the same +BasicSubBindings = [ + NodeBinding(AddChecker([PointerClass(type1), PointerClass(type2)], [PointerClass(int32_t)]), + SubTemplate.referenceTemplate, BasicTransformer) + for type1 in IntegerDataTypes + for type2 in IntegerDataTypes +] + [ + NodeBinding(AddChecker([PointerClass(float32_t), PointerClass(float32_t)], [PointerClass(float32_t)]), + FloatSubTemplate.referenceTemplate, BasicTransformer) +] + BasicConv1DBindings = [ NodeBinding(ConvChecker( [PointerClass(type), PointerClass(type), PointerClass(type)], [PointerClass(type)]), @@ -327,3 +341,82 @@ ConvTransposeTemplate.referenceTemplate, BasicTransformer) for type in FloatDataTypes ] + +BasicCeilBindings = [ + NodeBinding(DummyChecker([PointerClass(float32_t)], [PointerClass(float32_t)]), FloatCeilTemplate.referenceTemplate, + BasicTransformer), +] + 
+BasicFloorBindings = [ + NodeBinding(DummyChecker([PointerClass(float32_t)], [PointerClass(float32_t)]), + FloatFloorTemplate.referenceTemplate, BasicTransformer), +] + +BasicClipBindings = [ + NodeBinding( + DummyChecker( + [PointerClass(float32_t), PointerClass(float32_t), + PointerClass(float32_t)], [PointerClass(float32_t)]), FloatClipTemplate.referenceTemplate, + BasicTransformer), +] + +BasicExpBindings = [ + NodeBinding(DummyChecker([PointerClass(float32_t)], [PointerClass(float32_t)]), FloatExpTemplate.referenceTemplate, + BasicTransformer), +] + +BasicSigmoidBindings = [ + NodeBinding(DummyChecker([PointerClass(float32_t)], [PointerClass(float32_t)]), + FloatSigmoidTemplate.referenceTemplate, BasicTransformer), +] + +BasicSwishBindings = [ + NodeBinding(DummyChecker([PointerClass(float32_t)], [PointerClass(float32_t)]), + FloatSwishTemplate.referenceTemplate, BasicTransformer), +] + +BasicHardSigmoidBindings = [ + NodeBinding(DummyChecker([PointerClass(float32_t)], [PointerClass(float32_t)]), + FloatHardSigmoidTemplate.referenceTemplate, BasicTransformer), +] + +BasicHardSwishBindings = [ + NodeBinding(DummyChecker([PointerClass(float32_t)], [PointerClass(float32_t)]), + FloatHardSwishTemplate.referenceTemplate, BasicTransformer), +] + +BasicInstanceNormBindings = [ + NodeBinding( + DummyChecker( + [PointerClass(float32_t), PointerClass(float32_t), + PointerClass(float32_t)], [PointerClass(float32_t)]), FloatInstanceNormTemplate.referenceTemplate, + BasicTransformer), +] + +BasicGroupNormBindings = [ + NodeBinding( + DummyChecker( + [PointerClass(float32_t), PointerClass(float32_t), + PointerClass(float32_t)], [PointerClass(float32_t)]), FloatGroupNormTemplate.referenceTemplate, + BasicTransformer), +] + +BasicAveragePool1DBindings = [ + NodeBinding(DummyChecker([PointerClass(float32_t)], [PointerClass(float32_t)]), + FloatAveragePoolTemplate.referenceTemplate1d, BasicTransformer) +] + +BasicAveragePool2DBindings = [ + 
NodeBinding(DummyChecker([PointerClass(float32_t)], [PointerClass(float32_t)]), + FloatAveragePoolTemplate.referenceTemplate2d, BasicTransformer) +] + +BasicGlobalAveragePoolBindings = [ + NodeBinding(DummyChecker([PointerClass(float32_t)], [PointerClass(float32_t)]), + FloatGlobalAveragePoolTemplate.referenceTemplate, BasicTransformer) +] + +BasicGlobalMaxPoolBindings = [ + NodeBinding(DummyChecker([PointerClass(float32_t)], [PointerClass(float32_t)]), + FloatGlobalMaxPoolTemplate.referenceTemplate, BasicTransformer) +] diff --git a/Deeploy/Targets/Generic/Layers.py b/Deeploy/Targets/Generic/Layers.py index cc733937cc..605b8cf782 100644 --- a/Deeploy/Targets/Generic/Layers.py +++ b/Deeploy/Targets/Generic/Layers.py @@ -188,6 +188,9 @@ def computeOps(self): return self.mapper.parser.operatorRepresentation['size'] +SubLayer = AddLayer + + class MatMulLayer(ONNXLayer): def __init__(self, maps: List[NodeMapper]): @@ -709,3 +712,99 @@ def computeOps(self): numPx = opRep['dim_im_out_x'] return numPx * opsPerPx + + +class CeilLayer(ONNXLayer): + + def computeOps(self): + return self.mapper.parser.operatorRepresentation['size'] + + +class FloorLayer(ONNXLayer): + + def computeOps(self): + return self.mapper.parser.operatorRepresentation['size'] + + +class ClipLayer(ONNXLayer): + + def computeOps(self): + return self.mapper.parser.operatorRepresentation['size'] * 2 # compare vs min and max + + +class ExpLayer(ONNXLayer): + + def computeOps(self): + return self.mapper.parser.operatorRepresentation['size'] + + +class SigmoidLayer(ONNXLayer): + + def computeOps(self): + # sigmoid(x) = 1 / (1 + exp(-x)): neg, exp, add, div + return self.mapper.parser.operatorRepresentation['size'] * 4 + + +class SwishLayer(ONNXLayer): + + def computeOps(self): + # x * sigmoid(x): 4 ops for sigmoid + 1 mul + return self.mapper.parser.operatorRepresentation['size'] * 5 + + +class HardSigmoidLayer(ONNXLayer): + + def computeOps(self): + # max(0, min(1, alpha*x + beta)): mul, add, clip(min), 
clip(max) + return self.mapper.parser.operatorRepresentation['size'] * 4 + + +class HardSwishLayer(ONNXLayer): + + def computeOps(self): + # x * HardSigmoid(x): 4 ops for hard sigmoid + 1 mul + return self.mapper.parser.operatorRepresentation['size'] * 5 + + +class InstanceNormLayer(ONNXLayer): + + def computeOps(self): + # per element: mean-sum(1) + variance(sub+sq+add=3) + normalize(sub+div=2) + affine(mul+add=2) = 8 + # per (batch, channel): mean(div=1) + variance(sqrt+div=2) = 3 + opRep = self.mapper.parser.operatorRepresentation + B, C, S = int(opRep['batch_size']), int(opRep['num_channels']), int(opRep['spatial']) + return B * C * (S * 8 + 3) + + +class GroupNormLayer(ONNXLayer): + + def computeOps(self): + # same structure as InstanceNorm: 8 ops/element + 3 ops per (batch, channel) + opRep = self.mapper.parser.operatorRepresentation + B, C, S = int(opRep['batch_size']), int(opRep['num_channels']), int(opRep['spatial']) + return B * C * (S * 8 + 3) + + +class AveragePoolLayer(ONNXLayer): + + def computeOps(self): + opRep = self.mapper.parser.operatorRepresentation + kernel_elements = int(np.prod(opRep['kernel_shape'])) + # (kernel_elements - 1) additions + 1 division per output element + return opRep['data_out_size'] * kernel_elements + + +class GlobalAveragePoolLayer(ONNXLayer): + + def computeOps(self): + opRep = self.mapper.parser.operatorRepresentation + # (spatial_size - 1) additions + 1 division per output channel + return int(opRep['batch_size'] * opRep['num_channels'] * opRep['spatial_size']) + + +class GlobalMaxPoolLayer(ONNXLayer): + + def computeOps(self): + opRep = self.mapper.parser.operatorRepresentation + # (spatial_size - 1) comparisons per output channel + return int(opRep['batch_size'] * opRep['num_channels'] * (opRep['spatial_size'] - 1)) diff --git a/Deeploy/Targets/Generic/Parsers.py b/Deeploy/Targets/Generic/Parsers.py index ad787d9e4b..c750402198 100644 --- a/Deeploy/Targets/Generic/Parsers.py +++ b/Deeploy/Targets/Generic/Parsers.py @@ 
-11,6 +11,23 @@ from Deeploy.DeeployTypes import ConstantBuffer, NetworkContext, NodeParser, VariableBuffer +class UnaryElementWiseParser(NodeParser): + + def parseNode(self, node: gs.Node) -> bool: + return len(node.inputs) == 1 and len(node.outputs) == 1 + + def parseNodeCtxt(self, + ctxt: NetworkContext, + node: gs.Node, + channels_first: bool = True) -> Tuple[NetworkContext, bool]: + data_in = ctxt.lookup(node.inputs[0].name) + data_out = ctxt.lookup(node.outputs[0].name) + self.operatorRepresentation['data_in'] = data_in.name + self.operatorRepresentation['data_out'] = data_out.name + self.operatorRepresentation['size'] = int(np.prod(data_in.shape)) + return ctxt, True + + class ConcatParser(NodeParser): def __init__(self): @@ -492,6 +509,9 @@ def parseNodeCtxt(self, return ctxt, True +SubParser = AddParser + + class ReduceParser(NodeParser): def __init__(self): @@ -1092,29 +1112,10 @@ def parseNodeCtxt(self, return ctxt, True -class ReluParser(NodeParser): - - def __init__(self): - super().__init__() - - def parseNode(self, node: gs.Node) -> (bool): - - ret = all([len(node.inputs) == 1, len(node.outputs) == 1]) - - return ret - - def parseNodeCtxt(self, - ctxt: NetworkContext, - node: gs.Node, - channels_first: bool = True) -> Tuple[NetworkContext, bool]: - - data_in = ctxt.lookup(node.inputs[0].name) - data_out = ctxt.lookup(node.outputs[0].name) - self.operatorRepresentation['data_in'] = data_in.name - self.operatorRepresentation['data_out'] = data_out.name - self.operatorRepresentation['size'] = np.prod(data_in.shape) +class ReluParser(UnaryElementWiseParser): - return ctxt, True + def parseNode(self, node: gs.Node) -> bool: + return super().parseNode(node) and node.op == 'Relu' class ReshapeParser(NodeParser): @@ -2865,13 +2866,185 @@ def parseNodeCtxt(self, return ctxt, False -class SqrtParser(NodeParser): +class SqrtParser(UnaryElementWiseParser): + + def parseNode(self, node: gs.Node) -> bool: + return super().parseNode(node) and node.op == 'Sqrt' + + 
+class CeilParser(UnaryElementWiseParser):
+
+    def parseNode(self, node: gs.Node) -> bool:
+        return super().parseNode(node) and node.op == 'Ceil'
+
+
+class FloorParser(UnaryElementWiseParser):
+
+    def parseNode(self, node: gs.Node) -> bool:
+        return super().parseNode(node) and node.op == 'Floor'
+
+
+class ClipParser(UnaryElementWiseParser):
+    """Parser for ONNX Clip whose optional min/max bounds are constants.
+
+    Omitted bounds default to the full float32 range. A bound that is present but is
+    not a constant makes the node unparseable instead of being silently ignored.
+    """
+
+    def parseNode(self, node: gs.Node) -> bool:
+        # Clip allows 1–3 inputs (optional min/max constants), so we can't use super()
+        return node.op == 'Clip' and len(node.outputs) == 1 and 1 <= len(node.inputs) <= 3
+
+    def parseNodeCtxt(self,
+                      ctxt: NetworkContext,
+                      node: gs.Node,
+                      channels_first: bool = True) -> Tuple[NetworkContext, bool]:
+
+        ctxt, ok = super().parseNodeCtxt(ctxt, node, channels_first)
+        if not ok:
+            return ctxt, False
+
+        # Start from the full float32 range; only constant bounds may narrow it
+        bounds = {'min_val': -np.finfo(np.float32).max, 'max_val': np.finfo(np.float32).max}
+        for key, tensor in zip(('min_val', 'max_val'), node.inputs[1:]):
+            if tensor.name == '':
+                continue  # optional input left out: keep the default
+            if not isinstance(tensor, gs.Constant):
+                return ctxt, False  # a dynamic bound would otherwise be dropped without notice
+            bounds[key] = float(tensor.values.item())
+        self.operatorRepresentation.update(bounds)
+        return ctxt, True
+
+
+class ExpParser(UnaryElementWiseParser):
+
+    def parseNode(self, node: gs.Node) -> bool:
+        return super().parseNode(node) and node.op == 'Exp'
+
+
+class SigmoidParser(UnaryElementWiseParser):
+
+    def parseNode(self, node: gs.Node) -> bool:
+        return super().parseNode(node) and node.op == 'Sigmoid'
+
+
+class SwishParser(UnaryElementWiseParser):
+
+    def parseNode(self, node: gs.Node) -> bool:
+        if not (super().parseNode(node) and node.op == 'Swish'):
+            return False
+        self.operatorRepresentation['alpha'] = node.attrs.get('alpha', 1.0)
+        return True
+
+
+class HardSigmoidParser(UnaryElementWiseParser): + + def parseNode(self, node: gs.Node) -> bool: + if not (super().parseNode(node) and node.op == 'HardSigmoid'): + return False + self.operatorRepresentation['alpha'] = node.attrs.get('alpha', 0.2) + self.operatorRepresentation['beta'] = node.attrs.get('beta', 0.5) + return True - def __init__(self): - super().__init__() + +class HardSwishParser(UnaryElementWiseParser): + + def parseNode(self, node: gs.Node) -> bool: + return super().parseNode(node) and node.op == 'HardSwish' + + +class NormalizationParser(NodeParser): + + def parseNode(self, node: gs.Node) -> bool: + if not all([ + len(node.inputs) == 3, + len(node.outputs) == 1, + ]): + return False + + self.operatorRepresentation['epsilon'] = node.attrs.get('epsilon', 1e-5) + + return True + + def parseNodeCtxt(self, + ctxt: NetworkContext, + node: gs.Node, + channels_first: bool = True) -> Tuple[NetworkContext, bool]: + data_in = ctxt.lookup(node.inputs[0].name) + self.operatorRepresentation['data_in'] = data_in.name + self.operatorRepresentation['scale'] = ctxt.lookup(node.inputs[1].name).name + self.operatorRepresentation['bias'] = ctxt.lookup(node.inputs[2].name).name + self.operatorRepresentation['data_in'] = data_in.name + self.operatorRepresentation['data_out'] = ctxt.lookup(node.outputs[0].name).name + self.operatorRepresentation['batch_size'] = data_in.shape[0] + self.operatorRepresentation['num_channels'] = data_in.shape[1] + self.operatorRepresentation['spatial'] = np.prod(data_in.shape[2:]) + return ctxt, True + + +class InstanceNormParser(NormalizationParser): + + def parseNode(self, node: gs.Node) -> bool: + return super().parseNode(node) and node.op == 'InstanceNormalization' + + +class GroupNormParser(NormalizationParser): + + # TODO: attribute stash_type not handled + def parseNode(self, node: gs.Node) -> bool: + if not all([ + super().parseNode(node), + node.op == 'GroupNormalization', + 'num_groups' in node.attrs, + ]): + return False + 
self.operatorRepresentation['num_groups'] = node.attrs['num_groups'] + return True + + +class AveragePoolParser(NodeParser): def parseNode(self, node: gs.Node) -> bool: - return node.op == 'Sqrt' and len(node.inputs) == 1 and len(node.outputs) == 1 + + if not all([ + node.op == 'AveragePool', + len(node.inputs) == 1, + len(node.outputs) == 1, + 'kernel_shape' in node.attrs, + ]): + return False + + kernel_shape = node.attrs['kernel_shape'] + spatial_ndim = len(kernel_shape) + + auto_pad = node.attrs.get('auto_pad', 'NOTSET') + ceil_mode = node.attrs.get('ceil_mode', 0) + count_include_pad = node.attrs.get('count_include_pad', 0) + dilations = node.attrs.get('dilations', (1,) * spatial_ndim) + strides = node.attrs.get('strides', (1,) * spatial_ndim) + pads = node.attrs.get('pads', (0,) * (2 * spatial_ndim)) + + if not all([ + auto_pad == 'NOTSET', # TODO: implement other values + ceil_mode == 0, # TODO: implement other values + count_include_pad == 0, # TODO: implement other values + all([d == 1 for d in dilations]), # TODO: implement other values + len(dilations) == spatial_ndim, + len(strides) == spatial_ndim, + len(pads) == 2 * spatial_ndim, + all([s > 0 for s in strides]), + ]): + return False + + self.operatorRepresentation['kernel_shape'] = kernel_shape + self.operatorRepresentation['auto_pad'] = auto_pad + self.operatorRepresentation['ceil_mode'] = ceil_mode + self.operatorRepresentation['count_include_pad'] = count_include_pad + self.operatorRepresentation['dilations'] = dilations + self.operatorRepresentation['strides'] = strides + self.operatorRepresentation['pads'] = pads + + return True def parseNodeCtxt(self, ctxt: NetworkContext, @@ -2880,9 +3053,68 @@ def parseNodeCtxt(self, data_in = ctxt.lookup(node.inputs[0].name) data_out = ctxt.lookup(node.outputs[0].name) + self.operatorRepresentation['data_in'] = data_in.name + self.operatorRepresentation['data_out'] = data_out.name + + self.operatorRepresentation['batch_size'] = data_in.shape[0] + 
self.operatorRepresentation['num_channels'] = data_in.shape[1] + self.operatorRepresentation['data_out_size'] = int(np.prod(data_out.shape)) + + spatial_shape = data_in.shape[2:] + if len(self.operatorRepresentation['kernel_shape']) != len(spatial_shape): + return ctxt, False + + if len(spatial_shape) == 1: + self.operatorRepresentation['length'] = spatial_shape[0] + elif len(spatial_shape) == 2: + self.operatorRepresentation['height'] = spatial_shape[0] + self.operatorRepresentation['width'] = spatial_shape[1] + else: + return ctxt, False + + return ctxt, True + + +class AveragePool1DParser(AveragePoolParser): + + def parseNode(self, node: gs.Node) -> bool: + return super().parseNode(node) and len(node.attrs['kernel_shape']) == 1 + + +class AveragePool2DParser(AveragePoolParser): + def parseNode(self, node: gs.Node) -> bool: + return super().parseNode(node) and len(node.attrs['kernel_shape']) == 2 + + +class GlobalPoolParser(NodeParser): + + def parseNode(self, node: gs.Node) -> bool: + return len(node.inputs) == 1 and len(node.outputs) == 1 + + def parseNodeCtxt(self, + ctxt: NetworkContext, + node: gs.Node, + channels_first: bool = True) -> Tuple[NetworkContext, bool]: + + data_in = ctxt.lookup(node.inputs[0].name) + data_out = ctxt.lookup(node.outputs[0].name) self.operatorRepresentation['data_in'] = data_in.name self.operatorRepresentation['data_out'] = data_out.name - self.operatorRepresentation['size'] = int(np.prod(data_in.shape)) + self.operatorRepresentation['batch_size'] = data_in.shape[0] + self.operatorRepresentation['num_channels'] = data_in.shape[1] + self.operatorRepresentation['spatial_size'] = np.prod(data_in.shape[2:]) return ctxt, True + + +class GlobalAveragePoolParser(GlobalPoolParser): + + def parseNode(self, node: gs.Node) -> bool: + return super().parseNode(node) and node.op == 'GlobalAveragePool' + + +class GlobalMaxPoolParser(GlobalPoolParser): + + def parseNode(self, node: gs.Node) -> bool: + return super().parseNode(node) and node.op == 
'GlobalMaxPool' diff --git a/Deeploy/Targets/Generic/Platform.py b/Deeploy/Targets/Generic/Platform.py index e05e897270..2aa1ef1c38 100644 --- a/Deeploy/Targets/Generic/Platform.py +++ b/Deeploy/Targets/Generic/Platform.py @@ -6,33 +6,40 @@ RemoveEmptyConvBiasPass, RemoveOnlySingletonReduceMeanPass from Deeploy.DeeployTypes import ConstantBuffer, DeploymentEngine, DeploymentPlatform, NodeMapper, NodeTemplate, \ StructBuffer, TopologyOptimizer, TransientBuffer, VariableBuffer -from Deeploy.Targets.Generic.Bindings import BasicAddBindings, BasicBatchNormBindings, BasicConcatBindings, \ - BasicConv1DBindings, BasicConv2DBindings, BasicConvTransposeBindings, BasicDebugPrintBindings, \ - BasicDequantBindings, BasicDivBindings, BasicDWConv1DBinding, BasicDWConv2DBindings, BasicGatherBindings, \ - BasicGELUBindings, BasicGEMMBindings, BasicITAPartialSoftmaxBinding, BasicITASoftmaxBinding, \ - BasicLayerNormBindings, BasicMatMulBindings, BasicMaxPool1DBindings, BasicMaxPool2DBindings, BasicMulBindings, \ - BasicPad1DBindings, BasicPad2DBindings, BasicPowBindings, BasicQuantBindings, BasicReduceMeanBindings, \ - BasicReduceSumBindings, BasicReluBinding, BasicReshapeBindings, BasicRQIntegerDivBinding, BasicRQSBindings, \ - BasicRQSGELUBinding, BasicSliceBindings, BasicSoftmaxBindings, BasicSqrtBindings, BasicTransposeBindings, \ - DummyBinding -from Deeploy.Targets.Generic.Layers import AddLayer, BatchNormalizationLayer, ConcatLayer, ConvLayer, \ - ConvTransposeLayer, DebugPrintLayer, DequantLayer, DivLayer, GatherLayer, GELULayer, GEMMLayer, ITAMaxLayer, \ - LayerNormLayer, MatMulLayer, MaxPoolLayer, MulLayer, PadLayer, PowLayer, QuantLayer, ReduceMeanLayer, \ - ReduceSumLayer, ReluLayer, RequantShiftLayer, ReshapeLayer, RQIntegerDivLayer, RQSiGELULayer, SliceLayer, \ - SoftmaxLayer, SqrtLayer, TransposeLayer -from Deeploy.Targets.Generic.Parsers import AddParser, BatchNormParser, ConcatParser, ConvTranspose1DParser, \ - DebugParser, DequantParser, DivParser, DummyParser, 
FlattenParser, GatherParser, GELUParser, GenericConv1DParser, \ - GenericConv2DParser, GenericDWConv1DParser, GenericDWConv2DParser, GenericGEMMParser, GenericMaxPool2DParser, \ - IntegerDivParser, ITAMaxParser, ITAPartialMaxParser, LayerNormParser, MatMulParser, MaxPool1DParser, MulParser, \ - Pad1DParser, Pad2DParser, PowParser, QuantParser, ReduceMeanParser, ReduceSumParser, ReluParser, \ - RequantShiftParser, ReshapeParser, RQIntegerDivParser, RQSiGELUParser, SliceParser, SoftmaxParser, SqrtParser, \ - TransposeParser, UnsqueezeParser, iLayerNormParser, iSoftmaxParser +from Deeploy.Targets.Generic.Bindings import BasicAddBindings, BasicAveragePool1DBindings, BasicAveragePool2DBindings, \ + BasicBatchNormBindings, BasicCeilBindings, BasicClipBindings, BasicConcatBindings, BasicConv1DBindings, \ + BasicConv2DBindings, BasicConvTransposeBindings, BasicDebugPrintBindings, BasicDequantBindings, BasicDivBindings, \ + BasicDWConv1DBinding, BasicDWConv2DBindings, BasicExpBindings, BasicFloorBindings, BasicGatherBindings, \ + BasicGELUBindings, BasicGEMMBindings, BasicGlobalAveragePoolBindings, BasicGlobalMaxPoolBindings, \ + BasicGroupNormBindings, BasicHardSigmoidBindings, BasicHardSwishBindings, BasicInstanceNormBindings, \ + BasicITAPartialSoftmaxBinding, BasicITASoftmaxBinding, BasicLayerNormBindings, BasicMatMulBindings, \ + BasicMaxPool1DBindings, BasicMaxPool2DBindings, BasicMulBindings, BasicPad1DBindings, BasicPad2DBindings, \ + BasicPowBindings, BasicQuantBindings, BasicReduceMeanBindings, BasicReduceSumBindings, BasicReluBinding, \ + BasicReshapeBindings, BasicRQIntegerDivBinding, BasicRQSBindings, BasicRQSGELUBinding, BasicSigmoidBindings, \ + BasicSliceBindings, BasicSoftmaxBindings, BasicSqrtBindings, BasicSubBindings, BasicSwishBindings, \ + BasicTransposeBindings, DummyBinding +from Deeploy.Targets.Generic.Layers import AddLayer, AveragePoolLayer, BatchNormalizationLayer, CeilLayer, ClipLayer, \ + ConcatLayer, ConvLayer, ConvTransposeLayer, 
DebugPrintLayer, DequantLayer, DivLayer, ExpLayer, FloorLayer, \ + GatherLayer, GELULayer, GEMMLayer, GlobalAveragePoolLayer, GlobalMaxPoolLayer, GroupNormLayer, InstanceNormLayer, \ + ITAMaxLayer, LayerNormLayer, MatMulLayer, MaxPoolLayer, MulLayer, PadLayer, PowLayer, QuantLayer, ReduceMeanLayer, \ + ReduceSumLayer, ReluLayer, RequantShiftLayer, ReshapeLayer, RQIntegerDivLayer, RQSiGELULayer, SigmoidLayer, \ + SliceLayer, SoftmaxLayer, SqrtLayer, SubLayer, SwishLayer, TransposeLayer +from Deeploy.Targets.Generic.Parsers import AddParser, AveragePool1DParser, AveragePool2DParser, BatchNormParser, \ + CeilParser, ClipParser, ConcatParser, ConvTranspose1DParser, DebugParser, DequantParser, DivParser, DummyParser, \ + ExpParser, FlattenParser, FloorParser, GatherParser, GELUParser, GenericConv1DParser, GenericConv2DParser, \ + GenericDWConv1DParser, GenericDWConv2DParser, GenericGEMMParser, GenericMaxPool2DParser, GlobalAveragePoolParser, \ + GlobalMaxPoolParser, GroupNormParser, HardSigmoidParser, HardSwishParser, InstanceNormParser, IntegerDivParser, \ + ITAMaxParser, ITAPartialMaxParser, LayerNormParser, MatMulParser, MaxPool1DParser, MulParser, Pad1DParser, \ + Pad2DParser, PowParser, QuantParser, ReduceMeanParser, ReduceSumParser, ReluParser, RequantShiftParser, \ + ReshapeParser, RQIntegerDivParser, RQSiGELUParser, SigmoidParser, SliceParser, SoftmaxParser, SqrtParser, \ + SubParser, SwishParser, TransposeParser, UnsqueezeParser, iLayerNormParser, iSoftmaxParser from Deeploy.Targets.Generic.Templates import AllocateTemplate, FreeTemplate from Deeploy.Targets.Generic.TopologyOptimizationPasses.Passes import DequantPatternPass, ExtractPaddingFromConvPass, \ ExtractPaddingFromPoolPass, MatMulAddMergePass, MergeConstAddAndRequantPass, QuantPatternPass, \ iGELURequantMergePass AddMapper = NodeMapper(AddParser(), BasicAddBindings) +SubMapper = NodeMapper(SubParser(), BasicSubBindings) Conv1DMapper = NodeMapper(GenericConv1DParser(), BasicConv1DBindings) Conv2DMapper 
= NodeMapper(GenericConv2DParser(), BasicConv2DBindings) ConcatMapper = NodeMapper(ConcatParser(), BasicConcatBindings) @@ -73,6 +80,20 @@ BatchNormalizationMapper = NodeMapper(BatchNormParser(), BasicBatchNormBindings) ConvTransposeMapper = NodeMapper(ConvTranspose1DParser(), BasicConvTransposeBindings) SliceMapper = NodeMapper(SliceParser(), BasicSliceBindings) +CeilMapper = NodeMapper(CeilParser(), BasicCeilBindings) +FloorMapper = NodeMapper(FloorParser(), BasicFloorBindings) +ClipMapper = NodeMapper(ClipParser(), BasicClipBindings) +ExpMapper = NodeMapper(ExpParser(), BasicExpBindings) +SigmoidMapper = NodeMapper(SigmoidParser(), BasicSigmoidBindings) +SwishMapper = NodeMapper(SwishParser(), BasicSwishBindings) +HardSigmoidMapper = NodeMapper(HardSigmoidParser(), BasicHardSigmoidBindings) +HardSwishMapper = NodeMapper(HardSwishParser(), BasicHardSwishBindings) +InstanceNormMapper = NodeMapper(InstanceNormParser(), BasicInstanceNormBindings) +GroupNormMapper = NodeMapper(GroupNormParser(), BasicGroupNormBindings) +AveragePool1DMapper = NodeMapper(AveragePool1DParser(), BasicAveragePool1DBindings) +AveragePool2DMapper = NodeMapper(AveragePool2DParser(), BasicAveragePool2DBindings) +GlobalAveragePoolMapper = NodeMapper(GlobalAveragePoolParser(), BasicGlobalAveragePoolBindings) +GlobalMaxPoolMapper = NodeMapper(GlobalMaxPoolParser(), BasicGlobalMaxPoolBindings) # Dummy nodes are intended for development purposes only! 
# They should always generate compiler errors to not accidentally end up in production code @@ -80,6 +101,7 @@ GenericMapping = { 'Add': AddLayer([AddMapper]), + 'Sub': SubLayer([SubMapper]), 'Conv': ConvLayer([Conv2DMapper, DWConv2DMapper, Conv1DMapper, DWConv1DMapper]), 'Concat': ConcatLayer([ConcatMapper]), 'DebugPrint': DebugPrintLayer([DebugMapper]), @@ -118,7 +140,20 @@ 'Quant': QuantLayer([QuantMapper]), 'Dequant': DequantLayer([DequantMapper]), 'BatchNormalization': BatchNormalizationLayer([BatchNormalizationMapper]), - 'ConvTranspose': ConvTransposeLayer([ConvTransposeMapper]) + 'ConvTranspose': ConvTransposeLayer([ConvTransposeMapper]), + 'Ceil': CeilLayer([CeilMapper]), + 'Floor': FloorLayer([FloorMapper]), + 'Clip': ClipLayer([ClipMapper]), + 'Exp': ExpLayer([ExpMapper]), + 'Sigmoid': SigmoidLayer([SigmoidMapper]), + 'Swish': SwishLayer([SwishMapper]), + 'HardSigmoid': SigmoidLayer([HardSigmoidMapper]), + 'HardSwish': SwishLayer([HardSwishMapper]), + 'InstanceNormalization': InstanceNormLayer([InstanceNormMapper]), + 'GroupNormalization': GroupNormLayer([GroupNormMapper]), + 'AveragePool': AveragePoolLayer([AveragePool1DMapper, AveragePool2DMapper]), + 'GlobalAveragePool': GlobalAveragePoolLayer([GlobalAveragePoolMapper]), + 'GlobalMaxPool': GlobalMaxPoolLayer([GlobalMaxPoolMapper]), # # For example, you can use the DummpyMapper, in case you want to test # # deployment or optimizations with GlobalAveragePool nodes but did not yet # # implement the corresponding kernel diff --git a/Deeploy/Targets/Generic/Templates/FloatAveragePoolTemplate.py b/Deeploy/Targets/Generic/Templates/FloatAveragePoolTemplate.py new file mode 100644 index 0000000000..36519dacc2 --- /dev/null +++ b/Deeploy/Targets/Generic/Templates/FloatAveragePoolTemplate.py @@ -0,0 +1,31 @@ +# SPDX-FileCopyrightText: 2023 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 + +from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation + + 
+class _AveragePoolTemplate(NodeTemplate): + + def alignToContext(self, ctxt: NetworkContext, + operatorRepresentation: OperatorRepresentation) -> tuple[NetworkContext, dict, list[str]]: + + data_in = ctxt.lookup(operatorRepresentation['data_in']) + operatorRepresentation['type_width'] = data_in._type.referencedType.typeWidth + return ctxt, operatorRepresentation, [] + + +referenceTemplate1d = _AveragePoolTemplate(""" +// Average Pool 1D (Name: ${nodeName}, Op: ${nodeOp}) +AveragePool1d_fp${type_width}_fp${type_width}( + ${data_in}, ${data_out}, ${batch_size}, ${num_channels}, ${length}, ${kernel_shape[0]}, + ${strides[0]}, ${pads[0]}, ${pads[1]}); +""") + +referenceTemplate2d = _AveragePoolTemplate(""" +// Average Pool 2D (Name: ${nodeName}, Op: ${nodeOp}) +AveragePool2d_fp${type_width}_fp${type_width}( + ${data_in}, ${data_out}, ${batch_size}, ${num_channels}, ${height}, ${width}, + ${kernel_shape[0]}, ${kernel_shape[1]}, ${strides[0]}, ${strides[1]}, + ${pads[0]}, ${pads[1]}, ${pads[2]}, ${pads[3]}); +""") diff --git a/Deeploy/Targets/Generic/Templates/FloatCeilTemplate.py b/Deeploy/Targets/Generic/Templates/FloatCeilTemplate.py new file mode 100644 index 0000000000..198bb3d9c8 --- /dev/null +++ b/Deeploy/Targets/Generic/Templates/FloatCeilTemplate.py @@ -0,0 +1,23 @@ +# SPDX-FileCopyrightText: 2021 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 +import numpy as np + +from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation + + +class _CeilTemplate(NodeTemplate): + + def alignToContext(self, ctxt: NetworkContext, + operatorRepresentation: OperatorRepresentation) -> tuple[NetworkContext, dict, list[str]]: + + data_in = ctxt.lookup(operatorRepresentation['data_in']) + operatorRepresentation['size'] = int(np.prod(data_in.shape)) + operatorRepresentation['type_width'] = data_in._type.referencedType.typeWidth + return ctxt, operatorRepresentation, [] + + +referenceTemplate = _CeilTemplate(""" +// Ceil 
(Name: ${nodeName}, Op: ${nodeOp}) +Ceil_fp${type_width}_fp${type_width}(${data_in}, ${data_out}, ${size}); +""") diff --git a/Deeploy/Targets/Generic/Templates/FloatClipTemplate.py b/Deeploy/Targets/Generic/Templates/FloatClipTemplate.py new file mode 100644 index 0000000000..c61b421755 --- /dev/null +++ b/Deeploy/Targets/Generic/Templates/FloatClipTemplate.py @@ -0,0 +1,22 @@ +# SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 +import numpy as np + +from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation + + +class _ClipTemplate(NodeTemplate): + + def alignToContext(self, ctxt: NetworkContext, + operatorRepresentation: OperatorRepresentation) -> tuple[NetworkContext, dict, list[str]]: + data_in = ctxt.lookup(operatorRepresentation['data_in']) + operatorRepresentation['size'] = int(np.prod(data_in.shape)) + operatorRepresentation['type_width'] = data_in._type.referencedType.typeWidth + return ctxt, operatorRepresentation, [] + + +referenceTemplate = _ClipTemplate(""" +// Clip (Name: ${nodeName}, Op: ${nodeOp}) +Clip_fp${type_width}_fp${type_width}(${data_in}, ${data_out}, ${min_val}, ${max_val}, ${size}); +""") diff --git a/Deeploy/Targets/Generic/Templates/FloatExpTemplate.py b/Deeploy/Targets/Generic/Templates/FloatExpTemplate.py new file mode 100644 index 0000000000..734d7e0fea --- /dev/null +++ b/Deeploy/Targets/Generic/Templates/FloatExpTemplate.py @@ -0,0 +1,23 @@ +# SPDX-FileCopyrightText: 2021 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 +import numpy as np + +from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation + + +class _ExpTemplate(NodeTemplate): + + def alignToContext(self, ctxt: NetworkContext, + operatorRepresentation: OperatorRepresentation) -> tuple[NetworkContext, dict, list[str]]: + + data_in = ctxt.lookup(operatorRepresentation['data_in']) + operatorRepresentation['size'] = 
int(np.prod(data_in.shape)) + operatorRepresentation['type_width'] = data_in._type.referencedType.typeWidth + return ctxt, operatorRepresentation, [] + + +referenceTemplate = _ExpTemplate(""" +// Exp (Name: ${nodeName}, Op: ${nodeOp}) +Exp_fp${type_width}_fp${type_width}(${data_in}, ${data_out}, ${size}); +""") diff --git a/Deeploy/Targets/Generic/Templates/FloatFloorTemplate.py b/Deeploy/Targets/Generic/Templates/FloatFloorTemplate.py new file mode 100644 index 0000000000..2d9768c1f4 --- /dev/null +++ b/Deeploy/Targets/Generic/Templates/FloatFloorTemplate.py @@ -0,0 +1,23 @@ +# SPDX-FileCopyrightText: 2021 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 +import numpy as np + +from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation + + +class _FloorTemplate(NodeTemplate): + + def alignToContext(self, ctxt: NetworkContext, + operatorRepresentation: OperatorRepresentation) -> tuple[NetworkContext, dict, list[str]]: + + data_in = ctxt.lookup(operatorRepresentation['data_in']) + operatorRepresentation['size'] = int(np.prod(data_in.shape)) + operatorRepresentation['type_width'] = data_in._type.referencedType.typeWidth + return ctxt, operatorRepresentation, [] + + +referenceTemplate = _FloorTemplate(""" +// Floor (Name: ${nodeName}, Op: ${nodeOp}) +Floor_fp${type_width}_fp${type_width}(${data_in}, ${data_out}, ${size}); +""") diff --git a/Deeploy/Targets/Generic/Templates/FloatGlobalAveragePoolTemplate.py b/Deeploy/Targets/Generic/Templates/FloatGlobalAveragePoolTemplate.py new file mode 100644 index 0000000000..519fd8e82b --- /dev/null +++ b/Deeploy/Targets/Generic/Templates/FloatGlobalAveragePoolTemplate.py @@ -0,0 +1,22 @@ +# SPDX-FileCopyrightText: 2023 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 + +from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation + + +class _GlobalAveragePoolTemplate(NodeTemplate): + + def alignToContext(self, ctxt: 
NetworkContext, + operatorRepresentation: OperatorRepresentation) -> tuple[NetworkContext, dict, list[str]]: + + data_in = ctxt.lookup(operatorRepresentation['data_in']) + operatorRepresentation['type_width'] = data_in._type.referencedType.typeWidth + return ctxt, operatorRepresentation, [] + + +referenceTemplate = _GlobalAveragePoolTemplate(""" +// Global Average Pool 1D (Name: ${nodeName}, Op: ${nodeOp}) +GlobalAveragePool_fp${type_width}_fp${type_width}( + ${data_in}, ${data_out}, ${batch_size}, ${num_channels}, ${spatial_size}); +""") \ No newline at end of file diff --git a/Deeploy/Targets/Generic/Templates/FloatGlobalMaxPoolTemplate.py b/Deeploy/Targets/Generic/Templates/FloatGlobalMaxPoolTemplate.py new file mode 100644 index 0000000000..c41743a898 --- /dev/null +++ b/Deeploy/Targets/Generic/Templates/FloatGlobalMaxPoolTemplate.py @@ -0,0 +1,22 @@ +# SPDX-FileCopyrightText: 2023 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 + +from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation + + +class _GlobalMaxPoolTemplate(NodeTemplate): + + def alignToContext(self, ctxt: NetworkContext, + operatorRepresentation: OperatorRepresentation) -> tuple[NetworkContext, dict, list[str]]: + + data_in = ctxt.lookup(operatorRepresentation['data_in']) + operatorRepresentation['type_width'] = data_in._type.referencedType.typeWidth + return ctxt, operatorRepresentation, [] + + +referenceTemplate = _GlobalMaxPoolTemplate(""" +// Global Max Pool 1D (Name: ${nodeName}, Op: ${nodeOp}) +GlobalMaxPool_fp${type_width}_fp${type_width}( + ${data_in}, ${data_out}, ${batch_size}, ${num_channels}, ${spatial_size}); +""") \ No newline at end of file diff --git a/Deeploy/Targets/Generic/Templates/FloatGroupNormTemplate.py b/Deeploy/Targets/Generic/Templates/FloatGroupNormTemplate.py new file mode 100644 index 0000000000..9c42d8011c --- /dev/null +++ b/Deeploy/Targets/Generic/Templates/FloatGroupNormTemplate.py @@ -0,0 +1,22 @@ +# 
SPDX-FileCopyrightText: 2023 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 + +from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation + + +class _GroupNormTemplate(NodeTemplate): + + def alignToContext(self, ctxt: NetworkContext, + operatorRepresentation: OperatorRepresentation) -> tuple[NetworkContext, dict, list[str]]: + data_in = ctxt.lookup(operatorRepresentation['data_in']) + operatorRepresentation['type_width'] = data_in._type.referencedType.typeWidth + return ctxt, operatorRepresentation, [] + + +referenceTemplate = _GroupNormTemplate(""" +// Group Normalization (Name: ${nodeName}, Op: ${nodeOp}) +GroupNormalization_fp${type_width}_fp${type_width}( + ${data_in}, ${data_out}, ${scale}, ${bias}, + ${batch_size}, ${num_channels}, ${spatial}, ${num_groups}, ${epsilon}); +""") \ No newline at end of file diff --git a/Deeploy/Targets/Generic/Templates/FloatHardSigmoidTemplate.py b/Deeploy/Targets/Generic/Templates/FloatHardSigmoidTemplate.py new file mode 100644 index 0000000000..135f168c3f --- /dev/null +++ b/Deeploy/Targets/Generic/Templates/FloatHardSigmoidTemplate.py @@ -0,0 +1,23 @@ +# SPDX-FileCopyrightText: 2021 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 +import numpy as np + +from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation + + +class _hardSigmoidTemplate(NodeTemplate): + + def alignToContext(self, ctxt: NetworkContext, + operatorRepresentation: OperatorRepresentation) -> tuple[NetworkContext, dict, list[str]]: + + data_in = ctxt.lookup(operatorRepresentation['data_in']) + operatorRepresentation['size'] = int(np.prod(data_in.shape)) + operatorRepresentation['type_width'] = data_in._type.referencedType.typeWidth + return ctxt, operatorRepresentation, [] + + +referenceTemplate = _hardSigmoidTemplate(""" +// HardSigmoid (Name: ${nodeName}, Op: ${nodeOp}) +HardSigmoid_fp${type_width}_fp${type_width}(${data_in}, ${data_out}, 
${alpha}, ${beta}, ${size}); +""") diff --git a/Deeploy/Targets/Generic/Templates/FloatHardSwishTemplate.py b/Deeploy/Targets/Generic/Templates/FloatHardSwishTemplate.py new file mode 100644 index 0000000000..6ff5c11c77 --- /dev/null +++ b/Deeploy/Targets/Generic/Templates/FloatHardSwishTemplate.py @@ -0,0 +1,23 @@ +# SPDX-FileCopyrightText: 2021 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 +import numpy as np + +from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation + + +class _hardSwishTemplate(NodeTemplate): + + def alignToContext(self, ctxt: NetworkContext, + operatorRepresentation: OperatorRepresentation) -> tuple[NetworkContext, dict, list[str]]: + + data_in = ctxt.lookup(operatorRepresentation['data_in']) + operatorRepresentation['size'] = int(np.prod(data_in.shape)) + operatorRepresentation['type_width'] = data_in._type.referencedType.typeWidth + return ctxt, operatorRepresentation, [] + + +referenceTemplate = _hardSwishTemplate(""" +// HardSwish (Name: ${nodeName}, Op: ${nodeOp}) +HardSwish_fp${type_width}_fp${type_width}(${data_in}, ${data_out}, ${size}); +""") diff --git a/Deeploy/Targets/Generic/Templates/FloatInstanceNormTemplate.py b/Deeploy/Targets/Generic/Templates/FloatInstanceNormTemplate.py new file mode 100644 index 0000000000..efcfce5f86 --- /dev/null +++ b/Deeploy/Targets/Generic/Templates/FloatInstanceNormTemplate.py @@ -0,0 +1,21 @@ +# SPDX-FileCopyrightText: 2023 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 + +from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation + + +class _InstanceNormTemplate(NodeTemplate): + + def alignToContext(self, ctxt: NetworkContext, + operatorRepresentation: OperatorRepresentation) -> tuple[NetworkContext, dict, list[str]]: + data_in = ctxt.lookup(operatorRepresentation['data_in']) + operatorRepresentation['type_width'] = data_in._type.referencedType.typeWidth + return ctxt, 
operatorRepresentation, [] + + +referenceTemplate = _InstanceNormTemplate(""" +// Instance Normalization (Name: ${nodeName}, Op: ${nodeOp}) +InstanceNormalization_fp${type_width}_fp${type_width}( + ${data_in}, ${data_out}, ${scale}, ${bias}, ${batch_size}, ${num_channels}, ${spatial}, ${epsilon}); +""") \ No newline at end of file diff --git a/Deeploy/Targets/Generic/Templates/FloatSigmoidTemplate.py b/Deeploy/Targets/Generic/Templates/FloatSigmoidTemplate.py new file mode 100644 index 0000000000..a25bf411e5 --- /dev/null +++ b/Deeploy/Targets/Generic/Templates/FloatSigmoidTemplate.py @@ -0,0 +1,23 @@ +# SPDX-FileCopyrightText: 2021 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 +import numpy as np + +from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation + + +class _SigmoidTemplate(NodeTemplate): + + def alignToContext(self, ctxt: NetworkContext, + operatorRepresentation: OperatorRepresentation) -> tuple[NetworkContext, dict, list[str]]: + + data_in = ctxt.lookup(operatorRepresentation['data_in']) + operatorRepresentation['size'] = int(np.prod(data_in.shape)) + operatorRepresentation['type_width'] = data_in._type.referencedType.typeWidth + return ctxt, operatorRepresentation, [] + + +referenceTemplate = _SigmoidTemplate(""" +// Sigmoid (Name: ${nodeName}, Op: ${nodeOp}) +Sigmoid_fp${type_width}_fp${type_width}(${data_in}, ${data_out}, ${size}); +""") diff --git a/Deeploy/Targets/Generic/Templates/FloatSubTemplate.py b/Deeploy/Targets/Generic/Templates/FloatSubTemplate.py new file mode 100644 index 0000000000..fcae7e1c0d --- /dev/null +++ b/Deeploy/Targets/Generic/Templates/FloatSubTemplate.py @@ -0,0 +1,14 @@ +# SPDX-FileCopyrightText: 2021 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 + +from Deeploy.DeeployTypes import NodeTemplate + +referenceTemplate = NodeTemplate(""" +// Sub (Name: ${nodeName}, Op: ${nodeOp}) +BEGIN_SINGLE_CORE + for (uint32_t i=0;i<${size};i++){ 
+ ${data_out}[i] = ${data_in_1}[i] - ${data_in_2}[i]; + } +END_SINGLE_CORE +""") diff --git a/Deeploy/Targets/Generic/Templates/FloatSwishTemplate.py b/Deeploy/Targets/Generic/Templates/FloatSwishTemplate.py new file mode 100644 index 0000000000..244e19ee0b --- /dev/null +++ b/Deeploy/Targets/Generic/Templates/FloatSwishTemplate.py @@ -0,0 +1,23 @@ +# SPDX-FileCopyrightText: 2021 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 +import numpy as np + +from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation + + +class _SigmoidTemplate(NodeTemplate): + + def alignToContext(self, ctxt: NetworkContext, + operatorRepresentation: OperatorRepresentation) -> tuple[NetworkContext, dict, list[str]]: + + data_in = ctxt.lookup(operatorRepresentation['data_in']) + operatorRepresentation['size'] = int(np.prod(data_in.shape)) + operatorRepresentation['type_width'] = data_in._type.referencedType.typeWidth + return ctxt, operatorRepresentation, [] + + +referenceTemplate = _SigmoidTemplate(""" +// Swish (Name: ${nodeName}, Op: ${nodeOp}) +Swish_fp${type_width}_fp${type_width}(${data_in}, ${data_out}, ${alpha}, ${size}); +""") diff --git a/Deeploy/Targets/Generic/Templates/SubTemplate.py b/Deeploy/Targets/Generic/Templates/SubTemplate.py new file mode 100644 index 0000000000..e5fade91ef --- /dev/null +++ b/Deeploy/Targets/Generic/Templates/SubTemplate.py @@ -0,0 +1,39 @@ +# SPDX-FileCopyrightText: 2021 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 +from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation + + +class _SubTemplate(NodeTemplate): + + def alignToContext( + self, ctxt: NetworkContext, + operatorRepresentation: OperatorRepresentation) -> tuple[NetworkContext, OperatorRepresentation, list[str]]: + + data_in_1 = ctxt.lookup(operatorRepresentation['data_in_1']) + data_in_2 = ctxt.lookup(operatorRepresentation['data_in_2']) + data_out = 
ctxt.lookup(operatorRepresentation['data_out']) + + input_1_offset = 0 + if hasattr(data_in_1, "_signed") and hasattr(data_in_1, "nLevels"): + input_1_offset = -(data_in_1._signed == 0) * int(data_in_1.nLevels / 2) + input_2_offset = 0 + if hasattr(data_in_2, "_signed") and hasattr(data_in_2, "nLevels"): + input_2_offset = (data_in_2._signed == 0) * int(data_in_2.nLevels / 2) + output_offset = 0 + if hasattr(data_out, "_signed") and hasattr(data_out, "nLevels"): + output_offset = (data_out._signed == 0) * int(data_out.nLevels // 2) + + operatorRepresentation['offset'] = input_1_offset + input_2_offset + output_offset + + return ctxt, operatorRepresentation, [] + + +referenceTemplate = _SubTemplate(""" +// Sub (Name: ${nodeName}, Op: ${nodeOp}) +BEGIN_SINGLE_CORE + for (uint32_t i = 0; i < ${size}; i++){ + ${data_out}[i] = ${data_in_1}[i] - ${data_in_2}[i] + ${offset}; + } +END_SINGLE_CORE +""") diff --git a/DeeployTest/Tests/Kernels/FP32/AveragePool/Regular_1D/inputs.npz b/DeeployTest/Tests/Kernels/FP32/AveragePool/Regular_1D/inputs.npz new file mode 100644 index 0000000000..ac58fc00e2 Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/AveragePool/Regular_1D/inputs.npz differ diff --git a/DeeployTest/Tests/Kernels/FP32/AveragePool/Regular_1D/network.onnx b/DeeployTest/Tests/Kernels/FP32/AveragePool/Regular_1D/network.onnx new file mode 100644 index 0000000000..9472fe8a05 Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/AveragePool/Regular_1D/network.onnx differ diff --git a/DeeployTest/Tests/Kernels/FP32/AveragePool/Regular_1D/outputs.npz b/DeeployTest/Tests/Kernels/FP32/AveragePool/Regular_1D/outputs.npz new file mode 100644 index 0000000000..ca18db8983 Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/AveragePool/Regular_1D/outputs.npz differ diff --git a/DeeployTest/Tests/Kernels/FP32/AveragePool/Regular_2D/inputs.npz b/DeeployTest/Tests/Kernels/FP32/AveragePool/Regular_2D/inputs.npz new file mode 100644 index 0000000000..b80b42275c 
Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/AveragePool/Regular_2D/inputs.npz differ diff --git a/DeeployTest/Tests/Kernels/FP32/AveragePool/Regular_2D/network.onnx b/DeeployTest/Tests/Kernels/FP32/AveragePool/Regular_2D/network.onnx new file mode 100644 index 0000000000..f69e84c010 Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/AveragePool/Regular_2D/network.onnx differ diff --git a/DeeployTest/Tests/Kernels/FP32/AveragePool/Regular_2D/outputs.npz b/DeeployTest/Tests/Kernels/FP32/AveragePool/Regular_2D/outputs.npz new file mode 100644 index 0000000000..1e6f505c5d Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/AveragePool/Regular_2D/outputs.npz differ diff --git a/DeeployTest/Tests/Kernels/FP32/Ceil/inputs.npz b/DeeployTest/Tests/Kernels/FP32/Ceil/inputs.npz new file mode 100644 index 0000000000..ac58fc00e2 Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/Ceil/inputs.npz differ diff --git a/DeeployTest/Tests/Kernels/FP32/Ceil/network.onnx b/DeeployTest/Tests/Kernels/FP32/Ceil/network.onnx new file mode 100644 index 0000000000..d24a1981a0 Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/Ceil/network.onnx differ diff --git a/DeeployTest/Tests/Kernels/FP32/Ceil/outputs.npz b/DeeployTest/Tests/Kernels/FP32/Ceil/outputs.npz new file mode 100644 index 0000000000..0911ac14bf Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/Ceil/outputs.npz differ diff --git a/DeeployTest/Tests/Kernels/FP32/Clip/inputs.npz b/DeeployTest/Tests/Kernels/FP32/Clip/inputs.npz new file mode 100644 index 0000000000..ac58fc00e2 Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/Clip/inputs.npz differ diff --git a/DeeployTest/Tests/Kernels/FP32/Clip/network.onnx b/DeeployTest/Tests/Kernels/FP32/Clip/network.onnx new file mode 100644 index 0000000000..e79b10d0a1 Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/Clip/network.onnx differ diff --git a/DeeployTest/Tests/Kernels/FP32/Clip/outputs.npz 
b/DeeployTest/Tests/Kernels/FP32/Clip/outputs.npz new file mode 100644 index 0000000000..aba055ba03 Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/Clip/outputs.npz differ diff --git a/DeeployTest/Tests/Kernels/FP32/Exp/inputs.npz b/DeeployTest/Tests/Kernels/FP32/Exp/inputs.npz new file mode 100644 index 0000000000..ac58fc00e2 Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/Exp/inputs.npz differ diff --git a/DeeployTest/Tests/Kernels/FP32/Exp/network.onnx b/DeeployTest/Tests/Kernels/FP32/Exp/network.onnx new file mode 100644 index 0000000000..fc64515614 Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/Exp/network.onnx differ diff --git a/DeeployTest/Tests/Kernels/FP32/Exp/outputs.npz b/DeeployTest/Tests/Kernels/FP32/Exp/outputs.npz new file mode 100644 index 0000000000..8d57518ae0 Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/Exp/outputs.npz differ diff --git a/DeeployTest/Tests/Kernels/FP32/Floor/inputs.npz b/DeeployTest/Tests/Kernels/FP32/Floor/inputs.npz new file mode 100644 index 0000000000..ac58fc00e2 Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/Floor/inputs.npz differ diff --git a/DeeployTest/Tests/Kernels/FP32/Floor/network.onnx b/DeeployTest/Tests/Kernels/FP32/Floor/network.onnx new file mode 100644 index 0000000000..d570c282eb Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/Floor/network.onnx differ diff --git a/DeeployTest/Tests/Kernels/FP32/Floor/outputs.npz b/DeeployTest/Tests/Kernels/FP32/Floor/outputs.npz new file mode 100644 index 0000000000..93c0cb3bd5 Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/Floor/outputs.npz differ diff --git a/DeeployTest/Tests/Kernels/FP32/GlobalAveragePool/inputs.npz b/DeeployTest/Tests/Kernels/FP32/GlobalAveragePool/inputs.npz new file mode 100644 index 0000000000..b80b42275c Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/GlobalAveragePool/inputs.npz differ diff --git 
a/DeeployTest/Tests/Kernels/FP32/GlobalAveragePool/network.onnx b/DeeployTest/Tests/Kernels/FP32/GlobalAveragePool/network.onnx new file mode 100644 index 0000000000..4c7238af40 Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/GlobalAveragePool/network.onnx differ diff --git a/DeeployTest/Tests/Kernels/FP32/GlobalAveragePool/outputs.npz b/DeeployTest/Tests/Kernels/FP32/GlobalAveragePool/outputs.npz new file mode 100644 index 0000000000..2b68d327d0 Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/GlobalAveragePool/outputs.npz differ diff --git a/DeeployTest/Tests/Kernels/FP32/GlobalMaxPool/inputs.npz b/DeeployTest/Tests/Kernels/FP32/GlobalMaxPool/inputs.npz new file mode 100644 index 0000000000..b80b42275c Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/GlobalMaxPool/inputs.npz differ diff --git a/DeeployTest/Tests/Kernels/FP32/GlobalMaxPool/network.onnx b/DeeployTest/Tests/Kernels/FP32/GlobalMaxPool/network.onnx new file mode 100644 index 0000000000..76bf8f7c37 Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/GlobalMaxPool/network.onnx differ diff --git a/DeeployTest/Tests/Kernels/FP32/GlobalMaxPool/outputs.npz b/DeeployTest/Tests/Kernels/FP32/GlobalMaxPool/outputs.npz new file mode 100644 index 0000000000..5c74873cb5 Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/GlobalMaxPool/outputs.npz differ diff --git a/DeeployTest/Tests/Kernels/FP32/GroupNorm/inputs.npz b/DeeployTest/Tests/Kernels/FP32/GroupNorm/inputs.npz new file mode 100644 index 0000000000..b80b42275c Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/GroupNorm/inputs.npz differ diff --git a/DeeployTest/Tests/Kernels/FP32/GroupNorm/network.onnx b/DeeployTest/Tests/Kernels/FP32/GroupNorm/network.onnx new file mode 100644 index 0000000000..be2ab5484c Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/GroupNorm/network.onnx differ diff --git a/DeeployTest/Tests/Kernels/FP32/GroupNorm/outputs.npz 
b/DeeployTest/Tests/Kernels/FP32/GroupNorm/outputs.npz new file mode 100644 index 0000000000..c1d73d6d67 Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/GroupNorm/outputs.npz differ diff --git a/DeeployTest/Tests/Kernels/FP32/HardSigmoid/inputs.npz b/DeeployTest/Tests/Kernels/FP32/HardSigmoid/inputs.npz new file mode 100644 index 0000000000..ac58fc00e2 Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/HardSigmoid/inputs.npz differ diff --git a/DeeployTest/Tests/Kernels/FP32/HardSigmoid/network.onnx b/DeeployTest/Tests/Kernels/FP32/HardSigmoid/network.onnx new file mode 100644 index 0000000000..17b5354858 Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/HardSigmoid/network.onnx differ diff --git a/DeeployTest/Tests/Kernels/FP32/HardSigmoid/outputs.npz b/DeeployTest/Tests/Kernels/FP32/HardSigmoid/outputs.npz new file mode 100644 index 0000000000..2e63fd2da1 Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/HardSigmoid/outputs.npz differ diff --git a/DeeployTest/Tests/Kernels/FP32/HardSwish/inputs.npz b/DeeployTest/Tests/Kernels/FP32/HardSwish/inputs.npz new file mode 100644 index 0000000000..ac58fc00e2 Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/HardSwish/inputs.npz differ diff --git a/DeeployTest/Tests/Kernels/FP32/HardSwish/network.onnx b/DeeployTest/Tests/Kernels/FP32/HardSwish/network.onnx new file mode 100644 index 0000000000..281ddf23b0 Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/HardSwish/network.onnx differ diff --git a/DeeployTest/Tests/Kernels/FP32/HardSwish/outputs.npz b/DeeployTest/Tests/Kernels/FP32/HardSwish/outputs.npz new file mode 100644 index 0000000000..d46d07aefe Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/HardSwish/outputs.npz differ diff --git a/DeeployTest/Tests/Kernels/FP32/InstanceNorm/inputs.npz b/DeeployTest/Tests/Kernels/FP32/InstanceNorm/inputs.npz new file mode 100644 index 0000000000..b80b42275c Binary files /dev/null and 
b/DeeployTest/Tests/Kernels/FP32/InstanceNorm/inputs.npz differ diff --git a/DeeployTest/Tests/Kernels/FP32/InstanceNorm/network.onnx b/DeeployTest/Tests/Kernels/FP32/InstanceNorm/network.onnx new file mode 100644 index 0000000000..c817bc0c30 Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/InstanceNorm/network.onnx differ diff --git a/DeeployTest/Tests/Kernels/FP32/InstanceNorm/outputs.npz b/DeeployTest/Tests/Kernels/FP32/InstanceNorm/outputs.npz new file mode 100644 index 0000000000..ace60623d0 Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/InstanceNorm/outputs.npz differ diff --git a/DeeployTest/Tests/Kernels/FP32/Sigmoid/inputs.npz b/DeeployTest/Tests/Kernels/FP32/Sigmoid/inputs.npz new file mode 100644 index 0000000000..ac58fc00e2 Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/Sigmoid/inputs.npz differ diff --git a/DeeployTest/Tests/Kernels/FP32/Sigmoid/network.onnx b/DeeployTest/Tests/Kernels/FP32/Sigmoid/network.onnx new file mode 100644 index 0000000000..be561ee8a8 Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/Sigmoid/network.onnx differ diff --git a/DeeployTest/Tests/Kernels/FP32/Sigmoid/outputs.npz b/DeeployTest/Tests/Kernels/FP32/Sigmoid/outputs.npz new file mode 100644 index 0000000000..9bb1aebe67 Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/Sigmoid/outputs.npz differ diff --git a/DeeployTest/Tests/Kernels/FP32/Sub/inputs.npz b/DeeployTest/Tests/Kernels/FP32/Sub/inputs.npz new file mode 100644 index 0000000000..c4bfb1f89b Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/Sub/inputs.npz differ diff --git a/DeeployTest/Tests/Kernels/FP32/Sub/network.onnx b/DeeployTest/Tests/Kernels/FP32/Sub/network.onnx new file mode 100644 index 0000000000..b82f4c7c13 Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/Sub/network.onnx differ diff --git a/DeeployTest/Tests/Kernels/FP32/Sub/outputs.npz b/DeeployTest/Tests/Kernels/FP32/Sub/outputs.npz new file mode 100644 index 
0000000000..805378eb88 Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/Sub/outputs.npz differ diff --git a/DeeployTest/Tests/Kernels/FP32/Swish/inputs.npz b/DeeployTest/Tests/Kernels/FP32/Swish/inputs.npz new file mode 100644 index 0000000000..ac58fc00e2 Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/Swish/inputs.npz differ diff --git a/DeeployTest/Tests/Kernels/FP32/Swish/network.onnx b/DeeployTest/Tests/Kernels/FP32/Swish/network.onnx new file mode 100644 index 0000000000..9b5251da35 Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/Swish/network.onnx differ diff --git a/DeeployTest/Tests/Kernels/FP32/Swish/outputs.npz b/DeeployTest/Tests/Kernels/FP32/Swish/outputs.npz new file mode 100644 index 0000000000..cfd41c40cd Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/Swish/outputs.npz differ diff --git a/DeeployTest/Tests/Kernels/Integer/Sub/inputs.npz b/DeeployTest/Tests/Kernels/Integer/Sub/inputs.npz new file mode 100644 index 0000000000..411fad498f Binary files /dev/null and b/DeeployTest/Tests/Kernels/Integer/Sub/inputs.npz differ diff --git a/DeeployTest/Tests/Kernels/Integer/Sub/network.onnx b/DeeployTest/Tests/Kernels/Integer/Sub/network.onnx new file mode 100644 index 0000000000..b82f4c7c13 Binary files /dev/null and b/DeeployTest/Tests/Kernels/Integer/Sub/network.onnx differ diff --git a/DeeployTest/Tests/Kernels/Integer/Sub/outputs.npz b/DeeployTest/Tests/Kernels/Integer/Sub/outputs.npz new file mode 100644 index 0000000000..2b1dc905cc Binary files /dev/null and b/DeeployTest/Tests/Kernels/Integer/Sub/outputs.npz differ diff --git a/DeeployTest/test_generic_config.py b/DeeployTest/test_generic_config.py index b0d8c659ca..eaea3d6400 100644 --- a/DeeployTest/test_generic_config.py +++ b/DeeployTest/test_generic_config.py @@ -8,6 +8,10 @@ "Kernels/FP32/ReLU", "Kernels/FP32/Softmax/Regular", "Kernels/FP32/Add/Regular", + "Kernels/FP32/AveragePool/Regular_1D", + "Kernels/FP32/AveragePool/Regular_2D", + 
"Kernels/FP32/Ceil", + "Kernels/FP32/Clip", "Kernels/FP32/Conv/DW_2D_Bias", "Kernels/FP32/Conv/DW_2D_NoBias", "Kernels/FP32/Conv/DW_2D_ZeroValuedBias", @@ -15,7 +19,15 @@ "Kernels/FP32/Conv/Regular_2D_NoBias", "Kernels/FP32/Conv/Regular_2D_ZeroValuedBias", "Kernels/FP32/Div", + "Kernels/FP32/Exp", + "Kernels/FP32/Floor", "Kernels/FP32/GEMM/Regular", + "Kernels/FP32/GlobalAveragePool", + "Kernels/FP32/GlobalMaxPool", + "Kernels/FP32/GroupNorm", + "Kernels/FP32/HardSigmoid", + "Kernels/FP32/HardSwish", + "Kernels/FP32/InstanceNorm", "Kernels/FP32/MatMul", "Kernels/FP32/MaxPool/Regular_1D", "Kernels/FP32/MaxPool/Regular_2D", @@ -43,7 +55,10 @@ "Kernels/FP32/ReduceMean/NoKeepDims/Axis2", "Kernels/FP32/ReduceMean/NoKeepDims/ReduceMean_Add", "Kernels/FP32/Reshape/SkipConnection", + "Kernels/FP32/Sigmoid", "Kernels/FP32/Sqrt", + "Kernels/FP32/Sub", + "Kernels/FP32/Swish", "Kernels/FP32/Transpose", # Integer Kernels "Kernels/Integer/Softmax/Regular", @@ -63,6 +78,7 @@ "Kernels/Integer/ReduceMean", "Kernels/Integer/ReduceSum", "Kernels/Integer/Slice", + "Kernels/Integer/Sub", # Special test from TinyViT model layers "Models/TinyViT/5M/Layers/FP32/ReduceMean", # Mixed Precision / Quantization diff --git a/TargetLibraries/Generic/inc/DeeployBasicMath.h b/TargetLibraries/Generic/inc/DeeployBasicMath.h index 22081701a3..2023b9e725 100644 --- a/TargetLibraries/Generic/inc/DeeployBasicMath.h +++ b/TargetLibraries/Generic/inc/DeeployBasicMath.h @@ -32,14 +32,24 @@ #include "types.h" #include "utils.h" +#include "kernel/AveragePool.h" #include "kernel/BatchNorm.h" +#include "kernel/Ceil.h" +#include "kernel/Clip.h" #include "kernel/ConvTranspose1d_fp32.h" #include "kernel/Convolution.h" #include "kernel/DWConvolution.h" #include "kernel/Div.h" +#include "kernel/Exp.h" +#include "kernel/Floor.h" #include "kernel/GELU.h" #include "kernel/Gemm.h" -#include "kernel/Hardswish.h" +#include "kernel/GlobalAveragePool.h" +#include "kernel/GlobalMaxPool.h" +#include "kernel/GroupNorm.h" 
+#include "kernel/HardSigmoid.h" +#include "kernel/HardSwish.h" +#include "kernel/InstanceNorm.h" #include "kernel/Layernorm.h" #include "kernel/MatMul.h" #include "kernel/MaxPool.h" @@ -50,7 +60,9 @@ #include "kernel/RQHardswish.h" #include "kernel/Relu.h" #include "kernel/RequantShift.h" +#include "kernel/Sigmoid.h" #include "kernel/Softmax.h" #include "kernel/Sqrt.h" +#include "kernel/Swish.h" #endif //__DEEPLOY_BASIC_MATH_HEADER_ diff --git a/TargetLibraries/Generic/inc/kernel/AveragePool.h b/TargetLibraries/Generic/inc/kernel/AveragePool.h new file mode 100644 index 0000000000..2e0c786ffc --- /dev/null +++ b/TargetLibraries/Generic/inc/kernel/AveragePool.h @@ -0,0 +1,29 @@ +/* + * SPDX-FileCopyrightText: 2023 ETH Zurich and University of Bologna + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef __DEEPLOY_BASIC_MATH_AVERAGEPOOL_KERNEL_HEADER_ +#define __DEEPLOY_BASIC_MATH_AVERAGEPOOL_KERNEL_HEADER_ + +#include "DeeployBasicMath.h" + +/******************************************************************************/ +/* Average Pool */ +/******************************************************************************/ +void AveragePool2d_fp32_fp32(float32_t const *__restrict__ src, + float32_t *__restrict__ dst, uint32_t N, + uint32_t C, uint32_t H, uint32_t W, + uint32_t kernel_h, uint32_t kernel_w, + uint32_t stride_h, uint32_t stride_w, + uint32_t pad_top, uint32_t pad_left, + uint32_t pad_bottom, uint32_t pad_right); + +void AveragePool1d_fp32_fp32(float32_t const *__restrict__ src, + float32_t *__restrict__ dst, uint32_t N, + uint32_t C, uint32_t L, uint32_t kernel_len, + uint32_t stride, uint32_t pad_left, + uint32_t pad_right); + +#endif //__DEEPLOY_BASIC_MATH_AVERAGEPOOL_KERNEL_HEADER_ diff --git a/TargetLibraries/Generic/inc/kernel/Ceil.h b/TargetLibraries/Generic/inc/kernel/Ceil.h new file mode 100644 index 0000000000..941b90c75d --- /dev/null +++ b/TargetLibraries/Generic/inc/kernel/Ceil.h @@ -0,0 +1,21 @@ +/* + * SPDX-FileCopyrightText: 2020 
ETH Zurich and University of Bologna + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef __DEEPLOY_BASIC_MATH_CEIL_KERNEL_HEADER_ +#define __DEEPLOY_BASIC_MATH_CEIL_KERNEL_HEADER_ + +#include "DeeployBasicMath.h" + +/* + * element wise ceil operation + */ + +/******************************************************************************/ +/* Ceil */ +/******************************************************************************/ +void Ceil_fp32_fp32(float32_t *data_in, float32_t *data_out, int32_t size); + +#endif //__DEEPLOY_BASIC_MATH_CEIL_KERNEL_HEADER_ diff --git a/TargetLibraries/Generic/inc/kernel/Clip.h b/TargetLibraries/Generic/inc/kernel/Clip.h new file mode 100644 index 0000000000..751c338c03 --- /dev/null +++ b/TargetLibraries/Generic/inc/kernel/Clip.h @@ -0,0 +1,22 @@ +/* + * SPDX-FileCopyrightText: 2020 ETH Zurich and University of Bologna + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef __DEEPLOY_BASIC_MATH_CLIP_KERNEL_HEADER_ +#define __DEEPLOY_BASIC_MATH_CLIP_KERNEL_HEADER_ + +#include "DeeployBasicMath.h" + +/* + * element wise clip operation + */ + +/******************************************************************************/ +/* Clip */ +/******************************************************************************/ +void Clip_fp32_fp32(float32_t *data_in, float32_t *data_out, float32_t min_val, + float32_t max_val, int32_t size); + +#endif //__DEEPLOY_BASIC_MATH_CLIP_KERNEL_HEADER_ diff --git a/TargetLibraries/Generic/inc/kernel/Exp.h b/TargetLibraries/Generic/inc/kernel/Exp.h new file mode 100644 index 0000000000..330a4c476c --- /dev/null +++ b/TargetLibraries/Generic/inc/kernel/Exp.h @@ -0,0 +1,21 @@ +/* + * SPDX-FileCopyrightText: 2020 ETH Zurich and University of Bologna + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef __DEEPLOY_BASIC_MATH_EXP_KERNEL_HEADER_ +#define __DEEPLOY_BASIC_MATH_EXP_KERNEL_HEADER_ + +#include "DeeployBasicMath.h" + +/* + * element wise exponential + */ + 
+/******************************************************************************/
+/*                                    Exp                                     */
+/******************************************************************************/
+void Exp_fp32_fp32(float32_t *data_in, float32_t *data_out, int32_t size);
+
+#endif //__DEEPLOY_BASIC_MATH_EXP_KERNEL_HEADER_
diff --git a/TargetLibraries/Generic/inc/kernel/Floor.h b/TargetLibraries/Generic/inc/kernel/Floor.h
new file mode 100644
index 0000000000..42ef3fd712
--- /dev/null
+++ b/TargetLibraries/Generic/inc/kernel/Floor.h
@@ -0,0 +1,21 @@
+/*
+ * SPDX-FileCopyrightText: 2020 ETH Zurich and University of Bologna
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#ifndef __DEEPLOY_BASIC_MATH_FLOOR_KERNEL_HEADER_
+#define __DEEPLOY_BASIC_MATH_FLOOR_KERNEL_HEADER_
+
+#include "DeeployBasicMath.h"
+
+/*
+ * element wise floor operation
+ */
+
+/******************************************************************************/
+/*                                   Floor                                    */
+/******************************************************************************/
+void Floor_fp32_fp32(float32_t *data_in, float32_t *data_out, int32_t size);
+
+#endif //__DEEPLOY_BASIC_MATH_FLOOR_KERNEL_HEADER_
diff --git a/TargetLibraries/Generic/inc/kernel/GlobalAveragePool.h b/TargetLibraries/Generic/inc/kernel/GlobalAveragePool.h
new file mode 100644
index 0000000000..a64484189e
--- /dev/null
+++ b/TargetLibraries/Generic/inc/kernel/GlobalAveragePool.h
@@ -0,0 +1,19 @@
+/*
+ * SPDX-FileCopyrightText: 2023 ETH Zurich and University of Bologna
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#ifndef __DEEPLOY_BASIC_MATH_GLOBALAVERAGEPOOL_KERNEL_HEADER_
+#define __DEEPLOY_BASIC_MATH_GLOBALAVERAGEPOOL_KERNEL_HEADER_
+
+#include "DeeployBasicMath.h"
+
+/******************************************************************************/
+/*                            Global Average Pool                             */
+/******************************************************************************/
+void GlobalAveragePool_fp32_fp32(float32_t const *__restrict__ src,
+                                 float32_t *__restrict__ dst, uint32_t N,
+                                 uint32_t C, uint32_t spatial_size);
+
+#endif //__DEEPLOY_BASIC_MATH_GLOBALAVERAGEPOOL_KERNEL_HEADER_
diff --git a/TargetLibraries/Generic/inc/kernel/GlobalMaxPool.h b/TargetLibraries/Generic/inc/kernel/GlobalMaxPool.h
new file mode 100644
index 0000000000..030b26211e
--- /dev/null
+++ b/TargetLibraries/Generic/inc/kernel/GlobalMaxPool.h
@@ -0,0 +1,23 @@
+/*
+ * SPDX-FileCopyrightText: 2023 ETH Zurich and University of Bologna
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#ifndef __DEEPLOY_BASIC_MATH_GLOBALMAXPOOL_KERNEL_HEADER_
+#define __DEEPLOY_BASIC_MATH_GLOBALMAXPOOL_KERNEL_HEADER_
+
+#include "DeeployBasicMath.h"
+
+/******************************************************************************/
+/*                              Global Max Pool                               */
+/******************************************************************************/
+/*
+ * For every (n, c) pair, writes the maximum of the spatial_size contiguous
+ * input values of that channel to dst[n * C + c].
+ */
+void GlobalMaxPool_fp32_fp32(float32_t const *__restrict__ src,
+                             float32_t *__restrict__ dst, uint32_t N,
+                             uint32_t C, uint32_t spatial_size);
+
+#endif //__DEEPLOY_BASIC_MATH_GLOBALMAXPOOL_KERNEL_HEADER_
diff --git a/TargetLibraries/Generic/inc/kernel/GroupNorm.h b/TargetLibraries/Generic/inc/kernel/GroupNorm.h
new file mode 100644
index 0000000000..02ca5a2e57
--- /dev/null
+++ b/TargetLibraries/Generic/inc/kernel/GroupNorm.h
@@ -0,0 +1,23 @@
+/*
+ * SPDX-FileCopyrightText: 2023 ETH Zurich and University of Bologna
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#ifndef __DEEPLOY_BASIC_MATH_GROUPNORM_KERNEL_HEADER_
+#define __DEEPLOY_BASIC_MATH_GROUPNORM_KERNEL_HEADER_
+
+#include "DeeployBasicMath.h"
+
+/******************************************************************************/
+/*                            Group Normalization                             */
+/******************************************************************************/
+void GroupNormalization_fp32_fp32(const float32_t *__restrict__ src,
+                                  float32_t *__restrict__ dst,
+                                  const float32_t *__restrict__ scale,
+                                  const float32_t *__restrict__ bias,
+                                  uint32_t batch_size, uint32_t num_channels,
+                                  uint32_t spatial, uint32_t num_groups,
+                                  float32_t epsilon);
+
+#endif //__DEEPLOY_BASIC_MATH_GROUPNORM_KERNEL_HEADER_
diff --git a/TargetLibraries/Generic/inc/kernel/HardSigmoid.h b/TargetLibraries/Generic/inc/kernel/HardSigmoid.h
new file mode 100644
index 0000000000..542689eb33
--- /dev/null
+++ b/TargetLibraries/Generic/inc/kernel/HardSigmoid.h
@@ -0,0 +1,22 @@
+/*
+ * SPDX-FileCopyrightText: 2020 ETH Zurich and University of Bologna
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#ifndef __DEEPLOY_BASIC_MATH_HARDSIGMOID_KERNEL_HEADER_
+#define __DEEPLOY_BASIC_MATH_HARDSIGMOID_KERNEL_HEADER_
+
+#include "DeeployBasicMath.h"
+
+/*
+ * element wise hard-sigmoid
+ */
+
+/******************************************************************************/
+/*                                HardSigmoid                                 */
+/******************************************************************************/
+void HardSigmoid_fp32_fp32(float32_t *data_in, float32_t *data_out,
+                           float32_t alpha, float32_t beta, int32_t size);
+
+#endif //__DEEPLOY_BASIC_MATH_HARDSIGMOID_KERNEL_HEADER_
diff --git a/TargetLibraries/Generic/inc/kernel/Hardswish.h b/TargetLibraries/Generic/inc/kernel/HardSwish.h
similarity index 69%
rename from TargetLibraries/Generic/inc/kernel/Hardswish.h
rename to TargetLibraries/Generic/inc/kernel/HardSwish.h
index e0df42efbb..51e891622e 100644
--- a/TargetLibraries/Generic/inc/kernel/Hardswish.h
+++ b/TargetLibraries/Generic/inc/kernel/HardSwish.h
@@ -17,4 +17,10 @@ void iHardswish_s8_s32(int8_t *input, int32_t *output, int32_t size,
                        int32_t one_over_six, int32_t three, int32_t six,
                        int32_t input_offset);
 
+/******************************************************************************/
+/*                              Hardswish (fp32)                              */
+/******************************************************************************/
+
+void HardSwish_fp32_fp32(float32_t *data_in, float32_t *data_out, int32_t size);
+
 #endif // __DEEPLOY_BASIC_MATH_HARDSWISH_KERNEL_HEADER_
\ No newline at end of file
diff --git a/TargetLibraries/Generic/inc/kernel/InstanceNorm.h
b/TargetLibraries/Generic/inc/kernel/InstanceNorm.h new file mode 100644 index 0000000000..dd68b0cadb --- /dev/null +++ b/TargetLibraries/Generic/inc/kernel/InstanceNorm.h @@ -0,0 +1,22 @@ +/* + * SPDX-FileCopyrightText: 2023 ETH Zurich and University of Bologna + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef __DEEPLOY_BASIC_MATH_INSTANCENORM_KERNEL_HEADER_ +#define __DEEPLOY_BASIC_MATH_INSTANCENORM_KERNEL_HEADER_ + +#include "DeeployBasicMath.h" + +/******************************************************************************/ +/* Instance Normalization */ +/******************************************************************************/ +void InstanceNormalization_fp32_fp32(const float32_t *__restrict__ src, + float32_t *__restrict__ dst, + const float32_t *__restrict__ scale, + const float32_t *__restrict__ bias, + uint32_t batch_size, uint32_t num_channels, + uint32_t spatial, float32_t epsilon); + +#endif //__DEEPLOY_BASIC_MATH_INSTANCENORM_KERNEL_HEADER_ diff --git a/TargetLibraries/Generic/inc/kernel/Sigmoid.h b/TargetLibraries/Generic/inc/kernel/Sigmoid.h new file mode 100644 index 0000000000..d9a960cab3 --- /dev/null +++ b/TargetLibraries/Generic/inc/kernel/Sigmoid.h @@ -0,0 +1,21 @@ +/* + * SPDX-FileCopyrightText: 2020 ETH Zurich and University of Bologna + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef __DEEPLOY_BASIC_MATH_SIGMOID_KERNEL_HEADER_ +#define __DEEPLOY_BASIC_MATH_SIGMOID_KERNEL_HEADER_ + +#include "DeeployBasicMath.h" + +/* + * element wise sigmoid + */ + +/******************************************************************************/ +/* Sigmoid */ +/******************************************************************************/ +void Sigmoid_fp32_fp32(float32_t *data_in, float32_t *data_out, int32_t size); + +#endif //__DEEPLOY_BASIC_MATH_SIGMOID_KERNEL_HEADER_ diff --git a/TargetLibraries/Generic/inc/kernel/Swish.h b/TargetLibraries/Generic/inc/kernel/Swish.h new file mode 100644 index 0000000000..326f7822c8 --- 
/dev/null
+++ b/TargetLibraries/Generic/inc/kernel/Swish.h
@@ -0,0 +1,22 @@
+/*
+ * SPDX-FileCopyrightText: 2020 ETH Zurich and University of Bologna
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#ifndef __DEEPLOY_BASIC_MATH_SWISH_KERNEL_HEADER_
+#define __DEEPLOY_BASIC_MATH_SWISH_KERNEL_HEADER_
+
+#include "DeeployBasicMath.h"
+
+/*
+ * element wise swish
+ */
+
+/******************************************************************************/
+/*                                   Swish                                    */
+/******************************************************************************/
+void Swish_fp32_fp32(float32_t *data_in, float32_t *data_out, float alpha,
+                     int32_t size);
+
+#endif //__DEEPLOY_BASIC_MATH_SWISH_KERNEL_HEADER_
diff --git a/TargetLibraries/Generic/src/AveragePool_fp32.c b/TargetLibraries/Generic/src/AveragePool_fp32.c
new file mode 100644
index 0000000000..6c17a8a49e
--- /dev/null
+++ b/TargetLibraries/Generic/src/AveragePool_fp32.c
@@ -0,0 +1,107 @@
+/*
+ * SPDX-FileCopyrightText: 2022 ETH Zurich and University of Bologna
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include "DeeployBasicMath.h"
+
+/*
+ * 2D average pooling of src, indexed as ((n * C + c) * H + h) * W + w.
+ * Only taps that land inside the input are summed and counted, so padded
+ * positions do not contribute to the average.
+ * Returns without touching dst when N, C or a stride is zero, or when the
+ * padded input is smaller than the kernel.
+ */
+void AveragePool2d_fp32_fp32(float32_t const *__restrict__ src,
+                             float32_t *__restrict__ dst, uint32_t N,
+                             uint32_t C, uint32_t H, uint32_t W,
+                             uint32_t kernel_h, uint32_t kernel_w,
+                             uint32_t stride_h, uint32_t stride_w,
+                             uint32_t pad_top, uint32_t pad_left,
+                             uint32_t pad_bottom, uint32_t pad_right) {
+
+  if (N == 0 || C == 0 || stride_h == 0 || stride_w == 0 ||
+      (H + pad_top + pad_bottom) < kernel_h ||
+      (W + pad_left + pad_right) < kernel_w) {
+    return;
+  }
+
+  uint32_t H_out = (H + pad_top + pad_bottom - kernel_h) / stride_h + 1;
+  uint32_t W_out = (W + pad_left + pad_right - kernel_w) / stride_w + 1;
+
+  for (uint32_t n = 0; n < N; ++n) {
+    for (uint32_t c = 0; c < C; ++c) {
+      for (uint32_t h_out = 0; h_out < H_out; h_out++) {
+        for (uint32_t w_out = 0; w_out < W_out; w_out++) {
+
+          float32_t sum = 0.0f;
+          uint32_t count = 0;
+
+          for (uint32_t kh = 0; kh < kernel_h; kh++) {
+            for (uint32_t kw = 0; kw < kernel_w; kw++) {
+
+              /* Subtract the padding as a signed value, as the 1D kernel
+               * below does, instead of relying on unsigned wrap-around. */
+              int32_t h_in =
+                  (int32_t)(h_out * stride_h + kh) - (int32_t)pad_top;
+              int32_t w_in =
+                  (int32_t)(w_out * stride_w + kw) - (int32_t)pad_left;
+
+              if (h_in >= 0 && h_in < (int32_t)H && w_in >= 0 &&
+                  w_in < (int32_t)W) {
+                sum += src[((n * C + c) * H + h_in) * W + w_in];
+                count++;
+              }
+            }
+          }
+          uint32_t idx = ((n * C + c) * H_out + h_out) * W_out + w_out;
+          /* A window that lies entirely in the padding has no valid tap:
+           * write 0 instead of dividing by zero, like the 1D kernel. */
+          dst[idx] = (count == 0) ? 0.0f : (sum / (float32_t)count);
+        }
+      }
+    }
+  }
+}
+
+/*
+ * 1D average pooling of src, indexed as (n * C + c) * L + l.
+ * Padded positions are excluded from both the sum and the divisor; a window
+ * with no valid tap yields 0.
+ */
+void AveragePool1d_fp32_fp32(float32_t const *__restrict__ src,
+                             float32_t *__restrict__ dst, uint32_t N,
+                             uint32_t C, uint32_t L, uint32_t kernel_len,
+                             uint32_t stride, uint32_t pad_left,
+                             uint32_t pad_right) {
+
+  if (N == 0 || C == 0 || stride == 0 ||
+      (L + pad_left + pad_right) < kernel_len) {
+    return;
+  }
+
+  uint32_t L_out = (L + pad_left + pad_right - kernel_len) / stride + 1;
+
+  for (uint32_t n = 0; n < N; ++n) {
+    for (uint32_t c = 0; c < C; ++c) {
+      for (uint32_t l_out = 0; l_out < L_out; l_out++) {
+
+        float32_t sum = 0.0f;
+        uint32_t count = 0;
+
+        for (uint32_t k = 0; k < kernel_len; k++) {
+
+          int32_t l_in = (int32_t)(l_out * stride + k) - (int32_t)pad_left;
+
+          if (l_in >= 0 && l_in < (int32_t)L) {
+            sum += src[(n * C + c) * L + l_in];
+            count++;
+          }
+        }
+        uint32_t i = (n * C + c) * L_out + l_out;
+        dst[i] = (count == 0) ? 0.0f : (sum / (float32_t)count);
+      }
+    }
+  }
+}
\ No newline at end of file
diff --git a/TargetLibraries/Generic/src/Ceil_fp32.c b/TargetLibraries/Generic/src/Ceil_fp32.c
new file mode 100644
index 0000000000..fe73e20637
--- /dev/null
+++ b/TargetLibraries/Generic/src/Ceil_fp32.c
@@ -0,0 +1,15 @@
+/*
+ * SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include "DeeployBasicMath.h"
+#include <math.h>
+
+/* data_out[i] = ceilf(data_in[i]) for i in [0, size). */
+void Ceil_fp32_fp32(float32_t *data_in, float32_t *data_out, int32_t size) {
+  for (int i = 0; i < size; i++) {
+    data_out[i] = ceilf(data_in[i]);
+  }
+}
diff --git a/TargetLibraries/Generic/src/Clip_fp32.c b/TargetLibraries/Generic/src/Clip_fp32.c
new file mode 100644
index 0000000000..092fbf7b60
--- /dev/null
+++ b/TargetLibraries/Generic/src/Clip_fp32.c
@@ -0,0 +1,16 @@
+/*
+ * SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include "DeeployBasicMath.h"
+#include <math.h>
+
+/* data_out[i] = fmaxf(min_val, fminf(max_val, data_in[i])) for i < size. */
+void Clip_fp32_fp32(float32_t *data_in, float32_t *data_out, float32_t min_val,
+                    float32_t max_val, int32_t size) {
+  for (int i = 0; i < size; i++) {
+    data_out[i] = fmaxf(min_val, fminf(max_val, data_in[i]));
+  }
+}
diff --git a/TargetLibraries/Generic/src/Exp_fp32.c b/TargetLibraries/Generic/src/Exp_fp32.c
new file mode 100644
index 0000000000..6dfdeb52db
--- /dev/null
+++ b/TargetLibraries/Generic/src/Exp_fp32.c
@@ -0,0 +1,15 @@
+/*
+ * SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include "DeeployBasicMath.h"
+#include <math.h>
+
+/* data_out[i] = expf(data_in[i]) for i in [0, size). */
+void Exp_fp32_fp32(float32_t *data_in, float32_t *data_out, int32_t size) {
+  for (int i = 0; i < size; i++) {
+    data_out[i] = expf(data_in[i]);
+  }
+}
diff --git a/TargetLibraries/Generic/src/Floor_fp32.c b/TargetLibraries/Generic/src/Floor_fp32.c
new file mode 100644
index 0000000000..2618e516fa
--- /dev/null
+++ b/TargetLibraries/Generic/src/Floor_fp32.c
@@ -0,0 +1,14 @@
+/*
+ * SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include "DeeployBasicMath.h"
+#include <math.h>
+
+void Floor_fp32_fp32(float32_t *data_in, float32_t *data_out, int32_t size) {
+  for (int i = 0; i < size; i++) {
+    data_out[i] = floorf(data_in[i]);
+  }
+}
diff --git a/TargetLibraries/Generic/src/GlobalAveragePool_fp32.c b/TargetLibraries/Generic/src/GlobalAveragePool_fp32.c
new file mode 100644
index 0000000000..72c2c08aa0
--- /dev/null
+++ b/TargetLibraries/Generic/src/GlobalAveragePool_fp32.c
@@ -0,0 +1,29 @@
+/*
+ * SPDX-FileCopyrightText: 2022 ETH Zurich and University of Bologna
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include "DeeployBasicMath.h"
+
+/* dst[n * C + c] = mean of the spatial_size values of channel c, batch n. */
+void GlobalAveragePool_fp32_fp32(float32_t const *__restrict__ src,
+                                 float32_t *__restrict__ dst, uint32_t N,
+                                 uint32_t C, uint32_t spatial_size) {
+
+  if (spatial_size == 0) {
+    return; // invalid shape for average pooling; avoid divide-by-zero
+  }
+  for (uint32_t n = 0; n < N; ++n) {
+    for (uint32_t c = 0; c < C; ++c) {
+
+      float32_t sum = 0.0f;
+      const float32_t *x = src + (n * C + c) * spatial_size;
+
+      for (uint32_t i = 0; i < spatial_size; ++i) {
+        sum += x[i];
+      }
+      dst[n * C + c] = sum / spatial_size;
+    }
+  }
+}
\ No newline at end of file
diff --git a/TargetLibraries/Generic/src/GlobalMaxPool_fp32.c b/TargetLibraries/Generic/src/GlobalMaxPool_fp32.c
new file mode 100644
index 0000000000..db4fddac43
--- /dev/null
+++ b/TargetLibraries/Generic/src/GlobalMaxPool_fp32.c
@@ -0,0 +1,37 @@
+/*
+ * SPDX-FileCopyrightText: 2022 ETH Zurich and University of Bologna
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include "DeeployBasicMath.h"
+
+/*
+ * Global max pooling: dst[n * C + c] receives the maximum of the
+ * spatial_size contiguous values of channel c in batch n.
+ * Returns without touching dst when spatial_size is zero, since there is
+ * no element to take a maximum of.
+ */
+void GlobalMaxPool_fp32_fp32(float32_t const *__restrict__ src,
+                             float32_t *__restrict__ dst, uint32_t N,
+                             uint32_t C, uint32_t spatial_size) {
+
+  if (spatial_size == 0) {
+    return; // empty reduction; x[0] below would read out of bounds
+  }
+  for (uint32_t n = 0; n < N; n++) {
+    for (uint32_t c = 0; c < C; c++) {
+
+      const float32_t *x = src + (n * C + c) * spatial_size;
+
+      float32_t max = x[0];
+      for (uint32_t i = 1; i < spatial_size; i++) {
+        if (x[i] > max) {
+          max = x[i];
+        }
+      }
+
+      dst[n * C + c] = max;
+    }
+  }
+}
\ No newline at end of file
diff --git a/TargetLibraries/Generic/src/GroupNormalization_fp32.c b/TargetLibraries/Generic/src/GroupNormalization_fp32.c
new file mode 100644
index 0000000000..24fbf66a72
--- /dev/null
+++ b/TargetLibraries/Generic/src/GroupNormalization_fp32.c
@@ -0,0 +1,72 @@
+/*
+ * SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include "DeeployBasicMath.h"
+#include <math.h>
+
+/*
+ * Group normalization over an input laid out as
+ * [batch_size][num_channels][spatial]. Each group of
+ * num_channels / num_groups consecutive channels is normalized with its own
+ * mean and (biased) variance, then scaled and shifted per channel.
+ * Returns without touching dst when num_groups or spatial is zero or when
+ * num_channels is not a multiple of num_groups.
+ */
+void GroupNormalization_fp32_fp32(
+    const float32_t *__restrict__ src, float32_t *__restrict__ dst,
+    const float32_t *__restrict__ scale, const float32_t *__restrict__ bias,
+    uint32_t batch_size, uint32_t num_channels,
+    uint32_t spatial, // spatial dimension (L or H*W or D*H*W, etc.)
+    uint32_t num_groups, float32_t epsilon) {
+
+  if (num_groups == 0 || spatial == 0 || (num_channels % num_groups) != 0) {
+    return;
+  }
+  uint32_t channels_per_group = num_channels / num_groups;
+  uint32_t group_elements = channels_per_group * spatial;
+  if (group_elements == 0) {
+    return;
+  }
+  uint32_t slice = num_channels * spatial; // elements per batch
+
+  for (uint32_t n = 0; n < batch_size; ++n) {
+    for (uint32_t g = 0; g < num_groups; ++g) {
+      uint32_t group_offset = n * slice + g * group_elements;
+      const float32_t *x_group = src + group_offset;
+
+      /* --- mean (float64 divisor: float32 is inexact above 2^24) --- */
+      float64_t sum = 0.0;
+      for (uint32_t i = 0; i < group_elements; ++i) {
+        sum += x_group[i];
+      }
+      float64_t mean = sum / (float64_t)group_elements;
+
+      /* --- variance --- */
+      float64_t var = 0.0;
+      for (uint32_t i = 0; i < group_elements; ++i) {
+        float64_t d = (float64_t)x_group[i] - mean;
+        var += d * d;
+      }
+      var /= (float64_t)group_elements;
+
+      /* --- normalize + affine --- */
+      float32_t inv_std = (float32_t)(1.0 / sqrt(var + (float64_t)epsilon));
+      float32_t m = (float32_t)mean;
+
+      for (uint32_t lc = 0; lc < channels_per_group; ++lc) {
+        const float32_t *x_channel = x_group + lc * spatial;
+        float32_t *y_channel = dst + group_offset + lc * spatial;
+        uint32_t c = g * channels_per_group + lc; // global channel
+        float32_t s = scale[c];
+        float32_t b = bias[c];
+
+        for (uint32_t i = 0; i < spatial; ++i) {
+          y_channel[i] = s * (x_channel[i] - m) * inv_std + b;
+        }
+      }
+    }
+  }
+}
diff --git a/TargetLibraries/Generic/src/HardSigmoid_fp32.c b/TargetLibraries/Generic/src/HardSigmoid_fp32.c
new file mode 100644
index 0000000000..a436e3f1d8
--- /dev/null
+++ b/TargetLibraries/Generic/src/HardSigmoid_fp32.c
@@ -0,0 +1,16 @@
+/*
+ * SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include "DeeployBasicMath.h"
+#include <math.h>
+
+/* data_out[i] = max(0, min(1, alpha * data_in[i] + beta)) for i < size. */
+void HardSigmoid_fp32_fp32(float32_t *data_in, float32_t *data_out,
+                           float32_t alpha, float32_t beta, int32_t size) {
+  for (int i = 0; i < size; i++) {
+    data_out[i] = fmaxf(0, fminf(1, alpha * data_in[i] + beta));
+  }
+}
diff --git a/TargetLibraries/Generic/src/HardSwish_fp32.c b/TargetLibraries/Generic/src/HardSwish_fp32.c
new file mode 100644
index 0000000000..41e4f424b4
--- /dev/null
+++ b/TargetLibraries/Generic/src/HardSwish_fp32.c
@@ -0,0 +1,17 @@
+/*
+ * SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include "DeeployBasicMath.h"
+#include <math.h>
+
+/* data_out[i] = x * max(0, min(1, x / 6 + 0.5)) with x = data_in[i]. */
+void HardSwish_fp32_fp32(float32_t *data_in, float32_t *data_out,
+                         int32_t size) {
+  for (int i = 0; i < size; i++) {
+    float32_t x = data_in[i];
+    data_out[i] = x * fmaxf(0, fminf(1, x / 6 + 0.5f));
+  }
+}
diff --git a/TargetLibraries/Generic/src/InstanceNormalization_fp32.c b/TargetLibraries/Generic/src/InstanceNormalization_fp32.c
new file mode 100644
index 0000000000..23405dc11d
--- /dev/null
+++ b/TargetLibraries/Generic/src/InstanceNormalization_fp32.c
@@ -0,0 +1,60 @@
+/*
+ * SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include "DeeployBasicMath.h"
+#include <math.h>
+
+/*
+ * Instance normalization over an input laid out as
+ * [batch_size][num_channels][spatial]. Every (n, c) slice is normalized with
+ * its own mean and (biased) variance, then scaled by scale[c] and shifted by
+ * bias[c]. Returns without touching dst when spatial is zero.
+ */
+void InstanceNormalization_fp32_fp32(
+    const float32_t *__restrict__ src, float32_t *__restrict__ dst,
+    const float32_t *__restrict__ scale, const float32_t *__restrict__ bias,
+    uint32_t batch_size, uint32_t num_channels,
+    uint32_t spatial, // spatial dimension (L or H*W or D*H*W, etc.)
+    float32_t epsilon) {
+
+  if (spatial == 0) {
+    return;
+  }
+
+  uint32_t slice = num_channels * spatial; // elements per batch
+
+  for (uint32_t n = 0; n < batch_size; ++n) {
+    for (uint32_t c = 0; c < num_channels; ++c) {
+      uint32_t channel_offset = n * slice + c * spatial;
+      const float32_t *x = src + channel_offset;
+      float32_t *y = dst + channel_offset;
+
+      /* --- mean (float64 divisor: float32 is inexact above 2^24) --- */
+      float64_t sum = 0.0;
+      for (uint32_t i = 0; i < spatial; ++i)
+        sum += x[i];
+      float64_t mean = sum / (float64_t)spatial;
+
+      /* --- variance --- */
+      float64_t var = 0.0;
+      for (uint32_t i = 0; i < spatial; ++i) {
+        float64_t d = (float64_t)x[i] - mean;
+        var += d * d;
+      }
+      var /= (float64_t)spatial;
+
+      /* --- normalize + affine --- */
+      float32_t inv_std = (float32_t)(1.0 / sqrt(var + (float64_t)epsilon));
+      float32_t g = scale[c];
+      float32_t b = bias[c];
+      float32_t m = (float32_t)mean;
+
+      for (uint32_t i = 0; i < spatial; ++i) {
+        y[i] = g * (x[i] - m) * inv_std + b;
+      }
+    }
+  }
+}
diff --git a/TargetLibraries/Generic/src/Sigmoid_fp32.c b/TargetLibraries/Generic/src/Sigmoid_fp32.c
new file mode 100644
index 0000000000..1c98bdfc6f
--- /dev/null
+++ b/TargetLibraries/Generic/src/Sigmoid_fp32.c
@@ -0,0 +1,15 @@
+/*
+ * SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include "DeeployBasicMath.h"
+#include <math.h>
+
+/* data_out[i] = 1 / (1 + expf(-data_in[i])) for i in [0, size). */
+void Sigmoid_fp32_fp32(float32_t *data_in, float32_t *data_out, int32_t size) {
+  for (int i = 0; i < size; i++) {
+    data_out[i] = 1 / (1 + expf(-data_in[i]));
+  }
+}
diff --git a/TargetLibraries/Generic/src/Swish_fp32.c b/TargetLibraries/Generic/src/Swish_fp32.c
new file mode 100644
index 0000000000..5447de4c6a
--- /dev/null
+++ b/TargetLibraries/Generic/src/Swish_fp32.c
@@ -0,0 +1,17 @@
+/*
+ * SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include "DeeployBasicMath.h"
+#include <math.h>
+
+/* data_out[i] = x / (1 + expf(-alpha * x)) with x = data_in[i]. */
+void Swish_fp32_fp32(float32_t *data_in, float32_t *data_out, float alpha,
+                     int32_t size) {
+  for (int i = 0; i < size; i++) {
+    float32_t x = data_in[i];
+    data_out[i] = x / (1 + expf(-alpha * x));
+  }
+}