From 5e18fb14e74601976b976cfe0da4a259e1585248 Mon Sep 17 00:00:00 2001 From: "Doronin, Maksim" Date: Fri, 27 Feb 2026 11:02:05 +0000 Subject: [PATCH 1/3] Revert Adding Compress Convolution as part of VF fusion --- .../interfaces/scf/scf_tiling_interfaces.hpp | 2 - .../dialect/VPU/interfaces/scf_tiling_ops.cpp | 1 - .../VPU/IR/ops/nce_compress_convolution.cpp | 27 -- .../vpux/compiler/dialect/VPU/ops/dpu.td | 4 +- .../VPU/passes/apply_tiling_scf_40XX+.mlir | 142 ---------- .../VPU/passes/scf_vertical_fusion_40XX+.mlir | 244 ------------------ 6 files changed, 1 insertion(+), 419 deletions(-) diff --git a/src/vpux_compiler/include/vpux/compiler/dialect/VPU/interfaces/scf/scf_tiling_interfaces.hpp b/src/vpux_compiler/include/vpux/compiler/dialect/VPU/interfaces/scf/scf_tiling_interfaces.hpp index d27411f807..9400da137b 100644 --- a/src/vpux_compiler/include/vpux/compiler/dialect/VPU/interfaces/scf/scf_tiling_interfaces.hpp +++ b/src/vpux_compiler/include/vpux/compiler/dialect/VPU/interfaces/scf/scf_tiling_interfaces.hpp @@ -502,8 +502,6 @@ class SCFTilingConvModelOp : public SCFTilingCommonModelOp {}; -class SCFCompressConvOpModel : public SCFTilingConvModelOp {}; - class SCFTilingDepthConvModelOp : public SCFTilingConvModelOp { public: SCFTilingInfo backInferSCFTileInfo(mlir::Operation* operation, mlir::OpBuilder& builder, diff --git a/src/vpux_compiler/src/NPU40XX/dialect/VPU/interfaces/scf_tiling_ops.cpp b/src/vpux_compiler/src/NPU40XX/dialect/VPU/interfaces/scf_tiling_ops.cpp index 6eced0936f..26d9d4bdcd 100644 --- a/src/vpux_compiler/src/NPU40XX/dialect/VPU/interfaces/scf_tiling_ops.cpp +++ b/src/vpux_compiler/src/NPU40XX/dialect/VPU/interfaces/scf_tiling_ops.cpp @@ -14,7 +14,6 @@ void vpux::VPU::arch40xx::registerSCFTilingOpsInterfaces(mlir::DialectRegistry& VPU::NCEAveragePoolOp::attachInterface(*ctx); VPU::NCEMaxPoolOp::attachInterface(*ctx); VPU::NCEConvolutionOp::attachInterface(*ctx); - VPU::NCECompressConvolutionOp::attachInterface(*ctx); 
VPU::NCEDepthConvolutionOp::attachInterface(*ctx); VPU::NCEPermuteOp::attachInterface(*ctx); diff --git a/src/vpux_compiler/src/dialect/VPU/IR/ops/nce_compress_convolution.cpp b/src/vpux_compiler/src/dialect/VPU/IR/ops/nce_compress_convolution.cpp index 24ae504b07..b2f69507eb 100644 --- a/src/vpux_compiler/src/dialect/VPU/IR/ops/nce_compress_convolution.cpp +++ b/src/vpux_compiler/src/dialect/VPU/IR/ops/nce_compress_convolution.cpp @@ -426,30 +426,3 @@ vpux::VPU::SparsitySupport vpux::VPU::NCECompressConvolutionOp::sparsitySupport( } return VPU::SparsitySupport::SPARSE_OUTPUTS & excludeMode; } - -mlir::LogicalResult vpux::VPU::NCECompressConvolutionOp::reifyResultShapes( - mlir::OpBuilder& builder, mlir::ReifiedRankedShapedTypeDims& reifiedReturnShapes) { - // Parse attributes - const auto strides = parseIntArrayAttr(getStrides()); - - const auto padTop = getPad().getTop().getValue().getSExtValue(); - const auto padBottom = getPad().getBottom().getValue().getSExtValue(); - const auto padLeft = getPad().getLeft().getValue().getSExtValue(); - const auto padRight = getPad().getRight().getValue().getSExtValue(); - - const auto dataPaddingAbove = SmallVector({padTop, padLeft}); - const auto dataPaddingBelow = SmallVector({padBottom, padRight}); - - const auto rawFilterShape = Shape(parseIntArrayAttr(getRawFilterShape())); - SmallVector kernelSize{rawFilterShape[Dims4D::Filter::KY], rawFilterShape[Dims4D::Filter::KX]}; - - // Compute output shape using utility - auto outShape = reifyConvPoolTensors(builder, getInput(), getOutput(), getFilter(), kernelSize, strides, - dataPaddingAbove, dataPaddingBelow, getLoc()); - if (mlir::failed(outShape)) { - return outShape; - } - - reifiedReturnShapes.emplace_back(std::move(outShape.value())); - return mlir::success(); -} diff --git a/src/vpux_compiler/tblgen/vpux/compiler/dialect/VPU/ops/dpu.td b/src/vpux_compiler/tblgen/vpux/compiler/dialect/VPU/ops/dpu.td index 0d9299f52c..63d86dccaf 100644 --- 
a/src/vpux_compiler/tblgen/vpux/compiler/dialect/VPU/ops/dpu.td +++ b/src/vpux_compiler/tblgen/vpux/compiler/dialect/VPU/ops/dpu.td @@ -425,9 +425,7 @@ def VPU_NCECompressConvolutionOp : "doesLayerFitIntoCMX", "doesLayerChangeOutputAlignmentFitIntoCMX", "getDistributedTypeForOpOperand"]>, - DeclareOpInterfaceMethods, - DeclareOpInterfaceMethods, - DeclareOpInterfaceMethods + DeclareOpInterfaceMethods ] > { let summary = "NCE version of Compressed Convolution layer"; diff --git a/tests/lit/NPU/dialect/VPU/passes/apply_tiling_scf_40XX+.mlir b/tests/lit/NPU/dialect/VPU/passes/apply_tiling_scf_40XX+.mlir index 53efde9e36..145f0370f6 100644 --- a/tests/lit/NPU/dialect/VPU/passes/apply_tiling_scf_40XX+.mlir +++ b/tests/lit/NPU/dialect/VPU/passes/apply_tiling_scf_40XX+.mlir @@ -1166,145 +1166,3 @@ func.func @ApplyTilingD2SPadded( //CHECK: return [[H_LOOP]] : tensor<1x16x?x?xf16, {bounds = #const.OpaqueI64Elements<[1, 16, 1080, 1920]> : tensor<4xsi64>, order = #NHWC}> } - -// ----- - -//CHECK: #[[$MAP:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 11)> - -#NHWC = affine_map<(d0, d1, d2, d3) -> (d0, d2, d3, d1)> - -!dynInputType = tensor<1x4x800x?xf16, {bounds = #const.OpaqueI64Elements<[1, 4, 800, 1280]> : tensor<4xsi64>, order = #NHWC}> -!dynOutputType = tensor<1x32x800x?xf16, {bounds = #const.OpaqueI64Elements<[1, 32, 800, 1280]> : tensor<4xsi64>, order = #NHWC}> - -// CHECK-LABEL: @NoPaddingCompressCONV_W_DynamicInput -// CHECK-SAME: [[INPUT:%arg[0-9]]]: tensor<1x4x800x?xf16, {bounds = #const.OpaqueI64Elements<[1, 4, 800, 1280]> : tensor<4xsi64>, order = #NHWC}> -// CHECK-SAME: [[WEIGHTS:%arg[0-9]]]: tensor<32x4x1x1xf16, {order = #NHWC}> -// CHECK-SAME: [[WEIGHTS_TABLE:%arg[0-9]]]: tensor<32x1x1x4xsi32> -func.func @NoPaddingCompressCONV_W_DynamicInput( - %arg0: !dynInputType, - %arg1: tensor<32x4x1x1xf16, {order = #NHWC}>, - %arg2: tensor<32x1x1x4xsi32> - ) -> !dynOutputType { - %1 = VPU.NCE.CompressConvolution(%arg0, %arg1, %arg2) { - pad = #VPU.Padding< - left = 0 : i64, - 
right = 0 : i64, - top = 0 : i64, - bottom = 0 : i64 - >, - ppe = #VPU.PPEInt< - mode = , - clamp_low = -2147483648 : i64, - clamp_high = 2147483647 : i64, - lrelu_mult = 1 : i64, - lrelu_shift = 0 : i64, - fp_prelu_alpha = 1.000000e+00 : f64 - >, - rawFilterShape = [32, 4, 1, 1], - strides = [1, 1], - tilingStrategy = [1, 1, 1, 117], - cm_sp_pattern = 0 - } : !dynInputType, tensor<32x4x1x1xf16, {order = #NHWC}>, tensor<32x1x1x4xsi32> -> !dynOutputType - - //CHECK-DAG: [[DIM_VALUE_0:%.+]] = arith.constant 3 : index - //CHECK-DAG: [[DIM_0:%.+]] = tensor.dim [[INPUT]], [[DIM_VALUE_0]] : tensor<1x4x800x?xf16, {bounds = #const.OpaqueI64Elements<[1, 4, 800, 1280]> : tensor<4xsi64>, order = #NHWC}> - //CHECK-DAG: [[C0:%.+]] = arith.constant 0 : index - //CHECK-DAG: [[C11:%.+]] = arith.constant 11 : index - //CHECK-DAG: [[LOOP_OUTPUT:%.+]] = tensor.empty([[DIM_0]]) : tensor<1x32x800x?xf16, {bounds = #const.OpaqueI64Elements<[1, 32, 800, 1280]> : tensor<4xsi64>, order = #NHWC}> - - //CHECK: [[LOOP:%.+]] = scf.for - //CHECK-SAME: [[LOOP_ITER:%arg[0-9]]] = [[C0]] to [[DIM_0]] step [[C11]] - //CHECK-SAME: iter_args([[LOOP_OUT:%arg[0-9]]] = [[LOOP_OUTPUT]]) -> (tensor<1x32x800x?xf16, {bounds = #const.OpaqueI64Elements<[1, 32, 800, 1280]> : tensor<4xsi64>, order = #NHWC}>) { - - //CHECK: [[SIZE:%.+]] = affine.min #[[$MAP]]([[LOOP_ITER]])[[[DIM_0]]] - //CHECK: [[SLICE:%.+]] = tensor.extract_slice [[INPUT]][0, 0, 0, [[LOOP_ITER]]] [1, 4, 800, [[SIZE]]] [1, 1, 1, 1] - //CHECK-SAME: : tensor<1x4x800x?xf16, {bounds = #const.OpaqueI64Elements<[1, 4, 800, 1280]> : tensor<4xsi64>, order = #NHWC}> to tensor<1x4x800x?xf16, {bounds = #const.OpaqueI64Elements<[1, 4, 800, 11]> : tensor<4xsi64>, order = #NHWC}> - //CHECK: [[COMPRESS_CONV:%.+]] = VPU.NCE.CompressConvolution([[SLICE]], [[WEIGHTS]], [[WEIGHTS_TABLE]]) - //CHECK-SAME: {cm_sp_pattern = 0 : i64, pad = #VPU.Padding - //CHECK-SAME: , ppe = #VPU.PPEInt, clamp_low = -2147483648 : i64, clamp_high = 2147483647 : i64, lrelu_mult = 1 : 
i64, lrelu_shift = 0 : i64, fp_prelu_alpha = 1.000000e+00 : f64>, rawFilterShape = [32, 4, 1, 1], strides = [1, 1], tiling_loop_index = 0 : i64} -> tensor<1x32x800x?xf16, {bounds = #const.OpaqueI64Elements<[1, 32, 800, 11]> : tensor<4xsi64>, order = #NHWC}> - - //CHECK: [[INSERT:%.+]] = tensor.insert_slice [[COMPRESS_CONV]] into [[LOOP_OUT]][0, 0, 0, [[LOOP_ITER]]] [1, 32, 800, [[SIZE]]] [1, 1, 1, 1] : tensor<1x32x800x?xf16, {bounds = #const.OpaqueI64Elements<[1, 32, 800, 11]> : tensor<4xsi64>, order = #NHWC}> into tensor<1x32x800x?xf16, {bounds = #const.OpaqueI64Elements<[1, 32, 800, 1280]> : tensor<4xsi64>, order = #NHWC}> - //CHECK: scf.yield [[INSERT]] : tensor<1x32x800x?xf16, {bounds = #const.OpaqueI64Elements<[1, 32, 800, 1280]> : tensor<4xsi64>, order = #NHWC}> - - return %1 : !dynOutputType - - //CHECK: return [[LOOP]] : tensor<1x32x800x?xf16, {bounds = #const.OpaqueI64Elements<[1, 32, 800, 1280]> : tensor<4xsi64>, order = #NHWC}> -} - -// ----- - -//CHECK: #[[$MAP:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 400)> -//CHECK: #[[$MAP1:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 11)> - -#NHWC = affine_map<(d0, d1, d2, d3) -> (d0, d2, d3, d1)> - -!dynInputType = tensor<1x4x?x?xf16, {bounds = #const.OpaqueI64Elements<[1, 4, 800, 1280]> : tensor<4xsi64>, order = #NHWC}> -!dynOutputType = tensor<1x32x?x?xf16, {bounds = #const.OpaqueI64Elements<[1, 32, 800, 1280]> : tensor<4xsi64>, order = #NHWC}> - -// CHECK-LABEL: @NoPaddingCompressCONV_HW_DynamicInput -// CHECK-SAME: [[INPUT:%arg[0-9]]]: tensor<1x4x?x?xf16, {bounds = #const.OpaqueI64Elements<[1, 4, 800, 1280]> : tensor<4xsi64>, order = #NHWC}> -// CHECK-SAME: [[WEIGHTS:%arg[0-9]]]: tensor<32x4x1x1xf16, {order = #NHWC}> -// CHECK-SAME: [[WEIGHTS_TABLE:%arg[0-9]]]: tensor<32x1x1x4xsi32> -func.func @NoPaddingCompressCONV_HW_DynamicInput( - %arg0: !dynInputType, - %arg1: tensor<32x4x1x1xf16, {order = #NHWC}>, - %arg2: tensor<32x1x1x4xsi32> - ) -> !dynOutputType { - %1 = VPU.NCE.CompressConvolution(%arg0, %arg1, %arg2) { - 
pad = #VPU.Padding< - left = 0 : i64, - right = 0 : i64, - top = 0 : i64, - bottom = 0 : i64 - >, - ppe = #VPU.PPEInt< - mode = , - clamp_low = -2147483648 : i64, - clamp_high = 2147483647 : i64, - lrelu_mult = 1 : i64, - lrelu_shift = 0 : i64, - fp_prelu_alpha = 1.000000e+00 : f64 - >, - rawFilterShape = [32, 4, 1, 1], - strides = [1, 1], - tilingStrategy = [1, 1, 2, 117], - cm_sp_pattern = 0 - } : !dynInputType, tensor<32x4x1x1xf16, {order = #NHWC}>, tensor<32x1x1x4xsi32> -> !dynOutputType - - //CHECK-DAG: [[LOOP_BEGIN:%.+]] = arith.constant 0 : index - - //CHECK-DAG: [[DIM_VALUE_H_1:%.+]] = arith.constant 2 : index - //CHECK-DAG: [[LOOP_END_H:%.+]] = tensor.dim [[INPUT]], [[DIM_VALUE_H_1]] : tensor<1x4x?x?xf16, {bounds = #const.OpaqueI64Elements<[1, 4, 800, 1280]> : tensor<4xsi64>, order = #NHWC}> - - //CHECK-DAG: [[DIM_VALUE_W_1:%.+]] = arith.constant 3 : index - //CHECK-DAG: [[LOOP_END_W:%.+]] = tensor.dim [[INPUT]], [[DIM_VALUE_W_1]] : tensor<1x4x?x?xf16, {bounds = #const.OpaqueI64Elements<[1, 4, 800, 1280]> : tensor<4xsi64>, order = #NHWC}> - - //CHECK-DAG: [[LOOP_STEP_H:%.+]] = arith.constant 400 : index - - //CHECK-DAG: [[LOOP_STEP_W:%.+]] = arith.constant 11 : index - - //CHECK-DAG: [[LOOP_OUTPUT:%.+]] = tensor.empty([[LOOP_END_H]], [[LOOP_END_W]]) : tensor<1x32x?x?xf16, {bounds = #const.OpaqueI64Elements<[1, 32, 800, 1280]> : tensor<4xsi64>, order = #NHWC}> - - //CHECK: [[LOOP_H:%.+]] = scf.for - //CHECK-SAME: [[LOOP_ITER_H:%arg[0-9]]] = [[LOOP_BEGIN]] to [[LOOP_END_H]] step [[LOOP_STEP_H]] - //CHECK-SAME: iter_args([[LOOP_OUT_H:%arg[0-9]]] = [[LOOP_OUTPUT]]) -> (tensor<1x32x?x?xf16, {bounds = #const.OpaqueI64Elements<[1, 32, 800, 1280]> : tensor<4xsi64>, order = #NHWC}>) { - - - //CHECK: [[LOOP_W:%.+]] = scf.for - //CHECK-SAME: [[LOOP_ITER_W:%arg[0-9]]] = [[LOOP_BEGIN]] to [[LOOP_END_W]] step [[LOOP_STEP_W]] - //CHECK-SAME: iter_args([[LOOP_OUT:%arg[0-9]]] = [[LOOP_OUT_H]]) -> (tensor<1x32x?x?xf16, {bounds = #const.OpaqueI64Elements<[1, 32, 800, 1280]> 
: tensor<4xsi64>, order = #NHWC}>) { - - //CHECK: [[SIZE_H:%.+]] = affine.min #[[$MAP]]([[LOOP_ITER_H]])[[[LOOP_END_H]]] - //CHECK: [[SIZE_W:%.+]] = affine.min #[[$MAP1]]([[LOOP_ITER_W]])[[[LOOP_END_W]]] - - //CHECK: [[SLICE:%.+]] = tensor.extract_slice [[INPUT]][0, 0, [[LOOP_ITER_H]], [[LOOP_ITER_W]]] [1, 4, [[SIZE_H]], [[SIZE_W]]] [1, 1, 1, 1] - //CHECK-SAME: : tensor<1x4x?x?xf16, {bounds = #const.OpaqueI64Elements<[1, 4, 800, 1280]> : tensor<4xsi64>, order = #NHWC}> to tensor<1x4x?x?xf16, {bounds = #const.OpaqueI64Elements<[1, 4, 400, 11]> : tensor<4xsi64>, order = #NHWC}> - //CHECK: [[COMPRESS_CONV:%.+]] = VPU.NCE.CompressConvolution([[SLICE]], [[WEIGHTS]], [[WEIGHTS_TABLE]]) - //CHECK-SAME: {cm_sp_pattern = 0 : i64, pad = #VPU.Padding - //CHECK-SAME: , ppe = #VPU.PPEInt, clamp_low = -2147483648 : i64, clamp_high = 2147483647 : i64, lrelu_mult = 1 : i64, lrelu_shift = 0 : i64, fp_prelu_alpha = 1.000000e+00 : f64>, rawFilterShape = [32, 4, 1, 1], strides = [1, 1], tiling_loop_index = 0 : i64} -> tensor<1x32x?x?xf16, {bounds = #const.OpaqueI64Elements<[1, 32, 400, 11]> : tensor<4xsi64>, order = #NHWC}> - - //CHECK: [[INSERT:%.+]] = tensor.insert_slice [[COMPRESS_CONV]] into [[LOOP_OUT]][0, 0, [[LOOP_ITER_H]], [[LOOP_ITER_W]]] [1, 32, [[SIZE_H]], [[SIZE_W]]] [1, 1, 1, 1] : tensor<1x32x?x?xf16, {bounds = #const.OpaqueI64Elements<[1, 32, 400, 11]> : tensor<4xsi64>, order = #NHWC}> into tensor<1x32x?x?xf16, {bounds = #const.OpaqueI64Elements<[1, 32, 800, 1280]> : tensor<4xsi64>, order = #NHWC}> - //CHECK: scf.yield [[INSERT]] : tensor<1x32x?x?xf16, {bounds = #const.OpaqueI64Elements<[1, 32, 800, 1280]> : tensor<4xsi64>, order = #NHWC}> - - return %1 : !dynOutputType -} diff --git a/tests/lit/NPU/dialect/VPU/passes/scf_vertical_fusion_40XX+.mlir b/tests/lit/NPU/dialect/VPU/passes/scf_vertical_fusion_40XX+.mlir index 559198b229..0caff9b889 100644 --- a/tests/lit/NPU/dialect/VPU/passes/scf_vertical_fusion_40XX+.mlir +++ 
b/tests/lit/NPU/dialect/VPU/passes/scf_vertical_fusion_40XX+.mlir @@ -1470,247 +1470,3 @@ func.func @PermuteEltwiseFusion(%arg0: tensor<1x16x?x?xf16, {bounds = #const.Opa // CHECK: scf.yield [[LOOP_W]] // CHECK: return [[LOOP_H]] : tensor<1x16x?x?xf16, {bounds = #const.OpaqueI64Elements<[1, 16, 1280, 1280]> : tensor<4xsi64>, order = #NHWC}> } - -// ----- - -config.Resources 3 of @NCE at 1.700000e+03 MHz { - config.MemoryResource 1326182 bytes of @CMX_NN_FragmentationAware - config.MemoryResource 1473536 bytes of @CMX_NN {config.bandwidth = 64 : i64, config.derateFactor = 1.000000e+00 : f64} - config.ExecutorResource 2 of @SHAVE_ACT - config.ExecutorResource 1 of @DPU -} - -#NHWC = affine_map<(d0, d1, d2, d3) -> (d0, d2, d3, d1)> - -//CHECK: #[[$MAP0:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 45)> -//CHECK: #[[$MAP1:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 320)> -//CHECK: #[[$MAP2:.+]] = affine_map<(d0) -> (0, d0 - 1)> -//CHECK: #[[$MAP3:.+]] = affine_map<(d0) -> (-d0 + 1, 0)> -//CHECK: #[[$MAP4:.+]] = affine_map<()[s0] -> (1, s0)> -//CHECK: #[[$MAP5:.+]] = affine_map<(d0, d1)[s0] -> (0, d0 + d1 - s0 + 2)> -//CHECK: #[[$MAP6:.+]] = affine_map<(d0, d1, d2, d3)[s0] -> (0, d0 + d1 - d2 - d3 - s0 + 4)> -//CHECK: #[[$MAP7:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (-d0 - d1 + d2 - d3 - d4 + 4)> - - -// CHECK-LABEL: @Merge2DVFChainCompressConv -// CHECK-SAME: [[INPUT:%arg[0-9]]]: tensor<1x4x?x?xf16, {bounds = #const.OpaqueI64Elements<[1, 4, 540, 960]> : tensor<4xsi64>, order = #NHWC}>) -func.func @Merge2DVFChainCompressConv(%arg0: tensor<1x4x?x?xf16, {bounds = #const.OpaqueI64Elements<[1, 4, 540, 960]> : tensor<4xsi64>, order = #NHWC}>) -> tensor<1x32x?x?xf16, {bounds = #const.OpaqueI64Elements<[1, 32, 540, 960]> : tensor<4xsi64>, order = #NHWC}> - { - %cst = const.Declare tensor<32x4x3x3xf16, {order = #NHWC}> = dense<1.0> : tensor<32x4x3x3xf32>, [#const.CastElemType, #const.Reorder<#NHWC>] - %cst_3 = const.Declare tensor<32x1x1x4xsi32> = dense<1> : tensor<32x1x1x4xsi32> - 
%cst_0 = const.Declare tensor<32x16x1x1xf16, {order = #NHWC}> = dense<1.0> : tensor<1x32x1x1xf32>, [#const.Reshape<[32, 1, 1, 1]>, #const.CastElemType, #const.PadWithZero<[0, 0, 0, 0], [0, 15, 0, 0]>, #const.Reorder<#NHWC>] - %cst_1 = const.Declare tensor<32x32x3x3xf16, {order = #NHWC}> = dense<1.0> : tensor<32x32x3x3xf32>, [#const.CastElemType, #const.Reorder<#NHWC>] - - %0 = VPU.NCE.CompressConvolution(%arg0, %cst, %cst_3) {mpe_engine = #VPU.MPEEngine37XX>, - multiClusterStrategy = #VPU.multi_cluster_strategy, - pad = #VPU.Padding, - ppe = #VPU.PPEInt, clamp_low = -2147483648 : i64, clamp_high = 2147483647 : i64, - lrelu_mult = 1 : i64, lrelu_shift = 0 : i64, fp_prelu_alpha = 1.000000e+00 : f64>, - rawFilterShape = [32, 4, 3, 3], strides = [1, 1], tilingStrategy = [1, 1, 1, 21], cm_sp_pattern = 0} - : tensor<1x4x?x?xf16, {bounds = #const.OpaqueI64Elements<[1, 4, 540, 960]> : tensor<4xsi64>, order = #NHWC}>, - tensor<32x4x3x3xf16, {order = #NHWC}>, tensor<32x1x1x4xsi32> - -> tensor<1x32x?x?xf16, {bounds = #const.OpaqueI64Elements<[1, 32, 540, 960]> : tensor<4xsi64>, order = #NHWC}> - - %1 = VPU.NCE.DepthConvolution(%0, %cst_0) {multiClusterStrategy = #VPU.multi_cluster_strategy, - pad = #VPU.Padding, - ppe = #VPU.PPEInt, clamp_low = -2147483648 : i64, clamp_high = 2147483647 : i64, - lrelu_mult = 1 : i64, lrelu_shift = 0 : i64, fp_prelu_alpha = 1.000000e+00 : f64>, - rawFilterShape = [32, 1, 1, 1], strides = [1, 1], tilingStrategy = [1, 1, 1, 20]} - -> tensor<1x32x?x?xf16, {bounds = #const.OpaqueI64Elements<[1, 32, 540, 960]> : tensor<4xsi64>, order = #NHWC}> - - %2 = VPU.NCE.Convolution(%1, %cst_1) {mpe_engine = #VPU.MPEEngine37XX>, - multiClusterStrategy = #VPU.multi_cluster_strategy, - pad = #VPU.Padding, - ppe = #VPU.PPEInt, clamp_low = -2147483648 : i64, clamp_high = 2147483647 : i64, - lrelu_mult = 1 : i64, lrelu_shift = 0 : i64, fp_prelu_alpha = 1.000000e+00 : f64>, - rawFilterShape = [32, 32, 3, 3], strides = [1, 1], tilingStrategy = [1, 1, 1, 22]} - : 
tensor<1x32x?x?xf16, {bounds = #const.OpaqueI64Elements<[1, 32, 540, 960]> : tensor<4xsi64>, order = #NHWC}>, - tensor<32x32x3x3xf16, {order = #NHWC}> - -> tensor<1x32x?x?xf16, {bounds = #const.OpaqueI64Elements<[1, 32, 540, 960]> : tensor<4xsi64>, order = #NHWC}> - - return %2: tensor<1x32x?x?xf16, {bounds = #const.OpaqueI64Elements<[1, 32, 540, 960]> : tensor<4xsi64>, order = #NHWC}> - - //CHECK-DAG: [[DIM_INDEX_W:%.+]] = arith.constant 3 : index - //CHECK-DAG: [[PAD_VALUE:%.+]] = arith.constant 0.000000e+00 : f16 - //CHECK-DAG: [[DIM_INDEX_H:%.+]] = arith.constant 2 : index - //CHECK-DAG: [[LOOP_STEP_W:%.+]] = arith.constant 320 : index - //CHECK-DAG: [[LOOP_STEP_H:%.+]] = arith.constant 45 : index - //CHECK-DAG: [[LOOP_BEGIN:%.+]] = arith.constant 0 : index - - //CHECK: [[DIM_H:%.+]] = tensor.dim [[INPUT]], [[DIM_INDEX_H]] : tensor<1x4x?x?xf16, {bounds = #const.OpaqueI64Elements<[1, 4, 540, 960]> : tensor<4xsi64>, order = #NHWC}> - //CHECK: [[DIM_W:%.+]] = tensor.dim [[INPUT]], [[DIM_INDEX_W]] : tensor<1x4x?x?xf16, {bounds = #const.OpaqueI64Elements<[1, 4, 540, 960]> : tensor<4xsi64>, order = #NHWC}> - //CHECK: [[LOOP_OUTPUT:%.+]] = tensor.empty([[DIM_H]], [[DIM_W]]) : tensor<1x32x?x?xf16, {bounds = #const.OpaqueI64Elements<[1, 32, 540, 960]> : tensor<4xsi64>, order = #NHWC}> - //CHECK: [[LOOP_END_H:%.+]] = tensor.dim [[INPUT]], [[DIM_INDEX_H]] : tensor<1x4x?x?xf16, {bounds = #const.OpaqueI64Elements<[1, 4, 540, 960]> : tensor<4xsi64>, order = #NHWC}> - //CHECK: [[LOOP_END_W:%.+]] = tensor.dim [[INPUT]], [[DIM_INDEX_W]] : tensor<1x4x?x?xf16, {bounds = #const.OpaqueI64Elements<[1, 4, 540, 960]> : tensor<4xsi64>, order = #NHWC}> - - //CHECK: [[LOOP_H:%.+]] = scf.for - //CHECK-SAME: [[LOOP_ITER_H:%arg[0-9]]] = [[LOOP_BEGIN]] to [[LOOP_END_H]] step [[LOOP_STEP_H]] - //CHECK-SAME: iter_args([[LOOP_OUT:%arg[0-9]]] = [[LOOP_OUTPUT]]) -> (tensor<1x32x?x?xf16, {bounds = #const.OpaqueI64Elements<[1, 32, 540, 960]> : tensor<4xsi64>, order = #NHWC}>) { - - //CHECK: 
[[LOOP_W:%.+]] = scf.for - //CHECK-SAME: [[LOOP_ITER_W:%arg[0-9]]] = [[LOOP_BEGIN]] to [[LOOP_END_W]] step [[LOOP_STEP_W]] - //CHECK-SAME: iter_args([[LOOP_OUT_W:%arg[0-9]]] = [[LOOP_OUT]]) -> (tensor<1x32x?x?xf16, {bounds = #const.OpaqueI64Elements<[1, 32, 540, 960]> : tensor<4xsi64>, order = #NHWC}>) { - - //CHECK: [[INSERT_SIZE_H:%.+]] = affine.min #[[$MAP0]]([[LOOP_ITER_H]])[[[LOOP_END_H]]] - //CHECK: [[INSERT_SIZE_W:%.+]] = affine.min #[[$MAP1]]([[LOOP_ITER_W]])[[[LOOP_END_W]]] - - //CHECK: [[DIM_H_1:%.+]] = tensor.dim [[INPUT]], [[DIM_INDEX_H]] : tensor<1x4x?x?xf16, {bounds = #const.OpaqueI64Elements<[1, 4, 540, 960]> : tensor<4xsi64>, order = #NHWC}> - //CHECK: [[DIM_W_1:%.+]] = tensor.dim [[INPUT]], [[DIM_INDEX_W]] : tensor<1x4x?x?xf16, {bounds = #const.OpaqueI64Elements<[1, 4, 540, 960]> : tensor<4xsi64>, order = #NHWC}> - - //CHECK: [[TMP_VALUE7:%.+]] = affine.max #[[$MAP2]]([[LOOP_ITER_H]]) - //CHECK: [[TMP_VALUE6:%.+]] = affine.max #[[$MAP3]]([[LOOP_ITER_H]]) - //CHECK: [[PAD1_LOW_H:%.+]] = affine.min #[[$MAP4]]()[[[TMP_VALUE6]]] - //CHECK: [[TMP_VALUE9:%.+]] = affine.max #[[$MAP5]]([[INSERT_SIZE_H]], [[TMP_VALUE7]])[[[DIM_H_1]]] - //CHECK: [[PAD1_HIGH_H:%.+]] = affine.min #[[$MAP4]]()[[[TMP_VALUE9]]] - //CHECK: [[TMP_VALUE5:%.+]] = affine.max #[[$MAP2]]([[LOOP_ITER_W]]) - //CHECK: [[TMP_VALUE8:%.+]] = affine.max #[[$MAP3]]([[LOOP_ITER_W]]) - //CHECK: [[PAD1_LOW_W:%.+]] = affine.min #[[$MAP4]]()[[[TMP_VALUE8]]] - //CHECK: [[TMP_VALUE4:%.+]] = affine.max #[[$MAP5]]([[INSERT_SIZE_W]], [[TMP_VALUE5]])[[[DIM_W_1]]] - //CHECK: [[PAD1_HIGH_W:%.+]] = affine.min #[[$MAP4]]()[[[TMP_VALUE4]]] - - //CHECK: [[DIM_H_2:%.+]] = tensor.dim [[INPUT]], [[DIM_INDEX_H]] : tensor<1x4x?x?xf16, {bounds = #const.OpaqueI64Elements<[1, 4, 540, 960]> : tensor<4xsi64>, order = #NHWC}> - //CHECK: [[DIM_W_2:%.+]] = tensor.dim [[INPUT]], [[DIM_INDEX_W]] : tensor<1x4x?x?xf16, {bounds = #const.OpaqueI64Elements<[1, 4, 540, 960]> : tensor<4xsi64>, order = #NHWC}> - - //CHECK: 
[[SLICE_OFFSET_H:%.+]] = affine.max #[[$MAP2]]([[TMP_VALUE7]]) - //CHECK: [[TMP_VALUE3:%.+]] = affine.max #[[$MAP3]]([[TMP_VALUE7]]) - //CHECK: [[PAD0_LOW_H:%.+]] = affine.min #[[$MAP4]]()[[[TMP_VALUE3]]] - //CHECK: [[TMP_VALUE2:%.+]] = affine.max #[[$MAP6]]([[SLICE_OFFSET_H]], [[INSERT_SIZE_H]], [[PAD1_LOW_H]], [[PAD1_HIGH_H]])[[[DIM_H_2]]] - //CHECK: [[PAD0_HIGH_H:%.+]] = affine.min #[[$MAP4]]()[[[TMP_VALUE2]]] - //CHECK: [[SLICE_SIZE_H:%.+]] = affine.apply #[[$MAP7]]([[PAD0_LOW_H]], [[PAD0_HIGH_H]], [[INSERT_SIZE_H]], [[PAD1_LOW_H]], [[PAD1_HIGH_H]]) - //CHECK: [[SLICE_OFFSET_W:%.+]] = affine.max #[[$MAP2]]([[TMP_VALUE5]]) - //CHECK: [[TMP_VALUE1:%.+]] = affine.max #[[$MAP3]]([[TMP_VALUE5]]) - //CHECK: [[PAD0_LOW_W:%.+]] = affine.min #[[$MAP4]]()[[[TMP_VALUE1]]] - //CHECK: [[TMP_VALUE0:%.+]] = affine.max #[[$MAP6]]([[SLICE_OFFSET_W]], [[INSERT_SIZE_W]], [[PAD1_LOW_W]], [[PAD1_HIGH_W]])[[[DIM_W_2]]] - //CHECK: [[PAD0_HIGH_W:%.+]] = affine.min #[[$MAP4]]()[[[TMP_VALUE0]]] - //CHECK: [[SLICE_SIZE_W:%.+]] = affine.apply #[[$MAP7]]([[PAD0_LOW_W]], [[PAD0_HIGH_W]], [[INSERT_SIZE_W]], [[PAD1_LOW_W]], [[PAD1_HIGH_W]]) - - //CHECK: [[SLICE:%.+]] = tensor.extract_slice [[INPUT]][0, 0, [[SLICE_OFFSET_H]], [[SLICE_OFFSET_W]]] [1, 4, [[SLICE_SIZE_H]], [[SLICE_SIZE_W]]] [1, 1, 1, 1] - //CHECK-SAME: tensor<1x4x?x?xf16, {bounds = #const.OpaqueI64Elements<[1, 4, 540, 960]> : tensor<4xsi64>, order = #NHWC}> to tensor<1x4x?x?xf16, {bounds = #const.OpaqueI64Elements<[1, 4, 45, 320]> : tensor<4xsi64>, order = #NHWC}> - - //CHECK: [[PAD0:%.+]] = tensor.pad [[SLICE]] low[0, 0, [[PAD0_LOW_H]], [[PAD0_LOW_W]]] high[0, 0, [[PAD0_HIGH_H]], [[PAD0_HIGH_W]]] { - //CHECK: tensor.yield [[PAD_VALUE]] : f16 - //CHECK: tensor<1x4x?x?xf16, {bounds = #const.OpaqueI64Elements<[1, 4, 45, 320]> : tensor<4xsi64>, order = #NHWC}> to tensor<1x4x?x?xf16, {bounds = #const.OpaqueI64Elements<[1, 4, 47, 322]> : tensor<4xsi64>, order = #NHWC}> - //CHECK: [[CONV0:%.+]] = VPU.NCE.CompressConvolution([[PAD0]] - 
//CHECK: [[DWCONV:%.+]] = VPU.NCE.DepthConvolution([[CONV0]] - //CHECK: [[PAD1:%.+]] = tensor.pad [[DWCONV]] low[0, 0, [[PAD1_LOW_H]], [[PAD1_LOW_W]]] high[0, 0, [[PAD1_HIGH_H]], [[PAD1_HIGH_W]]] { - //CHECK: tensor.yield [[PAD_VALUE]] : f16 - //CHECK: tensor<1x32x?x?xf16, {bounds = #const.OpaqueI64Elements<[1, 32, 45, 320]> : tensor<4xsi64>, order = #NHWC}> to tensor<1x32x?x?xf16, {bounds = #const.OpaqueI64Elements<[1, 32, 47, 322]> : tensor<4xsi64>, order = #NHWC}> - //CHECK: [[CONV1:%.+]] = VPU.NCE.Convolution([[PAD1]] - //CHECK: [[INSERT:%.+]] = tensor.insert_slice [[CONV1]] into [[LOOP_OUT_W]][0, 0, [[LOOP_ITER_H]], [[LOOP_ITER_W]]] [1, 32, [[INSERT_SIZE_H]], [[INSERT_SIZE_W]]] [1, 1, 1, 1] - //CHECK-SAME: tensor<1x32x?x?xf16, {bounds = #const.OpaqueI64Elements<[1, 32, 45, 320]> : tensor<4xsi64>, order = #NHWC}> into tensor<1x32x?x?xf16, {bounds = #const.OpaqueI64Elements<[1, 32, 540, 960]> : tensor<4xsi64>, order = #NHWC}> - //CHECK: scf.yield [[INSERT]] - - //CHECK: scf.yield [[LOOP_W]] - - //CHECK: return [[LOOP_H]] : tensor<1x32x?x?xf16, {bounds = #const.OpaqueI64Elements<[1, 32, 540, 960]> : tensor<4xsi64>, order = #NHWC}> -} - -// ----- - -config.Resources 3 of @NCE at 1.700000e+03 MHz { - config.MemoryResource 1326182 bytes of @CMX_NN_FragmentationAware - config.MemoryResource 1473536 bytes of @CMX_NN {config.bandwidth = 64 : i64, config.derateFactor = 1.000000e+00 : f64} - config.ExecutorResource 2 of @SHAVE_ACT - config.ExecutorResource 1 of @DPU -} - -#NHWC = affine_map<(d0, d1, d2, d3) -> (d0, d2, d3, d1)> - -//CHECK: #[[$MAP1:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 35)> -//CHECK: #[[$MAP2:.+]] = affine_map<(d0) -> (0, d0 - 1)> -//CHECK: #[[$MAP3:.+]] = affine_map<(d0) -> (-d0 + 1, 0)> -//CHECK: #[[$MAP4:.+]] = affine_map<()[s0] -> (1, s0)> -//CHECK: #[[$MAP5:.+]] = affine_map<(d0, d1)[s0] -> (0, d0 + d1 - s0 + 2)> -//CHECK: #[[$MAP6:.+]] = affine_map<(d0, d1, d2, d3)[s0] -> (0, d0 + d1 - d2 - d3 - s0 + 4)> -//CHECK: #[[$MAP7:.+]] = affine_map<(d0, 
d1, d2, d3, d4) -> (-d0 - d1 + d2 - d3 - d4 + 4)> - - -// CHECK-LABEL: @Merge1DVFChainCompressConv -// CHECK-SAME: [[INPUT:%arg[0-9]]]: tensor<1x4x540x?xf16, {bounds = #const.OpaqueI64Elements<[1, 4, 540, 960]> : tensor<4xsi64>, order = #NHWC}>) -func.func @Merge1DVFChainCompressConv(%arg0: tensor<1x4x540x?xf16, {bounds = #const.OpaqueI64Elements<[1, 4, 540, 960]> : tensor<4xsi64>, order = #NHWC}>) -> tensor<1x32x540x?xf16, {bounds = #const.OpaqueI64Elements<[1, 32, 540, 960]> : tensor<4xsi64>, order = #NHWC}> - { - %cst = const.Declare tensor<32x4x3x3xf16, {order = #NHWC}> = dense<1.0> : tensor<32x4x3x3xf32>, [#const.CastElemType, #const.Reorder<#NHWC>] - %cst_3 = const.Declare tensor<32x1x1x4xsi32> = dense<1> : tensor<32x1x1x4xsi32> - %cst_0 = const.Declare tensor<32x16x1x1xf16, {order = #NHWC}> = dense<1.0> : tensor<1x32x1x1xf32>, [#const.Reshape<[32, 1, 1, 1]>, #const.CastElemType, #const.PadWithZero<[0, 0, 0, 0], [0, 15, 0, 0]>, #const.Reorder<#NHWC>] - %cst_1 = const.Declare tensor<32x32x3x3xf16, {order = #NHWC}> = dense<1.0> : tensor<32x32x3x3xf32>, [#const.CastElemType, #const.Reorder<#NHWC>] - - %0 = VPU.NCE.CompressConvolution(%arg0, %cst, %cst_3) {mpe_engine = #VPU.MPEEngine37XX>, - multiClusterStrategy = #VPU.multi_cluster_strategy, - pad = #VPU.Padding, - ppe = #VPU.PPEInt, clamp_low = -2147483648 : i64, clamp_high = 2147483647 : i64, - lrelu_mult = 1 : i64, lrelu_shift = 0 : i64, fp_prelu_alpha = 1.000000e+00 : f64>, - rawFilterShape = [32, 4, 3, 3], strides = [1, 1], tilingStrategy = [1, 1, 1, 21], cm_sp_pattern = 0} - : tensor<1x4x540x?xf16, {bounds = #const.OpaqueI64Elements<[1, 4, 540, 960]> : tensor<4xsi64>, order = #NHWC}>, - tensor<32x4x3x3xf16, {order = #NHWC}>, tensor<32x1x1x4xsi32> - -> tensor<1x32x540x?xf16, {bounds = #const.OpaqueI64Elements<[1, 32, 540, 960]> : tensor<4xsi64>, order = #NHWC}> - - %1 = VPU.NCE.DepthConvolution(%0, %cst_0) {multiClusterStrategy = #VPU.multi_cluster_strategy, - pad = #VPU.Padding, - ppe = #VPU.PPEInt, 
clamp_low = -2147483648 : i64, clamp_high = 2147483647 : i64, - lrelu_mult = 1 : i64, lrelu_shift = 0 : i64, fp_prelu_alpha = 1.000000e+00 : f64>, - rawFilterShape = [32, 1, 1, 1], strides = [1, 1], tilingStrategy = [1, 1, 1, 20]} - -> tensor<1x32x540x?xf16, {bounds = #const.OpaqueI64Elements<[1, 32, 540, 960]> : tensor<4xsi64>, order = #NHWC}> - - %2 = VPU.NCE.Convolution(%1, %cst_1) {mpe_engine = #VPU.MPEEngine37XX>, - multiClusterStrategy = #VPU.multi_cluster_strategy, - pad = #VPU.Padding, - ppe = #VPU.PPEInt, clamp_low = -2147483648 : i64, clamp_high = 2147483647 : i64, - lrelu_mult = 1 : i64, lrelu_shift = 0 : i64, fp_prelu_alpha = 1.000000e+00 : f64>, - rawFilterShape = [32, 32, 3, 3], strides = [1, 1], tilingStrategy = [1, 1, 1, 22]} - : tensor<1x32x540x?xf16, {bounds = #const.OpaqueI64Elements<[1, 32, 540, 960]> : tensor<4xsi64>, order = #NHWC}>, - tensor<32x32x3x3xf16, {order = #NHWC}> - -> tensor<1x32x540x?xf16, {bounds = #const.OpaqueI64Elements<[1, 32, 540, 960]> : tensor<4xsi64>, order = #NHWC}> - - return %2: tensor<1x32x540x?xf16, {bounds = #const.OpaqueI64Elements<[1, 32, 540, 960]> : tensor<4xsi64>, order = #NHWC}> - - //CHECK-DAG: [[DIM_INDEX_W:%.+]] = arith.constant 3 : index - //CHECK-DAG: [[PAD_VALUE:%.+]] = arith.constant 0.000000e+00 : f16 - //CHECK-DAG: [[LOOP_STEP_W:%.+]] = arith.constant 35 : index - //CHECK-DAG: [[LOOP_BEGIN:%.+]] = arith.constant 0 : index - - //CHECK: [[DIM_W:%.+]] = tensor.dim [[INPUT]], [[DIM_INDEX_W]] : tensor<1x4x540x?xf16, {bounds = #const.OpaqueI64Elements<[1, 4, 540, 960]> : tensor<4xsi64>, order = #NHWC}> - //CHECK: [[LOOP_OUT:%.+]] = tensor.empty([[DIM_W]]) : tensor<1x32x540x?xf16, {bounds = #const.OpaqueI64Elements<[1, 32, 540, 960]> : tensor<4xsi64>, order = #NHWC}> - //CHECK: [[LOOP_END_W:%.+]] = tensor.dim [[INPUT]], [[DIM_INDEX_W]] : tensor<1x4x540x?xf16, {bounds = #const.OpaqueI64Elements<[1, 4, 540, 960]> : tensor<4xsi64>, order = #NHWC}> - - //CHECK: [[LOOP_W:%.+]] = scf.for - //CHECK-SAME: 
[[LOOP_ITER_W:%arg[0-9]]] = [[LOOP_BEGIN]] to [[LOOP_END_W]] step [[LOOP_STEP_W]] - //CHECK-SAME: iter_args([[LOOP_OUT_W:%arg[0-9]]] = [[LOOP_OUT]]) -> (tensor<1x32x540x?xf16, {bounds = #const.OpaqueI64Elements<[1, 32, 540, 960]> : tensor<4xsi64>, order = #NHWC}>) { - - //CHECK: [[INSERT_SIZE_W:%.+]] = affine.min #[[$MAP1]]([[LOOP_ITER_W]])[[[LOOP_END_W]]] - - //CHECK: [[DIM_W_1:%.+]] = tensor.dim [[INPUT]], [[DIM_INDEX_W]] : tensor<1x4x540x?xf16, {bounds = #const.OpaqueI64Elements<[1, 4, 540, 960]> : tensor<4xsi64>, order = #NHWC}> - - //CHECK: [[TMP_VALUE5:%.+]] = affine.max #[[$MAP2]]([[LOOP_ITER_W]]) - //CHECK: [[TMP_VALUE8:%.+]] = affine.max #[[$MAP3]]([[LOOP_ITER_W]]) - //CHECK: [[PAD1_LOW_W:%.+]] = affine.min #[[$MAP4]]()[[[TMP_VALUE8]]] - //CHECK: [[TMP_VALUE4:%.+]] = affine.max #[[$MAP5]]([[INSERT_SIZE_W]], [[TMP_VALUE5]])[[[DIM_W_1]]] - //CHECK: [[PAD1_HIGH_W:%.+]] = affine.min #[[$MAP4]]()[[[TMP_VALUE4]]] - - //CHECK: [[DIM_W_2:%.+]] = tensor.dim [[INPUT]], [[DIM_INDEX_W]] : tensor<1x4x540x?xf16, {bounds = #const.OpaqueI64Elements<[1, 4, 540, 960]> : tensor<4xsi64>, order = #NHWC}> - - //CHECK: [[SLICE_OFFSET_W:%.+]] = affine.max #[[$MAP2]]([[TMP_VALUE5]]) - //CHECK: [[TMP_VALUE1:%.+]] = affine.max #[[$MAP3]]([[TMP_VALUE5]]) - //CHECK: [[PAD0_LOW_W:%.+]] = affine.min #[[$MAP4]]()[[[TMP_VALUE1]]] - //CHECK: [[TMP_VALUE0:%.+]] = affine.max #[[$MAP6]]([[SLICE_OFFSET_W]], [[INSERT_SIZE_W]], [[PAD1_LOW_W]], [[PAD1_HIGH_W]])[[[DIM_W_2]]] - //CHECK: [[PAD0_HIGH_W:%.+]] = affine.min #[[$MAP4]]()[[[TMP_VALUE0]]] - //CHECK: [[SLICE_SIZE_W:%.+]] = affine.apply #[[$MAP7]]([[PAD0_LOW_W]], [[PAD0_HIGH_W]], [[INSERT_SIZE_W]], [[PAD1_LOW_W]], [[PAD1_HIGH_W]]) - - //CHECK: [[SLICE:%.+]] = tensor.extract_slice [[INPUT]][0, 0, 0, [[SLICE_OFFSET_W]]] [1, 4, 540, [[SLICE_SIZE_W]]] [1, 1, 1, 1] - //CHECK-SAME: tensor<1x4x540x?xf16, {bounds = #const.OpaqueI64Elements<[1, 4, 540, 960]> : tensor<4xsi64>, order = #NHWC}> to tensor<1x4x540x?xf16, {bounds = 
#const.OpaqueI64Elements<[1, 4, 540, 35]> : tensor<4xsi64>, order = #NHWC}> - - //CHECK: [[PAD0:%.+]] = tensor.pad [[SLICE]] low[0, 0, 1, [[PAD0_LOW_W]]] high[0, 0, 1, [[PAD0_HIGH_W]]] { - //CHECK: tensor.yield [[PAD_VALUE]] : f16 - //CHECK: tensor<1x4x540x?xf16, {bounds = #const.OpaqueI64Elements<[1, 4, 540, 35]> : tensor<4xsi64>, order = #NHWC}> to tensor<1x4x542x?xf16, {bounds = #const.OpaqueI64Elements<[1, 4, 542, 37]> : tensor<4xsi64>, order = #NHWC}> - //CHECK: [[CONV0:%.+]] = VPU.NCE.CompressConvolution([[PAD0]] - //CHECK: [[DWCONV:%.+]] = VPU.NCE.DepthConvolution([[CONV0]] - //CHECK: [[PAD1:%.+]] = tensor.pad [[DWCONV]] low[0, 0, 1, [[PAD1_LOW_W]]] high[0, 0, 1, [[PAD1_HIGH_W]]] { - //CHECK: tensor.yield [[PAD_VALUE]] : f16 - //CHECK: tensor<1x32x540x?xf16, {bounds = #const.OpaqueI64Elements<[1, 32, 540, 35]> : tensor<4xsi64>, order = #NHWC}> to tensor<1x32x542x?xf16, {bounds = #const.OpaqueI64Elements<[1, 32, 542, 37]> : tensor<4xsi64>, order = #NHWC}> - //CHECK: [[CONV1:%.+]] = VPU.NCE.Convolution([[PAD1]] - //CHECK: [[INSERT:%.+]] = tensor.insert_slice [[CONV1]] into [[LOOP_OUT_W]][0, 0, 0, [[LOOP_ITER_W]]] [1, 32, 540, [[INSERT_SIZE_W]]] [1, 1, 1, 1] - //CHECK-SAME: tensor<1x32x540x?xf16, {bounds = #const.OpaqueI64Elements<[1, 32, 540, 35]> : tensor<4xsi64>, order = #NHWC}> into tensor<1x32x540x?xf16, {bounds = #const.OpaqueI64Elements<[1, 32, 540, 960]> : tensor<4xsi64>, order = #NHWC}> - - //CHECK: scf.yield [[INSERT]] - - //CHECK: return [[LOOP_W]] : tensor<1x32x540x?xf16, {bounds = #const.OpaqueI64Elements<[1, 32, 540, 960]> : tensor<4xsi64>, order = #NHWC}> -} From 27609b095bfdeb16a96918e9d3ffcf3b6335ca9c Mon Sep 17 00:00:00 2001 From: Maksim Doronin Date: Fri, 27 Feb 2026 10:19:44 +0000 Subject: [PATCH 2/3] Generate build manifest for CiD builds (#259) * Increase timeout for CodeQL job * Generate build manifest * fix postcommit for release branch * Specify CiD subdir --- .github/actions/versions/action.yml | 87 ++++++++++++++++++++ 
.github/workflows/clang-format.yml | 2 +- .github/workflows/codeql.yml | 2 +- .github/workflows/job_build_cid.yml | 38 ++++++++- .github/workflows/job_build_plugin_linux.yml | 2 +- .github/workflows/scorecard.yml | 2 +- .github/workflows/ubuntu_22.yml | 2 +- .github/workflows/ubuntu_24.yml | 2 +- .github/workflows/windows_2022.yml | 2 +- 9 files changed, 130 insertions(+), 9 deletions(-) diff --git a/.github/actions/versions/action.yml b/.github/actions/versions/action.yml index bdaa7463fd..3fe75ec7db 100644 --- a/.github/actions/versions/action.yml +++ b/.github/actions/versions/action.yml @@ -1,6 +1,20 @@ name: Versions description: Parse git versions from config files +inputs: + build-manifest-dir: + description: 'Directory where build_manifest.json will be created (optional)' + required: false + default: '' + openvino-repo-path: + description: 'Path to OpenVINO repository (for TBB version detection)' + required: false + default: '' + npu-compiler-repo-path: + description: 'Path to NPU Compiler repository (for LLVM submodule SHA detection)' + required: false + default: '' + outputs: openvino-repository: description: 'OpenVINO Repository full name: fork/openvino' @@ -38,6 +52,15 @@ outputs: opencv-sha: description: 'OpenCV commit SHA' value: ${{ steps.read-ocv-sha.outputs.ocv-sha }} + llvm-sha: + description: 'LLVM SHA detected in NPU Compiler submodule' + value: ${{ steps.get-llvm-sha.outputs.llvm-sha }} + tbb-version: + description: 'TBB version detected from OpenVINO' + value: ${{ steps.get-tbb-version.outputs.tbb-version }} + build-manifest-path: + description: 'Absolute path to build_manifest.json file' + value: ${{ steps.create-build-manifest.outputs.build-manifest-path }} runs: using: 'composite' @@ -179,3 +202,67 @@ runs: echo "OpenCV commit sha = $OCV_SHA" echo "ocv-repository=$OCV_ORG/opencv" >> $GITHUB_OUTPUT echo "ocv-sha=$OCV_SHA" >> $GITHUB_OUTPUT + + - name: Get LLVM submodule commit sha + id: get-llvm-sha + shell: bash + run: | + LLVM_SHA="N/A" + 
+        if [[ -n "${{ inputs.npu-compiler-repo-path }}" ]]; then
+          LLVM_SHA=$(git -C "${{ inputs.npu-compiler-repo-path }}" ls-tree HEAD thirdparty/llvm-project | awk '{print $3}' || true)  # '|| true': under 'bash -eo pipefail' a failing git would abort the step before the N/A fallback below
+          [[ -z "$LLVM_SHA" ]] && LLVM_SHA="N/A"
+        fi
+
+        echo "LLVM commit sha = $LLVM_SHA"
+        echo "llvm-sha=$LLVM_SHA" >> $GITHUB_OUTPUT
+
+    - name: Get TBB version from OpenVINO  # scans <openvino-repo-path>/temp for TBBConfigVersion.cmake; emits "N/A" when nothing usable is found
+      id: get-tbb-version
+      shell: bash
+      run: |
+        TBB_VERSION="N/A"
+
+        if [[ -n "${{ inputs.openvino-repo-path }}" ]]; then
+          TBB_VERSION_FILES=$(find "${{ inputs.openvino-repo-path }}/temp" -name "TBBConfigVersion.cmake" 2>/dev/null || true)  # a missing temp/ dir yields an empty list instead of a failed step
+
+          if [[ -n "$TBB_VERSION_FILES" ]]; then
+            TBB_VERSION=$(grep -h 'set(PACKAGE_VERSION' $TBB_VERSION_FILES | \
+              grep -oE '[0-9]+\.[0-9]+\.[0-9]+' | head -1 || true)  # $TBB_VERSION_FILES is left unquoted so several paths split into separate args; no match must fall through to N/A
+          fi
+
+          [[ -z "$TBB_VERSION" ]] && TBB_VERSION="N/A"
+        fi
+
+        echo "TBB version = $TBB_VERSION"
+        echo "tbb-version=$TBB_VERSION" >> $GITHUB_OUTPUT
+
+    - name: Create build manifest  # writes build_manifest.json with jq; skipped when build-manifest-dir is empty
+      id: create-build-manifest
+      if: inputs.build-manifest-dir != ''
+      shell: bash
+      run: |
+        OUTPUT_DIR="${{ inputs.build-manifest-dir }}"
+
+        mkdir -p "$OUTPUT_DIR"
+
+        jq -n \
+          --arg openvino_sha "${{ steps.read-openvino-sha.outputs.openvino-sha }}" \
+          --arg npu_compiler_sha "${{ steps.get-npu-sha.outputs.npu-compiler-sha }}" \
+          --arg llvm_sha "${{ steps.get-llvm-sha.outputs.llvm-sha }}" \
+          --arg tbb_version "${{ steps.get-tbb-version.outputs.tbb-version }}" \
+          --arg opencv_sha "${{ steps.read-ocv-sha.outputs.ocv-sha }}" \
+          --arg omz_sha "${{ steps.read-omz-sha.outputs.omz-sha }}" \
+          '{
+            openvino_sha: $openvino_sha,
+            npu_compiler_sha: $npu_compiler_sha,
+            llvm_sha: $llvm_sha,
+            tbb_version: $tbb_version,
+            opencv_sha: $opencv_sha,
+            omz_sha: $omz_sha
+          }' > "$OUTPUT_DIR/build_manifest.json"
+
+        MANIFEST_PATH=$(cd "$OUTPUT_DIR" && pwd)/build_manifest.json
+        echo "Build manifest created at: $MANIFEST_PATH"
+        echo "build-manifest-path=$MANIFEST_PATH" >> $GITHUB_OUTPUT
+        cat "$MANIFEST_PATH"
diff --git a/.github/workflows/clang-format.yml
b/.github/workflows/clang-format.yml index f14375691b..d920a03cef 100644 --- a/.github/workflows/clang-format.yml +++ b/.github/workflows/clang-format.yml @@ -5,7 +5,7 @@ on: push: branches: - develop - - 'releases/*' + - releases/** concurrency: group: clang-format-${{ github.event_name }}-${{ github.ref_name }} diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 78a9517de0..a2c1722529 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -5,7 +5,7 @@ on: push: branches: - develop - - 'releases/*' + - releases/** schedule: - cron: '24 8 * * 6' diff --git a/.github/workflows/job_build_cid.yml b/.github/workflows/job_build_cid.yml index 448be4ac9e..9ebd5c4f0e 100644 --- a/.github/workflows/job_build_cid.yml +++ b/.github/workflows/job_build_cid.yml @@ -287,6 +287,15 @@ jobs: -S ${OPENVINO_REPO} \ -B ${OPENVINO_BUILD_DIR} + - name: Get versions with build manifest + uses: ./npu_actions/.github/actions/versions + id: versions-with-manifest + if: ${{ !steps.cache-restore.outputs.cache-hit }} + with: + build-manifest-dir: ${{ env.CID_PACKAGE_ARTIFACTS_DIR }} + openvino-repo-path: ${{ env.OPENVINO_REPO }} + npu-compiler-repo-path: ${{ env.NPU_COMPILER_REPO }} + - name: CMake build - CiD targets if: ${{ !steps.cache-restore.outputs.cache-hit }} run: | @@ -299,13 +308,16 @@ jobs: - name: CMake cpack - CiD target if: ${{ !steps.cache-restore.outputs.cache-hit }} run: | + COMPONENT="CiD" + GENERATOR="Ninja" + cpack -V \ --config "${OPENVINO_BUILD_DIR}/CPackConfig.cmake" \ -C "${CMAKE_BUILD_TYPE}" \ -G "${{ steps.package-params.outputs.cpack-generator }}" \ -B "${CID_PACKAGE_ARTIFACTS_DIR}" \ - -D CPACK_COMPONENTS_ALL=CiD \ - -D CPACK_CMAKE_GENERATOR=Ninja \ + -D CPACK_COMPONENTS_ALL=${COMPONENT} \ + -D CPACK_CMAKE_GENERATOR=${GENERATOR} \ -D CPACK_PACKAGE_FILE_NAME="${{ steps.package-name.outputs.cid-package-base-name }}" - name: CiD Package renaming @@ -313,6 +325,28 @@ jobs: mv "${CID_PACKAGE_ARTIFACTS_DIR}/"*.${{ 
steps.package-params.outputs.package-extension }} \
             "${CID_PACKAGE_ARTIFACTS_DIR}/${{ steps.package-name.outputs.cid-package-full-name }}"
 
+      - name: Inject build manifest into CiD package  # adds build_manifest.json (by bare file name) to the already-built zip / tar.gz package
+        if: ${{ !steps.cache-restore.outputs.cache-hit }}
+        run: |
+          PACKAGE_PATH="${CID_PACKAGE_ARTIFACTS_DIR}/${{ steps.package-name.outputs.cid-package-full-name }}"
+          EXT="${{ steps.package-params.outputs.package-extension }}"
+          MANIFEST_PATH="${{ steps.versions-with-manifest.outputs.build-manifest-path }}"
+          MANIFEST_FILENAME=$(basename "${MANIFEST_PATH}")
+          MANIFEST_DIR=$(dirname "${MANIFEST_PATH}")
+
+          PACKAGE_ABS_PATH="$(realpath "${PACKAGE_PATH}")"  # quoted: the artifacts path must not be word-split or globbed
+
+          if [[ "${EXT}" == "zip" ]]; then
+            echo "Updating ZIP archive using 7-Zip..."
+            (cd "${MANIFEST_DIR}" && 7z u "${PACKAGE_ABS_PATH}" "${MANIFEST_FILENAME}")
+          elif [[ "${EXT}" == "tar.gz" ]]; then
+            echo "Updating TAR.GZ archive using tar..."
+            gunzip -S .gz "${PACKAGE_ABS_PATH}"
+            PACKAGE_ABS_PATH="$(realpath "${PACKAGE_PATH%.gz}")"  # gunzip dropped the .gz suffix; tar -r needs the uncompressed archive
+            (cd "${MANIFEST_DIR}" && tar -rf "${PACKAGE_ABS_PATH}" "${MANIFEST_FILENAME}")
+            gzip "${PACKAGE_ABS_PATH}"
+          fi
+
       - name: Cache CiD artifacts
         if: ${{ inputs.build-cache && !steps.cache-restore.outputs.cache-hit }}
         uses: actions/cache/save@9255dc7a253b0ccc959486e2bca901246202afeb # v5.0.1
diff --git a/.github/workflows/job_build_plugin_linux.yml b/.github/workflows/job_build_plugin_linux.yml
index e67bf00192..392f3675b1 100644
--- a/.github/workflows/job_build_plugin_linux.yml
+++ b/.github/workflows/job_build_plugin_linux.yml
@@ -56,7 +56,7 @@ jobs:
   Build:
     name: Build
     runs-on: ${{ inputs.build-runner }}
-    timeout-minutes: 240
+    timeout-minutes: 360
     permissions:
       actions: read
       contents: read
diff --git a/.github/workflows/scorecard.yml b/.github/workflows/scorecard.yml
index 9de0eab66b..330b2ad4a8 100644
--- a/.github/workflows/scorecard.yml
+++ b/.github/workflows/scorecard.yml
@@ -8,7 +8,7 @@ on:
   push:
     branches:
       - develop
-      - 'releases/*'
+      - releases/**
 
 permissions: read-all
 
diff --git
a/.github/workflows/ubuntu_22.yml b/.github/workflows/ubuntu_22.yml index 89b4551a9c..ed40a06b4c 100644 --- a/.github/workflows/ubuntu_22.yml +++ b/.github/workflows/ubuntu_22.yml @@ -5,7 +5,7 @@ on: push: branches: - develop - - 'releases/*' + - releases/** concurrency: group: linux-${{ github.event_name }}-${{ github.ref_name }}-ubuntu_22_04 diff --git a/.github/workflows/ubuntu_24.yml b/.github/workflows/ubuntu_24.yml index d195412512..1cfb838ceb 100644 --- a/.github/workflows/ubuntu_24.yml +++ b/.github/workflows/ubuntu_24.yml @@ -5,7 +5,7 @@ on: push: branches: - develop - - 'releases/*' + - releases/** concurrency: group: linux-${{ github.event_name }}-${{ github.ref_name }}-ubuntu_24_04 diff --git a/.github/workflows/windows_2022.yml b/.github/workflows/windows_2022.yml index 4f0b76cfaa..0c5bca9af4 100644 --- a/.github/workflows/windows_2022.yml +++ b/.github/workflows/windows_2022.yml @@ -5,7 +5,7 @@ on: push: branches: - develop - - 'releases/*' + - releases/** concurrency: group: windows-${{ github.event_name }}-${{ github.ref_name }}-2022 From b2d63e7098d0aa305e54e481ca1350938a121b2e Mon Sep 17 00:00:00 2001 From: "Doronin, Maksim" Date: Fri, 27 Feb 2026 11:05:31 +0000 Subject: [PATCH 3/3] Expose restoreWeightsOffsets --- src/vpux_driver_compiler/src/vpux_compiler_l0/vcl_common.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/vpux_driver_compiler/src/vpux_compiler_l0/vcl_common.cpp b/src/vpux_driver_compiler/src/vpux_compiler_l0/vcl_common.cpp index 2ae22753ec..bcc1b1a500 100644 --- a/src/vpux_driver_compiler/src/vpux_compiler_l0/vcl_common.cpp +++ b/src/vpux_driver_compiler/src/vpux_compiler_l0/vcl_common.cpp @@ -634,10 +634,7 @@ vcl_result_t BuildInfo::prepareModel(const uint8_t* modelIR, uint64_t modelIRSiz throw std::invalid_argument(error_message.str()); } -#ifdef VPUX_DEVELOPER_BUILD - // E#103359: WS is only available in developer builds restoreWeightsOffsets(model, logger); -#endif // VPUX_DEVELOPER_BUILD if (enableProfiling) { 
stopWatch.stop();