Fix conv2d scratch buffer allocation to match CMSIS-NN wrapper dispatch

rascani · claude · rascani · commit 4e6bafaa12ac · 2026-03-06T16:59:10.000-08:00
Use arm_convolve_wrapper_s8_get_buffer_size instead of
arm_convolve_s8_get_buffer_size so the buffer size matches whichever
specialized kernel arm_convolve_wrapper_s8 will actually dispatch to
at runtime (1x1 fast, 1xN, or general).

Also remove the Error::NotFound carve-out that silently proceeded with
a null scratch buffer — CMSIS-NN returns ARM_CMSIS_NN_ARG_ERROR when
ctx->buf is NULL and a buffer is required, so fail immediately on any
allocation error, consistent with the other cortex_m conv ops.

Update CMSIS-NN from v7.0.0 to 84303a51fd867c7ddbd23068b7ce930af1b6269d
and remove GIT_SHALLOW (incompatible with SHA-based FetchContent pins).

Co-authored-by: Claude <noreply@anthropic.com>
diff --git a/backends/cortex_m/CMakeLists.txt b/backends/cortex_m/CMakeLists.txt
@@ -23,7 +23,7 @@ include(FetchContent)
 
 # CMSIS-NN configuration with dynamic path detection
 set(CMSIS_NN_VERSION
-    "v7.0.0"
+    "84303a51fd867c7ddbd23068b7ce930af1b6269d"
     CACHE STRING "CMSIS-NN version to download"
 )
 set(CMSIS_NN_LOCAL_PATH
@@ -45,7 +45,6 @@ else()
     cmsis_nn
     GIT_REPOSITORY https://github.com/ARM-software/CMSIS-NN.git
     GIT_TAG ${CMSIS_NN_VERSION}
-    GIT_SHALLOW TRUE
   )
 
   FetchContent_MakeAvailable(cmsis_nn)
diff --git a/backends/cortex_m/ops/op_quantized_conv2d.cpp b/backends/cortex_m/ops/op_quantized_conv2d.cpp
@@ -188,24 +188,28 @@ Tensor& quantized_conv2d_out(
   cmsis_context.buf = nullptr;
   cmsis_context.size = 0;
 
-  const size_t buffer_bytes = static_cast<size_t>(
-      arm_convolve_s8_get_buffer_size(&input_dims, &filter_dims));
+  const int32_t buffer_bytes = arm_convolve_wrapper_s8_get_buffer_size(
+      &conv_params, &input_dims, &filter_dims, &output_dims);
+  if (buffer_bytes < 0) {
+    ET_LOG(
+        Error, "quantized_conv2d_out: CMSIS-NN buffer size calculation failed");
+    context.fail(Error::Internal);
+    return out;
+  }
   if (buffer_bytes > 0) {
     auto buffer_or_error =
         context.allocate_temp(buffer_bytes, kCortexMMveAlignment);
     if (!buffer_or_error.ok()) {
-      if (buffer_or_error.error() != Error::NotFound) {
-        ET_LOG(
-            Error,
-            "quantized_conv2d_out: failed to allocate scratch buffer (%d)",
-            static_cast<int>(buffer_or_error.error()));
-        context.fail(buffer_or_error.error());
-        return out;
-      }
-    } else {
-      cmsis_context.buf = buffer_or_error.get();
-      cmsis_context.size = buffer_bytes;
+      ET_LOG(
+          Error,
+          "quantized_conv2d_out: failed to allocate scratch buffer (%d bytes, error %d)",
+          static_cast<int>(buffer_bytes),
+          static_cast<int>(buffer_or_error.error()));
+      context.fail(buffer_or_error.error());
+      return out;
     }
+    cmsis_context.buf = buffer_or_error.get();
+    cmsis_context.size = buffer_bytes;
   }
 
   const arm_cmsis_nn_status status = arm_convolve_wrapper_s8(
@@ -224,7 +228,7 @@ Tensor& quantized_conv2d_out(
   if (status != ARM_CMSIS_NN_SUCCESS) {
     ET_LOG(
         Error,
-        "quantized_conv2d_out: arm_convolve_s8 failed with status %d",
+        "quantized_conv2d_out: arm_convolve_wrapper_s8 failed with status %d",
         status);
     context.fail(Error::Internal);
   }

Original file line number	Diff line number	Diff line change
`@@ -23,7 +23,7 @@ include(FetchContent)`
`23`	`23`
`24`	`24`	`# CMSIS-NN configuration with dynamic path detection`
`25`	`25`	`set(CMSIS_NN_VERSION`
`26`		`- "v7.0.0"`
	`26`	`+ "84303a51fd867c7ddbd23068b7ce930af1b6269d"`
`27`	`27`	`CACHE STRING "CMSIS-NN version to download"`
`28`	`28`	`)`
`29`	`29`	`set(CMSIS_NN_LOCAL_PATH`
`@@ -45,7 +45,6 @@ else()`
`45`	`45`	`cmsis_nn`
`46`	`46`	`GIT_REPOSITORY https://github.com/ARM-software/CMSIS-NN.git`
`47`	`47`	`GIT_TAG ${CMSIS_NN_VERSION}`
`48`		`- GIT_SHALLOW TRUE`
`49`	`48`	`)`
`50`	`49`
`51`	`50`	`FetchContent_MakeAvailable(cmsis_nn)`