Skip to content

Commit 4e6bafa

Browse files
rascani and claude committed
Fix conv2d scratch buffer allocation to match CMSIS-NN wrapper dispatch
Use arm_convolve_wrapper_s8_get_buffer_size instead of arm_convolve_s8_get_buffer_size so the buffer size matches whichever specialized kernel arm_convolve_wrapper_s8 will actually dispatch to at runtime (1x1 fast, 1xN, or general). Also remove the Error::NotFound carve-out that silently proceeded with a null scratch buffer — CMSIS-NN returns ARM_CMSIS_NN_ARG_ERROR when ctx->buf is NULL and a buffer is required, so fail immediately on any allocation error, consistent with the other cortex_m conv ops. Update CMSIS-NN from v7.0.0 to 84303a51fd867c7ddbd23068b7ce930af1b6269d and remove GIT_SHALLOW (incompatible with SHA-based FetchContent pins). Co-authored-by: Claude <noreply@anthropic.com>
1 parent 411061f commit 4e6bafa

File tree

2 files changed

+19
-16
lines changed

2 files changed

+19
-16
lines changed

backends/cortex_m/CMakeLists.txt

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@ include(FetchContent)
2323

2424
# CMSIS-NN configuration with dynamic path detection
2525
set(CMSIS_NN_VERSION
26-
"v7.0.0"
26+
"84303a51fd867c7ddbd23068b7ce930af1b6269d"
2727
CACHE STRING "CMSIS-NN version to download"
2828
)
2929
set(CMSIS_NN_LOCAL_PATH
@@ -45,7 +45,6 @@ else()
4545
cmsis_nn
4646
GIT_REPOSITORY https://github.com/ARM-software/CMSIS-NN.git
4747
GIT_TAG ${CMSIS_NN_VERSION}
48-
GIT_SHALLOW TRUE
4948
)
5049

5150
FetchContent_MakeAvailable(cmsis_nn)

backends/cortex_m/ops/op_quantized_conv2d.cpp

Lines changed: 18 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -188,24 +188,28 @@ Tensor& quantized_conv2d_out(
188188
cmsis_context.buf = nullptr;
189189
cmsis_context.size = 0;
190190

191-
const size_t buffer_bytes = static_cast<size_t>(
192-
arm_convolve_s8_get_buffer_size(&input_dims, &filter_dims));
191+
const int32_t buffer_bytes = arm_convolve_wrapper_s8_get_buffer_size(
192+
&conv_params, &input_dims, &filter_dims, &output_dims);
193+
if (buffer_bytes < 0) {
194+
ET_LOG(
195+
Error, "quantized_conv2d_out: CMSIS-NN buffer size calculation failed");
196+
context.fail(Error::Internal);
197+
return out;
198+
}
193199
if (buffer_bytes > 0) {
194200
auto buffer_or_error =
195201
context.allocate_temp(buffer_bytes, kCortexMMveAlignment);
196202
if (!buffer_or_error.ok()) {
197-
if (buffer_or_error.error() != Error::NotFound) {
198-
ET_LOG(
199-
Error,
200-
"quantized_conv2d_out: failed to allocate scratch buffer (%d)",
201-
static_cast<int>(buffer_or_error.error()));
202-
context.fail(buffer_or_error.error());
203-
return out;
204-
}
205-
} else {
206-
cmsis_context.buf = buffer_or_error.get();
207-
cmsis_context.size = buffer_bytes;
203+
ET_LOG(
204+
Error,
205+
"quantized_conv2d_out: failed to allocate scratch buffer (%d bytes, error %d)",
206+
static_cast<int>(buffer_bytes),
207+
static_cast<int>(buffer_or_error.error()));
208+
context.fail(buffer_or_error.error());
209+
return out;
208210
}
211+
cmsis_context.buf = buffer_or_error.get();
212+
cmsis_context.size = buffer_bytes;
209213
}
210214

211215
const arm_cmsis_nn_status status = arm_convolve_wrapper_s8(
@@ -224,7 +228,7 @@ Tensor& quantized_conv2d_out(
224228
if (status != ARM_CMSIS_NN_SUCCESS) {
225229
ET_LOG(
226230
Error,
227-
"quantized_conv2d_out: arm_convolve_s8 failed with status %d",
231+
"quantized_conv2d_out: arm_convolve_wrapper_s8 failed with status %d",
228232
status);
229233
context.fail(Error::Internal);
230234
}

0 commit comments

Comments
 (0)