5 changes: 5 additions & 0 deletions backends/arm/runtime/EthosUBackend.cpp
@@ -10,6 +10,11 @@
* ethos-u-core-driver for hardware interaction.
*/

// Workaround for runtime/core/portable_type/c10/c10/util/Float16-math.h
Review comment (author): @perheld has a PR on this here: #16221

#if defined(__GNUC__) && defined(__ZEPHYR__)
#pragma GCC diagnostic ignored "-Wdouble-promotion"
#endif

#include <cstdint>
#include <cstring>
#include <memory>
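For reference, a scoped variant of the same workaround (a sketch, not part of this diff): GCC's push/pop diagnostics can limit the suppression to the header that triggers -Wdouble-promotion instead of the whole translation unit. The direct include path below is illustrative; in this backend the header is pulled in transitively.

#if defined(__GNUC__) && defined(__ZEPHYR__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdouble-promotion"
#endif
// Illustrative direct include of the header named in the workaround comment above.
#include <executorch/runtime/core/portable_type/c10/c10/util/Float16-math.h>
#if defined(__GNUC__) && defined(__ZEPHYR__)
#pragma GCC diagnostic pop
#endif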
1 change: 1 addition & 0 deletions backends/cortex_m/CMakeLists.txt
@@ -79,6 +79,7 @@ target_link_libraries(
cortex_m_kernels
PRIVATE cmsis-nn
PRIVATE executorch
PRIVATE kernels_util_all_deps
)

# Include directories for cortex_m_kernels
255 changes: 255 additions & 0 deletions examples/arm/zephyr/CMakeLists.txt
@@ -0,0 +1,255 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
# Copyright 2025 Arm Limited and/or its affiliates.
#
# SPDX-License-Identifier: Apache-2.0

cmake_minimum_required(VERSION 3.24)

# Disable install() rule generation while building under Zephyr. The sample
# application never runs `cmake --install`, and skipping the rules avoids export
# dependency checks when Zephyr provides targets such as ethosu_core_driver.
set(CMAKE_SKIP_INSTALL_RULES
ON
CACHE BOOL "" FORCE
)

# A configuration overlay disables the default portable ops so that a selective
# operator build can be used.
set(ET_PTE_FILE_PATH
""
CACHE FILEPATH "Path to the ExecuTorch .pte (or .bpte) model to embed"
)
set(ET_PTE_SECTION
"network_model_sec"
CACHE STRING "Section attribute used for the generated model data"
)

if(NOT ET_PTE_FILE_PATH)
message(
FATAL_ERROR
"ET_PTE_FILE_PATH must point to the ExecuTorch .pte (or .bpte) model to embed."
)
endif()

if(NOT IS_ABSOLUTE "${ET_PTE_FILE_PATH}")
get_filename_component(
ET_PTE_FILE_PATH "${ET_PTE_FILE_PATH}" ABSOLUTE BASE_DIR
"${CMAKE_CURRENT_SOURCE_DIR}"
)
endif()

if(NOT EXISTS "${ET_PTE_FILE_PATH}")
message(
FATAL_ERROR
"Could not find ExecuTorch model at ET_PTE_FILE_PATH: ${ET_PTE_FILE_PATH}"
)
endif()

set(ET_PTE_FILE_PATH
"${ET_PTE_FILE_PATH}"
CACHE FILEPATH "Path to the ExecuTorch .pte (or .bpte) model to embed"
FORCE
)

execute_process(
COMMAND
python "${CMAKE_CURRENT_LIST_DIR}/../../../codegen/tools/gen_oplist.py"
--model_file_path=${ET_PTE_FILE_PATH}
--output_path=${CMAKE_CURRENT_BINARY_DIR}/temp.yaml
OUTPUT_VARIABLE CMD_RESULT
)

if(CMD_RESULT MATCHES "aten::" OR CMD_RESULT MATCHES "dim_order_ops::")
set(FOUND_OPS_IN_FILE "true")
else()
set(FOUND_OPS_IN_FILE "false")
endif()

if(${FOUND_OPS_IN_FILE})
set(EXECUTORCH_SELECT_OPS_LIST "")
set(EXECUTORCH_SELECT_OPS_MODEL
"${ET_PTE_FILE_PATH}"
CACHE STRING "Select operators from this ExecuTorch model" FORCE
)
set(_EXECUTORCH_GEN_ZEPHYR_PORTABLE_OPS ON)
message(
"gen_oplist: EXECUTORCH_SELECT_OPS_MODEL=${ET_PTE_FILE_PATH} is used to auto generate ops from"
)
else()
set(EXECUTORCH_SELECT_OPS_LIST "")
set(EXECUTORCH_SELECT_OPS_MODEL "")
set(_EXECUTORCH_GEN_ZEPHYR_PORTABLE_OPS OFF)
message(
"gen_oplist: No non delagated ops was found in ${ET_PTE_FILE_PATH} no ops added to build"
)
endif()

set(CONF_FILE "${CMAKE_CURRENT_SOURCE_DIR}/prj.conf")

find_package(Zephyr REQUIRED HINTS $ENV{ZEPHYR_BASE})
project(executorch_executor_runner)

find_package(
Python3
COMPONENTS Interpreter
REQUIRED
)

set(CMAKE_CXX_FLAGS
"${CMAKE_CXX_FLAGS} -Wall -Wno-switch -Wno-float-conversion -Wno-double-promotion -ffunction-sections -fdata-sections"
)

# Always use selective build since embedded devices are resource constrained

# Include ExecuTorch selective build utilities. Use EXECUTORCH_DIR if available,
# otherwise use Zephyr's module discovery.
if(NOT DEFINED EXECUTORCH_DIR)
message(
STATUS
"ZEPHYR_EXECUTORCH_MODULE_DIR set to : ${ZEPHYR_EXECUTORCH_MODULE_DIR}"
)
if(DEFINED ZEPHYR_EXECUTORCH_MODULE_DIR)
set(EXECUTORCH_DIR ${ZEPHYR_EXECUTORCH_MODULE_DIR})
message(
STATUS "Using Zephyr module discovery: EXECUTORCH_DIR=${EXECUTORCH_DIR}"
)
else()
message(
FATAL_ERROR
"ExecutorTorch module not found. Ensure it's properly configured in your Zephyr workspace."
)
endif()
else()
message(STATUS "Using predefined EXECUTORCH_DIR=${EXECUTORCH_DIR}")
endif()

# Set EXECUTORCH_ROOT for the Codegen.cmake file
set(EXECUTORCH_ROOT ${EXECUTORCH_DIR})

# Ensure the portable kernels target exists even when portable ops are disabled
# in the global build.
if(NOT TARGET portable_kernels)
set(EXECUTORCH_PORTABLE_BUILD_KERNELS_ONLY ON)
add_subdirectory(
${EXECUTORCH_DIR}/kernels/portable
${CMAKE_CURRENT_BINARY_DIR}/executorch/kernels/portable
)
unset(EXECUTORCH_PORTABLE_BUILD_KERNELS_ONLY)
endif()
set(EXECUTORCH_OPS_LIB "")
if(_EXECUTORCH_GEN_ZEPHYR_PORTABLE_OPS)
include(${EXECUTORCH_DIR}/tools/cmake/Utils.cmake)
include(${EXECUTORCH_DIR}/tools/cmake/Codegen.cmake)
if(NOT DEFINED EXECUTORCH_ENABLE_DTYPE_SELECTIVE_BUILD)
set(EXECUTORCH_ENABLE_DTYPE_SELECTIVE_BUILD "")
endif()
gen_selected_ops(
LIB_NAME
"cpu_portable_ops_lib"
OPS_SCHEMA_YAML
""
ROOT_OPS
"${EXECUTORCH_SELECT_OPS_LIST}"
INCLUDE_ALL_OPS
""
OPS_FROM_MODEL
"${EXECUTORCH_SELECT_OPS_MODEL}"
DTYPE_SELECTIVE_BUILD
"${EXECUTORCH_ENABLE_DTYPE_SELECTIVE_BUILD}"
)
generate_bindings_for_kernels(
LIB_NAME "cpu_portable_ops_lib" FUNCTIONS_YAML
${EXECUTORCH_DIR}/kernels/portable/functions.yaml DTYPE_SELECTIVE_BUILD
"${EXECUTORCH_ENABLE_DTYPE_SELECTIVE_BUILD}"
)
gen_operators_lib(
LIB_NAME
"cpu_portable_ops_lib"
KERNEL_LIBS
portable_kernels
DEPS
executorch
DTYPE_SELECTIVE_BUILD
"${EXECUTORCH_ENABLE_DTYPE_SELECTIVE_BUILD}"
)
set(EXECUTORCH_OPS_LIB "cpu_portable_ops_lib")
endif()

set(_local_flatcc_root ${CMAKE_BINARY_DIR}/flatcc_src)
if(NOT EXISTS ${_local_flatcc_root}/CMakeLists.txt)
file(MAKE_DIRECTORY ${_local_flatcc_root})
execute_process(
COMMAND ${CMAKE_COMMAND} -E copy_directory
${EXECUTORCH_DIR}/third-party/flatcc ${_local_flatcc_root}
)
endif()
set(EXECUTORCH_FLATCC_SOURCE_ROOT
${_local_flatcc_root}
CACHE PATH "" FORCE
)
set(EXECUTORCH_FLATCC_INSTALL_ROOT
${_local_flatcc_root}
CACHE PATH "" FORCE
)

set(app_sources
src/arm_executor_runner.cpp
${EXECUTORCH_DIR}/examples/arm/executor_runner/arm_memory_allocator.cpp
)
target_sources(app PRIVATE ${app_sources})

set(_model_pte_header ${CMAKE_CURRENT_BINARY_DIR}/model_pte.h)
add_custom_command(
OUTPUT ${_model_pte_header}
COMMAND
${Python3_EXECUTABLE}
${EXECUTORCH_DIR}/examples/arm/executor_runner/pte_to_header.py --pte
${ET_PTE_FILE_PATH} --outdir ${CMAKE_CURRENT_BINARY_DIR} --section
${ET_PTE_SECTION}
DEPENDS ${ET_PTE_FILE_PATH}
${EXECUTORCH_DIR}/examples/arm/executor_runner/pte_to_header.py
COMMENT "Converting ${ET_PTE_FILE_PATH} to model_pte.h"
)
add_custom_target(gen_model_header DEPENDS ${_model_pte_header})
add_dependencies(app gen_model_header)
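For context (a sketch, not part of this diff), this is roughly how the generated header might be consumed by the runner, assuming model_pte.h defines an in-memory byte array named model_pte (the actual symbol and section attributes are whatever pte_to_header.py emits) and that the ExecuTorch BufferDataLoader and Program headers are on the include path.

#include <executorch/extension/data_loader/buffer_data_loader.h>
#include <executorch/runtime/executor/program.h>

#include "model_pte.h"  // generated at build time by pte_to_header.py

bool load_embedded_program() {
  // Wrap the ROM/flash-resident .pte bytes so the Program can be parsed
  // without copying the whole model into RAM.
  executorch::extension::BufferDataLoader loader(model_pte, sizeof(model_pte));
  auto program = executorch::runtime::Program::load(&loader);
  return program.ok();
}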

if(DEFINED CONFIG_EXECUTORCH_METHOD_ALLOCATOR_POOL_SIZE)
target_compile_definitions(
app
PRIVATE
ET_ARM_METHOD_ALLOCATOR_POOL_SIZE=${CONFIG_EXECUTORCH_METHOD_ALLOCATOR_POOL_SIZE}
)
endif()
if(DEFINED CONFIG_EXECUTORCH_TEMP_ALLOCATOR_POOL_SIZE)
target_compile_definitions(
app
PRIVATE
ET_ARM_BAREMETAL_SCRATCH_TEMP_ALLOCATOR_POOL_SIZE=${CONFIG_EXECUTORCH_TEMP_ALLOCATOR_POOL_SIZE}
)
endif()

# Link with ExecuTorch and our operators library
target_link_libraries(app PRIVATE libexecutorch)
if(EXECUTORCH_OPS_LIB)
target_link_libraries(app PRIVATE ${EXECUTORCH_OPS_LIB})
endif()
if(CONFIG_CPU_CORTEX_M)
if(TARGET cortex_m_ops_lib)
target_link_libraries(app PRIVATE cortex_m_ops_lib)
endif()
if(TARGET cortex_m_kernels)
target_link_libraries(app PRIVATE cortex_m_kernels)
endif()
endif()
if(TARGET executorch_delegate_ethos_u)
target_link_libraries(app PRIVATE executorch_delegate_ethos_u)
endif()
if(TARGET ethosu_core_driver)
target_link_libraries(app PRIVATE ethosu_core_driver)
endif()

# Add include directories for sources and generated headers
target_include_directories(app PRIVATE src ${CMAKE_CURRENT_BINARY_DIR})
get_target_property(OUT app LINK_LIBRARIES)
message(STATUS ${OUT})
31 changes: 31 additions & 0 deletions examples/arm/zephyr/Kconfig
@@ -0,0 +1,31 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
# Copyright 2025 Arm Limited and/or its affiliates.
#
# SPDX-License-Identifier: Apache-2.0

source "Kconfig.zephyr"

menu "ExecuTorch Zephyr sample configuration"

config EXECUTORCH_METHOD_ALLOCATOR_POOL_SIZE
int "Method allocator pool size in bytes"
default 16384
depends on EXECUTORCH
help
Size of the method allocator pool in bytes. This memory is used
for allocating ExecuTorch method instances and their metadata.
Default is 16KB which should be sufficient for most small models.
Larger models may require more memory.

config EXECUTORCH_TEMP_ALLOCATOR_POOL_SIZE
int "Temporary allocator pool size in bytes"
default 2048
depends on EXECUTORCH
help
Size of the temporary allocator pool in bytes. This memory is used
for temporary allocations during model execution such as intermediate
tensor calculations. Default is 2KB which should be sufficient for
simple operations. Complex models may require more memory.

endmenu
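For context (a sketch, not part of this diff), the method allocator symbol above typically ends up backing a static pool in the runner through the ET_ARM_METHOD_ALLOCATOR_POOL_SIZE compile definition set in the sample CMakeLists; the fallback default and the use of the base MemoryAllocator class (rather than the runner's own allocator) are assumptions for illustration.

#include <cstdint>

#include <executorch/runtime/core/memory_allocator.h>

#ifndef ET_ARM_METHOD_ALLOCATOR_POOL_SIZE
#define ET_ARM_METHOD_ALLOCATOR_POOL_SIZE (16 * 1024)  // matches the Kconfig default
#endif

// Static pool sized at build time from the Kconfig value; handed to the
// ExecuTorch runtime for Method instances and their metadata.
static uint8_t method_allocator_pool[ET_ARM_METHOD_ALLOCATOR_POOL_SIZE];
static executorch::runtime::MemoryAllocator method_allocator(
    sizeof(method_allocator_pool), method_allocator_pool);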
15 changes: 15 additions & 0 deletions examples/arm/zephyr/boards/mps3_corstone300_fvp.overlay
@@ -0,0 +1,15 @@
/* Copyright 2025 Arm Limited and/or its affiliates.
*
* SPDX-License-Identifier: Apache-2.0
*/

/* Ensure ExecuTorch scratch buffers live in shared SRAM that the Ethos-U
* DMA can access. The default board DTS routes Zephyr's general-purpose
* SRAM to DTCM, which the NPU cannot reach. Override the choice so that
* .data/.bss land in ISRAM (0x3100_0000) instead.
*/
/ {
chosen {
zephyr,sram = &isram;
};
};
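To illustrate why this override matters (a sketch, not part of this diff): a zero-initialized static scratch pool like the one below is placed in .bss, which this overlay routes to ISRAM so the Ethos-U DMA can reach it. The macro name matches the compile definition set in the sample CMakeLists; the fallback value is an assumption.

#include <cstdint>

#ifndef ET_ARM_BAREMETAL_SCRATCH_TEMP_ALLOCATOR_POOL_SIZE
#define ET_ARM_BAREMETAL_SCRATCH_TEMP_ALLOCATOR_POOL_SIZE (2 * 1024)  // illustrative fallback
#endif

// Zero-initialized, so it lands in .bss; with zephyr,sram pointed at &isram,
// that section lives at 0x3100_0000 where the NPU can access it.
static uint8_t temp_allocator_pool[ET_ARM_BAREMETAL_SCRATCH_TEMP_ALLOCATOR_POOL_SIZE];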
40 changes: 40 additions & 0 deletions examples/arm/zephyr/prj.conf
@@ -0,0 +1,40 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
# Copyright 2025 Arm Limited and/or its affiliates.
#
# SPDX-License-Identifier: Apache-2.0

# Enable ExecuTorch
CONFIG_EXECUTORCH=y

# C++
CONFIG_CPP=y
CONFIG_STD_CPP17=y

# Needed as kernels/portable/cpu/op_allclose.cpp uses isfinite() instead of std::isfinite()
Review comment (author): I have a PR on its way to fix this; it will be uploaded after this one is merged, as it depends on this PR.

# If you fully delegated your model to Ethos-U you could remove this and save some space
CONFIG_NEWLIB_LIBC=y

# Stack and heap sizes, increased for ExecuTorch memory requirements
CONFIG_MAIN_STACK_SIZE=8192
CONFIG_HEAP_MEM_POOL_SIZE=32768

# Logging
CONFIG_REQUIRES_FLOAT_PRINTF=y

# Configuration for selective operator builds
# Keep the portable operator registry enabled so selective builds can pull in
# only the kernels required by the model (via EXECUTORCH_SELECT_OPS_MODEL).
# Setting this to y would include all portable ops
CONFIG_EXECUTORCH_BUILD_PORTABLE_OPS=n

# Enable the Zephyr Ethos-U driver so executorch_delegate_ethos_u can reserve
# and use the hardware instance exposed by the board DTS.
CONFIG_ETHOS_U=y

# Add model specific configs
# The default 1 KB pool is too small even for the tiny sample model;
# bump to 256 KB so method buffers and inputs fit during setup.
CONFIG_EXECUTORCH_METHOD_ALLOCATOR_POOL_SIZE=262144
# Ethos-U scratch memory requirements scale with the compiled network.
CONFIG_EXECUTORCH_TEMP_ALLOCATOR_POOL_SIZE=1572864