From 31f87965433afd31cff9e2a87b2434409e993298 Mon Sep 17 00:00:00 2001 From: Adrian Lundell Date: Tue, 2 Dec 2025 19:05:43 +0100 Subject: [PATCH 1/2] Arm backend: Add support for Zephyr as a external module This commit has everything needed to build executorch from zephyr and include it as a external module without having any changes in zephyr. Based on code by BujSet. Co-authored-by: BujSet Co-authored-by: Zingo Andersen Co-authored-by: Per Held Change-Id: I1a1aeb8aa02f1b5e3a578dd25d031919c9925019 Signed-off-by: Zingo Andersen --- backends/arm/runtime/EthosUBackend.cpp | 5 + backends/cortex_m/CMakeLists.txt | 1 + examples/arm/zephyr/CMakeLists.txt | 255 +++++++++ examples/arm/zephyr/Kconfig | 31 ++ .../boards/mps3_corstone300_fvp.overlay | 15 + examples/arm/zephyr/prj.conf | 40 ++ .../arm/zephyr/src/arm_executor_runner.cpp | 491 ++++++++++++++++++ .../default/{arm_zephyr.cpp => zephyr.cpp} | 0 zephyr/CMakeLists.txt | 160 ++++++ zephyr/Kconfig | 52 ++ zephyr/README.md | 161 +++++- zephyr/executorch.yaml | 6 + zephyr/module.yml | 8 +- 13 files changed, 1220 insertions(+), 5 deletions(-) create mode 100644 examples/arm/zephyr/CMakeLists.txt create mode 100644 examples/arm/zephyr/Kconfig create mode 100644 examples/arm/zephyr/boards/mps3_corstone300_fvp.overlay create mode 100644 examples/arm/zephyr/prj.conf create mode 100644 examples/arm/zephyr/src/arm_executor_runner.cpp rename runtime/platform/default/{arm_zephyr.cpp => zephyr.cpp} (100%) create mode 100644 zephyr/CMakeLists.txt create mode 100644 zephyr/Kconfig create mode 100644 zephyr/executorch.yaml diff --git a/backends/arm/runtime/EthosUBackend.cpp b/backends/arm/runtime/EthosUBackend.cpp index c339f4a6164..f7ad6242f06 100644 --- a/backends/arm/runtime/EthosUBackend.cpp +++ b/backends/arm/runtime/EthosUBackend.cpp @@ -10,6 +10,11 @@ * ethos-u-core-driver for hardware interaction. 
*/ +// Workaround for runtime/core/portable_type/c10/c10/util/Float16-math.h +#if defined(__GNUC__) && defined(__ZEPHYR__) +#pragma GCC diagnostic ignored "-Wdouble-promotion" +#endif + #include #include #include diff --git a/backends/cortex_m/CMakeLists.txt b/backends/cortex_m/CMakeLists.txt index ac330d4b015..8d2c784cb71 100644 --- a/backends/cortex_m/CMakeLists.txt +++ b/backends/cortex_m/CMakeLists.txt @@ -79,6 +79,7 @@ target_link_libraries( cortex_m_kernels PRIVATE cmsis-nn PRIVATE executorch + PRIVATE kernels_util_all_deps ) # Include directories for cortex_m_kernels diff --git a/examples/arm/zephyr/CMakeLists.txt b/examples/arm/zephyr/CMakeLists.txt new file mode 100644 index 00000000000..8255e6db83e --- /dev/null +++ b/examples/arm/zephyr/CMakeLists.txt @@ -0,0 +1,255 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# Copyright 2025 Arm Limited and/or its affiliates. +# +# SPDX-License-Identifier: Apache-2.0 + +cmake_minimum_required(VERSION 3.24) + +# Disable install(rule) generation while building under Zephyr. The sample +# application never runs `cmake --install`, and skipping the rules avoids export +# dependency checks when Zephyr provides targets such as ethosu_core_driver. +set(CMAKE_SKIP_INSTALL_RULES + ON + CACHE BOOL "" FORCE +) + +# Use configuration overlay to disable default portable ops to enable selective +# operator build +set(ET_PTE_FILE_PATH + "" + CACHE FILEPATH "Path to the ExecuTorch .pte (or .bpte) model to embed" +) +set(ET_PTE_SECTION + "network_model_sec" + CACHE STRING "Section attribute used for the generated model data" +) + +if(NOT ET_PTE_FILE_PATH) + message( + FATAL_ERROR + "ET_PTE_FILE_PATH must point to the ExecuTorch .pte (or .bpte) model to embed." 
+ ) +endif() + +if(NOT IS_ABSOLUTE "${ET_PTE_FILE_PATH}") + get_filename_component( + ET_PTE_FILE_PATH "${ET_PTE_FILE_PATH}" ABSOLUTE BASE_DIR + "${CMAKE_CURRENT_SOURCE_DIR}" + ) +endif() + +if(NOT EXISTS "${ET_PTE_FILE_PATH}") + message( + FATAL_ERROR + "Could not find ExecuTorch model at ET_PTE_FILE_PATH: ${ET_PTE_FILE_PATH}" + ) +endif() + +set(ET_PTE_FILE_PATH + "${ET_PTE_FILE_PATH}" + CACHE FILEPATH "Path to the ExecuTorch .pte (or .bpte) model to embed" + FORCE +) + +execute_process( + COMMAND + python "${CMAKE_CURRENT_LIST_DIR}/../../../codegen/tools/gen_oplist.py" + --model_file_path=${ET_PTE_FILE_PATH} + --output_path=${CMAKE_CURRENT_BINARY_DIR}/temp.yaml + OUTPUT_VARIABLE CMD_RESULT +) + +if(CMD_RESULT MATCHES "aten::" OR CMD_RESULT MATCHES "dim_order_ops::") + set(FOUND_OPS_IN_FILE "true") +else() + set(FOUND_OPS_IN_FILE "false") +endif() + +if(${FOUND_OPS_IN_FILE}) + set(EXECUTORCH_SELECT_OPS_LIST "") + set(EXECUTORCH_SELECT_OPS_MODEL + "${ET_PTE_FILE_PATH}" + CACHE STRING "Select operators from this ExecuTorch model" FORCE + ) + set(_EXECUTORCH_GEN_ZEPHYR_PORTABLE_OPS ON) + message( + "gen_oplist: EXECUTORCH_SELECT_OPS_MODEL=${ET_PTE_FILE_PATH} is used to auto generate ops from" + ) +else() + set(EXECUTORCH_SELECT_OPS_LIST "") + set(EXECUTORCH_SELECT_OPS_MODEL "") + set(_EXECUTORCH_GEN_ZEPHYR_PORTABLE_OPS OFF) + message( + "gen_oplist: No non delagated ops was found in ${ET_PTE_FILE_PATH} no ops added to build" + ) +endif() + +set(CONF_FILE "${CMAKE_CURRENT_SOURCE_DIR}/prj.conf") + +find_package(Zephyr REQUIRED HINTS $ENV{ZEPHYR_BASE}) +project(executorch_executor_runner) + +find_package( + Python3 + COMPONENTS Interpreter + REQUIRED +) + +set(CMAKE_CXX_FLAGS + "${CMAKE_CXX_FLAGS} -Wall -Wno-switch -Wno-float-conversion -Wno-double-promotion -ffunction-sections -fdata-sections" +) + +# Always use selective build since embedded devices are resource constrained + +# Include ExecuTorch selective build utilities Use EXECUTORCH_DIR if available, +# 
otherwise use Zephyr's module discovery +if(NOT DEFINED EXECUTORCH_DIR) + message( + STATUS + "ZEPHYR_EXECUTORCH_MODULE_DIR set to : ${ZEPHYR_EXECUTORCH_MODULE_DIR}" + ) + if(DEFINED ZEPHYR_EXECUTORCH_MODULE_DIR) + set(EXECUTORCH_DIR ${ZEPHYR_EXECUTORCH_MODULE_DIR}) + message( + STATUS "Using Zephyr module discovery: EXECUTORCH_DIR=${EXECUTORCH_DIR}" + ) + else() + message( + FATAL_ERROR + "ExecutorTorch module not found. Ensure it's properly configured in your Zephyr workspace." + ) + endif() +else() + message(STATUS "Using predefined EXECUTORCH_DIR=${EXECUTORCH_DIR}") +endif() + +# Set EXECUTORCH_ROOT for the Codegen.cmake file +set(EXECUTORCH_ROOT ${EXECUTORCH_DIR}) + +# Ensure the portable kernels target exists even when portable ops are disabled +# in the global build. +if(NOT TARGET portable_kernels) + set(EXECUTORCH_PORTABLE_BUILD_KERNELS_ONLY ON) + add_subdirectory( + ${EXECUTORCH_DIR}/kernels/portable + ${CMAKE_CURRENT_BINARY_DIR}/executorch/kernels/portable + ) + unset(EXECUTORCH_PORTABLE_BUILD_KERNELS_ONLY) +endif() +set(EXECUTORCH_OPS_LIB "") +if(_EXECUTORCH_GEN_ZEPHYR_PORTABLE_OPS) + include(${EXECUTORCH_DIR}/tools/cmake/Utils.cmake) + include(${EXECUTORCH_DIR}/tools/cmake/Codegen.cmake) + if(NOT DEFINED EXECUTORCH_ENABLE_DTYPE_SELECTIVE_BUILD) + set(EXECUTORCH_ENABLE_DTYPE_SELECTIVE_BUILD "") + endif() + gen_selected_ops( + LIB_NAME + "cpu_portable_ops_lib" + OPS_SCHEMA_YAML + "" + ROOT_OPS + "${EXECUTORCH_SELECT_OPS_LIST}" + INCLUDE_ALL_OPS + "" + OPS_FROM_MODEL + "${EXECUTORCH_SELECT_OPS_MODEL}" + DTYPE_SELECTIVE_BUILD + "${EXECUTORCH_ENABLE_DTYPE_SELECTIVE_BUILD}" + ) + generate_bindings_for_kernels( + LIB_NAME "cpu_portable_ops_lib" FUNCTIONS_YAML + ${EXECUTORCH_DIR}/kernels/portable/functions.yaml DTYPE_SELECTIVE_BUILD + "${EXECUTORCH_ENABLE_DTYPE_SELECTIVE_BUILD}" + ) + gen_operators_lib( + LIB_NAME + "cpu_portable_ops_lib" + KERNEL_LIBS + portable_kernels + DEPS + executorch + DTYPE_SELECTIVE_BUILD + "${EXECUTORCH_ENABLE_DTYPE_SELECTIVE_BUILD}" 
+ ) + set(EXECUTORCH_OPS_LIB "cpu_portable_ops_lib") +endif() + +set(_local_flatcc_root ${CMAKE_BINARY_DIR}/flatcc_src) +if(NOT EXISTS ${_local_flatcc_root}/CMakeLists.txt) + file(MAKE_DIRECTORY ${_local_flatcc_root}) + execute_process( + COMMAND ${CMAKE_COMMAND} -E copy_directory + ${EXECUTORCH_DIR}/third-party/flatcc ${_local_flatcc_root} + ) +endif() +set(EXECUTORCH_FLATCC_SOURCE_ROOT + ${_local_flatcc_root} + CACHE PATH "" FORCE +) +set(EXECUTORCH_FLATCC_INSTALL_ROOT + ${_local_flatcc_root} + CACHE PATH "" FORCE +) + +set(app_sources + src/arm_executor_runner.cpp + ${EXECUTORCH_DIR}/examples/arm/executor_runner/arm_memory_allocator.cpp +) +target_sources(app PRIVATE ${app_sources}) + +set(_model_pte_header ${CMAKE_CURRENT_BINARY_DIR}/model_pte.h) +add_custom_command( + OUTPUT ${_model_pte_header} + COMMAND + ${Python3_EXECUTABLE} + ${EXECUTORCH_DIR}/examples/arm/executor_runner/pte_to_header.py --pte + ${ET_PTE_FILE_PATH} --outdir ${CMAKE_CURRENT_BINARY_DIR} --section + ${ET_PTE_SECTION} + DEPENDS ${ET_PTE_FILE_PATH} + ${EXECUTORCH_DIR}/examples/arm/executor_runner/pte_to_header.py + COMMENT "Converting ${ET_PTE_FILE_PATH} to model_pte.h" +) +add_custom_target(gen_model_header DEPENDS ${_model_pte_header}) +add_dependencies(app gen_model_header) + +if(DEFINED CONFIG_EXECUTORCH_METHOD_ALLOCATOR_POOL_SIZE) + target_compile_definitions( + app + PRIVATE + ET_ARM_METHOD_ALLOCATOR_POOL_SIZE=${CONFIG_EXECUTORCH_METHOD_ALLOCATOR_POOL_SIZE} + ) +endif() +if(DEFINED CONFIG_EXECUTORCH_TEMP_ALLOCATOR_POOL_SIZE) + target_compile_definitions( + app + PRIVATE + ET_ARM_BAREMETAL_SCRATCH_TEMP_ALLOCATOR_POOL_SIZE=${CONFIG_EXECUTORCH_TEMP_ALLOCATOR_POOL_SIZE} + ) +endif() + +# Link with ExecuTorch and our operators library +target_link_libraries(app PRIVATE libexecutorch) +if(EXECUTORCH_OPS_LIB) + target_link_libraries(app PRIVATE ${EXECUTORCH_OPS_LIB}) +endif() +if(CONFIG_CPU_CORTEX_M) + if(TARGET cortex_m_ops_lib) + target_link_libraries(app PRIVATE cortex_m_ops_lib) + endif() 
+ if(TARGET cortex_m_kernels) + target_link_libraries(app PRIVATE cortex_m_kernels) + endif() +endif() +if(TARGET executorch_delegate_ethos_u) + target_link_libraries(app PRIVATE executorch_delegate_ethos_u) +endif() +if(TARGET ethosu_core_driver) + target_link_libraries(app PRIVATE ethosu_core_driver) +endif() + +# Add include directories for sources and generated headers +target_include_directories(app PRIVATE src ${CMAKE_CURRENT_BINARY_DIR}) +get_target_property(OUT app LINK_LIBRARIES) +message(STATUS ${OUT}) diff --git a/examples/arm/zephyr/Kconfig b/examples/arm/zephyr/Kconfig new file mode 100644 index 00000000000..ea7cacd7e06 --- /dev/null +++ b/examples/arm/zephyr/Kconfig @@ -0,0 +1,31 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# Copyright 2025 Arm Limited and/or its affiliates. +# +# SPDX-License-Identifier: Apache-2.0 + +source "Kconfig.zephyr" + +menu "ExecuTorch Zephyr sample configuration" + +config EXECUTORCH_METHOD_ALLOCATOR_POOL_SIZE + int "Method allocator pool size in bytes" + default 16384 + depends on EXECUTORCH + help + Size of the method allocator pool in bytes. This memory is used + for allocating ExecuTorch method instances and their metadata. + Default is 16KB which should be sufficient for most small models. + Larger models may require more memory. + +config EXECUTORCH_TEMP_ALLOCATOR_POOL_SIZE + int "Temporary allocator pool size in bytes" + default 2048 + depends on EXECUTORCH + help + Size of the temporary allocator pool in bytes. This memory is used + for temporary allocations during model execution such as intermediate + tensor calculations. Default is 2KB which should be sufficient for + simple operations. Complex models may require more memory. 
+ +endmenu diff --git a/examples/arm/zephyr/boards/mps3_corstone300_fvp.overlay b/examples/arm/zephyr/boards/mps3_corstone300_fvp.overlay new file mode 100644 index 00000000000..1bd72f123b9 --- /dev/null +++ b/examples/arm/zephyr/boards/mps3_corstone300_fvp.overlay @@ -0,0 +1,15 @@ +/* Copyright 2025 Arm Limited and/or its affiliates. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +/* Ensure ExecuTorch scratch buffers live in shared SRAM that the Ethos-U + * DMA can access. The default board DTS routes Zephyr's general-purpose + * SRAM to DTCM, which the NPU cannot reach. Override the choice so that + * .data/.bss land in ISRAM (0x3100_0000) instead. + */ +/ { + chosen { + zephyr,sram = &isram; + }; +}; diff --git a/examples/arm/zephyr/prj.conf b/examples/arm/zephyr/prj.conf new file mode 100644 index 00000000000..59b857c708a --- /dev/null +++ b/examples/arm/zephyr/prj.conf @@ -0,0 +1,40 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# Copyright 2025 Arm Limited and/or its affiliates. +# +# SPDX-License-Identifier: Apache-2.0 + +# Enable ExecuTorch +CONFIG_EXECUTORCH=y + +# C++ +CONFIG_CPP=y +CONFIG_STD_CPP17=y + +# Needed as kernels/portable/cpu/op_allclose.cpp uses isfinite() instead of std::isfinite() +# If you fully delegated your model to Ethos-U you could remove this and save some space +CONFIG_NEWLIB_LIBC=y + +# Config - increased for ExecuTorch memory requirements +CONFIG_MAIN_STACK_SIZE=8192 +CONFIG_HEAP_MEM_POOL_SIZE=32768 + +# Logging +CONFIG_REQUIRES_FLOAT_PRINTF=y + +# Configuration for selective operator builds +# Keep the portable operator registry enabled so selective builds can pull in +# only the kernels required by the model (via EXECUTORCH_SELECT_OPS_MODEL). +# Setting this means you will include all portable ops +CONFIG_EXECUTORCH_BUILD_PORTABLE_OPS=n + +# Enable the Zephyr Ethos-U driver so executorch_delegate_ethos_u can reserve +# and use the hardware instance exposed by the board DTS. 
+CONFIG_ETHOS_U=y + +# Add model specific configs +# The default 1 KB pool is too small even for the tiny sample model; +# bump to 256 KB so method buffers and inputs fit during setup. +CONFIG_EXECUTORCH_METHOD_ALLOCATOR_POOL_SIZE=262144 +# Ethos-U scratch memory requirements scale with the compiled network. +CONFIG_EXECUTORCH_TEMP_ALLOCATOR_POOL_SIZE=1572864 diff --git a/examples/arm/zephyr/src/arm_executor_runner.cpp b/examples/arm/zephyr/src/arm_executor_runner.cpp new file mode 100644 index 00000000000..3badc39fb30 --- /dev/null +++ b/examples/arm/zephyr/src/arm_executor_runner.cpp @@ -0,0 +1,491 @@ +/* Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * Copyright 2025 Arm Limited and/or its affiliates. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/** + * This header file is generated by the build process based on the .pte file + * specified in the ET_PTE_FILE_PATH variable to the cmake build. + * Control of the action of the .pte, it's use of operators and delegates, and + * which are included in the bare metal build are also orchestrated by the + * CMakeLists file. For example use see examples/arm/run.sh + * + * e.g. This includes the pte as a big chunk of data struct into this file + */ +#include "model_pte.h" + +// Runtime copy of the model blob placed in ISRAM so that the Ethos-U DMA can +// access command stream and weights. The original model_pte[] lives in flash. 
+alignas(16) static unsigned char model_pte_runtime[sizeof(model_pte)]; +static bool model_pte_runtime_initialized = false; + +using executorch::aten::ScalarType; +using executorch::aten::Tensor; +using executorch::aten::TensorImpl; +using executorch::extension::BufferCleanup; +using executorch::extension::BufferDataLoader; +using executorch::runtime::Error; +using executorch::runtime::EValue; +using executorch::runtime::HierarchicalAllocator; +using executorch::runtime::MemoryAllocator; +using executorch::runtime::MemoryManager; +using executorch::runtime::Method; +using executorch::runtime::MethodMeta; +using executorch::runtime::Program; +using executorch::runtime::Result; +using executorch::runtime::Span; +using executorch::runtime::Tag; +using executorch::runtime::TensorInfo; + +#if defined(CONFIG_ETHOS_U) +extern "C" executorch::runtime::Error +executorch_delegate_EthosUBackend_registered(void); +#endif + +/** + * The method_allocation_pool should be large enough to fit the setup, input + * used and other data used like the planned memory pool (e.g. memory-planned + * buffers to use for mutable tensor data) In this example we run on a + * Corstone-3xx FVP so we can use a lot of memory to be able to run and test + * large models if you run on HW this should be lowered to fit into your + * availible memory. + */ + +#if !defined(ET_ARM_METHOD_ALLOCATOR_POOL_SIZE) +#define ET_ARM_METHOD_ALLOCATOR_POOL_SIZE (160 * 1024 * 1024) +#endif +const size_t method_allocation_pool_size = ET_ARM_METHOD_ALLOCATOR_POOL_SIZE; +unsigned char __attribute__(( + aligned(16))) method_allocation_pool[method_allocation_pool_size]; + +// unsigned char method_allocation_pool[method_allocation_pool_size]; + +/** + * The temp_allocation_pool is used for allocating temporary data during kernel + * or delegate execution. This will be reset after each kernel or delegate call. + * Currently a MemoryAllocator is used but a PlatformMemoryAllocator is probably + * a better fit. 
+ * + * The Corstone-300/Corstone-320 platforms have 2MB/4MB of SRAM respectively. + * For Shared_Sram, ET_ARM_BAREMETAL_SCRATCH_TEMP_ALLOCATOR_POOL_SIZE is + * 2MB and the linker script places the .bss.tensor_arena symbol in the SRAM. + * For Dedicated_Sram, the .bss.tensor_arena symbol is placed in the DDR in the + * linker script. The examples/arm/zephyr/prj.conf contains the logic for the + * sizes of: + * CONFIG_EXECUTORCH_METHOD_ALLOCATOR_POOL_SIZE and + * CONFIG_EXECUTORCH_TEMP_ALLOCATOR_POOL_SIZE + */ + +#if !defined(ET_ARM_BAREMETAL_SCRATCH_TEMP_ALLOCATOR_POOL_SIZE) +#define ET_ARM_BAREMETAL_SCRATCH_TEMP_ALLOCATOR_POOL_SIZE 2048 +#endif +#if !defined(ET_ARM_BAREMETAL_FAST_SCRATCH_TEMP_ALLOCATOR_POOL_SIZE) +#define ET_ARM_BAREMETAL_FAST_SCRATCH_TEMP_ALLOCATOR_POOL_SIZE 0x600 +#endif + +const size_t temp_allocation_pool_size = + ET_ARM_BAREMETAL_SCRATCH_TEMP_ALLOCATOR_POOL_SIZE; +unsigned char __attribute__(( + section(".bss.tensor_arena"), + aligned(16))) temp_allocation_pool[temp_allocation_pool_size]; +#if defined(ET_ARM_BAREMETAL_FAST_SCRATCH_TEMP_ALLOCATOR_POOL_SIZE) +extern "C" { +size_t ethosu_fast_scratch_size = + ET_ARM_BAREMETAL_FAST_SCRATCH_TEMP_ALLOCATOR_POOL_SIZE; +unsigned char __attribute__((section(".bss.ethosu_scratch"), aligned(16))) +dedicated_sram[ET_ARM_BAREMETAL_FAST_SCRATCH_TEMP_ALLOCATOR_POOL_SIZE]; +unsigned char* ethosu_fast_scratch = dedicated_sram; +} +#endif + +namespace { + +Result prepare_input_tensors( + Method& method, + MemoryAllocator& allocator, + const std::vector>& input_buffers) { + MethodMeta method_meta = method.method_meta(); + size_t num_inputs = method_meta.num_inputs(); + size_t num_allocated = 0; + + void** inputs = + static_cast(allocator.allocate(num_inputs * sizeof(void*))); + ET_CHECK_OR_RETURN_ERROR( + inputs != nullptr, + MemoryAllocationFailed, + "Could not allocate memory for pointers to input buffers."); + + for (size_t i = 0; i < num_inputs; i++) { + auto tag = method_meta.input_tag(i); + 
ET_CHECK_OK_OR_RETURN_ERROR(tag.error()); + + if (tag.get() != Tag::Tensor) { + ET_LOG(Debug, "Skipping non-tensor input %zu", i); + continue; + } + Result tensor_meta = method_meta.input_tensor_meta(i); + ET_CHECK_OK_OR_RETURN_ERROR(tensor_meta.error()); + + // Input is a tensor. Allocate a buffer for it. + void* data_ptr = allocator.allocate(tensor_meta->nbytes()); + ET_CHECK_OR_RETURN_ERROR( + data_ptr != nullptr, + MemoryAllocationFailed, + "Could not allocate memory for input buffers."); + inputs[num_allocated++] = data_ptr; + + Error err = Error::Ok; + if (input_buffers.size() > 0) { + auto [buffer, buffer_size] = input_buffers.at(i); + if (buffer_size != tensor_meta->nbytes()) { + ET_LOG( + Error, + "input size (%d) and tensor size (%d) mismatch!", + buffer_size, + tensor_meta->nbytes()); + err = Error::InvalidArgument; + } else { + ET_LOG(Info, "Copying read input to tensor."); + std::memcpy(data_ptr, buffer, buffer_size); + } + } + + TensorImpl impl = TensorImpl( + tensor_meta.get().scalar_type(), + tensor_meta.get().sizes().size(), + const_cast(tensor_meta.get().sizes().data()), + data_ptr, + const_cast( + tensor_meta.get().dim_order().data())); + Tensor t(&impl); + + // If input_buffers.size <= 0, we don't have any input, fill it with 1's. + if (input_buffers.size() <= 0) { + for (size_t j = 0; j < t.numel(); j++) { + switch (t.scalar_type()) { + case ScalarType::Int: + t.mutable_data_ptr()[j] = 1; + break; + case ScalarType::Float: + t.mutable_data_ptr()[j] = 1.; + break; + case ScalarType::Char: + t.mutable_data_ptr()[j] = 1; + break; + } + } + } + + err = method.set_input(t, i); + + if (err != Error::Ok) { + ET_LOG( + Error, "Failed to prepare input %zu: 0x%" PRIx32, i, (uint32_t)err); + // The BufferCleanup will free the inputs when it goes out of scope. 
+ BufferCleanup cleanup({inputs, num_allocated}); + return err; + } + } + return BufferCleanup({inputs, num_allocated}); +} + +} // namespace + +int main(int argc, const char* argv[]) { + (void)argc; + (void)argv; + +#if defined(CONFIG_ETHOS_U) + if (executorch_delegate_EthosUBackend_registered() != Error::Ok) { + ET_LOG( + Error, + "Ethos-U backend registration failed; model execution cannot continue"); + return 1; + } +#endif + executorch::runtime::runtime_init(); + std::vector> input_buffers; + size_t pte_size = sizeof(model_pte); + + ET_LOG(Info, "PTE at %p Size: %lu bytes", model_pte, pte_size); + + // Find the offset to the embedded Program. + if (!model_pte_runtime_initialized) { + std::memcpy(model_pte_runtime, model_pte, sizeof(model_pte)); + model_pte_runtime_initialized = true; + } + const void* program_data = model_pte_runtime; + size_t program_data_len = pte_size; + + auto loader = BufferDataLoader(program_data, program_data_len); + ET_LOG(Info, "PTE Model data loaded. Size: %lu bytes.", program_data_len); + + // Parse the program file. This is immutable, and can also be reused + // between multiple execution invocations across multiple threads. + Result program = Program::load(&loader); + if (!program.ok()) { + ET_LOG( + Info, + "Program loading failed @ 0x%p: 0x%" PRIx32, + program_data, + program.error()); + } + + ET_LOG(Info, "Model buffer loaded, has %zu methods", program->num_methods()); + + const char* method_name = nullptr; + { + const auto method_name_result = program->get_method_name(0); + ET_CHECK_MSG(method_name_result.ok(), "Program has no methods"); + method_name = *method_name_result; + } + ET_LOG(Info, "Running method %s", method_name); + + Result method_meta = program->method_meta(method_name); + if (!method_meta.ok()) { + ET_LOG( + Info, + "Failed to get method_meta for %s: 0x%x", + method_name, + (unsigned int)method_meta.error()); + } + + ET_LOG( + Info, + "Setup Method allocator pool. 
Size: %zu bytes.", + method_allocation_pool_size); + + ArmMemoryAllocator method_allocator( + method_allocation_pool_size, method_allocation_pool); + + std::vector planned_buffers; // Owns the memory + std::vector> planned_spans; // Passed to the allocator + size_t num_memory_planned_buffers = method_meta->num_memory_planned_buffers(); + + size_t planned_buffer_membase = method_allocator.used_size(); + + for (size_t id = 0; id < num_memory_planned_buffers; ++id) { + size_t buffer_size = + static_cast(method_meta->memory_planned_buffer_size(id).get()); + ET_LOG(Info, "Setting up planned buffer %zu, size %zu.", id, buffer_size); + + /* Move to it's own allocator when MemoryPlanner is in place. */ + uint8_t* buffer = + reinterpret_cast(method_allocator.allocate(buffer_size)); + ET_CHECK_MSG( + buffer != nullptr, + "Could not allocate memory for memory planned buffer size %zu", + buffer_size); + planned_buffers.push_back(buffer); + planned_spans.push_back({planned_buffers.back(), buffer_size}); + } + + size_t planned_buffer_memsize = + method_allocator.used_size() - planned_buffer_membase; + ET_LOG(Info, "Computed planned buffer size=%zu", planned_buffer_memsize); + + HierarchicalAllocator planned_memory( + {planned_spans.data(), planned_spans.size()}); + + ArmMemoryAllocator temp_allocator( + temp_allocation_pool_size, temp_allocation_pool); + + MemoryManager memory_manager( + &method_allocator, &planned_memory, &temp_allocator); + + size_t method_loaded_membase = method_allocator.used_size(); + + executorch::runtime::EventTracer* event_tracer_ptr = nullptr; + + ET_LOG(Info, "Loading method."); + Result method = + program->load_method(method_name, &memory_manager, event_tracer_ptr); + + if (!method.ok()) { + ET_LOG( + Info, + "Loading of method %s failed with status 0x%" PRIx32, + method_name, + method.error()); + } + size_t method_loaded_memsize = + method_allocator.used_size() - method_loaded_membase; + ET_LOG(Info, "Method '%s' loaded.", method_name); + + size_t 
input_membase = method_allocator.used_size(); + ET_LOG( + Info, + "Preparing input: In use: %zuB, Free: %zuB", + input_membase, + method_allocator.free_size()); + + { + // Here you would add code to get input from your Hardware + // Get inputs from SEMIHOSTING or fake it with a lot of "1" + // Use "static" to force to compiler to remove this when it goes out of + // scope + static auto prepared_inputs = + ::prepare_input_tensors(*method, method_allocator, input_buffers); + + if (!prepared_inputs.ok()) { + ET_LOG( + Info, + "Preparing inputs tensors for method %s failed with status 0x%" PRIx32, + method_name, + prepared_inputs.error()); + } + } +#if defined(ET_DUMP_INPUT) + { + std::vector inputs(method->inputs_size()); + ET_LOG(Info, "%zu inputs: ", inputs.size()); + Error status = method->get_inputs(inputs.data(), inputs.size()); + ET_CHECK(status == Error::Ok); + + for (int i = 0; i < inputs.size(); ++i) { + if (inputs[i].isTensor()) { + Tensor tensor = inputs[i].toTensor(); + // The output might be collected and parsed so printf() is used instead + // of ET_LOG() here + for (int j = 0; j < tensor.numel(); ++j) { + if (tensor.scalar_type() == ScalarType::Int) { + printf( + "Input[%d][%d]: (int) %d\n", + i, + j, + tensor.const_data_ptr()[j]); + } else if (tensor.scalar_type() == ScalarType::Float) { + printf( + "Input[%d][%d]: (float) %f\n", + i, + j, + tensor.const_data_ptr()[j]); + } else if (tensor.scalar_type() == ScalarType::Char) { + printf( + "Input[%d][%d]: (char) %d\n", + i, + j, + tensor.const_data_ptr()[j]); + } else if (tensor.scalar_type() == ScalarType::Bool) { + printf( + "Input[%d][%d]: (bool) %s (0x%x)\n", + i, + j, + tensor.const_data_ptr()[j] ? 
"true" : "false", + tensor.const_data_ptr()[j]); + } + } + } else { + printf("Input[%d]: Not Tensor\n", i); + } + } + } +#endif + size_t input_memsize = method_allocator.used_size() - input_membase; + ET_LOG(Info, "Input prepared."); + + ET_LOG(Info, "Starting the model execution..."); + size_t executor_membase = method_allocator.used_size(); + // Run the model. + Error status = method->execute(); + size_t executor_memsize = method_allocator.used_size() - executor_membase; + + ET_LOG(Info, "model_pte_program_size: %lu bytes.", program_data_len); + ET_LOG(Info, "model_pte_loaded_size: %lu bytes.", pte_size); + if (method_allocator.size() != 0) { + size_t method_allocator_used = method_allocator.used_size(); + ET_LOG( + Info, + "method_allocator_used: %zu / %zu free: %zu ( used: %zu %% ) ", + method_allocator_used, + method_allocator.size(), + method_allocator.free_size(), + 100 * method_allocator_used / method_allocator.size()); + ET_LOG( + Info, "method_allocator_planned: %zu bytes", planned_buffer_memsize); + ET_LOG(Info, "method_allocator_loaded: %zu bytes", method_loaded_memsize); + ET_LOG(Info, "method_allocator_input: %zu bytes", input_memsize); + ET_LOG(Info, "method_allocator_executor: %zu bytes", executor_memsize); + } + + if (status != Error::Ok) { + ET_LOG( + Info, + "Execution of method %s failed with status 0x%" PRIx32, + method_name, + status); + } else { + ET_LOG(Info, "Model executed successfully."); + } + + std::vector outputs(method->outputs_size()); + status = method->get_outputs(outputs.data(), outputs.size()); + ET_CHECK(status == Error::Ok); + + ET_LOG(Info, "Model outputs:"); + for (size_t i = 0; i < outputs.size(); ++i) { + if (!outputs[i].isTensor()) { + ET_LOG(Info, " output[%zu]: non-tensor value", i); + continue; + } + Tensor tensor = outputs[i].toTensor(); + ET_LOG( + Info, + " output[%zu]: tensor scalar_type=%s numel=%zd", + i, + executorch::runtime::toString(tensor.scalar_type()), + tensor.numel()); + switch (tensor.scalar_type()) { + 
case ScalarType::Int: { + const int* data = tensor.const_data_ptr(); + for (ssize_t j = 0; j < tensor.numel(); ++j) { + ET_LOG(Info, " [%zd] = %d", j, data[j]); + } + break; + } + case ScalarType::Float: { + const float* data = tensor.const_data_ptr(); + for (ssize_t j = 0; j < tensor.numel(); ++j) { + ET_LOG(Info, " [%zd] = %f", j, static_cast(data[j])); + } + break; + } + case ScalarType::Char: { + const int8_t* data = tensor.const_data_ptr(); + for (ssize_t j = 0; j < tensor.numel(); ++j) { + ET_LOG(Info, " [%zd] = %d", j, data[j]); + } + break; + } + default: + ET_LOG( + Info, + " (%s tensor dump skipped)", + executorch::runtime::toString(tensor.scalar_type())); + } + } + ET_LOG(Info, "SUCCESS: Program complete, exiting."); + ET_LOG(Info, "\04"); + return 0; +} diff --git a/runtime/platform/default/arm_zephyr.cpp b/runtime/platform/default/zephyr.cpp similarity index 100% rename from runtime/platform/default/arm_zephyr.cpp rename to runtime/platform/default/zephyr.cpp diff --git a/zephyr/CMakeLists.txt b/zephyr/CMakeLists.txt new file mode 100644 index 00000000000..9dc21689dd6 --- /dev/null +++ b/zephyr/CMakeLists.txt @@ -0,0 +1,160 @@ +# Copyright (c) 2025 Petri Oksanen +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# Copyright 2025 Arm Limited and/or its affiliates. +# +# SPDX-License-Identifier: Apache-2.0 + +if(CONFIG_EXECUTORCH) + + set(CMAKE_POSITION_INDEPENDENT_CODE OFF) + + set(EXECUTORCH_DIR ${ZEPHYR_CURRENT_MODULE_DIR}) + message(STATUS "EXECUTORCH_DIR set to: ${EXECUTORCH_DIR}") + include(${EXECUTORCH_DIR}/tools/cmake/common/preset.cmake) + include(${EXECUTORCH_DIR}/tools/cmake/preset/zephyr.cmake) + + # HACK: Force the parent directory of executorch into the global include path. + # This is needed because the executorch CMake scripts generate a relative + # include path ("../") that doesn't resolve correctly in a Zephyr out-of-tree + # module build. 
+ get_filename_component(EXECUTORCH_PARENT_DIR ${EXECUTORCH_DIR} DIRECTORY) + include_directories(${EXECUTORCH_PARENT_DIR}) + + # Note: We don't create executorch_pal as a separate library to avoid circular + # dependencies with Zephyr's build system. Instead, we'll add the PAL source + # directly to our final library. + set(EXECUTORCH_PAL_DEFAULT "zephyr") + + set(EXECUTORCH_BUILD_EXECUTORCH_PYBIND OFF) + set(EXECUTORCH_BUILD_GENERATED_SRC OFF) + set(EXECUTORCH_BUILD_SDK OFF) + + if(CONFIG_EXECUTORCH_BUILD_PORTABLE_OPS) + set(EXECUTORCH_BUILD_PORTABLE_OPS ON) + else() + set(EXECUTORCH_BUILD_PORTABLE_OPS OFF) + endif() + set(EXECUTORCH_BUILD_EXECUTOR_RUNNER OFF) + set(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER OFF) + + # Backend selection + if(CONFIG_CPU_CORTEX_M) + set(EXECUTORCH_BUILD_CORTEX_M ON) + endif() + if(CONFIG_CPU_CORTEX_A) + set(EXECUTORCH_BUILD_XNNPACK ON) + endif() + if(CONFIG_ETHOS_U) + set(EXECUTORCH_BUILD_ARM_BAREMETAL ON) + endif() + + set(EXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL ON) + set(EXECUTORCH_BUILD_KERNELS_QUANTIZED ON) + set(EXECUTORCH_BUILD_KERNELS_QUANTIZED_AOT ON) + + set(EXECUTORCH_ENABLE_LOGGING ${CONFIG_EXECUTORCH_ENABLE_LOGGING}) + set(EXECUTORCH_ENABLE_EVENT_TRACER ${CONFIG_EXECUTORCH_ENABLE_EVENT_TRACER}) + set(EXECUTORCH_ENABLE_PROGRAM_VERIFICATION + ${CONFIG_EXECUTORCH_ENABLE_PROGRAM_VERIFICATION} + ) + set(EXECUTORCH_OPTIMIZE_SIZE ${CONFIG_EXECUTORCH_OPTIMIZE_FOR_SIZE}) + + add_subdirectory(${EXECUTORCH_DIR} ${CMAKE_CURRENT_BINARY_DIR}/executorch) + + # Get Zephyr's compiler flags that include the correct ARM architecture + # settings + get_property( + ZEPHYR_COMPILE_OPTIONS + TARGET zephyr_interface + PROPERTY INTERFACE_COMPILE_OPTIONS + ) + + # Build with ZephyrOS compiler options + set(BUILD_WITH_ZEPHYR_COMPILE_OPTIONS + $ + ) + + # Function to aggressively clean and fix target compile options + function(fix_executorch_target_flags target_name) + if(TARGET ${target_name}) + # Check if this is an INTERFACE target + 
get_target_property(TARGET_TYPE ${target_name} TYPE) + + if(TARGET_TYPE STREQUAL "INTERFACE_LIBRARY") + + # Apply corrected options as INTERFACE + target_compile_options( + ${target_name} INTERFACE ${BUILD_WITH_ZEPHYR_COMPILE_OPTIONS} + ) + message( + STATUS "Fixed INTERFACE compile flags for target: ${target_name}" + ) + else() + # Apply our corrected options + target_compile_options( + ${target_name} PUBLIC ${BUILD_WITH_ZEPHYR_COMPILE_OPTIONS} + ) + message(STATUS "Fixed compile flags for target: ${target_name}") + endif() + endif() + endfunction() + + # List of all ExecuTorch targets that need flag correction + set(EXECUTORCH_TARGETS_TO_FIX + executorch_core + executorch + portable_ops_lib + portable_kernels + optimized_portable_kernels + quantized_kernels + extension_data_loader + extension_runner_util + extension_tensor + extension_flat_tensor + extension_threadpool + program_schema + kernels_util_all_deps + optimized_kernels + cortex_m_ops_lib + cortex_m_kernels + cmsis-nn + ) + + # Apply corrected flags to each target + foreach(target IN LISTS EXECUTORCH_TARGETS_TO_FIX) + if(TARGET ${target}) + fix_executorch_target_flags(${target}) + else() + message(STATUS "Target not found in explicit list: ${target}") + endif() + endforeach() + + if(TARGET zephyr_interface) + target_link_libraries(executorch_core PUBLIC zephyr_interface) + endif() + + # Create a single Zephyr library that includes ExecuTorch integration and PAL + zephyr_library_named(libexecutorch) + + # Link ExecuTorch libraries + target_link_libraries(libexecutorch PUBLIC executorch_core) + + # Only link portable_ops_lib if it was built (i.e., if portable ops are + # enabled) + if(TARGET portable_ops_lib) + target_link_libraries(libexecutorch PUBLIC portable_ops_lib) + message(STATUS "Linking with default portable_ops_lib") + else() + message( + STATUS + "portable_ops_lib not available - using selective operators from application" + ) + endif() + + target_include_directories( + libexecutorch PUBLIC 
${EXECUTORCH_DIR}/runtime + ${EXECUTORCH_DIR}/third-party/flatcc/include + ) + +endif() diff --git a/zephyr/Kconfig b/zephyr/Kconfig new file mode 100644 index 00000000000..ce977c93c67 --- /dev/null +++ b/zephyr/Kconfig @@ -0,0 +1,52 @@ +# Copyright (c) 2025 Petri Oksanen +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# Copyright 2025 Arm Limited and/or its affiliates. +# +# SPDX-License-Identifier: Apache-2.0 + +config ZEPHYR_EXECUTORCH_MODULE + bool + +config EXECUTORCH + bool "ExecuTorch Support" + select REQUIRES_FULL_LIBCPP + help + This option enables the ExecuTorch library. + +if EXECUTORCH + +config EXECUTORCH_ENABLE_LOGGING + bool "Enable ExecuTorch logging" + default y + help + Enable logging within the ExecuTorch runtime. + +config EXECUTORCH_ENABLE_EVENT_TRACER + bool "Enable ExecuTorch event tracer" + default n + help + Enable the event tracer for performance profiling. + +config EXECUTORCH_ENABLE_PROGRAM_VERIFICATION + bool "Enable ExecuTorch program verification" + default n + help + Enable verification of the ExecuTorch program flatbuffer. + +config EXECUTORCH_OPTIMIZE_FOR_SIZE + bool "Optimize ExecuTorch for size" + default y + help + Optimize the ExecuTorch library for size (-Os). + +config EXECUTORCH_BUILD_PORTABLE_OPS + bool "Build ExecuTorch portable operators library" + default y + help + Build the default portable operators library that includes all + available operators. When disabled, you should use selective + operator building to include only needed operators, but the + underlying kernel implementations will still be available. + +endif # EXECUTORCH diff --git a/zephyr/README.md b/zephyr/README.md index 8368a400c0a..415c9116529 100644 --- a/zephyr/README.md +++ b/zephyr/README.md @@ -1,7 +1,162 @@ +# Executorch + +**ExecuTorch** is PyTorch's unified solution for deploying AI models on-device—from smartphones to microcontrollers—built for privacy, performance, and portability. 
It powers Meta's on-device AI across **Instagram, WhatsApp, Quest 3, Ray-Ban Meta Smart Glasses**, and [more](https://docs.pytorch.org/executorch/main/success-stories.html).
+
+This folder adds ExecuTorch so it can be built and run in the Zephyr project as an external module. This includes an example under examples/arm/zephyr of running executor runners with Arm® Ethos™-U backend on a Corstone™ FVP, targeting the Zephyr RTOS.
+
+# Requirements
+
+Setup a venv (or conda)
+```
+mkdir <zephyr_dir>
+cd <zephyr_dir>
+python3 -m venv .zephyr_venv
+source .zephyr_venv/bin/activate
+```
+
+Install requirements
+```
+pip install west cmake==3.29 pyelftools ninja jsonschema
+```
+
+Setup zephyr repo
+```
+west init --manifest-rev v4.3.0 <zephyr_dir>
+```
+
+Install Zephyr SDK according to Zephyr's guides and set ZEPHYR_SDK_INSTALL_DIR
+
+```
+export ZEPHYR_SDK_INSTALL_DIR=<zephyr_sdk_install_dir>
+```
+
+# Usage with Zephyr
+
+To pull in ExecuTorch as a Zephyr module, either add it as a West project in the west.yaml file or pull it in by adding a submanifest (e.g. 
zephyr/submanifests/executorch.yaml)
+
+# Create executorch.yaml under zephyr/submanifests
+
+There is an example executorch.yaml in this folder you can copy or just create a new file <zephyr_dir>/zephyr/submanifests/executorch.yaml with the following content:
+
+<zephyr_dir>/zephyr/submanifests/executorch.yaml
+```
+manifest:
+  projects:
+    - name: executorch
+      url: https://github.com/pytorch/executorch
+      revision: main
+      path: modules/lib/executorch
+```
+
+## Run west config and update:
+
+Add ExecuTorch to Zephyr
+```
+west config manifest.project-filter -- "-.*,+zephyr,+executorch,+cmsis,+cmsis_6,+cmsis-nn,+hal_ethos_u"
+west update
+```
+
+## Setup and install ExecuTorch
+
+Run this:
+
+```
+cd modules/lib/executorch/
+git submodule sync
+git submodule update --init --recursive
+./install_executorch.sh
+```
+
+## Prepare Ethos™-U tools like Vela compiler and Corstone™ 300/320 FVP
+
+This is needed to convert python models to PTE files for Ethos™-U55/U85 and also installs Corstone™ 300/320 FVP so you can run and test.
+
+Make sure to read and agree to the Corstone™ eula
+
+```
+examples/arm/setup.sh --i-agree-to-the-contained-eula
+```
+
+
+# Running a sample application
+
+To run you need to point out the path to the installed Corstone™ FVP and you can then use west to build and run. You point out the model PTE file you want to run with -DET_PTE_FILE_PATH=<pte_file>, see below.
+
+
+## Corstone™ 300 FVP (Ethos™-U55)
+
+### Prepare a PTE model file
+
+Prepare a Corstone300 PTE model
+```
+cd <zephyr_dir>/modules/lib/executorch
+source examples/arm/arm-scratch/setup_path.sh
+python -m examples.arm.aot_arm_compiler --model_name=examples/arm/example_modules/add.py --quantize --delegate -t ethos-u55-128 --output="add_u55_128.pte"
+```
+
+'--delegate' tells the aot_arm_compiler to use the Ethos-U backend and '-t ethos-u55-128' specifies the used Ethos-U variant and number of MACs used; this must match your hardware or FVP config.
+
+### Setup FVP paths
+
+
+Set up FVP paths and macs used, this will also set shutdown_on_eot so the FVP auto stops after it has run the example.
+```
+cd <zephyr_dir>
+export FVP_ROOT=$PWD/modules/lib/executorch/examples/arm/arm-scratch/FVP-corstone300
+export ARMFVP_BIN_PATH=${FVP_ROOT}/models/Linux64_GCC-9.3
+export ARMFVP_EXTRA_FLAGS='-C mps3_board.uart0.shutdown_on_eot=1 -C ethosu.num_macs=128'
+```
+
+### Build and run
+
+```
+west build -b mps3/corstone300/fvp modules/lib/executorch/examples/arm/zephyr -t run -- -DET_PTE_FILE_PATH=modules/lib/executorch/add_u55_128.pte
+```
+
+Press CTRL-C to stop the FVP
+
+## Corstone™ 320 FVP (Ethos™-U85)
+
+### Prepare a PTE model file
+
+Prepare a Corstone320 PTE model
+
+```
+cd <zephyr_dir>/modules/lib/executorch
+source examples/arm/arm-scratch/setup_path.sh
+python -m examples.arm.aot_arm_compiler --model_name=examples/arm/example_modules/add.py --quantize --delegate -t ethos-u85-256 --output="add_u85_256.pte"
+```
+'--delegate' tells the aot_arm_compiler to use the Ethos-U backend and '-t ethos-u85-256' specifies the used Ethos-U variant and number of MACs used; this must match your hardware or FVP config.
+
+### Setup FVP paths
+
+Set up FVP paths, libs and macs used, this will also set shutdown_on_eot so the FVP auto stops after it has run the example.
+```
+cd <zephyr_dir>
+export FVP_ROOT=$PWD/modules/lib/executorch/examples/arm/arm-scratch/FVP-corstone320
+export ARMFVP_BIN_PATH=${FVP_ROOT}/models/Linux64_GCC-9.3
+export LD_LIBRARY_PATH=${FVP_ROOT}/python/lib:${ARMFVP_BIN_PATH}:${LD_LIBRARY_PATH}
+export ARMFVP_EXTRA_FLAGS='-C mps4_board.uart0.shutdown_on_eot=1 -C mps4_board.subsystem.ethosu.num_macs=256'
+```
+
+### Build and run
+
+```
+west build -b mps4/corstone320/fvp modules/lib/executorch/examples/arm/zephyr -t run -- -DET_PTE_FILE_PATH=modules/lib/executorch/add_u85_256.pte
+```
+
+Press CTRL-C to stop the FVP
+
+## Notable files
+
+# executorch.yaml
+
+Copy this to <zephyr_dir>/zephyr/submanifests/
+
 # module.yml
 
-Do not remove this file. 
As mentioned in the official Zephyr [documenation](https://docs.zephyrproject.org/latest/develop/modules.html), for Executorch to be built as Zephyr module, the file `zephyr/module.yml` must exist at the top level directory in the project.
+Do not remove this file. As mentioned in the official Zephyr [documentation](https://docs.zephyrproject.org/latest/develop/modules.html), for Executorch to be built as a Zephyr module, the file `zephyr/module.yml` must exist at the top level directory in the project.
 
-# Work In Progress
+# Reference
 
-We are currently working on request to the Zephyr project to formally support Executorch as a module. This will include an example of running executor runners on the Arm FVP, targetting the Zephyr RTOS. Once implemented, on executorch releases, the manifest in the Zephyr repo will need to be updated to point to the latest release of Executorch. More instructions on that will follow once the executorch module change is accepted into the Zephyr project.
+Documentation
diff --git a/zephyr/executorch.yaml b/zephyr/executorch.yaml
new file mode 100644
index 00000000000..03acbc46c92
--- /dev/null
+++ b/zephyr/executorch.yaml
@@ -0,0 +1,6 @@
+manifest:
+  projects:
+    - name: executorch
+      url: https://github.com/pytorch/executorch
+      revision: main
+      path: modules/lib/executorch
diff --git a/zephyr/module.yml b/zephyr/module.yml
index 4fedce88e2e..472d46113a5 100644
--- a/zephyr/module.yml
+++ b/zephyr/module.yml
@@ -1,4 +1,8 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+# Copyright 2025 Arm Limited and/or its affiliates.
+ name: executorch build: - cmake-ext: True - kconfig-ext: True + cmake: zephyr + kconfig: zephyr/Kconfig From 6991033302fd8ceba47b009fa9560218da9188d5 Mon Sep 17 00:00:00 2001 From: Zingo Andersen Date: Fri, 19 Dec 2025 06:37:53 +0100 Subject: [PATCH 2/2] Fix ZephyrOS review comments Signed-off-by: Zingo Andersen Change-Id: Ia5eec18ba42d8056332fb5f3e3699c6e85c81956 --- examples/arm/zephyr/src/arm_executor_runner.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/examples/arm/zephyr/src/arm_executor_runner.cpp b/examples/arm/zephyr/src/arm_executor_runner.cpp index 3badc39fb30..cce7f0c2f7f 100644 --- a/examples/arm/zephyr/src/arm_executor_runner.cpp +++ b/examples/arm/zephyr/src/arm_executor_runner.cpp @@ -79,8 +79,6 @@ const size_t method_allocation_pool_size = ET_ARM_METHOD_ALLOCATOR_POOL_SIZE; unsigned char __attribute__(( aligned(16))) method_allocation_pool[method_allocation_pool_size]; -// unsigned char method_allocation_pool[method_allocation_pool_size]; - /** * The temp_allocation_pool is used for allocating temporary data during kernel * or delegate execution. This will be reset after each kernel or delegate call.