From 75ffe39de1b021aaf12a8cc894eaf424609c61c9 Mon Sep 17 00:00:00 2001
From: Ozan Durgut <ozandurgut.2001@hotmail.com>
Date: Wed, 15 Apr 2026 20:30:48 +0200
Subject: [PATCH 01/18] core: update memory_init API declaration and docs

Update memory_init declaration in memory_manager.h from
memory_init(void) to memory_init(uintptr_t pool_start_addr).

Signed-off-by: Ozan Durgut <ozandurgut.2001@hotmail.com>
---
 core/memory_manager.h | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/core/memory_manager.h b/core/memory_manager.h
index 3ccbca5..d743632 100755
--- a/core/memory_manager.h
+++ b/core/memory_manager.h
@@ -38,12 +38,14 @@ extern "C" {
 #endif
 
 /**
- * @brief Initialize the memory manager.
+ * @brief Initialize the memory manager with default backend settings.
  *
  * Should be called once at startup before any other memory functions.
- * If using a pool allocator, this function sets up the pool.
+ * For pool allocators, callers normally pass the board-specific default pool address.
+ *
+ * @param pool_start_addr Start address of the memory pool.
  */
-void memory_init(void);
+void memory_init(uintptr_t pool_start_addr);
 
 /**
  * @brief Allocate a block of memory.

From ab1b33f044372e2d05d9d74f99949dcf1c580fa9 Mon Sep 17 00:00:00 2001
From: Ozan Durgut <ozandurgut.2001@hotmail.com>
Date: Wed, 15 Apr 2026 20:31:35 +0200
Subject: [PATCH 02/18] board: update memory_init signature for pool address

Change board memory backends to adapt to the new memory_init signature.

For STM32F7, define DEFAULT_MEMORY_POOL_ADDR and use it for lazy
initialization call sites.

For ESP32, adopt the same signature and explicitly mark the parameter
unused to keep backend behavior unchanged.

Signed-off-by: Ozan Durgut <ozandurgut.2001@hotmail.com>
---
 board/esp32/board_esp32_memory.cpp   |  4 +++-
 board/stm32f7/board_stm32f7_memory.c | 12 +++++++-----
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/board/esp32/board_esp32_memory.cpp b/board/esp32/board_esp32_memory.cpp
index c792ef3..4e471a4 100755
--- a/board/esp32/board_esp32_memory.cpp
+++ b/board/esp32/board_esp32_memory.cpp
@@ -14,8 +14,10 @@
 
     #define ps_malloc(size) heap_caps_malloc((size), MALLOC_CAP_SPIRAM)
 
-void memory_init(void)
+void memory_init(uintptr_t pool_start_addr)
 {
+    (void)pool_start_addr;
+    
     // Check if PSRAM is available
     if (ESP.getPsramSize() > 0) {
         Serial.printf("[MEMORY] PSRAM available: %u bytes\n", ESP.getPsramSize());
diff --git a/board/stm32f7/board_stm32f7_memory.c b/board/stm32f7/board_stm32f7_memory.c
index 6278b01..beeb93c 100755
--- a/board/stm32f7/board_stm32f7_memory.c
+++ b/board/stm32f7/board_stm32f7_memory.c
@@ -18,7 +18,9 @@
     #define CAMERA_LCD_FRAMEBUFFER_SIZE 0x80000  // 512KB reserved
 
     #define MEMORY_POOL_SIZE (1024 * 1024 * 8 - CAMERA_LCD_FRAMEBUFFER_SIZE)  // ~6MB
-static uint8_t *memory_pool = ((uint8_t *)SDRAM_BANK_ADDR + CAMERA_LCD_FRAMEBUFFER_SIZE);
+    #define DEFAULT_MEMORY_POOL_ADDR (SDRAM_BANK_ADDR + CAMERA_LCD_FRAMEBUFFER_SIZE)
+
+static uint8_t *memory_pool = (uint8_t *)DEFAULT_MEMORY_POOL_ADDR;
 
 typedef struct MemoryBlock {
     uint32_t magic;
@@ -59,7 +61,7 @@ static inline int block_header_valid(const MemoryBlock *b)
     return (b->magic == MEMBLOCK_MAGIC);
 }
 
-void memory_init()
+void memory_init(uintptr_t pool_start_addr)
 {
     if (initialized)
         return;
@@ -76,7 +78,7 @@ void memory_init()
 void *memory_alloc(size_t size)
 {
     if (!initialized)
-        memory_init();
+        memory_init((uintptr_t)DEFAULT_MEMORY_POOL_ADDR);
 
     size = ALIGN4(size);
 
@@ -122,7 +124,7 @@ void memory_free(void *ptr)
         return;
 
     if (!initialized)
-        memory_init();
+        memory_init((uintptr_t)DEFAULT_MEMORY_POOL_ADDR);
 
     MemoryBlock *block = (MemoryBlock *)((uint8_t *)ptr - BLOCK_SIZE);
 
@@ -161,7 +163,7 @@ void *memory_realloc(void *ptr, size_t new_size)
         return memory_alloc(new_size);
 
     if (!initialized)
-        memory_init();
+        memory_init((uintptr_t)DEFAULT_MEMORY_POOL_ADDR);
 
     MemoryBlock *block = (MemoryBlock *)((uint8_t *)ptr - BLOCK_SIZE);
     if (!block_header_valid(block))

From 071bc9ded628a52fbab5cc12e3332a922dbe4b94 Mon Sep 17 00:00:00 2001
From: Ozan Durgut <ozandurgut.2001@hotmail.com>
Date: Sat, 18 Apr 2026 00:19:07 +0200
Subject: [PATCH 03/18] build: split target selection into board, arch and cpu
 profiles

Replace single-platform selection with explicit EMBEDDIP_TARGET_BOARD,
EMBEDDIP_ARCH and EMBEDDIP_CPU cache variables and enforce a strict
compatibility matrix at configure time.

Load board and arch profiles independently, then compose sources,
include dirs, defines and compile options from those profiles so new
ports can be added without touching central build logic.

Move package config template to embedDIP root and update
configure_package_config_file path accordingly.

Signed-off-by: Ozan Durgut <ozandurgut.2001@hotmail.com>
---
 CMakeLists.txt                                | 139 ++++++++++--------
 ...Config.cmake.in => embedDIPConfig.cmake.in |   0
 2 files changed, 78 insertions(+), 61 deletions(-)
 rename cmake/embedDIPConfig.cmake.in => embedDIPConfig.cmake.in (100%)
 mode change 100755 => 100644

diff --git a/CMakeLists.txt b/CMakeLists.txt
index a133d97..8329d9e 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -15,8 +15,14 @@ project(embedDIP
     DESCRIPTION "Portable embedded digital image processing library"
 )
 
-set(EMBEDDIP_TARGET_PLATFORM "STM32F7" CACHE STRING "Target platform: STM32F7, ESP32, or HOST")
-set_property(CACHE EMBEDDIP_TARGET_PLATFORM PROPERTY STRINGS "STM32F7" "ESP32" "HOST")
+set(EMBEDDIP_TARGET_BOARD "" CACHE STRING "Target board (required): STM32F7 or ESP32")
+set_property(CACHE EMBEDDIP_TARGET_BOARD PROPERTY STRINGS "STM32F7" "ESP32")
+
+set(EMBEDDIP_ARCH "" CACHE STRING "Architecture family (required): ARM or XTENSA")
+set_property(CACHE EMBEDDIP_ARCH PROPERTY STRINGS "ARM" "XTENSA")
+
+set(EMBEDDIP_CPU "" CACHE STRING "CPU variant (required): CORTEX_M7, LX6, LX7")
+set_property(CACHE EMBEDDIP_CPU PROPERTY STRINGS "CORTEX_M7" "LX6" "LX7")
 
 option(EMBEDDIP_ENABLE_UART_LOGGING "Enable UART logging" ON)
 option(EMBEDDIP_ENABLE_IMAGE_PROCESSING "Enable image processing modules" ON)
@@ -24,6 +30,37 @@ option(EMBEDDIP_ENABLE_CAMERA_INPUT "Enable camera input interfaces" ON)
 option(EMBEDDIP_ENABLE_DISPLAY_OUTPUT "Enable display output interfaces" ON)
 option(EMBEDDIP_BUILD_DOCS "Build documentation with Doxygen" OFF)
 
+if(EMBEDDIP_TARGET_BOARD STREQUAL "")
+    message(FATAL_ERROR "EMBEDDIP_TARGET_BOARD is required. Supported values: STM32F7, ESP32")
+endif()
+
+if(EMBEDDIP_ARCH STREQUAL "")
+    message(FATAL_ERROR "EMBEDDIP_ARCH is required. Supported values: ARM, XTENSA")
+endif()
+
+if(EMBEDDIP_CPU STREQUAL "")
+    message(FATAL_ERROR "EMBEDDIP_CPU is required. Supported values: CORTEX_M7, LX6, LX7")
+endif()
+
+# Explicit compatibility matrix between board, architecture family and CPU
+set(_embeddip_pair_valid FALSE)
+if(EMBEDDIP_TARGET_BOARD STREQUAL "STM32F7")
+    if(EMBEDDIP_ARCH STREQUAL "ARM" AND EMBEDDIP_CPU STREQUAL "CORTEX_M7")
+        set(_embeddip_pair_valid TRUE)
+    endif()
+elseif(EMBEDDIP_TARGET_BOARD STREQUAL "ESP32")
+    if(EMBEDDIP_ARCH STREQUAL "XTENSA" AND (EMBEDDIP_CPU STREQUAL "LX6" OR EMBEDDIP_CPU STREQUAL "LX7"))
+        set(_embeddip_pair_valid TRUE)
+    endif()
+endif()
+
+if(NOT _embeddip_pair_valid)
+    message(FATAL_ERROR
+        "Invalid board/arch/cpu combination: ${EMBEDDIP_TARGET_BOARD} + ${EMBEDDIP_ARCH} + ${EMBEDDIP_CPU}. "
+        "Supported: STM32F7+ARM+CORTEX_M7, ESP32+XTENSA+LX6, ESP32+XTENSA+LX7"
+    )
+endif()
+
 set(CMAKE_C_STANDARD 11)
 set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_C_STANDARD_REQUIRED ON)
@@ -85,6 +122,7 @@ set(IMGPROC_SOURCES
     imgproc/misc.h
 
     # FFT operations
+    imgproc/fft.c
     imgproc/fft.h
 )
 
@@ -111,71 +149,53 @@ set(BOARD_COMMON_SOURCES
     board/common.h
 )
 
-# Platform-specific sources
-if(EMBEDDIP_TARGET_PLATFORM STREQUAL "STM32F7")
-    set(BOARD_SOURCES
-        ${BOARD_COMMON_SOURCES}
-        board/stm32f7/board_stm32f7_common.c
-        board/stm32f7/board_stm32f7_fft.c
-        board/stm32f7/board_stm32f7_memory.c
-        board/stm32f7/configs.h
-    )
+# Load board and architecture profiles (kept next to board/arch source trees
+# to make onboarding new ports straightforward).
+string(TOLOWER "${EMBEDDIP_TARGET_BOARD}" EMBEDDIP_BOARD_PROFILE)
+string(TOLOWER "${EMBEDDIP_ARCH}" EMBEDDIP_ARCH_PROFILE)
 
-    set(DEVICE_SOURCES
-        ${DEVICE_COMMON_SOURCES}
-        device/camera/ov5640/stm32_ov5640.c
-        device/display/rk043fn48h/stm32_rk043fn48h.c
-        device/serial/stm32_uart/stm32_uart.c
-    )
+set(EMBEDDIP_BOARD_PROFILE_FILE "${CMAKE_CURRENT_SOURCE_DIR}/board/${EMBEDDIP_BOARD_PROFILE}/board_profile.cmake")
+set(EMBEDDIP_ARCH_PROFILE_FILE "${CMAKE_CURRENT_SOURCE_DIR}/arch/${EMBEDDIP_ARCH_PROFILE}/arch_profile.cmake")
 
-    set(PLATFORM_DEFINES
-        STM32F7xx
-        ARM_MATH_CM7
-        TARGET_BOARD_STM32F7=1
-    )
-
-elseif(EMBEDDIP_TARGET_PLATFORM STREQUAL "ESP32")
-    set(BOARD_SOURCES
-        ${BOARD_COMMON_SOURCES}
-        board/esp32/board_esp32_common.cpp
-        board/esp32/board_esp32_fft.cpp
-        board/esp32/board_esp32_memory.cpp
-    )
-
-    set(DEVICE_SOURCES
-        ${DEVICE_COMMON_SOURCES}
-        device/camera/ov2640/esp32_ov2640.cpp
-        device/serial/esp32_uart/esp32_uart.cpp
-    )
-
-    set(PLATFORM_DEFINES
-        ARDUINO_ARCH_ESP32
-        TARGET_BOARD_ESP32=1
-    )
-
-else()
-    message(FATAL_ERROR "Unknown platform: ${EMBEDDIP_TARGET_PLATFORM}. Must be STM32F7, ESP32, or HOST")
+if(NOT EXISTS "${EMBEDDIP_BOARD_PROFILE_FILE}")
+    message(FATAL_ERROR "Board profile not found: ${EMBEDDIP_BOARD_PROFILE_FILE}")
 endif()
+if(NOT EXISTS "${EMBEDDIP_ARCH_PROFILE_FILE}")
+    message(FATAL_ERROR "Architecture profile not found: ${EMBEDDIP_ARCH_PROFILE_FILE}")
+endif()
+
+include("${EMBEDDIP_BOARD_PROFILE_FILE}")
+include("${EMBEDDIP_ARCH_PROFILE_FILE}")
 
 # === Create Library Target ===
 add_library(embedDIP STATIC
     ${CORE_SOURCES}
     ${IMGPROC_SOURCES}
-    ${BOARD_SOURCES}
-    ${DEVICE_SOURCES}
+    ${EMBEDDIP_BOARD_SOURCES}
+    ${EMBEDDIP_ARCH_SOURCES}
+    ${EMBEDDIP_DEVICE_SOURCES}
     ${WRAPPER_SOURCES}
 )
 
 # === Compiler Definitions ===
 target_compile_definitions(embedDIP PUBLIC
     USE_EMBED_DIP
-    ${PLATFORM_DEFINES}
+    ${EMBEDDIP_BOARD_DEFINES}
+    ${EMBEDDIP_ARCH_DEFINES}
     $<$<BOOL:${EMBEDDIP_ENABLE_UART_LOGGING}>:ENABLE_UART_LOGGING=1>
     $<$<BOOL:${EMBEDDIP_ENABLE_IMAGE_PROCESSING}>:ENABLE_IMAGE_PROCESSING=1>
     $<$<BOOL:${EMBEDDIP_ENABLE_CAMERA_INPUT}>:ENABLE_CAMERA_INPUT=1>
     $<$<BOOL:${EMBEDDIP_ENABLE_DISPLAY_OUTPUT}>:ENABLE_DISPLAY_OUTPUT=1>
 )
 
+if(EMBEDDIP_ARCH_PRIVATE_DEFINES)
+    target_compile_definitions(embedDIP PRIVATE ${EMBEDDIP_ARCH_PRIVATE_DEFINES})
+endif()
+
+if(EMBEDDIP_ARCH_COMPILE_OPTIONS)
+    target_compile_options(embedDIP PUBLIC ${EMBEDDIP_ARCH_COMPILE_OPTIONS})
+endif()
+
 # === Include Directories ===
 target_include_directories(embedDIP PUBLIC
     $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
@@ -189,9 +209,16 @@ target_include_directories(embedDIP PUBLIC
     $<INSTALL_INTERFACE:include/embedDIP>
 )
 
-# Platform-specific includes
-if(EMBEDDIP_TARGET_PLATFORM STREQUAL "STM32F7")
-    # Try to find CMSIS and HAL includes relative to project
+if(EMBEDDIP_BOARD_INCLUDE_DIRS)
+    foreach(_embeddip_board_inc IN LISTS EMBEDDIP_BOARD_INCLUDE_DIRS)
+        target_include_directories(embedDIP PUBLIC
+            $<BUILD_INTERFACE:${_embeddip_board_inc}>
+        )
+    endforeach()
+endif()
+
+# Board-specific include dependencies from parent project layout
+if(EMBEDDIP_TARGET_BOARD STREQUAL "STM32F7")
     if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/../Drivers")
         target_include_directories(embedDIP PUBLIC
             $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../Drivers/STM32F7xx_HAL_Driver/Inc>
@@ -208,7 +235,6 @@ if(EMBEDDIP_TARGET_PLATFORM STREQUAL "STM32F7")
         )
         if(CMSIS_DSP_SOURCES)
             target_sources(embedDIP PRIVATE ${CMSIS_DSP_SOURCES})
-            # CMSIS-DSP needs __FPU_PRESENT defined
             set_source_files_properties(${CMSIS_DSP_SOURCES}
                 PROPERTIES COMPILE_DEFINITIONS "__FPU_PRESENT=1"
             )
@@ -217,15 +243,6 @@ if(EMBEDDIP_TARGET_PLATFORM STREQUAL "STM32F7")
     else()
         message(WARNING "CMSIS/HAL drivers not found. You may need to specify include paths manually.")
     endif()
-
-    target_include_directories(embedDIP PUBLIC
-        $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/board/stm32f7>
-    )
-
-elseif(EMBEDDIP_TARGET_PLATFORM STREQUAL "ESP32")
-    target_include_directories(embedDIP PUBLIC
-        $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/board/esp32>
-    )
 endif()
 
 # === Link Libraries ===
@@ -270,7 +287,7 @@ install(EXPORT embedDIPTargets
 include(CMakePackageConfigHelpers)
 
 configure_package_config_file(
-    ${CMAKE_CURRENT_SOURCE_DIR}/cmake/embedDIPConfig.cmake.in
+    ${CMAKE_CURRENT_SOURCE_DIR}/embedDIPConfig.cmake.in
     ${CMAKE_CURRENT_BINARY_DIR}/embedDIPConfig.cmake
     INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/embedDIP
 )
diff --git a/cmake/embedDIPConfig.cmake.in b/embedDIPConfig.cmake.in
old mode 100755
new mode 100644
similarity index 100%
rename from cmake/embedDIPConfig.cmake.in
rename to embedDIPConfig.cmake.in

From 2bd5918b452c5ff941106b7a3ac47b22797d78d9 Mon Sep 17 00:00:00 2001
From: Ozan Durgut <ozandurgut.2001@hotmail.com>
Date: Sat, 18 Apr 2026 00:19:59 +0200
Subject: [PATCH 04/18] board: keep board layer focused on memory and board
 defaults

Move the board-level common units to arch/ (arch/arm/cm7_common.c,
arch/xtensa/xtensa_common.cpp) and delete the board-level FFT units, so
board ownership stays limited to board-specific memory/config
responsibilities.

Add the arch/fft_backend.h interface with ARM (arch/arm/cm7_fft.c) and
Xtensa (arch/xtensa/xtensa_fft.cpp) backends, and add imgproc/fft.c.

Signed-off-by: Ozan Durgut <ozandurgut.2001@hotmail.com>
---
 .../arm/cm7_common.c                          |   4 +-
 arch/arm/cm7_fft.c                            |  46 +
 arch/fft_backend.h                            |  21 +
 .../xtensa/xtensa_common.cpp                  |   2 +-
 arch/xtensa/xtensa_fft.cpp                    |  59 ++
 board/esp32/board_esp32_fft.cpp               | 615 ------------
 board/stm32f7/board_stm32f7_fft.c             | 906 ------------------
 imgproc/fft.c                                 | 586 +++++++++++
 8 files changed, 715 insertions(+), 1524 deletions(-)
 rename board/stm32f7/board_stm32f7_common.c => arch/arm/cm7_common.c (89%)
 mode change 100755 => 100644
 create mode 100644 arch/arm/cm7_fft.c
 create mode 100644 arch/fft_backend.h
 rename board/esp32/board_esp32_common.cpp => arch/xtensa/xtensa_common.cpp (99%)
 mode change 100755 => 100644
 create mode 100644 arch/xtensa/xtensa_fft.cpp
 delete mode 100755 board/esp32/board_esp32_fft.cpp
 delete mode 100755 board/stm32f7/board_stm32f7_fft.c
 create mode 100644 imgproc/fft.c

diff --git a/board/stm32f7/board_stm32f7_common.c b/arch/arm/cm7_common.c
old mode 100755
new mode 100644
similarity index 89%
rename from board/stm32f7/board_stm32f7_common.c
rename to arch/arm/cm7_common.c
index de25c60..dd6238c
--- a/board/stm32f7/board_stm32f7_common.c
+++ b/arch/arm/cm7_common.c
@@ -3,7 +3,7 @@
 
 #include <embedDIP_configs.h>
 
-#ifdef TARGET_BOARD_STM32F7
+#if defined(EMBED_DIP_ARCH_ARM) && defined(EMBED_DIP_CPU_CORTEX_M7)
 
     #include "core/image.h"
 
@@ -31,4 +31,4 @@ uint32_t toc()
     return DWT->CYCCNT;       // Return elapsed cycles
 }
 
-#endif
\ No newline at end of file
+#endif
diff --git a/arch/arm/cm7_fft.c b/arch/arm/cm7_fft.c
new file mode 100644
index 0000000..d8e58dd
--- /dev/null
+++ b/arch/arm/cm7_fft.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2025 EmbedDIP
+
+#include <embedDIP_configs.h>
+
+#if defined(EMBED_DIP_ARCH_ARM) && defined(EMBED_DIP_CPU_CORTEX_M7)
+
+    #include "arm_const_structs.h"
+    #include "arm_math.h"
+    #include <arch/fft_backend.h>
+
+embeddip_status_t embeddip_fft_backend_init(int n)
+{
+    if (n != 256) {  // this backend only uses the 256-point CMSIS instance
+        return EMBEDDIP_ERROR_INVALID_SIZE;
+    }
+    return EMBEDDIP_OK;
+}
+
+embeddip_status_t embeddip_fft_backend_forward_1d(float *data, int n)
+{
+    if (!data) {
+        return EMBEDDIP_ERROR_NULL_PTR;
+    }
+    if (n != 256) {
+        return EMBEDDIP_ERROR_INVALID_SIZE;
+    }
+    // In-place 256-point complex FFT: ifftFlag = 0 (forward), bitReverseFlag = 1 (enabled).
+    arm_cfft_f32(&arm_cfft_sR_f32_len256, data, 0, 1);
+    return EMBEDDIP_OK;
+}
+
+embeddip_status_t embeddip_fft_backend_inverse_1d(float *data, int n)
+{
+    if (!data) {
+        return EMBEDDIP_ERROR_NULL_PTR;
+    }
+    if (n != 256) {
+        return EMBEDDIP_ERROR_INVALID_SIZE;
+    }
+    // In-place 256-point complex IFFT: ifftFlag = 1 (inverse), bitReverseFlag = 1 (enabled).
+    arm_cfft_f32(&arm_cfft_sR_f32_len256, data, 1, 1);
+    return EMBEDDIP_OK;
+}
+
+#endif
diff --git a/arch/fft_backend.h b/arch/fft_backend.h
new file mode 100644
index 0000000..a90c6e8
--- /dev/null
+++ b/arch/fft_backend.h
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2025 EmbedDIP
+
+#ifndef EMBEDDIP_ARCH_FFT_BACKEND_H
+#define EMBEDDIP_ARCH_FFT_BACKEND_H
+
+#include <core/error.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+embeddip_status_t embeddip_fft_backend_init(int n);
+embeddip_status_t embeddip_fft_backend_forward_1d(float *data, int n);
+embeddip_status_t embeddip_fft_backend_inverse_1d(float *data, int n);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/board/esp32/board_esp32_common.cpp b/arch/xtensa/xtensa_common.cpp
old mode 100755
new mode 100644
similarity index 99%
rename from board/esp32/board_esp32_common.cpp
rename to arch/xtensa/xtensa_common.cpp
index e6a739e..bd1dc89
--- a/board/esp32/board_esp32_common.cpp
+++ b/arch/xtensa/xtensa_common.cpp
@@ -3,7 +3,7 @@
 
 #include <embedDIP_configs.h>
 
-#ifdef TARGET_BOARD_ESP32
+#ifdef EMBED_DIP_ARCH_XTENSA
 
     #ifndef ESP32_COMMON_H
         #define ESP32_COMMON_H
diff --git a/arch/xtensa/xtensa_fft.cpp b/arch/xtensa/xtensa_fft.cpp
new file mode 100644
index 0000000..58f28ea
--- /dev/null
+++ b/arch/xtensa/xtensa_fft.cpp
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2025 EmbedDIP
+
+#include <embedDIP_configs.h>
+
+#ifdef EMBED_DIP_ARCH_XTENSA
+
+    #include <arch/fft_backend.h>
+
+    #include "esp_dsp.h"
+
+embeddip_status_t embeddip_fft_backend_init(int n)
+{
+    if (n <= 0) {
+        return EMBEDDIP_ERROR_INVALID_ARG;
+    }
+    // n is only checked for being positive; init always uses CONFIG_DSP_MAX_FFT_SIZE.
+    esp_err_t err = dsps_fft2r_init_fc32(NULL, CONFIG_DSP_MAX_FFT_SIZE);
+    return (err == ESP_OK) ? EMBEDDIP_OK : EMBEDDIP_ERROR_INTERNAL;
+}
+
+embeddip_status_t embeddip_fft_backend_forward_1d(float *data, int n)
+{
+    if (!data) {
+        return EMBEDDIP_ERROR_NULL_PTR;
+    }
+    // In-place FFT; report esp-dsp failures to the caller instead of always returning OK.
+    esp_err_t err = dsps_fft2r_fc32(data, n);
+    err = (err == ESP_OK) ? dsps_bit_rev_fc32(data, n) : err;
+    return (err == ESP_OK) ? EMBEDDIP_OK : EMBEDDIP_ERROR_INTERNAL;
+}
+
+embeddip_status_t embeddip_fft_backend_inverse_1d(float *data, int n)
+{
+    if (!data) {
+        return EMBEDDIP_ERROR_NULL_PTR;
+    }
+    if (n <= 0) {
+        return EMBEDDIP_ERROR_INVALID_ARG;
+    }
+    // IFFT computed in place as conj(FFT(conj(x))) / n over n interleaved (re, im) pairs.
+    for (int i = 0; i < n; ++i) {
+        data[2 * i + 1] = -data[2 * i + 1];
+    }
+
+    if (dsps_fft2r_fc32(data, n) != ESP_OK || dsps_bit_rev_fc32(data, n) != ESP_OK) {
+        return EMBEDDIP_ERROR_INTERNAL;
+    }
+
+    // Undo the conjugation and apply the 1/n scale in a single pass.
+    const float inv_n = 1.0f / (float)n;
+    for (int i = 0; i < n; ++i) {
+        data[2 * i] *= inv_n;
+        data[2 * i + 1] *= -inv_n;
+    }
+    return EMBEDDIP_OK;
+}
+
+#endif
diff --git a/board/esp32/board_esp32_fft.cpp b/board/esp32/board_esp32_fft.cpp
deleted file mode 100755
index d60d7a7..0000000
--- a/board/esp32/board_esp32_fft.cpp
+++ /dev/null
@@ -1,615 +0,0 @@
-// SPDX-License-Identifier: MIT
-// Copyright (c) 2025 EmbedDIP
-
-#include <embedDIP_configs.h>
-
-#ifdef TARGET_BOARD_ESP32
-
-    #include "board/common.h"
-
-    #include "Arduino.h"
-    #include "esp_dsp.h"
-    #include <esp32/rom/rtc.h>
-    #include <imgproc/fft.h>
-
-static bool isValidFFTSize(int w, int h)
-{
-    return (w == h) && ((w & (w - 1)) == 0);  // square and power-of-2
-}
-
-    #include <Arduino.h>  // Required for Serial on Arduino platforms
-
-embeddip_status_t fft(const Image *inImg, Image *outImg)
-{
-    if (!inImg || !outImg || !inImg->pixels) {
-        return EMBEDDIP_ERROR_NULL_PTR;
-    }
-
-    int N = inImg->width;
-    if (!isValidFFTSize(N, N)) {
-        // Serial.println("[ERROR] Invalid FFT size. Only powers of 2 are supported.");
-        return EMBEDDIP_ERROR_INVALID_SIZE;
-    }
-
-    if (isChalsEmpty(outImg)) {
-        createChalsComplex(outImg, 2);  // 2 complex channels for interleaved (Re, Im)
-        outImg->is_chals = 1;
-    }
-
-    // Serial.println("[ERROR] 1pixels are null.");
-    float *buf0 = outImg->chals->ch[0];
-    float *buf1 = outImg->chals->ch[1];
-    uint8_t *input = static_cast<uint8_t *>(inImg->pixels);
-    for (int i = 0; i < N * N; i++) {
-        buf0[2 * i] = (float)input[i];  // real part
-        buf0[2 * i + 1] = 0.0f;         // imaginary part
-    }
-    // Initialize the FFT library
-    dsps_fft2r_init_fc32(NULL, CONFIG_DSP_MAX_FFT_SIZE);
-
-    // Serial.println("[ERROR] 4or pixels are null.");
-    //  FFT on rows
-    for (int i = 0; i < N; i++) {
-        int offset = i * N * 2;
-        dsps_fft2r_fc32(buf0 + offset, N);
-        dsps_bit_rev_fc32(buf0 + offset, N);
-    }
-    // Serial.println("[ERROR] 5or pixels are null.");
-    //  Transpose to buf1
-    for (int y = 0; y < N; y++) {
-        for (int x = 0; x < N; x++) {
-            int src = 2 * (y * N + x);
-            int dst = 2 * (x * N + y);
-            buf1[dst] = buf0[src];
-            buf1[dst + 1] = buf0[src + 1];
-        }
-    }
-
-    // FFT on columns
-    for (int i = 0; i < N; i++) {
-        int offset = i * N * 2;
-        dsps_fft2r_fc32(buf1 + offset, N);
-        dsps_bit_rev_fc32(buf1 + offset, N);
-    }
-
-    // Transpose back: buf1 → buf0 to undo the earlier transpose
-    for (int y = 0; y < N; y++) {
-        for (int x = 0; x < N; x++) {
-            int src = 2 * (y * N + x);
-            int dst = 2 * (x * N + y);
-            buf0[dst] = buf1[src];
-            buf0[dst + 1] = buf1[src + 1];
-        }
-    }
-
-    // Copy back to buf1 for output
-    for (int i = 0; i < N * N * 2; i++) {
-        buf1[i] = buf0[i];
-    }
-
-    outImg->log = IMAGE_DATA_COMPLEX;
-    // Serial.println("[INFO] 2D FFT completed successfully.");
-    return EMBEDDIP_OK;
-}
-
-embeddip_status_t ifft(const Image *inImg, Image *outImg)
-{
-    int N = inImg->width;
-
-    // Accept both IMAGE_DATA_COMPLEX and IMAGE_DATA_CH0 (match STM32 behavior)
-    if (inImg->log != IMAGE_DATA_COMPLEX && inImg->log != IMAGE_DATA_CH0)
-        return EMBEDDIP_ERROR_INVALID_ARG;
-
-    float *buf0 = (float *)ps_malloc(N * N * 2 * sizeof(float));
-    // Use ch[0] if log is IMAGE_DATA_CH0, otherwise use ch[1]
-    float *buf1 = (inImg->log == IMAGE_DATA_CH0) ? inImg->chals->ch[0] : inImg->chals->ch[1];
-
-    // Conjugate input for IFFT (negate imaginary parts)
-    for (int i = 0; i < N * N; i++) {
-        buf1[2 * i + 1] = -buf1[2 * i + 1];
-    }
-
-    // iFFT on rows (using forward FFT on conjugated data)
-    for (int row = 0; row < N; row++) {
-        dsps_fft2r_fc32(buf1 + row * N * 2, N);
-        dsps_bit_rev_fc32(buf1 + row * N * 2, N);
-    }
-
-    // Conjugate intermediate result
-    for (int i = 0; i < N * N; i++) {
-        buf1[2 * i + 1] = -buf1[2 * i + 1];
-    }
-
-    // Transpose back to buf0 (INVERSE transpose - swap src/dst compared to FFT)
-    for (int y = 0; y < N; y++) {
-        for (int x = 0; x < N; x++) {
-            int dst = 2 * (y * N + x);
-            int src = 2 * (x * N + y);
-            buf0[dst] = buf1[src];
-            buf0[dst + 1] = buf1[src + 1];
-        }
-    }
-
-    // Conjugate before second FFT
-    for (int i = 0; i < N * N; i++) {
-        buf0[2 * i + 1] = -buf0[2 * i + 1];
-    }
-
-    // iFFT on columns (using forward FFT on conjugated data)
-    for (int row = 0; row < N; row++) {
-        dsps_fft2r_fc32(buf0 + row * N * 2, N);
-        dsps_bit_rev_fc32(buf0 + row * N * 2, N);
-    }
-
-    // Conjugate output
-    for (int i = 0; i < N * N; i++) {
-        buf0[2 * i + 1] = -buf0[2 * i + 1];
-    }
-
-    // Transpose back: buf0 → buf1 to undo the earlier transpose
-    for (int y = 0; y < N; y++) {
-        for (int x = 0; x < N; x++) {
-            int src = 2 * (y * N + x);
-            int dst = 2 * (x * N + y);
-            buf1[dst] = buf0[src];
-            buf1[dst + 1] = buf0[src + 1];
-        }
-    }
-
-    if (isChalsEmpty(outImg)) {
-        createChals(outImg, 1);
-        outImg->is_chals = 1;
-    }
-
-    float *result = outImg->chals->ch[0];
-    float scale = 1.0f / (N * N);
-    for (int i = 0; i < N * N; i++) {
-        result[i] = buf1[2 * i] * scale;
-    }
-
-    outImg->log = IMAGE_DATA_CH0;
-    free(buf0);
-    return EMBEDDIP_OK;
-}
-
-embeddip_status_t _log_(Image *img)
-{
-    if (!img || isChalsEmpty(img))
-        return EMBEDDIP_ERROR_NULL_PTR;
-
-    float *data = img->chals->ch[0];
-    for (int i = 0; i < img->size; ++i) {
-        data[i] = logf(data[i] + 1e-3f);  // Avoid log(0)
-    }
-    return EMBEDDIP_OK;
-}
-
-embeddip_status_t _add_(Image *img, float value)
-{
-    if (!img || isChalsEmpty(img))
-        return EMBEDDIP_ERROR_NULL_PTR;
-
-    float *data = img->chals->ch[0];
-    for (int i = 0; i < img->size; ++i) {
-        data[i] += value;
-    }
-    return EMBEDDIP_OK;
-}
-
-embeddip_status_t fftshift(Image *img)
-{
-    if (!img || isChalsEmpty(img))
-        return EMBEDDIP_ERROR_NULL_PTR;
-
-    float *data = (img->log == IMAGE_DATA_COMPLEX) ? img->chals->ch[1] : img->chals->ch[0];
-    int width = img->width;
-    int height = img->height;
-
-    int cx = width / 2;
-    int cy = height / 2;
-
-    for (int y = 0; y < cy; ++y) {
-        for (int x = 0; x < cx; ++x) {
-            int q0 = 2 * ((y * width) + x);
-            int q1 = 2 * ((y * width) + x + cx);
-            int q2 = 2 * (((y + cy) * width) + x);
-            int q3 = 2 * (((y + cy) * width) + x + cx);
-
-            for (int i = 0; i < 2; ++i) {
-                float tmp = data[q0 + i];
-                data[q0 + i] = data[q3 + i];
-                data[q3 + i] = tmp;
-
-                tmp = data[q1 + i];
-                data[q1 + i] = data[q2 + i];
-                data[q2 + i] = tmp;
-            }
-        }
-    }
-    return EMBEDDIP_OK;
-}
-
-embeddip_status_t _abs_(const Image *fftImg, Image *magImg)
-{
-    if (!fftImg || !fftImg->chals || !magImg) {
-        // Serial.println("[ERROR] Input FFT image or its channels are null.");
-        return EMBEDDIP_ERROR_NULL_PTR;
-    }
-
-    int size = fftImg->width * fftImg->height;
-
-    float *fft = (fftImg->log == IMAGE_DATA_COMPLEX) ? fftImg->chals->ch[1] : fftImg->chals->ch[0];
-
-    if (!fft) {
-        // Serial.println("[ERROR] FFT buffer is null.");
-        return EMBEDDIP_ERROR_NULL_PTR;
-    }
-
-    if (isChalsEmpty(magImg)) {
-        createChals(magImg, 1);
-        magImg->is_chals = 1;
-        // Serial.println("[INFO] Output magnitude channel created.");
-    }
-
-    float *mag = magImg->chals->ch[0];
-    if (!mag) {
-        // Serial.println("[ERROR] Magnitude channel buffer is null.");
-        return EMBEDDIP_ERROR_NULL_PTR;
-    }
-
-    for (int i = 0; i < size; ++i) {
-        float re = fft[2 * i];
-        float im = fft[2 * i + 1];
-        mag[i] = sqrtf(re * re + im * im);
-        // Uncomment the line below for verbose per-pixel debugging
-        // Serial.printf("[DEBUG] Index %d: re=%.3f, im=%.3f, mag=%.3f\n", i, re, im, mag[i]);
-    }
-
-    magImg->log = IMAGE_DATA_MAGNITUDE;
-    return EMBEDDIP_OK;
-}
-
-embeddip_status_t _phase_(const Image *fftImg, Image *phaseImg)
-{
-    int size = fftImg->width * fftImg->height;
-
-    float *fft = (fftImg->log == IMAGE_DATA_COMPLEX) ? fftImg->chals->ch[1] : fftImg->chals->ch[0];
-
-    if (isChalsEmpty(phaseImg)) {
-        createChals(phaseImg, 1);
-        phaseImg->is_chals = 1;
-    }
-
-    float *out = phaseImg->chals->ch[0];
-
-    for (int i = 0; i < size; ++i) {
-        out[i] = atan2f(fft[2 * i + 1], fft[2 * i]);
-    }
-
-    phaseImg->log = IMAGE_DATA_PHASE;
-    return EMBEDDIP_OK;
-}
-
-embeddip_status_t polarToCart(const Image *magnitude, const Image *phase, Image *outImg)
-{
-    int size = magnitude->width * magnitude->height;
-
-    if (isChalsEmpty(outImg)) {
-        createChalsComplex(outImg, 2);  // Need 2 channels like STM32
-        outImg->is_chals = 1;
-    }
-
-    float *mag = magnitude->chals->ch[0];
-    float *phs = phase->chals->ch[0];
-    float *fft = outImg->chals->ch[0];
-
-    for (int i = 0; i < size; ++i) {
-        fft[2 * i] = mag[i] * cosf(phs[i]);
-        fft[2 * i + 1] = mag[i] * sinf(phs[i]);
-    }
-
-    outImg->log = IMAGE_DATA_CH0;
-    return EMBEDDIP_OK;
-}
-
-/**
- * @brief Performs element-wise complex multiplication in frequency domain.
- *
- *
- * @param img1    First complex image
- * @param img2    Second complex image
- * @param outImg  Output complex image
- */
-embeddip_status_t multiply(const Image *img1, const Image *img2, Image *outImg)
-{
-    // Input validation
-    if (!img1 || !img2 || !outImg) {
-        return EMBEDDIP_ERROR_NULL_PTR;
-    }
-
-    if (img1->width != img2->width || img1->height != img2->height) {
-        return EMBEDDIP_ERROR_INVALID_SIZE;
-    }
-
-    // Allocate output if needed
-    if (isChalsEmpty(outImg)) {
-        createChals(outImg, 1);
-        outImg->is_chals = 1;
-    }
-
-    float *in1 = NULL, *in2 = NULL;
-
-    // Select input channel based on log state
-    if (img1->log == IMAGE_DATA_CH0) {
-        in1 = (float *)img1->chals->ch[0];
-    } else if (img1->log == IMAGE_DATA_COMPLEX) {
-        in1 = (float *)img1->chals->ch[1];
-    } else if (img1->log == IMAGE_DATA_PIXELS) {
-        in1 = (float *)img1->pixels;
-    } else {
-        return EMBEDDIP_ERROR_INVALID_ARG;
-    }
-
-    if (img2->log == IMAGE_DATA_CH0) {
-        in2 = (float *)img2->chals->ch[0];
-    } else if (img2->log == IMAGE_DATA_COMPLEX) {
-        in2 = (float *)img2->chals->ch[1];
-    } else if (img2->log == IMAGE_DATA_PIXELS) {
-        in2 = (float *)img2->pixels;
-    } else {
-        return EMBEDDIP_ERROR_INVALID_ARG;
-    }
-
-    float *out = (float *)outImg->chals->ch[0];
-
-    int size = img1->width * img1->height;
-    for (int i = 0; i < size; ++i) {
-        out[i] = in1[i] * in2[i];
-    }
-
-    outImg->log = IMAGE_DATA_CH0;
-    return EMBEDDIP_OK;
-}
-
-/**
- * @brief Computes pixel-wise difference between two images: out = img1 - img2.
- *
- * Both images must have the same dimensions and a single channel.
- *
- * @param[in]  img1    First input image.
- * @param[in]  img2    Second input image.
- * @param[out] outImg  Output image to store the difference.
- */
-/**
- * @brief Computes pixel-wise difference: out = img1 - img2 (clamped to >= 0).
- *
- * Optimized for performance: checks image types once, then uses fast loops.
- *
- * @param[in]  img1    First image (original).
- * @param[in]  img2    Second image (to subtract).
- * @param[out] outImg  Output image (difference).
- * @return EMBEDDIP_OK on success, error code otherwise.
- */
-embeddip_status_t difference(const Image *img1, const Image *img2, Image *outImg)
-{
-    if (!img1 || !img2 || !outImg)
-        return EMBEDDIP_ERROR_NULL_PTR;
-
-    if (img1->width != img2->width || img1->height != img2->height)
-        return EMBEDDIP_ERROR_INVALID_SIZE;
-
-    int size = img1->width * img1->height;
-
-    // Allocate output channel if needed
-    if (isChalsEmpty(outImg)) {
-        embeddip_status_t status = createChals(outImg, 1);
-        if (status != EMBEDDIP_OK)
-            return status;
-        outImg->is_chals = 1;
-    }
-
-    float *out = outImg->chals->ch[0];
-    if (!out)
-        return EMBEDDIP_ERROR_OUT_OF_MEMORY;
-
-    // Check types ONCE, then use optimized loops (no conditionals inside loop)
-    // Most common case: img1=PIXELS, img2=CH0 (your use case)
-    if (img1->log == IMAGE_DATA_PIXELS &&
-        (img2->log == IMAGE_DATA_CH0 || img2->log == IMAGE_DATA_MAGNITUDE)) {
-        if (!img1->pixels || !img2->chals || !img2->chals->ch[0])
-            return EMBEDDIP_ERROR_NULL_PTR;
-
-        uint8_t *pix1 = (uint8_t *)img1->pixels;
-        float *ch2 = img2->chals->ch[0];
-        for (int i = 0; i < size; ++i)
-            out[i] = fmaxf((float)pix1[i] - ch2[i], 0.0f);
-    }
-    // Both pixels
-    else if (img1->log == IMAGE_DATA_PIXELS && img2->log == IMAGE_DATA_PIXELS) {
-        if (!img1->pixels || !img2->pixels)
-            return EMBEDDIP_ERROR_NULL_PTR;
-
-        uint8_t *pix1 = (uint8_t *)img1->pixels;
-        uint8_t *pix2 = (uint8_t *)img2->pixels;
-        for (int i = 0; i < size; ++i)
-            out[i] = fmaxf((float)(pix1[i] - pix2[i]), 0.0f);
-    }
-    // Both channels
-    else if ((img1->log == IMAGE_DATA_CH0 || img1->log == IMAGE_DATA_MAGNITUDE) &&
-             (img2->log == IMAGE_DATA_CH0 || img2->log == IMAGE_DATA_MAGNITUDE)) {
-        if (!img1->chals || !img1->chals->ch[0] || !img2->chals || !img2->chals->ch[0])
-            return EMBEDDIP_ERROR_NULL_PTR;
-
-        float *ch1 = img1->chals->ch[0];
-        float *ch2 = img2->chals->ch[0];
-        for (int i = 0; i < size; ++i)
-            out[i] = fmaxf(ch1[i] - ch2[i], 0.0f);
-    }
-    // img1=CH0, img2=PIXELS
-    else if ((img1->log == IMAGE_DATA_CH0 || img1->log == IMAGE_DATA_MAGNITUDE) &&
-             img2->log == IMAGE_DATA_PIXELS) {
-        if (!img1->chals || !img1->chals->ch[0] || !img2->pixels)
-            return EMBEDDIP_ERROR_NULL_PTR;
-
-        float *ch1 = img1->chals->ch[0];
-        uint8_t *pix2 = (uint8_t *)img2->pixels;
-        for (int i = 0; i < size; ++i)
-            out[i] = fmaxf(ch1[i] - (float)pix2[i], 0.0f);
-    } else {
-        // Unsupported combination
-        return EMBEDDIP_ERROR_INVALID_ARG;
-    }
-
-    outImg->log = IMAGE_DATA_CH0;
-    return EMBEDDIP_OK;
-}
-
-/**
- * @brief Creates a frequency-domain filter mask.
- *
- * Generates circular/radial filters centered at the image center for frequency domain filtering.
- * The filter is created in spatial frequency coordinates where the center represents DC (zero
- * frequency).
- *
- * @param[in,out] maskImg    Image to fill with filter values (must have width/height already set).
- *                           Creates ch[0] channel if needed and sets log = IMAGE_DATA_CH0.
- * @param[in]     filterType Type of filter (lowpass, highpass, bandpass, etc.).
- * @param[in]     cutoff1    Primary cutoff radius in PIXELS from center.
- *                           - For lowpass: frequencies within this radius pass (1.0), outside block
- * (0.0)
- *                           - For highpass: frequencies outside this radius pass (1.0), inside
- * block (0.0)
- *                           - For bandpass: inner radius (with cutoff2 as outer radius)
- *                           - For Gaussian filters: standard deviation of the Gaussian
- * @param[in]     cutoff2    Secondary cutoff radius in PIXELS (only used for bandpass filters).
- *                           Must satisfy: cutoff1 < cutoff2
- *
- * @return EMBEDDIP_OK on success, error code otherwise.
- *
- * @note Cutoff units are PIXELS measured as Euclidean distance from image center.
- *       For a 256×256 image:
- *       - Center is at (128, 128)
- *       - Max distance to corner ≈ 181 pixels
- *       - cutoff1=30 means frequencies within 30-pixel radius from center
- *       - This corresponds to ~16.6% of max frequency (30/181)
- *
- * @note Filter values range from 0.0 (block) to 1.0 (pass).
- *       Ideal filters produce hard edges (0 or 1).
- *       Gaussian filters produce smooth transitions.
- *
- * @example
- *   // Low-pass: pass low frequencies (smooth, blur effect)
- *   getFilter(mask, FREQ_FILTER_IDEAL_LOWPASS, 30, 0);
- *
- *   // High-pass: pass high frequencies (edges, details)
- *   getFilter(mask, FREQ_FILTER_IDEAL_HIGHPASS, 50, 0);
- *
- *   // Band-pass: pass frequencies between 20-60 pixels from center
- *   getFilter(mask, FREQ_FILTER_IDEAL_BANDPASS, 20, 60);
- */
-embeddip_status_t
-getFilter(Image *maskImg, FrequencyFilterType filterType, float cutoff1, float cutoff2)
-{
-    if (!maskImg)
-        return EMBEDDIP_ERROR_NULL_PTR;
-
-    // Validate cutoff values
-    if (cutoff1 < 0.0f)
-        return EMBEDDIP_ERROR_INVALID_ARG;
-
-    if (filterType == FREQ_FILTER_IDEAL_BANDPASS || filterType == FREQ_FILTER_GAUSSIAN_BANDPASS) {
-        if (cutoff2 < 0.0f || cutoff1 >= cutoff2)
-            return EMBEDDIP_ERROR_INVALID_ARG;
-    }
-
-    int w = maskImg->width;
-    int h = maskImg->height;
-    int cx = w / 2;
-    int cy = h / 2;
-
-    maskImg->format = IMAGE_FORMAT_GRAYSCALE;
-
-    if (isChalsEmpty(maskImg)) {
-        createChals(maskImg, 1);
-        maskImg->is_chals = 1;
-    }
-
-    float *mask = maskImg->chals->ch[0];
-
-    for (int y = 0; y < h; ++y) {
-        for (int x = 0; x < w; ++x) {
-            int dx = x - cx;
-            int dy = y - cy;
-            float d = sqrtf((float)(dx * dx + dy * dy));
-
-            float value = 0.0f;
-
-            switch (filterType) {
-            case FREQ_FILTER_IDEAL_LOWPASS:
-                value = (d <= cutoff1) ? 1.0f : 0.0f;
-                break;
-
-            case FREQ_FILTER_GAUSSIAN_LOWPASS:
-                value = expf(-(d * d) / (2.0f * cutoff1 * cutoff1));
-                break;
-
-            case FREQ_FILTER_IDEAL_HIGHPASS:
-                value = (d >= cutoff1) ? 1.0f : 0.0f;
-                break;
-
-            case FREQ_FILTER_GAUSSIAN_HIGHPASS:
-                value = 1.0f - expf(-(d * d) / (2.0f * cutoff1 * cutoff1));
-                break;
-
-            case FREQ_FILTER_IDEAL_BANDPASS:
-                value = (d >= cutoff1 && d <= cutoff2) ? 1.0f : 0.0f;
-                break;
-
-            case FREQ_FILTER_GAUSSIAN_BANDPASS: {
-                float gLow = expf(-(d * d) / (2.0f * cutoff2 * cutoff2));
-                float gHigh = expf(-(d * d) / (2.0f * cutoff1 * cutoff1));
-                value = gLow - gHigh;
-                break;
-            }
-
-            default:
-                value = 0.0f;
-                break;
-            }
-
-            mask[y * w + x] = value;
-        }
-    }
-
-    maskImg->log = IMAGE_DATA_CH0;
-    return EMBEDDIP_OK;
-}
-
-embeddip_status_t ffilter2D(const Image *fftImg, const Image *filterMask, Image *outImg)
-{
-    if (!fftImg || !filterMask || !outImg || isChalsEmpty(fftImg) || isChalsEmpty(filterMask))
-        return EMBEDDIP_ERROR_NULL_PTR;
-
-    int width = fftImg->width;
-    int height = fftImg->height;
-    int size = width * height;
-
-    // Create magnitude and phase containers
-    Image *magImg = NULL;
-    Image *phaseImg = NULL;
-    createImageWH(width, height, IMAGE_FORMAT_GRAYSCALE, &magImg);
-    createImageWH(width, height, IMAGE_FORMAT_GRAYSCALE, &phaseImg);
-
-    _abs_(fftImg, magImg);
-    _phase_(fftImg, phaseImg);
-
-    float *mag = magImg->chals->ch[0];
-    float *mask = filterMask->chals->ch[0];
-
-    for (int i = 0; i < size; ++i)
-        mag[i] *= mask[i];
-
-    polarToCart(magImg, phaseImg, outImg);
-    return EMBEDDIP_OK;
-}
-#endif
diff --git a/board/stm32f7/board_stm32f7_fft.c b/board/stm32f7/board_stm32f7_fft.c
deleted file mode 100755
index 7c52514..0000000
--- a/board/stm32f7/board_stm32f7_fft.c
+++ /dev/null
@@ -1,906 +0,0 @@
-// SPDX-License-Identifier: MIT
-// Copyright (c) 2025 EmbedDIP
-
-#include <embedDIP_configs.h>
-
-#ifdef TARGET_BOARD_STM32F7
-
-    #include "arm_const_structs.h"
-    #include "arm_math.h"
-    #include <board/common.h>
-    #include <core/memory_manager.h>
-    #include <fft.h>
-
-embeddip_status_t _log_(Image *img)
-{
-    if (!img)
-        return EMBEDDIP_ERROR_NULL_PTR;
-    if (isChalsEmpty(img))
-        return EMBEDDIP_ERROR_INVALID_ARG;
-
-    float *data = img->chals->ch[0];
-    for (int i = 0; i < img->size; ++i) {
-        data[i] = logf(data[i]);
-    }
-
-    return EMBEDDIP_OK;
-}
-
-/**
- * @brief Adds a scalar value to all pixels in a single-channel image.
- *
- * @param[in,out] img   Image to modify (in-place).
- * @param[in]     value Scalar value to add.
- */
-embeddip_status_t _add_(Image *img, float value)
-{
-    if (!img)
-        return EMBEDDIP_ERROR_NULL_PTR;
-    if (isChalsEmpty(img))
-        return EMBEDDIP_ERROR_INVALID_ARG;
-
-    float *data = img->chals->ch[0];
-    for (uint32_t i = 0; i < img->size; ++i) {
-        data[i] += value;
-    }
-
-    return EMBEDDIP_OK;
-}
-
-/**
- * @brief Computes the inverse Fourier transform of the input image.
- *
- * @param inImg Input image (Fourier domain).
- * @param outImg Output image (spatial domain).
- */
-embeddip_status_t fourierInv(const Image *inImg, Image *outImg)
-{
-    int imageN = 256;
-    float *fourier = inImg->chals->ch[0];
-    float *fourier2 = inImg->chals->ch[1];
-
-    for (int i = 0; i < imageN; i++) {
-        arm_cfft_f32(&arm_cfft_sR_f32_len256, fourier2 + imageN * i * 2, 1, 1);
-    }
-
-    for (int k = 0; k < imageN; k++) {
-        for (int j = 0; j < imageN; j++) {
-            fourier[2 * j + k * imageN * 2] = (float)fourier2[j * imageN * 2 + k * 2];
-            fourier[2 * j + 1 + k * imageN * 2] = (float)fourier2[j * imageN * 2 + k * 2 + 1];
-        }
-    }
-
-    for (int i = 0; i < imageN; i++) {
-        arm_cfft_f32(&arm_cfft_sR_f32_len256, fourier + imageN * i * 2, 1, 1);
-    }
-
-    if (isChalsEmpty(outImg)) {
-        createChals(outImg, 1);
-        outImg->is_chals = 1;
-    }
-    for (int i = 0; i < imageN * imageN; i++) {
-        outImg->chals->ch[0][i] =
-            (float)sqrt(fourier[2 * i] * fourier[2 * i] + fourier[2 * i + 1] * fourier[2 * i + 1]);
-    }
-
-    return EMBEDDIP_OK;
-}
-
-/**
- * @brief Converts polar coordinates (magnitude and phase) to complex cartesian (real and
- * imaginary).
- *
- * @param magnitude Pointer to magnitude image (1 channel).
- * @param phase     Pointer to phase image (1 channel), in radians.
- * @param outImg    Output image with 2 channels: real and imaginary.
- */
-embeddip_status_t polarToCart(const Image *mag_img, const Image *phase_img, Image *dst)
-{
-    if (!mag_img || !phase_img || !dst)
-        return EMBEDDIP_ERROR_NULL_PTR;
-
-    int size = mag_img->width * mag_img->height;
-
-    if (!isChalsEmpty(dst) && dst->chals && dst->chals->ch[0]) {
-        // Ensure output buffer has complex capacity (2*N floats).
-        memory_free(dst->chals->ch[0]);
-        dst->chals->ch[0] = NULL;
-    }
-
-    embeddip_status_t status = createChalsComplex(dst, 1);
-    if (status != EMBEDDIP_OK) {
-        return status;
-    }
-
-    float *mag = mag_img->chals->ch[0];
-    float *phs = phase_img->chals->ch[0];
-    float *fft = dst->chals->ch[0];
-
-    for (int i = 0; i < size; ++i) {
-        fft[i * 2] = mag[i] * cosf(phs[i]);      // REEL
-        fft[i * 2 + 1] = mag[i] * sinf(phs[i]);  // IMJ
-    }
-
-    dst->log = IMAGE_DATA_CH0;
-    return EMBEDDIP_OK;
-}
-
-/**
- * @brief Performs element-wise complex multiplication in frequency domain.
- *
- *
- * @param img1    First complex image
- * @param img2    Second complex image
- * @param outImg  Output complex image
- */
-embeddip_status_t multiply(const Image *img1, const Image *img2, Image *outImg)
-{
-    // Input validation
-    if (!img1 || !img2 || !outImg) {
-        return EMBEDDIP_ERROR_NULL_PTR;
-    }
-
-    if (img1->width != img2->width || img1->height != img2->height) {
-        return EMBEDDIP_ERROR_INVALID_SIZE;
-    }
-
-    // Allocate output if needed
-    if (isChalsEmpty(outImg)) {
-        createChals(outImg, 1);
-        outImg->is_chals = 1;
-    }
-
-    float *in1 = NULL, *in2 = NULL;
-
-    // Select input channel based on log state
-    if (img1->log == IMAGE_DATA_CH0) {
-        in1 = (float *)img1->chals->ch[0];
-    } else if (img1->log == IMAGE_DATA_COMPLEX) {
-        in1 = (float *)img1->chals->ch[1];
-    } else if (img1->log == IMAGE_DATA_PIXELS) {
-        in1 = (float *)img1->pixels;
-    } else {
-        return EMBEDDIP_ERROR_INVALID_ARG;
-    }
-
-    if (img2->log == IMAGE_DATA_CH0) {
-        in2 = (float *)img2->chals->ch[0];
-    } else if (img2->log == IMAGE_DATA_COMPLEX) {
-        in2 = (float *)img2->chals->ch[1];
-    } else if (img2->log == IMAGE_DATA_PIXELS) {
-        in2 = (float *)img2->pixels;
-    } else {
-        return EMBEDDIP_ERROR_INVALID_ARG;
-    }
-
-    float *out = (float *)outImg->chals->ch[0];
-
-    int size = img1->width * img1->height;
-    for (int i = 0; i < size; ++i) {
-        out[i] = in1[i] * in2[i];
-    }
-
-    outImg->log = IMAGE_DATA_CH0;
-    return EMBEDDIP_OK;
-}
-
-/**
- * @brief Computes pixel-wise difference between two images: out = img1 - img2.
- *
- * Both images must have the same dimensions and a single channel.
- *
- * @param[in]  img1    First input image.
- * @param[in]  img2    Second input image.
- * @param[out] outImg  Output image to store the difference.
- */
-/**
- * @brief Computes pixel-wise difference: out = img1 - img2 (clamped to >= 0).
- *
- * Optimized for performance: checks image types once, then uses fast loops.
- *
- * @param[in]  img1    First image (original).
- * @param[in]  img2    Second image (to subtract).
- * @param[out] outImg  Output image (difference).
- * @return EMBEDDIP_OK on success, error code otherwise.
- */
-embeddip_status_t difference(const Image *src1, const Image *src2, Image *dst)
-{
-    if (!src1 || !src2 || !dst)
-        return EMBEDDIP_ERROR_NULL_PTR;
-
-    if (src1->width != src2->width || src1->height != src2->height)
-        return EMBEDDIP_ERROR_INVALID_SIZE;
-
-    int size = src1->width * src1->height;
-
-    // Allocate output channel if needed
-    if (isChalsEmpty(dst)) {
-        embeddip_status_t status = createChals(dst, 1);
-        if (status != EMBEDDIP_OK)
-            return status;
-        dst->is_chals = 1;
-    }
-
-    float *out = dst->chals->ch[0];
-    if (!out)
-        return EMBEDDIP_ERROR_OUT_OF_MEMORY;
-
-    // Check types ONCE, then use optimized loops (no conditionals inside loop)
-    // Most common case: src1=PIXELS, src2=CH0 (your use case)
-    if (src1->log == IMAGE_DATA_PIXELS &&
-        (src2->log == IMAGE_DATA_CH0 || src2->log == IMAGE_DATA_MAGNITUDE)) {
-        if (!src1->pixels || !src2->chals || !src2->chals->ch[0])
-            return EMBEDDIP_ERROR_NULL_PTR;
-
-        uint8_t *pix1 = src1->pixels;
-        float *ch2 = src2->chals->ch[0];
-        for (int i = 0; i < size; ++i)
-            out[i] = fmaxf((float)pix1[i] - ch2[i], 0.0f);
-    }
-    // Both pixels
-    else if (src1->log == IMAGE_DATA_PIXELS && src2->log == IMAGE_DATA_PIXELS) {
-        if (!src1->pixels || !src2->pixels)
-            return EMBEDDIP_ERROR_NULL_PTR;
-
-        uint8_t *pix1 = src1->pixels;
-        uint8_t *pix2 = src2->pixels;
-        for (int i = 0; i < size; ++i)
-            out[i] = fmaxf((float)(pix1[i] - pix2[i]), 0.0f);
-    }
-    // Both channels
-    else if ((src1->log == IMAGE_DATA_CH0 || src1->log == IMAGE_DATA_MAGNITUDE) &&
-             (src2->log == IMAGE_DATA_CH0 || src2->log == IMAGE_DATA_MAGNITUDE)) {
-        if (!src1->chals || !src1->chals->ch[0] || !src2->chals || !src2->chals->ch[0])
-            return EMBEDDIP_ERROR_NULL_PTR;
-
-        float *ch1 = src1->chals->ch[0];
-        float *ch2 = src2->chals->ch[0];
-        for (int i = 0; i < size; ++i)
-            out[i] = fmaxf(ch1[i] - ch2[i], 0.0f);
-    }
-    // src1=CH0, src2=PIXELS
-    else if ((src1->log == IMAGE_DATA_CH0 || src1->log == IMAGE_DATA_MAGNITUDE) &&
-             src2->log == IMAGE_DATA_PIXELS) {
-        if (!src1->chals || !src1->chals->ch[0] || !src2->pixels)
-            return EMBEDDIP_ERROR_NULL_PTR;
-
-        float *ch1 = src1->chals->ch[0];
-        uint8_t *pix2 = src2->pixels;
-        for (int i = 0; i < size; ++i)
-            out[i] = fmaxf(ch1[i] - (float)pix2[i], 0.0f);
-    } else {
-        // Unsupported combination
-        return EMBEDDIP_ERROR_INVALID_ARG;
-    }
-
-    dst->log = IMAGE_DATA_CH0;
-    return EMBEDDIP_OK;
-}
-
-/**
- * @brief Fills the given image with a frequency domain filter mask.
- *
- * This modifies the image in-place. It must already have width and height set.
- *
- * @param maskImg    Target image to be filled with mask values.
- * @param filterType Type of filter to create (low-pass, high-pass, band-pass, etc.).
- * @param cutoff1    Cutoff radius (or inner radius for band-pass).
- * @param cutoff2    Outer radius for band-pass (ignored for other types).
- */
-/**
- * @brief Creates a frequency-domain filter mask.
- *
- * Generates circular/radial filters centered at the image center for frequency domain filtering.
- * The filter is created in spatial frequency coordinates where the center represents DC (zero
- * frequency).
- *
- * @param[in,out] maskImg    Image to fill with filter values (must have width/height already set).
- *                           Creates ch[0] channel if needed and sets log = IMAGE_DATA_CH0.
- * @param[in]     filterType Type of filter (lowpass, highpass, bandpass, etc.).
- * @param[in]     cutoff1    Primary cutoff radius in PIXELS from center.
- *                           - For lowpass: frequencies within this radius pass (1.0), outside block
- * (0.0)
- *                           - For highpass: frequencies outside this radius pass (1.0), inside
- * block (0.0)
- *                           - For bandpass: inner radius (with cutoff2 as outer radius)
- *                           - For Gaussian filters: standard deviation of the Gaussian
- * @param[in]     cutoff2    Secondary cutoff radius in PIXELS (only used for bandpass filters).
- *                           Must satisfy: cutoff1 < cutoff2
- *
- * @return EMBEDDIP_OK on success, error code otherwise.
- *
- * @note Cutoff units are PIXELS measured as Euclidean distance from image center.
- *       For a 256×256 image:
- *       - Center is at (128, 128)
- *       - Max distance to corner ≈ 181 pixels
- *       - cutoff1=30 means frequencies within 30-pixel radius from center
- *       - This corresponds to ~16.6% of max frequency (30/181)
- *
- * @note Filter values range from 0.0 (block) to 1.0 (pass).
- *       Ideal filters produce hard edges (0 or 1).
- *       Gaussian filters produce smooth transitions.
- *
- * @example
- *   // Low-pass: pass low frequencies (smooth, blur effect)
- *   getFilter(mask, FREQ_FILTER_IDEAL_LOWPASS, 30, 0);
- *
- *   // High-pass: pass high frequencies (edges, details)
- *   getFilter(mask, FREQ_FILTER_IDEAL_HIGHPASS, 50, 0);
- *
- *   // Band-pass: pass frequencies between 20-60 pixels from center
- *   getFilter(mask, FREQ_FILTER_IDEAL_BANDPASS, 20, 60);
- */
-embeddip_status_t
-getFilter(Image *filter_img, FrequencyFilterType filter_type, float cutoff1, float cutoff2)
-{
-    if (!filter_img)
-        return EMBEDDIP_ERROR_NULL_PTR;
-
-    // Validate cutoff values
-    if (cutoff1 < 0.0f)
-        return EMBEDDIP_ERROR_INVALID_ARG;
-
-    if (filter_type == FREQ_FILTER_IDEAL_BANDPASS || filter_type == FREQ_FILTER_GAUSSIAN_BANDPASS) {
-        if (cutoff2 < 0.0f || cutoff1 >= cutoff2)
-            return EMBEDDIP_ERROR_INVALID_ARG;
-    }
-
-    int w = filter_img->width;
-    int h = filter_img->height;
-    int cx = w / 2;
-    int cy = h / 2;
-
-    filter_img->format = IMAGE_FORMAT_GRAYSCALE;
-
-    if (isChalsEmpty(filter_img)) {
-        createChals(filter_img, 1);
-        filter_img->is_chals = 1;
-    }
-
-    float *mask = filter_img->chals->ch[0];
-
-    for (int y = 0; y < h; ++y) {
-        for (int x = 0; x < w; ++x) {
-            int dx = x - cx;
-            int dy = y - cy;
-            float d = sqrtf((float)(dx * dx + dy * dy));
-
-            float value = 0.0f;
-
-            switch (filter_type) {
-            case FREQ_FILTER_IDEAL_LOWPASS:
-                value = (d <= cutoff1) ? 1.0f : 0.0f;
-                break;
-
-            case FREQ_FILTER_GAUSSIAN_LOWPASS:
-                value = expf(-(d * d) / (2.0f * cutoff1 * cutoff1));
-                break;
-
-            case FREQ_FILTER_IDEAL_HIGHPASS:
-                value = (d >= cutoff1) ? 1.0f : 0.0f;
-                break;
-
-            case FREQ_FILTER_GAUSSIAN_HIGHPASS:
-                value = 1.0f - expf(-(d * d) / (2.0f * cutoff1 * cutoff1));
-                break;
-
-            case FREQ_FILTER_IDEAL_BANDPASS:
-                value = (d >= cutoff1 && d <= cutoff2) ? 1.0f : 0.0f;
-                break;
-
-            case FREQ_FILTER_GAUSSIAN_BANDPASS: {
-                float gLow = expf(-(d * d) / (2.0f * cutoff2 * cutoff2));
-                float gHigh = expf(-(d * d) / (2.0f * cutoff1 * cutoff1));
-                value = gLow - gHigh;
-                break;
-            }
-
-            default:
-                value = 0.0f;
-                break;
-            }
-
-            mask[y * w + x] = value;
-        }
-    }
-
-    filter_img->log = IMAGE_DATA_CH0;
-    return EMBEDDIP_OK;
-}
-
-/**
- * @brief Checks if input dimensions are valid (powers of 2 and matching).
- */
-static bool isValidFFTSize(int w, int h)
-{
-    return (w == h) && ((w & (w - 1)) == 0);  // square and power-of-2
-}
-
-/**
- * @brief Performs forward 2D FFT on a single-channel image.
- *        ch[0] holds interleaved (Re, Im), ch[1] holds transposed for vertical pass.
- */
-embeddip_status_t fft(const Image *src, Image *dst)
-{
-    int N = src->width;
-    if (!isValidFFTSize(src->width, src->height))
-        return -1;
-
-    float *buf0;
-    float *buf1;
-
-    if (isChalsEmpty(dst)) {
-        createChalsComplex(dst, 2);  // 2 complex channels for interleaved (Re, Im)
-        dst->is_chals = 1;
-        buf0 = dst->chals->ch[0];
-        buf1 = dst->chals->ch[1];
-    } else {
-        buf0 = dst->chals->ch[0];
-        buf1 = dst->chals->ch[1];
-    }
-
-    const uint8_t *pixels = src->pixels;
-    for (int i = 0; i < N * N; i++) {
-        buf0[2 * i] = (float)pixels[i];
-        buf0[2 * i + 1] = 0.0f;
-    }
-
-    for (int row = 0; row < N; row++) {
-        arm_cfft_f32(&arm_cfft_sR_f32_len256, buf0 + row * N * 2, 0, 1);
-    }
-
-    for (int y = 0; y < N; y++) {
-        for (int x = 0; x < N; x++) {
-            int src = 2 * (y * N + x);
-            int dst = 2 * (x * N + y);
-            buf1[dst] = buf0[src];
-            buf1[dst + 1] = buf0[src + 1];
-        }
-    }
-
-    for (int row = 0; row < N; row++) {
-        arm_cfft_f32(&arm_cfft_sR_f32_len256, buf1 + row * N * 2, 0, 1);
-    }
-
-    // Transpose back: buf1 → buf0 to undo the earlier transpose
-    for (int y = 0; y < N; y++) {
-        for (int x = 0; x < N; x++) {
-            int src = 2 * (y * N + x);
-            int dst = 2 * (x * N + y);
-            buf0[dst] = buf1[src];
-            buf0[dst + 1] = buf1[src + 1];
-        }
-    }
-
-    // Copy back to buf1 for output
-    for (int i = 0; i < N * N * 2; i++) {
-        buf1[i] = buf0[i];
-    }
-
-    dst->log = IMAGE_DATA_COMPLEX;
-    return 0;
-}
-
-/**
- * @brief Performs inverse 2D FFT on complex image. ch[0] is output.
- */
-embeddip_status_t ifft(const Image *src, Image *dst)
-{
-    int N = src->width;
-
-    // if input image does not hold the correct data.
-    if (src->log != IMAGE_DATA_COMPLEX && src->log != IMAGE_DATA_CH0) {
-        return -1;
-    }
-
-    float *buf0;
-    float *buf1;
-
-    if (src->log == IMAGE_DATA_COMPLEX) {
-        // current fft to ifft application.
-        buf0 = (float *)memory_alloc(N * N * 2 * sizeof(float));
-        buf1 = src->chals->ch[1];
-    } else  // if IMAGE_DATA_CH0
-    {
-        // In this case only 0 is allocated i guess.
-        buf0 = (float *)memory_alloc(N * N * 2 * sizeof(float));
-        buf1 = src->chals->ch[0];
-    }
-
-    if (isChalsEmpty(dst)) {
-        createChals(dst, 1);
-        dst->is_chals = 1;
-    }
-
-    for (int row = 0; row < N; row++) {
-        arm_cfft_f32(&arm_cfft_sR_f32_len256, buf1 + row * N * 2, 1, 1);
-    }
-
-    for (int y = 0; y < N; y++) {
-        for (int x = 0; x < N; x++) {
-            int dst_idx = 2 * (y * N + x);
-            int src_idx = 2 * (x * N + y);
-            buf0[dst_idx] = buf1[src_idx];
-            buf0[dst_idx + 1] = buf1[src_idx + 1];
-        }
-    }
-
-    for (int row = 0; row < N; row++) {
-        arm_cfft_f32(&arm_cfft_sR_f32_len256, buf0 + row * N * 2, 1, 1);
-    }
-
-    // Transpose back: buf0 → buf1 to undo the earlier transpose
-    for (int y = 0; y < N; y++) {
-        for (int x = 0; x < N; x++) {
-            int src_idx = 2 * (y * N + x);
-            int dst_idx = 2 * (x * N + y);
-            buf1[dst_idx] = buf0[src_idx];
-            buf1[dst_idx + 1] = buf0[src_idx + 1];
-        }
-    }
-
-    // Extract real part from transposed-back data
-    for (int i = 0; i < N * N; i++) {
-        dst->chals->ch[0][i] = buf1[2 * i];
-    }
-
-    dst->log = IMAGE_DATA_CH0;
-    memory_free(buf0);
-    return 0;
-}
-
-/**
- * @brief Performs inverse 2D FFT on a frequency-domain image.
- *        Uses ch[1] as input (transposed buffer), writes to ch[0] as interleaved (Re, Im).
- */
-embeddip_status_t ifft__(const Image *inImg, Image *outImg)
-{
-    int N = inImg->width;
-    if (!isValidFFTSize(inImg->width, inImg->height))
-        return -1;
-
-    float *buf0;
-
-    if (isChalsEmpty(outImg)) {
-        createChalsComplex(outImg, 1);
-        outImg->is_chals = 1;
-        buf0 = outImg->chals->ch[0];
-    } else {
-        memory_free(outImg->chals->ch[0]);
-        buf0 = (float *)memory_alloc(N * N * 2 * sizeof(float));
-        outImg->chals->ch[0] = buf0;
-    }
-
-    float *input = inImg->chals->ch[1];
-
-    // Inverse FFT on rows (from transposed data)
-    for (int row = 0; row < N; row++) {
-        arm_cfft_f32(&arm_cfft_sR_f32_len256, input + row * N * 2, 1, 1);  // Inverse FFT
-    }
-
-    // Transpose back
-    for (int y = 0; y < N; y++) {
-        for (int x = 0; x < N; x++) {
-            int src = 2 * (y * N + x);
-            int dst = 2 * (x * N + y);
-            buf0[dst] = input[src];
-            buf0[dst + 1] = input[src + 1];
-        }
-    }
-
-    // Inverse FFT on columns (rows of transposed image)
-    for (int row = 0; row < N; row++) {
-        arm_cfft_f32(&arm_cfft_sR_f32_len256, buf0 + row * N * 2, 1, 1);  // Inverse FFT
-    }
-
-    // Normalize the output (divide all by N*N)
-    float scale = 1.0f / (N * N);
-    for (int i = 0; i < N * N * 2; ++i) {
-        buf0[i] *= scale;
-    }
-
-    return 0;
-}
-
-/**
- * @brief Computes log-magnitude spectrum.
- */
-embeddip_status_t _abs_(const Image *src, Image *dst)
-{
-    int size = src->width * src->height;
-
-    float *fft;
-    if (src->log == IMAGE_DATA_COMPLEX) {
-        // current fft to ifft application.
-        fft = src->chals->ch[1];
-    } else if (src->log == IMAGE_DATA_CH0) {
-        // In this case only 0 is allocated i guess.
-        fft = src->chals->ch[0];
-    } else {
-        return -1;
-    }
-
-    if (isChalsEmpty(dst)) {
-        createChals(dst, 1);
-        dst->is_chals = 1;
-    }
-
-    float *mag = dst->chals->ch[0];
-    for (int i = 0; i < size; i++) {
-        float re = fft[2 * i];
-        float im = fft[2 * i + 1];
-        mag[i] = sqrtf(re * re + im * im);
-    }
-
-    dst->log = IMAGE_DATA_MAGNITUDE;
-    return EMBEDDIP_OK;
-}
-
-/**
- * @brief Computes phase angle from FFT image.
- */
-embeddip_status_t _phase_(const Image *src, Image *dst)
-{
-    int size = src->width * src->height;
-
-    float *fft;
-    if (src->log == IMAGE_DATA_COMPLEX) {
-        // current fft to ifft application.
-        fft = src->chals->ch[1];
-    } else if (src->log == IMAGE_DATA_CH0) {
-        // In this case only 0 is allocated i guess.
-        fft = src->chals->ch[0];
-    } else {
-        return -1;
-    }
-
-    if (isChalsEmpty(dst)) {
-        createChals(dst, 1);
-        dst->is_chals = 1;
-    }
-
-    float *out = dst->chals->ch[0];
-    for (int i = 0; i < size; i++) {
-        out[i] = atan2f(fft[2 * i + 1], fft[2 * i]);
-    }
-
-    dst->log = IMAGE_DATA_PHASE;
-    return EMBEDDIP_OK;
-}
-
-/**
- * @brief Rearranges FFT result so that low-frequency component is centered.
- *
- * Operates on the appropriate channel based on image log state.
- *
- * @param[in,out] img Image containing FFT data.
- *                    If log == IMAGE_DATA_COMPLEX, operates on ch[1].
- *                    If log == IMAGE_DATA_CH0, operates on ch[0].
- */
-embeddip_status_t fftshift(Image *img)
-{
-    if (!img || isChalsEmpty(img))
-        return EMBEDDIP_ERROR_NULL_PTR;
-
-    float *data = (img->log == IMAGE_DATA_COMPLEX) ? img->chals->ch[1] : img->chals->ch[0];
-    int width = img->width;
-    int height = img->height;
-
-    int cx = width / 2, cy = height / 2;
-    for (int y = 0; y < cy; ++y) {
-        for (int x = 0; x < cx; ++x) {
-            int q0 = 2 * ((y * width) + x);
-            int q1 = 2 * ((y * width) + x + cx);
-            int q2 = 2 * (((y + cy) * width) + x);
-            int q3 = 2 * (((y + cy) * width) + x + cx);
-
-            for (int k = 0; k < 2; ++k) {
-                float tmp = data[q0 + k];
-                data[q0 + k] = data[q3 + k];
-                data[q3 + k] = tmp;
-
-                tmp = data[q1 + k];
-                data[q1 + k] = data[q2 + k];
-                data[q2 + k] = tmp;
-            }
-        }
-    }
-
-    return EMBEDDIP_OK;
-}
-
-/**
- * @brief Applies a frequency-domain filter to a complex image.
- *
- * This function performs element-wise complex multiplication between a Fourier-domain image
- * and a filter mask. The mask can be either a grayscale magnitude mask or a complex-valued mask.
- *
- * @param[in]  fftImg     Complex frequency-domain image (Re, Im interleaved in ch[0]).
- * @param[in]  filterMask Grayscale or complex mask to apply in frequency domain.
- * @param[out] outImg     Output image after filtering in the frequency domain.
- */
-embeddip_status_t ffilter2D(const Image *src_fft, const Image *filter, Image *dst)
-{
-    if (!src_fft || !filter || !dst)
-        return EMBEDDIP_ERROR_NULL_PTR;
-
-    if (isChalsEmpty(src_fft) || isChalsEmpty(filter))
-        return EMBEDDIP_ERROR_INVALID_ARG;
-
-    int width = src_fft->width;
-    int height = src_fft->height;
-    int size = width * height;
-
-    // Step 1: Compute magnitude and phase
-    Image *magImg = NULL;
-    Image *phaseImg = NULL;
-
-    embeddip_status_t status =
-        createImageWH(src_fft->width, src_fft->height, src_fft->format, &magImg);
-    if (status != EMBEDDIP_OK)
-        return status;
-
-    status = createImageWH(src_fft->width, src_fft->height, src_fft->format, &phaseImg);
-    if (status != EMBEDDIP_OK) {
-        deleteImage(magImg);
-        return status;
-    }
-
-    status = _abs_(src_fft, magImg);
-    if (status != EMBEDDIP_OK) {
-        deleteImage(magImg);
-        deleteImage(phaseImg);
-        return status;
-    }
-
-    status = _phase_(src_fft, phaseImg);
-    if (status != EMBEDDIP_OK) {
-        deleteImage(magImg);
-        deleteImage(phaseImg);
-        return status;
-    }
-
-    // Step 2: Multiply magnitude by filter mask (element-wise)
-    float *mag = magImg->chals->ch[0];
-    float *mask = filter->chals->ch[0];
-    for (int i = 0; i < size; ++i)
-        mag[i] *= mask[i];
-
-    // Step 3: Reconstruct complex data from filtered mag + original phase
-    status = polarToCart(magImg, phaseImg, dst);
-
-    // Cleanup temporary images
-    deleteImage(magImg);
-    deleteImage(phaseImg);
-
-    return status;
-}
-
-/*
-
-embeddip_status_t fourier(const Image *inImg, Image *outImg)
-{
-    int imageN = 256;
-
-    if (isChalsEmpty(outImg))
-    {
-        outImg->chals = (channels_t *)memory_alloc(sizeof(channels_t));
-        outImg->is_chals = 1;
-    }
-    else
-    {
-        memory_free(outImg->chals->ch[0]);
-        // memory_free(outImg->chals->ch[1]);
-    }
-
-    outImg->chals->ch[0] = (float *)memory_alloc(inImg->height * inImg->width * 8);
-    outImg->chals->ch[1] = (float *)memory_alloc(inImg->height * inImg->width * 8);
-
-    float *fourier = outImg->chals->ch[0];
-    float *fourier2 = outImg->chals->ch[1];
-
-    if (isChalsEmpty(inImg))
-    {
-        for (int row = 0; row < imageN * imageN; row++)
-        {
-            fourier[2 * row] = (uint32_t)((uint8_t *)inImg->pixels)[row];
-            fourier[2 * row + 1] = 0x00000000;
-        }
-
-        for (int i = 0; i < imageN; i++)
-        {
-            arm_cfft_f32(&arm_cfft_sR_f32_len256, fourier + imageN * i * 2, 0, 1);
-        }
-
-        for (int k = 0; k < imageN; k++)
-        {
-            for (int j = 0; j < imageN; j++)
-            {
-                fourier2[2 * j + k * imageN * 2] = (float)fourier[j * imageN * 2 + k * 2];
-                fourier2[2 * j + 1 + k * imageN * 2] = (float)fourier[j * imageN * 2 + k * 2 + 1];
-            }
-        }
-
-        for (int i = 0; i < imageN; i++)
-        {
-            arm_cfft_f32(&arm_cfft_sR_f32_len256, fourier2 + imageN * i * 2, 0, 1);
-        }
-    }
-    else
-    {
-
-        for (int row = 0; row < imageN * imageN; row++)
-        {
-            fourier[2 * row] = (float)inImg->chals->ch[0][row];
-            fourier[2 * row + 1] = 0x00000000;
-        }
-
-        for (int i = 0; i < imageN; i++)
-        {
-            arm_cfft_f32(&arm_cfft_sR_f32_len256, fourier + imageN * i * 2, 0, 1);
-        }
-
-        for (int k = 0; k < imageN; k++)
-        {
-            for (int j = 0; j < imageN; j++)
-            {
-                fourier2[2 * j + k * imageN * 2] = (float)fourier[j * imageN * 2 + k * 2];
-                fourier2[2 * j + 1 + k * imageN * 2] = (float)fourier[j * imageN * 2 + k * 2 + 1];
-            }
-        }
-
-        for (int i = 0; i < imageN; i++)
-        {
-            arm_cfft_f32(&arm_cfft_sR_f32_len256, fourier2 + imageN * i * 2, 0, 1);
-        }
-    }
-}
-
-embeddip_status_t mag(const Image *inImg, Image *outImg)
-{
-    int imageN = 256;
-
-    if (isChalsEmpty(outImg))
-    {
-        createChals(outImg, 1);
-        outImg->is_chals = 1;
-    }
-
-    float *fft = inImg->chals->ch[1];
-    float *magnitude = outImg->chals->ch[0];
-
-    float test = 0;
-    for (int i = 0; i < imageN * imageN; ++i)
-    {
-        float re = fft[i * 2];
-        float im = fft[i * 2 + 1];
-        magnitude[i] = sqrtf(re * re + im * im);
-        if (magnitude[i] > test)
-            test = magnitude[i];
-    }
-
-    test = test + 1;
-}
-
-embeddip_status_t phase(const Image *inImg, Image *outImg)
-{
-    int imageN = 256;
-
-    if (isChalsEmpty(outImg))
-    {
-        createChals(outImg, 1);
-        outImg->is_chals = 1;
-    }
-
-    float *fft = inImg->chals->ch[1];
-    float *angle = outImg->chals->ch[0];
-
-    for (int i = 0; i < imageN * imageN; ++i)
-    {
-        angle[i] = atan2f(fft[i * 2 + 1], fft[i * 2]);
-    }
-}
-
-*/
-
-#endif
diff --git a/imgproc/fft.c b/imgproc/fft.c
new file mode 100644
index 0000000..64b573f
--- /dev/null
+++ b/imgproc/fft.c
@@ -0,0 +1,586 @@
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2025 EmbedDIP
+
+#include <embedDIP_configs.h>
+
+#include <arch/fft_backend.h>
+#include <board/common.h>
+#include <core/memory_manager.h>
+#include <fft.h>
+
+static bool isValidFFTSize(int w, int h)
+{
+    return (w > 0) && (w == h) && ((w & (w - 1)) == 0);  // positive, square, power of two
+}
+
+// Writes the transpose of the n x n matrix of interleaved (re, im) pairs in src to dst.
+static void transposeComplex(const float *src, float *dst, int n)
+{
+    for (int row = 0; row < n; ++row) {
+        for (int col = 0; col < n; ++col) {
+            const int from = 2 * (row * n + col), to = 2 * (col * n + row);
+            dst[to] = src[from];
+            dst[to + 1] = src[from + 1];
+        }
+    }
+}
+
+// Forward 2D FFT of a square, power-of-two image; dst receives interleaved (re, im) floats.
+embeddip_status_t fft(const Image *src, Image *dst)
+{
+    if (!src || !dst || !src->pixels)
+        return EMBEDDIP_ERROR_NULL_PTR;
+    if (!isValidFFTSize(src->width, src->height))
+        return EMBEDDIP_ERROR_INVALID_SIZE;
+
+    int n = src->width;
+    embeddip_status_t status = embeddip_fft_backend_init(n);
+    if (status != EMBEDDIP_OK)
+        return status;
+    // Channels are created only when dst has none yet; existing ones are reused as they are.
+    if (isChalsEmpty(dst)) {
+        status = createChalsComplex(dst, 2);
+        if (status != EMBEDDIP_OK)
+            return status;
+        dst->is_chals = 1;
+    }
+
+    if (!dst->chals || !dst->chals->ch[0] || !dst->chals->ch[1])
+        return EMBEDDIP_ERROR_NULL_PTR;
+
+    float *buf0 = dst->chals->ch[0];
+    float *buf1 = dst->chals->ch[1];
+    // Pixels are read as 8-bit samples and become the real parts; imaginary parts start at 0.
+    const uint8_t *pixels = (const uint8_t *)src->pixels;
+    for (int i = 0; i < n * n; ++i) {
+        buf0[2 * i] = (float)pixels[i];
+        buf0[2 * i + 1] = 0.0f;
+    }
+    // Pass 1: transform each row of buf0 (the backend is assumed to work in place).
+    for (int row = 0; row < n; ++row) {
+        status = embeddip_fft_backend_forward_1d(buf0 + row * n * 2, n);
+        if (status != EMBEDDIP_OK)
+            return status;
+    }
+    // Transpose into buf1 so the former columns become contiguous rows.
+    transposeComplex(buf0, buf1, n);
+    // Pass 2: transform each row of buf1, i.e. each column of the pass-1 result.
+    for (int row = 0; row < n; ++row) {
+        status = embeddip_fft_backend_forward_1d(buf1 + row * n * 2, n);
+        if (status != EMBEDDIP_OK)
+            return status;
+    }
+    // Transpose back into buf0 to restore the original row/column orientation.
+    transposeComplex(buf1, buf0, n);
+    // Mirror the result into buf1 (ch[1]), the channel read for IMAGE_DATA_COMPLEX images.
+    for (int i = 0; i < n * n * 2; ++i) {
+        buf1[i] = buf0[i];
+    }
+
+    dst->log = IMAGE_DATA_COMPLEX;
+    return EMBEDDIP_OK;
+}
+
+// Inverse 2D FFT of a square power-of-two spectrum; the real part is written to dst ch[0].
+// Every pass runs on a scratch copy, so the samples owned by src are left unmodified.
+embeddip_status_t ifft(const Image *src, Image *dst)
+{
+    if (!src || !dst || !src->chals)
+        return EMBEDDIP_ERROR_NULL_PTR;
+    if (!isValidFFTSize(src->width, src->height))
+        return EMBEDDIP_ERROR_INVALID_SIZE;
+    if (src->log != IMAGE_DATA_COMPLEX && src->log != IMAGE_DATA_CH0)
+        return EMBEDDIP_ERROR_INVALID_ARG;
+
+    int n = src->width;
+    embeddip_status_t status = embeddip_fft_backend_init(n);
+    if (status != EMBEDDIP_OK)
+        return status;
+
+    // fft() leaves its spectrum in ch[1]; polarToCart() leaves complex data in ch[0].
+    const float *input = (src->log == IMAGE_DATA_COMPLEX) ? src->chals->ch[1] : src->chals->ch[0];
+    if (!input)
+        return EMBEDDIP_ERROR_NULL_PTR;
+
+    float *tmp = (float *)memory_alloc((size_t)n * (size_t)n * 2U * sizeof(float));
+    if (!tmp)
+        return EMBEDDIP_ERROR_OUT_OF_MEMORY;
+
+    if (isChalsEmpty(dst)) {
+        status = createChals(dst, 1);
+        if (status != EMBEDDIP_OK)
+            goto cleanup;
+        dst->is_chals = 1;
+    }
+    if (!dst->chals || !dst->chals->ch[0]) {
+        status = EMBEDDIP_ERROR_NULL_PTR;
+        goto cleanup;
+    }
+    for (int i = 0; i < n * n * 2; ++i)
+        tmp[i] = input[i];
+
+    // Two passes of "inverse-transform every row, then transpose in place" (rows, then columns).
+    for (int pass = 0; pass < 2; ++pass) {
+        for (int row = 0; row < n; ++row) {
+            status = embeddip_fft_backend_inverse_1d(tmp + row * n * 2, n);
+            if (status != EMBEDDIP_OK)
+                goto cleanup;
+        }
+        for (int y = 0; y < n; ++y) {
+            for (int x = y + 1; x < n; ++x) {
+                int a = 2 * (y * n + x);
+                int b = 2 * (x * n + y);
+                for (int k = 0; k < 2; ++k) {
+                    float held = tmp[a + k];
+                    tmp[a + k] = tmp[b + k];
+                    tmp[b + k] = held;
+                }
+            }
+        }
+    }
+
+    for (int i = 0; i < n * n; ++i)
+        dst->chals->ch[0][i] = tmp[2 * i];
+    dst->log = IMAGE_DATA_CH0;
+
+cleanup:
+    memory_free(tmp);
+    return status;
+}
+
+// Selects the channel holding interleaved complex samples: ch[1] for COMPLEX, ch[0] for CH0.
+static embeddip_status_t getComplexInput(const Image *src, float **out)
+{
+    if (src == NULL || out == NULL || src->chals == NULL)
+        return EMBEDDIP_ERROR_NULL_PTR;
+
+    const int from_fft = (src->log == IMAGE_DATA_COMPLEX);
+    if (!from_fft && src->log != IMAGE_DATA_CH0)
+        return EMBEDDIP_ERROR_INVALID_ARG;
+
+    float *selected = from_fft ? src->chals->ch[1] : src->chals->ch[0];
+    *out = selected;
+    if (selected == NULL)
+        return EMBEDDIP_ERROR_NULL_PTR;
+    return EMBEDDIP_OK;
+}
+
+// Replaces every ch[0] sample of img with its natural logarithm, in place.
+embeddip_status_t _log_(Image *img)
+{
+    if (img == NULL)
+        return EMBEDDIP_ERROR_NULL_PTR;
+    if (isChalsEmpty(img))
+        return EMBEDDIP_ERROR_INVALID_ARG;
+    float *samples = img->chals->ch[0];
+    if (samples == NULL)
+        return EMBEDDIP_ERROR_NULL_PTR;
+
+    for (int idx = 0; idx < img->size; ++idx) {
+#if defined(EMBED_DIP_ARCH_XTENSA)
+        // Preserve previous Xtensa behavior and avoid log(0).
+        samples[idx] = logf(samples[idx] + 1e-3f);
+#else
+        samples[idx] = logf(samples[idx]);
+#endif
+    }
+    return EMBEDDIP_OK;
+}
+
+// Adds value to every ch[0] sample of img, in place.
+embeddip_status_t _add_(Image *img, float value)
+{
+    if (img == NULL)
+        return EMBEDDIP_ERROR_NULL_PTR;
+    if (isChalsEmpty(img))
+        return EMBEDDIP_ERROR_INVALID_ARG;
+
+    float *samples = img->chals->ch[0];
+    if (samples == NULL)
+        return EMBEDDIP_ERROR_NULL_PTR;
+
+    for (int idx = 0; idx < img->size; ++idx)
+        samples[idx] = samples[idx] + value;
+
+    return EMBEDDIP_OK;
+}
+
+// Writes the per-pixel magnitude sqrt(re^2 + im^2) of the complex data in src to dst ch[0].
+embeddip_status_t _abs_(const Image *src, Image *dst)
+{
+    if (!src || !dst)
+        return EMBEDDIP_ERROR_NULL_PTR;
+
+    int size = src->width * src->height;
+    float *fft = NULL;
+    embeddip_status_t status = getComplexInput(src, &fft);
+    if (status != EMBEDDIP_OK)
+        return status;
+
+    if (isChalsEmpty(dst)) {
+        status = createChals(dst, 1);
+        if (status != EMBEDDIP_OK)
+            return status;
+        dst->is_chals = 1;
+    }
+
+    // Guard the channel container as well as the channel, as fft() and ifft() do.
+    if (!dst->chals || !dst->chals->ch[0])
+        return EMBEDDIP_ERROR_NULL_PTR;
+    float *mag = dst->chals->ch[0];
+    for (int i = 0; i < size; ++i) {
+        float re = fft[2 * i];
+        float im = fft[2 * i + 1];
+        mag[i] = sqrtf(re * re + im * im);
+    }
+
+    dst->log = IMAGE_DATA_MAGNITUDE;
+    return EMBEDDIP_OK;
+}
+
+// Writes the per-pixel phase angle atan2(im, re) of the complex data in src to dst ch[0].
+embeddip_status_t _phase_(const Image *src, Image *dst)
+{
+    if (!src || !dst)
+        return EMBEDDIP_ERROR_NULL_PTR;
+
+    int size = src->width * src->height;
+    float *fft = NULL;
+    embeddip_status_t status = getComplexInput(src, &fft);
+    if (status != EMBEDDIP_OK)
+        return status;
+
+    if (isChalsEmpty(dst)) {
+        status = createChals(dst, 1);
+        if (status != EMBEDDIP_OK)
+            return status;
+        dst->is_chals = 1;
+    }
+
+    // Guard the channel container as well as the channel, as fft() and ifft() do.
+    if (!dst->chals || !dst->chals->ch[0])
+        return EMBEDDIP_ERROR_NULL_PTR;
+    float *out = dst->chals->ch[0];
+
+    for (int i = 0; i < size; ++i)
+        out[i] = atan2f(fft[2 * i + 1], fft[2 * i]);
+
+    dst->log = IMAGE_DATA_PHASE;
+    return EMBEDDIP_OK;
+}
+
+// Swaps diagonally opposite quadrants of img in place (top-left with bottom-right, etc.).
+// Complex data uses two floats per pixel; magnitude and phase images use one.
+embeddip_status_t fftshift(Image *img)
+{
+    if (!img || isChalsEmpty(img))
+        return EMBEDDIP_ERROR_NULL_PTR;
+
+    float *data = (img->log == IMAGE_DATA_COMPLEX) ? img->chals->ch[1] : img->chals->ch[0];
+    if (!data)
+        return EMBEDDIP_ERROR_NULL_PTR;
+
+    // _abs_() and _phase_() write one float per pixel, so those images use stride 1.
+    int stride = (img->log == IMAGE_DATA_MAGNITUDE || img->log == IMAGE_DATA_PHASE) ? 1 : 2;
+    int width = img->width;
+    int height = img->height;
+    int cx = width / 2;
+    int cy = height / 2;
+    for (int y = 0; y < cy; ++y) {
+        for (int x = 0; x < cx; ++x) {
+            int q0 = stride * ((y * width) + x);
+            int q1 = stride * ((y * width) + x + cx);
+            int q2 = stride * (((y + cy) * width) + x);
+            int q3 = stride * (((y + cy) * width) + x + cx);
+            for (int i = 0; i < stride; ++i) {
+                float tmp = data[q0 + i];
+                data[q0 + i] = data[q3 + i];
+                data[q3 + i] = tmp;
+                tmp = data[q1 + i];
+                data[q1 + i] = data[q2 + i];
+                data[q2 + i] = tmp;
+            }
+        }
+    }
+    return EMBEDDIP_OK;
+}
+
+// Builds interleaved complex data (m*cos(p), m*sin(p)) in dst ch[0] from magnitude and phase.
+embeddip_status_t polarToCart(const Image *mag_img, const Image *phase_img, Image *dst)
+{
+    if (!mag_img || !phase_img || !dst)
+        return EMBEDDIP_ERROR_NULL_PTR;
+    // dst's old channel is released below, so dst must not be one of the inputs.
+    if (dst == mag_img || dst == phase_img)
+        return EMBEDDIP_ERROR_INVALID_ARG;
+    if (mag_img->width != phase_img->width || mag_img->height != phase_img->height)
+        return EMBEDDIP_ERROR_INVALID_SIZE;
+    if (!mag_img->chals || !phase_img->chals || !mag_img->chals->ch[0] || !phase_img->chals->ch[0])
+        return EMBEDDIP_ERROR_NULL_PTR;
+    int size = mag_img->width * mag_img->height;
+    if (!isChalsEmpty(dst) && dst->chals && dst->chals->ch[0]) {
+        memory_free(dst->chals->ch[0]);
+        dst->chals->ch[0] = NULL;
+    }
+    embeddip_status_t status = createChalsComplex(dst, 1);
+    if (status != EMBEDDIP_OK)
+        return status;
+    dst->is_chals = 1;  // same bookkeeping fft() performs after createChalsComplex()
+    if (!dst->chals || !dst->chals->ch[0])
+        return EMBEDDIP_ERROR_NULL_PTR;
+
+    const float *mag = mag_img->chals->ch[0];
+    const float *phs = phase_img->chals->ch[0];
+    float *fft = dst->chals->ch[0];
+    for (int i = 0; i < size; ++i) {
+        fft[2 * i] = mag[i] * cosf(phs[i]);
+        fft[2 * i + 1] = mag[i] * sinf(phs[i]);
+    }
+    dst->log = IMAGE_DATA_CH0;
+    return EMBEDDIP_OK;
+}
+
+// Element-wise product of two equally sized images, stored as floats in outImg ch[0].
+// IMAGE_DATA_PIXELS operands are read as 8-bit samples, as fft() and difference() do.
+embeddip_status_t multiply(const Image *img1, const Image *img2, Image *outImg)
+{
+    if (!img1 || !img2 || !outImg)
+        return EMBEDDIP_ERROR_NULL_PTR;
+
+    if (img1->width != img2->width || img1->height != img2->height)
+        return EMBEDDIP_ERROR_INVALID_SIZE;
+
+    if (isChalsEmpty(outImg)) {
+        embeddip_status_t status = createChals(outImg, 1);
+        if (status != EMBEDDIP_OK)
+            return status;
+        outImg->is_chals = 1;
+    }
+
+    // Each operand resolves to either a float channel or an 8-bit pixel buffer.
+    const Image *operand[2] = {img1, img2};
+    const float *chan[2] = {NULL, NULL};
+    const uint8_t *pix[2] = {NULL, NULL};
+    for (int k = 0; k < 2; ++k) {
+        const Image *img = operand[k];
+        if (img->log == IMAGE_DATA_CH0) {
+            chan[k] = img->chals ? img->chals->ch[0] : NULL;
+        } else if (img->log == IMAGE_DATA_COMPLEX) {
+            chan[k] = img->chals ? img->chals->ch[1] : NULL;
+        } else if (img->log == IMAGE_DATA_PIXELS) {
+            pix[k] = (const uint8_t *)img->pixels;
+        } else {
+            return EMBEDDIP_ERROR_INVALID_ARG;
+        }
+    }
+
+    if ((!chan[0] && !pix[0]) || (!chan[1] && !pix[1]))
+        return EMBEDDIP_ERROR_NULL_PTR;
+    if (!outImg->chals || !outImg->chals->ch[0])
+        return EMBEDDIP_ERROR_NULL_PTR;
+
+    float *out = outImg->chals->ch[0];
+    int size = img1->width * img1->height;
+    for (int i = 0; i < size; ++i) {
+        float a = chan[0] ? chan[0][i] : (float)pix[0][i];
+        float b = chan[1] ? chan[1][i] : (float)pix[1][i];
+        out[i] = a * b;
+    }
+
+    outImg->log = IMAGE_DATA_CH0;
+    return EMBEDDIP_OK;
+}
+
+// Clamped per-pixel difference max(src1 - src2, 0), stored as floats in dst ch[0].
+embeddip_status_t difference(const Image *src1, const Image *src2, Image *dst)
+{
+    if (!src1 || !src2 || !dst)
+        return EMBEDDIP_ERROR_NULL_PTR;
+
+    if (src1->width != src2->width || src1->height != src2->height)
+        return EMBEDDIP_ERROR_INVALID_SIZE;
+
+    int size = src1->width * src1->height;
+    if (isChalsEmpty(dst)) {
+        embeddip_status_t status = createChals(dst, 1);
+        if (status != EMBEDDIP_OK)
+            return status;
+        dst->is_chals = 1;
+    }
+
+    float *out = dst->chals ? dst->chals->ch[0] : NULL;  // guard the container as well
+    if (!out)
+        return EMBEDDIP_ERROR_OUT_OF_MEMORY;
+    // One branch per pairing of 8-bit pixel data and float channel data.
+    if (src1->log == IMAGE_DATA_PIXELS &&
+        (src2->log == IMAGE_DATA_CH0 || src2->log == IMAGE_DATA_MAGNITUDE)) {
+        if (!src1->pixels || !src2->chals || !src2->chals->ch[0])
+            return EMBEDDIP_ERROR_NULL_PTR;
+
+        uint8_t *pix1 = src1->pixels;
+        float *ch2 = src2->chals->ch[0];
+        for (int i = 0; i < size; ++i)
+            out[i] = fmaxf((float)pix1[i] - ch2[i], 0.0f);
+    } else if (src1->log == IMAGE_DATA_PIXELS && src2->log == IMAGE_DATA_PIXELS) {
+        if (!src1->pixels || !src2->pixels)
+            return EMBEDDIP_ERROR_NULL_PTR;
+
+        uint8_t *pix1 = src1->pixels;
+        uint8_t *pix2 = src2->pixels;
+        for (int i = 0; i < size; ++i)
+            out[i] = fmaxf((float)(pix1[i] - pix2[i]), 0.0f);
+    } else if ((src1->log == IMAGE_DATA_CH0 || src1->log == IMAGE_DATA_MAGNITUDE) &&
+               (src2->log == IMAGE_DATA_CH0 || src2->log == IMAGE_DATA_MAGNITUDE)) {
+        if (!src1->chals || !src1->chals->ch[0] || !src2->chals || !src2->chals->ch[0])
+            return EMBEDDIP_ERROR_NULL_PTR;
+
+        float *ch1 = src1->chals->ch[0];
+        float *ch2 = src2->chals->ch[0];
+        for (int i = 0; i < size; ++i)
+            out[i] = fmaxf(ch1[i] - ch2[i], 0.0f);
+    } else if ((src1->log == IMAGE_DATA_CH0 || src1->log == IMAGE_DATA_MAGNITUDE) &&
+               src2->log == IMAGE_DATA_PIXELS) {
+        if (!src1->chals || !src1->chals->ch[0] || !src2->pixels)
+            return EMBEDDIP_ERROR_NULL_PTR;
+
+        float *ch1 = src1->chals->ch[0];
+        uint8_t *pix2 = src2->pixels;
+        for (int i = 0; i < size; ++i)
+            out[i] = fmaxf(ch1[i] - (float)pix2[i], 0.0f);
+    } else {
+        return EMBEDDIP_ERROR_INVALID_ARG;
+    }
+
+    dst->log = IMAGE_DATA_CH0;
+    return EMBEDDIP_OK;
+}
+
+// Fills filter_img ch[0] with a mask built from each pixel's distance to the image centre.
+embeddip_status_t
+getFilter(Image *filter_img, FrequencyFilterType filter_type, float cutoff1, float cutoff2)
+{
+    if (!filter_img)
+        return EMBEDDIP_ERROR_NULL_PTR;
+    if (cutoff1 < 0.0f)
+        return EMBEDDIP_ERROR_INVALID_ARG;
+    if (filter_type == FREQ_FILTER_IDEAL_BANDPASS || filter_type == FREQ_FILTER_GAUSSIAN_BANDPASS) {
+        if (cutoff2 < 0.0f || cutoff1 >= cutoff2)
+            return EMBEDDIP_ERROR_INVALID_ARG;
+    }
+
+    // A zero cutoff makes the Gaussian exponent 0/0 at the centre (NaN), so reject it.
+    if (cutoff1 == 0.0f && (filter_type == FREQ_FILTER_GAUSSIAN_LOWPASS ||
+                            filter_type == FREQ_FILTER_GAUSSIAN_HIGHPASS ||
+                            filter_type == FREQ_FILTER_GAUSSIAN_BANDPASS))
+        return EMBEDDIP_ERROR_INVALID_ARG;
+
+    int w = filter_img->width;
+    int h = filter_img->height;
+    int cx = w / 2;
+    int cy = h / 2;
+    filter_img->format = IMAGE_FORMAT_GRAYSCALE;
+    if (isChalsEmpty(filter_img)) {
+        embeddip_status_t status = createChals(filter_img, 1);
+        if (status != EMBEDDIP_OK)
+            return status;
+        filter_img->is_chals = 1;
+    }
+
+    float *mask = filter_img->chals ? filter_img->chals->ch[0] : NULL;
+    if (!mask)
+        return EMBEDDIP_ERROR_NULL_PTR;
+
+    for (int y = 0; y < h; ++y) {
+        for (int x = 0; x < w; ++x) {
+            int dx = x - cx;
+            int dy = y - cy;
+            float d = sqrtf((float)(dx * dx + dy * dy));  // distance from the mask centre
+            float value = 0.0f;
+            switch (filter_type) {
+            case FREQ_FILTER_IDEAL_LOWPASS:
+                value = (d <= cutoff1) ? 1.0f : 0.0f;
+                break;
+            case FREQ_FILTER_GAUSSIAN_LOWPASS:
+                value = expf(-(d * d) / (2.0f * cutoff1 * cutoff1));
+                break;
+            case FREQ_FILTER_IDEAL_HIGHPASS:
+                value = (d >= cutoff1) ? 1.0f : 0.0f;
+                break;
+            case FREQ_FILTER_GAUSSIAN_HIGHPASS:
+                value = 1.0f - expf(-(d * d) / (2.0f * cutoff1 * cutoff1));
+                break;
+            case FREQ_FILTER_IDEAL_BANDPASS:
+                value = (d >= cutoff1 && d <= cutoff2) ? 1.0f : 0.0f;
+                break;
+            case FREQ_FILTER_GAUSSIAN_BANDPASS: {
+                float gLow = expf(-(d * d) / (2.0f * cutoff2 * cutoff2));
+                float gHigh = expf(-(d * d) / (2.0f * cutoff1 * cutoff1));
+                value = gLow - gHigh;
+                break;
+            }
+            default:
+                break;  // unknown filter type: the mask entry stays 0
+            }
+            mask[y * w + x] = value;
+        }
+    }
+
+    filter_img->log = IMAGE_DATA_CH0;
+    return EMBEDDIP_OK;
+}
+
+// Filters a complex spectrum with a real-valued mask: the magnitude is scaled by the mask,
+// the phase is kept, and polarToCart() writes the recombined complex samples to dst.
+embeddip_status_t ffilter2D(const Image *src_fft, const Image *filter, Image *dst)
+{
+    if (!src_fft || !filter || !dst)
+        return EMBEDDIP_ERROR_NULL_PTR;
+    if (isChalsEmpty(src_fft) || isChalsEmpty(filter))
+        return EMBEDDIP_ERROR_INVALID_ARG;
+
+    // The mask is indexed with the spectrum's pixel count, so a smaller mask would be
+    // read out of bounds; require identical dimensions.
+    if (filter->width != src_fft->width || filter->height != src_fft->height)
+        return EMBEDDIP_ERROR_INVALID_SIZE;
+
+    int size = src_fft->width * src_fft->height;
+    Image *mag_img = NULL;
+    Image *phase_img = NULL;
+
+    // Temporary images for the polar decomposition; both are released before returning.
+    embeddip_status_t status =
+        createImageWH(src_fft->width, src_fft->height, src_fft->format, &mag_img);
+    if (status != EMBEDDIP_OK)
+        return status;
+
+    status = createImageWH(src_fft->width, src_fft->height, src_fft->format, &phase_img);
+    if (status != EMBEDDIP_OK)
+        goto free_mag;
+
+    status = _abs_(src_fft, mag_img);
+    if (status != EMBEDDIP_OK)
+        goto free_both;
+
+    status = _phase_(src_fft, phase_img);
+    if (status != EMBEDDIP_OK)
+        goto free_both;
+
+    // Re-check the buffers that are indexed directly below.
+    if (!mag_img->chals || !mag_img->chals->ch[0] || !filter->chals || !filter->chals->ch[0]) {
+        status = EMBEDDIP_ERROR_NULL_PTR;
+        goto free_both;
+    }
+
+    for (int i = 0; i < size; ++i)
+        mag_img->chals->ch[0][i] *= filter->chals->ch[0][i];
+
+    // Recombine the scaled magnitude with the untouched phase.
+    status = polarToCart(mag_img, phase_img, dst);
+
+    // Shared exit path: release the temporaries in reverse order of creation.
+free_both:
+    deleteImage(phase_img);
+free_mag:
+    deleteImage(mag_img);
+    return status;
+}

From fcf77eb1ef0033959d814c4eb3f20d01f3326caf Mon Sep 17 00:00:00 2001
From: Ozan Durgut <ozandurgut.2001@hotmail.com>
Date: Sat, 18 Apr 2026 00:21:54 +0200
Subject: [PATCH 05/18] device: flatten driver source layout

Move device implementations out of nested vendor subfolders into the
top-level device category paths and update build references to the new
locations.

This reduces path depth, makes board profile source lists simpler and
keeps driver discovery consistent across STM32 and ESP32 backends.

Signed-off-by: Ozan Durgut <ozandurgut.2001@hotmail.com>
---
 .../{board_esp32_memory.cpp => board_esp32eye_memory.cpp}     | 2 +-
 board/stm32f7/board_stm32f7_memory.c                          | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)
 rename board/esp32/{board_esp32_memory.cpp => board_esp32eye_memory.cpp} (98%)
 mode change 100755 => 100644

diff --git a/board/esp32/board_esp32_memory.cpp b/board/esp32/board_esp32eye_memory.cpp
old mode 100755
new mode 100644
similarity index 98%
rename from board/esp32/board_esp32_memory.cpp
rename to board/esp32/board_esp32eye_memory.cpp
index 4e471a4..acbd940
--- a/board/esp32/board_esp32_memory.cpp
+++ b/board/esp32/board_esp32eye_memory.cpp
@@ -3,7 +3,7 @@
 
 #include <embedDIP_configs.h>
 
-#ifdef TARGET_BOARD_ESP32
+#ifdef EMBED_DIP_BOARD_ESP32
 
     #include <stdlib.h>
     #include <string.h>
diff --git a/board/stm32f7/board_stm32f7_memory.c b/board/stm32f7/board_stm32f7_memory.c
index beeb93c..81967c4 100755
--- a/board/stm32f7/board_stm32f7_memory.c
+++ b/board/stm32f7/board_stm32f7_memory.c
@@ -3,7 +3,7 @@
 
 #include <embedDIP_configs.h>
 
-#ifdef TARGET_BOARD_STM32F7
+#ifdef EMBED_DIP_BOARD_STM32F7
 
     #include <stdint.h>
     #include <string.h>
@@ -182,4 +182,4 @@ void *memory_realloc(void *ptr, size_t new_size)
     return new_ptr;
 }
 
-#endif  // TARGET_BOARD_STM32F7
+#endif  // EMBED_DIP_BOARD_STM32F7

From 8016427342281ceb7e66bb30693b86d61122ed3f Mon Sep 17 00:00:00 2001
From: Ozan Durgut <ozandurgut.2001@hotmail.com>
Date: Sat, 18 Apr 2026 00:23:02 +0200
Subject: [PATCH 06/18] imgproc: centralize FFT pipeline and add arch backend
 hooks

Introduce a shared imgproc FFT implementation for common 2D flow and
move arch-specific math calls behind a small backend hook interface.

Implement ARM and XTENSA backend hooks in arch-specific files and
remove the duplicated code.

Signed-off-by: Ozan Durgut <ozandurgut.2001@hotmail.com>
---
 device/camera/{ov2640 => }/esp32_ov2640.cpp        | 0
 device/camera/{ov2640 => }/esp32_ov2640_old.cpp    | 0
 device/camera/{ov5640 => }/stm32_ov5640.c          | 0
 device/camera/{ov5640 => }/stm32_ov5640.h          | 0
 device/display/{rk043fn48h => }/stm32_rk043fn48h.c | 0
 device/serial/{esp32_uart => }/esp32_uart.cpp      | 0
 device/serial/serial.h                             | 4 ++--
 device/serial/{stm32_uart => }/stm32_uart.c        | 0
 8 files changed, 2 insertions(+), 2 deletions(-)
 rename device/camera/{ov2640 => }/esp32_ov2640.cpp (100%)
 mode change 100755 => 100644
 rename device/camera/{ov2640 => }/esp32_ov2640_old.cpp (100%)
 rename device/camera/{ov5640 => }/stm32_ov5640.c (100%)
 mode change 100755 => 100644
 rename device/camera/{ov5640 => }/stm32_ov5640.h (100%)
 rename device/display/{rk043fn48h => }/stm32_rk043fn48h.c (100%)
 mode change 100755 => 100644
 rename device/serial/{esp32_uart => }/esp32_uart.cpp (100%)
 mode change 100755 => 100644
 rename device/serial/{stm32_uart => }/stm32_uart.c (100%)
 mode change 100755 => 100644

diff --git a/device/camera/ov2640/esp32_ov2640.cpp b/device/camera/esp32_ov2640.cpp
old mode 100755
new mode 100644
similarity index 100%
rename from device/camera/ov2640/esp32_ov2640.cpp
rename to device/camera/esp32_ov2640.cpp
diff --git a/device/camera/ov2640/esp32_ov2640_old.cpp b/device/camera/esp32_ov2640_old.cpp
similarity index 100%
rename from device/camera/ov2640/esp32_ov2640_old.cpp
rename to device/camera/esp32_ov2640_old.cpp
diff --git a/device/camera/ov5640/stm32_ov5640.c b/device/camera/stm32_ov5640.c
old mode 100755
new mode 100644
similarity index 100%
rename from device/camera/ov5640/stm32_ov5640.c
rename to device/camera/stm32_ov5640.c
diff --git a/device/camera/ov5640/stm32_ov5640.h b/device/camera/stm32_ov5640.h
similarity index 100%
rename from device/camera/ov5640/stm32_ov5640.h
rename to device/camera/stm32_ov5640.h
diff --git a/device/display/rk043fn48h/stm32_rk043fn48h.c b/device/display/stm32_rk043fn48h.c
old mode 100755
new mode 100644
similarity index 100%
rename from device/display/rk043fn48h/stm32_rk043fn48h.c
rename to device/display/stm32_rk043fn48h.c
diff --git a/device/serial/esp32_uart/esp32_uart.cpp b/device/serial/esp32_uart.cpp
old mode 100755
new mode 100644
similarity index 100%
rename from device/serial/esp32_uart/esp32_uart.cpp
rename to device/serial/esp32_uart.cpp
diff --git a/device/serial/serial.h b/device/serial/serial.h
index 83fe64a..6ffa1b9 100755
--- a/device/serial/serial.h
+++ b/device/serial/serial.h
@@ -37,11 +37,11 @@ typedef struct serial_interface {
 int _write(int file, char *ptr, int len);
 
 // External declaration of STM32 implementation
-#ifdef TARGET_BOARD_STM32F7
+#ifdef EMBED_DIP_BOARD_STM32F7
 extern serial_t stm32_uart;
 #endif
 
-#ifdef TARGET_BOARD_ESP32
+#ifdef EMBED_DIP_BOARD_ESP32
 extern serial_t esp32_uart;
 #endif
 
diff --git a/device/serial/stm32_uart/stm32_uart.c b/device/serial/stm32_uart.c
old mode 100755
new mode 100644
similarity index 100%
rename from device/serial/stm32_uart/stm32_uart.c
rename to device/serial/stm32_uart.c

From 7746603ded6a9e47a646c2f4ec4a011fb6d31bde Mon Sep 17 00:00:00 2001
From: Ozan Durgut <ozandurgut.2001@hotmail.com>
Date: Sat, 18 Apr 2026 00:24:57 +0200
Subject: [PATCH 07/18] core: update public config contract for board/arch
 split

Update embedDIP public headers/config checks to validate one board and
one arch/cpu selection under the new model.

Adopt EMBED_DIP_BOARD_* and EMBED_DIP_ARCH_* style compile contracts and
drop legacy TARGET_BOARD_* assumptions from the public interface.

Signed-off-by: Ozan Durgut <ozandurgut.2001@hotmail.com>
---
 arch/xtensa/xtensa_fft.cpp |   5 +-
 embedDIP.h                 |   4 +-
 embedDIP_configs.h         | 137 +++++++++++++++++--------------------
 imgproc/fft.c              |   2 +-
 4 files changed, 69 insertions(+), 79 deletions(-)

diff --git a/arch/xtensa/xtensa_fft.cpp b/arch/xtensa/xtensa_fft.cpp
index 58f28ea..3e855f3 100644
--- a/arch/xtensa/xtensa_fft.cpp
+++ b/arch/xtensa/xtensa_fft.cpp
@@ -16,7 +16,10 @@ embeddip_status_t embeddip_fft_backend_init(int n)
     }
 
     esp_err_t err = dsps_fft2r_init_fc32(NULL, CONFIG_DSP_MAX_FFT_SIZE);
-    return (err == ESP_OK) ? EMBEDDIP_OK : EMBEDDIP_ERROR_INTERNAL;
+    if (err == ESP_OK || err == ESP_ERR_DSP_REINITIALIZED) {
+        return EMBEDDIP_OK;
+    }
+    return EMBEDDIP_ERROR_DEVICE_ERROR;
 }
 
 embeddip_status_t embeddip_fft_backend_forward_1d(float *data, int n)
diff --git a/embedDIP.h b/embedDIP.h
index d6aa56c..76f7934 100755
--- a/embedDIP.h
+++ b/embedDIP.h
@@ -91,11 +91,11 @@ extern "C" {
  * @{
  */
 
-#if defined(TARGET_BOARD_ESP32)
+#if defined(EMBED_DIP_BOARD_ESP32)
     #include "device/camera/camera.h" /**< Camera abstraction. */
 #endif
 
-#if defined(TARGET_BOARD_STM32F7)
+#if defined(EMBED_DIP_BOARD_STM32F7)
     #include "device/camera/camera.h"   /**< Camera abstraction. */
     #include "device/display/display.h" /**< Display abstraction. */
 #endif
diff --git a/embedDIP_configs.h b/embedDIP_configs.h
index d26ed97..0c8f856 100755
--- a/embedDIP_configs.h
+++ b/embedDIP_configs.h
@@ -9,54 +9,74 @@
  * @file embedDIP_configs.h
  * @brief User-editable build configuration for EmbedDIP.
  *
- * Define exactly **one** target board and (optionally) override feature flags
- * and device selections.
+ * Select exactly one board, one architecture family, and one CPU variant via
+ * compiler defines.
+ * Typical usage with CMake: set EMBEDDIP_TARGET_BOARD, EMBEDDIP_ARCH, EMBEDDIP_CPU.
  */
 
 /* -------------------------------------------------------------------------- */
-/* Target selection */
+/* Hard-switch guard (legacy macros removed)                                   */
 /* -------------------------------------------------------------------------- */
-/**
- * @defgroup embedDIP_cfg_target Target selection
- * @brief Choose exactly one target platform.
- * @{
- *
- */
+#if defined(TARGET_BOARD_STM32F7) || defined(TARGET_BOARD_ESP32) || defined(TARGET_BOARD_OTHER)
+    #error "Legacy TARGET_BOARD_* macros are not supported. Use EMBED_DIP_BOARD_* and EMBED_DIP_ARCH_* instead."
+#endif
 
-/* Uncomment **one** of the following, or define via compiler flags. */
-#define TARGET_BOARD_STM32F7 1
-// #define TARGET_BOARD_ESP32   1
-/* #define TARGET_BOARD_OTHER   1 */
-
-/* Sanity check: ensure exactly one target is selected. */
-#if ((defined(TARGET_BOARD_STM32F7) ? 1 : 0) + (defined(TARGET_BOARD_ESP32) ? 1 : 0) +             \
-     (defined(TARGET_BOARD_OTHER) ? 1 : 0)) == 0
-    #error                                                                                         \
-        "No target selected: define exactly one of TARGET_BOARD_STM32F7, TARGET_BOARD_ESP32, TARGET_BOARD_OTHER."
-#elif ((defined(TARGET_BOARD_STM32F7) ? 1 : 0) + (defined(TARGET_BOARD_ESP32) ? 1 : 0) +           \
-       (defined(TARGET_BOARD_OTHER) ? 1 : 0)) > 1
-    #error                                                                                         \
-        "Multiple targets selected: define **only one** of TARGET_BOARD_STM32F7, TARGET_BOARD_ESP32, TARGET_BOARD_OTHER."
+/* -------------------------------------------------------------------------- */
+/* Target selection                                                            */
+/* -------------------------------------------------------------------------- */
+/* Uncomment only if you do not provide these from the build system. */
+/* #define EMBED_DIP_BOARD_STM32F7 1 */
+/* #define EMBED_DIP_BOARD_ESP32   1 */
+
+/* #define EMBED_DIP_ARCH_ARM     1 */
+/* #define EMBED_DIP_ARCH_XTENSA  1 */
+
+/* #define EMBED_DIP_CPU_CORTEX_M7 1 */
+/* #define EMBED_DIP_CPU_LX6       1 */
+/* #define EMBED_DIP_CPU_LX7       1 */
+
+/* Sanity check: exactly one board. */
+#if ((defined(EMBED_DIP_BOARD_STM32F7) ? 1 : 0) + (defined(EMBED_DIP_BOARD_ESP32) ? 1 : 0)) == 0
+    #error "No board selected: define exactly one of EMBED_DIP_BOARD_STM32F7 or EMBED_DIP_BOARD_ESP32."
+#elif ((defined(EMBED_DIP_BOARD_STM32F7) ? 1 : 0) + (defined(EMBED_DIP_BOARD_ESP32) ? 1 : 0)) > 1
+    #error "Multiple boards selected: define only one of EMBED_DIP_BOARD_STM32F7 or EMBED_DIP_BOARD_ESP32."
+#endif
+
+/* Sanity check: exactly one architecture family. */
+#if ((defined(EMBED_DIP_ARCH_ARM) ? 1 : 0) + (defined(EMBED_DIP_ARCH_XTENSA) ? 1 : 0)) == 0
+    #error "No architecture family selected: define exactly one of EMBED_DIP_ARCH_ARM or EMBED_DIP_ARCH_XTENSA."
+#elif ((defined(EMBED_DIP_ARCH_ARM) ? 1 : 0) + (defined(EMBED_DIP_ARCH_XTENSA) ? 1 : 0)) > 1
+    #error "Multiple architecture families selected: define only one EMBED_DIP_ARCH_* macro."
+#endif
+
+/* Sanity check: exactly one CPU variant. */
+#if ((defined(EMBED_DIP_CPU_CORTEX_M7) ? 1 : 0) + (defined(EMBED_DIP_CPU_LX6) ? 1 : 0) + \
+     (defined(EMBED_DIP_CPU_LX7) ? 1 : 0)) == 0
+    #error "No CPU selected: define exactly one of EMBED_DIP_CPU_CORTEX_M7, EMBED_DIP_CPU_LX6, EMBED_DIP_CPU_LX7."
+#elif ((defined(EMBED_DIP_CPU_CORTEX_M7) ? 1 : 0) + (defined(EMBED_DIP_CPU_LX6) ? 1 : 0) + \
+       (defined(EMBED_DIP_CPU_LX7) ? 1 : 0)) > 1
+    #error "Multiple CPUs selected: define only one EMBED_DIP_CPU_* macro."
+#endif
+
+/* Board/architecture/CPU compatibility matrix. */
+#if defined(EMBED_DIP_BOARD_STM32F7)
+    #if !(defined(EMBED_DIP_ARCH_ARM) && defined(EMBED_DIP_CPU_CORTEX_M7))
+        #error "Invalid combination: EMBED_DIP_BOARD_STM32F7 requires EMBED_DIP_ARCH_ARM + EMBED_DIP_CPU_CORTEX_M7."
+    #endif
+#elif defined(EMBED_DIP_BOARD_ESP32)
+    #if !(defined(EMBED_DIP_ARCH_XTENSA) && (defined(EMBED_DIP_CPU_LX6) || defined(EMBED_DIP_CPU_LX7)))
+        #error "Invalid combination: EMBED_DIP_BOARD_ESP32 requires EMBED_DIP_ARCH_XTENSA + (EMBED_DIP_CPU_LX6 or EMBED_DIP_CPU_LX7)."
+    #endif
 #endif
-/** @} */ /* end of embedDIP_cfg_target */
 
 /**
  * @defgroup embedDIP_cfg_features Feature flags
  * @brief Enable/disable optional subsystems.
  * @{
- *
- * Each flag defaults to 1 (enabled) when applicable for the target. Define
- * as 0 to disable at compile time.
- *
- * - `ENABLE_UART_LOGGING` : UART-based logging helpers
- * - `ENABLE_IMAGE_PROCESSING` : image processing modules
- * - `ENABLE_CAMERA_INPUT` : camera capture interfaces
- * - `ENABLE_DISPLAY_OUTPUT` : display output interfaces
  */
 
 /* ============================== STM32F7 =================================== */
-#if defined(TARGET_BOARD_STM32F7)
-    /** @brief Vendor-family define for STM32F7. */
+#if defined(EMBED_DIP_BOARD_STM32F7)
     #ifndef STM32F7xx
         #define STM32F7xx 1
     #endif
@@ -74,20 +94,18 @@
         #define ENABLE_DISPLAY_OUTPUT 1
     #endif
 
-    /* Devices available on STM32F7 builds (overridable) */
     #ifndef DEVICE_OV5640
-        #define DEVICE_OV5640 1 /**< OV5640 camera module present. */
+        #define DEVICE_OV5640 1
     #endif
     #ifndef DEVICE_RK043FN48H
-        #define DEVICE_RK043FN48H 1 /**< RK043FN48H display panel present. */
+        #define DEVICE_RK043FN48H 1
     #endif
     #ifndef DEVICE_STM32_UART
-        #define DEVICE_STM32_UART 1 /**< Use STM32 HAL UART backend. */
+        #define DEVICE_STM32_UART 1
     #endif
 
 /* =============================== ESP32 ==================================== */
-#elif defined(TARGET_BOARD_ESP32)
-    /** @brief Arduino-style arch define for ESP32 builds. */
+#elif defined(EMBED_DIP_BOARD_ESP32)
     #ifndef ARDUINO_ARCH_ESP32
         #define ARDUINO_ARCH_ESP32 1
     #endif
@@ -102,48 +120,17 @@
         #define ENABLE_CAMERA_INPUT 1
     #endif
     #ifndef ENABLE_DISPLAY_OUTPUT
-        #define ENABLE_DISPLAY_OUTPUT 0 /* default off unless a display is wired */
+        #define ENABLE_DISPLAY_OUTPUT 0
     #endif
 
-    /* Devices available on ESP32 builds (overridable) */
     #ifndef DEVICE_OV2640
-        #define DEVICE_OV2640 1 /**< OV2640 camera module present. */
+        #define DEVICE_OV2640 1
     #endif
     #ifndef DEVICE_ESP32_UART
-        #define DEVICE_ESP32_UART 1 /**< Use ESP32 UART backend. */
+        #define DEVICE_ESP32_UART 1
     #endif
-
-/* ============================== OTHER ===================================== */
-#elif defined(TARGET_BOARD_OTHER)
-    /**
-     * @brief Generic/other target: start with minimal defaults and enable what you
-     * need.
-     * @note Adjust device macros below to match your board.
-     */
-    #ifndef ENABLE_UART_LOGGING
-        #define ENABLE_UART_LOGGING 0
-    #endif
-    #ifndef ENABLE_IMAGE_PROCESSING
-        #define ENABLE_IMAGE_PROCESSING 1
-    #endif
-    #ifndef ENABLE_CAMERA_INPUT
-        #define ENABLE_CAMERA_INPUT 0
-    #endif
-    #ifndef ENABLE_DISPLAY_OUTPUT
-        #define ENABLE_DISPLAY_OUTPUT 0
-    #endif
-
-    /* Example device toggles (customize for your platform) */
-    #ifndef DEVICE_OV5640
-        #define DEVICE_OV5640 0
-    #endif
-    #ifndef DEVICE_OV2640
-        #define DEVICE_OV2640 0
-    #endif
-
-#else
-    #error "Unexpected configuration state. This should be unreachable."
 #endif
+
 /** @} */ /* end of embedDIP_cfg_features */
 
 /**
diff --git a/imgproc/fft.c b/imgproc/fft.c
index 64b573f..fb9f031 100644
--- a/imgproc/fft.c
+++ b/imgproc/fft.c
@@ -6,7 +6,7 @@
 #include <arch/fft_backend.h>
 #include <board/common.h>
 #include <core/memory_manager.h>
-#include <fft.h>
+#include <imgproc/fft.h>
 
 static bool isValidFFTSize(int w, int h)
 {

From 9d593a04e54ff70e4ab00e4947363962d3a84433 Mon Sep 17 00:00:00 2001
From: Ozan Durgut <ozandurgut.2001@hotmail.com>
Date: Sat, 18 Apr 2026 01:39:17 +0200
Subject: [PATCH 08/18] core: bump version to 0.2.0 for board/arch split

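Update EmbedDIP semantic version from 0.1.0 to 0.2.0 to reflect
build/config contract changes introduced by the board/arch/cpu
decoupling and related refactors.

Also pick up formatting-only cleanups (include reordering and trailing
whitespace) in the Xtensa FFT backend, the ESP32 memory backend, and
imgproc/fft.c.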

This release marks a compatibility boundary for consumers migrating from
the legacy single-platform selection flow.

Signed-off-by: Ozan Durgut <ozandurgut.2001@hotmail.com>
---
 arch/xtensa/xtensa_fft.cpp            | 3 +--
 board/esp32/board_esp32eye_memory.cpp | 2 +-
 embedDIP.h                            | 2 +-
 imgproc/fft.c                         | 3 +--
 4 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/arch/xtensa/xtensa_fft.cpp b/arch/xtensa/xtensa_fft.cpp
index 3e855f3..63039af 100644
--- a/arch/xtensa/xtensa_fft.cpp
+++ b/arch/xtensa/xtensa_fft.cpp
@@ -5,9 +5,8 @@
 
 #ifdef EMBED_DIP_ARCH_XTENSA
 
-    #include <arch/fft_backend.h>
-
     #include "esp_dsp.h"
+    #include <arch/fft_backend.h>
 
 embeddip_status_t embeddip_fft_backend_init(int n)
 {
diff --git a/board/esp32/board_esp32eye_memory.cpp b/board/esp32/board_esp32eye_memory.cpp
index acbd940..c8ef138 100644
--- a/board/esp32/board_esp32eye_memory.cpp
+++ b/board/esp32/board_esp32eye_memory.cpp
@@ -17,7 +17,7 @@
 void memory_init(uintptr_t pool_start_addr)
 {
     (void)pool_start_addr;
-    
+
     // Check if PSRAM is available
     if (ESP.getPsramSize() > 0) {
         Serial.printf("[MEMORY] PSRAM available: %u bytes\n", ESP.getPsramSize());
diff --git a/embedDIP.h b/embedDIP.h
index 76f7934..1a4fef9 100755
--- a/embedDIP.h
+++ b/embedDIP.h
@@ -46,7 +46,7 @@ extern "C" {
  */
 
 /** @brief Major version (breaking changes). */
 #define EMBED_DIP_VERSION_MAJOR 0U
 /** @brief Minor version (new features, backward compatible). */
-#define EMBED_DIP_VERSION_MINOR 1U
+#define EMBED_DIP_VERSION_MINOR 2U
 /** @brief Patch version (bug fixes, no API changes). */
diff --git a/imgproc/fft.c b/imgproc/fft.c
index fb9f031..9f1ddee 100644
--- a/imgproc/fft.c
+++ b/imgproc/fft.c
@@ -1,11 +1,10 @@
 // SPDX-License-Identifier: MIT
 // Copyright (c) 2025 EmbedDIP
 
-#include <embedDIP_configs.h>
-
 #include <arch/fft_backend.h>
 #include <board/common.h>
 #include <core/memory_manager.h>
+#include <embedDIP_configs.h>
 #include <imgproc/fft.h>
 
 static bool isValidFFTSize(int w, int h)

From 036d7f05f9e09f32627655f876ea414e3cac011b Mon Sep 17 00:00:00 2001
From: Ozan Durgut <ozandurgut.2001@hotmail.com>
Date: Sat, 18 Apr 2026 16:16:37 +0200
Subject: [PATCH 09/18] wrapper: remove unused Image::ifftshift

Drop the C++ wrapper declaration/definition for Image::ifftshift().

The wrapper only forwarded to fftshift() and provided no distinct behavior.
Keeping a single shift API simplifies ImageWrapper surface area and avoids
duplicate frequency-shift entry points.

Signed-off-by: Ozan Durgut <ozandurgut.2001@hotmail.com>
---
 wrapper/ImageWrapper.cpp | 8 --------
 wrapper/ImageWrapper.hpp | 6 ------
 2 files changed, 14 deletions(-)

diff --git a/wrapper/ImageWrapper.cpp b/wrapper/ImageWrapper.cpp
index 283297a..4715332 100755
--- a/wrapper/ImageWrapper.cpp
+++ b/wrapper/ImageWrapper.cpp
@@ -648,14 +648,6 @@ void Image::ifft(Image &out) const
     ::ifft(raw(), out.raw());
 }
 
-/**
- * @brief Reverses FFT quadrant shift.
- */
-void Image::ifftshift()
-{
-    ::fftshift(raw());
-}
-
 /**
  * @brief Converts polar components to complex/cartesian form.
  */
diff --git a/wrapper/ImageWrapper.hpp b/wrapper/ImageWrapper.hpp
index e978928..91e2bea 100755
--- a/wrapper/ImageWrapper.hpp
+++ b/wrapper/ImageWrapper.hpp
@@ -702,12 +702,6 @@ class Image
      */
     void fftshift();
 
-    /**
-     * @brief Reverses frequency shift operation.
-     * @see ::ifftshift For underlying C implementation
-     */
-    void ifftshift();
-
     /**
      * @brief Builds frequency-domain mask in this image.
      * @param[in] type Filter type (lowpass, highpass, bandpass)

From 70204f662ed9d6eb7625397f5ca724c340a33027 Mon Sep 17 00:00:00 2001
From: Ozan Durgut <ozandurgut.2001@hotmail.com>
Date: Sat, 18 Apr 2026 16:38:16 +0200
Subject: [PATCH 10/18] build: add board and arch profile cmake modules

Introduce board and architecture profile files for STM32F7/ESP32 and
ARM/XTENSA targets, and move source/define/include selection into those
profiles to support board-arch-cpu split configuration.

Adjust .gitignore CMake patterns so new profile cmake files are tracked.

Signed-off-by: Ozan Durgut <ozandurgut.2001@hotmail.com>
---
 .gitignore                        |  2 --
 arch/arm/arch_profile.cmake       | 31 +++++++++++++++++++++++++++++++
 arch/xtensa/arch_profile.cmake    | 21 +++++++++++++++++++++
 board/esp32/board_profile.cmake   | 21 +++++++++++++++++++++
 board/stm32f7/board_profile.cmake | 24 ++++++++++++++++++++++++
 5 files changed, 97 insertions(+), 2 deletions(-)
 create mode 100644 arch/arm/arch_profile.cmake
 create mode 100644 arch/xtensa/arch_profile.cmake
 create mode 100644 board/esp32/board_profile.cmake
 create mode 100644 board/stm32f7/board_profile.cmake

diff --git a/.gitignore b/.gitignore
index 518de08..5b2e431 100755
--- a/.gitignore
+++ b/.gitignore
@@ -36,9 +36,7 @@ CMakeFiles/
 cmake_install.cmake
 CTestTestfile.cmake
 Makefile
-*.cmake
 !CMakeLists.txt
-!cmake/*.cmake
 compile_commands.json
 .ninja_deps
 .ninja_log
diff --git a/arch/arm/arch_profile.cmake b/arch/arm/arch_profile.cmake
new file mode 100644
index 0000000..37285ca
--- /dev/null
+++ b/arch/arm/arch_profile.cmake
@@ -0,0 +1,31 @@
+# Architecture profile: ARM family
+
+set(EMBEDDIP_ARCH_SOURCES
+    arch/arm/cm7_common.c
+    arch/arm/cm7_fft.c
+)
+
+set(EMBEDDIP_ARCH_DEFINES
+    EMBED_DIP_ARCH_ARM=1
+)
+
+if(EMBEDDIP_CPU STREQUAL "CORTEX_M7")
+    list(APPEND EMBEDDIP_ARCH_DEFINES EMBED_DIP_CPU_CORTEX_M7=1)
+else()
+    message(FATAL_ERROR "Unsupported CPU for ARM arch: ${EMBEDDIP_CPU}. Supported: CORTEX_M7")
+endif()
+
+list(APPEND EMBEDDIP_ARCH_DEFINES
+    ARM_MATH_CM7
+)
+
+set(EMBEDDIP_ARCH_PRIVATE_DEFINES
+    __FPU_PRESENT=1
+)
+
+set(EMBEDDIP_ARCH_COMPILE_OPTIONS
+    -mcpu=cortex-m7
+    -mfpu=fpv5-sp-d16
+    -mfloat-abi=hard
+    -mthumb
+)
diff --git a/arch/xtensa/arch_profile.cmake b/arch/xtensa/arch_profile.cmake
new file mode 100644
index 0000000..24d96ce
--- /dev/null
+++ b/arch/xtensa/arch_profile.cmake
@@ -0,0 +1,21 @@
+# Architecture profile: Xtensa family
+
+set(EMBEDDIP_ARCH_SOURCES
+    arch/xtensa/xtensa_common.cpp
+    arch/xtensa/xtensa_fft.cpp
+)
+
+set(EMBEDDIP_ARCH_DEFINES
+    EMBED_DIP_ARCH_XTENSA=1
+)
+
+if(EMBEDDIP_CPU STREQUAL "LX6")
+    list(APPEND EMBEDDIP_ARCH_DEFINES EMBED_DIP_CPU_LX6=1)
+elseif(EMBEDDIP_CPU STREQUAL "LX7")
+    list(APPEND EMBEDDIP_ARCH_DEFINES EMBED_DIP_CPU_LX7=1)
+else()
+    message(FATAL_ERROR "Unsupported CPU for XTENSA arch: ${EMBEDDIP_CPU}. Supported: LX6, LX7")
+endif()
+
+set(EMBEDDIP_ARCH_COMPILE_OPTIONS
+)
diff --git a/board/esp32/board_profile.cmake b/board/esp32/board_profile.cmake
new file mode 100644
index 0000000..bc1e562
--- /dev/null
+++ b/board/esp32/board_profile.cmake
@@ -0,0 +1,21 @@
+# Board profile: ESP32
+
+set(EMBEDDIP_BOARD_SOURCES
+    ${BOARD_COMMON_SOURCES}
+    board/esp32/board_esp32eye_memory.cpp
+)
+
+set(EMBEDDIP_DEVICE_SOURCES
+    ${DEVICE_COMMON_SOURCES}
+    device/camera/esp32_ov2640.cpp
+    device/serial/esp32_uart.cpp
+)
+
+set(EMBEDDIP_BOARD_DEFINES
+    EMBED_DIP_BOARD_ESP32=1
+    ARDUINO_ARCH_ESP32
+)
+
+set(EMBEDDIP_BOARD_INCLUDE_DIRS
+    ${CMAKE_CURRENT_SOURCE_DIR}/board/esp32
+)
diff --git a/board/stm32f7/board_profile.cmake b/board/stm32f7/board_profile.cmake
new file mode 100644
index 0000000..8313686
--- /dev/null
+++ b/board/stm32f7/board_profile.cmake
@@ -0,0 +1,24 @@
+# Board profile: STM32F7
+
+set(EMBEDDIP_BOARD_SOURCES
+    ${BOARD_COMMON_SOURCES}
+    board/stm32f7/board_stm32f7_memory.c
+    board/stm32f7/configs.h
+)
+
+set(EMBEDDIP_DEVICE_SOURCES
+    ${DEVICE_COMMON_SOURCES}
+    device/camera/stm32_ov5640.c
+    device/display/stm32_rk043fn48h.c
+    device/serial/stm32_uart.c
+)
+
+set(EMBEDDIP_BOARD_DEFINES
+    EMBED_DIP_BOARD_STM32F7=1
+    STM32F7xx
+    STM32F746xx
+)
+
+set(EMBEDDIP_BOARD_INCLUDE_DIRS
+    ${CMAKE_CURRENT_SOURCE_DIR}/board/stm32f7
+)

From b243a5e2b06ffec5768e8ff15a156ef31334424e Mon Sep 17 00:00:00 2001
From: Ozan Durgut <ozandurgut.2001@hotmail.com>
Date: Thu, 23 Apr 2026 23:21:48 +0200
Subject: [PATCH 11/18] device/serial: remove legacy commented UART paths and
 JPEG pacing delay

Drop large commented-out legacy UART implementations from stm32_uart.c
to keep the active serial backend clear and maintainable.

Also remove the per-chunk HAL_Delay(1) in serial_send_jpeg(), relying
on normal blocking UART transmit behavior for chunk pacing.

Signed-off-by: Ozan Durgut <ozandurgut.2001@hotmail.com>
---
 device/serial/stm32_uart.c | 167 +------------------------------------
 1 file changed, 1 insertion(+), 166 deletions(-)

diff --git a/device/serial/stm32_uart.c b/device/serial/stm32_uart.c
index d731818..39d16ff 100644
--- a/device/serial/stm32_uart.c
+++ b/device/serial/stm32_uart.c
@@ -58,92 +58,6 @@ void HAL_UART_RxCpltCallback(UART_HandleTypeDef *huart)
     (void)huart;
     rx_flag = true;
 }
-/*
-static void serial_capture_dma(Image *img)
-{
-    uint8_t request_start_sequence[3] = "STR";
-    assert(img != NULL);
-    assert(img->pixels != NULL);
-
-    // Calculate block parameters
-    uint16_t blockSize = ((img->size * img->depth) < UART_BLOCK_SIZE_MAX) ? (img->size * img->depth)
-: UART_BLOCK_SIZE_MAX; uint32_t blockCount = (img->size * img->depth) / blockSize; uint16_t
-lastBlockSize = (img->size * img->depth) % blockSize;
-
-    // Send capture request header
-    HAL_UART_Transmit(&huart1, request_start_sequence, 3, HAL_MAX_DELAY);
-    HAL_Delay(1); // Optional small delay
-
-    HAL_UART_Transmit(&huart1, (uint8_t *)(&img->width), sizeof(img->width), HAL_MAX_DELAY);
-    HAL_UART_Transmit(&huart1, (uint8_t *)(&img->height), sizeof(img->height), HAL_MAX_DELAY);
-    HAL_UART_Transmit(&huart1, (uint8_t *)(&img->format), sizeof(img->format), HAL_MAX_DELAY);
-    HAL_UART_Transmit(&huart1, (uint8_t *)(&img->depth), sizeof(img->depth), HAL_MAX_DELAY);
-
-    // Step 3: Send image pixel data in blocks
-    const uint8_t *pixelPtr = img->pixels;
-    for (uint32_t i = 0; i < blockCount; i++)
-    {
-        HAL_UART_Receive_DMA(&huart1, pixelPtr, blockSize);
-        pixelPtr += blockSize;
-        while (!rx_flag)
-            ;
-        rx_flag = false;
-    }
-
-    // Step 4: Send any remaining bytes
-    if (lastBlockSize > 0)
-    {
-        HAL_UART_Receive_DMA(&huart1, pixelPtr, lastBlockSize);
-        while (!rx_flag)
-            ;
-        rx_flag = false;
-    }
-}
-
-static void serial_send_dma(Image *img)
-{
-    assert(img != NULL);
-    assert(img->pixels != NULL);
-    uint8_t request_start_sequence[3] = "STW";
-    // Calculate block transmission parameters
-    uint16_t blockSize = ((img->size * img->depth) < UART_BLOCK_SIZE_MAX) ? (img->size * img->depth)
-: UART_BLOCK_SIZE_MAX; uint32_t blockCount = (img->size * img->depth) / blockSize; uint16_t
-lastBlockSize = (img->size * img->depth) % blockSize;
-
-    // Step 1: Send command header
-    HAL_UART_Transmit(&huart1, request_start_sequence, 3, HAL_MAX_DELAY);
-    HAL_Delay(1); // Give receiver time to prepare
-
-    // Step 2: Send image metadata
-
-    HAL_UART_Transmit(&huart1, (uint8_t *)(&img->width), sizeof(img->width), HAL_MAX_DELAY);
-    HAL_UART_Transmit(&huart1, (uint8_t *)(&img->height), sizeof(img->height), HAL_MAX_DELAY);
-    HAL_UART_Transmit(&huart1, (uint8_t *)(&img->format), sizeof(img->format), HAL_MAX_DELAY);
-    HAL_UART_Transmit(&huart1, (uint8_t *)(&img->depth), sizeof(img->depth), HAL_MAX_DELAY);
-    HAL_Delay(200); // Allow receiver to process metadata
-
-    // Step 3: Send image pixel data in blocks
-    const uint8_t *pixelPtr = img->pixels;
-    uint8_t testarr[] = "ozan durgut ses ver";
-    for (uint32_t i = 0; i < blockCount; i++)
-    {
-        HAL_UART_Transmit_DMA(&huart1, pixelPtr, blockSize);
-        pixelPtr += blockSize;
-        while (!tx_flag)
-            ;
-        tx_flag = false;
-    }
-
-    // Step 4: Send any remaining bytes
-    if (lastBlockSize > 0)
-    {
-        HAL_UART_Transmit_DMA(&huart1, pixelPtr, lastBlockSize);
-        while (!tx_flag)
-            ;
-        tx_flag = false;
-    }
-}
-*/
 
 static int serial_capture(Image *img)
 {
@@ -276,8 +190,6 @@ static int serial_send_jpeg(const Image *img)
         HAL_UART_Transmit(&huart1, ptr, chunk, HAL_MAX_DELAY);
         ptr += chunk;
         remaining -= chunk;
-
-        HAL_Delay(1);  // Delay is fine for UART pacing, or use DMA for better performance
     }
     return EMBEDDIP_OK;
 }
@@ -311,83 +223,6 @@ serial_send_1d(const void *data, uint8_t elem_size, uint32_t length, Serial1DDat
     return EMBEDDIP_OK;
 }
 
-/*
-static void serial_capture(Image *img)
-{
-
-    uint8_t request_start_sequence[3] = "STR";
-
-    uint16_t _blocksize = 65535, _lastblocksize = 0;
-    uint32_t i = 0, _blockCount = 0;
-
-    uint16_t sizear[3] = {img->width, img->height, img->format};
-
-    if (img->size < 65536)
-        _blocksize = img->size;
-
-    _blockCount = img->size / _blocksize;
-    _lastblocksize = (uint16_t)(img->size % _blocksize);
-
-    HAL_UART_Transmit(&huart1, request_start_sequence, 3, HAL_MAX_DELAY);
-    HAL_Delay(1);
-
-    HAL_UART_Transmit(&huart1, (uint8_t *)(&img->width), sizeof(uint16_t), HAL_MAX_DELAY);
-    HAL_UART_Transmit(&huart1, (uint8_t *)(&img->height), sizeof(uint16_t),
-                      HAL_MAX_DELAY);
-    HAL_UART_Transmit(&huart1, (uint8_t *)(&img->format), sizeof(uint16_t),
-                      HAL_MAX_DELAY);
-
-    for (i = 0; i < _blockCount; i++)
-        HAL_UART_Receive(&huart1, img->pixels + (i * _blocksize), _blocksize,
-                         HAL_MAX_DELAY);
-
-    if (_lastblocksize)
-        HAL_UART_Receive(&huart1, img->pixels + (i * _blocksize),
-                         _lastblocksize, HAL_MAX_DELAY);
-
-    return;
-}
-
-static void serial_send(const Image *img)
-{
-    uint8_t request_start_sequence[3] = "STW";
-
-    uint16_t _blocksize = 65535, _lastblocksize = 0;
-    uint32_t i = 0, _blockCount = 0;
-
-    if (img->size < 65536)
-        _blocksize = img->size;
-
-    _blockCount = img->size / _blocksize;
-    _lastblocksize = (uint16_t)(img->size % _blocksize);
-
-    HAL_UART_Transmit(&huart1, request_start_sequence, 3, HAL_MAX_DELAY);
-    HAL_Delay(1);
-    HAL_UART_Transmit(&huart1, (uint8_t *)(&img->width), sizeof(uint16_t), HAL_MAX_DELAY);
-    HAL_UART_Transmit(&huart1, (uint8_t *)(&img->height), sizeof(uint16_t),
-                      HAL_MAX_DELAY);
-    HAL_UART_Transmit(&huart1, (uint8_t *)(&img->format), sizeof(uint8_t),
-                      HAL_MAX_DELAY);
-    HAL_Delay(200);
-    for (i = 0; i < _blockCount; i++)
-        HAL_UART_Transmit(&huart1, img->pixels + (i * _blocksize), _blocksize,
-                          HAL_MAX_DELAY);
-
-    if (_lastblocksize)
-        HAL_UART_Transmit(&huart1, img->pixels + (i * _blocksize),
-                          _lastblocksize, HAL_MAX_DELAY);
-}
-
-int _write(int file, char *ptr, int len)
-{
-    for (int i = 0; i < len; i++)
-    {
-        HAL_UART_Transmit(&huart1, (uint8_t *)&ptr[i], 1, HAL_MAX_DELAY);
-    }
-    return len;
-}
-*/
-
 // Define the object
 serial_t stm32_uart = {
     .init = serial_init,
@@ -398,4 +233,4 @@ serial_t stm32_uart = {
     .flush = serial_flush,
 };
 
-#endif
\ No newline at end of file
+#endif

From 70664e9a8310ab7dbd5ca2ffe99458f1c0691c34 Mon Sep 17 00:00:00 2001
From: Ozan Durgut <ozandurgut.2001@hotmail.com>
Date: Thu, 23 Apr 2026 23:26:33 +0200
Subject: [PATCH 12/18] imgproc: add JPEG compression API with libjpeg backend

Introduce a new imgproc compression module.

Implement JPEG encoding via libjpeg when available, including a
fixed-capacity destination manager with overflow detection and
input support for RGB565, RGB888, and grayscale images. When libjpeg
is not present, keep a safe stub that returns -1.

Wire the module into the build by adding sources, STM32F7 libjpeg
include paths, and EMBEDDIP_HAVE_LIBJPEG compile definition when
jpeglib.h is found.

Signed-off-by: Ozan Durgut <ozandurgut.2001@hotmail.com>
---
 CMakeLists.txt           |  12 +++
 embedDIP.h               |   1 +
 imgproc/compress.c       | 187 +++++++++++++++++++++++++++++++++++++++
 imgproc/compress.h       |  30 +++++++
 wrapper/ImageWrapper.cpp |   8 ++
 wrapper/ImageWrapper.hpp |  10 +++
 6 files changed, 248 insertions(+)
 create mode 100644 imgproc/compress.c
 create mode 100644 imgproc/compress.h

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8329d9e..b87ac32 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -77,6 +77,10 @@ set(IMGPROC_SOURCES
     # Main header (includes all sub-modules)
     imgproc/pixel.h
 
+    # Compression
+    imgproc/compress.c
+    imgproc/compress.h
+
     # Color operations
     imgproc/color.c
     imgproc/color.h
@@ -221,6 +225,8 @@ endif()
 if(EMBEDDIP_TARGET_BOARD STREQUAL "STM32F7")
     if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/../Drivers")
         target_include_directories(embedDIP PUBLIC
+            $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../Middlewares/Third_Party/LibJPEG/include>
+            $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../LIBJPEG/Target>
             $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../Drivers/STM32F7xx_HAL_Driver/Inc>
             $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../Drivers/CMSIS/Device/ST/STM32F7xx/Include>
             $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../Drivers/CMSIS/Core/Include>
@@ -228,6 +234,12 @@ if(EMBEDDIP_TARGET_BOARD STREQUAL "STM32F7")
             $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../Core/Inc>
         )
 
+        if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/../Middlewares/Third_Party/LibJPEG/include/jpeglib.h")
+            target_compile_definitions(embedDIP PUBLIC EMBEDDIP_HAVE_LIBJPEG=1)
+        else()
+            message(WARNING "LibJPEG headers not found for embedDIP compression module.")
+        endif()
+
         # CMSIS-DSP sources for STM32 (C and assembly files)
         file(GLOB_RECURSE CMSIS_DSP_SOURCES
             ${CMAKE_CURRENT_SOURCE_DIR}/../Drivers/CMSIS/DSP/Source/*.c
diff --git a/embedDIP.h b/embedDIP.h
index 1a4fef9..10b4645 100755
--- a/embedDIP.h
+++ b/embedDIP.h
@@ -71,6 +71,7 @@ extern "C" {
 #include "core/image.h"                  /**< Image type and utilities. */
 #include "core/memory_manager.h"         /**< Allocators and memory helpers. */
 #include "device/serial/serial.h"        /**< Serial I/O abstraction. */
+#include "imgproc/compress.h"            /**< JPEG compression helper. */
 #include "imgproc/color.h"               /**< Color conversions and helpers. */
 #include "imgproc/connectedcomponents.h" /**< Connected components labeling. */
 #include "imgproc/drawing.h"             /**< Drawing primitives and shapes. */
diff --git a/imgproc/compress.c b/imgproc/compress.c
new file mode 100644
index 0000000..3b1ef36
--- /dev/null
+++ b/imgproc/compress.c
@@ -0,0 +1,187 @@
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2025 EmbedDIP
+
+#include "imgproc/compress.h"
+
+#include <stdlib.h>
+
+#if defined(EMBEDDIP_HAVE_LIBJPEG)
+
+    #include <setjmp.h>
+
+    #include "jpeglib.h"
+
+/** Error manager extended with a jump target so libjpeg failures return to compress(). */
+typedef struct {
+    struct jpeg_error_mgr pub; /* must stay first: libjpeg only sees this member */
+    jmp_buf escape;
+} fixed_error_mgr_t;
+
+/** Destination manager that writes into a fixed-size, caller-owned buffer. */
+typedef struct {
+    struct jpeg_destination_mgr pub; /* must stay first: libjpeg only sees this member */
+    JOCTET *buffer;
+    uint32_t capacity;
+    JOCTET spill[64]; /* scratch sink for bytes produced after the buffer filled up */
+    int overflow;
+} fixed_dest_mgr_t;
+
+/** Print the libjpeg message, then unwind to compress() instead of calling exit(). */
+static void fixed_error_exit(j_common_ptr cinfo)
+{
+    fixed_error_mgr_t *err = (fixed_error_mgr_t *)cinfo->err;
+    (*cinfo->err->output_message)(cinfo);
+    longjmp(err->escape, 1);
+}
+
+/** Point libjpeg at the start of the caller buffer and clear the overflow flag. */
+static void fixed_dest_init(j_compress_ptr cinfo)
+{
+    fixed_dest_mgr_t *dest = (fixed_dest_mgr_t *)cinfo->dest;
+    dest->pub.next_output_byte = dest->buffer;
+    dest->pub.free_in_buffer = dest->capacity;
+    dest->overflow = 0;
+}
+
+/** Buffer-full callback: latch the overflow flag and divert further output to spill. */
+static boolean fixed_dest_empty(j_compress_ptr cinfo)
+{
+    fixed_dest_mgr_t *dest = (fixed_dest_mgr_t *)cinfo->dest;
+    dest->overflow = 1;
+    dest->pub.next_output_byte = dest->spill;
+    dest->pub.free_in_buffer = sizeof(dest->spill);
+    return TRUE;
+}
+
+/** Nothing to flush: the encoded bytes already live in the caller buffer. */
+static void fixed_dest_term(j_compress_ptr cinfo)
+{
+    (void)cinfo;
+}
+
+/** Attach a fixed-capacity destination manager for @p out to @p cinfo. */
+static void jpeg_fixed_dest(j_compress_ptr cinfo, uint8_t *out, uint32_t out_capacity)
+{
+    fixed_dest_mgr_t *dest = (fixed_dest_mgr_t *)cinfo->dest;
+    if (dest == NULL) {
+        cinfo->dest = (struct jpeg_destination_mgr *)(*cinfo->mem->alloc_small)(
+            (j_common_ptr)cinfo, JPOOL_PERMANENT, sizeof(fixed_dest_mgr_t));
+        dest = (fixed_dest_mgr_t *)cinfo->dest;
+    }
+
+    dest->buffer = out;
+    dest->capacity = out_capacity;
+    dest->overflow = 0;
+    dest->pub.init_destination = fixed_dest_init;
+    dest->pub.empty_output_buffer = fixed_dest_empty;
+    dest->pub.term_destination = fixed_dest_term;
+}
+
+/**
+ * @brief Compress @p src into a JPEG byte stream stored in dst->pixels.
+ *
+ * The output capacity is dst->width * dst->height * dst->depth bytes; on success
+ * dst->size holds the number of JPEG bytes written. libjpeg errors are caught and
+ * reported as -1 instead of terminating the program.
+ * @return 0 on success, -1 on invalid input, libjpeg failure, or output overflow.
+ */
+int compress(Image *src, Image *dst, int format, int quality)
+{
+    if (!src || !dst || !src->pixels || !dst->pixels || format != IMAGE_COMP_JPEG) {
+        return -1;
+    }
+    /* libjpeg treats an empty image as a fatal error, so reject it up front. */
+    if (src->width == 0 || src->height == 0) {
+        return -1;
+    }
+
+    int components;
+    J_COLOR_SPACE color_space;
+    if (src->format == IMAGE_FORMAT_RGB565 || src->format == IMAGE_FORMAT_RGB888) {
+        components = 3;
+        color_space = JCS_RGB;
+    } else if (src->format == IMAGE_FORMAT_GRAYSCALE) {
+        components = 1;
+        color_space = JCS_GRAYSCALE;
+    } else {
+        return -1;
+    }
+
+    uint32_t dst_capacity = dst->width * dst->height * dst->depth;
+    if (dst_capacity == 0) {
+        return -1;
+    }
+
+    /* RGB565 rows are expanded to RGB888 in a scratch row kept across calls. */
+    static JSAMPLE *row_buffer = NULL;
+    static uint32_t row_buffer_capacity = 0;
+    const uint32_t row_stride = (uint32_t)src->width * (uint32_t)components;
+    if (src->format == IMAGE_FORMAT_RGB565 && row_stride > row_buffer_capacity) {
+        JSAMPLE *grown = realloc(row_buffer, row_stride);
+        if (!grown) {
+            return -1;
+        }
+        row_buffer = grown;
+        row_buffer_capacity = row_stride;
+    }
+
+    struct jpeg_compress_struct cinfo;
+    fixed_error_mgr_t jerr;
+    cinfo.err = jpeg_std_error(&jerr.pub);
+    jerr.pub.error_exit = fixed_error_exit;
+    if (setjmp(jerr.escape)) {
+        /* A libjpeg call failed and unwound here: release its state and bail out. */
+        jpeg_destroy_compress(&cinfo);
+        return -1;
+    }
+
+    jpeg_create_compress(&cinfo);
+    jpeg_fixed_dest(&cinfo, (uint8_t *)dst->pixels, dst_capacity);
+    cinfo.image_width = src->width;
+    cinfo.image_height = src->height;
+    cinfo.input_components = components;
+    cinfo.in_color_space = color_space;
+    jpeg_set_defaults(&cinfo);
+    jpeg_set_quality(&cinfo, quality, TRUE);
+    cinfo.dct_method = JDCT_IFAST;
+    cinfo.optimize_coding = FALSE;
+    jpeg_start_compress(&cinfo, TRUE);
+
+    while (cinfo.next_scanline < cinfo.image_height) {
+        JSAMPROW row_pointer = row_buffer;
+        if (src->format == IMAGE_FORMAT_RGB565) {
+            const uint16_t *px = (const uint16_t *)src->pixels + cinfo.next_scanline * src->width;
+            for (uint32_t x = 0; x < src->width; x++) {
+                row_buffer[x * 3 + 0] = (uint8_t)(((px[x] >> 11) & 0x1F) << 3);
+                row_buffer[x * 3 + 1] = (uint8_t)(((px[x] >> 5) & 0x3F) << 2);
+                row_buffer[x * 3 + 2] = (uint8_t)((px[x] & 0x1F) << 3);
+            }
+        } else {
+            row_pointer = (uint8_t *)src->pixels + cinfo.next_scanline * row_stride;
+        }
+        jpeg_write_scanlines(&cinfo, &row_pointer, 1);
+    }
+    jpeg_finish_compress(&cinfo);
+
+    const fixed_dest_mgr_t *dest = (const fixed_dest_mgr_t *)cinfo.dest;
+    const int fits = dest && !dest->overflow;
+    if (fits) {
+        dst->size = (uint32_t)(dst_capacity - dest->pub.free_in_buffer);
+    }
+    jpeg_destroy_compress(&cinfo);
+    return fits ? 0 : -1;
+}
+
+#else
+
+/** Fallback when libjpeg is not available: always reports failure. */
+int compress(Image *src, Image *dst, int format, int quality)
+{
+    (void)src;
+    (void)dst;
+    (void)format;
+    (void)quality;
+    return -1;
+}
+
+#endif
diff --git a/imgproc/compress.h b/imgproc/compress.h
new file mode 100644
index 0000000..74bd474
--- /dev/null
+++ b/imgproc/compress.h
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2025 EmbedDIP
+
+#ifndef EMBEDDIP_IMGPROC_COMPRESS_H
+#define EMBEDDIP_IMGPROC_COMPRESS_H
+
+#include "core/image.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Compression format selector accepted by compress(): JPEG. */
+#define IMAGE_COMP_JPEG 0
+
+/**
+ * @brief Compress image to JPEG format.
+ * @param src Source image (RGB565, RGB888, or grayscale); pixels must be non-NULL.
+ * @param dst Destination image; JPEG bytes are written to dst->pixels and dst->size is set.
+ * @param format Compression format (use IMAGE_COMP_JPEG); other values are rejected.
+ * @param quality JPEG quality (1-100, higher = better quality).
+ * @return 0 on success, -1 on error (also when built without libjpeg support).
+ */
+int compress(Image *src, Image *dst, int format, int quality);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* EMBEDDIP_IMGPROC_COMPRESS_H */
diff --git a/wrapper/ImageWrapper.cpp b/wrapper/ImageWrapper.cpp
index 4715332..6da79f9 100755
--- a/wrapper/ImageWrapper.cpp
+++ b/wrapper/ImageWrapper.cpp
@@ -108,6 +108,14 @@ bool Image::isChalsEmpty() const noexcept
     return image_ ? ::isChalsEmpty(image_) : true;
 }
 
+/**
+ * @brief Compresses this image into JPEG format.
+ */
+int Image::compressJPEG(Image &out, int quality) const noexcept
+{
+    return ::compress(raw(), out.raw(), IMAGE_COMP_JPEG, quality);
+}
+
 /**
  * @brief Applies negative transform.
  */
diff --git a/wrapper/ImageWrapper.hpp b/wrapper/ImageWrapper.hpp
index 91e2bea..6bacb33 100755
--- a/wrapper/ImageWrapper.hpp
+++ b/wrapper/ImageWrapper.hpp
@@ -15,6 +15,7 @@ extern "C" {
 #include "device/serial/serial.h"        /**< Serial I/O abstraction. */
 #include "imgproc/color.h"               /**< Color conversions and helpers. */
 #include "imgproc/connectedcomponents.h" /**< Connected components labeling. */
+#include "imgproc/compress.h"            /**< JPEG compression helper. */
 #include "imgproc/drawing.h"             /**< Drawing primitives and shapes. */
 #include "imgproc/fft.h"                 /**< Frequency-domain processing. */
 #include "imgproc/filter.h"              /**< Spatial filtering and kernels. */
@@ -290,6 +291,15 @@ class Image
     bool isChalsEmpty() const noexcept;
 
     // Pixel operations
+    /**
+     * @brief Compress this image into JPEG payload stored in output image.
+     * @param[out] out Destination image buffer that will hold JPEG bytes.
+     * @param[in] quality JPEG quality in range [1, 100].
+     * @return 0 on success, -1 on error.
+     * @see ::compress For underlying C implementation
+     */
+    int compressJPEG(Image &out, int quality = 75) const noexcept;
+
     /**
      * @brief Computes negative image transform.
      * @param[out] out Output image for inverted result

From 2383e86415ed848f27c4de9aa7c88ca1ce186333 Mon Sep 17 00:00:00 2001
From: Ozan Durgut <ozandurgut.2001@hotmail.com>
Date: Thu, 23 Apr 2026 23:33:08 +0200
Subject: [PATCH 13/18] segmentation: replace legacy GrabCut variants with
 grayscale graph-cut

Drop unfinished/duplicate GrabCut entry points
and consolidate the API around grabCut() and grabCutLite().

Implement a memory-aware grayscale graph-cut path with:
  - ROI-bounded processing and argument/format validation
  - adaptive downsampling for embedded node limits
  - Dinic-style maxflow (BFS/DFS residual graph)
  - hard border/background constraints and n-link smoothness terms
  - mask upsampling back to full-resolution ROI

Update public headers and C++ wrapper accordingly:
  - add Image::grabCut(mask, roi, iterations) returning status
  - remove obsolete RGB/legacy wrapper methods

Signed-off-by: Ozan Durgut <ozandurgut.2001@hotmail.com>
---
 imgproc/segmentation.c   | 681 ++++++++++++++++++---------------------
 imgproc/segmentation.h   |  40 +--
 wrapper/ImageWrapper.cpp |  26 +-
 wrapper/ImageWrapper.hpp |  27 +-
 4 files changed, 336 insertions(+), 438 deletions(-)

diff --git a/imgproc/segmentation.c b/imgproc/segmentation.c
index b0609ae..13677f0 100644
--- a/imgproc/segmentation.c
+++ b/imgproc/segmentation.c
@@ -910,121 +910,6 @@ static float gaussian_prob(float x, float mean, float var)
     return (1.0f / sqrtf(2.0f * M_PI * var)) * expf(-(diff * diff) / (2.0f * var));
 }
 
-embeddip_status_t grabCutLite_working(const Image *src, Image *mask, Rectangle roi, int iterations)
-{
-    const int size = src->width * src->height;
-    const uint8_t *img1 = src->pixels;
-    uint8_t *mask_data = (uint8_t *)mask->pixels;
-
-    for (int iter = 0; iter < iterations; ++iter) {
-        uint32_t fgSum = 0, fgCount = 0;
-        uint32_t bgSum = 0, bgCount = 0;
-
-        // Step 1: Compute foreground and background means
-        for (int i = 0; i < size; ++i) {
-            if (mask_data[i] == 2) {
-                fgSum += img1[i];
-                fgCount++;
-            } else if (mask_data[i] == 0) {
-                bgSum += img1[i];
-                bgCount++;
-            }
-        }
-
-        // Fallback if no foreground was found (bootstrap)
-        if (fgCount == 0) {
-            for (int i = 0; i < size; ++i) {
-                if (mask_data[i] == 1) {
-                    fgSum += img1[i];
-                    fgCount++;
-                }
-            }
-        }
-
-        if (fgCount == 0 || bgCount == 0)
-            break;  // Not enough info to proceed
-
-        uint8_t fgMean = fgSum / fgCount;
-        uint8_t bgMean = bgSum / bgCount;
-
-        // Debug
-        // printf("Iter %d: fgMean=%d, bgMean=%d\n", iter, fgMean, bgMean);
-
-        // Step 2: Update probable region
-        for (int i = 0; i < size; ++i) {
-            if (mask_data[i] == 1) {
-                int distFg = abs((int)img1[i] - (int)fgMean);
-                int distBg = abs((int)img1[i] - (int)bgMean);
-
-                // Reclassify as closer to fg or bg
-                if (distFg < distBg)
-                    mask_data[i] = 2;  // Becomes foreground
-                else
-                    mask_data[i] = 0;  // Becomes background
-            }
-        }
-    }
-    return EMBEDDIP_OK;
-}
-
-embeddip_status_t grabCutLitesd(const Image *src, Image *mask, Rectangle roi, int iterations)
-{
-    const int size = src->width * src->height;
-    const uint8_t *inImg_pixels = src->pixels;
-    uint8_t *mask_data = (uint8_t *)mask->pixels;
-
-    for (int iter = 0; iter < iterations; ++iter) {
-        uint32_t fgSum = 0, fgCount = 0;
-        uint32_t bgSum = 0, bgCount = 0;
-
-        // Step 1: Compute foreground and background means
-        for (int i = 0; i < size; ++i) {
-            if (mask_data[i] == 2) {
-                fgSum += inImg_pixels[i];
-                fgCount++;
-            } else if (mask_data[i] == 0) {
-                bgSum += inImg_pixels[i];
-                bgCount++;
-            }
-        }
-
-        // Fallback if no foreground was found (bootstrap)
-        if (fgCount == 0) {
-            for (int i = 0; i < size; ++i) {
-                if (mask_data[i] == 1) {
-                    fgSum += inImg_pixels[i];
-                    fgCount++;
-                }
-            }
-        }
-
-        if (fgCount == 0 || bgCount == 0)
-            break;  // Not enough info to proceed
-
-        uint8_t fgMean = fgSum / fgCount;
-        uint8_t bgMean = bgSum / bgCount;
-
-        // Debug
-        // printf("Iter %d: fgMean=%d, bgMean=%d\n", iter, fgMean, bgMean);
-
-        return EMBEDDIP_OK;
-        // Step 2: Update probable region
-        for (int i = 0; i < size; ++i) {
-            if (mask_data[i] == 1) {
-                int distFg = abs((int)inImg_pixels[i] - (int)fgMean);
-                int distBg = abs((int)inImg_pixels[i] - (int)bgMean);
-
-                // Reclassify as closer to fg or bg
-                if (distFg < distBg)
-                    mask_data[i] = 2;  // Becomes foreground
-                else
-                    mask_data[i] = 0;  // Becomes background
-            }
-        }
-    }
-    return EMBEDDIP_OK;
-}
-
 /**
  * @brief Performs a simplified GrabCut-inspired segmentation on a grayscale image using a
  * rectangular ROI.
@@ -1147,300 +1032,378 @@ embeddip_status_t grabCutLite(const Image *src, Image *mask, Rectangle roi, int
     return EMBEDDIP_OK;
 }
 
-embeddip_status_t
-grabCutGrayscaleRealistic(const Image *src, Image *mask, Rectangle roi, int max_iter)
+typedef struct {
+    int to;
+    int next;
+    float cap;
+} gc_edge_t;
+
+typedef struct {
+    int n;
+    int source;
+    int sink;
+    int *head;
+    gc_edge_t *edges;
+    int edge_count;
+    int edge_cap;
+    int *level;
+    int *it;
+    int *queue;
+    uint8_t *seen;
+} gc_graph_t;
+
+static int gc_init(gc_graph_t *g, int n, int edge_cap)
 {
-    if (!src || !mask || !src->pixels || src->format != IMAGE_FORMAT_GRAYSCALE)
-        return EMBEDDIP_ERROR_NULL_PTR;
+    g->n = n;
+    g->source = n - 2;
+    g->sink = n - 1;
+    g->edge_count = 0;
+    g->edge_cap = edge_cap;
+    g->head = (int *)memory_alloc((size_t)n * sizeof(int));
+    g->edges = (gc_edge_t *)memory_alloc((size_t)edge_cap * sizeof(gc_edge_t));
+    g->level = (int *)memory_alloc((size_t)n * sizeof(int));
+    g->it = (int *)memory_alloc((size_t)n * sizeof(int));
+    g->queue = (int *)memory_alloc((size_t)n * sizeof(int));
+    g->seen = (uint8_t *)memory_alloc((size_t)n);
+    if (!g->head || !g->edges || !g->level || !g->it || !g->queue || !g->seen) {
+        return -1;
+    }
+    for (int i = 0; i < n; ++i)
+        g->head[i] = -1;
+    return 0;
+}
 
-    int width = src->width;
-    int height = src->height;
-    int size = width * height;
-    const uint8_t *src_data = (const uint8_t *)src->pixels;
-    uint8_t *mask_data = (uint8_t *)mask->pixels;
+static void gc_free(gc_graph_t *g)
+{
+    if (g->head)
+        memory_free(g->head);
+    if (g->edges)
+        memory_free(g->edges);
+    if (g->level)
+        memory_free(g->level);
+    if (g->it)
+        memory_free(g->it);
+    if (g->queue)
+        memory_free(g->queue);
+    if (g->seen)
+        memory_free(g->seen);
+    memset(g, 0, sizeof(*g));
+}
 
-    // Allocate component responsibilities
-    uint8_t *labels = (uint8_t *)memory_alloc(size * sizeof(uint8_t));  // 0=BG, 1=FG
-    float(*fg_resp)[GMM_COMPONENTS] =
-        (float(*)[GMM_COMPONENTS])memory_alloc(size * GMM_COMPONENTS * sizeof(float));
-    float(*bg_resp)[GMM_COMPONENTS] =
-        (float(*)[GMM_COMPONENTS])memory_alloc(size * GMM_COMPONENTS * sizeof(float));
+static int gc_add_edge(gc_graph_t *g, int u, int v, float cap)
+{
+    if (g->edge_count + 2 > g->edge_cap)
+        return -1;
+    g->edges[g->edge_count] = (gc_edge_t){.to = v, .next = g->head[u], .cap = cap};
+    g->head[u] = g->edge_count++;
+    g->edges[g->edge_count] = (gc_edge_t){.to = u, .next = g->head[v], .cap = 0.0f};
+    g->head[v] = g->edge_count++;
+    return 0;
+}
 
-    GMMComponent fg_gmm[GMM_COMPONENTS];
-    GMMComponent bg_gmm[GMM_COMPONENTS];
+static int gc_add_undirected(gc_graph_t *g, int u, int v, float cap)
+{
+    if (gc_add_edge(g, u, v, cap) != 0)
+        return -1;
+    if (gc_add_edge(g, v, u, cap) != 0)
+        return -1;
+    return 0;
+}
 
-    // Step 1: Initialize mask from ROI
-    for (int y = 0; y < height; ++y) {
-        for (int x = 0; x < width; ++x) {
-            int idx = y * width + x;
-            if (x >= roi.x && x < roi.x + roi.width && y >= roi.y && y < roi.y + roi.height) {
-                mask_data[idx] = FOREGROUND;
-                labels[idx] = 1;
-            } else {
-                mask_data[idx] = BACKGROUND;
-                labels[idx] = 0;
+static int gc_bfs(gc_graph_t *g)
+{
+    for (int i = 0; i < g->n; ++i)
+        g->level[i] = -1;
+    int qh = 0, qt = 0;
+    g->level[g->source] = 0;
+    g->queue[qt++] = g->source;
+    while (qh < qt) {
+        int u = g->queue[qh++];
+        for (int ei = g->head[u]; ei != -1; ei = g->edges[ei].next) {
+            gc_edge_t *e = &g->edges[ei];
+            if (e->cap > 1e-6f && g->level[e->to] < 0) {
+                g->level[e->to] = g->level[u] + 1;
+                g->queue[qt++] = e->to;
             }
         }
     }
+    return g->level[g->sink] >= 0;
+}
 
-    // Step 2: Initialize GMMs with 2 components
-    for (int i = 0; i < GMM_COMPONENTS; ++i) {
-        fg_gmm[i].mean = 50.0f + 100 * i;
-        fg_gmm[i].variance = 500.0f;
-        fg_gmm[i].weight = 0.5f;
-
-        bg_gmm[i].mean = 50.0f + 100 * i;
-        bg_gmm[i].variance = 500.0f;
-        bg_gmm[i].weight = 0.5f;
-    }
-
-    // Step 3: EM Iterations
-    for (int iter = 0; iter < max_iter; ++iter) {
-        // E-Step: compute responsibilities
-        for (int i = 0; i < size; ++i) {
-            float x = (float)src_data[i];
-            float total_fg = 0.0f, total_bg = 0.0f;
-
-            // Foreground responsibilities
-            for (int c = 0; c < GMM_COMPONENTS; ++c) {
-                fg_resp[i][c] =
-                    fg_gmm[c].weight * gaussian_prob(x, fg_gmm[c].mean, fg_gmm[c].variance);
-                total_fg += fg_resp[i][c];
-            }
-            for (int c = 0; c < GMM_COMPONENTS; ++c)
-                fg_resp[i][c] /= (total_fg + 1e-6f);
-
-            // Background responsibilities
-            for (int c = 0; c < GMM_COMPONENTS; ++c) {
-                bg_resp[i][c] =
-                    bg_gmm[c].weight * gaussian_prob(x, bg_gmm[c].mean, bg_gmm[c].variance);
-                total_bg += bg_resp[i][c];
-            }
-            for (int c = 0; c < GMM_COMPONENTS; ++c)
-                bg_resp[i][c] /= (total_bg + 1e-6f);
-        }
-
-        // M-Step: update GMM parameters
-        for (int c = 0; c < GMM_COMPONENTS; ++c) {
-            // FG
-            float w_sum = 0.0f, x_sum = 0.0f, x2_sum = 0.0f;
-            for (int i = 0; i < size; ++i) {
-                if (labels[i] == 1) {
-                    float r = fg_resp[i][c];
-                    float x = (float)src_data[i];
-                    w_sum += r;
-                    x_sum += r * x;
-                    x2_sum += r * x * x;
-                }
-            }
-            if (w_sum > 1e-6f) {
-                fg_gmm[c].weight = w_sum;
-                fg_gmm[c].mean = x_sum / w_sum;
-                fg_gmm[c].variance =
-                    fmaxf((x2_sum / w_sum) - fg_gmm[c].mean * fg_gmm[c].mean, 10.0f);
-            }
-
-            // BG
-            w_sum = x_sum = x2_sum = 0.0f;
-            for (int i = 0; i < size; ++i) {
-                if (labels[i] == 0) {
-                    float r = bg_resp[i][c];
-                    float x = (float)src_data[i];
-                    w_sum += r;
-                    x_sum += r * x;
-                    x2_sum += r * x * x;
-                }
-            }
-            if (w_sum > 1e-6f) {
-                bg_gmm[c].weight = w_sum;
-                bg_gmm[c].mean = x_sum / w_sum;
-                bg_gmm[c].variance =
-                    fmaxf((x2_sum / w_sum) - bg_gmm[c].mean * bg_gmm[c].mean, 10.0f);
-            }
-        }
-
-        // Normalize GMM weights
-        float fg_total = 0.0f, bg_total = 0.0f;
-        for (int c = 0; c < GMM_COMPONENTS; ++c) {
-            fg_total += fg_gmm[c].weight;
-            bg_total += bg_gmm[c].weight;
-        }
-        for (int c = 0; c < GMM_COMPONENTS; ++c) {
-            fg_gmm[c].weight /= fg_total;
-            bg_gmm[c].weight /= bg_total;
+static float gc_dfs(gc_graph_t *g, int u, float f)
+{
+    if (u == g->sink)
+        return f;
+    for (int *pei = &g->it[u]; *pei != -1; *pei = g->edges[*pei].next) {
+        int ei = *pei;
+        gc_edge_t *e = &g->edges[ei];
+        if (e->cap <= 1e-6f || g->level[e->to] != g->level[u] + 1)
+            continue;
+        float pushed = gc_dfs(g, e->to, fminf(f, e->cap));
+        if (pushed > 1e-6f) {
+            e->cap -= pushed;
+            g->edges[ei ^ 1].cap += pushed;
+            return pushed;
         }
+    }
+    return 0.0f;
+}
 
-        // Reassign labels
-        for (int i = 0; i < size; ++i) {
-            float x = (float)src_data[i];
-            float p_fg = 0.0f, p_bg = 0.0f;
-            for (int c = 0; c < GMM_COMPONENTS; ++c) {
-                p_fg += fg_gmm[c].weight * gaussian_prob(x, fg_gmm[c].mean, fg_gmm[c].variance);
-                p_bg += bg_gmm[c].weight * gaussian_prob(x, bg_gmm[c].mean, bg_gmm[c].variance);
-            }
-            labels[i] = (p_fg > p_bg) ? 1 : 0;
-            mask_data[i] = labels[i] ? FOREGROUND : BACKGROUND;
+static float gc_maxflow(gc_graph_t *g)
+{
+    float flow = 0.0f;
+    while (gc_bfs(g)) {
+        for (int i = 0; i < g->n; ++i)
+            g->it[i] = g->head[i];
+        while (1) {
+            float pushed = gc_dfs(g, g->source, 1e20f);
+            if (pushed <= 1e-6f)
+                break;
+            flow += pushed;
         }
     }
-
-    memory_free(labels);
-    memory_free(fg_resp);
-    memory_free(bg_resp);
-    return EMBEDDIP_OK;
+    return flow;
 }
 
-typedef struct {
-    float weight;
-    float mean[3];      // [R, G, B]
-    float variance[3];  // diagonal covariance
-} GMMComponentRGB;
-
-float gaussian_prob_rgb(const uint8_t *pixel, const GMMComponentRGB *comp)
+static void gc_mark_source_side(gc_graph_t *g)
 {
-    float prob = 1.0f;
-    for (int i = 0; i < 3; ++i) {
-        float diff = (float)pixel[i] - comp->mean[i];
-        float var = comp->variance[i];
-        prob *= (1.0f / sqrtf(2.0f * M_PI * var)) * expf(-diff * diff / (2.0f * var));
+    memset(g->seen, 0, (size_t)g->n);
+    int qh = 0, qt = 0;
+    g->seen[g->source] = 1;
+    g->queue[qt++] = g->source;
+    while (qh < qt) {
+        int u = g->queue[qh++];
+        for (int ei = g->head[u]; ei != -1; ei = g->edges[ei].next) {
+            gc_edge_t *e = &g->edges[ei];
+            if (e->cap > 1e-6f && !g->seen[e->to]) {
+                g->seen[e->to] = 1;
+                g->queue[qt++] = e->to;
+            }
+        }
     }
-    return prob;
 }
 
-embeddip_status_t grabCutRGB(const Image *src, Image *mask, Rectangle roi, int max_iter)
+embeddip_status_t grabCut(const Image *src, Image *mask, Rectangle roi, int max_iter)
 {
-    if (!src || !mask || !src->pixels || src->format != IMAGE_FORMAT_RGB888)
+    if (!src || !mask || !src->pixels || !mask->pixels)
         return EMBEDDIP_ERROR_NULL_PTR;
+    if (src->format != IMAGE_FORMAT_GRAYSCALE)
+        return EMBEDDIP_ERROR_INVALID_FORMAT;
+    if (mask->format != IMAGE_FORMAT_MASK && mask->format != IMAGE_FORMAT_GRAYSCALE)
+        return EMBEDDIP_ERROR_INVALID_FORMAT;
+    if (src->width != mask->width || src->height != mask->height)
+        return EMBEDDIP_ERROR_INVALID_SIZE;
+    if (max_iter <= 0)
+        max_iter = MAX_ITER_GRABCUT;
 
-    int width = src->width;
-    int height = src->height;
-    int size = width * height;
+    const int width = (int)src->width;
+    const int height = (int)src->height;
     const uint8_t *src_data = (const uint8_t *)src->pixels;
     uint8_t *mask_data = (uint8_t *)mask->pixels;
+    memset(mask_data, BACKGROUND, (size_t)width * (size_t)height);
+
+    int x0 = roi.x < 0 ? 0 : roi.x;
+    int y0 = roi.y < 0 ? 0 : roi.y;
+    int x1 = roi.x + roi.width;
+    int y1 = roi.y + roi.height;
+    if (x1 > width)
+        x1 = width;
+    if (y1 > height)
+        y1 = height;
+    if (x0 >= x1 || y0 >= y1)
+        return EMBEDDIP_ERROR_INVALID_ARG;
 
-    // Allocate label buffer (0 = BG, 1 = FG)
-    uint8_t *labels = (uint8_t *)memory_alloc(size * sizeof(uint8_t));
-    float(*fg_resp)[GMM_COMPONENTS] =
-        (float(*)[GMM_COMPONENTS])memory_alloc(size * GMM_COMPONENTS * sizeof(float));
-    float(*bg_resp)[GMM_COMPONENTS] =
-        (float(*)[GMM_COMPONENTS])memory_alloc(size * GMM_COMPONENTS * sizeof(float));
-
-    GMMComponentRGB fg_gmm[GMM_COMPONENTS];
-    GMMComponentRGB bg_gmm[GMM_COMPONENTS];
+    // Downsample ROI for embedded memory/perf while still using graph-cut.
+    int ds = 2;
+    const int target_max_nodes = 7000;
+    int sw = (x1 - x0 + ds - 1) / ds;
+    int sh = (y1 - y0 + ds - 1) / ds;
+    while (sw * sh > target_max_nodes && ds < 16) {
+        ds *= 2;
+        sw = (x1 - x0 + ds - 1) / ds;
+        sh = (y1 - y0 + ds - 1) / ds;
+    }
+    const int sn = sw * sh;
+    uint8_t *labels = (uint8_t *)memory_alloc((size_t)sn);  // 0=BG, 1=FG
+    uint8_t *small = (uint8_t *)memory_alloc((size_t)sn);
+    if (!labels || !small) {
+        if (labels)
+            memory_free(labels);
+        if (small)
+            memory_free(small);
+        return EMBEDDIP_ERROR_OUT_OF_MEMORY;
+    }
 
-    // Step 1: Initial Labeling from ROI
-    for (int y = 0; y < height; ++y) {
-        for (int x = 0; x < width; ++x) {
-            int idx = y * width + x;
-            if (x >= roi.x && x < roi.x + roi.width && y >= roi.y && y < roi.y + roi.height) {
-                mask_data[idx] = FOREGROUND;
-                labels[idx] = 1;
-            } else {
-                mask_data[idx] = BACKGROUND;
-                labels[idx] = 0;
-            }
+    for (int sy = 0; sy < sh; ++sy) {
+        for (int sx = 0; sx < sw; ++sx) {
+            int xx = x0 + sx * ds;
+            int yy = y0 + sy * ds;
+            if (xx >= width)
+                xx = width - 1;
+            if (yy >= height)
+                yy = height - 1;
+            small[sy * sw + sx] = src_data[yy * width + xx];
         }
     }
 
-    // Step 2: Init GMMs
-    for (int c = 0; c < GMM_COMPONENTS; ++c) {
-        for (int ch = 0; ch < 3; ++ch) {
-            fg_gmm[c].mean[ch] = 100.0f + 50 * c;
-            fg_gmm[c].variance[ch] = 1000.0f;
-            bg_gmm[c].mean[ch] = 50.0f + 100 * c;
-            bg_gmm[c].variance[ch] = 1000.0f;
+    int border = ((sw < sh) ? sw : sh) / 10;
+    if (border < 2)
+        border = 2;
+    for (int sy = 0; sy < sh; ++sy) {
+        for (int sx = 0; sx < sw; ++sx) {
+            int near_left = sx < border;
+            int near_right = (sw - 1 - sx) < border;
+            int near_top = sy < border;
+            int near_bottom = (sh - 1 - sy) < border;
+            labels[sy * sw + sx] = (near_left || near_right || near_top || near_bottom) ? 0u : 1u;
         }
-        fg_gmm[c].weight = 0.5f;
-        bg_gmm[c].weight = 0.5f;
     }
 
-    // Step 3: EM Iterations
+    const float lambda = 25.0f;
+    const float hard_cap = 1e6f;
+
     for (int iter = 0; iter < max_iter; ++iter) {
-        // E-Step: compute responsibilities
-        for (int i = 0; i < size; ++i) {
-            const uint8_t *px = &src_data[i * 3];
-            float total_fg = 0.0f, total_bg = 0.0f;
-
-            for (int c = 0; c < GMM_COMPONENTS; ++c) {
-                fg_resp[i][c] = fg_gmm[c].weight * gaussian_prob_rgb(px, &fg_gmm[c]);
-                bg_resp[i][c] = bg_gmm[c].weight * gaussian_prob_rgb(px, &bg_gmm[c]);
-                total_fg += fg_resp[i][c];
-                total_bg += bg_resp[i][c];
+        float fg_sum = 0.0f, fg_sqsum = 0.0f, fg_cnt = 0.0f;
+        float bg_sum = 0.0f, bg_sqsum = 0.0f, bg_cnt = 0.0f;
+        for (int i = 0; i < sn; ++i) {
+            float v = (float)small[i];
+            if (labels[i]) {
+                fg_sum += v;
+                fg_sqsum += v * v;
+                fg_cnt += 1.0f;
+            } else {
+                bg_sum += v;
+                bg_sqsum += v * v;
+                bg_cnt += 1.0f;
             }
-            for (int c = 0; c < GMM_COMPONENTS; ++c) {
-                fg_resp[i][c] /= (total_fg + 1e-6f);
-                bg_resp[i][c] /= (total_bg + 1e-6f);
+        }
+        if (fg_cnt < 1.0f || bg_cnt < 1.0f)
+            break;
+
+        float mu_fg = fg_sum / fg_cnt;
+        float mu_bg = bg_sum / bg_cnt;
+        float var_fg = fmaxf((fg_sqsum / fg_cnt) - mu_fg * mu_fg, 25.0f);
+        float var_bg = fmaxf((bg_sqsum / bg_cnt) - mu_bg * mu_bg, 25.0f);
+
+        float d2_sum = 0.0f;
+        int d2_cnt = 0;
+        for (int y = 0; y < sh; ++y) {
+            for (int x = 0; x < sw; ++x) {
+                int i = y * sw + x;
+                if (x + 1 < sw) {
+                    float d = (float)small[i] - (float)small[i + 1];
+                    d2_sum += d * d;
+                    d2_cnt++;
+                }
+                if (y + 1 < sh) {
+                    float d = (float)small[i] - (float)small[i + sw];
+                    d2_sum += d * d;
+                    d2_cnt++;
+                }
             }
         }
+        float beta = 1.0f / (2.0f * (d2_sum / (float)(d2_cnt + 1)) + 1e-6f);
+
+        gc_graph_t g = {0};
+        int node_n = sn + 2;
+        // Terminal edges: ~4*sn, n-links: ~4*((sw-1)*sh + sw*(sh-1)), plus border t-links.
+        // Worst-case storage bound with current representation:
+        // - terminal links: 2 add_edge/pixel => 4*sn edges
+        // - smoothness links: 4 edges per right/down neighbor pair
+        //   pairs = (sw-1)*sh + sw*(sh-1) = 2*sn - sw - sh
+        //   => 4*(2*sn - sw - sh) edges
+        // - hard-ring t-links: up to 1 add_edge/pixel in worst case => 2*sn edges
+        // Total <= 14*sn - 4*(sw+sh), add safety margin.
+        int edge_cap = 14 * sn + 512;
+        if (gc_init(&g, node_n, edge_cap) != 0) {
+            gc_free(&g);
+            memory_free(labels);
+            memory_free(small);
+            return EMBEDDIP_ERROR_OUT_OF_MEMORY;
+        }
 
-        // M-Step: update GMM parameters
-        for (int c = 0; c < GMM_COMPONENTS; ++c) {
-            float fg_wsum = 0.0f, bg_wsum = 0.0f;
-            float fg_sum[3] = {0}, fg_sqsum[3] = {0};
-            float bg_sum[3] = {0}, bg_sqsum[3] = {0};
+        for (int y = 0; y < sh; ++y) {
+            for (int x = 0; x < sw; ++x) {
+                int p = y * sw + x;
+                float pix = (float)small[p];
+                float dbg = 0.5f * logf(var_bg) + ((pix - mu_bg) * (pix - mu_bg)) / (2.0f * var_bg);
+                float dfg = 0.5f * logf(var_fg) + ((pix - mu_fg) * (pix - mu_fg)) / (2.0f * var_fg);
+                if (dbg < 0.0f)
+                    dbg = 0.0f;
+                if (dfg < 0.0f)
+                    dfg = 0.0f;
+
+                if (gc_add_edge(&g, g.source, p, dbg + 1e-3f) != 0 ||
+                    gc_add_edge(&g, p, g.sink, dfg + 1e-3f) != 0) {
+                    gc_free(&g);
+                    memory_free(labels);
+                    memory_free(small);
+                    return EMBEDDIP_ERROR_OUT_OF_MEMORY;
+                }
 
-            for (int i = 0; i < size; ++i) {
-                const uint8_t *px = &src_data[i * 3];
-                if (labels[i] == 1) {
-                    float r = fg_resp[i][c];
-                    fg_wsum += r;
-                    for (int ch = 0; ch < 3; ++ch) {
-                        fg_sum[ch] += r * px[ch];
-                        fg_sqsum[ch] += r * px[ch] * px[ch];
-                    }
-                } else {
-                    float r = bg_resp[i][c];
-                    bg_wsum += r;
-                    for (int ch = 0; ch < 3; ++ch) {
-                        bg_sum[ch] += r * px[ch];
-                        bg_sqsum[ch] += r * px[ch] * px[ch];
+                int near_left = x < border;
+                int near_right = (sw - 1 - x) < border;
+                int near_top = y < border;
+                int near_bottom = (sh - 1 - y) < border;
+                int is_ring = near_left || near_right || near_top || near_bottom;
+                if (is_ring) {
+                    if (gc_add_edge(&g, p, g.sink, hard_cap) != 0) {
+                        gc_free(&g);
+                        memory_free(labels);
+                        memory_free(small);
+                        return EMBEDDIP_ERROR_OUT_OF_MEMORY;
                     }
                 }
-            }
 
-            for (int ch = 0; ch < 3; ++ch) {
-                if (fg_wsum > 1e-6f) {
-                    fg_gmm[c].mean[ch] = fg_sum[ch] / fg_wsum;
-                    float var = (fg_sqsum[ch] / fg_wsum) - fg_gmm[c].mean[ch] * fg_gmm[c].mean[ch];
-                    fg_gmm[c].variance[ch] = fmaxf(var, 10.0f);
+                if (x + 1 < sw) {
+                    int q = p + 1;
+                    float d = (float)small[p] - (float)small[q];
+                    float w = lambda * expf(-beta * d * d) + 1e-3f;
+                    if (gc_add_undirected(&g, p, q, w) != 0) {
+                        gc_free(&g);
+                        memory_free(labels);
+                        memory_free(small);
+                        return EMBEDDIP_ERROR_OUT_OF_MEMORY;
+                    }
                 }
-
-                if (bg_wsum > 1e-6f) {
-                    bg_gmm[c].mean[ch] = bg_sum[ch] / bg_wsum;
-                    float var = (bg_sqsum[ch] / bg_wsum) - bg_gmm[c].mean[ch] * bg_gmm[c].mean[ch];
-                    bg_gmm[c].variance[ch] = fmaxf(var, 10.0f);
+                if (y + 1 < sh) {
+                    int q = p + sw;
+                    float d = (float)small[p] - (float)small[q];
+                    float w = lambda * expf(-beta * d * d) + 1e-3f;
+                    if (gc_add_undirected(&g, p, q, w) != 0) {
+                        gc_free(&g);
+                        memory_free(labels);
+                        memory_free(small);
+                        return EMBEDDIP_ERROR_OUT_OF_MEMORY;
+                    }
                 }
             }
-
-            fg_gmm[c].weight = fg_wsum;
-            bg_gmm[c].weight = bg_wsum;
         }
 
-        // Normalize weights
-        float fg_total = 0.0f, bg_total = 0.0f;
-        for (int c = 0; c < GMM_COMPONENTS; ++c) {
-            fg_total += fg_gmm[c].weight;
-            bg_total += bg_gmm[c].weight;
+        gc_maxflow(&g);
+        gc_mark_source_side(&g);
+        for (int i = 0; i < sn; ++i) {
+            labels[i] = g.seen[i] ? 1u : 0u;
         }
-        for (int c = 0; c < GMM_COMPONENTS; ++c) {
-            fg_gmm[c].weight /= (fg_total + 1e-6f);
-            bg_gmm[c].weight /= (bg_total + 1e-6f);
-        }
-
-        // Reassign labels and update mask
-        for (int i = 0; i < size; ++i) {
-            const uint8_t *px = &src_data[i * 3];
-            float p_fg = 0.0f, p_bg = 0.0f;
-            for (int c = 0; c < GMM_COMPONENTS; ++c) {
-                p_fg += fg_gmm[c].weight * gaussian_prob_rgb(px, &fg_gmm[c]);
-                p_bg += bg_gmm[c].weight * gaussian_prob_rgb(px, &bg_gmm[c]);
-            }
-            labels[i] = (p_fg > p_bg) ? 1 : 0;
-            mask_data[i] = labels[i] ? FOREGROUND : BACKGROUND;
+        gc_free(&g);
+    }
+
+    memset(mask_data, BACKGROUND, (size_t)width * (size_t)height);
+    for (int y = y0; y < y1; ++y) {
+        for (int x = x0; x < x1; ++x) {
+            int sx = (x - x0) / ds;
+            int sy = (y - y0) / ds;
+            if (sx >= sw)
+                sx = sw - 1;
+            if (sy >= sh)
+                sy = sh - 1;
+            int si = sy * sw + sx;
+            mask_data[y * width + x] = labels[si] ? FOREGROUND : BACKGROUND;
         }
     }
+    mask->log = IMAGE_DATA_PIXELS;
 
     memory_free(labels);
-    memory_free(fg_resp);
-    memory_free(bg_resp);
+    memory_free(small);
     return EMBEDDIP_OK;
 }
diff --git a/imgproc/segmentation.h b/imgproc/segmentation.h
index 195e1c6..367e3dc 100644
--- a/imgproc/segmentation.h
+++ b/imgproc/segmentation.h
@@ -83,18 +83,7 @@ embeddip_status_t colorRegionGrowing(const Image *inImg,
                                      float tolerance);
 
 /**
- * @brief GrabCut segmentation (working version).
- *
- * @param[in]  src        Pointer to input grayscale image.
- * @param[out] mask       Pointer to output mask image.
- * @param[in]  roi        Region of interest.
- * @param[in]  iterations Number of iterations.
- * @return EMBEDDIP_OK on success, error code otherwise.
- */
-embeddip_status_t grabCutLite_working(const Image *src, Image *mask, Rectangle roi, int iterations);
-
-/**
- * @brief GrabCut segmentation (sd version).
+ * @brief GrabCut segmentation (grayscale graph-cut).
  *
  * @param[in]  src        Pointer to input grayscale image.
  * @param[out] mask       Pointer to output mask image.
@@ -102,10 +91,10 @@ embeddip_status_t grabCutLite_working(const Image *src, Image *mask, Rectangle r
  * @param[in]  iterations Number of iterations.
  * @return EMBEDDIP_OK on success, error code otherwise.
  */
-embeddip_status_t grabCutLitesd(const Image *src, Image *mask, Rectangle roi, int iterations);
+embeddip_status_t grabCut(const Image *src, Image *mask, Rectangle roi, int iterations);
 
 /**
- * @brief GrabCut segmentation (main version).
+ * @brief GrabCut segmentation (lightweight version).
  *
  * @param[in]  src        Pointer to input image.
  * @param[out] mask       Pointer to output mask image.
@@ -115,29 +104,6 @@ embeddip_status_t grabCutLitesd(const Image *src, Image *mask, Rectangle roi, in
  */
 embeddip_status_t grabCutLite(const Image *src, Image *mask, Rectangle roi, int iterations);
 
-/**
- * @brief GrabCut segmentation (grayscale realistic).
- *
- * @param[in]  src        Pointer to input grayscale image.
- * @param[out] mask       Pointer to output mask image.
- * @param[in]  roi        Region of interest.
- * @param[in]  iterations Number of iterations.
- * @return EMBEDDIP_OK on success, error code otherwise.
- */
-embeddip_status_t
-grabCutGrayscaleRealistic(const Image *src, Image *mask, Rectangle roi, int iterations);
-
-/**
- * @brief GrabCut segmentation (RGB version).
- *
- * @param[in]  src        Pointer to input RGB image.
- * @param[out] mask       Pointer to output mask image.
- * @param[in]  roi        Region of interest.
- * @param[in]  iterations Number of iterations.
- * @return EMBEDDIP_OK on success, error code otherwise.
- */
-embeddip_status_t grabCutRGB(const Image *src, Image *mask, Rectangle roi, int iterations);
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/wrapper/ImageWrapper.cpp b/wrapper/ImageWrapper.cpp
index 6da79f9..64716e7 100755
--- a/wrapper/ImageWrapper.cpp
+++ b/wrapper/ImageWrapper.cpp
@@ -752,12 +752,12 @@ void Image::bitwiseNot(Image &out) const
 }
 
 /**
- * @brief Runs simplified GrabCut workflow that outputs a mask.
+ * @brief Runs grayscale graph-cut GrabCut.
  */
-// TODO
-// void Image::grabCutLitesd(Image &maskImg, int iterations) const {
-//  ::grabCutLite_working(raw(), maskImg.raw(), iterations);
-//}
+embeddip_status_t Image::grabCut(Image &maskImg, Rectangle roi, int iterations) const
+{
+    return ::grabCut(raw(), maskImg.raw(), roi, iterations);
+}
 
 /**
  * @brief Runs simplified GrabCut within ROI.
@@ -767,22 +767,6 @@ void Image::grabCutLite(Image &outImg, Rectangle roi, int iterations) const
     ::grabCutLite(raw(), outImg.raw(), roi, iterations);
 }
 
-/**
- * @brief Runs simplified GrabCut for RGB888 data.
- */
-void Image::grabCutLite888(Image &outImg, Rectangle roi, int iterations) const
-{
-    ::grabCutRGB(raw(), outImg.raw(), roi, iterations);
-}
-
-/**
- * @brief Runs RGB graph-cut segmentation.
- */
-void Image::grabCutRGB(Image &outMask, Rectangle roi, int max_iter) const
-{
-    ::grabCutRGB(raw(), outMask.raw(), roi, max_iter);
-}
-
 /**
  * @brief Thresholds image by hue interval.
  */
diff --git a/wrapper/ImageWrapper.hpp b/wrapper/ImageWrapper.hpp
index 6bacb33..c9a0047 100755
--- a/wrapper/ImageWrapper.hpp
+++ b/wrapper/ImageWrapper.hpp
@@ -798,12 +798,13 @@ class Image
     void bitwiseNot(Image &out) const;
 
     /**
-     * @brief Runs simplified GrabCut and returns mask output.
-     * @param[out] maskImg Output segmentation mask
-     * @param[in] iterations Number of refinement iterations
-     * @see ::grabCutLitesd For underlying C implementation
+     * @brief Runs grayscale graph-cut GrabCut in ROI.
+     * @param[out] maskImg Output segmentation mask.
+     * @param[in] roi Region of interest.
+     * @param[in] iterations Number of refinement iterations.
+     * @return C-layer status code.
      */
-    void grabCutLitesd(Image &maskImg, int iterations) const;
+    embeddip_status_t grabCut(Image &maskImg, Rectangle roi, int iterations) const;
 
     /**
      * @brief Runs simplified GrabCut in a rectangular ROI.
@@ -813,22 +814,6 @@ class Image
      */
     void grabCutLite(Image &outImg, Rectangle roi, int iterations) const;
 
-    /**
-     * @brief Runs simplified GrabCut for RGB888 images.
-     * @param outImg Output segmentation image/mask.
-     * @param roi Region of interest.
-     * @param iterations Number of refinement iterations.
-     */
-    void grabCutLite888(Image &outImg, Rectangle roi, int iterations) const;
-
-    /**
-     * @brief Runs RGB graph-cut segmentation in ROI.
-     * @param outMask Output binary mask.
-     * @param roi Region of interest.
-     * @param max_iter Maximum iteration count.
-     */
-    void grabCutRGB(Image &outMask, Rectangle roi, int max_iter) const;
-
     /**
      * @brief Thresholds image by hue range.
      * @param output Output binary mask image.

From e1f19f519b3f1be64c2c1e7f9b48a484c68e2a80 Mon Sep 17 00:00:00 2001
From: Ozan Durgut <ozandurgut.2001@hotmail.com>
Date: Thu, 23 Apr 2026 23:38:29 +0200
Subject: [PATCH 14/18] imgproc: filter: harden canny and fix edge-thinning
 threshold stages

Fix several robustness bugs in NMS/double-threshold/hysteresis and
tighten Canny error handling. This removes crash-prone paths and
makes edge linking behavior deterministic on embedded targets.

Signed-off-by: Ozan Durgut <ozandurgut.2001@hotmail.com>
---
 imgproc/filter.c | 175 +++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 155 insertions(+), 20 deletions(-)

diff --git a/imgproc/filter.c b/imgproc/filter.c
index 9b092ac..a53278f 100755
--- a/imgproc/filter.c
+++ b/imgproc/filter.c
@@ -1046,6 +1046,8 @@ void nonMaximumSuppression(const Image *magImg, const Image *phaseImg, Image *ds
     uint32_t w = magImg->width, h = magImg->height;
     if (w != phaseImg->width || h != phaseImg->height)
         return;
+    if (!magImg->chals || !magImg->chals->ch[0] || !phaseImg->chals || !phaseImg->chals->ch[0])
+        return;
     uint32_t N = w * h;
 
     const float *mag = magImg->chals->ch[0];
@@ -1053,14 +1055,18 @@ void nonMaximumSuppression(const Image *magImg, const Image *phaseImg, Image *ds
 
     if (!dst->chals) {
         dst->chals = (channels_t *)memory_alloc(sizeof(channels_t));
+        if (!dst->chals)
+            return;
         memset(dst->chals, 0, sizeof(channels_t));
     }
     dst->chals->ch[0] = (float *)memory_alloc((size_t)N * sizeof(float));
+    if (!dst->chals->ch[0])
+        return;
     dst->is_chals = 1;
     float *dst_data = dst->chals->ch[0];
 
     // Initialize all to zero (including borders)
-    memset(dst, 0, N * sizeof(float));
+    memset(dst_data, 0, (size_t)N * sizeof(float));
 
     // Iterate, skip borders
     for (uint32_t y = 1; y < h - 1; y++) {
@@ -1110,14 +1116,25 @@ void doubleThreshold(const Image *src,
 {
     if (!src || !dst)
         return;
+    if (!src->chals || !src->chals->ch[0])
+        return;
     uint32_t N = src->width * src->height;
     const float *src_data = src->chals->ch[0];
+    if (lowThresh > highThresh) {
+        float tmp = lowThresh;
+        lowThresh = highThresh;
+        highThresh = tmp;
+    }
 
     if (!dst->chals) {
         dst->chals = (channels_t *)memory_alloc(sizeof(channels_t));
+        if (!dst->chals)
+            return;
         memset(dst->chals, 0, sizeof(channels_t));
     }
     dst->chals->ch[0] = (float *)memory_alloc((size_t)N * sizeof(float));
+    if (!dst->chals->ch[0])
+        return;
     dst->is_chals = 1;
     float *dst_data = dst->chals->ch[0];
 
@@ -1142,32 +1159,60 @@ void hysteresis(const Image *src, Image *dst, float weakVal, float strongVal)
     if (!src || !dst)
         return;
     uint32_t w = src->width, h = src->height;
+    uint32_t N = w * h;
+    if (!src->chals || !src->chals->ch[0] || N == 0)
+        return;
+    const float *src_data = src->chals->ch[0];
 
     if (!dst->chals) {
         dst->chals = (channels_t *)memory_alloc(sizeof(channels_t));
+        if (!dst->chals)
+            return;
         memset(dst->chals, 0, sizeof(channels_t));
     }
-    dst->chals->ch[0] = (float *)memory_alloc((size_t)w * h * sizeof(float));
+    dst->chals->ch[0] = (float *)memory_alloc((size_t)N * sizeof(float));
+    if (!dst->chals->ch[0])
+        return;
     dst->is_chals = 1;
     float *dst_data = dst->chals->ch[0];
-    memcpy(dst, src, (size_t)w * h * sizeof(float));
+    memset(dst_data, 0, (size_t)N * sizeof(float));
 
-    for (uint32_t y = 1; y < h - 1; y++) {
-        for (uint32_t x = 1; x < w - 1; x++) {
-            uint32_t idx = y * w + x;
-            if (dst_data[idx] == weakVal) {
-                bool connected = false;
-                for (int j = -1; j <= 1; j++) {
-                    for (int i = -1; i <= 1; i++) {
-                        if (dst_data[(y + j) * w + (x + i)] == strongVal) {
-                            connected = true;
-                        }
-                    }
+    int *stack = (int *)memory_alloc((size_t)N * sizeof(int));
+    if (!stack)
+        return;
+    uint32_t sp = 0;
+
+    for (uint32_t i = 0; i < N; ++i) {
+        if (src_data[i] == strongVal) {
+            dst_data[i] = strongVal;
+            stack[sp++] = (int)i;
+        }
+    }
+
+    while (sp > 0) {
+        int idx = stack[--sp];
+        uint32_t x = (uint32_t)idx % w;
+        uint32_t y = (uint32_t)idx / w;
+
+        int y0 = (y > 0) ? (int)y - 1 : 0;
+        int y1 = (y + 1 < h) ? (int)y + 1 : (int)h - 1;
+        int x0 = (x > 0) ? (int)x - 1 : 0;
+        int x1 = (x + 1 < w) ? (int)x + 1 : (int)w - 1;
+
+        for (int ny = y0; ny <= y1; ++ny) {
+            for (int nx = x0; nx <= x1; ++nx) {
+                uint32_t nidx = (uint32_t)ny * w + (uint32_t)nx;
+                if (dst_data[nidx] == strongVal)
+                    continue;
+                if (src_data[nidx] == weakVal) {
+                    dst_data[nidx] = strongVal;
+                    stack[sp++] = (int)nidx;
                 }
-                dst_data[idx] = connected ? strongVal : 0.0f;
             }
         }
     }
+
+    memory_free(stack);
     dst->log = IMAGE_DATA_CH0;
 }
 
@@ -1465,17 +1510,92 @@ embeddip_status_t Canny(const Image *src,
     CHECK_NULL_INT(dst);
 
     // --- Step 1: Gaussian smoothing + gradients ---
-    float sigma =
-        1.0;  // 0.3 * ((aperture_size - 1) * 0.5 - 1) + 0.8; // could derive from aperture_size
+    int k = (aperture_size < 3) ? 3 : aperture_size;
+    if ((k & 1) == 0)
+        ++k;
+    if (k > 7)
+        k = 7;
+    float sigma = 0.3f * ((float)(k - 1) * 0.5f - 1.0f) + 0.8f;
+
     Image *Ix = createImageWH_legacy(src->width, src->height, src->format);
     Image *Iy = createImageWH_legacy(src->width, src->height, src->format);
-    gaussianGradients(src, Ix, Iy, sigma);
+    if (!Ix || !Iy) {
+        deleteImage(Ix);
+        deleteImage(Iy);
+        return EMBEDDIP_ERROR_OUT_OF_MEMORY;
+    }
+    embeddip_status_t st = gaussianGradients(src, Ix, Iy, sigma);
+    if (st != EMBEDDIP_OK) {
+        deleteImage(Ix);
+        deleteImage(Iy);
+        return st;
+    }
 
     // --- Step 2: magnitude + phase ---
     Image *Mag = createImageWH_legacy(src->width, src->height, src->format);
     Image *Phase = createImageWH_legacy(src->width, src->height, src->format);
-    gradientMagnitude(Ix, Iy, Mag);
-    gradientPhase(Ix, Iy, Phase);
+    if (!Mag || !Phase) {
+        deleteImage(Ix);
+        deleteImage(Iy);
+        deleteImage(Mag);
+        deleteImage(Phase);
+        return EMBEDDIP_ERROR_OUT_OF_MEMORY;
+    }
+
+    if (l2_gradient) {
+        st = gradientMagnitude(Ix, Iy, Mag);
+    } else {
+        uint32_t N = src->size;
+        if (!Mag->chals) {
+            Mag->chals = (channels_t *)memory_alloc(sizeof(channels_t));
+            if (!Mag->chals) {
+                deleteImage(Ix);
+                deleteImage(Iy);
+                deleteImage(Mag);
+                deleteImage(Phase);
+                return EMBEDDIP_ERROR_OUT_OF_MEMORY;
+            }
+            memset(Mag->chals, 0, sizeof(channels_t));
+        }
+        Mag->chals->ch[0] = (float *)memory_alloc((size_t)N * sizeof(float));
+        if (!Mag->chals->ch[0]) {
+            deleteImage(Ix);
+            deleteImage(Iy);
+            deleteImage(Mag);
+            deleteImage(Phase);
+            return EMBEDDIP_ERROR_OUT_OF_MEMORY;
+        }
+        const float *ix = Ix->chals ? Ix->chals->ch[0] : NULL;
+        const float *iy = Iy->chals ? Iy->chals->ch[0] : NULL;
+        if (!ix || !iy) {
+            deleteImage(Ix);
+            deleteImage(Iy);
+            deleteImage(Mag);
+            deleteImage(Phase);
+            return EMBEDDIP_ERROR_INVALID_ARG;
+        }
+        for (uint32_t i = 0; i < N; ++i) {
+            Mag->chals->ch[0][i] = fabsf(ix[i]) + fabsf(iy[i]);
+        }
+        Mag->is_chals = 1;
+        Mag->log = IMAGE_DATA_CH0;
+        st = EMBEDDIP_OK;
+    }
+    if (st != EMBEDDIP_OK) {
+        deleteImage(Ix);
+        deleteImage(Iy);
+        deleteImage(Mag);
+        deleteImage(Phase);
+        return st;
+    }
+    st = gradientPhase(Ix, Iy, Phase);
+    if (st != EMBEDDIP_OK) {
+        deleteImage(Ix);
+        deleteImage(Iy);
+        deleteImage(Mag);
+        deleteImage(Phase);
+        return st;
+    }
 
     float *data = Mag->chals->ch[0];
 
@@ -1503,10 +1623,25 @@ embeddip_status_t Canny(const Image *src,
 
     // --- Step 3: NMS ---
     Image *Nms = createImageWH_legacy(src->width, src->height, src->format);
+    if (!Nms) {
+        deleteImage(Ix);
+        deleteImage(Iy);
+        deleteImage(Mag);
+        deleteImage(Phase);
+        return EMBEDDIP_ERROR_OUT_OF_MEMORY;
+    }
     nonMaximumSuppression(Mag, Phase, Nms);
 
     // --- Step 4: Double threshold ---
     Image *Dt = createImageWH_legacy(src->width, src->height, src->format);
+    if (!Dt) {
+        deleteImage(Ix);
+        deleteImage(Iy);
+        deleteImage(Mag);
+        deleteImage(Phase);
+        deleteImage(Nms);
+        return EMBEDDIP_ERROR_OUT_OF_MEMORY;
+    }
     doubleThreshold(Nms, Dt, (float)threshold1, (float)threshold2, 50.0f, 255.0f);
 
     // --- Step 5: Hysteresis ---

From 43a1e306ffde31c825573ca92a65908d0cb9bfc3 Mon Sep 17 00:00:00 2001
From: Ozan Durgut <ozandurgut.2001@hotmail.com>
Date: Thu, 23 Apr 2026 23:41:35 +0200
Subject: [PATCH 15/18] imgproc: segmentation: make multi-seed region growing
 adaptive

Use one global visited map and one global stack across all seeds.

Switch grayscale and color growth to an adaptive running region mean:
grow by distance to the region mean instead of a fixed seed value.

Return INVALID_ARG when no valid seeds are provided.

Signed-off-by: Ozan Durgut <ozandurgut.2001@hotmail.com>
---
 imgproc/segmentation.c | 195 ++++++++++++++++++++++++-----------------
 1 file changed, 117 insertions(+), 78 deletions(-)

diff --git a/imgproc/segmentation.c b/imgproc/segmentation.c
index 13677f0..570fd7c 100644
--- a/imgproc/segmentation.c
+++ b/imgproc/segmentation.c
@@ -581,6 +581,13 @@ embeddip_status_t grayscaleRegionGrowing(const Image *inImg,
         return EMBEDDIP_ERROR_OUT_OF_MEMORY;
     }
 
+    int top = 0;
+    int dx[4] = {0, -1, 1, 0};
+    int dy[4] = {-1, 0, 0, 1};
+    float regionMean = 0.0f;
+    int regionCount = 0;
+
+    // Global multi-seed initialization (single visited map and single adaptive model).
     for (int s = 0; s < numSeeds; ++s) {
         int seedX = seeds[s].x;
         int seedY = seeds[s].y;
@@ -591,43 +598,49 @@ embeddip_status_t grayscaleRegionGrowing(const Image *inImg,
         if (visited[seedIndex])
             continue;
 
-        // Run the same region growing as single-seed
-        int top = 0;
+        if (top >= STACK_SIZE) {
+            memory_free(visited);
+            memory_free(stack);
+            return EMBEDDIP_ERROR_OUT_OF_MEMORY;
+        }
+
         stack[top++] = seeds[s];
         visited[seedIndex] = true;
         dst[seedIndex] = 255;
 
-        long sum = src[seedIndex];
-        int count = 1;
+        regionCount++;
+        regionMean += ((float)src[seedIndex] - regionMean) / (float)regionCount;
+    }
 
-        int dx[4] = {0, -1, 1, 0};
-        int dy[4] = {-1, 0, 0, 1};
+    if (regionCount == 0) {
+        memory_free(visited);
+        memory_free(stack);
+        return EMBEDDIP_ERROR_INVALID_ARG;
+    }
 
-        while (top > 0) {
-            Point p = stack[--top];
-            uint8_t regionMean = (uint8_t)(sum / count);
+    while (top > 0) {
+        Point p = stack[--top];
 
-            for (int i = 0; i < 4; ++i) {
-                int nx = p.x + dx[i];
-                int ny = p.y + dy[i];
-                int nidx = ny * width + nx;
+        for (int i = 0; i < 4; ++i) {
+            int nx = p.x + dx[i];
+            int ny = p.y + dy[i];
+            int nidx = ny * width + nx;
 
-                if (nx >= 0 && nx < width && ny >= 0 && ny < height && !visited[nidx]) {
-                    uint8_t neighborValue = src[nidx];
-                    if (abs((int)neighborValue - (int)regionMean) <= tolerance) {
-                        visited[nidx] = true;
-                        dst[nidx] = 255;
-                        stack[top++] = (Point){nx, ny};
+            if (nx >= 0 && nx < width && ny >= 0 && ny < height && !visited[nidx]) {
+                uint8_t neighborValue = src[nidx];
+                if (abs((int)neighborValue - (int)regionMean) <= tolerance) {
+                    visited[nidx] = true;
+                    dst[nidx] = 255;
 
-                        sum += neighborValue;
-                        count++;
+                    regionCount++;
+                    regionMean += ((float)neighborValue - regionMean) / (float)regionCount;
 
-                        if (top >= STACK_SIZE) {
-                            memory_free(visited);
-                            memory_free(stack);
-                            return EMBEDDIP_ERROR_OUT_OF_MEMORY;  // Stack overflow
-                        }
+                    if (top >= STACK_SIZE) {
+                        memory_free(visited);
+                        memory_free(stack);
+                        return EMBEDDIP_ERROR_OUT_OF_MEMORY;  // Stack overflow
                     }
+                    stack[top++] = (Point){nx, ny};
                 }
             }
         }
@@ -694,9 +707,9 @@ colorRegionGrowing_single(const Image *inImg, Image *outImg, int seedX, int seed
     int top = 0;
 
     int seedIndex = seedY * width + seedX;
-    float h0 = src[seedIndex * 3] / 255.0f;
-    float s0 = src[seedIndex * 3 + 1] / 255.0f;
-    float i0 = src[seedIndex * 3 + 2] / 255.0f;
+    float regionMean[3];
+    read_vec3_norm(inImg, seedIndex, regionMean);
+    int regionCount = 1;
 
     stack[top++] = (Point){seedX, seedY};
     visited[seedIndex] = true;
@@ -718,16 +731,9 @@ colorRegionGrowing_single(const Image *inImg, Image *outImg, int seedX, int seed
             int nidx = ny * width + nx;
 
             if (nx >= 0 && nx < width && ny >= 0 && ny < height && !visited[nidx]) {
-                float h = src[nidx * 3] / 255.0f;
-                float s = src[nidx * 3 + 1] / 255.0f;
-                float ii = src[nidx * 3 + 2] / 255.0f;
-
-                // Hue distance with wraparound
-                float dh = fminf(fabsf(h - h0), 1.0f - fabsf(h - h0));
-                float ds = s - s0;
-                float di = ii - i0;
-
-                float dist = sqrtf(dh * dh + ds * ds + di * di);
+                float v[3];
+                read_vec3_norm(inImg, nidx, v);
+                float dist = color_distance(v, regionMean, inImg->format);
 
                 if (dist <= tolerance) {
                     visited[nidx] = true;
@@ -737,6 +743,12 @@ colorRegionGrowing_single(const Image *inImg, Image *outImg, int seedX, int seed
                     dst[nidx * 3 + 1] = src[nidx * 3 + 1];
                     dst[nidx * 3 + 2] = src[nidx * 3 + 2];
 
+                    // Update running region mean (adaptive region growing).
+                    regionCount++;
+                    regionMean[0] += (v[0] - regionMean[0]) / (float)regionCount;
+                    regionMean[1] += (v[1] - regionMean[1]) / (float)regionCount;
+                    regionMean[2] += (v[2] - regionMean[2]) / (float)regionCount;
+
                     stack[top++] = (Point){nx, ny};
 
                     if (top >= STACK_SIZE) {
@@ -822,66 +834,93 @@ embeddip_status_t colorRegionGrowing(const Image *inImg,
     const int dx[4] = {0, -1, 1, 0};
     const int dy[4] = {-1, 0, 0, 1};
 
+    memset(visited, 0, (size_t)inImg->size * sizeof(bool));
+
+    // Global multi-seed region: one visited map and one adaptive region model.
+    int top = 0;
+    float regionMean[3] = {0.0f, 0.0f, 0.0f};
+    int regionCount = 0;
+
     for (int s = 0; s < numSeeds; ++s) {
         int seedX = seeds[s].x;
         int seedY = seeds[s].y;
-
         if ((unsigned)seedX >= (unsigned)width || (unsigned)seedY >= (unsigned)height)
             continue;
 
-        const int seedIndex = seedY * width + seedX;
-
-        memset(visited, 0, (size_t)inImg->size * sizeof(bool));
-
-        float seedVec[3];
-        read_vec3_norm(inImg, seedIndex, seedVec);
+        int seedIndex = seedY * width + seedX;
+        if (visited[seedIndex])
+            continue;
 
-        int top = 0;
+        if (top >= STACK_SIZE) {
+            memory_free(visited);
+            memory_free(stack);
+            return EMBEDDIP_ERROR_OUT_OF_MEMORY;
+        }
         stack[top++] = (Point){seedX, seedY};
         visited[seedIndex] = true;
+
+        float v[3];
+        read_vec3_norm(inImg, seedIndex, v);
+        regionCount++;
+        regionMean[0] += (v[0] - regionMean[0]) / (float)regionCount;
+        regionMean[1] += (v[1] - regionMean[1]) / (float)regionCount;
+        regionMean[2] += (v[2] - regionMean[2]) / (float)regionCount;
+
         if (outputColorful) {
-            for (int c = 0; c < 3; ++c) {
-                outData[seedIndex * 3 + c] = inData[seedIndex * inDepth + c];
-            }
+            outData[seedIndex * 3 + 0] = (uint8_t)CLAMP((int)lrintf(v[0] * 255.0f), 0, 255);
+            outData[seedIndex * 3 + 1] = (uint8_t)CLAMP((int)lrintf(v[1] * 255.0f), 0, 255);
+            outData[seedIndex * 3 + 2] = (uint8_t)CLAMP((int)lrintf(v[2] * 255.0f), 0, 255);
         } else {
             outData[seedIndex] = 255;
         }
+    }
 
-        while (top > 0) {
-            Point p = stack[--top];
+    if (regionCount == 0) {
+        memory_free(visited);
+        memory_free(stack);
+        return EMBEDDIP_ERROR_INVALID_ARG;
+    }
 
-            for (int d = 0; d < 4; ++d) {
-                int nx = p.x + dx[d];
-                int ny = p.y + dy[d];
+    while (top > 0) {
+        Point p = stack[--top];
 
-                if ((unsigned)nx >= (unsigned)width || (unsigned)ny >= (unsigned)height)
-                    continue;
+        for (int d = 0; d < 4; ++d) {
+            int nx = p.x + dx[d];
+            int ny = p.y + dy[d];
 
-                int nidx = ny * width + nx;
-                if (visited[nidx])
-                    continue;
+            if ((unsigned)nx >= (unsigned)width || (unsigned)ny >= (unsigned)height)
+                continue;
 
-                float v[3];
-                read_vec3_norm(inImg, nidx, v);
+            int nidx = ny * width + nx;
+            if (visited[nidx])
+                continue;
 
-                float dist = color_distance(v, seedVec, inImg->format);
-                if (dist <= tolerance) {
-                    visited[nidx] = true;
-                    if (outputColorful) {
-                        for (int c = 0; c < 3; ++c) {
-                            outData[nidx * 3 + c] = inData[nidx * inDepth + c];
-                        }
-                    } else {
-                        outData[nidx] = 255;
-                    }
+            float v[3];
+            read_vec3_norm(inImg, nidx, v);
+
+            float dist = color_distance(v, regionMean, inImg->format);
+            if (dist <= tolerance) {
+                visited[nidx] = true;
+
+                if (outputColorful) {
+                    outData[nidx * 3 + 0] = (uint8_t)CLAMP((int)lrintf(v[0] * 255.0f), 0, 255);
+                    outData[nidx * 3 + 1] = (uint8_t)CLAMP((int)lrintf(v[1] * 255.0f), 0, 255);
+                    outData[nidx * 3 + 2] = (uint8_t)CLAMP((int)lrintf(v[2] * 255.0f), 0, 255);
+                } else {
+                    outData[nidx] = 255;
+                }
 
-                    if (top >= STACK_SIZE) {
-                        memory_free(visited);
-                        memory_free(stack);
-                        return EMBEDDIP_ERROR_OUT_OF_MEMORY;  // Stack overflow
-                    }
-                    stack[top++] = (Point){nx, ny};
+                regionCount++;
+                regionMean[0] += (v[0] - regionMean[0]) / (float)regionCount;
+                regionMean[1] += (v[1] - regionMean[1]) / (float)regionCount;
+                regionMean[2] += (v[2] - regionMean[2]) / (float)regionCount;
+
+                if (top >= STACK_SIZE) {
+                    memory_free(visited);
+                    memory_free(stack);
+                    return EMBEDDIP_ERROR_OUT_OF_MEMORY;  // Stack overflow
                 }
+                stack[top++] = (Point){nx, ny};
             }
         }
     }

From 61527f3d148ddcb79585a554658d9dbd8742022e Mon Sep 17 00:00:00 2001
From: Ozan Durgut <ozandurgut.2001@hotmail.com>
Date: Thu, 23 Apr 2026 23:43:18 +0200
Subject: [PATCH 16/18] board/stm32f7: make SDRAM pool start configurable in
 memory_init

Allow memory_init() to accept either an SDRAM offset or an
absolute SDRAM address for compatibility, validate computed bounds, and
fall back to the default reserved pool start when input is
invalid or leaves no room for allocator metadata.

Signed-off-by: Ozan Durgut <ozandurgut.2001@hotmail.com>
---
 board/stm32f7/board_stm32f7_memory.c | 35 +++++++++++++++++++++++++---
 1 file changed, 32 insertions(+), 3 deletions(-)

diff --git a/board/stm32f7/board_stm32f7_memory.c b/board/stm32f7/board_stm32f7_memory.c
index 81967c4..cc61e0a 100755
--- a/board/stm32f7/board_stm32f7_memory.c
+++ b/board/stm32f7/board_stm32f7_memory.c
@@ -17,10 +17,12 @@
     // Reserve 512KB (0x80000) to be safe
     #define CAMERA_LCD_FRAMEBUFFER_SIZE 0x80000  // 512KB reserved
 
-    #define MEMORY_POOL_SIZE (1024 * 1024 * 8 - CAMERA_LCD_FRAMEBUFFER_SIZE)  // ~6MB
+    #define SDRAM_TOTAL_SIZE (1024 * 1024 * 8)
+    #define MEMORY_POOL_SIZE (SDRAM_TOTAL_SIZE - CAMERA_LCD_FRAMEBUFFER_SIZE)  // ~7.5MB
     #define DEFAULT_MEMORY_POOL_ADDR (SDRAM_BANK_ADDR + CAMERA_LCD_FRAMEBUFFER_SIZE)
 
 static uint8_t *memory_pool = (uint8_t *)DEFAULT_MEMORY_POOL_ADDR;
+static size_t memory_pool_size = MEMORY_POOL_SIZE;
 
 typedef struct MemoryBlock {
     uint32_t magic;
@@ -43,7 +45,7 @@ static inline uintptr_t pool_start_addr(void)
 
 static inline uintptr_t pool_end_addr(void)
 {
-    return (uintptr_t)memory_pool + MEMORY_POOL_SIZE;
+    return (uintptr_t)memory_pool + memory_pool_size;
 }
 
 static inline int ptr_in_pool(const void *p)
@@ -66,9 +68,36 @@ void memory_init(uintptr_t pool_start_addr)
     if (initialized)
         return;
 
+    // Accept both:
+    // 1) offset from SDRAM base (preferred),
+    // 2) absolute SDRAM address for backward compatibility.
+    uintptr_t offset = pool_start_addr;
+    if (pool_start_addr >= SDRAM_BANK_ADDR) {
+        offset = pool_start_addr - SDRAM_BANK_ADDR;
+    }
+    if (offset > SDRAM_TOTAL_SIZE - BLOCK_SIZE) {
+        // Invalid offset: fall back to default reserved location.
+        offset = CAMERA_LCD_FRAMEBUFFER_SIZE;
+    }
+
+    uintptr_t start = (uintptr_t)SDRAM_BANK_ADDR + offset;
+    uintptr_t end = (uintptr_t)SDRAM_BANK_ADDR + SDRAM_TOTAL_SIZE;
+
+    if (start + BLOCK_SIZE >= end) {
+        // Not enough room for allocator metadata; fall back to default.
+        start = DEFAULT_MEMORY_POOL_ADDR;
+    }
+
+    memory_pool = (uint8_t *)start;
+    memory_pool_size = (size_t)(end - start);
+    if (memory_pool_size <= BLOCK_SIZE) {
+        memory_pool = (uint8_t *)DEFAULT_MEMORY_POOL_ADDR;
+        memory_pool_size = MEMORY_POOL_SIZE;
+    }
+
     free_list = (MemoryBlock *)memory_pool;
     free_list->magic = MEMBLOCK_MAGIC;
-    free_list->size = MEMORY_POOL_SIZE - BLOCK_SIZE;
+    free_list->size = memory_pool_size - BLOCK_SIZE;
     free_list->next = NULL;
     free_list->is_free = 1;
 

From fb472058494cb203bda620fc7784c7dfbe0ffc9e Mon Sep 17 00:00:00 2001
From: Ozan Durgut <ozandurgut.2001@hotmail.com>
Date: Thu, 23 Apr 2026 23:46:12 +0200
Subject: [PATCH 17/18] imgproc: morph: document first-iteration ping
 initialization

Add clarifying comments in erode() and dilate() that the ping buffer is
initialized from source pixels before the loop so the first iteration
runs on original image data.

Signed-off-by: Ozan Durgut <ozandurgut.2001@hotmail.com>
---
 imgproc/morph.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/imgproc/morph.c b/imgproc/morph.c
index 755cee1..42ffe4d 100644
--- a/imgproc/morph.c
+++ b/imgproc/morph.c
@@ -88,6 +88,7 @@ embeddip_status_t erode(const Image *src, Image *dst, const Kernel *kernel, uint
         return EMBEDDIP_ERROR_OUT_OF_MEMORY;
     }
 
+    // First iteration must start from source image content.
     memcpy(ping, src->pixels, src->size);
 
     for (uint8_t it = 0; it < iterations; ++it) {
@@ -164,6 +165,7 @@ embeddip_status_t dilate(const Image *src, Image *dst, const Kernel *kernel, uin
         return EMBEDDIP_ERROR_OUT_OF_MEMORY;
     }
 
+    // First iteration must start from source image content.
     memcpy(ping, src->pixels, src->size);
 
     for (uint8_t it = 0; it < iterations; ++it) {

From 08e1e5a1a6964c2be56c4b80cbf1050c460c892f Mon Sep 17 00:00:00 2001
From: Ozan Durgut <ozandurgut.2001@hotmail.com>
Date: Thu, 23 Apr 2026 23:54:29 +0200
Subject: [PATCH 18/18] style: run clang-format on public headers

Signed-off-by: Ozan Durgut <ozandurgut.2001@hotmail.com>
---
 embedDIP.h               |  2 +-
 embedDIP_configs.h       | 28 ++++++++++++++++++----------
 wrapper/ImageWrapper.hpp |  2 +-
 3 files changed, 20 insertions(+), 12 deletions(-)

diff --git a/embedDIP.h b/embedDIP.h
index 10b4645..28f80fa 100755
--- a/embedDIP.h
+++ b/embedDIP.h
@@ -71,8 +71,8 @@ extern "C" {
 #include "core/image.h"                  /**< Image type and utilities. */
 #include "core/memory_manager.h"         /**< Allocators and memory helpers. */
 #include "device/serial/serial.h"        /**< Serial I/O abstraction. */
-#include "imgproc/compress.h"            /**< JPEG compression helper. */
 #include "imgproc/color.h"               /**< Color conversions and helpers. */
+#include "imgproc/compress.h"            /**< JPEG compression helper. */
 #include "imgproc/connectedcomponents.h" /**< Connected components labeling. */
 #include "imgproc/drawing.h"             /**< Drawing primitives and shapes. */
 #include "imgproc/fft.h"                 /**< Frequency-domain processing. */
diff --git a/embedDIP_configs.h b/embedDIP_configs.h
index 0c8f856..b381edc 100755
--- a/embedDIP_configs.h
+++ b/embedDIP_configs.h
@@ -18,7 +18,8 @@
 /* Hard-switch guard (legacy macros removed)                                   */
 /* -------------------------------------------------------------------------- */
 #if defined(TARGET_BOARD_STM32F7) || defined(TARGET_BOARD_ESP32) || defined(TARGET_BOARD_OTHER)
-    #error "Legacy TARGET_BOARD_* macros are not supported. Use EMBED_DIP_BOARD_* and EMBED_DIP_ARCH_* instead."
+    #error                                                                                         \
+        "Legacy TARGET_BOARD_* macros are not supported. Use EMBED_DIP_BOARD_* and EMBED_DIP_ARCH_* instead."
 #endif
 
 /* -------------------------------------------------------------------------- */
@@ -37,23 +38,27 @@
 
 /* Sanity check: exactly one board. */
 #if ((defined(EMBED_DIP_BOARD_STM32F7) ? 1 : 0) + (defined(EMBED_DIP_BOARD_ESP32) ? 1 : 0)) == 0
-    #error "No board selected: define exactly one of EMBED_DIP_BOARD_STM32F7 or EMBED_DIP_BOARD_ESP32."
+    #error                                                                                         \
+        "No board selected: define exactly one of EMBED_DIP_BOARD_STM32F7 or EMBED_DIP_BOARD_ESP32."
 #elif ((defined(EMBED_DIP_BOARD_STM32F7) ? 1 : 0) + (defined(EMBED_DIP_BOARD_ESP32) ? 1 : 0)) > 1
-    #error "Multiple boards selected: define only one of EMBED_DIP_BOARD_STM32F7 or EMBED_DIP_BOARD_ESP32."
+    #error                                                                                         \
+        "Multiple boards selected: define only one of EMBED_DIP_BOARD_STM32F7 or EMBED_DIP_BOARD_ESP32."
 #endif
 
 /* Sanity check: exactly one architecture family. */
 #if ((defined(EMBED_DIP_ARCH_ARM) ? 1 : 0) + (defined(EMBED_DIP_ARCH_XTENSA) ? 1 : 0)) == 0
-    #error "No architecture family selected: define exactly one of EMBED_DIP_ARCH_ARM or EMBED_DIP_ARCH_XTENSA."
+    #error                                                                                         \
+        "No architecture family selected: define exactly one of EMBED_DIP_ARCH_ARM or EMBED_DIP_ARCH_XTENSA."
 #elif ((defined(EMBED_DIP_ARCH_ARM) ? 1 : 0) + (defined(EMBED_DIP_ARCH_XTENSA) ? 1 : 0)) > 1
     #error "Multiple architecture families selected: define only one EMBED_DIP_ARCH_* macro."
 #endif
 
 /* Sanity check: exactly one CPU variant. */
-#if ((defined(EMBED_DIP_CPU_CORTEX_M7) ? 1 : 0) + (defined(EMBED_DIP_CPU_LX6) ? 1 : 0) + \
+#if ((defined(EMBED_DIP_CPU_CORTEX_M7) ? 1 : 0) + (defined(EMBED_DIP_CPU_LX6) ? 1 : 0) +           \
      (defined(EMBED_DIP_CPU_LX7) ? 1 : 0)) == 0
-    #error "No CPU selected: define exactly one of EMBED_DIP_CPU_CORTEX_M7, EMBED_DIP_CPU_LX6, EMBED_DIP_CPU_LX7."
-#elif ((defined(EMBED_DIP_CPU_CORTEX_M7) ? 1 : 0) + (defined(EMBED_DIP_CPU_LX6) ? 1 : 0) + \
+    #error                                                                                         \
+        "No CPU selected: define exactly one of EMBED_DIP_CPU_CORTEX_M7, EMBED_DIP_CPU_LX6, EMBED_DIP_CPU_LX7."
+#elif ((defined(EMBED_DIP_CPU_CORTEX_M7) ? 1 : 0) + (defined(EMBED_DIP_CPU_LX6) ? 1 : 0) +         \
        (defined(EMBED_DIP_CPU_LX7) ? 1 : 0)) > 1
     #error "Multiple CPUs selected: define only one EMBED_DIP_CPU_* macro."
 #endif
@@ -61,11 +66,14 @@
 /* Board/architecture/CPU compatibility matrix. */
 #if defined(EMBED_DIP_BOARD_STM32F7)
     #if !(defined(EMBED_DIP_ARCH_ARM) && defined(EMBED_DIP_CPU_CORTEX_M7))
-        #error "Invalid combination: EMBED_DIP_BOARD_STM32F7 requires EMBED_DIP_ARCH_ARM + EMBED_DIP_CPU_CORTEX_M7."
+        #error                                                                                     \
+            "Invalid combination: EMBED_DIP_BOARD_STM32F7 requires EMBED_DIP_ARCH_ARM + EMBED_DIP_CPU_CORTEX_M7."
     #endif
 #elif defined(EMBED_DIP_BOARD_ESP32)
-    #if !(defined(EMBED_DIP_ARCH_XTENSA) && (defined(EMBED_DIP_CPU_LX6) || defined(EMBED_DIP_CPU_LX7)))
-        #error "Invalid combination: EMBED_DIP_BOARD_ESP32 requires EMBED_DIP_ARCH_XTENSA + (EMBED_DIP_CPU_LX6 or EMBED_DIP_CPU_LX7)."
+    #if !(defined(EMBED_DIP_ARCH_XTENSA) &&                                                        \
+          (defined(EMBED_DIP_CPU_LX6) || defined(EMBED_DIP_CPU_LX7)))
+        #error                                                                                     \
+            "Invalid combination: EMBED_DIP_BOARD_ESP32 requires EMBED_DIP_ARCH_XTENSA + (EMBED_DIP_CPU_LX6 or EMBED_DIP_CPU_LX7)."
     #endif
 #endif
 
diff --git a/wrapper/ImageWrapper.hpp b/wrapper/ImageWrapper.hpp
index c9a0047..1d6583a 100755
--- a/wrapper/ImageWrapper.hpp
+++ b/wrapper/ImageWrapper.hpp
@@ -14,8 +14,8 @@ extern "C" {
 #include "core/memory_manager.h"         /**< Allocators and memory helpers. */
 #include "device/serial/serial.h"        /**< Serial I/O abstraction. */
 #include "imgproc/color.h"               /**< Color conversions and helpers. */
-#include "imgproc/connectedcomponents.h" /**< Connected components labeling. */
 #include "imgproc/compress.h"            /**< JPEG compression helper. */
+#include "imgproc/connectedcomponents.h" /**< Connected components labeling. */
 #include "imgproc/drawing.h"             /**< Drawing primitives and shapes. */
 #include "imgproc/fft.h"                 /**< Frequency-domain processing. */
 #include "imgproc/filter.h"              /**< Spatial filtering and kernels. */