diff --git a/.gitignore b/.gitignore
index 518de08..5b2e431 100755
--- a/.gitignore
+++ b/.gitignore
@@ -36,9 +36,7 @@ CMakeFiles/
 cmake_install.cmake
 CTestTestfile.cmake
 Makefile
-*.cmake
 !CMakeLists.txt
-!cmake/*.cmake
 compile_commands.json
 .ninja_deps
 .ninja_log
diff --git a/CMakeLists.txt b/CMakeLists.txt
index a133d97..b87ac32 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -15,8 +15,21 @@ project(embedDIP
     DESCRIPTION "Portable embedded digital image processing library"
 )
 
-set(EMBEDDIP_TARGET_PLATFORM "STM32F7" CACHE STRING "Target platform: STM32F7, ESP32, or HOST")
-set_property(CACHE EMBEDDIP_TARGET_PLATFORM PROPERTY STRINGS "STM32F7" "ESP32" "HOST")
+# Target selection. All three cache entries default to empty and have to be set
+# explicitly; they are checked for presence and for mutual compatibility after
+# the feature options below.
+#
+# Board: its lower-cased value names the board/<board>/ profile directory.
+set(EMBEDDIP_TARGET_BOARD "" CACHE STRING "Target board (required): STM32F7 or ESP32")
+set_property(CACHE EMBEDDIP_TARGET_BOARD PROPERTY STRINGS "STM32F7" "ESP32")
+
+# Architecture family: its lower-cased value names the arch/<arch>/ profile directory.
+set(EMBEDDIP_ARCH "" CACHE STRING "Architecture family (required): ARM or XTENSA")
+set_property(CACHE EMBEDDIP_ARCH PROPERTY STRINGS "ARM" "XTENSA")
+
+# CPU variant: read by the architecture profile to pick CPU-specific definitions.
+set(EMBEDDIP_CPU "" CACHE STRING "CPU variant (required): CORTEX_M7, LX6, LX7")
+set_property(CACHE EMBEDDIP_CPU PROPERTY STRINGS "CORTEX_M7" "LX6" "LX7")
 
 option(EMBEDDIP_ENABLE_UART_LOGGING "Enable UART logging" ON)
 option(EMBEDDIP_ENABLE_IMAGE_PROCESSING "Enable image processing modules" ON)
@@ -24,6 +30,41 @@ option(EMBEDDIP_ENABLE_CAMERA_INPUT "Enable camera input interfaces" ON)
 option(EMBEDDIP_ENABLE_DISPLAY_OUTPUT "Enable display output interfaces" ON)
 option(EMBEDDIP_BUILD_DOCS "Build documentation with Doxygen" OFF)
 
+# All three selectors are mandatory. Check them in declaration order and stop at
+# the first one left empty, reporting the values it accepts.
+set(_embeddip_supported_EMBEDDIP_TARGET_BOARD "STM32F7, ESP32")
+set(_embeddip_supported_EMBEDDIP_ARCH "ARM, XTENSA")
+set(_embeddip_supported_EMBEDDIP_CPU "CORTEX_M7, LX6, LX7")
+foreach(_embeddip_selector IN ITEMS EMBEDDIP_TARGET_BOARD EMBEDDIP_ARCH EMBEDDIP_CPU)
+    if(${_embeddip_selector} STREQUAL "")
+        message(FATAL_ERROR
+            "${_embeddip_selector} is required. Supported values: ${_embeddip_supported_${_embeddip_selector}}"
+        )
+    endif()
+endforeach()
+
+# Compatibility matrix: one BOARD+ARCH+CPU triple per supported configuration.
+set(_embeddip_supported_triples
+    "STM32F7+ARM+CORTEX_M7"
+    "ESP32+XTENSA+LX6"
+    "ESP32+XTENSA+LX7"
+)
+list(FIND _embeddip_supported_triples
+    "${EMBEDDIP_TARGET_BOARD}+${EMBEDDIP_ARCH}+${EMBEDDIP_CPU}"
+    _embeddip_triple_index
+)
+
+if(_embeddip_triple_index EQUAL -1)
+    set(_embeddip_pair_valid FALSE)
+    string(REPLACE ";" ", " _embeddip_supported_text "${_embeddip_supported_triples}")
+    message(FATAL_ERROR
+        "Invalid board/arch/cpu combination: ${EMBEDDIP_TARGET_BOARD} + ${EMBEDDIP_ARCH} + ${EMBEDDIP_CPU}. "
+        "Supported: ${_embeddip_supported_text}"
+    )
+else()
+    set(_embeddip_pair_valid TRUE)
+endif()
+
 set(CMAKE_C_STANDARD 11)
 set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_C_STANDARD_REQUIRED ON)
@@ -40,6 +77,10 @@ set(IMGPROC_SOURCES
     # Main header (includes all sub-modules)
     imgproc/pixel.h
 
+    # Compression
+    imgproc/compress.c
+    imgproc/compress.h
+
     # Color operations
     imgproc/color.c
     imgproc/color.h
@@ -85,6 +126,7 @@ set(IMGPROC_SOURCES
     imgproc/misc.h
 
     # FFT operations
+    imgproc/fft.c
     imgproc/fft.h
 )
 
@@ -111,71 +153,53 @@ set(BOARD_COMMON_SOURCES
     board/common.h
 )
 
-# Platform-specific sources
-if(EMBEDDIP_TARGET_PLATFORM STREQUAL "STM32F7")
-    set(BOARD_SOURCES
-        ${BOARD_COMMON_SOURCES}
-        board/stm32f7/board_stm32f7_common.c
-        board/stm32f7/board_stm32f7_fft.c
-        board/stm32f7/board_stm32f7_memory.c
-        board/stm32f7/configs.h
-    )
-
-    set(DEVICE_SOURCES
-        ${DEVICE_COMMON_SOURCES}
-        device/camera/ov5640/stm32_ov5640.c
-        device/display/rk043fn48h/stm32_rk043fn48h.c
-        device/serial/stm32_uart/stm32_uart.c
-    )
-
-    set(PLATFORM_DEFINES
-        STM32F7xx
-        ARM_MATH_CM7
-        TARGET_BOARD_STM32F7=1
-    )
-
-elseif(EMBEDDIP_TARGET_PLATFORM STREQUAL "ESP32")
-    set(BOARD_SOURCES
-        ${BOARD_COMMON_SOURCES}
-        board/esp32/board_esp32_common.cpp
-        board/esp32/board_esp32_fft.cpp
-        board/esp32/board_esp32_memory.cpp
-    )
-
-    set(DEVICE_SOURCES
-        ${DEVICE_COMMON_SOURCES}
-        device/camera/ov2640/esp32_ov2640.cpp
-        device/serial/esp32_uart/esp32_uart.cpp
-    )
+# Each port ships its profile next to its sources: board/<board>/board_profile.cmake and
+# arch/<arch>/arch_profile.cmake, where the directory names are the lower-cased selector values.
+string(TOLOWER "${EMBEDDIP_TARGET_BOARD}" EMBEDDIP_BOARD_PROFILE)
+string(TOLOWER "${EMBEDDIP_ARCH}" EMBEDDIP_ARCH_PROFILE)
 
-    set(PLATFORM_DEFINES
-        ARDUINO_ARCH_ESP32
-        TARGET_BOARD_ESP32=1
-    )
+set(EMBEDDIP_BOARD_PROFILE_FILE "${CMAKE_CURRENT_SOURCE_DIR}/board/${EMBEDDIP_BOARD_PROFILE}/board_profile.cmake")
+set(EMBEDDIP_ARCH_PROFILE_FILE "${CMAKE_CURRENT_SOURCE_DIR}/arch/${EMBEDDIP_ARCH_PROFILE}/arch_profile.cmake")
 
-else()
-    message(FATAL_ERROR "Unknown platform: ${EMBEDDIP_TARGET_PLATFORM}. Must be STM32F7, ESP32, or HOST")
+if(NOT EXISTS "${EMBEDDIP_BOARD_PROFILE_FILE}")
+    message(FATAL_ERROR "Board profile not found: ${EMBEDDIP_BOARD_PROFILE_FILE}")
+endif()
+if(NOT EXISTS "${EMBEDDIP_ARCH_PROFILE_FILE}")
+    message(FATAL_ERROR "Architecture profile not found: ${EMBEDDIP_ARCH_PROFILE_FILE}")
 endif()
 
+include("${EMBEDDIP_BOARD_PROFILE_FILE}")
+include("${EMBEDDIP_ARCH_PROFILE_FILE}")
+
 # === Create Library Target ===
 add_library(embedDIP STATIC
     ${CORE_SOURCES}
     ${IMGPROC_SOURCES}
-    ${BOARD_SOURCES}
-    ${DEVICE_SOURCES}
+    ${EMBEDDIP_BOARD_SOURCES}
+    ${EMBEDDIP_ARCH_SOURCES}
+    ${EMBEDDIP_DEVICE_SOURCES}
     ${WRAPPER_SOURCES}
 )
 
 # === Compiler Definitions ===
 target_compile_definitions(embedDIP PUBLIC
     USE_EMBED_DIP
-    ${PLATFORM_DEFINES}
+    ${EMBEDDIP_BOARD_DEFINES}
+    ${EMBEDDIP_ARCH_DEFINES}
     $<$<BOOL:${EMBEDDIP_ENABLE_UART_LOGGING}>:ENABLE_UART_LOGGING=1>
     $<$<BOOL:${EMBEDDIP_ENABLE_IMAGE_PROCESSING}>:ENABLE_IMAGE_PROCESSING=1>
     $<$<BOOL:${EMBEDDIP_ENABLE_CAMERA_INPUT}>:ENABLE_CAMERA_INPUT=1>
     $<$<BOOL:${EMBEDDIP_ENABLE_DISPLAY_OUTPUT}>:ENABLE_DISPLAY_OUTPUT=1>
 )
 
+if(EMBEDDIP_ARCH_PRIVATE_DEFINES)
+    target_compile_definitions(embedDIP PRIVATE ${EMBEDDIP_ARCH_PRIVATE_DEFINES})
+endif()
+
+if(EMBEDDIP_ARCH_COMPILE_OPTIONS)
+    target_compile_options(embedDIP PUBLIC ${EMBEDDIP_ARCH_COMPILE_OPTIONS})
+endif()
+
 # === Include Directories ===
 target_include_directories(embedDIP PUBLIC
     $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
@@ -189,11 +213,18 @@ target_include_directories(embedDIP PUBLIC
     $<INSTALL_INTERFACE:include/embedDIP>
 )
 
-# Platform-specific includes
-if(EMBEDDIP_TARGET_PLATFORM STREQUAL "STM32F7")
-    # Try to find CMSIS and HAL includes relative to project
+if(EMBEDDIP_BOARD_INCLUDE_DIRS)
+    foreach(_embeddip_board_inc IN LISTS EMBEDDIP_BOARD_INCLUDE_DIRS)
+        target_include_directories(embedDIP PUBLIC
+            $<BUILD_INTERFACE:${_embeddip_board_inc}>
+        )
+    endforeach()
+endif()
+
+# Board-specific include dependencies from parent project layout
+if(EMBEDDIP_TARGET_BOARD STREQUAL "STM32F7")
     if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/../Drivers")
         target_include_directories(embedDIP PUBLIC
             $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../Drivers/STM32F7xx_HAL_Driver/Inc>
             $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../Drivers/CMSIS/Device/ST/STM32F7xx/Include>
             $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../Drivers/CMSIS/Core/Include>
@@ -201,6 +232,19 @@ if(EMBEDDIP_TARGET_PLATFORM STREQUAL "STM32F7")
             $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../Core/Inc>
         )
 
+        # LibJPEG is optional: publish its include paths and EMBEDDIP_HAVE_LIBJPEG
+        # together, and only when jpeglib.h is really present in the parent project.
+        set(_embeddip_libjpeg_inc "${CMAKE_CURRENT_SOURCE_DIR}/../Middlewares/Third_Party/LibJPEG/include")
+        if(EXISTS "${_embeddip_libjpeg_inc}/jpeglib.h")
+            target_include_directories(embedDIP PUBLIC
+                $<BUILD_INTERFACE:${_embeddip_libjpeg_inc}>
+                $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../LIBJPEG/Target>
+            )
+            target_compile_definitions(embedDIP PUBLIC EMBEDDIP_HAVE_LIBJPEG=1)
+        else()
+            message(WARNING "LibJPEG headers not found for embedDIP compression module.")
+        endif()
+
         # CMSIS-DSP sources for STM32 (C and assembly files)
         file(GLOB_RECURSE CMSIS_DSP_SOURCES
             ${CMAKE_CURRENT_SOURCE_DIR}/../Drivers/CMSIS/DSP/Source/*.c
@@ -208,7 +247,6 @@ if(EMBEDDIP_TARGET_PLATFORM STREQUAL "STM32F7")
         )
         if(CMSIS_DSP_SOURCES)
             target_sources(embedDIP PRIVATE ${CMSIS_DSP_SOURCES})
-            # CMSIS-DSP needs __FPU_PRESENT defined
             set_source_files_properties(${CMSIS_DSP_SOURCES}
                 PROPERTIES COMPILE_DEFINITIONS "__FPU_PRESENT=1"
             )
@@ -217,15 +255,6 @@ if(EMBEDDIP_TARGET_PLATFORM STREQUAL "STM32F7")
     else()
         message(WARNING "CMSIS/HAL drivers not found. You may need to specify include paths manually.")
     endif()
-
-    target_include_directories(embedDIP PUBLIC
-        $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/board/stm32f7>
-    )
-
-elseif(EMBEDDIP_TARGET_PLATFORM STREQUAL "ESP32")
-    target_include_directories(embedDIP PUBLIC
-        $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/board/esp32>
-    )
 endif()
 
 # === Link Libraries ===
@@ -270,7 +299,7 @@ install(EXPORT embedDIPTargets
 include(CMakePackageConfigHelpers)
 
 configure_package_config_file(
-    ${CMAKE_CURRENT_SOURCE_DIR}/cmake/embedDIPConfig.cmake.in
+    ${CMAKE_CURRENT_SOURCE_DIR}/embedDIPConfig.cmake.in
     ${CMAKE_CURRENT_BINARY_DIR}/embedDIPConfig.cmake
     INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/embedDIP
 )
diff --git a/arch/arm/arch_profile.cmake b/arch/arm/arch_profile.cmake
new file mode 100644
index 0000000..37285ca
--- /dev/null
+++ b/arch/arm/arch_profile.cmake
@@ -0,0 +1,22 @@
+# Architecture profile for the ARM family.
+#
+# Included by the top-level CMakeLists.txt, which consumes the EMBEDDIP_ARCH_*
+# variables assigned here. Cortex-M7 is the only CPU variant handled.
+
+# Reject unknown CPU variants before anything else is configured.
+if(NOT EMBEDDIP_CPU STREQUAL "CORTEX_M7")
+    message(FATAL_ERROR "Unsupported CPU for ARM arch: ${EMBEDDIP_CPU}. Supported: CORTEX_M7")
+endif()
+
+# Architecture-layer sources, relative to the directory of the top-level CMakeLists.txt.
+set(EMBEDDIP_ARCH_SOURCES arch/arm/cm7_common.c arch/arm/cm7_fft.c)
+
+# Definitions attached to the library with PUBLIC visibility: the architecture
+# tag, the CPU tag, and ARM_MATH_CM7.
+set(EMBEDDIP_ARCH_DEFINES EMBED_DIP_ARCH_ARM=1 EMBED_DIP_CPU_CORTEX_M7=1 ARM_MATH_CM7)
+
+# Definitions attached with PRIVATE visibility (library sources only).
+set(EMBEDDIP_ARCH_PRIVATE_DEFINES __FPU_PRESENT=1)
+
+# Code-generation flags, attached with PUBLIC visibility.
+set(EMBEDDIP_ARCH_COMPILE_OPTIONS -mcpu=cortex-m7 -mfpu=fpv5-sp-d16 -mfloat-abi=hard -mthumb)
diff --git a/board/stm32f7/board_stm32f7_common.c b/arch/arm/cm7_common.c
old mode 100755
new mode 100644
similarity index 89%
rename from board/stm32f7/board_stm32f7_common.c
rename to arch/arm/cm7_common.c
index de25c60..dd6238c
--- a/board/stm32f7/board_stm32f7_common.c
+++ b/arch/arm/cm7_common.c
@@ -3,7 +3,7 @@
 
 #include <embedDIP_configs.h>
 
-#ifdef TARGET_BOARD_STM32F7
+#if defined(EMBED_DIP_ARCH_ARM) && defined(EMBED_DIP_CPU_CORTEX_M7)
 
     #include "core/image.h"
 
@@ -31,4 +31,4 @@ uint32_t toc()
     return DWT->CYCCNT;       // Return elapsed cycles
 }
 
-#endif
\ No newline at end of file
+#endif
diff --git a/arch/arm/cm7_fft.c b/arch/arm/cm7_fft.c
new file mode 100644
index 0000000..d8e58dd
--- /dev/null
+++ b/arch/arm/cm7_fft.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2025 EmbedDIP
+
+#include <embedDIP_configs.h>
+
+#if defined(EMBED_DIP_ARCH_ARM) && defined(EMBED_DIP_CPU_CORTEX_M7)
+
+    #include "arm_const_structs.h"
+    #include "arm_math.h"
+    #include <arch/fft_backend.h>
+
+    // Only transform length supported by this backend; it must match the
+    // arm_cfft_sR_f32_len256 instance used below.
+    #define CM7_FFT_LENGTH 256
+
+// Validates the arguments, then runs an in-place arm_cfft_f32 on `data`.
+// `inverse` is passed through as the ifftFlag argument (0 = forward, 1 = inverse);
+// the bit-reversal flag is always 1.
+static embeddip_status_t cm7_fft_run(float *data, int n, uint8_t inverse)
+{
+    if (!data) {
+        return EMBEDDIP_ERROR_NULL_PTR;
+    }
+    if (n != CM7_FFT_LENGTH) {
+        return EMBEDDIP_ERROR_INVALID_SIZE;
+    }
+
+    arm_cfft_f32(&arm_cfft_sR_f32_len256, data, inverse, 1);
+    return EMBEDDIP_OK;
+}
+
+// Reports whether an n-point transform is supported; nothing is allocated.
+embeddip_status_t embeddip_fft_backend_init(int n)
+{
+    return (n == CM7_FFT_LENGTH) ? EMBEDDIP_OK : EMBEDDIP_ERROR_INVALID_SIZE;
+}
+
+// Forward transform of `data`, performed in place.
+embeddip_status_t embeddip_fft_backend_forward_1d(float *data, int n)
+{
+    return cm7_fft_run(data, n, 0);
+}
+
+// Inverse transform of `data`, performed in place.
+embeddip_status_t embeddip_fft_backend_inverse_1d(float *data, int n)
+{
+    return cm7_fft_run(data, n, 1);
+}
+
+#endif
diff --git a/arch/fft_backend.h b/arch/fft_backend.h
new file mode 100644
index 0000000..a90c6e8
--- /dev/null
+++ b/arch/fft_backend.h
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2025 EmbedDIP
+
+/**
+ * @file fft_backend.h
+ * @brief 1-D FFT entry points implemented by each architecture port
+ *        (arch/arm/cm7_fft.c and arch/xtensa/xtensa_fft.cpp).
+ */
+
+#ifndef EMBEDDIP_ARCH_FFT_BACKEND_H
+#define EMBEDDIP_ARCH_FFT_BACKEND_H
+
+#include <core/error.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @brief Prepares the backend for transforms of length @p n.
+ *
+ * @param n Transform length. The Cortex-M7 backend accepts only 256; the
+ *          Xtensa backend rejects values <= 0.
+ * @return EMBEDDIP_OK on success; otherwise EMBEDDIP_ERROR_INVALID_SIZE,
+ *         EMBEDDIP_ERROR_INVALID_ARG or EMBEDDIP_ERROR_DEVICE_ERROR,
+ *         depending on the backend.
+ */
+embeddip_status_t embeddip_fft_backend_init(int n);
+
+/**
+ * @brief Forward transform, computed in place.
+ *
+ * @param data Buffer overwritten with the result. The Xtensa backend indexes
+ *             it as @p n interleaved (re, im) pairs, i.e. 2 * n floats; the
+ *             Cortex-M7 backend passes it straight to arm_cfft_f32.
+ * @param n    Transform length (the Cortex-M7 backend accepts only 256).
+ * @return EMBEDDIP_OK on success, EMBEDDIP_ERROR_NULL_PTR if @p data is NULL,
+ *         EMBEDDIP_ERROR_INVALID_SIZE if the backend rejects @p n.
+ */
+embeddip_status_t embeddip_fft_backend_forward_1d(float *data, int n);
+
+/**
+ * @brief Inverse transform, computed in place.
+ *
+ * Parameters and status codes match embeddip_fft_backend_forward_1d(). The
+ * Xtensa backend divides the result by @p n.
+ * NOTE(review): the Cortex-M7 backend leaves any scaling to arm_cfft_f32
+ * (ifftFlag = 1) -- confirm both backends normalise identically.
+ */
+embeddip_status_t embeddip_fft_backend_inverse_1d(float *data, int n);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/arch/xtensa/arch_profile.cmake b/arch/xtensa/arch_profile.cmake
new file mode 100644
index 0000000..24d96ce
--- /dev/null
+++ b/arch/xtensa/arch_profile.cmake
@@ -0,0 +1,29 @@
+# Architecture profile: Xtensa family
+#
+# Sets the EMBEDDIP_ARCH_* variables read by the top-level CMakeLists.txt.
+
+set(EMBEDDIP_ARCH_SOURCES
+    arch/xtensa/xtensa_common.cpp
+    arch/xtensa/xtensa_fft.cpp
+)
+
+set(EMBEDDIP_ARCH_DEFINES
+    EMBED_DIP_ARCH_XTENSA=1
+)
+
+# Tag the build with the selected core; anything else is a configuration error.
+if(EMBEDDIP_CPU STREQUAL "LX6")
+    list(APPEND EMBEDDIP_ARCH_DEFINES EMBED_DIP_CPU_LX6=1)
+elseif(EMBEDDIP_CPU STREQUAL "LX7")
+    list(APPEND EMBEDDIP_ARCH_DEFINES EMBED_DIP_CPU_LX7=1)
+else()
+    message(FATAL_ERROR "Unsupported CPU for XTENSA arch: ${EMBEDDIP_CPU}. Supported: LX6, LX7")
+endif()
+
+# No private definitions or extra compiler flags are needed for this
+# architecture. Assign empty values explicitly: set() without a value unsets the
+# variable, which would let a same-named cache entry show through, and
+# EMBEDDIP_ARCH_PRIVATE_DEFINES would otherwise keep whatever the including
+# scope already held.
+set(EMBEDDIP_ARCH_PRIVATE_DEFINES "")
+set(EMBEDDIP_ARCH_COMPILE_OPTIONS "")
diff --git a/board/esp32/board_esp32_common.cpp b/arch/xtensa/xtensa_common.cpp
old mode 100755
new mode 100644
similarity index 99%
rename from board/esp32/board_esp32_common.cpp
rename to arch/xtensa/xtensa_common.cpp
index e6a739e..bd1dc89
--- a/board/esp32/board_esp32_common.cpp
+++ b/arch/xtensa/xtensa_common.cpp
@@ -3,7 +3,7 @@
 
 #include <embedDIP_configs.h>
 
-#ifdef TARGET_BOARD_ESP32
+#ifdef EMBED_DIP_ARCH_XTENSA
 
     #ifndef ESP32_COMMON_H
         #define ESP32_COMMON_H
diff --git a/arch/xtensa/xtensa_fft.cpp b/arch/xtensa/xtensa_fft.cpp
new file mode 100644
index 0000000..63039af
--- /dev/null
+++ b/arch/xtensa/xtensa_fft.cpp
@@ -0,0 +1,126 @@
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2025 EmbedDIP
+
+#include <embedDIP_configs.h>
+
+#ifdef EMBED_DIP_ARCH_XTENSA
+
+    #include "esp_dsp.h"
+    #include <arch/fft_backend.h>
+
+/**
+ * @brief Tells whether @p n is a length the radix-2 esp-dsp FFT can process.
+ *
+ * The length must be a power of two and must not exceed
+ * CONFIG_DSP_MAX_FFT_SIZE, the size the twiddle table is initialised for.
+ */
+static bool xtensa_fft_length_ok(int n)
+{
+    return n > 0 && (n & (n - 1)) == 0 && n <= CONFIG_DSP_MAX_FFT_SIZE;
+}
+
+/**
+ * @brief Runs the in-place radix-2 FFT followed by the bit-reversal pass.
+ *
+ * @return EMBEDDIP_OK, or EMBEDDIP_ERROR_DEVICE_ERROR when either esp-dsp call
+ *         reports a failure instead of silently returning untransformed data.
+ */
+static embeddip_status_t xtensa_fft_run(float *data, int n)
+{
+    if (dsps_fft2r_fc32(data, n) != ESP_OK) {
+        return EMBEDDIP_ERROR_DEVICE_ERROR;
+    }
+    if (dsps_bit_rev_fc32(data, n) != ESP_OK) {
+        return EMBEDDIP_ERROR_DEVICE_ERROR;
+    }
+    return EMBEDDIP_OK;
+}
+
+/**
+ * @brief Initialises the esp-dsp FFT tables for CONFIG_DSP_MAX_FFT_SIZE points.
+ *
+ * @param n Transform length the caller intends to use.
+ * @return EMBEDDIP_OK on success (a repeated initialisation counts as success),
+ *         EMBEDDIP_ERROR_INVALID_ARG if @p n <= 0,
+ *         EMBEDDIP_ERROR_INVALID_SIZE if @p n is not a power of two or is
+ *         larger than CONFIG_DSP_MAX_FFT_SIZE,
+ *         EMBEDDIP_ERROR_DEVICE_ERROR if esp-dsp fails to initialise.
+ */
+embeddip_status_t embeddip_fft_backend_init(int n)
+{
+    if (n <= 0) {
+        return EMBEDDIP_ERROR_INVALID_ARG;
+    }
+    if (!xtensa_fft_length_ok(n)) {
+        return EMBEDDIP_ERROR_INVALID_SIZE;
+    }
+
+    esp_err_t err = dsps_fft2r_init_fc32(NULL, CONFIG_DSP_MAX_FFT_SIZE);
+    if (err == ESP_OK || err == ESP_ERR_DSP_REINITIALIZED) {
+        return EMBEDDIP_OK;
+    }
+    return EMBEDDIP_ERROR_DEVICE_ERROR;
+}
+
+/**
+ * @brief In-place forward FFT of @p n interleaved (re, im) samples.
+ *
+ * @param data Buffer of 2 * n floats, overwritten with the spectrum.
+ * @param n    Transform length; see embeddip_fft_backend_init() for the limits.
+ * @return EMBEDDIP_OK, EMBEDDIP_ERROR_NULL_PTR, EMBEDDIP_ERROR_INVALID_SIZE or
+ *         EMBEDDIP_ERROR_DEVICE_ERROR.
+ */
+embeddip_status_t embeddip_fft_backend_forward_1d(float *data, int n)
+{
+    if (!data) {
+        return EMBEDDIP_ERROR_NULL_PTR;
+    }
+    if (!xtensa_fft_length_ok(n)) {
+        return EMBEDDIP_ERROR_INVALID_SIZE;
+    }
+
+    return xtensa_fft_run(data, n);
+}
+
+/**
+ * @brief In-place inverse FFT of @p n interleaved (re, im) samples, scaled by 1/n.
+ *
+ * Uses the identity IFFT(x) = conj(FFT(conj(x))) / n so that only the forward
+ * esp-dsp transform is needed.
+ *
+ * @param data Buffer of 2 * n floats, overwritten with the result. Its contents
+ *             are unspecified if EMBEDDIP_ERROR_DEVICE_ERROR is returned.
+ * @param n    Transform length; see embeddip_fft_backend_init() for the limits.
+ * @return EMBEDDIP_OK, EMBEDDIP_ERROR_NULL_PTR, EMBEDDIP_ERROR_INVALID_SIZE or
+ *         EMBEDDIP_ERROR_DEVICE_ERROR.
+ */
+embeddip_status_t embeddip_fft_backend_inverse_1d(float *data, int n)
+{
+    if (!data) {
+        return EMBEDDIP_ERROR_NULL_PTR;
+    }
+    if (!xtensa_fft_length_ok(n)) {
+        return EMBEDDIP_ERROR_INVALID_SIZE;
+    }
+
+    // Conjugate the input.
+    for (int i = 0; i < n; ++i) {
+        data[2 * i + 1] = -data[2 * i + 1];
+    }
+
+    embeddip_status_t status = xtensa_fft_run(data, n);
+    if (status != EMBEDDIP_OK) {
+        return status;
+    }
+
+    // Conjugate the output and apply the 1/n normalisation in a single pass.
+    const float inv_n = 1.0f / (float)n;
+    for (int i = 0; i < n; ++i) {
+        data[2 * i] *= inv_n;
+        data[2 * i + 1] *= -inv_n;
+    }
+
+    return EMBEDDIP_OK;
+}
+
+#endif
diff --git a/board/esp32/board_esp32_fft.cpp b/board/esp32/board_esp32_fft.cpp
deleted file mode 100755
index d60d7a7..0000000
--- a/board/esp32/board_esp32_fft.cpp
+++ /dev/null
@@ -1,615 +0,0 @@
-// SPDX-License-Identifier: MIT
-// Copyright (c) 2025 EmbedDIP
-
-#include <embedDIP_configs.h>
-
-#ifdef TARGET_BOARD_ESP32
-
-    #include "board/common.h"
-
-    #include "Arduino.h"
-    #include "esp_dsp.h"
-    #include <esp32/rom/rtc.h>
-    #include <imgproc/fft.h>
-
-static bool isValidFFTSize(int w, int h)
-{
-    return (w == h) && ((w & (w - 1)) == 0);  // square and power-of-2
-}
-
-    #include <Arduino.h>  // Required for Serial on Arduino platforms
-
-embeddip_status_t fft(const Image *inImg, Image *outImg)
-{
-    if (!inImg || !outImg || !inImg->pixels) {
-        return EMBEDDIP_ERROR_NULL_PTR;
-    }
-
-    int N = inImg->width;
-    if (!isValidFFTSize(N, N)) {
-        // Serial.println("[ERROR] Invalid FFT size. Only powers of 2 are supported.");
-        return EMBEDDIP_ERROR_INVALID_SIZE;
-    }
-
-    if (isChalsEmpty(outImg)) {
-        createChalsComplex(outImg, 2);  // 2 complex channels for interleaved (Re, Im)
-        outImg->is_chals = 1;
-    }
-
-    // Serial.println("[ERROR] 1pixels are null.");
-    float *buf0 = outImg->chals->ch[0];
-    float *buf1 = outImg->chals->ch[1];
-    uint8_t *input = static_cast<uint8_t *>(inImg->pixels);
-    for (int i = 0; i < N * N; i++) {
-        buf0[2 * i] = (float)input[i];  // real part
-        buf0[2 * i + 1] = 0.0f;         // imaginary part
-    }
-    // Initialize the FFT library
-    dsps_fft2r_init_fc32(NULL, CONFIG_DSP_MAX_FFT_SIZE);
-
-    // Serial.println("[ERROR] 4or pixels are null.");
-    //  FFT on rows
-    for (int i = 0; i < N; i++) {
-        int offset = i * N * 2;
-        dsps_fft2r_fc32(buf0 + offset, N);
-        dsps_bit_rev_fc32(buf0 + offset, N);
-    }
-    // Serial.println("[ERROR] 5or pixels are null.");
-    //  Transpose to buf1
-    for (int y = 0; y < N; y++) {
-        for (int x = 0; x < N; x++) {
-            int src = 2 * (y * N + x);
-            int dst = 2 * (x * N + y);
-            buf1[dst] = buf0[src];
-            buf1[dst + 1] = buf0[src + 1];
-        }
-    }
-
-    // FFT on columns
-    for (int i = 0; i < N; i++) {
-        int offset = i * N * 2;
-        dsps_fft2r_fc32(buf1 + offset, N);
-        dsps_bit_rev_fc32(buf1 + offset, N);
-    }
-
-    // Transpose back: buf1 → buf0 to undo the earlier transpose
-    for (int y = 0; y < N; y++) {
-        for (int x = 0; x < N; x++) {
-            int src = 2 * (y * N + x);
-            int dst = 2 * (x * N + y);
-            buf0[dst] = buf1[src];
-            buf0[dst + 1] = buf1[src + 1];
-        }
-    }
-
-    // Copy back to buf1 for output
-    for (int i = 0; i < N * N * 2; i++) {
-        buf1[i] = buf0[i];
-    }
-
-    outImg->log = IMAGE_DATA_COMPLEX;
-    // Serial.println("[INFO] 2D FFT completed successfully.");
-    return EMBEDDIP_OK;
-}
-
-embeddip_status_t ifft(const Image *inImg, Image *outImg)
-{
-    int N = inImg->width;
-
-    // Accept both IMAGE_DATA_COMPLEX and IMAGE_DATA_CH0 (match STM32 behavior)
-    if (inImg->log != IMAGE_DATA_COMPLEX && inImg->log != IMAGE_DATA_CH0)
-        return EMBEDDIP_ERROR_INVALID_ARG;
-
-    float *buf0 = (float *)ps_malloc(N * N * 2 * sizeof(float));
-    // Use ch[0] if log is IMAGE_DATA_CH0, otherwise use ch[1]
-    float *buf1 = (inImg->log == IMAGE_DATA_CH0) ? inImg->chals->ch[0] : inImg->chals->ch[1];
-
-    // Conjugate input for IFFT (negate imaginary parts)
-    for (int i = 0; i < N * N; i++) {
-        buf1[2 * i + 1] = -buf1[2 * i + 1];
-    }
-
-    // iFFT on rows (using forward FFT on conjugated data)
-    for (int row = 0; row < N; row++) {
-        dsps_fft2r_fc32(buf1 + row * N * 2, N);
-        dsps_bit_rev_fc32(buf1 + row * N * 2, N);
-    }
-
-    // Conjugate intermediate result
-    for (int i = 0; i < N * N; i++) {
-        buf1[2 * i + 1] = -buf1[2 * i + 1];
-    }
-
-    // Transpose back to buf0 (INVERSE transpose - swap src/dst compared to FFT)
-    for (int y = 0; y < N; y++) {
-        for (int x = 0; x < N; x++) {
-            int dst = 2 * (y * N + x);
-            int src = 2 * (x * N + y);
-            buf0[dst] = buf1[src];
-            buf0[dst + 1] = buf1[src + 1];
-        }
-    }
-
-    // Conjugate before second FFT
-    for (int i = 0; i < N * N; i++) {
-        buf0[2 * i + 1] = -buf0[2 * i + 1];
-    }
-
-    // iFFT on columns (using forward FFT on conjugated data)
-    for (int row = 0; row < N; row++) {
-        dsps_fft2r_fc32(buf0 + row * N * 2, N);
-        dsps_bit_rev_fc32(buf0 + row * N * 2, N);
-    }
-
-    // Conjugate output
-    for (int i = 0; i < N * N; i++) {
-        buf0[2 * i + 1] = -buf0[2 * i + 1];
-    }
-
-    // Transpose back: buf0 → buf1 to undo the earlier transpose
-    for (int y = 0; y < N; y++) {
-        for (int x = 0; x < N; x++) {
-            int src = 2 * (y * N + x);
-            int dst = 2 * (x * N + y);
-            buf1[dst] = buf0[src];
-            buf1[dst + 1] = buf0[src + 1];
-        }
-    }
-
-    if (isChalsEmpty(outImg)) {
-        createChals(outImg, 1);
-        outImg->is_chals = 1;
-    }
-
-    float *result = outImg->chals->ch[0];
-    float scale = 1.0f / (N * N);
-    for (int i = 0; i < N * N; i++) {
-        result[i] = buf1[2 * i] * scale;
-    }
-
-    outImg->log = IMAGE_DATA_CH0;
-    free(buf0);
-    return EMBEDDIP_OK;
-}
-
-embeddip_status_t _log_(Image *img)
-{
-    if (!img || isChalsEmpty(img))
-        return EMBEDDIP_ERROR_NULL_PTR;
-
-    float *data = img->chals->ch[0];
-    for (int i = 0; i < img->size; ++i) {
-        data[i] = logf(data[i] + 1e-3f);  // Avoid log(0)
-    }
-    return EMBEDDIP_OK;
-}
-
-embeddip_status_t _add_(Image *img, float value)
-{
-    if (!img || isChalsEmpty(img))
-        return EMBEDDIP_ERROR_NULL_PTR;
-
-    float *data = img->chals->ch[0];
-    for (int i = 0; i < img->size; ++i) {
-        data[i] += value;
-    }
-    return EMBEDDIP_OK;
-}
-
-embeddip_status_t fftshift(Image *img)
-{
-    if (!img || isChalsEmpty(img))
-        return EMBEDDIP_ERROR_NULL_PTR;
-
-    float *data = (img->log == IMAGE_DATA_COMPLEX) ? img->chals->ch[1] : img->chals->ch[0];
-    int width = img->width;
-    int height = img->height;
-
-    int cx = width / 2;
-    int cy = height / 2;
-
-    for (int y = 0; y < cy; ++y) {
-        for (int x = 0; x < cx; ++x) {
-            int q0 = 2 * ((y * width) + x);
-            int q1 = 2 * ((y * width) + x + cx);
-            int q2 = 2 * (((y + cy) * width) + x);
-            int q3 = 2 * (((y + cy) * width) + x + cx);
-
-            for (int i = 0; i < 2; ++i) {
-                float tmp = data[q0 + i];
-                data[q0 + i] = data[q3 + i];
-                data[q3 + i] = tmp;
-
-                tmp = data[q1 + i];
-                data[q1 + i] = data[q2 + i];
-                data[q2 + i] = tmp;
-            }
-        }
-    }
-    return EMBEDDIP_OK;
-}
-
-embeddip_status_t _abs_(const Image *fftImg, Image *magImg)
-{
-    if (!fftImg || !fftImg->chals || !magImg) {
-        // Serial.println("[ERROR] Input FFT image or its channels are null.");
-        return EMBEDDIP_ERROR_NULL_PTR;
-    }
-
-    int size = fftImg->width * fftImg->height;
-
-    float *fft = (fftImg->log == IMAGE_DATA_COMPLEX) ? fftImg->chals->ch[1] : fftImg->chals->ch[0];
-
-    if (!fft) {
-        // Serial.println("[ERROR] FFT buffer is null.");
-        return EMBEDDIP_ERROR_NULL_PTR;
-    }
-
-    if (isChalsEmpty(magImg)) {
-        createChals(magImg, 1);
-        magImg->is_chals = 1;
-        // Serial.println("[INFO] Output magnitude channel created.");
-    }
-
-    float *mag = magImg->chals->ch[0];
-    if (!mag) {
-        // Serial.println("[ERROR] Magnitude channel buffer is null.");
-        return EMBEDDIP_ERROR_NULL_PTR;
-    }
-
-    for (int i = 0; i < size; ++i) {
-        float re = fft[2 * i];
-        float im = fft[2 * i + 1];
-        mag[i] = sqrtf(re * re + im * im);
-        // Uncomment the line below for verbose per-pixel debugging
-        // Serial.printf("[DEBUG] Index %d: re=%.3f, im=%.3f, mag=%.3f\n", i, re, im, mag[i]);
-    }
-
-    magImg->log = IMAGE_DATA_MAGNITUDE;
-    return EMBEDDIP_OK;
-}
-
-embeddip_status_t _phase_(const Image *fftImg, Image *phaseImg)
-{
-    int size = fftImg->width * fftImg->height;
-
-    float *fft = (fftImg->log == IMAGE_DATA_COMPLEX) ? fftImg->chals->ch[1] : fftImg->chals->ch[0];
-
-    if (isChalsEmpty(phaseImg)) {
-        createChals(phaseImg, 1);
-        phaseImg->is_chals = 1;
-    }
-
-    float *out = phaseImg->chals->ch[0];
-
-    for (int i = 0; i < size; ++i) {
-        out[i] = atan2f(fft[2 * i + 1], fft[2 * i]);
-    }
-
-    phaseImg->log = IMAGE_DATA_PHASE;
-    return EMBEDDIP_OK;
-}
-
-embeddip_status_t polarToCart(const Image *magnitude, const Image *phase, Image *outImg)
-{
-    int size = magnitude->width * magnitude->height;
-
-    if (isChalsEmpty(outImg)) {
-        createChalsComplex(outImg, 2);  // Need 2 channels like STM32
-        outImg->is_chals = 1;
-    }
-
-    float *mag = magnitude->chals->ch[0];
-    float *phs = phase->chals->ch[0];
-    float *fft = outImg->chals->ch[0];
-
-    for (int i = 0; i < size; ++i) {
-        fft[2 * i] = mag[i] * cosf(phs[i]);
-        fft[2 * i + 1] = mag[i] * sinf(phs[i]);
-    }
-
-    outImg->log = IMAGE_DATA_CH0;
-    return EMBEDDIP_OK;
-}
-
-/**
- * @brief Performs element-wise complex multiplication in frequency domain.
- *
- *
- * @param img1    First complex image
- * @param img2    Second complex image
- * @param outImg  Output complex image
- */
-embeddip_status_t multiply(const Image *img1, const Image *img2, Image *outImg)
-{
-    // Input validation
-    if (!img1 || !img2 || !outImg) {
-        return EMBEDDIP_ERROR_NULL_PTR;
-    }
-
-    if (img1->width != img2->width || img1->height != img2->height) {
-        return EMBEDDIP_ERROR_INVALID_SIZE;
-    }
-
-    // Allocate output if needed
-    if (isChalsEmpty(outImg)) {
-        createChals(outImg, 1);
-        outImg->is_chals = 1;
-    }
-
-    float *in1 = NULL, *in2 = NULL;
-
-    // Select input channel based on log state
-    if (img1->log == IMAGE_DATA_CH0) {
-        in1 = (float *)img1->chals->ch[0];
-    } else if (img1->log == IMAGE_DATA_COMPLEX) {
-        in1 = (float *)img1->chals->ch[1];
-    } else if (img1->log == IMAGE_DATA_PIXELS) {
-        in1 = (float *)img1->pixels;
-    } else {
-        return EMBEDDIP_ERROR_INVALID_ARG;
-    }
-
-    if (img2->log == IMAGE_DATA_CH0) {
-        in2 = (float *)img2->chals->ch[0];
-    } else if (img2->log == IMAGE_DATA_COMPLEX) {
-        in2 = (float *)img2->chals->ch[1];
-    } else if (img2->log == IMAGE_DATA_PIXELS) {
-        in2 = (float *)img2->pixels;
-    } else {
-        return EMBEDDIP_ERROR_INVALID_ARG;
-    }
-
-    float *out = (float *)outImg->chals->ch[0];
-
-    int size = img1->width * img1->height;
-    for (int i = 0; i < size; ++i) {
-        out[i] = in1[i] * in2[i];
-    }
-
-    outImg->log = IMAGE_DATA_CH0;
-    return EMBEDDIP_OK;
-}
-
-/**
- * @brief Computes pixel-wise difference between two images: out = img1 - img2.
- *
- * Both images must have the same dimensions and a single channel.
- *
- * @param[in]  img1    First input image.
- * @param[in]  img2    Second input image.
- * @param[out] outImg  Output image to store the difference.
- */
-/**
- * @brief Computes pixel-wise difference: out = img1 - img2 (clamped to >= 0).
- *
- * Optimized for performance: checks image types once, then uses fast loops.
- *
- * @param[in]  img1    First image (original).
- * @param[in]  img2    Second image (to subtract).
- * @param[out] outImg  Output image (difference).
- * @return EMBEDDIP_OK on success, error code otherwise.
- */
-embeddip_status_t difference(const Image *img1, const Image *img2, Image *outImg)
-{
-    if (!img1 || !img2 || !outImg)
-        return EMBEDDIP_ERROR_NULL_PTR;
-
-    if (img1->width != img2->width || img1->height != img2->height)
-        return EMBEDDIP_ERROR_INVALID_SIZE;
-
-    int size = img1->width * img1->height;
-
-    // Allocate output channel if needed
-    if (isChalsEmpty(outImg)) {
-        embeddip_status_t status = createChals(outImg, 1);
-        if (status != EMBEDDIP_OK)
-            return status;
-        outImg->is_chals = 1;
-    }
-
-    float *out = outImg->chals->ch[0];
-    if (!out)
-        return EMBEDDIP_ERROR_OUT_OF_MEMORY;
-
-    // Check types ONCE, then use optimized loops (no conditionals inside loop)
-    // Most common case: img1=PIXELS, img2=CH0 (your use case)
-    if (img1->log == IMAGE_DATA_PIXELS &&
-        (img2->log == IMAGE_DATA_CH0 || img2->log == IMAGE_DATA_MAGNITUDE)) {
-        if (!img1->pixels || !img2->chals || !img2->chals->ch[0])
-            return EMBEDDIP_ERROR_NULL_PTR;
-
-        uint8_t *pix1 = (uint8_t *)img1->pixels;
-        float *ch2 = img2->chals->ch[0];
-        for (int i = 0; i < size; ++i)
-            out[i] = fmaxf((float)pix1[i] - ch2[i], 0.0f);
-    }
-    // Both pixels
-    else if (img1->log == IMAGE_DATA_PIXELS && img2->log == IMAGE_DATA_PIXELS) {
-        if (!img1->pixels || !img2->pixels)
-            return EMBEDDIP_ERROR_NULL_PTR;
-
-        uint8_t *pix1 = (uint8_t *)img1->pixels;
-        uint8_t *pix2 = (uint8_t *)img2->pixels;
-        for (int i = 0; i < size; ++i)
-            out[i] = fmaxf((float)(pix1[i] - pix2[i]), 0.0f);
-    }
-    // Both channels
-    else if ((img1->log == IMAGE_DATA_CH0 || img1->log == IMAGE_DATA_MAGNITUDE) &&
-             (img2->log == IMAGE_DATA_CH0 || img2->log == IMAGE_DATA_MAGNITUDE)) {
-        if (!img1->chals || !img1->chals->ch[0] || !img2->chals || !img2->chals->ch[0])
-            return EMBEDDIP_ERROR_NULL_PTR;
-
-        float *ch1 = img1->chals->ch[0];
-        float *ch2 = img2->chals->ch[0];
-        for (int i = 0; i < size; ++i)
-            out[i] = fmaxf(ch1[i] - ch2[i], 0.0f);
-    }
-    // img1=CH0, img2=PIXELS
-    else if ((img1->log == IMAGE_DATA_CH0 || img1->log == IMAGE_DATA_MAGNITUDE) &&
-             img2->log == IMAGE_DATA_PIXELS) {
-        if (!img1->chals || !img1->chals->ch[0] || !img2->pixels)
-            return EMBEDDIP_ERROR_NULL_PTR;
-
-        float *ch1 = img1->chals->ch[0];
-        uint8_t *pix2 = (uint8_t *)img2->pixels;
-        for (int i = 0; i < size; ++i)
-            out[i] = fmaxf(ch1[i] - (float)pix2[i], 0.0f);
-    } else {
-        // Unsupported combination
-        return EMBEDDIP_ERROR_INVALID_ARG;
-    }
-
-    outImg->log = IMAGE_DATA_CH0;
-    return EMBEDDIP_OK;
-}
-
-/**
- * @brief Creates a frequency-domain filter mask.
- *
- * Generates circular/radial filters centered at the image center for frequency domain filtering.
- * The filter is created in spatial frequency coordinates where the center represents DC (zero
- * frequency).
- *
- * @param[in,out] maskImg    Image to fill with filter values (must have width/height already set).
- *                           Creates ch[0] channel if needed and sets log = IMAGE_DATA_CH0.
- * @param[in]     filterType Type of filter (lowpass, highpass, bandpass, etc.).
- * @param[in]     cutoff1    Primary cutoff radius in PIXELS from center.
- *                           - For lowpass: frequencies within this radius pass (1.0), outside block
- * (0.0)
- *                           - For highpass: frequencies outside this radius pass (1.0), inside
- * block (0.0)
- *                           - For bandpass: inner radius (with cutoff2 as outer radius)
- *                           - For Gaussian filters: standard deviation of the Gaussian
- * @param[in]     cutoff2    Secondary cutoff radius in PIXELS (only used for bandpass filters).
- *                           Must satisfy: cutoff1 < cutoff2
- *
- * @return EMBEDDIP_OK on success, error code otherwise.
- *
- * @note Cutoff units are PIXELS measured as Euclidean distance from image center.
- *       For a 256×256 image:
- *       - Center is at (128, 128)
- *       - Max distance to corner ≈ 181 pixels
- *       - cutoff1=30 means frequencies within 30-pixel radius from center
- *       - This corresponds to ~16.6% of max frequency (30/181)
- *
- * @note Filter values range from 0.0 (block) to 1.0 (pass).
- *       Ideal filters produce hard edges (0 or 1).
- *       Gaussian filters produce smooth transitions.
- *
- * @example
- *   // Low-pass: pass low frequencies (smooth, blur effect)
- *   getFilter(mask, FREQ_FILTER_IDEAL_LOWPASS, 30, 0);
- *
- *   // High-pass: pass high frequencies (edges, details)
- *   getFilter(mask, FREQ_FILTER_IDEAL_HIGHPASS, 50, 0);
- *
- *   // Band-pass: pass frequencies between 20-60 pixels from center
- *   getFilter(mask, FREQ_FILTER_IDEAL_BANDPASS, 20, 60);
- */
-embeddip_status_t
-getFilter(Image *maskImg, FrequencyFilterType filterType, float cutoff1, float cutoff2)
-{
-    if (!maskImg)
-        return EMBEDDIP_ERROR_NULL_PTR;
-
-    // Validate cutoff values
-    if (cutoff1 < 0.0f)
-        return EMBEDDIP_ERROR_INVALID_ARG;
-
-    if (filterType == FREQ_FILTER_IDEAL_BANDPASS || filterType == FREQ_FILTER_GAUSSIAN_BANDPASS) {
-        if (cutoff2 < 0.0f || cutoff1 >= cutoff2)
-            return EMBEDDIP_ERROR_INVALID_ARG;
-    }
-
-    int w = maskImg->width;
-    int h = maskImg->height;
-    int cx = w / 2;
-    int cy = h / 2;
-
-    maskImg->format = IMAGE_FORMAT_GRAYSCALE;
-
-    if (isChalsEmpty(maskImg)) {
-        createChals(maskImg, 1);
-        maskImg->is_chals = 1;
-    }
-
-    float *mask = maskImg->chals->ch[0];
-
-    for (int y = 0; y < h; ++y) {
-        for (int x = 0; x < w; ++x) {
-            int dx = x - cx;
-            int dy = y - cy;
-            float d = sqrtf((float)(dx * dx + dy * dy));
-
-            float value = 0.0f;
-
-            switch (filterType) {
-            case FREQ_FILTER_IDEAL_LOWPASS:
-                value = (d <= cutoff1) ? 1.0f : 0.0f;
-                break;
-
-            case FREQ_FILTER_GAUSSIAN_LOWPASS:
-                value = expf(-(d * d) / (2.0f * cutoff1 * cutoff1));
-                break;
-
-            case FREQ_FILTER_IDEAL_HIGHPASS:
-                value = (d >= cutoff1) ? 1.0f : 0.0f;
-                break;
-
-            case FREQ_FILTER_GAUSSIAN_HIGHPASS:
-                value = 1.0f - expf(-(d * d) / (2.0f * cutoff1 * cutoff1));
-                break;
-
-            case FREQ_FILTER_IDEAL_BANDPASS:
-                value = (d >= cutoff1 && d <= cutoff2) ? 1.0f : 0.0f;
-                break;
-
-            case FREQ_FILTER_GAUSSIAN_BANDPASS: {
-                float gLow = expf(-(d * d) / (2.0f * cutoff2 * cutoff2));
-                float gHigh = expf(-(d * d) / (2.0f * cutoff1 * cutoff1));
-                value = gLow - gHigh;
-                break;
-            }
-
-            default:
-                value = 0.0f;
-                break;
-            }
-
-            mask[y * w + x] = value;
-        }
-    }
-
-    maskImg->log = IMAGE_DATA_CH0;
-    return EMBEDDIP_OK;
-}
-
-embeddip_status_t ffilter2D(const Image *fftImg, const Image *filterMask, Image *outImg)
-{
-    if (!fftImg || !filterMask || !outImg || isChalsEmpty(fftImg) || isChalsEmpty(filterMask))
-        return EMBEDDIP_ERROR_NULL_PTR;
-
-    int width = fftImg->width;
-    int height = fftImg->height;
-    int size = width * height;
-
-    // Create magnitude and phase containers
-    Image *magImg = NULL;
-    Image *phaseImg = NULL;
-    createImageWH(width, height, IMAGE_FORMAT_GRAYSCALE, &magImg);
-    createImageWH(width, height, IMAGE_FORMAT_GRAYSCALE, &phaseImg);
-
-    _abs_(fftImg, magImg);
-    _phase_(fftImg, phaseImg);
-
-    float *mag = magImg->chals->ch[0];
-    float *mask = filterMask->chals->ch[0];
-
-    for (int i = 0; i < size; ++i)
-        mag[i] *= mask[i];
-
-    polarToCart(magImg, phaseImg, outImg);
-    return EMBEDDIP_OK;
-}
-#endif
diff --git a/board/esp32/board_esp32_memory.cpp b/board/esp32/board_esp32eye_memory.cpp
old mode 100755
new mode 100644
similarity index 94%
rename from board/esp32/board_esp32_memory.cpp
rename to board/esp32/board_esp32eye_memory.cpp
index c792ef3..c8ef138
--- a/board/esp32/board_esp32_memory.cpp
+++ b/board/esp32/board_esp32eye_memory.cpp
@@ -3,7 +3,7 @@
 
 #include <embedDIP_configs.h>
 
-#ifdef TARGET_BOARD_ESP32
+#ifdef EMBED_DIP_BOARD_ESP32
 
     #include <stdlib.h>
     #include <string.h>
@@ -14,8 +14,10 @@
 
     #define ps_malloc(size) heap_caps_malloc((size), MALLOC_CAP_SPIRAM)
 
-void memory_init(void)
+void memory_init(uintptr_t pool_start_addr)
 {
+    (void)pool_start_addr;
+
     // Check if PSRAM is available
     if (ESP.getPsramSize() > 0) {
         Serial.printf("[MEMORY] PSRAM available: %u bytes\n", ESP.getPsramSize());
diff --git a/board/esp32/board_profile.cmake b/board/esp32/board_profile.cmake
new file mode 100644
index 0000000..bc1e562
--- /dev/null
+++ b/board/esp32/board_profile.cmake
@@ -0,0 +1,21 @@
+# Board profile: ESP32
+
+set(EMBEDDIP_BOARD_SOURCES
+    ${BOARD_COMMON_SOURCES}
+    board/esp32/board_esp32eye_memory.cpp
+)
+
+set(EMBEDDIP_DEVICE_SOURCES
+    ${DEVICE_COMMON_SOURCES}
+    device/camera/esp32_ov2640.cpp
+    device/serial/esp32_uart.cpp
+)
+
+set(EMBEDDIP_BOARD_DEFINES
+    EMBED_DIP_BOARD_ESP32=1
+    ARDUINO_ARCH_ESP32
+)
+
+set(EMBEDDIP_BOARD_INCLUDE_DIRS
+    ${CMAKE_CURRENT_SOURCE_DIR}/board/esp32
+)
diff --git a/board/stm32f7/board_profile.cmake b/board/stm32f7/board_profile.cmake
new file mode 100644
index 0000000..8313686
--- /dev/null
+++ b/board/stm32f7/board_profile.cmake
@@ -0,0 +1,24 @@
+# Board profile: STM32F7
+
+set(EMBEDDIP_BOARD_SOURCES
+    ${BOARD_COMMON_SOURCES}
+    board/stm32f7/board_stm32f7_memory.c
+    board/stm32f7/configs.h
+)
+
+set(EMBEDDIP_DEVICE_SOURCES
+    ${DEVICE_COMMON_SOURCES}
+    device/camera/stm32_ov5640.c
+    device/display/stm32_rk043fn48h.c
+    device/serial/stm32_uart.c
+)
+
+set(EMBEDDIP_BOARD_DEFINES
+    EMBED_DIP_BOARD_STM32F7=1
+    STM32F7xx
+    STM32F746xx
+)
+
+set(EMBEDDIP_BOARD_INCLUDE_DIRS
+    ${CMAKE_CURRENT_SOURCE_DIR}/board/stm32f7
+)
diff --git a/board/stm32f7/board_stm32f7_fft.c b/board/stm32f7/board_stm32f7_fft.c
deleted file mode 100755
index 7c52514..0000000
--- a/board/stm32f7/board_stm32f7_fft.c
+++ /dev/null
@@ -1,906 +0,0 @@
-// SPDX-License-Identifier: MIT
-// Copyright (c) 2025 EmbedDIP
-
-#include <embedDIP_configs.h>
-
-#ifdef TARGET_BOARD_STM32F7
-
-    #include "arm_const_structs.h"
-    #include "arm_math.h"
-    #include <board/common.h>
-    #include <core/memory_manager.h>
-    #include <fft.h>
-
-embeddip_status_t _log_(Image *img)
-{
-    if (!img)
-        return EMBEDDIP_ERROR_NULL_PTR;
-    if (isChalsEmpty(img))
-        return EMBEDDIP_ERROR_INVALID_ARG;
-
-    float *data = img->chals->ch[0];
-    for (int i = 0; i < img->size; ++i) {
-        data[i] = logf(data[i]);
-    }
-
-    return EMBEDDIP_OK;
-}
-
-/**
- * @brief Adds a scalar value to all pixels in a single-channel image.
- *
- * @param[in,out] img   Image to modify (in-place).
- * @param[in]     value Scalar value to add.
- */
-embeddip_status_t _add_(Image *img, float value)
-{
-    if (!img)
-        return EMBEDDIP_ERROR_NULL_PTR;
-    if (isChalsEmpty(img))
-        return EMBEDDIP_ERROR_INVALID_ARG;
-
-    float *data = img->chals->ch[0];
-    for (uint32_t i = 0; i < img->size; ++i) {
-        data[i] += value;
-    }
-
-    return EMBEDDIP_OK;
-}
-
-/**
- * @brief Computes the inverse Fourier transform of the input image.
- *
- * @param inImg Input image (Fourier domain).
- * @param outImg Output image (spatial domain).
- */
-embeddip_status_t fourierInv(const Image *inImg, Image *outImg)
-{
-    int imageN = 256;
-    float *fourier = inImg->chals->ch[0];
-    float *fourier2 = inImg->chals->ch[1];
-
-    for (int i = 0; i < imageN; i++) {
-        arm_cfft_f32(&arm_cfft_sR_f32_len256, fourier2 + imageN * i * 2, 1, 1);
-    }
-
-    for (int k = 0; k < imageN; k++) {
-        for (int j = 0; j < imageN; j++) {
-            fourier[2 * j + k * imageN * 2] = (float)fourier2[j * imageN * 2 + k * 2];
-            fourier[2 * j + 1 + k * imageN * 2] = (float)fourier2[j * imageN * 2 + k * 2 + 1];
-        }
-    }
-
-    for (int i = 0; i < imageN; i++) {
-        arm_cfft_f32(&arm_cfft_sR_f32_len256, fourier + imageN * i * 2, 1, 1);
-    }
-
-    if (isChalsEmpty(outImg)) {
-        createChals(outImg, 1);
-        outImg->is_chals = 1;
-    }
-    for (int i = 0; i < imageN * imageN; i++) {
-        outImg->chals->ch[0][i] =
-            (float)sqrt(fourier[2 * i] * fourier[2 * i] + fourier[2 * i + 1] * fourier[2 * i + 1]);
-    }
-
-    return EMBEDDIP_OK;
-}
-
-/**
- * @brief Converts polar coordinates (magnitude and phase) to complex cartesian (real and
- * imaginary).
- *
- * @param magnitude Pointer to magnitude image (1 channel).
- * @param phase     Pointer to phase image (1 channel), in radians.
- * @param outImg    Output image with 2 channels: real and imaginary.
- */
-embeddip_status_t polarToCart(const Image *mag_img, const Image *phase_img, Image *dst)
-{
-    if (!mag_img || !phase_img || !dst)
-        return EMBEDDIP_ERROR_NULL_PTR;
-
-    int size = mag_img->width * mag_img->height;
-
-    if (!isChalsEmpty(dst) && dst->chals && dst->chals->ch[0]) {
-        // Ensure output buffer has complex capacity (2*N floats).
-        memory_free(dst->chals->ch[0]);
-        dst->chals->ch[0] = NULL;
-    }
-
-    embeddip_status_t status = createChalsComplex(dst, 1);
-    if (status != EMBEDDIP_OK) {
-        return status;
-    }
-
-    float *mag = mag_img->chals->ch[0];
-    float *phs = phase_img->chals->ch[0];
-    float *fft = dst->chals->ch[0];
-
-    for (int i = 0; i < size; ++i) {
-        fft[i * 2] = mag[i] * cosf(phs[i]);      // REEL
-        fft[i * 2 + 1] = mag[i] * sinf(phs[i]);  // IMJ
-    }
-
-    dst->log = IMAGE_DATA_CH0;
-    return EMBEDDIP_OK;
-}
-
-/**
- * @brief Performs element-wise complex multiplication in frequency domain.
- *
- *
- * @param img1    First complex image
- * @param img2    Second complex image
- * @param outImg  Output complex image
- */
-embeddip_status_t multiply(const Image *img1, const Image *img2, Image *outImg)
-{
-    // Input validation
-    if (!img1 || !img2 || !outImg) {
-        return EMBEDDIP_ERROR_NULL_PTR;
-    }
-
-    if (img1->width != img2->width || img1->height != img2->height) {
-        return EMBEDDIP_ERROR_INVALID_SIZE;
-    }
-
-    // Allocate output if needed
-    if (isChalsEmpty(outImg)) {
-        createChals(outImg, 1);
-        outImg->is_chals = 1;
-    }
-
-    float *in1 = NULL, *in2 = NULL;
-
-    // Select input channel based on log state
-    if (img1->log == IMAGE_DATA_CH0) {
-        in1 = (float *)img1->chals->ch[0];
-    } else if (img1->log == IMAGE_DATA_COMPLEX) {
-        in1 = (float *)img1->chals->ch[1];
-    } else if (img1->log == IMAGE_DATA_PIXELS) {
-        in1 = (float *)img1->pixels;
-    } else {
-        return EMBEDDIP_ERROR_INVALID_ARG;
-    }
-
-    if (img2->log == IMAGE_DATA_CH0) {
-        in2 = (float *)img2->chals->ch[0];
-    } else if (img2->log == IMAGE_DATA_COMPLEX) {
-        in2 = (float *)img2->chals->ch[1];
-    } else if (img2->log == IMAGE_DATA_PIXELS) {
-        in2 = (float *)img2->pixels;
-    } else {
-        return EMBEDDIP_ERROR_INVALID_ARG;
-    }
-
-    float *out = (float *)outImg->chals->ch[0];
-
-    int size = img1->width * img1->height;
-    for (int i = 0; i < size; ++i) {
-        out[i] = in1[i] * in2[i];
-    }
-
-    outImg->log = IMAGE_DATA_CH0;
-    return EMBEDDIP_OK;
-}
-
-/**
- * @brief Computes pixel-wise difference between two images: out = img1 - img2.
- *
- * Both images must have the same dimensions and a single channel.
- *
- * @param[in]  img1    First input image.
- * @param[in]  img2    Second input image.
- * @param[out] outImg  Output image to store the difference.
- */
-/**
- * @brief Computes pixel-wise difference: out = img1 - img2 (clamped to >= 0).
- *
- * Optimized for performance: checks image types once, then uses fast loops.
- *
- * @param[in]  img1    First image (original).
- * @param[in]  img2    Second image (to subtract).
- * @param[out] outImg  Output image (difference).
- * @return EMBEDDIP_OK on success, error code otherwise.
- */
-embeddip_status_t difference(const Image *src1, const Image *src2, Image *dst)
-{
-    if (!src1 || !src2 || !dst)
-        return EMBEDDIP_ERROR_NULL_PTR;
-
-    if (src1->width != src2->width || src1->height != src2->height)
-        return EMBEDDIP_ERROR_INVALID_SIZE;
-
-    int size = src1->width * src1->height;
-
-    // Allocate output channel if needed
-    if (isChalsEmpty(dst)) {
-        embeddip_status_t status = createChals(dst, 1);
-        if (status != EMBEDDIP_OK)
-            return status;
-        dst->is_chals = 1;
-    }
-
-    float *out = dst->chals->ch[0];
-    if (!out)
-        return EMBEDDIP_ERROR_OUT_OF_MEMORY;
-
-    // Check types ONCE, then use optimized loops (no conditionals inside loop)
-    // Most common case: src1=PIXELS, src2=CH0 (your use case)
-    if (src1->log == IMAGE_DATA_PIXELS &&
-        (src2->log == IMAGE_DATA_CH0 || src2->log == IMAGE_DATA_MAGNITUDE)) {
-        if (!src1->pixels || !src2->chals || !src2->chals->ch[0])
-            return EMBEDDIP_ERROR_NULL_PTR;
-
-        uint8_t *pix1 = src1->pixels;
-        float *ch2 = src2->chals->ch[0];
-        for (int i = 0; i < size; ++i)
-            out[i] = fmaxf((float)pix1[i] - ch2[i], 0.0f);
-    }
-    // Both pixels
-    else if (src1->log == IMAGE_DATA_PIXELS && src2->log == IMAGE_DATA_PIXELS) {
-        if (!src1->pixels || !src2->pixels)
-            return EMBEDDIP_ERROR_NULL_PTR;
-
-        uint8_t *pix1 = src1->pixels;
-        uint8_t *pix2 = src2->pixels;
-        for (int i = 0; i < size; ++i)
-            out[i] = fmaxf((float)(pix1[i] - pix2[i]), 0.0f);
-    }
-    // Both channels
-    else if ((src1->log == IMAGE_DATA_CH0 || src1->log == IMAGE_DATA_MAGNITUDE) &&
-             (src2->log == IMAGE_DATA_CH0 || src2->log == IMAGE_DATA_MAGNITUDE)) {
-        if (!src1->chals || !src1->chals->ch[0] || !src2->chals || !src2->chals->ch[0])
-            return EMBEDDIP_ERROR_NULL_PTR;
-
-        float *ch1 = src1->chals->ch[0];
-        float *ch2 = src2->chals->ch[0];
-        for (int i = 0; i < size; ++i)
-            out[i] = fmaxf(ch1[i] - ch2[i], 0.0f);
-    }
-    // src1=CH0, src2=PIXELS
-    else if ((src1->log == IMAGE_DATA_CH0 || src1->log == IMAGE_DATA_MAGNITUDE) &&
-             src2->log == IMAGE_DATA_PIXELS) {
-        if (!src1->chals || !src1->chals->ch[0] || !src2->pixels)
-            return EMBEDDIP_ERROR_NULL_PTR;
-
-        float *ch1 = src1->chals->ch[0];
-        uint8_t *pix2 = src2->pixels;
-        for (int i = 0; i < size; ++i)
-            out[i] = fmaxf(ch1[i] - (float)pix2[i], 0.0f);
-    } else {
-        // Unsupported combination
-        return EMBEDDIP_ERROR_INVALID_ARG;
-    }
-
-    dst->log = IMAGE_DATA_CH0;
-    return EMBEDDIP_OK;
-}
-
-/**
- * @brief Fills the given image with a frequency domain filter mask.
- *
- * This modifies the image in-place. It must already have width and height set.
- *
- * @param maskImg    Target image to be filled with mask values.
- * @param filterType Type of filter to create (low-pass, high-pass, band-pass, etc.).
- * @param cutoff1    Cutoff radius (or inner radius for band-pass).
- * @param cutoff2    Outer radius for band-pass (ignored for other types).
- */
-/**
- * @brief Creates a frequency-domain filter mask.
- *
- * Generates circular/radial filters centered at the image center for frequency domain filtering.
- * The filter is created in spatial frequency coordinates where the center represents DC (zero
- * frequency).
- *
- * @param[in,out] maskImg    Image to fill with filter values (must have width/height already set).
- *                           Creates ch[0] channel if needed and sets log = IMAGE_DATA_CH0.
- * @param[in]     filterType Type of filter (lowpass, highpass, bandpass, etc.).
- * @param[in]     cutoff1    Primary cutoff radius in PIXELS from center.
- *                           - For lowpass: frequencies within this radius pass (1.0), outside block
- * (0.0)
- *                           - For highpass: frequencies outside this radius pass (1.0), inside
- * block (0.0)
- *                           - For bandpass: inner radius (with cutoff2 as outer radius)
- *                           - For Gaussian filters: standard deviation of the Gaussian
- * @param[in]     cutoff2    Secondary cutoff radius in PIXELS (only used for bandpass filters).
- *                           Must satisfy: cutoff1 < cutoff2
- *
- * @return EMBEDDIP_OK on success, error code otherwise.
- *
- * @note Cutoff units are PIXELS measured as Euclidean distance from image center.
- *       For a 256×256 image:
- *       - Center is at (128, 128)
- *       - Max distance to corner ≈ 181 pixels
- *       - cutoff1=30 means frequencies within 30-pixel radius from center
- *       - This corresponds to ~16.6% of max frequency (30/181)
- *
- * @note Filter values range from 0.0 (block) to 1.0 (pass).
- *       Ideal filters produce hard edges (0 or 1).
- *       Gaussian filters produce smooth transitions.
- *
- * @example
- *   // Low-pass: pass low frequencies (smooth, blur effect)
- *   getFilter(mask, FREQ_FILTER_IDEAL_LOWPASS, 30, 0);
- *
- *   // High-pass: pass high frequencies (edges, details)
- *   getFilter(mask, FREQ_FILTER_IDEAL_HIGHPASS, 50, 0);
- *
- *   // Band-pass: pass frequencies between 20-60 pixels from center
- *   getFilter(mask, FREQ_FILTER_IDEAL_BANDPASS, 20, 60);
- */
-embeddip_status_t
-getFilter(Image *filter_img, FrequencyFilterType filter_type, float cutoff1, float cutoff2)
-{
-    if (!filter_img)
-        return EMBEDDIP_ERROR_NULL_PTR;
-
-    // Validate cutoff values
-    if (cutoff1 < 0.0f)
-        return EMBEDDIP_ERROR_INVALID_ARG;
-
-    if (filter_type == FREQ_FILTER_IDEAL_BANDPASS || filter_type == FREQ_FILTER_GAUSSIAN_BANDPASS) {
-        if (cutoff2 < 0.0f || cutoff1 >= cutoff2)
-            return EMBEDDIP_ERROR_INVALID_ARG;
-    }
-
-    int w = filter_img->width;
-    int h = filter_img->height;
-    int cx = w / 2;
-    int cy = h / 2;
-
-    filter_img->format = IMAGE_FORMAT_GRAYSCALE;
-
-    if (isChalsEmpty(filter_img)) {
-        createChals(filter_img, 1);
-        filter_img->is_chals = 1;
-    }
-
-    float *mask = filter_img->chals->ch[0];
-
-    for (int y = 0; y < h; ++y) {
-        for (int x = 0; x < w; ++x) {
-            int dx = x - cx;
-            int dy = y - cy;
-            float d = sqrtf((float)(dx * dx + dy * dy));
-
-            float value = 0.0f;
-
-            switch (filter_type) {
-            case FREQ_FILTER_IDEAL_LOWPASS:
-                value = (d <= cutoff1) ? 1.0f : 0.0f;
-                break;
-
-            case FREQ_FILTER_GAUSSIAN_LOWPASS:
-                value = expf(-(d * d) / (2.0f * cutoff1 * cutoff1));
-                break;
-
-            case FREQ_FILTER_IDEAL_HIGHPASS:
-                value = (d >= cutoff1) ? 1.0f : 0.0f;
-                break;
-
-            case FREQ_FILTER_GAUSSIAN_HIGHPASS:
-                value = 1.0f - expf(-(d * d) / (2.0f * cutoff1 * cutoff1));
-                break;
-
-            case FREQ_FILTER_IDEAL_BANDPASS:
-                value = (d >= cutoff1 && d <= cutoff2) ? 1.0f : 0.0f;
-                break;
-
-            case FREQ_FILTER_GAUSSIAN_BANDPASS: {
-                float gLow = expf(-(d * d) / (2.0f * cutoff2 * cutoff2));
-                float gHigh = expf(-(d * d) / (2.0f * cutoff1 * cutoff1));
-                value = gLow - gHigh;
-                break;
-            }
-
-            default:
-                value = 0.0f;
-                break;
-            }
-
-            mask[y * w + x] = value;
-        }
-    }
-
-    filter_img->log = IMAGE_DATA_CH0;
-    return EMBEDDIP_OK;
-}
-
-/**
- * @brief Checks if input dimensions are valid (powers of 2 and matching).
- */
-static bool isValidFFTSize(int w, int h)
-{
-    return (w == h) && ((w & (w - 1)) == 0);  // square and power-of-2
-}
-
-/**
- * @brief Performs forward 2D FFT on a single-channel image.
- *        ch[0] holds interleaved (Re, Im), ch[1] holds transposed for vertical pass.
- */
-embeddip_status_t fft(const Image *src, Image *dst)
-{
-    int N = src->width;
-    if (!isValidFFTSize(src->width, src->height))
-        return -1;
-
-    float *buf0;
-    float *buf1;
-
-    if (isChalsEmpty(dst)) {
-        createChalsComplex(dst, 2);  // 2 complex channels for interleaved (Re, Im)
-        dst->is_chals = 1;
-        buf0 = dst->chals->ch[0];
-        buf1 = dst->chals->ch[1];
-    } else {
-        buf0 = dst->chals->ch[0];
-        buf1 = dst->chals->ch[1];
-    }
-
-    const uint8_t *pixels = src->pixels;
-    for (int i = 0; i < N * N; i++) {
-        buf0[2 * i] = (float)pixels[i];
-        buf0[2 * i + 1] = 0.0f;
-    }
-
-    for (int row = 0; row < N; row++) {
-        arm_cfft_f32(&arm_cfft_sR_f32_len256, buf0 + row * N * 2, 0, 1);
-    }
-
-    for (int y = 0; y < N; y++) {
-        for (int x = 0; x < N; x++) {
-            int src = 2 * (y * N + x);
-            int dst = 2 * (x * N + y);
-            buf1[dst] = buf0[src];
-            buf1[dst + 1] = buf0[src + 1];
-        }
-    }
-
-    for (int row = 0; row < N; row++) {
-        arm_cfft_f32(&arm_cfft_sR_f32_len256, buf1 + row * N * 2, 0, 1);
-    }
-
-    // Transpose back: buf1 → buf0 to undo the earlier transpose
-    for (int y = 0; y < N; y++) {
-        for (int x = 0; x < N; x++) {
-            int src = 2 * (y * N + x);
-            int dst = 2 * (x * N + y);
-            buf0[dst] = buf1[src];
-            buf0[dst + 1] = buf1[src + 1];
-        }
-    }
-
-    // Copy back to buf1 for output
-    for (int i = 0; i < N * N * 2; i++) {
-        buf1[i] = buf0[i];
-    }
-
-    dst->log = IMAGE_DATA_COMPLEX;
-    return 0;
-}
-
-/**
- * @brief Performs inverse 2D FFT on complex image. ch[0] is output.
- */
-embeddip_status_t ifft(const Image *src, Image *dst)
-{
-    int N = src->width;
-
-    // if input image does not hold the correct data.
-    if (src->log != IMAGE_DATA_COMPLEX && src->log != IMAGE_DATA_CH0) {
-        return -1;
-    }
-
-    float *buf0;
-    float *buf1;
-
-    if (src->log == IMAGE_DATA_COMPLEX) {
-        // current fft to ifft application.
-        buf0 = (float *)memory_alloc(N * N * 2 * sizeof(float));
-        buf1 = src->chals->ch[1];
-    } else  // if IMAGE_DATA_CH0
-    {
-        // In this case only 0 is allocated i guess.
-        buf0 = (float *)memory_alloc(N * N * 2 * sizeof(float));
-        buf1 = src->chals->ch[0];
-    }
-
-    if (isChalsEmpty(dst)) {
-        createChals(dst, 1);
-        dst->is_chals = 1;
-    }
-
-    for (int row = 0; row < N; row++) {
-        arm_cfft_f32(&arm_cfft_sR_f32_len256, buf1 + row * N * 2, 1, 1);
-    }
-
-    for (int y = 0; y < N; y++) {
-        for (int x = 0; x < N; x++) {
-            int dst_idx = 2 * (y * N + x);
-            int src_idx = 2 * (x * N + y);
-            buf0[dst_idx] = buf1[src_idx];
-            buf0[dst_idx + 1] = buf1[src_idx + 1];
-        }
-    }
-
-    for (int row = 0; row < N; row++) {
-        arm_cfft_f32(&arm_cfft_sR_f32_len256, buf0 + row * N * 2, 1, 1);
-    }
-
-    // Transpose back: buf0 → buf1 to undo the earlier transpose
-    for (int y = 0; y < N; y++) {
-        for (int x = 0; x < N; x++) {
-            int src_idx = 2 * (y * N + x);
-            int dst_idx = 2 * (x * N + y);
-            buf1[dst_idx] = buf0[src_idx];
-            buf1[dst_idx + 1] = buf0[src_idx + 1];
-        }
-    }
-
-    // Extract real part from transposed-back data
-    for (int i = 0; i < N * N; i++) {
-        dst->chals->ch[0][i] = buf1[2 * i];
-    }
-
-    dst->log = IMAGE_DATA_CH0;
-    memory_free(buf0);
-    return 0;
-}
-
-/**
- * @brief Performs inverse 2D FFT on a frequency-domain image.
- *        Uses ch[1] as input (transposed buffer), writes to ch[0] as interleaved (Re, Im).
- */
-embeddip_status_t ifft__(const Image *inImg, Image *outImg)
-{
-    int N = inImg->width;
-    if (!isValidFFTSize(inImg->width, inImg->height))
-        return -1;
-
-    float *buf0;
-
-    if (isChalsEmpty(outImg)) {
-        createChalsComplex(outImg, 1);
-        outImg->is_chals = 1;
-        buf0 = outImg->chals->ch[0];
-    } else {
-        memory_free(outImg->chals->ch[0]);
-        buf0 = (float *)memory_alloc(N * N * 2 * sizeof(float));
-        outImg->chals->ch[0] = buf0;
-    }
-
-    float *input = inImg->chals->ch[1];
-
-    // Inverse FFT on rows (from transposed data)
-    for (int row = 0; row < N; row++) {
-        arm_cfft_f32(&arm_cfft_sR_f32_len256, input + row * N * 2, 1, 1);  // Inverse FFT
-    }
-
-    // Transpose back
-    for (int y = 0; y < N; y++) {
-        for (int x = 0; x < N; x++) {
-            int src = 2 * (y * N + x);
-            int dst = 2 * (x * N + y);
-            buf0[dst] = input[src];
-            buf0[dst + 1] = input[src + 1];
-        }
-    }
-
-    // Inverse FFT on columns (rows of transposed image)
-    for (int row = 0; row < N; row++) {
-        arm_cfft_f32(&arm_cfft_sR_f32_len256, buf0 + row * N * 2, 1, 1);  // Inverse FFT
-    }
-
-    // Normalize the output (divide all by N*N)
-    float scale = 1.0f / (N * N);
-    for (int i = 0; i < N * N * 2; ++i) {
-        buf0[i] *= scale;
-    }
-
-    return 0;
-}
-
-/**
- * @brief Computes log-magnitude spectrum.
- */
-embeddip_status_t _abs_(const Image *src, Image *dst)
-{
-    int size = src->width * src->height;
-
-    float *fft;
-    if (src->log == IMAGE_DATA_COMPLEX) {
-        // current fft to ifft application.
-        fft = src->chals->ch[1];
-    } else if (src->log == IMAGE_DATA_CH0) {
-        // In this case only 0 is allocated i guess.
-        fft = src->chals->ch[0];
-    } else {
-        return -1;
-    }
-
-    if (isChalsEmpty(dst)) {
-        createChals(dst, 1);
-        dst->is_chals = 1;
-    }
-
-    float *mag = dst->chals->ch[0];
-    for (int i = 0; i < size; i++) {
-        float re = fft[2 * i];
-        float im = fft[2 * i + 1];
-        mag[i] = sqrtf(re * re + im * im);
-    }
-
-    dst->log = IMAGE_DATA_MAGNITUDE;
-    return EMBEDDIP_OK;
-}
-
-/**
- * @brief Computes phase angle from FFT image.
- */
-embeddip_status_t _phase_(const Image *src, Image *dst)
-{
-    int size = src->width * src->height;
-
-    float *fft;
-    if (src->log == IMAGE_DATA_COMPLEX) {
-        // current fft to ifft application.
-        fft = src->chals->ch[1];
-    } else if (src->log == IMAGE_DATA_CH0) {
-        // In this case only 0 is allocated i guess.
-        fft = src->chals->ch[0];
-    } else {
-        return -1;
-    }
-
-    if (isChalsEmpty(dst)) {
-        createChals(dst, 1);
-        dst->is_chals = 1;
-    }
-
-    float *out = dst->chals->ch[0];
-    for (int i = 0; i < size; i++) {
-        out[i] = atan2f(fft[2 * i + 1], fft[2 * i]);
-    }
-
-    dst->log = IMAGE_DATA_PHASE;
-    return EMBEDDIP_OK;
-}
-
-/**
- * @brief Rearranges FFT result so that low-frequency component is centered.
- *
- * Operates on the appropriate channel based on image log state.
- *
- * @param[in,out] img Image containing FFT data.
- *                    If log == IMAGE_DATA_COMPLEX, operates on ch[1].
- *                    If log == IMAGE_DATA_CH0, operates on ch[0].
- */
-embeddip_status_t fftshift(Image *img)
-{
-    if (!img || isChalsEmpty(img))
-        return EMBEDDIP_ERROR_NULL_PTR;
-
-    float *data = (img->log == IMAGE_DATA_COMPLEX) ? img->chals->ch[1] : img->chals->ch[0];
-    int width = img->width;
-    int height = img->height;
-
-    int cx = width / 2, cy = height / 2;
-    for (int y = 0; y < cy; ++y) {
-        for (int x = 0; x < cx; ++x) {
-            int q0 = 2 * ((y * width) + x);
-            int q1 = 2 * ((y * width) + x + cx);
-            int q2 = 2 * (((y + cy) * width) + x);
-            int q3 = 2 * (((y + cy) * width) + x + cx);
-
-            for (int k = 0; k < 2; ++k) {
-                float tmp = data[q0 + k];
-                data[q0 + k] = data[q3 + k];
-                data[q3 + k] = tmp;
-
-                tmp = data[q1 + k];
-                data[q1 + k] = data[q2 + k];
-                data[q2 + k] = tmp;
-            }
-        }
-    }
-
-    return EMBEDDIP_OK;
-}
-
-/**
- * @brief Applies a frequency-domain filter to a complex image.
- *
- * This function performs element-wise complex multiplication between a Fourier-domain image
- * and a filter mask. The mask can be either a grayscale magnitude mask or a complex-valued mask.
- *
- * @param[in]  fftImg     Complex frequency-domain image (Re, Im interleaved in ch[0]).
- * @param[in]  filterMask Grayscale or complex mask to apply in frequency domain.
- * @param[out] outImg     Output image after filtering in the frequency domain.
- */
-embeddip_status_t ffilter2D(const Image *src_fft, const Image *filter, Image *dst)
-{
-    if (!src_fft || !filter || !dst)
-        return EMBEDDIP_ERROR_NULL_PTR;
-
-    if (isChalsEmpty(src_fft) || isChalsEmpty(filter))
-        return EMBEDDIP_ERROR_INVALID_ARG;
-
-    int width = src_fft->width;
-    int height = src_fft->height;
-    int size = width * height;
-
-    // Step 1: Compute magnitude and phase
-    Image *magImg = NULL;
-    Image *phaseImg = NULL;
-
-    embeddip_status_t status =
-        createImageWH(src_fft->width, src_fft->height, src_fft->format, &magImg);
-    if (status != EMBEDDIP_OK)
-        return status;
-
-    status = createImageWH(src_fft->width, src_fft->height, src_fft->format, &phaseImg);
-    if (status != EMBEDDIP_OK) {
-        deleteImage(magImg);
-        return status;
-    }
-
-    status = _abs_(src_fft, magImg);
-    if (status != EMBEDDIP_OK) {
-        deleteImage(magImg);
-        deleteImage(phaseImg);
-        return status;
-    }
-
-    status = _phase_(src_fft, phaseImg);
-    if (status != EMBEDDIP_OK) {
-        deleteImage(magImg);
-        deleteImage(phaseImg);
-        return status;
-    }
-
-    // Step 2: Multiply magnitude by filter mask (element-wise)
-    float *mag = magImg->chals->ch[0];
-    float *mask = filter->chals->ch[0];
-    for (int i = 0; i < size; ++i)
-        mag[i] *= mask[i];
-
-    // Step 3: Reconstruct complex data from filtered mag + original phase
-    status = polarToCart(magImg, phaseImg, dst);
-
-    // Cleanup temporary images
-    deleteImage(magImg);
-    deleteImage(phaseImg);
-
-    return status;
-}
-
-/*
-
-embeddip_status_t fourier(const Image *inImg, Image *outImg)
-{
-    int imageN = 256;
-
-    if (isChalsEmpty(outImg))
-    {
-        outImg->chals = (channels_t *)memory_alloc(sizeof(channels_t));
-        outImg->is_chals = 1;
-    }
-    else
-    {
-        memory_free(outImg->chals->ch[0]);
-        // memory_free(outImg->chals->ch[1]);
-    }
-
-    outImg->chals->ch[0] = (float *)memory_alloc(inImg->height * inImg->width * 8);
-    outImg->chals->ch[1] = (float *)memory_alloc(inImg->height * inImg->width * 8);
-
-    float *fourier = outImg->chals->ch[0];
-    float *fourier2 = outImg->chals->ch[1];
-
-    if (isChalsEmpty(inImg))
-    {
-        for (int row = 0; row < imageN * imageN; row++)
-        {
-            fourier[2 * row] = (uint32_t)((uint8_t *)inImg->pixels)[row];
-            fourier[2 * row + 1] = 0x00000000;
-        }
-
-        for (int i = 0; i < imageN; i++)
-        {
-            arm_cfft_f32(&arm_cfft_sR_f32_len256, fourier + imageN * i * 2, 0, 1);
-        }
-
-        for (int k = 0; k < imageN; k++)
-        {
-            for (int j = 0; j < imageN; j++)
-            {
-                fourier2[2 * j + k * imageN * 2] = (float)fourier[j * imageN * 2 + k * 2];
-                fourier2[2 * j + 1 + k * imageN * 2] = (float)fourier[j * imageN * 2 + k * 2 + 1];
-            }
-        }
-
-        for (int i = 0; i < imageN; i++)
-        {
-            arm_cfft_f32(&arm_cfft_sR_f32_len256, fourier2 + imageN * i * 2, 0, 1);
-        }
-    }
-    else
-    {
-
-        for (int row = 0; row < imageN * imageN; row++)
-        {
-            fourier[2 * row] = (float)inImg->chals->ch[0][row];
-            fourier[2 * row + 1] = 0x00000000;
-        }
-
-        for (int i = 0; i < imageN; i++)
-        {
-            arm_cfft_f32(&arm_cfft_sR_f32_len256, fourier + imageN * i * 2, 0, 1);
-        }
-
-        for (int k = 0; k < imageN; k++)
-        {
-            for (int j = 0; j < imageN; j++)
-            {
-                fourier2[2 * j + k * imageN * 2] = (float)fourier[j * imageN * 2 + k * 2];
-                fourier2[2 * j + 1 + k * imageN * 2] = (float)fourier[j * imageN * 2 + k * 2 + 1];
-            }
-        }
-
-        for (int i = 0; i < imageN; i++)
-        {
-            arm_cfft_f32(&arm_cfft_sR_f32_len256, fourier2 + imageN * i * 2, 0, 1);
-        }
-    }
-}
-
-embeddip_status_t mag(const Image *inImg, Image *outImg)
-{
-    int imageN = 256;
-
-    if (isChalsEmpty(outImg))
-    {
-        createChals(outImg, 1);
-        outImg->is_chals = 1;
-    }
-
-    float *fft = inImg->chals->ch[1];
-    float *magnitude = outImg->chals->ch[0];
-
-    float test = 0;
-    for (int i = 0; i < imageN * imageN; ++i)
-    {
-        float re = fft[i * 2];
-        float im = fft[i * 2 + 1];
-        magnitude[i] = sqrtf(re * re + im * im);
-        if (magnitude[i] > test)
-            test = magnitude[i];
-    }
-
-    test = test + 1;
-}
-
-embeddip_status_t phase(const Image *inImg, Image *outImg)
-{
-    int imageN = 256;
-
-    if (isChalsEmpty(outImg))
-    {
-        createChals(outImg, 1);
-        outImg->is_chals = 1;
-    }
-
-    float *fft = inImg->chals->ch[1];
-    float *angle = outImg->chals->ch[0];
-
-    for (int i = 0; i < imageN * imageN; ++i)
-    {
-        angle[i] = atan2f(fft[i * 2 + 1], fft[i * 2]);
-    }
-}
-
-*/
-
-#endif
diff --git a/board/stm32f7/board_stm32f7_memory.c b/board/stm32f7/board_stm32f7_memory.c
index 6278b01..cc61e0a 100755
--- a/board/stm32f7/board_stm32f7_memory.c
+++ b/board/stm32f7/board_stm32f7_memory.c
@@ -3,7 +3,7 @@
 
 #include <embedDIP_configs.h>
 
-#ifdef TARGET_BOARD_STM32F7
+#ifdef EMBED_DIP_BOARD_STM32F7
 
     #include <stdint.h>
     #include <string.h>
@@ -17,8 +17,12 @@
     // Reserve 512KB (0x80000) to be safe
     #define CAMERA_LCD_FRAMEBUFFER_SIZE 0x80000  // 512KB reserved
 
-    #define MEMORY_POOL_SIZE (1024 * 1024 * 8 - CAMERA_LCD_FRAMEBUFFER_SIZE)  // ~6MB
-static uint8_t *memory_pool = ((uint8_t *)SDRAM_BANK_ADDR + CAMERA_LCD_FRAMEBUFFER_SIZE);
+    #define SDRAM_TOTAL_SIZE (1024 * 1024 * 8)
+    #define MEMORY_POOL_SIZE (SDRAM_TOTAL_SIZE - CAMERA_LCD_FRAMEBUFFER_SIZE)  // 7.5MB
+    #define DEFAULT_MEMORY_POOL_ADDR (SDRAM_BANK_ADDR + CAMERA_LCD_FRAMEBUFFER_SIZE)
+
+static uint8_t *memory_pool = (uint8_t *)DEFAULT_MEMORY_POOL_ADDR;
+static size_t memory_pool_size = MEMORY_POOL_SIZE;
 
 typedef struct MemoryBlock {
     uint32_t magic;
@@ -41,7 +45,7 @@ static inline uintptr_t pool_start_addr(void)
 
 static inline uintptr_t pool_end_addr(void)
 {
-    return (uintptr_t)memory_pool + MEMORY_POOL_SIZE;
+    return (uintptr_t)memory_pool + memory_pool_size;
 }
 
 static inline int ptr_in_pool(const void *p)
@@ -59,14 +63,41 @@ static inline int block_header_valid(const MemoryBlock *b)
     return (b->magic == MEMBLOCK_MAGIC);
 }
 
-void memory_init()
+void memory_init(uintptr_t pool_start_addr)
 {
     if (initialized)
         return;
 
+    // Accept both:
+    // 1) offset from SDRAM base (preferred),
+    // 2) absolute SDRAM address for backward compatibility.
+    uintptr_t offset = pool_start_addr;
+    if (pool_start_addr >= SDRAM_BANK_ADDR) {
+        offset = pool_start_addr - SDRAM_BANK_ADDR;
+    }
+    if (offset > SDRAM_TOTAL_SIZE - BLOCK_SIZE) {
+        // Invalid offset: fall back to default reserved location.
+        offset = CAMERA_LCD_FRAMEBUFFER_SIZE;
+    }
+
+    uintptr_t start = (uintptr_t)SDRAM_BANK_ADDR + offset;
+    uintptr_t end = (uintptr_t)SDRAM_BANK_ADDR + SDRAM_TOTAL_SIZE;
+
+    if (start + BLOCK_SIZE >= end) {
+        // Not enough room for allocator metadata; fall back to default.
+        start = DEFAULT_MEMORY_POOL_ADDR;
+    }
+
+    memory_pool = (uint8_t *)start;
+    memory_pool_size = (size_t)(end - start);
+    if (memory_pool_size <= BLOCK_SIZE) {
+        memory_pool = (uint8_t *)DEFAULT_MEMORY_POOL_ADDR;
+        memory_pool_size = MEMORY_POOL_SIZE;
+    }
+
     free_list = (MemoryBlock *)memory_pool;
     free_list->magic = MEMBLOCK_MAGIC;
-    free_list->size = MEMORY_POOL_SIZE - BLOCK_SIZE;
+    free_list->size = memory_pool_size - BLOCK_SIZE;
     free_list->next = NULL;
     free_list->is_free = 1;
 
@@ -76,7 +107,7 @@ void memory_init()
 void *memory_alloc(size_t size)
 {
     if (!initialized)
-        memory_init();
+        memory_init((uintptr_t)DEFAULT_MEMORY_POOL_ADDR);
 
     size = ALIGN4(size);
 
@@ -122,7 +153,7 @@ void memory_free(void *ptr)
         return;
 
     if (!initialized)
-        memory_init();
+        memory_init((uintptr_t)DEFAULT_MEMORY_POOL_ADDR);
 
     MemoryBlock *block = (MemoryBlock *)((uint8_t *)ptr - BLOCK_SIZE);
 
@@ -161,7 +192,7 @@ void *memory_realloc(void *ptr, size_t new_size)
         return memory_alloc(new_size);
 
     if (!initialized)
-        memory_init();
+        memory_init((uintptr_t)DEFAULT_MEMORY_POOL_ADDR);
 
     MemoryBlock *block = (MemoryBlock *)((uint8_t *)ptr - BLOCK_SIZE);
     if (!block_header_valid(block))
@@ -180,4 +211,4 @@ void *memory_realloc(void *ptr, size_t new_size)
     return new_ptr;
 }
 
-#endif  // TARGET_BOARD_STM32F7
+#endif  // EMBED_DIP_BOARD_STM32F7
diff --git a/core/memory_manager.h b/core/memory_manager.h
index 3ccbca5..d743632 100755
--- a/core/memory_manager.h
+++ b/core/memory_manager.h
@@ -38,12 +38,14 @@ extern "C" {
 #endif
 
 /**
- * @brief Initialize the memory manager.
+ * @brief Initialize the memory manager and, for pool backends, place the pool.
  *
  * Should be called once at startup before any other memory functions.
- * If using a pool allocator, this function sets up the pool.
+ * STM32F7 pool backend: an out-of-range start falls back to the default pool address.
+ *
+ * @param pool_start_addr Pool start, as an SDRAM offset or an absolute SDRAM address.
  */
-void memory_init(void);
+void memory_init(uintptr_t pool_start_addr);
 
 /**
  * @brief Allocate a block of memory.
diff --git a/device/camera/ov2640/esp32_ov2640.cpp b/device/camera/esp32_ov2640.cpp
old mode 100755
new mode 100644
similarity index 100%
rename from device/camera/ov2640/esp32_ov2640.cpp
rename to device/camera/esp32_ov2640.cpp
diff --git a/device/camera/ov2640/esp32_ov2640_old.cpp b/device/camera/esp32_ov2640_old.cpp
similarity index 100%
rename from device/camera/ov2640/esp32_ov2640_old.cpp
rename to device/camera/esp32_ov2640_old.cpp
diff --git a/device/camera/ov5640/stm32_ov5640.c b/device/camera/stm32_ov5640.c
old mode 100755
new mode 100644
similarity index 100%
rename from device/camera/ov5640/stm32_ov5640.c
rename to device/camera/stm32_ov5640.c
diff --git a/device/camera/ov5640/stm32_ov5640.h b/device/camera/stm32_ov5640.h
similarity index 100%
rename from device/camera/ov5640/stm32_ov5640.h
rename to device/camera/stm32_ov5640.h
diff --git a/device/display/rk043fn48h/stm32_rk043fn48h.c b/device/display/stm32_rk043fn48h.c
old mode 100755
new mode 100644
similarity index 100%
rename from device/display/rk043fn48h/stm32_rk043fn48h.c
rename to device/display/stm32_rk043fn48h.c
diff --git a/device/serial/esp32_uart/esp32_uart.cpp b/device/serial/esp32_uart.cpp
old mode 100755
new mode 100644
similarity index 100%
rename from device/serial/esp32_uart/esp32_uart.cpp
rename to device/serial/esp32_uart.cpp
diff --git a/device/serial/serial.h b/device/serial/serial.h
index 83fe64a..6ffa1b9 100755
--- a/device/serial/serial.h
+++ b/device/serial/serial.h
@@ -37,11 +37,11 @@ typedef struct serial_interface {
 int _write(int file, char *ptr, int len);
 
 // External declaration of STM32 implementation
-#ifdef TARGET_BOARD_STM32F7
+#ifdef EMBED_DIP_BOARD_STM32F7
 extern serial_t stm32_uart;
 #endif
 
-#ifdef TARGET_BOARD_ESP32
+#ifdef EMBED_DIP_BOARD_ESP32
 extern serial_t esp32_uart;
 #endif
 
diff --git a/device/serial/stm32_uart/stm32_uart.c b/device/serial/stm32_uart.c
old mode 100755
new mode 100644
similarity index 59%
rename from device/serial/stm32_uart/stm32_uart.c
rename to device/serial/stm32_uart.c
index d731818..39d16ff
--- a/device/serial/stm32_uart/stm32_uart.c
+++ b/device/serial/stm32_uart.c
@@ -58,92 +58,6 @@ void HAL_UART_RxCpltCallback(UART_HandleTypeDef *huart)
     (void)huart;
     rx_flag = true;
 }
-/*
-static void serial_capture_dma(Image *img)
-{
-    uint8_t request_start_sequence[3] = "STR";
-    assert(img != NULL);
-    assert(img->pixels != NULL);
-
-    // Calculate block parameters
-    uint16_t blockSize = ((img->size * img->depth) < UART_BLOCK_SIZE_MAX) ? (img->size * img->depth)
-: UART_BLOCK_SIZE_MAX; uint32_t blockCount = (img->size * img->depth) / blockSize; uint16_t
-lastBlockSize = (img->size * img->depth) % blockSize;
-
-    // Send capture request header
-    HAL_UART_Transmit(&huart1, request_start_sequence, 3, HAL_MAX_DELAY);
-    HAL_Delay(1); // Optional small delay
-
-    HAL_UART_Transmit(&huart1, (uint8_t *)(&img->width), sizeof(img->width), HAL_MAX_DELAY);
-    HAL_UART_Transmit(&huart1, (uint8_t *)(&img->height), sizeof(img->height), HAL_MAX_DELAY);
-    HAL_UART_Transmit(&huart1, (uint8_t *)(&img->format), sizeof(img->format), HAL_MAX_DELAY);
-    HAL_UART_Transmit(&huart1, (uint8_t *)(&img->depth), sizeof(img->depth), HAL_MAX_DELAY);
-
-    // Step 3: Send image pixel data in blocks
-    const uint8_t *pixelPtr = img->pixels;
-    for (uint32_t i = 0; i < blockCount; i++)
-    {
-        HAL_UART_Receive_DMA(&huart1, pixelPtr, blockSize);
-        pixelPtr += blockSize;
-        while (!rx_flag)
-            ;
-        rx_flag = false;
-    }
-
-    // Step 4: Send any remaining bytes
-    if (lastBlockSize > 0)
-    {
-        HAL_UART_Receive_DMA(&huart1, pixelPtr, lastBlockSize);
-        while (!rx_flag)
-            ;
-        rx_flag = false;
-    }
-}
-
-static void serial_send_dma(Image *img)
-{
-    assert(img != NULL);
-    assert(img->pixels != NULL);
-    uint8_t request_start_sequence[3] = "STW";
-    // Calculate block transmission parameters
-    uint16_t blockSize = ((img->size * img->depth) < UART_BLOCK_SIZE_MAX) ? (img->size * img->depth)
-: UART_BLOCK_SIZE_MAX; uint32_t blockCount = (img->size * img->depth) / blockSize; uint16_t
-lastBlockSize = (img->size * img->depth) % blockSize;
-
-    // Step 1: Send command header
-    HAL_UART_Transmit(&huart1, request_start_sequence, 3, HAL_MAX_DELAY);
-    HAL_Delay(1); // Give receiver time to prepare
-
-    // Step 2: Send image metadata
-
-    HAL_UART_Transmit(&huart1, (uint8_t *)(&img->width), sizeof(img->width), HAL_MAX_DELAY);
-    HAL_UART_Transmit(&huart1, (uint8_t *)(&img->height), sizeof(img->height), HAL_MAX_DELAY);
-    HAL_UART_Transmit(&huart1, (uint8_t *)(&img->format), sizeof(img->format), HAL_MAX_DELAY);
-    HAL_UART_Transmit(&huart1, (uint8_t *)(&img->depth), sizeof(img->depth), HAL_MAX_DELAY);
-    HAL_Delay(200); // Allow receiver to process metadata
-
-    // Step 3: Send image pixel data in blocks
-    const uint8_t *pixelPtr = img->pixels;
-    uint8_t testarr[] = "ozan durgut ses ver";
-    for (uint32_t i = 0; i < blockCount; i++)
-    {
-        HAL_UART_Transmit_DMA(&huart1, pixelPtr, blockSize);
-        pixelPtr += blockSize;
-        while (!tx_flag)
-            ;
-        tx_flag = false;
-    }
-
-    // Step 4: Send any remaining bytes
-    if (lastBlockSize > 0)
-    {
-        HAL_UART_Transmit_DMA(&huart1, pixelPtr, lastBlockSize);
-        while (!tx_flag)
-            ;
-        tx_flag = false;
-    }
-}
-*/
 
 static int serial_capture(Image *img)
 {
@@ -276,8 +190,6 @@ static int serial_send_jpeg(const Image *img)
         HAL_UART_Transmit(&huart1, ptr, chunk, HAL_MAX_DELAY);
         ptr += chunk;
         remaining -= chunk;
-
-        HAL_Delay(1);  // Delay is fine for UART pacing, or use DMA for better performance
     }
     return EMBEDDIP_OK;
 }
@@ -311,83 +223,6 @@ serial_send_1d(const void *data, uint8_t elem_size, uint32_t length, Serial1DDat
     return EMBEDDIP_OK;
 }
 
-/*
-static void serial_capture(Image *img)
-{
-
-    uint8_t request_start_sequence[3] = "STR";
-
-    uint16_t _blocksize = 65535, _lastblocksize = 0;
-    uint32_t i = 0, _blockCount = 0;
-
-    uint16_t sizear[3] = {img->width, img->height, img->format};
-
-    if (img->size < 65536)
-        _blocksize = img->size;
-
-    _blockCount = img->size / _blocksize;
-    _lastblocksize = (uint16_t)(img->size % _blocksize);
-
-    HAL_UART_Transmit(&huart1, request_start_sequence, 3, HAL_MAX_DELAY);
-    HAL_Delay(1);
-
-    HAL_UART_Transmit(&huart1, (uint8_t *)(&img->width), sizeof(uint16_t), HAL_MAX_DELAY);
-    HAL_UART_Transmit(&huart1, (uint8_t *)(&img->height), sizeof(uint16_t),
-                      HAL_MAX_DELAY);
-    HAL_UART_Transmit(&huart1, (uint8_t *)(&img->format), sizeof(uint16_t),
-                      HAL_MAX_DELAY);
-
-    for (i = 0; i < _blockCount; i++)
-        HAL_UART_Receive(&huart1, img->pixels + (i * _blocksize), _blocksize,
-                         HAL_MAX_DELAY);
-
-    if (_lastblocksize)
-        HAL_UART_Receive(&huart1, img->pixels + (i * _blocksize),
-                         _lastblocksize, HAL_MAX_DELAY);
-
-    return;
-}
-
-static void serial_send(const Image *img)
-{
-    uint8_t request_start_sequence[3] = "STW";
-
-    uint16_t _blocksize = 65535, _lastblocksize = 0;
-    uint32_t i = 0, _blockCount = 0;
-
-    if (img->size < 65536)
-        _blocksize = img->size;
-
-    _blockCount = img->size / _blocksize;
-    _lastblocksize = (uint16_t)(img->size % _blocksize);
-
-    HAL_UART_Transmit(&huart1, request_start_sequence, 3, HAL_MAX_DELAY);
-    HAL_Delay(1);
-    HAL_UART_Transmit(&huart1, (uint8_t *)(&img->width), sizeof(uint16_t), HAL_MAX_DELAY);
-    HAL_UART_Transmit(&huart1, (uint8_t *)(&img->height), sizeof(uint16_t),
-                      HAL_MAX_DELAY);
-    HAL_UART_Transmit(&huart1, (uint8_t *)(&img->format), sizeof(uint8_t),
-                      HAL_MAX_DELAY);
-    HAL_Delay(200);
-    for (i = 0; i < _blockCount; i++)
-        HAL_UART_Transmit(&huart1, img->pixels + (i * _blocksize), _blocksize,
-                          HAL_MAX_DELAY);
-
-    if (_lastblocksize)
-        HAL_UART_Transmit(&huart1, img->pixels + (i * _blocksize),
-                          _lastblocksize, HAL_MAX_DELAY);
-}
-
-int _write(int file, char *ptr, int len)
-{
-    for (int i = 0; i < len; i++)
-    {
-        HAL_UART_Transmit(&huart1, (uint8_t *)&ptr[i], 1, HAL_MAX_DELAY);
-    }
-    return len;
-}
-*/
-
 // Define the object
 serial_t stm32_uart = {
     .init = serial_init,
@@ -398,4 +233,4 @@ serial_t stm32_uart = {
     .flush = serial_flush,
 };
 
-#endif
\ No newline at end of file
+#endif
diff --git a/embedDIP.h b/embedDIP.h
index d6aa56c..28f80fa 100755
--- a/embedDIP.h
+++ b/embedDIP.h
@@ -46,7 +46,7 @@ extern "C" {
  */
 
 /** @brief Major version (breaking changes). */
-#define EMBED_DIP_VERSION_MAJOR 0U
+#define EMBED_DIP_VERSION_MAJOR 1U
 /** @brief Minor version (new features, backward compatible). */
 #define EMBED_DIP_VERSION_MINOR 1U
 /** @brief Patch version (bug fixes, no API changes). */
@@ -72,6 +72,7 @@ extern "C" {
 #include "core/memory_manager.h"         /**< Allocators and memory helpers. */
 #include "device/serial/serial.h"        /**< Serial I/O abstraction. */
 #include "imgproc/color.h"               /**< Color conversions and helpers. */
+#include "imgproc/compress.h"            /**< JPEG compression helper. */
 #include "imgproc/connectedcomponents.h" /**< Connected components labeling. */
 #include "imgproc/drawing.h"             /**< Drawing primitives and shapes. */
 #include "imgproc/fft.h"                 /**< Frequency-domain processing. */
@@ -91,11 +92,11 @@ extern "C" {
  * @{
  */
 
-#if defined(TARGET_BOARD_ESP32)
+#if defined(EMBED_DIP_BOARD_ESP32)
     #include "device/camera/camera.h" /**< Camera abstraction. */
 #endif
 
-#if defined(TARGET_BOARD_STM32F7)
+#if defined(EMBED_DIP_BOARD_STM32F7)
     #include "device/camera/camera.h"   /**< Camera abstraction. */
     #include "device/display/display.h" /**< Display abstraction. */
 #endif
diff --git a/cmake/embedDIPConfig.cmake.in b/embedDIPConfig.cmake.in
old mode 100755
new mode 100644
similarity index 100%
rename from cmake/embedDIPConfig.cmake.in
rename to embedDIPConfig.cmake.in
diff --git a/embedDIP_configs.h b/embedDIP_configs.h
index d26ed97..b381edc 100755
--- a/embedDIP_configs.h
+++ b/embedDIP_configs.h
@@ -9,54 +9,82 @@
  * @file embedDIP_configs.h
  * @brief User-editable build configuration for EmbedDIP.
  *
- * Define exactly **one** target board and (optionally) override feature flags
- * and device selections.
+ * Select exactly one board, one architecture family, and one CPU variant via
+ * compiler defines.
+ * Typical usage with CMake: set EMBEDDIP_TARGET_BOARD, EMBEDDIP_ARCH, EMBEDDIP_CPU.
  */
 
 /* -------------------------------------------------------------------------- */
-/* Target selection */
+/* Hard-switch guard (legacy macros removed)                                   */
 /* -------------------------------------------------------------------------- */
-/**
- * @defgroup embedDIP_cfg_target Target selection
- * @brief Choose exactly one target platform.
- * @{
- *
- */
+#if defined(TARGET_BOARD_STM32F7) || defined(TARGET_BOARD_ESP32) || defined(TARGET_BOARD_OTHER)
+    #error                                                                                         \
+        "Legacy TARGET_BOARD_* macros are not supported. Use EMBED_DIP_BOARD_*, EMBED_DIP_ARCH_* and EMBED_DIP_CPU_* instead."
+#endif
 
-/* Uncomment **one** of the following, or define via compiler flags. */
-#define TARGET_BOARD_STM32F7 1
-// #define TARGET_BOARD_ESP32   1
-/* #define TARGET_BOARD_OTHER   1 */
+/* -------------------------------------------------------------------------- */
+/* Target selection                                                            */
+/* -------------------------------------------------------------------------- */
+/* Uncomment only if you do not provide these from the build system. */
+/* #define EMBED_DIP_BOARD_STM32F7 1 */
+/* #define EMBED_DIP_BOARD_ESP32   1 */
+
+/* #define EMBED_DIP_ARCH_ARM     1 */
+/* #define EMBED_DIP_ARCH_XTENSA  1 */
 
-/* Sanity check: ensure exactly one target is selected. */
-#if ((defined(TARGET_BOARD_STM32F7) ? 1 : 0) + (defined(TARGET_BOARD_ESP32) ? 1 : 0) +             \
-     (defined(TARGET_BOARD_OTHER) ? 1 : 0)) == 0
+/* #define EMBED_DIP_CPU_CORTEX_M7 1 */
+/* #define EMBED_DIP_CPU_LX6       1 */
+/* #define EMBED_DIP_CPU_LX7       1 */
+
+/* Sanity check: exactly one board. */
+#if ((defined(EMBED_DIP_BOARD_STM32F7) ? 1 : 0) + (defined(EMBED_DIP_BOARD_ESP32) ? 1 : 0)) == 0
     #error                                                                                         \
-        "No target selected: define exactly one of TARGET_BOARD_STM32F7, TARGET_BOARD_ESP32, TARGET_BOARD_OTHER."
-#elif ((defined(TARGET_BOARD_STM32F7) ? 1 : 0) + (defined(TARGET_BOARD_ESP32) ? 1 : 0) +           \
-       (defined(TARGET_BOARD_OTHER) ? 1 : 0)) > 1
+        "No board selected: define exactly one of EMBED_DIP_BOARD_STM32F7 or EMBED_DIP_BOARD_ESP32."
+#elif ((defined(EMBED_DIP_BOARD_STM32F7) ? 1 : 0) + (defined(EMBED_DIP_BOARD_ESP32) ? 1 : 0)) > 1
     #error                                                                                         \
-        "Multiple targets selected: define **only one** of TARGET_BOARD_STM32F7, TARGET_BOARD_ESP32, TARGET_BOARD_OTHER."
+        "Multiple boards selected: define only one of EMBED_DIP_BOARD_STM32F7 or EMBED_DIP_BOARD_ESP32."
+#endif
+
+/* Sanity check: exactly one architecture family. */
+#if ((defined(EMBED_DIP_ARCH_ARM) ? 1 : 0) + (defined(EMBED_DIP_ARCH_XTENSA) ? 1 : 0)) == 0
+    #error                                                                                         \
+        "No architecture family selected: define exactly one of EMBED_DIP_ARCH_ARM or EMBED_DIP_ARCH_XTENSA."
+#elif ((defined(EMBED_DIP_ARCH_ARM) ? 1 : 0) + (defined(EMBED_DIP_ARCH_XTENSA) ? 1 : 0)) > 1
+    #error "Multiple architecture families selected: define only one EMBED_DIP_ARCH_* macro."
+#endif
+
+/* Sanity check: exactly one CPU variant. */
+#if ((defined(EMBED_DIP_CPU_CORTEX_M7) ? 1 : 0) + (defined(EMBED_DIP_CPU_LX6) ? 1 : 0) +           \
+     (defined(EMBED_DIP_CPU_LX7) ? 1 : 0)) == 0
+    #error                                                                                         \
+        "No CPU selected: define exactly one of EMBED_DIP_CPU_CORTEX_M7, EMBED_DIP_CPU_LX6, EMBED_DIP_CPU_LX7."
+#elif ((defined(EMBED_DIP_CPU_CORTEX_M7) ? 1 : 0) + (defined(EMBED_DIP_CPU_LX6) ? 1 : 0) +         \
+       (defined(EMBED_DIP_CPU_LX7) ? 1 : 0)) > 1
+    #error "Multiple CPUs selected: define only one EMBED_DIP_CPU_* macro."
+#endif
+
+/* Board/architecture/CPU compatibility matrix. */
+#if defined(EMBED_DIP_BOARD_STM32F7)
+    #if !(defined(EMBED_DIP_ARCH_ARM) && defined(EMBED_DIP_CPU_CORTEX_M7))
+        #error                                                                                     \
+            "Invalid combination: EMBED_DIP_BOARD_STM32F7 requires EMBED_DIP_ARCH_ARM + EMBED_DIP_CPU_CORTEX_M7."
+    #endif
+#elif defined(EMBED_DIP_BOARD_ESP32)
+    #if !(defined(EMBED_DIP_ARCH_XTENSA) &&                                                        \
+          (defined(EMBED_DIP_CPU_LX6) || defined(EMBED_DIP_CPU_LX7)))
+        #error                                                                                     \
+            "Invalid combination: EMBED_DIP_BOARD_ESP32 requires EMBED_DIP_ARCH_XTENSA + (EMBED_DIP_CPU_LX6 or EMBED_DIP_CPU_LX7)."
+    #endif
 #endif
-/** @} */ /* end of embedDIP_cfg_target */
 
 /**
  * @defgroup embedDIP_cfg_features Feature flags
  * @brief Enable/disable optional subsystems.
  * @{
- *
- * Each flag defaults to 1 (enabled) when applicable for the target. Define
- * as 0 to disable at compile time.
- *
- * - `ENABLE_UART_LOGGING` : UART-based logging helpers
- * - `ENABLE_IMAGE_PROCESSING` : image processing modules
- * - `ENABLE_CAMERA_INPUT` : camera capture interfaces
- * - `ENABLE_DISPLAY_OUTPUT` : display output interfaces
  */
 
 /* ============================== STM32F7 =================================== */
-#if defined(TARGET_BOARD_STM32F7)
-    /** @brief Vendor-family define for STM32F7. */
+#if defined(EMBED_DIP_BOARD_STM32F7)
     #ifndef STM32F7xx
         #define STM32F7xx 1
     #endif
@@ -74,20 +102,18 @@
         #define ENABLE_DISPLAY_OUTPUT 1
     #endif
 
-    /* Devices available on STM32F7 builds (overridable) */
     #ifndef DEVICE_OV5640
-        #define DEVICE_OV5640 1 /**< OV5640 camera module present. */
+        #define DEVICE_OV5640 1
     #endif
     #ifndef DEVICE_RK043FN48H
-        #define DEVICE_RK043FN48H 1 /**< RK043FN48H display panel present. */
+        #define DEVICE_RK043FN48H 1
     #endif
     #ifndef DEVICE_STM32_UART
-        #define DEVICE_STM32_UART 1 /**< Use STM32 HAL UART backend. */
+        #define DEVICE_STM32_UART 1
     #endif
 
 /* =============================== ESP32 ==================================== */
-#elif defined(TARGET_BOARD_ESP32)
-    /** @brief Arduino-style arch define for ESP32 builds. */
+#elif defined(EMBED_DIP_BOARD_ESP32)
     #ifndef ARDUINO_ARCH_ESP32
         #define ARDUINO_ARCH_ESP32 1
     #endif
@@ -102,48 +128,17 @@
         #define ENABLE_CAMERA_INPUT 1
     #endif
     #ifndef ENABLE_DISPLAY_OUTPUT
-        #define ENABLE_DISPLAY_OUTPUT 0 /* default off unless a display is wired */
+        #define ENABLE_DISPLAY_OUTPUT 0
     #endif
 
-    /* Devices available on ESP32 builds (overridable) */
     #ifndef DEVICE_OV2640
-        #define DEVICE_OV2640 1 /**< OV2640 camera module present. */
+        #define DEVICE_OV2640 1
     #endif
     #ifndef DEVICE_ESP32_UART
-        #define DEVICE_ESP32_UART 1 /**< Use ESP32 UART backend. */
-    #endif
-
-/* ============================== OTHER ===================================== */
-#elif defined(TARGET_BOARD_OTHER)
-    /**
-     * @brief Generic/other target: start with minimal defaults and enable what you
-     * need.
-     * @note Adjust device macros below to match your board.
-     */
-    #ifndef ENABLE_UART_LOGGING
-        #define ENABLE_UART_LOGGING 0
+        #define DEVICE_ESP32_UART 1
     #endif
-    #ifndef ENABLE_IMAGE_PROCESSING
-        #define ENABLE_IMAGE_PROCESSING 1
-    #endif
-    #ifndef ENABLE_CAMERA_INPUT
-        #define ENABLE_CAMERA_INPUT 0
-    #endif
-    #ifndef ENABLE_DISPLAY_OUTPUT
-        #define ENABLE_DISPLAY_OUTPUT 0
-    #endif
-
-    /* Example device toggles (customize for your platform) */
-    #ifndef DEVICE_OV5640
-        #define DEVICE_OV5640 0
-    #endif
-    #ifndef DEVICE_OV2640
-        #define DEVICE_OV2640 0
-    #endif
-
-#else
-    #error "Unexpected configuration state. This should be unreachable."
 #endif
+
 /** @} */ /* end of embedDIP_cfg_features */
 
 /**
diff --git a/imgproc/compress.c b/imgproc/compress.c
new file mode 100644
index 0000000..3b1ef36
--- /dev/null
+++ b/imgproc/compress.c
@@ -0,0 +1,219 @@
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2025 EmbedDIP
+
+#include "imgproc/compress.h"
+
+#include <setjmp.h>
+#include <stdlib.h>
+
+#if defined(EMBEDDIP_HAVE_LIBJPEG)
+
+    #include "jpeglib.h"
+
+/* Destination manager that writes into a caller-supplied, fixed-size buffer. */
+typedef struct {
+    struct jpeg_destination_mgr pub; /* Must be first: libjpeg sees only this part. */
+    JOCTET *buffer;                  /* Caller-owned output buffer. */
+    uint32_t capacity;               /* Size of buffer in bytes. */
+    JOCTET spill[64];                /* Scratch sink used once buffer is full. */
+    int overflow;                    /* Non-zero once output exceeded capacity. */
+} fixed_dest_mgr_t;
+
+/* Error manager that returns control to compress() instead of calling exit(). */
+typedef struct {
+    struct jpeg_error_mgr pub; /* Must be first: libjpeg sees only this part. */
+    jmp_buf on_error;          /* Jump target armed by compress(). */
+} compress_error_mgr_t;
+
+/* libjpeg fatal-error hook: unwind to the setjmp() point in compress(). */
+static void compress_error_exit(j_common_ptr cinfo)
+{
+    compress_error_mgr_t *err = (compress_error_mgr_t *)cinfo->err;
+    longjmp(err->on_error, 1);
+}
+
+/* Point libjpeg at the start of the caller's buffer. */
+static void fixed_dest_init(j_compress_ptr cinfo)
+{
+    fixed_dest_mgr_t *dest = (fixed_dest_mgr_t *)cinfo->dest;
+    dest->pub.next_output_byte = dest->buffer;
+    dest->pub.free_in_buffer = dest->capacity;
+    dest->overflow = 0;
+}
+
+/* Called when the buffer is full: flag overflow and discard further output. */
+static boolean fixed_dest_empty(j_compress_ptr cinfo)
+{
+    fixed_dest_mgr_t *dest = (fixed_dest_mgr_t *)cinfo->dest;
+    dest->overflow = 1;
+    dest->pub.next_output_byte = dest->spill;
+    dest->pub.free_in_buffer = sizeof(dest->spill);
+    return TRUE;
+}
+
+/* Nothing to flush: output is already in the caller's buffer. */
+static void fixed_dest_term(j_compress_ptr cinfo)
+{
+    (void)cinfo;
+}
+
+/* Install the fixed-buffer destination manager on cinfo. */
+static void jpeg_fixed_dest(j_compress_ptr cinfo, uint8_t *out, uint32_t out_capacity)
+{
+    fixed_dest_mgr_t *dest = (fixed_dest_mgr_t *)cinfo->dest;
+    if (dest == NULL) {
+        cinfo->dest = (struct jpeg_destination_mgr *)(*cinfo->mem->alloc_small)(
+            (j_common_ptr)cinfo, JPOOL_PERMANENT, sizeof(fixed_dest_mgr_t));
+        dest = (fixed_dest_mgr_t *)cinfo->dest;
+    }
+
+    dest->buffer = out;
+    dest->capacity = out_capacity;
+    dest->overflow = 0;
+    dest->pub.init_destination = fixed_dest_init;
+    dest->pub.empty_output_buffer = fixed_dest_empty;
+    dest->pub.term_destination = fixed_dest_term;
+}
+
+/* Apply the shared encoder settings and start the compression cycle. */
+static void compress_begin(j_compress_ptr cinfo, int components, J_COLOR_SPACE space, int quality)
+{
+    cinfo->input_components = components;
+    cinfo->in_color_space = space;
+    jpeg_set_defaults(cinfo);
+    jpeg_set_quality(cinfo, quality, TRUE);
+    cinfo->dct_method = JDCT_IFAST;
+    cinfo->optimize_coding = FALSE;
+    jpeg_start_compress(cinfo, TRUE);
+}
+
+/* Feed tightly packed rows of row_stride bytes straight from the source buffer. */
+static void compress_write_packed(j_compress_ptr cinfo, uint8_t *pixels, uint32_t row_stride)
+{
+    while (cinfo->next_scanline < cinfo->image_height) {
+        JSAMPROW row_pointer = &pixels[cinfo->next_scanline * row_stride];
+        jpeg_write_scanlines(cinfo, &row_pointer, 1);
+    }
+}
+
+/* Expand each RGB565 row to RGB888 in row (3 * image_width bytes) and encode it. */
+static void compress_write_rgb565(j_compress_ptr cinfo, const uint16_t *pixels, JSAMPLE *row)
+{
+    const uint32_t width = cinfo->image_width;
+    while (cinfo->next_scanline < cinfo->image_height) {
+        const uint16_t *line = &pixels[cinfo->next_scanline * width];
+        for (uint32_t x = 0; x < width; x++) {
+            uint32_t r = (line[x] >> 11) & 0x1F;
+            uint32_t g = (line[x] >> 5) & 0x3F;
+            uint32_t b = line[x] & 0x1F;
+            /* Replicate the top bits into the low bits so full scale maps to 255. */
+            row[x * 3 + 0] = (JSAMPLE)((r << 3) | (r >> 2));
+            row[x * 3 + 1] = (JSAMPLE)((g << 2) | (g >> 4));
+            row[x * 3 + 2] = (JSAMPLE)((b << 3) | (b >> 2));
+        }
+        JSAMPROW row_pointer = row;
+        jpeg_write_scanlines(cinfo, &row_pointer, 1);
+    }
+}
+
+/*
+ * Encode src as a JPEG into dst->pixels.
+ *
+ * The output capacity is taken as dst->width * dst->height * dst->depth bytes;
+ * on success dst->size holds the number of JPEG bytes written. Returns 0 on
+ * success and -1 on bad arguments, unsupported format, allocation failure,
+ * any libjpeg error, or when the JPEG does not fit in the destination.
+ *
+ * Not reentrant: the RGB565 conversion row is a static buffer that is grown on
+ * demand and kept for later calls.
+ */
+int compress(Image *src, Image *dst, int format, int quality)
+{
+    static JSAMPLE *row_buffer = NULL;
+    static uint32_t row_buffer_capacity = 0;
+
+    if (!src || !dst || !src->pixels || !dst->pixels) {
+        return -1;
+    }
+    if (format != IMAGE_COMP_JPEG) {
+        return -1;
+    }
+    /* libjpeg treats an empty image as a fatal error; reject it up front. */
+    if (src->width == 0 || src->height == 0) {
+        return -1;
+    }
+
+    uint32_t dst_capacity = dst->width * dst->height * dst->depth;
+    if (dst_capacity == 0) {
+        return -1;
+    }
+
+    /* Grow the conversion row before touching libjpeg so failure is cheap. */
+    if (src->format == IMAGE_FORMAT_RGB565) {
+        uint32_t row_bytes = (uint32_t)src->width * 3u;
+        if (row_bytes > row_buffer_capacity) {
+            JSAMPLE *grown = (JSAMPLE *)realloc(row_buffer, row_bytes);
+            if (!grown) {
+                return -1;
+            }
+            row_buffer = grown;
+            row_buffer_capacity = row_bytes;
+        }
+    } else if (src->format != IMAGE_FORMAT_RGB888 && src->format != IMAGE_FORMAT_GRAYSCALE) {
+        return -1;
+    }
+
+    struct jpeg_compress_struct cinfo;
+    compress_error_mgr_t jerr;
+
+    cinfo.err = jpeg_std_error(&jerr.pub);
+    jerr.pub.error_exit = compress_error_exit;
+    if (setjmp(jerr.on_error)) {
+        /* A libjpeg call failed; release everything it allocated and bail out. */
+        jpeg_destroy_compress(&cinfo);
+        return -1;
+    }
+
+    jpeg_create_compress(&cinfo);
+    jpeg_fixed_dest(&cinfo, (uint8_t *)dst->pixels, dst_capacity);
+
+    cinfo.image_width = src->width;
+    cinfo.image_height = src->height;
+
+    if (src->format == IMAGE_FORMAT_RGB565) {
+        compress_begin(&cinfo, 3, JCS_RGB, quality);
+        compress_write_rgb565(&cinfo, (const uint16_t *)src->pixels, row_buffer);
+    } else if (src->format == IMAGE_FORMAT_RGB888) {
+        compress_begin(&cinfo, 3, JCS_RGB, quality);
+        compress_write_packed(&cinfo, (uint8_t *)src->pixels, (uint32_t)src->width * 3u);
+    } else {
+        compress_begin(&cinfo, 1, JCS_GRAYSCALE, quality);
+        compress_write_packed(&cinfo, (uint8_t *)src->pixels, (uint32_t)src->width);
+    }
+
+    jpeg_finish_compress(&cinfo);
+
+    fixed_dest_mgr_t *dest = (fixed_dest_mgr_t *)cinfo.dest;
+    if (!dest || dest->overflow) {
+        jpeg_destroy_compress(&cinfo);
+        return -1;
+    }
+
+    dst->size = (uint32_t)(dst_capacity - dest->pub.free_in_buffer);
+    jpeg_destroy_compress(&cinfo);
+    return 0;
+}
+
+#else
+
+/* Built without libjpeg: compression is unavailable, so always report failure. */
+int compress(Image *src, Image *dst, int format, int quality)
+{
+    (void)src;
+    (void)dst;
+    (void)format;
+    (void)quality;
+    return -1;
+}
+
+#endif
diff --git a/imgproc/compress.h b/imgproc/compress.h
new file mode 100644
index 0000000..74bd474
--- /dev/null
+++ b/imgproc/compress.h
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2025 EmbedDIP
+
+#ifndef EMBEDDIP_IMGPROC_COMPRESS_H
+#define EMBEDDIP_IMGPROC_COMPRESS_H
+
+#include "core/image.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Compression format */
+#define IMAGE_COMP_JPEG 0
+
+/**
+ * @brief Compress image to JPEG format.
+ * @param src Source image (RGB565, RGB888, or grayscale).
+ * @param dst Destination image; its pixels buffer receives the JPEG bytes and size their count.
+ * @param format Compression format (use IMAGE_COMP_JPEG).
+ * @param quality JPEG quality (1-100, higher = better quality).
+ * @return 0 on success, -1 on error (always -1 when built without EMBEDDIP_HAVE_LIBJPEG).
+ */
+int compress(Image *src, Image *dst, int format, int quality);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* EMBEDDIP_IMGPROC_COMPRESS_H */
diff --git a/imgproc/fft.c b/imgproc/fft.c
new file mode 100644
index 0000000..9f1ddee
--- /dev/null
+++ b/imgproc/fft.c
@@ -0,0 +1,585 @@
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2025 EmbedDIP
+
+#include <arch/fft_backend.h>
+#include <board/common.h>
+#include <core/memory_manager.h>
+#include <embedDIP_configs.h>
+#include <imgproc/fft.h>
+
+static bool isValidFFTSize(int w, int h)
+{   /* Valid = square with a positive power-of-two side; 0 alone would pass the bit test. */
+    return (w > 0) && (w == h) && ((w & (w - 1)) == 0);
+}
+
+/* Out-of-place transpose of an n x n matrix of interleaved (re, im) floats. */
+static void transposeComplex(const float *src, float *dst, int n)
+{
+    for (int row = 0; row < n; ++row) {
+        const float *in = src + 2 * (row * n);  /* walks along the source row */
+        for (int col = 0; col < n; ++col, in += 2) {
+            dst[2 * (col * n + row)] = in[0];
+            dst[2 * (col * n + row) + 1] = in[1];
+        }
+    }
+}
+
+/**
+ * @brief Forward 2-D FFT of an 8-bit image using row transforms and transposes.
+ * @param src Square power-of-two image; pixels are read one byte per sample.
+ * @param dst Gets the interleaved (re, im) result in chals->ch[1], mirrored in ch[0].
+ * @return EMBEDDIP_OK, or the first validation, allocation or backend error.
+ */
+embeddip_status_t fft(const Image *src, Image *dst)
+{
+    if (src == NULL || dst == NULL || src->pixels == NULL)
+        return EMBEDDIP_ERROR_NULL_PTR;
+    if (!isValidFFTSize(src->width, src->height))
+        return EMBEDDIP_ERROR_INVALID_SIZE;
+
+    const int n = src->width;
+    embeddip_status_t status = embeddip_fft_backend_init(n);
+    if (status != EMBEDDIP_OK)
+        return status;
+
+    if (isChalsEmpty(dst)) {
+        status = createChalsComplex(dst, 2);
+        if (status != EMBEDDIP_OK)
+            return status;
+        dst->is_chals = 1;
+    }
+    if (!dst->chals || !dst->chals->ch[0] || !dst->chals->ch[1])
+        return EMBEDDIP_ERROR_NULL_PTR;
+
+    float *work = dst->chals->ch[0];
+    float *spectrum = dst->chals->ch[1];
+    const uint8_t *samples = (const uint8_t *)src->pixels;
+
+    /* Promote every byte to a complex value with a zero imaginary part. */
+    for (int i = 0; i < n * n; ++i) {
+        work[2 * i] = (float)samples[i];
+        work[2 * i + 1] = 0.0f;
+    }
+
+    /* Pass 0 transforms the rows of work; pass 1 the rows of its transpose. */
+    for (int pass = 0; pass < 2; ++pass) {
+        float *rows = (pass == 0) ? work : spectrum;
+        for (int r = 0; r < n; ++r) {
+            status = embeddip_fft_backend_forward_1d(rows + r * n * 2, n);
+            if (status != EMBEDDIP_OK)
+                return status;
+        }
+        if (pass == 0)
+            transposeComplex(work, spectrum, n);
+    }
+
+    transposeComplex(spectrum, work, n);  /* restore row-major orientation */
+    for (int i = 0; i < n * n * 2; ++i)
+        spectrum[i] = work[i];
+    dst->log = IMAGE_DATA_COMPLEX;
+    return EMBEDDIP_OK;
+}
+
+/**
+ * @brief Inverse 2-D FFT; stores the real part of the result in dst->chals->ch[0].
+ * @param src Interleaved (re, im) spectrum: ch[1] if log is COMPLEX, ch[0] if CH0.
+ * @param dst Output image; a single float channel is created when it has none.
+ * @return EMBEDDIP_OK, or the first validation, allocation or backend error.
+ * @note Runs on a scratch copy, so the data behind the const src is not modified.
+ */
+embeddip_status_t ifft(const Image *src, Image *dst)
+{
+    if (!src || !dst || !src->chals)
+        return EMBEDDIP_ERROR_NULL_PTR;
+    if (!isValidFFTSize(src->width, src->height))
+        return EMBEDDIP_ERROR_INVALID_SIZE;
+    if (src->log != IMAGE_DATA_COMPLEX && src->log != IMAGE_DATA_CH0)
+        return EMBEDDIP_ERROR_INVALID_ARG;
+
+    int n = src->width;
+    embeddip_status_t status = embeddip_fft_backend_init(n);
+    if (status != EMBEDDIP_OK)
+        return status;
+
+    const float *input = (src->log == IMAGE_DATA_COMPLEX) ? src->chals->ch[1] : src->chals->ch[0];
+    if (!input)
+        return EMBEDDIP_ERROR_NULL_PTR;
+    float *tmp = (float *)memory_alloc((size_t)n * (size_t)n * 2U * sizeof(float));
+    if (!tmp)
+        return EMBEDDIP_ERROR_OUT_OF_MEMORY;
+
+    if (isChalsEmpty(dst)) {
+        status = createChals(dst, 1);
+        if (status != EMBEDDIP_OK)
+            goto done;
+        dst->is_chals = 1;
+    }
+    if (!dst->chals || !dst->chals->ch[0]) {
+        status = EMBEDDIP_ERROR_NULL_PTR;
+        goto done;
+    }
+
+    for (int i = 0; i < n * n * 2; ++i)
+        tmp[i] = input[i];
+    /* Row pass, in-place transpose, then a second row pass (the columns). */
+    for (int pass = 0; pass < 2; ++pass) {
+        for (int row = 0; row < n; ++row) {
+            status = embeddip_fft_backend_inverse_1d(tmp + row * n * 2, n);
+            if (status != EMBEDDIP_OK)
+                goto done;
+        }
+        for (int y = 0; pass == 0 && y < n; ++y) {
+            for (int x = y + 1; x < n; ++x) {
+                float *a = tmp + 2 * (y * n + x), *b = tmp + 2 * (x * n + y);
+                const float re = a[0], im = a[1];
+                a[0] = b[0], a[1] = b[1];
+                b[0] = re, b[1] = im;
+            }
+        }
+    }
+    /* tmp is still transposed: output (y, x) is the real part of element (x, y). */
+    for (int y = 0; y < n; ++y)
+        for (int x = 0; x < n; ++x)
+            dst->chals->ch[0][y * n + x] = tmp[2 * (x * n + y)];
+    dst->log = IMAGE_DATA_CH0;
+done:
+    memory_free(tmp);
+    return status;
+}
+
+/* Pick the interleaved complex buffer of src according to src->log. */
+static embeddip_status_t getComplexInput(const Image *src, float **out)
+{
+    if (src == NULL || out == NULL || src->chals == NULL)
+        return EMBEDDIP_ERROR_NULL_PTR;
+
+    int channel;  /* COMPLEX data lives in ch[1], CH0 data in ch[0] */
+    if (src->log == IMAGE_DATA_COMPLEX)
+        channel = 1;
+    else if (src->log == IMAGE_DATA_CH0)
+        channel = 0;
+    else
+        return EMBEDDIP_ERROR_INVALID_ARG;  /* *out is left untouched */
+    *out = src->chals->ch[channel];
+    return (*out == NULL) ? EMBEDDIP_ERROR_NULL_PTR : EMBEDDIP_OK;
+}
+
+/* Replace every sample of img->chals->ch[0] with its natural logarithm, in place. */
+embeddip_status_t _log_(Image *img)
+{
+    if (img == NULL)
+        return EMBEDDIP_ERROR_NULL_PTR;
+    if (isChalsEmpty(img))
+        return EMBEDDIP_ERROR_INVALID_ARG;
+    if (img->chals->ch[0] == NULL)
+        return EMBEDDIP_ERROR_NULL_PTR;
+
+    /* NOTE(review): the count is img->size, not width * height - confirm they agree. */
+    float *sample = img->chals->ch[0];
+    for (int i = 0; i < img->size; ++i, ++sample) {
+#if defined(EMBED_DIP_ARCH_XTENSA)
+        *sample = logf(*sample + 1e-3f);  /* Xtensa keeps its offset; avoids log(0) */
+#else
+        *sample = logf(*sample);
+#endif
+    }
+    return EMBEDDIP_OK;
+}
+
+/* Add a constant to every sample of img->chals->ch[0], in place. */
+embeddip_status_t _add_(Image *img, float value)
+{
+    if (img == NULL)
+        return EMBEDDIP_ERROR_NULL_PTR;
+    if (isChalsEmpty(img))
+        return EMBEDDIP_ERROR_INVALID_ARG;
+    if (img->chals->ch[0] == NULL)
+        return EMBEDDIP_ERROR_NULL_PTR;
+
+    /* The loop covers img->size samples of channel 0. */
+    float *sample = img->chals->ch[0];
+    for (int i = 0; i < img->size; ++i)
+        sample[i] = sample[i] + value;
+
+    return EMBEDDIP_OK;
+}
+
+/**
+ * @brief Per-element magnitude sqrt(re^2 + im^2) of interleaved complex data.
+ * @param src Complex input; the buffer is chosen by getComplexInput() from src->log.
+ * @param dst Gets width * height magnitudes in chals->ch[0] (created when absent).
+ * @return EMBEDDIP_OK or the first error found; dst->log becomes IMAGE_DATA_MAGNITUDE.
+ */
+embeddip_status_t _abs_(const Image *src, Image *dst)
+{
+    if (src == NULL || dst == NULL)
+        return EMBEDDIP_ERROR_NULL_PTR;
+
+    float *cplx = NULL;
+    embeddip_status_t status = getComplexInput(src, &cplx);
+    if (status == EMBEDDIP_OK && isChalsEmpty(dst)) {
+        status = createChals(dst, 1);
+        if (status == EMBEDDIP_OK)
+            dst->is_chals = 1;
+    }
+    if (status != EMBEDDIP_OK)
+        return status;
+    if (dst->chals->ch[0] == NULL)
+        return EMBEDDIP_ERROR_NULL_PTR;
+
+    /* cplx holds (re, im) pairs; each pair yields one output float. */
+    const int count = src->width * src->height;
+    float *mag = dst->chals->ch[0];
+    for (int i = 0; i < count; ++i)
+        mag[i] = sqrtf(cplx[2 * i] * cplx[2 * i] + cplx[2 * i + 1] * cplx[2 * i + 1]);
+
+    dst->log = IMAGE_DATA_MAGNITUDE;
+    return EMBEDDIP_OK;
+}
+
+/**
+ * @brief Per-element phase atan2f(im, re) of interleaved complex data.
+ * @param src Complex input; the buffer is chosen by getComplexInput() from src->log.
+ * @param dst Gets width * height angles in chals->ch[0] (created when absent).
+ * @return EMBEDDIP_OK or the first error found; dst->log becomes IMAGE_DATA_PHASE.
+ */
+embeddip_status_t _phase_(const Image *src, Image *dst)
+{
+    if (src == NULL || dst == NULL)
+        return EMBEDDIP_ERROR_NULL_PTR;
+
+    float *cplx = NULL;
+    embeddip_status_t status = getComplexInput(src, &cplx);
+    if (status == EMBEDDIP_OK && isChalsEmpty(dst)) {
+        status = createChals(dst, 1);
+        if (status == EMBEDDIP_OK)
+            dst->is_chals = 1;
+    }
+    if (status != EMBEDDIP_OK)
+        return status;
+    if (dst->chals->ch[0] == NULL)
+        return EMBEDDIP_ERROR_NULL_PTR;
+
+    float *angle = dst->chals->ch[0];
+    for (int i = 0, count = src->width * src->height; i < count; ++i)
+        angle[i] = atan2f(cplx[2 * i + 1], cplx[2 * i]);
+
+    dst->log = IMAGE_DATA_PHASE;
+    return EMBEDDIP_OK;
+}
+
+/**
+ * @brief Swap diagonal quadrants in place so the zero-frequency term is centred.
+ * @param img Data is taken from ch[1] when log is IMAGE_DATA_COMPLEX, else from ch[0].
+ * @return EMBEDDIP_OK, or EMBEDDIP_ERROR_NULL_PTR when img or its data is missing.
+ * @note Magnitude and phase buffers hold one float per pixel, so they use stride 1;
+ *       any other log value keeps the interleaved (re, im) stride of 2.
+ */
+embeddip_status_t fftshift(Image *img)
+{
+    if (!img || isChalsEmpty(img))
+        return EMBEDDIP_ERROR_NULL_PTR;
+
+    float *data = (img->log == IMAGE_DATA_COMPLEX) ? img->chals->ch[1] : img->chals->ch[0];
+    if (!data)
+        return EMBEDDIP_ERROR_NULL_PTR;
+
+    const int stride = (img->log == IMAGE_DATA_MAGNITUDE || img->log == IMAGE_DATA_PHASE) ? 1 : 2;
+    const int width = img->width;
+    const int half = stride * (width / 2);  /* floats in half a row */
+    const int cy = (int)img->height / 2;
+
+    for (int y = 0; y < cy; ++y) {
+        float *top = data + stride * (y * width);
+        float *bottom = data + stride * ((y + cy) * width);
+        for (int i = 0; i < half; ++i) {
+            float tmp = top[i];  /* upper-left <-> lower-right */
+            top[i] = bottom[half + i];
+            bottom[half + i] = tmp;
+            tmp = top[half + i];  /* upper-right <-> lower-left */
+            top[half + i] = bottom[i];
+            bottom[i] = tmp;
+        }
+    }
+    return EMBEDDIP_OK;
+}
+
+/**
+ * @brief Rebuild interleaved (re, im) data in dst->chals->ch[0] from polar form.
+ * @param mag_img, phase_img Magnitude and phase in chals->ch[0]; equal dimensions.
+ * @param dst Must not alias an input: its old ch[0] is freed before they are read.
+ * @return EMBEDDIP_OK (dst->log becomes IMAGE_DATA_CH0) or the first error found.
+ */
+embeddip_status_t polarToCart(const Image *mag_img, const Image *phase_img, Image *dst)
+{
+    if (!mag_img || !phase_img || !dst)
+        return EMBEDDIP_ERROR_NULL_PTR;
+    if (dst == mag_img || dst == phase_img)
+        return EMBEDDIP_ERROR_INVALID_ARG;
+    if (mag_img->width != phase_img->width || mag_img->height != phase_img->height)
+        return EMBEDDIP_ERROR_INVALID_SIZE;
+    if (!mag_img->chals || !phase_img->chals || !mag_img->chals->ch[0] || !phase_img->chals->ch[0])
+        return EMBEDDIP_ERROR_NULL_PTR;
+
+    if (!isChalsEmpty(dst) && dst->chals && dst->chals->ch[0]) {
+        memory_free(dst->chals->ch[0]);
+        dst->chals->ch[0] = NULL;
+    }
+    embeddip_status_t status = createChalsComplex(dst, 1);
+    if (status != EMBEDDIP_OK || !dst->chals || !dst->chals->ch[0])
+        return (status != EMBEDDIP_OK) ? status : EMBEDDIP_ERROR_NULL_PTR;
+    dst->is_chals = 1;  /* mark channel data present, as fft() does after creating channels */
+
+    const float *mag = mag_img->chals->ch[0], *phs = phase_img->chals->ch[0];
+    for (int i = 0, size = mag_img->width * mag_img->height; i < size; ++i) {
+        dst->chals->ch[0][2 * i] = mag[i] * cosf(phs[i]);
+        dst->chals->ch[0][2 * i + 1] = mag[i] * sinf(phs[i]);
+    }
+    dst->log = IMAGE_DATA_CH0;
+    return EMBEDDIP_OK;
+}
+
+/* Resolve a multiply() operand; returns false when img->log is not a supported source. */
+static bool multiplyOperand(const Image *img, const float **ch, const uint8_t **px)
+{
+    *ch = NULL;
+    *px = NULL;
+    if (img->log == IMAGE_DATA_PIXELS)
+        *px = (const uint8_t *)img->pixels;
+    else if (img->log == IMAGE_DATA_CH0 || img->log == IMAGE_DATA_COMPLEX)
+        *ch = img->chals ? img->chals->ch[img->log == IMAGE_DATA_COMPLEX ? 1 : 0] : NULL;
+    else
+        return false;
+    return true;
+}
+
+/**
+ * @brief Element-wise product of two equally sized images into outImg->chals->ch[0].
+ * @param img1, img2 Operands read per their log: CH0 -> ch[0], COMPLEX -> ch[1],
+ *                   PIXELS -> 8-bit pixels converted to float (not reinterpreted).
+ * @param outImg Gets width * height floats; its log becomes IMAGE_DATA_CH0.
+ * @return EMBEDDIP_OK or the first error found.
+ */
+embeddip_status_t multiply(const Image *img1, const Image *img2, Image *outImg)
+{
+    if (!img1 || !img2 || !outImg)
+        return EMBEDDIP_ERROR_NULL_PTR;
+    if (img1->width != img2->width || img1->height != img2->height)
+        return EMBEDDIP_ERROR_INVALID_SIZE;
+
+    if (isChalsEmpty(outImg)) {
+        embeddip_status_t status = createChals(outImg, 1);
+        if (status != EMBEDDIP_OK)
+            return status;
+        outImg->is_chals = 1;
+    }
+
+    const float *f1, *f2;
+    const uint8_t *p1, *p2;
+    if (!multiplyOperand(img1, &f1, &p1) || !multiplyOperand(img2, &f2, &p2))
+        return EMBEDDIP_ERROR_INVALID_ARG;
+    if ((!f1 && !p1) || (!f2 && !p2) || !outImg->chals || !outImg->chals->ch[0])
+        return EMBEDDIP_ERROR_NULL_PTR;
+
+    float *out = outImg->chals->ch[0];
+    const int size = img1->width * img1->height;
+    for (int i = 0; i < size; ++i)
+        out[i] = (p1 ? (float)p1[i] : f1[i]) * (p2 ? (float)p2[i] : f2[i]);
+
+    outImg->log = IMAGE_DATA_CH0;
+    return EMBEDDIP_OK;
+}
+
+/**
+ * @brief Clamped difference max(src1 - src2, 0), stored in dst->chals->ch[0].
+ * @param src1, src2 Equal-size operands: IMAGE_DATA_PIXELS reads 8-bit pixels,
+ *                   IMAGE_DATA_CH0 / IMAGE_DATA_MAGNITUDE read floats from ch[0].
+ * @param dst Gets width * height floats; its log becomes IMAGE_DATA_CH0.
+ * @return EMBEDDIP_OK or the first error found.
+ */
+embeddip_status_t difference(const Image *src1, const Image *src2, Image *dst)
+{
+    if (src1 == NULL || src2 == NULL || dst == NULL)
+        return EMBEDDIP_ERROR_NULL_PTR;
+    if (src1->width != src2->width || src1->height != src2->height)
+        return EMBEDDIP_ERROR_INVALID_SIZE;
+
+    const int count = src1->width * src1->height;
+
+    if (isChalsEmpty(dst)) {
+        embeddip_status_t status = createChals(dst, 1);
+        if (status != EMBEDDIP_OK)
+            return status;
+        dst->is_chals = 1;
+    }
+    float *out = dst->chals->ch[0];
+    if (out == NULL)
+        return EMBEDDIP_ERROR_OUT_OF_MEMORY;
+
+    /* Classify each operand: byte pixels, float channel, or unsupported. */
+    const bool bytes1 = (src1->log == IMAGE_DATA_PIXELS);
+    const bool bytes2 = (src2->log == IMAGE_DATA_PIXELS);
+    const bool float1 = (src1->log == IMAGE_DATA_CH0 || src1->log == IMAGE_DATA_MAGNITUDE);
+    const bool float2 = (src2->log == IMAGE_DATA_CH0 || src2->log == IMAGE_DATA_MAGNITUDE);
+    if (!(bytes1 || float1) || !(bytes2 || float2))
+        return EMBEDDIP_ERROR_INVALID_ARG;
+
+    /* The buffer matching each operand's kind must exist before it is read. */
+    if (bytes1 ? !src1->pixels : (!src1->chals || !src1->chals->ch[0]))
+        return EMBEDDIP_ERROR_NULL_PTR;
+    if (bytes2 ? !src2->pixels : (!src2->chals || !src2->chals->ch[0]))
+        return EMBEDDIP_ERROR_NULL_PTR;
+
+    const uint8_t *px1 = bytes1 ? (const uint8_t *)src1->pixels : NULL;
+    const uint8_t *px2 = bytes2 ? (const uint8_t *)src2->pixels : NULL;
+    const float *ch1 = bytes1 ? NULL : src1->chals->ch[0];
+    const float *ch2 = bytes2 ? NULL : src2->chals->ch[0];
+
+    /* One specialised loop per operand pairing keeps the inner loops branch-free. */
+    if (bytes1 && bytes2) {
+        for (int i = 0; i < count; ++i)
+            out[i] = fmaxf((float)(px1[i] - px2[i]), 0.0f);
+    } else if (bytes1) {
+        for (int i = 0; i < count; ++i)
+            out[i] = fmaxf((float)px1[i] - ch2[i], 0.0f);
+    } else if (bytes2) {
+        for (int i = 0; i < count; ++i)
+            out[i] = fmaxf(ch1[i] - (float)px2[i], 0.0f);
+    } else {
+        for (int i = 0; i < count; ++i)
+            out[i] = fmaxf(ch1[i] - ch2[i], 0.0f);
+    }
+
+    dst->log = IMAGE_DATA_CH0;
+    return EMBEDDIP_OK;
+}
+
+/* Gaussian exp(-d^2 / (2 sigma^2)); a zero width uses the limit (1 at d == 0, else 0). */
+static float filterGaussian(float d, float sigma)
+{
+    const float denom = 2.0f * sigma * sigma;
+    if (!(denom > 0.0f))
+        return (d == 0.0f) ? 1.0f : 0.0f;  /* the formula would evaluate 0 / 0 at d == 0 */
+    return expf(-(d * d) / denom);
+}
+
+/**
+ * @brief Fill filter_img->chals->ch[0] with a frequency mask centred at (w/2, h/2).
+ * @param filter_img Supplies width/height; format is set to grayscale, log to CH0.
+ * @param filter_type Ideal or Gaussian low-, high- or band-pass; other values give zeros.
+ * @param cutoff1 Radius in pixels, >= 0: the cut-off, or the lower band-pass edge.
+ * @param cutoff2 Upper band-pass edge (must exceed cutoff1); unused by other types.
+ * @return EMBEDDIP_OK or an error code.
+ */
+embeddip_status_t
+getFilter(Image *filter_img, FrequencyFilterType filter_type, float cutoff1, float cutoff2)
+{
+    if (!filter_img)
+        return EMBEDDIP_ERROR_NULL_PTR;
+    const bool bandpass =
+        filter_type == FREQ_FILTER_IDEAL_BANDPASS || filter_type == FREQ_FILTER_GAUSSIAN_BANDPASS;
+    if (cutoff1 < 0.0f || (bandpass && (cutoff2 < 0.0f || cutoff1 >= cutoff2)))
+        return EMBEDDIP_ERROR_INVALID_ARG;
+
+    const int w = filter_img->width;
+    const int h = filter_img->height;
+    filter_img->format = IMAGE_FORMAT_GRAYSCALE;
+    if (isChalsEmpty(filter_img)) {
+        embeddip_status_t status = createChals(filter_img, 1);
+        if (status != EMBEDDIP_OK)
+            return status;
+        filter_img->is_chals = 1;
+    }
+    float *mask = filter_img->chals->ch[0];
+    if (!mask)
+        return EMBEDDIP_ERROR_NULL_PTR;
+
+    for (int y = 0; y < h; ++y) {
+        for (int x = 0; x < w; ++x) {
+            const int dx = x - w / 2, dy = y - h / 2;
+            const float d = sqrtf((float)(dx * dx + dy * dy));
+            float value = 0.0f;
+            switch (filter_type) {
+            case FREQ_FILTER_IDEAL_LOWPASS:
+                value = (d <= cutoff1) ? 1.0f : 0.0f;
+                break;
+            case FREQ_FILTER_GAUSSIAN_LOWPASS:
+                value = filterGaussian(d, cutoff1);
+                break;
+            case FREQ_FILTER_IDEAL_HIGHPASS:
+                value = (d >= cutoff1) ? 1.0f : 0.0f;
+                break;
+            case FREQ_FILTER_GAUSSIAN_HIGHPASS:
+                value = 1.0f - filterGaussian(d, cutoff1);
+                break;
+            case FREQ_FILTER_IDEAL_BANDPASS:
+                value = (d >= cutoff1 && d <= cutoff2) ? 1.0f : 0.0f;
+                break;
+            case FREQ_FILTER_GAUSSIAN_BANDPASS:
+                value = filterGaussian(d, cutoff2) - filterGaussian(d, cutoff1);
+                break;
+            default:
+                break;  /* unknown type: mask stays zero */
+            }
+            mask[y * w + x] = value;
+        }
+    }
+    filter_img->log = IMAGE_DATA_CH0;
+    return EMBEDDIP_OK;
+}
+
+/**
+ * @brief Apply a real-valued frequency mask to a complex spectrum.
+ * @param src_fft Spectrum in a form accepted by _abs_() and _phase_().
+ * @param filter Mask in chals->ch[0]; must match src_fft in width and height.
+ * @param dst Receives the filtered spectrum as written by polarToCart().
+ * @return EMBEDDIP_OK or the first error found.
+ * @note The magnitude is scaled by the mask and recombined with the unchanged phase.
+ */
+embeddip_status_t ffilter2D(const Image *src_fft, const Image *filter, Image *dst)
+{
+    if (!src_fft || !filter || !dst)
+        return EMBEDDIP_ERROR_NULL_PTR;
+    if (isChalsEmpty(src_fft) || isChalsEmpty(filter))
+        return EMBEDDIP_ERROR_INVALID_ARG;
+    /* The mask is indexed with the spectrum's pixel count, so the sizes must agree. */
+    if (filter->width != src_fft->width || filter->height != src_fft->height)
+        return EMBEDDIP_ERROR_INVALID_SIZE;
+
+    Image *mag_img = NULL;
+    Image *phase_img = NULL;
+
+    embeddip_status_t status =
+        createImageWH(src_fft->width, src_fft->height, src_fft->format, &mag_img);
+    if (status != EMBEDDIP_OK)
+        return status;
+
+    status = createImageWH(src_fft->width, src_fft->height, src_fft->format, &phase_img);
+    if (status != EMBEDDIP_OK) {
+        deleteImage(mag_img);
+        return status;
+    }
+
+    /* From here on both temporaries exist; every path leaves through the cleanup below. */
+    status = _abs_(src_fft, mag_img);
+    if (status == EMBEDDIP_OK)
+        status = _phase_(src_fft, phase_img);
+    if (status == EMBEDDIP_OK &&
+        (!mag_img->chals || !mag_img->chals->ch[0] || !filter->chals || !filter->chals->ch[0]))
+        status = EMBEDDIP_ERROR_NULL_PTR;
+
+    if (status == EMBEDDIP_OK) {
+        float *mag = mag_img->chals->ch[0];
+        const float *mask = filter->chals->ch[0];
+        const int size = src_fft->width * src_fft->height;
+
+        for (int i = 0; i < size; ++i)
+            mag[i] *= mask[i];
+
+        status = polarToCart(mag_img, phase_img, dst);
+    }
+
+    deleteImage(mag_img);
+    deleteImage(phase_img);
+    return status;
+}
diff --git a/imgproc/filter.c b/imgproc/filter.c
index 9b092ac..a53278f 100755
--- a/imgproc/filter.c
+++ b/imgproc/filter.c
@@ -1046,6 +1046,8 @@ void nonMaximumSuppression(const Image *magImg, const Image *phaseImg, Image *ds
     uint32_t w = magImg->width, h = magImg->height;
     if (w != phaseImg->width || h != phaseImg->height)
         return;
+    if (!magImg->chals || !magImg->chals->ch[0] || !phaseImg->chals || !phaseImg->chals->ch[0])
+        return;
     uint32_t N = w * h;
 
     const float *mag = magImg->chals->ch[0];
@@ -1053,14 +1055,18 @@ void nonMaximumSuppression(const Image *magImg, const Image *phaseImg, Image *ds
 
     if (!dst->chals) {
         dst->chals = (channels_t *)memory_alloc(sizeof(channels_t));
+        if (!dst->chals)
+            return;
         memset(dst->chals, 0, sizeof(channels_t));
     }
     dst->chals->ch[0] = (float *)memory_alloc((size_t)N * sizeof(float));
+    if (!dst->chals->ch[0])
+        return;
     dst->is_chals = 1;
     float *dst_data = dst->chals->ch[0];
 
     // Initialize all to zero (including borders)
-    memset(dst, 0, N * sizeof(float));
+    memset(dst_data, 0, (size_t)N * sizeof(float));
 
     // Iterate, skip borders
     for (uint32_t y = 1; y < h - 1; y++) {
@@ -1110,14 +1116,25 @@ void doubleThreshold(const Image *src,
 {
     if (!src || !dst)
         return;
+    if (!src->chals || !src->chals->ch[0])
+        return;
     uint32_t N = src->width * src->height;
     const float *src_data = src->chals->ch[0];
+    if (lowThresh > highThresh) {
+        float tmp = lowThresh;
+        lowThresh = highThresh;
+        highThresh = tmp;
+    }
 
     if (!dst->chals) {
         dst->chals = (channels_t *)memory_alloc(sizeof(channels_t));
+        if (!dst->chals)
+            return;
         memset(dst->chals, 0, sizeof(channels_t));
     }
     dst->chals->ch[0] = (float *)memory_alloc((size_t)N * sizeof(float));
+    if (!dst->chals->ch[0])
+        return;
     dst->is_chals = 1;
     float *dst_data = dst->chals->ch[0];
 
@@ -1142,32 +1159,60 @@ void hysteresis(const Image *src, Image *dst, float weakVal, float strongVal)
     if (!src || !dst)
         return;
     uint32_t w = src->width, h = src->height;
+    uint32_t N = w * h;
+    if (!src->chals || !src->chals->ch[0] || N == 0)
+        return;
+    const float *src_data = src->chals->ch[0];
 
     if (!dst->chals) {
         dst->chals = (channels_t *)memory_alloc(sizeof(channels_t));
+        if (!dst->chals)
+            return;
         memset(dst->chals, 0, sizeof(channels_t));
     }
-    dst->chals->ch[0] = (float *)memory_alloc((size_t)w * h * sizeof(float));
+    dst->chals->ch[0] = (float *)memory_alloc((size_t)N * sizeof(float));
+    if (!dst->chals->ch[0])
+        return;
     dst->is_chals = 1;
     float *dst_data = dst->chals->ch[0];
-    memcpy(dst, src, (size_t)w * h * sizeof(float));
+    memset(dst_data, 0, (size_t)N * sizeof(float));
 
-    for (uint32_t y = 1; y < h - 1; y++) {
-        for (uint32_t x = 1; x < w - 1; x++) {
-            uint32_t idx = y * w + x;
-            if (dst_data[idx] == weakVal) {
-                bool connected = false;
-                for (int j = -1; j <= 1; j++) {
-                    for (int i = -1; i <= 1; i++) {
-                        if (dst_data[(y + j) * w + (x + i)] == strongVal) {
-                            connected = true;
-                        }
-                    }
+    int *stack = (int *)memory_alloc((size_t)N * sizeof(int));
+    if (!stack)
+        return;
+    uint32_t sp = 0;
+
+    for (uint32_t i = 0; i < N; ++i) {
+        if (src_data[i] == strongVal) {
+            dst_data[i] = strongVal;
+            stack[sp++] = (int)i;
+        }
+    }
+
+    while (sp > 0) {
+        int idx = stack[--sp];
+        uint32_t x = (uint32_t)idx % w;
+        uint32_t y = (uint32_t)idx / w;
+
+        int y0 = (y > 0) ? (int)y - 1 : 0;
+        int y1 = (y + 1 < h) ? (int)y + 1 : (int)h - 1;
+        int x0 = (x > 0) ? (int)x - 1 : 0;
+        int x1 = (x + 1 < w) ? (int)x + 1 : (int)w - 1;
+
+        for (int ny = y0; ny <= y1; ++ny) {
+            for (int nx = x0; nx <= x1; ++nx) {
+                uint32_t nidx = (uint32_t)ny * w + (uint32_t)nx;
+                if (dst_data[nidx] == strongVal)
+                    continue;
+                if (src_data[nidx] == weakVal) {
+                    dst_data[nidx] = strongVal;
+                    stack[sp++] = (int)nidx;
                 }
-                dst_data[idx] = connected ? strongVal : 0.0f;
             }
         }
     }
+
+    memory_free(stack);
     dst->log = IMAGE_DATA_CH0;
 }
 
@@ -1465,17 +1510,92 @@ embeddip_status_t Canny(const Image *src,
     CHECK_NULL_INT(dst);
 
     // --- Step 1: Gaussian smoothing + gradients ---
-    float sigma =
-        1.0;  // 0.3 * ((aperture_size - 1) * 0.5 - 1) + 0.8; // could derive from aperture_size
+    int k = (aperture_size < 3) ? 3 : aperture_size;
+    if ((k & 1) == 0)
+        ++k;
+    if (k > 7)
+        k = 7;
+    float sigma = 0.3f * ((float)(k - 1) * 0.5f - 1.0f) + 0.8f;
+
     Image *Ix = createImageWH_legacy(src->width, src->height, src->format);
     Image *Iy = createImageWH_legacy(src->width, src->height, src->format);
-    gaussianGradients(src, Ix, Iy, sigma);
+    if (!Ix || !Iy) {
+        deleteImage(Ix);
+        deleteImage(Iy);
+        return EMBEDDIP_ERROR_OUT_OF_MEMORY;
+    }
+    embeddip_status_t st = gaussianGradients(src, Ix, Iy, sigma);
+    if (st != EMBEDDIP_OK) {
+        deleteImage(Ix);
+        deleteImage(Iy);
+        return st;
+    }
 
     // --- Step 2: magnitude + phase ---
     Image *Mag = createImageWH_legacy(src->width, src->height, src->format);
     Image *Phase = createImageWH_legacy(src->width, src->height, src->format);
-    gradientMagnitude(Ix, Iy, Mag);
-    gradientPhase(Ix, Iy, Phase);
+    if (!Mag || !Phase) {
+        deleteImage(Ix);
+        deleteImage(Iy);
+        deleteImage(Mag);
+        deleteImage(Phase);
+        return EMBEDDIP_ERROR_OUT_OF_MEMORY;
+    }
+
+    if (l2_gradient) {
+        st = gradientMagnitude(Ix, Iy, Mag);
+    } else {
+        uint32_t N = src->size;
+        if (!Mag->chals) {
+            Mag->chals = (channels_t *)memory_alloc(sizeof(channels_t));
+            if (!Mag->chals) {
+                deleteImage(Ix);
+                deleteImage(Iy);
+                deleteImage(Mag);
+                deleteImage(Phase);
+                return EMBEDDIP_ERROR_OUT_OF_MEMORY;
+            }
+            memset(Mag->chals, 0, sizeof(channels_t));
+        }
+        Mag->chals->ch[0] = (float *)memory_alloc((size_t)N * sizeof(float));
+        if (!Mag->chals->ch[0]) {
+            deleteImage(Ix);
+            deleteImage(Iy);
+            deleteImage(Mag);
+            deleteImage(Phase);
+            return EMBEDDIP_ERROR_OUT_OF_MEMORY;
+        }
+        const float *ix = Ix->chals ? Ix->chals->ch[0] : NULL;
+        const float *iy = Iy->chals ? Iy->chals->ch[0] : NULL;
+        if (!ix || !iy) {
+            deleteImage(Ix);
+            deleteImage(Iy);
+            deleteImage(Mag);
+            deleteImage(Phase);
+            return EMBEDDIP_ERROR_INVALID_ARG;
+        }
+        for (uint32_t i = 0; i < N; ++i) {
+            Mag->chals->ch[0][i] = fabsf(ix[i]) + fabsf(iy[i]);
+        }
+        Mag->is_chals = 1;
+        Mag->log = IMAGE_DATA_CH0;
+        st = EMBEDDIP_OK;
+    }
+    if (st != EMBEDDIP_OK) {
+        deleteImage(Ix);
+        deleteImage(Iy);
+        deleteImage(Mag);
+        deleteImage(Phase);
+        return st;
+    }
+    st = gradientPhase(Ix, Iy, Phase);
+    if (st != EMBEDDIP_OK) {
+        deleteImage(Ix);
+        deleteImage(Iy);
+        deleteImage(Mag);
+        deleteImage(Phase);
+        return st;
+    }
 
     float *data = Mag->chals->ch[0];
 
@@ -1503,10 +1623,25 @@ embeddip_status_t Canny(const Image *src,
 
     // --- Step 3: NMS ---
     Image *Nms = createImageWH_legacy(src->width, src->height, src->format);
+    if (!Nms) {
+        deleteImage(Ix);
+        deleteImage(Iy);
+        deleteImage(Mag);
+        deleteImage(Phase);
+        return EMBEDDIP_ERROR_OUT_OF_MEMORY;
+    }
     nonMaximumSuppression(Mag, Phase, Nms);
 
     // --- Step 4: Double threshold ---
     Image *Dt = createImageWH_legacy(src->width, src->height, src->format);
+    if (!Dt) {
+        deleteImage(Ix);
+        deleteImage(Iy);
+        deleteImage(Mag);
+        deleteImage(Phase);
+        deleteImage(Nms);
+        return EMBEDDIP_ERROR_OUT_OF_MEMORY;
+    }
     doubleThreshold(Nms, Dt, (float)threshold1, (float)threshold2, 50.0f, 255.0f);
 
     // --- Step 5: Hysteresis ---
diff --git a/imgproc/morph.c b/imgproc/morph.c
index 755cee1..42ffe4d 100644
--- a/imgproc/morph.c
+++ b/imgproc/morph.c
@@ -88,6 +88,7 @@ embeddip_status_t erode(const Image *src, Image *dst, const Kernel *kernel, uint
         return EMBEDDIP_ERROR_OUT_OF_MEMORY;
     }
 
+    // First iteration must start from source image content.
     memcpy(ping, src->pixels, src->size);
 
     for (uint8_t it = 0; it < iterations; ++it) {
@@ -164,6 +165,7 @@ embeddip_status_t dilate(const Image *src, Image *dst, const Kernel *kernel, uin
         return EMBEDDIP_ERROR_OUT_OF_MEMORY;
     }
 
+    // First iteration must start from source image content.
     memcpy(ping, src->pixels, src->size);
 
     for (uint8_t it = 0; it < iterations; ++it) {
diff --git a/imgproc/segmentation.c b/imgproc/segmentation.c
index b0609ae..570fd7c 100644
--- a/imgproc/segmentation.c
+++ b/imgproc/segmentation.c
@@ -581,6 +581,13 @@ embeddip_status_t grayscaleRegionGrowing(const Image *inImg,
         return EMBEDDIP_ERROR_OUT_OF_MEMORY;
     }
 
+    int top = 0;
+    int dx[4] = {0, -1, 1, 0};
+    int dy[4] = {-1, 0, 0, 1};
+    float regionMean = 0.0f;
+    int regionCount = 0;
+
+    // Global multi-seed initialization (single visited map and single adaptive model).
     for (int s = 0; s < numSeeds; ++s) {
         int seedX = seeds[s].x;
         int seedY = seeds[s].y;
@@ -591,43 +598,49 @@ embeddip_status_t grayscaleRegionGrowing(const Image *inImg,
         if (visited[seedIndex])
             continue;
 
-        // Run the same region growing as single-seed
-        int top = 0;
+        if (top >= STACK_SIZE) {
+            memory_free(visited);
+            memory_free(stack);
+            return EMBEDDIP_ERROR_OUT_OF_MEMORY;
+        }
+
         stack[top++] = seeds[s];
         visited[seedIndex] = true;
         dst[seedIndex] = 255;
 
-        long sum = src[seedIndex];
-        int count = 1;
+        regionCount++;
+        regionMean += ((float)src[seedIndex] - regionMean) / (float)regionCount;
+    }
 
-        int dx[4] = {0, -1, 1, 0};
-        int dy[4] = {-1, 0, 0, 1};
+    if (regionCount == 0) {
+        memory_free(visited);
+        memory_free(stack);
+        return EMBEDDIP_ERROR_INVALID_ARG;
+    }
 
-        while (top > 0) {
-            Point p = stack[--top];
-            uint8_t regionMean = (uint8_t)(sum / count);
+    while (top > 0) {
+        Point p = stack[--top];
 
-            for (int i = 0; i < 4; ++i) {
-                int nx = p.x + dx[i];
-                int ny = p.y + dy[i];
-                int nidx = ny * width + nx;
+        for (int i = 0; i < 4; ++i) {
+            int nx = p.x + dx[i];
+            int ny = p.y + dy[i];
+            int nidx = ny * width + nx;
 
-                if (nx >= 0 && nx < width && ny >= 0 && ny < height && !visited[nidx]) {
-                    uint8_t neighborValue = src[nidx];
-                    if (abs((int)neighborValue - (int)regionMean) <= tolerance) {
-                        visited[nidx] = true;
-                        dst[nidx] = 255;
-                        stack[top++] = (Point){nx, ny};
+            if (nx >= 0 && nx < width && ny >= 0 && ny < height && !visited[nidx]) {
+                uint8_t neighborValue = src[nidx];
+                if (abs((int)neighborValue - (int)regionMean) <= tolerance) {
+                    visited[nidx] = true;
+                    dst[nidx] = 255;
 
-                        sum += neighborValue;
-                        count++;
+                    regionCount++;
+                    regionMean += ((float)neighborValue - regionMean) / (float)regionCount;
 
-                        if (top >= STACK_SIZE) {
-                            memory_free(visited);
-                            memory_free(stack);
-                            return EMBEDDIP_ERROR_OUT_OF_MEMORY;  // Stack overflow
-                        }
+                    if (top >= STACK_SIZE) {
+                        memory_free(visited);
+                        memory_free(stack);
+                        return EMBEDDIP_ERROR_OUT_OF_MEMORY;  // Stack overflow
                     }
+                    stack[top++] = (Point){nx, ny};
                 }
             }
         }
@@ -694,9 +707,9 @@ colorRegionGrowing_single(const Image *inImg, Image *outImg, int seedX, int seed
     int top = 0;
 
     int seedIndex = seedY * width + seedX;
-    float h0 = src[seedIndex * 3] / 255.0f;
-    float s0 = src[seedIndex * 3 + 1] / 255.0f;
-    float i0 = src[seedIndex * 3 + 2] / 255.0f;
+    float regionMean[3];
+    read_vec3_norm(inImg, seedIndex, regionMean);
+    int regionCount = 1;
 
     stack[top++] = (Point){seedX, seedY};
     visited[seedIndex] = true;
@@ -718,16 +731,9 @@ colorRegionGrowing_single(const Image *inImg, Image *outImg, int seedX, int seed
             int nidx = ny * width + nx;
 
             if (nx >= 0 && nx < width && ny >= 0 && ny < height && !visited[nidx]) {
-                float h = src[nidx * 3] / 255.0f;
-                float s = src[nidx * 3 + 1] / 255.0f;
-                float ii = src[nidx * 3 + 2] / 255.0f;
-
-                // Hue distance with wraparound
-                float dh = fminf(fabsf(h - h0), 1.0f - fabsf(h - h0));
-                float ds = s - s0;
-                float di = ii - i0;
-
-                float dist = sqrtf(dh * dh + ds * ds + di * di);
+                float v[3];
+                read_vec3_norm(inImg, nidx, v);
+                float dist = color_distance(v, regionMean, inImg->format);
 
                 if (dist <= tolerance) {
                     visited[nidx] = true;
@@ -737,6 +743,12 @@ colorRegionGrowing_single(const Image *inImg, Image *outImg, int seedX, int seed
                     dst[nidx * 3 + 1] = src[nidx * 3 + 1];
                     dst[nidx * 3 + 2] = src[nidx * 3 + 2];
 
+                    // Update running region mean (adaptive region growing).
+                    regionCount++;
+                    regionMean[0] += (v[0] - regionMean[0]) / (float)regionCount;
+                    regionMean[1] += (v[1] - regionMean[1]) / (float)regionCount;
+                    regionMean[2] += (v[2] - regionMean[2]) / (float)regionCount;
+
                     stack[top++] = (Point){nx, ny};
 
                     if (top >= STACK_SIZE) {
@@ -822,66 +834,93 @@ embeddip_status_t colorRegionGrowing(const Image *inImg,
     const int dx[4] = {0, -1, 1, 0};
     const int dy[4] = {-1, 0, 0, 1};
 
+    memset(visited, 0, (size_t)inImg->size * sizeof(bool));
+
+    // Global multi-seed region: one visited map and one adaptive region model.
+    int top = 0;
+    float regionMean[3] = {0.0f, 0.0f, 0.0f};
+    int regionCount = 0;
+
     for (int s = 0; s < numSeeds; ++s) {
         int seedX = seeds[s].x;
         int seedY = seeds[s].y;
-
         if ((unsigned)seedX >= (unsigned)width || (unsigned)seedY >= (unsigned)height)
             continue;
 
-        const int seedIndex = seedY * width + seedX;
-
-        memset(visited, 0, (size_t)inImg->size * sizeof(bool));
-
-        float seedVec[3];
-        read_vec3_norm(inImg, seedIndex, seedVec);
+        int seedIndex = seedY * width + seedX;
+        if (visited[seedIndex])
+            continue;
 
-        int top = 0;
+        if (top >= STACK_SIZE) {
+            memory_free(visited);
+            memory_free(stack);
+            return EMBEDDIP_ERROR_OUT_OF_MEMORY;
+        }
         stack[top++] = (Point){seedX, seedY};
         visited[seedIndex] = true;
+
+        float v[3];
+        read_vec3_norm(inImg, seedIndex, v);
+        regionCount++;
+        regionMean[0] += (v[0] - regionMean[0]) / (float)regionCount;
+        regionMean[1] += (v[1] - regionMean[1]) / (float)regionCount;
+        regionMean[2] += (v[2] - regionMean[2]) / (float)regionCount;
+
         if (outputColorful) {
-            for (int c = 0; c < 3; ++c) {
-                outData[seedIndex * 3 + c] = inData[seedIndex * inDepth + c];
-            }
+            outData[seedIndex * 3 + 0] = (uint8_t)CLAMP((int)lrintf(v[0] * 255.0f), 0, 255);
+            outData[seedIndex * 3 + 1] = (uint8_t)CLAMP((int)lrintf(v[1] * 255.0f), 0, 255);
+            outData[seedIndex * 3 + 2] = (uint8_t)CLAMP((int)lrintf(v[2] * 255.0f), 0, 255);
         } else {
             outData[seedIndex] = 255;
         }
+    }
 
-        while (top > 0) {
-            Point p = stack[--top];
+    if (regionCount == 0) {
+        memory_free(visited);
+        memory_free(stack);
+        return EMBEDDIP_ERROR_INVALID_ARG;
+    }
 
-            for (int d = 0; d < 4; ++d) {
-                int nx = p.x + dx[d];
-                int ny = p.y + dy[d];
+    while (top > 0) {
+        Point p = stack[--top];
 
-                if ((unsigned)nx >= (unsigned)width || (unsigned)ny >= (unsigned)height)
-                    continue;
+        for (int d = 0; d < 4; ++d) {
+            int nx = p.x + dx[d];
+            int ny = p.y + dy[d];
 
-                int nidx = ny * width + nx;
-                if (visited[nidx])
-                    continue;
+            if ((unsigned)nx >= (unsigned)width || (unsigned)ny >= (unsigned)height)
+                continue;
 
-                float v[3];
-                read_vec3_norm(inImg, nidx, v);
+            int nidx = ny * width + nx;
+            if (visited[nidx])
+                continue;
 
-                float dist = color_distance(v, seedVec, inImg->format);
-                if (dist <= tolerance) {
-                    visited[nidx] = true;
-                    if (outputColorful) {
-                        for (int c = 0; c < 3; ++c) {
-                            outData[nidx * 3 + c] = inData[nidx * inDepth + c];
-                        }
-                    } else {
-                        outData[nidx] = 255;
-                    }
+            float v[3];
+            read_vec3_norm(inImg, nidx, v);
 
-                    if (top >= STACK_SIZE) {
-                        memory_free(visited);
-                        memory_free(stack);
-                        return EMBEDDIP_ERROR_OUT_OF_MEMORY;  // Stack overflow
-                    }
-                    stack[top++] = (Point){nx, ny};
+            float dist = color_distance(v, regionMean, inImg->format);
+            if (dist <= tolerance) {
+                visited[nidx] = true;
+
+                if (outputColorful) {
+                    outData[nidx * 3 + 0] = (uint8_t)CLAMP((int)lrintf(v[0] * 255.0f), 0, 255);
+                    outData[nidx * 3 + 1] = (uint8_t)CLAMP((int)lrintf(v[1] * 255.0f), 0, 255);
+                    outData[nidx * 3 + 2] = (uint8_t)CLAMP((int)lrintf(v[2] * 255.0f), 0, 255);
+                } else {
+                    outData[nidx] = 255;
+                }
+
+                regionCount++;
+                regionMean[0] += (v[0] - regionMean[0]) / (float)regionCount;
+                regionMean[1] += (v[1] - regionMean[1]) / (float)regionCount;
+                regionMean[2] += (v[2] - regionMean[2]) / (float)regionCount;
+
+                if (top >= STACK_SIZE) {
+                    memory_free(visited);
+                    memory_free(stack);
+                    return EMBEDDIP_ERROR_OUT_OF_MEMORY;  // Stack overflow
                 }
+                stack[top++] = (Point){nx, ny};
             }
         }
     }
@@ -910,121 +949,6 @@ static float gaussian_prob(float x, float mean, float var)
     return (1.0f / sqrtf(2.0f * M_PI * var)) * expf(-(diff * diff) / (2.0f * var));
 }
 
-embeddip_status_t grabCutLite_working(const Image *src, Image *mask, Rectangle roi, int iterations)
-{
-    const int size = src->width * src->height;
-    const uint8_t *img1 = src->pixels;
-    uint8_t *mask_data = (uint8_t *)mask->pixels;
-
-    for (int iter = 0; iter < iterations; ++iter) {
-        uint32_t fgSum = 0, fgCount = 0;
-        uint32_t bgSum = 0, bgCount = 0;
-
-        // Step 1: Compute foreground and background means
-        for (int i = 0; i < size; ++i) {
-            if (mask_data[i] == 2) {
-                fgSum += img1[i];
-                fgCount++;
-            } else if (mask_data[i] == 0) {
-                bgSum += img1[i];
-                bgCount++;
-            }
-        }
-
-        // Fallback if no foreground was found (bootstrap)
-        if (fgCount == 0) {
-            for (int i = 0; i < size; ++i) {
-                if (mask_data[i] == 1) {
-                    fgSum += img1[i];
-                    fgCount++;
-                }
-            }
-        }
-
-        if (fgCount == 0 || bgCount == 0)
-            break;  // Not enough info to proceed
-
-        uint8_t fgMean = fgSum / fgCount;
-        uint8_t bgMean = bgSum / bgCount;
-
-        // Debug
-        // printf("Iter %d: fgMean=%d, bgMean=%d\n", iter, fgMean, bgMean);
-
-        // Step 2: Update probable region
-        for (int i = 0; i < size; ++i) {
-            if (mask_data[i] == 1) {
-                int distFg = abs((int)img1[i] - (int)fgMean);
-                int distBg = abs((int)img1[i] - (int)bgMean);
-
-                // Reclassify as closer to fg or bg
-                if (distFg < distBg)
-                    mask_data[i] = 2;  // Becomes foreground
-                else
-                    mask_data[i] = 0;  // Becomes background
-            }
-        }
-    }
-    return EMBEDDIP_OK;
-}
-
-embeddip_status_t grabCutLitesd(const Image *src, Image *mask, Rectangle roi, int iterations)
-{
-    const int size = src->width * src->height;
-    const uint8_t *inImg_pixels = src->pixels;
-    uint8_t *mask_data = (uint8_t *)mask->pixels;
-
-    for (int iter = 0; iter < iterations; ++iter) {
-        uint32_t fgSum = 0, fgCount = 0;
-        uint32_t bgSum = 0, bgCount = 0;
-
-        // Step 1: Compute foreground and background means
-        for (int i = 0; i < size; ++i) {
-            if (mask_data[i] == 2) {
-                fgSum += inImg_pixels[i];
-                fgCount++;
-            } else if (mask_data[i] == 0) {
-                bgSum += inImg_pixels[i];
-                bgCount++;
-            }
-        }
-
-        // Fallback if no foreground was found (bootstrap)
-        if (fgCount == 0) {
-            for (int i = 0; i < size; ++i) {
-                if (mask_data[i] == 1) {
-                    fgSum += inImg_pixels[i];
-                    fgCount++;
-                }
-            }
-        }
-
-        if (fgCount == 0 || bgCount == 0)
-            break;  // Not enough info to proceed
-
-        uint8_t fgMean = fgSum / fgCount;
-        uint8_t bgMean = bgSum / bgCount;
-
-        // Debug
-        // printf("Iter %d: fgMean=%d, bgMean=%d\n", iter, fgMean, bgMean);
-
-        return EMBEDDIP_OK;
-        // Step 2: Update probable region
-        for (int i = 0; i < size; ++i) {
-            if (mask_data[i] == 1) {
-                int distFg = abs((int)inImg_pixels[i] - (int)fgMean);
-                int distBg = abs((int)inImg_pixels[i] - (int)bgMean);
-
-                // Reclassify as closer to fg or bg
-                if (distFg < distBg)
-                    mask_data[i] = 2;  // Becomes foreground
-                else
-                    mask_data[i] = 0;  // Becomes background
-            }
-        }
-    }
-    return EMBEDDIP_OK;
-}
-
 /**
  * @brief Performs a simplified GrabCut-inspired segmentation on a grayscale image using a
  * rectangular ROI.
@@ -1147,300 +1071,378 @@ embeddip_status_t grabCutLite(const Image *src, Image *mask, Rectangle roi, int
     return EMBEDDIP_OK;
 }
 
-embeddip_status_t
-grabCutGrayscaleRealistic(const Image *src, Image *mask, Rectangle roi, int max_iter)
+typedef struct {  // One directed arc of the flow network used by the gc_* helpers.
+    int to;       // index of the node this arc points to
+    int next;     // index of the next arc leaving the same node, or -1 at the end of the list
+    float cap;    // remaining (residual) capacity; gc_dfs lowers it as flow is pushed
+} gc_edge_t;
+
+typedef struct {       // Flow network in adjacency-list form plus scratch buffers for max-flow.
+    int n;             // total node count, including the two terminals
+    int source;        // source terminal, set to n - 2 by gc_init
+    int sink;          // sink terminal, set to n - 1 by gc_init
+    int *head;         // per node: index of its first arc in edges, -1 when it has none
+    gc_edge_t *edges;  // arc pool; gc_add_edge stores each arc next to its reverse (index ^ 1)
+    int edge_count;    // arcs currently stored in edges
+    int edge_cap;      // arcs the edges buffer can hold
+    int *level;        // per node: BFS distance from source written by gc_bfs, -1 if unreached
+    int *it;           // per node: arc at which gc_dfs resumes scanning
+    int *queue;        // FIFO of node indices shared by gc_bfs and gc_mark_source_side
+    uint8_t *seen;     // per node: 1 if gc_mark_source_side reached it from the source
+} gc_graph_t;
+
+static int gc_init(gc_graph_t *g, int n, int edge_cap)  // returns 0 on success, -1 if any buffer is NULL
 {
-    if (!src || !mask || !src->pixels || src->format != IMAGE_FORMAT_GRAYSCALE)
-        return EMBEDDIP_ERROR_NULL_PTR;
+    g->n = n;
+    g->source = n - 2;  // the last two node indices are reserved for the terminals
+    g->sink = n - 1;
+    g->edge_count = 0;
+    g->edge_cap = edge_cap;
+    g->head = (int *)memory_alloc((size_t)n * sizeof(int));
+    g->edges = (gc_edge_t *)memory_alloc((size_t)edge_cap * sizeof(gc_edge_t));
+    g->level = (int *)memory_alloc((size_t)n * sizeof(int));
+    g->it = (int *)memory_alloc((size_t)n * sizeof(int));
+    g->queue = (int *)memory_alloc((size_t)n * sizeof(int));
+    g->seen = (uint8_t *)memory_alloc((size_t)n);
+    if (!g->head || !g->edges || !g->level || !g->it || !g->queue || !g->seen) {
+        return -1;  // nothing is freed here; whatever was allocated stays in g for gc_free
+    }
+    for (int i = 0; i < n; ++i)
+        g->head[i] = -1;  // -1 marks an empty adjacency list
+    return 0;
+}
 
-    int width = src->width;
-    int height = src->height;
-    int size = width * height;
-    const uint8_t *src_data = (const uint8_t *)src->pixels;
-    uint8_t *mask_data = (uint8_t *)mask->pixels;
+// Frees every buffer the graph owns, skipping NULL members, then zeroes the struct
+// so no stale pointers remain in it.
+static void gc_free(gc_graph_t *g)
+{
+    void *const owned[] = {g->head, g->edges, g->level, g->it, g->queue, g->seen};
+    const size_t owned_count = sizeof(owned) / sizeof(owned[0]);
+
+    for (size_t k = 0; k < owned_count; ++k) {
+        if (!owned[k])
+            continue;
+        memory_free(owned[k]);
+    }
+
+    // Wipe counters and pointers alike.
+    memset(g, 0, sizeof(*g));
+}
 
-    // Allocate component responsibilities
-    uint8_t *labels = (uint8_t *)memory_alloc(size * sizeof(uint8_t));  // 0=BG, 1=FG
-    float(*fg_resp)[GMM_COMPONENTS] =
-        (float(*)[GMM_COMPONENTS])memory_alloc(size * GMM_COMPONENTS * sizeof(float));
-    float(*bg_resp)[GMM_COMPONENTS] =
-        (float(*)[GMM_COMPONENTS])memory_alloc(size * GMM_COMPONENTS * sizeof(float));
+static int gc_add_edge(gc_graph_t *g, int u, int v, float cap)  // 0 on success, -1 if the pool is full
+{
+    if (g->edge_count + 2 > g->edge_cap)  // need room for the arc and its reverse
+        return -1;
+    g->edges[g->edge_count] = (gc_edge_t){.to = v, .next = g->head[u], .cap = cap};
+    g->head[u] = g->edge_count++;  // the new arc becomes the front of u's list
+    g->edges[g->edge_count] = (gc_edge_t){.to = u, .next = g->head[v], .cap = 0.0f};
+    g->head[v] = g->edge_count++;  // reverse arc lands at (forward index ^ 1), which gc_dfs relies on
+    return 0;
+}
 
-    GMMComponent fg_gmm[GMM_COMPONENTS];
-    GMMComponent bg_gmm[GMM_COMPONENTS];
+// Links u and v in both directions with capacity cap; returns -1 as soon as either insert fails.
+static int gc_add_undirected(gc_graph_t *g, int u, int v, float cap)
+{
+    const int forward_failed = gc_add_edge(g, u, v, cap) != 0;
+    // Short-circuit keeps the second insert from running after a failed first one.
+    const int failed = forward_failed || gc_add_edge(g, v, u, cap) != 0;
+    return failed ? -1 : 0;
+}
 
-    // Step 1: Initialize mask from ROI
-    for (int y = 0; y < height; ++y) {
-        for (int x = 0; x < width; ++x) {
-            int idx = y * width + x;
-            if (x >= roi.x && x < roi.x + roi.width && y >= roi.y && y < roi.y + roi.height) {
-                mask_data[idx] = FOREGROUND;
-                labels[idx] = 1;
-            } else {
-                mask_data[idx] = BACKGROUND;
-                labels[idx] = 0;
+static int gc_bfs(gc_graph_t *g)  // builds BFS levels from the source; nonzero if the sink is reachable
+{
+    for (int i = 0; i < g->n; ++i)
+        g->level[i] = -1;  // -1 = not reached yet
+    int qh = 0, qt = 0;  // queue head (next to pop) and tail (next free slot)
+    g->level[g->source] = 0;
+    g->queue[qt++] = g->source;
+    while (qh < qt) {
+        int u = g->queue[qh++];
+        for (int ei = g->head[u]; ei != -1; ei = g->edges[ei].next) {
+            gc_edge_t *e = &g->edges[ei];
+            if (e->cap > 1e-6f && g->level[e->to] < 0) {  // arcs at or below 1e-6 count as saturated
+                g->level[e->to] = g->level[u] + 1;
+                g->queue[qt++] = e->to;
             }
         }
     }
+    return g->level[g->sink] >= 0;
+}
 
-    // Step 2: Initialize GMMs with 2 components
-    for (int i = 0; i < GMM_COMPONENTS; ++i) {
-        fg_gmm[i].mean = 50.0f + 100 * i;
-        fg_gmm[i].variance = 500.0f;
-        fg_gmm[i].weight = 0.5f;
-
-        bg_gmm[i].mean = 50.0f + 100 * i;
-        bg_gmm[i].variance = 500.0f;
-        bg_gmm[i].weight = 0.5f;
-    }
-
-    // Step 3: EM Iterations
-    for (int iter = 0; iter < max_iter; ++iter) {
-        // E-Step: compute responsibilities
-        for (int i = 0; i < size; ++i) {
-            float x = (float)src_data[i];
-            float total_fg = 0.0f, total_bg = 0.0f;
-
-            // Foreground responsibilities
-            for (int c = 0; c < GMM_COMPONENTS; ++c) {
-                fg_resp[i][c] =
-                    fg_gmm[c].weight * gaussian_prob(x, fg_gmm[c].mean, fg_gmm[c].variance);
-                total_fg += fg_resp[i][c];
-            }
-            for (int c = 0; c < GMM_COMPONENTS; ++c)
-                fg_resp[i][c] /= (total_fg + 1e-6f);
-
-            // Background responsibilities
-            for (int c = 0; c < GMM_COMPONENTS; ++c) {
-                bg_resp[i][c] =
-                    bg_gmm[c].weight * gaussian_prob(x, bg_gmm[c].mean, bg_gmm[c].variance);
-                total_bg += bg_resp[i][c];
-            }
-            for (int c = 0; c < GMM_COMPONENTS; ++c)
-                bg_resp[i][c] /= (total_bg + 1e-6f);
-        }
-
-        // M-Step: update GMM parameters
-        for (int c = 0; c < GMM_COMPONENTS; ++c) {
-            // FG
-            float w_sum = 0.0f, x_sum = 0.0f, x2_sum = 0.0f;
-            for (int i = 0; i < size; ++i) {
-                if (labels[i] == 1) {
-                    float r = fg_resp[i][c];
-                    float x = (float)src_data[i];
-                    w_sum += r;
-                    x_sum += r * x;
-                    x2_sum += r * x * x;
-                }
-            }
-            if (w_sum > 1e-6f) {
-                fg_gmm[c].weight = w_sum;
-                fg_gmm[c].mean = x_sum / w_sum;
-                fg_gmm[c].variance =
-                    fmaxf((x2_sum / w_sum) - fg_gmm[c].mean * fg_gmm[c].mean, 10.0f);
-            }
-
-            // BG
-            w_sum = x_sum = x2_sum = 0.0f;
-            for (int i = 0; i < size; ++i) {
-                if (labels[i] == 0) {
-                    float r = bg_resp[i][c];
-                    float x = (float)src_data[i];
-                    w_sum += r;
-                    x_sum += r * x;
-                    x2_sum += r * x * x;
-                }
-            }
-            if (w_sum > 1e-6f) {
-                bg_gmm[c].weight = w_sum;
-                bg_gmm[c].mean = x_sum / w_sum;
-                bg_gmm[c].variance =
-                    fmaxf((x2_sum / w_sum) - bg_gmm[c].mean * bg_gmm[c].mean, 10.0f);
-            }
-        }
-
-        // Normalize GMM weights
-        float fg_total = 0.0f, bg_total = 0.0f;
-        for (int c = 0; c < GMM_COMPONENTS; ++c) {
-            fg_total += fg_gmm[c].weight;
-            bg_total += bg_gmm[c].weight;
-        }
-        for (int c = 0; c < GMM_COMPONENTS; ++c) {
-            fg_gmm[c].weight /= fg_total;
-            bg_gmm[c].weight /= bg_total;
+static float gc_dfs(gc_graph_t *g, int u, float f)  // pushes up to f from u towards the sink; returns the amount sent
+{
+    if (u == g->sink)
+        return f;
+    for (int *pei = &g->it[u]; *pei != -1; *pei = g->edges[*pei].next) {  // advancing *pei updates g->it[u] itself
+        int ei = *pei;
+        gc_edge_t *e = &g->edges[ei];
+        if (e->cap <= 1e-6f || g->level[e->to] != g->level[u] + 1)  // only usable arcs into the next BFS level
+            continue;
+        float pushed = gc_dfs(g, e->to, fminf(f, e->cap));  // NOTE(review): one recursion frame per BFS level; confirm stack budget
+        if (pushed > 1e-6f) {
+            e->cap -= pushed;
+            g->edges[ei ^ 1].cap += pushed;  // ei ^ 1 is the arc stored as this one's pair by gc_add_edge
+            return pushed;  // cursor stays on this arc, so the next call retries it first
         }
+    }
+    return 0.0f;
+}
 
-        // Reassign labels
-        for (int i = 0; i < size; ++i) {
-            float x = (float)src_data[i];
-            float p_fg = 0.0f, p_bg = 0.0f;
-            for (int c = 0; c < GMM_COMPONENTS; ++c) {
-                p_fg += fg_gmm[c].weight * gaussian_prob(x, fg_gmm[c].mean, fg_gmm[c].variance);
-                p_bg += bg_gmm[c].weight * gaussian_prob(x, bg_gmm[c].mean, bg_gmm[c].variance);
-            }
-            labels[i] = (p_fg > p_bg) ? 1 : 0;
-            mask_data[i] = labels[i] ? FOREGROUND : BACKGROUND;
+static float gc_maxflow(gc_graph_t *g)  // repeats BFS + DFS phases until the sink is unreachable; returns total flow
+{
+    float flow = 0.0f;
+    while (gc_bfs(g)) {
+        for (int i = 0; i < g->n; ++i)
+            g->it[i] = g->head[i];  // restart every node's arc cursor for this phase
+        while (1) {
+            float pushed = gc_dfs(g, g->source, 1e20f);  // 1e20f serves as the initial flow limit
+            if (pushed <= 1e-6f)
+                break;  // no further augmenting path in this level graph
+            flow += pushed;
         }
     }
-
-    memory_free(labels);
-    memory_free(fg_resp);
-    memory_free(bg_resp);
-    return EMBEDDIP_OK;
+    return flow;
 }
 
-typedef struct {
-    float weight;
-    float mean[3];      // [R, G, B]
-    float variance[3];  // diagonal covariance
-} GMMComponentRGB;
-
-float gaussian_prob_rgb(const uint8_t *pixel, const GMMComponentRGB *comp)
+static void gc_mark_source_side(gc_graph_t *g)  // sets seen[] = 1 for nodes reachable from the source via arcs with cap > 1e-6
 {
-    float prob = 1.0f;
-    for (int i = 0; i < 3; ++i) {
-        float diff = (float)pixel[i] - comp->mean[i];
-        float var = comp->variance[i];
-        prob *= (1.0f / sqrtf(2.0f * M_PI * var)) * expf(-diff * diff / (2.0f * var));
+    memset(g->seen, 0, (size_t)g->n);
+    int qh = 0, qt = 0;  // queue head (next to pop) and tail (next free slot)
+    g->seen[g->source] = 1;
+    g->queue[qt++] = g->source;
+    while (qh < qt) {
+        int u = g->queue[qh++];
+        for (int ei = g->head[u]; ei != -1; ei = g->edges[ei].next) {
+            gc_edge_t *e = &g->edges[ei];
+            if (e->cap > 1e-6f && !g->seen[e->to]) {  // follow only arcs that still have capacity
+                g->seen[e->to] = 1;
+                g->queue[qt++] = e->to;
+            }
+        }
     }
-    return prob;
 }
 
-embeddip_status_t grabCutRGB(const Image *src, Image *mask, Rectangle roi, int max_iter)
+embeddip_status_t grabCut(const Image *src, Image *mask, Rectangle roi, int max_iter)  // iterated graph-cut on a grayscale ROI
 {
-    if (!src || !mask || !src->pixels || src->format != IMAGE_FORMAT_RGB888)
+    if (!src || !mask || !src->pixels || !mask->pixels)
         return EMBEDDIP_ERROR_NULL_PTR;
+    if (src->format != IMAGE_FORMAT_GRAYSCALE)
+        return EMBEDDIP_ERROR_INVALID_FORMAT;
+    if (mask->format != IMAGE_FORMAT_MASK && mask->format != IMAGE_FORMAT_GRAYSCALE)
+        return EMBEDDIP_ERROR_INVALID_FORMAT;
+    if (src->width != mask->width || src->height != mask->height)
+        return EMBEDDIP_ERROR_INVALID_SIZE;
+    if (max_iter <= 0)
+        max_iter = MAX_ITER_GRABCUT;  // non-positive request falls back to the default count
 
-    int width = src->width;
-    int height = src->height;
-    int size = width * height;
+    const int width = (int)src->width;
+    const int height = (int)src->height;
     const uint8_t *src_data = (const uint8_t *)src->pixels;
     uint8_t *mask_data = (uint8_t *)mask->pixels;
+    memset(mask_data, BACKGROUND, (size_t)width * (size_t)height);  // mask starts as all BACKGROUND
+
+    int x0 = roi.x < 0 ? 0 : roi.x;  // clip the ROI to the image: [x0, x1) x [y0, y1)
+    int y0 = roi.y < 0 ? 0 : roi.y;
+    int x1 = roi.x + roi.width;
+    int y1 = roi.y + roi.height;
+    if (x1 > width)
+        x1 = width;
+    if (y1 > height)
+        y1 = height;
+    if (x0 >= x1 || y0 >= y1)
+        return EMBEDDIP_ERROR_INVALID_ARG;  // clipped ROI is empty
 
-    // Allocate label buffer (0 = BG, 1 = FG)
-    uint8_t *labels = (uint8_t *)memory_alloc(size * sizeof(uint8_t));
-    float(*fg_resp)[GMM_COMPONENTS] =
-        (float(*)[GMM_COMPONENTS])memory_alloc(size * GMM_COMPONENTS * sizeof(float));
-    float(*bg_resp)[GMM_COMPONENTS] =
-        (float(*)[GMM_COMPONENTS])memory_alloc(size * GMM_COMPONENTS * sizeof(float));
-
-    GMMComponentRGB fg_gmm[GMM_COMPONENTS];
-    GMMComponentRGB bg_gmm[GMM_COMPONENTS];
+    // Downsample ROI for embedded memory/perf while still using graph-cut.
+    int ds = 2;  // sampling step; doubled below until the grid fits or ds reaches 16
+    const int target_max_nodes = 7000;
+    int sw = (x1 - x0 + ds - 1) / ds;  // grid width, rounded up
+    int sh = (y1 - y0 + ds - 1) / ds;  // grid height, rounded up
+    while (sw * sh > target_max_nodes && ds < 16) {
+        ds *= 2;
+        sw = (x1 - x0 + ds - 1) / ds;
+        sh = (y1 - y0 + ds - 1) / ds;
+    }
+    const int sn = sw * sh;  // number of grid cells (graph pixel nodes)
+    uint8_t *labels = (uint8_t *)memory_alloc((size_t)sn);  // 0=BG, 1=FG
+    uint8_t *small = (uint8_t *)memory_alloc((size_t)sn);  // one sampled intensity per grid cell
+    if (!labels || !small) {
+        if (labels)
+            memory_free(labels);
+        if (small)
+            memory_free(small);
+        return EMBEDDIP_ERROR_OUT_OF_MEMORY;
+    }
 
-    // Step 1: Initial Labeling from ROI
-    for (int y = 0; y < height; ++y) {
-        for (int x = 0; x < width; ++x) {
-            int idx = y * width + x;
-            if (x >= roi.x && x < roi.x + roi.width && y >= roi.y && y < roi.y + roi.height) {
-                mask_data[idx] = FOREGROUND;
-                labels[idx] = 1;
-            } else {
-                mask_data[idx] = BACKGROUND;
-                labels[idx] = 0;
-            }
+    for (int sy = 0; sy < sh; ++sy) {
+        for (int sx = 0; sx < sw; ++sx) {
+            int xx = x0 + sx * ds;  // source pixel sampled for this cell (top-left of its ds x ds tile)
+            int yy = y0 + sy * ds;
+            if (xx >= width)
+                xx = width - 1;
+            if (yy >= height)
+                yy = height - 1;
+            small[sy * sw + sx] = src_data[yy * width + xx];
         }
     }
 
-    // Step 2: Init GMMs
-    for (int c = 0; c < GMM_COMPONENTS; ++c) {
-        for (int ch = 0; ch < 3; ++ch) {
-            fg_gmm[c].mean[ch] = 100.0f + 50 * c;
-            fg_gmm[c].variance[ch] = 1000.0f;
-            bg_gmm[c].mean[ch] = 50.0f + 100 * c;
-            bg_gmm[c].variance[ch] = 1000.0f;
+    int border = ((sw < sh) ? sw : sh) / 10;  // ring thickness: a tenth of the shorter grid side
+    if (border < 2)
+        border = 2;  // but never thinner than 2 cells
+    for (int sy = 0; sy < sh; ++sy) {
+        for (int sx = 0; sx < sw; ++sx) {
+            int near_left = sx < border;
+            int near_right = (sw - 1 - sx) < border;
+            int near_top = sy < border;
+            int near_bottom = (sh - 1 - sy) < border;
+            labels[sy * sw + sx] = (near_left || near_right || near_top || near_bottom) ? 0u : 1u;
         }
-        fg_gmm[c].weight = 0.5f;
-        bg_gmm[c].weight = 0.5f;
     }
 
-    // Step 3: EM Iterations
+    const float lambda = 25.0f;  // scale of the neighbour (smoothness) link weights
+    const float hard_cap = 1e6f;  // capacity of the extra sink link given to ring cells
+
     for (int iter = 0; iter < max_iter; ++iter) {
-        // E-Step: compute responsibilities
-        for (int i = 0; i < size; ++i) {
-            const uint8_t *px = &src_data[i * 3];
-            float total_fg = 0.0f, total_bg = 0.0f;
-
-            for (int c = 0; c < GMM_COMPONENTS; ++c) {
-                fg_resp[i][c] = fg_gmm[c].weight * gaussian_prob_rgb(px, &fg_gmm[c]);
-                bg_resp[i][c] = bg_gmm[c].weight * gaussian_prob_rgb(px, &bg_gmm[c]);
-                total_fg += fg_resp[i][c];
-                total_bg += bg_resp[i][c];
+        float fg_sum = 0.0f, fg_sqsum = 0.0f, fg_cnt = 0.0f;  // intensity statistics of cells labelled FG
+        float bg_sum = 0.0f, bg_sqsum = 0.0f, bg_cnt = 0.0f;  // intensity statistics of cells labelled BG
+        for (int i = 0; i < sn; ++i) {
+            float v = (float)small[i];
+            if (labels[i]) {
+                fg_sum += v;
+                fg_sqsum += v * v;
+                fg_cnt += 1.0f;
+            } else {
+                bg_sum += v;
+                bg_sqsum += v * v;
+                bg_cnt += 1.0f;
             }
-            for (int c = 0; c < GMM_COMPONENTS; ++c) {
-                fg_resp[i][c] /= (total_fg + 1e-6f);
-                bg_resp[i][c] /= (total_bg + 1e-6f);
+        }
+        if (fg_cnt < 1.0f || bg_cnt < 1.0f)
+            break;  // one class is empty, so its model cannot be estimated
+
+        float mu_fg = fg_sum / fg_cnt;
+        float mu_bg = bg_sum / bg_cnt;
+        float var_fg = fmaxf((fg_sqsum / fg_cnt) - mu_fg * mu_fg, 25.0f);  // variance floored at 25
+        float var_bg = fmaxf((bg_sqsum / bg_cnt) - mu_bg * mu_bg, 25.0f);
+
+        float d2_sum = 0.0f;  // sum of squared differences over right/down neighbour pairs
+        int d2_cnt = 0;
+        for (int y = 0; y < sh; ++y) {
+            for (int x = 0; x < sw; ++x) {
+                int i = y * sw + x;
+                if (x + 1 < sw) {
+                    float d = (float)small[i] - (float)small[i + 1];
+                    d2_sum += d * d;
+                    d2_cnt++;
+                }
+                if (y + 1 < sh) {
+                    float d = (float)small[i] - (float)small[i + sw];
+                    d2_sum += d * d;
+                    d2_cnt++;
+                }
             }
         }
+        float beta = 1.0f / (2.0f * (d2_sum / (float)(d2_cnt + 1)) + 1e-6f);
+
+        gc_graph_t g = {0};  // zeroed so gc_free is safe even if gc_init fails part-way
+        int node_n = sn + 2;  // one node per grid cell plus source and sink
+        // Terminal edges: ~4*sn, n-links: ~4*((sw-1)*sh + sw*(sh-1)), plus border t-links.
+        // Worst-case storage bound with current representation:
+        // - terminal links: 2 add_edge/pixel => 4*sn edges
+        // - smoothness links: 4 edges per right/down neighbor pair
+        //   pairs = (sw-1)*sh + sw*(sh-1) = 2*sn - sw - sh
+        //   => 4*(2*sn - sw - sh) edges
+        // - hard-ring t-links: up to 1 add_edge/pixel in worst case => 2*sn edges
+        // Total <= 14*sn - 4*(sw+sh), add safety margin.
+        int edge_cap = 14 * sn + 512;
+        if (gc_init(&g, node_n, edge_cap) != 0) {
+            gc_free(&g);
+            memory_free(labels);
+            memory_free(small);
+            return EMBEDDIP_ERROR_OUT_OF_MEMORY;
+        }
 
-        // M-Step: update GMM parameters
-        for (int c = 0; c < GMM_COMPONENTS; ++c) {
-            float fg_wsum = 0.0f, bg_wsum = 0.0f;
-            float fg_sum[3] = {0}, fg_sqsum[3] = {0};
-            float bg_sum[3] = {0}, bg_sqsum[3] = {0};
+        for (int y = 0; y < sh; ++y) {
+            for (int x = 0; x < sw; ++x) {
+                int p = y * sw + x;
+                float pix = (float)small[p];
+                float dbg = 0.5f * logf(var_bg) + ((pix - mu_bg) * (pix - mu_bg)) / (2.0f * var_bg);
+                float dfg = 0.5f * logf(var_fg) + ((pix - mu_fg) * (pix - mu_fg)) / (2.0f * var_fg);
+                if (dbg < 0.0f)  // dbg/dfg: Gaussian negative log-likelihood up to a constant, clamped at 0
+                    dbg = 0.0f;
+                if (dfg < 0.0f)
+                    dfg = 0.0f;
+
+                if (gc_add_edge(&g, g.source, p, dbg + 1e-3f) != 0 ||  // source->p carries the BG cost
+                    gc_add_edge(&g, p, g.sink, dfg + 1e-3f) != 0) {  // p->sink carries the FG cost
+                    gc_free(&g);
+                    memory_free(labels);
+                    memory_free(small);
+                    return EMBEDDIP_ERROR_OUT_OF_MEMORY;
+                }
 
-            for (int i = 0; i < size; ++i) {
-                const uint8_t *px = &src_data[i * 3];
-                if (labels[i] == 1) {
-                    float r = fg_resp[i][c];
-                    fg_wsum += r;
-                    for (int ch = 0; ch < 3; ++ch) {
-                        fg_sum[ch] += r * px[ch];
-                        fg_sqsum[ch] += r * px[ch] * px[ch];
-                    }
-                } else {
-                    float r = bg_resp[i][c];
-                    bg_wsum += r;
-                    for (int ch = 0; ch < 3; ++ch) {
-                        bg_sum[ch] += r * px[ch];
-                        bg_sqsum[ch] += r * px[ch] * px[ch];
+                int near_left = x < border;
+                int near_right = (sw - 1 - x) < border;
+                int near_top = y < border;
+                int near_bottom = (sh - 1 - y) < border;
+                int is_ring = near_left || near_right || near_top || near_bottom;
+                if (is_ring) {
+                    if (gc_add_edge(&g, p, g.sink, hard_cap) != 0) {  // extra high-capacity sink link for ring cells
+                        gc_free(&g);
+                        memory_free(labels);
+                        memory_free(small);
+                        return EMBEDDIP_ERROR_OUT_OF_MEMORY;
                     }
                 }
-            }
 
-            for (int ch = 0; ch < 3; ++ch) {
-                if (fg_wsum > 1e-6f) {
-                    fg_gmm[c].mean[ch] = fg_sum[ch] / fg_wsum;
-                    float var = (fg_sqsum[ch] / fg_wsum) - fg_gmm[c].mean[ch] * fg_gmm[c].mean[ch];
-                    fg_gmm[c].variance[ch] = fmaxf(var, 10.0f);
+                if (x + 1 < sw) {
+                    int q = p + 1;  // right neighbour
+                    float d = (float)small[p] - (float)small[q];
+                    float w = lambda * expf(-beta * d * d) + 1e-3f;  // weight shrinks as the intensity gap grows
+                    if (gc_add_undirected(&g, p, q, w) != 0) {
+                        gc_free(&g);
+                        memory_free(labels);
+                        memory_free(small);
+                        return EMBEDDIP_ERROR_OUT_OF_MEMORY;
+                    }
                 }
-
-                if (bg_wsum > 1e-6f) {
-                    bg_gmm[c].mean[ch] = bg_sum[ch] / bg_wsum;
-                    float var = (bg_sqsum[ch] / bg_wsum) - bg_gmm[c].mean[ch] * bg_gmm[c].mean[ch];
-                    bg_gmm[c].variance[ch] = fmaxf(var, 10.0f);
+                if (y + 1 < sh) {
+                    int q = p + sw;  // neighbour one row down
+                    float d = (float)small[p] - (float)small[q];
+                    float w = lambda * expf(-beta * d * d) + 1e-3f;
+                    if (gc_add_undirected(&g, p, q, w) != 0) {
+                        gc_free(&g);
+                        memory_free(labels);
+                        memory_free(small);
+                        return EMBEDDIP_ERROR_OUT_OF_MEMORY;
+                    }
                 }
             }
-
-            fg_gmm[c].weight = fg_wsum;
-            bg_gmm[c].weight = bg_wsum;
         }
 
-        // Normalize weights
-        float fg_total = 0.0f, bg_total = 0.0f;
-        for (int c = 0; c < GMM_COMPONENTS; ++c) {
-            fg_total += fg_gmm[c].weight;
-            bg_total += bg_gmm[c].weight;
-        }
-        for (int c = 0; c < GMM_COMPONENTS; ++c) {
-            fg_gmm[c].weight /= (fg_total + 1e-6f);
-            bg_gmm[c].weight /= (bg_total + 1e-6f);
+        gc_maxflow(&g);  // the returned flow value is not used; only the residual capacities matter
+        gc_mark_source_side(&g);
+        for (int i = 0; i < sn; ++i) {
+            labels[i] = g.seen[i] ? 1u : 0u;  // still reachable from the source => foreground
         }
-
-        // Reassign labels and update mask
-        for (int i = 0; i < size; ++i) {
-            const uint8_t *px = &src_data[i * 3];
-            float p_fg = 0.0f, p_bg = 0.0f;
-            for (int c = 0; c < GMM_COMPONENTS; ++c) {
-                p_fg += fg_gmm[c].weight * gaussian_prob_rgb(px, &fg_gmm[c]);
-                p_bg += bg_gmm[c].weight * gaussian_prob_rgb(px, &bg_gmm[c]);
-            }
-            labels[i] = (p_fg > p_bg) ? 1 : 0;
-            mask_data[i] = labels[i] ? FOREGROUND : BACKGROUND;
+        gc_free(&g);  // the graph is rebuilt from scratch on the next iteration
+    }
+
+    memset(mask_data, BACKGROUND, (size_t)width * (size_t)height);
+    for (int y = y0; y < y1; ++y) {
+        for (int x = x0; x < x1; ++x) {
+            int sx = (x - x0) / ds;  // grid cell that covers this full-resolution pixel
+            int sy = (y - y0) / ds;
+            if (sx >= sw)
+                sx = sw - 1;
+            if (sy >= sh)
+                sy = sh - 1;
+            int si = sy * sw + sx;
+            mask_data[y * width + x] = labels[si] ? FOREGROUND : BACKGROUND;
         }
     }
+    mask->log = IMAGE_DATA_PIXELS;
 
     memory_free(labels);
-    memory_free(fg_resp);
-    memory_free(bg_resp);
+    memory_free(small);
     return EMBEDDIP_OK;
 }
diff --git a/imgproc/segmentation.h b/imgproc/segmentation.h
index 195e1c6..367e3dc 100644
--- a/imgproc/segmentation.h
+++ b/imgproc/segmentation.h
@@ -83,18 +83,7 @@ embeddip_status_t colorRegionGrowing(const Image *inImg,
                                      float tolerance);
 
 /**
- * @brief GrabCut segmentation (working version).
- *
- * @param[in]  src        Pointer to input grayscale image.
- * @param[out] mask       Pointer to output mask image.
- * @param[in]  roi        Region of interest.
- * @param[in]  iterations Number of iterations.
- * @return EMBEDDIP_OK on success, error code otherwise.
- */
-embeddip_status_t grabCutLite_working(const Image *src, Image *mask, Rectangle roi, int iterations);
-
-/**
- * @brief GrabCut segmentation (sd version).
+ * @brief GrabCut segmentation (grayscale, graph-cut based).
  *
  * @param[in]  src        Pointer to input grayscale image.
  * @param[out] mask       Pointer to output mask image.
@@ -102,10 +91,10 @@ embeddip_status_t grabCutLite_working(const Image *src, Image *mask, Rectangle r
  * @param[in]  iterations Number of iterations.
  * @return EMBEDDIP_OK on success, error code otherwise.
  */
-embeddip_status_t grabCutLitesd(const Image *src, Image *mask, Rectangle roi, int iterations);
+embeddip_status_t grabCut(const Image *src, Image *mask, Rectangle roi, int iterations);
 
 /**
- * @brief GrabCut segmentation (main version).
+ * @brief GrabCut segmentation (lightweight version).
  *
  * @param[in]  src        Pointer to input image.
  * @param[out] mask       Pointer to output mask image.
@@ -115,29 +104,6 @@ embeddip_status_t grabCutLitesd(const Image *src, Image *mask, Rectangle roi, in
  */
 embeddip_status_t grabCutLite(const Image *src, Image *mask, Rectangle roi, int iterations);
 
-/**
- * @brief GrabCut segmentation (grayscale realistic).
- *
- * @param[in]  src        Pointer to input grayscale image.
- * @param[out] mask       Pointer to output mask image.
- * @param[in]  roi        Region of interest.
- * @param[in]  iterations Number of iterations.
- * @return EMBEDDIP_OK on success, error code otherwise.
- */
-embeddip_status_t
-grabCutGrayscaleRealistic(const Image *src, Image *mask, Rectangle roi, int iterations);
-
-/**
- * @brief GrabCut segmentation (RGB version).
- *
- * @param[in]  src        Pointer to input RGB image.
- * @param[out] mask       Pointer to output mask image.
- * @param[in]  roi        Region of interest.
- * @param[in]  iterations Number of iterations.
- * @return EMBEDDIP_OK on success, error code otherwise.
- */
-embeddip_status_t grabCutRGB(const Image *src, Image *mask, Rectangle roi, int iterations);
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/wrapper/ImageWrapper.cpp b/wrapper/ImageWrapper.cpp
index 283297a..64716e7 100755
--- a/wrapper/ImageWrapper.cpp
+++ b/wrapper/ImageWrapper.cpp
@@ -108,6 +108,14 @@ bool Image::isChalsEmpty() const noexcept
     return image_ ? ::isChalsEmpty(image_) : true;
 }
 
+/**
+ * @brief Compresses this image into JPEG format via ::compress and returns its result.
+ */
+int Image::compressJPEG(Image &out, int quality) const noexcept
+{
+    return ::compress(raw(), out.raw(), IMAGE_COMP_JPEG, quality);
+}
+
 /**
  * @brief Applies negative transform.
  */
@@ -648,14 +656,6 @@ void Image::ifft(Image &out) const
     ::ifft(raw(), out.raw());
 }
 
-/**
- * @brief Reverses FFT quadrant shift.
- */
-void Image::ifftshift()
-{
-    ::fftshift(raw());
-}
-
 /**
  * @brief Converts polar components to complex/cartesian form.
  */
@@ -752,12 +752,12 @@ void Image::bitwiseNot(Image &out) const
 }
 
 /**
- * @brief Runs simplified GrabCut workflow that outputs a mask.
+ * @brief Runs grayscale graph-cut GrabCut within ROI and returns the C-layer status.
  */
-// TODO
-// void Image::grabCutLitesd(Image &maskImg, int iterations) const {
-//  ::grabCutLite_working(raw(), maskImg.raw(), iterations);
-//}
+embeddip_status_t Image::grabCut(Image &maskImg, Rectangle roi, int iterations) const
+{
+    return ::grabCut(raw(), maskImg.raw(), roi, iterations);
+}
 
 /**
  * @brief Runs simplified GrabCut within ROI.
@@ -767,22 +767,6 @@ void Image::grabCutLite(Image &outImg, Rectangle roi, int iterations) const
     ::grabCutLite(raw(), outImg.raw(), roi, iterations);
 }
 
-/**
- * @brief Runs simplified GrabCut for RGB888 data.
- */
-void Image::grabCutLite888(Image &outImg, Rectangle roi, int iterations) const
-{
-    ::grabCutRGB(raw(), outImg.raw(), roi, iterations);
-}
-
-/**
- * @brief Runs RGB graph-cut segmentation.
- */
-void Image::grabCutRGB(Image &outMask, Rectangle roi, int max_iter) const
-{
-    ::grabCutRGB(raw(), outMask.raw(), roi, max_iter);
-}
-
 /**
  * @brief Thresholds image by hue interval.
  */
diff --git a/wrapper/ImageWrapper.hpp b/wrapper/ImageWrapper.hpp
index e978928..1d6583a 100755
--- a/wrapper/ImageWrapper.hpp
+++ b/wrapper/ImageWrapper.hpp
@@ -14,6 +14,7 @@ extern "C" {
 #include "core/memory_manager.h"         /**< Allocators and memory helpers. */
 #include "device/serial/serial.h"        /**< Serial I/O abstraction. */
 #include "imgproc/color.h"               /**< Color conversions and helpers. */
+#include "imgproc/compress.h"            /**< JPEG compression helper. */
 #include "imgproc/connectedcomponents.h" /**< Connected components labeling. */
 #include "imgproc/drawing.h"             /**< Drawing primitives and shapes. */
 #include "imgproc/fft.h"                 /**< Frequency-domain processing. */
@@ -290,6 +291,15 @@ class Image
     bool isChalsEmpty() const noexcept;
 
     // Pixel operations
+    /**
+     * @brief Compresses this image into a JPEG payload stored in the output image.
+     * @param[out] out Destination image buffer that will hold JPEG bytes.
+     * @param[in] quality JPEG quality in range [1, 100]; defaults to 75.
+     * @return 0 on success, -1 on error.
+     * @see ::compress For underlying C implementation
+     */
+    int compressJPEG(Image &out, int quality = 75) const noexcept;
+
     /**
      * @brief Computes negative image transform.
      * @param[out] out Output image for inverted result
@@ -702,12 +712,6 @@ class Image
      */
     void fftshift();
 
-    /**
-     * @brief Reverses frequency shift operation.
-     * @see ::ifftshift For underlying C implementation
-     */
-    void ifftshift();
-
     /**
      * @brief Builds frequency-domain mask in this image.
      * @param[in] type Filter type (lowpass, highpass, bandpass)
@@ -794,12 +798,13 @@ class Image
     void bitwiseNot(Image &out) const;
 
     /**
-     * @brief Runs simplified GrabCut and returns mask output.
-     * @param[out] maskImg Output segmentation mask
-     * @param[in] iterations Number of refinement iterations
-     * @see ::grabCutLitesd For underlying C implementation
+     * @brief Runs grayscale graph-cut GrabCut in ROI.
+     * @param[out] maskImg Output segmentation mask.
+     * @param[in] roi Region of interest.
+     * @param[in] iterations Number of refinement iterations.
+     * @return EMBEDDIP_OK on success, error code otherwise (status from ::grabCut).
      */
-    void grabCutLitesd(Image &maskImg, int iterations) const;
+    embeddip_status_t grabCut(Image &maskImg, Rectangle roi, int iterations) const;
 
     /**
      * @brief Runs simplified GrabCut in a rectangular ROI.
@@ -809,22 +814,6 @@ class Image
      */
     void grabCutLite(Image &outImg, Rectangle roi, int iterations) const;
 
-    /**
-     * @brief Runs simplified GrabCut for RGB888 images.
-     * @param outImg Output segmentation image/mask.
-     * @param roi Region of interest.
-     * @param iterations Number of refinement iterations.
-     */
-    void grabCutLite888(Image &outImg, Rectangle roi, int iterations) const;
-
-    /**
-     * @brief Runs RGB graph-cut segmentation in ROI.
-     * @param outMask Output binary mask.
-     * @param roi Region of interest.
-     * @param max_iter Maximum iteration count.
-     */
-    void grabCutRGB(Image &outMask, Rectangle roi, int max_iter) const;
-
     /**
      * @brief Thresholds image by hue range.
      * @param output Output binary mask image.