From 9e244b47d023380420de0e5ea0f0668c0dce15ba Mon Sep 17 00:00:00 2001 From: chmjkb Date: Wed, 20 May 2026 12:52:48 +0200 Subject: [PATCH 1/5] mvp --- .../app/object_detection/index.tsx | 6 ++ .../app/vision_camera/index.tsx | 3 + .../tasks/ObjectDetectionTask.tsx | 30 +++++-- .../object_detection/ObjectDetection.cpp | 89 +++++++++++++------ .../models/object_detection/ObjectDetection.h | 26 ++++-- .../tests/integration/ObjectDetectionTest.cpp | 84 ++++++++--------- .../utils/computer_vision/Processing.h | 65 ++++++++++++++ .../src/constants/commonVision.ts | 11 +++ .../computer_vision/ObjectDetectionModule.ts | 36 +++++++- .../src/types/objectDetection.ts | 23 ++++- 10 files changed, 289 insertions(+), 84 deletions(-) diff --git a/apps/computer-vision/app/object_detection/index.tsx b/apps/computer-vision/app/object_detection/index.tsx index 1ed3c136ba..d2cbcc91d0 100644 --- a/apps/computer-vision/app/object_detection/index.tsx +++ b/apps/computer-vision/app/object_detection/index.tsx @@ -8,6 +8,11 @@ import { useObjectDetection, ObjectDetectionModelSources, } from 'react-native-executorch'; + +const BLAZEFACE: ObjectDetectionModelSources = { + modelName: 'blazeface', + modelSource: require('../../assets/blazeface.pte'), +}; import { View, StyleSheet, Image, Text } from 'react-native'; import ImageWithBboxes from '../../components/ImageWithBboxes'; import React, { useContext, useEffect, useState } from 'react'; @@ -30,6 +35,7 @@ const MODELS: ModelOption[] = [ { label: 'YOLO26M', value: objectDetection.yolo26m() }, { label: 'YOLO26L', value: objectDetection.yolo26l() }, { label: 'YOLO26X', value: objectDetection.yolo26x() }, + { label: 'BlazeFace', value: BLAZEFACE }, ]; import ErrorBanner from '../../components/ErrorBanner'; diff --git a/apps/computer-vision/app/vision_camera/index.tsx b/apps/computer-vision/app/vision_camera/index.tsx index 99fe0b1ac7..0066b36396 100644 --- a/apps/computer-vision/app/vision_camera/index.tsx +++ b/apps/computer-vision/app/vision_camera/index.tsx @@ -45,6 +45,7 @@ type ModelId = | 'objectDetectionSsdlite' | 'objectDetectionRfdetr' | 'objectDetectionYolo26n' + | 'objectDetectionBlazeface' | 'segmentationDeeplabResnet50' | 'segmentationDeeplabResnet101' | 'segmentationDeeplabMobilenet' @@ -105,6 +106,7 @@ const TASKS: Task[] = [ { id: 'objectDetectionSsdlite', label: 'SSDLite MobileNet' }, { id: 'objectDetectionRfdetr', label: 'RF-DETR Nano' }, { id: 'objectDetectionYolo26n', label: 'YOLO26N' }, + { id: 'objectDetectionBlazeface', label: 'BlazeFace' }, ], }, { @@ -270,6 +272,7 @@ export default function VisionCameraScreen() { | 'objectDetectionSsdlite' | 'objectDetectionRfdetr' | 'objectDetectionYolo26n' + | 'objectDetectionBlazeface' } /> )} diff --git a/apps/computer-vision/components/vision_camera/tasks/ObjectDetectionTask.tsx b/apps/computer-vision/components/vision_camera/tasks/ObjectDetectionTask.tsx index e05de26105..aeb364c377 100644 --- a/apps/computer-vision/components/vision_camera/tasks/ObjectDetectionTask.tsx +++ b/apps/computer-vision/components/vision_camera/tasks/ObjectDetectionTask.tsx @@ -8,15 +8,23 @@ import { useObjectDetection, CocoLabel, CocoLabelYolo, + BlazeFaceLabel, + ObjectDetectionModelSources, } from 'react-native-executorch'; import BoundingBoxes from '../../BoundingBoxes'; import { FRAME_TARGET_RESOLUTION, TaskProps } from './types'; const objectDetection = models.object_detection; +const BLAZEFACE: ObjectDetectionModelSources = { + modelName: 'blazeface', + modelSource: require('../../../assets/blazeface.pte'), +}; + type ObjModelId = | 'objectDetectionSsdlite' | 'objectDetectionRfdetr' - | 'objectDetectionYolo26n'; + | 'objectDetectionYolo26n' + | 'objectDetectionBlazeface'; type Props = TaskProps & { activeModel: ObjModelId }; @@ -44,13 +52,18 @@ export default function ObjectDetectionTask({ model: objectDetection.yolo26n(), preventLoad: activeModel !== 'objectDetectionYolo26n', }); + const blazeface = useObjectDetection({ + model: BLAZEFACE, + preventLoad: activeModel !== 'objectDetectionBlazeface', + }); - const active = - activeModel === 'objectDetectionSsdlite' - ? ssdlite - : activeModel === 'objectDetectionRfdetr' - ? rfdetr - : yolo26n; + const detectors = { + objectDetectionSsdlite: ssdlite, + objectDetectionRfdetr: rfdetr, + objectDetectionYolo26n: yolo26n, + objectDetectionBlazeface: blazeface, + } satisfies Record; + const active = detectors[activeModel]; type CommonDetection = Omit & { label: string }; @@ -80,7 +93,8 @@ export default function ObjectDetectionTask({ (p: { results: | Detection[] - | Detection[]; + | Detection[] + | Detection[]; imageWidth: number; imageHeight: number; }) => { diff --git a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp index 24c4e1083a..6c18183ea9 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp @@ -82,16 +82,10 @@ std::set ObjectDetection::prepareAllowedClasses( return allowedClasses; } -std::vector -ObjectDetection::postprocess(const std::vector &tensors, - cv::Size originalSize, double detectionThreshold, - double iouThreshold, - const std::vector &classIndices) { - const cv::Size inputSize = modelInputSize(); - float widthRatio = static_cast(originalSize.width) / inputSize.width; - float heightRatio = - static_cast(originalSize.height) / inputSize.height; - +std::vector ObjectDetection::postprocess( + const std::vector &tensors, const BoxTransform &transform, + double detectionThreshold, double iouThreshold, + const std::vector &classIndices, bool useWeightedNms) { // Prepare allowed classes set for filtering auto allowedClasses = prepareAllowedClasses(classIndices); @@ -124,10 +118,13 @@ ObjectDetection::postprocess(const std::vector &tensors, continue; } - float x1 = bboxes[i * 4] * widthRatio; - float y1 = bboxes[i * 4 + 1] * heightRatio; - float x2 = bboxes[i * 4 + 2] * widthRatio; - float y2 = bboxes[i * 4 + 3] * heightRatio; + // Map model-input pixel coords back to source-image coords. The same + // affine `x_src = x_model * scale + offset` works for stretch and + // letterbox preprocessing — offsets are zero in the stretch case. + float x1 = bboxes[i * 4] * transform.scaleX + transform.offsetX; + float y1 = bboxes[i * 4 + 1] * transform.scaleY + transform.offsetY; + float x2 = bboxes[i * 4 + 2] * transform.scaleX + transform.offsetX; + float y2 = bboxes[i * 4 + 3] * transform.scaleY + transform.offsetY; if (std::cmp_greater_equal(labelIdx, labelNames_.size())) { throw RnExecutorchError( @@ -140,12 +137,17 @@ ObjectDetection::postprocess(const std::vector &tensors, labelNames_[labelIdx], labelIdx, scores[i]); } - return utils::computer_vision::nonMaxSuppression(detections, iouThreshold); + return useWeightedNms + ? utils::computer_vision::weightedNonMaxSuppression(detections, + iouThreshold) + : utils::computer_vision::nonMaxSuppression(detections, + iouThreshold); } std::vector ObjectDetection::runInference( cv::Mat image, double detectionThreshold, double iouThreshold, - const std::vector &classIndices, const std::string &methodName) { + const std::vector &classIndices, const std::string &methodName, + bool useWeightedNms, bool useLetterbox) { if (detectionThreshold < 0.0 || detectionThreshold > 1.0) { throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, "detectionThreshold must be in range [0, 1]"); @@ -171,7 +173,38 @@ std::vector ObjectDetection::runInference( } modelInputShape_ = inputShapes[0]; - cv::Mat preprocessed = preprocess(image); + const cv::Size inputSize = modelInputSize(); + cv::Mat preprocessed; + BoxTransform transform; + if (useLetterbox) { + // Aspect-preserving fit + center-pad with black bars. Models trained on + // natural-aspect crops (BlazeFace) need this — plain cv::resize stretches + // the face and shifts where anchors fire. + const float fitScale = + std::min(static_cast(inputSize.width) / originalSize.width, + static_cast(inputSize.height) / originalSize.height); + const int newW = + static_cast(std::round(originalSize.width * fitScale)); + const int newH = + static_cast(std::round(originalSize.height * fitScale)); + const int padX = (inputSize.width - newW) / 2; + const int padY = (inputSize.height - newH) / 2; + + cv::Mat resized; + cv::resize(image, resized, cv::Size(newW, newH), 0, 0, cv::INTER_AREA); + cv::copyMakeBorder(resized, preprocessed, padY, + inputSize.height - newH - padY, padX, + inputSize.width - newW - padX, cv::BORDER_CONSTANT, + cv::Scalar(0, 0, 0)); + + const float inv = 1.0f / fitScale; + transform = {inv, inv, -padX * inv, -padY * inv}; + } else { + preprocessed = preprocess(image); + transform = {static_cast(originalSize.width) / inputSize.width, + static_cast(originalSize.height) / inputSize.height, + 0.0f, 0.0f}; + } auto inputTensor = (normMean_ && normStd_) @@ -188,31 +221,34 @@ std::vector ObjectDetection::runInference( "Ensure the model input is correct."); } - return postprocess(executeResult.get(), originalSize, detectionThreshold, - iouThreshold, classIndices); + return postprocess(executeResult.get(), transform, detectionThreshold, + iouThreshold, classIndices, useWeightedNms); } std::vector ObjectDetection::generateFromString( std::string imageSource, double detectionThreshold, double iouThreshold, - std::vector classIndices, std::string methodName) { + std::vector classIndices, std::string methodName, + bool useWeightedNms, bool useLetterbox) { cv::Mat imageBGR = image_processing::readImage(imageSource); cv::Mat imageRGB; cv::cvtColor(imageBGR, imageRGB, cv::COLOR_BGR2RGB); return runInference(imageRGB, detectionThreshold, iouThreshold, classIndices, - methodName); + methodName, useWeightedNms, useLetterbox); } std::vector ObjectDetection::generateFromFrame( jsi::Runtime &runtime, const jsi::Value &frameData, double detectionThreshold, double iouThreshold, - std::vector classIndices, std::string methodName) { + std::vector classIndices, std::string methodName, + bool useWeightedNms, bool useLetterbox) { auto orient = ::rnexecutorch::utils::readFrameOrientation(runtime, frameData); cv::Mat frame = extractFromFrame(runtime, frameData); cv::Mat rotated = ::rnexecutorch::utils::rotateFrameForModel(frame, orient); - auto detections = runInference(rotated, detectionThreshold, iouThreshold, - classIndices, methodName); + auto detections = + runInference(rotated, detectionThreshold, iouThreshold, classIndices, + methodName, useWeightedNms, useLetterbox); for (auto &det : detections) { ::rnexecutorch::utils::inverseRotateBbox(det.bbox, orient, rotated.size()); @@ -222,10 +258,11 @@ std::vector ObjectDetection::generateFromFrame( std::vector ObjectDetection::generateFromPixels( JSTensorViewIn pixelData, double detectionThreshold, double iouThreshold, - std::vector classIndices, std::string methodName) { + std::vector classIndices, std::string methodName, + bool useWeightedNms, bool useLetterbox) { cv::Mat image = extractFromPixels(pixelData); return runInference(image, detectionThreshold, iouThreshold, classIndices, - methodName); + methodName, useWeightedNms, useLetterbox); } } // namespace rnexecutorch::models::object_detection diff --git a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.h b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.h index 6e3c01356e..7fcb960cea 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.h @@ -14,6 +14,16 @@ namespace models::object_detection { using executorch::extension::TensorPtr; using executorch::runtime::EValue; +/// Affine transform from model-input pixel coords back to source-image coords: +/// `x_src = x_model * scaleX + offsetX`. Covers both plain stretch (offsets +/// zero) and letterbox (offsets carry the centre-pad). +struct BoxTransform { + float scaleX; + float scaleY; + float offsetX; + float offsetY; +}; + /** * @brief Object detection model that detects and localises objects in images. * @@ -75,15 +85,18 @@ class ObjectDetection : public VisionModel { [[nodiscard("Registered non-void function")]] std::vector generateFromString(std::string imageSource, double detectionThreshold, double iouThreshold, std::vector classIndices, - std::string methodName); + std::string methodName, bool useWeightedNms, + bool useLetterbox); [[nodiscard("Registered non-void function")]] std::vector generateFromFrame(jsi::Runtime &runtime, const jsi::Value &frameData, double detectionThreshold, double iouThreshold, - std::vector classIndices, std::string methodName); + std::vector classIndices, std::string methodName, + bool useWeightedNms, bool useLetterbox); [[nodiscard("Registered non-void function")]] std::vector generateFromPixels(JSTensorViewIn pixelData, double detectionThreshold, double iouThreshold, std::vector classIndices, - std::string methodName); + std::string methodName, bool useWeightedNms, + bool useLetterbox); protected: /** @@ -99,7 +112,8 @@ class ObjectDetection : public VisionModel { std::vector runInference(cv::Mat image, double detectionThreshold, double iouThreshold, const std::vector &classIndices, - const std::string &methodName); + const std::string &methodName, bool useWeightedNms, + bool useLetterbox); private: /** @@ -121,9 +135,9 @@ class ObjectDetection : public VisionModel { * the size of @ref labelNames_. */ std::vector - postprocess(const std::vector &tensors, cv::Size originalSize, + postprocess(const std::vector &tensors, const BoxTransform &transform, double detectionThreshold, double iouThreshold, - const std::vector &classIndices); + const std::vector &classIndices, bool useWeightedNms); /** * @brief Ensures the specified method is loaded, unloading any previous diff --git a/packages/react-native-executorch/common/rnexecutorch/tests/integration/ObjectDetectionTest.cpp b/packages/react-native-executorch/common/rnexecutorch/tests/integration/ObjectDetectionTest.cpp index 5c5bb6e736..98fa6eff68 100644 --- a/packages/react-native-executorch/common/rnexecutorch/tests/integration/ObjectDetectionTest.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/tests/integration/ObjectDetectionTest.cpp @@ -51,7 +51,7 @@ template <> struct ModelTraits { static void callGenerate(ModelType &model) { (void)model.generateFromString(kValidTestImagePath, 0.5, 0.55, {}, - "forward"); + "forward", false, false); } }; } // namespace model_tests @@ -69,14 +69,16 @@ TEST(ObjectDetectionGenerateTests, InvalidImagePathThrows) { ObjectDetection model(kValidObjectDetectionModelPath, {}, {}, kCocoLabels, nullptr); EXPECT_THROW((void)model.generateFromString("nonexistent_image.jpg", 0.5, - 0.55, {}, "forward"), + 0.55, {}, "forward", false, + false), RnExecutorchError); } TEST(ObjectDetectionGenerateTests, EmptyImagePathThrows) { ObjectDetection model(kValidObjectDetectionModelPath, {}, {}, kCocoLabels, nullptr); - EXPECT_THROW((void)model.generateFromString("", 0.5, 0.55, {}, "forward"), + EXPECT_THROW((void)model.generateFromString("", 0.5, 0.55, {}, "forward", + false, false), RnExecutorchError); } @@ -84,7 +86,8 @@ TEST(ObjectDetectionGenerateTests, MalformedURIThrows) { ObjectDetection model(kValidObjectDetectionModelPath, {}, {}, kCocoLabels, nullptr); EXPECT_THROW((void)model.generateFromString("not_a_valid_uri://bad", 0.5, - 0.55, {}, "forward"), + 0.55, {}, "forward", false, + false), RnExecutorchError); } @@ -92,7 +95,7 @@ TEST(ObjectDetectionGenerateTests, NegativeThresholdThrows) { ObjectDetection model(kValidObjectDetectionModelPath, {}, {}, kCocoLabels, nullptr); EXPECT_THROW((void)model.generateFromString(kValidTestImagePath, -0.1, 0.55, - {}, "forward"), + {}, "forward", false, false), RnExecutorchError); } @@ -100,33 +103,33 @@ TEST(ObjectDetectionGenerateTests, ThresholdAboveOneThrows) { ObjectDetection model(kValidObjectDetectionModelPath, {}, {}, kCocoLabels, nullptr); EXPECT_THROW((void)model.generateFromString(kValidTestImagePath, 1.1, 0.55, - {}, "forward"), + {}, "forward", false, false), RnExecutorchError); } TEST(ObjectDetectionGenerateTests, ValidImageReturnsResults) { ObjectDetection model(kValidObjectDetectionModelPath, {}, {}, kCocoLabels, nullptr); - auto results = - model.generateFromString(kValidTestImagePath, 0.3, 0.55, {}, "forward"); + auto results = model.generateFromString(kValidTestImagePath, 0.3, 0.55, {}, + "forward", false, false); EXPECT_GE(results.size(), 0u); } TEST(ObjectDetectionGenerateTests, HighThresholdReturnsFewerResults) { ObjectDetection model(kValidObjectDetectionModelPath, {}, {}, kCocoLabels, nullptr); - auto lowThresholdResults = - model.generateFromString(kValidTestImagePath, 0.1, 0.55, {}, "forward"); - auto highThresholdResults = - model.generateFromString(kValidTestImagePath, 0.9, 0.55, {}, "forward"); + auto lowThresholdResults = model.generateFromString( + kValidTestImagePath, 0.1, 0.55, {}, "forward", false, false); + auto highThresholdResults = model.generateFromString( + kValidTestImagePath, 0.9, 0.55, {}, "forward", false, false); EXPECT_GE(lowThresholdResults.size(), highThresholdResults.size()); } TEST(ObjectDetectionGenerateTests, DetectionsHaveValidBoundingBoxes) { ObjectDetection model(kValidObjectDetectionModelPath, {}, {}, kCocoLabels, nullptr); - auto results = - model.generateFromString(kValidTestImagePath, 0.3, 0.55, {}, "forward"); + auto results = model.generateFromString(kValidTestImagePath, 0.3, 0.55, {}, + "forward", false, false); for (const auto &detection : results) { EXPECT_LE(detection.bbox.p1.x, detection.bbox.p2.x); @@ -139,8 +142,8 @@ TEST(ObjectDetectionGenerateTests, DetectionsHaveValidBoundingBoxes) { TEST(ObjectDetectionGenerateTests, DetectionsHaveValidScores) { ObjectDetection model(kValidObjectDetectionModelPath, {}, {}, kCocoLabels, nullptr); - auto results = - model.generateFromString(kValidTestImagePath, 0.3, 0.55, {}, "forward"); + auto results = model.generateFromString(kValidTestImagePath, 0.3, 0.55, {}, + "forward", false, false); for (const auto &detection : results) { EXPECT_GE(detection.score, 0.0f); @@ -151,8 +154,8 @@ TEST(ObjectDetectionGenerateTests, DetectionsHaveValidScores) { TEST(ObjectDetectionGenerateTests, DetectionsHaveValidLabels) { ObjectDetection model(kValidObjectDetectionModelPath, {}, {}, kCocoLabels, nullptr); - auto results = - model.generateFromString(kValidTestImagePath, 0.3, 0.55, {}, "forward"); + auto results = model.generateFromString(kValidTestImagePath, 0.3, 0.55, {}, + "forward", false, false); for (const auto &detection : results) { const auto &label = detection.label; @@ -173,7 +176,8 @@ TEST(ObjectDetectionPixelTests, ValidPixelDataReturnsResults) { JSTensorViewIn tensorView{pixelData.data(), {height, width, channels}, executorch::aten::ScalarType::Byte}; - auto results = model.generateFromPixels(tensorView, 0.3, 0.55, {}, "forward"); + auto results = model.generateFromPixels(tensorView, 0.3, 0.55, {}, "forward", + false, false); EXPECT_GE(results.size(), 0u); } @@ -185,9 +189,9 @@ TEST(ObjectDetectionPixelTests, NegativeThresholdThrows) { JSTensorViewIn tensorView{pixelData.data(), {height, width, channels}, executorch::aten::ScalarType::Byte}; - EXPECT_THROW( - (void)model.generateFromPixels(tensorView, -0.1, 0.55, {}, "forward"), - RnExecutorchError); + EXPECT_THROW((void)model.generateFromPixels(tensorView, -0.1, 0.55, {}, + "forward", false, false), + RnExecutorchError); } TEST(ObjectDetectionPixelTests, ThresholdAboveOneThrows) { @@ -198,9 +202,9 @@ TEST(ObjectDetectionPixelTests, ThresholdAboveOneThrows) { JSTensorViewIn tensorView{pixelData.data(), {height, width, channels}, executorch::aten::ScalarType::Byte}; - EXPECT_THROW( - (void)model.generateFromPixels(tensorView, 1.1, 0.55, {}, "forward"), - RnExecutorchError); + EXPECT_THROW((void)model.generateFromPixels(tensorView, 1.1, 0.55, {}, + "forward", false, false), + RnExecutorchError); } TEST(ObjectDetectionInheritedTests, GetInputShapeWorks) { @@ -255,7 +259,7 @@ TEST(ObjectDetectionNormTests, ValidNormParamsGenerateSucceeds) { ObjectDetection model(kValidObjectDetectionModelPath, mean, std, kCocoLabels, nullptr); EXPECT_NO_THROW((void)model.generateFromString(kValidTestImagePath, 0.5, 0.55, - {}, "forward")); + {}, "forward", false, false)); } // ============================================================================ @@ -265,16 +269,16 @@ TEST(ObjectDetectionMethodTests, InvalidMethodNameThrows) { ObjectDetection model(kValidObjectDetectionModelPath, {}, {}, kCocoLabels, nullptr); EXPECT_THROW((void)model.generateFromString(kValidTestImagePath, 0.5, 0.55, - {}, "forward_999"), + {}, "forward_999", false, false), RnExecutorchError); } TEST(ObjectDetectionMethodTests, EmptyMethodNameThrows) { ObjectDetection model(kValidObjectDetectionModelPath, {}, {}, kCocoLabels, nullptr); - EXPECT_THROW( - (void)model.generateFromString(kValidTestImagePath, 0.5, 0.55, {}, ""), - RnExecutorchError); + EXPECT_THROW((void)model.generateFromString(kValidTestImagePath, 0.5, 0.55, + {}, "", false, false), + RnExecutorchError); } // ============================================================================ @@ -285,8 +289,8 @@ TEST(ObjectDetectionClassFilterTests, ObjectDetection model(kValidObjectDetectionModelPath, {}, {}, kCocoLabels, nullptr); // Only request "person" class (index 0 in COCO) - auto results = - model.generateFromString(kValidTestImagePath, 0.3, 0.55, {0}, "forward"); + auto results = model.generateFromString(kValidTestImagePath, 0.3, 0.55, {0}, + "forward", false, false); for (const auto &det : results) { EXPECT_EQ(det.label, "person"); } @@ -296,11 +300,11 @@ TEST(ObjectDetectionClassFilterTests, EmptyClassIndicesReturnsMoreOrEqualResults) { ObjectDetection model(kValidObjectDetectionModelPath, {}, {}, kCocoLabels, nullptr); - auto allClasses = - model.generateFromString(kValidTestImagePath, 0.3, 0.55, {}, "forward"); + auto allClasses = model.generateFromString(kValidTestImagePath, 0.3, 0.55, {}, + "forward", false, false); // person (0) only - auto filtered = - model.generateFromString(kValidTestImagePath, 0.3, 0.55, {0}, "forward"); + auto filtered = model.generateFromString(kValidTestImagePath, 0.3, 0.55, {0}, + "forward", false, false); EXPECT_GE(allClasses.size(), filtered.size()); } @@ -311,10 +315,10 @@ TEST(ObjectDetectionIouTests, HigherIouThresholdReturnsSameOrMoreResults) { ObjectDetection model(kValidObjectDetectionModelPath, {}, {}, kCocoLabels, nullptr); // High IoU threshold = less aggressive NMS = more boxes survive - auto highIou = - model.generateFromString(kValidTestImagePath, 0.3, 0.9, {}, "forward"); + auto highIou = model.generateFromString(kValidTestImagePath, 0.3, 0.9, {}, + "forward", false, false); // Low IoU threshold = more aggressive NMS = fewer boxes survive - auto lowIou = - model.generateFromString(kValidTestImagePath, 0.3, 0.1, {}, "forward"); + auto lowIou = model.generateFromString(kValidTestImagePath, 0.3, 0.1, {}, + "forward", false, false); EXPECT_GE(highIou.size(), lowIou.size()); } diff --git a/packages/react-native-executorch/common/rnexecutorch/utils/computer_vision/Processing.h b/packages/react-native-executorch/common/rnexecutorch/utils/computer_vision/Processing.h index 3bd3022d4a..5d88a202dd 100644 --- a/packages/react-native-executorch/common/rnexecutorch/utils/computer_vision/Processing.h +++ b/packages/react-native-executorch/common/rnexecutorch/utils/computer_vision/Processing.h @@ -48,4 +48,69 @@ std::vector nonMaxSuppression(std::vector items, double iouThreshold) { return result; } +// Weighted (blending) NMS, used by BlazeFace-style detectors. Overlapping +// anchors of the same class are score-weighted-averaged into a single box +// instead of greedily pruned to the top scorer. The output box position is +// `sum(box_i * score_i) / sum(score_i)` (paper § 3.2). The output score is +// the *max* of the cluster, not the mean: with a low pre-NMS threshold the +// mean drifts around the cluster floor and makes the detection flicker +// in/out as low-confidence anchors enter/leave the cluster between frames. +template +std::vector weightedNonMaxSuppression(std::vector items, + double iouThreshold) { + if (items.empty()) { + return {}; + } + + std::ranges::sort(items, + [](const T &a, const T &b) { return a.score > b.score; }); + + std::vector result; + std::vector consumed(items.size(), false); + + for (size_t i = 0; i < items.size(); ++i) { + if (consumed[i]) { + continue; + } + consumed[i] = true; + + float totalScore = items[i].score; + float wx1 = items[i].bbox.p1.x * items[i].score; + float wy1 = items[i].bbox.p1.y * items[i].score; + float wx2 = items[i].bbox.p2.x * items[i].score; + float wy2 = items[i].bbox.p2.y * items[i].score; + + for (size_t j = i + 1; j < items.size(); ++j) { + if (consumed[j]) { + continue; + } + + if constexpr (requires(T t) { t.classIndex; }) { + if (items[i].classIndex != items[j].classIndex) { + continue; + } + } + + float iou = computeIoU(items[i].bbox, items[j].bbox); + if (iou > iouThreshold) { + consumed[j] = true; + totalScore += items[j].score; + wx1 += items[j].bbox.p1.x * items[j].score; + wy1 += items[j].bbox.p1.y * items[j].score; + wx2 += items[j].bbox.p2.x * items[j].score; + wy2 += items[j].bbox.p2.y * items[j].score; + } + } + + T blended = items[i]; + if (totalScore > 0.0f) { + blended.bbox.p1 = {wx1 / totalScore, wy1 / totalScore}; + blended.bbox.p2 = {wx2 / totalScore, wy2 / totalScore}; + } + result.push_back(blended); + } + + return result; +} + } // namespace rnexecutorch::utils::computer_vision diff --git a/packages/react-native-executorch/src/constants/commonVision.ts b/packages/react-native-executorch/src/constants/commonVision.ts index 6221d5701e..c70efc2f09 100644 --- a/packages/react-native-executorch/src/constants/commonVision.ts +++ b/packages/react-native-executorch/src/constants/commonVision.ts @@ -211,3 +211,14 @@ export enum CocoLabelYolo { export enum FastSAMLabel { OBJECT = 0, } + +/** + * Class label for BlazeFace face detection. + * + * BlazeFace is a single-class face detector. The exported model emits a flat + * class tensor of zeros for every anchor. + * @category Types + */ +export enum BlazeFaceLabel { + FACE = 0, +} diff --git a/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts b/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts index 7274209df6..34689f3430 100644 --- a/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts +++ b/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts @@ -15,6 +15,7 @@ import { RnExecutorchErrorCode } from '../../errors/ErrorCodes'; import { RnExecutorchError } from '../../errors/errorUtils'; import { buildLabelArray } from '../../utils/labelUtils'; import { + BlazeFaceLabel, CocoLabel, CocoLabelYolo, IMAGENET1K_MEAN, @@ -26,6 +27,8 @@ import { VisionLabeledModule, } from './VisionLabeledModule'; +const BLAZEFACE_NORM: readonly [number, number, number] = [0.5, 0.5, 0.5]; + const YOLO_DETECTION_CONFIG = { labelMap: CocoLabelYolo, preprocessorConfig: undefined, @@ -57,6 +60,16 @@ const ModelConfigs = { 'yolo26m': YOLO_DETECTION_CONFIG, 'yolo26l': YOLO_DETECTION_CONFIG, 'yolo26x': YOLO_DETECTION_CONFIG, + 'blazeface': { + labelMap: BlazeFaceLabel, + preprocessorConfig: { normMean: BLAZEFACE_NORM, normStd: BLAZEFACE_NORM }, + availableInputSizes: undefined, + defaultInputSize: undefined, + defaultDetectionThreshold: 0.5, + defaultIouThreshold: 0.3, + defaultUseWeightedNms: true, + defaultUseLetterbox: true, + }, } as const satisfies Record< ObjectDetectionModelName, ObjectDetectionConfig @@ -173,6 +186,9 @@ export class ObjectDetectionModule< const defaultIouThreshold = this.modelConfig.defaultIouThreshold ?? 0.55; const defaultInputSize = this.modelConfig.defaultInputSize; const availableInputSizes = this.modelConfig.availableInputSizes; + const defaultUseWeightedNms = + this.modelConfig.defaultUseWeightedNms ?? false; + const defaultUseLetterbox = this.modelConfig.defaultUseLetterbox ?? false; return ( frame: any, @@ -185,6 +201,8 @@ export class ObjectDetectionModule< options?.detectionThreshold ?? defaultDetectionThreshold; const iouThreshold = options?.iouThreshold ?? defaultIouThreshold; const inputSize = options?.inputSize ?? defaultInputSize; + const useWeightedNms = options?.useWeightedNms ?? defaultUseWeightedNms; + const useLetterbox = options?.useLetterbox ?? defaultUseLetterbox; if ( availableInputSizes && @@ -215,7 +233,9 @@ export class ObjectDetectionModule< detectionThreshold, iouThreshold, classIndices, - methodName + methodName, + useWeightedNms, + useLetterbox ); }; } @@ -255,6 +275,12 @@ export class ObjectDetectionModule< const iouThreshold = options?.iouThreshold ?? this.modelConfig.defaultIouThreshold ?? 0.55; const inputSize = options?.inputSize ?? this.modelConfig.defaultInputSize; + const useWeightedNms = + options?.useWeightedNms ?? + this.modelConfig.defaultUseWeightedNms ?? + false; + const useLetterbox = + options?.useLetterbox ?? this.modelConfig.defaultUseLetterbox ?? false; // Validate inputSize against availableInputSizes if ( @@ -290,14 +316,18 @@ export class ObjectDetectionModule< detectionThreshold, iouThreshold, classIndices, - methodName + methodName, + useWeightedNms, + useLetterbox ) : await this.nativeModule.generateFromPixels( input, detectionThreshold, iouThreshold, classIndices, - methodName + methodName, + useWeightedNms, + useLetterbox ); } diff --git a/packages/react-native-executorch/src/types/objectDetection.ts b/packages/react-native-executorch/src/types/objectDetection.ts index 271676c439..ac23deca4d 100644 --- a/packages/react-native-executorch/src/types/objectDetection.ts +++ b/packages/react-native-executorch/src/types/objectDetection.ts @@ -46,6 +46,22 @@ export interface ObjectDetectionOptions { iouThreshold?: number; inputSize?: number; classesOfInterest?: (keyof L)[]; + /** + * If true, use weighted (blending) NMS instead of greedy NMS. Score-weighted + * averaging of overlapping boxes — required for BlazeFace-style models + * where individual anchors aren't trained to be accurate on their own. + * Defaults to the model preset's `defaultUseWeightedNms` (false unless the + * preset opts in, e.g. `blazeface`). + */ + useWeightedNms?: boolean; + /** + * If true, preprocess by aspect-preserving fit + center-pad (letterbox) + * instead of a plain stretch resize to the model's input size. Required + * for models trained on natural-aspect crops (e.g. BlazeFace) — a stretched + * portrait frame shifts anchor positions and predicts a too-narrow box. + * Defaults to the model preset's `defaultUseLetterbox`. + */ + useLetterbox?: boolean; } /** @@ -60,7 +76,8 @@ export type ObjectDetectionModelSources = | { modelName: 'yolo26s'; modelSource: ResourceSource } | { modelName: 'yolo26m'; modelSource: ResourceSource } | { modelName: 'yolo26l'; modelSource: ResourceSource } - | { modelName: 'yolo26x'; modelSource: ResourceSource }; + | { modelName: 'yolo26x'; modelSource: ResourceSource } + | { modelName: 'blazeface'; modelSource: ResourceSource }; /** * Union of all built-in object detection model names. @@ -84,6 +101,10 @@ export type ObjectDetectionConfig = { preprocessorConfig?: { normMean?: Triple; normStd?: Triple }; defaultDetectionThreshold?: number; defaultIouThreshold?: number; + /** Default NMS mode for this model. Overridable per-call via `useWeightedNms`. */ + defaultUseWeightedNms?: boolean; + /** Default preprocessing for this model. Overridable per-call via `useLetterbox`. */ + defaultUseLetterbox?: boolean; } & ( | { availableInputSizes: readonly number[]; From 05d54ebebb28f28ad526afbfa6f3d1f3364a29cf Mon Sep 17 00:00:00 2001 From: chmjkb Date: Thu, 21 May 2026 08:17:06 +0200 Subject: [PATCH 2/5] wip --- .../app/instance_segmentation/index.tsx | 2 +- .../app/segment_anything/index.tsx | 2 +- .../tasks/InstanceSegmentationTask.tsx | 2 +- .../BaseInstanceSegmentation.cpp | 35 +++++---- .../BaseInstanceSegmentation.h | 20 +++--- .../integration/InstanceSegmentationTest.cpp | 72 ++++++++++--------- .../InstanceSegmentationModule.ts | 38 ++++++---- .../computer_vision/ObjectDetectionModule.ts | 31 ++++---- .../src/types/common.ts | 23 ++++++ .../src/types/instanceSegmentation.ts | 29 ++++++-- .../src/types/objectDetection.ts | 41 +++++------ 11 files changed, 181 insertions(+), 114 deletions(-) diff --git a/apps/computer-vision/app/instance_segmentation/index.tsx b/apps/computer-vision/app/instance_segmentation/index.tsx index fea9698eb3..b7a2a54b4d 100644 --- a/apps/computer-vision/app/instance_segmentation/index.tsx +++ b/apps/computer-vision/app/instance_segmentation/index.tsx @@ -112,7 +112,7 @@ export default function InstanceSegmentationScreen() { const start = Date.now(); const output = await forward(imageUri, { confidenceThreshold: 0.5, - iouThreshold: 0.55, + nms: { iouThreshold: 0.55 }, maxInstances: 20, returnMaskAtOriginalResolution: true, inputSize, diff --git a/apps/computer-vision/app/segment_anything/index.tsx b/apps/computer-vision/app/segment_anything/index.tsx index ac7bbd06b5..97d507df09 100644 --- a/apps/computer-vision/app/segment_anything/index.tsx +++ b/apps/computer-vision/app/segment_anything/index.tsx @@ -241,7 +241,7 @@ export default function SegmentAnythingScreen() { const start = Date.now(); const output = await forward(imageUri, { confidenceThreshold: 0.4, - iouThreshold: 0.9, + nms: { iouThreshold: 0.9 }, maxInstances: 50, returnMaskAtOriginalResolution: true, }); diff --git a/apps/computer-vision/components/vision_camera/tasks/InstanceSegmentationTask.tsx b/apps/computer-vision/components/vision_camera/tasks/InstanceSegmentationTask.tsx index cd203eeb66..7791862d1b 100644 --- a/apps/computer-vision/components/vision_camera/tasks/InstanceSegmentationTask.tsx +++ b/apps/computer-vision/components/vision_camera/tasks/InstanceSegmentationTask.tsx @@ -133,7 +133,7 @@ export default function InstanceSegmentationTask({ const ih = frame.width > frame.height ? frame.width : frame.height; const result = instSegRof(frame, isFrontCamera, { confidenceThreshold: 0.5, - iouThreshold: 0.5, + nms: { iouThreshold: 0.5 }, maxInstances: 5, returnMaskAtOriginalResolution: false, ...(activeModel === 'instanceSegmentationYolo26n' && { diff --git a/packages/react-native-executorch/common/rnexecutorch/models/instance_segmentation/BaseInstanceSegmentation.cpp b/packages/react-native-executorch/common/rnexecutorch/models/instance_segmentation/BaseInstanceSegmentation.cpp index 24ef7a8e22..e8eca0c54d 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/instance_segmentation/BaseInstanceSegmentation.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/instance_segmentation/BaseInstanceSegmentation.cpp @@ -57,7 +57,8 @@ TensorPtr BaseInstanceSegmentation::buildInputTensor(const cv::Mat &image) { std::vector BaseInstanceSegmentation::runInference( const cv::Mat &image, double confidenceThreshold, double iouThreshold, int32_t maxInstances, const std::vector &classIndices, - bool returnMaskAtOriginalResolution, const std::string &methodName) { + bool returnMaskAtOriginalResolution, const std::string &methodName, + bool useWeightedNms) { std::scoped_lock lock(inference_mutex_); @@ -86,34 +87,37 @@ std::vector BaseInstanceSegmentation::runInference( auto instances = collectInstances( forwardResult.get(), originalSize, modelInputSize, confidenceThreshold, classIndices, returnMaskAtOriginalResolution); - return finalizeInstances(std::move(instances), iouThreshold, maxInstances); + return finalizeInstances(std::move(instances), iouThreshold, maxInstances, + useWeightedNms); } std::vector BaseInstanceSegmentation::generateFromString( std::string imageSource, double confidenceThreshold, double iouThreshold, int32_t maxInstances, std::vector classIndices, - bool returnMaskAtOriginalResolution, std::string methodName) { + bool returnMaskAtOriginalResolution, std::string methodName, + bool useWeightedNms) { cv::Mat imageBGR = image_processing::readImage(imageSource); cv::Mat imageRGB; cv::cvtColor(imageBGR, imageRGB, cv::COLOR_BGR2RGB); return runInference(imageRGB, confidenceThreshold, iouThreshold, maxInstances, - classIndices, returnMaskAtOriginalResolution, methodName); + classIndices, returnMaskAtOriginalResolution, methodName, + useWeightedNms); } std::vector BaseInstanceSegmentation::generateFromFrame( jsi::Runtime &runtime, const jsi::Value &frameData, double confidenceThreshold, double iouThreshold, int32_t maxInstances, std::vector classIndices, bool returnMaskAtOriginalResolution, - std::string methodName) { + std::string methodName, bool useWeightedNms) { auto orient = ::rnexecutorch::utils::readFrameOrientation(runtime, frameData); cv::Mat frame = extractFromFrame(runtime, frameData); cv::Mat rotated = utils::rotateFrameForModel(frame, orient); - auto instances = - runInference(rotated, confidenceThreshold, iouThreshold, maxInstances, - classIndices, returnMaskAtOriginalResolution, methodName); + auto instances = runInference( + rotated, confidenceThreshold, iouThreshold, maxInstances, classIndices, + returnMaskAtOriginalResolution, methodName, useWeightedNms); for (auto &inst : instances) { utils::inverseRotateBbox(inst.bbox, orient, rotated.size()); // Inverse-rotate the mask to match the screen orientation @@ -131,11 +135,13 @@ std::vector BaseInstanceSegmentation::generateFromFrame( std::vector BaseInstanceSegmentation::generateFromPixels( JSTensorViewIn tensorView, double confidenceThreshold, double iouThreshold, int32_t maxInstances, std::vector classIndices, - bool returnMaskAtOriginalResolution, std::string methodName) { + bool returnMaskAtOriginalResolution, std::string methodName, + bool useWeightedNms) { cv::Mat image = extractFromPixels(tensorView); return runInference(image, confidenceThreshold, iouThreshold, maxInstances, - classIndices, returnMaskAtOriginalResolution, methodName); + classIndices, returnMaskAtOriginalResolution, methodName, + useWeightedNms); } std::tuple @@ -296,11 +302,14 @@ void BaseInstanceSegmentation::ensureMethodLoaded( std::vector BaseInstanceSegmentation::finalizeInstances( std::vector instances, double iouThreshold, - int32_t maxInstances) const { + int32_t maxInstances, bool useWeightedNms) const { if (applyNMS_) { - instances = - utils::computer_vision::nonMaxSuppression(instances, iouThreshold); + instances = useWeightedNms + ? utils::computer_vision::weightedNonMaxSuppression( + instances, iouThreshold) + : utils::computer_vision::nonMaxSuppression(instances, + iouThreshold); } if (std::cmp_greater(instances.size(), maxInstances)) { diff --git a/packages/react-native-executorch/common/rnexecutorch/models/instance_segmentation/BaseInstanceSegmentation.h b/packages/react-native-executorch/common/rnexecutorch/models/instance_segmentation/BaseInstanceSegmentation.h index 341d0f2235..1b511d8d6e 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/instance_segmentation/BaseInstanceSegmentation.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/instance_segmentation/BaseInstanceSegmentation.h @@ -28,30 +28,32 @@ class BaseInstanceSegmentation : public VisionModel { double iouThreshold, int32_t maxInstances, std::vector classIndices, bool returnMaskAtOriginalResolution, - std::string methodName); + std::string methodName, bool useWeightedNms); [[nodiscard("Registered non-void function")]] std::vector generateFromFrame(jsi::Runtime &runtime, const jsi::Value &frameData, double confidenceThreshold, double iouThreshold, int32_t maxInstances, std::vector classIndices, - bool returnMaskAtOriginalResolution, - std::string methodName); + bool returnMaskAtOriginalResolution, std::string methodName, + bool useWeightedNms); [[nodiscard("Registered non-void function")]] std::vector generateFromPixels(JSTensorViewIn tensorView, double confidenceThreshold, double iouThreshold, int32_t maxInstances, std::vector classIndices, bool returnMaskAtOriginalResolution, - std::string methodName); + std::string methodName, bool useWeightedNms); protected: cv::Size modelInputSize() const override; private: - std::vector runInference( - const cv::Mat &image, double confidenceThreshold, double iouThreshold, - int32_t maxInstances, const std::vector &classIndices, - bool returnMaskAtOriginalResolution, const std::string &methodName); + std::vector + runInference(const cv::Mat &image, double confidenceThreshold, + double iouThreshold, int32_t maxInstances, + const std::vector &classIndices, + bool returnMaskAtOriginalResolution, + const std::string &methodName, bool useWeightedNms); TensorPtr buildInputTensor(const cv::Mat &image); @@ -89,7 +91,7 @@ class BaseInstanceSegmentation : public VisionModel { std::vector finalizeInstances(std::vector instances, double iouThreshold, - int32_t maxInstances) const; + int32_t maxInstances, bool useWeightedNms) const; cv::Mat processMaskFromLogits( const cv::Mat &logitsMat, const utils::computer_vision::BBox &bboxModel, diff --git a/packages/react-native-executorch/common/rnexecutorch/tests/integration/InstanceSegmentationTest.cpp b/packages/react-native-executorch/common/rnexecutorch/tests/integration/InstanceSegmentationTest.cpp index ff003eb62d..255f782a0a 100644 --- a/packages/react-native-executorch/common/rnexecutorch/tests/integration/InstanceSegmentationTest.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/tests/integration/InstanceSegmentationTest.cpp @@ -33,7 +33,7 @@ template <> struct ModelTraits { static void callGenerate(ModelType &model) { (void)model.generateFromString(kValidTestImagePath, 0.5, 0.5, 100, {}, true, - kMethodName); + kMethodName, false); } }; } // namespace model_tests @@ -51,16 +51,17 @@ TEST(InstanceSegGenerateTests, InvalidImagePathThrows) { BaseInstanceSegmentation model(kValidInstanceSegModelPath, {}, {}, true, nullptr); EXPECT_THROW((void)model.generateFromString("nonexistent_image.jpg", 0.5, 0.5, - 100, {}, true, kMethodName), + 100, {}, true, kMethodName, + false), RnExecutorchError); } TEST(InstanceSegGenerateTests, EmptyImagePathThrows) { BaseInstanceSegmentation model(kValidInstanceSegModelPath, {}, {}, true, nullptr); - EXPECT_THROW( - (void)model.generateFromString("", 0.5, 0.5, 100, {}, true, kMethodName), - RnExecutorchError); + EXPECT_THROW((void)model.generateFromString("", 0.5, 0.5, 100, {}, true, + kMethodName, false), + RnExecutorchError); } TEST(InstanceSegGenerateTests, EmptyMethodNameThrows) { @@ -75,7 +76,8 @@ TEST(InstanceSegGenerateTests, NegativeConfidenceThrows) { BaseInstanceSegmentation model(kValidInstanceSegModelPath, {}, {}, true, nullptr); EXPECT_THROW((void)model.generateFromString(kValidTestImagePath, -0.1, 0.5, - 100, {}, true, kMethodName), + 100, {}, true, kMethodName, + false), RnExecutorchError); } @@ -83,7 +85,8 @@ TEST(InstanceSegGenerateTests, ConfidenceAboveOneThrows) { BaseInstanceSegmentation model(kValidInstanceSegModelPath, {}, {}, true, nullptr); EXPECT_THROW((void)model.generateFromString(kValidTestImagePath, 1.1, 0.5, - 100, {}, true, kMethodName), + 100, {}, true, kMethodName, + false), RnExecutorchError); } @@ -91,7 +94,8 @@ TEST(InstanceSegGenerateTests, NegativeIouThresholdThrows) { BaseInstanceSegmentation model(kValidInstanceSegModelPath, {}, {}, true, nullptr); EXPECT_THROW((void)model.generateFromString(kValidTestImagePath, 0.5, -0.1, - 100, {}, true, kMethodName), + 100, {}, true, kMethodName, + false), RnExecutorchError); } @@ -99,7 +103,8 @@ TEST(InstanceSegGenerateTests, IouThresholdAboveOneThrows) { BaseInstanceSegmentation model(kValidInstanceSegModelPath, {}, {}, true, nullptr); EXPECT_THROW((void)model.generateFromString(kValidTestImagePath, 0.5, 1.1, - 100, {}, true, kMethodName), + 100, {}, true, kMethodName, + false), RnExecutorchError); } @@ -107,7 +112,7 @@ TEST(InstanceSegGenerateTests, ValidImageReturnsResults) { BaseInstanceSegmentation model(kValidInstanceSegModelPath, {}, {}, true, nullptr); auto results = model.generateFromString(kValidTestImagePath, 0.3, 0.5, 100, - {}, true, kMethodName); + {}, true, kMethodName, false); EXPECT_FALSE(results.empty()); } @@ -115,9 +120,9 @@ TEST(InstanceSegGenerateTests, HighThresholdReturnsFewerResults) { BaseInstanceSegmentation model(kValidInstanceSegModelPath, {}, {}, true, nullptr); auto lowResults = model.generateFromString(kValidTestImagePath, 0.1, 0.5, 100, - {}, true, kMethodName); - auto highResults = model.generateFromString(kValidTestImagePath, 0.9, 0.5, - 100, {}, true, kMethodName); + {}, true, kMethodName, false); + auto highResults = model.generateFromString( + kValidTestImagePath, 0.9, 0.5, 100, {}, true, kMethodName, false); EXPECT_GE(lowResults.size(), highResults.size()); } @@ -125,7 +130,7 @@ TEST(InstanceSegGenerateTests, MaxInstancesLimitsResults) { BaseInstanceSegmentation model(kValidInstanceSegModelPath, {}, {}, true, nullptr); auto results = model.generateFromString(kValidTestImagePath, 0.1, 0.5, 2, {}, - true, kMethodName); + true, kMethodName, false); EXPECT_LE(results.size(), 2u); } @@ -136,7 +141,7 @@ TEST(InstanceSegResultTests, InstancesHaveValidBoundingBoxes) { BaseInstanceSegmentation model(kValidInstanceSegModelPath, {}, {}, true, nullptr); auto results = model.generateFromString(kValidTestImagePath, 0.3, 0.5, 100, - {}, true, kMethodName); + {}, true, kMethodName, false); for (const auto &inst : results) { EXPECT_LE(inst.bbox.p1.x, inst.bbox.p2.x); @@ -150,7 +155,7 @@ TEST(InstanceSegResultTests, InstancesHaveValidScores) { BaseInstanceSegmentation model(kValidInstanceSegModelPath, {}, {}, true, nullptr); auto results = model.generateFromString(kValidTestImagePath, 0.3, 0.5, 100, - {}, true, kMethodName); + {}, true, kMethodName, false); for (const auto &inst : results) { EXPECT_GE(inst.score, 0.0f); @@ -162,7 +167,7 @@ TEST(InstanceSegResultTests, InstancesHaveValidMasks) { BaseInstanceSegmentation model(kValidInstanceSegModelPath, {}, {}, true, nullptr); auto results = model.generateFromString(kValidTestImagePath, 0.3, 0.5, 100, - {}, true, kMethodName); + {}, true, kMethodName, false); for (const auto &inst : results) { EXPECT_GT(inst.maskWidth, 0); @@ -181,7 +186,7 @@ TEST(InstanceSegResultTests, InstancesHaveValidClassIndices) { BaseInstanceSegmentation model(kValidInstanceSegModelPath, {}, {}, true, nullptr); auto results = model.generateFromString(kValidTestImagePath, 0.3, 0.5, 100, - {}, true, kMethodName); + {}, true, kMethodName, false); for (const auto &inst : results) { EXPECT_GE(inst.classIndex, 0); @@ -197,8 +202,9 @@ TEST(InstanceSegFilterTests, ClassFilterReturnsOnlyMatchingClasses) { nullptr); // Filter to class index 0 (PERSON in CocoLabelYolo) std::vector classIndices = {0}; - auto results = model.generateFromString(kValidTestImagePath, 0.3, 0.5, 100, - classIndices, true, kMethodName); + auto results = + model.generateFromString(kValidTestImagePath, 0.3, 0.5, 100, classIndices, + true, kMethodName, false); for (const auto &inst : results) { EXPECT_EQ(inst.classIndex, 0); @@ -209,11 +215,11 @@ TEST(InstanceSegFilterTests, EmptyFilterReturnsAllClasses) { BaseInstanceSegmentation model(kValidInstanceSegModelPath, {}, {}, true, nullptr); auto allResults = model.generateFromString(kValidTestImagePath, 0.3, 0.5, 100, - {}, true, kMethodName); + {}, true, kMethodName, false); EXPECT_FALSE(allResults.empty()); auto noResults = model.generateFromString(kValidTestImagePath, 0.3, 0.5, 100, - {50}, true, kMethodName); + {50}, true, kMethodName, false); EXPECT_TRUE(noResults.empty()); } @@ -224,9 +230,9 @@ TEST(InstanceSegMaskTests, LowResMaskIsSmallerThanOriginal) { BaseInstanceSegmentation model(kValidInstanceSegModelPath, {}, {}, true, nullptr); auto hiRes = model.generateFromString(kValidTestImagePath, 0.3, 0.5, 100, {}, - true, kMethodName); + true, kMethodName, false); auto loRes = model.generateFromString(kValidTestImagePath, 0.3, 0.5, 100, {}, - false, kMethodName); + false, kMethodName, false); if (!hiRes.empty() && !loRes.empty()) { EXPECT_LE(loRes[0].mask->size(), hiRes[0].mask->size()); @@ -243,9 +249,9 @@ TEST(InstanceSegNMSTests, NMSEnabledReturnsFewerOrEqualResults) { false, nullptr); auto nmsResults = modelWithNMS.generateFromString( - kValidTestImagePath, 0.3, 0.5, 100, {}, true, kMethodName); + kValidTestImagePath, 0.3, 0.5, 100, {}, true, kMethodName, false); auto noNmsResults = modelWithoutNMS.generateFromString( - kValidTestImagePath, 0.3, 0.5, 100, {}, true, kMethodName); + kValidTestImagePath, 0.3, 0.5, 100, {}, true, kMethodName, false); EXPECT_LE(nmsResults.size(), noNmsResults.size()); } @@ -262,7 +268,7 @@ TEST(InstanceSegPixelTests, ValidPixelDataReturnsResults) { {height, width, channels}, executorch::aten::ScalarType::Byte}; auto results = model.generateFromPixels(tensorView, 0.3, 0.5, 100, {}, true, - kMethodName); + kMethodName, false); EXPECT_GE(results.size(), 0u); } @@ -275,7 +281,7 @@ TEST(InstanceSegPixelTests, NegativeConfidenceThrows) { {height, width, channels}, executorch::aten::ScalarType::Byte}; EXPECT_THROW((void)model.generateFromPixels(tensorView, -0.1, 0.5, 100, {}, - true, kMethodName), + true, kMethodName, false), RnExecutorchError); } @@ -288,7 +294,7 @@ TEST(InstanceSegPixelTests, ConfidenceAboveOneThrows) { {height, width, channels}, executorch::aten::ScalarType::Byte}; EXPECT_THROW((void)model.generateFromPixels(tensorView, 1.1, 0.5, 100, {}, - true, kMethodName), + true, kMethodName, false), RnExecutorchError); } @@ -307,14 +313,14 @@ TEST(InstanceSegInheritedTests, GetInputShapeWorks) { TEST(InstanceSegInheritedTests, GetAllInputShapesWorks) { BaseInstanceSegmentation model(kValidInstanceSegModelPath, {}, {}, true, nullptr); - auto shapes = model.getAllInputShapes(kMethodName); + auto shapes = model.getAllInputShapes(kMethodName, false); EXPECT_FALSE(shapes.empty()); } TEST(InstanceSegInheritedTests, GetMethodMetaWorks) { BaseInstanceSegmentation model(kValidInstanceSegModelPath, {}, {}, true, nullptr); - auto result = model.getMethodMeta(kMethodName); + auto result = model.getMethodMeta(kMethodName, false); EXPECT_TRUE(result.ok()); } @@ -333,6 +339,6 @@ TEST(InstanceSegNormTests, ValidNormParamsGenerateSucceeds) { const std::vector std = {0.229f, 0.224f, 0.225f}; BaseInstanceSegmentation model(kValidInstanceSegModelPath, mean, std, true, nullptr); - EXPECT_NO_THROW((void)model.generateFromString(kValidTestImagePath, 0.5, 0.5, - 100, {}, true, kMethodName)); + EXPECT_NO_THROW((void)model.generateFromString( + kValidTestImagePath, 0.5, 0.5, 100, {}, true, kMethodName, false)); } diff --git a/packages/react-native-executorch/src/modules/computer_vision/InstanceSegmentationModule.ts b/packages/react-native-executorch/src/modules/computer_vision/InstanceSegmentationModule.ts index e7e96f2deb..49c8bac13a 100644 --- a/packages/react-native-executorch/src/modules/computer_vision/InstanceSegmentationModule.ts +++ b/packages/react-native-executorch/src/modules/computer_vision/InstanceSegmentationModule.ts @@ -34,7 +34,7 @@ const YOLO_SEG_CONFIG = { availableInputSizes: [384, 512, 640] as const, defaultInputSize: 384, defaultConfidenceThreshold: 0.5, - defaultIouThreshold: 0.5, + defaultNms: { iouThreshold: 0.5 }, postprocessorConfig: { applyNMS: false, }, @@ -46,7 +46,7 @@ const FASTSAM_CONFIG = { availableInputSizes: undefined, defaultInputSize: undefined, defaultConfidenceThreshold: 0.5, - defaultIouThreshold: 0.9, + defaultNms: { iouThreshold: 0.9 }, postprocessorConfig: { applyNMS: true, }, @@ -58,7 +58,7 @@ const RF_DETR_NANO_SEG_CONFIG = { availableInputSizes: undefined, defaultInputSize: undefined, //RFDetr exposes only one method named forward defaultConfidenceThreshold: 0.5, - defaultIouThreshold: 0.5, + defaultNms: { iouThreshold: 0.5 }, postprocessorConfig: { applyNMS: true, }, @@ -143,7 +143,7 @@ type ResolveLabels = * * const results = await segmentation.forward('path/to/image.jpg', { * confidenceThreshold: 0.5, - * iouThreshold: 0.45, + * nms: { iouThreshold: 0.45 }, * maxInstances: 20, * inputSize: 640, * }); @@ -237,7 +237,7 @@ export class InstanceSegmentationModule< * availableInputSizes: [640], * defaultInputSize: 640, * defaultConfidenceThreshold: 0.5, - * defaultIouThreshold: 0.45, + * defaultNms: { iouThreshold: 0.45 }, * postprocessorConfig: { applyNMS: true }, * }, * ); @@ -323,7 +323,9 @@ export class InstanceSegmentationModule< const labelEnumOffset = this.labelEnumOffset; const defaultConfidenceThreshold = this.modelConfig.defaultConfidenceThreshold ?? 0.5; - const defaultIouThreshold = this.modelConfig.defaultIouThreshold ?? 0.5; + const defaultNmsMode = this.modelConfig.defaultNms?.mode ?? 'greedy'; + const defaultIouThreshold = + this.modelConfig.defaultNms?.iouThreshold ?? 0.5; const defaultInputSize = this.modelConfig.defaultInputSize; return ( @@ -335,7 +337,9 @@ export class InstanceSegmentationModule< const confidenceThreshold = options?.confidenceThreshold ?? defaultConfidenceThreshold; - const iouThreshold = options?.iouThreshold ?? defaultIouThreshold; + const nmsMode = options?.nms?.mode ?? defaultNmsMode; + const iouThreshold = options?.nms?.iouThreshold ?? defaultIouThreshold; + const useWeightedNms = nmsMode === 'weighted'; const maxInstances = options?.maxInstances ?? 100; const returnMaskAtOriginalResolution = options?.returnMaskAtOriginalResolution ?? true; @@ -360,7 +364,8 @@ export class InstanceSegmentationModule< maxInstances, classIndices, returnMaskAtOriginalResolution, - methodName + methodName, + useWeightedNms ); return nativeResults.map((inst: any) => ({ bbox: inst.bbox, @@ -381,14 +386,14 @@ export class InstanceSegmentationModule< * 1. **String path/URI**: File path, URL, or Base64-encoded string * 2. **PixelData**: Raw pixel data from image libraries (e.g., NitroImage) * @param input - Image source (string path or PixelData object) - * @param options - Optional configuration for the segmentation process. Includes `confidenceThreshold`, `iouThreshold`, `maxInstances`, `classesOfInterest`, `returnMaskAtOriginalResolution`, and `inputSize`. + * @param options - Optional configuration for the segmentation process. Includes `confidenceThreshold`, `nms`, `maxInstances`, `classesOfInterest`, `returnMaskAtOriginalResolution`, and `inputSize`. * @returns A Promise resolving to an array of {@link SegmentedInstance} objects with `bbox`, `mask`, `maskWidth`, `maskHeight`, `label`, `score`. * @throws {RnExecutorchError} If the model is not loaded or if an invalid `inputSize` is provided. * @example * ```ts * const results = await segmentation.forward('path/to/image.jpg', { * confidenceThreshold: 0.6, - * iouThreshold: 0.5, + * nms: { iouThreshold: 0.5 }, * maxInstances: 10, * inputSize: 640, * classesOfInterest: ['PERSON', 'CAR'], @@ -415,8 +420,13 @@ export class InstanceSegmentationModule< options?.confidenceThreshold ?? this.modelConfig.defaultConfidenceThreshold ?? 0.5; + const nmsMode = + options?.nms?.mode ?? this.modelConfig.defaultNms?.mode ?? 'greedy'; const iouThreshold = - options?.iouThreshold ?? this.modelConfig.defaultIouThreshold ?? 0.5; + options?.nms?.iouThreshold ?? + this.modelConfig.defaultNms?.iouThreshold ?? + 0.5; + const useWeightedNms = nmsMode === 'weighted'; const maxInstances = options?.maxInstances ?? 100; const returnMaskAtOriginalResolution = options?.returnMaskAtOriginalResolution ?? true; @@ -457,7 +467,8 @@ export class InstanceSegmentationModule< maxInstances, classIndices, returnMaskAtOriginalResolution, - methodName + methodName, + useWeightedNms ) : await this.nativeModule.generateFromPixels( input, @@ -466,7 +477,8 @@ export class InstanceSegmentationModule< maxInstances, classIndices, returnMaskAtOriginalResolution, - methodName + methodName, + useWeightedNms ); return nativeResult.map((inst) => ({ diff --git a/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts b/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts index 34689f3430..74e9f3c328 100644 --- a/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts +++ b/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts @@ -35,7 +35,7 @@ const YOLO_DETECTION_CONFIG = { availableInputSizes: [384, 512, 640] as const, defaultInputSize: 384, defaultDetectionThreshold: 0.5, - defaultIouThreshold: 0.5, + defaultNms: { iouThreshold: 0.5 }, } satisfies ObjectDetectionConfig; const ModelConfigs = { @@ -45,7 +45,7 @@ const ModelConfigs = { availableInputSizes: undefined, defaultInputSize: undefined, defaultDetectionThreshold: 0.7, - defaultIouThreshold: 0.55, + defaultNms: { iouThreshold: 0.55 }, }, 'rf-detr-nano': { labelMap: CocoLabel, @@ -53,7 +53,7 @@ const ModelConfigs = { availableInputSizes: undefined, defaultInputSize: undefined, defaultDetectionThreshold: 0.7, - defaultIouThreshold: 0.55, + defaultNms: { iouThreshold: 0.55 }, }, 'yolo26n': YOLO_DETECTION_CONFIG, 'yolo26s': YOLO_DETECTION_CONFIG, @@ -66,8 +66,7 @@ const ModelConfigs = { availableInputSizes: undefined, defaultInputSize: undefined, defaultDetectionThreshold: 0.5, - defaultIouThreshold: 0.3, - defaultUseWeightedNms: true, + defaultNms: { mode: 'weighted', iouThreshold: 0.3 }, defaultUseLetterbox: true, }, } as const satisfies Record< @@ -183,11 +182,11 @@ export class ObjectDetectionModule< const defaultDetectionThreshold = this.modelConfig.defaultDetectionThreshold ?? 0.7; - const defaultIouThreshold = this.modelConfig.defaultIouThreshold ?? 0.55; + const defaultNmsMode = this.modelConfig.defaultNms?.mode ?? 'greedy'; + const defaultIouThreshold = + this.modelConfig.defaultNms?.iouThreshold ?? 0.55; const defaultInputSize = this.modelConfig.defaultInputSize; const availableInputSizes = this.modelConfig.availableInputSizes; - const defaultUseWeightedNms = - this.modelConfig.defaultUseWeightedNms ?? false; const defaultUseLetterbox = this.modelConfig.defaultUseLetterbox ?? false; return ( @@ -199,9 +198,10 @@ export class ObjectDetectionModule< const detectionThreshold = options?.detectionThreshold ?? defaultDetectionThreshold; - const iouThreshold = options?.iouThreshold ?? defaultIouThreshold; + const nmsMode = options?.nms?.mode ?? defaultNmsMode; + const iouThreshold = options?.nms?.iouThreshold ?? defaultIouThreshold; const inputSize = options?.inputSize ?? defaultInputSize; - const useWeightedNms = options?.useWeightedNms ?? defaultUseWeightedNms; + const useWeightedNms = nmsMode === 'weighted'; const useLetterbox = options?.useLetterbox ?? defaultUseLetterbox; if ( @@ -272,13 +272,14 @@ export class ObjectDetectionModule< options?.detectionThreshold ?? this.modelConfig.defaultDetectionThreshold ?? 0.7; + const nmsMode = + options?.nms?.mode ?? this.modelConfig.defaultNms?.mode ?? 'greedy'; const iouThreshold = - options?.iouThreshold ?? this.modelConfig.defaultIouThreshold ?? 0.55; + options?.nms?.iouThreshold ?? + this.modelConfig.defaultNms?.iouThreshold ?? + 0.55; const inputSize = options?.inputSize ?? this.modelConfig.defaultInputSize; - const useWeightedNms = - options?.useWeightedNms ?? - this.modelConfig.defaultUseWeightedNms ?? - false; + const useWeightedNms = nmsMode === 'weighted'; const useLetterbox = options?.useLetterbox ?? this.modelConfig.defaultUseLetterbox ?? false; diff --git a/packages/react-native-executorch/src/types/common.ts b/packages/react-native-executorch/src/types/common.ts index e8afa8ff4d..d787a29bc6 100644 --- a/packages/react-native-executorch/src/types/common.ts +++ b/packages/react-native-executorch/src/types/common.ts @@ -145,6 +145,29 @@ export type LabelEnum = Readonly>; * @category Types */ export type Triple = readonly [T, T, T]; + +/** + * Non-maximum suppression configuration shared by computer-vision models that + * deduplicate overlapping detections (object detection, instance segmentation). + * + * - `'greedy'` (default): standard NMS — keep the highest-scoring box, drop + * anything overlapping it above {@link iouThreshold}. Suits detectors like + * YOLO / SSDLite whose individual anchors are trained to be accurate + * standalone. + * - `'weighted'`: score-weighted blending — average overlapping boxes by + * their scores instead of pruning to a single winner. Required for + * detectors like BlazeFace whose anchors were trained as an ensemble. + * + * Both fields are optional in per-call options and the model preset's + * `defaultNms`; missing fields fall back through the preset default to the + * runtime default. + * @category Types + */ +export interface NmsConfig { + mode?: 'greedy' | 'weighted'; + /** IoU threshold (0-1). Boxes overlapping above this are suppressed (greedy) or merged (weighted). */ + iouThreshold?: number; +} /** * Represents raw pixel data in RGB format for vision models. * diff --git a/packages/react-native-executorch/src/types/instanceSegmentation.ts b/packages/react-native-executorch/src/types/instanceSegmentation.ts index ff7f4ae314..8ca81204df 100644 --- a/packages/react-native-executorch/src/types/instanceSegmentation.ts +++ b/packages/react-native-executorch/src/types/instanceSegmentation.ts @@ -1,5 +1,12 @@ import { RnExecutorchError } from '../errors/errorUtils'; -import { LabelEnum, ResourceSource, Triple, Frame, PixelData } from './common'; +import { + LabelEnum, + NmsConfig, + ResourceSource, + Triple, + Frame, + PixelData, +} from './common'; import { Bbox } from './objectDetection'; /** @@ -48,10 +55,12 @@ export interface InstanceSegmentationOptions { */ confidenceThreshold?: number; /** - * IoU threshold for non-maximum suppression. - * Defaults to model's defaultIouThreshold (typically 0.5). + * Override NMS mode and/or `iouThreshold` for this call. Partial — missing + * fields fall back to the model preset's `defaultNms`. Ignored for models + * whose preset disables external NMS (`postprocessorConfig.applyNMS: false`, + * e.g. YOLO-seg which does NMS internally). */ - iouThreshold?: number; + nms?: NmsConfig; /** * Maximum number of instances to return. Default: 100 */ @@ -89,9 +98,19 @@ export type InstanceSegmentationConfig = { normMean?: Triple; normStd?: Triple; }; + /** + * `applyNMS: false` for models that produce already-deduplicated detections + * (e.g. YOLO-seg, where NMS runs inside the model graph). Property of the + * model architecture — not user-tuneable per call. + */ postprocessorConfig?: { applyNMS?: boolean }; defaultConfidenceThreshold?: number; - defaultIouThreshold?: number; + /** + * Default NMS configuration. Overridable per-call via + * {@link InstanceSegmentationOptions.nms}. Has no effect when + * `postprocessorConfig.applyNMS` is `false`. + */ + defaultNms?: NmsConfig; } & ( | { availableInputSizes: readonly number[]; diff --git a/packages/react-native-executorch/src/types/objectDetection.ts b/packages/react-native-executorch/src/types/objectDetection.ts index ac23deca4d..4b864c78c7 100644 --- a/packages/react-native-executorch/src/types/objectDetection.ts +++ b/packages/react-native-executorch/src/types/objectDetection.ts @@ -1,5 +1,12 @@ import { RnExecutorchError } from '../errors/errorUtils'; -import { LabelEnum, Triple, ResourceSource, PixelData, Frame } from './common'; +import { + LabelEnum, + NmsConfig, + Triple, + ResourceSource, + PixelData, + Frame, +} from './common'; import { CocoLabel } from '../constants/commonVision'; export { CocoLabel }; @@ -36,24 +43,16 @@ export interface Detection { * Options for configuring object detection inference. * @category Types * @typeParam L - The label enum type for filtering classes of interest. - * @property {number} [detectionThreshold] - Minimum confidence score for detections (0-1). Defaults to model-specific value. - * @property {number} [iouThreshold] - IoU threshold for non-maximum suppression (0-1). Defaults to model-specific value. - * @property {number} [inputSize] - Input size for multi-method models (e.g., 384, 512, 640 for YOLO). Required for YOLO models if not using default. - * @property {(keyof L)[]} [classesOfInterest] - Optional array of class labels to filter detections. Only detections matching these classes will be returned. */ export interface ObjectDetectionOptions { + /** Minimum confidence score for detections (0-1). Defaults to the model preset's value. */ detectionThreshold?: number; - iouThreshold?: number; + /** Override NMS mode and/or `iouThreshold` for this call. Partial — missing fields fall back to the model preset's `defaultNms`. */ + nms?: NmsConfig; + /** Input size for multi-method models (e.g. 384/512/640 for YOLO). */ inputSize?: number; + /** Restrict output to these class labels. */ classesOfInterest?: (keyof L)[]; - /** - * If true, use weighted (blending) NMS instead of greedy NMS. Score-weighted - * averaging of overlapping boxes — required for BlazeFace-style models - * where individual anchors aren't trained to be accurate on their own. - * Defaults to the model preset's `defaultUseWeightedNms` (false unless the - * preset opts in, e.g. `blazeface`). - */ - useWeightedNms?: boolean; /** * If true, preprocess by aspect-preserving fit + center-pad (letterbox) * instead of a plain stretch resize to the model's input size. Required @@ -89,20 +88,16 @@ export type ObjectDetectionModelName = ObjectDetectionModelSources['modelName']; * Configuration for a custom object detection model. * @category Types * @typeParam T - The label enum type for the model. - * @property {T} labelMap - The label mapping for the model. - * @property {object} [preprocessorConfig] - Optional preprocessing configuration with normalization parameters. - * @property {number} [defaultDetectionThreshold] - Default detection confidence threshold (0-1). - * @property {number} [defaultIouThreshold] - Default IoU threshold for non-maximum suppression (0-1). - * @property {readonly number[]} [availableInputSizes] - For multi-method models, the available input sizes (e.g., [384, 512, 640]). - * @property {number} [defaultInputSize] - For multi-method models, the default input size to use. */ export type ObjectDetectionConfig = { + /** The label mapping for the model. */ labelMap: T; + /** Optional input normalisation: `(pixel - normMean) / normStd`. */ preprocessorConfig?: { normMean?: Triple; normStd?: Triple }; + /** Default detection confidence threshold (0-1). */ defaultDetectionThreshold?: number; - defaultIouThreshold?: number; - /** Default NMS mode for this model. Overridable per-call via `useWeightedNms`. */ - defaultUseWeightedNms?: boolean; + /** Default NMS configuration. Overridable per-call via {@link ObjectDetectionOptions.nms}. */ + defaultNms?: NmsConfig; /** Default preprocessing for this model. Overridable per-call via `useLetterbox`. */ defaultUseLetterbox?: boolean; } & ( From b3fcb856e934709f513003dc4d1c510229789264 Mon Sep 17 00:00:00 2001 From: chmjkb Date: Tue, 26 May 2026 10:39:29 +0200 Subject: [PATCH 3/5] refactor: remove nmsConfig, avoid making breaking changes --- .../app/instance_segmentation/index.tsx | 2 +- .../app/segment_anything/index.tsx | 2 +- .../tasks/InstanceSegmentationTask.tsx | 2 +- .../InstanceSegmentationModule.ts | 31 ++++++--------- .../computer_vision/ObjectDetectionModule.ts | 34 +++++++--------- .../src/types/common.ts | 22 ----------- .../src/types/instanceSegmentation.ts | 31 +++++++-------- .../src/types/objectDetection.ts | 39 ++++++++----------- 8 files changed, 60 insertions(+), 103 deletions(-) diff --git a/apps/computer-vision/app/instance_segmentation/index.tsx b/apps/computer-vision/app/instance_segmentation/index.tsx index b7a2a54b4d..fea9698eb3 100644 --- a/apps/computer-vision/app/instance_segmentation/index.tsx +++ b/apps/computer-vision/app/instance_segmentation/index.tsx @@ -112,7 +112,7 @@ export default function InstanceSegmentationScreen() { const start = Date.now(); const output = await forward(imageUri, { confidenceThreshold: 0.5, - nms: { iouThreshold: 0.55 }, + iouThreshold: 0.55, maxInstances: 20, returnMaskAtOriginalResolution: true, inputSize, diff --git a/apps/computer-vision/app/segment_anything/index.tsx b/apps/computer-vision/app/segment_anything/index.tsx index 97d507df09..ac7bbd06b5 100644 --- a/apps/computer-vision/app/segment_anything/index.tsx +++ b/apps/computer-vision/app/segment_anything/index.tsx @@ -241,7 +241,7 @@ export default function SegmentAnythingScreen() { const start = Date.now(); const output = await forward(imageUri, { confidenceThreshold: 0.4, - nms: { iouThreshold: 0.9 }, + iouThreshold: 0.9, maxInstances: 50, returnMaskAtOriginalResolution: true, }); diff --git a/apps/computer-vision/components/vision_camera/tasks/InstanceSegmentationTask.tsx b/apps/computer-vision/components/vision_camera/tasks/InstanceSegmentationTask.tsx index 7791862d1b..cd203eeb66 100644 --- a/apps/computer-vision/components/vision_camera/tasks/InstanceSegmentationTask.tsx +++ b/apps/computer-vision/components/vision_camera/tasks/InstanceSegmentationTask.tsx @@ -133,7 +133,7 @@ export default function InstanceSegmentationTask({ const ih = frame.width > frame.height ? frame.width : frame.height; const result = instSegRof(frame, isFrontCamera, { confidenceThreshold: 0.5, - nms: { iouThreshold: 0.5 }, + iouThreshold: 0.5, maxInstances: 5, returnMaskAtOriginalResolution: false, ...(activeModel === 'instanceSegmentationYolo26n' && { diff --git a/packages/react-native-executorch/src/modules/computer_vision/InstanceSegmentationModule.ts b/packages/react-native-executorch/src/modules/computer_vision/InstanceSegmentationModule.ts index 49c8bac13a..3091e85ab5 100644 --- a/packages/react-native-executorch/src/modules/computer_vision/InstanceSegmentationModule.ts +++ b/packages/react-native-executorch/src/modules/computer_vision/InstanceSegmentationModule.ts @@ -34,7 +34,7 @@ const YOLO_SEG_CONFIG = { availableInputSizes: [384, 512, 640] as const, defaultInputSize: 384, defaultConfidenceThreshold: 0.5, - defaultNms: { iouThreshold: 0.5 }, + defaultIouThreshold: 0.5, postprocessorConfig: { applyNMS: false, }, @@ -46,7 +46,7 @@ const FASTSAM_CONFIG = { availableInputSizes: undefined, defaultInputSize: undefined, defaultConfidenceThreshold: 0.5, - defaultNms: { iouThreshold: 0.9 }, + defaultIouThreshold: 0.9, postprocessorConfig: { applyNMS: true, }, @@ -58,7 +58,7 @@ const RF_DETR_NANO_SEG_CONFIG = { availableInputSizes: undefined, defaultInputSize: undefined, //RFDetr exposes only one method named forward defaultConfidenceThreshold: 0.5, - defaultNms: { iouThreshold: 0.5 }, + defaultIouThreshold: 0.5, postprocessorConfig: { applyNMS: true, }, @@ -143,7 +143,7 @@ type ResolveLabels = * * const results = await segmentation.forward('path/to/image.jpg', { * confidenceThreshold: 0.5, - * nms: { iouThreshold: 0.45 }, + * iouThreshold: 0.45, * maxInstances: 20, * inputSize: 640, * }); @@ -237,7 +237,7 @@ export class InstanceSegmentationModule< * availableInputSizes: [640], * defaultInputSize: 640, * defaultConfidenceThreshold: 0.5, - * defaultNms: { iouThreshold: 0.45 }, + * defaultIouThreshold: 0.45, * postprocessorConfig: { applyNMS: true }, * }, * ); @@ -323,10 +323,9 @@ export class InstanceSegmentationModule< const labelEnumOffset = this.labelEnumOffset; const defaultConfidenceThreshold = this.modelConfig.defaultConfidenceThreshold ?? 0.5; - const defaultNmsMode = this.modelConfig.defaultNms?.mode ?? 'greedy'; - const defaultIouThreshold = - this.modelConfig.defaultNms?.iouThreshold ?? 0.5; + const defaultIouThreshold = this.modelConfig.defaultIouThreshold ?? 0.5; const defaultInputSize = this.modelConfig.defaultInputSize; + const useWeightedNms = this.modelConfig.nmsMode === 'weighted'; return ( frame: Frame, @@ -337,9 +336,7 @@ export class InstanceSegmentationModule< const confidenceThreshold = options?.confidenceThreshold ?? defaultConfidenceThreshold; - const nmsMode = options?.nms?.mode ?? defaultNmsMode; - const iouThreshold = options?.nms?.iouThreshold ?? defaultIouThreshold; - const useWeightedNms = nmsMode === 'weighted'; + const iouThreshold = options?.iouThreshold ?? defaultIouThreshold; const maxInstances = options?.maxInstances ?? 100; const returnMaskAtOriginalResolution = options?.returnMaskAtOriginalResolution ?? true; @@ -386,14 +383,14 @@ export class InstanceSegmentationModule< * 1. **String path/URI**: File path, URL, or Base64-encoded string * 2. **PixelData**: Raw pixel data from image libraries (e.g., NitroImage) * @param input - Image source (string path or PixelData object) - * @param options - Optional configuration for the segmentation process. Includes `confidenceThreshold`, `nms`, `maxInstances`, `classesOfInterest`, `returnMaskAtOriginalResolution`, and `inputSize`. + * @param options - Optional configuration for the segmentation process. Includes `confidenceThreshold`, `iouThreshold`, `maxInstances`, `classesOfInterest`, `returnMaskAtOriginalResolution`, and `inputSize`. * @returns A Promise resolving to an array of {@link SegmentedInstance} objects with `bbox`, `mask`, `maskWidth`, `maskHeight`, `label`, `score`. * @throws {RnExecutorchError} If the model is not loaded or if an invalid `inputSize` is provided. * @example * ```ts * const results = await segmentation.forward('path/to/image.jpg', { * confidenceThreshold: 0.6, - * nms: { iouThreshold: 0.5 }, + * iouThreshold: 0.5, * maxInstances: 10, * inputSize: 640, * classesOfInterest: ['PERSON', 'CAR'], @@ -420,13 +417,9 @@ export class InstanceSegmentationModule< options?.confidenceThreshold ?? this.modelConfig.defaultConfidenceThreshold ?? 0.5; - const nmsMode = - options?.nms?.mode ?? this.modelConfig.defaultNms?.mode ?? 'greedy'; const iouThreshold = - options?.nms?.iouThreshold ?? - this.modelConfig.defaultNms?.iouThreshold ?? - 0.5; - const useWeightedNms = nmsMode === 'weighted'; + options?.iouThreshold ?? this.modelConfig.defaultIouThreshold ?? 0.5; + const useWeightedNms = this.modelConfig.nmsMode === 'weighted'; const maxInstances = options?.maxInstances ?? 100; const returnMaskAtOriginalResolution = options?.returnMaskAtOriginalResolution ?? true; diff --git a/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts b/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts index 74e9f3c328..ad76114b66 100644 --- a/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts +++ b/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts @@ -35,7 +35,7 @@ const YOLO_DETECTION_CONFIG = { availableInputSizes: [384, 512, 640] as const, defaultInputSize: 384, defaultDetectionThreshold: 0.5, - defaultNms: { iouThreshold: 0.5 }, + defaultIouThreshold: 0.5, } satisfies ObjectDetectionConfig; const ModelConfigs = { @@ -45,7 +45,7 @@ const ModelConfigs = { availableInputSizes: undefined, defaultInputSize: undefined, defaultDetectionThreshold: 0.7, - defaultNms: { iouThreshold: 0.55 }, + defaultIouThreshold: 0.55, }, 'rf-detr-nano': { labelMap: CocoLabel, @@ -53,7 +53,7 @@ const ModelConfigs = { availableInputSizes: undefined, defaultInputSize: undefined, defaultDetectionThreshold: 0.7, - defaultNms: { iouThreshold: 0.55 }, + defaultIouThreshold: 0.55, }, 'yolo26n': YOLO_DETECTION_CONFIG, 'yolo26s': YOLO_DETECTION_CONFIG, @@ -66,8 +66,9 @@ const ModelConfigs = { availableInputSizes: undefined, defaultInputSize: undefined, defaultDetectionThreshold: 0.5, - defaultNms: { mode: 'weighted', iouThreshold: 0.3 }, - defaultUseLetterbox: true, + defaultIouThreshold: 0.3, + nmsMode: 'weighted', + useLetterbox: true, }, } as const satisfies Record< ObjectDetectionModelName, @@ -182,12 +183,11 @@ export class ObjectDetectionModule< const defaultDetectionThreshold = this.modelConfig.defaultDetectionThreshold ?? 0.7; - const defaultNmsMode = this.modelConfig.defaultNms?.mode ?? 'greedy'; - const defaultIouThreshold = - this.modelConfig.defaultNms?.iouThreshold ?? 0.55; + const defaultIouThreshold = this.modelConfig.defaultIouThreshold ?? 0.55; const defaultInputSize = this.modelConfig.defaultInputSize; const availableInputSizes = this.modelConfig.availableInputSizes; - const defaultUseLetterbox = this.modelConfig.defaultUseLetterbox ?? false; + const useWeightedNms = this.modelConfig.nmsMode === 'weighted'; + const useLetterbox = this.modelConfig.useLetterbox ?? false; return ( frame: any, @@ -198,11 +198,8 @@ export class ObjectDetectionModule< const detectionThreshold = options?.detectionThreshold ?? defaultDetectionThreshold; - const nmsMode = options?.nms?.mode ?? defaultNmsMode; - const iouThreshold = options?.nms?.iouThreshold ?? defaultIouThreshold; + const iouThreshold = options?.iouThreshold ?? defaultIouThreshold; const inputSize = options?.inputSize ?? defaultInputSize; - const useWeightedNms = nmsMode === 'weighted'; - const useLetterbox = options?.useLetterbox ?? defaultUseLetterbox; if ( availableInputSizes && @@ -272,16 +269,11 @@ export class ObjectDetectionModule< options?.detectionThreshold ?? this.modelConfig.defaultDetectionThreshold ?? 0.7; - const nmsMode = - options?.nms?.mode ?? this.modelConfig.defaultNms?.mode ?? 'greedy'; const iouThreshold = - options?.nms?.iouThreshold ?? - this.modelConfig.defaultNms?.iouThreshold ?? - 0.55; + options?.iouThreshold ?? this.modelConfig.defaultIouThreshold ?? 0.55; const inputSize = options?.inputSize ?? this.modelConfig.defaultInputSize; - const useWeightedNms = nmsMode === 'weighted'; - const useLetterbox = - options?.useLetterbox ?? this.modelConfig.defaultUseLetterbox ?? false; + const useWeightedNms = this.modelConfig.nmsMode === 'weighted'; + const useLetterbox = this.modelConfig.useLetterbox ?? false; // Validate inputSize against availableInputSizes if ( diff --git a/packages/react-native-executorch/src/types/common.ts b/packages/react-native-executorch/src/types/common.ts index d787a29bc6..777b4d5654 100644 --- a/packages/react-native-executorch/src/types/common.ts +++ b/packages/react-native-executorch/src/types/common.ts @@ -146,28 +146,6 @@ export type LabelEnum = Readonly>; */ export type Triple = readonly [T, T, T]; -/** - * Non-maximum suppression configuration shared by computer-vision models that - * deduplicate overlapping detections (object detection, instance segmentation). - * - * - `'greedy'` (default): standard NMS — keep the highest-scoring box, drop - * anything overlapping it above {@link iouThreshold}. Suits detectors like - * YOLO / SSDLite whose individual anchors are trained to be accurate - * standalone. - * - `'weighted'`: score-weighted blending — average overlapping boxes by - * their scores instead of pruning to a single winner. Required for - * detectors like BlazeFace whose anchors were trained as an ensemble. - * - * Both fields are optional in per-call options and the model preset's - * `defaultNms`; missing fields fall back through the preset default to the - * runtime default. - * @category Types - */ -export interface NmsConfig { - mode?: 'greedy' | 'weighted'; - /** IoU threshold (0-1). Boxes overlapping above this are suppressed (greedy) or merged (weighted). */ - iouThreshold?: number; -} /** * Represents raw pixel data in RGB format for vision models. * diff --git a/packages/react-native-executorch/src/types/instanceSegmentation.ts b/packages/react-native-executorch/src/types/instanceSegmentation.ts index 8ca81204df..dc6d6398b3 100644 --- a/packages/react-native-executorch/src/types/instanceSegmentation.ts +++ b/packages/react-native-executorch/src/types/instanceSegmentation.ts @@ -1,12 +1,5 @@ import { RnExecutorchError } from '../errors/errorUtils'; -import { - LabelEnum, - NmsConfig, - ResourceSource, - Triple, - Frame, - PixelData, -} from './common'; +import { LabelEnum, ResourceSource, Triple, Frame, PixelData } from './common'; import { Bbox } from './objectDetection'; /** @@ -55,12 +48,12 @@ export interface InstanceSegmentationOptions { */ confidenceThreshold?: number; /** - * Override NMS mode and/or `iouThreshold` for this call. Partial — missing - * fields fall back to the model preset's `defaultNms`. Ignored for models - * whose preset disables external NMS (`postprocessorConfig.applyNMS: false`, - * e.g. YOLO-seg which does NMS internally). + * IoU threshold for non-maximum suppression (0-1). Defaults to the model + * preset's `defaultIouThreshold`. Ignored for models whose preset disables + * external NMS (`postprocessorConfig.applyNMS: false`, e.g. YOLO-seg which + * does NMS internally). */ - nms?: NmsConfig; + iouThreshold?: number; /** * Maximum number of instances to return. Default: 100 */ @@ -106,11 +99,17 @@ export type InstanceSegmentationConfig = { postprocessorConfig?: { applyNMS?: boolean }; defaultConfidenceThreshold?: number; /** - * Default NMS configuration. Overridable per-call via - * {@link InstanceSegmentationOptions.nms}. Has no effect when + * Default IoU threshold for non-maximum suppression (0-1). Overridable per-call + * via {@link InstanceSegmentationOptions.iouThreshold}. Has no effect when * `postprocessorConfig.applyNMS` is `false`. */ - defaultNms?: NmsConfig; + defaultIouThreshold?: number; + /** + * NMS algorithm baked into the model preset. Architectural — not per-call tuneable. + * - `'greedy'` (default): standard NMS, suits detectors whose anchors are independently accurate. + * - `'weighted'`: score-weighted box blending, required for ensemble-trained detectors. + */ + nmsMode?: 'greedy' | 'weighted'; } & ( | { availableInputSizes: readonly number[]; diff --git a/packages/react-native-executorch/src/types/objectDetection.ts b/packages/react-native-executorch/src/types/objectDetection.ts index 4b864c78c7..213e7a7b60 100644 --- a/packages/react-native-executorch/src/types/objectDetection.ts +++ b/packages/react-native-executorch/src/types/objectDetection.ts @@ -1,12 +1,5 @@ import { RnExecutorchError } from '../errors/errorUtils'; -import { - LabelEnum, - NmsConfig, - Triple, - ResourceSource, - PixelData, - Frame, -} from './common'; +import { LabelEnum, Triple, ResourceSource, PixelData, Frame } from './common'; import { CocoLabel } from '../constants/commonVision'; export { CocoLabel }; @@ -47,20 +40,12 @@ export interface Detection { export interface ObjectDetectionOptions { /** Minimum confidence score for detections (0-1). Defaults to the model preset's value. */ detectionThreshold?: number; - /** Override NMS mode and/or `iouThreshold` for this call. Partial — missing fields fall back to the model preset's `defaultNms`. */ - nms?: NmsConfig; + /** IoU threshold for non-maximum suppression (0-1). Defaults to the model preset's `defaultIouThreshold`. */ + iouThreshold?: number; /** Input size for multi-method models (e.g. 384/512/640 for YOLO). */ inputSize?: number; /** Restrict output to these class labels. */ classesOfInterest?: (keyof L)[]; - /** - * If true, preprocess by aspect-preserving fit + center-pad (letterbox) - * instead of a plain stretch resize to the model's input size. Required - * for models trained on natural-aspect crops (e.g. BlazeFace) — a stretched - * portrait frame shifts anchor positions and predicts a too-narrow box. - * Defaults to the model preset's `defaultUseLetterbox`. - */ - useLetterbox?: boolean; } /** @@ -96,10 +81,20 @@ export type ObjectDetectionConfig = { preprocessorConfig?: { normMean?: Triple; normStd?: Triple }; /** Default detection confidence threshold (0-1). */ defaultDetectionThreshold?: number; - /** Default NMS configuration. Overridable per-call via {@link ObjectDetectionOptions.nms}. */ - defaultNms?: NmsConfig; - /** Default preprocessing for this model. Overridable per-call via `useLetterbox`. */ - defaultUseLetterbox?: boolean; + /** Default IoU threshold for non-maximum suppression (0-1). Overridable per-call via {@link ObjectDetectionOptions.iouThreshold}. */ + defaultIouThreshold?: number; + /** + * NMS algorithm baked into the model preset. Architectural — not per-call tuneable. + * - `'greedy'` (default): standard NMS, suits detectors whose anchors are independently accurate (YOLO, SSDLite, RF-DETR). + * - `'weighted'`: score-weighted box blending, required for ensemble-trained detectors like BlazeFace. + */ + nmsMode?: 'greedy' | 'weighted'; + /** + * Whether the model expects aspect-preserving fit + center-pad (letterbox) preprocessing + * instead of plain stretch resize. Architectural property of the model — not per-call tuneable. + * BlazeFace requires letterbox; YOLO/SSDLite/RF-DETR do not. + */ + useLetterbox?: boolean; } & ( | { availableInputSizes: readonly number[]; From e62427e63d80c21072b41e4b508f6bed50e7d99f Mon Sep 17 00:00:00 2001 From: chmjkb Date: Tue, 26 May 2026 11:32:27 +0200 Subject: [PATCH 4/5] chore: add model to the registry, replace require() usage --- apps/computer-vision/app/object_detection/index.tsx | 7 +------ .../vision_camera/tasks/ObjectDetectionTask.tsx | 8 +------- .../src/constants/modelRegistry.ts | 1 + .../src/constants/modelUrls.ts | 13 ++++++++++++- 4 files changed, 15 insertions(+), 14 deletions(-) diff --git a/apps/computer-vision/app/object_detection/index.tsx b/apps/computer-vision/app/object_detection/index.tsx index d2cbcc91d0..398d81bb1b 100644 --- a/apps/computer-vision/app/object_detection/index.tsx +++ b/apps/computer-vision/app/object_detection/index.tsx @@ -8,11 +8,6 @@ import { useObjectDetection, ObjectDetectionModelSources, } from 'react-native-executorch'; - -const BLAZEFACE: ObjectDetectionModelSources = { - modelName: 'blazeface', - modelSource: require('../../assets/blazeface.pte'), -}; import { View, StyleSheet, Image, Text } from 'react-native'; import ImageWithBboxes from '../../components/ImageWithBboxes'; import React, { useContext, useEffect, useState } from 'react'; @@ -35,7 +30,7 @@ const MODELS: ModelOption[] = [ { label: 'YOLO26M', value: objectDetection.yolo26m() }, { label: 'YOLO26L', value: objectDetection.yolo26l() }, { label: 'YOLO26X', value: objectDetection.yolo26x() }, - { label: 'BlazeFace', value: BLAZEFACE }, + { label: 'BlazeFace', value: objectDetection.blazeface() }, ]; import ErrorBanner from '../../components/ErrorBanner'; diff --git a/apps/computer-vision/components/vision_camera/tasks/ObjectDetectionTask.tsx b/apps/computer-vision/components/vision_camera/tasks/ObjectDetectionTask.tsx index aeb364c377..9a2f2d6577 100644 --- a/apps/computer-vision/components/vision_camera/tasks/ObjectDetectionTask.tsx +++ b/apps/computer-vision/components/vision_camera/tasks/ObjectDetectionTask.tsx @@ -9,17 +9,11 @@ import { CocoLabel, CocoLabelYolo, BlazeFaceLabel, - ObjectDetectionModelSources, } from 'react-native-executorch'; import BoundingBoxes from '../../BoundingBoxes'; import { FRAME_TARGET_RESOLUTION, TaskProps } from './types'; const objectDetection = models.object_detection; -const BLAZEFACE: ObjectDetectionModelSources = { - modelName: 'blazeface', - modelSource: require('../../../assets/blazeface.pte'), -}; - type ObjModelId = | 'objectDetectionSsdlite' | 'objectDetectionRfdetr' @@ -53,7 +47,7 @@ export default function ObjectDetectionTask({ preventLoad: activeModel !== 'objectDetectionYolo26n', }); const blazeface = useObjectDetection({ - model: BLAZEFACE, + model: objectDetection.blazeface(), preventLoad: activeModel !== 'objectDetectionBlazeface', }); diff --git a/packages/react-native-executorch/src/constants/modelRegistry.ts b/packages/react-native-executorch/src/constants/modelRegistry.ts index 9c9da9c420..5645def7ad 100644 --- a/packages/react-native-executorch/src/constants/modelRegistry.ts +++ b/packages/react-native-executorch/src/constants/modelRegistry.ts @@ -518,6 +518,7 @@ export const models = { yolo26m: base(M.YOLO26M), yolo26l: base(M.YOLO26L), yolo26x: base(M.YOLO26X), + blazeface: base(M.BLAZEFACE), }, pose_estimation: { yolo26n: base(M.YOLO26N_POSE), diff --git a/packages/react-native-executorch/src/constants/modelUrls.ts b/packages/react-native-executorch/src/constants/modelUrls.ts index 4dc8966aee..0b13c8444d 100644 --- a/packages/react-native-executorch/src/constants/modelUrls.ts +++ b/packages/react-native-executorch/src/constants/modelUrls.ts @@ -3,7 +3,7 @@ import { PRIVACY_FILTER_NEMOTRON_LABELS, PRIVACY_FILTER_OPENAI_LABELS, } from './privacyFilterLabels'; -import { URL_PREFIX, PREVIOUS_VERSION_TAG } from './versions'; +import { URL_PREFIX, PREVIOUS_VERSION_TAG, VERSION_TAG } from './versions'; // LLMs @@ -682,6 +682,17 @@ export const YOLO26X = { modelSource: YOLO26X_DETECTION_MODEL, } as const; +// BlazeFace — pinned to VERSION_TAG (v0.10.0) where the HF repo first publishes. +const BLAZEFACE_XNNPACK_FP32_MODEL = `${URL_PREFIX}-blazeface/${VERSION_TAG}/xnnpack/blazeface.pte`; + +/** + * @category Models - Object Detection + */ +export const BLAZEFACE = { + modelName: 'blazeface', + modelSource: BLAZEFACE_XNNPACK_FP32_MODEL, +} as const; + // YOLO26 Pose Estimation const YOLO26N_POSE_MODEL = `${URL_PREFIX}-yolo26-pose/${PREVIOUS_VERSION_TAG}/xnnpack/yolo26_pose_n_xnnpack_fp32.pte`; From 4bdf7b335873d0c2338babc4581cb3f744a62c56 Mon Sep 17 00:00:00 2001 From: chmjkb Date: Tue, 26 May 2026 11:43:43 +0200 Subject: [PATCH 5/5] chore: fix comment --- .../react-native-executorch/src/types/instanceSegmentation.ts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/packages/react-native-executorch/src/types/instanceSegmentation.ts b/packages/react-native-executorch/src/types/instanceSegmentation.ts index dc6d6398b3..cf57eb662b 100644 --- a/packages/react-native-executorch/src/types/instanceSegmentation.ts +++ b/packages/react-native-executorch/src/types/instanceSegmentation.ts @@ -50,8 +50,7 @@ export interface InstanceSegmentationOptions { /** * IoU threshold for non-maximum suppression (0-1). Defaults to the model * preset's `defaultIouThreshold`. Ignored for models whose preset disables - * external NMS (`postprocessorConfig.applyNMS: false`, e.g. YOLO-seg which - * does NMS internally). + * external NMS (`postprocessorConfig.applyNMS: false`, e.g. YOLO-seg which is NMS-free) */ iouThreshold?: number; /**