software-mansion · chmjkb · May 20, 2026 · May 21, 2026 · May 26, 2026 · May 26, 2026
diff --git a/apps/computer-vision/app/object_detection/index.tsx b/apps/computer-vision/app/object_detection/index.tsx
@@ -30,6 +30,7 @@ const MODELS: ModelOption<ObjectDetectionModelSources>[] = [
   { label: 'YOLO26M', value: objectDetection.yolo26m() },
   { label: 'YOLO26L', value: objectDetection.yolo26l() },
   { label: 'YOLO26X', value: objectDetection.yolo26x() },
+  { label: 'BlazeFace', value: objectDetection.blazeface() },
 ];
 import ErrorBanner from '../../components/ErrorBanner';
 

diff --git a/apps/computer-vision/app/vision_camera/index.tsx b/apps/computer-vision/app/vision_camera/index.tsx
@@ -45,6 +45,7 @@ type ModelId =
   | 'objectDetectionSsdlite'
   | 'objectDetectionRfdetr'
   | 'objectDetectionYolo26n'
+  | 'objectDetectionBlazeface'
   | 'segmentationDeeplabResnet50'
   | 'segmentationDeeplabResnet101'
   | 'segmentationDeeplabMobilenet'
@@ -105,6 +106,7 @@ const TASKS: Task[] = [
       { id: 'objectDetectionSsdlite', label: 'SSDLite MobileNet' },
       { id: 'objectDetectionRfdetr', label: 'RF-DETR Nano' },
       { id: 'objectDetectionYolo26n', label: 'YOLO26N' },
+      { id: 'objectDetectionBlazeface', label: 'BlazeFace' },
     ],
   },
   {
@@ -270,6 +272,7 @@ export default function VisionCameraScreen() {
               | 'objectDetectionSsdlite'
               | 'objectDetectionRfdetr'
               | 'objectDetectionYolo26n'
+              | 'objectDetectionBlazeface'
           }
         />
       )}

diff --git a/apps/computer-vision/components/vision_camera/tasks/ObjectDetectionTask.tsx b/apps/computer-vision/components/vision_camera/tasks/ObjectDetectionTask.tsx
@@ -8,6 +8,7 @@ import {
   useObjectDetection,
   CocoLabel,
   CocoLabelYolo,
+  BlazeFaceLabel,
 } from 'react-native-executorch';
 import BoundingBoxes from '../../BoundingBoxes';
 import { FRAME_TARGET_RESOLUTION, TaskProps } from './types';
@@ -16,7 +17,8 @@ const objectDetection = models.object_detection;
 type ObjModelId =
   | 'objectDetectionSsdlite'
   | 'objectDetectionRfdetr'
-  | 'objectDetectionYolo26n';
+  | 'objectDetectionYolo26n'
+  | 'objectDetectionBlazeface';
 
 type Props = TaskProps & { activeModel: ObjModelId };
 
@@ -44,13 +46,18 @@ export default function ObjectDetectionTask({
     model: objectDetection.yolo26n(),
     preventLoad: activeModel !== 'objectDetectionYolo26n',
   });
+  const blazeface = useObjectDetection({
+    model: objectDetection.blazeface(),
+    preventLoad: activeModel !== 'objectDetectionBlazeface',
+  });
 
-  const active =
-    activeModel === 'objectDetectionSsdlite'
-      ? ssdlite
-      : activeModel === 'objectDetectionRfdetr'
-        ? rfdetr
-        : yolo26n;
+  const detectors = {
+    objectDetectionSsdlite: ssdlite,
+    objectDetectionRfdetr: rfdetr,
+    objectDetectionYolo26n: yolo26n,
+    objectDetectionBlazeface: blazeface,
+  } satisfies Record<ObjModelId, unknown>;
+  const active = detectors[activeModel];
 
   type CommonDetection = Omit<Detection, 'label'> & { label: string };
 
@@ -80,7 +87,8 @@ export default function ObjectDetectionTask({
     (p: {
       results:
         | Detection<typeof CocoLabel>[]
-        | Detection<typeof CocoLabelYolo>[];
+        | Detection<typeof CocoLabelYolo>[]
+        | Detection<typeof BlazeFaceLabel>[];
       imageWidth: number;
       imageHeight: number;
     }) => {

diff --git a/...-executorch/common/rnexecutorch/models/instance_segmentation/BaseInstanceSegmentation.cpp b/...-executorch/common/rnexecutorch/models/instance_segmentation/BaseInstanceSegmentation.cpp
@@ -57,7 +57,8 @@ TensorPtr BaseInstanceSegmentation::buildInputTensor(const cv::Mat &image) {
 std::vector<types::Instance> BaseInstanceSegmentation::runInference(
     const cv::Mat &image, double confidenceThreshold, double iouThreshold,
     int32_t maxInstances, const std::vector<int32_t> &classIndices,
-    bool returnMaskAtOriginalResolution, const std::string &methodName) {
+    bool returnMaskAtOriginalResolution, const std::string &methodName,
+    bool useWeightedNms) {
 
   std::scoped_lock lock(inference_mutex_);
 
@@ -86,34 +87,37 @@ std::vector<types::Instance> BaseInstanceSegmentation::runInference(
   auto instances = collectInstances(
       forwardResult.get(), originalSize, modelInputSize, confidenceThreshold,
       classIndices, returnMaskAtOriginalResolution);
-  return finalizeInstances(std::move(instances), iouThreshold, maxInstances);
+  return finalizeInstances(std::move(instances), iouThreshold, maxInstances,
+                           useWeightedNms);
 }
 
 std::vector<types::Instance> BaseInstanceSegmentation::generateFromString(
     std::string imageSource, double confidenceThreshold, double iouThreshold,
     int32_t maxInstances, std::vector<int32_t> classIndices,
-    bool returnMaskAtOriginalResolution, std::string methodName) {
+    bool returnMaskAtOriginalResolution, std::string methodName,
+    bool useWeightedNms) {
 
   cv::Mat imageBGR = image_processing::readImage(imageSource);
   cv::Mat imageRGB;
   cv::cvtColor(imageBGR, imageRGB, cv::COLOR_BGR2RGB);
 
   return runInference(imageRGB, confidenceThreshold, iouThreshold, maxInstances,
-                      classIndices, returnMaskAtOriginalResolution, methodName);
+                      classIndices, returnMaskAtOriginalResolution, methodName,
+                      useWeightedNms);
 }
 
 std::vector<types::Instance> BaseInstanceSegmentation::generateFromFrame(
     jsi::Runtime &runtime, const jsi::Value &frameData,
     double confidenceThreshold, double iouThreshold, int32_t maxInstances,
     std::vector<int32_t> classIndices, bool returnMaskAtOriginalResolution,
-    std::string methodName) {
+    std::string methodName, bool useWeightedNms) {
 
   auto orient = ::rnexecutorch::utils::readFrameOrientation(runtime, frameData);
   cv::Mat frame = extractFromFrame(runtime, frameData);
   cv::Mat rotated = utils::rotateFrameForModel(frame, orient);
-  auto instances =
-      runInference(rotated, confidenceThreshold, iouThreshold, maxInstances,
-                   classIndices, returnMaskAtOriginalResolution, methodName);
+  auto instances = runInference(
+      rotated, confidenceThreshold, iouThreshold, maxInstances, classIndices,
+      returnMaskAtOriginalResolution, methodName, useWeightedNms);
   for (auto &inst : instances) {
     utils::inverseRotateBbox(inst.bbox, orient, rotated.size());
     // Inverse-rotate the mask to match the screen orientation
@@ -131,11 +135,13 @@ std::vector<types::Instance> BaseInstanceSegmentation::generateFromFrame(
 std::vector<types::Instance> BaseInstanceSegmentation::generateFromPixels(
     JSTensorViewIn tensorView, double confidenceThreshold, double iouThreshold,
     int32_t maxInstances, std::vector<int32_t> classIndices,
-    bool returnMaskAtOriginalResolution, std::string methodName) {
+    bool returnMaskAtOriginalResolution, std::string methodName,
+    bool useWeightedNms) {
 
   cv::Mat image = extractFromPixels(tensorView);
   return runInference(image, confidenceThreshold, iouThreshold, maxInstances,
-                      classIndices, returnMaskAtOriginalResolution, methodName);
+                      classIndices, returnMaskAtOriginalResolution, methodName,
+                      useWeightedNms);
 }
 
 std::tuple<utils::computer_vision::BBox, float, int32_t>
@@ -296,11 +302,14 @@ void BaseInstanceSegmentation::ensureMethodLoaded(
 
 std::vector<types::Instance> BaseInstanceSegmentation::finalizeInstances(
     std::vector<types::Instance> instances, double iouThreshold,
-    int32_t maxInstances) const {
+    int32_t maxInstances, bool useWeightedNms) const {
 
   if (applyNMS_) {
-    instances =
-        utils::computer_vision::nonMaxSuppression(instances, iouThreshold);
+    instances = useWeightedNms
+                    ? utils::computer_vision::weightedNonMaxSuppression(
+                          instances, iouThreshold)
+                    : utils::computer_vision::nonMaxSuppression(instances,
+                                                                iouThreshold);
   }
 
   if (std::cmp_greater(instances.size(), maxInstances)) {

diff --git a/...ve-executorch/common/rnexecutorch/models/instance_segmentation/BaseInstanceSegmentation.h b/...ve-executorch/common/rnexecutorch/models/instance_segmentation/BaseInstanceSegmentation.h
@@ -28,30 +28,32 @@ class BaseInstanceSegmentation : public VisionModel {
                      double iouThreshold, int32_t maxInstances,
                      std::vector<int32_t> classIndices,
                      bool returnMaskAtOriginalResolution,
-                     std::string methodName);
+                     std::string methodName, bool useWeightedNms);
 
   [[nodiscard("Registered non-void function")]] std::vector<types::Instance>
   generateFromFrame(jsi::Runtime &runtime, const jsi::Value &frameData,
                     double confidenceThreshold, double iouThreshold,
                     int32_t maxInstances, std::vector<int32_t> classIndices,
-                    bool returnMaskAtOriginalResolution,
-                    std::string methodName);
+                    bool returnMaskAtOriginalResolution, std::string methodName,
+                    bool useWeightedNms);
 
   [[nodiscard("Registered non-void function")]] std::vector<types::Instance>
   generateFromPixels(JSTensorViewIn tensorView, double confidenceThreshold,
                      double iouThreshold, int32_t maxInstances,
                      std::vector<int32_t> classIndices,
                      bool returnMaskAtOriginalResolution,
-                     std::string methodName);
+                     std::string methodName, bool useWeightedNms);
 
 protected:
   cv::Size modelInputSize() const override;
 
 private:
-  std::vector<types::Instance> runInference(
-      const cv::Mat &image, double confidenceThreshold, double iouThreshold,
-      int32_t maxInstances, const std::vector<int32_t> &classIndices,
-      bool returnMaskAtOriginalResolution, const std::string &methodName);
+  std::vector<types::Instance>
+  runInference(const cv::Mat &image, double confidenceThreshold,
+               double iouThreshold, int32_t maxInstances,
+               const std::vector<int32_t> &classIndices,
+               bool returnMaskAtOriginalResolution,
+               const std::string &methodName, bool useWeightedNms);
 
   TensorPtr buildInputTensor(const cv::Mat &image);
 
@@ -89,7 +91,7 @@ class BaseInstanceSegmentation : public VisionModel {
 
   std::vector<types::Instance>
   finalizeInstances(std::vector<types::Instance> instances, double iouThreshold,
-                    int32_t maxInstances) const;
+                    int32_t maxInstances, bool useWeightedNms) const;
 
   cv::Mat processMaskFromLogits(
       const cv::Mat &logitsMat, const utils::computer_vision::BBox &bboxModel,

diff --git a/...s/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp b/...s/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp
@@ -82,16 +82,10 @@ std::set<int32_t> ObjectDetection::prepareAllowedClasses(
   return allowedClasses;
 }
 
-std::vector<types::Detection>
-ObjectDetection::postprocess(const std::vector<EValue> &tensors,
-                             cv::Size originalSize, double detectionThreshold,
-                             double iouThreshold,
-                             const std::vector<int32_t> &classIndices) {
-  const cv::Size inputSize = modelInputSize();
-  float widthRatio = static_cast<float>(originalSize.width) / inputSize.width;
-  float heightRatio =
-      static_cast<float>(originalSize.height) / inputSize.height;
-
+std::vector<types::Detection> ObjectDetection::postprocess(
+    const std::vector<EValue> &tensors, const BoxTransform &transform,
+    double detectionThreshold, double iouThreshold,
+    const std::vector<int32_t> &classIndices, bool useWeightedNms) {
   // Prepare allowed classes set for filtering
   auto allowedClasses = prepareAllowedClasses(classIndices);
 
@@ -124,10 +118,13 @@ ObjectDetection::postprocess(const std::vector<EValue> &tensors,
       continue;
     }
 
-    float x1 = bboxes[i * 4] * widthRatio;
-    float y1 = bboxes[i * 4 + 1] * heightRatio;
-    float x2 = bboxes[i * 4 + 2] * widthRatio;
-    float y2 = bboxes[i * 4 + 3] * heightRatio;
+    // Map model-input pixel coords back to source-image coords. The same
+    // affine `x_src = x_model * scale + offset` works for stretch and
+    // letterbox preprocessing — offsets are zero in the stretch case.
+    float x1 = bboxes[i * 4] * transform.scaleX + transform.offsetX;
+    float y1 = bboxes[i * 4 + 1] * transform.scaleY + transform.offsetY;
+    float x2 = bboxes[i * 4 + 2] * transform.scaleX + transform.offsetX;
+    float y2 = bboxes[i * 4 + 3] * transform.scaleY + transform.offsetY;
 
     if (std::cmp_greater_equal(labelIdx, labelNames_.size())) {
       throw RnExecutorchError(
@@ -140,12 +137,17 @@ ObjectDetection::postprocess(const std::vector<EValue> &tensors,
                             labelNames_[labelIdx], labelIdx, scores[i]);
   }
 
-  return utils::computer_vision::nonMaxSuppression(detections, iouThreshold);
+  return useWeightedNms
+             ? utils::computer_vision::weightedNonMaxSuppression(detections,
+                                                                 iouThreshold)
+             : utils::computer_vision::nonMaxSuppression(detections,
+                                                         iouThreshold);
 }
 
 std::vector<types::Detection> ObjectDetection::runInference(
     cv::Mat image, double detectionThreshold, double iouThreshold,
-    const std::vector<int32_t> &classIndices, const std::string &methodName) {
+    const std::vector<int32_t> &classIndices, const std::string &methodName,
+    bool useWeightedNms, bool useLetterbox) {
   if (detectionThreshold < 0.0 || detectionThreshold > 1.0) {
     throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput,
                             "detectionThreshold must be in range [0, 1]");
@@ -171,7 +173,38 @@ std::vector<types::Detection> ObjectDetection::runInference(
   }
   modelInputShape_ = inputShapes[0];
 
-  cv::Mat preprocessed = preprocess(image);
+  const cv::Size inputSize = modelInputSize();
+  cv::Mat preprocessed;
+  BoxTransform transform;
+  if (useLetterbox) {
+    // Aspect-preserving fit + center-pad with black bars: models trained on
+    // natural-aspect crops (BlazeFace) need unstretched input. Resized dims
+    // are clamped to >= 1 px so cv::resize never gets an empty target size.
+    const float fitScale =
+        std::min(static_cast<float>(inputSize.width) / originalSize.width,
+                 static_cast<float>(inputSize.height) / originalSize.height);
+    const int newW = std::max(
+        1, static_cast<int>(std::round(originalSize.width * fitScale)));
+    const int newH = std::max(
+        1, static_cast<int>(std::round(originalSize.height * fitScale)));
+    const int padX = (inputSize.width - newW) / 2;
+    const int padY = (inputSize.height - newH) / 2;
+
+    cv::Mat resized;
+    cv::resize(image, resized, cv::Size(newW, newH), 0, 0, cv::INTER_AREA);
+    cv::copyMakeBorder(resized, preprocessed, padY,
+                       inputSize.height - newH - padY, padX,
+                       inputSize.width - newW - padX, cv::BORDER_CONSTANT,
+                       cv::Scalar(0, 0, 0));
+
+    const float inv = 1.0f / fitScale;
+    transform = {inv, inv, -padX * inv, -padY * inv};
+  } else {
+    preprocessed = preprocess(image);
+    transform = {static_cast<float>(originalSize.width) / inputSize.width,
+                 static_cast<float>(originalSize.height) / inputSize.height,
+                 0.0f, 0.0f};
+  }
 
   auto inputTensor =
       (normMean_ && normStd_)
@@ -188,31 +221,34 @@ std::vector<types::Detection> ObjectDetection::runInference(
                                 "Ensure the model input is correct.");
   }
 
-  return postprocess(executeResult.get(), originalSize, detectionThreshold,
-                     iouThreshold, classIndices);
+  return postprocess(executeResult.get(), transform, detectionThreshold,
+                     iouThreshold, classIndices, useWeightedNms);
 }
 
 std::vector<types::Detection> ObjectDetection::generateFromString(
     std::string imageSource, double detectionThreshold, double iouThreshold,
-    std::vector<int32_t> classIndices, std::string methodName) {
+    std::vector<int32_t> classIndices, std::string methodName,
+    bool useWeightedNms, bool useLetterbox) {
   cv::Mat imageBGR = image_processing::readImage(imageSource);
 
   cv::Mat imageRGB;
   cv::cvtColor(imageBGR, imageRGB, cv::COLOR_BGR2RGB);
 
   return runInference(imageRGB, detectionThreshold, iouThreshold, classIndices,
-                      methodName);
+                      methodName, useWeightedNms, useLetterbox);
 }
 
 std::vector<types::Detection> ObjectDetection::generateFromFrame(
     jsi::Runtime &runtime, const jsi::Value &frameData,
     double detectionThreshold, double iouThreshold,
-    std::vector<int32_t> classIndices, std::string methodName) {
+    std::vector<int32_t> classIndices, std::string methodName,
+    bool useWeightedNms, bool useLetterbox) {
   auto orient = ::rnexecutorch::utils::readFrameOrientation(runtime, frameData);
   cv::Mat frame = extractFromFrame(runtime, frameData);
   cv::Mat rotated = ::rnexecutorch::utils::rotateFrameForModel(frame, orient);
-  auto detections = runInference(rotated, detectionThreshold, iouThreshold,
-                                 classIndices, methodName);
+  auto detections =
+      runInference(rotated, detectionThreshold, iouThreshold, classIndices,
+                   methodName, useWeightedNms, useLetterbox);
 
   for (auto &det : detections) {
     ::rnexecutorch::utils::inverseRotateBbox(det.bbox, orient, rotated.size());
@@ -222,10 +258,11 @@ std::vector<types::Detection> ObjectDetection::generateFromFrame(
 
 std::vector<types::Detection> ObjectDetection::generateFromPixels(
     JSTensorViewIn pixelData, double detectionThreshold, double iouThreshold,
-    std::vector<int32_t> classIndices, std::string methodName) {
+    std::vector<int32_t> classIndices, std::string methodName,
+    bool useWeightedNms, bool useLetterbox) {
   cv::Mat image = extractFromPixels(pixelData);
 
   return runInference(image, detectionThreshold, iouThreshold, classIndices,
-                      methodName);
+                      methodName, useWeightedNms, useLetterbox);
 }
 } // namespace rnexecutorch::models::object_detection