From 48244b89975e39f7cb1c13d08d0f76a22db8d3b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5kon=20Hukkel=C3=A5s?= Date: Sun, 1 Jun 2025 08:01:43 +0200 Subject: [PATCH 1/8] add: UV setup & github actions pipeline --- .github/actions/setup-uv/action.yml | 12 ++++++++++++ .github/workflows/format.yml | 14 ++++++++++++++ pyproject.toml | 20 ++++++++++++++++++++ setup.py | 28 ---------------------------- 4 files changed, 46 insertions(+), 28 deletions(-) create mode 100644 .github/actions/setup-uv/action.yml create mode 100644 .github/workflows/format.yml create mode 100644 pyproject.toml delete mode 100644 setup.py diff --git a/.github/actions/setup-uv/action.yml b/.github/actions/setup-uv/action.yml new file mode 100644 index 0000000..2606145 --- /dev/null +++ b/.github/actions/setup-uv/action.yml @@ -0,0 +1,12 @@ +name: Setup UV +description: Installs UV from Astral.sh +runs: + using: "composite" + steps: + - name: Install curl and UV + shell: bash + run: | + sudo apt update + sudo apt install -y curl + curl -LsSf https://astral.sh/uv/install.sh | sh + echo "$HOME/.local/bin" >> $GITHUB_PATH diff --git a/.github/workflows/format.yml b/.github/workflows/format.yml new file mode 100644 index 0000000..340443e --- /dev/null +++ b/.github/workflows/format.yml @@ -0,0 +1,14 @@ +name: Format +on: [push] +jobs: + my-job: + runs-on: ubuntu-24.04 + steps: + - uses: actions/checkout@v4 + + - name: Setup UV + uses: ./.github/actions/setup-uv + + - name: Build + run: uv build + diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..de388c7 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,20 @@ +[project] +name="face_detection" +version="0.2.1" +description="A simple and lightweight package for state of the art face detection with GPU support." 
+readme="README.md" +requires-python=">=3.9" +license="apache-2.0" +classifiers=[ + "Operating System :: OS Independent", +] +dependencies = [ + "numpy>=2.0.2", + "torch>=2.7.0", +] + +[build-system] +requires = ["setuptools", "torch"] + +[tool.setuptools] +packages = ["face_detection"] diff --git a/setup.py b/setup.py deleted file mode 100644 index 61dfd4c..0000000 --- a/setup.py +++ /dev/null @@ -1,28 +0,0 @@ -import setuptools -import torch -import torchvision - -torch_ver = [int(x) for x in torch.__version__.split(".")[:2]] -assert torch_ver >= [1, 6], "Requires PyTorch >= 1.6" -torchvision_ver = [int(x) for x in torchvision.__version__.split(".")[:2]] -assert torchvision_ver >= [0, 3], "Requires torchvision >= 0.3" - -setuptools.setup( - name="face_detection", - version="0.2.1", - author="Håkon Hukkelås", - description="A simple and lightweight package for state of the art face detection with GPU support.", - long_description="".join(open("README.md", "r").readlines()), - long_description_content_type="text/markdown", - url="https://github.com/hukkelas/DSFD-Pytorch-Inference", - python_requires='>=3.6', - license="apache-2.0", - classifiers=[ - "License :: OSI Approved :: Apache Software License", - "Operating System :: OS Independent", - ], - install_requires=[ - "numpy", - ], - packages=setuptools.find_packages() -) From 59bd11793d674f5077153525b0a9b1d3fbef194e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5kon=20Hukkel=C3=A5s?= Date: Sun, 1 Jun 2025 08:44:43 +0200 Subject: [PATCH 2/8] add: tests for all face detectors --- .github/workflows/format.yml | 2 +- .github/workflows/test.yml | 14 ++++++++ pyproject.toml | 11 ++++-- tests/test_detector.py | 68 ++++++++++++++++++++++++++++++++++++ 4 files changed, 92 insertions(+), 3 deletions(-) create mode 100644 .github/workflows/test.yml create mode 100644 tests/test_detector.py diff --git a/.github/workflows/format.yml b/.github/workflows/format.yml index 340443e..9c1e1d0 100644 --- a/.github/workflows/format.yml +++ b/.github/workflows/format.yml @@ -1,7 +1,7 @@ name: Format on: [push] jobs: - my-job: + format: runs-on: ubuntu-24.04 steps: - uses: actions/checkout@v4 diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..d34a0cf --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,14 @@ +name: Test +on: [push] +jobs: + pytest: + runs-on: ubuntu-24.04 + steps: + - uses: actions/checkout@v4 + + - name: Setup UV + uses: ./.github/actions/setup-uv + + - name: test + run: uv run pytest + diff --git a/pyproject.toml b/pyproject.toml index de388c7..d397cc1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,8 +9,9 @@ classifiers=[ "Operating System :: OS Independent", ] dependencies = [ - "numpy>=2.0.2", - "torch>=2.7.0", + "numpy", + "torch", + "torchvision", ] [build-system] @@ -18,3 +19,9 @@ requires = ["setuptools", "torch"] [tool.setuptools] packages = ["face_detection"] + +[dependency-groups] +dev = [ + "opencv-python", + "pytest>=8.3.5", +] diff --git a/tests/test_detector.py b/tests/test_detector.py new file mode 100644 index 0000000..5e2a7a8 --- /dev/null +++ b/tests/test_detector.py @@ -0,0 +1,68 @@ +import numpy as np +import pytest +import cv2 +import face_detection # your face detection library + +def compute_iou(boxA, boxB): + xA = max(boxA[0], boxB[0]) + yA = max(boxA[1], boxB[1]) + xB = min(boxA[2], boxB[2]) + yB = min(boxA[3], boxB[3]) + + print(boxA, boxB) + interArea = max(0, xB - xA) * max(0, yB - yA) + if interArea == 0: + print("Ret 0") + return 0.0 + + boxAArea 
= (boxA[2] - boxA[0]) * (boxA[3] - boxA[1]) + boxBArea = (boxB[2] - boxB[0]) * (boxB[3] - boxB[1]) + iou = interArea / float(boxAArea + boxBArea - interArea) + + print("IoU", iou) + + return iou + + +@pytest.fixture +def ground_truth_boxes(): + return np.array([ + [337.8219142, 227.30235955, 363.18236876, 260.75754449], + [120.61462998, 244.68149829, 153.73102021, 290.13813281], + [793.31824303, 88.6468603, 837.80744743, 153.03655452], + [499.23486614, 212.40574998, 521.46317768, 241.84556359], + [412.37690353, 219.29100847, 437.20971298, 250.56026506], + [654.66749144, 203.24960518, 676.66251707, 231.10678673], + [692.63414764, 248.56575656, 726.75259781, 292.49138522], + [215.16035197, 269.50566196, 240.76163981, 303.02491093], + [189.08402371, 212.22481942, 210.5982945, 240.76419282], + [571.04836243, 213.0569253, 590.01044816, 238.5836339], + [ 16.7418344, 235.77498758, 41.44155097, 265.93795145], + [284.28320718, 213.93544269, 304.40658212, 238.0858829], + [167.58154631, 76.92867303, 187.13439512, 102.97041345], + ]) + +@pytest.mark.parametrize("detector_name", [ + "DSFDDetector", + "RetinaNetResNet50", + "RetinaNetMobileNetV1" +]) +def test_detector_detects_boxes_with_iou(detector_name, ground_truth_boxes): + detector = face_detection.build_detector( + detector_name, + max_resolution=1080, + confidence_threshold=0.5 + ) + impath = "images/11_Meeting_Meeting_11_Meeting_Meeting_11_176.jpg" + img = cv2.imread(impath) + + detections = detector.detect(img[:, :, ::-1])[:, :4] + + for gt_box in ground_truth_boxes: + print("CHECKING") + matched = any(compute_iou(gt_box, det_box) >= 0.5 for det_box in detections) + assert matched, ( + f"{detector_name} failed to detect ground truth box {gt_box} " + f"with IoU >= 0.5" + ) + From f9e2d47d780a1db8b278b0f5b02a3a692f4ed2f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5kon=20Hukkel=C3=A5s?= Date: Sun, 1 Jun 2025 17:25:42 +0200 Subject: [PATCH 3/8] add: uv ruff check --- .github/workflows/{format.yml => ruff.yml} | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) rename .github/workflows/{format.yml => ruff.yml} (82%) diff --git a/.github/workflows/format.yml b/.github/workflows/ruff.yml similarity index 82% rename from .github/workflows/format.yml rename to .github/workflows/ruff.yml index 9c1e1d0..7da5f7a 100644 --- a/.github/workflows/format.yml +++ b/.github/workflows/ruff.yml @@ -1,7 +1,7 @@ name: Format on: [push] jobs: - format: + ruff: runs-on: ubuntu-24.04 steps: - uses: actions/checkout@v4 @@ -10,5 +10,5 @@ jobs: uses: ./.github/actions/setup-uv - name: Build - run: uv build + run: uv run ruff check From 015f2fe887a0f35071f9cf24c1f60c8c0752702b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5kon=20Hukkel=C3=A5s?= Date: Mon, 9 Jun 2025 17:29:04 +0200 Subject: [PATCH 4/8] fix: ruff format --- .github/workflows/ruff.yml | 4 +++- pyproject.toml | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ruff.yml b/.github/workflows/ruff.yml index 7da5f7a..08d51d9 100644 --- a/.github/workflows/ruff.yml +++ b/.github/workflows/ruff.yml @@ -9,6 +9,8 @@ jobs: - name: Setup UV uses: ./.github/actions/setup-uv - - name: Build + - name: Install ruff + run: uv add ruff + - name: Run ruff run: uv run ruff check diff --git a/pyproject.toml b/pyproject.toml index d397cc1..8cb6c59 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,4 +24,5 @@ packages = ["face_detection"] dev = [ "opencv-python", "pytest>=8.3.5", + "ruff" ] From 6a01f9fe2caba325aa86be573d341bb7b5c80f44 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?H=C3=A5kon=20Hukkel=C3=A5s?= Date: Mon, 9 Jun 2025 17:32:42 +0200 Subject: [PATCH 5/8] format only --- benchmark.py | 8 +- face_detection/__init__.py | 2 +- face_detection/base.py | 41 ++++--- face_detection/box_utils.py | 11 +- face_detection/build.py | 31 +++-- face_detection/dsfd/__init__.py | 2 +- face_detection/dsfd/config.py | 64 +++++++---- face_detection/dsfd/detect.py | 18 ++- face_detection/dsfd/face_ssd.py | 106 +++++++++--------- face_detection/dsfd/utils.py | 35 +++--- face_detection/registry.py | 33 +++--- face_detection/retinaface/__init__.py | 2 +- face_detection/retinaface/config.py | 69 ++++++------ face_detection/retinaface/detect.py | 33 ++---- face_detection/retinaface/models/net.py | 58 +++++----- .../retinaface/models/retinaface.py | 43 +++---- face_detection/retinaface/onnx.py | 25 +++-- face_detection/retinaface/prior_box.py | 25 +++-- face_detection/retinaface/tensorrt_wrap.py | 99 +++++++++------- face_detection/retinaface/utils.py | 31 ++--- test.py | 20 +--- tests/test_detector.py | 49 ++++---- 22 files changed, 418 insertions(+), 387 deletions(-) diff --git a/benchmark.py b/benchmark.py index 6bd9f21..2bf52ac 100644 --- a/benchmark.py +++ b/benchmark.py @@ -8,10 +8,7 @@ num = 1000 for detector in face_detection.available_detectors: - detector = face_detection.build_detector( - detector, - fp16_inference=True - ) + detector = face_detection.build_detector(detector, fp16_inference=True) im = "images/0_Parade_Parade_0_873.jpg" im = cv2.imread(im)[:, :, ::-1] t = time.time() @@ -23,4 +20,5 @@ ms = avg_time * 1000 print( f"Detector: {detector.__class__.__name__}. Average inference time over image shape: {im.shape} is:", - f"{ms:.2f} ms, fps: {fps:.2f}") + f"{ms:.2f} ms, fps: {fps:.2f}", + ) diff --git a/face_detection/__init__.py b/face_detection/__init__.py index e589bff..a4d0316 100644 --- a/face_detection/__init__.py +++ b/face_detection/__init__.py @@ -1,3 +1,3 @@ from .build import build_detector, available_detectors from .dsfd import DSFDDetector -from .retinaface import RetinaNetMobileNetV1, RetinaNetResNet50 \ No newline at end of file +from .retinaface import RetinaNetMobileNetV1, RetinaNetResNet50 diff --git a/face_detection/base.py b/face_detection/base.py index b0ec89c..73e3773 100644 --- a/face_detection/base.py +++ b/face_detection/base.py @@ -7,24 +7,23 @@ def check_image(im: np.ndarray): - assert im.dtype == np.uint8,\ - f"Expect image to have dtype np.uint8. Was: {im.dtype}" - assert len(im.shape) == 4,\ - f"Expected image to have 4 dimensions. got: {im.shape}" - assert im.shape[-1] == 3,\ + assert im.dtype == np.uint8, f"Expect image to have dtype np.uint8. Was: {im.dtype}" + assert len(im.shape) == 4, f"Expected image to have 4 dimensions. 
got: {im.shape}" + assert im.shape[-1] == 3, ( f"Expected image to be RGB, got: {im.shape[-1]} color channels" + ) class Detector(ABC): - def __init__( - self, - confidence_threshold: float, - nms_iou_threshold: float, - device: torch.device, - max_resolution: int, - fp16_inference: bool, - clip_boxes: bool): + self, + confidence_threshold: float, + nms_iou_threshold: float, + device: torch.device, + max_resolution: int, + fp16_inference: bool, + clip_boxes: bool, + ): """ Args: confidence_threshold (float): Threshold to filter out bounding boxes @@ -40,11 +39,9 @@ def __init__( self.max_resolution = max_resolution self.fp16_inference = fp16_inference self.clip_boxes = clip_boxes - self.mean = np.array( - [123, 117, 104], dtype=np.float32).reshape(1, 1, 1, 3) + self.mean = np.array([123, 117, 104], dtype=np.float32).reshape(1, 1, 1, 3) - def detect( - self, image: np.ndarray, shrink=1.0) -> np.ndarray: + def detect(self, image: np.ndarray, shrink=1.0) -> np.ndarray: """Takes an RGB image and performs and returns a set of bounding boxes as detections Args: @@ -77,7 +74,7 @@ def filter_boxes(self, boxes: torch.Tensor) -> typing.List[np.ndarray]: """ final_output = [] for i in range(len(boxes)): - scores = boxes[i, :, 4] + scores = boxes[i, :, 4] keep_idx = scores >= self.confidence_threshold boxes_ = boxes[i, keep_idx, :-1] scores = scores[keep_idx] @@ -99,7 +96,7 @@ def resize(self, image, shrink: float): shrink_factor = self.max_resolution / max((height, width)) if shrink_factor <= shrink: shrink = shrink_factor - size = (int(height*shrink), int(width*shrink)) + size = (int(height * shrink), int(width * shrink)) image = torch.nn.functional.interpolate(image, size=size) return image @@ -130,8 +127,7 @@ def _batched_detect(self, image: np.ndarray) -> typing.List[np.ndarray]: return boxes @torch.no_grad() - def batched_detect( - self, image: np.ndarray, shrink=1.0) -> typing.List[np.ndarray]: + def batched_detect(self, image: np.ndarray, shrink=1.0) -> typing.List[np.ndarray]: """Takes N RGB image and performs and returns a set of bounding boxes as detections Args: @@ -150,5 +146,6 @@ def batched_detect( def validate_detections(self, boxes: typing.List[np.ndarray]): for box in boxes: - assert np.all(box[:, 4] <= 1) and np.all(box[:, 4] >= 0),\ + assert np.all(box[:, 4] <= 1) and np.all(box[:, 4] >= 0), ( f"Confidence values not valid: {box}" + ) diff --git a/face_detection/box_utils.py b/face_detection/box_utils.py index 643a23a..7044cea 100644 --- a/face_detection/box_utils.py +++ b/face_detection/box_utils.py @@ -14,10 +14,13 @@ def batched_decode(loc, priors, variances, to_XYXY=True): decoded bounding box predictions """ priors = priors[None] - boxes = torch.cat(( - priors[:, :, :2] + loc[:, :, :2] * variances[0] * priors[:, :, 2:], - priors[:, :, 2:] * torch.exp(loc[:, :, 2:] * variances[1])), - dim=2) + boxes = torch.cat( + ( + priors[:, :, :2] + loc[:, :, :2] * variances[0] * priors[:, :, 2:], + priors[:, :, 2:] * torch.exp(loc[:, :, 2:] * variances[1]), + ), + dim=2, + ) if to_XYXY: boxes[:, :, :2] -= boxes[:, :, 2:] / 2 boxes[:, :, 2:] += boxes[:, :, :2] diff --git a/face_detection/build.py b/face_detection/build.py index ebf84ac..874970e 100644 --- a/face_detection/build.py +++ b/face_detection/build.py @@ -2,26 +2,23 @@ from .base import Detector from .torch_utils import get_device -available_detectors = [ - "DSFDDetector", - "RetinaNetResNet50", - "RetinaNetMobileNetV1" -] +available_detectors = ["DSFDDetector", "RetinaNetResNet50", "RetinaNetMobileNetV1"] DETECTOR_REGISTRY = 
Registry("DETECTORS") def build_detector( - name: str = "DSFDDetector", - confidence_threshold: float = 0.5, - nms_iou_threshold: float = 0.3, - device=get_device(), - max_resolution: int = None, - fp16_inference: bool = False, - clip_boxes: bool = False - ) -> Detector: - assert name in available_detectors,\ - f"Detector not available. Chooce one of the following"+\ - ",".join(available_detectors) + name: str = "DSFDDetector", + confidence_threshold: float = 0.5, + nms_iou_threshold: float = 0.3, + device=get_device(), + max_resolution: int = None, + fp16_inference: bool = False, + clip_boxes: bool = False, +) -> Detector: + assert name in available_detectors, ( + f"Detector not available. Chooce one of the following" + + ",".join(available_detectors) + ) args = dict( type=name, confidence_threshold=confidence_threshold, @@ -29,7 +26,7 @@ def build_detector( device=device, max_resolution=max_resolution, fp16_inference=fp16_inference, - clip_boxes=clip_boxes + clip_boxes=clip_boxes, ) detector = build_from_cfg(args, DETECTOR_REGISTRY) return detector diff --git a/face_detection/dsfd/__init__.py b/face_detection/dsfd/__init__.py index 9121fca..63b1827 100644 --- a/face_detection/dsfd/__init__.py +++ b/face_detection/dsfd/__init__.py @@ -1 +1 @@ -from .detect import DSFDDetector \ No newline at end of file +from .detect import DSFDDetector diff --git a/face_detection/dsfd/config.py b/face_detection/dsfd/config.py index 96fb1e2..8388102 100644 --- a/face_detection/dsfd/config.py +++ b/face_detection/dsfd/config.py @@ -1,25 +1,43 @@ resnet152_model_config = { - 'num_classes': 2, - 'feature_maps': [160, 80, 40, 20, 10, 5], - 'min_dim': 640, - 'steps': [4, 8, 16, 32, 64, 128], # stride - 'variance': [0.1, 0.2], - 'clip': True, # make default box in [0,1] - 'base': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'C', 512, 512, 512, 'M', 512, 512, 512] , - 'extras': [256, 'S', 512, 128, 'S', 256], - 'mbox': [1, 1, 1, 1, 1, 1] , - 'min_sizes': [16, 32, 64, 128, 256, 512], - 'max_sizes': [], - 'aspect_ratios': [ [1.5],[1.5],[1.5],[1.5],[1.5],[1.5] ], # [1,2] default 1 - 'backbone': 'resnet152' , # vgg, resnet, detnet, resnet50 - 'feature_pyramid_network':True , - 'bottom_up_path': False , - 'feature_enhance_module': True , - 'max_in_out': True , - 'focal_loss': False , - 'progressive_anchor': True , - 'refinedet': False , - 'max_out': False , - 'anchor_compensation': False , - 'data_anchor_sampling': False , + "num_classes": 2, + "feature_maps": [160, 80, 40, 20, 10, 5], + "min_dim": 640, + "steps": [4, 8, 16, 32, 64, 128], # stride + "variance": [0.1, 0.2], + "clip": True, # make default box in [0,1] + "base": [ + 64, + 64, + "M", + 128, + 128, + "M", + 256, + 256, + 256, + "C", + 512, + 512, + 512, + "M", + 512, + 512, + 512, + ], + "extras": [256, "S", 512, 128, "S", 256], + "mbox": [1, 1, 1, 1, 1, 1], + "min_sizes": [16, 32, 64, 128, 256, 512], + "max_sizes": [], + "aspect_ratios": [[1.5], [1.5], [1.5], [1.5], [1.5], [1.5]], # [1,2] default 1 + "backbone": "resnet152", # vgg, resnet, detnet, resnet50 + "feature_pyramid_network": True, + "bottom_up_path": False, + "feature_enhance_module": True, + "max_in_out": True, + "focal_loss": False, + "progressive_anchor": True, + "refinedet": False, + "max_out": False, + "anchor_compensation": False, + "data_anchor_sampling": False, } diff --git a/face_detection/dsfd/detect.py b/face_detection/dsfd/detect.py index c25f5f1..350764a 100644 --- a/face_detection/dsfd/detect.py +++ b/face_detection/dsfd/detect.py @@ -13,21 +13,21 @@ 
@DETECTOR_REGISTRY.register_module class DSFDDetector(Detector): - - def __init__( - self, *args, **kwargs): + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) state_dict = load_state_dict_from_url( - model_url, - map_location=self.device, - progress=True) + model_url, map_location=self.device, progress=True + ) self.net = SSD(resnet152_model_config) self.net.load_state_dict(state_dict) self.net.eval() self.net = self.net.to(self.device) @torch.no_grad() - def _detect(self, x: torch.Tensor,) -> typing.List[np.ndarray]: + def _detect( + self, + x: torch.Tensor, + ) -> typing.List[np.ndarray]: """Batched detect Args: image (np.ndarray): shape [N, H, W, 3] @@ -37,7 +37,5 @@ def _detect(self, x: torch.Tensor,) -> typing.List[np.ndarray]: # Expects BGR x = x[:, [2, 1, 0], :, :] with torch.cuda.amp.autocast(enabled=self.fp16_inference): - boxes = self.net( - x, self.confidence_threshold, self.nms_iou_threshold - ) + boxes = self.net(x, self.confidence_threshold, self.nms_iou_threshold) return boxes diff --git a/face_detection/dsfd/face_ssd.py b/face_detection/dsfd/face_ssd.py index 7f3bec9..ee1ac2f 100644 --- a/face_detection/dsfd/face_ssd.py +++ b/face_detection/dsfd/face_ssd.py @@ -7,14 +7,13 @@ class FEM(nn.Module): - def __init__(self, channel_size): super(FEM, self).__init__() self.cs = channel_size self.cpm1 = nn.Conv2d(self.cs, 256, kernel_size=3, padding=1) self.cpm2 = nn.Conv2d(self.cs, 256, kernel_size=3, dilation=2, padding=2) self.cpm3 = nn.Conv2d(256, 128, kernel_size=3, padding=1) - self.cpm4 = nn.Conv2d(256, 128, kernel_size=3, dilation=2, padding=2) + self.cpm4 = nn.Conv2d(256, 128, kernel_size=3, dilation=2, padding=2) self.cpm5 = nn.Conv2d(128, 128, kernel_size=3, padding=1) def forward(self, x): @@ -46,13 +45,13 @@ class SSD(nn.Module): def __init__(self, cfg): super(SSD, self).__init__() - self.num_classes = 2 # Background and face + self.num_classes = 2 # Background and face self.cfg = cfg resnet = torchvision.models.resnet152(pretrained=False) self.layer1 = nn.Sequential( - resnet.conv1, resnet.bn1, resnet.relu, - resnet.maxpool, resnet.layer1) + resnet.conv1, resnet.bn1, resnet.relu, resnet.maxpool, resnet.layer1 + ) self.layer2 = nn.Sequential(resnet.layer2) self.layer3 = nn.Sequential(resnet.layer3) self.layer4 = nn.Sequential(resnet.layer4) @@ -62,7 +61,7 @@ def __init__(self, cfg): nn.ReLU(inplace=True), nn.Conv2d(512, 512, kernel_size=3, padding=1, stride=2), nn.BatchNorm2d(512), - nn.ReLU(inplace=True) + nn.ReLU(inplace=True), ) self.layer6 = nn.Sequential( nn.Conv2d(512, 128, kernel_size=1), @@ -70,7 +69,7 @@ def __init__(self, cfg): nn.ReLU(inplace=True), nn.Conv2d(128, 256, kernel_size=3, padding=1, stride=2), nn.BatchNorm2d(256), - nn.ReLU(inplace=True) + nn.ReLU(inplace=True), ) output_channels = [256, 512, 1024, 2048, 512, 256] @@ -95,7 +94,7 @@ def __init__(self, cfg): self.cpm6_2 = FEM(cpm_in[4]) self.cpm7_2 = FEM(cpm_in[5]) - head = pa_multibox(output_channels, self.cfg['mbox'], self.num_classes) + head = pa_multibox(output_channels, self.cfg["mbox"], self.num_classes) self.loc = nn.ModuleList(head[0]) self.conf = nn.ModuleList(head[1]) @@ -103,14 +102,21 @@ def __init__(self, cfg): self.softmax = nn.Softmax(dim=-1) # Cache to stop computing new priors per fowrard pass - self.prior_cache = { - } + self.prior_cache = {} def init_priors(self, feature_maps, image_size): - # Hacky key system, but works.... 
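# Annotation (not a line of the patch): the cache key assembled just below
# flattens every feature-map dimension plus the image size into one string,
# e.g. feature_maps=[[160, 120], [80, 60]] with image_size=(640, 480) gives
# "160.120.80.60,640.480", so priors are recomputed only when an unseen
# feature-map/image-size combination appears.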
- key = ".".join([str(item) for i in range(len(feature_maps)) for item in feature_maps[i]]) + \ - "," + ".".join([str(_) for _ in image_size]) + key = ( + ".".join( + [ + str(item) + for i in range(len(feature_maps)) + for item in feature_maps[i] + ] + ) + + "," + + ".".join([str(_) for _ in image_size]) + ) if key in self.prior_cache: return self.prior_cache[key].clone() @@ -150,13 +156,10 @@ def forward(self, x, confidence_threshold, nms_threshold): conv6_2_x = self.layer5(fc7_x) conv7_2_x = self.layer6(conv6_2_x) - # FPN - lfpn3 = self._upsample_product( - self.latlayer3(fc7_x), self.smooth3(conv5_3_x)) - lfpn2 = self._upsample_product( - self.latlayer2(lfpn3), self.smooth2(conv4_3_x)) - lfpn1 = self._upsample_product( - self.latlayer1(lfpn2), self.smooth1(conv3_3_x)) + # FPN + lfpn3 = self._upsample_product(self.latlayer3(fc7_x), self.smooth3(conv5_3_x)) + lfpn2 = self._upsample_product(self.latlayer2(lfpn3), self.smooth2(conv4_3_x)) + lfpn1 = self._upsample_product(self.latlayer1(lfpn2), self.smooth1(conv3_3_x)) conv5_3_x = lfpn3 conv4_3_x = lfpn2 @@ -168,12 +171,13 @@ def forward(self, x, confidence_threshold, nms_threshold): self.cpm5_3(conv5_3_x), self.cpm7(fc7_x), self.cpm6_2(conv6_2_x), - self.cpm7_2(conv7_2_x)] + self.cpm7_2(conv7_2_x), + ] # Feature Enhance Module # apply multibox head to source layers featuremap_size = [] - for (x, l, c) in zip(sources, self.loc, self.conf): + for x, l, c in zip(sources, self.loc, self.conf): featuremap_size.append([x.shape[2], x.shape[3]]) loc.append(l(x).permute(0, 2, 3, 1).contiguous()) @@ -183,23 +187,23 @@ def forward(self, x, confidence_threshold, nms_threshold): conf.append(out.permute(0, 2, 3, 1).contiguous()) # Progressive Anchor - mbox_num = self.cfg['mbox'][0] - face_loc = torch.cat([ - o[:, :, :, :4*mbox_num].contiguous().view(o.size(0), -1) - for o in loc], dim=1) - face_conf = torch.cat([ - o[:, :, :, :2*mbox_num].contiguous().view(o.size(0), -1) - for o in conf], dim=1) + mbox_num = self.cfg["mbox"][0] + face_loc = torch.cat( + [o[:, :, :, : 4 * mbox_num].contiguous().view(o.size(0), -1) for o in loc], + dim=1, + ) + face_conf = torch.cat( + [o[:, :, :, : 2 * mbox_num].contiguous().view(o.size(0), -1) for o in conf], + dim=1, + ) # Test Phase self.priors = self.init_priors(featuremap_size, image_size) self.priors = self.priors.to(face_conf.device) - conf_preds = face_conf.view( - face_conf.size(0), -1, self.num_classes).softmax(dim=-1) - face_loc = face_loc.view(face_loc.size(0), -1, 4) - boxes = batched_decode( - face_loc, self.priors, - self.cfg["variance"] + conf_preds = face_conf.view(face_conf.size(0), -1, self.num_classes).softmax( + dim=-1 ) + face_loc = face_loc.view(face_loc.size(0), -1, 4) + boxes = batched_decode(face_loc, self.priors, self.cfg["variance"]) scores = conf_preds.view(-1, self.priors.shape[0], 2)[:, :, 1:] output = torch.cat((boxes, scores), dim=-1) return output @@ -214,12 +218,11 @@ def mio_module(self, each_mmbox, len_conf): if len(chunk) == 6: out = torch.cat([out, chunk[4], chunk[5]], dim=1) elif len(chunk) == 8: - out = torch.cat( - [out, chunk[4], chunk[5], chunk[6], chunk[7]], dim=1) + out = torch.cat([out, chunk[4], chunk[5], chunk[6], chunk[7]], dim=1) return out def _upsample_product(self, x, y): - '''Upsample and add two feature maps. + """Upsample and add two feature maps. Args: x: (Variable) top feature map to be upsampled. y: (Variable) lateral feature map. 
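The upsample-and-multiply fusion in _upsample_product (the method these two
hunks reformat) is self-contained and easy to verify in isolation. A minimal
standalone sketch, assuming only torch; the tensor names and shapes are
illustrative, not taken from the patch:

import torch
import torch.nn.functional as F

def upsample_product(x, y):
    # Resize the coarser map x to y's spatial size with bilinear
    # interpolation (align_corners=True, matching the patched code), which
    # supports arbitrary output sizes, then fuse by element-wise product.
    return y * F.interpolate(x, size=y.shape[2:], mode="bilinear",
                             align_corners=True)

top = torch.randn(1, 256, 8, 8)        # coarse, top-of-pyramid features
lateral = torch.randn(1, 256, 16, 16)  # finer lateral features
print(upsample_product(top, lateral).shape)  # torch.Size([1, 256, 16, 16])
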
@@ -233,11 +236,12 @@ def _upsample_product(self, x, y): conv2d feature map size: [N,_,8,8] -> upsampled feature map size: [N,_,16,16] So we choose bilinear upsample which supports arbitrary output sizes. - ''' + """ # Deprecation warning. align_corners=False default in 0.4.0, but in 0.3.0 it was True # Original code was written in 0.3.1, I guess this is correct. return y * F.interpolate( - x, size=y.shape[2:], mode="bilinear", align_corners=True) + x, size=y.shape[2:], mode="bilinear", align_corners=True + ) class DeepHeadModule(nn.Module): @@ -248,13 +252,19 @@ def __init__(self, input_channels, output_channels): self._mid_channels = min(self._input_channels, 256) self.conv1 = nn.Conv2d( - self._input_channels, self._mid_channels, kernel_size=3, padding=1) + self._input_channels, self._mid_channels, kernel_size=3, padding=1 + ) self.conv2 = nn.Conv2d( - self._mid_channels, self._mid_channels, kernel_size=3, padding=1) + self._mid_channels, self._mid_channels, kernel_size=3, padding=1 + ) self.conv3 = nn.Conv2d( - self._mid_channels, self._mid_channels, kernel_size=3, padding=1) + self._mid_channels, self._mid_channels, kernel_size=3, padding=1 + ) self.conv4 = nn.Conv2d( - self._mid_channels, self._output_channels, kernel_size=1,) + self._mid_channels, + self._output_channels, + kernel_size=1, + ) def forward(self, x): out = self.conv1(x).relu() @@ -278,10 +288,6 @@ def pa_multibox(output_channels, mbox_cfg, num_classes): else: loc_output = 12 conf_output = 6 - loc_layers += [ - DeepHeadModule(input_channels, mbox_cfg[k] * loc_output)] - conf_layers += [ - DeepHeadModule(input_channels, mbox_cfg[k] * (2+conf_output))] + loc_layers += [DeepHeadModule(input_channels, mbox_cfg[k] * loc_output)] + conf_layers += [DeepHeadModule(input_channels, mbox_cfg[k] * (2 + conf_output))] return (loc_layers, conf_layers) - - diff --git a/face_detection/dsfd/utils.py b/face_detection/dsfd/utils.py index 30e7733..8690554 100644 --- a/face_detection/dsfd/utils.py +++ b/face_detection/dsfd/utils.py @@ -6,22 +6,23 @@ class PriorBox(object): """Compute priorbox coordinates in center-offset form for each source feature map. 
""" + def __init__(self, cfg, image_size, feature_maps): super(PriorBox, self).__init__() self.image_size = image_size self.feature_maps = feature_maps # number of priors for feature map location (either 4 or 6) - self.num_priors = len(cfg['aspect_ratios']) - self.variance = cfg['variance'] or [0.1] + self.num_priors = len(cfg["aspect_ratios"]) + self.variance = cfg["variance"] or [0.1] self.min_sizes = cfg["min_sizes"] self.max_sizes = cfg["max_sizes"] - self.steps = cfg['steps'] - self.aspect_ratios = cfg['aspect_ratios'] - self.clip = cfg['clip'] + self.steps = cfg["steps"] + self.aspect_ratios = cfg["aspect_ratios"] + self.clip = cfg["clip"] for v in self.variance: if v <= 0: - raise ValueError('Variances must be greater than 0') + raise ValueError("Variances must be greater than 0") def forward(self): mean = [] @@ -36,7 +37,6 @@ def forward(self): for k, f in enumerate(self.feature_maps): for i in range(f[0]): for j in range(f[1]): - f_k_i = self.image_size[0] / self.steps[k] f_k_j = self.image_size[1] / self.steps[k] @@ -46,8 +46,8 @@ def forward(self): # aspect_ratio: 1 # rel size: min_size - s_k_i = self.min_sizes[k]/self.image_size[1] - s_k_j = self.min_sizes[k]/self.image_size[0] + s_k_i = self.min_sizes[k] / self.image_size[1] + s_k_j = self.min_sizes[k] / self.image_size[0] if len(self.aspect_ratios[0]) == 0: mean += [cx, cy, s_k_i, s_k_j] @@ -56,15 +56,24 @@ def forward(self): # rel size: sqrt(s_k * s_(k+1)) if len(self.max_sizes) == len(self.min_sizes): - s_k_prime_i = math.sqrt(s_k_i * (self.max_sizes[k] / self.image_size[1])) - s_k_prime_j = math.sqrt(s_k_j * (self.max_sizes[k] / self.image_size[0])) + s_k_prime_i = math.sqrt( + s_k_i * (self.max_sizes[k] / self.image_size[1]) + ) + s_k_prime_j = math.sqrt( + s_k_j * (self.max_sizes[k] / self.image_size[0]) + ) mean += [cx, cy, s_k_prime_i, s_k_prime_j] # rest of aspect ratios for ar in self.aspect_ratios[k]: if len(self.max_sizes) == len(self.min_sizes): - mean += [cx, cy, s_k_prime_i/math.sqrt(ar), s_k_prime_j*math.sqrt(ar)] - mean += [cx, cy, s_k_i/math.sqrt(ar), s_k_j*math.sqrt(ar)] + mean += [ + cx, + cy, + s_k_prime_i / math.sqrt(ar), + s_k_prime_j * math.sqrt(ar), + ] + mean += [cx, cy, s_k_i / math.sqrt(ar), s_k_j * math.sqrt(ar)] # back to torch land output = torch.Tensor(mean).view(-1, 4) diff --git a/face_detection/registry.py b/face_detection/registry.py index a94e475..35cc2bf 100644 --- a/face_detection/registry.py +++ b/face_detection/registry.py @@ -2,14 +2,14 @@ class Registry(object): - def __init__(self, name): self._name = name self._module_dict = dict() def __repr__(self): - format_str = self.__class__.__name__ + '(name={}, items={})'.format( - self._name, list(self._module_dict.keys())) + format_str = self.__class__.__name__ + "(name={}, items={})".format( + self._name, list(self._module_dict.keys()) + ) return format_str @property @@ -23,8 +23,7 @@ def module_dict(self): def get(self, key): obj = self._module_dict.get(key, None) if obj is None: - raise KeyError( - f'{key} is not in the {self._name} registry.') + raise KeyError(f"{key} is not in the {self._name} registry.") return obj def _register_module(self, module_class, force=False): @@ -33,12 +32,14 @@ def _register_module(self, module_class, force=False): module (:obj:`nn.Module`): Module to be registered. 
""" if not isinstance(module_class, type): - raise TypeError('module must be a class, but got {}'.format( - type(module_class))) + raise TypeError( + "module must be a class, but got {}".format(type(module_class)) + ) module_name = module_class.__name__ if not force and module_name in self._module_dict: - raise KeyError('{} is already registered in {}'.format( - module_name, self.name)) + raise KeyError( + "{} is already registered in {}".format(module_name, self.name) + ) self._module_dict[module_name] = module_class def register_module(self, cls=None, force=False): @@ -57,17 +58,19 @@ def build_from_cfg(cfg, registry, **kwargs): Returns: obj: The constructed object. """ - assert isinstance(cfg, dict) and 'type' in cfg + assert isinstance(cfg, dict) and "type" in cfg args = cfg.copy() - obj_type = args.pop('type') + obj_type = args.pop("type") if isinstance(obj_type, str): obj_cls = registry.get(obj_type) if obj_cls is None: - raise KeyError('{} is not in the {} registry'.format( - obj_type, registry.name)) + raise KeyError( + "{} is not in the {} registry".format(obj_type, registry.name) + ) elif isinstance(obj_type, type): obj_cls = obj_type else: - raise TypeError('type must be a str or valid type, but got {}'.format( - type(obj_type))) + raise TypeError( + "type must be a str or valid type, but got {}".format(type(obj_type)) + ) return obj_cls(**args, **kwargs) diff --git a/face_detection/retinaface/__init__.py b/face_detection/retinaface/__init__.py index 8c1a129..ed589e8 100644 --- a/face_detection/retinaface/__init__.py +++ b/face_detection/retinaface/__init__.py @@ -1,3 +1,3 @@ # Adapted from https://github.com/biubug6/Pytorch_Retinaface # Original license: MIT -from .detect import RetinaNetMobileNetV1, RetinaNetResNet50 \ No newline at end of file +from .detect import RetinaNetMobileNetV1, RetinaNetResNet50 diff --git a/face_detection/retinaface/config.py b/face_detection/retinaface/config.py index 2a9c1ba..772556f 100644 --- a/face_detection/retinaface/config.py +++ b/face_detection/retinaface/config.py @@ -2,42 +2,41 @@ # Original license: MIT cfg_mnet = { - 'name': 'mobilenet0.25', - 'min_sizes': [[16, 32], [64, 128], [256, 512]], - 'steps': [8, 16, 32], - 'variance': [0.1, 0.2], - 'clip': False, - 'loc_weight': 2.0, - 'gpu_train': True, - 'batch_size': 32, - 'ngpu': 1, - 'epoch': 250, - 'decay1': 190, - 'decay2': 220, - 'image_size': 640, - 'pretrain': True, - 'return_layers': {'stage1': 1, 'stage2': 2, 'stage3': 3}, - 'in_channel': 32, - 'out_channel': 64 + "name": "mobilenet0.25", + "min_sizes": [[16, 32], [64, 128], [256, 512]], + "steps": [8, 16, 32], + "variance": [0.1, 0.2], + "clip": False, + "loc_weight": 2.0, + "gpu_train": True, + "batch_size": 32, + "ngpu": 1, + "epoch": 250, + "decay1": 190, + "decay2": 220, + "image_size": 640, + "pretrain": True, + "return_layers": {"stage1": 1, "stage2": 2, "stage3": 3}, + "in_channel": 32, + "out_channel": 64, } cfg_re50 = { - 'name': 'Resnet50', - 'min_sizes': [[16, 32], [64, 128], [256, 512]], - 'steps': [8, 16, 32], - 'variance': [0.1, 0.2], - 'clip': False, - 'loc_weight': 2.0, - 'gpu_train': True, - 'batch_size': 24, - 'ngpu': 4, - 'epoch': 100, - 'decay1': 70, - 'decay2': 90, - 'image_size': 840, - 'pretrain': True, - 'return_layers': {'layer2': 1, 'layer3': 2, 'layer4': 3}, - 'in_channel': 256, - 'out_channel': 256 + "name": "Resnet50", + "min_sizes": [[16, 32], [64, 128], [256, 512]], + "steps": [8, 16, 32], + "variance": [0.1, 0.2], + "clip": False, + "loc_weight": 2.0, + "gpu_train": True, + "batch_size": 24, + 
"ngpu": 4, + "epoch": 100, + "decay1": 70, + "decay2": 90, + "image_size": 840, + "pretrain": True, + "return_layers": {"layer2": 1, "layer3": 2, "layer4": 3}, + "in_channel": 256, + "out_channel": 256, } - diff --git a/face_detection/retinaface/detect.py b/face_detection/retinaface/detect.py index b3fe62f..47ea947 100644 --- a/face_detection/retinaface/detect.py +++ b/face_detection/retinaface/detect.py @@ -16,25 +16,20 @@ class RetinaNetDetector(Detector): - - def __init__( - self, - model: str, - *args, - **kwargs): + def __init__(self, model: str, *args, **kwargs): super().__init__(*args, **kwargs) if model == "mobilenet": cfg = cfg_mnet state_dict = load_state_dict_from_url( "https://raw.githubusercontent.com/hukkelas/DSFD-Pytorch-Inference/master/RetinaFace_mobilenet025.pth", - map_location=torch_utils.get_device() + map_location=torch_utils.get_device(), ) else: assert model == "resnet50" cfg = cfg_re50 state_dict = load_state_dict_from_url( "https://api.loke.aws.unit.no/dlr-gui-backend-resources-content/v2/contents/links/8dd81669-eb84-4520-8173-dbe49d72f44cb2eef6da-3983-4a12-9085-d11555b93842c19bdf27-b924-4214-9381-e6cac30b87cf", - map_location=torch_utils.get_device() + map_location=torch_utils.get_device(), ) state_dict = {k.replace("module.", ""): v for k, v in state_dict.items()} net = RetinaFace(cfg=cfg) @@ -46,7 +41,8 @@ def __init__( self.prior_box_cache = {} def batched_detect_with_landmarks( - self, image: np.ndarray) -> typing.Tuple[np.ndarray, np.ndarray]: + self, image: np.ndarray + ) -> typing.Tuple[np.ndarray, np.ndarray]: """Takes N images and performs and returns a set of bounding boxes as detections Args: @@ -75,8 +71,7 @@ def batched_detect_with_landmarks( scores_ = scores_[keep_idx] landms_ = landms_[keep_idx] # Non maxima suppression - keep_idx = nms( - boxes_, scores_, self.nms_iou_threshold) + keep_idx = nms(boxes_, scores_, self.nms_iou_threshold) boxes_ = boxes_[keep_idx] scores_ = scores_[keep_idx] landms_ = landms_[keep_idx] @@ -91,16 +86,13 @@ def batched_detect_with_landmarks( landms_ = landms_.cpu().numpy().reshape(-1, 5, 2) landms_[:, :, 0] *= width landms_[:, :, 1] *= height - dets = torch.cat( - (boxes_, scores_.view(-1, 1)), dim=1).cpu().numpy() + dets = torch.cat((boxes_, scores_.view(-1, 1)), dim=1).cpu().numpy() final_output_box.append(dets) final_output_landmarks.append(landms_) return final_output_box, final_output_landmarks @torch.no_grad() - def _detect( - self, image: np.ndarray, - return_landmarks=False) -> np.ndarray: + def _detect(self, image: np.ndarray, return_landmarks=False) -> np.ndarray: """Batched detect Args: image (np.ndarray): shape [N, H, W, 3] @@ -115,29 +107,26 @@ def _detect( if image.shape[2:] in self.prior_box_cache: priors = self.prior_box_cache[image.shape[2:]] else: - priorbox = PriorBox( - self.cfg, image_size=(height, width)) + priorbox = PriorBox(self.cfg, image_size=(height, width)) priors = priorbox.forward() self.prior_box_cache[image.shape[2:]] = priors priors = torch_utils.to_cuda(priors, self.device) prior_data = priors.data - boxes = batched_decode(loc, prior_data, self.cfg['variance']) + boxes = batched_decode(loc, prior_data, self.cfg["variance"]) boxes = torch.cat((boxes, scores), dim=-1) if return_landmarks: - landms = decode_landm(landms, prior_data, self.cfg['variance']) + landms = decode_landm(landms, prior_data, self.cfg["variance"]) return boxes, landms return boxes @DETECTOR_REGISTRY.register_module class RetinaNetResNet50(RetinaNetDetector): - def __init__(self, *args, **kwargs): 
super().__init__("resnet50", *args, **kwargs) @DETECTOR_REGISTRY.register_module class RetinaNetMobileNetV1(RetinaNetDetector): - def __init__(self, *args, **kwargs): super().__init__("mobilenet", *args, **kwargs) diff --git a/face_detection/retinaface/models/net.py b/face_detection/retinaface/models/net.py index 777fa95..4d5d1ae 100644 --- a/face_detection/retinaface/models/net.py +++ b/face_detection/retinaface/models/net.py @@ -9,7 +9,7 @@ def conv_bn(inp, oup, stride=1, leaky=0): return nn.Sequential( nn.Conv2d(inp, oup, 3, stride, 1, bias=False), nn.BatchNorm2d(oup), - nn.LeakyReLU(negative_slope=leaky, inplace=True) + nn.LeakyReLU(negative_slope=leaky, inplace=True), ) @@ -24,7 +24,7 @@ def conv_bn1X1(inp, oup, stride=1, leaky=0): return nn.Sequential( nn.Conv2d(inp, oup, 1, stride, padding=0, bias=False), nn.BatchNorm2d(oup), - nn.LeakyReLU(negative_slope=leaky, inplace=True) + nn.LeakyReLU(negative_slope=leaky, inplace=True), ) @@ -33,7 +33,6 @@ def conv_dw(inp, oup, stride, leaky=0.1): nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False), nn.BatchNorm2d(inp), nn.LeakyReLU(negative_slope=leaky, inplace=True), - nn.Conv2d(inp, oup, 1, 1, 0, bias=False), nn.BatchNorm2d(oup), nn.LeakyReLU(negative_slope=leaky, inplace=True), @@ -45,19 +44,17 @@ def __init__(self, in_channel, out_channel): super().__init__() assert out_channel % 4 == 0 leaky = 0 - if (out_channel <= 64): + if out_channel <= 64: leaky = 0.1 - self.conv3X3 = conv_bn_no_relu(in_channel, out_channel//2, stride=1) + self.conv3X3 = conv_bn_no_relu(in_channel, out_channel // 2, stride=1) - self.conv5X5_1 = conv_bn( - in_channel, out_channel//4, stride=1, leaky=leaky) - self.conv5X5_2 = conv_bn_no_relu( - out_channel//4, out_channel//4, stride=1) + self.conv5X5_1 = conv_bn(in_channel, out_channel // 4, stride=1, leaky=leaky) + self.conv5X5_2 = conv_bn_no_relu(out_channel // 4, out_channel // 4, stride=1) self.conv7X7_2 = conv_bn( - out_channel//4, out_channel//4, stride=1, leaky=leaky) - self.conv7x7_3 = conv_bn_no_relu( - out_channel//4, out_channel//4, stride=1) + out_channel // 4, out_channel // 4, stride=1, leaky=leaky + ) + self.conv7x7_3 = conv_bn_no_relu(out_channel // 4, out_channel // 4, stride=1) def forward(self, input_): conv3X3 = self.conv3X3(input_) @@ -77,14 +74,11 @@ class FPN(nn.Module): def __init__(self, in_channels_list, out_channels): super().__init__() leaky = 0 - if (out_channels <= 64): + if out_channels <= 64: leaky = 0.1 - self.output1 = conv_bn1X1( - in_channels_list[0], out_channels, leaky=leaky) - self.output2 = conv_bn1X1( - in_channels_list[1], out_channels, leaky=leaky) - self.output3 = conv_bn1X1( - in_channels_list[2], out_channels, leaky=leaky) + self.output1 = conv_bn1X1(in_channels_list[0], out_channels, leaky=leaky) + self.output2 = conv_bn1X1(in_channels_list[1], out_channels, leaky=leaky) + self.output3 = conv_bn1X1(in_channels_list[2], out_channels, leaky=leaky) self.merge1 = conv_bn(out_channels, out_channels, leaky=leaky) self.merge2 = conv_bn(out_channels, out_channels, leaky=leaky) @@ -95,12 +89,14 @@ def forward(self, input_): output2 = self.output2(input_[1]) output3 = self.output3(input_[2]) up3 = F.interpolate( - output3, size=[int(output2.size(2)), int(output2.size(3))], mode="nearest") + output3, size=[int(output2.size(2)), int(output2.size(3))], mode="nearest" + ) output2 = output2 + up3 output2 = self.merge2(output2) up2 = F.interpolate( - output2, size=[int(output1.size(2)), int(output1.size(3))], mode="nearest") + output2, size=[int(output1.size(2)), 
int(output1.size(3))], mode="nearest" + ) output1 = output1 + up2 output1 = self.merge1(output1) @@ -112,8 +108,8 @@ class MobileNetV1(nn.Module): def __init__(self): super(MobileNetV1, self).__init__() self.stage1 = nn.Sequential( - conv_bn(3, 8, 2, leaky=0.1), # 3 - conv_dw(8, 16, 1), # 7 + conv_bn(3, 8, 2, leaky=0.1), # 3 + conv_dw(8, 16, 1), # 7 conv_dw(16, 32, 2), # 11 conv_dw(32, 32, 1), # 19 conv_dw(32, 64, 2), # 27 @@ -121,17 +117,17 @@ def __init__(self): ) self.stage2 = nn.Sequential( conv_dw(64, 128, 2), # 43 + 16 = 59 - conv_dw(128, 128, 1), # 59 + 32 = 91 - conv_dw(128, 128, 1), # 91 + 32 = 123 - conv_dw(128, 128, 1), # 123 + 32 = 155 - conv_dw(128, 128, 1), # 155 + 32 = 187 - conv_dw(128, 128, 1), # 187 + 32 = 219 + conv_dw(128, 128, 1), # 59 + 32 = 91 + conv_dw(128, 128, 1), # 91 + 32 = 123 + conv_dw(128, 128, 1), # 123 + 32 = 155 + conv_dw(128, 128, 1), # 155 + 32 = 187 + conv_dw(128, 128, 1), # 187 + 32 = 219 ) self.stage3 = nn.Sequential( - conv_dw(128, 256, 2), # 219 +3 2 = 241 - conv_dw(256, 256, 1), # 241 + 64 = 301 + conv_dw(128, 256, 2), # 219 +3 2 = 241 + conv_dw(256, 256, 1), # 241 + 64 = 301 ) - self.avg = nn.AdaptiveAvgPool2d((1,1)) + self.avg = nn.AdaptiveAvgPool2d((1, 1)) self.fc = nn.Linear(256, 1000) def forward(self, x): diff --git a/face_detection/retinaface/models/retinaface.py b/face_detection/retinaface/models/retinaface.py index 77fb358..71796a6 100644 --- a/face_detection/retinaface/models/retinaface.py +++ b/face_detection/retinaface/models/retinaface.py @@ -10,9 +10,7 @@ class ClassHead(nn.Module): def __init__(self, inchannels=512, num_anchors=3): super().__init__() self.num_anchors = num_anchors - self.conv1x1 = nn.Conv2d( - inchannels, self.num_anchors*2, - kernel_size=1) + self.conv1x1 = nn.Conv2d(inchannels, self.num_anchors * 2, kernel_size=1) def forward(self, x): out = self.conv1x1(x) @@ -24,9 +22,7 @@ def forward(self, x): class BboxHead(nn.Module): def __init__(self, inchannels=512, num_anchors=3): super().__init__() - self.conv1x1 = nn.Conv2d( - inchannels, num_anchors*4, - kernel_size=1) + self.conv1x1 = nn.Conv2d(inchannels, num_anchors * 4, kernel_size=1) def forward(self, x): out = self.conv1x1(x) @@ -38,8 +34,7 @@ def forward(self, x): class LandmarkHead(nn.Module): def __init__(self, inchannels=512, num_anchors=3): super().__init__() - self.conv1x1 = nn.Conv2d( - inchannels, num_anchors*10, kernel_size=1) + self.conv1x1 = nn.Conv2d(inchannels, num_anchors * 10, kernel_size=1) def forward(self, x): out = self.conv1x1(x) @@ -56,35 +51,38 @@ def __init__(self, cfg): """ super().__init__() backbone = None - if cfg['name'] == 'mobilenet0.25': + if cfg["name"] == "mobilenet0.25": backbone = MobileNetV1() - elif cfg['name'] == 'Resnet50': + elif cfg["name"] == "Resnet50": import torchvision.models as models + backbone = models.resnet50(pretrained=False) - self.body = _utils.IntermediateLayerGetter(backbone, cfg['return_layers']) - in_channels_stage2 = cfg['in_channel'] + self.body = _utils.IntermediateLayerGetter(backbone, cfg["return_layers"]) + in_channels_stage2 = cfg["in_channel"] in_channels_list = [ in_channels_stage2 * 2, in_channels_stage2 * 4, in_channels_stage2 * 8, ] - out_channels = cfg['out_channel'] + out_channels = cfg["out_channel"] self.fpn = FPN(in_channels_list, out_channels) self.ssh1 = SSH(out_channels, out_channels) self.ssh2 = SSH(out_channels, out_channels) self.ssh3 = SSH(out_channels, out_channels) - self.ClassHead = self._make_class_head(fpn_num=3, inchannels=cfg['out_channel']) - self.BboxHead = 
self._make_bbox_head(fpn_num=3, inchannels=cfg['out_channel']) - self.LandmarkHead = self._make_landmark_head(fpn_num=3, inchannels=cfg['out_channel']) + self.ClassHead = self._make_class_head(fpn_num=3, inchannels=cfg["out_channel"]) + self.BboxHead = self._make_bbox_head(fpn_num=3, inchannels=cfg["out_channel"]) + self.LandmarkHead = self._make_landmark_head( + fpn_num=3, inchannels=cfg["out_channel"] + ) def _make_class_head(self, fpn_num=3, inchannels=64, anchor_num=2): classhead = nn.ModuleList() for i in range(fpn_num): classhead.append(ClassHead(inchannels, anchor_num)) return classhead - + def _make_bbox_head(self, fpn_num=3, inchannels=64, anchor_num=2): bboxhead = nn.ModuleList() for i in range(fpn_num): @@ -110,10 +108,13 @@ def forward(self, inputs): features = [feature1, feature2, feature3] bbox_regressions = torch.cat( - [self.BboxHead[i](feature) for i, feature in enumerate(features)], dim=1) + [self.BboxHead[i](feature) for i, feature in enumerate(features)], dim=1 + ) classifications = torch.cat( - [self.ClassHead[i](feature) for i, feature in enumerate(features)],dim=1) + [self.ClassHead[i](feature) for i, feature in enumerate(features)], dim=1 + ) ldm_regressions = torch.cat( - [self.LandmarkHead[i](feature) for i, feature in enumerate(features)], dim=1) + [self.LandmarkHead[i](feature) for i, feature in enumerate(features)], dim=1 + ) - return (bbox_regressions, classifications.softmax(dim=-1), ldm_regressions) \ No newline at end of file + return (bbox_regressions, classifications.softmax(dim=-1), ldm_regressions) diff --git a/face_detection/retinaface/onnx.py b/face_detection/retinaface/onnx.py index 1f53c04..7cdd615 100644 --- a/face_detection/retinaface/onnx.py +++ b/face_detection/retinaface/onnx.py @@ -13,14 +13,13 @@ class RetinaNetDetectorONNX(torch.nn.Module): - def __init__(self, input_imshape, inference_imshape): super().__init__() self.device = torch.device("cpu") cfg = cfg_re50 state_dict = load_state_dict_from_url( "https://folk.ntnu.no/haakohu/RetinaFace_ResNet50.pth", - map_location=torch_utils.get_device() + map_location=torch_utils.get_device(), ) state_dict = {k.replace("module.", ""): v for k, v in state_dict.items()} net = RetinaFace(cfg=cfg) @@ -28,7 +27,7 @@ def __init__(self, input_imshape, inference_imshape): net.load_state_dict(state_dict) self.net = net.to(self.device) self.input_imshape = input_imshape - self.inference_imshape = inference_imshape # (height, width) + self.inference_imshape = inference_imshape # (height, width) self.mean = np.array([104, 117, 123], dtype=np.float32) self.mean = torch.from_numpy(self.mean).reshape((1, 3, 1, 1)) self.mean = torch.nn.Parameter(self.mean).float().to(self.device) @@ -42,7 +41,7 @@ def export_onnx(self, onnx_filepath): image = cv2.imread("images/0_Parade_marchingband_1_765.jpg") except: raise FileNotFoundError() - + height, width = self.input_imshape image = cv2.resize(image, (width, height)) @@ -54,29 +53,33 @@ def export_onnx(self, onnx_filepath): output_names = ["loc"] torch.onnx.export( - self, example_inputs, + self, + example_inputs, onnx_filepath, verbose=True, input_names=["image"], output_names=output_names, export_params=True, - opset_version=10 # functional interpolate does not support opset 11+ - ) + opset_version=10, # functional interpolate does not support opset 11+ + ) np.save(f"outputs.npy", actual_outputs) @torch.no_grad() def forward(self, image): """ - image: shape [1, 3, H, W] - Exports model where outputs are NOT thresholded or performed NMS on. 
+ image: shape [1, 3, H, W] + Exports model where outputs are NOT thresholded or performed NMS on. """ - image = torch.nn.functional.interpolate(image, self.inference_imshape, mode="nearest") + image = torch.nn.functional.interpolate( + image, self.inference_imshape, mode="nearest" + ) # Expects BGR image = image - self.mean assert image.shape[2] == self.inference_imshape[0] assert image.shape[3] == self.inference_imshape[1] - assert image.shape[0] == 1,\ + assert image.shape[0] == 1, ( "The ONNX export only supports one image at a time tensors currently" + ) loc, conf, landms = self.net(image) # forward pass assert conf.shape[2] == 2 scores = conf[:, :, 1:] diff --git a/face_detection/retinaface/prior_box.py b/face_detection/retinaface/prior_box.py index ec9b409..86bcf47 100644 --- a/face_detection/retinaface/prior_box.py +++ b/face_detection/retinaface/prior_box.py @@ -9,8 +9,8 @@ def generate_prior_box(feature_maps, image_size, steps, min_sizes): n_anchors = 0 for x in feature_maps: n_anchors += int(x[0]) * int(x[1]) * len(min_sizes[0]) - anchors = np.empty((n_anchors*4), dtype=np.float64) -# print(feature_maps, image_size, steps, min_sizes) + anchors = np.empty((n_anchors * 4), dtype=np.float64) + # print(feature_maps, image_size, steps, min_sizes) idx_anchor = 0 for k, f in enumerate(feature_maps): min_sizes_ = min_sizes[k] @@ -23,20 +23,25 @@ def generate_prior_box(feature_maps, image_size, steps, min_sizes): dense_cy = [y * steps[k] / image_size[0] for y in [i + 0.5]] for cy in dense_cy: for cx in dense_cx: - anchors[idx_anchor:idx_anchor+4] = [cx, cy, s_kx, s_ky] - idx_anchor += 1*4 -# assert idx_anchor == anchors.shape[0], f"{anchors.shape[0]}, {idx_anchor}" + anchors[idx_anchor : idx_anchor + 4] = [cx, cy, s_kx, s_ky] + idx_anchor += 1 * 4 + # assert idx_anchor == anchors.shape[0], f"{anchors.shape[0]}, {idx_anchor}" return anchors class PriorBox(object): - def __init__(self, cfg, image_size=None, phase='train'): + def __init__(self, cfg, image_size=None, phase="train"): super(PriorBox, self).__init__() - self.min_sizes = np.array(cfg['min_sizes']).astype(np.int16) - self.steps = np.array(cfg['steps']).astype(np.int16) - self.clip = cfg['clip'] + self.min_sizes = np.array(cfg["min_sizes"]).astype(np.int16) + self.steps = np.array(cfg["steps"]).astype(np.int16) + self.clip = cfg["clip"] self.image_size = np.array(image_size).astype(np.int16) - self.feature_maps = np.array([[ceil(self.image_size[0]/step), ceil(self.image_size[1]/step)] for step in self.steps]).astype(np.int16) + self.feature_maps = np.array( + [ + [ceil(self.image_size[0] / step), ceil(self.image_size[1] / step)] + for step in self.steps + ] + ).astype(np.int16) self.name = "s" def forward(self): diff --git a/face_detection/retinaface/tensorrt_wrap.py b/face_detection/retinaface/tensorrt_wrap.py index 106052c..a060864 100644 --- a/face_detection/retinaface/tensorrt_wrap.py +++ b/face_detection/retinaface/tensorrt_wrap.py @@ -14,23 +14,24 @@ class TensorRTRetinaFace: - def __init__( - self, - input_imshape, - inference_imshape, - confidence_threshold: float = 0.5, - nms_threshold: float = 0.3): + self, + input_imshape, + inference_imshape, + confidence_threshold: float = 0.5, + nms_threshold: float = 0.3, + ): self.inference_imshape = inference_imshape self.input_imshape = input_imshape self.confidence_threshold = confidence_threshold self.nms_threshold = nms_threshold - identifier = "_".join(str(x) for x in list(input_imshape) + list(inference_imshape)) + identifier = "_".join( + str(x) for x in list(input_imshape) 
+ list(inference_imshape) + ) onnx_filepath = f"retinaface_input_{identifier}_.onnx" onnx_filepath = os.path.join(cache_dir, onnx_filepath) if not os.path.isfile(onnx_filepath): - detector = RetinaNetDetectorONNX( - input_imshape, inference_imshape) + detector = RetinaNetDetectorONNX(input_imshape, inference_imshape) detector.export_onnx(onnx_filepath) self.TRT_LOGGER = trt.Logger(trt.tensorrt.Logger.Severity.INFO) self.engine_path = onnx_filepath.replace(".onnx", ".trt") @@ -45,45 +46,62 @@ def initialize_bindings(self): print( self.engine.get_binding_name(idx), self.engine.get_binding_dtype(idx), - self.engine.get_binding_shape(idx)) + self.engine.get_binding_shape(idx), + ) if self.engine.binding_is_input(idx): # we expect only one input input_shape = self.engine.get_binding_shape(idx) - input_size = trt.volume(input_shape) * self.engine.max_batch_size * np.dtype(np.float32).itemsize # in bytes - self.input_bindings.append({ - "input_shape": input_shape, - "input_size": input_size, - "device_input": cuda.mem_alloc(input_size), - }) + input_size = ( + trt.volume(input_shape) + * self.engine.max_batch_size + * np.dtype(np.float32).itemsize + ) # in bytes + self.input_bindings.append( + { + "input_shape": input_shape, + "input_size": input_size, + "device_input": cuda.mem_alloc(input_size), + } + ) else: # and one output output_shape = self.engine.get_binding_shape(idx) - host_output = cuda.pagelocked_empty(trt.volume(output_shape) * self.engine.max_batch_size, dtype=np.float32) + host_output = cuda.pagelocked_empty( + trt.volume(output_shape) * self.engine.max_batch_size, + dtype=np.float32, + ) device_output = cuda.mem_alloc(host_output.nbytes) - self.output_bindings.append({ - "output_shape": output_shape, - "host_output": host_output, - "device_output": device_output, - "name": self.engine.get_binding_name(idx) - }) + self.output_bindings.append( + { + "output_shape": output_shape, + "host_output": host_output, + "device_output": device_output, + "name": self.engine.get_binding_name(idx), + } + ) def build_engine(self, onnx_filepath: str): if os.path.isfile(self.engine_path): - with open(self.engine_path, "rb") as f, trt.Runtime(self.TRT_LOGGER) as runtime: + with ( + open(self.engine_path, "rb") as f, + trt.Runtime(self.TRT_LOGGER) as runtime, + ): engine = runtime.deserialize_cuda_engine(f.read()) return engine builder = trt.Builder(self.TRT_LOGGER) - network_creation_flag = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + network_creation_flag = 1 << int( + trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH + ) network = builder.create_network(network_creation_flag) print(network) parser = trt.OnnxParser(network, self.TRT_LOGGER) # parse ONNX - with open(onnx_filepath, 'rb') as model: - print('Beginning ONNX file parsing') + with open(onnx_filepath, "rb") as model: + print("Beginning ONNX file parsing") if not parser.parse(model.read()): for error in range(parser.num_errors): print(parser.get_error(error)) - print('Completed parsing of ONNX file') + print("Completed parsing of ONNX file") builder.max_batch_size = 1 builder.debug_sync = True builder.max_workspace_size = 2**34 @@ -91,7 +109,7 @@ def build_engine(self, onnx_filepath: str): if builder.platform_has_fast_fp16: builder.fp16_mode = True - print('Building an engine...') + print("Building an engine...") engine = builder.build_cuda_engine(network) print("Completed creating Engine") @@ -101,16 +119,13 @@ def build_engine(self, onnx_filepath: str): def run_engine(self, img): stream = cuda.Stream() - 
cuda.memcpy_htod_async( - self.input_bindings[0]["device_input"], img, stream) - bs = [int(x["device_input"]) for x in self.input_bindings] +\ - [int(x["device_output"]) for x in self.output_bindings] - self.context.execute_async( - bindings=bs, - stream_handle=stream.handle) + cuda.memcpy_htod_async(self.input_bindings[0]["device_input"], img, stream) + bs = [int(x["device_input"]) for x in self.input_bindings] + [ + int(x["device_output"]) for x in self.output_bindings + ] + self.context.execute_async(bindings=bs, stream_handle=stream.handle) for out in self.output_bindings: - cuda.memcpy_dtoh_async( - out["host_output"], out["device_output"], stream) + cuda.memcpy_dtoh_async(out["host_output"], out["device_output"], stream) out["host_output"] = out["host_output"].reshape(out["output_shape"]) assert len(self.output_bindings) == 1 stream.synchronize() @@ -146,15 +161,13 @@ def infer(self, img): height = 720 expected_imsize = (height, width) image = cv2.resize(image, (width, height)) - detector = TensorRTRetinaFace( - (height, width), - (480, 640)) + detector = TensorRTRetinaFace((height, width), (480, 640)) print(detector.infer(image)) boxes, landms, scores = detector.infer(image) for i in range(boxes.shape[0]): print(boxes[i]) x0, y0, x1, y1 = boxes[i].astype(int) - image = cv2.rectangle(image, (x0, y0), (x1, y1),(255, 0, 0), 1 ) + image = cv2.rectangle(image, (x0, y0), (x1, y1), (255, 0, 0), 1) for kp in landms[i]: image = cv2.circle(image, tuple(kp), 5, (255, 0, 0)) - cv2.imwrite("test.png", image) \ No newline at end of file + cv2.imwrite("test.png", image) diff --git a/face_detection/retinaface/utils.py b/face_detection/retinaface/utils.py index e4ff17b..f9794b5 100644 --- a/face_detection/retinaface/utils.py +++ b/face_detection/retinaface/utils.py @@ -17,12 +17,16 @@ def decode_landm(pre, priors, variances): decoded landm predictions """ priors = priors[None] - landms = torch.cat((priors[:, :, :2] + pre[:, :, :2] * variances[0] * priors[:, :, 2:], - priors[:, :, :2] + pre[:, :, 2:4] * variances[0] * priors[:, :, 2:], - priors[:, :, :2] + pre[:, :, 4:6] * variances[0] * priors[:, :, 2:], - priors[:, :, :2] + pre[:, :, 6:8] * variances[0] * priors[:, :, 2:], - priors[:, :, :2] + pre[:, :, 8:10] * variances[0] * priors[:, :, 2:], - ), dim=2) + landms = torch.cat( + ( + priors[:, :, :2] + pre[:, :, :2] * variances[0] * priors[:, :, 2:], + priors[:, :, :2] + pre[:, :, 2:4] * variances[0] * priors[:, :, 2:], + priors[:, :, :2] + pre[:, :, 4:6] * variances[0] * priors[:, :, 2:], + priors[:, :, :2] + pre[:, :, 6:8] * variances[0] * priors[:, :, 2:], + priors[:, :, :2] + pre[:, :, 8:10] * variances[0] * priors[:, :, 2:], + ), + dim=2, + ) return landms @@ -34,13 +38,13 @@ def python_nms(boxes, overlapThresh): boxes = boxes.astype(np.float32) if boxes.dtype.kind == "i": boxes = boxes.astype("float") - # initialize the list of picked indexes + # initialize the list of picked indexes keep_idx = [] # grab the coordinates of the bounding boxes - x1 = boxes[:,0] - y1 = boxes[:,1] - x2 = boxes[:,2] - y2 = boxes[:,3] + x1 = boxes[:, 0] + y1 = boxes[:, 1] + x2 = boxes[:, 2] + y2 = boxes[:, 3] area = (x2 - x1 + 1) * (y2 - y1 + 1) idxs = np.argsort(y2) # keep looping while some indexes still remain in the indexes @@ -64,6 +68,7 @@ def python_nms(boxes, overlapThresh): # compute the ratio of overlap overlap = (w * h) / area[idxs[:last]] # delete all indexes from the index list that have - idxs = np.delete(idxs, np.concatenate(([last], - np.where(overlap > overlapThresh)[0]))) + idxs = np.delete( + 
idxs, np.concatenate(([last], np.where(overlap > overlapThresh)[0])) + ) return keep_idx diff --git a/test.py b/test.py index b64c959..a84512f 100644 --- a/test.py +++ b/test.py @@ -14,25 +14,17 @@ def draw_faces(im, bboxes): if __name__ == "__main__": impaths = "images" impaths = glob.glob(os.path.join(impaths, "*.jpg")) - detector = face_detection.build_detector( - "DSFDDetector", - max_resolution=1080 - ) + detector = face_detection.build_detector("DSFDDetector", max_resolution=1080) for impath in impaths: - if impath.endswith("out.jpg"): continue + if impath.endswith("out.jpg"): + continue im = cv2.imread(impath) print("Processing:", impath) t = time.time() - dets = detector.detect( - im[:, :, ::-1] - )[:, :4] - print(f"Detection time: {time.time()- t:.3f}") + dets = detector.detect(im[:, :, ::-1])[:, :4] + print(f"Detection time: {time.time() - t:.3f}") draw_faces(im, dets) imname = os.path.basename(impath).split(".")[0] - output_path = os.path.join( - os.path.dirname(impath), - f"{imname}_out.jpg" - ) + output_path = os.path.join(os.path.dirname(impath), f"{imname}_out.jpg") cv2.imwrite(output_path, im) - \ No newline at end of file diff --git a/tests/test_detector.py b/tests/test_detector.py index 5e2a7a8..cf35929 100644 --- a/tests/test_detector.py +++ b/tests/test_detector.py @@ -3,6 +3,7 @@ import cv2 import face_detection # your face detection library + def compute_iou(boxA, boxB): xA = max(boxA[0], boxB[0]) yA = max(boxA[1], boxB[1]) @@ -26,32 +27,31 @@ def compute_iou(boxA, boxB): @pytest.fixture def ground_truth_boxes(): - return np.array([ - [337.8219142, 227.30235955, 363.18236876, 260.75754449], - [120.61462998, 244.68149829, 153.73102021, 290.13813281], - [793.31824303, 88.6468603, 837.80744743, 153.03655452], - [499.23486614, 212.40574998, 521.46317768, 241.84556359], - [412.37690353, 219.29100847, 437.20971298, 250.56026506], - [654.66749144, 203.24960518, 676.66251707, 231.10678673], - [692.63414764, 248.56575656, 726.75259781, 292.49138522], - [215.16035197, 269.50566196, 240.76163981, 303.02491093], - [189.08402371, 212.22481942, 210.5982945, 240.76419282], - [571.04836243, 213.0569253, 590.01044816, 238.5836339], - [ 16.7418344, 235.77498758, 41.44155097, 265.93795145], - [284.28320718, 213.93544269, 304.40658212, 238.0858829], - [167.58154631, 76.92867303, 187.13439512, 102.97041345], - ]) + return np.array( + [ + [337.8219142, 227.30235955, 363.18236876, 260.75754449], + [120.61462998, 244.68149829, 153.73102021, 290.13813281], + [793.31824303, 88.6468603, 837.80744743, 153.03655452], + [499.23486614, 212.40574998, 521.46317768, 241.84556359], + [412.37690353, 219.29100847, 437.20971298, 250.56026506], + [654.66749144, 203.24960518, 676.66251707, 231.10678673], + [692.63414764, 248.56575656, 726.75259781, 292.49138522], + [215.16035197, 269.50566196, 240.76163981, 303.02491093], + [189.08402371, 212.22481942, 210.5982945, 240.76419282], + [571.04836243, 213.0569253, 590.01044816, 238.5836339], + [16.7418344, 235.77498758, 41.44155097, 265.93795145], + [284.28320718, 213.93544269, 304.40658212, 238.0858829], + [167.58154631, 76.92867303, 187.13439512, 102.97041345], + ] + ) -@pytest.mark.parametrize("detector_name", [ - "DSFDDetector", - "RetinaNetResNet50", - "RetinaNetMobileNetV1" -]) -def test_detector_detects_boxes_with_iou(detector_name, ground_truth_boxes): + +@pytest.mark.parametrize( + "detector_name", ["DSFDDetector", "RetinaNetResNet50", "RetinaNetMobileNetV1"] +) +def test_detector_detects_boxes_with_iou(detector_name, ground_truth_boxes): detector = 
face_detection.build_detector(
-        detector_name,
-        max_resolution=1080,
-        confidence_threshold=0.5
+        detector_name, max_resolution=1080, confidence_threshold=0.5
     )
     impath = "images/11_Meeting_Meeting_11_Meeting_Meeting_11_176.jpg"
     img = cv2.imread(impath)
@@ -65,4 +65,3 @@ def test_detector_detects_boxes_with_iou(detector_name, ground_truth_boxes):
             f"{detector_name} failed to detect ground truth box {gt_box} "
             f"with IoU >= 0.5"
         )
-

From ebf337f4bc46965e81405b09540c4b28b8404e5d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?H=C3=A5kon=20Hukkel=C3=A5s?=
Date: Mon, 9 Jun 2025 17:40:49 +0200
Subject: [PATCH 6/8] fix: ruff check

---
 .github/workflows/ruff.yml                 | 4 +++-
 face_detection/__init__.py                 | 8 ++++++++
 face_detection/build.py                    | 2 +-
 face_detection/dsfd/__init__.py            | 2 ++
 face_detection/dsfd/detect.py              | 1 -
 face_detection/dsfd/face_ssd.py            | 4 ++--
 face_detection/retinaface/__init__.py      | 2 ++
 face_detection/retinaface/onnx.py          | 4 ++--
 face_detection/retinaface/tensorrt_wrap.py | 1 -
 face_detection/torch_utils.py              | 2 +-
 10 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/.github/workflows/ruff.yml b/.github/workflows/ruff.yml
index 08d51d9..90a8965 100644
--- a/.github/workflows/ruff.yml
+++ b/.github/workflows/ruff.yml
@@ -11,6 +11,8 @@ jobs:
 
       - name: Install ruff
         run: uv add ruff
 
-      - name: Run ruff
+      - name: Run ruff check
         run: uv run ruff check
+      - name: Run ruff format check
+        run: uv run ruff format --check
diff --git a/face_detection/__init__.py b/face_detection/__init__.py
index a4d0316..7a77d0b 100644
--- a/face_detection/__init__.py
+++ b/face_detection/__init__.py
@@ -1,3 +1,11 @@
 from .build import build_detector, available_detectors
 from .dsfd import DSFDDetector
 from .retinaface import RetinaNetMobileNetV1, RetinaNetResNet50
+
+__all__ = [
+    "build_detector",
+    "available_detectors",
+    "RetinaNetMobileNetV1",
+    "RetinaNetResNet50",
+    "DSFDDetector",
+]
diff --git a/face_detection/build.py b/face_detection/build.py
index 874970e..6ae1728 100644
--- a/face_detection/build.py
+++ b/face_detection/build.py
@@ -16,7 +16,7 @@ def build_detector(
     clip_boxes: bool = False,
 ) -> Detector:
     assert name in available_detectors, (
-        f"Detector not available. Chooce one of the following"
+        "Detector not available. Choose one of the following: "
         + ",".join(available_detectors)
     )
     args = dict(
diff --git a/face_detection/dsfd/__init__.py b/face_detection/dsfd/__init__.py
index 63b1827..bb05cfd 100644
--- a/face_detection/dsfd/__init__.py
+++ b/face_detection/dsfd/__init__.py
@@ -1 +1,3 @@
 from .detect import DSFDDetector
+
+__all__ = ["DSFDDetector"]
diff --git a/face_detection/dsfd/detect.py b/face_detection/dsfd/detect.py
index 350764a..8ec0ab0 100644
--- a/face_detection/dsfd/detect.py
+++ b/face_detection/dsfd/detect.py
@@ -3,7 +3,6 @@
 import typing
 from .face_ssd import SSD
 from .config import resnet152_model_config
-from ..
import torch_utils from torch.hub import load_state_dict_from_url from ..base import Detector from ..build import DETECTOR_REGISTRY diff --git a/face_detection/dsfd/face_ssd.py b/face_detection/dsfd/face_ssd.py index ee1ac2f..5618a9b 100644 --- a/face_detection/dsfd/face_ssd.py +++ b/face_detection/dsfd/face_ssd.py @@ -177,9 +177,9 @@ def forward(self, x, confidence_threshold, nms_threshold): # apply multibox head to source layers featuremap_size = [] - for x, l, c in zip(sources, self.loc, self.conf): + for x, loc, c in zip(sources, self.loc, self.conf): featuremap_size.append([x.shape[2], x.shape[3]]) - loc.append(l(x).permute(0, 2, 3, 1).contiguous()) + loc.append(loc(x).permute(0, 2, 3, 1).contiguous()) # Max in out len_conf = len(conf) diff --git a/face_detection/retinaface/__init__.py b/face_detection/retinaface/__init__.py index ed589e8..f010d41 100644 --- a/face_detection/retinaface/__init__.py +++ b/face_detection/retinaface/__init__.py @@ -1,3 +1,5 @@ # Adapted from https://github.com/biubug6/Pytorch_Retinaface # Original license: MIT from .detect import RetinaNetMobileNetV1, RetinaNetResNet50 + +__all__ = ["RetinaNetMobileNetV1", "RetinaNetResNet50"] diff --git a/face_detection/retinaface/onnx.py b/face_detection/retinaface/onnx.py index 7cdd615..5d69065 100644 --- a/face_detection/retinaface/onnx.py +++ b/face_detection/retinaface/onnx.py @@ -39,7 +39,7 @@ def __init__(self, input_imshape, inference_imshape): def export_onnx(self, onnx_filepath): try: image = cv2.imread("images/0_Parade_marchingband_1_765.jpg") - except: + except Exception: raise FileNotFoundError() height, width = self.input_imshape @@ -62,7 +62,7 @@ def export_onnx(self, onnx_filepath): export_params=True, opset_version=10, # functional interpolate does not support opset 11+ ) - np.save(f"outputs.npy", actual_outputs) + np.save("outputs.npy", actual_outputs) @torch.no_grad() def forward(self, image): diff --git a/face_detection/retinaface/tensorrt_wrap.py b/face_detection/retinaface/tensorrt_wrap.py index a060864..d8e1dde 100644 --- a/face_detection/retinaface/tensorrt_wrap.py +++ b/face_detection/retinaface/tensorrt_wrap.py @@ -1,5 +1,4 @@ import pycuda.driver as cuda -import pycuda.autoinit import os import numpy as np import cv2 diff --git a/face_detection/torch_utils.py b/face_detection/torch_utils.py index 2f891af..63ac6ea 100644 --- a/face_detection/torch_utils.py +++ b/face_detection/torch_utils.py @@ -4,7 +4,7 @@ def to_cuda(elements, device): if torch.cuda.is_available(): - if type(elements) == tuple or type(elements) == list: + if isinstance(elements, tuple) or isinstance(elements, list): return [x.to(device) for x in elements] return elements.to(device) return elements From 04cf6ff85375f8d82a2ca4a1da467b36b142ed7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5kon=20Hukkel=C3=A5s?= Date: Mon, 9 Jun 2025 18:25:27 +0200 Subject: [PATCH 7/8] fix: dynamic versioning based on git tag --- face_detection/dsfd/face_ssd.py | 4 ++-- pyproject.toml | 7 +++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/face_detection/dsfd/face_ssd.py b/face_detection/dsfd/face_ssd.py index 5618a9b..5059d9e 100644 --- a/face_detection/dsfd/face_ssd.py +++ b/face_detection/dsfd/face_ssd.py @@ -177,9 +177,9 @@ def forward(self, x, confidence_threshold, nms_threshold): # apply multibox head to source layers featuremap_size = [] - for x, loc, c in zip(sources, self.loc, self.conf): + for x, layer, c in zip(sources, self.loc, self.conf): featuremap_size.append([x.shape[2], x.shape[3]]) - 
loc.append(loc(x).permute(0, 2, 3, 1).contiguous())
+            loc.append(layer(x).permute(0, 2, 3, 1).contiguous())
 
         # Max in out
         len_conf = len(conf)
diff --git a/pyproject.toml b/pyproject.toml
index 8cb6c59..b21e288 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,5 @@
 [project]
 name="face_detection"
-version="0.2.1"
 description="A simple and lightweight package for state of the art face detection with GPU support."
 readme="README.md"
 requires-python=">=3.9"
@@ -13,9 +12,11 @@ dependencies = [
     "torch",
     "torchvision",
 ]
+dynamic = ["version"] # Remove static version and add this line
 
 [build-system]
-requires = ["setuptools", "torch"]
+requires = ["setuptools", "torch", "hatchling", "uv-dynamic-versioning"]
+build-backend = "hatchling.build"
 
 [tool.setuptools]
 packages = ["face_detection"]
@@ -26,3 +27,5 @@ dev = [
     "pytest>=8.3.5",
     "ruff"
 ]
+[tool.hatch.version]
+source = "uv-dynamic-versioning"

From 99d1bcc3e686c12817c55f7295362f7f0d41ecd1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?H=C3=A5kon=20Hukkel=C3=A5s?=
Date: Mon, 9 Jun 2025 18:35:30 +0200
Subject: [PATCH 8/8] misc: update README.md

---
 README.md | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/README.md b/README.md
index fb12f00..bf14c5f 100644
--- a/README.md
+++ b/README.md
@@ -116,6 +116,13 @@ boxes, landmarks, scores = detector.infer(image)
 ```
 
+## Formatting
+All code should be formatted with ruff:
+```
+uv run ruff format
+uv run ruff check
+```
+
 ## Citation
 If you find this code useful, remember to cite the original authors:
 ```