From 48244b89975e39f7cb1c13d08d0f76a22db8d3b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5kon=20Hukkel=C3=A5s?= Date: Sun, 1 Jun 2025 08:01:43 +0200 Subject: [PATCH 1/8] add: UV setup & github actions pipeline --- .github/actions/setup-uv/action.yml | 12 ++++++++++++ .github/workflows/format.yml | 14 ++++++++++++++ pyproject.toml | 20 ++++++++++++++++++++ setup.py | 28 ---------------------------- 4 files changed, 46 insertions(+), 28 deletions(-) create mode 100644 .github/actions/setup-uv/action.yml create mode 100644 .github/workflows/format.yml create mode 100644 pyproject.toml delete mode 100644 setup.py diff --git a/.github/actions/setup-uv/action.yml b/.github/actions/setup-uv/action.yml new file mode 100644 index 0000000..2606145 --- /dev/null +++ b/.github/actions/setup-uv/action.yml @@ -0,0 +1,12 @@ +name: Setup UV +description: Installs UV from Astral.sh +runs: + using: "composite" + steps: + - name: Install curl and UV + shell: bash + run: | + sudo apt update + sudo apt install -y curl + curl -LsSf https://astral.sh/uv/install.sh | sh + echo "$HOME/.local/bin" >> $GITHUB_PATH diff --git a/.github/workflows/format.yml b/.github/workflows/format.yml new file mode 100644 index 0000000..340443e --- /dev/null +++ b/.github/workflows/format.yml @@ -0,0 +1,14 @@ +name: Format +on: [push] +jobs: + my-job: + runs-on: ubuntu-24.04 + steps: + - uses: actions/checkout@v4 + + - name: Setup UV + uses: ./.github/actions/setup-uv + + - name: Build + run: uv build + diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..de388c7 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,20 @@ +[project] +name="face_detection" +version="0.2.1" +description="A simple and lightweight package for state of the art face detection with GPU support." 
+readme="README.md" +requires-python=">=3.9" +license="apache-2.0" +classifiers=[ + "Operating System :: OS Independent", +] +dependencies = [ + "numpy>=2.0.2", + "torch>=2.7.0", +] + +[build-system] +requires = ["setuptools", "torch"] + +[tool.setuptools] +packages = ["face_detection"] diff --git a/setup.py b/setup.py deleted file mode 100644 index 61dfd4c..0000000 --- a/setup.py +++ /dev/null @@ -1,28 +0,0 @@ -import setuptools -import torch -import torchvision - -torch_ver = [int(x) for x in torch.__version__.split(".")[:2]] -assert torch_ver >= [1, 6], "Requires PyTorch >= 1.6" -torchvision_ver = [int(x) for x in torchvision.__version__.split(".")[:2]] -assert torchvision_ver >= [0, 3], "Requires torchvision >= 0.3" - -setuptools.setup( - name="face_detection", - version="0.2.1", - author="Håkon Hukkelås", - description="A simple and lightweight package for state of the art face detection with GPU support.", - long_description="".join(open("README.md", "r").readlines()), - long_description_content_type="text/markdown", - url="https://github.com/hukkelas/DSFD-Pytorch-Inference", - python_requires='>=3.6', - license="apache-2.0", - classifiers=[ - "License :: OSI Approved :: Apache Software License", - "Operating System :: OS Independent", - ], - install_requires=[ - "numpy", - ], - packages=setuptools.find_packages() -) From 59bd11793d674f5077153525b0a9b1d3fbef194e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5kon=20Hukkel=C3=A5s?= Date: Sun, 1 Jun 2025 08:44:43 +0200 Subject: [PATCH 2/8] add: tests for all face detectors --- .github/workflows/format.yml | 2 +- .github/workflows/test.yml | 14 ++++++++ pyproject.toml | 11 ++++-- tests/test_detector.py | 68 ++++++++++++++++++++++++++++++++++++ 4 files changed, 92 insertions(+), 3 deletions(-) create mode 100644 .github/workflows/test.yml create mode 100644 tests/test_detector.py diff --git a/.github/workflows/format.yml b/.github/workflows/format.yml index 340443e..9c1e1d0 100644 --- a/.github/workflows/format.yml +++ b/.github/workflows/format.yml @@ -1,7 +1,7 @@ name: Format on: [push] jobs: - my-job: + format: runs-on: ubuntu-24.04 steps: - uses: actions/checkout@v4 diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..d34a0cf --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,14 @@ +name: Test +on: [push] +jobs: + pytest: + runs-on: ubuntu-24.04 + steps: + - uses: actions/checkout@v4 + + - name: Setup UV + uses: ./.github/actions/setup-uv + + - name: test + run: uv run pytest + diff --git a/pyproject.toml b/pyproject.toml index de388c7..d397cc1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,8 +9,9 @@ classifiers=[ "Operating System :: OS Independent", ] dependencies = [ - "numpy>=2.0.2", - "torch>=2.7.0", + "numpy", + "torch", + "torchvision", ] [build-system] @@ -18,3 +19,9 @@ requires = ["setuptools", "torch"] [tool.setuptools] packages = ["face_detection"] + +[dependency-groups] +dev = [ + "opencv-python", + "pytest>=8.3.5", +] diff --git a/tests/test_detector.py b/tests/test_detector.py new file mode 100644 index 0000000..5e2a7a8 --- /dev/null +++ b/tests/test_detector.py @@ -0,0 +1,68 @@ +import numpy as np +import pytest +import cv2 +import face_detection # your face detection library + +def compute_iou(boxA, boxB): + xA = max(boxA[0], boxB[0]) + yA = max(boxA[1], boxB[1]) + xB = min(boxA[2], boxB[2]) + yB = min(boxA[3], boxB[3]) + + print(boxA, boxB) + interArea = max(0, xB - xA) * max(0, yB - yA) + if interArea == 0: + print("Ret 0") + return 0.0 + + boxAArea 
= (boxA[2] - boxA[0]) * (boxA[3] - boxA[1]) + boxBArea = (boxB[2] - boxB[0]) * (boxB[3] - boxB[1]) + iou = interArea / float(boxAArea + boxBArea - interArea) + + print("IoU", iou) + + return iou + + +@pytest.fixture +def ground_truth_boxes(): + return np.array([ + [337.8219142, 227.30235955, 363.18236876, 260.75754449], + [120.61462998, 244.68149829, 153.73102021, 290.13813281], + [793.31824303, 88.6468603, 837.80744743, 153.03655452], + [499.23486614, 212.40574998, 521.46317768, 241.84556359], + [412.37690353, 219.29100847, 437.20971298, 250.56026506], + [654.66749144, 203.24960518, 676.66251707, 231.10678673], + [692.63414764, 248.56575656, 726.75259781, 292.49138522], + [215.16035197, 269.50566196, 240.76163981, 303.02491093], + [189.08402371, 212.22481942, 210.5982945, 240.76419282], + [571.04836243, 213.0569253, 590.01044816, 238.5836339], + [ 16.7418344, 235.77498758, 41.44155097, 265.93795145], + [284.28320718, 213.93544269, 304.40658212, 238.0858829], + [167.58154631, 76.92867303, 187.13439512, 102.97041345], + ]) + +@pytest.mark.parametrize("detector_name", [ + "DSFDDetector", + "RetinaNetResNet50", + "RetinaNetMobileNetV1" +]) +def test_detector_detects_boxes_with_iou(detector_name, ground_truth_boxes): + detector = face_detection.build_detector( + detector_name, + max_resolution=1080, + confidence_threshold=0.5 + ) + impath = "images/11_Meeting_Meeting_11_Meeting_Meeting_11_176.jpg" + img = cv2.imread(impath) + + detections = detector.detect(img[:, :, ::-1])[:, :4] + + for gt_box in ground_truth_boxes: + print("CHECKING") + matched = any(compute_iou(gt_box, det_box) >= 0.5 for det_box in detections) + assert matched, ( + f"{detector_name} failed to detect ground truth box {gt_box} " + f"with IoU >= 0.5" + ) + From f9e2d47d780a1db8b278b0f5b02a3a692f4ed2f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5kon=20Hukkel=C3=A5s?= Date: Sun, 1 Jun 2025 17:25:42 +0200 Subject: [PATCH 3/8] add: uv ruff check --- .github/workflows/{format.yml => ruff.yml} | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) rename .github/workflows/{format.yml => ruff.yml} (82%) diff --git a/.github/workflows/format.yml b/.github/workflows/ruff.yml similarity index 82% rename from .github/workflows/format.yml rename to .github/workflows/ruff.yml index 9c1e1d0..7da5f7a 100644 --- a/.github/workflows/format.yml +++ b/.github/workflows/ruff.yml @@ -1,7 +1,7 @@ name: Format on: [push] jobs: - format: + ruff: runs-on: ubuntu-24.04 steps: - uses: actions/checkout@v4 @@ -10,5 +10,5 @@ jobs: uses: ./.github/actions/setup-uv - name: Build - run: uv build + run: uv run ruff check From 015f2fe887a0f35071f9cf24c1f60c8c0752702b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5kon=20Hukkel=C3=A5s?= Date: Mon, 9 Jun 2025 17:29:04 +0200 Subject: [PATCH 4/8] fix: ruff format --- .github/workflows/ruff.yml | 4 +++- pyproject.toml | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ruff.yml b/.github/workflows/ruff.yml index 7da5f7a..08d51d9 100644 --- a/.github/workflows/ruff.yml +++ b/.github/workflows/ruff.yml @@ -9,6 +9,8 @@ jobs: - name: Setup UV uses: ./.github/actions/setup-uv - - name: Build + - name: Install ruff + run: uv add ruff + - name: Run ruff run: uv run ruff check diff --git a/pyproject.toml b/pyproject.toml index d397cc1..8cb6c59 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,4 +24,5 @@ packages = ["face_detection"] dev = [ "opencv-python", "pytest>=8.3.5", + "ruff" ] From 6a01f9fe2caba325aa86be573d341bb7b5c80f44 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?H=C3=A5kon=20Hukkel=C3=A5s?= Date: Mon, 9 Jun 2025 17:32:42 +0200 Subject: [PATCH 5/8] format only --- benchmark.py | 8 +- face_detection/__init__.py | 2 +- face_detection/base.py | 41 ++++--- face_detection/box_utils.py | 11 +- face_detection/build.py | 31 +++-- face_detection/dsfd/__init__.py | 2 +- face_detection/dsfd/config.py | 64 +++++++---- face_detection/dsfd/detect.py | 18 ++- face_detection/dsfd/face_ssd.py | 106 +++++++++--------- face_detection/dsfd/utils.py | 35 +++--- face_detection/registry.py | 33 +++--- face_detection/retinaface/__init__.py | 2 +- face_detection/retinaface/config.py | 69 ++++++------ face_detection/retinaface/detect.py | 33 ++---- face_detection/retinaface/models/net.py | 58 +++++----- .../retinaface/models/retinaface.py | 43 +++---- face_detection/retinaface/onnx.py | 25 +++-- face_detection/retinaface/prior_box.py | 25 +++-- face_detection/retinaface/tensorrt_wrap.py | 99 +++++++++------- face_detection/retinaface/utils.py | 31 ++--- test.py | 20 +--- tests/test_detector.py | 49 ++++---- 22 files changed, 418 insertions(+), 387 deletions(-) diff --git a/benchmark.py b/benchmark.py index 6bd9f21..2bf52ac 100644 --- a/benchmark.py +++ b/benchmark.py @@ -8,10 +8,7 @@ num = 1000 for detector in face_detection.available_detectors: - detector = face_detection.build_detector( - detector, - fp16_inference=True - ) + detector = face_detection.build_detector(detector, fp16_inference=True) im = "images/0_Parade_Parade_0_873.jpg" im = cv2.imread(im)[:, :, ::-1] t = time.time() @@ -23,4 +20,5 @@ ms = avg_time * 1000 print( f"Detector: {detector.__class__.__name__}. Average inference time over image shape: {im.shape} is:", - f"{ms:.2f} ms, fps: {fps:.2f}") + f"{ms:.2f} ms, fps: {fps:.2f}", + ) diff --git a/face_detection/__init__.py b/face_detection/__init__.py index e589bff..a4d0316 100644 --- a/face_detection/__init__.py +++ b/face_detection/__init__.py @@ -1,3 +1,3 @@ from .build import build_detector, available_detectors from .dsfd import DSFDDetector -from .retinaface import RetinaNetMobileNetV1, RetinaNetResNet50 \ No newline at end of file +from .retinaface import RetinaNetMobileNetV1, RetinaNetResNet50 diff --git a/face_detection/base.py b/face_detection/base.py index b0ec89c..73e3773 100644 --- a/face_detection/base.py +++ b/face_detection/base.py @@ -7,24 +7,23 @@ def check_image(im: np.ndarray): - assert im.dtype == np.uint8,\ - f"Expect image to have dtype np.uint8. Was: {im.dtype}" - assert len(im.shape) == 4,\ - f"Expected image to have 4 dimensions. got: {im.shape}" - assert im.shape[-1] == 3,\ + assert im.dtype == np.uint8, f"Expect image to have dtype np.uint8. Was: {im.dtype}" + assert len(im.shape) == 4, f"Expected image to have 4 dimensions. 
got: {im.shape}" + assert im.shape[-1] == 3, ( f"Expected image to be RGB, got: {im.shape[-1]} color channels" + ) class Detector(ABC): - def __init__( - self, - confidence_threshold: float, - nms_iou_threshold: float, - device: torch.device, - max_resolution: int, - fp16_inference: bool, - clip_boxes: bool): + self, + confidence_threshold: float, + nms_iou_threshold: float, + device: torch.device, + max_resolution: int, + fp16_inference: bool, + clip_boxes: bool, + ): """ Args: confidence_threshold (float): Threshold to filter out bounding boxes @@ -40,11 +39,9 @@ def __init__( self.max_resolution = max_resolution self.fp16_inference = fp16_inference self.clip_boxes = clip_boxes - self.mean = np.array( - [123, 117, 104], dtype=np.float32).reshape(1, 1, 1, 3) + self.mean = np.array([123, 117, 104], dtype=np.float32).reshape(1, 1, 1, 3) - def detect( - self, image: np.ndarray, shrink=1.0) -> np.ndarray: + def detect(self, image: np.ndarray, shrink=1.0) -> np.ndarray: """Takes an RGB image and performs and returns a set of bounding boxes as detections Args: @@ -77,7 +74,7 @@ def filter_boxes(self, boxes: torch.Tensor) -> typing.List[np.ndarray]: """ final_output = [] for i in range(len(boxes)): - scores = boxes[i, :, 4] + scores = boxes[i, :, 4] keep_idx = scores >= self.confidence_threshold boxes_ = boxes[i, keep_idx, :-1] scores = scores[keep_idx] @@ -99,7 +96,7 @@ def resize(self, image, shrink: float): shrink_factor = self.max_resolution / max((height, width)) if shrink_factor <= shrink: shrink = shrink_factor - size = (int(height*shrink), int(width*shrink)) + size = (int(height * shrink), int(width * shrink)) image = torch.nn.functional.interpolate(image, size=size) return image @@ -130,8 +127,7 @@ def _batched_detect(self, image: np.ndarray) -> typing.List[np.ndarray]: return boxes @torch.no_grad() - def batched_detect( - self, image: np.ndarray, shrink=1.0) -> typing.List[np.ndarray]: + def batched_detect(self, image: np.ndarray, shrink=1.0) -> typing.List[np.ndarray]: """Takes N RGB image and performs and returns a set of bounding boxes as detections Args: @@ -150,5 +146,6 @@ def batched_detect( def validate_detections(self, boxes: typing.List[np.ndarray]): for box in boxes: - assert np.all(box[:, 4] <= 1) and np.all(box[:, 4] >= 0),\ + assert np.all(box[:, 4] <= 1) and np.all(box[:, 4] >= 0), ( f"Confidence values not valid: {box}" + ) diff --git a/face_detection/box_utils.py b/face_detection/box_utils.py index 643a23a..7044cea 100644 --- a/face_detection/box_utils.py +++ b/face_detection/box_utils.py @@ -14,10 +14,13 @@ def batched_decode(loc, priors, variances, to_XYXY=True): decoded bounding box predictions """ priors = priors[None] - boxes = torch.cat(( - priors[:, :, :2] + loc[:, :, :2] * variances[0] * priors[:, :, 2:], - priors[:, :, 2:] * torch.exp(loc[:, :, 2:] * variances[1])), - dim=2) + boxes = torch.cat( + ( + priors[:, :, :2] + loc[:, :, :2] * variances[0] * priors[:, :, 2:], + priors[:, :, 2:] * torch.exp(loc[:, :, 2:] * variances[1]), + ), + dim=2, + ) if to_XYXY: boxes[:, :, :2] -= boxes[:, :, 2:] / 2 boxes[:, :, 2:] += boxes[:, :, :2] diff --git a/face_detection/build.py b/face_detection/build.py index ebf84ac..874970e 100644 --- a/face_detection/build.py +++ b/face_detection/build.py @@ -2,26 +2,23 @@ from .base import Detector from .torch_utils import get_device -available_detectors = [ - "DSFDDetector", - "RetinaNetResNet50", - "RetinaNetMobileNetV1" -] +available_detectors = ["DSFDDetector", "RetinaNetResNet50", "RetinaNetMobileNetV1"] DETECTOR_REGISTRY = 
Registry("DETECTORS") def build_detector( - name: str = "DSFDDetector", - confidence_threshold: float = 0.5, - nms_iou_threshold: float = 0.3, - device=get_device(), - max_resolution: int = None, - fp16_inference: bool = False, - clip_boxes: bool = False - ) -> Detector: - assert name in available_detectors,\ - f"Detector not available. Chooce one of the following"+\ - ",".join(available_detectors) + name: str = "DSFDDetector", + confidence_threshold: float = 0.5, + nms_iou_threshold: float = 0.3, + device=get_device(), + max_resolution: int = None, + fp16_inference: bool = False, + clip_boxes: bool = False, +) -> Detector: + assert name in available_detectors, ( + f"Detector not available. Chooce one of the following" + + ",".join(available_detectors) + ) args = dict( type=name, confidence_threshold=confidence_threshold, @@ -29,7 +26,7 @@ def build_detector( device=device, max_resolution=max_resolution, fp16_inference=fp16_inference, - clip_boxes=clip_boxes + clip_boxes=clip_boxes, ) detector = build_from_cfg(args, DETECTOR_REGISTRY) return detector diff --git a/face_detection/dsfd/__init__.py b/face_detection/dsfd/__init__.py index 9121fca..63b1827 100644 --- a/face_detection/dsfd/__init__.py +++ b/face_detection/dsfd/__init__.py @@ -1 +1 @@ -from .detect import DSFDDetector \ No newline at end of file +from .detect import DSFDDetector diff --git a/face_detection/dsfd/config.py b/face_detection/dsfd/config.py index 96fb1e2..8388102 100644 --- a/face_detection/dsfd/config.py +++ b/face_detection/dsfd/config.py @@ -1,25 +1,43 @@ resnet152_model_config = { - 'num_classes': 2, - 'feature_maps': [160, 80, 40, 20, 10, 5], - 'min_dim': 640, - 'steps': [4, 8, 16, 32, 64, 128], # stride - 'variance': [0.1, 0.2], - 'clip': True, # make default box in [0,1] - 'base': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'C', 512, 512, 512, 'M', 512, 512, 512] , - 'extras': [256, 'S', 512, 128, 'S', 256], - 'mbox': [1, 1, 1, 1, 1, 1] , - 'min_sizes': [16, 32, 64, 128, 256, 512], - 'max_sizes': [], - 'aspect_ratios': [ [1.5],[1.5],[1.5],[1.5],[1.5],[1.5] ], # [1,2] default 1 - 'backbone': 'resnet152' , # vgg, resnet, detnet, resnet50 - 'feature_pyramid_network':True , - 'bottom_up_path': False , - 'feature_enhance_module': True , - 'max_in_out': True , - 'focal_loss': False , - 'progressive_anchor': True , - 'refinedet': False , - 'max_out': False , - 'anchor_compensation': False , - 'data_anchor_sampling': False , + "num_classes": 2, + "feature_maps": [160, 80, 40, 20, 10, 5], + "min_dim": 640, + "steps": [4, 8, 16, 32, 64, 128], # stride + "variance": [0.1, 0.2], + "clip": True, # make default box in [0,1] + "base": [ + 64, + 64, + "M", + 128, + 128, + "M", + 256, + 256, + 256, + "C", + 512, + 512, + 512, + "M", + 512, + 512, + 512, + ], + "extras": [256, "S", 512, 128, "S", 256], + "mbox": [1, 1, 1, 1, 1, 1], + "min_sizes": [16, 32, 64, 128, 256, 512], + "max_sizes": [], + "aspect_ratios": [[1.5], [1.5], [1.5], [1.5], [1.5], [1.5]], # [1,2] default 1 + "backbone": "resnet152", # vgg, resnet, detnet, resnet50 + "feature_pyramid_network": True, + "bottom_up_path": False, + "feature_enhance_module": True, + "max_in_out": True, + "focal_loss": False, + "progressive_anchor": True, + "refinedet": False, + "max_out": False, + "anchor_compensation": False, + "data_anchor_sampling": False, } diff --git a/face_detection/dsfd/detect.py b/face_detection/dsfd/detect.py index c25f5f1..350764a 100644 --- a/face_detection/dsfd/detect.py +++ b/face_detection/dsfd/detect.py @@ -13,21 +13,21 @@ 
@DETECTOR_REGISTRY.register_module class DSFDDetector(Detector): - - def __init__( - self, *args, **kwargs): + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) state_dict = load_state_dict_from_url( - model_url, - map_location=self.device, - progress=True) + model_url, map_location=self.device, progress=True + ) self.net = SSD(resnet152_model_config) self.net.load_state_dict(state_dict) self.net.eval() self.net = self.net.to(self.device) @torch.no_grad() - def _detect(self, x: torch.Tensor,) -> typing.List[np.ndarray]: + def _detect( + self, + x: torch.Tensor, + ) -> typing.List[np.ndarray]: """Batched detect Args: image (np.ndarray): shape [N, H, W, 3] @@ -37,7 +37,5 @@ def _detect(self, x: torch.Tensor,) -> typing.List[np.ndarray]: # Expects BGR x = x[:, [2, 1, 0], :, :] with torch.cuda.amp.autocast(enabled=self.fp16_inference): - boxes = self.net( - x, self.confidence_threshold, self.nms_iou_threshold - ) + boxes = self.net(x, self.confidence_threshold, self.nms_iou_threshold) return boxes diff --git a/face_detection/dsfd/face_ssd.py b/face_detection/dsfd/face_ssd.py index 7f3bec9..ee1ac2f 100644 --- a/face_detection/dsfd/face_ssd.py +++ b/face_detection/dsfd/face_ssd.py @@ -7,14 +7,13 @@ class FEM(nn.Module): - def __init__(self, channel_size): super(FEM, self).__init__() self.cs = channel_size self.cpm1 = nn.Conv2d(self.cs, 256, kernel_size=3, padding=1) self.cpm2 = nn.Conv2d(self.cs, 256, kernel_size=3, dilation=2, padding=2) self.cpm3 = nn.Conv2d(256, 128, kernel_size=3, padding=1) - self.cpm4 = nn.Conv2d(256, 128, kernel_size=3, dilation=2, padding=2) + self.cpm4 = nn.Conv2d(256, 128, kernel_size=3, dilation=2, padding=2) self.cpm5 = nn.Conv2d(128, 128, kernel_size=3, padding=1) def forward(self, x): @@ -46,13 +45,13 @@ class SSD(nn.Module): def __init__(self, cfg): super(SSD, self).__init__() - self.num_classes = 2 # Background and face + self.num_classes = 2 # Background and face self.cfg = cfg resnet = torchvision.models.resnet152(pretrained=False) self.layer1 = nn.Sequential( - resnet.conv1, resnet.bn1, resnet.relu, - resnet.maxpool, resnet.layer1) + resnet.conv1, resnet.bn1, resnet.relu, resnet.maxpool, resnet.layer1 + ) self.layer2 = nn.Sequential(resnet.layer2) self.layer3 = nn.Sequential(resnet.layer3) self.layer4 = nn.Sequential(resnet.layer4) @@ -62,7 +61,7 @@ def __init__(self, cfg): nn.ReLU(inplace=True), nn.Conv2d(512, 512, kernel_size=3, padding=1, stride=2), nn.BatchNorm2d(512), - nn.ReLU(inplace=True) + nn.ReLU(inplace=True), ) self.layer6 = nn.Sequential( nn.Conv2d(512, 128, kernel_size=1), @@ -70,7 +69,7 @@ def __init__(self, cfg): nn.ReLU(inplace=True), nn.Conv2d(128, 256, kernel_size=3, padding=1, stride=2), nn.BatchNorm2d(256), - nn.ReLU(inplace=True) + nn.ReLU(inplace=True), ) output_channels = [256, 512, 1024, 2048, 512, 256] @@ -95,7 +94,7 @@ def __init__(self, cfg): self.cpm6_2 = FEM(cpm_in[4]) self.cpm7_2 = FEM(cpm_in[5]) - head = pa_multibox(output_channels, self.cfg['mbox'], self.num_classes) + head = pa_multibox(output_channels, self.cfg["mbox"], self.num_classes) self.loc = nn.ModuleList(head[0]) self.conf = nn.ModuleList(head[1]) @@ -103,14 +102,21 @@ def __init__(self, cfg): self.softmax = nn.Softmax(dim=-1) # Cache to stop computing new priors per fowrard pass - self.prior_cache = { - } + self.prior_cache = {} def init_priors(self, feature_maps, image_size): - # Hacky key system, but works.... 
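# Annotation (not a line of the patch): the cache key assembled just below
# flattens every feature-map dimension plus the image size into one string,
# e.g. feature_maps=[[160, 120], [80, 60]] with image_size=(640, 480) gives
# "160.120.80.60,640.480", so priors are recomputed only when an unseen
# feature-map/image-size combination appears.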
- key = ".".join([str(item) for i in range(len(feature_maps)) for item in feature_maps[i]]) + \ - "," + ".".join([str(_) for _ in image_size]) + key = ( + ".".join( + [ + str(item) + for i in range(len(feature_maps)) + for item in feature_maps[i] + ] + ) + + "," + + ".".join([str(_) for _ in image_size]) + ) if key in self.prior_cache: return self.prior_cache[key].clone() @@ -150,13 +156,10 @@ def forward(self, x, confidence_threshold, nms_threshold): conv6_2_x = self.layer5(fc7_x) conv7_2_x = self.layer6(conv6_2_x) - # FPN - lfpn3 = self._upsample_product( - self.latlayer3(fc7_x), self.smooth3(conv5_3_x)) - lfpn2 = self._upsample_product( - self.latlayer2(lfpn3), self.smooth2(conv4_3_x)) - lfpn1 = self._upsample_product( - self.latlayer1(lfpn2), self.smooth1(conv3_3_x)) + # FPN + lfpn3 = self._upsample_product(self.latlayer3(fc7_x), self.smooth3(conv5_3_x)) + lfpn2 = self._upsample_product(self.latlayer2(lfpn3), self.smooth2(conv4_3_x)) + lfpn1 = self._upsample_product(self.latlayer1(lfpn2), self.smooth1(conv3_3_x)) conv5_3_x = lfpn3 conv4_3_x = lfpn2 @@ -168,12 +171,13 @@ def forward(self, x, confidence_threshold, nms_threshold): self.cpm5_3(conv5_3_x), self.cpm7(fc7_x), self.cpm6_2(conv6_2_x), - self.cpm7_2(conv7_2_x)] + self.cpm7_2(conv7_2_x), + ] # Feature Enhance Module # apply multibox head to source layers featuremap_size = [] - for (x, l, c) in zip(sources, self.loc, self.conf): + for x, l, c in zip(sources, self.loc, self.conf): featuremap_size.append([x.shape[2], x.shape[3]]) loc.append(l(x).permute(0, 2, 3, 1).contiguous()) @@ -183,23 +187,23 @@ def forward(self, x, confidence_threshold, nms_threshold): conf.append(out.permute(0, 2, 3, 1).contiguous()) # Progressive Anchor - mbox_num = self.cfg['mbox'][0] - face_loc = torch.cat([ - o[:, :, :, :4*mbox_num].contiguous().view(o.size(0), -1) - for o in loc], dim=1) - face_conf = torch.cat([ - o[:, :, :, :2*mbox_num].contiguous().view(o.size(0), -1) - for o in conf], dim=1) + mbox_num = self.cfg["mbox"][0] + face_loc = torch.cat( + [o[:, :, :, : 4 * mbox_num].contiguous().view(o.size(0), -1) for o in loc], + dim=1, + ) + face_conf = torch.cat( + [o[:, :, :, : 2 * mbox_num].contiguous().view(o.size(0), -1) for o in conf], + dim=1, + ) # Test Phase self.priors = self.init_priors(featuremap_size, image_size) self.priors = self.priors.to(face_conf.device) - conf_preds = face_conf.view( - face_conf.size(0), -1, self.num_classes).softmax(dim=-1) - face_loc = face_loc.view(face_loc.size(0), -1, 4) - boxes = batched_decode( - face_loc, self.priors, - self.cfg["variance"] + conf_preds = face_conf.view(face_conf.size(0), -1, self.num_classes).softmax( + dim=-1 ) + face_loc = face_loc.view(face_loc.size(0), -1, 4) + boxes = batched_decode(face_loc, self.priors, self.cfg["variance"]) scores = conf_preds.view(-1, self.priors.shape[0], 2)[:, :, 1:] output = torch.cat((boxes, scores), dim=-1) return output @@ -214,12 +218,11 @@ def mio_module(self, each_mmbox, len_conf): if len(chunk) == 6: out = torch.cat([out, chunk[4], chunk[5]], dim=1) elif len(chunk) == 8: - out = torch.cat( - [out, chunk[4], chunk[5], chunk[6], chunk[7]], dim=1) + out = torch.cat([out, chunk[4], chunk[5], chunk[6], chunk[7]], dim=1) return out def _upsample_product(self, x, y): - '''Upsample and add two feature maps. + """Upsample and add two feature maps. Args: x: (Variable) top feature map to be upsampled. y: (Variable) lateral feature map. 
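The upsample-and-multiply fusion in _upsample_product (the method these two
hunks reformat) is self-contained and easy to verify in isolation. A minimal
standalone sketch, assuming only torch; the tensor names and shapes are
illustrative, not taken from the patch:

import torch
import torch.nn.functional as F

def upsample_product(x, y):
    # Resize the coarser map x to y's spatial size with bilinear
    # interpolation (align_corners=True, matching the patched code), which
    # supports arbitrary output sizes, then fuse by element-wise product.
    return y * F.interpolate(x, size=y.shape[2:], mode="bilinear",
                             align_corners=True)

top = torch.randn(1, 256, 8, 8)        # coarse, top-of-pyramid features
lateral = torch.randn(1, 256, 16, 16)  # finer lateral features
print(upsample_product(top, lateral).shape)  # torch.Size([1, 256, 16, 16])
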
@@ -233,11 +236,12 @@ def _upsample_product(self, x, y): conv2d feature map size: [N,_,8,8] -> upsampled feature map size: [N,_,16,16] So we choose bilinear upsample which supports arbitrary output sizes. - ''' + """ # Deprecation warning. align_corners=False default in 0.4.0, but in 0.3.0 it was True # Original code was written in 0.3.1, I guess this is correct. return y * F.interpolate( - x, size=y.shape[2:], mode="bilinear", align_corners=True) + x, size=y.shape[2:], mode="bilinear", align_corners=True + ) class DeepHeadModule(nn.Module): @@ -248,13 +252,19 @@ def __init__(self, input_channels, output_channels): self._mid_channels = min(self._input_channels, 256) self.conv1 = nn.Conv2d( - self._input_channels, self._mid_channels, kernel_size=3, padding=1) + self._input_channels, self._mid_channels, kernel_size=3, padding=1 + ) self.conv2 = nn.Conv2d( - self._mid_channels, self._mid_channels, kernel_size=3, padding=1) + self._mid_channels, self._mid_channels, kernel_size=3, padding=1 + ) self.conv3 = nn.Conv2d( - self._mid_channels, self._mid_channels, kernel_size=3, padding=1) + self._mid_channels, self._mid_channels, kernel_size=3, padding=1 + ) self.conv4 = nn.Conv2d( - self._mid_channels, self._output_channels, kernel_size=1,) + self._mid_channels, + self._output_channels, + kernel_size=1, + ) def forward(self, x): out = self.conv1(x).relu() @@ -278,10 +288,6 @@ def pa_multibox(output_channels, mbox_cfg, num_classes): else: loc_output = 12 conf_output = 6 - loc_layers += [ - DeepHeadModule(input_channels, mbox_cfg[k] * loc_output)] - conf_layers += [ - DeepHeadModule(input_channels, mbox_cfg[k] * (2+conf_output))] + loc_layers += [DeepHeadModule(input_channels, mbox_cfg[k] * loc_output)] + conf_layers += [DeepHeadModule(input_channels, mbox_cfg[k] * (2 + conf_output))] return (loc_layers, conf_layers) - - diff --git a/face_detection/dsfd/utils.py b/face_detection/dsfd/utils.py index 30e7733..8690554 100644 --- a/face_detection/dsfd/utils.py +++ b/face_detection/dsfd/utils.py @@ -6,22 +6,23 @@ class PriorBox(object): """Compute priorbox coordinates in center-offset form for each source feature map. 
""" + def __init__(self, cfg, image_size, feature_maps): super(PriorBox, self).__init__() self.image_size = image_size self.feature_maps = feature_maps # number of priors for feature map location (either 4 or 6) - self.num_priors = len(cfg['aspect_ratios']) - self.variance = cfg['variance'] or [0.1] + self.num_priors = len(cfg["aspect_ratios"]) + self.variance = cfg["variance"] or [0.1] self.min_sizes = cfg["min_sizes"] self.max_sizes = cfg["max_sizes"] - self.steps = cfg['steps'] - self.aspect_ratios = cfg['aspect_ratios'] - self.clip = cfg['clip'] + self.steps = cfg["steps"] + self.aspect_ratios = cfg["aspect_ratios"] + self.clip = cfg["clip"] for v in self.variance: if v <= 0: - raise ValueError('Variances must be greater than 0') + raise ValueError("Variances must be greater than 0") def forward(self): mean = [] @@ -36,7 +37,6 @@ def forward(self): for k, f in enumerate(self.feature_maps): for i in range(f[0]): for j in range(f[1]): - f_k_i = self.image_size[0] / self.steps[k] f_k_j = self.image_size[1] / self.steps[k] @@ -46,8 +46,8 @@ def forward(self): # aspect_ratio: 1 # rel size: min_size - s_k_i = self.min_sizes[k]/self.image_size[1] - s_k_j = self.min_sizes[k]/self.image_size[0] + s_k_i = self.min_sizes[k] / self.image_size[1] + s_k_j = self.min_sizes[k] / self.image_size[0] if len(self.aspect_ratios[0]) == 0: mean += [cx, cy, s_k_i, s_k_j] @@ -56,15 +56,24 @@ def forward(self): # rel size: sqrt(s_k * s_(k+1)) if len(self.max_sizes) == len(self.min_sizes): - s_k_prime_i = math.sqrt(s_k_i * (self.max_sizes[k] / self.image_size[1])) - s_k_prime_j = math.sqrt(s_k_j * (self.max_sizes[k] / self.image_size[0])) + s_k_prime_i = math.sqrt( + s_k_i * (self.max_sizes[k] / self.image_size[1]) + ) + s_k_prime_j = math.sqrt( + s_k_j * (self.max_sizes[k] / self.image_size[0]) + ) mean += [cx, cy, s_k_prime_i, s_k_prime_j] # rest of aspect ratios for ar in self.aspect_ratios[k]: if len(self.max_sizes) == len(self.min_sizes): - mean += [cx, cy, s_k_prime_i/math.sqrt(ar), s_k_prime_j*math.sqrt(ar)] - mean += [cx, cy, s_k_i/math.sqrt(ar), s_k_j*math.sqrt(ar)] + mean += [ + cx, + cy, + s_k_prime_i / math.sqrt(ar), + s_k_prime_j * math.sqrt(ar), + ] + mean += [cx, cy, s_k_i / math.sqrt(ar), s_k_j * math.sqrt(ar)] # back to torch land output = torch.Tensor(mean).view(-1, 4) diff --git a/face_detection/registry.py b/face_detection/registry.py index a94e475..35cc2bf 100644 --- a/face_detection/registry.py +++ b/face_detection/registry.py @@ -2,14 +2,14 @@ class Registry(object): - def __init__(self, name): self._name = name self._module_dict = dict() def __repr__(self): - format_str = self.__class__.__name__ + '(name={}, items={})'.format( - self._name, list(self._module_dict.keys())) + format_str = self.__class__.__name__ + "(name={}, items={})".format( + self._name, list(self._module_dict.keys()) + ) return format_str @property @@ -23,8 +23,7 @@ def module_dict(self): def get(self, key): obj = self._module_dict.get(key, None) if obj is None: - raise KeyError( - f'{key} is not in the {self._name} registry.') + raise KeyError(f"{key} is not in the {self._name} registry.") return obj def _register_module(self, module_class, force=False): @@ -33,12 +32,14 @@ def _register_module(self, module_class, force=False): module (:obj:`nn.Module`): Module to be registered. 
""" if not isinstance(module_class, type): - raise TypeError('module must be a class, but got {}'.format( - type(module_class))) + raise TypeError( + "module must be a class, but got {}".format(type(module_class)) + ) module_name = module_class.__name__ if not force and module_name in self._module_dict: - raise KeyError('{} is already registered in {}'.format( - module_name, self.name)) + raise KeyError( + "{} is already registered in {}".format(module_name, self.name) + ) self._module_dict[module_name] = module_class def register_module(self, cls=None, force=False): @@ -57,17 +58,19 @@ def build_from_cfg(cfg, registry, **kwargs): Returns: obj: The constructed object. """ - assert isinstance(cfg, dict) and 'type' in cfg + assert isinstance(cfg, dict) and "type" in cfg args = cfg.copy() - obj_type = args.pop('type') + obj_type = args.pop("type") if isinstance(obj_type, str): obj_cls = registry.get(obj_type) if obj_cls is None: - raise KeyError('{} is not in the {} registry'.format( - obj_type, registry.name)) + raise KeyError( + "{} is not in the {} registry".format(obj_type, registry.name) + ) elif isinstance(obj_type, type): obj_cls = obj_type else: - raise TypeError('type must be a str or valid type, but got {}'.format( - type(obj_type))) + raise TypeError( + "type must be a str or valid type, but got {}".format(type(obj_type)) + ) return obj_cls(**args, **kwargs) diff --git a/face_detection/retinaface/__init__.py b/face_detection/retinaface/__init__.py index 8c1a129..ed589e8 100644 --- a/face_detection/retinaface/__init__.py +++ b/face_detection/retinaface/__init__.py @@ -1,3 +1,3 @@ # Adapted from https://github.com/biubug6/Pytorch_Retinaface # Original license: MIT -from .detect import RetinaNetMobileNetV1, RetinaNetResNet50 \ No newline at end of file +from .detect import RetinaNetMobileNetV1, RetinaNetResNet50 diff --git a/face_detection/retinaface/config.py b/face_detection/retinaface/config.py index 2a9c1ba..772556f 100644 --- a/face_detection/retinaface/config.py +++ b/face_detection/retinaface/config.py @@ -2,42 +2,41 @@ # Original license: MIT cfg_mnet = { - 'name': 'mobilenet0.25', - 'min_sizes': [[16, 32], [64, 128], [256, 512]], - 'steps': [8, 16, 32], - 'variance': [0.1, 0.2], - 'clip': False, - 'loc_weight': 2.0, - 'gpu_train': True, - 'batch_size': 32, - 'ngpu': 1, - 'epoch': 250, - 'decay1': 190, - 'decay2': 220, - 'image_size': 640, - 'pretrain': True, - 'return_layers': {'stage1': 1, 'stage2': 2, 'stage3': 3}, - 'in_channel': 32, - 'out_channel': 64 + "name": "mobilenet0.25", + "min_sizes": [[16, 32], [64, 128], [256, 512]], + "steps": [8, 16, 32], + "variance": [0.1, 0.2], + "clip": False, + "loc_weight": 2.0, + "gpu_train": True, + "batch_size": 32, + "ngpu": 1, + "epoch": 250, + "decay1": 190, + "decay2": 220, + "image_size": 640, + "pretrain": True, + "return_layers": {"stage1": 1, "stage2": 2, "stage3": 3}, + "in_channel": 32, + "out_channel": 64, } cfg_re50 = { - 'name': 'Resnet50', - 'min_sizes': [[16, 32], [64, 128], [256, 512]], - 'steps': [8, 16, 32], - 'variance': [0.1, 0.2], - 'clip': False, - 'loc_weight': 2.0, - 'gpu_train': True, - 'batch_size': 24, - 'ngpu': 4, - 'epoch': 100, - 'decay1': 70, - 'decay2': 90, - 'image_size': 840, - 'pretrain': True, - 'return_layers': {'layer2': 1, 'layer3': 2, 'layer4': 3}, - 'in_channel': 256, - 'out_channel': 256 + "name": "Resnet50", + "min_sizes": [[16, 32], [64, 128], [256, 512]], + "steps": [8, 16, 32], + "variance": [0.1, 0.2], + "clip": False, + "loc_weight": 2.0, + "gpu_train": True, + "batch_size": 24, + 
"ngpu": 4, + "epoch": 100, + "decay1": 70, + "decay2": 90, + "image_size": 840, + "pretrain": True, + "return_layers": {"layer2": 1, "layer3": 2, "layer4": 3}, + "in_channel": 256, + "out_channel": 256, } - diff --git a/face_detection/retinaface/detect.py b/face_detection/retinaface/detect.py index b3fe62f..47ea947 100644 --- a/face_detection/retinaface/detect.py +++ b/face_detection/retinaface/detect.py @@ -16,25 +16,20 @@ class RetinaNetDetector(Detector): - - def __init__( - self, - model: str, - *args, - **kwargs): + def __init__(self, model: str, *args, **kwargs): super().__init__(*args, **kwargs) if model == "mobilenet": cfg = cfg_mnet state_dict = load_state_dict_from_url( "https://raw.githubusercontent.com/hukkelas/DSFD-Pytorch-Inference/master/RetinaFace_mobilenet025.pth", - map_location=torch_utils.get_device() + map_location=torch_utils.get_device(), ) else: assert model == "resnet50" cfg = cfg_re50 state_dict = load_state_dict_from_url( "https://api.loke.aws.unit.no/dlr-gui-backend-resources-content/v2/contents/links/8dd81669-eb84-4520-8173-dbe49d72f44cb2eef6da-3983-4a12-9085-d11555b93842c19bdf27-b924-4214-9381-e6cac30b87cf", - map_location=torch_utils.get_device() + map_location=torch_utils.get_device(), ) state_dict = {k.replace("module.", ""): v for k, v in state_dict.items()} net = RetinaFace(cfg=cfg) @@ -46,7 +41,8 @@ def __init__( self.prior_box_cache = {} def batched_detect_with_landmarks( - self, image: np.ndarray) -> typing.Tuple[np.ndarray, np.ndarray]: + self, image: np.ndarray + ) -> typing.Tuple[np.ndarray, np.ndarray]: """Takes N images and performs and returns a set of bounding boxes as detections Args: @@ -75,8 +71,7 @@ def batched_detect_with_landmarks( scores_ = scores_[keep_idx] landms_ = landms_[keep_idx] # Non maxima suppression - keep_idx = nms( - boxes_, scores_, self.nms_iou_threshold) + keep_idx = nms(boxes_, scores_, self.nms_iou_threshold) boxes_ = boxes_[keep_idx] scores_ = scores_[keep_idx] landms_ = landms_[keep_idx] @@ -91,16 +86,13 @@ def batched_detect_with_landmarks( landms_ = landms_.cpu().numpy().reshape(-1, 5, 2) landms_[:, :, 0] *= width landms_[:, :, 1] *= height - dets = torch.cat( - (boxes_, scores_.view(-1, 1)), dim=1).cpu().numpy() + dets = torch.cat((boxes_, scores_.view(-1, 1)), dim=1).cpu().numpy() final_output_box.append(dets) final_output_landmarks.append(landms_) return final_output_box, final_output_landmarks @torch.no_grad() - def _detect( - self, image: np.ndarray, - return_landmarks=False) -> np.ndarray: + def _detect(self, image: np.ndarray, return_landmarks=False) -> np.ndarray: """Batched detect Args: image (np.ndarray): shape [N, H, W, 3] @@ -115,29 +107,26 @@ def _detect( if image.shape[2:] in self.prior_box_cache: priors = self.prior_box_cache[image.shape[2:]] else: - priorbox = PriorBox( - self.cfg, image_size=(height, width)) + priorbox = PriorBox(self.cfg, image_size=(height, width)) priors = priorbox.forward() self.prior_box_cache[image.shape[2:]] = priors priors = torch_utils.to_cuda(priors, self.device) prior_data = priors.data - boxes = batched_decode(loc, prior_data, self.cfg['variance']) + boxes = batched_decode(loc, prior_data, self.cfg["variance"]) boxes = torch.cat((boxes, scores), dim=-1) if return_landmarks: - landms = decode_landm(landms, prior_data, self.cfg['variance']) + landms = decode_landm(landms, prior_data, self.cfg["variance"]) return boxes, landms return boxes @DETECTOR_REGISTRY.register_module class RetinaNetResNet50(RetinaNetDetector): - def __init__(self, *args, **kwargs): 
super().__init__("resnet50", *args, **kwargs) @DETECTOR_REGISTRY.register_module class RetinaNetMobileNetV1(RetinaNetDetector): - def __init__(self, *args, **kwargs): super().__init__("mobilenet", *args, **kwargs) diff --git a/face_detection/retinaface/models/net.py b/face_detection/retinaface/models/net.py index 777fa95..4d5d1ae 100644 --- a/face_detection/retinaface/models/net.py +++ b/face_detection/retinaface/models/net.py @@ -9,7 +9,7 @@ def conv_bn(inp, oup, stride=1, leaky=0): return nn.Sequential( nn.Conv2d(inp, oup, 3, stride, 1, bias=False), nn.BatchNorm2d(oup), - nn.LeakyReLU(negative_slope=leaky, inplace=True) + nn.LeakyReLU(negative_slope=leaky, inplace=True), ) @@ -24,7 +24,7 @@ def conv_bn1X1(inp, oup, stride=1, leaky=0): return nn.Sequential( nn.Conv2d(inp, oup, 1, stride, padding=0, bias=False), nn.BatchNorm2d(oup), - nn.LeakyReLU(negative_slope=leaky, inplace=True) + nn.LeakyReLU(negative_slope=leaky, inplace=True), ) @@ -33,7 +33,6 @@ def conv_dw(inp, oup, stride, leaky=0.1): nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False), nn.BatchNorm2d(inp), nn.LeakyReLU(negative_slope=leaky, inplace=True), - nn.Conv2d(inp, oup, 1, 1, 0, bias=False), nn.BatchNorm2d(oup), nn.LeakyReLU(negative_slope=leaky, inplace=True), @@ -45,19 +44,17 @@ def __init__(self, in_channel, out_channel): super().__init__() assert out_channel % 4 == 0 leaky = 0 - if (out_channel <= 64): + if out_channel <= 64: leaky = 0.1 - self.conv3X3 = conv_bn_no_relu(in_channel, out_channel//2, stride=1) + self.conv3X3 = conv_bn_no_relu(in_channel, out_channel // 2, stride=1) - self.conv5X5_1 = conv_bn( - in_channel, out_channel//4, stride=1, leaky=leaky) - self.conv5X5_2 = conv_bn_no_relu( - out_channel//4, out_channel//4, stride=1) + self.conv5X5_1 = conv_bn(in_channel, out_channel // 4, stride=1, leaky=leaky) + self.conv5X5_2 = conv_bn_no_relu(out_channel // 4, out_channel // 4, stride=1) self.conv7X7_2 = conv_bn( - out_channel//4, out_channel//4, stride=1, leaky=leaky) - self.conv7x7_3 = conv_bn_no_relu( - out_channel//4, out_channel//4, stride=1) + out_channel // 4, out_channel // 4, stride=1, leaky=leaky + ) + self.conv7x7_3 = conv_bn_no_relu(out_channel // 4, out_channel // 4, stride=1) def forward(self, input_): conv3X3 = self.conv3X3(input_) @@ -77,14 +74,11 @@ class FPN(nn.Module): def __init__(self, in_channels_list, out_channels): super().__init__() leaky = 0 - if (out_channels <= 64): + if out_channels <= 64: leaky = 0.1 - self.output1 = conv_bn1X1( - in_channels_list[0], out_channels, leaky=leaky) - self.output2 = conv_bn1X1( - in_channels_list[1], out_channels, leaky=leaky) - self.output3 = conv_bn1X1( - in_channels_list[2], out_channels, leaky=leaky) + self.output1 = conv_bn1X1(in_channels_list[0], out_channels, leaky=leaky) + self.output2 = conv_bn1X1(in_channels_list[1], out_channels, leaky=leaky) + self.output3 = conv_bn1X1(in_channels_list[2], out_channels, leaky=leaky) self.merge1 = conv_bn(out_channels, out_channels, leaky=leaky) self.merge2 = conv_bn(out_channels, out_channels, leaky=leaky) @@ -95,12 +89,14 @@ def forward(self, input_): output2 = self.output2(input_[1]) output3 = self.output3(input_[2]) up3 = F.interpolate( - output3, size=[int(output2.size(2)), int(output2.size(3))], mode="nearest") + output3, size=[int(output2.size(2)), int(output2.size(3))], mode="nearest" + ) output2 = output2 + up3 output2 = self.merge2(output2) up2 = F.interpolate( - output2, size=[int(output1.size(2)), int(output1.size(3))], mode="nearest") + output2, size=[int(output1.size(2)), 
int(output1.size(3))], mode="nearest" + ) output1 = output1 + up2 output1 = self.merge1(output1) @@ -112,8 +108,8 @@ class MobileNetV1(nn.Module): def __init__(self): super(MobileNetV1, self).__init__() self.stage1 = nn.Sequential( - conv_bn(3, 8, 2, leaky=0.1), # 3 - conv_dw(8, 16, 1), # 7 + conv_bn(3, 8, 2, leaky=0.1), # 3 + conv_dw(8, 16, 1), # 7 conv_dw(16, 32, 2), # 11 conv_dw(32, 32, 1), # 19 conv_dw(32, 64, 2), # 27 @@ -121,17 +117,17 @@ def __init__(self): ) self.stage2 = nn.Sequential( conv_dw(64, 128, 2), # 43 + 16 = 59 - conv_dw(128, 128, 1), # 59 + 32 = 91 - conv_dw(128, 128, 1), # 91 + 32 = 123 - conv_dw(128, 128, 1), # 123 + 32 = 155 - conv_dw(128, 128, 1), # 155 + 32 = 187 - conv_dw(128, 128, 1), # 187 + 32 = 219 + conv_dw(128, 128, 1), # 59 + 32 = 91 + conv_dw(128, 128, 1), # 91 + 32 = 123 + conv_dw(128, 128, 1), # 123 + 32 = 155 + conv_dw(128, 128, 1), # 155 + 32 = 187 + conv_dw(128, 128, 1), # 187 + 32 = 219 ) self.stage3 = nn.Sequential( - conv_dw(128, 256, 2), # 219 +3 2 = 241 - conv_dw(256, 256, 1), # 241 + 64 = 301 + conv_dw(128, 256, 2), # 219 +3 2 = 241 + conv_dw(256, 256, 1), # 241 + 64 = 301 ) - self.avg = nn.AdaptiveAvgPool2d((1,1)) + self.avg = nn.AdaptiveAvgPool2d((1, 1)) self.fc = nn.Linear(256, 1000) def forward(self, x): diff --git a/face_detection/retinaface/models/retinaface.py b/face_detection/retinaface/models/retinaface.py index 77fb358..71796a6 100644 --- a/face_detection/retinaface/models/retinaface.py +++ b/face_detection/retinaface/models/retinaface.py @@ -10,9 +10,7 @@ class ClassHead(nn.Module): def __init__(self, inchannels=512, num_anchors=3): super().__init__() self.num_anchors = num_anchors - self.conv1x1 = nn.Conv2d( - inchannels, self.num_anchors*2, - kernel_size=1) + self.conv1x1 = nn.Conv2d(inchannels, self.num_anchors * 2, kernel_size=1) def forward(self, x): out = self.conv1x1(x) @@ -24,9 +22,7 @@ def forward(self, x): class BboxHead(nn.Module): def __init__(self, inchannels=512, num_anchors=3): super().__init__() - self.conv1x1 = nn.Conv2d( - inchannels, num_anchors*4, - kernel_size=1) + self.conv1x1 = nn.Conv2d(inchannels, num_anchors * 4, kernel_size=1) def forward(self, x): out = self.conv1x1(x) @@ -38,8 +34,7 @@ def forward(self, x): class LandmarkHead(nn.Module): def __init__(self, inchannels=512, num_anchors=3): super().__init__() - self.conv1x1 = nn.Conv2d( - inchannels, num_anchors*10, kernel_size=1) + self.conv1x1 = nn.Conv2d(inchannels, num_anchors * 10, kernel_size=1) def forward(self, x): out = self.conv1x1(x) @@ -56,35 +51,38 @@ def __init__(self, cfg): """ super().__init__() backbone = None - if cfg['name'] == 'mobilenet0.25': + if cfg["name"] == "mobilenet0.25": backbone = MobileNetV1() - elif cfg['name'] == 'Resnet50': + elif cfg["name"] == "Resnet50": import torchvision.models as models + backbone = models.resnet50(pretrained=False) - self.body = _utils.IntermediateLayerGetter(backbone, cfg['return_layers']) - in_channels_stage2 = cfg['in_channel'] + self.body = _utils.IntermediateLayerGetter(backbone, cfg["return_layers"]) + in_channels_stage2 = cfg["in_channel"] in_channels_list = [ in_channels_stage2 * 2, in_channels_stage2 * 4, in_channels_stage2 * 8, ] - out_channels = cfg['out_channel'] + out_channels = cfg["out_channel"] self.fpn = FPN(in_channels_list, out_channels) self.ssh1 = SSH(out_channels, out_channels) self.ssh2 = SSH(out_channels, out_channels) self.ssh3 = SSH(out_channels, out_channels) - self.ClassHead = self._make_class_head(fpn_num=3, inchannels=cfg['out_channel']) - self.BboxHead = 
self._make_bbox_head(fpn_num=3, inchannels=cfg['out_channel']) - self.LandmarkHead = self._make_landmark_head(fpn_num=3, inchannels=cfg['out_channel']) + self.ClassHead = self._make_class_head(fpn_num=3, inchannels=cfg["out_channel"]) + self.BboxHead = self._make_bbox_head(fpn_num=3, inchannels=cfg["out_channel"]) + self.LandmarkHead = self._make_landmark_head( + fpn_num=3, inchannels=cfg["out_channel"] + ) def _make_class_head(self, fpn_num=3, inchannels=64, anchor_num=2): classhead = nn.ModuleList() for i in range(fpn_num): classhead.append(ClassHead(inchannels, anchor_num)) return classhead - + def _make_bbox_head(self, fpn_num=3, inchannels=64, anchor_num=2): bboxhead = nn.ModuleList() for i in range(fpn_num): @@ -110,10 +108,13 @@ def forward(self, inputs): features = [feature1, feature2, feature3] bbox_regressions = torch.cat( - [self.BboxHead[i](feature) for i, feature in enumerate(features)], dim=1) + [self.BboxHead[i](feature) for i, feature in enumerate(features)], dim=1 + ) classifications = torch.cat( - [self.ClassHead[i](feature) for i, feature in enumerate(features)],dim=1) + [self.ClassHead[i](feature) for i, feature in enumerate(features)], dim=1 + ) ldm_regressions = torch.cat( - [self.LandmarkHead[i](feature) for i, feature in enumerate(features)], dim=1) + [self.LandmarkHead[i](feature) for i, feature in enumerate(features)], dim=1 + ) - return (bbox_regressions, classifications.softmax(dim=-1), ldm_regressions) \ No newline at end of file + return (bbox_regressions, classifications.softmax(dim=-1), ldm_regressions) diff --git a/face_detection/retinaface/onnx.py b/face_detection/retinaface/onnx.py index 1f53c04..7cdd615 100644 --- a/face_detection/retinaface/onnx.py +++ b/face_detection/retinaface/onnx.py @@ -13,14 +13,13 @@ class RetinaNetDetectorONNX(torch.nn.Module): - def __init__(self, input_imshape, inference_imshape): super().__init__() self.device = torch.device("cpu") cfg = cfg_re50 state_dict = load_state_dict_from_url( "https://folk.ntnu.no/haakohu/RetinaFace_ResNet50.pth", - map_location=torch_utils.get_device() + map_location=torch_utils.get_device(), ) state_dict = {k.replace("module.", ""): v for k, v in state_dict.items()} net = RetinaFace(cfg=cfg) @@ -28,7 +27,7 @@ def __init__(self, input_imshape, inference_imshape): net.load_state_dict(state_dict) self.net = net.to(self.device) self.input_imshape = input_imshape - self.inference_imshape = inference_imshape # (height, width) + self.inference_imshape = inference_imshape # (height, width) self.mean = np.array([104, 117, 123], dtype=np.float32) self.mean = torch.from_numpy(self.mean).reshape((1, 3, 1, 1)) self.mean = torch.nn.Parameter(self.mean).float().to(self.device) @@ -42,7 +41,7 @@ def export_onnx(self, onnx_filepath): image = cv2.imread("images/0_Parade_marchingband_1_765.jpg") except: raise FileNotFoundError() - + height, width = self.input_imshape image = cv2.resize(image, (width, height)) @@ -54,29 +53,33 @@ def export_onnx(self, onnx_filepath): output_names = ["loc"] torch.onnx.export( - self, example_inputs, + self, + example_inputs, onnx_filepath, verbose=True, input_names=["image"], output_names=output_names, export_params=True, - opset_version=10 # functional interpolate does not support opset 11+ - ) + opset_version=10, # functional interpolate does not support opset 11+ + ) np.save(f"outputs.npy", actual_outputs) @torch.no_grad() def forward(self, image): """ - image: shape [1, 3, H, W] - Exports model where outputs are NOT thresholded or performed NMS on. 
+ image: shape [1, 3, H, W] + Exports model where outputs are NOT thresholded or performed NMS on. """ - image = torch.nn.functional.interpolate(image, self.inference_imshape, mode="nearest") + image = torch.nn.functional.interpolate( + image, self.inference_imshape, mode="nearest" + ) # Expects BGR image = image - self.mean assert image.shape[2] == self.inference_imshape[0] assert image.shape[3] == self.inference_imshape[1] - assert image.shape[0] == 1,\ + assert image.shape[0] == 1, ( "The ONNX export only supports one image at a time tensors currently" + ) loc, conf, landms = self.net(image) # forward pass assert conf.shape[2] == 2 scores = conf[:, :, 1:] diff --git a/face_detection/retinaface/prior_box.py b/face_detection/retinaface/prior_box.py index ec9b409..86bcf47 100644 --- a/face_detection/retinaface/prior_box.py +++ b/face_detection/retinaface/prior_box.py @@ -9,8 +9,8 @@ def generate_prior_box(feature_maps, image_size, steps, min_sizes): n_anchors = 0 for x in feature_maps: n_anchors += int(x[0]) * int(x[1]) * len(min_sizes[0]) - anchors = np.empty((n_anchors*4), dtype=np.float64) -# print(feature_maps, image_size, steps, min_sizes) + anchors = np.empty((n_anchors * 4), dtype=np.float64) + # print(feature_maps, image_size, steps, min_sizes) idx_anchor = 0 for k, f in enumerate(feature_maps): min_sizes_ = min_sizes[k] @@ -23,20 +23,25 @@ def generate_prior_box(feature_maps, image_size, steps, min_sizes): dense_cy = [y * steps[k] / image_size[0] for y in [i + 0.5]] for cy in dense_cy: for cx in dense_cx: - anchors[idx_anchor:idx_anchor+4] = [cx, cy, s_kx, s_ky] - idx_anchor += 1*4 -# assert idx_anchor == anchors.shape[0], f"{anchors.shape[0]}, {idx_anchor}" + anchors[idx_anchor : idx_anchor + 4] = [cx, cy, s_kx, s_ky] + idx_anchor += 1 * 4 + # assert idx_anchor == anchors.shape[0], f"{anchors.shape[0]}, {idx_anchor}" return anchors class PriorBox(object): - def __init__(self, cfg, image_size=None, phase='train'): + def __init__(self, cfg, image_size=None, phase="train"): super(PriorBox, self).__init__() - self.min_sizes = np.array(cfg['min_sizes']).astype(np.int16) - self.steps = np.array(cfg['steps']).astype(np.int16) - self.clip = cfg['clip'] + self.min_sizes = np.array(cfg["min_sizes"]).astype(np.int16) + self.steps = np.array(cfg["steps"]).astype(np.int16) + self.clip = cfg["clip"] self.image_size = np.array(image_size).astype(np.int16) - self.feature_maps = np.array([[ceil(self.image_size[0]/step), ceil(self.image_size[1]/step)] for step in self.steps]).astype(np.int16) + self.feature_maps = np.array( + [ + [ceil(self.image_size[0] / step), ceil(self.image_size[1] / step)] + for step in self.steps + ] + ).astype(np.int16) self.name = "s" def forward(self): diff --git a/face_detection/retinaface/tensorrt_wrap.py b/face_detection/retinaface/tensorrt_wrap.py index 106052c..a060864 100644 --- a/face_detection/retinaface/tensorrt_wrap.py +++ b/face_detection/retinaface/tensorrt_wrap.py @@ -14,23 +14,24 @@ class TensorRTRetinaFace: - def __init__( - self, - input_imshape, - inference_imshape, - confidence_threshold: float = 0.5, - nms_threshold: float = 0.3): + self, + input_imshape, + inference_imshape, + confidence_threshold: float = 0.5, + nms_threshold: float = 0.3, + ): self.inference_imshape = inference_imshape self.input_imshape = input_imshape self.confidence_threshold = confidence_threshold self.nms_threshold = nms_threshold - identifier = "_".join(str(x) for x in list(input_imshape) + list(inference_imshape)) + identifier = "_".join( + str(x) for x in list(input_imshape) 
+ list(inference_imshape) + ) onnx_filepath = f"retinaface_input_{identifier}_.onnx" onnx_filepath = os.path.join(cache_dir, onnx_filepath) if not os.path.isfile(onnx_filepath): - detector = RetinaNetDetectorONNX( - input_imshape, inference_imshape) + detector = RetinaNetDetectorONNX(input_imshape, inference_imshape) detector.export_onnx(onnx_filepath) self.TRT_LOGGER = trt.Logger(trt.tensorrt.Logger.Severity.INFO) self.engine_path = onnx_filepath.replace(".onnx", ".trt") @@ -45,45 +46,62 @@ def initialize_bindings(self): print( self.engine.get_binding_name(idx), self.engine.get_binding_dtype(idx), - self.engine.get_binding_shape(idx)) + self.engine.get_binding_shape(idx), + ) if self.engine.binding_is_input(idx): # we expect only one input input_shape = self.engine.get_binding_shape(idx) - input_size = trt.volume(input_shape) * self.engine.max_batch_size * np.dtype(np.float32).itemsize # in bytes - self.input_bindings.append({ - "input_shape": input_shape, - "input_size": input_size, - "device_input": cuda.mem_alloc(input_size), - }) + input_size = ( + trt.volume(input_shape) + * self.engine.max_batch_size + * np.dtype(np.float32).itemsize + ) # in bytes + self.input_bindings.append( + { + "input_shape": input_shape, + "input_size": input_size, + "device_input": cuda.mem_alloc(input_size), + } + ) else: # and one output output_shape = self.engine.get_binding_shape(idx) - host_output = cuda.pagelocked_empty(trt.volume(output_shape) * self.engine.max_batch_size, dtype=np.float32) + host_output = cuda.pagelocked_empty( + trt.volume(output_shape) * self.engine.max_batch_size, + dtype=np.float32, + ) device_output = cuda.mem_alloc(host_output.nbytes) - self.output_bindings.append({ - "output_shape": output_shape, - "host_output": host_output, - "device_output": device_output, - "name": self.engine.get_binding_name(idx) - }) + self.output_bindings.append( + { + "output_shape": output_shape, + "host_output": host_output, + "device_output": device_output, + "name": self.engine.get_binding_name(idx), + } + ) def build_engine(self, onnx_filepath: str): if os.path.isfile(self.engine_path): - with open(self.engine_path, "rb") as f, trt.Runtime(self.TRT_LOGGER) as runtime: + with ( + open(self.engine_path, "rb") as f, + trt.Runtime(self.TRT_LOGGER) as runtime, + ): engine = runtime.deserialize_cuda_engine(f.read()) return engine builder = trt.Builder(self.TRT_LOGGER) - network_creation_flag = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + network_creation_flag = 1 << int( + trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH + ) network = builder.create_network(network_creation_flag) print(network) parser = trt.OnnxParser(network, self.TRT_LOGGER) # parse ONNX - with open(onnx_filepath, 'rb') as model: - print('Beginning ONNX file parsing') + with open(onnx_filepath, "rb") as model: + print("Beginning ONNX file parsing") if not parser.parse(model.read()): for error in range(parser.num_errors): print(parser.get_error(error)) - print('Completed parsing of ONNX file') + print("Completed parsing of ONNX file") builder.max_batch_size = 1 builder.debug_sync = True builder.max_workspace_size = 2**34 @@ -91,7 +109,7 @@ def build_engine(self, onnx_filepath: str): if builder.platform_has_fast_fp16: builder.fp16_mode = True - print('Building an engine...') + print("Building an engine...") engine = builder.build_cuda_engine(network) print("Completed creating Engine") @@ -101,16 +119,13 @@ def build_engine(self, onnx_filepath: str): def run_engine(self, img): stream = cuda.Stream() - 
cuda.memcpy_htod_async( - self.input_bindings[0]["device_input"], img, stream) - bs = [int(x["device_input"]) for x in self.input_bindings] +\ - [int(x["device_output"]) for x in self.output_bindings] - self.context.execute_async( - bindings=bs, - stream_handle=stream.handle) + cuda.memcpy_htod_async(self.input_bindings[0]["device_input"], img, stream) + bs = [int(x["device_input"]) for x in self.input_bindings] + [ + int(x["device_output"]) for x in self.output_bindings + ] + self.context.execute_async(bindings=bs, stream_handle=stream.handle) for out in self.output_bindings: - cuda.memcpy_dtoh_async( - out["host_output"], out["device_output"], stream) + cuda.memcpy_dtoh_async(out["host_output"], out["device_output"], stream) out["host_output"] = out["host_output"].reshape(out["output_shape"]) assert len(self.output_bindings) == 1 stream.synchronize() @@ -146,15 +161,13 @@ def infer(self, img): height = 720 expected_imsize = (height, width) image = cv2.resize(image, (width, height)) - detector = TensorRTRetinaFace( - (height, width), - (480, 640)) + detector = TensorRTRetinaFace((height, width), (480, 640)) print(detector.infer(image)) boxes, landms, scores = detector.infer(image) for i in range(boxes.shape[0]): print(boxes[i]) x0, y0, x1, y1 = boxes[i].astype(int) - image = cv2.rectangle(image, (x0, y0), (x1, y1),(255, 0, 0), 1 ) + image = cv2.rectangle(image, (x0, y0), (x1, y1), (255, 0, 0), 1) for kp in landms[i]: image = cv2.circle(image, tuple(kp), 5, (255, 0, 0)) - cv2.imwrite("test.png", image) \ No newline at end of file + cv2.imwrite("test.png", image) diff --git a/face_detection/retinaface/utils.py b/face_detection/retinaface/utils.py index e4ff17b..f9794b5 100644 --- a/face_detection/retinaface/utils.py +++ b/face_detection/retinaface/utils.py @@ -17,12 +17,16 @@ def decode_landm(pre, priors, variances): decoded landm predictions """ priors = priors[None] - landms = torch.cat((priors[:, :, :2] + pre[:, :, :2] * variances[0] * priors[:, :, 2:], - priors[:, :, :2] + pre[:, :, 2:4] * variances[0] * priors[:, :, 2:], - priors[:, :, :2] + pre[:, :, 4:6] * variances[0] * priors[:, :, 2:], - priors[:, :, :2] + pre[:, :, 6:8] * variances[0] * priors[:, :, 2:], - priors[:, :, :2] + pre[:, :, 8:10] * variances[0] * priors[:, :, 2:], - ), dim=2) + landms = torch.cat( + ( + priors[:, :, :2] + pre[:, :, :2] * variances[0] * priors[:, :, 2:], + priors[:, :, :2] + pre[:, :, 2:4] * variances[0] * priors[:, :, 2:], + priors[:, :, :2] + pre[:, :, 4:6] * variances[0] * priors[:, :, 2:], + priors[:, :, :2] + pre[:, :, 6:8] * variances[0] * priors[:, :, 2:], + priors[:, :, :2] + pre[:, :, 8:10] * variances[0] * priors[:, :, 2:], + ), + dim=2, + ) return landms @@ -34,13 +38,13 @@ def python_nms(boxes, overlapThresh): boxes = boxes.astype(np.float32) if boxes.dtype.kind == "i": boxes = boxes.astype("float") - # initialize the list of picked indexes + # initialize the list of picked indexes keep_idx = [] # grab the coordinates of the bounding boxes - x1 = boxes[:,0] - y1 = boxes[:,1] - x2 = boxes[:,2] - y2 = boxes[:,3] + x1 = boxes[:, 0] + y1 = boxes[:, 1] + x2 = boxes[:, 2] + y2 = boxes[:, 3] area = (x2 - x1 + 1) * (y2 - y1 + 1) idxs = np.argsort(y2) # keep looping while some indexes still remain in the indexes @@ -64,6 +68,7 @@ def python_nms(boxes, overlapThresh): # compute the ratio of overlap overlap = (w * h) / area[idxs[:last]] # delete all indexes from the index list that have - idxs = np.delete(idxs, np.concatenate(([last], - np.where(overlap > overlapThresh)[0]))) + idxs = np.delete( + 
idxs, np.concatenate(([last], np.where(overlap > overlapThresh)[0])) + ) return keep_idx diff --git a/test.py b/test.py index b64c959..a84512f 100644 --- a/test.py +++ b/test.py @@ -14,25 +14,17 @@ def draw_faces(im, bboxes): if __name__ == "__main__": impaths = "images" impaths = glob.glob(os.path.join(impaths, "*.jpg")) - detector = face_detection.build_detector( - "DSFDDetector", - max_resolution=1080 - ) + detector = face_detection.build_detector("DSFDDetector", max_resolution=1080) for impath in impaths: - if impath.endswith("out.jpg"): continue + if impath.endswith("out.jpg"): + continue im = cv2.imread(impath) print("Processing:", impath) t = time.time() - dets = detector.detect( - im[:, :, ::-1] - )[:, :4] - print(f"Detection time: {time.time()- t:.3f}") + dets = detector.detect(im[:, :, ::-1])[:, :4] + print(f"Detection time: {time.time() - t:.3f}") draw_faces(im, dets) imname = os.path.basename(impath).split(".")[0] - output_path = os.path.join( - os.path.dirname(impath), - f"{imname}_out.jpg" - ) + output_path = os.path.join(os.path.dirname(impath), f"{imname}_out.jpg") cv2.imwrite(output_path, im) - \ No newline at end of file diff --git a/tests/test_detector.py b/tests/test_detector.py index 5e2a7a8..cf35929 100644 --- a/tests/test_detector.py +++ b/tests/test_detector.py @@ -3,6 +3,7 @@ import cv2 import face_detection # your face detection library + def compute_iou(boxA, boxB): xA = max(boxA[0], boxB[0]) yA = max(boxA[1], boxB[1]) @@ -26,32 +27,31 @@ def compute_iou(boxA, boxB): @pytest.fixture def ground_truth_boxes(): - return np.array([ - [337.8219142, 227.30235955, 363.18236876, 260.75754449], - [120.61462998, 244.68149829, 153.73102021, 290.13813281], - [793.31824303, 88.6468603, 837.80744743, 153.03655452], - [499.23486614, 212.40574998, 521.46317768, 241.84556359], - [412.37690353, 219.29100847, 437.20971298, 250.56026506], - [654.66749144, 203.24960518, 676.66251707, 231.10678673], - [692.63414764, 248.56575656, 726.75259781, 292.49138522], - [215.16035197, 269.50566196, 240.76163981, 303.02491093], - [189.08402371, 212.22481942, 210.5982945, 240.76419282], - [571.04836243, 213.0569253, 590.01044816, 238.5836339], - [ 16.7418344, 235.77498758, 41.44155097, 265.93795145], - [284.28320718, 213.93544269, 304.40658212, 238.0858829], - [167.58154631, 76.92867303, 187.13439512, 102.97041345], - ]) + return np.array( + [ + [337.8219142, 227.30235955, 363.18236876, 260.75754449], + [120.61462998, 244.68149829, 153.73102021, 290.13813281], + [793.31824303, 88.6468603, 837.80744743, 153.03655452], + [499.23486614, 212.40574998, 521.46317768, 241.84556359], + [412.37690353, 219.29100847, 437.20971298, 250.56026506], + [654.66749144, 203.24960518, 676.66251707, 231.10678673], + [692.63414764, 248.56575656, 726.75259781, 292.49138522], + [215.16035197, 269.50566196, 240.76163981, 303.02491093], + [189.08402371, 212.22481942, 210.5982945, 240.76419282], + [571.04836243, 213.0569253, 590.01044816, 238.5836339], + [16.7418344, 235.77498758, 41.44155097, 265.93795145], + [284.28320718, 213.93544269, 304.40658212, 238.0858829], + [167.58154631, 76.92867303, 187.13439512, 102.97041345], + ] + ) -@pytest.mark.parametrize("detector_name", [ - "DSFDDetector", - "RetinaNetResNet50", - "RetinaNetMobileNetV1" -]) -def test_detector_detects_boxes_with_iou(detector_name, ground_truth_boxes): + +@pytest.mark.parametrize( + "detector_name", ["DSFDDetector", "RetinaNetResNet50", "RetinaNetMobileNetV1"] +) +def test_detector_detects_boxes_with_iou(detector_name, ground_truth_boxes): detector = 
face_detection.build_detector(
-        detector_name,
-        max_resolution=1080,
-        confidence_threshold=0.5
+        detector_name, max_resolution=1080, confidence_threshold=0.5
     )
     impath = "images/11_Meeting_Meeting_11_Meeting_Meeting_11_176.jpg"
     img = cv2.imread(impath)
@@ -65,4 +65,3 @@ def test_detector_detects_boxes_with_iou(detector_name, ground_truth_boxes):
             f"{detector_name} failed to detect ground truth box {gt_box} "
             f"with IoU >= 0.5"
         )
-

From ebf337f4bc46965e81405b09540c4b28b8404e5d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?H=C3=A5kon=20Hukkel=C3=A5s?=
Date: Mon, 9 Jun 2025 17:40:49 +0200
Subject: [PATCH 6/8] fix: ruff check

---
 .github/workflows/ruff.yml                 | 4 +++-
 face_detection/__init__.py                 | 8 ++++++++
 face_detection/build.py                    | 2 +-
 face_detection/dsfd/__init__.py            | 2 ++
 face_detection/dsfd/detect.py              | 1 -
 face_detection/dsfd/face_ssd.py            | 4 ++--
 face_detection/retinaface/__init__.py      | 2 ++
 face_detection/retinaface/onnx.py          | 4 ++--
 face_detection/retinaface/tensorrt_wrap.py | 1 -
 face_detection/torch_utils.py              | 2 +-
 10 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/.github/workflows/ruff.yml b/.github/workflows/ruff.yml
index 08d51d9..90a8965 100644
--- a/.github/workflows/ruff.yml
+++ b/.github/workflows/ruff.yml
@@ -11,6 +11,8 @@ jobs:
 
       - name: Install ruff
         run: uv add ruff
 
-      - name: Run ruff
+      - name: Run ruff check
         run: uv run ruff check
+      - name: Run ruff format check
+        run: uv run ruff format --check
diff --git a/face_detection/__init__.py b/face_detection/__init__.py
index a4d0316..7a77d0b 100644
--- a/face_detection/__init__.py
+++ b/face_detection/__init__.py
@@ -1,3 +1,11 @@
 from .build import build_detector, available_detectors
 from .dsfd import DSFDDetector
 from .retinaface import RetinaNetMobileNetV1, RetinaNetResNet50
+
+__all__ = [
+    "build_detector",
+    "available_detectors",
+    "RetinaNetMobileNetV1",
+    "RetinaNetResNet50",
+    "DSFDDetector",
+]
diff --git a/face_detection/build.py b/face_detection/build.py
index 874970e..6ae1728 100644
--- a/face_detection/build.py
+++ b/face_detection/build.py
@@ -16,7 +16,7 @@ def build_detector(
     clip_boxes: bool = False,
 ) -> Detector:
     assert name in available_detectors, (
-        f"Detector not available. Chooce one of the following"
+        "Detector not available. Choose one of the following: "
         + ",".join(available_detectors)
     )
     args = dict(
diff --git a/face_detection/dsfd/__init__.py b/face_detection/dsfd/__init__.py
index 63b1827..bb05cfd 100644
--- a/face_detection/dsfd/__init__.py
+++ b/face_detection/dsfd/__init__.py
@@ -1 +1,3 @@
 from .detect import DSFDDetector
+
+__all__ = ["DSFDDetector"]
diff --git a/face_detection/dsfd/detect.py b/face_detection/dsfd/detect.py
index 350764a..8ec0ab0 100644
--- a/face_detection/dsfd/detect.py
+++ b/face_detection/dsfd/detect.py
@@ -3,7 +3,6 @@
 import typing
 from .face_ssd import SSD
 from .config import resnet152_model_config
-from ..
import torch_utils from torch.hub import load_state_dict_from_url from ..base import Detector from ..build import DETECTOR_REGISTRY diff --git a/face_detection/dsfd/face_ssd.py b/face_detection/dsfd/face_ssd.py index ee1ac2f..5618a9b 100644 --- a/face_detection/dsfd/face_ssd.py +++ b/face_detection/dsfd/face_ssd.py @@ -177,9 +177,9 @@ def forward(self, x, confidence_threshold, nms_threshold): # apply multibox head to source layers featuremap_size = [] - for x, l, c in zip(sources, self.loc, self.conf): + for x, loc, c in zip(sources, self.loc, self.conf): featuremap_size.append([x.shape[2], x.shape[3]]) - loc.append(l(x).permute(0, 2, 3, 1).contiguous()) + loc.append(loc(x).permute(0, 2, 3, 1).contiguous()) # Max in out len_conf = len(conf) diff --git a/face_detection/retinaface/__init__.py b/face_detection/retinaface/__init__.py index ed589e8..f010d41 100644 --- a/face_detection/retinaface/__init__.py +++ b/face_detection/retinaface/__init__.py @@ -1,3 +1,5 @@ # Adapted from https://github.com/biubug6/Pytorch_Retinaface # Original license: MIT from .detect import RetinaNetMobileNetV1, RetinaNetResNet50 + +__all__ = ["RetinaNetMobileNetV1", "RetinaNetResNet50"] diff --git a/face_detection/retinaface/onnx.py b/face_detection/retinaface/onnx.py index 7cdd615..5d69065 100644 --- a/face_detection/retinaface/onnx.py +++ b/face_detection/retinaface/onnx.py @@ -39,7 +39,7 @@ def __init__(self, input_imshape, inference_imshape): def export_onnx(self, onnx_filepath): try: image = cv2.imread("images/0_Parade_marchingband_1_765.jpg") - except: + except Exception: raise FileNotFoundError() height, width = self.input_imshape @@ -62,7 +62,7 @@ def export_onnx(self, onnx_filepath): export_params=True, opset_version=10, # functional interpolate does not support opset 11+ ) - np.save(f"outputs.npy", actual_outputs) + np.save("outputs.npy", actual_outputs) @torch.no_grad() def forward(self, image): diff --git a/face_detection/retinaface/tensorrt_wrap.py b/face_detection/retinaface/tensorrt_wrap.py index a060864..d8e1dde 100644 --- a/face_detection/retinaface/tensorrt_wrap.py +++ b/face_detection/retinaface/tensorrt_wrap.py @@ -1,5 +1,4 @@ import pycuda.driver as cuda -import pycuda.autoinit import os import numpy as np import cv2 diff --git a/face_detection/torch_utils.py b/face_detection/torch_utils.py index 2f891af..63ac6ea 100644 --- a/face_detection/torch_utils.py +++ b/face_detection/torch_utils.py @@ -4,7 +4,7 @@ def to_cuda(elements, device): if torch.cuda.is_available(): - if type(elements) == tuple or type(elements) == list: + if isinstance(elements, tuple) or isinstance(elements, list): return [x.to(device) for x in elements] return elements.to(device) return elements From 04cf6ff85375f8d82a2ca4a1da467b36b142ed7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5kon=20Hukkel=C3=A5s?= Date: Mon, 9 Jun 2025 18:25:27 +0200 Subject: [PATCH 7/8] fix: dynamic versioning based on git tag --- face_detection/dsfd/face_ssd.py | 4 ++-- pyproject.toml | 7 +++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/face_detection/dsfd/face_ssd.py b/face_detection/dsfd/face_ssd.py index 5618a9b..5059d9e 100644 --- a/face_detection/dsfd/face_ssd.py +++ b/face_detection/dsfd/face_ssd.py @@ -177,9 +177,9 @@ def forward(self, x, confidence_threshold, nms_threshold): # apply multibox head to source layers featuremap_size = [] - for x, loc, c in zip(sources, self.loc, self.conf): + for x, layer, c in zip(sources, self.loc, self.conf): featuremap_size.append([x.shape[2], x.shape[3]]) - 
loc.append(loc(x).permute(0, 2, 3, 1).contiguous())
+            loc.append(layer(x).permute(0, 2, 3, 1).contiguous())
 
         # Max in out
         len_conf = len(conf)
diff --git a/pyproject.toml b/pyproject.toml
index 8cb6c59..b21e288 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,5 @@
 [project]
 name="face_detection"
-version="0.2.1"
 description="A simple and lightweight package for state of the art face detection with GPU support."
 readme="README.md"
 requires-python=">=3.9"
@@ -13,9 +12,11 @@ dependencies = [
     "torch",
     "torchvision",
 ]
+dynamic = ["version"] # Remove static version and add this line
 
 [build-system]
-requires = ["setuptools", "torch"]
+requires = ["setuptools", "torch", "hatchling", "uv-dynamic-versioning"]
+build-backend = "hatchling.build"
 
 [tool.setuptools]
 packages = ["face_detection"]
@@ -26,3 +27,5 @@ dev = [
     "pytest>=8.3.5",
     "ruff"
 ]
+[tool.hatch.version]
+source = "uv-dynamic-versioning"

From 99d1bcc3e686c12817c55f7295362f7f0d41ecd1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?H=C3=A5kon=20Hukkel=C3=A5s?=
Date: Mon, 9 Jun 2025 18:35:30 +0200
Subject: [PATCH 8/8] misc: update README.md

---
 README.md | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/README.md b/README.md
index fb12f00..bf14c5f 100644
--- a/README.md
+++ b/README.md
@@ -116,6 +116,13 @@ boxes, landmarks, scores = detector.infer(image)
 ```
 
+## Formatting
+All code should be formatted with ruff:
+```
+uv run ruff format
+uv run ruff check
+```
+
 ## Citation
 If you find this code useful, remember to cite the original authors:
 ```