From 9535a45274cb77254ac8cd6b5bbeae00db9b8b51 Mon Sep 17 00:00:00 2001 From: Dani Rogmans Date: Tue, 10 Mar 2026 13:16:59 +0000 Subject: [PATCH 01/18] submodule upgrades --- tools/yolo/YOLOv6 | 2 +- tools/yolo/ultralytics | 2 +- tools/yolo/yolov5 | 2 +- tools/yolo/yolov5_exporter.py | 6 ++++++ 4 files changed, 9 insertions(+), 3 deletions(-) diff --git a/tools/yolo/YOLOv6 b/tools/yolo/YOLOv6 index 081be9a..e86a483 160000 --- a/tools/yolo/YOLOv6 +++ b/tools/yolo/YOLOv6 @@ -1 +1 @@ -Subproject commit 081be9a0ed2878ce126a9e9ec04d5aa03af4dbe5 +Subproject commit e86a483f3f6bded25d45970b56831345a99744a4 diff --git a/tools/yolo/ultralytics b/tools/yolo/ultralytics index 0537be1..f55a0cc 160000 --- a/tools/yolo/ultralytics +++ b/tools/yolo/ultralytics @@ -1 +1 @@ -Subproject commit 0537be116924fef9ec3a66e4689134a6a59e7dce +Subproject commit f55a0ccd79312703b0e1e2d9f38aecab3a6dd202 diff --git a/tools/yolo/yolov5 b/tools/yolo/yolov5 index c7a2d6b..da75ed9 160000 --- a/tools/yolo/yolov5 +++ b/tools/yolo/yolov5 @@ -1 +1 @@ -Subproject commit c7a2d6bcf4f7e88db53f3d09a8484391dac7bc89 +Subproject commit da75ed9d0898627adf6c42e7cc4e65a64b2a1738 diff --git a/tools/yolo/yolov5_exporter.py b/tools/yolo/yolov5_exporter.py index c83fbe8..9defb8c 100644 --- a/tools/yolo/yolov5_exporter.py +++ b/tools/yolo/yolov5_exporter.py @@ -13,6 +13,12 @@ from tools.utils.constants import Encoding current_dir = os.path.dirname(os.path.abspath(__file__)) +# Add ultralytics submodule to sys.path so that yolov5's internal +# `import ultralytics` resolves to the local submodule instead of +# triggering an auto-install via pip. +ultralytics_path = os.path.join(current_dir, "ultralytics") +if ultralytics_path not in sys.path: + sys.path.insert(0, ultralytics_path) yolov5_path = os.path.join(current_dir, "yolov5") # Ensure it's first in sys.path if yolov5_path not in sys.path: From f09ff0d4ec763ce816b5cbcd5ecdabe768842dbe Mon Sep 17 00:00:00 2001 From: Dani Rogmans Date: Wed, 11 Mar 2026 09:11:05 +0000 Subject: [PATCH 02/18] remove sys.path patch --- tools/yolo/yolov5_exporter.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tools/yolo/yolov5_exporter.py b/tools/yolo/yolov5_exporter.py index 9defb8c..c83fbe8 100644 --- a/tools/yolo/yolov5_exporter.py +++ b/tools/yolo/yolov5_exporter.py @@ -13,12 +13,6 @@ from tools.utils.constants import Encoding current_dir = os.path.dirname(os.path.abspath(__file__)) -# Add ultralytics submodule to sys.path so that yolov5's internal -# `import ultralytics` resolves to the local submodule instead of -# triggering an auto-install via pip. -ultralytics_path = os.path.join(current_dir, "ultralytics") -if ultralytics_path not in sys.path: - sys.path.insert(0, ultralytics_path) yolov5_path = os.path.join(current_dir, "yolov5") # Ensure it's first in sys.path if yolov5_path not in sys.path: From 366537fc6fa6d64ebb7369613649ee6800020248 Mon Sep 17 00:00:00 2001 From: Dani Rogmans Date: Wed, 11 Mar 2026 09:18:18 +0000 Subject: [PATCH 03/18] ultralytics requirement --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index a34eff3..e4f135e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,3 +16,4 @@ psutil seaborn mmcv>=1.5.0,<2.0.0 dill==0.4.0 +ultralytics>=8.2.64 \ No newline at end of file From 368b1b722513893e1497dfe8c60d07b0cc15bc37 Mon Sep 17 00:00:00 2001 From: Dani Rogmans Date: Wed, 11 Mar 2026 09:19:47 +0000 Subject: [PATCH 04/18] precommit newline --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index e4f135e..ff41183 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,4 +16,4 @@ psutil seaborn mmcv>=1.5.0,<2.0.0 dill==0.4.0 -ultralytics>=8.2.64 \ No newline at end of file +ultralytics>=8.2.64 From fab4fee198f44967fc1ffa454e2f37db9ccd3ec7 Mon Sep 17 00:00:00 2001 From: GitHub Actions Date: Wed, 11 Mar 2026 09:26:34 +0000 Subject: [PATCH 05/18] [Automated] Updated coverage badge --- media/coverage_badge.svg | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/media/coverage_badge.svg b/media/coverage_badge.svg index 0ec92b7..bc3407c 100644 --- a/media/coverage_badge.svg +++ b/media/coverage_badge.svg @@ -15,7 +15,7 @@ coverage coverage - 21% - 21% + 23% + 23% From 7c952d44333fa0de51691b198eb7e73861dceb28 Mon Sep 17 00:00:00 2001 From: Dani Rogmans Date: Wed, 11 Mar 2026 11:45:53 +0000 Subject: [PATCH 06/18] remove ultralytics from requirements.txt and bring back the sys.path workaround --- requirements.txt | 1 - tools/yolo/yolov5_exporter.py | 5 +++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index ff41183..a34eff3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,4 +16,3 @@ psutil seaborn mmcv>=1.5.0,<2.0.0 dill==0.4.0 -ultralytics>=8.2.64 diff --git a/tools/yolo/yolov5_exporter.py b/tools/yolo/yolov5_exporter.py index c83fbe8..818ee86 100644 --- a/tools/yolo/yolov5_exporter.py +++ b/tools/yolo/yolov5_exporter.py @@ -13,6 +13,11 @@ from tools.utils.constants import Encoding current_dir = os.path.dirname(os.path.abspath(__file__)) +# Add ultralytics submodule to sys.path so that yolov5's internal +# `import ultralytics` resolves to the local submodule +ultralytics_path = os.path.join(current_dir, "ultralytics") +if ultralytics_path not in sys.path: + sys.path.insert(0, ultralytics_path) yolov5_path = os.path.join(current_dir, "yolov5") # Ensure it's first in sys.path if yolov5_path not in sys.path: From 92d5264fa16779550c497411dfb1736a0b5e0f39 Mon Sep 17 00:00:00 2001 From: GitHub Actions Date: Wed, 11 Mar 2026 11:57:57 +0000 Subject: [PATCH 07/18] [Automated] Updated coverage badge --- media/coverage_badge.svg | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/media/coverage_badge.svg b/media/coverage_badge.svg index bc3407c..0ec92b7 100644 --- a/media/coverage_badge.svg +++ b/media/coverage_badge.svg @@ -15,7 +15,7 @@ coverage coverage - 23% - 23% + 21% + 21% From 44682651d96b03a2ba92dc4630968344370b4a23 Mon Sep 17 00:00:00 2001 From: Dani Rogmans Date: Wed, 11 Mar 2026 15:17:56 +0000 Subject: [PATCH 08/18] requirement to bypass new ultralytics automatically importing cv2 --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index a34eff3..3c17f04 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,3 +16,4 @@ psutil seaborn mmcv>=1.5.0,<2.0.0 dill==0.4.0 +opencv-python-headless>=4.6.0 From a0460636b63b1e022c261eede1050c328525b80f Mon Sep 17 00:00:00 2001 From: Dani Rogmans Date: Thu, 12 Mar 2026 09:45:54 +0000 Subject: [PATCH 09/18] luxonis-ml[data] unused --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 3c17f04..3c10fa8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,7 +3,7 @@ torchvision>=0.10.1 Pillow>=7.1.2 PyYAML>=5.3.1 gcsfs -luxonis-ml[data,nn_archive,utils]~=0.8.0 +luxonis-ml[nn_archive,utils]~=0.8.0 onnx==1.17.0 numpy>=1.19.5,<2.1.0 onnxruntime>=1.20.1 From d718c46791d702e8fe8225bf9f572effac41636d Mon Sep 17 00:00:00 2001 From: Dani Rogmans Date: Thu, 12 Mar 2026 10:52:13 +0000 Subject: [PATCH 10/18] rollback before 2ac3371a because of possible ultralytics regression --- requirements.txt | 3 +-- tools/yolo/ultralytics | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/requirements.txt b/requirements.txt index 3c10fa8..a34eff3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,7 +3,7 @@ torchvision>=0.10.1 Pillow>=7.1.2 PyYAML>=5.3.1 gcsfs -luxonis-ml[nn_archive,utils]~=0.8.0 +luxonis-ml[data,nn_archive,utils]~=0.8.0 onnx==1.17.0 numpy>=1.19.5,<2.1.0 onnxruntime>=1.20.1 @@ -16,4 +16,3 @@ psutil seaborn mmcv>=1.5.0,<2.0.0 dill==0.4.0 -opencv-python-headless>=4.6.0 diff --git a/tools/yolo/ultralytics b/tools/yolo/ultralytics index f55a0cc..9df1ad8 160000 --- a/tools/yolo/ultralytics +++ b/tools/yolo/ultralytics @@ -1 +1 @@ -Subproject commit f55a0ccd79312703b0e1e2d9f38aecab3a6dd202 +Subproject commit 9df1ad8f450d54e0d9c9bda809bb0231bd87dda1 From 50888ec78cb4e948b3e3cdb2275ea9cf4dbea670 Mon Sep 17 00:00:00 2001 From: Dani Rogmans Date: Thu, 12 Mar 2026 12:29:43 +0000 Subject: [PATCH 11/18] onnxsim down to 0.5.0 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index a34eff3..e126a80 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,7 +7,7 @@ luxonis-ml[data,nn_archive,utils]~=0.8.0 onnx==1.17.0 numpy>=1.19.5,<2.1.0 onnxruntime>=1.20.1 -onnxsim>=0.4.36 +onnxsim>=0.4.36,<0.6 s3fs tqdm s3transfer From 566abb289f59618086f4db7b9a1647a2c47df809 Mon Sep 17 00:00:00 2001 From: Dani Rogmans Date: Thu, 12 Mar 2026 13:00:48 +0000 Subject: [PATCH 12/18] restore ultralytics commit hash to newest one before rollback --- tools/yolo/ultralytics | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/yolo/ultralytics b/tools/yolo/ultralytics index 9df1ad8..f55a0cc 160000 --- a/tools/yolo/ultralytics +++ b/tools/yolo/ultralytics @@ -1 +1 @@ -Subproject commit 9df1ad8f450d54e0d9c9bda809bb0231bd87dda1 +Subproject commit f55a0ccd79312703b0e1e2d9f38aecab3a6dd202 From 5261959fe5a846216eb6d73e47f94e1f46777e99 Mon Sep 17 00:00:00 2001 From: Dani Rogmans Date: Fri, 19 Jun 2026 13:23:52 +0200 Subject: [PATCH 13/18] Revert YOLOv6 submodule bump --- tools/yolo/YOLOv6 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/yolo/YOLOv6 b/tools/yolo/YOLOv6 index e86a483..081be9a 160000 --- a/tools/yolo/YOLOv6 +++ b/tools/yolo/YOLOv6 @@ -1 +1 @@ -Subproject commit e86a483f3f6bded25d45970b56831345a99744a4 +Subproject commit 081be9a0ed2878ce126a9e9ec04d5aa03af4dbe5 From 0481d3272739a85564d397b66fef41271bb44b54 Mon Sep 17 00:00:00 2001 From: Dani Rogmans Date: Fri, 19 Jun 2026 13:24:34 +0200 Subject: [PATCH 14/18] Update ultralytics and yolov5 submodules --- tools/yolo/ultralytics | 2 +- tools/yolo/yolov5 | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/yolo/ultralytics b/tools/yolo/ultralytics index f55a0cc..d03e470 160000 --- a/tools/yolo/ultralytics +++ b/tools/yolo/ultralytics @@ -1 +1 @@ -Subproject commit f55a0ccd79312703b0e1e2d9f38aecab3a6dd202 +Subproject commit d03e470ad14a5505c66985e27eb9e752c877983c diff --git a/tools/yolo/yolov5 b/tools/yolo/yolov5 index da75ed9..59fd578 160000 --- a/tools/yolo/yolov5 +++ b/tools/yolo/yolov5 @@ -1 +1 @@ -Subproject commit da75ed9d0898627adf6c42e7cc4e65a64b2a1738 +Subproject commit 59fd578644bf7210c8f8f83c7bc6831b53161728 From eb1d6a8014582fe227ed1d902cf88418e1000988 Mon Sep 17 00:00:00 2001 From: Dani Rogmans Date: Fri, 19 Jun 2026 13:32:39 +0200 Subject: [PATCH 15/18] add requests to requirements.txt to match yolov5s new minimum --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index e126a80..6806a4a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,3 +16,4 @@ psutil seaborn mmcv>=1.5.0,<2.0.0 dill==0.4.0 +requests>=2.32.2 From f4f02a9f9f3bec90995d7320a7169cde9cec4e59 Mon Sep 17 00:00:00 2001 From: Dani Rogmans Date: Fri, 19 Jun 2026 14:29:07 +0200 Subject: [PATCH 16/18] Update DetectV26, SegmentV26 and PoseV26 for new Ultralytics head behavior --- tools/modules/heads.py | 81 ++++++++++-------------------------------- 1 file changed, 18 insertions(+), 63 deletions(-) diff --git a/tools/modules/heads.py b/tools/modules/heads.py index fb5e058..1d466be 100644 --- a/tools/modules/heads.py +++ b/tools/modules/heads.py @@ -555,6 +555,7 @@ def __init__( self.reg_max = old_detect.reg_max # DFL channels self.no = old_detect.no # number of outputs per anchor self.stride = old_detect.stride # strides computed during build + self.dfl = old_detect.dfl # Use one2one heads for NMS-free inference self.cv2 = old_detect.one2one_cv2 @@ -571,11 +572,8 @@ def forward(self, x): boxes = [] scores = [] for i in range(self.nl): - box = self.cv2[i](x[i]) - - cls_regress = self.cv3[i](x[i]) - boxes.append(box.view(bs, 4, -1)) - scores.append(cls_regress.view(bs, self.nc, -1)) + boxes.append(self.cv2[i](x[i]).view(bs, 4 * self.reg_max, -1)) + scores.append(self.cv3[i](x[i]).view(bs, self.nc, -1)) preds = { "boxes": torch.cat(boxes, dim=2), @@ -590,7 +588,6 @@ def forward(self, x): def _get_decode_boxes(self, preds): # Emulate ultralytics.nn.modules.head.Detect._get_decode_boxes for end2end export. - # preds["boxes"]: (N, 4, A), preds["feats"]: list of feature maps (N, C, H_i, W_i) shape = preds["feats"][0].shape # BCHW if self.dynamic or self.shape != shape: anchor_points, stride_tensor = self._make_anchors( @@ -600,10 +597,8 @@ def _get_decode_boxes(self, preds): self.strides = stride_tensor.transpose(0, 1) self.shape = shape - # anchors: (1, 2, A), strides: (1, 1, A) - # returns: decoded boxes (N, 4, A) in xyxy pixels dbox = self.dist2bbox( - preds["boxes"], self.anchors.unsqueeze(0), xywh=False, dim=1 + self.dfl(preds["boxes"]), self.anchors.unsqueeze(0), xywh=False, dim=1 ) return dbox * self.strides @@ -691,17 +686,9 @@ def forward(self, x): scores = [] mask_coeffs = [] for i in range(self.nl): - # Box regression - box = self.cv2[i](x[i]) - boxes.append(box.view(bs, 4, -1)) - - # Class scores - cls_regress = self.cv3[i](x[i]) - scores.append(cls_regress.view(bs, self.nc, -1)) - - # Mask coefficients - mask = self.cv4[i](x[i]) - mask_coeffs.append(mask.view(bs, self.nm, -1)) + boxes.append(self.cv2[i](x[i]).view(bs, 4 * self.reg_max, -1)) + scores.append(self.cv3[i](x[i]).view(bs, self.nc, -1)) + mask_coeffs.append(self.cv4[i](x[i]).view(bs, self.nm, -1)) preds = { "boxes": torch.cat(boxes, dim=2), @@ -732,7 +719,7 @@ def _get_proto(self, x): Proto26 takes all feature maps and returns prototype masks. """ - return self.proto(x, return_semseg=False) + return self.proto(x) class PoseV26(DetectV26): @@ -783,18 +770,10 @@ def forward(self, x): scores = [] kpts_raw = [] for i in range(self.nl): - # Box regression - box = self.cv2[i](x[i]) - boxes.append(box.view(bs, 4, -1)) - - # Class scores - cls_regress = self.cv3[i](x[i]) - scores.append(cls_regress.view(bs, self.nc, -1)) - - # Keypoints: cv4 extracts features, cv4_kpts predicts keypoints + boxes.append(self.cv2[i](x[i]).view(bs, 4 * self.reg_max, -1)) + scores.append(self.cv3[i](x[i]).view(bs, self.nc, -1)) feat = self.cv4[i](x[i]) - kpt = self.cv4_kpts[i](feat) - kpts_raw.append(kpt.view(bs, self.nk, -1)) + kpts_raw.append(self.cv4_kpts[i](feat).view(bs, self.nk, -1)) preds = { "boxes": torch.cat(boxes, dim=2), @@ -802,56 +781,32 @@ def forward(self, x): "feats": x, } - # Decode boxes to pixel coordinates (this also sets self.anchors and self.strides) - # from the parent DetectV26 dbox = self._get_decode_boxes(preds) - # Detection output: boxes (4) + class scores (nc) y = torch.cat((dbox, preds["scores"].sigmoid()), 1) # (bs, 4+nc, num_anchors) y = y.permute(0, 2, 1) # (bs, num_anchors, 4+nc) - # Decode and concatenate keypoints - # Note: After _get_decode_boxes, self.anchors is (2, A) and self.strides is (1, A) kpts_cat = torch.cat(kpts_raw, dim=2) # (bs, nk, num_anchors) - kpts_decoded = self._kpts_decode(bs, kpts_cat) # (bs, nk, num_anchors) + kpts_decoded = self._kpts_decode(kpts_cat) # (bs, nk, num_anchors) kpts_decoded = kpts_decoded.permute(0, 2, 1) # (bs, num_anchors, nk) return y, kpts_decoded - def _kpts_decode(self, bs, kpts): + def _kpts_decode(self, kpts): """Decode keypoints from raw predictions to pixel coordinates. Emulate ultralytics.nn.modules.head.Pose26.kpts_decode. Args: - bs: Batch size kpts: Raw keypoint predictions (bs, nk, num_anchors) Returns: Decoded keypoints (bs, nk, num_anchors) with x, y in pixel coords """ ndim = self.kpt_shape[1] - num_kpts = self.kpt_shape[0] - num_anchors = kpts.shape[2] - - # Reshape to (bs, num_keypoints, ndim, num_anchors) - y = kpts.view(bs, num_kpts, ndim, num_anchors) - - # After _get_decode_boxes, anchors and strides are already in the right format: - # self.anchors: (2, num_anchors), self.strides: (1, num_anchors) - # Reshape for broadcasting with y[:, :, :2, :] which is (bs, num_kpts, 2, num_anchors) - anchors_reshaped = self.anchors.view(1, 1, 2, num_anchors) # (1, 1, 2, A) - strides_reshaped = self.strides.view(1, 1, 1, num_anchors) # (1, 1, 1, A) - - # Decode xy: (raw + anchor) * stride - xy = (y[:, :, :2, :] + anchors_reshaped) * strides_reshaped - + y = kpts.clone() if ndim == 3: - # Visibility score (sigmoid) - vis = y[:, :, 2:3, :].sigmoid() - decoded = torch.cat((xy, vis), dim=2) - else: - decoded = xy - - # Reshape back to (bs, nk, num_anchors) - return decoded.view(bs, self.nk, num_anchors) + y[:, 2::ndim] = y[:, 2::ndim].sigmoid() + y[:, 0::ndim] = (y[:, 0::ndim] + self.anchors[0]) * self.strides + y[:, 1::ndim] = (y[:, 1::ndim] + self.anchors[1]) * self.strides + return y From da031c6522cb5c4f06eae06131827b7d2750caee Mon Sep 17 00:00:00 2001 From: GitHub Actions Date: Fri, 19 Jun 2026 12:44:53 +0000 Subject: [PATCH 17/18] [Automated] Updated coverage badge --- media/coverage_badge.svg | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/media/coverage_badge.svg b/media/coverage_badge.svg index 0ec92b7..fd6a4f9 100644 --- a/media/coverage_badge.svg +++ b/media/coverage_badge.svg @@ -15,7 +15,7 @@ coverage coverage - 21% - 21% + 22% + 22% From 17f8ae17e247996d5337cf872349502b2f0e1fff Mon Sep 17 00:00:00 2001 From: Dani Rogmans Date: Fri, 19 Jun 2026 17:22:31 +0200 Subject: [PATCH 18/18] Revert most head-related changes except the rename from return_semseg to return_semantic --- tools/modules/heads.py | 72 +++++++++++++++++++++++++++++++----------- 1 file changed, 54 insertions(+), 18 deletions(-) diff --git a/tools/modules/heads.py b/tools/modules/heads.py index a6cc065..351ca9e 100644 --- a/tools/modules/heads.py +++ b/tools/modules/heads.py @@ -562,7 +562,6 @@ def __init__( self.reg_max = old_detect.reg_max # DFL channels self.no = old_detect.no # number of outputs per anchor self.stride = old_detect.stride # strides computed during build - self.dfl = old_detect.dfl # Use one2one heads for NMS-free inference self.cv2 = old_detect.one2one_cv2 @@ -579,8 +578,12 @@ def forward(self, x): boxes = [] scores = [] for i in range(self.nl): - boxes.append(self.cv2[i](x[i]).view(bs, 4 * self.reg_max, -1)) - scores.append(self.cv3[i](x[i]).view(bs, self.nc, -1)) + # Box regression + box = self.cv2[i](x[i]) + # Class scores + cls_regress = self.cv3[i](x[i]) + boxes.append(box.view(bs, 4, -1)) + scores.append(cls_regress.view(bs, self.nc, -1)) preds = { "boxes": torch.cat(boxes, dim=2), @@ -588,6 +591,7 @@ def forward(self, x): "feats": x, } + # Detection output: boxes (4) + confidence (1) + class scores (nc) dbox = self._get_decode_boxes(preds) cls_scores = preds["scores"].sigmoid() # (bs, nc, num_anchors) conf, _ = cls_scores.max(1, keepdim=True) # ReduceMax: (bs, 1, num_anchors) @@ -607,7 +611,7 @@ def _get_decode_boxes(self, preds): self.shape = shape dbox = self.dist2bbox( - self.dfl(preds["boxes"]), self.anchors.unsqueeze(0), xywh=False, dim=1 + preds["boxes"], self.anchors.unsqueeze(0), xywh=False, dim=1 ) return dbox * self.strides @@ -696,9 +700,15 @@ def forward(self, x): scores = [] mask_coeffs = [] for i in range(self.nl): - boxes.append(self.cv2[i](x[i]).view(bs, 4 * self.reg_max, -1)) - scores.append(self.cv3[i](x[i]).view(bs, self.nc, -1)) - mask_coeffs.append(self.cv4[i](x[i]).view(bs, self.nm, -1)) + # Box regression + box = self.cv2[i](x[i]) + # Class scores + cls_regress = self.cv3[i](x[i]) + # Mask coefficients + mask = self.cv4[i](x[i]) + boxes.append(box.view(bs, 4, -1)) + scores.append(cls_regress.view(bs, self.nc, -1)) + mask_coeffs.append(mask.view(bs, self.nm, -1)) preds = { "boxes": torch.cat(boxes, dim=2), @@ -731,7 +741,7 @@ def _get_proto(self, x): Proto26 takes all feature maps and returns prototype masks. """ - return self.proto(x) + return self.proto(x, return_semantic=False) class PoseV26(DetectV26): @@ -783,10 +793,16 @@ def forward(self, x): scores = [] kpts_raw = [] for i in range(self.nl): - boxes.append(self.cv2[i](x[i]).view(bs, 4 * self.reg_max, -1)) - scores.append(self.cv3[i](x[i]).view(bs, self.nc, -1)) + # Box regression + box = self.cv2[i](x[i]) + # Class scores + cls_regress = self.cv3[i](x[i]) + # Keypoints: cv4 extracts features, cv4_kpts predicts keypoints feat = self.cv4[i](x[i]) - kpts_raw.append(self.cv4_kpts[i](feat).view(bs, self.nk, -1)) + kpt = self.cv4_kpts[i](feat) + boxes.append(box.view(bs, 4, -1)) + scores.append(cls_regress.view(bs, self.nc, -1)) + kpts_raw.append(kpt.view(bs, self.nk, -1)) preds = { "boxes": torch.cat(boxes, dim=2), @@ -794,6 +810,8 @@ def forward(self, x): "feats": x, } + # Decode boxes to pixel coordinates (this also sets self.anchors and self.strides) + # from the parent DetectV26 dbox = self._get_decode_boxes(preds) # Detection output: boxes (4) + confidence (1) + class scores (nc) @@ -802,13 +820,14 @@ def forward(self, x): y = torch.cat((dbox, conf, cls_scores), 1) # (bs, 4+1+nc, num_anchors) y = y.permute(0, 2, 1) # (bs, num_anchors, 5+nc) + # Decode and concatenate keypoints kpts_cat = torch.cat(kpts_raw, dim=2) # (bs, nk, num_anchors) - kpts_decoded = self._kpts_decode(kpts_cat) # (bs, nk, num_anchors) + kpts_decoded = self._kpts_decode(bs, kpts_cat) # (bs, nk, num_anchors) kpts_decoded = kpts_decoded.permute(0, 2, 1) # (bs, num_anchors, nk) return y, kpts_decoded - def _kpts_decode(self, kpts): + def _kpts_decode(self, bs, kpts): """Decode keypoints from raw predictions to pixel coordinates. Emulate ultralytics.nn.modules.head.Pose26.kpts_decode. @@ -820,9 +839,26 @@ def _kpts_decode(self, kpts): Decoded keypoints (bs, nk, num_anchors) with x, y in pixel coords """ ndim = self.kpt_shape[1] - y = kpts.clone() + num_kpts = self.kpt_shape[0] + num_anchors = kpts.shape[2] + + # Reshape to (bs, num_keypoints, ndim, num_anchors) + y = kpts.view(bs, num_kpts, ndim, num_anchors) + + # After _get_decode_boxes, anchors and strides are already in the right format: + # self.anchors: (2, num_anchors), self.strides: (1, num_anchors) + # Reshape for broadcasting with y[:, :, :2, :] which is (bs, num_kpts, 2, num_anchors) + anchors_reshaped = self.anchors.view(1, 1, 2, num_anchors) + strides_reshaped = self.strides.view(1, 1, 1, num_anchors) + # Decode xy: (raw + anchor) * stride + xy = (y[:, :, :2, :] + anchors_reshaped) * strides_reshaped + if ndim == 3: - y[:, 2::ndim] = y[:, 2::ndim].sigmoid() - y[:, 0::ndim] = (y[:, 0::ndim] + self.anchors[0]) * self.strides - y[:, 1::ndim] = (y[:, 1::ndim] + self.anchors[1]) * self.strides - return y + # Visibility score (sigmoid) + vis = y[:, :, 2:3, :].sigmoid() + decoded = torch.cat((xy, vis), dim=2) + else: + decoded = xy + + # Reshape back to (bs, nk, num_anchors) + return decoded.view(bs, self.nk, num_anchors)