diff --git a/media/coverage_badge.svg b/media/coverage_badge.svg index 0ec92b7..fd6a4f9 100644 --- a/media/coverage_badge.svg +++ b/media/coverage_badge.svg @@ -15,7 +15,7 @@ coverage coverage - 21% - 21% + 22% + 22% diff --git a/requirements.txt b/requirements.txt index 6b50db1..a45a94b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,3 +16,4 @@ psutil seaborn mmcv>=1.5.0,<2.0.0 dill==0.4.0 +requests>=2.32.2 diff --git a/tools/modules/heads.py b/tools/modules/heads.py index 5c407c9..351ca9e 100644 --- a/tools/modules/heads.py +++ b/tools/modules/heads.py @@ -578,8 +578,9 @@ def forward(self, x): boxes = [] scores = [] for i in range(self.nl): + # Box regression box = self.cv2[i](x[i]) - + # Class scores cls_regress = self.cv3[i](x[i]) boxes.append(box.view(bs, 4, -1)) scores.append(cls_regress.view(bs, self.nc, -1)) @@ -590,6 +591,7 @@ def forward(self, x): "feats": x, } + # Detection output: boxes (4) + confidence (1) + class scores (nc) dbox = self._get_decode_boxes(preds) cls_scores = preds["scores"].sigmoid() # (bs, nc, num_anchors) conf, _ = cls_scores.max(1, keepdim=True) # ReduceMax: (bs, 1, num_anchors) @@ -599,7 +601,6 @@ def forward(self, x): def _get_decode_boxes(self, preds): # Emulate ultralytics.nn.modules.head.Detect._get_decode_boxes for end2end export. - # preds["boxes"]: (N, 4, A), preds["feats"]: list of feature maps (N, C, H_i, W_i) shape = preds["feats"][0].shape # BCHW if self.dynamic or self.shape != shape: anchor_points, stride_tensor = self._make_anchors( @@ -609,8 +610,6 @@ def _get_decode_boxes(self, preds): self.strides = stride_tensor.transpose(0, 1) self.shape = shape - # anchors: (1, 2, A), strides: (1, 1, A) - # returns: decoded boxes (N, 4, A) in xyxy pixels dbox = self.dist2bbox( preds["boxes"], self.anchors.unsqueeze(0), xywh=False, dim=1 ) @@ -703,14 +702,12 @@ def forward(self, x): for i in range(self.nl): # Box regression box = self.cv2[i](x[i]) - boxes.append(box.view(bs, 4, -1)) - # Class scores cls_regress = self.cv3[i](x[i]) - scores.append(cls_regress.view(bs, self.nc, -1)) - # Mask coefficients mask = self.cv4[i](x[i]) + boxes.append(box.view(bs, 4, -1)) + scores.append(cls_regress.view(bs, self.nc, -1)) mask_coeffs.append(mask.view(bs, self.nm, -1)) preds = { @@ -744,7 +741,7 @@ def _get_proto(self, x): Proto26 takes all feature maps and returns prototype masks. """ - return self.proto(x, return_semseg=False) + return self.proto(x, return_semantic=False) class PoseV26(DetectV26): @@ -798,15 +795,13 @@ def forward(self, x): for i in range(self.nl): # Box regression box = self.cv2[i](x[i]) - boxes.append(box.view(bs, 4, -1)) - # Class scores cls_regress = self.cv3[i](x[i]) - scores.append(cls_regress.view(bs, self.nc, -1)) - # Keypoints: cv4 extracts features, cv4_kpts predicts keypoints feat = self.cv4[i](x[i]) kpt = self.cv4_kpts[i](feat) + boxes.append(box.view(bs, 4, -1)) + scores.append(cls_regress.view(bs, self.nc, -1)) kpts_raw.append(kpt.view(bs, self.nk, -1)) preds = { @@ -826,7 +821,6 @@ def forward(self, x): y = y.permute(0, 2, 1) # (bs, num_anchors, 5+nc) # Decode and concatenate keypoints - # Note: After _get_decode_boxes, self.anchors is (2, A) and self.strides is (1, A) kpts_cat = torch.cat(kpts_raw, dim=2) # (bs, nk, num_anchors) kpts_decoded = self._kpts_decode(bs, kpts_cat) # (bs, nk, num_anchors) kpts_decoded = kpts_decoded.permute(0, 2, 1) # (bs, num_anchors, nk) @@ -839,7 +833,6 @@ def _kpts_decode(self, bs, kpts): Emulate ultralytics.nn.modules.head.Pose26.kpts_decode. Args: - bs: Batch size kpts: Raw keypoint predictions (bs, nk, num_anchors) Returns: @@ -855,9 +848,8 @@ def _kpts_decode(self, bs, kpts): # After _get_decode_boxes, anchors and strides are already in the right format: # self.anchors: (2, num_anchors), self.strides: (1, num_anchors) # Reshape for broadcasting with y[:, :, :2, :] which is (bs, num_kpts, 2, num_anchors) - anchors_reshaped = self.anchors.view(1, 1, 2, num_anchors) # (1, 1, 2, A) - strides_reshaped = self.strides.view(1, 1, 1, num_anchors) # (1, 1, 1, A) - + anchors_reshaped = self.anchors.view(1, 1, 2, num_anchors) + strides_reshaped = self.strides.view(1, 1, 1, num_anchors) # Decode xy: (raw + anchor) * stride xy = (y[:, :, :2, :] + anchors_reshaped) * strides_reshaped diff --git a/tools/yolo/ultralytics b/tools/yolo/ultralytics index 0537be1..d03e470 160000 --- a/tools/yolo/ultralytics +++ b/tools/yolo/ultralytics @@ -1 +1 @@ -Subproject commit 0537be116924fef9ec3a66e4689134a6a59e7dce +Subproject commit d03e470ad14a5505c66985e27eb9e752c877983c diff --git a/tools/yolo/yolov5 b/tools/yolo/yolov5 index c7a2d6b..59fd578 160000 --- a/tools/yolo/yolov5 +++ b/tools/yolo/yolov5 @@ -1 +1 @@ -Subproject commit c7a2d6bcf4f7e88db53f3d09a8484391dac7bc89 +Subproject commit 59fd578644bf7210c8f8f83c7bc6831b53161728 diff --git a/tools/yolo/yolov5_exporter.py b/tools/yolo/yolov5_exporter.py index c83fbe8..818ee86 100644 --- a/tools/yolo/yolov5_exporter.py +++ b/tools/yolo/yolov5_exporter.py @@ -13,6 +13,11 @@ from tools.utils.constants import Encoding current_dir = os.path.dirname(os.path.abspath(__file__)) +# Add ultralytics submodule to sys.path so that yolov5's internal +# `import ultralytics` resolves to the local submodule +ultralytics_path = os.path.join(current_dir, "ultralytics") +if ultralytics_path not in sys.path: + sys.path.insert(0, ultralytics_path) yolov5_path = os.path.join(current_dir, "yolov5") # Ensure it's first in sys.path if yolov5_path not in sys.path: