Skip to content

Commit cfd3994

Browse files
combined_detection.py add merge, fusing 3d bboxes
1 parent 75dfafb commit cfd3994

1 file changed

Lines changed: 195 additions & 66 deletions

File tree

GEMstack/onboard/perception/combined_detection.py

Lines changed: 195 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -8,47 +8,72 @@
88
import time
99
import os
1010
import yaml
11+
from typing import Dict, List, Optional, Tuple
12+
import numpy as np
13+
from scipy.spatial.transform import Rotation as R
14+
1115

1216
from jsk_recognition_msgs.msg import BoundingBox, BoundingBoxArray
1317

1418

15-
class CombinedDetector3D(Component):
16-
"""
17-
Fuses the boxes in the lists of bounding boxes published by YoloNode and
18-
PointPillarsNode with late sensor fusion.
19-
TODO: SUBSCRIBE TO BOUNDING BOX LISTS AND PERFORM LATE SENSOR FUSION IN THIS FILE.
20-
TODO: MODIFY YAML FILE FOR THE CONTROL TEAM'S BASIC PATH PLANNING CODE?
19+
# TODO: Import IOU from IOU funcs in SensorFusion?

# Reuse eval funcs?
def calculate_3d_iou(box1: "BoundingBox", box2: "BoundingBox") -> float:
    """Return the axis-aligned 3D IoU of two jsk ``BoundingBox`` messages.

    Both boxes are assumed to be expressed in the same (vehicle) frame.

    NOTE(review): orientation is ignored — each box is treated as axis-aligned
    using its center position and dimensions. This is adequate for upright
    pedestrians/vehicles; swap in an oriented-IoU (e.g. SensorFusion's eval
    funcs, per the TODO above) if yawed boxes must match precisely.

    Returns:
        IoU in [0, 1]; 0.0 for disjoint or degenerate (zero-volume) boxes.
    """
    intersection = 1.0
    for axis in ("x", "y", "z"):
        c1 = getattr(box1.pose.position, axis)
        c2 = getattr(box2.pose.position, axis)
        d1 = getattr(box1.dimensions, axis)
        d2 = getattr(box2.dimensions, axis)
        lo = max(c1 - d1 / 2.0, c2 - d2 / 2.0)
        hi = min(c1 + d1 / 2.0, c2 + d2 / 2.0)
        if hi <= lo:
            return 0.0  # no overlap along this axis => disjoint boxes
        intersection *= hi - lo
    vol1 = box1.dimensions.x * box1.dimensions.y * box1.dimensions.z
    vol2 = box2.dimensions.x * box2.dimensions.y * box2.dimensions.z
    union = vol1 + vol2 - intersection
    if union <= 0.0:
        return 0.0  # degenerate boxes
    return intersection / union
24+
25+
def merge_boxes(box1: "BoundingBox", box2: "BoundingBox") -> "BoundingBox":
    """Fuse two matched detections of the same object into a single box.

    Heuristics:
      - Average position and dimensions.
      - Average orientation (sign-aligned, renormalized quaternion mean).
      - Keep the highest confidence score (``value`` field).
      - Keep ``box1``'s label and header.

    NOTE(review): label-specific logic (e.g. trusting the camera label over
    the lidar label per class) is still TODO.
    """
    merged_box = BoundingBox()
    merged_box.header = box1.header  # Use header from one input

    # Average position.
    merged_box.pose.position.x = (box1.pose.position.x + box2.pose.position.x) / 2.0
    merged_box.pose.position.y = (box1.pose.position.y + box2.pose.position.y) / 2.0
    merged_box.pose.position.z = (box1.pose.position.z + box2.pose.position.z) / 2.0

    # Average orientation. (An earlier revision copied box1's orientation
    # verbatim despite its "Avg orientation" comment.) Summing sign-aligned
    # quaternions and renormalizing is a valid mean for nearby orientations;
    # fall back to box1's orientation if the sum is degenerate.
    q1 = box1.pose.orientation
    q2 = box2.pose.orientation
    dot = q1.x * q2.x + q1.y * q2.y + q1.z * q2.z + q1.w * q2.w
    sign = 1.0 if dot >= 0.0 else -1.0  # q and -q encode the same rotation
    sx = q1.x + sign * q2.x
    sy = q1.y + sign * q2.y
    sz = q1.z + sign * q2.z
    sw = q1.w + sign * q2.w
    norm = (sx * sx + sy * sy + sz * sz + sw * sw) ** 0.5
    if norm > 1e-9:
        merged_box.pose.orientation.x = sx / norm
        merged_box.pose.orientation.y = sy / norm
        merged_box.pose.orientation.z = sz / norm
        merged_box.pose.orientation.w = sw / norm
    else:
        merged_box.pose.orientation = box1.pose.orientation

    # Average dimensions.
    merged_box.dimensions.x = (box1.dimensions.x + box2.dimensions.x) / 2.0
    merged_box.dimensions.y = (box1.dimensions.y + box2.dimensions.y) / 2.0
    merged_box.dimensions.z = (box1.dimensions.z + box2.dimensions.z) / 2.0

    merged_box.value = max(box1.value, box2.value)  # Max score
    merged_box.label = box1.label  # Label from first box

    return merged_box
2447

25-
Supports multiple cameras; each camera’s intrinsics and extrinsics are
26-
loaded from a single YAML calibration file via plain PyYAML.
27-
"""
2848

49+
class CombinedDetector3D(Component):
    """Late sensor fusion of YOLO and PointPillars 3D bounding boxes.

    Subscribes to the two ``BoundingBoxArray`` topics (time-synchronized),
    matches boxes across detectors by 3D IoU, merges matched pairs via
    ``merge_boxes``, keeps unmatched boxes as-is, and converts everything to
    ``AgentState`` objects in either the START or CURRENT vehicle frame.
    Both publishers are expected to emit boxes already in the vehicle frame.
    """

    def __init__(
        self,
        vehicle_interface: GEMInterface,
        enable_tracking: bool = True,
        use_start_frame: bool = True,
        iou_threshold: float = 0.1,
        **kwargs
    ):
        self.vehicle_interface = vehicle_interface
        # Agents returned by update(); will hold persistent IDs once
        # _update_tracking is implemented.
        self.tracked_agents: Dict[str, AgentState] = {}
        self.ped_counter = 0
        # Latest synchronized detections; None until the first callback fires.
        self.latest_yolo_bbxs: Optional[BoundingBoxArray] = None
        self.latest_pp_bbxs: Optional[BoundingBoxArray] = None
        # Absolute vehicle pose latched on the first update() — defines the
        # START frame origin when use_start_frame is enabled.
        self.start_pose_abs: Optional[ObjectPose] = None
        self.start_time: Optional[float] = None

        self.enable_tracking = enable_tracking
        self.use_start_frame = use_start_frame
        self.iou_threshold = iou_threshold

        self.yolo_topic = "/yolo_boxes"
        self.pp_topic = "/pointpillars_boxes"

        rospy.loginfo(f"CombinedDetector3D Initialized. Subscribing to '{self.yolo_topic}' and '{self.pp_topic}'.")

    def rate(self) -> float:
        # Component update rate in Hz.
        return 8.0

    def state_inputs(self) -> list:
        # NOTE(review): update() expects an AllState and reads state.vehicle,
        # but this declares only 'vehicle' — confirm the framework delivers
        # the object update() actually needs for this declaration.
        return ['vehicle']

    def state_outputs(self) -> list:
        return ['agents']

    def initialize(self):
        """Set up the time-synchronized subscribers for both detectors."""
        self.yolo_sub = Subscriber(self.yolo_topic, BoundingBoxArray)
        self.pp_sub = Subscriber(self.pp_topic, BoundingBoxArray)

        queue_size = 10
        slop = 0.1  # max header-stamp difference (s) for a YOLO/PP pair

        self.sync = ApproximateTimeSynchronizer(
            [self.yolo_sub, self.pp_sub],
            queue_size=queue_size,
            slop=slop
        )
        self.sync.registerCallback(self.synchronized_callback)
        rospy.loginfo("CombinedDetector3D Subscribers Initialized.")

    def synchronized_callback(self, yolo_bbxs_msg: BoundingBoxArray, pp_bbxs_msg: BoundingBoxArray):
        """Cache the latest time-matched pair of bounding-box lists."""
        self.latest_yolo_bbxs = yolo_bbxs_msg
        self.latest_pp_bbxs = pp_bbxs_msg

    def update(self, state: AllState) -> Dict[str, AgentState]:
        """Fuse the latest detections and return the current agent dict.

        Returns {} until data from BOTH sensors has arrived.
        """
        vehicle = state.vehicle
        current_time = self.vehicle_interface.time()

        # Snapshot the latest messages so the ROS callback can't swap them
        # out mid-update.
        yolo_bbx_array = self.latest_yolo_bbxs
        pp_bbx_array = self.latest_pp_bbxs

        if yolo_bbx_array is None or pp_bbx_array is None:
            return {}

        if self.start_time is None:
            self.start_time = current_time

        # Latch the START frame origin on the first update with vehicle data.
        if self.use_start_frame and self.start_pose_abs is None:
            self.start_pose_abs = vehicle.pose
            rospy.loginfo("CombinedDetector3D latched start pose.")

        current_frame_agents = self._fuse_bounding_boxes(yolo_bbx_array, pp_bbx_array, vehicle, current_time)

        if self.enable_tracking:
            self._update_tracking(current_frame_agents)
        else:
            self.tracked_agents = current_frame_agents  # NOTE: No deepcopy

        return self.tracked_agents

    def _fuse_bounding_boxes(self,
                             yolo_bbx_array: BoundingBoxArray,
                             pp_bbx_array: BoundingBoxArray,
                             vehicle_state: VehicleState,
                             current_time: float
                             ) -> Dict[str, AgentState]:
        """Match the two box lists by IoU, merge matches, keep leftovers,
        and convert every resulting box to an AgentState.

        Boxes are assumed to already be in the current vehicle frame.
        Returns a dict keyed by per-frame temporary IDs ("FrameDet_i");
        _update_tracking is responsible for persistent IDs.
        """
        current_agents_in_frame: Dict[str, AgentState] = {}
        yolo_boxes: List[BoundingBox] = yolo_bbx_array.boxes
        pp_boxes: List[BoundingBox] = pp_bbx_array.boxes

        output_frame_enum = ObjectFrameEnum.START if self.use_start_frame else ObjectFrameEnum.CURRENT

        matched_yolo_indices = set()
        matched_pp_indices = set()
        fused_boxes_list: List[BoundingBox] = []

        # Greedy NxM matching: each YOLO box takes the best still-unmatched
        # PP box above the IoU threshold. Can optimize from NxM loop
        # (e.g. Hungarian assignment) if box counts grow.
        for i, yolo_box in enumerate(yolo_boxes):
            best_match_j = -1
            best_iou = -1.0
            for j, pp_box in enumerate(pp_boxes):
                if j in matched_pp_indices:  # Skip already matched PP boxes
                    continue

                iou = calculate_3d_iou(yolo_box, pp_box)

                if iou > self.iou_threshold and iou > best_iou:
                    best_iou = iou
                    best_match_j = j

            if best_match_j != -1:
                rospy.logdebug(f"Matched YOLO box {i} with PP box {best_match_j} (IoU: {best_iou:.3f})")
                matched_yolo_indices.add(i)
                matched_pp_indices.add(best_match_j)
                merged = merge_boxes(yolo_box, pp_boxes[best_match_j])
                fused_boxes_list.append(merged)

        # Unmatched boxes are kept with their original confidence
        # (stored in each box's 'value' field).
        for i, yolo_box in enumerate(yolo_boxes):
            if i not in matched_yolo_indices:
                fused_boxes_list.append(yolo_box)
                rospy.logdebug(f"Kept unmatched YOLO box {i}")

        for j, pp_box in enumerate(pp_boxes):
            if j not in matched_pp_indices:
                fused_boxes_list.append(pp_box)
                rospy.logdebug(f"Kept unmatched PP box {j}")

        # The vehicle->START transform does not depend on the box, so compute
        # it once here instead of once per fused box (it was loop-invariant).
        T_vehicle_to_start = None
        if self.use_start_frame and self.start_pose_abs is not None:
            vehicle_pose_in_start_frame = vehicle_state.pose.to_frame(
                ObjectFrameEnum.START, vehicle_state.pose, self.start_pose_abs
            )
            T_vehicle_to_start = pose_to_matrix(vehicle_pose_in_start_frame)

        # Convert every fused box to an AgentState.
        for i, box in enumerate(fused_boxes_list):
            try:
                # Position/orientation in the current vehicle frame.
                pos_x = box.pose.position.x
                pos_y = box.pose.position.y
                pos_z = box.pose.position.z
                quat_x = box.pose.orientation.x
                quat_y = box.pose.orientation.y
                quat_z = box.pose.orientation.z
                quat_w = box.pose.orientation.w
                # 'zyx' intrinsic Euler order yields (yaw, pitch, roll).
                yaw, pitch, roll = R.from_quat([quat_x, quat_y, quat_z, quat_w]).as_euler('zyx', degrees=False)

                # Transform the position into the START frame if configured.
                if T_vehicle_to_start is not None:
                    object_pose_current_h = np.array([[pos_x], [pos_y], [pos_z], [1.0]])
                    object_pose_start_h = T_vehicle_to_start @ object_pose_current_h
                    final_x, final_y, final_z = object_pose_start_h[:3, 0]
                else:
                    final_x, final_y, final_z = pos_x, pos_y, pos_z

                final_pose = ObjectPose(
                    t=current_time, x=final_x, y=final_y, z=final_z,
                    yaw=yaw, pitch=pitch, roll=roll, frame=output_frame_enum
                )
                dims = (box.dimensions.x, box.dimensions.y, box.dimensions.z)
                # Mapping based on the integer label from the BoundingBox msg.
                # NOTE(review): only label 0 -> PEDESTRIAN is handled; refine
                # once the full label set of both detectors is confirmed.
                agent_type = AgentEnum.PEDESTRIAN if box.label == 0 else AgentEnum.UNKNOWN
                activity = AgentActivityEnum.UNKNOWN  # Placeholder

                # Temporary per-frame id; _update_tracking assigns
                # persistent IDs.
                temp_agent_id = f"FrameDet_{i}"

                current_agents_in_frame[temp_agent_id] = AgentState(
                    pose=final_pose, dimensions=dims, outline=None, type=agent_type,
                    activity=activity, velocity=(0.0, 0.0, 0.0), yaw_rate=0.0
                    # score=box.value # score
                )
            except Exception as e:
                # Best-effort: a malformed box must not drop the whole frame.
                rospy.logwarn(f"Failed to convert final BoundingBox {i} to AgentState: {e}")
                continue

        return current_agents_in_frame

    def _update_tracking(self, current_frame_agents: Dict[str, AgentState]):
        """Associate current-frame detections with persisted agents.

        TODO tracking:
          - Match 'current_frame_agents' to 'self.tracked_agents'.
            Use position (already in the correct START or CURRENT frame),
            maybe size/type; needs a matching algorithm (nearest neighbor
            within a radius, Hungarian, ...).
          - For matched pairs: update the existing agent in
            'self.tracked_agents' (smooth pose, update timestamp).
          - For unmatched current agents: new detections — assign a
            persistent ID (e.g. f"Ped_{self.ped_counter}"), increment
            self.ped_counter, add to 'self.tracked_agents'.
          - For tracked agents not seen this frame: increment a
            missed-frames counter / check timestamp and drop after too long
            (e.g. > 1 second).
        """
        # Placeholder: pass detections through without tracking.
        self.tracked_agents = current_frame_agents
241+
242+
243+
115244
# Fake 2D Combined Detector for testing purposes
116245
# TODO FIX THIS
117246
class FakeCombinedDetector2D(Component):
@@ -151,4 +280,4 @@ def box_to_fake_agent(box):
151280

152281

153282
if __name__ == '__main__':
154-
pass
283+
pass

0 commit comments

Comments
 (0)