@@ -114,6 +114,24 @@ def merge_boxes(box1: BoundingBox, box2: BoundingBox, mode: str = "Average") ->
114114
115115 # For label, use the one from the higher-score box
116116 merged_box .label = box1 .label if score1 >= score2 else box2 .label
117+
118+ elif mode == "BEV" :
119+ # Merge the bounding boxes from Bird's Eye View (BEV)
120+ # Average the x and y centers and dimensions
121+ # Average the yaw orientation
122+ # Use YOLO bounding box (box1) for z dimension and z center
123+ merged_box .pose .position .x = (box1 .pose .position .x + box2 .pose .position .x ) / 2.0
124+ merged_box .pose .position .y = (box1 .pose .position .y + box2 .pose .position .y ) / 2.0
125+ merged_box .pose .position .z = copy .deepcopy (box1 .pose .orientation )
126+
127+ # Avg orientations (quaternions)
128+ merged_box .pose .orientation = avg_orientations (box1 .pose .orientation , box2 .pose .orientation )
129+
130+ merged_box .dimensions .x = (box1 .dimensions .x + box2 .dimensions .x ) / 2.0
131+ merged_box .dimensions .y = (box1 .dimensions .y + box2 .dimensions .y ) / 2.0
132+ merged_box .dimensions .z = copy .deepcopy (box1 .dimensions .z )
133+
134+ merged_box .label = box1 .label # Label from first box
117135
118136 else : # Default to "Average" mode
119137 # Original averaging logic
@@ -156,6 +174,45 @@ def get_volume(box):
156174 return box .dimensions .x * box .dimensions .y * box .dimensions .z
157175
158176
177+ def get_bev_aabb_corners (box : BoundingBox ):
178+ """
179+ Get axis-aligned bounding box corners for 2D Bird's Eye View IoU calculation.
180+ Returns:
181+ min_x, max_x, min_y, max_y
182+ """
183+ cx , cy = box .pose .position .x , box .pose .position .y
184+ l , w = box .dimensions .x , box .dimensions .y
185+
186+ return cx , cx + l , cy , cy + w
187+
188+
189+ def calculate_bev_iou (box1 : BoundingBox , box2 : BoundingBox ):
190+ """
191+ Calculates the 2D Bird's Eye View IoU between two bounding boxes.
192+ Ignores z-axis and yaw (assumes axis aligned bounding boxes).
193+ """
194+ min_x1 , max_x1 , min_y1 , max_y1 = get_bev_aabb_corners (box1 )
195+ min_x2 , max_x2 , min_y2 , max_y2 = get_bev_aabb_corners (box2 )
196+
197+ # Calculate intersection in BEV
198+ inter_min_x = max (min_x1 , min_x2 )
199+ inter_max_x = min (max_x1 , max_x2 )
200+ inter_min_y = max (min_y1 , min_y2 )
201+ inter_max_y = min (max_y1 , max_y2 )
202+
203+ inter_w = max (0 , inter_max_x - inter_min_x )
204+ inter_h = max (0 , inter_max_y - inter_min_y )
205+ intersection_area = inter_w * inter_h
206+
207+ # Calculate union area
208+ area1 = max (box1 .dimensions .x * box1 .dimensions .y , 1e-6 )
209+ area2 = max (box2 .dimensions .x * box2 .dimensions .y , 1e-6 )
210+ union_area = max (area1 + area2 - intersection_area , 1e-6 )
211+
212+ iou = intersection_area / union_area
213+ return max (0.0 , min (iou , 1.0 )) # Clamp to [0, 1]
214+
215+
159216class CombinedDetector3D (Component ):
160217 """
161218 Combines detections from multiple 3D object detectors (YOLO and PointPillars).
@@ -185,6 +242,7 @@ def __init__(
185242 self .use_start_frame = use_start_frame
186243 self .iou_threshold = iou_threshold
187244 self .merge_mode = merge_mode
245+ self .merge_in_bev = (merge_mode == "BEV" )
188246
189247 self .yolo_topic = '/yolo_boxes'
190248 self .pp_topic = '/pointpillars_boxes'
@@ -227,47 +285,131 @@ def synchronized_callback(self, yolo_bbxs_msg: BoundingBoxArray, pp_bbxs_msg: Bo
227285 def update (self , vehicle : VehicleState ) -> Dict [str , AgentState ]:
228286 """Update function called by the GEMstack pipeline."""
229287 current_time = self .vehicle_interface .time ()
288+ agents : Dict [str , AgentState ] = {}
289+
290+ if self .start_time is None :
291+ self .start_time = current_time
292+ if self .start_pose_abs is None :
293+ self .start_pose_abs = vehicle .pose
294+ rospy .loginfo ("CombinedDetector3D latched start pose." )
230295
231296 yolo_bbx_array = copy .deepcopy (self .latest_yolo_bbxs )
232297 pp_bbx_array = copy .deepcopy (self .latest_pp_bbxs )
233298
234299 if yolo_bbx_array is None or pp_bbx_array is None :
235300 return {}
236301
237- if self .start_time is None :
238- self .start_time = current_time
239- if self .use_start_frame and self .start_pose_abs is None :
240- self .start_pose_abs = vehicle .pose
241- rospy .loginfo ("CombinedDetector3D latched start pose." )
302+ original_header = yolo_bbx_array .header
303+ fused_boxes_list = self ._fuse_bounding_boxes (yolo_bbx_array , pp_bbx_array )
242304
243- return self ._fuse_bounding_boxes (yolo_bbx_array , pp_bbx_array , vehicle , current_time )
305+ # Used to visualize the combined results in the current frame
306+ fused_bb_array = BoundingBoxArray ()
307+ fused_bb_array .header = original_header
308+
309+ for i , box in enumerate (fused_boxes_list ):
310+ fused_bb_array .boxes .append (box )
311+ rospy .loginfo (len (fused_boxes_list ))
312+
313+ # Get position and orientation in current vehicle frame
314+ pos_x = box .pose .position .x
315+ pos_y = box .pose .position .y
316+ pos_z = box .pose .position .z
317+ quat_x = box .pose .orientation .x
318+ quat_y = box .pose .orientation .y
319+ quat_z = box .pose .orientation .z
320+ quat_w = box .pose .orientation .w
321+ yaw , pitch , roll = R .from_quat ([quat_x , quat_y , quat_z , quat_w ]).as_euler ('zyx' , degrees = False )
322+
323+ # Convert to start frame
324+ vehicle_start_pose = vehicle .pose .to_frame (
325+ ObjectFrameEnum .START , vehicle .pose , self .start_pose_abs
326+ )
327+ T_vehicle_to_start = pose_to_matrix (vehicle_start_pose )
328+ object_pose_current_h = np .array ([[pos_x ],[pos_y ],[pos_z ],[1.0 ]])
329+ object_pose_start_h = T_vehicle_to_start @ object_pose_current_h
330+ final_x , final_y , final_z = object_pose_start_h [:3 , 0 ]
331+
332+ new_pose = ObjectPose (
333+ t = current_time , x = final_x , y = final_y , z = final_z ,
334+ yaw = yaw , pitch = pitch , roll = roll , frame = ObjectFrameEnum .START
335+ )
336+ dims = (box .dimensions .x , box .dimensions .y , box .dimensions .z * 2.0 ) # AgentState has z center on the floor and height is full height.
337+
338+ new_pose = ObjectPose (
339+ t = current_time , x = final_x , y = final_y , z = final_z - box .dimensions .z / 2.0 ,
340+ yaw = yaw , pitch = pitch , roll = roll , frame = ObjectFrameEnum .START
341+ )
342+
343+ existing_id = match_existing_pedestrian (
344+ new_center = np .array ([new_pose .x , new_pose .y , new_pose .z ]),
345+ new_dims = dims ,
346+ existing_agents = self .tracked_agents ,
347+ distance_threshold = 2.0
348+ )
349+
350+ if existing_id is not None :
351+ old_state = self .tracked_agents [existing_id ]
352+ dt = new_pose .t - old_state .pose .t
353+ vx , vy , vz = compute_velocity (old_state .pose , new_pose , dt )
354+ updated_agent = AgentState (
355+ pose = new_pose ,
356+ dimensions = dims ,
357+ outline = None ,
358+ type = AgentEnum .PEDESTRIAN ,
359+ activity = AgentActivityEnum .MOVING ,
360+ velocity = (vx , vy , vz ),
361+ yaw_rate = 0
362+ )
363+ agents [existing_id ] = updated_agent
364+ self .tracked_agents [existing_id ] = updated_agent
365+ else :
366+ agent_id = f"pedestrian{ self .ped_counter } "
367+ self .ped_counter += 1
368+ new_agent = AgentState (
369+ pose = new_pose ,
370+ dimensions = dims ,
371+ outline = None ,
372+ type = AgentEnum .PEDESTRIAN ,
373+ activity = AgentActivityEnum .MOVING ,
374+ velocity = (0 , 0 , 0 ),
375+ yaw_rate = 0
376+ )
377+ agents [agent_id ] = new_agent
378+ self .tracked_agents [agent_id ] = new_agent
379+
380+ self .pub_fused .publish (fused_bb_array )
381+
382+ stale_ids = [agent_id for agent_id , agent in self .tracked_agents .items ()
383+ if current_time - agent .pose .t > 5.0 ]
384+ for agent_id in stale_ids :
385+ rospy .loginfo (f"Removing stale agent: { agent_id } \n " )
386+ for agent_id , agent in agents .items ():
387+ p = agent .pose
388+ # Format pose and velocity with 3 decimals (or as needed)
389+ rospy .loginfo (
390+ f"Agent ID: { agent_id } \n "
391+ f"Pose: (x: { p .x :.3f} , y: { p .y :.3f} , z: { p .z :.3f} , "
392+ f"yaw: { p .yaw :.3f} , pitch: { p .pitch :.3f} , roll: { p .roll :.3f} )\n "
393+ f"Velocity: (vx: { agent .velocity [0 ]:.3f} , vy: { agent .velocity [1 ]:.3f} , vz: { agent .velocity [2 ]:.3f} )\n "
394+ )
395+
396+ return agents
244397
245398
246399 def _fuse_bounding_boxes (self ,
247400 yolo_bbx_array : BoundingBoxArray ,
248401 pp_bbx_array : BoundingBoxArray ,
249- vehicle_state : VehicleState ,
250- current_time : float
251- ) -> Dict [str , AgentState ]:
402+ ):
252403 """
253404 Fuse bounding boxes from multiple detectors.
254405
255406 Args:
256407 yolo_bbx_array: Bounding boxes from YOLO detector
257408 pp_bbx_array: Bounding boxes from PointPillars detector
258- vehicle_state: Current vehicle state
259- current_time: Current timestamp
260-
261- Returns:
262- Dictionary of agent states
263409 """
264- original_header = yolo_bbx_array .header
265- agents : Dict [str , AgentState ] = {}
266410 yolo_boxes : List [BoundingBox ] = yolo_bbx_array .boxes
267411 pp_boxes : List [BoundingBox ] = pp_bbx_array .boxes
268412
269- output_frame_enum = ObjectFrameEnum .START if self .use_start_frame else ObjectFrameEnum .CURRENT
270-
271413 matched_yolo_indices = set ()
272414 matched_pp_indices = set ()
273415 fused_boxes_list : List [BoundingBox ] = []
@@ -282,7 +424,10 @@ def _fuse_bounding_boxes(self,
282424 continue
283425
284426 ## IoU
285- iou = calculate_3d_iou (yolo_box , pp_box , get_aabb_corners , get_volume )
427+ if self .merge_in_bev :
428+ iou = calculate_bev_iou (yolo_box , pp_box )
429+ else :
430+ iou = calculate_3d_iou (yolo_box , pp_box , get_aabb_corners , get_volume )
286431
287432 if iou > self .iou_threshold and iou > best_iou :
288433 best_iou = iou
@@ -308,104 +453,7 @@ def _fuse_bounding_boxes(self,
308453 fused_boxes_list .append (pp_box )
309454 rospy .logdebug (f"Kept unmatched PP box { j } " )
310455
311- # Work in progress to visualize combined results
312- fused_bb_array = BoundingBoxArray ()
313- fused_bb_array .header = original_header
314-
315- for i , box in enumerate (fused_boxes_list ):
316- fused_bb_array .boxes .append (box )
317- rospy .loginfo (len (fused_boxes_list ))
318-
319- try :
320- # Get position and orientation in current vehicle frame
321- pos_x = box .pose .position .x
322- pos_y = box .pose .position .y
323- pos_z = box .pose .position .z
324- quat_x = box .pose .orientation .x
325- quat_y = box .pose .orientation .y
326- quat_z = box .pose .orientation .z
327- quat_w = box .pose .orientation .w
328- yaw , pitch , roll = R .from_quat ([quat_x , quat_y , quat_z , quat_w ]).as_euler ('zyx' , degrees = False )
329-
330- # Convert to start frame
331- vehicle_pose_in_start_frame = vehicle_state .pose .to_frame (
332- ObjectFrameEnum .START , vehicle_state .pose , self .start_pose_abs
333- )
334- T_vehicle_to_start = pose_to_matrix (vehicle_pose_in_start_frame )
335- object_pose_current_h = np .array ([[pos_x ],[pos_y ],[pos_z ],[1.0 ]])
336- object_pose_start_h = T_vehicle_to_start @ object_pose_current_h
337- final_x , final_y , final_z = object_pose_start_h [:3 , 0 ]
338-
339- new_pose = ObjectPose (
340- t = current_time , x = final_x , y = final_y , z = final_z ,
341- yaw = yaw , pitch = pitch , roll = roll , frame = output_frame_enum
342- )
343- dims = (box .dimensions .x , box .dimensions .y , box .dimensions .z )
344-
345- new_pose = ObjectPose (
346- t = current_time , x = final_x , y = final_y , z = final_z - box .dimensions .z / 2.0 ,
347- yaw = yaw , pitch = pitch , roll = roll , frame = output_frame_enum
348- )
349- dims [2 ] = dims [2 ] * 2.0 # AgentState has z center on the floor and height is full height.
350-
351- existing_id = match_existing_pedestrian (
352- new_center = np .array ([new_pose .x , new_pose .y , new_pose .z ]),
353- new_dims = dims ,
354- existing_agents = self .tracked_agents ,
355- distance_threshold = 2.0
356- )
357-
358- if existing_id is not None :
359- old_state = self .tracked_agents [existing_id ]
360- dt = new_pose .t - old_state .pose .t
361- vx , vy , vz = compute_velocity (old_state .pose , new_pose , dt )
362- updated_agent = AgentState (
363- pose = new_pose ,
364- dimensions = dims ,
365- outline = None ,
366- type = AgentEnum .PEDESTRIAN ,
367- activity = AgentActivityEnum .MOVING ,
368- velocity = (vx , vy , vz ),
369- yaw_rate = 0
370- )
371- agents [existing_id ] = updated_agent
372- self .tracked_agents [existing_id ] = updated_agent
373- else :
374- agent_id = f"pedestrian{ self .ped_counter } "
375- self .ped_counter += 1
376- new_agent = AgentState (
377- pose = new_pose ,
378- dimensions = dims ,
379- outline = None ,
380- type = AgentEnum .PEDESTRIAN ,
381- activity = AgentActivityEnum .MOVING ,
382- velocity = (0 , 0 , 0 ),
383- yaw_rate = 0
384- )
385- agents [agent_id ] = new_agent
386- self .tracked_agents [agent_id ] = new_agent
387-
388- except Exception as e :
389- rospy .logwarn (f"Failed to convert final BoundingBox { i } to AgentState: { e } " )
390- continue
391-
392- self .pub_fused .publish (fused_bb_array )
393-
394- stale_ids = [agent_id for agent_id , agent in self .tracked_agents .items ()
395- if current_time - agent .pose .t > 5.0 ]
396- for agent_id in stale_ids :
397- rospy .loginfo (f"Removing stale agent: { agent_id } \n " )
398- for agent_id , agent in agents .items ():
399- p = agent .pose
400- # Format pose and velocity with 3 decimals (or as needed)
401- rospy .loginfo (
402- f"Agent ID: { agent_id } \n "
403- f"Pose: (x: { p .x :.3f} , y: { p .y :.3f} , z: { p .z :.3f} , "
404- f"yaw: { p .yaw :.3f} , pitch: { p .pitch :.3f} , roll: { p .roll :.3f} )\n "
405- f"Velocity: (vx: { agent .velocity [0 ]:.3f} , vy: { agent .velocity [1 ]:.3f} , vz: { agent .velocity [2 ]:.3f} )\n "
406- )
407-
408- return agents
456+ return fused_boxes_list
409457
410458
411459# Fake 2D Combined Detector for testing purposes
0 commit comments