Skip to content

Commit a741ef0

Browse files
committed
Added BEV bounding box fusing. Fixed a bug in combined_detection.py that would trigger an exception
1 parent 3bb641b commit a741ef0

3 files changed

Lines changed: 172 additions & 123 deletions

File tree

GEMstack/onboard/perception/combined_detection.py

Lines changed: 165 additions & 117 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,24 @@ def merge_boxes(box1: BoundingBox, box2: BoundingBox, mode: str = "Average") ->
114114

115115
# For label, use the one from the higher-score box
116116
merged_box.label = box1.label if score1 >= score2 else box2.label
117+
118+
elif mode == "BEV":
119+
# Merge the bounding boxes from Bird's Eye View (BEV)
120+
# Average the x and y centers and dimensions
121+
# Average the yaw orientation
122+
# Use YOLO bounding box (box1) for z dimension and z center
123+
merged_box.pose.position.x = (box1.pose.position.x + box2.pose.position.x) / 2.0
124+
merged_box.pose.position.y = (box1.pose.position.y + box2.pose.position.y) / 2.0
125+
merged_box.pose.position.z = copy.deepcopy(box1.pose.position.z)
126+
127+
# Avg orientations (quaternions)
128+
merged_box.pose.orientation = avg_orientations(box1.pose.orientation, box2.pose.orientation)
129+
130+
merged_box.dimensions.x = (box1.dimensions.x + box2.dimensions.x) / 2.0
131+
merged_box.dimensions.y = (box1.dimensions.y + box2.dimensions.y) / 2.0
132+
merged_box.dimensions.z = copy.deepcopy(box1.dimensions.z)
133+
134+
merged_box.label = box1.label # Label from first box
117135

118136
else: # Default to "Average" mode
119137
# Original averaging logic
@@ -156,6 +174,45 @@ def get_volume(box):
156174
return box.dimensions.x * box.dimensions.y * box.dimensions.z
157175

158176

177+
def get_bev_aabb_corners(box: BoundingBox):
178+
"""
179+
Get axis-aligned bounding box corners for 2D Bird's Eye View IoU calculation.
180+
Returns:
181+
min_x, max_x, min_y, max_y
182+
"""
183+
cx, cy = box.pose.position.x, box.pose.position.y
184+
l, w = box.dimensions.x, box.dimensions.y
185+
186+
return cx - l / 2.0, cx + l / 2.0, cy - w / 2.0, cy + w / 2.0
187+
188+
189+
def calculate_bev_iou(box1: BoundingBox, box2: BoundingBox):
190+
"""
191+
Calculates the 2D Bird's Eye View IoU between two bounding boxes.
192+
Ignores z-axis and yaw (assumes axis aligned bounding boxes).
193+
"""
194+
min_x1, max_x1, min_y1, max_y1 = get_bev_aabb_corners(box1)
195+
min_x2, max_x2, min_y2, max_y2 = get_bev_aabb_corners(box2)
196+
197+
# Calculate intersection in BEV
198+
inter_min_x = max(min_x1, min_x2)
199+
inter_max_x = min(max_x1, max_x2)
200+
inter_min_y = max(min_y1, min_y2)
201+
inter_max_y = min(max_y1, max_y2)
202+
203+
inter_w = max(0, inter_max_x - inter_min_x)
204+
inter_h = max(0, inter_max_y - inter_min_y)
205+
intersection_area = inter_w * inter_h
206+
207+
# Calculate union area
208+
area1 = max(box1.dimensions.x * box1.dimensions.y, 1e-6)
209+
area2 = max(box2.dimensions.x * box2.dimensions.y, 1e-6)
210+
union_area = max(area1 + area2 - intersection_area, 1e-6)
211+
212+
iou = intersection_area / union_area
213+
return max(0.0, min(iou, 1.0)) # Clamp to [0, 1]
214+
215+
159216
class CombinedDetector3D(Component):
160217
"""
161218
Combines detections from multiple 3D object detectors (YOLO and PointPillars).
@@ -185,6 +242,7 @@ def __init__(
185242
self.use_start_frame = use_start_frame
186243
self.iou_threshold = iou_threshold
187244
self.merge_mode = merge_mode
245+
self.merge_in_bev = (merge_mode == "BEV")
188246

189247
self.yolo_topic = '/yolo_boxes'
190248
self.pp_topic = '/pointpillars_boxes'
@@ -227,47 +285,131 @@ def synchronized_callback(self, yolo_bbxs_msg: BoundingBoxArray, pp_bbxs_msg: Bo
227285
def update(self, vehicle: VehicleState) -> Dict[str, AgentState]:
228286
"""Update function called by the GEMstack pipeline."""
229287
current_time = self.vehicle_interface.time()
288+
agents: Dict[str, AgentState] = {}
289+
290+
if self.start_time is None:
291+
self.start_time = current_time
292+
if self.start_pose_abs is None:
293+
self.start_pose_abs = vehicle.pose
294+
rospy.loginfo("CombinedDetector3D latched start pose.")
230295

231296
yolo_bbx_array = copy.deepcopy(self.latest_yolo_bbxs)
232297
pp_bbx_array = copy.deepcopy(self.latest_pp_bbxs)
233298

234299
if yolo_bbx_array is None or pp_bbx_array is None:
235300
return {}
236301

237-
if self.start_time is None:
238-
self.start_time = current_time
239-
if self.use_start_frame and self.start_pose_abs is None:
240-
self.start_pose_abs = vehicle.pose
241-
rospy.loginfo("CombinedDetector3D latched start pose.")
302+
original_header = yolo_bbx_array.header
303+
fused_boxes_list = self._fuse_bounding_boxes(yolo_bbx_array, pp_bbx_array)
242304

243-
return self._fuse_bounding_boxes(yolo_bbx_array, pp_bbx_array, vehicle, current_time)
305+
# Used to visualize the combined results in the current frame
306+
fused_bb_array = BoundingBoxArray()
307+
fused_bb_array.header = original_header
308+
309+
for i, box in enumerate(fused_boxes_list):
310+
fused_bb_array.boxes.append(box)
311+
rospy.loginfo(len(fused_boxes_list))
312+
313+
# Get position and orientation in current vehicle frame
314+
pos_x = box.pose.position.x
315+
pos_y = box.pose.position.y
316+
pos_z = box.pose.position.z
317+
quat_x = box.pose.orientation.x
318+
quat_y = box.pose.orientation.y
319+
quat_z = box.pose.orientation.z
320+
quat_w = box.pose.orientation.w
321+
yaw, pitch, roll = R.from_quat([quat_x, quat_y, quat_z, quat_w]).as_euler('zyx', degrees=False)
322+
323+
# Convert to start frame
324+
vehicle_start_pose = vehicle.pose.to_frame(
325+
ObjectFrameEnum.START, vehicle.pose, self.start_pose_abs
326+
)
327+
T_vehicle_to_start = pose_to_matrix(vehicle_start_pose)
328+
object_pose_current_h = np.array([[pos_x],[pos_y],[pos_z],[1.0]])
329+
object_pose_start_h = T_vehicle_to_start @ object_pose_current_h
330+
final_x, final_y, final_z = object_pose_start_h[:3, 0]
331+
332+
new_pose = ObjectPose(
333+
t=current_time, x=final_x, y=final_y, z=final_z,
334+
yaw=yaw, pitch=pitch, roll=roll, frame=ObjectFrameEnum.START
335+
)
336+
dims = (box.dimensions.x, box.dimensions.y, box.dimensions.z * 2.0) # AgentState has z center on the floor and height is full height.
337+
338+
new_pose = ObjectPose(
339+
t=current_time, x=final_x, y=final_y, z=final_z - box.dimensions.z / 2.0,
340+
yaw=yaw, pitch=pitch, roll=roll, frame=ObjectFrameEnum.START
341+
)
342+
343+
existing_id = match_existing_pedestrian(
344+
new_center=np.array([new_pose.x, new_pose.y, new_pose.z]),
345+
new_dims=dims,
346+
existing_agents=self.tracked_agents,
347+
distance_threshold=2.0
348+
)
349+
350+
if existing_id is not None:
351+
old_state = self.tracked_agents[existing_id]
352+
dt = new_pose.t - old_state.pose.t
353+
vx, vy, vz = compute_velocity(old_state.pose, new_pose, dt)
354+
updated_agent = AgentState(
355+
pose=new_pose,
356+
dimensions=dims,
357+
outline=None,
358+
type=AgentEnum.PEDESTRIAN,
359+
activity=AgentActivityEnum.MOVING,
360+
velocity=(vx, vy, vz),
361+
yaw_rate=0
362+
)
363+
agents[existing_id] = updated_agent
364+
self.tracked_agents[existing_id] = updated_agent
365+
else:
366+
agent_id = f"pedestrian{self.ped_counter}"
367+
self.ped_counter += 1
368+
new_agent = AgentState(
369+
pose=new_pose,
370+
dimensions=dims,
371+
outline=None,
372+
type=AgentEnum.PEDESTRIAN,
373+
activity=AgentActivityEnum.MOVING,
374+
velocity=(0, 0, 0),
375+
yaw_rate=0
376+
)
377+
agents[agent_id] = new_agent
378+
self.tracked_agents[agent_id] = new_agent
379+
380+
self.pub_fused.publish(fused_bb_array)
381+
382+
stale_ids = [agent_id for agent_id, agent in self.tracked_agents.items()
383+
if current_time - agent.pose.t > 5.0]
384+
for agent_id in stale_ids:
385+
rospy.loginfo(f"Removing stale agent: {agent_id}\n")
386+
for agent_id, agent in agents.items():
387+
p = agent.pose
388+
# Format pose and velocity with 3 decimals (or as needed)
389+
rospy.loginfo(
390+
f"Agent ID: {agent_id}\n"
391+
f"Pose: (x: {p.x:.3f}, y: {p.y:.3f}, z: {p.z:.3f}, "
392+
f"yaw: {p.yaw:.3f}, pitch: {p.pitch:.3f}, roll: {p.roll:.3f})\n"
393+
f"Velocity: (vx: {agent.velocity[0]:.3f}, vy: {agent.velocity[1]:.3f}, vz: {agent.velocity[2]:.3f})\n"
394+
)
395+
396+
return agents
244397

245398

246399
def _fuse_bounding_boxes(self,
247400
yolo_bbx_array: BoundingBoxArray,
248401
pp_bbx_array: BoundingBoxArray,
249-
vehicle_state: VehicleState,
250-
current_time: float
251-
) -> Dict[str, AgentState]:
402+
):
252403
"""
253404
Fuse bounding boxes from multiple detectors.
254405
255406
Args:
256407
yolo_bbx_array: Bounding boxes from YOLO detector
257408
pp_bbx_array: Bounding boxes from PointPillars detector
258-
vehicle_state: Current vehicle state
259-
current_time: Current timestamp
260-
261-
Returns:
262-
Dictionary of agent states
263409
"""
264-
original_header = yolo_bbx_array.header
265-
agents: Dict[str, AgentState] = {}
266410
yolo_boxes: List[BoundingBox] = yolo_bbx_array.boxes
267411
pp_boxes: List[BoundingBox] = pp_bbx_array.boxes
268412

269-
output_frame_enum = ObjectFrameEnum.START if self.use_start_frame else ObjectFrameEnum.CURRENT
270-
271413
matched_yolo_indices = set()
272414
matched_pp_indices = set()
273415
fused_boxes_list: List[BoundingBox] = []
@@ -282,7 +424,10 @@ def _fuse_bounding_boxes(self,
282424
continue
283425

284426
## IoU
285-
iou = calculate_3d_iou(yolo_box, pp_box, get_aabb_corners, get_volume)
427+
if self.merge_in_bev:
428+
iou = calculate_bev_iou(yolo_box, pp_box)
429+
else:
430+
iou = calculate_3d_iou(yolo_box, pp_box, get_aabb_corners, get_volume)
286431

287432
if iou > self.iou_threshold and iou > best_iou:
288433
best_iou = iou
@@ -308,104 +453,7 @@ def _fuse_bounding_boxes(self,
308453
fused_boxes_list.append(pp_box)
309454
rospy.logdebug(f"Kept unmatched PP box {j}")
310455

311-
# Work in progress to visualize combined results
312-
fused_bb_array = BoundingBoxArray()
313-
fused_bb_array.header = original_header
314-
315-
for i, box in enumerate(fused_boxes_list):
316-
fused_bb_array.boxes.append(box)
317-
rospy.loginfo(len(fused_boxes_list))
318-
319-
try:
320-
# Get position and orientation in current vehicle frame
321-
pos_x = box.pose.position.x
322-
pos_y = box.pose.position.y
323-
pos_z = box.pose.position.z
324-
quat_x = box.pose.orientation.x
325-
quat_y = box.pose.orientation.y
326-
quat_z = box.pose.orientation.z
327-
quat_w = box.pose.orientation.w
328-
yaw, pitch, roll = R.from_quat([quat_x, quat_y, quat_z, quat_w]).as_euler('zyx', degrees=False)
329-
330-
# Convert to start frame
331-
vehicle_pose_in_start_frame = vehicle_state.pose.to_frame(
332-
ObjectFrameEnum.START, vehicle_state.pose, self.start_pose_abs
333-
)
334-
T_vehicle_to_start = pose_to_matrix(vehicle_pose_in_start_frame)
335-
object_pose_current_h = np.array([[pos_x],[pos_y],[pos_z],[1.0]])
336-
object_pose_start_h = T_vehicle_to_start @ object_pose_current_h
337-
final_x, final_y, final_z = object_pose_start_h[:3, 0]
338-
339-
new_pose = ObjectPose(
340-
t=current_time, x=final_x, y=final_y, z=final_z,
341-
yaw=yaw, pitch=pitch, roll=roll, frame=output_frame_enum
342-
)
343-
dims = (box.dimensions.x, box.dimensions.y, box.dimensions.z)
344-
345-
new_pose = ObjectPose(
346-
t=current_time, x=final_x, y=final_y, z=final_z - box.dimensions.z / 2.0,
347-
yaw=yaw, pitch=pitch, roll=roll, frame=output_frame_enum
348-
)
349-
dims[2] = dims[2] * 2.0 # AgentState has z center on the floor and height is full height.
350-
351-
existing_id = match_existing_pedestrian(
352-
new_center=np.array([new_pose.x, new_pose.y, new_pose.z]),
353-
new_dims=dims,
354-
existing_agents=self.tracked_agents,
355-
distance_threshold=2.0
356-
)
357-
358-
if existing_id is not None:
359-
old_state = self.tracked_agents[existing_id]
360-
dt = new_pose.t - old_state.pose.t
361-
vx, vy, vz = compute_velocity(old_state.pose, new_pose, dt)
362-
updated_agent = AgentState(
363-
pose=new_pose,
364-
dimensions=dims,
365-
outline=None,
366-
type=AgentEnum.PEDESTRIAN,
367-
activity=AgentActivityEnum.MOVING,
368-
velocity=(vx, vy, vz),
369-
yaw_rate=0
370-
)
371-
agents[existing_id] = updated_agent
372-
self.tracked_agents[existing_id] = updated_agent
373-
else:
374-
agent_id = f"pedestrian{self.ped_counter}"
375-
self.ped_counter += 1
376-
new_agent = AgentState(
377-
pose=new_pose,
378-
dimensions=dims,
379-
outline=None,
380-
type=AgentEnum.PEDESTRIAN,
381-
activity=AgentActivityEnum.MOVING,
382-
velocity=(0, 0, 0),
383-
yaw_rate=0
384-
)
385-
agents[agent_id] = new_agent
386-
self.tracked_agents[agent_id] = new_agent
387-
388-
except Exception as e:
389-
rospy.logwarn(f"Failed to convert final BoundingBox {i} to AgentState: {e}")
390-
continue
391-
392-
self.pub_fused.publish(fused_bb_array)
393-
394-
stale_ids = [agent_id for agent_id, agent in self.tracked_agents.items()
395-
if current_time - agent.pose.t > 5.0]
396-
for agent_id in stale_ids:
397-
rospy.loginfo(f"Removing stale agent: {agent_id}\n")
398-
for agent_id, agent in agents.items():
399-
p = agent.pose
400-
# Format pose and velocity with 3 decimals (or as needed)
401-
rospy.loginfo(
402-
f"Agent ID: {agent_id}\n"
403-
f"Pose: (x: {p.x:.3f}, y: {p.y:.3f}, z: {p.z:.3f}, "
404-
f"yaw: {p.yaw:.3f}, pitch: {p.pitch:.3f}, roll: {p.roll:.3f})\n"
405-
f"Velocity: (vx: {agent.velocity[0]:.3f}, vy: {agent.velocity[1]:.3f}, vz: {agent.velocity[2]:.3f})\n"
406-
)
407-
408-
return agents
456+
return fused_boxes_list
409457

410458

411459
# Fake 2D Combined Detector for testing purposes

GEMstack/onboard/perception/point_pillars_node.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -175,16 +175,17 @@ def synchronized_callback(self, image_msg, lidar_msg):
175175
R_vehicle = self.T_l2v[:3, :3] @ R_lidar
176176
vehicle_yaw, vehicle_pitch, vehicle_roll = R.from_matrix(R_vehicle).as_euler('zyx', degrees=False)
177177

178+
# Modify bounding box so no part of it is below ground level
178179
bottom = z_vehicle - h / 2.0
179180
if bottom < 0.0:
180181
# Cut the box at ground level:
181-
# bottom = 0.0 # Can modify this later if we want a small separation between the bounding box and ground
182-
# top = z_vehicle + h / 2.0
182+
bottom = 0.0
183+
top = z_vehicle + h / 2.0
183184

184185
# Or Shift the box vertically to make it sit at ground level:
185-
top = z_vehicle + h / 2.0
186-
top = top + abs(bottom)
187-
bottom = 0.0
186+
# top = z_vehicle + h / 2.0
187+
# top = top + abs(bottom)
188+
# bottom = 0.0
188189

189190
z_vehicle = (top - bottom) / 2.0
190191
h = abs(top - z_vehicle)

launch/combined_detection.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ drive:
1818
# optional overrides
1919
enable_tracking: True # True if you want to enable tracking function
2020
use_start_frame: True # True if you need output in START frame, otherwise in CURRENT frame
21-
merge_mode: "Score" # Merging strategy to use: "Average", "Score", or "Max"
21+
merge_mode: "BEV" # Merging strategy to use: "Average", "Score", "Max", or "BEV"
2222

2323
perception_normalization : StandardPerceptionNormalizer
2424
planning:

0 commit comments

Comments
 (0)