Skip to content

Commit b35252b

Browse files
committed
Finalize fusion.py with pedestrian centers, vel, dims, AgentStates. Merging into pedestrian_detection next.
1 parent 006b555 commit b35252b

3 files changed

Lines changed: 212 additions & 67 deletions

File tree

GEMstack/onboard/perception/fusion.py

Lines changed: 204 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,66 @@
1-
from cv_bridge import CvBridge
2-
from sensor_msgs.msg import Image, PointCloud2
3-
from ultralytics import YOLO
4-
from fusion_utils import *
1+
"""
2+
Lidar + Camera fusion and object detection
3+
"""
4+
"""
5+
Terminal 1:
6+
---------------
7+
source /opt/ros/noetic/setup.bash && source /catkin_ws/devel/setup.bash
8+
roscore
9+
10+
Terminal 2:
11+
---------------
12+
source /opt/ros/noetic/setup.bash && source /catkin_ws/devel/setup.bash
13+
python3 /home/Admin/transform.py
14+
15+
Terminal 3:
16+
---------------
17+
source /opt/ros/noetic/setup.bash && source /catkin_ws/devel/setup.bash
18+
rosbag play ~/vehicle.bag
19+
20+
Terminal 4:
21+
---------------
22+
source /opt/ros/noetic/setup.bash && source /catkin_ws/devel/setup.bash
23+
cd GEMStack
24+
python3 -m GEMstack.onboard.perception.fusion
25+
26+
Terminal 5:
27+
---------------
28+
source /opt/ros/noetic/setup.bash && source /catkin_ws/devel/setup.bash
29+
rviz
30+
"""
31+
32+
# Python
33+
import os
34+
from typing import List, Dict
35+
from collections import defaultdict
36+
# ROS, CV
537
import rospy
638
import message_filters
7-
import os
839
import tf
9-
40+
from cv_bridge import CvBridge
41+
from sensor_msgs.msg import Image, PointCloud2
42+
# GEMStack
43+
from ...state import AllState,VehicleState,ObjectPose,ObjectFrameEnum,AgentState,AgentEnum,AgentActivityEnum
44+
from ultralytics import YOLO
45+
from .fusion_utils import *
1046

1147
class Fusion3D():
48+
# TODO: Pull params into a JSON/yaml
49+
# TODO: Convert lists into np.arrays where possible, vectorize calculations
50+
# TODO: Implement logging instead of print
51+
# TODO: Finish splitting this class + utils into separate classes
52+
# TODO: Decide if we want to name dets "peds" or "objs"/"agents"
53+
# Maybe peds for now and Agents in agent_detection.py?
1254
def __init__(self):
55+
# Publish debug/viz rostopics if true
56+
self.debug = True
1357
# Setup variables
1458
self.bridge = CvBridge()
59+
# TODO: Wrap detector into GEMDetector?
1560
self.detector = YOLO(os.getcwd()+'/GEMstack/knowledge/detection/yolov8n.pt')
16-
self.last_person_boxes = []
17-
self.pedestrians = {}
18-
self.visualization = True
61+
# track_id: AgentState
62+
self.prev_agents = dict()
63+
self.current_agents = dict()
1964
self.confidence = 0.7
2065
self.classes_to_detect = 0
2166
self.ground_threshold = 1.6
@@ -30,46 +75,169 @@ def __init__(self):
3075
self.rgb_rosbag = message_filters.Subscriber('/oak/rgb/image_raw', Image)
3176
self.top_lidar_rosbag = message_filters.Subscriber('/ouster/points', PointCloud2)
3277
self.sync = message_filters.ApproximateTimeSynchronizer([self.rgb_rosbag, self.top_lidar_rosbag], queue_size=10, slop=0.1)
33-
self.sync.registerCallback(self.fusion_callback)
78+
self.sync.registerCallback(self.ouster_oak_callback)
3479

3580
# TF listener to get transformation from LiDAR to Camera
3681
self.tf_listener = tf.TransformListener()
3782

38-
# Publishers
83+
if self.debug: self.init_debug()
84+
85+
def init_debug(self,):
86+
# Debug Publishers
3987
self.pub_pedestrians_pc2 = rospy.Publisher("/point_cloud/pedestrians", PointCloud2, queue_size=10)
40-
if(self.visualization):
41-
self.pub_image = rospy.Publisher("/camera/image_detection", Image, queue_size=1)
88+
self.pub_obj_centers_pc2 = rospy.Publisher("/point_cloud/obj_centers", PointCloud2, queue_size=10)
89+
self.pub_image = rospy.Publisher("/camera/image_detection", Image, queue_size=1)
90+
91+
def update(self, vehicle : VehicleState) -> Dict[str,AgentState]:
92+
return self.current_agents
93+
94+
# TODO: Improve Algo Knn, ransac, etc.
95+
def find_centers(self, clusters: List[List[np.ndarray]]) -> List[np.ndarray]:
96+
clusters = [np.array(clust) for clust in clusters]
97+
centers = [np.array(()) if clust.size == 0 else np.mean(clust, axis=0) for clust in clusters]
98+
return centers
99+
100+
# Beware: AgentState(PhysicalObject) builds bbox from
101+
# dims [-l/2,l/2] x [-w/2,w/2] x [0,h], not
102+
# [-l/2,l/2] x [-w/2,w/2] x [-h/2,h/2]
103+
def find_dims(self, clusters: List[List[np.ndarray]]) -> List[np.ndarray]:
104+
# Add some small constant to height to compensate for
105+
# objects distance to ground we filtered from lidar,
106+
# other heuristics to improve stability for find_ funcs ?
107+
clusters = [np.array(clust) for clust in clusters]
108+
dims = [np.array(()) if clust.size == 0 else np.max(clust, axis= 0) - np.min(clust, axis= 0) for clust in clusters]
109+
return dims
110+
111+
# TODO: Slower but cleaner to input self.current_agents dict
112+
# TODO: Moving Average across last N iterations pos/vel? Less spurious vals
113+
# TODO Akul: Fix velocity calculation to calculate in ObjectFrameEnum.START
114+
# work towards own tracking class instead of simple YOLO track?
115+
def find_vels(self, track_ids, obj_centers):
116+
# Object not seen -> velocity = None
117+
track_id_center_map = dict(zip(track_ids, obj_centers))
118+
vels = defaultdict(lambda: np.array(())) # None is faster, np.array matches other find_ methods.
119+
120+
for prev_track_id, prev_agent in self.prev_agents.items():
121+
if prev_track_id in track_ids:
122+
# TODO: Add prev_agents to memory to avoid None velocity
123+
# We should only be missing prev pose on first sight of track_id Agent.
124+
# print("shape 1: ", track_id_center_map[prev_agent.track_id])
125+
# print("shape 2: ", np.array([prev_agent.pose.x, prev_agent.pose.y, prev_agent.pose.z]))
126+
# prev can be 3 separate Nones, current is just empty array... make this symmetrical
127+
if prev_agent.pose.x and prev_agent.pose.y and prev_agent.pose.z and track_id_center_map[prev_agent.track_id].shape == (3,):
128+
vels[prev_track_id] = track_id_center_map[prev_track_id] - np.array([prev_agent.pose.x, prev_agent.pose.y, prev_agent.pose.z])
129+
return vels
130+
42131

132+
# TODO: Separate debug/viz class, separate this func into bbox and 2d 3d points
133+
def viz_object_states(self, cv_image, boxes, extracted_pts_all):
134+
# Extract 3D pedestrians points in lidar frame
135+
# ** These are camera frame after transform_lidar_points, right?
136+
pedestrians_3d_pts = [list(extracted_pts[:, -3:]) for extracted_pts in extracted_pts_all]
43137

44-
def fusion_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2):
138+
# Object center viz
139+
obj_3d_obj_centers = list()
140+
for track_id, agent in self.current_agents.items():
141+
if agent.pose.x != None and agent.pose.y != None and agent.pose.z != None:
142+
obj_3d_obj_centers.append((agent.pose.x, agent.pose.y, agent.pose.z)) # **
143+
if len(obj_3d_obj_centers) > 0:
144+
ros_obj_3d_obj_centers_pc2 = create_point_cloud(obj_3d_obj_centers, color=(0, 128, 0))
145+
self.pub_obj_centers_pc2.publish(ros_obj_3d_obj_centers_pc2)
146+
147+
# Extract 2D pedestrians points and bbox in camera frame
148+
extracted_2d_pts = [list(extracted_pts[:, :2].astype(int)) for extracted_pts in extracted_pts_all]
149+
flattened_pedestrians_2d_pts = list()
150+
for pts in extracted_2d_pts: flattened_pedestrians_2d_pts.extend(pts)
151+
152+
for ind, bbox in enumerate(boxes):
153+
xywh = bbox.xywh[0].tolist()
154+
cv_image = vis_2d_bbox(cv_image, xywh, bbox)
155+
156+
flattened_pedestrians_3d_pts = list()
157+
for pts in pedestrians_3d_pts: flattened_pedestrians_3d_pts.extend(pts)
158+
159+
# Draw projected 2D LiDAR points on the image.
160+
for pt in flattened_pedestrians_2d_pts:
161+
cv2.circle(cv_image, pt, 2, (0, 0, 255), -1)
162+
ros_img = self.bridge.cv2_to_imgmsg(cv_image, 'bgr8')
163+
self.pub_image.publish(ros_img)
164+
165+
# Draw 3D pedestrian pointclouds
166+
# Create point cloud from extracted 3D points
167+
ros_extracted_pedestrian_pc2 = create_point_cloud(flattened_pedestrians_3d_pts)
168+
self.pub_pedestrians_pc2.publish(ros_extracted_pedestrian_pc2)
169+
170+
171+
def update_object_states(self, track_result, extracted_pts_all) -> None:
172+
self.prev_agents = self.current_agents.copy()
173+
self.current_agents.clear()
174+
175+
# Change pedestrians_3d_pts to dicts matching track_ids
176+
track_ids = track_result[0].boxes.id.int().cpu().tolist()
177+
num_objs = len(track_ids)
178+
boxes = track_result[0].boxes
179+
180+
# Extract 3D pedestrians points in lidar frame
181+
# ** These are camera frame after transform_lidar_points, right?
182+
pedestrians_3d_pts = [list(extracted_pts[:, -3:]) for extracted_pts in extracted_pts_all]
183+
if len(pedestrians_3d_pts) != num_objs:
184+
raise Exception('Perception - Camera detections, points clusters num. mismatch')
185+
186+
# TODO: Slower but cleaner to pass dicts of AgentState
187+
# or at least {track_ids: centers/pts/etc}
188+
# TODO: Combine funcs for efficiency in C.
189+
# Separate numpy prob still faster for now
190+
obj_centers = self.find_centers(pedestrians_3d_pts)
191+
obj_dims = self.find_dims(pedestrians_3d_pts)
192+
obj_vels = self.find_vels(track_ids, obj_centers)
193+
194+
# Update Current AgentStates
195+
for ind in range(num_objs):
196+
obj_center = (None, None, None) if obj_centers[ind].size == 0 else obj_centers[ind]
197+
obj_dim = (None, None, None) if obj_dims[ind].size == 0 else obj_dims[ind]
198+
self.current_agents[track_ids[ind]] = (
199+
AgentState(
200+
track_id = track_ids[ind],
201+
pose=ObjectPose(t=0, x=obj_center[0], y=obj_center[1], z=obj_center[2] ,yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT),
202+
# (l, w, h)
203+
# TODO: confirm (z -> l, x -> w, y -> h)
204+
dimensions=(obj_dim[2], obj_dim[0],obj_dim[1]),
205+
outline=None,
206+
type=AgentEnum.PEDESTRIAN,
207+
activity=AgentActivityEnum.MOVING,
208+
velocity= None if obj_vels[track_ids[ind]].size == 0 else tuple(obj_vels[track_ids[ind]]),
209+
yaw_rate=0
210+
))
211+
212+
213+
def ouster_oak_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2):
45214
# Convert to cv2 image and run detector
46215
cv_image = self.bridge.imgmsg_to_cv2(rgb_image_msg, "bgr8")
47216
track_result = self.detector.track(source=cv_image, classes=self.classes_to_detect, persist=True, conf=self.confidence)
48217

49218
# Convert 1D PointCloud2 data to x, y, z coords
50219
lidar_points = convert_pointcloud2_to_xyz(lidar_pc2_msg)
220+
# print("len lidar_points", len(lidar_points))
51221

52222
# Downsample xyz point clouds
53223
downsampled_points = downsample_points(lidar_points)
224+
# print("len downsampled_points", len(downsampled_points))
54225

55226
# Transform LiDAR points into the camera coordinate frame.
56227
lidar_in_camera = transform_lidar_points(downsampled_points, self.R, self.t)
57-
228+
# print("len lidar_in_camera", len(lidar_in_camera))
229+
58230
# Project the transformed points into the image plane.
59231
projected_pts = project_points(lidar_in_camera, self.K)
232+
# print("len projected_pts", len(projected_pts))
60233

61234
# Process bboxes
62-
self.last_person_boxes = []
63235
boxes = track_result[0].boxes
64236

65-
# Unpacking box dimentions detected into x,y,w,h
66-
pedestrians_3d_pts = []
67-
flattened_pedestrians_2d_pts = []
68-
flattened_pedestrians_3d_pts = []
237+
extracted_pts_all = list()
69238

70-
for box in boxes:
71-
xywh = box.xywh[0].tolist()
72-
self.last_person_boxes.append(xywh)
239+
for ind, bbox in enumerate(boxes):
240+
xywh = bbox.xywh[0].tolist()
73241

74242
# Extracting projected pts
75243
x, y, w, h = xywh
@@ -78,54 +246,24 @@ def fusion_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2):
78246
top_bound = int(y - h / 2)
79247
bottom_bound = int(y + h / 2)
80248

81-
if len(projected_pts) > 0:
82-
pts = np.array(projected_pts)
83-
extracted_pts = pts[(pts[:, 0] > left_bound) &
84-
(pts[:, 0] < right_bound) &
85-
(pts[:, 1] > top_bound) &
86-
(pts[:, 1] < bottom_bound)
87-
]
88-
89-
#if no points extracted for this bbox, skip
90-
if len(extracted_pts) < 1:
91-
continue
92-
93-
# Apply ground and max distance filter to the extracted 5D points
94-
extracted_pts = filter_ground_points(extracted_pts, self.ground_threshold)
95-
extracted_pts = filter_far_points(extracted_pts)
96-
97-
# Extract 2D pedestrians points in camera frame
98-
extracted_2d_pts = list(extracted_pts[:, :2].astype(int))
99-
flattened_pedestrians_2d_pts = flattened_pedestrians_2d_pts + extracted_2d_pts
100-
101-
# Extract 3D pedestrians points in lidar frame
102-
extracted_3d_pts = list(extracted_pts[:, -3:])
103-
pedestrians_3d_pts.append(extracted_3d_pts)
104-
flattened_pedestrians_3d_pts = flattened_pedestrians_3d_pts + extracted_3d_pts
105-
106-
# Used for visualization
107-
if(self.visualization):
108-
cv_image = vis_2d_bbox(cv_image, xywh, box)
109-
110-
if len(flattened_pedestrians_3d_pts) > 0:
111-
# Draw projected 2D LiDAR points on the image.
112-
for pt in flattened_pedestrians_2d_pts:
113-
cv2.circle(cv_image, pt, 2, (0, 0, 255), -1)
114-
115-
# Create point cloud from extracted 3D points
116-
ros_extracted_pedestrian_pc2 = create_point_cloud(flattened_pedestrians_3d_pts)
117-
self.pub_pedestrians_pc2.publish(ros_extracted_pedestrian_pc2)
118-
119-
# Used for visualization
120-
if(self.visualization):
121-
ros_img = self.bridge.cv2_to_imgmsg(cv_image, 'bgr8')
122-
self.pub_image.publish(ros_img)
249+
pts = np.array(projected_pts)
250+
extracted_pts = pts[(pts[:, 0] > left_bound) &
251+
(pts[:, 0] < right_bound) &
252+
(pts[:, 1] > top_bound) &
253+
(pts[:, 1] < bottom_bound)
254+
]
123255

256+
# Apply ground and max distance filter to the extracted 5D points
257+
extracted_pts = filter_ground_points(extracted_pts, self.ground_threshold)
258+
extracted_pts = filter_far_points(extracted_pts)
259+
extracted_pts_all.append(extracted_pts)
260+
261+
self.update_object_states(track_result, extracted_pts_all)
262+
self.viz_object_states(cv_image, boxes, extracted_pts_all)
124263

125264

126265
if __name__ == '__main__':
127266
rospy.init_node('fusion_node', anonymous=True)
128267
Fusion3D()
129268
while not rospy.core.is_shutdown():
130269
rospy.rostime.wallsleep(0.5)
131-

GEMstack/onboard/perception/fusion_utils.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ def filter_ground_points(lidar_points, ground_threshold = 0):
3737

3838
def filter_far_points(lidar_points, max_dist_percent=0.85):
3939
""" Filter points beyond a percentage threshold of max distance in a point cluster """
40+
if lidar_points.shape[0] == 0: return lidar_points
4041
max_dist = np.max(lidar_points[:, 4])
4142
filtered_array = lidar_points[lidar_points[:, 4] < max_dist_percent * max_dist]
4243
return filtered_array
@@ -164,4 +165,6 @@ def create_point_cloud(points, color=(255, 0, 0)):
164165

165166
point_cloud_data = [(x, y, z, packed_color) for x, y, z in points]
166167

167-
return pc2.create_cloud(header, fields, point_cloud_data)
168+
return pc2.create_cloud(header, fields, point_cloud_data)
169+
170+

GEMstack/state/agent.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,10 @@ class AgentState(PhysicalObject):
2727
activity : AgentActivityEnum
2828
velocity : Tuple[float,float,float] #estimated velocity in x,y,z, m/s and in agent's local frame
2929
yaw_rate : float #estimated yaw rate, in radians/s
30+
# Added by Perception for persistent objs across frames
31+
# Check track_id to verify this is the same Agent you've
32+
# seen before
33+
track_id : int
3034

3135
def velocity_local(self) -> Tuple[float,float,float]:
3236
"""Returns velocity in m/s in the agent's local frame."""

0 commit comments

Comments
 (0)