1- from cv_bridge import CvBridge
2- from sensor_msgs .msg import Image , PointCloud2
3- from ultralytics import YOLO
4- from fusion_utils import *
1+ """
2+ Lidar + Camera fusion and object detection
3+ """
4+ """
5+ Terminal 1:
6+ ---------------
7+ source /opt/ros/noetic/setup.bash && source /catkin_ws/devel/setup.bash
8+ roscore
9+
10+ Terminal 2:
11+ ---------------
12+ source /opt/ros/noetic/setup.bash && source /catkin_ws/devel/setup.bash
13+ python3 /home/Admin/transform.py
14+
15+ Terminal 3:
16+ ---------------
17+ source /opt/ros/noetic/setup.bash && source /catkin_ws/devel/setup.bash
18+ rosbag play ~/vehicle.bag
19+
20+ Terminal 4:
21+ ---------------
22+ source /opt/ros/noetic/setup.bash && source /catkin_ws/devel/setup.bash
23+ cd GEMStack
24+ python3 -m GEMstack.onboard.perception.fusion
25+
26+ Terminal 5:
27+ ---------------
28+ source /opt/ros/noetic/setup.bash && source /catkin_ws/devel/setup.bash
29+ rviz
30+ """
31+
32+ # Python
33+ import os
34+ from typing import List , Dict
35+ from collections import defaultdict
36+ # ROS, CV
537import rospy
638import message_filters
7- import os
839import tf
9-
40+ from cv_bridge import CvBridge
41+ from sensor_msgs .msg import Image , PointCloud2
42+ # GEMStack
43+ from ...state import AllState ,VehicleState ,ObjectPose ,ObjectFrameEnum ,AgentState ,AgentEnum ,AgentActivityEnum
44+ from ultralytics import YOLO
45+ from .fusion_utils import *
1046
1147class Fusion3D ():
48+ # TODO: Pull params into a JSON/yaml
49+ # TODO: Convert lists into np.arrays where possible, vectorize calculations
50+ # TODO: Implement logging instead of print
51+ # TODO: Finish splitting this class + utils into separate classes
52+ # TODO: Decide if we want to name dets "peds" or "objs"/"agents"
53+ # Maybe peds for now and Agents in agent_detection.py?
1254 def __init__ (self ):
55+ # Publish debug/viz rostopics if true
56+ self .debug = True
1357 # Setup variables
1458 self .bridge = CvBridge ()
59+ # TODO: Wrap detector into GEMDetector?
1560 self .detector = YOLO (os .getcwd ()+ '/GEMstack/knowledge/detection/yolov8n.pt' )
16- self . last_person_boxes = []
17- self .pedestrians = {}
18- self .visualization = True
61+ # track_id: AgentState
62+ self .prev_agents = dict ()
63+ self .current_agents = dict ()
1964 self .confidence = 0.7
2065 self .classes_to_detect = 0
2166 self .ground_threshold = 1.6
@@ -30,46 +75,169 @@ def __init__(self):
3075 self .rgb_rosbag = message_filters .Subscriber ('/oak/rgb/image_raw' , Image )
3176 self .top_lidar_rosbag = message_filters .Subscriber ('/ouster/points' , PointCloud2 )
3277 self .sync = message_filters .ApproximateTimeSynchronizer ([self .rgb_rosbag , self .top_lidar_rosbag ], queue_size = 10 , slop = 0.1 )
33- self .sync .registerCallback (self .fusion_callback )
78+ self .sync .registerCallback (self .ouster_oak_callback )
3479
3580 # TF listener to get transformation from LiDAR to Camera
3681 self .tf_listener = tf .TransformListener ()
3782
38- # Publishers
83+ if self .debug : self .init_debug ()
84+
85+ def init_debug (self ,):
86+ # Debug Publishers
3987 self .pub_pedestrians_pc2 = rospy .Publisher ("/point_cloud/pedestrians" , PointCloud2 , queue_size = 10 )
40- if (self .visualization ):
41- self .pub_image = rospy .Publisher ("/camera/image_detection" , Image , queue_size = 1 )
88+ self .pub_obj_centers_pc2 = rospy .Publisher ("/point_cloud/obj_centers" , PointCloud2 , queue_size = 10 )
89+ self .pub_image = rospy .Publisher ("/camera/image_detection" , Image , queue_size = 1 )
90+
91+ def update (self , vehicle : VehicleState ) -> Dict [str ,AgentState ]:
92+ return self .current_agents
93+
94+ # TODO: Improve Algo Knn, ransac, etc.
95+ def find_centers (self , clusters : List [List [np .ndarray ]]) -> List [np .ndarray ]:
96+ clusters = [np .array (clust ) for clust in clusters ]
97+ centers = [np .array (()) if clust .size == 0 else np .mean (clust , axis = 0 ) for clust in clusters ]
98+ return centers
99+
100+ # Beware: AgentState(PhysicalObject) builds bbox from
101+ # dims [-l/2,l/2] x [-w/2,w/2] x [0,h], not
102+ # [-l/2,l/2] x [-w/2,w/2] x [-h/2,h/2]
103+ def find_dims (self , clusters : List [List [np .ndarray ]]) -> List [np .ndarray ]:
104+ # Add some small constant to height to compensate for
105+ # objects distance to ground we filtered from lidar,
106+ # other heuristics to imrpove stability for find_ funcs ?
107+ clusters = [np .array (clust ) for clust in clusters ]
108+ dims = [np .array (()) if clust .size == 0 else np .max (clust , axis = 0 ) - np .min (clust , axis = 0 ) for clust in clusters ]
109+ return dims
110+
111+ # TODO: Slower but cleaner to input self.current_agents dict
112+ # TODO: Moving Average across last N iterations pos/vel? Less spurious vals
113+ # TODO Akul: Fix velocity calculation to calculate in ObjectFrameEnum.START
114+ # work towards own tracking class instead of simple YOLO track?
115+ def find_vels (self , track_ids , obj_centers ):
116+ # Object not seen -> velocity = None
117+ track_id_center_map = dict (zip (track_ids , obj_centers ))
118+ vels = defaultdict (lambda : np .array (())) # None is faster, np.array matches other find_ methods.
119+
120+ for prev_track_id , prev_agent in self .prev_agents .items ():
121+ if prev_track_id in track_ids :
122+ # TODO: Add prev_agents to memory to avoid None velocity
123+ # We should only be missing prev pose on first sight of track_id Agent.
124+ # print("shape 1: ", track_id_center_map[prev_agent.track_id])
125+ # print("shape 2: ", np.array([prev_agent.pose.x, prev_agent.pose.y, prev_agent.pose.z]))
126+ # prev can be 3 separate Nones, current is just empty array... make this symmetrical
127+ if prev_agent .pose .x and prev_agent .pose .y and prev_agent .pose .z and track_id_center_map [prev_agent .track_id ].shape == 3 :
128+ vels [prev_track_id ] = track_id_center_map [prev_track_id ] - np .array ([prev_agent .pose .x , prev_agent .pose .y , prev_agent .pose .z ])
129+ return vels
130+
42131
132+ # TODO: Separate debug/viz class, separate this func into bbox and 2d 3d points
133+ def viz_object_states (self , cv_image , boxes , extracted_pts_all ):
134+ # Extract 3D pedestrians points in lidar frame
135+ # ** These are camera frame after transform_lidar_points, right?
136+ pedestrians_3d_pts = [list (extracted_pts [:, - 3 :]) for extracted_pts in extracted_pts_all ]
43137
44- def fusion_callback (self , rgb_image_msg : Image , lidar_pc2_msg : PointCloud2 ):
138+ # Object center viz
139+ obj_3d_obj_centers = list ()
140+ for track_id , agent in self .current_agents .items ():
141+ if agent .pose .x != None and agent .pose .y != None and agent .pose .z != None :
142+ obj_3d_obj_centers .append ((agent .pose .x , agent .pose .y , agent .pose .z )) # **
143+ if len (obj_3d_obj_centers ) > 0 :
144+ ros_obj_3d_obj_centers_pc2 = create_point_cloud (obj_3d_obj_centers , color = (0 , 128 , 0 ))
145+ self .pub_obj_centers_pc2 .publish (ros_obj_3d_obj_centers_pc2 )
146+
147+ # Extract 2D pedestrians points and bbox in camera frame
148+ extracted_2d_pts = [list (extracted_pts [:, :2 ].astype (int )) for extracted_pts in extracted_pts_all ]
149+ flattened_pedestrians_2d_pts = list ()
150+ for pts in extracted_2d_pts : flattened_pedestrians_2d_pts .extend (pts )
151+
152+ for ind , bbox in enumerate (boxes ):
153+ xywh = bbox .xywh [0 ].tolist ()
154+ cv_image = vis_2d_bbox (cv_image , xywh , bbox )
155+
156+ flattened_pedestrians_3d_pts = list ()
157+ for pts in pedestrians_3d_pts : flattened_pedestrians_3d_pts .extend (pts )
158+
159+ # Draw projected 2D LiDAR points on the image.
160+ for pt in flattened_pedestrians_2d_pts :
161+ cv2 .circle (cv_image , pt , 2 , (0 , 0 , 255 ), - 1 )
162+ ros_img = self .bridge .cv2_to_imgmsg (cv_image , 'bgr8' )
163+ self .pub_image .publish (ros_img )
164+
165+ # Draw 3D pedestrian pointclouds
166+ # Create point cloud from extracted 3D points
167+ ros_extracted_pedestrian_pc2 = create_point_cloud (flattened_pedestrians_3d_pts )
168+ self .pub_pedestrians_pc2 .publish (ros_extracted_pedestrian_pc2 )
169+
170+
171+ def update_object_states (self , track_result , extracted_pts_all ) -> None :
172+ self .prev_agents = self .current_agents .copy ()
173+ self .current_agents .clear ()
174+
175+ # Change pedestrians_3d_pts to dicts matching track_ids
176+ track_ids = track_result [0 ].boxes .id .int ().cpu ().tolist ()
177+ num_objs = len (track_ids )
178+ boxes = track_result [0 ].boxes
179+
180+ # Extract 3D pedestrians points in lidar frame
181+ # ** These are camera frame after transform_lidar_points, right?
182+ pedestrians_3d_pts = [list (extracted_pts [:, - 3 :]) for extracted_pts in extracted_pts_all ]
183+ if len (pedestrians_3d_pts ) != num_objs :
184+ raise Exception ('Perception - Camera detections, points clusters num. mismatch' )
185+
186+ # TODO: Slower but cleaner to pass dicts of AgentState
187+ # or at least {track_ids: centers/pts/etc}
188+ # TODO: Combine funcs for efficiency in C.
189+ # Separate numpy prob still faster for now
190+ obj_centers = self .find_centers (pedestrians_3d_pts )
191+ obj_dims = self .find_dims (pedestrians_3d_pts )
192+ obj_vels = self .find_vels (track_ids , obj_centers )
193+
194+ # Update Current AgentStates
195+ for ind in range (num_objs ):
196+ obj_center = (None , None , None ) if obj_centers [ind ].size == 0 else obj_centers [ind ]
197+ obj_dim = (None , None , None ) if obj_dims [ind ].size == 0 else obj_dims [ind ]
198+ self .current_agents [track_ids [ind ]] = (
199+ AgentState (
200+ track_id = track_ids [ind ],
201+ pose = ObjectPose (t = 0 , x = obj_center [0 ], y = obj_center [1 ], z = obj_center [2 ] ,yaw = 0 ,pitch = 0 ,roll = 0 ,frame = ObjectFrameEnum .CURRENT ),
202+ # (l, w, h)
203+ # TODO: confirm (z -> l, x -> w, y -> h)
204+ dimensions = (obj_dim [2 ], obj_dim [0 ],obj_dim [1 ]),
205+ outline = None ,
206+ type = AgentEnum .PEDESTRIAN ,
207+ activity = AgentActivityEnum .MOVING ,
208+ velocity = None if obj_vels [track_ids [ind ]].size == 0 else tuple (vels [track_ids [ind ]]),
209+ yaw_rate = 0
210+ ))
211+
212+
213+ def ouster_oak_callback (self , rgb_image_msg : Image , lidar_pc2_msg : PointCloud2 ):
45214 # Convert to cv2 image and run detector
46215 cv_image = self .bridge .imgmsg_to_cv2 (rgb_image_msg , "bgr8" )
47216 track_result = self .detector .track (source = cv_image , classes = self .classes_to_detect , persist = True , conf = self .confidence )
48217
49218 # Convert 1D PointCloud2 data to x, y, z coords
50219 lidar_points = convert_pointcloud2_to_xyz (lidar_pc2_msg )
220+ # print("len lidar_points", len(lidar_points))
51221
52222 # Downsample xyz point clouds
53223 downsampled_points = downsample_points (lidar_points )
224+ # print("len downsampled_points", len(downsampled_points))
54225
55226 # Transform LiDAR points into the camera coordinate frame.
56227 lidar_in_camera = transform_lidar_points (downsampled_points , self .R , self .t )
57-
228+ # print("len lidar_in_camera", len(lidar_in_camera))
229+
58230 # Project the transformed points into the image plane.
59231 projected_pts = project_points (lidar_in_camera , self .K )
232+ # print("len projected_pts", len(projected_pts))
60233
61234 # Process bboxes
62- self .last_person_boxes = []
63235 boxes = track_result [0 ].boxes
64236
65- # Unpacking box dimentions detected into x,y,w,h
66- pedestrians_3d_pts = []
67- flattened_pedestrians_2d_pts = []
68- flattened_pedestrians_3d_pts = []
237+ extracted_pts_all = list ()
69238
70- for box in boxes :
71- xywh = box .xywh [0 ].tolist ()
72- self .last_person_boxes .append (xywh )
239+ for ind , bbox in enumerate (boxes ):
240+ xywh = bbox .xywh [0 ].tolist ()
73241
74242 # Extracting projected pts
75243 x , y , w , h = xywh
@@ -78,54 +246,24 @@ def fusion_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2):
78246 top_bound = int (y - h / 2 )
79247 bottom_bound = int (y + h / 2 )
80248
81- if len (projected_pts ) > 0 :
82- pts = np .array (projected_pts )
83- extracted_pts = pts [(pts [:, 0 ] > left_bound ) &
84- (pts [:, 0 ] < right_bound ) &
85- (pts [:, 1 ] > top_bound ) &
86- (pts [:, 1 ] < bottom_bound )
87- ]
88-
89- #if no points extracted for this bbox, skip
90- if len (extracted_pts ) < 1 :
91- continue
92-
93- # Apply ground and max distance filter to the extracted 5D points
94- extracted_pts = filter_ground_points (extracted_pts , self .ground_threshold )
95- extracted_pts = filter_far_points (extracted_pts )
96-
97- # Extract 2D pedestrians points in camera frame
98- extracted_2d_pts = list (extracted_pts [:, :2 ].astype (int ))
99- flattened_pedestrians_2d_pts = flattened_pedestrians_2d_pts + extracted_2d_pts
100-
101- # Extract 3D pedestrians points in lidar frame
102- extracted_3d_pts = list (extracted_pts [:, - 3 :])
103- pedestrians_3d_pts .append (extracted_3d_pts )
104- flattened_pedestrians_3d_pts = flattened_pedestrians_3d_pts + extracted_3d_pts
105-
106- # Used for visualization
107- if (self .visualization ):
108- cv_image = vis_2d_bbox (cv_image , xywh , box )
109-
110- if len (flattened_pedestrians_3d_pts ) > 0 :
111- # Draw projected 2D LiDAR points on the image.
112- for pt in flattened_pedestrians_2d_pts :
113- cv2 .circle (cv_image , pt , 2 , (0 , 0 , 255 ), - 1 )
114-
115- # Create point cloud from extracted 3D points
116- ros_extracted_pedestrian_pc2 = create_point_cloud (flattened_pedestrians_3d_pts )
117- self .pub_pedestrians_pc2 .publish (ros_extracted_pedestrian_pc2 )
118-
119- # Used for visualization
120- if (self .visualization ):
121- ros_img = self .bridge .cv2_to_imgmsg (cv_image , 'bgr8' )
122- self .pub_image .publish (ros_img )
249+ pts = np .array (projected_pts )
250+ extracted_pts = pts [(pts [:, 0 ] > left_bound ) &
251+ (pts [:, 0 ] < right_bound ) &
252+ (pts [:, 1 ] > top_bound ) &
253+ (pts [:, 1 ] < bottom_bound )
254+ ]
123255
256+ # Apply ground and max distance filter to the extracted 5D points
257+ extracted_pts = filter_ground_points (extracted_pts , self .ground_threshold )
258+ extracted_pts = filter_far_points (extracted_pts )
259+ extracted_pts_all .append (extracted_pts )
260+
261+ self .update_object_states (track_result , extracted_pts_all )
262+ self .viz_object_states (cv_image , boxes , extracted_pts_all )
124263
125264
if __name__ == '__main__':
    # Start the fusion node and idle until ROS shuts down; the work happens
    # in the synchronized camera/LiDAR callback registered by Fusion3D.
    rospy.init_node('fusion_node', anonymous=True)
    Fusion3D()
    while not rospy.core.is_shutdown():
        rospy.rostime.wallsleep(0.5)
131-
0 commit comments