From faa03966850220fec24d1024a9f7d22bbbea7155 Mon Sep 17 00:00:00 2001 From: KenC1014 Date: Tue, 11 Feb 2025 21:21:16 -0600 Subject: [PATCH 01/46] test fusion --- GEMstack/onboard/perception/fusion.py | 34 ++++++++++++++++++++++++ GEMstack/onboard/perception/transform.py | 20 ++++++++++++++ 2 files changed, 54 insertions(+) create mode 100644 GEMstack/onboard/perception/fusion.py create mode 100644 GEMstack/onboard/perception/transform.py diff --git a/GEMstack/onboard/perception/fusion.py b/GEMstack/onboard/perception/fusion.py new file mode 100644 index 000000000..0b6e4f205 --- /dev/null +++ b/GEMstack/onboard/perception/fusion.py @@ -0,0 +1,34 @@ +from cv_bridge import CvBridge +from sensor_msgs.msg import Image, PointCloud2 +import rospy +import message_filters + + +class Fusion3D(): + def __init__(self): + self.bridge = CvBridge() + self.stereo_rosbag = message_filters.Subscriber('/oak/stereo/image_raw', Image, self.stereo_callback, queue_size=1) + self.top_lidar_rosbag = message_filters.Subscriber('/ouster/points', PointCloud2, self.top_lidar_callback, queue_size=1) + + ts = message_filters.ApproximateTimeSynchronizer([self.stereo_rosbag, self.top_lidar_rosbag], queue_size=10, slop=0.1) + ts.registerCallback(self.callback) + rospy.spin() + + def callback(left_img, right_img, lidar_msg, camera_info): + # Process synchronized data here + pass + + def stereo_callback(self, image: Image): + image = self.bridge.imgmsg_to_cv2(image, "16UC1") + print(f"stereo: {image.shape}") + + def top_lidar_callback(self, point_cloud: PointCloud2): + # point_cloud = self.bridge.imgmsg_to_cv2(point_cloud, "16UC1") + print(f"point_cloud: {point_cloud.fields}") + +if __name__ == '__main__': + rospy.init_node('fusion_node', anonymous=True) + Fusion3D() + while not rospy.core.is_shutdown(): + rospy.rostime.wallsleep(0.5) + diff --git a/GEMstack/onboard/perception/transform.py b/GEMstack/onboard/perception/transform.py new file mode 100644 index 000000000..8d47598fa --- /dev/null +++ b/GEMstack/onboard/perception/transform.py @@ -0,0 +1,20 @@ +import rospy +import tf + +def publish_tf(): + rospy.init_node('pointcloud_tf_broadcaster') + br = tf.TransformBroadcaster() + rate = rospy.Rate(10) # 10 Hz + + while not rospy.is_shutdown(): + br.sendTransform( + (0, 0, 1), # (x, y, z) translation + tf.transformations.quaternion_from_euler(0, 0, 0), # (roll, pitch, yaw) + rospy.Time.now(), + "oak_rgb_camera_optical_frame", # Child frame (sensor) + "map" # Parent frame (world) + ) + rate.sleep() + +if __name__ == "__main__": + publish_tf() \ No newline at end of file From 97b6dc688caf21150bb1deb43080c58ee972864e Mon Sep 17 00:00:00 2001 From: nvikramraj Date: Thu, 13 Feb 2025 16:17:31 -0600 Subject: [PATCH 02/46] Lidar to Base frame transformation --- .../perception/pedestrian_detection.py | 83 +++++++++++++++++-- requirements.txt | 1 + 2 files changed, 75 insertions(+), 9 deletions(-) diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py index f91145274..aeda0b15b 100644 --- a/GEMstack/onboard/perception/pedestrian_detection.py +++ b/GEMstack/onboard/perception/pedestrian_detection.py @@ -5,9 +5,13 @@ import cv2 from typing import Dict from cv_bridge import CvBridge -from sensor_msgs.msg import Image +from sensor_msgs.msg import Image, PointCloud2, PointField +from tf2_sensor_msgs.tf2_sensor_msgs import do_transform_cloud +from geometry_msgs.msg import TransformStamped import rospy import os +import numpy as np +import tf2_ros def box_to_fake_agent(box): @@ -56,8 +60,8 @@ def initialize(self): """Initializes subscribers and publishers self.vehicle_interface.subscribe_sensor -> GEM Car camera subscription - self.rosbag_test -> ROS Bag topic subscription - self.pub_image -> Publishes Image with detection for visualization + self.rosbag_cam_sub -> ROS Bag topic subscription + self.rosbag_cam_pub -> Publishes Image with detection for visualization """ #tell the vehicle to use image_callback whenever 'front_camera' gets a reading, and it expects images of type cv2.Mat @@ -68,12 +72,74 @@ def initialize(self): # Webcam # rospy.Subscriber('/webcam', Image, self.image_callback) + # Conversion tools + self.cv_bridge = CvBridge() + self.tf_buffer = tf2_ros.Buffer() + self.tf_listener = tf2_ros.TransformListener(self.tf_buffer) + # Testing with rosbag - self.rosbag_test = rospy.Subscriber('/oak/rgb/image_raw',Image, self.image_callback,queue_size=1) if(self.visualization): - self.pub_image = rospy.Publisher("/camera/image_detection", Image, queue_size=1) + self.rosbag_cam_pub = rospy.Publisher("/camera/image_detection", Image, queue_size=1) + + # Lidar Tranform topic publisher + self.rosbag_lidar_livox_pub = rospy.Publisher("/livox/transformed_lidar", PointCloud2, queue_size=1) + # self.rosbag_lidar_ouster_pub = rospy.Publisher("/ouster/transformed_lidar", PointCloud2, queue_size=1) + + self.rosbag_cam_sub = rospy.Subscriber('/oak/rgb/image_raw',Image, self.image_callback,queue_size=1) + self.rosbag_lidar_livox_sub = rospy.Subscriber('/livox/lidar',PointCloud2, self.lidar_livox_callback,queue_size=1) + # self.rosbag_lidar_ouster_sub = rospy.Subscriber('/ouster/points',PointCloud2, self.lidar_ouster_callback,queue_size=1) + pass + + def publish_base_link(self): + + br = tf2_ros.TransformBroadcaster() + transform = TransformStamped() + + transform.header.stamp = rospy.Time.now() + transform.header.frame_id = "map" # Parent frame + transform.child_frame_id = "base_link" # Child frame + + # Set translation (change as per actual vehicle position) + transform.transform.translation.x = 0.0 + transform.transform.translation.y = 0.0 + transform.transform.translation.z = 0.0 + + # Set rotation (quaternion) + transform.transform.rotation.x = 0.0 + transform.transform.rotation.y = 0.0 + transform.transform.rotation.z = 0.0 + transform.transform.rotation.w = 1.0 # No rotation + br.sendTransform(transform) + + def lidar_livox_callback(self,pointcloud : PointCloud2): + + transform = self.tf_buffer.lookup_transform('base_link', pointcloud.header.frame_id, rospy.Time(0), rospy.Duration(1.0)) + + # Transform the point cloud + transformed_cloud = do_transform_cloud(pointcloud, transform) + + # Publish transformed point cloud + self.rosbag_lidar_livox_pub.publish(transformed_cloud) + + if(self.visualization): + self.publish_base_link() + + # def lidar_ouster_callback(self,pointcloud : PointCloud2): + + # transform = self.tf_buffer.lookup_transform('base_link', pointcloud.header.frame_id, rospy.Time(0), rospy.Duration(1.0)) + + # # Transform the point cloud + # transformed_cloud = do_transform_cloud(pointcloud, transform) + + # # Publish transformed point cloud + # self.rosbag_lidar_livox_pub.publish(transformed_cloud) + + # if(self.visualization): + # self.publish_base_link() + + # Use cv2.Mat for GEM Car, Image for RosBag def image_callback(self, image : Image): #image : cv2.Mat): @@ -92,8 +158,7 @@ def image_callback(self, image : Image): #image : cv2.Mat): # track_result = self.detector.track(source=image, classes=self.classes_to_detect, persist=True, conf=self.confidence) # Convert to CV2 format for RosBag - bridge = CvBridge() - image = bridge.imgmsg_to_cv2(image, "bgr8") + image = self.cv_bridge.imgmsg_to_cv2(image, "bgr8") track_result = self.detector.track(source=image, classes=self.classes_to_detect, persist=True, conf=self.confidence) self.last_person_boxes = [] @@ -155,8 +220,8 @@ def image_callback(self, image : Image): #image : cv2.Mat): # Used for visualization if(self.visualization): - ros_img = bridge.cv2_to_imgmsg(image, 'bgr8') - self.pub_image.publish(ros_img) + ros_img = self.cv_bridge.cv2_to_imgmsg(image, 'bgr8') + self.rosbag_cam_pub.publish(ros_img) #uncomment if you want to debug the detector... # print(self.last_person_boxes) diff --git a/requirements.txt b/requirements.txt index aba104c78..1679dba33 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,3 +10,4 @@ dacite # Perception ultralytics +lap==0.5.12 \ No newline at end of file From b087f8d9b403aecd1ea24dc4cd1a501924e13873 Mon Sep 17 00:00:00 2001 From: nvikramraj Date: Thu, 13 Feb 2025 16:27:39 -0600 Subject: [PATCH 03/46] added few comments --- .../perception/pedestrian_detection.py | 28 +++++++++++-------- 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py index aeda0b15b..bf3134a68 100644 --- a/GEMstack/onboard/perception/pedestrian_detection.py +++ b/GEMstack/onboard/perception/pedestrian_detection.py @@ -83,16 +83,17 @@ def initialize(self): # Lidar Tranform topic publisher self.rosbag_lidar_livox_pub = rospy.Publisher("/livox/transformed_lidar", PointCloud2, queue_size=1) - # self.rosbag_lidar_ouster_pub = rospy.Publisher("/ouster/transformed_lidar", PointCloud2, queue_size=1) + self.rosbag_lidar_ouster_pub = rospy.Publisher("/ouster/transformed_lidar", PointCloud2, queue_size=1) self.rosbag_cam_sub = rospy.Subscriber('/oak/rgb/image_raw',Image, self.image_callback,queue_size=1) self.rosbag_lidar_livox_sub = rospy.Subscriber('/livox/lidar',PointCloud2, self.lidar_livox_callback,queue_size=1) - # self.rosbag_lidar_ouster_sub = rospy.Subscriber('/ouster/points',PointCloud2, self.lidar_ouster_callback,queue_size=1) + self.rosbag_lidar_ouster_sub = rospy.Subscriber('/ouster/points',PointCloud2, self.lidar_ouster_callback,queue_size=1) pass - def publish_base_link(self): + # Publishes baselink to map transformation for visualization + def publish_base_link_to_map(self): br = tf2_ros.TransformBroadcaster() transform = TransformStamped() @@ -113,6 +114,8 @@ def publish_base_link(self): transform.transform.rotation.w = 1.0 # No rotation br.sendTransform(transform) + + # Livox Lidar data def lidar_livox_callback(self,pointcloud : PointCloud2): transform = self.tf_buffer.lookup_transform('base_link', pointcloud.header.frame_id, rospy.Time(0), rospy.Duration(1.0)) @@ -124,20 +127,21 @@ def lidar_livox_callback(self,pointcloud : PointCloud2): self.rosbag_lidar_livox_pub.publish(transformed_cloud) if(self.visualization): - self.publish_base_link() + self.publish_base_link_to_map() - # def lidar_ouster_callback(self,pointcloud : PointCloud2): + # Ouster Lidar data + def lidar_ouster_callback(self,pointcloud : PointCloud2): - # transform = self.tf_buffer.lookup_transform('base_link', pointcloud.header.frame_id, rospy.Time(0), rospy.Duration(1.0)) + transform = self.tf_buffer.lookup_transform('base_link', pointcloud.header.frame_id, rospy.Time(0), rospy.Duration(1.0)) - # # Transform the point cloud - # transformed_cloud = do_transform_cloud(pointcloud, transform) + # Transform the point cloud + transformed_cloud = do_transform_cloud(pointcloud, transform) - # # Publish transformed point cloud - # self.rosbag_lidar_livox_pub.publish(transformed_cloud) + # Publish transformed point cloud + self.rosbag_lidar_livox_pub.publish(transformed_cloud) - # if(self.visualization): - # self.publish_base_link() + if(self.visualization): + self.publish_base_link_to_map() # Use cv2.Mat for GEM Car, Image for RosBag From 475b8c91ae9bd40b93c7aaded7d0d7a623464b95 Mon Sep 17 00:00:00 2001 From: KenC1014 Date: Fri, 14 Feb 2025 20:02:30 -0600 Subject: [PATCH 04/46] align lidar pc2 to camera frame --- .../calibration/camera_intrinsics.json | 7 + .../perception/calibration/extrinsics/R.npy | Bin 0 -> 200 bytes .../calibration/extrinsics/rvec.npy | Bin 0 -> 152 bytes .../perception/calibration/extrinsics/t.npy | Bin 0 -> 152 bytes GEMstack/onboard/perception/fusion.py | 80 +++++++-- GEMstack/onboard/perception/fusion_utils.py | 159 ++++++++++++++++++ GEMstack/onboard/perception/transform.py | 2 +- 7 files changed, 232 insertions(+), 16 deletions(-) create mode 100644 GEMstack/onboard/perception/calibration/camera_intrinsics.json create mode 100644 GEMstack/onboard/perception/calibration/extrinsics/R.npy create mode 100644 GEMstack/onboard/perception/calibration/extrinsics/rvec.npy create mode 100644 GEMstack/onboard/perception/calibration/extrinsics/t.npy create mode 100644 GEMstack/onboard/perception/fusion_utils.py diff --git a/GEMstack/onboard/perception/calibration/camera_intrinsics.json b/GEMstack/onboard/perception/calibration/camera_intrinsics.json new file mode 100644 index 000000000..74318680c --- /dev/null +++ b/GEMstack/onboard/perception/calibration/camera_intrinsics.json @@ -0,0 +1,7 @@ +{ + "fx": 684.8333129882812, + "fy": 684.6096801757812, + "cx": 573.37109375, + "cy": 363.700927734375 + } + \ No newline at end of file diff --git a/GEMstack/onboard/perception/calibration/extrinsics/R.npy b/GEMstack/onboard/perception/calibration/extrinsics/R.npy new file mode 100644 index 0000000000000000000000000000000000000000..7d44aa76dc77d0199fe86055c0f3d0e2e068d1e1 GIT binary patch literal 200 zcmbR27wQ`j$;eQ~P_3SlTAW;@Zl$1ZlV+i=qoAIaUsO_*m=~X4l#&V(cT3DEP6dh= zXCxM+0{I%oIts>`ItsN4WCJb%fwxMDr{~)LE9si%@#p>ihKYjy;`i(KAE>>{UAU@z vzr*Jf@d8Hm_6&!%YcKf!et)igplIr!_x1(a2fkgnHrJlvT6b{B{<8f5RmVXB literal 0 HcmV?d00001 diff --git a/GEMstack/onboard/perception/calibration/extrinsics/rvec.npy b/GEMstack/onboard/perception/calibration/extrinsics/rvec.npy new file mode 100644 index 0000000000000000000000000000000000000000..62d1809f1f7ea2665a24df654d48ec92a21310e7 GIT binary patch literal 152 zcmbR27wQ`j$;eQ~P_3SlTAW;@Zl$1ZlV+i=qoAIaUsO_*m=~X4l#&V(cT3DEP6dh= xXCxM+0{I%oItqrGItsN4WCN}*xxZd?bbYozBkxmtO5yW<;qS-&F5LTM4*;iRDntMP literal 0 HcmV?d00001 diff --git a/GEMstack/onboard/perception/calibration/extrinsics/t.npy b/GEMstack/onboard/perception/calibration/extrinsics/t.npy new file mode 100644 index 0000000000000000000000000000000000000000..b133fa55fd0c5362ac4d93bac53efeb471dfee3f GIT binary patch literal 152 zcmbR27wQ`j$;eQ~P_3SlTAW;@Zl$1ZlV+i=qoAIaUsO_*m=~X4l#&V(cT3DEP6dh= xXCxM+0{I%oItqrGItsN4WCJeCQ?j?%^Ox^`s-3-k&cPe|OJ3%@a}Rp99{`L8DPRBq literal 0 HcmV?d00001 diff --git a/GEMstack/onboard/perception/fusion.py b/GEMstack/onboard/perception/fusion.py index 0b6e4f205..d8cd1a41a 100644 --- a/GEMstack/onboard/perception/fusion.py +++ b/GEMstack/onboard/perception/fusion.py @@ -1,30 +1,80 @@ from cv_bridge import CvBridge from sensor_msgs.msg import Image, PointCloud2 +from ultralytics import YOLO +from fusion_utils import * import rospy import message_filters - +import os +import tf class Fusion3D(): def __init__(self): + # Setup variables self.bridge = CvBridge() - self.stereo_rosbag = message_filters.Subscriber('/oak/stereo/image_raw', Image, self.stereo_callback, queue_size=1) - self.top_lidar_rosbag = message_filters.Subscriber('/ouster/points', PointCloud2, self.top_lidar_callback, queue_size=1) + self.detector = YOLO(os.getcwd()+'/GEMstack/knowledge/detection/yolov8n.pt') + self.last_person_boxes = [] + self.pedestrians = {} + self.visualization = True + self.confidence = 0.7 + self.classes_to_detect = 0 + + # Load calibration data + self.R = load_extrinsics(os.getcwd() + '/GEMstack/onboard/perception/calibration/extrinsics/R.npy') + self.t = load_extrinsics(os.getcwd() + '/GEMstack/onboard/perception/calibration/extrinsics/t.npy') + self.K = load_intrinsics(os.getcwd()+ '/GEMstack/onboard/perception/calibration/camera_intrinsics.json') + + # Subscribers and sychronizers + self.rgb_rosbag = message_filters.Subscriber('/oak/rgb/image_raw', Image) + self.top_lidar_rosbag = message_filters.Subscriber('/ouster/points', PointCloud2) + self.sync = message_filters.ApproximateTimeSynchronizer([self.rgb_rosbag, self.top_lidar_rosbag], queue_size=10, slop=0.1) + self.sync.registerCallback(self.fusion_callback) + + # TF listener to get transformation from LiDAR to Camera + self.tf_listener = tf.TransformListener() + + # Publishers + if(self.visualization): + self.pub_image = rospy.Publisher("/camera/image_detection", Image, queue_size=1) + + + + def fusion_callback(self, image: Image, lidar_pc2_msg: PointCloud2): + image = self.bridge.imgmsg_to_cv2(image, "bgr8") + track_result = self.detector.track(source=image, classes=self.classes_to_detect, persist=True, conf=self.confidence) + + self.last_person_boxes = [] + boxes = track_result[0].boxes + + # Unpacking box dimentions detected into x,y,w,h + for box in boxes: + xywh = box.xywh[0].tolist() + self.last_person_boxes.append(xywh) + + # Used for visualization + if(self.visualization): + image = vis_2d_bbox(image, xywh, box) - ts = message_filters.ApproximateTimeSynchronizer([self.stereo_rosbag, self.top_lidar_rosbag], queue_size=10, slop=0.1) - ts.registerCallback(self.callback) - rospy.spin() + # Convert 1D PointCloud2 data to x, y, z coords + lidar_points = convert_pointcloud2_to_xyz(lidar_pc2_msg) + + # Transform LiDAR points into the camera coordinate frame. + lidar_in_camera = transform_lidar_points(lidar_points, self.R, self.t) + + # Project the transformed points into the image plane. + projected_pts = project_points(lidar_in_camera, self.K) + + # Draw projected LiDAR points on the image. + for pt in projected_pts: + cv2.circle(image, pt, 2, (0, 0, 255), -1) - def callback(left_img, right_img, lidar_msg, camera_info): - # Process synchronized data here - pass + # visualize_point_cloud(p_img_cloud) + + # Used for visualization + if(self.visualization): + ros_img = self.bridge.cv2_to_imgmsg(image, 'bgr8') + self.pub_image.publish(ros_img) - def stereo_callback(self, image: Image): - image = self.bridge.imgmsg_to_cv2(image, "16UC1") - print(f"stereo: {image.shape}") - def top_lidar_callback(self, point_cloud: PointCloud2): - # point_cloud = self.bridge.imgmsg_to_cv2(point_cloud, "16UC1") - print(f"point_cloud: {point_cloud.fields}") if __name__ == '__main__': rospy.init_node('fusion_node', anonymous=True) diff --git a/GEMstack/onboard/perception/fusion_utils.py b/GEMstack/onboard/perception/fusion_utils.py new file mode 100644 index 000000000..ad2c13c19 --- /dev/null +++ b/GEMstack/onboard/perception/fusion_utils.py @@ -0,0 +1,159 @@ +from sensor_msgs.msg import PointCloud2 +import numpy as np +import sensor_msgs.point_cloud2 as pc2 +import open3d as o3d +import cv2 +import json + + +def convert_pointcloud2_to_xyz(lidar_pc2_msg: PointCloud2): + """ Convert 1D PointCloud2 data to x, y, z coords """ + lidar_points = [] + for point in pc2.read_points(lidar_pc2_msg, field_names=("x", "y", "z", "intensity"), skip_nans=True): + lidar_points.append([point[0], point[1], point[2]]) + return np.array(lidar_points) + + +# Credits: The following lines of codes (17 to 159 excluding lines 80 to 115) are adapted from the Calibration Team B +def load_extrinsics(extrinsics_file): + """ + Load calibrated extrinsics from a .npz file. + Assumes the file contains keys 'R' and 't'. + """ + data = np.load(extrinsics_file) + return data + + +def load_intrinsics(intrinsics_file): + """ + Load camera intrinsics from a JSON file. + Expects keys: 'fx', 'fy', 'cx', and 'cy'. + """ + with open(intrinsics_file, 'r') as f: + intrinsics = json.load(f) + fx = intrinsics["fx"] + fy = intrinsics["fy"] + cx = intrinsics["cx"] + cy = intrinsics["cy"] + K = np.array([[fx, 0, cx], + [0, fy, cy], + [0, 0, 1]], dtype=np.float32) + return K + + +def load_lidar_scan(lidar_file): + """ + Load a LiDAR scan from a file. + This function handles both npy (returns a NumPy array) and npz files. + """ + data = np.load(lidar_file, allow_pickle=True) + if isinstance(data, np.ndarray): + return data.astype(np.float32) + else: + key = list(data.keys())[0] + return data[key].astype(np.float32) + + +def transform_lidar_points(lidar_points, R, t): + """ + Transform LiDAR points from the LiDAR frame into the camera frame. + p_cam = R * p_lidar + t. + """ + P_cam = (R @ lidar_points.T + t.reshape(3,1)).T + return P_cam + + +def project_points(points_3d, K): + """ + Project 3D points (in the camera frame) into 2D image coordinates using the camera matrix K. + Only projects points with z > 0. + """ + proj_points = [] + for pt in points_3d: + if pt[2] > 0: # only project points in front of the camera + u = K[0, 0] * (pt[0] / pt[2]) + K[0, 2] + v = K[1, 1] * (pt[1] / pt[2]) + K[1, 2] + proj_points.append((int(u), int(v))) + return proj_points + + +def vis_2d_bbox(image, xywh, box): + # Setup + label_text = "Pedestrian " + font = cv2.FONT_HERSHEY_SIMPLEX + font_scale = 0.5 + font_color = (255, 255, 255) + line_type = 1 + text_thickness = 2 + + x, y, w, h = xywh + + if box.id is not None: + id = box.id.item() + else: + id = 0 + + # Draw bounding box + cv2.rectangle(image, (int(x - w / 2), int(y - h / 2)), (int(x + w / 2), int(y + h / 2)), (255, 0, 255), 3) + + # Define text label + x = int(x - w / 2) + y = int(y - h / 2) + label = label_text + str(id) + " : " + str(round(box.conf.item(), 2)) + + # Get text size + text_size, baseline = cv2.getTextSize(label, font, font_scale, line_type) + text_w, text_h = text_size + + # Position text above the bounding box + text_x = x + text_y = y - 10 if y - 10 > 10 else y + h + text_h + + # Draw main text on top of the outline + cv2.putText(image, label, (text_x, text_y - baseline), font, font_scale, font_color, text_thickness) + + return image + + +def visualize_point_cloud(points): + """ + Visualizes the given point cloud using Open3D. + + Args: + points (np.ndarray): Nx3 array of point cloud coordinates. + """ + pc = o3d.geometry.PointCloud() + pc.points = o3d.utility.Vector3dVector(points) + pc.paint_uniform_color([0.1, 0.7, 0.9]) # Light blue color + o3d.visualization.draw_geometries([pc]) + + +def visualize_plane(inlier_cloud, outlier_cloud, bounding_box_2d_points): + """ + Visualizes the detected plane with its 2D bounding box. + + :param inlier_cloud: Open3D point cloud containing plane points. + :param outlier_cloud: Open3D point cloud containing non-plane points. + :param bounding_box_2d_points: 4 corner points of the 2D bounding box on the plane. + """ + inlier_cloud.paint_uniform_color([1, 0, 0]) # Red for the plane + outlier_cloud.paint_uniform_color([0.5, 0.5, 0.5]) # Gray for other points + + # Create bounding box visualization + bounding_box_pcd = o3d.geometry.PointCloud() + bounding_box_pcd.points = o3d.utility.Vector3dVector(bounding_box_2d_points) + bounding_box_pcd.paint_uniform_color([0, 1, 0]) # Green for bounding box corners + + # Create a bounding box line set (connect corners) + lines = [ + [0, 1], [1, 2], [2, 3], [3, 0] # Edges of the rectangle + ] + + bounding_box_lines = o3d.geometry.LineSet() + bounding_box_lines.points = o3d.utility.Vector3dVector(bounding_box_2d_points) + bounding_box_lines.lines = o3d.utility.Vector2iVector(lines) + + bounding_box_lines.paint_uniform_color([0, 1, 0]) # Green for bounding box edges + + # Visualize + o3d.visualization.draw_geometries([inlier_cloud, outlier_cloud, bounding_box_pcd, bounding_box_lines]) \ No newline at end of file diff --git a/GEMstack/onboard/perception/transform.py b/GEMstack/onboard/perception/transform.py index 8d47598fa..4487e051c 100644 --- a/GEMstack/onboard/perception/transform.py +++ b/GEMstack/onboard/perception/transform.py @@ -11,7 +11,7 @@ def publish_tf(): (0, 0, 1), # (x, y, z) translation tf.transformations.quaternion_from_euler(0, 0, 0), # (roll, pitch, yaw) rospy.Time.now(), - "oak_rgb_camera_optical_frame", # Child frame (sensor) + "os_sensor", # Child frame (sensor) "map" # Parent frame (world) ) rate.sleep() From 1355d5faa0002cbe44164bc12c62ed5701409808 Mon Sep 17 00:00:00 2001 From: KenC1014 Date: Fri, 14 Feb 2025 21:06:27 -0600 Subject: [PATCH 05/46] extract clouds in 2d --- GEMstack/onboard/perception/fusion.py | 41 +++++++++++++++++++-------- 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/GEMstack/onboard/perception/fusion.py b/GEMstack/onboard/perception/fusion.py index d8cd1a41a..d6bca214a 100644 --- a/GEMstack/onboard/perception/fusion.py +++ b/GEMstack/onboard/perception/fusion.py @@ -42,32 +42,49 @@ def fusion_callback(self, image: Image, lidar_pc2_msg: PointCloud2): image = self.bridge.imgmsg_to_cv2(image, "bgr8") track_result = self.detector.track(source=image, classes=self.classes_to_detect, persist=True, conf=self.confidence) + # Convert 1D PointCloud2 data to x, y, z coords + lidar_points = convert_pointcloud2_to_xyz(lidar_pc2_msg) + + # Transform LiDAR points into the camera coordinate frame. + lidar_in_camera = transform_lidar_points(lidar_points, self.R, self.t) + + # Project the transformed points into the image plane. + projected_pts = project_points(lidar_in_camera, self.K) + + # Process bboxes self.last_person_boxes = [] boxes = track_result[0].boxes # Unpacking box dimentions detected into x,y,w,h + all_extracted_pts = [] for box in boxes: xywh = box.xywh[0].tolist() self.last_person_boxes.append(xywh) + # Extracting projected pts + x, y, w, h = xywh + left_bound = int(x - w / 2) + right_bound = int(x + w / 2) + top_bound = int(y - h / 2) + bottom_bound = int(y + h / 2) + + if len(projected_pts) > 0: + pts = np.array(projected_pts) + extracted_pts = pts[(pts[:, 0] > left_bound) & + (pts[:, 0] < right_bound) & + (pts[:, 1] > top_bound) & + (pts[:, 1] < bottom_bound) + ] + + all_extracted_pts = all_extracted_pts + list(extracted_pts) + # Used for visualization if(self.visualization): image = vis_2d_bbox(image, xywh, box) - - # Convert 1D PointCloud2 data to x, y, z coords - lidar_points = convert_pointcloud2_to_xyz(lidar_pc2_msg) - - # Transform LiDAR points into the camera coordinate frame. - lidar_in_camera = transform_lidar_points(lidar_points, self.R, self.t) - - # Project the transformed points into the image plane. - projected_pts = project_points(lidar_in_camera, self.K) # Draw projected LiDAR points on the image. - for pt in projected_pts: + for pt in all_extracted_pts: cv2.circle(image, pt, 2, (0, 0, 255), -1) - - # visualize_point_cloud(p_img_cloud) # Used for visualization if(self.visualization): From 8316a20ba289a5a349ac35558da5f10bf4addde6 Mon Sep 17 00:00:00 2001 From: nvikramraj Date: Fri, 14 Feb 2025 21:48:36 -0600 Subject: [PATCH 06/46] converted lidar to cam frame --- GEMstack/knowledge/calibration/extrinsics.npz | Bin 0 -> 850 bytes GEMstack/knowledge/calibration/intrinsic.json | 7 ++ .../perception/pedestrian_detection.py | 101 ++++++++++++++---- 3 files changed, 90 insertions(+), 18 deletions(-) create mode 100644 GEMstack/knowledge/calibration/extrinsics.npz create mode 100644 GEMstack/knowledge/calibration/intrinsic.json diff --git a/GEMstack/knowledge/calibration/extrinsics.npz b/GEMstack/knowledge/calibration/extrinsics.npz new file mode 100644 index 0000000000000000000000000000000000000000..663fe14bd0da3d734e4dbacac1d377c6c845ca42 GIT binary patch literal 850 zcmWIWW@Zs#fB;2?2#tS}PXIX}%*r6b5Tut^P|3(302Tl#0!e_tWWP|~fJjD$GKOmP zl+@znB6TYTb(=H`bsYuuwEUuyqQt!T{Gyapkhoi7PH`$wyf`DVAQi~hFxF8p*3?m` zRUjL12?)GZN<2N+{$EMgG><>;_cu%w^cTNhzyCn(UGBnF<@+5zpNJPQs<&r2v|W3_ z|M&ZI?E^(q|Gc*^&_3|(!nL{f4A;7YL-v>L4?yvLl()3c4505o807sDnD;>fAONE& z^}Zp6-nTp@dy74P`TnQc+1uwFys^LJWzIYIpl4{_S=_#LHOxC4K<^Zlr6vznAAQOF z^`fKev;7%)pW0IjpZ5!YKkj$o-Y5G2Z$>5&W?U&05-T9Gfe}Q4(<{0b)K~>6gMbFc n|3C&fj?uNCh8#=-$d+?Jdtl)h;LXYgl4AzKIY4>=6Nm=@B*( 0: # only project points in front of the camera + u = K[0, 0] * (p[0] / p[2]) + K[0, 2] + v = K[1, 1] * (p[1] / p[2]) + K[1, 2] + cv2.circle(self.rosbag_image, (int(u),int(v)), 2, (0, 0, 255), -1) + + ros_img = self.cv_bridge.cv2_to_imgmsg(self.rosbag_image, 'bgr8') + self.rosbag_cam_lidar_pub.publish(ros_img) + + + + # if(self.visualization): + # transform = self.tf_buffer.lookup_transform('base_link', pointcloud.header.frame_id, rospy.Time(0), rospy.Duration(1.0)) + # # Transform the point cloud + # transformed_cloud = do_transform_cloud(pointcloud, transform) + # # Publish transformed point cloud + # self.rosbag_lidar_ouster_pub.publish(transformed_cloud) + # self.publish_base_link_to_map() + + + def load_extrinsics(self,extrinsics_file): + """ + Load calibrated extrinsics from a .npz file. + Assumes the file contains keys 'R' and 't'. + """ + data = np.load(extrinsics_file) + R = data['R'] + t = data['t'] + return R, t + + def load_intrinsics(self,intrinsics_file): + """ + Load camera intrinsics from a JSON file. + Expects keys: 'fx', 'fy', 'cx', and 'cy'. + """ + with open(intrinsics_file, 'r') as f: + intrinsics = json.load(f) + fx = intrinsics["fx"] + fy = intrinsics["fy"] + cx = intrinsics["cx"] + cy = intrinsics["cy"] + K = np.array([[fx, 0, cx], + [0, fy, cy], + [0, 0, 1]], dtype=np.float32) + return K # Use cv2.Mat for GEM Car, Image for RosBag @@ -220,6 +284,7 @@ def image_callback(self, image : Image): #image : cv2.Mat): # Draw main text on top of the outline cv2.putText(image, label, (text_x, text_y - baseline), font, font_scale, font_color, text_thickness) + self.rosbag_image = image # Used for visualization From dc69933c8197f84056c46611ff34517df03bf92a Mon Sep 17 00:00:00 2001 From: KenC1014 Date: Fri, 14 Feb 2025 22:03:38 -0600 Subject: [PATCH 07/46] add od3 dialect --- GEMstack/onboard/perception/fusion.py | 13 +++++++++++++ GEMstack/onboard/perception/fusion_utils.py | 5 +---- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/GEMstack/onboard/perception/fusion.py b/GEMstack/onboard/perception/fusion.py index d6bca214a..159fbc683 100644 --- a/GEMstack/onboard/perception/fusion.py +++ b/GEMstack/onboard/perception/fusion.py @@ -50,6 +50,19 @@ def fusion_callback(self, image: Image, lidar_pc2_msg: PointCloud2): # Project the transformed points into the image plane. projected_pts = project_points(lidar_in_camera, self.K) + + # Convert numpy array to Open3D point cloud + transformed_points = projected_pts + pcd = o3d.geometry.PointCloud() + pcd.points = o3d.utility.Vector3dVector(transformed_points) + + # Apply voxel grid downsampling + voxel_size = 0.1 # Adjust for desired resolution + downsampled_pcd = pcd.voxel_down_sample(voxel_size=voxel_size) + + # Convert back to numpy array + transformed_points = np.asarray(downsampled_pcd.points) + print(f"after :{transformed_points.shape}") # Process bboxes self.last_person_boxes = [] diff --git a/GEMstack/onboard/perception/fusion_utils.py b/GEMstack/onboard/perception/fusion_utils.py index ad2c13c19..e0408bb4c 100644 --- a/GEMstack/onboard/perception/fusion_utils.py +++ b/GEMstack/onboard/perception/fusion_utils.py @@ -8,10 +8,7 @@ def convert_pointcloud2_to_xyz(lidar_pc2_msg: PointCloud2): """ Convert 1D PointCloud2 data to x, y, z coords """ - lidar_points = [] - for point in pc2.read_points(lidar_pc2_msg, field_names=("x", "y", "z", "intensity"), skip_nans=True): - lidar_points.append([point[0], point[1], point[2]]) - return np.array(lidar_points) + return np.array(list(pc2.read_points(lidar_pc2_msg, skip_nans=True)), dtype=np.float32)[:, :3] # Credits: The following lines of codes (17 to 159 excluding lines 80 to 115) are adapted from the Calibration Team B From 28726180e25cf7cfcf271000179619098109d447 Mon Sep 17 00:00:00 2001 From: KenC1014 Date: Fri, 14 Feb 2025 22:07:08 -0600 Subject: [PATCH 08/46] add o3d import --- GEMstack/onboard/perception/fusion.py | 1 + 1 file changed, 1 insertion(+) diff --git a/GEMstack/onboard/perception/fusion.py b/GEMstack/onboard/perception/fusion.py index 159fbc683..ed6cd760f 100644 --- a/GEMstack/onboard/perception/fusion.py +++ b/GEMstack/onboard/perception/fusion.py @@ -2,6 +2,7 @@ from sensor_msgs.msg import Image, PointCloud2 from ultralytics import YOLO from fusion_utils import * +import open3d as o3d import rospy import message_filters import os From bf699815b7c39c3c86925ba1016b976de4fdae1b Mon Sep 17 00:00:00 2001 From: KenC1014 Date: Fri, 14 Feb 2025 22:17:40 -0600 Subject: [PATCH 09/46] downsample pc2 --- GEMstack/onboard/perception/fusion.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/GEMstack/onboard/perception/fusion.py b/GEMstack/onboard/perception/fusion.py index ed6cd760f..5299e3457 100644 --- a/GEMstack/onboard/perception/fusion.py +++ b/GEMstack/onboard/perception/fusion.py @@ -45,17 +45,10 @@ def fusion_callback(self, image: Image, lidar_pc2_msg: PointCloud2): # Convert 1D PointCloud2 data to x, y, z coords lidar_points = convert_pointcloud2_to_xyz(lidar_pc2_msg) - - # Transform LiDAR points into the camera coordinate frame. - lidar_in_camera = transform_lidar_points(lidar_points, self.R, self.t) - - # Project the transformed points into the image plane. - projected_pts = project_points(lidar_in_camera, self.K) # Convert numpy array to Open3D point cloud - transformed_points = projected_pts pcd = o3d.geometry.PointCloud() - pcd.points = o3d.utility.Vector3dVector(transformed_points) + pcd.points = o3d.utility.Vector3dVector(lidar_points) # Apply voxel grid downsampling voxel_size = 0.1 # Adjust for desired resolution @@ -63,7 +56,13 @@ def fusion_callback(self, image: Image, lidar_pc2_msg: PointCloud2): # Convert back to numpy array transformed_points = np.asarray(downsampled_pcd.points) - print(f"after :{transformed_points.shape}") + + # Transform LiDAR points into the camera coordinate frame. + lidar_in_camera = transform_lidar_points(transformed_points, self.R, self.t) + + # Project the transformed points into the image plane. + projected_pts = project_points(lidar_in_camera, self.K) + # Process bboxes self.last_person_boxes = [] From ced4804737f8c90264c50bb7e5b88e8029b4edc0 Mon Sep 17 00:00:00 2001 From: KenC1014 Date: Fri, 14 Feb 2025 22:33:34 -0600 Subject: [PATCH 10/46] rename variables --- GEMstack/onboard/perception/fusion.py | 32 +++++++-------------- GEMstack/onboard/perception/fusion_utils.py | 15 ++++++++++ 2 files changed, 26 insertions(+), 21 deletions(-) diff --git a/GEMstack/onboard/perception/fusion.py b/GEMstack/onboard/perception/fusion.py index 5299e3457..d98cbac6a 100644 --- a/GEMstack/onboard/perception/fusion.py +++ b/GEMstack/onboard/perception/fusion.py @@ -2,12 +2,12 @@ from sensor_msgs.msg import Image, PointCloud2 from ultralytics import YOLO from fusion_utils import * -import open3d as o3d import rospy import message_filters import os import tf + class Fusion3D(): def __init__(self): # Setup variables @@ -22,7 +22,7 @@ def __init__(self): # Load calibration data self.R = load_extrinsics(os.getcwd() + '/GEMstack/onboard/perception/calibration/extrinsics/R.npy') self.t = load_extrinsics(os.getcwd() + '/GEMstack/onboard/perception/calibration/extrinsics/t.npy') - self.K = load_intrinsics(os.getcwd()+ '/GEMstack/onboard/perception/calibration/camera_intrinsics.json') + self.K = load_intrinsics(os.getcwd() + '/GEMstack/onboard/perception/calibration/camera_intrinsics.json') # Subscribers and sychronizers self.rgb_rosbag = message_filters.Subscriber('/oak/rgb/image_raw', Image) @@ -38,32 +38,22 @@ def __init__(self): self.pub_image = rospy.Publisher("/camera/image_detection", Image, queue_size=1) - - def fusion_callback(self, image: Image, lidar_pc2_msg: PointCloud2): - image = self.bridge.imgmsg_to_cv2(image, "bgr8") - track_result = self.detector.track(source=image, classes=self.classes_to_detect, persist=True, conf=self.confidence) + def fusion_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2): + cv_image = self.bridge.imgmsg_to_cv2(rgb_image_msg, "bgr8") + track_result = self.detector.track(source=cv_image, classes=self.classes_to_detect, persist=True, conf=self.confidence) # Convert 1D PointCloud2 data to x, y, z coords lidar_points = convert_pointcloud2_to_xyz(lidar_pc2_msg) - # Convert numpy array to Open3D point cloud - pcd = o3d.geometry.PointCloud() - pcd.points = o3d.utility.Vector3dVector(lidar_points) - - # Apply voxel grid downsampling - voxel_size = 0.1 # Adjust for desired resolution - downsampled_pcd = pcd.voxel_down_sample(voxel_size=voxel_size) - - # Convert back to numpy array - transformed_points = np.asarray(downsampled_pcd.points) + # Downsample xyz point clouds + downsampled_points = downsample_points(lidar_points) # Transform LiDAR points into the camera coordinate frame. - lidar_in_camera = transform_lidar_points(transformed_points, self.R, self.t) + lidar_in_camera = transform_lidar_points(downsampled_points, self.R, self.t) # Project the transformed points into the image plane. projected_pts = project_points(lidar_in_camera, self.K) - # Process bboxes self.last_person_boxes = [] boxes = track_result[0].boxes @@ -93,15 +83,15 @@ def fusion_callback(self, image: Image, lidar_pc2_msg: PointCloud2): # Used for visualization if(self.visualization): - image = vis_2d_bbox(image, xywh, box) + cv_image = vis_2d_bbox(cv_image, xywh, box) # Draw projected LiDAR points on the image. for pt in all_extracted_pts: - cv2.circle(image, pt, 2, (0, 0, 255), -1) + cv2.circle(cv_image, pt, 2, (0, 0, 255), -1) # Used for visualization if(self.visualization): - ros_img = self.bridge.cv2_to_imgmsg(image, 'bgr8') + ros_img = self.bridge.cv2_to_imgmsg(cv_image, 'bgr8') self.pub_image.publish(ros_img) diff --git a/GEMstack/onboard/perception/fusion_utils.py b/GEMstack/onboard/perception/fusion_utils.py index e0408bb4c..24b61366c 100644 --- a/GEMstack/onboard/perception/fusion_utils.py +++ b/GEMstack/onboard/perception/fusion_utils.py @@ -11,6 +11,21 @@ def convert_pointcloud2_to_xyz(lidar_pc2_msg: PointCloud2): return np.array(list(pc2.read_points(lidar_pc2_msg, skip_nans=True)), dtype=np.float32)[:, :3] +def downsample_points(lidar_points): + """ Downsample point clouds """ + # Convert numpy array to Open3D point cloud + pcd = o3d.geometry.PointCloud() + pcd.points = o3d.utility.Vector3dVector(lidar_points) + + # Apply voxel grid downsampling + voxel_size = 0.1 # Adjust for desired resolution + downsampled_pcd = pcd.voxel_down_sample(voxel_size=voxel_size) + + # Convert back to numpy array + transformed_points = np.asarray(downsampled_pcd.points) + return transformed_points + + # Credits: The following lines of codes (17 to 159 excluding lines 80 to 115) are adapted from the Calibration Team B def load_extrinsics(extrinsics_file): """ From d319eaa3b09063c977299bc6eb6e3258fc455d83 Mon Sep 17 00:00:00 2001 From: KenC1014 Date: Sat, 15 Feb 2025 00:46:45 -0600 Subject: [PATCH 11/46] extract 3D pc2 and visualize --- GEMstack/onboard/perception/fusion.py | 21 ++++++++-- GEMstack/onboard/perception/fusion_utils.py | 43 ++++++++++++++++++--- GEMstack/onboard/perception/transform.py | 5 ++- 3 files changed, 58 insertions(+), 11 deletions(-) diff --git a/GEMstack/onboard/perception/fusion.py b/GEMstack/onboard/perception/fusion.py index d98cbac6a..f48da3e1b 100644 --- a/GEMstack/onboard/perception/fusion.py +++ b/GEMstack/onboard/perception/fusion.py @@ -34,6 +34,7 @@ def __init__(self): self.tf_listener = tf.TransformListener() # Publishers + self.pub_pedestrians_pc2 = rospy.Publisher("/point_cloud/pedestrians", PointCloud2, queue_size=10) if(self.visualization): self.pub_image = rospy.Publisher("/camera/image_detection", Image, queue_size=1) @@ -80,14 +81,26 @@ def fusion_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2): ] all_extracted_pts = all_extracted_pts + list(extracted_pts) - + # Used for visualization if(self.visualization): cv_image = vis_2d_bbox(cv_image, xywh, box) - # Draw projected LiDAR points on the image. - for pt in all_extracted_pts: - cv2.circle(cv_image, pt, 2, (0, 0, 255), -1) + if len(all_extracted_pts) > 0: + # Extract 2D points + extracted_2d_pts = list(np.array(all_extracted_pts)[:, :2].astype(int)) + + # Draw projected 2D LiDAR points on the image. + for pt in extracted_2d_pts: + cv2.circle(cv_image, pt, 2, (0, 0, 255), -1) + + # Extract 3D points + extracted_3d_pts = list(np.array(all_extracted_pts)[:, -3:]) + + # Create point cloud from extracted 3D points + ros_extracted_pedestrian_pc2 = create_point_cloud(extracted_3d_pts) + self.pub_pedestrians_pc2.publish(ros_extracted_pedestrian_pc2) + # Used for visualization if(self.visualization): diff --git a/GEMstack/onboard/perception/fusion_utils.py b/GEMstack/onboard/perception/fusion_utils.py index 24b61366c..69f27d5a6 100644 --- a/GEMstack/onboard/perception/fusion_utils.py +++ b/GEMstack/onboard/perception/fusion_utils.py @@ -1,9 +1,12 @@ -from sensor_msgs.msg import PointCloud2 +from sensor_msgs.msg import PointCloud2, PointField import numpy as np import sensor_msgs.point_cloud2 as pc2 import open3d as o3d import cv2 import json +import rospy +import numpy as np +import struct def convert_pointcloud2_to_xyz(lidar_pc2_msg: PointCloud2): @@ -85,7 +88,7 @@ def project_points(points_3d, K): if pt[2] > 0: # only project points in front of the camera u = K[0, 0] * (pt[0] / pt[2]) + K[0, 2] v = K[1, 1] * (pt[1] / pt[2]) + K[1, 2] - proj_points.append((int(u), int(v))) + proj_points.append((int(u), int(v), pt[0], pt[1], pt[2])) return proj_points @@ -134,10 +137,16 @@ def visualize_point_cloud(points): Args: points (np.ndarray): Nx3 array of point cloud coordinates. """ + # Create a visualization window + vis = o3d.visualization.Visualizer() + vis.create_window() + pc = o3d.geometry.PointCloud() pc.points = o3d.utility.Vector3dVector(points) - pc.paint_uniform_color([0.1, 0.7, 0.9]) # Light blue color - o3d.visualization.draw_geometries([pc]) + pc.paint_uniform_color([0.1, 0.7, 0.9]) + + vis.add_geometry(pc) + vis.run() def visualize_plane(inlier_cloud, outlier_cloud, bounding_box_2d_points): @@ -168,4 +177,28 @@ def visualize_plane(inlier_cloud, outlier_cloud, bounding_box_2d_points): bounding_box_lines.paint_uniform_color([0, 1, 0]) # Green for bounding box edges # Visualize - o3d.visualization.draw_geometries([inlier_cloud, outlier_cloud, bounding_box_pcd, bounding_box_lines]) \ No newline at end of file + o3d.visualization.draw_geometries([inlier_cloud, outlier_cloud, bounding_box_pcd, bounding_box_lines]) + + +def create_point_cloud(points, color=(255, 0, 0)): + """ + Converts a list of (x, y, z) points into a PointCloud2 message. + """ + header = rospy.Header() + header.stamp = rospy.Time.now() + header.frame_id = "map" # Change to your TF frame + + fields = [ + PointField(name="x", offset=0, datatype=PointField.FLOAT32, count=1), + PointField(name="y", offset=4, datatype=PointField.FLOAT32, count=1), + PointField(name="z", offset=8, datatype=PointField.FLOAT32, count=1), + PointField(name="rgb", offset=12, datatype=PointField.FLOAT32, count=1), + ] + + # Convert RGB color to packed float32 + r, g, b = color + packed_color = struct.unpack('f', struct.pack('I', (r << 16) | (g << 8) | b))[0] + + point_cloud_data = [(x, y, z, packed_color) for x, y, z in points] + + return pc2.create_cloud(header, fields, point_cloud_data) \ No newline at end of file diff --git a/GEMstack/onboard/perception/transform.py b/GEMstack/onboard/perception/transform.py index 4487e051c..5e442a093 100644 --- a/GEMstack/onboard/perception/transform.py +++ b/GEMstack/onboard/perception/transform.py @@ -1,5 +1,6 @@ import rospy import tf +import numpy as np def publish_tf(): rospy.init_node('pointcloud_tf_broadcaster') @@ -8,8 +9,8 @@ def publish_tf(): while not rospy.is_shutdown(): br.sendTransform( - (0, 0, 1), # (x, y, z) translation - tf.transformations.quaternion_from_euler(0, 0, 0), # (roll, pitch, yaw) + (0, 1.5, 7), # (x, y, z) translation + tf.transformations.quaternion_from_euler(0.5* np.pi, 0, 0), # (roll, pitch, yaw) rospy.Time.now(), "os_sensor", # Child frame (sensor) "map" # Parent frame (world) From 49f45eef28f0af872b66973e5ce7d1b7e5806ed9 Mon Sep 17 00:00:00 2001 From: KenC1014 Date: Sat, 15 Feb 2025 02:17:54 -0600 Subject: [PATCH 12/46] segregate points by pedestrains --- GEMstack/onboard/perception/fusion.py | 26 +++++----- GEMstack/onboard/perception/fusion_utils.py | 53 +-------------------- 2 files changed, 16 insertions(+), 63 deletions(-) diff --git a/GEMstack/onboard/perception/fusion.py b/GEMstack/onboard/perception/fusion.py index f48da3e1b..6bc0ab406 100644 --- a/GEMstack/onboard/perception/fusion.py +++ b/GEMstack/onboard/perception/fusion.py @@ -60,7 +60,11 @@ def fusion_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2): boxes = track_result[0].boxes # Unpacking box dimentions detected into x,y,w,h - all_extracted_pts = [] + extracted_2d_pedestrians_pts = [] + extracted_3d_pedestrians_pts = [] + flattened_extracted_2d_pedestrians_pts = [] + flattened_extracted_3d_pedestrians_pts = [] + for box in boxes: xywh = box.xywh[0].tolist() self.last_person_boxes.append(xywh) @@ -80,25 +84,25 @@ def fusion_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2): (pts[:, 1] < bottom_bound) ] - all_extracted_pts = all_extracted_pts + list(extracted_pts) + extracted_2d_pts = list(np.array(extracted_pts)[:, :2].astype(int)) + extracted_2d_pedestrians_pts.append(extracted_2d_pts) + flattened_extracted_2d_pedestrians_pts = flattened_extracted_2d_pedestrians_pts + extracted_2d_pts + + extracted_3d_pts = list(np.array(extracted_pts)[:, -3:]) + extracted_3d_pedestrians_pts.append(extracted_3d_pts) + flattened_extracted_3d_pedestrians_pts = flattened_extracted_3d_pedestrians_pts + extracted_3d_pts # Used for visualization if(self.visualization): cv_image = vis_2d_bbox(cv_image, xywh, box) - if len(all_extracted_pts) > 0: - # Extract 2D points - extracted_2d_pts = list(np.array(all_extracted_pts)[:, :2].astype(int)) - + if len(extracted_2d_pedestrians_pts) > 0: # Draw projected 2D LiDAR points on the image. - for pt in extracted_2d_pts: + for pt in flattened_extracted_2d_pedestrians_pts: cv2.circle(cv_image, pt, 2, (0, 0, 255), -1) - # Extract 3D points - extracted_3d_pts = list(np.array(all_extracted_pts)[:, -3:]) - # Create point cloud from extracted 3D points - ros_extracted_pedestrian_pc2 = create_point_cloud(extracted_3d_pts) + ros_extracted_pedestrian_pc2 = create_point_cloud(flattened_extracted_3d_pedestrians_pts) self.pub_pedestrians_pc2.publish(ros_extracted_pedestrian_pc2) diff --git a/GEMstack/onboard/perception/fusion_utils.py b/GEMstack/onboard/perception/fusion_utils.py index 69f27d5a6..b8ffd03e1 100644 --- a/GEMstack/onboard/perception/fusion_utils.py +++ b/GEMstack/onboard/perception/fusion_utils.py @@ -29,7 +29,7 @@ def downsample_points(lidar_points): return transformed_points -# Credits: The following lines of codes (17 to 159 excluding lines 80 to 115) are adapted from the Calibration Team B +# Credits: The following lines of codes (from 33 to 92) are adapted from the Calibration Team B def load_extrinsics(extrinsics_file): """ Load calibrated extrinsics from a .npz file. @@ -126,60 +126,9 @@ def vis_2d_bbox(image, xywh, box): # Draw main text on top of the outline cv2.putText(image, label, (text_x, text_y - baseline), font, font_scale, font_color, text_thickness) - return image -def visualize_point_cloud(points): - """ - Visualizes the given point cloud using Open3D. - - Args: - points (np.ndarray): Nx3 array of point cloud coordinates. - """ - # Create a visualization window - vis = o3d.visualization.Visualizer() - vis.create_window() - - pc = o3d.geometry.PointCloud() - pc.points = o3d.utility.Vector3dVector(points) - pc.paint_uniform_color([0.1, 0.7, 0.9]) - - vis.add_geometry(pc) - vis.run() - - -def visualize_plane(inlier_cloud, outlier_cloud, bounding_box_2d_points): - """ - Visualizes the detected plane with its 2D bounding box. - - :param inlier_cloud: Open3D point cloud containing plane points. - :param outlier_cloud: Open3D point cloud containing non-plane points. - :param bounding_box_2d_points: 4 corner points of the 2D bounding box on the plane. - """ - inlier_cloud.paint_uniform_color([1, 0, 0]) # Red for the plane - outlier_cloud.paint_uniform_color([0.5, 0.5, 0.5]) # Gray for other points - - # Create bounding box visualization - bounding_box_pcd = o3d.geometry.PointCloud() - bounding_box_pcd.points = o3d.utility.Vector3dVector(bounding_box_2d_points) - bounding_box_pcd.paint_uniform_color([0, 1, 0]) # Green for bounding box corners - - # Create a bounding box line set (connect corners) - lines = [ - [0, 1], [1, 2], [2, 3], [3, 0] # Edges of the rectangle - ] - - bounding_box_lines = o3d.geometry.LineSet() - bounding_box_lines.points = o3d.utility.Vector3dVector(bounding_box_2d_points) - bounding_box_lines.lines = o3d.utility.Vector2iVector(lines) - - bounding_box_lines.paint_uniform_color([0, 1, 0]) # Green for bounding box edges - - # Visualize - o3d.visualization.draw_geometries([inlier_cloud, outlier_cloud, bounding_box_pcd, bounding_box_lines]) - - def create_point_cloud(points, color=(255, 0, 0)): """ Converts a list of (x, y, z) points into a PointCloud2 message. From d3c9e4c1a4f76217c3c1971690c6cc9eb7c9907b Mon Sep 17 00:00:00 2001 From: KenC1014 Date: Sat, 15 Feb 2025 02:19:43 -0600 Subject: [PATCH 13/46] update comments --- GEMstack/onboard/perception/fusion.py | 1 + 1 file changed, 1 insertion(+) diff --git a/GEMstack/onboard/perception/fusion.py b/GEMstack/onboard/perception/fusion.py index 6bc0ab406..a7cd21713 100644 --- a/GEMstack/onboard/perception/fusion.py +++ b/GEMstack/onboard/perception/fusion.py @@ -40,6 +40,7 @@ def __init__(self): def fusion_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2): + # Convert to cv2 image and run detector cv_image = self.bridge.imgmsg_to_cv2(rgb_image_msg, "bgr8") track_result = self.detector.track(source=cv_image, classes=self.classes_to_detect, persist=True, conf=self.confidence) From c23b36c5704713e0edc8b372c0ec58a9d816080b Mon Sep 17 00:00:00 2001 From: KenC1014 Date: Sat, 15 Feb 2025 02:28:39 -0600 Subject: [PATCH 14/46] rename variables --- GEMstack/onboard/perception/fusion.py | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/GEMstack/onboard/perception/fusion.py b/GEMstack/onboard/perception/fusion.py index a7cd21713..0ac34638d 100644 --- a/GEMstack/onboard/perception/fusion.py +++ b/GEMstack/onboard/perception/fusion.py @@ -61,10 +61,9 @@ def fusion_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2): boxes = track_result[0].boxes # Unpacking box dimentions detected into x,y,w,h - extracted_2d_pedestrians_pts = [] - extracted_3d_pedestrians_pts = [] - flattened_extracted_2d_pedestrians_pts = [] - flattened_extracted_3d_pedestrians_pts = [] + pedestrians_3d_pts = [] + flattened_pedestrians_2d_pts = [] + flattened_pedestrians_3d_pts = [] for box in boxes: xywh = box.xywh[0].tolist() @@ -86,24 +85,23 @@ def fusion_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2): ] extracted_2d_pts = list(np.array(extracted_pts)[:, :2].astype(int)) - extracted_2d_pedestrians_pts.append(extracted_2d_pts) - flattened_extracted_2d_pedestrians_pts = flattened_extracted_2d_pedestrians_pts + extracted_2d_pts + flattened_pedestrians_2d_pts = flattened_pedestrians_2d_pts + extracted_2d_pts extracted_3d_pts = list(np.array(extracted_pts)[:, -3:]) - extracted_3d_pedestrians_pts.append(extracted_3d_pts) - flattened_extracted_3d_pedestrians_pts = flattened_extracted_3d_pedestrians_pts + extracted_3d_pts + pedestrians_3d_pts.append(extracted_3d_pts) + flattened_pedestrians_3d_pts = flattened_pedestrians_3d_pts + extracted_3d_pts # Used for visualization if(self.visualization): cv_image = vis_2d_bbox(cv_image, xywh, box) - if len(extracted_2d_pedestrians_pts) > 0: + if len(pedestrians_3d_pts) > 0: # Draw projected 2D LiDAR points on the image. - for pt in flattened_extracted_2d_pedestrians_pts: + for pt in flattened_pedestrians_2d_pts: cv2.circle(cv_image, pt, 2, (0, 0, 255), -1) # Create point cloud from extracted 3D points - ros_extracted_pedestrian_pc2 = create_point_cloud(flattened_extracted_3d_pedestrians_pts) + ros_extracted_pedestrian_pc2 = create_point_cloud(flattened_pedestrians_3d_pts) self.pub_pedestrians_pc2.publish(ros_extracted_pedestrian_pc2) From 5aa571b04bb4979a46c5942ea5c648c4f3f83b88 Mon Sep 17 00:00:00 2001 From: KenC1014 Date: Sat, 15 Feb 2025 02:30:54 -0600 Subject: [PATCH 15/46] remove empty line --- GEMstack/onboard/perception/fusion.py | 1 - 1 file changed, 1 deletion(-) diff --git a/GEMstack/onboard/perception/fusion.py b/GEMstack/onboard/perception/fusion.py index 0ac34638d..c9f1101de 100644 --- a/GEMstack/onboard/perception/fusion.py +++ b/GEMstack/onboard/perception/fusion.py @@ -104,7 +104,6 @@ def fusion_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2): ros_extracted_pedestrian_pc2 = create_point_cloud(flattened_pedestrians_3d_pts) self.pub_pedestrians_pc2.publish(ros_extracted_pedestrian_pc2) - # Used for visualization if(self.visualization): ros_img = self.bridge.cv2_to_imgmsg(cv_image, 'bgr8') From 9008a9ad563fe6128d99d41d5d094d93a798f1ab Mon Sep 17 00:00:00 2001 From: KenC1014 Date: Sat, 15 Feb 2025 16:16:24 -0600 Subject: [PATCH 16/46] add ground and max dist filter --- GEMstack/onboard/perception/fusion.py | 20 ++++++++++++++++---- GEMstack/onboard/perception/fusion_utils.py | 14 +++++++++++++- GEMstack/onboard/perception/transform.py | 2 +- 3 files changed, 30 insertions(+), 6 deletions(-) diff --git a/GEMstack/onboard/perception/fusion.py b/GEMstack/onboard/perception/fusion.py index c9f1101de..18f390935 100644 --- a/GEMstack/onboard/perception/fusion.py +++ b/GEMstack/onboard/perception/fusion.py @@ -18,6 +18,8 @@ def __init__(self): self.visualization = True self.confidence = 0.7 self.classes_to_detect = 0 + self.ground_threshold = 1.6 + self.max_dist_percent = 0.7 # Load calibration data self.R = load_extrinsics(os.getcwd() + '/GEMstack/onboard/perception/calibration/extrinsics/R.npy') @@ -84,10 +86,17 @@ def fusion_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2): (pts[:, 1] < bottom_bound) ] - extracted_2d_pts = list(np.array(extracted_pts)[:, :2].astype(int)) + #if no points extracted for this bbox, skip + if len(extracted_pts) < 1: + continue + + extracted_pts = filter_ground_points(extracted_pts, self.ground_threshold) + extracted_pts = filter_far_points(extracted_pts) + + extracted_2d_pts = list(extracted_pts[:, :2].astype(int)) flattened_pedestrians_2d_pts = flattened_pedestrians_2d_pts + extracted_2d_pts - - extracted_3d_pts = list(np.array(extracted_pts)[:, -3:]) + + extracted_3d_pts = list(extracted_pts[:, -3:]) pedestrians_3d_pts.append(extracted_3d_pts) flattened_pedestrians_3d_pts = flattened_pedestrians_3d_pts + extracted_3d_pts @@ -95,7 +104,10 @@ def fusion_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2): if(self.visualization): cv_image = vis_2d_bbox(cv_image, xywh, box) - if len(pedestrians_3d_pts) > 0: + if len(flattened_pedestrians_3d_pts) > 0: + # print(f"x_dim pedestrians_3d_pts: {np.max(np.array(flattened_pedestrians_3d_pts)[:, 0]), np.min(np.array(flattened_pedestrians_3d_pts)[:, 0])}") + # print(f"y_dim pedestrians_3d_pts: {np.max(np.array(flattened_pedestrians_3d_pts)[:, 1]), np.min(np.array(flattened_pedestrians_3d_pts)[:, 1])}") + # print(f"z_dim pedestrians_3d_pts: {np.max(np.array(flattened_pedestrians_3d_pts)[:, 2]), np.min(np.array(flattened_pedestrians_3d_pts)[:, 2])}") # Draw projected 2D LiDAR points on the image. for pt in flattened_pedestrians_2d_pts: cv2.circle(cv_image, pt, 2, (0, 0, 255), -1) diff --git a/GEMstack/onboard/perception/fusion_utils.py b/GEMstack/onboard/perception/fusion_utils.py index b8ffd03e1..4aacb4810 100644 --- a/GEMstack/onboard/perception/fusion_utils.py +++ b/GEMstack/onboard/perception/fusion_utils.py @@ -1,4 +1,5 @@ from sensor_msgs.msg import PointCloud2, PointField +from scipy.stats import zscore import numpy as np import sensor_msgs.point_cloud2 as pc2 import open3d as o3d @@ -23,12 +24,23 @@ def downsample_points(lidar_points): # Apply voxel grid downsampling voxel_size = 0.1 # Adjust for desired resolution downsampled_pcd = pcd.voxel_down_sample(voxel_size=voxel_size) - + # Convert back to numpy array transformed_points = np.asarray(downsampled_pcd.points) return transformed_points +def filter_ground_points(lidar_points, ground_threshold = 0): + filtered_array = lidar_points[lidar_points[:, 3] < ground_threshold] + return filtered_array + + +def filter_far_points(lidar_points, max_dist_percent=0.85): + max_dist = np.max(lidar_points[:, 4]) + filtered_array = lidar_points[lidar_points[:, 4] < max_dist_percent * max_dist] + return filtered_array + + # Credits: The following lines of codes (from 33 to 92) are adapted from the Calibration Team B def load_extrinsics(extrinsics_file): """ diff --git a/GEMstack/onboard/perception/transform.py b/GEMstack/onboard/perception/transform.py index 5e442a093..6e921f56c 100644 --- a/GEMstack/onboard/perception/transform.py +++ b/GEMstack/onboard/perception/transform.py @@ -9,7 +9,7 @@ def publish_tf(): while not rospy.is_shutdown(): br.sendTransform( - (0, 1.5, 7), # (x, y, z) translation + (0, 1.6, 7), # (x, y, z) translation tf.transformations.quaternion_from_euler(0.5* np.pi, 0, 0), # (roll, pitch, yaw) rospy.Time.now(), "os_sensor", # Child frame (sensor) From c31546dba56640a2794585b7efcc49af9c315e5f Mon Sep 17 00:00:00 2001 From: KenC1014 Date: Sat, 15 Feb 2025 16:35:57 -0600 Subject: [PATCH 17/46] add comments --- GEMstack/onboard/perception/fusion.py | 6 +++--- GEMstack/onboard/perception/fusion_utils.py | 4 +++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/GEMstack/onboard/perception/fusion.py b/GEMstack/onboard/perception/fusion.py index 18f390935..bfd39f7c1 100644 --- a/GEMstack/onboard/perception/fusion.py +++ b/GEMstack/onboard/perception/fusion.py @@ -90,12 +90,15 @@ def fusion_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2): if len(extracted_pts) < 1: continue + # Apply ground and max distance filter to extracted 5D points extracted_pts = filter_ground_points(extracted_pts, self.ground_threshold) extracted_pts = filter_far_points(extracted_pts) + # Extract 2D pedestrians points camera frame extracted_2d_pts = list(extracted_pts[:, :2].astype(int)) flattened_pedestrians_2d_pts = flattened_pedestrians_2d_pts + extracted_2d_pts + # Extract 3D pedestrians points in lidar frame extracted_3d_pts = list(extracted_pts[:, -3:]) pedestrians_3d_pts.append(extracted_3d_pts) flattened_pedestrians_3d_pts = flattened_pedestrians_3d_pts + extracted_3d_pts @@ -105,9 +108,6 @@ def fusion_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2): cv_image = vis_2d_bbox(cv_image, xywh, box) if len(flattened_pedestrians_3d_pts) > 0: - # print(f"x_dim pedestrians_3d_pts: {np.max(np.array(flattened_pedestrians_3d_pts)[:, 0]), np.min(np.array(flattened_pedestrians_3d_pts)[:, 0])}") - # print(f"y_dim pedestrians_3d_pts: {np.max(np.array(flattened_pedestrians_3d_pts)[:, 1]), np.min(np.array(flattened_pedestrians_3d_pts)[:, 1])}") - # print(f"z_dim pedestrians_3d_pts: {np.max(np.array(flattened_pedestrians_3d_pts)[:, 2]), np.min(np.array(flattened_pedestrians_3d_pts)[:, 2])}") # Draw projected 2D LiDAR points on the image. for pt in flattened_pedestrians_2d_pts: cv2.circle(cv_image, pt, 2, (0, 0, 255), -1) diff --git a/GEMstack/onboard/perception/fusion_utils.py b/GEMstack/onboard/perception/fusion_utils.py index 4aacb4810..cd6fd1ce7 100644 --- a/GEMstack/onboard/perception/fusion_utils.py +++ b/GEMstack/onboard/perception/fusion_utils.py @@ -1,5 +1,4 @@ from sensor_msgs.msg import PointCloud2, PointField -from scipy.stats import zscore import numpy as np import sensor_msgs.point_cloud2 as pc2 import open3d as o3d @@ -31,11 +30,13 @@ def downsample_points(lidar_points): def filter_ground_points(lidar_points, ground_threshold = 0): + """ Filter points given an elevation of ground threshold """ filtered_array = lidar_points[lidar_points[:, 3] < ground_threshold] return filtered_array def filter_far_points(lidar_points, max_dist_percent=0.85): + """ Filter points beyond a percentage threshold of max distance in a point cluster """ max_dist = np.max(lidar_points[:, 4]) filtered_array = lidar_points[lidar_points[:, 4] < max_dist_percent * max_dist] return filtered_array @@ -100,6 +101,7 @@ def project_points(points_3d, K): if pt[2] > 0: # only project points in front of the camera u = K[0, 0] * (pt[0] / pt[2]) + K[0, 2] v = K[1, 1] * (pt[1] / pt[2]) + K[1, 2] + # 5D points that stores original lidar points proj_points.append((int(u), int(v), pt[0], pt[1], pt[2])) return proj_points From 006b5550658e6f0028f65ef59d3b61c1c6f3505b Mon Sep 17 00:00:00 2001 From: KenC1014 Date: Sat, 15 Feb 2025 16:42:45 -0600 Subject: [PATCH 18/46] update comments --- GEMstack/onboard/perception/fusion.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/GEMstack/onboard/perception/fusion.py b/GEMstack/onboard/perception/fusion.py index bfd39f7c1..6208d5e0e 100644 --- a/GEMstack/onboard/perception/fusion.py +++ b/GEMstack/onboard/perception/fusion.py @@ -90,11 +90,11 @@ def fusion_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2): if len(extracted_pts) < 1: continue - # Apply ground and max distance filter to extracted 5D points + # Apply ground and max distance filter to the extracted 5D points extracted_pts = filter_ground_points(extracted_pts, self.ground_threshold) extracted_pts = filter_far_points(extracted_pts) - # Extract 2D pedestrians points camera frame + # Extract 2D pedestrians points in camera frame extracted_2d_pts = list(extracted_pts[:, :2].astype(int)) flattened_pedestrians_2d_pts = flattened_pedestrians_2d_pts + extracted_2d_pts From b35252bed6e3ca6c81ea5415a030282472af065b Mon Sep 17 00:00:00 2001 From: lukasdumasius Date: Mon, 17 Feb 2025 01:40:24 -0600 Subject: [PATCH 19/46] Finalize fusion.py with pedestrian centers, vel, dims, AgentStates. Merging into pedestrian_detection next. --- GEMstack/onboard/perception/fusion.py | 270 +++++++++++++++----- GEMstack/onboard/perception/fusion_utils.py | 5 +- GEMstack/state/agent.py | 4 + 3 files changed, 212 insertions(+), 67 deletions(-) diff --git a/GEMstack/onboard/perception/fusion.py b/GEMstack/onboard/perception/fusion.py index 6208d5e0e..6255f47f0 100644 --- a/GEMstack/onboard/perception/fusion.py +++ b/GEMstack/onboard/perception/fusion.py @@ -1,21 +1,66 @@ -from cv_bridge import CvBridge -from sensor_msgs.msg import Image, PointCloud2 -from ultralytics import YOLO -from fusion_utils import * +""" +Lidar + Camera fusion and object detection +""" +""" +Terminal 1: +--------------- +source /opt/ros/noetic/setup.bash && source /catkin_ws/devel/setup.bash +roscore + +Terminal 2: +--------------- +source /opt/ros/noetic/setup.bash && source /catkin_ws/devel/setup.bash +python3 /home/Admin/transform.py + +Terminal 3: +--------------- +source /opt/ros/noetic/setup.bash && source /catkin_ws/devel/setup.bash +rosbag play ~/vehicle.bag + +Terminal 4: +--------------- +source /opt/ros/noetic/setup.bash && source /catkin_ws/devel/setup.bash +cd GEMStack +python3 -m GEMstack.onboard.perception.fusion + +Terminal 5: +--------------- +source /opt/ros/noetic/setup.bash && source /catkin_ws/devel/setup.bash +rviz +""" + +# Python +import os +from typing import List, Dict +from collections import defaultdict +# ROS, CV import rospy import message_filters -import os import tf - +from cv_bridge import CvBridge +from sensor_msgs.msg import Image, PointCloud2 +# GEMStack +from ...state import AllState,VehicleState,ObjectPose,ObjectFrameEnum,AgentState,AgentEnum,AgentActivityEnum +from ultralytics import YOLO +from .fusion_utils import * class Fusion3D(): + # TODO: Pull params into a JSON/yaml + # TODO: Convert lists into np.arrays where possible, vectorize calculations + # TODO: Implement logging instead of print + # TODO: Finish splitting this class + utils into separate classes + # TODO: Decide if we want to name dets "peds" or "objs"/"agents" + # Maybe peds for now and Agents in agent_detection.py? def __init__(self): + # Publish debug/viz rostopics if true + self.debug = True # Setup variables self.bridge = CvBridge() + # TODO: Wrap detector into GEMDetector? self.detector = YOLO(os.getcwd()+'/GEMstack/knowledge/detection/yolov8n.pt') - self.last_person_boxes = [] - self.pedestrians = {} - self.visualization = True + # track_id: AgentState + self.prev_agents = dict() + self.current_agents = dict() self.confidence = 0.7 self.classes_to_detect = 0 self.ground_threshold = 1.6 @@ -30,46 +75,169 @@ def __init__(self): self.rgb_rosbag = message_filters.Subscriber('/oak/rgb/image_raw', Image) self.top_lidar_rosbag = message_filters.Subscriber('/ouster/points', PointCloud2) self.sync = message_filters.ApproximateTimeSynchronizer([self.rgb_rosbag, self.top_lidar_rosbag], queue_size=10, slop=0.1) - self.sync.registerCallback(self.fusion_callback) + self.sync.registerCallback(self.ouster_oak_callback) # TF listener to get transformation from LiDAR to Camera self.tf_listener = tf.TransformListener() - # Publishers + if self.debug: self.init_debug() + + def init_debug(self,): + # Debug Publishers self.pub_pedestrians_pc2 = rospy.Publisher("/point_cloud/pedestrians", PointCloud2, queue_size=10) - if(self.visualization): - self.pub_image = rospy.Publisher("/camera/image_detection", Image, queue_size=1) + self.pub_obj_centers_pc2 = rospy.Publisher("/point_cloud/obj_centers", PointCloud2, queue_size=10) + self.pub_image = rospy.Publisher("/camera/image_detection", Image, queue_size=1) + + def update(self, vehicle : VehicleState) -> Dict[str,AgentState]: + return self.current_agents + + # TODO: Improve Algo Knn, ransac, etc. + def find_centers(self, clusters: List[List[np.ndarray]]) -> List[np.ndarray]: + clusters = [np.array(clust) for clust in clusters] + centers = [np.array(()) if clust.size == 0 else np.mean(clust, axis=0) for clust in clusters] + return centers + + # Beware: AgentState(PhysicalObject) builds bbox from + # dims [-l/2,l/2] x [-w/2,w/2] x [0,h], not + # [-l/2,l/2] x [-w/2,w/2] x [-h/2,h/2] + def find_dims(self, clusters: List[List[np.ndarray]]) -> List[np.ndarray]: + # Add some small constant to height to compensate for + # objects distance to ground we filtered from lidar, + # other heuristics to imrpove stability for find_ funcs ? + clusters = [np.array(clust) for clust in clusters] + dims = [np.array(()) if clust.size == 0 else np.max(clust, axis= 0) - np.min(clust, axis= 0) for clust in clusters] + return dims + + # TODO: Slower but cleaner to input self.current_agents dict + # TODO: Moving Average across last N iterations pos/vel? Less spurious vals + # TODO Akul: Fix velocity calculation to calculate in ObjectFrameEnum.START + # work towards own tracking class instead of simple YOLO track? + def find_vels(self, track_ids, obj_centers): + # Object not seen -> velocity = None + track_id_center_map = dict(zip(track_ids, obj_centers)) + vels = defaultdict(lambda: np.array(())) # None is faster, np.array matches other find_ methods. + + for prev_track_id, prev_agent in self.prev_agents.items(): + if prev_track_id in track_ids: + # TODO: Add prev_agents to memory to avoid None velocity + # We should only be missing prev pose on first sight of track_id Agent. + # print("shape 1: ", track_id_center_map[prev_agent.track_id]) + # print("shape 2: ", np.array([prev_agent.pose.x, prev_agent.pose.y, prev_agent.pose.z])) + # prev can be 3 separate Nones, current is just empty array... make this symmetrical + if prev_agent.pose.x and prev_agent.pose.y and prev_agent.pose.z and track_id_center_map[prev_agent.track_id].shape == 3: + vels[prev_track_id] = track_id_center_map[prev_track_id] - np.array([prev_agent.pose.x, prev_agent.pose.y, prev_agent.pose.z]) + return vels + + # TODO: Separate debug/viz class, separate this func into bbox and 2d 3d points + def viz_object_states(self, cv_image, boxes, extracted_pts_all): + # Extract 3D pedestrians points in lidar frame + # ** These are camera frame after transform_lidar_points, right? + pedestrians_3d_pts = [list(extracted_pts[:, -3:]) for extracted_pts in extracted_pts_all] - def fusion_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2): + # Object center viz + obj_3d_obj_centers = list() + for track_id, agent in self.current_agents.items(): + if agent.pose.x != None and agent.pose.y != None and agent.pose.z != None: + obj_3d_obj_centers.append((agent.pose.x, agent.pose.y, agent.pose.z)) # ** + if len(obj_3d_obj_centers) > 0: + ros_obj_3d_obj_centers_pc2 = create_point_cloud(obj_3d_obj_centers, color=(0, 128, 0)) + self.pub_obj_centers_pc2.publish(ros_obj_3d_obj_centers_pc2) + + # Extract 2D pedestrians points and bbox in camera frame + extracted_2d_pts = [list(extracted_pts[:, :2].astype(int)) for extracted_pts in extracted_pts_all] + flattened_pedestrians_2d_pts = list() + for pts in extracted_2d_pts: flattened_pedestrians_2d_pts.extend(pts) + + for ind, bbox in enumerate(boxes): + xywh = bbox.xywh[0].tolist() + cv_image = vis_2d_bbox(cv_image, xywh, bbox) + + flattened_pedestrians_3d_pts = list() + for pts in pedestrians_3d_pts: flattened_pedestrians_3d_pts.extend(pts) + + # Draw projected 2D LiDAR points on the image. + for pt in flattened_pedestrians_2d_pts: + cv2.circle(cv_image, pt, 2, (0, 0, 255), -1) + ros_img = self.bridge.cv2_to_imgmsg(cv_image, 'bgr8') + self.pub_image.publish(ros_img) + + # Draw 3D pedestrian pointclouds + # Create point cloud from extracted 3D points + ros_extracted_pedestrian_pc2 = create_point_cloud(flattened_pedestrians_3d_pts) + self.pub_pedestrians_pc2.publish(ros_extracted_pedestrian_pc2) + + + def update_object_states(self, track_result, extracted_pts_all) -> None: + self.prev_agents = self.current_agents.copy() + self.current_agents.clear() + + # Change pedestrians_3d_pts to dicts matching track_ids + track_ids = track_result[0].boxes.id.int().cpu().tolist() + num_objs = len(track_ids) + boxes = track_result[0].boxes + + # Extract 3D pedestrians points in lidar frame + # ** These are camera frame after transform_lidar_points, right? + pedestrians_3d_pts = [list(extracted_pts[:, -3:]) for extracted_pts in extracted_pts_all] + if len(pedestrians_3d_pts) != num_objs: + raise Exception('Perception - Camera detections, points clusters num. mismatch') + + # TODO: Slower but cleaner to pass dicts of AgentState + # or at least {track_ids: centers/pts/etc} + # TODO: Combine funcs for efficiency in C. + # Separate numpy prob still faster for now + obj_centers = self.find_centers(pedestrians_3d_pts) + obj_dims = self.find_dims(pedestrians_3d_pts) + obj_vels = self.find_vels(track_ids, obj_centers) + + # Update Current AgentStates + for ind in range(num_objs): + obj_center = (None, None, None) if obj_centers[ind].size == 0 else obj_centers[ind] + obj_dim = (None, None, None) if obj_dims[ind].size == 0 else obj_dims[ind] + self.current_agents[track_ids[ind]] = ( + AgentState( + track_id = track_ids[ind], + pose=ObjectPose(t=0, x=obj_center[0], y=obj_center[1], z=obj_center[2] ,yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT), + # (l, w, h) + # TODO: confirm (z -> l, x -> w, y -> h) + dimensions=(obj_dim[2], obj_dim[0],obj_dim[1]), + outline=None, + type=AgentEnum.PEDESTRIAN, + activity=AgentActivityEnum.MOVING, + velocity= None if obj_vels[track_ids[ind]].size == 0 else tuple(vels[track_ids[ind]]), + yaw_rate=0 + )) + + + def ouster_oak_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2): # Convert to cv2 image and run detector cv_image = self.bridge.imgmsg_to_cv2(rgb_image_msg, "bgr8") track_result = self.detector.track(source=cv_image, classes=self.classes_to_detect, persist=True, conf=self.confidence) # Convert 1D PointCloud2 data to x, y, z coords lidar_points = convert_pointcloud2_to_xyz(lidar_pc2_msg) + # print("len lidar_points", len(lidar_points)) # Downsample xyz point clouds downsampled_points = downsample_points(lidar_points) + # print("len downsampled_points", len(downsampled_points)) # Transform LiDAR points into the camera coordinate frame. lidar_in_camera = transform_lidar_points(downsampled_points, self.R, self.t) - + # print("len lidar_in_camera", len(lidar_in_camera)) + # Project the transformed points into the image plane. projected_pts = project_points(lidar_in_camera, self.K) + # print("len projected_pts", len(projected_pts)) # Process bboxes - self.last_person_boxes = [] boxes = track_result[0].boxes - # Unpacking box dimentions detected into x,y,w,h - pedestrians_3d_pts = [] - flattened_pedestrians_2d_pts = [] - flattened_pedestrians_3d_pts = [] + extracted_pts_all = list() - for box in boxes: - xywh = box.xywh[0].tolist() - self.last_person_boxes.append(xywh) + for ind, bbox in enumerate(boxes): + xywh = bbox.xywh[0].tolist() # Extracting projected pts x, y, w, h = xywh @@ -78,49 +246,20 @@ def fusion_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2): top_bound = int(y - h / 2) bottom_bound = int(y + h / 2) - if len(projected_pts) > 0: - pts = np.array(projected_pts) - extracted_pts = pts[(pts[:, 0] > left_bound) & - (pts[:, 0] < right_bound) & - (pts[:, 1] > top_bound) & - (pts[:, 1] < bottom_bound) - ] - - #if no points extracted for this bbox, skip - if len(extracted_pts) < 1: - continue - - # Apply ground and max distance filter to the extracted 5D points - extracted_pts = filter_ground_points(extracted_pts, self.ground_threshold) - extracted_pts = filter_far_points(extracted_pts) - - # Extract 2D pedestrians points in camera frame - extracted_2d_pts = list(extracted_pts[:, :2].astype(int)) - flattened_pedestrians_2d_pts = flattened_pedestrians_2d_pts + extracted_2d_pts - - # Extract 3D pedestrians points in lidar frame - extracted_3d_pts = list(extracted_pts[:, -3:]) - pedestrians_3d_pts.append(extracted_3d_pts) - flattened_pedestrians_3d_pts = flattened_pedestrians_3d_pts + extracted_3d_pts - - # Used for visualization - if(self.visualization): - cv_image = vis_2d_bbox(cv_image, xywh, box) - - if len(flattened_pedestrians_3d_pts) > 0: - # Draw projected 2D LiDAR points on the image. - for pt in flattened_pedestrians_2d_pts: - cv2.circle(cv_image, pt, 2, (0, 0, 255), -1) - - # Create point cloud from extracted 3D points - ros_extracted_pedestrian_pc2 = create_point_cloud(flattened_pedestrians_3d_pts) - self.pub_pedestrians_pc2.publish(ros_extracted_pedestrian_pc2) - - # Used for visualization - if(self.visualization): - ros_img = self.bridge.cv2_to_imgmsg(cv_image, 'bgr8') - self.pub_image.publish(ros_img) + pts = np.array(projected_pts) + extracted_pts = pts[(pts[:, 0] > left_bound) & + (pts[:, 0] < right_bound) & + (pts[:, 1] > top_bound) & + (pts[:, 1] < bottom_bound) + ] + # Apply ground and max distance filter to the extracted 5D points + extracted_pts = filter_ground_points(extracted_pts, self.ground_threshold) + extracted_pts = filter_far_points(extracted_pts) + extracted_pts_all.append(extracted_pts) + + self.update_object_states(track_result, extracted_pts_all) + self.viz_object_states(cv_image, boxes, extracted_pts_all) if __name__ == '__main__': @@ -128,4 +267,3 @@ def fusion_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2): Fusion3D() while not rospy.core.is_shutdown(): rospy.rostime.wallsleep(0.5) - diff --git a/GEMstack/onboard/perception/fusion_utils.py b/GEMstack/onboard/perception/fusion_utils.py index cd6fd1ce7..4163d7b2c 100644 --- a/GEMstack/onboard/perception/fusion_utils.py +++ b/GEMstack/onboard/perception/fusion_utils.py @@ -37,6 +37,7 @@ def filter_ground_points(lidar_points, ground_threshold = 0): def filter_far_points(lidar_points, max_dist_percent=0.85): """ Filter points beyond a percentage threshold of max distance in a point cluster """ + if lidar_points.shape[0] == 0: return lidar_points max_dist = np.max(lidar_points[:, 4]) filtered_array = lidar_points[lidar_points[:, 4] < max_dist_percent * max_dist] return filtered_array @@ -164,4 +165,6 @@ def create_point_cloud(points, color=(255, 0, 0)): point_cloud_data = [(x, y, z, packed_color) for x, y, z in points] - return pc2.create_cloud(header, fields, point_cloud_data) \ No newline at end of file + return pc2.create_cloud(header, fields, point_cloud_data) + + \ No newline at end of file diff --git a/GEMstack/state/agent.py b/GEMstack/state/agent.py index 757805837..b0ad7aaad 100644 --- a/GEMstack/state/agent.py +++ b/GEMstack/state/agent.py @@ -27,6 +27,10 @@ class AgentState(PhysicalObject): activity : AgentActivityEnum velocity : Tuple[float,float,float] #estimated velocity in x,y,z, m/s and in agent's local frame yaw_rate : float #estimated yaw rate, in radians/s + # Added by Perception for persistent objs across frames + # Check track_id to verify this is the same Agent you've + # seen before + track_id : int def velocity_local(self) -> Tuple[float,float,float]: """Returns velocity in m/s in the agent's local frame.""" From eeeeb5407c181f03243e24badb17eaff652194c6 Mon Sep 17 00:00:00 2001 From: lukasdumasius Date: Mon, 17 Feb 2025 02:06:39 -0600 Subject: [PATCH 20/46] Port fusion.py to pedestrian_detection.py --- GEMstack/onboard/perception/fusion.py | 4 +- .../perception/pedestrian_detection.py | 383 +++++++++++------- 2 files changed, 248 insertions(+), 139 deletions(-) diff --git a/GEMstack/onboard/perception/fusion.py b/GEMstack/onboard/perception/fusion.py index 6255f47f0..df83bedba 100644 --- a/GEMstack/onboard/perception/fusion.py +++ b/GEMstack/onboard/perception/fusion.py @@ -15,7 +15,7 @@ Terminal 3: --------------- source /opt/ros/noetic/setup.bash && source /catkin_ws/devel/setup.bash -rosbag play ~/vehicle.bag +rosbag play -l ~/rosbags/vehicle.bag Terminal 4: --------------- @@ -259,7 +259,7 @@ def ouster_oak_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2): extracted_pts_all.append(extracted_pts) self.update_object_states(track_result, extracted_pts_all) - self.viz_object_states(cv_image, boxes, extracted_pts_all) + if self.debug: self.viz_object_states(cv_image, boxes, extracted_pts_all) if __name__ == '__main__': diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py index f91145274..1b5a246f9 100644 --- a/GEMstack/onboard/perception/pedestrian_detection.py +++ b/GEMstack/onboard/perception/pedestrian_detection.py @@ -1,14 +1,51 @@ +""" +Lidar + Camera fusion and object detection +""" +""" +Terminal 1: +--------------- +source /opt/ros/noetic/setup.bash && source /catkin_ws/devel/setup.bash +roscore + +Terminal 2: +--------------- +source /opt/ros/noetic/setup.bash && source /catkin_ws/devel/setup.bash +python3 /home/Admin/transform.py + +Terminal 3: +--------------- +source /opt/ros/noetic/setup.bash && source /catkin_ws/devel/setup.bash +rosbag play -l ~/rosbags/vehicle.bag + +Terminal 4: +--------------- +source /opt/ros/noetic/setup.bash && source /catkin_ws/devel/setup.bash +cd GEMStack +python3 main.py --variant=detector_only launch/pedestrian_detection.yaml + +Terminal 5: +--------------- +source /opt/ros/noetic/setup.bash && source /catkin_ws/devel/setup.bash +rviz +""" + +# Python +import os +from typing import List, Dict +from collections import defaultdict +# ROS, CV +import rospy +import message_filters +import tf +from cv_bridge import CvBridge +from sensor_msgs.msg import Image, PointCloud2 +# YOLO +from ultralytics import YOLO +# GEMStack from ...state import AllState,VehicleState,ObjectPose,ObjectFrameEnum,AgentState,AgentEnum,AgentActivityEnum +from .fusion_utils import * from ..interface.gem import GEMInterface from ..component import Component -from ultralytics import YOLO -import cv2 -from typing import Dict -from cv_bridge import CvBridge -from sensor_msgs.msg import Image -import rospy -import os - def box_to_fake_agent(box): """Creates a fake agent state from an (x,y,w,h) bounding box. @@ -22,158 +59,230 @@ def box_to_fake_agent(box): class PedestrianDetector2D(Component): - """Detects pedestrians. - - self.vehicle -> GEM car info - self.detector -> pedestrian detection model - self.last_person_boxes -> used to store boxes detected in every frame of the video - self.pedestrians -> stores the agentstate (pedestrian) found during the video in a dict, not sure if required can be removed - self.visualization -> enable this to view pedestrian detected - self.confidence -> min model confidence level - self.classes_to_detect -> Classes for the model to detect - - """ - def __init__(self,vehicle_interface : GEMInterface): + # TODO: Pull params into a JSON/yaml + # TODO: Convert lists into np.arrays where possible, vectorize calculations + # TODO: Implement logging instead of print + # TODO: Finish splitting this class + utils into separate classes + # TODO: Decide if we want to name dets "peds" or "objs"/"agents" + # Maybe peds for now and Agents in agent_detection.py? + def __init__(self, vehicle_interface : GEMInterface): self.vehicle_interface = vehicle_interface - self.detector = YOLO(os.getcwd()+'/GEMstack/knowledge/detection/yolov8n.pt') # change to get model value from sys arg - self.last_person_boxes = [] - self.pedestrians = {} - self.visualization = True # Set this to true for visualization, later change to get value from sys arg + # Publish debug/viz rostopics if true + self.debug = True + # Setup variables + self.bridge = CvBridge() + # TODO: Wrap detector into GEMDetector? + self.detector = YOLO(os.getcwd()+'/GEMstack/knowledge/detection/yolov8n.pt') + # track_id: AgentState + self.prev_agents = dict() + self.current_agents = dict() self.confidence = 0.7 self.classes_to_detect = 0 + self.ground_threshold = 1.6 + self.max_dist_percent = 0.7 - def rate(self): - return 4.0 - - def state_inputs(self): - return ['vehicle'] + # Load calibration data + self.R = load_extrinsics(os.getcwd() + '/GEMstack/onboard/perception/calibration/extrinsics/R.npy') + self.t = load_extrinsics(os.getcwd() + '/GEMstack/onboard/perception/calibration/extrinsics/t.npy') + self.K = load_intrinsics(os.getcwd() + '/GEMstack/onboard/perception/calibration/camera_intrinsics.json') + + # Subscribers and sychronizers + self.rgb_rosbag = message_filters.Subscriber('/oak/rgb/image_raw', Image) + self.top_lidar_rosbag = message_filters.Subscriber('/ouster/points', PointCloud2) + self.sync = message_filters.ApproximateTimeSynchronizer([self.rgb_rosbag, self.top_lidar_rosbag], queue_size=10, slop=0.1) + self.sync.registerCallback(self.ouster_oak_callback) + + # TF listener to get transformation from LiDAR to Camera + self.tf_listener = tf.TransformListener() + + if self.debug: self.init_debug() - def state_outputs(self): - return ['agents'] + def init_debug(self,): + # Debug Publishers + self.pub_pedestrians_pc2 = rospy.Publisher("/point_cloud/pedestrians", PointCloud2, queue_size=10) + self.pub_obj_centers_pc2 = rospy.Publisher("/point_cloud/obj_centers", PointCloud2, queue_size=10) + self.pub_image = rospy.Publisher("/camera/image_detection", Image, queue_size=1) + + def update(self, vehicle : VehicleState) -> Dict[str,AgentState]: + return self.current_agents + + # TODO: Improve Algo Knn, ransac, etc. + def find_centers(self, clusters: List[List[np.ndarray]]) -> List[np.ndarray]: + clusters = [np.array(clust) for clust in clusters] + centers = [np.array(()) if clust.size == 0 else np.mean(clust, axis=0) for clust in clusters] + return centers - def initialize(self): + # Beware: AgentState(PhysicalObject) builds bbox from + # dims [-l/2,l/2] x [-w/2,w/2] x [0,h], not + # [-l/2,l/2] x [-w/2,w/2] x [-h/2,h/2] + def find_dims(self, clusters: List[List[np.ndarray]]) -> List[np.ndarray]: + # Add some small constant to height to compensate for + # objects distance to ground we filtered from lidar, + # other heuristics to imrpove stability for find_ funcs ? + clusters = [np.array(clust) for clust in clusters] + dims = [np.array(()) if clust.size == 0 else np.max(clust, axis= 0) - np.min(clust, axis= 0) for clust in clusters] + return dims - """Initializes subscribers and publishers - - self.vehicle_interface.subscribe_sensor -> GEM Car camera subscription - self.rosbag_test -> ROS Bag topic subscription - self.pub_image -> Publishes Image with detection for visualization + # TODO: Slower but cleaner to input self.current_agents dict + # TODO: Moving Average across last N iterations pos/vel? Less spurious vals + # TODO Akul: Fix velocity calculation to calculate in ObjectFrameEnum.START + # work towards own tracking class instead of simple YOLO track? + def find_vels(self, track_ids, obj_centers): + # Object not seen -> velocity = None + track_id_center_map = dict(zip(track_ids, obj_centers)) + vels = defaultdict(lambda: np.array(())) # None is faster, np.array matches other find_ methods. - """ - #tell the vehicle to use image_callback whenever 'front_camera' gets a reading, and it expects images of type cv2.Mat + for prev_track_id, prev_agent in self.prev_agents.items(): + if prev_track_id in track_ids: + # TODO: Add prev_agents to memory to avoid None velocity + # We should only be missing prev pose on first sight of track_id Agent. + # print("shape 1: ", track_id_center_map[prev_agent.track_id]) + # print("shape 2: ", np.array([prev_agent.pose.x, prev_agent.pose.y, prev_agent.pose.z])) + # prev can be 3 separate Nones, current is just empty array... make this symmetrical + if prev_agent.pose.x and prev_agent.pose.y and prev_agent.pose.z and track_id_center_map[prev_agent.track_id].shape == 3: + vels[prev_track_id] = track_id_center_map[prev_track_id] - np.array([prev_agent.pose.x, prev_agent.pose.y, prev_agent.pose.z]) + return vels - # GEM Car subacriber - # self.vehicle_interface.subscribe_sensor('front_camera',self.image_callback,cv2.Mat) - # Webcam - # rospy.Subscriber('/webcam', Image, self.image_callback) + # TODO: Separate debug/viz class, separate this func into bbox and 2d 3d points + def viz_object_states(self, cv_image, boxes, extracted_pts_all): + # Extract 3D pedestrians points in lidar frame + # ** These are camera frame after transform_lidar_points, right? + pedestrians_3d_pts = [list(extracted_pts[:, -3:]) for extracted_pts in extracted_pts_all] - # Testing with rosbag - self.rosbag_test = rospy.Subscriber('/oak/rgb/image_raw',Image, self.image_callback,queue_size=1) - if(self.visualization): - self.pub_image = rospy.Publisher("/camera/image_detection", Image, queue_size=1) - pass - - # Use cv2.Mat for GEM Car, Image for RosBag - def image_callback(self, image : Image): #image : cv2.Mat): + # Object center viz + obj_3d_obj_centers = list() + for track_id, agent in self.current_agents.items(): + if agent.pose.x != None and agent.pose.y != None and agent.pose.z != None: + obj_3d_obj_centers.append((agent.pose.x, agent.pose.y, agent.pose.z)) # ** + if len(obj_3d_obj_centers) > 0: + ros_obj_3d_obj_centers_pc2 = create_point_cloud(obj_3d_obj_centers, color=(0, 128, 0)) + self.pub_obj_centers_pc2.publish(ros_obj_3d_obj_centers_pc2) + + # Extract 2D pedestrians points and bbox in camera frame + extracted_2d_pts = [list(extracted_pts[:, :2].astype(int)) for extracted_pts in extracted_pts_all] + flattened_pedestrians_2d_pts = list() + for pts in extracted_2d_pts: flattened_pedestrians_2d_pts.extend(pts) - """Detects pedestrians using the model provided when new image is passed. + for ind, bbox in enumerate(boxes): + xywh = bbox.xywh[0].tolist() + cv_image = vis_2d_bbox(cv_image, xywh, bbox) + + flattened_pedestrians_3d_pts = list() + for pts in pedestrians_3d_pts: flattened_pedestrians_3d_pts.extend(pts) - Converts Image.msg to cv2 format (Might need to change this to use cv2.Mat) and uses the model to detect pedestrian - IF visualization is true, will publish an image with pedestrians detected. + # Draw projected 2D LiDAR points on the image. + for pt in flattened_pedestrians_2d_pts: + cv2.circle(cv_image, pt, 2, (0, 0, 255), -1) + ros_img = self.bridge.cv2_to_imgmsg(cv_image, 'bgr8') + self.pub_image.publish(ros_img) - Hardcoded values for now: - Detected only pedestrians -> Class = 0 - Confidence level -> 0.7 + # Draw 3D pedestrian pointclouds + # Create point cloud from extracted 3D points + ros_extracted_pedestrian_pc2 = create_point_cloud(flattened_pedestrians_3d_pts) + self.pub_pedestrians_pc2.publish(ros_extracted_pedestrian_pc2) - """ - # Use Image directly for GEM Car - # track_result = self.detector.track(source=image, classes=self.classes_to_detect, persist=True, conf=self.confidence) + def update_object_states(self, track_result, extracted_pts_all) -> None: + self.prev_agents = self.current_agents.copy() + self.current_agents.clear() + + # Change pedestrians_3d_pts to dicts matching track_ids + track_ids = track_result[0].boxes.id.int().cpu().tolist() + num_objs = len(track_ids) + boxes = track_result[0].boxes - # Convert to CV2 format for RosBag - bridge = CvBridge() - image = bridge.imgmsg_to_cv2(image, "bgr8") - track_result = self.detector.track(source=image, classes=self.classes_to_detect, persist=True, conf=self.confidence) + # Extract 3D pedestrians points in lidar frame + # ** These are camera frame after transform_lidar_points, right? + pedestrians_3d_pts = [list(extracted_pts[:, -3:]) for extracted_pts in extracted_pts_all] + if len(pedestrians_3d_pts) != num_objs: + raise Exception('Perception - Camera detections, points clusters num. mismatch') + + # TODO: Slower but cleaner to pass dicts of AgentState + # or at least {track_ids: centers/pts/etc} + # TODO: Combine funcs for efficiency in C. + # Separate numpy prob still faster for now + obj_centers = self.find_centers(pedestrians_3d_pts) + obj_dims = self.find_dims(pedestrians_3d_pts) + obj_vels = self.find_vels(track_ids, obj_centers) - self.last_person_boxes = [] + # Update Current AgentStates + for ind in range(num_objs): + obj_center = (None, None, None) if obj_centers[ind].size == 0 else obj_centers[ind] + obj_dim = (None, None, None) if obj_dims[ind].size == 0 else obj_dims[ind] + self.current_agents[track_ids[ind]] = ( + AgentState( + track_id = track_ids[ind], + pose=ObjectPose(t=0, x=obj_center[0], y=obj_center[1], z=obj_center[2] ,yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT), + # (l, w, h) + # TODO: confirm (z -> l, x -> w, y -> h) + dimensions=(obj_dim[2], obj_dim[0],obj_dim[1]), + outline=None, + type=AgentEnum.PEDESTRIAN, + activity=AgentActivityEnum.MOVING, + velocity= None if obj_vels[track_ids[ind]].size == 0 else tuple(vels[track_ids[ind]]), + yaw_rate=0 + )) + + def ouster_oak_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2): + # Convert to cv2 image and run detector + cv_image = self.bridge.imgmsg_to_cv2(rgb_image_msg, "bgr8") + track_result = self.detector.track(source=cv_image, classes=self.classes_to_detect, persist=True, conf=self.confidence) + + # Convert 1D PointCloud2 data to x, y, z coords + lidar_points = convert_pointcloud2_to_xyz(lidar_pc2_msg) + # print("len lidar_points", len(lidar_points)) + + # Downsample xyz point clouds + downsampled_points = downsample_points(lidar_points) + # print("len downsampled_points", len(downsampled_points)) + + # Transform LiDAR points into the camera coordinate frame. + lidar_in_camera = transform_lidar_points(downsampled_points, self.R, self.t) + # print("len lidar_in_camera", len(lidar_in_camera)) + + # Project the transformed points into the image plane. + projected_pts = project_points(lidar_in_camera, self.K) + # print("len projected_pts", len(projected_pts)) + + # Process bboxes boxes = track_result[0].boxes - # Used for visualization - if(self.visualization): - label_text = "Pedestrian " - font = cv2.FONT_HERSHEY_SIMPLEX - font_scale = 0.5 - font_color = (255, 255, 255) # White text - outline_color = (0, 0, 0) # Black outline - line_type = 1 - text_thickness = 2 # Text thickness - outline_thickness = 1 # Thickness of the text outline - - # Unpacking box dimentions detected into x,y,w,h - for box in boxes: - - xywh = box.xywh[0].tolist() - self.last_person_boxes.append(xywh) + extracted_pts_all = list() + + for ind, bbox in enumerate(boxes): + xywh = bbox.xywh[0].tolist() + + # Extracting projected pts x, y, w, h = xywh - id = box.id.item() - - # Stores AgentState in a dict, can be removed if not required - pose = ObjectPose(t=0,x=x,y=y,z=0,yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT) - dims = (w,h,0) - if(id not in self.pedestrians.keys()): - self.pedestrians[id] = AgentState(pose=pose,dimensions=dims,outline=None,type=AgentEnum.PEDESTRIAN,activity=AgentActivityEnum.MOVING,velocity=(0,0,0),yaw_rate=0) - else: - self.pedestrians[id].pose = pose - self.pedestrians[id].dims = dims - - # Used for visualization - if(self.visualization): - # Draw bounding box - cv2.rectangle(image, (int(x - w / 2), int(y - h / 2)), (int(x + w / 2), int(y + h / 2)), (255, 0, 255), 3) - - # Define text label - x = int(x - w / 2) - y = int(y - h / 2) - label = label_text + str(id) + " : " + str(round(box.conf.item(), 2)) - - # Get text size - text_size, baseline = cv2.getTextSize(label, font, font_scale, line_type) - text_w, text_h = text_size - - # Position text above the bounding box - text_x = x - text_y = y - 10 if y - 10 > 10 else y + h + text_h - - # Draw text outline for better visibility, uncomment for outline - # for dx, dy in [(-1, -1), (-1, 1), (1, -1), (1, 1)]: - # cv2.putText(image, label, (text_x + dx, text_y - baseline + dy), font, font_scale, outline_color, outline_thickness) - - # Draw main text on top of the outline - cv2.putText(image, label, (text_x, text_y - baseline), font, font_scale, font_color, text_thickness) + left_bound = int(x - w / 2) + right_bound = int(x + w / 2) + top_bound = int(y - h / 2) + bottom_bound = int(y + h / 2) - - # Used for visualization - if(self.visualization): - ros_img = bridge.cv2_to_imgmsg(image, 'bgr8') - self.pub_image.publish(ros_img) - - #uncomment if you want to debug the detector... - # print(self.last_person_boxes) - # print(self.pedestrians.keys()) - #for bb in self.last_person_boxes: - # x,y,w,h = bb - # cv2.rectangle(image, (int(x-w/2), int(y-h/2)), (int(x+w/2), int(y+h/2)), (255, 0, 255), 3) - #cv2.imwrite("pedestrian_detections.png",image) + pts = np.array(projected_pts) + extracted_pts = pts[(pts[:, 0] > left_bound) & + (pts[:, 0] < right_bound) & + (pts[:, 1] > top_bound) & + (pts[:, 1] < bottom_bound) + ] + + # Apply ground and max distance filter to the extracted 5D points + extracted_pts = filter_ground_points(extracted_pts, self.ground_threshold) + extracted_pts = filter_far_points(extracted_pts) + extracted_pts_all.append(extracted_pts) + + self.update_object_states(track_result, extracted_pts_all) + if self.debug: self.viz_object_states(cv_image, boxes, extracted_pts_all) + + def rate(self): + return 4.0 - def update(self, vehicle : VehicleState) -> Dict[str,AgentState]: - res = {} - for i,b in enumerate(self.last_person_boxes): - x,y,w,h = b - res['pedestrian'+str(i)] = box_to_fake_agent(b) - if len(res) > 0: - print("Detected",len(res),"pedestrians") - return res + def state_inputs(self): + return ['vehicle'] + + def state_outputs(self): + return ['agents'] class FakePedestrianDetector2D(Component): From 19ad11ea327334785ec62d8c4f30bb86b36f8f40 Mon Sep 17 00:00:00 2001 From: lukasdumasius Date: Mon, 17 Feb 2025 02:11:10 -0600 Subject: [PATCH 21/46] Add transform.py instructions --- GEMstack/onboard/perception/fusion.py | 2 +- GEMstack/onboard/perception/pedestrian_detection.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/GEMstack/onboard/perception/fusion.py b/GEMstack/onboard/perception/fusion.py index df83bedba..838e5a0db 100644 --- a/GEMstack/onboard/perception/fusion.py +++ b/GEMstack/onboard/perception/fusion.py @@ -10,7 +10,7 @@ Terminal 2: --------------- source /opt/ros/noetic/setup.bash && source /catkin_ws/devel/setup.bash -python3 /home/Admin/transform.py +python3 GEMstack/onboard/perception/transform.py Terminal 3: --------------- diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py index 1b5a246f9..bc27871ce 100644 --- a/GEMstack/onboard/perception/pedestrian_detection.py +++ b/GEMstack/onboard/perception/pedestrian_detection.py @@ -10,7 +10,7 @@ Terminal 2: --------------- source /opt/ros/noetic/setup.bash && source /catkin_ws/devel/setup.bash -python3 /home/Admin/transform.py +python3 GEMstack/onboard/perception/transform.py Terminal 3: --------------- From 0a214246a7d5ab07214a5d8508c73e7c3ead6b34 Mon Sep 17 00:00:00 2001 From: lukasdumasius Date: Mon, 17 Feb 2025 03:21:09 -0600 Subject: [PATCH 22/46] pedestrian_detection.py type hints --- GEMstack/onboard/perception/fusion.py | 9 +++--- .../perception/pedestrian_detection.py | 32 +++++++++---------- ...utils.py => pedestrian_detection_utils.py} | 0 3 files changed, 21 insertions(+), 20 deletions(-) rename GEMstack/onboard/perception/{fusion_utils.py => pedestrian_detection_utils.py} (100%) diff --git a/GEMstack/onboard/perception/fusion.py b/GEMstack/onboard/perception/fusion.py index 838e5a0db..7d6558433 100644 --- a/GEMstack/onboard/perception/fusion.py +++ b/GEMstack/onboard/perception/fusion.py @@ -1,5 +1,5 @@ """ -Lidar + Camera fusion and object detection +Top ouster lidar + Oak front camera fusion, object detection """ """ Terminal 1: @@ -46,9 +46,9 @@ class Fusion3D(): # TODO: Pull params into a JSON/yaml - # TODO: Convert lists into np.arrays where possible, vectorize calculations - # TODO: Implement logging instead of print - # TODO: Finish splitting this class + utils into separate classes + # TODO: Convert some lists into np.arrays, vectorize calculations + # TODO: Implement logging instead of print, cleanup comments + # TODO: Cleanup funcs + split into separate classes # TODO: Decide if we want to name dets "peds" or "objs"/"agents" # Maybe peds for now and Agents in agent_detection.py? def __init__(self): @@ -112,6 +112,7 @@ def find_dims(self, clusters: List[List[np.ndarray]]) -> List[np.ndarray]: # TODO: Moving Average across last N iterations pos/vel? Less spurious vals # TODO Akul: Fix velocity calculation to calculate in ObjectFrameEnum.START # work towards own tracking class instead of simple YOLO track? + # ret: Dict[track_id: vel[x, y, z]] def find_vels(self, track_ids, obj_centers): # Object not seen -> velocity = None track_id_center_map = dict(zip(track_ids, obj_centers)) diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py index bc27871ce..dd67ed065 100644 --- a/GEMstack/onboard/perception/pedestrian_detection.py +++ b/GEMstack/onboard/perception/pedestrian_detection.py @@ -1,5 +1,5 @@ """ -Lidar + Camera fusion and object detection +Top ouster lidar + Oak front camera fusion, object detection """ """ Terminal 1: @@ -41,9 +41,10 @@ from sensor_msgs.msg import Image, PointCloud2 # YOLO from ultralytics import YOLO +from ultralytics.engine.results import Results, Boxes # GEMStack from ...state import AllState,VehicleState,ObjectPose,ObjectFrameEnum,AgentState,AgentEnum,AgentActivityEnum -from .fusion_utils import * +from .pedestrian_detection_utils import * from ..interface.gem import GEMInterface from ..component import Component @@ -60,12 +61,12 @@ def box_to_fake_agent(box): class PedestrianDetector2D(Component): # TODO: Pull params into a JSON/yaml - # TODO: Convert lists into np.arrays where possible, vectorize calculations - # TODO: Implement logging instead of print - # TODO: Finish splitting this class + utils into separate classes + # TODO: Convert some lists into np.arrays, vectorize calculations + # TODO: Implement logging instead of print, cleanup comments + # TODO: Cleanup funcs + split into separate classes # TODO: Decide if we want to name dets "peds" or "objs"/"agents" # Maybe peds for now and Agents in agent_detection.py? - def __init__(self, vehicle_interface : GEMInterface): + def __init__(self, vehicle_interface : GEMInterface) -> None: self.vehicle_interface = vehicle_interface # Publish debug/viz rostopics if true self.debug = True @@ -97,7 +98,7 @@ def __init__(self, vehicle_interface : GEMInterface): if self.debug: self.init_debug() - def init_debug(self,): + def init_debug(self,) -> None: # Debug Publishers self.pub_pedestrians_pc2 = rospy.Publisher("/point_cloud/pedestrians", PointCloud2, queue_size=10) self.pub_obj_centers_pc2 = rospy.Publisher("/point_cloud/obj_centers", PointCloud2, queue_size=10) @@ -126,8 +127,9 @@ def find_dims(self, clusters: List[List[np.ndarray]]) -> List[np.ndarray]: # TODO: Slower but cleaner to input self.current_agents dict # TODO: Moving Average across last N iterations pos/vel? Less spurious vals # TODO Akul: Fix velocity calculation to calculate in ObjectFrameEnum.START - # work towards own tracking class instead of simple YOLO track? - def find_vels(self, track_ids, obj_centers): + # Work towards own tracking class instead of simple YOLO track? + # ret: Dict[track_id: vel[x, y, z]] + def find_vels(self, track_ids: List[int], obj_centers: List[np.ndarray]) -> Dict[int, np.ndarray]: # Object not seen -> velocity = None track_id_center_map = dict(zip(track_ids, obj_centers)) vels = defaultdict(lambda: np.array(())) # None is faster, np.array matches other find_ methods. @@ -144,8 +146,8 @@ def find_vels(self, track_ids, obj_centers): return vels - # TODO: Separate debug/viz class, separate this func into bbox and 2d 3d points - def viz_object_states(self, cv_image, boxes, extracted_pts_all): + # TODO: Separate debug/viz class, bbox and 2d 3d points funcs + def viz_object_states(self, cv_image: cv2.typing.MatLike, boxes: Boxes, extracted_pts_all: List[np.ndarray]) -> None: # Extract 3D pedestrians points in lidar frame # ** These are camera frame after transform_lidar_points, right? pedestrians_3d_pts = [list(extracted_pts[:, -3:]) for extracted_pts in extracted_pts_all] @@ -183,7 +185,7 @@ def viz_object_states(self, cv_image, boxes, extracted_pts_all): self.pub_pedestrians_pc2.publish(ros_extracted_pedestrian_pc2) - def update_object_states(self, track_result, extracted_pts_all) -> None: + def update_object_states(self, track_result: List[Results], extracted_pts_all: List[np.ndarray]) -> None: self.prev_agents = self.current_agents.copy() self.current_agents.clear() @@ -224,7 +226,7 @@ def update_object_states(self, track_result, extracted_pts_all) -> None: yaw_rate=0 )) - def ouster_oak_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2): + def ouster_oak_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2) -> None: # Convert to cv2 image and run detector cv_image = self.bridge.imgmsg_to_cv2(rgb_image_msg, "bgr8") track_result = self.detector.track(source=cv_image, classes=self.classes_to_detect, persist=True, conf=self.confidence) @@ -247,9 +249,7 @@ def ouster_oak_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2): # Process bboxes boxes = track_result[0].boxes - extracted_pts_all = list() - for ind, bbox in enumerate(boxes): xywh = bbox.xywh[0].tolist() @@ -271,7 +271,7 @@ def ouster_oak_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2): extracted_pts = filter_ground_points(extracted_pts, self.ground_threshold) extracted_pts = filter_far_points(extracted_pts) extracted_pts_all.append(extracted_pts) - + self.update_object_states(track_result, extracted_pts_all) if self.debug: self.viz_object_states(cv_image, boxes, extracted_pts_all) diff --git a/GEMstack/onboard/perception/fusion_utils.py b/GEMstack/onboard/perception/pedestrian_detection_utils.py similarity index 100% rename from GEMstack/onboard/perception/fusion_utils.py rename to GEMstack/onboard/perception/pedestrian_detection_utils.py From c7246ed4ace2f27eef010df7d942c0852aca36f6 Mon Sep 17 00:00:00 2001 From: lukasdumasius Date: Mon, 17 Feb 2025 09:54:18 -0600 Subject: [PATCH 23/46] pedestrian_detection.py add time for velocity --- GEMstack/onboard/perception/fusion.py | 2 +- GEMstack/onboard/perception/pedestrian_detection.py | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/GEMstack/onboard/perception/fusion.py b/GEMstack/onboard/perception/fusion.py index 7d6558433..b642a0625 100644 --- a/GEMstack/onboard/perception/fusion.py +++ b/GEMstack/onboard/perception/fusion.py @@ -42,7 +42,7 @@ # GEMStack from ...state import AllState,VehicleState,ObjectPose,ObjectFrameEnum,AgentState,AgentEnum,AgentActivityEnum from ultralytics import YOLO -from .fusion_utils import * +from .pedestrian_detection_utils import * class Fusion3D(): # TODO: Pull params into a JSON/yaml diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py index dd67ed065..1f8cfe5bc 100644 --- a/GEMstack/onboard/perception/pedestrian_detection.py +++ b/GEMstack/onboard/perception/pedestrian_detection.py @@ -77,12 +77,16 @@ def __init__(self, vehicle_interface : GEMInterface) -> None: # track_id: AgentState self.prev_agents = dict() self.current_agents = dict() + # TODO Implement time + self.prev_time = 0 + self.curr_time = 1 # Avoid divide by 0 for placebolder, 0 self.confidence = 0.7 self.classes_to_detect = 0 self.ground_threshold = 1.6 self.max_dist_percent = 0.7 # Load calibration data + # TODO: Maybe lets add one word or link what R t K are? self.R = load_extrinsics(os.getcwd() + '/GEMstack/onboard/perception/calibration/extrinsics/R.npy') self.t = load_extrinsics(os.getcwd() + '/GEMstack/onboard/perception/calibration/extrinsics/t.npy') self.K = load_intrinsics(os.getcwd() + '/GEMstack/onboard/perception/calibration/camera_intrinsics.json') @@ -128,6 +132,7 @@ def find_dims(self, clusters: List[List[np.ndarray]]) -> List[np.ndarray]: # TODO: Moving Average across last N iterations pos/vel? Less spurious vals # TODO Akul: Fix velocity calculation to calculate in ObjectFrameEnum.START # Work towards own tracking class instead of simple YOLO track? + # Fix division by time # ret: Dict[track_id: vel[x, y, z]] def find_vels(self, track_ids: List[int], obj_centers: List[np.ndarray]) -> Dict[int, np.ndarray]: # Object not seen -> velocity = None @@ -142,7 +147,7 @@ def find_vels(self, track_ids: List[int], obj_centers: List[np.ndarray]) -> Dict # print("shape 2: ", np.array([prev_agent.pose.x, prev_agent.pose.y, prev_agent.pose.z])) # prev can be 3 separate Nones, current is just empty array... make this symmetrical if prev_agent.pose.x and prev_agent.pose.y and prev_agent.pose.z and track_id_center_map[prev_agent.track_id].shape == 3: - vels[prev_track_id] = track_id_center_map[prev_track_id] - np.array([prev_agent.pose.x, prev_agent.pose.y, prev_agent.pose.z]) + vels[prev_track_id] = (track_id_center_map[prev_track_id] - np.array([prev_agent.pose.x, prev_agent.pose.y, prev_agent.pose.z])) / (self.curr_time - self.prev_time) return vels From b62c6fc60e6e6a1b8d9ab782e14b097b0a06ce27 Mon Sep 17 00:00:00 2001 From: KenC1014 Date: Mon, 17 Feb 2025 15:15:55 -0600 Subject: [PATCH 24/46] integrate updated depth filter and 3D bboxes vis --- GEMstack/onboard/perception/fusion.py | 270 ------------------ .../perception/pedestrian_detection.py | 88 ++++-- .../perception/pedestrian_detection_utils.py | 82 +++++- 3 files changed, 135 insertions(+), 305 deletions(-) delete mode 100644 GEMstack/onboard/perception/fusion.py diff --git a/GEMstack/onboard/perception/fusion.py b/GEMstack/onboard/perception/fusion.py deleted file mode 100644 index b642a0625..000000000 --- a/GEMstack/onboard/perception/fusion.py +++ /dev/null @@ -1,270 +0,0 @@ -""" -Top ouster lidar + Oak front camera fusion, object detection -""" -""" -Terminal 1: ---------------- -source /opt/ros/noetic/setup.bash && source /catkin_ws/devel/setup.bash -roscore - -Terminal 2: ---------------- -source /opt/ros/noetic/setup.bash && source /catkin_ws/devel/setup.bash -python3 GEMstack/onboard/perception/transform.py - -Terminal 3: ---------------- -source /opt/ros/noetic/setup.bash && source /catkin_ws/devel/setup.bash -rosbag play -l ~/rosbags/vehicle.bag - -Terminal 4: ---------------- -source /opt/ros/noetic/setup.bash && source /catkin_ws/devel/setup.bash -cd GEMStack -python3 -m GEMstack.onboard.perception.fusion - -Terminal 5: ---------------- -source /opt/ros/noetic/setup.bash && source /catkin_ws/devel/setup.bash -rviz -""" - -# Python -import os -from typing import List, Dict -from collections import defaultdict -# ROS, CV -import rospy -import message_filters -import tf -from cv_bridge import CvBridge -from sensor_msgs.msg import Image, PointCloud2 -# GEMStack -from ...state import AllState,VehicleState,ObjectPose,ObjectFrameEnum,AgentState,AgentEnum,AgentActivityEnum -from ultralytics import YOLO -from .pedestrian_detection_utils import * - -class Fusion3D(): - # TODO: Pull params into a JSON/yaml - # TODO: Convert some lists into np.arrays, vectorize calculations - # TODO: Implement logging instead of print, cleanup comments - # TODO: Cleanup funcs + split into separate classes - # TODO: Decide if we want to name dets "peds" or "objs"/"agents" - # Maybe peds for now and Agents in agent_detection.py? - def __init__(self): - # Publish debug/viz rostopics if true - self.debug = True - # Setup variables - self.bridge = CvBridge() - # TODO: Wrap detector into GEMDetector? - self.detector = YOLO(os.getcwd()+'/GEMstack/knowledge/detection/yolov8n.pt') - # track_id: AgentState - self.prev_agents = dict() - self.current_agents = dict() - self.confidence = 0.7 - self.classes_to_detect = 0 - self.ground_threshold = 1.6 - self.max_dist_percent = 0.7 - - # Load calibration data - self.R = load_extrinsics(os.getcwd() + '/GEMstack/onboard/perception/calibration/extrinsics/R.npy') - self.t = load_extrinsics(os.getcwd() + '/GEMstack/onboard/perception/calibration/extrinsics/t.npy') - self.K = load_intrinsics(os.getcwd() + '/GEMstack/onboard/perception/calibration/camera_intrinsics.json') - - # Subscribers and sychronizers - self.rgb_rosbag = message_filters.Subscriber('/oak/rgb/image_raw', Image) - self.top_lidar_rosbag = message_filters.Subscriber('/ouster/points', PointCloud2) - self.sync = message_filters.ApproximateTimeSynchronizer([self.rgb_rosbag, self.top_lidar_rosbag], queue_size=10, slop=0.1) - self.sync.registerCallback(self.ouster_oak_callback) - - # TF listener to get transformation from LiDAR to Camera - self.tf_listener = tf.TransformListener() - - if self.debug: self.init_debug() - - def init_debug(self,): - # Debug Publishers - self.pub_pedestrians_pc2 = rospy.Publisher("/point_cloud/pedestrians", PointCloud2, queue_size=10) - self.pub_obj_centers_pc2 = rospy.Publisher("/point_cloud/obj_centers", PointCloud2, queue_size=10) - self.pub_image = rospy.Publisher("/camera/image_detection", Image, queue_size=1) - - def update(self, vehicle : VehicleState) -> Dict[str,AgentState]: - return self.current_agents - - # TODO: Improve Algo Knn, ransac, etc. - def find_centers(self, clusters: List[List[np.ndarray]]) -> List[np.ndarray]: - clusters = [np.array(clust) for clust in clusters] - centers = [np.array(()) if clust.size == 0 else np.mean(clust, axis=0) for clust in clusters] - return centers - - # Beware: AgentState(PhysicalObject) builds bbox from - # dims [-l/2,l/2] x [-w/2,w/2] x [0,h], not - # [-l/2,l/2] x [-w/2,w/2] x [-h/2,h/2] - def find_dims(self, clusters: List[List[np.ndarray]]) -> List[np.ndarray]: - # Add some small constant to height to compensate for - # objects distance to ground we filtered from lidar, - # other heuristics to imrpove stability for find_ funcs ? - clusters = [np.array(clust) for clust in clusters] - dims = [np.array(()) if clust.size == 0 else np.max(clust, axis= 0) - np.min(clust, axis= 0) for clust in clusters] - return dims - - # TODO: Slower but cleaner to input self.current_agents dict - # TODO: Moving Average across last N iterations pos/vel? Less spurious vals - # TODO Akul: Fix velocity calculation to calculate in ObjectFrameEnum.START - # work towards own tracking class instead of simple YOLO track? - # ret: Dict[track_id: vel[x, y, z]] - def find_vels(self, track_ids, obj_centers): - # Object not seen -> velocity = None - track_id_center_map = dict(zip(track_ids, obj_centers)) - vels = defaultdict(lambda: np.array(())) # None is faster, np.array matches other find_ methods. - - for prev_track_id, prev_agent in self.prev_agents.items(): - if prev_track_id in track_ids: - # TODO: Add prev_agents to memory to avoid None velocity - # We should only be missing prev pose on first sight of track_id Agent. - # print("shape 1: ", track_id_center_map[prev_agent.track_id]) - # print("shape 2: ", np.array([prev_agent.pose.x, prev_agent.pose.y, prev_agent.pose.z])) - # prev can be 3 separate Nones, current is just empty array... make this symmetrical - if prev_agent.pose.x and prev_agent.pose.y and prev_agent.pose.z and track_id_center_map[prev_agent.track_id].shape == 3: - vels[prev_track_id] = track_id_center_map[prev_track_id] - np.array([prev_agent.pose.x, prev_agent.pose.y, prev_agent.pose.z]) - return vels - - - # TODO: Separate debug/viz class, separate this func into bbox and 2d 3d points - def viz_object_states(self, cv_image, boxes, extracted_pts_all): - # Extract 3D pedestrians points in lidar frame - # ** These are camera frame after transform_lidar_points, right? - pedestrians_3d_pts = [list(extracted_pts[:, -3:]) for extracted_pts in extracted_pts_all] - - # Object center viz - obj_3d_obj_centers = list() - for track_id, agent in self.current_agents.items(): - if agent.pose.x != None and agent.pose.y != None and agent.pose.z != None: - obj_3d_obj_centers.append((agent.pose.x, agent.pose.y, agent.pose.z)) # ** - if len(obj_3d_obj_centers) > 0: - ros_obj_3d_obj_centers_pc2 = create_point_cloud(obj_3d_obj_centers, color=(0, 128, 0)) - self.pub_obj_centers_pc2.publish(ros_obj_3d_obj_centers_pc2) - - # Extract 2D pedestrians points and bbox in camera frame - extracted_2d_pts = [list(extracted_pts[:, :2].astype(int)) for extracted_pts in extracted_pts_all] - flattened_pedestrians_2d_pts = list() - for pts in extracted_2d_pts: flattened_pedestrians_2d_pts.extend(pts) - - for ind, bbox in enumerate(boxes): - xywh = bbox.xywh[0].tolist() - cv_image = vis_2d_bbox(cv_image, xywh, bbox) - - flattened_pedestrians_3d_pts = list() - for pts in pedestrians_3d_pts: flattened_pedestrians_3d_pts.extend(pts) - - # Draw projected 2D LiDAR points on the image. - for pt in flattened_pedestrians_2d_pts: - cv2.circle(cv_image, pt, 2, (0, 0, 255), -1) - ros_img = self.bridge.cv2_to_imgmsg(cv_image, 'bgr8') - self.pub_image.publish(ros_img) - - # Draw 3D pedestrian pointclouds - # Create point cloud from extracted 3D points - ros_extracted_pedestrian_pc2 = create_point_cloud(flattened_pedestrians_3d_pts) - self.pub_pedestrians_pc2.publish(ros_extracted_pedestrian_pc2) - - - def update_object_states(self, track_result, extracted_pts_all) -> None: - self.prev_agents = self.current_agents.copy() - self.current_agents.clear() - - # Change pedestrians_3d_pts to dicts matching track_ids - track_ids = track_result[0].boxes.id.int().cpu().tolist() - num_objs = len(track_ids) - boxes = track_result[0].boxes - - # Extract 3D pedestrians points in lidar frame - # ** These are camera frame after transform_lidar_points, right? - pedestrians_3d_pts = [list(extracted_pts[:, -3:]) for extracted_pts in extracted_pts_all] - if len(pedestrians_3d_pts) != num_objs: - raise Exception('Perception - Camera detections, points clusters num. mismatch') - - # TODO: Slower but cleaner to pass dicts of AgentState - # or at least {track_ids: centers/pts/etc} - # TODO: Combine funcs for efficiency in C. - # Separate numpy prob still faster for now - obj_centers = self.find_centers(pedestrians_3d_pts) - obj_dims = self.find_dims(pedestrians_3d_pts) - obj_vels = self.find_vels(track_ids, obj_centers) - - # Update Current AgentStates - for ind in range(num_objs): - obj_center = (None, None, None) if obj_centers[ind].size == 0 else obj_centers[ind] - obj_dim = (None, None, None) if obj_dims[ind].size == 0 else obj_dims[ind] - self.current_agents[track_ids[ind]] = ( - AgentState( - track_id = track_ids[ind], - pose=ObjectPose(t=0, x=obj_center[0], y=obj_center[1], z=obj_center[2] ,yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT), - # (l, w, h) - # TODO: confirm (z -> l, x -> w, y -> h) - dimensions=(obj_dim[2], obj_dim[0],obj_dim[1]), - outline=None, - type=AgentEnum.PEDESTRIAN, - activity=AgentActivityEnum.MOVING, - velocity= None if obj_vels[track_ids[ind]].size == 0 else tuple(vels[track_ids[ind]]), - yaw_rate=0 - )) - - - def ouster_oak_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2): - # Convert to cv2 image and run detector - cv_image = self.bridge.imgmsg_to_cv2(rgb_image_msg, "bgr8") - track_result = self.detector.track(source=cv_image, classes=self.classes_to_detect, persist=True, conf=self.confidence) - - # Convert 1D PointCloud2 data to x, y, z coords - lidar_points = convert_pointcloud2_to_xyz(lidar_pc2_msg) - # print("len lidar_points", len(lidar_points)) - - # Downsample xyz point clouds - downsampled_points = downsample_points(lidar_points) - # print("len downsampled_points", len(downsampled_points)) - - # Transform LiDAR points into the camera coordinate frame. - lidar_in_camera = transform_lidar_points(downsampled_points, self.R, self.t) - # print("len lidar_in_camera", len(lidar_in_camera)) - - # Project the transformed points into the image plane. - projected_pts = project_points(lidar_in_camera, self.K) - # print("len projected_pts", len(projected_pts)) - - # Process bboxes - boxes = track_result[0].boxes - - extracted_pts_all = list() - - for ind, bbox in enumerate(boxes): - xywh = bbox.xywh[0].tolist() - - # Extracting projected pts - x, y, w, h = xywh - left_bound = int(x - w / 2) - right_bound = int(x + w / 2) - top_bound = int(y - h / 2) - bottom_bound = int(y + h / 2) - - pts = np.array(projected_pts) - extracted_pts = pts[(pts[:, 0] > left_bound) & - (pts[:, 0] < right_bound) & - (pts[:, 1] > top_bound) & - (pts[:, 1] < bottom_bound) - ] - - # Apply ground and max distance filter to the extracted 5D points - extracted_pts = filter_ground_points(extracted_pts, self.ground_threshold) - extracted_pts = filter_far_points(extracted_pts) - extracted_pts_all.append(extracted_pts) - - self.update_object_states(track_result, extracted_pts_all) - if self.debug: self.viz_object_states(cv_image, boxes, extracted_pts_all) - - -if __name__ == '__main__': - rospy.init_node('fusion_node', anonymous=True) - Fusion3D() - while not rospy.core.is_shutdown(): - rospy.rostime.wallsleep(0.5) diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py index 1f8cfe5bc..bd43494db 100644 --- a/GEMstack/onboard/perception/pedestrian_detection.py +++ b/GEMstack/onboard/perception/pedestrian_detection.py @@ -39,6 +39,7 @@ import tf from cv_bridge import CvBridge from sensor_msgs.msg import Image, PointCloud2 +from visualization_msgs.msg import MarkerArray # YOLO from ultralytics import YOLO from ultralytics.engine.results import Results, Boxes @@ -83,7 +84,7 @@ def __init__(self, vehicle_interface : GEMInterface) -> None: self.confidence = 0.7 self.classes_to_detect = 0 self.ground_threshold = 1.6 - self.max_dist_percent = 0.7 + self.max_human_depth = 0.9 # Load calibration data # TODO: Maybe lets add one word or link what R t K are? @@ -106,6 +107,7 @@ def init_debug(self,) -> None: # Debug Publishers self.pub_pedestrians_pc2 = rospy.Publisher("/point_cloud/pedestrians", PointCloud2, queue_size=10) self.pub_obj_centers_pc2 = rospy.Publisher("/point_cloud/obj_centers", PointCloud2, queue_size=10) + self.pub_bboxes_markers = rospy.Publisher("/markers/bboxes", MarkerArray, queue_size=10) self.pub_image = rospy.Publisher("/camera/image_detection", Image, queue_size=1) def update(self, vehicle : VehicleState) -> Dict[str,AgentState]: @@ -152,19 +154,19 @@ def find_vels(self, track_ids: List[int], obj_centers: List[np.ndarray]) -> Dict # TODO: Separate debug/viz class, bbox and 2d 3d points funcs - def viz_object_states(self, cv_image: cv2.typing.MatLike, boxes: Boxes, extracted_pts_all: List[np.ndarray]) -> None: + def viz_object_states(self, cv_image, boxes, extracted_pts_all): # Extract 3D pedestrians points in lidar frame # ** These are camera frame after transform_lidar_points, right? pedestrians_3d_pts = [list(extracted_pts[:, -3:]) for extracted_pts in extracted_pts_all] # Object center viz obj_3d_obj_centers = list() + obj_3d_obj_dims = list() for track_id, agent in self.current_agents.items(): if agent.pose.x != None and agent.pose.y != None and agent.pose.z != None: obj_3d_obj_centers.append((agent.pose.x, agent.pose.y, agent.pose.z)) # ** - if len(obj_3d_obj_centers) > 0: - ros_obj_3d_obj_centers_pc2 = create_point_cloud(obj_3d_obj_centers, color=(0, 128, 0)) - self.pub_obj_centers_pc2.publish(ros_obj_3d_obj_centers_pc2) + if agent.dimensions != None and agent.dimensions[0] != None and agent.dimensions[1] != None and agent.dimensions[2] != None: + obj_3d_obj_dims.append(agent.dimensions) # Extract 2D pedestrians points and bbox in camera frame extracted_2d_pts = [list(extracted_pts[:, :2].astype(int)) for extracted_pts in extracted_pts_all] @@ -178,21 +180,39 @@ def viz_object_states(self, cv_image: cv2.typing.MatLike, boxes: Boxes, extracte flattened_pedestrians_3d_pts = list() for pts in pedestrians_3d_pts: flattened_pedestrians_3d_pts.extend(pts) - # Draw projected 2D LiDAR points on the image. - for pt in flattened_pedestrians_2d_pts: - cv2.circle(cv_image, pt, 2, (0, 0, 255), -1) - ros_img = self.bridge.cv2_to_imgmsg(cv_image, 'bgr8') - self.pub_image.publish(ros_img) + if len(flattened_pedestrians_3d_pts) > 0: + # Draw projected 2D LiDAR points on the image. + for pt in flattened_pedestrians_2d_pts: + cv2.circle(cv_image, pt, 2, (0, 0, 255), -1) + ros_img = self.bridge.cv2_to_imgmsg(cv_image, 'bgr8') + self.pub_image.publish(ros_img) + + # Draw 3D pedestrian pointclouds + # Create point cloud from extracted 3D points + ros_extracted_pedestrian_pc2 = create_point_cloud(flattened_pedestrians_3d_pts) + self.pub_pedestrians_pc2.publish(ros_extracted_pedestrian_pc2) + + # Draw 3D pedestrian centers and dimensions + if len(obj_3d_obj_centers) > 0 and len(obj_3d_obj_dims) > 0: + # Draw 3D pedestrian center pointclouds + ros_obj_3d_obj_centers_pc2 = create_point_cloud(obj_3d_obj_centers, color=(0, 128, 0)) + self.pub_obj_centers_pc2.publish(ros_obj_3d_obj_centers_pc2) - # Draw 3D pedestrian pointclouds - # Create point cloud from extracted 3D points - ros_extracted_pedestrian_pc2 = create_point_cloud(flattened_pedestrians_3d_pts) - self.pub_pedestrians_pc2.publish(ros_extracted_pedestrian_pc2) + # Draw 3D pedestrian bboxes markers + # Create bbox marker from pedestrain dimensions + ros_delete_bboxes_markers = delete_bbox_marker() + self.pub_bboxes_markers.publish(ros_delete_bboxes_markers) + ros_pedestrians_bboxes_markers = create_bbox_marker(obj_3d_obj_centers, obj_3d_obj_dims) + self.pub_bboxes_markers.publish(ros_pedestrians_bboxes_markers) def update_object_states(self, track_result: List[Results], extracted_pts_all: List[np.ndarray]) -> None: self.prev_agents = self.current_agents.copy() - self.current_agents.clear() + self.current_agents.clear() + + # Return if no track results available + if track_result[0].boxes.id == None: + return # Change pedestrians_3d_pts to dicts matching track_ids track_ids = track_result[0].boxes.id.int().cpu().tolist() @@ -223,15 +243,15 @@ def update_object_states(self, track_result: List[Results], extracted_pts_all: L pose=ObjectPose(t=0, x=obj_center[0], y=obj_center[1], z=obj_center[2] ,yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT), # (l, w, h) # TODO: confirm (z -> l, x -> w, y -> h) - dimensions=(obj_dim[2], obj_dim[0],obj_dim[1]), + dimensions=(obj_dim[0], obj_dim[1], obj_dim[2]), outline=None, type=AgentEnum.PEDESTRIAN, activity=AgentActivityEnum.MOVING, - velocity= None if obj_vels[track_ids[ind]].size == 0 else tuple(vels[track_ids[ind]]), + velocity= None if obj_vels[track_ids[ind]].size == 0 else tuple(obj_vels[track_ids[ind]]), yaw_rate=0 )) - def ouster_oak_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2) -> None: + def ouster_oak_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2): # Convert to cv2 image and run detector cv_image = self.bridge.imgmsg_to_cv2(rgb_image_msg, "bgr8") track_result = self.detector.track(source=cv_image, classes=self.classes_to_detect, persist=True, conf=self.confidence) @@ -254,7 +274,9 @@ def ouster_oak_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2) # Process bboxes boxes = track_result[0].boxes + extracted_pts_all = list() + for ind, bbox in enumerate(boxes): xywh = bbox.xywh[0].tolist() @@ -266,19 +288,23 @@ def ouster_oak_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2) bottom_bound = int(y + h / 2) pts = np.array(projected_pts) - extracted_pts = pts[(pts[:, 0] > left_bound) & - (pts[:, 0] < right_bound) & - (pts[:, 1] > top_bound) & - (pts[:, 1] < bottom_bound) - ] - - # Apply ground and max distance filter to the extracted 5D points - extracted_pts = filter_ground_points(extracted_pts, self.ground_threshold) - extracted_pts = filter_far_points(extracted_pts) - extracted_pts_all.append(extracted_pts) - - self.update_object_states(track_result, extracted_pts_all) - if self.debug: self.viz_object_states(cv_image, boxes, extracted_pts_all) + # Checking projected_pts length is very important + if len(projected_pts) > 0: + extracted_pts = pts[(pts[:, 0] > left_bound) & + (pts[:, 0] < right_bound) & + (pts[:, 1] > top_bound) & + (pts[:, 1] < bottom_bound) + ] + + # Apply ground and max distance filter to the extracted 5D points + extracted_pts = filter_ground_points(extracted_pts, self.ground_threshold) + extracted_pts = filter_depth_points(extracted_pts) + extracted_pts_all.append(extracted_pts) + + if len(extracted_pts_all) > 0 and len(track_result) > 0: + self.update_object_states(track_result, extracted_pts_all) + if self.debug: self.viz_object_states(cv_image, boxes, extracted_pts_all) + def rate(self): return 4.0 diff --git a/GEMstack/onboard/perception/pedestrian_detection_utils.py b/GEMstack/onboard/perception/pedestrian_detection_utils.py index 4163d7b2c..c74603232 100644 --- a/GEMstack/onboard/perception/pedestrian_detection_utils.py +++ b/GEMstack/onboard/perception/pedestrian_detection_utils.py @@ -1,4 +1,5 @@ from sensor_msgs.msg import PointCloud2, PointField +from visualization_msgs.msg import Marker, MarkerArray import numpy as np import sensor_msgs.point_cloud2 as pc2 import open3d as o3d @@ -35,11 +36,15 @@ def filter_ground_points(lidar_points, ground_threshold = 0): return filtered_array -def filter_far_points(lidar_points, max_dist_percent=0.85): - """ Filter points beyond a percentage threshold of max distance in a point cluster """ +def filter_depth_points(lidar_points, max_human_depth=0.9): + """ Filter points beyond a max possible human depth in a point cluster """ if lidar_points.shape[0] == 0: return lidar_points - max_dist = np.max(lidar_points[:, 4]) - filtered_array = lidar_points[lidar_points[:, 4] < max_dist_percent * max_dist] + lidar_points_dist = lidar_points[:, 4] + min_dist = np.min(lidar_points_dist) + max_dist = np.max(lidar_points_dist) + max_possible_dist = min_dist + max_human_depth + actual_dist = min(max_dist, max_possible_dist) + filtered_array = lidar_points[lidar_points_dist < actual_dist] return filtered_array @@ -167,4 +172,73 @@ def create_point_cloud(points, color=(255, 0, 0)): return pc2.create_cloud(header, fields, point_cloud_data) + +def create_bbox_marker(centroids, dimensions): + """ + Create 3D bbox markers from centroids and dimensions + """ + marker_array = MarkerArray() + + for i, (centroid, dimension) in enumerate(zip(centroids, dimensions)): + # Skip if no centroid or dimension + if (centroid == None) or (dimension == None): + continue + + marker = Marker() + marker.header.frame_id = "map" # Reference frame + marker.header.stamp = rospy.Time.now() + marker.ns = "bounding_boxes" + marker.id = i # Unique ID for each marker + marker.type = Marker.CUBE # Cube for bounding box + marker.action = Marker.ADD + + # Position (center of the bounding box) + c_x, c_y, c_z = centroid + if (not isinstance(c_x, float)) or (not isinstance(c_y, float)) or (not isinstance(c_z, float)): + continue + + marker.pose.position.x = c_x + marker.pose.position.y = c_y + marker.pose.position.z = c_z + + # Orientation (default, no rotation) + marker.pose.orientation.x = 0.0 + marker.pose.orientation.y = 0.0 + marker.pose.orientation.z = 0.0 + marker.pose.orientation.w = 1.0 + + # Bounding box dimensions + d_x, d_y, d_z = dimension + if (not isinstance(d_x, float)) or (not isinstance(d_y, float)) or (not isinstance(d_z, float)): + continue + + marker.scale.x = d_x + marker.scale.y = d_y + marker.scale.z = d_z + + # Random colors for each bounding box + marker.color.r = 0.0 # Varying colors + marker.color.g = 1.0 + marker.color.b = 1.5 + marker.color.a = 0.2 # Transparency + + marker.lifetime = rospy.Duration() # Persistent + marker_array.markers.append(marker) + return marker_array + + +def delete_bbox_marker(): + """ + Delete 3D bbox markers given ID ranges + """ + marker_array = MarkerArray() + for i in range(6): + marker = Marker() + marker.ns = "bounding_boxes" + marker.id = i + marker.action = Marker.DELETE + marker_array.markers.append(marker) + return marker_array + + \ No newline at end of file From 6cd1d4fdda418d6dd27d7f3c48583fd13f220bfb Mon Sep 17 00:00:00 2001 From: KenC1014 Date: Mon, 17 Feb 2025 18:10:36 -0600 Subject: [PATCH 25/46] apply lidar to vehicle transform to centers --- .../perception/pedestrian_detection.py | 30 +++++++++++++++---- 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py index bd43494db..947bd125a 100644 --- a/GEMstack/onboard/perception/pedestrian_detection.py +++ b/GEMstack/onboard/perception/pedestrian_detection.py @@ -88,10 +88,15 @@ def __init__(self, vehicle_interface : GEMInterface) -> None: # Load calibration data # TODO: Maybe lets add one word or link what R t K are? - self.R = load_extrinsics(os.getcwd() + '/GEMstack/onboard/perception/calibration/extrinsics/R.npy') - self.t = load_extrinsics(os.getcwd() + '/GEMstack/onboard/perception/calibration/extrinsics/t.npy') + self.R_lidar_to_oak = load_extrinsics(os.getcwd() + '/GEMstack/onboard/perception/calibration/extrinsics/R.npy') + self.t_lidar_to_oak = load_extrinsics(os.getcwd() + '/GEMstack/onboard/perception/calibration/extrinsics/t.npy') self.K = load_intrinsics(os.getcwd() + '/GEMstack/onboard/perception/calibration/camera_intrinsics.json') + self.R_lidar_to_vehicle = np.array([[0.9995121, 0.02132941, 0.02281911], + [-0.02124771, 0.99976695, -0.00381707], + [-0.02289521, 0.00333035, 0.9997323]]) + self.t_lidar_to_vehicle = np.array([[0.0], [0.0], [0.35]]) + # Subscribers and sychronizers self.rgb_rosbag = message_filters.Subscriber('/oak/rgb/image_raw', Image) self.top_lidar_rosbag = message_filters.Subscriber('/ouster/points', PointCloud2) @@ -179,7 +184,7 @@ def viz_object_states(self, cv_image, boxes, extracted_pts_all): flattened_pedestrians_3d_pts = list() for pts in pedestrians_3d_pts: flattened_pedestrians_3d_pts.extend(pts) - + if len(flattened_pedestrians_3d_pts) > 0: # Draw projected 2D LiDAR points on the image. for pt in flattened_pedestrians_2d_pts: @@ -188,8 +193,10 @@ def viz_object_states(self, cv_image, boxes, extracted_pts_all): self.pub_image.publish(ros_img) # Draw 3D pedestrian pointclouds + # Tranform 3D pedestrians points to vehicle frame for better visualization. Turn off for performance + flattened_pedestrians_3d_pts_vehicle = transform_lidar_points(np.array(flattened_pedestrians_3d_pts), self.R_lidar_to_vehicle, self.t_lidar_to_vehicle) # Create point cloud from extracted 3D points - ros_extracted_pedestrian_pc2 = create_point_cloud(flattened_pedestrians_3d_pts) + ros_extracted_pedestrian_pc2 = create_point_cloud(flattened_pedestrians_3d_pts_vehicle) self.pub_pedestrians_pc2.publish(ros_extracted_pedestrian_pc2) # Draw 3D pedestrian centers and dimensions @@ -233,9 +240,20 @@ def update_object_states(self, track_result: List[Results], extracted_pts_all: L obj_dims = self.find_dims(pedestrians_3d_pts) obj_vels = self.find_vels(track_ids, obj_centers) + # Transform centers from top_lidar frame to vehicle frame + # Need to transform the center point one by one since matrix op can't deal with empty points + obj_centers_vehicle = [] + for obj_center in obj_centers: + if len(obj_center) > 0: + obj_center = np.array([obj_center]) + obj_center_vehicle = transform_lidar_points(obj_center, self.R_lidar_to_vehicle, self.t_lidar_to_vehicle)[0] + obj_centers_vehicle.append(obj_center_vehicle) + else: + obj_centers_vehicle.append(np.array(())) + # Update Current AgentStates for ind in range(num_objs): - obj_center = (None, None, None) if obj_centers[ind].size == 0 else obj_centers[ind] + obj_center = (None, None, None) if obj_centers_vehicle[ind].size == 0 else obj_centers_vehicle[ind] obj_dim = (None, None, None) if obj_dims[ind].size == 0 else obj_dims[ind] self.current_agents[track_ids[ind]] = ( AgentState( @@ -265,7 +283,7 @@ def ouster_oak_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2): # print("len downsampled_points", len(downsampled_points)) # Transform LiDAR points into the camera coordinate frame. - lidar_in_camera = transform_lidar_points(downsampled_points, self.R, self.t) + lidar_in_camera = transform_lidar_points(downsampled_points, self.R_lidar_to_oak, self.t_lidar_to_oak) # print("len lidar_in_camera", len(lidar_in_camera)) # Project the transformed points into the image plane. From 7ad05fd7adbcfb44068baabe9c83ec48d68cca2e Mon Sep 17 00:00:00 2001 From: Lukas Dumasius <80981746+lukasdumasius@users.noreply.github.com> Date: Mon, 17 Feb 2025 21:46:59 -0600 Subject: [PATCH 26/46] Check len(0) ouster points pedestrian_detection.py --- GEMstack/onboard/perception/pedestrian_detection.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py index 947bd125a..54c7ce0f5 100644 --- a/GEMstack/onboard/perception/pedestrian_detection.py +++ b/GEMstack/onboard/perception/pedestrian_detection.py @@ -318,10 +318,11 @@ def ouster_oak_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2): extracted_pts = filter_ground_points(extracted_pts, self.ground_threshold) extracted_pts = filter_depth_points(extracted_pts) extracted_pts_all.append(extracted_pts) + else: extracted_pts_all.append(np.array(()) - if len(extracted_pts_all) > 0 and len(track_result) > 0: - self.update_object_states(track_result, extracted_pts_all) - if self.debug: self.viz_object_states(cv_image, boxes, extracted_pts_all) + #if len(extracted_pts_all) > 0 and len(track_result) > 0: + self.update_object_states(track_result, extracted_pts_all) + if self.debug: self.viz_object_states(cv_image, boxes, extracted_pts_all) def rate(self): From 68d0a75c714f7db8eea85691d79c113214903a3b Mon Sep 17 00:00:00 2001 From: KenC1014 Date: Mon, 17 Feb 2025 22:03:36 -0600 Subject: [PATCH 27/46] fix lidar frame mapping --- .../perception/pedestrian_detection.py | 13 ++++++------ .../perception/pedestrian_detection_utils.py | 20 +++++++++---------- GEMstack/onboard/perception/transform.py | 6 ++++-- 3 files changed, 21 insertions(+), 18 deletions(-) diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py index 947bd125a..6170b5786 100644 --- a/GEMstack/onboard/perception/pedestrian_detection.py +++ b/GEMstack/onboard/perception/pedestrian_detection.py @@ -83,7 +83,7 @@ def __init__(self, vehicle_interface : GEMInterface) -> None: self.curr_time = 1 # Avoid divide by 0 for placebolder, 0 self.confidence = 0.7 self.classes_to_detect = 0 - self.ground_threshold = 1.6 + self.ground_threshold = -2.0 self.max_human_depth = 0.9 # Load calibration data @@ -162,7 +162,7 @@ def find_vels(self, track_ids: List[int], obj_centers: List[np.ndarray]) -> Dict def viz_object_states(self, cv_image, boxes, extracted_pts_all): # Extract 3D pedestrians points in lidar frame # ** These are camera frame after transform_lidar_points, right? - pedestrians_3d_pts = [list(extracted_pts[:, -3:]) for extracted_pts in extracted_pts_all] + pedestrians_3d_pts = [list(extracted_pts[:, -3:]) for extracted_pts in extracted_pts_all] # Object center viz obj_3d_obj_centers = list() @@ -196,7 +196,8 @@ def viz_object_states(self, cv_image, boxes, extracted_pts_all): # Tranform 3D pedestrians points to vehicle frame for better visualization. Turn off for performance flattened_pedestrians_3d_pts_vehicle = transform_lidar_points(np.array(flattened_pedestrians_3d_pts), self.R_lidar_to_vehicle, self.t_lidar_to_vehicle) # Create point cloud from extracted 3D points - ros_extracted_pedestrian_pc2 = create_point_cloud(flattened_pedestrians_3d_pts_vehicle) + # ros_extracted_pedestrian_pc2 = create_point_cloud(flattened_pedestrians_3d_pts_vehicle) + ros_extracted_pedestrian_pc2 = create_point_cloud(flattened_pedestrians_3d_pts) self.pub_pedestrians_pc2.publish(ros_extracted_pedestrian_pc2) # Draw 3D pedestrian centers and dimensions @@ -287,8 +288,8 @@ def ouster_oak_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2): # print("len lidar_in_camera", len(lidar_in_camera)) # Project the transformed points into the image plane. - projected_pts = project_points(lidar_in_camera, self.K) - # print("len projected_pts", len(projected_pts)) + projected_pts = project_points(lidar_in_camera, self.K, downsampled_points) + # print("projected_pts", len(projected_pts)) # Process bboxes boxes = track_result[0].boxes @@ -316,7 +317,7 @@ def ouster_oak_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2): # Apply ground and max distance filter to the extracted 5D points extracted_pts = filter_ground_points(extracted_pts, self.ground_threshold) - extracted_pts = filter_depth_points(extracted_pts) + extracted_pts = filter_depth_points(extracted_pts, self.max_human_depth) extracted_pts_all.append(extracted_pts) if len(extracted_pts_all) > 0 and len(track_result) > 0: diff --git a/GEMstack/onboard/perception/pedestrian_detection_utils.py b/GEMstack/onboard/perception/pedestrian_detection_utils.py index c74603232..788ee2e54 100644 --- a/GEMstack/onboard/perception/pedestrian_detection_utils.py +++ b/GEMstack/onboard/perception/pedestrian_detection_utils.py @@ -32,14 +32,14 @@ def downsample_points(lidar_points): def filter_ground_points(lidar_points, ground_threshold = 0): """ Filter points given an elevation of ground threshold """ - filtered_array = lidar_points[lidar_points[:, 3] < ground_threshold] + filtered_array = lidar_points[lidar_points[:, 4] > ground_threshold] return filtered_array def filter_depth_points(lidar_points, max_human_depth=0.9): """ Filter points beyond a max possible human depth in a point cluster """ if lidar_points.shape[0] == 0: return lidar_points - lidar_points_dist = lidar_points[:, 4] + lidar_points_dist = lidar_points[:, 2] min_dist = np.min(lidar_points_dist) max_dist = np.max(lidar_points_dist) max_possible_dist = min_dist + max_human_depth @@ -97,19 +97,19 @@ def transform_lidar_points(lidar_points, R, t): return P_cam -def project_points(points_3d, K): +def project_points(points_3d, K, lidar_points): """ Project 3D points (in the camera frame) into 2D image coordinates using the camera matrix K. Only projects points with z > 0. """ proj_points = [] - for pt in points_3d: + for pt, l_pt in zip(points_3d, lidar_points): if pt[2] > 0: # only project points in front of the camera u = K[0, 0] * (pt[0] / pt[2]) + K[0, 2] v = K[1, 1] * (pt[1] / pt[2]) + K[1, 2] - # 5D points that stores original lidar points - proj_points.append((int(u), int(v), pt[0], pt[1], pt[2])) - return proj_points + # 5D data point + proj_points.append((int(u), int(v), l_pt[0], l_pt[1], l_pt[2])) + return np.array(proj_points) def vis_2d_bbox(image, xywh, box): @@ -155,7 +155,7 @@ def create_point_cloud(points, color=(255, 0, 0)): """ header = rospy.Header() header.stamp = rospy.Time.now() - header.frame_id = "map" # Change to your TF frame + header.frame_id = "os_sensor" # Change to your TF frame fields = [ PointField(name="x", offset=0, datatype=PointField.FLOAT32, count=1), @@ -185,7 +185,7 @@ def create_bbox_marker(centroids, dimensions): continue marker = Marker() - marker.header.frame_id = "map" # Reference frame + marker.header.frame_id = "os_sensor" # Reference frame marker.header.stamp = rospy.Time.now() marker.ns = "bounding_boxes" marker.id = i # Unique ID for each marker @@ -241,4 +241,4 @@ def delete_bbox_marker(): return marker_array - \ No newline at end of file + diff --git a/GEMstack/onboard/perception/transform.py b/GEMstack/onboard/perception/transform.py index 6e921f56c..b876cc595 100644 --- a/GEMstack/onboard/perception/transform.py +++ b/GEMstack/onboard/perception/transform.py @@ -9,8 +9,10 @@ def publish_tf(): while not rospy.is_shutdown(): br.sendTransform( - (0, 1.6, 7), # (x, y, z) translation - tf.transformations.quaternion_from_euler(0.5* np.pi, 0, 0), # (roll, pitch, yaw) + # (0, 1.6, 7), # (x, y, z) translation + # tf.transformations.quaternion_from_euler(0.5* np.pi, 0, 0), # (roll, pitch, yaw) + (0, 0, 0), # (x, y, z) translation + tf.transformations.quaternion_from_euler(0, 0, 0), # (roll, pitch, yaw) rospy.Time.now(), "os_sensor", # Child frame (sensor) "map" # Parent frame (world) From 0857f0c92e0bcc488a7bb5a82fcbb3920c43565a Mon Sep 17 00:00:00 2001 From: KenC1014 Date: Mon, 17 Feb 2025 22:05:01 -0600 Subject: [PATCH 28/46] reslove conflicts --- GEMstack/onboard/perception/pedestrian_detection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py index 9da4a4c81..31085ce8f 100644 --- a/GEMstack/onboard/perception/pedestrian_detection.py +++ b/GEMstack/onboard/perception/pedestrian_detection.py @@ -319,7 +319,7 @@ def ouster_oak_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2): extracted_pts = filter_ground_points(extracted_pts, self.ground_threshold) extracted_pts = filter_depth_points(extracted_pts, self.max_human_depth) extracted_pts_all.append(extracted_pts) - else: extracted_pts_all.append(np.array(()) + else: extracted_pts_all.append(np.array(())) #if len(extracted_pts_all) > 0 and len(track_result) > 0: self.update_object_states(track_result, extracted_pts_all) From fcca8c01f59a224bb0ea50cc4ccbff8082d360d5 Mon Sep 17 00:00:00 2001 From: KenC1014 Date: Mon, 17 Feb 2025 22:42:55 -0600 Subject: [PATCH 29/46] toggle vehicle frame --- .../perception/pedestrian_detection.py | 34 +++++++++++-------- GEMstack/onboard/perception/transform.py | 2 -- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py index 31085ce8f..2b6626ce9 100644 --- a/GEMstack/onboard/perception/pedestrian_detection.py +++ b/GEMstack/onboard/perception/pedestrian_detection.py @@ -85,6 +85,7 @@ def __init__(self, vehicle_interface : GEMInterface) -> None: self.classes_to_detect = 0 self.ground_threshold = -2.0 self.max_human_depth = 0.9 + self.vehicle_frame = False # Indicate whether pedestrians centroids and point clouds are in the vehicle frame # Load calibration data # TODO: Maybe lets add one word or link what R t K are? @@ -193,10 +194,12 @@ def viz_object_states(self, cv_image, boxes, extracted_pts_all): self.pub_image.publish(ros_img) # Draw 3D pedestrian pointclouds - # Tranform 3D pedestrians points to vehicle frame for better visualization. Turn off for performance - flattened_pedestrians_3d_pts_vehicle = transform_lidar_points(np.array(flattened_pedestrians_3d_pts), self.R_lidar_to_vehicle, self.t_lidar_to_vehicle) + if self.vehicle_frame: + # If in vehicle frame, tranform 3D pedestrians points to vehicle frame for better visualization. + flattened_pedestrians_3d_pts_vehicle = transform_lidar_points(np.array(flattened_pedestrians_3d_pts), self.R_lidar_to_vehicle, self.t_lidar_to_vehicle) + flattened_pedestrians_3d_pts = flattened_pedestrians_3d_pts_vehicle + # Create point cloud from extracted 3D points - # ros_extracted_pedestrian_pc2 = create_point_cloud(flattened_pedestrians_3d_pts_vehicle) ros_extracted_pedestrian_pc2 = create_point_cloud(flattened_pedestrians_3d_pts) self.pub_pedestrians_pc2.publish(ros_extracted_pedestrian_pc2) @@ -241,20 +244,22 @@ def update_object_states(self, track_result: List[Results], extracted_pts_all: L obj_dims = self.find_dims(pedestrians_3d_pts) obj_vels = self.find_vels(track_ids, obj_centers) - # Transform centers from top_lidar frame to vehicle frame + # If in vehicle frame, transform centers from top_lidar frame to vehicle frame # Need to transform the center point one by one since matrix op can't deal with empty points - obj_centers_vehicle = [] - for obj_center in obj_centers: - if len(obj_center) > 0: - obj_center = np.array([obj_center]) - obj_center_vehicle = transform_lidar_points(obj_center, self.R_lidar_to_vehicle, self.t_lidar_to_vehicle)[0] - obj_centers_vehicle.append(obj_center_vehicle) - else: - obj_centers_vehicle.append(np.array(())) + if self.vehicle_frame: + obj_centers_vehicle = [] + for obj_center in obj_centers: + if len(obj_center) > 0: + obj_center = np.array([obj_center]) + obj_center_vehicle = transform_lidar_points(obj_center, self.R_lidar_to_vehicle, self.t_lidar_to_vehicle)[0] + obj_centers_vehicle.append(obj_center_vehicle) + else: + obj_centers_vehicle.append(np.array(())) + obj_centers = obj_centers_vehicle # Update Current AgentStates for ind in range(num_objs): - obj_center = (None, None, None) if obj_centers_vehicle[ind].size == 0 else obj_centers_vehicle[ind] + obj_center = (None, None, None) if obj_centers[ind].size == 0 else obj_centers[ind] obj_dim = (None, None, None) if obj_dims[ind].size == 0 else obj_dims[ind] self.current_agents[track_ids[ind]] = ( AgentState( @@ -319,7 +324,8 @@ def ouster_oak_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2): extracted_pts = filter_ground_points(extracted_pts, self.ground_threshold) extracted_pts = filter_depth_points(extracted_pts, self.max_human_depth) extracted_pts_all.append(extracted_pts) - else: extracted_pts_all.append(np.array(())) + # Still causing errors, I will turn this on later + # else: extracted_pts_all.append(np.array(())) #if len(extracted_pts_all) > 0 and len(track_result) > 0: self.update_object_states(track_result, extracted_pts_all) diff --git a/GEMstack/onboard/perception/transform.py b/GEMstack/onboard/perception/transform.py index b876cc595..8032752b1 100644 --- a/GEMstack/onboard/perception/transform.py +++ b/GEMstack/onboard/perception/transform.py @@ -9,8 +9,6 @@ def publish_tf(): while not rospy.is_shutdown(): br.sendTransform( - # (0, 1.6, 7), # (x, y, z) translation - # tf.transformations.quaternion_from_euler(0.5* np.pi, 0, 0), # (roll, pitch, yaw) (0, 0, 0), # (x, y, z) translation tf.transformations.quaternion_from_euler(0, 0, 0), # (roll, pitch, yaw) rospy.Time.now(), From d820c3c63c2707178dd07744acc811962a8a9610 Mon Sep 17 00:00:00 2001 From: KenC1014 Date: Mon, 17 Feb 2025 22:43:05 -0600 Subject: [PATCH 30/46] toggle vehicle frame --- GEMstack/onboard/perception/pedestrian_detection.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py index 2b6626ce9..a76ab6178 100644 --- a/GEMstack/onboard/perception/pedestrian_detection.py +++ b/GEMstack/onboard/perception/pedestrian_detection.py @@ -324,8 +324,7 @@ def ouster_oak_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2): extracted_pts = filter_ground_points(extracted_pts, self.ground_threshold) extracted_pts = filter_depth_points(extracted_pts, self.max_human_depth) extracted_pts_all.append(extracted_pts) - # Still causing errors, I will turn this on later - # else: extracted_pts_all.append(np.array(())) + else: extracted_pts_all.append(np.array(())) #if len(extracted_pts_all) > 0 and len(track_result) > 0: self.update_object_states(track_result, extracted_pts_all) From 248911cefb95d95c14a82981c816d0de01dfd0da Mon Sep 17 00:00:00 2001 From: KenC1014 Date: Mon, 17 Feb 2025 23:27:48 -0600 Subject: [PATCH 31/46] add comments for bug fixes --- GEMstack/onboard/perception/pedestrian_detection.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py index a76ab6178..2a8ac193c 100644 --- a/GEMstack/onboard/perception/pedestrian_detection.py +++ b/GEMstack/onboard/perception/pedestrian_detection.py @@ -163,6 +163,7 @@ def find_vels(self, track_ids: List[int], obj_centers: List[np.ndarray]) -> Dict def viz_object_states(self, cv_image, boxes, extracted_pts_all): # Extract 3D pedestrians points in lidar frame # ** These are camera frame after transform_lidar_points, right? + # ** It was in camera frame before. I fixed it. Now they are in lidar frame! pedestrians_3d_pts = [list(extracted_pts[:, -3:]) for extracted_pts in extracted_pts_all] # Object center viz @@ -232,6 +233,7 @@ def update_object_states(self, track_result: List[Results], extracted_pts_all: L # Extract 3D pedestrians points in lidar frame # ** These are camera frame after transform_lidar_points, right? + # ** It was in camera frame before. I fixed it. Now they are in lidar frame! pedestrians_3d_pts = [list(extracted_pts[:, -3:]) for extracted_pts in extracted_pts_all] if len(pedestrians_3d_pts) != num_objs: raise Exception('Perception - Camera detections, points clusters num. mismatch') @@ -326,7 +328,6 @@ def ouster_oak_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2): extracted_pts_all.append(extracted_pts) else: extracted_pts_all.append(np.array(())) - #if len(extracted_pts_all) > 0 and len(track_result) > 0: self.update_object_states(track_result, extracted_pts_all) if self.debug: self.viz_object_states(cv_image, boxes, extracted_pts_all) From 5700e095c62b9dbb52447522699015ef7d877c06 Mon Sep 17 00:00:00 2001 From: KenC1014 Date: Mon, 17 Feb 2025 23:54:22 -0600 Subject: [PATCH 32/46] handle empty list --- GEMstack/onboard/perception/pedestrian_detection.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py index 2a8ac193c..9970c6a29 100644 --- a/GEMstack/onboard/perception/pedestrian_detection.py +++ b/GEMstack/onboard/perception/pedestrian_detection.py @@ -164,7 +164,7 @@ def viz_object_states(self, cv_image, boxes, extracted_pts_all): # Extract 3D pedestrians points in lidar frame # ** These are camera frame after transform_lidar_points, right? # ** It was in camera frame before. I fixed it. Now they are in lidar frame! - pedestrians_3d_pts = [list(extracted_pts[:, -3:]) for extracted_pts in extracted_pts_all] + pedestrians_3d_pts = [[] if len(extracted_pts) == 0 else list(extracted_pts[:, -3:]) for extracted_pts in extracted_pts_all] # Object center viz obj_3d_obj_centers = list() @@ -176,7 +176,7 @@ def viz_object_states(self, cv_image, boxes, extracted_pts_all): obj_3d_obj_dims.append(agent.dimensions) # Extract 2D pedestrians points and bbox in camera frame - extracted_2d_pts = [list(extracted_pts[:, :2].astype(int)) for extracted_pts in extracted_pts_all] + extracted_2d_pts = [[] if len(extracted_pts) == 0 else list(extracted_pts[:, :2].astype(int)) for extracted_pts in extracted_pts_all] flattened_pedestrians_2d_pts = list() for pts in extracted_2d_pts: flattened_pedestrians_2d_pts.extend(pts) @@ -234,7 +234,7 @@ def update_object_states(self, track_result: List[Results], extracted_pts_all: L # Extract 3D pedestrians points in lidar frame # ** These are camera frame after transform_lidar_points, right? # ** It was in camera frame before. I fixed it. Now they are in lidar frame! - pedestrians_3d_pts = [list(extracted_pts[:, -3:]) for extracted_pts in extracted_pts_all] + pedestrians_3d_pts = [[] if len(extracted_pts) == 0 else list(extracted_pts[:, -3:]) for extracted_pts in extracted_pts_all] if len(pedestrians_3d_pts) != num_objs: raise Exception('Perception - Camera detections, points clusters num. mismatch') From bf4f433fb4dde499e257041579dff6da70a44b64 Mon Sep 17 00:00:00 2001 From: LucasEby Date: Tue, 18 Feb 2025 08:17:26 -0500 Subject: [PATCH 33/46] Found some frame problems in parent branch --- GEMstack/onboard/perception/AgentTracker.py | 144 ++++++++++++++++++ GEMstack/onboard/perception/IdTracker.py | 12 ++ GEMstack/onboard/perception/PrevAgent.py | 17 +++ .../perception/pedestrian_detection.py | 22 ++- 4 files changed, 187 insertions(+), 8 deletions(-) create mode 100644 GEMstack/onboard/perception/AgentTracker.py create mode 100644 GEMstack/onboard/perception/IdTracker.py create mode 100644 GEMstack/onboard/perception/PrevAgent.py diff --git a/GEMstack/onboard/perception/AgentTracker.py b/GEMstack/onboard/perception/AgentTracker.py new file mode 100644 index 000000000..55e2cd917 --- /dev/null +++ b/GEMstack/onboard/perception/AgentTracker.py @@ -0,0 +1,144 @@ +import math +from typing import Dict, List + +from GEMstack.onboard.perception.IdTracker import IdTracker +from GEMstack.onboard.perception.PrevAgent import PrevAgent +from GEMstack.state.agent import AgentState + + +class AgentTracker(): + """Associates and tracks AgentState agents. + """ + def __init__(self): + # List of PrevAgent objects (each keeps track of the last seen state and time since seen) + self.prev_agents: List[PrevAgent] = [] + # List of currently visible AgentState objects + self.current_agents: List[AgentState] = [] + # Maximum time (in seconds) to keep a lost agent before dropping it. + self.drop_agent_t: float = 1.0 + # Id tracker for creating new unique pedestrian IDs. + self.id_tracker = IdTracker() + + def assign_ids(self, agents: list) -> Dict[str,AgentState]: + # Act with the assumption that you are being sent a list of AgentState objects and you need to use the object fields to keep track of them for your task + # Further act on the assumption that we will decide the id's of the pedestrians by assuming that 2 pedestrians are the same pedestrian if a + # previously stored AgentState pose and dimensions overlap with a newly passed in AgentState + # Act on the assumption that the AgentState objects are all in reference to the start frame of the vehicle + # some helper functions in this class, LostAgent.py, and IdTracker.py have been created to try to help you out with your task. + # Assume that the output returned from this function will be a dictionary of AgentState objects with the key corresponding to their id + """ + Associates new AgentState objects with existing tracked agents based on overlap. + If an agent does not match any previously tracked agent, a new unique id is assigned. + Also updates the “lost” time for agents that are not matched in this frame. + + Parameters: + agents (list): List of AgentState objects for the current frame + (already converted to the start frame of reference). + + Returns: + Dict[str, AgentState]: Dictionary mapping agent id (as a string) to AgentState. + """ + dt = 1.0 # Assuming a fixed time-step of 1 second (for simplicity) + output_agents: Dict[str, AgentState] = {} + matched_ids = set() + updated_prev_agents: List[PrevAgent] = [] + + # Process each new agent from the current frame. + for new_agent in agents: + found_match = None + # Look for a previously tracked agent whose bounding box overlaps. + for prev in self.prev_agents: + if prev.last_id in matched_ids: + continue # Already matched with another new agent. + if self.__agents_overlap(prev.last_state, new_agent): + found_match = prev + break + + if found_match is not None: + # update velocity using the change in position. + if hasattr(new_agent, 'velocity'): + new_agent.velocity = self.__calculate_velocity(found_match.last_state, new_agent, dt) + + # Update the matched agent’s state and reset its lost-time counter. + found_match.last_state = new_agent + found_match.time_since_seen = 0.0 + matched_ids.add(found_match.last_id) + output_agents[str(found_match.last_id)] = new_agent + updated_prev_agents.append(found_match) + else: + # No match found – assign a new unique id. + new_id = self.id_tracker.get_new_ped_id() + new_prev = PrevAgent(new_id, new_agent) + # initialize velocity to 0. + if hasattr(new_agent, 'velocity'): + new_agent.velocity = 0.0 + output_agents[str(new_id)] = new_agent + updated_prev_agents.append(new_prev) + + # For all previously tracked agents that were not matched this frame, + # update their time-since-seen and only keep them if they have not timed out. + for prev in self.prev_agents: + if prev.last_id not in matched_ids: + prev.update_time(dt) + if prev.time_since_seen < self.drop_agent_t: + updated_prev_agents.append(prev) + + # Save the updated list of tracked agents. + self.prev_agents = updated_prev_agents + # update current_agents to include only those seen in the current frame. + self.current_agents = list(output_agents.values()) + return output_agents + + def __convert_to_start_frame(self): + """Converts a list of AgentState agents from ouster Lidar frame of + reference (which is in reference to the current frame) to start + frame frame of reference + """ + # you can ignore this function akul + pass + + def __agents_overlap(self, ped1, ped2) -> bool: + """ + Determines if two AgentState objects overlap based on their pose and dimensions. + + Assumes each AgentState has: + - pose with attributes x and y. + - dimensions with attributes width and height. + + Returns: + bool: True if they overlap; False otherwise. + """ + # Get first agent's properties. + x1, y1 = ped1.pose.x, ped1.pose.y + w1, h1 = ped1.dimensions.width, ped1.dimensions.height + + # Get second agent's properties. + x2, y2 = ped2.pose.x, ped2.pose.y + w2, h2 = ped2.dimensions.width, ped2.dimensions.height + + # Compute bounding boxes (assuming (x, y) is the center). + left1, right1 = x1 - w1 / 2, x1 + w1 / 2 + top1, bottom1 = y1 - h1 / 2, y1 + h1 / 2 + + left2, right2 = x2 - w2 / 2, x2 + w2 / 2 + top2, bottom2 = y2 - h2 / 2, y2 + h2 / 2 + + # Check for overlap between the bounding boxes. + overlap = not (right1 < left2 or left1 > right2 or bottom1 < top2 or top1 > bottom2) + return overlap + + def __calculate_velocity(self, old_state: AgentState, new_state: AgentState, dt: float) -> float: + """ + Calculates the velocity based on the change in pose over time. + + Parameters: + old_state (AgentState): The previous state. + new_state (AgentState): The current state. + dt (float): Time difference between the two states. + + Returns: + float: The computed velocity. + """ + dx = new_state.pose.x - old_state.pose.x + dy = new_state.pose.y - old_state.pose.y + return math.sqrt(dx * dx + dy * dy) / dt if dt > 0 else 0.0 diff --git a/GEMstack/onboard/perception/IdTracker.py b/GEMstack/onboard/perception/IdTracker.py new file mode 100644 index 000000000..1c9c6ecbd --- /dev/null +++ b/GEMstack/onboard/perception/IdTracker.py @@ -0,0 +1,12 @@ +class IdTracker(): + """Abstracts out id tracking + """ + def __init__(self): + self.__ped_id = 0 + + def get_new_ped_id(self) -> int: + """Returns a unique pedestrian id + """ + assigned_id = self.__id + self.__ped_id += 1 # id will intentionally overflow to get back to 0 + return assigned_id diff --git a/GEMstack/onboard/perception/PrevAgent.py b/GEMstack/onboard/perception/PrevAgent.py new file mode 100644 index 000000000..210cea556 --- /dev/null +++ b/GEMstack/onboard/perception/PrevAgent.py @@ -0,0 +1,17 @@ +# GEM imports: +from ...state import AgentState + +class PrevAgent(): + def __init__(self, last_id: int, last_state: AgentState): + self.time_since_seen: float = 0.0 # Time since the agent was last seen in seconds + self.last_id: int = last_id + self.last_state: AgentState = last_state + + def update_time(time: float): + """Updates the time since the agent was last seen + """ + if time <= 0: + # TODO: log error here + print("UPDATE TIME FOR LOST AGENT WAS LESS THAN OR EQUAL TO 0") + else: + self.time_since_seen += time \ No newline at end of file diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py index 9970c6a29..519d85505 100644 --- a/GEMstack/onboard/perception/pedestrian_detection.py +++ b/GEMstack/onboard/perception/pedestrian_detection.py @@ -48,6 +48,8 @@ from .pedestrian_detection_utils import * from ..interface.gem import GEMInterface from ..component import Component +from .AgentTracker import AgentTracker + def box_to_fake_agent(box): """Creates a fake agent state from an (x,y,w,h) bounding box. @@ -238,14 +240,7 @@ def update_object_states(self, track_result: List[Results], extracted_pts_all: L if len(pedestrians_3d_pts) != num_objs: raise Exception('Perception - Camera detections, points clusters num. mismatch') - # TODO: Slower but cleaner to pass dicts of AgentState - # or at least {track_ids: centers/pts/etc} - # TODO: Combine funcs for efficiency in C. - # Separate numpy prob still faster for now - obj_centers = self.find_centers(pedestrians_3d_pts) - obj_dims = self.find_dims(pedestrians_3d_pts) - obj_vels = self.find_vels(track_ids, obj_centers) - + # TODO: CONVERT FROM LIDAR FRAME TO VEHICLE FRAME HERE (vehicle frame is center of rear axle at vehicle's current location) # If in vehicle frame, transform centers from top_lidar frame to vehicle frame # Need to transform the center point one by one since matrix op can't deal with empty points if self.vehicle_frame: @@ -259,6 +254,17 @@ def update_object_states(self, track_result: List[Results], extracted_pts_all: L obj_centers_vehicle.append(np.array(())) obj_centers = obj_centers_vehicle + # TODO: Slower but cleaner to pass dicts of AgentState + # or at least {track_ids: centers/pts/etc} + # TODO: Combine funcs for efficiency in C. + # Separate numpy prob still faster for now + obj_centers = self.find_centers(pedestrians_3d_pts) + obj_dims = self.find_dims(pedestrians_3d_pts) + + # TODO: CONVERT FROM VEHICLE FRAME TO START FRAME HERE + # Then compare previous and current agents with the same id to calculate velocity + obj_vels = self.find_vels(track_ids, obj_centers) + # Update Current AgentStates for ind in range(num_objs): obj_center = (None, None, None) if obj_centers[ind].size == 0 else obj_centers[ind] From 8a7ceb64c7910f25b53ca806257e5ede867be2c3 Mon Sep 17 00:00:00 2001 From: LucasEby Date: Tue, 18 Feb 2025 11:31:55 -0500 Subject: [PATCH 34/46] Wrote in logic to finish part 3. Still need to implement 1-2 transforms and to fix a small list bug --- GEMstack/onboard/perception/AgentTracker.py | 144 ----------------- GEMstack/onboard/perception/IdTracker.py | 8 +- GEMstack/onboard/perception/PrevAgent.py | 17 -- .../perception/pedestrian_detection.py | 149 +++++++++++++++--- 4 files changed, 129 insertions(+), 189 deletions(-) delete mode 100644 GEMstack/onboard/perception/AgentTracker.py delete mode 100644 GEMstack/onboard/perception/PrevAgent.py diff --git a/GEMstack/onboard/perception/AgentTracker.py b/GEMstack/onboard/perception/AgentTracker.py deleted file mode 100644 index 55e2cd917..000000000 --- a/GEMstack/onboard/perception/AgentTracker.py +++ /dev/null @@ -1,144 +0,0 @@ -import math -from typing import Dict, List - -from GEMstack.onboard.perception.IdTracker import IdTracker -from GEMstack.onboard.perception.PrevAgent import PrevAgent -from GEMstack.state.agent import AgentState - - -class AgentTracker(): - """Associates and tracks AgentState agents. - """ - def __init__(self): - # List of PrevAgent objects (each keeps track of the last seen state and time since seen) - self.prev_agents: List[PrevAgent] = [] - # List of currently visible AgentState objects - self.current_agents: List[AgentState] = [] - # Maximum time (in seconds) to keep a lost agent before dropping it. - self.drop_agent_t: float = 1.0 - # Id tracker for creating new unique pedestrian IDs. - self.id_tracker = IdTracker() - - def assign_ids(self, agents: list) -> Dict[str,AgentState]: - # Act with the assumption that you are being sent a list of AgentState objects and you need to use the object fields to keep track of them for your task - # Further act on the assumption that we will decide the id's of the pedestrians by assuming that 2 pedestrians are the same pedestrian if a - # previously stored AgentState pose and dimensions overlap with a newly passed in AgentState - # Act on the assumption that the AgentState objects are all in reference to the start frame of the vehicle - # some helper functions in this class, LostAgent.py, and IdTracker.py have been created to try to help you out with your task. - # Assume that the output returned from this function will be a dictionary of AgentState objects with the key corresponding to their id - """ - Associates new AgentState objects with existing tracked agents based on overlap. - If an agent does not match any previously tracked agent, a new unique id is assigned. - Also updates the “lost” time for agents that are not matched in this frame. - - Parameters: - agents (list): List of AgentState objects for the current frame - (already converted to the start frame of reference). - - Returns: - Dict[str, AgentState]: Dictionary mapping agent id (as a string) to AgentState. - """ - dt = 1.0 # Assuming a fixed time-step of 1 second (for simplicity) - output_agents: Dict[str, AgentState] = {} - matched_ids = set() - updated_prev_agents: List[PrevAgent] = [] - - # Process each new agent from the current frame. - for new_agent in agents: - found_match = None - # Look for a previously tracked agent whose bounding box overlaps. - for prev in self.prev_agents: - if prev.last_id in matched_ids: - continue # Already matched with another new agent. - if self.__agents_overlap(prev.last_state, new_agent): - found_match = prev - break - - if found_match is not None: - # update velocity using the change in position. - if hasattr(new_agent, 'velocity'): - new_agent.velocity = self.__calculate_velocity(found_match.last_state, new_agent, dt) - - # Update the matched agent’s state and reset its lost-time counter. - found_match.last_state = new_agent - found_match.time_since_seen = 0.0 - matched_ids.add(found_match.last_id) - output_agents[str(found_match.last_id)] = new_agent - updated_prev_agents.append(found_match) - else: - # No match found – assign a new unique id. - new_id = self.id_tracker.get_new_ped_id() - new_prev = PrevAgent(new_id, new_agent) - # initialize velocity to 0. - if hasattr(new_agent, 'velocity'): - new_agent.velocity = 0.0 - output_agents[str(new_id)] = new_agent - updated_prev_agents.append(new_prev) - - # For all previously tracked agents that were not matched this frame, - # update their time-since-seen and only keep them if they have not timed out. - for prev in self.prev_agents: - if prev.last_id not in matched_ids: - prev.update_time(dt) - if prev.time_since_seen < self.drop_agent_t: - updated_prev_agents.append(prev) - - # Save the updated list of tracked agents. - self.prev_agents = updated_prev_agents - # update current_agents to include only those seen in the current frame. - self.current_agents = list(output_agents.values()) - return output_agents - - def __convert_to_start_frame(self): - """Converts a list of AgentState agents from ouster Lidar frame of - reference (which is in reference to the current frame) to start - frame frame of reference - """ - # you can ignore this function akul - pass - - def __agents_overlap(self, ped1, ped2) -> bool: - """ - Determines if two AgentState objects overlap based on their pose and dimensions. - - Assumes each AgentState has: - - pose with attributes x and y. - - dimensions with attributes width and height. - - Returns: - bool: True if they overlap; False otherwise. - """ - # Get first agent's properties. - x1, y1 = ped1.pose.x, ped1.pose.y - w1, h1 = ped1.dimensions.width, ped1.dimensions.height - - # Get second agent's properties. - x2, y2 = ped2.pose.x, ped2.pose.y - w2, h2 = ped2.dimensions.width, ped2.dimensions.height - - # Compute bounding boxes (assuming (x, y) is the center). - left1, right1 = x1 - w1 / 2, x1 + w1 / 2 - top1, bottom1 = y1 - h1 / 2, y1 + h1 / 2 - - left2, right2 = x2 - w2 / 2, x2 + w2 / 2 - top2, bottom2 = y2 - h2 / 2, y2 + h2 / 2 - - # Check for overlap between the bounding boxes. - overlap = not (right1 < left2 or left1 > right2 or bottom1 < top2 or top1 > bottom2) - return overlap - - def __calculate_velocity(self, old_state: AgentState, new_state: AgentState, dt: float) -> float: - """ - Calculates the velocity based on the change in pose over time. - - Parameters: - old_state (AgentState): The previous state. - new_state (AgentState): The current state. - dt (float): Time difference between the two states. - - Returns: - float: The computed velocity. - """ - dx = new_state.pose.x - old_state.pose.x - dy = new_state.pose.y - old_state.pose.y - return math.sqrt(dx * dx + dy * dy) / dt if dt > 0 else 0.0 diff --git a/GEMstack/onboard/perception/IdTracker.py b/GEMstack/onboard/perception/IdTracker.py index 1c9c6ecbd..15641fa00 100644 --- a/GEMstack/onboard/perception/IdTracker.py +++ b/GEMstack/onboard/perception/IdTracker.py @@ -2,11 +2,11 @@ class IdTracker(): """Abstracts out id tracking """ def __init__(self): - self.__ped_id = 0 + self.__id = 0 - def get_new_ped_id(self) -> int: - """Returns a unique pedestrian id + def get_new_id(self) -> int: + """Returns a unique agent id """ assigned_id = self.__id - self.__ped_id += 1 # id will intentionally overflow to get back to 0 + self.__id += 1 # id will intentionally overflow to get back to 0 return assigned_id diff --git a/GEMstack/onboard/perception/PrevAgent.py b/GEMstack/onboard/perception/PrevAgent.py deleted file mode 100644 index 210cea556..000000000 --- a/GEMstack/onboard/perception/PrevAgent.py +++ /dev/null @@ -1,17 +0,0 @@ -# GEM imports: -from ...state import AgentState - -class PrevAgent(): - def __init__(self, last_id: int, last_state: AgentState): - self.time_since_seen: float = 0.0 # Time since the agent was last seen in seconds - self.last_id: int = last_id - self.last_state: AgentState = last_state - - def update_time(time: float): - """Updates the time since the agent was last seen - """ - if time <= 0: - # TODO: log error here - print("UPDATE TIME FOR LOST AGENT WAS LESS THAN OR EQUAL TO 0") - else: - self.time_since_seen += time \ No newline at end of file diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py index 519d85505..e5832601d 100644 --- a/GEMstack/onboard/perception/pedestrian_detection.py +++ b/GEMstack/onboard/perception/pedestrian_detection.py @@ -33,6 +33,7 @@ import os from typing import List, Dict from collections import defaultdict +from datetime import datetime # ROS, CV import rospy import message_filters @@ -48,7 +49,7 @@ from .pedestrian_detection_utils import * from ..interface.gem import GEMInterface from ..component import Component -from .AgentTracker import AgentTracker +from .IdTracker import IdTracker def box_to_fake_agent(box): @@ -110,6 +111,10 @@ def __init__(self, vehicle_interface : GEMInterface) -> None: self.tf_listener = tf.TransformListener() if self.debug: self.init_debug() + + self.prev_time = None # Time in seconds since last scan for basic velocity calculation + self.current_time = None # Time in seconds of current scan for basic velocity calculation + self.id_tracker = IdTracker() def init_debug(self,) -> None: # Debug Publishers @@ -144,7 +149,7 @@ def find_dims(self, clusters: List[List[np.ndarray]]) -> List[np.ndarray]: # Work towards own tracking class instead of simple YOLO track? # Fix division by time # ret: Dict[track_id: vel[x, y, z]] - def find_vels(self, track_ids: List[int], obj_centers: List[np.ndarray]) -> Dict[int, np.ndarray]: + def find_vels(self, track_ids: List[int], obj_centers: List[np.ndarray], obj_dims: List[np.ndarray]) -> Dict[int, np.ndarray]: # Object not seen -> velocity = None track_id_center_map = dict(zip(track_ids, obj_centers)) vels = defaultdict(lambda: np.array(())) # None is faster, np.array matches other find_ methods. @@ -159,8 +164,7 @@ def find_vels(self, track_ids: List[int], obj_centers: List[np.ndarray]) -> Dict if prev_agent.pose.x and prev_agent.pose.y and prev_agent.pose.z and track_id_center_map[prev_agent.track_id].shape == 3: vels[prev_track_id] = (track_id_center_map[prev_track_id] - np.array([prev_agent.pose.x, prev_agent.pose.y, prev_agent.pose.z])) / (self.curr_time - self.prev_time) return vels - - + # TODO: Separate debug/viz class, bbox and 2d 3d points funcs def viz_object_states(self, cv_image, boxes, extracted_pts_all): # Extract 3D pedestrians points in lidar frame @@ -221,7 +225,7 @@ def viz_object_states(self, cv_image, boxes, extracted_pts_all): def update_object_states(self, track_result: List[Results], extracted_pts_all: List[np.ndarray]) -> None: - self.prev_agents = self.current_agents.copy() + # self.prev_agents = self.current_agents.copy() self.current_agents.clear() # Return if no track results available @@ -263,27 +267,124 @@ def update_object_states(self, track_result: List[Results], extracted_pts_all: L # TODO: CONVERT FROM VEHICLE FRAME TO START FRAME HERE # Then compare previous and current agents with the same id to calculate velocity - obj_vels = self.find_vels(track_ids, obj_centers) - - # Update Current AgentStates - for ind in range(num_objs): - obj_center = (None, None, None) if obj_centers[ind].size == 0 else obj_centers[ind] - obj_dim = (None, None, None) if obj_dims[ind].size == 0 else obj_dims[ind] - self.current_agents[track_ids[ind]] = ( - AgentState( - track_id = track_ids[ind], - pose=ObjectPose(t=0, x=obj_center[0], y=obj_center[1], z=obj_center[2] ,yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT), - # (l, w, h) - # TODO: confirm (z -> l, x -> w, y -> h) - dimensions=(obj_dim[0], obj_dim[1], obj_dim[2]), - outline=None, - type=AgentEnum.PEDESTRIAN, - activity=AgentActivityEnum.MOVING, - velocity= None if obj_vels[track_ids[ind]].size == 0 else tuple(obj_vels[track_ids[ind]]), - yaw_rate=0 - )) + self.find_vels_and_ids(obj_centers, obj_dims) + # obj_vels = self.find_vels(track_ids, obj_centers) + + # # Update Current AgentStates + # for ind in range(num_objs): + # obj_center = (None, None, None) if obj_centers[ind].size == 0 else obj_centers[ind] + # obj_dim = (None, None, None) if obj_dims[ind].size == 0 else obj_dims[ind] + # self.current_agents[track_ids[ind]] = ( + # AgentState( + # track_id = track_ids[ind], + # pose=ObjectPose(t=0, x=obj_center[0], y=obj_center[1], z=obj_center[2] ,yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT), + # # (l, w, h) + # # TODO: confirm (z -> l, x -> w, y -> h) + # dimensions=(obj_dim[0], obj_dim[1], obj_dim[2]), + # outline=None, + # type=AgentEnum.PEDESTRIAN, + # activity=AgentActivityEnum.MOVING, + # velocity= None if obj_vels[track_ids[ind]].size == 0 else tuple(obj_vels[track_ids[ind]]), + # yaw_rate=0 + # )) + + def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.ndarray]): + new_prev_agents = {} # Stores current agents in START frame for next time through (since + # planning wants us to send them agents in CURRENT frame) + # Object not seen -> velocity = None + vels = defaultdict(lambda: np.array(())) # None is faster, np.array matches other find_ methods. + + # THIS ASSUMES EVERYTHING IS IN THE SAME FRAME WHICH WOULD WORK FOR STATIONARY CAR. + # TODO: NEED TO STORE AND INCORPORATE TRANSFORMS SOMEHOW TO DEAL WITH MOVING CAR CASE + assigned = False + num_pairings = len(obj_centers) + converted_centers = obj_centers # TODO: REPLACE WITH THIS: self.convert_vehicle_frame_to_start_frame(obj_centers) + + # Loop through the indexes of the obj_center and obj_dim pairings + for idx in range(num_pairings): + assigned = False + + # Loop through previous agents backwards + for prev_id, prev_state in reversed(self.prev_agents.items()): + # If an obj_center and obj_dim pair overlaps with a previous agent, assume that they're the same agent + if self.agents_overlap(converted_centers[idx], obj_dims[idx], prev_state): + assigned = True + + if self.prev_time == None: + # This shouldn't ever be triggered + vel = 0 + else: + delta_t = self.curr_time - self.prev_time + vel = (obj_centers[idx] - np.array([prev_state.pose.x, prev_state.pose.y, prev_state.pose.z])) / delta_t.total_seconds() + print("VELOCITY:") + print(vel) + + self.current_agents[prev_id] = ( + AgentState( + track_id = prev_id, + pose=ObjectPose(t=0, x=obj_centers[idx][0], y=obj_centers[idx][1], z=obj_centers[idx][2], yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT), + # (l, w, h) + # TODO: confirm (z -> l, x -> w, y -> h) + dimensions=(obj_dims[idx][0], obj_dims[idx][1], obj_dims[idx][2]), + outline=None, + type=AgentEnum.PEDESTRIAN, + activity=AgentActivityEnum.MOVING, + velocity= None if vel.size == 0 else tuple(vel), + yaw_rate=0 + )) + del self.prev_agents[prev_id] # Remove previous agent from previous agents + break + + # If not assigned: + if not assigned: + # Set velocity to 0 and assign the new agent a new id with IdTracker + id = self.id_tracker.get_new_id() + self.current_agents[id] = ( + AgentState( + track_id = id, + pose=ObjectPose(t=0, x=obj_centers[idx][0], y=obj_centers[idx][1], z=obj_centers[idx][2] ,yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT), + # (l, w, h) + # TODO: confirm (z -> l, x -> w, y -> h) + dimensions=(obj_dims[idx][0], obj_dims[idx][1], obj_dims[idx][2]), + outline=None, + type=AgentEnum.PEDESTRIAN, + activity=AgentActivityEnum.MOVING, + velocity= None, + yaw_rate=0 + )) + self.prev_agents = new_prev_agents + + # Calculates whether 2 agents overlap in START frame. True if they do, false if not + def agents_overlap(obj_center: np.ndarray, obj_dim: np.ndarray, prev_agent: AgentState) -> bool: + # Calculate corners of obj_center and obj_dim pairing + x1_min, x1_max = obj_center[0] - obj_dim[0] / 2.0, obj_center[0] + obj_dim[0] / 2.0 + y1_min, y1_max = obj_center[1] - obj_dim[1] / 2.0, obj_center[1] + obj_dim[1] / 2.0 # CENTER CALCULATION + z1_min, z1_max = obj_center[2] - obj_dim[2] / 2.0, obj_center[2] + obj_dim[2] / 2.0 + + # Calculate corners of AgentState + # Beware: AgentState(PhysicalObject) builds bbox from + # dims [-l/2,l/2] x [-w/2,w/2] x [0,h], not + # [-l/2,l/2] x [-w/2,w/2] x [-h/2,h/2] + # TODO: confirm (z -> l, x -> w, y -> h) + x2_min, x2_max = prev_agent.pose.x - prev_agent.dimensions.width / 2.0, prev_agent.pose.x + prev_agent.dimensions.width / 2.0 + y2_min, y2_max = prev_agent.pose.y, prev_agent.pose.y + prev_agent.dimensions.height # AGENT STATE CALCULATION + z2_min, z2_max = prev_agent.pose.z - prev_agent.dimensions.length / 2.0, prev_agent.pose.z + prev_agent.dimensions.length / 2.0 + + # True if they overlap, false if not + return ( + ( (x1_min <= x2_min and x2_min <= x1_max) or (x2_min <= x1_min and x1_min <= x2_max) ) and + ( (y1_min <= y2_min and y2_min <= y1_max) or (y2_min <= y1_min and y1_min <= y2_max) ) and + ( (z1_min <= z2_min and z2_min <= z1_max) or (z2_min <= z1_min and z1_min <= z2_max) ) + ) + + def convert_vehicle_frame_to_start_frame(obj_centers: List[np.ndarray]) -> List[np.ndarray]: + pass def ouster_oak_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2): + # Update times for basic velocity calculation + self.prev_time = self.current_time + self.current_time = datetime.now() + # Convert to cv2 image and run detector cv_image = self.bridge.imgmsg_to_cv2(rgb_image_msg, "bgr8") track_result = self.detector.track(source=cv_image, classes=self.classes_to_detect, persist=True, conf=self.confidence) From 2aa0ba6ef0dc690ac3e74439b2bb1bbb168e75c5 Mon Sep 17 00:00:00 2001 From: LucasEby Date: Tue, 18 Feb 2025 16:25:47 -0500 Subject: [PATCH 35/46] Fixed find_vels_and_ids logic bugs, AgentState bugs, timing bugs, etc --- .../perception/pedestrian_detection.py | 66 ++++++++++++++----- 1 file changed, 51 insertions(+), 15 deletions(-) diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py index e5832601d..c515ffe07 100644 --- a/GEMstack/onboard/perception/pedestrian_detection.py +++ b/GEMstack/onboard/perception/pedestrian_detection.py @@ -82,8 +82,6 @@ def __init__(self, vehicle_interface : GEMInterface) -> None: self.prev_agents = dict() self.current_agents = dict() # TODO Implement time - self.prev_time = 0 - self.curr_time = 1 # Avoid divide by 0 for placebolder, 0 self.confidence = 0.7 self.classes_to_detect = 0 self.ground_threshold = -2.0 @@ -113,7 +111,7 @@ def __init__(self, vehicle_interface : GEMInterface) -> None: if self.debug: self.init_debug() self.prev_time = None # Time in seconds since last scan for basic velocity calculation - self.current_time = None # Time in seconds of current scan for basic velocity calculation + self.curr_time = None # Time in seconds of current scan for basic velocity calculation self.id_tracker = IdTracker() def init_debug(self,) -> None: @@ -267,6 +265,9 @@ def update_object_states(self, track_result: List[Results], extracted_pts_all: L # TODO: CONVERT FROM VEHICLE FRAME TO START FRAME HERE # Then compare previous and current agents with the same id to calculate velocity + # for idx in range(len(obj_centers)): + # print(obj_centers[idx]) + # print(obj_dims[idx]) self.find_vels_and_ids(obj_centers, obj_dims) # obj_vels = self.find_vels(track_ids, obj_centers) @@ -289,6 +290,12 @@ def update_object_states(self, track_result: List[Results], extracted_pts_all: L # )) def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.ndarray]): + # Gate to check whether dims and centers are empty: + if ((len(obj_dims) == 1 or len(obj_centers) == 1) and (obj_centers[0].size == 0 or obj_dims[0].size == 0)): + self.prev_agents = self.current_agents.copy() + self.current_agents = {} # There weren't any pedestrians detected + return + new_prev_agents = {} # Stores current agents in START frame for next time through (since # planning wants us to send them agents in CURRENT frame) # Object not seen -> velocity = None @@ -298,6 +305,9 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda # TODO: NEED TO STORE AND INCORPORATE TRANSFORMS SOMEHOW TO DEAL WITH MOVING CAR CASE assigned = False num_pairings = len(obj_centers) + if num_pairings != len(obj_dims): + for i in range(25): + print("ERROR NUM PARINGS AND OBJ DIMS ARENT THE SAME") converted_centers = obj_centers # TODO: REPLACE WITH THIS: self.convert_vehicle_frame_to_start_frame(obj_centers) # Loop through the indexes of the obj_center and obj_dim pairings @@ -311,11 +321,11 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda assigned = True if self.prev_time == None: - # This shouldn't ever be triggered - vel = 0 + # This will be triggered if the very first message has pedestrians in it + vel = 0.0 else: delta_t = self.curr_time - self.prev_time - vel = (obj_centers[idx] - np.array([prev_state.pose.x, prev_state.pose.y, prev_state.pose.z])) / delta_t.total_seconds() + vel = (converted_centers[idx] - np.array([prev_state.pose.x, prev_state.pose.y, prev_state.pose.z])) / delta_t.total_seconds() print("VELOCITY:") print(vel) @@ -329,7 +339,20 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda outline=None, type=AgentEnum.PEDESTRIAN, activity=AgentActivityEnum.MOVING, - velocity= None if vel.size == 0 else tuple(vel), + velocity=None if vel.size == 0 else tuple(vel), + yaw_rate=0 + )) + new_prev_agents[prev_id] = ( + AgentState( + track_id = prev_id, + pose=ObjectPose(t=0, x=converted_centers[idx][0], y=converted_centers[idx][1], z=converted_centers[idx][2], yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT), + # (l, w, h) + # TODO: confirm (z -> l, x -> w, y -> h) + dimensions=(obj_dims[idx][0], obj_dims[idx][1], obj_dims[idx][2]), + outline=None, + type=AgentEnum.PEDESTRIAN, + activity=AgentActivityEnum.MOVING, + velocity=None if vel.size == 0 else tuple(vel), yaw_rate=0 )) del self.prev_agents[prev_id] # Remove previous agent from previous agents @@ -349,13 +372,26 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda outline=None, type=AgentEnum.PEDESTRIAN, activity=AgentActivityEnum.MOVING, - velocity= None, + velocity=None, + yaw_rate=0 + )) + new_prev_agents[id] = ( + AgentState( + track_id = id, + pose=ObjectPose(t=0, x=converted_centers[idx][0], y=converted_centers[idx][1], z=converted_centers[idx][2] ,yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT), + # (l, w, h) + # TODO: confirm (z -> l, x -> w, y -> h) + dimensions=(obj_dims[idx][0], obj_dims[idx][1], obj_dims[idx][2]), + outline=None, + type=AgentEnum.PEDESTRIAN, + activity=AgentActivityEnum.MOVING, + velocity=None, yaw_rate=0 )) self.prev_agents = new_prev_agents # Calculates whether 2 agents overlap in START frame. True if they do, false if not - def agents_overlap(obj_center: np.ndarray, obj_dim: np.ndarray, prev_agent: AgentState) -> bool: + def agents_overlap(self, obj_center: np.ndarray, obj_dim: np.ndarray, prev_agent: AgentState) -> bool: # Calculate corners of obj_center and obj_dim pairing x1_min, x1_max = obj_center[0] - obj_dim[0] / 2.0, obj_center[0] + obj_dim[0] / 2.0 y1_min, y1_max = obj_center[1] - obj_dim[1] / 2.0, obj_center[1] + obj_dim[1] / 2.0 # CENTER CALCULATION @@ -366,9 +402,9 @@ def agents_overlap(obj_center: np.ndarray, obj_dim: np.ndarray, prev_agent: Agen # dims [-l/2,l/2] x [-w/2,w/2] x [0,h], not # [-l/2,l/2] x [-w/2,w/2] x [-h/2,h/2] # TODO: confirm (z -> l, x -> w, y -> h) - x2_min, x2_max = prev_agent.pose.x - prev_agent.dimensions.width / 2.0, prev_agent.pose.x + prev_agent.dimensions.width / 2.0 - y2_min, y2_max = prev_agent.pose.y, prev_agent.pose.y + prev_agent.dimensions.height # AGENT STATE CALCULATION - z2_min, z2_max = prev_agent.pose.z - prev_agent.dimensions.length / 2.0, prev_agent.pose.z + prev_agent.dimensions.length / 2.0 + x2_min, x2_max = prev_agent.pose.x - prev_agent.dimensions[0] / 2.0, prev_agent.pose.x + prev_agent.dimensions[0] / 2.0 + y2_min, y2_max = prev_agent.pose.y, prev_agent.pose.y + prev_agent.dimensions[1] # AGENT STATE CALCULATION + z2_min, z2_max = prev_agent.pose.z - prev_agent.dimensions[2] / 2.0, prev_agent.pose.z + prev_agent.dimensions[2] / 2.0 # True if they overlap, false if not return ( @@ -377,13 +413,13 @@ def agents_overlap(obj_center: np.ndarray, obj_dim: np.ndarray, prev_agent: Agen ( (z1_min <= z2_min and z2_min <= z1_max) or (z2_min <= z1_min and z1_min <= z2_max) ) ) - def convert_vehicle_frame_to_start_frame(obj_centers: List[np.ndarray]) -> List[np.ndarray]: + def convert_vehicle_frame_to_start_frame(self, obj_centers: List[np.ndarray]) -> List[np.ndarray]: pass def ouster_oak_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2): # Update times for basic velocity calculation - self.prev_time = self.current_time - self.current_time = datetime.now() + self.prev_time = self.curr_time + self.curr_time = datetime.now() # Convert to cv2 image and run detector cv_image = self.bridge.imgmsg_to_cv2(rgb_image_msg, "bgr8") From f080070f15fa62abfec8f4f22f6949331ef7421b Mon Sep 17 00:00:00 2001 From: LucasEby Date: Tue, 18 Feb 2025 17:55:44 -0500 Subject: [PATCH 36/46] I think I updated a comment --- .../onboard/perception/pedestrian_detection.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py index c515ffe07..f78b0c686 100644 --- a/GEMstack/onboard/perception/pedestrian_detection.py +++ b/GEMstack/onboard/perception/pedestrian_detection.py @@ -86,7 +86,7 @@ def __init__(self, vehicle_interface : GEMInterface) -> None: self.classes_to_detect = 0 self.ground_threshold = -2.0 self.max_human_depth = 0.9 - self.vehicle_frame = False # Indicate whether pedestrians centroids and point clouds are in the vehicle frame + self.vehicle_frame = True # Indicate whether pedestrians centroids and point clouds are in the vehicle frame # Load calibration data # TODO: Maybe lets add one word or link what R t K are? @@ -290,11 +290,17 @@ def update_object_states(self, track_result: List[Results], extracted_pts_all: L # )) def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.ndarray]): - # Gate to check whether dims and centers are empty: + # Gate to check whether dims and centers are empty (will happen if no pedestrians are scanned): if ((len(obj_dims) == 1 or len(obj_centers) == 1) and (obj_centers[0].size == 0 or obj_dims[0].size == 0)): self.prev_agents = self.current_agents.copy() self.current_agents = {} # There weren't any pedestrians detected return + elif (len(obj_centers) != len(obj_dims)): + for i in range(25): + print("ERROR NUM PARINGS AND OBJ DIMS ARENT THE SAME") + self.prev_agents = self.current_agents.copy() + self.current_agents = {} # There weren't any pedestrians detected + return new_prev_agents = {} # Stores current agents in START frame for next time through (since # planning wants us to send them agents in CURRENT frame) @@ -305,9 +311,7 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda # TODO: NEED TO STORE AND INCORPORATE TRANSFORMS SOMEHOW TO DEAL WITH MOVING CAR CASE assigned = False num_pairings = len(obj_centers) - if num_pairings != len(obj_dims): - for i in range(25): - print("ERROR NUM PARINGS AND OBJ DIMS ARENT THE SAME") + converted_centers = obj_centers # TODO: REPLACE WITH THIS: self.convert_vehicle_frame_to_start_frame(obj_centers) # Loop through the indexes of the obj_center and obj_dim pairings @@ -326,8 +330,8 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda else: delta_t = self.curr_time - self.prev_time vel = (converted_centers[idx] - np.array([prev_state.pose.x, prev_state.pose.y, prev_state.pose.z])) / delta_t.total_seconds() - print("VELOCITY:") - print(vel) + print("VELOCITY:") + print(vel) self.current_agents[prev_id] = ( AgentState( From db6d689ee54ea33dd1f29a6ebde756aaeb55fdd2 Mon Sep 17 00:00:00 2001 From: LucasEby Date: Tue, 18 Feb 2025 18:01:53 -0500 Subject: [PATCH 37/46] Cleaned up code a bit. Will add Vikram's suggestions in Slack next --- .../perception/pedestrian_detection.py | 34 +++---------------- 1 file changed, 5 insertions(+), 29 deletions(-) diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py index f78b0c686..bf4b10e12 100644 --- a/GEMstack/onboard/perception/pedestrian_detection.py +++ b/GEMstack/onboard/perception/pedestrian_detection.py @@ -242,7 +242,6 @@ def update_object_states(self, track_result: List[Results], extracted_pts_all: L if len(pedestrians_3d_pts) != num_objs: raise Exception('Perception - Camera detections, points clusters num. mismatch') - # TODO: CONVERT FROM LIDAR FRAME TO VEHICLE FRAME HERE (vehicle frame is center of rear axle at vehicle's current location) # If in vehicle frame, transform centers from top_lidar frame to vehicle frame # Need to transform the center point one by one since matrix op can't deal with empty points if self.vehicle_frame: @@ -260,35 +259,15 @@ def update_object_states(self, track_result: List[Results], extracted_pts_all: L # or at least {track_ids: centers/pts/etc} # TODO: Combine funcs for efficiency in C. # Separate numpy prob still faster for now - obj_centers = self.find_centers(pedestrians_3d_pts) + obj_centers = self.find_centers(pedestrians_3d_pts) # Centers are calculated in current vehicle frame here (center of rear axle) obj_dims = self.find_dims(pedestrians_3d_pts) # TODO: CONVERT FROM VEHICLE FRAME TO START FRAME HERE - # Then compare previous and current agents with the same id to calculate velocity - # for idx in range(len(obj_centers)): - # print(obj_centers[idx]) - # print(obj_dims[idx]) self.find_vels_and_ids(obj_centers, obj_dims) - # obj_vels = self.find_vels(track_ids, obj_centers) - - # # Update Current AgentStates - # for ind in range(num_objs): - # obj_center = (None, None, None) if obj_centers[ind].size == 0 else obj_centers[ind] - # obj_dim = (None, None, None) if obj_dims[ind].size == 0 else obj_dims[ind] - # self.current_agents[track_ids[ind]] = ( - # AgentState( - # track_id = track_ids[ind], - # pose=ObjectPose(t=0, x=obj_center[0], y=obj_center[1], z=obj_center[2] ,yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT), - # # (l, w, h) - # # TODO: confirm (z -> l, x -> w, y -> h) - # dimensions=(obj_dim[0], obj_dim[1], obj_dim[2]), - # outline=None, - # type=AgentEnum.PEDESTRIAN, - # activity=AgentActivityEnum.MOVING, - # velocity= None if obj_vels[track_ids[ind]].size == 0 else tuple(obj_vels[track_ids[ind]]), - # yaw_rate=0 - # )) + # TODO: Refactor to make more efficient + # TODO: Moving Average across last N iterations pos/vel? Less spurious vals + # TODO Fix velocity calculation to calculate in ObjectFrameEnum.START def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.ndarray]): # Gate to check whether dims and centers are empty (will happen if no pedestrians are scanned): if ((len(obj_dims) == 1 or len(obj_centers) == 1) and (obj_centers[0].size == 0 or obj_dims[0].size == 0)): @@ -304,14 +283,11 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda new_prev_agents = {} # Stores current agents in START frame for next time through (since # planning wants us to send them agents in CURRENT frame) - # Object not seen -> velocity = None - vels = defaultdict(lambda: np.array(())) # None is faster, np.array matches other find_ methods. - # THIS ASSUMES EVERYTHING IS IN THE SAME FRAME WHICH WOULD WORK FOR STATIONARY CAR. - # TODO: NEED TO STORE AND INCORPORATE TRANSFORMS SOMEHOW TO DEAL WITH MOVING CAR CASE assigned = False num_pairings = len(obj_centers) + # TODO: NEED TO STORE AND INCORPORATE TRANSFORMS SOMEHOW TO DEAL WITH MOVING CAR CASE converted_centers = obj_centers # TODO: REPLACE WITH THIS: self.convert_vehicle_frame_to_start_frame(obj_centers) # Loop through the indexes of the obj_center and obj_dim pairings From bf80bbeb034e8d90c22937d0c416833ceaec6721 Mon Sep 17 00:00:00 2001 From: LucasEby Date: Tue, 18 Feb 2025 18:33:13 -0500 Subject: [PATCH 38/46] Accidentally moved a few lines of code. Fixed the error that resulted --- .../onboard/perception/pedestrian_detection.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py index bf4b10e12..9e7815c93 100644 --- a/GEMstack/onboard/perception/pedestrian_detection.py +++ b/GEMstack/onboard/perception/pedestrian_detection.py @@ -242,6 +242,13 @@ def update_object_states(self, track_result: List[Results], extracted_pts_all: L if len(pedestrians_3d_pts) != num_objs: raise Exception('Perception - Camera detections, points clusters num. mismatch') + # TODO: Slower but cleaner to pass dicts of AgentState + # or at least {track_ids: centers/pts/etc} + # TODO: Combine funcs for efficiency in C. + # Separate numpy prob still faster for now + obj_centers = self.find_centers(pedestrians_3d_pts) # Centers are calculated in lidar frame here + obj_dims = self.find_dims(pedestrians_3d_pts) + # If in vehicle frame, transform centers from top_lidar frame to vehicle frame # Need to transform the center point one by one since matrix op can't deal with empty points if self.vehicle_frame: @@ -254,13 +261,6 @@ def update_object_states(self, track_result: List[Results], extracted_pts_all: L else: obj_centers_vehicle.append(np.array(())) obj_centers = obj_centers_vehicle - - # TODO: Slower but cleaner to pass dicts of AgentState - # or at least {track_ids: centers/pts/etc} - # TODO: Combine funcs for efficiency in C. - # Separate numpy prob still faster for now - obj_centers = self.find_centers(pedestrians_3d_pts) # Centers are calculated in current vehicle frame here (center of rear axle) - obj_dims = self.find_dims(pedestrians_3d_pts) # TODO: CONVERT FROM VEHICLE FRAME TO START FRAME HERE self.find_vels_and_ids(obj_centers, obj_dims) From 6addddb87a9959c71e9c8821cb739f0be8625a7e Mon Sep 17 00:00:00 2001 From: LucasEby Date: Tue, 18 Feb 2025 19:17:49 -0500 Subject: [PATCH 39/46] In the process of converting to start frame. Pushing current WORKING code for Vikram --- .../perception/pedestrian_detection.py | 22 +++++++++++-------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py index 9e7815c93..2c24f3bef 100644 --- a/GEMstack/onboard/perception/pedestrian_detection.py +++ b/GEMstack/onboard/perception/pedestrian_detection.py @@ -45,13 +45,14 @@ from ultralytics import YOLO from ultralytics.engine.results import Results, Boxes # GEMStack -from ...state import AllState,VehicleState,ObjectPose,ObjectFrameEnum,AgentState,AgentEnum,AgentActivityEnum +from ...state import AllState,VehicleState,ObjectPose,ObjectFrameEnum,AgentState,AgentEnum,AgentActivityEnum,ObjectFrameEnum from .pedestrian_detection_utils import * from ..interface.gem import GEMInterface from ..component import Component from .IdTracker import IdTracker + def box_to_fake_agent(box): """Creates a fake agent state from an (x,y,w,h) bounding box. @@ -262,7 +263,6 @@ def update_object_states(self, track_result: List[Results], extracted_pts_all: L obj_centers_vehicle.append(np.array(())) obj_centers = obj_centers_vehicle - # TODO: CONVERT FROM VEHICLE FRAME TO START FRAME HERE self.find_vels_and_ids(obj_centers, obj_dims) # TODO: Refactor to make more efficient @@ -288,7 +288,7 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda num_pairings = len(obj_centers) # TODO: NEED TO STORE AND INCORPORATE TRANSFORMS SOMEHOW TO DEAL WITH MOVING CAR CASE - converted_centers = obj_centers # TODO: REPLACE WITH THIS: self.convert_vehicle_frame_to_start_frame(obj_centers) + converted_centers = obj_centers # TODO: Add this in when finished self.convert_vehicle_frame_to_start_frame(obj_centers) # Loop through the indexes of the obj_center and obj_dim pairings for idx in range(num_pairings): @@ -302,7 +302,7 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda if self.prev_time == None: # This will be triggered if the very first message has pedestrians in it - vel = 0.0 + vel = np.array([0, 0, 0]) else: delta_t = self.curr_time - self.prev_time vel = (converted_centers[idx] - np.array([prev_state.pose.x, prev_state.pose.y, prev_state.pose.z])) / delta_t.total_seconds() @@ -318,7 +318,7 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda dimensions=(obj_dims[idx][0], obj_dims[idx][1], obj_dims[idx][2]), outline=None, type=AgentEnum.PEDESTRIAN, - activity=AgentActivityEnum.MOVING, + activity=AgentActivityEnum.STOPPED if np.all(vel == 0) else AgentActivityEnum.MOVING, velocity=None if vel.size == 0 else tuple(vel), yaw_rate=0 )) @@ -331,7 +331,7 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda dimensions=(obj_dims[idx][0], obj_dims[idx][1], obj_dims[idx][2]), outline=None, type=AgentEnum.PEDESTRIAN, - activity=AgentActivityEnum.MOVING, + activity=AgentActivityEnum.STOPPED if np.all(vel == 0) else AgentActivityEnum.MOVING, velocity=None if vel.size == 0 else tuple(vel), yaw_rate=0 )) @@ -351,7 +351,7 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda dimensions=(obj_dims[idx][0], obj_dims[idx][1], obj_dims[idx][2]), outline=None, type=AgentEnum.PEDESTRIAN, - activity=AgentActivityEnum.MOVING, + activity=AgentActivityEnum.UNDETERMINED, velocity=None, yaw_rate=0 )) @@ -364,7 +364,7 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda dimensions=(obj_dims[idx][0], obj_dims[idx][1], obj_dims[idx][2]), outline=None, type=AgentEnum.PEDESTRIAN, - activity=AgentActivityEnum.MOVING, + activity=AgentActivityEnum.UNDETERMINED, velocity=None, yaw_rate=0 )) @@ -393,9 +393,13 @@ def agents_overlap(self, obj_center: np.ndarray, obj_dim: np.ndarray, prev_agent ( (z1_min <= z2_min and z2_min <= z1_max) or (z2_min <= z1_min and z1_min <= z2_max) ) ) - def convert_vehicle_frame_to_start_frame(self, obj_centers: List[np.ndarray]) -> List[np.ndarray]: + def convert_vehicle_frame_to_start_frame(self, vehicle_states: List[AgentState]) -> List[AgentState]: + # num_states = len(vehicle_states) + # for idx in num_states: + # num_states[idx].to_frame() pass + def ouster_oak_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2): # Update times for basic velocity calculation self.prev_time = self.curr_time From a18d1876b154329aedfd71e4487cef4da8a141a5 Mon Sep 17 00:00:00 2001 From: LucasEby Date: Tue, 18 Feb 2025 21:50:44 -0500 Subject: [PATCH 40/46] Took absolutely forever to find the secret sauce. Please excuse the mess --- .../perception/pedestrian_detection.py | 80 +++++++++++++------ 1 file changed, 54 insertions(+), 26 deletions(-) diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py index 2c24f3bef..381075c9b 100644 --- a/GEMstack/onboard/perception/pedestrian_detection.py +++ b/GEMstack/onboard/perception/pedestrian_detection.py @@ -114,6 +114,10 @@ def __init__(self, vehicle_interface : GEMInterface) -> None: self.prev_time = None # Time in seconds since last scan for basic velocity calculation self.curr_time = None # Time in seconds of current scan for basic velocity calculation self.id_tracker = IdTracker() + + # Update function variables + self.t_start = None # datetime.now() # Estimated start frame time + self.start_pose_abs = None def init_debug(self,) -> None: # Debug Publishers @@ -123,6 +127,10 @@ def init_debug(self,) -> None: self.pub_image = rospy.Publisher("/camera/image_detection", Image, queue_size=1) def update(self, vehicle : VehicleState) -> Dict[str,AgentState]: + if self.t_start is None: + self.t_start = self.vehicle_interface.time() + if self.start_pose_abs is None: + self.start_pose_abs = vehicle.pose return self.current_agents # TODO: Improve Algo Knn, ransac, etc. @@ -148,21 +156,21 @@ def find_dims(self, clusters: List[List[np.ndarray]]) -> List[np.ndarray]: # Work towards own tracking class instead of simple YOLO track? # Fix division by time # ret: Dict[track_id: vel[x, y, z]] - def find_vels(self, track_ids: List[int], obj_centers: List[np.ndarray], obj_dims: List[np.ndarray]) -> Dict[int, np.ndarray]: - # Object not seen -> velocity = None - track_id_center_map = dict(zip(track_ids, obj_centers)) - vels = defaultdict(lambda: np.array(())) # None is faster, np.array matches other find_ methods. - - for prev_track_id, prev_agent in self.prev_agents.items(): - if prev_track_id in track_ids: - # TODO: Add prev_agents to memory to avoid None velocity - # We should only be missing prev pose on first sight of track_id Agent. - # print("shape 1: ", track_id_center_map[prev_agent.track_id]) - # print("shape 2: ", np.array([prev_agent.pose.x, prev_agent.pose.y, prev_agent.pose.z])) - # prev can be 3 separate Nones, current is just empty array... make this symmetrical - if prev_agent.pose.x and prev_agent.pose.y and prev_agent.pose.z and track_id_center_map[prev_agent.track_id].shape == 3: - vels[prev_track_id] = (track_id_center_map[prev_track_id] - np.array([prev_agent.pose.x, prev_agent.pose.y, prev_agent.pose.z])) / (self.curr_time - self.prev_time) - return vels + # def find_vels(self, track_ids: List[int], obj_centers: List[np.ndarray], obj_dims: List[np.ndarray]) -> Dict[int, np.ndarray]: + # # Object not seen -> velocity = None + # track_id_center_map = dict(zip(track_ids, obj_centers)) + # vels = defaultdict(lambda: np.array(())) # None is faster, np.array matches other find_ methods. + + # for prev_track_id, prev_agent in self.prev_agents.items(): + # if prev_track_id in track_ids: + # # TODO: Add prev_agents to memory to avoid None velocity + # # We should only be missing prev pose on first sight of track_id Agent. + # # print("shape 1: ", track_id_center_map[prev_agent.track_id]) + # # print("shape 2: ", np.array([prev_agent.pose.x, prev_agent.pose.y, prev_agent.pose.z])) + # # prev can be 3 separate Nones, current is just empty array... make this symmetrical + # if prev_agent.pose.x and prev_agent.pose.y and prev_agent.pose.z and track_id_center_map[prev_agent.track_id].shape == 3: + # vels[prev_track_id] = (track_id_center_map[prev_track_id] - np.array([prev_agent.pose.x, prev_agent.pose.y, prev_agent.pose.z])) / (self.curr_time - self.prev_time) + # return vels # TODO: Separate debug/viz class, bbox and 2d 3d points funcs def viz_object_states(self, cv_image, boxes, extracted_pts_all): @@ -287,7 +295,7 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda assigned = False num_pairings = len(obj_centers) - # TODO: NEED TO STORE AND INCORPORATE TRANSFORMS SOMEHOW TO DEAL WITH MOVING CAR CASE + # TODO: NEED TO STORE AND INCORPORATE TRANSFORMS SOMEHOW TO DEAL WITH MOVING CAR CASE converted_centers = obj_centers # TODO: Add this in when finished self.convert_vehicle_frame_to_start_frame(obj_centers) # Loop through the indexes of the obj_center and obj_dim pairings @@ -312,7 +320,7 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda self.current_agents[prev_id] = ( AgentState( track_id = prev_id, - pose=ObjectPose(t=0, x=obj_centers[idx][0], y=obj_centers[idx][1], z=obj_centers[idx][2], yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT), + pose=ObjectPose(t=(self.curr_time-self.t_start).total_seconds(), x=obj_centers[idx][0], y=obj_centers[idx][1], z=obj_centers[idx][2], yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT), # (l, w, h) # TODO: confirm (z -> l, x -> w, y -> h) dimensions=(obj_dims[idx][0], obj_dims[idx][1], obj_dims[idx][2]), @@ -325,7 +333,7 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda new_prev_agents[prev_id] = ( AgentState( track_id = prev_id, - pose=ObjectPose(t=0, x=converted_centers[idx][0], y=converted_centers[idx][1], z=converted_centers[idx][2], yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT), + pose=ObjectPose(t=(self.curr_time-self.t_start).total_seconds(), x=converted_centers[idx][0], y=converted_centers[idx][1], z=converted_centers[idx][2], yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT), # (l, w, h) # TODO: confirm (z -> l, x -> w, y -> h) dimensions=(obj_dims[idx][0], obj_dims[idx][1], obj_dims[idx][2]), @@ -341,11 +349,11 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda # If not assigned: if not assigned: # Set velocity to 0 and assign the new agent a new id with IdTracker - id = self.id_tracker.get_new_id() + id = "ped" + str(self.id_tracker.get_new_id()) self.current_agents[id] = ( AgentState( track_id = id, - pose=ObjectPose(t=0, x=obj_centers[idx][0], y=obj_centers[idx][1], z=obj_centers[idx][2] ,yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT), + pose=ObjectPose(t=(self.curr_time-self.t_start).total_seconds(), x=obj_centers[idx][0], y=obj_centers[idx][1], z=obj_centers[idx][2] ,yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT), # (l, w, h) # TODO: confirm (z -> l, x -> w, y -> h) dimensions=(obj_dims[idx][0], obj_dims[idx][1], obj_dims[idx][2]), @@ -358,7 +366,7 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda new_prev_agents[id] = ( AgentState( track_id = id, - pose=ObjectPose(t=0, x=converted_centers[idx][0], y=converted_centers[idx][1], z=converted_centers[idx][2] ,yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT), + pose=ObjectPose(t=(self.curr_time-self.t_start).total_seconds(), x=converted_centers[idx][0], y=converted_centers[idx][1], z=converted_centers[idx][2] ,yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT), # (l, w, h) # TODO: confirm (z -> l, x -> w, y -> h) dimensions=(obj_dims[idx][0], obj_dims[idx][1], obj_dims[idx][2]), @@ -394,11 +402,31 @@ def agents_overlap(self, obj_center: np.ndarray, obj_dim: np.ndarray, prev_agent ) def convert_vehicle_frame_to_start_frame(self, vehicle_states: List[AgentState]) -> List[AgentState]: - # num_states = len(vehicle_states) - # for idx in num_states: - # num_states[idx].to_frame() - pass - + num_states = len(vehicle_states) + for idx in num_states: + vehicle_states[idx] = vehicle_states[idx].to_frame(frame=ObjectFrameEnum.START, current_pose=current_obj_pose, start_pose_abs=self.start_pose_abs) + + # current_obj_pose = ObjectPose( + # frame=ObjectFrameEnum.CURRENT, + # t=(self.curr_time-self.t_start).total_seconds(), + # x=vehicle_states[idx].pose.x, + # y=vehicle_states[idx].pose.y, + # z=vehicle_states[idx].pose.z + # ) + # vehicle_states[0] = vehicle_states[0].to_frame(frame=ObjectFrameEnum.START, current_pose=current_obj_pose, start_pose_abs=self.start_pose_abs) + # # start_obj_pose??? + # state = AgentState( + # track_id =0, + # pose=ObjectPose(t=(self.curr_time-self.t_start).total_seconds(), x=obj_centers[idx][0], y=obj_centers[idx][1], z=obj_centers[idx][2], yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT), + # # (l, w, h) + # # TODO: confirm (z -> l, x -> w, y -> h) + # dimensions=(obj_dims[idx][0], obj_dims[idx][1], obj_dims[idx][2]), + # outline=None, + # type=AgentEnum.PEDESTRIAN, + # activity=AgentActivityEnum.STOPPED if np.all(vel == 0) else AgentActivityEnum.MOVING, + # velocity=None if vel.size == 0 else tuple(vel), + # yaw_rate=0 + # ) def ouster_oak_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2): # Update times for basic velocity calculation From 0b5d88b5d5d11880aab18d3155947286fe264468 Mon Sep 17 00:00:00 2001 From: LucasEby Date: Tue, 18 Feb 2025 22:30:00 -0500 Subject: [PATCH 41/46] finally think I figured out start frame transform. In the process of converting to AgentStates --- .../perception/pedestrian_detection.py | 67 +++++++++++++------ 1 file changed, 47 insertions(+), 20 deletions(-) diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py index 381075c9b..a4666f691 100644 --- a/GEMstack/onboard/perception/pedestrian_detection.py +++ b/GEMstack/onboard/perception/pedestrian_detection.py @@ -379,17 +379,18 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda self.prev_agents = new_prev_agents # Calculates whether 2 agents overlap in START frame. True if they do, false if not - def agents_overlap(self, obj_center: np.ndarray, obj_dim: np.ndarray, prev_agent: AgentState) -> bool: - # Calculate corners of obj_center and obj_dim pairing - x1_min, x1_max = obj_center[0] - obj_dim[0] / 2.0, obj_center[0] + obj_dim[0] / 2.0 - y1_min, y1_max = obj_center[1] - obj_dim[1] / 2.0, obj_center[1] + obj_dim[1] / 2.0 # CENTER CALCULATION - z1_min, z1_max = obj_center[2] - obj_dim[2] / 2.0, obj_center[2] + obj_dim[2] / 2.0 - + def agents_overlap(self, curr_agent: AgentState, prev_agent: AgentState) -> bool: # Calculate corners of AgentState # Beware: AgentState(PhysicalObject) builds bbox from # dims [-l/2,l/2] x [-w/2,w/2] x [0,h], not # [-l/2,l/2] x [-w/2,w/2] x [-h/2,h/2] # TODO: confirm (z -> l, x -> w, y -> h) + + # Calculate corners of obj_center and obj_dim pairing + x1_min, x1_max = curr_agent.pose.x - curr_agent.dimensions[0] / 2.0, curr_agent.pose.x + curr_agent.dimensions[0] / 2.0 + y1_min, y1_max = curr_agent.pose.y, curr_agent.pose.y + curr_agent.dimensions[1] # AGENT STATE CALCULATION + z1_min, z1_max = curr_agent.pose.z - curr_agent.dimensions[2] / 2.0, curr_agent.pose.z + curr_agent.dimensions[2] / 2.0 + x2_min, x2_max = prev_agent.pose.x - prev_agent.dimensions[0] / 2.0, prev_agent.pose.x + prev_agent.dimensions[0] / 2.0 y2_min, y2_max = prev_agent.pose.y, prev_agent.pose.y + prev_agent.dimensions[1] # AGENT STATE CALCULATION z2_min, z2_max = prev_agent.pose.z - prev_agent.dimensions[2] / 2.0, prev_agent.pose.z + prev_agent.dimensions[2] / 2.0 @@ -401,20 +402,46 @@ def agents_overlap(self, obj_center: np.ndarray, obj_dim: np.ndarray, prev_agent ( (z1_min <= z2_min and z2_min <= z1_max) or (z2_min <= z1_min and z1_min <= z2_max) ) ) - def convert_vehicle_frame_to_start_frame(self, vehicle_states: List[AgentState]) -> List[AgentState]: - num_states = len(vehicle_states) - for idx in num_states: - vehicle_states[idx] = vehicle_states[idx].to_frame(frame=ObjectFrameEnum.START, current_pose=current_obj_pose, start_pose_abs=self.start_pose_abs) - - # current_obj_pose = ObjectPose( - # frame=ObjectFrameEnum.CURRENT, - # t=(self.curr_time-self.t_start).total_seconds(), - # x=vehicle_states[idx].pose.x, - # y=vehicle_states[idx].pose.y, - # z=vehicle_states[idx].pose.z - # ) - # vehicle_states[0] = vehicle_states[0].to_frame(frame=ObjectFrameEnum.START, current_pose=current_obj_pose, start_pose_abs=self.start_pose_abs) - # # start_obj_pose??? + def create_agent_states_in_start_frame(self, obj_centers: List[np.ndarray], obj_dims: List[np.ndarray]) -> List[AgentState]: + # Create list of agent states in current vehicle frame: + agents = [] + num_pairings = len(obj_centers) + for idx in range(num_pairings): + # Create agent in current frame: + state = AgentState( + track_id="TBD", # Temporary + pose=ObjectPose(t=(self.curr_time-self.t_start).total_seconds(), x=obj_centers[idx][0], y=obj_centers[idx][1], z=obj_centers[idx][2], yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT), + # Beware: AgentState(PhysicalObject) builds bbox from + # dims [-l/2,l/2] x [-w/2,w/2] x [0,h], not + # [-l/2,l/2] x [-w/2,w/2] x [-h/2,h/2] + # (l, w, h) + # TODO: confirm (z -> l, x -> w, y -> h) + dimensions=(obj_dims[idx][2], obj_dims[idx][0], obj_centers[idx][1] + obj_dims[idx][1]), + outline=None, + type=AgentEnum.PEDESTRIAN, + activity=AgentActivityEnum.MOVING, # Temporary + velocity=None, # Temporary + yaw_rate=0 + ) + + # Convert agent to start frame and add to agents list: + agents.append( + state.to_frame( + frame=ObjectFrameEnum.START, + current_pose=ObjectPose( + frame=ObjectFrameEnum.CURRENT, + t=(self.curr_time-self.t_start).total_seconds(), + x=state.pose.x, + y=state.pose.y, + z=state.pose.z + ), + start_pose_abs=self.start_pose_abs + ) + ) + + # Return the agent states converted to start frame: + return agents + # state = AgentState( # track_id =0, # pose=ObjectPose(t=(self.curr_time-self.t_start).total_seconds(), x=obj_centers[idx][0], y=obj_centers[idx][1], z=obj_centers[idx][2], yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT), From 96b9885b31805b0068e4374f8f5f7a9a57f4c4fa Mon Sep 17 00:00:00 2001 From: LucasEby Date: Tue, 18 Feb 2025 23:13:05 -0500 Subject: [PATCH 42/46] Pushing code so Lukas can try to replicate error message --- .../perception/pedestrian_detection.py | 81 +++++++------------ 1 file changed, 28 insertions(+), 53 deletions(-) diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py index a4666f691..5f1cd0568 100644 --- a/GEMstack/onboard/perception/pedestrian_detection.py +++ b/GEMstack/onboard/perception/pedestrian_detection.py @@ -116,7 +116,7 @@ def __init__(self, vehicle_interface : GEMInterface) -> None: self.id_tracker = IdTracker() # Update function variables - self.t_start = None # datetime.now() # Estimated start frame time + self.t_start = datetime.now() # Estimated start frame time self.start_pose_abs = None def init_debug(self,) -> None: @@ -127,8 +127,6 @@ def init_debug(self,) -> None: self.pub_image = rospy.Publisher("/camera/image_detection", Image, queue_size=1) def update(self, vehicle : VehicleState) -> Dict[str,AgentState]: - if self.t_start is None: - self.t_start = self.vehicle_interface.time() if self.start_pose_abs is None: self.start_pose_abs = vehicle.pose return self.current_agents @@ -295,8 +293,8 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda assigned = False num_pairings = len(obj_centers) - # TODO: NEED TO STORE AND INCORPORATE TRANSFORMS SOMEHOW TO DEAL WITH MOVING CAR CASE - converted_centers = obj_centers # TODO: Add this in when finished self.convert_vehicle_frame_to_start_frame(obj_centers) + # Create a list of agent states in the start frame + agents_sf = self.create_agent_states_in_start_frame(obj_centers=obj_centers, obj_dims=obj_dims) # Create agents in start frame # Loop through the indexes of the obj_center and obj_dim pairings for idx in range(num_pairings): @@ -305,7 +303,7 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda # Loop through previous agents backwards for prev_id, prev_state in reversed(self.prev_agents.items()): # If an obj_center and obj_dim pair overlaps with a previous agent, assume that they're the same agent - if self.agents_overlap(converted_centers[idx], obj_dims[idx], prev_state): + if self.agents_overlap(agents_sf[idx], prev_state): assigned = True if self.prev_time == None: @@ -313,10 +311,11 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda vel = np.array([0, 0, 0]) else: delta_t = self.curr_time - self.prev_time - vel = (converted_centers[idx] - np.array([prev_state.pose.x, prev_state.pose.y, prev_state.pose.z])) / delta_t.total_seconds() + vel = (np.array([agents_sf[idx].pose.x, agents_sf[idx].pose.y, agents_sf[idx].pose.z]) - np.array([prev_state.pose.x, prev_state.pose.y, prev_state.pose.z])) / delta_t.total_seconds() print("VELOCITY:") print(vel) + # Create current frame agent at planning group's request: self.current_agents[prev_id] = ( AgentState( track_id = prev_id, @@ -327,29 +326,25 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda outline=None, type=AgentEnum.PEDESTRIAN, activity=AgentActivityEnum.STOPPED if np.all(vel == 0) else AgentActivityEnum.MOVING, - velocity=None if vel.size == 0 else tuple(vel), - yaw_rate=0 - )) - new_prev_agents[prev_id] = ( - AgentState( - track_id = prev_id, - pose=ObjectPose(t=(self.curr_time-self.t_start).total_seconds(), x=converted_centers[idx][0], y=converted_centers[idx][1], z=converted_centers[idx][2], yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT), - # (l, w, h) - # TODO: confirm (z -> l, x -> w, y -> h) - dimensions=(obj_dims[idx][0], obj_dims[idx][1], obj_dims[idx][2]), - outline=None, - type=AgentEnum.PEDESTRIAN, - activity=AgentActivityEnum.STOPPED if np.all(vel == 0) else AgentActivityEnum.MOVING, - velocity=None if vel.size == 0 else tuple(vel), + velocity=(0, 0, 0) if vel.size == 0 else tuple(vel), yaw_rate=0 )) + + # Fix start frame agent and store in this dict: + agents_sf[idx].track_id = prev_id + agents_sf[idx].activity = AgentActivityEnum.STOPPED if (np.all(vel == 0) or vel.size == 0) else AgentActivityEnum.MOVING + agents_sf[idx].velocity = (0, 0, 0) if vel.size == 0 else tuple(vel) + + new_prev_agents[prev_id] = agents_sf[idx] del self.prev_agents[prev_id] # Remove previous agent from previous agents break # If not assigned: if not assigned: # Set velocity to 0 and assign the new agent a new id with IdTracker - id = "ped" + str(self.id_tracker.get_new_id()) + id = self.id_tracker.get_new_id() + + # Create current frame agent at planning group's request: self.current_agents[id] = ( AgentState( track_id = id, @@ -360,22 +355,15 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda outline=None, type=AgentEnum.PEDESTRIAN, activity=AgentActivityEnum.UNDETERMINED, - velocity=None, - yaw_rate=0 - )) - new_prev_agents[id] = ( - AgentState( - track_id = id, - pose=ObjectPose(t=(self.curr_time-self.t_start).total_seconds(), x=converted_centers[idx][0], y=converted_centers[idx][1], z=converted_centers[idx][2] ,yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT), - # (l, w, h) - # TODO: confirm (z -> l, x -> w, y -> h) - dimensions=(obj_dims[idx][0], obj_dims[idx][1], obj_dims[idx][2]), - outline=None, - type=AgentEnum.PEDESTRIAN, - activity=AgentActivityEnum.UNDETERMINED, - velocity=None, + velocity=(0, 0, 0), yaw_rate=0 )) + + # Fix start frame agent and store in this dict: + agents_sf[idx].track_id = id + agents_sf[idx].activity = AgentActivityEnum.UNDETERMINED + agents_sf[idx].velocity = (0, 0, 0) + new_prev_agents[id] = agents_sf[idx] self.prev_agents = new_prev_agents # Calculates whether 2 agents overlap in START frame. True if they do, false if not @@ -409,8 +397,8 @@ def create_agent_states_in_start_frame(self, obj_centers: List[np.ndarray], obj_ for idx in range(num_pairings): # Create agent in current frame: state = AgentState( - track_id="TBD", # Temporary - pose=ObjectPose(t=(self.curr_time-self.t_start).total_seconds(), x=obj_centers[idx][0], y=obj_centers[idx][1], z=obj_centers[idx][2], yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT), + track_id=0, # Temporary + pose=ObjectPose(t=(self.curr_time - self.t_start).total_seconds(), x=obj_centers[idx][0], y=obj_centers[idx][1], z=obj_centers[idx][2], yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT), # Beware: AgentState(PhysicalObject) builds bbox from # dims [-l/2,l/2] x [-w/2,w/2] x [0,h], not # [-l/2,l/2] x [-w/2,w/2] x [-h/2,h/2] @@ -419,7 +407,7 @@ def create_agent_states_in_start_frame(self, obj_centers: List[np.ndarray], obj_ dimensions=(obj_dims[idx][2], obj_dims[idx][0], obj_centers[idx][1] + obj_dims[idx][1]), outline=None, type=AgentEnum.PEDESTRIAN, - activity=AgentActivityEnum.MOVING, # Temporary + activity=AgentActivityEnum.UNDETERMINED, # Temporary velocity=None, # Temporary yaw_rate=0 ) @@ -430,7 +418,7 @@ def create_agent_states_in_start_frame(self, obj_centers: List[np.ndarray], obj_ frame=ObjectFrameEnum.START, current_pose=ObjectPose( frame=ObjectFrameEnum.CURRENT, - t=(self.curr_time-self.t_start).total_seconds(), + t=(self.curr_time - self.t_start).total_seconds(), x=state.pose.x, y=state.pose.y, z=state.pose.z @@ -442,19 +430,6 @@ def create_agent_states_in_start_frame(self, obj_centers: List[np.ndarray], obj_ # Return the agent states converted to start frame: return agents - # state = AgentState( - # track_id =0, - # pose=ObjectPose(t=(self.curr_time-self.t_start).total_seconds(), x=obj_centers[idx][0], y=obj_centers[idx][1], z=obj_centers[idx][2], yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT), - # # (l, w, h) - # # TODO: confirm (z -> l, x -> w, y -> h) - # dimensions=(obj_dims[idx][0], obj_dims[idx][1], obj_dims[idx][2]), - # outline=None, - # type=AgentEnum.PEDESTRIAN, - # activity=AgentActivityEnum.STOPPED if np.all(vel == 0) else AgentActivityEnum.MOVING, - # velocity=None if vel.size == 0 else tuple(vel), - # yaw_rate=0 - # ) - def ouster_oak_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2): # Update times for basic velocity calculation self.prev_time = self.curr_time From a936d3d989ae80fc245001df930b2ca927bfea37 Mon Sep 17 00:00:00 2001 From: LucasEby Date: Wed, 19 Feb 2025 00:57:40 -0500 Subject: [PATCH 43/46] Fixed logic for gate which tosses out points --- .../onboard/perception/pedestrian_detection.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py index 9fe5eeaf2..51d90248c 100644 --- a/GEMstack/onboard/perception/pedestrian_detection.py +++ b/GEMstack/onboard/perception/pedestrian_detection.py @@ -274,15 +274,15 @@ def update_object_states(self, track_result: List[Results], extracted_pts_all: L # TODO Fix velocity calculation to calculate in ObjectFrameEnum.START def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.ndarray]): # Gate to check whether dims and centers are empty (will happen if no pedestrians are scanned): - if ((len(obj_dims) == 1 or len(obj_centers) == 1) and (obj_centers[0].size == 0 or obj_dims[0].size == 0)): - self.prev_agents = self.current_agents.copy() - self.current_agents = {} # There weren't any pedestrians detected - return - elif (len(obj_centers) != len(obj_dims)): - for i in range(25): - print("ERROR NUM PARINGS AND OBJ DIMS ARENT THE SAME") - self.prev_agents = self.current_agents.copy() - self.current_agents = {} # There weren't any pedestrians detected + for idx in range(len(obj_dims) -1, -1, -1): + if (obj_centers[idx].size == 0) or (obj_dims[0].size == 0): + del obj_centers[idx] + del obj_dims[idx] + + # Nothing was scanned, erase current (for current output) and previous list (for next time through because nothing was scanned) + if (len(obj_dims) == 0 or len(obj_centers) == 0): + self.current_agents = {} + self.prev_agents = {} return new_prev_agents = {} # Stores current agents in START frame for next time through (since From 98f445f6cb914f95db3bea071a47a7de5b071055 Mon Sep 17 00:00:00 2001 From: Lukas Dumasius <80981746+lukasdumasius@users.noreply.github.com> Date: Wed, 19 Feb 2025 09:51:04 -0600 Subject: [PATCH 44/46] Update pedestrian_detection.py --- GEMstack/onboard/perception/pedestrian_detection.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py index 9970c6a29..58b8e7ac5 100644 --- a/GEMstack/onboard/perception/pedestrian_detection.py +++ b/GEMstack/onboard/perception/pedestrian_detection.py @@ -263,6 +263,8 @@ def update_object_states(self, track_result: List[Results], extracted_pts_all: L for ind in range(num_objs): obj_center = (None, None, None) if obj_centers[ind].size == 0 else obj_centers[ind] obj_dim = (None, None, None) if obj_dims[ind].size == 0 else obj_dims[ind] + + # Part 3 - Change creating this to start frame self.current_agents[track_ids[ind]] = ( AgentState( track_id = track_ids[ind], From dad7cc1a36c23c0ac178d43846dbb72f99eaf033 Mon Sep 17 00:00:00 2001 From: Lukas Dumasius <80981746+lukasdumasius@users.noreply.github.com> Date: Wed, 19 Feb 2025 10:02:13 -0600 Subject: [PATCH 45/46] Update pedestrian_detection.py --- GEMstack/onboard/perception/pedestrian_detection.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py index 58b8e7ac5..1de87c18c 100644 --- a/GEMstack/onboard/perception/pedestrian_detection.py +++ b/GEMstack/onboard/perception/pedestrian_detection.py @@ -140,7 +140,8 @@ def find_dims(self, clusters: List[List[np.ndarray]]) -> List[np.ndarray]: # TODO: Moving Average across last N iterations pos/vel? Less spurious vals # TODO Akul: Fix velocity calculation to calculate in ObjectFrameEnum.START # Work towards own tracking class instead of simple YOLO track? - # Fix division by time + # Fix 1: division by time + # Fix 2: Put centers and dims in start frame for velocity calc + final agentstate in update_object_states # ret: Dict[track_id: vel[x, y, z]] def find_vels(self, track_ids: List[int], obj_centers: List[np.ndarray]) -> Dict[int, np.ndarray]: # Object not seen -> velocity = None From 502f3a80e700d57f38d9e45f59e1fd6c3b1f3987 Mon Sep 17 00:00:00 2001 From: LucasEby Date: Wed, 19 Feb 2025 13:20:51 -0500 Subject: [PATCH 46/46] Sending untransformed version to planning team --- .../perception/pedestrian_detection.py | 41 ++++++++++++------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py index 51d90248c..2a3f84d1a 100644 --- a/GEMstack/onboard/perception/pedestrian_detection.py +++ b/GEMstack/onboard/perception/pedestrian_detection.py @@ -115,7 +115,14 @@ def __init__(self, vehicle_interface : GEMInterface) -> None: # Update function variables self.t_start = datetime.now() # Estimated start frame time - self.start_pose_abs = None + # self.start_pose_abs = None + # self.start_pose_abs = ObjectPose( + # frame=ObjectFrameEnum.GLOBAL, + # t=0, + # x=0, + # y=0, + # z=0 + # ) def init_debug(self,) -> None: # Debug Publishers @@ -125,8 +132,10 @@ def init_debug(self,) -> None: self.pub_image = rospy.Publisher("/camera/image_detection", Image, queue_size=1) def update(self, vehicle : VehicleState) -> Dict[str,AgentState]: - if self.start_pose_abs is None: - self.start_pose_abs = vehicle.pose + # if self.start_pose_abs is None: + # self.start_pose_abs = vehicle.pose # Store first pose which is start frame + # self.start_pose_abs.to_frame(frame=ObjectFrameEnum.GLOBAL, current_pose=) + # self.current_vehicle_pose_sf = vehicle.pose # Vehicle pose in start frame return self.current_agents # TODO: Improve Algo Knn, ransac, etc. @@ -409,20 +418,22 @@ def create_agent_states_in_start_frame(self, obj_centers: List[np.ndarray], obj_ velocity=None, # Temporary yaw_rate=0 ) - + # Convert agent to start frame and add to agents list: agents.append( - state.to_frame( - frame=ObjectFrameEnum.START, - current_pose=ObjectPose( - frame=ObjectFrameEnum.CURRENT, - t=(self.curr_time - self.t_start).total_seconds(), - x=state.pose.x, - y=state.pose.y, - z=state.pose.z - ), - start_pose_abs=self.start_pose_abs - ) + state + # state.to_frame( + # frame=ObjectFrameEnum.START, + # current_pose=self.current_vehicle_pose_sf, + # # current_pose=ObjectPose( + # # frame=ObjectFrameEnum.CURRENT, + # # t=(self.curr_time - self.t_start).total_seconds(), + # # x=state.pose.x, + # # y=state.pose.y, + # # z=state.pose.z + # # ), + # start_pose_abs=self.start_pose_abs + # ) ) # Return the agent states converted to start frame: