From 97b6dc688caf21150bb1deb43080c58ee972864e Mon Sep 17 00:00:00 2001 From: nvikramraj Date: Thu, 13 Feb 2025 16:17:31 -0600 Subject: [PATCH 01/15] Lidar to Base frame transformation --- .../perception/pedestrian_detection.py | 83 +++++++++++++++++-- requirements.txt | 1 + 2 files changed, 75 insertions(+), 9 deletions(-) diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py index f91145274..aeda0b15b 100644 --- a/GEMstack/onboard/perception/pedestrian_detection.py +++ b/GEMstack/onboard/perception/pedestrian_detection.py @@ -5,9 +5,13 @@ import cv2 from typing import Dict from cv_bridge import CvBridge -from sensor_msgs.msg import Image +from sensor_msgs.msg import Image, PointCloud2, PointField +from tf2_sensor_msgs.tf2_sensor_msgs import do_transform_cloud +from geometry_msgs.msg import TransformStamped import rospy import os +import numpy as np +import tf2_ros def box_to_fake_agent(box): @@ -56,8 +60,8 @@ def initialize(self): """Initializes subscribers and publishers self.vehicle_interface.subscribe_sensor -> GEM Car camera subscription - self.rosbag_test -> ROS Bag topic subscription - self.pub_image -> Publishes Image with detection for visualization + self.rosbag_cam_sub -> ROS Bag topic subscription + self.rosbag_cam_pub -> Publishes Image with detection for visualization """ #tell the vehicle to use image_callback whenever 'front_camera' gets a reading, and it expects images of type cv2.Mat @@ -68,12 +72,74 @@ def initialize(self): # Webcam # rospy.Subscriber('/webcam', Image, self.image_callback) + # Conversion tools + self.cv_bridge = CvBridge() + self.tf_buffer = tf2_ros.Buffer() + self.tf_listener = tf2_ros.TransformListener(self.tf_buffer) + # Testing with rosbag - self.rosbag_test = rospy.Subscriber('/oak/rgb/image_raw',Image, self.image_callback,queue_size=1) if(self.visualization): - self.pub_image = rospy.Publisher("/camera/image_detection", Image, queue_size=1) + self.rosbag_cam_pub = rospy.Publisher("/camera/image_detection", Image, queue_size=1) + + # Lidar Tranform topic publisher + self.rosbag_lidar_livox_pub = rospy.Publisher("/livox/transformed_lidar", PointCloud2, queue_size=1) + # self.rosbag_lidar_ouster_pub = rospy.Publisher("/ouster/transformed_lidar", PointCloud2, queue_size=1) + + self.rosbag_cam_sub = rospy.Subscriber('/oak/rgb/image_raw',Image, self.image_callback,queue_size=1) + self.rosbag_lidar_livox_sub = rospy.Subscriber('/livox/lidar',PointCloud2, self.lidar_livox_callback,queue_size=1) + # self.rosbag_lidar_ouster_sub = rospy.Subscriber('/ouster/points',PointCloud2, self.lidar_ouster_callback,queue_size=1) + pass + + def publish_base_link(self): + + br = tf2_ros.TransformBroadcaster() + transform = TransformStamped() + + transform.header.stamp = rospy.Time.now() + transform.header.frame_id = "map" # Parent frame + transform.child_frame_id = "base_link" # Child frame + + # Set translation (change as per actual vehicle position) + transform.transform.translation.x = 0.0 + transform.transform.translation.y = 0.0 + transform.transform.translation.z = 0.0 + + # Set rotation (quaternion) + transform.transform.rotation.x = 0.0 + transform.transform.rotation.y = 0.0 + transform.transform.rotation.z = 0.0 + transform.transform.rotation.w = 1.0 # No rotation + br.sendTransform(transform) + + def lidar_livox_callback(self,pointcloud : PointCloud2): + + transform = self.tf_buffer.lookup_transform('base_link', pointcloud.header.frame_id, rospy.Time(0), rospy.Duration(1.0)) + + # Transform the point cloud + transformed_cloud = do_transform_cloud(pointcloud, transform) + + # Publish transformed point cloud + self.rosbag_lidar_livox_pub.publish(transformed_cloud) + + if(self.visualization): + self.publish_base_link() + + # def lidar_ouster_callback(self,pointcloud : PointCloud2): + + # transform = self.tf_buffer.lookup_transform('base_link', pointcloud.header.frame_id, rospy.Time(0), rospy.Duration(1.0)) + + # # Transform the point cloud + # transformed_cloud = do_transform_cloud(pointcloud, transform) + + # # Publish transformed point cloud + # self.rosbag_lidar_livox_pub.publish(transformed_cloud) + + # if(self.visualization): + # self.publish_base_link() + + # Use cv2.Mat for GEM Car, Image for RosBag def image_callback(self, image : Image): #image : cv2.Mat): @@ -92,8 +158,7 @@ def image_callback(self, image : Image): #image : cv2.Mat): # track_result = self.detector.track(source=image, classes=self.classes_to_detect, persist=True, conf=self.confidence) # Convert to CV2 format for RosBag - bridge = CvBridge() - image = bridge.imgmsg_to_cv2(image, "bgr8") + image = self.cv_bridge.imgmsg_to_cv2(image, "bgr8") track_result = self.detector.track(source=image, classes=self.classes_to_detect, persist=True, conf=self.confidence) self.last_person_boxes = [] @@ -155,8 +220,8 @@ def image_callback(self, image : Image): #image : cv2.Mat): # Used for visualization if(self.visualization): - ros_img = bridge.cv2_to_imgmsg(image, 'bgr8') - self.pub_image.publish(ros_img) + ros_img = self.cv_bridge.cv2_to_imgmsg(image, 'bgr8') + self.rosbag_cam_pub.publish(ros_img) #uncomment if you want to debug the detector... # print(self.last_person_boxes) diff --git a/requirements.txt b/requirements.txt index aba104c78..1679dba33 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,3 +10,4 @@ dacite # Perception ultralytics +lap==0.5.12 \ No newline at end of file From b087f8d9b403aecd1ea24dc4cd1a501924e13873 Mon Sep 17 00:00:00 2001 From: nvikramraj Date: Thu, 13 Feb 2025 16:27:39 -0600 Subject: [PATCH 02/15] added few comments --- .../perception/pedestrian_detection.py | 28 +++++++++++-------- 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py index aeda0b15b..bf3134a68 100644 --- a/GEMstack/onboard/perception/pedestrian_detection.py +++ b/GEMstack/onboard/perception/pedestrian_detection.py @@ -83,16 +83,17 @@ def initialize(self): # Lidar Tranform topic publisher self.rosbag_lidar_livox_pub = rospy.Publisher("/livox/transformed_lidar", PointCloud2, queue_size=1) - # self.rosbag_lidar_ouster_pub = rospy.Publisher("/ouster/transformed_lidar", PointCloud2, queue_size=1) + self.rosbag_lidar_ouster_pub = rospy.Publisher("/ouster/transformed_lidar", PointCloud2, queue_size=1) self.rosbag_cam_sub = rospy.Subscriber('/oak/rgb/image_raw',Image, self.image_callback,queue_size=1) self.rosbag_lidar_livox_sub = rospy.Subscriber('/livox/lidar',PointCloud2, self.lidar_livox_callback,queue_size=1) - # self.rosbag_lidar_ouster_sub = rospy.Subscriber('/ouster/points',PointCloud2, self.lidar_ouster_callback,queue_size=1) + self.rosbag_lidar_ouster_sub = rospy.Subscriber('/ouster/points',PointCloud2, self.lidar_ouster_callback,queue_size=1) pass - def publish_base_link(self): + # Publishes baselink to map transformation for visualization + def publish_base_link_to_map(self): br = tf2_ros.TransformBroadcaster() transform = TransformStamped() @@ -113,6 +114,8 @@ def publish_base_link(self): transform.transform.rotation.w = 1.0 # No rotation br.sendTransform(transform) + + # Livox Lidar data def lidar_livox_callback(self,pointcloud : PointCloud2): transform = self.tf_buffer.lookup_transform('base_link', pointcloud.header.frame_id, rospy.Time(0), rospy.Duration(1.0)) @@ -124,20 +127,21 @@ def lidar_livox_callback(self,pointcloud : PointCloud2): self.rosbag_lidar_livox_pub.publish(transformed_cloud) if(self.visualization): - self.publish_base_link() + self.publish_base_link_to_map() - # def lidar_ouster_callback(self,pointcloud : PointCloud2): + # Ouster Lidar data + def lidar_ouster_callback(self,pointcloud : PointCloud2): - # transform = self.tf_buffer.lookup_transform('base_link', pointcloud.header.frame_id, rospy.Time(0), rospy.Duration(1.0)) + transform = self.tf_buffer.lookup_transform('base_link', pointcloud.header.frame_id, rospy.Time(0), rospy.Duration(1.0)) - # # Transform the point cloud - # transformed_cloud = do_transform_cloud(pointcloud, transform) + # Transform the point cloud + transformed_cloud = do_transform_cloud(pointcloud, transform) - # # Publish transformed point cloud - # self.rosbag_lidar_livox_pub.publish(transformed_cloud) + # Publish transformed point cloud + self.rosbag_lidar_livox_pub.publish(transformed_cloud) - # if(self.visualization): - # self.publish_base_link() + if(self.visualization): + self.publish_base_link_to_map() # Use cv2.Mat for GEM Car, Image for RosBag From 8316a20ba289a5a349ac35558da5f10bf4addde6 Mon Sep 17 00:00:00 2001 From: nvikramraj Date: Fri, 14 Feb 2025 21:48:36 -0600 Subject: [PATCH 03/15] converted lidar to cam frame --- GEMstack/knowledge/calibration/extrinsics.npz | Bin 0 -> 850 bytes GEMstack/knowledge/calibration/intrinsic.json | 7 ++ .../perception/pedestrian_detection.py | 101 ++++++++++++++---- 3 files changed, 90 insertions(+), 18 deletions(-) create mode 100644 GEMstack/knowledge/calibration/extrinsics.npz create mode 100644 GEMstack/knowledge/calibration/intrinsic.json diff --git a/GEMstack/knowledge/calibration/extrinsics.npz b/GEMstack/knowledge/calibration/extrinsics.npz new file mode 100644 index 0000000000000000000000000000000000000000..663fe14bd0da3d734e4dbacac1d377c6c845ca42 GIT binary patch literal 850 zcmWIWW@Zs#fB;2?2#tS}PXIX}%*r6b5Tut^P|3(302Tl#0!e_tWWP|~fJjD$GKOmP zl+@znB6TYTb(=H`bsYuuwEUuyqQt!T{Gyapkhoi7PH`$wyf`DVAQi~hFxF8p*3?m` zRUjL12?)GZN<2N+{$EMgG><>;_cu%w^cTNhzyCn(UGBnF<@+5zpNJPQs<&r2v|W3_ z|M&ZI?E^(q|Gc*^&_3|(!nL{f4A;7YL-v>L4?yvLl()3c4505o807sDnD;>fAONE& z^}Zp6-nTp@dy74P`TnQc+1uwFys^LJWzIYIpl4{_S=_#LHOxC4K<^Zlr6vznAAQOF z^`fKev;7%)pW0IjpZ5!YKkj$o-Y5G2Z$>5&W?U&05-T9Gfe}Q4(<{0b)K~>6gMbFc n|3C&fj?uNCh8#=-$d+?Jdtl)h;LXYgl4AzKIY4>=6Nm=@B*( 0: # only project points in front of the camera + u = K[0, 0] * (p[0] / p[2]) + K[0, 2] + v = K[1, 1] * (p[1] / p[2]) + K[1, 2] + cv2.circle(self.rosbag_image, (int(u),int(v)), 2, (0, 0, 255), -1) + + ros_img = self.cv_bridge.cv2_to_imgmsg(self.rosbag_image, 'bgr8') + self.rosbag_cam_lidar_pub.publish(ros_img) + + + + # if(self.visualization): + # transform = self.tf_buffer.lookup_transform('base_link', pointcloud.header.frame_id, rospy.Time(0), rospy.Duration(1.0)) + # # Transform the point cloud + # transformed_cloud = do_transform_cloud(pointcloud, transform) + # # Publish transformed point cloud + # self.rosbag_lidar_ouster_pub.publish(transformed_cloud) + # self.publish_base_link_to_map() + + + def load_extrinsics(self,extrinsics_file): + """ + Load calibrated extrinsics from a .npz file. + Assumes the file contains keys 'R' and 't'. + """ + data = np.load(extrinsics_file) + R = data['R'] + t = data['t'] + return R, t + + def load_intrinsics(self,intrinsics_file): + """ + Load camera intrinsics from a JSON file. + Expects keys: 'fx', 'fy', 'cx', and 'cy'. + """ + with open(intrinsics_file, 'r') as f: + intrinsics = json.load(f) + fx = intrinsics["fx"] + fy = intrinsics["fy"] + cx = intrinsics["cx"] + cy = intrinsics["cy"] + K = np.array([[fx, 0, cx], + [0, fy, cy], + [0, 0, 1]], dtype=np.float32) + return K # Use cv2.Mat for GEM Car, Image for RosBag @@ -220,6 +284,7 @@ def image_callback(self, image : Image): #image : cv2.Mat): # Draw main text on top of the outline cv2.putText(image, label, (text_x, text_y - baseline), font, font_scale, font_color, text_thickness) + self.rosbag_image = image # Used for visualization From bf4f433fb4dde499e257041579dff6da70a44b64 Mon Sep 17 00:00:00 2001 From: LucasEby Date: Tue, 18 Feb 2025 08:17:26 -0500 Subject: [PATCH 04/15] Found some frame problems in parent branch --- GEMstack/onboard/perception/AgentTracker.py | 144 ++++++++++++++++++ GEMstack/onboard/perception/IdTracker.py | 12 ++ GEMstack/onboard/perception/PrevAgent.py | 17 +++ .../perception/pedestrian_detection.py | 22 ++- 4 files changed, 187 insertions(+), 8 deletions(-) create mode 100644 GEMstack/onboard/perception/AgentTracker.py create mode 100644 GEMstack/onboard/perception/IdTracker.py create mode 100644 GEMstack/onboard/perception/PrevAgent.py diff --git a/GEMstack/onboard/perception/AgentTracker.py b/GEMstack/onboard/perception/AgentTracker.py new file mode 100644 index 000000000..55e2cd917 --- /dev/null +++ b/GEMstack/onboard/perception/AgentTracker.py @@ -0,0 +1,144 @@ +import math +from typing import Dict, List + +from GEMstack.onboard.perception.IdTracker import IdTracker +from GEMstack.onboard.perception.PrevAgent import PrevAgent +from GEMstack.state.agent import AgentState + + +class AgentTracker(): + """Associates and tracks AgentState agents. + """ + def __init__(self): + # List of PrevAgent objects (each keeps track of the last seen state and time since seen) + self.prev_agents: List[PrevAgent] = [] + # List of currently visible AgentState objects + self.current_agents: List[AgentState] = [] + # Maximum time (in seconds) to keep a lost agent before dropping it. + self.drop_agent_t: float = 1.0 + # Id tracker for creating new unique pedestrian IDs. + self.id_tracker = IdTracker() + + def assign_ids(self, agents: list) -> Dict[str,AgentState]: + # Act with the assumption that you are being sent a list of AgentState objects and you need to use the object fields to keep track of them for your task + # Further act on the assumption that we will decide the id's of the pedestrians by assuming that 2 pedestrians are the same pedestrian if a + # previously stored AgentState pose and dimensions overlap with a newly passed in AgentState + # Act on the assumption that the AgentState objects are all in reference to the start frame of the vehicle + # some helper functions in this class, LostAgent.py, and IdTracker.py have been created to try to help you out with your task. + # Assume that the output returned from this function will be a dictionary of AgentState objects with the key corresponding to their id + """ + Associates new AgentState objects with existing tracked agents based on overlap. + If an agent does not match any previously tracked agent, a new unique id is assigned. + Also updates the “lost” time for agents that are not matched in this frame. + + Parameters: + agents (list): List of AgentState objects for the current frame + (already converted to the start frame of reference). + + Returns: + Dict[str, AgentState]: Dictionary mapping agent id (as a string) to AgentState. + """ + dt = 1.0 # Assuming a fixed time-step of 1 second (for simplicity) + output_agents: Dict[str, AgentState] = {} + matched_ids = set() + updated_prev_agents: List[PrevAgent] = [] + + # Process each new agent from the current frame. + for new_agent in agents: + found_match = None + # Look for a previously tracked agent whose bounding box overlaps. + for prev in self.prev_agents: + if prev.last_id in matched_ids: + continue # Already matched with another new agent. + if self.__agents_overlap(prev.last_state, new_agent): + found_match = prev + break + + if found_match is not None: + # update velocity using the change in position. + if hasattr(new_agent, 'velocity'): + new_agent.velocity = self.__calculate_velocity(found_match.last_state, new_agent, dt) + + # Update the matched agent’s state and reset its lost-time counter. + found_match.last_state = new_agent + found_match.time_since_seen = 0.0 + matched_ids.add(found_match.last_id) + output_agents[str(found_match.last_id)] = new_agent + updated_prev_agents.append(found_match) + else: + # No match found – assign a new unique id. + new_id = self.id_tracker.get_new_ped_id() + new_prev = PrevAgent(new_id, new_agent) + # initialize velocity to 0. + if hasattr(new_agent, 'velocity'): + new_agent.velocity = 0.0 + output_agents[str(new_id)] = new_agent + updated_prev_agents.append(new_prev) + + # For all previously tracked agents that were not matched this frame, + # update their time-since-seen and only keep them if they have not timed out. + for prev in self.prev_agents: + if prev.last_id not in matched_ids: + prev.update_time(dt) + if prev.time_since_seen < self.drop_agent_t: + updated_prev_agents.append(prev) + + # Save the updated list of tracked agents. + self.prev_agents = updated_prev_agents + # update current_agents to include only those seen in the current frame. + self.current_agents = list(output_agents.values()) + return output_agents + + def __convert_to_start_frame(self): + """Converts a list of AgentState agents from ouster Lidar frame of + reference (which is in reference to the current frame) to start + frame frame of reference + """ + # you can ignore this function akul + pass + + def __agents_overlap(self, ped1, ped2) -> bool: + """ + Determines if two AgentState objects overlap based on their pose and dimensions. + + Assumes each AgentState has: + - pose with attributes x and y. + - dimensions with attributes width and height. + + Returns: + bool: True if they overlap; False otherwise. + """ + # Get first agent's properties. + x1, y1 = ped1.pose.x, ped1.pose.y + w1, h1 = ped1.dimensions.width, ped1.dimensions.height + + # Get second agent's properties. + x2, y2 = ped2.pose.x, ped2.pose.y + w2, h2 = ped2.dimensions.width, ped2.dimensions.height + + # Compute bounding boxes (assuming (x, y) is the center). + left1, right1 = x1 - w1 / 2, x1 + w1 / 2 + top1, bottom1 = y1 - h1 / 2, y1 + h1 / 2 + + left2, right2 = x2 - w2 / 2, x2 + w2 / 2 + top2, bottom2 = y2 - h2 / 2, y2 + h2 / 2 + + # Check for overlap between the bounding boxes. + overlap = not (right1 < left2 or left1 > right2 or bottom1 < top2 or top1 > bottom2) + return overlap + + def __calculate_velocity(self, old_state: AgentState, new_state: AgentState, dt: float) -> float: + """ + Calculates the velocity based on the change in pose over time. + + Parameters: + old_state (AgentState): The previous state. + new_state (AgentState): The current state. + dt (float): Time difference between the two states. + + Returns: + float: The computed velocity. + """ + dx = new_state.pose.x - old_state.pose.x + dy = new_state.pose.y - old_state.pose.y + return math.sqrt(dx * dx + dy * dy) / dt if dt > 0 else 0.0 diff --git a/GEMstack/onboard/perception/IdTracker.py b/GEMstack/onboard/perception/IdTracker.py new file mode 100644 index 000000000..1c9c6ecbd --- /dev/null +++ b/GEMstack/onboard/perception/IdTracker.py @@ -0,0 +1,12 @@ +class IdTracker(): + """Abstracts out id tracking + """ + def __init__(self): + self.__ped_id = 0 + + def get_new_ped_id(self) -> int: + """Returns a unique pedestrian id + """ + assigned_id = self.__id + self.__ped_id += 1 # id will intentionally overflow to get back to 0 + return assigned_id diff --git a/GEMstack/onboard/perception/PrevAgent.py b/GEMstack/onboard/perception/PrevAgent.py new file mode 100644 index 000000000..210cea556 --- /dev/null +++ b/GEMstack/onboard/perception/PrevAgent.py @@ -0,0 +1,17 @@ +# GEM imports: +from ...state import AgentState + +class PrevAgent(): + def __init__(self, last_id: int, last_state: AgentState): + self.time_since_seen: float = 0.0 # Time since the agent was last seen in seconds + self.last_id: int = last_id + self.last_state: AgentState = last_state + + def update_time(time: float): + """Updates the time since the agent was last seen + """ + if time <= 0: + # TODO: log error here + print("UPDATE TIME FOR LOST AGENT WAS LESS THAN OR EQUAL TO 0") + else: + self.time_since_seen += time \ No newline at end of file diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py index 9970c6a29..519d85505 100644 --- a/GEMstack/onboard/perception/pedestrian_detection.py +++ b/GEMstack/onboard/perception/pedestrian_detection.py @@ -48,6 +48,8 @@ from .pedestrian_detection_utils import * from ..interface.gem import GEMInterface from ..component import Component +from .AgentTracker import AgentTracker + def box_to_fake_agent(box): """Creates a fake agent state from an (x,y,w,h) bounding box. @@ -238,14 +240,7 @@ def update_object_states(self, track_result: List[Results], extracted_pts_all: L if len(pedestrians_3d_pts) != num_objs: raise Exception('Perception - Camera detections, points clusters num. mismatch') - # TODO: Slower but cleaner to pass dicts of AgentState - # or at least {track_ids: centers/pts/etc} - # TODO: Combine funcs for efficiency in C. - # Separate numpy prob still faster for now - obj_centers = self.find_centers(pedestrians_3d_pts) - obj_dims = self.find_dims(pedestrians_3d_pts) - obj_vels = self.find_vels(track_ids, obj_centers) - + # TODO: CONVERT FROM LIDAR FRAME TO VEHICLE FRAME HERE (vehicle frame is center of rear axle at vehicle's current location) # If in vehicle frame, transform centers from top_lidar frame to vehicle frame # Need to transform the center point one by one since matrix op can't deal with empty points if self.vehicle_frame: @@ -259,6 +254,17 @@ def update_object_states(self, track_result: List[Results], extracted_pts_all: L obj_centers_vehicle.append(np.array(())) obj_centers = obj_centers_vehicle + # TODO: Slower but cleaner to pass dicts of AgentState + # or at least {track_ids: centers/pts/etc} + # TODO: Combine funcs for efficiency in C. + # Separate numpy prob still faster for now + obj_centers = self.find_centers(pedestrians_3d_pts) + obj_dims = self.find_dims(pedestrians_3d_pts) + + # TODO: CONVERT FROM VEHICLE FRAME TO START FRAME HERE + # Then compare previous and current agents with the same id to calculate velocity + obj_vels = self.find_vels(track_ids, obj_centers) + # Update Current AgentStates for ind in range(num_objs): obj_center = (None, None, None) if obj_centers[ind].size == 0 else obj_centers[ind] From 8a7ceb64c7910f25b53ca806257e5ede867be2c3 Mon Sep 17 00:00:00 2001 From: LucasEby Date: Tue, 18 Feb 2025 11:31:55 -0500 Subject: [PATCH 05/15] Wrote in logic to finish part 3. Still need to implement 1-2 transforms and to fix a small list bug --- GEMstack/onboard/perception/AgentTracker.py | 144 ----------------- GEMstack/onboard/perception/IdTracker.py | 8 +- GEMstack/onboard/perception/PrevAgent.py | 17 -- .../perception/pedestrian_detection.py | 149 +++++++++++++++--- 4 files changed, 129 insertions(+), 189 deletions(-) delete mode 100644 GEMstack/onboard/perception/AgentTracker.py delete mode 100644 GEMstack/onboard/perception/PrevAgent.py diff --git a/GEMstack/onboard/perception/AgentTracker.py b/GEMstack/onboard/perception/AgentTracker.py deleted file mode 100644 index 55e2cd917..000000000 --- a/GEMstack/onboard/perception/AgentTracker.py +++ /dev/null @@ -1,144 +0,0 @@ -import math -from typing import Dict, List - -from GEMstack.onboard.perception.IdTracker import IdTracker -from GEMstack.onboard.perception.PrevAgent import PrevAgent -from GEMstack.state.agent import AgentState - - -class AgentTracker(): - """Associates and tracks AgentState agents. - """ - def __init__(self): - # List of PrevAgent objects (each keeps track of the last seen state and time since seen) - self.prev_agents: List[PrevAgent] = [] - # List of currently visible AgentState objects - self.current_agents: List[AgentState] = [] - # Maximum time (in seconds) to keep a lost agent before dropping it. - self.drop_agent_t: float = 1.0 - # Id tracker for creating new unique pedestrian IDs. - self.id_tracker = IdTracker() - - def assign_ids(self, agents: list) -> Dict[str,AgentState]: - # Act with the assumption that you are being sent a list of AgentState objects and you need to use the object fields to keep track of them for your task - # Further act on the assumption that we will decide the id's of the pedestrians by assuming that 2 pedestrians are the same pedestrian if a - # previously stored AgentState pose and dimensions overlap with a newly passed in AgentState - # Act on the assumption that the AgentState objects are all in reference to the start frame of the vehicle - # some helper functions in this class, LostAgent.py, and IdTracker.py have been created to try to help you out with your task. - # Assume that the output returned from this function will be a dictionary of AgentState objects with the key corresponding to their id - """ - Associates new AgentState objects with existing tracked agents based on overlap. - If an agent does not match any previously tracked agent, a new unique id is assigned. - Also updates the “lost” time for agents that are not matched in this frame. - - Parameters: - agents (list): List of AgentState objects for the current frame - (already converted to the start frame of reference). - - Returns: - Dict[str, AgentState]: Dictionary mapping agent id (as a string) to AgentState. - """ - dt = 1.0 # Assuming a fixed time-step of 1 second (for simplicity) - output_agents: Dict[str, AgentState] = {} - matched_ids = set() - updated_prev_agents: List[PrevAgent] = [] - - # Process each new agent from the current frame. - for new_agent in agents: - found_match = None - # Look for a previously tracked agent whose bounding box overlaps. - for prev in self.prev_agents: - if prev.last_id in matched_ids: - continue # Already matched with another new agent. - if self.__agents_overlap(prev.last_state, new_agent): - found_match = prev - break - - if found_match is not None: - # update velocity using the change in position. - if hasattr(new_agent, 'velocity'): - new_agent.velocity = self.__calculate_velocity(found_match.last_state, new_agent, dt) - - # Update the matched agent’s state and reset its lost-time counter. - found_match.last_state = new_agent - found_match.time_since_seen = 0.0 - matched_ids.add(found_match.last_id) - output_agents[str(found_match.last_id)] = new_agent - updated_prev_agents.append(found_match) - else: - # No match found – assign a new unique id. - new_id = self.id_tracker.get_new_ped_id() - new_prev = PrevAgent(new_id, new_agent) - # initialize velocity to 0. - if hasattr(new_agent, 'velocity'): - new_agent.velocity = 0.0 - output_agents[str(new_id)] = new_agent - updated_prev_agents.append(new_prev) - - # For all previously tracked agents that were not matched this frame, - # update their time-since-seen and only keep them if they have not timed out. - for prev in self.prev_agents: - if prev.last_id not in matched_ids: - prev.update_time(dt) - if prev.time_since_seen < self.drop_agent_t: - updated_prev_agents.append(prev) - - # Save the updated list of tracked agents. - self.prev_agents = updated_prev_agents - # update current_agents to include only those seen in the current frame. - self.current_agents = list(output_agents.values()) - return output_agents - - def __convert_to_start_frame(self): - """Converts a list of AgentState agents from ouster Lidar frame of - reference (which is in reference to the current frame) to start - frame frame of reference - """ - # you can ignore this function akul - pass - - def __agents_overlap(self, ped1, ped2) -> bool: - """ - Determines if two AgentState objects overlap based on their pose and dimensions. - - Assumes each AgentState has: - - pose with attributes x and y. - - dimensions with attributes width and height. - - Returns: - bool: True if they overlap; False otherwise. - """ - # Get first agent's properties. - x1, y1 = ped1.pose.x, ped1.pose.y - w1, h1 = ped1.dimensions.width, ped1.dimensions.height - - # Get second agent's properties. - x2, y2 = ped2.pose.x, ped2.pose.y - w2, h2 = ped2.dimensions.width, ped2.dimensions.height - - # Compute bounding boxes (assuming (x, y) is the center). - left1, right1 = x1 - w1 / 2, x1 + w1 / 2 - top1, bottom1 = y1 - h1 / 2, y1 + h1 / 2 - - left2, right2 = x2 - w2 / 2, x2 + w2 / 2 - top2, bottom2 = y2 - h2 / 2, y2 + h2 / 2 - - # Check for overlap between the bounding boxes. - overlap = not (right1 < left2 or left1 > right2 or bottom1 < top2 or top1 > bottom2) - return overlap - - def __calculate_velocity(self, old_state: AgentState, new_state: AgentState, dt: float) -> float: - """ - Calculates the velocity based on the change in pose over time. - - Parameters: - old_state (AgentState): The previous state. - new_state (AgentState): The current state. - dt (float): Time difference between the two states. - - Returns: - float: The computed velocity. - """ - dx = new_state.pose.x - old_state.pose.x - dy = new_state.pose.y - old_state.pose.y - return math.sqrt(dx * dx + dy * dy) / dt if dt > 0 else 0.0 diff --git a/GEMstack/onboard/perception/IdTracker.py b/GEMstack/onboard/perception/IdTracker.py index 1c9c6ecbd..15641fa00 100644 --- a/GEMstack/onboard/perception/IdTracker.py +++ b/GEMstack/onboard/perception/IdTracker.py @@ -2,11 +2,11 @@ class IdTracker(): """Abstracts out id tracking """ def __init__(self): - self.__ped_id = 0 + self.__id = 0 - def get_new_ped_id(self) -> int: - """Returns a unique pedestrian id + def get_new_id(self) -> int: + """Returns a unique agent id """ assigned_id = self.__id - self.__ped_id += 1 # id will intentionally overflow to get back to 0 + self.__id += 1 # id will intentionally overflow to get back to 0 return assigned_id diff --git a/GEMstack/onboard/perception/PrevAgent.py b/GEMstack/onboard/perception/PrevAgent.py deleted file mode 100644 index 210cea556..000000000 --- a/GEMstack/onboard/perception/PrevAgent.py +++ /dev/null @@ -1,17 +0,0 @@ -# GEM imports: -from ...state import AgentState - -class PrevAgent(): - def __init__(self, last_id: int, last_state: AgentState): - self.time_since_seen: float = 0.0 # Time since the agent was last seen in seconds - self.last_id: int = last_id - self.last_state: AgentState = last_state - - def update_time(time: float): - """Updates the time since the agent was last seen - """ - if time <= 0: - # TODO: log error here - print("UPDATE TIME FOR LOST AGENT WAS LESS THAN OR EQUAL TO 0") - else: - self.time_since_seen += time \ No newline at end of file diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py index 519d85505..e5832601d 100644 --- a/GEMstack/onboard/perception/pedestrian_detection.py +++ b/GEMstack/onboard/perception/pedestrian_detection.py @@ -33,6 +33,7 @@ import os from typing import List, Dict from collections import defaultdict +from datetime import datetime # ROS, CV import rospy import message_filters @@ -48,7 +49,7 @@ from .pedestrian_detection_utils import * from ..interface.gem import GEMInterface from ..component import Component -from .AgentTracker import AgentTracker +from .IdTracker import IdTracker def box_to_fake_agent(box): @@ -110,6 +111,10 @@ def __init__(self, vehicle_interface : GEMInterface) -> None: self.tf_listener = tf.TransformListener() if self.debug: self.init_debug() + + self.prev_time = None # Time in seconds since last scan for basic velocity calculation + self.current_time = None # Time in seconds of current scan for basic velocity calculation + self.id_tracker = IdTracker() def init_debug(self,) -> None: # Debug Publishers @@ -144,7 +149,7 @@ def find_dims(self, clusters: List[List[np.ndarray]]) -> List[np.ndarray]: # Work towards own tracking class instead of simple YOLO track? # Fix division by time # ret: Dict[track_id: vel[x, y, z]] - def find_vels(self, track_ids: List[int], obj_centers: List[np.ndarray]) -> Dict[int, np.ndarray]: + def find_vels(self, track_ids: List[int], obj_centers: List[np.ndarray], obj_dims: List[np.ndarray]) -> Dict[int, np.ndarray]: # Object not seen -> velocity = None track_id_center_map = dict(zip(track_ids, obj_centers)) vels = defaultdict(lambda: np.array(())) # None is faster, np.array matches other find_ methods. @@ -159,8 +164,7 @@ def find_vels(self, track_ids: List[int], obj_centers: List[np.ndarray]) -> Dict if prev_agent.pose.x and prev_agent.pose.y and prev_agent.pose.z and track_id_center_map[prev_agent.track_id].shape == 3: vels[prev_track_id] = (track_id_center_map[prev_track_id] - np.array([prev_agent.pose.x, prev_agent.pose.y, prev_agent.pose.z])) / (self.curr_time - self.prev_time) return vels - - + # TODO: Separate debug/viz class, bbox and 2d 3d points funcs def viz_object_states(self, cv_image, boxes, extracted_pts_all): # Extract 3D pedestrians points in lidar frame @@ -221,7 +225,7 @@ def viz_object_states(self, cv_image, boxes, extracted_pts_all): def update_object_states(self, track_result: List[Results], extracted_pts_all: List[np.ndarray]) -> None: - self.prev_agents = self.current_agents.copy() + # self.prev_agents = self.current_agents.copy() self.current_agents.clear() # Return if no track results available @@ -263,27 +267,124 @@ def update_object_states(self, track_result: List[Results], extracted_pts_all: L # TODO: CONVERT FROM VEHICLE FRAME TO START FRAME HERE # Then compare previous and current agents with the same id to calculate velocity - obj_vels = self.find_vels(track_ids, obj_centers) - - # Update Current AgentStates - for ind in range(num_objs): - obj_center = (None, None, None) if obj_centers[ind].size == 0 else obj_centers[ind] - obj_dim = (None, None, None) if obj_dims[ind].size == 0 else obj_dims[ind] - self.current_agents[track_ids[ind]] = ( - AgentState( - track_id = track_ids[ind], - pose=ObjectPose(t=0, x=obj_center[0], y=obj_center[1], z=obj_center[2] ,yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT), - # (l, w, h) - # TODO: confirm (z -> l, x -> w, y -> h) - dimensions=(obj_dim[0], obj_dim[1], obj_dim[2]), - outline=None, - type=AgentEnum.PEDESTRIAN, - activity=AgentActivityEnum.MOVING, - velocity= None if obj_vels[track_ids[ind]].size == 0 else tuple(obj_vels[track_ids[ind]]), - yaw_rate=0 - )) + self.find_vels_and_ids(obj_centers, obj_dims) + # obj_vels = self.find_vels(track_ids, obj_centers) + + # # Update Current AgentStates + # for ind in range(num_objs): + # obj_center = (None, None, None) if obj_centers[ind].size == 0 else obj_centers[ind] + # obj_dim = (None, None, None) if obj_dims[ind].size == 0 else obj_dims[ind] + # self.current_agents[track_ids[ind]] = ( + # AgentState( + # track_id = track_ids[ind], + # pose=ObjectPose(t=0, x=obj_center[0], y=obj_center[1], z=obj_center[2] ,yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT), + # # (l, w, h) + # # TODO: confirm (z -> l, x -> w, y -> h) + # dimensions=(obj_dim[0], obj_dim[1], obj_dim[2]), + # outline=None, + # type=AgentEnum.PEDESTRIAN, + # activity=AgentActivityEnum.MOVING, + # velocity= None if obj_vels[track_ids[ind]].size == 0 else tuple(obj_vels[track_ids[ind]]), + # yaw_rate=0 + # )) + + def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.ndarray]): + new_prev_agents = {} # Stores current agents in START frame for next time through (since + # planning wants us to send them agents in CURRENT frame) + # Object not seen -> velocity = None + vels = defaultdict(lambda: np.array(())) # None is faster, np.array matches other find_ methods. + + # THIS ASSUMES EVERYTHING IS IN THE SAME FRAME WHICH WOULD WORK FOR STATIONARY CAR. + # TODO: NEED TO STORE AND INCORPORATE TRANSFORMS SOMEHOW TO DEAL WITH MOVING CAR CASE + assigned = False + num_pairings = len(obj_centers) + converted_centers = obj_centers # TODO: REPLACE WITH THIS: self.convert_vehicle_frame_to_start_frame(obj_centers) + + # Loop through the indexes of the obj_center and obj_dim pairings + for idx in range(num_pairings): + assigned = False + + # Loop through previous agents backwards + for prev_id, prev_state in reversed(self.prev_agents.items()): + # If an obj_center and obj_dim pair overlaps with a previous agent, assume that they're the same agent + if self.agents_overlap(converted_centers[idx], obj_dims[idx], prev_state): + assigned = True + + if self.prev_time == None: + # This shouldn't ever be triggered + vel = 0 + else: + delta_t = self.curr_time - self.prev_time + vel = (obj_centers[idx] - np.array([prev_state.pose.x, prev_state.pose.y, prev_state.pose.z])) / delta_t.total_seconds() + print("VELOCITY:") + print(vel) + + self.current_agents[prev_id] = ( + AgentState( + track_id = prev_id, + pose=ObjectPose(t=0, x=obj_centers[idx][0], y=obj_centers[idx][1], z=obj_centers[idx][2], yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT), + # (l, w, h) + # TODO: confirm (z -> l, x -> w, y -> h) + dimensions=(obj_dims[idx][0], obj_dims[idx][1], obj_dims[idx][2]), + outline=None, + type=AgentEnum.PEDESTRIAN, + activity=AgentActivityEnum.MOVING, + velocity= None if vel.size == 0 else tuple(vel), + yaw_rate=0 + )) + del self.prev_agents[prev_id] # Remove previous agent from previous agents + break + + # If not assigned: + if not assigned: + # Set velocity to 0 and assign the new agent a new id with IdTracker + id = self.id_tracker.get_new_id() + self.current_agents[id] = ( + AgentState( + track_id = id, + pose=ObjectPose(t=0, x=obj_centers[idx][0], y=obj_centers[idx][1], z=obj_centers[idx][2] ,yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT), + # (l, w, h) + # TODO: confirm (z -> l, x -> w, y -> h) + dimensions=(obj_dims[idx][0], obj_dims[idx][1], obj_dims[idx][2]), + outline=None, + type=AgentEnum.PEDESTRIAN, + activity=AgentActivityEnum.MOVING, + velocity= None, + yaw_rate=0 + )) + self.prev_agents = new_prev_agents + + # Calculates whether 2 agents overlap in START frame. True if they do, false if not + def agents_overlap(obj_center: np.ndarray, obj_dim: np.ndarray, prev_agent: AgentState) -> bool: + # Calculate corners of obj_center and obj_dim pairing + x1_min, x1_max = obj_center[0] - obj_dim[0] / 2.0, obj_center[0] + obj_dim[0] / 2.0 + y1_min, y1_max = obj_center[1] - obj_dim[1] / 2.0, obj_center[1] + obj_dim[1] / 2.0 # CENTER CALCULATION + z1_min, z1_max = obj_center[2] - obj_dim[2] / 2.0, obj_center[2] + obj_dim[2] / 2.0 + + # Calculate corners of AgentState + # Beware: AgentState(PhysicalObject) builds bbox from + # dims [-l/2,l/2] x [-w/2,w/2] x [0,h], not + # [-l/2,l/2] x [-w/2,w/2] x [-h/2,h/2] + # TODO: confirm (z -> l, x -> w, y -> h) + x2_min, x2_max = prev_agent.pose.x - prev_agent.dimensions.width / 2.0, prev_agent.pose.x + prev_agent.dimensions.width / 2.0 + y2_min, y2_max = prev_agent.pose.y, prev_agent.pose.y + prev_agent.dimensions.height # AGENT STATE CALCULATION + z2_min, z2_max = prev_agent.pose.z - prev_agent.dimensions.length / 2.0, prev_agent.pose.z + prev_agent.dimensions.length / 2.0 + + # True if they overlap, false if not + return ( + ( (x1_min <= x2_min and x2_min <= x1_max) or (x2_min <= x1_min and x1_min <= x2_max) ) and + ( (y1_min <= y2_min and y2_min <= y1_max) or (y2_min <= y1_min and y1_min <= y2_max) ) and + ( (z1_min <= z2_min and z2_min <= z1_max) or (z2_min <= z1_min and z1_min <= z2_max) ) + ) + + def convert_vehicle_frame_to_start_frame(obj_centers: List[np.ndarray]) -> List[np.ndarray]: + pass def ouster_oak_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2): + # Update times for basic velocity calculation + self.prev_time = self.current_time + self.current_time = datetime.now() + # Convert to cv2 image and run detector cv_image = self.bridge.imgmsg_to_cv2(rgb_image_msg, "bgr8") track_result = self.detector.track(source=cv_image, classes=self.classes_to_detect, persist=True, conf=self.confidence) From 2aa0ba6ef0dc690ac3e74439b2bb1bbb168e75c5 Mon Sep 17 00:00:00 2001 From: LucasEby Date: Tue, 18 Feb 2025 16:25:47 -0500 Subject: [PATCH 06/15] Fixed find_vels_and_ids logic bugs, AgentState bugs, timing bugs, etc --- .../perception/pedestrian_detection.py | 66 ++++++++++++++----- 1 file changed, 51 insertions(+), 15 deletions(-) diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py index e5832601d..c515ffe07 100644 --- a/GEMstack/onboard/perception/pedestrian_detection.py +++ b/GEMstack/onboard/perception/pedestrian_detection.py @@ -82,8 +82,6 @@ def __init__(self, vehicle_interface : GEMInterface) -> None: self.prev_agents = dict() self.current_agents = dict() # TODO Implement time - self.prev_time = 0 - self.curr_time = 1 # Avoid divide by 0 for placebolder, 0 self.confidence = 0.7 self.classes_to_detect = 0 self.ground_threshold = -2.0 @@ -113,7 +111,7 @@ def __init__(self, vehicle_interface : GEMInterface) -> None: if self.debug: self.init_debug() self.prev_time = None # Time in seconds since last scan for basic velocity calculation - self.current_time = None # Time in seconds of current scan for basic velocity calculation + self.curr_time = None # Time in seconds of current scan for basic velocity calculation self.id_tracker = IdTracker() def init_debug(self,) -> None: @@ -267,6 +265,9 @@ def update_object_states(self, track_result: List[Results], extracted_pts_all: L # TODO: CONVERT FROM VEHICLE FRAME TO START FRAME HERE # Then compare previous and current agents with the same id to calculate velocity + # for idx in range(len(obj_centers)): + # print(obj_centers[idx]) + # print(obj_dims[idx]) self.find_vels_and_ids(obj_centers, obj_dims) # obj_vels = self.find_vels(track_ids, obj_centers) @@ -289,6 +290,12 @@ def update_object_states(self, track_result: List[Results], extracted_pts_all: L # )) def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.ndarray]): + # Gate to check whether dims and centers are empty: + if ((len(obj_dims) == 1 or len(obj_centers) == 1) and (obj_centers[0].size == 0 or obj_dims[0].size == 0)): + self.prev_agents = self.current_agents.copy() + self.current_agents = {} # There weren't any pedestrians detected + return + new_prev_agents = {} # Stores current agents in START frame for next time through (since # planning wants us to send them agents in CURRENT frame) # Object not seen -> velocity = None @@ -298,6 +305,9 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda # TODO: NEED TO STORE AND INCORPORATE TRANSFORMS SOMEHOW TO DEAL WITH MOVING CAR CASE assigned = False num_pairings = len(obj_centers) + if num_pairings != len(obj_dims): + for i in range(25): + print("ERROR NUM PARINGS AND OBJ DIMS ARENT THE SAME") converted_centers = obj_centers # TODO: REPLACE WITH THIS: self.convert_vehicle_frame_to_start_frame(obj_centers) # Loop through the indexes of the obj_center and obj_dim pairings @@ -311,11 +321,11 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda assigned = True if self.prev_time == None: - # This shouldn't ever be triggered - vel = 0 + # This will be triggered if the very first message has pedestrians in it + vel = 0.0 else: delta_t = self.curr_time - self.prev_time - vel = (obj_centers[idx] - np.array([prev_state.pose.x, prev_state.pose.y, prev_state.pose.z])) / delta_t.total_seconds() + vel = (converted_centers[idx] - np.array([prev_state.pose.x, prev_state.pose.y, prev_state.pose.z])) / delta_t.total_seconds() print("VELOCITY:") print(vel) @@ -329,7 +339,20 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda outline=None, type=AgentEnum.PEDESTRIAN, activity=AgentActivityEnum.MOVING, - velocity= None if vel.size == 0 else tuple(vel), + velocity=None if vel.size == 0 else tuple(vel), + yaw_rate=0 + )) + new_prev_agents[prev_id] = ( + AgentState( + track_id = prev_id, + pose=ObjectPose(t=0, x=converted_centers[idx][0], y=converted_centers[idx][1], z=converted_centers[idx][2], yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT), + # (l, w, h) + # TODO: confirm (z -> l, x -> w, y -> h) + dimensions=(obj_dims[idx][0], obj_dims[idx][1], obj_dims[idx][2]), + outline=None, + type=AgentEnum.PEDESTRIAN, + activity=AgentActivityEnum.MOVING, + velocity=None if vel.size == 0 else tuple(vel), yaw_rate=0 )) del self.prev_agents[prev_id] # Remove previous agent from previous agents @@ -349,13 +372,26 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda outline=None, type=AgentEnum.PEDESTRIAN, activity=AgentActivityEnum.MOVING, - velocity= None, + velocity=None, + yaw_rate=0 + )) + new_prev_agents[id] = ( + AgentState( + track_id = id, + pose=ObjectPose(t=0, x=converted_centers[idx][0], y=converted_centers[idx][1], z=converted_centers[idx][2] ,yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT), + # (l, w, h) + # TODO: confirm (z -> l, x -> w, y -> h) + dimensions=(obj_dims[idx][0], obj_dims[idx][1], obj_dims[idx][2]), + outline=None, + type=AgentEnum.PEDESTRIAN, + activity=AgentActivityEnum.MOVING, + velocity=None, yaw_rate=0 )) self.prev_agents = new_prev_agents # Calculates whether 2 agents overlap in START frame. True if they do, false if not - def agents_overlap(obj_center: np.ndarray, obj_dim: np.ndarray, prev_agent: AgentState) -> bool: + def agents_overlap(self, obj_center: np.ndarray, obj_dim: np.ndarray, prev_agent: AgentState) -> bool: # Calculate corners of obj_center and obj_dim pairing x1_min, x1_max = obj_center[0] - obj_dim[0] / 2.0, obj_center[0] + obj_dim[0] / 2.0 y1_min, y1_max = obj_center[1] - obj_dim[1] / 2.0, obj_center[1] + obj_dim[1] / 2.0 # CENTER CALCULATION @@ -366,9 +402,9 @@ def agents_overlap(obj_center: np.ndarray, obj_dim: np.ndarray, prev_agent: Agen # dims [-l/2,l/2] x [-w/2,w/2] x [0,h], not # [-l/2,l/2] x [-w/2,w/2] x [-h/2,h/2] # TODO: confirm (z -> l, x -> w, y -> h) - x2_min, x2_max = prev_agent.pose.x - prev_agent.dimensions.width / 2.0, prev_agent.pose.x + prev_agent.dimensions.width / 2.0 - y2_min, y2_max = prev_agent.pose.y, prev_agent.pose.y + prev_agent.dimensions.height # AGENT STATE CALCULATION - z2_min, z2_max = prev_agent.pose.z - prev_agent.dimensions.length / 2.0, prev_agent.pose.z + prev_agent.dimensions.length / 2.0 + x2_min, x2_max = prev_agent.pose.x - prev_agent.dimensions[0] / 2.0, prev_agent.pose.x + prev_agent.dimensions[0] / 2.0 + y2_min, y2_max = prev_agent.pose.y, prev_agent.pose.y + prev_agent.dimensions[1] # AGENT STATE CALCULATION + z2_min, z2_max = prev_agent.pose.z - prev_agent.dimensions[2] / 2.0, prev_agent.pose.z + prev_agent.dimensions[2] / 2.0 # True if they overlap, false if not return ( @@ -377,13 +413,13 @@ def agents_overlap(obj_center: np.ndarray, obj_dim: np.ndarray, prev_agent: Agen ( (z1_min <= z2_min and z2_min <= z1_max) or (z2_min <= z1_min and z1_min <= z2_max) ) ) - def convert_vehicle_frame_to_start_frame(obj_centers: List[np.ndarray]) -> List[np.ndarray]: + def convert_vehicle_frame_to_start_frame(self, obj_centers: List[np.ndarray]) -> List[np.ndarray]: pass def ouster_oak_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2): # Update times for basic velocity calculation - self.prev_time = self.current_time - self.current_time = datetime.now() + self.prev_time = self.curr_time + self.curr_time = datetime.now() # Convert to cv2 image and run detector cv_image = self.bridge.imgmsg_to_cv2(rgb_image_msg, "bgr8") From f080070f15fa62abfec8f4f22f6949331ef7421b Mon Sep 17 00:00:00 2001 From: LucasEby Date: Tue, 18 Feb 2025 17:55:44 -0500 Subject: [PATCH 07/15] I think I updated a comment --- .../onboard/perception/pedestrian_detection.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py index c515ffe07..f78b0c686 100644 --- a/GEMstack/onboard/perception/pedestrian_detection.py +++ b/GEMstack/onboard/perception/pedestrian_detection.py @@ -86,7 +86,7 @@ def __init__(self, vehicle_interface : GEMInterface) -> None: self.classes_to_detect = 0 self.ground_threshold = -2.0 self.max_human_depth = 0.9 - self.vehicle_frame = False # Indicate whether pedestrians centroids and point clouds are in the vehicle frame + self.vehicle_frame = True # Indicate whether pedestrians centroids and point clouds are in the vehicle frame # Load calibration data # TODO: Maybe lets add one word or link what R t K are? @@ -290,11 +290,17 @@ def update_object_states(self, track_result: List[Results], extracted_pts_all: L # )) def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.ndarray]): - # Gate to check whether dims and centers are empty: + # Gate to check whether dims and centers are empty (will happen if no pedestrians are scanned): if ((len(obj_dims) == 1 or len(obj_centers) == 1) and (obj_centers[0].size == 0 or obj_dims[0].size == 0)): self.prev_agents = self.current_agents.copy() self.current_agents = {} # There weren't any pedestrians detected return + elif (len(obj_centers) != len(obj_dims)): + for i in range(25): + print("ERROR NUM PARINGS AND OBJ DIMS ARENT THE SAME") + self.prev_agents = self.current_agents.copy() + self.current_agents = {} # There weren't any pedestrians detected + return new_prev_agents = {} # Stores current agents in START frame for next time through (since # planning wants us to send them agents in CURRENT frame) @@ -305,9 +311,7 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda # TODO: NEED TO STORE AND INCORPORATE TRANSFORMS SOMEHOW TO DEAL WITH MOVING CAR CASE assigned = False num_pairings = len(obj_centers) - if num_pairings != len(obj_dims): - for i in range(25): - print("ERROR NUM PARINGS AND OBJ DIMS ARENT THE SAME") + converted_centers = obj_centers # TODO: REPLACE WITH THIS: self.convert_vehicle_frame_to_start_frame(obj_centers) # Loop through the indexes of the obj_center and obj_dim pairings @@ -326,8 +330,8 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda else: delta_t = self.curr_time - self.prev_time vel = (converted_centers[idx] - np.array([prev_state.pose.x, prev_state.pose.y, prev_state.pose.z])) / delta_t.total_seconds() - print("VELOCITY:") - print(vel) + print("VELOCITY:") + print(vel) self.current_agents[prev_id] = ( AgentState( From db6d689ee54ea33dd1f29a6ebde756aaeb55fdd2 Mon Sep 17 00:00:00 2001 From: LucasEby Date: Tue, 18 Feb 2025 18:01:53 -0500 Subject: [PATCH 08/15] Cleaned up code a bit. Will add Vikram's suggestions in Slack next --- .../perception/pedestrian_detection.py | 34 +++---------------- 1 file changed, 5 insertions(+), 29 deletions(-) diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py index f78b0c686..bf4b10e12 100644 --- a/GEMstack/onboard/perception/pedestrian_detection.py +++ b/GEMstack/onboard/perception/pedestrian_detection.py @@ -242,7 +242,6 @@ def update_object_states(self, track_result: List[Results], extracted_pts_all: L if len(pedestrians_3d_pts) != num_objs: raise Exception('Perception - Camera detections, points clusters num. mismatch') - # TODO: CONVERT FROM LIDAR FRAME TO VEHICLE FRAME HERE (vehicle frame is center of rear axle at vehicle's current location) # If in vehicle frame, transform centers from top_lidar frame to vehicle frame # Need to transform the center point one by one since matrix op can't deal with empty points if self.vehicle_frame: @@ -260,35 +259,15 @@ def update_object_states(self, track_result: List[Results], extracted_pts_all: L # or at least {track_ids: centers/pts/etc} # TODO: Combine funcs for efficiency in C. # Separate numpy prob still faster for now - obj_centers = self.find_centers(pedestrians_3d_pts) + obj_centers = self.find_centers(pedestrians_3d_pts) # Centers are calculated in current vehicle frame here (center of rear axle) obj_dims = self.find_dims(pedestrians_3d_pts) # TODO: CONVERT FROM VEHICLE FRAME TO START FRAME HERE - # Then compare previous and current agents with the same id to calculate velocity - # for idx in range(len(obj_centers)): - # print(obj_centers[idx]) - # print(obj_dims[idx]) self.find_vels_and_ids(obj_centers, obj_dims) - # obj_vels = self.find_vels(track_ids, obj_centers) - - # # Update Current AgentStates - # for ind in range(num_objs): - # obj_center = (None, None, None) if obj_centers[ind].size == 0 else obj_centers[ind] - # obj_dim = (None, None, None) if obj_dims[ind].size == 0 else obj_dims[ind] - # self.current_agents[track_ids[ind]] = ( - # AgentState( - # track_id = track_ids[ind], - # pose=ObjectPose(t=0, x=obj_center[0], y=obj_center[1], z=obj_center[2] ,yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT), - # # (l, w, h) - # # TODO: confirm (z -> l, x -> w, y -> h) - # dimensions=(obj_dim[0], obj_dim[1], obj_dim[2]), - # outline=None, - # type=AgentEnum.PEDESTRIAN, - # activity=AgentActivityEnum.MOVING, - # velocity= None if obj_vels[track_ids[ind]].size == 0 else tuple(obj_vels[track_ids[ind]]), - # yaw_rate=0 - # )) + # TODO: Refactor to make more efficient + # TODO: Moving Average across last N iterations pos/vel? Less spurious vals + # TODO Fix velocity calculation to calculate in ObjectFrameEnum.START def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.ndarray]): # Gate to check whether dims and centers are empty (will happen if no pedestrians are scanned): if ((len(obj_dims) == 1 or len(obj_centers) == 1) and (obj_centers[0].size == 0 or obj_dims[0].size == 0)): @@ -304,14 +283,11 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda new_prev_agents = {} # Stores current agents in START frame for next time through (since # planning wants us to send them agents in CURRENT frame) - # Object not seen -> velocity = None - vels = defaultdict(lambda: np.array(())) # None is faster, np.array matches other find_ methods. - # THIS ASSUMES EVERYTHING IS IN THE SAME FRAME WHICH WOULD WORK FOR STATIONARY CAR. - # TODO: NEED TO STORE AND INCORPORATE TRANSFORMS SOMEHOW TO DEAL WITH MOVING CAR CASE assigned = False num_pairings = len(obj_centers) + # TODO: NEED TO STORE AND INCORPORATE TRANSFORMS SOMEHOW TO DEAL WITH MOVING CAR CASE converted_centers = obj_centers # TODO: REPLACE WITH THIS: self.convert_vehicle_frame_to_start_frame(obj_centers) # Loop through the indexes of the obj_center and obj_dim pairings From bf80bbeb034e8d90c22937d0c416833ceaec6721 Mon Sep 17 00:00:00 2001 From: LucasEby Date: Tue, 18 Feb 2025 18:33:13 -0500 Subject: [PATCH 09/15] Accidentally moved a few lines of code. Fixed the error that resulted --- .../onboard/perception/pedestrian_detection.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py index bf4b10e12..9e7815c93 100644 --- a/GEMstack/onboard/perception/pedestrian_detection.py +++ b/GEMstack/onboard/perception/pedestrian_detection.py @@ -242,6 +242,13 @@ def update_object_states(self, track_result: List[Results], extracted_pts_all: L if len(pedestrians_3d_pts) != num_objs: raise Exception('Perception - Camera detections, points clusters num. mismatch') + # TODO: Slower but cleaner to pass dicts of AgentState + # or at least {track_ids: centers/pts/etc} + # TODO: Combine funcs for efficiency in C. + # Separate numpy prob still faster for now + obj_centers = self.find_centers(pedestrians_3d_pts) # Centers are calculated in lidar frame here + obj_dims = self.find_dims(pedestrians_3d_pts) + # If in vehicle frame, transform centers from top_lidar frame to vehicle frame # Need to transform the center point one by one since matrix op can't deal with empty points if self.vehicle_frame: @@ -254,13 +261,6 @@ def update_object_states(self, track_result: List[Results], extracted_pts_all: L else: obj_centers_vehicle.append(np.array(())) obj_centers = obj_centers_vehicle - - # TODO: Slower but cleaner to pass dicts of AgentState - # or at least {track_ids: centers/pts/etc} - # TODO: Combine funcs for efficiency in C. - # Separate numpy prob still faster for now - obj_centers = self.find_centers(pedestrians_3d_pts) # Centers are calculated in current vehicle frame here (center of rear axle) - obj_dims = self.find_dims(pedestrians_3d_pts) # TODO: CONVERT FROM VEHICLE FRAME TO START FRAME HERE self.find_vels_and_ids(obj_centers, obj_dims) From 6addddb87a9959c71e9c8821cb739f0be8625a7e Mon Sep 17 00:00:00 2001 From: LucasEby Date: Tue, 18 Feb 2025 19:17:49 -0500 Subject: [PATCH 10/15] In the process of converting to start frame. Pushing current WORKING code for Vikram --- .../perception/pedestrian_detection.py | 22 +++++++++++-------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py index 9e7815c93..2c24f3bef 100644 --- a/GEMstack/onboard/perception/pedestrian_detection.py +++ b/GEMstack/onboard/perception/pedestrian_detection.py @@ -45,13 +45,14 @@ from ultralytics import YOLO from ultralytics.engine.results import Results, Boxes # GEMStack -from ...state import AllState,VehicleState,ObjectPose,ObjectFrameEnum,AgentState,AgentEnum,AgentActivityEnum +from ...state import AllState,VehicleState,ObjectPose,ObjectFrameEnum,AgentState,AgentEnum,AgentActivityEnum,ObjectFrameEnum from .pedestrian_detection_utils import * from ..interface.gem import GEMInterface from ..component import Component from .IdTracker import IdTracker + def box_to_fake_agent(box): """Creates a fake agent state from an (x,y,w,h) bounding box. @@ -262,7 +263,6 @@ def update_object_states(self, track_result: List[Results], extracted_pts_all: L obj_centers_vehicle.append(np.array(())) obj_centers = obj_centers_vehicle - # TODO: CONVERT FROM VEHICLE FRAME TO START FRAME HERE self.find_vels_and_ids(obj_centers, obj_dims) # TODO: Refactor to make more efficient @@ -288,7 +288,7 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda num_pairings = len(obj_centers) # TODO: NEED TO STORE AND INCORPORATE TRANSFORMS SOMEHOW TO DEAL WITH MOVING CAR CASE - converted_centers = obj_centers # TODO: REPLACE WITH THIS: self.convert_vehicle_frame_to_start_frame(obj_centers) + converted_centers = obj_centers # TODO: Add this in when finished self.convert_vehicle_frame_to_start_frame(obj_centers) # Loop through the indexes of the obj_center and obj_dim pairings for idx in range(num_pairings): @@ -302,7 +302,7 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda if self.prev_time == None: # This will be triggered if the very first message has pedestrians in it - vel = 0.0 + vel = np.array([0, 0, 0]) else: delta_t = self.curr_time - self.prev_time vel = (converted_centers[idx] - np.array([prev_state.pose.x, prev_state.pose.y, prev_state.pose.z])) / delta_t.total_seconds() @@ -318,7 +318,7 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda dimensions=(obj_dims[idx][0], obj_dims[idx][1], obj_dims[idx][2]), outline=None, type=AgentEnum.PEDESTRIAN, - activity=AgentActivityEnum.MOVING, + activity=AgentActivityEnum.STOPPED if np.all(vel == 0) else AgentActivityEnum.MOVING, velocity=None if vel.size == 0 else tuple(vel), yaw_rate=0 )) @@ -331,7 +331,7 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda dimensions=(obj_dims[idx][0], obj_dims[idx][1], obj_dims[idx][2]), outline=None, type=AgentEnum.PEDESTRIAN, - activity=AgentActivityEnum.MOVING, + activity=AgentActivityEnum.STOPPED if np.all(vel == 0) else AgentActivityEnum.MOVING, velocity=None if vel.size == 0 else tuple(vel), yaw_rate=0 )) @@ -351,7 +351,7 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda dimensions=(obj_dims[idx][0], obj_dims[idx][1], obj_dims[idx][2]), outline=None, type=AgentEnum.PEDESTRIAN, - activity=AgentActivityEnum.MOVING, + activity=AgentActivityEnum.UNDETERMINED, velocity=None, yaw_rate=0 )) @@ -364,7 +364,7 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda dimensions=(obj_dims[idx][0], obj_dims[idx][1], obj_dims[idx][2]), outline=None, type=AgentEnum.PEDESTRIAN, - activity=AgentActivityEnum.MOVING, + activity=AgentActivityEnum.UNDETERMINED, velocity=None, yaw_rate=0 )) @@ -393,9 +393,13 @@ def agents_overlap(self, obj_center: np.ndarray, obj_dim: np.ndarray, prev_agent ( (z1_min <= z2_min and z2_min <= z1_max) or (z2_min <= z1_min and z1_min <= z2_max) ) ) - def convert_vehicle_frame_to_start_frame(self, obj_centers: List[np.ndarray]) -> List[np.ndarray]: + def convert_vehicle_frame_to_start_frame(self, vehicle_states: List[AgentState]) -> List[AgentState]: + # num_states = len(vehicle_states) + # for idx in num_states: + # num_states[idx].to_frame() pass + def ouster_oak_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2): # Update times for basic velocity calculation self.prev_time = self.curr_time From a18d1876b154329aedfd71e4487cef4da8a141a5 Mon Sep 17 00:00:00 2001 From: LucasEby Date: Tue, 18 Feb 2025 21:50:44 -0500 Subject: [PATCH 11/15] Took absolutely forever to find the secret sauce. Please excuse the mess --- .../perception/pedestrian_detection.py | 80 +++++++++++++------ 1 file changed, 54 insertions(+), 26 deletions(-) diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py index 2c24f3bef..381075c9b 100644 --- a/GEMstack/onboard/perception/pedestrian_detection.py +++ b/GEMstack/onboard/perception/pedestrian_detection.py @@ -114,6 +114,10 @@ def __init__(self, vehicle_interface : GEMInterface) -> None: self.prev_time = None # Time in seconds since last scan for basic velocity calculation self.curr_time = None # Time in seconds of current scan for basic velocity calculation self.id_tracker = IdTracker() + + # Update function variables + self.t_start = None # datetime.now() # Estimated start frame time + self.start_pose_abs = None def init_debug(self,) -> None: # Debug Publishers @@ -123,6 +127,10 @@ def init_debug(self,) -> None: self.pub_image = rospy.Publisher("/camera/image_detection", Image, queue_size=1) def update(self, vehicle : VehicleState) -> Dict[str,AgentState]: + if self.t_start is None: + self.t_start = self.vehicle_interface.time() + if self.start_pose_abs is None: + self.start_pose_abs = vehicle.pose return self.current_agents # TODO: Improve Algo Knn, ransac, etc. @@ -148,21 +156,21 @@ def find_dims(self, clusters: List[List[np.ndarray]]) -> List[np.ndarray]: # Work towards own tracking class instead of simple YOLO track? # Fix division by time # ret: Dict[track_id: vel[x, y, z]] - def find_vels(self, track_ids: List[int], obj_centers: List[np.ndarray], obj_dims: List[np.ndarray]) -> Dict[int, np.ndarray]: - # Object not seen -> velocity = None - track_id_center_map = dict(zip(track_ids, obj_centers)) - vels = defaultdict(lambda: np.array(())) # None is faster, np.array matches other find_ methods. - - for prev_track_id, prev_agent in self.prev_agents.items(): - if prev_track_id in track_ids: - # TODO: Add prev_agents to memory to avoid None velocity - # We should only be missing prev pose on first sight of track_id Agent. - # print("shape 1: ", track_id_center_map[prev_agent.track_id]) - # print("shape 2: ", np.array([prev_agent.pose.x, prev_agent.pose.y, prev_agent.pose.z])) - # prev can be 3 separate Nones, current is just empty array... make this symmetrical - if prev_agent.pose.x and prev_agent.pose.y and prev_agent.pose.z and track_id_center_map[prev_agent.track_id].shape == 3: - vels[prev_track_id] = (track_id_center_map[prev_track_id] - np.array([prev_agent.pose.x, prev_agent.pose.y, prev_agent.pose.z])) / (self.curr_time - self.prev_time) - return vels + # def find_vels(self, track_ids: List[int], obj_centers: List[np.ndarray], obj_dims: List[np.ndarray]) -> Dict[int, np.ndarray]: + # # Object not seen -> velocity = None + # track_id_center_map = dict(zip(track_ids, obj_centers)) + # vels = defaultdict(lambda: np.array(())) # None is faster, np.array matches other find_ methods. + + # for prev_track_id, prev_agent in self.prev_agents.items(): + # if prev_track_id in track_ids: + # # TODO: Add prev_agents to memory to avoid None velocity + # # We should only be missing prev pose on first sight of track_id Agent. + # # print("shape 1: ", track_id_center_map[prev_agent.track_id]) + # # print("shape 2: ", np.array([prev_agent.pose.x, prev_agent.pose.y, prev_agent.pose.z])) + # # prev can be 3 separate Nones, current is just empty array... make this symmetrical + # if prev_agent.pose.x and prev_agent.pose.y and prev_agent.pose.z and track_id_center_map[prev_agent.track_id].shape == 3: + # vels[prev_track_id] = (track_id_center_map[prev_track_id] - np.array([prev_agent.pose.x, prev_agent.pose.y, prev_agent.pose.z])) / (self.curr_time - self.prev_time) + # return vels # TODO: Separate debug/viz class, bbox and 2d 3d points funcs def viz_object_states(self, cv_image, boxes, extracted_pts_all): @@ -287,7 +295,7 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda assigned = False num_pairings = len(obj_centers) - # TODO: NEED TO STORE AND INCORPORATE TRANSFORMS SOMEHOW TO DEAL WITH MOVING CAR CASE + # TODO: NEED TO STORE AND INCORPORATE TRANSFORMS SOMEHOW TO DEAL WITH MOVING CAR CASE converted_centers = obj_centers # TODO: Add this in when finished self.convert_vehicle_frame_to_start_frame(obj_centers) # Loop through the indexes of the obj_center and obj_dim pairings @@ -312,7 +320,7 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda self.current_agents[prev_id] = ( AgentState( track_id = prev_id, - pose=ObjectPose(t=0, x=obj_centers[idx][0], y=obj_centers[idx][1], z=obj_centers[idx][2], yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT), + pose=ObjectPose(t=(self.curr_time-self.t_start).total_seconds(), x=obj_centers[idx][0], y=obj_centers[idx][1], z=obj_centers[idx][2], yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT), # (l, w, h) # TODO: confirm (z -> l, x -> w, y -> h) dimensions=(obj_dims[idx][0], obj_dims[idx][1], obj_dims[idx][2]), @@ -325,7 +333,7 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda new_prev_agents[prev_id] = ( AgentState( track_id = prev_id, - pose=ObjectPose(t=0, x=converted_centers[idx][0], y=converted_centers[idx][1], z=converted_centers[idx][2], yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT), + pose=ObjectPose(t=(self.curr_time-self.t_start).total_seconds(), x=converted_centers[idx][0], y=converted_centers[idx][1], z=converted_centers[idx][2], yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT), # (l, w, h) # TODO: confirm (z -> l, x -> w, y -> h) dimensions=(obj_dims[idx][0], obj_dims[idx][1], obj_dims[idx][2]), @@ -341,11 +349,11 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda # If not assigned: if not assigned: # Set velocity to 0 and assign the new agent a new id with IdTracker - id = self.id_tracker.get_new_id() + id = "ped" + str(self.id_tracker.get_new_id()) self.current_agents[id] = ( AgentState( track_id = id, - pose=ObjectPose(t=0, x=obj_centers[idx][0], y=obj_centers[idx][1], z=obj_centers[idx][2] ,yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT), + pose=ObjectPose(t=(self.curr_time-self.t_start).total_seconds(), x=obj_centers[idx][0], y=obj_centers[idx][1], z=obj_centers[idx][2] ,yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT), # (l, w, h) # TODO: confirm (z -> l, x -> w, y -> h) dimensions=(obj_dims[idx][0], obj_dims[idx][1], obj_dims[idx][2]), @@ -358,7 +366,7 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda new_prev_agents[id] = ( AgentState( track_id = id, - pose=ObjectPose(t=0, x=converted_centers[idx][0], y=converted_centers[idx][1], z=converted_centers[idx][2] ,yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT), + pose=ObjectPose(t=(self.curr_time-self.t_start).total_seconds(), x=converted_centers[idx][0], y=converted_centers[idx][1], z=converted_centers[idx][2] ,yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT), # (l, w, h) # TODO: confirm (z -> l, x -> w, y -> h) dimensions=(obj_dims[idx][0], obj_dims[idx][1], obj_dims[idx][2]), @@ -394,11 +402,31 @@ def agents_overlap(self, obj_center: np.ndarray, obj_dim: np.ndarray, prev_agent ) def convert_vehicle_frame_to_start_frame(self, vehicle_states: List[AgentState]) -> List[AgentState]: - # num_states = len(vehicle_states) - # for idx in num_states: - # num_states[idx].to_frame() - pass - + num_states = len(vehicle_states) + for idx in num_states: + vehicle_states[idx] = vehicle_states[idx].to_frame(frame=ObjectFrameEnum.START, current_pose=current_obj_pose, start_pose_abs=self.start_pose_abs) + + # current_obj_pose = ObjectPose( + # frame=ObjectFrameEnum.CURRENT, + # t=(self.curr_time-self.t_start).total_seconds(), + # x=vehicle_states[idx].pose.x, + # y=vehicle_states[idx].pose.y, + # z=vehicle_states[idx].pose.z + # ) + # vehicle_states[0] = vehicle_states[0].to_frame(frame=ObjectFrameEnum.START, current_pose=current_obj_pose, start_pose_abs=self.start_pose_abs) + # # start_obj_pose??? + # state = AgentState( + # track_id =0, + # pose=ObjectPose(t=(self.curr_time-self.t_start).total_seconds(), x=obj_centers[idx][0], y=obj_centers[idx][1], z=obj_centers[idx][2], yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT), + # # (l, w, h) + # # TODO: confirm (z -> l, x -> w, y -> h) + # dimensions=(obj_dims[idx][0], obj_dims[idx][1], obj_dims[idx][2]), + # outline=None, + # type=AgentEnum.PEDESTRIAN, + # activity=AgentActivityEnum.STOPPED if np.all(vel == 0) else AgentActivityEnum.MOVING, + # velocity=None if vel.size == 0 else tuple(vel), + # yaw_rate=0 + # ) def ouster_oak_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2): # Update times for basic velocity calculation From 0b5d88b5d5d11880aab18d3155947286fe264468 Mon Sep 17 00:00:00 2001 From: LucasEby Date: Tue, 18 Feb 2025 22:30:00 -0500 Subject: [PATCH 12/15] finally think I figured out start frame transform. In the process of converting to AgentStates --- .../perception/pedestrian_detection.py | 67 +++++++++++++------ 1 file changed, 47 insertions(+), 20 deletions(-) diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py index 381075c9b..a4666f691 100644 --- a/GEMstack/onboard/perception/pedestrian_detection.py +++ b/GEMstack/onboard/perception/pedestrian_detection.py @@ -379,17 +379,18 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda self.prev_agents = new_prev_agents # Calculates whether 2 agents overlap in START frame. True if they do, false if not - def agents_overlap(self, obj_center: np.ndarray, obj_dim: np.ndarray, prev_agent: AgentState) -> bool: - # Calculate corners of obj_center and obj_dim pairing - x1_min, x1_max = obj_center[0] - obj_dim[0] / 2.0, obj_center[0] + obj_dim[0] / 2.0 - y1_min, y1_max = obj_center[1] - obj_dim[1] / 2.0, obj_center[1] + obj_dim[1] / 2.0 # CENTER CALCULATION - z1_min, z1_max = obj_center[2] - obj_dim[2] / 2.0, obj_center[2] + obj_dim[2] / 2.0 - + def agents_overlap(self, curr_agent: AgentState, prev_agent: AgentState) -> bool: # Calculate corners of AgentState # Beware: AgentState(PhysicalObject) builds bbox from # dims [-l/2,l/2] x [-w/2,w/2] x [0,h], not # [-l/2,l/2] x [-w/2,w/2] x [-h/2,h/2] # TODO: confirm (z -> l, x -> w, y -> h) + + # Calculate corners of obj_center and obj_dim pairing + x1_min, x1_max = curr_agent.pose.x - curr_agent.dimensions[0] / 2.0, curr_agent.pose.x + curr_agent.dimensions[0] / 2.0 + y1_min, y1_max = curr_agent.pose.y, curr_agent.pose.y + curr_agent.dimensions[1] # AGENT STATE CALCULATION + z1_min, z1_max = curr_agent.pose.z - curr_agent.dimensions[2] / 2.0, curr_agent.pose.z + curr_agent.dimensions[2] / 2.0 + x2_min, x2_max = prev_agent.pose.x - prev_agent.dimensions[0] / 2.0, prev_agent.pose.x + prev_agent.dimensions[0] / 2.0 y2_min, y2_max = prev_agent.pose.y, prev_agent.pose.y + prev_agent.dimensions[1] # AGENT STATE CALCULATION z2_min, z2_max = prev_agent.pose.z - prev_agent.dimensions[2] / 2.0, prev_agent.pose.z + prev_agent.dimensions[2] / 2.0 @@ -401,20 +402,46 @@ def agents_overlap(self, obj_center: np.ndarray, obj_dim: np.ndarray, prev_agent ( (z1_min <= z2_min and z2_min <= z1_max) or (z2_min <= z1_min and z1_min <= z2_max) ) ) - def convert_vehicle_frame_to_start_frame(self, vehicle_states: List[AgentState]) -> List[AgentState]: - num_states = len(vehicle_states) - for idx in num_states: - vehicle_states[idx] = vehicle_states[idx].to_frame(frame=ObjectFrameEnum.START, current_pose=current_obj_pose, start_pose_abs=self.start_pose_abs) - - # current_obj_pose = ObjectPose( - # frame=ObjectFrameEnum.CURRENT, - # t=(self.curr_time-self.t_start).total_seconds(), - # x=vehicle_states[idx].pose.x, - # y=vehicle_states[idx].pose.y, - # z=vehicle_states[idx].pose.z - # ) - # vehicle_states[0] = vehicle_states[0].to_frame(frame=ObjectFrameEnum.START, current_pose=current_obj_pose, start_pose_abs=self.start_pose_abs) - # # start_obj_pose??? + def create_agent_states_in_start_frame(self, obj_centers: List[np.ndarray], obj_dims: List[np.ndarray]) -> List[AgentState]: + # Create list of agent states in current vehicle frame: + agents = [] + num_pairings = len(obj_centers) + for idx in range(num_pairings): + # Create agent in current frame: + state = AgentState( + track_id="TBD", # Temporary + pose=ObjectPose(t=(self.curr_time-self.t_start).total_seconds(), x=obj_centers[idx][0], y=obj_centers[idx][1], z=obj_centers[idx][2], yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT), + # Beware: AgentState(PhysicalObject) builds bbox from + # dims [-l/2,l/2] x [-w/2,w/2] x [0,h], not + # [-l/2,l/2] x [-w/2,w/2] x [-h/2,h/2] + # (l, w, h) + # TODO: confirm (z -> l, x -> w, y -> h) + dimensions=(obj_dims[idx][2], obj_dims[idx][0], obj_centers[idx][1] + obj_dims[idx][1]), + outline=None, + type=AgentEnum.PEDESTRIAN, + activity=AgentActivityEnum.MOVING, # Temporary + velocity=None, # Temporary + yaw_rate=0 + ) + + # Convert agent to start frame and add to agents list: + agents.append( + state.to_frame( + frame=ObjectFrameEnum.START, + current_pose=ObjectPose( + frame=ObjectFrameEnum.CURRENT, + t=(self.curr_time-self.t_start).total_seconds(), + x=state.pose.x, + y=state.pose.y, + z=state.pose.z + ), + start_pose_abs=self.start_pose_abs + ) + ) + + # Return the agent states converted to start frame: + return agents + # state = AgentState( # track_id =0, # pose=ObjectPose(t=(self.curr_time-self.t_start).total_seconds(), x=obj_centers[idx][0], y=obj_centers[idx][1], z=obj_centers[idx][2], yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT), From 96b9885b31805b0068e4374f8f5f7a9a57f4c4fa Mon Sep 17 00:00:00 2001 From: LucasEby Date: Tue, 18 Feb 2025 23:13:05 -0500 Subject: [PATCH 13/15] Pushing code so Lukas can try to replicate error message --- .../perception/pedestrian_detection.py | 81 +++++++------------ 1 file changed, 28 insertions(+), 53 deletions(-) diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py index a4666f691..5f1cd0568 100644 --- a/GEMstack/onboard/perception/pedestrian_detection.py +++ b/GEMstack/onboard/perception/pedestrian_detection.py @@ -116,7 +116,7 @@ def __init__(self, vehicle_interface : GEMInterface) -> None: self.id_tracker = IdTracker() # Update function variables - self.t_start = None # datetime.now() # Estimated start frame time + self.t_start = datetime.now() # Estimated start frame time self.start_pose_abs = None def init_debug(self,) -> None: @@ -127,8 +127,6 @@ def init_debug(self,) -> None: self.pub_image = rospy.Publisher("/camera/image_detection", Image, queue_size=1) def update(self, vehicle : VehicleState) -> Dict[str,AgentState]: - if self.t_start is None: - self.t_start = self.vehicle_interface.time() if self.start_pose_abs is None: self.start_pose_abs = vehicle.pose return self.current_agents @@ -295,8 +293,8 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda assigned = False num_pairings = len(obj_centers) - # TODO: NEED TO STORE AND INCORPORATE TRANSFORMS SOMEHOW TO DEAL WITH MOVING CAR CASE - converted_centers = obj_centers # TODO: Add this in when finished self.convert_vehicle_frame_to_start_frame(obj_centers) + # Create a list of agent states in the start frame + agents_sf = self.create_agent_states_in_start_frame(obj_centers=obj_centers, obj_dims=obj_dims) # Create agents in start frame # Loop through the indexes of the obj_center and obj_dim pairings for idx in range(num_pairings): @@ -305,7 +303,7 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda # Loop through previous agents backwards for prev_id, prev_state in reversed(self.prev_agents.items()): # If an obj_center and obj_dim pair overlaps with a previous agent, assume that they're the same agent - if self.agents_overlap(converted_centers[idx], obj_dims[idx], prev_state): + if self.agents_overlap(agents_sf[idx], prev_state): assigned = True if self.prev_time == None: @@ -313,10 +311,11 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda vel = np.array([0, 0, 0]) else: delta_t = self.curr_time - self.prev_time - vel = (converted_centers[idx] - np.array([prev_state.pose.x, prev_state.pose.y, prev_state.pose.z])) / delta_t.total_seconds() + vel = (np.array([agents_sf[idx].pose.x, agents_sf[idx].pose.y, agents_sf[idx].pose.z]) - np.array([prev_state.pose.x, prev_state.pose.y, prev_state.pose.z])) / delta_t.total_seconds() print("VELOCITY:") print(vel) + # Create current frame agent at planning group's request: self.current_agents[prev_id] = ( AgentState( track_id = prev_id, @@ -327,29 +326,25 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda outline=None, type=AgentEnum.PEDESTRIAN, activity=AgentActivityEnum.STOPPED if np.all(vel == 0) else AgentActivityEnum.MOVING, - velocity=None if vel.size == 0 else tuple(vel), - yaw_rate=0 - )) - new_prev_agents[prev_id] = ( - AgentState( - track_id = prev_id, - pose=ObjectPose(t=(self.curr_time-self.t_start).total_seconds(), x=converted_centers[idx][0], y=converted_centers[idx][1], z=converted_centers[idx][2], yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT), - # (l, w, h) - # TODO: confirm (z -> l, x -> w, y -> h) - dimensions=(obj_dims[idx][0], obj_dims[idx][1], obj_dims[idx][2]), - outline=None, - type=AgentEnum.PEDESTRIAN, - activity=AgentActivityEnum.STOPPED if np.all(vel == 0) else AgentActivityEnum.MOVING, - velocity=None if vel.size == 0 else tuple(vel), + velocity=(0, 0, 0) if vel.size == 0 else tuple(vel), yaw_rate=0 )) + + # Fix start frame agent and store in this dict: + agents_sf[idx].track_id = prev_id + agents_sf[idx].activity = AgentActivityEnum.STOPPED if (np.all(vel == 0) or vel.size == 0) else AgentActivityEnum.MOVING + agents_sf[idx].velocity = (0, 0, 0) if vel.size == 0 else tuple(vel) + + new_prev_agents[prev_id] = agents_sf[idx] del self.prev_agents[prev_id] # Remove previous agent from previous agents break # If not assigned: if not assigned: # Set velocity to 0 and assign the new agent a new id with IdTracker - id = "ped" + str(self.id_tracker.get_new_id()) + id = self.id_tracker.get_new_id() + + # Create current frame agent at planning group's request: self.current_agents[id] = ( AgentState( track_id = id, @@ -360,22 +355,15 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda outline=None, type=AgentEnum.PEDESTRIAN, activity=AgentActivityEnum.UNDETERMINED, - velocity=None, - yaw_rate=0 - )) - new_prev_agents[id] = ( - AgentState( - track_id = id, - pose=ObjectPose(t=(self.curr_time-self.t_start).total_seconds(), x=converted_centers[idx][0], y=converted_centers[idx][1], z=converted_centers[idx][2] ,yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT), - # (l, w, h) - # TODO: confirm (z -> l, x -> w, y -> h) - dimensions=(obj_dims[idx][0], obj_dims[idx][1], obj_dims[idx][2]), - outline=None, - type=AgentEnum.PEDESTRIAN, - activity=AgentActivityEnum.UNDETERMINED, - velocity=None, + velocity=(0, 0, 0), yaw_rate=0 )) + + # Fix start frame agent and store in this dict: + agents_sf[idx].track_id = id + agents_sf[idx].activity = AgentActivityEnum.UNDETERMINED + agents_sf[idx].velocity = (0, 0, 0) + new_prev_agents[id] = agents_sf[idx] self.prev_agents = new_prev_agents # Calculates whether 2 agents overlap in START frame. True if they do, false if not @@ -409,8 +397,8 @@ def create_agent_states_in_start_frame(self, obj_centers: List[np.ndarray], obj_ for idx in range(num_pairings): # Create agent in current frame: state = AgentState( - track_id="TBD", # Temporary - pose=ObjectPose(t=(self.curr_time-self.t_start).total_seconds(), x=obj_centers[idx][0], y=obj_centers[idx][1], z=obj_centers[idx][2], yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT), + track_id=0, # Temporary + pose=ObjectPose(t=(self.curr_time - self.t_start).total_seconds(), x=obj_centers[idx][0], y=obj_centers[idx][1], z=obj_centers[idx][2], yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT), # Beware: AgentState(PhysicalObject) builds bbox from # dims [-l/2,l/2] x [-w/2,w/2] x [0,h], not # [-l/2,l/2] x [-w/2,w/2] x [-h/2,h/2] @@ -419,7 +407,7 @@ def create_agent_states_in_start_frame(self, obj_centers: List[np.ndarray], obj_ dimensions=(obj_dims[idx][2], obj_dims[idx][0], obj_centers[idx][1] + obj_dims[idx][1]), outline=None, type=AgentEnum.PEDESTRIAN, - activity=AgentActivityEnum.MOVING, # Temporary + activity=AgentActivityEnum.UNDETERMINED, # Temporary velocity=None, # Temporary yaw_rate=0 ) @@ -430,7 +418,7 @@ def create_agent_states_in_start_frame(self, obj_centers: List[np.ndarray], obj_ frame=ObjectFrameEnum.START, current_pose=ObjectPose( frame=ObjectFrameEnum.CURRENT, - t=(self.curr_time-self.t_start).total_seconds(), + t=(self.curr_time - self.t_start).total_seconds(), x=state.pose.x, y=state.pose.y, z=state.pose.z @@ -442,19 +430,6 @@ def create_agent_states_in_start_frame(self, obj_centers: List[np.ndarray], obj_ # Return the agent states converted to start frame: return agents - # state = AgentState( - # track_id =0, - # pose=ObjectPose(t=(self.curr_time-self.t_start).total_seconds(), x=obj_centers[idx][0], y=obj_centers[idx][1], z=obj_centers[idx][2], yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT), - # # (l, w, h) - # # TODO: confirm (z -> l, x -> w, y -> h) - # dimensions=(obj_dims[idx][0], obj_dims[idx][1], obj_dims[idx][2]), - # outline=None, - # type=AgentEnum.PEDESTRIAN, - # activity=AgentActivityEnum.STOPPED if np.all(vel == 0) else AgentActivityEnum.MOVING, - # velocity=None if vel.size == 0 else tuple(vel), - # yaw_rate=0 - # ) - def ouster_oak_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2): # Update times for basic velocity calculation self.prev_time = self.curr_time From a936d3d989ae80fc245001df930b2ca927bfea37 Mon Sep 17 00:00:00 2001 From: LucasEby Date: Wed, 19 Feb 2025 00:57:40 -0500 Subject: [PATCH 14/15] Fixed logic for gate which tosses out points --- .../onboard/perception/pedestrian_detection.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py index 9fe5eeaf2..51d90248c 100644 --- a/GEMstack/onboard/perception/pedestrian_detection.py +++ b/GEMstack/onboard/perception/pedestrian_detection.py @@ -274,15 +274,15 @@ def update_object_states(self, track_result: List[Results], extracted_pts_all: L # TODO Fix velocity calculation to calculate in ObjectFrameEnum.START def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.ndarray]): # Gate to check whether dims and centers are empty (will happen if no pedestrians are scanned): - if ((len(obj_dims) == 1 or len(obj_centers) == 1) and (obj_centers[0].size == 0 or obj_dims[0].size == 0)): - self.prev_agents = self.current_agents.copy() - self.current_agents = {} # There weren't any pedestrians detected - return - elif (len(obj_centers) != len(obj_dims)): - for i in range(25): - print("ERROR NUM PARINGS AND OBJ DIMS ARENT THE SAME") - self.prev_agents = self.current_agents.copy() - self.current_agents = {} # There weren't any pedestrians detected + for idx in range(len(obj_dims) -1, -1, -1): + if (obj_centers[idx].size == 0) or (obj_dims[0].size == 0): + del obj_centers[idx] + del obj_dims[idx] + + # Nothing was scanned, erase current (for current output) and previous list (for next time through because nothing was scanned) + if (len(obj_dims) == 0 or len(obj_centers) == 0): + self.current_agents = {} + self.prev_agents = {} return new_prev_agents = {} # Stores current agents in START frame for next time through (since From 502f3a80e700d57f38d9e45f59e1fd6c3b1f3987 Mon Sep 17 00:00:00 2001 From: LucasEby Date: Wed, 19 Feb 2025 13:20:51 -0500 Subject: [PATCH 15/15] Sending untransformed version to planning team --- .../perception/pedestrian_detection.py | 41 ++++++++++++------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py index 51d90248c..2a3f84d1a 100644 --- a/GEMstack/onboard/perception/pedestrian_detection.py +++ b/GEMstack/onboard/perception/pedestrian_detection.py @@ -115,7 +115,14 @@ def __init__(self, vehicle_interface : GEMInterface) -> None: # Update function variables self.t_start = datetime.now() # Estimated start frame time - self.start_pose_abs = None + # self.start_pose_abs = None + # self.start_pose_abs = ObjectPose( + # frame=ObjectFrameEnum.GLOBAL, + # t=0, + # x=0, + # y=0, + # z=0 + # ) def init_debug(self,) -> None: # Debug Publishers @@ -125,8 +132,10 @@ def init_debug(self,) -> None: self.pub_image = rospy.Publisher("/camera/image_detection", Image, queue_size=1) def update(self, vehicle : VehicleState) -> Dict[str,AgentState]: - if self.start_pose_abs is None: - self.start_pose_abs = vehicle.pose + # if self.start_pose_abs is None: + # self.start_pose_abs = vehicle.pose # Store first pose which is start frame + # self.start_pose_abs.to_frame(frame=ObjectFrameEnum.GLOBAL, current_pose=) + # self.current_vehicle_pose_sf = vehicle.pose # Vehicle pose in start frame return self.current_agents # TODO: Improve Algo Knn, ransac, etc. @@ -409,20 +418,22 @@ def create_agent_states_in_start_frame(self, obj_centers: List[np.ndarray], obj_ velocity=None, # Temporary yaw_rate=0 ) - + # Convert agent to start frame and add to agents list: agents.append( - state.to_frame( - frame=ObjectFrameEnum.START, - current_pose=ObjectPose( - frame=ObjectFrameEnum.CURRENT, - t=(self.curr_time - self.t_start).total_seconds(), - x=state.pose.x, - y=state.pose.y, - z=state.pose.z - ), - start_pose_abs=self.start_pose_abs - ) + state + # state.to_frame( + # frame=ObjectFrameEnum.START, + # current_pose=self.current_vehicle_pose_sf, + # # current_pose=ObjectPose( + # # frame=ObjectFrameEnum.CURRENT, + # # t=(self.curr_time - self.t_start).total_seconds(), + # # x=state.pose.x, + # # y=state.pose.y, + # # z=state.pose.z + # # ), + # start_pose_abs=self.start_pose_abs + # ) ) # Return the agent states converted to start frame: