diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py index c2f8c58a8..f12cec27e 100644 --- a/GEMstack/onboard/perception/pedestrian_detection.py +++ b/GEMstack/onboard/perception/pedestrian_detection.py @@ -15,7 +15,7 @@ Terminal 3: --------------- source /opt/ros/noetic/setup.bash && source /catkin_ws/devel/setup.bash -rosbag play -l ~/rosbags/vehicle.bag +rosbag play -l ~/rosbags/moving_vehicle_stopping_pedestrian.bag --topics /oak/rgb/camera_info /oak/rgb/image_raw /ouster/points /septentrio_gnss/imu /septentrio_gnss/insnavgeod Terminal 4: --------------- @@ -33,8 +33,8 @@ import os from typing import List, Dict from collections import defaultdict -from datetime import datetime -import copy +# from datetime import datetime +from copy import deepcopy # ROS, CV import rospy import message_filters @@ -107,19 +107,10 @@ def __init__(self, vehicle_interface : GEMInterface) -> None: self.t_vehicle_to_world = None self.t_vehicle_to_start = None - # Rospy Subscribers and sychronizers - # self.rgb_rosbag = message_filters.Subscriber('/oak/rgb/image_raw', Image) - # self.top_lidar_rosbag = message_filters.Subscriber('/ouster/points', PointCloud2) - # self.sync = message_filters.ApproximateTimeSynchronizer([self.rgb_rosbag, self.top_lidar_rosbag], queue_size=10, slop=0.1) - # self.sync.registerCallback(self.ouster_oak_callback) - # GEMStack Subscribers and sychronizers # LIDAR Camera fusion self.vehicle_interface.subscribe_sensor('sensor_fusion_Lidar_Camera',self.ouster_oak_callback) - # LIDAR Camera GNSS fusion - # self.vehicle_interface.subscribe_sensor('sensor_fusion_Lidar_Camera_GNSS',self.ouster_oak_callback) - # TF listener to get transformation from LiDAR to Camera self.tf_listener = tf.TransformListener() @@ -130,7 +121,7 @@ def __init__(self, vehicle_interface : GEMInterface) -> None: self.id_tracker = IdTracker() # Update function variables - self.t_start = datetime.now() # Estimated start frame time + self.t_start = None # Estimated start frame time self.start_pose_abs = None # Get this from GNSS (GLOBAL frame) self.current_vehicle_state = None # Current vehicle state from GNSS in GLOBAL frame self.previous_vehicle_state = None # Previous vehicle state from GNSS in GLOBAL frame @@ -142,13 +133,6 @@ def init_debug(self,) -> None: self.pub_bboxes_markers = rospy.Publisher("/markers/bboxes", MarkerArray, queue_size=10) self.pub_image = rospy.Publisher("/camera/image_detection", Image, queue_size=1) - - # Test code to check gnss , can be removed - # Debugging - def gnss_test(self, cv_image: cv2.Mat, lidar_points: np.ndarray, vehicle_state: GNSSReading): - print(f"vehicle global state: {vehicle_state}") - - def update(self, vehicle : VehicleState) -> Dict[str,AgentState]: # Edge cases @@ -159,6 +143,8 @@ def update(self, vehicle : VehicleState) -> Dict[str,AgentState]: # Storing initial pose if (self.start_pose_abs == None): self.start_pose_abs = vehicle.pose + if self.t_start == None: + self.t_start = vehicle.pose.t return self.current_agents else: self.previous_vehicle_state = self.current_vehicle_state @@ -167,43 +153,49 @@ def update(self, vehicle : VehicleState) -> Dict[str,AgentState]: if(self.current_agent_obj_dims == {}): return self.current_agents - print(f"Global state : {vehicle}") + # Update current time: + self.curr_time = vehicle.pose.t + + # (f"Global state : {vehicle}") # Convert pose to start state. Need to use previous_vehicle state as pedestrian info is delayed vehicle_start_pose = vehicle.pose.to_frame(ObjectFrameEnum.START,self.previous_vehicle_state.pose,self.start_pose_abs) - print(f"Start state : {vehicle_start_pose}") - - print(f"ped pose vehicle state = {self.current_agent_obj_dims['pose']}") - print(f"ped dimenstions vehicle state = {self.current_agent_obj_dims['dims']}") # converting to start frame - for i,pose in enumerate(self.current_agent_obj_dims['pose']): - self.current_agent_obj_dims['pose'][i] = np.array(vehicle_start_pose.apply(pose)) - - for i,dims in enumerate(self.current_agent_obj_dims['dims']): - self.current_agent_obj_dims['dims'][i] = np.array(vehicle_start_pose.apply(dims)) - - - print(f"ped pose start state = {self.current_agent_obj_dims['pose']}") - print(f"ped dimenstions start state = {self.current_agent_obj_dims['dims']}") + # for dim, pose in zip(self.current_agent_obj_dims['dims'], self.current_agent_obj_dims['pose']): + # print("DIM: ", type(dim), dim) + # print("POSE: ", type(pose), pose) + self.current_agent_obj_dims['pose'] = [np.array(vehicle_start_pose.apply(pose)) for pose in self.current_agent_obj_dims['pose']] + temp_dims = list() + for dim in self.current_agent_obj_dims['dims']: + temp_dims.append(np.array([vehicle_start_pose.apply(corner) for corner in dim])) + self.current_agent_obj_dims['dims'] = temp_dims # Prepare variables for find_vels_and_ids - self.prev_agents = self.current_agents.copy() + self.prev_agents = deepcopy(self.current_agents) self.current_agents.clear() # Note this below function will probably throw a type error. I think we'll need to index the # tuples by index 0 in the lists but I'm not sure: agents = self.create_agent_states(obj_centers=self.current_agent_obj_dims['pose'], obj_dims=self.current_agent_obj_dims['dims']) # Calculating the velocity of agents and tracking their ids - self.find_vels_and_ids(agents=agents) + backOrder = self.find_vels_and_ids(agents=agents) - # Convert to current vehicle frame here for planning group. - # Create a new dictionary here which is a deepcopy of self.current_agents - # convert this new dictionary to current vehicle frame - # return this new dictionary - # OR just make planning group do the conversion. Depends on what data we want to log? + # Create a dictionary of vehicle frame agents: + vehicle_agents = self.create_vehicle_dict(agents, backOrder) + # Create a list containing the ids of the new agents in the vehicle frame agent's order - return self.current_agents + return vehicle_agents + + def create_vehicle_dict(self, agents: List[AgentState], backOrder: List[int]): + vehicle_agents = {} + + for idx in backOrder: + vehicle_agents[backOrder[idx]] = agents[idx] + vehicle_agents[backOrder[idx]].activity = self.current_agents[backOrder[idx]].activity + vehicle_agents[backOrder[idx]].velocity = self.current_agents[backOrder[idx]].velocity + + return vehicle_agents # TODO: Improve Algo Knn, ransac, etc. def find_centers(self, clusters: List[List[np.ndarray]]) -> List[np.ndarray]: @@ -219,9 +211,27 @@ def find_dims(self, clusters: List[List[np.ndarray]]) -> List[np.ndarray]: # objects distance to ground we filtered from lidar, # other heuristics to imrpove stability for find_ funcs ? clusters = [np.array(clust) for clust in clusters] + # x, y, z 1 value dims = [np.array(()) if clust.size == 0 else np.max(clust, axis= 0) - np.min(clust, axis= 0) for clust in clusters] + for i in range(len(dims)): + if dims[i].size == 0: continue + else: + # -dims[i]/2, dims[i]/2 + # 8 point bbox + dims[i] = np.vstack([[-dims[i][0]/2.0, -dims[i][1]/2.0, -dims[i][2]/2.0], + [-dims[i][0]/2.0, -dims[i][1]/2.0, dims[i][2]/2.0], + [-dims[i][0]/2.0, dims[i][1]/2.0, -dims[i][2]/2.0], + [-dims[i][0]/2.0, dims[i][1]/2.0, dims[i][2]/2.0], + [dims[i][0]/2.0, -dims[i][1]/2.0, -dims[i][2]/2.0], + [dims[i][0]/2.0, -dims[i][1]/2.0, dims[i][2]/2.0], + [dims[i][0]/2.0, dims[i][1]/2.0, -dims[i][2]/2.0], + [dims[i][0]/2.0, dims[i][1]/2.0, dims[i][2]/2.0]] + ) + + # Dims are 2 points: [ [min_x, min_y, min_z], [max_x, max_y, max_z] ] + # Need that for transform return dims - + # TODO: Separate debug/viz class, bbox and 2d 3d points funcs def viz_object_states(self, cv_image, boxes, extracted_pts_all): # Extract 3D pedestrians points in lidar frame @@ -263,6 +273,8 @@ def viz_object_states(self, cv_image, boxes, extracted_pts_all): flattened_pedestrians_3d_pts_vehicle = transform_lidar_points(np.array(flattened_pedestrians_3d_pts), self.R_lidar_to_vehicle, self.t_lidar_to_vehicle) flattened_pedestrians_3d_pts = flattened_pedestrians_3d_pts_vehicle + + # Create point cloud from extracted 3D points ros_extracted_pedestrian_pc2 = create_point_cloud(flattened_pedestrians_3d_pts) self.pub_pedestrians_pc2.publish(ros_extracted_pedestrian_pc2) @@ -289,43 +301,62 @@ def update_object_states(self, track_result: List[Results], extracted_pts_all: L return # Change pedestrians_3d_pts to dicts matching track_ids - track_ids = track_result[0].boxes.id.int().cpu().tolist() - num_objs = len(track_ids) - boxes = track_result[0].boxes # Extract 3D pedestrians points in lidar frame # ** These are camera frame after transform_lidar_points, right? # ** It was in camera frame before. I fixed it. Now they are in lidar frame! pedestrians_3d_pts = [[] if len(extracted_pts) == 0 else list(extracted_pts[:, -3:]) for extracted_pts in extracted_pts_all] - if len(pedestrians_3d_pts) != num_objs: - raise Exception('Perception - Camera detections, points clusters num. mismatch') + if len(pedestrians_3d_pts) != len(track_result[0].boxes): + raise Exception( f"Length of extracted pedestrian clusters ({len(pedestrians_3d_pts)}) and number of camera bboxes ({len(track_result[0].boxes)}) are not equal") + # TODO: Slower but cleaner to pass dicts of AgentState # or at least {track_ids: centers/pts/etc} # TODO: Combine funcs for efficiency in C. # Separate numpy prob still faster for now obj_centers = self.find_centers(pedestrians_3d_pts) # Centers are calculated in lidar frame here - obj_dims = self.find_dims(pedestrians_3d_pts) + obj_dims = self.find_dims(pedestrians_3d_pts) # 8 point dims of bounding box # Pose is stored in vehicle frame and converted to start frame in the update function obj_centers_vehicle = [] - for obj_center in obj_centers: - if len(obj_center) > 0: - obj_center = np.array([obj_center]) - obj_center_vehicle = transform_lidar_points(obj_center, self.R_lidar_to_vehicle, self.t_lidar_to_vehicle)[0] - obj_centers_vehicle.append(obj_center_vehicle) - else: - obj_centers_vehicle.append(np.array(())) - obj_centers = obj_centers_vehicle + obj_dims_vehicle = [] + if self.vehicle_frame: + for obj_center in obj_centers: + if obj_center.size > 0: + obj_center = np.array([obj_center]) + obj_center_vehicle = transform_lidar_points(obj_center, self.R_lidar_to_vehicle, self.t_lidar_to_vehicle)[0] + obj_centers_vehicle.append(obj_center_vehicle) + else: + obj_centers_vehicle.append(np.array(())) + obj_centers = obj_centers_vehicle + + if(len(obj_center) != 0) and (len(obj_dims) != 0): + for obj_dim in obj_dims: + if len(obj_dim) > 0: + # obj_dim_min = np.array([obj_dim[0]]) + # obj_dim_max = np.array([obj_dim[1]]) + # obj_dim_min_vehicle = transform_lidar_points(obj_dim_min, self.R_lidar_to_vehicle, self.t_lidar_to_vehicle)[0] + # obj_dim_max_vehicle = transform_lidar_points(obj_dim_max, self.R_lidar_to_vehicle, self.t_lidar_to_vehicle)[0] + # # Dims are 2 points: [ [min_x, min_y, min_z], [max_x, max_y, max_z] ] + # obj_dims_vehicle.append(np.vstack([obj_dim_min_vehicle, obj_dim_max_vehicle])) + obj_dim_vehicle = transform_lidar_points(obj_dim, self.R_lidar_to_vehicle, self.t_lidar_to_vehicle) + obj_dims_vehicle.append(obj_dim_vehicle) + else: obj_dims_vehicle.append(np.array(())) + obj_dims = obj_dims_vehicle + + self.current_agent_obj_dims["pose"] = obj_centers + self.current_agent_obj_dims["dims"] = obj_dims - if(len(obj_center) != 0) and (len(obj_dims) != 0): - self.current_agent_obj_dims["pose"] = obj_center - self.current_agent_obj_dims["dims"] = obj_dims # TODO: Refactor to make more efficient # TODO: Moving Average across last N iterations pos/vel? Less spurious vals def find_vels_and_ids(self, agents: List[AgentState]): # self.prev_agents was assigned a deepcopy of self.current_agents and then self.current_agents # was cleared before this function was called + + # Create a list containing the ids of the new agents in the vehicle frame agent's order + backOrder = [] + for each in agents: + backOrder.append(None) # Nothing was scanned, erase current (for current output) and previous list (for next time through because nothing was scanned) if (len(agents) == 0): @@ -346,19 +377,20 @@ def find_vels_and_ids(self, agents: List[AgentState]): if self.prev_time == None: # This will be triggered if the very first message has pedestrians in it - vel = np.array([0, 0, 0]) + vel = [0, 0, 0] else: delta_t = self.curr_time - self.prev_time - vel = (np.array([agents[idx].pose.x, agents[idx].pose.y, agents[idx].pose.z]) - np.array([prev_state.pose.x, prev_state.pose.y, prev_state.pose.z])) / delta_t.total_seconds() - print("VELOCITY:") - print(vel) + vel = list((np.array([agents[idx].pose.x, agents[idx].pose.y, agents[idx].pose.z]) - np.array([prev_state.pose.x, prev_state.pose.y, prev_state.pose.z])) / delta_t) + # print("VELOCITY:") + # print(vel) # Fix start frame agent and store in this dict: - agents[idx].track_id = prev_id - agents[idx].activity = AgentActivityEnum.STOPPED if (np.all(vel == 0) or vel.size == 0) else AgentActivityEnum.MOVING - agents[idx].velocity = (0, 0, 0) if vel.size == 0 else tuple(vel) + # TODO: Use np.isclose + agents[idx].activity = AgentActivityEnum.STOPPED if (np.all(vel == 0) or len(vel) == 0) else AgentActivityEnum.MOVING + agents[idx].velocity = (0, 0, 0) if len(vel) == 0 else tuple(vel) self.current_agents[prev_id] = agents[idx] + backOrder[idx] = prev_id del self.prev_agents[prev_id] # Remove previous agent from previous agents so it doesn't get assigned multiple times on accident break @@ -367,10 +399,11 @@ def find_vels_and_ids(self, agents: List[AgentState]): id = self.id_tracker.get_new_id() # Fix start frame agent and store in this dict: - agents[idx].track_id = id agents[idx].activity = AgentActivityEnum.UNDETERMINED agents[idx].velocity = (0, 0, 0) self.current_agents[id] = agents[idx] + backOrder[idx] = id + return backOrder # Calculates whether 2 agents overlap. True if they do, false if not def agents_overlap(self, curr_agent: AgentState, prev_agent: AgentState) -> bool: @@ -392,33 +425,43 @@ def agents_overlap(self, curr_agent: AgentState, prev_agent: AgentState) -> bool ) def create_agent_states(self, obj_centers: List[np.ndarray], obj_dims: List[np.ndarray]) -> List[AgentState]: + # Dims are 2 points: [ [min_x, min_y, min_z], [max_x, max_y, max_z] ] + # Gate to check whether dims and centers are empty (will happen if no pedestrians are scanned): for idx in range(len(obj_dims) -1, -1, -1): - if (obj_centers[idx].size == 0) or (obj_dims[0].size == 0): - del obj_centers[idx] - del obj_dims[idx] - elif (obj_centers[idx].size != obj_dims[0].size): + # print("type:", type(obj_centers[idx]), type(obj_dims[idx])) + if obj_centers[idx].size == 0 or obj_dims[idx].size == 0: del obj_centers[idx] del obj_dims[idx] + if (len(obj_centers) != len(obj_dims)): + raise Exception(f"Length of object centers ({len(obj_centers)}) and dimensions ({len(obj_dims)}) are not equal") + + # Convert from 2 point to 1 point dims + # obj_dims = [np.abs( dim[0] - dim[1]) for dim in obj_dims] + # elif (obj_centers[idx].size != obj_dims[0].size): + # del obj_centers[idx] + # del obj_dims[idx] # Create list of agent states in current vehicle frame: + + xyz_lengths_start = [np.max(obj_dim, axis=0) - np.min(obj_dim, axis=0) for obj_dim in obj_dims] + xyz_lengths_start = [xyz_length.astype(float) for xyz_length in xyz_lengths_start] agents = [] num_pairings = len(obj_centers) for idx in range(num_pairings): # Create agent in current frame: agents.append(AgentState( - track_id=0, # Temporary - pose=ObjectPose(t=(self.curr_time - self.t_start).total_seconds(), x=obj_centers[idx][0], y=obj_centers[idx][1], z=obj_centers[idx][2] - obj_dims[idx][2], yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT), + pose=ObjectPose(t=self.curr_time - self.t_start, x=obj_centers[idx][0], y=obj_centers[idx][1], z=obj_centers[idx][2] - xyz_lengths_start[idx][2]/2.0, yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT), # Beware: AgentState(PhysicalObject) builds bbox from # dims [-l/2,l/2] x [-w/2,w/2] x [0,h], not # [-l/2,l/2] x [-w/2,w/2] x [-h/2,h/2] # (l, w, h) # TODO: confirm (z -> l, x -> w, y -> h) - dimensions=(obj_dims[idx][0], obj_dims[idx][1], obj_centers[idx][2] + obj_dims[idx][2]), + dimensions=(xyz_lengths_start[idx][0], xyz_lengths_start[idx][1], xyz_lengths_start[idx][2]), outline=None, type=AgentEnum.PEDESTRIAN, activity=AgentActivityEnum.UNDETERMINED, # Temporary velocity=None, # Temporary - yaw_rate=0 + yaw_rate=0, )) return agents @@ -482,7 +525,7 @@ def ouster_oak_callback(self, cv_image: cv2.Mat, lidar_points: np.ndarray): # Update times for basic velocity calculation self.prev_time = self.curr_time - self.curr_time = datetime.now() + # self.curr_time = datetime.now() # Updating in update function now self.cv_image = cv_image self.lidar_points = lidar_points @@ -497,15 +540,12 @@ def ouster_oak_callback(self, cv_image: cv2.Mat, lidar_points: np.ndarray): # Downsample xyz point clouds downsampled_points = downsample_points(lidar_points) - # print("len downsampled_points", len(downsampled_points)) # Transform LiDAR points into the camera coordinate frame. lidar_in_camera = transform_lidar_points(downsampled_points, self.R_lidar_to_oak, self.t_lidar_to_oak) - # print("len lidar_in_camera", len(lidar_in_camera)) # Project the transformed points into the image plane. projected_pts = project_points(lidar_in_camera, self.K, downsampled_points) - # print("projected_pts", len(projected_pts)) # Process bboxes boxes = track_result[0].boxes diff --git a/GEMstack/state/agent.py b/GEMstack/state/agent.py index b0ad7aaad..757805837 100644 --- a/GEMstack/state/agent.py +++ b/GEMstack/state/agent.py @@ -27,10 +27,6 @@ class AgentState(PhysicalObject): activity : AgentActivityEnum velocity : Tuple[float,float,float] #estimated velocity in x,y,z, m/s and in agent's local frame yaw_rate : float #estimated yaw rate, in radians/s - # Added by Perception for persistent objs across frames - # Check track_id to verify this is the same Agent you've - # seen before - track_id : int def velocity_local(self) -> Tuple[float,float,float]: """Returns velocity in m/s in the agent's local frame.""" diff --git a/GEMstack/state/physical_object.py b/GEMstack/state/physical_object.py index 5147c1ee8..4b8f0ddf7 100644 --- a/GEMstack/state/physical_object.py +++ b/GEMstack/state/physical_object.py @@ -77,6 +77,7 @@ def apply(self,point): If point is 2D, then the pitch and roll are ignored. """ + # print("apply: ", type(point), point) assert len(point) in [2,3],"Must provide a 2D or 3D point" oz = self.z if self.z is not None else 0.0 if self.frame == ObjectFrameEnum.GLOBAL: