From a03c2044f2f033c0c7dad19afe2cdd278df54953 Mon Sep 17 00:00:00 2001 From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com> Date: Wed, 23 Apr 2025 10:39:57 -0500 Subject: [PATCH 01/28] Create cone_detection.py --- GEMstack/onboard/perception/cone_detection.py | 666 ++++++++++++++++++ 1 file changed, 666 insertions(+) create mode 100644 GEMstack/onboard/perception/cone_detection.py diff --git a/GEMstack/onboard/perception/cone_detection.py b/GEMstack/onboard/perception/cone_detection.py new file mode 100644 index 000000000..d41c527ae --- /dev/null +++ b/GEMstack/onboard/perception/cone_detection.py @@ -0,0 +1,666 @@ +from ...state import AllState, VehicleState, ObjectPose, ObjectFrameEnum, AgentState, AgentEnum, AgentActivityEnum +from ..interface.gem import GEMInterface +from ..component import Component +from ultralytics import YOLO +import cv2 +from typing import Dict +import open3d as o3d +import numpy as np +from sklearn.cluster import DBSCAN +from scipy.spatial.transform import Rotation as R +import rospy +from sensor_msgs.msg import PointCloud2, Image +import sensor_msgs.point_cloud2 as pc2 +import struct, ctypes +from message_filters import Subscriber, ApproximateTimeSynchronizer +from cv_bridge import CvBridge +import time +import math +import ros_numpy +import os + + +# ----- Helper Functions ----- + +def cylindrical_roi(points, center, radius, height): + horizontal_dist = np.linalg.norm(points[:, :2] - center[:2], axis=1) + vertical_diff = np.abs(points[:, 2] - center[2]) + mask = (horizontal_dist <= radius) & (vertical_diff <= height / 2) + return points[mask] + + +def undistort_image(image, K, D): + h, w = image.shape[:2] + newK, _ = cv2.getOptimalNewCameraMatrix(K, D, (w, h), 1, (w, h)) + undistorted = cv2.undistort(image, K, D, None, newK) + return undistorted, newK + +def filter_points_within_threshold(points, threshold=15.0): + distances = np.linalg.norm(points, axis=1) + mask = distances <= threshold + return points[mask] + + +def 
match_existing_cone( + new_center: np.ndarray, + new_dims: tuple, + existing_agents: Dict[str, AgentState], + distance_threshold: float = 1.0 +) -> str: + """ + Find the closest existing Cone agent within a specified distance threshold. + """ + best_agent_id = None + best_dist = float('inf') + for agent_id, agent_state in existing_agents.items(): + old_center = np.array([agent_state.pose.x, agent_state.pose.y, agent_state.pose.z]) + dist = np.linalg.norm(new_center - old_center) + if dist < distance_threshold and dist < best_dist: + best_dist = dist + best_agent_id = agent_id + return best_agent_id + + +def compute_velocity(old_pose: ObjectPose, new_pose: ObjectPose, dt: float) -> tuple: + """ + Compute the (vx, vy, vz) velocity based on change in pose over time. + """ + if dt <= 0: + return (0, 0, 0) + vx = (new_pose.x - old_pose.x) / dt + vy = (new_pose.y - old_pose.y) / dt + vz = (new_pose.z - old_pose.z) / dt + return (vx, vy, vz) + + +def extract_roi_box(lidar_pc, center, half_extents): + """ + Extract a region of interest (ROI) from the LiDAR point cloud defined by an axis-aligned bounding box. + """ + lower = center - half_extents + upper = center + half_extents + mask = np.all((lidar_pc >= lower) & (lidar_pc <= upper), axis=1) + return lidar_pc[mask] + + +def pc2_to_numpy(pc2_msg, want_rgb=False): + """ + Convert a ROS PointCloud2 message into a numpy array quickly using ros_numpy. + This function extracts the x, y, z coordinates from the point cloud. 
+ """ + # Convert the ROS message to a numpy structured array + pc = ros_numpy.point_cloud2.pointcloud2_to_array(pc2_msg) + # Stack x,y,z fields to a (N,3) array + pts = np.stack((np.array(pc['x']).ravel(), + np.array(pc['y']).ravel(), + np.array(pc['z']).ravel()), axis=1) + # Apply filtering (for example, x > 0 and z in a specified range) + mask = (pts[:, 0] > -0.5) & (pts[:, 2] < -1) & (pts[:, 2] > -2.7) + return pts[mask] + + +def backproject_pixel(u, v, K): + """ + Backprojects a pixel coordinate (u, v) into a normalized 3D ray in the camera coordinate system. + """ + cx, cy = K[0, 2], K[1, 2] + fx, fy = K[0, 0], K[1, 1] + x = (u - cx) / fx + y = (v - cy) / fy + ray_dir = np.array([x, y, 1.0]) + return ray_dir / np.linalg.norm(ray_dir) + + +def find_human_center_on_ray(lidar_pc, ray_origin, ray_direction, + t_min, t_max, t_step, + distance_threshold, min_points, ransac_threshold): + """ + Identify the center of a human along a projected ray. + (This function is no longer used in the new approach.) + """ + return None, None, None + + +def extract_roi(pc, center, roi_radius): + """ + Extract points from a point cloud that lie within a specified radius of a center point. + """ + distances = np.linalg.norm(pc - center, axis=1) + return pc[distances < roi_radius] + + +def refine_cluster(roi_points, center, eps=0.2, min_samples=10): + """ + Refine a point cluster by applying DBSCAN and return the cluster closest to 'center'. + """ + if roi_points.shape[0] < min_samples: + return roi_points + clustering = DBSCAN(eps=eps, min_samples=min_samples).fit(roi_points) + labels = clustering.labels_ + valid_clusters = [roi_points[labels == l] for l in set(labels) if l != -1] + if not valid_clusters: + return roi_points + best_cluster = min(valid_clusters, key=lambda c: np.linalg.norm(np.mean(c, axis=0) - center)) + return best_cluster + + +def remove_ground_by_min_range(cluster, z_range=0.05): + """ + Remove points within z_range of the minimum z (assumed to be ground). 
+ """ + if cluster is None or cluster.shape[0] == 0: + return cluster + min_z = np.min(cluster[:, 2]) + filtered = cluster[cluster[:, 2] > (min_z + z_range)] + return filtered + + +def get_bounding_box_center_and_dimensions(points): + """ + Calculate the axis-aligned bounding box's center and dimensions for a set of 3D points. + """ + if points.shape[0] == 0: + return None, None + min_vals = np.min(points, axis=0) + max_vals = np.max(points, axis=0) + center = (min_vals + max_vals) / 2 + dimensions = max_vals - min_vals + return center, dimensions + + +def create_ray_line_set(start, end): + """ + Create an Open3D LineSet object representing a ray between two 3D points. + The line is colored yellow. + """ + points = [start, end] + lines = [[0, 1]] + line_set = o3d.geometry.LineSet() + line_set.points = o3d.utility.Vector3dVector(points) + line_set.lines = o3d.utility.Vector2iVector(lines) + line_set.colors = o3d.utility.Vector3dVector([[1, 1, 0]]) + return line_set + + +def downsample_points(lidar_points, voxel_size=0.15): + pcd = o3d.geometry.PointCloud() + pcd.points = o3d.utility.Vector3dVector(lidar_points) + down_pcd = pcd.voxel_down_sample(voxel_size=voxel_size) + return np.asarray(down_pcd.points) + + +def filter_depth_points(lidar_points, max_depth_diff=0.9, use_norm=True): + if lidar_points.shape[0] == 0: + return lidar_points + + if use_norm: + depths = np.linalg.norm(lidar_points, axis=1) + else: + depths = lidar_points[:, 0] + + min_depth = np.min(depths) + max_possible_depth = min_depth + max_depth_diff + mask = depths < max_possible_depth + return lidar_points[mask] + + +def visualize_geometries(geometries, window_name="Open3D", width=800, height=600, point_size=5.0): + """ + Visualize a list of Open3D geometry objects in a dedicated window. 
+ """ + vis = o3d.visualization.Visualizer() + vis.create_window(window_name=window_name, width=width, height=height) + for geom in geometries: + vis.add_geometry(geom) + opt = vis.get_render_option() + opt.point_size = point_size + vis.run() + vis.destroy_window() + + +def pose_to_matrix(pose): + """ + Compose a 4x4 transformation matrix from a pose state. + Assumes pose has attributes: x, y, z, yaw, pitch, roll, + where the angles are given in degrees. + """ + x = pose.x if pose.x is not None else 0.0 + y = pose.y if pose.y is not None else 0.0 + z = pose.z if pose.z is not None else 0.0 + if pose.yaw is not None and pose.pitch is not None and pose.roll is not None: + yaw = math.radians(pose.yaw) + pitch = math.radians(pose.pitch) + roll = math.radians(pose.roll) + else: + yaw = 0.0 + pitch = 0.0 + roll = 0.0 + R_mat = R.from_euler('zyx', [yaw, pitch, roll]).as_matrix() + T = np.eye(4) + T[:3, :3] = R_mat + T[:3, 3] = np.array([x, y, z]) + return T + + +def transform_points_l2c(lidar_points, T_l2c): + N = lidar_points.shape[0] + pts_hom = np.hstack((lidar_points, np.ones((N, 1)))) # (N,4) + pts_cam = (T_l2c @ pts_hom.T).T # (N,4) + return pts_cam[:, :3] + + +# ----- New: Vectorized projection function ----- +def project_points(pts_cam, K, original_lidar_points): + """ + Vectorized version. + pts_cam: (N,3) array of points in camera coordinates. + original_lidar_points: (N,3) array of points in LiDAR coordinates. + Returns a (M,5) array: [u, v, X_lidar, Y_lidar, Z_lidar] for all points with Z>0. + """ + mask = pts_cam[:, 2] > 0 + pts_cam_valid = pts_cam[mask] + lidar_valid = original_lidar_points[mask] + Xc = pts_cam_valid[:, 0] + Yc = pts_cam_valid[:, 1] + Zc = pts_cam_valid[:, 2] + u = (K[0, 0] * (Xc / Zc) + K[0, 2]).astype(np.int32) + v = (K[1, 1] * (Yc / Zc) + K[1, 2]).astype(np.int32) + proj = np.column_stack((u, v, lidar_valid)) + return proj + + +class ConeDetector3D(Component): + """ + Detects cones by fusing YOLO 2D detections with LiDAR point cloud data. 
+ + Tracking is optional: set `enable_tracking=False` to disable persistent tracking + and return only detections from the current frame. + """ + + def __init__(self, vehicle_interface: GEMInterface): + self.vehicle_interface = vehicle_interface + self.enable_tracking = False + self.current_agents = {} + self.tracked_agents = {} + self.cone_counter = 0 + self.latest_image = None + self.latest_lidar = None + self.bridge = CvBridge() + self.start_pose_abs = None + self.camera_front = True + self.visualize_2d = False + self.use_cyl_roi = False + self.start_time = None + self.use_start_frame = False + self.save_data = False + self.orientation = False + + def rate(self) -> float: + return 4.0 + + def state_inputs(self) -> list: + return ['vehicle'] + + def state_outputs(self) -> list: + return ['agents'] + + def initialize(self): + self.rgb_sub = Subscriber('/oak/rgb/image_raw', Image) + self.lidar_sub = Subscriber('/ouster/points', PointCloud2) + self.sync = ApproximateTimeSynchronizer([self.rgb_sub, self.lidar_sub], + queue_size=10, slop=0.1) + self.sync.registerCallback(self.synchronized_callback) + self.detector = YOLO('/home/gem/s2025_perception_merge/GEMstack/GEMstack/knowledge/detection/cone.pt') + self.detector.to('cuda') + + if self.camera_front: + self.K = np.array([[684.83331299, 0., 573.37109375], + [0., 684.60968018, 363.70092773], + [0., 0., 1.]]) + else: + self.K = np.array([[1.17625545e+03, 0.00000000e+00, 9.66432645e+02], + [0.00000000e+00, 1.17514569e+03, 6.08580326e+02], + [0.00000000e+00, 0.00000000e+00, 1.00000000e+00]]) + + if self.camera_front: + self.D = np.array([0.0, 0.0, 0.0, 0.0, 0.0]) + else: + self.D = np.array([-2.70136325e-01, 1.64393255e-01, -1.60720782e-03, -7.41246708e-05, + -6.19939758e-02]) + + self.T_l2v = np.array([[0.99939639, 0.02547917, 0.023615, 1.1], + [-0.02530848, 0.99965156, -0.00749882, 0.03773583], + [-0.02379784, 0.00689664, 0.999693, 1.95320223], + [0., 0., 0., 1.]]) + if self.camera_front: + self.T_l2c = np.array([ + 
[0.001090, -0.999489, -0.031941, 0.149698], + [-0.007664, 0.031932, -0.999461, -0.397813], + [0.999970, 0.001334, -0.007625, -0.691405], + [0., 0., 0., 1.000000] + ]) + else: + self.T_l2c = np.array([[-0.71836368, -0.69527204, -0.02346088, 0.05718003], + [-0.09720448, 0.13371206, -0.98624154, -0.1598301], + [0.68884317, -0.7061996, -0.16363744, -1.04767285], + [0., 0., 0., 1.]] + ) + self.T_c2l = np.linalg.inv(self.T_l2c) + self.R_c2l = self.T_c2l[:3, :3] + self.camera_origin_in_lidar = self.T_c2l[:3, 3] + + def synchronized_callback(self, image_msg, lidar_msg): + step1 = time.time() + try: + self.latest_image = self.bridge.imgmsg_to_cv2(image_msg, "bgr8") + except Exception as e: + rospy.logerr("Failed to convert image: {}".format(e)) + self.latest_image = None + step2 = time.time() + self.latest_lidar = pc2_to_numpy(lidar_msg, want_rgb=False) + step3 = time.time() + print('image callback: ', step2 - step1, 'lidar callback ', step3 - step2) + + def update(self, vehicle: VehicleState) -> Dict[str, AgentState]: + downsample = False + if self.latest_image is None or self.latest_lidar is None: + return {} + + # Ensure data/ exists and build timestamp + os.makedirs("data", exist_ok=True) + current_time = self.vehicle_interface.time() + if self.start_time is None: + self.start_time = current_time + time_elapsed = current_time - self.start_time + + if self.save_data: + tstamp = int(self.vehicle_interface.time() * 1000) + # 1) Dump raw image + cv2.imwrite(f"data/{tstamp}_image.png", self.latest_image) + # 2) Dump raw LiDAR + np.savez(f"data/{tstamp}_lidar.npz", lidar=self.latest_lidar) + # 3) Write BEFORE_TRANSFORM + with open(f"data/{tstamp}_vehstate.txt", "w") as f: + vp = vehicle.pose + f.write( + f"BEFORE_TRANSFORM " + f"x={vp.x:.3f}, y={vp.y:.3f}, z={vp.z:.3f}, " + f"yaw={vp.yaw:.2f}, pitch={vp.pitch:.2f}, roll={vp.roll:.2f}\n" + ) + # Compute vehicle_start_pose in either START or CURRENT + if self.use_start_frame: + if self.start_pose_abs is None: + 
self.start_pose_abs = vehicle.pose + vehicle_start_pose = vehicle.pose.to_frame( + ObjectFrameEnum.START, + vehicle.pose, + self.start_pose_abs + ) + mode = "START" + else: + vehicle_start_pose = vehicle.pose + mode = "CURRENT" + with open(f"data/{tstamp}_vehstate.txt", "a") as f: + f.write( + f"AFTER_TRANSFORM " + f"x={vehicle_start_pose.x:.3f}, " + f"y={vehicle_start_pose.y:.3f}, " + f"z={vehicle_start_pose.z:.3f}, " + f"yaw={vehicle_start_pose.yaw:.2f}, " + f"pitch={vehicle_start_pose.pitch:.2f}, " + f"roll={vehicle_start_pose.roll:.2f}, " + f"frame={mode}\n" + ) + + undistorted_img, current_K = undistort_image(self.latest_image, self.K, self.D) + self.current_K = current_K + self.latest_image = undistorted_img + orig_H, orig_W = undistorted_img.shape[:2] + + # --- Begin modifications for three-angle detection --- + img_normal = undistorted_img + results_normal = self.detector(img_normal, conf=0.3, classes=[0]) + combined_boxes = [] + if self.orientation: + img_left = cv2.rotate(undistorted_img.copy(), cv2.ROTATE_90_COUNTERCLOCKWISE) + img_right = cv2.rotate(undistorted_img.copy(), cv2.ROTATE_90_CLOCKWISE) + results_left = self.detector(img_left, conf=0.3, classes=[0]) + results_right = self.detector(img_right, conf=0.3, classes=[0]) + boxes_left = np.array(results_left[0].boxes.xywh.cpu()) if len(results_left) > 0 else [] + boxes_right = np.array(results_right[0].boxes.xywh.cpu()) if len(results_right) > 0 else [] + for box in boxes_left: + cx, cy, w, h = box + new_cx = cy + new_cy = orig_W - 1 - cx + combined_boxes.append((new_cx, new_cy, h, w, AgentActivityEnum.RIGHT)) + for box in boxes_right: + cx, cy, w, h = box + new_cx = orig_H - 1 - cy + new_cy = cx + combined_boxes.append((new_cx, new_cy, h, w, AgentActivityEnum.LEFT)) + + boxes_normal = np.array(results_normal[0].boxes.xywh.cpu()) if len(results_normal) > 0 else [] + for box in boxes_normal: + cx, cy, w, h = box + combined_boxes.append((cx, cy, w, h, AgentActivityEnum.STANDING)) + + if getattr(self, 
'visualize_2d', False): + for (cx, cy, w, h, activity) in combined_boxes: + left = int(cx - w / 2) + right = int(cx + w / 2) + top = int(cy - h / 2) + bottom = int(cy + h / 2) + if activity == AgentActivityEnum.STANDING: + color = (255, 0, 0) + label = "STANDING" + elif activity == AgentActivityEnum.RIGHT: + color = (0, 255, 0) + label = "RIGHT" + elif activity == AgentActivityEnum.LEFT: + color = (0, 0, 255) + label = "LEFT" + else: + color = (255, 255, 255) + label = "UNKNOWN" + cv2.rectangle(undistorted_img, (left, top), (right, bottom), color, 2) + cv2.putText(undistorted_img, label, (left, max(top - 5, 20)), + cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2) + cv2.imshow("Detection - Cone 2D", undistorted_img) + + if downsample: + lidar_down = downsample_points(self.latest_lidar, voxel_size=0.1) + else: + lidar_down = self.latest_lidar.copy() + + pts_cam = transform_points_l2c(lidar_down, self.T_l2c) + projected_pts = project_points(pts_cam, self.current_K, lidar_down) + + agents = {} + + for i, box_info in enumerate(combined_boxes): + cx, cy, w, h, activity = box_info + start = time.time() + left = int(cx - w / 2) + right = int(cx + w / 2) + top = int(cy - h / 2) + bottom = int(cy + h / 2) + mask = (projected_pts[:, 0] >= left) & (projected_pts[:, 0] <= right) & \ + (projected_pts[:, 1] >= top) & (projected_pts[:, 1] <= bottom) + roi_pts = projected_pts[mask] + if roi_pts.shape[0] < 5: + continue + points_3d = roi_pts[:, 2:5] + points_3d = filter_points_within_threshold(points_3d, 30) + points_3d = filter_depth_points(points_3d, max_depth_diff=0.3) + + if self.use_cyl_roi: + global_filtered = filter_points_within_threshold(lidar_down, 30) + roi_cyl = cylindrical_roi(global_filtered, np.mean(points_3d, axis=0), radius=0.3, height=1.2) + refined_cluster = remove_ground_by_min_range(roi_cyl, z_range=0.01) + refined_cluster = filter_depth_points(refined_cluster, max_depth_diff=0.2) + else: + refined_cluster = remove_ground_by_min_range(points_3d, z_range=0.05) + end1 = 
time.time() + if refined_cluster.shape[0] < 4: + continue + + pcd = o3d.geometry.PointCloud() + pcd.points = o3d.utility.Vector3dVector(refined_cluster) + obb = pcd.get_oriented_bounding_box() + refined_center = obb.center + dims = tuple(obb.extent) + R_lidar = obb.R.copy() + end2 = time.time() + + refined_center_hom = np.append(refined_center, 1) + refined_center_vehicle_hom = self.T_l2v @ refined_center_hom + refined_center_vehicle = refined_center_vehicle_hom[:3] + + R_vehicle = self.T_l2v[:3, :3] @ R_lidar + yaw, pitch, roll = R.from_matrix(R_vehicle).as_euler('zyx', degrees=False) + refined_center = refined_center_vehicle + + if self.use_start_frame: + if self.start_pose_abs is None: + self.start_pose_abs = vehicle.pose + vehicle_start_pose = vehicle.pose.to_frame( + ObjectFrameEnum.START, + vehicle.pose, + self.start_pose_abs + ) + T_vehicle_to_start = pose_to_matrix(vehicle_start_pose) + xp, yp, zp = (T_vehicle_to_start @ np.append(refined_center, 1))[:3] + out_frame = ObjectFrameEnum.START + else: + xp, yp, zp = refined_center + out_frame = ObjectFrameEnum.CURRENT + + new_pose = ObjectPose( + t=current_time, + x=xp, y=yp, z=zp, + yaw=yaw, pitch=pitch, roll=roll, + frame=out_frame + ) + + # --- Optional tracking --- + if self.enable_tracking: + existing_id = match_existing_cone( + np.array([new_pose.x, new_pose.y, new_pose.z]), + dims, + self.tracked_agents, + distance_threshold=2.0 + ) + if existing_id is not None: + old_state = self.tracked_agents[existing_id] + if vehicle.v < 0.1: + alpha = 0.1 + avg_x = alpha * new_pose.x + (1 - alpha) * old_state.pose.x + avg_y = alpha * new_pose.y + (1 - alpha) * old_state.pose.y + avg_z = alpha * new_pose.z + (1 - alpha) * old_state.pose.z + avg_yaw = alpha * new_pose.yaw + (1 - alpha) * old_state.pose.yaw + avg_pitch = alpha * new_pose.pitch + (1 - alpha) * old_state.pose.pitch + avg_roll = alpha * new_pose.roll + (1 - alpha) * old_state.pose.roll + + updated_pose = ObjectPose( + t=new_pose.t, + x=avg_x, + y=avg_y, + 
z=avg_z, + yaw=avg_yaw, + pitch=avg_pitch, + roll=avg_roll, + frame=new_pose.frame + ) + updated_agent = AgentState( + pose=updated_pose, + dimensions=dims, + outline=None, + type=AgentEnum.CONE, + activity=activity, + velocity=(0, 0, 0), + yaw_rate=0 + ) + else: + updated_agent = old_state + agents[existing_id] = updated_agent + self.tracked_agents[existing_id] = updated_agent + else: + agent_id = f"Cone{self.cone_counter}" + self.cone_counter += 1 + new_agent = AgentState( + pose=new_pose, + dimensions=dims, + outline=None, + type=AgentEnum.CONE, + activity=activity, + velocity=(0, 0, 0), + yaw_rate=0 + ) + agents[agent_id] = new_agent + self.tracked_agents[agent_id] = new_agent + else: + agent_id = f"Cone{self.cone_counter}" + self.cone_counter += 1 + new_agent = AgentState( + pose=new_pose, + dimensions=dims, + outline=None, + type=AgentEnum.CONE, + activity=activity, + velocity=(0, 0, 0), + yaw_rate=0 + ) + agents[agent_id] = new_agent + + self.current_agents = agents + + # If tracking not enabled, return only current frame detections + if not self.enable_tracking: + return self.current_agents + + stale_ids = [agent_id for agent_id, agent in self.tracked_agents.items() + if current_time - agent.pose.t > 5.0] + for agent_id in stale_ids: + rospy.loginfo(f"Removing stale agent: {agent_id}\n") + del self.tracked_agents[agent_id] + + return self.tracked_agents + + # ----- Fake Cone Detector 2D (for Testing Purposes) ----- + +class FakConeDetector(Component): + def __init__(self, vehicle_interface: GEMInterface): + self.vehicle_interface = vehicle_interface + self.times = [(5.0, 20.0), (30.0, 35.0)] + self.t_start = None + + def rate(self): + return 4.0 + + def state_inputs(self): + return ['vehicle'] + + def state_outputs(self): + return ['agents'] + + def update(self, vehicle: VehicleState) -> Dict[str, AgentState]: + if self.t_start is None: + self.t_start = self.vehicle_interface.time() + t = self.vehicle_interface.time() - self.t_start + res = {} + for 
time_range in self.times: + if t >= time_range[0] and t <= time_range[1]: + res['cone0'] = box_to_fake_agent((0, 0, 0, 0)) + rospy.loginfo("Detected a Cone (simulated)") + return res + +def box_to_fake_agent(box): + x, y, w, h = box + pose = ObjectPose(t=0, x=x + w / 2, y=y + h / 2, z=0, yaw=0, pitch=0, roll=0, frame=ObjectFrameEnum.CURRENT) + dims = (w, h, 0) + return AgentState(pose=pose, dimensions=dims, outline=None, + type=AgentEnum.CONE, activity=AgentActivityEnum.MOVING, + velocity=(0, 0, 0), yaw_rate=0) + +if __name__ == '__main__': + pass From 0b84d288a422dade88ece3cc447a0fcbef059181 Mon Sep 17 00:00:00 2001 From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com> Date: Wed, 23 Apr 2025 10:40:41 -0500 Subject: [PATCH 02/28] Create cone_detection.yaml --- launch/cone_detection.yaml | 119 +++++++++++++++++++++++++++++++++++++ 1 file changed, 119 insertions(+) create mode 100644 launch/cone_detection.yaml diff --git a/launch/cone_detection.yaml b/launch/cone_detection.yaml new file mode 100644 index 000000000..165a5b9b8 --- /dev/null +++ b/launch/cone_detection.yaml @@ -0,0 +1,119 @@ +description: "Run the yielding trajectory planner on the real vehicle with real perception" +mode: hardware +vehicle_interface: gem_hardware.GEMHardwareInterface +mission_execution: StandardExecutor + +# Recovery behavior after a component failure +recovery: + planning: + trajectory_tracking : recovery.StopTrajectoryTracker + +# Driving behavior for the GEM vehicle. Runs real pedestrian perception, yield planner, but does not send commands to real vehicle. 
+drive: + perception: + state_estimation : GNSSStateEstimator + agent_detection : cone_detection.ConeDetector3D + perception_normalization : StandardPerceptionNormalizer + planning: + relations_estimation: + type: pedestrian_yield_logic.PedestrianYielder + args: + mode: 'real' + params: { + 'yielder': 'expert', # 'expert', 'analytic', or 'simulation' + 'planner': 'milestone', # 'milestone', 'dt', or 'dx' + 'desired_speed': 1.0, # m/s, 1.5 m/s seems max speed? Feb24 + 'acceleration': 0.75 # m/s2, 0.5 is not enough to start moving. Feb24 + } + route_planning: + type: StaticRoutePlanner + args: [!relative_path '../GEMstack/knowledge/routes/forward_15m.csv','start'] + motion_planning: longitudinal_planning.YieldTrajectoryPlanner + trajectory_tracking: + type: pure_pursuit.PurePursuitTrajectoryTracker + print: False + + +log: + # Specify the top-level folder to save the log files. Default is 'logs' + #folder : 'logs' + # If prefix is specified, then the log folder will be named with the prefix followed by the date and time. Default no prefix + #prefix : 'fixed_route_' + # If suffix is specified, then logs will output to folder/prefix+suffix. Default uses date and time as the suffix + #suffix : 'test3' + # Specify which ros topics to record to vehicle.bag. Default records nothing. This records the "standard" ROS topics. + ros_topics : + # Specify options to pass to rosbag record. Default is no options. + #rosbag_options : '--split --size=1024' + # If True, then record all readings / commands of the vehicle interface. Default False + vehicle_interface : True + # Specify which components to record to behavior.json. Default records nothing + components : ['state_estimation','agent_detection','motion_planning'] + # Specify which components of state to record to state.json. Default records nothing + #state: ['all'] + # Specify the rate in Hz at which to record state to state.json. 
Default records at the pipeline's rate + #state_rate: 10 + # If True, then make plots of the recorded data specifically from PurePursuitTrajectoryTracker_debug.csv and report metrics. Default False + #auto_plot : True +replay: # Add items here to set certain topics / inputs to be replayed from logs + # Specify which log folder to replay from + log: + ros_topics : [] + components : [] + +#usually can keep this constant +computation_graph: !include "../GEMstack/knowledge/defaults/computation_graph.yaml" + +variants: + detector_only: + run: + description: "Run the pedestrian detection code" + drive: + planning: + trajectory_tracking: + real_sim: + run: + description: "Run the pedestrian detection code with real detection and fake simulation" + mode: hardware + vehicle_interface: + type: gem_mixed.GEMRealSensorsWithSimMotionInterface + args: + scene: !relative_path '../scenes/xyhead_demo.yaml' + mission_execution: StandardExecutor + require_engaged: False + visualization: !include "klampt_visualization.yaml" + drive: + perception: + state_estimation : OmniscientStateEstimator + planning: + relations_estimation: + type: pedestrian_yield_logic.PedestrianYielder + args: + mode: 'sim' + + fake_real: + run: + description: "Run the yielding trajectory planner on the real vehicle with faked perception" + drive: + perception: + agent_detection : cone_detection.FakeConeDetector2D + + fake_sim: + run: + description: "Run the yielding trajectory planner in simulation with faked perception" + mode: simulation + vehicle_interface: + type: gem_simulator.GEMDoubleIntegratorSimulationInterface + args: + scene: !relative_path '../scenes/xyhead_demo.yaml' + visualization: !include "klampt_visualization.yaml" + drive: + perception: + # agent_detection : pedestrian_detection.FakePedestrianDetector2D + agent_detection : OmniscientAgentDetector #this option reads agents from the simulator + state_estimation : OmniscientStateEstimator + planning: + relations_estimation: + type: 
pedestrian_yield_logic.PedestrianYielder + args: + mode: 'sim' From ec41cf7187d9f57939f5a1fe3ddb7c5049d7da36 Mon Sep 17 00:00:00 2001 From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com> Date: Wed, 23 Apr 2025 10:44:36 -0500 Subject: [PATCH 03/28] Update agent.py --- GEMstack/state/agent.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/GEMstack/state/agent.py b/GEMstack/state/agent.py index 757805837..2ca23ba76 100644 --- a/GEMstack/state/agent.py +++ b/GEMstack/state/agent.py @@ -11,6 +11,7 @@ class AgentEnum(Enum): LARGE_TRUCK = 2 PEDESTRIAN = 3 BICYCLIST = 4 + CONE = 5 class AgentActivityEnum(Enum): @@ -18,6 +19,9 @@ class AgentActivityEnum(Enum): MOVING = 1 # standard motion. Predictions will be used here FAST = 2 # indicates faster than usual motion, e.g., runners. UNDETERMINED = 3 # unknown activity + STANDING = 4 # standing cone + LEFT = 5 # flipped cone facing left + RIGHT = 6 # flipped cone facing right @dataclass @@ -41,4 +45,4 @@ def velocity_parent(self) -> Tuple[float,float,float]: def to_frame(self, frame : ObjectFrameEnum, current_pose = None, start_pose_abs = None) -> AgentState: newpose = self.pose.to_frame(frame,current_pose,start_pose_abs) newvelocity = convert_vector(self.velocity,self.pose.frame,frame,current_pose,start_pose_abs) - return replace(self,pose=newpose,velocity=newvelocity) \ No newline at end of file + return replace(self,pose=newpose,velocity=newvelocity) From fa9ef66753827d9ee1766e0f40a4bfe763f0fa86 Mon Sep 17 00:00:00 2001 From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com> Date: Wed, 30 Apr 2025 22:30:34 -0500 Subject: [PATCH 04/28] Create cameras.yaml --- GEMstack/knowledge/calibration/cameras.yaml | 26 +++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 GEMstack/knowledge/calibration/cameras.yaml diff --git a/GEMstack/knowledge/calibration/cameras.yaml b/GEMstack/knowledge/calibration/cameras.yaml new file mode 100644 index 000000000..e240957aa --- 
/dev/null +++ b/GEMstack/knowledge/calibration/cameras.yaml @@ -0,0 +1,26 @@ +cameras: + front: + K: + - [684.83331299, 0.0, 573.37109375] + - [0.0, 684.60968018, 363.70092773] + - [0.0, 0.0, 1.0] + D: [0.0, 0.0, 0.0, 0.0, 0.0] + T_l2c: + - [ 0.0289748006, -0.999580136, 0.0000368439, -0.0307300513] + - [-0.0094993062, -0.0003122155, -0.999954834, -0.386689354 ] + - [ 0.999534999, 0.0289731321, -0.0095043721, -0.671425124 ] + - [ 0.0, 0.0, 0.0, 1.0 ] + + front_right: + K: + - [1176.25545, 0.0, 966.432645] + - [0.0, 1175.14569, 608.580326] + - [0.0, 0.0, 1.0 ] + D: [-0.270136325, 0.164393255, -0.00160720782, -0.0000741246708, -0.0619939758] + T_l2c: + - [-0.71836368, -0.69527204, -0.02346088, 0.05718003] + - [-0.09720448, 0.13371206, -0.98624154, -0.15983010] + - [ 0.68884317, -0.70619960, -0.16363744, -1.04767285] + - [ 0.0, 0.0, 0.0, 1.0 ] + + # add other cameras (back_left, front_right, back_right) similarly... From 0136e45152c5d74b4fb1a36ba1efe38bbd4b9b3f Mon Sep 17 00:00:00 2001 From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com> Date: Wed, 30 Apr 2025 22:33:52 -0500 Subject: [PATCH 05/28] Update cone_detection.yaml --- launch/cone_detection.yaml | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/launch/cone_detection.yaml b/launch/cone_detection.yaml index 165a5b9b8..f4d2e21e2 100644 --- a/launch/cone_detection.yaml +++ b/launch/cone_detection.yaml @@ -9,10 +9,23 @@ recovery: trajectory_tracking : recovery.StopTrajectoryTracker # Driving behavior for the GEM vehicle. Runs real pedestrian perception, yield planner, but does not send commands to real vehicle. 
-drive: +drive: perception: state_estimation : GNSSStateEstimator - agent_detection : cone_detection.ConeDetector3D + agent_detection : + type: cone_detection.ConeDetector3D + args: + camera_name: front_right #[front, front_right] + camera_calib_file: ./GEMstack/knowledge/calibration/cameras.yaml + + # optional overrides + enable_tracking: False # True if you want ot enable tracking function + visualize_2d: False # True if you see 2D detection visualization + use_cyl_roi: False # True if you want to use a cylinder roi (no need to use if calibration is accurate) + save_data: False # True if you want to save the sensor input data + orientation: False # True if you need to detect flipped cones + use_start_frame: False # True if you need output in START frame, otherwise in CURRENT frame + perception_normalization : StandardPerceptionNormalizer planning: relations_estimation: From 4e252c6a58661a9f6001d2869368653e4f6902e0 Mon Sep 17 00:00:00 2001 From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com> Date: Wed, 30 Apr 2025 22:34:31 -0500 Subject: [PATCH 06/28] Update cone_detection.py --- GEMstack/onboard/perception/cone_detection.py | 263 ++++++++++++------ 1 file changed, 171 insertions(+), 92 deletions(-) diff --git a/GEMstack/onboard/perception/cone_detection.py b/GEMstack/onboard/perception/cone_detection.py index d41c527ae..81ddce240 100644 --- a/GEMstack/onboard/perception/cone_detection.py +++ b/GEMstack/onboard/perception/cone_detection.py @@ -18,6 +18,7 @@ import math import ros_numpy import os +import yaml # ----- Helper Functions ----- @@ -29,12 +30,6 @@ def cylindrical_roi(points, center, radius, height): return points[mask] -def undistort_image(image, K, D): - h, w = image.shape[:2] - newK, _ = cv2.getOptimalNewCameraMatrix(K, D, (w, h), 1, (w, h)) - undistorted = cv2.undistort(image, K, D, None, newK) - return undistorted, newK - def filter_points_within_threshold(points, threshold=15.0): distances = np.linalg.norm(points, axis=1) mask = distances 
<= threshold @@ -275,28 +270,79 @@ class ConeDetector3D(Component): Tracking is optional: set `enable_tracking=False` to disable persistent tracking and return only detections from the current frame. + + Supports multiple cameras; each camera’s intrinsics and extrinsics are + loaded from a single YAML calibration file via plain PyYAML. """ - def __init__(self, vehicle_interface: GEMInterface): - self.vehicle_interface = vehicle_interface - self.enable_tracking = False - self.current_agents = {} - self.tracked_agents = {} - self.cone_counter = 0 - self.latest_image = None - self.latest_lidar = None - self.bridge = CvBridge() - self.start_pose_abs = None - self.camera_front = True - self.visualize_2d = False - self.use_cyl_roi = False - self.start_time = None - self.use_start_frame = False - self.save_data = False - self.orientation = False + def __init__( + self, + vehicle_interface: GEMInterface, + camera_name: str, + camera_calib_file: str, + enable_tracking: bool = True, + visualize_2d: bool = False, + use_cyl_roi: bool = False, + T_l2v: list = None, + save_data: bool = True, + orientation: bool = True, + use_start_frame: bool = True, + **kwargs + ): + # Core interfaces and state + self.vehicle_interface = vehicle_interface + self.current_agents = {} + self.tracked_agents = {} + self.cone_counter = 0 + self.latest_image = None + self.latest_lidar = None + self.bridge = CvBridge() + self.start_pose_abs = None + self.start_time = None + + # Config flags + self.camera_name = camera_name + self.enable_tracking = enable_tracking + self.visualize_2d = visualize_2d + self.use_cyl_roi = use_cyl_roi + self.save_data = save_data + self.orientation = orientation + self.use_start_frame = use_start_frame + + # 1) Load lidar→vehicle transform + if T_l2v is not None: + self.T_l2v = np.array(T_l2v) + else: + self.T_l2v = np.array([ + [0.99939639, 0.02547917, 0.023615, 1.1], + [-0.02530848, 0.99965156, -0.00749882, 0.03773583], + [-0.02379784, 0.00689664, 0.999693, 1.95320223], + 
[0.0, 0.0, 0.0, 1.0] + ]) + + # 2) Load camera intrinsics/extrinsics from the supplied YAML + with open(camera_calib_file, 'r') as f: + calib = yaml.safe_load(f) + + # Expect structure: + # cameras: + # front: + # K: [[...], [...], [...]] + # D: [...] + # T_l2c: [[...], ..., [...]] + cam_cfg = calib['cameras'][camera_name] + self.K = np.array(cam_cfg['K']) + self.D = np.array(cam_cfg['D']) + self.T_l2c = np.array(cam_cfg['T_l2c']) + + # Derived transforms + + self.undistort_map1 = None + self.undistort_map2 = None + self.camera_front = (camera_name=='front') def rate(self) -> float: - return 4.0 + return 8 def state_inputs(self) -> list: return ['vehicle'] @@ -305,48 +351,30 @@ def state_outputs(self) -> list: return ['agents'] def initialize(self): - self.rgb_sub = Subscriber('/oak/rgb/image_raw', Image) + # --- Determine the correct RGB topic for this camera --- + rgb_topic_map = { + 'front': '/oak/rgb/image_raw', + 'front_right': '/camera_fr/arena_camera_node/image_raw', + # add additional camera mappings here if needed + } + rgb_topic = rgb_topic_map.get( + self.camera_name, + f'/{self.camera_name}/rgb/image_raw' + ) + + # Subscribe to the RGB and LiDAR streams + self.rgb_sub = Subscriber(rgb_topic, Image) self.lidar_sub = Subscriber('/ouster/points', PointCloud2) - self.sync = ApproximateTimeSynchronizer([self.rgb_sub, self.lidar_sub], - queue_size=10, slop=0.1) + self.sync = ApproximateTimeSynchronizer([ + self.rgb_sub, self.lidar_sub + ], queue_size=200, slop=0.1) self.sync.registerCallback(self.synchronized_callback) - self.detector = YOLO('/home/gem/s2025_perception_merge/GEMstack/GEMstack/knowledge/detection/cone.pt') - self.detector.to('cuda') - - if self.camera_front: - self.K = np.array([[684.83331299, 0., 573.37109375], - [0., 684.60968018, 363.70092773], - [0., 0., 1.]]) - else: - self.K = np.array([[1.17625545e+03, 0.00000000e+00, 9.66432645e+02], - [0.00000000e+00, 1.17514569e+03, 6.08580326e+02], - [0.00000000e+00, 0.00000000e+00, 
1.00000000e+00]]) - if self.camera_front: - self.D = np.array([0.0, 0.0, 0.0, 0.0, 0.0]) - else: - self.D = np.array([-2.70136325e-01, 1.64393255e-01, -1.60720782e-03, -7.41246708e-05, - -6.19939758e-02]) - - self.T_l2v = np.array([[0.99939639, 0.02547917, 0.023615, 1.1], - [-0.02530848, 0.99965156, -0.00749882, 0.03773583], - [-0.02379784, 0.00689664, 0.999693, 1.95320223], - [0., 0., 0., 1.]]) - if self.camera_front: - self.T_l2c = np.array([ - [0.001090, -0.999489, -0.031941, 0.149698], - [-0.007664, 0.031932, -0.999461, -0.397813], - [0.999970, 0.001334, -0.007625, -0.691405], - [0., 0., 0., 1.000000] - ]) - else: - self.T_l2c = np.array([[-0.71836368, -0.69527204, -0.02346088, 0.05718003], - [-0.09720448, 0.13371206, -0.98624154, -0.1598301], - [0.68884317, -0.7061996, -0.16363744, -1.04767285], - [0., 0., 0., 1.]] - ) - self.T_c2l = np.linalg.inv(self.T_l2c) - self.R_c2l = self.T_c2l[:3, :3] + # Initialize the YOLO detector + self.detector = YOLO('GEMstack/knowledge/detection/cone.pt') + self.detector.to('cuda') + self.T_c2l = np.linalg.inv(self.T_l2c) + self.R_c2l = self.T_c2l[:3, :3] self.camera_origin_in_lidar = self.T_c2l[:3, 3] def synchronized_callback(self, image_msg, lidar_msg): @@ -359,24 +387,44 @@ def synchronized_callback(self, image_msg, lidar_msg): step2 = time.time() self.latest_lidar = pc2_to_numpy(lidar_msg, want_rgb=False) step3 = time.time() - print('image callback: ', step2 - step1, 'lidar callback ', step3 - step2) + # print('image callback: ', step2 - step1, 'lidar callback ', step3 - step2) + + def undistort_image(self, image, K, D): + h, w = image.shape[:2] + newK, _ = cv2.getOptimalNewCameraMatrix(K, D, (w, h), 1, (w, h)) + if self.undistort_map1 is None or self.undistort_map2 is None: + self.undistort_map1, self.undistort_map2 = cv2.initUndistortRectifyMap(K, D, R=None, + newCameraMatrix=newK, size=(w, h), + m1type=cv2.CV_32FC1) + + start = time.time() + undistorted = cv2.remap(image, self.undistort_map1, self.undistort_map2, 
interpolation=cv2.INTER_NEAREST) + end = time.time() + # print('--------undistort', end-start) + return undistorted, newK def update(self, vehicle: VehicleState) -> Dict[str, AgentState]: + + start = time.time() downsample = False if self.latest_image is None or self.latest_lidar is None: return {} - + lastest_image = self.latest_image.copy() # Ensure data/ exists and build timestamp - os.makedirs("data", exist_ok=True) + if downsample: + lidar_down = downsample_points(self.latest_lidar, voxel_size=0.1) + else: + lidar_down = self.latest_lidar.copy() current_time = self.vehicle_interface.time() if self.start_time is None: self.start_time = current_time time_elapsed = current_time - self.start_time if self.save_data: + os.makedirs("data", exist_ok=True) tstamp = int(self.vehicle_interface.time() * 1000) # 1) Dump raw image - cv2.imwrite(f"data/{tstamp}_image.png", self.latest_image) + cv2.imwrite(f"data/{tstamp}_image.png", lastest_image) # 2) Dump raw LiDAR np.savez(f"data/{tstamp}_lidar.npz", lidar=self.latest_lidar) # 3) Write BEFORE_TRANSFORM @@ -411,21 +459,30 @@ def update(self, vehicle: VehicleState) -> Dict[str, AgentState]: f"roll={vehicle_start_pose.roll:.2f}, " f"frame={mode}\n" ) - - undistorted_img, current_K = undistort_image(self.latest_image, self.K, self.D) - self.current_K = current_K - self.latest_image = undistorted_img - orig_H, orig_W = undistorted_img.shape[:2] - - # --- Begin modifications for three-angle detection --- - img_normal = undistorted_img - results_normal = self.detector(img_normal, conf=0.3, classes=[0]) + if self.camera_front == False: + start = time.time() + undistorted_img, current_K = self.undistort_image(lastest_image, self.K, self.D) + end = time.time() + # print('-------processing time undistort_image---', end -start) + self.current_K = current_K + orig_H, orig_W = undistorted_img.shape[:2] + + # --- Begin modifications for three-angle detection --- + img_normal = undistorted_img + else: + img_normal = lastest_image.copy() 
+ undistorted_img = lastest_image.copy() + orig_H, orig_W = lastest_image.shape[:2] + self.current_K = self.K + results_normal = self.detector(img_normal, conf=0.25, classes=[0]) combined_boxes = [] + if not self.enable_tracking: + self.cone_counter = 0 if self.orientation: img_left = cv2.rotate(undistorted_img.copy(), cv2.ROTATE_90_COUNTERCLOCKWISE) img_right = cv2.rotate(undistorted_img.copy(), cv2.ROTATE_90_CLOCKWISE) - results_left = self.detector(img_left, conf=0.3, classes=[0]) - results_right = self.detector(img_right, conf=0.3, classes=[0]) + results_left = self.detector(img_left, conf=0.05, classes=[0]) + results_right = self.detector(img_right, conf=0.05, classes=[0]) boxes_left = np.array(results_left[0].boxes.xywh.cpu()) if len(results_left) > 0 else [] boxes_right = np.array(results_right[0].boxes.xywh.cpu()) if len(results_right) > 0 else [] for box in boxes_left: @@ -467,19 +524,16 @@ def update(self, vehicle: VehicleState) -> Dict[str, AgentState]: cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2) cv2.imshow("Detection - Cone 2D", undistorted_img) - if downsample: - lidar_down = downsample_points(self.latest_lidar, voxel_size=0.1) - else: - lidar_down = self.latest_lidar.copy() - + start = time.time() pts_cam = transform_points_l2c(lidar_down, self.T_l2c) projected_pts = project_points(pts_cam, self.current_K, lidar_down) + end = time.time() + # print('-------processing time1---', end -start) agents = {} for i, box_info in enumerate(combined_boxes): cx, cy, w, h, activity = box_info - start = time.time() left = int(cx - w / 2) right = int(cx + w / 2) top = int(cy - h / 2) @@ -489,18 +543,19 @@ def update(self, vehicle: VehicleState) -> Dict[str, AgentState]: roi_pts = projected_pts[mask] if roi_pts.shape[0] < 5: continue + points_3d = roi_pts[:, 2:5] points_3d = filter_points_within_threshold(points_3d, 30) points_3d = filter_depth_points(points_3d, max_depth_diff=0.3) if self.use_cyl_roi: global_filtered = filter_points_within_threshold(lidar_down, 30) - 
roi_cyl = cylindrical_roi(global_filtered, np.mean(points_3d, axis=0), radius=0.3, height=1.2) + roi_cyl = cylindrical_roi(global_filtered, np.mean(points_3d, axis=0), radius=0.4, height=1.2) refined_cluster = remove_ground_by_min_range(roi_cyl, z_range=0.01) - refined_cluster = filter_depth_points(refined_cluster, max_depth_diff=0.2) + refined_cluster = filter_depth_points(refined_cluster, max_depth_diff=0.3) else: - refined_cluster = remove_ground_by_min_range(points_3d, z_range=0.05) - end1 = time.time() + refined_cluster = points_3d.copy() + # end1 = time.time() if refined_cluster.shape[0] < 4: continue @@ -510,7 +565,6 @@ def update(self, vehicle: VehicleState) -> Dict[str, AgentState]: refined_center = obb.center dims = tuple(obb.extent) R_lidar = obb.R.copy() - end2 = time.time() refined_center_hom = np.append(refined_center, 1) refined_center_vehicle_hom = self.T_l2v @ refined_center_hom @@ -552,7 +606,7 @@ def update(self, vehicle: VehicleState) -> Dict[str, AgentState]: ) if existing_id is not None: old_state = self.tracked_agents[existing_id] - if vehicle.v < 0.1: + if vehicle.v < 100: alpha = 0.1 avg_x = alpha * new_pose.x + (1 - alpha) * old_state.pose.x avg_y = alpha * new_pose.y + (1 - alpha) * old_state.pose.y @@ -616,18 +670,41 @@ def update(self, vehicle: VehicleState) -> Dict[str, AgentState]: # If tracking not enabled, return only current frame detections if not self.enable_tracking: + for agent_id, agent in self.current_agents.items(): + p = agent.pose + rospy.loginfo( + f"Agent ID: {agent_id}\n" + f"Pose: (x: {p.x:.3f}, y: {p.y:.3f}, z: {p.z:.3f}, " + f"yaw: {p.yaw:.3f}, pitch: {p.pitch:.3f}, roll: {p.roll:.3f})\n" + f"Velocity: (vx: {agent.velocity[0]:.3f}, vy: {agent.velocity[1]:.3f}, vz: {agent.velocity[2]:.3f})\n" + f"type:{agent.activity}" + ) + end = time.time() + # print('-------processing time', end -start) return self.current_agents stale_ids = [agent_id for agent_id, agent in self.tracked_agents.items() - if current_time - 
agent.pose.t > 5.0] + if current_time - agent.pose.t > 20.0] for agent_id in stale_ids: rospy.loginfo(f"Removing stale agent: {agent_id}\n") del self.tracked_agents[agent_id] - + if self.enable_tracking: + for agent_id, agent in self.tracked_agents.items(): + p = agent.pose + rospy.loginfo( + f"Agent ID: {agent_id}\n" + f"Pose: (x: {p.x:.3f}, y: {p.y:.3f}, z: {p.z:.3f}, " + f"yaw: {p.yaw:.3f}, pitch: {p.pitch:.3f}, roll: {p.roll:.3f})\n" + f"Velocity: (vx: {agent.velocity[0]:.3f}, vy: {agent.velocity[1]:.3f}, vz: {agent.velocity[2]:.3f})\n" + f"type:{agent.activity}" + ) + end = time.time() + # print('-------processing time', end -start) return self.tracked_agents # ----- Fake Cone Detector 2D (for Testing Purposes) ----- + class FakConeDetector(Component): def __init__(self, vehicle_interface: GEMInterface): self.vehicle_interface = vehicle_interface @@ -654,6 +731,7 @@ def update(self, vehicle: VehicleState) -> Dict[str, AgentState]: rospy.loginfo("Detected a Cone (simulated)") return res + def box_to_fake_agent(box): x, y, w, h = box pose = ObjectPose(t=0, x=x + w / 2, y=y + h / 2, z=0, yaw=0, pitch=0, roll=0, frame=ObjectFrameEnum.CURRENT) @@ -662,5 +740,6 @@ def box_to_fake_agent(box): type=AgentEnum.CONE, activity=AgentActivityEnum.MOVING, velocity=(0, 0, 0), yaw_rate=0) + if __name__ == '__main__': pass From ec0a5cf5925324a89f253134efbde46ea3cfef8a Mon Sep 17 00:00:00 2001 From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com> Date: Wed, 7 May 2025 10:26:05 -0500 Subject: [PATCH 07/28] Update cone_detection.py changed AgentState to ObstacleState, and also changed the corresponding yaml file --- GEMstack/onboard/perception/cone_detection.py | 483 +++++------------- 1 file changed, 127 insertions(+), 356 deletions(-) diff --git a/GEMstack/onboard/perception/cone_detection.py b/GEMstack/onboard/perception/cone_detection.py index 81ddce240..b9fbc1e27 100644 --- a/GEMstack/onboard/perception/cone_detection.py +++ 
b/GEMstack/onboard/perception/cone_detection.py @@ -1,269 +1,22 @@ -from ...state import AllState, VehicleState, ObjectPose, ObjectFrameEnum, AgentState, AgentEnum, AgentActivityEnum +from ...state import AllState, VehicleState, ObjectPose, ObjectFrameEnum, ObstacleState, ObstacleMaterialEnum, ObstacleStateEnum from ..interface.gem import GEMInterface from ..component import Component +from .perception_utils import * from ultralytics import YOLO import cv2 from typing import Dict import open3d as o3d import numpy as np -from sklearn.cluster import DBSCAN from scipy.spatial.transform import Rotation as R import rospy from sensor_msgs.msg import PointCloud2, Image -import sensor_msgs.point_cloud2 as pc2 -import struct, ctypes from message_filters import Subscriber, ApproximateTimeSynchronizer from cv_bridge import CvBridge import time -import math -import ros_numpy import os import yaml -# ----- Helper Functions ----- - -def cylindrical_roi(points, center, radius, height): - horizontal_dist = np.linalg.norm(points[:, :2] - center[:2], axis=1) - vertical_diff = np.abs(points[:, 2] - center[2]) - mask = (horizontal_dist <= radius) & (vertical_diff <= height / 2) - return points[mask] - - -def filter_points_within_threshold(points, threshold=15.0): - distances = np.linalg.norm(points, axis=1) - mask = distances <= threshold - return points[mask] - - -def match_existing_cone( - new_center: np.ndarray, - new_dims: tuple, - existing_agents: Dict[str, AgentState], - distance_threshold: float = 1.0 -) -> str: - """ - Find the closest existing Cone agent within a specified distance threshold. 
- """ - best_agent_id = None - best_dist = float('inf') - for agent_id, agent_state in existing_agents.items(): - old_center = np.array([agent_state.pose.x, agent_state.pose.y, agent_state.pose.z]) - dist = np.linalg.norm(new_center - old_center) - if dist < distance_threshold and dist < best_dist: - best_dist = dist - best_agent_id = agent_id - return best_agent_id - - -def compute_velocity(old_pose: ObjectPose, new_pose: ObjectPose, dt: float) -> tuple: - """ - Compute the (vx, vy, vz) velocity based on change in pose over time. - """ - if dt <= 0: - return (0, 0, 0) - vx = (new_pose.x - old_pose.x) / dt - vy = (new_pose.y - old_pose.y) / dt - vz = (new_pose.z - old_pose.z) / dt - return (vx, vy, vz) - - -def extract_roi_box(lidar_pc, center, half_extents): - """ - Extract a region of interest (ROI) from the LiDAR point cloud defined by an axis-aligned bounding box. - """ - lower = center - half_extents - upper = center + half_extents - mask = np.all((lidar_pc >= lower) & (lidar_pc <= upper), axis=1) - return lidar_pc[mask] - - -def pc2_to_numpy(pc2_msg, want_rgb=False): - """ - Convert a ROS PointCloud2 message into a numpy array quickly using ros_numpy. - This function extracts the x, y, z coordinates from the point cloud. - """ - # Convert the ROS message to a numpy structured array - pc = ros_numpy.point_cloud2.pointcloud2_to_array(pc2_msg) - # Stack x,y,z fields to a (N,3) array - pts = np.stack((np.array(pc['x']).ravel(), - np.array(pc['y']).ravel(), - np.array(pc['z']).ravel()), axis=1) - # Apply filtering (for example, x > 0 and z in a specified range) - mask = (pts[:, 0] > -0.5) & (pts[:, 2] < -1) & (pts[:, 2] > -2.7) - return pts[mask] - - -def backproject_pixel(u, v, K): - """ - Backprojects a pixel coordinate (u, v) into a normalized 3D ray in the camera coordinate system. 
- """ - cx, cy = K[0, 2], K[1, 2] - fx, fy = K[0, 0], K[1, 1] - x = (u - cx) / fx - y = (v - cy) / fy - ray_dir = np.array([x, y, 1.0]) - return ray_dir / np.linalg.norm(ray_dir) - - -def find_human_center_on_ray(lidar_pc, ray_origin, ray_direction, - t_min, t_max, t_step, - distance_threshold, min_points, ransac_threshold): - """ - Identify the center of a human along a projected ray. - (This function is no longer used in the new approach.) - """ - return None, None, None - - -def extract_roi(pc, center, roi_radius): - """ - Extract points from a point cloud that lie within a specified radius of a center point. - """ - distances = np.linalg.norm(pc - center, axis=1) - return pc[distances < roi_radius] - - -def refine_cluster(roi_points, center, eps=0.2, min_samples=10): - """ - Refine a point cluster by applying DBSCAN and return the cluster closest to 'center'. - """ - if roi_points.shape[0] < min_samples: - return roi_points - clustering = DBSCAN(eps=eps, min_samples=min_samples).fit(roi_points) - labels = clustering.labels_ - valid_clusters = [roi_points[labels == l] for l in set(labels) if l != -1] - if not valid_clusters: - return roi_points - best_cluster = min(valid_clusters, key=lambda c: np.linalg.norm(np.mean(c, axis=0) - center)) - return best_cluster - - -def remove_ground_by_min_range(cluster, z_range=0.05): - """ - Remove points within z_range of the minimum z (assumed to be ground). - """ - if cluster is None or cluster.shape[0] == 0: - return cluster - min_z = np.min(cluster[:, 2]) - filtered = cluster[cluster[:, 2] > (min_z + z_range)] - return filtered - - -def get_bounding_box_center_and_dimensions(points): - """ - Calculate the axis-aligned bounding box's center and dimensions for a set of 3D points. 
- """ - if points.shape[0] == 0: - return None, None - min_vals = np.min(points, axis=0) - max_vals = np.max(points, axis=0) - center = (min_vals + max_vals) / 2 - dimensions = max_vals - min_vals - return center, dimensions - - -def create_ray_line_set(start, end): - """ - Create an Open3D LineSet object representing a ray between two 3D points. - The line is colored yellow. - """ - points = [start, end] - lines = [[0, 1]] - line_set = o3d.geometry.LineSet() - line_set.points = o3d.utility.Vector3dVector(points) - line_set.lines = o3d.utility.Vector2iVector(lines) - line_set.colors = o3d.utility.Vector3dVector([[1, 1, 0]]) - return line_set - - -def downsample_points(lidar_points, voxel_size=0.15): - pcd = o3d.geometry.PointCloud() - pcd.points = o3d.utility.Vector3dVector(lidar_points) - down_pcd = pcd.voxel_down_sample(voxel_size=voxel_size) - return np.asarray(down_pcd.points) - - -def filter_depth_points(lidar_points, max_depth_diff=0.9, use_norm=True): - if lidar_points.shape[0] == 0: - return lidar_points - - if use_norm: - depths = np.linalg.norm(lidar_points, axis=1) - else: - depths = lidar_points[:, 0] - - min_depth = np.min(depths) - max_possible_depth = min_depth + max_depth_diff - mask = depths < max_possible_depth - return lidar_points[mask] - - -def visualize_geometries(geometries, window_name="Open3D", width=800, height=600, point_size=5.0): - """ - Visualize a list of Open3D geometry objects in a dedicated window. - """ - vis = o3d.visualization.Visualizer() - vis.create_window(window_name=window_name, width=width, height=height) - for geom in geometries: - vis.add_geometry(geom) - opt = vis.get_render_option() - opt.point_size = point_size - vis.run() - vis.destroy_window() - - -def pose_to_matrix(pose): - """ - Compose a 4x4 transformation matrix from a pose state. - Assumes pose has attributes: x, y, z, yaw, pitch, roll, - where the angles are given in degrees. 
- """ - x = pose.x if pose.x is not None else 0.0 - y = pose.y if pose.y is not None else 0.0 - z = pose.z if pose.z is not None else 0.0 - if pose.yaw is not None and pose.pitch is not None and pose.roll is not None: - yaw = math.radians(pose.yaw) - pitch = math.radians(pose.pitch) - roll = math.radians(pose.roll) - else: - yaw = 0.0 - pitch = 0.0 - roll = 0.0 - R_mat = R.from_euler('zyx', [yaw, pitch, roll]).as_matrix() - T = np.eye(4) - T[:3, :3] = R_mat - T[:3, 3] = np.array([x, y, z]) - return T - - -def transform_points_l2c(lidar_points, T_l2c): - N = lidar_points.shape[0] - pts_hom = np.hstack((lidar_points, np.ones((N, 1)))) # (N,4) - pts_cam = (T_l2c @ pts_hom.T).T # (N,4) - return pts_cam[:, :3] - - -# ----- New: Vectorized projection function ----- -def project_points(pts_cam, K, original_lidar_points): - """ - Vectorized version. - pts_cam: (N,3) array of points in camera coordinates. - original_lidar_points: (N,3) array of points in LiDAR coordinates. - Returns a (M,5) array: [u, v, X_lidar, Y_lidar, Z_lidar] for all points with Z>0. - """ - mask = pts_cam[:, 2] > 0 - pts_cam_valid = pts_cam[mask] - lidar_valid = original_lidar_points[mask] - Xc = pts_cam_valid[:, 0] - Yc = pts_cam_valid[:, 1] - Zc = pts_cam_valid[:, 2] - u = (K[0, 0] * (Xc / Zc) + K[0, 2]).astype(np.int32) - v = (K[1, 1] * (Yc / Zc) + K[1, 2]).astype(np.int32) - proj = np.column_stack((u, v, lidar_valid)) - return proj - - class ConeDetector3D(Component): """ Detects cones by fusing YOLO 2D detections with LiDAR point cloud data. 
@@ -291,8 +44,8 @@ def __init__( ): # Core interfaces and state self.vehicle_interface = vehicle_interface - self.current_agents = {} - self.tracked_agents = {} + self.current_obstacles = {} + self.tracked_obstacles = {} self.cone_counter = 0 self.latest_image = None self.latest_lidar = None @@ -348,7 +101,7 @@ def state_inputs(self) -> list: return ['vehicle'] def state_outputs(self) -> list: - return ['agents'] + return ['obstacles'] def initialize(self): # --- Determine the correct RGB topic for this camera --- @@ -367,7 +120,7 @@ def initialize(self): self.lidar_sub = Subscriber('/ouster/points', PointCloud2) self.sync = ApproximateTimeSynchronizer([ self.rgb_sub, self.lidar_sub - ], queue_size=200, slop=0.1) + ], queue_size=500, slop=0.05) self.sync.registerCallback(self.synchronized_callback) # Initialize the YOLO detector @@ -403,65 +156,33 @@ def undistort_image(self, image, K, D): # print('--------undistort', end-start) return undistorted, newK - def update(self, vehicle: VehicleState) -> Dict[str, AgentState]: - - start = time.time() + def update(self, vehicle: VehicleState) -> Dict[str, ObstacleState]: downsample = False + # Gate guards against data not being present for both sensors: if self.latest_image is None or self.latest_lidar is None: return {} - lastest_image = self.latest_image.copy() - # Ensure data/ exists and build timestamp + latest_image = self.latest_image.copy() + + # Set up current time variables + start = time.time() + current_time = self.vehicle_interface.time() + if downsample: lidar_down = downsample_points(self.latest_lidar, voxel_size=0.1) else: lidar_down = self.latest_lidar.copy() - current_time = self.vehicle_interface.time() + if self.start_time is None: self.start_time = current_time time_elapsed = current_time - self.start_time + # Ensure data/ exists and build timestamp if self.save_data: - os.makedirs("data", exist_ok=True) - tstamp = int(self.vehicle_interface.time() * 1000) - # 1) Dump raw image - 
cv2.imwrite(f"data/{tstamp}_image.png", lastest_image) - # 2) Dump raw LiDAR - np.savez(f"data/{tstamp}_lidar.npz", lidar=self.latest_lidar) - # 3) Write BEFORE_TRANSFORM - with open(f"data/{tstamp}_vehstate.txt", "w") as f: - vp = vehicle.pose - f.write( - f"BEFORE_TRANSFORM " - f"x={vp.x:.3f}, y={vp.y:.3f}, z={vp.z:.3f}, " - f"yaw={vp.yaw:.2f}, pitch={vp.pitch:.2f}, roll={vp.roll:.2f}\n" - ) - # Compute vehicle_start_pose in either START or CURRENT - if self.use_start_frame: - if self.start_pose_abs is None: - self.start_pose_abs = vehicle.pose - vehicle_start_pose = vehicle.pose.to_frame( - ObjectFrameEnum.START, - vehicle.pose, - self.start_pose_abs - ) - mode = "START" - else: - vehicle_start_pose = vehicle.pose - mode = "CURRENT" - with open(f"data/{tstamp}_vehstate.txt", "a") as f: - f.write( - f"AFTER_TRANSFORM " - f"x={vehicle_start_pose.x:.3f}, " - f"y={vehicle_start_pose.y:.3f}, " - f"z={vehicle_start_pose.z:.3f}, " - f"yaw={vehicle_start_pose.yaw:.2f}, " - f"pitch={vehicle_start_pose.pitch:.2f}, " - f"roll={vehicle_start_pose.roll:.2f}, " - f"frame={mode}\n" - ) + self.save_sensor_data(vehicle=vehicle, latest_image=latest_image) + if self.camera_front == False: start = time.time() - undistorted_img, current_K = self.undistort_image(lastest_image, self.K, self.D) + undistorted_img, current_K = self.undistort_image(latest_image, self.K, self.D) end = time.time() # print('-------processing time undistort_image---', end -start) self.current_K = current_K @@ -470,50 +191,52 @@ def update(self, vehicle: VehicleState) -> Dict[str, AgentState]: # --- Begin modifications for three-angle detection --- img_normal = undistorted_img else: - img_normal = lastest_image.copy() - undistorted_img = lastest_image.copy() - orig_H, orig_W = lastest_image.shape[:2] + img_normal = latest_image.copy() + undistorted_img = latest_image.copy() + orig_H, orig_W = latest_image.shape[:2] self.current_K = self.K - results_normal = self.detector(img_normal, conf=0.25, classes=[0]) + 
results_normal = self.detector(img_normal, conf=0.35, classes=[0]) combined_boxes = [] if not self.enable_tracking: self.cone_counter = 0 if self.orientation: img_left = cv2.rotate(undistorted_img.copy(), cv2.ROTATE_90_COUNTERCLOCKWISE) img_right = cv2.rotate(undistorted_img.copy(), cv2.ROTATE_90_CLOCKWISE) - results_left = self.detector(img_left, conf=0.05, classes=[0]) - results_right = self.detector(img_right, conf=0.05, classes=[0]) + results_left = self.detector(img_left, conf=0.15, classes=[0]) + results_right = self.detector(img_right, conf=0.15, classes=[0]) boxes_left = np.array(results_left[0].boxes.xywh.cpu()) if len(results_left) > 0 else [] boxes_right = np.array(results_right[0].boxes.xywh.cpu()) if len(results_right) > 0 else [] for box in boxes_left: cx, cy, w, h = box new_cx = cy new_cy = orig_W - 1 - cx - combined_boxes.append((new_cx, new_cy, h, w, AgentActivityEnum.RIGHT)) + combined_boxes.append((new_cx, new_cy, h, w, ObstacleStateEnum.RIGHT)) for box in boxes_right: cx, cy, w, h = box new_cx = orig_H - 1 - cy new_cy = cx - combined_boxes.append((new_cx, new_cy, h, w, AgentActivityEnum.LEFT)) + combined_boxes.append((new_cx, new_cy, h, w, ObstacleStateEnum.LEFT)) boxes_normal = np.array(results_normal[0].boxes.xywh.cpu()) if len(results_normal) > 0 else [] for box in boxes_normal: cx, cy, w, h = box - combined_boxes.append((cx, cy, w, h, AgentActivityEnum.STANDING)) + combined_boxes.append((cx, cy, w, h, ObstacleStateEnum.STANDING)) + # Visualize the received images in 2D with their corresponding labels + # It draws rectangles and labels on the images: if getattr(self, 'visualize_2d', False): for (cx, cy, w, h, activity) in combined_boxes: left = int(cx - w / 2) right = int(cx + w / 2) top = int(cy - h / 2) bottom = int(cy + h / 2) - if activity == AgentActivityEnum.STANDING: + if activity == ObstacleStateEnum.STANDING: color = (255, 0, 0) label = "STANDING" - elif activity == AgentActivityEnum.RIGHT: + elif activity == ObstacleStateEnum.RIGHT: 
color = (0, 255, 0) label = "RIGHT" - elif activity == AgentActivityEnum.LEFT: + elif activity == ObstacleStateEnum.LEFT: color = (0, 0, 255) label = "LEFT" else: @@ -525,17 +248,24 @@ def update(self, vehicle: VehicleState) -> Dict[str, AgentState]: cv2.imshow("Detection - Cone 2D", undistorted_img) start = time.time() + # Transform the lidar points from lidar frame of reference to camera EXTRINSIC frame of reference. + # Then project the pixels onto the lidar points to "paint them" (essentially determine which points are associated with detected objects) pts_cam = transform_points_l2c(lidar_down, self.T_l2c) projected_pts = project_points(pts_cam, self.current_K, lidar_down) + # What is returned: + # projected_pts[:, 0]: u-coordinate in the image (horizontal pixel position) + # projected_pts[:, 1]: v-coordinate in the image (vertical pixel position) + # projected_pts[:, 2:5]: original X, Y, Z coordinates in the LiDAR frame + end = time.time() # print('-------processing time1---', end -start) - agents = {} + obstacles = {} for i, box_info in enumerate(combined_boxes): cx, cy, w, h, activity = box_info - left = int(cx - w / 2) - right = int(cx + w / 2) + left = int(cx - w / 1.6) + right = int(cx + w / 1.6) top = int(cy - h / 2) bottom = int(cy + h / 2) mask = (projected_pts[:, 0] >= left) & (projected_pts[:, 0] <= right) & \ @@ -545,8 +275,9 @@ def update(self, vehicle: VehicleState) -> Dict[str, AgentState]: continue points_3d = roi_pts[:, 2:5] - points_3d = filter_points_within_threshold(points_3d, 30) - points_3d = filter_depth_points(points_3d, max_depth_diff=0.3) + points_3d = filter_points_within_threshold(points_3d, 40) + points_3d = remove_ground_by_min_range(points_3d, z_range=0.08) + points_3d = filter_depth_points(points_3d, max_depth_diff=0.5) if self.use_cyl_roi: global_filtered = filter_points_within_threshold(lidar_down, 30) @@ -601,11 +332,11 @@ def update(self, vehicle: VehicleState) -> Dict[str, AgentState]: existing_id = match_existing_cone( 
np.array([new_pose.x, new_pose.y, new_pose.z]), dims, - self.tracked_agents, - distance_threshold=2.0 + self.tracked_obstacles, + distance_threshold=2 ) if existing_id is not None: - old_state = self.tracked_agents[existing_id] + old_state = self.tracked_obstacles[existing_id] if vehicle.v < 100: alpha = 0.1 avg_x = alpha * new_pose.x + (1 - alpha) * old_state.pose.x @@ -625,82 +356,122 @@ def update(self, vehicle: VehicleState) -> Dict[str, AgentState]: roll=avg_roll, frame=new_pose.frame ) - updated_agent = AgentState( + updated_obstacle = ObstacleState( pose=updated_pose, dimensions=dims, outline=None, - type=AgentEnum.CONE, + type=ObstacleMaterialEnum.TRAFFIC_CONE, activity=activity, velocity=(0, 0, 0), yaw_rate=0 ) else: - updated_agent = old_state - agents[existing_id] = updated_agent - self.tracked_agents[existing_id] = updated_agent + updated_obstacle = old_state + obstacles[existing_id] = updated_obstacle + self.tracked_obstacles[existing_id] = updated_obstacle else: - agent_id = f"Cone{self.cone_counter}" + obstacle_id = f"Cone{self.cone_counter}" self.cone_counter += 1 - new_agent = AgentState( + new_obstacle = ObstacleState( pose=new_pose, dimensions=dims, outline=None, - type=AgentEnum.CONE, + type=ObstacleMaterialEnum.TRAFFIC_CONE, activity=activity, velocity=(0, 0, 0), yaw_rate=0 ) - agents[agent_id] = new_agent - self.tracked_agents[agent_id] = new_agent + obstacles[obstacle_id] = new_obstacle + self.tracked_obstacles[obstacle_id] = new_obstacle else: - agent_id = f"Cone{self.cone_counter}" + obstacle_id = f"Cone{self.cone_counter}" self.cone_counter += 1 - new_agent = AgentState( + new_obstacle = ObstacleState( pose=new_pose, dimensions=dims, outline=None, - type=AgentEnum.CONE, + type=ObstacleMaterialEnum.TRAFFIC_CONE, activity=activity, velocity=(0, 0, 0), yaw_rate=0 ) - agents[agent_id] = new_agent + obstacles[obstacle_id] = new_obstacle - self.current_agents = agents + self.current_obstacles = obstacles # If tracking not enabled, return only 
current frame detections if not self.enable_tracking: - for agent_id, agent in self.current_agents.items(): - p = agent.pose + for obstacle_id, obstacle in self.current_obstacles.items(): + p = obstacle.pose rospy.loginfo( - f"Agent ID: {agent_id}\n" + f"Agent ID: {obstacle_id}\n" f"Pose: (x: {p.x:.3f}, y: {p.y:.3f}, z: {p.z:.3f}, " f"yaw: {p.yaw:.3f}, pitch: {p.pitch:.3f}, roll: {p.roll:.3f})\n" - f"Velocity: (vx: {agent.velocity[0]:.3f}, vy: {agent.velocity[1]:.3f}, vz: {agent.velocity[2]:.3f})\n" - f"type:{agent.activity}" + f"Velocity: (vx: {obstacle.velocity[0]:.3f}, vy: {obstacle.velocity[1]:.3f}, vz: {obstacle.velocity[2]:.3f})\n" + f"type:{obstacle.activity}" ) end = time.time() # print('-------processing time', end -start) - return self.current_agents + return self.current_obstacles - stale_ids = [agent_id for agent_id, agent in self.tracked_agents.items() - if current_time - agent.pose.t > 20.0] - for agent_id in stale_ids: - rospy.loginfo(f"Removing stale agent: {agent_id}\n") - del self.tracked_agents[agent_id] + stale_ids = [obstacle_id for obstacle_id, obstacle in self.tracked_obstacles.items() + if current_time - obstacle.pose.t > 20.0] + for obstacle_id in stale_ids: + rospy.loginfo(f"Removing stale obstacle: {obstacle_id}\n") + del self.tracked_obstacles[obstacle_id] if self.enable_tracking: - for agent_id, agent in self.tracked_agents.items(): - p = agent.pose + for obstacle_id, obstacle in self.tracked_obstacles.items(): + p = obstacle.pose rospy.loginfo( - f"Agent ID: {agent_id}\n" + f"Agent ID: {obstacle_id}\n" f"Pose: (x: {p.x:.3f}, y: {p.y:.3f}, z: {p.z:.3f}, " f"yaw: {p.yaw:.3f}, pitch: {p.pitch:.3f}, roll: {p.roll:.3f})\n" - f"Velocity: (vx: {agent.velocity[0]:.3f}, vy: {agent.velocity[1]:.3f}, vz: {agent.velocity[2]:.3f})\n" - f"type:{agent.activity}" + f"Velocity: (vx: {obstacle.velocity[0]:.3f}, vy: {obstacle.velocity[1]:.3f}, vz: {obstacle.velocity[2]:.3f})\n" + f"type:{obstacle.activity}" ) end = time.time() # print('-------processing 
time', end -start) - return self.tracked_agents + return self.tracked_obstacles + + def save_sensor_data(self, vehicle: VehicleState, latest_image) -> None: + os.makedirs("data", exist_ok=True) + tstamp = int(self.vehicle_interface.time() * 1000) + # 1) Dump raw image + cv2.imwrite(f"data/{tstamp}_image.png", latest_image) + # 2) Dump raw LiDAR + np.savez(f"data/{tstamp}_lidar.npz", lidar=self.latest_lidar) + # 3) Write BEFORE_TRANSFORM + with open(f"data/{tstamp}_vehstate.txt", "w") as f: + vp = vehicle.pose + f.write( + f"BEFORE_TRANSFORM " + f"x={vp.x:.3f}, y={vp.y:.3f}, z={vp.z:.3f}, " + f"yaw={vp.yaw:.2f}, pitch={vp.pitch:.2f}, roll={vp.roll:.2f}\n" + ) + # Compute vehicle_start_pose in either START or CURRENT + if self.use_start_frame: + if self.start_pose_abs is None: + self.start_pose_abs = vehicle.pose + vehicle_start_pose = vehicle.pose.to_frame( + ObjectFrameEnum.START, + vehicle.pose, + self.start_pose_abs + ) + mode = "START" + else: + vehicle_start_pose = vehicle.pose + mode = "CURRENT" + with open(f"data/{tstamp}_vehstate.txt", "a") as f: + f.write( + f"AFTER_TRANSFORM " + f"x={vehicle_start_pose.x:.3f}, " + f"y={vehicle_start_pose.y:.3f}, " + f"z={vehicle_start_pose.z:.3f}, " + f"yaw={vehicle_start_pose.yaw:.2f}, " + f"pitch={vehicle_start_pose.pitch:.2f}, " + f"roll={vehicle_start_pose.roll:.2f}, " + f"frame={mode}\n" + ) # ----- Fake Cone Detector 2D (for Testing Purposes) ----- @@ -718,26 +489,26 @@ def state_inputs(self): return ['vehicle'] def state_outputs(self): - return ['agents'] + return ['obstacles'] - def update(self, vehicle: VehicleState) -> Dict[str, AgentState]: + def update(self, vehicle: VehicleState) -> Dict[str, ObstacleState]: if self.t_start is None: self.t_start = self.vehicle_interface.time() t = self.vehicle_interface.time() - self.t_start res = {} for time_range in self.times: if t >= time_range[0] and t <= time_range[1]: - res['cone0'] = box_to_fake_agent((0, 0, 0, 0)) + res['cone0'] = box_to_fake_obstacle((0, 0, 0, 0)) 
rospy.loginfo("Detected a Cone (simulated)") return res -def box_to_fake_agent(box): +def box_to_fake_obstacle(box): x, y, w, h = box pose = ObjectPose(t=0, x=x + w / 2, y=y + h / 2, z=0, yaw=0, pitch=0, roll=0, frame=ObjectFrameEnum.CURRENT) dims = (w, h, 0) - return AgentState(pose=pose, dimensions=dims, outline=None, - type=AgentEnum.CONE, activity=AgentActivityEnum.MOVING, + return ObstacleState(pose=pose, dimensions=dims, outline=None, + type=ObstacleMaterialEnum.TRAFFIC_CONE, activity=ObstacleStateEnum.STANDING, velocity=(0, 0, 0), yaw_rate=0) From 4dd6e5c0cfc724f8a34ead5730f2c10de09b3ade Mon Sep 17 00:00:00 2001 From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com> Date: Wed, 7 May 2025 10:26:40 -0500 Subject: [PATCH 08/28] Update agent.py --- GEMstack/state/agent.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/GEMstack/state/agent.py b/GEMstack/state/agent.py index 2ca23ba76..65c2bfeb0 100644 --- a/GEMstack/state/agent.py +++ b/GEMstack/state/agent.py @@ -11,18 +11,13 @@ class AgentEnum(Enum): LARGE_TRUCK = 2 PEDESTRIAN = 3 BICYCLIST = 4 - CONE = 5 - class AgentActivityEnum(Enum): STOPPED = 0 # standing pedestrians, parked cars, etc. No need to predict motion. MOVING = 1 # standard motion. Predictions will be used here FAST = 2 # indicates faster than usual motion, e.g., runners. 
UNDETERMINED = 3 # unknown activity - STANDING = 4 # standing cone - LEFT = 5 # flipped cone facing left - RIGHT = 6 # flipped cone facing right - + STANDING = 4 # standing @dataclass @register From 45168fd63d99fbdfabffda9287bc0b7bd2081c01 Mon Sep 17 00:00:00 2001 From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com> Date: Wed, 7 May 2025 10:27:44 -0500 Subject: [PATCH 09/28] Update obstacle.py --- GEMstack/state/obstacle.py | 39 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/GEMstack/state/obstacle.py b/GEMstack/state/obstacle.py index 8fddd5cc0..61389819b 100644 --- a/GEMstack/state/obstacle.py +++ b/GEMstack/state/obstacle.py @@ -1,7 +1,9 @@ -from dataclasses import dataclass +from __future__ import annotations +from dataclasses import dataclass, replace from ..utils.serialization import register -from .physical_object import PhysicalObject +from .physical_object import ObjectFrameEnum,ObjectPose,PhysicalObject,convert_vector from enum import Enum +from typing import Tuple class ObstacleMaterialEnum(Enum): UNKNOWN = 0 @@ -16,6 +18,16 @@ class ObstacleMaterialEnum(Enum): SMALL_ANIMAL = 9 ROADKILL = 10 +class ObstacleStateEnum(Enum): + STOPPED = 0 # standing pedestrians, parked cars, etc. No need to predict motion. + MOVING = 1 # standard motion. 
Predictions will be used here + FAST = 2 # indicates faster than usual motion + UNDETERMINED = 3 # unknown activity + STANDING = 4 # standing cone + LEFT = 5 # flipped cone facing left + RIGHT = 6 # flipped cone facing right + + @dataclass @register @@ -23,3 +35,26 @@ class Obstacle(PhysicalObject): material : ObstacleMaterialEnum collidable : bool + +@dataclass +@register +class ObstacleState(PhysicalObject): + type: ObstacleMaterialEnum + activity: ObstacleStateEnum + velocity: Tuple[float, float, float] # estimated velocity in x,y,z, m/s and in agent's local frame + yaw_rate: float # estimated yaw rate, in radians/s + + def velocity_local(self) -> Tuple[float, float, float]: + """Returns velocity in m/s in the agent's local frame.""" + return self.velocity + + def velocity_parent(self) -> Tuple[float, float, float]: + """Returns velocity in m/s in the agent pose's parent frame. + I.e., if the pose frame is CURRENT, then will return the velocity in + the CURRENT frame.""" + return self.pose.rotation().dot(self.velocity).tolist() + + def to_frame(self, frame: ObjectFrameEnum, current_pose=None, start_pose_abs=None) -> ObstacleState: + newpose = self.pose.to_frame(frame, current_pose, start_pose_abs) + newvelocity = convert_vector(self.velocity, self.pose.frame, frame, current_pose, start_pose_abs) + return replace(self, pose=newpose, velocity=newvelocity) From 6782b7c271340b003e008ffe542e30339fab0705 Mon Sep 17 00:00:00 2001 From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com> Date: Wed, 7 May 2025 10:28:19 -0500 Subject: [PATCH 10/28] Update cone_detection.yaml --- launch/cone_detection.yaml | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/launch/cone_detection.yaml b/launch/cone_detection.yaml index f4d2e21e2..cec5420da 100644 --- a/launch/cone_detection.yaml +++ b/launch/cone_detection.yaml @@ -12,19 +12,23 @@ recovery: drive: perception: state_estimation : GNSSStateEstimator - agent_detection : + 
obstacle_detection : type: cone_detection.ConeDetector3D args: - camera_name: front_right #[front, front_right] + camera_name: front #[front, front_right] camera_calib_file: ./GEMstack/knowledge/calibration/cameras.yaml - # optional overrides - enable_tracking: False # True if you want ot enable tracking function - visualize_2d: False # True if you see 2D detection visualization - use_cyl_roi: False # True if you want to use a cylinder roi (no need to use if calibration is accurate) - save_data: False # True if you want to save the sensor input data - orientation: False # True if you need to detect flipped cones - use_start_frame: False # True if you need output in START frame, otherwise in CURRENT frame + T_l2v: + - [0.99939639, 0.02547917, 0.023615, 1.1] + - [-0.02530848, 0.99965156, -0.00749882, 0.03773583] + - [-0.02379784, 0.00689664, 0.999693, 1.95320223] + - [0.0, 0.0, 0.0, 1.0] + enable_tracking: True # True if you want to enable tracking + visualize_2d: False # True to see 2D detection visualization + use_cyl_roi: False # True to use a cylinder ROI + save_data: False # True to save sensor input data + orientation: True # True to detect flipped cones + use_start_frame: True # True to output in START frame perception_normalization : StandardPerceptionNormalizer planning: @@ -61,7 +65,7 @@ log: # If True, then record all readings / commands of the vehicle interface. Default False vehicle_interface : True # Specify which components to record to behavior.json. Default records nothing - components : ['state_estimation','agent_detection','motion_planning'] + components : ['state_estimation','obstacle_detection','motion_planning'] # Specify which components of state to record to state.json. Default records nothing #state: ['all'] # Specify the rate in Hz at which to record state to state.json. 
Default records at the pipeline's rate @@ -109,7 +113,7 @@ variants: description: "Run the yielding trajectory planner on the real vehicle with faked perception" drive: perception: - agent_detection : cone_detection.FakeConeDetector2D + obstacle_detection : cone_detection.FakeConeDetector2D fake_sim: run: From c7270573fd9993d37d477ecf62dbd504b004be5a Mon Sep 17 00:00:00 2001 From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com> Date: Wed, 7 May 2025 23:57:08 -0500 Subject: [PATCH 11/28] Update cone_detection.py --- GEMstack/onboard/perception/cone_detection.py | 84 ++++++++----------- 1 file changed, 37 insertions(+), 47 deletions(-) diff --git a/GEMstack/onboard/perception/cone_detection.py b/GEMstack/onboard/perception/cone_detection.py index b9fbc1e27..7af1b558f 100644 --- a/GEMstack/onboard/perception/cone_detection.py +++ b/GEMstack/onboard/perception/cone_detection.py @@ -1,4 +1,5 @@ -from ...state import AllState, VehicleState, ObjectPose, ObjectFrameEnum, ObstacleState, ObstacleMaterialEnum, ObstacleStateEnum +from ...state import AllState, VehicleState, ObjectPose, ObjectFrameEnum, ObstacleState, ObstacleMaterialEnum, \ + ObstacleStateEnum from ..interface.gem import GEMInterface from ..component import Component from .perception_utils import * @@ -29,50 +30,38 @@ class ConeDetector3D(Component): """ def __init__( - self, - vehicle_interface: GEMInterface, - camera_name: str, - camera_calib_file: str, - enable_tracking: bool = True, - visualize_2d: bool = False, - use_cyl_roi: bool = False, - T_l2v: list = None, - save_data: bool = True, - orientation: bool = True, - use_start_frame: bool = True, - **kwargs + self, + vehicle_interface: GEMInterface, + camera_name: str, + camera_calib_file: str, + enable_tracking: bool = True, + visualize_2d: bool = False, + use_cyl_roi: bool = False, + save_data: bool = True, + orientation: bool = True, + use_start_frame: bool = True, + **kwargs ): # Core interfaces and state - self.vehicle_interface = 
vehicle_interface - self.current_obstacles = {} - self.tracked_obstacles = {} - self.cone_counter = 0 - self.latest_image = None - self.latest_lidar = None - self.bridge = CvBridge() - self.start_pose_abs = None - self.start_time = None + self.vehicle_interface = vehicle_interface + self.current_obstacles = {} + self.tracked_obstacles = {} + self.cone_counter = 0 + self.latest_image = None + self.latest_lidar = None + self.bridge = CvBridge() + self.start_pose_abs = None + self.start_time = None # Config flags - self.camera_name = camera_name + self.camera_name = camera_name self.enable_tracking = enable_tracking - self.visualize_2d = visualize_2d - self.use_cyl_roi = use_cyl_roi - self.save_data = save_data - self.orientation = orientation + self.visualize_2d = visualize_2d + self.use_cyl_roi = use_cyl_roi + self.save_data = save_data + self.orientation = orientation self.use_start_frame = use_start_frame - # 1) Load lidar→vehicle transform - if T_l2v is not None: - self.T_l2v = np.array(T_l2v) - else: - self.T_l2v = np.array([ - [0.99939639, 0.02547917, 0.023615, 1.1], - [-0.02530848, 0.99965156, -0.00749882, 0.03773583], - [-0.02379784, 0.00689664, 0.999693, 1.95320223], - [0.0, 0.0, 0.0, 1.0] - ]) - # 2) Load camera intrinsics/extrinsics from the supplied YAML with open(camera_calib_file, 'r') as f: calib = yaml.safe_load(f) @@ -84,15 +73,16 @@ def __init__( # D: [...] 
# T_l2c: [[...], ..., [...]] cam_cfg = calib['cameras'][camera_name] - self.K = np.array(cam_cfg['K']) - self.D = np.array(cam_cfg['D']) + self.K = np.array(cam_cfg['K']) + self.D = np.array(cam_cfg['D']) self.T_l2c = np.array(cam_cfg['T_l2c']) + self.T_l2v = np.array(cam_cfg['T_l2v']) # Derived transforms self.undistort_map1 = None self.undistort_map2 = None - self.camera_front = (camera_name=='front') + self.camera_front = (camera_name == 'front') def rate(self) -> float: return 8 @@ -126,8 +116,8 @@ def initialize(self): # Initialize the YOLO detector self.detector = YOLO('GEMstack/knowledge/detection/cone.pt') self.detector.to('cuda') - self.T_c2l = np.linalg.inv(self.T_l2c) - self.R_c2l = self.T_c2l[:3, :3] + self.T_c2l = np.linalg.inv(self.T_l2c) + self.R_c2l = self.T_c2l[:3, :3] self.camera_origin_in_lidar = self.T_c2l[:3, 3] def synchronized_callback(self, image_msg, lidar_msg): @@ -162,7 +152,7 @@ def update(self, vehicle: VehicleState) -> Dict[str, ObstacleState]: if self.latest_image is None or self.latest_lidar is None: return {} latest_image = self.latest_image.copy() - + # Set up current time variables start = time.time() current_time = self.vehicle_interface.time() @@ -179,7 +169,7 @@ def update(self, vehicle: VehicleState) -> Dict[str, ObstacleState]: # Ensure data/ exists and build timestamp if self.save_data: self.save_sensor_data(vehicle=vehicle, latest_image=latest_image) - + if self.camera_front == False: start = time.time() undistorted_img, current_K = self.undistort_image(latest_image, self.K, self.D) @@ -508,8 +498,8 @@ def box_to_fake_obstacle(box): pose = ObjectPose(t=0, x=x + w / 2, y=y + h / 2, z=0, yaw=0, pitch=0, roll=0, frame=ObjectFrameEnum.CURRENT) dims = (w, h, 0) return ObstacleState(pose=pose, dimensions=dims, outline=None, - type=ObstacleMaterialEnum.TRAFFIC_CONE, activity=ObstacleStateEnum.STANDING, - velocity=(0, 0, 0), yaw_rate=0) + type=ObstacleMaterialEnum.TRAFFIC_CONE, activity=ObstacleStateEnum.STANDING, + velocity=(0, 0, 
0), yaw_rate=0) if __name__ == '__main__': From 696fc4e7f489273cb13d24fd6abce3a35da5dc72 Mon Sep 17 00:00:00 2001 From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com> Date: Wed, 7 May 2025 23:57:36 -0500 Subject: [PATCH 12/28] Update cone_detection.yaml moved the l2v to cameras.yaml --- launch/cone_detection.yaml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/launch/cone_detection.yaml b/launch/cone_detection.yaml index cec5420da..ec0881a74 100644 --- a/launch/cone_detection.yaml +++ b/launch/cone_detection.yaml @@ -17,12 +17,6 @@ drive: args: camera_name: front #[front, front_right] camera_calib_file: ./GEMstack/knowledge/calibration/cameras.yaml - - T_l2v: - - [0.99939639, 0.02547917, 0.023615, 1.1] - - [-0.02530848, 0.99965156, -0.00749882, 0.03773583] - - [-0.02379784, 0.00689664, 0.999693, 1.95320223] - - [0.0, 0.0, 0.0, 1.0] enable_tracking: True # True if you want to enable tracking visualize_2d: False # True to see 2D detection visualization use_cyl_roi: False # True to use a cylinder ROI From d40dc354e5b399c3209b4c9f4a58f838645319a5 Mon Sep 17 00:00:00 2001 From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com> Date: Wed, 7 May 2025 23:58:22 -0500 Subject: [PATCH 13/28] Update cameras.yaml moved l2v here --- GEMstack/knowledge/calibration/cameras.yaml | 55 ++++++++++++--------- 1 file changed, 32 insertions(+), 23 deletions(-) diff --git a/GEMstack/knowledge/calibration/cameras.yaml b/GEMstack/knowledge/calibration/cameras.yaml index e240957aa..4872db65d 100644 --- a/GEMstack/knowledge/calibration/cameras.yaml +++ b/GEMstack/knowledge/calibration/cameras.yaml @@ -1,26 +1,35 @@ cameras: - front: - K: - - [684.83331299, 0.0, 573.37109375] - - [0.0, 684.60968018, 363.70092773] - - [0.0, 0.0, 1.0] - D: [0.0, 0.0, 0.0, 0.0, 0.0] - T_l2c: - - [ 0.0289748006, -0.999580136, 0.0000368439, -0.0307300513] - - [-0.0094993062, -0.0003122155, -0.999954834, -0.386689354 ] - - [ 0.999534999, 0.0289731321, -0.0095043721, -0.671425124 ] - - [ 
0.0, 0.0, 0.0, 1.0 ] + front: + K: + - [684.83331299, 0.0, 573.37109375] + - [0.0, 684.60968018, 363.70092773] + - [0.0, 0.0, 1.0] + D: [0.0, 0.0, 0.0, 0.0, 0.0] + T_l2c: + - [ 0.0289748006, -0.999580136, 0.0000368439, -0.0307300513] + - [-0.0094993062, -0.0003122155, -0.999954834, -0.386689354 ] + - [ 0.999534999, 0.0289731321, -0.0095043721, -0.671425124 ] + - [ 0.0, 0.0, 0.0, 1.0 ] + T_l2v: + - [ 0.99939639, 0.02547917, 0.023615, 1.1 ] + - [ -0.02530848, 0.99965156, -0.00749882, 0.03773583 ] + - [ -0.02379784, 0.00689664, 0.999693, 1.95320223 ] + - [ 0.0, 0.0, 0.0, 1.0 ] - front_right: - K: - - [1176.25545, 0.0, 966.432645] - - [0.0, 1175.14569, 608.580326] - - [0.0, 0.0, 1.0 ] - D: [-0.270136325, 0.164393255, -0.00160720782, -0.0000741246708, -0.0619939758] - T_l2c: - - [-0.71836368, -0.69527204, -0.02346088, 0.05718003] - - [-0.09720448, 0.13371206, -0.98624154, -0.15983010] - - [ 0.68884317, -0.70619960, -0.16363744, -1.04767285] - - [ 0.0, 0.0, 0.0, 1.0 ] + front_right: + K: + - [1176.25545, 0.0, 966.432645] + - [0.0, 1175.14569, 608.580326] + - [0.0, 0.0, 1.0 ] + D: [-0.270136325, 0.164393255, -0.00160720782, -0.0000741246708, -0.0619939758] + T_l2c: + - [-0.71836368, -0.69527204, -0.02346088, 0.05718003] + - [-0.09720448, 0.13371206, -0.98624154, -0.15983010] + - [ 0.68884317, -0.70619960, -0.16363744, -1.04767285] + - [ 0.0, 0.0, 0.0, 1.0 ] + T_l2v: + - [0.99939639, 0.02547917, 0.023615, 1.1] + - [-0.02530848, 0.99965156, -0.00749882, 0.03773583] + - [-0.02379784, 0.00689664, 0.999693, 1.95320223] + - [0.0, 0.0, 0.0, 1.0 ] - # add other cameras (back_left, front_right, back_right) similarly... 
From f84c1788c49a3404c036cf54c1abdacfa90a0745 Mon Sep 17 00:00:00 2001 From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com> Date: Thu, 8 May 2025 00:02:05 -0500 Subject: [PATCH 14/28] Update agent.py no longer changing this file --- GEMstack/state/agent.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/GEMstack/state/agent.py b/GEMstack/state/agent.py index 65c2bfeb0..e295fbba8 100644 --- a/GEMstack/state/agent.py +++ b/GEMstack/state/agent.py @@ -12,12 +12,13 @@ class AgentEnum(Enum): PEDESTRIAN = 3 BICYCLIST = 4 + class AgentActivityEnum(Enum): STOPPED = 0 # standing pedestrians, parked cars, etc. No need to predict motion. MOVING = 1 # standard motion. Predictions will be used here FAST = 2 # indicates faster than usual motion, e.g., runners. UNDETERMINED = 3 # unknown activity - STANDING = 4 # standing + @dataclass @register From d2c6d894d46b91dddcb163c968a1f105a3678ca6 Mon Sep 17 00:00:00 2001 From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com> Date: Fri, 9 May 2025 15:57:30 -0500 Subject: [PATCH 15/28] Update agent.py From b3713cd1ceabf997b35c66c3208485152b398d0b Mon Sep 17 00:00:00 2001 From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com> Date: Fri, 9 May 2025 15:59:15 -0500 Subject: [PATCH 16/28] Update obstacle.py --- GEMstack/state/obstacle.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/GEMstack/state/obstacle.py b/GEMstack/state/obstacle.py index 61389819b..05f98b498 100644 --- a/GEMstack/state/obstacle.py +++ b/GEMstack/state/obstacle.py @@ -41,18 +41,6 @@ class Obstacle(PhysicalObject): class ObstacleState(PhysicalObject): type: ObstacleMaterialEnum activity: ObstacleStateEnum - velocity: Tuple[float, float, float] # estimated velocity in x,y,z, m/s and in agent's local frame - yaw_rate: float # estimated yaw rate, in radians/s - - def velocity_local(self) -> Tuple[float, float, float]: - """Returns velocity in m/s in the agent's local frame.""" - return self.velocity - - def 
velocity_parent(self) -> Tuple[float, float, float]: - """Returns velocity in m/s in the agent pose's parent frame. - I.e., if the pose frame is CURRENT, then will return the velocity in - the CURRENT frame.""" - return self.pose.rotation().dot(self.velocity).tolist() def to_frame(self, frame: ObjectFrameEnum, current_pose=None, start_pose_abs=None) -> ObstacleState: newpose = self.pose.to_frame(frame, current_pose, start_pose_abs) From bd957e90acd5b5c2c8799c79c6dbbe5fea7a18fb Mon Sep 17 00:00:00 2001 From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com> Date: Fri, 9 May 2025 16:22:06 -0500 Subject: [PATCH 17/28] Update agent.py From 9be95fbacfbdccf71262fb823d3f948dd956e2d3 Mon Sep 17 00:00:00 2001 From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com> Date: Fri, 9 May 2025 16:23:17 -0500 Subject: [PATCH 18/28] Update agent.py From 367e66a87a81f307b11f819bcd62a85052f8be9e Mon Sep 17 00:00:00 2001 From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com> Date: Fri, 9 May 2025 16:38:06 -0500 Subject: [PATCH 19/28] Update cone_detection.py --- GEMstack/onboard/perception/cone_detection.py | 42 ++++++++++--------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/GEMstack/onboard/perception/cone_detection.py b/GEMstack/onboard/perception/cone_detection.py index 7af1b558f..80757241a 100644 --- a/GEMstack/onboard/perception/cone_detection.py +++ b/GEMstack/onboard/perception/cone_detection.py @@ -1,3 +1,5 @@ +from open3d.examples.geometry.point_cloud_transformation import transform + from ...state import AllState, VehicleState, ObjectPose, ObjectFrameEnum, ObstacleState, ObstacleMaterialEnum, \ ObstacleStateEnum from ..interface.gem import GEMInterface @@ -110,7 +112,7 @@ def initialize(self): self.lidar_sub = Subscriber('/ouster/points', PointCloud2) self.sync = ApproximateTimeSynchronizer([ self.rgb_sub, self.lidar_sub - ], queue_size=500, slop=0.05) + ], queue_size=500, slop=0.03) 
self.sync.registerCallback(self.synchronized_callback) # Initialize the YOLO detector @@ -185,6 +187,8 @@ def update(self, vehicle: VehicleState) -> Dict[str, ObstacleState]: undistorted_img = latest_image.copy() orig_H, orig_W = latest_image.shape[:2] self.current_K = self.K + # print(self.K) + # print(self.T_l2c) results_normal = self.detector(img_normal, conf=0.35, classes=[0]) combined_boxes = [] if not self.enable_tracking: @@ -198,14 +202,18 @@ def update(self, vehicle: VehicleState) -> Dict[str, ObstacleState]: boxes_right = np.array(results_right[0].boxes.xywh.cpu()) if len(results_right) > 0 else [] for box in boxes_left: cx, cy, w, h = box - new_cx = cy - new_cy = orig_W - 1 - cx - combined_boxes.append((new_cx, new_cy, h, w, ObstacleStateEnum.RIGHT)) + new_cx = orig_W - 1 - cy + new_cy = cx + new_w = h # Swap width and height. + new_h = w + combined_boxes.append((new_cx, new_cy, new_w, new_h, ObstacleStateEnum.RIGHT)) for box in boxes_right: cx, cy, w, h = box - new_cx = orig_H - 1 - cy - new_cy = cx - combined_boxes.append((new_cx, new_cy, h, w, ObstacleStateEnum.LEFT)) + new_cx = cy + new_cy = orig_H - 1 - cx + new_w = h # Swap width and height. 
+ new_h = w + combined_boxes.append((new_cx, new_cy, new_w, new_h, ObstacleStateEnum.LEFT)) boxes_normal = np.array(results_normal[0].boxes.xywh.cpu()) if len(results_normal) > 0 else [] for box in boxes_normal: @@ -254,6 +262,7 @@ def update(self, vehicle: VehicleState) -> Dict[str, ObstacleState]: for i, box_info in enumerate(combined_boxes): cx, cy, w, h, activity = box_info + # print(cx, cy, w, h) left = int(cx - w / 1.6) right = int(cx + w / 1.6) top = int(cy - h / 2) @@ -261,10 +270,12 @@ def update(self, vehicle: VehicleState) -> Dict[str, ObstacleState]: mask = (projected_pts[:, 0] >= left) & (projected_pts[:, 0] <= right) & \ (projected_pts[:, 1] >= top) & (projected_pts[:, 1] <= bottom) roi_pts = projected_pts[mask] + # print(roi_pts) if roi_pts.shape[0] < 5: continue points_3d = roi_pts[:, 2:5] + points_3d = filter_points_within_threshold(points_3d, 40) points_3d = remove_ground_by_min_range(points_3d, z_range=0.08) points_3d = filter_depth_points(points_3d, max_depth_diff=0.5) @@ -303,7 +314,7 @@ def update(self, vehicle: VehicleState) -> Dict[str, ObstacleState]: vehicle.pose, self.start_pose_abs ) - T_vehicle_to_start = pose_to_matrix(vehicle_start_pose) + T_vehicle_to_start = vehicle_start_pose.transform() xp, yp, zp = (T_vehicle_to_start @ np.append(refined_center, 1))[:3] out_frame = ObjectFrameEnum.START else: @@ -352,8 +363,6 @@ def update(self, vehicle: VehicleState) -> Dict[str, ObstacleState]: outline=None, type=ObstacleMaterialEnum.TRAFFIC_CONE, activity=activity, - velocity=(0, 0, 0), - yaw_rate=0 ) else: updated_obstacle = old_state @@ -368,8 +377,6 @@ def update(self, vehicle: VehicleState) -> Dict[str, ObstacleState]: outline=None, type=ObstacleMaterialEnum.TRAFFIC_CONE, activity=activity, - velocity=(0, 0, 0), - yaw_rate=0 ) obstacles[obstacle_id] = new_obstacle self.tracked_obstacles[obstacle_id] = new_obstacle @@ -382,8 +389,6 @@ def update(self, vehicle: VehicleState) -> Dict[str, ObstacleState]: outline=None, 
type=ObstacleMaterialEnum.TRAFFIC_CONE, activity=activity, - velocity=(0, 0, 0), - yaw_rate=0 ) obstacles[obstacle_id] = new_obstacle @@ -394,10 +399,9 @@ def update(self, vehicle: VehicleState) -> Dict[str, ObstacleState]: for obstacle_id, obstacle in self.current_obstacles.items(): p = obstacle.pose rospy.loginfo( - f"Agent ID: {obstacle_id}\n" + f"Cone ID: {obstacle_id}\n" f"Pose: (x: {p.x:.3f}, y: {p.y:.3f}, z: {p.z:.3f}, " f"yaw: {p.yaw:.3f}, pitch: {p.pitch:.3f}, roll: {p.roll:.3f})\n" - f"Velocity: (vx: {obstacle.velocity[0]:.3f}, vy: {obstacle.velocity[1]:.3f}, vz: {obstacle.velocity[2]:.3f})\n" f"type:{obstacle.activity}" ) end = time.time() @@ -413,10 +417,9 @@ def update(self, vehicle: VehicleState) -> Dict[str, ObstacleState]: for obstacle_id, obstacle in self.tracked_obstacles.items(): p = obstacle.pose rospy.loginfo( - f"Agent ID: {obstacle_id}\n" + f"Cone ID: {obstacle_id}\n" f"Pose: (x: {p.x:.3f}, y: {p.y:.3f}, z: {p.z:.3f}, " f"yaw: {p.yaw:.3f}, pitch: {p.pitch:.3f}, roll: {p.roll:.3f})\n" - f"Velocity: (vx: {obstacle.velocity[0]:.3f}, vy: {obstacle.velocity[1]:.3f}, vz: {obstacle.velocity[2]:.3f})\n" f"type:{obstacle.activity}" ) end = time.time() @@ -498,8 +501,7 @@ def box_to_fake_obstacle(box): pose = ObjectPose(t=0, x=x + w / 2, y=y + h / 2, z=0, yaw=0, pitch=0, roll=0, frame=ObjectFrameEnum.CURRENT) dims = (w, h, 0) return ObstacleState(pose=pose, dimensions=dims, outline=None, - type=ObstacleMaterialEnum.TRAFFIC_CONE, activity=ObstacleStateEnum.STANDING, - velocity=(0, 0, 0), yaw_rate=0) + type=ObstacleMaterialEnum.TRAFFIC_CONE, activity=ObstacleStateEnum.STANDING) if __name__ == '__main__': From 7b7aeaa9f77129df9188424a3384fb6b6916aa66 Mon Sep 17 00:00:00 2001 From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com> Date: Fri, 9 May 2025 16:39:25 -0500 Subject: [PATCH 20/28] Create perception_utils.py --- .../onboard/perception/perception_utils.py | 226 ++++++++++++++++++ 1 file changed, 226 insertions(+) create mode 100644 
GEMstack/onboard/perception/perception_utils.py diff --git a/GEMstack/onboard/perception/perception_utils.py b/GEMstack/onboard/perception/perception_utils.py new file mode 100644 index 000000000..a95172d2f --- /dev/null +++ b/GEMstack/onboard/perception/perception_utils.py @@ -0,0 +1,226 @@ +from ...state import ObjectPose, AgentState +from typing import Dict +import open3d as o3d +import numpy as np +from sklearn.cluster import DBSCAN +from scipy.spatial.transform import Rotation as R +from sensor_msgs.msg import PointCloud2 +import sensor_msgs.point_cloud2 as pc2 +import ros_numpy + + +# ----- Helper Functions ----- + +def cylindrical_roi(points, center, radius, height): + horizontal_dist = np.linalg.norm(points[:, :2] - center[:2], axis=1) + vertical_diff = np.abs(points[:, 2] - center[2]) + mask = (horizontal_dist <= radius) & (vertical_diff <= height / 2) + return points[mask] + + +def filter_points_within_threshold(points, threshold=15.0): + distances = np.linalg.norm(points, axis=1) + mask = distances <= threshold + return points[mask] + +def match_existing_cone( + new_center: np.ndarray, + new_dims: tuple, + existing_agents: Dict[str, AgentState], + distance_threshold: float = 1.0 +) -> str: + """ + Find the closest existing Cone agent within a specified distance threshold. + """ + best_agent_id = None + best_dist = float('inf') + for agent_id, agent_state in existing_agents.items(): + old_center = np.array([agent_state.pose.x, agent_state.pose.y, agent_state.pose.z]) + dist = np.linalg.norm(new_center - old_center) + if dist < distance_threshold and dist < best_dist: + best_dist = dist + best_agent_id = agent_id + return best_agent_id + + +def compute_velocity(old_pose: ObjectPose, new_pose: ObjectPose, dt: float) -> tuple: + """ + Compute the (vx, vy, vz) velocity based on change in pose over time. 
+ """ + if dt <= 0: + return (0, 0, 0) + vx = (new_pose.x - old_pose.x) / dt + vy = (new_pose.y - old_pose.y) / dt + vz = (new_pose.z - old_pose.z) / dt + return (vx, vy, vz) + + +def extract_roi_box(lidar_pc, center, half_extents): + """ + Extract a region of interest (ROI) from the LiDAR point cloud defined by an axis-aligned bounding box. + """ + lower = center - half_extents + upper = center + half_extents + mask = np.all((lidar_pc >= lower) & (lidar_pc <= upper), axis=1) + return lidar_pc[mask] + + +def pc2_to_numpy(pc2_msg, want_rgb=False): + """ + Convert a ROS PointCloud2 message into a numpy array quickly using ros_numpy. + This function extracts the x, y, z coordinates from the point cloud. + """ + # Convert the ROS message to a numpy structured array + pc = ros_numpy.point_cloud2.pointcloud2_to_array(pc2_msg) + # Stack x,y,z fields to a (N,3) array + pts = np.stack((np.array(pc['x']).ravel(), + np.array(pc['y']).ravel(), + np.array(pc['z']).ravel()), axis=1) + # Apply filtering (for example, x > 0 and z in a specified range) + mask = (pts[:, 0] > -0.5) & (pts[:, 2] < -1) & (pts[:, 2] > -2.7) + return pts[mask] + + +def backproject_pixel(u, v, K): + """ + Backprojects a pixel coordinate (u, v) into a normalized 3D ray in the camera coordinate system. + """ + cx, cy = K[0, 2], K[1, 2] + fx, fy = K[0, 0], K[1, 1] + x = (u - cx) / fx + y = (v - cy) / fy + ray_dir = np.array([x, y, 1.0]) + return ray_dir / np.linalg.norm(ray_dir) + + +def find_human_center_on_ray(lidar_pc, ray_origin, ray_direction, + t_min, t_max, t_step, + distance_threshold, min_points, ransac_threshold): + """ + Identify the center of a human along a projected ray. + (This function is no longer used in the new approach.) + """ + return None, None, None + + +def extract_roi(pc, center, roi_radius): + """ + Extract points from a point cloud that lie within a specified radius of a center point. 
+ """ + distances = np.linalg.norm(pc - center, axis=1) + return pc[distances < roi_radius] + + +def refine_cluster(roi_points, center, eps=0.2, min_samples=10): + """ + Refine a point cluster by applying DBSCAN and return the cluster closest to 'center'. + """ + if roi_points.shape[0] < min_samples: + return roi_points + clustering = DBSCAN(eps=eps, min_samples=min_samples).fit(roi_points) + labels = clustering.labels_ + valid_clusters = [roi_points[labels == l] for l in set(labels) if l != -1] + if not valid_clusters: + return roi_points + best_cluster = min(valid_clusters, key=lambda c: np.linalg.norm(np.mean(c, axis=0) - center)) + return best_cluster + + +def remove_ground_by_min_range(cluster, z_range=0.05): + """ + Remove points within z_range of the minimum z (assumed to be ground). + """ + if cluster is None or cluster.shape[0] == 0: + return cluster + min_z = np.min(cluster[:, 2]) + filtered = cluster[cluster[:, 2] > (min_z + z_range)] + return filtered + + +def get_bounding_box_center_and_dimensions(points): + """ + Calculate the axis-aligned bounding box's center and dimensions for a set of 3D points. + """ + if points.shape[0] == 0: + return None, None + min_vals = np.min(points, axis=0) + max_vals = np.max(points, axis=0) + center = (min_vals + max_vals) / 2 + dimensions = max_vals - min_vals + return center, dimensions + + +def create_ray_line_set(start, end): + """ + Create an Open3D LineSet object representing a ray between two 3D points. + The line is colored yellow. 
+ """ + points = [start, end] + lines = [[0, 1]] + line_set = o3d.geometry.LineSet() + line_set.points = o3d.utility.Vector3dVector(points) + line_set.lines = o3d.utility.Vector2iVector(lines) + line_set.colors = o3d.utility.Vector3dVector([[1, 1, 0]]) + return line_set + + +def downsample_points(lidar_points, voxel_size=0.15): + pcd = o3d.geometry.PointCloud() + pcd.points = o3d.utility.Vector3dVector(lidar_points) + down_pcd = pcd.voxel_down_sample(voxel_size=voxel_size) + return np.asarray(down_pcd.points) + + +def filter_depth_points(lidar_points, max_depth_diff=0.9, use_norm=True): + if lidar_points.shape[0] == 0: + return lidar_points + + if use_norm: + depths = np.linalg.norm(lidar_points, axis=1) + else: + depths = lidar_points[:, 0] + + min_depth = np.min(depths) + max_possible_depth = min_depth + max_depth_diff + mask = depths < max_possible_depth + return lidar_points[mask] + + +def visualize_geometries(geometries, window_name="Open3D", width=800, height=600, point_size=5.0): + """ + Visualize a list of Open3D geometry objects in a dedicated window. + """ + vis = o3d.visualization.Visualizer() + vis.create_window(window_name=window_name, width=width, height=height) + for geom in geometries: + vis.add_geometry(geom) + opt = vis.get_render_option() + opt.point_size = point_size + vis.run() + vis.destroy_window() + +def transform_points_l2c(lidar_points, T_l2c): + N = lidar_points.shape[0] + pts_hom = np.hstack((lidar_points, np.ones((N, 1)))) # (N,4) + pts_cam = (T_l2c @ pts_hom.T).T # (N,4) + return pts_cam[:, :3] + + +# ----- New: Vectorized projection function ----- +def project_points(pts_cam, K, original_lidar_points): + """ + Vectorized version. + pts_cam: (N,3) array of points in camera coordinates. + original_lidar_points: (N,3) array of points in LiDAR coordinates. + Returns a (M,5) array: [u, v, X_lidar, Y_lidar, Z_lidar] for all points with Z>0. 
+ """ + mask = pts_cam[:, 2] > 0 + pts_cam_valid = pts_cam[mask] + lidar_valid = original_lidar_points[mask] + Xc = pts_cam_valid[:, 0] + Yc = pts_cam_valid[:, 1] + Zc = pts_cam_valid[:, 2] + u = (K[0, 0] * (Xc / Zc) + K[0, 2]).astype(np.int32) + v = (K[1, 1] * (Yc / Zc) + K[1, 2]).astype(np.int32) + proj = np.column_stack((u, v, lidar_valid)) + return proj From 4284265b94ce81ac8a07e1cd3bc27d89c4f8982e Mon Sep 17 00:00:00 2001 From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com> Date: Fri, 9 May 2025 16:41:38 -0500 Subject: [PATCH 21/28] Update agent.py From 16f1ac4733ff39969dd8bdfac941f4f73c01558d Mon Sep 17 00:00:00 2001 From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com> Date: Sat, 10 May 2025 10:44:11 -0500 Subject: [PATCH 22/28] Update cone_detection.py --- GEMstack/onboard/perception/cone_detection.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/GEMstack/onboard/perception/cone_detection.py b/GEMstack/onboard/perception/cone_detection.py index 80757241a..1846722a8 100644 --- a/GEMstack/onboard/perception/cone_detection.py +++ b/GEMstack/onboard/perception/cone_detection.py @@ -1,5 +1,3 @@ -from open3d.examples.geometry.point_cloud_transformation import transform - from ...state import AllState, VehicleState, ObjectPose, ObjectFrameEnum, ObstacleState, ObstacleMaterialEnum, \ ObstacleStateEnum from ..interface.gem import GEMInterface From ce4bba3d2afa18d0737e9c50c375f3ca2938c638 Mon Sep 17 00:00:00 2001 From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com> Date: Mon, 12 May 2025 13:26:13 -0500 Subject: [PATCH 23/28] Update cone_detection.py --- GEMstack/onboard/perception/cone_detection.py | 42 +++++++++---------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/GEMstack/onboard/perception/cone_detection.py b/GEMstack/onboard/perception/cone_detection.py index 1846722a8..e0de4abef 100644 --- a/GEMstack/onboard/perception/cone_detection.py +++ b/GEMstack/onboard/perception/cone_detection.py @@ -1,4 +1,4 @@ 
-from ...state import AllState, VehicleState, ObjectPose, ObjectFrameEnum, ObstacleState, ObstacleMaterialEnum, \ +from ...state import AllState, VehicleState, ObjectPose, ObjectFrameEnum, Obstacle, ObstacleMaterialEnum, \ ObstacleStateEnum from ..interface.gem import GEMInterface from ..component import Component @@ -146,7 +146,7 @@ def undistort_image(self, image, K, D): # print('--------undistort', end-start) return undistorted, newK - def update(self, vehicle: VehicleState) -> Dict[str, ObstacleState]: + def update(self, vehicle: VehicleState) -> Dict[str, Obstacle]: downsample = False # Gate guards against data not being present for both sensors: if self.latest_image is None or self.latest_lidar is None: @@ -221,18 +221,18 @@ def update(self, vehicle: VehicleState) -> Dict[str, ObstacleState]: # Visualize the received images in 2D with their corresponding labels # It draws rectangles and labels on the images: if getattr(self, 'visualize_2d', False): - for (cx, cy, w, h, activity) in combined_boxes: + for (cx, cy, w, h, state) in combined_boxes: left = int(cx - w / 2) right = int(cx + w / 2) top = int(cy - h / 2) bottom = int(cy + h / 2) - if activity == ObstacleStateEnum.STANDING: + if state == ObstacleStateEnum.STANDING: color = (255, 0, 0) label = "STANDING" - elif activity == ObstacleStateEnum.RIGHT: + elif state == ObstacleStateEnum.RIGHT: color = (0, 255, 0) label = "RIGHT" - elif activity == ObstacleStateEnum.LEFT: + elif state == ObstacleStateEnum.LEFT: color = (0, 0, 255) label = "LEFT" else: @@ -259,7 +259,7 @@ def update(self, vehicle: VehicleState) -> Dict[str, ObstacleState]: obstacles = {} for i, box_info in enumerate(combined_boxes): - cx, cy, w, h, activity = box_info + cx, cy, w, h, state = box_info # print(cx, cy, w, h) left = int(cx - w / 1.6) right = int(cx + w / 1.6) @@ -355,12 +355,12 @@ def update(self, vehicle: VehicleState) -> Dict[str, ObstacleState]: roll=avg_roll, frame=new_pose.frame ) - updated_obstacle = ObstacleState( + 
updated_obstacle = Obstacle( pose=updated_pose, dimensions=dims, outline=None, - type=ObstacleMaterialEnum.TRAFFIC_CONE, - activity=activity, + material=ObstacleMaterialEnum.TRAFFIC_CONE, + state=state, ) else: updated_obstacle = old_state @@ -369,24 +369,24 @@ def update(self, vehicle: VehicleState) -> Dict[str, ObstacleState]: else: obstacle_id = f"Cone{self.cone_counter}" self.cone_counter += 1 - new_obstacle = ObstacleState( + new_obstacle = Obstacle( pose=new_pose, dimensions=dims, outline=None, - type=ObstacleMaterialEnum.TRAFFIC_CONE, - activity=activity, + material=ObstacleMaterialEnum.TRAFFIC_CONE, + state=state, ) obstacles[obstacle_id] = new_obstacle self.tracked_obstacles[obstacle_id] = new_obstacle else: obstacle_id = f"Cone{self.cone_counter}" self.cone_counter += 1 - new_obstacle = ObstacleState( + new_obstacle = Obstacle( pose=new_pose, dimensions=dims, outline=None, - type=ObstacleMaterialEnum.TRAFFIC_CONE, - activity=activity, + material=ObstacleMaterialEnum.TRAFFIC_CONE, + state=state, ) obstacles[obstacle_id] = new_obstacle @@ -400,7 +400,7 @@ def update(self, vehicle: VehicleState) -> Dict[str, ObstacleState]: f"Cone ID: {obstacle_id}\n" f"Pose: (x: {p.x:.3f}, y: {p.y:.3f}, z: {p.z:.3f}, " f"yaw: {p.yaw:.3f}, pitch: {p.pitch:.3f}, roll: {p.roll:.3f})\n" - f"type:{obstacle.activity}" + f"state:{obstacle.state}" ) end = time.time() # print('-------processing time', end -start) @@ -418,7 +418,7 @@ def update(self, vehicle: VehicleState) -> Dict[str, ObstacleState]: f"Cone ID: {obstacle_id}\n" f"Pose: (x: {p.x:.3f}, y: {p.y:.3f}, z: {p.z:.3f}, " f"yaw: {p.yaw:.3f}, pitch: {p.pitch:.3f}, roll: {p.roll:.3f})\n" - f"type:{obstacle.activity}" + f"state:{obstacle.state}" ) end = time.time() # print('-------processing time', end -start) @@ -482,7 +482,7 @@ def state_inputs(self): def state_outputs(self): return ['obstacles'] - def update(self, vehicle: VehicleState) -> Dict[str, ObstacleState]: + def update(self, vehicle: VehicleState) -> Dict[str, 
Obstacle]: if self.t_start is None: self.t_start = self.vehicle_interface.time() t = self.vehicle_interface.time() - self.t_start @@ -498,8 +498,8 @@ def box_to_fake_obstacle(box): x, y, w, h = box pose = ObjectPose(t=0, x=x + w / 2, y=y + h / 2, z=0, yaw=0, pitch=0, roll=0, frame=ObjectFrameEnum.CURRENT) dims = (w, h, 0) - return ObstacleState(pose=pose, dimensions=dims, outline=None, - type=ObstacleMaterialEnum.TRAFFIC_CONE, activity=ObstacleStateEnum.STANDING) + return Obstacle(pose=pose, dimensions=dims, outline=None, + material=ObstacleMaterialEnum.TRAFFIC_CONE, state=ObstacleStateEnum.STANDING) if __name__ == '__main__': From e73d5b47bc5dbfa26fdaa6a2c708352922fd434a Mon Sep 17 00:00:00 2001 From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com> Date: Mon, 12 May 2025 13:26:45 -0500 Subject: [PATCH 24/28] Update __init__.py Updated enums in Obstacle --- GEMstack/state/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/GEMstack/state/__init__.py b/GEMstack/state/__init__.py index 8ddc0c5b0..5ec5f71c2 100644 --- a/GEMstack/state/__init__.py +++ b/GEMstack/state/__init__.py @@ -11,6 +11,7 @@ 'Obstacle', 'Sign', 'AgentState','AgentEnum','AgentActivityEnum', + 'ObstacleMaterialEnum','ObstacleStateEnum', 'SceneState', 'VehicleIntent','VehicleIntentEnum', 'AgentIntent', @@ -23,7 +24,7 @@ from .trajectory import Path,Trajectory from .vehicle import VehicleState,VehicleGearEnum from .roadgraph import Roadgraph, RoadgraphLane, RoadgraphCurve, RoadgraphRegion, RoadgraphCurveEnum, RoadgraphLaneEnum, RoadgraphRegionEnum, RoadgraphSurfaceEnum, RoadgraphConnectionEnum -from .obstacle import Obstacle, ObstacleMaterialEnum +from .obstacle import Obstacle, ObstacleMaterialEnum, ObstacleStateEnum from .sign import Sign, SignEnum, SignalLightEnum, SignState from .roadmap import Roadmap from .agent import AgentState, AgentEnum, AgentActivityEnum From 67ce6055778792b3e82c41d5d008b3c3bc521fea Mon Sep 17 00:00:00 2001 From: Tianyu Ren 
<142737618+Ren990420@users.noreply.github.com> Date: Mon, 12 May 2025 13:28:19 -0500 Subject: [PATCH 25/28] Update obstacle.py removed ObstacleState and redundant StateEnum --- GEMstack/state/obstacle.py | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/GEMstack/state/obstacle.py b/GEMstack/state/obstacle.py index 05f98b498..8ddc10490 100644 --- a/GEMstack/state/obstacle.py +++ b/GEMstack/state/obstacle.py @@ -19,13 +19,10 @@ class ObstacleMaterialEnum(Enum): ROADKILL = 10 class ObstacleStateEnum(Enum): - STOPPED = 0 # standing pedestrians, parked cars, etc. No need to predict motion. - MOVING = 1 # standard motion. Predictions will be used here - FAST = 2 # indicates faster than usual motion - UNDETERMINED = 3 # unknown activity - STANDING = 4 # standing cone - LEFT = 5 # flipped cone facing left - RIGHT = 6 # flipped cone facing right + UNDETERMINED = 0 # unknown activity + STANDING = 1 # standing cone + LEFT = 2 # flipped cone facing left + RIGHT = 3 # flipped cone facing right @@ -34,15 +31,8 @@ class ObstacleStateEnum(Enum): class Obstacle(PhysicalObject): material : ObstacleMaterialEnum collidable : bool + state: ObstacleStateEnum - -@dataclass -@register -class ObstacleState(PhysicalObject): - type: ObstacleMaterialEnum - activity: ObstacleStateEnum - - def to_frame(self, frame: ObjectFrameEnum, current_pose=None, start_pose_abs=None) -> ObstacleState: + def to_frame(self, frame: ObjectFrameEnum, current_pose=None, start_pose_abs=None) -> Obstacle: newpose = self.pose.to_frame(frame, current_pose, start_pose_abs) - newvelocity = convert_vector(self.velocity, self.pose.frame, frame, current_pose, start_pose_abs) - return replace(self, pose=newpose, velocity=newvelocity) + return replace(self, pose=newpose) From 958890de5d1e964196ab5ee60eaf1d951bb02014 Mon Sep 17 00:00:00 2001 From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com> Date: Mon, 12 May 2025 13:30:07 -0500 Subject: [PATCH 26/28] Update obstacle.py 
removed to_frame. Physical Object already has a to_frame function --- GEMstack/state/obstacle.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/GEMstack/state/obstacle.py b/GEMstack/state/obstacle.py index 8ddc10490..251fb30db 100644 --- a/GEMstack/state/obstacle.py +++ b/GEMstack/state/obstacle.py @@ -33,6 +33,3 @@ class Obstacle(PhysicalObject): collidable : bool state: ObstacleStateEnum - def to_frame(self, frame: ObjectFrameEnum, current_pose=None, start_pose_abs=None) -> Obstacle: - newpose = self.pose.to_frame(frame, current_pose, start_pose_abs) - return replace(self, pose=newpose) From d0b1cf603db67c9741210e151517af211d138466 Mon Sep 17 00:00:00 2001 From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com> Date: Mon, 12 May 2025 13:51:20 -0500 Subject: [PATCH 27/28] Update cone_detection.yaml --- launch/cone_detection.yaml | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/launch/cone_detection.yaml b/launch/cone_detection.yaml index ec0881a74..6b2d0a82e 100644 --- a/launch/cone_detection.yaml +++ b/launch/cone_detection.yaml @@ -26,23 +26,6 @@ drive: perception_normalization : StandardPerceptionNormalizer planning: - relations_estimation: - type: pedestrian_yield_logic.PedestrianYielder - args: - mode: 'real' - params: { - 'yielder': 'expert', # 'expert', 'analytic', or 'simulation' - 'planner': 'milestone', # 'milestone', 'dt', or 'dx' - 'desired_speed': 1.0, # m/s, 1.5 m/s seems max speed? Feb24 - 'acceleration': 0.75 # m/s2, 0.5 is not enough to start moving. 
Feb24 - } - route_planning: - type: StaticRoutePlanner - args: [!relative_path '../GEMstack/knowledge/routes/forward_15m.csv','start'] - motion_planning: longitudinal_planning.YieldTrajectoryPlanner - trajectory_tracking: - type: pure_pursuit.PurePursuitTrajectoryTracker - print: False log: From dd6a0cff2b962989c710d9e29f05695a423bb85e Mon Sep 17 00:00:00 2001 From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com> Date: Mon, 12 May 2025 13:52:03 -0500 Subject: [PATCH 28/28] Update cone_detection.py --- GEMstack/onboard/perception/cone_detection.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/GEMstack/onboard/perception/cone_detection.py b/GEMstack/onboard/perception/cone_detection.py index e0de4abef..42b74bb6b 100644 --- a/GEMstack/onboard/perception/cone_detection.py +++ b/GEMstack/onboard/perception/cone_detection.py @@ -361,6 +361,7 @@ def update(self, vehicle: VehicleState) -> Dict[str, Obstacle]: outline=None, material=ObstacleMaterialEnum.TRAFFIC_CONE, state=state, + collidable=True ) else: updated_obstacle = old_state @@ -375,6 +376,7 @@ def update(self, vehicle: VehicleState) -> Dict[str, Obstacle]: outline=None, material=ObstacleMaterialEnum.TRAFFIC_CONE, state=state, + collidable=True ) obstacles[obstacle_id] = new_obstacle self.tracked_obstacles[obstacle_id] = new_obstacle @@ -387,6 +389,7 @@ def update(self, vehicle: VehicleState) -> Dict[str, Obstacle]: outline=None, material=ObstacleMaterialEnum.TRAFFIC_CONE, state=state, + collidable = True ) obstacles[obstacle_id] = new_obstacle @@ -499,7 +502,7 @@ def box_to_fake_obstacle(box): pose = ObjectPose(t=0, x=x + w / 2, y=y + h / 2, z=0, yaw=0, pitch=0, roll=0, frame=ObjectFrameEnum.CURRENT) dims = (w, h, 0) return Obstacle(pose=pose, dimensions=dims, outline=None, - material=ObstacleMaterialEnum.TRAFFIC_CONE, state=ObstacleStateEnum.STANDING) + material=ObstacleMaterialEnum.TRAFFIC_CONE, state=ObstacleStateEnum.STANDING, collidable=True) if __name__ == '__main__':