From a03c2044f2f033c0c7dad19afe2cdd278df54953 Mon Sep 17 00:00:00 2001 From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com> Date: Wed, 23 Apr 2025 10:39:57 -0500 Subject: [PATCH 01/28] Create cone_detection.py --- GEMstack/onboard/perception/cone_detection.py | 666 ++++++++++++++++++ 1 file changed, 666 insertions(+) create mode 100644 GEMstack/onboard/perception/cone_detection.py diff --git a/GEMstack/onboard/perception/cone_detection.py b/GEMstack/onboard/perception/cone_detection.py new file mode 100644 index 000000000..d41c527ae --- /dev/null +++ b/GEMstack/onboard/perception/cone_detection.py @@ -0,0 +1,666 @@ +from ...state import AllState, VehicleState, ObjectPose, ObjectFrameEnum, AgentState, AgentEnum, AgentActivityEnum +from ..interface.gem import GEMInterface +from ..component import Component +from ultralytics import YOLO +import cv2 +from typing import Dict +import open3d as o3d +import numpy as np +from sklearn.cluster import DBSCAN +from scipy.spatial.transform import Rotation as R +import rospy +from sensor_msgs.msg import PointCloud2, Image +import sensor_msgs.point_cloud2 as pc2 +import struct, ctypes +from message_filters import Subscriber, ApproximateTimeSynchronizer +from cv_bridge import CvBridge +import time +import math +import ros_numpy +import os + + +# ----- Helper Functions ----- + +def cylindrical_roi(points, center, radius, height): + horizontal_dist = np.linalg.norm(points[:, :2] - center[:2], axis=1) + vertical_diff = np.abs(points[:, 2] - center[2]) + mask = (horizontal_dist <= radius) & (vertical_diff <= height / 2) + return points[mask] + + +def undistort_image(image, K, D): + h, w = image.shape[:2] + newK, _ = cv2.getOptimalNewCameraMatrix(K, D, (w, h), 1, (w, h)) + undistorted = cv2.undistort(image, K, D, None, newK) + return undistorted, newK + +def filter_points_within_threshold(points, threshold=15.0): + distances = np.linalg.norm(points, axis=1) + mask = distances <= threshold + return points[mask] + + +def 
match_existing_cone( + new_center: np.ndarray, + new_dims: tuple, + existing_agents: Dict[str, AgentState], + distance_threshold: float = 1.0 +) -> str: + """ + Find the closest existing Cone agent within a specified distance threshold. + """ + best_agent_id = None + best_dist = float('inf') + for agent_id, agent_state in existing_agents.items(): + old_center = np.array([agent_state.pose.x, agent_state.pose.y, agent_state.pose.z]) + dist = np.linalg.norm(new_center - old_center) + if dist < distance_threshold and dist < best_dist: + best_dist = dist + best_agent_id = agent_id + return best_agent_id + + +def compute_velocity(old_pose: ObjectPose, new_pose: ObjectPose, dt: float) -> tuple: + """ + Compute the (vx, vy, vz) velocity based on change in pose over time. + """ + if dt <= 0: + return (0, 0, 0) + vx = (new_pose.x - old_pose.x) / dt + vy = (new_pose.y - old_pose.y) / dt + vz = (new_pose.z - old_pose.z) / dt + return (vx, vy, vz) + + +def extract_roi_box(lidar_pc, center, half_extents): + """ + Extract a region of interest (ROI) from the LiDAR point cloud defined by an axis-aligned bounding box. + """ + lower = center - half_extents + upper = center + half_extents + mask = np.all((lidar_pc >= lower) & (lidar_pc <= upper), axis=1) + return lidar_pc[mask] + + +def pc2_to_numpy(pc2_msg, want_rgb=False): + """ + Convert a ROS PointCloud2 message into a numpy array quickly using ros_numpy. + This function extracts the x, y, z coordinates from the point cloud. 
+ """ + # Convert the ROS message to a numpy structured array + pc = ros_numpy.point_cloud2.pointcloud2_to_array(pc2_msg) + # Stack x,y,z fields to a (N,3) array + pts = np.stack((np.array(pc['x']).ravel(), + np.array(pc['y']).ravel(), + np.array(pc['z']).ravel()), axis=1) + # Apply filtering (for example, x > 0 and z in a specified range) + mask = (pts[:, 0] > -0.5) & (pts[:, 2] < -1) & (pts[:, 2] > -2.7) + return pts[mask] + + +def backproject_pixel(u, v, K): + """ + Backprojects a pixel coordinate (u, v) into a normalized 3D ray in the camera coordinate system. + """ + cx, cy = K[0, 2], K[1, 2] + fx, fy = K[0, 0], K[1, 1] + x = (u - cx) / fx + y = (v - cy) / fy + ray_dir = np.array([x, y, 1.0]) + return ray_dir / np.linalg.norm(ray_dir) + + +def find_human_center_on_ray(lidar_pc, ray_origin, ray_direction, + t_min, t_max, t_step, + distance_threshold, min_points, ransac_threshold): + """ + Identify the center of a human along a projected ray. + (This function is no longer used in the new approach.) + """ + return None, None, None + + +def extract_roi(pc, center, roi_radius): + """ + Extract points from a point cloud that lie within a specified radius of a center point. + """ + distances = np.linalg.norm(pc - center, axis=1) + return pc[distances < roi_radius] + + +def refine_cluster(roi_points, center, eps=0.2, min_samples=10): + """ + Refine a point cluster by applying DBSCAN and return the cluster closest to 'center'. + """ + if roi_points.shape[0] < min_samples: + return roi_points + clustering = DBSCAN(eps=eps, min_samples=min_samples).fit(roi_points) + labels = clustering.labels_ + valid_clusters = [roi_points[labels == l] for l in set(labels) if l != -1] + if not valid_clusters: + return roi_points + best_cluster = min(valid_clusters, key=lambda c: np.linalg.norm(np.mean(c, axis=0) - center)) + return best_cluster + + +def remove_ground_by_min_range(cluster, z_range=0.05): + """ + Remove points within z_range of the minimum z (assumed to be ground). 
+ """ + if cluster is None or cluster.shape[0] == 0: + return cluster + min_z = np.min(cluster[:, 2]) + filtered = cluster[cluster[:, 2] > (min_z + z_range)] + return filtered + + +def get_bounding_box_center_and_dimensions(points): + """ + Calculate the axis-aligned bounding box's center and dimensions for a set of 3D points. + """ + if points.shape[0] == 0: + return None, None + min_vals = np.min(points, axis=0) + max_vals = np.max(points, axis=0) + center = (min_vals + max_vals) / 2 + dimensions = max_vals - min_vals + return center, dimensions + + +def create_ray_line_set(start, end): + """ + Create an Open3D LineSet object representing a ray between two 3D points. + The line is colored yellow. + """ + points = [start, end] + lines = [[0, 1]] + line_set = o3d.geometry.LineSet() + line_set.points = o3d.utility.Vector3dVector(points) + line_set.lines = o3d.utility.Vector2iVector(lines) + line_set.colors = o3d.utility.Vector3dVector([[1, 1, 0]]) + return line_set + + +def downsample_points(lidar_points, voxel_size=0.15): + pcd = o3d.geometry.PointCloud() + pcd.points = o3d.utility.Vector3dVector(lidar_points) + down_pcd = pcd.voxel_down_sample(voxel_size=voxel_size) + return np.asarray(down_pcd.points) + + +def filter_depth_points(lidar_points, max_depth_diff=0.9, use_norm=True): + if lidar_points.shape[0] == 0: + return lidar_points + + if use_norm: + depths = np.linalg.norm(lidar_points, axis=1) + else: + depths = lidar_points[:, 0] + + min_depth = np.min(depths) + max_possible_depth = min_depth + max_depth_diff + mask = depths < max_possible_depth + return lidar_points[mask] + + +def visualize_geometries(geometries, window_name="Open3D", width=800, height=600, point_size=5.0): + """ + Visualize a list of Open3D geometry objects in a dedicated window. 
+ """ + vis = o3d.visualization.Visualizer() + vis.create_window(window_name=window_name, width=width, height=height) + for geom in geometries: + vis.add_geometry(geom) + opt = vis.get_render_option() + opt.point_size = point_size + vis.run() + vis.destroy_window() + + +def pose_to_matrix(pose): + """ + Compose a 4x4 transformation matrix from a pose state. + Assumes pose has attributes: x, y, z, yaw, pitch, roll, + where the angles are given in degrees. + """ + x = pose.x if pose.x is not None else 0.0 + y = pose.y if pose.y is not None else 0.0 + z = pose.z if pose.z is not None else 0.0 + if pose.yaw is not None and pose.pitch is not None and pose.roll is not None: + yaw = math.radians(pose.yaw) + pitch = math.radians(pose.pitch) + roll = math.radians(pose.roll) + else: + yaw = 0.0 + pitch = 0.0 + roll = 0.0 + R_mat = R.from_euler('zyx', [yaw, pitch, roll]).as_matrix() + T = np.eye(4) + T[:3, :3] = R_mat + T[:3, 3] = np.array([x, y, z]) + return T + + +def transform_points_l2c(lidar_points, T_l2c): + N = lidar_points.shape[0] + pts_hom = np.hstack((lidar_points, np.ones((N, 1)))) # (N,4) + pts_cam = (T_l2c @ pts_hom.T).T # (N,4) + return pts_cam[:, :3] + + +# ----- New: Vectorized projection function ----- +def project_points(pts_cam, K, original_lidar_points): + """ + Vectorized version. + pts_cam: (N,3) array of points in camera coordinates. + original_lidar_points: (N,3) array of points in LiDAR coordinates. + Returns a (M,5) array: [u, v, X_lidar, Y_lidar, Z_lidar] for all points with Z>0. + """ + mask = pts_cam[:, 2] > 0 + pts_cam_valid = pts_cam[mask] + lidar_valid = original_lidar_points[mask] + Xc = pts_cam_valid[:, 0] + Yc = pts_cam_valid[:, 1] + Zc = pts_cam_valid[:, 2] + u = (K[0, 0] * (Xc / Zc) + K[0, 2]).astype(np.int32) + v = (K[1, 1] * (Yc / Zc) + K[1, 2]).astype(np.int32) + proj = np.column_stack((u, v, lidar_valid)) + return proj + + +class ConeDetector3D(Component): + """ + Detects cones by fusing YOLO 2D detections with LiDAR point cloud data. 
+ + Tracking is optional: set `enable_tracking=False` to disable persistent tracking + and return only detections from the current frame. + """ + + def __init__(self, vehicle_interface: GEMInterface): + self.vehicle_interface = vehicle_interface + self.enable_tracking = False + self.current_agents = {} + self.tracked_agents = {} + self.cone_counter = 0 + self.latest_image = None + self.latest_lidar = None + self.bridge = CvBridge() + self.start_pose_abs = None + self.camera_front = True + self.visualize_2d = False + self.use_cyl_roi = False + self.start_time = None + self.use_start_frame = False + self.save_data = False + self.orientation = False + + def rate(self) -> float: + return 4.0 + + def state_inputs(self) -> list: + return ['vehicle'] + + def state_outputs(self) -> list: + return ['agents'] + + def initialize(self): + self.rgb_sub = Subscriber('/oak/rgb/image_raw', Image) + self.lidar_sub = Subscriber('/ouster/points', PointCloud2) + self.sync = ApproximateTimeSynchronizer([self.rgb_sub, self.lidar_sub], + queue_size=10, slop=0.1) + self.sync.registerCallback(self.synchronized_callback) + self.detector = YOLO('/home/gem/s2025_perception_merge/GEMstack/GEMstack/knowledge/detection/cone.pt') + self.detector.to('cuda') + + if self.camera_front: + self.K = np.array([[684.83331299, 0., 573.37109375], + [0., 684.60968018, 363.70092773], + [0., 0., 1.]]) + else: + self.K = np.array([[1.17625545e+03, 0.00000000e+00, 9.66432645e+02], + [0.00000000e+00, 1.17514569e+03, 6.08580326e+02], + [0.00000000e+00, 0.00000000e+00, 1.00000000e+00]]) + + if self.camera_front: + self.D = np.array([0.0, 0.0, 0.0, 0.0, 0.0]) + else: + self.D = np.array([-2.70136325e-01, 1.64393255e-01, -1.60720782e-03, -7.41246708e-05, + -6.19939758e-02]) + + self.T_l2v = np.array([[0.99939639, 0.02547917, 0.023615, 1.1], + [-0.02530848, 0.99965156, -0.00749882, 0.03773583], + [-0.02379784, 0.00689664, 0.999693, 1.95320223], + [0., 0., 0., 1.]]) + if self.camera_front: + self.T_l2c = np.array([ + 
[0.001090, -0.999489, -0.031941, 0.149698], + [-0.007664, 0.031932, -0.999461, -0.397813], + [0.999970, 0.001334, -0.007625, -0.691405], + [0., 0., 0., 1.000000] + ]) + else: + self.T_l2c = np.array([[-0.71836368, -0.69527204, -0.02346088, 0.05718003], + [-0.09720448, 0.13371206, -0.98624154, -0.1598301], + [0.68884317, -0.7061996, -0.16363744, -1.04767285], + [0., 0., 0., 1.]] + ) + self.T_c2l = np.linalg.inv(self.T_l2c) + self.R_c2l = self.T_c2l[:3, :3] + self.camera_origin_in_lidar = self.T_c2l[:3, 3] + + def synchronized_callback(self, image_msg, lidar_msg): + step1 = time.time() + try: + self.latest_image = self.bridge.imgmsg_to_cv2(image_msg, "bgr8") + except Exception as e: + rospy.logerr("Failed to convert image: {}".format(e)) + self.latest_image = None + step2 = time.time() + self.latest_lidar = pc2_to_numpy(lidar_msg, want_rgb=False) + step3 = time.time() + print('image callback: ', step2 - step1, 'lidar callback ', step3 - step2) + + def update(self, vehicle: VehicleState) -> Dict[str, AgentState]: + downsample = False + if self.latest_image is None or self.latest_lidar is None: + return {} + + # Ensure data/ exists and build timestamp + os.makedirs("data", exist_ok=True) + current_time = self.vehicle_interface.time() + if self.start_time is None: + self.start_time = current_time + time_elapsed = current_time - self.start_time + + if self.save_data: + tstamp = int(self.vehicle_interface.time() * 1000) + # 1) Dump raw image + cv2.imwrite(f"data/{tstamp}_image.png", self.latest_image) + # 2) Dump raw LiDAR + np.savez(f"data/{tstamp}_lidar.npz", lidar=self.latest_lidar) + # 3) Write BEFORE_TRANSFORM + with open(f"data/{tstamp}_vehstate.txt", "w") as f: + vp = vehicle.pose + f.write( + f"BEFORE_TRANSFORM " + f"x={vp.x:.3f}, y={vp.y:.3f}, z={vp.z:.3f}, " + f"yaw={vp.yaw:.2f}, pitch={vp.pitch:.2f}, roll={vp.roll:.2f}\n" + ) + # Compute vehicle_start_pose in either START or CURRENT + if self.use_start_frame: + if self.start_pose_abs is None: + 
self.start_pose_abs = vehicle.pose + vehicle_start_pose = vehicle.pose.to_frame( + ObjectFrameEnum.START, + vehicle.pose, + self.start_pose_abs + ) + mode = "START" + else: + vehicle_start_pose = vehicle.pose + mode = "CURRENT" + with open(f"data/{tstamp}_vehstate.txt", "a") as f: + f.write( + f"AFTER_TRANSFORM " + f"x={vehicle_start_pose.x:.3f}, " + f"y={vehicle_start_pose.y:.3f}, " + f"z={vehicle_start_pose.z:.3f}, " + f"yaw={vehicle_start_pose.yaw:.2f}, " + f"pitch={vehicle_start_pose.pitch:.2f}, " + f"roll={vehicle_start_pose.roll:.2f}, " + f"frame={mode}\n" + ) + + undistorted_img, current_K = undistort_image(self.latest_image, self.K, self.D) + self.current_K = current_K + self.latest_image = undistorted_img + orig_H, orig_W = undistorted_img.shape[:2] + + # --- Begin modifications for three-angle detection --- + img_normal = undistorted_img + results_normal = self.detector(img_normal, conf=0.3, classes=[0]) + combined_boxes = [] + if self.orientation: + img_left = cv2.rotate(undistorted_img.copy(), cv2.ROTATE_90_COUNTERCLOCKWISE) + img_right = cv2.rotate(undistorted_img.copy(), cv2.ROTATE_90_CLOCKWISE) + results_left = self.detector(img_left, conf=0.3, classes=[0]) + results_right = self.detector(img_right, conf=0.3, classes=[0]) + boxes_left = np.array(results_left[0].boxes.xywh.cpu()) if len(results_left) > 0 else [] + boxes_right = np.array(results_right[0].boxes.xywh.cpu()) if len(results_right) > 0 else [] + for box in boxes_left: + cx, cy, w, h = box + new_cx = cy + new_cy = orig_W - 1 - cx + combined_boxes.append((new_cx, new_cy, h, w, AgentActivityEnum.RIGHT)) + for box in boxes_right: + cx, cy, w, h = box + new_cx = orig_H - 1 - cy + new_cy = cx + combined_boxes.append((new_cx, new_cy, h, w, AgentActivityEnum.LEFT)) + + boxes_normal = np.array(results_normal[0].boxes.xywh.cpu()) if len(results_normal) > 0 else [] + for box in boxes_normal: + cx, cy, w, h = box + combined_boxes.append((cx, cy, w, h, AgentActivityEnum.STANDING)) + + if getattr(self, 
'visualize_2d', False): + for (cx, cy, w, h, activity) in combined_boxes: + left = int(cx - w / 2) + right = int(cx + w / 2) + top = int(cy - h / 2) + bottom = int(cy + h / 2) + if activity == AgentActivityEnum.STANDING: + color = (255, 0, 0) + label = "STANDING" + elif activity == AgentActivityEnum.RIGHT: + color = (0, 255, 0) + label = "RIGHT" + elif activity == AgentActivityEnum.LEFT: + color = (0, 0, 255) + label = "LEFT" + else: + color = (255, 255, 255) + label = "UNKNOWN" + cv2.rectangle(undistorted_img, (left, top), (right, bottom), color, 2) + cv2.putText(undistorted_img, label, (left, max(top - 5, 20)), + cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2) + cv2.imshow("Detection - Cone 2D", undistorted_img) + + if downsample: + lidar_down = downsample_points(self.latest_lidar, voxel_size=0.1) + else: + lidar_down = self.latest_lidar.copy() + + pts_cam = transform_points_l2c(lidar_down, self.T_l2c) + projected_pts = project_points(pts_cam, self.current_K, lidar_down) + + agents = {} + + for i, box_info in enumerate(combined_boxes): + cx, cy, w, h, activity = box_info + start = time.time() + left = int(cx - w / 2) + right = int(cx + w / 2) + top = int(cy - h / 2) + bottom = int(cy + h / 2) + mask = (projected_pts[:, 0] >= left) & (projected_pts[:, 0] <= right) & \ + (projected_pts[:, 1] >= top) & (projected_pts[:, 1] <= bottom) + roi_pts = projected_pts[mask] + if roi_pts.shape[0] < 5: + continue + points_3d = roi_pts[:, 2:5] + points_3d = filter_points_within_threshold(points_3d, 30) + points_3d = filter_depth_points(points_3d, max_depth_diff=0.3) + + if self.use_cyl_roi: + global_filtered = filter_points_within_threshold(lidar_down, 30) + roi_cyl = cylindrical_roi(global_filtered, np.mean(points_3d, axis=0), radius=0.3, height=1.2) + refined_cluster = remove_ground_by_min_range(roi_cyl, z_range=0.01) + refined_cluster = filter_depth_points(refined_cluster, max_depth_diff=0.2) + else: + refined_cluster = remove_ground_by_min_range(points_3d, z_range=0.05) + end1 = 
time.time() + if refined_cluster.shape[0] < 4: + continue + + pcd = o3d.geometry.PointCloud() + pcd.points = o3d.utility.Vector3dVector(refined_cluster) + obb = pcd.get_oriented_bounding_box() + refined_center = obb.center + dims = tuple(obb.extent) + R_lidar = obb.R.copy() + end2 = time.time() + + refined_center_hom = np.append(refined_center, 1) + refined_center_vehicle_hom = self.T_l2v @ refined_center_hom + refined_center_vehicle = refined_center_vehicle_hom[:3] + + R_vehicle = self.T_l2v[:3, :3] @ R_lidar + yaw, pitch, roll = R.from_matrix(R_vehicle).as_euler('zyx', degrees=False) + refined_center = refined_center_vehicle + + if self.use_start_frame: + if self.start_pose_abs is None: + self.start_pose_abs = vehicle.pose + vehicle_start_pose = vehicle.pose.to_frame( + ObjectFrameEnum.START, + vehicle.pose, + self.start_pose_abs + ) + T_vehicle_to_start = pose_to_matrix(vehicle_start_pose) + xp, yp, zp = (T_vehicle_to_start @ np.append(refined_center, 1))[:3] + out_frame = ObjectFrameEnum.START + else: + xp, yp, zp = refined_center + out_frame = ObjectFrameEnum.CURRENT + + new_pose = ObjectPose( + t=current_time, + x=xp, y=yp, z=zp, + yaw=yaw, pitch=pitch, roll=roll, + frame=out_frame + ) + + # --- Optional tracking --- + if self.enable_tracking: + existing_id = match_existing_cone( + np.array([new_pose.x, new_pose.y, new_pose.z]), + dims, + self.tracked_agents, + distance_threshold=2.0 + ) + if existing_id is not None: + old_state = self.tracked_agents[existing_id] + if vehicle.v < 0.1: + alpha = 0.1 + avg_x = alpha * new_pose.x + (1 - alpha) * old_state.pose.x + avg_y = alpha * new_pose.y + (1 - alpha) * old_state.pose.y + avg_z = alpha * new_pose.z + (1 - alpha) * old_state.pose.z + avg_yaw = alpha * new_pose.yaw + (1 - alpha) * old_state.pose.yaw + avg_pitch = alpha * new_pose.pitch + (1 - alpha) * old_state.pose.pitch + avg_roll = alpha * new_pose.roll + (1 - alpha) * old_state.pose.roll + + updated_pose = ObjectPose( + t=new_pose.t, + x=avg_x, + y=avg_y, + 
z=avg_z, + yaw=avg_yaw, + pitch=avg_pitch, + roll=avg_roll, + frame=new_pose.frame + ) + updated_agent = AgentState( + pose=updated_pose, + dimensions=dims, + outline=None, + type=AgentEnum.CONE, + activity=activity, + velocity=(0, 0, 0), + yaw_rate=0 + ) + else: + updated_agent = old_state + agents[existing_id] = updated_agent + self.tracked_agents[existing_id] = updated_agent + else: + agent_id = f"Cone{self.cone_counter}" + self.cone_counter += 1 + new_agent = AgentState( + pose=new_pose, + dimensions=dims, + outline=None, + type=AgentEnum.CONE, + activity=activity, + velocity=(0, 0, 0), + yaw_rate=0 + ) + agents[agent_id] = new_agent + self.tracked_agents[agent_id] = new_agent + else: + agent_id = f"Cone{self.cone_counter}" + self.cone_counter += 1 + new_agent = AgentState( + pose=new_pose, + dimensions=dims, + outline=None, + type=AgentEnum.CONE, + activity=activity, + velocity=(0, 0, 0), + yaw_rate=0 + ) + agents[agent_id] = new_agent + + self.current_agents = agents + + # If tracking not enabled, return only current frame detections + if not self.enable_tracking: + return self.current_agents + + stale_ids = [agent_id for agent_id, agent in self.tracked_agents.items() + if current_time - agent.pose.t > 5.0] + for agent_id in stale_ids: + rospy.loginfo(f"Removing stale agent: {agent_id}\n") + del self.tracked_agents[agent_id] + + return self.tracked_agents + + # ----- Fake Cone Detector 2D (for Testing Purposes) ----- + +class FakConeDetector(Component): + def __init__(self, vehicle_interface: GEMInterface): + self.vehicle_interface = vehicle_interface + self.times = [(5.0, 20.0), (30.0, 35.0)] + self.t_start = None + + def rate(self): + return 4.0 + + def state_inputs(self): + return ['vehicle'] + + def state_outputs(self): + return ['agents'] + + def update(self, vehicle: VehicleState) -> Dict[str, AgentState]: + if self.t_start is None: + self.t_start = self.vehicle_interface.time() + t = self.vehicle_interface.time() - self.t_start + res = {} + for 
time_range in self.times: + if t >= time_range[0] and t <= time_range[1]: + res['cone0'] = box_to_fake_agent((0, 0, 0, 0)) + rospy.loginfo("Detected a Cone (simulated)") + return res + +def box_to_fake_agent(box): + x, y, w, h = box + pose = ObjectPose(t=0, x=x + w / 2, y=y + h / 2, z=0, yaw=0, pitch=0, roll=0, frame=ObjectFrameEnum.CURRENT) + dims = (w, h, 0) + return AgentState(pose=pose, dimensions=dims, outline=None, + type=AgentEnum.CONE, activity=AgentActivityEnum.MOVING, + velocity=(0, 0, 0), yaw_rate=0) + +if __name__ == '__main__': + pass From 0b84d288a422dade88ece3cc447a0fcbef059181 Mon Sep 17 00:00:00 2001 From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com> Date: Wed, 23 Apr 2025 10:40:41 -0500 Subject: [PATCH 02/28] Create cone_detection.yaml --- launch/cone_detection.yaml | 119 +++++++++++++++++++++++++++++++++++++ 1 file changed, 119 insertions(+) create mode 100644 launch/cone_detection.yaml diff --git a/launch/cone_detection.yaml b/launch/cone_detection.yaml new file mode 100644 index 000000000..165a5b9b8 --- /dev/null +++ b/launch/cone_detection.yaml @@ -0,0 +1,119 @@ +description: "Run the yielding trajectory planner on the real vehicle with real perception" +mode: hardware +vehicle_interface: gem_hardware.GEMHardwareInterface +mission_execution: StandardExecutor + +# Recovery behavior after a component failure +recovery: + planning: + trajectory_tracking : recovery.StopTrajectoryTracker + +# Driving behavior for the GEM vehicle. Runs real pedestrian perception, yield planner, but does not send commands to real vehicle. 
+drive: + perception: + state_estimation : GNSSStateEstimator + agent_detection : cone_detection.ConeDetector3D + perception_normalization : StandardPerceptionNormalizer + planning: + relations_estimation: + type: pedestrian_yield_logic.PedestrianYielder + args: + mode: 'real' + params: { + 'yielder': 'expert', # 'expert', 'analytic', or 'simulation' + 'planner': 'milestone', # 'milestone', 'dt', or 'dx' + 'desired_speed': 1.0, # m/s, 1.5 m/s seems max speed? Feb24 + 'acceleration': 0.75 # m/s2, 0.5 is not enough to start moving. Feb24 + } + route_planning: + type: StaticRoutePlanner + args: [!relative_path '../GEMstack/knowledge/routes/forward_15m.csv','start'] + motion_planning: longitudinal_planning.YieldTrajectoryPlanner + trajectory_tracking: + type: pure_pursuit.PurePursuitTrajectoryTracker + print: False + + +log: + # Specify the top-level folder to save the log files. Default is 'logs' + #folder : 'logs' + # If prefix is specified, then the log folder will be named with the prefix followed by the date and time. Default no prefix + #prefix : 'fixed_route_' + # If suffix is specified, then logs will output to folder/prefix+suffix. Default uses date and time as the suffix + #suffix : 'test3' + # Specify which ros topics to record to vehicle.bag. Default records nothing. This records the "standard" ROS topics. + ros_topics : + # Specify options to pass to rosbag record. Default is no options. + #rosbag_options : '--split --size=1024' + # If True, then record all readings / commands of the vehicle interface. Default False + vehicle_interface : True + # Specify which components to record to behavior.json. Default records nothing + components : ['state_estimation','agent_detection','motion_planning'] + # Specify which components of state to record to state.json. Default records nothing + #state: ['all'] + # Specify the rate in Hz at which to record state to state.json. 
Default records at the pipeline's rate + #state_rate: 10 + # If True, then make plots of the recorded data specifically from PurePursuitTrajectoryTracker_debug.csv and report metrics. Default False + #auto_plot : True +replay: # Add items here to set certain topics / inputs to be replayed from logs + # Specify which log folder to replay from + log: + ros_topics : [] + components : [] + +#usually can keep this constant +computation_graph: !include "../GEMstack/knowledge/defaults/computation_graph.yaml" + +variants: + detector_only: + run: + description: "Run the pedestrian detection code" + drive: + planning: + trajectory_tracking: + real_sim: + run: + description: "Run the pedestrian detection code with real detection and fake simulation" + mode: hardware + vehicle_interface: + type: gem_mixed.GEMRealSensorsWithSimMotionInterface + args: + scene: !relative_path '../scenes/xyhead_demo.yaml' + mission_execution: StandardExecutor + require_engaged: False + visualization: !include "klampt_visualization.yaml" + drive: + perception: + state_estimation : OmniscientStateEstimator + planning: + relations_estimation: + type: pedestrian_yield_logic.PedestrianYielder + args: + mode: 'sim' + + fake_real: + run: + description: "Run the yielding trajectory planner on the real vehicle with faked perception" + drive: + perception: + agent_detection : cone_detection.FakeConeDetector2D + + fake_sim: + run: + description: "Run the yielding trajectory planner in simulation with faked perception" + mode: simulation + vehicle_interface: + type: gem_simulator.GEMDoubleIntegratorSimulationInterface + args: + scene: !relative_path '../scenes/xyhead_demo.yaml' + visualization: !include "klampt_visualization.yaml" + drive: + perception: + # agent_detection : pedestrian_detection.FakePedestrianDetector2D + agent_detection : OmniscientAgentDetector #this option reads agents from the simulator + state_estimation : OmniscientStateEstimator + planning: + relations_estimation: + type: 
pedestrian_yield_logic.PedestrianYielder + args: + mode: 'sim' From ec41cf7187d9f57939f5a1fe3ddb7c5049d7da36 Mon Sep 17 00:00:00 2001 From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com> Date: Wed, 23 Apr 2025 10:44:36 -0500 Subject: [PATCH 03/28] Update agent.py --- GEMstack/state/agent.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/GEMstack/state/agent.py b/GEMstack/state/agent.py index 757805837..2ca23ba76 100644 --- a/GEMstack/state/agent.py +++ b/GEMstack/state/agent.py @@ -11,6 +11,7 @@ class AgentEnum(Enum): LARGE_TRUCK = 2 PEDESTRIAN = 3 BICYCLIST = 4 + CONE = 5 class AgentActivityEnum(Enum): @@ -18,6 +19,9 @@ class AgentActivityEnum(Enum): MOVING = 1 # standard motion. Predictions will be used here FAST = 2 # indicates faster than usual motion, e.g., runners. UNDETERMINED = 3 # unknown activity + STANDING = 4 # standing cone + LEFT = 5 # flipped cone facing left + RIGHT = 6 # flipped cone facing right @dataclass @@ -41,4 +45,4 @@ def velocity_parent(self) -> Tuple[float,float,float]: def to_frame(self, frame : ObjectFrameEnum, current_pose = None, start_pose_abs = None) -> AgentState: newpose = self.pose.to_frame(frame,current_pose,start_pose_abs) newvelocity = convert_vector(self.velocity,self.pose.frame,frame,current_pose,start_pose_abs) - return replace(self,pose=newpose,velocity=newvelocity) \ No newline at end of file + return replace(self,pose=newpose,velocity=newvelocity) From fa9ef66753827d9ee1766e0f40a4bfe763f0fa86 Mon Sep 17 00:00:00 2001 From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com> Date: Wed, 30 Apr 2025 22:30:34 -0500 Subject: [PATCH 04/28] Create cameras.yaml --- GEMstack/knowledge/calibration/cameras.yaml | 26 +++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 GEMstack/knowledge/calibration/cameras.yaml diff --git a/GEMstack/knowledge/calibration/cameras.yaml b/GEMstack/knowledge/calibration/cameras.yaml new file mode 100644 index 000000000..e240957aa --- 
/dev/null +++ b/GEMstack/knowledge/calibration/cameras.yaml @@ -0,0 +1,26 @@ +cameras: + front: + K: + - [684.83331299, 0.0, 573.37109375] + - [0.0, 684.60968018, 363.70092773] + - [0.0, 0.0, 1.0] + D: [0.0, 0.0, 0.0, 0.0, 0.0] + T_l2c: + - [ 0.0289748006, -0.999580136, 0.0000368439, -0.0307300513] + - [-0.0094993062, -0.0003122155, -0.999954834, -0.386689354 ] + - [ 0.999534999, 0.0289731321, -0.0095043721, -0.671425124 ] + - [ 0.0, 0.0, 0.0, 1.0 ] + + front_right: + K: + - [1176.25545, 0.0, 966.432645] + - [0.0, 1175.14569, 608.580326] + - [0.0, 0.0, 1.0 ] + D: [-0.270136325, 0.164393255, -0.00160720782, -0.0000741246708, -0.0619939758] + T_l2c: + - [-0.71836368, -0.69527204, -0.02346088, 0.05718003] + - [-0.09720448, 0.13371206, -0.98624154, -0.15983010] + - [ 0.68884317, -0.70619960, -0.16363744, -1.04767285] + - [ 0.0, 0.0, 0.0, 1.0 ] + + # add other cameras (back_left, front_right, back_right) similarly... From 0136e45152c5d74b4fb1a36ba1efe38bbd4b9b3f Mon Sep 17 00:00:00 2001 From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com> Date: Wed, 30 Apr 2025 22:33:52 -0500 Subject: [PATCH 05/28] Update cone_detection.yaml --- launch/cone_detection.yaml | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/launch/cone_detection.yaml b/launch/cone_detection.yaml index 165a5b9b8..f4d2e21e2 100644 --- a/launch/cone_detection.yaml +++ b/launch/cone_detection.yaml @@ -9,10 +9,23 @@ recovery: trajectory_tracking : recovery.StopTrajectoryTracker # Driving behavior for the GEM vehicle. Runs real pedestrian perception, yield planner, but does not send commands to real vehicle. 
-drive: +drive: perception: state_estimation : GNSSStateEstimator - agent_detection : cone_detection.ConeDetector3D + agent_detection : + type: cone_detection.ConeDetector3D + args: + camera_name: front_right #[front, front_right] + camera_calib_file: ./GEMstack/knowledge/calibration/cameras.yaml + + # optional overrides + enable_tracking: False # True if you want ot enable tracking function + visualize_2d: False # True if you see 2D detection visualization + use_cyl_roi: False # True if you want to use a cylinder roi (no need to use if calibration is accurate) + save_data: False # True if you want to save the sensor input data + orientation: False # True if you need to detect flipped cones + use_start_frame: False # True if you need output in START frame, otherwise in CURRENT frame + perception_normalization : StandardPerceptionNormalizer planning: relations_estimation: From 4e252c6a58661a9f6001d2869368653e4f6902e0 Mon Sep 17 00:00:00 2001 From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com> Date: Wed, 30 Apr 2025 22:34:31 -0500 Subject: [PATCH 06/28] Update cone_detection.py --- GEMstack/onboard/perception/cone_detection.py | 263 ++++++++++++------ 1 file changed, 171 insertions(+), 92 deletions(-) diff --git a/GEMstack/onboard/perception/cone_detection.py b/GEMstack/onboard/perception/cone_detection.py index d41c527ae..81ddce240 100644 --- a/GEMstack/onboard/perception/cone_detection.py +++ b/GEMstack/onboard/perception/cone_detection.py @@ -18,6 +18,7 @@ import math import ros_numpy import os +import yaml # ----- Helper Functions ----- @@ -29,12 +30,6 @@ def cylindrical_roi(points, center, radius, height): return points[mask] -def undistort_image(image, K, D): - h, w = image.shape[:2] - newK, _ = cv2.getOptimalNewCameraMatrix(K, D, (w, h), 1, (w, h)) - undistorted = cv2.undistort(image, K, D, None, newK) - return undistorted, newK - def filter_points_within_threshold(points, threshold=15.0): distances = np.linalg.norm(points, axis=1) mask = distances 
<= threshold @@ -275,28 +270,79 @@ class ConeDetector3D(Component): Tracking is optional: set `enable_tracking=False` to disable persistent tracking and return only detections from the current frame. + + Supports multiple cameras; each camera’s intrinsics and extrinsics are + loaded from a single YAML calibration file via plain PyYAML. """ - def __init__(self, vehicle_interface: GEMInterface): - self.vehicle_interface = vehicle_interface - self.enable_tracking = False - self.current_agents = {} - self.tracked_agents = {} - self.cone_counter = 0 - self.latest_image = None - self.latest_lidar = None - self.bridge = CvBridge() - self.start_pose_abs = None - self.camera_front = True - self.visualize_2d = False - self.use_cyl_roi = False - self.start_time = None - self.use_start_frame = False - self.save_data = False - self.orientation = False + def __init__( + self, + vehicle_interface: GEMInterface, + camera_name: str, + camera_calib_file: str, + enable_tracking: bool = True, + visualize_2d: bool = False, + use_cyl_roi: bool = False, + T_l2v: list = None, + save_data: bool = True, + orientation: bool = True, + use_start_frame: bool = True, + **kwargs + ): + # Core interfaces and state + self.vehicle_interface = vehicle_interface + self.current_agents = {} + self.tracked_agents = {} + self.cone_counter = 0 + self.latest_image = None + self.latest_lidar = None + self.bridge = CvBridge() + self.start_pose_abs = None + self.start_time = None + + # Config flags + self.camera_name = camera_name + self.enable_tracking = enable_tracking + self.visualize_2d = visualize_2d + self.use_cyl_roi = use_cyl_roi + self.save_data = save_data + self.orientation = orientation + self.use_start_frame = use_start_frame + + # 1) Load lidar→vehicle transform + if T_l2v is not None: + self.T_l2v = np.array(T_l2v) + else: + self.T_l2v = np.array([ + [0.99939639, 0.02547917, 0.023615, 1.1], + [-0.02530848, 0.99965156, -0.00749882, 0.03773583], + [-0.02379784, 0.00689664, 0.999693, 1.95320223], + 
[0.0, 0.0, 0.0, 1.0] + ]) + + # 2) Load camera intrinsics/extrinsics from the supplied YAML + with open(camera_calib_file, 'r') as f: + calib = yaml.safe_load(f) + + # Expect structure: + # cameras: + # front: + # K: [[...], [...], [...]] + # D: [...] + # T_l2c: [[...], ..., [...]] + cam_cfg = calib['cameras'][camera_name] + self.K = np.array(cam_cfg['K']) + self.D = np.array(cam_cfg['D']) + self.T_l2c = np.array(cam_cfg['T_l2c']) + + # Derived transforms + + self.undistort_map1 = None + self.undistort_map2 = None + self.camera_front = (camera_name=='front') def rate(self) -> float: - return 4.0 + return 8 def state_inputs(self) -> list: return ['vehicle'] @@ -305,48 +351,30 @@ def state_outputs(self) -> list: return ['agents'] def initialize(self): - self.rgb_sub = Subscriber('/oak/rgb/image_raw', Image) + # --- Determine the correct RGB topic for this camera --- + rgb_topic_map = { + 'front': '/oak/rgb/image_raw', + 'front_right': '/camera_fr/arena_camera_node/image_raw', + # add additional camera mappings here if needed + } + rgb_topic = rgb_topic_map.get( + self.camera_name, + f'/{self.camera_name}/rgb/image_raw' + ) + + # Subscribe to the RGB and LiDAR streams + self.rgb_sub = Subscriber(rgb_topic, Image) self.lidar_sub = Subscriber('/ouster/points', PointCloud2) - self.sync = ApproximateTimeSynchronizer([self.rgb_sub, self.lidar_sub], - queue_size=10, slop=0.1) + self.sync = ApproximateTimeSynchronizer([ + self.rgb_sub, self.lidar_sub + ], queue_size=200, slop=0.1) self.sync.registerCallback(self.synchronized_callback) - self.detector = YOLO('/home/gem/s2025_perception_merge/GEMstack/GEMstack/knowledge/detection/cone.pt') - self.detector.to('cuda') - - if self.camera_front: - self.K = np.array([[684.83331299, 0., 573.37109375], - [0., 684.60968018, 363.70092773], - [0., 0., 1.]]) - else: - self.K = np.array([[1.17625545e+03, 0.00000000e+00, 9.66432645e+02], - [0.00000000e+00, 1.17514569e+03, 6.08580326e+02], - [0.00000000e+00, 0.00000000e+00, 
1.00000000e+00]]) - if self.camera_front: - self.D = np.array([0.0, 0.0, 0.0, 0.0, 0.0]) - else: - self.D = np.array([-2.70136325e-01, 1.64393255e-01, -1.60720782e-03, -7.41246708e-05, - -6.19939758e-02]) - - self.T_l2v = np.array([[0.99939639, 0.02547917, 0.023615, 1.1], - [-0.02530848, 0.99965156, -0.00749882, 0.03773583], - [-0.02379784, 0.00689664, 0.999693, 1.95320223], - [0., 0., 0., 1.]]) - if self.camera_front: - self.T_l2c = np.array([ - [0.001090, -0.999489, -0.031941, 0.149698], - [-0.007664, 0.031932, -0.999461, -0.397813], - [0.999970, 0.001334, -0.007625, -0.691405], - [0., 0., 0., 1.000000] - ]) - else: - self.T_l2c = np.array([[-0.71836368, -0.69527204, -0.02346088, 0.05718003], - [-0.09720448, 0.13371206, -0.98624154, -0.1598301], - [0.68884317, -0.7061996, -0.16363744, -1.04767285], - [0., 0., 0., 1.]] - ) - self.T_c2l = np.linalg.inv(self.T_l2c) - self.R_c2l = self.T_c2l[:3, :3] + # Initialize the YOLO detector + self.detector = YOLO('GEMstack/knowledge/detection/cone.pt') + self.detector.to('cuda') + self.T_c2l = np.linalg.inv(self.T_l2c) + self.R_c2l = self.T_c2l[:3, :3] self.camera_origin_in_lidar = self.T_c2l[:3, 3] def synchronized_callback(self, image_msg, lidar_msg): @@ -359,24 +387,44 @@ def synchronized_callback(self, image_msg, lidar_msg): step2 = time.time() self.latest_lidar = pc2_to_numpy(lidar_msg, want_rgb=False) step3 = time.time() - print('image callback: ', step2 - step1, 'lidar callback ', step3 - step2) + # print('image callback: ', step2 - step1, 'lidar callback ', step3 - step2) + + def undistort_image(self, image, K, D): + h, w = image.shape[:2] + newK, _ = cv2.getOptimalNewCameraMatrix(K, D, (w, h), 1, (w, h)) + if self.undistort_map1 is None or self.undistort_map2 is None: + self.undistort_map1, self.undistort_map2 = cv2.initUndistortRectifyMap(K, D, R=None, + newCameraMatrix=newK, size=(w, h), + m1type=cv2.CV_32FC1) + + start = time.time() + undistorted = cv2.remap(image, self.undistort_map1, self.undistort_map2, 
interpolation=cv2.INTER_NEAREST) + end = time.time() + # print('--------undistort', end-start) + return undistorted, newK def update(self, vehicle: VehicleState) -> Dict[str, AgentState]: + + start = time.time() downsample = False if self.latest_image is None or self.latest_lidar is None: return {} - + lastest_image = self.latest_image.copy() # Ensure data/ exists and build timestamp - os.makedirs("data", exist_ok=True) + if downsample: + lidar_down = downsample_points(self.latest_lidar, voxel_size=0.1) + else: + lidar_down = self.latest_lidar.copy() current_time = self.vehicle_interface.time() if self.start_time is None: self.start_time = current_time time_elapsed = current_time - self.start_time if self.save_data: + os.makedirs("data", exist_ok=True) tstamp = int(self.vehicle_interface.time() * 1000) # 1) Dump raw image - cv2.imwrite(f"data/{tstamp}_image.png", self.latest_image) + cv2.imwrite(f"data/{tstamp}_image.png", lastest_image) # 2) Dump raw LiDAR np.savez(f"data/{tstamp}_lidar.npz", lidar=self.latest_lidar) # 3) Write BEFORE_TRANSFORM @@ -411,21 +459,30 @@ def update(self, vehicle: VehicleState) -> Dict[str, AgentState]: f"roll={vehicle_start_pose.roll:.2f}, " f"frame={mode}\n" ) - - undistorted_img, current_K = undistort_image(self.latest_image, self.K, self.D) - self.current_K = current_K - self.latest_image = undistorted_img - orig_H, orig_W = undistorted_img.shape[:2] - - # --- Begin modifications for three-angle detection --- - img_normal = undistorted_img - results_normal = self.detector(img_normal, conf=0.3, classes=[0]) + if self.camera_front == False: + start = time.time() + undistorted_img, current_K = self.undistort_image(lastest_image, self.K, self.D) + end = time.time() + # print('-------processing time undistort_image---', end -start) + self.current_K = current_K + orig_H, orig_W = undistorted_img.shape[:2] + + # --- Begin modifications for three-angle detection --- + img_normal = undistorted_img + else: + img_normal = lastest_image.copy() 
+ undistorted_img = lastest_image.copy() + orig_H, orig_W = lastest_image.shape[:2] + self.current_K = self.K + results_normal = self.detector(img_normal, conf=0.25, classes=[0]) combined_boxes = [] + if not self.enable_tracking: + self.cone_counter = 0 if self.orientation: img_left = cv2.rotate(undistorted_img.copy(), cv2.ROTATE_90_COUNTERCLOCKWISE) img_right = cv2.rotate(undistorted_img.copy(), cv2.ROTATE_90_CLOCKWISE) - results_left = self.detector(img_left, conf=0.3, classes=[0]) - results_right = self.detector(img_right, conf=0.3, classes=[0]) + results_left = self.detector(img_left, conf=0.05, classes=[0]) + results_right = self.detector(img_right, conf=0.05, classes=[0]) boxes_left = np.array(results_left[0].boxes.xywh.cpu()) if len(results_left) > 0 else [] boxes_right = np.array(results_right[0].boxes.xywh.cpu()) if len(results_right) > 0 else [] for box in boxes_left: @@ -467,19 +524,16 @@ def update(self, vehicle: VehicleState) -> Dict[str, AgentState]: cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2) cv2.imshow("Detection - Cone 2D", undistorted_img) - if downsample: - lidar_down = downsample_points(self.latest_lidar, voxel_size=0.1) - else: - lidar_down = self.latest_lidar.copy() - + start = time.time() pts_cam = transform_points_l2c(lidar_down, self.T_l2c) projected_pts = project_points(pts_cam, self.current_K, lidar_down) + end = time.time() + # print('-------processing time1---', end -start) agents = {} for i, box_info in enumerate(combined_boxes): cx, cy, w, h, activity = box_info - start = time.time() left = int(cx - w / 2) right = int(cx + w / 2) top = int(cy - h / 2) @@ -489,18 +543,19 @@ def update(self, vehicle: VehicleState) -> Dict[str, AgentState]: roi_pts = projected_pts[mask] if roi_pts.shape[0] < 5: continue + points_3d = roi_pts[:, 2:5] points_3d = filter_points_within_threshold(points_3d, 30) points_3d = filter_depth_points(points_3d, max_depth_diff=0.3) if self.use_cyl_roi: global_filtered = filter_points_within_threshold(lidar_down, 30) - 
roi_cyl = cylindrical_roi(global_filtered, np.mean(points_3d, axis=0), radius=0.3, height=1.2) + roi_cyl = cylindrical_roi(global_filtered, np.mean(points_3d, axis=0), radius=0.4, height=1.2) refined_cluster = remove_ground_by_min_range(roi_cyl, z_range=0.01) - refined_cluster = filter_depth_points(refined_cluster, max_depth_diff=0.2) + refined_cluster = filter_depth_points(refined_cluster, max_depth_diff=0.3) else: - refined_cluster = remove_ground_by_min_range(points_3d, z_range=0.05) - end1 = time.time() + refined_cluster = points_3d.copy() + # end1 = time.time() if refined_cluster.shape[0] < 4: continue @@ -510,7 +565,6 @@ def update(self, vehicle: VehicleState) -> Dict[str, AgentState]: refined_center = obb.center dims = tuple(obb.extent) R_lidar = obb.R.copy() - end2 = time.time() refined_center_hom = np.append(refined_center, 1) refined_center_vehicle_hom = self.T_l2v @ refined_center_hom @@ -552,7 +606,7 @@ def update(self, vehicle: VehicleState) -> Dict[str, AgentState]: ) if existing_id is not None: old_state = self.tracked_agents[existing_id] - if vehicle.v < 0.1: + if vehicle.v < 100: alpha = 0.1 avg_x = alpha * new_pose.x + (1 - alpha) * old_state.pose.x avg_y = alpha * new_pose.y + (1 - alpha) * old_state.pose.y @@ -616,18 +670,41 @@ def update(self, vehicle: VehicleState) -> Dict[str, AgentState]: # If tracking not enabled, return only current frame detections if not self.enable_tracking: + for agent_id, agent in self.current_agents.items(): + p = agent.pose + rospy.loginfo( + f"Agent ID: {agent_id}\n" + f"Pose: (x: {p.x:.3f}, y: {p.y:.3f}, z: {p.z:.3f}, " + f"yaw: {p.yaw:.3f}, pitch: {p.pitch:.3f}, roll: {p.roll:.3f})\n" + f"Velocity: (vx: {agent.velocity[0]:.3f}, vy: {agent.velocity[1]:.3f}, vz: {agent.velocity[2]:.3f})\n" + f"type:{agent.activity}" + ) + end = time.time() + # print('-------processing time', end -start) return self.current_agents stale_ids = [agent_id for agent_id, agent in self.tracked_agents.items() - if current_time - 
agent.pose.t > 5.0] + if current_time - agent.pose.t > 20.0] for agent_id in stale_ids: rospy.loginfo(f"Removing stale agent: {agent_id}\n") del self.tracked_agents[agent_id] - + if self.enable_tracking: + for agent_id, agent in self.tracked_agents.items(): + p = agent.pose + rospy.loginfo( + f"Agent ID: {agent_id}\n" + f"Pose: (x: {p.x:.3f}, y: {p.y:.3f}, z: {p.z:.3f}, " + f"yaw: {p.yaw:.3f}, pitch: {p.pitch:.3f}, roll: {p.roll:.3f})\n" + f"Velocity: (vx: {agent.velocity[0]:.3f}, vy: {agent.velocity[1]:.3f}, vz: {agent.velocity[2]:.3f})\n" + f"type:{agent.activity}" + ) + end = time.time() + # print('-------processing time', end -start) return self.tracked_agents # ----- Fake Cone Detector 2D (for Testing Purposes) ----- + class FakConeDetector(Component): def __init__(self, vehicle_interface: GEMInterface): self.vehicle_interface = vehicle_interface @@ -654,6 +731,7 @@ def update(self, vehicle: VehicleState) -> Dict[str, AgentState]: rospy.loginfo("Detected a Cone (simulated)") return res + def box_to_fake_agent(box): x, y, w, h = box pose = ObjectPose(t=0, x=x + w / 2, y=y + h / 2, z=0, yaw=0, pitch=0, roll=0, frame=ObjectFrameEnum.CURRENT) @@ -662,5 +740,6 @@ def box_to_fake_agent(box): type=AgentEnum.CONE, activity=AgentActivityEnum.MOVING, velocity=(0, 0, 0), yaw_rate=0) + if __name__ == '__main__': pass From ec0a5cf5925324a89f253134efbde46ea3cfef8a Mon Sep 17 00:00:00 2001 From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com> Date: Wed, 7 May 2025 10:26:05 -0500 Subject: [PATCH 07/28] Update cone_detection.py changed AgentState to ObstacleState, and also changed the corresponding yaml file --- GEMstack/onboard/perception/cone_detection.py | 483 +++++------------- 1 file changed, 127 insertions(+), 356 deletions(-) diff --git a/GEMstack/onboard/perception/cone_detection.py b/GEMstack/onboard/perception/cone_detection.py index 81ddce240..b9fbc1e27 100644 --- a/GEMstack/onboard/perception/cone_detection.py +++ 
b/GEMstack/onboard/perception/cone_detection.py @@ -1,269 +1,22 @@ -from ...state import AllState, VehicleState, ObjectPose, ObjectFrameEnum, AgentState, AgentEnum, AgentActivityEnum +from ...state import AllState, VehicleState, ObjectPose, ObjectFrameEnum, ObstacleState, ObstacleMaterialEnum, ObstacleStateEnum from ..interface.gem import GEMInterface from ..component import Component +from .perception_utils import * from ultralytics import YOLO import cv2 from typing import Dict import open3d as o3d import numpy as np -from sklearn.cluster import DBSCAN from scipy.spatial.transform import Rotation as R import rospy from sensor_msgs.msg import PointCloud2, Image -import sensor_msgs.point_cloud2 as pc2 -import struct, ctypes from message_filters import Subscriber, ApproximateTimeSynchronizer from cv_bridge import CvBridge import time -import math -import ros_numpy import os import yaml -# ----- Helper Functions ----- - -def cylindrical_roi(points, center, radius, height): - horizontal_dist = np.linalg.norm(points[:, :2] - center[:2], axis=1) - vertical_diff = np.abs(points[:, 2] - center[2]) - mask = (horizontal_dist <= radius) & (vertical_diff <= height / 2) - return points[mask] - - -def filter_points_within_threshold(points, threshold=15.0): - distances = np.linalg.norm(points, axis=1) - mask = distances <= threshold - return points[mask] - - -def match_existing_cone( - new_center: np.ndarray, - new_dims: tuple, - existing_agents: Dict[str, AgentState], - distance_threshold: float = 1.0 -) -> str: - """ - Find the closest existing Cone agent within a specified distance threshold. 
- """ - best_agent_id = None - best_dist = float('inf') - for agent_id, agent_state in existing_agents.items(): - old_center = np.array([agent_state.pose.x, agent_state.pose.y, agent_state.pose.z]) - dist = np.linalg.norm(new_center - old_center) - if dist < distance_threshold and dist < best_dist: - best_dist = dist - best_agent_id = agent_id - return best_agent_id - - -def compute_velocity(old_pose: ObjectPose, new_pose: ObjectPose, dt: float) -> tuple: - """ - Compute the (vx, vy, vz) velocity based on change in pose over time. - """ - if dt <= 0: - return (0, 0, 0) - vx = (new_pose.x - old_pose.x) / dt - vy = (new_pose.y - old_pose.y) / dt - vz = (new_pose.z - old_pose.z) / dt - return (vx, vy, vz) - - -def extract_roi_box(lidar_pc, center, half_extents): - """ - Extract a region of interest (ROI) from the LiDAR point cloud defined by an axis-aligned bounding box. - """ - lower = center - half_extents - upper = center + half_extents - mask = np.all((lidar_pc >= lower) & (lidar_pc <= upper), axis=1) - return lidar_pc[mask] - - -def pc2_to_numpy(pc2_msg, want_rgb=False): - """ - Convert a ROS PointCloud2 message into a numpy array quickly using ros_numpy. - This function extracts the x, y, z coordinates from the point cloud. - """ - # Convert the ROS message to a numpy structured array - pc = ros_numpy.point_cloud2.pointcloud2_to_array(pc2_msg) - # Stack x,y,z fields to a (N,3) array - pts = np.stack((np.array(pc['x']).ravel(), - np.array(pc['y']).ravel(), - np.array(pc['z']).ravel()), axis=1) - # Apply filtering (for example, x > 0 and z in a specified range) - mask = (pts[:, 0] > -0.5) & (pts[:, 2] < -1) & (pts[:, 2] > -2.7) - return pts[mask] - - -def backproject_pixel(u, v, K): - """ - Backprojects a pixel coordinate (u, v) into a normalized 3D ray in the camera coordinate system. 
- """ - cx, cy = K[0, 2], K[1, 2] - fx, fy = K[0, 0], K[1, 1] - x = (u - cx) / fx - y = (v - cy) / fy - ray_dir = np.array([x, y, 1.0]) - return ray_dir / np.linalg.norm(ray_dir) - - -def find_human_center_on_ray(lidar_pc, ray_origin, ray_direction, - t_min, t_max, t_step, - distance_threshold, min_points, ransac_threshold): - """ - Identify the center of a human along a projected ray. - (This function is no longer used in the new approach.) - """ - return None, None, None - - -def extract_roi(pc, center, roi_radius): - """ - Extract points from a point cloud that lie within a specified radius of a center point. - """ - distances = np.linalg.norm(pc - center, axis=1) - return pc[distances < roi_radius] - - -def refine_cluster(roi_points, center, eps=0.2, min_samples=10): - """ - Refine a point cluster by applying DBSCAN and return the cluster closest to 'center'. - """ - if roi_points.shape[0] < min_samples: - return roi_points - clustering = DBSCAN(eps=eps, min_samples=min_samples).fit(roi_points) - labels = clustering.labels_ - valid_clusters = [roi_points[labels == l] for l in set(labels) if l != -1] - if not valid_clusters: - return roi_points - best_cluster = min(valid_clusters, key=lambda c: np.linalg.norm(np.mean(c, axis=0) - center)) - return best_cluster - - -def remove_ground_by_min_range(cluster, z_range=0.05): - """ - Remove points within z_range of the minimum z (assumed to be ground). - """ - if cluster is None or cluster.shape[0] == 0: - return cluster - min_z = np.min(cluster[:, 2]) - filtered = cluster[cluster[:, 2] > (min_z + z_range)] - return filtered - - -def get_bounding_box_center_and_dimensions(points): - """ - Calculate the axis-aligned bounding box's center and dimensions for a set of 3D points. 
- """ - if points.shape[0] == 0: - return None, None - min_vals = np.min(points, axis=0) - max_vals = np.max(points, axis=0) - center = (min_vals + max_vals) / 2 - dimensions = max_vals - min_vals - return center, dimensions - - -def create_ray_line_set(start, end): - """ - Create an Open3D LineSet object representing a ray between two 3D points. - The line is colored yellow. - """ - points = [start, end] - lines = [[0, 1]] - line_set = o3d.geometry.LineSet() - line_set.points = o3d.utility.Vector3dVector(points) - line_set.lines = o3d.utility.Vector2iVector(lines) - line_set.colors = o3d.utility.Vector3dVector([[1, 1, 0]]) - return line_set - - -def downsample_points(lidar_points, voxel_size=0.15): - pcd = o3d.geometry.PointCloud() - pcd.points = o3d.utility.Vector3dVector(lidar_points) - down_pcd = pcd.voxel_down_sample(voxel_size=voxel_size) - return np.asarray(down_pcd.points) - - -def filter_depth_points(lidar_points, max_depth_diff=0.9, use_norm=True): - if lidar_points.shape[0] == 0: - return lidar_points - - if use_norm: - depths = np.linalg.norm(lidar_points, axis=1) - else: - depths = lidar_points[:, 0] - - min_depth = np.min(depths) - max_possible_depth = min_depth + max_depth_diff - mask = depths < max_possible_depth - return lidar_points[mask] - - -def visualize_geometries(geometries, window_name="Open3D", width=800, height=600, point_size=5.0): - """ - Visualize a list of Open3D geometry objects in a dedicated window. - """ - vis = o3d.visualization.Visualizer() - vis.create_window(window_name=window_name, width=width, height=height) - for geom in geometries: - vis.add_geometry(geom) - opt = vis.get_render_option() - opt.point_size = point_size - vis.run() - vis.destroy_window() - - -def pose_to_matrix(pose): - """ - Compose a 4x4 transformation matrix from a pose state. - Assumes pose has attributes: x, y, z, yaw, pitch, roll, - where the angles are given in degrees. 
- """ - x = pose.x if pose.x is not None else 0.0 - y = pose.y if pose.y is not None else 0.0 - z = pose.z if pose.z is not None else 0.0 - if pose.yaw is not None and pose.pitch is not None and pose.roll is not None: - yaw = math.radians(pose.yaw) - pitch = math.radians(pose.pitch) - roll = math.radians(pose.roll) - else: - yaw = 0.0 - pitch = 0.0 - roll = 0.0 - R_mat = R.from_euler('zyx', [yaw, pitch, roll]).as_matrix() - T = np.eye(4) - T[:3, :3] = R_mat - T[:3, 3] = np.array([x, y, z]) - return T - - -def transform_points_l2c(lidar_points, T_l2c): - N = lidar_points.shape[0] - pts_hom = np.hstack((lidar_points, np.ones((N, 1)))) # (N,4) - pts_cam = (T_l2c @ pts_hom.T).T # (N,4) - return pts_cam[:, :3] - - -# ----- New: Vectorized projection function ----- -def project_points(pts_cam, K, original_lidar_points): - """ - Vectorized version. - pts_cam: (N,3) array of points in camera coordinates. - original_lidar_points: (N,3) array of points in LiDAR coordinates. - Returns a (M,5) array: [u, v, X_lidar, Y_lidar, Z_lidar] for all points with Z>0. - """ - mask = pts_cam[:, 2] > 0 - pts_cam_valid = pts_cam[mask] - lidar_valid = original_lidar_points[mask] - Xc = pts_cam_valid[:, 0] - Yc = pts_cam_valid[:, 1] - Zc = pts_cam_valid[:, 2] - u = (K[0, 0] * (Xc / Zc) + K[0, 2]).astype(np.int32) - v = (K[1, 1] * (Yc / Zc) + K[1, 2]).astype(np.int32) - proj = np.column_stack((u, v, lidar_valid)) - return proj - - class ConeDetector3D(Component): """ Detects cones by fusing YOLO 2D detections with LiDAR point cloud data. 
@@ -291,8 +44,8 @@ def __init__( ): # Core interfaces and state self.vehicle_interface = vehicle_interface - self.current_agents = {} - self.tracked_agents = {} + self.current_obstacles = {} + self.tracked_obstacles = {} self.cone_counter = 0 self.latest_image = None self.latest_lidar = None @@ -348,7 +101,7 @@ def state_inputs(self) -> list: return ['vehicle'] def state_outputs(self) -> list: - return ['agents'] + return ['obstacles'] def initialize(self): # --- Determine the correct RGB topic for this camera --- @@ -367,7 +120,7 @@ def initialize(self): self.lidar_sub = Subscriber('/ouster/points', PointCloud2) self.sync = ApproximateTimeSynchronizer([ self.rgb_sub, self.lidar_sub - ], queue_size=200, slop=0.1) + ], queue_size=500, slop=0.05) self.sync.registerCallback(self.synchronized_callback) # Initialize the YOLO detector @@ -403,65 +156,33 @@ def undistort_image(self, image, K, D): # print('--------undistort', end-start) return undistorted, newK - def update(self, vehicle: VehicleState) -> Dict[str, AgentState]: - - start = time.time() + def update(self, vehicle: VehicleState) -> Dict[str, ObstacleState]: downsample = False + # Gate guards against data not being present for both sensors: if self.latest_image is None or self.latest_lidar is None: return {} - lastest_image = self.latest_image.copy() - # Ensure data/ exists and build timestamp + latest_image = self.latest_image.copy() + + # Set up current time variables + start = time.time() + current_time = self.vehicle_interface.time() + if downsample: lidar_down = downsample_points(self.latest_lidar, voxel_size=0.1) else: lidar_down = self.latest_lidar.copy() - current_time = self.vehicle_interface.time() + if self.start_time is None: self.start_time = current_time time_elapsed = current_time - self.start_time + # Ensure data/ exists and build timestamp if self.save_data: - os.makedirs("data", exist_ok=True) - tstamp = int(self.vehicle_interface.time() * 1000) - # 1) Dump raw image - 
cv2.imwrite(f"data/{tstamp}_image.png", lastest_image) - # 2) Dump raw LiDAR - np.savez(f"data/{tstamp}_lidar.npz", lidar=self.latest_lidar) - # 3) Write BEFORE_TRANSFORM - with open(f"data/{tstamp}_vehstate.txt", "w") as f: - vp = vehicle.pose - f.write( - f"BEFORE_TRANSFORM " - f"x={vp.x:.3f}, y={vp.y:.3f}, z={vp.z:.3f}, " - f"yaw={vp.yaw:.2f}, pitch={vp.pitch:.2f}, roll={vp.roll:.2f}\n" - ) - # Compute vehicle_start_pose in either START or CURRENT - if self.use_start_frame: - if self.start_pose_abs is None: - self.start_pose_abs = vehicle.pose - vehicle_start_pose = vehicle.pose.to_frame( - ObjectFrameEnum.START, - vehicle.pose, - self.start_pose_abs - ) - mode = "START" - else: - vehicle_start_pose = vehicle.pose - mode = "CURRENT" - with open(f"data/{tstamp}_vehstate.txt", "a") as f: - f.write( - f"AFTER_TRANSFORM " - f"x={vehicle_start_pose.x:.3f}, " - f"y={vehicle_start_pose.y:.3f}, " - f"z={vehicle_start_pose.z:.3f}, " - f"yaw={vehicle_start_pose.yaw:.2f}, " - f"pitch={vehicle_start_pose.pitch:.2f}, " - f"roll={vehicle_start_pose.roll:.2f}, " - f"frame={mode}\n" - ) + self.save_sensor_data(vehicle=vehicle, latest_image=latest_image) + if self.camera_front == False: start = time.time() - undistorted_img, current_K = self.undistort_image(lastest_image, self.K, self.D) + undistorted_img, current_K = self.undistort_image(latest_image, self.K, self.D) end = time.time() # print('-------processing time undistort_image---', end -start) self.current_K = current_K @@ -470,50 +191,52 @@ def update(self, vehicle: VehicleState) -> Dict[str, AgentState]: # --- Begin modifications for three-angle detection --- img_normal = undistorted_img else: - img_normal = lastest_image.copy() - undistorted_img = lastest_image.copy() - orig_H, orig_W = lastest_image.shape[:2] + img_normal = latest_image.copy() + undistorted_img = latest_image.copy() + orig_H, orig_W = latest_image.shape[:2] self.current_K = self.K - results_normal = self.detector(img_normal, conf=0.25, classes=[0]) + 
results_normal = self.detector(img_normal, conf=0.35, classes=[0]) combined_boxes = [] if not self.enable_tracking: self.cone_counter = 0 if self.orientation: img_left = cv2.rotate(undistorted_img.copy(), cv2.ROTATE_90_COUNTERCLOCKWISE) img_right = cv2.rotate(undistorted_img.copy(), cv2.ROTATE_90_CLOCKWISE) - results_left = self.detector(img_left, conf=0.05, classes=[0]) - results_right = self.detector(img_right, conf=0.05, classes=[0]) + results_left = self.detector(img_left, conf=0.15, classes=[0]) + results_right = self.detector(img_right, conf=0.15, classes=[0]) boxes_left = np.array(results_left[0].boxes.xywh.cpu()) if len(results_left) > 0 else [] boxes_right = np.array(results_right[0].boxes.xywh.cpu()) if len(results_right) > 0 else [] for box in boxes_left: cx, cy, w, h = box new_cx = cy new_cy = orig_W - 1 - cx - combined_boxes.append((new_cx, new_cy, h, w, AgentActivityEnum.RIGHT)) + combined_boxes.append((new_cx, new_cy, h, w, ObstacleStateEnum.RIGHT)) for box in boxes_right: cx, cy, w, h = box new_cx = orig_H - 1 - cy new_cy = cx - combined_boxes.append((new_cx, new_cy, h, w, AgentActivityEnum.LEFT)) + combined_boxes.append((new_cx, new_cy, h, w, ObstacleStateEnum.LEFT)) boxes_normal = np.array(results_normal[0].boxes.xywh.cpu()) if len(results_normal) > 0 else [] for box in boxes_normal: cx, cy, w, h = box - combined_boxes.append((cx, cy, w, h, AgentActivityEnum.STANDING)) + combined_boxes.append((cx, cy, w, h, ObstacleStateEnum.STANDING)) + # Visualize the received images in 2D with their corresponding labels + # It draws rectangles and labels on the images: if getattr(self, 'visualize_2d', False): for (cx, cy, w, h, activity) in combined_boxes: left = int(cx - w / 2) right = int(cx + w / 2) top = int(cy - h / 2) bottom = int(cy + h / 2) - if activity == AgentActivityEnum.STANDING: + if activity == ObstacleStateEnum.STANDING: color = (255, 0, 0) label = "STANDING" - elif activity == AgentActivityEnum.RIGHT: + elif activity == ObstacleStateEnum.RIGHT: 
color = (0, 255, 0) label = "RIGHT" - elif activity == AgentActivityEnum.LEFT: + elif activity == ObstacleStateEnum.LEFT: color = (0, 0, 255) label = "LEFT" else: @@ -525,17 +248,24 @@ def update(self, vehicle: VehicleState) -> Dict[str, AgentState]: cv2.imshow("Detection - Cone 2D", undistorted_img) start = time.time() + # Transform the lidar points from lidar frame of reference to camera EXTRINSIC frame of reference. + # Then project the pixels onto the lidar points to "paint them" (essentially determine which points are associated with detected objects) pts_cam = transform_points_l2c(lidar_down, self.T_l2c) projected_pts = project_points(pts_cam, self.current_K, lidar_down) + # What is returned: + # projected_pts[:, 0]: u-coordinate in the image (horizontal pixel position) + # projected_pts[:, 1]: v-coordinate in the image (vertical pixel position) + # projected_pts[:, 2:5]: original X, Y, Z coordinates in the LiDAR frame + end = time.time() # print('-------processing time1---', end -start) - agents = {} + obstacles = {} for i, box_info in enumerate(combined_boxes): cx, cy, w, h, activity = box_info - left = int(cx - w / 2) - right = int(cx + w / 2) + left = int(cx - w / 1.6) + right = int(cx + w / 1.6) top = int(cy - h / 2) bottom = int(cy + h / 2) mask = (projected_pts[:, 0] >= left) & (projected_pts[:, 0] <= right) & \ @@ -545,8 +275,9 @@ def update(self, vehicle: VehicleState) -> Dict[str, AgentState]: continue points_3d = roi_pts[:, 2:5] - points_3d = filter_points_within_threshold(points_3d, 30) - points_3d = filter_depth_points(points_3d, max_depth_diff=0.3) + points_3d = filter_points_within_threshold(points_3d, 40) + points_3d = remove_ground_by_min_range(points_3d, z_range=0.08) + points_3d = filter_depth_points(points_3d, max_depth_diff=0.5) if self.use_cyl_roi: global_filtered = filter_points_within_threshold(lidar_down, 30) @@ -601,11 +332,11 @@ def update(self, vehicle: VehicleState) -> Dict[str, AgentState]: existing_id = match_existing_cone( 
np.array([new_pose.x, new_pose.y, new_pose.z]), dims, - self.tracked_agents, - distance_threshold=2.0 + self.tracked_obstacles, + distance_threshold=2 ) if existing_id is not None: - old_state = self.tracked_agents[existing_id] + old_state = self.tracked_obstacles[existing_id] if vehicle.v < 100: alpha = 0.1 avg_x = alpha * new_pose.x + (1 - alpha) * old_state.pose.x @@ -625,82 +356,122 @@ def update(self, vehicle: VehicleState) -> Dict[str, AgentState]: roll=avg_roll, frame=new_pose.frame ) - updated_agent = AgentState( + updated_obstacle = ObstacleState( pose=updated_pose, dimensions=dims, outline=None, - type=AgentEnum.CONE, + type=ObstacleMaterialEnum.TRAFFIC_CONE, activity=activity, velocity=(0, 0, 0), yaw_rate=0 ) else: - updated_agent = old_state - agents[existing_id] = updated_agent - self.tracked_agents[existing_id] = updated_agent + updated_obstacle = old_state + obstacles[existing_id] = updated_obstacle + self.tracked_obstacles[existing_id] = updated_obstacle else: - agent_id = f"Cone{self.cone_counter}" + obstacle_id = f"Cone{self.cone_counter}" self.cone_counter += 1 - new_agent = AgentState( + new_obstacle = ObstacleState( pose=new_pose, dimensions=dims, outline=None, - type=AgentEnum.CONE, + type=ObstacleMaterialEnum.TRAFFIC_CONE, activity=activity, velocity=(0, 0, 0), yaw_rate=0 ) - agents[agent_id] = new_agent - self.tracked_agents[agent_id] = new_agent + obstacles[obstacle_id] = new_obstacle + self.tracked_obstacles[obstacle_id] = new_obstacle else: - agent_id = f"Cone{self.cone_counter}" + obstacle_id = f"Cone{self.cone_counter}" self.cone_counter += 1 - new_agent = AgentState( + new_obstacle = ObstacleState( pose=new_pose, dimensions=dims, outline=None, - type=AgentEnum.CONE, + type=ObstacleMaterialEnum.TRAFFIC_CONE, activity=activity, velocity=(0, 0, 0), yaw_rate=0 ) - agents[agent_id] = new_agent + obstacles[obstacle_id] = new_obstacle - self.current_agents = agents + self.current_obstacles = obstacles # If tracking not enabled, return only 
current frame detections if not self.enable_tracking: - for agent_id, agent in self.current_agents.items(): - p = agent.pose + for obstacle_id, obstacle in self.current_obstacles.items(): + p = obstacle.pose rospy.loginfo( - f"Agent ID: {agent_id}\n" + f"Agent ID: {obstacle_id}\n" f"Pose: (x: {p.x:.3f}, y: {p.y:.3f}, z: {p.z:.3f}, " f"yaw: {p.yaw:.3f}, pitch: {p.pitch:.3f}, roll: {p.roll:.3f})\n" - f"Velocity: (vx: {agent.velocity[0]:.3f}, vy: {agent.velocity[1]:.3f}, vz: {agent.velocity[2]:.3f})\n" - f"type:{agent.activity}" + f"Velocity: (vx: {obstacle.velocity[0]:.3f}, vy: {obstacle.velocity[1]:.3f}, vz: {obstacle.velocity[2]:.3f})\n" + f"type:{obstacle.activity}" ) end = time.time() # print('-------processing time', end -start) - return self.current_agents + return self.current_obstacles - stale_ids = [agent_id for agent_id, agent in self.tracked_agents.items() - if current_time - agent.pose.t > 20.0] - for agent_id in stale_ids: - rospy.loginfo(f"Removing stale agent: {agent_id}\n") - del self.tracked_agents[agent_id] + stale_ids = [obstacle_id for obstacle_id, obstacle in self.tracked_obstacles.items() + if current_time - obstacle.pose.t > 20.0] + for obstacle_id in stale_ids: + rospy.loginfo(f"Removing stale obstacle: {obstacle_id}\n") + del self.tracked_obstacles[obstacle_id] if self.enable_tracking: - for agent_id, agent in self.tracked_agents.items(): - p = agent.pose + for obstacle_id, obstacle in self.tracked_obstacles.items(): + p = obstacle.pose rospy.loginfo( - f"Agent ID: {agent_id}\n" + f"Agent ID: {obstacle_id}\n" f"Pose: (x: {p.x:.3f}, y: {p.y:.3f}, z: {p.z:.3f}, " f"yaw: {p.yaw:.3f}, pitch: {p.pitch:.3f}, roll: {p.roll:.3f})\n" - f"Velocity: (vx: {agent.velocity[0]:.3f}, vy: {agent.velocity[1]:.3f}, vz: {agent.velocity[2]:.3f})\n" - f"type:{agent.activity}" + f"Velocity: (vx: {obstacle.velocity[0]:.3f}, vy: {obstacle.velocity[1]:.3f}, vz: {obstacle.velocity[2]:.3f})\n" + f"type:{obstacle.activity}" ) end = time.time() # print('-------processing 
time', end -start) - return self.tracked_agents + return self.tracked_obstacles + + def save_sensor_data(self, vehicle: VehicleState, latest_image) -> None: + os.makedirs("data", exist_ok=True) + tstamp = int(self.vehicle_interface.time() * 1000) + # 1) Dump raw image + cv2.imwrite(f"data/{tstamp}_image.png", latest_image) + # 2) Dump raw LiDAR + np.savez(f"data/{tstamp}_lidar.npz", lidar=self.latest_lidar) + # 3) Write BEFORE_TRANSFORM + with open(f"data/{tstamp}_vehstate.txt", "w") as f: + vp = vehicle.pose + f.write( + f"BEFORE_TRANSFORM " + f"x={vp.x:.3f}, y={vp.y:.3f}, z={vp.z:.3f}, " + f"yaw={vp.yaw:.2f}, pitch={vp.pitch:.2f}, roll={vp.roll:.2f}\n" + ) + # Compute vehicle_start_pose in either START or CURRENT + if self.use_start_frame: + if self.start_pose_abs is None: + self.start_pose_abs = vehicle.pose + vehicle_start_pose = vehicle.pose.to_frame( + ObjectFrameEnum.START, + vehicle.pose, + self.start_pose_abs + ) + mode = "START" + else: + vehicle_start_pose = vehicle.pose + mode = "CURRENT" + with open(f"data/{tstamp}_vehstate.txt", "a") as f: + f.write( + f"AFTER_TRANSFORM " + f"x={vehicle_start_pose.x:.3f}, " + f"y={vehicle_start_pose.y:.3f}, " + f"z={vehicle_start_pose.z:.3f}, " + f"yaw={vehicle_start_pose.yaw:.2f}, " + f"pitch={vehicle_start_pose.pitch:.2f}, " + f"roll={vehicle_start_pose.roll:.2f}, " + f"frame={mode}\n" + ) # ----- Fake Cone Detector 2D (for Testing Purposes) ----- @@ -718,26 +489,26 @@ def state_inputs(self): return ['vehicle'] def state_outputs(self): - return ['agents'] + return ['obstacles'] - def update(self, vehicle: VehicleState) -> Dict[str, AgentState]: + def update(self, vehicle: VehicleState) -> Dict[str, ObstacleState]: if self.t_start is None: self.t_start = self.vehicle_interface.time() t = self.vehicle_interface.time() - self.t_start res = {} for time_range in self.times: if t >= time_range[0] and t <= time_range[1]: - res['cone0'] = box_to_fake_agent((0, 0, 0, 0)) + res['cone0'] = box_to_fake_obstacle((0, 0, 0, 0)) 
rospy.loginfo("Detected a Cone (simulated)") return res -def box_to_fake_agent(box): +def box_to_fake_obstacle(box): x, y, w, h = box pose = ObjectPose(t=0, x=x + w / 2, y=y + h / 2, z=0, yaw=0, pitch=0, roll=0, frame=ObjectFrameEnum.CURRENT) dims = (w, h, 0) - return AgentState(pose=pose, dimensions=dims, outline=None, - type=AgentEnum.CONE, activity=AgentActivityEnum.MOVING, + return ObstacleState(pose=pose, dimensions=dims, outline=None, + type=ObstacleMaterialEnum.TRAFFIC_CONE, activity=ObstacleStateEnum.STANDING, velocity=(0, 0, 0), yaw_rate=0) From 4dd6e5c0cfc724f8a34ead5730f2c10de09b3ade Mon Sep 17 00:00:00 2001 From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com> Date: Wed, 7 May 2025 10:26:40 -0500 Subject: [PATCH 08/28] Update agent.py --- GEMstack/state/agent.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/GEMstack/state/agent.py b/GEMstack/state/agent.py index 2ca23ba76..65c2bfeb0 100644 --- a/GEMstack/state/agent.py +++ b/GEMstack/state/agent.py @@ -11,18 +11,13 @@ class AgentEnum(Enum): LARGE_TRUCK = 2 PEDESTRIAN = 3 BICYCLIST = 4 - CONE = 5 - class AgentActivityEnum(Enum): STOPPED = 0 # standing pedestrians, parked cars, etc. No need to predict motion. MOVING = 1 # standard motion. Predictions will be used here FAST = 2 # indicates faster than usual motion, e.g., runners. 
UNDETERMINED = 3 # unknown activity - STANDING = 4 # standing cone - LEFT = 5 # flipped cone facing left - RIGHT = 6 # flipped cone facing right - + STANDING = 4 # standing @dataclass @register From 45168fd63d99fbdfabffda9287bc0b7bd2081c01 Mon Sep 17 00:00:00 2001 From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com> Date: Wed, 7 May 2025 10:27:44 -0500 Subject: [PATCH 09/28] Update obstacle.py --- GEMstack/state/obstacle.py | 39 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/GEMstack/state/obstacle.py b/GEMstack/state/obstacle.py index 8fddd5cc0..61389819b 100644 --- a/GEMstack/state/obstacle.py +++ b/GEMstack/state/obstacle.py @@ -1,7 +1,9 @@ -from dataclasses import dataclass +from __future__ import annotations +from dataclasses import dataclass, replace from ..utils.serialization import register -from .physical_object import PhysicalObject +from .physical_object import ObjectFrameEnum,ObjectPose,PhysicalObject,convert_vector from enum import Enum +from typing import Tuple class ObstacleMaterialEnum(Enum): UNKNOWN = 0 @@ -16,6 +18,16 @@ class ObstacleMaterialEnum(Enum): SMALL_ANIMAL = 9 ROADKILL = 10 +class ObstacleStateEnum(Enum): + STOPPED = 0 # standing pedestrians, parked cars, etc. No need to predict motion. + MOVING = 1 # standard motion. 
Predictions will be used here + FAST = 2 # indicates faster than usual motion + UNDETERMINED = 3 # unknown activity + STANDING = 4 # standing cone + LEFT = 5 # flipped cone facing left + RIGHT = 6 # flipped cone facing right + + @dataclass @register @@ -23,3 +35,26 @@ class Obstacle(PhysicalObject): material : ObstacleMaterialEnum collidable : bool + +@dataclass +@register +class ObstacleState(PhysicalObject): + type: ObstacleMaterialEnum + activity: ObstacleStateEnum + velocity: Tuple[float, float, float] # estimated velocity in x,y,z, m/s and in agent's local frame + yaw_rate: float # estimated yaw rate, in radians/s + + def velocity_local(self) -> Tuple[float, float, float]: + """Returns velocity in m/s in the agent's local frame.""" + return self.velocity + + def velocity_parent(self) -> Tuple[float, float, float]: + """Returns velocity in m/s in the agent pose's parent frame. + I.e., if the pose frame is CURRENT, then will return the velocity in + the CURRENT frame.""" + return self.pose.rotation().dot(self.velocity).tolist() + + def to_frame(self, frame: ObjectFrameEnum, current_pose=None, start_pose_abs=None) -> ObstacleState: + newpose = self.pose.to_frame(frame, current_pose, start_pose_abs) + newvelocity = convert_vector(self.velocity, self.pose.frame, frame, current_pose, start_pose_abs) + return replace(self, pose=newpose, velocity=newvelocity) From 6782b7c271340b003e008ffe542e30339fab0705 Mon Sep 17 00:00:00 2001 From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com> Date: Wed, 7 May 2025 10:28:19 -0500 Subject: [PATCH 10/28] Update cone_detection.yaml --- launch/cone_detection.yaml | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/launch/cone_detection.yaml b/launch/cone_detection.yaml index f4d2e21e2..cec5420da 100644 --- a/launch/cone_detection.yaml +++ b/launch/cone_detection.yaml @@ -12,19 +12,23 @@ recovery: drive: perception: state_estimation : GNSSStateEstimator - agent_detection : + 
obstacle_detection : type: cone_detection.ConeDetector3D args: - camera_name: front_right #[front, front_right] + camera_name: front #[front, front_right] camera_calib_file: ./GEMstack/knowledge/calibration/cameras.yaml - # optional overrides - enable_tracking: False # True if you want ot enable tracking function - visualize_2d: False # True if you see 2D detection visualization - use_cyl_roi: False # True if you want to use a cylinder roi (no need to use if calibration is accurate) - save_data: False # True if you want to save the sensor input data - orientation: False # True if you need to detect flipped cones - use_start_frame: False # True if you need output in START frame, otherwise in CURRENT frame + T_l2v: + - [0.99939639, 0.02547917, 0.023615, 1.1] + - [-0.02530848, 0.99965156, -0.00749882, 0.03773583] + - [-0.02379784, 0.00689664, 0.999693, 1.95320223] + - [0.0, 0.0, 0.0, 1.0] + enable_tracking: True # True if you want to enable tracking + visualize_2d: False # True to see 2D detection visualization + use_cyl_roi: False # True to use a cylinder ROI + save_data: False # True to save sensor input data + orientation: True # True to detect flipped cones + use_start_frame: True # True to output in START frame perception_normalization : StandardPerceptionNormalizer planning: @@ -61,7 +65,7 @@ log: # If True, then record all readings / commands of the vehicle interface. Default False vehicle_interface : True # Specify which components to record to behavior.json. Default records nothing - components : ['state_estimation','agent_detection','motion_planning'] + components : ['state_estimation','obstacle_detection','motion_planning'] # Specify which components of state to record to state.json. Default records nothing #state: ['all'] # Specify the rate in Hz at which to record state to state.json. 
Default records at the pipeline's rate @@ -109,7 +113,7 @@ variants: description: "Run the yielding trajectory planner on the real vehicle with faked perception" drive: perception: - agent_detection : cone_detection.FakeConeDetector2D + obstacle_detection : cone_detection.FakeConeDetector2D fake_sim: run: From c7270573fd9993d37d477ecf62dbd504b004be5a Mon Sep 17 00:00:00 2001 From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com> Date: Wed, 7 May 2025 23:57:08 -0500 Subject: [PATCH 11/28] Update cone_detection.py --- GEMstack/onboard/perception/cone_detection.py | 84 ++++++++----------- 1 file changed, 37 insertions(+), 47 deletions(-) diff --git a/GEMstack/onboard/perception/cone_detection.py b/GEMstack/onboard/perception/cone_detection.py index b9fbc1e27..7af1b558f 100644 --- a/GEMstack/onboard/perception/cone_detection.py +++ b/GEMstack/onboard/perception/cone_detection.py @@ -1,4 +1,5 @@ -from ...state import AllState, VehicleState, ObjectPose, ObjectFrameEnum, ObstacleState, ObstacleMaterialEnum, ObstacleStateEnum +from ...state import AllState, VehicleState, ObjectPose, ObjectFrameEnum, ObstacleState, ObstacleMaterialEnum, \ + ObstacleStateEnum from ..interface.gem import GEMInterface from ..component import Component from .perception_utils import * @@ -29,50 +30,38 @@ class ConeDetector3D(Component): """ def __init__( - self, - vehicle_interface: GEMInterface, - camera_name: str, - camera_calib_file: str, - enable_tracking: bool = True, - visualize_2d: bool = False, - use_cyl_roi: bool = False, - T_l2v: list = None, - save_data: bool = True, - orientation: bool = True, - use_start_frame: bool = True, - **kwargs + self, + vehicle_interface: GEMInterface, + camera_name: str, + camera_calib_file: str, + enable_tracking: bool = True, + visualize_2d: bool = False, + use_cyl_roi: bool = False, + save_data: bool = True, + orientation: bool = True, + use_start_frame: bool = True, + **kwargs ): # Core interfaces and state - self.vehicle_interface = 
vehicle_interface - self.current_obstacles = {} - self.tracked_obstacles = {} - self.cone_counter = 0 - self.latest_image = None - self.latest_lidar = None - self.bridge = CvBridge() - self.start_pose_abs = None - self.start_time = None + self.vehicle_interface = vehicle_interface + self.current_obstacles = {} + self.tracked_obstacles = {} + self.cone_counter = 0 + self.latest_image = None + self.latest_lidar = None + self.bridge = CvBridge() + self.start_pose_abs = None + self.start_time = None # Config flags - self.camera_name = camera_name + self.camera_name = camera_name self.enable_tracking = enable_tracking - self.visualize_2d = visualize_2d - self.use_cyl_roi = use_cyl_roi - self.save_data = save_data - self.orientation = orientation + self.visualize_2d = visualize_2d + self.use_cyl_roi = use_cyl_roi + self.save_data = save_data + self.orientation = orientation self.use_start_frame = use_start_frame - # 1) Load lidar→vehicle transform - if T_l2v is not None: - self.T_l2v = np.array(T_l2v) - else: - self.T_l2v = np.array([ - [0.99939639, 0.02547917, 0.023615, 1.1], - [-0.02530848, 0.99965156, -0.00749882, 0.03773583], - [-0.02379784, 0.00689664, 0.999693, 1.95320223], - [0.0, 0.0, 0.0, 1.0] - ]) - # 2) Load camera intrinsics/extrinsics from the supplied YAML with open(camera_calib_file, 'r') as f: calib = yaml.safe_load(f) @@ -84,15 +73,16 @@ def __init__( # D: [...] 
# T_l2c: [[...], ..., [...]] cam_cfg = calib['cameras'][camera_name] - self.K = np.array(cam_cfg['K']) - self.D = np.array(cam_cfg['D']) + self.K = np.array(cam_cfg['K']) + self.D = np.array(cam_cfg['D']) self.T_l2c = np.array(cam_cfg['T_l2c']) + self.T_l2v = np.array(cam_cfg['T_l2v']) # Derived transforms self.undistort_map1 = None self.undistort_map2 = None - self.camera_front = (camera_name=='front') + self.camera_front = (camera_name == 'front') def rate(self) -> float: return 8 @@ -126,8 +116,8 @@ def initialize(self): # Initialize the YOLO detector self.detector = YOLO('GEMstack/knowledge/detection/cone.pt') self.detector.to('cuda') - self.T_c2l = np.linalg.inv(self.T_l2c) - self.R_c2l = self.T_c2l[:3, :3] + self.T_c2l = np.linalg.inv(self.T_l2c) + self.R_c2l = self.T_c2l[:3, :3] self.camera_origin_in_lidar = self.T_c2l[:3, 3] def synchronized_callback(self, image_msg, lidar_msg): @@ -162,7 +152,7 @@ def update(self, vehicle: VehicleState) -> Dict[str, ObstacleState]: if self.latest_image is None or self.latest_lidar is None: return {} latest_image = self.latest_image.copy() - + # Set up current time variables start = time.time() current_time = self.vehicle_interface.time() @@ -179,7 +169,7 @@ def update(self, vehicle: VehicleState) -> Dict[str, ObstacleState]: # Ensure data/ exists and build timestamp if self.save_data: self.save_sensor_data(vehicle=vehicle, latest_image=latest_image) - + if self.camera_front == False: start = time.time() undistorted_img, current_K = self.undistort_image(latest_image, self.K, self.D) @@ -508,8 +498,8 @@ def box_to_fake_obstacle(box): pose = ObjectPose(t=0, x=x + w / 2, y=y + h / 2, z=0, yaw=0, pitch=0, roll=0, frame=ObjectFrameEnum.CURRENT) dims = (w, h, 0) return ObstacleState(pose=pose, dimensions=dims, outline=None, - type=ObstacleMaterialEnum.TRAFFIC_CONE, activity=ObstacleStateEnum.STANDING, - velocity=(0, 0, 0), yaw_rate=0) + type=ObstacleMaterialEnum.TRAFFIC_CONE, activity=ObstacleStateEnum.STANDING, + velocity=(0, 0, 
0), yaw_rate=0) if __name__ == '__main__': From 696fc4e7f489273cb13d24fd6abce3a35da5dc72 Mon Sep 17 00:00:00 2001 From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com> Date: Wed, 7 May 2025 23:57:36 -0500 Subject: [PATCH 12/28] Update cone_detection.yaml moved the l2v to cameras.yaml --- launch/cone_detection.yaml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/launch/cone_detection.yaml b/launch/cone_detection.yaml index cec5420da..ec0881a74 100644 --- a/launch/cone_detection.yaml +++ b/launch/cone_detection.yaml @@ -17,12 +17,6 @@ drive: args: camera_name: front #[front, front_right] camera_calib_file: ./GEMstack/knowledge/calibration/cameras.yaml - - T_l2v: - - [0.99939639, 0.02547917, 0.023615, 1.1] - - [-0.02530848, 0.99965156, -0.00749882, 0.03773583] - - [-0.02379784, 0.00689664, 0.999693, 1.95320223] - - [0.0, 0.0, 0.0, 1.0] enable_tracking: True # True if you want to enable tracking visualize_2d: False # True to see 2D detection visualization use_cyl_roi: False # True to use a cylinder ROI From d40dc354e5b399c3209b4c9f4a58f838645319a5 Mon Sep 17 00:00:00 2001 From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com> Date: Wed, 7 May 2025 23:58:22 -0500 Subject: [PATCH 13/28] Update cameras.yaml moved l2v here --- GEMstack/knowledge/calibration/cameras.yaml | 55 ++++++++++++--------- 1 file changed, 32 insertions(+), 23 deletions(-) diff --git a/GEMstack/knowledge/calibration/cameras.yaml b/GEMstack/knowledge/calibration/cameras.yaml index e240957aa..4872db65d 100644 --- a/GEMstack/knowledge/calibration/cameras.yaml +++ b/GEMstack/knowledge/calibration/cameras.yaml @@ -1,26 +1,35 @@ cameras: - front: - K: - - [684.83331299, 0.0, 573.37109375] - - [0.0, 684.60968018, 363.70092773] - - [0.0, 0.0, 1.0] - D: [0.0, 0.0, 0.0, 0.0, 0.0] - T_l2c: - - [ 0.0289748006, -0.999580136, 0.0000368439, -0.0307300513] - - [-0.0094993062, -0.0003122155, -0.999954834, -0.386689354 ] - - [ 0.999534999, 0.0289731321, -0.0095043721, -0.671425124 ] - - [ 
0.0, 0.0, 0.0, 1.0 ] + front: + K: + - [684.83331299, 0.0, 573.37109375] + - [0.0, 684.60968018, 363.70092773] + - [0.0, 0.0, 1.0] + D: [0.0, 0.0, 0.0, 0.0, 0.0] + T_l2c: + - [ 0.0289748006, -0.999580136, 0.0000368439, -0.0307300513] + - [-0.0094993062, -0.0003122155, -0.999954834, -0.386689354 ] + - [ 0.999534999, 0.0289731321, -0.0095043721, -0.671425124 ] + - [ 0.0, 0.0, 0.0, 1.0 ] + T_l2v: + - [ 0.99939639, 0.02547917, 0.023615, 1.1 ] + - [ -0.02530848, 0.99965156, -0.00749882, 0.03773583 ] + - [ -0.02379784, 0.00689664, 0.999693, 1.95320223 ] + - [ 0.0, 0.0, 0.0, 1.0 ] - front_right: - K: - - [1176.25545, 0.0, 966.432645] - - [0.0, 1175.14569, 608.580326] - - [0.0, 0.0, 1.0 ] - D: [-0.270136325, 0.164393255, -0.00160720782, -0.0000741246708, -0.0619939758] - T_l2c: - - [-0.71836368, -0.69527204, -0.02346088, 0.05718003] - - [-0.09720448, 0.13371206, -0.98624154, -0.15983010] - - [ 0.68884317, -0.70619960, -0.16363744, -1.04767285] - - [ 0.0, 0.0, 0.0, 1.0 ] + front_right: + K: + - [1176.25545, 0.0, 966.432645] + - [0.0, 1175.14569, 608.580326] + - [0.0, 0.0, 1.0 ] + D: [-0.270136325, 0.164393255, -0.00160720782, -0.0000741246708, -0.0619939758] + T_l2c: + - [-0.71836368, -0.69527204, -0.02346088, 0.05718003] + - [-0.09720448, 0.13371206, -0.98624154, -0.15983010] + - [ 0.68884317, -0.70619960, -0.16363744, -1.04767285] + - [ 0.0, 0.0, 0.0, 1.0 ] + T_l2v: + - [0.99939639, 0.02547917, 0.023615, 1.1] + - [-0.02530848, 0.99965156, -0.00749882, 0.03773583] + - [-0.02379784, 0.00689664, 0.999693, 1.95320223] + - [0.0, 0.0, 0.0, 1.0 ] - # add other cameras (back_left, front_right, back_right) similarly... 
From f84c1788c49a3404c036cf54c1abdacfa90a0745 Mon Sep 17 00:00:00 2001 From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com> Date: Thu, 8 May 2025 00:02:05 -0500 Subject: [PATCH 14/28] Update agent.py no longer changing this file --- GEMstack/state/agent.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/GEMstack/state/agent.py b/GEMstack/state/agent.py index 65c2bfeb0..e295fbba8 100644 --- a/GEMstack/state/agent.py +++ b/GEMstack/state/agent.py @@ -12,12 +12,13 @@ class AgentEnum(Enum): PEDESTRIAN = 3 BICYCLIST = 4 + class AgentActivityEnum(Enum): STOPPED = 0 # standing pedestrians, parked cars, etc. No need to predict motion. MOVING = 1 # standard motion. Predictions will be used here FAST = 2 # indicates faster than usual motion, e.g., runners. UNDETERMINED = 3 # unknown activity - STANDING = 4 # standing + @dataclass @register From d2c6d894d46b91dddcb163c968a1f105a3678ca6 Mon Sep 17 00:00:00 2001 From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com> Date: Fri, 9 May 2025 15:57:30 -0500 Subject: [PATCH 15/28] Update agent.py From b3713cd1ceabf997b35c66c3208485152b398d0b Mon Sep 17 00:00:00 2001 From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com> Date: Fri, 9 May 2025 15:59:15 -0500 Subject: [PATCH 16/28] Update obstacle.py --- GEMstack/state/obstacle.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/GEMstack/state/obstacle.py b/GEMstack/state/obstacle.py index 61389819b..05f98b498 100644 --- a/GEMstack/state/obstacle.py +++ b/GEMstack/state/obstacle.py @@ -41,18 +41,6 @@ class Obstacle(PhysicalObject): class ObstacleState(PhysicalObject): type: ObstacleMaterialEnum activity: ObstacleStateEnum - velocity: Tuple[float, float, float] # estimated velocity in x,y,z, m/s and in agent's local frame - yaw_rate: float # estimated yaw rate, in radians/s - - def velocity_local(self) -> Tuple[float, float, float]: - """Returns velocity in m/s in the agent's local frame.""" - return self.velocity - - def 
velocity_parent(self) -> Tuple[float, float, float]: - """Returns velocity in m/s in the agent pose's parent frame. - I.e., if the pose frame is CURRENT, then will return the velocity in - the CURRENT frame.""" - return self.pose.rotation().dot(self.velocity).tolist() def to_frame(self, frame: ObjectFrameEnum, current_pose=None, start_pose_abs=None) -> ObstacleState: newpose = self.pose.to_frame(frame, current_pose, start_pose_abs) From bd957e90acd5b5c2c8799c79c6dbbe5fea7a18fb Mon Sep 17 00:00:00 2001 From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com> Date: Fri, 9 May 2025 16:22:06 -0500 Subject: [PATCH 17/28] Update agent.py From 9be95fbacfbdccf71262fb823d3f948dd956e2d3 Mon Sep 17 00:00:00 2001 From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com> Date: Fri, 9 May 2025 16:23:17 -0500 Subject: [PATCH 18/28] Update agent.py From 367e66a87a81f307b11f819bcd62a85052f8be9e Mon Sep 17 00:00:00 2001 From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com> Date: Fri, 9 May 2025 16:38:06 -0500 Subject: [PATCH 19/28] Update cone_detection.py --- GEMstack/onboard/perception/cone_detection.py | 42 ++++++++++--------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/GEMstack/onboard/perception/cone_detection.py b/GEMstack/onboard/perception/cone_detection.py index 7af1b558f..80757241a 100644 --- a/GEMstack/onboard/perception/cone_detection.py +++ b/GEMstack/onboard/perception/cone_detection.py @@ -1,3 +1,5 @@ +from open3d.examples.geometry.point_cloud_transformation import transform + from ...state import AllState, VehicleState, ObjectPose, ObjectFrameEnum, ObstacleState, ObstacleMaterialEnum, \ ObstacleStateEnum from ..interface.gem import GEMInterface @@ -110,7 +112,7 @@ def initialize(self): self.lidar_sub = Subscriber('/ouster/points', PointCloud2) self.sync = ApproximateTimeSynchronizer([ self.rgb_sub, self.lidar_sub - ], queue_size=500, slop=0.05) + ], queue_size=500, slop=0.03) 
self.sync.registerCallback(self.synchronized_callback) # Initialize the YOLO detector @@ -185,6 +187,8 @@ def update(self, vehicle: VehicleState) -> Dict[str, ObstacleState]: undistorted_img = latest_image.copy() orig_H, orig_W = latest_image.shape[:2] self.current_K = self.K + # print(self.K) + # print(self.T_l2c) results_normal = self.detector(img_normal, conf=0.35, classes=[0]) combined_boxes = [] if not self.enable_tracking: @@ -198,14 +202,18 @@ def update(self, vehicle: VehicleState) -> Dict[str, ObstacleState]: boxes_right = np.array(results_right[0].boxes.xywh.cpu()) if len(results_right) > 0 else [] for box in boxes_left: cx, cy, w, h = box - new_cx = cy - new_cy = orig_W - 1 - cx - combined_boxes.append((new_cx, new_cy, h, w, ObstacleStateEnum.RIGHT)) + new_cx = orig_W - 1 - cy + new_cy = cx + new_w = h # Swap width and height. + new_h = w + combined_boxes.append((new_cx, new_cy, new_w, new_h, ObstacleStateEnum.RIGHT)) for box in boxes_right: cx, cy, w, h = box - new_cx = orig_H - 1 - cy - new_cy = cx - combined_boxes.append((new_cx, new_cy, h, w, ObstacleStateEnum.LEFT)) + new_cx = cy + new_cy = orig_H - 1 - cx + new_w = h # Swap width and height. 
+ new_h = w + combined_boxes.append((new_cx, new_cy, new_w, new_h, ObstacleStateEnum.LEFT)) boxes_normal = np.array(results_normal[0].boxes.xywh.cpu()) if len(results_normal) > 0 else [] for box in boxes_normal: @@ -254,6 +262,7 @@ def update(self, vehicle: VehicleState) -> Dict[str, ObstacleState]: for i, box_info in enumerate(combined_boxes): cx, cy, w, h, activity = box_info + # print(cx, cy, w, h) left = int(cx - w / 1.6) right = int(cx + w / 1.6) top = int(cy - h / 2) @@ -261,10 +270,12 @@ def update(self, vehicle: VehicleState) -> Dict[str, ObstacleState]: mask = (projected_pts[:, 0] >= left) & (projected_pts[:, 0] <= right) & \ (projected_pts[:, 1] >= top) & (projected_pts[:, 1] <= bottom) roi_pts = projected_pts[mask] + # print(roi_pts) if roi_pts.shape[0] < 5: continue points_3d = roi_pts[:, 2:5] + points_3d = filter_points_within_threshold(points_3d, 40) points_3d = remove_ground_by_min_range(points_3d, z_range=0.08) points_3d = filter_depth_points(points_3d, max_depth_diff=0.5) @@ -303,7 +314,7 @@ def update(self, vehicle: VehicleState) -> Dict[str, ObstacleState]: vehicle.pose, self.start_pose_abs ) - T_vehicle_to_start = pose_to_matrix(vehicle_start_pose) + T_vehicle_to_start = vehicle_start_pose.transform() xp, yp, zp = (T_vehicle_to_start @ np.append(refined_center, 1))[:3] out_frame = ObjectFrameEnum.START else: @@ -352,8 +363,6 @@ def update(self, vehicle: VehicleState) -> Dict[str, ObstacleState]: outline=None, type=ObstacleMaterialEnum.TRAFFIC_CONE, activity=activity, - velocity=(0, 0, 0), - yaw_rate=0 ) else: updated_obstacle = old_state @@ -368,8 +377,6 @@ def update(self, vehicle: VehicleState) -> Dict[str, ObstacleState]: outline=None, type=ObstacleMaterialEnum.TRAFFIC_CONE, activity=activity, - velocity=(0, 0, 0), - yaw_rate=0 ) obstacles[obstacle_id] = new_obstacle self.tracked_obstacles[obstacle_id] = new_obstacle @@ -382,8 +389,6 @@ def update(self, vehicle: VehicleState) -> Dict[str, ObstacleState]: outline=None, 
type=ObstacleMaterialEnum.TRAFFIC_CONE, activity=activity, - velocity=(0, 0, 0), - yaw_rate=0 ) obstacles[obstacle_id] = new_obstacle @@ -394,10 +399,9 @@ def update(self, vehicle: VehicleState) -> Dict[str, ObstacleState]: for obstacle_id, obstacle in self.current_obstacles.items(): p = obstacle.pose rospy.loginfo( - f"Agent ID: {obstacle_id}\n" + f"Cone ID: {obstacle_id}\n" f"Pose: (x: {p.x:.3f}, y: {p.y:.3f}, z: {p.z:.3f}, " f"yaw: {p.yaw:.3f}, pitch: {p.pitch:.3f}, roll: {p.roll:.3f})\n" - f"Velocity: (vx: {obstacle.velocity[0]:.3f}, vy: {obstacle.velocity[1]:.3f}, vz: {obstacle.velocity[2]:.3f})\n" f"type:{obstacle.activity}" ) end = time.time() @@ -413,10 +417,9 @@ def update(self, vehicle: VehicleState) -> Dict[str, ObstacleState]: for obstacle_id, obstacle in self.tracked_obstacles.items(): p = obstacle.pose rospy.loginfo( - f"Agent ID: {obstacle_id}\n" + f"Cone ID: {obstacle_id}\n" f"Pose: (x: {p.x:.3f}, y: {p.y:.3f}, z: {p.z:.3f}, " f"yaw: {p.yaw:.3f}, pitch: {p.pitch:.3f}, roll: {p.roll:.3f})\n" - f"Velocity: (vx: {obstacle.velocity[0]:.3f}, vy: {obstacle.velocity[1]:.3f}, vz: {obstacle.velocity[2]:.3f})\n" f"type:{obstacle.activity}" ) end = time.time() @@ -498,8 +501,7 @@ def box_to_fake_obstacle(box): pose = ObjectPose(t=0, x=x + w / 2, y=y + h / 2, z=0, yaw=0, pitch=0, roll=0, frame=ObjectFrameEnum.CURRENT) dims = (w, h, 0) return ObstacleState(pose=pose, dimensions=dims, outline=None, - type=ObstacleMaterialEnum.TRAFFIC_CONE, activity=ObstacleStateEnum.STANDING, - velocity=(0, 0, 0), yaw_rate=0) + type=ObstacleMaterialEnum.TRAFFIC_CONE, activity=ObstacleStateEnum.STANDING) if __name__ == '__main__': From 7b7aeaa9f77129df9188424a3384fb6b6916aa66 Mon Sep 17 00:00:00 2001 From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com> Date: Fri, 9 May 2025 16:39:25 -0500 Subject: [PATCH 20/28] Create perception_utils.py --- .../onboard/perception/perception_utils.py | 226 ++++++++++++++++++ 1 file changed, 226 insertions(+) create mode 100644 
GEMstack/onboard/perception/perception_utils.py diff --git a/GEMstack/onboard/perception/perception_utils.py b/GEMstack/onboard/perception/perception_utils.py new file mode 100644 index 000000000..a95172d2f --- /dev/null +++ b/GEMstack/onboard/perception/perception_utils.py @@ -0,0 +1,226 @@ +from ...state import ObjectPose, AgentState +from typing import Dict +import open3d as o3d +import numpy as np +from sklearn.cluster import DBSCAN +from scipy.spatial.transform import Rotation as R +from sensor_msgs.msg import PointCloud2 +import sensor_msgs.point_cloud2 as pc2 +import ros_numpy + + +# ----- Helper Functions ----- + +def cylindrical_roi(points, center, radius, height): + horizontal_dist = np.linalg.norm(points[:, :2] - center[:2], axis=1) + vertical_diff = np.abs(points[:, 2] - center[2]) + mask = (horizontal_dist <= radius) & (vertical_diff <= height / 2) + return points[mask] + + +def filter_points_within_threshold(points, threshold=15.0): + distances = np.linalg.norm(points, axis=1) + mask = distances <= threshold + return points[mask] + +def match_existing_cone( + new_center: np.ndarray, + new_dims: tuple, + existing_agents: Dict[str, AgentState], + distance_threshold: float = 1.0 +) -> str: + """ + Find the closest existing Cone agent within a specified distance threshold. + """ + best_agent_id = None + best_dist = float('inf') + for agent_id, agent_state in existing_agents.items(): + old_center = np.array([agent_state.pose.x, agent_state.pose.y, agent_state.pose.z]) + dist = np.linalg.norm(new_center - old_center) + if dist < distance_threshold and dist < best_dist: + best_dist = dist + best_agent_id = agent_id + return best_agent_id + + +def compute_velocity(old_pose: ObjectPose, new_pose: ObjectPose, dt: float) -> tuple: + """ + Compute the (vx, vy, vz) velocity based on change in pose over time. 
+ """ + if dt <= 0: + return (0, 0, 0) + vx = (new_pose.x - old_pose.x) / dt + vy = (new_pose.y - old_pose.y) / dt + vz = (new_pose.z - old_pose.z) / dt + return (vx, vy, vz) + + +def extract_roi_box(lidar_pc, center, half_extents): + """ + Extract a region of interest (ROI) from the LiDAR point cloud defined by an axis-aligned bounding box. + """ + lower = center - half_extents + upper = center + half_extents + mask = np.all((lidar_pc >= lower) & (lidar_pc <= upper), axis=1) + return lidar_pc[mask] + + +def pc2_to_numpy(pc2_msg, want_rgb=False): + """ + Convert a ROS PointCloud2 message into a numpy array quickly using ros_numpy. + This function extracts the x, y, z coordinates from the point cloud. + """ + # Convert the ROS message to a numpy structured array + pc = ros_numpy.point_cloud2.pointcloud2_to_array(pc2_msg) + # Stack x,y,z fields to a (N,3) array + pts = np.stack((np.array(pc['x']).ravel(), + np.array(pc['y']).ravel(), + np.array(pc['z']).ravel()), axis=1) + # Apply filtering (for example, x > 0 and z in a specified range) + mask = (pts[:, 0] > -0.5) & (pts[:, 2] < -1) & (pts[:, 2] > -2.7) + return pts[mask] + + +def backproject_pixel(u, v, K): + """ + Backprojects a pixel coordinate (u, v) into a normalized 3D ray in the camera coordinate system. + """ + cx, cy = K[0, 2], K[1, 2] + fx, fy = K[0, 0], K[1, 1] + x = (u - cx) / fx + y = (v - cy) / fy + ray_dir = np.array([x, y, 1.0]) + return ray_dir / np.linalg.norm(ray_dir) + + +def find_human_center_on_ray(lidar_pc, ray_origin, ray_direction, + t_min, t_max, t_step, + distance_threshold, min_points, ransac_threshold): + """ + Identify the center of a human along a projected ray. + (This function is no longer used in the new approach.) + """ + return None, None, None + + +def extract_roi(pc, center, roi_radius): + """ + Extract points from a point cloud that lie within a specified radius of a center point. 
+ """ + distances = np.linalg.norm(pc - center, axis=1) + return pc[distances < roi_radius] + + +def refine_cluster(roi_points, center, eps=0.2, min_samples=10): + """ + Refine a point cluster by applying DBSCAN and return the cluster closest to 'center'. + """ + if roi_points.shape[0] < min_samples: + return roi_points + clustering = DBSCAN(eps=eps, min_samples=min_samples).fit(roi_points) + labels = clustering.labels_ + valid_clusters = [roi_points[labels == l] for l in set(labels) if l != -1] + if not valid_clusters: + return roi_points + best_cluster = min(valid_clusters, key=lambda c: np.linalg.norm(np.mean(c, axis=0) - center)) + return best_cluster + + +def remove_ground_by_min_range(cluster, z_range=0.05): + """ + Remove points within z_range of the minimum z (assumed to be ground). + """ + if cluster is None or cluster.shape[0] == 0: + return cluster + min_z = np.min(cluster[:, 2]) + filtered = cluster[cluster[:, 2] > (min_z + z_range)] + return filtered + + +def get_bounding_box_center_and_dimensions(points): + """ + Calculate the axis-aligned bounding box's center and dimensions for a set of 3D points. + """ + if points.shape[0] == 0: + return None, None + min_vals = np.min(points, axis=0) + max_vals = np.max(points, axis=0) + center = (min_vals + max_vals) / 2 + dimensions = max_vals - min_vals + return center, dimensions + + +def create_ray_line_set(start, end): + """ + Create an Open3D LineSet object representing a ray between two 3D points. + The line is colored yellow. 
+ """ + points = [start, end] + lines = [[0, 1]] + line_set = o3d.geometry.LineSet() + line_set.points = o3d.utility.Vector3dVector(points) + line_set.lines = o3d.utility.Vector2iVector(lines) + line_set.colors = o3d.utility.Vector3dVector([[1, 1, 0]]) + return line_set + + +def downsample_points(lidar_points, voxel_size=0.15): + pcd = o3d.geometry.PointCloud() + pcd.points = o3d.utility.Vector3dVector(lidar_points) + down_pcd = pcd.voxel_down_sample(voxel_size=voxel_size) + return np.asarray(down_pcd.points) + + +def filter_depth_points(lidar_points, max_depth_diff=0.9, use_norm=True): + if lidar_points.shape[0] == 0: + return lidar_points + + if use_norm: + depths = np.linalg.norm(lidar_points, axis=1) + else: + depths = lidar_points[:, 0] + + min_depth = np.min(depths) + max_possible_depth = min_depth + max_depth_diff + mask = depths < max_possible_depth + return lidar_points[mask] + + +def visualize_geometries(geometries, window_name="Open3D", width=800, height=600, point_size=5.0): + """ + Visualize a list of Open3D geometry objects in a dedicated window. + """ + vis = o3d.visualization.Visualizer() + vis.create_window(window_name=window_name, width=width, height=height) + for geom in geometries: + vis.add_geometry(geom) + opt = vis.get_render_option() + opt.point_size = point_size + vis.run() + vis.destroy_window() + +def transform_points_l2c(lidar_points, T_l2c): + N = lidar_points.shape[0] + pts_hom = np.hstack((lidar_points, np.ones((N, 1)))) # (N,4) + pts_cam = (T_l2c @ pts_hom.T).T # (N,4) + return pts_cam[:, :3] + + +# ----- New: Vectorized projection function ----- +def project_points(pts_cam, K, original_lidar_points): + """ + Vectorized version. + pts_cam: (N,3) array of points in camera coordinates. + original_lidar_points: (N,3) array of points in LiDAR coordinates. + Returns a (M,5) array: [u, v, X_lidar, Y_lidar, Z_lidar] for all points with Z>0. 
+ """ + mask = pts_cam[:, 2] > 0 + pts_cam_valid = pts_cam[mask] + lidar_valid = original_lidar_points[mask] + Xc = pts_cam_valid[:, 0] + Yc = pts_cam_valid[:, 1] + Zc = pts_cam_valid[:, 2] + u = (K[0, 0] * (Xc / Zc) + K[0, 2]).astype(np.int32) + v = (K[1, 1] * (Yc / Zc) + K[1, 2]).astype(np.int32) + proj = np.column_stack((u, v, lidar_valid)) + return proj From 4284265b94ce81ac8a07e1cd3bc27d89c4f8982e Mon Sep 17 00:00:00 2001 From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com> Date: Fri, 9 May 2025 16:41:38 -0500 Subject: [PATCH 21/28] Update agent.py From 16f1ac4733ff39969dd8bdfac941f4f73c01558d Mon Sep 17 00:00:00 2001 From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com> Date: Sat, 10 May 2025 10:44:11 -0500 Subject: [PATCH 22/28] Update cone_detection.py --- GEMstack/onboard/perception/cone_detection.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/GEMstack/onboard/perception/cone_detection.py b/GEMstack/onboard/perception/cone_detection.py index 80757241a..1846722a8 100644 --- a/GEMstack/onboard/perception/cone_detection.py +++ b/GEMstack/onboard/perception/cone_detection.py @@ -1,5 +1,3 @@ -from open3d.examples.geometry.point_cloud_transformation import transform - from ...state import AllState, VehicleState, ObjectPose, ObjectFrameEnum, ObstacleState, ObstacleMaterialEnum, \ ObstacleStateEnum from ..interface.gem import GEMInterface From ce4bba3d2afa18d0737e9c50c375f3ca2938c638 Mon Sep 17 00:00:00 2001 From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com> Date: Mon, 12 May 2025 13:26:13 -0500 Subject: [PATCH 23/28] Update cone_detection.py --- GEMstack/onboard/perception/cone_detection.py | 42 +++++++++---------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/GEMstack/onboard/perception/cone_detection.py b/GEMstack/onboard/perception/cone_detection.py index 1846722a8..e0de4abef 100644 --- a/GEMstack/onboard/perception/cone_detection.py +++ b/GEMstack/onboard/perception/cone_detection.py @@ -1,4 +1,4 @@ 
-from ...state import AllState, VehicleState, ObjectPose, ObjectFrameEnum, ObstacleState, ObstacleMaterialEnum, \ +from ...state import AllState, VehicleState, ObjectPose, ObjectFrameEnum, Obstacle, ObstacleMaterialEnum, \ ObstacleStateEnum from ..interface.gem import GEMInterface from ..component import Component @@ -146,7 +146,7 @@ def undistort_image(self, image, K, D): # print('--------undistort', end-start) return undistorted, newK - def update(self, vehicle: VehicleState) -> Dict[str, ObstacleState]: + def update(self, vehicle: VehicleState) -> Dict[str, Obstacle]: downsample = False # Gate guards against data not being present for both sensors: if self.latest_image is None or self.latest_lidar is None: @@ -221,18 +221,18 @@ def update(self, vehicle: VehicleState) -> Dict[str, ObstacleState]: # Visualize the received images in 2D with their corresponding labels # It draws rectangles and labels on the images: if getattr(self, 'visualize_2d', False): - for (cx, cy, w, h, activity) in combined_boxes: + for (cx, cy, w, h, state) in combined_boxes: left = int(cx - w / 2) right = int(cx + w / 2) top = int(cy - h / 2) bottom = int(cy + h / 2) - if activity == ObstacleStateEnum.STANDING: + if state == ObstacleStateEnum.STANDING: color = (255, 0, 0) label = "STANDING" - elif activity == ObstacleStateEnum.RIGHT: + elif state == ObstacleStateEnum.RIGHT: color = (0, 255, 0) label = "RIGHT" - elif activity == ObstacleStateEnum.LEFT: + elif state == ObstacleStateEnum.LEFT: color = (0, 0, 255) label = "LEFT" else: @@ -259,7 +259,7 @@ def update(self, vehicle: VehicleState) -> Dict[str, ObstacleState]: obstacles = {} for i, box_info in enumerate(combined_boxes): - cx, cy, w, h, activity = box_info + cx, cy, w, h, state = box_info # print(cx, cy, w, h) left = int(cx - w / 1.6) right = int(cx + w / 1.6) @@ -355,12 +355,12 @@ def update(self, vehicle: VehicleState) -> Dict[str, ObstacleState]: roll=avg_roll, frame=new_pose.frame ) - updated_obstacle = ObstacleState( + 
updated_obstacle = Obstacle( pose=updated_pose, dimensions=dims, outline=None, - type=ObstacleMaterialEnum.TRAFFIC_CONE, - activity=activity, + material=ObstacleMaterialEnum.TRAFFIC_CONE, + state=state, ) else: updated_obstacle = old_state @@ -369,24 +369,24 @@ def update(self, vehicle: VehicleState) -> Dict[str, ObstacleState]: else: obstacle_id = f"Cone{self.cone_counter}" self.cone_counter += 1 - new_obstacle = ObstacleState( + new_obstacle = Obstacle( pose=new_pose, dimensions=dims, outline=None, - type=ObstacleMaterialEnum.TRAFFIC_CONE, - activity=activity, + material=ObstacleMaterialEnum.TRAFFIC_CONE, + state=state, ) obstacles[obstacle_id] = new_obstacle self.tracked_obstacles[obstacle_id] = new_obstacle else: obstacle_id = f"Cone{self.cone_counter}" self.cone_counter += 1 - new_obstacle = ObstacleState( + new_obstacle = Obstacle( pose=new_pose, dimensions=dims, outline=None, - type=ObstacleMaterialEnum.TRAFFIC_CONE, - activity=activity, + material=ObstacleMaterialEnum.TRAFFIC_CONE, + state=state, ) obstacles[obstacle_id] = new_obstacle @@ -400,7 +400,7 @@ def update(self, vehicle: VehicleState) -> Dict[str, ObstacleState]: f"Cone ID: {obstacle_id}\n" f"Pose: (x: {p.x:.3f}, y: {p.y:.3f}, z: {p.z:.3f}, " f"yaw: {p.yaw:.3f}, pitch: {p.pitch:.3f}, roll: {p.roll:.3f})\n" - f"type:{obstacle.activity}" + f"state:{obstacle.state}" ) end = time.time() # print('-------processing time', end -start) @@ -418,7 +418,7 @@ def update(self, vehicle: VehicleState) -> Dict[str, ObstacleState]: f"Cone ID: {obstacle_id}\n" f"Pose: (x: {p.x:.3f}, y: {p.y:.3f}, z: {p.z:.3f}, " f"yaw: {p.yaw:.3f}, pitch: {p.pitch:.3f}, roll: {p.roll:.3f})\n" - f"type:{obstacle.activity}" + f"state:{obstacle.state}" ) end = time.time() # print('-------processing time', end -start) @@ -482,7 +482,7 @@ def state_inputs(self): def state_outputs(self): return ['obstacles'] - def update(self, vehicle: VehicleState) -> Dict[str, ObstacleState]: + def update(self, vehicle: VehicleState) -> Dict[str, 
Obstacle]: if self.t_start is None: self.t_start = self.vehicle_interface.time() t = self.vehicle_interface.time() - self.t_start @@ -498,8 +498,8 @@ def box_to_fake_obstacle(box): x, y, w, h = box pose = ObjectPose(t=0, x=x + w / 2, y=y + h / 2, z=0, yaw=0, pitch=0, roll=0, frame=ObjectFrameEnum.CURRENT) dims = (w, h, 0) - return ObstacleState(pose=pose, dimensions=dims, outline=None, - type=ObstacleMaterialEnum.TRAFFIC_CONE, activity=ObstacleStateEnum.STANDING) + return Obstacle(pose=pose, dimensions=dims, outline=None, + material=ObstacleMaterialEnum.TRAFFIC_CONE, state=ObstacleStateEnum.STANDING) if __name__ == '__main__': From e73d5b47bc5dbfa26fdaa6a2c708352922fd434a Mon Sep 17 00:00:00 2001 From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com> Date: Mon, 12 May 2025 13:26:45 -0500 Subject: [PATCH 24/28] Update __init__.py Updated enums in Obstacle --- GEMstack/state/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/GEMstack/state/__init__.py b/GEMstack/state/__init__.py index 8ddc0c5b0..5ec5f71c2 100644 --- a/GEMstack/state/__init__.py +++ b/GEMstack/state/__init__.py @@ -11,6 +11,7 @@ 'Obstacle', 'Sign', 'AgentState','AgentEnum','AgentActivityEnum', + 'ObstacleMaterialEnum','ObstacleStateEnum', 'SceneState', 'VehicleIntent','VehicleIntentEnum', 'AgentIntent', @@ -23,7 +24,7 @@ from .trajectory import Path,Trajectory from .vehicle import VehicleState,VehicleGearEnum from .roadgraph import Roadgraph, RoadgraphLane, RoadgraphCurve, RoadgraphRegion, RoadgraphCurveEnum, RoadgraphLaneEnum, RoadgraphRegionEnum, RoadgraphSurfaceEnum, RoadgraphConnectionEnum -from .obstacle import Obstacle, ObstacleMaterialEnum +from .obstacle import Obstacle, ObstacleMaterialEnum, ObstacleStateEnum from .sign import Sign, SignEnum, SignalLightEnum, SignState from .roadmap import Roadmap from .agent import AgentState, AgentEnum, AgentActivityEnum From 67ce6055778792b3e82c41d5d008b3c3bc521fea Mon Sep 17 00:00:00 2001 From: Tianyu Ren 
<142737618+Ren990420@users.noreply.github.com> Date: Mon, 12 May 2025 13:28:19 -0500 Subject: [PATCH 25/28] Update obstacle.py removed ObstacleState and redundant StateEnum --- GEMstack/state/obstacle.py | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/GEMstack/state/obstacle.py b/GEMstack/state/obstacle.py index 05f98b498..8ddc10490 100644 --- a/GEMstack/state/obstacle.py +++ b/GEMstack/state/obstacle.py @@ -19,13 +19,10 @@ class ObstacleMaterialEnum(Enum): ROADKILL = 10 class ObstacleStateEnum(Enum): - STOPPED = 0 # standing pedestrians, parked cars, etc. No need to predict motion. - MOVING = 1 # standard motion. Predictions will be used here - FAST = 2 # indicates faster than usual motion - UNDETERMINED = 3 # unknown activity - STANDING = 4 # standing cone - LEFT = 5 # flipped cone facing left - RIGHT = 6 # flipped cone facing right + UNDETERMINED = 0 # unknown activity + STANDING = 1 # standing cone + LEFT = 2 # flipped cone facing left + RIGHT = 3 # flipped cone facing right @@ -34,15 +31,8 @@ class ObstacleStateEnum(Enum): class Obstacle(PhysicalObject): material : ObstacleMaterialEnum collidable : bool + state: ObstacleStateEnum - -@dataclass -@register -class ObstacleState(PhysicalObject): - type: ObstacleMaterialEnum - activity: ObstacleStateEnum - - def to_frame(self, frame: ObjectFrameEnum, current_pose=None, start_pose_abs=None) -> ObstacleState: + def to_frame(self, frame: ObjectFrameEnum, current_pose=None, start_pose_abs=None) -> Obstacle: newpose = self.pose.to_frame(frame, current_pose, start_pose_abs) - newvelocity = convert_vector(self.velocity, self.pose.frame, frame, current_pose, start_pose_abs) - return replace(self, pose=newpose, velocity=newvelocity) + return replace(self, pose=newpose) From 958890de5d1e964196ab5ee60eaf1d951bb02014 Mon Sep 17 00:00:00 2001 From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com> Date: Mon, 12 May 2025 13:30:07 -0500 Subject: [PATCH 26/28] Update obstacle.py 
removed to_frame. Physical Object already has a to_frame function --- GEMstack/state/obstacle.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/GEMstack/state/obstacle.py b/GEMstack/state/obstacle.py index 8ddc10490..251fb30db 100644 --- a/GEMstack/state/obstacle.py +++ b/GEMstack/state/obstacle.py @@ -33,6 +33,3 @@ class Obstacle(PhysicalObject): collidable : bool state: ObstacleStateEnum - def to_frame(self, frame: ObjectFrameEnum, current_pose=None, start_pose_abs=None) -> Obstacle: - newpose = self.pose.to_frame(frame, current_pose, start_pose_abs) - return replace(self, pose=newpose) From d0b1cf603db67c9741210e151517af211d138466 Mon Sep 17 00:00:00 2001 From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com> Date: Mon, 12 May 2025 13:51:20 -0500 Subject: [PATCH 27/28] Update cone_detection.yaml --- launch/cone_detection.yaml | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/launch/cone_detection.yaml b/launch/cone_detection.yaml index ec0881a74..6b2d0a82e 100644 --- a/launch/cone_detection.yaml +++ b/launch/cone_detection.yaml @@ -26,23 +26,6 @@ drive: perception_normalization : StandardPerceptionNormalizer planning: - relations_estimation: - type: pedestrian_yield_logic.PedestrianYielder - args: - mode: 'real' - params: { - 'yielder': 'expert', # 'expert', 'analytic', or 'simulation' - 'planner': 'milestone', # 'milestone', 'dt', or 'dx' - 'desired_speed': 1.0, # m/s, 1.5 m/s seems max speed? Feb24 - 'acceleration': 0.75 # m/s2, 0.5 is not enough to start moving. 
Feb24 - } - route_planning: - type: StaticRoutePlanner - args: [!relative_path '../GEMstack/knowledge/routes/forward_15m.csv','start'] - motion_planning: longitudinal_planning.YieldTrajectoryPlanner - trajectory_tracking: - type: pure_pursuit.PurePursuitTrajectoryTracker - print: False log: From dd6a0cff2b962989c710d9e29f05695a423bb85e Mon Sep 17 00:00:00 2001 From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com> Date: Mon, 12 May 2025 13:52:03 -0500 Subject: [PATCH 28/28] Update cone_detection.py --- GEMstack/onboard/perception/cone_detection.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/GEMstack/onboard/perception/cone_detection.py b/GEMstack/onboard/perception/cone_detection.py index e0de4abef..42b74bb6b 100644 --- a/GEMstack/onboard/perception/cone_detection.py +++ b/GEMstack/onboard/perception/cone_detection.py @@ -361,6 +361,7 @@ def update(self, vehicle: VehicleState) -> Dict[str, Obstacle]: outline=None, material=ObstacleMaterialEnum.TRAFFIC_CONE, state=state, + collidable=True ) else: updated_obstacle = old_state @@ -375,6 +376,7 @@ def update(self, vehicle: VehicleState) -> Dict[str, Obstacle]: outline=None, material=ObstacleMaterialEnum.TRAFFIC_CONE, state=state, + collidable=True ) obstacles[obstacle_id] = new_obstacle self.tracked_obstacles[obstacle_id] = new_obstacle @@ -387,6 +389,7 @@ def update(self, vehicle: VehicleState) -> Dict[str, Obstacle]: outline=None, material=ObstacleMaterialEnum.TRAFFIC_CONE, state=state, + collidable = True ) obstacles[obstacle_id] = new_obstacle @@ -499,7 +502,7 @@ def box_to_fake_obstacle(box): pose = ObjectPose(t=0, x=x + w / 2, y=y + h / 2, z=0, yaw=0, pitch=0, roll=0, frame=ObjectFrameEnum.CURRENT) dims = (w, h, 0) return Obstacle(pose=pose, dimensions=dims, outline=None, - material=ObstacleMaterialEnum.TRAFFIC_CONE, state=ObstacleStateEnum.STANDING) + material=ObstacleMaterialEnum.TRAFFIC_CONE, state=ObstacleStateEnum.STANDING, collidable=True) if __name__ == '__main__':