From a03c2044f2f033c0c7dad19afe2cdd278df54953 Mon Sep 17 00:00:00 2001
From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com>
Date: Wed, 23 Apr 2025 10:39:57 -0500
Subject: [PATCH 1/3] Create cone_detection.py

---
 GEMstack/onboard/perception/cone_detection.py | 666 ++++++++++++++++++
 1 file changed, 666 insertions(+)
 create mode 100644 GEMstack/onboard/perception/cone_detection.py

diff --git a/GEMstack/onboard/perception/cone_detection.py b/GEMstack/onboard/perception/cone_detection.py
new file mode 100644
index 000000000..d41c527ae
--- /dev/null
+++ b/GEMstack/onboard/perception/cone_detection.py
@@ -0,0 +1,666 @@
+from ...state import AllState, VehicleState, ObjectPose, ObjectFrameEnum, AgentState, AgentEnum, AgentActivityEnum
+from ..interface.gem import GEMInterface
+from ..component import Component
+from ultralytics import YOLO
+import cv2
+from typing import Dict
+import open3d as o3d
+import numpy as np
+from sklearn.cluster import DBSCAN
+from scipy.spatial.transform import Rotation as R
+import rospy
+from sensor_msgs.msg import PointCloud2, Image
+import sensor_msgs.point_cloud2 as pc2
+import struct, ctypes
+from message_filters import Subscriber, ApproximateTimeSynchronizer
+from cv_bridge import CvBridge
+import time
+import math
+import ros_numpy
+import os
+
+
+# ----- Helper Functions -----
+
+def cylindrical_roi(points, center, radius, height):
+    horizontal_dist = np.linalg.norm(points[:, :2] - center[:2], axis=1)
+    vertical_diff = np.abs(points[:, 2] - center[2])
+    mask = (horizontal_dist <= radius) & (vertical_diff <= height / 2)
+    return points[mask]
+
+
+def undistort_image(image, K, D):
+    h, w = image.shape[:2]
+    newK, _ = cv2.getOptimalNewCameraMatrix(K, D, (w, h), 1, (w, h))
+    undistorted = cv2.undistort(image, K, D, None, newK)
+    return undistorted, newK
+
+def filter_points_within_threshold(points, threshold=15.0):
+    distances = np.linalg.norm(points, axis=1)
+    mask = distances <= threshold
+    return points[mask]
+
+
+def match_existing_cone(
+        new_center: np.ndarray,
+        new_dims: tuple,
+        existing_agents: Dict[str, AgentState],
+        distance_threshold: float = 1.0
+) -> str:
+    """
+    Find the closest existing Cone agent within a specified distance threshold.
+    """
+    best_agent_id = None
+    best_dist = float('inf')
+    for agent_id, agent_state in existing_agents.items():
+        old_center = np.array([agent_state.pose.x, agent_state.pose.y, agent_state.pose.z])
+        dist = np.linalg.norm(new_center - old_center)
+        if dist < distance_threshold and dist < best_dist:
+            best_dist = dist
+            best_agent_id = agent_id
+    return best_agent_id
+
+
+def compute_velocity(old_pose: ObjectPose, new_pose: ObjectPose, dt: float) -> tuple:
+    """
+    Compute the (vx, vy, vz) velocity based on change in pose over time.
+    """
+    if dt <= 0:
+        return (0, 0, 0)
+    vx = (new_pose.x - old_pose.x) / dt
+    vy = (new_pose.y - old_pose.y) / dt
+    vz = (new_pose.z - old_pose.z) / dt
+    return (vx, vy, vz)
+
+
+def extract_roi_box(lidar_pc, center, half_extents):
+    """
+    Extract a region of interest (ROI) from the LiDAR point cloud defined by an axis-aligned bounding box.
+    """
+    lower = center - half_extents
+    upper = center + half_extents
+    mask = np.all((lidar_pc >= lower) & (lidar_pc <= upper), axis=1)
+    return lidar_pc[mask]
+
+
+def pc2_to_numpy(pc2_msg, want_rgb=False):
+    """
+    Convert a ROS PointCloud2 message into a numpy array quickly using ros_numpy.
+    This function extracts the x, y, z coordinates from the point cloud.
+    """
+    # Convert the ROS message to a numpy structured array
+    pc = ros_numpy.point_cloud2.pointcloud2_to_array(pc2_msg)
+    # Stack x,y,z fields to a (N,3) array
+    pts = np.stack((np.array(pc['x']).ravel(),
+                    np.array(pc['y']).ravel(),
+                    np.array(pc['z']).ravel()), axis=1)
+    # Apply filtering (for example, x > 0 and z in a specified range)
+    mask = (pts[:, 0] > -0.5) & (pts[:, 2] < -1) & (pts[:, 2] > -2.7)
+    return pts[mask]
+
+
+def backproject_pixel(u, v, K):
+    """
+    Backprojects a pixel coordinate (u, v) into a normalized 3D ray in the camera coordinate system.
+    """
+    cx, cy = K[0, 2], K[1, 2]
+    fx, fy = K[0, 0], K[1, 1]
+    x = (u - cx) / fx
+    y = (v - cy) / fy
+    ray_dir = np.array([x, y, 1.0])
+    return ray_dir / np.linalg.norm(ray_dir)
+
+
+def find_human_center_on_ray(lidar_pc, ray_origin, ray_direction,
+                             t_min, t_max, t_step,
+                             distance_threshold, min_points, ransac_threshold):
+    """
+    Identify the center of a human along a projected ray.
+    (This function is no longer used in the new approach.)
+    """
+    return None, None, None
+
+
+def extract_roi(pc, center, roi_radius):
+    """
+    Extract points from a point cloud that lie within a specified radius of a center point.
+    """
+    distances = np.linalg.norm(pc - center, axis=1)
+    return pc[distances < roi_radius]
+
+
+def refine_cluster(roi_points, center, eps=0.2, min_samples=10):
+    """
+    Refine a point cluster by applying DBSCAN and return the cluster closest to 'center'.
+    """
+    if roi_points.shape[0] < min_samples:
+        return roi_points
+    clustering = DBSCAN(eps=eps, min_samples=min_samples).fit(roi_points)
+    labels = clustering.labels_
+    valid_clusters = [roi_points[labels == l] for l in set(labels) if l != -1]
+    if not valid_clusters:
+        return roi_points
+    best_cluster = min(valid_clusters, key=lambda c: np.linalg.norm(np.mean(c, axis=0) - center))
+    return best_cluster
+
+
+def remove_ground_by_min_range(cluster, z_range=0.05):
+    """
+    Remove points within z_range of the minimum z (assumed to be ground).
+    """
+    if cluster is None or cluster.shape[0] == 0:
+        return cluster
+    min_z = np.min(cluster[:, 2])
+    filtered = cluster[cluster[:, 2] > (min_z + z_range)]
+    return filtered
+
+
+def get_bounding_box_center_and_dimensions(points):
+    """
+    Calculate the axis-aligned bounding box's center and dimensions for a set of 3D points.
+    """
+    if points.shape[0] == 0:
+        return None, None
+    min_vals = np.min(points, axis=0)
+    max_vals = np.max(points, axis=0)
+    center = (min_vals + max_vals) / 2
+    dimensions = max_vals - min_vals
+    return center, dimensions
+
+
+def create_ray_line_set(start, end):
+    """
+    Create an Open3D LineSet object representing a ray between two 3D points.
+    The line is colored yellow.
+    """
+    points = [start, end]
+    lines = [[0, 1]]
+    line_set = o3d.geometry.LineSet()
+    line_set.points = o3d.utility.Vector3dVector(points)
+    line_set.lines = o3d.utility.Vector2iVector(lines)
+    line_set.colors = o3d.utility.Vector3dVector([[1, 1, 0]])
+    return line_set
+
+
+def downsample_points(lidar_points, voxel_size=0.15):
+    pcd = o3d.geometry.PointCloud()
+    pcd.points = o3d.utility.Vector3dVector(lidar_points)
+    down_pcd = pcd.voxel_down_sample(voxel_size=voxel_size)
+    return np.asarray(down_pcd.points)
+
+
+def filter_depth_points(lidar_points, max_depth_diff=0.9, use_norm=True):
+    if lidar_points.shape[0] == 0:
+        return lidar_points
+
+    if use_norm:
+        depths = np.linalg.norm(lidar_points, axis=1)
+    else:
+        depths = lidar_points[:, 0]
+
+    min_depth = np.min(depths)
+    max_possible_depth = min_depth + max_depth_diff
+    mask = depths < max_possible_depth
+    return lidar_points[mask]
+
+
+def visualize_geometries(geometries, window_name="Open3D", width=800, height=600, point_size=5.0):
+    """
+    Visualize a list of Open3D geometry objects in a dedicated window.
+    """
+    vis = o3d.visualization.Visualizer()
+    vis.create_window(window_name=window_name, width=width, height=height)
+    for geom in geometries:
+        vis.add_geometry(geom)
+    opt = vis.get_render_option()
+    opt.point_size = point_size
+    vis.run()
+    vis.destroy_window()
+
+
+def pose_to_matrix(pose):
+    """
+    Compose a 4x4 transformation matrix from a pose state.
+    Assumes pose has attributes: x, y, z, yaw, pitch, roll,
+    where the angles are given in degrees.
+    """
+    x = pose.x if pose.x is not None else 0.0
+    y = pose.y if pose.y is not None else 0.0
+    z = pose.z if pose.z is not None else 0.0
+    if pose.yaw is not None and pose.pitch is not None and pose.roll is not None:
+        yaw = math.radians(pose.yaw)
+        pitch = math.radians(pose.pitch)
+        roll = math.radians(pose.roll)
+    else:
+        yaw = 0.0
+        pitch = 0.0
+        roll = 0.0
+    R_mat = R.from_euler('zyx', [yaw, pitch, roll]).as_matrix()
+    T = np.eye(4)
+    T[:3, :3] = R_mat
+    T[:3, 3] = np.array([x, y, z])
+    return T
+
+
+def transform_points_l2c(lidar_points, T_l2c):
+    N = lidar_points.shape[0]
+    pts_hom = np.hstack((lidar_points, np.ones((N, 1))))  # (N,4)
+    pts_cam = (T_l2c @ pts_hom.T).T  # (N,4)
+    return pts_cam[:, :3]
+
+
+# ----- New: Vectorized projection function -----
+def project_points(pts_cam, K, original_lidar_points):
+    """
+    Vectorized version.
+    pts_cam: (N,3) array of points in camera coordinates.
+    original_lidar_points: (N,3) array of points in LiDAR coordinates.
+    Returns a (M,5) array: [u, v, X_lidar, Y_lidar, Z_lidar] for all points with Z>0.
+    """
+    mask = pts_cam[:, 2] > 0
+    pts_cam_valid = pts_cam[mask]
+    lidar_valid = original_lidar_points[mask]
+    Xc = pts_cam_valid[:, 0]
+    Yc = pts_cam_valid[:, 1]
+    Zc = pts_cam_valid[:, 2]
+    u = (K[0, 0] * (Xc / Zc) + K[0, 2]).astype(np.int32)
+    v = (K[1, 1] * (Yc / Zc) + K[1, 2]).astype(np.int32)
+    proj = np.column_stack((u, v, lidar_valid))
+    return proj
+
+
+class ConeDetector3D(Component):
+    """
+    Detects cones by fusing YOLO 2D detections with LiDAR point cloud data.
+
+    Tracking is optional: set `enable_tracking=False` to disable persistent tracking
+    and return only detections from the current frame.
+    """
+
+    def __init__(self, vehicle_interface: GEMInterface):
+        self.vehicle_interface = vehicle_interface
+        self.enable_tracking = False
+        self.current_agents = {}
+        self.tracked_agents = {}
+        self.cone_counter = 0
+        self.latest_image = None
+        self.latest_lidar = None
+        self.bridge = CvBridge()
+        self.start_pose_abs = None
+        self.camera_front = True
+        self.visualize_2d = False
+        self.use_cyl_roi = False
+        self.start_time = None
+        self.use_start_frame = False
+        self.save_data = False
+        self.orientation = False
+
+    def rate(self) -> float:
+        return 4.0
+
+    def state_inputs(self) -> list:
+        return ['vehicle']
+
+    def state_outputs(self) -> list:
+        return ['agents']
+
+    def initialize(self):
+        self.rgb_sub = Subscriber('/oak/rgb/image_raw', Image)
+        self.lidar_sub = Subscriber('/ouster/points', PointCloud2)
+        self.sync = ApproximateTimeSynchronizer([self.rgb_sub, self.lidar_sub],
+                                                queue_size=10, slop=0.1)
+        self.sync.registerCallback(self.synchronized_callback)
+        self.detector = YOLO('/home/gem/s2025_perception_merge/GEMstack/GEMstack/knowledge/detection/cone.pt')
+        self.detector.to('cuda')
+
+        if self.camera_front:
+            self.K = np.array([[684.83331299, 0., 573.37109375],
+                               [0., 684.60968018, 363.70092773],
+                               [0., 0., 1.]])
+        else:
+            self.K = np.array([[1.17625545e+03, 0.00000000e+00, 9.66432645e+02],
+                               [0.00000000e+00, 1.17514569e+03, 6.08580326e+02],
+                               [0.00000000e+00, 0.00000000e+00, 1.00000000e+00]])
+
+        if self.camera_front:
+            self.D = np.array([0.0, 0.0, 0.0, 0.0, 0.0])
+        else:
+            self.D = np.array([-2.70136325e-01, 1.64393255e-01, -1.60720782e-03, -7.41246708e-05,
+                               -6.19939758e-02])
+
+        self.T_l2v = np.array([[0.99939639, 0.02547917, 0.023615, 1.1],
+                               [-0.02530848, 0.99965156, -0.00749882, 0.03773583],
+                               [-0.02379784, 0.00689664, 0.999693, 1.95320223],
+                               [0., 0., 0., 1.]])
+        if self.camera_front:
+            self.T_l2c = np.array([
+                [0.001090, -0.999489, -0.031941, 0.149698],
+                [-0.007664, 0.031932, -0.999461, -0.397813],
+                [0.999970, 0.001334, -0.007625, -0.691405],
+                [0., 0., 0., 1.000000]
+            ])
+        else:
+            self.T_l2c = np.array([[-0.71836368, -0.69527204, -0.02346088, 0.05718003],
+                                   [-0.09720448, 0.13371206, -0.98624154, -0.1598301],
+                                   [0.68884317, -0.7061996, -0.16363744, -1.04767285],
+                                   [0., 0., 0., 1.]]
+                                  )
+        self.T_c2l = np.linalg.inv(self.T_l2c)
+        self.R_c2l = self.T_c2l[:3, :3]
+        self.camera_origin_in_lidar = self.T_c2l[:3, 3]
+
+    def synchronized_callback(self, image_msg, lidar_msg):
+        step1 = time.time()
+        try:
+            self.latest_image = self.bridge.imgmsg_to_cv2(image_msg, "bgr8")
+        except Exception as e:
+            rospy.logerr("Failed to convert image: {}".format(e))
+            self.latest_image = None
+        step2 = time.time()
+        self.latest_lidar = pc2_to_numpy(lidar_msg, want_rgb=False)
+        step3 = time.time()
+        print('image callback: ', step2 - step1, 'lidar callback ', step3 - step2)
+
+    def update(self, vehicle: VehicleState) -> Dict[str, AgentState]:
+        downsample = False
+        if self.latest_image is None or self.latest_lidar is None:
+            return {}
+
+        # Ensure data/ exists and build timestamp
+        os.makedirs("data", exist_ok=True)
+        current_time = self.vehicle_interface.time()
+        if self.start_time is None:
+            self.start_time = current_time
+        time_elapsed = current_time - self.start_time
+
+        if self.save_data:
+            tstamp = int(self.vehicle_interface.time() * 1000)
+            # 1) Dump raw image
+            cv2.imwrite(f"data/{tstamp}_image.png", self.latest_image)
+            # 2) Dump raw LiDAR
+            np.savez(f"data/{tstamp}_lidar.npz", lidar=self.latest_lidar)
+            # 3) Write BEFORE_TRANSFORM
+            with open(f"data/{tstamp}_vehstate.txt", "w") as f:
+                vp = vehicle.pose
+                f.write(
+                    f"BEFORE_TRANSFORM "
+                    f"x={vp.x:.3f}, y={vp.y:.3f}, z={vp.z:.3f}, "
+                    f"yaw={vp.yaw:.2f}, pitch={vp.pitch:.2f}, roll={vp.roll:.2f}\n"
+                )
+            # Compute vehicle_start_pose in either START or CURRENT
+            if self.use_start_frame:
+                if self.start_pose_abs is None:
+                    self.start_pose_abs = vehicle.pose
+                vehicle_start_pose = vehicle.pose.to_frame(
+                    ObjectFrameEnum.START,
+                    vehicle.pose,
+                    self.start_pose_abs
+                )
+                mode = "START"
+            else:
+                vehicle_start_pose = vehicle.pose
+                mode = "CURRENT"
+            with open(f"data/{tstamp}_vehstate.txt", "a") as f:
+                f.write(
+                    f"AFTER_TRANSFORM "
+                    f"x={vehicle_start_pose.x:.3f}, "
+                    f"y={vehicle_start_pose.y:.3f}, "
+                    f"z={vehicle_start_pose.z:.3f}, "
+                    f"yaw={vehicle_start_pose.yaw:.2f}, "
+                    f"pitch={vehicle_start_pose.pitch:.2f}, "
+                    f"roll={vehicle_start_pose.roll:.2f}, "
+                    f"frame={mode}\n"
+                )
+
+        undistorted_img, current_K = undistort_image(self.latest_image, self.K, self.D)
+        self.current_K = current_K
+        self.latest_image = undistorted_img
+        orig_H, orig_W = undistorted_img.shape[:2]
+
+        # --- Begin modifications for three-angle detection ---
+        img_normal = undistorted_img
+        results_normal = self.detector(img_normal, conf=0.3, classes=[0])
+        combined_boxes = []
+        if self.orientation:
+            img_left = cv2.rotate(undistorted_img.copy(), cv2.ROTATE_90_COUNTERCLOCKWISE)
+            img_right = cv2.rotate(undistorted_img.copy(), cv2.ROTATE_90_CLOCKWISE)
+            results_left = self.detector(img_left, conf=0.3, classes=[0])
+            results_right = self.detector(img_right, conf=0.3, classes=[0])
+            boxes_left = np.array(results_left[0].boxes.xywh.cpu()) if len(results_left) > 0 else []
+            boxes_right = np.array(results_right[0].boxes.xywh.cpu()) if len(results_right) > 0 else []
+            for box in boxes_left:
+                cx, cy, w, h = box
+                new_cx = cy
+                new_cy = orig_W - 1 - cx
+                combined_boxes.append((new_cx, new_cy, h, w, AgentActivityEnum.RIGHT))
+            for box in boxes_right:
+                cx, cy, w, h = box
+                new_cx = orig_H - 1 - cy
+                new_cy = cx
+                combined_boxes.append((new_cx, new_cy, h, w, AgentActivityEnum.LEFT))
+
+        boxes_normal = np.array(results_normal[0].boxes.xywh.cpu()) if len(results_normal) > 0 else []
+        for box in boxes_normal:
+            cx, cy, w, h = box
+            combined_boxes.append((cx, cy, w, h, AgentActivityEnum.STANDING))
+
+        if getattr(self, 'visualize_2d', False):
+            for (cx, cy, w, h, activity) in combined_boxes:
+                left = int(cx - w / 2)
+                right = int(cx + w / 2)
+                top = int(cy - h / 2)
+                bottom = int(cy + h / 2)
+                if activity == AgentActivityEnum.STANDING:
+                    color = (255, 0, 0)
+                    label = "STANDING"
+                elif activity == AgentActivityEnum.RIGHT:
+                    color = (0, 255, 0)
+                    label = "RIGHT"
+                elif activity == AgentActivityEnum.LEFT:
+                    color = (0, 0, 255)
+                    label = "LEFT"
+                else:
+                    color = (255, 255, 255)
+                    label = "UNKNOWN"
+                cv2.rectangle(undistorted_img, (left, top), (right, bottom), color, 2)
+                cv2.putText(undistorted_img, label, (left, max(top - 5, 20)),
+                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
+            cv2.imshow("Detection - Cone 2D", undistorted_img)
+
+        if downsample:
+            lidar_down = downsample_points(self.latest_lidar, voxel_size=0.1)
+        else:
+            lidar_down = self.latest_lidar.copy()
+
+        pts_cam = transform_points_l2c(lidar_down, self.T_l2c)
+        projected_pts = project_points(pts_cam, self.current_K, lidar_down)
+
+        agents = {}
+
+        for i, box_info in enumerate(combined_boxes):
+            cx, cy, w, h, activity = box_info
+            start = time.time()
+            left = int(cx - w / 2)
+            right = int(cx + w / 2)
+            top = int(cy - h / 2)
+            bottom = int(cy + h / 2)
+            mask = (projected_pts[:, 0] >= left) & (projected_pts[:, 0] <= right) & \
+                   (projected_pts[:, 1] >= top) & (projected_pts[:, 1] <= bottom)
+            roi_pts = projected_pts[mask]
+            if roi_pts.shape[0] < 5:
+                continue
+            points_3d = roi_pts[:, 2:5]
+            points_3d = filter_points_within_threshold(points_3d, 30)
+            points_3d = filter_depth_points(points_3d, max_depth_diff=0.3)
+
+            if self.use_cyl_roi:
+                global_filtered = filter_points_within_threshold(lidar_down, 30)
+                roi_cyl = cylindrical_roi(global_filtered, np.mean(points_3d, axis=0), radius=0.3, height=1.2)
+                refined_cluster = remove_ground_by_min_range(roi_cyl, z_range=0.01)
+                refined_cluster = filter_depth_points(refined_cluster, max_depth_diff=0.2)
+            else:
+                refined_cluster = remove_ground_by_min_range(points_3d, z_range=0.05)
+            end1 = time.time()
+            if refined_cluster.shape[0] < 4:
+                continue
+
+            pcd = o3d.geometry.PointCloud()
+            pcd.points = o3d.utility.Vector3dVector(refined_cluster)
+            obb = pcd.get_oriented_bounding_box()
+            refined_center = obb.center
+            dims = tuple(obb.extent)
+            R_lidar = obb.R.copy()
+            end2 = time.time()
+
+            refined_center_hom = np.append(refined_center, 1)
+            refined_center_vehicle_hom = self.T_l2v @ refined_center_hom
+            refined_center_vehicle = refined_center_vehicle_hom[:3]
+
+            R_vehicle = self.T_l2v[:3, :3] @ R_lidar
+            yaw, pitch, roll = R.from_matrix(R_vehicle).as_euler('zyx', degrees=False)
+            refined_center = refined_center_vehicle
+
+            if self.use_start_frame:
+                if self.start_pose_abs is None:
+                    self.start_pose_abs = vehicle.pose
+                vehicle_start_pose = vehicle.pose.to_frame(
+                    ObjectFrameEnum.START,
+                    vehicle.pose,
+                    self.start_pose_abs
+                )
+                T_vehicle_to_start = pose_to_matrix(vehicle_start_pose)
+                xp, yp, zp = (T_vehicle_to_start @ np.append(refined_center, 1))[:3]
+                out_frame = ObjectFrameEnum.START
+            else:
+                xp, yp, zp = refined_center
+                out_frame = ObjectFrameEnum.CURRENT
+
+            new_pose = ObjectPose(
+                t=current_time,
+                x=xp, y=yp, z=zp,
+                yaw=yaw, pitch=pitch, roll=roll,
+                frame=out_frame
+            )
+
+            # --- Optional tracking ---
+            if self.enable_tracking:
+                existing_id = match_existing_cone(
+                    np.array([new_pose.x, new_pose.y, new_pose.z]),
+                    dims,
+                    self.tracked_agents,
+                    distance_threshold=2.0
+                )
+                if existing_id is not None:
+                    old_state = self.tracked_agents[existing_id]
+                    if vehicle.v < 0.1:
+                        alpha = 0.1
+                        avg_x = alpha * new_pose.x + (1 - alpha) * old_state.pose.x
+                        avg_y = alpha * new_pose.y + (1 - alpha) * old_state.pose.y
+                        avg_z = alpha * new_pose.z + (1 - alpha) * old_state.pose.z
+                        avg_yaw = alpha * new_pose.yaw + (1 - alpha) * old_state.pose.yaw
+                        avg_pitch = alpha * new_pose.pitch + (1 - alpha) * old_state.pose.pitch
+                        avg_roll = alpha * new_pose.roll + (1 - alpha) * old_state.pose.roll
+
+                        updated_pose = ObjectPose(
+                            t=new_pose.t,
+                            x=avg_x,
+                            y=avg_y,
+                            z=avg_z,
+                            yaw=avg_yaw,
+                            pitch=avg_pitch,
+                            roll=avg_roll,
+                            frame=new_pose.frame
+                        )
+                        updated_agent = AgentState(
+                            pose=updated_pose,
+                            dimensions=dims,
+                            outline=None,
+                            type=AgentEnum.CONE,
+                            activity=activity,
+                            velocity=(0, 0, 0),
+                            yaw_rate=0
+                        )
+                    else:
+                        updated_agent = old_state
+                    agents[existing_id] = updated_agent
+                    self.tracked_agents[existing_id] = updated_agent
+                else:
+                    agent_id = f"Cone{self.cone_counter}"
+                    self.cone_counter += 1
+                    new_agent = AgentState(
+                        pose=new_pose,
+                        dimensions=dims,
+                        outline=None,
+                        type=AgentEnum.CONE,
+                        activity=activity,
+                        velocity=(0, 0, 0),
+                        yaw_rate=0
+                    )
+                    agents[agent_id] = new_agent
+                    self.tracked_agents[agent_id] = new_agent
+            else:
+                agent_id = f"Cone{self.cone_counter}"
+                self.cone_counter += 1
+                new_agent = AgentState(
+                    pose=new_pose,
+                    dimensions=dims,
+                    outline=None,
+                    type=AgentEnum.CONE,
+                    activity=activity,
+                    velocity=(0, 0, 0),
+                    yaw_rate=0
+                )
+                agents[agent_id] = new_agent
+
+        self.current_agents = agents
+
+        # If tracking not enabled, return only current frame detections
+        if not self.enable_tracking:
+            return self.current_agents
+
+        stale_ids = [agent_id for agent_id, agent in self.tracked_agents.items()
+                     if current_time - agent.pose.t > 5.0]
+        for agent_id in stale_ids:
+            rospy.loginfo(f"Removing stale agent: {agent_id}\n")
+            del self.tracked_agents[agent_id]
+
+        return self.tracked_agents
+
+    # ----- Fake Cone Detector 2D (for Testing Purposes) -----
+
+class FakConeDetector(Component):
+    def __init__(self, vehicle_interface: GEMInterface):
+        self.vehicle_interface = vehicle_interface
+        self.times = [(5.0, 20.0), (30.0, 35.0)]
+        self.t_start = None
+
+    def rate(self):
+        return 4.0
+
+    def state_inputs(self):
+        return ['vehicle']
+
+    def state_outputs(self):
+        return ['agents']
+
+    def update(self, vehicle: VehicleState) -> Dict[str, AgentState]:
+        if self.t_start is None:
+            self.t_start = self.vehicle_interface.time()
+        t = self.vehicle_interface.time() - self.t_start
+        res = {}
+        for time_range in self.times:
+            if t >= time_range[0] and t <= time_range[1]:
+                res['cone0'] = box_to_fake_agent((0, 0, 0, 0))
+                rospy.loginfo("Detected a Cone (simulated)")
+        return res
+
+def box_to_fake_agent(box):
+    x, y, w, h = box
+    pose = ObjectPose(t=0, x=x + w / 2, y=y + h / 2, z=0, yaw=0, pitch=0, roll=0, frame=ObjectFrameEnum.CURRENT)
+    dims = (w, h, 0)
+    return AgentState(pose=pose, dimensions=dims, outline=None,
+                      type=AgentEnum.CONE, activity=AgentActivityEnum.MOVING,
+                      velocity=(0, 0, 0), yaw_rate=0)
+
+if __name__ == '__main__':
+    pass

From 0b84d288a422dade88ece3cc447a0fcbef059181 Mon Sep 17 00:00:00 2001
From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com>
Date: Wed, 23 Apr 2025 10:40:41 -0500
Subject: [PATCH 2/3] Create cone_detection.yaml

---
 launch/cone_detection.yaml | 119 +++++++++++++++++++++++++++++++++++++
 1 file changed, 119 insertions(+)
 create mode 100644 launch/cone_detection.yaml

diff --git a/launch/cone_detection.yaml b/launch/cone_detection.yaml
new file mode 100644
index 000000000..165a5b9b8
--- /dev/null
+++ b/launch/cone_detection.yaml
@@ -0,0 +1,119 @@
+description: "Run the yielding trajectory planner on the real vehicle with real perception"
+mode: hardware
+vehicle_interface: gem_hardware.GEMHardwareInterface
+mission_execution: StandardExecutor
+
+# Recovery behavior after a component failure
+recovery: 
+    planning: 
+        trajectory_tracking : recovery.StopTrajectoryTracker
+
+# Driving behavior for the GEM vehicle.  Runs real pedestrian perception, yield planner, but does not send commands to real vehicle.
+drive: 
+    perception:
+        state_estimation : GNSSStateEstimator
+        agent_detection : cone_detection.ConeDetector3D
+        perception_normalization : StandardPerceptionNormalizer
+    planning:
+        relations_estimation: 
+            type: pedestrian_yield_logic.PedestrianYielder
+            args:
+                mode: 'real'
+                params: {
+                    'yielder':       'expert', # 'expert', 'analytic', or 'simulation'
+                    'planner':       'milestone', # 'milestone', 'dt', or 'dx'
+                    'desired_speed': 1.0,  # m/s, 1.5 m/s seems max speed? Feb24
+                    'acceleration':  0.75  # m/s2, 0.5 is not enough to start moving. Feb24
+                    }
+        route_planning:
+            type: StaticRoutePlanner
+            args: [!relative_path '../GEMstack/knowledge/routes/forward_15m.csv','start']
+        motion_planning: longitudinal_planning.YieldTrajectoryPlanner
+        trajectory_tracking:
+            type: pure_pursuit.PurePursuitTrajectoryTracker
+            print: False
+
+
+log:
+    # Specify the top-level folder to save the log files.  Default is 'logs'
+    #folder : 'logs' 
+    # If prefix is specified, then the log folder will be named with the prefix followed by the date and time. Default no prefix
+    #prefix : 'fixed_route_'  
+    # If suffix is specified, then logs will output to folder/prefix+suffix. Default uses date and time as the suffix
+    #suffix : 'test3'  
+    # Specify which ros topics to record to vehicle.bag. Default records nothing. This records the "standard" ROS topics.
+    ros_topics : 
+    # Specify options to pass to rosbag record. Default is no options.
+    #rosbag_options : '--split --size=1024' 
+    # If True, then record all readings / commands of the vehicle interface. Default False
+    vehicle_interface : True
+    # Specify which components to record to behavior.json. Default records nothing
+    components : ['state_estimation','agent_detection','motion_planning']
+    # Specify which components of state to record to state.json. Default records nothing 
+    #state: ['all']
+    # Specify the rate in Hz at which to record state to state.json. Default records at the pipeline's rate
+    #state_rate: 10
+    # If True, then make plots of the recorded data specifically from PurePursuitTrajectoryTracker_debug.csv and report metrics. Default False
+    #auto_plot : True
+replay:  # Add items here to set certain topics / inputs to be replayed from logs
+    # Specify which log folder to replay from
+    log: 
+    ros_topics : []
+    components : []
+
+#usually can keep this constant
+computation_graph: !include "../GEMstack/knowledge/defaults/computation_graph.yaml"
+
+variants:
+    detector_only:
+        run:
+            description: "Run the pedestrian detection code"
+        drive:
+            planning:
+                trajectory_tracking:
+    real_sim:
+        run:
+            description: "Run the pedestrian detection code with real detection and fake simulation"
+            mode: hardware
+            vehicle_interface:
+                type: gem_mixed.GEMRealSensorsWithSimMotionInterface
+                args:
+                    scene: !relative_path '../scenes/xyhead_demo.yaml'
+            mission_execution: StandardExecutor
+            require_engaged: False
+            visualization: !include "klampt_visualization.yaml"
+            drive: 
+                perception:
+                    state_estimation : OmniscientStateEstimator
+                planning:
+                    relations_estimation: 
+                        type: pedestrian_yield_logic.PedestrianYielder
+                        args:
+                            mode: 'sim'
+
+    fake_real:
+        run:
+            description: "Run the yielding trajectory planner on the real vehicle with faked perception"
+            drive:
+                perception:
+                    agent_detection : cone_detection.FakeConeDetector2D
+
+    fake_sim:
+        run:
+            description: "Run the yielding trajectory planner in simulation with faked perception"
+            mode: simulation
+            vehicle_interface:
+                type: gem_simulator.GEMDoubleIntegratorSimulationInterface
+                args:
+                    scene: !relative_path '../scenes/xyhead_demo.yaml'
+            visualization: !include "klampt_visualization.yaml"
+            drive: 
+                perception:
+                    # agent_detection : pedestrian_detection.FakePedestrianDetector2D
+                    agent_detection : OmniscientAgentDetector  #this option reads agents from the simulator
+                    state_estimation : OmniscientStateEstimator
+                planning:
+                    relations_estimation: 
+                        type: pedestrian_yield_logic.PedestrianYielder
+                        args:
+                            mode: 'sim'

From ec41cf7187d9f57939f5a1fe3ddb7c5049d7da36 Mon Sep 17 00:00:00 2001
From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com>
Date: Wed, 23 Apr 2025 10:44:36 -0500
Subject: [PATCH 3/3] Update agent.py

---
 GEMstack/state/agent.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/GEMstack/state/agent.py b/GEMstack/state/agent.py
index 757805837..2ca23ba76 100644
--- a/GEMstack/state/agent.py
+++ b/GEMstack/state/agent.py
@@ -11,6 +11,7 @@ class AgentEnum(Enum):
     LARGE_TRUCK = 2
     PEDESTRIAN = 3
     BICYCLIST = 4
+    CONE = 5
 
 
 class AgentActivityEnum(Enum):
@@ -18,6 +19,9 @@ class AgentActivityEnum(Enum):
     MOVING = 1          # standard motion.  Predictions will be used here
     FAST = 2            # indicates faster than usual motion, e.g., runners.
     UNDETERMINED = 3    # unknown activity
+    STANDING = 4        # standing cone
+    LEFT = 5            # flipped cone facing left
+    RIGHT = 6           # flipped cone facing right
 
 
 @dataclass
@@ -41,4 +45,4 @@ def velocity_parent(self) -> Tuple[float,float,float]:
     def to_frame(self, frame : ObjectFrameEnum, current_pose = None, start_pose_abs = None) -> AgentState:
         newpose = self.pose.to_frame(frame,current_pose,start_pose_abs)
         newvelocity = convert_vector(self.velocity,self.pose.frame,frame,current_pose,start_pose_abs)
-        return replace(self,pose=newpose,velocity=newvelocity)
\ No newline at end of file
+        return replace(self,pose=newpose,velocity=newvelocity)