From faa03966850220fec24d1024a9f7d22bbbea7155 Mon Sep 17 00:00:00 2001
From: KenC1014 <kenken4016@gmail.com>
Date: Tue, 11 Feb 2025 21:21:16 -0600
Subject: [PATCH 01/46] test fusion

---
 GEMstack/onboard/perception/fusion.py    | 34 ++++++++++++++++++++++++
 GEMstack/onboard/perception/transform.py | 20 ++++++++++++++
 2 files changed, 54 insertions(+)
 create mode 100644 GEMstack/onboard/perception/fusion.py
 create mode 100644 GEMstack/onboard/perception/transform.py

diff --git a/GEMstack/onboard/perception/fusion.py b/GEMstack/onboard/perception/fusion.py
new file mode 100644
index 000000000..0b6e4f205
--- /dev/null
+++ b/GEMstack/onboard/perception/fusion.py
@@ -0,0 +1,34 @@
+from cv_bridge import CvBridge
+from sensor_msgs.msg import Image, PointCloud2
+import rospy
+import message_filters
+
+
+class Fusion3D():
+    def __init__(self):
+        self.bridge = CvBridge()
+        self.stereo_rosbag = message_filters.Subscriber('/oak/stereo/image_raw', Image, self.stereo_callback, queue_size=1)
+        self.top_lidar_rosbag = message_filters.Subscriber('/ouster/points', PointCloud2, self.top_lidar_callback, queue_size=1)
+
+        ts = message_filters.ApproximateTimeSynchronizer([self.stereo_rosbag, self.top_lidar_rosbag], queue_size=10, slop=0.1)
+        ts.registerCallback(self.callback)
+        rospy.spin()
+
+    def callback(left_img, right_img, lidar_msg, camera_info):
+    # Process synchronized data here
+        pass
+
+    def stereo_callback(self, image: Image):
+        image = self.bridge.imgmsg_to_cv2(image, "16UC1")
+        print(f"stereo: {image.shape}")        
+
+    def top_lidar_callback(self, point_cloud: PointCloud2):
+        # point_cloud = self.bridge.imgmsg_to_cv2(point_cloud, "16UC1")
+        print(f"point_cloud: {point_cloud.fields}")       
+
+if __name__ == '__main__':
+    rospy.init_node('fusion_node', anonymous=True)
+    Fusion3D()
+    while not rospy.core.is_shutdown():
+        rospy.rostime.wallsleep(0.5)
+
diff --git a/GEMstack/onboard/perception/transform.py b/GEMstack/onboard/perception/transform.py
new file mode 100644
index 000000000..8d47598fa
--- /dev/null
+++ b/GEMstack/onboard/perception/transform.py
@@ -0,0 +1,20 @@
+import rospy
+import tf
+
+def publish_tf():
+    rospy.init_node('pointcloud_tf_broadcaster')
+    br = tf.TransformBroadcaster()
+    rate = rospy.Rate(10)  # 10 Hz
+
+    while not rospy.is_shutdown():
+        br.sendTransform(
+            (0, 0, 1),  # (x, y, z) translation
+            tf.transformations.quaternion_from_euler(0, 0, 0),  # (roll, pitch, yaw)
+            rospy.Time.now(),
+            "oak_rgb_camera_optical_frame",  # Child frame (sensor)
+            "map"  # Parent frame (world)
+        )
+        rate.sleep()
+
+if __name__ == "__main__":
+    publish_tf()
\ No newline at end of file

From 97b6dc688caf21150bb1deb43080c58ee972864e Mon Sep 17 00:00:00 2001
From: nvikramraj <nvikramraj1305@gmail.com>
Date: Thu, 13 Feb 2025 16:17:31 -0600
Subject: [PATCH 02/46] Lidar to Base frame transformation

---
 .../perception/pedestrian_detection.py        | 83 +++++++++++++++++--
 requirements.txt                              |  1 +
 2 files changed, 75 insertions(+), 9 deletions(-)

diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py
index f91145274..aeda0b15b 100644
--- a/GEMstack/onboard/perception/pedestrian_detection.py
+++ b/GEMstack/onboard/perception/pedestrian_detection.py
@@ -5,9 +5,13 @@
 import cv2
 from typing import Dict
 from cv_bridge import CvBridge
-from sensor_msgs.msg import Image
+from sensor_msgs.msg import Image, PointCloud2, PointField
+from tf2_sensor_msgs.tf2_sensor_msgs import do_transform_cloud
+from geometry_msgs.msg import TransformStamped
 import rospy
 import os
+import numpy as np
+import tf2_ros
 
 
 def box_to_fake_agent(box):
@@ -56,8 +60,8 @@ def initialize(self):
         """Initializes subscribers and publishers
         
         self.vehicle_interface.subscribe_sensor -> GEM Car camera subscription
-        self.rosbag_test -> ROS Bag topic subscription
-        self.pub_image -> Publishes Image with detection for visualization
+        self.rosbag_cam_sub -> ROS Bag topic subscription
+        self.rosbag_cam_pub -> Publishes Image with detection for visualization
 
         """
         #tell the vehicle to use image_callback whenever 'front_camera' gets a reading, and it expects images of type cv2.Mat
@@ -68,12 +72,74 @@ def initialize(self):
         # Webcam
         # rospy.Subscriber('/webcam', Image, self.image_callback)
 
+        # Conversion tools
+        self.cv_bridge = CvBridge()
+        self.tf_buffer = tf2_ros.Buffer()
+        self.tf_listener = tf2_ros.TransformListener(self.tf_buffer)
+
         # Testing with rosbag
-        self.rosbag_test = rospy.Subscriber('/oak/rgb/image_raw',Image, self.image_callback,queue_size=1)
         if(self.visualization):
-            self.pub_image = rospy.Publisher("/camera/image_detection", Image, queue_size=1)
+            self.rosbag_cam_pub = rospy.Publisher("/camera/image_detection", Image, queue_size=1)
+
+        # Lidar Tranform topic publisher
+        self.rosbag_lidar_livox_pub = rospy.Publisher("/livox/transformed_lidar", PointCloud2, queue_size=1)
+        # self.rosbag_lidar_ouster_pub = rospy.Publisher("/ouster/transformed_lidar", PointCloud2, queue_size=1)
+
+        self.rosbag_cam_sub = rospy.Subscriber('/oak/rgb/image_raw',Image, self.image_callback,queue_size=1)
+        self.rosbag_lidar_livox_sub = rospy.Subscriber('/livox/lidar',PointCloud2, self.lidar_livox_callback,queue_size=1)
+        # self.rosbag_lidar_ouster_sub = rospy.Subscriber('/ouster/points',PointCloud2, self.lidar_ouster_callback,queue_size=1)
+        
         pass
     
+
+    def publish_base_link(self):
+
+        br = tf2_ros.TransformBroadcaster()
+        transform = TransformStamped()
+        
+        transform.header.stamp = rospy.Time.now()
+        transform.header.frame_id = "map"  # Parent frame
+        transform.child_frame_id = "base_link"  # Child frame
+
+        # Set translation (change as per actual vehicle position)
+        transform.transform.translation.x = 0.0
+        transform.transform.translation.y = 0.0
+        transform.transform.translation.z = 0.0
+
+        # Set rotation (quaternion)
+        transform.transform.rotation.x = 0.0
+        transform.transform.rotation.y = 0.0
+        transform.transform.rotation.z = 0.0
+        transform.transform.rotation.w = 1.0  # No rotation
+        br.sendTransform(transform)
+
+    def lidar_livox_callback(self,pointcloud : PointCloud2):
+
+        transform = self.tf_buffer.lookup_transform('base_link', pointcloud.header.frame_id, rospy.Time(0), rospy.Duration(1.0))
+        
+        # Transform the point cloud
+        transformed_cloud = do_transform_cloud(pointcloud, transform)
+
+        # Publish transformed point cloud
+        self.rosbag_lidar_livox_pub.publish(transformed_cloud)
+
+        if(self.visualization):
+            self.publish_base_link()
+
+    # def lidar_ouster_callback(self,pointcloud : PointCloud2):
+
+    #     transform = self.tf_buffer.lookup_transform('base_link', pointcloud.header.frame_id, rospy.Time(0), rospy.Duration(1.0))
+        
+    #     # Transform the point cloud
+    #     transformed_cloud = do_transform_cloud(pointcloud, transform)
+
+    #     # Publish transformed point cloud
+    #     self.rosbag_lidar_livox_pub.publish(transformed_cloud)
+
+    #     if(self.visualization):
+    #         self.publish_base_link()
+
+    
     # Use cv2.Mat for GEM Car, Image for RosBag
     def image_callback(self, image : Image): #image : cv2.Mat):
 
@@ -92,8 +158,7 @@ def image_callback(self, image : Image): #image : cv2.Mat):
         # track_result = self.detector.track(source=image, classes=self.classes_to_detect, persist=True, conf=self.confidence)
 
         # Convert to CV2 format for RosBag
-        bridge = CvBridge() 
-        image = bridge.imgmsg_to_cv2(image, "bgr8") 
+        image = self.cv_bridge.imgmsg_to_cv2(image, "bgr8") 
         track_result = self.detector.track(source=image, classes=self.classes_to_detect, persist=True, conf=self.confidence)
 
         self.last_person_boxes = []
@@ -155,8 +220,8 @@ def image_callback(self, image : Image): #image : cv2.Mat):
         
         # Used for visualization
         if(self.visualization):
-            ros_img = bridge.cv2_to_imgmsg(image, 'bgr8')
-            self.pub_image.publish(ros_img)
+            ros_img = self.cv_bridge.cv2_to_imgmsg(image, 'bgr8')
+            self.rosbag_cam_pub.publish(ros_img)
 
         #uncomment if you want to debug the detector...
         # print(self.last_person_boxes)
diff --git a/requirements.txt b/requirements.txt
index aba104c78..1679dba33 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -10,3 +10,4 @@ dacite
 
 # Perception
 ultralytics
+lap==0.5.12
\ No newline at end of file

From b087f8d9b403aecd1ea24dc4cd1a501924e13873 Mon Sep 17 00:00:00 2001
From: nvikramraj <nvikramraj1305@gmail.com>
Date: Thu, 13 Feb 2025 16:27:39 -0600
Subject: [PATCH 03/46] added few comments

---
 .../perception/pedestrian_detection.py        | 28 +++++++++++--------
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py
index aeda0b15b..bf3134a68 100644
--- a/GEMstack/onboard/perception/pedestrian_detection.py
+++ b/GEMstack/onboard/perception/pedestrian_detection.py
@@ -83,16 +83,17 @@ def initialize(self):
 
         # Lidar Tranform topic publisher
         self.rosbag_lidar_livox_pub = rospy.Publisher("/livox/transformed_lidar", PointCloud2, queue_size=1)
-        # self.rosbag_lidar_ouster_pub = rospy.Publisher("/ouster/transformed_lidar", PointCloud2, queue_size=1)
+        self.rosbag_lidar_ouster_pub = rospy.Publisher("/ouster/transformed_lidar", PointCloud2, queue_size=1)
 
         self.rosbag_cam_sub = rospy.Subscriber('/oak/rgb/image_raw',Image, self.image_callback,queue_size=1)
         self.rosbag_lidar_livox_sub = rospy.Subscriber('/livox/lidar',PointCloud2, self.lidar_livox_callback,queue_size=1)
-        # self.rosbag_lidar_ouster_sub = rospy.Subscriber('/ouster/points',PointCloud2, self.lidar_ouster_callback,queue_size=1)
+        self.rosbag_lidar_ouster_sub = rospy.Subscriber('/ouster/points',PointCloud2, self.lidar_ouster_callback,queue_size=1)
         
         pass
     
 
-    def publish_base_link(self):
+    # Publishes baselink to map transformation for visualization
+    def publish_base_link_to_map(self):
 
         br = tf2_ros.TransformBroadcaster()
         transform = TransformStamped()
@@ -113,6 +114,8 @@ def publish_base_link(self):
         transform.transform.rotation.w = 1.0  # No rotation
         br.sendTransform(transform)
 
+
+    # Livox Lidar data
     def lidar_livox_callback(self,pointcloud : PointCloud2):
 
         transform = self.tf_buffer.lookup_transform('base_link', pointcloud.header.frame_id, rospy.Time(0), rospy.Duration(1.0))
@@ -124,20 +127,21 @@ def lidar_livox_callback(self,pointcloud : PointCloud2):
         self.rosbag_lidar_livox_pub.publish(transformed_cloud)
 
         if(self.visualization):
-            self.publish_base_link()
+            self.publish_base_link_to_map()
 
-    # def lidar_ouster_callback(self,pointcloud : PointCloud2):
+    # Ouster Lidar data
+    def lidar_ouster_callback(self,pointcloud : PointCloud2):
 
-    #     transform = self.tf_buffer.lookup_transform('base_link', pointcloud.header.frame_id, rospy.Time(0), rospy.Duration(1.0))
+        transform = self.tf_buffer.lookup_transform('base_link', pointcloud.header.frame_id, rospy.Time(0), rospy.Duration(1.0))
         
-    #     # Transform the point cloud
-    #     transformed_cloud = do_transform_cloud(pointcloud, transform)
+        # Transform the point cloud
+        transformed_cloud = do_transform_cloud(pointcloud, transform)
 
-    #     # Publish transformed point cloud
-    #     self.rosbag_lidar_livox_pub.publish(transformed_cloud)
+        # Publish transformed point cloud
+        self.rosbag_lidar_livox_pub.publish(transformed_cloud)
 
-    #     if(self.visualization):
-    #         self.publish_base_link()
+        if(self.visualization):
+            self.publish_base_link_to_map()
 
     
     # Use cv2.Mat for GEM Car, Image for RosBag

From 475b8c91ae9bd40b93c7aaded7d0d7a623464b95 Mon Sep 17 00:00:00 2001
From: KenC1014 <kenken4016@gmail.com>
Date: Fri, 14 Feb 2025 20:02:30 -0600
Subject: [PATCH 04/46] align lidar pc2 to camera frame

---
 .../calibration/camera_intrinsics.json        |   7 +
 .../perception/calibration/extrinsics/R.npy   | Bin 0 -> 200 bytes
 .../calibration/extrinsics/rvec.npy           | Bin 0 -> 152 bytes
 .../perception/calibration/extrinsics/t.npy   | Bin 0 -> 152 bytes
 GEMstack/onboard/perception/fusion.py         |  80 +++++++--
 GEMstack/onboard/perception/fusion_utils.py   | 159 ++++++++++++++++++
 GEMstack/onboard/perception/transform.py      |   2 +-
 7 files changed, 232 insertions(+), 16 deletions(-)
 create mode 100644 GEMstack/onboard/perception/calibration/camera_intrinsics.json
 create mode 100644 GEMstack/onboard/perception/calibration/extrinsics/R.npy
 create mode 100644 GEMstack/onboard/perception/calibration/extrinsics/rvec.npy
 create mode 100644 GEMstack/onboard/perception/calibration/extrinsics/t.npy
 create mode 100644 GEMstack/onboard/perception/fusion_utils.py

diff --git a/GEMstack/onboard/perception/calibration/camera_intrinsics.json b/GEMstack/onboard/perception/calibration/camera_intrinsics.json
new file mode 100644
index 000000000..74318680c
--- /dev/null
+++ b/GEMstack/onboard/perception/calibration/camera_intrinsics.json
@@ -0,0 +1,7 @@
+{
+    "fx": 684.8333129882812,
+    "fy": 684.6096801757812,
+    "cx": 573.37109375,
+    "cy": 363.700927734375
+  }
+  
\ No newline at end of file
diff --git a/GEMstack/onboard/perception/calibration/extrinsics/R.npy b/GEMstack/onboard/perception/calibration/extrinsics/R.npy
new file mode 100644
index 0000000000000000000000000000000000000000..7d44aa76dc77d0199fe86055c0f3d0e2e068d1e1
GIT binary patch
literal 200
zcmbR27wQ`j$;eQ~P_3SlTAW;@Zl$1ZlV+i=qoAIaUsO_*m=~X4l#&V(cT3DEP6dh=
zXCxM+0{I%oIts>`ItsN4WCJb%fwxMDr{~)LE9si%@#p>ihKYjy;`i(KAE>>{UAU@z
vzr*Jf@d8Hm_6&!%YcKf!et)igplIr!_x1(a2fkgnHrJlvT6b{B{<8f5RmVXB

literal 0
HcmV?d00001

diff --git a/GEMstack/onboard/perception/calibration/extrinsics/rvec.npy b/GEMstack/onboard/perception/calibration/extrinsics/rvec.npy
new file mode 100644
index 0000000000000000000000000000000000000000..62d1809f1f7ea2665a24df654d48ec92a21310e7
GIT binary patch
literal 152
zcmbR27wQ`j$;eQ~P_3SlTAW;@Zl$1ZlV+i=qoAIaUsO_*m=~X4l#&V(cT3DEP6dh=
xXCxM+0{I%oItqrGItsN4WCN}*xxZd?bbYozBkxmtO5yW<;qS-&F5LTM4*;iRDntMP

literal 0
HcmV?d00001

diff --git a/GEMstack/onboard/perception/calibration/extrinsics/t.npy b/GEMstack/onboard/perception/calibration/extrinsics/t.npy
new file mode 100644
index 0000000000000000000000000000000000000000..b133fa55fd0c5362ac4d93bac53efeb471dfee3f
GIT binary patch
literal 152
zcmbR27wQ`j$;eQ~P_3SlTAW;@Zl$1ZlV+i=qoAIaUsO_*m=~X4l#&V(cT3DEP6dh=
xXCxM+0{I%oItqrGItsN4WCJeCQ?j?%^Ox^`s-3-k&cPe|OJ3%@a}Rp99{`L8DPRBq

literal 0
HcmV?d00001

diff --git a/GEMstack/onboard/perception/fusion.py b/GEMstack/onboard/perception/fusion.py
index 0b6e4f205..d8cd1a41a 100644
--- a/GEMstack/onboard/perception/fusion.py
+++ b/GEMstack/onboard/perception/fusion.py
@@ -1,30 +1,80 @@
 from cv_bridge import CvBridge
 from sensor_msgs.msg import Image, PointCloud2
+from ultralytics import YOLO
+from fusion_utils import *
 import rospy
 import message_filters
-
+import os
+import tf
 
 class Fusion3D():
     def __init__(self):
+        # Setup variables
         self.bridge = CvBridge()
-        self.stereo_rosbag = message_filters.Subscriber('/oak/stereo/image_raw', Image, self.stereo_callback, queue_size=1)
-        self.top_lidar_rosbag = message_filters.Subscriber('/ouster/points', PointCloud2, self.top_lidar_callback, queue_size=1)
+        self.detector = YOLO(os.getcwd()+'/GEMstack/knowledge/detection/yolov8n.pt')
+        self.last_person_boxes = [] 
+        self.pedestrians = {}
+        self.visualization = True
+        self.confidence = 0.7
+        self.classes_to_detect = 0
+
+        # Load calibration data
+        self.R = load_extrinsics(os.getcwd() + '/GEMstack/onboard/perception/calibration/extrinsics/R.npy')
+        self.t = load_extrinsics(os.getcwd() + '/GEMstack/onboard/perception/calibration/extrinsics/t.npy')
+        self.K = load_intrinsics(os.getcwd()+ '/GEMstack/onboard/perception/calibration/camera_intrinsics.json')
+
+        # Subscribers and sychronizers
+        self.rgb_rosbag = message_filters.Subscriber('/oak/rgb/image_raw', Image)
+        self.top_lidar_rosbag = message_filters.Subscriber('/ouster/points', PointCloud2)
+        self.sync = message_filters.ApproximateTimeSynchronizer([self.rgb_rosbag, self.top_lidar_rosbag], queue_size=10, slop=0.1)
+        self.sync.registerCallback(self.fusion_callback)
+
+        # TF listener to get transformation from LiDAR to Camera
+        self.tf_listener = tf.TransformListener()
+
+        # Publishers
+        if(self.visualization):
+            self.pub_image = rospy.Publisher("/camera/image_detection", Image, queue_size=1)
+
+
+
+    def fusion_callback(self, image: Image, lidar_pc2_msg: PointCloud2):
+        image = self.bridge.imgmsg_to_cv2(image, "bgr8") 
+        track_result = self.detector.track(source=image, classes=self.classes_to_detect, persist=True, conf=self.confidence)
+
+        self.last_person_boxes = []
+        boxes = track_result[0].boxes
+
+        # Unpacking box dimentions detected into x,y,w,h
+        for box in boxes:
+            xywh = box.xywh[0].tolist()
+            self.last_person_boxes.append(xywh)
+
+            # Used for visualization
+            if(self.visualization):
+                image = vis_2d_bbox(image, xywh, box)
 
-        ts = message_filters.ApproximateTimeSynchronizer([self.stereo_rosbag, self.top_lidar_rosbag], queue_size=10, slop=0.1)
-        ts.registerCallback(self.callback)
-        rospy.spin()
+        # Convert 1D PointCloud2 data to x, y, z coords
+        lidar_points = convert_pointcloud2_to_xyz(lidar_pc2_msg)
+    
+        # Transform LiDAR points into the camera coordinate frame.
+        lidar_in_camera = transform_lidar_points(lidar_points, self.R, self.t)
+    
+        # Project the transformed points into the image plane.
+        projected_pts = project_points(lidar_in_camera, self.K)
+        
+        # Draw projected LiDAR points on the image.
+        for pt in projected_pts:
+            cv2.circle(image, pt, 2, (0, 0, 255), -1)
 
-    def callback(left_img, right_img, lidar_msg, camera_info):
-    # Process synchronized data here
-        pass
+        # visualize_point_cloud(p_img_cloud)
+        
+        # Used for visualization
+        if(self.visualization):
+            ros_img = self.bridge.cv2_to_imgmsg(image, 'bgr8')
+            self.pub_image.publish(ros_img)  
 
-    def stereo_callback(self, image: Image):
-        image = self.bridge.imgmsg_to_cv2(image, "16UC1")
-        print(f"stereo: {image.shape}")        
 
-    def top_lidar_callback(self, point_cloud: PointCloud2):
-        # point_cloud = self.bridge.imgmsg_to_cv2(point_cloud, "16UC1")
-        print(f"point_cloud: {point_cloud.fields}")       
 
 if __name__ == '__main__':
     rospy.init_node('fusion_node', anonymous=True)
diff --git a/GEMstack/onboard/perception/fusion_utils.py b/GEMstack/onboard/perception/fusion_utils.py
new file mode 100644
index 000000000..ad2c13c19
--- /dev/null
+++ b/GEMstack/onboard/perception/fusion_utils.py
@@ -0,0 +1,159 @@
+from sensor_msgs.msg import PointCloud2
+import numpy as np
+import sensor_msgs.point_cloud2 as pc2
+import open3d as o3d
+import cv2
+import json
+
+
+def convert_pointcloud2_to_xyz(lidar_pc2_msg: PointCloud2):
+    """ Convert 1D PointCloud2 data to x, y, z coords """
+    lidar_points = []
+    for point in pc2.read_points(lidar_pc2_msg, field_names=("x", "y", "z", "intensity"), skip_nans=True):
+        lidar_points.append([point[0], point[1], point[2]])
+    return np.array(lidar_points)
+
+
+# Credits: The following lines of codes (17 to 159 excluding lines 80 to 115) are adapted from the Calibration Team B
+def load_extrinsics(extrinsics_file):
+    """
+    Load calibrated extrinsics from a .npz file.
+    Assumes the file contains keys 'R' and 't'.
+    """
+    data = np.load(extrinsics_file)
+    return data
+
+
+def load_intrinsics(intrinsics_file):
+    """
+    Load camera intrinsics from a JSON file.
+    Expects keys: 'fx', 'fy', 'cx', and 'cy'.
+    """
+    with open(intrinsics_file, 'r') as f:
+        intrinsics = json.load(f)
+    fx = intrinsics["fx"]
+    fy = intrinsics["fy"]
+    cx = intrinsics["cx"]
+    cy = intrinsics["cy"]
+    K = np.array([[fx, 0, cx],
+                  [0, fy, cy],
+                  [0,  0,  1]], dtype=np.float32)
+    return K
+
+
+def load_lidar_scan(lidar_file):
+    """
+    Load a LiDAR scan from a file.
+    This function handles both npy (returns a NumPy array) and npz files.
+    """
+    data = np.load(lidar_file, allow_pickle=True)
+    if isinstance(data, np.ndarray):
+        return data.astype(np.float32)
+    else:
+        key = list(data.keys())[0]
+        return data[key].astype(np.float32)
+
+
+def transform_lidar_points(lidar_points, R, t):
+    """
+    Transform LiDAR points from the LiDAR frame into the camera frame.
+    p_cam = R * p_lidar + t.
+    """
+    P_cam = (R @ lidar_points.T + t.reshape(3,1)).T
+    return P_cam
+
+
+def project_points(points_3d, K):
+    """
+    Project 3D points (in the camera frame) into 2D image coordinates using the camera matrix K.
+    Only projects points with z > 0.
+    """
+    proj_points = []
+    for pt in points_3d:
+        if pt[2] > 0:  # only project points in front of the camera
+            u = K[0, 0] * (pt[0] / pt[2]) + K[0, 2]
+            v = K[1, 1] * (pt[1] / pt[2]) + K[1, 2]
+            proj_points.append((int(u), int(v)))
+    return proj_points
+
+
+def vis_2d_bbox(image, xywh, box):
+    # Setup
+    label_text = "Pedestrian "
+    font = cv2.FONT_HERSHEY_SIMPLEX
+    font_scale = 0.5
+    font_color = (255, 255, 255)
+    line_type = 1
+    text_thickness = 2
+
+    x, y, w, h = xywh
+
+    if box.id is not None:
+        id = box.id.item()
+    else:
+        id = 0
+
+    # Draw bounding box
+    cv2.rectangle(image, (int(x - w / 2), int(y - h / 2)), (int(x + w / 2), int(y + h / 2)), (255, 0, 255), 3)
+
+    # Define text label
+    x = int(x - w / 2)
+    y = int(y - h / 2)
+    label = label_text + str(id) + " : " + str(round(box.conf.item(), 2))
+
+    # Get text size
+    text_size, baseline = cv2.getTextSize(label, font, font_scale, line_type)
+    text_w, text_h = text_size
+
+    # Position text above the bounding box
+    text_x = x
+    text_y = y - 10 if y - 10 > 10 else y + h + text_h
+
+    # Draw main text on top of the outline
+    cv2.putText(image, label, (text_x, text_y - baseline), font, font_scale, font_color, text_thickness)
+
+    return image
+
+
+def visualize_point_cloud(points):
+    """
+    Visualizes the given point cloud using Open3D.
+
+    Args:
+        points (np.ndarray): Nx3 array of point cloud coordinates.
+    """
+    pc = o3d.geometry.PointCloud()
+    pc.points = o3d.utility.Vector3dVector(points)
+    pc.paint_uniform_color([0.1, 0.7, 0.9])  # Light blue color
+    o3d.visualization.draw_geometries([pc])
+
+
+def visualize_plane(inlier_cloud, outlier_cloud, bounding_box_2d_points):
+    """
+    Visualizes the detected plane with its 2D bounding box.
+
+    :param inlier_cloud: Open3D point cloud containing plane points.
+    :param outlier_cloud: Open3D point cloud containing non-plane points.
+    :param bounding_box_2d_points: 4 corner points of the 2D bounding box on the plane.
+    """
+    inlier_cloud.paint_uniform_color([1, 0, 0])  # Red for the plane
+    outlier_cloud.paint_uniform_color([0.5, 0.5, 0.5])  # Gray for other points
+
+    # Create bounding box visualization
+    bounding_box_pcd = o3d.geometry.PointCloud()
+    bounding_box_pcd.points = o3d.utility.Vector3dVector(bounding_box_2d_points)
+    bounding_box_pcd.paint_uniform_color([0, 1, 0])  # Green for bounding box corners
+    
+    # Create a bounding box line set (connect corners)
+    lines = [
+        [0, 1], [1, 2], [2, 3], [3, 0]   # Edges of the rectangle
+    ]
+    
+    bounding_box_lines = o3d.geometry.LineSet()
+    bounding_box_lines.points = o3d.utility.Vector3dVector(bounding_box_2d_points)
+    bounding_box_lines.lines = o3d.utility.Vector2iVector(lines)
+    
+    bounding_box_lines.paint_uniform_color([0, 1, 0])  # Green for bounding box edges
+    
+    # Visualize
+    o3d.visualization.draw_geometries([inlier_cloud, outlier_cloud, bounding_box_pcd, bounding_box_lines])
\ No newline at end of file
diff --git a/GEMstack/onboard/perception/transform.py b/GEMstack/onboard/perception/transform.py
index 8d47598fa..4487e051c 100644
--- a/GEMstack/onboard/perception/transform.py
+++ b/GEMstack/onboard/perception/transform.py
@@ -11,7 +11,7 @@ def publish_tf():
             (0, 0, 1),  # (x, y, z) translation
             tf.transformations.quaternion_from_euler(0, 0, 0),  # (roll, pitch, yaw)
             rospy.Time.now(),
-            "oak_rgb_camera_optical_frame",  # Child frame (sensor)
+            "os_sensor",  # Child frame (sensor)
             "map"  # Parent frame (world)
         )
         rate.sleep()

From 1355d5faa0002cbe44164bc12c62ed5701409808 Mon Sep 17 00:00:00 2001
From: KenC1014 <kenken4016@gmail.com>
Date: Fri, 14 Feb 2025 21:06:27 -0600
Subject: [PATCH 05/46] extract clouds in 2d

---
 GEMstack/onboard/perception/fusion.py | 41 +++++++++++++++++++--------
 1 file changed, 29 insertions(+), 12 deletions(-)

diff --git a/GEMstack/onboard/perception/fusion.py b/GEMstack/onboard/perception/fusion.py
index d8cd1a41a..d6bca214a 100644
--- a/GEMstack/onboard/perception/fusion.py
+++ b/GEMstack/onboard/perception/fusion.py
@@ -42,32 +42,49 @@ def fusion_callback(self, image: Image, lidar_pc2_msg: PointCloud2):
         image = self.bridge.imgmsg_to_cv2(image, "bgr8") 
         track_result = self.detector.track(source=image, classes=self.classes_to_detect, persist=True, conf=self.confidence)
 
+        # Convert 1D PointCloud2 data to x, y, z coords
+        lidar_points = convert_pointcloud2_to_xyz(lidar_pc2_msg)
+    
+        # Transform LiDAR points into the camera coordinate frame.
+        lidar_in_camera = transform_lidar_points(lidar_points, self.R, self.t)
+    
+        # Project the transformed points into the image plane.
+        projected_pts = project_points(lidar_in_camera, self.K)
+        
+        # Process bboxes
         self.last_person_boxes = []
         boxes = track_result[0].boxes
 
         # Unpacking box dimentions detected into x,y,w,h
+        all_extracted_pts = []
         for box in boxes:
             xywh = box.xywh[0].tolist()
             self.last_person_boxes.append(xywh)
 
+            # Extracting projected pts
+            x, y, w, h = xywh
+            left_bound = int(x - w / 2)
+            right_bound = int(x + w / 2)
+            top_bound = int(y - h / 2)
+            bottom_bound = int(y + h / 2)
+
+            if len(projected_pts) > 0:
+                pts = np.array(projected_pts)
+                extracted_pts = pts[(pts[:, 0] > left_bound) &
+                                    (pts[:, 0] < right_bound) &
+                                    (pts[:, 1] > top_bound) &
+                                    (pts[:, 1] < bottom_bound)
+                                    ]
+                
+                all_extracted_pts = all_extracted_pts + list(extracted_pts)
+            
             # Used for visualization
             if(self.visualization):
                 image = vis_2d_bbox(image, xywh, box)
-
-        # Convert 1D PointCloud2 data to x, y, z coords
-        lidar_points = convert_pointcloud2_to_xyz(lidar_pc2_msg)
-    
-        # Transform LiDAR points into the camera coordinate frame.
-        lidar_in_camera = transform_lidar_points(lidar_points, self.R, self.t)
-    
-        # Project the transformed points into the image plane.
-        projected_pts = project_points(lidar_in_camera, self.K)
         
         # Draw projected LiDAR points on the image.
-        for pt in projected_pts:
+        for pt in all_extracted_pts:
             cv2.circle(image, pt, 2, (0, 0, 255), -1)
-
-        # visualize_point_cloud(p_img_cloud)
         
         # Used for visualization
         if(self.visualization):

From 8316a20ba289a5a349ac35558da5f10bf4addde6 Mon Sep 17 00:00:00 2001
From: nvikramraj <nvikramraj1305@gmail.com>
Date: Fri, 14 Feb 2025 21:48:36 -0600
Subject: [PATCH 06/46] converted lidar to cam frame

---
 GEMstack/knowledge/calibration/extrinsics.npz | Bin 0 -> 850 bytes
 GEMstack/knowledge/calibration/intrinsic.json |   7 ++
 .../perception/pedestrian_detection.py        | 101 ++++++++++++++----
 3 files changed, 90 insertions(+), 18 deletions(-)
 create mode 100644 GEMstack/knowledge/calibration/extrinsics.npz
 create mode 100644 GEMstack/knowledge/calibration/intrinsic.json

diff --git a/GEMstack/knowledge/calibration/extrinsics.npz b/GEMstack/knowledge/calibration/extrinsics.npz
new file mode 100644
index 0000000000000000000000000000000000000000..663fe14bd0da3d734e4dbacac1d377c6c845ca42
GIT binary patch
literal 850
zcmWIWW@Zs#fB;2?2#tS}PXIX}%*r6b5Tut^P|3(302Tl#0!e_tWWP|~fJjD$GKOmP
zl+@znB6TYTb(=H`bsYuuwEUuyqQt!T{Gyapkhoi7PH`$wyf`DVAQi~hFxF8p*3?m`
zRUjL12?)GZN<2N+{$EMgG><>;_cu%w^cTNhzyCn(UGBnF<@+5zpNJPQs<&r2v|W3_
z|M&ZI?E^(q|Gc*^&_3|(!nL{f4A;7YL-v>L4?yvLl()3c4505o807sDnD;>fAONE&
z^}Zp6-nTp@dy74P`TnQc+1uwFys^LJWzIYIpl4{_S=_#LHOxC4K<^Zlr6vznAAQOF
z^`fKev;7%)pW0IjpZ5!YKkj$o-Y5G2Z$>5&W?U&05-T9Gfe}Q4(<{0b)K~>6gMbFc
n|3C&fj?uNCh8#=-$d+?Jdtl)h;LXYgl4AzKIY4>=6Nm=@B*(<v

literal 0
HcmV?d00001

diff --git a/GEMstack/knowledge/calibration/intrinsic.json b/GEMstack/knowledge/calibration/intrinsic.json
new file mode 100644
index 000000000..37e870fd6
--- /dev/null
+++ b/GEMstack/knowledge/calibration/intrinsic.json
@@ -0,0 +1,7 @@
+{
+    "fx": 684.8333129882812,
+    "fy": 684.6096801757812,
+    "cx": 573.37109375,
+    "cy": 363.700927734375
+  }
+  
diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py
index bf3134a68..dce59f38f 100644
--- a/GEMstack/onboard/perception/pedestrian_detection.py
+++ b/GEMstack/onboard/perception/pedestrian_detection.py
@@ -6,13 +6,15 @@
 from typing import Dict
 from cv_bridge import CvBridge
 from sensor_msgs.msg import Image, PointCloud2, PointField
+import sensor_msgs.point_cloud2 as pc2
 from tf2_sensor_msgs.tf2_sensor_msgs import do_transform_cloud
 from geometry_msgs.msg import TransformStamped
 import rospy
 import os
 import numpy as np
 import tf2_ros
-
+import json
+import open3d as o3d
 
 def box_to_fake_agent(box):
     """Creates a fake agent state from an (x,y,w,h) bounding box.
@@ -45,6 +47,7 @@ def __init__(self,vehicle_interface : GEMInterface):
         self.visualization = True # Set this to true for visualization, later change to get value from sys arg
         self.confidence = 0.7
         self.classes_to_detect = 0
+        self.rosbag_image = None
 
     def rate(self):
         return 4.0
@@ -67,7 +70,7 @@ def initialize(self):
         #tell the vehicle to use image_callback whenever 'front_camera' gets a reading, and it expects images of type cv2.Mat
 
         # GEM Car subacriber
-        # self.vehicle_interface.subscribe_sensor('front_camera',self.image_callback,cv2.Mat)
+        self.vehicle_interface.subscribe_sensor('front_camera',self.image_callback,cv2.Mat)
 
         # Webcam
         # rospy.Subscriber('/webcam', Image, self.image_callback)
@@ -80,13 +83,14 @@ def initialize(self):
         # Testing with rosbag
         if(self.visualization):
             self.rosbag_cam_pub = rospy.Publisher("/camera/image_detection", Image, queue_size=1)
+            self.rosbag_cam_lidar_pub = rospy.Publisher("/camera/sensor_fusion", Image, queue_size=1)
 
         # Lidar Tranform topic publisher
         self.rosbag_lidar_livox_pub = rospy.Publisher("/livox/transformed_lidar", PointCloud2, queue_size=1)
         self.rosbag_lidar_ouster_pub = rospy.Publisher("/ouster/transformed_lidar", PointCloud2, queue_size=1)
 
         self.rosbag_cam_sub = rospy.Subscriber('/oak/rgb/image_raw',Image, self.image_callback,queue_size=1)
-        self.rosbag_lidar_livox_sub = rospy.Subscriber('/livox/lidar',PointCloud2, self.lidar_livox_callback,queue_size=1)
+        # self.rosbag_lidar_livox_sub = rospy.Subscriber('/livox/lidar',PointCloud2, self.lidar_livox_callback,queue_size=1)
         self.rosbag_lidar_ouster_sub = rospy.Subscriber('/ouster/points',PointCloud2, self.lidar_ouster_callback,queue_size=1)
         
         pass
@@ -118,30 +122,90 @@ def publish_base_link_to_map(self):
     # Livox Lidar data
     def lidar_livox_callback(self,pointcloud : PointCloud2):
 
-        transform = self.tf_buffer.lookup_transform('base_link', pointcloud.header.frame_id, rospy.Time(0), rospy.Duration(1.0))
-        
-        # Transform the point cloud
-        transformed_cloud = do_transform_cloud(pointcloud, transform)
+        points = np.array(list(pc2.read_points(pointcloud, skip_nans=True)), dtype=np.float32)[:, :3]
+        intrinsic_file_path = os.getcwd()+'/GEMstack/knowledge/calibration/intrinsic.json'
+        extrinsics_file_path = os.getcwd()+'/GEMstack/knowledge/calibration/extrinsics.npz'
+        R, t = self.load_extrinsics(extrinsics_file_path)
+        K = self.load_intrinsics(intrinsic_file_path)
 
-        # Publish transformed point cloud
-        self.rosbag_lidar_livox_pub.publish(transformed_cloud)
 
-        if(self.visualization):
-            self.publish_base_link_to_map()
+        # if(self.visualization):
+        #     transform = self.tf_buffer.lookup_transform('base_link', pointcloud.header.frame_id, rospy.Time(0), rospy.Duration(1.0))
+        #     # Transform the point cloud
+        #     transformed_cloud = do_transform_cloud(pointcloud, transform)
+        #     # Publish transformed point cloud
+        #     self.rosbag_lidar_livox_pub.publish(transformed_cloud)
+        #     self.publish_base_link_to_map()
 
     # Ouster Lidar data
     def lidar_ouster_callback(self,pointcloud : PointCloud2):
 
-        transform = self.tf_buffer.lookup_transform('base_link', pointcloud.header.frame_id, rospy.Time(0), rospy.Duration(1.0))
+        points = np.array(list(pc2.read_points(pointcloud, skip_nans=True)), dtype=np.float32)[:, :3]
+        intrinsic_file_path = os.getcwd()+'/GEMstack/knowledge/calibration/intrinsic.json'
+        extrinsics_file_path = os.getcwd()+'/GEMstack/knowledge/calibration/extrinsics.npz'
+        R, t = self.load_extrinsics(extrinsics_file_path)
+        K = self.load_intrinsics(intrinsic_file_path)
         
-        # Transform the point cloud
-        transformed_cloud = do_transform_cloud(pointcloud, transform)
+        transformed_points = (R @ points.T + t.reshape(3,1)).T
 
-        # Publish transformed point cloud
-        self.rosbag_lidar_livox_pub.publish(transformed_cloud)
+        print(f"before :{transformed_points.shape}")
+        # Convert numpy array to Open3D point cloud
+        pcd = o3d.geometry.PointCloud()
+        pcd.points = o3d.utility.Vector3dVector(transformed_points)
 
-        if(self.visualization):
-            self.publish_base_link_to_map()
+        # Apply voxel grid downsampling
+        voxel_size = 0.1  # Adjust for desired resolution
+        downsampled_pcd = pcd.voxel_down_sample(voxel_size=voxel_size)
+
+        # Convert back to numpy array
+        transformed_points = np.asarray(downsampled_pcd.points)
+        print(f"after :{transformed_points.shape}")
+
+        for p in transformed_points:
+            if p[2] > 0:  # only project points in front of the camera
+                u = K[0, 0] * (p[0] / p[2]) + K[0, 2]
+                v = K[1, 1] * (p[1] / p[2]) + K[1, 2]
+                cv2.circle(self.rosbag_image, (int(u),int(v)), 2, (0, 0, 255), -1)
+
+        ros_img = self.cv_bridge.cv2_to_imgmsg(self.rosbag_image, 'bgr8')
+        self.rosbag_cam_lidar_pub.publish(ros_img)
+        
+        
+
+        # if(self.visualization):
+        #     transform = self.tf_buffer.lookup_transform('base_link', pointcloud.header.frame_id, rospy.Time(0), rospy.Duration(1.0))
+        #     # Transform the point cloud
+        #     transformed_cloud = do_transform_cloud(pointcloud, transform)
+        #     # Publish transformed point cloud
+        #     self.rosbag_lidar_ouster_pub.publish(transformed_cloud)
+        #     self.publish_base_link_to_map()
+
+
+    def load_extrinsics(self,extrinsics_file):
+        """
+        Load calibrated extrinsics from a .npz file.
+        Assumes the file contains keys 'R' and 't'.
+        """
+        data = np.load(extrinsics_file)
+        R = data['R']
+        t = data['t']
+        return R, t
+
+    def load_intrinsics(self,intrinsics_file):
+        """
+        Load camera intrinsics from a JSON file.
+        Expects keys: 'fx', 'fy', 'cx', and 'cy'.
+        """
+        with open(intrinsics_file, 'r') as f:
+            intrinsics = json.load(f)
+        fx = intrinsics["fx"]
+        fy = intrinsics["fy"]
+        cx = intrinsics["cx"]
+        cy = intrinsics["cy"]
+        K = np.array([[fx, 0, cx],
+                    [0, fy, cy],
+                    [0,  0,  1]], dtype=np.float32)
+        return K
 
     
     # Use cv2.Mat for GEM Car, Image for RosBag
@@ -220,6 +284,7 @@ def image_callback(self, image : Image): #image : cv2.Mat):
 
                 # Draw main text on top of the outline
                 cv2.putText(image, label, (text_x, text_y - baseline), font, font_scale, font_color, text_thickness)
+                self.rosbag_image = image
 
         
         # Used for visualization

From dc69933c8197f84056c46611ff34517df03bf92a Mon Sep 17 00:00:00 2001
From: KenC1014 <kenken4016@gmail.com>
Date: Fri, 14 Feb 2025 22:03:38 -0600
Subject: [PATCH 07/46] add od3 dialect

---
 GEMstack/onboard/perception/fusion.py       | 13 +++++++++++++
 GEMstack/onboard/perception/fusion_utils.py |  5 +----
 2 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/GEMstack/onboard/perception/fusion.py b/GEMstack/onboard/perception/fusion.py
index d6bca214a..159fbc683 100644
--- a/GEMstack/onboard/perception/fusion.py
+++ b/GEMstack/onboard/perception/fusion.py
@@ -50,6 +50,19 @@ def fusion_callback(self, image: Image, lidar_pc2_msg: PointCloud2):
     
         # Project the transformed points into the image plane.
         projected_pts = project_points(lidar_in_camera, self.K)
+
+        # Convert numpy array to Open3D point cloud
+        transformed_points = projected_pts
+        pcd = o3d.geometry.PointCloud()
+        pcd.points = o3d.utility.Vector3dVector(transformed_points)
+
+        # Apply voxel grid downsampling
+        voxel_size = 0.1  # Adjust for desired resolution
+        downsampled_pcd = pcd.voxel_down_sample(voxel_size=voxel_size)
+
+        # Convert back to numpy array
+        transformed_points = np.asarray(downsampled_pcd.points)
+        print(f"after :{transformed_points.shape}")
         
         # Process bboxes
         self.last_person_boxes = []
diff --git a/GEMstack/onboard/perception/fusion_utils.py b/GEMstack/onboard/perception/fusion_utils.py
index ad2c13c19..e0408bb4c 100644
--- a/GEMstack/onboard/perception/fusion_utils.py
+++ b/GEMstack/onboard/perception/fusion_utils.py
@@ -8,10 +8,7 @@
 
 def convert_pointcloud2_to_xyz(lidar_pc2_msg: PointCloud2):
     """ Convert 1D PointCloud2 data to x, y, z coords """
-    lidar_points = []
-    for point in pc2.read_points(lidar_pc2_msg, field_names=("x", "y", "z", "intensity"), skip_nans=True):
-        lidar_points.append([point[0], point[1], point[2]])
-    return np.array(lidar_points)
+    return np.array(list(pc2.read_points(lidar_pc2_msg, skip_nans=True)), dtype=np.float32)[:, :3]
 
 
 # Credits: The following lines of codes (17 to 159 excluding lines 80 to 115) are adapted from the Calibration Team B

From 28726180e25cf7cfcf271000179619098109d447 Mon Sep 17 00:00:00 2001
From: KenC1014 <kenken4016@gmail.com>
Date: Fri, 14 Feb 2025 22:07:08 -0600
Subject: [PATCH 08/46] add o3d import

---
 GEMstack/onboard/perception/fusion.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/GEMstack/onboard/perception/fusion.py b/GEMstack/onboard/perception/fusion.py
index 159fbc683..ed6cd760f 100644
--- a/GEMstack/onboard/perception/fusion.py
+++ b/GEMstack/onboard/perception/fusion.py
@@ -2,6 +2,7 @@
 from sensor_msgs.msg import Image, PointCloud2
 from ultralytics import YOLO
 from fusion_utils import *
+import open3d as o3d
 import rospy
 import message_filters
 import os

From bf699815b7c39c3c86925ba1016b976de4fdae1b Mon Sep 17 00:00:00 2001
From: KenC1014 <kenken4016@gmail.com>
Date: Fri, 14 Feb 2025 22:17:40 -0600
Subject: [PATCH 09/46] downsample pc2

---
 GEMstack/onboard/perception/fusion.py | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/GEMstack/onboard/perception/fusion.py b/GEMstack/onboard/perception/fusion.py
index ed6cd760f..5299e3457 100644
--- a/GEMstack/onboard/perception/fusion.py
+++ b/GEMstack/onboard/perception/fusion.py
@@ -45,17 +45,10 @@ def fusion_callback(self, image: Image, lidar_pc2_msg: PointCloud2):
 
         # Convert 1D PointCloud2 data to x, y, z coords
         lidar_points = convert_pointcloud2_to_xyz(lidar_pc2_msg)
-    
-        # Transform LiDAR points into the camera coordinate frame.
-        lidar_in_camera = transform_lidar_points(lidar_points, self.R, self.t)
-    
-        # Project the transformed points into the image plane.
-        projected_pts = project_points(lidar_in_camera, self.K)
 
         # Convert numpy array to Open3D point cloud
-        transformed_points = projected_pts
         pcd = o3d.geometry.PointCloud()
-        pcd.points = o3d.utility.Vector3dVector(transformed_points)
+        pcd.points = o3d.utility.Vector3dVector(lidar_points)
 
         # Apply voxel grid downsampling
         voxel_size = 0.1  # Adjust for desired resolution
@@ -63,7 +56,13 @@ def fusion_callback(self, image: Image, lidar_pc2_msg: PointCloud2):
 
         # Convert back to numpy array
         transformed_points = np.asarray(downsampled_pcd.points)
-        print(f"after :{transformed_points.shape}")
+        
+        # Transform LiDAR points into the camera coordinate frame.
+        lidar_in_camera = transform_lidar_points(transformed_points, self.R, self.t)
+    
+        # Project the transformed points into the image plane.
+        projected_pts = project_points(lidar_in_camera, self.K)
+
         
         # Process bboxes
         self.last_person_boxes = []

From ced4804737f8c90264c50bb7e5b88e8029b4edc0 Mon Sep 17 00:00:00 2001
From: KenC1014 <kenken4016@gmail.com>
Date: Fri, 14 Feb 2025 22:33:34 -0600
Subject: [PATCH 10/46] rename variables

---
 GEMstack/onboard/perception/fusion.py       | 32 +++++++--------------
 GEMstack/onboard/perception/fusion_utils.py | 15 ++++++++++
 2 files changed, 26 insertions(+), 21 deletions(-)

diff --git a/GEMstack/onboard/perception/fusion.py b/GEMstack/onboard/perception/fusion.py
index 5299e3457..d98cbac6a 100644
--- a/GEMstack/onboard/perception/fusion.py
+++ b/GEMstack/onboard/perception/fusion.py
@@ -2,12 +2,12 @@
 from sensor_msgs.msg import Image, PointCloud2
 from ultralytics import YOLO
 from fusion_utils import *
-import open3d as o3d
 import rospy
 import message_filters
 import os
 import tf
 
+
 class Fusion3D():
     def __init__(self):
         # Setup variables
@@ -22,7 +22,7 @@ def __init__(self):
         # Load calibration data
         self.R = load_extrinsics(os.getcwd() + '/GEMstack/onboard/perception/calibration/extrinsics/R.npy')
         self.t = load_extrinsics(os.getcwd() + '/GEMstack/onboard/perception/calibration/extrinsics/t.npy')
-        self.K = load_intrinsics(os.getcwd()+ '/GEMstack/onboard/perception/calibration/camera_intrinsics.json')
+        self.K = load_intrinsics(os.getcwd() + '/GEMstack/onboard/perception/calibration/camera_intrinsics.json')
 
         # Subscribers and sychronizers
         self.rgb_rosbag = message_filters.Subscriber('/oak/rgb/image_raw', Image)
@@ -38,32 +38,22 @@ def __init__(self):
             self.pub_image = rospy.Publisher("/camera/image_detection", Image, queue_size=1)
 
 
-
-    def fusion_callback(self, image: Image, lidar_pc2_msg: PointCloud2):
-        image = self.bridge.imgmsg_to_cv2(image, "bgr8") 
-        track_result = self.detector.track(source=image, classes=self.classes_to_detect, persist=True, conf=self.confidence)
+    def fusion_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2):
+        cv_image = self.bridge.imgmsg_to_cv2(rgb_image_msg, "bgr8") 
+        track_result = self.detector.track(source=cv_image, classes=self.classes_to_detect, persist=True, conf=self.confidence)
 
         # Convert 1D PointCloud2 data to x, y, z coords
         lidar_points = convert_pointcloud2_to_xyz(lidar_pc2_msg)
 
-        # Convert numpy array to Open3D point cloud
-        pcd = o3d.geometry.PointCloud()
-        pcd.points = o3d.utility.Vector3dVector(lidar_points)
-
-        # Apply voxel grid downsampling
-        voxel_size = 0.1  # Adjust for desired resolution
-        downsampled_pcd = pcd.voxel_down_sample(voxel_size=voxel_size)
-
-        # Convert back to numpy array
-        transformed_points = np.asarray(downsampled_pcd.points)
+        # Downsample xyz point clouds
+        downsampled_points = downsample_points(lidar_points)
         
         # Transform LiDAR points into the camera coordinate frame.
-        lidar_in_camera = transform_lidar_points(transformed_points, self.R, self.t)
+        lidar_in_camera = transform_lidar_points(downsampled_points, self.R, self.t)
     
         # Project the transformed points into the image plane.
         projected_pts = project_points(lidar_in_camera, self.K)
 
-        
         # Process bboxes
         self.last_person_boxes = []
         boxes = track_result[0].boxes
@@ -93,15 +83,15 @@ def fusion_callback(self, image: Image, lidar_pc2_msg: PointCloud2):
             
             # Used for visualization
             if(self.visualization):
-                image = vis_2d_bbox(image, xywh, box)
+                cv_image = vis_2d_bbox(cv_image, xywh, box)
         
         # Draw projected LiDAR points on the image.
         for pt in all_extracted_pts:
-            cv2.circle(image, pt, 2, (0, 0, 255), -1)
+            cv2.circle(cv_image, pt, 2, (0, 0, 255), -1)
         
         # Used for visualization
         if(self.visualization):
-            ros_img = self.bridge.cv2_to_imgmsg(image, 'bgr8')
+            ros_img = self.bridge.cv2_to_imgmsg(cv_image, 'bgr8')
             self.pub_image.publish(ros_img)  
 
 
diff --git a/GEMstack/onboard/perception/fusion_utils.py b/GEMstack/onboard/perception/fusion_utils.py
index e0408bb4c..24b61366c 100644
--- a/GEMstack/onboard/perception/fusion_utils.py
+++ b/GEMstack/onboard/perception/fusion_utils.py
@@ -11,6 +11,21 @@ def convert_pointcloud2_to_xyz(lidar_pc2_msg: PointCloud2):
     return np.array(list(pc2.read_points(lidar_pc2_msg, skip_nans=True)), dtype=np.float32)[:, :3]
 
 
+def downsample_points(lidar_points):
+    """ Downsample point clouds """
+    # Convert numpy array to Open3D point cloud
+    pcd = o3d.geometry.PointCloud()
+    pcd.points = o3d.utility.Vector3dVector(lidar_points)
+
+    # Apply voxel grid downsampling
+    voxel_size = 0.1  # Adjust for desired resolution
+    downsampled_pcd = pcd.voxel_down_sample(voxel_size=voxel_size)
+
+    # Convert back to numpy array
+    transformed_points = np.asarray(downsampled_pcd.points)
+    return transformed_points
+
+
 # Credits: The following lines of codes (17 to 159 excluding lines 80 to 115) are adapted from the Calibration Team B
 def load_extrinsics(extrinsics_file):
     """

From d319eaa3b09063c977299bc6eb6e3258fc455d83 Mon Sep 17 00:00:00 2001
From: KenC1014 <kenken4016@gmail.com>
Date: Sat, 15 Feb 2025 00:46:45 -0600
Subject: [PATCH 11/46] extract 3D pc2 and visualize

---
 GEMstack/onboard/perception/fusion.py       | 21 ++++++++--
 GEMstack/onboard/perception/fusion_utils.py | 43 ++++++++++++++++++---
 GEMstack/onboard/perception/transform.py    |  5 ++-
 3 files changed, 58 insertions(+), 11 deletions(-)

diff --git a/GEMstack/onboard/perception/fusion.py b/GEMstack/onboard/perception/fusion.py
index d98cbac6a..f48da3e1b 100644
--- a/GEMstack/onboard/perception/fusion.py
+++ b/GEMstack/onboard/perception/fusion.py
@@ -34,6 +34,7 @@ def __init__(self):
         self.tf_listener = tf.TransformListener()
 
         # Publishers
+        self.pub_pedestrians_pc2 = rospy.Publisher("/point_cloud/pedestrians", PointCloud2, queue_size=10)
         if(self.visualization):
             self.pub_image = rospy.Publisher("/camera/image_detection", Image, queue_size=1)
 
@@ -80,14 +81,26 @@ def fusion_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2):
                                     ]
                 
                 all_extracted_pts = all_extracted_pts + list(extracted_pts)
-            
+
             # Used for visualization
             if(self.visualization):
                 cv_image = vis_2d_bbox(cv_image, xywh, box)
         
-        # Draw projected LiDAR points on the image.
-        for pt in all_extracted_pts:
-            cv2.circle(cv_image, pt, 2, (0, 0, 255), -1)
+        if len(all_extracted_pts) > 0:
+            # Extract 2D points
+            extracted_2d_pts = list(np.array(all_extracted_pts)[:, :2].astype(int))
+
+            # Draw projected 2D LiDAR points on the image.
+            for pt in extracted_2d_pts:
+                cv2.circle(cv_image, pt, 2, (0, 0, 255), -1)
+
+            # Extract 3D points
+            extracted_3d_pts = list(np.array(all_extracted_pts)[:, -3:])
+
+            # Create point cloud from extracted 3D points
+            ros_extracted_pedestrian_pc2 = create_point_cloud(extracted_3d_pts)
+            self.pub_pedestrians_pc2.publish(ros_extracted_pedestrian_pc2)
+
         
         # Used for visualization
         if(self.visualization):
diff --git a/GEMstack/onboard/perception/fusion_utils.py b/GEMstack/onboard/perception/fusion_utils.py
index 24b61366c..69f27d5a6 100644
--- a/GEMstack/onboard/perception/fusion_utils.py
+++ b/GEMstack/onboard/perception/fusion_utils.py
@@ -1,9 +1,12 @@
-from sensor_msgs.msg import PointCloud2
+from sensor_msgs.msg import PointCloud2, PointField
 import numpy as np
 import sensor_msgs.point_cloud2 as pc2
 import open3d as o3d
 import cv2
 import json
+import rospy
+import numpy as np
+import struct
 
 
 def convert_pointcloud2_to_xyz(lidar_pc2_msg: PointCloud2):
@@ -85,7 +88,7 @@ def project_points(points_3d, K):
         if pt[2] > 0:  # only project points in front of the camera
             u = K[0, 0] * (pt[0] / pt[2]) + K[0, 2]
             v = K[1, 1] * (pt[1] / pt[2]) + K[1, 2]
-            proj_points.append((int(u), int(v)))
+            proj_points.append((int(u), int(v), pt[0], pt[1], pt[2]))
     return proj_points
 
 
@@ -134,10 +137,16 @@ def visualize_point_cloud(points):
     Args:
         points (np.ndarray): Nx3 array of point cloud coordinates.
     """
+    # Create a visualization window
+    vis = o3d.visualization.Visualizer()
+    vis.create_window()
+
     pc = o3d.geometry.PointCloud()
     pc.points = o3d.utility.Vector3dVector(points)
-    pc.paint_uniform_color([0.1, 0.7, 0.9])  # Light blue color
-    o3d.visualization.draw_geometries([pc])
+    pc.paint_uniform_color([0.1, 0.7, 0.9])
+
+    vis.add_geometry(pc)
+    vis.run()
 
 
 def visualize_plane(inlier_cloud, outlier_cloud, bounding_box_2d_points):
@@ -168,4 +177,28 @@ def visualize_plane(inlier_cloud, outlier_cloud, bounding_box_2d_points):
     bounding_box_lines.paint_uniform_color([0, 1, 0])  # Green for bounding box edges
     
     # Visualize
-    o3d.visualization.draw_geometries([inlier_cloud, outlier_cloud, bounding_box_pcd, bounding_box_lines])
\ No newline at end of file
+    o3d.visualization.draw_geometries([inlier_cloud, outlier_cloud, bounding_box_pcd, bounding_box_lines])
+
+
+def create_point_cloud(points, color=(255, 0, 0)):
+    """
+    Converts a list of (x, y, z) points into a PointCloud2 message.
+    """
+    header = rospy.Header()
+    header.stamp = rospy.Time.now()
+    header.frame_id = "map"  # Change to your TF frame
+
+    fields = [
+        PointField(name="x", offset=0, datatype=PointField.FLOAT32, count=1),
+        PointField(name="y", offset=4, datatype=PointField.FLOAT32, count=1),
+        PointField(name="z", offset=8, datatype=PointField.FLOAT32, count=1),
+        PointField(name="rgb", offset=12, datatype=PointField.FLOAT32, count=1),
+    ]
+
+    # Convert RGB color to packed float32
+    r, g, b = color
+    packed_color = struct.unpack('f', struct.pack('I', (r << 16) | (g << 8) | b))[0]
+
+    point_cloud_data = [(x, y, z, packed_color) for x, y, z in points]
+
+    return pc2.create_cloud(header, fields, point_cloud_data)
\ No newline at end of file
diff --git a/GEMstack/onboard/perception/transform.py b/GEMstack/onboard/perception/transform.py
index 4487e051c..5e442a093 100644
--- a/GEMstack/onboard/perception/transform.py
+++ b/GEMstack/onboard/perception/transform.py
@@ -1,5 +1,6 @@
 import rospy
 import tf
+import numpy as np
 
 def publish_tf():
     rospy.init_node('pointcloud_tf_broadcaster')
@@ -8,8 +9,8 @@ def publish_tf():
 
     while not rospy.is_shutdown():
         br.sendTransform(
-            (0, 0, 1),  # (x, y, z) translation
-            tf.transformations.quaternion_from_euler(0, 0, 0),  # (roll, pitch, yaw)
+            (0, 1.5, 7),  # (x, y, z) translation
+            tf.transformations.quaternion_from_euler(0.5* np.pi, 0, 0),  # (roll, pitch, yaw)
             rospy.Time.now(),
             "os_sensor",  # Child frame (sensor)
             "map"  # Parent frame (world)

From 49f45eef28f0af872b66973e5ce7d1b7e5806ed9 Mon Sep 17 00:00:00 2001
From: KenC1014 <kenken4016@gmail.com>
Date: Sat, 15 Feb 2025 02:17:54 -0600
Subject: [PATCH 12/46] segregate points by pedestrains

---
 GEMstack/onboard/perception/fusion.py       | 26 +++++-----
 GEMstack/onboard/perception/fusion_utils.py | 53 +--------------------
 2 files changed, 16 insertions(+), 63 deletions(-)

diff --git a/GEMstack/onboard/perception/fusion.py b/GEMstack/onboard/perception/fusion.py
index f48da3e1b..6bc0ab406 100644
--- a/GEMstack/onboard/perception/fusion.py
+++ b/GEMstack/onboard/perception/fusion.py
@@ -60,7 +60,11 @@ def fusion_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2):
         boxes = track_result[0].boxes
 
         # Unpacking box dimentions detected into x,y,w,h
-        all_extracted_pts = []
+        extracted_2d_pedestrians_pts = []
+        extracted_3d_pedestrians_pts = []
+        flattened_extracted_2d_pedestrians_pts = []
+        flattened_extracted_3d_pedestrians_pts = []
+
         for box in boxes:
             xywh = box.xywh[0].tolist()
             self.last_person_boxes.append(xywh)
@@ -80,25 +84,25 @@ def fusion_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2):
                                     (pts[:, 1] < bottom_bound)
                                     ]
                 
-                all_extracted_pts = all_extracted_pts + list(extracted_pts)
+                extracted_2d_pts = list(np.array(extracted_pts)[:, :2].astype(int))
+                extracted_2d_pedestrians_pts.append(extracted_2d_pts)
+                flattened_extracted_2d_pedestrians_pts = flattened_extracted_2d_pedestrians_pts + extracted_2d_pts
+               
+                extracted_3d_pts = list(np.array(extracted_pts)[:, -3:])
+                extracted_3d_pedestrians_pts.append(extracted_3d_pts)
+                flattened_extracted_3d_pedestrians_pts = flattened_extracted_3d_pedestrians_pts + extracted_3d_pts
 
             # Used for visualization
             if(self.visualization):
                 cv_image = vis_2d_bbox(cv_image, xywh, box)
         
-        if len(all_extracted_pts) > 0:
-            # Extract 2D points
-            extracted_2d_pts = list(np.array(all_extracted_pts)[:, :2].astype(int))
-
+        if len(extracted_2d_pedestrians_pts) > 0:
             # Draw projected 2D LiDAR points on the image.
-            for pt in extracted_2d_pts:
+            for pt in flattened_extracted_2d_pedestrians_pts:
                 cv2.circle(cv_image, pt, 2, (0, 0, 255), -1)
 
-            # Extract 3D points
-            extracted_3d_pts = list(np.array(all_extracted_pts)[:, -3:])
-
             # Create point cloud from extracted 3D points
-            ros_extracted_pedestrian_pc2 = create_point_cloud(extracted_3d_pts)
+            ros_extracted_pedestrian_pc2 = create_point_cloud(flattened_extracted_3d_pedestrians_pts)
             self.pub_pedestrians_pc2.publish(ros_extracted_pedestrian_pc2)
 
         
diff --git a/GEMstack/onboard/perception/fusion_utils.py b/GEMstack/onboard/perception/fusion_utils.py
index 69f27d5a6..b8ffd03e1 100644
--- a/GEMstack/onboard/perception/fusion_utils.py
+++ b/GEMstack/onboard/perception/fusion_utils.py
@@ -29,7 +29,7 @@ def downsample_points(lidar_points):
     return transformed_points
 
 
-# Credits: The following lines of codes (17 to 159 excluding lines 80 to 115) are adapted from the Calibration Team B
+# Credits: The following lines of codes (from 33 to 92) are adapted from the Calibration Team B
 def load_extrinsics(extrinsics_file):
     """
     Load calibrated extrinsics from a .npz file.
@@ -126,60 +126,9 @@ def vis_2d_bbox(image, xywh, box):
 
     # Draw main text on top of the outline
     cv2.putText(image, label, (text_x, text_y - baseline), font, font_scale, font_color, text_thickness)
-
     return image
 
 
-def visualize_point_cloud(points):
-    """
-    Visualizes the given point cloud using Open3D.
-
-    Args:
-        points (np.ndarray): Nx3 array of point cloud coordinates.
-    """
-    # Create a visualization window
-    vis = o3d.visualization.Visualizer()
-    vis.create_window()
-
-    pc = o3d.geometry.PointCloud()
-    pc.points = o3d.utility.Vector3dVector(points)
-    pc.paint_uniform_color([0.1, 0.7, 0.9])
-
-    vis.add_geometry(pc)
-    vis.run()
-
-
-def visualize_plane(inlier_cloud, outlier_cloud, bounding_box_2d_points):
-    """
-    Visualizes the detected plane with its 2D bounding box.
-
-    :param inlier_cloud: Open3D point cloud containing plane points.
-    :param outlier_cloud: Open3D point cloud containing non-plane points.
-    :param bounding_box_2d_points: 4 corner points of the 2D bounding box on the plane.
-    """
-    inlier_cloud.paint_uniform_color([1, 0, 0])  # Red for the plane
-    outlier_cloud.paint_uniform_color([0.5, 0.5, 0.5])  # Gray for other points
-
-    # Create bounding box visualization
-    bounding_box_pcd = o3d.geometry.PointCloud()
-    bounding_box_pcd.points = o3d.utility.Vector3dVector(bounding_box_2d_points)
-    bounding_box_pcd.paint_uniform_color([0, 1, 0])  # Green for bounding box corners
-    
-    # Create a bounding box line set (connect corners)
-    lines = [
-        [0, 1], [1, 2], [2, 3], [3, 0]   # Edges of the rectangle
-    ]
-    
-    bounding_box_lines = o3d.geometry.LineSet()
-    bounding_box_lines.points = o3d.utility.Vector3dVector(bounding_box_2d_points)
-    bounding_box_lines.lines = o3d.utility.Vector2iVector(lines)
-    
-    bounding_box_lines.paint_uniform_color([0, 1, 0])  # Green for bounding box edges
-    
-    # Visualize
-    o3d.visualization.draw_geometries([inlier_cloud, outlier_cloud, bounding_box_pcd, bounding_box_lines])
-
-
 def create_point_cloud(points, color=(255, 0, 0)):
     """
     Converts a list of (x, y, z) points into a PointCloud2 message.

From d3c9e4c1a4f76217c3c1971690c6cc9eb7c9907b Mon Sep 17 00:00:00 2001
From: KenC1014 <kenken4016@gmail.com>
Date: Sat, 15 Feb 2025 02:19:43 -0600
Subject: [PATCH 13/46] update comments

---
 GEMstack/onboard/perception/fusion.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/GEMstack/onboard/perception/fusion.py b/GEMstack/onboard/perception/fusion.py
index 6bc0ab406..a7cd21713 100644
--- a/GEMstack/onboard/perception/fusion.py
+++ b/GEMstack/onboard/perception/fusion.py
@@ -40,6 +40,7 @@ def __init__(self):
 
 
     def fusion_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2):
+        # Convert to cv2 image and run detector
         cv_image = self.bridge.imgmsg_to_cv2(rgb_image_msg, "bgr8") 
         track_result = self.detector.track(source=cv_image, classes=self.classes_to_detect, persist=True, conf=self.confidence)
 

From c23b36c5704713e0edc8b372c0ec58a9d816080b Mon Sep 17 00:00:00 2001
From: KenC1014 <kenken4016@gmail.com>
Date: Sat, 15 Feb 2025 02:28:39 -0600
Subject: [PATCH 14/46] rename variables

---
 GEMstack/onboard/perception/fusion.py | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/GEMstack/onboard/perception/fusion.py b/GEMstack/onboard/perception/fusion.py
index a7cd21713..0ac34638d 100644
--- a/GEMstack/onboard/perception/fusion.py
+++ b/GEMstack/onboard/perception/fusion.py
@@ -61,10 +61,9 @@ def fusion_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2):
         boxes = track_result[0].boxes
 
         # Unpacking box dimentions detected into x,y,w,h
-        extracted_2d_pedestrians_pts = []
-        extracted_3d_pedestrians_pts = []
-        flattened_extracted_2d_pedestrians_pts = []
-        flattened_extracted_3d_pedestrians_pts = []
+        pedestrians_3d_pts = []
+        flattened_pedestrians_2d_pts = []
+        flattened_pedestrians_3d_pts = []
 
         for box in boxes:
             xywh = box.xywh[0].tolist()
@@ -86,24 +85,23 @@ def fusion_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2):
                                     ]
                 
                 extracted_2d_pts = list(np.array(extracted_pts)[:, :2].astype(int))
-                extracted_2d_pedestrians_pts.append(extracted_2d_pts)
-                flattened_extracted_2d_pedestrians_pts = flattened_extracted_2d_pedestrians_pts + extracted_2d_pts
+                flattened_pedestrians_2d_pts = flattened_pedestrians_2d_pts + extracted_2d_pts
                
                 extracted_3d_pts = list(np.array(extracted_pts)[:, -3:])
-                extracted_3d_pedestrians_pts.append(extracted_3d_pts)
-                flattened_extracted_3d_pedestrians_pts = flattened_extracted_3d_pedestrians_pts + extracted_3d_pts
+                pedestrians_3d_pts.append(extracted_3d_pts)
+                flattened_pedestrians_3d_pts = flattened_pedestrians_3d_pts + extracted_3d_pts
 
             # Used for visualization
             if(self.visualization):
                 cv_image = vis_2d_bbox(cv_image, xywh, box)
         
-        if len(extracted_2d_pedestrians_pts) > 0:
+        if len(pedestrians_3d_pts) > 0:
             # Draw projected 2D LiDAR points on the image.
-            for pt in flattened_extracted_2d_pedestrians_pts:
+            for pt in flattened_pedestrians_2d_pts:
                 cv2.circle(cv_image, pt, 2, (0, 0, 255), -1)
 
             # Create point cloud from extracted 3D points
-            ros_extracted_pedestrian_pc2 = create_point_cloud(flattened_extracted_3d_pedestrians_pts)
+            ros_extracted_pedestrian_pc2 = create_point_cloud(flattened_pedestrians_3d_pts)
             self.pub_pedestrians_pc2.publish(ros_extracted_pedestrian_pc2)
 
         

From 5aa571b04bb4979a46c5942ea5c648c4f3f83b88 Mon Sep 17 00:00:00 2001
From: KenC1014 <kenken4016@gmail.com>
Date: Sat, 15 Feb 2025 02:30:54 -0600
Subject: [PATCH 15/46] remove empty line

---
 GEMstack/onboard/perception/fusion.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/GEMstack/onboard/perception/fusion.py b/GEMstack/onboard/perception/fusion.py
index 0ac34638d..c9f1101de 100644
--- a/GEMstack/onboard/perception/fusion.py
+++ b/GEMstack/onboard/perception/fusion.py
@@ -104,7 +104,6 @@ def fusion_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2):
             ros_extracted_pedestrian_pc2 = create_point_cloud(flattened_pedestrians_3d_pts)
             self.pub_pedestrians_pc2.publish(ros_extracted_pedestrian_pc2)
 
-        
         # Used for visualization
         if(self.visualization):
             ros_img = self.bridge.cv2_to_imgmsg(cv_image, 'bgr8')

From 9008a9ad563fe6128d99d41d5d094d93a798f1ab Mon Sep 17 00:00:00 2001
From: KenC1014 <kenken4016@gmail.com>
Date: Sat, 15 Feb 2025 16:16:24 -0600
Subject: [PATCH 16/46] add ground and max dist filter

---
 GEMstack/onboard/perception/fusion.py       | 20 ++++++++++++++++----
 GEMstack/onboard/perception/fusion_utils.py | 14 +++++++++++++-
 GEMstack/onboard/perception/transform.py    |  2 +-
 3 files changed, 30 insertions(+), 6 deletions(-)

diff --git a/GEMstack/onboard/perception/fusion.py b/GEMstack/onboard/perception/fusion.py
index c9f1101de..18f390935 100644
--- a/GEMstack/onboard/perception/fusion.py
+++ b/GEMstack/onboard/perception/fusion.py
@@ -18,6 +18,8 @@ def __init__(self):
         self.visualization = True
         self.confidence = 0.7
         self.classes_to_detect = 0
+        self.ground_threshold = 1.6
+        self.max_dist_percent = 0.7
 
         # Load calibration data
         self.R = load_extrinsics(os.getcwd() + '/GEMstack/onboard/perception/calibration/extrinsics/R.npy')
@@ -84,10 +86,17 @@ def fusion_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2):
                                     (pts[:, 1] < bottom_bound)
                                     ]
                 
-                extracted_2d_pts = list(np.array(extracted_pts)[:, :2].astype(int))
+                #if no points extracted for this bbox, skip
+                if len(extracted_pts) < 1:
+                    continue
+                
+                extracted_pts = filter_ground_points(extracted_pts, self.ground_threshold)
+                extracted_pts = filter_far_points(extracted_pts)
+                
+                extracted_2d_pts = list(extracted_pts[:, :2].astype(int))
                 flattened_pedestrians_2d_pts = flattened_pedestrians_2d_pts + extracted_2d_pts
-               
-                extracted_3d_pts = list(np.array(extracted_pts)[:, -3:])
+
+                extracted_3d_pts = list(extracted_pts[:, -3:])
                 pedestrians_3d_pts.append(extracted_3d_pts)
                 flattened_pedestrians_3d_pts = flattened_pedestrians_3d_pts + extracted_3d_pts
 
@@ -95,7 +104,10 @@ def fusion_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2):
             if(self.visualization):
                 cv_image = vis_2d_bbox(cv_image, xywh, box)
         
-        if len(pedestrians_3d_pts) > 0:
+        if len(flattened_pedestrians_3d_pts) > 0:
+            # print(f"x_dim pedestrians_3d_pts: {np.max(np.array(flattened_pedestrians_3d_pts)[:, 0]), np.min(np.array(flattened_pedestrians_3d_pts)[:, 0])}")
+            # print(f"y_dim pedestrians_3d_pts: {np.max(np.array(flattened_pedestrians_3d_pts)[:, 1]), np.min(np.array(flattened_pedestrians_3d_pts)[:, 1])}")
+            # print(f"z_dim pedestrians_3d_pts: {np.max(np.array(flattened_pedestrians_3d_pts)[:, 2]), np.min(np.array(flattened_pedestrians_3d_pts)[:, 2])}")
             # Draw projected 2D LiDAR points on the image.
             for pt in flattened_pedestrians_2d_pts:
                 cv2.circle(cv_image, pt, 2, (0, 0, 255), -1)
diff --git a/GEMstack/onboard/perception/fusion_utils.py b/GEMstack/onboard/perception/fusion_utils.py
index b8ffd03e1..4aacb4810 100644
--- a/GEMstack/onboard/perception/fusion_utils.py
+++ b/GEMstack/onboard/perception/fusion_utils.py
@@ -1,4 +1,5 @@
 from sensor_msgs.msg import PointCloud2, PointField
+from scipy.stats import zscore
 import numpy as np
 import sensor_msgs.point_cloud2 as pc2
 import open3d as o3d
@@ -23,12 +24,23 @@ def downsample_points(lidar_points):
     # Apply voxel grid downsampling
     voxel_size = 0.1  # Adjust for desired resolution
     downsampled_pcd = pcd.voxel_down_sample(voxel_size=voxel_size)
-
+    
     # Convert back to numpy array
     transformed_points = np.asarray(downsampled_pcd.points)
     return transformed_points
 
 
+def filter_ground_points(lidar_points, ground_threshold = 0):
+    filtered_array = lidar_points[lidar_points[:, 3] < ground_threshold]
+    return filtered_array
+
+
+def filter_far_points(lidar_points, max_dist_percent=0.85):
+    max_dist = np.max(lidar_points[:, 4])
+    filtered_array = lidar_points[lidar_points[:, 4] < max_dist_percent * max_dist]
+    return filtered_array
+
+
 # Credits: The following lines of codes (from 33 to 92) are adapted from the Calibration Team B
 def load_extrinsics(extrinsics_file):
     """
diff --git a/GEMstack/onboard/perception/transform.py b/GEMstack/onboard/perception/transform.py
index 5e442a093..6e921f56c 100644
--- a/GEMstack/onboard/perception/transform.py
+++ b/GEMstack/onboard/perception/transform.py
@@ -9,7 +9,7 @@ def publish_tf():
 
     while not rospy.is_shutdown():
         br.sendTransform(
-            (0, 1.5, 7),  # (x, y, z) translation
+            (0, 1.6, 7),  # (x, y, z) translation
             tf.transformations.quaternion_from_euler(0.5* np.pi, 0, 0),  # (roll, pitch, yaw)
             rospy.Time.now(),
             "os_sensor",  # Child frame (sensor)

From c31546dba56640a2794585b7efcc49af9c315e5f Mon Sep 17 00:00:00 2001
From: KenC1014 <kenken4016@gmail.com>
Date: Sat, 15 Feb 2025 16:35:57 -0600
Subject: [PATCH 17/46] add comments

---
 GEMstack/onboard/perception/fusion.py       | 6 +++---
 GEMstack/onboard/perception/fusion_utils.py | 4 +++-
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/GEMstack/onboard/perception/fusion.py b/GEMstack/onboard/perception/fusion.py
index 18f390935..bfd39f7c1 100644
--- a/GEMstack/onboard/perception/fusion.py
+++ b/GEMstack/onboard/perception/fusion.py
@@ -90,12 +90,15 @@ def fusion_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2):
                 if len(extracted_pts) < 1:
                     continue
                 
+                # Apply ground and max distance filter to extracted 5D points
                 extracted_pts = filter_ground_points(extracted_pts, self.ground_threshold)
                 extracted_pts = filter_far_points(extracted_pts)
                 
+                # Extract 2D pedestrians points camera frame
                 extracted_2d_pts = list(extracted_pts[:, :2].astype(int))
                 flattened_pedestrians_2d_pts = flattened_pedestrians_2d_pts + extracted_2d_pts
 
+                # Extract 3D pedestrians points in lidar frame
                 extracted_3d_pts = list(extracted_pts[:, -3:])
                 pedestrians_3d_pts.append(extracted_3d_pts)
                 flattened_pedestrians_3d_pts = flattened_pedestrians_3d_pts + extracted_3d_pts
@@ -105,9 +108,6 @@ def fusion_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2):
                 cv_image = vis_2d_bbox(cv_image, xywh, box)
         
         if len(flattened_pedestrians_3d_pts) > 0:
-            # print(f"x_dim pedestrians_3d_pts: {np.max(np.array(flattened_pedestrians_3d_pts)[:, 0]), np.min(np.array(flattened_pedestrians_3d_pts)[:, 0])}")
-            # print(f"y_dim pedestrians_3d_pts: {np.max(np.array(flattened_pedestrians_3d_pts)[:, 1]), np.min(np.array(flattened_pedestrians_3d_pts)[:, 1])}")
-            # print(f"z_dim pedestrians_3d_pts: {np.max(np.array(flattened_pedestrians_3d_pts)[:, 2]), np.min(np.array(flattened_pedestrians_3d_pts)[:, 2])}")
             # Draw projected 2D LiDAR points on the image.
             for pt in flattened_pedestrians_2d_pts:
                 cv2.circle(cv_image, pt, 2, (0, 0, 255), -1)
diff --git a/GEMstack/onboard/perception/fusion_utils.py b/GEMstack/onboard/perception/fusion_utils.py
index 4aacb4810..cd6fd1ce7 100644
--- a/GEMstack/onboard/perception/fusion_utils.py
+++ b/GEMstack/onboard/perception/fusion_utils.py
@@ -1,5 +1,4 @@
 from sensor_msgs.msg import PointCloud2, PointField
-from scipy.stats import zscore
 import numpy as np
 import sensor_msgs.point_cloud2 as pc2
 import open3d as o3d
@@ -31,11 +30,13 @@ def downsample_points(lidar_points):
 
 
 def filter_ground_points(lidar_points, ground_threshold = 0):
+    """ Filter points given an elevation of ground threshold """
     filtered_array = lidar_points[lidar_points[:, 3] < ground_threshold]
     return filtered_array
 
 
 def filter_far_points(lidar_points, max_dist_percent=0.85):
+    """ Filter points beyond a percentage threshold of max distance in a point cluster """
     max_dist = np.max(lidar_points[:, 4])
     filtered_array = lidar_points[lidar_points[:, 4] < max_dist_percent * max_dist]
     return filtered_array
@@ -100,6 +101,7 @@ def project_points(points_3d, K):
         if pt[2] > 0:  # only project points in front of the camera
             u = K[0, 0] * (pt[0] / pt[2]) + K[0, 2]
             v = K[1, 1] * (pt[1] / pt[2]) + K[1, 2]
+            # 5D points that stores original lidar points
             proj_points.append((int(u), int(v), pt[0], pt[1], pt[2]))
     return proj_points
 

From 006b5550658e6f0028f65ef59d3b61c1c6f3505b Mon Sep 17 00:00:00 2001
From: KenC1014 <kenken4016@gmail.com>
Date: Sat, 15 Feb 2025 16:42:45 -0600
Subject: [PATCH 18/46] update comments

---
 GEMstack/onboard/perception/fusion.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/GEMstack/onboard/perception/fusion.py b/GEMstack/onboard/perception/fusion.py
index bfd39f7c1..6208d5e0e 100644
--- a/GEMstack/onboard/perception/fusion.py
+++ b/GEMstack/onboard/perception/fusion.py
@@ -90,11 +90,11 @@ def fusion_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2):
                 if len(extracted_pts) < 1:
                     continue
                 
-                # Apply ground and max distance filter to extracted 5D points
+                # Apply ground and max distance filter to the extracted 5D points
                 extracted_pts = filter_ground_points(extracted_pts, self.ground_threshold)
                 extracted_pts = filter_far_points(extracted_pts)
                 
-                # Extract 2D pedestrians points camera frame
+                # Extract 2D pedestrians points in camera frame
                 extracted_2d_pts = list(extracted_pts[:, :2].astype(int))
                 flattened_pedestrians_2d_pts = flattened_pedestrians_2d_pts + extracted_2d_pts
 

From b35252bed6e3ca6c81ea5415a030282472af065b Mon Sep 17 00:00:00 2001
From: lukasdumasius <lukasdumasius@gmail.com>
Date: Mon, 17 Feb 2025 01:40:24 -0600
Subject: [PATCH 19/46] Finalize fusion.py with pedestrian centers, vel, dims,
 AgentStates. Merging into pedestrian_detection next.

---
 GEMstack/onboard/perception/fusion.py       | 270 +++++++++++++++-----
 GEMstack/onboard/perception/fusion_utils.py |   5 +-
 GEMstack/state/agent.py                     |   4 +
 3 files changed, 212 insertions(+), 67 deletions(-)

diff --git a/GEMstack/onboard/perception/fusion.py b/GEMstack/onboard/perception/fusion.py
index 6208d5e0e..6255f47f0 100644
--- a/GEMstack/onboard/perception/fusion.py
+++ b/GEMstack/onboard/perception/fusion.py
@@ -1,21 +1,66 @@
-from cv_bridge import CvBridge
-from sensor_msgs.msg import Image, PointCloud2
-from ultralytics import YOLO
-from fusion_utils import *
+"""
+Lidar + Camera fusion and object detection
+"""
+"""
+Terminal 1:
+---------------
+source /opt/ros/noetic/setup.bash && source /catkin_ws/devel/setup.bash
+roscore
+
+Terminal 2:
+---------------
+source /opt/ros/noetic/setup.bash && source /catkin_ws/devel/setup.bash
+python3 /home/Admin/transform.py
+
+Terminal 3:
+---------------
+source /opt/ros/noetic/setup.bash && source /catkin_ws/devel/setup.bash
+rosbag play ~/vehicle.bag
+
+Terminal 4:
+---------------
+source /opt/ros/noetic/setup.bash && source /catkin_ws/devel/setup.bash
+cd GEMStack
+python3 -m GEMstack.onboard.perception.fusion
+
+Terminal 5:
+---------------
+source /opt/ros/noetic/setup.bash && source /catkin_ws/devel/setup.bash
+rviz
+"""
+
+# Python 
+import os
+from typing import List, Dict
+from collections import defaultdict
+# ROS, CV
 import rospy
 import message_filters
-import os
 import tf
-
+from cv_bridge import CvBridge
+from sensor_msgs.msg import Image, PointCloud2
+# GEMStack
+from ...state import AllState,VehicleState,ObjectPose,ObjectFrameEnum,AgentState,AgentEnum,AgentActivityEnum
+from ultralytics import YOLO
+from .fusion_utils import *
 
 class Fusion3D():
+    # TODO: Pull params into a JSON/yaml
+    # TODO: Convert lists into np.arrays where possible, vectorize calculations
+    # TODO: Implement logging instead of print
+    # TODO: Finish splitting this class + utils into separate classes
+    # TODO: Decide if we want to name dets "peds" or "objs"/"agents"
+    #       Maybe peds for now and Agents in agent_detection.py?
     def __init__(self):
+        # Publish debug/viz rostopics if true
+        self.debug = True
         # Setup variables
         self.bridge = CvBridge()
+        # TODO: Wrap detector into GEMDetector?
         self.detector = YOLO(os.getcwd()+'/GEMstack/knowledge/detection/yolov8n.pt')
-        self.last_person_boxes = [] 
-        self.pedestrians = {}
-        self.visualization = True
+        # track_id: AgentState
+        self.prev_agents = dict()         
+        self.current_agents = dict()
         self.confidence = 0.7
         self.classes_to_detect = 0
         self.ground_threshold = 1.6
@@ -30,46 +75,169 @@ def __init__(self):
         self.rgb_rosbag = message_filters.Subscriber('/oak/rgb/image_raw', Image)
         self.top_lidar_rosbag = message_filters.Subscriber('/ouster/points', PointCloud2)
         self.sync = message_filters.ApproximateTimeSynchronizer([self.rgb_rosbag, self.top_lidar_rosbag], queue_size=10, slop=0.1)
-        self.sync.registerCallback(self.fusion_callback)
+        self.sync.registerCallback(self.ouster_oak_callback)
 
         # TF listener to get transformation from LiDAR to Camera
         self.tf_listener = tf.TransformListener()
 
-        # Publishers
+        if self.debug: self.init_debug()
+    
+    def init_debug(self,):
+         # Debug Publishers
         self.pub_pedestrians_pc2 = rospy.Publisher("/point_cloud/pedestrians", PointCloud2, queue_size=10)
-        if(self.visualization):
-            self.pub_image = rospy.Publisher("/camera/image_detection", Image, queue_size=1)
+        self.pub_obj_centers_pc2 = rospy.Publisher("/point_cloud/obj_centers", PointCloud2, queue_size=10)
+        self.pub_image = rospy.Publisher("/camera/image_detection", Image, queue_size=1)
+
+    def update(self, vehicle : VehicleState) -> Dict[str,AgentState]:
+        return self.current_agents
+
+    # TODO: Improve Algo Knn, ransac, etc.
+    def find_centers(self, clusters: List[List[np.ndarray]]) -> List[np.ndarray]:
+        clusters = [np.array(clust) for clust in clusters]
+        centers = [np.array(()) if clust.size == 0 else np.mean(clust, axis=0) for clust in clusters]
+        return centers
+    
+    # Beware: AgentState(PhysicalObject) builds bbox from 
+    # dims [-l/2,l/2] x [-w/2,w/2] x [0,h], not
+    # [-l/2,l/2] x [-w/2,w/2] x [-h/2,h/2]
+    def find_dims(self, clusters: List[List[np.ndarray]]) -> List[np.ndarray]:
+        # Add some small constant to height to compensate for
+        # objects distance to ground we filtered from lidar, 
+        # other heuristics to imrpove stability for find_ funcs ?
+        clusters = [np.array(clust) for clust in clusters]
+        dims = [np.array(()) if clust.size == 0 else np.max(clust, axis= 0) - np.min(clust, axis= 0) for clust in clusters]
+        return dims
+
+    # TODO: Slower but cleaner to input self.current_agents dict
+    # TODO: Moving Average across last N iterations pos/vel? Less spurious vals
+    # TODO Akul: Fix velocity calculation to calculate in ObjectFrameEnum.START
+    #            work towards own tracking class instead of simple YOLO track?
+    def find_vels(self, track_ids, obj_centers):
+        # Object not seen -> velocity = None
+        track_id_center_map = dict(zip(track_ids, obj_centers))
+        vels = defaultdict(lambda: np.array(())) # None is faster, np.array matches other find_ methods.
+
+        for prev_track_id, prev_agent in self.prev_agents.items():
+            if prev_track_id in track_ids:
+                # TODO: Add prev_agents to memory to avoid None velocity
+                # We should only be missing prev pose on first sight of track_id Agent.
+                # print("shape 1: ", track_id_center_map[prev_agent.track_id])
+                # print("shape 2: ", np.array([prev_agent.pose.x, prev_agent.pose.y, prev_agent.pose.z]))
+                # prev can be 3 separate Nones, current is just empty array... make this symmetrical
+                if prev_agent.pose.x and prev_agent.pose.y and prev_agent.pose.z and track_id_center_map[prev_agent.track_id].shape == 3:
+                    vels[prev_track_id] = track_id_center_map[prev_track_id] - np.array([prev_agent.pose.x, prev_agent.pose.y, prev_agent.pose.z])
+        return vels
+
 
+    # TODO: Separate debug/viz class, separate this func into bbox and 2d 3d points
+    def viz_object_states(self, cv_image, boxes, extracted_pts_all):
+        # Extract 3D pedestrians points in lidar frame
+        # ** These are camera frame after transform_lidar_points, right?
+        pedestrians_3d_pts = [list(extracted_pts[:, -3:]) for extracted_pts in extracted_pts_all] 
 
-    def fusion_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2):
+        # Object center viz
+        obj_3d_obj_centers = list()
+        for track_id, agent in self.current_agents.items():
+            if agent.pose.x != None and agent.pose.y != None and agent.pose.z != None:
+                obj_3d_obj_centers.append((agent.pose.x, agent.pose.y, agent.pose.z)) # **
+        if len(obj_3d_obj_centers) > 0:
+            ros_obj_3d_obj_centers_pc2 = create_point_cloud(obj_3d_obj_centers, color=(0, 128, 0))
+            self.pub_obj_centers_pc2.publish(ros_obj_3d_obj_centers_pc2)
+        
+        # Extract 2D pedestrians points and bbox in camera frame
+        extracted_2d_pts = [list(extracted_pts[:, :2].astype(int)) for extracted_pts in extracted_pts_all]
+        flattened_pedestrians_2d_pts = list()
+        for pts in extracted_2d_pts:    flattened_pedestrians_2d_pts.extend(pts)
+
+        for ind, bbox in enumerate(boxes):
+            xywh = bbox.xywh[0].tolist()
+            cv_image = vis_2d_bbox(cv_image, xywh, bbox)
+        
+        flattened_pedestrians_3d_pts = list() 
+        for pts in pedestrians_3d_pts: flattened_pedestrians_3d_pts.extend(pts)
+
+        # Draw projected 2D LiDAR points on the image.
+        for pt in flattened_pedestrians_2d_pts:
+            cv2.circle(cv_image, pt, 2, (0, 0, 255), -1)
+        ros_img = self.bridge.cv2_to_imgmsg(cv_image, 'bgr8')
+        self.pub_image.publish(ros_img)  
+
+        # Draw 3D pedestrian pointclouds
+        # Create point cloud from extracted 3D points
+        ros_extracted_pedestrian_pc2 = create_point_cloud(flattened_pedestrians_3d_pts)
+        self.pub_pedestrians_pc2.publish(ros_extracted_pedestrian_pc2)
+        
+
+    def update_object_states(self, track_result, extracted_pts_all) -> None:
+        self.prev_agents = self.current_agents.copy()
+        self.current_agents.clear() 
+
+        # Change pedestrians_3d_pts to dicts matching track_ids
+        track_ids = track_result[0].boxes.id.int().cpu().tolist()
+        num_objs = len(track_ids)
+        boxes = track_result[0].boxes
+
+        # Extract 3D pedestrians points in lidar frame
+        # ** These are camera frame after transform_lidar_points, right?
+        pedestrians_3d_pts = [list(extracted_pts[:, -3:]) for extracted_pts in extracted_pts_all] 
+        if len(pedestrians_3d_pts) != num_objs:
+            raise Exception('Perception - Camera detections, points clusters num. mismatch')
+        
+        # TODO: Slower but cleaner to pass dicts of AgentState
+        #       or at least {track_ids: centers/pts/etc}
+        # TODO: Combine funcs for efficiency in C.
+        #       Separate numpy prob still faster for now
+        obj_centers = self.find_centers(pedestrians_3d_pts)
+        obj_dims = self.find_dims(pedestrians_3d_pts)
+        obj_vels = self.find_vels(track_ids, obj_centers)
+
+        # Update Current AgentStates
+        for ind in range(num_objs):
+            obj_center = (None, None, None) if obj_centers[ind].size == 0 else obj_centers[ind]
+            obj_dim = (None, None, None) if obj_dims[ind].size == 0 else obj_dims[ind]
+            self.current_agents[track_ids[ind]] = (
+                AgentState(
+                    track_id = track_ids[ind],
+                    pose=ObjectPose(t=0, x=obj_center[0], y=obj_center[1], z=obj_center[2] ,yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT),
+                    # (l, w, h)
+                    # TODO: confirm (z -> l, x -> w, y -> h)
+                    dimensions=(obj_dim[2], obj_dim[0],obj_dim[1]),  
+                    outline=None,
+                    type=AgentEnum.PEDESTRIAN,
+                    activity=AgentActivityEnum.MOVING,
+                    velocity= None if obj_vels[track_ids[ind]].size == 0 else tuple(vels[track_ids[ind]]),
+                    yaw_rate=0
+                ))
+
+
+    def ouster_oak_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2):
         # Convert to cv2 image and run detector
         cv_image = self.bridge.imgmsg_to_cv2(rgb_image_msg, "bgr8") 
         track_result = self.detector.track(source=cv_image, classes=self.classes_to_detect, persist=True, conf=self.confidence)
 
         # Convert 1D PointCloud2 data to x, y, z coords
         lidar_points = convert_pointcloud2_to_xyz(lidar_pc2_msg)
+        # print("len lidar_points", len(lidar_points))
 
         # Downsample xyz point clouds
         downsampled_points = downsample_points(lidar_points)
+        # print("len downsampled_points", len(downsampled_points))
         
         # Transform LiDAR points into the camera coordinate frame.
         lidar_in_camera = transform_lidar_points(downsampled_points, self.R, self.t)
-    
+        # print("len lidar_in_camera", len(lidar_in_camera))
+
         # Project the transformed points into the image plane.
         projected_pts = project_points(lidar_in_camera, self.K)
+        # print("len projected_pts", len(projected_pts))
 
         # Process bboxes
-        self.last_person_boxes = []
         boxes = track_result[0].boxes
 
-        # Unpacking box dimentions detected into x,y,w,h
-        pedestrians_3d_pts = []
-        flattened_pedestrians_2d_pts = []
-        flattened_pedestrians_3d_pts = []
+        extracted_pts_all = list()
 
-        for box in boxes:
-            xywh = box.xywh[0].tolist()
-            self.last_person_boxes.append(xywh)
+        for ind, bbox in enumerate(boxes):
+            xywh = bbox.xywh[0].tolist()
 
             # Extracting projected pts
             x, y, w, h = xywh
@@ -78,49 +246,20 @@ def fusion_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2):
             top_bound = int(y - h / 2)
             bottom_bound = int(y + h / 2)
 
-            if len(projected_pts) > 0:
-                pts = np.array(projected_pts)
-                extracted_pts = pts[(pts[:, 0] > left_bound) &
-                                    (pts[:, 0] < right_bound) &
-                                    (pts[:, 1] > top_bound) &
-                                    (pts[:, 1] < bottom_bound)
-                                    ]
-                
-                #if no points extracted for this bbox, skip
-                if len(extracted_pts) < 1:
-                    continue
-                
-                # Apply ground and max distance filter to the extracted 5D points
-                extracted_pts = filter_ground_points(extracted_pts, self.ground_threshold)
-                extracted_pts = filter_far_points(extracted_pts)
-                
-                # Extract 2D pedestrians points in camera frame
-                extracted_2d_pts = list(extracted_pts[:, :2].astype(int))
-                flattened_pedestrians_2d_pts = flattened_pedestrians_2d_pts + extracted_2d_pts
-
-                # Extract 3D pedestrians points in lidar frame
-                extracted_3d_pts = list(extracted_pts[:, -3:])
-                pedestrians_3d_pts.append(extracted_3d_pts)
-                flattened_pedestrians_3d_pts = flattened_pedestrians_3d_pts + extracted_3d_pts
-
-            # Used for visualization
-            if(self.visualization):
-                cv_image = vis_2d_bbox(cv_image, xywh, box)
-        
-        if len(flattened_pedestrians_3d_pts) > 0:
-            # Draw projected 2D LiDAR points on the image.
-            for pt in flattened_pedestrians_2d_pts:
-                cv2.circle(cv_image, pt, 2, (0, 0, 255), -1)
-
-            # Create point cloud from extracted 3D points
-            ros_extracted_pedestrian_pc2 = create_point_cloud(flattened_pedestrians_3d_pts)
-            self.pub_pedestrians_pc2.publish(ros_extracted_pedestrian_pc2)
-
-        # Used for visualization
-        if(self.visualization):
-            ros_img = self.bridge.cv2_to_imgmsg(cv_image, 'bgr8')
-            self.pub_image.publish(ros_img)  
+            pts = np.array(projected_pts)
+            extracted_pts = pts[(pts[:, 0] > left_bound) &
+                                (pts[:, 0] < right_bound) &
+                                (pts[:, 1] > top_bound) &
+                                (pts[:, 1] < bottom_bound)
+                                ]
 
+            # Apply ground and max distance filter to the extracted 5D points
+            extracted_pts = filter_ground_points(extracted_pts, self.ground_threshold)
+            extracted_pts = filter_far_points(extracted_pts)
+            extracted_pts_all.append(extracted_pts)
+            
+        self.update_object_states(track_result, extracted_pts_all)
+        self.viz_object_states(cv_image, boxes, extracted_pts_all)
 
 
 if __name__ == '__main__':
@@ -128,4 +267,3 @@ def fusion_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2):
     Fusion3D()
     while not rospy.core.is_shutdown():
         rospy.rostime.wallsleep(0.5)
-
diff --git a/GEMstack/onboard/perception/fusion_utils.py b/GEMstack/onboard/perception/fusion_utils.py
index cd6fd1ce7..4163d7b2c 100644
--- a/GEMstack/onboard/perception/fusion_utils.py
+++ b/GEMstack/onboard/perception/fusion_utils.py
@@ -37,6 +37,7 @@ def filter_ground_points(lidar_points, ground_threshold = 0):
 
 def filter_far_points(lidar_points, max_dist_percent=0.85):
     """ Filter points beyond a percentage threshold of max distance in a point cluster """
+    if lidar_points.shape[0] == 0: return lidar_points
     max_dist = np.max(lidar_points[:, 4])
     filtered_array = lidar_points[lidar_points[:, 4] < max_dist_percent * max_dist]
     return filtered_array
@@ -164,4 +165,6 @@ def create_point_cloud(points, color=(255, 0, 0)):
 
     point_cloud_data = [(x, y, z, packed_color) for x, y, z in points]
 
-    return pc2.create_cloud(header, fields, point_cloud_data)
\ No newline at end of file
+    return pc2.create_cloud(header, fields, point_cloud_data)
+
+    
\ No newline at end of file
diff --git a/GEMstack/state/agent.py b/GEMstack/state/agent.py
index 757805837..b0ad7aaad 100644
--- a/GEMstack/state/agent.py
+++ b/GEMstack/state/agent.py
@@ -27,6 +27,10 @@ class AgentState(PhysicalObject):
     activity : AgentActivityEnum
     velocity : Tuple[float,float,float]     #estimated velocity in x,y,z, m/s and in agent's local frame
     yaw_rate : float                        #estimated yaw rate, in radians/s
+    # Added by Perception for persistent objs across frames
+    # Check track_id to verify this is the same Agent you've
+    # seen before
+    track_id : int      
 
     def velocity_local(self) -> Tuple[float,float,float]:
         """Returns velocity in m/s in the agent's local frame."""

From eeeeb5407c181f03243e24badb17eaff652194c6 Mon Sep 17 00:00:00 2001
From: lukasdumasius <lukasdumasius@gmail.com>
Date: Mon, 17 Feb 2025 02:06:39 -0600
Subject: [PATCH 20/46] Port fusion.py to pedestrian_detection.py

---
 GEMstack/onboard/perception/fusion.py         |   4 +-
 .../perception/pedestrian_detection.py        | 383 +++++++++++-------
 2 files changed, 248 insertions(+), 139 deletions(-)

diff --git a/GEMstack/onboard/perception/fusion.py b/GEMstack/onboard/perception/fusion.py
index 6255f47f0..df83bedba 100644
--- a/GEMstack/onboard/perception/fusion.py
+++ b/GEMstack/onboard/perception/fusion.py
@@ -15,7 +15,7 @@
 Terminal 3:
 ---------------
 source /opt/ros/noetic/setup.bash && source /catkin_ws/devel/setup.bash
-rosbag play ~/vehicle.bag
+rosbag play -l ~/rosbags/vehicle.bag
 
 Terminal 4:
 ---------------
@@ -259,7 +259,7 @@ def ouster_oak_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2):
             extracted_pts_all.append(extracted_pts)
             
         self.update_object_states(track_result, extracted_pts_all)
-        self.viz_object_states(cv_image, boxes, extracted_pts_all)
+        if self.debug: self.viz_object_states(cv_image, boxes, extracted_pts_all)
 
 
 if __name__ == '__main__':
diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py
index f91145274..1b5a246f9 100644
--- a/GEMstack/onboard/perception/pedestrian_detection.py
+++ b/GEMstack/onboard/perception/pedestrian_detection.py
@@ -1,14 +1,51 @@
+"""
+Lidar + Camera fusion and object detection
+"""
+"""
+Terminal 1:
+---------------
+source /opt/ros/noetic/setup.bash && source /catkin_ws/devel/setup.bash
+roscore
+
+Terminal 2:
+---------------
+source /opt/ros/noetic/setup.bash && source /catkin_ws/devel/setup.bash
+python3 /home/Admin/transform.py
+
+Terminal 3:
+---------------
+source /opt/ros/noetic/setup.bash && source /catkin_ws/devel/setup.bash
+rosbag play -l ~/rosbags/vehicle.bag
+
+Terminal 4:
+---------------
+source /opt/ros/noetic/setup.bash && source /catkin_ws/devel/setup.bash
+cd GEMStack
+python3 main.py --variant=detector_only launch/pedestrian_detection.yaml
+
+Terminal 5:
+---------------
+source /opt/ros/noetic/setup.bash && source /catkin_ws/devel/setup.bash
+rviz
+"""
+
+# Python 
+import os
+from typing import List, Dict
+from collections import defaultdict
+# ROS, CV
+import rospy
+import message_filters
+import tf
+from cv_bridge import CvBridge
+from sensor_msgs.msg import Image, PointCloud2
+# YOLO
+from ultralytics import YOLO
+# GEMStack
 from ...state import AllState,VehicleState,ObjectPose,ObjectFrameEnum,AgentState,AgentEnum,AgentActivityEnum
+from .fusion_utils import *
 from ..interface.gem import GEMInterface
 from ..component import Component
-from ultralytics import YOLO
-import cv2
-from typing import Dict
-from cv_bridge import CvBridge
-from sensor_msgs.msg import Image
-import rospy
-import os
-
 
 def box_to_fake_agent(box):
     """Creates a fake agent state from an (x,y,w,h) bounding box.
@@ -22,158 +59,230 @@ def box_to_fake_agent(box):
 
 
 class PedestrianDetector2D(Component):
-    """Detects pedestrians.
-    
-    self.vehicle -> GEM car info
-    self.detector -> pedestrian detection model
-    self.last_person_boxes -> used to store boxes detected in every frame of the video
-    self.pedestrians -> stores the agentstate (pedestrian) found during the video in a dict, not sure if required can be removed
-    self.visualization -> enable this to view pedestrian detected
-    self.confidence -> min model confidence level
-    self.classes_to_detect -> Classes for the model to detect
-    
-    """
-    def __init__(self,vehicle_interface : GEMInterface):
+    # TODO: Pull params into a JSON/yaml
+    # TODO: Convert lists into np.arrays where possible, vectorize calculations
+    # TODO: Implement logging instead of print
+    # TODO: Finish splitting this class + utils into separate classes
+    # TODO: Decide if we want to name dets "peds" or "objs"/"agents"
+    #       Maybe peds for now and Agents in agent_detection.py?
+    def __init__(self, vehicle_interface : GEMInterface):
         self.vehicle_interface = vehicle_interface
-        self.detector = YOLO(os.getcwd()+'/GEMstack/knowledge/detection/yolov8n.pt') # change to get model value from sys arg
-        self.last_person_boxes = [] 
-        self.pedestrians = {}
-        self.visualization = True # Set this to true for visualization, later change to get value from sys arg
+        # Publish debug/viz rostopics if true
+        self.debug = True
+        # Setup variables
+        self.bridge = CvBridge()
+        # TODO: Wrap detector into GEMDetector?
+        self.detector = YOLO(os.getcwd()+'/GEMstack/knowledge/detection/yolov8n.pt')
+        # track_id: AgentState
+        self.prev_agents = dict()         
+        self.current_agents = dict()
         self.confidence = 0.7
         self.classes_to_detect = 0
+        self.ground_threshold = 1.6
+        self.max_dist_percent = 0.7
 
-    def rate(self):
-        return 4.0
-    
-    def state_inputs(self):
-        return ['vehicle']
+        # Load calibration data
+        self.R = load_extrinsics(os.getcwd() + '/GEMstack/onboard/perception/calibration/extrinsics/R.npy')
+        self.t = load_extrinsics(os.getcwd() + '/GEMstack/onboard/perception/calibration/extrinsics/t.npy')
+        self.K = load_intrinsics(os.getcwd() + '/GEMstack/onboard/perception/calibration/camera_intrinsics.json')
+
+        # Subscribers and sychronizers
+        self.rgb_rosbag = message_filters.Subscriber('/oak/rgb/image_raw', Image)
+        self.top_lidar_rosbag = message_filters.Subscriber('/ouster/points', PointCloud2)
+        self.sync = message_filters.ApproximateTimeSynchronizer([self.rgb_rosbag, self.top_lidar_rosbag], queue_size=10, slop=0.1)
+        self.sync.registerCallback(self.ouster_oak_callback)
+
+        # TF listener to get transformation from LiDAR to Camera
+        self.tf_listener = tf.TransformListener()
+
+        if self.debug: self.init_debug()
     
-    def state_outputs(self):
-        return ['agents']
+    def init_debug(self,):
+         # Debug Publishers
+        self.pub_pedestrians_pc2 = rospy.Publisher("/point_cloud/pedestrians", PointCloud2, queue_size=10)
+        self.pub_obj_centers_pc2 = rospy.Publisher("/point_cloud/obj_centers", PointCloud2, queue_size=10)
+        self.pub_image = rospy.Publisher("/camera/image_detection", Image, queue_size=1)
+
+    def update(self, vehicle : VehicleState) -> Dict[str,AgentState]:
+        return self.current_agents
+
+    # TODO: Improve Algo Knn, ransac, etc.
+    def find_centers(self, clusters: List[List[np.ndarray]]) -> List[np.ndarray]:
+        clusters = [np.array(clust) for clust in clusters]
+        centers = [np.array(()) if clust.size == 0 else np.mean(clust, axis=0) for clust in clusters]
+        return centers
     
-    def initialize(self):
+    # Beware: AgentState(PhysicalObject) builds bbox from 
+    # dims [-l/2,l/2] x [-w/2,w/2] x [0,h], not
+    # [-l/2,l/2] x [-w/2,w/2] x [-h/2,h/2]
+    def find_dims(self, clusters: List[List[np.ndarray]]) -> List[np.ndarray]:
+        # Add some small constant to height to compensate for
+        # objects distance to ground we filtered from lidar, 
+        # other heuristics to imrpove stability for find_ funcs ?
+        clusters = [np.array(clust) for clust in clusters]
+        dims = [np.array(()) if clust.size == 0 else np.max(clust, axis= 0) - np.min(clust, axis= 0) for clust in clusters]
+        return dims
 
-        """Initializes subscribers and publishers
-        
-        self.vehicle_interface.subscribe_sensor -> GEM Car camera subscription
-        self.rosbag_test -> ROS Bag topic subscription
-        self.pub_image -> Publishes Image with detection for visualization
+    # TODO: Slower but cleaner to input self.current_agents dict
+    # TODO: Moving Average across last N iterations pos/vel? Less spurious vals
+    # TODO Akul: Fix velocity calculation to calculate in ObjectFrameEnum.START
+    #            work towards own tracking class instead of simple YOLO track?
+    def find_vels(self, track_ids, obj_centers):
+        # Object not seen -> velocity = None
+        track_id_center_map = dict(zip(track_ids, obj_centers))
+        vels = defaultdict(lambda: np.array(())) # None is faster, np.array matches other find_ methods.
 
-        """
-        #tell the vehicle to use image_callback whenever 'front_camera' gets a reading, and it expects images of type cv2.Mat
+        for prev_track_id, prev_agent in self.prev_agents.items():
+            if prev_track_id in track_ids:
+                # TODO: Add prev_agents to memory to avoid None velocity
+                # We should only be missing prev pose on first sight of track_id Agent.
+                # print("shape 1: ", track_id_center_map[prev_agent.track_id])
+                # print("shape 2: ", np.array([prev_agent.pose.x, prev_agent.pose.y, prev_agent.pose.z]))
+                # prev can be 3 separate Nones, current is just empty array... make this symmetrical
+                if prev_agent.pose.x and prev_agent.pose.y and prev_agent.pose.z and track_id_center_map[prev_agent.track_id].shape == 3:
+                    vels[prev_track_id] = track_id_center_map[prev_track_id] - np.array([prev_agent.pose.x, prev_agent.pose.y, prev_agent.pose.z])
+        return vels
 
-        # GEM Car subacriber
-        # self.vehicle_interface.subscribe_sensor('front_camera',self.image_callback,cv2.Mat)
 
-        # Webcam
-        # rospy.Subscriber('/webcam', Image, self.image_callback)
+    # TODO: Separate debug/viz class, separate this func into bbox and 2d 3d points
+    def viz_object_states(self, cv_image, boxes, extracted_pts_all):
+        # Extract 3D pedestrians points in lidar frame
+        # ** These are camera frame after transform_lidar_points, right?
+        pedestrians_3d_pts = [list(extracted_pts[:, -3:]) for extracted_pts in extracted_pts_all] 
 
-        # Testing with rosbag
-        self.rosbag_test = rospy.Subscriber('/oak/rgb/image_raw',Image, self.image_callback,queue_size=1)
-        if(self.visualization):
-            self.pub_image = rospy.Publisher("/camera/image_detection", Image, queue_size=1)
-        pass
-    
-    # Use cv2.Mat for GEM Car, Image for RosBag
-    def image_callback(self, image : Image): #image : cv2.Mat):
+        # Object center viz
+        obj_3d_obj_centers = list()
+        for track_id, agent in self.current_agents.items():
+            if agent.pose.x != None and agent.pose.y != None and agent.pose.z != None:
+                obj_3d_obj_centers.append((agent.pose.x, agent.pose.y, agent.pose.z)) # **
+        if len(obj_3d_obj_centers) > 0:
+            ros_obj_3d_obj_centers_pc2 = create_point_cloud(obj_3d_obj_centers, color=(0, 128, 0))
+            self.pub_obj_centers_pc2.publish(ros_obj_3d_obj_centers_pc2)
+        
+        # Extract 2D pedestrians points and bbox in camera frame
+        extracted_2d_pts = [list(extracted_pts[:, :2].astype(int)) for extracted_pts in extracted_pts_all]
+        flattened_pedestrians_2d_pts = list()
+        for pts in extracted_2d_pts:    flattened_pedestrians_2d_pts.extend(pts)
 
-        """Detects pedestrians using the model provided when new image is passed.
+        for ind, bbox in enumerate(boxes):
+            xywh = bbox.xywh[0].tolist()
+            cv_image = vis_2d_bbox(cv_image, xywh, bbox)
+        
+        flattened_pedestrians_3d_pts = list() 
+        for pts in pedestrians_3d_pts: flattened_pedestrians_3d_pts.extend(pts)
 
-        Converts Image.msg to cv2 format (Might need to change this to use cv2.Mat) and uses the model to detect pedestrian
-        IF visualization is true, will publish an image with pedestrians detected.
+        # Draw projected 2D LiDAR points on the image.
+        for pt in flattened_pedestrians_2d_pts:
+            cv2.circle(cv_image, pt, 2, (0, 0, 255), -1)
+        ros_img = self.bridge.cv2_to_imgmsg(cv_image, 'bgr8')
+        self.pub_image.publish(ros_img)  
 
-        Hardcoded values for now:
-            Detected only pedestrians -> Class = 0
-            Confidence level -> 0.7
+        # Draw 3D pedestrian pointclouds
+        # Create point cloud from extracted 3D points
+        ros_extracted_pedestrian_pc2 = create_point_cloud(flattened_pedestrians_3d_pts)
+        self.pub_pedestrians_pc2.publish(ros_extracted_pedestrian_pc2)
         
-        """
 
-        # Use Image directly for GEM Car
-        # track_result = self.detector.track(source=image, classes=self.classes_to_detect, persist=True, conf=self.confidence)
+    def update_object_states(self, track_result, extracted_pts_all) -> None:
+        self.prev_agents = self.current_agents.copy()
+        self.current_agents.clear() 
+
+        # Change pedestrians_3d_pts to dicts matching track_ids
+        track_ids = track_result[0].boxes.id.int().cpu().tolist()
+        num_objs = len(track_ids)
+        boxes = track_result[0].boxes
 
-        # Convert to CV2 format for RosBag
-        bridge = CvBridge() 
-        image = bridge.imgmsg_to_cv2(image, "bgr8") 
-        track_result = self.detector.track(source=image, classes=self.classes_to_detect, persist=True, conf=self.confidence)
+        # Extract 3D pedestrians points in lidar frame
+        # ** These are camera frame after transform_lidar_points, right?
+        pedestrians_3d_pts = [list(extracted_pts[:, -3:]) for extracted_pts in extracted_pts_all] 
+        if len(pedestrians_3d_pts) != num_objs:
+            raise Exception('Perception - Camera detections, points clusters num. mismatch')
+        
+        # TODO: Slower but cleaner to pass dicts of AgentState
+        #       or at least {track_ids: centers/pts/etc}
+        # TODO: Combine funcs for efficiency in C.
+        #       Separate numpy prob still faster for now
+        obj_centers = self.find_centers(pedestrians_3d_pts)
+        obj_dims = self.find_dims(pedestrians_3d_pts)
+        obj_vels = self.find_vels(track_ids, obj_centers)
 
-        self.last_person_boxes = []
+        # Update Current AgentStates
+        for ind in range(num_objs):
+            obj_center = (None, None, None) if obj_centers[ind].size == 0 else obj_centers[ind]
+            obj_dim = (None, None, None) if obj_dims[ind].size == 0 else obj_dims[ind]
+            self.current_agents[track_ids[ind]] = (
+                AgentState(
+                    track_id = track_ids[ind],
+                    pose=ObjectPose(t=0, x=obj_center[0], y=obj_center[1], z=obj_center[2] ,yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT),
+                    # (l, w, h)
+                    # TODO: confirm (z -> l, x -> w, y -> h)
+                    dimensions=(obj_dim[2], obj_dim[0],obj_dim[1]),  
+                    outline=None,
+                    type=AgentEnum.PEDESTRIAN,
+                    activity=AgentActivityEnum.MOVING,
+                    velocity= None if obj_vels[track_ids[ind]].size == 0 else tuple(vels[track_ids[ind]]),
+                    yaw_rate=0
+                ))
+
+    def ouster_oak_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2):
+        # Convert to cv2 image and run detector
+        cv_image = self.bridge.imgmsg_to_cv2(rgb_image_msg, "bgr8") 
+        track_result = self.detector.track(source=cv_image, classes=self.classes_to_detect, persist=True, conf=self.confidence)
+
+        # Convert 1D PointCloud2 data to x, y, z coords
+        lidar_points = convert_pointcloud2_to_xyz(lidar_pc2_msg)
+        # print("len lidar_points", len(lidar_points))
+
+        # Downsample xyz point clouds
+        downsampled_points = downsample_points(lidar_points)
+        # print("len downsampled_points", len(downsampled_points))
+        
+        # Transform LiDAR points into the camera coordinate frame.
+        lidar_in_camera = transform_lidar_points(downsampled_points, self.R, self.t)
+        # print("len lidar_in_camera", len(lidar_in_camera))
+
+        # Project the transformed points into the image plane.
+        projected_pts = project_points(lidar_in_camera, self.K)
+        # print("len projected_pts", len(projected_pts))
+
+        # Process bboxes
         boxes = track_result[0].boxes
 
-        # Used for visualization
-        if(self.visualization):
-            label_text = "Pedestrian "
-            font = cv2.FONT_HERSHEY_SIMPLEX
-            font_scale = 0.5
-            font_color = (255, 255, 255)  # White text
-            outline_color = (0, 0, 0)  # Black outline
-            line_type = 1
-            text_thickness = 2 # Text thickness
-            outline_thickness = 1  # Thickness of the text outline
-
-        # Unpacking box dimentions detected into x,y,w,h
-        for box in boxes:
-
-            xywh = box.xywh[0].tolist()
-            self.last_person_boxes.append(xywh)
+        extracted_pts_all = list()
+
+        for ind, bbox in enumerate(boxes):
+            xywh = bbox.xywh[0].tolist()
+
+            # Extracting projected pts
             x, y, w, h = xywh
-            id = box.id.item()
-
-            # Stores AgentState in a dict, can be removed if not required
-            pose = ObjectPose(t=0,x=x,y=y,z=0,yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT)
-            dims = (w,h,0)
-            if(id not in self.pedestrians.keys()):
-                self.pedestrians[id] = AgentState(pose=pose,dimensions=dims,outline=None,type=AgentEnum.PEDESTRIAN,activity=AgentActivityEnum.MOVING,velocity=(0,0,0),yaw_rate=0)
-            else:
-                self.pedestrians[id].pose = pose
-                self.pedestrians[id].dims = dims
-
-            # Used for visualization
-            if(self.visualization):
-                # Draw bounding box
-                cv2.rectangle(image, (int(x - w / 2), int(y - h / 2)), (int(x + w / 2), int(y + h / 2)), (255, 0, 255), 3)
-
-                # Define text label
-                x = int(x - w / 2)
-                y = int(y - h / 2)
-                label = label_text + str(id) + " : " + str(round(box.conf.item(), 2))
-
-                # Get text size
-                text_size, baseline = cv2.getTextSize(label, font, font_scale, line_type)
-                text_w, text_h = text_size
-
-                # Position text above the bounding box
-                text_x = x
-                text_y = y - 10 if y - 10 > 10 else y + h + text_h
-
-                # Draw text outline for better visibility, uncomment for outline
-                # for dx, dy in [(-1, -1), (-1, 1), (1, -1), (1, 1)]:  
-                #     cv2.putText(image, label, (text_x + dx, text_y - baseline + dy), font, font_scale, outline_color, outline_thickness)
-
-                # Draw main text on top of the outline
-                cv2.putText(image, label, (text_x, text_y - baseline), font, font_scale, font_color, text_thickness)
+            left_bound = int(x - w / 2)
+            right_bound = int(x + w / 2)
+            top_bound = int(y - h / 2)
+            bottom_bound = int(y + h / 2)
 
-        
-        # Used for visualization
-        if(self.visualization):
-            ros_img = bridge.cv2_to_imgmsg(image, 'bgr8')
-            self.pub_image.publish(ros_img)
-
-        #uncomment if you want to debug the detector...
-        # print(self.last_person_boxes)
-        # print(self.pedestrians.keys())
-        #for bb in self.last_person_boxes:
-        #    x,y,w,h = bb
-        #    cv2.rectangle(image, (int(x-w/2), int(y-h/2)), (int(x+w/2), int(y+h/2)), (255, 0, 255), 3)
-        #cv2.imwrite("pedestrian_detections.png",image)
+            pts = np.array(projected_pts)
+            extracted_pts = pts[(pts[:, 0] > left_bound) &
+                                (pts[:, 0] < right_bound) &
+                                (pts[:, 1] > top_bound) &
+                                (pts[:, 1] < bottom_bound)
+                                ]
+
+            # Apply ground and max distance filter to the extracted 5D points
+            extracted_pts = filter_ground_points(extracted_pts, self.ground_threshold)
+            extracted_pts = filter_far_points(extracted_pts)
+            extracted_pts_all.append(extracted_pts)
+            
+        self.update_object_states(track_result, extracted_pts_all)
+        if self.debug: self.viz_object_states(cv_image, boxes, extracted_pts_all)
+
+    def rate(self):
+        return 4.0
     
-    def update(self, vehicle : VehicleState) -> Dict[str,AgentState]:
-        res = {}
-        for i,b in enumerate(self.last_person_boxes):
-            x,y,w,h = b
-            res['pedestrian'+str(i)] = box_to_fake_agent(b)
-        if len(res) > 0:
-            print("Detected",len(res),"pedestrians")
-        return res
+    def state_inputs(self):
+        return ['vehicle']
+    
+    def state_outputs(self):
+        return ['agents']
 
 
 class FakePedestrianDetector2D(Component):

From 19ad11ea327334785ec62d8c4f30bb86b36f8f40 Mon Sep 17 00:00:00 2001
From: lukasdumasius <lukasdumasius@gmail.com>
Date: Mon, 17 Feb 2025 02:11:10 -0600
Subject: [PATCH 21/46] Add transform.py instructions

---
 GEMstack/onboard/perception/fusion.py               | 2 +-
 GEMstack/onboard/perception/pedestrian_detection.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/GEMstack/onboard/perception/fusion.py b/GEMstack/onboard/perception/fusion.py
index df83bedba..838e5a0db 100644
--- a/GEMstack/onboard/perception/fusion.py
+++ b/GEMstack/onboard/perception/fusion.py
@@ -10,7 +10,7 @@
 Terminal 2:
 ---------------
 source /opt/ros/noetic/setup.bash && source /catkin_ws/devel/setup.bash
-python3 /home/Admin/transform.py
+python3 GEMstack/onboard/perception/transform.py
 
 Terminal 3:
 ---------------
diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py
index 1b5a246f9..bc27871ce 100644
--- a/GEMstack/onboard/perception/pedestrian_detection.py
+++ b/GEMstack/onboard/perception/pedestrian_detection.py
@@ -10,7 +10,7 @@
 Terminal 2:
 ---------------
 source /opt/ros/noetic/setup.bash && source /catkin_ws/devel/setup.bash
-python3 /home/Admin/transform.py
+python3 GEMstack/onboard/perception/transform.py
 
 Terminal 3:
 ---------------

From 0a214246a7d5ab07214a5d8508c73e7c3ead6b34 Mon Sep 17 00:00:00 2001
From: lukasdumasius <lukasdumasius@gmail.com>
Date: Mon, 17 Feb 2025 03:21:09 -0600
Subject: [PATCH 22/46] pedestrian_detection.py type hints

---
 GEMstack/onboard/perception/fusion.py         |  9 +++---
 .../perception/pedestrian_detection.py        | 32 +++++++++----------
 ...utils.py => pedestrian_detection_utils.py} |  0
 3 files changed, 21 insertions(+), 20 deletions(-)
 rename GEMstack/onboard/perception/{fusion_utils.py => pedestrian_detection_utils.py} (100%)

diff --git a/GEMstack/onboard/perception/fusion.py b/GEMstack/onboard/perception/fusion.py
index 838e5a0db..7d6558433 100644
--- a/GEMstack/onboard/perception/fusion.py
+++ b/GEMstack/onboard/perception/fusion.py
@@ -1,5 +1,5 @@
 """
-Lidar + Camera fusion and object detection
+Top ouster lidar + Oak front camera fusion, object detection
 """
 """
 Terminal 1:
@@ -46,9 +46,9 @@
 
 class Fusion3D():
     # TODO: Pull params into a JSON/yaml
-    # TODO: Convert lists into np.arrays where possible, vectorize calculations
-    # TODO: Implement logging instead of print
-    # TODO: Finish splitting this class + utils into separate classes
+    # TODO: Convert some lists into np.arrays, vectorize calculations
+    # TODO: Implement logging instead of print, cleanup comments
+    # TODO: Cleanup funcs + split into separate classes
     # TODO: Decide if we want to name dets "peds" or "objs"/"agents"
     #       Maybe peds for now and Agents in agent_detection.py?
     def __init__(self):
@@ -112,6 +112,7 @@ def find_dims(self, clusters: List[List[np.ndarray]]) -> List[np.ndarray]:
     # TODO: Moving Average across last N iterations pos/vel? Less spurious vals
     # TODO Akul: Fix velocity calculation to calculate in ObjectFrameEnum.START
     #            work towards own tracking class instead of simple YOLO track?
+    # ret: Dict[track_id: vel[x, y, z]]
     def find_vels(self, track_ids, obj_centers):
         # Object not seen -> velocity = None
         track_id_center_map = dict(zip(track_ids, obj_centers))
diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py
index bc27871ce..dd67ed065 100644
--- a/GEMstack/onboard/perception/pedestrian_detection.py
+++ b/GEMstack/onboard/perception/pedestrian_detection.py
@@ -1,5 +1,5 @@
 """
-Lidar + Camera fusion and object detection
+Top ouster lidar + Oak front camera fusion, object detection
 """
 """
 Terminal 1:
@@ -41,9 +41,10 @@
 from sensor_msgs.msg import Image, PointCloud2
 # YOLO
 from ultralytics import YOLO
+from ultralytics.engine.results import Results, Boxes
 # GEMStack
 from ...state import AllState,VehicleState,ObjectPose,ObjectFrameEnum,AgentState,AgentEnum,AgentActivityEnum
-from .fusion_utils import *
+from .pedestrian_detection_utils import *
 from ..interface.gem import GEMInterface
 from ..component import Component
 
@@ -60,12 +61,12 @@ def box_to_fake_agent(box):
 
 class PedestrianDetector2D(Component):
     # TODO: Pull params into a JSON/yaml
-    # TODO: Convert lists into np.arrays where possible, vectorize calculations
-    # TODO: Implement logging instead of print
-    # TODO: Finish splitting this class + utils into separate classes
+    # TODO: Convert some lists into np.arrays, vectorize calculations
+    # TODO: Implement logging instead of print, cleanup comments
+    # TODO: Cleanup funcs + split into separate classes
     # TODO: Decide if we want to name dets "peds" or "objs"/"agents"
     #       Maybe peds for now and Agents in agent_detection.py?
-    def __init__(self, vehicle_interface : GEMInterface):
+    def __init__(self, vehicle_interface : GEMInterface) -> None:
         self.vehicle_interface = vehicle_interface
         # Publish debug/viz rostopics if true
         self.debug = True
@@ -97,7 +98,7 @@ def __init__(self, vehicle_interface : GEMInterface):
 
         if self.debug: self.init_debug()
     
-    def init_debug(self,):
+    def init_debug(self,) -> None:
          # Debug Publishers
         self.pub_pedestrians_pc2 = rospy.Publisher("/point_cloud/pedestrians", PointCloud2, queue_size=10)
         self.pub_obj_centers_pc2 = rospy.Publisher("/point_cloud/obj_centers", PointCloud2, queue_size=10)
@@ -126,8 +127,9 @@ def find_dims(self, clusters: List[List[np.ndarray]]) -> List[np.ndarray]:
     # TODO: Slower but cleaner to input self.current_agents dict
     # TODO: Moving Average across last N iterations pos/vel? Less spurious vals
     # TODO Akul: Fix velocity calculation to calculate in ObjectFrameEnum.START
-    #            work towards own tracking class instead of simple YOLO track?
-    def find_vels(self, track_ids, obj_centers):
+    #            Work towards own tracking class instead of simple YOLO track?
+    # ret: Dict[track_id: vel[x, y, z]]
+    def find_vels(self, track_ids: List[int], obj_centers: List[np.ndarray]) -> Dict[int, np.ndarray]:
         # Object not seen -> velocity = None
         track_id_center_map = dict(zip(track_ids, obj_centers))
         vels = defaultdict(lambda: np.array(())) # None is faster, np.array matches other find_ methods.
@@ -144,8 +146,8 @@ def find_vels(self, track_ids, obj_centers):
         return vels
 
 
-    # TODO: Separate debug/viz class, separate this func into bbox and 2d 3d points
-    def viz_object_states(self, cv_image, boxes, extracted_pts_all):
+    # TODO: Separate debug/viz class, bbox and 2d 3d points funcs 
+    def viz_object_states(self, cv_image: cv2.typing.MatLike, boxes: Boxes, extracted_pts_all: List[np.ndarray]) -> None:
         # Extract 3D pedestrians points in lidar frame
         # ** These are camera frame after transform_lidar_points, right?
         pedestrians_3d_pts = [list(extracted_pts[:, -3:]) for extracted_pts in extracted_pts_all] 
@@ -183,7 +185,7 @@ def viz_object_states(self, cv_image, boxes, extracted_pts_all):
         self.pub_pedestrians_pc2.publish(ros_extracted_pedestrian_pc2)
         
 
-    def update_object_states(self, track_result, extracted_pts_all) -> None:
+    def update_object_states(self, track_result: List[Results], extracted_pts_all: List[np.ndarray]) -> None:
         self.prev_agents = self.current_agents.copy()
         self.current_agents.clear() 
 
@@ -224,7 +226,7 @@ def update_object_states(self, track_result, extracted_pts_all) -> None:
                     yaw_rate=0
                 ))
 
-    def ouster_oak_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2):
+    def ouster_oak_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2) -> None:
         # Convert to cv2 image and run detector
         cv_image = self.bridge.imgmsg_to_cv2(rgb_image_msg, "bgr8") 
         track_result = self.detector.track(source=cv_image, classes=self.classes_to_detect, persist=True, conf=self.confidence)
@@ -247,9 +249,7 @@ def ouster_oak_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2):
 
         # Process bboxes
         boxes = track_result[0].boxes
-
         extracted_pts_all = list()
-
         for ind, bbox in enumerate(boxes):
             xywh = bbox.xywh[0].tolist()
 
@@ -271,7 +271,7 @@ def ouster_oak_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2):
             extracted_pts = filter_ground_points(extracted_pts, self.ground_threshold)
             extracted_pts = filter_far_points(extracted_pts)
             extracted_pts_all.append(extracted_pts)
-            
+
         self.update_object_states(track_result, extracted_pts_all)
         if self.debug: self.viz_object_states(cv_image, boxes, extracted_pts_all)
 
diff --git a/GEMstack/onboard/perception/fusion_utils.py b/GEMstack/onboard/perception/pedestrian_detection_utils.py
similarity index 100%
rename from GEMstack/onboard/perception/fusion_utils.py
rename to GEMstack/onboard/perception/pedestrian_detection_utils.py

From c7246ed4ace2f27eef010df7d942c0852aca36f6 Mon Sep 17 00:00:00 2001
From: lukasdumasius <lukasdumasius@gmail.com>
Date: Mon, 17 Feb 2025 09:54:18 -0600
Subject: [PATCH 23/46] pedestrian_detection.py add time for velocity

---
 GEMstack/onboard/perception/fusion.py               | 2 +-
 GEMstack/onboard/perception/pedestrian_detection.py | 7 ++++++-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/GEMstack/onboard/perception/fusion.py b/GEMstack/onboard/perception/fusion.py
index 7d6558433..b642a0625 100644
--- a/GEMstack/onboard/perception/fusion.py
+++ b/GEMstack/onboard/perception/fusion.py
@@ -42,7 +42,7 @@
 # GEMStack
 from ...state import AllState,VehicleState,ObjectPose,ObjectFrameEnum,AgentState,AgentEnum,AgentActivityEnum
 from ultralytics import YOLO
-from .fusion_utils import *
+from .pedestrian_detection_utils import *
 
 class Fusion3D():
     # TODO: Pull params into a JSON/yaml
diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py
index dd67ed065..1f8cfe5bc 100644
--- a/GEMstack/onboard/perception/pedestrian_detection.py
+++ b/GEMstack/onboard/perception/pedestrian_detection.py
@@ -77,12 +77,16 @@ def __init__(self, vehicle_interface : GEMInterface) -> None:
         # track_id: AgentState
         self.prev_agents = dict()         
         self.current_agents = dict()
+        # TODO Implement time
+        self.prev_time = 0
+        self.curr_time = 1 # Avoid divide by 0 for placebolder, 0
         self.confidence = 0.7
         self.classes_to_detect = 0
         self.ground_threshold = 1.6
         self.max_dist_percent = 0.7
 
         # Load calibration data
+        # TODO: Maybe lets add one word or link what R t K are?
         self.R = load_extrinsics(os.getcwd() + '/GEMstack/onboard/perception/calibration/extrinsics/R.npy')
         self.t = load_extrinsics(os.getcwd() + '/GEMstack/onboard/perception/calibration/extrinsics/t.npy')
         self.K = load_intrinsics(os.getcwd() + '/GEMstack/onboard/perception/calibration/camera_intrinsics.json')
@@ -128,6 +132,7 @@ def find_dims(self, clusters: List[List[np.ndarray]]) -> List[np.ndarray]:
     # TODO: Moving Average across last N iterations pos/vel? Less spurious vals
     # TODO Akul: Fix velocity calculation to calculate in ObjectFrameEnum.START
     #            Work towards own tracking class instead of simple YOLO track?
+    #            Fix division by time
     # ret: Dict[track_id: vel[x, y, z]]
     def find_vels(self, track_ids: List[int], obj_centers: List[np.ndarray]) -> Dict[int, np.ndarray]:
         # Object not seen -> velocity = None
@@ -142,7 +147,7 @@ def find_vels(self, track_ids: List[int], obj_centers: List[np.ndarray]) -> Dict
                 # print("shape 2: ", np.array([prev_agent.pose.x, prev_agent.pose.y, prev_agent.pose.z]))
                 # prev can be 3 separate Nones, current is just empty array... make this symmetrical
                 if prev_agent.pose.x and prev_agent.pose.y and prev_agent.pose.z and track_id_center_map[prev_agent.track_id].shape == 3:
-                    vels[prev_track_id] = track_id_center_map[prev_track_id] - np.array([prev_agent.pose.x, prev_agent.pose.y, prev_agent.pose.z])
+                    vels[prev_track_id] = (track_id_center_map[prev_track_id] - np.array([prev_agent.pose.x, prev_agent.pose.y, prev_agent.pose.z])) / (self.curr_time - self.prev_time)
         return vels
 
 

From b62c6fc60e6e6a1b8d9ab782e14b097b0a06ce27 Mon Sep 17 00:00:00 2001
From: KenC1014 <kenken4016@gmail.com>
Date: Mon, 17 Feb 2025 15:15:55 -0600
Subject: [PATCH 24/46] integrate updated depth filter and 3D bboxes vis

---
 GEMstack/onboard/perception/fusion.py         | 270 ------------------
 .../perception/pedestrian_detection.py        |  88 ++++--
 .../perception/pedestrian_detection_utils.py  |  82 +++++-
 3 files changed, 135 insertions(+), 305 deletions(-)
 delete mode 100644 GEMstack/onboard/perception/fusion.py

diff --git a/GEMstack/onboard/perception/fusion.py b/GEMstack/onboard/perception/fusion.py
deleted file mode 100644
index b642a0625..000000000
--- a/GEMstack/onboard/perception/fusion.py
+++ /dev/null
@@ -1,270 +0,0 @@
-"""
-Top ouster lidar + Oak front camera fusion, object detection
-"""
-"""
-Terminal 1:
----------------
-source /opt/ros/noetic/setup.bash && source /catkin_ws/devel/setup.bash
-roscore
-
-Terminal 2:
----------------
-source /opt/ros/noetic/setup.bash && source /catkin_ws/devel/setup.bash
-python3 GEMstack/onboard/perception/transform.py
-
-Terminal 3:
----------------
-source /opt/ros/noetic/setup.bash && source /catkin_ws/devel/setup.bash
-rosbag play -l ~/rosbags/vehicle.bag
-
-Terminal 4:
----------------
-source /opt/ros/noetic/setup.bash && source /catkin_ws/devel/setup.bash
-cd GEMStack
-python3 -m GEMstack.onboard.perception.fusion
-
-Terminal 5:
----------------
-source /opt/ros/noetic/setup.bash && source /catkin_ws/devel/setup.bash
-rviz
-"""
-
-# Python 
-import os
-from typing import List, Dict
-from collections import defaultdict
-# ROS, CV
-import rospy
-import message_filters
-import tf
-from cv_bridge import CvBridge
-from sensor_msgs.msg import Image, PointCloud2
-# GEMStack
-from ...state import AllState,VehicleState,ObjectPose,ObjectFrameEnum,AgentState,AgentEnum,AgentActivityEnum
-from ultralytics import YOLO
-from .pedestrian_detection_utils import *
-
-class Fusion3D():
-    # TODO: Pull params into a JSON/yaml
-    # TODO: Convert some lists into np.arrays, vectorize calculations
-    # TODO: Implement logging instead of print, cleanup comments
-    # TODO: Cleanup funcs + split into separate classes
-    # TODO: Decide if we want to name dets "peds" or "objs"/"agents"
-    #       Maybe peds for now and Agents in agent_detection.py?
-    def __init__(self):
-        # Publish debug/viz rostopics if true
-        self.debug = True
-        # Setup variables
-        self.bridge = CvBridge()
-        # TODO: Wrap detector into GEMDetector?
-        self.detector = YOLO(os.getcwd()+'/GEMstack/knowledge/detection/yolov8n.pt')
-        # track_id: AgentState
-        self.prev_agents = dict()         
-        self.current_agents = dict()
-        self.confidence = 0.7
-        self.classes_to_detect = 0
-        self.ground_threshold = 1.6
-        self.max_dist_percent = 0.7
-
-        # Load calibration data
-        self.R = load_extrinsics(os.getcwd() + '/GEMstack/onboard/perception/calibration/extrinsics/R.npy')
-        self.t = load_extrinsics(os.getcwd() + '/GEMstack/onboard/perception/calibration/extrinsics/t.npy')
-        self.K = load_intrinsics(os.getcwd() + '/GEMstack/onboard/perception/calibration/camera_intrinsics.json')
-
-        # Subscribers and sychronizers
-        self.rgb_rosbag = message_filters.Subscriber('/oak/rgb/image_raw', Image)
-        self.top_lidar_rosbag = message_filters.Subscriber('/ouster/points', PointCloud2)
-        self.sync = message_filters.ApproximateTimeSynchronizer([self.rgb_rosbag, self.top_lidar_rosbag], queue_size=10, slop=0.1)
-        self.sync.registerCallback(self.ouster_oak_callback)
-
-        # TF listener to get transformation from LiDAR to Camera
-        self.tf_listener = tf.TransformListener()
-
-        if self.debug: self.init_debug()
-    
-    def init_debug(self,):
-         # Debug Publishers
-        self.pub_pedestrians_pc2 = rospy.Publisher("/point_cloud/pedestrians", PointCloud2, queue_size=10)
-        self.pub_obj_centers_pc2 = rospy.Publisher("/point_cloud/obj_centers", PointCloud2, queue_size=10)
-        self.pub_image = rospy.Publisher("/camera/image_detection", Image, queue_size=1)
-
-    def update(self, vehicle : VehicleState) -> Dict[str,AgentState]:
-        return self.current_agents
-
-    # TODO: Improve Algo Knn, ransac, etc.
-    def find_centers(self, clusters: List[List[np.ndarray]]) -> List[np.ndarray]:
-        clusters = [np.array(clust) for clust in clusters]
-        centers = [np.array(()) if clust.size == 0 else np.mean(clust, axis=0) for clust in clusters]
-        return centers
-    
-    # Beware: AgentState(PhysicalObject) builds bbox from 
-    # dims [-l/2,l/2] x [-w/2,w/2] x [0,h], not
-    # [-l/2,l/2] x [-w/2,w/2] x [-h/2,h/2]
-    def find_dims(self, clusters: List[List[np.ndarray]]) -> List[np.ndarray]:
-        # Add some small constant to height to compensate for
-        # objects distance to ground we filtered from lidar, 
-        # other heuristics to imrpove stability for find_ funcs ?
-        clusters = [np.array(clust) for clust in clusters]
-        dims = [np.array(()) if clust.size == 0 else np.max(clust, axis= 0) - np.min(clust, axis= 0) for clust in clusters]
-        return dims
-
-    # TODO: Slower but cleaner to input self.current_agents dict
-    # TODO: Moving Average across last N iterations pos/vel? Less spurious vals
-    # TODO Akul: Fix velocity calculation to calculate in ObjectFrameEnum.START
-    #            work towards own tracking class instead of simple YOLO track?
-    # ret: Dict[track_id: vel[x, y, z]]
-    def find_vels(self, track_ids, obj_centers):
-        # Object not seen -> velocity = None
-        track_id_center_map = dict(zip(track_ids, obj_centers))
-        vels = defaultdict(lambda: np.array(())) # None is faster, np.array matches other find_ methods.
-
-        for prev_track_id, prev_agent in self.prev_agents.items():
-            if prev_track_id in track_ids:
-                # TODO: Add prev_agents to memory to avoid None velocity
-                # We should only be missing prev pose on first sight of track_id Agent.
-                # print("shape 1: ", track_id_center_map[prev_agent.track_id])
-                # print("shape 2: ", np.array([prev_agent.pose.x, prev_agent.pose.y, prev_agent.pose.z]))
-                # prev can be 3 separate Nones, current is just empty array... make this symmetrical
-                if prev_agent.pose.x and prev_agent.pose.y and prev_agent.pose.z and track_id_center_map[prev_agent.track_id].shape == 3:
-                    vels[prev_track_id] = track_id_center_map[prev_track_id] - np.array([prev_agent.pose.x, prev_agent.pose.y, prev_agent.pose.z])
-        return vels
-
-
-    # TODO: Separate debug/viz class, separate this func into bbox and 2d 3d points
-    def viz_object_states(self, cv_image, boxes, extracted_pts_all):
-        # Extract 3D pedestrians points in lidar frame
-        # ** These are camera frame after transform_lidar_points, right?
-        pedestrians_3d_pts = [list(extracted_pts[:, -3:]) for extracted_pts in extracted_pts_all] 
-
-        # Object center viz
-        obj_3d_obj_centers = list()
-        for track_id, agent in self.current_agents.items():
-            if agent.pose.x != None and agent.pose.y != None and agent.pose.z != None:
-                obj_3d_obj_centers.append((agent.pose.x, agent.pose.y, agent.pose.z)) # **
-        if len(obj_3d_obj_centers) > 0:
-            ros_obj_3d_obj_centers_pc2 = create_point_cloud(obj_3d_obj_centers, color=(0, 128, 0))
-            self.pub_obj_centers_pc2.publish(ros_obj_3d_obj_centers_pc2)
-        
-        # Extract 2D pedestrians points and bbox in camera frame
-        extracted_2d_pts = [list(extracted_pts[:, :2].astype(int)) for extracted_pts in extracted_pts_all]
-        flattened_pedestrians_2d_pts = list()
-        for pts in extracted_2d_pts:    flattened_pedestrians_2d_pts.extend(pts)
-
-        for ind, bbox in enumerate(boxes):
-            xywh = bbox.xywh[0].tolist()
-            cv_image = vis_2d_bbox(cv_image, xywh, bbox)
-        
-        flattened_pedestrians_3d_pts = list() 
-        for pts in pedestrians_3d_pts: flattened_pedestrians_3d_pts.extend(pts)
-
-        # Draw projected 2D LiDAR points on the image.
-        for pt in flattened_pedestrians_2d_pts:
-            cv2.circle(cv_image, pt, 2, (0, 0, 255), -1)
-        ros_img = self.bridge.cv2_to_imgmsg(cv_image, 'bgr8')
-        self.pub_image.publish(ros_img)  
-
-        # Draw 3D pedestrian pointclouds
-        # Create point cloud from extracted 3D points
-        ros_extracted_pedestrian_pc2 = create_point_cloud(flattened_pedestrians_3d_pts)
-        self.pub_pedestrians_pc2.publish(ros_extracted_pedestrian_pc2)
-        
-
-    def update_object_states(self, track_result, extracted_pts_all) -> None:
-        self.prev_agents = self.current_agents.copy()
-        self.current_agents.clear() 
-
-        # Change pedestrians_3d_pts to dicts matching track_ids
-        track_ids = track_result[0].boxes.id.int().cpu().tolist()
-        num_objs = len(track_ids)
-        boxes = track_result[0].boxes
-
-        # Extract 3D pedestrians points in lidar frame
-        # ** These are camera frame after transform_lidar_points, right?
-        pedestrians_3d_pts = [list(extracted_pts[:, -3:]) for extracted_pts in extracted_pts_all] 
-        if len(pedestrians_3d_pts) != num_objs:
-            raise Exception('Perception - Camera detections, points clusters num. mismatch')
-        
-        # TODO: Slower but cleaner to pass dicts of AgentState
-        #       or at least {track_ids: centers/pts/etc}
-        # TODO: Combine funcs for efficiency in C.
-        #       Separate numpy prob still faster for now
-        obj_centers = self.find_centers(pedestrians_3d_pts)
-        obj_dims = self.find_dims(pedestrians_3d_pts)
-        obj_vels = self.find_vels(track_ids, obj_centers)
-
-        # Update Current AgentStates
-        for ind in range(num_objs):
-            obj_center = (None, None, None) if obj_centers[ind].size == 0 else obj_centers[ind]
-            obj_dim = (None, None, None) if obj_dims[ind].size == 0 else obj_dims[ind]
-            self.current_agents[track_ids[ind]] = (
-                AgentState(
-                    track_id = track_ids[ind],
-                    pose=ObjectPose(t=0, x=obj_center[0], y=obj_center[1], z=obj_center[2] ,yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT),
-                    # (l, w, h)
-                    # TODO: confirm (z -> l, x -> w, y -> h)
-                    dimensions=(obj_dim[2], obj_dim[0],obj_dim[1]),  
-                    outline=None,
-                    type=AgentEnum.PEDESTRIAN,
-                    activity=AgentActivityEnum.MOVING,
-                    velocity= None if obj_vels[track_ids[ind]].size == 0 else tuple(vels[track_ids[ind]]),
-                    yaw_rate=0
-                ))
-
-
-    def ouster_oak_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2):
-        # Convert to cv2 image and run detector
-        cv_image = self.bridge.imgmsg_to_cv2(rgb_image_msg, "bgr8") 
-        track_result = self.detector.track(source=cv_image, classes=self.classes_to_detect, persist=True, conf=self.confidence)
-
-        # Convert 1D PointCloud2 data to x, y, z coords
-        lidar_points = convert_pointcloud2_to_xyz(lidar_pc2_msg)
-        # print("len lidar_points", len(lidar_points))
-
-        # Downsample xyz point clouds
-        downsampled_points = downsample_points(lidar_points)
-        # print("len downsampled_points", len(downsampled_points))
-        
-        # Transform LiDAR points into the camera coordinate frame.
-        lidar_in_camera = transform_lidar_points(downsampled_points, self.R, self.t)
-        # print("len lidar_in_camera", len(lidar_in_camera))
-
-        # Project the transformed points into the image plane.
-        projected_pts = project_points(lidar_in_camera, self.K)
-        # print("len projected_pts", len(projected_pts))
-
-        # Process bboxes
-        boxes = track_result[0].boxes
-
-        extracted_pts_all = list()
-
-        for ind, bbox in enumerate(boxes):
-            xywh = bbox.xywh[0].tolist()
-
-            # Extracting projected pts
-            x, y, w, h = xywh
-            left_bound = int(x - w / 2)
-            right_bound = int(x + w / 2)
-            top_bound = int(y - h / 2)
-            bottom_bound = int(y + h / 2)
-
-            pts = np.array(projected_pts)
-            extracted_pts = pts[(pts[:, 0] > left_bound) &
-                                (pts[:, 0] < right_bound) &
-                                (pts[:, 1] > top_bound) &
-                                (pts[:, 1] < bottom_bound)
-                                ]
-
-            # Apply ground and max distance filter to the extracted 5D points
-            extracted_pts = filter_ground_points(extracted_pts, self.ground_threshold)
-            extracted_pts = filter_far_points(extracted_pts)
-            extracted_pts_all.append(extracted_pts)
-            
-        self.update_object_states(track_result, extracted_pts_all)
-        if self.debug: self.viz_object_states(cv_image, boxes, extracted_pts_all)
-
-
-if __name__ == '__main__':
-    rospy.init_node('fusion_node', anonymous=True)
-    Fusion3D()
-    while not rospy.core.is_shutdown():
-        rospy.rostime.wallsleep(0.5)
diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py
index 1f8cfe5bc..bd43494db 100644
--- a/GEMstack/onboard/perception/pedestrian_detection.py
+++ b/GEMstack/onboard/perception/pedestrian_detection.py
@@ -39,6 +39,7 @@
 import tf
 from cv_bridge import CvBridge
 from sensor_msgs.msg import Image, PointCloud2
+from visualization_msgs.msg import MarkerArray
 # YOLO
 from ultralytics import YOLO
 from ultralytics.engine.results import Results, Boxes
@@ -83,7 +84,7 @@ def __init__(self, vehicle_interface : GEMInterface) -> None:
         self.confidence = 0.7
         self.classes_to_detect = 0
         self.ground_threshold = 1.6
-        self.max_dist_percent = 0.7
+        self.max_human_depth = 0.9
 
         # Load calibration data
         # TODO: Maybe lets add one word or link what R t K are?
@@ -106,6 +107,7 @@ def init_debug(self,) -> None:
          # Debug Publishers
         self.pub_pedestrians_pc2 = rospy.Publisher("/point_cloud/pedestrians", PointCloud2, queue_size=10)
         self.pub_obj_centers_pc2 = rospy.Publisher("/point_cloud/obj_centers", PointCloud2, queue_size=10)
+        self.pub_bboxes_markers = rospy.Publisher("/markers/bboxes", MarkerArray, queue_size=10)
         self.pub_image = rospy.Publisher("/camera/image_detection", Image, queue_size=1)
 
     def update(self, vehicle : VehicleState) -> Dict[str,AgentState]:
@@ -152,19 +154,19 @@ def find_vels(self, track_ids: List[int], obj_centers: List[np.ndarray]) -> Dict
 
 
     # TODO: Separate debug/viz class, bbox and 2d 3d points funcs 
-    def viz_object_states(self, cv_image: cv2.typing.MatLike, boxes: Boxes, extracted_pts_all: List[np.ndarray]) -> None:
+    def viz_object_states(self, cv_image, boxes, extracted_pts_all):
         # Extract 3D pedestrians points in lidar frame
         # ** These are camera frame after transform_lidar_points, right?
         pedestrians_3d_pts = [list(extracted_pts[:, -3:]) for extracted_pts in extracted_pts_all] 
 
         # Object center viz
         obj_3d_obj_centers = list()
+        obj_3d_obj_dims = list()
         for track_id, agent in self.current_agents.items():
             if agent.pose.x != None and agent.pose.y != None and agent.pose.z != None:
                 obj_3d_obj_centers.append((agent.pose.x, agent.pose.y, agent.pose.z)) # **
-        if len(obj_3d_obj_centers) > 0:
-            ros_obj_3d_obj_centers_pc2 = create_point_cloud(obj_3d_obj_centers, color=(0, 128, 0))
-            self.pub_obj_centers_pc2.publish(ros_obj_3d_obj_centers_pc2)
+            if agent.dimensions != None and agent.dimensions[0] != None and agent.dimensions[1] != None and agent.dimensions[2] != None:
+                obj_3d_obj_dims.append(agent.dimensions)
         
         # Extract 2D pedestrians points and bbox in camera frame
         extracted_2d_pts = [list(extracted_pts[:, :2].astype(int)) for extracted_pts in extracted_pts_all]
@@ -178,21 +180,39 @@ def viz_object_states(self, cv_image: cv2.typing.MatLike, boxes: Boxes, extracte
         flattened_pedestrians_3d_pts = list() 
         for pts in pedestrians_3d_pts: flattened_pedestrians_3d_pts.extend(pts)
 
-        # Draw projected 2D LiDAR points on the image.
-        for pt in flattened_pedestrians_2d_pts:
-            cv2.circle(cv_image, pt, 2, (0, 0, 255), -1)
-        ros_img = self.bridge.cv2_to_imgmsg(cv_image, 'bgr8')
-        self.pub_image.publish(ros_img)  
+        if len(flattened_pedestrians_3d_pts) > 0:
+            # Draw projected 2D LiDAR points on the image.
+            for pt in flattened_pedestrians_2d_pts:
+                cv2.circle(cv_image, pt, 2, (0, 0, 255), -1)
+            ros_img = self.bridge.cv2_to_imgmsg(cv_image, 'bgr8')
+            self.pub_image.publish(ros_img)  
+
+            # Draw 3D pedestrian pointclouds
+            # Create point cloud from extracted 3D points
+            ros_extracted_pedestrian_pc2 = create_point_cloud(flattened_pedestrians_3d_pts)
+            self.pub_pedestrians_pc2.publish(ros_extracted_pedestrian_pc2)
+
+        # Draw 3D pedestrian centers and dimensions
+        if len(obj_3d_obj_centers) > 0 and len(obj_3d_obj_dims) > 0:
+            # Draw 3D pedestrian center pointclouds
+            ros_obj_3d_obj_centers_pc2 = create_point_cloud(obj_3d_obj_centers, color=(0, 128, 0))
+            self.pub_obj_centers_pc2.publish(ros_obj_3d_obj_centers_pc2)
 
-        # Draw 3D pedestrian pointclouds
-        # Create point cloud from extracted 3D points
-        ros_extracted_pedestrian_pc2 = create_point_cloud(flattened_pedestrians_3d_pts)
-        self.pub_pedestrians_pc2.publish(ros_extracted_pedestrian_pc2)
+            # Draw 3D pedestrian bboxes markers
+            # Create bbox marker from pedestrain dimensions
+            ros_delete_bboxes_markers = delete_bbox_marker()
+            self.pub_bboxes_markers.publish(ros_delete_bboxes_markers)
+            ros_pedestrians_bboxes_markers = create_bbox_marker(obj_3d_obj_centers, obj_3d_obj_dims)
+            self.pub_bboxes_markers.publish(ros_pedestrians_bboxes_markers)
         
 
     def update_object_states(self, track_result: List[Results], extracted_pts_all: List[np.ndarray]) -> None:
         self.prev_agents = self.current_agents.copy()
-        self.current_agents.clear() 
+        self.current_agents.clear()
+
+        # Return if no track results available
+        if track_result[0].boxes.id == None:
+            return
 
         # Change pedestrians_3d_pts to dicts matching track_ids
         track_ids = track_result[0].boxes.id.int().cpu().tolist()
@@ -223,15 +243,15 @@ def update_object_states(self, track_result: List[Results], extracted_pts_all: L
                     pose=ObjectPose(t=0, x=obj_center[0], y=obj_center[1], z=obj_center[2] ,yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT),
                     # (l, w, h)
                     # TODO: confirm (z -> l, x -> w, y -> h)
-                    dimensions=(obj_dim[2], obj_dim[0],obj_dim[1]),  
+                    dimensions=(obj_dim[0], obj_dim[1], obj_dim[2]),  
                     outline=None,
                     type=AgentEnum.PEDESTRIAN,
                     activity=AgentActivityEnum.MOVING,
-                    velocity= None if obj_vels[track_ids[ind]].size == 0 else tuple(vels[track_ids[ind]]),
+                    velocity= None if obj_vels[track_ids[ind]].size == 0 else tuple(obj_vels[track_ids[ind]]),
                     yaw_rate=0
                 ))
 
-    def ouster_oak_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2) -> None:
+    def ouster_oak_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2):
         # Convert to cv2 image and run detector
         cv_image = self.bridge.imgmsg_to_cv2(rgb_image_msg, "bgr8") 
         track_result = self.detector.track(source=cv_image, classes=self.classes_to_detect, persist=True, conf=self.confidence)
@@ -254,7 +274,9 @@ def ouster_oak_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2)
 
         # Process bboxes
         boxes = track_result[0].boxes
+
         extracted_pts_all = list()
+
         for ind, bbox in enumerate(boxes):
             xywh = bbox.xywh[0].tolist()
 
@@ -266,19 +288,23 @@ def ouster_oak_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2)
             bottom_bound = int(y + h / 2)
 
             pts = np.array(projected_pts)
-            extracted_pts = pts[(pts[:, 0] > left_bound) &
-                                (pts[:, 0] < right_bound) &
-                                (pts[:, 1] > top_bound) &
-                                (pts[:, 1] < bottom_bound)
-                                ]
-
-            # Apply ground and max distance filter to the extracted 5D points
-            extracted_pts = filter_ground_points(extracted_pts, self.ground_threshold)
-            extracted_pts = filter_far_points(extracted_pts)
-            extracted_pts_all.append(extracted_pts)
-
-        self.update_object_states(track_result, extracted_pts_all)
-        if self.debug: self.viz_object_states(cv_image, boxes, extracted_pts_all)
+            # Checking projected_pts length is very important
+            if len(projected_pts) > 0:
+                extracted_pts = pts[(pts[:, 0] > left_bound) &
+                                    (pts[:, 0] < right_bound) &
+                                    (pts[:, 1] > top_bound) &
+                                    (pts[:, 1] < bottom_bound)
+                                    ]
+
+                # Apply ground and max distance filter to the extracted 5D points
+                extracted_pts = filter_ground_points(extracted_pts, self.ground_threshold)
+                extracted_pts = filter_depth_points(extracted_pts)
+                extracted_pts_all.append(extracted_pts)
+        
+        if len(extracted_pts_all) > 0 and len(track_result) > 0:
+            self.update_object_states(track_result, extracted_pts_all)
+            if self.debug: self.viz_object_states(cv_image, boxes, extracted_pts_all)
+
 
     def rate(self):
         return 4.0
diff --git a/GEMstack/onboard/perception/pedestrian_detection_utils.py b/GEMstack/onboard/perception/pedestrian_detection_utils.py
index 4163d7b2c..c74603232 100644
--- a/GEMstack/onboard/perception/pedestrian_detection_utils.py
+++ b/GEMstack/onboard/perception/pedestrian_detection_utils.py
@@ -1,4 +1,5 @@
 from sensor_msgs.msg import PointCloud2, PointField
+from visualization_msgs.msg import Marker, MarkerArray
 import numpy as np
 import sensor_msgs.point_cloud2 as pc2
 import open3d as o3d
@@ -35,11 +36,15 @@ def filter_ground_points(lidar_points, ground_threshold = 0):
     return filtered_array
 
 
-def filter_far_points(lidar_points, max_dist_percent=0.85):
-    """ Filter points beyond a percentage threshold of max distance in a point cluster """
+def filter_depth_points(lidar_points, max_human_depth=0.9):
+    """ Filter points beyond a max possible human depth in a point cluster """
     if lidar_points.shape[0] == 0: return lidar_points
-    max_dist = np.max(lidar_points[:, 4])
-    filtered_array = lidar_points[lidar_points[:, 4] < max_dist_percent * max_dist]
+    lidar_points_dist = lidar_points[:, 4]
+    min_dist = np.min(lidar_points_dist)
+    max_dist = np.max(lidar_points_dist)
+    max_possible_dist = min_dist + max_human_depth
+    actual_dist = min(max_dist, max_possible_dist)
+    filtered_array = lidar_points[lidar_points_dist < actual_dist]
     return filtered_array
 
 
@@ -167,4 +172,73 @@ def create_point_cloud(points, color=(255, 0, 0)):
 
     return pc2.create_cloud(header, fields, point_cloud_data)
 
+
+def create_bbox_marker(centroids, dimensions):
+    """
+    Create 3D bbox markers from centroids and dimensions
+    """
+    marker_array = MarkerArray()
+
+    for i, (centroid, dimension) in enumerate(zip(centroids, dimensions)):
+        # Skip if no centroid or dimension
+        if (centroid == None) or (dimension == None):
+            continue
+
+        marker = Marker()
+        marker.header.frame_id = "map"  # Reference frame
+        marker.header.stamp = rospy.Time.now()
+        marker.ns = "bounding_boxes"
+        marker.id = i  # Unique ID for each marker
+        marker.type = Marker.CUBE  # Cube for bounding box
+        marker.action = Marker.ADD
+
+        # Position (center of the bounding box)
+        c_x, c_y, c_z = centroid
+        if (not isinstance(c_x, float)) or (not isinstance(c_y, float)) or (not isinstance(c_z, float)):
+            continue
+
+        marker.pose.position.x = c_x
+        marker.pose.position.y = c_y
+        marker.pose.position.z = c_z
+
+        # Orientation (default, no rotation)
+        marker.pose.orientation.x = 0.0
+        marker.pose.orientation.y = 0.0
+        marker.pose.orientation.z = 0.0
+        marker.pose.orientation.w = 1.0
+
+        # Bounding box dimensions
+        d_x, d_y, d_z = dimension
+        if (not isinstance(d_x, float)) or (not isinstance(d_y, float)) or (not isinstance(d_z, float)):
+            continue
+
+        marker.scale.x = d_x
+        marker.scale.y = d_y
+        marker.scale.z = d_z
+
+        # Random colors for each bounding box
+        marker.color.r = 0.0  # Varying colors
+        marker.color.g = 1.0
+        marker.color.b = 1.5
+        marker.color.a = 0.2  # Transparency
+
+        marker.lifetime = rospy.Duration()  # Persistent
+        marker_array.markers.append(marker)
+    return marker_array
+
+
+def delete_bbox_marker():
+    """
+    Delete 3D bbox markers given ID ranges
+    """
+    marker_array = MarkerArray()
+    for i in range(6):
+        marker = Marker()
+        marker.ns = "bounding_boxes"
+        marker.id = i
+        marker.action = Marker.DELETE
+        marker_array.markers.append(marker)
+    return marker_array
+
+
     
\ No newline at end of file

From 6cd1d4fdda418d6dd27d7f3c48583fd13f220bfb Mon Sep 17 00:00:00 2001
From: KenC1014 <kenken4016@gmail.com>
Date: Mon, 17 Feb 2025 18:10:36 -0600
Subject: [PATCH 25/46] apply lidar to vehicle transform to centers

---
 .../perception/pedestrian_detection.py        | 30 +++++++++++++++----
 1 file changed, 24 insertions(+), 6 deletions(-)

diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py
index bd43494db..947bd125a 100644
--- a/GEMstack/onboard/perception/pedestrian_detection.py
+++ b/GEMstack/onboard/perception/pedestrian_detection.py
@@ -88,10 +88,15 @@ def __init__(self, vehicle_interface : GEMInterface) -> None:
 
         # Load calibration data
         # TODO: Maybe lets add one word or link what R t K are?
-        self.R = load_extrinsics(os.getcwd() + '/GEMstack/onboard/perception/calibration/extrinsics/R.npy')
-        self.t = load_extrinsics(os.getcwd() + '/GEMstack/onboard/perception/calibration/extrinsics/t.npy')
+        self.R_lidar_to_oak = load_extrinsics(os.getcwd() + '/GEMstack/onboard/perception/calibration/extrinsics/R.npy')
+        self.t_lidar_to_oak = load_extrinsics(os.getcwd() + '/GEMstack/onboard/perception/calibration/extrinsics/t.npy')
         self.K = load_intrinsics(os.getcwd() + '/GEMstack/onboard/perception/calibration/camera_intrinsics.json')
 
+        self.R_lidar_to_vehicle = np.array([[0.9995121, 0.02132941, 0.02281911], 
+            [-0.02124771, 0.99976695, -0.00381707], 
+            [-0.02289521, 0.00333035, 0.9997323]])
+        self.t_lidar_to_vehicle = np.array([[0.0], [0.0], [0.35]])
+
         # Subscribers and sychronizers
         self.rgb_rosbag = message_filters.Subscriber('/oak/rgb/image_raw', Image)
         self.top_lidar_rosbag = message_filters.Subscriber('/ouster/points', PointCloud2)
@@ -179,7 +184,7 @@ def viz_object_states(self, cv_image, boxes, extracted_pts_all):
         
         flattened_pedestrians_3d_pts = list() 
         for pts in pedestrians_3d_pts: flattened_pedestrians_3d_pts.extend(pts)
-
+	
         if len(flattened_pedestrians_3d_pts) > 0:
             # Draw projected 2D LiDAR points on the image.
             for pt in flattened_pedestrians_2d_pts:
@@ -188,8 +193,10 @@ def viz_object_states(self, cv_image, boxes, extracted_pts_all):
             self.pub_image.publish(ros_img)  
 
             # Draw 3D pedestrian pointclouds
+            # Tranform 3D pedestrians points to vehicle frame for better visualization. Turn off for performance
+            flattened_pedestrians_3d_pts_vehicle = transform_lidar_points(np.array(flattened_pedestrians_3d_pts), self.R_lidar_to_vehicle, self.t_lidar_to_vehicle)
             # Create point cloud from extracted 3D points
-            ros_extracted_pedestrian_pc2 = create_point_cloud(flattened_pedestrians_3d_pts)
+            ros_extracted_pedestrian_pc2 = create_point_cloud(flattened_pedestrians_3d_pts_vehicle)
             self.pub_pedestrians_pc2.publish(ros_extracted_pedestrian_pc2)
 
         # Draw 3D pedestrian centers and dimensions
@@ -233,9 +240,20 @@ def update_object_states(self, track_result: List[Results], extracted_pts_all: L
         obj_dims = self.find_dims(pedestrians_3d_pts)
         obj_vels = self.find_vels(track_ids, obj_centers)
 
+        # Transform centers from top_lidar frame to vehicle frame
+        # Need to transform the center point one by one since matrix op can't deal with empty points
+        obj_centers_vehicle = []
+        for obj_center in obj_centers:
+            if len(obj_center) > 0:
+                obj_center = np.array([obj_center])
+                obj_center_vehicle = transform_lidar_points(obj_center, self.R_lidar_to_vehicle, self.t_lidar_to_vehicle)[0]
+                obj_centers_vehicle.append(obj_center_vehicle)
+            else:
+                obj_centers_vehicle.append(np.array(()))
+        
         # Update Current AgentStates
         for ind in range(num_objs):
-            obj_center = (None, None, None) if obj_centers[ind].size == 0 else obj_centers[ind]
+            obj_center = (None, None, None) if obj_centers_vehicle[ind].size == 0 else obj_centers_vehicle[ind]
             obj_dim = (None, None, None) if obj_dims[ind].size == 0 else obj_dims[ind]
             self.current_agents[track_ids[ind]] = (
                 AgentState(
@@ -265,7 +283,7 @@ def ouster_oak_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2):
         # print("len downsampled_points", len(downsampled_points))
         
         # Transform LiDAR points into the camera coordinate frame.
-        lidar_in_camera = transform_lidar_points(downsampled_points, self.R, self.t)
+        lidar_in_camera = transform_lidar_points(downsampled_points, self.R_lidar_to_oak, self.t_lidar_to_oak)
         # print("len lidar_in_camera", len(lidar_in_camera))
 
         # Project the transformed points into the image plane.

From 7ad05fd7adbcfb44068baabe9c83ec48d68cca2e Mon Sep 17 00:00:00 2001
From: Lukas Dumasius <80981746+lukasdumasius@users.noreply.github.com>
Date: Mon, 17 Feb 2025 21:46:59 -0600
Subject: [PATCH 26/46] Check len(0) ouster points pedestrian_detection.py

---
 GEMstack/onboard/perception/pedestrian_detection.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py
index 947bd125a..54c7ce0f5 100644
--- a/GEMstack/onboard/perception/pedestrian_detection.py
+++ b/GEMstack/onboard/perception/pedestrian_detection.py
@@ -318,10 +318,11 @@ def ouster_oak_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2):
                 extracted_pts = filter_ground_points(extracted_pts, self.ground_threshold)
                 extracted_pts = filter_depth_points(extracted_pts)
                 extracted_pts_all.append(extracted_pts)
+	    else: extracted_pts_all.append(np.array(())
         
-        if len(extracted_pts_all) > 0 and len(track_result) > 0:
-            self.update_object_states(track_result, extracted_pts_all)
-            if self.debug: self.viz_object_states(cv_image, boxes, extracted_pts_all)
+        #if len(extracted_pts_all) > 0 and len(track_result) > 0:
+        self.update_object_states(track_result, extracted_pts_all)
+        if self.debug: self.viz_object_states(cv_image, boxes, extracted_pts_all)
 
 
     def rate(self):

From 68d0a75c714f7db8eea85691d79c113214903a3b Mon Sep 17 00:00:00 2001
From: KenC1014 <kenken4016@gmail.com>
Date: Mon, 17 Feb 2025 22:03:36 -0600
Subject: [PATCH 27/46] fix lidar frame mapping

---
 .../perception/pedestrian_detection.py        | 13 ++++++------
 .../perception/pedestrian_detection_utils.py  | 20 +++++++++----------
 GEMstack/onboard/perception/transform.py      |  6 ++++--
 3 files changed, 21 insertions(+), 18 deletions(-)

diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py
index 947bd125a..6170b5786 100644
--- a/GEMstack/onboard/perception/pedestrian_detection.py
+++ b/GEMstack/onboard/perception/pedestrian_detection.py
@@ -83,7 +83,7 @@ def __init__(self, vehicle_interface : GEMInterface) -> None:
         self.curr_time = 1 # Avoid divide by 0 for placebolder, 0
         self.confidence = 0.7
         self.classes_to_detect = 0
-        self.ground_threshold = 1.6
+        self.ground_threshold = -2.0
         self.max_human_depth = 0.9
 
         # Load calibration data
@@ -162,7 +162,7 @@ def find_vels(self, track_ids: List[int], obj_centers: List[np.ndarray]) -> Dict
     def viz_object_states(self, cv_image, boxes, extracted_pts_all):
         # Extract 3D pedestrians points in lidar frame
         # ** These are camera frame after transform_lidar_points, right?
-        pedestrians_3d_pts = [list(extracted_pts[:, -3:]) for extracted_pts in extracted_pts_all] 
+        pedestrians_3d_pts = [list(extracted_pts[:, -3:]) for extracted_pts in extracted_pts_all]
 
         # Object center viz
         obj_3d_obj_centers = list()
@@ -196,7 +196,8 @@ def viz_object_states(self, cv_image, boxes, extracted_pts_all):
             # Tranform 3D pedestrians points to vehicle frame for better visualization. Turn off for performance
             flattened_pedestrians_3d_pts_vehicle = transform_lidar_points(np.array(flattened_pedestrians_3d_pts), self.R_lidar_to_vehicle, self.t_lidar_to_vehicle)
             # Create point cloud from extracted 3D points
-            ros_extracted_pedestrian_pc2 = create_point_cloud(flattened_pedestrians_3d_pts_vehicle)
+            # ros_extracted_pedestrian_pc2 = create_point_cloud(flattened_pedestrians_3d_pts_vehicle)
+            ros_extracted_pedestrian_pc2 = create_point_cloud(flattened_pedestrians_3d_pts)
             self.pub_pedestrians_pc2.publish(ros_extracted_pedestrian_pc2)
 
         # Draw 3D pedestrian centers and dimensions
@@ -287,8 +288,8 @@ def ouster_oak_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2):
         # print("len lidar_in_camera", len(lidar_in_camera))
 
         # Project the transformed points into the image plane.
-        projected_pts = project_points(lidar_in_camera, self.K)
-        # print("len projected_pts", len(projected_pts))
+        projected_pts = project_points(lidar_in_camera, self.K, downsampled_points)
+        # print("projected_pts", len(projected_pts))
 
         # Process bboxes
         boxes = track_result[0].boxes
@@ -316,7 +317,7 @@ def ouster_oak_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2):
 
                 # Apply ground and max distance filter to the extracted 5D points
                 extracted_pts = filter_ground_points(extracted_pts, self.ground_threshold)
-                extracted_pts = filter_depth_points(extracted_pts)
+                extracted_pts = filter_depth_points(extracted_pts, self.max_human_depth)
                 extracted_pts_all.append(extracted_pts)
         
         if len(extracted_pts_all) > 0 and len(track_result) > 0:
diff --git a/GEMstack/onboard/perception/pedestrian_detection_utils.py b/GEMstack/onboard/perception/pedestrian_detection_utils.py
index c74603232..788ee2e54 100644
--- a/GEMstack/onboard/perception/pedestrian_detection_utils.py
+++ b/GEMstack/onboard/perception/pedestrian_detection_utils.py
@@ -32,14 +32,14 @@ def downsample_points(lidar_points):
 
 def filter_ground_points(lidar_points, ground_threshold = 0):
     """ Filter points given an elevation of ground threshold """
-    filtered_array = lidar_points[lidar_points[:, 3] < ground_threshold]
+    filtered_array = lidar_points[lidar_points[:, 4] > ground_threshold]
     return filtered_array
 
 
 def filter_depth_points(lidar_points, max_human_depth=0.9):
     """ Filter points beyond a max possible human depth in a point cluster """
     if lidar_points.shape[0] == 0: return lidar_points
-    lidar_points_dist = lidar_points[:, 4]
+    lidar_points_dist = lidar_points[:, 2]
     min_dist = np.min(lidar_points_dist)
     max_dist = np.max(lidar_points_dist)
     max_possible_dist = min_dist + max_human_depth
@@ -97,19 +97,19 @@ def transform_lidar_points(lidar_points, R, t):
     return P_cam
 
 
-def project_points(points_3d, K):
+def project_points(points_3d, K, lidar_points):
     """
     Project 3D points (in the camera frame) into 2D image coordinates using the camera matrix K.
     Only projects points with z > 0.
     """
     proj_points = []
-    for pt in points_3d:
+    for pt, l_pt in zip(points_3d, lidar_points):
         if pt[2] > 0:  # only project points in front of the camera
             u = K[0, 0] * (pt[0] / pt[2]) + K[0, 2]
             v = K[1, 1] * (pt[1] / pt[2]) + K[1, 2]
-            # 5D points that stores original lidar points
-            proj_points.append((int(u), int(v), pt[0], pt[1], pt[2]))
-    return proj_points
+            # 5D data point
+            proj_points.append((int(u), int(v), l_pt[0], l_pt[1], l_pt[2]))
+    return np.array(proj_points)
 
 
 def vis_2d_bbox(image, xywh, box):
@@ -155,7 +155,7 @@ def create_point_cloud(points, color=(255, 0, 0)):
     """
     header = rospy.Header()
     header.stamp = rospy.Time.now()
-    header.frame_id = "map"  # Change to your TF frame
+    header.frame_id = "os_sensor"  # Change to your TF frame
 
     fields = [
         PointField(name="x", offset=0, datatype=PointField.FLOAT32, count=1),
@@ -185,7 +185,7 @@ def create_bbox_marker(centroids, dimensions):
             continue
 
         marker = Marker()
-        marker.header.frame_id = "map"  # Reference frame
+        marker.header.frame_id = "os_sensor"  # Reference frame
         marker.header.stamp = rospy.Time.now()
         marker.ns = "bounding_boxes"
         marker.id = i  # Unique ID for each marker
@@ -241,4 +241,4 @@ def delete_bbox_marker():
     return marker_array
 
 
-    
\ No newline at end of file
+    
diff --git a/GEMstack/onboard/perception/transform.py b/GEMstack/onboard/perception/transform.py
index 6e921f56c..b876cc595 100644
--- a/GEMstack/onboard/perception/transform.py
+++ b/GEMstack/onboard/perception/transform.py
@@ -9,8 +9,10 @@ def publish_tf():
 
     while not rospy.is_shutdown():
         br.sendTransform(
-            (0, 1.6, 7),  # (x, y, z) translation
-            tf.transformations.quaternion_from_euler(0.5* np.pi, 0, 0),  # (roll, pitch, yaw)
+            # (0, 1.6, 7),  # (x, y, z) translation
+            # tf.transformations.quaternion_from_euler(0.5* np.pi, 0, 0),  # (roll, pitch, yaw)
+            (0, 0, 0),  # (x, y, z) translation
+            tf.transformations.quaternion_from_euler(0, 0, 0),  # (roll, pitch, yaw)
             rospy.Time.now(),
             "os_sensor",  # Child frame (sensor)
             "map"  # Parent frame (world)

From 0857f0c92e0bcc488a7bb5a82fcbb3920c43565a Mon Sep 17 00:00:00 2001
From: KenC1014 <kenken4016@gmail.com>
Date: Mon, 17 Feb 2025 22:05:01 -0600
Subject: [PATCH 28/46] reslove conflicts

---
 GEMstack/onboard/perception/pedestrian_detection.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py
index 9da4a4c81..31085ce8f 100644
--- a/GEMstack/onboard/perception/pedestrian_detection.py
+++ b/GEMstack/onboard/perception/pedestrian_detection.py
@@ -319,7 +319,7 @@ def ouster_oak_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2):
                 extracted_pts = filter_ground_points(extracted_pts, self.ground_threshold)
                 extracted_pts = filter_depth_points(extracted_pts, self.max_human_depth)
                 extracted_pts_all.append(extracted_pts)
-	    else: extracted_pts_all.append(np.array(())
+            else: extracted_pts_all.append(np.array(()))
         
         #if len(extracted_pts_all) > 0 and len(track_result) > 0:
         self.update_object_states(track_result, extracted_pts_all)

From fcca8c01f59a224bb0ea50cc4ccbff8082d360d5 Mon Sep 17 00:00:00 2001
From: KenC1014 <kenken4016@gmail.com>
Date: Mon, 17 Feb 2025 22:42:55 -0600
Subject: [PATCH 29/46] toggle vehicle frame

---
 .../perception/pedestrian_detection.py        | 34 +++++++++++--------
 GEMstack/onboard/perception/transform.py      |  2 --
 2 files changed, 20 insertions(+), 16 deletions(-)

diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py
index 31085ce8f..2b6626ce9 100644
--- a/GEMstack/onboard/perception/pedestrian_detection.py
+++ b/GEMstack/onboard/perception/pedestrian_detection.py
@@ -85,6 +85,7 @@ def __init__(self, vehicle_interface : GEMInterface) -> None:
         self.classes_to_detect = 0
         self.ground_threshold = -2.0
         self.max_human_depth = 0.9
+        self.vehicle_frame = False # Indicate whether pedestrians centroids and point clouds are in the vehicle frame
 
         # Load calibration data
         # TODO: Maybe lets add one word or link what R t K are?
@@ -193,10 +194,12 @@ def viz_object_states(self, cv_image, boxes, extracted_pts_all):
             self.pub_image.publish(ros_img)  
 
             # Draw 3D pedestrian pointclouds
-            # Tranform 3D pedestrians points to vehicle frame for better visualization. Turn off for performance
-            flattened_pedestrians_3d_pts_vehicle = transform_lidar_points(np.array(flattened_pedestrians_3d_pts), self.R_lidar_to_vehicle, self.t_lidar_to_vehicle)
+            if self.vehicle_frame:
+                # If in vehicle frame, tranform 3D pedestrians points to vehicle frame for better visualization.
+                flattened_pedestrians_3d_pts_vehicle = transform_lidar_points(np.array(flattened_pedestrians_3d_pts), self.R_lidar_to_vehicle, self.t_lidar_to_vehicle)
+                flattened_pedestrians_3d_pts = flattened_pedestrians_3d_pts_vehicle
+
             # Create point cloud from extracted 3D points
-            # ros_extracted_pedestrian_pc2 = create_point_cloud(flattened_pedestrians_3d_pts_vehicle)
             ros_extracted_pedestrian_pc2 = create_point_cloud(flattened_pedestrians_3d_pts)
             self.pub_pedestrians_pc2.publish(ros_extracted_pedestrian_pc2)
 
@@ -241,20 +244,22 @@ def update_object_states(self, track_result: List[Results], extracted_pts_all: L
         obj_dims = self.find_dims(pedestrians_3d_pts)
         obj_vels = self.find_vels(track_ids, obj_centers)
 
-        # Transform centers from top_lidar frame to vehicle frame
+        # If in vehicle frame, transform centers from top_lidar frame to vehicle frame
         # Need to transform the center point one by one since matrix op can't deal with empty points
-        obj_centers_vehicle = []
-        for obj_center in obj_centers:
-            if len(obj_center) > 0:
-                obj_center = np.array([obj_center])
-                obj_center_vehicle = transform_lidar_points(obj_center, self.R_lidar_to_vehicle, self.t_lidar_to_vehicle)[0]
-                obj_centers_vehicle.append(obj_center_vehicle)
-            else:
-                obj_centers_vehicle.append(np.array(()))
+        if self.vehicle_frame:
+            obj_centers_vehicle = []
+            for obj_center in obj_centers:
+                if len(obj_center) > 0:
+                    obj_center = np.array([obj_center])
+                    obj_center_vehicle = transform_lidar_points(obj_center, self.R_lidar_to_vehicle, self.t_lidar_to_vehicle)[0]
+                    obj_centers_vehicle.append(obj_center_vehicle)
+                else:
+                    obj_centers_vehicle.append(np.array(()))
+            obj_centers = obj_centers_vehicle
         
         # Update Current AgentStates
         for ind in range(num_objs):
-            obj_center = (None, None, None) if obj_centers_vehicle[ind].size == 0 else obj_centers_vehicle[ind]
+            obj_center = (None, None, None) if obj_centers[ind].size == 0 else obj_centers[ind]
             obj_dim = (None, None, None) if obj_dims[ind].size == 0 else obj_dims[ind]
             self.current_agents[track_ids[ind]] = (
                 AgentState(
@@ -319,7 +324,8 @@ def ouster_oak_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2):
                 extracted_pts = filter_ground_points(extracted_pts, self.ground_threshold)
                 extracted_pts = filter_depth_points(extracted_pts, self.max_human_depth)
                 extracted_pts_all.append(extracted_pts)
-            else: extracted_pts_all.append(np.array(()))
+            # Still causing errors, I will turn this on later
+            # else: extracted_pts_all.append(np.array(()))
         
         #if len(extracted_pts_all) > 0 and len(track_result) > 0:
         self.update_object_states(track_result, extracted_pts_all)
diff --git a/GEMstack/onboard/perception/transform.py b/GEMstack/onboard/perception/transform.py
index b876cc595..8032752b1 100644
--- a/GEMstack/onboard/perception/transform.py
+++ b/GEMstack/onboard/perception/transform.py
@@ -9,8 +9,6 @@ def publish_tf():
 
     while not rospy.is_shutdown():
         br.sendTransform(
-            # (0, 1.6, 7),  # (x, y, z) translation
-            # tf.transformations.quaternion_from_euler(0.5* np.pi, 0, 0),  # (roll, pitch, yaw)
             (0, 0, 0),  # (x, y, z) translation
             tf.transformations.quaternion_from_euler(0, 0, 0),  # (roll, pitch, yaw)
             rospy.Time.now(),

From d820c3c63c2707178dd07744acc811962a8a9610 Mon Sep 17 00:00:00 2001
From: KenC1014 <kenken4016@gmail.com>
Date: Mon, 17 Feb 2025 22:43:05 -0600
Subject: [PATCH 30/46] toggle vehicle frame

---
 GEMstack/onboard/perception/pedestrian_detection.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py
index 2b6626ce9..a76ab6178 100644
--- a/GEMstack/onboard/perception/pedestrian_detection.py
+++ b/GEMstack/onboard/perception/pedestrian_detection.py
@@ -324,8 +324,7 @@ def ouster_oak_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2):
                 extracted_pts = filter_ground_points(extracted_pts, self.ground_threshold)
                 extracted_pts = filter_depth_points(extracted_pts, self.max_human_depth)
                 extracted_pts_all.append(extracted_pts)
-            # Still causing errors, I will turn this on later
-            # else: extracted_pts_all.append(np.array(()))
+            else: extracted_pts_all.append(np.array(()))
         
         #if len(extracted_pts_all) > 0 and len(track_result) > 0:
         self.update_object_states(track_result, extracted_pts_all)

From 248911cefb95d95c14a82981c816d0de01dfd0da Mon Sep 17 00:00:00 2001
From: KenC1014 <kenken4016@gmail.com>
Date: Mon, 17 Feb 2025 23:27:48 -0600
Subject: [PATCH 31/46] add comments for bug fixes

---
 GEMstack/onboard/perception/pedestrian_detection.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py
index a76ab6178..2a8ac193c 100644
--- a/GEMstack/onboard/perception/pedestrian_detection.py
+++ b/GEMstack/onboard/perception/pedestrian_detection.py
@@ -163,6 +163,7 @@ def find_vels(self, track_ids: List[int], obj_centers: List[np.ndarray]) -> Dict
     def viz_object_states(self, cv_image, boxes, extracted_pts_all):
         # Extract 3D pedestrians points in lidar frame
         # ** These are camera frame after transform_lidar_points, right?
+        # ** It was in camera frame before. I fixed it. Now they are in lidar frame!
         pedestrians_3d_pts = [list(extracted_pts[:, -3:]) for extracted_pts in extracted_pts_all]
 
         # Object center viz
@@ -232,6 +233,7 @@ def update_object_states(self, track_result: List[Results], extracted_pts_all: L
 
         # Extract 3D pedestrians points in lidar frame
         # ** These are camera frame after transform_lidar_points, right?
+        # ** It was in camera frame before. I fixed it. Now they are in lidar frame!
         pedestrians_3d_pts = [list(extracted_pts[:, -3:]) for extracted_pts in extracted_pts_all] 
         if len(pedestrians_3d_pts) != num_objs:
             raise Exception('Perception - Camera detections, points clusters num. mismatch')
@@ -326,7 +328,6 @@ def ouster_oak_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2):
                 extracted_pts_all.append(extracted_pts)
             else: extracted_pts_all.append(np.array(()))
         
-        #if len(extracted_pts_all) > 0 and len(track_result) > 0:
         self.update_object_states(track_result, extracted_pts_all)
         if self.debug: self.viz_object_states(cv_image, boxes, extracted_pts_all)
 

From 5700e095c62b9dbb52447522699015ef7d877c06 Mon Sep 17 00:00:00 2001
From: KenC1014 <kenken4016@gmail.com>
Date: Mon, 17 Feb 2025 23:54:22 -0600
Subject: [PATCH 32/46] handle empty list

---
 GEMstack/onboard/perception/pedestrian_detection.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py
index 2a8ac193c..9970c6a29 100644
--- a/GEMstack/onboard/perception/pedestrian_detection.py
+++ b/GEMstack/onboard/perception/pedestrian_detection.py
@@ -164,7 +164,7 @@ def viz_object_states(self, cv_image, boxes, extracted_pts_all):
         # Extract 3D pedestrians points in lidar frame
         # ** These are camera frame after transform_lidar_points, right?
         # ** It was in camera frame before. I fixed it. Now they are in lidar frame!
-        pedestrians_3d_pts = [list(extracted_pts[:, -3:]) for extracted_pts in extracted_pts_all]
+        pedestrians_3d_pts = [[] if len(extracted_pts) == 0 else list(extracted_pts[:, -3:]) for extracted_pts in extracted_pts_all] 
 
         # Object center viz
         obj_3d_obj_centers = list()
@@ -176,7 +176,7 @@ def viz_object_states(self, cv_image, boxes, extracted_pts_all):
                 obj_3d_obj_dims.append(agent.dimensions)
         
         # Extract 2D pedestrians points and bbox in camera frame
-        extracted_2d_pts = [list(extracted_pts[:, :2].astype(int)) for extracted_pts in extracted_pts_all]
+        extracted_2d_pts = [[] if len(extracted_pts) == 0 else list(extracted_pts[:, :2].astype(int)) for extracted_pts in extracted_pts_all]
         flattened_pedestrians_2d_pts = list()
         for pts in extracted_2d_pts:    flattened_pedestrians_2d_pts.extend(pts)
 
@@ -234,7 +234,7 @@ def update_object_states(self, track_result: List[Results], extracted_pts_all: L
         # Extract 3D pedestrians points in lidar frame
         # ** These are camera frame after transform_lidar_points, right?
         # ** It was in camera frame before. I fixed it. Now they are in lidar frame!
-        pedestrians_3d_pts = [list(extracted_pts[:, -3:]) for extracted_pts in extracted_pts_all] 
+        pedestrians_3d_pts = [[] if len(extracted_pts) == 0 else list(extracted_pts[:, -3:]) for extracted_pts in extracted_pts_all] 
         if len(pedestrians_3d_pts) != num_objs:
             raise Exception('Perception - Camera detections, points clusters num. mismatch')
         

From bf4f433fb4dde499e257041579dff6da70a44b64 Mon Sep 17 00:00:00 2001
From: LucasEby <lucaseby@outlook.com>
Date: Tue, 18 Feb 2025 08:17:26 -0500
Subject: [PATCH 33/46] Found some frame problems in parent branch

---
 GEMstack/onboard/perception/AgentTracker.py   | 144 ++++++++++++++++++
 GEMstack/onboard/perception/IdTracker.py      |  12 ++
 GEMstack/onboard/perception/PrevAgent.py      |  17 +++
 .../perception/pedestrian_detection.py        |  22 ++-
 4 files changed, 187 insertions(+), 8 deletions(-)
 create mode 100644 GEMstack/onboard/perception/AgentTracker.py
 create mode 100644 GEMstack/onboard/perception/IdTracker.py
 create mode 100644 GEMstack/onboard/perception/PrevAgent.py

diff --git a/GEMstack/onboard/perception/AgentTracker.py b/GEMstack/onboard/perception/AgentTracker.py
new file mode 100644
index 000000000..55e2cd917
--- /dev/null
+++ b/GEMstack/onboard/perception/AgentTracker.py
@@ -0,0 +1,144 @@
+import math
+from typing import Dict, List
+
+from GEMstack.onboard.perception.IdTracker import IdTracker
+from GEMstack.onboard.perception.PrevAgent import PrevAgent
+from GEMstack.state.agent import AgentState
+
+
+class AgentTracker():
+    """Associates and tracks AgentState agents.
+    """
+    def __init__(self):
+        # List of PrevAgent objects (each keeps track of the last seen state and time since seen)
+        self.prev_agents: List[PrevAgent] = []
+        # List of currently visible AgentState objects
+        self.current_agents: List[AgentState] = []
+        # Maximum time (in seconds) to keep a lost agent before dropping it.
+        self.drop_agent_t: float = 1.0
+        # Id tracker for creating new unique pedestrian IDs.
+        self.id_tracker = IdTracker()
+    
+    def assign_ids(self, agents: list) -> Dict[str,AgentState]:
+        # Act with the assumption that you are being sent a list of AgentState objects and you need to use the object fields to keep track of them for your task
+        # Further act on the assumption that we will decide the id's of the pedestrians by assuming that 2 pedestrians are the same pedestrian if a
+        # previously stored AgentState pose and dimensions overlap with a newly passed in AgentState
+        # Act on the assumption that the AgentState objects are all in reference to the start frame of the vehicle
+        # some helper functions in this class, LostAgent.py, and IdTracker.py have been created to try to help you out with your task.
+        # Assume that the output returned from this function will be a dictionary of AgentState objects with the key corresponding to their id
+        """
+        Associates new AgentState objects with existing tracked agents based on overlap.
+        If an agent does not match any previously tracked agent, a new unique id is assigned.
+        Also updates the “lost” time for agents that are not matched in this frame.
+
+        Parameters:
+            agents (list): List of AgentState objects for the current frame 
+                           (already converted to the start frame of reference).
+
+        Returns:
+            Dict[str, AgentState]: Dictionary mapping agent id (as a string) to AgentState.
+        """
+        dt = 1.0  # Assuming a fixed time-step of 1 second (for simplicity)
+        output_agents: Dict[str, AgentState] = {}
+        matched_ids = set()
+        updated_prev_agents: List[PrevAgent] = []
+
+        # Process each new agent from the current frame.
+        for new_agent in agents:
+            found_match = None
+            # Look for a previously tracked agent whose bounding box overlaps.
+            for prev in self.prev_agents:
+                if prev.last_id in matched_ids:
+                    continue  # Already matched with another new agent.
+                if self.__agents_overlap(prev.last_state, new_agent):
+                    found_match = prev
+                    break
+
+            if found_match is not None:
+                # update velocity using the change in position.
+                if hasattr(new_agent, 'velocity'):
+                    new_agent.velocity = self.__calculate_velocity(found_match.last_state, new_agent, dt)
+
+                # Update the matched agent’s state and reset its lost-time counter.
+                found_match.last_state = new_agent
+                found_match.time_since_seen = 0.0
+                matched_ids.add(found_match.last_id)
+                output_agents[str(found_match.last_id)] = new_agent
+                updated_prev_agents.append(found_match)
+            else:
+                # No match found – assign a new unique id.
+                new_id = self.id_tracker.get_new_ped_id()
+                new_prev = PrevAgent(new_id, new_agent)
+                # initialize velocity to 0.
+                if hasattr(new_agent, 'velocity'):
+                    new_agent.velocity = 0.0
+                output_agents[str(new_id)] = new_agent
+                updated_prev_agents.append(new_prev)
+
+        # For all previously tracked agents that were not matched this frame,
+        # update their time-since-seen and only keep them if they have not timed out.
+        for prev in self.prev_agents:
+            if prev.last_id not in matched_ids:
+                prev.update_time(dt)
+                if prev.time_since_seen < self.drop_agent_t:
+                    updated_prev_agents.append(prev)
+
+        # Save the updated list of tracked agents.
+        self.prev_agents = updated_prev_agents
+        # update current_agents to include only those seen in the current frame.
+        self.current_agents = list(output_agents.values())
+        return output_agents
+
+    def __convert_to_start_frame(self):
+        """Converts a list of AgentState agents from ouster Lidar frame of 
+        reference (which is in reference to the current frame) to start 
+        frame frame of reference
+        """
+        # you can ignore this function akul
+        pass
+        
+    def __agents_overlap(self, ped1, ped2) -> bool:
+        """
+        Determines if two AgentState objects overlap based on their pose and dimensions.
+
+        Assumes each AgentState has:
+            - pose with attributes x and y.
+            - dimensions with attributes width and height.
+
+        Returns:
+            bool: True if they overlap; False otherwise.
+        """
+        # Get first agent's properties.
+        x1, y1 = ped1.pose.x, ped1.pose.y
+        w1, h1 = ped1.dimensions.width, ped1.dimensions.height
+
+        # Get second agent's properties.
+        x2, y2 = ped2.pose.x, ped2.pose.y
+        w2, h2 = ped2.dimensions.width, ped2.dimensions.height
+
+        # Compute bounding boxes (assuming (x, y) is the center).
+        left1, right1 = x1 - w1 / 2, x1 + w1 / 2
+        top1, bottom1 = y1 - h1 / 2, y1 + h1 / 2
+
+        left2, right2 = x2 - w2 / 2, x2 + w2 / 2
+        top2, bottom2 = y2 - h2 / 2, y2 + h2 / 2
+
+        # Check for overlap between the bounding boxes.
+        overlap = not (right1 < left2 or left1 > right2 or bottom1 < top2 or top1 > bottom2)
+        return overlap
+
+    def __calculate_velocity(self, old_state: AgentState, new_state: AgentState, dt: float) -> float:
+        """
+        Calculates the velocity based on the change in pose over time.
+
+        Parameters:
+            old_state (AgentState): The previous state.
+            new_state (AgentState): The current state.
+            dt (float): Time difference between the two states.
+
+        Returns:
+            float: The computed velocity.
+        """
+        dx = new_state.pose.x - old_state.pose.x
+        dy = new_state.pose.y - old_state.pose.y
+        return math.sqrt(dx * dx + dy * dy) / dt if dt > 0 else 0.0
diff --git a/GEMstack/onboard/perception/IdTracker.py b/GEMstack/onboard/perception/IdTracker.py
new file mode 100644
index 000000000..1c9c6ecbd
--- /dev/null
+++ b/GEMstack/onboard/perception/IdTracker.py
@@ -0,0 +1,12 @@
+class IdTracker():
+    """Abstracts out id tracking
+    """
+    def __init__(self):
+        self.__ped_id = 0
+
+    def get_new_ped_id(self) -> int:
+        """Returns a unique pedestrian id
+        """
+        assigned_id = self.__id
+        self.__ped_id += 1 # id will intentionally overflow to get back to 0
+        return assigned_id
diff --git a/GEMstack/onboard/perception/PrevAgent.py b/GEMstack/onboard/perception/PrevAgent.py
new file mode 100644
index 000000000..210cea556
--- /dev/null
+++ b/GEMstack/onboard/perception/PrevAgent.py
@@ -0,0 +1,17 @@
+# GEM imports:
+from ...state import AgentState
+
+class PrevAgent():
+    def __init__(self, last_id: int, last_state: AgentState):
+        self.time_since_seen: float = 0.0 # Time since the agent was last seen in seconds
+        self.last_id: int = last_id
+        self.last_state: AgentState = last_state
+
+    def update_time(time: float):
+        """Updates the time since the agent was last seen
+        """
+        if time <= 0:
+            # TODO: log error here
+            print("UPDATE TIME FOR LOST AGENT WAS LESS THAN OR EQUAL TO 0")
+        else:
+            self.time_since_seen += time
\ No newline at end of file
diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py
index 9970c6a29..519d85505 100644
--- a/GEMstack/onboard/perception/pedestrian_detection.py
+++ b/GEMstack/onboard/perception/pedestrian_detection.py
@@ -48,6 +48,8 @@
 from .pedestrian_detection_utils import *
 from ..interface.gem import GEMInterface
 from ..component import Component
+from .AgentTracker import AgentTracker
+
 
 def box_to_fake_agent(box):
     """Creates a fake agent state from an (x,y,w,h) bounding box.
@@ -238,14 +240,7 @@ def update_object_states(self, track_result: List[Results], extracted_pts_all: L
         if len(pedestrians_3d_pts) != num_objs:
             raise Exception('Perception - Camera detections, points clusters num. mismatch')
         
-        # TODO: Slower but cleaner to pass dicts of AgentState
-        #       or at least {track_ids: centers/pts/etc}
-        # TODO: Combine funcs for efficiency in C.
-        #       Separate numpy prob still faster for now
-        obj_centers = self.find_centers(pedestrians_3d_pts)
-        obj_dims = self.find_dims(pedestrians_3d_pts)
-        obj_vels = self.find_vels(track_ids, obj_centers)
-
+        # TODO: CONVERT FROM LIDAR FRAME TO VEHICLE FRAME HERE (vehicle frame is center of rear axle at vehicle's current location)
         # If in vehicle frame, transform centers from top_lidar frame to vehicle frame
         # Need to transform the center point one by one since matrix op can't deal with empty points
         if self.vehicle_frame:
@@ -259,6 +254,17 @@ def update_object_states(self, track_result: List[Results], extracted_pts_all: L
                     obj_centers_vehicle.append(np.array(()))
             obj_centers = obj_centers_vehicle
         
+        # TODO: Slower but cleaner to pass dicts of AgentState
+        #       or at least {track_ids: centers/pts/etc}
+        # TODO: Combine funcs for efficiency in C.
+        #       Separate numpy prob still faster for now
+        obj_centers = self.find_centers(pedestrians_3d_pts)
+        obj_dims = self.find_dims(pedestrians_3d_pts)
+
+        # TODO: CONVERT FROM VEHICLE FRAME TO START FRAME HERE
+        # Then compare previous and current agents with the same id to calculate velocity
+        obj_vels = self.find_vels(track_ids, obj_centers)
+
         # Update Current AgentStates
         for ind in range(num_objs):
             obj_center = (None, None, None) if obj_centers[ind].size == 0 else obj_centers[ind]

From 8a7ceb64c7910f25b53ca806257e5ede867be2c3 Mon Sep 17 00:00:00 2001
From: LucasEby <lucaseby@outlook.com>
Date: Tue, 18 Feb 2025 11:31:55 -0500
Subject: [PATCH 34/46] Wrote in logic to finish part 3. Still need to
 implement 1-2 transforms and to fix a small list bug

---
 GEMstack/onboard/perception/AgentTracker.py   | 144 -----------------
 GEMstack/onboard/perception/IdTracker.py      |   8 +-
 GEMstack/onboard/perception/PrevAgent.py      |  17 --
 .../perception/pedestrian_detection.py        | 149 +++++++++++++++---
 4 files changed, 129 insertions(+), 189 deletions(-)
 delete mode 100644 GEMstack/onboard/perception/AgentTracker.py
 delete mode 100644 GEMstack/onboard/perception/PrevAgent.py

diff --git a/GEMstack/onboard/perception/AgentTracker.py b/GEMstack/onboard/perception/AgentTracker.py
deleted file mode 100644
index 55e2cd917..000000000
--- a/GEMstack/onboard/perception/AgentTracker.py
+++ /dev/null
@@ -1,144 +0,0 @@
-import math
-from typing import Dict, List
-
-from GEMstack.onboard.perception.IdTracker import IdTracker
-from GEMstack.onboard.perception.PrevAgent import PrevAgent
-from GEMstack.state.agent import AgentState
-
-
-class AgentTracker():
-    """Associates and tracks AgentState agents.
-    """
-    def __init__(self):
-        # List of PrevAgent objects (each keeps track of the last seen state and time since seen)
-        self.prev_agents: List[PrevAgent] = []
-        # List of currently visible AgentState objects
-        self.current_agents: List[AgentState] = []
-        # Maximum time (in seconds) to keep a lost agent before dropping it.
-        self.drop_agent_t: float = 1.0
-        # Id tracker for creating new unique pedestrian IDs.
-        self.id_tracker = IdTracker()
-    
-    def assign_ids(self, agents: list) -> Dict[str,AgentState]:
-        # Act with the assumption that you are being sent a list of AgentState objects and you need to use the object fields to keep track of them for your task
-        # Further act on the assumption that we will decide the id's of the pedestrians by assuming that 2 pedestrians are the same pedestrian if a
-        # previously stored AgentState pose and dimensions overlap with a newly passed in AgentState
-        # Act on the assumption that the AgentState objects are all in reference to the start frame of the vehicle
-        # some helper functions in this class, LostAgent.py, and IdTracker.py have been created to try to help you out with your task.
-        # Assume that the output returned from this function will be a dictionary of AgentState objects with the key corresponding to their id
-        """
-        Associates new AgentState objects with existing tracked agents based on overlap.
-        If an agent does not match any previously tracked agent, a new unique id is assigned.
-        Also updates the “lost” time for agents that are not matched in this frame.
-
-        Parameters:
-            agents (list): List of AgentState objects for the current frame 
-                           (already converted to the start frame of reference).
-
-        Returns:
-            Dict[str, AgentState]: Dictionary mapping agent id (as a string) to AgentState.
-        """
-        dt = 1.0  # Assuming a fixed time-step of 1 second (for simplicity)
-        output_agents: Dict[str, AgentState] = {}
-        matched_ids = set()
-        updated_prev_agents: List[PrevAgent] = []
-
-        # Process each new agent from the current frame.
-        for new_agent in agents:
-            found_match = None
-            # Look for a previously tracked agent whose bounding box overlaps.
-            for prev in self.prev_agents:
-                if prev.last_id in matched_ids:
-                    continue  # Already matched with another new agent.
-                if self.__agents_overlap(prev.last_state, new_agent):
-                    found_match = prev
-                    break
-
-            if found_match is not None:
-                # update velocity using the change in position.
-                if hasattr(new_agent, 'velocity'):
-                    new_agent.velocity = self.__calculate_velocity(found_match.last_state, new_agent, dt)
-
-                # Update the matched agent’s state and reset its lost-time counter.
-                found_match.last_state = new_agent
-                found_match.time_since_seen = 0.0
-                matched_ids.add(found_match.last_id)
-                output_agents[str(found_match.last_id)] = new_agent
-                updated_prev_agents.append(found_match)
-            else:
-                # No match found – assign a new unique id.
-                new_id = self.id_tracker.get_new_ped_id()
-                new_prev = PrevAgent(new_id, new_agent)
-                # initialize velocity to 0.
-                if hasattr(new_agent, 'velocity'):
-                    new_agent.velocity = 0.0
-                output_agents[str(new_id)] = new_agent
-                updated_prev_agents.append(new_prev)
-
-        # For all previously tracked agents that were not matched this frame,
-        # update their time-since-seen and only keep them if they have not timed out.
-        for prev in self.prev_agents:
-            if prev.last_id not in matched_ids:
-                prev.update_time(dt)
-                if prev.time_since_seen < self.drop_agent_t:
-                    updated_prev_agents.append(prev)
-
-        # Save the updated list of tracked agents.
-        self.prev_agents = updated_prev_agents
-        # update current_agents to include only those seen in the current frame.
-        self.current_agents = list(output_agents.values())
-        return output_agents
-
-    def __convert_to_start_frame(self):
-        """Converts a list of AgentState agents from ouster Lidar frame of 
-        reference (which is in reference to the current frame) to start 
-        frame frame of reference
-        """
-        # you can ignore this function akul
-        pass
-        
-    def __agents_overlap(self, ped1, ped2) -> bool:
-        """
-        Determines if two AgentState objects overlap based on their pose and dimensions.
-
-        Assumes each AgentState has:
-            - pose with attributes x and y.
-            - dimensions with attributes width and height.
-
-        Returns:
-            bool: True if they overlap; False otherwise.
-        """
-        # Get first agent's properties.
-        x1, y1 = ped1.pose.x, ped1.pose.y
-        w1, h1 = ped1.dimensions.width, ped1.dimensions.height
-
-        # Get second agent's properties.
-        x2, y2 = ped2.pose.x, ped2.pose.y
-        w2, h2 = ped2.dimensions.width, ped2.dimensions.height
-
-        # Compute bounding boxes (assuming (x, y) is the center).
-        left1, right1 = x1 - w1 / 2, x1 + w1 / 2
-        top1, bottom1 = y1 - h1 / 2, y1 + h1 / 2
-
-        left2, right2 = x2 - w2 / 2, x2 + w2 / 2
-        top2, bottom2 = y2 - h2 / 2, y2 + h2 / 2
-
-        # Check for overlap between the bounding boxes.
-        overlap = not (right1 < left2 or left1 > right2 or bottom1 < top2 or top1 > bottom2)
-        return overlap
-
-    def __calculate_velocity(self, old_state: AgentState, new_state: AgentState, dt: float) -> float:
-        """
-        Calculates the velocity based on the change in pose over time.
-
-        Parameters:
-            old_state (AgentState): The previous state.
-            new_state (AgentState): The current state.
-            dt (float): Time difference between the two states.
-
-        Returns:
-            float: The computed velocity.
-        """
-        dx = new_state.pose.x - old_state.pose.x
-        dy = new_state.pose.y - old_state.pose.y
-        return math.sqrt(dx * dx + dy * dy) / dt if dt > 0 else 0.0
diff --git a/GEMstack/onboard/perception/IdTracker.py b/GEMstack/onboard/perception/IdTracker.py
index 1c9c6ecbd..15641fa00 100644
--- a/GEMstack/onboard/perception/IdTracker.py
+++ b/GEMstack/onboard/perception/IdTracker.py
@@ -2,11 +2,11 @@ class IdTracker():
     """Abstracts out id tracking
     """
     def __init__(self):
-        self.__ped_id = 0
+        self.__id = 0
 
-    def get_new_ped_id(self) -> int:
-        """Returns a unique pedestrian id
+    def get_new_id(self) -> int:
+        """Returns a unique agent id
         """
         assigned_id = self.__id
-        self.__ped_id += 1 # id will intentionally overflow to get back to 0
+        self.__id += 1 # id will intentionally overflow to get back to 0
         return assigned_id
diff --git a/GEMstack/onboard/perception/PrevAgent.py b/GEMstack/onboard/perception/PrevAgent.py
deleted file mode 100644
index 210cea556..000000000
--- a/GEMstack/onboard/perception/PrevAgent.py
+++ /dev/null
@@ -1,17 +0,0 @@
-# GEM imports:
-from ...state import AgentState
-
-class PrevAgent():
-    def __init__(self, last_id: int, last_state: AgentState):
-        self.time_since_seen: float = 0.0 # Time since the agent was last seen in seconds
-        self.last_id: int = last_id
-        self.last_state: AgentState = last_state
-
-    def update_time(time: float):
-        """Updates the time since the agent was last seen
-        """
-        if time <= 0:
-            # TODO: log error here
-            print("UPDATE TIME FOR LOST AGENT WAS LESS THAN OR EQUAL TO 0")
-        else:
-            self.time_since_seen += time
\ No newline at end of file
diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py
index 519d85505..e5832601d 100644
--- a/GEMstack/onboard/perception/pedestrian_detection.py
+++ b/GEMstack/onboard/perception/pedestrian_detection.py
@@ -33,6 +33,7 @@
 import os
 from typing import List, Dict
 from collections import defaultdict
+from datetime import datetime
 # ROS, CV
 import rospy
 import message_filters
@@ -48,7 +49,7 @@
 from .pedestrian_detection_utils import *
 from ..interface.gem import GEMInterface
 from ..component import Component
-from .AgentTracker import AgentTracker
+from .IdTracker import IdTracker
 
 
 def box_to_fake_agent(box):
@@ -110,6 +111,10 @@ def __init__(self, vehicle_interface : GEMInterface) -> None:
         self.tf_listener = tf.TransformListener()
 
         if self.debug: self.init_debug()
+
+        self.prev_time = None # Time in seconds since last scan for basic velocity calculation
+        self.current_time = None # Time in seconds of current scan for basic velocity calculation
+        self.id_tracker = IdTracker()
     
     def init_debug(self,) -> None:
          # Debug Publishers
@@ -144,7 +149,7 @@ def find_dims(self, clusters: List[List[np.ndarray]]) -> List[np.ndarray]:
     #            Work towards own tracking class instead of simple YOLO track?
     #            Fix division by time
     # ret: Dict[track_id: vel[x, y, z]]
-    def find_vels(self, track_ids: List[int], obj_centers: List[np.ndarray]) -> Dict[int, np.ndarray]:
+    def find_vels(self, track_ids: List[int], obj_centers: List[np.ndarray], obj_dims: List[np.ndarray]) -> Dict[int, np.ndarray]:
         # Object not seen -> velocity = None
         track_id_center_map = dict(zip(track_ids, obj_centers))
         vels = defaultdict(lambda: np.array(())) # None is faster, np.array matches other find_ methods.
@@ -159,8 +164,7 @@ def find_vels(self, track_ids: List[int], obj_centers: List[np.ndarray]) -> Dict
                 if prev_agent.pose.x and prev_agent.pose.y and prev_agent.pose.z and track_id_center_map[prev_agent.track_id].shape == 3:
                     vels[prev_track_id] = (track_id_center_map[prev_track_id] - np.array([prev_agent.pose.x, prev_agent.pose.y, prev_agent.pose.z])) / (self.curr_time - self.prev_time)
         return vels
-
-
+    
     # TODO: Separate debug/viz class, bbox and 2d 3d points funcs 
     def viz_object_states(self, cv_image, boxes, extracted_pts_all):
         # Extract 3D pedestrians points in lidar frame
@@ -221,7 +225,7 @@ def viz_object_states(self, cv_image, boxes, extracted_pts_all):
         
 
     def update_object_states(self, track_result: List[Results], extracted_pts_all: List[np.ndarray]) -> None:
-        self.prev_agents = self.current_agents.copy()
+        # self.prev_agents = self.current_agents.copy()
         self.current_agents.clear()
 
         # Return if no track results available
@@ -263,27 +267,124 @@ def update_object_states(self, track_result: List[Results], extracted_pts_all: L
 
         # TODO: CONVERT FROM VEHICLE FRAME TO START FRAME HERE
         # Then compare previous and current agents with the same id to calculate velocity
-        obj_vels = self.find_vels(track_ids, obj_centers)
-
-        # Update Current AgentStates
-        for ind in range(num_objs):
-            obj_center = (None, None, None) if obj_centers[ind].size == 0 else obj_centers[ind]
-            obj_dim = (None, None, None) if obj_dims[ind].size == 0 else obj_dims[ind]
-            self.current_agents[track_ids[ind]] = (
-                AgentState(
-                    track_id = track_ids[ind],
-                    pose=ObjectPose(t=0, x=obj_center[0], y=obj_center[1], z=obj_center[2] ,yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT),
-                    # (l, w, h)
-                    # TODO: confirm (z -> l, x -> w, y -> h)
-                    dimensions=(obj_dim[0], obj_dim[1], obj_dim[2]),  
-                    outline=None,
-                    type=AgentEnum.PEDESTRIAN,
-                    activity=AgentActivityEnum.MOVING,
-                    velocity= None if obj_vels[track_ids[ind]].size == 0 else tuple(obj_vels[track_ids[ind]]),
-                    yaw_rate=0
-                ))
+        self.find_vels_and_ids(obj_centers, obj_dims)
+        # obj_vels = self.find_vels(track_ids, obj_centers)
+
+        # # Update Current AgentStates
+        # for ind in range(num_objs):
+        #     obj_center = (None, None, None) if obj_centers[ind].size == 0 else obj_centers[ind]
+        #     obj_dim = (None, None, None) if obj_dims[ind].size == 0 else obj_dims[ind]
+        #     self.current_agents[track_ids[ind]] = (
+        #         AgentState(
+        #             track_id = track_ids[ind],
+        #             pose=ObjectPose(t=0, x=obj_center[0], y=obj_center[1], z=obj_center[2] ,yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT),
+        #             # (l, w, h)
+        #             # TODO: confirm (z -> l, x -> w, y -> h)
+        #             dimensions=(obj_dim[0], obj_dim[1], obj_dim[2]),  
+        #             outline=None,
+        #             type=AgentEnum.PEDESTRIAN,
+        #             activity=AgentActivityEnum.MOVING,
+        #             velocity= None if obj_vels[track_ids[ind]].size == 0 else tuple(obj_vels[track_ids[ind]]),
+        #             yaw_rate=0
+        #         ))
+            
+    def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.ndarray]):
+        new_prev_agents = {} # Stores current agents in START frame for next time through (since 
+                             # planning wants us to send them agents in CURRENT frame)
+        # Object not seen -> velocity = None
+        vels = defaultdict(lambda: np.array(())) # None is faster, np.array matches other find_ methods.
+
+        # THIS ASSUMES EVERYTHING IS IN THE SAME FRAME WHICH WOULD WORK FOR STATIONARY CAR.
+        # TODO: NEED TO STORE AND INCORPORATE TRANSFORMS SOMEHOW TO DEAL WITH MOVING CAR CASE
+        assigned = False
+        num_pairings = len(obj_centers)
+        converted_centers = obj_centers # TODO: REPLACE WITH THIS: self.convert_vehicle_frame_to_start_frame(obj_centers)
+
+        # Loop through the indexes of the obj_center and obj_dim pairings
+        for idx in range(num_pairings):
+            assigned = False
+
+            # Loop through previous agents backwards
+            for prev_id, prev_state in reversed(self.prev_agents.items()):
+                # If an obj_center and obj_dim pair overlaps with a previous agent, assume that they're the same agent
+                if self.agents_overlap(converted_centers[idx], obj_dims[idx], prev_state):
+                    assigned = True
+
+                    if self.prev_time == None:
+                        # This shouldn't ever be triggered
+                        vel = 0
+                    else:
+                        delta_t = self.curr_time - self.prev_time
+                        vel = (obj_centers[idx] - np.array([prev_state.pose.x, prev_state.pose.y, prev_state.pose.z])) / delta_t.total_seconds()
+                        print("VELOCITY:")
+                        print(vel)
+
+                    self.current_agents[prev_id] = (
+                        AgentState(
+                            track_id = prev_id,
+                            pose=ObjectPose(t=0, x=obj_centers[idx][0], y=obj_centers[idx][1], z=obj_centers[idx][2], yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT),
+                            # (l, w, h)
+                            # TODO: confirm (z -> l, x -> w, y -> h)
+                            dimensions=(obj_dims[idx][0], obj_dims[idx][1], obj_dims[idx][2]),  
+                            outline=None,
+                            type=AgentEnum.PEDESTRIAN,
+                            activity=AgentActivityEnum.MOVING,
+                            velocity= None if vel.size == 0 else tuple(vel),
+                            yaw_rate=0
+                        ))
+                    del self.prev_agents[prev_id] # Remove previous agent from previous agents
+                    break
+
+            # If not assigned:
+            if not assigned:
+                # Set velocity to 0 and assign the new agent a new id with IdTracker
+                id = self.id_tracker.get_new_id()
+                self.current_agents[id] = (
+                    AgentState(
+                        track_id = id,
+                        pose=ObjectPose(t=0, x=obj_centers[idx][0], y=obj_centers[idx][1], z=obj_centers[idx][2] ,yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT),
+                        # (l, w, h)
+                        # TODO: confirm (z -> l, x -> w, y -> h)
+                        dimensions=(obj_dims[idx][0], obj_dims[idx][1], obj_dims[idx][2]),  
+                        outline=None,
+                        type=AgentEnum.PEDESTRIAN,
+                        activity=AgentActivityEnum.MOVING,
+                        velocity= None,
+                        yaw_rate=0
+                    ))
+        self.prev_agents = new_prev_agents
+
+    # Calculates whether 2 agents overlap in START frame. True if they do, false if not
+    def agents_overlap(obj_center: np.ndarray, obj_dim: np.ndarray, prev_agent: AgentState) -> bool:
+        # Calculate corners of obj_center and obj_dim pairing
+        x1_min, x1_max = obj_center[0] - obj_dim[0] / 2.0, obj_center[0] + obj_dim[0] / 2.0
+        y1_min, y1_max = obj_center[1] - obj_dim[1] / 2.0, obj_center[1] + obj_dim[1] / 2.0 # CENTER CALCULATION
+        z1_min, z1_max = obj_center[2] - obj_dim[2] / 2.0, obj_center[2] + obj_dim[2] / 2.0
+
+        # Calculate corners of AgentState
+        # Beware: AgentState(PhysicalObject) builds bbox from 
+        # dims [-l/2,l/2] x [-w/2,w/2] x [0,h], not
+        # [-l/2,l/2] x [-w/2,w/2] x [-h/2,h/2]
+        # TODO: confirm (z -> l, x -> w, y -> h)
+        x2_min, x2_max = prev_agent.pose.x - prev_agent.dimensions.width / 2.0, prev_agent.pose.x + prev_agent.dimensions.width / 2.0
+        y2_min, y2_max = prev_agent.pose.y, prev_agent.pose.y + prev_agent.dimensions.height # AGENT STATE CALCULATION
+        z2_min, z2_max = prev_agent.pose.z - prev_agent.dimensions.length / 2.0, prev_agent.pose.z + prev_agent.dimensions.length / 2.0
+        
+        # True if they overlap, false if not
+        return (
+            ( (x1_min <= x2_min and x2_min <= x1_max) or (x2_min <= x1_min and x1_min <= x2_max) ) and
+            ( (y1_min <= y2_min and y2_min <= y1_max) or (y2_min <= y1_min and y1_min <= y2_max) ) and
+            ( (z1_min <= z2_min and z2_min <= z1_max) or (z2_min <= z1_min and z1_min <= z2_max) )
+        )
+
+    def convert_vehicle_frame_to_start_frame(obj_centers: List[np.ndarray]) -> List[np.ndarray]:
+        pass
 
     def ouster_oak_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2):
+        # Update times for basic velocity calculation
+        self.prev_time = self.current_time
+        self.current_time = datetime.now()
+
         # Convert to cv2 image and run detector
         cv_image = self.bridge.imgmsg_to_cv2(rgb_image_msg, "bgr8") 
         track_result = self.detector.track(source=cv_image, classes=self.classes_to_detect, persist=True, conf=self.confidence)

From 2aa0ba6ef0dc690ac3e74439b2bb1bbb168e75c5 Mon Sep 17 00:00:00 2001
From: LucasEby <lucaseby@outlook.com>
Date: Tue, 18 Feb 2025 16:25:47 -0500
Subject: [PATCH 35/46] Fixed find_vels_and_ids logic bugs, AgentState bugs,
 timing bugs, etc

---
 .../perception/pedestrian_detection.py        | 66 ++++++++++++++-----
 1 file changed, 51 insertions(+), 15 deletions(-)

diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py
index e5832601d..c515ffe07 100644
--- a/GEMstack/onboard/perception/pedestrian_detection.py
+++ b/GEMstack/onboard/perception/pedestrian_detection.py
@@ -82,8 +82,6 @@ def __init__(self, vehicle_interface : GEMInterface) -> None:
         self.prev_agents = dict()         
         self.current_agents = dict()
         # TODO Implement time
-        self.prev_time = 0
-        self.curr_time = 1 # Avoid divide by 0 for placebolder, 0
         self.confidence = 0.7
         self.classes_to_detect = 0
         self.ground_threshold = -2.0
@@ -113,7 +111,7 @@ def __init__(self, vehicle_interface : GEMInterface) -> None:
         if self.debug: self.init_debug()
 
         self.prev_time = None # Time in seconds since last scan for basic velocity calculation
-        self.current_time = None # Time in seconds of current scan for basic velocity calculation
+        self.curr_time = None # Time in seconds of current scan for basic velocity calculation
         self.id_tracker = IdTracker()
     
     def init_debug(self,) -> None:
@@ -267,6 +265,9 @@ def update_object_states(self, track_result: List[Results], extracted_pts_all: L
 
         # TODO: CONVERT FROM VEHICLE FRAME TO START FRAME HERE
         # Then compare previous and current agents with the same id to calculate velocity
+        # for idx in range(len(obj_centers)):
+        #     print(obj_centers[idx])
+        #     print(obj_dims[idx])
         self.find_vels_and_ids(obj_centers, obj_dims)
         # obj_vels = self.find_vels(track_ids, obj_centers)
 
@@ -289,6 +290,12 @@ def update_object_states(self, track_result: List[Results], extracted_pts_all: L
         #         ))
             
     def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.ndarray]):
+        # Gate to check whether dims and centers are empty:
+        if ((len(obj_dims) == 1 or len(obj_centers) == 1) and (obj_centers[0].size == 0 or obj_dims[0].size == 0)):
+            self.prev_agents = self.current_agents.copy()
+            self.current_agents = {} # There weren't any pedestrians detected
+            return
+        
         new_prev_agents = {} # Stores current agents in START frame for next time through (since 
                              # planning wants us to send them agents in CURRENT frame)
         # Object not seen -> velocity = None
@@ -298,6 +305,9 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda
         # TODO: NEED TO STORE AND INCORPORATE TRANSFORMS SOMEHOW TO DEAL WITH MOVING CAR CASE
         assigned = False
         num_pairings = len(obj_centers)
+        if num_pairings != len(obj_dims):
+            for i in range(25):
+                print("ERROR NUM PARINGS AND OBJ DIMS ARENT THE SAME")
         converted_centers = obj_centers # TODO: REPLACE WITH THIS: self.convert_vehicle_frame_to_start_frame(obj_centers)
 
         # Loop through the indexes of the obj_center and obj_dim pairings
@@ -311,11 +321,11 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda
                     assigned = True
 
                     if self.prev_time == None:
-                        # This shouldn't ever be triggered
-                        vel = 0
+                        # This will be triggered if the very first message has pedestrians in it
+                        vel = 0.0
                     else:
                         delta_t = self.curr_time - self.prev_time
-                        vel = (obj_centers[idx] - np.array([prev_state.pose.x, prev_state.pose.y, prev_state.pose.z])) / delta_t.total_seconds()
+                        vel = (converted_centers[idx] - np.array([prev_state.pose.x, prev_state.pose.y, prev_state.pose.z])) / delta_t.total_seconds()
                         print("VELOCITY:")
                         print(vel)
 
@@ -329,7 +339,20 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda
                             outline=None,
                             type=AgentEnum.PEDESTRIAN,
                             activity=AgentActivityEnum.MOVING,
-                            velocity= None if vel.size == 0 else tuple(vel),
+                            velocity=None if vel.size == 0 else tuple(vel),
+                            yaw_rate=0
+                        ))
+                    new_prev_agents[prev_id] = (
+                        AgentState(
+                            track_id = prev_id,
+                            pose=ObjectPose(t=0, x=converted_centers[idx][0], y=converted_centers[idx][1], z=converted_centers[idx][2], yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT),
+                            # (l, w, h)
+                            # TODO: confirm (z -> l, x -> w, y -> h)
+                            dimensions=(obj_dims[idx][0], obj_dims[idx][1], obj_dims[idx][2]),  
+                            outline=None,
+                            type=AgentEnum.PEDESTRIAN,
+                            activity=AgentActivityEnum.MOVING,
+                            velocity=None if vel.size == 0 else tuple(vel),
                             yaw_rate=0
                         ))
                     del self.prev_agents[prev_id] # Remove previous agent from previous agents
@@ -349,13 +372,26 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda
                         outline=None,
                         type=AgentEnum.PEDESTRIAN,
                         activity=AgentActivityEnum.MOVING,
-                        velocity= None,
+                        velocity=None,
+                        yaw_rate=0
+                    ))
+                new_prev_agents[id] = (
+                    AgentState(
+                        track_id = id,
+                        pose=ObjectPose(t=0, x=converted_centers[idx][0], y=converted_centers[idx][1], z=converted_centers[idx][2] ,yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT),
+                        # (l, w, h)
+                        # TODO: confirm (z -> l, x -> w, y -> h)
+                        dimensions=(obj_dims[idx][0], obj_dims[idx][1], obj_dims[idx][2]),  
+                        outline=None,
+                        type=AgentEnum.PEDESTRIAN,
+                        activity=AgentActivityEnum.MOVING,
+                        velocity=None,
                         yaw_rate=0
                     ))
         self.prev_agents = new_prev_agents
 
     # Calculates whether 2 agents overlap in START frame. True if they do, false if not
-    def agents_overlap(obj_center: np.ndarray, obj_dim: np.ndarray, prev_agent: AgentState) -> bool:
+    def agents_overlap(self, obj_center: np.ndarray, obj_dim: np.ndarray, prev_agent: AgentState) -> bool:
         # Calculate corners of obj_center and obj_dim pairing
         x1_min, x1_max = obj_center[0] - obj_dim[0] / 2.0, obj_center[0] + obj_dim[0] / 2.0
         y1_min, y1_max = obj_center[1] - obj_dim[1] / 2.0, obj_center[1] + obj_dim[1] / 2.0 # CENTER CALCULATION
@@ -366,9 +402,9 @@ def agents_overlap(obj_center: np.ndarray, obj_dim: np.ndarray, prev_agent: Agen
         # dims [-l/2,l/2] x [-w/2,w/2] x [0,h], not
         # [-l/2,l/2] x [-w/2,w/2] x [-h/2,h/2]
         # TODO: confirm (z -> l, x -> w, y -> h)
-        x2_min, x2_max = prev_agent.pose.x - prev_agent.dimensions.width / 2.0, prev_agent.pose.x + prev_agent.dimensions.width / 2.0
-        y2_min, y2_max = prev_agent.pose.y, prev_agent.pose.y + prev_agent.dimensions.height # AGENT STATE CALCULATION
-        z2_min, z2_max = prev_agent.pose.z - prev_agent.dimensions.length / 2.0, prev_agent.pose.z + prev_agent.dimensions.length / 2.0
+        x2_min, x2_max = prev_agent.pose.x - prev_agent.dimensions[0] / 2.0, prev_agent.pose.x + prev_agent.dimensions[0] / 2.0
+        y2_min, y2_max = prev_agent.pose.y, prev_agent.pose.y + prev_agent.dimensions[1] # AGENT STATE CALCULATION
+        z2_min, z2_max = prev_agent.pose.z - prev_agent.dimensions[2] / 2.0, prev_agent.pose.z + prev_agent.dimensions[2] / 2.0
         
         # True if they overlap, false if not
         return (
@@ -377,13 +413,13 @@ def agents_overlap(obj_center: np.ndarray, obj_dim: np.ndarray, prev_agent: Agen
             ( (z1_min <= z2_min and z2_min <= z1_max) or (z2_min <= z1_min and z1_min <= z2_max) )
         )
 
-    def convert_vehicle_frame_to_start_frame(obj_centers: List[np.ndarray]) -> List[np.ndarray]:
+    def convert_vehicle_frame_to_start_frame(self, obj_centers: List[np.ndarray]) -> List[np.ndarray]:
         pass
 
     def ouster_oak_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2):
         # Update times for basic velocity calculation
-        self.prev_time = self.current_time
-        self.current_time = datetime.now()
+        self.prev_time = self.curr_time
+        self.curr_time = datetime.now()
 
         # Convert to cv2 image and run detector
         cv_image = self.bridge.imgmsg_to_cv2(rgb_image_msg, "bgr8") 

From f080070f15fa62abfec8f4f22f6949331ef7421b Mon Sep 17 00:00:00 2001
From: LucasEby <lucaseby@outlook.com>
Date: Tue, 18 Feb 2025 17:55:44 -0500
Subject: [PATCH 36/46] I think I updated a comment

---
 .../onboard/perception/pedestrian_detection.py | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py
index c515ffe07..f78b0c686 100644
--- a/GEMstack/onboard/perception/pedestrian_detection.py
+++ b/GEMstack/onboard/perception/pedestrian_detection.py
@@ -86,7 +86,7 @@ def __init__(self, vehicle_interface : GEMInterface) -> None:
         self.classes_to_detect = 0
         self.ground_threshold = -2.0
         self.max_human_depth = 0.9
-        self.vehicle_frame = False # Indicate whether pedestrians centroids and point clouds are in the vehicle frame
+        self.vehicle_frame = True # Indicate whether pedestrians centroids and point clouds are in the vehicle frame
 
         # Load calibration data
         # TODO: Maybe lets add one word or link what R t K are?
@@ -290,11 +290,17 @@ def update_object_states(self, track_result: List[Results], extracted_pts_all: L
         #         ))
             
     def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.ndarray]):
-        # Gate to check whether dims and centers are empty:
+        # Gate to check whether dims and centers are empty (will happen if no pedestrians are scanned):
         if ((len(obj_dims) == 1 or len(obj_centers) == 1) and (obj_centers[0].size == 0 or obj_dims[0].size == 0)):
             self.prev_agents = self.current_agents.copy()
             self.current_agents = {} # There weren't any pedestrians detected
             return
+        elif (len(obj_centers) != len(obj_dims)):
+            for i in range(25):
+                print("ERROR NUM PARINGS AND OBJ DIMS ARENT THE SAME")
+            self.prev_agents = self.current_agents.copy()
+            self.current_agents = {} # There weren't any pedestrians detected
+            return
         
         new_prev_agents = {} # Stores current agents in START frame for next time through (since 
                              # planning wants us to send them agents in CURRENT frame)
@@ -305,9 +311,7 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda
         # TODO: NEED TO STORE AND INCORPORATE TRANSFORMS SOMEHOW TO DEAL WITH MOVING CAR CASE
         assigned = False
         num_pairings = len(obj_centers)
-        if num_pairings != len(obj_dims):
-            for i in range(25):
-                print("ERROR NUM PARINGS AND OBJ DIMS ARENT THE SAME")
+
         converted_centers = obj_centers # TODO: REPLACE WITH THIS: self.convert_vehicle_frame_to_start_frame(obj_centers)
 
         # Loop through the indexes of the obj_center and obj_dim pairings
@@ -326,8 +330,8 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda
                     else:
                         delta_t = self.curr_time - self.prev_time
                         vel = (converted_centers[idx] - np.array([prev_state.pose.x, prev_state.pose.y, prev_state.pose.z])) / delta_t.total_seconds()
-                        print("VELOCITY:")
-                        print(vel)
+                    print("VELOCITY:")
+                    print(vel)
 
                     self.current_agents[prev_id] = (
                         AgentState(

From db6d689ee54ea33dd1f29a6ebde756aaeb55fdd2 Mon Sep 17 00:00:00 2001
From: LucasEby <lucaseby@outlook.com>
Date: Tue, 18 Feb 2025 18:01:53 -0500
Subject: [PATCH 37/46] Cleaned up code a bit. Will add Vikram's suggestions in
 Slack next

---
 .../perception/pedestrian_detection.py        | 34 +++----------------
 1 file changed, 5 insertions(+), 29 deletions(-)

diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py
index f78b0c686..bf4b10e12 100644
--- a/GEMstack/onboard/perception/pedestrian_detection.py
+++ b/GEMstack/onboard/perception/pedestrian_detection.py
@@ -242,7 +242,6 @@ def update_object_states(self, track_result: List[Results], extracted_pts_all: L
         if len(pedestrians_3d_pts) != num_objs:
             raise Exception('Perception - Camera detections, points clusters num. mismatch')
         
-        # TODO: CONVERT FROM LIDAR FRAME TO VEHICLE FRAME HERE (vehicle frame is center of rear axle at vehicle's current location)
         # If in vehicle frame, transform centers from top_lidar frame to vehicle frame
         # Need to transform the center point one by one since matrix op can't deal with empty points
         if self.vehicle_frame:
@@ -260,35 +259,15 @@ def update_object_states(self, track_result: List[Results], extracted_pts_all: L
         #       or at least {track_ids: centers/pts/etc}
         # TODO: Combine funcs for efficiency in C.
         #       Separate numpy prob still faster for now
-        obj_centers = self.find_centers(pedestrians_3d_pts)
+        obj_centers = self.find_centers(pedestrians_3d_pts) # Centers are calculated in current vehicle frame here (center of rear axle) 
         obj_dims = self.find_dims(pedestrians_3d_pts)
 
         # TODO: CONVERT FROM VEHICLE FRAME TO START FRAME HERE
-        # Then compare previous and current agents with the same id to calculate velocity
-        # for idx in range(len(obj_centers)):
-        #     print(obj_centers[idx])
-        #     print(obj_dims[idx])
         self.find_vels_and_ids(obj_centers, obj_dims)
-        # obj_vels = self.find_vels(track_ids, obj_centers)
-
-        # # Update Current AgentStates
-        # for ind in range(num_objs):
-        #     obj_center = (None, None, None) if obj_centers[ind].size == 0 else obj_centers[ind]
-        #     obj_dim = (None, None, None) if obj_dims[ind].size == 0 else obj_dims[ind]
-        #     self.current_agents[track_ids[ind]] = (
-        #         AgentState(
-        #             track_id = track_ids[ind],
-        #             pose=ObjectPose(t=0, x=obj_center[0], y=obj_center[1], z=obj_center[2] ,yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT),
-        #             # (l, w, h)
-        #             # TODO: confirm (z -> l, x -> w, y -> h)
-        #             dimensions=(obj_dim[0], obj_dim[1], obj_dim[2]),  
-        #             outline=None,
-        #             type=AgentEnum.PEDESTRIAN,
-        #             activity=AgentActivityEnum.MOVING,
-        #             velocity= None if obj_vels[track_ids[ind]].size == 0 else tuple(obj_vels[track_ids[ind]]),
-        #             yaw_rate=0
-        #         ))
             
+    # TODO: Refactor to make more efficient
+    # TODO: Moving Average across last N iterations pos/vel? Less spurious vals
+    # TODO Fix velocity calculation to calculate in ObjectFrameEnum.START
     def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.ndarray]):
         # Gate to check whether dims and centers are empty (will happen if no pedestrians are scanned):
         if ((len(obj_dims) == 1 or len(obj_centers) == 1) and (obj_centers[0].size == 0 or obj_dims[0].size == 0)):
@@ -304,14 +283,11 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda
         
         new_prev_agents = {} # Stores current agents in START frame for next time through (since 
                              # planning wants us to send them agents in CURRENT frame)
-        # Object not seen -> velocity = None
-        vels = defaultdict(lambda: np.array(())) # None is faster, np.array matches other find_ methods.
 
-        # THIS ASSUMES EVERYTHING IS IN THE SAME FRAME WHICH WOULD WORK FOR STATIONARY CAR.
-        # TODO: NEED TO STORE AND INCORPORATE TRANSFORMS SOMEHOW TO DEAL WITH MOVING CAR CASE
         assigned = False
         num_pairings = len(obj_centers)
 
+        # TODO: NEED TO STORE AND INCORPORATE TRANSFORMS SOMEHOW TO DEAL WITH MOVING CAR CASE
         converted_centers = obj_centers # TODO: REPLACE WITH THIS: self.convert_vehicle_frame_to_start_frame(obj_centers)
 
         # Loop through the indexes of the obj_center and obj_dim pairings

From bf80bbeb034e8d90c22937d0c416833ceaec6721 Mon Sep 17 00:00:00 2001
From: LucasEby <lucaseby@outlook.com>
Date: Tue, 18 Feb 2025 18:33:13 -0500
Subject: [PATCH 38/46] Accidentally moved a few lines of code. Fixed the error
 that resulted

---
 .../onboard/perception/pedestrian_detection.py     | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py
index bf4b10e12..9e7815c93 100644
--- a/GEMstack/onboard/perception/pedestrian_detection.py
+++ b/GEMstack/onboard/perception/pedestrian_detection.py
@@ -242,6 +242,13 @@ def update_object_states(self, track_result: List[Results], extracted_pts_all: L
         if len(pedestrians_3d_pts) != num_objs:
             raise Exception('Perception - Camera detections, points clusters num. mismatch')
         
+        # TODO: Slower but cleaner to pass dicts of AgentState
+        #       or at least {track_ids: centers/pts/etc}
+        # TODO: Combine funcs for efficiency in C.
+        #       Separate numpy prob still faster for now
+        obj_centers = self.find_centers(pedestrians_3d_pts) # Centers are calculated in lidar frame here
+        obj_dims = self.find_dims(pedestrians_3d_pts)
+
         # If in vehicle frame, transform centers from top_lidar frame to vehicle frame
         # Need to transform the center point one by one since matrix op can't deal with empty points
         if self.vehicle_frame:
@@ -254,13 +261,6 @@ def update_object_states(self, track_result: List[Results], extracted_pts_all: L
                 else:
                     obj_centers_vehicle.append(np.array(()))
             obj_centers = obj_centers_vehicle
-        
-        # TODO: Slower but cleaner to pass dicts of AgentState
-        #       or at least {track_ids: centers/pts/etc}
-        # TODO: Combine funcs for efficiency in C.
-        #       Separate numpy prob still faster for now
-        obj_centers = self.find_centers(pedestrians_3d_pts) # Centers are calculated in current vehicle frame here (center of rear axle) 
-        obj_dims = self.find_dims(pedestrians_3d_pts)
 
         # TODO: CONVERT FROM VEHICLE FRAME TO START FRAME HERE
         self.find_vels_and_ids(obj_centers, obj_dims)

From 6addddb87a9959c71e9c8821cb739f0be8625a7e Mon Sep 17 00:00:00 2001
From: LucasEby <lucaseby@outlook.com>
Date: Tue, 18 Feb 2025 19:17:49 -0500
Subject: [PATCH 39/46] In the process of converting to start frame. Pushing
 current WORKING code for Vikram

---
 .../perception/pedestrian_detection.py        | 22 +++++++++++--------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py
index 9e7815c93..2c24f3bef 100644
--- a/GEMstack/onboard/perception/pedestrian_detection.py
+++ b/GEMstack/onboard/perception/pedestrian_detection.py
@@ -45,13 +45,14 @@
 from ultralytics import YOLO
 from ultralytics.engine.results import Results, Boxes
 # GEMStack
-from ...state import AllState,VehicleState,ObjectPose,ObjectFrameEnum,AgentState,AgentEnum,AgentActivityEnum
+from ...state import AllState,VehicleState,ObjectPose,ObjectFrameEnum,AgentState,AgentEnum,AgentActivityEnum,ObjectFrameEnum
 from .pedestrian_detection_utils import *
 from ..interface.gem import GEMInterface
 from ..component import Component
 from .IdTracker import IdTracker
 
 
+
 def box_to_fake_agent(box):
     """Creates a fake agent state from an (x,y,w,h) bounding box.
     
@@ -262,7 +263,6 @@ def update_object_states(self, track_result: List[Results], extracted_pts_all: L
                     obj_centers_vehicle.append(np.array(()))
             obj_centers = obj_centers_vehicle
 
-        # TODO: CONVERT FROM VEHICLE FRAME TO START FRAME HERE
         self.find_vels_and_ids(obj_centers, obj_dims)
             
     # TODO: Refactor to make more efficient
@@ -288,7 +288,7 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda
         num_pairings = len(obj_centers)
 
         # TODO: NEED TO STORE AND INCORPORATE TRANSFORMS SOMEHOW TO DEAL WITH MOVING CAR CASE
-        converted_centers = obj_centers # TODO: REPLACE WITH THIS: self.convert_vehicle_frame_to_start_frame(obj_centers)
+        converted_centers = obj_centers # TODO: Add this in when finished self.convert_vehicle_frame_to_start_frame(obj_centers)
 
         # Loop through the indexes of the obj_center and obj_dim pairings
         for idx in range(num_pairings):
@@ -302,7 +302,7 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda
 
                     if self.prev_time == None:
                         # This will be triggered if the very first message has pedestrians in it
-                        vel = 0.0
+                        vel = np.array([0, 0, 0])
                     else:
                         delta_t = self.curr_time - self.prev_time
                         vel = (converted_centers[idx] - np.array([prev_state.pose.x, prev_state.pose.y, prev_state.pose.z])) / delta_t.total_seconds()
@@ -318,7 +318,7 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda
                             dimensions=(obj_dims[idx][0], obj_dims[idx][1], obj_dims[idx][2]),  
                             outline=None,
                             type=AgentEnum.PEDESTRIAN,
-                            activity=AgentActivityEnum.MOVING,
+                            activity=AgentActivityEnum.STOPPED if np.all(vel == 0) else AgentActivityEnum.MOVING,
                             velocity=None if vel.size == 0 else tuple(vel),
                             yaw_rate=0
                         ))
@@ -331,7 +331,7 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda
                             dimensions=(obj_dims[idx][0], obj_dims[idx][1], obj_dims[idx][2]),  
                             outline=None,
                             type=AgentEnum.PEDESTRIAN,
-                            activity=AgentActivityEnum.MOVING,
+                            activity=AgentActivityEnum.STOPPED if np.all(vel == 0) else AgentActivityEnum.MOVING,
                             velocity=None if vel.size == 0 else tuple(vel),
                             yaw_rate=0
                         ))
@@ -351,7 +351,7 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda
                         dimensions=(obj_dims[idx][0], obj_dims[idx][1], obj_dims[idx][2]),  
                         outline=None,
                         type=AgentEnum.PEDESTRIAN,
-                        activity=AgentActivityEnum.MOVING,
+                        activity=AgentActivityEnum.UNDETERMINED,
                         velocity=None,
                         yaw_rate=0
                     ))
@@ -364,7 +364,7 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda
                         dimensions=(obj_dims[idx][0], obj_dims[idx][1], obj_dims[idx][2]),  
                         outline=None,
                         type=AgentEnum.PEDESTRIAN,
-                        activity=AgentActivityEnum.MOVING,
+                        activity=AgentActivityEnum.UNDETERMINED,
                         velocity=None,
                         yaw_rate=0
                     ))
@@ -393,9 +393,13 @@ def agents_overlap(self, obj_center: np.ndarray, obj_dim: np.ndarray, prev_agent
             ( (z1_min <= z2_min and z2_min <= z1_max) or (z2_min <= z1_min and z1_min <= z2_max) )
         )
 
-    def convert_vehicle_frame_to_start_frame(self, obj_centers: List[np.ndarray]) -> List[np.ndarray]:
+    def convert_vehicle_frame_to_start_frame(self, vehicle_states: List[AgentState]) -> List[AgentState]:
+        # num_states = len(vehicle_states)
+        # for idx in num_states:
+        #     num_states[idx].to_frame()
         pass
 
+
     def ouster_oak_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2):
         # Update times for basic velocity calculation
         self.prev_time = self.curr_time

From a18d1876b154329aedfd71e4487cef4da8a141a5 Mon Sep 17 00:00:00 2001
From: LucasEby <lucaseby@outlook.com>
Date: Tue, 18 Feb 2025 21:50:44 -0500
Subject: [PATCH 40/46] Took absolutely forever to find the secret sauce.
 Please excuse the mess

---
 .../perception/pedestrian_detection.py        | 80 +++++++++++++------
 1 file changed, 54 insertions(+), 26 deletions(-)

diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py
index 2c24f3bef..381075c9b 100644
--- a/GEMstack/onboard/perception/pedestrian_detection.py
+++ b/GEMstack/onboard/perception/pedestrian_detection.py
@@ -114,6 +114,10 @@ def __init__(self, vehicle_interface : GEMInterface) -> None:
         self.prev_time = None # Time in seconds since last scan for basic velocity calculation
         self.curr_time = None # Time in seconds of current scan for basic velocity calculation
         self.id_tracker = IdTracker()
+        
+        # Update function variables
+        self.t_start = None # datetime.now() # Estimated start frame time
+        self.start_pose_abs = None
     
     def init_debug(self,) -> None:
          # Debug Publishers
@@ -123,6 +127,10 @@ def init_debug(self,) -> None:
         self.pub_image = rospy.Publisher("/camera/image_detection", Image, queue_size=1)
 
     def update(self, vehicle : VehicleState) -> Dict[str,AgentState]:
+        if self.t_start is None:
+            self.t_start = self.vehicle_interface.time()
+        if self.start_pose_abs is None:
+            self.start_pose_abs = vehicle.pose
         return self.current_agents
 
     # TODO: Improve Algo Knn, ransac, etc.
@@ -148,21 +156,21 @@ def find_dims(self, clusters: List[List[np.ndarray]]) -> List[np.ndarray]:
     #            Work towards own tracking class instead of simple YOLO track?
     #            Fix division by time
     # ret: Dict[track_id: vel[x, y, z]]
-    def find_vels(self, track_ids: List[int], obj_centers: List[np.ndarray], obj_dims: List[np.ndarray]) -> Dict[int, np.ndarray]:
-        # Object not seen -> velocity = None
-        track_id_center_map = dict(zip(track_ids, obj_centers))
-        vels = defaultdict(lambda: np.array(())) # None is faster, np.array matches other find_ methods.
-
-        for prev_track_id, prev_agent in self.prev_agents.items():
-            if prev_track_id in track_ids:
-                # TODO: Add prev_agents to memory to avoid None velocity
-                # We should only be missing prev pose on first sight of track_id Agent.
-                # print("shape 1: ", track_id_center_map[prev_agent.track_id])
-                # print("shape 2: ", np.array([prev_agent.pose.x, prev_agent.pose.y, prev_agent.pose.z]))
-                # prev can be 3 separate Nones, current is just empty array... make this symmetrical
-                if prev_agent.pose.x and prev_agent.pose.y and prev_agent.pose.z and track_id_center_map[prev_agent.track_id].shape == 3:
-                    vels[prev_track_id] = (track_id_center_map[prev_track_id] - np.array([prev_agent.pose.x, prev_agent.pose.y, prev_agent.pose.z])) / (self.curr_time - self.prev_time)
-        return vels
+    # def find_vels(self, track_ids: List[int], obj_centers: List[np.ndarray], obj_dims: List[np.ndarray]) -> Dict[int, np.ndarray]:
+    #     # Object not seen -> velocity = None
+    #     track_id_center_map = dict(zip(track_ids, obj_centers))
+    #     vels = defaultdict(lambda: np.array(())) # None is faster, np.array matches other find_ methods.
+
+    #     for prev_track_id, prev_agent in self.prev_agents.items():
+    #         if prev_track_id in track_ids:
+    #             # TODO: Add prev_agents to memory to avoid None velocity
+    #             # We should only be missing prev pose on first sight of track_id Agent.
+    #             # print("shape 1: ", track_id_center_map[prev_agent.track_id])
+    #             # print("shape 2: ", np.array([prev_agent.pose.x, prev_agent.pose.y, prev_agent.pose.z]))
+    #             # prev can be 3 separate Nones, current is just empty array... make this symmetrical
+    #             if prev_agent.pose.x and prev_agent.pose.y and prev_agent.pose.z and track_id_center_map[prev_agent.track_id].shape == 3:
+    #                 vels[prev_track_id] = (track_id_center_map[prev_track_id] - np.array([prev_agent.pose.x, prev_agent.pose.y, prev_agent.pose.z])) / (self.curr_time - self.prev_time)
+    #     return vels
     
     # TODO: Separate debug/viz class, bbox and 2d 3d points funcs 
     def viz_object_states(self, cv_image, boxes, extracted_pts_all):
@@ -287,7 +295,7 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda
         assigned = False
         num_pairings = len(obj_centers)
 
-        # TODO: NEED TO STORE AND INCORPORATE TRANSFORMS SOMEHOW TO DEAL WITH MOVING CAR CASE
+        # TODO: NEED TO STORE AND INCORPORATE TRANSFORMS SOMEHOW TO DEAL WITH MOVING CAR CASE        
         converted_centers = obj_centers # TODO: Add this in when finished self.convert_vehicle_frame_to_start_frame(obj_centers)
 
         # Loop through the indexes of the obj_center and obj_dim pairings
@@ -312,7 +320,7 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda
                     self.current_agents[prev_id] = (
                         AgentState(
                             track_id = prev_id,
-                            pose=ObjectPose(t=0, x=obj_centers[idx][0], y=obj_centers[idx][1], z=obj_centers[idx][2], yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT),
+                            pose=ObjectPose(t=(self.curr_time-self.t_start).total_seconds(), x=obj_centers[idx][0], y=obj_centers[idx][1], z=obj_centers[idx][2], yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT),
                             # (l, w, h)
                             # TODO: confirm (z -> l, x -> w, y -> h)
                             dimensions=(obj_dims[idx][0], obj_dims[idx][1], obj_dims[idx][2]),  
@@ -325,7 +333,7 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda
                     new_prev_agents[prev_id] = (
                         AgentState(
                             track_id = prev_id,
-                            pose=ObjectPose(t=0, x=converted_centers[idx][0], y=converted_centers[idx][1], z=converted_centers[idx][2], yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT),
+                            pose=ObjectPose(t=(self.curr_time-self.t_start).total_seconds(), x=converted_centers[idx][0], y=converted_centers[idx][1], z=converted_centers[idx][2], yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT),
                             # (l, w, h)
                             # TODO: confirm (z -> l, x -> w, y -> h)
                             dimensions=(obj_dims[idx][0], obj_dims[idx][1], obj_dims[idx][2]),  
@@ -341,11 +349,11 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda
             # If not assigned:
             if not assigned:
                 # Set velocity to 0 and assign the new agent a new id with IdTracker
-                id = self.id_tracker.get_new_id()
+                id = "ped" + str(self.id_tracker.get_new_id())
                 self.current_agents[id] = (
                     AgentState(
                         track_id = id,
-                        pose=ObjectPose(t=0, x=obj_centers[idx][0], y=obj_centers[idx][1], z=obj_centers[idx][2] ,yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT),
+                        pose=ObjectPose(t=(self.curr_time-self.t_start).total_seconds(), x=obj_centers[idx][0], y=obj_centers[idx][1], z=obj_centers[idx][2] ,yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT),
                         # (l, w, h)
                         # TODO: confirm (z -> l, x -> w, y -> h)
                         dimensions=(obj_dims[idx][0], obj_dims[idx][1], obj_dims[idx][2]),  
@@ -358,7 +366,7 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda
                 new_prev_agents[id] = (
                     AgentState(
                         track_id = id,
-                        pose=ObjectPose(t=0, x=converted_centers[idx][0], y=converted_centers[idx][1], z=converted_centers[idx][2] ,yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT),
+                        pose=ObjectPose(t=(self.curr_time-self.t_start).total_seconds(), x=converted_centers[idx][0], y=converted_centers[idx][1], z=converted_centers[idx][2] ,yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT),
                         # (l, w, h)
                         # TODO: confirm (z -> l, x -> w, y -> h)
                         dimensions=(obj_dims[idx][0], obj_dims[idx][1], obj_dims[idx][2]),  
@@ -394,11 +402,31 @@ def agents_overlap(self, obj_center: np.ndarray, obj_dim: np.ndarray, prev_agent
         )
 
     def convert_vehicle_frame_to_start_frame(self, vehicle_states: List[AgentState]) -> List[AgentState]:
-        # num_states = len(vehicle_states)
-        # for idx in num_states:
-        #     num_states[idx].to_frame()
-        pass
-
+        num_states = len(vehicle_states)
+        for idx in num_states:
+            vehicle_states[idx] = vehicle_states[idx].to_frame(frame=ObjectFrameEnum.START, current_pose=current_obj_pose, start_pose_abs=self.start_pose_abs)
+
+        # current_obj_pose = ObjectPose(
+        #         frame=ObjectFrameEnum.CURRENT, 
+        #         t=(self.curr_time-self.t_start).total_seconds(), 
+        #         x=vehicle_states[idx].pose.x, 
+        #         y=vehicle_states[idx].pose.y, 
+        #         z=vehicle_states[idx].pose.z
+        #     )
+        # vehicle_states[0] = vehicle_states[0].to_frame(frame=ObjectFrameEnum.START, current_pose=current_obj_pose, start_pose_abs=self.start_pose_abs)
+        # # start_obj_pose???
+        # state = AgentState(
+        #                     track_id =0,
+        #                     pose=ObjectPose(t=(self.curr_time-self.t_start).total_seconds(), x=obj_centers[idx][0], y=obj_centers[idx][1], z=obj_centers[idx][2], yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT),
+        #                     # (l, w, h)
+        #                     # TODO: confirm (z -> l, x -> w, y -> h)
+        #                     dimensions=(obj_dims[idx][0], obj_dims[idx][1], obj_dims[idx][2]),  
+        #                     outline=None,
+        #                     type=AgentEnum.PEDESTRIAN,
+        #                     activity=AgentActivityEnum.STOPPED if np.all(vel == 0) else AgentActivityEnum.MOVING,
+        #                     velocity=None if vel.size == 0 else tuple(vel),
+        #                     yaw_rate=0
+        #                 )
 
     def ouster_oak_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2):
         # Update times for basic velocity calculation

From 0b5d88b5d5d11880aab18d3155947286fe264468 Mon Sep 17 00:00:00 2001
From: LucasEby <lucaseby@outlook.com>
Date: Tue, 18 Feb 2025 22:30:00 -0500
Subject: [PATCH 41/46] finally think I figured out start frame transform. In
 the process of converting to AgentStates

---
 .../perception/pedestrian_detection.py        | 67 +++++++++++++------
 1 file changed, 47 insertions(+), 20 deletions(-)

diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py
index 381075c9b..a4666f691 100644
--- a/GEMstack/onboard/perception/pedestrian_detection.py
+++ b/GEMstack/onboard/perception/pedestrian_detection.py
@@ -379,17 +379,18 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda
         self.prev_agents = new_prev_agents
 
     # Calculates whether 2 agents overlap in START frame. True if they do, false if not
-    def agents_overlap(self, obj_center: np.ndarray, obj_dim: np.ndarray, prev_agent: AgentState) -> bool:
-        # Calculate corners of obj_center and obj_dim pairing
-        x1_min, x1_max = obj_center[0] - obj_dim[0] / 2.0, obj_center[0] + obj_dim[0] / 2.0
-        y1_min, y1_max = obj_center[1] - obj_dim[1] / 2.0, obj_center[1] + obj_dim[1] / 2.0 # CENTER CALCULATION
-        z1_min, z1_max = obj_center[2] - obj_dim[2] / 2.0, obj_center[2] + obj_dim[2] / 2.0
-
+    def agents_overlap(self, curr_agent: AgentState, prev_agent: AgentState) -> bool:
         # Calculate corners of AgentState
         # Beware: AgentState(PhysicalObject) builds bbox from 
         # dims [-l/2,l/2] x [-w/2,w/2] x [0,h], not
         # [-l/2,l/2] x [-w/2,w/2] x [-h/2,h/2]
         # TODO: confirm (z -> l, x -> w, y -> h)
+
+        # Calculate corners of obj_center and obj_dim pairing
+        x1_min, x1_max = curr_agent.pose.x - curr_agent.dimensions[0] / 2.0, curr_agent.pose.x + curr_agent.dimensions[0] / 2.0
+        y1_min, y1_max = curr_agent.pose.y, curr_agent.pose.y + curr_agent.dimensions[1] # AGENT STATE CALCULATION
+        z1_min, z1_max = curr_agent.pose.z - curr_agent.dimensions[2] / 2.0, curr_agent.pose.z + curr_agent.dimensions[2] / 2.0
+
         x2_min, x2_max = prev_agent.pose.x - prev_agent.dimensions[0] / 2.0, prev_agent.pose.x + prev_agent.dimensions[0] / 2.0
         y2_min, y2_max = prev_agent.pose.y, prev_agent.pose.y + prev_agent.dimensions[1] # AGENT STATE CALCULATION
         z2_min, z2_max = prev_agent.pose.z - prev_agent.dimensions[2] / 2.0, prev_agent.pose.z + prev_agent.dimensions[2] / 2.0
@@ -401,20 +402,46 @@ def agents_overlap(self, obj_center: np.ndarray, obj_dim: np.ndarray, prev_agent
             ( (z1_min <= z2_min and z2_min <= z1_max) or (z2_min <= z1_min and z1_min <= z2_max) )
         )
 
-    def convert_vehicle_frame_to_start_frame(self, vehicle_states: List[AgentState]) -> List[AgentState]:
-        num_states = len(vehicle_states)
-        for idx in num_states:
-            vehicle_states[idx] = vehicle_states[idx].to_frame(frame=ObjectFrameEnum.START, current_pose=current_obj_pose, start_pose_abs=self.start_pose_abs)
-
-        # current_obj_pose = ObjectPose(
-        #         frame=ObjectFrameEnum.CURRENT, 
-        #         t=(self.curr_time-self.t_start).total_seconds(), 
-        #         x=vehicle_states[idx].pose.x, 
-        #         y=vehicle_states[idx].pose.y, 
-        #         z=vehicle_states[idx].pose.z
-        #     )
-        # vehicle_states[0] = vehicle_states[0].to_frame(frame=ObjectFrameEnum.START, current_pose=current_obj_pose, start_pose_abs=self.start_pose_abs)
-        # # start_obj_pose???
+    def create_agent_states_in_start_frame(self, obj_centers: List[np.ndarray], obj_dims: List[np.ndarray]) -> List[AgentState]:
+        # Create list of agent states in current vehicle frame:
+        agents = []
+        num_pairings = len(obj_centers)
+        for idx in range(num_pairings):
+            # Create agent in current frame:
+            state = AgentState(
+                            track_id="TBD", # Temporary
+                            pose=ObjectPose(t=(self.curr_time-self.t_start).total_seconds(), x=obj_centers[idx][0], y=obj_centers[idx][1], z=obj_centers[idx][2], yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT),
+                            # Beware: AgentState(PhysicalObject) builds bbox from 
+                            # dims [-l/2,l/2] x [-w/2,w/2] x [0,h], not
+                            # [-l/2,l/2] x [-w/2,w/2] x [-h/2,h/2]
+                            # (l, w, h)
+                            # TODO: confirm (z -> l, x -> w, y -> h)
+                            dimensions=(obj_dims[idx][2], obj_dims[idx][0], obj_centers[idx][1] + obj_dims[idx][1]),  
+                            outline=None,
+                            type=AgentEnum.PEDESTRIAN,
+                            activity=AgentActivityEnum.MOVING, # Temporary
+                            velocity=None, # Temporary
+                            yaw_rate=0
+                        )
+            
+            # Convert agent to start frame and add to agents list:
+            agents.append(
+                state.to_frame(
+                    frame=ObjectFrameEnum.START, 
+                    current_pose=ObjectPose(
+                        frame=ObjectFrameEnum.CURRENT, 
+                        t=(self.curr_time-self.t_start).total_seconds(), 
+                        x=state.pose.x, 
+                        y=state.pose.y, 
+                        z=state.pose.z
+                    ), 
+                    start_pose_abs=self.start_pose_abs
+                )
+            )
+
+        # Return the agent states converted to start frame:
+        return agents
+
         # state = AgentState(
         #                     track_id =0,
         #                     pose=ObjectPose(t=(self.curr_time-self.t_start).total_seconds(), x=obj_centers[idx][0], y=obj_centers[idx][1], z=obj_centers[idx][2], yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT),

From 96b9885b31805b0068e4374f8f5f7a9a57f4c4fa Mon Sep 17 00:00:00 2001
From: LucasEby <lucaseby@outlook.com>
Date: Tue, 18 Feb 2025 23:13:05 -0500
Subject: [PATCH 42/46] Pushing code so Lukas can try to replicate error
 message

---
 .../perception/pedestrian_detection.py        | 81 +++++++------------
 1 file changed, 28 insertions(+), 53 deletions(-)

diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py
index a4666f691..5f1cd0568 100644
--- a/GEMstack/onboard/perception/pedestrian_detection.py
+++ b/GEMstack/onboard/perception/pedestrian_detection.py
@@ -116,7 +116,7 @@ def __init__(self, vehicle_interface : GEMInterface) -> None:
         self.id_tracker = IdTracker()
         
         # Update function variables
-        self.t_start = None # datetime.now() # Estimated start frame time
+        self.t_start = datetime.now() # Estimated start frame time
         self.start_pose_abs = None
     
     def init_debug(self,) -> None:
@@ -127,8 +127,6 @@ def init_debug(self,) -> None:
         self.pub_image = rospy.Publisher("/camera/image_detection", Image, queue_size=1)
 
     def update(self, vehicle : VehicleState) -> Dict[str,AgentState]:
-        if self.t_start is None:
-            self.t_start = self.vehicle_interface.time()
         if self.start_pose_abs is None:
             self.start_pose_abs = vehicle.pose
         return self.current_agents
@@ -295,8 +293,8 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda
         assigned = False
         num_pairings = len(obj_centers)
 
-        # TODO: NEED TO STORE AND INCORPORATE TRANSFORMS SOMEHOW TO DEAL WITH MOVING CAR CASE        
-        converted_centers = obj_centers # TODO: Add this in when finished self.convert_vehicle_frame_to_start_frame(obj_centers)
+        # Create a list of agent states in the start frame
+        agents_sf = self.create_agent_states_in_start_frame(obj_centers=obj_centers, obj_dims=obj_dims) # Create agents in start frame
 
         # Loop through the indexes of the obj_center and obj_dim pairings
         for idx in range(num_pairings):
@@ -305,7 +303,7 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda
             # Loop through previous agents backwards
             for prev_id, prev_state in reversed(self.prev_agents.items()):
                 # If an obj_center and obj_dim pair overlaps with a previous agent, assume that they're the same agent
-                if self.agents_overlap(converted_centers[idx], obj_dims[idx], prev_state):
+                if self.agents_overlap(agents_sf[idx], prev_state):
                     assigned = True
 
                     if self.prev_time == None:
@@ -313,10 +311,11 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda
                         vel = np.array([0, 0, 0])
                     else:
                         delta_t = self.curr_time - self.prev_time
-                        vel = (converted_centers[idx] - np.array([prev_state.pose.x, prev_state.pose.y, prev_state.pose.z])) / delta_t.total_seconds()
+                        vel = (np.array([agents_sf[idx].pose.x, agents_sf[idx].pose.y, agents_sf[idx].pose.z]) - np.array([prev_state.pose.x, prev_state.pose.y, prev_state.pose.z])) / delta_t.total_seconds()
                     print("VELOCITY:")
                     print(vel)
 
+                    # Create current frame agent at planning group's request:
                     self.current_agents[prev_id] = (
                         AgentState(
                             track_id = prev_id,
@@ -327,29 +326,25 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda
                             outline=None,
                             type=AgentEnum.PEDESTRIAN,
                             activity=AgentActivityEnum.STOPPED if np.all(vel == 0) else AgentActivityEnum.MOVING,
-                            velocity=None if vel.size == 0 else tuple(vel),
-                            yaw_rate=0
-                        ))
-                    new_prev_agents[prev_id] = (
-                        AgentState(
-                            track_id = prev_id,
-                            pose=ObjectPose(t=(self.curr_time-self.t_start).total_seconds(), x=converted_centers[idx][0], y=converted_centers[idx][1], z=converted_centers[idx][2], yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT),
-                            # (l, w, h)
-                            # TODO: confirm (z -> l, x -> w, y -> h)
-                            dimensions=(obj_dims[idx][0], obj_dims[idx][1], obj_dims[idx][2]),  
-                            outline=None,
-                            type=AgentEnum.PEDESTRIAN,
-                            activity=AgentActivityEnum.STOPPED if np.all(vel == 0) else AgentActivityEnum.MOVING,
-                            velocity=None if vel.size == 0 else tuple(vel),
+                            velocity=(0, 0, 0) if vel.size == 0 else tuple(vel),
                             yaw_rate=0
                         ))
+                    
+                    # Fix start frame agent and store in this dict:
+                    agents_sf[idx].track_id = prev_id
+                    agents_sf[idx].activity = AgentActivityEnum.STOPPED if (np.all(vel == 0) or vel.size == 0) else AgentActivityEnum.MOVING
+                    agents_sf[idx].velocity = (0, 0, 0) if vel.size == 0 else tuple(vel)
+
+                    new_prev_agents[prev_id] = agents_sf[idx]
                     del self.prev_agents[prev_id] # Remove previous agent from previous agents
                     break
 
             # If not assigned:
             if not assigned:
                 # Set velocity to 0 and assign the new agent a new id with IdTracker
-                id = "ped" + str(self.id_tracker.get_new_id())
+                id = self.id_tracker.get_new_id()
+                
+                # Create current frame agent at planning group's request:
                 self.current_agents[id] = (
                     AgentState(
                         track_id = id,
@@ -360,22 +355,15 @@ def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.nda
                         outline=None,
                         type=AgentEnum.PEDESTRIAN,
                         activity=AgentActivityEnum.UNDETERMINED,
-                        velocity=None,
-                        yaw_rate=0
-                    ))
-                new_prev_agents[id] = (
-                    AgentState(
-                        track_id = id,
-                        pose=ObjectPose(t=(self.curr_time-self.t_start).total_seconds(), x=converted_centers[idx][0], y=converted_centers[idx][1], z=converted_centers[idx][2] ,yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT),
-                        # (l, w, h)
-                        # TODO: confirm (z -> l, x -> w, y -> h)
-                        dimensions=(obj_dims[idx][0], obj_dims[idx][1], obj_dims[idx][2]),  
-                        outline=None,
-                        type=AgentEnum.PEDESTRIAN,
-                        activity=AgentActivityEnum.UNDETERMINED,
-                        velocity=None,
+                        velocity=(0, 0, 0),
                         yaw_rate=0
                     ))
+                
+                # Fix start frame agent and store in this dict:
+                agents_sf[idx].track_id = id
+                agents_sf[idx].activity = AgentActivityEnum.UNDETERMINED
+                agents_sf[idx].velocity = (0, 0, 0)
+                new_prev_agents[id] = agents_sf[idx]
         self.prev_agents = new_prev_agents
 
     # Calculates whether 2 agents overlap in START frame. True if they do, false if not
@@ -409,8 +397,8 @@ def create_agent_states_in_start_frame(self, obj_centers: List[np.ndarray], obj_
         for idx in range(num_pairings):
             # Create agent in current frame:
             state = AgentState(
-                            track_id="TBD", # Temporary
-                            pose=ObjectPose(t=(self.curr_time-self.t_start).total_seconds(), x=obj_centers[idx][0], y=obj_centers[idx][1], z=obj_centers[idx][2], yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT),
+                            track_id=0, # Temporary
+                            pose=ObjectPose(t=(self.curr_time - self.t_start).total_seconds(), x=obj_centers[idx][0], y=obj_centers[idx][1], z=obj_centers[idx][2], yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT),
                             # Beware: AgentState(PhysicalObject) builds bbox from 
                             # dims [-l/2,l/2] x [-w/2,w/2] x [0,h], not
                             # [-l/2,l/2] x [-w/2,w/2] x [-h/2,h/2]
@@ -419,7 +407,7 @@ def create_agent_states_in_start_frame(self, obj_centers: List[np.ndarray], obj_
                             dimensions=(obj_dims[idx][2], obj_dims[idx][0], obj_centers[idx][1] + obj_dims[idx][1]),  
                             outline=None,
                             type=AgentEnum.PEDESTRIAN,
-                            activity=AgentActivityEnum.MOVING, # Temporary
+                            activity=AgentActivityEnum.UNDETERMINED, # Temporary
                             velocity=None, # Temporary
                             yaw_rate=0
                         )
@@ -430,7 +418,7 @@ def create_agent_states_in_start_frame(self, obj_centers: List[np.ndarray], obj_
                     frame=ObjectFrameEnum.START, 
                     current_pose=ObjectPose(
                         frame=ObjectFrameEnum.CURRENT, 
-                        t=(self.curr_time-self.t_start).total_seconds(), 
+                        t=(self.curr_time - self.t_start).total_seconds(), 
                         x=state.pose.x, 
                         y=state.pose.y, 
                         z=state.pose.z
@@ -442,19 +430,6 @@ def create_agent_states_in_start_frame(self, obj_centers: List[np.ndarray], obj_
         # Return the agent states converted to start frame:
         return agents
 
-        # state = AgentState(
-        #                     track_id =0,
-        #                     pose=ObjectPose(t=(self.curr_time-self.t_start).total_seconds(), x=obj_centers[idx][0], y=obj_centers[idx][1], z=obj_centers[idx][2], yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT),
-        #                     # (l, w, h)
-        #                     # TODO: confirm (z -> l, x -> w, y -> h)
-        #                     dimensions=(obj_dims[idx][0], obj_dims[idx][1], obj_dims[idx][2]),  
-        #                     outline=None,
-        #                     type=AgentEnum.PEDESTRIAN,
-        #                     activity=AgentActivityEnum.STOPPED if np.all(vel == 0) else AgentActivityEnum.MOVING,
-        #                     velocity=None if vel.size == 0 else tuple(vel),
-        #                     yaw_rate=0
-        #                 )
-
     def ouster_oak_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2):
         # Update times for basic velocity calculation
         self.prev_time = self.curr_time

From a936d3d989ae80fc245001df930b2ca927bfea37 Mon Sep 17 00:00:00 2001
From: LucasEby <lucaseby@outlook.com>
Date: Wed, 19 Feb 2025 00:57:40 -0500
Subject: [PATCH 43/46] Fixed logic for gate which tosses out points

---
 .../onboard/perception/pedestrian_detection.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py
index 9fe5eeaf2..51d90248c 100644
--- a/GEMstack/onboard/perception/pedestrian_detection.py
+++ b/GEMstack/onboard/perception/pedestrian_detection.py
@@ -274,15 +274,15 @@ def update_object_states(self, track_result: List[Results], extracted_pts_all: L
     # TODO Fix velocity calculation to calculate in ObjectFrameEnum.START
     def find_vels_and_ids(self, obj_centers: List[np.ndarray], obj_dims: List[np.ndarray]):
         # Gate to check whether dims and centers are empty (will happen if no pedestrians are scanned):
-        if ((len(obj_dims) == 1 or len(obj_centers) == 1) and (obj_centers[0].size == 0 or obj_dims[0].size == 0)):
-            self.prev_agents = self.current_agents.copy()
-            self.current_agents = {} # There weren't any pedestrians detected
-            return
-        elif (len(obj_centers) != len(obj_dims)):
-            for i in range(25):
-                print("ERROR NUM PARINGS AND OBJ DIMS ARENT THE SAME")
-            self.prev_agents = self.current_agents.copy()
-            self.current_agents = {} # There weren't any pedestrians detected
+        for idx in range(len(obj_dims) -1, -1, -1):
+            if (obj_centers[idx].size == 0) or (obj_dims[0].size == 0):
+                del obj_centers[idx]
+                del obj_dims[idx]
+
+        # Nothing was scanned, erase current (for current output) and previous list (for next time through because nothing was scanned)
+        if (len(obj_dims) == 0 or len(obj_centers) == 0):
+            self.current_agents = {}
+            self.prev_agents = {}
             return
         
         new_prev_agents = {} # Stores current agents in START frame for next time through (since 

From 98f445f6cb914f95db3bea071a47a7de5b071055 Mon Sep 17 00:00:00 2001
From: Lukas Dumasius <80981746+lukasdumasius@users.noreply.github.com>
Date: Wed, 19 Feb 2025 09:51:04 -0600
Subject: [PATCH 44/46] Update pedestrian_detection.py

---
 GEMstack/onboard/perception/pedestrian_detection.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py
index 9970c6a29..58b8e7ac5 100644
--- a/GEMstack/onboard/perception/pedestrian_detection.py
+++ b/GEMstack/onboard/perception/pedestrian_detection.py
@@ -263,6 +263,8 @@ def update_object_states(self, track_result: List[Results], extracted_pts_all: L
         for ind in range(num_objs):
             obj_center = (None, None, None) if obj_centers[ind].size == 0 else obj_centers[ind]
             obj_dim = (None, None, None) if obj_dims[ind].size == 0 else obj_dims[ind]
+
+            # Part 3 - Change creating this to start frame
             self.current_agents[track_ids[ind]] = (
                 AgentState(
                     track_id = track_ids[ind],

From dad7cc1a36c23c0ac178d43846dbb72f99eaf033 Mon Sep 17 00:00:00 2001
From: Lukas Dumasius <80981746+lukasdumasius@users.noreply.github.com>
Date: Wed, 19 Feb 2025 10:02:13 -0600
Subject: [PATCH 45/46] Update pedestrian_detection.py

---
 GEMstack/onboard/perception/pedestrian_detection.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py
index 58b8e7ac5..1de87c18c 100644
--- a/GEMstack/onboard/perception/pedestrian_detection.py
+++ b/GEMstack/onboard/perception/pedestrian_detection.py
@@ -140,7 +140,8 @@ def find_dims(self, clusters: List[List[np.ndarray]]) -> List[np.ndarray]:
     # TODO: Moving Average across last N iterations pos/vel? Less spurious vals
     # TODO Akul: Fix velocity calculation to calculate in ObjectFrameEnum.START
     #            Work towards own tracking class instead of simple YOLO track?
-    #            Fix division by time
+    #            Fix 1: division by time
+    #            Fix 2: Put centers and dims in start frame for velocity calc +  final agentstate in update_object_states
     # ret: Dict[track_id: vel[x, y, z]]
     def find_vels(self, track_ids: List[int], obj_centers: List[np.ndarray]) -> Dict[int, np.ndarray]:
         # Object not seen -> velocity = None

From 502f3a80e700d57f38d9e45f59e1fd6c3b1f3987 Mon Sep 17 00:00:00 2001
From: LucasEby <lucaseby@outlook.com>
Date: Wed, 19 Feb 2025 13:20:51 -0500
Subject: [PATCH 46/46] Sending untransformed version to planning team

---
 .../perception/pedestrian_detection.py        | 41 ++++++++++++-------
 1 file changed, 26 insertions(+), 15 deletions(-)

diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py
index 51d90248c..2a3f84d1a 100644
--- a/GEMstack/onboard/perception/pedestrian_detection.py
+++ b/GEMstack/onboard/perception/pedestrian_detection.py
@@ -115,7 +115,14 @@ def __init__(self, vehicle_interface : GEMInterface) -> None:
         
         # Update function variables
         self.t_start = datetime.now() # Estimated start frame time
-        self.start_pose_abs = None
+        # self.start_pose_abs = None
+        # self.start_pose_abs = ObjectPose(
+        #     frame=ObjectFrameEnum.GLOBAL,
+        #     t=0,
+        #     x=0,
+        #     y=0,
+        #     z=0
+        # )
     
     def init_debug(self,) -> None:
          # Debug Publishers
@@ -125,8 +132,10 @@ def init_debug(self,) -> None:
         self.pub_image = rospy.Publisher("/camera/image_detection", Image, queue_size=1)
 
     def update(self, vehicle : VehicleState) -> Dict[str,AgentState]:
-        if self.start_pose_abs is None:
-            self.start_pose_abs = vehicle.pose
+        # if self.start_pose_abs is None:
+            # self.start_pose_abs = vehicle.pose # Store first pose which is start frame
+            # self.start_pose_abs.to_frame(frame=ObjectFrameEnum.GLOBAL, current_pose=)
+        # self.current_vehicle_pose_sf = vehicle.pose # Vehicle pose in start frame
         return self.current_agents
 
     # TODO: Improve Algo Knn, ransac, etc.
@@ -409,20 +418,22 @@ def create_agent_states_in_start_frame(self, obj_centers: List[np.ndarray], obj_
                             velocity=None, # Temporary
                             yaw_rate=0
                         )
-            
+
             # Convert agent to start frame and add to agents list:
             agents.append(
-                state.to_frame(
-                    frame=ObjectFrameEnum.START, 
-                    current_pose=ObjectPose(
-                        frame=ObjectFrameEnum.CURRENT, 
-                        t=(self.curr_time - self.t_start).total_seconds(), 
-                        x=state.pose.x, 
-                        y=state.pose.y, 
-                        z=state.pose.z
-                    ), 
-                    start_pose_abs=self.start_pose_abs
-                )
+                state
+                # state.to_frame(
+                #     frame=ObjectFrameEnum.START, 
+                #     current_pose=self.current_vehicle_pose_sf,
+                #     # current_pose=ObjectPose(
+                #     #     frame=ObjectFrameEnum.CURRENT, 
+                #     #     t=(self.curr_time - self.t_start).total_seconds(), 
+                #     #     x=state.pose.x, 
+                #     #     y=state.pose.y, 
+                #     #     z=state.pose.z
+                #     # ), 
+                #     start_pose_abs=self.start_pose_abs
+                # )
             )
 
         # Return the agent states converted to start frame: