Skip to content

Commit e1ab76a

Browse files
committed
Further cleaned up code and wrote some comments to help out Akul
1 parent 6b8ee50 commit e1ab76a

3 files changed

Lines changed: 29 additions & 104 deletions

File tree

GEMstack/onboard/perception/AgentTracker.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,23 @@ def assign_ids(self, agents: list) -> Dict[str,AgentState]:
1414
"""
1515
"""
1616
agents = {}
17-
17+
# Act with the assumption that you are being sent a list of AgentState objects and you need to use the object fields to keep track of them for your task
18+
# Further act on the assumption that we will decide the ids of the pedestrians by assuming that 2 pedestrians are the same pedestrian if a
19+
# previously stored AgentState pose and dimensions overlap with a newly passed in AgentState
20+
21+
# Act on the assumption that the AgentState objects are all in reference to the start frame of the vehicle
22+
23+
# some helper functions in this class, LostAgent.py, and IdTracker.py have been created to try to help you out with your task.
24+
25+
# Assume that the output returned from this function will be a dictionary of AgentState objects with the key corresponding to their id
26+
pass
27+
1828
def __convert_to_start_frame(self):
1929
"""Converts a list of AgentState agents from ouster Lidar frame of
2030
reference (which is in reference to the current frame) to start
2131
frame of reference
2232
"""
33+
# you can ignore this function akul
2334
pass
2435

2536
def __agents_overlap(ped1, ped2) -> bool:

GEMstack/onboard/perception/fusion.py

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -28,14 +28,6 @@ def __init__(self):
2828

2929
# Setup visualization variables
3030
self.visualization = True # Set this to true for visualization, later change to get value from sys arg
31-
self.label_text = "Pedestrian "
32-
self.font = cv2.FONT_HERSHEY_SIMPLEX
33-
self.font_scale = 0.5
34-
self.font_color = (255, 255, 255) # White text
35-
self.outline_color = (0, 0, 0) # Black outline
36-
self.line_type = 1
37-
self.text_thickness = 2 # Text thickness
38-
self.outline_thickness = 1 # Thickness of the text outline
3931

4032
# Load calibration data
4133
self.R = load_extrinsics(os.getcwd() + '/GEMstack/onboard/perception/calibration/extrinsics/R.npy')

GEMstack/onboard/perception/pedestrian_detection.py

Lines changed: 17 additions & 95 deletions
Original file line numberDiff line numberDiff line change
@@ -78,24 +78,28 @@ def initialize(self):
7878
def detect_pedestrians(self, front_image_msg: Image, ouster_msg: PointCloud2D):
7979
"""Fuses Image and Lidar information to detect pedestrians
8080
"""
81-
# combined_point_cloud = self.combine_point_clouds # Removed because we only have transformation for a single lidar (ouster) at the moment. Probably should return np.array
81+
# TODO: make fusion_callback return some data. Call the rest of Lukas' functions and Akul's stuff inside of this function.
82+
data = self.fusion.fusion_callback(rgb_image_msg=front_image_msg, lidar_pc2_msg=ouster_msg)
83+
84+
# Original plan for how to do this:
85+
# # combined_point_cloud = self.combine_point_clouds # Removed because we only have transformation for a single lidar (ouster) at the moment. Probably should return np.array
8286

83-
point_cloud = np.array(list(pc2.read_points(ouster_msg, skip_nans=True)), dtype=np.float32)[:, :3]
87+
# point_cloud = np.array(list(pc2.read_points(ouster_msg, skip_nans=True)), dtype=np.float32)[:, :3]
8488

85-
image_pedestrians = self.detect_pedestrians_in_image(image=front_image_msg)
89+
# image_pedestrians = self.detect_pedestrians_in_image(image=front_image_msg)
8690

87-
for id in image_pedestrians:
88-
#### Scrum-20: Calculate and Convert Image Points to Lidar Frame of Reference Task:
89-
(estimated_ped_cloud, flat_center) = self.extract_ped_cloud(point_cloud=point_cloud, image_pedestrian=image_pedestrians[key])
91+
# for id in image_pedestrians:
92+
# #### Scrum-20: Calculate and Convert Image Points to Lidar Frame of Reference Task:
93+
# (estimated_ped_cloud, flat_center) = self.extract_ped_cloud(point_cloud=point_cloud, image_pedestrian=image_pedestrians[key])
9094

91-
#### Scrum-21 & 39:Calculate Pedestrian Center and Dimensions
92-
# Determine a more exact center and dimensions of the pedestrian by ignoring background point cloud points
93-
(pose, dims) = self.calc_ped_center_dims(estimated_ped_cloud, flat_center)
94-
image_pedestrians[id].pose = pose
95-
image_pedestrians[id].dims = dims
95+
# #### Scrum-21 & 39:Calculate Pedestrian Center and Dimensions
96+
# # Determine a more exact center and dimensions of the pedestrian by ignoring background point cloud points
97+
# (pose, dims) = self.calc_ped_center_dims(estimated_ped_cloud, flat_center)
98+
# image_pedestrians[id].pose = pose
99+
# image_pedestrians[id].dims = dims
96100

97-
#### Scrum-35: Associate and Track Pedestrian Id's
98-
self.associate_and_track_peds(image_pedestrians)
101+
# #### Scrum-35: Associate and Track Pedestrian Id's
102+
# self.associate_and_track_peds(image_pedestrians)
99103

100104
# TODO: refactor into pedestrian_detection.py
101105
def update(self, vehicle : VehicleState) -> Dict[str,AgentState]:
@@ -160,88 +164,6 @@ def update_object_states(track_result, flattened_pedestrians_3d_pts: list[np.nda
160164
for ind in len(num_objs)
161165
]
162166

163-
# Use cv2.Mat for GEM Car, Image for RosBag
164-
def detect_pedestrians_in_image(self, image : Union[cv2.Mat, Image]) -> dict:
165-
"""Detects pedestrians using the model provided when new image is passed.
166-
167-
Converts Image.msg to cv2 format and uses the model to detect pedestrian
168-
IF visualization is true, will publish an image with pedestrians detected.
169-
170-
Hardcoded values for now:
171-
Detected only pedestrians -> Class = 0
172-
173-
"""
174-
175-
# Use Image directly for GEM Car, convert to cv2.Mat for rosbag:
176-
if type(image) == Image:
177-
bridge = CvBridge()
178-
image = bridge.imgmsg_to_cv2(image, "bgr8")
179-
track_result = self.detector.track(source=image, classes=self.classes_to_detect, persist=True, conf=self.confidence)
180-
181-
self.last_person_boxes = []
182-
boxes = track_result[0].boxes
183-
image_pedestrians = {} # Stores a dictionary of pedestrian AgentState objects with the YOLO predicted id as the key
184-
185-
# Unpacking box dimensions detected into x,y,w,h
186-
for box in boxes:
187-
188-
xywh = box.xywh[0].tolist()
189-
self.last_person_boxes.append(xywh)
190-
x, y, w, h = xywh
191-
id = box.id.item()
192-
193-
# Stores AgentState in a dict, can be removed if not required
194-
pose = ObjectPose(t=0,x=x,y=y,z=0,yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT)
195-
dims = (w,h,0)
196-
if(id not in pedestrians.keys()):
197-
image_pedestrians[id] = AgentState(pose=pose,dimensions=dims,outline=None,type=AgentEnum.PEDESTRIAN,activity=AgentActivityEnum.MOVING,velocity=(0,0,0),yaw_rate=0)
198-
else:
199-
image_pedestrians[id].pose = pose
200-
image_pedestrians[id].dims = dims
201-
202-
# Used for visualization
203-
if(self.visualization):
204-
self.__visualize_labeled_image(image, box, x, y, w, h)
205-
206-
#uncomment if you want to debug the detector...
207-
# print(self.last_person_boxes)
208-
# print(pedestrians.keys())
209-
#for bb in self.last_person_boxes:
210-
# x,y,w,h = bb
211-
# cv2.rectangle(image, (int(x-w/2), int(y-h/2)), (int(x+w/2), int(y+h/2)), (255, 0, 255), 3)
212-
#cv2.imwrite("pedestrian_detections.png",image)
213-
214-
# Used for visualization
215-
if(self.visualization):
216-
ros_img = bridge.cv2_to_imgmsg(image, 'bgr8')
217-
self.pub_image.publish(ros_img)
218-
219-
return image_pedestrians
220-
221-
def __visualize_labeled_image(self, image: cv2.Mat, box, x: float, y: float, w: float, h: float):
222-
# Draw bounding box
223-
cv2.rectangle(image, (int(x - w / 2), int(y - h / 2)), (int(x + w / 2), int(y + h / 2)), (255, 0, 255), 3)
224-
225-
# Define text label
226-
x = int(x - w / 2)
227-
y = int(y - h / 2)
228-
label = self.label_text + str(id) + " : " + str(round(box.conf.item(), 2))
229-
230-
# Get text size
231-
text_size, baseline = cv2.getTextSize(label, self.font, self.font_scale, self.line_type)
232-
text_w, text_h = text_size
233-
234-
# Position text above the bounding box
235-
text_x = x
236-
text_y = y - 10 if y - 10 > 10 else y + h + text_h
237-
238-
# Draw text outline for better visibility, uncomment for outline
239-
# for dx, dy in [(-1, -1), (-1, 1), (1, -1), (1, 1)]:
240-
# cv2.putText(image, label, (text_x + dx, text_y - baseline + dy), self.font, self.font_scale, self.outline_color, self.outline_thickness)
241-
242-
# Draw main text on top of the outline
243-
cv2.putText(image, label, (text_x, text_y - baseline), self.font, self.font_scale, self.font_color, self.text_thickness)
244-
245167
def extract_ped_cloud(point_cloud: np.array, image_pedestrian: AgentState):
246168
# return (estimated_ped_cloud, flat_center)
247169
pass

0 commit comments

Comments
 (0)