Skip to content

Commit 05748fb

Browse files
committed
Update cone_detection.py
1 parent 39922ef commit 05748fb

1 file changed

Lines changed: 56 additions & 60 deletions

File tree

GEMstack/onboard/perception/cone_detection.py

Lines changed: 56 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,17 @@
2121

2222
# ----- Helper Functions -----
2323

24+
def undistort_image(image, K, D):
    """
    Undistort an image and return the corrected image plus the new intrinsics.

    Args:
        image: input image as a numpy array of shape (H, W) or (H, W, C).
        K: original 3x3 camera intrinsic matrix.
        D: distortion coefficients (OpenCV ordering: k1, k2, p1, p2, k3).

    Returns:
        (undistorted, newK): the rectified image and the optimal new
        intrinsic matrix valid for the undistorted image.
    """
    h, w = image.shape[:2]
    # alpha=1 retains all source pixels (may leave black borders at the edges)
    newK, _ = cv2.getOptimalNewCameraMatrix(K, D, (w, h), 1, (w, h))
    undistorted = cv2.undistort(image, K, D, None, newK)
    return undistorted, newK
33+
34+
2435
def match_existing_cone(
2536
new_center: np.ndarray,
2637
new_dims: tuple,
@@ -63,40 +74,22 @@ def extract_roi_box(lidar_pc, center, half_extents):
6374
return lidar_pc[mask]
6475

6576

66-
# def pc2_to_numpy(pc2_msg, want_rgb=False):
67-
# """
68-
# Convert a ROS PointCloud2 message into a numpy array.
69-
# This function extracts the x, y, z coordinates from the point cloud.
70-
# """
71-
# start = time.time()
72-
# gen = pc2.read_points(pc2_msg, skip_nans=True)
73-
# end = time.time()
74-
# print('Read lidar points: ', end - start)
75-
# start = time.time()
76-
# pts = np.array(list(gen), dtype=np.float16)
77-
# pts = pts[:, :3] # Only x, y, z coordinates
78-
# mask = (pts[:, 0] > 0) & (pts[:, 2] < 2.5)
79-
# end = time.time()
80-
# print('Convert to numpy: ', end - start)
81-
# return pts[mask]
82-
8377
def pc2_to_numpy(pc2_msg, want_rgb=False):
    """
    Convert a ROS PointCloud2 message into a numpy array quickly using ros_numpy.
    This function extracts the x, y, z coordinates from the point cloud.
    """
    # ros_numpy yields a structured array with named coordinate fields
    cloud = ros_numpy.point_cloud2.pointcloud2_to_array(pc2_msg)
    # Flatten each coordinate field and assemble an (N, 3) array
    xyz = np.column_stack(
        [np.asarray(cloud[axis]).ravel() for axis in ('x', 'y', 'z')]
    )
    # Apply filtering (for example, x > 0 and z in a specified range)
    keep = (xyz[:, 0] > 0) & (xyz[:, 2] < -1.5) & (xyz[:, 2] > -2.7)
    return xyz[keep]
9791

9892

99-
10093
def backproject_pixel(u, v, K):
10194
"""
10295
Backprojects a pixel coordinate (u, v) into a normalized 3D ray in the camera coordinate system.
@@ -179,14 +172,15 @@ def create_ray_line_set(start, end):
179172
line_set.colors = o3d.utility.Vector3dVector([[1, 1, 0]])
180173
return line_set
181174

175+
182176
def downsample_points(lidar_points, voxel_size=0.15):
    """
    Voxel-downsample an (N, 3) LiDAR point array using Open3D.

    Args:
        lidar_points: (N, 3) array of XYZ points.
        voxel_size: edge length of the voxel grid used for downsampling
            (presumably meters in the LiDAR frame — TODO confirm units).

    Returns:
        (M, 3) numpy array of downsampled points, M <= N.
    """
    pcd = o3d.geometry.PointCloud()
    pcd.points = o3d.utility.Vector3dVector(lidar_points)
    # Open3D keeps one representative point per occupied voxel
    down_pcd = pcd.voxel_down_sample(voxel_size=voxel_size)
    return np.asarray(down_pcd.points)
187181

188-
def filter_depth_points(lidar_points, max_human_depth=0.9):
189182

183+
def filter_depth_points(lidar_points, max_human_depth=0.9):
190184
if lidar_points.shape[0] == 0:
191185
return lidar_points
192186
lidar_points_dist = lidar_points[:, 0]
@@ -195,6 +189,7 @@ def filter_depth_points(lidar_points, max_human_depth=0.9):
195189
filtered_array = lidar_points[lidar_points_dist < max_possible_dist]
196190
return filtered_array
197191

192+
198193
def visualize_geometries(geometries, window_name="Open3D", width=800, height=600, point_size=5.0):
199194
"""
200195
Visualize a list of Open3D geometry objects in a dedicated window.
@@ -208,13 +203,13 @@ def visualize_geometries(geometries, window_name="Open3D", width=800, height=600
208203
vis.run()
209204
vis.destroy_window()
210205

206+
211207
def pose_to_matrix(pose):
212208
"""
213209
Compose a 4x4 transformation matrix from a pose state.
214210
Assumes pose has attributes: x, y, z, yaw, pitch, roll,
215211
where the angles are given in degrees.
216212
"""
217-
# Use default values if any are None (e.g. if the car is not moving)
218213
x = pose.x if pose.x is not None else 0.0
219214
y = pose.y if pose.y is not None else 0.0
220215
z = pose.z if pose.z is not None else 0.0
@@ -239,6 +234,7 @@ def transform_points_l2c(lidar_points, T_l2c):
239234
pts_cam = (T_l2c @ pts_hom.T).T # (N,4)
240235
return pts_cam[:, :3]
241236

237+
242238
# ----- New: Vectorized projection function -----
243239
def project_points(pts_cam, K, original_lidar_points):
244240
"""
@@ -302,19 +298,29 @@ def initialize(self):
302298
self.sync.registerCallback(self.synchronized_callback)
303299
self.detector = YOLO('../../knowledge/detection/cone.pt')
304300
self.detector.to('cuda')
305-
self.K = np.array([[684.83331299, 0., 573.37109375],
306-
[0., 684.60968018, 363.70092773],
307-
[0., 0., 1.]])
301+
# self.K = np.array([[1230.144096, 0., 978.828508],
302+
# [0., 1230.630424, 605.794034],
303+
# [0., 0., 1.]])
304+
305+
306+
# Below is the distortion vector; here we set it to all zeros (i.e. no distortion)
307+
# self.D = np.array([0.0, 0.0, 0.0, 0.0, 0.0])
308+
self.D = [-0.23751890570984993, 0.08452214195986749, -0.00035324203850054794, -0.0003762498910536819, 0.0]
308309
self.T_l2v = np.array([[0.99939639, 0.02547917, 0.023615, 1.1],
309310
[-0.02530848, 0.99965156, -0.00749882, 0.03773583],
310311
[-0.02379784, 0.00689664, 0.999693, 1.95320223],
311312
[0., 0., 0., 1.]])
312-
self.T_l2c = np.array([
313-
[0.001090, -0.999489, -0.031941, 0.149698],
314-
[-0.007664, 0.031932, -0.999461, -0.397813],
315-
[0.999970, 0.001334, -0.007625, -0.691405],
316-
[0., 0., 0., 1.000000]
317-
])
313+
# self.T_l2c = np.array([
314+
# [0.001090, -0.999489, -0.031941, 0.149698],
315+
# [-0.007664, 0.031932, -0.999461, -0.397813],
316+
# [0.999970, 0.001334, -0.007625, -0.691405],
317+
# [0., 0., 0., 1.000000]
318+
# ])
319+
320+
self.T_l2c = np.array([[ 0.71082304, -0.70305212, -0.02608284, 0.17771596],
321+
[-0.13651802, -0.10076507, -0.98505595, -0.36321222],
322+
[ 0.68915595, 0.70388118, -0.1678969 , -0.62027912],
323+
[ 0. , 0. , 0. , 1. ]])
318324
self.T_c2l = np.linalg.inv(self.T_l2c)
319325
self.R_c2l = self.T_c2l[:3, :3]
320326
self.camera_origin_in_lidar = self.T_c2l[:3, 3]
@@ -329,35 +335,38 @@ def synchronized_callback(self, image_msg, lidar_msg):
329335
step2 = time.time()
330336
self.latest_lidar = pc2_to_numpy(lidar_msg, want_rgb=False)
331337
step3 = time.time()
332-
print('image callback: ', step2-step1, 'lidar callback ', step3- step2)
338+
print('image callback: ', step2 - step1, 'lidar callback ', step3 - step2)
333339

334340
def update(self, vehicle: VehicleState) -> Dict[str, AgentState]:
335341
downsample = False
336342
if self.latest_image is None or self.latest_lidar is None:
337343
return {}
338344

339345
current_time = self.vehicle_interface.time()
340-
# Run YOLO to obtain 2D detections (class 0: persons)
341-
#TODO Change class to cones
346+
347+
undistorted_img, current_K = undistort_image(self.latest_image, self.K, self.D)
348+
self.current_K = current_K
349+
self.latest_image = undistorted_img
350+
351+
# Run YOLO to obtain 2D detections (class 0: cones)
342352
results = self.detector(self.latest_image, conf=0.3, classes=[0])
343353
boxes = np.array(results[0].boxes.xywh.cpu())
344354
agents = {}
345355

346356
if downsample == True:
347357
lidar_down = downsample_points(self.latest_lidar, voxel_size=0.1)
348358
pts_cam = transform_points_l2c(lidar_down, self.T_l2c)
349-
projected_pts = project_points(pts_cam, self.K, lidar_down)
350-
359+
projected_pts = project_points(pts_cam, self.current_K, lidar_down)
351360
else:
352-
# New approach: project the entire LiDAR point cloud to the image plane
353361
step00 = time.time()
354362
lidar_down = self.latest_lidar.copy()
355363
step01 = time.time()
356364
pts_cam = transform_points_l2c(lidar_down, self.T_l2c)
357365
step02 = time.time()
358-
projected_pts = project_points(pts_cam, self.K, lidar_down) # shape (N,5): [u, v, X, Y, Z]
366+
projected_pts = project_points(pts_cam, self.current_K, lidar_down) # shape (N,5): [u, v, X, Y, Z]
359367
step03 = time.time()
360-
print(f'copy lidar data {step01-step00}s, transoforming to camear {step02-step01}s, transforming to image {step03-step02}s')
368+
print(
369+
f'copy lidar data {step01 - step00}s, transforming to camera {step02 - step01}s, projecting to image {step03 - step02}s')
361370

362371
# For each 2D bounding box, filter projected points instead of ray-casting
363372
for i, box in enumerate(boxes):
@@ -376,13 +385,11 @@ def update(self, vehicle: VehicleState) -> Dict[str, AgentState]:
376385
points_3d = roi_pts[:, 2:5]
377386
points_3d = filter_depth_points(points_3d, max_human_depth=0.3)
378387

379-
380-
381388
# Cluster the points and remove ground
382-
refined_cluster = refine_cluster(points_3d, np.mean(points_3d, axis=0), eps=0.15, min_samples=10)
389+
refined_cluster = refine_cluster(points_3d, np.mean(points_3d, axis=0), eps=0.15, min_samples=5)
383390
refined_cluster = remove_ground_by_min_range(refined_cluster, z_range=0.01)
384391
end1 = time.time()
385-
print('refine cluster: ', end1-start)
392+
print('refine cluster: ', end1 - start)
386393
if refined_cluster.shape[0] < 5:
387394
continue
388395

@@ -394,7 +401,7 @@ def update(self, vehicle: VehicleState) -> Dict[str, AgentState]:
394401
dims = tuple(obb.extent)
395402
R_lidar = obb.R.copy()
396403
end2 = time.time()
397-
print('compute bounding box ', end2-end1)
404+
print('compute bounding box ', end2 - end1)
398405
# Transform the refined center from LiDAR to Vehicle frame
399406
refined_center_hom = np.append(refined_center, 1)
400407
refined_center_vehicle_hom = self.T_l2v @ refined_center_hom
@@ -405,18 +412,11 @@ def update(self, vehicle: VehicleState) -> Dict[str, AgentState]:
405412
yaw, pitch, roll = euler_vehicle
406413
refined_center = refined_center_vehicle
407414

408-
# Convert from Vehicle frame to START frame
409415
if self.start_pose_abs is None:
410-
self.start_pose_abs = vehicle.pose # Initialize once
416+
self.start_pose_abs = vehicle.pose
411417

412-
# Obtain the vehicle's pose in the START frame as a pose state.
413-
# Assume vehicle.pose.to_frame returns a pose state with attributes x, y, z, yaw, pitch, roll.
414418
vehicle_start_pose = vehicle.pose.to_frame(ObjectFrameEnum.START, vehicle.pose, self.start_pose_abs)
415-
416-
# Compose the 4x4 transformation matrix from the vehicle_start_pose
417419
T_vehicle_to_start = pose_to_matrix(vehicle_start_pose)
418-
419-
# Transform the refined center (in Vehicle frame) to the START frame
420420
refined_center_hom_vehicle = np.append(refined_center, 1)
421421
refined_center_start = (T_vehicle_to_start @ refined_center_hom_vehicle)[:3]
422422

@@ -475,17 +475,15 @@ def update(self, vehicle: VehicleState) -> Dict[str, AgentState]:
475475
rospy.loginfo(f"Removing stale agent: {agent_id}\n")
476476
for agent_id, agent in agents.items():
477477
p = agent.pose
478-
# Format pose and velocity with 3 decimals (or as needed)
479478
rospy.loginfo(
480479
f"Agent ID: {agent_id}\n"
481480
f"Pose: (x: {p.x:.3f}, y: {p.y:.3f}, z: {p.z:.3f}, "
482481
f"yaw: {p.yaw:.3f}, pitch: {p.pitch:.3f}, roll: {p.roll:.3f})\n"
483482
f"Velocity: (vx: {agent.velocity[0]:.3f}, vy: {agent.velocity[1]:.3f}, vz: {agent.velocity[2]:.3f})\n"
484-
)
483+
)
485484
return agents
486485

487-
488-
# ----- Fake Cone Detector 2D (for Testing Purposes) -----
486+
# ----- Fake Cone Detector 2D (for Testing Purposes) -----
489487

490488
class FakConeDetector(Component):
491489
def __init__(self, vehicle_interface: GEMInterface):
@@ -513,7 +511,6 @@ def update(self, vehicle: VehicleState) -> Dict[str, AgentState]:
513511
rospy.loginfo("Detected a Cone (simulated)")
514512
return res
515513

516-
517514
def box_to_fake_agent(box):
518515
x, y, w, h = box
519516
pose = ObjectPose(t=0, x=x + w / 2, y=y + h / 2, z=0, yaw=0, pitch=0, roll=0, frame=ObjectFrameEnum.CURRENT)
@@ -522,6 +519,5 @@ def box_to_fake_agent(box):
522519
type=AgentEnum.CONE, activity=AgentActivityEnum.MOVING,
523520
velocity=(0, 0, 0), yaw_rate=0)
524521

525-
526522
if __name__ == '__main__':
527-
pass
523+
pass

0 commit comments

Comments
 (0)