Skip to content

Commit cd940f4

Browse files
committed
Update pedestrian_detection_test_merged.py
1 parent 9a25cff commit cd940f4

1 file changed

Lines changed: 142 additions & 91 deletions

File tree

homework/pedestrian_detection_test_merged.py

Lines changed: 142 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,21 @@ def visualize_custom_points(image, points_lidar, T_l2c, K, color=(0, 255, 0)):
103103
cv2.circle(image, (u, v), 5, color, -1)
104104
return image
105105

106+
def filter_points_within_threshold(points, threshold=15.0):
107+
"""
108+
筛选出所有距离传感器小于或等于 threshold 米的点。
109+
110+
参数:
111+
points: numpy 数组,形状为 (N,3),每一行为一个三维点 [x, y, z]。
112+
threshold: 距离阈值(单位:米),默认值为 15.0 米。
113+
114+
返回:
115+
过滤后的点云数据(仅保留距离小于等于 threshold 米的点)。
116+
"""
117+
distances = np.linalg.norm(points, axis=1)
118+
mask = distances <= threshold
119+
return points[mask]
120+
106121
def refine_cluster(roi_points, center, eps=0.2, min_samples=10):
107122
"""
108123
对 roi_points (N,3) 进行 DBSCAN 聚类,并返回与 center 最近的簇
@@ -149,18 +164,33 @@ def compute_velocity(old_pose, new_pose, dt):
149164
vz = (new_pose.z - old_pose.z) / dt
150165
return (vx, vy, vz)
151166

152-
def filter_depth_points(lidar_points, max_depth_diff=0.9, use_norm=True):
    """Drop points deeper than (min_depth + max_depth_diff).

    Depth is the per-point Euclidean distance from the origin when
    ``use_norm`` is True; otherwise the raw x coordinate (first column)
    is used as depth.

    Args:
        lidar_points: (N, 3) point cloud.
        max_depth_diff: allowed depth band above the closest point.
        use_norm: True to measure depth as the Euclidean norm,
            False to use the x coordinate directly.

    Returns:
        The filtered (M, 3) point cloud (the input itself when empty).
    """
    # Nothing to filter — return the empty array unchanged.
    if lidar_points.shape[0] == 0:
        return lidar_points

    depths = (np.linalg.norm(lidar_points, axis=1)
              if use_norm else lidar_points[:, 0])

    # Everything strictly closer than the nearest point plus the band survives.
    cutoff = np.min(depths) + max_depth_diff
    return lidar_points[depths < cutoff]
164194

165195
def display_reprojected_cluster(image, refined_cluster, T_l2c, K):
166196
"""
@@ -183,6 +213,46 @@ def display_reprojected_cluster(image, refined_cluster, T_l2c, K):
183213
cv2.circle(image, (u, v), 2, (255, 0, 0), -1)
184214
return image
185215

216+
def pose_to_matrix(pose):
    """Compose a 4x4 homogeneous transform from a pose state.

    ``pose`` must expose x, y, z (translation; None is treated as 0.0)
    and yaw, pitch, roll (angles in degrees; if any of the three is
    None, an identity rotation is used).

    Returns:
        (4, 4) numpy array: rotation (zyx Euler order) plus translation.
    """
    # Missing translation components default to the origin.
    translation = [c if c is not None else 0.0
                   for c in (pose.x, pose.y, pose.z)]

    angles = (pose.yaw, pose.pitch, pose.roll)
    if any(a is None for a in angles):
        # Incomplete orientation: fall back to zero angles (identity rotation).
        yaw = pitch = roll = 0.0
    else:
        yaw, pitch, roll = (np.radians(a) for a in angles)

    T = np.eye(4)
    T[:3, :3] = R.from_euler('zyx', [yaw, pitch, roll]).as_matrix()
    T[:3, 3] = translation
    return T
236+
237+
# ----- New: Vectorized projection function -----
238+
def project_points(pts_cam, K, original_lidar_points):
    """Project camera-frame points onto the image plane (vectorized).

    Args:
        pts_cam: (N, 3) points in camera coordinates.
        K: 3x3 camera intrinsic matrix.
        original_lidar_points: (N, 3) same points in LiDAR coordinates.

    Returns:
        (M, 5) array of rows [u, v, X_lidar, Y_lidar, Z_lidar] covering
        every point in front of the camera (Z > 0).
    """
    # Only points with positive depth project onto the image.
    in_front = pts_cam[:, 2] > 0
    cam = pts_cam[in_front]
    lidar = original_lidar_points[in_front]

    # Pinhole projection: pixel = f * (X/Z) + principal point.
    u = (K[0, 0] * (cam[:, 0] / cam[:, 2]) + K[0, 2]).astype(np.int32)
    v = (K[1, 1] * (cam[:, 1] / cam[:, 2]) + K[1, 2]).astype(np.int32)

    # Pair each pixel with its originating LiDAR coordinates.
    return np.column_stack((u, v, lidar))
255+
186256
# -----------------------------
187257
# 3) PedestrianDetector2D (新方法)
188258
# -----------------------------
@@ -201,62 +271,59 @@ class PedestrianDetector2D:
201271
def __init__(self, model_path='yolov8n.pt', camera_front=False):
    """Initialize the pedestrian detector.

    Generalization: ``camera_front`` was a hard-coded ``False`` attribute
    tested by three separate if/else blocks; it is now a keyword parameter
    (default preserves the old behavior) and each calibration set is chosen
    in a single branch.

    Args:
        model_path: path to the YOLO weights file.
        camera_front: True to use the front-camera calibration
            (intrinsics, distortion, LiDAR->Camera extrinsics);
            False (default) to use the original camera calibration.
    """
    # Load the YOLO detection model.
    self.detector = YOLO(model_path)

    self.camera_front = camera_front

    if self.camera_front:
        # Front camera: intrinsics, zero distortion, LiDAR->Camera extrinsics.
        self.K = np.array([[684.83331299, 0., 573.37109375],
                           [0., 684.60968018, 363.70092773],
                           [0., 0., 1.]])
        self.D = np.array([0.0, 0.0, 0.0, 0.0, 0.0])
        self.T_l2c = np.array([
            [0.001090, -0.999489, -0.031941, 0.149698],
            [-0.007664, 0.031932, -0.999461, -0.397813],
            [0.999970, 0.001334, -0.007625, -0.691405],
            [0., 0., 0., 1.000000]
        ])
    else:
        # Default camera: intrinsics, distortion coefficients,
        # LiDAR->Camera extrinsics.
        self.K = np.array([[1230.144096, 0., 978.828508],
                           [0., 1230.630424, 605.794034],
                           [0., 0., 1.]])
        self.D = np.array([-0.23751890570984993, 0.08452214195986749,
                           -0.00035324203850054794, -0.0003762498910536819,
                           0.0])
        self.T_l2c = np.array([[0.71082304, -0.70305212, -0.02608284, 0.17771596],
                               [-0.13651802, -0.10076507, -0.98505595, -0.56321222],
                               [0.68915595, 0.70388118, -0.1678969, -0.62027912],
                               [0., 0., 0., 1.]])

    # LiDAR -> Vehicle extrinsics (shared by both camera setups).
    self.T_l2v = np.array([[0.99939639, 0.02547917, 0.023615, 1.1],
                           [-0.02530848, 0.99965156, -0.00749882, 0.03773583],
                           [-0.02379784, 0.00689664, 0.999693, 1.95320223],
                           [0., 0., 0., 1.]])

    # Tracking state: agent id -> last known AgentState.
    self.tracked_agents = {}
    self.pedestrian_counter = 0
    # Per-frame intrinsics (updated after undistortion); keep K pristine.
    self.current_K = self.K.copy()
    # Frustum LineSets collected for Open3D debugging.
    self.debug_frustums = []
    # Refined-cluster point clouds, colored for Open3D visualization.
    self.cluster_geometries = []
238311

239312
def process_frame(self, image, lidar_points, current_time=0.0, debug_reproj=False, debug_frustum=False):
240313
agents = {}
314+
self.cluster_geometries = [] # 重置 refined cluster 的集合
241315

242-
# 1) 图像畸变校正,获取当前帧的内参 newK(但不更新全局 self.K)
243-
# image, newK = undistort_image(image, self.K, self.D)
244-
# self.current_K = newK
245-
246-
self.current_K = self.K.copy()
316+
image, newK = undistort_image(image, self.K, self.D)
317+
self.current_K = newK
247318

248-
# 2) 下采样 LiDAR 点云(示例中直接复制原始点云)
249319
lidar_down = lidar_points.copy()
250320

251-
# 3) LiDAR -> Camera 坐标转换 & 投影到图像平面(使用当前帧内参)
252321
pts_cam = transform_points_l2c(lidar_down, self.T_l2c)
253322
projected_pts = project_points(pts_cam, self.current_K, lidar_down)
254323

255-
# 4) YOLO 2D 检测
256324
results = self.detector(image, conf=0.4, classes=[0])
257325
boxes = np.array(results[0].boxes.xywh.cpu()) if len(results) > 0 else []
258326

259-
# 绘制所有检测到的 2D 框(蓝色矩形)
260327
for box in boxes:
261328
cx, cy, w, h = box
262329
left = int(cx - w/2)
@@ -265,7 +332,6 @@ def process_frame(self, image, lidar_points, current_time=0.0, debug_reproj=Fals
265332
bottom = int(cy + h/2)
266333
cv2.rectangle(image, (left, top), (right, bottom), (255, 0, 0), 2)
267334

268-
# 5) 针对每个 YOLO 检测到的 2D 边界框进行后续处理(聚类、计算 oriented b-box 及跟踪匹配)
269335
if debug_frustum:
270336
self.debug_frustums = []
271337
for box in boxes:
@@ -293,13 +359,23 @@ def process_frame(self, image, lidar_points, current_time=0.0, debug_reproj=Fals
293359
points_3d = roi_2d_pts[:, 2:5]
294360
if debug_reproj:
295361
display_reprojected_cluster(image, points_3d, self.T_l2c, self.current_K)
296-
points_3d = filter_depth_points(points_3d, max_human_depth=0.2)
297-
298-
refined_cluster = refine_cluster(points_3d, np.mean(points_3d, axis=0), eps=0.3, min_samples=5)
299-
refined_cluster = remove_ground_by_min_range(refined_cluster, z_range=0.01)
300-
if refined_cluster.shape[0] < 5:
362+
start = time.time()
363+
points_3d = filter_points_within_threshold(points_3d, 15)
364+
end = time.time()
365+
print(start-end)
366+
# points_3d = remove_ground_by_min_range(points_3d, z_range=0.01)
367+
points_3d = filter_depth_points(points_3d, max_depth_diff=0.3)
368+
# refined_cluster = refine_cluster(points_3d, np.mean(points_3d, axis=0), eps=0.15, min_samples=5)
369+
refined_cluster = remove_ground_by_min_range(points_3d, z_range=0.01)
370+
if refined_cluster.shape[0] < 4:
301371
continue
302372

373+
# 创建 refined cluster 的点云并上色为红色
374+
pcd_cluster = o3d.geometry.PointCloud()
375+
pcd_cluster.points = o3d.utility.Vector3dVector(refined_cluster)
376+
pcd_cluster.paint_uniform_color([1, 0, 0])
377+
self.cluster_geometries.append(pcd_cluster)
378+
303379
pcd = o3d.geometry.PointCloud()
304380
pcd.points = o3d.utility.Vector3dVector(refined_cluster)
305381
obb = pcd.get_oriented_bounding_box()
@@ -362,7 +438,7 @@ def process_frame(self, image, lidar_points, current_time=0.0, debug_reproj=Fals
362438
agents[agent_id] = new_agent
363439
self.tracked_agents[agent_id] = new_agent
364440

365-
# 6) 在当前帧上绘制 hard-code 的 3D 点(红色圆点)
441+
# 绘制 hard-code 的 3D 点(红色圆点)到图像上
366442
my_points_lidar = np.array([
367443
[4.730639, 10.195478, -1.941095], # Point #189070
368444
[2.066050, 7.066111, -2.027844], # Point #223837
@@ -372,7 +448,6 @@ def process_frame(self, image, lidar_points, current_time=0.0, debug_reproj=Fals
372448
])
373449
visualize_custom_points(image, my_points_lidar, self.T_l2c, self.current_K, color=(0, 0, 255))
374450

375-
# 7) 在 process_frame 内直接显示图像(包含所有 boxes 与 hard-code 的点)
376451
cv2.imshow("Frame with Boxes and Custom Points", image)
377452
cv2.waitKey(1)
378453
return agents
@@ -393,7 +468,7 @@ def create_frustum_lines(self, box, K, z_near, z_far):
393468
fx = K[0, 0]
394469
fy = K[1, 1]
395470
cx = K[0, 2]
396-
cy = K[1, 2] # cy 取自 K 的第二行第三列
471+
cy = K[1, 2]
397472

398473
near_points = []
399474
far_points = []
@@ -413,7 +488,6 @@ def create_frustum_lines(self, box, K, z_near, z_far):
413488
near_points_lidar = (T_c2l @ near_points.T).T[:, :3]
414489
far_points_lidar = (T_c2l @ far_points.T).T[:, :3]
415490
all_points = np.vstack((near_points_lidar, far_points_lidar))
416-
# 连线:四边形边缘及对应近远点连接
417491
lines = [[i, i + 4] for i in range(4)]
418492
lines += [[0, 1], [1, 2], [2, 3], [3, 0]]
419493
lines += [[4, 5], [5, 6], [6, 7], [7, 4]]
@@ -427,14 +501,12 @@ def load_lidar_from_npz(file_path):
427501
return data['arr_0']
428502

429503
# -----------------------------
430-
# 5) 主函数:调用 process_frame,并保留原来的 Open3D 可视化
504+
# 5) 主函数:调用 process_frame,并只显示 refined cluster 中的点(不显示其他几何体)
431505
# -----------------------------
432506
def main():
433507
SHOW_VISUALIZATION = True
434-
# 修改模型路径为你使用的模型文件(例如 'cone.pt')
435508
detector = PedestrianDetector2D(model_path='cone.pt')
436509

437-
# 修改以下路径为你实际的数据路径
438510
image_files = sorted(glob.glob(os.path.join('../data', 'color*.png')))
439511
lidar_files = sorted(glob.glob(os.path.join('../data', 'lidar*.npz')))
440512
num_frames = min(len(image_files), len(lidar_files))
@@ -450,38 +522,17 @@ def main():
450522
agents = detector.process_frame(image, lidar_points, current_time=float(i),
451523
debug_reproj=True, debug_frustum=True)
452524

453-
# 这里保留原来的 Open3D 可视化(展示 3D 点云、检测目标中心、oriented b-box 以及 frustum)
454-
if SHOW_VISUALIZATION:
455-
pcd = o3d.geometry.PointCloud()
456-
pcd.points = o3d.utility.Vector3dVector(lidar_points)
457-
pcd.paint_uniform_color([0.7, 0.7, 0.7])
458-
geometries = [pcd]
459-
460-
T_v2l = np.linalg.inv(detector.T_l2v)
461-
for agent_id, agent_state in agents.items():
462-
pose_vehicle = np.array([agent_state.pose.x, agent_state.pose.y, agent_state.pose.z, 1])
463-
center_lidar = (T_v2l @ pose_vehicle)[:3]
464-
465-
# 绘制目标中心(绿色小球)
466-
sphere = o3d.geometry.TriangleMesh.create_sphere(radius=0.05)
467-
sphere.translate(center_lidar)
468-
sphere.paint_uniform_color([0, 1, 0])
469-
geometries.append(sphere)
470-
471-
dims = agent_state.dimensions
472-
if dims != (0, 0, 0):
473-
yaw = agent_state.pose.yaw
474-
pitch = agent_state.pose.pitch
475-
roll = agent_state.pose.roll
476-
R_vehicle = R.from_euler('zyx', [yaw, pitch, roll], degrees=True).as_matrix()
477-
R_lidar = T_v2l[:3, :3] @ R_vehicle
478-
obb = o3d.geometry.OrientedBoundingBox(center_lidar, R_lidar, dims)
479-
obb.color = (1, 0, 1) # 紫红色
480-
geometries.append(obb)
481-
482-
if detector.debug_frustums:
483-
geometries.extend(detector.debug_frustums)
525+
# Print all agent information
526+
print("Detected Agents:")
527+
for agent_id, agent_state in agents.items():
528+
p = agent_state.pose
529+
print(f"Agent {agent_id}: Position=({p.x:.2f}, {p.y:.2f}, {p.z:.2f}), "
530+
f"Yaw={p.yaw:.2f}, Pitch={p.pitch:.2f}, Roll={p.roll:.2f}, "
531+
f"Dimensions={agent_state.dimensions}, Velocity={agent_state.velocity}")
484532

533+
# 在 Open3D 的可视化中,仅显示 refined cluster 点云(上色为红色)
534+
if SHOW_VISUALIZATION:
535+
geometries = detector.cluster_geometries if detector.cluster_geometries else []
485536
o3d.visualization.draw_geometries(geometries, window_name=f"Frame {i}")
486537

487538
cv2.destroyAllWindows()

0 commit comments

Comments
 (0)