
Commit af6089c

Track pedestrians' 3d location and velocity
Add Tianyu's changes
1 parent 69487be commit af6089c

2 files changed

Lines changed: 361 additions & 1 deletion

File tree

homework/pointcloud.py
requirements.txt

homework/pointcloud.py

Lines changed: 355 additions & 0 deletions
@@ -0,0 +1,355 @@
import numpy as np
import cv2
from ultralytics import YOLO
import open3d as o3d
from sklearn.cluster import DBSCAN
from scipy.spatial.transform import Rotation as R  # For converting rotation matrices to Euler angles
import time

# ----- Helper Functions -----

def fit_plane_ransac(points, threshold, min_inliers, iterations=100):
    """
    A simple RANSAC plane fitting implementation.
    """
    best_inliers_count = 0
    best_plane = None
    best_inliers = None
    n_points = points.shape[0]
    if n_points < 3:
        return None, None

    for _ in range(iterations):
        idx = np.random.choice(n_points, 3, replace=False)
        sample = points[idx]
        p1, p2, p3 = sample
        normal = np.cross(p2 - p1, p3 - p1)
        norm = np.linalg.norm(normal)
        if norm == 0:
            continue
        normal = normal / norm
        d = -np.dot(normal, p1)
        plane = np.hstack([normal, d])
        distances = np.abs(points.dot(normal) + d)
        inliers = np.where(distances < threshold)[0]
        if len(inliers) > best_inliers_count:
            best_inliers_count = len(inliers)
            best_plane = plane
            best_inliers = inliers

    if best_inliers_count >= min_inliers:
        return best_plane, best_inliers
    else:
        return None, None

def backproject_pixel(u, v, K):
    """
    Backproject a pixel (u, v) into a normalized 3D ray in camera coordinates using the intrinsic matrix K.
    """
    cx = K[0, 2]
    cy = K[1, 2]
    fx = K[0, 0]
    fy = K[1, 1]
    x = (u - cx) / fx
    y = (v - cy) / fy
    ray_dir = np.array([x, y, 1.0])
    return ray_dir / np.linalg.norm(ray_dir)


def find_human_center_on_ray(lidar_pc, ray_origin, ray_direction,
                             t_min, t_max, t_step,
                             distance_threshold, min_points, ransac_threshold):
    """
    Pre-filter the point cloud to only include points near the ray, then sweep along the ray.
    For each candidate along the ray, compute the centroid of nearby points and return that as the refined candidate.
    Returns (refined_candidate, None, None) if found; otherwise, (None, None, None).
    """
    # Pre-filter: compute distance from each point to the ray.
    vecs = lidar_pc - ray_origin  # Vectors from origin to points.
    proj_lengths = np.dot(vecs, ray_direction)  # Projection lengths.
    proj_points = ray_origin + np.outer(proj_lengths, ray_direction)
    dists_to_ray = np.linalg.norm(lidar_pc - proj_points, axis=1)
    near_ray_mask = dists_to_ray < distance_threshold
    filtered_pc = lidar_pc[near_ray_mask]

    # If too few points remain, return None.
    if filtered_pc.shape[0] < min_points:
        return None, None, None

    # Sweep along the ray using the filtered point cloud.
    t_values = np.arange(t_min, t_max, t_step)
    for t in t_values:
        candidate = ray_origin + t * ray_direction
        dists = np.linalg.norm(filtered_pc - candidate, axis=1)
        nearby_points = filtered_pc[dists < distance_threshold]
        if nearby_points.shape[0] >= min_points:
            refined_candidate = np.mean(nearby_points, axis=0)
            return refined_candidate, None, None
    return None, None, None


def extract_roi(pc, center, roi_radius):
    """
    Extract points from the point cloud (pc) that lie within roi_radius of center.
    """
    distances = np.linalg.norm(pc - center, axis=1)
    return pc[distances < roi_radius]

def refine_cluster(roi_points, center, eps=0.2, min_samples=10):
    """
    Further refine a cluster using DBSCAN and return the cluster whose centroid is closest to center.
    """
    clustering = DBSCAN(eps=eps, min_samples=min_samples).fit(roi_points)
    labels = clustering.labels_
    unique_labels = set(labels)
    if -1 in unique_labels:
        unique_labels.remove(-1)
    if not unique_labels:
        return roi_points

    best_cluster = None
    best_distance = float('inf')
    for label in unique_labels:
        cluster = roi_points[labels == label]
        cluster_center = np.mean(cluster, axis=0)
        dist = np.linalg.norm(cluster_center - center)
        if dist < best_distance:
            best_distance = dist
            best_cluster = cluster
    return best_cluster

def remove_ground_by_min_range(cluster, z_range=0.05):
    """
    Remove ground points from the cluster by finding the minimum z value and eliminating
    all points within z_range of that minimum.
    """
    if cluster is None or cluster.shape[0] == 0:
        return cluster
    min_z = np.min(cluster[:, 2])
    filtered = cluster[cluster[:, 2] > (min_z + z_range)]
    return filtered

def get_bounding_box_center_and_dimensions(points):
    """
    Compute the bounding box center and dimensions (max - min) for the given points.
    """
    if points.shape[0] == 0:
        return None, None
    min_vals = np.min(points, axis=0)
    max_vals = np.max(points, axis=0)
    center = (min_vals + max_vals) / 2
    dimensions = max_vals - min_vals
    return center, dimensions

def create_circle_line_set(center, radius, num_points=50):
    """
    Create a LineSet representing a circle (in the X-Y plane) with given center and radius.
    """
    theta = np.linspace(0, 2 * np.pi, num_points, endpoint=False)
    circle_points = []
    for angle in theta:
        x = center[0] + radius * np.cos(angle)
        y = center[1] + radius * np.sin(angle)
        z = center[2]
        circle_points.append([x, y, z])
    circle_points = np.array(circle_points)
    lines = [[i, (i + 1) % num_points] for i in range(num_points)]
    line_set = o3d.geometry.LineSet()
    line_set.points = o3d.utility.Vector3dVector(circle_points)
    line_set.lines = o3d.utility.Vector2iVector(lines)
    line_set.colors = o3d.utility.Vector3dVector([[0, 1, 0] for _ in range(len(lines))])
    return line_set

def create_ray_line_set(start, end):
    """
    Create a LineSet representing a ray from 'start' to 'end' (colored yellow).
    """
    points = [start, end]
    lines = [[0, 1]]
    line_set = o3d.geometry.LineSet()
    line_set.points = o3d.utility.Vector3dVector(points)
    line_set.lines = o3d.utility.Vector2iVector(lines)
    line_set.colors = o3d.utility.Vector3dVector([[1, 1, 0]])
    return line_set


def extract_roi_box(lidar_pc, center, half_extents):
    """
    Extract points lying inside an axis-aligned box centered at 'center' with the given half extents.
    """
    lower = center - half_extents
    upper = center + half_extents
    mask = np.all((lidar_pc >= lower) & (lidar_pc <= upper), axis=1)
    return lidar_pc[mask]

def visualize_geometries(geometries, window_name="Open3D", width=800, height=600, point_size=5.0):
    """
    Utility to visualize a list of Open3D geometries.
    """
    vis = o3d.visualization.Visualizer()
    vis.create_window(window_name=window_name, width=width, height=height)
    for geom in geometries:
        vis.add_geometry(geom)
    opt = vis.get_render_option()
    opt.point_size = point_size
    vis.run()
    vis.destroy_window()

# ----- Main Processing -----

def main():
    # Load the color image.
    idx = 9

    img = cv2.imread(f"../data/color{idx}.png")
    if img is None:
        print("Error: Could not load the color image.")
        return

    # Show the original YOLO detection results on the image.
    model = YOLO("yolov8n.pt")
    results = model.predict(img, classes=[0], conf=0.4)  # detect only person (class id: 0)
    boxes = results[0].boxes.xywh.tolist()  # each box: [center_x, center_y, w, h]
    for box in boxes:
        cx, cy, w, h = box
        top_left = (int(cx - w/2), int(cy - h/2))
        bottom_right = (int(cx + w/2), int(cy + h/2))
        cv2.rectangle(img, top_left, bottom_right, (255, 0, 0), 2)
    cv2.imshow("YOLO Detection", img)
    cv2.waitKey(1000)
    cv2.destroyAllWindows()

    # Load LiDAR point cloud from NPZ (assumed key 'arr_0').
    lidar_data = np.load(f"../data/lidar{idx}.npz")
    lidar_pc = lidar_data['arr_0']  # (N, 3) points in LiDAR coordinates

    # ----- Camera Parameters & Transformations -----
    K = np.array([
        [684.83331299, 0., 573.37109375],
        [0., 684.60968018, 363.70092773],
        [0., 0., 1.]
    ])
    T_l2c = np.array([
        [-0.01909581, -0.9997844, 0.0081547, 0.24521313],
        [0.06526397, -0.00938524, -0.9978239, -0.80389025],
        [0.9976853, -0.01852205, 0.06542912, -0.6605772],
        [0., 0., 0., 1.]
    ])
    T_c2l = np.linalg.inv(T_l2c)
    camera_origin_in_lidar = T_c2l[:3, 3]
    R_c2l = T_c2l[:3, :3]

    # Prepare lists for Open3D debugging visualization.
    # debug_all will contain all objects.
    # debug_filtered will exclude the full point cloud and the initial ROI.
    debug_all = []
    debug_filtered = []

    # Add the full LiDAR point cloud (gray) only to debug_all.
    pcd_full = o3d.geometry.PointCloud()
    pcd_full.points = o3d.utility.Vector3dVector(lidar_pc)
    pcd_full.paint_uniform_color([0.7, 0.7, 0.7])
    debug_all.append(pcd_full)

    # For each detected person:
    for box in boxes:
        start = time.time()
        cx, cy, w, h = box
        center_u, center_v = cx, cy
        ray_dir_cam = backproject_pixel(center_u, center_v, K)

        ray_dir_lidar = R_c2l @ ray_dir_cam
        ray_dir_lidar /= np.linalg.norm(ray_dir_lidar)

        intersection, _, _ = find_human_center_on_ray(
            lidar_pc, camera_origin_in_lidar, ray_dir_lidar,
            t_min=0.5, t_max=20.0, t_step=0.1,
            distance_threshold=0.5, min_points=10, ransac_threshold=0.05
        )

        if intersection is None:
            continue

        # Compute physical dimensions based on candidate depth.
        d = np.linalg.norm(intersection - camera_origin_in_lidar)
        physical_width = (w * d) / K[0, 0]
        physical_height = (h * d) / K[1, 1]
        depth_margin = physical_width  # or a constant like 0.5
        half_extents = np.array([1.1 * physical_width / 2, 1.1 * depth_margin / 2, 1.25 * physical_height / 2])

        # Extract ROI using a 3D box that matches the 2D bounding box.
        roi_points = extract_roi_box(lidar_pc, intersection, half_extents)
        if roi_points.shape[0] < 10:
            continue

        refined_cluster = refine_cluster(roi_points, intersection, eps=0.125, min_samples=10)

        refined_cluster = remove_ground_by_min_range(refined_cluster, z_range=0.05)
        if refined_cluster is None or refined_cluster.shape[0] == 0:
            # Fall back to the ray candidate when the cluster is empty after ground removal.
            refined_center = intersection
            bbox_dims = np.array([0, 0, 0])
            yaw, pitch, roll = 0, 0, 0
            obb = None
        else:
            pcd_cluster = o3d.geometry.PointCloud()
            pcd_cluster.points = o3d.utility.Vector3dVector(refined_cluster)
            obb = pcd_cluster.get_oriented_bounding_box()
            refined_center = obb.center
            bbox_dims = obb.extent
            euler_angles = R.from_matrix(obb.R.copy()).as_euler('zyx', degrees=True)
            yaw, pitch, roll = euler_angles[0], euler_angles[1], euler_angles[2]
        print(f"Detected human - Pose (yaw, pitch, roll): ({yaw}, {pitch}, {roll})")
        print(f"Bounding box center: {refined_center}, Dimensions: {bbox_dims}")
        end = time.time()
        print(f"processing time: {end - start}s")
        # Project the refined 3D center back to the image.
        refined_center_h = np.hstack([refined_center, 1])
        cam_point = T_l2c @ refined_center_h
        cam_point = cam_point[:3] / cam_point[2]
        u_proj = int(round(K[0, 0] * cam_point[0] + K[0, 2]))
        v_proj = int(round(K[1, 1] * cam_point[1] + K[1, 2]))

        # Draw the bounding box and projected 3D center on the image.
        top_left = (int(cx - w/2), int(cy - h/2))
        bottom_right = (int(cx + w/2), int(cy + h/2))
        cv2.rectangle(img, top_left, bottom_right, (255, 0, 0), 2)
        cv2.circle(img, (u_proj, v_proj), radius=8, color=(0, 255, 0), thickness=2)
        cv2.putText(img, "3D Center", (u_proj + 5, v_proj - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        # For debugging in Open3D:
        # Add the ROI point cloud (red) only to debug_all.
        pcd_roi = o3d.geometry.PointCloud()
        pcd_roi.points = o3d.utility.Vector3dVector(roi_points)
        pcd_roi.paint_uniform_color([1, 0, 0])
        debug_all.append(pcd_roi)
        # Add the refined cluster point cloud (blue) to both lists, when one exists.
        if refined_cluster is not None and refined_cluster.shape[0] > 0:
            pcd_cluster_vis = o3d.geometry.PointCloud()
            pcd_cluster_vis.points = o3d.utility.Vector3dVector(refined_cluster)
            pcd_cluster_vis.paint_uniform_color([0, 0, 1])
            debug_all.append(pcd_cluster_vis)
            debug_filtered.append(pcd_cluster_vis)
        # Add the refined center marker (green sphere) to both lists.
        sphere_center = o3d.geometry.TriangleMesh.create_sphere(radius=0.1)
        sphere_center.translate(refined_center)
        sphere_center.paint_uniform_color([0, 1, 0])
        debug_all.append(sphere_center)
        debug_filtered.append(sphere_center)
        # Add the ray (yellow) from camera origin to the refined center to both lists.
        ray_line = create_ray_line_set(camera_origin_in_lidar, refined_center)
        debug_all.append(ray_line)
        debug_filtered.append(ray_line)
        # Also add the oriented bounding box (magenta) to the debug visualization, when one exists.
        if obb is not None:
            obb.color = (1, 0, 1)
            debug_all.append(obb)
            debug_filtered.append(obb)

    # Show final image with detections.
    cv2.imshow("3D Human Centers Projection", img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    # Open3D debugging visualizations.
    print("Launching Open3D debug visualization with ALL objects...")
    visualize_geometries(debug_all, window_name="LiDAR Debug Visualization (All)")
    print("Launching Open3D debug visualization without Full PointCloud and ROI...")
    visualize_geometries(debug_filtered, window_name="LiDAR Debug Visualization (Filtered)")

if __name__ == '__main__':
    main()
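
Note: the commit title mentions velocity as well as 3D location, but the script above only recovers a per-frame 3D center for each pedestrian. A minimal sketch of how those per-frame centers could be differenced into a velocity estimate is shown below; the estimate_velocity helper and the assumption that consecutive frames carry known timestamps are illustrative additions, not part of this commit.

import numpy as np

# Hypothetical sketch (not in this commit): finite-difference velocity from two frames.
def estimate_velocity(prev_center, prev_time, curr_center, curr_time):
    """Return the (3,) velocity vector in LiDAR coordinates, in meters per second."""
    dt = curr_time - prev_time
    if dt <= 0:
        return np.zeros(3)
    return (np.asarray(curr_center) - np.asarray(prev_center)) / dt

# Example: a refined_center that moves 0.6 m along x over 0.5 s gives ~1.2 m/s.
v = estimate_velocity(np.array([4.0, 1.0, 0.0]), 0.0, np.array([4.6, 1.0, 0.0]), 0.5)
print(v)  # [1.2 0.  0. ]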

requirements.txt

Lines changed: 6 additions & 1 deletion
@@ -7,4 +7,9 @@ shapely
 klampt
 pyyaml
 dacite
-ultralytics
+<<<<<<< Updated upstream
+ultralytics
+=======
+ultralytics
+open3d
+>>>>>>> Stashed changes
