I am trying to train a vision-only object detection model with Boreas. But when projecting the boxes from lidar to camera, in many cases they are completely off, like here:

I first thought this was a labeling quality issue, but when just visualizing the lidar points and boxes, everything looks fine. When projecting the point cloud to the image, we can already see that the projection does not work correctly:

I was also thinking this just comes from the fact that lidar and camera are not synchronous. But even samples with <2ms difference between lidar and camera have problems:

My last guess would be that there is a problem with the camera intrinsics (especially because boxes on the left side of the images tend to have more problems). But then I could not explain why the projection works fine for other samples.
from datetime import datetime, timezone
import os
import matplotlib.pyplot as plt
import numpy as np
from pyboreas import BoreasDataset
from pyboreas.utils.utils import get_inverse_tf
import matplotlib.transforms as mtf
# Root directory of the extracted Boreas dataset.
DATASET_DIR = "/mnt/storage/external_datasets/boreas"
# Name of the sequence (split) to load.
SPLIT = "boreas-objects-v1"
# Camera timestamps (microseconds since epoch) of the frames to inspect.
# NOTE: the original literal listed 1598986348941415 twice; set literals
# de-duplicate anyway, so only the unique values are kept here.
CAM_TIMESTAMPS = {
    1598986441156000,
    1598986348941415,
    1598991985058697,
    1598990646649543,
    1598989153418153,
}
# Directory where the generated diagnostic plots are written.
OUTPUT_DIR = "tmp/boreas_official_plots"
def micro_to_datetime(micro):
    """Convert a microsecond Unix-epoch timestamp to an aware UTC datetime."""
    seconds = micro / 1_000_000
    return datetime.fromtimestamp(seconds, tz=timezone.utc)
def plot(lid, cam):
    """Save three diagnostic plots for one lidar/camera frame pair.

    Writes into OUTPUT_DIR: a bird's-eye view of the point cloud with 3D
    boxes, the motion-compensated point cloud projected into the image, and
    the 3D boxes projected into the image.

    NOTE: mutates `lid` in place (passthrough crop, motion removal, frame
    transform) and reads the module-level `seq` for calibration and labels.

    :param lid: pyboreas lidar frame (provides points, pose, body_rate, ...)
    :param cam: pyboreas camera frame (provides img, pose, timestamp_micro)
    """
    # --- Bird's-eye view of the cropped point cloud with 3D boxes ---
    bounds = [-75, 75, -75, 75, -5, 10]  # xmin, xmax, ymin, ymax, zmin, zmax
    bbs = lid.get_bounding_boxes()
    bbs.filter_empty()
    bbs.passthrough(bounds)
    lid.passthrough(bounds)
    fig = plt.figure(figsize=(10, 10))
    ax = fig.add_subplot()
    rot = 90
    # Rotate the BEV by 90 degrees; use the subplot's own data transform.
    M = mtf.Affine2D().rotate_deg(rot) + ax.transData
    ax.scatter(lid.points[:, 0], lid.points[:, 1], s=1, c=lid.points[:, 2],
               vmin=-5, vmax=10, transform=M)
    ax.axis('equal')
    bbs.render_2d(ax, transform=M)
    plt.savefig(f"{OUTPUT_DIR}/{cam.timestamp_micro}_bev.png")
    plt.close(fig)  # free the figure; otherwise figures accumulate across calls

    # --- Point cloud (undistorted) projected into the image ---
    T_enu_camera = cam.pose
    T_enu_lidar = lid.pose
    T_camera_lidar = np.matmul(get_inverse_tf(T_enu_camera), T_enu_lidar)
    # Motion-compensate the sweep to the timestamp of its last point, then
    # move the points from the lidar frame into the camera frame.
    lid.remove_motion(lid.body_rate, tref=lid.points[-1, -1])
    lid.transform(T_camera_lidar)
    lid.passthrough([-75, 75, -20, 20, 2, 40])
    uv, colors, mask = lid.project_onto_image(seq.calib.P0)
    fig = plt.figure(figsize=(24.48, 20.48), dpi=100)
    ax = fig.add_subplot()
    ax.imshow(cam.img)
    ax.set_xlim(0, 2448)
    ax.set_ylim(2048, 0)
    ax.scatter(uv[:, 0], uv[:, 1], c=colors, marker=',', s=3,
               edgecolors='none', alpha=0.7, cmap='jet')
    ax.set_axis_off()
    plt.savefig(f"{OUTPUT_DIR}/{cam.timestamp_micro}_pc_projected.png")
    plt.close(fig)

    # --- 3D bounding boxes projected into the image ---
    bbs = cam.get_bounding_boxes(seq.labelFiles, seq.labelTimes, seq.labelPoses)
    UV = bbs.project(seq.calib.P0)
    bbs.visualize(cam.img, seq.calib.P0, checkdims=True)
    cam.visualize(show=False, save=f"{OUTPUT_DIR}/{cam.timestamp_micro}_box_projected.png")
# --- Load and prepare the dataset ---
split = [[SPLIT]]
bd = BoreasDataset(DATASET_DIR, split=split, labelFolder="labels_detection")
assert len(bd.sequences) == 1
seq = bd.sequences[0]
seq.filter_frames_gt()  # removes lidar frames with no annotations (every 2nd)
seq.synchronize_frames('lidar')  # camera and lidar at same index are now the closest
os.makedirs(OUTPUT_DIR, exist_ok=True)

# --- Collect all camera timestamps from the image filenames ---
imgs_dir = os.path.join(DATASET_DIR, SPLIT, "camera")
all_img_timestamps = sorted(
    int(os.path.splitext(f)[0]) for f in os.listdir(imgs_dir) if f.endswith('.png')
)

# --- Map each requested camera timestamp to its frame index ---
cam_ts_to_idx = {}
for frame_idx, cam_frame in enumerate(seq.camera_frames):
    if cam_frame.timestamp_micro in CAM_TIMESTAMPS:
        cam_ts_to_idx[cam_frame.timestamp_micro] = frame_idx

# --- Print an overview and generate plots for each selected frame ---
for cam_ts, idx in cam_ts_to_idx.items():
    print("-" * 100)
    cam_frame = seq.get_camera(idx)
    assert cam_frame.timestamp_micro == cam_ts
    lidar_frame = seq.get_lidar(idx)
    lidar_ts = lidar_frame.timestamp_micro
    print(f"Camera ts: {cam_ts} - {micro_to_datetime(cam_ts)}")
    print(f"Lidar ts: {lidar_ts} - {micro_to_datetime(lidar_ts)}")
    print(f"Diff Cam-Lidar: {abs(lidar_ts - cam_ts)/1000} ms")
    img_idx = all_img_timestamps.index(cam_ts)
    # Guard the boundaries: index -1 would silently wrap to the last element
    # for the first image, and img_idx + 1 would raise IndexError on the last.
    prev_img_diff = (abs(all_img_timestamps[img_idx - 1] - cam_ts) / 1000
                     if img_idx > 0 else float('inf'))
    next_img_diff = (abs(all_img_timestamps[img_idx + 1] - cam_ts) / 1000
                     if img_idx + 1 < len(all_img_timestamps) else float('inf'))
    print(f"Diff Prev Img: {prev_img_diff} ms - Diff Next Img: {next_img_diff} ms")
    plot(lidar_frame, cam_frame)
Hi, thanks for providing this dataset and devkit!
I am trying to train a vision-only object detection model with Boreas. But when projecting the boxes from lidar to camera, in many cases they are completely off, like here:

I first thought this was a labeling quality issue, but when just visualizing the lidar points and boxes, everything looks fine. When projecting the point cloud to the image, we can already see that the projection does not work correctly:

I was also thinking this just comes from the fact that lidar and camera are not synchronous. But even samples with <2ms difference between lidar and camera have problems:

My last guess would be that there is a problem with the camera intrinsics (especially because boxes on the left side of the images tend to have more problems). But then I could not explain why the projection works fine for other samples.
My code (based on the projection and plotting code from the tutorial):