From d8f2c7006211378de384fc1d42d2d793ecd56509 Mon Sep 17 00:00:00 2001 From: Fetty Date: Mon, 7 Jul 2025 14:16:13 +0200 Subject: [PATCH 1/4] Docker utils added, demo scripts now support visualization --- README.md | 32 +++ demo_docker.py | 506 +++++++++++++++++++++++++++++++++++++++++ demo_docker.sh | 4 + docker/Dockerfile | 36 +++ docker/build | 15 ++ docker/run | 87 +++++++ environment.yml | 4 +- exts/setup.py | 26 ++- utils_infer.py | 3 +- visualization_tools.py | 166 ++++++++++++++ 10 files changed, 868 insertions(+), 11 deletions(-) create mode 100644 demo_docker.py create mode 100644 demo_docker.sh create mode 100644 docker/Dockerfile create mode 100755 docker/build create mode 100755 docker/run create mode 100644 visualization_tools.py diff --git a/README.md b/README.md index 1f72be0..26f3f02 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,7 @@ ## 🆕 Update Log +- **[2025.07.07]** Now **docker** scripts available! - **[2025.04.23]** We have released the **2rd stage training code**! 🎉 - **[2025.04.11]** We have released the **inference code**! 🎉 @@ -36,6 +37,32 @@ DMD³C introduces a novel framework for **fine-grained depth completion** by dis ## 🚀 Getting Started +### (Docker only) +__Clone repo and set up docker:__ +```sh +git clone https://github.com/fetty31/DMD3C.git # clone repo +cd DMD3C/docker/ +chmod +x build run +./build # build docker image +./run # run docker container +``` +__Once inside docker, set up environment:__ +```sh +## 🐳 Inside the Docker container +# build conda env +cd home/DMD3C/ +conda env create -f environment.yml +conda init && source /root/.bashrc +conda activate bp + +# build cuda extension +cd exts +export CC=/usr/bin/gcc +export CXX=/usr/bin/g++ +python setup.py install +``` +__From this, jump to steps 3-5__ + ### 1. Clone Base Repository ```bash @@ -80,6 +107,11 @@ Run inference: bash demo.sh ``` +_(Or if you are using docker)_ +```bash +bash demo_docker.sh +``` + You will get results like this: ![supp-video 00_00_00-00_00_30](https://github.com/user-attachments/assets/a1412bca-c368-4d19-a081-79eeabaa2901) diff --git a/demo_docker.py b/demo_docker.py new file mode 100644 index 0000000..e2e0542 --- /dev/null +++ b/demo_docker.py @@ -0,0 +1,506 @@ +import open3d as o3d +import numpy as np +import cv2 +import os +import matplotlib.pyplot as plt +import torch +import hydra +from utils_infer import Trainer +from mpl_toolkits.mplot3d import Axes3D +from tqdm import tqdm +from PIL import Image +import time +from visualization_tools import ImageGridViewer, PointCloudImageViewer, PointCloudViewer + +def save_point_cloud_to_image(pcd, image_size=(1600, 1200)): + vis = o3d.visualization.Visualizer() + vis.create_window(visible=False, width=image_size[0], height=image_size[1]) # 设置窗口大小并不显示 + + # 添加点云到可视化器中 + vis.add_geometry(pcd) + + # 获取 ViewControl 对象并设置自定义视角 + view_control = vis.get_view_control() + + # 设置视角参数 + parameters = { + "boundingbox_max" : [ -1.5977719363706235, 11.519330868353832, 84.127326965332031 ], + "boundingbox_min" : [ -56.623178268627761, -50.724836932361825, 4.6948032379150391 ], + "field_of_view" : 60.0, + "front" : [ 0.36788389015943362, 0.28372788418091033, -0.8855280521244856 ], + "lookat" : [ -29.110475102499194, -19.602753032003996, 44.411065101623535 ], + "up" : [ -0.88654778269461509, -0.18027422226025128, -0.42606834403382188 ], + "zoom" : 0.5199999999999998 + } + + # 应用视角设置 + ctr = vis.get_view_control() + ctr.set_lookat(parameters["lookat"]) + ctr.set_front(parameters["front"]) + ctr.set_up(parameters["up"]) + ctr.set_zoom(parameters["zoom"]) + + # 渲染点云为图像 + vis.poll_events() + vis.update_renderer() + + # 获取点云的2D图像 + depth_image = np.asarray(vis.capture_screen_float_buffer(do_render=True)) + vis.destroy_window() + + # 调整尺寸并格式化图像 + depth_image = (depth_image * 255).astype(np.uint8) # 转换为8位图像 + depth_image = cv2.cvtColor(depth_image, cv2.COLOR_RGB2BGR) # 转换为BGR格式以便与OpenCV兼容 + + return depth_image + +def depth_to_point_cloud(depth, K_cam): + if isinstance(K_cam, np.ndarray): + K_cam = torch.from_numpy(K_cam).to(depth.device) + + A, B, H, W = depth.shape + i, j = torch.meshgrid( + torch.arange(0, H, device=depth.device), + torch.arange(0, W, device=depth.device), + indexing='ij' + ) + z = depth[0][0] + x = (j - K_cam[0, 2]) * z / K_cam[0, 0] + y = (i - K_cam[1, 2]) * z / K_cam[1, 1] + y = -y + + points = torch.stack((x, y, z), dim=2).reshape(-1, 3) + return points.detach().cpu().numpy() + +def depth_to_colored_point_cloud(depth, K_cam, bgr_image): + """ + Converts a depth map to a 3D point cloud and assigns RGB colors. + + Args: + depth (torch.Tensor): Shape (1, 1, H, W), depth values. + K_cam (np.ndarray or torch.Tensor): 3x3 intrinsic matrix. + bgr_image (np.ndarray): (H, W, 3) image in BGR format (from OpenCV). + + Returns: + points (np.ndarray): (N, 3) 3D points. + colors (np.ndarray): (N, 3) RGB colors in [0, 1]. + """ + if isinstance(K_cam, np.ndarray): + K_cam = torch.from_numpy(K_cam).to(depth.device) + + _, _, H, W = depth.shape + + # Pixel coordinates + i, j = torch.meshgrid( + torch.arange(0, H, device=depth.device), + torch.arange(0, W, device=depth.device), + indexing='ij' + ) + + z = depth[0, 0] + x = (j - K_cam[0, 2]) * z / K_cam[0, 0] + y = (i - K_cam[1, 2]) * z / K_cam[1, 1] + y = -y # flip if needed + + points = torch.stack((x, y, z), dim=2).reshape(-1, 3).detach().cpu().numpy() + + # Convert BGR to RGB and normalize + rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0 + colors = rgb_image.reshape(-1, 3) + + return points, colors + + +def read_calib_file(filepath): + """Read in a calibration file and parse into a dictionary.""" + data = {} + + with open(filepath, 'r') as f: + for line in f.readlines(): + key, value = line.split(':', 1) + # The only non-float values in these files are dates, which + # we don't care about anyway + try: + data[key] = np.array([float(x) for x in value.split()]) + except ValueError: + pass + + return data + + +def pull_K_cam(calib_path): + filedata = read_calib_file(calib_path) + P_rect_20 = np.reshape(filedata['P_rect_02'], (3, 4)) + K_cam = P_rect_20[0:3, 0:3] + return K_cam + + +def depth_color(val, min_d=0, max_d=120): + """ + print Color(HSV's H value) corresponding to distance(m) + close distance = red , far distance = blue + """ + np.clip(val, 0, max_d, out=val) # max distance is 120m but usually not usual + return (((val - min_d) / (max_d - min_d)) * 120).astype(np.uint8) + + +def in_h_range_points(points, m, n, fov): + """ extract horizontal in-range points """ + return np.logical_and(np.arctan2(n, m) > (-fov[1] * np.pi / 180), + np.arctan2(n, m) < (-fov[0] * np.pi / 180)) + + +def in_v_range_points(points, m, n, fov): + """ extract vertical in-range points """ + return np.logical_and(np.arctan2(n, m) < (fov[1] * np.pi / 180), + np.arctan2(n, m) > (fov[0] * np.pi / 180)) + + +def fov_setting(points, x, y, z, dist, h_fov, v_fov): + """ filter points based on h,v FOV """ + + if h_fov[1] == 180 and h_fov[0] == -180 and v_fov[1] == 2.0 and v_fov[0] == -24.9: + return points + + if h_fov[1] == 180 and h_fov[0] == -180: + return points[in_v_range_points(points, dist, z, v_fov)] + elif v_fov[1] == 2.0 and v_fov[0] == -24.9: + return points[in_h_range_points(points, x, y, h_fov)] + else: + h_points = in_h_range_points(points, x, y, h_fov) + v_points = in_v_range_points(points, dist, z, v_fov) + return points[np.logical_and(h_points, v_points)] + + +def in_range_points(points, size): + """ extract in-range points """ + return np.logical_and(points > 0, points < size) + + +def velo_points_filter(points, v_fov, h_fov): + """ extract points corresponding to FOV setting """ + + # Projecting to 2D + x = points[:, 0] + y = points[:, 1] + z = points[:, 2] + dist = np.sqrt(x ** 2 + y ** 2 + z ** 2) + + if h_fov[0] < -90: + h_fov = (-90,) + h_fov[1:] + if h_fov[1] > 90: + h_fov = h_fov[:1] + (90,) + + x_lim = fov_setting(x, x, y, z, dist, h_fov, v_fov)[:, None] + y_lim = fov_setting(y, x, y, z, dist, h_fov, v_fov)[:, None] + z_lim = fov_setting(z, x, y, z, dist, h_fov, v_fov)[:, None] + + # Stack arrays in sequence horizontally + xyz_ = np.hstack((x_lim, y_lim, z_lim)) + xyz_ = xyz_.T + + # stack (1,n) arrays filled with the number 1 + one_mat = np.full((1, xyz_.shape[1]), 1) + xyz_ = np.concatenate((xyz_, one_mat), axis=0) + + # need dist info for points color + dist_lim = fov_setting(dist, x, y, z, dist, h_fov, v_fov) + color = depth_color(dist_lim, 0, 70) + + return xyz_, color + + +def calib_velo2cam(filepath): + """ + get Rotation(R : 3x3), Translation(T : 3x1) matrix info + using R,T matrix, we can convert velodyne coordinates to camera coordinates + """ + with open(filepath, "r") as f: + file = f.readlines() + + for line in file: + (key, val) = line.split(':', 1) + if key == 'R': + R = np.fromstring(val, sep=' ') + R = R.reshape(3, 3) + if key == 'T': + T = np.fromstring(val, sep=' ') + T = T.reshape(3, 1) + return R, T + + +def calib_cam2cam(filepath, mode): + with open(filepath, "r") as f: + file = f.readlines() + + for line in file: + (key, val) = line.split(':', 1) + if key == ('P_rect_' + mode): + P_ = np.fromstring(val, sep=' ') + P_ = P_.reshape(3, 4) + # erase 4th column ([0,0,0]) + P_ = P_[:3, :3] + return P_ + + +def velo3d_2_camera2d_points(points, v_fov, h_fov, vc_path, cc_path, mode='02', image_shape=None): + """ + Return velodyne 3D points corresponding to camera 2D image and sparse depth map + """ + + # R_vc = Rotation matrix ( velodyne -> camera ) + # T_vc = Translation matrix ( velodyne -> camera ) + R_vc, T_vc = calib_velo2cam(vc_path) + + # P_ = Projection matrix ( camera coordinates 3d points -> image plane 2d points ) + P_ = calib_cam2cam(cc_path, mode) + xyz_v, c_ = velo_points_filter(points, v_fov, h_fov) + + RT_ = np.concatenate((R_vc, T_vc), axis=1) + + # Initialize sparse depth map + if image_shape is None: + raise ValueError( + "Image shape must be provided to generate sparse depth map") + + # Create a depth map with NaN values + depth_map = np.full(image_shape[:2], 0) + + # Convert velodyne coordinates(X_v, Y_v, Z_v) to camera coordinates(X_c, Y_c, Z_c) + for i in range(xyz_v.shape[1]): + xyz_v[:3, i] = np.matmul(RT_, xyz_v[:, i]) + + xyz_c = np.delete(xyz_v, 3, axis=0) + + # Convert camera coordinates(X_c, Y_c, Z_c) to image(pixel) coordinates(x,y) + for i in range(xyz_c.shape[1]): + xyz_c[:, i] = np.matmul(P_, xyz_c[:, i]) + + # Normalize by the third coordinate to get 2D pixel coordinates + xy_i = xyz_c[:2, :] / xyz_c[2, :] + depth_values = xyz_c[2, :] # Z-coordinate (depth) in camera space + + # Filter out points that are out of image bounds + valid_mask = np.logical_and.reduce(( + xy_i[0, :] >= 0, + xy_i[0, :] < image_shape[1], # x coordinate within image width + xy_i[1, :] >= 0, + xy_i[1, :] < image_shape[0], # y coordinate within image height + )) + + valid_points = xy_i[:, valid_mask] + valid_depths = depth_values[valid_mask] + + # Fill the depth map with depth values + for i in range(valid_points.shape[1]): + x = int(valid_points[0, i]) + y = int(valid_points[1, i]) + depth_map[y, x] = valid_depths[i] + + # Return both the projected points and the sparse depth map + return valid_points, c_, depth_map + + +def print_projection_cv2(points, color, image): + """ project converted velodyne points into camera image """ + + hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV) + + for i in range(points.shape[1]): + cv2.circle(hsv_image, (np.int32(points[0][i]), np.int32( + points[1][i])), 2, (int(color[i]), 255, 255), -1) + + return cv2.cvtColor(hsv_image, cv2.COLOR_HSV2BGR) + + +def print_projection_plt(points, color, image): + """ project converted velodyne points into camera image """ + + hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV) + + for i in range(points.shape[1]): + cv2.circle(hsv_image, (np.int32(points[0][i]), np.int32( + points[1][i])), 2, (int(color[i]), 255, 255), -1) + + return cv2.cvtColor(hsv_image, cv2.COLOR_HSV2RGB) + + +def load_from_bin(bin_path): + obj = np.fromfile(bin_path, dtype=np.float32).reshape(-1, 4) + # ignore reflectivity info + return obj[:, :3] + + +def set_axes_equal(ax): + """使 3D 图的刻度长短一致""" + x_limits = ax.get_xlim3d() + y_limits = ax.get_ylim3d() + z_limits = ax.get_zlim3d() + + # 找到所有坐标的中心和范围 + x_range = abs(x_limits[1] - x_limits[0]) + x_middle = np.mean(x_limits) + y_range = abs(y_limits[1] - y_limits[0]) + y_middle = np.mean(y_limits) + z_range = abs(z_limits[1] - z_limits[0]) + z_middle = np.mean(z_limits) + + # 计算出最大的范围 + plot_radius = 0.5 * max([x_range, y_range, z_range]) + + # 设置每个坐标轴的范围,使其相等 + ax.set_xlim3d([x_middle - plot_radius, x_middle + plot_radius]) + ax.set_ylim3d([y_middle - plot_radius, y_middle + plot_radius]) + ax.set_zlim3d([z_middle - plot_radius, z_middle + plot_radius]) + + +def save_point_cloud_to_ply_open3d(point_cloud, image, filename): + # 创建 open3d 点云对象 + pcd = o3d.geometry.PointCloud() + + # 设置点云的坐标 + pcd.points = o3d.utility.Vector3dVector(point_cloud) + + # 将图像展开为 (N, 3) 形式的颜色数据 + colors = image.reshape(-1, 3) / 255.0 # 归一化颜色到 [0, 1] + + # 设置点云的颜色 + pcd.colors = o3d.utility.Vector3dVector(colors) + + # 保存点云为 .ply 文件 + o3d.io.write_point_cloud(filename, pcd) + return pcd + +def visualize_pointcloud(points: np.ndarray, colors: np.ndarray = None): + """ + Visualizes a 3D point cloud using Open3D. + + Args: + points (np.ndarray): Nx3 array of XYZ coordinates. + colors (np.ndarray, optional): Nx3 array of RGB colors in [0, 1]. Defaults to None. + """ + if points.ndim != 2 or points.shape[1] != 3: + raise ValueError("Points should be an Nx3 NumPy array.") + + pcd = o3d.geometry.PointCloud() + pcd.points = o3d.utility.Vector3dVector(points) + + if colors is not None: + if colors.shape != points.shape: + raise ValueError("Colors must have the same shape as points.") + pcd.colors = o3d.utility.Vector3dVector(colors) + + o3d.visualization.draw_geometries([pcd]) + + +@hydra.main(config_path='configs', config_name='config', version_base='1.2') +def main(cfg): + with Trainer(cfg) as run: + net = run.net_ema.module.cuda() + net.eval() + # 读取左目图像 + global_path = "/home/kitti_dataset/" + # base = "datas/kitti/raw/2011_09_26/2011_09_26_drive_0002_sync" + base = global_path + "2011_09_26/2011_09_26_drive_0005_sync" + + image_type = 'color' # 'grayscale' or 'color' image + + mode = '00' if image_type == 'grayscale' else '02' + + v2c_filepath = global_path + '2011_09_26/calib_velo_to_cam.txt' + c2c_filepath = global_path + '2011_09_26/calib_cam_to_cam.txt' + image_mean = np.array([90.9950, 96.2278, 94.3213]) + image_std = np.array([79.2382, 80.5267, 82.1483]) + image_height = 352 + image_width = 1216 + + t_file = base + "/image_" + mode + "/timestamps.txt" + with open(t_file, 'r') as f: + N = sum(1 for _ in f) + + # viewer = ImageGridViewer(titles=["Depth", "Raw", "Lidar", "Projected"]) + # viewer_3d = PointCloudImageViewer() + # viewer_pc = PointCloudViewer() + + for i in tqdm(range(N)): + + image = np.array(Image.open(os.path.join( + base, 'image_' + mode + f'/data/0000000{i:03d}.png')).convert('RGB'), dtype=np.uint8) + + if image is None: + break + width, height = image.shape[1], image.shape[0] + + # bin file -> numpy array + velo_points = load_from_bin(os.path.join( + base, f'velodyne_points/data/0000000{i:03d}.bin')) + + image_type = 'color' # 'grayscale' or 'color' image + # image_00 = 'grayscale image' , image_02 = 'color image' + mode = '00' if image_type == 'grayscale' else '02' + + ans, c_, lidar = velo3d_2_camera2d_points(velo_points, v_fov=(-24.9, 2.0), h_fov=(-45, 45), + vc_path=v2c_filepath, cc_path=c2c_filepath, mode=mode, + image_shape=image.shape) + + image_vis = print_projection_plt(points=ans, color=c_, image=image.copy()) + + # depth completion + K_cam = torch.from_numpy(pull_K_cam( + c2c_filepath).astype(np.float32)).cuda() + + tp = image.shape[0] - image_height + lp = (image.shape[1] - image_width) // 2 + image = image[tp:tp + image_height, lp:lp + image_width] + lidar = lidar[tp:tp + image_height, lp:lp + image_width, None] + image_vis = image_vis[tp:tp + image_height, lp:lp + image_width] + K_cam[0, 2] -= lp + K_cam[1, 2] -= tp + + image = (image - image_mean) / image_std + + image_tensor = image.transpose(2, 0, 1).astype(np.float32)[None] + lidar_tensor = lidar.transpose(2, 0, 1).astype(np.float32)[None] + + image_tensor = torch.from_numpy(image_tensor) + lidar_tensor = torch.from_numpy(lidar_tensor) + + output = net(image_tensor.cuda(), None, + lidar_tensor.cuda(), K_cam[None].cuda()) + + if isinstance(output, (list, tuple)): + output = output[-1] + + tensor_output = output + + output = output.squeeze().detach().cpu().numpy() + image = image * image_std + image_mean + + output_max, output_min = output.max(), output.min() + output_norm = (output - output_min) / (output_max - output_min) * 255 + output_norm = output_norm.astype('uint8') + output_color = cv2.applyColorMap(output_norm, cv2.COLORMAP_JET) + + # Transform from depth to 3D + # points_3d = depth_to_point_cloud(tensor_output, K_cam) + points_3d, colors = depth_to_colored_point_cloud(tensor_output, K_cam, image.astype(np.uint8)[:, :, ::-1]) + del tensor_output + + visualize_pointcloud(points_3d, colors) + + # viewer_3d.visualize_for_seconds(points_3d, output_color, 2) + + # viewer_pc.visualize_for_seconds(points_3d, 2, colors) + + # viewer.update([output_color, image.astype(np.uint8)[:, :, ::-1], lidar.astype(np.uint8) * 3, image_vis], + # duration=0.5) # Display for x seconds + + cv2.imwrite(f'outputs/0000000{i:03d}_depth.png', output_color) + + cv2.imwrite(f'outputs/0000000{i:03d}_image.png', image.astype(np.uint8)[:, :, ::-1]) + cv2.imwrite(f'outputs/0000000{i:03d}_lidar.png', lidar.astype(np.uint8) * 3) + cv2.imwrite(f'outputs/0000000{i:03d}_image_vis.png', image_vis) + + +if __name__ == '__main__': + main() diff --git a/demo_docker.sh b/demo_docker.sh new file mode 100644 index 0000000..cd45879 --- /dev/null +++ b/demo_docker.sh @@ -0,0 +1,4 @@ +python demo_docker.py gpus=[0] name=BP_KITTI ++chpt=dmd3c_distillation_depth_anything_v2.pth \ + net=PMP num_workers=1 \ + data=KITTI data.testset.mode=test data.testset.height=352 \ + test_batch_size=1 metric=RMSE ++save=true diff --git a/docker/Dockerfile b/docker/Dockerfile new file mode 100644 index 0000000..f055d49 --- /dev/null +++ b/docker/Dockerfile @@ -0,0 +1,36 @@ +# Base image with CUDA and PyTorch support +FROM nvidia/cuda:12.1.1-devel-ubuntu22.04 + +# System setup +RUN apt-get update && apt-get install -y \ + python3.10 python3-pip python3-dev git build-essential cmake \ + libopenblas-dev libomp-dev g++ curl vim wget libgl1 \ + && rm -rf /var/lib/apt/lists/* + +# Install Miniconda +ENV PATH="/root/miniconda3/bin:${PATH}" +ARG PATH="/root/miniconda3/bin:${PATH}" + +# Install Miniconda on x86 or ARM platforms +RUN arch=$(uname -m) && \ + if [ "$arch" = "x86_64" ]; then \ + MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh"; \ + elif [ "$arch" = "aarch64" ]; then \ + MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-aarch64.sh"; \ + else \ + echo "Unsupported architecture: $arch"; \ + exit 1; \ + fi && \ + wget $MINICONDA_URL -O miniconda.sh && \ + mkdir -p /root/.conda && \ + bash miniconda.sh -b -p /root/miniconda3 && \ + rm -f miniconda.sh + +RUN conda --version + +# Clone DMD3C project +RUN cd /home \ + && git clone https://github.com/fetty31/DMD3C.git + +# Set python alias +RUN ln -s /usr/bin/python3 /usr/bin/python diff --git a/docker/build b/docker/build new file mode 100755 index 0000000..587d0de --- /dev/null +++ b/docker/build @@ -0,0 +1,15 @@ +#!/bin/bash +set -e + +DOCKER_IMG="dmd3c-img" + +if [[ $# -eq 0 ]] ; then + echo "WARNING: No arguments provided, image ${DOCKER_IMG} will be created" +else + DOCKER_IMG=$1 +fi + +PROJECT_ROOT="$(cd "$(dirname "$0")"; pwd)" + +echo "----- Building Docker image from ${PROJECT_ROOT}/Dockerfile " +docker build -t ${DOCKER_IMG} ${PROJECT_ROOT} \ No newline at end of file diff --git a/docker/run b/docker/run new file mode 100755 index 0000000..eaeb0d4 --- /dev/null +++ b/docker/run @@ -0,0 +1,87 @@ +#!/bin/bash + +DOCKER_NAME="dmd3c-container" +DOCKER_IMG="dmd3c-img:latest" + +if [[ $# -eq 0 ]] ; then + echo "WARNING: No arguments provided, container ${DOCKER_NAME} from image ${DOCKER_IMG} will be started" +elif [[ $# -eq 1 ]] ; then + echo "WARNING: Only one argument provided, container $1 from image ${DOCKER_IMG} will be started" + DOCKER_NAME=$1 +else + DOCKER_NAME=$1 + DOCKER_IMG=$2 +fi + +# Variables required for logging as a user with the same id as the user running this script +export LOCAL_USER_ID=`id -u $USER` +export LOCAL_GROUP_ID=`id -g $USER` +export LOCAL_GROUP_NAME=`id -gn $USER` + +PROJECT_ROOT="$(cd "$(dirname "$0")"; cd ..; pwd)" + +# Allow customization for docker args +# For instance, to remove the container when it exits: DOCKER_USER_ARGS='--rm' pal_docker.sh ... +DOCKER_USER_ARGS="$DOCKER_USER_ARGS --env LOCAL_USER_ID --env LOCAL_GROUP_ID --env LOCAL_GROUP_NAME -it" +# DOCKER_USER_ARGS="-it" + +# Variables for forwarding ssh agent into docker container +SSH_AUTH_ARGS="" +if [ ! -z $SSH_AUTH_SOCK ]; then + DOCKER_SSH_AUTH_ARGS="-v $SSH_AUTH_SOCK:/run/host_ssh_auth_sock -e SSH_AUTH_SOCK=/run/host_ssh_auth_sock" +fi + +# Settings required for having nvidia GPU acceleration inside the docker +DOCKER_GPU_ARGS="--env DISPLAY=$DISPLAY --env QT_X11_NO_MITSHM=1 --volume=/tmp/.X11-unix:/tmp/.X11-unix:rw -e NVIDIA_DRIVER_CAPABILITIES=all" + +dpkg -l | grep nvidia-container-toolkit &> /dev/null +HAS_NVIDIA_TOOLKIT=$? + +type nvidia-docker &> /dev/null +HAS_NVIDIA_DOCKER=$? + +if [ $HAS_NVIDIA_TOOLKIT -eq 0 ]; then + docker_version=`docker version --format '{{.Client.Version}}' | cut -d. -f1` + if [ $docker_version -ge 19 ]; then + DOCKER_COMMAND="docker run --gpus all" + else + DOCKER_COMMAND="docker run --runtime=nvidia" + fi +elif [ $HAS_NVIDIA_DOCKER -eq 0 ]; then + DOCKER_COMMAND="nvidia-docker run" +else + echo "Running without nvidia-docker, if you have an NVidia card you may need it"\ + "to have GPU acceleration" + DOCKER_COMMAND="docker run" +fi + +DOCKER_NETWORK_ARGS="--net host" +if [[ "$@" == *"--net "* ]]; then + DOCKER_NETWORK_ARGS="" +fi + +XAUTH=/tmp/.docker.xauth + +xhost +local:root + +if docker ps -a | grep -q ${DOCKER_NAME}; then + echo "WARNING: A container named ${DOCKER_NAME} was already created; it will be started" + docker start -i ${DOCKER_NAME} +else + + $DOCKER_COMMAND \ + $DOCKER_USER_ARGS \ + $DOCKER_GPU_ARGS \ + $DOCKER_SSH_AUTH_ARGS \ + $DOCKER_NETWORK_ARGS \ + --privileged \ + --name ${DOCKER_NAME} \ + --env="XAUTHORITY=$XAUTH" \ + --volume="$HOME/Desktop/kitti_dataset:/home/kitti_dataset" \ + ${DOCKER_IMG} + +fi + +xhost -local:root +echo "Goodbye :)" + diff --git a/environment.yml b/environment.yml index 15bbc19..06c22e0 100644 --- a/environment.yml +++ b/environment.yml @@ -269,4 +269,6 @@ dependencies: - zipp=3.17.0=pyhd8ed1ab_0 - zlib=1.2.13=hd590300_5 - zstd=1.5.2=ha4553b6_0 - + - pip: + - open3d + - imutils diff --git a/exts/setup.py b/exts/setup.py index 18f7453..b8bc5c7 100644 --- a/exts/setup.py +++ b/exts/setup.py @@ -4,15 +4,23 @@ setup( name='BpOps', ext_modules=[ - CUDAExtension('BpOps', - [ - 'bp_cuda.cpp', - 'bp_cuda_kernel.cu', - ], - extra_compile_args={'cxx': ['-g'], - 'nvcc': ['-O3']} - ), + CUDAExtension( + name='BpOps', + sources=['bp_cuda.cpp', 'bp_cuda_kernel.cu'], + extra_compile_args={ + 'cxx': ['-O3', '-std=c++17'], + 'nvcc': [ + '-O3', + '-std=c++17', + '--expt-relaxed-constexpr', + '--expt-extended-lambda', + '-gencode=arch=compute_89,code=sm_89', # You can change based on your GPU + '-ccbin=/usr/bin/g++' # Ensures nvcc uses system g++ not conda-cc + ] + } + ), ], cmdclass={ 'build_ext': BuildExtension - }) + } +) diff --git a/utils_infer.py b/utils_infer.py index f19f783..1a7d00e 100644 --- a/utils_infer.py +++ b/utils_infer.py @@ -102,7 +102,8 @@ def init_net(self): if 'chpt' in self.cfg: self.ddp_log(f'resume CHECKPOINTS') save_path = os.path.join('checkpoints', self.cfg.chpt) - cp = torch.load(os.path.join(save_path, 'result_ema.pth'), map_location=torch.device('cpu')) + # cp = torch.load(os.path.join(save_path, 'result_ema.pth'), map_location=torch.device('cpu')) + cp = torch.load(save_path, map_location=torch.device('cpu')) model.load_state_dict(cp['net'], strict=True) self.best_metric_ema = cp['best_metric_ema'] print("Epoch:", cp["epoch"]) diff --git a/visualization_tools.py b/visualization_tools.py new file mode 100644 index 0000000..ab20c26 --- /dev/null +++ b/visualization_tools.py @@ -0,0 +1,166 @@ +import matplotlib.pyplot as plt +import cv2 +import numpy as np +import open3d as o3d +import open3d.visualization.gui as gui +import open3d.visualization.rendering as rendering +import matplotlib.cm as cm +import time +import threading + +class ImageGridViewer: + def __init__(self, titles=None): + """ + Initializes a persistent 2x2 image grid viewer that preserves image size. + + Args: + titles (list of str): Optional list of 4 titles for the subplots. + """ + self.fig, self.axes = plt.subplots(2, 2, figsize=(10, 8)) + self.titles = titles if titles else [""] * 4 + + self.images = [] + for ax, title in zip(self.axes.flatten(), self.titles): + ax.set_title(title) + ax.axis('off') + ax.set_aspect('equal') # Prevent image distortion + + # Initialize with dummy image to match interface + dummy_img = np.zeros((10, 10, 3), dtype=np.uint8) + im = ax.imshow(dummy_img, interpolation='none') + self.images.append(im) + + plt.subplots_adjust(wspace=0.01, hspace=0.01) + plt.ion() + plt.show() + + def update(self, images, duration=0): + """ + Updates the grid with new images and refreshes the display. + + Args: + images (list of np.ndarray): List of 4 OpenCV (BGR) images. + duration (float): Seconds to pause after displaying (0 = no pause). + """ + if len(images) != 4: + raise ValueError("Exactly 4 images required.") + + for i in range(4): + rgb_img = cv2.cvtColor(images[i], cv2.COLOR_BGR2RGB) + self.images[i].set_data(rgb_img) + self.images[i].set_extent((0, rgb_img.shape[1], rgb_img.shape[0], 0)) # Preserve size + + self.fig.canvas.draw() + self.fig.canvas.flush_events() + if duration > 0: + plt.pause(duration) + +class PointCloudImageViewer: + def __init__(self, window_title="Viewer", width=1280, height=720): + self.app = gui.Application.instance + self.app.initialize() + self.window = self.app.create_window(window_title, width, height) + self.scene_widget = gui.SceneWidget() + self.scene_widget.scene = rendering.Open3DScene(self.window.renderer) + + self.panel = gui.Horiz() + self.window.add_child(self.panel) + + # Placeholder image + dummy_img = o3d.geometry.Image(np.zeros((480, 640, 3), dtype=np.uint8)) + self.image_widget = gui.ImageWidget(dummy_img) + + self.panel.add_child(self.image_widget) + self.panel.add_child(self.scene_widget) + + self.geometry_name = "pointcloud" + + def update(self, pointcloud_np, image_cv2): + # Convert image + image_rgb = cv2.cvtColor(image_cv2, cv2.COLOR_BGR2RGB) + o3d_image = o3d.geometry.Image(image_rgb) + + # Update image widget + self.image_widget.update_image(o3d_image) + + # Create new point cloud + pcd = o3d.geometry.PointCloud() + pcd.points = o3d.utility.Vector3dVector(pointcloud_np) + + # Clear old and add new geometry + self.scene_widget.scene.clear_geometry() + self.scene_widget.scene.add_geometry(self.geometry_name, pcd, rendering.MaterialRecord()) + + # Recenter camera + bounds = pcd.get_axis_aligned_bounding_box() + self.scene_widget.setup_camera(60.0, bounds, bounds.get_center()) + + def run(self): + gui.Application.instance.run() + + def run_one_tick(self): + self.app.run_one_tick() + + def visualize_for_seconds(self, pointcloud_np, image_cv2, duration_sec=5): + self.update(pointcloud_np, image_cv2) + start = time.time() + while time.time() - start < duration_sec: + self.app.run_one_tick() + time.sleep(0.01) # to prevent max CPU use + + def run_with_updates(self, data_loader_func, interval_sec=2): + def update_loop(): + while True: + pcd_np, img_cv2 = data_loader_func() + gui.Application.instance.post_to_main_thread( + self.window, lambda: self.update(pcd_np, img_cv2) + ) + time.sleep(interval_sec) + + threading.Thread(target=update_loop, daemon=True).start() + self.app.run() + +class PointCloudViewer: + def __init__(self, window_title="PointCloud Viewer", width=800, height=600): + self.app = gui.Application.instance + self.app.initialize() + self.window = self.app.create_window(window_title, width, height) + self.scene_widget = gui.SceneWidget() + self.scene_widget.scene = rendering.Open3DScene(self.window.renderer) + self.window.add_child(self.scene_widget) + + self.geometry_name = "pointcloud" + + def _color_by_x(self, points_np): + x = points_np[:, 0] + x_norm = (x - x.min()) / (x.max() - x.min() + 1e-8) # Normalize x to [0,1] + colors = cm.viridis(x_norm)[:, :3] # Drop alpha channel + return colors + + def update(self, points_np, colors = None): + # Create and set point cloud geometry + pcd = o3d.geometry.PointCloud() + pcd.points = o3d.utility.Vector3dVector(points_np) + + # Color by X values + if colors is None: + colors_np = self._color_by_x(points_np) + else: + colors_np = colors + pcd.colors = o3d.utility.Vector3dVector(colors_np) + + self.scene_widget.scene.clear_geometry() + self.scene_widget.scene.add_geometry(self.geometry_name, pcd, rendering.MaterialRecord()) + + bounds = pcd.get_axis_aligned_bounding_box() + self.scene_widget.setup_camera(60.0, bounds, bounds.get_center()) + + def visualize_for_seconds(self, pointcloud_np, duration_sec=2, colors_np=None): + self.update(pointcloud_np, colors_np) + start_time = time.time() + while time.time() - start_time < duration_sec: + self.app.run_one_tick() + time.sleep(0.01) + + def run(self): + self.app.run() From cedfee6a07c9f44d47c4b3769c9d6133a8f251f0 Mon Sep 17 00:00:00 2001 From: Fetty Date: Tue, 8 Jul 2025 13:26:22 +0000 Subject: [PATCH 2/4] Added demo for ONA robot (custom data) --- .gitignore | 3 + demo_docker.py | 20 +- demo_docker.sh | 0 demo_ona.py | 512 +++++++++++++++++++++++++++++++++++++++++ demo_ona.sh | 4 + environment.yml | 1 + exts/.gitignore | 3 + models/.gitignore | 2 + visualization_tools.py | 12 +- 9 files changed, 554 insertions(+), 3 deletions(-) create mode 100644 .gitignore mode change 100644 => 100755 demo_docker.sh create mode 100644 demo_ona.py create mode 100755 demo_ona.sh create mode 100644 exts/.gitignore create mode 100644 models/.gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1677adc --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +__pycache__/* +outputs/* +runs/* \ No newline at end of file diff --git a/demo_docker.py b/demo_docker.py index e2e0542..51e4a2d 100644 --- a/demo_docker.py +++ b/demo_docker.py @@ -390,8 +390,24 @@ def visualize_pointcloud(points: np.ndarray, colors: np.ndarray = None): raise ValueError("Colors must have the same shape as points.") pcd.colors = o3d.utility.Vector3dVector(colors) - o3d.visualization.draw_geometries([pcd]) + # Create a coordinate frame at the origin (0, 0, 0) + axis = o3d.geometry.TriangleMesh.create_coordinate_frame(size=0.5, origin=[0, 0, 0]) + def set_top_down_view(vis): + ctr = vis.get_view_control() + ctr.set_front([0.0, 0.0, -1.0]) # Camera looks along -Z + ctr.set_up([0.0, 1.0, 0.0]) # Y axis is 'up' in the view + ctr.set_lookat(pcd.get_center()) # Focus on the center of the cloud + ctr.set_zoom(0.5) # Zoom level, adjust as needed + return False # Return False to stop calling repeatedly + + # Wrap inside a key callback so it remains interactive + key_to_callback = {ord("T"): set_top_down_view} + + print("Press 'T' in the viewer to set top-down view.") + o3d.visualization.draw_geometries_with_key_callbacks([pcd, axis], key_to_callback) + + # o3d.visualization.draw_geometries([pcd]) @hydra.main(config_path='configs', config_name='config', version_base='1.2') def main(cfg): @@ -484,7 +500,7 @@ def main(cfg): # Transform from depth to 3D # points_3d = depth_to_point_cloud(tensor_output, K_cam) points_3d, colors = depth_to_colored_point_cloud(tensor_output, K_cam, image.astype(np.uint8)[:, :, ::-1]) - del tensor_output + del tensor_output # free GPU memory visualize_pointcloud(points_3d, colors) diff --git a/demo_docker.sh b/demo_docker.sh old mode 100644 new mode 100755 diff --git a/demo_ona.py b/demo_ona.py new file mode 100644 index 0000000..62a3097 --- /dev/null +++ b/demo_ona.py @@ -0,0 +1,512 @@ +import open3d as o3d +import numpy as np +import cv2 +import os +import matplotlib.pyplot as plt +import torch +import hydra +from utils_infer import Trainer +from mpl_toolkits.mplot3d import Axes3D +from tqdm import tqdm +from PIL import Image +import time +from visualization_tools import ImageGridViewer, PointCloudImageViewer, PointCloudViewer + +from rosbags.rosbag1 import Reader +from rosbags.typesys import Stores, get_typestore + +import struct # to decode pointcloud_2 data + +def depth_to_point_cloud(depth, K_cam): + if isinstance(K_cam, np.ndarray): + K_cam = torch.from_numpy(K_cam).to(depth.device) + + A, B, H, W = depth.shape + i, j = torch.meshgrid( + torch.arange(0, H, device=depth.device), + torch.arange(0, W, device=depth.device), + indexing='ij' + ) + z = depth[0][0] + x = (j - K_cam[0, 2]) * z / K_cam[0, 0] + y = (i - K_cam[1, 2]) * z / K_cam[1, 1] + y = -y + + points = torch.stack((x, y, z), dim=2).reshape(-1, 3) + return points.detach().cpu().numpy() + +def depth_to_colored_point_cloud(depth, K_cam, bgr_image): + """ + Converts a depth map to a 3D point cloud and assigns RGB colors. + + Args: + depth (torch.Tensor): Shape (1, 1, H, W), depth values. + K_cam (np.ndarray or torch.Tensor): 3x3 intrinsic matrix. + bgr_image (np.ndarray): (H, W, 3) image in BGR format (from OpenCV). + + Returns: + points (np.ndarray): (N, 3) 3D points. + colors (np.ndarray): (N, 3) RGB colors in [0, 1]. + """ + if isinstance(K_cam, np.ndarray): + K_cam = torch.from_numpy(K_cam).to(depth.device) + + _, _, H, W = depth.shape + + # Pixel coordinates + i, j = torch.meshgrid( + torch.arange(0, H, device=depth.device), + torch.arange(0, W, device=depth.device), + indexing='ij' + ) + + z = depth[0, 0] + x = (j - K_cam[0, 2]) * z / K_cam[0, 0] + y = (i - K_cam[1, 2]) * z / K_cam[1, 1] + y = -y # flip if needed + + points = torch.stack((x, y, z), dim=2).reshape(-1, 3).detach().cpu().numpy() + + # Convert BGR to RGB and normalize + rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0 + colors = rgb_image.reshape(-1, 3) + + return points, colors + + +def read_calib_file(filepath): + """Read in a calibration file and parse into a dictionary.""" + data = {} + + with open(filepath, 'r') as f: + for line in f.readlines(): + key, value = line.split(':', 1) + # The only non-float values in these files are dates, which + # we don't care about anyway + try: + data[key] = np.array([float(x) for x in value.split()]) + except ValueError: + pass + + return data + + +def depth_color(val, min_d=0, max_d=120): + """ + print Color(HSV's H value) corresponding to distance(m) + close distance = red , far distance = blue + """ + np.clip(val, 0, max_d, out=val) # max distance is 120m but usually not usual + return (((val - min_d) / (max_d - min_d)) * 120).astype(np.uint8) + + +def in_h_range_points(points, m, n, fov): + """ extract horizontal in-range points """ + return np.logical_and(np.arctan2(n, m) > (-fov[1] * np.pi / 180), + np.arctan2(n, m) < (-fov[0] * np.pi / 180)) + + +def in_v_range_points(points, m, n, fov): + """ extract vertical in-range points """ + return np.logical_and(np.arctan2(n, m) < (fov[1] * np.pi / 180), + np.arctan2(n, m) > (fov[0] * np.pi / 180)) + + +def fov_setting(points, x, y, z, dist, h_fov, v_fov): + """ filter points based on h,v FOV """ + + if h_fov[1] == 180 and h_fov[0] == -180 and v_fov[1] == 2.0 and v_fov[0] == -24.9: + return points + + if h_fov[1] == 180 and h_fov[0] == -180: + return points[in_v_range_points(points, dist, z, v_fov)] + elif v_fov[1] == 2.0 and v_fov[0] == -24.9: + return points[in_h_range_points(points, x, y, h_fov)] + else: + h_points = in_h_range_points(points, x, y, h_fov) + v_points = in_v_range_points(points, dist, z, v_fov) + return points[np.logical_and(h_points, v_points)] + + +def velo_points_filter(points, v_fov, h_fov): + """ extract points corresponding to FOV setting """ + + # Projecting to 2D + x = points[:, 0] + y = points[:, 1] + z = points[:, 2] + dist = np.sqrt(x ** 2 + y ** 2 + z ** 2) + + if h_fov[0] < -90: + h_fov = (-90,) + h_fov[1:] + if h_fov[1] > 90: + h_fov = h_fov[:1] + (90,) + + x_lim = fov_setting(x, x, y, z, dist, h_fov, v_fov)[:, None] + y_lim = fov_setting(y, x, y, z, dist, h_fov, v_fov)[:, None] + z_lim = fov_setting(z, x, y, z, dist, h_fov, v_fov)[:, None] + + # Stack arrays in sequence horizontally + xyz_ = np.hstack((x_lim, y_lim, z_lim)) + xyz_ = xyz_.T + + # stack (1,n) arrays filled with the number 1 + one_mat = np.full((1, xyz_.shape[1]), 1) + xyz_ = np.concatenate((xyz_, one_mat), axis=0) + + # need dist info for points color + dist_lim = fov_setting(dist, x, y, z, dist, h_fov, v_fov) + color = depth_color(dist_lim, 0, 70) + + return xyz_, color + + +def calib_velo2cam(): + """ + get Rotation(R : 3x3), Translation(T : 3x1) matrix info + using R,T matrix, we can convert velodyne coordinates to camera coordinates + """ + + R = np.array([-0.1533276, -0.98776117, 0.02860976, + 0.04630053, -0.0361014, -0.99827499, + 0.98709012, -0.15173846, 0.05126921] + ) + R = R.reshape(3, 3) + T = np.array([0.28564802, 1.01355072, -0.9722258]) + T = T.reshape(3, 1) + return R, T + +def calib_cam2cam(): + P_ = np.array([659.29813, 0.0, 760.64067, + 0.0, 662.57773, 543.84931, + 0.0, 0.0, 1.0] + ) + P_ = P_.reshape(3, 3) + d = np.array([-0.193406, 0.026607, 0.001749, -0.000172, 0.0]) + + return P_, d + + +def velo3d_2_camera2d_points(points, v_fov, h_fov, K, D=None, image_shape=None): + """ + Project 3D Velodyne points to 2D image points using camera calibration with distortion correction. + """ + + # Get calibration: rotation & translation from Velodyne to Camera + R_vc, T_vc = calib_velo2cam() # R: (3x3), T: (3x1) + + # Filter points based on FOV + xyz_v, c_ = velo_points_filter(points, v_fov, h_fov) # xyz_v: 4xN (homogeneous) + + if image_shape is None: + raise ValueError("Image shape must be provided to generate sparse depth map") + + # Convert xyz_v (homogeneous) to 3xN + xyz_v = xyz_v[:3, :] + + # Transform points to camera coordinates: X_c = R * X_v + T + xyz_c = R_vc @ xyz_v + T_vc + + # Transpose to Nx3 for OpenCV + xyz_c = xyz_c.T.reshape(-1, 1, 3) + + # Project to 2D with distortion + if D is not None: + img_points, _ = cv2.projectPoints(xyz_c, np.zeros((3,1)), np.zeros((3,1)), K, D) + else: + img_points, _ = cv2.projectPoints(xyz_c, np.zeros((3,1)), np.zeros((3,1)), K, np.array([0,0,0,0,0])) + img_points = img_points.reshape(-1, 2) + + # Extract depth (Z in camera frame) + depth_values = xyz_c[:, 0, 2] + + # Create empty depth map + depth_map = np.full(image_shape[:2], 0, dtype=np.float32) + + # Filter valid points within image bounds + valid_mask = np.logical_and.reduce(( + img_points[:, 0] >= 0, + img_points[:, 0] < image_shape[1], + img_points[:, 1] >= 0, + img_points[:, 1] < image_shape[0], + )) + + valid_img_points = img_points[valid_mask] + valid_depths = depth_values[valid_mask] + + # Populate depth map + for pt, depth in zip(valid_img_points, valid_depths): + x, y = int(pt[0]), int(pt[1]) + depth_map[y, x] = depth + + return valid_img_points.T, c_, depth_map + +def print_projection_plt(points, color, image): + """ project converted velodyne points into camera image """ + + hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV) + + for i in range(points.shape[1]): + cv2.circle(hsv_image, (np.int32(points[0][i]), np.int32( + points[1][i])), 2, (int(color[i]), 255, 255), -1) + + return cv2.cvtColor(hsv_image, cv2.COLOR_HSV2RGB) + + +def load_from_bin(bin_path): + obj = np.fromfile(bin_path, dtype=np.float32).reshape(-1, 4) + # ignore reflectivity info + return obj[:, :3] + +def pointcloud2_to_xyz_array(cloud_msg): + """ + Convert sensor_msgs/PointCloud2 to an Nx3 NumPy array. + Assumes XYZ floats. + """ + fmt = 'fff' # just XYZ (each float32) + width = cloud_msg.width + height = cloud_msg.height + point_step = cloud_msg.point_step + row_step = cloud_msg.row_step + data = cloud_msg.data + + points = [] + for i in range(0, len(data), point_step): + x, y, z = struct.unpack_from(fmt, data, offset=i) + points.append((x, y, z)) + + return np.array(points, dtype=np.float32) + +def load_image_from_rosbag(bag_path, N=-1): + typestore = get_typestore(Stores.ROS1_NOETIC) + topic = "/ona2/sensors/flir_camera_front/image_raw" + images = [] + + with Reader(bag_path) as reader: + connections = [x for x in reader.connections if x.topic == topic] + c = 0 + + for connection, timestamp, rawdata in reader.messages(connections=connections): + + msg = typestore.deserialize_ros1(rawdata, connection.msgtype) + + if msg.encoding != "bayer_rggb8": + print(f"Unsupported encoding: {msg.encoding}") + continue + + height = msg.height + width = msg.width + data = np.frombuffer(msg.data, dtype=np.uint8).reshape((height, width)) + + # Debayer using OpenCV: Bayer RGGB -> RGB + rgb_img = cv2.cvtColor(data, cv2.COLOR_BAYER_RG2RGB) + images.append(rgb_img) + + c += 1 + if c > N and N > 0: + break + + if len(images) > 0: + print(f"Loaded images shape: {images[0].shape}, dtype: {images[0].dtype}") + return images + + +def load_pcd_from_rosbag(bag_path, N=-1): + # Create a typestore for the matching ROS release. + typestore = get_typestore(Stores.ROS1_NOETIC) + + # Topic to filter + topic = "/ona2/sensors/pandar_front/cloud" + pcds = [] + + c = 0 + # Create reader instance and open for reading. + with Reader(bag_path) as reader: + connections = [x for x in reader.connections if x.topic == topic] + + for connection, timestamp, rawdata in reader.messages(connections=connections): + + msg = typestore.deserialize_ros1(rawdata, connection.msgtype) + np_points = pointcloud2_to_xyz_array(msg) + pcds.append(np_points) + c += 1 + if c > N and N > 0: + break + + return pcds + + +def visualize_pointcloud(points: np.ndarray, colors: np.ndarray = None): + """ + Visualizes a 3D point cloud using Open3D. + + Args: + points (np.ndarray): Nx3 array of XYZ coordinates. + colors (np.ndarray, optional): Nx3 array of RGB colors in [0, 1]. Defaults to None. + """ + if points.ndim != 2 or points.shape[1] != 3: + raise ValueError("Points should be an Nx3 NumPy array.") + + pcd = o3d.geometry.PointCloud() + pcd.points = o3d.utility.Vector3dVector(points) + + if colors is not None: + if colors.shape != points.shape: + raise ValueError("Colors must have the same shape as points.") + pcd.colors = o3d.utility.Vector3dVector(colors) + + # Create a coordinate frame at the origin (0, 0, 0) + axis = o3d.geometry.TriangleMesh.create_coordinate_frame(size=0.5, origin=[0, 0, 0]) + + def set_top_down_view(vis): + ctr = vis.get_view_control() + ctr.set_front([0.0, 0.0, -1.0]) # Camera looks along -Z + ctr.set_up([0.0, 1.0, 0.0]) # Y axis is 'up' in the view + ctr.set_lookat(pcd.get_center()) # Focus on the center of the cloud + ctr.set_zoom(0.5) # Zoom level, adjust as needed + return False # Return False to stop calling repeatedly + + # Wrap inside a key callback so it remains interactive + key_to_callback = {ord("T"): set_top_down_view} + + print("Press 'T' in the viewer to set top-down view.") + o3d.visualization.draw_geometries_with_key_callbacks([pcd, axis], key_to_callback) + + # o3d.visualization.draw_geometries([pcd]) + +@hydra.main(config_path='configs', config_name='config', version_base='1.2') +def main(cfg): + with Trainer(cfg) as run: + net = run.net_ema.module.cuda() + net.eval() + + global_path = "/home/kitti_dataset/2024_11_07_born/" + image_bag_name = "2024-11-07-11-24-29_3.bag" + pcd_bag_name = "2024-11-07-11-24-14_1.bag" + + v2c_filepath = global_path + '2011_09_26/calib_velo_to_cam.txt' + c2c_filepath = global_path + '2011_09_26/calib_cam_to_cam.txt' + image_mean = np.array([90.9950, 96.2278, 94.3213]) + image_std = np.array([79.2382, 80.5267, 82.1483]) + image_height = 352 + image_width = 1216 + + # Get LiDAR data from rosbag + pcd_array = load_pcd_from_rosbag(global_path + "robot/" + pcd_bag_name, 150) + + # Get image data from rosbag + images_array = load_image_from_rosbag(global_path + "camera/" + image_bag_name, 150) + + print(f"Loaded {len(images_array)} images and {len(pcd_array)} pcds.") + + # viewer = ImageGridViewer(titles=["Depth", "Raw", "Lidar", "Projected"]) + # viewer_3d = PointCloudImageViewer() + # viewer_pc = PointCloudViewer() + + N = len(images_array) + i_pcd = 0 + for i in tqdm(range(0,N,2)): # We take 1 pcd for each 2 images (LiDAR: 10Hz, Camera: 20Hz) + + image = images_array[i] + w, h = image.shape[1], image.shape[0] + + # Undistort image + K, D = calib_cam2cam() + + # Get optimal new camera matrix + # new_K, roi = cv2.getOptimalNewCameraMatrix(K, D, (w, h), 1, (w, h)) + + # # Undistort using new_K + # undistorted_img = cv2.undistort(image, K, D, None, new_K) + # x, y, w, h = roi + # cropped_img = undistorted_img[y:y+h, x:x+w] + + # Resize image to model dimensions + resized = cv2.resize(image, (image_width, image_height), interpolation=cv2.INTER_AREA) + image = resized + + # Correct intrinsics + CALIB_W = 1440 + CALIB_H = 1080 + + sx = image_width / CALIB_W + sy = image_height / CALIB_H + + # new_K[0,0] *= sx + # new_K[1,1] *= sy + # new_K[0,2] *= sx + # new_K[1,2] *= sy + + K[0,0] *= sx + K[1,1] *= sy + K[0,2] *= sx + K[1,2] *= sy + + # Get pointcloud + pcd = pcd_array[i_pcd] + i_pcd += 1 + + # Project 3D points into 2D (in pixels) + ans, c_, lidar = velo3d_2_camera2d_points(pcd, v_fov=(-24.9, 2.0), h_fov=(-45, 45), + K=K, D=D, image_shape=image.shape) + + image_vis = print_projection_plt(points=ans, color=c_, image=image.copy()) + + # Depth completion + # K_cam = torch.from_numpy(new_K.astype(np.float32)).cuda() + K_cam = torch.from_numpy(K.astype(np.float32)).cuda() + + lidar = lidar[:, :, None] + + image = (image - image_mean) / image_std + + image_tensor = image.transpose(2, 0, 1).astype(np.float32)[None] + lidar_tensor = lidar.transpose(2, 0, 1).astype(np.float32)[None] + + image_tensor = torch.from_numpy(image_tensor) + lidar_tensor = torch.from_numpy(lidar_tensor) + + m_start = time.time() + + output = net(image_tensor.cuda(), None, + lidar_tensor.cuda(), K_cam[None].cuda()) + + m_end = time.time() + print(f"Inference time: {(m_end - m_start)*1000.0} ms") + + if isinstance(output, (list, tuple)): + output = output[-1] + + tensor_output = output + + output = output.squeeze().detach().cpu().numpy() + image = image * image_std + image_mean + + output_max, output_min = output.max(), output.min() + output_norm = (output - output_min) / (output_max - output_min) * 255 + output_norm = output_norm.astype('uint8') + output_color = cv2.applyColorMap(output_norm, cv2.COLORMAP_JET) + + # Transform from depth to 3D + # points_3d = depth_to_point_cloud(tensor_output, K_cam) + points_3d, colors = depth_to_colored_point_cloud(tensor_output, K_cam, image.astype(np.uint8)[:, :, ::-1]) + del tensor_output # free GPU memory + + visualize_pointcloud(points_3d, colors) + + # viewer_3d.visualize_for_seconds(points_3d, output_color, 2) + + # viewer_pc.visualize_for_seconds(points_3d, 5, colors) + + # viewer.update([output_color, image.astype(np.uint8)[:, :, ::-1], lidar.astype(np.uint8) * 3, image_vis], + # duration=0.1) # Display for x seconds + + # (Optional) Save outputs + # cv2.imwrite(f'outputs/0000000{i:03d}_depth.png', output_color) + + # cv2.imwrite(f'outputs/0000000{i:03d}_image.png', image.astype(np.uint8)[:, :, ::-1]) + # cv2.imwrite(f'outputs/0000000{i:03d}_lidar.png', lidar.astype(np.uint8) * 3) + # cv2.imwrite(f'outputs/0000000{i:03d}_image_vis.png', image_vis) + +if __name__ == '__main__': + main() diff --git a/demo_ona.sh b/demo_ona.sh new file mode 100755 index 0000000..a82ec61 --- /dev/null +++ b/demo_ona.sh @@ -0,0 +1,4 @@ +python demo_ona.py gpus=[0] name=BP_KITTI ++chpt=dmd3c_distillation_depth_anything_v2.pth \ + net=PMP num_workers=1 \ + data=KITTI data.testset.mode=test data.testset.height=352 \ + test_batch_size=1 metric=RMSE ++save=true diff --git a/environment.yml b/environment.yml index 06c22e0..37d8425 100644 --- a/environment.yml +++ b/environment.yml @@ -272,3 +272,4 @@ dependencies: - pip: - open3d - imutils + - rosbags diff --git a/exts/.gitignore b/exts/.gitignore new file mode 100644 index 0000000..9388d05 --- /dev/null +++ b/exts/.gitignore @@ -0,0 +1,3 @@ +dist/* +build/* +BpOps.egg-info/* \ No newline at end of file diff --git a/models/.gitignore b/models/.gitignore new file mode 100644 index 0000000..f2043a4 --- /dev/null +++ b/models/.gitignore @@ -0,0 +1,2 @@ +# Ignore everything in this directory +__pycache__/* \ No newline at end of file diff --git a/visualization_tools.py b/visualization_tools.py index ab20c26..aa02abf 100644 --- a/visualization_tools.py +++ b/visualization_tools.py @@ -153,7 +153,17 @@ def update(self, points_np, colors = None): self.scene_widget.scene.add_geometry(self.geometry_name, pcd, rendering.MaterialRecord()) bounds = pcd.get_axis_aligned_bounding_box() - self.scene_widget.setup_camera(60.0, bounds, bounds.get_center()) + center = bounds.get_center() + extent = bounds.get_extent() + diameter = np.linalg.norm(extent) + + lookat = pcd.get_center() + front = np.array([0.0, 0.0, 1.0]) + up = np.array([0.0, 1.0, 0.0]) + zoom_distance = 0.2 * np.linalg.norm(pcd.get_max_bound() - pcd.get_min_bound()) + + eye = lookat - front * zoom_distance + self.scene_widget.scene.camera.look_at(lookat, eye, up) def visualize_for_seconds(self, pointcloud_np, duration_sec=2, colors_np=None): self.update(pointcloud_np, colors_np) From 140e6ef1ea616561aa2ae20c64f28e3ec1d46de3 Mon Sep 17 00:00:00 2001 From: Fetty Date: Wed, 9 Jul 2025 12:58:26 +0000 Subject: [PATCH 3/4] Visualization updates --- demo_ona.py | 128 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 80 insertions(+), 48 deletions(-) diff --git a/demo_ona.py b/demo_ona.py index 62a3097..83c335a 100644 --- a/demo_ona.py +++ b/demo_ona.py @@ -155,8 +155,10 @@ def velo_points_filter(points, v_fov, h_fov): xyz_ = np.concatenate((xyz_, one_mat), axis=0) # need dist info for points color - dist_lim = fov_setting(dist, x, y, z, dist, h_fov, v_fov) - color = depth_color(dist_lim, 0, 70) + # dist_lim = fov_setting(dist, x, y, z, dist, h_fov, v_fov) + p3N = xyz_[:3, :] + norms = np.linalg.norm(p3N, axis=0) + color = depth_color(norms, 0, np.max(norms)) return xyz_, color @@ -214,7 +216,7 @@ def velo3d_2_camera2d_points(points, v_fov, h_fov, K, D=None, image_shape=None): if D is not None: img_points, _ = cv2.projectPoints(xyz_c, np.zeros((3,1)), np.zeros((3,1)), K, D) else: - img_points, _ = cv2.projectPoints(xyz_c, np.zeros((3,1)), np.zeros((3,1)), K, np.array([0,0,0,0,0])) + img_points, _ = cv2.projectPoints(xyz_c, np.zeros((3,1)), np.zeros((3,1)), K, None) img_points = img_points.reshape(-1, 2) # Extract depth (Z in camera frame) @@ -277,38 +279,56 @@ def pointcloud2_to_xyz_array(cloud_msg): return np.array(points, dtype=np.float32) -def load_image_from_rosbag(bag_path, N=-1): +def load_image_from_rosbag(bag_path, stamps, N=-1): typestore = get_typestore(Stores.ROS1_NOETIC) topic = "/ona2/sensors/flir_camera_front/image_raw" images = [] + pcd_ids = [] with Reader(bag_path) as reader: connections = [x for x in reader.connections if x.topic == topic] - c = 0 + c = 0 + stamp_i = 0 + start_found = False for connection, timestamp, rawdata in reader.messages(connections=connections): + + now = timestamp*1e-9 - msg = typestore.deserialize_ros1(rawdata, connection.msgtype) - - if msg.encoding != "bayer_rggb8": - print(f"Unsupported encoding: {msg.encoding}") + if now > stamps[min(stamp_i,len(stamps)-1)] and not start_found: + stamp_i += 1 continue + else: + start_found = True - height = msg.height - width = msg.width - data = np.frombuffer(msg.data, dtype=np.uint8).reshape((height, width)) + print(f"now: {now}") + print(f"stamp: {stamps[stamp_i]}") - # Debayer using OpenCV: Bayer RGGB -> RGB - rgb_img = cv2.cvtColor(data, cv2.COLOR_BAYER_RG2RGB) - images.append(rgb_img) + if now >= stamps[stamp_i] and now < stamps[min(stamp_i+1,len(stamps)-1)]: - c += 1 - if c > N and N > 0: - break + msg = typestore.deserialize_ros1(rawdata, connection.msgtype) + + if msg.encoding != "bayer_rggb8": + print(f"Unsupported encoding: {msg.encoding}") + continue + + height = msg.height + width = msg.width + data = np.frombuffer(msg.data, dtype=np.uint8).reshape((height, width)) + + # Debayer using OpenCV: Bayer RGGB -> RGB + rgb_img = cv2.cvtColor(data, cv2.COLOR_BAYER_RG2RGB) + images.append(rgb_img) + pcd_ids.append(stamp_i) + stamp_i += 1 + + c += 1 + if c > N and N > 0: + break if len(images) > 0: print(f"Loaded images shape: {images[0].shape}, dtype: {images[0].dtype}") - return images + return images, pcd_ids def load_pcd_from_rosbag(bag_path, N=-1): @@ -318,6 +338,7 @@ def load_pcd_from_rosbag(bag_path, N=-1): # Topic to filter topic = "/ona2/sensors/pandar_front/cloud" pcds = [] + stamps = [] c = 0 # Create reader instance and open for reading. @@ -329,11 +350,12 @@ def load_pcd_from_rosbag(bag_path, N=-1): msg = typestore.deserialize_ros1(rawdata, connection.msgtype) np_points = pointcloud2_to_xyz_array(msg) pcds.append(np_points) + stamps.append(timestamp*1e-9) c += 1 if c > N and N > 0: break - return pcds + return pcds, stamps def visualize_pointcloud(points: np.ndarray, colors: np.ndarray = None): @@ -384,28 +406,25 @@ def main(cfg): image_bag_name = "2024-11-07-11-24-29_3.bag" pcd_bag_name = "2024-11-07-11-24-14_1.bag" - v2c_filepath = global_path + '2011_09_26/calib_velo_to_cam.txt' - c2c_filepath = global_path + '2011_09_26/calib_cam_to_cam.txt' image_mean = np.array([90.9950, 96.2278, 94.3213]) image_std = np.array([79.2382, 80.5267, 82.1483]) image_height = 352 image_width = 1216 # Get LiDAR data from rosbag - pcd_array = load_pcd_from_rosbag(global_path + "robot/" + pcd_bag_name, 150) + pcd_array, stamps = load_pcd_from_rosbag(global_path + "robot/" + pcd_bag_name) # Get image data from rosbag - images_array = load_image_from_rosbag(global_path + "camera/" + image_bag_name, 150) + images_array, pcd_ids = load_image_from_rosbag(global_path + "camera/" + image_bag_name, stamps) print(f"Loaded {len(images_array)} images and {len(pcd_array)} pcds.") - # viewer = ImageGridViewer(titles=["Depth", "Raw", "Lidar", "Projected"]) + viewer = ImageGridViewer(titles=["Depth", "Raw", "Lidar", "Projected"]) # viewer_3d = PointCloudImageViewer() # viewer_pc = PointCloudViewer() N = len(images_array) - i_pcd = 0 - for i in tqdm(range(0,N,2)): # We take 1 pcd for each 2 images (LiDAR: 10Hz, Camera: 20Hz) + for i in tqdm(range(N)): image = images_array[i] w, h = image.shape[1], image.shape[0] @@ -413,16 +432,26 @@ def main(cfg): # Undistort image K, D = calib_cam2cam() + # h, w = img.shape[:2] + # new_K, _ = cv2.getOptimalNewCameraMatrix(K, D, (w, h), 1, (w, h)) + + # # Compute undistort maps + # map1, map2 = cv2.initUndistortRectifyMap(K, D, None, new_K, (w, h), cv2.CV_16SC2) + + # # Apply remap + # undistorted_img = cv2.remap(img, map1, map2, interpolation=cv2.INTER_LINEAR) + # Get optimal new camera matrix - # new_K, roi = cv2.getOptimalNewCameraMatrix(K, D, (w, h), 1, (w, h)) + new_K, roi = cv2.getOptimalNewCameraMatrix(K, D, (w, h), 1, (w, h)) - # # Undistort using new_K - # undistorted_img = cv2.undistort(image, K, D, None, new_K) - # x, y, w, h = roi - # cropped_img = undistorted_img[y:y+h, x:x+w] + # Undistort using new_K + undistorted_img = cv2.undistort(image, K, D, None, new_K) + x, y, w, h = roi + cropped_img = undistorted_img[y:y+h, x:x+w] # Resize image to model dimensions - resized = cv2.resize(image, (image_width, image_height), interpolation=cv2.INTER_AREA) + # resized = cv2.resize(image, (image_width, image_height), interpolation=cv2.INTER_AREA) + resized = cv2.resize(undistorted_img, (image_width, image_height), interpolation=cv2.INTER_AREA) image = resized # Correct intrinsics @@ -432,29 +461,30 @@ def main(cfg): sx = image_width / CALIB_W sy = image_height / CALIB_H - # new_K[0,0] *= sx - # new_K[1,1] *= sy - # new_K[0,2] *= sx - # new_K[1,2] *= sy + new_K[0,0] *= sx + new_K[1,1] *= sy + new_K[0,2] *= sx + new_K[1,2] *= sy - K[0,0] *= sx - K[1,1] *= sy - K[0,2] *= sx - K[1,2] *= sy + # K[0,0] *= sx + # K[1,1] *= sy + # K[0,2] *= sx + # K[1,2] *= sy # Get pointcloud - pcd = pcd_array[i_pcd] - i_pcd += 1 + pcd = pcd_array[pcd_ids[i]] # Project 3D points into 2D (in pixels) + # ans, c_, lidar = velo3d_2_camera2d_points(pcd, v_fov=(-24.9, 2.0), h_fov=(-45, 45), + # K=K, D=D, image_shape=image.shape) ans, c_, lidar = velo3d_2_camera2d_points(pcd, v_fov=(-24.9, 2.0), h_fov=(-45, 45), - K=K, D=D, image_shape=image.shape) + K=new_K, D=None, image_shape=image.shape) image_vis = print_projection_plt(points=ans, color=c_, image=image.copy()) # Depth completion - # K_cam = torch.from_numpy(new_K.astype(np.float32)).cuda() - K_cam = torch.from_numpy(K.astype(np.float32)).cuda() + K_cam = torch.from_numpy(new_K.astype(np.float32)).cuda() + # K_cam = torch.from_numpy(K.astype(np.float32)).cuda() lidar = lidar[:, :, None] @@ -498,8 +528,10 @@ def main(cfg): # viewer_pc.visualize_for_seconds(points_3d, 5, colors) - # viewer.update([output_color, image.astype(np.uint8)[:, :, ::-1], lidar.astype(np.uint8) * 3, image_vis], - # duration=0.1) # Display for x seconds + lidar = depth_color(lidar, 0, 35) + lidar = cv2.applyColorMap(lidar.astype(np.uint8)*3, cv2.COLORMAP_TURBO) + viewer.update([output_color, image.astype(np.uint8)[:, :, ::-1], lidar, image_vis], + duration=0.1) # Display for x seconds # (Optional) Save outputs # cv2.imwrite(f'outputs/0000000{i:03d}_depth.png', output_color) From 5ba677d1e4b8038d3c2f72e1d6488361a267230d Mon Sep 17 00:00:00 2001 From: Fetty Date: Fri, 11 Jul 2025 13:09:53 +0000 Subject: [PATCH 4/4] Small fixes for better documentation of docker --- README.md | 16 +++++++++++++++- demo.py | 1 - demo_docker.py | 10 +++++----- demo_ona.py | 40 ++++++++++++++++++++++++++++++++++------ docker/Dockerfile | 2 ++ 5 files changed, 56 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 26f3f02..41711ba 100644 --- a/README.md +++ b/README.md @@ -46,6 +46,13 @@ chmod +x build run ./build # build docker image ./run # run docker container ``` + +_Note that inside the `run` script the directory on your PC with the KITTI data is shared with the docker container via:_ +```sh +--volume="$HOME/Desktop/kitti_dataset:/home/kitti_dataset" +``` +_Change this line (80) inside the [run](docker/run) script in order to match with your KITTI dataset parent folder._ + __Once inside docker, set up environment:__ ```sh ## 🐳 Inside the Docker container @@ -92,7 +99,14 @@ Open `demo.py` and go to **line 338**, where you can modify the input sequence p ```python # demo.py (Line 338) -sequence = "/path/to/your/kitti/sequence" +base = "/your/path/to/kitti/sequence" +``` + +_(Or if you are using docker):_ +```python +# demo_docker.py (Line 418) +global_path = "/your/path/to/kitti/" +base = global_path + "/sequence" ``` Download pre-trained weights: diff --git a/demo.py b/demo.py index a9e5fbb..9443cde 100644 --- a/demo.py +++ b/demo.py @@ -335,7 +335,6 @@ def main(cfg): net = run.net_ema.module.cuda() net.eval() # 读取左目图像 - # base = "datas/kitti/raw/2011_09_26/2011_09_26_drive_0002_sync" base = "datas/kitti/raw/2011_09_26/2011_09_26_drive_0048_sync" image_type = 'color' # 'grayscale' or 'color' image diff --git a/demo_docker.py b/demo_docker.py index 51e4a2d..6abd560 100644 --- a/demo_docker.py +++ b/demo_docker.py @@ -67,7 +67,7 @@ def depth_to_point_cloud(depth, K_cam): z = depth[0][0] x = (j - K_cam[0, 2]) * z / K_cam[0, 0] y = (i - K_cam[1, 2]) * z / K_cam[1, 1] - y = -y + # y = -y points = torch.stack((x, y, z), dim=2).reshape(-1, 3) return points.detach().cpu().numpy() @@ -101,6 +101,7 @@ def depth_to_colored_point_cloud(depth, K_cam, bgr_image): x = (j - K_cam[0, 2]) * z / K_cam[0, 0] y = (i - K_cam[1, 2]) * z / K_cam[1, 1] y = -y # flip if needed + x = -x # flip if needed points = torch.stack((x, y, z), dim=2).reshape(-1, 3).detach().cpu().numpy() @@ -416,7 +417,6 @@ def main(cfg): net.eval() # 读取左目图像 global_path = "/home/kitti_dataset/" - # base = "datas/kitti/raw/2011_09_26/2011_09_26_drive_0002_sync" base = global_path + "2011_09_26/2011_09_26_drive_0005_sync" image_type = 'color' # 'grayscale' or 'color' image @@ -434,7 +434,7 @@ def main(cfg): with open(t_file, 'r') as f: N = sum(1 for _ in f) - # viewer = ImageGridViewer(titles=["Depth", "Raw", "Lidar", "Projected"]) + viewer = ImageGridViewer(titles=["Depth", "Raw", "Lidar", "Projected"]) # viewer_3d = PointCloudImageViewer() # viewer_pc = PointCloudViewer() @@ -508,8 +508,8 @@ def main(cfg): # viewer_pc.visualize_for_seconds(points_3d, 2, colors) - # viewer.update([output_color, image.astype(np.uint8)[:, :, ::-1], lidar.astype(np.uint8) * 3, image_vis], - # duration=0.5) # Display for x seconds + viewer.update([output_color, image.astype(np.uint8)[:, :, ::-1], lidar.astype(np.uint8) * 3, image_vis], + duration=0.5) # Display for x seconds cv2.imwrite(f'outputs/0000000{i:03d}_depth.png', output_color) diff --git a/demo_ona.py b/demo_ona.py index 83c335a..e48be4c 100644 --- a/demo_ona.py +++ b/demo_ona.py @@ -64,6 +64,7 @@ def depth_to_colored_point_cloud(depth, K_cam, bgr_image): x = (j - K_cam[0, 2]) * z / K_cam[0, 0] y = (i - K_cam[1, 2]) * z / K_cam[1, 1] y = -y # flip if needed + x = -x # flip if needed points = torch.stack((x, y, z), dim=2).reshape(-1, 3).detach().cpu().numpy() @@ -279,9 +280,22 @@ def pointcloud2_to_xyz_array(cloud_msg): return np.array(points, dtype=np.float32) -def load_image_from_rosbag(bag_path, stamps, N=-1): +def load_image_from_rosbag(bag_path, topic, stamps, N=-1): + """ + Loads each rgb image from a rosbag file taking also into account a simple synchronization + strategy w.r.t to the pcd timestamps. + + Args: + bag_path (str): path to the rosbag file. + topic (str): the topic with image data (sensor_msgs/ImageRaw) + stamps (list(float)): time stamp of each pointcloud/LiDAR frame. + N (int): optional max number of images retrieved + + Returns: + images (list(np.ndarray)): list of BGR images. + pcd_ids (list(int)): index of the closest (in time) pointclouds for the chosen images. + """ typestore = get_typestore(Stores.ROS1_NOETIC) - topic = "/ona2/sensors/flir_camera_front/image_raw" images = [] pcd_ids = [] @@ -331,12 +345,23 @@ def load_image_from_rosbag(bag_path, stamps, N=-1): return images, pcd_ids -def load_pcd_from_rosbag(bag_path, N=-1): +def load_pcd_from_rosbag(bag_path, topic, N=-1): + """ + Loads each pcd from a rosbag file. + + Args: + bag_path (str): path to the rosbag file. + topic (str): the topic with pointcloud data (sensor_msgs/PointCloud2) + N (int): optional max number of pcd retrieved + + Returns: + pcds (list(np.ndarray)): list of pointclouds (pcd). + stamps (list(float)): time stamp of each pointcloud/LiDAR frame. + """ # Create a typestore for the matching ROS release. typestore = get_typestore(Stores.ROS1_NOETIC) # Topic to filter - topic = "/ona2/sensors/pandar_front/cloud" pcds = [] stamps = [] @@ -406,16 +431,19 @@ def main(cfg): image_bag_name = "2024-11-07-11-24-29_3.bag" pcd_bag_name = "2024-11-07-11-24-14_1.bag" + topic_lidar = "/ona2/sensors/pandar_front/cloud" + topic_camera = "/ona2/sensors/flir_camera_front/image_raw" + image_mean = np.array([90.9950, 96.2278, 94.3213]) image_std = np.array([79.2382, 80.5267, 82.1483]) image_height = 352 image_width = 1216 # Get LiDAR data from rosbag - pcd_array, stamps = load_pcd_from_rosbag(global_path + "robot/" + pcd_bag_name) + pcd_array, stamps = load_pcd_from_rosbag(global_path + "robot/" + pcd_bag_name, topic_lidar) # Get image data from rosbag - images_array, pcd_ids = load_image_from_rosbag(global_path + "camera/" + image_bag_name, stamps) + images_array, pcd_ids = load_image_from_rosbag(global_path + "camera/" + image_bag_name, topic_camera, stamps) print(f"Loaded {len(images_array)} images and {len(pcd_array)} pcds.") diff --git a/docker/Dockerfile b/docker/Dockerfile index f055d49..83809ed 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,6 +1,8 @@ # Base image with CUDA and PyTorch support FROM nvidia/cuda:12.1.1-devel-ubuntu22.04 +ARG DEBIAN_FRONTEND=noninteractive + # System setup RUN apt-get update && apt-get install -y \ python3.10 python3-pip python3-dev git build-essential cmake \