From d8f2c7006211378de384fc1d42d2d793ecd56509 Mon Sep 17 00:00:00 2001
From: Fetty <fetty3113@gmail.com>
Date: Mon, 7 Jul 2025 14:16:13 +0200
Subject: [PATCH 1/4] Docker utils added, demo scripts now support
 visualization

---
 README.md              |  32 +++
 demo_docker.py         | 506 +++++++++++++++++++++++++++++++++++++++++
 demo_docker.sh         |   4 +
 docker/Dockerfile      |  36 +++
 docker/build           |  15 ++
 docker/run             |  87 +++++++
 environment.yml        |   4 +-
 exts/setup.py          |  26 ++-
 utils_infer.py         |   3 +-
 visualization_tools.py | 166 ++++++++++++++
 10 files changed, 868 insertions(+), 11 deletions(-)
 create mode 100644 demo_docker.py
 create mode 100644 demo_docker.sh
 create mode 100644 docker/Dockerfile
 create mode 100755 docker/build
 create mode 100755 docker/run
 create mode 100644 visualization_tools.py

diff --git a/README.md b/README.md
index 1f72be0..26f3f02 100644
--- a/README.md
+++ b/README.md
@@ -8,6 +8,7 @@
 
 ## 🆕 Update Log
 
+- **[2025.07.07]** Now **docker** scripts available!  
 - **[2025.04.23]** We have released the **2rd stage training code**! 🎉  
 - **[2025.04.11]** We have released the **inference code**! 🎉  
 
@@ -36,6 +37,32 @@ DMD³C introduces a novel framework for **fine-grained depth completion** by dis
 
 ## 🚀 Getting Started
 
+### (Docker only)
+__Clone repo and set up docker:__
+```sh
+git clone https://github.com/fetty31/DMD3C.git # clone repo
+cd DMD3C/docker/
+chmod +x build run
+./build # build docker image
+./run   # run docker container
+```
+__Once inside docker, set up environment:__
+```sh
+## 🐳 Inside the Docker container
+# build conda env
+cd home/DMD3C/
+conda env create -f environment.yml 
+conda init && source /root/.bashrc
+conda activate bp
+
+# build cuda extension
+cd exts
+export CC=/usr/bin/gcc
+export CXX=/usr/bin/g++
+python setup.py install 
+```
+__From this, jump to steps 3-5__
+
 ### 1. Clone Base Repository
 
 ```bash
@@ -80,6 +107,11 @@ Run inference:
 bash demo.sh
 ```
 
+_(Or if you are using docker)_
+```bash
+bash demo_docker.sh
+```
+
 You will get results like this:
 
 ![supp-video 00_00_00-00_00_30](https://github.com/user-attachments/assets/a1412bca-c368-4d19-a081-79eeabaa2901)
diff --git a/demo_docker.py b/demo_docker.py
new file mode 100644
index 0000000..e2e0542
--- /dev/null
+++ b/demo_docker.py
@@ -0,0 +1,506 @@
+import open3d as o3d
+import numpy as np
+import cv2
+import os
+import matplotlib.pyplot as plt
+import torch
+import hydra
+from utils_infer import Trainer
+from mpl_toolkits.mplot3d import Axes3D
+from tqdm import tqdm
+from PIL import Image
+import time
+from visualization_tools import ImageGridViewer, PointCloudImageViewer, PointCloudViewer
+
+def save_point_cloud_to_image(pcd, image_size=(1600, 1200)):
+    vis = o3d.visualization.Visualizer()
+    vis.create_window(visible=False, width=image_size[0], height=image_size[1])  # 设置窗口大小并不显示
+
+    # 添加点云到可视化器中
+    vis.add_geometry(pcd)
+
+    # 获取 ViewControl 对象并设置自定义视角
+    view_control = vis.get_view_control()
+
+    # 设置视角参数
+    parameters = {
+			"boundingbox_max" : [ -1.5977719363706235, 11.519330868353832, 84.127326965332031 ],
+			"boundingbox_min" : [ -56.623178268627761, -50.724836932361825, 4.6948032379150391 ],
+			"field_of_view" : 60.0,
+			"front" : [ 0.36788389015943362, 0.28372788418091033, -0.8855280521244856 ],
+			"lookat" : [ -29.110475102499194, -19.602753032003996, 44.411065101623535 ],
+			"up" : [ -0.88654778269461509, -0.18027422226025128, -0.42606834403382188 ],
+			"zoom" : 0.5199999999999998
+		}
+
+    # 应用视角设置
+    ctr = vis.get_view_control()
+    ctr.set_lookat(parameters["lookat"])
+    ctr.set_front(parameters["front"])
+    ctr.set_up(parameters["up"])
+    ctr.set_zoom(parameters["zoom"])
+
+    # 渲染点云为图像
+    vis.poll_events()
+    vis.update_renderer()
+
+    # 获取点云的2D图像
+    depth_image = np.asarray(vis.capture_screen_float_buffer(do_render=True))
+    vis.destroy_window()
+
+    # 调整尺寸并格式化图像
+    depth_image = (depth_image * 255).astype(np.uint8)  # 转换为8位图像
+    depth_image = cv2.cvtColor(depth_image, cv2.COLOR_RGB2BGR)  # 转换为BGR格式以便与OpenCV兼容
+
+    return depth_image
+
+def depth_to_point_cloud(depth, K_cam):
+    if isinstance(K_cam, np.ndarray):
+        K_cam = torch.from_numpy(K_cam).to(depth.device)
+
+    A, B, H, W = depth.shape
+    i, j = torch.meshgrid(
+        torch.arange(0, H, device=depth.device),
+        torch.arange(0, W, device=depth.device),
+        indexing='ij'
+    )
+    z = depth[0][0]
+    x = (j - K_cam[0, 2]) * z / K_cam[0, 0]
+    y = (i - K_cam[1, 2]) * z / K_cam[1, 1]
+    y = -y
+
+    points = torch.stack((x, y, z), dim=2).reshape(-1, 3)
+    return points.detach().cpu().numpy()
+
+def depth_to_colored_point_cloud(depth, K_cam, bgr_image):
+    """
+    Converts a depth map to a 3D point cloud and assigns RGB colors.
+
+    Args:
+        depth (torch.Tensor): Shape (1, 1, H, W), depth values.
+        K_cam (np.ndarray or torch.Tensor): 3x3 intrinsic matrix.
+        bgr_image (np.ndarray): (H, W, 3) image in BGR format (from OpenCV).
+
+    Returns:
+        points (np.ndarray): (N, 3) 3D points.
+        colors (np.ndarray): (N, 3) RGB colors in [0, 1].
+    """
+    if isinstance(K_cam, np.ndarray):
+        K_cam = torch.from_numpy(K_cam).to(depth.device)
+
+    _, _, H, W = depth.shape
+
+    # Pixel coordinates
+    i, j = torch.meshgrid(
+        torch.arange(0, H, device=depth.device),
+        torch.arange(0, W, device=depth.device),
+        indexing='ij'
+    )
+
+    z = depth[0, 0]
+    x = (j - K_cam[0, 2]) * z / K_cam[0, 0]
+    y = (i - K_cam[1, 2]) * z / K_cam[1, 1]
+    y = -y  # flip if needed
+
+    points = torch.stack((x, y, z), dim=2).reshape(-1, 3).detach().cpu().numpy()
+
+    # Convert BGR to RGB and normalize
+    rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0
+    colors = rgb_image.reshape(-1, 3)
+
+    return points, colors
+
+
+def read_calib_file(filepath):
+    """Read in a calibration file and parse into a dictionary."""
+    data = {}
+
+    with open(filepath, 'r') as f:
+        for line in f.readlines():
+            key, value = line.split(':', 1)
+            # The only non-float values in these files are dates, which
+            # we don't care about anyway
+            try:
+                data[key] = np.array([float(x) for x in value.split()])
+            except ValueError:
+                pass
+
+    return data
+
+
+def pull_K_cam(calib_path):
+    filedata = read_calib_file(calib_path)
+    P_rect_20 = np.reshape(filedata['P_rect_02'], (3, 4))
+    K_cam = P_rect_20[0:3, 0:3]
+    return K_cam
+
+
+def depth_color(val, min_d=0, max_d=120):
+    """ 
+    print Color(HSV's H value) corresponding to distance(m) 
+    close distance = red , far distance = blue
+    """
+    np.clip(val, 0, max_d, out=val)  # max distance is 120m but usually not usual
+    return (((val - min_d) / (max_d - min_d)) * 120).astype(np.uint8)
+
+
+def in_h_range_points(points, m, n, fov):
+    """ extract horizontal in-range points """
+    return np.logical_and(np.arctan2(n, m) > (-fov[1] * np.pi / 180),
+                          np.arctan2(n, m) < (-fov[0] * np.pi / 180))
+
+
+def in_v_range_points(points, m, n, fov):
+    """ extract vertical in-range points """
+    return np.logical_and(np.arctan2(n, m) < (fov[1] * np.pi / 180),
+                          np.arctan2(n, m) > (fov[0] * np.pi / 180))
+
+
+def fov_setting(points, x, y, z, dist, h_fov, v_fov):
+    """ filter points based on h,v FOV  """
+
+    if h_fov[1] == 180 and h_fov[0] == -180 and v_fov[1] == 2.0 and v_fov[0] == -24.9:
+        return points
+
+    if h_fov[1] == 180 and h_fov[0] == -180:
+        return points[in_v_range_points(points, dist, z, v_fov)]
+    elif v_fov[1] == 2.0 and v_fov[0] == -24.9:
+        return points[in_h_range_points(points, x, y, h_fov)]
+    else:
+        h_points = in_h_range_points(points, x, y, h_fov)
+        v_points = in_v_range_points(points, dist, z, v_fov)
+        return points[np.logical_and(h_points, v_points)]
+
+
+def in_range_points(points, size):
+    """ extract in-range points """
+    return np.logical_and(points > 0, points < size)
+
+
+def velo_points_filter(points, v_fov, h_fov):
+    """ extract points corresponding to FOV setting """
+
+    # Projecting to 2D
+    x = points[:, 0]
+    y = points[:, 1]
+    z = points[:, 2]
+    dist = np.sqrt(x ** 2 + y ** 2 + z ** 2)
+
+    if h_fov[0] < -90:
+        h_fov = (-90,) + h_fov[1:]
+    if h_fov[1] > 90:
+        h_fov = h_fov[:1] + (90,)
+
+    x_lim = fov_setting(x, x, y, z, dist, h_fov, v_fov)[:, None]
+    y_lim = fov_setting(y, x, y, z, dist, h_fov, v_fov)[:, None]
+    z_lim = fov_setting(z, x, y, z, dist, h_fov, v_fov)[:, None]
+
+    # Stack arrays in sequence horizontally
+    xyz_ = np.hstack((x_lim, y_lim, z_lim))
+    xyz_ = xyz_.T
+
+    # stack (1,n) arrays filled with the number 1
+    one_mat = np.full((1, xyz_.shape[1]), 1)
+    xyz_ = np.concatenate((xyz_, one_mat), axis=0)
+
+    # need dist info for points color
+    dist_lim = fov_setting(dist, x, y, z, dist, h_fov, v_fov)
+    color = depth_color(dist_lim, 0, 70)
+
+    return xyz_, color
+
+
+def calib_velo2cam(filepath):
+    """ 
+    get Rotation(R : 3x3), Translation(T : 3x1) matrix info 
+    using R,T matrix, we can convert velodyne coordinates to camera coordinates
+    """
+    with open(filepath, "r") as f:
+        file = f.readlines()
+
+        for line in file:
+            (key, val) = line.split(':', 1)
+            if key == 'R':
+                R = np.fromstring(val, sep=' ')
+                R = R.reshape(3, 3)
+            if key == 'T':
+                T = np.fromstring(val, sep=' ')
+                T = T.reshape(3, 1)
+    return R, T
+
+
+def calib_cam2cam(filepath, mode):
+    with open(filepath, "r") as f:
+        file = f.readlines()
+
+        for line in file:
+            (key, val) = line.split(':', 1)
+            if key == ('P_rect_' + mode):
+                P_ = np.fromstring(val, sep=' ')
+                P_ = P_.reshape(3, 4)
+                # erase 4th column ([0,0,0])
+                P_ = P_[:3, :3]
+    return P_
+
+
+def velo3d_2_camera2d_points(points, v_fov, h_fov, vc_path, cc_path, mode='02', image_shape=None):
+    """ 
+    Return velodyne 3D points corresponding to camera 2D image and sparse depth map
+    """
+
+    # R_vc = Rotation matrix ( velodyne -> camera )
+    # T_vc = Translation matrix ( velodyne -> camera )
+    R_vc, T_vc = calib_velo2cam(vc_path)
+
+    # P_ = Projection matrix ( camera coordinates 3d points -> image plane 2d points )
+    P_ = calib_cam2cam(cc_path, mode)
+    xyz_v, c_ = velo_points_filter(points, v_fov, h_fov)
+
+    RT_ = np.concatenate((R_vc, T_vc), axis=1)
+
+    # Initialize sparse depth map
+    if image_shape is None:
+        raise ValueError(
+            "Image shape must be provided to generate sparse depth map")
+
+    # Create a depth map with NaN values
+    depth_map = np.full(image_shape[:2], 0)
+
+    # Convert velodyne coordinates(X_v, Y_v, Z_v) to camera coordinates(X_c, Y_c, Z_c)
+    for i in range(xyz_v.shape[1]):
+        xyz_v[:3, i] = np.matmul(RT_, xyz_v[:, i])
+
+    xyz_c = np.delete(xyz_v, 3, axis=0)
+
+    # Convert camera coordinates(X_c, Y_c, Z_c) to image(pixel) coordinates(x,y)
+    for i in range(xyz_c.shape[1]):
+        xyz_c[:, i] = np.matmul(P_, xyz_c[:, i])
+
+    # Normalize by the third coordinate to get 2D pixel coordinates
+    xy_i = xyz_c[:2, :] / xyz_c[2, :]
+    depth_values = xyz_c[2, :]  # Z-coordinate (depth) in camera space
+
+    # Filter out points that are out of image bounds
+    valid_mask = np.logical_and.reduce((
+        xy_i[0, :] >= 0,
+        xy_i[0, :] < image_shape[1],  # x coordinate within image width
+        xy_i[1, :] >= 0,
+        xy_i[1, :] < image_shape[0],  # y coordinate within image height
+    ))
+
+    valid_points = xy_i[:, valid_mask]
+    valid_depths = depth_values[valid_mask]
+
+    # Fill the depth map with depth values
+    for i in range(valid_points.shape[1]):
+        x = int(valid_points[0, i])
+        y = int(valid_points[1, i])
+        depth_map[y, x] = valid_depths[i]
+
+    # Return both the projected points and the sparse depth map
+    return valid_points, c_, depth_map
+
+
+def print_projection_cv2(points, color, image):
+    """ project converted velodyne points into camera image """
+
+    hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
+
+    for i in range(points.shape[1]):
+        cv2.circle(hsv_image, (np.int32(points[0][i]), np.int32(
+            points[1][i])), 2, (int(color[i]), 255, 255), -1)
+
+    return cv2.cvtColor(hsv_image, cv2.COLOR_HSV2BGR)
+
+
+def print_projection_plt(points, color, image):
+    """ project converted velodyne points into camera image """
+
+    hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
+
+    for i in range(points.shape[1]):
+        cv2.circle(hsv_image, (np.int32(points[0][i]), np.int32(
+            points[1][i])), 2, (int(color[i]), 255, 255), -1)
+
+    return cv2.cvtColor(hsv_image, cv2.COLOR_HSV2RGB)
+
+
+def load_from_bin(bin_path):
+    obj = np.fromfile(bin_path, dtype=np.float32).reshape(-1, 4)
+    # ignore reflectivity info
+    return obj[:, :3]
+
+
+def set_axes_equal(ax):
+    """使 3D 图的刻度长短一致"""
+    x_limits = ax.get_xlim3d()
+    y_limits = ax.get_ylim3d()
+    z_limits = ax.get_zlim3d()
+
+    # 找到所有坐标的中心和范围
+    x_range = abs(x_limits[1] - x_limits[0])
+    x_middle = np.mean(x_limits)
+    y_range = abs(y_limits[1] - y_limits[0])
+    y_middle = np.mean(y_limits)
+    z_range = abs(z_limits[1] - z_limits[0])
+    z_middle = np.mean(z_limits)
+
+    # 计算出最大的范围
+    plot_radius = 0.5 * max([x_range, y_range, z_range])
+
+    # 设置每个坐标轴的范围，使其相等
+    ax.set_xlim3d([x_middle - plot_radius, x_middle + plot_radius])
+    ax.set_ylim3d([y_middle - plot_radius, y_middle + plot_radius])
+    ax.set_zlim3d([z_middle - plot_radius, z_middle + plot_radius])
+
+
+def save_point_cloud_to_ply_open3d(point_cloud, image, filename):
+    # 创建 open3d 点云对象
+    pcd = o3d.geometry.PointCloud()
+
+    # 设置点云的坐标
+    pcd.points = o3d.utility.Vector3dVector(point_cloud)
+
+    # 将图像展开为 (N, 3) 形式的颜色数据
+    colors = image.reshape(-1, 3) / 255.0  # 归一化颜色到 [0, 1]
+
+    # 设置点云的颜色
+    pcd.colors = o3d.utility.Vector3dVector(colors)
+
+    # 保存点云为 .ply 文件
+    o3d.io.write_point_cloud(filename, pcd)
+    return pcd
+
+def visualize_pointcloud(points: np.ndarray, colors: np.ndarray = None):
+    """
+    Visualizes a 3D point cloud using Open3D.
+
+    Args:
+        points (np.ndarray): Nx3 array of XYZ coordinates.
+        colors (np.ndarray, optional): Nx3 array of RGB colors in [0, 1]. Defaults to None.
+    """
+    if points.ndim != 2 or points.shape[1] != 3:
+        raise ValueError("Points should be an Nx3 NumPy array.")
+    
+    pcd = o3d.geometry.PointCloud()
+    pcd.points = o3d.utility.Vector3dVector(points)
+
+    if colors is not None:
+        if colors.shape != points.shape:
+            raise ValueError("Colors must have the same shape as points.")
+        pcd.colors = o3d.utility.Vector3dVector(colors)
+
+    o3d.visualization.draw_geometries([pcd])
+
+
+@hydra.main(config_path='configs', config_name='config', version_base='1.2')
+def main(cfg):
+    with Trainer(cfg) as run:
+        net = run.net_ema.module.cuda()
+        net.eval()
+        # 读取左目图像
+        global_path = "/home/kitti_dataset/"
+        # base = "datas/kitti/raw/2011_09_26/2011_09_26_drive_0002_sync"
+        base = global_path + "2011_09_26/2011_09_26_drive_0005_sync"
+
+        image_type = 'color'  # 'grayscale' or 'color' image
+
+        mode = '00' if image_type == 'grayscale' else '02'
+
+        v2c_filepath = global_path + '2011_09_26/calib_velo_to_cam.txt'
+        c2c_filepath = global_path + '2011_09_26/calib_cam_to_cam.txt'
+        image_mean = np.array([90.9950, 96.2278, 94.3213])
+        image_std = np.array([79.2382, 80.5267, 82.1483])
+        image_height = 352
+        image_width = 1216
+
+        t_file = base + "/image_" + mode + "/timestamps.txt"
+        with open(t_file, 'r') as f:
+            N = sum(1 for _ in f)
+
+        # viewer = ImageGridViewer(titles=["Depth", "Raw", "Lidar", "Projected"])
+        # viewer_3d = PointCloudImageViewer()
+        # viewer_pc = PointCloudViewer()
+
+        for i in tqdm(range(N)):
+            
+            image = np.array(Image.open(os.path.join(
+                base, 'image_' + mode + f'/data/0000000{i:03d}.png')).convert('RGB'), dtype=np.uint8)
+            
+            if image is None:
+                break
+            width, height = image.shape[1], image.shape[0]
+
+            # bin file -> numpy array
+            velo_points = load_from_bin(os.path.join(
+                base, f'velodyne_points/data/0000000{i:03d}.bin'))
+
+            image_type = 'color'  # 'grayscale' or 'color' image
+            # image_00 = 'grayscale image' , image_02 = 'color image'
+            mode = '00' if image_type == 'grayscale' else '02'
+
+            ans, c_, lidar = velo3d_2_camera2d_points(velo_points, v_fov=(-24.9, 2.0), h_fov=(-45, 45),
+                                                      vc_path=v2c_filepath, cc_path=c2c_filepath, mode=mode,
+                                                      image_shape=image.shape)
+
+            image_vis = print_projection_plt(points=ans, color=c_, image=image.copy())
+
+            # depth completion
+            K_cam = torch.from_numpy(pull_K_cam(
+                c2c_filepath).astype(np.float32)).cuda()
+            
+            tp = image.shape[0] - image_height
+            lp = (image.shape[1] - image_width) // 2
+            image = image[tp:tp + image_height, lp:lp + image_width]
+            lidar = lidar[tp:tp + image_height, lp:lp + image_width, None]
+            image_vis = image_vis[tp:tp + image_height, lp:lp + image_width]
+            K_cam[0, 2] -= lp
+            K_cam[1, 2] -= tp
+
+            image = (image - image_mean) / image_std
+
+            image_tensor = image.transpose(2, 0, 1).astype(np.float32)[None]
+            lidar_tensor = lidar.transpose(2, 0, 1).astype(np.float32)[None]
+
+            image_tensor = torch.from_numpy(image_tensor)
+            lidar_tensor = torch.from_numpy(lidar_tensor)
+
+            output = net(image_tensor.cuda(), None,
+                         lidar_tensor.cuda(), K_cam[None].cuda())
+
+            if isinstance(output, (list, tuple)):
+                output = output[-1]
+
+            tensor_output = output
+            
+            output = output.squeeze().detach().cpu().numpy()
+            image = image * image_std + image_mean
+            
+            output_max, output_min = output.max(), output.min()
+            output_norm = (output - output_min) / (output_max - output_min) * 255
+            output_norm = output_norm.astype('uint8')
+            output_color = cv2.applyColorMap(output_norm, cv2.COLORMAP_JET)
+
+            # Transform from depth to 3D
+            # points_3d = depth_to_point_cloud(tensor_output, K_cam)
+            points_3d, colors = depth_to_colored_point_cloud(tensor_output, K_cam, image.astype(np.uint8)[:, :, ::-1])
+            del tensor_output
+
+            visualize_pointcloud(points_3d, colors)
+
+            # viewer_3d.visualize_for_seconds(points_3d, output_color, 2)
+
+            # viewer_pc.visualize_for_seconds(points_3d, 2, colors)
+
+            # viewer.update([output_color, image.astype(np.uint8)[:, :, ::-1], lidar.astype(np.uint8) * 3, image_vis],
+            #                duration=0.5)  # Display for x seconds
+
+            cv2.imwrite(f'outputs/0000000{i:03d}_depth.png', output_color)
+            
+            cv2.imwrite(f'outputs/0000000{i:03d}_image.png', image.astype(np.uint8)[:, :, ::-1])
+            cv2.imwrite(f'outputs/0000000{i:03d}_lidar.png', lidar.astype(np.uint8) * 3)
+            cv2.imwrite(f'outputs/0000000{i:03d}_image_vis.png', image_vis)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/demo_docker.sh b/demo_docker.sh
new file mode 100644
index 0000000..cd45879
--- /dev/null
+++ b/demo_docker.sh
@@ -0,0 +1,4 @@
+python demo_docker.py gpus=[0] name=BP_KITTI ++chpt=dmd3c_distillation_depth_anything_v2.pth \
+    net=PMP num_workers=1 \
+    data=KITTI data.testset.mode=test data.testset.height=352 \
+    test_batch_size=1 metric=RMSE ++save=true
diff --git a/docker/Dockerfile b/docker/Dockerfile
new file mode 100644
index 0000000..f055d49
--- /dev/null
+++ b/docker/Dockerfile
@@ -0,0 +1,36 @@
+# Base image with CUDA and PyTorch support
+FROM nvidia/cuda:12.1.1-devel-ubuntu22.04
+
+# System setup
+RUN apt-get update && apt-get install -y \
+    python3.10 python3-pip python3-dev git build-essential cmake \
+    libopenblas-dev libomp-dev g++ curl vim wget libgl1 \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install Miniconda
+ENV PATH="/root/miniconda3/bin:${PATH}"
+ARG PATH="/root/miniconda3/bin:${PATH}"
+
+# Install Miniconda on x86 or ARM platforms
+RUN arch=$(uname -m) && \
+    if [ "$arch" = "x86_64" ]; then \
+    MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh"; \
+    elif [ "$arch" = "aarch64" ]; then \
+    MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-aarch64.sh"; \
+    else \
+    echo "Unsupported architecture: $arch"; \
+    exit 1; \
+    fi && \
+    wget $MINICONDA_URL -O miniconda.sh && \
+    mkdir -p /root/.conda && \
+    bash miniconda.sh -b -p /root/miniconda3 && \
+    rm -f miniconda.sh
+
+RUN conda --version
+
+# Clone DMD3C project
+RUN	cd /home \
+    && git clone https://github.com/fetty31/DMD3C.git
+
+# Set python alias
+RUN ln -s /usr/bin/python3 /usr/bin/python
diff --git a/docker/build b/docker/build
new file mode 100755
index 0000000..587d0de
--- /dev/null
+++ b/docker/build
@@ -0,0 +1,15 @@
+#!/bin/bash
+set -e
+
+DOCKER_IMG="dmd3c-img"
+
+if [[ $# -eq 0 ]] ; then
+  echo "WARNING: No arguments provided, image ${DOCKER_IMG} will be created"
+else 
+  DOCKER_IMG=$1
+fi
+
+PROJECT_ROOT="$(cd "$(dirname "$0")"; pwd)"
+
+echo "----- Building Docker image from ${PROJECT_ROOT}/Dockerfile "
+docker build -t ${DOCKER_IMG} ${PROJECT_ROOT}
\ No newline at end of file
diff --git a/docker/run b/docker/run
new file mode 100755
index 0000000..eaeb0d4
--- /dev/null
+++ b/docker/run
@@ -0,0 +1,87 @@
+#!/bin/bash
+
+DOCKER_NAME="dmd3c-container"
+DOCKER_IMG="dmd3c-img:latest"
+
+if [[ $# -eq 0 ]] ; then
+  echo "WARNING: No arguments provided, container ${DOCKER_NAME} from image ${DOCKER_IMG} will be started"
+elif [[ $# -eq 1 ]] ; then
+  echo "WARNING: Only one argument provided, container $1 from image ${DOCKER_IMG} will be started"
+  DOCKER_NAME=$1
+else
+  DOCKER_NAME=$1
+  DOCKER_IMG=$2
+fi
+
+# Variables required for logging as a user with the same id as the user running this script
+export LOCAL_USER_ID=`id -u $USER`
+export LOCAL_GROUP_ID=`id -g $USER`
+export LOCAL_GROUP_NAME=`id -gn $USER`
+
+PROJECT_ROOT="$(cd "$(dirname "$0")"; cd ..; pwd)"
+
+# Allow customization for docker args
+# For instance, to remove the container when it exits: DOCKER_USER_ARGS='--rm' pal_docker.sh ...
+DOCKER_USER_ARGS="$DOCKER_USER_ARGS --env LOCAL_USER_ID --env LOCAL_GROUP_ID --env LOCAL_GROUP_NAME -it"
+# DOCKER_USER_ARGS="-it"
+
+# Variables for forwarding ssh agent into docker container
+SSH_AUTH_ARGS=""
+if [ ! -z $SSH_AUTH_SOCK ]; then
+    DOCKER_SSH_AUTH_ARGS="-v $SSH_AUTH_SOCK:/run/host_ssh_auth_sock -e SSH_AUTH_SOCK=/run/host_ssh_auth_sock"
+fi
+
+# Settings required for having nvidia GPU acceleration inside the docker
+DOCKER_GPU_ARGS="--env DISPLAY=$DISPLAY --env QT_X11_NO_MITSHM=1 --volume=/tmp/.X11-unix:/tmp/.X11-unix:rw -e NVIDIA_DRIVER_CAPABILITIES=all"
+
+dpkg -l | grep nvidia-container-toolkit &> /dev/null
+HAS_NVIDIA_TOOLKIT=$?
+
+type nvidia-docker &> /dev/null
+HAS_NVIDIA_DOCKER=$?
+
+if [ $HAS_NVIDIA_TOOLKIT -eq 0 ]; then
+  docker_version=`docker version --format '{{.Client.Version}}' | cut -d. -f1`
+  if [ $docker_version -ge 19 ]; then
+	  DOCKER_COMMAND="docker run --gpus all"
+  else
+	  DOCKER_COMMAND="docker run --runtime=nvidia"
+  fi
+elif [ $HAS_NVIDIA_DOCKER -eq 0 ]; then
+  DOCKER_COMMAND="nvidia-docker run"
+else
+  echo "Running without nvidia-docker, if you have an NVidia card you may need it"\
+  "to have GPU acceleration"
+  DOCKER_COMMAND="docker run"
+fi
+
+DOCKER_NETWORK_ARGS="--net host"
+if [[ "$@" == *"--net "* ]]; then
+    DOCKER_NETWORK_ARGS=""
+fi
+
+XAUTH=/tmp/.docker.xauth
+
+xhost +local:root
+
+if docker ps -a | grep -q ${DOCKER_NAME}; then
+  echo "WARNING: A container named ${DOCKER_NAME} was already created; it will be started"
+  docker start -i ${DOCKER_NAME}
+else
+  
+  $DOCKER_COMMAND \
+  $DOCKER_USER_ARGS \
+  $DOCKER_GPU_ARGS \
+  $DOCKER_SSH_AUTH_ARGS \
+  $DOCKER_NETWORK_ARGS \
+  --privileged \
+  --name ${DOCKER_NAME} \
+  --env="XAUTHORITY=$XAUTH" \
+  --volume="$HOME/Desktop/kitti_dataset:/home/kitti_dataset" \
+  ${DOCKER_IMG} 
+
+fi
+
+xhost -local:root
+echo "Goodbye :)"
+
diff --git a/environment.yml b/environment.yml
index 15bbc19..06c22e0 100644
--- a/environment.yml
+++ b/environment.yml
@@ -269,4 +269,6 @@ dependencies:
   - zipp=3.17.0=pyhd8ed1ab_0
   - zlib=1.2.13=hd590300_5
   - zstd=1.5.2=ha4553b6_0
-
+  - pip:
+      - open3d
+      - imutils
diff --git a/exts/setup.py b/exts/setup.py
index 18f7453..b8bc5c7 100644
--- a/exts/setup.py
+++ b/exts/setup.py
@@ -4,15 +4,23 @@
 setup(
     name='BpOps',
     ext_modules=[
-        CUDAExtension('BpOps',
-                      [
-                          'bp_cuda.cpp',
-                          'bp_cuda_kernel.cu',
-                      ],
-                      extra_compile_args={'cxx': ['-g'],
-                                          'nvcc': ['-O3']}
-                      ),
+        CUDAExtension(
+            name='BpOps',
+            sources=['bp_cuda.cpp', 'bp_cuda_kernel.cu'],
+            extra_compile_args={
+                'cxx': ['-O3', '-std=c++17'],
+                'nvcc': [
+                    '-O3',
+                    '-std=c++17',
+                    '--expt-relaxed-constexpr',
+                    '--expt-extended-lambda',
+                    '-gencode=arch=compute_89,code=sm_89',  # You can change based on your GPU
+                    '-ccbin=/usr/bin/g++'                   # Ensures nvcc uses system g++ not conda-cc
+                ]
+            }
+        ),
     ],
     cmdclass={
         'build_ext': BuildExtension
-    })
+    }
+)
diff --git a/utils_infer.py b/utils_infer.py
index f19f783..1a7d00e 100644
--- a/utils_infer.py
+++ b/utils_infer.py
@@ -102,7 +102,8 @@ def init_net(self):
         if 'chpt' in self.cfg:
             self.ddp_log(f'resume CHECKPOINTS')
             save_path = os.path.join('checkpoints', self.cfg.chpt)
-            cp = torch.load(os.path.join(save_path, 'result_ema.pth'), map_location=torch.device('cpu'))
+            # cp = torch.load(os.path.join(save_path, 'result_ema.pth'), map_location=torch.device('cpu'))
+            cp = torch.load(save_path, map_location=torch.device('cpu'))
             model.load_state_dict(cp['net'], strict=True)
             self.best_metric_ema = cp['best_metric_ema']
             print("Epoch:", cp["epoch"])
diff --git a/visualization_tools.py b/visualization_tools.py
new file mode 100644
index 0000000..ab20c26
--- /dev/null
+++ b/visualization_tools.py
@@ -0,0 +1,166 @@
+import matplotlib.pyplot as plt
+import cv2
+import numpy as np
+import open3d as o3d
+import open3d.visualization.gui as gui
+import open3d.visualization.rendering as rendering
+import matplotlib.cm as cm
+import time
+import threading
+
+class ImageGridViewer:
+    def __init__(self, titles=None):
+        """
+        Initializes a persistent 2x2 image grid viewer that preserves image size.
+        
+        Args:
+            titles (list of str): Optional list of 4 titles for the subplots.
+        """
+        self.fig, self.axes = plt.subplots(2, 2, figsize=(10, 8))
+        self.titles = titles if titles else [""] * 4
+
+        self.images = []
+        for ax, title in zip(self.axes.flatten(), self.titles):
+            ax.set_title(title)
+            ax.axis('off')
+            ax.set_aspect('equal')  # Prevent image distortion
+
+            # Initialize with dummy image to match interface
+            dummy_img = np.zeros((10, 10, 3), dtype=np.uint8)
+            im = ax.imshow(dummy_img, interpolation='none')
+            self.images.append(im)
+
+        plt.subplots_adjust(wspace=0.01, hspace=0.01)
+        plt.ion()
+        plt.show()
+
+    def update(self, images, duration=0):
+        """
+        Updates the grid with new images and refreshes the display.
+        
+        Args:
+            images (list of np.ndarray): List of 4 OpenCV (BGR) images.
+            duration (float): Seconds to pause after displaying (0 = no pause).
+        """
+        if len(images) != 4:
+            raise ValueError("Exactly 4 images required.")
+
+        for i in range(4):
+            rgb_img = cv2.cvtColor(images[i], cv2.COLOR_BGR2RGB)
+            self.images[i].set_data(rgb_img)
+            self.images[i].set_extent((0, rgb_img.shape[1], rgb_img.shape[0], 0))  # Preserve size
+
+        self.fig.canvas.draw()
+        self.fig.canvas.flush_events()
+        if duration > 0:
+            plt.pause(duration)
+
+class PointCloudImageViewer:
+    def __init__(self, window_title="Viewer", width=1280, height=720):
+        self.app = gui.Application.instance
+        self.app.initialize()
+        self.window = self.app.create_window(window_title, width, height)
+        self.scene_widget = gui.SceneWidget()
+        self.scene_widget.scene = rendering.Open3DScene(self.window.renderer)
+
+        self.panel = gui.Horiz()
+        self.window.add_child(self.panel)
+
+        # Placeholder image
+        dummy_img = o3d.geometry.Image(np.zeros((480, 640, 3), dtype=np.uint8))
+        self.image_widget = gui.ImageWidget(dummy_img)
+
+        self.panel.add_child(self.image_widget)
+        self.panel.add_child(self.scene_widget)
+
+        self.geometry_name = "pointcloud"
+
+    def update(self, pointcloud_np, image_cv2):
+        # Convert image
+        image_rgb = cv2.cvtColor(image_cv2, cv2.COLOR_BGR2RGB)
+        o3d_image = o3d.geometry.Image(image_rgb)
+
+        # Update image widget
+        self.image_widget.update_image(o3d_image)
+
+        # Create new point cloud
+        pcd = o3d.geometry.PointCloud()
+        pcd.points = o3d.utility.Vector3dVector(pointcloud_np)
+
+        # Clear old and add new geometry
+        self.scene_widget.scene.clear_geometry()
+        self.scene_widget.scene.add_geometry(self.geometry_name, pcd, rendering.MaterialRecord())
+
+        # Recenter camera
+        bounds = pcd.get_axis_aligned_bounding_box()
+        self.scene_widget.setup_camera(60.0, bounds, bounds.get_center())
+
+    def run(self):
+        gui.Application.instance.run()
+
+    def run_one_tick(self):
+        self.app.run_one_tick()
+
+    def visualize_for_seconds(self, pointcloud_np, image_cv2, duration_sec=5):
+        self.update(pointcloud_np, image_cv2)
+        start = time.time()
+        while time.time() - start < duration_sec:
+            self.app.run_one_tick()
+            time.sleep(0.01)  # to prevent max CPU use
+
+    def run_with_updates(self, data_loader_func, interval_sec=2):
+        def update_loop():
+            while True:
+                pcd_np, img_cv2 = data_loader_func()
+                gui.Application.instance.post_to_main_thread(
+                    self.window, lambda: self.update(pcd_np, img_cv2)
+                )
+                time.sleep(interval_sec)
+
+        threading.Thread(target=update_loop, daemon=True).start()
+        self.app.run()
+
+class PointCloudViewer:
+    def __init__(self, window_title="PointCloud Viewer", width=800, height=600):
+        self.app = gui.Application.instance
+        self.app.initialize()
+        self.window = self.app.create_window(window_title, width, height)
+        self.scene_widget = gui.SceneWidget()
+        self.scene_widget.scene = rendering.Open3DScene(self.window.renderer)
+        self.window.add_child(self.scene_widget)
+
+        self.geometry_name = "pointcloud"
+
+    def _color_by_x(self, points_np):
+        x = points_np[:, 0]
+        x_norm = (x - x.min()) / (x.max() - x.min() + 1e-8)  # Normalize x to [0,1]
+        colors = cm.viridis(x_norm)[:, :3]  # Drop alpha channel
+        return colors
+
+    def update(self, points_np, colors = None):
+        # Create and set point cloud geometry
+        pcd = o3d.geometry.PointCloud()
+        pcd.points = o3d.utility.Vector3dVector(points_np)
+
+        # Color by X values
+        if colors is None:
+            colors_np = self._color_by_x(points_np)
+        else:
+            colors_np = colors
+        pcd.colors = o3d.utility.Vector3dVector(colors_np)
+
+        self.scene_widget.scene.clear_geometry()
+        self.scene_widget.scene.add_geometry(self.geometry_name, pcd, rendering.MaterialRecord())
+
+        bounds = pcd.get_axis_aligned_bounding_box()
+        self.scene_widget.setup_camera(60.0, bounds, bounds.get_center())
+
+    def visualize_for_seconds(self, pointcloud_np, duration_sec=2, colors_np=None):
+        self.update(pointcloud_np, colors_np)
+        start_time = time.time()
+        while time.time() - start_time < duration_sec:
+            self.app.run_one_tick()
+            time.sleep(0.01)
+
+    def run(self):
+        self.app.run()

From cedfee6a07c9f44d47c4b3769c9d6133a8f251f0 Mon Sep 17 00:00:00 2001
From: Fetty <fetty3113@gmail.com>
Date: Tue, 8 Jul 2025 13:26:22 +0000
Subject: [PATCH 2/4] Added demo for ONA robot (custom data)

---
 .gitignore             |   3 +
 demo_docker.py         |  20 +-
 demo_docker.sh         |   0
 demo_ona.py            | 512 +++++++++++++++++++++++++++++++++++++++++
 demo_ona.sh            |   4 +
 environment.yml        |   1 +
 exts/.gitignore        |   3 +
 models/.gitignore      |   2 +
 visualization_tools.py |  12 +-
 9 files changed, 554 insertions(+), 3 deletions(-)
 create mode 100644 .gitignore
 mode change 100644 => 100755 demo_docker.sh
 create mode 100644 demo_ona.py
 create mode 100755 demo_ona.sh
 create mode 100644 exts/.gitignore
 create mode 100644 models/.gitignore

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..1677adc
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+__pycache__/*
+outputs/*
+runs/*
\ No newline at end of file
diff --git a/demo_docker.py b/demo_docker.py
index e2e0542..51e4a2d 100644
--- a/demo_docker.py
+++ b/demo_docker.py
@@ -390,8 +390,24 @@ def visualize_pointcloud(points: np.ndarray, colors: np.ndarray = None):
             raise ValueError("Colors must have the same shape as points.")
         pcd.colors = o3d.utility.Vector3dVector(colors)
 
-    o3d.visualization.draw_geometries([pcd])
+    # Create a coordinate frame at the origin (0, 0, 0)
+    axis = o3d.geometry.TriangleMesh.create_coordinate_frame(size=0.5, origin=[0, 0, 0])
 
+    def set_top_down_view(vis):
+        ctr = vis.get_view_control()
+        ctr.set_front([0.0, 0.0, -1.0])   # Camera looks along -Z
+        ctr.set_up([0.0, 1.0, 0.0])       # Y axis is 'up' in the view
+        ctr.set_lookat(pcd.get_center())  # Focus on the center of the cloud
+        ctr.set_zoom(0.5)                 # Zoom level, adjust as needed
+        return False  # Return False to stop calling repeatedly
+
+    # Wrap inside a key callback so it remains interactive
+    key_to_callback = {ord("T"): set_top_down_view}
+
+    print("Press 'T' in the viewer to set top-down view.")
+    o3d.visualization.draw_geometries_with_key_callbacks([pcd, axis], key_to_callback)
+
+    # o3d.visualization.draw_geometries([pcd])
 
 @hydra.main(config_path='configs', config_name='config', version_base='1.2')
 def main(cfg):
@@ -484,7 +500,7 @@ def main(cfg):
             # Transform from depth to 3D
             # points_3d = depth_to_point_cloud(tensor_output, K_cam)
             points_3d, colors = depth_to_colored_point_cloud(tensor_output, K_cam, image.astype(np.uint8)[:, :, ::-1])
-            del tensor_output
+            del tensor_output # free GPU memory
 
             visualize_pointcloud(points_3d, colors)
 
diff --git a/demo_docker.sh b/demo_docker.sh
old mode 100644
new mode 100755
diff --git a/demo_ona.py b/demo_ona.py
new file mode 100644
index 0000000..62a3097
--- /dev/null
+++ b/demo_ona.py
@@ -0,0 +1,512 @@
+import open3d as o3d
+import numpy as np
+import cv2
+import os
+import matplotlib.pyplot as plt
+import torch
+import hydra
+from utils_infer import Trainer
+from mpl_toolkits.mplot3d import Axes3D
+from tqdm import tqdm
+from PIL import Image
+import time
+from visualization_tools import ImageGridViewer, PointCloudImageViewer, PointCloudViewer
+
+from rosbags.rosbag1 import Reader
+from rosbags.typesys import Stores, get_typestore
+
+import struct # to decode pointcloud_2 data
+
+def depth_to_point_cloud(depth, K_cam):
+    if isinstance(K_cam, np.ndarray):
+        K_cam = torch.from_numpy(K_cam).to(depth.device)
+
+    A, B, H, W = depth.shape
+    i, j = torch.meshgrid(
+        torch.arange(0, H, device=depth.device),
+        torch.arange(0, W, device=depth.device),
+        indexing='ij'
+    )
+    z = depth[0][0]
+    x = (j - K_cam[0, 2]) * z / K_cam[0, 0]
+    y = (i - K_cam[1, 2]) * z / K_cam[1, 1]
+    y = -y
+
+    points = torch.stack((x, y, z), dim=2).reshape(-1, 3)
+    return points.detach().cpu().numpy()
+
+def depth_to_colored_point_cloud(depth, K_cam, bgr_image):
+    """
+    Converts a depth map to a 3D point cloud and assigns RGB colors.
+
+    Args:
+        depth (torch.Tensor): Shape (1, 1, H, W), depth values.
+        K_cam (np.ndarray or torch.Tensor): 3x3 intrinsic matrix.
+        bgr_image (np.ndarray): (H, W, 3) image in BGR format (from OpenCV).
+
+    Returns:
+        points (np.ndarray): (N, 3) 3D points.
+        colors (np.ndarray): (N, 3) RGB colors in [0, 1].
+    """
+    if isinstance(K_cam, np.ndarray):
+        K_cam = torch.from_numpy(K_cam).to(depth.device)
+
+    _, _, H, W = depth.shape
+
+    # Pixel coordinates
+    i, j = torch.meshgrid(
+        torch.arange(0, H, device=depth.device),
+        torch.arange(0, W, device=depth.device),
+        indexing='ij'
+    )
+
+    z = depth[0, 0]
+    x = (j - K_cam[0, 2]) * z / K_cam[0, 0]
+    y = (i - K_cam[1, 2]) * z / K_cam[1, 1]
+    y = -y  # flip if needed
+
+    points = torch.stack((x, y, z), dim=2).reshape(-1, 3).detach().cpu().numpy()
+
+    # Convert BGR to RGB and normalize
+    rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0
+    colors = rgb_image.reshape(-1, 3)
+
+    return points, colors
+
+
+def read_calib_file(filepath):
+    """Read in a calibration file and parse into a dictionary."""
+    data = {}
+
+    with open(filepath, 'r') as f:
+        for line in f.readlines():
+            key, value = line.split(':', 1)
+            # The only non-float values in these files are dates, which
+            # we don't care about anyway
+            try:
+                data[key] = np.array([float(x) for x in value.split()])
+            except ValueError:
+                pass
+
+    return data
+
+
+def depth_color(val, min_d=0, max_d=120):
+    """ 
+    print Color(HSV's H value) corresponding to distance(m) 
+    close distance = red , far distance = blue
+    """
+    np.clip(val, 0, max_d, out=val)  # max distance is 120m but usually not usual
+    return (((val - min_d) / (max_d - min_d)) * 120).astype(np.uint8)
+
+
+def in_h_range_points(points, m, n, fov):
+    """ extract horizontal in-range points """
+    return np.logical_and(np.arctan2(n, m) > (-fov[1] * np.pi / 180),
+                          np.arctan2(n, m) < (-fov[0] * np.pi / 180))
+
+
+def in_v_range_points(points, m, n, fov):
+    """ extract vertical in-range points """
+    return np.logical_and(np.arctan2(n, m) < (fov[1] * np.pi / 180),
+                          np.arctan2(n, m) > (fov[0] * np.pi / 180))
+
+
+def fov_setting(points, x, y, z, dist, h_fov, v_fov):
+    """ filter points based on h,v FOV  """
+
+    if h_fov[1] == 180 and h_fov[0] == -180 and v_fov[1] == 2.0 and v_fov[0] == -24.9:
+        return points
+
+    if h_fov[1] == 180 and h_fov[0] == -180:
+        return points[in_v_range_points(points, dist, z, v_fov)]
+    elif v_fov[1] == 2.0 and v_fov[0] == -24.9:
+        return points[in_h_range_points(points, x, y, h_fov)]
+    else:
+        h_points = in_h_range_points(points, x, y, h_fov)
+        v_points = in_v_range_points(points, dist, z, v_fov)
+        return points[np.logical_and(h_points, v_points)]
+
+
+def velo_points_filter(points, v_fov, h_fov):
+    """ extract points corresponding to FOV setting """
+
+    # Projecting to 2D
+    x = points[:, 0]
+    y = points[:, 1]
+    z = points[:, 2]
+    dist = np.sqrt(x ** 2 + y ** 2 + z ** 2)
+
+    if h_fov[0] < -90:
+        h_fov = (-90,) + h_fov[1:]
+    if h_fov[1] > 90:
+        h_fov = h_fov[:1] + (90,)
+
+    x_lim = fov_setting(x, x, y, z, dist, h_fov, v_fov)[:, None]
+    y_lim = fov_setting(y, x, y, z, dist, h_fov, v_fov)[:, None]
+    z_lim = fov_setting(z, x, y, z, dist, h_fov, v_fov)[:, None]
+
+    # Stack arrays in sequence horizontally
+    xyz_ = np.hstack((x_lim, y_lim, z_lim))
+    xyz_ = xyz_.T
+
+    # stack (1,n) arrays filled with the number 1
+    one_mat = np.full((1, xyz_.shape[1]), 1)
+    xyz_ = np.concatenate((xyz_, one_mat), axis=0)
+
+    # need dist info for points color
+    dist_lim = fov_setting(dist, x, y, z, dist, h_fov, v_fov)
+    color = depth_color(dist_lim, 0, 70)
+
+    return xyz_, color
+
+
+def calib_velo2cam():
+    """ 
+    get Rotation(R : 3x3), Translation(T : 3x1) matrix info 
+    using R,T matrix, we can convert velodyne coordinates to camera coordinates
+    """
+
+    R = np.array([-0.1533276, -0.98776117, 0.02860976, 
+                0.04630053, -0.0361014, -0.99827499, 
+                0.98709012, -0.15173846, 0.05126921]
+                )
+    R = R.reshape(3, 3)
+    T = np.array([0.28564802, 1.01355072, -0.9722258])
+    T = T.reshape(3, 1)
+    return R, T
+
+def calib_cam2cam():
+    P_ = np.array([659.29813, 0.0, 760.64067,
+                0.0, 662.57773, 543.84931, 
+                0.0, 0.0, 1.0]
+                )
+    P_ = P_.reshape(3, 3)
+    d = np.array([-0.193406, 0.026607, 0.001749, -0.000172, 0.0])
+
+    return P_, d
+
+
+def velo3d_2_camera2d_points(points, v_fov, h_fov, K, D=None, image_shape=None):
+    """ 
+    Project 3D Velodyne points to 2D image points using camera calibration with distortion correction.
+    """
+
+    # Get calibration: rotation & translation from Velodyne to Camera
+    R_vc, T_vc = calib_velo2cam()  # R: (3x3), T: (3x1)
+
+    # Filter points based on FOV
+    xyz_v, c_ = velo_points_filter(points, v_fov, h_fov)  # xyz_v: 4xN (homogeneous)
+
+    if image_shape is None:
+        raise ValueError("Image shape must be provided to generate sparse depth map")
+
+    # Convert xyz_v (homogeneous) to 3xN
+    xyz_v = xyz_v[:3, :]
+
+    # Transform points to camera coordinates: X_c = R * X_v + T
+    xyz_c = R_vc @ xyz_v + T_vc
+
+    # Transpose to Nx3 for OpenCV
+    xyz_c = xyz_c.T.reshape(-1, 1, 3)
+
+    # Project to 2D with distortion
+    if D is not None:
+        img_points, _ = cv2.projectPoints(xyz_c, np.zeros((3,1)), np.zeros((3,1)), K, D)
+    else:
+        img_points, _ = cv2.projectPoints(xyz_c, np.zeros((3,1)), np.zeros((3,1)), K, np.array([0,0,0,0,0]))
+    img_points = img_points.reshape(-1, 2)
+
+    # Extract depth (Z in camera frame)
+    depth_values = xyz_c[:, 0, 2]
+
+    # Create empty depth map
+    depth_map = np.full(image_shape[:2], 0, dtype=np.float32)
+
+    # Filter valid points within image bounds
+    valid_mask = np.logical_and.reduce((
+        img_points[:, 0] >= 0,
+        img_points[:, 0] < image_shape[1],
+        img_points[:, 1] >= 0,
+        img_points[:, 1] < image_shape[0],
+    ))
+
+    valid_img_points = img_points[valid_mask]
+    valid_depths = depth_values[valid_mask]
+
+    # Populate depth map
+    for pt, depth in zip(valid_img_points, valid_depths):
+        x, y = int(pt[0]), int(pt[1])
+        depth_map[y, x] = depth
+
+    return valid_img_points.T, c_, depth_map
+
+def print_projection_plt(points, color, image):
+    """ project converted velodyne points into camera image """
+
+    hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
+
+    for i in range(points.shape[1]):
+        cv2.circle(hsv_image, (np.int32(points[0][i]), np.int32(
+            points[1][i])), 2, (int(color[i]), 255, 255), -1)
+
+    return cv2.cvtColor(hsv_image, cv2.COLOR_HSV2RGB)
+
+
+def load_from_bin(bin_path):
+    obj = np.fromfile(bin_path, dtype=np.float32).reshape(-1, 4)
+    # ignore reflectivity info
+    return obj[:, :3]
+
+def pointcloud2_to_xyz_array(cloud_msg):
+    """
+    Convert sensor_msgs/PointCloud2 to an Nx3 NumPy array.
+    Assumes XYZ floats.
+    """
+    fmt = 'fff'  # just XYZ (each float32)
+    width = cloud_msg.width
+    height = cloud_msg.height
+    point_step = cloud_msg.point_step
+    row_step = cloud_msg.row_step
+    data = cloud_msg.data
+
+    points = []
+    for i in range(0, len(data), point_step):
+        x, y, z = struct.unpack_from(fmt, data, offset=i)
+        points.append((x, y, z))
+
+    return np.array(points, dtype=np.float32)
+
+def load_image_from_rosbag(bag_path, N=-1):
+    typestore = get_typestore(Stores.ROS1_NOETIC)
+    topic = "/ona2/sensors/flir_camera_front/image_raw"
+    images = []
+
+    with Reader(bag_path) as reader:
+        connections = [x for x in reader.connections if x.topic == topic]
+        c = 0
+
+        for connection, timestamp, rawdata in reader.messages(connections=connections):
+
+            msg = typestore.deserialize_ros1(rawdata, connection.msgtype)
+
+            if msg.encoding != "bayer_rggb8":
+                print(f"Unsupported encoding: {msg.encoding}")
+                continue
+
+            height = msg.height
+            width = msg.width
+            data = np.frombuffer(msg.data, dtype=np.uint8).reshape((height, width))
+
+            # Debayer using OpenCV: Bayer RGGB -> RGB
+            rgb_img = cv2.cvtColor(data, cv2.COLOR_BAYER_RG2RGB)
+            images.append(rgb_img)
+
+            c += 1
+            if c > N and N > 0:
+                break
+
+    if len(images) > 0:
+        print(f"Loaded images shape: {images[0].shape}, dtype: {images[0].dtype}")
+    return images
+
+
+def load_pcd_from_rosbag(bag_path, N=-1):
+    # Create a typestore for the matching ROS release.
+    typestore = get_typestore(Stores.ROS1_NOETIC)
+
+    # Topic to filter
+    topic = "/ona2/sensors/pandar_front/cloud"
+    pcds = []
+
+    c = 0
+    # Create reader instance and open for reading.
+    with Reader(bag_path) as reader:
+        connections = [x for x in reader.connections if x.topic == topic]
+
+        for connection, timestamp, rawdata in reader.messages(connections=connections):
+
+            msg = typestore.deserialize_ros1(rawdata, connection.msgtype)
+            np_points = pointcloud2_to_xyz_array(msg)
+            pcds.append(np_points) 
+            c += 1
+            if c > N and N > 0:
+                break
+    
+    return pcds
+
+
+def visualize_pointcloud(points: np.ndarray, colors: np.ndarray = None):
+    """
+    Visualizes a 3D point cloud using Open3D.
+
+    Args:
+        points (np.ndarray): Nx3 array of XYZ coordinates.
+        colors (np.ndarray, optional): Nx3 array of RGB colors in [0, 1]. Defaults to None.
+    """
+    if points.ndim != 2 or points.shape[1] != 3:
+        raise ValueError("Points should be an Nx3 NumPy array.")
+    
+    pcd = o3d.geometry.PointCloud()
+    pcd.points = o3d.utility.Vector3dVector(points)
+
+    if colors is not None:
+        if colors.shape != points.shape:
+            raise ValueError("Colors must have the same shape as points.")
+        pcd.colors = o3d.utility.Vector3dVector(colors)
+
+    # Create a coordinate frame at the origin (0, 0, 0)
+    axis = o3d.geometry.TriangleMesh.create_coordinate_frame(size=0.5, origin=[0, 0, 0])
+
+    def set_top_down_view(vis):
+        ctr = vis.get_view_control()
+        ctr.set_front([0.0, 0.0, -1.0])   # Camera looks along -Z
+        ctr.set_up([0.0, 1.0, 0.0])       # Y axis is 'up' in the view
+        ctr.set_lookat(pcd.get_center())  # Focus on the center of the cloud
+        ctr.set_zoom(0.5)                 # Zoom level, adjust as needed
+        return False  # Return False to stop calling repeatedly
+
+    # Wrap inside a key callback so it remains interactive
+    key_to_callback = {ord("T"): set_top_down_view}
+
+    print("Press 'T' in the viewer to set top-down view.")
+    o3d.visualization.draw_geometries_with_key_callbacks([pcd, axis], key_to_callback)
+
+    # o3d.visualization.draw_geometries([pcd])
+
+@hydra.main(config_path='configs', config_name='config', version_base='1.2')
+def main(cfg):
+    with Trainer(cfg) as run:
+        net = run.net_ema.module.cuda()
+        net.eval()
+
+        global_path = "/home/kitti_dataset/2024_11_07_born/"
+        image_bag_name = "2024-11-07-11-24-29_3.bag"
+        pcd_bag_name = "2024-11-07-11-24-14_1.bag"
+
+        v2c_filepath = global_path + '2011_09_26/calib_velo_to_cam.txt'
+        c2c_filepath = global_path + '2011_09_26/calib_cam_to_cam.txt'
+        image_mean = np.array([90.9950, 96.2278, 94.3213])
+        image_std = np.array([79.2382, 80.5267, 82.1483])
+        image_height = 352
+        image_width = 1216
+
+        # Get LiDAR data from rosbag
+        pcd_array = load_pcd_from_rosbag(global_path + "robot/" + pcd_bag_name, 150)
+
+        # Get image data from rosbag
+        images_array = load_image_from_rosbag(global_path + "camera/" + image_bag_name, 150)
+
+        print(f"Loaded {len(images_array)} images and {len(pcd_array)} pcds.")
+
+        # viewer = ImageGridViewer(titles=["Depth", "Raw", "Lidar", "Projected"])
+        # viewer_3d = PointCloudImageViewer()
+        # viewer_pc = PointCloudViewer()
+
+        N = len(images_array)
+        i_pcd = 0
+        for i in tqdm(range(0,N,2)): # We take 1 pcd for each 2 images (LiDAR: 10Hz, Camera: 20Hz)
+
+            image = images_array[i]
+            w, h = image.shape[1], image.shape[0]
+
+            # Undistort image
+            K, D = calib_cam2cam()
+
+                # Get optimal new camera matrix
+            # new_K, roi = cv2.getOptimalNewCameraMatrix(K, D, (w, h), 1, (w, h))
+
+            #     # Undistort using new_K
+            # undistorted_img = cv2.undistort(image, K, D, None, new_K)
+            # x, y, w, h = roi
+            # cropped_img = undistorted_img[y:y+h, x:x+w]
+
+            # Resize image to model dimensions
+            resized = cv2.resize(image, (image_width, image_height), interpolation=cv2.INTER_AREA)
+            image = resized
+
+            # Correct intrinsics
+            CALIB_W = 1440
+            CALIB_H = 1080
+
+            sx = image_width / CALIB_W
+            sy = image_height / CALIB_H
+
+            # new_K[0,0] *= sx 
+            # new_K[1,1] *= sy 
+            # new_K[0,2] *= sx 
+            # new_K[1,2] *= sy 
+
+            K[0,0] *= sx 
+            K[1,1] *= sy 
+            K[0,2] *= sx 
+            K[1,2] *= sy 
+
+            # Get pointcloud
+            pcd = pcd_array[i_pcd]
+            i_pcd += 1
+
+            # Project 3D points into 2D (in pixels)
+            ans, c_, lidar = velo3d_2_camera2d_points(pcd, v_fov=(-24.9, 2.0), h_fov=(-45, 45),
+                                                      K=K, D=D, image_shape=image.shape)
+
+            image_vis = print_projection_plt(points=ans, color=c_, image=image.copy())
+
+            # Depth completion
+            # K_cam = torch.from_numpy(new_K.astype(np.float32)).cuda()
+            K_cam = torch.from_numpy(K.astype(np.float32)).cuda()
+            
+            lidar = lidar[:, :, None]
+
+            image = (image - image_mean) / image_std
+
+            image_tensor = image.transpose(2, 0, 1).astype(np.float32)[None]
+            lidar_tensor = lidar.transpose(2, 0, 1).astype(np.float32)[None]
+
+            image_tensor = torch.from_numpy(image_tensor)
+            lidar_tensor = torch.from_numpy(lidar_tensor)
+
+            m_start = time.time()
+
+            output = net(image_tensor.cuda(), None,
+                         lidar_tensor.cuda(), K_cam[None].cuda())
+
+            m_end = time.time()
+            print(f"Inference time: {(m_end - m_start)*1000.0} ms")
+
+            if isinstance(output, (list, tuple)):
+                output = output[-1]
+
+            tensor_output = output
+            
+            output = output.squeeze().detach().cpu().numpy()
+            image = image * image_std + image_mean
+            
+            output_max, output_min = output.max(), output.min()
+            output_norm = (output - output_min) / (output_max - output_min) * 255
+            output_norm = output_norm.astype('uint8')
+            output_color = cv2.applyColorMap(output_norm, cv2.COLORMAP_JET)
+
+            # Transform from depth to 3D
+            # points_3d = depth_to_point_cloud(tensor_output, K_cam)
+            points_3d, colors = depth_to_colored_point_cloud(tensor_output, K_cam, image.astype(np.uint8)[:, :, ::-1])
+            del tensor_output # free GPU memory
+
+            visualize_pointcloud(points_3d, colors)
+
+            # viewer_3d.visualize_for_seconds(points_3d, output_color, 2)
+
+            # viewer_pc.visualize_for_seconds(points_3d, 5, colors)
+
+            # viewer.update([output_color, image.astype(np.uint8)[:, :, ::-1], lidar.astype(np.uint8) * 3, image_vis],
+            #                duration=0.1)  # Display for x seconds
+
+            # (Optional) Save outputs
+            # cv2.imwrite(f'outputs/0000000{i:03d}_depth.png', output_color)
+            
+            # cv2.imwrite(f'outputs/0000000{i:03d}_image.png', image.astype(np.uint8)[:, :, ::-1])
+            # cv2.imwrite(f'outputs/0000000{i:03d}_lidar.png', lidar.astype(np.uint8) * 3)
+            # cv2.imwrite(f'outputs/0000000{i:03d}_image_vis.png', image_vis)
+
+if __name__ == '__main__':
+    main()
diff --git a/demo_ona.sh b/demo_ona.sh
new file mode 100755
index 0000000..a82ec61
--- /dev/null
+++ b/demo_ona.sh
@@ -0,0 +1,4 @@
+python demo_ona.py gpus=[0] name=BP_KITTI ++chpt=dmd3c_distillation_depth_anything_v2.pth \
+    net=PMP num_workers=1 \
+    data=KITTI data.testset.mode=test data.testset.height=352 \
+    test_batch_size=1 metric=RMSE ++save=true
diff --git a/environment.yml b/environment.yml
index 06c22e0..37d8425 100644
--- a/environment.yml
+++ b/environment.yml
@@ -272,3 +272,4 @@ dependencies:
   - pip:
       - open3d
       - imutils
+      - rosbags
diff --git a/exts/.gitignore b/exts/.gitignore
new file mode 100644
index 0000000..9388d05
--- /dev/null
+++ b/exts/.gitignore
@@ -0,0 +1,3 @@
+dist/*
+build/*
+BpOps.egg-info/*
\ No newline at end of file
diff --git a/models/.gitignore b/models/.gitignore
new file mode 100644
index 0000000..f2043a4
--- /dev/null
+++ b/models/.gitignore
@@ -0,0 +1,2 @@
+# Ignore everything in this directory
+__pycache__/*
\ No newline at end of file
diff --git a/visualization_tools.py b/visualization_tools.py
index ab20c26..aa02abf 100644
--- a/visualization_tools.py
+++ b/visualization_tools.py
@@ -153,7 +153,17 @@ def update(self, points_np, colors = None):
         self.scene_widget.scene.add_geometry(self.geometry_name, pcd, rendering.MaterialRecord())
 
         bounds = pcd.get_axis_aligned_bounding_box()
-        self.scene_widget.setup_camera(60.0, bounds, bounds.get_center())
+        center = bounds.get_center()
+        extent = bounds.get_extent()
+        diameter = np.linalg.norm(extent)
+
+        lookat = pcd.get_center()
+        front = np.array([0.0, 0.0, 1.0])
+        up = np.array([0.0, 1.0, 0.0])
+        zoom_distance = 0.2 * np.linalg.norm(pcd.get_max_bound() - pcd.get_min_bound())
+
+        eye = lookat - front * zoom_distance
+        self.scene_widget.scene.camera.look_at(lookat, eye, up)
 
     def visualize_for_seconds(self, pointcloud_np, duration_sec=2, colors_np=None):
         self.update(pointcloud_np, colors_np)

From 140e6ef1ea616561aa2ae20c64f28e3ec1d46de3 Mon Sep 17 00:00:00 2001
From: Fetty <fetty3113@gmail.com>
Date: Wed, 9 Jul 2025 12:58:26 +0000
Subject: [PATCH 3/4] Visualization updates

---
 demo_ona.py | 128 ++++++++++++++++++++++++++++++++--------------------
 1 file changed, 80 insertions(+), 48 deletions(-)

diff --git a/demo_ona.py b/demo_ona.py
index 62a3097..83c335a 100644
--- a/demo_ona.py
+++ b/demo_ona.py
@@ -155,8 +155,10 @@ def velo_points_filter(points, v_fov, h_fov):
     xyz_ = np.concatenate((xyz_, one_mat), axis=0)
 
     # need dist info for points color
-    dist_lim = fov_setting(dist, x, y, z, dist, h_fov, v_fov)
-    color = depth_color(dist_lim, 0, 70)
+    # dist_lim = fov_setting(dist, x, y, z, dist, h_fov, v_fov)
+    p3N = xyz_[:3, :]
+    norms = np.linalg.norm(p3N, axis=0)
+    color = depth_color(norms, 0, np.max(norms))
 
     return xyz_, color
 
@@ -214,7 +216,7 @@ def velo3d_2_camera2d_points(points, v_fov, h_fov, K, D=None, image_shape=None):
     if D is not None:
         img_points, _ = cv2.projectPoints(xyz_c, np.zeros((3,1)), np.zeros((3,1)), K, D)
     else:
-        img_points, _ = cv2.projectPoints(xyz_c, np.zeros((3,1)), np.zeros((3,1)), K, np.array([0,0,0,0,0]))
+        img_points, _ = cv2.projectPoints(xyz_c, np.zeros((3,1)), np.zeros((3,1)), K, None)
     img_points = img_points.reshape(-1, 2)
 
     # Extract depth (Z in camera frame)
@@ -277,38 +279,56 @@ def pointcloud2_to_xyz_array(cloud_msg):
 
     return np.array(points, dtype=np.float32)
 
-def load_image_from_rosbag(bag_path, N=-1):
+def load_image_from_rosbag(bag_path, stamps, N=-1):
     typestore = get_typestore(Stores.ROS1_NOETIC)
     topic = "/ona2/sensors/flir_camera_front/image_raw"
     images = []
+    pcd_ids = []
 
     with Reader(bag_path) as reader:
         connections = [x for x in reader.connections if x.topic == topic]
-        c = 0
+        c = 0   
+        stamp_i = 0
+        start_found = False
 
         for connection, timestamp, rawdata in reader.messages(connections=connections):
+            
+            now = timestamp*1e-9
 
-            msg = typestore.deserialize_ros1(rawdata, connection.msgtype)
-
-            if msg.encoding != "bayer_rggb8":
-                print(f"Unsupported encoding: {msg.encoding}")
+            if now > stamps[min(stamp_i,len(stamps)-1)] and not start_found:
+                stamp_i += 1
                 continue
+            else:
+                start_found = True
 
-            height = msg.height
-            width = msg.width
-            data = np.frombuffer(msg.data, dtype=np.uint8).reshape((height, width))
+            print(f"now: {now}")
+            print(f"stamp: {stamps[stamp_i]}")
 
-            # Debayer using OpenCV: Bayer RGGB -> RGB
-            rgb_img = cv2.cvtColor(data, cv2.COLOR_BAYER_RG2RGB)
-            images.append(rgb_img)
+            if now >= stamps[stamp_i] and now < stamps[min(stamp_i+1,len(stamps)-1)]:
 
-            c += 1
-            if c > N and N > 0:
-                break
+                msg = typestore.deserialize_ros1(rawdata, connection.msgtype)
+
+                if msg.encoding != "bayer_rggb8":
+                    print(f"Unsupported encoding: {msg.encoding}")
+                    continue
+
+                height = msg.height
+                width = msg.width
+                data = np.frombuffer(msg.data, dtype=np.uint8).reshape((height, width))
+
+                # Debayer using OpenCV: Bayer RGGB -> RGB
+                rgb_img = cv2.cvtColor(data, cv2.COLOR_BAYER_RG2RGB)
+                images.append(rgb_img)
+                pcd_ids.append(stamp_i)
+                stamp_i += 1
+
+                c += 1
+                if c > N and N > 0:
+                    break
 
     if len(images) > 0:
         print(f"Loaded images shape: {images[0].shape}, dtype: {images[0].dtype}")
-    return images
+    return images, pcd_ids
 
 
 def load_pcd_from_rosbag(bag_path, N=-1):
@@ -318,6 +338,7 @@ def load_pcd_from_rosbag(bag_path, N=-1):
     # Topic to filter
     topic = "/ona2/sensors/pandar_front/cloud"
     pcds = []
+    stamps = []
 
     c = 0
     # Create reader instance and open for reading.
@@ -329,11 +350,12 @@ def load_pcd_from_rosbag(bag_path, N=-1):
             msg = typestore.deserialize_ros1(rawdata, connection.msgtype)
             np_points = pointcloud2_to_xyz_array(msg)
             pcds.append(np_points) 
+            stamps.append(timestamp*1e-9)
             c += 1
             if c > N and N > 0:
                 break
     
-    return pcds
+    return pcds, stamps
 
 
 def visualize_pointcloud(points: np.ndarray, colors: np.ndarray = None):
@@ -384,28 +406,25 @@ def main(cfg):
         image_bag_name = "2024-11-07-11-24-29_3.bag"
         pcd_bag_name = "2024-11-07-11-24-14_1.bag"
 
-        v2c_filepath = global_path + '2011_09_26/calib_velo_to_cam.txt'
-        c2c_filepath = global_path + '2011_09_26/calib_cam_to_cam.txt'
         image_mean = np.array([90.9950, 96.2278, 94.3213])
         image_std = np.array([79.2382, 80.5267, 82.1483])
         image_height = 352
         image_width = 1216
 
         # Get LiDAR data from rosbag
-        pcd_array = load_pcd_from_rosbag(global_path + "robot/" + pcd_bag_name, 150)
+        pcd_array, stamps = load_pcd_from_rosbag(global_path + "robot/" + pcd_bag_name)
 
         # Get image data from rosbag
-        images_array = load_image_from_rosbag(global_path + "camera/" + image_bag_name, 150)
+        images_array, pcd_ids = load_image_from_rosbag(global_path + "camera/" + image_bag_name, stamps)
 
         print(f"Loaded {len(images_array)} images and {len(pcd_array)} pcds.")
 
-        # viewer = ImageGridViewer(titles=["Depth", "Raw", "Lidar", "Projected"])
+        viewer = ImageGridViewer(titles=["Depth", "Raw", "Lidar", "Projected"])
         # viewer_3d = PointCloudImageViewer()
         # viewer_pc = PointCloudViewer()
 
         N = len(images_array)
-        i_pcd = 0
-        for i in tqdm(range(0,N,2)): # We take 1 pcd for each 2 images (LiDAR: 10Hz, Camera: 20Hz)
+        for i in tqdm(range(N)): 
 
             image = images_array[i]
             w, h = image.shape[1], image.shape[0]
@@ -413,16 +432,26 @@ def main(cfg):
             # Undistort image
             K, D = calib_cam2cam()
 
+            # h, w = img.shape[:2]
+            # new_K, _ = cv2.getOptimalNewCameraMatrix(K, D, (w, h), 1, (w, h))
+
+            # # Compute undistort maps
+            # map1, map2 = cv2.initUndistortRectifyMap(K, D, None, new_K, (w, h), cv2.CV_16SC2)
+
+            # # Apply remap
+            # undistorted_img = cv2.remap(img, map1, map2, interpolation=cv2.INTER_LINEAR)
+
                 # Get optimal new camera matrix
-            # new_K, roi = cv2.getOptimalNewCameraMatrix(K, D, (w, h), 1, (w, h))
+            new_K, roi = cv2.getOptimalNewCameraMatrix(K, D, (w, h), 1, (w, h))
 
-            #     # Undistort using new_K
-            # undistorted_img = cv2.undistort(image, K, D, None, new_K)
-            # x, y, w, h = roi
-            # cropped_img = undistorted_img[y:y+h, x:x+w]
+                # Undistort using new_K
+            undistorted_img = cv2.undistort(image, K, D, None, new_K)
+            x, y, w, h = roi
+            cropped_img = undistorted_img[y:y+h, x:x+w]
 
             # Resize image to model dimensions
-            resized = cv2.resize(image, (image_width, image_height), interpolation=cv2.INTER_AREA)
+            # resized = cv2.resize(image, (image_width, image_height), interpolation=cv2.INTER_AREA)
+            resized = cv2.resize(undistorted_img, (image_width, image_height), interpolation=cv2.INTER_AREA)
             image = resized
 
             # Correct intrinsics
@@ -432,29 +461,30 @@ def main(cfg):
             sx = image_width / CALIB_W
             sy = image_height / CALIB_H
 
-            # new_K[0,0] *= sx 
-            # new_K[1,1] *= sy 
-            # new_K[0,2] *= sx 
-            # new_K[1,2] *= sy 
+            new_K[0,0] *= sx 
+            new_K[1,1] *= sy 
+            new_K[0,2] *= sx 
+            new_K[1,2] *= sy 
 
-            K[0,0] *= sx 
-            K[1,1] *= sy 
-            K[0,2] *= sx 
-            K[1,2] *= sy 
+            # K[0,0] *= sx 
+            # K[1,1] *= sy 
+            # K[0,2] *= sx 
+            # K[1,2] *= sy 
 
             # Get pointcloud
-            pcd = pcd_array[i_pcd]
-            i_pcd += 1
+            pcd = pcd_array[pcd_ids[i]]
 
             # Project 3D points into 2D (in pixels)
+            # ans, c_, lidar = velo3d_2_camera2d_points(pcd, v_fov=(-24.9, 2.0), h_fov=(-45, 45),
+            #                                           K=K, D=D, image_shape=image.shape)
             ans, c_, lidar = velo3d_2_camera2d_points(pcd, v_fov=(-24.9, 2.0), h_fov=(-45, 45),
-                                                      K=K, D=D, image_shape=image.shape)
+                                                      K=new_K, D=None, image_shape=image.shape)
 
             image_vis = print_projection_plt(points=ans, color=c_, image=image.copy())
 
             # Depth completion
-            # K_cam = torch.from_numpy(new_K.astype(np.float32)).cuda()
-            K_cam = torch.from_numpy(K.astype(np.float32)).cuda()
+            K_cam = torch.from_numpy(new_K.astype(np.float32)).cuda()
+            # K_cam = torch.from_numpy(K.astype(np.float32)).cuda()
             
             lidar = lidar[:, :, None]
 
@@ -498,8 +528,10 @@ def main(cfg):
 
             # viewer_pc.visualize_for_seconds(points_3d, 5, colors)
 
-            # viewer.update([output_color, image.astype(np.uint8)[:, :, ::-1], lidar.astype(np.uint8) * 3, image_vis],
-            #                duration=0.1)  # Display for x seconds
+            lidar = depth_color(lidar, 0, 35)
+            lidar = cv2.applyColorMap(lidar.astype(np.uint8)*3, cv2.COLORMAP_TURBO)
+            viewer.update([output_color, image.astype(np.uint8)[:, :, ::-1], lidar, image_vis],
+                           duration=0.1)  # Display for x seconds
 
             # (Optional) Save outputs
             # cv2.imwrite(f'outputs/0000000{i:03d}_depth.png', output_color)

From 5ba677d1e4b8038d3c2f72e1d6488361a267230d Mon Sep 17 00:00:00 2001
From: Fetty <fetty3113@gmail.com>
Date: Fri, 11 Jul 2025 13:09:53 +0000
Subject: [PATCH 4/4] Small fixes for better documentation of docker

---
 README.md         | 16 +++++++++++++++-
 demo.py           |  1 -
 demo_docker.py    | 10 +++++-----
 demo_ona.py       | 40 ++++++++++++++++++++++++++++++++++------
 docker/Dockerfile |  2 ++
 5 files changed, 56 insertions(+), 13 deletions(-)

diff --git a/README.md b/README.md
index 26f3f02..41711ba 100644
--- a/README.md
+++ b/README.md
@@ -46,6 +46,13 @@ chmod +x build run
 ./build # build docker image
 ./run   # run docker container
 ```
+
+_Note that inside the `run` script the directory on your PC with the KITTI data is shared with the docker container via:_
+```sh
+--volume="$HOME/Desktop/kitti_dataset:/home/kitti_dataset" 
+```
+_Change this line (80) inside the [run](docker/run) script in order to match with your KITTI dataset parent folder._
+
 __Once inside docker, set up environment:__
 ```sh
 ## 🐳 Inside the Docker container
@@ -92,7 +99,14 @@ Open `demo.py` and go to **line 338**, where you can modify the input sequence p
 
 ```python
 # demo.py (Line 338)
-sequence = "/path/to/your/kitti/sequence"
+base = "/your/path/to/kitti/sequence"
+```
+
+_(Or if you are using docker):_
+```python
+# demo_docker.py (Line 418)
+global_path = "/your/path/to/kitti/"
+base = global_path + "/sequence"
 ```
 
 Download pre-trained weights:
diff --git a/demo.py b/demo.py
index a9e5fbb..9443cde 100644
--- a/demo.py
+++ b/demo.py
@@ -335,7 +335,6 @@ def main(cfg):
         net = run.net_ema.module.cuda()
         net.eval()
         # 读取左目图像
-        # base = "datas/kitti/raw/2011_09_26/2011_09_26_drive_0002_sync"
         base = "datas/kitti/raw/2011_09_26/2011_09_26_drive_0048_sync"
 
         image_type = 'color'  # 'grayscale' or 'color' image
diff --git a/demo_docker.py b/demo_docker.py
index 51e4a2d..6abd560 100644
--- a/demo_docker.py
+++ b/demo_docker.py
@@ -67,7 +67,7 @@ def depth_to_point_cloud(depth, K_cam):
     z = depth[0][0]
     x = (j - K_cam[0, 2]) * z / K_cam[0, 0]
     y = (i - K_cam[1, 2]) * z / K_cam[1, 1]
-    y = -y
+    # y = -y
 
     points = torch.stack((x, y, z), dim=2).reshape(-1, 3)
     return points.detach().cpu().numpy()
@@ -101,6 +101,7 @@ def depth_to_colored_point_cloud(depth, K_cam, bgr_image):
     x = (j - K_cam[0, 2]) * z / K_cam[0, 0]
     y = (i - K_cam[1, 2]) * z / K_cam[1, 1]
     y = -y  # flip if needed
+    x = -x  # flip if needed
 
     points = torch.stack((x, y, z), dim=2).reshape(-1, 3).detach().cpu().numpy()
 
@@ -416,7 +417,6 @@ def main(cfg):
         net.eval()
         # 读取左目图像
         global_path = "/home/kitti_dataset/"
-        # base = "datas/kitti/raw/2011_09_26/2011_09_26_drive_0002_sync"
         base = global_path + "2011_09_26/2011_09_26_drive_0005_sync"
 
         image_type = 'color'  # 'grayscale' or 'color' image
@@ -434,7 +434,7 @@ def main(cfg):
         with open(t_file, 'r') as f:
             N = sum(1 for _ in f)
 
-        # viewer = ImageGridViewer(titles=["Depth", "Raw", "Lidar", "Projected"])
+        viewer = ImageGridViewer(titles=["Depth", "Raw", "Lidar", "Projected"])
         # viewer_3d = PointCloudImageViewer()
         # viewer_pc = PointCloudViewer()
 
@@ -508,8 +508,8 @@ def main(cfg):
 
             # viewer_pc.visualize_for_seconds(points_3d, 2, colors)
 
-            # viewer.update([output_color, image.astype(np.uint8)[:, :, ::-1], lidar.astype(np.uint8) * 3, image_vis],
-            #                duration=0.5)  # Display for x seconds
+            viewer.update([output_color, image.astype(np.uint8)[:, :, ::-1], lidar.astype(np.uint8) * 3, image_vis],
+                           duration=0.5)  # Display for x seconds
 
             cv2.imwrite(f'outputs/0000000{i:03d}_depth.png', output_color)
             
diff --git a/demo_ona.py b/demo_ona.py
index 83c335a..e48be4c 100644
--- a/demo_ona.py
+++ b/demo_ona.py
@@ -64,6 +64,7 @@ def depth_to_colored_point_cloud(depth, K_cam, bgr_image):
     x = (j - K_cam[0, 2]) * z / K_cam[0, 0]
     y = (i - K_cam[1, 2]) * z / K_cam[1, 1]
     y = -y  # flip if needed
+    x = -x  # flip if needed
 
     points = torch.stack((x, y, z), dim=2).reshape(-1, 3).detach().cpu().numpy()
 
@@ -279,9 +280,22 @@ def pointcloud2_to_xyz_array(cloud_msg):
 
     return np.array(points, dtype=np.float32)
 
-def load_image_from_rosbag(bag_path, stamps, N=-1):
+def load_image_from_rosbag(bag_path, topic, stamps, N=-1):
+    """
+    Loads each rgb image from a rosbag file taking also into account a simple synchronization
+    strategy w.r.t to the pcd timestamps.
+
+    Args:
+        bag_path (str): path to the rosbag file.
+        topic (str): the topic with image data (sensor_msgs/ImageRaw) 
+        stamps (list(float)): time stamp of each pointcloud/LiDAR frame.
+        N (int): optional max number of images retrieved
+
+    Returns:
+        images (list(np.ndarray)): list of BGR images.
+        pcd_ids (list(int)): index of the closest (in time) pointclouds for the chosen images.
+    """
     typestore = get_typestore(Stores.ROS1_NOETIC)
-    topic = "/ona2/sensors/flir_camera_front/image_raw"
     images = []
     pcd_ids = []
 
@@ -331,12 +345,23 @@ def load_image_from_rosbag(bag_path, stamps, N=-1):
     return images, pcd_ids
 
 
-def load_pcd_from_rosbag(bag_path, N=-1):
+def load_pcd_from_rosbag(bag_path, topic, N=-1):
+    """
+    Loads each pcd from a rosbag file.
+
+    Args:
+        bag_path (str): path to the rosbag file.
+        topic (str): the topic with pointcloud data (sensor_msgs/PointCloud2) 
+        N (int): optional max number of pcd retrieved
+
+    Returns:
+        pcds (list(np.ndarray)): list of pointclouds (pcd).
+        stamps (list(float)): time stamp of each pointcloud/LiDAR frame.
+    """
     # Create a typestore for the matching ROS release.
     typestore = get_typestore(Stores.ROS1_NOETIC)
 
     # Topic to filter
-    topic = "/ona2/sensors/pandar_front/cloud"
     pcds = []
     stamps = []
 
@@ -406,16 +431,19 @@ def main(cfg):
         image_bag_name = "2024-11-07-11-24-29_3.bag"
         pcd_bag_name = "2024-11-07-11-24-14_1.bag"
 
+        topic_lidar = "/ona2/sensors/pandar_front/cloud"
+        topic_camera = "/ona2/sensors/flir_camera_front/image_raw"
+
         image_mean = np.array([90.9950, 96.2278, 94.3213])
         image_std = np.array([79.2382, 80.5267, 82.1483])
         image_height = 352
         image_width = 1216
 
         # Get LiDAR data from rosbag
-        pcd_array, stamps = load_pcd_from_rosbag(global_path + "robot/" + pcd_bag_name)
+        pcd_array, stamps = load_pcd_from_rosbag(global_path + "robot/" + pcd_bag_name, topic_lidar)
 
         # Get image data from rosbag
-        images_array, pcd_ids = load_image_from_rosbag(global_path + "camera/" + image_bag_name, stamps)
+        images_array, pcd_ids = load_image_from_rosbag(global_path + "camera/" + image_bag_name, topic_camera, stamps)
 
         print(f"Loaded {len(images_array)} images and {len(pcd_array)} pcds.")
 
diff --git a/docker/Dockerfile b/docker/Dockerfile
index f055d49..83809ed 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -1,6 +1,8 @@
 # Base image with CUDA and PyTorch support
 FROM nvidia/cuda:12.1.1-devel-ubuntu22.04
 
+ARG DEBIAN_FRONTEND=noninteractive
+
 # System setup
 RUN apt-get update && apt-get install -y \
     python3.10 python3-pip python3-dev git build-essential cmake \