From adfceea78546436a639dceb45516fd7be8e75845 Mon Sep 17 00:00:00 2001
From: Benjamin Wilson <benjaminrwilson@users.noreply.github.com>
Date: Mon, 15 May 2023 00:01:56 -0700
Subject: [PATCH 1/2] Add additional python bindings.

---
 rust/src/data_loader.rs                    |  16 +--
 rust/src/geometry/camera/pinhole_camera.rs | 143 ++++++++++++++-------
 rust/src/geometry/se3.rs                   |   3 +
 rust/src/structures/timestamped_image.rs   |  18 ++-
 4 files changed, 115 insertions(+), 65 deletions(-)
diff --git a/rust/src/data_loader.rs b/rust/src/data_loader.rs
index 31d2fdea..ef20b076 100644
--- a/rust/src/data_loader.rs
+++ b/rust/src/data_loader.rs
@@ -8,10 +8,6 @@ use image::ImageBuffer;
 use image::Rgba;
 use io::{read_accumulate_lidar, read_timestamped_feather};
 use itertools::Itertools;
-use ndarray::Ix3;
-use nshare::ToNdarray3;
-use numpy::IntoPyArray;
-use numpy::PyArray;
 use pyo3::prelude::*;
 use pyo3_polars::PyDataFrame;
 use rayon::prelude::IntoParallelRefIterator;
@@ -185,16 +181,8 @@ impl DataLoader {
 
     /// Get all synchronized images at the sweep index.
     #[pyo3(name = "get_synchronized_images")]
-    pub fn py_get_synchronized_images<'py>(
-        &self,
-        py: Python<'py>,
-        index: usize,
-    ) -> Vec<&'py PyArray<u8, Ix3>> {
-        let images = self.get_synchronized_images(index);
-        images
-            .into_iter()
-            .map(|x| x.image.into_ndarray3().into_pyarray(py))
-            .collect_vec()
+    pub fn py_get_synchronized_images(&self, index: usize) -> Vec<TimeStampedImage> {
+        self.get_synchronized_images(index)
     }
 
     fn __iter__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> {
diff --git a/rust/src/geometry/camera/pinhole_camera.rs b/rust/src/geometry/camera/pinhole_camera.rs
index b47b3ed8..3203fb16 100644
--- a/rust/src/geometry/camera/pinhole_camera.rs
+++ b/rust/src/geometry/camera/pinhole_camera.rs
@@ -1,6 +1,7 @@
 use std::{ops::DivAssign, path::Path};
 
 use ndarray::{par_azip, s, Array, ArrayView, Ix1, Ix2};
+use numpy::{PyArray, PyReadonlyArray, ToPyArray};
 use polars::{
     lazy::dsl::{col, lit},
     prelude::{DataFrame, IntoLazy},
@@ -10,24 +11,33 @@ use crate::{
     geometry::se3::SE3, geometry::so3::_quat_to_mat3, geometry::utils::cart_to_hom,
     io::read_feather_eager,
 };
+use pyo3::prelude::*;
 
 /// Pinhole camera intrinsics.
+#[pyclass]
 #[derive(Clone, Debug)]
 pub struct Intrinsics {
     /// Horizontal focal length in pixels.
+    #[pyo3(get, set)]
     pub fx_px: f32,
     /// Vertical focal length in pixels.
+    #[pyo3(get, set)]
     pub fy_px: f32,
     /// Horizontal focal center in pixels.
+    #[pyo3(get, set)]
     pub cx_px: f32,
     /// Vertical focal center in pixels.
+    #[pyo3(get, set)]
     pub cy_px: f32,
     /// Width of image in pixels.
+    #[pyo3(get, set)]
     pub width_px: usize,
     /// Height of image in pixels.
+    #[pyo3(get, set)]
     pub height_px: usize,
 }
 
+#[pymethods]
 impl Intrinsics {
     /// Construct a new `Intrinsics` instance.
     pub fn new(
@@ -48,7 +58,10 @@ impl Intrinsics {
             height_px,
         }
     }
+}
 
+/// Rust methods.
+impl Intrinsics {
     /// Camera intrinsic matrix.
     pub fn k(&self) -> Array<f32, Ix2> {
         let mut k = Array::<f32, Ix2>::eye(3);
@@ -61,16 +74,21 @@ impl Intrinsics {
 }
 
 /// Parameterizes a pinhole camera with zero skew.
+#[pyclass]
 #[derive(Clone, Debug)]
 pub struct PinholeCamera {
     /// Pose of camera in the egovehicle frame (inverse of extrinsics matrix).
+    #[pyo3(get, set)]
     pub ego_se3_cam: SE3,
     /// `Intrinsics` object containing intrinsic parameters and image dimensions.
+    #[pyo3(get, set)]
     pub intrinsics: Intrinsics,
     /// Associated camera name.
+    #[pyo3(get, set)]
     pub camera_name: String,
 }
 
+#[pymethods]
 impl PinholeCamera {
     /// Return the width of the image in pixels.
     pub fn width_px(&self) -> usize {
@@ -82,6 +100,31 @@ impl PinholeCamera {
         self.intrinsics.height_px
     }
 
+    /// Project a collection of 3D points (provided in the egovehicle frame) to the image plane.
+    #[pyo3(name = "project_ego_to_image_motion_compensated")]
+    #[allow(clippy::type_complexity)]
+    pub fn py_project_ego_to_image_motion_compensated<'py>(
+        &'py self,
+        py: Python<'py>,
+        points_ego: PyReadonlyArray<f32, Ix2>,
+        city_se3_ego_camera_t: SE3,
+        city_se3_ego_lidar_t: SE3,
+    ) -> (&PyArray<f32, Ix2>, &PyArray<f32, Ix2>, &PyArray<bool, Ix2>) {
+        let (uvz, points_hom_cam, is_valid) = self.project_ego_to_image_motion_compensated(
+            points_ego.as_array().view().to_owned(),
+            city_se3_ego_camera_t,
+            city_se3_ego_lidar_t,
+        );
+        (
+            uvz.to_pyarray(py),
+            points_hom_cam.to_pyarray(py),
+            is_valid.to_pyarray(py),
+        )
+    }
+}
+
+/// Rust methods.
+impl PinholeCamera {
     /// Return the camera extrinsics.
     pub fn extrinsics(&self) -> Array<f32, Ix2> {
         self.ego_se3_cam.inverse().transform_matrix()
@@ -94,55 +137,6 @@ impl PinholeCamera {
         p
     }
 
-    /// Create a pinhole camera model from a feather file.
-    pub fn from_feather(log_dir: &Path, camera_name: &str) -> PinholeCamera {
-        let intrinsics_path = log_dir.join("calibration/intrinsics.feather");
-        let intrinsics = read_feather_eager(&intrinsics_path, false)
-            .lazy()
-            .filter(col("sensor_name").eq(lit(camera_name)))
-            .collect()
-            .unwrap();
-
-        let intrinsics = Intrinsics {
-            fx_px: extract_f32_from_frame(&intrinsics, "fx_px"),
-            fy_px: extract_f32_from_frame(&intrinsics, "fy_px"),
-            cx_px: extract_f32_from_frame(&intrinsics, "cx_px"),
-            cy_px: extract_f32_from_frame(&intrinsics, "cy_px"),
-            width_px: extract_usize_from_frame(&intrinsics, "width_px"),
-            height_px: extract_usize_from_frame(&intrinsics, "height_px"),
-        };
-
-        let extrinsics_path = log_dir.join("calibration/egovehicle_SE3_sensor.feather");
-        let extrinsics = read_feather_eager(&extrinsics_path, false)
-            .lazy()
-            .filter(col("sensor_name").eq(lit(camera_name)))
-            .collect()
-            .unwrap();
-
-        let qw = extract_f32_from_frame(&extrinsics, "qw");
-        let qx = extract_f32_from_frame(&extrinsics, "qx");
-        let qy = extract_f32_from_frame(&extrinsics, "qy");
-        let qz = extract_f32_from_frame(&extrinsics, "qz");
-        let tx_m = extract_f32_from_frame(&extrinsics, "tx_m");
-        let ty_m = extract_f32_from_frame(&extrinsics, "ty_m");
-        let tz_m = extract_f32_from_frame(&extrinsics, "tz_m");
-
-        let quat_wxyz = Array::<f32, Ix1>::from_vec(vec![qw, qx, qy, qz]);
-        let rotation = _quat_to_mat3(&quat_wxyz.view());
-        let translation = Array::<f32, Ix1>::from_vec(vec![tx_m, ty_m, tz_m]);
-        let ego_se3_cam = SE3 {
-            rotation,
-            translation,
-        };
-
-        let camera_name_string = camera_name.to_string();
-        Self {
-            ego_se3_cam,
-            intrinsics,
-            camera_name: camera_name_string,
-        }
-    }
-
     /// Cull 3D points to camera view frustum.
     ///
     /// Ref: https://en.wikipedia.org/wiki/Hidden-surface_determination#Viewing-frustum_culling
@@ -154,7 +148,7 @@ impl PinholeCamera {
     /// in the range [0,height_px), and a positive z-coordinate (lying in
     /// front of the camera frustum).
     pub fn cull_to_view_frustum(
-        self,
+        &self,
         uv: &ArrayView<f32, Ix2>,
         points_camera: &ArrayView<f32, Ix2>,
     ) -> Array<bool, Ix2> {
@@ -206,6 +200,55 @@ impl PinholeCamera {
         let points_ego = ego_cam_t_se3_ego_lidar_t.transform_from(&points_ego.view());
         self.project_ego_to_image(points_ego)
     }
+
+    /// Create a pinhole camera model from a feather file.
+    pub fn from_feather(log_dir: &Path, camera_name: &str) -> PinholeCamera {
+        let intrinsics_path = log_dir.join("calibration/intrinsics.feather");
+        let intrinsics = read_feather_eager(&intrinsics_path, false)
+            .lazy()
+            .filter(col("sensor_name").eq(lit(camera_name)))
+            .collect()
+            .unwrap();
+
+        let intrinsics = Intrinsics {
+            fx_px: extract_f32_from_frame(&intrinsics, "fx_px"),
+            fy_px: extract_f32_from_frame(&intrinsics, "fy_px"),
+            cx_px: extract_f32_from_frame(&intrinsics, "cx_px"),
+            cy_px: extract_f32_from_frame(&intrinsics, "cy_px"),
+            width_px: extract_usize_from_frame(&intrinsics, "width_px"),
+            height_px: extract_usize_from_frame(&intrinsics, "height_px"),
+        };
+
+        let extrinsics_path = log_dir.join("calibration/egovehicle_SE3_sensor.feather");
+        let extrinsics = read_feather_eager(&extrinsics_path, false)
+            .lazy()
+            .filter(col("sensor_name").eq(lit(camera_name)))
+            .collect()
+            .unwrap();
+
+        let qw = extract_f32_from_frame(&extrinsics, "qw");
+        let qx = extract_f32_from_frame(&extrinsics, "qx");
+        let qy = extract_f32_from_frame(&extrinsics, "qy");
+        let qz = extract_f32_from_frame(&extrinsics, "qz");
+        let tx_m = extract_f32_from_frame(&extrinsics, "tx_m");
+        let ty_m = extract_f32_from_frame(&extrinsics, "ty_m");
+        let tz_m = extract_f32_from_frame(&extrinsics, "tz_m");
+
+        let quat_wxyz = Array::<f32, Ix1>::from_vec(vec![qw, qx, qy, qz]);
+        let rotation = _quat_to_mat3(&quat_wxyz.view());
+        let translation = Array::<f32, Ix1>::from_vec(vec![tx_m, ty_m, tz_m]);
+        let ego_se3_cam = SE3 {
+            rotation,
+            translation,
+        };
+
+        let camera_name_string = camera_name.to_string();
+        Self {
+            ego_se3_cam,
+            intrinsics,
+            camera_name: camera_name_string,
+        }
+    }
 }
 
 fn extract_f32_from_frame(series: &DataFrame, column: &str) -> f32 {
diff --git a/rust/src/geometry/se3.rs b/rust/src/geometry/se3.rs
index 9fc95d54..7e0c9a9a 100644
--- a/rust/src/geometry/se3.rs
+++ b/rust/src/geometry/se3.rs
@@ -4,9 +4,11 @@
 
 use ndarray::ArrayView2;
 use ndarray::{s, Array1, Array2};
+use pyo3::prelude::*;
 
 /// Special Euclidean Group 3 (SE(3)).
 /// Rigid transformation parameterized by a rotation and translation in $R^3$.
+#[pyclass]
 #[derive(Clone, Debug)]
 pub struct SE3 {
     /// (3,3) Orthonormal rotation matrix.
@@ -15,6 +17,7 @@ pub struct SE3 {
     pub translation: Array1<f32>,
 }
 
+/// Rust methods.
 impl SE3 {
     /// Get the (4,4) homogeneous transformation matrix associated with the rigid transformation.
     pub fn transform_matrix(&self) -> Array2<f32> {
diff --git a/rust/src/structures/timestamped_image.rs b/rust/src/structures/timestamped_image.rs
index 34adb751..f0df5dc9 100644
--- a/rust/src/structures/timestamped_image.rs
+++ b/rust/src/structures/timestamped_image.rs
@@ -1,14 +1,30 @@
 use image::{ImageBuffer, Rgba};
+use ndarray::Ix3;
+use nshare::RefNdarray3;
+use numpy::{PyArray, ToPyArray};
 
 use crate::geometry::camera::pinhole_camera::PinholeCamera;
+use pyo3::prelude::*;
 
 /// Image modeled by `camera_model` occuring at `timestamp_ns`.
-#[derive(Clone)]
+// #[pyclass]
+#[derive(Clone, Debug)]
+#[pyclass]
 pub struct TimeStampedImage {
     /// RGBA u8 image buffer.
     pub image: ImageBuffer<Rgba<u8>, Vec<u8>>,
     /// Pinhole camera model with intrinsics and extrinsics.
+    #[pyo3(get, set)]
     pub camera_model: PinholeCamera,
     /// Nanosecond timestamp.
+    #[pyo3(get, set)]
     pub timestamp_ns: usize,
 }
+
+#[pymethods]
+impl TimeStampedImage {
+    #[getter]
+    fn image<'py>(&self, py: Python<'py>) -> &'py PyArray<u8, Ix3> {
+        self.image.ref_ndarray3().to_pyarray(py)
+    }
+}

From 801e60e33f9f85a53f42aa4941109bca6d3af037 Mon Sep 17 00:00:00 2001
From: Benjamin Wilson <benjaminrwilson@users.noreply.github.com>
Date: Mon, 15 May 2023 15:54:08 -0700
Subject: [PATCH 2/2] Add pinhole camera conversions.

---
 rust/src/geometry/camera/pinhole_camera.rs    | 14 ++++++++-
 src/av2/_r.pyi                                | 20 ++++++++++--
 src/av2/structures/timestamped_image.py       |  1 -
 src/av2/torch/data_loaders/detection.py       | 31 ++++++++++++++++---
 .../torch/structures/time_stamped_image.py    | 21 +++++++++++++
 tutorials/3d_object_detection.py              |  2 +-
 6 files changed, 79 insertions(+), 10 deletions(-)
 create mode 100644 src/av2/torch/structures/time_stamped_image.py

diff --git a/rust/src/geometry/camera/pinhole_camera.rs b/rust/src/geometry/camera/pinhole_camera.rs
index 3203fb16..05ebab81 100644
--- a/rust/src/geometry/camera/pinhole_camera.rs
+++ b/rust/src/geometry/camera/pinhole_camera.rs
@@ -58,13 +58,19 @@ impl Intrinsics {
             height_px,
         }
     }
+
+    #[getter]
+    #[pyo3(name = "k")]
+    fn py_k<'py>(&'py self, py: Python<'py>) -> &'py PyArray<f32, Ix2> {
+        self.k().to_pyarray(py)
+    }
 }
 
 /// Rust methods.
 impl Intrinsics {
     /// Camera intrinsic matrix.
     pub fn k(&self) -> Array<f32, Ix2> {
-        let mut k = Array::<f32, Ix2>::eye(3);
+        let mut k = Array::<f32, Ix2>::eye(4);
         k[[0, 0]] = self.fx_px;
         k[[1, 1]] = self.fy_px;
         k[[0, 2]] = self.cx_px;
@@ -121,6 +127,12 @@ impl PinholeCamera {
             is_valid.to_pyarray(py),
         )
     }
+
+    #[getter]
+    #[pyo3(name = "extrinsics")]
+    fn py_extrinsics<'py>(&'py self, py: Python<'py>) -> &'py PyArray<f32, Ix2> {
+        self.ego_se3_cam.inverse().transform_matrix().to_pyarray(py)
+    }
 }
 
 /// Rust methods.
diff --git a/src/av2/_r.pyi b/src/av2/_r.pyi
index 2000aba6..1ea5f5a9 100644
--- a/src/av2/_r.pyi
+++ b/src/av2/_r.pyi
@@ -4,7 +4,8 @@ from dataclasses import dataclass, field
 from typing import List, Optional, Tuple
 
 import polars as pl
-import torch
+
+from av2.utils.typing import NDArrayByte
 
 @dataclass
 class DataLoader:
@@ -18,7 +19,7 @@ class DataLoader:
     file_index: pl.DataFrame = field(init=False)
 
     def get(self, index: int) -> Sweep: ...
-    def get_synchronized_images(self, index: int) -> List[torch.Tensor]: ...
+    def get_synchronized_images(self, index: int) -> List[TimeStampedImage]: ...
     def __len__(self) -> int: ...
 
 @dataclass
@@ -27,3 +28,18 @@ class Sweep:
     lidar: pl.DataFrame
     sweep_uuid: Tuple[str, int]
     cuboids: Optional[pl.DataFrame]
+
+@dataclass
+class PinholeCamera:
+    fx_px: float
+    fy_px: float
+    cx_px: float
+    cy_px: float
+    width_px: float
+    height_px: float
+
+@dataclass
+class TimeStampedImage:
+    image: NDArrayByte
+    camera_model: PinholeCamera
+    timestamp_ns: int
diff --git a/src/av2/structures/timestamped_image.py b/src/av2/structures/timestamped_image.py
index 41cb1c51..d09eb996 100644
--- a/src/av2/structures/timestamped_image.py
+++ b/src/av2/structures/timestamped_image.py
@@ -16,7 +16,6 @@ class TimestampedImage:
         img: (H,W,C) image.
         camera_model: Pinhole camera model with intrinsics and extrinsics.
         timestamp_ns: Nanosecond timestamp.
-
     """
 
     img: NDArrayByte
diff --git a/src/av2/torch/data_loaders/detection.py b/src/av2/torch/data_loaders/detection.py
index 290a56c2..e78f65f7 100644
--- a/src/av2/torch/data_loaders/detection.py
+++ b/src/av2/torch/data_loaders/detection.py
@@ -4,12 +4,14 @@
 
 import logging
 from dataclasses import dataclass, field
-from typing import List
+from typing import List, Tuple
 
 import torch
+from kornia.geometry.camera.pinhole import PinholeCamera, PinholeCamerasList
 from torch.utils.data import Dataset
 
 import av2._r as rust
+from av2.torch.structures.time_stamped_image import TimeStampedImage
 from av2.utils.typing import PathType
 
 from ..structures.sweep import Sweep
@@ -79,7 +81,26 @@ def __next__(self) -> Sweep:
         self._current_sweep_index += 1
         return datum
 
-    def get_synchronized_images(self, sweep_index: int) -> List[torch.Tensor]:
-        """Get the synchronized ring images associated with the sweep index.."""
-        synchronized_images_list = self._backend.get_synchronized_images(sweep_index)
-        return [torch.as_tensor(x) for x in synchronized_images_list]
+    def get_synchronized_images(self, sweep_index: int) -> List[TimeStampedImage]:
+        """Get the synchronized ring images associated with the sweep index."""
+        time_stamped_images = self._backend.get_synchronized_images(sweep_index)
+        time_stamped_image_list = []
+        for ts_image in time_stamped_images:
+            image = torch.as_tensor(ts_image.image)
+            camera_model_rs = ts_image.camera_model
+
+            intrinsics = torch.as_tensor(camera_model_rs.intrinsics.k)
+            extrinsics = torch.as_tensor(camera_model_rs.extrinsics)
+
+            height = torch.as_tensor([image.shape[1]])
+            width = torch.as_tensor([image.shape[2]])
+            camera_model = PinholeCamera(
+                intrinsics=intrinsics, extrinsics=extrinsics, height=height, width=width
+            )
+
+            timestamp_ns = ts_image.timestamp_ns
+            time_stamped_image = TimeStampedImage(
+                image=image, camera_model=camera_model, timestamp_ns=timestamp_ns
+            )
+            time_stamped_image_list.append(time_stamped_image)
+        return time_stamped_image_list
diff --git a/src/av2/torch/structures/time_stamped_image.py b/src/av2/torch/structures/time_stamped_image.py
new file mode 100644
index 00000000..883be135
--- /dev/null
+++ b/src/av2/torch/structures/time_stamped_image.py
@@ -0,0 +1,21 @@
+"""Timestamped image class with camera model for synchronization."""
+
+from dataclasses import dataclass
+
+import torch
+from kornia.geometry.camera import PinholeCamera
+
+
+@dataclass
+class TimeStampedImage:
+    """Timestamped image with an accompanying camera model.
+
+    Args:
+        image: (H,W,C) image.
+        camera_model: Pinhole camera model with intrinsics and extrinsics.
+        timestamp_ns: Nanosecond timestamp.
+    """
+
+    image: torch.Tensor
+    camera_model: PinholeCamera
+    timestamp_ns: int
diff --git a/tutorials/3d_object_detection.py b/tutorials/3d_object_detection.py
index ff6d9cc5..05ae3cfd 100644
--- a/tutorials/3d_object_detection.py
+++ b/tutorials/3d_object_detection.py
@@ -48,7 +48,7 @@ def main(
         lidar_tensor = sweep.lidar.as_tensor()
 
         # Synchronized ring cameras.
-        synchronized_images = data_loader.get_synchronized_images(i)
+        time_stamped_image_list = data_loader.get_synchronized_images(i)
 
         # Transform the points to city coordinates.
         lidar_xyz_city = transform_points(city_SE3_ego_mat4, lidar_tensor[:, :3])