From adfceea78546436a639dceb45516fd7be8e75845 Mon Sep 17 00:00:00 2001 From: Benjamin Wilson Date: Mon, 15 May 2023 00:01:56 -0700 Subject: [PATCH 1/2] Add additional python bindings. --- rust/src/data_loader.rs | 16 +-- rust/src/geometry/camera/pinhole_camera.rs | 143 ++++++++++++++------- rust/src/geometry/se3.rs | 3 + rust/src/structures/timestamped_image.rs | 18 ++- 4 files changed, 115 insertions(+), 65 deletions(-) diff --git a/rust/src/data_loader.rs b/rust/src/data_loader.rs index 31d2fdea..ef20b076 100644 --- a/rust/src/data_loader.rs +++ b/rust/src/data_loader.rs @@ -8,10 +8,6 @@ use image::ImageBuffer; use image::Rgba; use io::{read_accumulate_lidar, read_timestamped_feather}; use itertools::Itertools; -use ndarray::Ix3; -use nshare::ToNdarray3; -use numpy::IntoPyArray; -use numpy::PyArray; use pyo3::prelude::*; use pyo3_polars::PyDataFrame; use rayon::prelude::IntoParallelRefIterator; @@ -185,16 +181,8 @@ impl DataLoader { /// Get all synchronized images at the sweep index. #[pyo3(name = "get_synchronized_images")] - pub fn py_get_synchronized_images<'py>( - &self, - py: Python<'py>, - index: usize, - ) -> Vec<&'py PyArray> { - let images = self.get_synchronized_images(index); - images - .into_iter() - .map(|x| x.image.into_ndarray3().into_pyarray(py)) - .collect_vec() + pub fn py_get_synchronized_images(&self, index: usize) -> Vec { + self.get_synchronized_images(index) } fn __iter__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> { diff --git a/rust/src/geometry/camera/pinhole_camera.rs b/rust/src/geometry/camera/pinhole_camera.rs index b47b3ed8..3203fb16 100644 --- a/rust/src/geometry/camera/pinhole_camera.rs +++ b/rust/src/geometry/camera/pinhole_camera.rs @@ -1,6 +1,7 @@ use std::{ops::DivAssign, path::Path}; use ndarray::{par_azip, s, Array, ArrayView, Ix1, Ix2}; +use numpy::{PyArray, PyReadonlyArray, ToPyArray}; use polars::{ lazy::dsl::{col, lit}, prelude::{DataFrame, IntoLazy}, @@ -10,24 +11,33 @@ use crate::{ geometry::se3::SE3, geometry::so3::_quat_to_mat3, geometry::utils::cart_to_hom, io::read_feather_eager, }; +use pyo3::prelude::*; /// Pinhole camera intrinsics. +#[pyclass] #[derive(Clone, Debug)] pub struct Intrinsics { /// Horizontal focal length in pixels. + #[pyo3(get, set)] pub fx_px: f32, /// Vertical focal length in pixels. + #[pyo3(get, set)] pub fy_px: f32, /// Horizontal focal center in pixels. + #[pyo3(get, set)] pub cx_px: f32, /// Vertical focal center in pixels. + #[pyo3(get, set)] pub cy_px: f32, /// Width of image in pixels. + #[pyo3(get, set)] pub width_px: usize, /// Height of image in pixels. + #[pyo3(get, set)] pub height_px: usize, } +#[pymethods] impl Intrinsics { /// Construct a new `Intrinsics` instance. pub fn new( @@ -48,7 +58,10 @@ impl Intrinsics { height_px, } } +} +/// Rust methods. +impl Intrinsics { /// Camera intrinsic matrix. pub fn k(&self) -> Array { let mut k = Array::::eye(3); @@ -61,16 +74,21 @@ impl Intrinsics { } /// Parameterizes a pinhole camera with zero skew. +#[pyclass] #[derive(Clone, Debug)] pub struct PinholeCamera { /// Pose of camera in the egovehicle frame (inverse of extrinsics matrix). + #[pyo3(get, set)] pub ego_se3_cam: SE3, /// `Intrinsics` object containing intrinsic parameters and image dimensions. + #[pyo3(get, set)] pub intrinsics: Intrinsics, /// Associated camera name. + #[pyo3(get, set)] pub camera_name: String, } +#[pymethods] impl PinholeCamera { /// Return the width of the image in pixels. pub fn width_px(&self) -> usize { @@ -82,6 +100,31 @@ impl PinholeCamera { self.intrinsics.height_px } + /// Project a collection of 3D points (provided in the egovehicle frame) to the image plane. + #[pyo3(name = "project_ego_to_image_motion_compensated")] + #[allow(clippy::type_complexity)] + pub fn py_project_ego_to_image_motion_compensated<'py>( + &'py self, + py: Python<'py>, + points_ego: PyReadonlyArray, + city_se3_ego_camera_t: SE3, + city_se3_ego_lidar_t: SE3, + ) -> (&PyArray, &PyArray, &PyArray) { + let (uvz, points_hom_cam, is_valid) = self.project_ego_to_image_motion_compensated( + points_ego.as_array().view().to_owned(), + city_se3_ego_camera_t, + city_se3_ego_lidar_t, + ); + ( + uvz.to_pyarray(py), + points_hom_cam.to_pyarray(py), + is_valid.to_pyarray(py), + ) + } +} + +/// Rust methods. +impl PinholeCamera { /// Return the camera extrinsics. pub fn extrinsics(&self) -> Array { self.ego_se3_cam.inverse().transform_matrix() @@ -94,55 +137,6 @@ impl PinholeCamera { p } - /// Create a pinhole camera model from a feather file. - pub fn from_feather(log_dir: &Path, camera_name: &str) -> PinholeCamera { - let intrinsics_path = log_dir.join("calibration/intrinsics.feather"); - let intrinsics = read_feather_eager(&intrinsics_path, false) - .lazy() - .filter(col("sensor_name").eq(lit(camera_name))) - .collect() - .unwrap(); - - let intrinsics = Intrinsics { - fx_px: extract_f32_from_frame(&intrinsics, "fx_px"), - fy_px: extract_f32_from_frame(&intrinsics, "fy_px"), - cx_px: extract_f32_from_frame(&intrinsics, "cx_px"), - cy_px: extract_f32_from_frame(&intrinsics, "cy_px"), - width_px: extract_usize_from_frame(&intrinsics, "width_px"), - height_px: extract_usize_from_frame(&intrinsics, "height_px"), - }; - - let extrinsics_path = log_dir.join("calibration/egovehicle_SE3_sensor.feather"); - let extrinsics = read_feather_eager(&extrinsics_path, false) - .lazy() - .filter(col("sensor_name").eq(lit(camera_name))) - .collect() - .unwrap(); - - let qw = extract_f32_from_frame(&extrinsics, "qw"); - let qx = extract_f32_from_frame(&extrinsics, "qx"); - let qy = extract_f32_from_frame(&extrinsics, "qy"); - let qz = extract_f32_from_frame(&extrinsics, "qz"); - let tx_m = extract_f32_from_frame(&extrinsics, "tx_m"); - let ty_m = extract_f32_from_frame(&extrinsics, "ty_m"); - let tz_m = extract_f32_from_frame(&extrinsics, "tz_m"); - - let quat_wxyz = Array::::from_vec(vec![qw, qx, qy, qz]); - let rotation = _quat_to_mat3(&quat_wxyz.view()); - let translation = Array::::from_vec(vec![tx_m, ty_m, tz_m]); - let ego_se3_cam = SE3 { - rotation, - translation, - }; - - let camera_name_string = camera_name.to_string(); - Self { - ego_se3_cam, - intrinsics, - camera_name: camera_name_string, - } - } - /// Cull 3D points to camera view frustum. /// /// Ref: https://en.wikipedia.org/wiki/Hidden-surface_determination#Viewing-frustum_culling @@ -154,7 +148,7 @@ impl PinholeCamera { /// in the range [0,height_px), and a positive z-coordinate (lying in /// front of the camera frustum). pub fn cull_to_view_frustum( - self, + &self, uv: &ArrayView, points_camera: &ArrayView, ) -> Array { @@ -206,6 +200,55 @@ impl PinholeCamera { let points_ego = ego_cam_t_se3_ego_lidar_t.transform_from(&points_ego.view()); self.project_ego_to_image(points_ego) } + + /// Create a pinhole camera model from a feather file. + pub fn from_feather(log_dir: &Path, camera_name: &str) -> PinholeCamera { + let intrinsics_path = log_dir.join("calibration/intrinsics.feather"); + let intrinsics = read_feather_eager(&intrinsics_path, false) + .lazy() + .filter(col("sensor_name").eq(lit(camera_name))) + .collect() + .unwrap(); + + let intrinsics = Intrinsics { + fx_px: extract_f32_from_frame(&intrinsics, "fx_px"), + fy_px: extract_f32_from_frame(&intrinsics, "fy_px"), + cx_px: extract_f32_from_frame(&intrinsics, "cx_px"), + cy_px: extract_f32_from_frame(&intrinsics, "cy_px"), + width_px: extract_usize_from_frame(&intrinsics, "width_px"), + height_px: extract_usize_from_frame(&intrinsics, "height_px"), + }; + + let extrinsics_path = log_dir.join("calibration/egovehicle_SE3_sensor.feather"); + let extrinsics = read_feather_eager(&extrinsics_path, false) + .lazy() + .filter(col("sensor_name").eq(lit(camera_name))) + .collect() + .unwrap(); + + let qw = extract_f32_from_frame(&extrinsics, "qw"); + let qx = extract_f32_from_frame(&extrinsics, "qx"); + let qy = extract_f32_from_frame(&extrinsics, "qy"); + let qz = extract_f32_from_frame(&extrinsics, "qz"); + let tx_m = extract_f32_from_frame(&extrinsics, "tx_m"); + let ty_m = extract_f32_from_frame(&extrinsics, "ty_m"); + let tz_m = extract_f32_from_frame(&extrinsics, "tz_m"); + + let quat_wxyz = Array::::from_vec(vec![qw, qx, qy, qz]); + let rotation = _quat_to_mat3(&quat_wxyz.view()); + let translation = Array::::from_vec(vec![tx_m, ty_m, tz_m]); + let ego_se3_cam = SE3 { + rotation, + translation, + }; + + let camera_name_string = camera_name.to_string(); + Self { + ego_se3_cam, + intrinsics, + camera_name: camera_name_string, + } + } } fn extract_f32_from_frame(series: &DataFrame, column: &str) -> f32 { diff --git a/rust/src/geometry/se3.rs b/rust/src/geometry/se3.rs index 9fc95d54..7e0c9a9a 100644 --- a/rust/src/geometry/se3.rs +++ b/rust/src/geometry/se3.rs @@ -4,9 +4,11 @@ use ndarray::ArrayView2; use ndarray::{s, Array1, Array2}; +use pyo3::prelude::*; /// Special Euclidean Group 3 (SE(3)). /// Rigid transformation parameterized by a rotation and translation in $R^3$. +#[pyclass] #[derive(Clone, Debug)] pub struct SE3 { /// (3,3) Orthonormal rotation matrix. @@ -15,6 +17,7 @@ pub struct SE3 { pub translation: Array1, } +/// Rust methods. impl SE3 { /// Get the (4,4) homogeneous transformation matrix associated with the rigid transformation. pub fn transform_matrix(&self) -> Array2 { diff --git a/rust/src/structures/timestamped_image.rs b/rust/src/structures/timestamped_image.rs index 34adb751..f0df5dc9 100644 --- a/rust/src/structures/timestamped_image.rs +++ b/rust/src/structures/timestamped_image.rs @@ -1,14 +1,30 @@ use image::{ImageBuffer, Rgba}; +use ndarray::Ix3; +use nshare::RefNdarray3; +use numpy::{PyArray, ToPyArray}; use crate::geometry::camera::pinhole_camera::PinholeCamera; +use pyo3::prelude::*; /// Image modeled by `camera_model` occuring at `timestamp_ns`. -#[derive(Clone)] +// #[pyclass] +#[derive(Clone, Debug)] +#[pyclass] pub struct TimeStampedImage { /// RGBA u8 image buffer. pub image: ImageBuffer, Vec>, /// Pinhole camera model with intrinsics and extrinsics. + #[pyo3(get, set)] pub camera_model: PinholeCamera, /// Nanosecond timestamp. + #[pyo3(get, set)] pub timestamp_ns: usize, } + +#[pymethods] +impl TimeStampedImage { + #[getter] + fn image<'py>(&self, py: Python<'py>) -> &'py PyArray { + self.image.ref_ndarray3().to_pyarray(py) + } +} From 801e60e33f9f85a53f42aa4941109bca6d3af037 Mon Sep 17 00:00:00 2001 From: Benjamin Wilson Date: Mon, 15 May 2023 15:54:08 -0700 Subject: [PATCH 2/2] Add pinhole camera conversions. --- rust/src/geometry/camera/pinhole_camera.rs | 14 ++++++++- src/av2/_r.pyi | 20 ++++++++++-- src/av2/structures/timestamped_image.py | 1 - src/av2/torch/data_loaders/detection.py | 31 ++++++++++++++++--- .../torch/structures/time_stamped_image.py | 21 +++++++++++++ tutorials/3d_object_detection.py | 2 +- 6 files changed, 79 insertions(+), 10 deletions(-) create mode 100644 src/av2/torch/structures/time_stamped_image.py diff --git a/rust/src/geometry/camera/pinhole_camera.rs b/rust/src/geometry/camera/pinhole_camera.rs index 3203fb16..05ebab81 100644 --- a/rust/src/geometry/camera/pinhole_camera.rs +++ b/rust/src/geometry/camera/pinhole_camera.rs @@ -58,13 +58,19 @@ impl Intrinsics { height_px, } } + + #[getter] + #[pyo3(name = "k")] + fn py_k<'py>(&'py self, py: Python<'py>) -> &'py PyArray { + self.k().to_pyarray(py) + } } /// Rust methods. impl Intrinsics { /// Camera intrinsic matrix. pub fn k(&self) -> Array { - let mut k = Array::::eye(3); + let mut k = Array::::eye(4); k[[0, 0]] = self.fx_px; k[[1, 1]] = self.fy_px; k[[0, 2]] = self.cx_px; @@ -121,6 +127,12 @@ impl PinholeCamera { is_valid.to_pyarray(py), ) } + + #[getter] + #[pyo3(name = "extrinsics")] + fn py_extrinsics<'py>(&'py self, py: Python<'py>) -> &'py PyArray { + self.ego_se3_cam.inverse().transform_matrix().to_pyarray(py) + } } /// Rust methods. diff --git a/src/av2/_r.pyi b/src/av2/_r.pyi index 2000aba6..1ea5f5a9 100644 --- a/src/av2/_r.pyi +++ b/src/av2/_r.pyi @@ -4,7 +4,8 @@ from dataclasses import dataclass, field from typing import List, Optional, Tuple import polars as pl -import torch + +from av2.utils.typing import NDArrayByte @dataclass class DataLoader: @@ -18,7 +19,7 @@ class DataLoader: file_index: pl.DataFrame = field(init=False) def get(self, index: int) -> Sweep: ... - def get_synchronized_images(self, index: int) -> List[torch.Tensor]: ... + def get_synchronized_images(self, index: int) -> List[TimeStampedImage]: ... def __len__(self) -> int: ... @dataclass @@ -27,3 +28,18 @@ class Sweep: lidar: pl.DataFrame sweep_uuid: Tuple[str, int] cuboids: Optional[pl.DataFrame] + +@dataclass +class PinholeCamera: + fx_px: float + fy_px: float + cx_px: float + cy_px: float + width_px: float + height_px: float + +@dataclass +class TimeStampedImage: + image: NDArrayByte + camera_model: PinholeCamera + timestamp_ns: int diff --git a/src/av2/structures/timestamped_image.py b/src/av2/structures/timestamped_image.py index 41cb1c51..d09eb996 100644 --- a/src/av2/structures/timestamped_image.py +++ b/src/av2/structures/timestamped_image.py @@ -16,7 +16,6 @@ class TimestampedImage: img: (H,W,C) image. camera_model: Pinhole camera model with intrinsics and extrinsics. timestamp_ns: Nanosecond timestamp. - """ img: NDArrayByte diff --git a/src/av2/torch/data_loaders/detection.py b/src/av2/torch/data_loaders/detection.py index 290a56c2..e78f65f7 100644 --- a/src/av2/torch/data_loaders/detection.py +++ b/src/av2/torch/data_loaders/detection.py @@ -4,12 +4,14 @@ import logging from dataclasses import dataclass, field -from typing import List +from typing import List, Tuple import torch +from kornia.geometry.camera.pinhole import PinholeCamera, PinholeCamerasList from torch.utils.data import Dataset import av2._r as rust +from av2.torch.structures.time_stamped_image import TimeStampedImage from av2.utils.typing import PathType from ..structures.sweep import Sweep @@ -79,7 +81,26 @@ def __next__(self) -> Sweep: self._current_sweep_index += 1 return datum - def get_synchronized_images(self, sweep_index: int) -> List[torch.Tensor]: - """Get the synchronized ring images associated with the sweep index..""" - synchronized_images_list = self._backend.get_synchronized_images(sweep_index) - return [torch.as_tensor(x) for x in synchronized_images_list] + def get_synchronized_images(self, sweep_index: int) -> List[TimeStampedImage]: + """Get the synchronized ring images associated with the sweep index.""" + time_stamped_images = self._backend.get_synchronized_images(sweep_index) + time_stamped_image_list = [] + for ts_image in time_stamped_images: + image = torch.as_tensor(ts_image.image) + camera_model_rs = ts_image.camera_model + + intrinsics = torch.as_tensor(camera_model_rs.intrinsics.k) + extrinsics = torch.as_tensor(camera_model_rs.extrinsics) + + height = torch.as_tensor([image.shape[1]]) + width = torch.as_tensor([image.shape[2]]) + camera_model = PinholeCamera( + intrinsics=intrinsics, extrinsics=extrinsics, height=height, width=width + ) + + timestamp_ns = ts_image.timestamp_ns + time_stamped_image = TimeStampedImage( + image=image, camera_model=camera_model, timestamp_ns=timestamp_ns + ) + time_stamped_image_list.append(time_stamped_image) + return time_stamped_image_list diff --git a/src/av2/torch/structures/time_stamped_image.py b/src/av2/torch/structures/time_stamped_image.py new file mode 100644 index 00000000..883be135 --- /dev/null +++ b/src/av2/torch/structures/time_stamped_image.py @@ -0,0 +1,21 @@ +"""Timestamped image class with camera model for synchronization.""" + +from dataclasses import dataclass + +import torch +from kornia.geometry.camera import PinholeCamera + + +@dataclass +class TimeStampedImage: + """Timestamped image with an accompanying camera model. + + Args: + image: (H,W,C) image. + camera_model: Pinhole camera model with intrinsics and extrinsics. + timestamp_ns: Nanosecond timestamp. + """ + + image: torch.Tensor + camera_model: PinholeCamera + timestamp_ns: int diff --git a/tutorials/3d_object_detection.py b/tutorials/3d_object_detection.py index ff6d9cc5..05ae3cfd 100644 --- a/tutorials/3d_object_detection.py +++ b/tutorials/3d_object_detection.py @@ -48,7 +48,7 @@ def main( lidar_tensor = sweep.lidar.as_tensor() # Synchronized ring cameras. - synchronized_images = data_loader.get_synchronized_images(i) + time_stamped_image_list = data_loader.get_synchronized_images(i) # Transform the points to city coordinates. lidar_xyz_city = transform_points(city_SE3_ego_mat4, lidar_tensor[:, :3])