From 3a7de017300e2525647e967dca1bacb04de9a4a6 Mon Sep 17 00:00:00 2001 From: Lea Vauchier Date: Tue, 6 Oct 2020 10:44:54 +0200 Subject: [PATCH 1/7] Add basic support for Pandaset dataset --- pcdet/datasets/__init__.py | 4 +- pcdet/datasets/pandaset/pandaset_dataset.py | 312 ++++++++++++++++++++ 2 files changed, 315 insertions(+), 1 deletion(-) create mode 100644 pcdet/datasets/pandaset/pandaset_dataset.py diff --git a/pcdet/datasets/__init__.py b/pcdet/datasets/__init__.py index ee70c2a88..af43d1353 100644 --- a/pcdet/datasets/__init__.py +++ b/pcdet/datasets/__init__.py @@ -8,12 +8,14 @@ from .kitti.kitti_dataset import KittiDataset from .nuscenes.nuscenes_dataset import NuScenesDataset from .waymo.waymo_dataset import WaymoDataset +from .pandaset.pandaset_dataset import PandasetDataset __all__ = { 'DatasetTemplate': DatasetTemplate, 'KittiDataset': KittiDataset, 'NuScenesDataset': NuScenesDataset, - 'WaymoDataset': WaymoDataset + 'WaymoDataset': WaymoDataset, + 'PandasetDataset': PandasetDataset } diff --git a/pcdet/datasets/pandaset/pandaset_dataset.py b/pcdet/datasets/pandaset/pandaset_dataset.py new file mode 100644 index 000000000..67cfc33f9 --- /dev/null +++ b/pcdet/datasets/pandaset/pandaset_dataset.py @@ -0,0 +1,312 @@ +""" + Dataset from Pandaset (Hesai) +""" + +import pickle +import os +import pandas as pd +import pandaset as ps +import numpy as np + +from ..dataset import DatasetTemplate + + +class PandasetDataset(DatasetTemplate): + def __init__(self, dataset_cfg, class_names, training=True, root_path=None, logger=None): + """ + Args: + root_path: + dataset_cfg: + class_names: + training: + logger: + """ + super().__init__( + dataset_cfg=dataset_cfg, class_names=class_names, training=training, root_path=root_path, logger=logger + ) + if root_path is None: + root_path = self.dataset_cfg.DATA_PATH + self.dataset = ps.DataSet(os.path.join(root_path, 'dataset')) + self.split = self.dataset_cfg.DATA_SPLIT[self.mode] + self.pandaset_infos = [] + 
self.include_pandaset_infos(self.mode) + + + def include_pandaset_infos(self, mode): + if self.logger is not None: + self.logger.info('Loading PandaSet dataset') + pandaset_infos = [] + + for info_path in self.dataset_cfg.INFO_PATH[mode]: + info_path = os.path.join(self.root_path, info_path) + if not os.path.exists(info_path): + continue + with open(info_path, 'rb') as f: + infos = pickle.load(f) + pandaset_infos.extend(infos) + + self.pandaset_infos.extend(pandaset_infos) + + if self.logger is not None: + self.logger.info('Total samples for PandaSet dataset ({}): {}'.format(self.mode, len(pandaset_infos))) + + + def set_split(self, split): + self.sequences = self.dataset_cfg.SEQUENCES[split] + self.split = split + + + def __len__(self): + return len(self.pandaset_infos) + + + def __getitem__(self, index): + """ + To support a custom dataset, implement this function to load the raw data (and labels), then transform them to + the unified normative coordinate (x pointing forward, z pointing upwards) and call the function self.prepare_data() to process the data and send them + to the model. 
+ + Args: + index: + + Returns: + + """ + info = self.pandaset_infos[index] + seq_idx = info['sequence'] + + pose = self._get_pose(info) + points = self._get_lidar_points(info, pose) + boxes, labels = self._get_annotations(info, pose) + + input_dict = {'points': points, + 'gt_boxes': boxes, + 'gt_names': labels, + 'sequence': int(seq_idx), + 'frame_idx': info['frame_idx']} + # seq_idx is converted to int because strings can't be passed to + # the gpu in pytorch + + data_dict = self.prepare_data(data_dict=input_dict) + + return data_dict + + + def _get_pose(self, info): + seq_idx = info['sequence'] + # get pose for world to ego frame transformation + if self.dataset[seq_idx].lidar.poses is None: + self.dataset[seq_idx].lidar._load_poses() + + pose = self.dataset[seq_idx].lidar.poses[info['frame_idx']] + + return pose + + + def _get_lidar_points(self, info, pose): + """ + Get lidar in the unified normative coordinate system for a given frame + The intensity is normalized to fit [0-1] range (pandaset intensity is in [0-255] range) + """ + # get lidar points + lidar_frame = pd.read_pickle(info['lidar_path']) + # get points for pandar64 rotating lidar only + # TODO: move lidar choice to parameters + lidar_frame = lidar_frame[lidar_frame.d == 0] + world_points = lidar_frame.to_numpy() + del lidar_frame # There seem to be issues with the automatic deletion of pandas datasets sometimes + + points_loc = world_points[:, :3] + points_int = world_points[:, 3] + + # nromalize intensity + points_int = points_int / 255 + + ego_points = ps.geometry.lidar_points_to_ego(points_loc, pose) + # Pandaset ego coordinates are: + # - x pointing to the right + # - y pointing to the front + # - z pointing up + # Normative coordinates are: + # - x pointing foreward + # - y pointings to the left + # - z pointing to the top + # So a transformation is required to the match the normative coordinates + ego_points = ego_points[:, [1, 0, 2]] # switch x and y + ego_points[:, 1] = - ego_points[:, 1] # 
revert y axis + + return np.append(ego_points, np.expand_dims(points_int, axis=1), axis=1).astype(np.float32) + + + def _get_annotations(self,info, pose): + """ + Get box informations in the unified normative coordinate system for a given frame + """ + + # get boxes + cuboids = pd.read_pickle(info["cuboids_path"]) + # TODO: check compatibility with having the sensor id as a parameter + cuboids = cuboids[cuboids["cuboids.sensor_id"] != 1] # keep only points from the rotating lidar + + xs = cuboids['position.x'].to_numpy() + ys = cuboids['position.y'].to_numpy() + zs = cuboids['position.z'].to_numpy() + dxs = cuboids['dimensions.x'].to_numpy() + dys = cuboids['dimensions.y'].to_numpy() + dzs = cuboids['dimensions.z'].to_numpy() + yaws = cuboids['yaw'].to_numpy() + labels = cuboids['label'].to_numpy() + + del cuboids # There seem to be issues with the automatic deletion of pandas datasets sometimes + + # Compute the center points coordinates in ego coordinates + centers = np.vstack([xs, ys, zs]).T + ego_centers = ps.geometry.lidar_points_to_ego(centers, pose) + + # Compute the yaw in ego coordinates + # The following implementation supposes that the pitch of the car is + # negligible compared to its yaw, in order to be able to express the + # bbox coordinates in the ego coordinate system with an {axis aligned + # box + yaw} only representation + yaxis_points_from_pose = ps.geometry.lidar_points_to_ego(np.array([[0, 0, 0], [0, 1., 0]]), pose) + yaxis_from_pose = yaxis_points_from_pose[1, :] - yaxis_points_from_pose[0, :] + + if yaxis_from_pose[-1] >= 10**-1: + if self.logger is not None: + self.logger.warning("The car's pitch is supposed to be negligible sin(pitch) is >= 10**-1 ({})".format(yaxis_from_pose[-1])) + + zrot = np.arctan2(-yaxis_from_pose[0], yaxis_from_pose[1]) # rotation angle in rads of the y axis around thz z axis + ego_yaws = yaws + zrot + + # Pandaset ego coordinates are: + # - x pointing to the right + # - y pointing to the front + # - z pointing up 
+ # Normative coordinates are: + # - x pointing foreward + # - y pointings to the left + # - z pointing to the top + # So a transformation is required to the match the normative coordinates + ego_xs = ego_centers[:, 1] + ego_ys = -ego_centers[:, 0] + ego_zs = ego_centers[:, 2] + ego_dxs = dys + ego_dys = dxs # stays >= 0 + ego_dzs = dzs + + ego_boxes = np.vstack([ego_xs, ego_ys, ego_zs, ego_dxs, ego_dys, ego_dzs, ego_yaws]).T + + return ego_boxes.astype(np.float32), labels + + + @staticmethod + def generate_prediction_dicts(batch_dict, pred_dicts, class_names, output_path=None): + """ + To support a custom dataset, implement this function to receive the predicted results from the model, and then + transform the unified normative coordinate to your required coordinate, and optionally save them to disk. + + Args: + batch_dict: dict of original data from the dataloader + pred_dicts: dict of predicted results from the model + pred_boxes: (N, 7), Tensor + pred_scores: (N), Tensor + pred_labels: (N), Tensor + class_names: + output_path: if it is not None, save the results to this path + Returns: + + """ + + def generate_single_sample_dataframe(batch_index, box_dict): + print(box_dict) + raise NotImplementedError + + + annos = [] + for index, box_dict in enumerate(pred_dicts): + frame_idx = batch_dict['frame_idx'][index] + seq_idx = batch_dict['sequence'][index] + single_pred_df = generate_single_sample_dataframe(index, box_dict) + + single_pred_dict = {'preds': single_pred_df, + 'frame_idx': frame_idx, + 'sequence': str(seq_idx).zfill(3)} + # seq_idx was converted to int in self.__getitem__` because strings + # can't be passed to the gpu in pytorch. 
+ # To convert it back to a string, we assume that the sequence is + # provided in pandaset format with 3 digits + + if output_path is not None: + cur_det_file = os.path.join(output_path, seq_idx, 'predictions', + 'cuboids', ("{:02d}.pkl.gz".format(frame_idx))) + os.makedirs(os.path.dirname(cur_det_file), exist_ok=True) + single_pred_df.to_pickle(cur_det_file) + + annos.append(single_pred_dict) + + + return annos + + + def get_infos(self): + """ + Generate the dataset infos dict for each sample of the dataset. + For each sample, this dict contains: + - the sequence index + - the frame index + - the path to the lidar data + - the path to the bounding box annotations + """ + infos = [] + for seq in self.sequences: + s = self.dataset[seq] + s.load_lidar() + if len(s.lidar.data) > 100: + raise ValueError("The implementation for this dataset assumes that each sequence is " + + "no longer than 100 frames. The current sequence has {}".format(len(s.lidar.data))) + info = [{'sequence': seq, + 'frame_idx': ii, + 'lidar_path': os.path.join(self.root_path, 'dataset', seq, 'lidar', ("{:02d}.pkl.gz".format(ii))), + 'cuboids_path': os.path.join(self.root_path, 'dataset', seq, 'annotations', 'cuboids', ("{:02d}.pkl.gz".format(ii))) + } for ii in range(len(s.lidar.data))] + infos.extend(info) + del self.dataset._sequences[seq] + + return infos + + +def create_pandaset_infos(dataset_cfg, class_names, data_path, save_path): + """ + Create dataset_infos files in order not to have it in a preprocessed pickle + file with the info for each sample + See PandasetDataset.get_infos for further details. 
+ """ + dataset = PandasetDataset(dataset_cfg=dataset_cfg, class_names=class_names, root_path=data_path, training=False) + for split in ["train", "val", "test"]: + print("---------------- Start to generate {} data infos ---------------".format(split)) + dataset.set_split(split) + infos = dataset.get_infos() + file_path = os.path.join(save_path, 'pandaset_infos_{}.pkl'.format(split)) + with open(file_path, 'wb') as f: + pickle.dump(infos, f) + print("Pandaset info {} file is saved to {}".format(split, file_path)) + + +if __name__ == '__main__': + import sys + if sys.argv.__len__() > 1 and sys.argv[1] == 'create_pandaset_infos': + import yaml + from pathlib import Path + from easydict import EasyDict + dataset_cfg = EasyDict(yaml.safe_load(open(sys.argv[2]))) + ROOT_DIR = (Path(__file__).resolve().parent / '../../../').resolve() + create_pandaset_infos( + dataset_cfg=dataset_cfg, + class_names=['Car', 'Pedestrian', 'Cyclist'], + data_path=ROOT_DIR / 'data' / 'pandaset', + save_path=ROOT_DIR / 'data' / 'pandaset' + ) + + + + From fac17f71b4d2be7ab9a736e3e16d62bc07ded9e1 Mon Sep 17 00:00:00 2001 From: Lea Vauchier Date: Mon, 26 Oct 2020 14:00:31 +0100 Subject: [PATCH 2/7] Add gt_sampling support for pandaset --- pcdet/datasets/pandaset/pandaset_dataset.py | 62 +++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/pcdet/datasets/pandaset/pandaset_dataset.py b/pcdet/datasets/pandaset/pandaset_dataset.py index 67cfc33f9..77b0910ad 100644 --- a/pcdet/datasets/pandaset/pandaset_dataset.py +++ b/pcdet/datasets/pandaset/pandaset_dataset.py @@ -9,7 +9,9 @@ import numpy as np from ..dataset import DatasetTemplate +from ...ops.roiaware_pool3d import roiaware_pool3d_utils +import torch class PandasetDataset(DatasetTemplate): def __init__(self, dataset_cfg, class_names, training=True, root_path=None, logger=None): @@ -275,6 +277,57 @@ def get_infos(self): return infos + def create_groundtruth_database(self, info_path=None, used_classes=None, split='train'): + 
database_save_path = os.path.join(self.root_path, + 'gt_database' if split == 'train' else 'gt_database_{}'.format(split)) + db_info_save_path = os.path.join(self.root_path, + 'pandaset_dbinfos_{}.pkl'.format(split)) + + os.makedirs(database_save_path, exist_ok=True) + all_db_infos = {} + + with open(info_path, 'rb') as f: + infos = pickle.load(f) + + for k in range(len(infos)): + print('gt_database sample: %d/%d' % (k + 1, len(infos))) + info = infos[k] + sample_idx = info['frame_idx'] + pose = self._get_pose(info) + points = self._get_lidar_points(info, pose) + gt_boxes, names = self._get_annotations(info, pose) + + num_obj = gt_boxes.shape[0] + + point_indices = roiaware_pool3d_utils.points_in_boxes_cpu( + torch.from_numpy(points[:, 0:3]), torch.from_numpy(gt_boxes) + ).numpy() # (nboxes, npoints) + + for i in range(num_obj): + tmp_name = names[i].replace("/", "").replace(" ", "") + filename = '%s_%s_%d.bin' % (sample_idx, tmp_name, i) + filepath = os.path.join(database_save_path, filename) + gt_points = points[point_indices[i] > 0] + gt_points[:, :3] -= gt_boxes[i, :3] + with open(filepath, 'wb') as f: + gt_points.tofile(f) + + if (used_classes is None) or names[i] in used_classes: + db_path = os.path.relpath(filepath, self.root_path) # gt_database/xxxxx.bin + db_info = {'name': names[i], 'path': db_path, 'gt_idx': i, + 'box3d_lidar': gt_boxes[i], 'num_points_in_gt': gt_points.shape[0], + 'difficulty': -1} + if names[i] in all_db_infos: + all_db_infos[names[i]].append(db_info) + else: + all_db_infos[names[i]] = [db_info] + for k, v in all_db_infos.items(): + print('Database %s: %d' % (k, len(v))) + + with open(db_info_save_path, 'wb') as f: + pickle.dump(all_db_infos, f) + + def create_pandaset_infos(dataset_cfg, class_names, data_path, save_path): """ Create dataset_infos files in order not to have it in a preprocessed pickle @@ -291,6 +344,15 @@ def create_pandaset_infos(dataset_cfg, class_names, data_path, save_path): pickle.dump(infos, f) print("Pandaset 
info {} file is saved to {}".format(split, file_path)) + print('---------------Start create groundtruth database for data augmentation---------------') + dataset.set_split("train") + dataset.create_groundtruth_database( + os.path.join(save_path, 'pandaset_infos_train.pkl'), + split="train" + ) + + print('---------------Data preparation Done---------------') + if __name__ == '__main__': import sys From 84ec90689eabf6d1664f4572d5f929afb02b3532 Mon Sep 17 00:00:00 2001 From: Lea Vauchier Date: Thu, 29 Oct 2020 10:55:41 +0100 Subject: [PATCH 3/7] Enable training on different classes than raw dataset --- pcdet/datasets/pandaset/pandaset_dataset.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pcdet/datasets/pandaset/pandaset_dataset.py b/pcdet/datasets/pandaset/pandaset_dataset.py index 77b0910ad..d9ac9c6e2 100644 --- a/pcdet/datasets/pandaset/pandaset_dataset.py +++ b/pcdet/datasets/pandaset/pandaset_dataset.py @@ -161,6 +161,9 @@ def _get_annotations(self,info, pose): del cuboids # There seem to be issues with the automatic deletion of pandas datasets sometimes + labels = np.array([self.dataset_cfg.TRAINING_CATEGORIES.get(lab, lab) + for lab in labels] ) + # Compute the center points coordinates in ego coordinates centers = np.vstack([xs, ys, zs]).T ego_centers = ps.geometry.lidar_points_to_ego(centers, pose) From 4f4fb0b1b5c98598f981581b270d5b3bf5346a2c Mon Sep 17 00:00:00 2001 From: Lea Vauchier Date: Thu, 29 Oct 2020 10:56:30 +0100 Subject: [PATCH 4/7] Save pandaset output in the same format as annotations --- pcdet/datasets/pandaset/pandaset_dataset.py | 120 +++++++++++++++++--- 1 file changed, 105 insertions(+), 15 deletions(-) diff --git a/pcdet/datasets/pandaset/pandaset_dataset.py b/pcdet/datasets/pandaset/pandaset_dataset.py index d9ac9c6e2..6a99aca3b 100644 --- a/pcdet/datasets/pandaset/pandaset_dataset.py +++ b/pcdet/datasets/pandaset/pandaset_dataset.py @@ -13,6 +13,39 @@ import torch + +def pose_dict_to_numpy(pose): + """ + Conert pandaset pose 
dict to a numpy vector in order to pass it through the network + """ + pose_np = [pose["position"]["x"], + pose["position"]["y"], + pose["position"]["z"], + pose["heading"]["w"], + pose["heading"]["x"], + pose["heading"]["y"], + pose["heading"]["z"]] + + return pose_np + + +def pose_numpy_to_dict(pose): + """ + Convert a numpy pose vector (position x, y, z then heading w, x, y, z) back to a pandaset pose dict + """ + pose_dict = {'position': + {'x': pose[0], + 'y': pose[1], + 'z': pose[2]}, + 'heading': + {'w': pose[3], + 'x': pose[4], + 'y': pose[5], + 'z': pose[6]}} + + return pose_dict + + + class PandasetDataset(DatasetTemplate): def __init__(self, dataset_cfg, class_names, training=True, root_path=None, logger=None): """ @@ -79,15 +112,21 @@ def __getitem__(self, index): pose = self._get_pose(info) points = self._get_lidar_points(info, pose) - boxes, labels = self._get_annotations(info, pose) + boxes, labels, zrot_world_to_ego = self._get_annotations(info, pose) + pose_np = pose_dict_to_numpy(pose) input_dict = {'points': points, 'gt_boxes': boxes, 'gt_names': labels, 'sequence': int(seq_idx), - 'frame_idx': info['frame_idx']} + 'frame_idx': info['frame_idx'], + 'zrot_world_to_ego': zrot_world_to_ego, + 'pose': pose_dict_to_numpy(pose) + } # seq_idx is converted to int because strings can't be passed to # the gpu in pytorch + # zrot_world_to_ego is propagated in order to be able to transform the + # predicted yaws back to world coordinates data_dict = self.prepare_data(data_dict=input_dict) @@ -180,8 +219,8 @@ def _get_annotations(self,info, pose): if self.logger is not None: self.logger.warning("The car's pitch is supposed to be negligible sin(pitch) is >= 10**-1 ({})".format(yaxis_from_pose[-1])) - zrot = np.arctan2(-yaxis_from_pose[0], yaxis_from_pose[1]) # rotation angle in rads of the y axis around thz z axis - ego_yaws = yaws + zrot + zrot_world_to_ego = np.arctan2(-yaxis_from_pose[0], yaxis_from_pose[1]) # rotation angle in rads of the y axis around thz z axis + ego_yaws 
= yaws + zrot_world_to_ego # Pandaset ego coordinates are: # - x pointing to the right @@ -201,7 +240,7 @@ def _get_annotations(self,info, pose): ego_boxes = np.vstack([ego_xs, ego_ys, ego_zs, ego_dxs, ego_dys, ego_dzs, ego_yaws]).T - return ego_boxes.astype(np.float32), labels + return ego_boxes.astype(np.float32), labels, zrot_world_to_ego @staticmethod @@ -222,16 +261,66 @@ def generate_prediction_dicts(batch_dict, pred_dicts, class_names, output_path=N """ - def generate_single_sample_dataframe(batch_index, box_dict): - print(box_dict) - raise NotImplementedError + def generate_single_sample_dataframe(batch_index, box_dict, zrot_world_to_ego, pose): + pred_boxes = box_dict["pred_boxes"].cpu().numpy() + pred_scores = box_dict["pred_scores"].cpu().numpy() + pred_labels = box_dict["pred_labels"].cpu().numpy() + zrot = zrot_world_to_ego.cpu().numpy() + pose_dict = pose_numpy_to_dict(pose.cpu().numpy()) + + xs = pred_boxes[:, 0] + ys = pred_boxes[:, 1] + zs = pred_boxes[:, 2] + dxs = pred_boxes[:, 3] + dys = pred_boxes[:, 4] + dzs = pred_boxes[:, 5] + yaws = pred_boxes[:, 6] + names = np.array(class_names)[pred_labels - 1] # Predicted labels start on 1 + + # convert from normative coordinates to pandaset ego coordinates + ego_xs = - ys + ego_ys = xs + ego_zs = zs + ego_dxs = dys + ego_dys = dxs + ego_dzs = dzs + ego_yaws = yaws + + # convert from pandaset ego coordinates to world coordinates + # for the moment, an simplified estimation of the ego yaw is computed in __getitem__ + # which sets ego_yaw = world_yaw + zrot_world_to_ego + world_yaws = ego_yaws - zrot + + ego_centers = np.vstack([ego_xs, ego_ys, ego_zs]).T + world_centers = ps.geometry.ego_to_lidar_points(ego_centers, pose_dict) + world_xs = world_centers[:, 0] + world_ys = world_centers[:, 1] + world_zs = world_centers[:, 2] + # dx, dy, dz remain unchanged as the bbox orientation is handled by + # the yaw information + + data_dict = {'position.x': world_xs, + 'position.y': world_ys, + 'position.z': 
world_zs, + 'dimensions.x': ego_dxs, + 'dimensions.y': ego_dys, + 'dimensions.z': ego_dzs, + 'yaw': world_yaws % (2 * np.pi), + 'label': names, + 'score': pred_scores + } + + return pd.DataFrame(data_dict) annos = [] for index, box_dict in enumerate(pred_dicts): frame_idx = batch_dict['frame_idx'][index] seq_idx = batch_dict['sequence'][index] - single_pred_df = generate_single_sample_dataframe(index, box_dict) + zrot = batch_dict['zrot_world_to_ego'][index] + pose = batch_dict['pose'][index] + + single_pred_df = generate_single_sample_dataframe(index, box_dict, zrot, pose) single_pred_dict = {'preds': single_pred_df, 'frame_idx': frame_idx, @@ -242,14 +331,15 @@ def generate_single_sample_dataframe(batch_index, box_dict): # provided in pandaset format with 3 digits if output_path is not None: - cur_det_file = os.path.join(output_path, seq_idx, 'predictions', - 'cuboids', ("{:02d}.pkl.gz".format(frame_idx))) + frame_id = str(int(frame_idx)).zfill(2) + seq_id = str(int(seq_idx)).zfill(3) + cur_det_file = os.path.join(output_path, seq_id, 'predictions', + 'cuboids', ("{}.pkl.gz".format(frame_id))) os.makedirs(os.path.dirname(cur_det_file), exist_ok=True) single_pred_df.to_pickle(cur_det_file) annos.append(single_pred_dict) - return annos @@ -298,7 +388,7 @@ def create_groundtruth_database(self, info_path=None, used_classes=None, split=' sample_idx = info['frame_idx'] pose = self._get_pose(info) points = self._get_lidar_points(info, pose) - gt_boxes, names = self._get_annotations(info, pose) + gt_boxes, names, _ = self._get_annotations(info, pose) num_obj = gt_boxes.shape[0] @@ -347,13 +437,13 @@ def create_pandaset_infos(dataset_cfg, class_names, data_path, save_path): pickle.dump(infos, f) print("Pandaset info {} file is saved to {}".format(split, file_path)) - print('---------------Start create groundtruth database for data augmentation---------------') + print('------------Start create groundtruth database for data augmentation-----------') + dataset = 
PandasetDataset(dataset_cfg=dataset_cfg, class_names=class_names, root_path=data_path, training=False) dataset.set_split("train") dataset.create_groundtruth_database( os.path.join(save_path, 'pandaset_infos_train.pkl'), split="train" ) - print('---------------Data preparation Done---------------') From 4c9c8f865eb926b8fab0369444b160e83e9d76aa Mon Sep 17 00:00:00 2001 From: Lea Vauchier Date: Thu, 29 Oct 2020 11:04:07 +0100 Subject: [PATCH 5/7] Add dataset config file for pandaset --- .../dataset_configs/pandaset_dataset.yaml | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 tools/cfgs/dataset_configs/pandaset_dataset.yaml diff --git a/tools/cfgs/dataset_configs/pandaset_dataset.yaml b/tools/cfgs/dataset_configs/pandaset_dataset.yaml new file mode 100644 index 000000000..4e3d6119f --- /dev/null +++ b/tools/cfgs/dataset_configs/pandaset_dataset.yaml @@ -0,0 +1,99 @@ +DATASET: 'PandasetDataset' +DATA_PATH: '../data/pandaset' + +POINT_CLOUD_RANGE: [-70, -40, -3, 70, 40, 1] # xmin, ymin, zmin, xmax, ymax, zmax + +DATA_SPLIT: { + 'train': train, + 'test': val +} + +SEQUENCES: { + 'train': ['014', '050', '079', '048', '093', '091', '063', '104', '100', '092', '012', '047', '018', '006', '099', '085', '035', '041', '052', '105', '030', '113', '002', '084', '028', '119', '044', '005', '102', '034', '077', '064', '067', '058', '019', '015', '037', '095', '120', '066', '023', '071', '117', '098', '139', '038', '116', '046', '088', '089', '040', '033', '016', '024', '122', '039', '158', '069', '124', '123', '106'], # ~60% of the sequences, randomly chosen + 'val': ['045', '059', '055', '051', '020', '097', '073', '043', '003', '101', '027', '056', '011', '078', '080', '109', '042', '021', '094', '057'], # ~20% of the sequences, randomly chosen + 'test': ['074', '004', '086', '062', '068', '008', '001', '110', '053', '115', '054', '065', '017', '103', '072', '013', '029', '090', '112', '149', '070', '032'] # ~20% of the sequences, randomly chosen +} + 
+INFO_PATH: { + 'train': [pandaset_infos_train.pkl], + 'test': [pandaset_infos_val.pkl], +} + +TRAINING_CATEGORIES: { +# This maps raw dataset categories with the corresponding categories used in training +# This map can be incomplete. In case a category is not present, the category +# for training is the same as the raw dataset category + 'Car': 'Car', + 'Pickup Truck': 'Car', + 'Medium-sized Truck': 'Truck', + 'Semi-truck': 'Truck', + 'Towed Object': 'Other Vehicle', + 'Motorcycle': 'Motorcycle', + 'Other Vehicle - Construction Vehicle': 'Other Vehicle', + 'Other Vehicle - Uncommon': 'Other Vehicle', + 'Other Vehicle - Pedicab': 'Other Vehicle', + 'Emergency Vehicle': 'Other Vehicle', + 'Bus': 'Bus', + 'Bicycle': 'Bicycle', + 'Pedestrian': 'Pedestrian', + 'Pedestrian with Object': 'Pedestrian', + 'Animals - Other': 'Animal' +} + + +FOV_POINTS_ONLY: False + + +DATA_AUGMENTOR: + DISABLE_AUG_LIST: ['placeholder'] + AUG_CONFIG_LIST: +# gt sampling not working at the moment + - NAME: gt_sampling + USE_ROAD_PLANE: False + DB_INFO_PATH: + - pandaset_dbinfos_train.pkl + PREPARE: { + filter_by_min_points: ['Car:5', 'Pedestrian:5', 'Bicycle:5'], + filter_by_difficulty: [-1], + } + + SAMPLE_GROUPS: ['Car:20','Pedestrian:15', 'Bicycle:15'] + NUM_POINT_FEATURES: 4 + DATABASE_WITH_FAKELIDAR: False + REMOVE_EXTRA_WIDTH: [0.0, 0.0, 0.0] + LIMIT_WHOLE_SCENE: True + + - NAME: random_world_flip + ALONG_AXIS_LIST: ['x', 'y'] + + - NAME: random_world_rotation + WORLD_ROT_ANGLE: [-3.14159265, 3.114159265] + + - NAME: random_world_scaling + WORLD_SCALE_RANGE: [0.95, 1.05] + + +POINT_FEATURE_ENCODING: { + encoding_type: absolute_coordinates_encoding, + used_feature_list: ['x', 'y', 'z', 'intensity'], + src_feature_list: ['x', 'y', 'z', 'intensity'], +} + + +DATA_PROCESSOR: + - NAME: mask_points_and_boxes_outside_range + REMOVE_OUTSIDE_BOXES: True + + - NAME: shuffle_points + SHUFFLE_ENABLED: { + 'train': True, + 'test': False + } + + - NAME: transform_points_to_voxels + VOXEL_SIZE: 
[0.05, 0.05, 0.1] + MAX_POINTS_PER_VOXEL: 5 + MAX_NUMBER_OF_VOXELS: { + 'train': 16000, + 'test': 40000 + } From e0aa766299314ee6fc4835bd81fffe50ce4756f1 Mon Sep 17 00:00:00 2001 From: Lea Vauchier Date: Wed, 4 Nov 2020 09:53:01 +0100 Subject: [PATCH 6/7] Make pandaset dataset compatible with test.py - add 'evaluation' method - add 'name' key to predictions dict --- pcdet/datasets/pandaset/pandaset_dataset.py | 26 +++++++++++++++++---- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/pcdet/datasets/pandaset/pandaset_dataset.py b/pcdet/datasets/pandaset/pandaset_dataset.py index 6a99aca3b..72541bcf7 100644 --- a/pcdet/datasets/pandaset/pandaset_dataset.py +++ b/pcdet/datasets/pandaset/pandaset_dataset.py @@ -155,7 +155,8 @@ def _get_lidar_points(self, info, pose): # TODO: move lidar choice to parameters lidar_frame = lidar_frame[lidar_frame.d == 0] world_points = lidar_frame.to_numpy() - del lidar_frame # There seem to be issues with the automatic deletion of pandas datasets sometimes + # There seems to be issues with the automatic deletion of pandas datasets sometimes + del lidar_frame points_loc = world_points[:, :3] points_int = world_points[:, 3] @@ -217,9 +218,11 @@ def _get_annotations(self,info, pose): if yaxis_from_pose[-1] >= 10**-1: if self.logger is not None: - self.logger.warning("The car's pitch is supposed to be negligible sin(pitch) is >= 10**-1 ({})".format(yaxis_from_pose[-1])) + self.logger.warning("The car's pitch is supposed to be negligible " + + "sin(pitch) is >= 10**-1 ({})".format(yaxis_from_pose[-1])) - zrot_world_to_ego = np.arctan2(-yaxis_from_pose[0], yaxis_from_pose[1]) # rotation angle in rads of the y axis around thz z axis + # rotation angle in rads of the y axis around thz z axis + zrot_world_to_ego = np.arctan2(-yaxis_from_pose[0], yaxis_from_pose[1]) ego_yaws = yaws + zrot_world_to_ego # Pandaset ego coordinates are: @@ -322,7 +325,10 @@ def generate_single_sample_dataframe(batch_index, box_dict, zrot_world_to_ego, p 
single_pred_df = generate_single_sample_dataframe(index, box_dict, zrot, pose) - single_pred_dict = {'preds': single_pred_df, + + single_pred_dict = {'preds' : single_pred_df, + # 'name 'ensures testing the number of detections in a compatible format as kitti + 'name' : single_pred_df['label'].tolist(), 'frame_idx': frame_idx, 'sequence': str(seq_idx).zfill(3)} # seq_idx was converted to int in self.__getitem__` because strings @@ -362,7 +368,8 @@ def get_infos(self): info = [{'sequence': seq, 'frame_idx': ii, 'lidar_path': os.path.join(self.root_path, 'dataset', seq, 'lidar', ("{:02d}.pkl.gz".format(ii))), - 'cuboids_path': os.path.join(self.root_path, 'dataset', seq, 'annotations', 'cuboids', ("{:02d}.pkl.gz".format(ii))) + 'cuboids_path': os.path.join(self.root_path, 'dataset', seq, + 'annotations', 'cuboids', ("{:02d}.pkl.gz".format(ii))) } for ii in range(len(s.lidar.data))] infos.extend(info) del self.dataset._sequences[seq] @@ -421,6 +428,15 @@ def create_groundtruth_database(self, info_path=None, used_classes=None, split=' pickle.dump(all_db_infos, f) + def evaluation(self, det_annos, class_names, **kwargs): + self.logger.warning('Evaluation is not implemented for Pandaset as there is no official one. 
' + + 'Returning an empty evaluation result.') + ap_result_str = '' + ap_dict = {} + + return ap_result_str, ap_dict + + def create_pandaset_infos(dataset_cfg, class_names, data_path, save_path): """ Create dataset_infos files in order not to have it in a preprocessed pickle From 37f162a41275cc417fef91441eaf48147547128c Mon Sep 17 00:00:00 2001 From: Lea Vauchier Date: Wed, 4 Nov 2020 14:42:23 +0100 Subject: [PATCH 7/7] Enable selecting lidar device for pandaset --- pcdet/datasets/pandaset/pandaset_dataset.py | 13 ++++++++----- tools/cfgs/dataset_configs/pandaset_dataset.yaml | 15 +++++++++++++-- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/pcdet/datasets/pandaset/pandaset_dataset.py b/pcdet/datasets/pandaset/pandaset_dataset.py index 72541bcf7..b7072dbfa 100644 --- a/pcdet/datasets/pandaset/pandaset_dataset.py +++ b/pcdet/datasets/pandaset/pandaset_dataset.py @@ -151,9 +151,10 @@ def _get_lidar_points(self, info, pose): """ # get lidar points lidar_frame = pd.read_pickle(info['lidar_path']) - # get points for pandar64 rotating lidar only - # TODO: move lidar choice to parameters - lidar_frame = lidar_frame[lidar_frame.d == 0] + # get points for the required lidar(s) only + device = self.dataset_cfg.get('LIDAR_DEVICE', 0) + if device != -1: + lidar_frame = lidar_frame[lidar_frame.d == device] world_points = lidar_frame.to_numpy() # There seems to be issues with the automatic deletion of pandas datasets sometimes del lidar_frame @@ -187,8 +188,10 @@ def _get_annotations(self,info, pose): # get boxes cuboids = pd.read_pickle(info["cuboids_path"]) - # TODO: check compatibility with having the sensor id as a parameter - cuboids = cuboids[cuboids["cuboids.sensor_id"] != 1] # keep only points from the rotating lidar + device = self.dataset_cfg.get('LIDAR_DEVICE', 0) + if device != -1: + # keep cuboids that are seen by a given device + cuboids = cuboids[cuboids["cuboids.sensor_id"] != 1 - device] xs = cuboids['position.x'].to_numpy() ys = 
cuboids['position.y'].to_numpy() diff --git a/tools/cfgs/dataset_configs/pandaset_dataset.yaml b/tools/cfgs/dataset_configs/pandaset_dataset.yaml index 4e3d6119f..d2d8d4e73 100644 --- a/tools/cfgs/dataset_configs/pandaset_dataset.yaml +++ b/tools/cfgs/dataset_configs/pandaset_dataset.yaml @@ -1,7 +1,7 @@ DATASET: 'PandasetDataset' DATA_PATH: '../data/pandaset' -POINT_CLOUD_RANGE: [-70, -40, -3, 70, 40, 1] # xmin, ymin, zmin, xmax, ymax, zmax +POINT_CLOUD_RANGE: [-70, -40, -3, 70, 40, 1] # xmin, ymin, zmin, xmax, ymax, zmax DATA_SPLIT: { 'train': train, @@ -14,6 +14,17 @@ SEQUENCES: { 'test': ['074', '004', '086', '062', '068', '008', '001', '110', '053', '115', '054', '065', '017', '103', '072', '013', '029', '090', '112', '149', '070', '032'] # ~20% of the sequences, randomly chosen } +# Acquisition device to consider when loading the data +# Pandaset contains data from: +# - a pandar64 spinning lidar +# - a pandarGT forward facing lidar +# To use data from: +# - the pandar64 lidar only (default), set LIDAR_DEVICE to 0, +# - the pandarGT lidar only, set it to 1 +# - both devices, set it to -1 +LIDAR_DEVICE: 0 + + INFO_PATH: { 'train': [pandaset_infos_train.pkl], 'test': [pandaset_infos_val.pkl], @@ -21,7 +32,7 @@ INFO_PATH: { TRAINING_CATEGORIES: { # This maps raw dataset categories with the corresponding categories used in training -# This map can be incomplete. In case a category is not present, the category +# This map can be incomplete. In case a category is not present, the category # for training is the same as the raw dataset category 'Car': 'Car', 'Pickup Truck': 'Car',