From f732d8737de97cc1fdbeea94d83476a1f02f652f Mon Sep 17 00:00:00 2001 From: Kok Seang Date: Tue, 5 Aug 2025 18:52:14 +0900 Subject: [PATCH 1/6] Add dataset_preparation interface --- tools/dataset_preparation/__init__.py | 0 tools/dataset_preparation/dataset/__init__.py | 0 .../dataset/base/__init__.py | 0 .../dataset/base/dataset_preparation_base.py | 47 +++++++ .../base/t4dataset_preparation_base.py | 116 ++++++++++++++++++ .../dataset/detection2d/__init__.py | 0 .../dataset/detection3d/__init__.py | 0 .../t4dataset_detection3d_preparation.py | 49 ++++++++ .../dataset_preparation.py | 100 +++++++++++++++ tools/dataset_preparation/enum.py | 28 +++++ tools/detection2d/create_data_t4dataset.py | 2 +- 11 files changed, 341 insertions(+), 1 deletion(-) create mode 100644 tools/dataset_preparation/__init__.py create mode 100644 tools/dataset_preparation/dataset/__init__.py create mode 100644 tools/dataset_preparation/dataset/base/__init__.py create mode 100644 tools/dataset_preparation/dataset/base/dataset_preparation_base.py create mode 100644 tools/dataset_preparation/dataset/base/t4dataset_preparation_base.py create mode 100644 tools/dataset_preparation/dataset/detection2d/__init__.py create mode 100644 tools/dataset_preparation/dataset/detection3d/__init__.py create mode 100644 tools/dataset_preparation/dataset/detection3d/t4dataset_detection3d_preparation.py create mode 100644 tools/dataset_preparation/dataset_preparation.py create mode 100644 tools/dataset_preparation/enum.py diff --git a/tools/dataset_preparation/__init__.py b/tools/dataset_preparation/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tools/dataset_preparation/dataset/__init__.py b/tools/dataset_preparation/dataset/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tools/dataset_preparation/dataset/base/__init__.py b/tools/dataset_preparation/dataset/base/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git 
class DatasetPreparationBase:
    """Base class for dataset preparation: converts a raw dataset into info files.

    Subclasses implement :meth:`run` to walk their dataset and call
    :meth:`save_info_file` for each output.
    """

    def __init__(self, root_path: Path, config: Any, info_save_path: Path, info_version: str) -> None:
        """
        Base class of dataset preparation.
        :param root_path: Root path that contains data.
        :param config: Configuration for the dataset preparation.
        :param info_save_path: Path to save a dictionary of dataset information.
        :param info_version: Version name for dataset information.
        """
        self.root_path = root_path
        self.config = config
        self.info_save_path = info_save_path
        self.info_version = info_version

        # Make the output path.
        # BUG FIX: pathlib.Path has no ``mkdirs`` method, so the original call
        # raised AttributeError on every construction; ``mkdir(parents=True)``
        # is the correct API.
        self.info_save_path.mkdir(exist_ok=True, parents=True)

    def run(self) -> None:
        """
        Run dataset preparation to convert the dataset to the corresponding info format.
        Must be overridden by subclasses.
        """
        raise NotImplementedError

    def save_info_file(self, info: Dict[str, Any], info_file_name: str) -> None:
        """
        Save a dictionary of dataset information to a pickle file used by downstream tasks.
        :param info: Selected info from datasets.
        :param info_file_name: Info output file name (relative to ``info_save_path``).
        """
        info_file_save_path = self.info_save_path / info_file_name
        # NOTE: mmengine.dump infers the pickle format from the .pkl suffix.
        mmengine.dump(info, info_file_save_path)
+ """ + info_file_save_path = self.info_save_path / info_file_name + mmengine.dump(info, info_file_save_path) diff --git a/tools/dataset_preparation/dataset/base/t4dataset_preparation_base.py b/tools/dataset_preparation/dataset/base/t4dataset_preparation_base.py new file mode 100644 index 000000000..240b33fe3 --- /dev/null +++ b/tools/dataset_preparation/dataset/base/t4dataset_preparation_base.py @@ -0,0 +1,116 @@ +from pathlib import Path +from typing import Dict, Any, List + +import yaml +from mmengine.logging import print_log +from t4_devkit import Tier4 + +from tools.dataset_preparation.dataset.base.dataset_preparation_base import DatasetPreparationBase +from tools.dataset_preparation.enum import DatasetInfoSplitKey +from tools.detection3d.create_data_t4dataset import get_scene_root_dir_path + +class T4DatasetPreparationBase(DatasetPreparationBase): + + def __init__(self, + root_path: Path, + config: Any, + info_save_path: Path, + info_version: str, + use_available_dataset_version: bool = False + ) -> None: + """ + Base class of dataset prepation. + :param config: Configuration for the dataset prepration. + """ + super(T4DatasetPreparationBase, self).__init__( + root_path=root_path, + config=config, + info_save_path=info_save_path, + info_version=info_version + ) + self.use_available_dataset_version = use_available_dataset_version + self.t4dataset_info_file_template = "t4dataset_{}_infos_{}.pkl" + + def process_t4dataset(self, t4_dataset: Tier4) -> Dict[str, Any]: + """ + Process a t4dataset and prepare it usable format to the AWML framework. + :param t4_dataset: Tier4 data object for a t4dataset. + :return: A dict of {frame identifier: frame data}. + """ + # For the base case, it does nothing. + raise NotImplementedError + + def save_t4_info_file(self, info: Dict[str, Any], split_name: str): + """ + Save t4 infos to a file. + :param infos: Selected T4 info. 
+ """ + info_split_file_name = self.t4dataset_info_file_template.format(self.info_version, split_name) + self.save_info_file(info=info, info_file_name=info_split_file_name) + + def extract_metainfo(self) -> Dict[str, Any]: + """ + Extract metainfo. + """ + return {} + + def run( + self, + ) -> None: + """ + Run dataset preparation to convert dataset to corresponding info format. + """ + data_info = { + DatasetInfoSplitKey.TRAIN: [], + DatasetInfoSplitKey.VAL: [], + DatasetInfoSplitKey.TEST: [], + } + metainfo = self.extract_metainfo() + + for dataset_version in self.config.dataset_version_list: + dataset_list = Path(self.config.dataset_version_config_root) / (dataset_version + ".yaml") + with open(dataset_list, "r") as f: + dataset_list_dict: Dict[str, List[str]] = yaml.safe_load(f) + + for split in [DatasetInfoSplitKey.TRAIN, DatasetInfoSplitKey.VAL, DatasetInfoSplitKey.TEST]: + print_log(f"Creating data info for split: {split}", logger="current") + for scene_id in dataset_list_dict.get(split, []): + print_log(f"Creating data info for scene: {scene_id}") + + t4_dataset_id, t4_dataset_version_id = scene_id.split("/") + scene_root_dir_path = Path(self.root_path) / dataset_version / t4_dataset_id / t4_dataset_version_id + if not scene_root_dir_path.exists(): + if self.use_available_dataset_version: + print_log( + "Warning: The version of the dataset specified in the config file does not exist. " \ + "Will use whatever is available locally." 
+ ) + scene_root_dir_path = get_scene_root_dir_path(self.root_path, dataset_version, t4_dataset_id) + else: + raise ValueError(f"{scene_root_dir_path} does not exist.") + + t4_dataset = Tier4( + version="annotation", + data_root=scene_root_dir_path, + verbose=False, + ) + + info = self.process_t4dataset( + t4_dataset=t4_dataset + ) + + data_info[split].extend(info.values()) + + info_pairs = { + DatasetInfoSplitKey.TRAIN: data_info[DatasetInfoSplitKey.TRAIN], + DatasetInfoSplitKey.VAL: data_info[DatasetInfoSplitKey.VAL], + DatasetInfoSplitKey.TEST: data_info[DatasetInfoSplitKey.TEST], + DatasetInfoSplitKey.TRAIN_VAL: data_info[DatasetInfoSplitKey.TRAIN] + data_info[DatasetInfoSplitKey.VAL], + DatasetInfoSplitKey.ALL: data_info, + } + for split_name, info in info_pairs.items(): + format_info = { + "data_list": info, + "metainfo": metainfo + } + self.save_t4_info_file(info=format_info, split_name=split_name) diff --git a/tools/dataset_preparation/dataset/detection2d/__init__.py b/tools/dataset_preparation/dataset/detection2d/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tools/dataset_preparation/dataset/detection3d/__init__.py b/tools/dataset_preparation/dataset/detection3d/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tools/dataset_preparation/dataset/detection3d/t4dataset_detection3d_preparation.py b/tools/dataset_preparation/dataset/detection3d/t4dataset_detection3d_preparation.py new file mode 100644 index 000000000..3548e6016 --- /dev/null +++ b/tools/dataset_preparation/dataset/detection3d/t4dataset_detection3d_preparation.py @@ -0,0 +1,49 @@ +from pathlib import Path +from typing import Dict, Any, List + +from t4_devkit import Tier4 + +from tools.dataset_preparation.dataset.base.t4dataset_preparation_base import T4DatasetPreparationBase +from tools.detection3d.create_data_t4dataset import get_info + +class T4DatasetDetection3DPreparation(T4DatasetPreparationBase): + + def __init__(self, + root_path: Path, + 
class T4DatasetDetection3DPreparation(T4DatasetPreparationBase):
    """T4dataset preparation for the 3D detection task."""

    def __init__(
        self,
        root_path: Path,
        config: Any,
        info_save_path: Path,
        info_version: str,
        max_sweeps: int,
        use_available_dataset_version: bool = False,
    ) -> None:
        """
        Dataset preparation for 3D detection on T4dataset.
        :param root_path: Root path that contains data.
        :param config: Configuration for the dataset preparation.
        :param info_save_path: Path to save a dictionary of dataset information.
        :param info_version: Version name for dataset information.
        :param max_sweeps: Number of lidar sweeps aggregated per example.
        :param use_available_dataset_version: Fall back to the locally available
            dataset version when the configured one is missing.
        """
        super().__init__(
            root_path=root_path,
            config=config,
            info_save_path=info_save_path,
            info_version=info_version,
            use_available_dataset_version=use_available_dataset_version,
        )
        self._max_sweeps = max_sweeps

    def process_t4dataset(self, t4_dataset: Tier4) -> Dict[str, Any]:
        """
        Process a t4dataset and prepare it in a format usable by the AWML framework.
        :param t4_dataset: Tier4 data object for a t4dataset.
        :return: A dict of {sample index: frame info}.
        """
        infos = {}
        for i, sample in enumerate(t4_dataset.sample):
            infos[i] = get_info(cfg=self.config, t4=t4_dataset, sample=sample, i=i, max_sweeps=self._max_sweeps)
        return infos

    def extract_metainfo(self) -> Dict[str, Any]:
        """
        Extract metainfo.
        :return: A dict of metainfo about the data preparation.
        """
        return {"version": self.info_version, "task_name": "3d_detection", "classes": self.config.class_names}
+ """ + return { + "version": self.info_version, "task_name": "3d_detection", "classes": self.config.class_names + } diff --git a/tools/dataset_preparation/dataset_preparation.py b/tools/dataset_preparation/dataset_preparation.py new file mode 100644 index 000000000..09258b1ba --- /dev/null +++ b/tools/dataset_preparation/dataset_preparation.py @@ -0,0 +1,100 @@ +"""Script to compute analysis of T4 datasets.""" +from pathlib import Path +from typing import Any + +import argparse + +import yaml +from mmengine.config import Config +from mmengine.logging import print_log + +from tools.dataset_preparation.enum import Task +from tools.dataset_preparation.dataset.base.dataset_preparation_base import DatasetPreparationBase +from tools.dataset_preparation.dataset.detection3d.t4dataset_detection3d_preparation import \ + T4DatasetDetection3DPreparation + + +def parse_args(): + parser = argparse.ArgumentParser(description="Create data info for T4dataset") + parser.add_argument( + '--task', + choices=['detection3d', 'detection2d', 'classification2d'], + help='Choose a task for data preparation.', + ) + parser.add_argument( + "--config", + type=str, + required=True, + help="config for T4dataset", + ) + parser.add_argument( + "--root_path", + type=str, + required=True, + help="specify the root path of dataset", + ) + parser.add_argument( + "--version", + type=str, + required=True, + help="product version", + ) + parser.add_argument( + "--max_sweeps", + type=int, + required=False, + help="specify sweeps of lidar per example", + ) + parser.add_argument( + "-o", + "--out_dir", + type=str, + required=True, + help="output directory of info file", + ) + parser.add_argument( + "--use_available_dataset_version", + action="store_true", + help="Will resort to using the available dataset version if the one specified in the config file does not exist.", + ) + args = parser.parse_args() + return args + + +def build_task(task: Task, config: Any, args: Any) -> DatasetPreparationBase: + """ 
Build DataPreparation based on the task. """ + if task == Task.DETECTION3D: + assert args.max_sweeps, f"max_sweeps must be set when the data preparation task is {Task.DETECTION3D}." + dataset_preparation = T4DatasetDetection3DPreparation( + root_path=Path(args.root_path), + config=config, + info_save_path=Path(args.outout_dir), + info_version=args.version, + max_sweeps=args.max_sweeps, + use_available_dataset_version=args.use_available_dataset_version, + ) + else: + raise ValueError(f"Task: {task} not supported yet!") + + return dataset_preparation + +def main(): + """Main enrtypoint to run the Runner.""" + # Load argparse + args = parse_args() + + # load config + config = Config.fromfile(args.config) + + # Build task + dataset_preparation = build_task( + task=Task[args.task], + config=config, + args=args + ) + + # Run dataset preparation + dataset_preparation.run() + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/tools/dataset_preparation/enum.py b/tools/dataset_preparation/enum.py new file mode 100644 index 000000000..060b51078 --- /dev/null +++ b/tools/dataset_preparation/enum.py @@ -0,0 +1,28 @@ +from enum import Enum + + +class DatasetInfoSplitKey(Enum): + """ Supported split names in data preparation. """ + + TRAIN = "train" + VAL = "val" + TEST = "test" + TRAIN_VAL = "trainval" + ALL = "all" + + def __str__(self): + """ String representation. """ + return self.value + + +class Task(Enum): + """ Supported types in data preparation. """ + + DETECTION3D = "detection3d" + DETECTION2D = "detection2d" + CLASSIFICATION2D = "classification2d" + + def __str__(self): + """ String representation. 
""" + return self.value + diff --git a/tools/detection2d/create_data_t4dataset.py b/tools/detection2d/create_data_t4dataset.py index f86ea762b..c76464df3 100644 --- a/tools/detection2d/create_data_t4dataset.py +++ b/tools/detection2d/create_data_t4dataset.py @@ -128,7 +128,7 @@ def assign_ids_and_save_detection_data( for instance in entry.instances ], } - for i, entry in enumerate(detection_data.data_list) + for i, entry in enumerate(detection_da+ta.data_list) ], } From 8a4ddc1fd76fba2b738fa0367b4cc340ef6d2e41 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 5 Aug 2025 09:53:29 +0000 Subject: [PATCH 2/6] ci(pre-commit): autofix --- .../dataset/base/dataset_preparation_base.py | 22 +++---- .../base/t4dataset_preparation_base.py | 62 +++++++++---------- .../t4dataset_detection3d_preparation.py | 36 +++++------ .../dataset_preparation.py | 37 ++++++----- tools/dataset_preparation/enum.py | 17 +++-- tools/detection2d/create_data_t4dataset.py | 2 +- 6 files changed, 83 insertions(+), 93 deletions(-) diff --git a/tools/dataset_preparation/dataset/base/dataset_preparation_base.py b/tools/dataset_preparation/dataset/base/dataset_preparation_base.py index 913848756..1b4e761ce 100644 --- a/tools/dataset_preparation/dataset/base/dataset_preparation_base.py +++ b/tools/dataset_preparation/dataset/base/dataset_preparation_base.py @@ -1,21 +1,17 @@ -from pathlib import Path from abc import ABC -from typing import Dict, Any +from pathlib import Path +from typing import Any, Dict import mmengine + from tools.analysis_3d.data_classes import AnalysisData, DatasetSplitName class DatasetPreparationBase: - def __init__(self, - root_path: Path, - config: Any, - info_save_path: Path, - info_version: str - ) -> None: + def __init__(self, root_path: Path, config: Any, info_save_path: Path, info_version: str) -> None: """ - Base class of dataset prepation. + Base class of dataset prepation. 
:param root_path: Root path that contains data. :param config: Configuration for the dataset prepration. :param info_save_path: Path to save a dictionary of dataset information. @@ -27,13 +23,11 @@ def __init__(self, self.info_version = info_version # Make the output path - self.info_save_path.mkdirs( - exist_ok=True, parents=True - ) + self.info_save_path.mkdirs(exist_ok=True, parents=True) def run(self) -> None: """ - Run dataset preparation to convert dataset to corresponding info format. + Run dataset preparation to convert dataset to corresponding info format. """ raise NotImplementedError @@ -41,7 +35,7 @@ def save_info_file(self, info: Dict[str, Any], info_file_name: str) -> None: """ Save a dictionary of datasets information to pickle file that is used by downstream tasks later. :param info: Selected info from datasets. - :param info_file_name: Info output file name. + :param info_file_name: Info output file name. """ info_file_save_path = self.info_save_path / info_file_name mmengine.dump(info, info_file_save_path) diff --git a/tools/dataset_preparation/dataset/base/t4dataset_preparation_base.py b/tools/dataset_preparation/dataset/base/t4dataset_preparation_base.py index 240b33fe3..a94421d73 100644 --- a/tools/dataset_preparation/dataset/base/t4dataset_preparation_base.py +++ b/tools/dataset_preparation/dataset/base/t4dataset_preparation_base.py @@ -1,32 +1,31 @@ from pathlib import Path -from typing import Dict, Any, List +from typing import Any, Dict, List -import yaml +import yaml from mmengine.logging import print_log from t4_devkit import Tier4 from tools.dataset_preparation.dataset.base.dataset_preparation_base import DatasetPreparationBase from tools.dataset_preparation.enum import DatasetInfoSplitKey -from tools.detection3d.create_data_t4dataset import get_scene_root_dir_path +from tools.detection3d.create_data_t4dataset import get_scene_root_dir_path + class T4DatasetPreparationBase(DatasetPreparationBase): - - def __init__(self, - root_path: 
Path, - config: Any, - info_save_path: Path, - info_version: str, - use_available_dataset_version: bool = False - ) -> None: + + def __init__( + self, + root_path: Path, + config: Any, + info_save_path: Path, + info_version: str, + use_available_dataset_version: bool = False, + ) -> None: """ - Base class of dataset prepation. + Base class of dataset prepation. :param config: Configuration for the dataset prepration. """ super(T4DatasetPreparationBase, self).__init__( - root_path=root_path, - config=config, - info_save_path=info_save_path, - info_version=info_version + root_path=root_path, config=config, info_save_path=info_save_path, info_version=info_version ) self.use_available_dataset_version = use_available_dataset_version self.t4dataset_info_file_template = "t4dataset_{}_infos_{}.pkl" @@ -39,12 +38,12 @@ def process_t4dataset(self, t4_dataset: Tier4) -> Dict[str, Any]: """ # For the base case, it does nothing. raise NotImplementedError - + def save_t4_info_file(self, info: Dict[str, Any], split_name: str): """ Save t4 infos to a file. - :param infos: Selected T4 info. - """ + :param infos: Selected T4 info. + """ info_split_file_name = self.t4dataset_info_file_template.format(self.info_version, split_name) self.save_info_file(info=info, info_file_name=info_split_file_name) @@ -58,7 +57,7 @@ def run( self, ) -> None: """ - Run dataset preparation to convert dataset to corresponding info format. + Run dataset preparation to convert dataset to corresponding info format. 
""" data_info = { DatasetInfoSplitKey.TRAIN: [], @@ -78,14 +77,18 @@ def run( print_log(f"Creating data info for scene: {scene_id}") t4_dataset_id, t4_dataset_version_id = scene_id.split("/") - scene_root_dir_path = Path(self.root_path) / dataset_version / t4_dataset_id / t4_dataset_version_id + scene_root_dir_path = ( + Path(self.root_path) / dataset_version / t4_dataset_id / t4_dataset_version_id + ) if not scene_root_dir_path.exists(): if self.use_available_dataset_version: print_log( - "Warning: The version of the dataset specified in the config file does not exist. " \ + "Warning: The version of the dataset specified in the config file does not exist. " "Will use whatever is available locally." ) - scene_root_dir_path = get_scene_root_dir_path(self.root_path, dataset_version, t4_dataset_id) + scene_root_dir_path = get_scene_root_dir_path( + self.root_path, dataset_version, t4_dataset_id + ) else: raise ValueError(f"{scene_root_dir_path} does not exist.") @@ -94,13 +97,11 @@ def run( data_root=scene_root_dir_path, verbose=False, ) - - info = self.process_t4dataset( - t4_dataset=t4_dataset - ) + + info = self.process_t4dataset(t4_dataset=t4_dataset) data_info[split].extend(info.values()) - + info_pairs = { DatasetInfoSplitKey.TRAIN: data_info[DatasetInfoSplitKey.TRAIN], DatasetInfoSplitKey.VAL: data_info[DatasetInfoSplitKey.VAL], @@ -109,8 +110,5 @@ def run( DatasetInfoSplitKey.ALL: data_info, } for split_name, info in info_pairs.items(): - format_info = { - "data_list": info, - "metainfo": metainfo - } + format_info = {"data_list": info, "metainfo": metainfo} self.save_t4_info_file(info=format_info, split_name=split_name) diff --git a/tools/dataset_preparation/dataset/detection3d/t4dataset_detection3d_preparation.py b/tools/dataset_preparation/dataset/detection3d/t4dataset_detection3d_preparation.py index 3548e6016..27427e4d5 100644 --- a/tools/dataset_preparation/dataset/detection3d/t4dataset_detection3d_preparation.py +++ 
b/tools/dataset_preparation/dataset/detection3d/t4dataset_detection3d_preparation.py @@ -1,31 +1,33 @@ from pathlib import Path -from typing import Dict, Any, List +from typing import Any, Dict, List from t4_devkit import Tier4 from tools.dataset_preparation.dataset.base.t4dataset_preparation_base import T4DatasetPreparationBase from tools.detection3d.create_data_t4dataset import get_info + class T4DatasetDetection3DPreparation(T4DatasetPreparationBase): - - def __init__(self, - root_path: Path, - config: Any, - info_save_path: Path, - info_version: str, - max_sweeps: int, - use_available_dataset_version: bool = False - ) -> None: + + def __init__( + self, + root_path: Path, + config: Any, + info_save_path: Path, + info_version: str, + max_sweeps: int, + use_available_dataset_version: bool = False, + ) -> None: """ - Base class of dataset prepation. + Base class of dataset prepation. :param config: Configuration for the dataset prepration. """ super(T4DatasetDetection3DPreparation, self).__init__( root_path=root_path, - config=config, - info_save_path=info_save_path, + config=config, + info_save_path=info_save_path, info_version=info_version, - use_available_dataset_version=use_available_dataset_version + use_available_dataset_version=use_available_dataset_version, ) self._max_sweeps = max_sweeps @@ -38,12 +40,10 @@ def process_t4dataset(self, t4_dataset: Tier4) -> Dict[str, Any]: for i, sample in enumerate(t4_dataset.sample): infos[i] = get_info(cfg=self.config, t4=t4_dataset, sample=sample, i=i, max_sweeps=self._max_sweeps) return infos - + def extract_metainfo(self) -> Dict[str, Any]: """ Extract metainfo. :return A dict of metainfo about the data prepration. 
""" - return { - "version": self.info_version, "task_name": "3d_detection", "classes": self.config.class_names - } + return {"version": self.info_version, "task_name": "3d_detection", "classes": self.config.class_names} diff --git a/tools/dataset_preparation/dataset_preparation.py b/tools/dataset_preparation/dataset_preparation.py index 09258b1ba..e4163be6a 100644 --- a/tools/dataset_preparation/dataset_preparation.py +++ b/tools/dataset_preparation/dataset_preparation.py @@ -1,25 +1,26 @@ """Script to compute analysis of T4 datasets.""" -from pathlib import Path -from typing import Any import argparse +from pathlib import Path +from typing import Any -import yaml +import yaml from mmengine.config import Config from mmengine.logging import print_log -from tools.dataset_preparation.enum import Task from tools.dataset_preparation.dataset.base.dataset_preparation_base import DatasetPreparationBase -from tools.dataset_preparation.dataset.detection3d.t4dataset_detection3d_preparation import \ - T4DatasetDetection3DPreparation +from tools.dataset_preparation.dataset.detection3d.t4dataset_detection3d_preparation import ( + T4DatasetDetection3DPreparation, +) +from tools.dataset_preparation.enum import Task def parse_args(): parser = argparse.ArgumentParser(description="Create data info for T4dataset") parser.add_argument( - '--task', - choices=['detection3d', 'detection2d', 'classification2d'], - help='Choose a task for data preparation.', + "--task", + choices=["detection3d", "detection2d", "classification2d"], + help="Choose a task for data preparation.", ) parser.add_argument( "--config", @@ -62,12 +63,12 @@ def parse_args(): def build_task(task: Task, config: Any, args: Any) -> DatasetPreparationBase: - """ Build DataPreparation based on the task. """ + """Build DataPreparation based on the task.""" if task == Task.DETECTION3D: assert args.max_sweeps, f"max_sweeps must be set when the data preparation task is {Task.DETECTION3D}." 
dataset_preparation = T4DatasetDetection3DPreparation( root_path=Path(args.root_path), - config=config, + config=config, info_save_path=Path(args.outout_dir), info_version=args.version, max_sweeps=args.max_sweeps, @@ -75,8 +76,9 @@ def build_task(task: Task, config: Any, args: Any) -> DatasetPreparationBase: ) else: raise ValueError(f"Task: {task} not supported yet!") - - return dataset_preparation + + return dataset_preparation + def main(): """Main enrtypoint to run the Runner.""" @@ -87,14 +89,11 @@ def main(): config = Config.fromfile(args.config) # Build task - dataset_preparation = build_task( - task=Task[args.task], - config=config, - args=args - ) + dataset_preparation = build_task(task=Task[args.task], config=config, args=args) # Run dataset preparation dataset_preparation.run() + if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/tools/dataset_preparation/enum.py b/tools/dataset_preparation/enum.py index 060b51078..40d348521 100644 --- a/tools/dataset_preparation/enum.py +++ b/tools/dataset_preparation/enum.py @@ -1,28 +1,27 @@ -from enum import Enum +from enum import Enum class DatasetInfoSplitKey(Enum): - """ Supported split names in data preparation. """ + """Supported split names in data preparation.""" TRAIN = "train" VAL = "val" - TEST = "test" + TEST = "test" TRAIN_VAL = "trainval" ALL = "all" def __str__(self): - """ String representation. """ + """String representation.""" return self.value class Task(Enum): - """ Supported types in data preparation. """ - + """Supported types in data preparation.""" + DETECTION3D = "detection3d" DETECTION2D = "detection2d" CLASSIFICATION2D = "classification2d" - + def __str__(self): - """ String representation. 
""" + """String representation.""" return self.value - diff --git a/tools/detection2d/create_data_t4dataset.py b/tools/detection2d/create_data_t4dataset.py index c76464df3..44fe4063d 100644 --- a/tools/detection2d/create_data_t4dataset.py +++ b/tools/detection2d/create_data_t4dataset.py @@ -128,7 +128,7 @@ def assign_ids_and_save_detection_data( for instance in entry.instances ], } - for i, entry in enumerate(detection_da+ta.data_list) + for i, entry in enumerate(detection_da + ta.data_list) ], } From 73da6d4ea24350b0f9a29ddbcdde037161a7082c Mon Sep 17 00:00:00 2001 From: Kok Seang Date: Tue, 5 Aug 2025 20:41:38 +0900 Subject: [PATCH 3/6] Add dataset_preparation interface --- .../dataset/detection3d/__init__.py | 0 .../{detection2d => t4dataset}/__init__.py | 0 .../t4dataset_detection3d_preparation.py | 2 +- .../t4dataset_preparation_base.py | 0 .../dataset_preparation/dataset_preparation.py | 17 +++++++++-------- tools/dataset_preparation/enum.py | 10 +++++----- 6 files changed, 15 insertions(+), 14 deletions(-) delete mode 100644 tools/dataset_preparation/dataset/detection3d/__init__.py rename tools/dataset_preparation/dataset/{detection2d => t4dataset}/__init__.py (100%) rename tools/dataset_preparation/dataset/{detection3d => t4dataset}/t4dataset_detection3d_preparation.py (93%) rename tools/dataset_preparation/dataset/{base => t4dataset}/t4dataset_preparation_base.py (100%) diff --git a/tools/dataset_preparation/dataset/detection3d/__init__.py b/tools/dataset_preparation/dataset/detection3d/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tools/dataset_preparation/dataset/detection2d/__init__.py b/tools/dataset_preparation/dataset/t4dataset/__init__.py similarity index 100% rename from tools/dataset_preparation/dataset/detection2d/__init__.py rename to tools/dataset_preparation/dataset/t4dataset/__init__.py diff --git a/tools/dataset_preparation/dataset/detection3d/t4dataset_detection3d_preparation.py 
b/tools/dataset_preparation/dataset/t4dataset/t4dataset_detection3d_preparation.py similarity index 93% rename from tools/dataset_preparation/dataset/detection3d/t4dataset_detection3d_preparation.py rename to tools/dataset_preparation/dataset/t4dataset/t4dataset_detection3d_preparation.py index 27427e4d5..8abfd3192 100644 --- a/tools/dataset_preparation/dataset/detection3d/t4dataset_detection3d_preparation.py +++ b/tools/dataset_preparation/dataset/t4dataset/t4dataset_detection3d_preparation.py @@ -3,7 +3,7 @@ from t4_devkit import Tier4 -from tools.dataset_preparation.dataset.base.t4dataset_preparation_base import T4DatasetPreparationBase +from tools.dataset_preparation.dataset.t4dataset.t4dataset_preparation_base import T4DatasetPreparationBase from tools.detection3d.create_data_t4dataset import get_info diff --git a/tools/dataset_preparation/dataset/base/t4dataset_preparation_base.py b/tools/dataset_preparation/dataset/t4dataset/t4dataset_preparation_base.py similarity index 100% rename from tools/dataset_preparation/dataset/base/t4dataset_preparation_base.py rename to tools/dataset_preparation/dataset/t4dataset/t4dataset_preparation_base.py diff --git a/tools/dataset_preparation/dataset_preparation.py b/tools/dataset_preparation/dataset_preparation.py index e4163be6a..085248e3b 100644 --- a/tools/dataset_preparation/dataset_preparation.py +++ b/tools/dataset_preparation/dataset_preparation.py @@ -1,4 +1,4 @@ -"""Script to compute analysis of T4 datasets.""" +"""Script to convert dataset to info pickles.""" import argparse from pathlib import Path @@ -9,17 +9,17 @@ from mmengine.logging import print_log from tools.dataset_preparation.dataset.base.dataset_preparation_base import DatasetPreparationBase -from tools.dataset_preparation.dataset.detection3d.t4dataset_detection3d_preparation import ( +from tools.dataset_preparation.dataset.t4dataset.t4dataset_detection3d_preparation import ( T4DatasetDetection3DPreparation, ) -from tools.dataset_preparation.enum import 
Task +from tools.dataset_preparation.enum import DatasetTask def parse_args(): parser = argparse.ArgumentParser(description="Create data info for T4dataset") parser.add_argument( "--task", - choices=["detection3d", "detection2d", "classification2d"], + choices=["t4_detection3d", "t4_detection2d", "t4_classification2d"], help="Choose a task for data preparation.", ) parser.add_argument( @@ -62,10 +62,10 @@ def parse_args(): return args -def build_task(task: Task, config: Any, args: Any) -> DatasetPreparationBase: +def build_dataset_task(dataset_task: T4DatasetDetection3DPreparation, config: Any, args: Any) -> DatasetPreparationBase: """Build DataPreparation based on the task.""" - if task == Task.DETECTION3D: - assert args.max_sweeps, f"max_sweeps must be set when the data preparation task is {Task.DETECTION3D}." + if task == DatasetTask.T4DETECTION3D: + assert args.max_sweeps, f"max_sweeps must be set when the data preparation task is {T4DatasetDetection3DPreparation.DETECTION3D}." dataset_preparation = T4DatasetDetection3DPreparation( root_path=Path(args.root_path), config=config, @@ -77,6 +77,7 @@ def build_task(task: Task, config: Any, args: Any) -> DatasetPreparationBase: else: raise ValueError(f"Task: {task} not supported yet!") + print_log(f"Built {task}") return dataset_preparation @@ -89,7 +90,7 @@ def main(): config = Config.fromfile(args.config) # Build task - dataset_preparation = build_task(task=Task[args.task], config=config, args=args) + dataset_preparation = build_dataset_task(dataset_task=DatasetTask[args.task], config=config, args=args) # Run dataset preparation dataset_preparation.run() diff --git a/tools/dataset_preparation/enum.py b/tools/dataset_preparation/enum.py index 40d348521..2617f8b4d 100644 --- a/tools/dataset_preparation/enum.py +++ b/tools/dataset_preparation/enum.py @@ -15,12 +15,12 @@ def __str__(self): return self.value -class Task(Enum): - """Supported types in data preparation.""" +class DatasetTask(Enum): + """Supported dataset 
tasks in data preparation.""" - DETECTION3D = "detection3d" - DETECTION2D = "detection2d" - CLASSIFICATION2D = "classification2d" + T4DETECTION3D = "t4_detection3d" + T4DETECTION2D = "t4_detection2d" + T4CLASSIFICATION2D = "t4_classification2d" def __str__(self): """String representation.""" From dc50bdd9082abe67f013c083aea5ef27b5373d12 Mon Sep 17 00:00:00 2001 From: Kok Seang Date: Tue, 5 Aug 2025 20:42:05 +0900 Subject: [PATCH 4/6] Add dataset_preparation interface --- tools/dataset_preparation/dataset_preparation.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tools/dataset_preparation/dataset_preparation.py b/tools/dataset_preparation/dataset_preparation.py index 085248e3b..27b4ce07e 100644 --- a/tools/dataset_preparation/dataset_preparation.py +++ b/tools/dataset_preparation/dataset_preparation.py @@ -4,7 +4,6 @@ from pathlib import Path from typing import Any -import yaml from mmengine.config import Config from mmengine.logging import print_log @@ -64,7 +63,7 @@ def parse_args(): def build_dataset_task(dataset_task: T4DatasetDetection3DPreparation, config: Any, args: Any) -> DatasetPreparationBase: """Build DataPreparation based on the task.""" - if task == DatasetTask.T4DETECTION3D: + if dataset_task == DatasetTask.T4DETECTION3D: assert args.max_sweeps, f"max_sweeps must be set when the data preparation task is {T4DatasetDetection3DPreparation.DETECTION3D}." 
dataset_preparation = T4DatasetDetection3DPreparation( root_path=Path(args.root_path), @@ -75,9 +74,9 @@ def build_dataset_task(dataset_task: T4DatasetDetection3DPreparation, config: An use_available_dataset_version=args.use_available_dataset_version, ) else: - raise ValueError(f"Task: {task} not supported yet!") + raise ValueError(f"Task: {dataset_task} not supported yet!") - print_log(f"Built {task}") + print_log(f"Built {dataset_task}") return dataset_preparation From cdc5bdee9fd4c3600050d79b8a0752e5a80d5fdf Mon Sep 17 00:00:00 2001 From: Kok Seang Date: Tue, 5 Aug 2025 20:42:35 +0900 Subject: [PATCH 5/6] Add dataset_preparation interface --- .../dataset/base/dataset_preparation_base.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tools/dataset_preparation/dataset/base/dataset_preparation_base.py b/tools/dataset_preparation/dataset/base/dataset_preparation_base.py index 1b4e761ce..1d05d3a45 100644 --- a/tools/dataset_preparation/dataset/base/dataset_preparation_base.py +++ b/tools/dataset_preparation/dataset/base/dataset_preparation_base.py @@ -4,9 +4,6 @@ import mmengine -from tools.analysis_3d.data_classes import AnalysisData, DatasetSplitName - - class DatasetPreparationBase: def __init__(self, root_path: Path, config: Any, info_save_path: Path, info_version: str) -> None: From 132083d945185b68cfe0b1c3cfb3e53b985083e4 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 5 Aug 2025 11:44:21 +0000 Subject: [PATCH 6/6] ci(pre-commit): autofix --- .../dataset/base/dataset_preparation_base.py | 1 + tools/dataset_preparation/dataset_preparation.py | 8 ++++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/tools/dataset_preparation/dataset/base/dataset_preparation_base.py b/tools/dataset_preparation/dataset/base/dataset_preparation_base.py index 1d05d3a45..2e685caac 100644 --- a/tools/dataset_preparation/dataset/base/dataset_preparation_base.py +++ 
b/tools/dataset_preparation/dataset/base/dataset_preparation_base.py
@@ -4,6 +4,7 @@
 import mmengine
 
+
 class DatasetPreparationBase:
 
     def __init__(self, root_path: Path, config: Any, info_save_path: Path, info_version: str) -> None:
diff --git a/tools/dataset_preparation/dataset_preparation.py b/tools/dataset_preparation/dataset_preparation.py
index 27b4ce07e..b54e4bda8 100644
--- a/tools/dataset_preparation/dataset_preparation.py
+++ b/tools/dataset_preparation/dataset_preparation.py
@@ -61,10 +61,14 @@ def parse_args():
     return args
 
 
-def build_dataset_task(dataset_task: T4DatasetDetection3DPreparation, config: Any, args: Any) -> DatasetPreparationBase:
+def build_dataset_task(
+    dataset_task: DatasetTask, config: Any, args: Any
+) -> DatasetPreparationBase:
     """Build DataPreparation based on the task."""
     if dataset_task == DatasetTask.T4DETECTION3D:
-        assert args.max_sweeps, f"max_sweeps must be set when the data preparation task is {T4DatasetDetection3DPreparation.DETECTION3D}."
+        assert (
+            args.max_sweeps
+        ), f"max_sweeps must be set when the data preparation task is {DatasetTask.T4DETECTION3D}."
         dataset_preparation = T4DatasetDetection3DPreparation(
             root_path=Path(args.root_path),
             config=config,