Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file.
Empty file.
Empty file.
39 changes: 39 additions & 0 deletions tools/dataset_preparation/dataset/base/dataset_preparation_base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from abc import ABC
from pathlib import Path
from typing import Any, Dict

import mmengine


class DatasetPreparationBase(ABC):
    """Base class of dataset preparation.

    Converts a raw dataset into the info-dict format consumed by downstream
    tasks and persists it with :func:`mmengine.dump`.
    """

    def __init__(self, root_path: Path, config: Any, info_save_path: Path, info_version: str) -> None:
        """
        Initialize the dataset preparation base.
        :param root_path: Root path that contains data.
        :param config: Configuration for the dataset preparation.
        :param info_save_path: Path to save a dictionary of dataset information.
        :param info_version: Version name for dataset information.
        """
        self.root_path = root_path
        self.config = config
        self.info_save_path = info_save_path
        self.info_version = info_version

        # Make the output path.
        # BUGFIX: pathlib.Path has no `mkdirs`; the correct API is `mkdir`.
        self.info_save_path.mkdir(exist_ok=True, parents=True)

    def run(self) -> None:
        """
        Run dataset preparation to convert dataset to corresponding info format.
        Subclasses must override this method.
        """
        raise NotImplementedError

    def save_info_file(self, info: Dict[str, Any], info_file_name: str) -> None:
        """
        Save a dictionary of datasets information to pickle file that is used by downstream tasks later.
        :param info: Selected info from datasets.
        :param info_file_name: Info output file name.
        """
        info_file_save_path = self.info_save_path / info_file_name
        mmengine.dump(info, info_file_save_path)
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
from pathlib import Path
from typing import Any, Dict, List

from t4_devkit import Tier4

from tools.dataset_preparation.dataset.t4dataset.t4dataset_preparation_base import T4DatasetPreparationBase
from tools.detection3d.create_data_t4dataset import get_info


class T4DatasetDetection3DPreparation(T4DatasetPreparationBase):
    """T4dataset preparation for the 3D detection task."""

    def __init__(
        self,
        root_path: Path,
        config: Any,
        info_save_path: Path,
        info_version: str,
        max_sweeps: int,
        use_available_dataset_version: bool = False,
    ) -> None:
        """
        Initialize the 3D-detection dataset preparation.
        :param root_path: Root path that contains data.
        :param config: Configuration for the dataset preparation.
        :param info_save_path: Path to save a dictionary of dataset information.
        :param info_version: Version name for dataset information.
        :param max_sweeps: Number of lidar sweeps per example passed to `get_info`.
        :param use_available_dataset_version: Fall back to a locally available
            dataset version when the configured one is missing.
        """
        super().__init__(
            root_path=root_path,
            config=config,
            info_save_path=info_save_path,
            info_version=info_version,
            use_available_dataset_version=use_available_dataset_version,
        )
        self._max_sweeps = max_sweeps

    def process_t4dataset(self, t4_dataset: Tier4) -> Dict[str, Any]:
        """
        Process a t4dataset and prepare it in a format usable by the AWML framework.
        :param t4_dataset: Tier4 data object for a t4dataset.
        :return: A dict of {sample index: frame info}.
        """
        return {
            index: get_info(
                cfg=self.config,
                t4=t4_dataset,
                sample=sample,
                i=index,
                max_sweeps=self._max_sweeps,
            )
            for index, sample in enumerate(t4_dataset.sample)
        }

    def extract_metainfo(self) -> Dict[str, Any]:
        """
        Extract metainfo about this data preparation.
        :return: A dict with the info version, task name and class names.
        """
        return {
            "version": self.info_version,
            "task_name": "3d_detection",
            "classes": self.config.class_names,
        }
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
from pathlib import Path
from typing import Any, Dict, List

import yaml
from mmengine.logging import print_log
from t4_devkit import Tier4

from tools.dataset_preparation.dataset.base.dataset_preparation_base import DatasetPreparationBase
from tools.dataset_preparation.enum import DatasetInfoSplitKey
from tools.detection3d.create_data_t4dataset import get_scene_root_dir_path


class T4DatasetPreparationBase(DatasetPreparationBase):
    """Base class of t4dataset preparation.

    Iterates over the configured dataset versions and scenes, loads each scene
    as a Tier4 object, and dumps one info file per split.
    """

    def __init__(
        self,
        root_path: Path,
        config: Any,
        info_save_path: Path,
        info_version: str,
        use_available_dataset_version: bool = False,
    ) -> None:
        """
        Initialize the t4dataset preparation base.
        :param root_path: Root path that contains data.
        :param config: Configuration for the dataset preparation.
        :param info_save_path: Path to save a dictionary of dataset information.
        :param info_version: Version name for dataset information.
        :param use_available_dataset_version: Fall back to whatever dataset
            version is locally available if the configured one does not exist.
        """
        super(T4DatasetPreparationBase, self).__init__(
            root_path=root_path, config=config, info_save_path=info_save_path, info_version=info_version
        )
        self.use_available_dataset_version = use_available_dataset_version
        self.t4dataset_info_file_template = "t4dataset_{}_infos_{}.pkl"

    def process_t4dataset(self, t4_dataset: Tier4) -> Dict[str, Any]:
        """
        Process a t4dataset and prepare it usable format to the AWML framework.
        :param t4_dataset: Tier4 data object for a t4dataset.
        :return: A dict of {frame identifier: frame data}.
        """
        # For the base case, it does nothing; task-specific subclasses override.
        raise NotImplementedError

    def save_t4_info_file(self, info: Dict[str, Any], split_name: str) -> None:
        """
        Save t4 infos to a file.
        :param info: Selected T4 info.
        :param split_name: Split identifier embedded in the output file name.
        """
        info_split_file_name = self.t4dataset_info_file_template.format(self.info_version, split_name)
        self.save_info_file(info=info, info_file_name=info_split_file_name)

    def extract_metainfo(self) -> Dict[str, Any]:
        """
        Extract metainfo about this data preparation.
        :return: A dict of metainfo; empty in the base class.
        """
        return {}

    def run(
        self,
    ) -> None:
        """
        Run dataset preparation to convert dataset to corresponding info format.
        :raises ValueError: If a configured scene path does not exist and
            `use_available_dataset_version` is disabled.
        """
        data_info = {
            DatasetInfoSplitKey.TRAIN: [],
            DatasetInfoSplitKey.VAL: [],
            DatasetInfoSplitKey.TEST: [],
        }
        metainfo = self.extract_metainfo()

        for dataset_version in self.config.dataset_version_list:
            dataset_list = Path(self.config.dataset_version_config_root) / (dataset_version + ".yaml")
            with open(dataset_list, "r") as f:
                dataset_list_dict: Dict[str, List[str]] = yaml.safe_load(f)

            for split in [DatasetInfoSplitKey.TRAIN, DatasetInfoSplitKey.VAL, DatasetInfoSplitKey.TEST]:
                print_log(f"Creating data info for split: {split}", logger="current")
                # BUGFIX: the yaml dict is keyed by plain strings ("train", ...),
                # and a plain Enum member never compares equal to its value, so
                # `.get(split, [])` always returned []. Look up by `split.value`.
                for scene_id in dataset_list_dict.get(split.value, []):
                    print_log(f"Creating data info for scene: {scene_id}")

                    t4_dataset_id, t4_dataset_version_id = scene_id.split("/")
                    scene_root_dir_path = (
                        Path(self.root_path) / dataset_version / t4_dataset_id / t4_dataset_version_id
                    )
                    if not scene_root_dir_path.exists():
                        if self.use_available_dataset_version:
                            print_log(
                                "Warning: The version of the dataset specified in the config file does not exist. "
                                "Will use whatever is available locally."
                            )
                            scene_root_dir_path = get_scene_root_dir_path(
                                self.root_path, dataset_version, t4_dataset_id
                            )
                        else:
                            raise ValueError(f"{scene_root_dir_path} does not exist.")

                    t4_dataset = Tier4(
                        version="annotation",
                        data_root=scene_root_dir_path,
                        verbose=False,
                    )

                    info = self.process_t4dataset(t4_dataset=t4_dataset)

                    data_info[split].extend(info.values())

        info_pairs = {
            DatasetInfoSplitKey.TRAIN: data_info[DatasetInfoSplitKey.TRAIN],
            DatasetInfoSplitKey.VAL: data_info[DatasetInfoSplitKey.VAL],
            DatasetInfoSplitKey.TEST: data_info[DatasetInfoSplitKey.TEST],
            DatasetInfoSplitKey.TRAIN_VAL: data_info[DatasetInfoSplitKey.TRAIN] + data_info[DatasetInfoSplitKey.VAL],
            # BUGFIX: "all" must be a flat list of frames like every other
            # split; previously the raw {split: frames} dict was stored here.
            DatasetInfoSplitKey.ALL: (
                data_info[DatasetInfoSplitKey.TRAIN]
                + data_info[DatasetInfoSplitKey.VAL]
                + data_info[DatasetInfoSplitKey.TEST]
            ),
        }
        for split_name, info in info_pairs.items():
            format_info = {"data_list": info, "metainfo": metainfo}
            self.save_t4_info_file(info=format_info, split_name=split_name)
103 changes: 103 additions & 0 deletions tools/dataset_preparation/dataset_preparation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
"""Script to convert dataset to info pickles."""

import argparse
from pathlib import Path
from typing import Any

from mmengine.config import Config
from mmengine.logging import print_log

from tools.dataset_preparation.dataset.base.dataset_preparation_base import DatasetPreparationBase
from tools.dataset_preparation.dataset.t4dataset.t4dataset_detection3d_preparation import (
T4DatasetDetection3DPreparation,
)
from tools.dataset_preparation.enum import DatasetTask


def parse_args():
    """Parse and return command-line arguments for T4dataset info creation."""
    arg_parser = argparse.ArgumentParser(description="Create data info for T4dataset")
    arg_parser.add_argument(
        "--task",
        choices=["t4_detection3d", "t4_detection2d", "t4_classification2d"],
        help="Choose a task for data preparation.",
    )
    arg_parser.add_argument(
        "--config",
        type=str,
        required=True,
        help="config for T4dataset",
    )
    arg_parser.add_argument(
        "--root_path",
        type=str,
        required=True,
        help="specify the root path of dataset",
    )
    arg_parser.add_argument(
        "--version",
        type=str,
        required=True,
        help="product version",
    )
    arg_parser.add_argument(
        "--max_sweeps",
        type=int,
        required=False,
        help="specify sweeps of lidar per example",
    )
    arg_parser.add_argument(
        "-o",
        "--out_dir",
        type=str,
        required=True,
        help="output directory of info file",
    )
    arg_parser.add_argument(
        "--use_available_dataset_version",
        action="store_true",
        help="Will resort to using the available dataset version if the one specified in the config file does not exist.",
    )
    return arg_parser.parse_args()


def build_dataset_task(
    dataset_task: DatasetTask, config: Any, args: Any
) -> DatasetPreparationBase:
    """Build DataPreparation based on the task.

    :param dataset_task: The dataset task to build a preparation for.
    :param config: Loaded configuration for the dataset.
    :param args: Parsed command-line arguments.
    :return: A DatasetPreparationBase instance for the requested task.
    :raises ValueError: If max_sweeps is missing for 3D detection, or the
        task is not supported.
    """
    if dataset_task == DatasetTask.T4DETECTION3D:
        # Validate explicitly instead of `assert` (stripped under `python -O`).
        # BUGFIX: the old message referenced the nonexistent attribute
        # `T4DatasetDetection3DPreparation.DETECTION3D`, which would have
        # raised AttributeError while formatting the assertion message.
        if not args.max_sweeps:
            raise ValueError(
                f"max_sweeps must be set when the data preparation task is {DatasetTask.T4DETECTION3D}."
            )
        dataset_preparation = T4DatasetDetection3DPreparation(
            root_path=Path(args.root_path),
            config=config,
            # BUGFIX: the argparse option is `--out_dir`; `args.outout_dir` was a typo.
            info_save_path=Path(args.out_dir),
            info_version=args.version,
            max_sweeps=args.max_sweeps,
            use_available_dataset_version=args.use_available_dataset_version,
        )
    else:
        raise ValueError(f"Task: {dataset_task} not supported yet!")

    print_log(f"Built {dataset_task}")
    return dataset_preparation


def main():
    """Main entrypoint to run the Runner."""
    # Load argparse
    args = parse_args()

    # load config
    config = Config.fromfile(args.config)

    # Build task.
    # BUGFIX: `--task` supplies enum *values* (e.g. "t4_detection3d"), so use
    # value lookup `DatasetTask(...)`; subscripting (`DatasetTask[...]`) looks
    # up member *names* (e.g. "T4DETECTION3D") and raised KeyError here.
    dataset_preparation = build_dataset_task(dataset_task=DatasetTask(args.task), config=config, args=args)

    # Run dataset preparation
    dataset_preparation.run()


# Run only when executed as a script, not on import.
if __name__ == "__main__":
    main()
27 changes: 27 additions & 0 deletions tools/dataset_preparation/enum.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from enum import Enum


class DatasetInfoSplitKey(Enum):
    """Supported split names in data preparation."""

    # Member values double as the split tokens used in yaml scene lists
    # and in info file names.
    TRAIN = "train"
    VAL = "val"
    TEST = "test"
    TRAIN_VAL = "trainval"
    ALL = "all"

    def __str__(self) -> str:
        """Return the split token (the member value) as the string form."""
        return str(self.value)


class DatasetTask(Enum):
    """Supported dataset tasks in data preparation."""

    # Member values match the CLI `--task` choices.
    T4DETECTION3D = "t4_detection3d"
    T4DETECTION2D = "t4_detection2d"
    T4CLASSIFICATION2D = "t4_classification2d"

    def __str__(self) -> str:
        """Return the task token (the member value) as the string form."""
        return str(self.value)
2 changes: 1 addition & 1 deletion tools/detection2d/create_data_t4dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ def assign_ids_and_save_detection_data(
for instance in entry.instances
],
}
for i, entry in enumerate(detection_data.data_list)
for i, entry in enumerate(detection_data.data_list)
],
}

Expand Down