diff --git a/.DS_Store b/.DS_Store deleted file mode 100644 index d98352b4..00000000 Binary files a/.DS_Store and /dev/null differ diff --git a/.gitignore b/.gitignore index 63357d2a..eb3d75b0 100644 --- a/.gitignore +++ b/.gitignore @@ -7,4 +7,6 @@ __pycache__ dist poetry.lock -local/ \ No newline at end of file +local/ + +.DS_Store \ No newline at end of file diff --git a/README.md b/README.md index ca8c6a62..e6f3619a 100644 --- a/README.md +++ b/README.md @@ -41,8 +41,8 @@ Now, we're excited to introduce ***DetectionMetrics v2***! While retaining the f LiDAR - Rellis3D, GOOSE, custom GAIA format - PyTorch (tested with RandLA-Net and KPConv from Open3D-ML) + Rellis3D, GOOSE, WildScenes, custom GAIA format + PyTorch (tested with Open3D-ML, mmdetection3d, SphereFormer, and LSK3DNet models) Object detection Image diff --git a/detectionmetrics/cli/batch.py b/detectionmetrics/cli/batch.py index 0dae173a..a9ebd323 100644 --- a/detectionmetrics/cli/batch.py +++ b/detectionmetrics/cli/batch.py @@ -1,4 +1,4 @@ -from itertools import product +from itertools import product, chain from glob import glob import os @@ -30,9 +30,19 @@ def batch(command, jobs_cfg): for model_cfg in jobs_cfg["model"]: model_path = model_cfg["path"] - model_paths = glob(model_path) if model_cfg["path_is_pattern"] else [model_path] - assert model_paths, f"No files found for pattern {model_cfg['path']}" + is_pattern = model_cfg.get("path_is_pattern", False) + if isinstance(model_path, list): + if is_pattern: + model_paths = list(chain.from_iterable(glob(p) for p in model_path)) + else: + model_paths = model_path + else: + model_paths = glob(model_path) if is_pattern else [model_path] + + if not model_paths: + raise FileNotFoundError(f"No files found for path/pattern: {model_path}") + print(f"Found {len(model_paths)} model(s) for pattern: {model_path}") for new_path in model_paths: assert os.path.exists(new_path), f"File or directory {new_path} not found" @@ -41,7 +51,8 @@ def batch(command, jobs_cfg): if os.path.isfile(new_path): new_model_id, _ = os.path.splitext(new_model_id) - new_model_cfg = model_cfg | { + new_model_cfg = { + **model_cfg, "path": new_path, "id": f"{model_cfg['id']}-{new_model_id.replace('-', '_')}", } @@ -102,9 +113,20 @@ def batch(command, jobs_cfg): "model": model_cfg["path"], "model_ontology": model_cfg["ontology"], "model_cfg": model_cfg["cfg"], - # "image_size": model_cfg.get("image_size", None), } ) + + if command == "computational_cost": + if jobs_cfg["input_type"] == "image": + params["image_size"] = model_cfg.get("image_size", [512, 512]) + elif jobs_cfg["input_type"] == "lidar": + params["point_cloud_range"] = model_cfg.get( + "point_cloud_range", [-50, -50, -5, 50, 50, 5] + ) + params["num_points"] = model_cfg.get("num_points", 100000) + else: + raise ValueError(f"Unknown input type: {jobs_cfg['input_type']}") + if has_dataset: dataset_cfg = job_components[1] params.update( diff --git a/detectionmetrics/cli/computational_cost.py b/detectionmetrics/cli/computational_cost.py index 951f8a47..402b24c7 100644 --- a/detectionmetrics/cli/computational_cost.py +++ b/detectionmetrics/cli/computational_cost.py @@ -1,7 +1,6 @@ import click from detectionmetrics import cli -from detectionmetrics.utils.io import read_json @click.command(name="computational_cost", help="Estimate model computational cost") @@ -12,9 +11,7 @@ # model @click.option( "--model_format", - type=click.Choice( - ["torch", "tensorflow", "tensorflow_explicit"], case_sensitive=False - ), + type=click.Choice(["torch", "tensorflow"], 
case_sensitive=False), show_default=True, default="torch", help="Trained model format", @@ -39,14 +36,35 @@ ) @click.option( "--image_size", - type=(int, int), + nargs=2, + type=int, required=False, - help="Dummy image size used for computational cost estimation", + help="Dummy image size. Should be provided as two integers: width height", +) +@click.option( + "--point_cloud_range", + nargs=6, + type=int, + required=False, + help="Dummy point cloud range (meters). Should be provided as six integers: x_min y_min z_min x_max y_max z_max", +) +@click.option( + "--num_points", + type=int, + required=False, + help="Number of points for the dummy point cloud (uniformly sampled)", +) +@click.option( + "--has_intensity", + is_flag=True, + default=False, + help="Whether the dummy point cloud has intensity values", ) # output @click.option( "--out_fname", type=click.Path(writable=True), + required=True, help="CSV file where the computational cost estimation results will be stored", ) def computational_cost( @@ -57,23 +75,46 @@ def computational_cost( model_ontology, model_cfg, image_size, + point_cloud_range, + num_points, + has_intensity, out_fname, ): """Estimate model computational cost""" - - if image_size is None: - parsed_model_cfg = read_json(model_cfg) - if "image_size" in parsed_model_cfg: - image_size = parsed_model_cfg["image_size"] - else: + if input_type == "image": + if image_size is None: + raise ValueError("Image size must be provided for image models") + if point_cloud_range is not None or num_points is not None: + raise ValueError( + "Point cloud range and number of points cannot be provided for image models" + ) + if has_intensity: + raise ValueError("Intensity flag cannot be set for image models") + params = {"image_size": image_size} + elif input_type == "lidar": + if point_cloud_range is None or num_points is None: raise ValueError( - "Image size must be provided either as an argument or in the model configuration file" + "Point cloud range and number of points must be provided for lidar models" ) + if image_size is not None: + raise ValueError("Image size cannot be provided for lidar models") + + params = { + "point_cloud_range": point_cloud_range, + "num_points": num_points, + "has_intensity": has_intensity, + } + else: + raise ValueError(f"Unknown input type: {input_type}") model = cli.get_model( task, input_type, model_format, model, model_ontology, model_cfg ) - results = model.get_computational_cost(image_size) + results = model.get_computational_cost(**params) results.to_csv(out_fname) return results + + +if __name__ == "__main__": + computational_cost() diff --git a/detectionmetrics/cli/evaluate.py b/detectionmetrics/cli/evaluate.py index 4fd23ee0..bba5c0c9 100644 --- a/detectionmetrics/cli/evaluate.py +++ b/detectionmetrics/cli/evaluate.py @@ -25,7 +25,7 @@ def parse_split(ctx, param, value): @click.option( "--model_format", type=click.Choice( - ["torch", "tensorflow", "tensorflow_explicit"], case_sensitive=False + ["torch", "tensorflow"], case_sensitive=False ), show_default=True, default="torch", @@ -197,3 +197,7 @@ def evaluate( results.to_csv(out_fname) return results + + +if __name__ == "__main__": + evaluate() diff --git a/detectionmetrics/datasets/gaia.py b/detectionmetrics/datasets/gaia.py index a9ceaaa2..84722038 100644 --- a/detectionmetrics/datasets/gaia.py +++ b/detectionmetrics/datasets/gaia.py @@ -23,8 +23,15 @@ def build_dataset(dataset_fname: str) -> Tuple[pd.DataFrame, str, dict]: dataset_dir = os.path.dirname(dataset_fname) # Read ontology file - 
ontology_fname = dataset.attrs["ontology_fname"] - ontology = uio.read_json(os.path.join(dataset_dir, ontology_fname)) + try: + ontology_fname = dataset.attrs["ontology_fname"] + except KeyError: + ontology_fname = "ontology.json" + + ontology_fname = os.path.join(dataset_dir, ontology_fname) + assert os.path.isfile(ontology_fname), "Ontology file not found" + + ontology = uio.read_json(ontology_fname) for name, data in ontology.items(): ontology[name]["rgb"] = tuple(data["rgb"]) diff --git a/detectionmetrics/datasets/goose.py b/detectionmetrics/datasets/goose.py index b83f6d9b..34615d60 100644 --- a/detectionmetrics/datasets/goose.py +++ b/detectionmetrics/datasets/goose.py @@ -16,6 +16,7 @@ def build_dataset( train_dataset_dir: Optional[str] = None, val_dataset_dir: Optional[str] = None, test_dataset_dir: Optional[str] = None, + is_goose_ex: bool = False, ) -> Tuple[dict, dict]: """Build dataset and ontology dictionaries from GOOSE dataset structure @@ -31,6 +32,8 @@ def build_dataset( :type val_dataset_dir: str, optional :param test_dataset_dir: Directory containing test data, defaults to None :type test_dataset_dir: str, optional + :param is_goose_ex: Whether the dataset is GOOSE Ex or GOOSE, defaults to False + :type is_goose_ex: bool, optional :return: Dataset and onotology :rtype: Tuple[dict, dict] """ @@ -66,13 +69,23 @@ def build_dataset( train_data = os.path.join(dataset_dir, f"{data_type}/{split}/*/*_{data_suffix}") for data_fname in glob(train_data): sample_dir, sample_base_name = os.path.split(data_fname) - sample_base_name = sample_base_name.split("__")[-1] + + # GOOSE Ex uses a different label file naming convention + if is_goose_ex: + sample_base_name = "sequence" + sample_base_name.split("_sequence")[-1] + else: + sample_base_name = sample_base_name.split("__")[-1] + sample_base_name = sample_base_name.split("_" + data_suffix)[0] scene = os.path.split(sample_dir)[-1] sample_name = f"{scene}-{sample_base_name}" - label_base_name = f"{scene}__{sample_base_name}_{label_suffix}" + if is_goose_ex: + label_base_name = f"{scene}_{sample_base_name}_{label_suffix}" + else: + label_base_name = f"{scene}__{sample_base_name}_{label_suffix}" + label_fname = os.path.join( dataset_dir, "labels", split, scene, label_base_name ) @@ -131,9 +144,9 @@ def __init__( class GOOSELiDARSegmentationDataset(dm_segmentation_dataset.LiDARSegmentationDataset): """Specific class for GOOSE-styled LiDAR segmentation datasets. 
All data can be downloaded from the official webpage (https://goose-dataset.de): - train -> https://goose-dataset.de/storage/goose_3d_train.zip - val -> https://goose-dataset.de/storage/goose_3d_val.zip - test -> https://goose-dataset.de/storage/goose_3d_test.zip + train -> https://goose-dataset.de/storage/gooseEx_3d_train.zip + val -> https://goose-dataset.de/storage/gooseEx_3d_val.zip + test -> https://goose-dataset.de/storage/gooseEx_3d_test.zip :param train_dataset_dir: Directory containing training data :type train_dataset_dir: str @@ -141,6 +154,8 @@ class GOOSELiDARSegmentationDataset(dm_segmentation_dataset.LiDARSegmentationDat :type val_dataset_dir: str, optional :param test_dataset_dir: Directory containing test data, defaults to None :type test_dataset_dir: str, optional + :param is_goose_ex: Whether the dataset is GOOSE Ex or GOOSE, defaults to False + :type is_goose_ex: bool, optional """ def __init__( @@ -148,14 +163,16 @@ def __init__( train_dataset_dir: Optional[str] = None, val_dataset_dir: Optional[str] = None, test_dataset_dir: Optional[str] = None, + is_goose_ex: bool = False, ): dataset, ontology = build_dataset( "lidar", - "vls128.bin", + "pcl.bin" if is_goose_ex else "vls128.bin", "goose.label", train_dataset_dir, val_dataset_dir, test_dataset_dir, + is_goose_ex=is_goose_ex, ) # Convert to Pandas diff --git a/detectionmetrics/datasets/segmentation.py b/detectionmetrics/datasets/segmentation.py index 2c199aae..7a74ef53 100644 --- a/detectionmetrics/datasets/segmentation.py +++ b/detectionmetrics/datasets/segmentation.py @@ -12,6 +12,7 @@ from detectionmetrics.datasets.perception import PerceptionDataset import detectionmetrics.utils.io as uio import detectionmetrics.utils.conversion as uc +import detectionmetrics.utils.lidar as ul class SegmentationDataset(PerceptionDataset): @@ -68,7 +69,7 @@ def export( outdir: str, new_ontology: Optional[dict] = None, ontology_translation: Optional[dict] = None, - ignored_classes: Optional[List[str]] = None, + classes_to_remove: Optional[List[str]] = None, resize: Optional[Tuple[int, int]] = None, include_label_count: bool = True, ): @@ -80,8 +81,8 @@ def export( :type new_ontology: dict :param ontology_translation: Ontology translation dictionary, defaults to None :type ontology_translation: Optional[dict], optional - :param ignored_classes: Classes to ignore from the old ontology, defaults to [] - :type ignored_classes: Optional[List[str]], optional + :param classes_to_remove: Classes to remove from the old ontology, defaults to [] + :type classes_to_remove: Optional[List[str]], optional :param resize: Resize images and labels to the given dimensions, defaults to None :type resize: Optional[Tuple[int, int]], optional :param include_label_count: Whether to include class weights in the dataset, defaults to True @@ -104,7 +105,8 @@ def export( old_ontology=self.ontology, new_ontology=new_ontology, ontology_translation=ontology_translation, - ignored_classes=ignored_classes, + classes_to_remove=classes_to_remove, + lut_dtype=np.uint32, ) n_classes = max(c["idx"] for c in new_ontology.values()) + 1 else: @@ -166,7 +168,7 @@ def export( # Convert label to new ontology if needed if ontology_conversion_lut is not None: - label = ontology_conversion_lut[label] + label = ontology_conversion_lut[label].astype(np.uint8) # Resize label if needed if resize is not None: @@ -254,6 +256,8 @@ class LiDARSegmentationDataset(SegmentationDataset): :type ontology: dict :param is_kitti_format: Whether the linked files in the dataset are stored in 
SemanticKITTI format or not, defaults to True :type is_kitti_format: bool, optional + :param has_intensity: Whether the point cloud files contain intensity values, defaults to True + :type has_intensity: bool, optional """ def __init__( @@ -262,9 +266,11 @@ def __init__( dataset_dir: str, ontology: dict, is_kitti_format: bool = True, + has_intensity: bool = True, ): super().__init__(dataset, dataset_dir, ontology) self.is_kitti_format = is_kitti_format + self.has_intensity = has_intensity def make_fname_global(self): """Get all relative filenames in dataset and make global""" @@ -282,7 +288,9 @@ def export( outdir: str, new_ontology: Optional[dict] = None, ontology_translation: Optional[dict] = None, - ignored_classes: Optional[List[str]] = [], + classes_to_remove: Optional[List[str]] = [], + include_label_count: bool = True, + remove_origin: bool = False, ): """Export dataset dataframe and LiDAR files in SemanticKITTI format. Optionally, modify ontology before exporting. @@ -292,8 +300,12 @@ def export( :type new_ontology: dict :param ontology_translation: Ontology translation dictionary, defaults to None :type ontology_translation: Optional[dict], optional - :param ignored_classes: Classes to ignore from the old ontology, defaults to [] - :type ignored_classes: Optional[List[str]], optional + :param classes_to_remove: Classes to remove from the old ontology, defaults to [] + :type classes_to_remove: Optional[List[str]], optional + :param include_label_count: Whether to include class weights in the dataset, defaults to True + :type include_label_count: bool, optional + :param remove_origin: Whether to remove the origin from the point cloud (mostly for removing RELLIS-3D spurious points), defaults to False + :type remove_origin: bool, optional """ os.makedirs(outdir, exist_ok=True) @@ -302,14 +314,25 @@ def export( if ontology_translation is not None and new_ontology is None: raise ValueError("New ontology must be provided") + # Create ontology conversion lookup table if needed and get number of classes ontology_conversion_lut = None if new_ontology is not None: ontology_conversion_lut = uc.get_ontology_conversion_lut( old_ontology=self.ontology, new_ontology=new_ontology, ontology_translation=ontology_translation, - ignored_classes=ignored_classes, + classes_to_remove=classes_to_remove, ) + n_classes = max(c["idx"] for c in new_ontology.values()) + 1 + else: + n_classes = max(c["idx"] for c in self.ontology.values()) + 1 + + # Check if label count is missing and create empty array if needed + label_count_missing = include_label_count and ( + not self.has_label_count or new_ontology is not None or remove_origin + ) + if label_count_missing: + label_count = np.zeros(n_classes, dtype=np.uint64) pbar = tqdm(self.dataset.iterrows()) @@ -334,23 +357,51 @@ def export( label_fname = os.path.join(self.dataset_dir, label_fname) # If format is not appropriate: read, convert, and rewrite sample - if not self.is_kitti_format or ontology_conversion_lut is not None: + if ( + not self.is_kitti_format + or ontology_conversion_lut is not None + or label_count_missing + or remove_origin + ): points = self.read_points(points_fname) - label, _ = self.read_label(label_fname) + label = self.read_label(label_fname) + + # Convert label to new ontology if needed if ontology_conversion_lut is not None: - label = ontology_conversion_lut[label] + label = ontology_conversion_lut[label].astype(np.uint32) + + # Remove points in coordinate origin if needed + if remove_origin: + mask = np.all(points[:, :3] != 0, axis=1) + 
points = points[mask]
+                    label = label[mask]
+
                 points.tofile(os.path.join(outdir, rel_points_fname))
                 label.tofile(os.path.join(outdir, rel_label_fname))
+
+                if label_count_missing:
+                    indices, counts = np.unique(label, return_counts=True)
+                    label_count[indices] += counts.astype(np.uint64)
             else:
-                shutil.copy2(points_fname, os.path.join(outdir, rel_points_fname))
-                shutil.copy2(label_fname, os.path.join(outdir, rel_label_fname))
+                new_points_fname = os.path.join(outdir, rel_points_fname)
+                new_label_fname = os.path.join(outdir, rel_label_fname)
+                try:
+                    shutil.copy2(points_fname, new_points_fname)
+                    shutil.copy2(label_fname, new_label_fname)
+                except shutil.SameFileError:
+                    pass  # Source and destination are the same file

             self.dataset.at[sample_name, "points"] = rel_points_fname
             self.dataset.at[sample_name, "label"] = rel_label_fname

+        # Update dataset directory and ontology if needed
         self.dataset_dir = outdir
+        self.ontology = new_ontology if new_ontology is not None else self.ontology

         # Write ontology and store relative path in dataset attributes
+        if label_count_missing:
+            for class_data in self.ontology.values():
+                class_data["label_count"] = int(label_count[class_data["idx"]])
+
         ontology_fname = "ontology.json"
         self.dataset.attrs = {"ontology_fname": ontology_fname}
         uio.write_json(os.path.join(outdir, ontology_fname), self.ontology)
@@ -358,29 +409,23 @@
         # Store dataset as Parquet file containing relative filenames
         self.dataset.to_parquet(os.path.join(outdir, "dataset.parquet"))

-    @staticmethod
-    def read_points(fname: str) -> np.ndarray:
-        """Read points from a binary file in SemanticKITTI format
+    def read_points(self, fname: str) -> np.ndarray:
+        """Read point cloud. Defaults to SemanticKITTI format

-        :param fname: Binary file containing points
+        :param fname: File containing point cloud
         :type fname: str
         :return: Numpy array containing points
         :rtype: np.ndarray
         """
-        points = np.fromfile(fname, dtype=np.float32)
-        return points.reshape((-1, 4))
+        return ul.read_semantickitti_points(fname, self.has_intensity)

-    @staticmethod
-    def read_label(fname: str) -> Tuple[np.ndarray, np.ndarray]:
-        """Read labels from a binary file in SemanticKITTI format
+    def read_label(self, fname: str) -> np.ndarray:
+        """Read semantic labels. 
Defaults to SemanticKITTI format :param fname: Binary file containing labels :type fname: str - :return: Numpy arrays containing semantic and instance labels - :rtype: Tuple[np.ndarray, np.ndarray] + :return: Numpy arrays containing semantic labels + :rtype: np.ndarray """ - label = np.fromfile(fname, dtype=np.uint32) - label = label.reshape((-1)) - semantic_label = label & 0xFFFF - instance_label = label >> 16 - return semantic_label.astype(np.int32), instance_label.astype(np.int32) + label, _ = ul.read_semantickitti_label(fname) + return label diff --git a/detectionmetrics/datasets/wildscenes.py b/detectionmetrics/datasets/wildscenes.py index a2dce7a3..a95a21c2 100644 --- a/detectionmetrics/datasets/wildscenes.py +++ b/detectionmetrics/datasets/wildscenes.py @@ -8,61 +8,17 @@ from detectionmetrics.datasets import segmentation as dm_segmentation_dataset -# Ontology definition as found in the official repo (https://github.com/csiro-robotics/WildScenes/blob/main/wildscenes/tools/utils2d.py) -METAINFO = { - "classes": ( - "unlabelled", - "asphalt", - "dirt", - "mud", - "water", - "gravel", - "other-terrain", - "tree-trunk", - "tree-foliage", - "bush", - "fence", - "structure", - "pole", - "vehicle", - "rock", - "log", - "other-object", - "sky", - "grass", - ), - "palette": [ - (0, 0, 0), - (255, 165, 0), - (60, 180, 75), - (255, 225, 25), - (0, 130, 200), - (145, 30, 180), - (70, 240, 240), - (240, 50, 230), - (210, 245, 60), - (230, 25, 75), - (0, 128, 128), - (170, 110, 40), - (255, 250, 200), - (128, 0, 0), - (170, 255, 195), - (128, 128, 0), - (250, 190, 190), - (0, 0, 128), - (128, 128, 128), - ], - "cidx": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18], -} - - -def build_dataset(dataset_dir: str, split_fnames: dict) -> Tuple[dict, dict]: +def build_dataset( + dataset_dir: str, split_fnames: dict, ontology: dict +) -> Tuple[dict, dict]: """Build dataset and ontology dictionaries from Wildscenes dataset structure :param dataset_dir: Directory where both RGB images and annotations have been extracted to :type dataset_dir: str :param split_fnames: Dictionary that contains the paths where train, val, and test split files (.csv) have been extracted to :type split_dir: str + :param ontology: Ontology definition as found in the official repo + :type ontology: dict :return: Dataset and onotology :rtype: Tuple[dict, dict] """ @@ -75,10 +31,10 @@ def build_dataset(dataset_dir: str, split_fnames: dict) -> Tuple[dict, dict]: assert os.path.isfile(split_fname), f"{split_fname} split file not found" # Load and adapt ontology - ontology = {} - ontology_iter = zip(METAINFO["classes"], METAINFO["palette"], METAINFO["cidx"]) + parsed_ontology = {} + ontology_iter = zip(ontology["classes"], ontology["palette"], ontology["cidx"]) for name, color, idx in ontology_iter: - ontology[name] = {"idx": idx, "rgb": color} + parsed_ontology[name] = {"idx": idx, "rgb": color} # Get samples filenames train_split = pd.read_csv(split_fnames["train"]) @@ -92,6 +48,9 @@ def build_dataset(dataset_dir: str, split_fnames: dict) -> Tuple[dict, dict]: samples_data = pd.concat([train_split, val_split, test_split]) + if "hist_path" in samples_data.columns: + samples_data = samples_data.drop(columns=["hist_path"]) + # Build dataset as ordered python dictionary dataset = OrderedDict() skipped_samples = [] @@ -120,20 +79,20 @@ def build_dataset(dataset_dir: str, split_fnames: dict) -> Tuple[dict, dict]: for sample_name in skipped_samples: print(f"\n\t{sample_name}") - return dataset, ontology + return dataset, 
parsed_ontology


 class WildscenesImageSegmentationDataset(
     dm_segmentation_dataset.ImageSegmentationDataset
 ):
     """Specific class for Wildscenes-styled image segmentation datasets. All data can
-    be downloaded from the official repo (https://github.com/unmannedlab/RELLIS-3D):
+    be downloaded from the official repo:
         dataset -> https://data.csiro.au/collection/csiro:61541
         split -> https://github.com/csiro-robotics/WildScenes/tree/main/data/splits/opt2d

     :param dataset_dir: Directory where dataset images and labels are stored (Wildscenes2D)
     :type dataset_dir: str
-    :param split_dir: Directory where train, val, and test files (.csv) have been extracted to (data/splits/opt2d from the official repo)
+    :param split_dir: Directory where train, val, and test files (.csv) have been extracted to
     :type split_dir: str
     """

     def __init__(self, dataset_dir: str, split_dir: str):
         split_fnames = {
             "train": os.path.join(split_dir, "train.csv"),
             "val": os.path.join(split_dir, "val.csv"),
             "test": os.path.join(split_dir, "test.csv"),
         }
-        dataset, ontology = build_dataset(dataset_dir, split_fnames)
+
+        # Ontology definition as found in the official repo (https://github.com/csiro-robotics/WildScenes/blob/main/wildscenes/tools/utils2d.py)
+        METAINFO = {
+            "classes": (
+                "unlabelled",
+                "asphalt",
+                "dirt",
+                "mud",
+                "water",
+                "gravel",
+                "other-terrain",
+                "tree-trunk",
+                "tree-foliage",
+                "bush",
+                "fence",
+                "structure",
+                "pole",
+                "vehicle",
+                "rock",
+                "log",
+                "other-object",
+                "sky",
+                "grass",
+            ),
+            "palette": [
+                (0, 0, 0),
+                (255, 165, 0),
+                (60, 180, 75),
+                (255, 225, 25),
+                (0, 130, 200),
+                (145, 30, 180),
+                (70, 240, 240),
+                (240, 50, 230),
+                (210, 245, 60),
+                (230, 25, 75),
+                (0, 128, 128),
+                (170, 110, 40),
+                (255, 250, 200),
+                (128, 0, 0),
+                (170, 255, 195),
+                (128, 128, 0),
+                (250, 190, 190),
+                (0, 0, 128),
+                (128, 128, 128),
+            ],
+            "cidx": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18],
+        }
+        dataset, ontology = build_dataset(dataset_dir, split_fnames, METAINFO)

         # Convert to Pandas
         cols = ["image", "label", "scene", "split"]
@@ -151,3 +157,72 @@
         dataset.attrs = {"ontology": ontology}

         super().__init__(dataset, dataset_dir, ontology)
+
+
+class WildscenesLiDARSegmentationDataset(
+    dm_segmentation_dataset.LiDARSegmentationDataset
+):
+    """Specific class for Wildscenes-styled LiDAR segmentation datasets. 
All data can + be downloaded from the official repo: + dataset -> https://data.csiro.au/collection/csiro:61541 + split -> https://github.com/csiro-robotics/WildScenes/tree/main/data/splits/opt3d + + :param dataset_dir: Directory where dataset images and labels are stored (Wildscenes3D) + :type dataset_dir: str + :param split_dir: Directory where train, val, and test files (.csv) have been extracted to + :type split_dir: str + """ + + def __init__(self, dataset_dir: str, split_dir: str): + split_fnames = { + "train": os.path.join(split_dir, "train.csv"), + "val": os.path.join(split_dir, "val.csv"), + "test": os.path.join(split_dir, "test.csv"), + } + + # Ontology definition as found in the official repo (https://github.com/csiro-robotics/WildScenes/blob/main/wildscenes/tools/utils3d.py) + METAINFO = { + "classes": ( + "unlabelled", + "bush", + "dirt", + "fence", + "grass", + "gravel", + "log", + "mud", + "other-object", + "other-terrain", + "rock", + "sky", + "structure", + "tree-foliage", + "tree-trunk", + "water", + ), + "palette": [ + (0, 0, 0), + (230, 25, 75), + (60, 180, 75), + (0, 128, 128), + (128, 128, 128), + (145, 30, 180), + (128, 128, 0), + (255, 225, 25), + (250, 190, 190), + (70, 240, 240), + (170, 255, 195), + (0, 0, 128), + (170, 110, 40), + (210, 245, 60), + (240, 50, 230), + (0, 130, 200), + ], + "cidx": [255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14], + } + dataset, ontology = build_dataset(dataset_dir, split_fnames, METAINFO) + + # Convert to Pandas + cols = ["points", "label", "scene", "split"] + dataset = pd.DataFrame.from_dict(dataset, orient="index", columns=cols) + dataset.attrs = {"ontology": ontology} + + super().__init__(dataset, dataset_dir, ontology, has_intensity=False) diff --git a/detectionmetrics/models/perception.py b/detectionmetrics/models/perception.py index f78b1472..e6ece062 100644 --- a/detectionmetrics/models/perception.py +++ b/detectionmetrics/models/perception.py @@ -47,6 +47,7 @@ def __init__( self.ontology = uio.read_json(ontology_fname) self.model_cfg = uio.read_json(model_cfg) self.n_classes = len(self.ontology) + self.model_cfg["n_classes"] = self.n_classes @abstractmethod def inference( @@ -90,6 +91,6 @@ def get_lut_ontology( dataset_ontology, self.ontology, ontology_translation, - self.model_cfg.get("ignored_classes", []), + classes_to_remove=self.model_cfg.get("classes_to_remove", None), ) return lut_ontology diff --git a/detectionmetrics/models/segmentation.py b/detectionmetrics/models/segmentation.py index 66a4b141..af01932a 100644 --- a/detectionmetrics/models/segmentation.py +++ b/detectionmetrics/models/segmentation.py @@ -1,6 +1,6 @@ from abc import ABC, abstractmethod import os -from typing import Any, List, Optional, Union +from typing import Any, List, Optional, Tuple, Union import numpy as np import pandas as pd @@ -39,24 +39,36 @@ def __init__( super().__init__(model, model_type, model_cfg, ontology_fname, model_fname) @abstractmethod - def inference( - self, points: Union[np.ndarray, Image.Image] + def predict( + self, data: Union[np.ndarray, Image.Image] ) -> Union[np.ndarray, Image.Image]: - """Perform inference for a single image or point cloud + """Perform prediction for a single data sample - :param image: Either a numpy array (LiDAR point cloud) or a PIL image - :type image: Union[np.ndarray, Image.Image] - :return: Segmenation result as a point cloud or image with label indices + :param data: Input data sample (image or point cloud) + :type data: Union[np.ndarray, Image.Image] + :return: Prediction result 
:rtype: Union[np.ndarray, Image.Image]
         """
         raise NotImplementedError

+    @abstractmethod
+    def inference(self, tensor_in):
+        """Perform inference for a tensor
+
+        :param tensor_in: Input tensor (image or point cloud)
+        :type tensor_in: Either tf.Tensor or torch.Tensor
+        :return: Segmentation result as a tensor
+        :rtype: Either tf.Tensor or torch.Tensor
+        """
+        raise NotImplementedError
+
     @abstractmethod
     def eval(
         self,
         dataset: dm_segentation_dataset.SegmentationDataset,
         split: str | List[str] = "test",
         ontology_translation: Optional[str] = None,
+        translations_direction: str = "dataset_to_model",
         predictions_outdir: Optional[str] = None,
         results_per_sample: bool = False,
     ) -> pd.DataFrame:
@@ -65,9 +77,11 @@
         :param dataset: Segmentation dataset for which the evaluation will be performed
         :type dataset: ImageSegmentationDataset
         :param split: Split or splits to be used from the dataset, defaults to "test"
-        :type split: str | List[str], optional
+        :type split: Union[str, List[str]], optional
         :param ontology_translation: JSON file containing translation between dataset and model output ontologies
         :type ontology_translation: str, optional
+        :param translations_direction: Direction of the ontology translation. Either "dataset_to_model" or "model_to_dataset", defaults to "dataset_to_model"
+        :type translations_direction: str, optional
         :param predictions_outdir: Directory to save predictions per sample, defaults to None. If None, predictions are not saved.
         :type predictions_outdir: Optional[str], optional
         :param results_per_sample: Whether to store results per sample or not, defaults to False. If True, predictions_outdir must be provided.
@@ -104,13 +118,17 @@
         super().__init__(model, model_type, model_cfg, ontology_fname, model_fname)

     @abstractmethod
-    def inference(self, image: Image.Image) -> Image.Image:
-        """Perform inference for a single image
+    def predict(
+        self, image: Image.Image, return_sample: bool = False
+    ) -> Union[Image.Image, Tuple[Image.Image, Any]]:
+        """Perform prediction for a single image

-        :param image: PIL image.
         :type image: Image.Image
-        :return: Segmenation result as PIL image
-        :rtype: Image.Image
+        :param image: PIL image
+        :type image: Image.Image
+        :param return_sample: Whether to return the sample data along with predictions, defaults to False
+        :type return_sample: bool, optional
+        :return: Segmentation result as a PIL image or a tuple with the segmentation result and the input sample tensor
+        :rtype: Union[Image.Image, Tuple[Image.Image, Any]]
         """
         raise NotImplementedError

@@ -120,6 +138,7 @@
     def eval(
         self,
         dataset: dm_segentation_dataset.ImageSegmentationDataset,
         split: str | List[str] = "test",
         ontology_translation: Optional[str] = None,
+        translations_direction: str = "dataset_to_model",
         predictions_outdir: Optional[str] = None,
         results_per_sample: bool = False,
     ) -> pd.DataFrame:
@@ -128,9 +147,11 @@
         :param dataset: Image segmentation dataset for which the evaluation will be performed
         :type dataset: ImageSegmentationDataset
         :param split: Split or splits to be used from the dataset, defaults to "test"
-        :type split: str | List[str], optional
+        :type split: Union[str, List[str]], optional
         :param ontology_translation: JSON file containing translation between dataset and model output ontologies
         :type ontology_translation: str, optional
+        :param translations_direction: Direction of the ontology translation. 
Either "dataset_to_model" or "model_to_dataset", defaults to "dataset_to_model" + :type translations_direction: str, optional :param predictions_outdir: Directory to save predictions per sample, defaults to None. If None, predictions are not saved. :type predictions_outdir: Optional[str], optional :param results_per_sample: Whether to store results per sample or not, defaults to False. If True, predictions_outdir must be provided. @@ -140,6 +161,25 @@ def eval( """ raise NotImplementedError + @abstractmethod + def get_computational_cost( + self, + image_size: Tuple[int] = None, + runs: int = 30, + warm_up_runs: int = 5, + ) -> dict: + """Get different metrics related to the computational cost of the model + + :param image_size: Image size used for inference + :type image_size: Tuple[int], optional + :param runs: Number of runs to measure inference time, defaults to 30 + :type runs: int, optional + :param warm_up_runs: Number of warm-up runs, defaults to 5 + :type warm_up_runs: int, optional + :return: Dictionary containing computational cost information + """ + raise NotImplementedError + class LiDARSegmentationModel(SegmentationModel): """Parent LiDAR segmentation model class @@ -167,13 +207,22 @@ def __init__( super().__init__(model, model_type, model_cfg, ontology_fname, model_fname) @abstractmethod - def inference(self, points: np.ndarray) -> np.ndarray: - """Perform inference for a single image + def predict( + self, + points_fname: str, + has_intensity: bool = True, + return_sample: bool = False, + ) -> Union[np.ndarray, Tuple[np.ndarray, Any]]: + """Perform prediction for a single point cloud - :param image: Point cloud xyz array - :type image: np.ndarray - :return: Segmenation result as a point cloud with label indices - :rtype: np.ndarray + :param points_fname: Point cloud in SemanticKITTI .bin format + :type points_fname: str + :param has_intensity: Whether the point cloud has intensity values, defaults to True + :type has_intensity: bool, optional + :param return_sample: Whether to return the sample data along with predictions, defaults to False + :type return_sample: bool, optional + :return: Segmentation result as a numpy array or a tuple with the segmentation result and the input sample data + :rtype: Union[np.ndarray, Tuple[np.ndarray, Any]] """ raise NotImplementedError @@ -183,6 +232,7 @@ def eval( dataset: dm_segentation_dataset.LiDARSegmentationDataset, split: str | List[str] = "test", ontology_translation: Optional[str] = None, + translations_direction: str = "dataset_to_model", predictions_outdir: Optional[str] = None, results_per_sample: bool = False, ) -> pd.DataFrame: @@ -191,9 +241,11 @@ def eval( :param dataset: LiDAR segmentation dataset for which the evaluation will be performed :type dataset: LiDARSegmentationDataset :param split: Split or splits to be used from the dataset, defaults to "test" - :type split: str | List[str], optional + :type split: Union[str, List[str]], optional :param ontology_translation: JSON file containing translation between dataset and model output ontologies :type ontology_translation: str, optional + :param translations_direction: Direction of the ontology translation. Either "dataset_to_model" or "model_to_dataset", defaults to "dataset_to_model" + :type translations_direction: str, optional :param predictions_outdir: Directory to save predictions per sample, defaults to None. If None, predictions are not saved. 
:type predictions_outdir: Optional[str], optional :param results_per_sample: Whether to store results per sample or not, defaults to False. If True, predictions_outdir must be provided. @@ -202,3 +254,15 @@ def eval( :rtype: pd.DataFrame """ raise NotImplementedError + + @abstractmethod + def get_computational_cost(self, runs: int = 30, warm_up_runs: int = 5) -> dict: + """Get different metrics related to the computational cost of the model + + :param runs: Number of runs to measure inference time, defaults to 30 + :type runs: int, optional + :param warm_up_runs: Number of warm-up runs, defaults to 5 + :type warm_up_runs: int, optional + :return: Dictionary containing computational cost information + """ + raise NotImplementedError diff --git a/detectionmetrics/models/tensorflow.py b/detectionmetrics/models/tensorflow.py index a1e25012..38b33130 100644 --- a/detectionmetrics/models/tensorflow.py +++ b/detectionmetrics/models/tensorflow.py @@ -13,74 +13,12 @@ from detectionmetrics.datasets.segmentation import ImageSegmentationDataset from detectionmetrics.models.segmentation import ImageSegmentationModel +import detectionmetrics.utils.conversion as uc +import detectionmetrics.utils.io as uio import detectionmetrics.utils.segmentation_metrics as um -tf.config.optimizer.set_experimental_options({"layout_optimizer": False}) - - -def get_computational_cost( - model: tf.Module, - dummy_input: tf.Tensor, - model_fname: Optional[str] = None, - runs: int = 30, - warm_up_runs: int = 5, -) -> dict: - """Get different metrics related to the computational cost of the model - - :param model: Loaded TensorFlow SavedModel - :type model: tf.Module - :param dummy_input: Dummy input data for the model - :type dummy_input: tf.Tensor - :param model_fname: Model filename used to measure model size, defaults to None - :type model_fname: Optional[str], optional - :param runs: Number of runs to measure inference time, defaults to 30 - :type runs: int, optional - :param warm_up_runs: Number of warm-up runs, defaults to 5 - :type warm_up_runs: int, optional - :return: DataFrame containing computational cost information - :rtype: pd.DataFrame - """ - # Get model size (if possible) and number of parameters - if model_fname is not None: - size_mb = sum( - os.path.getsize(os.path.join(dirpath, f)) - for dirpath, _, files in os.walk(model_fname) - for f in files - ) - size_mb /= 1024**2 - else: - size_mb = None - - n_params = sum(np.prod(var.shape) for var in model.variables.variables) - - # Measure inference time with GPU synchronization - infer = model.signatures["serving_default"] - for _ in range(warm_up_runs): - _ = infer(dummy_input) - - has_gpu = bool(tf.config.list_physical_devices("GPU")) - inference_times = [] - - for _ in range(runs): - if has_gpu: - tf.config.experimental.set_synchronous_execution(True) - - start_time = time.time() - _ = infer(dummy_input) - if has_gpu: - tf.config.experimental.set_synchronous_execution(True) - - inference_times.append(time.time() - start_time) - - # Retrieve computational cost information - result = { - "input_shape": ["x".join(map(str, dummy_input.shape.as_list()))], - "n_params": [int(n_params)], - "size_mb": [size_mb], - "inference_time_s": [np.mean(inference_times)], - } - return pd.DataFrame.from_dict(result) +tf.config.optimizer.set_experimental_options({"layout_optimizer": False}) def resize_image( @@ -361,33 +299,53 @@ def t_in(image): tf.argmax(tf.squeeze(x), axis=2).numpy().astype(np.uint8) ) - def inference(self, image: Image.Image) -> Image.Image: - """Perform 
inference for a single image
+    def predict(
+        self, image: Image.Image, return_sample: bool = False
+    ) -> Union[Image.Image, Tuple[Image.Image, tf.Tensor]]:
+        """Perform prediction for a single image

         :param image: PIL image
         :type image: Image.Image
-        :return: segmenation result as PIL image
-        :rtype: Image.Image
+        :param return_sample: Whether to return the sample data along with predictions, defaults to False
+        :type return_sample: bool, optional
+        :return: Segmentation result as a PIL image or a tuple with the segmentation result and the input sample tensor
+        :rtype: Union[Image.Image, Tuple[Image.Image, tf.Tensor]]
         """
-        tensor = self.t_in(image)
+        sample = self.t_in(image)
+        result = self.inference(sample)
+        result = self.t_out(result)
+
+        if return_sample:
+            return result, sample
+        else:
+            return result

+    def inference(self, tensor_in: tf.Tensor) -> tf.Tensor:
+        """Perform inference for a tensor
+
+        :param tensor_in: Input image tensor
+        :type tensor_in: tf.Tensor
+        :return: Segmentation result as tensor
+        :rtype: tf.Tensor
+        """
         if self.model_type == "native":
-            result = self.model(tensor)
+            tensor_out = self.model(tensor_in, training=False)
         elif self.model_type == "compiled":
-            result = self.model.signatures["serving_default"](tensor)
+            tensor_out = self.model.signatures["serving_default"](tensor_in)
         else:
             raise ValueError("Model type not recognized")

-        if isinstance(result, dict):
-            result = list(result.values())[0]
+        if isinstance(tensor_out, dict):
+            tensor_out = list(tensor_out.values())[0]

-        return self.t_out(result)
+        return tensor_out

     def eval(
         self,
         dataset: ImageSegmentationDataset,
-        split: str | List[str] = "test",
+        split: Union[str, List[str]] = "test",
         ontology_translation: Optional[str] = None,
+        translations_direction: str = "dataset_to_model",
         predictions_outdir: Optional[str] = None,
         results_per_sample: bool = False,
     ) -> pd.DataFrame:
@@ -396,9 +354,11 @@
         :param dataset: Image segmentation dataset for which the evaluation will be performed
         :type dataset: ImageSegmentationDataset
         :param split: Split to be used from the dataset, defaults to "test"
-        :type split: str | List[str], optional
+        :type split: Union[str, List[str]], optional
         :param ontology_translation: JSON file containing translation between dataset and model output ontologies
         :type ontology_translation: str, optional
+        :param translations_direction: Direction of the ontology translation. Either "dataset_to_model" or "model_to_dataset", defaults to "dataset_to_model"
+        :type translations_direction: str, optional
         :param predictions_outdir: Directory to save predictions per sample, defaults to None. If None, predictions are not saved.
         :type predictions_outdir: Optional[str], optional
         :param results_per_sample: Whether to store results per sample or not, defaults to False. If True, predictions_outdir must be provided. 
@@ -417,8 +377,23 @@ def eval( os.makedirs(predictions_outdir, exist_ok=True) # Build a LUT for transforming ontology if needed - lut_ontology = self.get_lut_ontology(dataset.ontology, ontology_translation) - dataset_ontology = dataset.ontology + eval_ontology = self.ontology + + if ontology_translation is not None: + ontology_translation = uio.read_json(ontology_translation) + if translations_direction == "dataset_to_model": + lut_ontology = uc.get_ontology_conversion_lut( + dataset.ontology, self.ontology, ontology_translation + ) + else: + eval_ontology = dataset.ontology + lut_ontology = uc.get_ontology_conversion_lut( + self.ontology, dataset.ontology, ontology_translation + ) + else: + lut_ontology = None + + n_classes = len(eval_ontology) # Get Tensorflow dataset dataset = ImageSegmentationTensorflowDataset( @@ -427,7 +402,9 @@ def eval( crop=self.model_cfg.get("crop", None), batch_size=self.model_cfg.get("batch_size", 1), splits=[split] if isinstance(split, str) else split, - lut_ontology=lut_ontology, + lut_ontology=( + lut_ontology if translations_direction == "dataset_to_model" else None + ), normalization=self.model_cfg.get("normalization", None), keep_aspect=self.model_cfg.get("keep_aspect", False), ) @@ -435,25 +412,17 @@ def eval( # Retrieve ignored label indices ignored_label_indices = [] for ignored_class in self.model_cfg.get("ignored_classes", []): - ignored_label_indices.append(dataset_ontology[ignored_class]["idx"]) + ignored_label_indices.append(eval_ontology[ignored_class]["idx"]) # Init metrics - metrics_factory = um.SegmentationMetricsFactory(self.n_classes) + metrics_factory = um.SegmentationMetricsFactory(n_classes) # Evaluation loop pbar = tqdm(dataset.dataset) for idx, image, label in pbar: idx = idx.numpy() - if self.model_type == "native": - pred = self.model(image, training=False) - elif self.model_type == "compiled": - pred = self.model.signatures["serving_default"](image) - else: - raise ValueError("Model type not recognized") - - if isinstance(pred, dict): - pred = list(pred.values())[0] + pred = self.inference(image) # Get valid points masks depending on ignored label indices if ignored_label_indices: @@ -469,6 +438,13 @@ def eval( if valid_mask is not None: valid_mask = tf.squeeze(valid_mask, axis=3).numpy() + # Convert predictions to dataset ontology if needed + if ( + lut_ontology is not None + and translations_direction == "model_to_dataset" + ): + pred = lut_ontology[pred] + metrics_factory.update(pred, label, valid_mask) # Store predictions and results per sample if required @@ -481,16 +457,16 @@ def eval( sample_valid_mask = ( valid_mask[i] if valid_mask is not None else None ) - sample_mf = um.SegmentationMetricsFactory(n_classes=self.n_classes) + sample_mf = um.SegmentationMetricsFactory(n_classes) sample_mf.update(sample_pred, sample_label, sample_valid_mask) - sample_df = um.get_metrics_dataframe(sample_mf, self.ontology) + sample_df = um.get_metrics_dataframe(sample_mf, eval_ontology) sample_df.to_csv( os.path.join(predictions_outdir, f"{sample_idx}.csv") ) pred = Image.fromarray(np.squeeze(pred).astype(np.uint8)) pred.save(os.path.join(predictions_outdir, f"{sample_idx}.png")) - return um.get_metrics_dataframe(metrics_factory, self.ontology) + return um.get_metrics_dataframe(metrics_factory, eval_ontology) def get_computational_cost( self, @@ -508,7 +484,46 @@ def get_computational_cost( :type warm_up_runs: int, optional :return: Dictionary containing computational cost information """ + # Generate dummy input dummy_input = 
tf.random.normal([1, *image_size, 3]) - return get_computational_cost( - self.model, dummy_input, self.model_fname, runs, warm_up_runs - ) + + # Get model size (if possible) and number of parameters + if self.model_fname is not None: + size_mb = sum( + os.path.getsize(os.path.join(dirpath, f)) + for dirpath, _, files in os.walk(self.model_fname) + for f in files + ) + size_mb /= 1024**2 + else: + size_mb = None + + n_params = sum(np.prod(var.shape) for var in self.model.variables.variables) + + # Measure inference time with GPU synchronization + for _ in range(warm_up_runs): + self.inference(dummy_input) + + has_gpu = bool(tf.config.list_physical_devices("GPU")) + inference_times = [] + + for _ in range(runs): + if has_gpu: + tf.config.experimental.set_synchronous_execution(True) + + start_time = time.time() + self.inference(dummy_input) + + if has_gpu: + tf.config.experimental.set_synchronous_execution(True) + + inference_times.append(time.time() - start_time) + + # Retrieve computational cost information + result = { + "input_shape": ["x".join(map(str, dummy_input.shape.as_list()))], + "n_params": [int(n_params)], + "size_mb": [size_mb], + "inference_time_s": [np.mean(inference_times)], + } + return pd.DataFrame.from_dict(result) diff --git a/detectionmetrics/models/torch_model_utils/__init__.py b/detectionmetrics/models/torch_model_utils/__init__.py deleted file mode 100644 index 48f449a4..00000000 --- a/detectionmetrics/models/torch_model_utils/__init__.py +++ /dev/null @@ -1,43 +0,0 @@ -from typing import Optional, Tuple - -import numpy as np - -try: - from open3d._ml3d.datasets.utils import DataProcessing -except Exception: - print("Open3D-ML3D not available") -from sklearn.neighbors import KDTree - -from detectionmetrics.models.torch_model_utils import o3d_randlanet, o3d_kpconv - - -# Default functions -def preprocess( - points: np.ndarray, cfg: Optional[dict] = {} -) -> Tuple[np.ndarray, KDTree, np.ndarray]: - """Preprocess point cloud data - - :param points: Point cloud data - :type points: np.ndarray - :param cfg: Dictionary containing model configuration, defaults to {} - :type cfg: Optional[dict], optional - :return: Subsampled points, search tree, and projected indices - :rtype: Tuple[np.ndarray, KDTree, np.ndarray] - """ - # Keep only XYZ coordinates - points = np.array(points[:, 0:3], dtype=np.float32) - - # Subsample points using a grid of given size - grid_size = cfg.get("grid_size", 0.06) - sub_points = DataProcessing.grid_subsampling(points, grid_size=grid_size) - - # Create search tree so that we can project points back to the original point cloud - search_tree = KDTree(sub_points) - projected_indices = np.squeeze(search_tree.query(points, return_distance=False)) - projected_indices = projected_indices.astype(np.int32) - - return sub_points, search_tree, projected_indices - - -transform_input = o3d_randlanet.transform_input -update_probs = o3d_randlanet.update_probs diff --git a/detectionmetrics/models/torch_segmentation.py b/detectionmetrics/models/torch_segmentation.py index bf628a5e..7a3cb2ce 100644 --- a/detectionmetrics/models/torch_segmentation.py +++ b/detectionmetrics/models/torch_segmentation.py @@ -1,6 +1,7 @@ -from collections import defaultdict +import importlib import os import time +import tempfile from typing import Any, List, Optional, Tuple, Union import numpy as np @@ -8,77 +9,35 @@ from PIL import Image import torch from torch.utils.data import DataLoader, Dataset -from torchvision.transforms import v2 as transforms -from torchvision.transforms.v2 
import functional as F + +try: + from torchvision.transforms import v2 as transforms + from torchvision.transforms.v2 import functional as F +except ImportError: + from torchvision.transforms import transforms + from torchvision.transforms import functional as F from tqdm import tqdm from detectionmetrics.datasets import segmentation as dm_segmentation_dataset from detectionmetrics.models import segmentation as dm_segmentation_model -from detectionmetrics.models import torch_model_utils as tmu -import detectionmetrics.utils.lidar as ul +import detectionmetrics.utils.conversion as uc +import detectionmetrics.utils.io as uio import detectionmetrics.utils.segmentation_metrics as um +import detectionmetrics.utils.torch as ut -def data_to_device( - data: Union[tuple, list], device: torch.device -) -> Union[tuple, list]: - """Move provided data to given device (CPU or GPU) - - :param data: Data provided (it can be a single or multiple tensors) - :type data: Union[tuple, list] - :param device: Device to move data to - :type device: torch.device - :return: Data moved to device - :rtype: Union[tuple, list] - """ - if isinstance(data, (tuple, list)): - return type(data)( - d.to(device) if torch.is_tensor(d) else data_to_device(d, device) - for d in data - ) - elif torch.is_tensor(data): - return data.to(device) - else: - return data - - -def get_data_shape(data: Union[tuple, list]) -> Union[tuple, list]: - """Get the shape of the provided data - - :param data: Data provided (it can be a single or multiple tensors) - :type data: Union[tuple, list] - :return: Data shape - :rtype: Union[tuple, list] - """ - if isinstance(data, (tuple, list)): - return type(data)( - tuple(d.shape) if torch.is_tensor(d) else get_data_shape(d) for d in data - ) - elif torch.is_tensor(data): - return tuple(data.shape) - else: - return tuple(data.shape) +AVAILABLE_MODEL_FORMATS_LIDAR = ["o3d_randlanet", "o3d_kpconv", "mmdet3d"] -def unsqueeze_data(data: Union[tuple, list], dim: int = 0) -> Union[tuple, list]: - """Unsqueeze provided data along given dimension +def raise_unknown_model_format_lidar(model_format: str) -> None: + """Raise an exception if the LiDAR model format is unknown - :param data: Data provided (it can be a single or multiple tensors) - :type data: Union[tuple, list] - :param dim: Dimension that will be unsqueezed, defaults to 0 - :type dim: int, optional - :return: Unsqueezed data - :rtype: Union[tuple, list] + :param input_format: Model format string + :type input_format: str """ - if isinstance(data, (tuple, list)): - return type(data)( - d.unsqueeze(dim) if torch.is_tensor(d) else unsqueeze_data(d, dim) - for d in data - ) - elif torch.is_tensor(data): - return data.unsqueeze(dim) - else: - return data + msg = f"Unknown model format: {model_format}." + msg += f"Available formats: {AVAILABLE_MODEL_FORMATS_LIDAR}" + raise Exception(msg) def get_computational_cost( @@ -103,63 +62,6 @@ def get_computational_cost( :return: DataFrame containing computational cost information :rtype: pd.DataFrame """ - # Get model size if possible - if model_fname is not None: - size_mb = os.path.getsize(model_fname) / 1024**2 - else: - size_mb = None - - # Measure inference time with GPU synchronization - dummy_tuple = dummy_input if isinstance(dummy_input, tuple) else (dummy_input,) - - for _ in range(warm_up_runs): - if hasattr(model, "inference"): # e.g. 
mmsegmentation models - model.inference( - *dummy_tuple, - [ - dict( - ori_shape=dummy_tuple[0].shape[2:], - img_shape=dummy_tuple[0].shape[2:], - pad_shape=dummy_tuple[0].shape[2:], - padding_size=[0, 0, 0, 0], - ) - ] - * dummy_tuple[0].shape[0], - ) - else: - model(*dummy_tuple) - - inference_times = [] - for _ in range(runs): - torch.cuda.synchronize() - start_time = time.time() - if hasattr(model, "inference"): # e.g. mmsegmentation models - model.inference( - *dummy_tuple, - [ - dict( - ori_shape=dummy_tuple[0].shape[2:], - img_shape=dummy_tuple[0].shape[2:], - pad_shape=dummy_tuple[0].shape[2:], - padding_size=[0, 0, 0, 0], - ) - ] - * dummy_tuple[0].shape[0], - ) - else: - model(*dummy_tuple) - torch.cuda.synchronize() - end_time = time.time() - inference_times.append(end_time - start_time) - - result = { - "input_shape": ["x".join(map(str, get_data_shape(dummy_input)))], - "n_params": [sum(p.numel() for p in model.parameters())], - "size_mb": [size_mb], - "inference_time_s": [np.mean(inference_times)], - } - - return pd.DataFrame.from_dict(result) class CustomResize(torch.nn.Module): @@ -256,16 +158,14 @@ def __getitem__( class LiDARSegmentationTorchDataset(Dataset): - """Dataset for LiDAR segmentation PyTorch models + """Dataset for LiDAR segmentation PyTorch - Open3D-ML models :param dataset: LiDAR segmentation dataset :type dataset: LiDARSegmentationDataset :param model_cfg: Dictionary containing model configuration :type model_cfg: dict - :param preprocess: Function for preprocessing point clouds - :type preprocess: callable - :param n_classes: Number of classes estimated by the model - :type n_classes: int + :param get_sample: Function for loading sample data + :type get_sample: callable :param splits: Splits to be used from the dataset, defaults to ["test"] :type splits: str, optional """ @@ -274,59 +174,33 @@ def __init__( self, dataset: dm_segmentation_dataset.LiDARSegmentationDataset, model_cfg: dict, - preprocess: callable, - n_classes: int, + get_sample: callable, splits: str = ["test"], ): # Filter split and make filenames global dataset.dataset = dataset.dataset[dataset.dataset["split"].isin(splits)] self.dataset = dataset self.dataset.make_fname_global() - self.model_cfg = model_cfg - self.preprocess = preprocess - self.n_classes = n_classes + self.get_sample = get_sample def __len__(self): return len(self.dataset.dataset) - def __getitem__( - self, idx: int - ) -> Tuple[Union[np.ndarray, torch.Tensor], Union[np.ndarray, torch.Tensor]]: - """Prepare sample data: point cloud and label + def __getitem__(self, idx: int): + """Prepare sample data :param idx: Sample index :type idx: int - :return: Point cloud and corresponding label tensor or numpy arrays - :rtype: Tuple[np.ndarray, np.ndarray,] + :return: Sample data required by the model """ - # Read the point cloud and its labels - points = self.dataset.read_points(self.dataset.dataset.iloc[idx]["points"]) - semantic_label, instance_label = self.dataset.read_label( - self.dataset.dataset.iloc[idx]["label"] - ) - - # Preprocess point cloud - preprocessed_points, search_tree, projected_indices = self.preprocess( - points, self.model_cfg - ) - - # Init sampler - sampler = None - if "sampler" in self.model_cfg: - sampler = ul.Sampler( - preprocessed_points.shape[0], - search_tree, - self.model_cfg["sampler"], - self.n_classes, - ) - - return ( - self.dataset.dataset.index[idx], - preprocessed_points, - projected_indices, - (semantic_label, instance_label), - sampler, + return self.get_sample( + 
points_fname=self.dataset.dataset.iloc[idx]["points"], + model_cfg=self.model_cfg, + label_fname=self.dataset.dataset.iloc[idx]["label"], + name=self.dataset.dataset.index[idx], + idx=idx, + has_intensity=self.dataset.has_intensity, ) @@ -440,38 +314,57 @@ def __init__( ] ) - def inference(self, image: Image.Image) -> Image.Image: - """Perform inference for a single image + def predict( + self, image: Image.Image, return_sample: bool = False + ) -> Union[Image.Image, Tuple[Image.Image, torch.Tensor]]: + """Perform prediction for a single image :param image: PIL image :type image: Image.Image - :return: segmenation result as PIL image - :rtype: Image.Image + :param return_sample: Whether to return the sample data along with predictions, defaults to False + :type return_sample: bool, optional + :return: Segmentation result as a PIL image or a tuple with the segmentation result and the input sample tensor + :rtype: Union[Image.Image, Tuple[Image.Image, torch.Tensor]] """ - tensor = self.transform_input(image).unsqueeze(0).to(self.device) + sample = self.transform_input(image).unsqueeze(0).to(self.device) + result = self.inference(sample) + result = self.transform_output(result) + + if return_sample: + return result, sample + else: + return result + def inference(self, tensor_in: torch.Tensor) -> torch.Tensor: + """Perform inference for a tensor + + :param tensor_in: Input point cloud tensor + :type tensor_in: torch.Tensor + :return: Segmentation result as tensor + :rtype: torch.Tensor + """ with torch.no_grad(): # Perform inference if hasattr(self.model, "inference"): # e.g. mmsegmentation models - result = self.model.inference( - tensor.to(self.device), + tensor_out = self.model.inference( + tensor_in.to(self.device), [ dict( - ori_shape=tensor.shape[2:], - img_shape=tensor.shape[2:], - pad_shape=tensor.shape[2:], + ori_shape=tensor_in.shape[2:], + img_shape=tensor_in.shape[2:], + pad_shape=tensor_in.shape[2:], padding_size=[0, 0, 0, 0], ) ] - * tensor.shape[0], + * tensor_in.shape[0], ) else: - result = self.model(tensor.to(self.device)) + tensor_out = self.model(tensor_in.to(self.device)) - if isinstance(result, dict): - result = result["out"] + if isinstance(tensor_out, dict): + tensor_out = tensor_out["out"] - return self.transform_output(result) + return tensor_out def eval( self, @@ -486,7 +379,7 @@ def eval( :param dataset: Image segmentation dataset for which the evaluation will be performed :type dataset: ImageSegmentationDataset :param split: Split or splits to be used from the dataset, defaults to "test" - :type split: str | List[str], optional + :type split: Union[str, List[str]], optional :param ontology_translation: JSON file containing translation between dataset and model output ontologies :type ontology_translation: str, optional :param predictions_outdir: Directory to save predictions per sample, defaults to None. If None, predictions are not saved. @@ -507,7 +400,9 @@ def eval( os.makedirs(predictions_outdir, exist_ok=True) # Build a LUT for transforming ontology if needed - lut_ontology = self.get_lut_ontology(dataset.ontology, ontology_translation) + lut_ontology = uc.get_ontology_conversion_lut( + self.ontology, dataset.ontology, ontology_translation + ) lut_ontology = torch.tensor(lut_ontology, dtype=torch.int64).to(self.device) # Retrieve ignored label indices @@ -537,24 +432,7 @@ def eval( pbar = tqdm(dataloader, leave=True) for idx, image, label in pbar: # Perform inference - if hasattr(self.model, "inference"): # e.g. 
mmsegmentation models - pred = self.model.inference( - image.to(self.device), - [ - dict( - ori_shape=image.shape[2:], - img_shape=image.shape[2:], - pad_shape=image.shape[2:], - padding_size=[0, 0, 0, 0], - ) - ] - * image.shape[0], - ) - else: - pred = self.model(image.to(self.device)) - - if isinstance(pred, dict): - pred = pred["out"] + pred = self.inference(image) # Get valid points masks depending on ignored label indices if ignored_label_indices: @@ -617,10 +495,38 @@ def get_computational_cost( :type warm_up_runs: int, optional :return: Dictionary containing computational cost information """ + # Create dummy input dummy_input = torch.randn(1, 3, *image_size).to(self.device) - return get_computational_cost( - self.model, dummy_input, self.model_fname, runs, warm_up_runs - ) + + # Get model size if possible + if self.model_fname is not None: + size_mb = os.path.getsize(self.model_fname) / 1024**2 + else: + size_mb = None + + # Measure inference time with GPU synchronization + dummy_tuple = dummy_input if isinstance(dummy_input, tuple) else (dummy_input,) + + for _ in range(warm_up_runs): + self.inference(dummy_tuple[0]) + + inference_times = [] + for _ in range(runs): + torch.cuda.synchronize() + start_time = time.time() + self.inference(dummy_tuple[0]) + torch.cuda.synchronize() + end_time = time.time() + inference_times.append(end_time - start_time) + + result = { + "input_shape": ["x".join(map(str, ut.get_data_shape(dummy_input)))], + "n_params": [sum(p.numel() for p in self.model.parameters())], + "size_mb": [size_mb], + "inference_time_s": [np.mean(inference_times)], + } + + return pd.DataFrame.from_dict(result) class TorchLiDARSegmentationModel(dm_segmentation_model.LiDARSegmentationModel): @@ -655,6 +561,7 @@ def __init__( print("Model is not a TorchScript model. 
Loading as a PyTorch module.") model = torch.load(model, map_location=self.device) model_type = "native" + # Otherwise, check that it is a PyTorch module elif isinstance(model, torch.nn.Module): model_fname = None @@ -666,98 +573,63 @@ def __init__( super().__init__(model, model_type, model_cfg, ontology_fname, model_fname) self.model = self.model.to(self.device).eval() - # Init model specific functions - if self.model_cfg["input_format"] == "o3d_randlanet": # Open3D RandLaNet - self.preprocess = tmu.preprocess - self.transform_input = tmu.o3d_randlanet.transform_input - self.update_probs = tmu.o3d_randlanet.update_probs - self.model_cfg["num_layers"] = sum(1 for _ in self.model.decoder.children()) - if self.model_cfg["input_format"] == "o3d_kpconv": # Open3D KPConv - self.preprocess = tmu.preprocess - self.transform_input = tmu.o3d_kpconv.transform_input - self.update_probs = tmu.o3d_kpconv.update_probs - else: - self.preprocess = tmu.preprocess - self.transform_input = tmu.transform_input - self.update_probs = tmu.update_probs + # Init specific attributes and update model configuration + self.model_format = self.model_cfg["model_format"] - # Transformation for output labels - self.transform_output = ( - lambda x: torch.argmax(x.squeeze(), axis=-1).squeeze().to(torch.uint8) + # Init model specific functions + model_format = self.model_format.split("_")[0] + model_utils_module_str = ( + f"detectionmetrics.models.lidar_torch_utils.{model_format}" ) + try: + model_utils_module = importlib.import_module(model_utils_module_str) + except ImportError: + raise_unknown_model_format_lidar(model_format) + self._get_sample = model_utils_module.get_sample + self.inference = model_utils_module.inference + if hasattr(model_utils_module, "reset_sampler"): + self._reset_sampler = model_utils_module.reset_sampler + else: + self._reset_sampler = None - def inference(self, points: np.ndarray) -> np.ndarray: - """Perform inference for a single point cloud - - :param points: Point cloud xyz array - :type points: np.ndarray - :return: Segmenation result as a point cloud with label indices - :rtype: np.ndarray + def predict( + self, + points_fname: str, + has_intensity: bool = True, + return_sample: bool = False, + ignore_index: Optional[List[int]] = None, + ) -> Union[np.ndarray, Tuple[np.ndarray, Any]]: + """Perform prediction for a single point cloud + + :param points_fname: Point cloud in SemanticKITTI .bin format + :type points_fname: str + :param has_intensity: Whether the point cloud has intensity values, defaults to True + :type has_intensity: bool, optional + :param return_sample: Whether to return the sample data along with predictions, defaults to False + :type return_sample: bool, optional + :param ignore_index: List of class indices to ignore during prediction, defaults to None + :type ignore_index: Optional[List[int]], optional + :return: Segmentation result as a numpy array or a tuple with the segmentation result and the input sample data + :rtype: Union[np.ndarray, Tuple[np.ndarray, Any]] """ # Preprocess point cloud - points, search_tree, projected_indices = self.preprocess(points, self.model_cfg) - - # Init sampler if needed - sampler = None - if "sampler" in self.model_cfg: - end_th = self.model_cfg.get("end_th", 0.5) - sampler = ul.Sampler( - points.shape[0], - search_tree, - self.model_cfg["sampler"], - self.n_classes, - ) - - # Iterate over the sampled point cloud until all points reach the end threshold. - # If no sampler is provided, the inference is performed in a single step. 
- infer_complete = False - while not infer_complete: - # Get model input data - input_data, selected_indices = self.transform_input( - points, self.model_cfg, sampler - ) - input_data = data_to_device(input_data, self.device) - if self.model_cfg["input_format"] != "o3d_kpconv": - input_data = unsqueeze_data(input_data) - - # Perform inference - with torch.no_grad(): - result = self.model(*input_data) - - # TODO: check if this is consistent across different models - if isinstance(result, dict): - result = result["out"] - - # Update probabilities if sampler is used - if sampler is not None: - if self.model_cfg["input_format"] == "o3d_kpconv": - sampler.test_probs = self.update_probs( - result, - selected_indices, - sampler.test_probs, - lengths=input_data[-1], - ) - else: - sampler.test_probs = self.update_probs( - result, - selected_indices, - sampler.test_probs, - self.n_classes, - ) - if sampler.p[sampler.p > end_th].shape[0] == sampler.p.shape[0]: - result = sampler.test_probs[projected_indices] - infer_complete = True - else: - result = result.squeeze().cpu()[projected_indices].cuda() - infer_complete = True + sample = self._get_sample( + points_fname, self.model_cfg, has_intensity=has_intensity + ) + result, _, _ = self.inference(sample, self.model, self.model_cfg, ignore_index) + result = result.squeeze().cpu().numpy() - return self.transform_output(result).cpu().numpy() + if return_sample: + return result, sample + else: + return result def eval( self, dataset: dm_segmentation_dataset.LiDARSegmentationDataset, split: str | List[str] = "test", ontology_translation: Optional[str] = None, + translation_direction: str = "dataset_to_model", predictions_outdir: Optional[str] = None, results_per_sample: bool = False, ) -> pd.DataFrame: @@ -766,9 +638,11 @@ def eval( :param dataset: LiDAR segmentation dataset for which the evaluation will be performed :type dataset: LiDARSegmentationDataset :param split: Split or splits to be used from the dataset, defaults to "test" - :type split: str | List[str], optional + :type split: Union[str, List[str]], optional :param ontology_translation: JSON file containing translation between dataset and model output ontologies - :type ontology_translation: str, optional + :type ontology_translation: Optional[str], optional + :param translation_direction: Direction of the ontology translation, either 'dataset_to_model' or 'model_to_dataset', defaults to "dataset_to_model" + :type translation_direction: str, optional :param predictions_outdir: Directory to save predictions per sample, defaults to None. If None, predictions are not saved. :type predictions_outdir: Optional[str], optional :param results_per_sample: Whether to store results per sample or not, defaults to False. If True, predictions_outdir must be provided. 
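A minimal usage sketch of the new evaluation and computational-cost interfaces of this class (illustrative only, not part of the patch): it assumes `model` is an already-constructed `TorchLiDARSegmentationModel` and `dataset` is a compatible `LiDARSegmentationDataset`; the file names are placeholders.

    # Evaluate, translating model predictions into the dataset ontology
    results = model.eval(
        dataset,
        split="test",
        ontology_translation="ontology_translation.json",
        translation_direction="model_to_dataset",
        predictions_outdir="predictions",
        results_per_sample=True,
    )
    results.to_csv("lidar_evaluation.csv")

    # Estimate computational cost on a dummy point cloud sampled uniformly
    # within the given range (values shown are the documented defaults)
    cost = model.get_computational_cost(
        point_cloud_range=(-50, -50, -5, 50, 50, 5),
        num_points=100000,
        has_intensity=False,
    )
    cost.to_csv("lidar_computational_cost.csv")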
@@ -787,75 +661,50 @@ def eval( os.makedirs(predictions_outdir, exist_ok=True) # Build a LUT for transforming ontology if needed - lut_ontology = self.get_lut_ontology(dataset.ontology, ontology_translation) - lut_ontology = torch.tensor(lut_ontology, dtype=torch.int64).to(self.device) + eval_ontology = self.ontology + + if ontology_translation is not None: + ontology_translation = uio.read_json(ontology_translation) + if translation_direction == "dataset_to_model": + lut_ontology = uc.get_ontology_conversion_lut( + dataset.ontology, self.ontology, ontology_translation + ) + else: + eval_ontology = dataset.ontology + lut_ontology = uc.get_ontology_conversion_lut( + self.ontology, dataset.ontology, ontology_translation + ) + + lut_ontology = torch.tensor(lut_ontology, dtype=torch.int64).to(self.device) + else: + lut_ontology = None + + n_classes = len(eval_ontology) # Retrieve ignored label indices ignored_label_indices = [] for ignored_class in self.model_cfg.get("ignored_classes", []): ignored_label_indices.append(dataset.ontology[ignored_class]["idx"]) - # Get PyTorch dataset (no dataloader to avoid complexity with batching samplers) + # Get PyTorch dataloader dataset = LiDARSegmentationTorchDataset( dataset, - model_cfg=self.model_cfg, - preprocess=self.preprocess, - n_classes=self.n_classes, + self.model_cfg, + self._get_sample, splits=[split] if isinstance(split, str) else split, ) # Init metrics - metrics_factory = um.SegmentationMetricsFactory(self.n_classes) + metrics_factory = um.SegmentationMetricsFactory(n_classes) # Evaluation loop - end_th = self.model_cfg.get("end_th", 0.5) with torch.no_grad(): pbar = tqdm(dataset, total=len(dataset), leave=True) - for idx, points, projected_indices, (label, _), sampler in pbar: - # Iterate over the sampled point cloud until all points reach the end - # threshold. If no sampler is provided, the inference is performed in a - # single step. 
- infer_complete = False - while not infer_complete: - # Get model input data - input_data, selected_indices = self.transform_input( - points, self.model_cfg, sampler - ) - input_data = data_to_device(input_data, self.device) - if self.model_cfg["input_format"] != "o3d_kpconv": - input_data = unsqueeze_data(input_data) - - # Perform inference - pred = self.model(*input_data) - - # TODO: check if this is consistent across different models - if isinstance(pred, dict): - pred = pred["out"] - - if sampler is not None: - if self.model_cfg["input_format"] == "o3d_kpconv": - sampler.test_probs = self.update_probs( - pred, - selected_indices, - sampler.test_probs, - lengths=input_data[-1], - ) - else: - sampler.test_probs = self.update_probs( - pred, - selected_indices, - sampler.test_probs, - self.n_classes, - ) - if sampler.p[sampler.p > end_th].shape[0] == sampler.p.shape[0]: - pred = sampler.test_probs[projected_indices] - infer_complete = True - else: - pred = pred.squeeze().cpu()[projected_indices].cuda() - infer_complete = True + for sample in pbar: + # Perform inference + pred, label, name = self.inference(sample, self.model, self.model_cfg) # Get valid points masks depending on ignored label indices - label = torch.tensor(label, device=self.device) if ignored_label_indices: valid_mask = torch.ones_like(label, dtype=torch.bool) for idx in ignored_label_indices: @@ -865,70 +714,118 @@ def eval( # Convert labels if needed if lut_ontology is not None: - label = lut_ontology[label] + if translation_direction == "dataset_to_model": + label = lut_ontology[label] + else: + pred = lut_ontology[pred] # Prepare data and update metrics factory - label = label.cpu().unsqueeze(0).numpy() - pred = self.transform_output(pred) - pred = pred.cpu().unsqueeze(0).to(torch.int64).numpy() + label = label.cpu().numpy() + pred = pred.cpu().numpy() if valid_mask is not None: - valid_mask = valid_mask.cpu().unsqueeze(0).numpy() + valid_mask = valid_mask.cpu().numpy() metrics_factory.update(pred, label, valid_mask) # Store predictions and results per sample if required if predictions_outdir is not None: - for i, (sample_idx, sample_pred, sample_label) in enumerate( - zip(idx, pred, label) + for i, (sample_name, sample_pred, sample_label) in enumerate( + zip(name, pred, label) ): if results_per_sample: sample_valid_mask = ( valid_mask[i] if valid_mask is not None else None ) - sample_mf = um.SegmentationMetricsFactory(n_classes=self.n_classes) + sample_mf = um.SegmentationMetricsFactory(n_classes) sample_mf.update( sample_pred, sample_label, sample_valid_mask ) sample_df = um.get_metrics_dataframe( - sample_mf, self.ontology + sample_mf, eval_ontology ) sample_df.to_csv( - os.path.join(predictions_outdir, f"{sample_idx}.csv") + os.path.join(predictions_outdir, f"{sample_name}.csv") ) pred.tofile( - os.path.join(predictions_outdir, f"{sample_idx}.bin") + os.path.join(predictions_outdir, f"{sample_name}.bin") ) - return um.get_metrics_dataframe(metrics_factory, self.ontology) + return um.get_metrics_dataframe(metrics_factory, eval_ontology) - def get_computational_cost(self, runs: int = 30, warm_up_runs: int = 5) -> dict: + def get_computational_cost( + self, + point_cloud_range: Tuple[int, int, int, int, int, int] = ( + -50, + -50, + -5, + 50, + 50, + 5, + ), + num_points: int = 100000, + has_intensity: bool = False, + runs: int = 30, + warm_up_runs: int = 5, + ) -> dict: """Get different metrics related to the computational cost of the model + :param point_cloud_range: Point cloud range (meters), defaults to (-50, 
-50, -5, 50, 50, 5) + :type point_cloud_range: Tuple[int, int, int, int, int, int], optional + :param num_points: Number of points in the point cloud, defaults to 100000 + :type num_points: int, optional + :param has_intensity: Whether the point cloud has intensity values, defaults to False + :type has_intensity: bool, optional :param runs: Number of runs to measure inference time, defaults to 30 :type runs: int, optional :param warm_up_runs: Number of warm-up runs, defaults to 5 :type warm_up_runs: int, optional :return: Dictionary containing computational cost information """ - # Build dummy input data (process is a bit complex for LiDAR models) - dummy_points = np.random.rand(1000000, 4) - dummy_points, search_tree, _ = self.preprocess(dummy_points, self.model_cfg) - - sampler = None - if "sampler" in self.model_cfg: - sampler = ul.Sampler( - point_cloud_size=dummy_points.shape[0], - search_tree=search_tree, - sampler_name=self.model_cfg["sampler"], - num_classes=self.n_classes, + # Build dummy point cloud using uniform distribution + dummy_points = np.random.uniform( + low=point_cloud_range[0:3], + high=point_cloud_range[3:6], + size=(num_points, 3 + int(has_intensity)), + ).astype(np.float32) + + # Store in a secure temporary .bin file + with tempfile.NamedTemporaryFile(suffix=".bin") as tmp_file: + dummy_points.tofile(tmp_file.name) + sample = self._get_sample( + tmp_file.name, self.model_cfg, has_intensity=has_intensity ) - dummy_input, _ = self.transform_input(dummy_points, self.model_cfg, sampler) - dummy_input = data_to_device(dummy_input, self.device) - if self.model_cfg["input_format"] != "o3d_kpconv": - dummy_input = unsqueeze_data(dummy_input) - - # Get computational cost - return get_computational_cost( - self.model, dummy_input, self.model_fname, runs, warm_up_runs - ) + # Get model size if possible + if self.model_fname is not None: + size_mb = os.path.getsize(self.model_fname) / 1024**2 + else: + size_mb = None + + # Measure inference time with GPU synchronization + for _ in range(warm_up_runs): + if "o3d" in self.model_format: # reset random sampling for Open3D-ML models + subsampled_points, _, sampler, _, _, _ = sample + self._reset_sampler(sampler, subsampled_points.shape[0], self.n_classes) + + self.inference(sample, self.model, self.model_cfg) + + inference_times = [] + for _ in range(runs): + if "o3d" in self.model_format: # reset random sampling for Open3D-ML models + subsampled_points, _, sampler, _, _, _ = sample + self._reset_sampler(sampler, subsampled_points.shape[0], self.n_classes) + torch.cuda.synchronize() + start_time = time.time() + self.inference(sample, self.model, self.model_cfg) + torch.cuda.synchronize() + end_time = time.time() + inference_times.append(end_time - start_time) + + result = { + "input_shape": ["x".join(map(str, ut.get_data_shape(dummy_points)))], + "n_params": [sum(p.numel() for p in self.model.parameters())], + "size_mb": [size_mb], + "inference_time_s": [np.mean(inference_times)], + } + + return pd.DataFrame.from_dict(result) diff --git a/detectionmetrics/models/utils/__init__.py b/detectionmetrics/models/utils/__init__.py new file mode 100644 index 00000000..a706d9f3 --- /dev/null +++ b/detectionmetrics/models/utils/__init__.py @@ -0,0 +1,19 @@ +try: + from detectionmetrics.models.utils import o3d +except ImportError: + pass + +try: + from detectionmetrics.models.utils import mmdet3d +except ImportError: + pass + +try: + from detectionmetrics.models.utils import lsk3dnet +except ImportError: + pass + +try: + from 
detectionmetrics.models.utils import sphereformer +except ImportError: + pass diff --git a/detectionmetrics/models/utils/lsk3dnet.py b/detectionmetrics/models/utils/lsk3dnet.py new file mode 100644 index 00000000..581c2e05 --- /dev/null +++ b/detectionmetrics/models/utils/lsk3dnet.py @@ -0,0 +1,298 @@ +import time +from typing import List, Optional, Tuple + +from c_gen_normal_map import gen_normal_map +import numpy as np +import torch +import utils.depth_map_utils as depth_map_utils + +import detectionmetrics.utils.torch as ut +import detectionmetrics.utils.lidar as ul + + +def range_projection(current_vertex, fov_up=3.0, fov_down=-25.0, proj_H=64, proj_W=900): + """Project a pointcloud into a spherical projection (range image).""" + # laser parameters + fov_up = fov_up / 180.0 * np.pi # field of view up in radians + fov_down = fov_down / 180.0 * np.pi # field of view down in radians + fov = abs(fov_down) + abs(fov_up) # get field of view total in radians + + # get depth of all points + depth = np.linalg.norm(current_vertex[:, :3], 2, axis=1) + + # get scan components + scan_x = current_vertex[:, 0] + scan_y = current_vertex[:, 1] + scan_z = current_vertex[:, 2] + + # get angles of all points + yaw = -np.arctan2(scan_y, scan_x) + pitch = np.arcsin(scan_z / depth) + + # get projections in image coords + proj_x = 0.5 * (yaw / np.pi + 1.0) # in [0.0, 1.0] + proj_y = 1.0 - (pitch + abs(fov_down)) / fov # in [0.0, 1.0] + + # scale to image size using angular resolution + proj_x *= proj_W # in [0.0, W] + proj_y *= proj_H # in [0.0, H] + + # round and clamp for use as index + proj_x = np.floor(proj_x) + proj_x = np.minimum(proj_W - 1, proj_x) + proj_x = np.maximum(0, proj_x).astype(np.int32) # in [0,W-1] + from_proj_x = np.copy(proj_x) # store a copy in orig order + + proj_y = np.floor(proj_y) + proj_y = np.minimum(proj_H - 1, proj_y) + proj_y = np.maximum(0, proj_y).astype(np.int32) # in [0,H-1] + from_proj_y = np.copy(proj_y) # stope a copy in original order + + # order in decreasing depth + order = np.argsort(depth)[::-1] + depth = depth[order] + + proj_y = proj_y[order] + proj_x = proj_x[order] + + scan_x = scan_x[order] + scan_y = scan_y[order] + scan_z = scan_z[order] + + indices = np.arange(depth.shape[0]) + indices = indices[order] + + proj_range = np.full((proj_H, proj_W), -1, dtype=np.float32) + proj_vertex = np.full((proj_H, proj_W, 4), -1, dtype=np.float32) + proj_idx = np.full((proj_H, proj_W), -1, dtype=np.int32) + + proj_range[proj_y, proj_x] = depth + proj_vertex[proj_y, proj_x] = np.array( + [scan_x, scan_y, scan_z, np.ones(len(scan_x))] + ).T + proj_idx[proj_y, proj_x] = indices + + return proj_range, proj_vertex, from_proj_x, from_proj_y + + +def compute_normals_range( + current_vertex, proj_H=64, proj_W=900, extrapolate=True, blur_type="gaussian" +): + """Compute normals for each point using range image-based method.""" + proj_range, proj_vertex, from_proj_x, from_proj_y = range_projection(current_vertex) + proj_range = depth_map_utils.fill_in_fast( + proj_range, extrapolate=extrapolate, blur_type=blur_type + ) + + # generate normal image + normal_data = gen_normal_map(proj_range, proj_vertex, proj_H, proj_W) + unproj_normal_data = normal_data[from_proj_y, from_proj_x] + + return unproj_normal_data + + +def collate_fn(samples: List[dict]) -> dict: + """Collate function for batching samples + + :param samples: list of sample dictionaries + :type samples: List[dict] + :return: collated batch dictionary + :rtype: dict + """ + point_num = [d["point_num"] for d in samples] + 
batch_size = len(point_num) + ref_labels = samples[0]["ref_label"] + origin_len = samples[0]["origin_len"] + ref_indices = [torch.from_numpy(d["ref_index"]) for d in samples] + path = samples[0]["root"] # [d['root'] for d in data] + root = [d["root"] for d in samples] + sample_id = [d["sample_id"] for d in samples] + + b_idx = [] + for i in range(batch_size): + b_idx.append(torch.ones(point_num[i]) * i) + points = [torch.from_numpy(d["point_feat"]) for d in samples] + ref_xyz = [torch.from_numpy(d["ref_xyz"]) for d in samples] + + has_labels = samples[0]["point_label"] is not None + if has_labels: + labels = [torch.from_numpy(d["point_label"]) for d in samples] + else: + labels = [d["point_label"] for d in samples] + normal = [torch.from_numpy(d["normal"]) for d in samples] + + return { + "points": torch.cat(points).float(), + "normal": torch.cat(normal).float(), + "ref_xyz": torch.cat(ref_xyz).float(), + "batch_idx": torch.cat(b_idx).long(), + "batch_size": batch_size, + "labels": torch.cat(labels).long().squeeze(1) if has_labels else labels, + "raw_labels": torch.from_numpy(ref_labels).long() if has_labels else ref_labels, + "origin_len": origin_len, + "indices": torch.cat(ref_indices).long(), + "path": path, + "point_num": point_num, + "root": root, + "sample_id": sample_id, + } + + +def get_sample( + points_fname: str, + model_cfg: dict, + label_fname: Optional[str] = None, + name: Optional[str] = None, + idx: Optional[int] = None, + has_intensity: bool = True, + measure_processing_time: bool = False, +) -> Tuple[dict, Optional[dict]]: + """Get sample data for mmdetection3d models + + :param points_fname: filename of the point cloud + :type points_fname: str + :param model_cfg: model configuration + :type model_cfg: dict + :param label_fname: filename of the semantic label, defaults to None + :type label_fname: Optional[str], optional + :param name: sample name, defaults to None + :type name: Optional[str], optional + :param idx: sample numerical index, defaults to None + :type idx: Optional[int], optional + :param has_intensity: whether the point cloud has intensity values, defaults to True + :type has_intensity: bool, optional + :param measure_processing_time: whether to measure processing time, defaults to False + :type measure_processing_time: bool, optional + :return: sample data dictionary and processing time dictionary (if measured) + :rtype: Tuple[dict, Optional[dict]] + """ + raw_data = ul.read_semantickitti_points(points_fname, has_intensity) + + labels, ref_labels = None, None + if label_fname is not None: + labels, _ = ul.read_semantickitti_label(label_fname) + labels = labels.reshape((-1, 1)).astype(np.uint8) + ref_labels = labels.copy() + + if measure_processing_time: + start = time.perf_counter() + + xyz = raw_data[:, :3] + feat = raw_data[:, 3:4] if model_cfg["n_feats"] > 3 else None + origin_len = len(xyz) + + ref_pc = xyz.copy() + ref_index = np.arange(len(ref_pc)) + + mask_x = np.logical_and( + xyz[:, 0] > model_cfg["min_volume_space"][0], + xyz[:, 0] < model_cfg["max_volume_space"][0], + ) + mask_y = np.logical_and( + xyz[:, 1] > model_cfg["min_volume_space"][1], + xyz[:, 1] < model_cfg["max_volume_space"][1], + ) + mask_z = np.logical_and( + xyz[:, 2] > model_cfg["min_volume_space"][2], + xyz[:, 2] < model_cfg["max_volume_space"][2], + ) + mask = np.logical_and(mask_x, np.logical_and(mask_y, mask_z)) + + not_zero = np.logical_not(np.all(xyz[:, :3] == 0, axis=1)) + mask = np.logical_and(mask, not_zero) + + xyz = xyz[mask] + if labels is not None: + labels = 
labels[mask] + ref_index = ref_index[mask] + if feat is not None: + feat = feat[mask] + point_num = len(xyz) + + feat = np.concatenate((xyz, feat), axis=1) if feat is not None else xyz + + unproj_normal_data = compute_normals_range(feat) + + if measure_processing_time: + end = time.perf_counter() + processing_time = {"preprocessing": end - start} + + sample = {} + sample["point_feat"] = feat + sample["point_label"] = labels + sample["ref_xyz"] = ref_pc + sample["ref_label"] = ref_labels + sample["ref_index"] = ref_index + sample["point_num"] = point_num + sample["origin_len"] = origin_len + sample["normal"] = unproj_normal_data + sample["root"] = points_fname + sample["sample_id"] = name + sample["idx"] = idx + + if measure_processing_time: + return sample, processing_time + + return sample + + +def inference( + sample: dict, + model: torch.nn.Module, + model_cfg: dict, + ignore_index: Optional[List[int]] = None, + measure_processing_time: bool = False, +) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[List[str]]]: + """Perform inference on a sample using an mmdetection3D model + + :param sample: sample data dictionary + :type sample: dict + :param model: mmdetection3D model + :type model: torch.nn.Module + :param model_cfg: model configuration + :type model_cfg: dict + :param ignore_index: list of class indices to ignore during inference, defaults to None + :type ignore_index: Optional[List[int]], optional + :param measure_processing_time: whether to measure processing time, defaults to False + :type measure_processing_time: bool, optional + :return: tuple of (predictions, labels, names) and processing time dictionary (if measured) + :rtype: Tuple[Tuple[torch.Tensor, Optional[torch.Tensor], List[str]], Optional[dict]] + """ + single_sample = not isinstance(sample["sample_id"], list) + if single_sample: + sample = collate_fn([sample]) + + device = next(model.parameters()).device + for k, v in sample.items(): + sample[k] = ut.data_to_device(v, device) + + if measure_processing_time: + torch.cuda.synchronize() + start = time.perf_counter() + pred = model(sample) + if measure_processing_time: + torch.cuda.synchronize() + end = time.perf_counter() + processing_time = {"inference_n_voxelization": end - start} + + if ignore_index is not None: + pred["logits"][:, ignore_index] = -1e9 + pred["logits"] = torch.argmax(pred["logits"], dim=1) + + has_labels = pred["labels"][0] is not None + preds, labels, names = ([], [], []) if has_labels else ([], None, None) + + for batch_idx in range(pred["batch_size"]): + preds.append(pred["logits"][pred["batch_idx"] == batch_idx]) + if has_labels: + labels.append(pred["labels"][pred["batch_idx"] == batch_idx]) + names.append(pred["sample_id"][batch_idx]) + + preds = torch.stack(preds, dim=0).squeeze() + if has_labels: + labels = torch.stack(labels, dim=0).squeeze() + + if measure_processing_time: + return (preds, labels, names), processing_time + + return preds, labels, names diff --git a/detectionmetrics/models/utils/mmdet3d.py b/detectionmetrics/models/utils/mmdet3d.py new file mode 100644 index 00000000..2dc6bea8 --- /dev/null +++ b/detectionmetrics/models/utils/mmdet3d.py @@ -0,0 +1,153 @@ +import time +from typing import List, Optional, Tuple + +from mmdet3d.datasets.transforms import ( + LoadPointsFromFile, + LoadAnnotations3D, + Pack3DDetInputs, +) +from mmengine.registry import FUNCTIONS +import torch +from torchvision.transforms import Compose + +COLLATE_FN = FUNCTIONS.get("pseudo_collate") + + +def get_sample( + points_fname: str, + model_cfg: 
dict, + label_fname: Optional[str] = None, + name: Optional[str] = None, + idx: Optional[int] = None, + has_intensity: bool = True, + measure_processing_time: bool = False, +) -> Tuple[dict, Optional[dict]]: + """Get sample data for mmdetection3d models + + :param points_fname: filename of the point cloud + :type points_fname: str + :param model_cfg: model configuration + :type model_cfg: dict + :param label_fname: filename of the semantic label, defaults to None + :type label_fname: Optional[str], optional + :param name: sample name, defaults to None + :type name: Optional[str], optional + :param idx: sample numerical index, defaults to None + :type idx: Optional[int], optional + :param has_intensity: whether the point cloud has intensity values, defaults to True + :type has_intensity: bool, optional + :param measure_processing_time: whether to measure processing time, defaults to False + :type measure_processing_time: bool, optional + :return: sample data and optionally processing time + :rtype: Tuple[ dict, Optional[dict] ] + """ + sample = { + "lidar_points": { + "lidar_path": points_fname, + "num_pts_feats": model_cfg.get("n_feats", 4), + }, + "pts_semantic_mask_path": label_fname, + "sample_id": name, + "sample_idx": idx, + "num_pts_feats": model_cfg.get("n_feats", 4), + "lidar_path": points_fname, + } + + n_feats = sample["num_pts_feats"] + load_dim = 4 if has_intensity else 3 + transforms = [ + LoadPointsFromFile(coord_type="LIDAR", load_dim=load_dim, use_dim=n_feats) + ] + if sample["pts_semantic_mask_path"] is not None: + transforms.append( + LoadAnnotations3D( + with_bbox_3d=False, + with_label_3d=False, + with_seg_3d=True, + seg_3d_dtype="np.uint32", + seg_offset=65536, + dataset_type="semantickitti", + ) + ) + transforms.append( + Pack3DDetInputs( + keys=["points", "pts_semantic_mask"], + meta_keys=["sample_idx", "lidar_path", "num_pts_feats", "sample_id"], + ) + ) + + if measure_processing_time: + start = time.perf_counter() + transforms = Compose(transforms) + sample = transforms(sample) + if measure_processing_time: + end = time.perf_counter() + return sample, {"preprocessing": end - start} + + return sample + + +def inference( + sample: dict, + model: torch.nn.Module, + model_cfg: dict, + ignore_index: Optional[List[int]] = None, + measure_processing_time: bool = False, +) -> Tuple[ + Tuple[torch.Tensor, Optional[torch.Tensor], Optional[List[str]]], Optional[dict] +]: + """Perform inference on a sample using an mmdetection3D model + + :param sample: sample data dictionary + :type sample: dict + :param model: mmdetection3D model + :type model: torch.nn.Module + :param model_cfg: model configuration + :type model_cfg: dict + :param measure_processing_time: whether to measure processing time, defaults to False + :type measure_processing_time: bool, optional + :param ignore_index: list of class indices to ignore during inference, defaults to None + :type ignore_index: Optional[List[int]], optional + :return: predictions, labels (if available), sample names and optionally processing time + :rtype: Tuple[ Tuple[torch.Tensor, Optional[torch.Tensor], Optional[List[str]]], Optional[dict] ] + """ + single_sample = not isinstance(sample["data_samples"], list) + if single_sample: + sample = COLLATE_FN([sample]) + + if measure_processing_time: + start = time.perf_counter() + sample = model.data_preprocessor(sample, training=False) + if measure_processing_time: + end = time.perf_counter() + processing_time = {"voxelization": end - start} + + inputs, data_samples = sample["inputs"], 
sample["data_samples"] + has_labels = hasattr(data_samples[0].gt_pts_seg, "pts_semantic_mask") + + if measure_processing_time: + torch.cuda.synchronize() + start = time.perf_counter() + outputs = model(inputs, data_samples, mode="predict") + if measure_processing_time: + torch.cuda.synchronize() + end = time.perf_counter() + processing_time["inference"] = end - start + + preds, labels, names = ([], [], []) if has_labels else ([], None, None) + for output in outputs: + if ignore_index is not None: + output.pts_seg_logits.pts_seg_logits[ignore_index] = -1e9 + pred = torch.argmax(output.pts_seg_logits.pts_seg_logits, dim=0) + preds.append(pred) + if has_labels: + labels.append(output.gt_pts_seg.pts_semantic_mask) + names.append(output.metainfo["sample_id"]) + preds = torch.stack(preds, dim=0).squeeze() + if has_labels: + labels = torch.stack(labels, dim=0).squeeze() + + if measure_processing_time: + return (preds, labels, names), processing_time + else: + return preds, labels, names diff --git a/detectionmetrics/models/utils/o3d/__init__.py b/detectionmetrics/models/utils/o3d/__init__.py new file mode 100644 index 00000000..945c3578 --- /dev/null +++ b/detectionmetrics/models/utils/o3d/__init__.py @@ -0,0 +1,216 @@ +import time +from typing import Optional, Tuple, Union, Dict + +import numpy as np +import torch + +try: + from open3d._ml3d.datasets.utils import DataProcessing +except Exception: + print("Open3D-ML3D not available") +from sklearn.neighbors import KDTree + +from detectionmetrics.models.utils.o3d import randlanet, kpconv +from detectionmetrics.utils import lidar as ul +import detectionmetrics.utils.torch as ut + + +def inference( + sample: Tuple[np.ndarray, np.ndarray, ul.Sampler], + model: torch.nn.Module, + model_cfg: dict, + measure_processing_time: bool = False, +) -> Union[ + Tuple[torch.Tensor, Optional[torch.Tensor], Optional[str]], + Tuple[torch.Tensor, Optional[torch.Tensor], Optional[str], Dict[str, float]], +]: + """Perform inference on a sample using an Open3D-ML model + + :param sample: sample data dictionary + :type sample: dict + :param model: Open3D-ML model + :type model: torch.nn.Module + :param model_cfg: model configuration + :type model_cfg: dict + :param measure_processing_time: whether to measure processing time, defaults to False + :type measure_processing_time: bool, optional + :return: predicted labels, ground truth labels, sample name and optionally processing time + :rtype: Union[ Tuple[torch.Tensor, Optional[torch.Tensor], Optional[str]], Tuple[torch.Tensor, Optional[torch.Tensor], Optional[str], Dict[str, float]] ] + """ + infer_complete = False + points, projected_indices, sampler, label, name, _ = sample + model_format = model_cfg["model_format"] + end_th = model_cfg.get("end_th", 0.5) + + processing_time = {"preprocessing": 0, "inference": 0, "postprocessing": 0} + + if "kpconv" in model_format: + transform_input = kpconv.transform_input + update_probs = kpconv.update_probs + elif "randlanet" in model_format: + decoder_layers = model.decoder.children() + model_cfg["num_layers"] = sum(1 for _ in decoder_layers) + transform_input = randlanet.transform_input + update_probs = randlanet.update_probs + else: + raise ValueError(f"Unknown model type: {model_format}") + + while not infer_complete: + # Get model input data + if measure_processing_time: + start = time.perf_counter() + input_data, selected_indices = transform_input(points, model_cfg, sampler) + if measure_processing_time: + end = time.perf_counter() + processing_time["preprocessing"] += end - 
start + + input_data = ut.data_to_device(input_data, model.device) + if "randlanet" in model_format: + input_data = ut.unsqueeze_data(input_data) + + # Perform inference + with torch.no_grad(): + if measure_processing_time: + torch.cuda.synchronize() + start = time.perf_counter() + pred = model(*input_data) + if measure_processing_time: + torch.cuda.synchronize() + end = time.perf_counter() + processing_time["inference"] += end - start + + # TODO: check if this is consistent across different models + if isinstance(pred, dict): + pred = pred["out"] + + # Update probabilities if sampler is used + if measure_processing_time: + start = time.perf_counter() + if sampler is not None: + if "kpconv" in model_format: + sampler.test_probs = update_probs( + pred, + selected_indices, + sampler.test_probs, + lengths=input_data[-1], + ) + else: + sampler.test_probs = update_probs( + pred, + selected_indices, + sampler.test_probs, + model_cfg["n_classes"], + ) + if sampler.p[sampler.p > end_th].shape[0] == sampler.p.shape[0]: + pred = sampler.test_probs[projected_indices] + infer_complete = True + else: + pred = pred.squeeze().cpu()[projected_indices].cuda() + infer_complete = True + if measure_processing_time: + end = time.perf_counter() + processing_time["postprocessing"] += end - start + + if label is not None: + label = torch.from_numpy(label.astype(np.int64)).long().cuda() + + result = torch.argmax(pred.squeeze(), axis=-1), label, name + + # Return processing time if needed + if measure_processing_time: + return result, processing_time + + return result + + +def get_sample( + points_fname: str, + model_cfg: dict, + label_fname: Optional[str] = None, + name: Optional[str] = None, + idx: Optional[int] = None, + has_intensity: bool = True, + measure_processing_time: bool = False, +) -> Tuple[ + Union[ + Tuple[np.ndarray, np.ndarray, ul.Sampler, np.ndarray, str, int], + Tuple[np.ndarray, np.ndarray, ul.Sampler, np.ndarray, str, int], + Dict[str, float], + ] +]: + """Get sample data for mmdetection3d models + + :param points_fname: filename of the point cloud + :type points_fname: str + :param model_cfg: model configuration + :type model_cfg: dict + :param label_fname: filename of the semantic label, defaults to None + :type label_fname: Optional[str], optional + :param name: sample name, defaults to None + :type name: Optional[str], optional + :param idx: sample numerical index, defaults to None + :type idx: Optional[int], optional + :param has_intensity: whether the point cloud has intensity values, defaults to True + :type has_intensity: bool, optional + :param measure_processing_time: whether to measure processing time, defaults to False + :type measure_processing_time: bool, optional + :return: sample data and optionally processing time + :rtype: Union[ Tuple[np.ndarray, np.ndarray, ul.Sampler, np.ndarray, str, int], Tuple[np.ndarray, np.ndarray, ul.Sampler, np.ndarray, str, int], Dict[str, float] ] + """ + points = ul.read_semantickitti_points(points_fname, has_intensity) + label = None + if label_fname is not None: + label, _ = ul.read_semantickitti_label(label_fname) + + if measure_processing_time: + start = time.perf_counter() + + # Keep only XYZ coordinates + points = np.array(points[:, 0:3], dtype=np.float32) + + # Subsample points using a grid of given size + grid_size = model_cfg.get("grid_size", 0.06) + sub_points = DataProcessing.grid_subsampling(points, grid_size=grid_size) + + # Create search tree so that we can project points back to the original point cloud + search_tree = 
KDTree(sub_points) + projected_indices = np.squeeze(search_tree.query(points, return_distance=False)) + projected_indices = projected_indices.astype(np.int32) + + # Init sampler + sampler = None + if "sampler" in model_cfg: + sampler = ul.Sampler( + sub_points.shape[0], + search_tree, + model_cfg["sampler"], + model_cfg["n_classes"], + ) + + if measure_processing_time: + end = time.perf_counter() + + sample = sub_points, projected_indices, sampler, label, name, idx + + # Return processing time if needed + if measure_processing_time: + processing_time = {"preprocessing": end - start} + return sample, processing_time + + return sample + + +def reset_sampler(sampler: ul.Sampler, num_points: int, num_classes: int): + """Reset sampler object probabilities + + :param sampler: Sampler object + :type sampler: ul.Sampler + :param num_points: Number of points in the point cloud + :type num_points: int + :param num_classes: Number of semantic classes + :type num_classes: int + """ + sampler.p = np.random.rand(num_points) * 1e-3 + sampler.min_p = float(np.min(sampler.p[-1])) + sampler.test_probs = np.zeros((num_points, num_classes), dtype=np.float32) + return sampler diff --git a/detectionmetrics/models/torch_model_utils/o3d_kpconv.py b/detectionmetrics/models/utils/o3d/kpconv.py similarity index 99% rename from detectionmetrics/models/torch_model_utils/o3d_kpconv.py rename to detectionmetrics/models/utils/o3d/kpconv.py index 01a0ba29..00a64f28 100644 --- a/detectionmetrics/models/torch_model_utils/o3d_kpconv.py +++ b/detectionmetrics/models/utils/o3d/kpconv.py @@ -1,4 +1,4 @@ -from typing import List, Optional, Tuple +from typing import List, Tuple import numpy as np diff --git a/detectionmetrics/models/torch_model_utils/o3d_randlanet.py b/detectionmetrics/models/utils/o3d/randlanet.py similarity index 99% rename from detectionmetrics/models/torch_model_utils/o3d_randlanet.py rename to detectionmetrics/models/utils/o3d/randlanet.py index 8caad287..1210b6a7 100644 --- a/detectionmetrics/models/torch_model_utils/o3d_randlanet.py +++ b/detectionmetrics/models/utils/o3d/randlanet.py @@ -109,4 +109,4 @@ def update_probs( test_probs = torch.tensor(test_probs, device=new_probs.device) test_probs[indices] = weight * test_probs[indices] + (1 - weight) * new_probs - return test_probs + return test_probs \ No newline at end of file diff --git a/detectionmetrics/models/utils/sphereformer.py b/detectionmetrics/models/utils/sphereformer.py new file mode 100644 index 00000000..226bf9f0 --- /dev/null +++ b/detectionmetrics/models/utils/sphereformer.py @@ -0,0 +1,204 @@ +import time +from typing import List, Optional, Tuple + +import numpy as np +import spconv.pytorch as spconv +import torch +from util.data_util import data_prepare + +import detectionmetrics.utils.torch as ut +import detectionmetrics.utils.lidar as ul + + +def collate_fn(samples: List[dict]) -> dict: + """Collate function for batching samples + + :param samples: list of sample dictionaries + :type samples: List[dict] + :return: collated batch dictionary + :rtype: dict + """ + coords, xyz, feats, labels, inds_recons, fnames, sample_ids = list(zip(*samples)) + inds_recons = list(inds_recons) + + accmulate_points_num = 0 + offset = [] + for i in range(len(coords)): + inds_recons[i] = accmulate_points_num + inds_recons[i] + accmulate_points_num += coords[i].shape[0] + offset.append(accmulate_points_num) + + coords = torch.cat(coords) + xyz = torch.cat(xyz) + feats = torch.cat(feats) + if any(label is None for label in labels): + labels = None + offset = 
torch.IntTensor(offset)
+    inds_recons = torch.cat(inds_recons)
+
+    return (
+        coords,
+        xyz,
+        feats,
+        labels,
+        offset,
+        inds_recons,
+        list(fnames),
+        list(sample_ids),
+    )
+
+
+def get_sample(
+    points_fname: str,
+    model_cfg: dict,
+    label_fname: Optional[str] = None,
+    name: Optional[str] = None,
+    idx: Optional[int] = None,
+    has_intensity: bool = True,
+    measure_processing_time: bool = False,
+) -> Tuple[dict, Optional[dict]]:
+    """Get sample data for SphereFormer models
+
+    :param points_fname: filename of the point cloud
+    :type points_fname: str
+    :param model_cfg: model configuration
+    :type model_cfg: dict
+    :param label_fname: filename of the semantic label, defaults to None
+    :type label_fname: Optional[str], optional
+    :param name: sample name, defaults to None
+    :type name: Optional[str], optional
+    :param idx: sample numerical index, defaults to None
+    :type idx: Optional[int], optional
+    :param has_intensity: whether the point cloud has intensity values, defaults to True
+    :type has_intensity: bool, optional
+    :param measure_processing_time: whether to measure processing time, defaults to False
+    :type measure_processing_time: bool, optional
+    :return: sample data dictionary and processing time dictionary (if measured)
+    :rtype: Tuple[dict, Optional[dict]]
+    """
+    feats = ul.read_semantickitti_points(points_fname, has_intensity)
+    feats = feats[:, : model_cfg["n_feats"]]
+
+    labels_in = None
+    if label_fname is not None:
+        annotated_data = np.fromfile(label_fname, dtype=np.uint32)
+        annotated_data = annotated_data.reshape((-1, 1))
+        labels_in = annotated_data.astype(np.uint8).reshape(-1)
+
+    if measure_processing_time:
+        start = time.perf_counter()
+
+    xyz = feats[:, :3]
+    xyz = np.clip(xyz, model_cfg["pc_range"][0], model_cfg["pc_range"][1])
+
+    coords, xyz, feats, labels, inds_reconstruct = data_prepare(
+        xyz,
+        feats,
+        labels_in,
+        "test",
+        np.array(model_cfg["voxel_size"]),
+        model_cfg["voxel_max"],
+        None,
+        model_cfg["xyz_norm"],
+    )
+
+    if measure_processing_time:
+        end = time.perf_counter()
+        processing_time = {"voxelization": end - start}
+
+    sample = (
+        coords,
+        xyz,
+        feats,
+        labels,
+        inds_reconstruct,
+        points_fname,
+        name,
+    )
+
+    if measure_processing_time:
+        return sample, processing_time
+
+    return sample
+
+
+def inference(
+    sample: dict,
+    model: torch.nn.Module,
+    model_cfg: dict,
+    ignore_index: Optional[List[int]] = None,
+    measure_processing_time: bool = False,
+) -> Tuple[Tuple[torch.Tensor, Optional[torch.Tensor], List[str]], Optional[dict]]:
+    """Perform inference on a sample using a SphereFormer model
+
+    :param sample: sample data dictionary
+    :type sample: dict
+    :param model: SphereFormer model
+    :type model: torch.nn.Module
+    :param model_cfg: model configuration
+    :type model_cfg: dict
+    :param measure_processing_time: whether to measure processing time, defaults to False
+    :type measure_processing_time: bool, optional
+    :param ignore_index: list of class indices to ignore during inference, defaults to None
+    :type ignore_index: Optional[List[int]], optional
+    :return: tuple of (predictions, labels, names) and processing time dictionary (if measured)
+    :rtype: Tuple[Tuple[torch.Tensor, Optional[torch.Tensor], List[str]], Optional[dict]]
+    """
+    single_sample = not isinstance(sample[-1], list)
+    if single_sample:
+        sample = collate_fn([sample])
+
+    device = next(model.parameters()).device
+    sample = ut.data_to_device(sample, device)
+
+    (
+        coord,
+        xyz,
+        feat,
+        labels,
+        offset,
+        inds_reconstruct,
+        fnames,
+        names,
+    ) =
sample + + if measure_processing_time: + start = time.perf_counter() + + offset_ = offset.clone() + offset_[1:] = offset_[1:] - offset_[:-1] + + batch = ( + torch.cat([torch.tensor([ii] * o) for ii, o in enumerate(offset_)], 0) + .long() + .to(device) + ) + + coord = torch.cat([batch.unsqueeze(-1), coord], -1) + spatial_shape = np.clip((coord.max(0)[0][1:] + 1).cpu().numpy(), 128, None) + batch_size = len(fnames) + + sinput = spconv.SparseConvTensor(feat, coord.int(), spatial_shape, batch_size) + if measure_processing_time: + end = time.perf_counter() + processing_time = {"preprocessing": end - start} + start = time.perf_counter() + + if measure_processing_time: + torch.cuda.synchronize() + start = time.perf_counter() + preds = model(sinput, xyz, batch) + if measure_processing_time: + torch.cuda.synchronize() + end = time.perf_counter() + processing_time["inference"] = end - start + + preds = preds[inds_reconstruct, :] + if ignore_index is not None: + preds[:, ignore_index] = -1e9 + preds = torch.argmax(preds, dim=1) + + if measure_processing_time: + return (preds, labels, names), processing_time + + return preds, labels, names diff --git a/detectionmetrics/utils/conversion.py b/detectionmetrics/utils/conversion.py index 9cf8e9ce..1518aec9 100644 --- a/detectionmetrics/utils/conversion.py +++ b/detectionmetrics/utils/conversion.py @@ -57,7 +57,8 @@ def get_ontology_conversion_lut( old_ontology: dict, new_ontology: dict, ontology_translation: Optional[dict] = None, - ignored_classes: Optional[List[str]] = None, + classes_to_remove: Optional[List[str]] = None, + lut_dtype: Optional[np.dtype] = np.uint8, ) -> np.ndarray: """Build a LUT that links old ontology and new ontology indices. If class names don't match between the provided ontologies, user must provide an ontology @@ -69,18 +70,20 @@ def get_ontology_conversion_lut( :type new_ontology: dict :param ontology_translation: Ontology translation dictionary, defaults to None :type ontology_translation: Optional[dict], optional - :param ignored_classes: Classes to ignore from the old ontology, defaults to None - :type ignored_classes: Optional[List[str]], optional + :param classes_to_remove: Classes to be removed from the old ontology, defaults to None + :type classes_to_remove: Optional[List[str]], optional + :param lut_dtype: Type for the ontology conversion LUT, defaults to np.uint8 + :type lut_dtype: Optional[np.dtype], optional :return: numpy array associating old and new ontology indices :rtype: np.ndarray """ - ignored_classes = [] if ignored_classes is None else ignored_classes + classes_to_remove = [] if classes_to_remove is None else classes_to_remove max_idx = max(class_data["idx"] for class_data in old_ontology.values()) - lut = np.zeros((max_idx + 1), dtype=np.uint8) + lut = np.zeros((max_idx + 1), dtype=lut_dtype) if ontology_translation is not None: - # Deleting ignored classes that exist in ontology_translation - for class_name in ignored_classes: + # Deleting requested classes from ontology translation + for class_name in classes_to_remove: if class_name in ontology_translation: del ontology_translation[class_name] @@ -91,7 +94,8 @@ def get_ontology_conversion_lut( lut[old_class_idx] = new_class_idx else: old_ontology = old_ontology.copy() - for class_name in ignored_classes: # Deleting ignored classes from old_ontology + # Deleting classes requested from old ontology + for class_name in classes_to_remove: del old_ontology[class_name] assert set(old_ontology.keys()) == set( # Checking ontology compatibility 
new_ontology.keys() diff --git a/detectionmetrics/utils/lidar.py b/detectionmetrics/utils/lidar.py index 21331782..3d2b4226 100644 --- a/detectionmetrics/utils/lidar.py +++ b/detectionmetrics/utils/lidar.py @@ -1,6 +1,6 @@ import numpy as np import random -from typing import List, Optional, Tuple +from typing import List, Optional, Tuple, Union import open3d as o3d from PIL import Image @@ -13,7 +13,21 @@ "front": np.array([1, 0, 0.5], dtype=np.float32), # Camera front vector "lookat": np.array([1, 0.0, 0.0], dtype=np.float32), # Point camera looks at "up": np.array([-0.5, 0, 1], dtype=np.float32), # Camera up direction - } + }, + "top": { + "zoom": 0.025, + "front": np.array([0, 0, -1], dtype=np.float32), # Looking straight down + "lookat": np.array([1, 0.0, 0.0], dtype=np.float32), # Same target point + "up": np.array([0, 1, 0], dtype=np.float32), # Y axis is "up" in image + }, + "side": { + "zoom": 0.012, + "front": np.array( + [0, -1, 0], dtype=np.float32 + ), # Looking from positive Y toward origin + "lookat": np.array([1, 0.0, 0.0], dtype=np.float32), # Same target point + "up": np.array([0, 0, 1], dtype=np.float32), # Z axis is up + }, } @@ -211,11 +225,13 @@ def view_point_cloud(points: np.ndarray, colors: np.ndarray): def render_point_cloud( points: np.ndarray, colors: np.ndarray, - camera_view: str = "3rd_person", + camera_view: Union[str, dict] = "3rd_person", bg_color: Optional[List[float]] = [0.0, 0.0, 0.0, 1.0], color_jitter: float = 0.05, point_size: float = 3.0, resolution: Tuple[int, int] = (1920, 1080), + render_origin: bool = False, + origin_size: float = 0.5, ) -> Image: """Render a given point cloud from a specific camera view and return the image @@ -223,8 +239,8 @@ def render_point_cloud( :type points: np.ndarray :param colors: Colors for the point cloud data :type colors: np.ndarray - :param camera_view: Camera view, defaults to "3rd_person" - :type camera_view: str, optional + :param camera_view: Camera view (either ID or dictionary containing camera definition), defaults to "3rd_person" + :type camera_view: Union[str, dict], optional :param bg_color: Background color, defaults to black -> [0., 0., 0., 1.] 
:type bg_color: Optional[List[float]], optional :param color_jitter: Jitters the colors by a random value between [-color_jitter, color_jitter], defaults to 0.05 @@ -233,11 +249,20 @@ def render_point_cloud( :type point_size: float, optional :param resolution: Render resolution, defaults to (1920, 1080) :type resolution: Tuple[int, int], optional + :param render_origin: Whether to render the origin axes, defaults to False + :type render_origin: bool, optional + :param origin_size: Size of the origin axes, defaults to 0.5 + :type origin_size: float, optional :return: Rendered point cloud :rtype: Image """ - assert camera_view in CAMERA_VIEWS, f"Camera view {camera_view} not implemented" - view_settings = CAMERA_VIEWS[camera_view] + if isinstance(camera_view, dict): + # If camera_view is a dictionary, use it directly + view_settings = camera_view + elif isinstance(camera_view, str): + # If camera_view is a string, look it up in predefined views + assert camera_view in CAMERA_VIEWS, f"Camera view {camera_view} not implemented" + view_settings = CAMERA_VIEWS[camera_view] # Add color jitter if needed if color_jitter > 0: @@ -258,6 +283,15 @@ def render_point_cloud( material.point_size = point_size renderer.scene.add_geometry("point_cloud", point_cloud, material) + # Add origin axes for reference + if render_origin: + coord_frame = o3d.geometry.TriangleMesh.create_coordinate_frame( + size=origin_size, origin=[0, 0, 0] + ) + coord_material = o3d.visualization.rendering.MaterialRecord() + coord_material.shader = "defaultUnlit" # Also unlit for visibility + renderer.scene.add_geometry("coordinate_frame", coord_frame, coord_material) + # Set the background color renderer.scene.set_background(bg_color) @@ -280,3 +314,36 @@ def render_point_cloud( renderer.scene.clear_geometry() return image + + +def read_semantickitti_points(fname: str, has_intensity: bool = True) -> np.ndarray: + """Read points from a binary file in SemanticKITTI format + + :param fname: Binary file containing points + :type fname: str + :param has_intensity: Whether the points have intensity values, defaults to True + :type has_intensity: bool + :return: Numpy array containing points + :rtype: np.ndarray + """ + points = np.fromfile(fname, dtype=np.float32) + points = points.reshape((-1, 4 if has_intensity else 3)) + if not has_intensity: + empty_intensity = np.zeros((points.shape[0], 1), dtype=np.float32) + points = np.concatenate([points, empty_intensity], axis=1) + return points + + +def read_semantickitti_label(fname: str) -> Tuple[np.ndarray, np.ndarray]: + """Read labels from a binary file in SemanticKITTI format + + :param fname: Binary file containing labels + :type fname: str + :return: Numpy arrays containing semantic and instance labels + :rtype: Tuple[np.ndarray, np.ndarray] + """ + label = np.fromfile(fname, dtype=np.uint32) + label = label.reshape((-1)) + semantic_label = label & 0xFFFF + instance_label = label >> 16 + return semantic_label, instance_label diff --git a/detectionmetrics/utils/segmentation_metrics.py b/detectionmetrics/utils/segmentation_metrics.py index a3d7ff6b..18652f35 100644 --- a/detectionmetrics/utils/segmentation_metrics.py +++ b/detectionmetrics/utils/segmentation_metrics.py @@ -1,6 +1,6 @@ from collections import defaultdict import math -from typing import Optional +from typing import List, Optional, Union import numpy as np import pandas as pd @@ -50,10 +50,6 @@ def update( if valid_mask is not None: mask &= valid_mask - # Update confusion matrix - if np.count_nonzero(gt >= 16): - pass - # 
Update confusion matrix new_entry = np.bincount( self.n_classes * gt[mask].astype(int) + pred[mask].astype(int), @@ -61,11 +57,11 @@ def update( ) self.confusion_matrix += new_entry.reshape(self.n_classes, self.n_classes) - def get_metric_names(self) -> list[str]: + def get_metric_names(self) -> List[str]: """Get available metric names :return: List of available metric names - :rtype: list[str] + :rtype: List[str] """ return self.METRIC_NAMES @@ -77,58 +73,58 @@ def get_confusion_matrix(self) -> np.ndarray: """ return self.confusion_matrix - def get_tp(self, per_class: bool = True) -> np.ndarray | int: + def get_tp(self, per_class: bool = True) -> Union[np.ndarray, int]: """True Positives :param per_class: Return per class TP, defaults to True :type per_class: bool, optional :return: True Positives - :rtype: np.ndarray | int + :rtype: Union[np.ndarray, int] """ tp = np.diag(self.confusion_matrix) return tp if per_class else int(np.nansum(tp)) - def get_fp(self, per_class: bool = True) -> np.ndarray | int: + def get_fp(self, per_class: bool = True) -> Union[np.ndarray, int]: """False Positives :param per_class: Return per class FP, defaults to True :type per_class: bool, optional :return: True Positives - :rtype: np.ndarray | int + :rtype: Union[np.ndarray, int] """ fp = self.confusion_matrix.sum(axis=0) - np.diag(self.confusion_matrix) return fp if per_class else int(np.nansum(fp)) - def get_fn(self, per_class: bool = True) -> np.ndarray | int: + def get_fn(self, per_class: bool = True) -> Union[np.ndarray, int]: """False negatives :param per_class: Return per class FN, defaults to True :type per_class: bool, optional :return: True Positives - :rtype: np.ndarray | int + :rtype: Union[np.ndarray, int] """ fn = self.confusion_matrix.sum(axis=1) - np.diag(self.confusion_matrix) return fn if per_class else int(np.nansum(fn)) - def get_tn(self, per_class: bool = True) -> np.ndarray | int: + def get_tn(self, per_class: bool = True) -> Union[np.ndarray, int]: """True negatives :param per_class: Return per class TN, defaults to True :type per_class: bool, optional :return: True Positives - :rtype: np.ndarray | int + :rtype: Union[np.ndarray, int] """ total = self.confusion_matrix.sum() tn = total - (self.get_tp() + self.get_fp() + self.get_fn()) return tn if per_class else int(np.nansum(tn)) - def get_precision(self, per_class: bool = True) -> np.ndarray | float: + def get_precision(self, per_class: bool = True) -> Union[np.ndarray, float]: """Precision = TP / (TP + FP) :param per_class: Return per class precision, defaults to True :type per_class: bool, optional :return: True Positives - :rtype: np.ndarray | int + :rtype: Union[np.ndarray, float] """ tp = self.get_tp(per_class) fp = self.get_fp(per_class) @@ -139,13 +135,13 @@ def get_precision(self, per_class: bool = True) -> np.ndarray | float: else: return np.where(denominator > 0, tp / denominator, np.nan) - def get_recall(self, per_class: bool = True) -> np.ndarray | float: + def get_recall(self, per_class: bool = True) -> Union[np.ndarray, float]: """Recall = TP / (TP + FN) :param per_class: Return per class recall, defaults to True :type per_class: bool, optional :return: True Positives - :rtype: np.ndarray | int + :rtype: Union[np.ndarray, float] """ tp = self.get_tp(per_class) fn = self.get_fn(per_class) @@ -156,13 +152,13 @@ def get_recall(self, per_class: bool = True) -> np.ndarray | float: else: return np.where(denominator > 0, tp / denominator, np.nan) - def get_accuracy(self, per_class: bool = True) -> np.ndarray | float: + def 
get_accuracy(self, per_class: bool = True) -> Union[np.ndarray, float]: """Accuracy = (TP + TN) / (TP + FP + FN + TN) :param per_class: Return per class accuracy, defaults to True :type per_class: bool, optional :return: True Positives - :rtype: np.ndarray | int + :rtype: Union[np.ndarray, float] """ tp = self.get_tp(per_class) fp = self.get_fp(per_class) @@ -175,13 +171,13 @@ def get_accuracy(self, per_class: bool = True) -> np.ndarray | float: else: return np.where(total > 0, (tp + tn) / total, np.nan) - def get_f1_score(self, per_class: bool = True) -> np.ndarray | float: + def get_f1_score(self, per_class: bool = True) -> Union[np.ndarray, float]: """F1-score = 2 * (Precision * Recall) / (Precision + Recall) :param per_class: Return per class F1 score, defaults to True :type per_class: bool, optional :return: True Positives - :rtype: np.ndarray | int + :rtype: Union[np.ndarray, float] """ precision = self.get_precision(per_class) recall = self.get_recall(per_class) @@ -196,13 +192,13 @@ def get_f1_score(self, per_class: bool = True) -> np.ndarray | float: denominator > 0, 2 * (precision * recall) / denominator, np.nan ) - def get_iou(self, per_class: bool = True) -> np.ndarray | float: + def get_iou(self, per_class: bool = True) -> Union[np.ndarray, float]: """IoU = TP / (TP + FP + FN) :param per_class: Return per class IoU, defaults to True :type per_class: bool, optional :return: True Positives - :rtype: np.ndarray | int + :rtype: Union[np.ndarray, float] """ tp = self.get_tp(per_class) fp = self.get_fp(per_class) @@ -242,7 +238,7 @@ def get_averaged_metric( def get_metric_per_name( self, metric_name: str, per_class: bool = True - ) -> np.ndarray | float | int: + ) -> Union[np.ndarray, float, int]: """Get metric value by name :param metric_name: Name of the metric to compute @@ -250,7 +246,7 @@ def get_metric_per_name( :param per_class: Return per class metric, defaults to True :type per_class: bool, optional :return: Metric value - :rtype: np.ndarray | float | int + :rtype: Union[np.ndarray, float, int] """ return getattr(self, f"get_{metric_name}")(per_class=per_class) diff --git a/detectionmetrics/utils/torch.py b/detectionmetrics/utils/torch.py new file mode 100644 index 00000000..ecb7a633 --- /dev/null +++ b/detectionmetrics/utils/torch.py @@ -0,0 +1,65 @@ +from typing import Union + +import torch + + +def data_to_device( + data: Union[tuple, list], device: torch.device +) -> Union[tuple, list]: + """Move provided data to given device (CPU or GPU) + + :param data: Data provided (it can be a single or multiple tensors) + :type data: Union[tuple, list] + :param device: Device to move data to + :type device: torch.device + :return: Data moved to device + :rtype: Union[tuple, list] + """ + if isinstance(data, (tuple, list)): + return type(data)( + d.to(device) if torch.is_tensor(d) else data_to_device(d, device) + for d in data + ) + elif torch.is_tensor(data): + return data.to(device) + else: + return data + + +def get_data_shape(data: Union[tuple, list]) -> Union[tuple, list]: + """Get the shape of the provided data + + :param data: Data provided (it can be a single or multiple tensors) + :type data: Union[tuple, list] + :return: Data shape + :rtype: Union[tuple, list] + """ + if isinstance(data, (tuple, list)): + return type(data)( + tuple(d.shape) if torch.is_tensor(d) else get_data_shape(d) for d in data + ) + elif torch.is_tensor(data): + return tuple(data.shape) + else: + return tuple(data.shape) + + +def unsqueeze_data(data: Union[tuple, list], dim: int = 0) -> Union[tuple, 
list]: + """Unsqueeze provided data along given dimension + + :param data: Data provided (it can be a single or multiple tensors) + :type data: Union[tuple, list] + :param dim: Dimension that will be unsqueezed, defaults to 0 + :type dim: int, optional + :return: Unsqueezed data + :rtype: Union[tuple, list] + """ + if isinstance(data, (tuple, list)): + return type(data)( + d.unsqueeze(dim) if torch.is_tensor(d) else unsqueeze_data(d, dim) + for d in data + ) + elif torch.is_tensor(data): + return data.unsqueeze(dim) + else: + return data diff --git a/docs/_pages/home.md b/docs/_pages/home.md index 6ebb9570..c44f7b39 100644 --- a/docs/_pages/home.md +++ b/docs/_pages/home.md @@ -49,8 +49,8 @@ Now, we're excited to introduce ***DetectionMetrics v2***! While retaining the f LiDAR - Rellis3D, GOOSE, custom GAIA format - PyTorch (tested with RandLA-Net and KPConv from Open3D-ML) + Rellis3D, GOOSE, WildScenes, custom GAIA format + PyTorch (tested with Open3D-ML, mmdetection3d, SphereFormer, and LSK3DNet models) Object detection Image diff --git a/docs/_pages/v2/compatibility.md b/docs/_pages/v2/compatibility.md index 59b127cb..6db61a15 100644 --- a/docs/_pages/v2/compatibility.md +++ b/docs/_pages/v2/compatibility.md @@ -9,8 +9,10 @@ sidebar: ## Image semantic segmentation - Datasets: + - **[RUGD](http://rugd.vision/)** - **[Rellis3D](https://www.unmannedlab.org/research/RELLIS-3D)** - **[GOOSE](https://goose-dataset.de/)** + - **[WildScenes](https://csiro-robotics.github.io/WildScenes/)** - **Custom GAIA format**: *Parquet* file containing samples and labels relative paths and a JSON file with the dataset ontology. - **Generic**: simply assumes a different directory per split, different suffixes for samples and labels, and a JSON file containing the dataset ontology. - Models: @@ -52,36 +54,115 @@ sidebar: - Datasets: - **[Rellis3D](https://www.unmannedlab.org/research/RELLIS-3D)** - **[GOOSE](https://goose-dataset.de/)** + - **[WildScenes](https://csiro-robotics.github.io/WildScenes/)** - **Custom GAIA format**: *Parquet* file containing samples and labels relative paths and a JSON file with the dataset ontology. - **Generic**: simply assumes a different directory per split, different suffixes for samples and labels, and a JSON file containing the dataset ontology. - Models: - - **PyTorch ([TorchScript](https://pytorch.org/docs/stable/jit.html) compiled format and native modules)**. As of now, we have tested RandLA-Net and KPConv from [Open3D-ML](https://github.com/isl-org/Open3D-ML). + - **PyTorch ([TorchScript](https://pytorch.org/docs/stable/jit.html) compiled format and native modules)**. As of now, we have tested Open3D-ML, mmdetection3d, SphereFormer, and LSK3DNet models. - Input shape: defined by the `input_format` tag. 
- Output shape: `(num_points)` - - JSON configuration file format: + - JSON configuration file format examples (different depending on the model): ```json { - "seed": 42, - "input_format": "o3d_randlanet", + "model_format": <"o3d_randlanet" | "o3d_kpconv" | "mmdet3d" | "sphereformer" | "lsk3dnet">, + "n_feats": <3|4>, // without/with intensity + "seed": , + // -- EXTRA PARAMETERS PER MODEL (EXAMPLES) -- + // o3d kpconv "sampler": "spatially_regular", + "min_in_points": 10000, + "max_in_points": 20000, + "in_radius": 4.0, "recenter": { "dims": [ 0, - 1 + 1, + 2 ] }, - "ignored_classes": [ - "void" + "first_subsampling_dl": 0.075, + "conv_radius": 2.5, + "architecture": [ + "simple", + "resnetb", + "resnetb_strided", + "resnetb", + "resnetb", + "resnetb_strided", + "resnetb", + "resnetb", + "resnetb_strided", + "resnetb", + "resnetb", + "resnetb_strided", + "resnetb", + "nearest_upsample", + "unary", + "nearest_upsample", + "unary", + "nearest_upsample", + "unary", + "nearest_upsample", + "unary" ], + "num_layers": 5, + "num_points": 45056, + "grid_size": 0.075, + "num_neighbors": 16, + "sub_sampling_ratio": [ + 4, + 4, + 4, + 4 + ], + // o3d randlanet + "sampler": "spatially_regular", + "recenter": { + "dims": [ + 0, + 1 + ] + }, "num_points": 45056, - "grid_size": 0.06, + "grid_size": 0.075, "num_neighbors": 16, "sub_sampling_ratio": [ 4, 4, 4, 4 + ], + // sphereformer + "voxel_size": [ + 0.05, + 0.05, + 0.05 + ], + "voxel_max": 120000, + "pc_range": [ + [ + -22, + -17, + -4 + ], + [ + 30, + 18, + 13 + ] + ], + "xyz_norm": false, + // lsk3dnet + "min_volume_space": [ + -120, + -120, + -6 + ], + "max_volume_space": [ + 120, + 120, + 11 ] } ``` diff --git a/docs/_pages/v2/usage.md b/docs/_pages/v2/usage.md index 689306a4..d2dcdc46 100644 --- a/docs/_pages/v2/usage.md +++ b/docs/_pages/v2/usage.md @@ -67,7 +67,7 @@ Usage: dm_evaluate [OPTIONS] {segmentation|detection} {image|lidar} Evaluate model on dataset Options: - --model_format [torch|tensorflow|tensorflow_explicit] + --model_format [torch|tensorflow] Trained model format [default: torch] --model PATH Trained model filename (TorchScript) or directory (TensorFlow SavedModel) diff --git a/examples/gaia_image.py b/examples/gaia_image.py index 1718aca1..4a8f3a88 100644 --- a/examples/gaia_image.py +++ b/examples/gaia_image.py @@ -1,4 +1,5 @@ import argparse +import json from detectionmetrics.datasets.gaia import GaiaImageSegmentationDataset @@ -13,6 +14,16 @@ def parse_args() -> argparse.Namespace: parser.add_argument( "--dataset", type=str, required=True, help="Parquet dataset file" ) + parser.add_argument( + "--new_ontology", + type=str, + help="New ontology JSON file name", + ) + parser.add_argument( + "--ontology_translation", + type=str, + help="Ontology translation JSON file name", + ) parser.add_argument( "--outdir", type=str, @@ -41,11 +52,26 @@ def main(): """Main function""" args = parse_args() + new_ontology, ontology_translation = None, None + if args.new_ontology is not None: + with open(args.new_ontology, "r", encoding="utf-8") as f: + new_ontology = json.load(f) + + if args.ontology_translation is not None: + with open(args.ontology_translation, "r", encoding="utf-8") as f: + ontology_translation = json.load(f) + dataset = GaiaImageSegmentationDataset(dataset_fname=args.dataset) if args.split: dataset.dataset = dataset.dataset[dataset.dataset["split"] == args.split] dataset.has_label_count = False - dataset.export(outdir=args.outdir, resize=args.resize) + + dataset.export( + outdir=args.outdir, + resize=args.resize, + 
new_ontology=new_ontology, + ontology_translation=ontology_translation, + ) if __name__ == "__main__": diff --git a/examples/gaia_lidar.py b/examples/gaia_lidar.py index b5e1d8a3..4280bb36 100644 --- a/examples/gaia_lidar.py +++ b/examples/gaia_lidar.py @@ -1,4 +1,5 @@ import argparse +import json from detectionmetrics.datasets.gaia import GaiaLiDARSegmentationDataset @@ -13,6 +14,23 @@ def parse_args() -> argparse.Namespace: parser.add_argument( "--dataset", type=str, required=True, help="Parquet dataset file" ) + parser.add_argument( + "--new_ontology", + type=str, + help="New ontology JSON file name", + ) + parser.add_argument( + "--ontology_translation", + type=str, + help="Ontology translation JSON file name", + ) + parser.add_argument( + "--outdir", + type=str, + required=True, + help="Directory where dataset will be stored in common format", + ) + return parser.parse_args() @@ -20,7 +38,22 @@ def main(): """Main function""" args = parse_args() - GaiaLiDARSegmentationDataset(dataset_fname=args.dataset) + new_ontology, ontology_translation = None, None + if args.new_ontology is not None: + with open(args.new_ontology, "r", encoding="utf-8") as f: + new_ontology = json.load(f) + + if args.ontology_translation is not None: + with open(args.ontology_translation, "r", encoding="utf-8") as f: + ontology_translation = json.load(f) + + dataset = GaiaLiDARSegmentationDataset(dataset_fname=args.dataset) + + dataset.export( + args.outdir, + new_ontology=new_ontology, + ontology_translation=ontology_translation, + ) if __name__ == "__main__": diff --git a/examples/goose_lidar.py b/examples/goose_lidar.py index 0ecc9693..3f860663 100644 --- a/examples/goose_lidar.py +++ b/examples/goose_lidar.py @@ -1,4 +1,5 @@ import argparse +import json from detectionmetrics.datasets.goose import GOOSELiDARSegmentationDataset @@ -26,6 +27,16 @@ def parse_args() -> argparse.Namespace: type=str, help="Directory where test dataset split is stored", ) + parser.add_argument( + "--new_ontology", + type=str, + help="New ontology JSON file name", + ) + parser.add_argument( + "--ontology_translation", + type=str, + help="Ontology translation JSON file name", + ) parser.add_argument( "--outdir", type=str, @@ -40,12 +51,25 @@ def main(): """Main function""" args = parse_args() + new_ontology, ontology_translation = None, None + if args.new_ontology is not None: + with open(args.new_ontology, "r", encoding="utf-8") as f: + new_ontology = json.load(f) + + if args.ontology_translation is not None: + with open(args.ontology_translation, "r", encoding="utf-8") as f: + ontology_translation = json.load(f) + dataset = GOOSELiDARSegmentationDataset( train_dataset_dir=args.train_dataset_dir, val_dataset_dir=args.val_dataset_dir, test_dataset_dir=args.test_dataset_dir, ) - dataset.export(args.outdir) + dataset.export( + args.outdir, + new_ontology=new_ontology, + ontology_translation=ontology_translation, + ) if __name__ == "__main__": diff --git a/examples/merge_datasets.py b/examples/merge_datasets.py index 87ce0243..cf9201a3 100644 --- a/examples/merge_datasets.py +++ b/examples/merge_datasets.py @@ -1,6 +1,6 @@ import argparse -from detectionmetrics.datasets.gaia import GaiaImageSegmentationDataset +from detectionmetrics.datasets.gaia import GaiaImageSegmentationDataset, GaiaLiDARSegmentationDataset def parse_args() -> argparse.Namespace: @@ -23,6 +23,13 @@ def parse_args() -> argparse.Namespace: required=True, help="Directory where merged dataset will be stored", ) + parser.add_argument( + "--dataset_type", + type=str, + 
choices=["image", "lidar"], + required=True, + help="Type of datasets to merge", + ) return parser.parse_args() @@ -31,7 +38,14 @@ def main(): """Main function""" args = parse_args() - datasets = [GaiaImageSegmentationDataset(fname) for fname in args.datasets] + if args.dataset_type == "image": + dataset_class = GaiaImageSegmentationDataset + elif args.dataset_type == "lidar": + dataset_class = GaiaLiDARSegmentationDataset + else: + raise ValueError(f"Unknown dataset type: {args.dataset_type}") + + datasets = [dataset_class(fname) for fname in args.datasets] main_dataset = datasets[0] for extra_dataset in datasets[1:]: main_dataset.append(extra_dataset) diff --git a/examples/rellis3d_lidar.py b/examples/rellis3d_lidar.py index a5a1cc93..cb5bf48e 100644 --- a/examples/rellis3d_lidar.py +++ b/examples/rellis3d_lidar.py @@ -1,4 +1,5 @@ import argparse +import json from detectionmetrics.datasets.rellis3d import Rellis3DLiDARSegmentationDataset @@ -28,6 +29,16 @@ def parse_args() -> argparse.Namespace: required=True, help="YAML file containing dataset ontology", ) + parser.add_argument( + "--new_ontology", + type=str, + help="New ontology JSON file name", + ) + parser.add_argument( + "--ontology_translation", + type=str, + help="Ontology translation JSON file name", + ) parser.add_argument( "--outdir", type=str, @@ -42,12 +53,25 @@ def main(): """Main function""" args = parse_args() + new_ontology, ontology_translation = None, None + if args.new_ontology is not None: + with open(args.new_ontology, "r", encoding="utf-8") as f: + new_ontology = json.load(f) + + if args.ontology_translation is not None: + with open(args.ontology_translation, "r", encoding="utf-8") as f: + ontology_translation = json.load(f) + dataset = Rellis3DLiDARSegmentationDataset( dataset_dir=args.dataset_dir, split_dir=args.split_dir, ontology_fname=args.ontology_fname, ) - dataset.export(args.outdir) + dataset.export( + outdir=args.outdir, + new_ontology=new_ontology, + ontology_translation=ontology_translation, + ) if __name__ == "__main__": diff --git a/examples/store_image_video.py b/examples/store_image_video.py index 7e25d52e..20ab3694 100644 --- a/examples/store_image_video.py +++ b/examples/store_image_video.py @@ -104,7 +104,7 @@ def main(): if model is not None: image = Image.open(sample_data["image"]) - label = model.inference(image) + label = model.predict(image) lut = uc.ontology_to_rgb_lut(model.ontology) else: label = Image.open(sample_data["label"]) diff --git a/examples/store_lidar_video.py b/examples/store_lidar_video.py index e280eadf..eb66ee4e 100644 --- a/examples/store_lidar_video.py +++ b/examples/store_lidar_video.py @@ -112,10 +112,10 @@ def main(): point_cloud = dataset.read_points(sample_data["points"]) if model is not None: - label = model.inference(point_cloud) + label = model.predict(point_cloud) lut = uc.ontology_to_rgb_lut(model.ontology) else: - label, _ = dataset.read_label(sample_data["label"]) + label = dataset.read_label(sample_data["label"]) lut = uc.ontology_to_rgb_lut(dataset.ontology) colors = lut[label] / 255.0 diff --git a/examples/tensorflow_image.py b/examples/tensorflow_image.py index 058c2928..024910ec 100644 --- a/examples/tensorflow_image.py +++ b/examples/tensorflow_image.py @@ -73,7 +73,7 @@ def main(): if args.image is not None: image = Image.open(args.image).convert("RGB") - result = model.inference(image) + result = model.predict(image) result = uc.label_to_rgb(result, model.ontology) result.show() diff --git a/examples/torch_image.py b/examples/torch_image.py index 
aeca7cb1..6410136c 100644 --- a/examples/torch_image.py +++ b/examples/torch_image.py @@ -73,7 +73,7 @@ def main(): if args.image is not None: image = Image.open(args.image).convert("RGB") - result = model.inference(image) + result = model.predict(image) result = uc.label_to_rgb(result, model.ontology) result.show() diff --git a/examples/torch_lidar.py b/examples/torch_lidar.py index 0dfab360..e6351b64 100644 --- a/examples/torch_lidar.py +++ b/examples/torch_lidar.py @@ -57,6 +57,13 @@ def parse_args() -> argparse.Namespace: required=False, help="JSON file containing translation between dataset and model classes", ) + parser.add_argument( + "--translation_direction", + type=str, + choices=["dataset_to_model", "model_to_dataset"], + default="dataset_to_model", + help="Direction of the ontology translation", + ) parser.add_argument( "--predictions_outdir", type=str, @@ -75,16 +82,17 @@ def main(): dataset = GaiaLiDARSegmentationDataset(args.dataset) if args.point_cloud is not None: - point_cloud = dataset.read_points(args.point_cloud) - result = model.inference(point_cloud) + result = model.predict(args.point_cloud) lut = uc.ontology_to_rgb_lut(model.ontology) colors = lut[result] / 255.0 + point_cloud = dataset.read_points(args.point_cloud) ul.view_point_cloud(point_cloud[:, :3], colors) results = model.eval( dataset, split=args.split, ontology_translation=args.ontology_translation, + translation_direction=args.translation_direction, predictions_outdir=args.predictions_outdir, results_per_sample=args.predictions_outdir is not None, ) diff --git a/examples/torch_native_image.py b/examples/torch_native_image.py index ab590098..f74c64c7 100644 --- a/examples/torch_native_image.py +++ b/examples/torch_native_image.py @@ -79,7 +79,7 @@ def main(): if args.image is not None: image = Image.open(args.image).convert("RGB") - result = model.inference(image) + result = model.predict(image) result = uc.label_to_rgb(result, model.ontology) result.show() diff --git a/examples/tutorial_image_segmentation.ipynb b/examples/tutorial_image_segmentation.ipynb index a170caae..274eb651 100644 --- a/examples/tutorial_image_segmentation.ipynb +++ b/examples/tutorial_image_segmentation.ipynb @@ -130,7 +130,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -158,7 +158,7 @@ "label = Image.open(label_fname)\n", "label = uc.label_to_rgb(label, dataset.ontology)\n", "\n", - "pred = model.inference(image)\n", + "pred = model.predict(image)\n", "pred = uc.label_to_rgb(pred, model.ontology)\n", "pred = pred.resize(label.size)\n", "\n", diff --git a/pyproject.toml b/pyproject.toml index cfe250bd..3251c6c6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,7 @@ license = "LICENSE" [tool.poetry.dependencies] python = "^3.10" -tqdm = "^4.67.0" +tqdm = "^4.65.0" pandas = "^2.2.3" PyYAML = "^6.0.2" pyarrow = "^18.0.0" @@ -18,7 +18,7 @@ opencv-python-headless = "^4.10.0.84" scikit-learn = "^1.6.0" open3d = "^0.19.0" addict = "^2.4.0" -matplotlib = "^3.10.0" +matplotlib = "^3.6.0" click = "^8.1.8" tensorboard = "^2.18.0" pycocotools = { version = "^2.0.7", markers = "sys_platform != 'win32'" }
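
Editor's note: two minimal, self-contained sketches of the conventions the code added in this patch relies on. They are illustrative only; the file names, class ids, and matrix values below are made up, and the snippets mirror (rather than import) the new helpers so they run with plain NumPy.

The first sketch shows the on-disk layout that `read_semantickitti_points` and `read_semantickitti_label` assume: points as flattened float32 `(x, y, z, intensity)` records, and labels as uint32 values with the instance id packed in the upper 16 bits and the semantic class id in the lower 16 bits.

```python
import numpy as np

# Dummy scan: N points as float32 (x, y, z, intensity), flattened to disk.
# This matches what read_semantickitti_points expects when has_intensity=True.
num_points = 4
points = np.random.rand(num_points, 4).astype(np.float32)
points.tofile("dummy_scan.bin")  # hypothetical file name

# Labels pack instance id (upper 16 bits) and semantic id (lower 16 bits).
semantic = np.array([10, 40, 44, 48], dtype=np.uint32)  # illustrative class ids
instance = np.array([0, 1, 0, 2], dtype=np.uint32)      # illustrative instance ids
packed = (instance << 16) | semantic
packed.tofile("dummy_scan.label")

# Decode exactly as read_semantickitti_label does.
label = np.fromfile("dummy_scan.label", dtype=np.uint32).reshape(-1)
decoded_semantic = label & 0xFFFF
decoded_instance = label >> 16
assert np.array_equal(decoded_semantic, semantic)
assert np.array_equal(decoded_instance, instance)

# Points read back as (N, 4): x, y, z, intensity.
scan = np.fromfile("dummy_scan.bin", dtype=np.float32).reshape(-1, 4)
print(scan.shape, decoded_semantic, decoded_instance)
```

The second sketch reproduces, on a toy confusion matrix, the per-class formulas used by the metrics in `segmentation_metrics.py` (rows index ground truth, columns index predictions, as in the bincount-based update).

```python
import numpy as np

# Toy 3-class confusion matrix: rows = ground truth, columns = prediction.
cm = np.array([[50, 2, 3],
               [4, 40, 1],
               [0, 5, 45]], dtype=np.int64)

tp = np.diag(cm)                   # get_tp: correctly classified per class
fp = cm.sum(axis=0) - np.diag(cm)  # get_fp: predicted as class but wrong
fn = cm.sum(axis=1) - np.diag(cm)  # get_fn: class missed by the prediction
denom = tp + fp + fn
iou = np.where(denom > 0, tp / denom, np.nan)  # get_iou = TP / (TP + FP + FN)
print(iou, np.nanmean(iou))        # per-class IoU and a macro-averaged mIoU
```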