diff --git a/.DS_Store b/.DS_Store
deleted file mode 100644
index d98352b4..00000000
Binary files a/.DS_Store and /dev/null differ
diff --git a/.gitignore b/.gitignore
index 63357d2a..eb3d75b0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,4 +7,6 @@ __pycache__
dist
poetry.lock
-local/
\ No newline at end of file
+local/
+
+.DS_Store
\ No newline at end of file
diff --git a/README.md b/README.md
index ca8c6a62..e6f3619a 100644
--- a/README.md
+++ b/README.md
@@ -41,8 +41,8 @@ Now, we're excited to introduce ***DetectionMetrics v2***! While retaining the f
| LiDAR |
- Rellis3D, GOOSE, custom GAIA format |
- PyTorch (tested with RandLA-Net and KPConv from Open3D-ML) |
+ Rellis3D, GOOSE, WildScenes, custom GAIA format |
+ PyTorch (tested with Open3D-ML, mmdetection3d, SphereFormer, and LSK3DNet models) |
| Object detection |
Image |
diff --git a/detectionmetrics/cli/batch.py b/detectionmetrics/cli/batch.py
index 0dae173a..a9ebd323 100644
--- a/detectionmetrics/cli/batch.py
+++ b/detectionmetrics/cli/batch.py
@@ -1,4 +1,4 @@
-from itertools import product
+from itertools import product, chain
from glob import glob
import os
@@ -30,9 +30,19 @@ def batch(command, jobs_cfg):
for model_cfg in jobs_cfg["model"]:
model_path = model_cfg["path"]
- model_paths = glob(model_path) if model_cfg["path_is_pattern"] else [model_path]
- assert model_paths, f"No files found for pattern {model_cfg['path']}"
+ is_pattern = model_cfg.get("path_is_pattern", False)
+ if isinstance(model_path, list):
+ if is_pattern:
+ model_paths = list(chain.from_iterable(glob(p) for p in model_path))
+ else:
+ model_paths = model_path
+ else:
+ model_paths = glob(model_path) if is_pattern else [model_path]
+
+ if not model_paths:
+ raise FileNotFoundError(f"No files found for path/pattern: {model_path}")
+ print(f"Found {len(model_paths)} model(s) for path/pattern: {model_path}")
for new_path in model_paths:
assert os.path.exists(new_path), f"File or directory {new_path} not found"
@@ -41,7 +51,8 @@ def batch(command, jobs_cfg):
if os.path.isfile(new_path):
new_model_id, _ = os.path.splitext(new_model_id)
- new_model_cfg = model_cfg | {
+ new_model_cfg = {
+ **model_cfg,
"path": new_path,
"id": f"{model_cfg['id']}-{new_model_id.replace('-', '_')}",
}
@@ -102,9 +113,20 @@ def batch(command, jobs_cfg):
"model": model_cfg["path"],
"model_ontology": model_cfg["ontology"],
"model_cfg": model_cfg["cfg"],
- # "image_size": model_cfg.get("image_size", None),
}
)
+
+ if command == "computational_cost":
+ if jobs_cfg["input_type"] == "image":
+ params["image_size"] = model_cfg.get("image_size", [512, 512])
+ elif jobs_cfg["input_type"] == "lidar":
+ params["point_cloud_range"] = model_cfg.get(
+ "point_cloud_range", [-50, -50, -5, 50, 50, 5]
+ )
+ params["num_points"] = model_cfg.get("num_points", 100000)
+ else:
+ raise ValueError(f"Unknown input type: {jobs_cfg['input_type']}")
+
if has_dataset:
dataset_cfg = job_components[1]
params.update(
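
For illustration, a minimal sketch of a `model` entry that the new path handling accepts (the keys shown come from this diff; the values and any other schema fields are assumptions):

# Hypothetical jobs_cfg "model" entry: "path" may be a single path, a glob
# pattern, or a list of either; patterns are expanded when "path_is_pattern" is set.
model_entry = {
    "id": "baseline",
    "path": ["weights/run_a/*.pt", "weights/run_b/*.pt"],  # illustrative glob patterns
    "path_is_pattern": True,
    "ontology": "ontology.json",
    "cfg": "model_cfg.json",
}
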
diff --git a/detectionmetrics/cli/computational_cost.py b/detectionmetrics/cli/computational_cost.py
index 951f8a47..402b24c7 100644
--- a/detectionmetrics/cli/computational_cost.py
+++ b/detectionmetrics/cli/computational_cost.py
@@ -1,7 +1,6 @@
import click
from detectionmetrics import cli
-from detectionmetrics.utils.io import read_json
@click.command(name="computational_cost", help="Estimate model computational cost")
@@ -12,9 +11,7 @@
# model
@click.option(
"--model_format",
- type=click.Choice(
- ["torch", "tensorflow", "tensorflow_explicit"], case_sensitive=False
- ),
+ type=click.Choice(["torch", "tensorflow"], case_sensitive=False),
show_default=True,
default="torch",
help="Trained model format",
@@ -39,14 +36,35 @@
)
@click.option(
"--image_size",
- type=(int, int),
+ nargs=2,
+ type=int,
required=False,
- help="Dummy image size used for computational cost estimation",
+ help="Dummy image size. Should be provided as two integers: height width",
+)
+@click.option(
+ "--point_cloud_range",
+ nargs=6,
+ type=int,
+ required=False,
+ help="Dummy point cloud range (meters). Should be provided as six integers: x_min y_min z_min x_max y_max z_max",
+)
+@click.option(
+ "--num_points",
+ type=int,
+ required=False,
+ help="Number of points for the dummy point cloud (uniformly sampled)",
+)
+@click.option(
+ "--has_intensity",
+ is_flag=True,
+ default=False,
+ help="Whether the dummy point cloud has intensity values",
)
# output
@click.option(
"--out_fname",
type=click.Path(writable=True),
+ required=True,
help="CSV file where the computational cost estimation results will be stored",
)
def computational_cost(
@@ -57,23 +75,46 @@ def computational_cost(
model_ontology,
model_cfg,
image_size,
+ point_cloud_range,
+ num_points,
+ has_intensity,
out_fname,
):
"""Estimate model computational cost"""
-
- if image_size is None:
- parsed_model_cfg = read_json(model_cfg)
- if "image_size" in parsed_model_cfg:
- image_size = parsed_model_cfg["image_size"]
- else:
+ if input_type == "image":
+ if image_size is None:
+ raise ValueError("Image size must be provided for image models")
+ if point_cloud_range is not None or num_points is not None:
+ raise ValueError(
+ "Point cloud range and number of points cannot be provided for image models"
+ )
+ if has_intensity:
+ raise ValueError("Intensity flag cannot be set for image models")
+ params = {"image_size": image_size}
+ elif input_type == "lidar":
+ if point_cloud_range is None or num_points is None:
raise ValueError(
- "Image size must be provided either as an argument or in the model configuration file"
+ "Point cloud range and number of points must be provided for lidar models"
)
+ if image_size is not None:
+ raise ValueError("Image size cannot be provided for lidar models")
+
+ params = {
+ "point_cloud_range": point_cloud_range,
+ "num_points": num_points,
+ "has_intensity": has_intensity,
+ }
+ else:
+ raise ValueError(f"Unknown input type: {input_type}")
model = cli.get_model(
task, input_type, model_format, model, model_ontology, model_cfg
)
- results = model.get_computational_cost(image_size)
+ results = model.get_computational_cost(**params)
results.to_csv(out_fname)
return results
+
+
+if __name__ == "__main__":
+ computational_cost()
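
In Python terms, the command now forwards input-type-specific keyword arguments to the model; a rough sketch of the equivalent calls (the model objects stand for whatever `cli.get_model` returns):

# Image model: only a dummy image size is required.
results = image_model.get_computational_cost(image_size=(512, 512))

# LiDAR model: a dummy point cloud is described by its range, point count,
# and whether an intensity channel is present.
results = lidar_model.get_computational_cost(
    point_cloud_range=(-50, -50, -5, 50, 50, 5),
    num_points=100000,
    has_intensity=True,
)
results.to_csv("computational_cost.csv")
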
diff --git a/detectionmetrics/cli/evaluate.py b/detectionmetrics/cli/evaluate.py
index 4fd23ee0..bba5c0c9 100644
--- a/detectionmetrics/cli/evaluate.py
+++ b/detectionmetrics/cli/evaluate.py
@@ -25,7 +25,7 @@ def parse_split(ctx, param, value):
@click.option(
"--model_format",
type=click.Choice(
- ["torch", "tensorflow", "tensorflow_explicit"], case_sensitive=False
+ ["torch", "tensorflow"], case_sensitive=False
),
show_default=True,
default="torch",
@@ -197,3 +197,7 @@ def evaluate(
results.to_csv(out_fname)
return results
+
+
+if __name__ == "__main__":
+ evaluate()
diff --git a/detectionmetrics/datasets/gaia.py b/detectionmetrics/datasets/gaia.py
index a9ceaaa2..84722038 100644
--- a/detectionmetrics/datasets/gaia.py
+++ b/detectionmetrics/datasets/gaia.py
@@ -23,8 +23,15 @@ def build_dataset(dataset_fname: str) -> Tuple[pd.DataFrame, str, dict]:
dataset_dir = os.path.dirname(dataset_fname)
# Read ontology file
- ontology_fname = dataset.attrs["ontology_fname"]
- ontology = uio.read_json(os.path.join(dataset_dir, ontology_fname))
+ try:
+ ontology_fname = dataset.attrs["ontology_fname"]
+ except KeyError:
+ ontology_fname = "ontology.json"
+
+ ontology_fname = os.path.join(dataset_dir, ontology_fname)
+ assert os.path.isfile(ontology_fname), f"Ontology file {ontology_fname} not found"
+
+ ontology = uio.read_json(ontology_fname)
for name, data in ontology.items():
ontology[name]["rgb"] = tuple(data["rgb"])
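
When `ontology_fname` is missing from the dataset attributes, the code now falls back to an `ontology.json` next to the dataset file; a minimal sketch of the expected structure (class names and colors are made up):

# Contents of ontology.json (illustrative); "rgb" lists are converted to tuples on load.
ontology = {
    "grass": {"idx": 0, "rgb": [0, 255, 0]},
    "tree": {"idx": 1, "rgb": [0, 100, 0]},
}
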
diff --git a/detectionmetrics/datasets/goose.py b/detectionmetrics/datasets/goose.py
index b83f6d9b..34615d60 100644
--- a/detectionmetrics/datasets/goose.py
+++ b/detectionmetrics/datasets/goose.py
@@ -16,6 +16,7 @@ def build_dataset(
train_dataset_dir: Optional[str] = None,
val_dataset_dir: Optional[str] = None,
test_dataset_dir: Optional[str] = None,
+ is_goose_ex: bool = False,
) -> Tuple[dict, dict]:
"""Build dataset and ontology dictionaries from GOOSE dataset structure
@@ -31,6 +32,8 @@ def build_dataset(
:type val_dataset_dir: str, optional
:param test_dataset_dir: Directory containing test data, defaults to None
:type test_dataset_dir: str, optional
+ :param is_goose_ex: Whether the dataset is GOOSE Ex or GOOSE, defaults to False
+ :type is_goose_ex: bool, optional
:return: Dataset and ontology
:rtype: Tuple[dict, dict]
"""
@@ -66,13 +69,23 @@ def build_dataset(
train_data = os.path.join(dataset_dir, f"{data_type}/{split}/*/*_{data_suffix}")
for data_fname in glob(train_data):
sample_dir, sample_base_name = os.path.split(data_fname)
- sample_base_name = sample_base_name.split("__")[-1]
+
+ # GOOSE Ex uses a different label file naming convention
+ if is_goose_ex:
+ sample_base_name = "sequence" + sample_base_name.split("_sequence")[-1]
+ else:
+ sample_base_name = sample_base_name.split("__")[-1]
+
sample_base_name = sample_base_name.split("_" + data_suffix)[0]
scene = os.path.split(sample_dir)[-1]
sample_name = f"{scene}-{sample_base_name}"
- label_base_name = f"{scene}__{sample_base_name}_{label_suffix}"
+ if is_goose_ex:
+ label_base_name = f"{scene}_{sample_base_name}_{label_suffix}"
+ else:
+ label_base_name = f"{scene}__{sample_base_name}_{label_suffix}"
+
label_fname = os.path.join(
dataset_dir, "labels", split, scene, label_base_name
)
@@ -131,9 +144,9 @@ def __init__(
class GOOSELiDARSegmentationDataset(dm_segmentation_dataset.LiDARSegmentationDataset):
"""Specific class for GOOSE-styled LiDAR segmentation datasets. All data can be
downloaded from the official webpage (https://goose-dataset.de):
- train -> https://goose-dataset.de/storage/goose_3d_train.zip
- val -> https://goose-dataset.de/storage/goose_3d_val.zip
- test -> https://goose-dataset.de/storage/goose_3d_test.zip
+ train -> https://goose-dataset.de/storage/goose_3d_train.zip (GOOSE) or https://goose-dataset.de/storage/gooseEx_3d_train.zip (GOOSE Ex)
+ val -> https://goose-dataset.de/storage/goose_3d_val.zip (GOOSE) or https://goose-dataset.de/storage/gooseEx_3d_val.zip (GOOSE Ex)
+ test -> https://goose-dataset.de/storage/goose_3d_test.zip (GOOSE) or https://goose-dataset.de/storage/gooseEx_3d_test.zip (GOOSE Ex)
:param train_dataset_dir: Directory containing training data
:type train_dataset_dir: str
@@ -141,6 +154,8 @@ class GOOSELiDARSegmentationDataset(dm_segmentation_dataset.LiDARSegmentationDat
:type val_dataset_dir: str, optional
:param test_dataset_dir: Directory containing test data, defaults to None
:type test_dataset_dir: str, optional
+ :param is_goose_ex: Whether the dataset is GOOSE Ex or GOOSE, defaults to False
+ :type is_goose_ex: bool, optional
"""
def __init__(
@@ -148,14 +163,16 @@ def __init__(
train_dataset_dir: Optional[str] = None,
val_dataset_dir: Optional[str] = None,
test_dataset_dir: Optional[str] = None,
+ is_goose_ex: bool = False,
):
dataset, ontology = build_dataset(
"lidar",
- "vls128.bin",
+ "pcl.bin" if is_goose_ex else "vls128.bin",
"goose.label",
train_dataset_dir,
val_dataset_dir,
test_dataset_dir,
+ is_goose_ex=is_goose_ex,
)
# Convert to Pandas
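
To illustrate the two naming conventions handled above (the concrete scene and frame names are made up; only the separators and suffixes come from this diff):

# GOOSE:    lidar/train/<scene>/<scene>__<frame>_vls128.bin
#           labels/train/<scene>/<scene>__<frame>_goose.label
# GOOSE Ex: lidar/train/<scene>/<scene>_sequence<frame>_pcl.bin
#           labels/train/<scene>/<scene>_sequence<frame>_goose.label
dataset = GOOSELiDARSegmentationDataset(
    train_dataset_dir="gooseEx_3d_train",  # hypothetical extraction path
    is_goose_ex=True,
)
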
diff --git a/detectionmetrics/datasets/segmentation.py b/detectionmetrics/datasets/segmentation.py
index 2c199aae..7a74ef53 100644
--- a/detectionmetrics/datasets/segmentation.py
+++ b/detectionmetrics/datasets/segmentation.py
@@ -12,6 +12,7 @@
from detectionmetrics.datasets.perception import PerceptionDataset
import detectionmetrics.utils.io as uio
import detectionmetrics.utils.conversion as uc
+import detectionmetrics.utils.lidar as ul
class SegmentationDataset(PerceptionDataset):
@@ -68,7 +69,7 @@ def export(
outdir: str,
new_ontology: Optional[dict] = None,
ontology_translation: Optional[dict] = None,
- ignored_classes: Optional[List[str]] = None,
+ classes_to_remove: Optional[List[str]] = None,
resize: Optional[Tuple[int, int]] = None,
include_label_count: bool = True,
):
@@ -80,8 +81,8 @@ def export(
:type new_ontology: dict
:param ontology_translation: Ontology translation dictionary, defaults to None
:type ontology_translation: Optional[dict], optional
- :param ignored_classes: Classes to ignore from the old ontology, defaults to []
- :type ignored_classes: Optional[List[str]], optional
+ :param classes_to_remove: Classes to remove from the old ontology, defaults to []
+ :type classes_to_remove: Optional[List[str]], optional
:param resize: Resize images and labels to the given dimensions, defaults to None
:type resize: Optional[Tuple[int, int]], optional
:param include_label_count: Whether to include class weights in the dataset, defaults to True
@@ -104,7 +105,8 @@ def export(
old_ontology=self.ontology,
new_ontology=new_ontology,
ontology_translation=ontology_translation,
- ignored_classes=ignored_classes,
+ classes_to_remove=classes_to_remove,
+ lut_dtype=np.uint32,
)
n_classes = max(c["idx"] for c in new_ontology.values()) + 1
else:
@@ -166,7 +168,7 @@ def export(
# Convert label to new ontology if needed
if ontology_conversion_lut is not None:
- label = ontology_conversion_lut[label]
+ label = ontology_conversion_lut[label].astype(np.uint8)
# Resize label if needed
if resize is not None:
@@ -254,6 +256,8 @@ class LiDARSegmentationDataset(SegmentationDataset):
:type ontology: dict
:param is_kitti_format: Whether the linked files in the dataset are stored in SemanticKITTI format or not, defaults to True
:type is_kitti_format: bool, optional
+ :param has_intensity: Whether the point cloud files contain intensity values, defaults to True
+ :type has_intensity: bool, optional
"""
def __init__(
@@ -262,9 +266,11 @@ def __init__(
dataset_dir: str,
ontology: dict,
is_kitti_format: bool = True,
+ has_intensity: bool = True,
):
super().__init__(dataset, dataset_dir, ontology)
self.is_kitti_format = is_kitti_format
+ self.has_intensity = has_intensity
def make_fname_global(self):
"""Get all relative filenames in dataset and make global"""
@@ -282,7 +288,9 @@ def export(
outdir: str,
new_ontology: Optional[dict] = None,
ontology_translation: Optional[dict] = None,
- ignored_classes: Optional[List[str]] = [],
+ classes_to_remove: Optional[List[str]] = [],
+ include_label_count: bool = True,
+ remove_origin: bool = False,
):
"""Export dataset dataframe and LiDAR files in SemanticKITTI format. Optionally, modify ontology before exporting.
@@ -292,8 +300,12 @@ def export(
:type new_ontology: dict
:param ontology_translation: Ontology translation dictionary, defaults to None
:type ontology_translation: Optional[dict], optional
- :param ignored_classes: Classes to ignore from the old ontology, defaults to []
- :type ignored_classes: Optional[List[str]], optional
+ :param classes_to_remove: Classes to remove from the old ontology, defaults to []
+ :type classes_to_remove: Optional[List[str]], optional
+ :param include_label_count: Whether to include class weights in the dataset, defaults to True
+ :type include_label_count: bool, optional
+ :param remove_origin: Whether to remove the origin from the point cloud (mostly for removing RELLIS-3D spurious points), defaults to False
+ :type remove_origin: bool, optional
"""
os.makedirs(outdir, exist_ok=True)
@@ -302,14 +314,25 @@ def export(
if ontology_translation is not None and new_ontology is None:
raise ValueError("New ontology must be provided")
+ # Create ontology conversion lookup table if needed and get number of classes
ontology_conversion_lut = None
if new_ontology is not None:
ontology_conversion_lut = uc.get_ontology_conversion_lut(
old_ontology=self.ontology,
new_ontology=new_ontology,
ontology_translation=ontology_translation,
- ignored_classes=ignored_classes,
+ classes_to_remove=classes_to_remove,
)
+ n_classes = max(c["idx"] for c in new_ontology.values()) + 1
+ else:
+ n_classes = max(c["idx"] for c in self.ontology.values()) + 1
+
+ # Check if label count is missing and create empty array if needed
+ label_count_missing = include_label_count and (
+ not self.has_label_count or new_ontology is not None or remove_origin
+ )
+ if label_count_missing:
+ label_count = np.zeros(n_classes, dtype=np.uint64)
pbar = tqdm(self.dataset.iterrows())
@@ -334,23 +357,51 @@ def export(
label_fname = os.path.join(self.dataset_dir, label_fname)
# If format is not appropriate: read, convert, and rewrite sample
- if not self.is_kitti_format or ontology_conversion_lut is not None:
+ if (
+ not self.is_kitti_format
+ or ontology_conversion_lut is not None
+ or label_count_missing
+ or remove_origin
+ ):
points = self.read_points(points_fname)
- label, _ = self.read_label(label_fname)
+ label = self.read_label(label_fname)
+
+ # Convert label to new ontology if needed
if ontology_conversion_lut is not None:
- label = ontology_conversion_lut[label]
+ label = ontology_conversion_lut[label].astype(np.uint32)
+
+ # Remove points in coordinate origin if needed
+ if remove_origin:
+ mask = np.any(points[:, :3] != 0, axis=1)
+ points = points[mask]
+ label = label[mask]
+
points.tofile(os.path.join(outdir, rel_points_fname))
label.tofile(os.path.join(outdir, rel_label_fname))
+
+ if label_count_missing:
+     indices, counts = np.unique(label, return_counts=True)
+     label_count[indices] += counts.astype(np.uint64)
else:
- shutil.copy2(points_fname, os.path.join(outdir, rel_points_fname))
- shutil.copy2(label_fname, os.path.join(outdir, rel_label_fname))
+ new_points_fname = os.path.join(outdir, rel_points_fname)
+ new_label_fname = os.path.join(outdir, rel_label_fname)
+ try:
+ shutil.copy2(points_fname, new_points_fname)
+ shutil.copy2(label_fname, new_label_fname)
+ except shutil.SameFileError:
+ pass # Source and destination are the same file
self.dataset.at[sample_name, "points"] = rel_points_fname
self.dataset.at[sample_name, "label"] = rel_label_fname
+ # Update dataset directory and ontology if needed
self.dataset_dir = outdir
+ self.ontology = new_ontology if new_ontology is not None else self.ontology
# Write ontology and store relative path in dataset attributes
+ if label_count_missing:
+ for class_data in self.ontology.values():
+ class_data["label_count"] = int(label_count[class_data["idx"]])
+
ontology_fname = "ontology.json"
self.dataset.attrs = {"ontology_fname": ontology_fname}
uio.write_json(os.path.join(outdir, ontology_fname), self.ontology)
@@ -358,29 +409,23 @@ def export(
# Store dataset as Parquet file containing relative filenames
self.dataset.to_parquet(os.path.join(outdir, "dataset.parquet"))
- @staticmethod
- def read_points(fname: str) -> np.ndarray:
- """Read points from a binary file in SemanticKITTI format
+ def read_points(self, fname: str) -> np.ndarray:
+ """Read point cloud. Defaults to SemanticKITTI format
- :param fname: Binary file containing points
+ :param fname: File containing point cloud
:type fname: str
:return: Numpy array containing points
:rtype: np.ndarray
"""
- points = np.fromfile(fname, dtype=np.float32)
- return points.reshape((-1, 4))
+ return ul.read_semantickitti_points(fname, self.has_intensity)
- @staticmethod
- def read_label(fname: str) -> Tuple[np.ndarray, np.ndarray]:
- """Read labels from a binary file in SemanticKITTI format
+ def read_label(self, fname: str) -> np.ndarray:
+ """Read semantic labels. Defaults to SemanticKITTI format
:param fname: Binary file containing labels
:type fname: str
- :return: Numpy arrays containing semantic and instance labels
- :rtype: Tuple[np.ndarray, np.ndarray]
+ :return: Numpy array containing semantic labels
+ :rtype: np.ndarray
"""
- label = np.fromfile(fname, dtype=np.uint32)
- label = label.reshape((-1))
- semantic_label = label & 0xFFFF
- instance_label = label >> 16
- return semantic_label.astype(np.int32), instance_label.astype(np.int32)
+ label, _ = ul.read_semantickitti_label(fname)
+ return label
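
A usage sketch of the reworked LiDAR export (paths are placeholders; `new_ontology`, `ontology_translation`, and `classes_to_remove` can additionally be passed to remap labels on export):

dataset = GOOSELiDARSegmentationDataset(train_dataset_dir="goose_3d_train")  # placeholder path
dataset.export(
    outdir="goose_kitti_export",
    include_label_count=True,  # per-class point counts are written into ontology.json
    remove_origin=False,       # set True to drop spurious (0, 0, 0) points, e.g. in RELLIS-3D
)
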
diff --git a/detectionmetrics/datasets/wildscenes.py b/detectionmetrics/datasets/wildscenes.py
index a2dce7a3..a95a21c2 100644
--- a/detectionmetrics/datasets/wildscenes.py
+++ b/detectionmetrics/datasets/wildscenes.py
@@ -8,61 +8,17 @@
from detectionmetrics.datasets import segmentation as dm_segmentation_dataset
-# Ontology definition as found in the official repo (https://github.com/csiro-robotics/WildScenes/blob/main/wildscenes/tools/utils2d.py)
-METAINFO = {
- "classes": (
- "unlabelled",
- "asphalt",
- "dirt",
- "mud",
- "water",
- "gravel",
- "other-terrain",
- "tree-trunk",
- "tree-foliage",
- "bush",
- "fence",
- "structure",
- "pole",
- "vehicle",
- "rock",
- "log",
- "other-object",
- "sky",
- "grass",
- ),
- "palette": [
- (0, 0, 0),
- (255, 165, 0),
- (60, 180, 75),
- (255, 225, 25),
- (0, 130, 200),
- (145, 30, 180),
- (70, 240, 240),
- (240, 50, 230),
- (210, 245, 60),
- (230, 25, 75),
- (0, 128, 128),
- (170, 110, 40),
- (255, 250, 200),
- (128, 0, 0),
- (170, 255, 195),
- (128, 128, 0),
- (250, 190, 190),
- (0, 0, 128),
- (128, 128, 128),
- ],
- "cidx": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18],
-}
-
-
-def build_dataset(dataset_dir: str, split_fnames: dict) -> Tuple[dict, dict]:
+def build_dataset(
+ dataset_dir: str, split_fnames: dict, ontology: dict
+) -> Tuple[dict, dict]:
"""Build dataset and ontology dictionaries from Wildscenes dataset structure
:param dataset_dir: Directory where both RGB images and annotations have been extracted to
:type dataset_dir: str
:param split_fnames: Dictionary that contains the paths where train, val, and test split files (.csv) have been extracted to
:type split_fnames: dict
+ :param ontology: Ontology definition as found in the official repo
+ :type ontology: dict
:return: Dataset and ontology
:rtype: Tuple[dict, dict]
"""
@@ -75,10 +31,10 @@ def build_dataset(dataset_dir: str, split_fnames: dict) -> Tuple[dict, dict]:
assert os.path.isfile(split_fname), f"{split_fname} split file not found"
# Load and adapt ontology
- ontology = {}
- ontology_iter = zip(METAINFO["classes"], METAINFO["palette"], METAINFO["cidx"])
+ parsed_ontology = {}
+ ontology_iter = zip(ontology["classes"], ontology["palette"], ontology["cidx"])
for name, color, idx in ontology_iter:
- ontology[name] = {"idx": idx, "rgb": color}
+ parsed_ontology[name] = {"idx": idx, "rgb": color}
# Get samples filenames
train_split = pd.read_csv(split_fnames["train"])
@@ -92,6 +48,9 @@ def build_dataset(dataset_dir: str, split_fnames: dict) -> Tuple[dict, dict]:
samples_data = pd.concat([train_split, val_split, test_split])
+ if "hist_path" in samples_data.columns:
+ samples_data = samples_data.drop(columns=["hist_path"])
+
# Build dataset as ordered python dictionary
dataset = OrderedDict()
skipped_samples = []
@@ -120,20 +79,20 @@ def build_dataset(dataset_dir: str, split_fnames: dict) -> Tuple[dict, dict]:
for sample_name in skipped_samples:
print(f"\n\t{sample_name}")
- return dataset, ontology
+ return dataset, parsed_ontology
class WildscenesImageSegmentationDataset(
dm_segmentation_dataset.ImageSegmentationDataset
):
"""Specific class for Wildscenes-styled image segmentation datasets. All data can
- be downloaded from the official repo (https://github.com/unmannedlab/RELLIS-3D):
+ be downloaded from the official repo:
dataset -> https://data.csiro.au/collection/csiro:61541
split -> https://github.com/csiro-robotics/WildScenes/tree/main/data/splits/opt2d
:param dataset_dir: Directory where dataset images and labels are stored (Wildscenes2D)
:type dataset_dir: str
- :param split_dir: Directory where train, val, and test files (.csv) have been extracted to (data/splits/opt2d from the official repo)
+ :param split_dir: Directory where train, val, and test files (.csv) have been extracted to
:type split_dir: str
"""
@@ -143,7 +102,54 @@ def __init__(self, dataset_dir: str, split_dir: str):
"val": os.path.join(split_dir, "val.csv"),
"test": os.path.join(split_dir, "test.csv"),
}
- dataset, ontology = build_dataset(dataset_dir, split_fnames)
+
+ # Ontology definition as found in the official repo (https://github.com/csiro-robotics/WildScenes/blob/main/wildscenes/tools/utils2d.py)
+ METAINFO = {
+ "classes": (
+ "unlabelled",
+ "asphalt",
+ "dirt",
+ "mud",
+ "water",
+ "gravel",
+ "other-terrain",
+ "tree-trunk",
+ "tree-foliage",
+ "bush",
+ "fence",
+ "structure",
+ "pole",
+ "vehicle",
+ "rock",
+ "log",
+ "other-object",
+ "sky",
+ "grass",
+ ),
+ "palette": [
+ (0, 0, 0),
+ (255, 165, 0),
+ (60, 180, 75),
+ (255, 225, 25),
+ (0, 130, 200),
+ (145, 30, 180),
+ (70, 240, 240),
+ (240, 50, 230),
+ (210, 245, 60),
+ (230, 25, 75),
+ (0, 128, 128),
+ (170, 110, 40),
+ (255, 250, 200),
+ (128, 0, 0),
+ (170, 255, 195),
+ (128, 128, 0),
+ (250, 190, 190),
+ (0, 0, 128),
+ (128, 128, 128),
+ ],
+ "cidx": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18],
+ }
+ dataset, ontology = build_dataset(dataset_dir, split_fnames, METAINFO)
# Convert to Pandas
cols = ["image", "label", "scene", "split"]
@@ -151,3 +157,72 @@ def __init__(self, dataset_dir: str, split_dir: str):
dataset.attrs = {"ontology": ontology}
super().__init__(dataset, dataset_dir, ontology)
+
+
+class WildscenesLiDARSegmentationDataset(dm_dataset.LiDARSegmentationDataset):
+ """Specific class for Wildscenes-styled LiDAR segmentation datasets. All data can
+ be downloaded from the official repo:
+ dataset -> https://data.csiro.au/collection/csiro:61541
+ split -> https://github.com/csiro-robotics/WildScenes/tree/main/data/splits/opt3d
+
+ :param dataset_dir: Directory where dataset images and labels are stored (Wildscenes3D)
+ :type dataset_dir: str
+ :param split_dir: Directory where train, val, and test files (.csv) have been extracted to
+ :type split_dir: str
+ """
+
+ def __init__(self, dataset_dir: str, split_dir: str):
+ split_fnames = {
+ "train": os.path.join(split_dir, "train.csv"),
+ "val": os.path.join(split_dir, "val.csv"),
+ "test": os.path.join(split_dir, "test.csv"),
+ }
+
+ # Ontology definition as found in the official repo (https://github.com/csiro-robotics/WildScenes/blob/main/wildscenes/tools/utils3d.py)
+ METAINFO = {
+ "classes": (
+ "unlabelled",
+ "bush",
+ "dirt",
+ "fence",
+ "grass",
+ "gravel",
+ "log",
+ "mud",
+ "other-object",
+ "other-terrain",
+ "rock",
+ "sky",
+ "structure",
+ "tree-foliage",
+ "tree-trunk",
+ "water",
+ ),
+ "palette": [
+ (0, 0, 0),
+ (230, 25, 75),
+ (60, 180, 75),
+ (0, 128, 128),
+ (128, 128, 128),
+ (145, 30, 180),
+ (128, 128, 0),
+ (255, 225, 25),
+ (250, 190, 190),
+ (70, 240, 240),
+ (170, 255, 195),
+ (0, 0, 128),
+ (170, 110, 40),
+ (210, 245, 60),
+ (240, 50, 230),
+ (0, 130, 200),
+ ],
+ "cidx": [255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14],
+ }
+ dataset, ontology = build_dataset(dataset_dir, split_fnames, METAINFO)
+
+ # Convert to Pandas
+ cols = ["points", "label", "scene", "split"]
+ dataset = pd.DataFrame.from_dict(dataset, orient="index", columns=cols)
+ dataset.attrs = {"ontology": ontology}
+
+ super().__init__(dataset, dataset_dir, ontology, has_intensity=False)
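
A usage sketch for the new LiDAR variant (extraction paths are placeholders):

dataset = WildscenesLiDARSegmentationDataset(
    dataset_dir="WildScenes3D",                # extracted point clouds and labels
    split_dir="WildScenes/data/splits/opt3d",  # train/val/test .csv files
)
# Point clouds are read without an intensity channel (has_intensity=False).
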
diff --git a/detectionmetrics/models/perception.py b/detectionmetrics/models/perception.py
index f78b1472..e6ece062 100644
--- a/detectionmetrics/models/perception.py
+++ b/detectionmetrics/models/perception.py
@@ -47,6 +47,7 @@ def __init__(
self.ontology = uio.read_json(ontology_fname)
self.model_cfg = uio.read_json(model_cfg)
self.n_classes = len(self.ontology)
+ self.model_cfg["n_classes"] = self.n_classes
@abstractmethod
def inference(
@@ -90,6 +91,6 @@ def get_lut_ontology(
dataset_ontology,
self.ontology,
ontology_translation,
- self.model_cfg.get("ignored_classes", []),
+ classes_to_remove=self.model_cfg.get("classes_to_remove", None),
)
return lut_ontology
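
For reference, the relevant `model_cfg` fields after this change look roughly like this (values are illustrative):

model_cfg = {
    # Used when building the ontology conversion LUT; replaces "ignored_classes" there.
    "classes_to_remove": ["sky"],
    # "ignored_classes" is still read elsewhere to mask labels during evaluation.
    "ignored_classes": ["unlabelled"],
}
# After construction, the model also injects:
# model_cfg["n_classes"] == len(ontology)
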
diff --git a/detectionmetrics/models/segmentation.py b/detectionmetrics/models/segmentation.py
index 66a4b141..af01932a 100644
--- a/detectionmetrics/models/segmentation.py
+++ b/detectionmetrics/models/segmentation.py
@@ -1,6 +1,6 @@
from abc import ABC, abstractmethod
import os
-from typing import Any, List, Optional, Union
+from typing import Any, List, Optional, Tuple, Union
import numpy as np
import pandas as pd
@@ -39,24 +39,36 @@ def __init__(
super().__init__(model, model_type, model_cfg, ontology_fname, model_fname)
@abstractmethod
- def inference(
- self, points: Union[np.ndarray, Image.Image]
+ def predict(
+ self, data: Union[np.ndarray, Image.Image]
) -> Union[np.ndarray, Image.Image]:
- """Perform inference for a single image or point cloud
+ """Perform prediction for a single data sample
- :param image: Either a numpy array (LiDAR point cloud) or a PIL image
- :type image: Union[np.ndarray, Image.Image]
- :return: Segmenation result as a point cloud or image with label indices
+ :param data: Input data sample (image or point cloud)
+ :type data: Union[np.ndarray, Image.Image]
+ :return: Prediction result
:rtype: Union[np.ndarray, Image.Image]
"""
raise NotImplementedError
+ @abstractmethod
+ def inference(self, tensor_in):
+ """Perform inference for a tensor
+
+ :param tensor_in: Input tensor (image or point cloud)
+ :type tensor_in: Either tf.Tensor or torch.Tensor
+ :return: Segmentation result as a tensor
+ :rtype: Either tf.Tensor or torch.Tensor
+ """
+ raise NotImplementedError
+
@abstractmethod
def eval(
self,
dataset: dm_segentation_dataset.SegmentationDataset,
split: str | List[str] = "test",
ontology_translation: Optional[str] = None,
+ translations_direction: str = "dataset_to_model",
predictions_outdir: Optional[str] = None,
results_per_sample: bool = False,
) -> pd.DataFrame:
@@ -65,9 +77,11 @@ def eval(
:param dataset: Segmentation dataset for which the evaluation will be performed
:type dataset: ImageSegmentationDataset
:param split: Split or splits to be used from the dataset, defaults to "test"
- :type split: str | List[str], optional
+ :type split: Union[str, List[str]], optional
:param ontology_translation: JSON file containing translation between dataset and model output ontologies
:type ontology_translation: str, optional
+ :param translations_direction: Direction of the ontology translation. Either "dataset_to_model" or "model_to_dataset", defaults to "dataset_to_model"
+ :type translations_direction: str, optional
:param predictions_outdir: Directory to save predictions per sample, defaults to None. If None, predictions are not saved.
:type predictions_outdir: Optional[str], optional
:param results_per_sample: Whether to store results per sample or not, defaults to False. If True, predictions_outdir must be provided.
@@ -104,13 +118,17 @@ def __init__(
super().__init__(model, model_type, model_cfg, ontology_fname, model_fname)
@abstractmethod
- def inference(self, image: Image.Image) -> Image.Image:
- """Perform inference for a single image
+ def predict(
+ self, image: Image.Image, return_sample: bool = False
+ ) -> Union[Image.Image, Tuple[Image.Image, Any]]:
+ """Perform prediction for a single image
- :param image: PIL image.
+ :param image: PIL image
:type image: Image.Image
- :return: Segmenation result as PIL image
- :rtype: Image.Image
+ :param return_sample: Whether to return the sample data along with predictions, defaults to False
+ :type return_sample: bool, optional
+ :return: Segmentation result as a PIL image or a tuple with the segmentation result and the input sample tensor
+ :rtype: Union[Image.Image, Tuple[Image.Image, Any]]
"""
raise NotImplementedError
@@ -120,6 +138,7 @@ def eval(
dataset: dm_segentation_dataset.ImageSegmentationDataset,
split: str | List[str] = "test",
ontology_translation: Optional[str] = None,
+ translations_direction: str = "dataset_to_model",
predictions_outdir: Optional[str] = None,
results_per_sample: bool = False,
) -> pd.DataFrame:
@@ -128,9 +147,11 @@ def eval(
:param dataset: Image segmentation dataset for which the evaluation will be performed
:type dataset: ImageSegmentationDataset
:param split: Split or splits to be used from the dataset, defaults to "test"
- :type split: str | List[str], optional
+ :type split: Union[str, List[str]], optional
:param ontology_translation: JSON file containing translation between dataset and model output ontologies
:type ontology_translation: str, optional
+ :param translations_direction: Direction of the ontology translation. Either "dataset_to_model" or "model_to_dataset", defaults to "dataset_to_model"
+ :type translations_direction: str, optional
:param predictions_outdir: Directory to save predictions per sample, defaults to None. If None, predictions are not saved.
:type predictions_outdir: Optional[str], optional
:param results_per_sample: Whether to store results per sample or not, defaults to False. If True, predictions_outdir must be provided.
@@ -140,6 +161,25 @@ def eval(
"""
raise NotImplementedError
+ @abstractmethod
+ def get_computational_cost(
+ self,
+ image_size: Optional[Tuple[int]] = None,
+ runs: int = 30,
+ warm_up_runs: int = 5,
+ ) -> pd.DataFrame:
+ """Get different metrics related to the computational cost of the model
+
+ :param image_size: Image size used for inference
+ :type image_size: Tuple[int], optional
+ :param runs: Number of runs to measure inference time, defaults to 30
+ :type runs: int, optional
+ :param warm_up_runs: Number of warm-up runs, defaults to 5
+ :type warm_up_runs: int, optional
+ :return: DataFrame containing computational cost information
+ :rtype: pd.DataFrame
+ """
+ raise NotImplementedError
+
class LiDARSegmentationModel(SegmentationModel):
"""Parent LiDAR segmentation model class
@@ -167,13 +207,22 @@ def __init__(
super().__init__(model, model_type, model_cfg, ontology_fname, model_fname)
@abstractmethod
- def inference(self, points: np.ndarray) -> np.ndarray:
- """Perform inference for a single image
+ def predict(
+ self,
+ points_fname: str,
+ has_intensity: bool = True,
+ return_sample: bool = False,
+ ) -> Union[np.ndarray, Tuple[np.ndarray, Any]]:
+ """Perform prediction for a single point cloud
- :param image: Point cloud xyz array
- :type image: np.ndarray
- :return: Segmenation result as a point cloud with label indices
- :rtype: np.ndarray
+ :param points_fname: Point cloud in SemanticKITTI .bin format
+ :type points_fname: str
+ :param has_intensity: Whether the point cloud has intensity values, defaults to True
+ :type has_intensity: bool, optional
+ :param return_sample: Whether to return the sample data along with predictions, defaults to False
+ :type return_sample: bool, optional
+ :return: Segmentation result as a numpy array or a tuple with the segmentation result and the input sample data
+ :rtype: Union[np.ndarray, Tuple[np.ndarray, Any]]
"""
raise NotImplementedError
@@ -183,6 +232,7 @@ def eval(
dataset: dm_segentation_dataset.LiDARSegmentationDataset,
split: str | List[str] = "test",
ontology_translation: Optional[str] = None,
+ translations_direction: str = "dataset_to_model",
predictions_outdir: Optional[str] = None,
results_per_sample: bool = False,
) -> pd.DataFrame:
@@ -191,9 +241,11 @@ def eval(
:param dataset: LiDAR segmentation dataset for which the evaluation will be performed
:type dataset: LiDARSegmentationDataset
:param split: Split or splits to be used from the dataset, defaults to "test"
- :type split: str | List[str], optional
+ :type split: Union[str, List[str]], optional
:param ontology_translation: JSON file containing translation between dataset and model output ontologies
:type ontology_translation: str, optional
+ :param translations_direction: Direction of the ontology translation. Either "dataset_to_model" or "model_to_dataset", defaults to "dataset_to_model"
+ :type translations_direction: str, optional
:param predictions_outdir: Directory to save predictions per sample, defaults to None. If None, predictions are not saved.
:type predictions_outdir: Optional[str], optional
:param results_per_sample: Whether to store results per sample or not, defaults to False. If True, predictions_outdir must be provided.
@@ -202,3 +254,15 @@ def eval(
:rtype: pd.DataFrame
"""
raise NotImplementedError
+
+ @abstractmethod
+ def get_computational_cost(self, point_cloud_range: Optional[Tuple[int]] = None, num_points: Optional[int] = None, has_intensity: bool = True, runs: int = 30, warm_up_runs: int = 5) -> pd.DataFrame:
+ """Get different metrics related to the computational cost of the model
+
+ :param point_cloud_range: Dummy point cloud range (x_min, y_min, z_min, x_max, y_max, z_max)
+ :type point_cloud_range: Tuple[int], optional
+ :param num_points: Number of points for the dummy point cloud (uniformly sampled)
+ :type num_points: int, optional
+ :param has_intensity: Whether the dummy point cloud has intensity values, defaults to True
+ :type has_intensity: bool, optional
+ :param runs: Number of runs to measure inference time, defaults to 30
+ :type runs: int, optional
+ :param warm_up_runs: Number of warm-up runs, defaults to 5
+ :type warm_up_runs: int, optional
+ :return: DataFrame containing computational cost information
+ :rtype: pd.DataFrame
+ """
+ raise NotImplementedError
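
The intended calling pattern for the new predict/inference split, sketched for an image model (the model object and input file are placeholders):

from PIL import Image

image = Image.open("sample.png")                          # placeholder input
pred = model.predict(image)                               # PIL image with label indices
pred, sample = model.predict(image, return_sample=True)   # also return the input tensor
logits = model.inference(sample)                          # raw tensor-in/tensor-out path used by eval()
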
diff --git a/detectionmetrics/models/tensorflow.py b/detectionmetrics/models/tensorflow.py
index a1e25012..38b33130 100644
--- a/detectionmetrics/models/tensorflow.py
+++ b/detectionmetrics/models/tensorflow.py
@@ -13,74 +13,12 @@
from detectionmetrics.datasets.segmentation import ImageSegmentationDataset
from detectionmetrics.models.segmentation import ImageSegmentationModel
+import detectionmetrics.utils.conversion as uc
+import detectionmetrics.utils.io as uio
import detectionmetrics.utils.segmentation_metrics as um
-tf.config.optimizer.set_experimental_options({"layout_optimizer": False})
-
-
-def get_computational_cost(
- model: tf.Module,
- dummy_input: tf.Tensor,
- model_fname: Optional[str] = None,
- runs: int = 30,
- warm_up_runs: int = 5,
-) -> dict:
- """Get different metrics related to the computational cost of the model
-
- :param model: Loaded TensorFlow SavedModel
- :type model: tf.Module
- :param dummy_input: Dummy input data for the model
- :type dummy_input: tf.Tensor
- :param model_fname: Model filename used to measure model size, defaults to None
- :type model_fname: Optional[str], optional
- :param runs: Number of runs to measure inference time, defaults to 30
- :type runs: int, optional
- :param warm_up_runs: Number of warm-up runs, defaults to 5
- :type warm_up_runs: int, optional
- :return: DataFrame containing computational cost information
- :rtype: pd.DataFrame
- """
- # Get model size (if possible) and number of parameters
- if model_fname is not None:
- size_mb = sum(
- os.path.getsize(os.path.join(dirpath, f))
- for dirpath, _, files in os.walk(model_fname)
- for f in files
- )
- size_mb /= 1024**2
- else:
- size_mb = None
-
- n_params = sum(np.prod(var.shape) for var in model.variables.variables)
-
- # Measure inference time with GPU synchronization
- infer = model.signatures["serving_default"]
- for _ in range(warm_up_runs):
- _ = infer(dummy_input)
-
- has_gpu = bool(tf.config.list_physical_devices("GPU"))
- inference_times = []
-
- for _ in range(runs):
- if has_gpu:
- tf.config.experimental.set_synchronous_execution(True)
-
- start_time = time.time()
- _ = infer(dummy_input)
- if has_gpu:
- tf.config.experimental.set_synchronous_execution(True)
-
- inference_times.append(time.time() - start_time)
-
- # Retrieve computational cost information
- result = {
- "input_shape": ["x".join(map(str, dummy_input.shape.as_list()))],
- "n_params": [int(n_params)],
- "size_mb": [size_mb],
- "inference_time_s": [np.mean(inference_times)],
- }
- return pd.DataFrame.from_dict(result)
+tf.config.optimizer.set_experimental_options({"layout_optimizer": False})
def resize_image(
@@ -361,33 +299,53 @@ def t_in(image):
tf.argmax(tf.squeeze(x), axis=2).numpy().astype(np.uint8)
)
- def inference(self, image: Image.Image) -> Image.Image:
- """Perform inference for a single image
+ def predict(
+ self, image: Image.Image, return_sample: bool = False
+ ) -> Union[Image.Image, Tuple[Image.Image, tf.Tensor]]:
+ """Perform prediction for a single image
:param image: PIL image
:type image: Image.Image
- :return: segmenation result as PIL image
- :rtype: Image.Image
+ :param return_sample: Whether to return the sample data along with predictions, defaults to False
+ :type return_sample: bool, optional
+ :return: Segmentation result as a PIL image or a tuple with the segmentation result and the input sample tensor
+ :rtype: Union[Image.Image, Tuple[Image.Image, tf.Tensor]]
"""
- tensor = self.t_in(image)
+ sample = self.t_in(image)
+ result = self.inference(sample)
+ result = self.t_out(result)
+
+ if return_sample:
+ return result, sample
+ else:
+ return result
+ def inference(self, tensor_in: tf.Tensor) -> tf.Tensor:
+ """Perform inference for a tensor
+
+ :param tensor_in: Input image tensor
+ :type tensor_in: tf.Tensor
+ :return: Segmentation result as tensor
+ :rtype: tf.Tensor
+ """
if self.model_type == "native":
- result = self.model(tensor)
+ tensor_out = self.model(tensor_in, training=False)
elif self.model_type == "compiled":
- result = self.model.signatures["serving_default"](tensor)
+ tensor_out = self.model.signatures["serving_default"](tensor_in)
else:
raise ValueError("Model type not recognized")
- if isinstance(result, dict):
- result = list(result.values())[0]
+ if isinstance(tensor_out, dict):
+ tensor_out = list(tensor_out.values())[0]
- return self.t_out(result)
+ return tensor_out
def eval(
self,
dataset: ImageSegmentationDataset,
- split: str | List[str] = "test",
+ split: Union[str, List[str]] = "test",
ontology_translation: Optional[str] = None,
+ translations_direction: str = "dataset_to_model",
predictions_outdir: Optional[str] = None,
results_per_sample: bool = False,
) -> pd.DataFrame:
@@ -396,9 +354,11 @@ def eval(
:param dataset: Image segmentation dataset for which the evaluation will be performed
:type dataset: ImageSegmentationDataset
:param split: Split to be used from the dataset, defaults to "test"
- :type split: str | List[str], optional
+ :type split: Union[str, List[str]], optional
:param ontology_translation: JSON file containing translation between dataset and model output ontologies
:type ontology_translation: str, optional
+ :param translations_direction: Direction of the ontology translation. Either "dataset_to_model" or "model_to_dataset", defaults to "dataset_to_model"
+ :type translations_direction: str, optional
:param predictions_outdir: Directory to save predictions per sample, defaults to None. If None, predictions are not saved.
:type predictions_outdir: Optional[str], optional
:param results_per_sample: Whether to store results per sample or not, defaults to False. If True, predictions_outdir must be provided.
@@ -417,8 +377,23 @@ def eval(
os.makedirs(predictions_outdir, exist_ok=True)
# Build a LUT for transforming ontology if needed
- lut_ontology = self.get_lut_ontology(dataset.ontology, ontology_translation)
- dataset_ontology = dataset.ontology
+ eval_ontology = self.ontology
+
+ if ontology_translation is not None:
+ ontology_translation = uio.read_json(ontology_translation)
+ if translations_direction == "dataset_to_model":
+ lut_ontology = uc.get_ontology_conversion_lut(
+ dataset.ontology, self.ontology, ontology_translation
+ )
+ else:
+ eval_ontology = dataset.ontology
+ lut_ontology = uc.get_ontology_conversion_lut(
+ self.ontology, dataset.ontology, ontology_translation
+ )
+ else:
+ lut_ontology = None
+
+ n_classes = len(eval_ontology)
# Get Tensorflow dataset
dataset = ImageSegmentationTensorflowDataset(
@@ -427,7 +402,9 @@ def eval(
crop=self.model_cfg.get("crop", None),
batch_size=self.model_cfg.get("batch_size", 1),
splits=[split] if isinstance(split, str) else split,
- lut_ontology=lut_ontology,
+ lut_ontology=(
+ lut_ontology if translations_direction == "dataset_to_model" else None
+ ),
normalization=self.model_cfg.get("normalization", None),
keep_aspect=self.model_cfg.get("keep_aspect", False),
)
@@ -435,25 +412,17 @@ def eval(
# Retrieve ignored label indices
ignored_label_indices = []
for ignored_class in self.model_cfg.get("ignored_classes", []):
- ignored_label_indices.append(dataset_ontology[ignored_class]["idx"])
+ ignored_label_indices.append(eval_ontology[ignored_class]["idx"])
# Init metrics
- metrics_factory = um.SegmentationMetricsFactory(self.n_classes)
+ metrics_factory = um.SegmentationMetricsFactory(n_classes)
# Evaluation loop
pbar = tqdm(dataset.dataset)
for idx, image, label in pbar:
idx = idx.numpy()
- if self.model_type == "native":
- pred = self.model(image, training=False)
- elif self.model_type == "compiled":
- pred = self.model.signatures["serving_default"](image)
- else:
- raise ValueError("Model type not recognized")
-
- if isinstance(pred, dict):
- pred = list(pred.values())[0]
+ pred = self.inference(image)
# Get valid points masks depending on ignored label indices
if ignored_label_indices:
@@ -469,6 +438,13 @@ def eval(
if valid_mask is not None:
valid_mask = tf.squeeze(valid_mask, axis=3).numpy()
+ # Convert predictions to dataset ontology if needed
+ if (
+ lut_ontology is not None
+ and translations_direction == "model_to_dataset"
+ ):
+ pred = lut_ontology[pred]
+
metrics_factory.update(pred, label, valid_mask)
# Store predictions and results per sample if required
@@ -481,16 +457,16 @@ def eval(
sample_valid_mask = (
valid_mask[i] if valid_mask is not None else None
)
- sample_mf = um.SegmentationMetricsFactory(n_classes=self.n_classes)
+ sample_mf = um.SegmentationMetricsFactory(n_classes)
sample_mf.update(sample_pred, sample_label, sample_valid_mask)
- sample_df = um.get_metrics_dataframe(sample_mf, self.ontology)
+ sample_df = um.get_metrics_dataframe(sample_mf, eval_ontology)
sample_df.to_csv(
os.path.join(predictions_outdir, f"{sample_idx}.csv")
)
pred = Image.fromarray(np.squeeze(pred).astype(np.uint8))
pred.save(os.path.join(predictions_outdir, f"{sample_idx}.png"))
- return um.get_metrics_dataframe(metrics_factory, self.ontology)
+ return um.get_metrics_dataframe(metrics_factory, eval_ontology)
def get_computational_cost(
self,
@@ -508,7 +484,46 @@ def get_computational_cost(
:type warm_up_runs: int, optional
:return: Dictionary containing computational cost information
"""
+ # Generate dummy input
dummy_input = tf.random.normal([1, *image_size, 3])
- return get_computational_cost(
- self.model, dummy_input, self.model_fname, runs, warm_up_runs
- )
+
+ # Get model size (if possible) and number of parameters
+ if self.model_fname is not None:
+ size_mb = sum(
+ os.path.getsize(os.path.join(dirpath, f))
+ for dirpath, _, files in os.walk(self.model_fname)
+ for f in files
+ )
+ size_mb /= 1024**2
+ else:
+ size_mb = None
+
+ n_params = sum(np.prod(var.shape) for var in self.model.variables.variables)
+
+ # Measure inference time with GPU synchronization
+ for _ in range(warm_up_runs):
+ self.inference(dummy_input)
+
+ has_gpu = bool(tf.config.list_physical_devices("GPU"))
+ inference_times = []
+
+ for _ in range(runs):
+ if has_gpu:
+ tf.config.experimental.set_synchronous_execution(True)
+
+ start_time = time.time()
+ self.inference(dummy_input)
+
+ if has_gpu:
+ tf.config.experimental.set_synchronous_execution(True)
+
+ inference_times.append(time.time() - start_time)
+
+ # Retrieve computational cost information
+ result = {
+ "input_shape": ["x".join(map(str, dummy_input.shape.as_list()))],
+ "n_params": [int(n_params)],
+ "size_mb": [size_mb],
+ "inference_time_s": [np.mean(inference_times)],
+ }
+ return pd.DataFrame.from_dict(result)
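
Evaluation with an ontology translation can now run in either direction; a usage sketch (file names are placeholders):

# Default: dataset labels are mapped into the model ontology before comparison.
results = model.eval(dataset, split="val", ontology_translation="translation.json")

# Alternatively, model predictions are mapped back into the dataset ontology.
results = model.eval(
    dataset,
    split="val",
    ontology_translation="translation.json",
    translations_direction="model_to_dataset",
)
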
diff --git a/detectionmetrics/models/torch_model_utils/__init__.py b/detectionmetrics/models/torch_model_utils/__init__.py
deleted file mode 100644
index 48f449a4..00000000
--- a/detectionmetrics/models/torch_model_utils/__init__.py
+++ /dev/null
@@ -1,43 +0,0 @@
-from typing import Optional, Tuple
-
-import numpy as np
-
-try:
- from open3d._ml3d.datasets.utils import DataProcessing
-except Exception:
- print("Open3D-ML3D not available")
-from sklearn.neighbors import KDTree
-
-from detectionmetrics.models.torch_model_utils import o3d_randlanet, o3d_kpconv
-
-
-# Default functions
-def preprocess(
- points: np.ndarray, cfg: Optional[dict] = {}
-) -> Tuple[np.ndarray, KDTree, np.ndarray]:
- """Preprocess point cloud data
-
- :param points: Point cloud data
- :type points: np.ndarray
- :param cfg: Dictionary containing model configuration, defaults to {}
- :type cfg: Optional[dict], optional
- :return: Subsampled points, search tree, and projected indices
- :rtype: Tuple[np.ndarray, KDTree, np.ndarray]
- """
- # Keep only XYZ coordinates
- points = np.array(points[:, 0:3], dtype=np.float32)
-
- # Subsample points using a grid of given size
- grid_size = cfg.get("grid_size", 0.06)
- sub_points = DataProcessing.grid_subsampling(points, grid_size=grid_size)
-
- # Create search tree so that we can project points back to the original point cloud
- search_tree = KDTree(sub_points)
- projected_indices = np.squeeze(search_tree.query(points, return_distance=False))
- projected_indices = projected_indices.astype(np.int32)
-
- return sub_points, search_tree, projected_indices
-
-
-transform_input = o3d_randlanet.transform_input
-update_probs = o3d_randlanet.update_probs
diff --git a/detectionmetrics/models/torch_segmentation.py b/detectionmetrics/models/torch_segmentation.py
index bf628a5e..7a3cb2ce 100644
--- a/detectionmetrics/models/torch_segmentation.py
+++ b/detectionmetrics/models/torch_segmentation.py
@@ -1,6 +1,7 @@
-from collections import defaultdict
+import importlib
import os
import time
+import tempfile
from typing import Any, List, Optional, Tuple, Union
import numpy as np
@@ -8,77 +9,35 @@
from PIL import Image
import torch
from torch.utils.data import DataLoader, Dataset
-from torchvision.transforms import v2 as transforms
-from torchvision.transforms.v2 import functional as F
+
+try:
+ from torchvision.transforms import v2 as transforms
+ from torchvision.transforms.v2 import functional as F
+except ImportError:
+ from torchvision.transforms import transforms
+ from torchvision.transforms import functional as F
from tqdm import tqdm
from detectionmetrics.datasets import segmentation as dm_segmentation_dataset
from detectionmetrics.models import segmentation as dm_segmentation_model
-from detectionmetrics.models import torch_model_utils as tmu
-import detectionmetrics.utils.lidar as ul
+import detectionmetrics.utils.conversion as uc
+import detectionmetrics.utils.io as uio
import detectionmetrics.utils.segmentation_metrics as um
+import detectionmetrics.utils.torch as ut
-def data_to_device(
- data: Union[tuple, list], device: torch.device
-) -> Union[tuple, list]:
- """Move provided data to given device (CPU or GPU)
-
- :param data: Data provided (it can be a single or multiple tensors)
- :type data: Union[tuple, list]
- :param device: Device to move data to
- :type device: torch.device
- :return: Data moved to device
- :rtype: Union[tuple, list]
- """
- if isinstance(data, (tuple, list)):
- return type(data)(
- d.to(device) if torch.is_tensor(d) else data_to_device(d, device)
- for d in data
- )
- elif torch.is_tensor(data):
- return data.to(device)
- else:
- return data
-
-
-def get_data_shape(data: Union[tuple, list]) -> Union[tuple, list]:
- """Get the shape of the provided data
-
- :param data: Data provided (it can be a single or multiple tensors)
- :type data: Union[tuple, list]
- :return: Data shape
- :rtype: Union[tuple, list]
- """
- if isinstance(data, (tuple, list)):
- return type(data)(
- tuple(d.shape) if torch.is_tensor(d) else get_data_shape(d) for d in data
- )
- elif torch.is_tensor(data):
- return tuple(data.shape)
- else:
- return tuple(data.shape)
+AVAILABLE_MODEL_FORMATS_LIDAR = ["o3d_randlanet", "o3d_kpconv", "mmdet3d"]
-def unsqueeze_data(data: Union[tuple, list], dim: int = 0) -> Union[tuple, list]:
- """Unsqueeze provided data along given dimension
+def raise_unknown_model_format_lidar(model_format: str) -> None:
+ """Raise an exception if the LiDAR model format is unknown
- :param data: Data provided (it can be a single or multiple tensors)
- :type data: Union[tuple, list]
- :param dim: Dimension that will be unsqueezed, defaults to 0
- :type dim: int, optional
- :return: Unsqueezed data
- :rtype: Union[tuple, list]
+ :param model_format: Model format string
+ :type model_format: str
"""
- if isinstance(data, (tuple, list)):
- return type(data)(
- d.unsqueeze(dim) if torch.is_tensor(d) else unsqueeze_data(d, dim)
- for d in data
- )
- elif torch.is_tensor(data):
- return data.unsqueeze(dim)
- else:
- return data
+ msg = f"Unknown model format: {model_format}."
+ msg += f" Available formats: {AVAILABLE_MODEL_FORMATS_LIDAR}"
+ raise Exception(msg)
def get_computational_cost(
@@ -103,63 +62,6 @@ def get_computational_cost(
:return: DataFrame containing computational cost information
:rtype: pd.DataFrame
"""
- # Get model size if possible
- if model_fname is not None:
- size_mb = os.path.getsize(model_fname) / 1024**2
- else:
- size_mb = None
-
- # Measure inference time with GPU synchronization
- dummy_tuple = dummy_input if isinstance(dummy_input, tuple) else (dummy_input,)
-
- for _ in range(warm_up_runs):
- if hasattr(model, "inference"): # e.g. mmsegmentation models
- model.inference(
- *dummy_tuple,
- [
- dict(
- ori_shape=dummy_tuple[0].shape[2:],
- img_shape=dummy_tuple[0].shape[2:],
- pad_shape=dummy_tuple[0].shape[2:],
- padding_size=[0, 0, 0, 0],
- )
- ]
- * dummy_tuple[0].shape[0],
- )
- else:
- model(*dummy_tuple)
-
- inference_times = []
- for _ in range(runs):
- torch.cuda.synchronize()
- start_time = time.time()
- if hasattr(model, "inference"): # e.g. mmsegmentation models
- model.inference(
- *dummy_tuple,
- [
- dict(
- ori_shape=dummy_tuple[0].shape[2:],
- img_shape=dummy_tuple[0].shape[2:],
- pad_shape=dummy_tuple[0].shape[2:],
- padding_size=[0, 0, 0, 0],
- )
- ]
- * dummy_tuple[0].shape[0],
- )
- else:
- model(*dummy_tuple)
- torch.cuda.synchronize()
- end_time = time.time()
- inference_times.append(end_time - start_time)
-
- result = {
- "input_shape": ["x".join(map(str, get_data_shape(dummy_input)))],
- "n_params": [sum(p.numel() for p in model.parameters())],
- "size_mb": [size_mb],
- "inference_time_s": [np.mean(inference_times)],
- }
-
- return pd.DataFrame.from_dict(result)
class CustomResize(torch.nn.Module):
@@ -256,16 +158,14 @@ def __getitem__(
class LiDARSegmentationTorchDataset(Dataset):
- """Dataset for LiDAR segmentation PyTorch models
+ """Dataset for LiDAR segmentation PyTorch - Open3D-ML models
:param dataset: LiDAR segmentation dataset
:type dataset: LiDARSegmentationDataset
:param model_cfg: Dictionary containing model configuration
:type model_cfg: dict
- :param preprocess: Function for preprocessing point clouds
- :type preprocess: callable
- :param n_classes: Number of classes estimated by the model
- :type n_classes: int
+ :param get_sample: Function for loading sample data
+ :type get_sample: callable
:param splits: Splits to be used from the dataset, defaults to ["test"]
:type splits: str, optional
"""
@@ -274,59 +174,33 @@ def __init__(
self,
dataset: dm_segmentation_dataset.LiDARSegmentationDataset,
model_cfg: dict,
- preprocess: callable,
- n_classes: int,
+ get_sample: callable,
splits: str = ["test"],
):
# Filter split and make filenames global
dataset.dataset = dataset.dataset[dataset.dataset["split"].isin(splits)]
self.dataset = dataset
self.dataset.make_fname_global()
-
self.model_cfg = model_cfg
- self.preprocess = preprocess
- self.n_classes = n_classes
+ self.get_sample = get_sample
def __len__(self):
return len(self.dataset.dataset)
- def __getitem__(
- self, idx: int
- ) -> Tuple[Union[np.ndarray, torch.Tensor], Union[np.ndarray, torch.Tensor]]:
- """Prepare sample data: point cloud and label
+ def __getitem__(self, idx: int):
+ """Prepare sample data
:param idx: Sample index
:type idx: int
- :return: Point cloud and corresponding label tensor or numpy arrays
- :rtype: Tuple[np.ndarray, np.ndarray,]
+ :return: Sample data required by the model
"""
- # Read the point cloud and its labels
- points = self.dataset.read_points(self.dataset.dataset.iloc[idx]["points"])
- semantic_label, instance_label = self.dataset.read_label(
- self.dataset.dataset.iloc[idx]["label"]
- )
-
- # Preprocess point cloud
- preprocessed_points, search_tree, projected_indices = self.preprocess(
- points, self.model_cfg
- )
-
- # Init sampler
- sampler = None
- if "sampler" in self.model_cfg:
- sampler = ul.Sampler(
- preprocessed_points.shape[0],
- search_tree,
- self.model_cfg["sampler"],
- self.n_classes,
- )
-
- return (
- self.dataset.dataset.index[idx],
- preprocessed_points,
- projected_indices,
- (semantic_label, instance_label),
- sampler,
+ return self.get_sample(
+ points_fname=self.dataset.dataset.iloc[idx]["points"],
+ model_cfg=self.model_cfg,
+ label_fname=self.dataset.dataset.iloc[idx]["label"],
+ name=self.dataset.dataset.index[idx],
+ idx=idx,
+ has_intensity=self.dataset.has_intensity,
)
@@ -440,38 +314,57 @@ def __init__(
]
)
- def inference(self, image: Image.Image) -> Image.Image:
- """Perform inference for a single image
+ def predict(
+ self, image: Image.Image, return_sample: bool = False
+ ) -> Union[Image.Image, Tuple[Image.Image, torch.Tensor]]:
+ """Perform prediction for a single image
:param image: PIL image
:type image: Image.Image
- :return: segmenation result as PIL image
- :rtype: Image.Image
+ :param return_sample: Whether to return the sample data along with predictions, defaults to False
+ :type return_sample: bool, optional
+ :return: Segmentation result as a PIL image or a tuple with the segmentation result and the input sample tensor
+ :rtype: Union[Image.Image, Tuple[Image.Image, torch.Tensor]]
"""
- tensor = self.transform_input(image).unsqueeze(0).to(self.device)
+ sample = self.transform_input(image).unsqueeze(0).to(self.device)
+ result = self.inference(sample)
+ result = self.transform_output(result)
+
+ if return_sample:
+ return result, sample
+ else:
+ return result
+ def inference(self, tensor_in: torch.Tensor) -> torch.Tensor:
+ """Perform inference for a tensor
+
+ :param tensor_in: Input image tensor
+ :type tensor_in: torch.Tensor
+ :return: Segmentation result as tensor
+ :rtype: torch.Tensor
+ """
with torch.no_grad():
# Perform inference
if hasattr(self.model, "inference"): # e.g. mmsegmentation models
- result = self.model.inference(
- tensor.to(self.device),
+ tensor_out = self.model.inference(
+ tensor_in.to(self.device),
[
dict(
- ori_shape=tensor.shape[2:],
- img_shape=tensor.shape[2:],
- pad_shape=tensor.shape[2:],
+ ori_shape=tensor_in.shape[2:],
+ img_shape=tensor_in.shape[2:],
+ pad_shape=tensor_in.shape[2:],
padding_size=[0, 0, 0, 0],
)
]
- * tensor.shape[0],
+ * tensor_in.shape[0],
)
else:
- result = self.model(tensor.to(self.device))
+ tensor_out = self.model(tensor_in.to(self.device))
- if isinstance(result, dict):
- result = result["out"]
+ if isinstance(tensor_out, dict):
+ tensor_out = tensor_out["out"]
- return self.transform_output(result)
+ return tensor_out
def eval(
self,
@@ -486,7 +379,7 @@ def eval(
:param dataset: Image segmentation dataset for which the evaluation will be performed
:type dataset: ImageSegmentationDataset
:param split: Split or splits to be used from the dataset, defaults to "test"
- :type split: str | List[str], optional
+ :type split: Union[str, List[str]], optional
:param ontology_translation: JSON file containing translation between dataset and model output ontologies
:type ontology_translation: str, optional
:param predictions_outdir: Directory to save predictions per sample, defaults to None. If None, predictions are not saved.
@@ -507,7 +400,9 @@ def eval(
os.makedirs(predictions_outdir, exist_ok=True)
# Build a LUT for transforming ontology if needed
- lut_ontology = self.get_lut_ontology(dataset.ontology, ontology_translation)
+ lut_ontology = uc.get_ontology_conversion_lut(
+ self.ontology, dataset.ontology, ontology_translation
+ )
lut_ontology = torch.tensor(lut_ontology, dtype=torch.int64).to(self.device)
# Retrieve ignored label indices
@@ -537,24 +432,7 @@ def eval(
pbar = tqdm(dataloader, leave=True)
for idx, image, label in pbar:
# Perform inference
- if hasattr(self.model, "inference"): # e.g. mmsegmentation models
- pred = self.model.inference(
- image.to(self.device),
- [
- dict(
- ori_shape=image.shape[2:],
- img_shape=image.shape[2:],
- pad_shape=image.shape[2:],
- padding_size=[0, 0, 0, 0],
- )
- ]
- * image.shape[0],
- )
- else:
- pred = self.model(image.to(self.device))
-
- if isinstance(pred, dict):
- pred = pred["out"]
+ pred = self.inference(image)
# Get valid points masks depending on ignored label indices
if ignored_label_indices:
@@ -617,10 +495,38 @@ def get_computational_cost(
:type warm_up_runs: int, optional
:return: Dictionary containing computational cost information
"""
+ # Create dummy input
dummy_input = torch.randn(1, 3, *image_size).to(self.device)
- return get_computational_cost(
- self.model, dummy_input, self.model_fname, runs, warm_up_runs
- )
+
+ # Get model size if possible
+ if self.model_fname is not None:
+ size_mb = os.path.getsize(self.model_fname) / 1024**2
+ else:
+ size_mb = None
+
+ # Measure inference time with GPU synchronization
+ dummy_tuple = dummy_input if isinstance(dummy_input, tuple) else (dummy_input,)
+
+ for _ in range(warm_up_runs):
+ self.inference(dummy_tuple[0])
+
+ inference_times = []
+ for _ in range(runs):
+ torch.cuda.synchronize()
+ start_time = time.time()
+ self.inference(dummy_tuple[0])
+ torch.cuda.synchronize()
+ end_time = time.time()
+ inference_times.append(end_time - start_time)
+
+ result = {
+ "input_shape": ["x".join(map(str, ut.get_data_shape(dummy_input)))],
+ "n_params": [sum(p.numel() for p in self.model.parameters())],
+ "size_mb": [size_mb],
+ "inference_time_s": [np.mean(inference_times)],
+ }
+
+ return pd.DataFrame.from_dict(result)
class TorchLiDARSegmentationModel(dm_segmentation_model.LiDARSegmentationModel):
@@ -655,6 +561,7 @@ def __init__(
print("Model is not a TorchScript model. Loading as a PyTorch module.")
model = torch.load(model, map_location=self.device)
model_type = "native"
+
# Otherwise, check that it is a PyTorch module
elif isinstance(model, torch.nn.Module):
model_fname = None
@@ -666,98 +573,63 @@ def __init__(
super().__init__(model, model_type, model_cfg, ontology_fname, model_fname)
self.model = self.model.to(self.device).eval()
- # Init model specific functions
- if self.model_cfg["input_format"] == "o3d_randlanet": # Open3D RandLaNet
- self.preprocess = tmu.preprocess
- self.transform_input = tmu.o3d_randlanet.transform_input
- self.update_probs = tmu.o3d_randlanet.update_probs
- self.model_cfg["num_layers"] = sum(1 for _ in self.model.decoder.children())
- if self.model_cfg["input_format"] == "o3d_kpconv": # Open3D KPConv
- self.preprocess = tmu.preprocess
- self.transform_input = tmu.o3d_kpconv.transform_input
- self.update_probs = tmu.o3d_kpconv.update_probs
- else:
- self.preprocess = tmu.preprocess
- self.transform_input = tmu.transform_input
- self.update_probs = tmu.update_probs
+ # Init specific attributes and update model configuration
+ self.model_format = self.model_cfg["model_format"]
- # Transformation for output labels
- self.transform_output = (
- lambda x: torch.argmax(x.squeeze(), axis=-1).squeeze().to(torch.uint8)
+ # Init model specific functions
+ model_format = self.model_format.split("_")[0]
+ model_utils_module_str = (
+ f"detectionmetrics.models.lidar_torch_utils.{model_format}"
)
+ try:
+ model_utils_module = importlib.import_module(model_utils_module_str)
+ except ImportError:
+ raise_unknown_model_format_lidar(model_format)
+ self._get_sample = model_utils_module.get_sample
+ self.inference = model_utils_module.inference
+ if hasattr(model_utils_module, "reset_sampler"):
+ self._reset_sampler = model_utils_module.reset_sampler
+ else:
+ self._reset_sampler = None
- def inference(self, points: np.ndarray) -> np.ndarray:
- """Perform inference for a single point cloud
-
- :param points: Point cloud xyz array
- :type points: np.ndarray
- :return: Segmenation result as a point cloud with label indices
- :rtype: np.ndarray
+ def predict(
+ self,
+ points_fname: str,
+ has_intensity: bool = True,
+ return_sample: bool = False,
+ ignore_index: Optional[List[int]] = None,
+ ) -> Union[np.ndarray, Tuple[np.ndarray, Any]]:
+ """Perform prediction for a single point cloud
+
+ :param points_fname: Point cloud in SemanticKITTI .bin format
+ :type points_fname: str
+ :param has_intensity: Whether the point cloud has intensity values, defaults to True
+ :type has_intensity: bool, optional
+ :param return_sample: Whether to return the sample data along with predictions, defaults to False
+ :type return_sample: bool, optional
+ :param ignore_index: List of class indices to ignore during prediction, defaults to None
+ :type ignore_index: Optional[List[int]], optional
+ :return: Segmentation result as a numpy array or a tuple with the segmentation result and the input sample data
+ :rtype: Union[np.ndarray, Tuple[np.ndarray, Any]]
"""
# Preprocess point cloud
- points, search_tree, projected_indices = self.preprocess(points, self.model_cfg)
-
- # Init sampler if needed
- sampler = None
- if "sampler" in self.model_cfg:
- end_th = self.model_cfg.get("end_th", 0.5)
- sampler = ul.Sampler(
- points.shape[0],
- search_tree,
- self.model_cfg["sampler"],
- self.n_classes,
- )
-
- # Iterate over the sampled point cloud until all points reach the end threshold.
- # If no sampler is provided, the inference is performed in a single step.
- infer_complete = False
- while not infer_complete:
- # Get model input data
- input_data, selected_indices = self.transform_input(
- points, self.model_cfg, sampler
- )
- input_data = data_to_device(input_data, self.device)
- if self.model_cfg["input_format"] != "o3d_kpconv":
- input_data = unsqueeze_data(input_data)
-
- # Perform inference
- with torch.no_grad():
- result = self.model(*input_data)
-
- # TODO: check if this is consistent across different models
- if isinstance(result, dict):
- result = result["out"]
-
- # Update probabilities if sampler is used
- if sampler is not None:
- if self.model_cfg["input_format"] == "o3d_kpconv":
- sampler.test_probs = self.update_probs(
- result,
- selected_indices,
- sampler.test_probs,
- lengths=input_data[-1],
- )
- else:
- sampler.test_probs = self.update_probs(
- result,
- selected_indices,
- sampler.test_probs,
- self.n_classes,
- )
- if sampler.p[sampler.p > end_th].shape[0] == sampler.p.shape[0]:
- result = sampler.test_probs[projected_indices]
- infer_complete = True
- else:
- result = result.squeeze().cpu()[projected_indices].cuda()
- infer_complete = True
+ sample = self._get_sample(
+ points_fname, self.model_cfg, has_intensity=has_intensity
+ )
+ result, _, _ = self.inference(sample, self.model, self.model_cfg, ignore_index)
+ result = result.squeeze().cpu().numpy()
- return self.transform_output(result).cpu().numpy()
+ if return_sample:
+ return result, sample
+ else:
+ return result
def eval(
self,
dataset: dm_segmentation_dataset.LiDARSegmentationDataset,
split: str | List[str] = "test",
ontology_translation: Optional[str] = None,
+ translation_direction: str = "dataset_to_model",
predictions_outdir: Optional[str] = None,
results_per_sample: bool = False,
) -> pd.DataFrame:
@@ -766,9 +638,11 @@ def eval(
:param dataset: LiDAR segmentation dataset for which the evaluation will be performed
:type dataset: LiDARSegmentationDataset
:param split: Split or splits to be used from the dataset, defaults to "test"
- :type split: str | List[str], optional
+ :type split: Union[str, List[str]], optional
:param ontology_translation: JSON file containing translation between dataset and model output ontologies
- :type ontology_translation: str, optional
+ :type ontology_translation: Optional[str], optional
+ :param translation_direction: Direction of the ontology translation, either 'dataset_to_model' or 'model_to_dataset', defaults to "dataset_to_model"
+ :type translation_direction: str, optional
:param predictions_outdir: Directory to save predictions per sample, defaults to None. If None, predictions are not saved.
:type predictions_outdir: Optional[str], optional
:param results_per_sample: Whether to store results per sample or not, defaults to False. If True, predictions_outdir must be provided.
@@ -787,75 +661,50 @@ def eval(
os.makedirs(predictions_outdir, exist_ok=True)
# Build a LUT for transforming ontology if needed
- lut_ontology = self.get_lut_ontology(dataset.ontology, ontology_translation)
- lut_ontology = torch.tensor(lut_ontology, dtype=torch.int64).to(self.device)
+ eval_ontology = self.ontology
+
+ if ontology_translation is not None:
+ ontology_translation = uio.read_json(ontology_translation)
+ if translation_direction == "dataset_to_model":
+ lut_ontology = uc.get_ontology_conversion_lut(
+ dataset.ontology, self.ontology, ontology_translation
+ )
+ else:
+ eval_ontology = dataset.ontology
+ lut_ontology = uc.get_ontology_conversion_lut(
+ self.ontology, dataset.ontology, ontology_translation
+ )
+
+ lut_ontology = torch.tensor(lut_ontology, dtype=torch.int64).to(self.device)
+ else:
+ lut_ontology = None
+
+ n_classes = len(eval_ontology)
# Retrieve ignored label indices
ignored_label_indices = []
for ignored_class in self.model_cfg.get("ignored_classes", []):
ignored_label_indices.append(dataset.ontology[ignored_class]["idx"])
- # Get PyTorch dataset (no dataloader to avoid complexity with batching samplers)
+ # Get PyTorch dataset (samples are backend-specific, so no dataloader/batching here)
dataset = LiDARSegmentationTorchDataset(
dataset,
- model_cfg=self.model_cfg,
- preprocess=self.preprocess,
- n_classes=self.n_classes,
+ self.model_cfg,
+ self._get_sample,
splits=[split] if isinstance(split, str) else split,
)
# Init metrics
- metrics_factory = um.SegmentationMetricsFactory(self.n_classes)
+ metrics_factory = um.SegmentationMetricsFactory(n_classes)
# Evaluation loop
- end_th = self.model_cfg.get("end_th", 0.5)
with torch.no_grad():
pbar = tqdm(dataset, total=len(dataset), leave=True)
- for idx, points, projected_indices, (label, _), sampler in pbar:
- # Iterate over the sampled point cloud until all points reach the end
- # threshold. If no sampler is provided, the inference is performed in a
- # single step.
- infer_complete = False
- while not infer_complete:
- # Get model input data
- input_data, selected_indices = self.transform_input(
- points, self.model_cfg, sampler
- )
- input_data = data_to_device(input_data, self.device)
- if self.model_cfg["input_format"] != "o3d_kpconv":
- input_data = unsqueeze_data(input_data)
-
- # Perform inference
- pred = self.model(*input_data)
-
- # TODO: check if this is consistent across different models
- if isinstance(pred, dict):
- pred = pred["out"]
-
- if sampler is not None:
- if self.model_cfg["input_format"] == "o3d_kpconv":
- sampler.test_probs = self.update_probs(
- pred,
- selected_indices,
- sampler.test_probs,
- lengths=input_data[-1],
- )
- else:
- sampler.test_probs = self.update_probs(
- pred,
- selected_indices,
- sampler.test_probs,
- self.n_classes,
- )
- if sampler.p[sampler.p > end_th].shape[0] == sampler.p.shape[0]:
- pred = sampler.test_probs[projected_indices]
- infer_complete = True
- else:
- pred = pred.squeeze().cpu()[projected_indices].cuda()
- infer_complete = True
+ for sample in pbar:
+ # Perform inference
+ pred, label, name = self.inference(sample, self.model, self.model_cfg)
# Get valid points masks depending on ignored label indices
- label = torch.tensor(label, device=self.device)
if ignored_label_indices:
valid_mask = torch.ones_like(label, dtype=torch.bool)
for idx in ignored_label_indices:
@@ -865,70 +714,118 @@ def eval(
# Convert labels if needed
if lut_ontology is not None:
- label = lut_ontology[label]
+ if translation_direction == "dataset_to_model":
+ label = lut_ontology[label]
+ else:
+ pred = lut_ontology[pred]
# Prepare data and update metrics factory
- label = label.cpu().unsqueeze(0).numpy()
- pred = self.transform_output(pred)
- pred = pred.cpu().unsqueeze(0).to(torch.int64).numpy()
+ label = label.cpu().numpy()
+ pred = pred.cpu().numpy()
if valid_mask is not None:
- valid_mask = valid_mask.cpu().unsqueeze(0).numpy()
+ valid_mask = valid_mask.cpu().numpy()
metrics_factory.update(pred, label, valid_mask)
# Store predictions and results per sample if required
if predictions_outdir is not None:
- for i, (sample_idx, sample_pred, sample_label) in enumerate(
- zip(idx, pred, label)
+ for i, (sample_name, sample_pred, sample_label) in enumerate(
+ zip(name, pred, label)
):
if results_per_sample:
sample_valid_mask = (
valid_mask[i] if valid_mask is not None else None
)
- sample_mf = um.SegmentationMetricsFactory(n_classes=self.n_classes)
+ sample_mf = um.SegmentationMetricsFactory(n_classes)
sample_mf.update(
sample_pred, sample_label, sample_valid_mask
)
sample_df = um.get_metrics_dataframe(
- sample_mf, self.ontology
+ sample_mf, eval_ontology
)
sample_df.to_csv(
- os.path.join(predictions_outdir, f"{sample_idx}.csv")
+ os.path.join(predictions_outdir, f"{sample_name}.csv")
)
pred.tofile(
- os.path.join(predictions_outdir, f"{sample_idx}.bin")
+ os.path.join(predictions_outdir, f"{sample_name}.bin")
)
- return um.get_metrics_dataframe(metrics_factory, self.ontology)
+ return um.get_metrics_dataframe(metrics_factory, eval_ontology)
- def get_computational_cost(self, runs: int = 30, warm_up_runs: int = 5) -> dict:
+ def get_computational_cost(
+ self,
+ point_cloud_range: Tuple[int, int, int, int, int, int] = (
+ -50,
+ -50,
+ -5,
+ 50,
+ 50,
+ 5,
+ ),
+ num_points: int = 100000,
+ has_intensity: bool = False,
+ runs: int = 30,
+ warm_up_runs: int = 5,
+ ) -> dict:
"""Get different metrics related to the computational cost of the model
+ :param point_cloud_range: Point cloud range (meters), defaults to (-50, -50, -5, 50, 50, 5)
+ :type point_cloud_range: Tuple[int, int, int, int, int, int], optional
+ :param num_points: Number of points in the point cloud, defaults to 100000
+ :type num_points: int, optional
+ :param has_intensity: Whether the point cloud has intensity values, defaults to False
+ :type has_intensity: bool, optional
:param runs: Number of runs to measure inference time, defaults to 30
:type runs: int, optional
:param warm_up_runs: Number of warm-up runs, defaults to 5
:type warm_up_runs: int, optional
:return: Dictionary containing computational cost information
"""
- # Build dummy input data (process is a bit complex for LiDAR models)
- dummy_points = np.random.rand(1000000, 4)
- dummy_points, search_tree, _ = self.preprocess(dummy_points, self.model_cfg)
-
- sampler = None
- if "sampler" in self.model_cfg:
- sampler = ul.Sampler(
- point_cloud_size=dummy_points.shape[0],
- search_tree=search_tree,
- sampler_name=self.model_cfg["sampler"],
- num_classes=self.n_classes,
+ # Build dummy point cloud using uniform distribution
+ dummy_points = np.random.uniform(
+ low=point_cloud_range[0:3],
+ high=point_cloud_range[3:6],
+ size=(num_points, 3 + int(has_intensity)),
+ ).astype(np.float32)
+
+ # Store in a secure temporary .bin file
+ with tempfile.NamedTemporaryFile(suffix=".bin") as tmp_file:
+ dummy_points.tofile(tmp_file.name)
+ sample = self._get_sample(
+ tmp_file.name, self.model_cfg, has_intensity=has_intensity
)
- dummy_input, _ = self.transform_input(dummy_points, self.model_cfg, sampler)
- dummy_input = data_to_device(dummy_input, self.device)
- if self.model_cfg["input_format"] != "o3d_kpconv":
- dummy_input = unsqueeze_data(dummy_input)
-
- # Get computational cost
- return get_computational_cost(
- self.model, dummy_input, self.model_fname, runs, warm_up_runs
- )
+ # Get model size if possible
+ if self.model_fname is not None:
+ size_mb = os.path.getsize(self.model_fname) / 1024**2
+ else:
+ size_mb = None
+
+ # Measure inference time with GPU synchronization
+ for _ in range(warm_up_runs):
+ if "o3d" in self.model_format: # reset random sampling for Open3D-ML models
+ subsampled_points, _, sampler, _, _, _ = sample
+ self._reset_sampler(sampler, subsampled_points.shape[0], self.n_classes)
+
+ self.inference(sample, self.model, self.model_cfg)
+
+ inference_times = []
+ for _ in range(runs):
+ if "o3d" in self.model_format: # reset random sampling for Open3D-ML models
+ subsampled_points, _, sampler, _, _, _ = sample
+ self._reset_sampler(sampler, subsampled_points.shape[0], self.n_classes)
+ torch.cuda.synchronize()
+ start_time = time.time()
+ self.inference(sample, self.model, self.model_cfg)
+ torch.cuda.synchronize()
+ end_time = time.time()
+ inference_times.append(end_time - start_time)
+
+ result = {
+ "input_shape": ["x".join(map(str, ut.get_data_shape(dummy_points)))],
+ "n_params": [sum(p.numel() for p in self.model.parameters())],
+ "size_mb": [size_mb],
+ "inference_time_s": [np.mean(inference_times)],
+ }
+
+ return pd.DataFrame.from_dict(result)
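Note (not part of the patch): a usage sketch of the new LiDAR computational-cost interface. The module path and constructor arguments below are assumptions for illustration, not taken from this diff; only the `get_computational_cost` parameters mirror what the patch adds.

# Sketch only: import path, file names, and constructor signature are assumptions.
from detectionmetrics.models.torch_lidar_segmentation import TorchLiDARSegmentationModel

model = TorchLiDARSegmentationModel(
    model="checkpoints/lidar_model.pt",      # TorchScript file or torch.nn.Module
    model_cfg="cfgs/lidar_model.json",       # must define "model_format", e.g. "mmdet3d"
    ontology_fname="ontologies/model.json",
)

# Defaults match the CLI batch handler: a 100 x 100 x 10 m volume with 100k XYZ points.
costs = model.get_computational_cost(
    point_cloud_range=(-50, -50, -5, 50, 50, 5),
    num_points=100_000,
    has_intensity=False,
    runs=30,
    warm_up_runs=5,
)
print(costs[["n_params", "size_mb", "inference_time_s"]])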
diff --git a/detectionmetrics/models/utils/__init__.py b/detectionmetrics/models/utils/__init__.py
new file mode 100644
index 00000000..a706d9f3
--- /dev/null
+++ b/detectionmetrics/models/utils/__init__.py
@@ -0,0 +1,19 @@
+try:
+ from detectionmetrics.models.utils import o3d
+except ImportError:
+ pass
+
+try:
+ from detectionmetrics.models.utils import mmdet3d
+except ImportError:
+ pass
+
+try:
+ from detectionmetrics.models.utils import lsk3dnet
+except ImportError:
+ pass
+
+try:
+ from detectionmetrics.models.utils import sphereformer
+except ImportError:
+ pass
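Note (not part of the patch): the try/except imports above keep each LiDAR backend optional, while the model constructor resolves `model_format` to one of these modules with `importlib`. A minimal sketch of that lookup, assuming the backend modules live under `detectionmetrics.models.utils` as added in this patch (the helper name is hypothetical):

import importlib

def resolve_lidar_backend(model_format: str):
    # "o3d_randlanet" -> "o3d", "mmdet3d" -> "mmdet3d", "sphereformer" -> "sphereformer", ...
    backend = model_format.split("_")[0]
    try:
        return importlib.import_module(f"detectionmetrics.models.utils.{backend}")
    except ImportError as err:
        raise ValueError(f"Unknown or uninstalled LiDAR backend: {model_format}") from err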
diff --git a/detectionmetrics/models/utils/lsk3dnet.py b/detectionmetrics/models/utils/lsk3dnet.py
new file mode 100644
index 00000000..581c2e05
--- /dev/null
+++ b/detectionmetrics/models/utils/lsk3dnet.py
@@ -0,0 +1,298 @@
+import time
+from typing import List, Optional, Tuple
+
+from c_gen_normal_map import gen_normal_map
+import numpy as np
+import torch
+import utils.depth_map_utils as depth_map_utils
+
+import detectionmetrics.utils.torch as ut
+import detectionmetrics.utils.lidar as ul
+
+
+def range_projection(current_vertex, fov_up=3.0, fov_down=-25.0, proj_H=64, proj_W=900):
+ """Project a pointcloud into a spherical projection (range image)."""
+ # laser parameters
+ fov_up = fov_up / 180.0 * np.pi # field of view up in radians
+ fov_down = fov_down / 180.0 * np.pi # field of view down in radians
+ fov = abs(fov_down) + abs(fov_up) # get field of view total in radians
+
+ # get depth of all points
+ depth = np.linalg.norm(current_vertex[:, :3], 2, axis=1)
+
+ # get scan components
+ scan_x = current_vertex[:, 0]
+ scan_y = current_vertex[:, 1]
+ scan_z = current_vertex[:, 2]
+
+ # get angles of all points
+ yaw = -np.arctan2(scan_y, scan_x)
+ pitch = np.arcsin(scan_z / depth)
+
+ # get projections in image coords
+ proj_x = 0.5 * (yaw / np.pi + 1.0) # in [0.0, 1.0]
+ proj_y = 1.0 - (pitch + abs(fov_down)) / fov # in [0.0, 1.0]
+
+ # scale to image size using angular resolution
+ proj_x *= proj_W # in [0.0, W]
+ proj_y *= proj_H # in [0.0, H]
+
+ # round and clamp for use as index
+ proj_x = np.floor(proj_x)
+ proj_x = np.minimum(proj_W - 1, proj_x)
+ proj_x = np.maximum(0, proj_x).astype(np.int32) # in [0,W-1]
+ from_proj_x = np.copy(proj_x) # store a copy in orig order
+
+ proj_y = np.floor(proj_y)
+ proj_y = np.minimum(proj_H - 1, proj_y)
+ proj_y = np.maximum(0, proj_y).astype(np.int32) # in [0,H-1]
+ from_proj_y = np.copy(proj_y) # store a copy in original order
+
+ # order in decreasing depth
+ order = np.argsort(depth)[::-1]
+ depth = depth[order]
+
+ proj_y = proj_y[order]
+ proj_x = proj_x[order]
+
+ scan_x = scan_x[order]
+ scan_y = scan_y[order]
+ scan_z = scan_z[order]
+
+ indices = np.arange(depth.shape[0])
+ indices = indices[order]
+
+ proj_range = np.full((proj_H, proj_W), -1, dtype=np.float32)
+ proj_vertex = np.full((proj_H, proj_W, 4), -1, dtype=np.float32)
+ proj_idx = np.full((proj_H, proj_W), -1, dtype=np.int32)
+
+ proj_range[proj_y, proj_x] = depth
+ proj_vertex[proj_y, proj_x] = np.array(
+ [scan_x, scan_y, scan_z, np.ones(len(scan_x))]
+ ).T
+ proj_idx[proj_y, proj_x] = indices
+
+ return proj_range, proj_vertex, from_proj_x, from_proj_y
+
+
+def compute_normals_range(
+ current_vertex, proj_H=64, proj_W=900, extrapolate=True, blur_type="gaussian"
+):
+ """Compute normals for each point using range image-based method."""
+ proj_range, proj_vertex, from_proj_x, from_proj_y = range_projection(current_vertex)
+ proj_range = depth_map_utils.fill_in_fast(
+ proj_range, extrapolate=extrapolate, blur_type=blur_type
+ )
+
+ # generate normal image
+ normal_data = gen_normal_map(proj_range, proj_vertex, proj_H, proj_W)
+ unproj_normal_data = normal_data[from_proj_y, from_proj_x]
+
+ return unproj_normal_data
+
+
+def collate_fn(samples: List[dict]) -> dict:
+ """Collate function for batching samples
+
+ :param samples: list of sample dictionaries
+ :type samples: List[dict]
+ :return: collated batch dictionary
+ :rtype: dict
+ """
+ point_num = [d["point_num"] for d in samples]
+ batch_size = len(point_num)
+ ref_labels = samples[0]["ref_label"]
+ origin_len = samples[0]["origin_len"]
+ ref_indices = [torch.from_numpy(d["ref_index"]) for d in samples]
+ path = samples[0]["root"] # [d['root'] for d in data]
+ root = [d["root"] for d in samples]
+ sample_id = [d["sample_id"] for d in samples]
+
+ b_idx = []
+ for i in range(batch_size):
+ b_idx.append(torch.ones(point_num[i]) * i)
+ points = [torch.from_numpy(d["point_feat"]) for d in samples]
+ ref_xyz = [torch.from_numpy(d["ref_xyz"]) for d in samples]
+
+ has_labels = samples[0]["point_label"] is not None
+ if has_labels:
+ labels = [torch.from_numpy(d["point_label"]) for d in samples]
+ else:
+ labels = [d["point_label"] for d in samples]
+ normal = [torch.from_numpy(d["normal"]) for d in samples]
+
+ return {
+ "points": torch.cat(points).float(),
+ "normal": torch.cat(normal).float(),
+ "ref_xyz": torch.cat(ref_xyz).float(),
+ "batch_idx": torch.cat(b_idx).long(),
+ "batch_size": batch_size,
+ "labels": torch.cat(labels).long().squeeze(1) if has_labels else labels,
+ "raw_labels": torch.from_numpy(ref_labels).long() if has_labels else ref_labels,
+ "origin_len": origin_len,
+ "indices": torch.cat(ref_indices).long(),
+ "path": path,
+ "point_num": point_num,
+ "root": root,
+ "sample_id": sample_id,
+ }
+
+
+def get_sample(
+ points_fname: str,
+ model_cfg: dict,
+ label_fname: Optional[str] = None,
+ name: Optional[str] = None,
+ idx: Optional[int] = None,
+ has_intensity: bool = True,
+ measure_processing_time: bool = False,
+) -> Tuple[dict, Optional[dict]]:
+ """Get sample data for mmdetection3d models
+
+ :param points_fname: filename of the point cloud
+ :type points_fname: str
+ :param model_cfg: model configuration
+ :type model_cfg: dict
+ :param label_fname: filename of the semantic label, defaults to None
+ :type label_fname: Optional[str], optional
+ :param name: sample name, defaults to None
+ :type name: Optional[str], optional
+ :param idx: sample numerical index, defaults to None
+ :type idx: Optional[int], optional
+ :param has_intensity: whether the point cloud has intensity values, defaults to True
+ :type has_intensity: bool, optional
+ :param measure_processing_time: whether to measure processing time, defaults to False
+ :type measure_processing_time: bool, optional
+ :return: sample data dictionary and processing time dictionary (if measured)
+ :rtype: Tuple[dict, Optional[dict]]
+ """
+ raw_data = ul.read_semantickitti_points(points_fname, has_intensity)
+
+ labels, ref_labels = None, None
+ if label_fname is not None:
+ labels, _ = ul.read_semantickitti_label(label_fname)
+ labels = labels.reshape((-1, 1)).astype(np.uint8)
+ ref_labels = labels.copy()
+
+ if measure_processing_time:
+ start = time.perf_counter()
+
+ xyz = raw_data[:, :3]
+ feat = raw_data[:, 3:4] if model_cfg["n_feats"] > 3 else None
+ origin_len = len(xyz)
+
+ ref_pc = xyz.copy()
+ ref_index = np.arange(len(ref_pc))
+
+ mask_x = np.logical_and(
+ xyz[:, 0] > model_cfg["min_volume_space"][0],
+ xyz[:, 0] < model_cfg["max_volume_space"][0],
+ )
+ mask_y = np.logical_and(
+ xyz[:, 1] > model_cfg["min_volume_space"][1],
+ xyz[:, 1] < model_cfg["max_volume_space"][1],
+ )
+ mask_z = np.logical_and(
+ xyz[:, 2] > model_cfg["min_volume_space"][2],
+ xyz[:, 2] < model_cfg["max_volume_space"][2],
+ )
+ mask = np.logical_and(mask_x, np.logical_and(mask_y, mask_z))
+
+ not_zero = np.logical_not(np.all(xyz[:, :3] == 0, axis=1))
+ mask = np.logical_and(mask, not_zero)
+
+ xyz = xyz[mask]
+ if labels is not None:
+ labels = labels[mask]
+ ref_index = ref_index[mask]
+ if feat is not None:
+ feat = feat[mask]
+ point_num = len(xyz)
+
+ feat = np.concatenate((xyz, feat), axis=1) if feat is not None else xyz
+
+ unproj_normal_data = compute_normals_range(feat)
+
+ if measure_processing_time:
+ end = time.perf_counter()
+ processing_time = {"preprocessing": end - start}
+
+ sample = {}
+ sample["point_feat"] = feat
+ sample["point_label"] = labels
+ sample["ref_xyz"] = ref_pc
+ sample["ref_label"] = ref_labels
+ sample["ref_index"] = ref_index
+ sample["point_num"] = point_num
+ sample["origin_len"] = origin_len
+ sample["normal"] = unproj_normal_data
+ sample["root"] = points_fname
+ sample["sample_id"] = name
+ sample["idx"] = idx
+
+ if measure_processing_time:
+ return sample, processing_time
+
+ return sample
+
+
+def inference(
+ sample: dict,
+ model: torch.nn.Module,
+ model_cfg: dict,
+ ignore_index: Optional[List[int]] = None,
+ measure_processing_time: bool = False,
+) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[List[str]]]:
+ """Perform inference on a sample using an mmdetection3D model
+
+ :param sample: sample data dictionary
+ :type sample: dict
+ :param model: LSK3DNet model
+ :type model: torch.nn.Module
+ :param model_cfg: model configuration
+ :type model_cfg: dict
+ :param ignore_index: list of class indices to ignore during inference, defaults to None
+ :type ignore_index: Optional[List[int]], optional
+ :param measure_processing_time: whether to measure processing time, defaults to False
+ :type measure_processing_time: bool, optional
+ :return: tuple of (predictions, labels, names) and processing time dictionary (if measured)
+ :rtype: Tuple[Tuple[torch.Tensor, Optional[torch.Tensor], List[str]], Optional[dict]]
+ """
+ single_sample = not isinstance(sample["sample_id"], list)
+ if single_sample:
+ sample = collate_fn([sample])
+
+ device = next(model.parameters()).device
+ for k, v in sample.items():
+ sample[k] = ut.data_to_device(v, device)
+
+ if measure_processing_time:
+ torch.cuda.synchronize()
+ start = time.perf_counter()
+ pred = model(sample)
+ if measure_processing_time:
+ torch.cuda.synchronize()
+ end = time.perf_counter()
+ processing_time = {"inference_n_voxelization": end - start}
+
+ if ignore_index is not None:
+ pred["logits"][:, ignore_index] = -1e9
+ pred["logits"] = torch.argmax(pred["logits"], dim=1)
+
+ has_labels = pred["labels"][0] is not None
+ preds, labels, names = ([], [], []) if has_labels else ([], None, None)
+
+ for batch_idx in range(pred["batch_size"]):
+ preds.append(pred["logits"][pred["batch_idx"] == batch_idx])
+ if has_labels:
+ labels.append(pred["labels"][pred["batch_idx"] == batch_idx])
+ names.append(pred["sample_id"][batch_idx])
+
+ preds = torch.stack(preds, dim=0).squeeze()
+ if has_labels:
+ labels = torch.stack(labels, dim=0).squeeze()
+
+ if measure_processing_time:
+ return (preds, labels, names), processing_time
+
+ return preds, labels, names
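Note (not part of the patch): a small self-contained check of the spherical-projection math used by `range_projection` above, for a single point 10 m ahead, 2 m left, and 1 m below the sensor (values illustrative).

import numpy as np

point = np.array([[10.0, 2.0, -1.0, 0.5]], dtype=np.float32)  # x, y, z, intensity

fov_up, fov_down = np.radians(3.0), np.radians(-25.0)
fov = abs(fov_up) + abs(fov_down)
proj_H, proj_W = 64, 900

depth = np.linalg.norm(point[:, :3], axis=1)
yaw = -np.arctan2(point[:, 1], point[:, 0])
pitch = np.arcsin(point[:, 2] / depth)

col = np.floor(0.5 * (yaw / np.pi + 1.0) * proj_W).astype(np.int32)
row = np.floor((1.0 - (pitch + abs(fov_down)) / fov) * proj_H).astype(np.int32)
print(col, row)  # ~[421] [19]; range_projection additionally clamps indices to the image bounds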
diff --git a/detectionmetrics/models/utils/mmdet3d.py b/detectionmetrics/models/utils/mmdet3d.py
new file mode 100644
index 00000000..2dc6bea8
--- /dev/null
+++ b/detectionmetrics/models/utils/mmdet3d.py
@@ -0,0 +1,153 @@
+import time
+from typing import List, Optional, Tuple
+
+from mmdet3d.datasets.transforms import (
+ LoadPointsFromFile,
+ LoadAnnotations3D,
+ Pack3DDetInputs,
+)
+from mmengine.registry import FUNCTIONS
+import torch
+from torchvision.transforms import Compose
+
+COLLATE_FN = FUNCTIONS.get("pseudo_collate")
+
+
+def get_sample(
+ points_fname: str,
+ model_cfg: dict,
+ label_fname: Optional[str] = None,
+ name: Optional[str] = None,
+ idx: Optional[int] = None,
+ has_intensity: bool = True,
+ measure_processing_time: bool = False,
+) -> Tuple[dict, Optional[dict]]:
+ """Get sample data for mmdetection3d models
+
+ :param points_fname: filename of the point cloud
+ :type points_fname: str
+ :param model_cfg: model configuration
+ :type model_cfg: dict
+ :param label_fname: filename of the semantic label, defaults to None
+ :type label_fname: Optional[str], optional
+ :param name: sample name, defaults to None
+ :type name: Optional[str], optional
+ :param idx: sample numerical index, defaults to None
+ :type idx: Optional[int], optional
+ :param has_intensity: whether the point cloud has intensity values, defaults to True
+ :type has_intensity: bool, optional
+ :param measure_processing_time: whether to measure processing time, defaults to False
+ :type measure_processing_time: bool, optional
+ :return: sample data and optionally processing time
+ :rtype: Tuple[ dict, Optional[dict] ]
+ """
+ sample = {
+ "lidar_points": {
+ "lidar_path": points_fname,
+ "num_pts_feats": model_cfg.get("n_feats", 4),
+ },
+ "pts_semantic_mask_path": label_fname,
+ "sample_id": name,
+ "sample_idx": idx,
+ "num_pts_feats": model_cfg.get("n_feats", 4),
+ "lidar_path": points_fname,
+ }
+
+ n_feats = sample["num_pts_feats"]
+ load_dim = 4 if has_intensity else 3
+ transforms = [
+ LoadPointsFromFile(coord_type="LIDAR", load_dim=load_dim, use_dim=n_feats)
+ ]
+ if sample["pts_semantic_mask_path"] is not None:
+ transforms.append(
+ LoadAnnotations3D(
+ with_bbox_3d=False,
+ with_label_3d=False,
+ with_seg_3d=True,
+ seg_3d_dtype="np.uint32",
+ seg_offset=65536,
+ dataset_type="semantickitti",
+ )
+ )
+ transforms.append(
+ Pack3DDetInputs(
+ keys=["points", "pts_semantic_mask"],
+ meta_keys=["sample_idx", "lidar_path", "num_pts_feats", "sample_id"],
+ )
+ )
+
+ if measure_processing_time:
+ start = time.perf_counter()
+ transforms = Compose(transforms)
+ sample = transforms(sample)
+ if measure_processing_time:
+ end = time.perf_counter()
+ return sample, {"preprocessing": end - start}
+
+ return sample
+
+
+def inference(
+ sample: dict,
+ model: torch.nn.Module,
+ model_cfg: dict,
+ ignore_index: Optional[List[int]] = None,
+ measure_processing_time: bool = False,
+) -> Tuple[
+ Tuple[torch.Tensor, Optional[torch.Tensor], Optional[List[str]]], Optional[dict]
+]:
+ """Perform inference on a sample using an mmdetection3D model
+
+ :param sample: sample data dictionary
+ :type sample: dict
+ :param model: mmdetection3D model
+ :type model: torch.nn.Module
+ :param model_cfg: model configuration
+ :type model_cfg: dict
+ :param measure_processing_time: whether to measure processing time, defaults to False
+ :type measure_processing_time: bool, optional
+ :param ignore_index: list of class indices to ignore during inference, defaults to None
+ :type ignore_index: Optional[List[int]], optional
+ :return: predictions, labels (if available), sample names and optionally processing time
+ :rtype: Tuple[ Tuple[torch.Tensor, Optional[torch.Tensor], Optional[List[str]]], Optional[dict] ]
+ """
+ single_sample = not isinstance(sample["data_samples"], list)
+ if single_sample:
+ sample = COLLATE_FN([sample])
+
+ if measure_processing_time:
+ start = time.perf_counter()
+ sample = model.data_preprocessor(sample, training=False)
+ if measure_processing_time:
+ end = time.perf_counter()
+ processing_time = {"voxelization": end - start}
+
+ inputs, data_samples = sample["inputs"], sample["data_samples"]
+ has_labels = hasattr(data_samples[0].gt_pts_seg, "pts_semantic_mask")
+
+ if measure_processing_time:
+ torch.cuda.synchronize()
+ start = time.perf_counter()
+ outputs = model(inputs, data_samples, mode="predict")
+ if measure_processing_time:
+ torch.cuda.synchronize()
+ end = time.perf_counter()
+ processing_time["inference"] = end - start
+
+ preds, labels, names = ([], [], []) if has_labels else ([], None, None)
+ for output in outputs:
+ if ignore_index is not None:
+ output.pts_seg_logits.pts_seg_logits[ignore_index] = -1e9
+ pred = torch.argmax(output.pts_seg_logits.pts_seg_logits, dim=0)
+ preds.append(pred)
+ if has_labels:
+ labels.append(output.gt_pts_seg.pts_semantic_mask)
+ names.append(output.metainfo["sample_id"])
+ preds = torch.stack(preds, dim=0).squeeze()
+ if has_labels:
+ labels = torch.stack(labels, dim=0).squeeze()
+
+ if measure_processing_time:
+ return (preds, labels, names), processing_time
+ else:
+ return preds, labels, names
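Note (not part of the patch): a hedged end-to-end sketch of the mmdetection3d helpers added above. Loading the model via `mmdet3d.apis.init_model` and the config/checkpoint/scan file names are assumptions; only `get_sample` and `inference` come from this patch.

from mmdet3d.apis import init_model  # assumed way of obtaining a segmentation model

from detectionmetrics.models.utils import mmdet3d as mmdet3d_utils

model = init_model("configs/cylinder3d_semantickitti.py", "checkpoint.pth", device="cuda:0")
model_cfg = {"n_feats": 4}

sample = mmdet3d_utils.get_sample(
    "000000.bin", model_cfg, label_fname="000000.label", name="000000", idx=0
)
pred, label, names = mmdet3d_utils.inference(sample, model, model_cfg)
print(pred.shape)  # per-point class indices for the single scan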
diff --git a/detectionmetrics/models/utils/o3d/__init__.py b/detectionmetrics/models/utils/o3d/__init__.py
new file mode 100644
index 00000000..945c3578
--- /dev/null
+++ b/detectionmetrics/models/utils/o3d/__init__.py
@@ -0,0 +1,216 @@
+import time
+from typing import Optional, Tuple, Union, Dict
+
+import numpy as np
+import torch
+
+try:
+ from open3d._ml3d.datasets.utils import DataProcessing
+except Exception:
+ print("Open3D-ML3D not available")
+from sklearn.neighbors import KDTree
+
+from detectionmetrics.models.utils.o3d import randlanet, kpconv
+from detectionmetrics.utils import lidar as ul
+import detectionmetrics.utils.torch as ut
+
+
+def inference(
+ sample: Tuple[np.ndarray, np.ndarray, ul.Sampler, np.ndarray, str, int],
+ model: torch.nn.Module,
+ model_cfg: dict,
+ measure_processing_time: bool = False,
+) -> Union[
+ Tuple[torch.Tensor, Optional[torch.Tensor], Optional[str]],
+ Tuple[torch.Tensor, Optional[torch.Tensor], Optional[str], Dict[str, float]],
+]:
+ """Perform inference on a sample using an Open3D-ML model
+
+ :param sample: sample tuple (points, projected indices, sampler, label, name, index) as returned by get_sample
+ :type sample: Tuple[np.ndarray, np.ndarray, ul.Sampler, np.ndarray, str, int]
+ :param model: Open3D-ML model
+ :type model: torch.nn.Module
+ :param model_cfg: model configuration
+ :type model_cfg: dict
+ :param measure_processing_time: whether to measure processing time, defaults to False
+ :type measure_processing_time: bool, optional
+ :return: predicted labels, ground truth labels, sample name and optionally processing time
+ :rtype: Union[ Tuple[torch.Tensor, Optional[torch.Tensor], Optional[str]], Tuple[torch.Tensor, Optional[torch.Tensor], Optional[str], Dict[str, float]] ]
+ """
+ infer_complete = False
+ points, projected_indices, sampler, label, name, _ = sample
+ model_format = model_cfg["model_format"]
+ end_th = model_cfg.get("end_th", 0.5)
+
+ processing_time = {"preprocessing": 0, "inference": 0, "postprocessing": 0}
+
+ if "kpconv" in model_format:
+ transform_input = kpconv.transform_input
+ update_probs = kpconv.update_probs
+ elif "randlanet" in model_format:
+ decoder_layers = model.decoder.children()
+ model_cfg["num_layers"] = sum(1 for _ in decoder_layers)
+ transform_input = randlanet.transform_input
+ update_probs = randlanet.update_probs
+ else:
+ raise ValueError(f"Unknown model type: {model_format}")
+
+ while not infer_complete:
+ # Get model input data
+ if measure_processing_time:
+ start = time.perf_counter()
+ input_data, selected_indices = transform_input(points, model_cfg, sampler)
+ if measure_processing_time:
+ end = time.perf_counter()
+ processing_time["preprocessing"] += end - start
+
+ input_data = ut.data_to_device(input_data, model.device)
+ if "randlanet" in model_format:
+ input_data = ut.unsqueeze_data(input_data)
+
+ # Perform inference
+ with torch.no_grad():
+ if measure_processing_time:
+ torch.cuda.synchronize()
+ start = time.perf_counter()
+ pred = model(*input_data)
+ if measure_processing_time:
+ torch.cuda.synchronize()
+ end = time.perf_counter()
+ processing_time["inference"] += end - start
+
+ # TODO: check if this is consistent across different models
+ if isinstance(pred, dict):
+ pred = pred["out"]
+
+ # Update probabilities if sampler is used
+ if measure_processing_time:
+ start = time.perf_counter()
+ if sampler is not None:
+ if "kpconv" in model_format:
+ sampler.test_probs = update_probs(
+ pred,
+ selected_indices,
+ sampler.test_probs,
+ lengths=input_data[-1],
+ )
+ else:
+ sampler.test_probs = update_probs(
+ pred,
+ selected_indices,
+ sampler.test_probs,
+ model_cfg["n_classes"],
+ )
+ if sampler.p[sampler.p > end_th].shape[0] == sampler.p.shape[0]:
+ pred = sampler.test_probs[projected_indices]
+ infer_complete = True
+ else:
+ pred = pred.squeeze().cpu()[projected_indices].cuda()
+ infer_complete = True
+ if measure_processing_time:
+ end = time.perf_counter()
+ processing_time["postprocessing"] += end - start
+
+ if label is not None:
+ label = torch.from_numpy(label.astype(np.int64)).long().cuda()
+
+ result = torch.argmax(pred.squeeze(), axis=-1), label, name
+
+ # Return processing time if needed
+ if measure_processing_time:
+ return result, processing_time
+
+ return result
+
+
+def get_sample(
+ points_fname: str,
+ model_cfg: dict,
+ label_fname: Optional[str] = None,
+ name: Optional[str] = None,
+ idx: Optional[int] = None,
+ has_intensity: bool = True,
+ measure_processing_time: bool = False,
+) -> Union[
+ Tuple[np.ndarray, np.ndarray, ul.Sampler, np.ndarray, str, int],
+ Tuple[
+ Tuple[np.ndarray, np.ndarray, ul.Sampler, np.ndarray, str, int],
+ Dict[str, float],
+ ],
+]:
+ """Get sample data for mmdetection3d models
+
+ :param points_fname: filename of the point cloud
+ :type points_fname: str
+ :param model_cfg: model configuration
+ :type model_cfg: dict
+ :param label_fname: filename of the semantic label, defaults to None
+ :type label_fname: Optional[str], optional
+ :param name: sample name, defaults to None
+ :type name: Optional[str], optional
+ :param idx: sample numerical index, defaults to None
+ :type idx: Optional[int], optional
+ :param has_intensity: whether the point cloud has intensity values, defaults to True
+ :type has_intensity: bool, optional
+ :param measure_processing_time: whether to measure processing time, defaults to False
+ :type measure_processing_time: bool, optional
+ :return: sample data and optionally processing time
+ :rtype: Union[Tuple[np.ndarray, np.ndarray, ul.Sampler, np.ndarray, str, int], Tuple[Tuple[np.ndarray, np.ndarray, ul.Sampler, np.ndarray, str, int], Dict[str, float]]]
+ """
+ points = ul.read_semantickitti_points(points_fname, has_intensity)
+ label = None
+ if label_fname is not None:
+ label, _ = ul.read_semantickitti_label(label_fname)
+
+ if measure_processing_time:
+ start = time.perf_counter()
+
+ # Keep only XYZ coordinates
+ points = np.array(points[:, 0:3], dtype=np.float32)
+
+ # Subsample points using a grid of given size
+ grid_size = model_cfg.get("grid_size", 0.06)
+ sub_points = DataProcessing.grid_subsampling(points, grid_size=grid_size)
+
+ # Create search tree so that we can project points back to the original point cloud
+ search_tree = KDTree(sub_points)
+ projected_indices = np.squeeze(search_tree.query(points, return_distance=False))
+ projected_indices = projected_indices.astype(np.int32)
+
+ # Init sampler
+ sampler = None
+ if "sampler" in model_cfg:
+ sampler = ul.Sampler(
+ sub_points.shape[0],
+ search_tree,
+ model_cfg["sampler"],
+ model_cfg["n_classes"],
+ )
+
+ if measure_processing_time:
+ end = time.perf_counter()
+
+ sample = sub_points, projected_indices, sampler, label, name, idx
+
+ # Return processing time if needed
+ if measure_processing_time:
+ processing_time = {"preprocessing": end - start}
+ return sample, processing_time
+
+ return sample
+
+
+def reset_sampler(sampler: ul.Sampler, num_points: int, num_classes: int):
+ """Reset sampler object probabilities
+
+ :param sampler: Sampler object
+ :type sampler: ul.Sampler
+ :param num_points: Number of points in the point cloud
+ :type num_points: int
+ :param num_classes: Number of semantic classes
+ :type num_classes: int
+ """
+ sampler.p = np.random.rand(num_points) * 1e-3
+ sampler.min_p = float(np.min(sampler.p[-1]))
+ sampler.test_probs = np.zeros((num_points, num_classes), dtype=np.float32)
+ return sampler
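Note (not part of the patch): `reset_sampler` matters because the Open3D-ML inference loop above terminates only once every point's "possibility" value exceeds `end_th`; a previously used sampler would exit immediately. A toy, self-contained illustration of that termination condition (the random increments stand in for the library's actual coverage updates):

import numpy as np

end_th = 0.5
p = np.random.rand(8) * 1e-3             # same initialization as reset_sampler
for step in range(1, 1000):
    covered = np.random.choice(len(p), size=4, replace=False)
    p[covered] += np.random.rand(4)       # covered points become less likely to be re-picked
    if (p > end_th).sum() == p.shape[0]:  # identical condition to the sampler.p check above
        print(f"all points covered after {step} sampling steps")
        break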
diff --git a/detectionmetrics/models/torch_model_utils/o3d_kpconv.py b/detectionmetrics/models/utils/o3d/kpconv.py
similarity index 99%
rename from detectionmetrics/models/torch_model_utils/o3d_kpconv.py
rename to detectionmetrics/models/utils/o3d/kpconv.py
index 01a0ba29..00a64f28 100644
--- a/detectionmetrics/models/torch_model_utils/o3d_kpconv.py
+++ b/detectionmetrics/models/utils/o3d/kpconv.py
@@ -1,4 +1,4 @@
-from typing import List, Optional, Tuple
+from typing import List, Tuple
import numpy as np
diff --git a/detectionmetrics/models/torch_model_utils/o3d_randlanet.py b/detectionmetrics/models/utils/o3d/randlanet.py
similarity index 99%
rename from detectionmetrics/models/torch_model_utils/o3d_randlanet.py
rename to detectionmetrics/models/utils/o3d/randlanet.py
index 8caad287..1210b6a7 100644
--- a/detectionmetrics/models/torch_model_utils/o3d_randlanet.py
+++ b/detectionmetrics/models/utils/o3d/randlanet.py
@@ -109,4 +109,4 @@ def update_probs(
test_probs = torch.tensor(test_probs, device=new_probs.device)
test_probs[indices] = weight * test_probs[indices] + (1 - weight) * new_probs
- return test_probs
+ return test_probs
\ No newline at end of file
diff --git a/detectionmetrics/models/utils/sphereformer.py b/detectionmetrics/models/utils/sphereformer.py
new file mode 100644
index 00000000..226bf9f0
--- /dev/null
+++ b/detectionmetrics/models/utils/sphereformer.py
@@ -0,0 +1,204 @@
+import time
+from typing import List, Optional, Tuple
+
+import numpy as np
+import spconv.pytorch as spconv
+import torch
+from util.data_util import data_prepare
+
+import detectionmetrics.utils.torch as ut
+import detectionmetrics.utils.lidar as ul
+
+
+def collate_fn(samples: List[dict]) -> dict:
+ """Collate function for batching samples
+
+ :param samples: list of sample dictionaries
+ :type samples: List[dict]
+ :return: collated batch dictionary
+ :rtype: dict
+ """
+ coords, xyz, feats, labels, inds_recons, fnames, sample_ids = list(zip(*samples))
+ inds_recons = list(inds_recons)
+
+ accumulated_points_num = 0
+ offset = []
+ for i in range(len(coords)):
+ inds_recons[i] = accumulated_points_num + inds_recons[i]
+ accumulated_points_num += coords[i].shape[0]
+ offset.append(accumulated_points_num)
+
+ coords = torch.cat(coords)
+ xyz = torch.cat(xyz)
+ feats = torch.cat(feats)
+ if any(label is None for label in labels):
+ labels = None
+ offset = torch.IntTensor(offset)
+ inds_recons = torch.cat(inds_recons)
+
+ return (
+ coords,
+ xyz,
+ feats,
+ labels,
+ offset,
+ inds_recons,
+ list(fnames),
+ list(sample_ids),
+ )
+
+
+def get_sample(
+ points_fname: str,
+ model_cfg: dict,
+ label_fname: Optional[str] = None,
+ name: Optional[str] = None,
+ idx: Optional[int] = None,
+ has_intensity: bool = True,
+ measure_processing_time: bool = False,
+) -> Tuple[dict, Optional[dict]]:
+ """Get sample data for mmdetection3d models
+
+ :param points_fname: filename of the point cloud
+ :type points_fname: str
+ :param model_cfg: model configuration
+ :type model_cfg: dict
+ :param label_fname: filename of the semantic label, defaults to None
+ :type label_fname: Optional[str], optional
+ :param name: sample name, defaults to None
+ :type name: Optional[str], optional
+ :param idx: sample numerical index, defaults to None
+ :type idx: Optional[int], optional
+ :param has_intensity: whether the point cloud has intensity values, defaults to True
+ :type has_intensity: bool, optional
+ :param measure_processing_time: whether to measure processing time, defaults to False
+ :type measure_processing_time: bool, optional
+ :return: sample data dictionary and processing time dictionary (if measured)
+ :rtype: Tuple[dict, Optional[dict]]
+ """
+ feats = ul.read_semantickitti_points(points_fname, has_intensity)
+ feats = feats[:, : model_cfg["n_feats"]]
+
+ labels_in = None
+ if label_fname is not None:
+ annotated_data = np.fromfile(label_fname, dtype=np.uint32)
+ annotated_data = annotated_data.reshape((-1, 1))
+ labels_in = annotated_data.astype(np.uint8).reshape(-1)
+
+ if measure_processing_time:
+ start = time.perf_counter()
+
+ xyz = feats[:, :3]
+ xyz = np.clip(xyz, model_cfg["pc_range"][0], model_cfg["pc_range"][1])
+
+ coords, xyz, feats, labels, inds_reconstruct = data_prepare(
+ xyz,
+ feats,
+ labels_in,
+ "test",
+ np.array(model_cfg["voxel_size"]),
+ model_cfg["voxel_max"],
+ None,
+ model_cfg["xyz_norm"],
+ )
+
+ if measure_processing_time:
+ end = time.perf_counter()
+ processing_time = {"voxelization": end - start}
+
+ sample = (
+ coords,
+ xyz,
+ feats,
+ labels,
+ inds_reconstruct,
+ points_fname,
+ name,
+ )
+
+ if measure_processing_time:
+ return sample, processing_time
+
+ return sample
+
+
+def inference(
+ sample: dict,
+ model: torch.nn.Module,
+ model_cfg: dict,
+ ignore_index: Optional[List[int]] = None,
+ measure_processing_time: bool = False,
+) -> Tuple[Tuple[torch.Tensor, Optional[torch.Tensor], List[str]], Optional[dict]]:
+ """Perform inference on a sample using an mmdetection3D model
+
+ :param sample: sample data dictionary
+ :type sample: dict
+ :param model: SphereFormer model
+ :type model: torch.nn.Module
+ :param model_cfg: model configuration
+ :type model_cfg: dict
+ :param measure_processing_time: whether to measure processing time, defaults to False
+ :type measure_processing_time: bool, optional
+ :param ignore_index: list of class indices to ignore during inference, defaults to None
+ :type ignore_index: Optional[List[int]], optional
+ :return: tuple of (predictions, labels, names) and processing time dictionary (if measured)
+ :rtype: Tuple[Tuple[torch.Tensor, Optional[torch.Tensor], List[str]], Optional[dict]]
+ """
+ single_sample = not isinstance(sample[-1], list)
+ if single_sample:
+ sample = collate_fn([sample])
+
+ device = next(model.parameters()).device
+ sample = ut.data_to_device(sample, device)
+
+ (
+ coord,
+ xyz,
+ feat,
+ labels,
+ offset,
+ inds_reconstruct,
+ fnames,
+ names,
+ ) = sample
+
+ if measure_processing_time:
+ start = time.perf_counter()
+
+ offset_ = offset.clone()
+ offset_[1:] = offset_[1:] - offset_[:-1]
+
+ batch = (
+ torch.cat([torch.tensor([ii] * o) for ii, o in enumerate(offset_)], 0)
+ .long()
+ .to(device)
+ )
+
+ coord = torch.cat([batch.unsqueeze(-1), coord], -1)
+ spatial_shape = np.clip((coord.max(0)[0][1:] + 1).cpu().numpy(), 128, None)
+ batch_size = len(fnames)
+
+ sinput = spconv.SparseConvTensor(feat, coord.int(), spatial_shape, batch_size)
+ if measure_processing_time:
+ end = time.perf_counter()
+ processing_time = {"preprocessing": end - start}
+ start = time.perf_counter()
+
+ if measure_processing_time:
+ torch.cuda.synchronize()
+ start = time.perf_counter()
+ preds = model(sinput, xyz, batch)
+ if measure_processing_time:
+ torch.cuda.synchronize()
+ end = time.perf_counter()
+ processing_time["inference"] = end - start
+
+ preds = preds[inds_reconstruct, :]
+ if ignore_index is not None:
+ preds[:, ignore_index] = -1e9
+ preds = torch.argmax(preds, dim=1)
+
+ if measure_processing_time:
+ return (preds, labels, names), processing_time
+
+ return preds, labels, names
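Note (not part of the patch): a toy check of the offset-to-batch-index expansion used in the SphereFormer `inference` above. Offsets are cumulative point counts per sample, so their first differences give per-sample lengths that are expanded into a flat batch-index vector.

import torch

offset = torch.IntTensor([5, 9, 12])          # 3 clouds with 5, 4 and 3 points
offset_ = offset.clone()
offset_[1:] = offset_[1:] - offset_[:-1]      # [5, 4, 3]
batch = torch.cat(
    [torch.tensor([ii] * int(o)) for ii, o in enumerate(offset_)], 0
).long()
print(batch.tolist())  # [0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2]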
diff --git a/detectionmetrics/utils/conversion.py b/detectionmetrics/utils/conversion.py
index 9cf8e9ce..1518aec9 100644
--- a/detectionmetrics/utils/conversion.py
+++ b/detectionmetrics/utils/conversion.py
@@ -57,7 +57,8 @@ def get_ontology_conversion_lut(
old_ontology: dict,
new_ontology: dict,
ontology_translation: Optional[dict] = None,
- ignored_classes: Optional[List[str]] = None,
+ classes_to_remove: Optional[List[str]] = None,
+ lut_dtype: Optional[np.dtype] = np.uint8,
) -> np.ndarray:
"""Build a LUT that links old ontology and new ontology indices. If class names
don't match between the provided ontologies, user must provide an ontology
@@ -69,18 +70,20 @@ def get_ontology_conversion_lut(
:type new_ontology: dict
:param ontology_translation: Ontology translation dictionary, defaults to None
:type ontology_translation: Optional[dict], optional
- :param ignored_classes: Classes to ignore from the old ontology, defaults to None
- :type ignored_classes: Optional[List[str]], optional
+ :param classes_to_remove: Classes to be removed from the old ontology, defaults to None
+ :type classes_to_remove: Optional[List[str]], optional
+ :param lut_dtype: Type for the ontology conversion LUT, defaults to np.uint8
+ :type lut_dtype: Optional[np.dtype], optional
:return: numpy array associating old and new ontology indices
:rtype: np.ndarray
"""
- ignored_classes = [] if ignored_classes is None else ignored_classes
+ classes_to_remove = [] if classes_to_remove is None else classes_to_remove
max_idx = max(class_data["idx"] for class_data in old_ontology.values())
- lut = np.zeros((max_idx + 1), dtype=np.uint8)
+ lut = np.zeros((max_idx + 1), dtype=lut_dtype)
if ontology_translation is not None:
- # Deleting ignored classes that exist in ontology_translation
- for class_name in ignored_classes:
+ # Deleting requested classes from ontology translation
+ for class_name in classes_to_remove:
if class_name in ontology_translation:
del ontology_translation[class_name]
@@ -91,7 +94,8 @@ def get_ontology_conversion_lut(
lut[old_class_idx] = new_class_idx
else:
old_ontology = old_ontology.copy()
- for class_name in ignored_classes: # Deleting ignored classes from old_ontology
+ # Deleting classes requested from old ontology
+ for class_name in classes_to_remove:
del old_ontology[class_name]
assert set(old_ontology.keys()) == set( # Checking ontology compatibility
new_ontology.keys()
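Note (not part of the patch): a small usage sketch of `get_ontology_conversion_lut` with an explicit translation, matching how `eval` builds its LUT above. The ontologies and class names are made up for illustration.

import numpy as np

from detectionmetrics.utils.conversion import get_ontology_conversion_lut

dataset_ontology = {"grass": {"idx": 0}, "tree": {"idx": 1}, "sky": {"idx": 2}}
model_ontology = {"vegetation": {"idx": 0}, "background": {"idx": 1}}
translation = {"grass": "vegetation", "tree": "vegetation", "sky": "background"}

lut = get_ontology_conversion_lut(dataset_ontology, model_ontology, translation)
labels = np.array([0, 1, 2, 2, 0])
print(lut[labels])  # [0 0 1 1 0] -> dataset label indices remapped to model indices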
diff --git a/detectionmetrics/utils/lidar.py b/detectionmetrics/utils/lidar.py
index 21331782..3d2b4226 100644
--- a/detectionmetrics/utils/lidar.py
+++ b/detectionmetrics/utils/lidar.py
@@ -1,6 +1,6 @@
import numpy as np
import random
-from typing import List, Optional, Tuple
+from typing import List, Optional, Tuple, Union
import open3d as o3d
from PIL import Image
@@ -13,7 +13,21 @@
"front": np.array([1, 0, 0.5], dtype=np.float32), # Camera front vector
"lookat": np.array([1, 0.0, 0.0], dtype=np.float32), # Point camera looks at
"up": np.array([-0.5, 0, 1], dtype=np.float32), # Camera up direction
- }
+ },
+ "top": {
+ "zoom": 0.025,
+ "front": np.array([0, 0, -1], dtype=np.float32), # Looking straight down
+ "lookat": np.array([1, 0.0, 0.0], dtype=np.float32), # Same target point
+ "up": np.array([0, 1, 0], dtype=np.float32), # Y axis is "up" in image
+ },
+ "side": {
+ "zoom": 0.012,
+ "front": np.array(
+ [0, -1, 0], dtype=np.float32
+ ), # Looking from positive Y toward origin
+ "lookat": np.array([1, 0.0, 0.0], dtype=np.float32), # Same target point
+ "up": np.array([0, 0, 1], dtype=np.float32), # Z axis is up
+ },
}
@@ -211,11 +225,13 @@ def view_point_cloud(points: np.ndarray, colors: np.ndarray):
def render_point_cloud(
points: np.ndarray,
colors: np.ndarray,
- camera_view: str = "3rd_person",
+ camera_view: Union[str, dict] = "3rd_person",
bg_color: Optional[List[float]] = [0.0, 0.0, 0.0, 1.0],
color_jitter: float = 0.05,
point_size: float = 3.0,
resolution: Tuple[int, int] = (1920, 1080),
+ render_origin: bool = False,
+ origin_size: float = 0.5,
) -> Image:
"""Render a given point cloud from a specific camera view and return the image
@@ -223,8 +239,8 @@ def render_point_cloud(
:type points: np.ndarray
:param colors: Colors for the point cloud data
:type colors: np.ndarray
- :param camera_view: Camera view, defaults to "3rd_person"
- :type camera_view: str, optional
+ :param camera_view: Camera view (either ID or dictionary containing camera definition), defaults to "3rd_person"
+ :type camera_view: Union[str, dict], optional
:param bg_color: Background color, defaults to black -> [0., 0., 0., 1.]
:type bg_color: Optional[List[float]], optional
:param color_jitter: Jitters the colors by a random value between [-color_jitter, color_jitter], defaults to 0.05
@@ -233,11 +249,20 @@ def render_point_cloud(
:type point_size: float, optional
:param resolution: Render resolution, defaults to (1920, 1080)
:type resolution: Tuple[int, int], optional
+ :param render_origin: Whether to render the origin axes, defaults to False
+ :type render_origin: bool, optional
+ :param origin_size: Size of the origin axes, defaults to 0.5
+ :type origin_size: float, optional
:return: Rendered point cloud
:rtype: Image
"""
- assert camera_view in CAMERA_VIEWS, f"Camera view {camera_view} not implemented"
- view_settings = CAMERA_VIEWS[camera_view]
+ if isinstance(camera_view, dict):
+ # If camera_view is a dictionary, use it directly
+ view_settings = camera_view
+ elif isinstance(camera_view, str):
+ # If camera_view is a string, look it up in predefined views
+ assert camera_view in CAMERA_VIEWS, f"Camera view {camera_view} not implemented"
+ view_settings = CAMERA_VIEWS[camera_view]
# Add color jitter if needed
if color_jitter > 0:
@@ -258,6 +283,15 @@ def render_point_cloud(
material.point_size = point_size
renderer.scene.add_geometry("point_cloud", point_cloud, material)
+ # Add origin axes for reference
+ if render_origin:
+ coord_frame = o3d.geometry.TriangleMesh.create_coordinate_frame(
+ size=origin_size, origin=[0, 0, 0]
+ )
+ coord_material = o3d.visualization.rendering.MaterialRecord()
+ coord_material.shader = "defaultUnlit" # Also unlit for visibility
+ renderer.scene.add_geometry("coordinate_frame", coord_frame, coord_material)
+
# Set the background color
renderer.scene.set_background(bg_color)
@@ -280,3 +314,36 @@ def render_point_cloud(
renderer.scene.clear_geometry()
return image
+
+
+def read_semantickitti_points(fname: str, has_intensity: bool = True) -> np.ndarray:
+ """Read points from a binary file in SemanticKITTI format
+
+ :param fname: Binary file containing points
+ :type fname: str
+ :param has_intensity: Whether the points have intensity values, defaults to True
+ :type has_intensity: bool
+ :return: Numpy array containing points
+ :rtype: np.ndarray
+ """
+ points = np.fromfile(fname, dtype=np.float32)
+ points = points.reshape((-1, 4 if has_intensity else 3))
+ if not has_intensity:
+ empty_intensity = np.zeros((points.shape[0], 1), dtype=np.float32)
+ points = np.concatenate([points, empty_intensity], axis=1)
+ return points
+
+
+def read_semantickitti_label(fname: str) -> Tuple[np.ndarray, np.ndarray]:
+ """Read labels from a binary file in SemanticKITTI format
+
+ :param fname: Binary file containing labels
+ :type fname: str
+ :return: Numpy arrays containing semantic and instance labels
+ :rtype: Tuple[np.ndarray, np.ndarray]
+ """
+ label = np.fromfile(fname, dtype=np.uint32)
+ label = label.reshape((-1))
+ semantic_label = label & 0xFFFF
+ instance_label = label >> 16
+ return semantic_label, instance_label
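Note (not part of the patch): a toy check of the SemanticKITTI label packing handled above, where the lower 16 bits store the semantic class and the upper 16 bits the instance id (values illustrative).

import numpy as np

packed = np.array([(7 << 16) | 44, 10], dtype=np.uint32)  # instance 7 / class 44, and class 10
semantic = packed & 0xFFFF
instance = packed >> 16
print(semantic.tolist(), instance.tolist())  # [44, 10] [7, 0]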
diff --git a/detectionmetrics/utils/segmentation_metrics.py b/detectionmetrics/utils/segmentation_metrics.py
index a3d7ff6b..18652f35 100644
--- a/detectionmetrics/utils/segmentation_metrics.py
+++ b/detectionmetrics/utils/segmentation_metrics.py
@@ -1,6 +1,6 @@
from collections import defaultdict
import math
-from typing import Optional
+from typing import List, Optional, Union
import numpy as np
import pandas as pd
@@ -50,10 +50,6 @@ def update(
if valid_mask is not None:
mask &= valid_mask
- # Update confusion matrix
- if np.count_nonzero(gt >= 16):
- pass
-
# Update confusion matrix
new_entry = np.bincount(
self.n_classes * gt[mask].astype(int) + pred[mask].astype(int),
@@ -61,11 +57,11 @@ def update(
)
self.confusion_matrix += new_entry.reshape(self.n_classes, self.n_classes)
- def get_metric_names(self) -> list[str]:
+ def get_metric_names(self) -> List[str]:
"""Get available metric names
:return: List of available metric names
- :rtype: list[str]
+ :rtype: List[str]
"""
return self.METRIC_NAMES
@@ -77,58 +73,58 @@ def get_confusion_matrix(self) -> np.ndarray:
"""
return self.confusion_matrix
- def get_tp(self, per_class: bool = True) -> np.ndarray | int:
+ def get_tp(self, per_class: bool = True) -> Union[np.ndarray, int]:
"""True Positives
:param per_class: Return per class TP, defaults to True
:type per_class: bool, optional
:return: True Positives
- :rtype: np.ndarray | int
+ :rtype: Union[np.ndarray, int]
"""
tp = np.diag(self.confusion_matrix)
return tp if per_class else int(np.nansum(tp))
- def get_fp(self, per_class: bool = True) -> np.ndarray | int:
+ def get_fp(self, per_class: bool = True) -> Union[np.ndarray, int]:
"""False Positives
:param per_class: Return per class FP, defaults to True
:type per_class: bool, optional
:return: False Positives
- :rtype: np.ndarray | int
+ :rtype: Union[np.ndarray, int]
"""
fp = self.confusion_matrix.sum(axis=0) - np.diag(self.confusion_matrix)
return fp if per_class else int(np.nansum(fp))
- def get_fn(self, per_class: bool = True) -> np.ndarray | int:
+ def get_fn(self, per_class: bool = True) -> Union[np.ndarray, int]:
"""False negatives
:param per_class: Return per class FN, defaults to True
:type per_class: bool, optional
:return: False Negatives
- :rtype: np.ndarray | int
+ :rtype: Union[np.ndarray, int]
"""
fn = self.confusion_matrix.sum(axis=1) - np.diag(self.confusion_matrix)
return fn if per_class else int(np.nansum(fn))
- def get_tn(self, per_class: bool = True) -> np.ndarray | int:
+ def get_tn(self, per_class: bool = True) -> Union[np.ndarray, int]:
"""True negatives
:param per_class: Return per class TN, defaults to True
:type per_class: bool, optional
:return: True Negatives
- :rtype: np.ndarray | int
+ :rtype: Union[np.ndarray, int]
"""
total = self.confusion_matrix.sum()
tn = total - (self.get_tp() + self.get_fp() + self.get_fn())
return tn if per_class else int(np.nansum(tn))
- def get_precision(self, per_class: bool = True) -> np.ndarray | float:
+ def get_precision(self, per_class: bool = True) -> Union[np.ndarray, float]:
"""Precision = TP / (TP + FP)
:param per_class: Return per class precision, defaults to True
:type per_class: bool, optional
:return: Precision value
- :rtype: np.ndarray | int
+ :rtype: Union[np.ndarray, float]
"""
tp = self.get_tp(per_class)
fp = self.get_fp(per_class)
@@ -139,13 +135,13 @@ def get_precision(self, per_class: bool = True) -> np.ndarray | float:
else:
return np.where(denominator > 0, tp / denominator, np.nan)
- def get_recall(self, per_class: bool = True) -> np.ndarray | float:
+ def get_recall(self, per_class: bool = True) -> Union[np.ndarray, float]:
"""Recall = TP / (TP + FN)
:param per_class: Return per class recall, defaults to True
:type per_class: bool, optional
:return: Recall value
- :rtype: np.ndarray | int
+ :rtype: Union[np.ndarray, float]
"""
tp = self.get_tp(per_class)
fn = self.get_fn(per_class)
@@ -156,13 +152,13 @@ def get_recall(self, per_class: bool = True) -> np.ndarray | float:
else:
return np.where(denominator > 0, tp / denominator, np.nan)
- def get_accuracy(self, per_class: bool = True) -> np.ndarray | float:
+ def get_accuracy(self, per_class: bool = True) -> Union[np.ndarray, float]:
"""Accuracy = (TP + TN) / (TP + FP + FN + TN)
:param per_class: Return per class accuracy, defaults to True
:type per_class: bool, optional
:return: Accuracy value
- :rtype: np.ndarray | int
+ :rtype: Union[np.ndarray, float]
"""
tp = self.get_tp(per_class)
fp = self.get_fp(per_class)
@@ -175,13 +171,13 @@ def get_accuracy(self, per_class: bool = True) -> np.ndarray | float:
else:
return np.where(total > 0, (tp + tn) / total, np.nan)
- def get_f1_score(self, per_class: bool = True) -> np.ndarray | float:
+ def get_f1_score(self, per_class: bool = True) -> Union[np.ndarray, float]:
"""F1-score = 2 * (Precision * Recall) / (Precision + Recall)
:param per_class: Return per class F1 score, defaults to True
:type per_class: bool, optional
:return: F1 score value
- :rtype: np.ndarray | int
+ :rtype: Union[np.ndarray, float]
"""
precision = self.get_precision(per_class)
recall = self.get_recall(per_class)
@@ -196,13 +192,13 @@ def get_f1_score(self, per_class: bool = True) -> np.ndarray | float:
denominator > 0, 2 * (precision * recall) / denominator, np.nan
)
- def get_iou(self, per_class: bool = True) -> np.ndarray | float:
+ def get_iou(self, per_class: bool = True) -> Union[np.ndarray, float]:
"""IoU = TP / (TP + FP + FN)
:param per_class: Return per class IoU, defaults to True
:type per_class: bool, optional
:return: IoU value
- :rtype: np.ndarray | int
+ :rtype: Union[np.ndarray, float]
"""
tp = self.get_tp(per_class)
fp = self.get_fp(per_class)
@@ -242,7 +238,7 @@ def get_averaged_metric(
def get_metric_per_name(
self, metric_name: str, per_class: bool = True
- ) -> np.ndarray | float | int:
+ ) -> Union[np.ndarray, float, int]:
"""Get metric value by name
:param metric_name: Name of the metric to compute
@@ -250,7 +246,7 @@ def get_metric_per_name(
:param per_class: Return per class metric, defaults to True
:type per_class: bool, optional
:return: Metric value
- :rtype: np.ndarray | float | int
+ :rtype: Union[np.ndarray, float, int]
"""
return getattr(self, f"get_{metric_name}")(per_class=per_class)
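
To make the formulas documented above concrete, here is a small self-contained NumPy sketch that recomputes the per-class metrics from a toy confusion matrix (rows are ground truth, columns are predictions, matching the `n_classes * gt + pred` bincount update above):

```python
import numpy as np

# Toy 3-class confusion matrix: rows = ground truth, columns = prediction
confusion = np.array(
    [
        [50, 2, 3],
        [4, 40, 1],
        [6, 0, 30],
    ]
)

tp = np.diag(confusion)          # true positives per class
fp = confusion.sum(axis=0) - tp  # false positives per class
fn = confusion.sum(axis=1) - tp  # false negatives per class

precision = tp / (tp + fp)       # TP / (TP + FP)
recall = tp / (tp + fn)          # TP / (TP + FN)
iou = tp / (tp + fp + fn)        # TP / (TP + FP + FN)

print("per-class IoU:", iou)
print("mIoU:", iou.mean())
```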
diff --git a/detectionmetrics/utils/torch.py b/detectionmetrics/utils/torch.py
new file mode 100644
index 00000000..ecb7a633
--- /dev/null
+++ b/detectionmetrics/utils/torch.py
@@ -0,0 +1,65 @@
+from typing import Union
+
+import torch
+
+
+def data_to_device(
+ data: Union[tuple, list], device: torch.device
+) -> Union[tuple, list]:
+ """Move provided data to given device (CPU or GPU)
+
+ :param data: Data provided (it can be a single or multiple tensors)
+ :type data: Union[tuple, list]
+ :param device: Device to move data to
+ :type device: torch.device
+ :return: Data moved to device
+ :rtype: Union[tuple, list]
+ """
+ if isinstance(data, (tuple, list)):
+ return type(data)(
+ d.to(device) if torch.is_tensor(d) else data_to_device(d, device)
+ for d in data
+ )
+ elif torch.is_tensor(data):
+ return data.to(device)
+ else:
+ return data
+
+
+def get_data_shape(data: Union[tuple, list]) -> Union[tuple, list]:
+ """Get the shape of the provided data
+
+ :param data: Data provided (it can be a single or multiple tensors)
+ :type data: Union[tuple, list]
+ :return: Data shape
+ :rtype: Union[tuple, list]
+ """
+ if isinstance(data, (tuple, list)):
+ return type(data)(
+ tuple(d.shape) if torch.is_tensor(d) else get_data_shape(d) for d in data
+ )
+ elif torch.is_tensor(data):
+ return tuple(data.shape)
+ else:
+ # Non-tensor leaf: fall back to its shape attribute if it has one
+ return tuple(data.shape) if hasattr(data, "shape") else None
+
+
+def unsqueeze_data(data: Union[tuple, list], dim: int = 0) -> Union[tuple, list]:
+ """Unsqueeze provided data along given dimension
+
+ :param data: Data provided (it can be a single or multiple tensors)
+ :type data: Union[tuple, list]
+ :param dim: Dimension that will be unsqueezed, defaults to 0
+ :type dim: int, optional
+ :return: Unsqueezed data
+ :rtype: Union[tuple, list]
+ """
+ if isinstance(data, (tuple, list)):
+ return type(data)(
+ d.unsqueeze(dim) if torch.is_tensor(d) else unsqueeze_data(d, dim)
+ for d in data
+ )
+ elif torch.is_tensor(data):
+ return data.unsqueeze(dim)
+ else:
+ return data
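
A brief usage sketch for these helpers (the import path follows the new file above; the nested sample structure is purely illustrative):

```python
import torch

from detectionmetrics.utils.torch import (
    data_to_device,
    get_data_shape,
    unsqueeze_data,
)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Illustrative nested sample: a (points, [features, mask]) pair of tensors
sample = (torch.rand(1000, 3), [torch.rand(1000, 1), torch.rand(1000)])

batched = unsqueeze_data(sample)           # add a batch dimension to every tensor
batched = data_to_device(batched, device)  # recursively move tensors to the device
print(get_data_shape(batched))             # ((1, 1000, 3), [(1, 1000, 1), (1, 1000)])
```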
diff --git a/docs/_pages/home.md b/docs/_pages/home.md
index 6ebb9570..c44f7b39 100644
--- a/docs/_pages/home.md
+++ b/docs/_pages/home.md
@@ -49,8 +49,8 @@ Now, we're excited to introduce ***DetectionMetrics v2***! While retaining the f
| LiDAR |
- Rellis3D, GOOSE, custom GAIA format |
- PyTorch (tested with RandLA-Net and KPConv from Open3D-ML) |
+ Rellis3D, GOOSE, WildScenes, custom GAIA format |
+ PyTorch (tested with Open3D-ML, mmdetection3d, SphereFormer, and LSK3DNet models) |
| Object detection |
Image |
diff --git a/docs/_pages/v2/compatibility.md b/docs/_pages/v2/compatibility.md
index 59b127cb..6db61a15 100644
--- a/docs/_pages/v2/compatibility.md
+++ b/docs/_pages/v2/compatibility.md
@@ -9,8 +9,10 @@ sidebar:
## Image semantic segmentation
- Datasets:
+ - **[RUGD](http://rugd.vision/)**
- **[Rellis3D](https://www.unmannedlab.org/research/RELLIS-3D)**
- **[GOOSE](https://goose-dataset.de/)**
+ - **[WildScenes](https://csiro-robotics.github.io/WildScenes/)**
- **Custom GAIA format**: *Parquet* file containing samples and labels relative paths and a JSON file with the dataset ontology.
- **Generic**: simply assumes a different directory per split, different suffixes for samples and labels, and a JSON file containing the dataset ontology.
- Models:
@@ -52,36 +54,115 @@ sidebar:
- Datasets:
- **[Rellis3D](https://www.unmannedlab.org/research/RELLIS-3D)**
- **[GOOSE](https://goose-dataset.de/)**
+ - **[WildScenes](https://csiro-robotics.github.io/WildScenes/)**
- **Custom GAIA format**: *Parquet* file containing samples and labels relative paths and a JSON file with the dataset ontology.
- **Generic**: simply assumes a different directory per split, different suffixes for samples and labels, and a JSON file containing the dataset ontology.
- Models:
- - **PyTorch ([TorchScript](https://pytorch.org/docs/stable/jit.html) compiled format and native modules)**. As of now, we have tested RandLA-Net and KPConv from [Open3D-ML](https://github.com/isl-org/Open3D-ML).
+ - **PyTorch ([TorchScript](https://pytorch.org/docs/stable/jit.html) compiled format and native modules)**. As of now, we have tested Open3D-ML, mmdetection3d, SphereFormer, and LSK3DNet models.
- Input shape: defined by the `input_format` tag.
- Output shape: `(num_points)`
- - JSON configuration file format:
+ - JSON configuration file examples (fields differ depending on the model):
```json
{
- "seed": 42,
- "input_format": "o3d_randlanet",
+ "model_format": <"o3d_randlanet" | "o3d_kpconv" | "mmdet3d" | "sphereformer" | "lsk3dnet">,
+ "n_feats": <3|4>, // without/with intensity
+ "seed": ,
+ // -- EXTRA PARAMETERS PER MODEL (EXAMPLES) --
+ // o3d kpconv
"sampler": "spatially_regular",
+ "min_in_points": 10000,
+ "max_in_points": 20000,
+ "in_radius": 4.0,
"recenter": {
"dims": [
0,
- 1
+ 1,
+ 2
]
},
- "ignored_classes": [
- "void"
+ "first_subsampling_dl": 0.075,
+ "conv_radius": 2.5,
+ "architecture": [
+ "simple",
+ "resnetb",
+ "resnetb_strided",
+ "resnetb",
+ "resnetb",
+ "resnetb_strided",
+ "resnetb",
+ "resnetb",
+ "resnetb_strided",
+ "resnetb",
+ "resnetb",
+ "resnetb_strided",
+ "resnetb",
+ "nearest_upsample",
+ "unary",
+ "nearest_upsample",
+ "unary",
+ "nearest_upsample",
+ "unary",
+ "nearest_upsample",
+ "unary"
],
+ "num_layers": 5,
+ "num_points": 45056,
+ "grid_size": 0.075,
+ "num_neighbors": 16,
+ "sub_sampling_ratio": [
+ 4,
+ 4,
+ 4,
+ 4
+ ],
+ // o3d randlanet
+ "sampler": "spatially_regular",
+ "recenter": {
+ "dims": [
+ 0,
+ 1
+ ]
+ },
"num_points": 45056,
- "grid_size": 0.06,
+ "grid_size": 0.075,
"num_neighbors": 16,
"sub_sampling_ratio": [
4,
4,
4,
4
+ ],
+ // sphereformer
+ "voxel_size": [
+ 0.05,
+ 0.05,
+ 0.05
+ ],
+ "voxel_max": 120000,
+ "pc_range": [
+ [
+ -22,
+ -17,
+ -4
+ ],
+ [
+ 30,
+ 18,
+ 13
+ ]
+ ],
+ "xyz_norm": false,
+ // lsk3dnet
+ "min_volume_space": [
+ -120,
+ -120,
+ -6
+ ],
+ "max_volume_space": [
+ 120,
+ 120,
+ 11
]
}
```
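
As a single-model illustration, the sketch below assembles a standalone RandLA-Net style configuration from the fields listed above and writes it to a JSON file; the exact set of required keys depends on the model loader, so treat the values as examples only:

```python
import json

# Illustrative RandLA-Net configuration assembled from the fields shown above
cfg = {
    "model_format": "o3d_randlanet",
    "n_feats": 4,  # x, y, z + intensity
    "seed": 42,
    "sampler": "spatially_regular",
    "recenter": {"dims": [0, 1]},
    "num_points": 45056,
    "grid_size": 0.075,
    "num_neighbors": 16,
    "sub_sampling_ratio": [4, 4, 4, 4],
}

with open("randlanet_cfg.json", "w", encoding="utf-8") as f:
    json.dump(cfg, f, indent=2)
```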
diff --git a/docs/_pages/v2/usage.md b/docs/_pages/v2/usage.md
index 689306a4..d2dcdc46 100644
--- a/docs/_pages/v2/usage.md
+++ b/docs/_pages/v2/usage.md
@@ -67,7 +67,7 @@ Usage: dm_evaluate [OPTIONS] {segmentation|detection} {image|lidar}
Evaluate model on dataset
Options:
- --model_format [torch|tensorflow|tensorflow_explicit]
+ --model_format [torch|tensorflow]
Trained model format [default: torch]
--model PATH Trained model filename (TorchScript) or
directory (TensorFlow SavedModel)
diff --git a/examples/gaia_image.py b/examples/gaia_image.py
index 1718aca1..4a8f3a88 100644
--- a/examples/gaia_image.py
+++ b/examples/gaia_image.py
@@ -1,4 +1,5 @@
import argparse
+import json
from detectionmetrics.datasets.gaia import GaiaImageSegmentationDataset
@@ -13,6 +14,16 @@ def parse_args() -> argparse.Namespace:
parser.add_argument(
"--dataset", type=str, required=True, help="Parquet dataset file"
)
+ parser.add_argument(
+ "--new_ontology",
+ type=str,
+ help="New ontology JSON file name",
+ )
+ parser.add_argument(
+ "--ontology_translation",
+ type=str,
+ help="Ontology translation JSON file name",
+ )
parser.add_argument(
"--outdir",
type=str,
@@ -41,11 +52,26 @@ def main():
"""Main function"""
args = parse_args()
+ new_ontology, ontology_translation = None, None
+ if args.new_ontology is not None:
+ with open(args.new_ontology, "r", encoding="utf-8") as f:
+ new_ontology = json.load(f)
+
+ if args.ontology_translation is not None:
+ with open(args.ontology_translation, "r", encoding="utf-8") as f:
+ ontology_translation = json.load(f)
+
dataset = GaiaImageSegmentationDataset(dataset_fname=args.dataset)
if args.split:
dataset.dataset = dataset.dataset[dataset.dataset["split"] == args.split]
dataset.has_label_count = False
- dataset.export(outdir=args.outdir, resize=args.resize)
+
+ dataset.export(
+ outdir=args.outdir,
+ resize=args.resize,
+ new_ontology=new_ontology,
+ ontology_translation=ontology_translation,
+ )
if __name__ == "__main__":
diff --git a/examples/gaia_lidar.py b/examples/gaia_lidar.py
index b5e1d8a3..4280bb36 100644
--- a/examples/gaia_lidar.py
+++ b/examples/gaia_lidar.py
@@ -1,4 +1,5 @@
import argparse
+import json
from detectionmetrics.datasets.gaia import GaiaLiDARSegmentationDataset
@@ -13,6 +14,23 @@ def parse_args() -> argparse.Namespace:
parser.add_argument(
"--dataset", type=str, required=True, help="Parquet dataset file"
)
+ parser.add_argument(
+ "--new_ontology",
+ type=str,
+ help="New ontology JSON file name",
+ )
+ parser.add_argument(
+ "--ontology_translation",
+ type=str,
+ help="Ontology translation JSON file name",
+ )
+ parser.add_argument(
+ "--outdir",
+ type=str,
+ required=True,
+ help="Directory where dataset will be stored in common format",
+ )
+
return parser.parse_args()
@@ -20,7 +38,22 @@ def main():
"""Main function"""
args = parse_args()
- GaiaLiDARSegmentationDataset(dataset_fname=args.dataset)
+ new_ontology, ontology_translation = None, None
+ if args.new_ontology is not None:
+ with open(args.new_ontology, "r", encoding="utf-8") as f:
+ new_ontology = json.load(f)
+
+ if args.ontology_translation is not None:
+ with open(args.ontology_translation, "r", encoding="utf-8") as f:
+ ontology_translation = json.load(f)
+
+ dataset = GaiaLiDARSegmentationDataset(dataset_fname=args.dataset)
+
+ dataset.export(
+ args.outdir,
+ new_ontology=new_ontology,
+ ontology_translation=ontology_translation,
+ )
if __name__ == "__main__":
diff --git a/examples/goose_lidar.py b/examples/goose_lidar.py
index 0ecc9693..3f860663 100644
--- a/examples/goose_lidar.py
+++ b/examples/goose_lidar.py
@@ -1,4 +1,5 @@
import argparse
+import json
from detectionmetrics.datasets.goose import GOOSELiDARSegmentationDataset
@@ -26,6 +27,16 @@ def parse_args() -> argparse.Namespace:
type=str,
help="Directory where test dataset split is stored",
)
+ parser.add_argument(
+ "--new_ontology",
+ type=str,
+ help="New ontology JSON file name",
+ )
+ parser.add_argument(
+ "--ontology_translation",
+ type=str,
+ help="Ontology translation JSON file name",
+ )
parser.add_argument(
"--outdir",
type=str,
@@ -40,12 +51,25 @@ def main():
"""Main function"""
args = parse_args()
+ new_ontology, ontology_translation = None, None
+ if args.new_ontology is not None:
+ with open(args.new_ontology, "r", encoding="utf-8") as f:
+ new_ontology = json.load(f)
+
+ if args.ontology_translation is not None:
+ with open(args.ontology_translation, "r", encoding="utf-8") as f:
+ ontology_translation = json.load(f)
+
dataset = GOOSELiDARSegmentationDataset(
train_dataset_dir=args.train_dataset_dir,
val_dataset_dir=args.val_dataset_dir,
test_dataset_dir=args.test_dataset_dir,
)
- dataset.export(args.outdir)
+ dataset.export(
+ args.outdir,
+ new_ontology=new_ontology,
+ ontology_translation=ontology_translation,
+ )
if __name__ == "__main__":
diff --git a/examples/merge_datasets.py b/examples/merge_datasets.py
index 87ce0243..cf9201a3 100644
--- a/examples/merge_datasets.py
+++ b/examples/merge_datasets.py
@@ -1,6 +1,6 @@
import argparse
-from detectionmetrics.datasets.gaia import GaiaImageSegmentationDataset
+from detectionmetrics.datasets.gaia import (
+    GaiaImageSegmentationDataset,
+    GaiaLiDARSegmentationDataset,
+)
def parse_args() -> argparse.Namespace:
@@ -23,6 +23,13 @@ def parse_args() -> argparse.Namespace:
required=True,
help="Directory where merged dataset will be stored",
)
+ parser.add_argument(
+ "--dataset_type",
+ type=str,
+ choices=["image", "lidar"],
+ required=True,
+ help="Type of datasets to merge",
+ )
return parser.parse_args()
@@ -31,7 +38,14 @@ def main():
"""Main function"""
args = parse_args()
- datasets = [GaiaImageSegmentationDataset(fname) for fname in args.datasets]
+ if args.dataset_type == "image":
+ dataset_class = GaiaImageSegmentationDataset
+ elif args.dataset_type == "lidar":
+ dataset_class = GaiaLiDARSegmentationDataset
+ else:
+ raise ValueError(f"Unknown dataset type: {args.dataset_type}")
+
+ datasets = [dataset_class(fname) for fname in args.datasets]
main_dataset = datasets[0]
for extra_dataset in datasets[1:]:
main_dataset.append(extra_dataset)
diff --git a/examples/rellis3d_lidar.py b/examples/rellis3d_lidar.py
index a5a1cc93..cb5bf48e 100644
--- a/examples/rellis3d_lidar.py
+++ b/examples/rellis3d_lidar.py
@@ -1,4 +1,5 @@
import argparse
+import json
from detectionmetrics.datasets.rellis3d import Rellis3DLiDARSegmentationDataset
@@ -28,6 +29,16 @@ def parse_args() -> argparse.Namespace:
required=True,
help="YAML file containing dataset ontology",
)
+ parser.add_argument(
+ "--new_ontology",
+ type=str,
+ help="New ontology JSON file name",
+ )
+ parser.add_argument(
+ "--ontology_translation",
+ type=str,
+ help="Ontology translation JSON file name",
+ )
parser.add_argument(
"--outdir",
type=str,
@@ -42,12 +53,25 @@ def main():
"""Main function"""
args = parse_args()
+ new_ontology, ontology_translation = None, None
+ if args.new_ontology is not None:
+ with open(args.new_ontology, "r", encoding="utf-8") as f:
+ new_ontology = json.load(f)
+
+ if args.ontology_translation is not None:
+ with open(args.ontology_translation, "r", encoding="utf-8") as f:
+ ontology_translation = json.load(f)
+
dataset = Rellis3DLiDARSegmentationDataset(
dataset_dir=args.dataset_dir,
split_dir=args.split_dir,
ontology_fname=args.ontology_fname,
)
- dataset.export(args.outdir)
+ dataset.export(
+ outdir=args.outdir,
+ new_ontology=new_ontology,
+ ontology_translation=ontology_translation,
+ )
if __name__ == "__main__":
diff --git a/examples/store_image_video.py b/examples/store_image_video.py
index 7e25d52e..20ab3694 100644
--- a/examples/store_image_video.py
+++ b/examples/store_image_video.py
@@ -104,7 +104,7 @@ def main():
if model is not None:
image = Image.open(sample_data["image"])
- label = model.inference(image)
+ label = model.predict(image)
lut = uc.ontology_to_rgb_lut(model.ontology)
else:
label = Image.open(sample_data["label"])
diff --git a/examples/store_lidar_video.py b/examples/store_lidar_video.py
index e280eadf..eb66ee4e 100644
--- a/examples/store_lidar_video.py
+++ b/examples/store_lidar_video.py
@@ -112,10 +112,10 @@ def main():
point_cloud = dataset.read_points(sample_data["points"])
if model is not None:
- label = model.inference(point_cloud)
+ label = model.predict(point_cloud)
lut = uc.ontology_to_rgb_lut(model.ontology)
else:
- label, _ = dataset.read_label(sample_data["label"])
+ label = dataset.read_label(sample_data["label"])
lut = uc.ontology_to_rgb_lut(dataset.ontology)
colors = lut[label] / 255.0
diff --git a/examples/tensorflow_image.py b/examples/tensorflow_image.py
index 058c2928..024910ec 100644
--- a/examples/tensorflow_image.py
+++ b/examples/tensorflow_image.py
@@ -73,7 +73,7 @@ def main():
if args.image is not None:
image = Image.open(args.image).convert("RGB")
- result = model.inference(image)
+ result = model.predict(image)
result = uc.label_to_rgb(result, model.ontology)
result.show()
diff --git a/examples/torch_image.py b/examples/torch_image.py
index aeca7cb1..6410136c 100644
--- a/examples/torch_image.py
+++ b/examples/torch_image.py
@@ -73,7 +73,7 @@ def main():
if args.image is not None:
image = Image.open(args.image).convert("RGB")
- result = model.inference(image)
+ result = model.predict(image)
result = uc.label_to_rgb(result, model.ontology)
result.show()
diff --git a/examples/torch_lidar.py b/examples/torch_lidar.py
index 0dfab360..e6351b64 100644
--- a/examples/torch_lidar.py
+++ b/examples/torch_lidar.py
@@ -57,6 +57,13 @@ def parse_args() -> argparse.Namespace:
required=False,
help="JSON file containing translation between dataset and model classes",
)
+ parser.add_argument(
+ "--translation_direction",
+ type=str,
+ choices=["dataset_to_model", "model_to_dataset"],
+ default="dataset_to_model",
+ help="Direction of the ontology translation",
+ )
parser.add_argument(
"--predictions_outdir",
type=str,
@@ -75,16 +82,17 @@ def main():
dataset = GaiaLiDARSegmentationDataset(args.dataset)
if args.point_cloud is not None:
- point_cloud = dataset.read_points(args.point_cloud)
- result = model.inference(point_cloud)
+ result = model.predict(args.point_cloud)
lut = uc.ontology_to_rgb_lut(model.ontology)
colors = lut[result] / 255.0
+ point_cloud = dataset.read_points(args.point_cloud)
ul.view_point_cloud(point_cloud[:, :3], colors)
results = model.eval(
dataset,
split=args.split,
ontology_translation=args.ontology_translation,
+ translation_direction=args.translation_direction,
predictions_outdir=args.predictions_outdir,
results_per_sample=args.predictions_outdir is not None,
)
diff --git a/examples/torch_native_image.py b/examples/torch_native_image.py
index ab590098..f74c64c7 100644
--- a/examples/torch_native_image.py
+++ b/examples/torch_native_image.py
@@ -79,7 +79,7 @@ def main():
if args.image is not None:
image = Image.open(args.image).convert("RGB")
- result = model.inference(image)
+ result = model.predict(image)
result = uc.label_to_rgb(result, model.ontology)
result.show()
diff --git a/examples/tutorial_image_segmentation.ipynb b/examples/tutorial_image_segmentation.ipynb
index a170caae..274eb651 100644
--- a/examples/tutorial_image_segmentation.ipynb
+++ b/examples/tutorial_image_segmentation.ipynb
@@ -130,7 +130,7 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": null,
"metadata": {},
"outputs": [
{
@@ -158,7 +158,7 @@
"label = Image.open(label_fname)\n",
"label = uc.label_to_rgb(label, dataset.ontology)\n",
"\n",
- "pred = model.inference(image)\n",
+ "pred = model.predict(image)\n",
"pred = uc.label_to_rgb(pred, model.ontology)\n",
"pred = pred.resize(label.size)\n",
"\n",
diff --git a/pyproject.toml b/pyproject.toml
index cfe250bd..3251c6c6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -8,7 +8,7 @@ license = "LICENSE"
[tool.poetry.dependencies]
python = "^3.10"
-tqdm = "^4.67.0"
+tqdm = "^4.65.0"
pandas = "^2.2.3"
PyYAML = "^6.0.2"
pyarrow = "^18.0.0"
@@ -18,7 +18,7 @@ opencv-python-headless = "^4.10.0.84"
scikit-learn = "^1.6.0"
open3d = "^0.19.0"
addict = "^2.4.0"
-matplotlib = "^3.10.0"
+matplotlib = "^3.6.0"
click = "^8.1.8"
tensorboard = "^2.18.0"
pycocotools = { version = "^2.0.7", markers = "sys_platform != 'win32'" }