From e2c0e209cb427fc878359b2100979180572f8beb Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Sat, 23 May 2026 10:09:41 +0200 Subject: [PATCH] Add rag label accumulation --- MIGRATION_GUIDE.md | 42 +++ .../bioimage_cpp/graph/label_accumulation.hxx | 139 ++++++++++ src/bindings/graph.cxx | 95 +++++++ src/bioimage_cpp/graph/features/__init__.py | 119 +++++++++ tests/graph/test_rag_accumulate_labels.py | 245 ++++++++++++++++++ 5 files changed, 640 insertions(+) create mode 100644 include/bioimage_cpp/graph/label_accumulation.hxx create mode 100644 tests/graph/test_rag_accumulate_labels.py diff --git a/MIGRATION_GUIDE.md b/MIGRATION_GUIDE.md index c88ca9d..e01c12e 100644 --- a/MIGRATION_GUIDE.md +++ b/MIGRATION_GUIDE.md @@ -1385,6 +1385,48 @@ Notes: - `number_of_threads=0` uses the library default; pass a positive integer for a fixed thread count. +### Accumulating Labels on a RAG + +Nifty's `gridRagAccumulateLabels` projects a second label volume onto a RAG +by taking a per-node majority vote (commonly used to project a ground-truth +segmentation onto an over-segmentation). + +Nifty: + +```python +import nifty.graph.rag as nrag + +rag = nrag.gridRag(labels) +node_labels = nrag.gridRagAccumulateLabels(rag, gt) +# ignore label 0 in the ground truth (nifty's "ignoreBackground"): +node_labels = nrag.gridRagAccumulateLabels(rag, gt, ignoreBackground=True) +``` + +bioimage-cpp: + +```python +rag = bic.graph.region_adjacency_graph(labels) +node_labels = bic.graph.features.accumulate_labels(rag, labels, gt) +# arbitrary ignore value (covers nifty's ignoreBackground=True by passing 0): +node_labels = bic.graph.features.accumulate_labels( + rag, labels, gt, ignore_value=0 +) +``` + +Notes: + +- `labels` must be the over-segmentation used to construct `rag`. +- `other_labels` must have the same shape as `labels`. Supported dtypes for + both arrays: `uint32`, `uint64`, `int32`, `int64`; they may differ. +- The output has length `rag.number_of_nodes` and the same dtype as + `other_labels`. Nodes whose pixels are all ignored receive `0`. +- Ties in the majority vote are broken by smaller label id (deterministic). + Nifty's tie-breaking depends on `std::unordered_map` iteration order and + is therefore platform-dependent; `bic` resolves ties deterministically. +- `ignore_value` is more general than nifty's boolean `ignoreBackground`: + pass `0` to reproduce `ignoreBackground=True`, or any other value to skip + arbitrary sentinels (e.g. `255` or `-1`). + ### RAG Boundary and Affinity Features Nifty has RAG feature helpers such as `accumulateEdgeMeanAndLength`, diff --git a/include/bioimage_cpp/graph/label_accumulation.hxx b/include/bioimage_cpp/graph/label_accumulation.hxx new file mode 100644 index 0000000..cb19b5e --- /dev/null +++ b/include/bioimage_cpp/graph/label_accumulation.hxx @@ -0,0 +1,139 @@ +#pragma once + +#include "bioimage_cpp/array_view.hxx" +#include "bioimage_cpp/detail/threading.hxx" +#include "bioimage_cpp/graph/node_label_projection.hxx" +#include "bioimage_cpp/graph/region_adjacency_graph.hxx" + +#include +#include +#include +#include +#include +#include +#include + +namespace bioimage_cpp::graph { + +namespace detail_label_accumulation { + +inline std::size_t number_of_pixels(const std::vector &shape) { + return static_cast(std::accumulate( + shape.begin(), + shape.end(), + std::ptrdiff_t{1}, + [](const std::ptrdiff_t a, const std::ptrdiff_t b) { return a * b; } + )); +} + +template +void scan_chunk( + const LabelT *labels, + const OtherT *other_labels, + const std::size_t pixel_begin, + const std::size_t pixel_end, + const bool has_ignore_value, + const OtherT ignore_value, + std::vector> &histograms +) { + for (std::size_t index = pixel_begin; index < pixel_end; ++index) { + const auto other = other_labels[index]; + if (has_ignore_value && other == ignore_value) { + continue; + } + const auto node = detail::checked_label_to_node(labels[index]); + ++histograms[static_cast(node)][other]; + } +} + +} // namespace detail_label_accumulation + +template +void accumulate_labels( + const RegionAdjacencyGraph &rag, + const ConstArrayView &labels, + const ConstArrayView &other_labels, + const bool has_ignore_value, + const OtherT ignore_value, + const std::size_t number_of_threads, + const ArrayView &out +) { + if (labels.ndim() != 2 && labels.ndim() != 3) { + throw std::invalid_argument( + "labels must be a 2D or 3D array, got ndim=" + + std::to_string(labels.ndim()) + ); + } + detail_projection::require_rag_shape_matches_labels(rag, labels.shape); + if (other_labels.shape != labels.shape) { + throw std::invalid_argument("other_labels shape must match labels shape"); + } + const auto number_of_nodes = static_cast(rag.number_of_nodes()); + if (out.shape != std::vector{static_cast(number_of_nodes)}) { + throw std::invalid_argument( + "out shape must be (number_of_nodes,)" + ); + } + if (detail::max_label(labels) >= static_cast(number_of_nodes)) { + throw std::invalid_argument("labels contain a node id outside the rag"); + } + + const auto n_pixels = detail_label_accumulation::number_of_pixels(labels.shape); + const auto n_threads = detail::normalize_thread_count(number_of_threads, n_pixels); + + std::vector>> per_thread( + n_threads, + std::vector>(number_of_nodes) + ); + + bioimage_cpp::detail::parallel_for_chunks( + n_threads, + n_pixels, + [&](const std::size_t thread_id, const std::size_t begin, const std::size_t end) { + detail_label_accumulation::scan_chunk( + labels.data, + other_labels.data, + begin, + end, + has_ignore_value, + ignore_value, + per_thread[thread_id] + ); + } + ); + + // Merge per-thread histograms and pick majority per node in one pass over + // nodes. This is embarrassingly parallel across nodes. + const auto node_threads = detail::normalize_thread_count(number_of_threads, number_of_nodes); + bioimage_cpp::detail::parallel_for_chunks( + node_threads, + number_of_nodes, + [&](const std::size_t, const std::size_t node_begin, const std::size_t node_end) { + for (std::size_t node = node_begin; node < node_end; ++node) { + std::unordered_map merged; + for (auto &thread_histograms : per_thread) { + auto &node_hist = thread_histograms[node]; + for (const auto &entry : node_hist) { + merged[entry.first] += entry.second; + } + node_hist.clear(); + } + OtherT best_label = OtherT{0}; + std::uint64_t best_count = 0; + bool has_best = false; + for (const auto &entry : merged) { + if (!has_best || + entry.second > best_count || + (entry.second == best_count && entry.first < best_label)) { + best_label = entry.first; + best_count = entry.second; + has_best = true; + } + } + out.data[node] = has_best ? best_label : OtherT{0}; + } + } + ); +} + +} // namespace bioimage_cpp::graph diff --git a/src/bindings/graph.cxx b/src/bindings/graph.cxx index be65bdf..e18b4a8 100644 --- a/src/bindings/graph.cxx +++ b/src/bindings/graph.cxx @@ -7,6 +7,7 @@ #include "bioimage_cpp/graph/feature_accumulation.hxx" #include "bioimage_cpp/graph/grid_features.hxx" #include "bioimage_cpp/graph/grid_graph.hxx" +#include "bioimage_cpp/graph/label_accumulation.hxx" #include "bioimage_cpp/graph/lifted_from_affinities.hxx" #include "bioimage_cpp/graph/lifted_multicut.hxx" #include "bioimage_cpp/graph/lifted_multicut/fusion_move.hxx" @@ -113,6 +114,20 @@ DoubleArray make_double_array(const std::vector &shape) { return DoubleArray(data, shape.size(), shape.data(), owner); } +template +using TypedArray = nb::ndarray; + +template +TypedArray make_typed_array(const std::vector &shape) { + std::size_t size = 1; + for (const auto axis_size : shape) { + size *= axis_size; + } + auto *data = new T[size](); + nb::capsule owner(data, [](void *p) noexcept { delete[] static_cast(p); }); + return TypedArray(data, shape.size(), shape.data(), owner); +} + template FloatingArray make_floating_array(const std::vector &shape) { std::size_t size = 1; @@ -1245,6 +1260,55 @@ UInt64Array project_node_labels_to_pixels_t( return result; } +template +TypedArray accumulate_labels_t( + const Rag &rag, + LabelArray labels, + LabelArray other_labels, + const bool has_ignore_value, + const OtherT ignore_value, + const std::size_t number_of_threads +) { + if (labels.ndim() != other_labels.ndim()) { + throw std::invalid_argument("other_labels shape must match labels shape"); + } + for (std::size_t axis = 0; axis < labels.ndim(); ++axis) { + if (labels.shape(axis) != other_labels.shape(axis)) { + throw std::invalid_argument("other_labels shape must match labels shape"); + } + } + + auto result = make_typed_array({static_cast(rag.number_of_nodes())}); + + ConstArrayView labels_view{ + labels.data(), + ndarray_shape(labels), + {}, + }; + ConstArrayView other_labels_view{ + other_labels.data(), + ndarray_shape(other_labels), + {}, + }; + ArrayView out_view{ + result.data(), + {static_cast(rag.number_of_nodes())}, + {}, + }; + + nb::gil_scoped_release release; + graph::accumulate_labels( + rag, + labels_view, + other_labels_view, + has_ignore_value, + ignore_value, + number_of_threads, + out_view + ); + return result; +} + } // namespace void bind_graph(nb::module_ &m) { @@ -1889,6 +1953,37 @@ void bind_graph(nb::module_ &m) { nb::arg("node_labels"), nb::arg("number_of_threads") ); + +#define BIC_BIND_ACCUMULATE_LABELS(LSUF, LT, OSUF, OT) \ + m.def( \ + "_accumulate_labels_" #LSUF "_" #OSUF, \ + &accumulate_labels_t, \ + nb::arg("rag"), \ + nb::arg("labels"), \ + nb::arg("other_labels"), \ + nb::arg("has_ignore_value"), \ + nb::arg("ignore_value"), \ + nb::arg("number_of_threads") \ + ) + + BIC_BIND_ACCUMULATE_LABELS(uint32, std::uint32_t, uint32, std::uint32_t); + BIC_BIND_ACCUMULATE_LABELS(uint32, std::uint32_t, uint64, std::uint64_t); + BIC_BIND_ACCUMULATE_LABELS(uint32, std::uint32_t, int32, std::int32_t); + BIC_BIND_ACCUMULATE_LABELS(uint32, std::uint32_t, int64, std::int64_t); + BIC_BIND_ACCUMULATE_LABELS(uint64, std::uint64_t, uint32, std::uint32_t); + BIC_BIND_ACCUMULATE_LABELS(uint64, std::uint64_t, uint64, std::uint64_t); + BIC_BIND_ACCUMULATE_LABELS(uint64, std::uint64_t, int32, std::int32_t); + BIC_BIND_ACCUMULATE_LABELS(uint64, std::uint64_t, int64, std::int64_t); + BIC_BIND_ACCUMULATE_LABELS(int32, std::int32_t, uint32, std::uint32_t); + BIC_BIND_ACCUMULATE_LABELS(int32, std::int32_t, uint64, std::uint64_t); + BIC_BIND_ACCUMULATE_LABELS(int32, std::int32_t, int32, std::int32_t); + BIC_BIND_ACCUMULATE_LABELS(int32, std::int32_t, int64, std::int64_t); + BIC_BIND_ACCUMULATE_LABELS(int64, std::int64_t, uint32, std::uint32_t); + BIC_BIND_ACCUMULATE_LABELS(int64, std::int64_t, uint64, std::uint64_t); + BIC_BIND_ACCUMULATE_LABELS(int64, std::int64_t, int32, std::int32_t); + BIC_BIND_ACCUMULATE_LABELS(int64, std::int64_t, int64, std::int64_t); + +#undef BIC_BIND_ACCUMULATE_LABELS } } // namespace bioimage_cpp::bindings diff --git a/src/bioimage_cpp/graph/features/__init__.py b/src/bioimage_cpp/graph/features/__init__.py index d3ca078..9e4e0b6 100644 --- a/src/bioimage_cpp/graph/features/__init__.py +++ b/src/bioimage_cpp/graph/features/__init__.py @@ -13,6 +13,8 @@ :func:`grid_affinity_features_with_lifted` — weights for nearest-neighbor grid graphs (and optional long-range edges) computed directly from boundary maps / affinity channels. +- :func:`accumulate_labels` — majority-vote of a second label volume per + RAG node (equivalent to nifty's ``gridRagAccumulateLabels``). """ from __future__ import annotations @@ -96,6 +98,34 @@ } +_LABEL_DTYPES = ( + np.dtype("uint32"), + np.dtype("uint64"), + np.dtype("int32"), + np.dtype("int64"), +) + + +def _accumulate_labels_dispatch(): + suffix = { + np.dtype("uint32"): "uint32", + np.dtype("uint64"): "uint64", + np.dtype("int32"): "int32", + np.dtype("int64"): "int64", + } + dispatch = {} + for labels_dtype in _LABEL_DTYPES: + for other_dtype in _LABEL_DTYPES: + name = ( + f"_accumulate_labels_{suffix[labels_dtype]}_{suffix[other_dtype]}" + ) + dispatch[(labels_dtype, other_dtype)] = getattr(_core, name) + return dispatch + + +_ACCUMULATE_LABELS_DISPATCH = _accumulate_labels_dispatch() + + def edge_map_features( rag, labels: np.ndarray, @@ -354,6 +384,94 @@ def grid_affinity_features_with_lifted( ) +def accumulate_labels( + rag, + labels: np.ndarray, + other_labels: np.ndarray, + *, + ignore_value: int | None = None, + number_of_threads: int = 0, +) -> np.ndarray: + """Majority-vote of ``other_labels`` per RAG node. + + For each node in ``rag``, returns the most frequent value of + ``other_labels`` across the pixels assigned to that node by the + over-segmentation ``labels``. This is the equivalent of nifty's + ``gridRagAccumulateLabels``. + + Parameters + ---------- + rag: + :class:`bioimage_cpp.graph.RegionAdjacencyGraph` built from ``labels``. + labels: + 2D or 3D over-segmentation used to construct ``rag``. Supported + dtypes: ``uint32``, ``uint64``, ``int32``, ``int64``. + other_labels: + Secondary label volume with the same shape as ``labels``. Supported + dtypes: ``uint32``, ``uint64``, ``int32``, ``int64``. + ignore_value: + Optional. Pixels where ``other_labels`` equals this value are + excluded from the histogram. Set to ``0`` to reproduce nifty's + ``ignoreBackground=True``. + number_of_threads: + ``0`` (default) uses the library default; positive integers fix + the thread count. + + Returns + ------- + np.ndarray + 1D array of length ``rag.number_of_nodes`` with the same dtype as + ``other_labels``. Nodes whose pixels are all ignored (or for which + no pixel contributes) get ``0``. Ties are broken by smaller label + id (deterministic). + """ + label_array = _normalize_labels(labels) + if tuple(int(size) for size in rag.shape) != label_array.shape: + raise ValueError( + "rag shape must match labels shape, got " + f"rag shape={tuple(rag.shape)}, labels shape={label_array.shape}" + ) + + other_array = np.asarray(other_labels) + if other_array.dtype not in _LABEL_DTYPES: + supported = ", ".join(str(dtype) for dtype in _LABEL_DTYPES) + raise TypeError( + f"other_labels must have one of dtypes ({supported}), got " + f"dtype={other_array.dtype}" + ) + if other_array.shape != label_array.shape: + raise ValueError( + "other_labels shape must match labels shape, got " + f"other_labels shape={other_array.shape}, " + f"labels shape={label_array.shape}" + ) + other_array = np.ascontiguousarray(other_array) + + if ignore_value is None: + has_ignore_value = False + ignore_scalar = other_array.dtype.type(0) + else: + info = np.iinfo(other_array.dtype) + ignore_int = int(ignore_value) + if ignore_int < info.min or ignore_int > info.max: + raise ValueError( + f"ignore_value={ignore_int} is not representable in " + f"other_labels dtype {other_array.dtype}" + ) + has_ignore_value = True + ignore_scalar = other_array.dtype.type(ignore_int) + + run = _ACCUMULATE_LABELS_DISPATCH[(label_array.dtype, other_array.dtype)] + return run( + rag, + label_array, + other_array, + bool(has_ignore_value), + ignore_scalar, + _normalize_number_of_threads(number_of_threads), + ) + + def _accumulate_edge_map_features( rag, labels: np.ndarray, @@ -462,6 +580,7 @@ def _accumulate_lifted_affinity_features( __all__ = [ "COMPLEX_EDGE_FEATURE_NAMES", "SIMPLE_EDGE_FEATURE_NAMES", + "accumulate_labels", "affinity_features", "affinity_features_complex", "edge_map_features", diff --git a/tests/graph/test_rag_accumulate_labels.py b/tests/graph/test_rag_accumulate_labels.py new file mode 100644 index 0000000..7130aea --- /dev/null +++ b/tests/graph/test_rag_accumulate_labels.py @@ -0,0 +1,245 @@ +import numpy as np +import pytest + +import bioimage_cpp as bic + + +LABEL_DTYPES = [np.uint32, np.uint64, np.int32, np.int64] + + +def _reference_majority(rag, labels, other_labels, ignore_value=None): + out = np.zeros(int(rag.number_of_nodes), dtype=other_labels.dtype) + flat_labels = np.asarray(labels).ravel() + flat_other = np.asarray(other_labels).ravel() + histograms = [dict() for _ in range(int(rag.number_of_nodes))] + for node, other in zip(flat_labels, flat_other): + if ignore_value is not None and int(other) == int(ignore_value): + continue + histograms[int(node)][int(other)] = histograms[int(node)].get(int(other), 0) + 1 + for node, hist in enumerate(histograms): + if not hist: + out[node] = 0 + continue + # Sort by (-count, label) for argmax with smallest-label tie-break. + best_label, _ = min(hist.items(), key=lambda kv: (-kv[1], kv[0])) + out[node] = best_label + return out + + +def test_accumulate_labels_2d_hand_computed(): + labels = np.array([[1, 1, 2], [3, 2, 2]], dtype=np.uint64) + other = np.array([[7, 7, 8], [9, 8, 8]], dtype=np.uint64) + rag = bic.graph.region_adjacency_graph(labels) + + actual = bic.graph.features.accumulate_labels(rag, labels, other) + + assert actual.dtype == np.uint64 + np.testing.assert_array_equal(actual, np.array([0, 7, 8, 9], dtype=np.uint64)) + + +def test_accumulate_labels_3d_hand_computed(): + labels = np.array( + [ + [[1, 1], [2, 2]], + [[1, 3], [3, 2]], + ], + dtype=np.uint32, + ) + other = np.array( + [ + [[5, 5], [6, 7]], + [[5, 8], [8, 7]], + ], + dtype=np.uint64, + ) + rag = bic.graph.region_adjacency_graph(labels) + + actual = bic.graph.features.accumulate_labels(rag, labels, other) + + # node 0: no pixels + # node 1: other = {5, 5, 5} -> 5 + # node 2: other = {6, 7, 7} -> 7 + # node 3: other = {8, 8} -> 8 + np.testing.assert_array_equal(actual, np.array([0, 5, 7, 8], dtype=np.uint64)) + + +@pytest.mark.parametrize("labels_dtype", LABEL_DTYPES) +@pytest.mark.parametrize("other_dtype", LABEL_DTYPES) +def test_accumulate_labels_dtype_combinations(labels_dtype, other_dtype): + rng = np.random.default_rng(42) + labels = rng.integers(0, 6, size=(5, 7)).astype(labels_dtype) + other = rng.integers(1, 4, size=(5, 7)).astype(other_dtype) + rag = bic.graph.region_adjacency_graph(labels) + + actual = bic.graph.features.accumulate_labels(rag, labels, other) + expected = _reference_majority(rag, labels, other) + + assert actual.dtype == np.dtype(other_dtype) + np.testing.assert_array_equal(actual, expected) + + +def test_accumulate_labels_ignore_value(): + labels = np.array([[1, 1, 2], [3, 2, 2]], dtype=np.uint64) + other = np.array([[0, 0, 0], [9, 8, 8]], dtype=np.uint64) + rag = bic.graph.region_adjacency_graph(labels) + + with_ignore = bic.graph.features.accumulate_labels( + rag, labels, other, ignore_value=0 + ) + without_ignore = bic.graph.features.accumulate_labels(rag, labels, other) + + # node 0: empty -> 0 + # node 1: pixels {0, 0}; all ignored -> 0 + # node 2: pixels {0, 8, 8}; 0 ignored -> 8 + # node 3: pixels {9} -> 9 + np.testing.assert_array_equal( + with_ignore, np.array([0, 0, 8, 9], dtype=np.uint64) + ) + # Without ignoring, node 1 has only zeros -> 0, node 2 has {0,8,8} -> 8. + np.testing.assert_array_equal( + without_ignore, np.array([0, 0, 8, 9], dtype=np.uint64) + ) + + +def test_accumulate_labels_tie_break_smaller_wins(): + labels = np.array([[1, 1]], dtype=np.uint64) + other = np.array([[5, 3]], dtype=np.uint64) # tie between 3 and 5 + rag = bic.graph.region_adjacency_graph(labels) + + actual = bic.graph.features.accumulate_labels(rag, labels, other) + # Both node 1's pixels are distinct labels with count 1 -> smaller wins. + assert int(actual[1]) == 3 + + +def test_accumulate_labels_ignore_value_negative_with_signed_dtype(): + labels = np.array([[1, 1, 2]], dtype=np.uint64) + other = np.array([[-1, 7, 7]], dtype=np.int32) + rag = bic.graph.region_adjacency_graph(labels) + + actual = bic.graph.features.accumulate_labels( + rag, labels, other, ignore_value=-1 + ) + # node 1: {-1, 7} with -1 ignored -> 7 + # node 2: {7} -> 7 + np.testing.assert_array_equal(actual, np.array([0, 7, 7], dtype=np.int32)) + + +def test_accumulate_labels_parallel_matches_single_thread(): + rng = np.random.default_rng(0) + labels = rng.integers(0, 20, size=(8, 12, 11)).astype(np.uint32) + other = rng.integers(0, 50, size=(8, 12, 11)).astype(np.uint64) + rag = bic.graph.region_adjacency_graph(labels, number_of_threads=2) + + single = bic.graph.features.accumulate_labels( + rag, labels, other, number_of_threads=1 + ) + parallel = bic.graph.features.accumulate_labels( + rag, labels, other, number_of_threads=4 + ) + + np.testing.assert_array_equal(single, parallel) + + +# Nifty's gridRagAccumulateLabels iterates a std::unordered_map for argmax +# (see nifty/graph/rag/grid_rag_features.hxx), so its tie-breaking depends +# on hash-table iteration order and is not portable. bic resolves ties +# deterministically (smaller label id wins). The cross-checks below pick +# inputs where each node has a strict majority, so both implementations +# must agree. + +def _tie_free_other(labels, n_other, rng): + """Build `other` so that each node has a strict majority (no ties).""" + other = rng.integers(0, n_other, size=labels.shape).astype(np.uint32) + # For every node, overwrite half of its pixels with a distinguished + # label to guarantee a strict winner. + flat_labels = labels.ravel() + flat_other = other.ravel() + for node in np.unique(flat_labels): + mask = np.flatnonzero(flat_labels == node) + # Force a strict majority of label `node % n_other` for this node. + forced = mask[: (mask.size + 1) // 2 + 1] + flat_other[forced] = int(node) % n_other + return flat_other.reshape(labels.shape) + + +def test_accumulate_labels_matches_nifty(): + nrag = pytest.importorskip("nifty.graph.rag") + + rng = np.random.default_rng(123) + labels = rng.integers(0, 12, size=(6, 7)).astype(np.uint32) + other = _tie_free_other(labels, 5, rng) + rag = bic.graph.region_adjacency_graph(labels) + nifty_rag = nrag.gridRag(labels) + + actual = bic.graph.features.accumulate_labels(rag, labels, other) + expected = nrag.gridRagAccumulateLabels(nifty_rag, other) + np.testing.assert_array_equal(actual, expected) + + +def test_accumulate_labels_matches_nifty_3d(): + nrag = pytest.importorskip("nifty.graph.rag") + + rng = np.random.default_rng(7) + labels = rng.integers(0, 8, size=(4, 5, 6)).astype(np.uint32) + other = _tie_free_other(labels, 4, rng) + rag = bic.graph.region_adjacency_graph(labels) + nifty_rag = nrag.gridRag(labels) + + actual = bic.graph.features.accumulate_labels(rag, labels, other) + expected = nrag.gridRagAccumulateLabels(nifty_rag, other) + np.testing.assert_array_equal(actual, expected) + + +def test_accumulate_labels_matches_nifty_ignore_background(): + nrag = pytest.importorskip("nifty.graph.rag") + + rng = np.random.default_rng(31) + labels = rng.integers(0, 8, size=(5, 6)).astype(np.uint32) + # Most pixels get label 0 (the "background"), a tie-free minority is + # the distinguishing label per node. + other = np.zeros(labels.shape, dtype=np.uint32) + flat_labels = labels.ravel() + flat_other = other.ravel() + for node in np.unique(flat_labels): + mask = np.flatnonzero(flat_labels == node) + forced = mask[: (mask.size + 1) // 2 + 1] + flat_other[forced] = int(node) + 1 # non-zero, unique per node + other = flat_other.reshape(labels.shape) + + rag = bic.graph.region_adjacency_graph(labels) + nifty_rag = nrag.gridRag(labels) + + actual = bic.graph.features.accumulate_labels( + rag, labels, other, ignore_value=0 + ) + expected = nrag.gridRagAccumulateLabels( + nifty_rag, other, ignoreBackground=True + ) + np.testing.assert_array_equal(actual, expected) + + +def test_accumulate_labels_validation(): + labels = np.array([[0, 1]], dtype=np.uint64) + rag = bic.graph.region_adjacency_graph(labels) + + with pytest.raises(ValueError, match="other_labels shape"): + bic.graph.features.accumulate_labels( + rag, labels, np.zeros((2, 2), dtype=np.uint64) + ) + + other_labels = np.zeros_like(labels, dtype=np.float32) + with pytest.raises(TypeError, match="other_labels must have one of dtypes"): + bic.graph.features.accumulate_labels(rag, labels, other_labels) + + other_labels = np.zeros_like(labels, dtype=np.uint32) + with pytest.raises(ValueError, match="ignore_value=-1 is not representable"): + bic.graph.features.accumulate_labels( + rag, labels, other_labels, ignore_value=-1 + ) + + other_labels_other_shape = np.zeros((2, 2), dtype=np.uint64) + other_labels = np.zeros_like(other_labels_other_shape) + with pytest.raises(ValueError, match="rag shape"): + bic.graph.features.accumulate_labels( + rag, other_labels_other_shape, other_labels + )