Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions MIGRATION_GUIDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -1146,6 +1146,41 @@ lifted_features = bic.graph.features.lifted_affinity_features_complex(...)
The output column conventions match the local-edge variants
(`SIMPLE_EDGE_FEATURE_NAMES`, `COMPLEX_EDGE_FEATURE_NAMES`).

#### Building lifted edges from per-node labels

When the lifted edges come from semantic / class labels per RAG node rather
than from long-range affinities, nifty offers
`nifty.distributed.liftedNeighborhoodFromNodeLabels`. The bioimage-cpp
equivalent lives under `bic.graph.lifted_multicut`:

```python
# nifty
lifted_uvs = nifty.distributed.liftedNeighborhoodFromNodeLabels(
graph, node_labels, graphDepth=2, numberOfThreads=4,
mode='all', ignoreLabel=0,
)

# bioimage-cpp
lifted_uvs = bic.graph.lifted_multicut.lifted_edges_from_node_labels(
graph, node_labels, graph_depth=2,
mode='all', ignore_label=0, number_of_threads=4,
)
```

Both functions return an `(n_lifted, 2)` `uint64` array of `(u, v)` pairs
with `u < v`, sorted lexicographically. The BFS hop distance is restricted
to `[2, graph_depth]`, so base-graph edges are excluded; in bioimage-cpp,
`graph_depth=1` is accepted but therefore always yields an empty result.
`mode='same'` keeps only pairs with `node_labels[u] == node_labels[v]`,
`mode='different'` keeps only pairs with `node_labels[u] != node_labels[v]`;
`ignore_label` drops every pair in which either endpoint's label equals it.

Intentional differences vs. nifty:

- snake_case parameter names (`graph_depth`, `ignore_label`,
`number_of_threads`);
- `ignore_label` defaults to `None` (no filtering) instead of `0`;
- node `0` is iterated as a source (nifty's distributed variant has an
off-by-one that silently skips it).

End-to-end pipeline (also in `examples/segmentation/lifted_multicut_from_affinities.py`):

```python
Expand Down
144 changes: 144 additions & 0 deletions include/bioimage_cpp/graph/lifted_multicut/lifted_from_node_labels.hxx
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
#pragma once

#include "bioimage_cpp/array_view.hxx"
#include "bioimage_cpp/detail/edge_hash.hxx"
#include "bioimage_cpp/detail/threading.hxx"
#include "bioimage_cpp/graph/breadth_first_search.hxx"
#include "bioimage_cpp/graph/undirected_graph.hxx"

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <optional>
#include <stdexcept>
#include <unordered_set>
#include <vector>

namespace bioimage_cpp::graph::lifted_multicut {

// Label predicate applied to a candidate lifted pair (u, v):
//   all       - keep the pair regardless of its labels;
//   same      - keep the pair only if labels[u] == labels[v];
//   different - keep the pair only if labels[u] != labels[v].
enum class LiftedNodeLabelMode { all, same, different };

// Discover lifted edges from per-node labels by BFS-neighborhood expansion.
//
// For every source node `u` the BFS reports each reachable node `v` together
// with the hop distance. A pair `(u, v)` with `u < v` becomes a lifted edge
// iff:
// - distance is in [2, graph_depth] (distance 1 corresponds to base edges
// and is excluded);
// - neither labels[u] nor labels[v] equals `ignore_label` (when set);
// - the `mode` predicate matches: `all` keeps every pair, `same` keeps
// pairs with labels[u] == labels[v], `different` keeps the complement.
//
// Returns the deduplicated set sorted lexicographically with `u < v`.
template <class LabelT>
std::vector<bioimage_cpp::detail::Edge> lifted_edges_from_node_labels(
const UndirectedGraph &graph,
const ConstArrayView<LabelT> &node_labels,
const std::uint64_t graph_depth,
const LiftedNodeLabelMode mode,
const std::optional<LabelT> ignore_label,
const std::size_t number_of_threads
) {
if (node_labels.ndim() != 1) {
throw std::invalid_argument(
"node_labels must be a 1D array"
);
}
if (static_cast<std::uint64_t>(node_labels.shape[0]) != graph.number_of_nodes()) {
throw std::invalid_argument(
"node_labels length must match graph number_of_nodes"
);
}
if (graph_depth < 1) {
throw std::invalid_argument(
"graph_depth must be >= 1"
);
}

const auto n_nodes = static_cast<std::size_t>(graph.number_of_nodes());
if (n_nodes == 0) {
return {};
}

const auto n_threads = bioimage_cpp::detail::normalize_thread_count(
number_of_threads, n_nodes
);

const auto *labels = node_labels.data;

const auto label_pair_passes =
[&](const LabelT label_u, const LabelT label_v) -> bool {
if (ignore_label.has_value()) {
if (label_u == *ignore_label || label_v == *ignore_label) {
return false;
}
}
switch (mode) {
case LiftedNodeLabelMode::all:
return true;
case LiftedNodeLabelMode::same:
return label_u == label_v;
case LiftedNodeLabelMode::different:
return label_u != label_v;
}
return false;
};

using EdgeSet = std::unordered_set<
bioimage_cpp::detail::Edge, bioimage_cpp::detail::EdgeHash
>;
std::vector<EdgeSet> per_thread(n_threads);

bioimage_cpp::detail::parallel_for_chunks(
n_threads,
n_nodes,
[&](const std::size_t thread_id, const std::size_t begin, const std::size_t end) {
auto &out = per_thread[thread_id];
BfsWorkspace workspace;
for (std::size_t source = begin; source < end; ++source) {
const auto label_u = labels[source];
if (ignore_label.has_value() && label_u == *ignore_label) {
continue;
}
const auto entries = breadth_first_search(
graph,
static_cast<std::uint64_t>(source),
graph_depth,
/*include_source=*/false,
workspace
);
for (const auto &entry : entries) {
if (entry.distance < 2) {
continue;
}
if (entry.node <= source) {
continue;
}
const auto label_v = labels[static_cast<std::size_t>(entry.node)];
if (!label_pair_passes(label_u, label_v)) {
continue;
}
out.insert(bioimage_cpp::detail::edge_key(
static_cast<std::uint64_t>(source), entry.node
));
}
}
}
);

EdgeSet merged;
std::size_t total = 0;
for (const auto &set : per_thread) {
total += set.size();
}
merged.reserve(total);
for (auto &set : per_thread) {
merged.insert(set.begin(), set.end());
}

std::vector<bioimage_cpp::detail::Edge> result(merged.begin(), merged.end());
std::sort(result.begin(), result.end());
return result;
}

} // namespace bioimage_cpp::graph::lifted_multicut
97 changes: 97 additions & 0 deletions src/bindings/graph.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include "bioimage_cpp/graph/lifted_from_affinities.hxx"
#include "bioimage_cpp/graph/lifted_multicut.hxx"
#include "bioimage_cpp/graph/lifted_multicut/fusion_move.hxx"
#include "bioimage_cpp/graph/lifted_multicut/lifted_from_node_labels.hxx"
#include "bioimage_cpp/graph/multicut.hxx"
#include "bioimage_cpp/graph/mutex_watershed.hxx"
#include "bioimage_cpp/graph/multicut/fusion_move.hxx"
Expand All @@ -24,14 +25,17 @@
#include "bioimage_cpp/graph/undirected_graph.hxx"

#include <nanobind/ndarray.h>
#include <nanobind/stl/optional.h>
#include <nanobind/stl/pair.h>
#include <nanobind/stl/string.h>
#include <nanobind/stl/unique_ptr.h>
#include <nanobind/stl/vector.h>

#include <array>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <optional>
#include <stdexcept>
#include <string>
#include <utility>
Expand Down Expand Up @@ -1146,6 +1150,58 @@ UInt64Array lifted_edges_from_affinities_t(
return result;
}

// Python-facing wrapper around
// graph::lifted_multicut::lifted_edges_from_node_labels.
//
// Checks that `node_labels` is 1D with one entry per graph node, converts the
// `mode` string ("all", "same" or "different") to the corresponding enum
// value, runs the computation with the GIL released, and packs the resulting
// edges into an array of shape (number of edges, 2) holding (first, second)
// per row.
//
// Throws std::invalid_argument on a shape mismatch or an unknown `mode`.
template <class LabelT>
UInt64Array lifted_edges_from_node_labels_t(
    const Graph &graph,
    LabelArray<LabelT> node_labels,
    const std::uint64_t graph_depth,
    const std::string &mode,
    std::optional<LabelT> ignore_label,
    const std::size_t number_of_threads
) {
    using Mode = graph::lifted_multicut::LiftedNodeLabelMode;

    if (node_labels.ndim() != 1) {
        throw std::invalid_argument("node_labels must be a 1D array");
    }
    if (node_labels.shape(0) != graph.number_of_nodes()) {
        throw std::invalid_argument("node_labels length must match graph number_of_nodes");
    }

    // Translate the textual mode; anything unrecognized is rejected.
    const Mode mode_enum = [&mode]() -> Mode {
        if (mode == "all") {
            return Mode::all;
        }
        if (mode == "same") {
            return Mode::same;
        }
        if (mode == "different") {
            return Mode::different;
        }
        throw std::invalid_argument(
            "mode must be one of 'all', 'same', 'different', got '" + mode + "'"
        );
    }();

    // View over the label buffer; no copy is made here.
    ConstArrayView<LabelT> labels_view{
        node_labels.data(),
        {static_cast<std::ptrdiff_t>(node_labels.shape(0))},
        {},
    };

    // The GIL is released only for the duration of the computation.
    const std::vector<bioimage_cpp::detail::Edge> lifted_edges = [&]() {
        nb::gil_scoped_release release;
        return graph::lifted_multicut::lifted_edges_from_node_labels<LabelT>(
            graph, labels_view, graph_depth, mode_enum, ignore_label, number_of_threads
        );
    }();

    // Flatten the pairs row by row into the output array.
    auto result = make_uint64_array({lifted_edges.size(), 2});
    auto *cursor = result.data();
    for (const auto &edge : lifted_edges) {
        *cursor++ = edge.first;
        *cursor++ = edge.second;
    }
    return result;
}

template <class LabelT>
DoubleArray accumulate_lifted_affinity_features_t(
LabelArray<LabelT> labels,
Expand Down Expand Up @@ -1816,6 +1872,47 @@ void bind_graph(nb::module_ &m) {
nb::arg("number_of_threads")
);

// Register lifted_edges_from_node_labels_t once per supported label type
// (uint32, uint64, int32, int64) under underscore-prefixed, dtype-suffixed
// names. Every argument is declared without a default value here.
// NOTE(review): presumably a Python-side wrapper selects the variant by
// dtype and supplies the defaults - confirm against the Python package.
m.def(
"_lifted_edges_from_node_labels_uint32",
&lifted_edges_from_node_labels_t<std::uint32_t>,
nb::arg("graph"),
nb::arg("node_labels"),
nb::arg("graph_depth"),
nb::arg("mode"),
nb::arg("ignore_label"),
nb::arg("number_of_threads")
);
m.def(
"_lifted_edges_from_node_labels_uint64",
&lifted_edges_from_node_labels_t<std::uint64_t>,
nb::arg("graph"),
nb::arg("node_labels"),
nb::arg("graph_depth"),
nb::arg("mode"),
nb::arg("ignore_label"),
nb::arg("number_of_threads")
);
m.def(
"_lifted_edges_from_node_labels_int32",
&lifted_edges_from_node_labels_t<std::int32_t>,
nb::arg("graph"),
nb::arg("node_labels"),
nb::arg("graph_depth"),
nb::arg("mode"),
nb::arg("ignore_label"),
nb::arg("number_of_threads")
);
m.def(
"_lifted_edges_from_node_labels_int64",
&lifted_edges_from_node_labels_t<std::int64_t>,
nb::arg("graph"),
nb::arg("node_labels"),
nb::arg("graph_depth"),
nb::arg("mode"),
nb::arg("ignore_label"),
nb::arg("number_of_threads")
);

m.def(
"_accumulate_lifted_affinity_features_uint32",
&accumulate_lifted_affinity_features_t<std::uint32_t>,
Expand Down
Loading
Loading