diff --git a/MIGRATION_GUIDE.md b/MIGRATION_GUIDE.md
index e66fbeb..72cd32d 100644
--- a/MIGRATION_GUIDE.md
+++ b/MIGRATION_GUIDE.md
@@ -19,6 +19,22 @@ Graph functionality is under `bic.graph`, segmentation functionality is under
 `bic.segmentation`, and utility functionality (blocking, relabeling, overlap
 measurement, union-find, etc.) is under `bic.utils`.
 
+`bic.graph` keeps the core graph types and graph-level algorithms
+(`UndirectedGraph`, `GridGraph2D`, `GridGraph3D`, `RegionAdjacencyGraph`,
+`connected_components`, `breadth_first_search`, `edge_weighted_watershed`,
+`region_adjacency_graph`, `project_node_labels_to_pixels`) at the top level.
+Algorithmic domains live in dedicated submodules:
+
+- `bic.graph.multicut` — multicut objective and solvers, fusion-move
+  proposal generators, multicut problem loaders.
+- `bic.graph.lifted_multicut` — lifted multicut objective and solvers,
+  lifted multicut problem loaders. Proposal generators are re-exported
+  from `bic.graph.multicut` here for convenience.
+- `bic.graph.mutex_watershed` — graph-based mutex watershed clustering
+  (with and without semantic constraints).
+- `bic.graph.features` — edge-feature accumulation for RAGs and grid
+  graphs (boundary maps, affinity channels, lifted edge features).
+
 ## Affogato
 
 ### Affinities
@@ -142,7 +158,7 @@ Important migration notes:
 
 For mutex watershed on an arbitrary undirected graph (region adjacency graph
 or otherwise) with a separate list of long-range repulsive edges,
-`bioimage-cpp` provides `bic.graph.mutex_watershed_clustering`. This is a
+`bioimage-cpp` provides `bic.graph.mutex_watershed.mutex_watershed_clustering`. This is a
 port of affogato's `compute_mws_clustering` using the same input format as
 `LiftedMulticutObjective`: a base graph carries the attractive edges, and
 long-range (called *mutex* here) edges are supplied alongside as a `(M, 2)`
@@ -170,7 +186,7 @@ bioimage-cpp:
 import bioimage_cpp as bic
 
 graph = bic.graph.UndirectedGraph.from_edges(number_of_nodes, uvs)
-labels = bic.graph.mutex_watershed_clustering(
+labels = bic.graph.mutex_watershed.mutex_watershed_clustering(
     graph,
     weights,
     mutex_uvs,
@@ -292,7 +308,7 @@ bioimage-cpp:
 import bioimage_cpp as bic
 
 graph = bic.graph.UndirectedGraph.from_edges(number_of_nodes, uvs)
-labels, semantic_labels = bic.graph.semantic_mutex_watershed_clustering(
+labels, semantic_labels = bic.graph.mutex_watershed.semantic_mutex_watershed_clustering(
     graph,
     weights,
     mutex_uvs,
@@ -806,11 +822,11 @@ Multicut problems (3 samples × 2 sizes, originally from
 
 ```python
 # Returns (UndirectedGraph, edge_costs)
-graph, costs = bic.graph.load_multicut_problem(sample="A", size="small")
+graph, costs = bic.graph.multicut.load_multicut_problem(sample="A", size="small")
 # Or just the underlying arrays
-uv_ids, costs = bic.graph.load_multicut_problem_data(sample="B", size="medium")
+uv_ids, costs = bic.graph.multicut.load_multicut_problem_data(sample="B", size="medium")
 # Or the cached file path
-path = bic.graph.multicut_problem_path(sample="C", size="medium")
+path = bic.graph.multicut.multicut_problem_path(sample="C", size="medium")
 ```
 
 Valid samples are `"A"`, `"B"`, `"C"`; valid sizes are `"small"` and
@@ -824,10 +840,10 @@ Lifted multicut problems (2D ISBI slice, RAG-based 3D volume, and grid-graph
 volume):
 
 ```python
-problem = bic.graph.load_lifted_multicut_problem(size="2d")
+problem = bic.graph.lifted_multicut.load_lifted_multicut_problem(size="2d")
 # Fields: n_nodes (int), local_uvs, local_costs, lifted_uvs, lifted_costs.
 graph = bic.graph.UndirectedGraph.from_edges(problem.n_nodes, problem.local_uvs)
-objective = bic.graph.LiftedMulticutObjective(
+objective = bic.graph.lifted_multicut.LiftedMulticutObjective(
     graph,
     problem.local_costs,
     lifted_uvs=problem.lifted_uvs,
@@ -914,14 +930,14 @@ bioimage-cpp:
 ```python
 import bioimage_cpp as bic
 
-objective = bic.graph.LiftedMulticutObjective(
+objective = bic.graph.lifted_multicut.LiftedMulticutObjective(
     graph,
     edge_costs,
     lifted_uvs=lifted_uvs,
     lifted_costs=lifted_costs,
     bfs_distance=3,  # optional: also insert zero-weight lifted edges within k hops
 )
-labels = bic.graph.LiftedGreedyAdditiveMulticut().optimize(objective)
+labels = bic.graph.lifted_multicut.LiftedGreedyAdditiveMulticut().optimize(objective)
 energy = objective.energy(labels)
 ```
 
@@ -969,11 +985,11 @@ safety net). The differences are:
   pluggable via `sub_solver=`.
 
 ```python
-solver = bic.graph.FusionMoveLiftedMulticut(
-    proposal_generator=bic.graph.WatershedProposalGenerator(
+solver = bic.graph.lifted_multicut.FusionMoveLiftedMulticut(
+    proposal_generator=bic.graph.lifted_multicut.WatershedProposalGenerator(
         sigma=1.0, n_seeds_fraction=0.1, seed=0,
     ),
-    sub_solver=bic.graph.LiftedKernighanLinMulticut(number_of_outer_iterations=3),
+    sub_solver=bic.graph.lifted_multicut.LiftedKernighanLinMulticut(number_of_outer_iterations=3),
     number_of_iterations=10,
     stop_if_no_improvement=4,
     number_of_threads=4,
@@ -984,9 +1000,9 @@ labels = solver.optimize(objective)
 A typical warm-started solve combines greedy and KL:
 
 ```python
-solver = bic.graph.LiftedChainedSolvers([
-    bic.graph.LiftedGreedyAdditiveMulticut(),
-    bic.graph.LiftedKernighanLinMulticut(number_of_outer_iterations=10),
+solver = bic.graph.lifted_multicut.LiftedChainedSolvers([
+    bic.graph.lifted_multicut.LiftedGreedyAdditiveMulticut(),
+    bic.graph.lifted_multicut.LiftedKernighanLinMulticut(number_of_outer_iterations=10),
 ])
 labels = solver.optimize(objective)
 ```
@@ -1016,19 +1032,19 @@ two focused helpers that cover the same workflow:
 ```python
 # Discover lifted edges implied by long-range affinity offsets. 1-hop offsets
 # are skipped automatically, so the full offset list can be passed in.
-lifted_uvs = bic.graph.lifted_edges_from_affinities(
+lifted_uvs = bic.graph.features.lifted_edges_from_affinities(
     rag, oversegmentation, offsets, number_of_threads=0,
 )
 
 # Accumulate (mean, size) statistics per lifted edge. Pixel pairs whose
 # (u, v) does not appear in `lifted_uvs` are skipped, so local edges are
 # never contaminated with long-range affinities.
-lifted_features = bic.graph.lifted_affinity_features(
+lifted_features = bic.graph.features.lifted_affinity_features(
     oversegmentation, affinities, offsets, lifted_uvs,
     number_of_threads=0,
 )
 # For the 12-column feature set (mean, median, std, min, max, percentiles, size):
-lifted_features = bic.graph.lifted_affinity_features_complex(...)
+lifted_features = bic.graph.features.lifted_affinity_features_complex(...)
 ```
 
 The output column conventions match the local-edge variants
@@ -1038,16 +1054,16 @@ End-to-end pipeline (also in `examples/segmentation/lifted_multicut_from_affinit
 
 ```python
 rag = bic.graph.region_adjacency_graph(oversegmentation)
-local_costs = local_threshold - bic.graph.affinity_features(
+local_costs = local_threshold - bic.graph.features.affinity_features(
     rag, oversegmentation, direct_affinities, direct_offsets,
 )[:, 0]
-lifted_uvs = bic.graph.lifted_edges_from_affinities(
+lifted_uvs = bic.graph.features.lifted_edges_from_affinities(
     rag, oversegmentation, long_range_offsets,
 )
-lifted_costs = lifted_threshold - bic.graph.lifted_affinity_features(
+lifted_costs = lifted_threshold - bic.graph.features.lifted_affinity_features(
     oversegmentation, long_range_affinities, long_range_offsets, lifted_uvs,
 )[:, 0]
-objective = bic.graph.LiftedMulticutObjective(
+objective = bic.graph.lifted_multicut.LiftedMulticutObjective(
     rag, local_costs, lifted_uvs=lifted_uvs, lifted_costs=lifted_costs,
 )
 ```
@@ -1074,8 +1090,8 @@ bioimage-cpp:
 ```python
 import bioimage_cpp as bic
 
-objective = bic.graph.MulticutObjective(graph, edge_costs)
-labels = bic.graph.GreedyAdditiveMulticut().optimize(objective)
+objective = bic.graph.multicut.MulticutObjective(graph, edge_costs)
+labels = bic.graph.multicut.GreedyAdditiveMulticut().optimize(objective)
 energy = objective.energy(labels)
 ```
 
@@ -1111,7 +1127,7 @@ Constructor argument mapping:
 Kernighan-Lin example:
 
 ```python
-solver = bic.graph.KernighanLinMulticut(number_of_outer_iterations=5)
+solver = bic.graph.multicut.KernighanLinMulticut(number_of_outer_iterations=5)
 labels = solver.optimize(objective)
 ```
 
@@ -1123,9 +1139,9 @@ warm-start, set `objective.set_labels(...)` to a non-trivial labeling first.
 Chaining solvers:
 
 ```python
-solver = bic.graph.ChainedMulticutSolvers([
-    bic.graph.GreedyAdditiveMulticut(),
-    bic.graph.KernighanLinMulticut(number_of_outer_iterations=5),
+solver = bic.graph.multicut.ChainedMulticutSolvers([
+    bic.graph.multicut.GreedyAdditiveMulticut(),
+    bic.graph.multicut.KernighanLinMulticut(number_of_outer_iterations=5),
 ])
 labels = solver.optimize(objective)
 ```
@@ -1134,9 +1150,9 @@ Decomposing a problem into positive-cost connected components and solving each
 sub-problem with a cheaper solver:
 
 ```python
-solver = bic.graph.MulticutDecomposer(
-    sub_solver=bic.graph.KernighanLinMulticut(number_of_outer_iterations=5),
-    fallthrough_solver=bic.graph.GreedyAdditiveMulticut(),
+solver = bic.graph.multicut.MulticutDecomposer(
+    sub_solver=bic.graph.multicut.KernighanLinMulticut(number_of_outer_iterations=5),
+    fallthrough_solver=bic.graph.multicut.GreedyAdditiveMulticut(),
     number_of_threads=0,
 )
 labels = solver.optimize(objective)
@@ -1190,12 +1206,12 @@ bioimage-cpp:
 ```python
 import bioimage_cpp as bic
 
-objective = bic.graph.MulticutObjective(graph, edge_costs)
-solver = bic.graph.FusionMoveMulticut(
-    proposal_generator=bic.graph.WatershedProposalGenerator(
+objective = bic.graph.multicut.MulticutObjective(graph, edge_costs)
+solver = bic.graph.multicut.FusionMoveMulticut(
+    proposal_generator=bic.graph.multicut.WatershedProposalGenerator(
         sigma=1.0, n_seeds_fraction=0.1, seed=0,
     ),
-    sub_solver=bic.graph.GreedyAdditiveMulticut(),
+    sub_solver=bic.graph.multicut.GreedyAdditiveMulticut(),
     number_of_iterations=10,
     stop_if_no_improvement=4,
 )
@@ -1283,26 +1299,26 @@ Simple edge-map features:
 
 ```python
 rag = bic.graph.region_adjacency_graph(labels)
-features = bic.graph.edge_map_features(rag, labels, edge_map)
+features = bic.graph.features.edge_map_features(rag, labels, edge_map)
 ```
 
 The columns are:
 
 ```python
-bic.graph.SIMPLE_EDGE_FEATURE_NAMES
+bic.graph.features.SIMPLE_EDGE_FEATURE_NAMES
 # ("mean", "size")
 ```
 
 Complex edge-map features:
 
 ```python
-features = bic.graph.edge_map_features_complex(rag, labels, edge_map)
+features = bic.graph.features.edge_map_features_complex(rag, labels, edge_map)
 ```
 
 The columns are:
 
 ```python
-bic.graph.COMPLEX_EDGE_FEATURE_NAMES
+bic.graph.features.COMPLEX_EDGE_FEATURE_NAMES
 # ("mean", "median", "std", "min", "max", "p5", "p10",
 #  "p25", "p75", "p90", "p95", "size")
 ```
@@ -1310,7 +1326,7 @@ bic.graph.COMPLEX_EDGE_FEATURE_NAMES
 Affinity features:
 
 ```python
-features = bic.graph.affinity_features(
+features = bic.graph.features.affinity_features(
     rag,
     labels,
     affinities,
@@ -1321,7 +1337,7 @@ features = bic.graph.affinity_features(
 Complex affinity features:
 
 ```python
-features = bic.graph.affinity_features_complex(
+features = bic.graph.features.affinity_features_complex(
     rag,
     labels,
     affinities,
diff --git a/development/graph/_grid_affinity_compatibility.py b/development/graph/_grid_affinity_compatibility.py
index 84b3f14..9c957c9 100644
--- a/development/graph/_grid_affinity_compatibility.py
+++ b/development/graph/_grid_affinity_compatibility.py
@@ -104,7 +104,7 @@ def bioimage_cpp_local(affinities: np.ndarray, offsets: list[tuple[int, ...]]):
     import bioimage_cpp as bic
 
     graph = bic.graph.grid_graph(affinities.shape[1:])
-    weights, _ = bic.graph.grid_affinity_features(graph, affinities, offsets)
+    weights, _ = bic.graph.features.grid_affinity_features(graph, affinities, offsets)
     return graph.uv_ids(), weights
 
 
@@ -119,7 +119,7 @@ def bioimage_cpp_local_weights_only(
     """
     import bioimage_cpp as bic
 
-    weights, _ = bic.graph.grid_affinity_features(graph, affinities, offsets)
+    weights, _ = bic.graph.features.grid_affinity_features(graph, affinities, offsets)
     return weights
 
 
@@ -131,7 +131,7 @@ def bioimage_cpp_local_with_uvs(
     ``compute_nh_and_weights``, both of which return uvs in their output."""
     import bioimage_cpp as bic
 
-    weights, _ = bic.graph.grid_affinity_features(graph, affinities, offsets)
+    weights, _ = bic.graph.features.grid_affinity_features(graph, affinities, offsets)
     return graph.uv_ids(), weights
 
 
@@ -140,7 +140,7 @@ def bioimage_cpp_lifted(affinities: np.ndarray, offsets: list[tuple[int, ...]]):
 
     graph = bic.graph.grid_graph(affinities.shape[1:])
     local_weights, _, lifted_uvs, lifted_weights, _ = (
-        bic.graph.grid_affinity_features_with_lifted(graph, affinities, offsets)
+        bic.graph.features.grid_affinity_features_with_lifted(graph, affinities, offsets)
     )
     return graph, graph.uv_ids(), local_weights, lifted_uvs, lifted_weights
 
@@ -152,7 +152,7 @@ def bioimage_cpp_lifted_features_only(
     import bioimage_cpp as bic
 
     local_weights, _, lifted_uvs, lifted_weights, _ = (
-        bic.graph.grid_affinity_features_with_lifted(graph, affinities, offsets)
+        bic.graph.features.grid_affinity_features_with_lifted(graph, affinities, offsets)
     )
     return local_weights, lifted_uvs, lifted_weights
 
@@ -164,7 +164,7 @@ def bioimage_cpp_lifted_with_uvs(
     import bioimage_cpp as bic
 
     local_weights, _, lifted_uvs, lifted_weights, _ = (
-        bic.graph.grid_affinity_features_with_lifted(graph, affinities, offsets)
+        bic.graph.features.grid_affinity_features_with_lifted(graph, affinities, offsets)
     )
     return graph.uv_ids(), local_weights, lifted_uvs, lifted_weights
 
@@ -173,7 +173,7 @@ def assert_local_offsets_cover_all_edges(graph, affinities, offsets) -> None:
     """One-shot correctness check called outside of the timing loop."""
     import bioimage_cpp as bic
 
-    _, valid_edges = bic.graph.grid_affinity_features(graph, affinities, offsets)
+    _, valid_edges = bic.graph.features.grid_affinity_features(graph, affinities, offsets)
     if not np.all(valid_edges):
         raise AssertionError("local offsets did not cover all grid edges")
 
diff --git a/development/graph/_rag_compatibility.py b/development/graph/_rag_compatibility.py
index d8c6561..df37a67 100644
--- a/development/graph/_rag_compatibility.py
+++ b/development/graph/_rag_compatibility.py
@@ -144,7 +144,7 @@ def compare_boundary_features(
     # what its parallelism is tuned for, and forcing a single block (== labels
     # shape) starves nifty's worker pool.
     bic_timings, bic_features = time_call(
-        lambda: bic.graph.edge_map_features(
+        lambda: bic.graph.features.edge_map_features(
             bic_rag, labels, boundary_map, number_of_threads=threads
         ),
         repeats,
@@ -189,7 +189,7 @@ def compare_affinity_features(
         max_val = min_val + 1.0
 
     bic_timings, bic_features = time_call(
-        lambda: bic.graph.affinity_features(
+        lambda: bic.graph.features.affinity_features(
             bic_rag,
             labels,
             affs_float32,
diff --git a/development/graph/lifted_multicut/_compatibility.py b/development/graph/lifted_multicut/_compatibility.py
index e267d38..5130559 100644
--- a/development/graph/lifted_multicut/_compatibility.py
+++ b/development/graph/lifted_multicut/_compatibility.py
@@ -51,7 +51,7 @@ def load_problem(size: str, *, timeout: float):
     import nifty
     import nifty.graph.opt.lifted_multicut as nlmc
 
-    problem = bic.graph.load_lifted_multicut_problem(size, timeout=timeout)
+    problem = bic.graph.lifted_multicut.load_lifted_multicut_problem(size, timeout=timeout)
 
     bic_graph = bic.graph.UndirectedGraph.from_edges(
         problem.n_nodes, problem.local_uvs
@@ -84,7 +84,7 @@ def time_call(function: Callable[[], np.ndarray], repeats: int):
 def optimize_bic_solver(make_bic_solver, bic_graph, problem):
     import bioimage_cpp as bic
 
-    objective = bic.graph.LiftedMulticutObjective(
+    objective = bic.graph.lifted_multicut.LiftedMulticutObjective(
         bic_graph,
         problem.local_costs,
         lifted_uvs=problem.lifted_uvs,
@@ -96,7 +96,7 @@ def optimize_bic_solver(make_bic_solver, bic_graph, problem):
 def bic_energy(bic_graph, problem, labels: np.ndarray) -> float:
     import bioimage_cpp as bic
 
-    objective = bic.graph.LiftedMulticutObjective(
+    objective = bic.graph.lifted_multicut.LiftedMulticutObjective(
         bic_graph,
         problem.local_costs,
         lifted_uvs=problem.lifted_uvs,
diff --git a/development/graph/lifted_multicut/check_fusion_move.py b/development/graph/lifted_multicut/check_fusion_move.py
index f29465c..ab4cfd9 100644
--- a/development/graph/lifted_multicut/check_fusion_move.py
+++ b/development/graph/lifted_multicut/check_fusion_move.py
@@ -36,8 +36,8 @@ def main() -> None:
         )
     run_comparison(
         "lifted_fusion_move",
-        lambda: bic.graph.FusionMoveLiftedMulticut(
-            proposal_generator=bic.graph.WatershedProposalGenerator(),
+        lambda: bic.graph.lifted_multicut.FusionMoveLiftedMulticut(
+            proposal_generator=bic.graph.lifted_multicut.WatershedProposalGenerator(),
             number_of_iterations=10,
             stop_if_no_improvement=4,
             number_of_threads=threads,
diff --git a/development/graph/lifted_multicut/check_greedy_additive.py b/development/graph/lifted_multicut/check_greedy_additive.py
index c789c85..1093a8e 100644
--- a/development/graph/lifted_multicut/check_greedy_additive.py
+++ b/development/graph/lifted_multicut/check_greedy_additive.py
@@ -11,7 +11,7 @@ def main() -> None:
     ).parse_args()
     run_comparison(
         "lifted_greedy_additive",
-        lambda: bic.graph.LiftedGreedyAdditiveMulticut(),
+        lambda: bic.graph.lifted_multicut.LiftedGreedyAdditiveMulticut(),
         lambda objective: objective.liftedMulticutGreedyAdditiveFactory(),
         args,
     )
diff --git a/development/graph/lifted_multicut/check_kernighan_lin.py b/development/graph/lifted_multicut/check_kernighan_lin.py
index 840b923..4e283f9 100644
--- a/development/graph/lifted_multicut/check_kernighan_lin.py
+++ b/development/graph/lifted_multicut/check_kernighan_lin.py
@@ -16,7 +16,7 @@ def main() -> None:
     # implementations are doing the same work.
     run_comparison(
         "lifted_kernighan_lin",
-        lambda: bic.graph.LiftedKernighanLinMulticut(number_of_outer_iterations=10),
+        lambda: bic.graph.lifted_multicut.LiftedKernighanLinMulticut(number_of_outer_iterations=10),
         lambda objective: objective.chainedSolversFactory(
             [
                 objective.liftedMulticutGreedyAdditiveFactory(),
diff --git a/development/graph/lifted_multicut/evaluate_solvers.py b/development/graph/lifted_multicut/evaluate_solvers.py
index a2972fd..09f47ec 100644
--- a/development/graph/lifted_multicut/evaluate_solvers.py
+++ b/development/graph/lifted_multicut/evaluate_solvers.py
@@ -30,11 +30,11 @@ def solver_configs():
     # side because the bic proposal generator only sees the base graph.
     return {
         "lifted_greedy_additive": SolverConfig(
-            make_bic_solver=lambda: bic.graph.LiftedGreedyAdditiveMulticut(),
+            make_bic_solver=lambda: bic.graph.lifted_multicut.LiftedGreedyAdditiveMulticut(),
             make_nifty_factory=lambda objective: objective.liftedMulticutGreedyAdditiveFactory(),
         ),
         "lifted_kernighan_lin": SolverConfig(
-            make_bic_solver=lambda: bic.graph.LiftedKernighanLinMulticut(
+            make_bic_solver=lambda: bic.graph.lifted_multicut.LiftedKernighanLinMulticut(
                 number_of_outer_iterations=10
             ),
             make_nifty_factory=lambda objective: objective.chainedSolversFactory(
@@ -47,8 +47,8 @@ def solver_configs():
             ),
         ),
         "lifted_fusion_move": SolverConfig(
-            make_bic_solver=lambda: bic.graph.FusionMoveLiftedMulticut(
-                proposal_generator=bic.graph.WatershedProposalGenerator(),
+            make_bic_solver=lambda: bic.graph.lifted_multicut.FusionMoveLiftedMulticut(
+                proposal_generator=bic.graph.lifted_multicut.WatershedProposalGenerator(),
                 number_of_iterations=10,
                 stop_if_no_improvement=4,
                 number_of_threads=1,
@@ -76,7 +76,7 @@ def load_problem(size: str, *, timeout: float):
     import nifty
     import nifty.graph.opt.lifted_multicut as nlmc
 
-    problem = bic.graph.load_lifted_multicut_problem(size, timeout=timeout)
+    problem = bic.graph.lifted_multicut.load_lifted_multicut_problem(size, timeout=timeout)
     bic_graph = bic.graph.UndirectedGraph.from_edges(problem.n_nodes, problem.local_uvs)
     nifty_graph = nifty.graph.undirectedGraph(int(problem.n_nodes))
     nifty_graph.insertEdges(problem.local_uvs.astype(np.uint64, copy=False))
@@ -97,7 +97,7 @@ def make_nifty_objective():
 def make_bic_objective(bic_graph, problem):
     import bioimage_cpp as bic
 
-    return bic.graph.LiftedMulticutObjective(
+    return bic.graph.lifted_multicut.LiftedMulticutObjective(
         bic_graph,
         problem.local_costs,
         lifted_uvs=problem.lifted_uvs,
diff --git a/development/graph/multicut/_compatibility.py b/development/graph/multicut/_compatibility.py
index 39812aa..6d61633 100644
--- a/development/graph/multicut/_compatibility.py
+++ b/development/graph/multicut/_compatibility.py
@@ -46,7 +46,7 @@ def load_problem(path: str | None, *, timeout: float):
     import bioimage_cpp as bic
     import nifty
 
-    uv_ids, costs = bic.graph.load_external_multicut_problem_data(
+    uv_ids, costs = bic.graph.multicut.load_external_multicut_problem_data(
         path,
         timeout=timeout,
     )
@@ -75,7 +75,7 @@ def optimize_bic_solver(make_bic_solver, objective):
 def bic_energy(graph, costs: np.ndarray, labels: np.ndarray) -> float:
     import bioimage_cpp as bic
 
-    return bic.graph.MulticutObjective(graph, costs).energy(labels)
+    return bic.graph.multicut.MulticutObjective(graph, costs).energy(labels)
 
 
 def nifty_energy(graph, costs: np.ndarray, labels: np.ndarray) -> float:
@@ -94,7 +94,7 @@ def run_comparison(
     import nifty.graph.opt.multicut as nmc
 
     bic_graph, nifty_graph, costs = load_problem(args.path, timeout=args.timeout)
-    bic_objective = bic.graph.MulticutObjective(bic_graph, costs)
+    bic_objective = bic.graph.multicut.MulticutObjective(bic_graph, costs)
     nifty_objective = nmc.multicutObjective(nifty_graph, costs)
 
     bic_timings, bic_labels = time_call(
diff --git a/development/graph/multicut/check_chained.py b/development/graph/multicut/check_chained.py
index 6832a45..1ce4647 100644
--- a/development/graph/multicut/check_chained.py
+++ b/development/graph/multicut/check_chained.py
@@ -9,10 +9,10 @@ def main() -> None:
     args = parser("Compare bioimage-cpp and nifty chained multicut solvers.").parse_args()
     run_comparison(
         "chained",
-        lambda: bic.graph.ChainedMulticutSolvers(
+        lambda: bic.graph.multicut.ChainedMulticutSolvers(
             [
-                bic.graph.GreedyAdditiveMulticut(),
-                bic.graph.KernighanLinMulticut(number_of_outer_iterations=5),
+                bic.graph.multicut.GreedyAdditiveMulticut(),
+                bic.graph.multicut.KernighanLinMulticut(number_of_outer_iterations=5),
             ]
         ),
         lambda objective: objective.chainedSolversFactory(
diff --git a/development/graph/multicut/check_decomposer.py b/development/graph/multicut/check_decomposer.py
index 01c24b6..b7f38ba 100644
--- a/development/graph/multicut/check_decomposer.py
+++ b/development/graph/multicut/check_decomposer.py
@@ -9,7 +9,7 @@ def main() -> None:
     args = parser("Compare bioimage-cpp and nifty decomposer multicut.").parse_args()
     run_comparison(
         "decomposer",
-        lambda: bic.graph.MulticutDecomposer(bic.graph.GreedyAdditiveMulticut()),
+        lambda: bic.graph.multicut.MulticutDecomposer(bic.graph.multicut.GreedyAdditiveMulticut()),
         lambda objective: objective.multicutDecomposerFactory(
             submodelFactory=objective.greedyAdditiveFactory(),
             fallthroughFactory=objective.greedyAdditiveFactory(),
diff --git a/development/graph/multicut/check_fusion_move.py b/development/graph/multicut/check_fusion_move.py
index a779762..5746c50 100644
--- a/development/graph/multicut/check_fusion_move.py
+++ b/development/graph/multicut/check_fusion_move.py
@@ -20,8 +20,8 @@ def main() -> None:
     threads = int(args.threads)
     run_comparison(
         "fusion_move",
-        lambda: bic.graph.FusionMoveMulticut(
-            proposal_generator=bic.graph.WatershedProposalGenerator(),
+        lambda: bic.graph.multicut.FusionMoveMulticut(
+            proposal_generator=bic.graph.multicut.WatershedProposalGenerator(),
             number_of_threads=threads,
             number_of_parallel_proposals=threads,
         ),
diff --git a/development/graph/multicut/check_greedy_additive.py b/development/graph/multicut/check_greedy_additive.py
index e9f7710..e343166 100644
--- a/development/graph/multicut/check_greedy_additive.py
+++ b/development/graph/multicut/check_greedy_additive.py
@@ -9,7 +9,7 @@ def main() -> None:
     args = parser("Compare bioimage-cpp and nifty greedy-additive multicut.").parse_args()
     run_comparison(
         "greedy_additive",
-        lambda: bic.graph.GreedyAdditiveMulticut(),
+        lambda: bic.graph.multicut.GreedyAdditiveMulticut(),
         lambda objective: objective.greedyAdditiveFactory(),
         args,
     )
diff --git a/development/graph/multicut/check_greedy_fixation.py b/development/graph/multicut/check_greedy_fixation.py
index 29b033f..3235703 100644
--- a/development/graph/multicut/check_greedy_fixation.py
+++ b/development/graph/multicut/check_greedy_fixation.py
@@ -9,7 +9,7 @@ def main() -> None:
     args = parser("Compare bioimage-cpp and nifty greedy-fixation multicut.").parse_args()
     run_comparison(
         "greedy_fixation",
-        lambda: bic.graph.GreedyFixationMulticut(),
+        lambda: bic.graph.multicut.GreedyFixationMulticut(),
         lambda objective: objective.greedyFixationFactory(),
         args,
     )
diff --git a/development/graph/multicut/check_kernighan_lin.py b/development/graph/multicut/check_kernighan_lin.py
index 843bcb2..4b64a65 100644
--- a/development/graph/multicut/check_kernighan_lin.py
+++ b/development/graph/multicut/check_kernighan_lin.py
@@ -9,7 +9,7 @@ def main() -> None:
     args = parser("Compare bioimage-cpp and nifty Kernighan-Lin multicut.").parse_args()
     run_comparison(
         "kernighan_lin",
-        lambda: bic.graph.KernighanLinMulticut(number_of_outer_iterations=5),
+        lambda: bic.graph.multicut.KernighanLinMulticut(number_of_outer_iterations=5),
         lambda objective: objective.kernighanLinFactory(
             warmStartGreedy=True,
             numberOfOuterIterations=5,
diff --git a/development/graph/multicut/evaluate_solvers.py b/development/graph/multicut/evaluate_solvers.py
index 0e50933..ef97083 100644
--- a/development/graph/multicut/evaluate_solvers.py
+++ b/development/graph/multicut/evaluate_solvers.py
@@ -29,11 +29,11 @@ def solver_configs():
 
     return {
         "greedy_additive": SolverConfig(
-            make_bic_solver=lambda threads: bic.graph.GreedyAdditiveMulticut(),
+            make_bic_solver=lambda threads: bic.graph.multicut.GreedyAdditiveMulticut(),
             make_nifty_factory=lambda objective, threads: objective.greedyAdditiveFactory(),
         ),
         "kernighan_lin": SolverConfig(
-            make_bic_solver=lambda threads: bic.graph.KernighanLinMulticut(
+            make_bic_solver=lambda threads: bic.graph.multicut.KernighanLinMulticut(
                 number_of_outer_iterations=5
             ),
             make_nifty_factory=lambda objective, threads: objective.kernighanLinFactory(
@@ -42,14 +42,14 @@ def solver_configs():
             ),
         ),
         "greedy_fixation": SolverConfig(
-            make_bic_solver=lambda threads: bic.graph.GreedyFixationMulticut(),
+            make_bic_solver=lambda threads: bic.graph.multicut.GreedyFixationMulticut(),
             make_nifty_factory=lambda objective, threads: objective.greedyFixationFactory(),
         ),
         "chained": SolverConfig(
-            make_bic_solver=lambda threads: bic.graph.ChainedMulticutSolvers(
+            make_bic_solver=lambda threads: bic.graph.multicut.ChainedMulticutSolvers(
                 [
-                    bic.graph.GreedyAdditiveMulticut(),
-                    bic.graph.KernighanLinMulticut(number_of_outer_iterations=5),
+                    bic.graph.multicut.GreedyAdditiveMulticut(),
+                    bic.graph.multicut.KernighanLinMulticut(number_of_outer_iterations=5),
                 ]
             ),
             make_nifty_factory=lambda objective, threads: objective.chainedSolversFactory(
@@ -60,8 +60,8 @@ def solver_configs():
             ),
         ),
         "decomposer": SolverConfig(
-            make_bic_solver=lambda threads: bic.graph.MulticutDecomposer(
-                bic.graph.GreedyAdditiveMulticut()
+            make_bic_solver=lambda threads: bic.graph.multicut.MulticutDecomposer(
+                bic.graph.multicut.GreedyAdditiveMulticut()
             ),
             make_nifty_factory=lambda objective, threads: objective.multicutDecomposerFactory(
                 submodelFactory=objective.greedyAdditiveFactory(),
@@ -70,8 +70,8 @@ def solver_configs():
             ),
         ),
         "fusion_move": SolverConfig(
-            make_bic_solver=lambda threads: bic.graph.FusionMoveMulticut(
-                proposal_generator=bic.graph.WatershedProposalGenerator(),
+            make_bic_solver=lambda threads: bic.graph.multicut.FusionMoveMulticut(
+                proposal_generator=bic.graph.multicut.WatershedProposalGenerator(),
                 number_of_threads=threads,
                 number_of_parallel_proposals=threads,
             ),
@@ -108,7 +108,7 @@ def load_problem(problem_name: str, *, timeout: float):
     import nifty
 
     sample, size = parse_problem_name(problem_name)
-    uv_ids, costs = bic.graph.load_multicut_problem_data(
+    uv_ids, costs = bic.graph.multicut.load_multicut_problem_data(
         sample=sample,
         size=size,
         timeout=timeout,
@@ -123,7 +123,7 @@ def load_problem(problem_name: str, *, timeout: float):
 def bic_energy(graph, costs: np.ndarray, labels: np.ndarray) -> float:
     import bioimage_cpp as bic
 
-    return float(bic.graph.MulticutObjective(graph, costs).energy(labels))
+    return float(bic.graph.multicut.MulticutObjective(graph, costs).energy(labels))
 
 
 def nifty_energy(graph, costs: np.ndarray, labels: np.ndarray) -> float:
@@ -144,7 +144,7 @@ def evaluate(problem_name: str, solver_name: str, config: SolverConfig, args):
 
     for _ in range(args.n_repeats):
         if args.backend in ("both", "bic"):
-            bic_objective = bic.graph.MulticutObjective(bic_graph, costs)
+            bic_objective = bic.graph.multicut.MulticutObjective(bic_graph, costs)
             start = perf_counter()
             bic_labels = config.make_bic_solver(args.threads).optimize(bic_objective)
             bic_runtimes.append(perf_counter() - start)
diff --git a/development/graph/mutex_watershed/check_mutex_clustering.py b/development/graph/mutex_watershed/check_mutex_clustering.py
index 48db1ad..c48d136 100644
--- a/development/graph/mutex_watershed/check_mutex_clustering.py
+++ b/development/graph/mutex_watershed/check_mutex_clustering.py
@@ -33,7 +33,7 @@
 def load_problem(size: str, *, timeout: float):
     import bioimage_cpp as bic
 
-    problem = bic.graph.load_lifted_multicut_problem(size, timeout=timeout)
+    problem = bic.graph.lifted_multicut.load_lifted_multicut_problem(size, timeout=timeout)
     bic_graph = bic.graph.UndirectedGraph.from_edges(problem.n_nodes, problem.local_uvs)
     return bic_graph, problem
 
@@ -41,7 +41,7 @@ def load_problem(size: str, *, timeout: float):
 def run_bioimage_cpp(bic_graph, problem, *, dtype: np.dtype) -> np.ndarray:
     import bioimage_cpp as bic
 
-    return bic.graph.mutex_watershed_clustering(
+    return bic.graph.mutex_watershed.mutex_watershed_clustering(
         bic_graph,
         problem.local_costs.astype(dtype, copy=False),
         problem.lifted_uvs,
diff --git a/development/graph/mutex_watershed/check_semantic_mutex_clustering.py b/development/graph/mutex_watershed/check_semantic_mutex_clustering.py
index ca733fe..5a0924f 100644
--- a/development/graph/mutex_watershed/check_semantic_mutex_clustering.py
+++ b/development/graph/mutex_watershed/check_semantic_mutex_clustering.py
@@ -12,9 +12,9 @@
 3. Oversegment with a simple grid-marker watershed on
    ``1 - mean(attractive affinities)``.
 4. Build a RAG from the oversegmentation and reduce weights to per-edge
-   attractive costs (``bic.graph.affinity_features`` mean), to long-range
-   mutex edges (``bic.graph.lifted_edges_from_affinities`` +
-   ``bic.graph.lifted_affinity_features``), and to per-(node, class)
+   attractive costs (``bic.graph.features.affinity_features`` mean), to long-range
+   mutex edges (``bic.graph.features.lifted_edges_from_affinities`` +
+   ``bic.graph.features.lifted_affinity_features``), and to per-(node, class)
    semantic costs via ``scipy.ndimage.mean``.
 5. Run bioimage-cpp + affogato side-by-side, time both, and report partition
    + semantic agreement.
@@ -139,7 +139,7 @@ def build_costs(
         weights[:number_of_attractive_channels].astype(np.float32, copy=False)
     )
     attr_offsets = offsets[:number_of_attractive_channels]
-    attr_features = bic.graph.affinity_features(
+    attr_features = bic.graph.features.affinity_features(
         rag, over_seg, attr_w, attr_offsets, number_of_threads=rag_threads
     )
     edge_costs = np.ascontiguousarray(attr_features[:, 0], dtype=np.float32)
@@ -152,10 +152,10 @@ def build_costs(
             np.float32, copy=False
         )
     )
-    mutex_uvs = bic.graph.lifted_edges_from_affinities(
+    mutex_uvs = bic.graph.features.lifted_edges_from_affinities(
         rag, over_seg, mutex_offsets, number_of_threads=rag_threads
     )
-    mutex_features = bic.graph.lifted_affinity_features(
+    mutex_features = bic.graph.features.lifted_affinity_features(
         over_seg, mutex_w, mutex_offsets, mutex_uvs, number_of_threads=rag_threads
     )
     # mutex_features columns are (mean, size); use the mean as the edge cost.
@@ -199,7 +199,7 @@ def run_bioimage_cpp(
 ) -> tuple[np.ndarray, np.ndarray]:
     import bioimage_cpp as bic
 
-    return bic.graph.semantic_mutex_watershed_clustering(
+    return bic.graph.mutex_watershed.semantic_mutex_watershed_clustering(
         rag,
         edge_costs,
         mutex_uvs,
diff --git a/examples/segmentation/_lifted_problem.py b/examples/segmentation/_lifted_problem.py
index af6ad1e..24919f5 100644
--- a/examples/segmentation/_lifted_problem.py
+++ b/examples/segmentation/_lifted_problem.py
@@ -139,7 +139,7 @@ def build_lifted_problem(
         oversegmentation, number_of_threads=number_of_threads
     )
 
-    local_features = bic.graph.affinity_features(
+    local_features = bic.graph.features.affinity_features(
         rag,
         oversegmentation,
         affinity_problem.direct_affinities,
@@ -148,7 +148,7 @@ def build_lifted_problem(
     )
     local_costs = (local_threshold - local_features[:, 0]).astype(np.float64, copy=False)
 
-    lifted_uvs = bic.graph.lifted_edges_from_affinities(
+    lifted_uvs = bic.graph.features.lifted_edges_from_affinities(
         rag,
         oversegmentation,
         affinity_problem.long_range_offsets,
@@ -157,7 +157,7 @@ def build_lifted_problem(
     if lifted_uvs.shape[0] == 0:
         lifted_costs = np.zeros(0, dtype=np.float64)
     else:
-        lifted_features = bic.graph.lifted_affinity_features(
+        lifted_features = bic.graph.features.lifted_affinity_features(
             oversegmentation,
             affinity_problem.long_range_affinities,
             affinity_problem.long_range_offsets,
diff --git a/examples/segmentation/lifted_multicut_from_affinities.py b/examples/segmentation/lifted_multicut_from_affinities.py
index b739f5f..88ca236 100644
--- a/examples/segmentation/lifted_multicut_from_affinities.py
+++ b/examples/segmentation/lifted_multicut_from_affinities.py
@@ -52,16 +52,16 @@ def main():
         f"{lifted_problem.lifted_uvs.shape[0]} lifted edges"
     )
 
-    objective = bic.graph.LiftedMulticutObjective(
+    objective = bic.graph.lifted_multicut.LiftedMulticutObjective(
         lifted_problem.rag,
         lifted_problem.local_costs,
         lifted_uvs=lifted_problem.lifted_uvs,
         lifted_costs=lifted_problem.lifted_costs,
     )
-    solver = bic.graph.LiftedChainedSolvers(
+    solver = bic.graph.lifted_multicut.LiftedChainedSolvers(
         [
-            bic.graph.LiftedGreedyAdditiveMulticut(),
-            bic.graph.LiftedKernighanLinMulticut(
+            bic.graph.lifted_multicut.LiftedGreedyAdditiveMulticut(),
+            bic.graph.lifted_multicut.LiftedKernighanLinMulticut(
                 number_of_outer_iterations=args.kl_outer_iterations
             ),
         ]
diff --git a/examples/segmentation/multicut_from_affinities.py b/examples/segmentation/multicut_from_affinities.py
index 1926659..76e87ac 100644
--- a/examples/segmentation/multicut_from_affinities.py
+++ b/examples/segmentation/multicut_from_affinities.py
@@ -89,7 +89,7 @@ def multicut_from_affinities(
     rag = bic.graph.region_adjacency_graph(
         oversegmentation, number_of_threads=number_of_threads
     )
-    features = bic.graph.affinity_features(
+    features = bic.graph.features.affinity_features(
         rag,
         oversegmentation,
         affinities,
@@ -98,11 +98,11 @@ def multicut_from_affinities(
     )
     edge_costs = threshold - features[:, 0]
 
-    objective = bic.graph.MulticutObjective(rag, edge_costs)
-    node_labels = bic.graph.ChainedMulticutSolvers(
+    objective = bic.graph.multicut.MulticutObjective(rag, edge_costs)
+    node_labels = bic.graph.multicut.ChainedMulticutSolvers(
         [
-            bic.graph.GreedyAdditiveMulticut(),
-            bic.graph.KernighanLinMulticut(number_of_outer_iterations=10),
+            bic.graph.multicut.GreedyAdditiveMulticut(),
+            bic.graph.multicut.KernighanLinMulticut(number_of_outer_iterations=10),
         ]
     ).optimize(objective)
     segmentation = bic.graph.project_node_labels_to_pixels(
diff --git a/examples/segmentation/serialize_grid_lifted_problem.py b/examples/segmentation/serialize_grid_lifted_problem.py
index a67f575..0c7f7ef 100644
--- a/examples/segmentation/serialize_grid_lifted_problem.py
+++ b/examples/segmentation/serialize_grid_lifted_problem.py
@@ -77,7 +77,7 @@ def build_grid_lifted_problem(
 ):
     graph = bic.graph.grid_graph(affinities.shape[1:])
     local_weights, valid_edges, lifted_uvs, lifted_weights, _ = (
-        bic.graph.grid_affinity_features_with_lifted(graph, affinities, offsets)
+        bic.graph.features.grid_affinity_features_with_lifted(graph, affinities, offsets)
     )
     if not np.all(valid_edges):
         invalid = int(valid_edges.size - np.count_nonzero(valid_edges))
diff --git a/src/bioimage_cpp/graph/__init__.py b/src/bioimage_cpp/graph/__init__.py
index 623931a..e86deab 100644
--- a/src/bioimage_cpp/graph/__init__.py
+++ b/src/bioimage_cpp/graph/__init__.py
@@ -1,93 +1,41 @@
-"""Graph data structures."""
+"""Graph data structures and graph-level algorithms.
 
-from __future__ import annotations
-
-from abc import ABC, abstractmethod
-
-import numpy as np
-
-from .. import _core
-from ._external import (
-    DEFAULT_EXTERNAL_MULTICUT_PROBLEM_PATH,
-    EXTERNAL_MULTICUT_PROBLEM_URL,
-    LiftedMulticutProblem,
-    external_multicut_problem_path,
-    lifted_multicut_problem_path,
-    load_external_multicut_problem,
-    load_external_multicut_problem_data,
-    load_lifted_multicut_problem,
-    load_multicut_problem,
-    load_multicut_problem_data,
-    multicut_problem_path,
-)
-
-_REGION_ADJACENCY_GRAPH_BY_DTYPE = {
-    np.dtype("uint32"): _core._region_adjacency_graph_uint32,
-    np.dtype("uint64"): _core._region_adjacency_graph_uint64,
-    np.dtype("int32"): _core._region_adjacency_graph_int32,
-    np.dtype("int64"): _core._region_adjacency_graph_int64,
-}
+Top-level surface:
 
-_EDGE_MAP_FEATURES_BY_DTYPE = {
-    np.dtype("uint32"): _core._accumulate_edge_map_features_uint32,
-    np.dtype("uint64"): _core._accumulate_edge_map_features_uint64,
-    np.dtype("int32"): _core._accumulate_edge_map_features_int32,
-    np.dtype("int64"): _core._accumulate_edge_map_features_int64,
-}
+- Graph structures: :class:`UndirectedGraph`, :class:`GridGraph2D`,
+  :class:`GridGraph3D`, :class:`RegionAdjacencyGraph`.
+- Constructors: :func:`undirected_graph`, :func:`grid_graph`,
+  :func:`region_adjacency_graph`.
+- Algorithms: :func:`connected_components`, :func:`breadth_first_search`,
+  :func:`edge_weighted_watershed`, :func:`project_node_labels_to_pixels`.
 
-_AFFINITY_FEATURES_BY_DTYPE = {
-    np.dtype("uint32"): _core._accumulate_affinity_features_uint32,
-    np.dtype("uint64"): _core._accumulate_affinity_features_uint64,
-    np.dtype("int32"): _core._accumulate_affinity_features_int32,
-    np.dtype("int64"): _core._accumulate_affinity_features_int64,
-}
+Algorithm domains live in dedicated submodules:
 
-_LIFTED_EDGES_FROM_AFFINITIES_BY_DTYPE = {
-    np.dtype("uint32"): _core._lifted_edges_from_affinities_uint32,
-    np.dtype("uint64"): _core._lifted_edges_from_affinities_uint64,
-    np.dtype("int32"): _core._lifted_edges_from_affinities_int32,
-    np.dtype("int64"): _core._lifted_edges_from_affinities_int64,
-}
+- :mod:`bioimage_cpp.graph.multicut` — multicut objective and solvers,
+  proposal generators, multicut problem loaders.
+- :mod:`bioimage_cpp.graph.lifted_multicut` — lifted multicut objective and
+  solvers, lifted problem loaders.
+- :mod:`bioimage_cpp.graph.mutex_watershed` — mutex watershed clustering
+  (with and without semantic constraints).
+- :mod:`bioimage_cpp.graph.features` — edge-feature accumulation on RAGs and
+  grid graphs.
+"""
 
-_LIFTED_AFFINITY_FEATURES_BY_DTYPE = {
-    np.dtype("uint32"): _core._accumulate_lifted_affinity_features_uint32,
-    np.dtype("uint64"): _core._accumulate_lifted_affinity_features_uint64,
-    np.dtype("int32"): _core._accumulate_lifted_affinity_features_int32,
-    np.dtype("int64"): _core._accumulate_lifted_affinity_features_int64,
-}
-
-_EDGE_WEIGHTED_WATERSHED_BY_DTYPE = {
-    (np.dtype("float32"), np.dtype("uint32")): _core._edge_weighted_watershed_float32_uint32,
-    (np.dtype("float32"), np.dtype("uint64")): _core._edge_weighted_watershed_float32_uint64,
-    (np.dtype("float32"), np.dtype("int32")): _core._edge_weighted_watershed_float32_int32,
-    (np.dtype("float32"), np.dtype("int64")): _core._edge_weighted_watershed_float32_int64,
-    (np.dtype("float64"), np.dtype("uint32")): _core._edge_weighted_watershed_float64_uint32,
-    (np.dtype("float64"), np.dtype("uint64")): _core._edge_weighted_watershed_float64_uint64,
-    (np.dtype("float64"), np.dtype("int32")): _core._edge_weighted_watershed_float64_int32,
-    (np.dtype("float64"), np.dtype("int64")): _core._edge_weighted_watershed_float64_int64,
-}
+from __future__ import annotations
 
-_PROJECT_NODE_LABELS_TO_PIXELS_BY_DTYPE = {
-    np.dtype("uint32"): _core._project_node_labels_to_pixels_uint32,
-    np.dtype("uint64"): _core._project_node_labels_to_pixels_uint64,
-    np.dtype("int32"): _core._project_node_labels_to_pixels_int32,
-    np.dtype("int64"): _core._project_node_labels_to_pixels_int64,
-}
+import numpy as np
 
-SIMPLE_EDGE_FEATURE_NAMES = ("mean", "size")
-COMPLEX_EDGE_FEATURE_NAMES = (
-    "mean",
-    "median",
-    "std",
-    "min",
-    "max",
-    "p5",
-    "p10",
-    "p25",
-    "p75",
-    "p90",
-    "p95",
-    "size",
+from .. import _core
+from ._shared import (
+    _as_1d_array,
+    _as_coordinate_array,
+    _as_node_array,
+    _as_offset_array,
+    _as_serialization_array,
+    _as_shape,
+    _as_uv_array,
+    _normalize_labels,
+    _normalize_number_of_threads,
 )
 
 
@@ -139,17 +87,6 @@ def deserialize(cls, serialization):
         return cls.from_edges(number_of_nodes, uvs)
 
 
-def _as_shape(shape, ndim: int, name: str = "shape") -> list[int]:
-    array = np.asarray(shape)
-    if array.ndim != 1 or array.shape[0] != ndim:
-        raise ValueError(f"{name} must be a 1D sequence of length {ndim}")
-    if not np.issubdtype(array.dtype, np.integer):
-        raise TypeError(f"{name} must contain integers")
-    if np.any(array <= 0):
-        raise ValueError(f"{name} dimensions must be greater than zero")
-    return [int(axis_size) for axis_size in array]
-
-
 class GridGraph2D(_core.GridGraph2D):
     """Regular 2D nearest-neighbor grid graph.
 
@@ -196,102 +133,7 @@ def offsetTarget(self, node: int, offset):
         return self.offset_target(node, offset)
 
 
-def _as_coordinate_array(coordinate, ndim: int, name: str) -> np.ndarray:
-    array = np.asarray(coordinate, dtype=np.uint64)
-    if array.ndim != 1 or array.shape[0] != ndim:
-        raise ValueError(f"{name} must be a 1D sequence of length {ndim}")
-    return np.ascontiguousarray(array)
-
-
-def _as_offset_array(offset, ndim: int, name: str) -> np.ndarray:
-    array = np.asarray(offset, dtype=np.int64)
-    if array.ndim != 1 or array.shape[0] != ndim:
-        raise ValueError(f"{name} must be a 1D sequence of length {ndim}")
-    return np.ascontiguousarray(array)
-
-
-def _as_uv_array(uvs, name: str) -> np.ndarray:
-    array = np.asarray(uvs, dtype=np.uint64)
-    if array.ndim != 2 or array.shape[1] != 2:
-        raise ValueError(f"{name} must have shape (n_edges, 2)")
-    return np.ascontiguousarray(array)
-
-
-def _as_node_array(nodes, name: str) -> np.ndarray:
-    array = np.asarray(nodes, dtype=np.uint64)
-    if array.ndim != 1:
-        raise ValueError(f"{name} must be a 1D array")
-    return np.ascontiguousarray(array)
-
-
-def _as_serialization_array(serialization) -> np.ndarray:
-    array = np.asarray(serialization, dtype=np.uint64)
-    if array.ndim != 1:
-        raise ValueError("serialization must be a 1D array")
-    if array.size < 2:
-        raise ValueError("serialization must have at least two entries")
-    number_of_edges = int(array[1])
-    if array.size != 2 + 2 * number_of_edges:
-        raise ValueError("serialization size must be 2 + 2 * number_of_edges")
-    return np.ascontiguousarray(array)
-
-
-def _copy_graph(graph) -> _core.UndirectedGraph:
-    # `uv_ids()` always returns a unique list (graphs deduplicate on insert),
-    # so we can use the bulk constructor that skips per-edge hash dedup —
-    # significantly faster than `insert_edges` for large graphs. The result
-    # is a ``_core.UndirectedGraph``; downstream code (objectives, solvers,
-    # validators) uses base-class methods that work identically.
-    if graph.number_of_edges == 0:
-        return _core.UndirectedGraph(int(graph.number_of_nodes))
-    return _core.UndirectedGraph.from_unique_edges(
-        int(graph.number_of_nodes), graph.uv_ids()
-    )
-
-
-def _as_edge_costs(edge_costs, graph: UndirectedGraph | RegionAdjacencyGraph) -> np.ndarray:
-    array = np.asarray(edge_costs, dtype=np.float64)
-    if array.ndim != 1:
-        raise ValueError("edge_costs must be a 1D array")
-    if array.shape[0] != graph.number_of_edges:
-        raise ValueError("edge_costs length must match graph number_of_edges")
-    return np.ascontiguousarray(array)
-
-
-def _as_node_labels(labels, graph: UndirectedGraph | RegionAdjacencyGraph) -> np.ndarray:
-    array = np.asarray(labels, dtype=np.uint64)
-    if array.ndim != 1:
-        raise ValueError("labels must be a 1D array")
-    if array.shape[0] != graph.number_of_nodes:
-        raise ValueError("labels length must match graph number_of_nodes")
-    return np.ascontiguousarray(array)
-
-
-def _as_1d_array(values, dtype, name: str, expected_size: int) -> np.ndarray:
-    array = np.asarray(values, dtype=dtype)
-    if array.ndim != 1:
-        raise ValueError(f"{name} must be a 1D array")
-    if array.shape[0] != expected_size:
-        raise ValueError(
-            f"{name} length must be {expected_size}, got {array.shape[0]}"
-        )
-    return np.ascontiguousarray(array)
-
-
-def _dense_labels(labels) -> np.ndarray:
-    labels = np.asarray(labels, dtype=np.uint64)
-    _, dense = np.unique(labels, return_inverse=True)
-    return np.ascontiguousarray(dense.astype(np.uint64, copy=False))
-
-
-def _subproblem_from_edges(number_of_nodes: int, nodes, uvs, edge_costs):
-    local_ids = np.full(int(number_of_nodes), -1, dtype=np.int64)
-    local_ids[nodes] = np.arange(nodes.size, dtype=np.int64)
-    local_uvs = local_ids[np.asarray(uvs, dtype=np.uint64)]
-    sub_graph = UndirectedGraph(int(nodes.size), int(len(edge_costs)))
-    if local_uvs.size:
-        sub_graph.insert_edges(np.ascontiguousarray(local_uvs.astype(np.uint64, copy=False)))
-    return sub_graph, np.ascontiguousarray(np.asarray(edge_costs, dtype=np.float64))
+RegionAdjacencyGraph = _core.RegionAdjacencyGraph
 
 
 def undirected_graph(number_of_nodes: int) -> UndirectedGraph:
@@ -312,161 +154,36 @@ def grid_graph(shape):
     raise ValueError(f"shape must have length 2 or 3, got length={n_axes}")
 
 
-def _grid_ndim(graph) -> int:
-    if isinstance(graph, GridGraph2D):
-        return 2
-    if isinstance(graph, GridGraph3D):
-        return 3
-    raise TypeError("graph must be a GridGraph2D or GridGraph3D")
-
-
-def _grid_shape(graph) -> tuple[int, ...]:
-    return tuple(int(size) for size in graph.shape)
-
-
-_GRID_FLOAT_DTYPES = (np.dtype(np.float32), np.dtype(np.float64))
-
-
-def _as_grid_data(values, graph, name: str, *, with_channels: bool) -> np.ndarray:
-    array = np.asarray(values)
-    if array.dtype not in _GRID_FLOAT_DTYPES:
-        # Integer / non-float input falls back to float64 — the previous default.
-        # float32 and float64 inputs are passed through end-to-end, no copy.
-        array = array.astype(np.float64)
-    shape = _grid_shape(graph)
-    if with_channels:
-        if array.ndim != len(shape) + 1 or array.shape[1:] != shape:
-            raise ValueError(
-                f"{name} must have shape (channels, *graph.shape), got "
-                f"shape={array.shape}, graph shape={shape}"
-            )
-    elif array.shape != shape:
-        raise ValueError(
-            f"{name} shape must match graph shape, got "
-            f"{name} shape={array.shape}, graph shape={shape}"
-        )
-    return np.ascontiguousarray(array)
-
-
-def _normalize_grid_offsets(offsets, ndim: int, n_channels: int) -> list[tuple[int, ...]]:
-    normalized = [tuple(int(value) for value in offset) for offset in offsets]
-    if len(normalized) != n_channels:
-        raise ValueError(
-            "offsets length must match affinities channel count, got "
-            f"offsets length={len(normalized)}, channels={n_channels}"
-        )
-    if any(len(offset) != ndim for offset in normalized):
-        raise ValueError("each offset must have length matching graph ndim")
-    if any(all(value == 0 for value in offset) for offset in normalized):
-        raise ValueError("offsets must not contain the zero offset")
-    return normalized
-
-
-def _grid_dtype_suffix(array: np.ndarray) -> str:
-    if array.dtype == np.float32:
-        return "float32"
-    return "float64"
-
-
-_GRID_BOUNDARY_DISPATCH = {
-    (2, "float32"): _core._grid_boundary_features_2d_float32,
-    (2, "float64"): _core._grid_boundary_features_2d_float64,
-    (3, "float32"): _core._grid_boundary_features_3d_float32,
-    (3, "float64"): _core._grid_boundary_features_3d_float64,
-}
-_GRID_AFFINITY_DISPATCH = {
-    (2, "float32"): _core._grid_affinity_features_2d_float32,
-    (2, "float64"): _core._grid_affinity_features_2d_float64,
-    (3, "float32"): _core._grid_affinity_features_3d_float32,
-    (3, "float64"): _core._grid_affinity_features_3d_float64,
-}
-_GRID_AFFINITY_LIFTED_DISPATCH = {
-    (2, "float32"): _core._grid_affinity_features_with_lifted_2d_float32,
-    (2, "float64"): _core._grid_affinity_features_with_lifted_2d_float64,
-    (3, "float32"): _core._grid_affinity_features_with_lifted_3d_float32,
-    (3, "float64"): _core._grid_affinity_features_with_lifted_3d_float64,
+_EDGE_WEIGHTED_WATERSHED_BY_DTYPE = {
+    (np.dtype("float32"), np.dtype("uint32")): _core._edge_weighted_watershed_float32_uint32,
+    (np.dtype("float32"), np.dtype("uint64")): _core._edge_weighted_watershed_float32_uint64,
+    (np.dtype("float32"), np.dtype("int32")): _core._edge_weighted_watershed_float32_int32,
+    (np.dtype("float32"), np.dtype("int64")): _core._edge_weighted_watershed_float32_int64,
+    (np.dtype("float64"), np.dtype("uint32")): _core._edge_weighted_watershed_float64_uint32,
+    (np.dtype("float64"), np.dtype("uint64")): _core._edge_weighted_watershed_float64_uint64,
+    (np.dtype("float64"), np.dtype("int32")): _core._edge_weighted_watershed_float64_int32,
+    (np.dtype("float64"), np.dtype("int64")): _core._edge_weighted_watershed_float64_int64,
 }
 
 
-def grid_boundary_features(graph, boundary_map) -> np.ndarray:
-    """Compute scalar nearest-neighbor grid edge weights from a boundary map.
-
-    The output is a 1D array aligned to ``graph.edges()``. Output dtype matches
-    the input: ``float32`` and ``float64`` inputs are processed without copying,
-    other dtypes are promoted to ``float64``. Each edge receives the average of
-    the two endpoint boundary-map values.
-    """
-    ndim = _grid_ndim(graph)
-    boundary_array = _as_grid_data(
-        boundary_map, graph, "boundary_map", with_channels=False
-    )
-    return _GRID_BOUNDARY_DISPATCH[(ndim, _grid_dtype_suffix(boundary_array))](
-        graph, boundary_array
-    )
-
-
-def grid_affinity_features(graph, affinities, offsets) -> tuple[np.ndarray, np.ndarray]:
-    """Map local affinity channels to grid graph edge weights.
-
-    Only nearest-neighbor offsets with L1 norm 1 are accepted. The returned
-    tuple is ``(edge_weights, valid_edges)``, both aligned to ``graph.edges()``.
-    ``edge_weights`` has the same dtype as ``affinities`` (``float32`` or
-    ``float64``; other dtypes are promoted to ``float64``). ``valid_edges`` is
-    boolean and marks local graph edges covered by offsets.
-    """
-    ndim = _grid_ndim(graph)
-    affinity_array = _as_grid_data(
-        affinities, graph, "affinities", with_channels=True
-    )
-    normalized_offsets = _normalize_grid_offsets(
-        offsets, ndim, int(affinity_array.shape[0])
-    )
-    weights, valid = _GRID_AFFINITY_DISPATCH[
-        (ndim, _grid_dtype_suffix(affinity_array))
-    ](graph, affinity_array, normalized_offsets)
-    return weights, valid.astype(bool, copy=False)
-
-
-def grid_affinity_features_with_lifted(
-    graph,
-    affinities,
-    offsets,
-) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
-    """Map local affinities and emit explicit long-range grid edges.
-
-    Returns ``(local_weights, valid_local_edges, lifted_uvs, lifted_weights,
-    lifted_offset_ids)``. Local arrays are aligned to ``graph.edges()``.
-    Long-range arrays have one row/value per valid offset hit and are suitable
-    for lifted multicut or mutex-watershed style callers. Weight arrays share
-    the dtype of ``affinities`` (``float32`` or ``float64``; other dtypes are
-    promoted to ``float64``).
-    """
-    ndim = _grid_ndim(graph)
-    affinity_array = _as_grid_data(
-        affinities, graph, "affinities", with_channels=True
-    )
-    normalized_offsets = _normalize_grid_offsets(
-        offsets, ndim, int(affinity_array.shape[0])
-    )
-    local_weights, valid, lifted_uvs, lifted_weights, lifted_offset_ids = (
-        _GRID_AFFINITY_LIFTED_DISPATCH[
-            (ndim, _grid_dtype_suffix(affinity_array))
-        ](graph, affinity_array, normalized_offsets)
-    )
-    return (
-        local_weights,
-        valid.astype(bool, copy=False),
-        lifted_uvs,
-        lifted_weights,
-        lifted_offset_ids,
-    )
+_REGION_ADJACENCY_GRAPH_BY_DTYPE = {
+    np.dtype("uint32"): _core._region_adjacency_graph_uint32,
+    np.dtype("uint64"): _core._region_adjacency_graph_uint64,
+    np.dtype("int32"): _core._region_adjacency_graph_int32,
+    np.dtype("int64"): _core._region_adjacency_graph_int64,
+}
 
 
-RegionAdjacencyGraph = _core.RegionAdjacencyGraph
+_PROJECT_NODE_LABELS_TO_PIXELS_BY_DTYPE = {
+    np.dtype("uint32"): _core._project_node_labels_to_pixels_uint32,
+    np.dtype("uint64"): _core._project_node_labels_to_pixels_uint64,
+    np.dtype("int32"): _core._project_node_labels_to_pixels_int32,
+    np.dtype("int64"): _core._project_node_labels_to_pixels_int64,
+}
 
 
 def connected_components(
-    graph: UndirectedGraph | RegionAdjacencyGraph,
+    graph,
     edge_mask: np.ndarray | None = None,
 ) -> np.ndarray:
     """Compute dense connected-component labels for graph nodes.
@@ -490,7 +207,7 @@ def connected_components(
 
 
 def breadth_first_search(
-    graph: UndirectedGraph | RegionAdjacencyGraph,
+    graph,
     source: int,
     *,
     max_distance: int | None = None,
@@ -533,7 +250,7 @@ def breadth_first_search(
 
 
 def edge_weighted_watershed(
-    graph: UndirectedGraph | RegionAdjacencyGraph,
+    graph,
     edge_weights,
     seeds,
 ) -> np.ndarray:
@@ -599,1082 +316,6 @@ def edge_weighted_watershed(
     return run(graph, weight_array, seed_array)
 
 
-_MUTEX_WATERSHED_CLUSTERING_BY_DTYPE = {
-    np.dtype("float32"): _core._mutex_watershed_clustering_float32,
-    np.dtype("float64"): _core._mutex_watershed_clustering_float64,
-}
-
-_SEMANTIC_MUTEX_WATERSHED_CLUSTERING_BY_DTYPE = {
-    np.dtype("float32"): _core._semantic_mutex_watershed_clustering_float32,
-    np.dtype("float64"): _core._semantic_mutex_watershed_clustering_float64,
-}
-
-
-def _resolve_weight_dtype(array, name: str) -> np.ndarray:
-    """Coerce a weights input to a supported floating dtype.
-
-    ``float32`` and ``float64`` pass through unchanged. Other floating
-    dtypes are cast to ``float32`` (matches the convention used by
-    :func:`edge_weighted_watershed`). Non-floating dtypes raise.
-    """
-    array = np.asarray(array)
-    if array.dtype in (np.dtype("float32"), np.dtype("float64")):
-        return array
-    if np.issubdtype(array.dtype, np.floating):
-        return array.astype(np.float32, copy=False)
-    raise TypeError(
-        f"{name} must have a floating dtype (float32 or float64), got "
-        f"dtype={array.dtype}"
-    )
-
-
-def mutex_watershed_clustering(
-    graph: UndirectedGraph | RegionAdjacencyGraph,
-    edge_costs,
-    mutex_uvs,
-    mutex_costs,
-) -> np.ndarray:
-    """Mutex watershed clustering on an undirected graph.
-
-    Attractive edges come from ``graph`` (one cost per edge in
-    ``edge_costs``); repulsive long-range edges are supplied separately as
-    ``mutex_uvs`` with weights ``mutex_costs``. All edges are jointly sorted
-    by descending weight and processed in a single pass: an attractive edge
-    merges its two components unless a mutex constraint already separates
-    them; a mutex edge installs a constraint between the two current
-    components.
-
-    The input format matches :class:`LiftedMulticutObjective` — the same
-    ``(graph, edge_costs, lifted_uvs, lifted_costs)`` arrays can be passed
-    here as ``(graph, edge_costs, mutex_uvs, mutex_costs)`` — though the
-    algorithms differ (mutex constraints are hard; lifted costs are soft).
-
-    Parameters
-    ----------
-    graph:
-        :class:`UndirectedGraph` or :class:`RegionAdjacencyGraph` defining
-        the attractive edges.
-    edge_costs:
-        1D array of length ``graph.number_of_edges``. Supported dtypes are
-        ``float32`` and ``float64``; other floating dtypes are cast to
-        ``float32``. Higher values are more attractive.
-    mutex_uvs:
-        ``(n_mutex, 2)`` uint64 array of (u, v) pairs for the mutex edges.
-    mutex_costs:
-        1D array of length ``n_mutex``. Same dtype rules as ``edge_costs``;
-        if the two dtypes differ both are promoted to ``float64``. Higher
-        values are stronger repulsions.
-
-    Returns
-    -------
-    np.ndarray
-        ``(graph.number_of_nodes,)`` uint64 array. Dense node labels in
-        ``[0, k)`` in first-occurrence order.
-    """
-    edge_cost_array = _resolve_weight_dtype(edge_costs, "edge_costs")
-    mutex_cost_array = _resolve_weight_dtype(mutex_costs, "mutex_costs")
-    # Use a single instantiation for both arrays. If the user supplied
-    # mismatched dtypes (one float32, one float64) we promote both to
-    # float64 — the wider type — rather than silently downcasting.
-    if edge_cost_array.dtype != mutex_cost_array.dtype:
-        edge_cost_array = edge_cost_array.astype(np.float64, copy=False)
-        mutex_cost_array = mutex_cost_array.astype(np.float64, copy=False)
-
-    edge_cost_array = _as_1d_array(
-        edge_cost_array,
-        edge_cost_array.dtype,
-        "edge_costs",
-        int(graph.number_of_edges),
-    )
-    mutex_uv_array = _as_uv_array(mutex_uvs, "mutex_uvs")
-    mutex_cost_array = _as_1d_array(
-        mutex_cost_array,
-        mutex_cost_array.dtype,
-        "mutex_costs",
-        int(mutex_uv_array.shape[0]),
-    )
-    run = _MUTEX_WATERSHED_CLUSTERING_BY_DTYPE[edge_cost_array.dtype]
-    return run(graph, edge_cost_array, mutex_uv_array, mutex_cost_array)
-
-
-def semantic_mutex_watershed_clustering(
-    graph: UndirectedGraph | RegionAdjacencyGraph,
-    edge_costs,
-    mutex_uvs,
-    mutex_costs,
-    semantic_node_classes,
-    semantic_costs,
-) -> tuple[np.ndarray, np.ndarray]:
-    """Semantic mutex watershed clustering on an undirected graph.
-
-    Extends :func:`mutex_watershed_clustering` with a third group of edges
-    that attach semantic class labels to clusters. Two clusters carrying
-    different semantic class labels are forbidden from merging; otherwise
-    the algorithm proceeds as in the non-semantic case (attractive edges
-    merge; mutex edges block).
-
-    Parameters
-    ----------
-    graph:
-        :class:`UndirectedGraph` or :class:`RegionAdjacencyGraph` defining
-        the attractive edges.
-    edge_costs:
-        1D array of length ``graph.number_of_edges``. Same dtype rules as
-        :func:`mutex_watershed_clustering`.
-    mutex_uvs:
-        ``(n_mutex, 2)`` uint64 array of (u, v) pairs for the mutex edges.
-    mutex_costs:
-        1D array of length ``n_mutex``.
-    semantic_node_classes:
-        ``(n_semantic, 2)`` uint64 array. Column 0 is a node id; column 1
-        is the semantic class id (non-negative integer). The semantic class
-        id is interpreted as a signed integer when reported back, so values
-        above ``np.iinfo(np.int64).max`` are out of range.
-    semantic_costs:
-        1D array of length ``n_semantic``. Same dtype rules as
-        ``edge_costs``; if the floating dtypes of the three weight arrays
-        do not all agree, all three are promoted to ``float64``.
-
-    Returns
-    -------
-    node_labels:
-        ``(graph.number_of_nodes,)`` uint64 array. Dense node labels in
-        ``[0, k)`` in first-occurrence order.
-    semantic_labels:
-        ``(graph.number_of_nodes,)`` int64 array. Per-node semantic class
-        id, or ``-1`` for clusters that received no semantic assignment.
-    """
-    edge_cost_array = _resolve_weight_dtype(edge_costs, "edge_costs")
-    mutex_cost_array = _resolve_weight_dtype(mutex_costs, "mutex_costs")
-    semantic_cost_array = _resolve_weight_dtype(semantic_costs, "semantic_costs")
-
-    dtypes = {edge_cost_array.dtype, mutex_cost_array.dtype, semantic_cost_array.dtype}
-    if len(dtypes) > 1:
-        edge_cost_array = edge_cost_array.astype(np.float64, copy=False)
-        mutex_cost_array = mutex_cost_array.astype(np.float64, copy=False)
-        semantic_cost_array = semantic_cost_array.astype(np.float64, copy=False)
-
-    edge_cost_array = _as_1d_array(
-        edge_cost_array,
-        edge_cost_array.dtype,
-        "edge_costs",
-        int(graph.number_of_edges),
-    )
-    mutex_uv_array = _as_uv_array(mutex_uvs, "mutex_uvs")
-    mutex_cost_array = _as_1d_array(
-        mutex_cost_array,
-        mutex_cost_array.dtype,
-        "mutex_costs",
-        int(mutex_uv_array.shape[0]),
-    )
-    semantic_uv_array = _as_uv_array(semantic_node_classes, "semantic_node_classes")
-    semantic_cost_array = _as_1d_array(
-        semantic_cost_array,
-        semantic_cost_array.dtype,
-        "semantic_costs",
-        int(semantic_uv_array.shape[0]),
-    )
-
-    run = _SEMANTIC_MUTEX_WATERSHED_CLUSTERING_BY_DTYPE[edge_cost_array.dtype]
-    return run(
-        graph,
-        edge_cost_array,
-        mutex_uv_array,
-        mutex_cost_array,
-        semantic_uv_array,
-        semantic_cost_array,
-    )
-
-
-class MulticutObjective:
-    """Multicut objective for an undirected graph and edge costs."""
-
-    def __init__(
-        self,
-        graph: UndirectedGraph | RegionAdjacencyGraph,
-        edge_costs,
-        initial_labels=None,
-    ):
-        self._graph = _copy_graph(graph)
-        self._edge_costs = _as_edge_costs(edge_costs, self._graph)
-        if initial_labels is None:
-            self._labels = np.arange(self._graph.number_of_nodes, dtype=np.uint64)
-        else:
-            self._labels = _as_node_labels(initial_labels, self._graph)
-
-    @property
-    def graph(self) -> UndirectedGraph:
-        return self._graph
-
-    @property
-    def edge_costs(self) -> np.ndarray:
-        return self._edge_costs
-
-    @property
-    def labels(self) -> np.ndarray:
-        return self._labels
-
-    @labels.setter
-    def labels(self, labels) -> None:
-        self._labels = _as_node_labels(labels, self._graph)
-
-    def set_labels(self, labels) -> None:
-        self.labels = labels
-
-    def reset_labels(self) -> None:
-        self._labels = np.arange(self._graph.number_of_nodes, dtype=np.uint64)
-
-    def energy(self, labels=None) -> float:
-        label_array = self._labels if labels is None else _as_node_labels(labels, self._graph)
-        return float(_core._multicut_energy(self._graph, self._edge_costs, label_array))
-
-
-class MulticutSolver(ABC):
-    """Base class for multicut solvers."""
-
-    @abstractmethod
-    def optimize(self, objective: MulticutObjective) -> np.ndarray:
-        """Optimize ``objective`` and return the node labeling."""
-
-
-class GreedyAdditiveMulticut(MulticutSolver):
-    def __init__(
-        self,
-        *,
-        weight_stop: float = 0.0,
-        node_num_stop: float = -1.0,
-        add_noise: bool = False,
-        seed: int = 42,
-        sigma: float = 1.0,
-    ):
-        self.weight_stop = float(weight_stop)
-        self.node_num_stop = float(node_num_stop)
-        self.add_noise = bool(add_noise)
-        self.seed = int(seed)
-        self.sigma = float(sigma)
-
-    def optimize(self, objective: MulticutObjective) -> np.ndarray:
-        labels = _core._multicut_greedy_additive(
-            objective.graph,
-            objective.edge_costs,
-            self.weight_stop,
-            self.node_num_stop,
-            self.add_noise,
-            self.seed,
-            self.sigma,
-        )
-        objective.labels = labels
-        return objective.labels
-
-    def _build_cpp_sub_solver(self):
-        return _core._GreedyAdditiveMulticutSubSolver(
-            weight_stop=self.weight_stop,
-            node_num_stop=self.node_num_stop,
-            add_noise=self.add_noise,
-            seed=self.seed,
-            sigma=self.sigma,
-        )
-
-
-class GreedyFixationMulticut(MulticutSolver):
-    def __init__(self, *, weight_stop: float = 0.0, node_num_stop: float = -1.0):
-        self.weight_stop = float(weight_stop)
-        self.node_num_stop = float(node_num_stop)
-
-    def optimize(self, objective: MulticutObjective) -> np.ndarray:
-        labels = _core._multicut_greedy_fixation(
-            objective.graph,
-            objective.edge_costs,
-            self.weight_stop,
-            self.node_num_stop,
-        )
-        objective.labels = labels
-        return objective.labels
-
-    def _build_cpp_sub_solver(self):
-        return _core._GreedyFixationMulticutSubSolver(
-            weight_stop=self.weight_stop,
-            node_num_stop=self.node_num_stop,
-        )
-
-
-class KernighanLinMulticut(MulticutSolver):
-    def __init__(
-        self,
-        *,
-        number_of_outer_iterations: int = 100,
-        number_of_inner_iterations: int | None = None,
-        epsilon: float = 1.0e-6,
-    ):
-        self.number_of_outer_iterations = int(number_of_outer_iterations)
-        if self.number_of_outer_iterations < 0:
-            raise ValueError("number_of_outer_iterations must be non-negative")
-        self.number_of_inner_iterations = (
-            None if number_of_inner_iterations is None else int(number_of_inner_iterations)
-        )
-        self.epsilon = float(epsilon)
-
-    def optimize(self, objective: MulticutObjective) -> np.ndarray:
-        initial_labels = objective.labels
-        if np.array_equal(
-            initial_labels,
-            np.arange(objective.graph.number_of_nodes, dtype=np.uint64),
-        ):
-            initial_labels = _core._multicut_greedy_additive(
-                objective.graph,
-                objective.edge_costs,
-                0.0,
-                -1.0,
-                False,
-                42,
-                1.0,
-            )
-        labels = _core._multicut_kernighan_lin(
-            objective.graph,
-            objective.edge_costs,
-            initial_labels,
-            self.number_of_outer_iterations,
-            self.epsilon,
-        )
-        objective.labels = labels
-        return objective.labels
-
-    def _build_cpp_sub_solver(self):
-        return _core._KernighanLinMulticutSubSolver(
-            number_of_outer_iterations=self.number_of_outer_iterations,
-            epsilon=self.epsilon,
-        )
-
-
-class ProposalGenerator(ABC):
-    """Base class for fusion-move proposal generators.
-
-    Concrete generators carry settings on the Python side. ``_build_for_thread``
-    constructs an independent underlying C++ proposal-generator object whose
-    seed is offset by ``seed_offset`` so that parallel proposal slots produce
-    distinct, reproducible streams.
-    """
-
-    @abstractmethod
-    def _build_for_thread(
-        self,
-        graph: UndirectedGraph | RegionAdjacencyGraph,
-        edge_costs: np.ndarray,
-        seed_offset: int,
-    ):
-        """Construct the underlying C++ proposal generator with a seed offset."""
-
-
-class WatershedProposalGenerator(ProposalGenerator):
-    """Watershed proposal generator (nifty's fusion-move workhorse).
-
-    Per call: add Gaussian noise to edge costs, drop random seeds at the
-    endpoints of negative-cost edges, run the edge-weighted watershed.
-
-    ``n_seeds_fraction`` is the target *total seed count*: ``<= 1.0`` is a
-    fraction of ``number_of_nodes``, otherwise an absolute count. The
-    seeding loop places two seeds per iteration and runs ``n_seeds / 2``
-    times, matching nifty's ``WatershedProposalGenerator`` so the same
-    parameter value yields the same proposal density on both sides.
-    """
-
-    def __init__(
-        self,
-        *,
-        sigma: float = 1.0,
-        n_seeds_fraction: float = 0.1,
-        seed: int = 0,
-    ):
-        self.sigma = float(sigma)
-        self.n_seeds_fraction = float(n_seeds_fraction)
-        self.seed = int(seed)
-
-    def _build_for_thread(self, graph, edge_costs, seed_offset):
-        return _core._WatershedProposalGenerator(
-            graph,
-            edge_costs,
-            sigma=self.sigma,
-            n_seeds_fraction=self.n_seeds_fraction,
-            seed=self.seed + int(seed_offset),
-        )
-
-
-class GreedyAdditiveProposalGenerator(ProposalGenerator):
-    """Greedy-additive multicut proposal generator.
-
-    Per call: run the greedy-additive multicut solver with noisy edge weights;
-    the seed advances every call so successive proposals differ.
-    """
-
-    def __init__(
-        self,
-        *,
-        sigma: float = 1.0,
-        weight_stop: float = 0.0,
-        node_num_stop: float = -1.0,
-        seed: int = 0,
-    ):
-        self.sigma = float(sigma)
-        self.weight_stop = float(weight_stop)
-        self.node_num_stop = float(node_num_stop)
-        self.seed = int(seed)
-
-    def _build_for_thread(self, graph, edge_costs, seed_offset):
-        return _core._GreedyAdditiveMulticutProposalGenerator(
-            graph,
-            edge_costs,
-            sigma=self.sigma,
-            weight_stop=self.weight_stop,
-            node_num_stop=self.node_num_stop,
-            seed=self.seed + int(seed_offset),
-        )
-
-
-class FusionMoveMulticut(MulticutSolver):
-    """Fusion-move multicut solver.
-
-    Iteratively generates proposals via ``proposal_generator``, fuses them
-    with the current best labeling, and accepts improvements. The fuse step
-    solves a contracted multicut subproblem with ``sub_solver``; if omitted,
-    the default sub-solver is :class:`GreedyAdditiveMulticut`.
-
-    If the objective's current labels are the trivial singleton labeling, the
-    driver warm-starts with one greedy-additive pass before the proposal loop.
-    The best-of safety net guarantees energy never increases across iterations.
-
-    Threading: ``number_of_threads > 1`` runs ``number_of_parallel_proposals``
-    proposal generators in parallel within each iteration. Each parallel slot
-    uses an independent proposal generator with seed ``proposal_generator.seed
-    + slot_index``. By default ``number_of_parallel_proposals`` is ``2`` when
-    ``number_of_threads == 1`` and ``number_of_threads`` otherwise; pass it
-    explicitly to override.
-
-    Multi-proposal fuse: when at least two parallel pairwise fuses fail to
-    improve on the current best, a joint multi-proposal fuse is run over the
-    surviving fused candidates (matches nifty's ``ccFusionMoveBased`` stage-2
-    behaviour). With ``number_of_parallel_proposals == 2`` this stage rarely
-    triggers; it becomes useful as ``number_of_parallel_proposals`` grows.
-    """
-
-    def __init__(
-        self,
-        *,
-        proposal_generator: ProposalGenerator,
-        sub_solver: MulticutSolver | None = None,
-        number_of_iterations: int = 10,
-        stop_if_no_improvement: int = 4,
-        number_of_threads: int = 1,
-        number_of_parallel_proposals: int | None = None,
-    ):
-        if not isinstance(proposal_generator, ProposalGenerator):
-            raise TypeError("proposal_generator must inherit from ProposalGenerator")
-        if sub_solver is not None and not isinstance(sub_solver, MulticutSolver):
-            raise TypeError("sub_solver must inherit from MulticutSolver")
-        if sub_solver is not None and not hasattr(sub_solver, "_build_cpp_sub_solver"):
-            raise TypeError(
-                "sub_solver must be a built-in multicut solver "
-                "(custom Python solvers are not supported as fusion-move sub-solvers)"
-            )
-        n_threads = int(number_of_threads)
-        if n_threads < 1:
-            raise ValueError("number_of_threads must be >= 1")
-        if number_of_parallel_proposals is None:
-            n_parallel = 2 if n_threads == 1 else n_threads
-        else:
-            n_parallel = int(number_of_parallel_proposals)
-        if n_parallel < 1:
-            raise ValueError("number_of_parallel_proposals must be >= 1")
-
-        self.proposal_generator = proposal_generator
-        self.sub_solver = sub_solver
-        self.number_of_iterations = int(number_of_iterations)
-        self.stop_if_no_improvement = int(stop_if_no_improvement)
-        self.number_of_threads = n_threads
-        self.number_of_parallel_proposals = n_parallel
-        if self.number_of_iterations < 0:
-            raise ValueError("number_of_iterations must be non-negative")
-        if self.stop_if_no_improvement < 1:
-            raise ValueError("stop_if_no_improvement must be >= 1")
-
-    def optimize(self, objective: MulticutObjective) -> np.ndarray:
-        # Build one C++ proposal generator per parallel slot, each with a
-        # distinct seed offset, so parallel streams are independent and
-        # reproducible.
-        cpp_pgens = [
-            self.proposal_generator._build_for_thread(
-                objective.graph, objective.edge_costs, slot
-            )
-            for slot in range(self.number_of_parallel_proposals)
-        ]
-        cpp_sub_solver = (
-            None if self.sub_solver is None else self.sub_solver._build_cpp_sub_solver()
-        )
-        labels = _core._multicut_fusion_move(
-            objective.graph,
-            objective.edge_costs,
-            objective.labels,
-            cpp_pgens,
-            cpp_sub_solver,
-            self.number_of_iterations,
-            self.stop_if_no_improvement,
-            self.number_of_threads,
-            self.number_of_parallel_proposals,
-        )
-        objective.labels = labels
-        return objective.labels
-
-
-class ChainedMulticutSolvers(MulticutSolver):
-    def __init__(self, solvers):
-        self.solvers = list(solvers)
-        if len(self.solvers) == 0:
-            raise ValueError("solvers must contain at least one solver")
-        if not all(isinstance(solver, MulticutSolver) for solver in self.solvers):
-            raise TypeError("all solvers must inherit from MulticutSolver")
-
-    def optimize(self, objective: MulticutObjective) -> np.ndarray:
-        labels = objective.labels
-        for solver in self.solvers:
-            labels = solver.optimize(objective)
-        return labels
-
-
-class MulticutDecomposer(MulticutSolver):
-    def __init__(
-        self,
-        sub_solver: MulticutSolver,
-        *,
-        fallthrough_solver: MulticutSolver | None = None,
-        number_of_threads: int = 0,
-    ):
-        if not isinstance(sub_solver, MulticutSolver):
-            raise TypeError("sub_solver must inherit from MulticutSolver")
-        if fallthrough_solver is not None and not isinstance(fallthrough_solver, MulticutSolver):
-            raise TypeError("fallthrough_solver must inherit from MulticutSolver")
-        self.sub_solver = sub_solver
-        self.fallthrough_solver = fallthrough_solver
-        self.number_of_threads = _normalize_number_of_threads(number_of_threads)
-
-    def optimize(self, objective: MulticutObjective) -> np.ndarray:
-        if self.fallthrough_solver is None and isinstance(self.sub_solver, GreedyAdditiveMulticut):
-            return self.sub_solver.optimize(objective)
-
-        component_labels = connected_components(
-            objective.graph,
-            edge_mask=objective.edge_costs > 0.0,
-        )
-        number_of_components = int(component_labels.max()) + 1 if component_labels.size else 0
-        if number_of_components <= 1:
-            solver = self.fallthrough_solver or self.sub_solver
-            return solver.optimize(objective)
-
-        global_labels = np.empty(objective.graph.number_of_nodes, dtype=np.uint64)
-        label_offset = 0
-        all_uvs = objective.graph.uv_ids()
-        for component in range(number_of_components):
-            nodes = np.flatnonzero(component_labels == component).astype(np.uint64)
-            if nodes.size == 1:
-                global_labels[int(nodes[0])] = label_offset
-                label_offset += 1
-                continue
-
-            edge_ids = objective.graph.edges_from_node_list(nodes)
-            sub_graph, sub_costs = _subproblem_from_edges(
-                objective.graph.number_of_nodes,
-                nodes,
-                all_uvs[edge_ids],
-                objective.edge_costs[edge_ids],
-            )
-            sub_objective = MulticutObjective(sub_graph, sub_costs)
-            sub_labels = self.sub_solver.optimize(sub_objective)
-            sub_labels = _dense_labels(sub_labels)
-            global_labels[nodes] = sub_labels + label_offset
-            label_offset += int(sub_labels.max()) + 1
-
-        objective.labels = _dense_labels(global_labels)
-        return objective.labels
-
-
-class LiftedMulticutObjective:
-    """Lifted multicut objective.
-
-    Stores a base graph + base edge costs together with an internal *lifted
-    graph* that is a superset of the base graph (base edges occupy ids
-    ``0 .. base.number_of_edges - 1``; lifted edges follow). The energy of a
-    node labeling is the sum of base + lifted edge weights across cut edges.
-
-    The lifted edges can be supplied either as explicit ``(uvs, costs)``
-    arrays, via a ``bfs_distance=k`` constructor argument that inserts a
-    zero-weight lifted edge for every pair of nodes within ``k`` hops of each
-    other in the base graph, or by calling :meth:`set_cost` after construction.
-    """
-
-    def __init__(
-        self,
-        graph: UndirectedGraph | RegionAdjacencyGraph,
-        edge_costs,
-        *,
-        lifted_uvs=None,
-        lifted_costs=None,
-        bfs_distance: int | None = None,
-        overwrite_existing: bool = False,
-        initial_labels=None,
-    ):
-        # The objective holds a reference to the user's base graph (no
-        # defensive copy — the C++ ``Objective`` already keeps a const
-        # reference). The user is expected to treat the input graph as
-        # immutable while the objective is alive; mutations are visible to
-        # the objective and may produce undefined behaviour.
-        base_graph = graph
-        base_costs = _as_edge_costs(edge_costs, base_graph)
-
-        # Use the bulk constructor for the lifted graph's base portion to
-        # bypass the per-edge hash dedup that ``insert_edges`` performs.
-        if int(base_graph.number_of_edges) > 0:
-            lifted_graph = _core.UndirectedGraph.from_unique_edges(
-                int(base_graph.number_of_nodes), base_graph.uv_ids()
-            )
-        else:
-            lifted_graph = _core.UndirectedGraph(int(base_graph.number_of_nodes))
-
-        weights_list = [base_costs.copy()]
-
-        if bfs_distance is not None:
-            distance = int(bfs_distance)
-            if distance < 1:
-                raise ValueError("bfs_distance must be >= 1")
-            bfs_uvs = []
-            for source in range(int(base_graph.number_of_nodes)):
-                nodes, _ = breadth_first_search(
-                    base_graph,
-                    source,
-                    max_distance=distance,
-                    include_source=False,
-                )
-                if nodes.size == 0:
-                    continue
-                tail = nodes[nodes > source]
-                if tail.size == 0:
-                    continue
-                source_column = np.full(tail.size, source, dtype=np.uint64)
-                bfs_uvs.append(np.stack([source_column, tail], axis=1))
-            if bfs_uvs:
-                bfs_uv_array = np.ascontiguousarray(np.concatenate(bfs_uvs, axis=0))
-                _add_lifted_edges(
-                    lifted_graph,
-                    weights_list,
-                    bfs_uv_array,
-                    np.zeros(bfs_uv_array.shape[0], dtype=np.float64),
-                    overwrite_existing=overwrite_existing,
-                )
-
-        if lifted_uvs is not None or lifted_costs is not None:
-            if lifted_uvs is None or lifted_costs is None:
-                raise ValueError(
-                    "lifted_uvs and lifted_costs must be provided together"
-                )
-            uv_array = _as_uv_array(lifted_uvs, "lifted_uvs")
-            cost_array = np.asarray(lifted_costs, dtype=np.float64)
-            if cost_array.ndim != 1:
-                raise ValueError("lifted_costs must be a 1D array")
-            if cost_array.shape[0] != uv_array.shape[0]:
-                raise ValueError(
-                    "lifted_uvs and lifted_costs must have the same length, got "
-                    f"lifted_uvs.shape[0]={uv_array.shape[0]}, "
-                    f"lifted_costs.shape[0]={cost_array.shape[0]}"
-                )
-            _add_lifted_edges(
-                lifted_graph,
-                weights_list,
-                uv_array,
-                np.ascontiguousarray(cost_array),
-                overwrite_existing=overwrite_existing,
-            )
-
-        self._base_graph = base_graph
-        self._lifted_graph = lifted_graph
-        self._n_base_edges = int(base_graph.number_of_edges)
-        self._weights = np.ascontiguousarray(np.concatenate(weights_list)) \
-            if len(weights_list) > 1 else weights_list[0]
-        if initial_labels is None:
-            self._labels = np.arange(base_graph.number_of_nodes, dtype=np.uint64)
-        else:
-            self._labels = _as_node_labels(initial_labels, base_graph)
-
-    @property
-    def graph(self) -> UndirectedGraph:
-        return self._base_graph
-
-    @property
-    def lifted_graph(self) -> UndirectedGraph:
-        return self._lifted_graph
-
-    @property
-    def weights(self) -> np.ndarray:
-        return self._weights
-
-    @property
-    def number_of_base_edges(self) -> int:
-        return self._n_base_edges
-
-    @property
-    def number_of_lifted_edges(self) -> int:
-        return int(self._lifted_graph.number_of_edges) - self._n_base_edges
-
-    @property
-    def labels(self) -> np.ndarray:
-        return self._labels
-
-    @labels.setter
-    def labels(self, labels) -> None:
-        self._labels = _as_node_labels(labels, self._base_graph)
-
-    def set_labels(self, labels) -> None:
-        self.labels = labels
-
-    def reset_labels(self) -> None:
-        self._labels = np.arange(self._base_graph.number_of_nodes, dtype=np.uint64)
-
-    def set_cost(
-        self,
-        u: int,
-        v: int,
-        weight: float,
-        *,
-        overwrite: bool = False,
-    ) -> tuple[int, bool]:
-        """Insert or update a single lifted edge weight.
-
-        Returns ``(edge_id, is_new)`` — the lifted-graph edge id and whether a
-        new edge was inserted. If the edge already exists (as a base edge or a
-        previously inserted lifted edge), the weight is accumulated unless
-        ``overwrite=True``.
-        """
-        pre = int(self._lifted_graph.number_of_edges)
-        edge = int(self._lifted_graph.insert_edge(int(u), int(v)))
-        if int(self._lifted_graph.number_of_edges) > pre:
-            self._weights = np.concatenate(
-                [self._weights, np.asarray([float(weight)], dtype=np.float64)]
-            )
-            return edge, True
-        if overwrite:
-            self._weights[edge] = float(weight)
-        else:
-            self._weights[edge] = self._weights[edge] + float(weight)
-        return edge, False
-
-    def energy(self, labels=None) -> float:
-        label_array = (
-            self._labels if labels is None else _as_node_labels(labels, self._base_graph)
-        )
-        return float(
-            _core._lifted_multicut_energy(self._lifted_graph, self._weights, label_array)
-        )
-
-
-def _add_lifted_edges(
-    lifted_graph: UndirectedGraph,
-    weights_list: list[np.ndarray],
-    lifted_uvs: np.ndarray,
-    lifted_costs: np.ndarray,
-    *,
-    overwrite_existing: bool,
-) -> None:
-    """Insert lifted edges into an UndirectedGraph and update the weights list.
-
-    Mirrors the C++ ``build_lifted_graph`` semantics: brand-new edges append
-    their cost to ``weights_list``; existing edges (duplicates of a base edge
-    or of a previously inserted lifted edge) either overwrite or accumulate
-    their weight in place. ``weights_list`` is expected to hold the current
-    weights array as its single element on entry; on exit it contains either
-    one or two ndarray entries (the existing weights plus the new tail).
-    """
-    if lifted_uvs.shape[0] == 0:
-        return
-    # In-place updates require a single flat working buffer; coalesce first.
-    if len(weights_list) > 1:
-        weights_list[:] = [np.ascontiguousarray(np.concatenate(weights_list))]
-
-    # Fast path: bulk-insert and detect uniqueness from the row count delta.
-    # For the typical case — ``lifted_uvs`` produced by
-    # ``lifted_edges_from_affinities`` or by the BFS constructor — every row
-    # is a brand-new edge, so the delta equals the input length and we can
-    # append the weights array directly. No ``find_edges`` calls are needed.
-    if not overwrite_existing:
-        pre_count = int(lifted_graph.number_of_edges)
-        lifted_graph.insert_edges(lifted_uvs)
-        post_count = int(lifted_graph.number_of_edges)
-        n_new = post_count - pre_count
-
-        if n_new == lifted_uvs.shape[0]:
-            weights_list.append(
-                np.ascontiguousarray(lifted_costs.astype(np.float64, copy=False))
-            )
-            return
-
-        # Some rows collided with existing edges or with each other. Use
-        # find_edges to recover the per-row edge id (insertion is already
-        # done; this is just a lookup).
-        edge_ids = np.asarray(lifted_graph.find_edges(lifted_uvs))
-        lifted_costs_f64 = lifted_costs.astype(np.float64, copy=False)
-        working = weights_list[0]
-
-        collision_mask = edge_ids < pre_count
-        if collision_mask.any():
-            np.add.at(
-                working,
-                edge_ids[collision_mask].astype(np.intp, copy=False),
-                lifted_costs_f64[collision_mask],
-            )
-
-        new_mask = ~collision_mask
-        if new_mask.any():
-            slot = (edge_ids[new_mask] - pre_count).astype(np.int64, copy=False)
-            new_weights = np.bincount(
-                slot, weights=lifted_costs_f64[new_mask], minlength=n_new
-            ).astype(np.float64, copy=False)
-        else:
-            new_weights = np.zeros(n_new, dtype=np.float64)
-
-        weights_list[0] = working
-        if n_new > 0:
-            weights_list.append(new_weights)
-        return
-
-    # Slow path: per-row Python loop for ``overwrite_existing=True`` (rare).
-    # Order-sensitive last-write-wins semantics on collisions.
-    working = weights_list[0]
-    new_costs: list[float] = []
-    for index in range(lifted_uvs.shape[0]):
-        u = int(lifted_uvs[index, 0])
-        v = int(lifted_uvs[index, 1])
-        weight = float(lifted_costs[index])
-        pre = int(lifted_graph.number_of_edges)
-        edge = int(lifted_graph.insert_edge(u, v))
-        if int(lifted_graph.number_of_edges) > pre:
-            new_costs.append(weight)
-        else:
-            working[edge] = weight
-    if new_costs:
-        weights_list[0] = working
-        weights_list.append(np.asarray(new_costs, dtype=np.float64))
-    else:
-        weights_list[0] = working
-
-
-class LiftedMulticutSolver(ABC):
-    """Base class for lifted multicut solvers."""
-
-    @abstractmethod
-    def optimize(self, objective: LiftedMulticutObjective) -> np.ndarray:
-        """Optimize ``objective`` and return the node labeling."""
-
-
-class LiftedGreedyAdditiveMulticut(LiftedMulticutSolver):
-    def __init__(
-        self,
-        *,
-        weight_stop: float = 0.0,
-        node_num_stop: float = -1.0,
-        add_noise: bool = False,
-        seed: int = 42,
-        sigma: float = 1.0,
-    ):
-        self.weight_stop = float(weight_stop)
-        self.node_num_stop = float(node_num_stop)
-        self.add_noise = bool(add_noise)
-        self.seed = int(seed)
-        self.sigma = float(sigma)
-
-    def optimize(self, objective: LiftedMulticutObjective) -> np.ndarray:
-        labels = _core._lifted_multicut_greedy_additive(
-            objective.lifted_graph,
-            objective.weights,
-            objective.number_of_base_edges,
-            self.weight_stop,
-            self.node_num_stop,
-            self.add_noise,
-            self.seed,
-            self.sigma,
-        )
-        objective.labels = labels
-        return objective.labels
-
-    def _build_cpp_sub_solver(self):
-        return _core._GreedyAdditiveLiftedMulticutSubSolver(
-            weight_stop=self.weight_stop,
-            node_num_stop=self.node_num_stop,
-            add_noise=self.add_noise,
-            seed=self.seed,
-            sigma=self.sigma,
-        )
-
-
-class LiftedKernighanLinMulticut(LiftedMulticutSolver):
-    def __init__(
-        self,
-        *,
-        number_of_outer_iterations: int = 100,
-        epsilon: float = 1.0e-6,
-    ):
-        self.number_of_outer_iterations = int(number_of_outer_iterations)
-        if self.number_of_outer_iterations < 0:
-            raise ValueError("number_of_outer_iterations must be non-negative")
-        self.epsilon = float(epsilon)
-
-    def optimize(self, objective: LiftedMulticutObjective) -> np.ndarray:
-        initial_labels = objective.labels
-        if np.array_equal(
-            initial_labels,
-            np.arange(objective.graph.number_of_nodes, dtype=np.uint64),
-        ):
-            initial_labels = _core._lifted_multicut_greedy_additive(
-                objective.lifted_graph,
-                objective.weights,
-                objective.number_of_base_edges,
-                0.0,
-                -1.0,
-                False,
-                42,
-                1.0,
-            )
-        labels = _core._lifted_multicut_kernighan_lin(
-            objective.graph,
-            objective.lifted_graph,
-            objective.weights,
-            objective.number_of_base_edges,
-            initial_labels,
-            self.number_of_outer_iterations,
-            self.epsilon,
-        )
-        objective.labels = labels
-        return objective.labels
-
-    def _build_cpp_sub_solver(self):
-        return _core._KernighanLinLiftedMulticutSubSolver(
-            number_of_outer_iterations=self.number_of_outer_iterations,
-            epsilon=self.epsilon,
-        )
-
-
-class FusionMoveLiftedMulticut(LiftedMulticutSolver):
-    """Fusion-move lifted multicut solver.
-
-    Iteratively generates proposals via ``proposal_generator`` (which sees the
-    *base* graph and base edge costs), fuses them with the current best
-    labeling, and accepts improvements. Each fuse contracts the base graph by
-    agreement across the proposals, aggregates base + lifted weights onto the
-    contracted lifted-multicut subproblem, and dispatches to ``sub_solver``.
-    If ``sub_solver`` is omitted, the default sub-solver is
-    :class:`LiftedGreedyAdditiveMulticut`.
-
-    If the objective's current labels are the trivial singleton labeling, the
-    driver warm-starts with one lifted greedy-additive pass before the proposal
-    loop. The best-of safety net guarantees energy never increases across
-    iterations.
-
-    Threading: ``number_of_threads > 1`` runs ``number_of_parallel_proposals``
-    proposal generators in parallel within each iteration. Each parallel slot
-    uses an independent proposal generator with seed ``proposal_generator.seed
-    + slot_index``. By default ``number_of_parallel_proposals`` is ``2`` when
-    ``number_of_threads == 1`` and ``number_of_threads`` otherwise; pass it
-    explicitly to override.
-    """
-
-    def __init__(
-        self,
-        *,
-        proposal_generator: ProposalGenerator,
-        sub_solver: LiftedMulticutSolver | None = None,
-        number_of_iterations: int = 10,
-        stop_if_no_improvement: int = 4,
-        number_of_threads: int = 1,
-        number_of_parallel_proposals: int | None = None,
-    ):
-        if not isinstance(proposal_generator, ProposalGenerator):
-            raise TypeError("proposal_generator must inherit from ProposalGenerator")
-        if sub_solver is not None and not isinstance(sub_solver, LiftedMulticutSolver):
-            raise TypeError("sub_solver must inherit from LiftedMulticutSolver")
-        if sub_solver is not None and not hasattr(sub_solver, "_build_cpp_sub_solver"):
-            raise TypeError(
-                "sub_solver must be a built-in lifted multicut solver "
-                "(custom Python solvers are not supported as fusion-move sub-solvers)"
-            )
-        n_threads = int(number_of_threads)
-        if n_threads < 1:
-            raise ValueError("number_of_threads must be >= 1")
-        if number_of_parallel_proposals is None:
-            n_parallel = 2 if n_threads == 1 else n_threads
-        else:
-            n_parallel = int(number_of_parallel_proposals)
-        if n_parallel < 1:
-            raise ValueError("number_of_parallel_proposals must be >= 1")
-
-        self.proposal_generator = proposal_generator
-        self.sub_solver = sub_solver
-        self.number_of_iterations = int(number_of_iterations)
-        self.stop_if_no_improvement = int(stop_if_no_improvement)
-        self.number_of_threads = n_threads
-        self.number_of_parallel_proposals = n_parallel
-        if self.number_of_iterations < 0:
-            raise ValueError("number_of_iterations must be non-negative")
-        if self.stop_if_no_improvement < 1:
-            raise ValueError("stop_if_no_improvement must be >= 1")
-
-    def optimize(self, objective: LiftedMulticutObjective) -> np.ndarray:
-        n_base = objective.number_of_base_edges
-        # The base costs back the proposal generators (the lifted weights
-        # cannot drive base-graph contraction or watershed segmentation).
-        base_costs = np.ascontiguousarray(objective.weights[:n_base])
-        cpp_pgens = [
-            self.proposal_generator._build_for_thread(
-                objective.graph, base_costs, slot
-            )
-            for slot in range(self.number_of_parallel_proposals)
-        ]
-        cpp_sub_solver = (
-            None if self.sub_solver is None else self.sub_solver._build_cpp_sub_solver()
-        )
-        labels = _core._lifted_multicut_fusion_move(
-            objective.graph,
-            objective.lifted_graph,
-            objective.weights,
-            n_base,
-            objective.labels,
-            cpp_pgens,
-            cpp_sub_solver,
-            self.number_of_iterations,
-            self.stop_if_no_improvement,
-            self.number_of_threads,
-            self.number_of_parallel_proposals,
-        )
-        objective.labels = labels
-        return objective.labels
-
-
-class LiftedChainedSolvers(LiftedMulticutSolver):
-    """Chain of lifted multicut solvers run in sequence on the same objective.
-
-    Each solver's output labeling is fed to the next via the shared
-    :class:`LiftedMulticutObjective`. Typical use: ``[LiftedGreedyAdditiveMulticut(),
-    LiftedKernighanLinMulticut(...)]`` for a fast warm-start followed by a
-    local refinement.
-    """
-
-    def __init__(self, solvers):
-        self.solvers = list(solvers)
-        if len(self.solvers) == 0:
-            raise ValueError("solvers must contain at least one solver")
-        if not all(isinstance(solver, LiftedMulticutSolver) for solver in self.solvers):
-            raise TypeError("all solvers must inherit from LiftedMulticutSolver")
-
-    def optimize(self, objective: LiftedMulticutObjective) -> np.ndarray:
-        labels = objective.labels
-        for solver in self.solvers:
-            labels = solver.optimize(objective)
-        return labels
-
-
 def region_adjacency_graph(
     labels: np.ndarray,
     *,
@@ -1704,196 +345,10 @@ def region_adjacency_graph(
             f"labels must have one of dtypes ({supported}), got dtype={dtype}"
         ) from error
 
-    number_of_threads = int(number_of_threads)
-    if number_of_threads < 0:
-        raise ValueError("number_of_threads must be non-negative")
+    number_of_threads = _normalize_number_of_threads(number_of_threads)
     return run(np.ascontiguousarray(array), number_of_threads)
 
 
-def edge_map_features(
-    rag: RegionAdjacencyGraph,
-    labels: np.ndarray,
-    edge_map: np.ndarray,
-    *,
-    number_of_threads: int = 0,
-) -> np.ndarray:
-    """Compute mean and size features for edge-map values on RAG boundaries."""
-    return _accumulate_edge_map_features(
-        rag,
-        labels,
-        edge_map,
-        compute_complex_features=False,
-        number_of_threads=number_of_threads,
-    )
-
-
-def edge_map_features_complex(
-    rag: RegionAdjacencyGraph,
-    labels: np.ndarray,
-    edge_map: np.ndarray,
-    *,
-    number_of_threads: int = 0,
-) -> np.ndarray:
-    """Compute complex edge-map features on RAG boundaries.
-
-    The output columns are given by ``COMPLEX_EDGE_FEATURE_NAMES``.
-    """
-    return _accumulate_edge_map_features(
-        rag,
-        labels,
-        edge_map,
-        compute_complex_features=True,
-        number_of_threads=number_of_threads,
-    )
-
-
-def affinity_features(
-    rag: RegionAdjacencyGraph,
-    labels: np.ndarray,
-    affinities: np.ndarray,
-    offsets,
-    *,
-    number_of_threads: int = 0,
-) -> np.ndarray:
-    """Compute mean and size features for affinity links crossing RAG edges."""
-    return _accumulate_affinity_features(
-        rag,
-        labels,
-        affinities,
-        offsets,
-        compute_complex_features=False,
-        number_of_threads=number_of_threads,
-    )
-
-
-def affinity_features_complex(
-    rag: RegionAdjacencyGraph,
-    labels: np.ndarray,
-    affinities: np.ndarray,
-    offsets,
-    *,
-    number_of_threads: int = 0,
-) -> np.ndarray:
-    """Compute complex affinity features for links crossing RAG edges.
-
-    The output columns are given by ``COMPLEX_EDGE_FEATURE_NAMES``.
-    """
-    return _accumulate_affinity_features(
-        rag,
-        labels,
-        affinities,
-        offsets,
-        compute_complex_features=True,
-        number_of_threads=number_of_threads,
-    )
-
-
-def lifted_edges_from_affinities(
-    rag: RegionAdjacencyGraph,
-    labels: np.ndarray,
-    offsets,
-    *,
-    number_of_threads: int = 0,
-) -> np.ndarray:
-    """Discover lifted edges implied by long-range affinity offsets.
-
-    Walks every grid coordinate together with each long-range offset (1-hop
-    offsets are skipped automatically). When the labels at ``(p, p + offset)``
-    differ and ``(labels[p], labels[p + offset])`` is not already a local
-    edge of ``rag``, the pair is recorded as a lifted edge.
-
-    Parameters
-    ----------
-    rag:
-        :class:`RegionAdjacencyGraph` built from ``labels``.
-    labels:
-        2D or 3D label array. Supported dtypes: ``uint32``, ``uint64``,
-        ``int32``, ``int64``.
-    offsets:
-        Sequence of per-channel offsets. Each offset must have length equal
-        to ``labels.ndim``. Offsets with L1 norm ``<= 1`` are skipped, so
-        callers can pass the full offset list of an affinity volume without
-        pre-filtering.
-
-    Returns
-    -------
-    np.ndarray
-        ``(n_lifted, 2)`` ``uint64`` array of ``(u, v)`` pairs with
-        ``u < v``, sorted lexicographically.
-    """
-    label_array = _normalize_labels(labels)
-    if tuple(int(size) for size in rag.shape) != label_array.shape:
-        raise ValueError(
-            "rag shape must match labels shape, got "
-            f"rag shape={tuple(rag.shape)}, labels shape={label_array.shape}"
-        )
-
-    normalized_offsets = [tuple(int(value) for value in offset) for offset in offsets]
-    if any(len(offset) != label_array.ndim for offset in normalized_offsets):
-        raise ValueError("each offset must have length matching labels ndim")
-
-    run = _LIFTED_EDGES_FROM_AFFINITIES_BY_DTYPE[label_array.dtype]
-    return run(
-        rag,
-        label_array,
-        normalized_offsets,
-        _normalize_number_of_threads(number_of_threads),
-    )
-
-
-def lifted_affinity_features(
-    labels: np.ndarray,
-    affinities: np.ndarray,
-    offsets,
-    lifted_uvs,
-    *,
-    number_of_threads: int = 0,
-) -> np.ndarray:
-    """Compute mean and size features for affinity links across lifted edges.
-
-    Affinity values at pixel pairs ``(p, p + offset)`` whose labels match a
-    row of ``lifted_uvs`` are binned into that lifted edge. Pixel pairs that
-    fall on a non-lifted edge (or no edge at all) are silently skipped, so
-    a local edge that is also reachable by a long-range offset is not
-    contaminated by long-range affinities.
-
-    1-hop offsets are skipped automatically.
-
-    The returned array has shape ``(len(lifted_uvs), 2)`` with columns
-    ``SIMPLE_EDGE_FEATURE_NAMES`` (``mean``, ``size``).
-    """
-    return _accumulate_lifted_affinity_features(
-        labels,
-        affinities,
-        offsets,
-        lifted_uvs,
-        compute_complex_features=False,
-        number_of_threads=number_of_threads,
-    )
-
-
-def lifted_affinity_features_complex(
-    labels: np.ndarray,
-    affinities: np.ndarray,
-    offsets,
-    lifted_uvs,
-    *,
-    number_of_threads: int = 0,
-) -> np.ndarray:
-    """Complex affinity features for links across lifted edges.
-
-    Output columns: ``COMPLEX_EDGE_FEATURE_NAMES``.
-    """
-    return _accumulate_lifted_affinity_features(
-        labels,
-        affinities,
-        offsets,
-        lifted_uvs,
-        compute_complex_features=True,
-        number_of_threads=number_of_threads,
-    )
-
-
 def project_node_labels_to_pixels(
     rag: RegionAdjacencyGraph,
     labels: np.ndarray,
@@ -1930,186 +385,26 @@ def project_node_labels_to_pixels(
     )
 
 
-def _accumulate_edge_map_features(
-    rag: RegionAdjacencyGraph,
-    labels: np.ndarray,
-    edge_map: np.ndarray,
-    *,
-    compute_complex_features: bool,
-    number_of_threads: int,
-) -> np.ndarray:
-    label_array = _normalize_labels(labels)
-    edge_map_array = np.asarray(edge_map, dtype=np.float64)
-    if edge_map_array.shape != label_array.shape:
-        raise ValueError(
-            "edge_map shape must match labels shape, got "
-            f"edge_map shape={edge_map_array.shape}, labels shape={label_array.shape}"
-        )
-    run = _EDGE_MAP_FEATURES_BY_DTYPE[label_array.dtype]
-    return run(
-        rag,
-        label_array,
-        np.ascontiguousarray(edge_map_array),
-        bool(compute_complex_features),
-        _normalize_number_of_threads(number_of_threads),
-    )
-
-
-def _accumulate_affinity_features(
-    rag: RegionAdjacencyGraph,
-    labels: np.ndarray,
-    affinities: np.ndarray,
-    offsets,
-    *,
-    compute_complex_features: bool,
-    number_of_threads: int,
-) -> np.ndarray:
-    label_array = _normalize_labels(labels)
-    affinity_array = np.asarray(affinities, dtype=np.float64)
-    if affinity_array.ndim != label_array.ndim + 1:
-        raise ValueError("affinities must have shape (channels, *labels.shape)")
-    if affinity_array.shape[1:] != label_array.shape:
-        raise ValueError(
-            "affinities spatial shape must match labels shape, got "
-            f"affinities shape={affinity_array.shape}, labels shape={label_array.shape}"
-        )
-
-    normalized_offsets = [tuple(int(value) for value in offset) for offset in offsets]
-    if len(normalized_offsets) != affinity_array.shape[0]:
-        raise ValueError(
-            "offsets length must match affinities channel count, got "
-            f"offsets length={len(normalized_offsets)}, channels={affinity_array.shape[0]}"
-        )
-    if any(len(offset) != label_array.ndim for offset in normalized_offsets):
-        raise ValueError("each offset must have length matching labels ndim")
-
-    run = _AFFINITY_FEATURES_BY_DTYPE[label_array.dtype]
-    return run(
-        rag,
-        label_array,
-        np.ascontiguousarray(affinity_array),
-        normalized_offsets,
-        bool(compute_complex_features),
-        _normalize_number_of_threads(number_of_threads),
-    )
-
-
-def _accumulate_lifted_affinity_features(
-    labels: np.ndarray,
-    affinities: np.ndarray,
-    offsets,
-    lifted_uvs,
-    *,
-    compute_complex_features: bool,
-    number_of_threads: int,
-) -> np.ndarray:
-    label_array = _normalize_labels(labels)
-    affinity_array = np.asarray(affinities, dtype=np.float64)
-    if affinity_array.ndim != label_array.ndim + 1:
-        raise ValueError("affinities must have shape (channels, *labels.shape)")
-    if affinity_array.shape[1:] != label_array.shape:
-        raise ValueError(
-            "affinities spatial shape must match labels shape, got "
-            f"affinities shape={affinity_array.shape}, labels shape={label_array.shape}"
-        )
-
-    normalized_offsets = [tuple(int(value) for value in offset) for offset in offsets]
-    if len(normalized_offsets) != affinity_array.shape[0]:
-        raise ValueError(
-            "offsets length must match affinities channel count, got "
-            f"offsets length={len(normalized_offsets)}, channels={affinity_array.shape[0]}"
-        )
-    if any(len(offset) != label_array.ndim for offset in normalized_offsets):
-        raise ValueError("each offset must have length matching labels ndim")
-
-    lifted_uv_array = _as_uv_array(lifted_uvs, "lifted_uvs")
-
-    run = _LIFTED_AFFINITY_FEATURES_BY_DTYPE[label_array.dtype]
-    return run(
-        label_array,
-        np.ascontiguousarray(affinity_array),
-        normalized_offsets,
-        lifted_uv_array,
-        bool(compute_complex_features),
-        _normalize_number_of_threads(number_of_threads),
-    )
-
-
-def _normalize_labels(labels: np.ndarray) -> np.ndarray:
-    array = np.asarray(labels)
-    if array.ndim not in (2, 3):
-        raise ValueError(f"labels must be a 2D or 3D array, got ndim={array.ndim}")
-    try:
-        _REGION_ADJACENCY_GRAPH_BY_DTYPE[array.dtype]
-    except KeyError as error:
-        supported = ", ".join(
-            str(dtype) for dtype in _REGION_ADJACENCY_GRAPH_BY_DTYPE
-        )
-        raise TypeError(
-            f"labels must have one of dtypes ({supported}), got dtype={array.dtype}"
-        ) from error
-    return np.ascontiguousarray(array)
-
-
-def _normalize_number_of_threads(number_of_threads: int) -> int:
-    number_of_threads = int(number_of_threads)
-    if number_of_threads < 0:
-        raise ValueError("number_of_threads must be non-negative")
-    return number_of_threads
+from . import features  # noqa: E402  (must follow class/function definitions)
+from . import lifted_multicut  # noqa: E402
+from . import multicut  # noqa: E402
+from . import mutex_watershed  # noqa: E402
 
 
 __all__ = [
-    "ChainedMulticutSolvers",
-    "COMPLEX_EDGE_FEATURE_NAMES",
-    "DEFAULT_EXTERNAL_MULTICUT_PROBLEM_PATH",
-    "EXTERNAL_MULTICUT_PROBLEM_URL",
-    "FusionMoveLiftedMulticut",
-    "FusionMoveMulticut",
-    "GreedyAdditiveMulticut",
-    "GreedyAdditiveProposalGenerator",
-    "GreedyFixationMulticut",
     "GridGraph2D",
     "GridGraph3D",
-    "KernighanLinMulticut",
-    "LiftedChainedSolvers",
-    "LiftedGreedyAdditiveMulticut",
-    "LiftedKernighanLinMulticut",
-    "LiftedMulticutObjective",
-    "LiftedMulticutProblem",
-    "LiftedMulticutSolver",
-    "MulticutDecomposer",
-    "MulticutObjective",
-    "MulticutSolver",
-    "ProposalGenerator",
     "RegionAdjacencyGraph",
-    "SIMPLE_EDGE_FEATURE_NAMES",
     "UndirectedGraph",
-    "WatershedProposalGenerator",
-    "affinity_features",
-    "affinity_features_complex",
     "breadth_first_search",
     "connected_components",
-    "edge_map_features",
-    "edge_map_features_complex",
     "edge_weighted_watershed",
+    "features",
     "grid_graph",
-    "grid_affinity_features",
-    "grid_affinity_features_with_lifted",
-    "grid_boundary_features",
-    "lifted_affinity_features",
-    "lifted_affinity_features_complex",
-    "lifted_edges_from_affinities",
-    "external_multicut_problem_path",
-    "lifted_multicut_problem_path",
-    "load_external_multicut_problem",
-    "load_external_multicut_problem_data",
-    "load_lifted_multicut_problem",
-    "load_multicut_problem",
-    "load_multicut_problem_data",
-    "multicut_problem_path",
-    "mutex_watershed_clustering",
+    "lifted_multicut",
+    "multicut",
+    "mutex_watershed",
     "project_node_labels_to_pixels",
     "region_adjacency_graph",
-    "semantic_mutex_watershed_clustering",
     "undirected_graph",
 ]
diff --git a/src/bioimage_cpp/graph/_shared.py b/src/bioimage_cpp/graph/_shared.py
new file mode 100644
index 0000000..7c5ac39
--- /dev/null
+++ b/src/bioimage_cpp/graph/_shared.py
@@ -0,0 +1,235 @@
+"""Shared internal helpers for the ``bioimage_cpp.graph`` submodules.
+
+This module is private (every name is ``_``-prefixed). It holds the validators,
+shape helpers, and small graph utilities that are reused by more than one of
+``graph.multicut``, ``graph.lifted_multicut``, ``graph.mutex_watershed``,
+``graph.features``, and the core ``graph`` namespace.
+"""
+
+from __future__ import annotations
+
+import numpy as np
+
+from .. import _core
+
+
+_REGION_ADJACENCY_GRAPH_BY_DTYPE = {
+    np.dtype("uint32"): _core._region_adjacency_graph_uint32,
+    np.dtype("uint64"): _core._region_adjacency_graph_uint64,
+    np.dtype("int32"): _core._region_adjacency_graph_int32,
+    np.dtype("int64"): _core._region_adjacency_graph_int64,
+}
+
+
+_GRID_FLOAT_DTYPES = (np.dtype(np.float32), np.dtype(np.float64))
+
+
+def _as_shape(shape, ndim: int, name: str = "shape") -> list[int]:
+    array = np.asarray(shape)
+    if array.ndim != 1 or array.shape[0] != ndim:
+        raise ValueError(f"{name} must be a 1D sequence of length {ndim}")
+    if not np.issubdtype(array.dtype, np.integer):
+        raise TypeError(f"{name} must contain integers")
+    if np.any(array <= 0):
+        raise ValueError(f"{name} dimensions must be greater than zero")
+    return [int(axis_size) for axis_size in array]
+
+
+def _as_coordinate_array(coordinate, ndim: int, name: str) -> np.ndarray:
+    array = np.asarray(coordinate, dtype=np.uint64)
+    if array.ndim != 1 or array.shape[0] != ndim:
+        raise ValueError(f"{name} must be a 1D sequence of length {ndim}")
+    return np.ascontiguousarray(array)
+
+
+def _as_offset_array(offset, ndim: int, name: str) -> np.ndarray:
+    array = np.asarray(offset, dtype=np.int64)
+    if array.ndim != 1 or array.shape[0] != ndim:
+        raise ValueError(f"{name} must be a 1D sequence of length {ndim}")
+    return np.ascontiguousarray(array)
+
+
+def _as_uv_array(uvs, name: str) -> np.ndarray:
+    array = np.asarray(uvs, dtype=np.uint64)
+    if array.ndim != 2 or array.shape[1] != 2:
+        raise ValueError(f"{name} must have shape (n_edges, 2)")
+    return np.ascontiguousarray(array)
+
+
+def _as_node_array(nodes, name: str) -> np.ndarray:
+    array = np.asarray(nodes, dtype=np.uint64)
+    if array.ndim != 1:
+        raise ValueError(f"{name} must be a 1D array")
+    return np.ascontiguousarray(array)
+
+
+def _as_serialization_array(serialization) -> np.ndarray:
+    array = np.asarray(serialization, dtype=np.uint64)
+    if array.ndim != 1:
+        raise ValueError("serialization must be a 1D array")
+    if array.size < 2:
+        raise ValueError("serialization must have at least two entries")
+    number_of_edges = int(array[1])
+    if array.size != 2 + 2 * number_of_edges:
+        raise ValueError("serialization size must be 2 + 2 * number_of_edges")
+    return np.ascontiguousarray(array)
+
+
+def _copy_graph(graph) -> _core.UndirectedGraph:
+    # `uv_ids()` always returns a unique list (graphs deduplicate on insert),
+    # so we can use the bulk constructor that skips per-edge hash dedup —
+    # significantly faster than `insert_edges` for large graphs. The result
+    # is a ``_core.UndirectedGraph``; downstream code (objectives, solvers,
+    # validators) uses base-class methods that work identically.
+    if graph.number_of_edges == 0:
+        return _core.UndirectedGraph(int(graph.number_of_nodes))
+    return _core.UndirectedGraph.from_unique_edges(
+        int(graph.number_of_nodes), graph.uv_ids()
+    )
+
+
+def _as_edge_costs(edge_costs, graph) -> np.ndarray:
+    array = np.asarray(edge_costs, dtype=np.float64)
+    if array.ndim != 1:
+        raise ValueError("edge_costs must be a 1D array")
+    if array.shape[0] != graph.number_of_edges:
+        raise ValueError("edge_costs length must match graph number_of_edges")
+    return np.ascontiguousarray(array)
+
+
+def _as_node_labels(labels, graph) -> np.ndarray:
+    array = np.asarray(labels, dtype=np.uint64)
+    if array.ndim != 1:
+        raise ValueError("labels must be a 1D array")
+    if array.shape[0] != graph.number_of_nodes:
+        raise ValueError("labels length must match graph number_of_nodes")
+    return np.ascontiguousarray(array)
+
+
+def _as_1d_array(values, dtype, name: str, expected_size: int) -> np.ndarray:
+    array = np.asarray(values, dtype=dtype)
+    if array.ndim != 1:
+        raise ValueError(f"{name} must be a 1D array")
+    if array.shape[0] != expected_size:
+        raise ValueError(
+            f"{name} length must be {expected_size}, got {array.shape[0]}"
+        )
+    return np.ascontiguousarray(array)
+
+
+def _dense_labels(labels) -> np.ndarray:
+    labels = np.asarray(labels, dtype=np.uint64)
+    _, dense = np.unique(labels, return_inverse=True)
+    return np.ascontiguousarray(dense.astype(np.uint64, copy=False))
+
+
+def _subproblem_from_edges(number_of_nodes: int, nodes, uvs, edge_costs):
+    # Local import to avoid a circular dependency with the multicut submodule
+    # at module-load time (this helper is only called from the decomposer).
+    from . import UndirectedGraph
+
+    local_ids = np.full(int(number_of_nodes), -1, dtype=np.int64)
+    local_ids[nodes] = np.arange(nodes.size, dtype=np.int64)
+    local_uvs = local_ids[np.asarray(uvs, dtype=np.uint64)]
+    sub_graph = UndirectedGraph(int(nodes.size), int(len(edge_costs)))
+    if local_uvs.size:
+        sub_graph.insert_edges(np.ascontiguousarray(local_uvs.astype(np.uint64, copy=False)))
+    return sub_graph, np.ascontiguousarray(np.asarray(edge_costs, dtype=np.float64))
+
+
+def _normalize_labels(labels: np.ndarray) -> np.ndarray:
+    array = np.asarray(labels)
+    if array.ndim not in (2, 3):
+        raise ValueError(f"labels must be a 2D or 3D array, got ndim={array.ndim}")
+    try:
+        _REGION_ADJACENCY_GRAPH_BY_DTYPE[array.dtype]
+    except KeyError as error:
+        supported = ", ".join(
+            str(dtype) for dtype in _REGION_ADJACENCY_GRAPH_BY_DTYPE
+        )
+        raise TypeError(
+            f"labels must have one of dtypes ({supported}), got dtype={array.dtype}"
+        ) from error
+    return np.ascontiguousarray(array)
+
+
+def _normalize_number_of_threads(number_of_threads: int) -> int:
+    number_of_threads = int(number_of_threads)
+    if number_of_threads < 0:
+        raise ValueError("number_of_threads must be non-negative")
+    return number_of_threads
+
+
+def _resolve_weight_dtype(array, name: str) -> np.ndarray:
+    """Coerce a weights input to a supported floating dtype.
+
+    ``float32`` and ``float64`` pass through unchanged. Other floating
+    dtypes are cast to ``float32`` (matches the convention used by
+    :func:`bioimage_cpp.graph.edge_weighted_watershed`). Non-floating dtypes
+    raise.
+    """
+    array = np.asarray(array)
+    if array.dtype in (np.dtype("float32"), np.dtype("float64")):
+        return array
+    if np.issubdtype(array.dtype, np.floating):
+        return array.astype(np.float32, copy=False)
+    raise TypeError(
+        f"{name} must have a floating dtype (float32 or float64), got "
+        f"dtype={array.dtype}"
+    )
+
+
+def _grid_ndim(graph) -> int:
+    # Local import to avoid a circular dependency at module-load time.
+    from . import GridGraph2D, GridGraph3D
+
+    if isinstance(graph, GridGraph2D):
+        return 2
+    if isinstance(graph, GridGraph3D):
+        return 3
+    raise TypeError("graph must be a GridGraph2D or GridGraph3D")
+
+
+def _grid_shape(graph) -> tuple[int, ...]:
+    return tuple(int(size) for size in graph.shape)
+
+
+def _as_grid_data(values, graph, name: str, *, with_channels: bool) -> np.ndarray:
+    array = np.asarray(values)
+    if array.dtype not in _GRID_FLOAT_DTYPES:
+        # Integer / non-float input falls back to float64 — the previous default.
+        # float32 and float64 inputs are passed through end-to-end, no copy.
+        array = array.astype(np.float64)
+    shape = _grid_shape(graph)
+    if with_channels:
+        if array.ndim != len(shape) + 1 or array.shape[1:] != shape:
+            raise ValueError(
+                f"{name} must have shape (channels, *graph.shape), got "
+                f"shape={array.shape}, graph shape={shape}"
+            )
+    elif array.shape != shape:
+        raise ValueError(
+            f"{name} shape must match graph shape, got "
+            f"{name} shape={array.shape}, graph shape={shape}"
+        )
+    return np.ascontiguousarray(array)
+
+
+def _normalize_grid_offsets(offsets, ndim: int, n_channels: int) -> list[tuple[int, ...]]:
+    normalized = [tuple(int(value) for value in offset) for offset in offsets]
+    if len(normalized) != n_channels:
+        raise ValueError(
+            "offsets length must match affinities channel count, got "
+            f"offsets length={len(normalized)}, channels={n_channels}"
+        )
+    if any(len(offset) != ndim for offset in normalized):
+        raise ValueError("each offset must have length matching graph ndim")
+    if any(all(value == 0 for value in offset) for offset in normalized):
+        raise ValueError("offsets must not contain the zero offset")
+    return normalized
+
+
+def _grid_dtype_suffix(array: np.ndarray) -> str:
+    if array.dtype == np.float32:
+        return "float32"
+    return "float64"
diff --git a/src/bioimage_cpp/graph/features/__init__.py b/src/bioimage_cpp/graph/features/__init__.py
new file mode 100644
index 0000000..d3ca078
--- /dev/null
+++ b/src/bioimage_cpp/graph/features/__init__.py
@@ -0,0 +1,475 @@
+"""Edge-feature accumulation on graphs.
+
+Functions in this submodule compute edge-aligned feature vectors:
+
+- :func:`edge_map_features` / :func:`edge_map_features_complex` — features
+  derived from a scalar edge map on RAG boundaries.
+- :func:`affinity_features` / :func:`affinity_features_complex` — features
+  derived from affinity channels on RAG edges.
+- :func:`lifted_edges_from_affinities`, :func:`lifted_affinity_features`,
+  :func:`lifted_affinity_features_complex` — features for long-range
+  (lifted) edges discovered from affinity offsets.
+- :func:`grid_boundary_features`, :func:`grid_affinity_features`,
+  :func:`grid_affinity_features_with_lifted` — weights for nearest-neighbor
+  grid graphs (and optional long-range edges) computed directly from
+  boundary maps / affinity channels.
+"""
+
+from __future__ import annotations
+
+import numpy as np
+
+from .. import _core
+from .._shared import (
+    _as_grid_data,
+    _as_uv_array,
+    _grid_dtype_suffix,
+    _grid_ndim,
+    _normalize_grid_offsets,
+    _normalize_labels,
+    _normalize_number_of_threads,
+)
+
+
+SIMPLE_EDGE_FEATURE_NAMES = ("mean", "size")
+COMPLEX_EDGE_FEATURE_NAMES = (
+    "mean",
+    "median",
+    "std",
+    "min",
+    "max",
+    "p5",
+    "p10",
+    "p25",
+    "p75",
+    "p90",
+    "p95",
+    "size",
+)
+
+
+_EDGE_MAP_FEATURES_BY_DTYPE = {
+    np.dtype("uint32"): _core._accumulate_edge_map_features_uint32,
+    np.dtype("uint64"): _core._accumulate_edge_map_features_uint64,
+    np.dtype("int32"): _core._accumulate_edge_map_features_int32,
+    np.dtype("int64"): _core._accumulate_edge_map_features_int64,
+}
+
+_AFFINITY_FEATURES_BY_DTYPE = {
+    np.dtype("uint32"): _core._accumulate_affinity_features_uint32,
+    np.dtype("uint64"): _core._accumulate_affinity_features_uint64,
+    np.dtype("int32"): _core._accumulate_affinity_features_int32,
+    np.dtype("int64"): _core._accumulate_affinity_features_int64,
+}
+
+_LIFTED_EDGES_FROM_AFFINITIES_BY_DTYPE = {
+    np.dtype("uint32"): _core._lifted_edges_from_affinities_uint32,
+    np.dtype("uint64"): _core._lifted_edges_from_affinities_uint64,
+    np.dtype("int32"): _core._lifted_edges_from_affinities_int32,
+    np.dtype("int64"): _core._lifted_edges_from_affinities_int64,
+}
+
+_LIFTED_AFFINITY_FEATURES_BY_DTYPE = {
+    np.dtype("uint32"): _core._accumulate_lifted_affinity_features_uint32,
+    np.dtype("uint64"): _core._accumulate_lifted_affinity_features_uint64,
+    np.dtype("int32"): _core._accumulate_lifted_affinity_features_int32,
+    np.dtype("int64"): _core._accumulate_lifted_affinity_features_int64,
+}
+
+_GRID_BOUNDARY_DISPATCH = {
+    (2, "float32"): _core._grid_boundary_features_2d_float32,
+    (2, "float64"): _core._grid_boundary_features_2d_float64,
+    (3, "float32"): _core._grid_boundary_features_3d_float32,
+    (3, "float64"): _core._grid_boundary_features_3d_float64,
+}
+_GRID_AFFINITY_DISPATCH = {
+    (2, "float32"): _core._grid_affinity_features_2d_float32,
+    (2, "float64"): _core._grid_affinity_features_2d_float64,
+    (3, "float32"): _core._grid_affinity_features_3d_float32,
+    (3, "float64"): _core._grid_affinity_features_3d_float64,
+}
+_GRID_AFFINITY_LIFTED_DISPATCH = {
+    (2, "float32"): _core._grid_affinity_features_with_lifted_2d_float32,
+    (2, "float64"): _core._grid_affinity_features_with_lifted_2d_float64,
+    (3, "float32"): _core._grid_affinity_features_with_lifted_3d_float32,
+    (3, "float64"): _core._grid_affinity_features_with_lifted_3d_float64,
+}
+
+
+def edge_map_features(
+    rag,
+    labels: np.ndarray,
+    edge_map: np.ndarray,
+    *,
+    number_of_threads: int = 0,
+) -> np.ndarray:
+    """Compute mean and size features for edge-map values on RAG boundaries."""
+    return _accumulate_edge_map_features(
+        rag,
+        labels,
+        edge_map,
+        compute_complex_features=False,
+        number_of_threads=number_of_threads,
+    )
+
+
+def edge_map_features_complex(
+    rag,
+    labels: np.ndarray,
+    edge_map: np.ndarray,
+    *,
+    number_of_threads: int = 0,
+) -> np.ndarray:
+    """Compute complex edge-map features on RAG boundaries.
+
+    The output columns are given by :data:`COMPLEX_EDGE_FEATURE_NAMES`.
+    """
+    return _accumulate_edge_map_features(
+        rag,
+        labels,
+        edge_map,
+        compute_complex_features=True,
+        number_of_threads=number_of_threads,
+    )
+
+
+def affinity_features(
+    rag,
+    labels: np.ndarray,
+    affinities: np.ndarray,
+    offsets,
+    *,
+    number_of_threads: int = 0,
+) -> np.ndarray:
+    """Compute mean and size features for affinity links crossing RAG edges."""
+    return _accumulate_affinity_features(
+        rag,
+        labels,
+        affinities,
+        offsets,
+        compute_complex_features=False,
+        number_of_threads=number_of_threads,
+    )
+
+
+def affinity_features_complex(
+    rag,
+    labels: np.ndarray,
+    affinities: np.ndarray,
+    offsets,
+    *,
+    number_of_threads: int = 0,
+) -> np.ndarray:
+    """Compute complex affinity features for links crossing RAG edges.
+
+    The output columns are given by :data:`COMPLEX_EDGE_FEATURE_NAMES`.
+    """
+    return _accumulate_affinity_features(
+        rag,
+        labels,
+        affinities,
+        offsets,
+        compute_complex_features=True,
+        number_of_threads=number_of_threads,
+    )
+
+
+def lifted_edges_from_affinities(
+    rag,
+    labels: np.ndarray,
+    offsets,
+    *,
+    number_of_threads: int = 0,
+) -> np.ndarray:
+    """Discover lifted edges implied by long-range affinity offsets.
+
+    Walks every grid coordinate together with each long-range offset (1-hop
+    offsets are skipped automatically). When the labels at ``(p, p + offset)``
+    differ and ``(labels[p], labels[p + offset])`` is not already a local
+    edge of ``rag``, the pair is recorded as a lifted edge.
+
+    Parameters
+    ----------
+    rag:
+        :class:`bioimage_cpp.graph.RegionAdjacencyGraph` built from ``labels``.
+    labels:
+        2D or 3D label array. Supported dtypes: ``uint32``, ``uint64``,
+        ``int32``, ``int64``.
+    offsets:
+        Sequence of per-channel offsets. Each offset must have length equal
+        to ``labels.ndim``. Offsets with L1 norm ``<= 1`` are skipped, so
+        callers can pass the full offset list of an affinity volume without
+        pre-filtering.
+
+    Returns
+    -------
+    np.ndarray
+        ``(n_lifted, 2)`` ``uint64`` array of ``(u, v)`` pairs with
+        ``u < v``, sorted lexicographically.
+    """
+    label_array = _normalize_labels(labels)
+    if tuple(int(size) for size in rag.shape) != label_array.shape:
+        raise ValueError(
+            "rag shape must match labels shape, got "
+            f"rag shape={tuple(rag.shape)}, labels shape={label_array.shape}"
+        )
+
+    normalized_offsets = [tuple(int(value) for value in offset) for offset in offsets]
+    if any(len(offset) != label_array.ndim for offset in normalized_offsets):
+        raise ValueError("each offset must have length matching labels ndim")
+
+    run = _LIFTED_EDGES_FROM_AFFINITIES_BY_DTYPE[label_array.dtype]
+    return run(
+        rag,
+        label_array,
+        normalized_offsets,
+        _normalize_number_of_threads(number_of_threads),
+    )
+
+
+def lifted_affinity_features(
+    labels: np.ndarray,
+    affinities: np.ndarray,
+    offsets,
+    lifted_uvs,
+    *,
+    number_of_threads: int = 0,
+) -> np.ndarray:
+    """Compute mean and size features for affinity links across lifted edges.
+
+    Affinity values at pixel pairs ``(p, p + offset)`` whose labels match a
+    row of ``lifted_uvs`` are binned into that lifted edge. Pixel pairs that
+    fall on a non-lifted edge (or no edge at all) are silently skipped, so
+    a local edge that is also reachable by a long-range offset is not
+    contaminated by long-range affinities.
+
+    1-hop offsets are skipped automatically.
+
+    The returned array has shape ``(len(lifted_uvs), 2)`` with columns
+    :data:`SIMPLE_EDGE_FEATURE_NAMES` (``mean``, ``size``).
+    """
+    return _accumulate_lifted_affinity_features(
+        labels,
+        affinities,
+        offsets,
+        lifted_uvs,
+        compute_complex_features=False,
+        number_of_threads=number_of_threads,
+    )
+
+
+def lifted_affinity_features_complex(
+    labels: np.ndarray,
+    affinities: np.ndarray,
+    offsets,
+    lifted_uvs,
+    *,
+    number_of_threads: int = 0,
+) -> np.ndarray:
+    """Complex affinity features for links across lifted edges.
+
+    Output columns: :data:`COMPLEX_EDGE_FEATURE_NAMES`.
+    """
+    return _accumulate_lifted_affinity_features(
+        labels,
+        affinities,
+        offsets,
+        lifted_uvs,
+        compute_complex_features=True,
+        number_of_threads=number_of_threads,
+    )
+
+
+def grid_boundary_features(graph, boundary_map) -> np.ndarray:
+    """Compute scalar nearest-neighbor grid edge weights from a boundary map.
+
+    The output is a 1D array aligned to ``graph.edges()``. Output dtype matches
+    the input: ``float32`` and ``float64`` inputs are processed without copying,
+    other dtypes are promoted to ``float64``. Each edge receives the average of
+    the two endpoint boundary-map values.
+    """
+    ndim = _grid_ndim(graph)
+    boundary_array = _as_grid_data(
+        boundary_map, graph, "boundary_map", with_channels=False
+    )
+    return _GRID_BOUNDARY_DISPATCH[(ndim, _grid_dtype_suffix(boundary_array))](
+        graph, boundary_array
+    )
+
+
+def grid_affinity_features(graph, affinities, offsets) -> tuple[np.ndarray, np.ndarray]:
+    """Map local affinity channels to grid graph edge weights.
+
+    Only nearest-neighbor offsets with L1 norm 1 are accepted. The returned
+    tuple is ``(edge_weights, valid_edges)``, both aligned to ``graph.edges()``.
+    ``edge_weights`` has the same dtype as ``affinities`` (``float32`` or
+    ``float64``; other dtypes are promoted to ``float64``). ``valid_edges`` is
+    boolean and marks local graph edges covered by offsets.
+    """
+    ndim = _grid_ndim(graph)
+    affinity_array = _as_grid_data(
+        affinities, graph, "affinities", with_channels=True
+    )
+    normalized_offsets = _normalize_grid_offsets(
+        offsets, ndim, int(affinity_array.shape[0])
+    )
+    weights, valid = _GRID_AFFINITY_DISPATCH[
+        (ndim, _grid_dtype_suffix(affinity_array))
+    ](graph, affinity_array, normalized_offsets)
+    return weights, valid.astype(bool, copy=False)
+
+
+def grid_affinity_features_with_lifted(
+    graph,
+    affinities,
+    offsets,
+) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
+    """Map local affinities and emit explicit long-range grid edges.
+
+    Returns ``(local_weights, valid_local_edges, lifted_uvs, lifted_weights,
+    lifted_offset_ids)``. Local arrays are aligned to ``graph.edges()``.
+    Long-range arrays have one row/value per valid offset hit and are suitable
+    for lifted multicut or mutex-watershed style callers. Weight arrays share
+    the dtype of ``affinities`` (``float32`` or ``float64``; other dtypes are
+    promoted to ``float64``).
+    """
+    ndim = _grid_ndim(graph)
+    affinity_array = _as_grid_data(
+        affinities, graph, "affinities", with_channels=True
+    )
+    normalized_offsets = _normalize_grid_offsets(
+        offsets, ndim, int(affinity_array.shape[0])
+    )
+    local_weights, valid, lifted_uvs, lifted_weights, lifted_offset_ids = (
+        _GRID_AFFINITY_LIFTED_DISPATCH[
+            (ndim, _grid_dtype_suffix(affinity_array))
+        ](graph, affinity_array, normalized_offsets)
+    )
+    return (
+        local_weights,
+        valid.astype(bool, copy=False),
+        lifted_uvs,
+        lifted_weights,
+        lifted_offset_ids,
+    )
+
+
+def _accumulate_edge_map_features(
+    rag,
+    labels: np.ndarray,
+    edge_map: np.ndarray,
+    *,
+    compute_complex_features: bool,
+    number_of_threads: int,
+) -> np.ndarray:
+    label_array = _normalize_labels(labels)
+    edge_map_array = np.asarray(edge_map, dtype=np.float64)
+    if edge_map_array.shape != label_array.shape:
+        raise ValueError(
+            "edge_map shape must match labels shape, got "
+            f"edge_map shape={edge_map_array.shape}, labels shape={label_array.shape}"
+        )
+    run = _EDGE_MAP_FEATURES_BY_DTYPE[label_array.dtype]
+    return run(
+        rag,
+        label_array,
+        np.ascontiguousarray(edge_map_array),
+        bool(compute_complex_features),
+        _normalize_number_of_threads(number_of_threads),
+    )
+
+
+def _accumulate_affinity_features(
+    rag,
+    labels: np.ndarray,
+    affinities: np.ndarray,
+    offsets,
+    *,
+    compute_complex_features: bool,
+    number_of_threads: int,
+) -> np.ndarray:
+    label_array = _normalize_labels(labels)
+    affinity_array = np.asarray(affinities, dtype=np.float64)
+    if affinity_array.ndim != label_array.ndim + 1:
+        raise ValueError("affinities must have shape (channels, *labels.shape)")
+    if affinity_array.shape[1:] != label_array.shape:
+        raise ValueError(
+            "affinities spatial shape must match labels shape, got "
+            f"affinities shape={affinity_array.shape}, labels shape={label_array.shape}"
+        )
+
+    normalized_offsets = [tuple(int(value) for value in offset) for offset in offsets]
+    if len(normalized_offsets) != affinity_array.shape[0]:
+        raise ValueError(
+            "offsets length must match affinities channel count, got "
+            f"offsets length={len(normalized_offsets)}, channels={affinity_array.shape[0]}"
+        )
+    if any(len(offset) != label_array.ndim for offset in normalized_offsets):
+        raise ValueError("each offset must have length matching labels ndim")
+
+    run = _AFFINITY_FEATURES_BY_DTYPE[label_array.dtype]
+    return run(
+        rag,
+        label_array,
+        np.ascontiguousarray(affinity_array),
+        normalized_offsets,
+        bool(compute_complex_features),
+        _normalize_number_of_threads(number_of_threads),
+    )
+
+
+def _accumulate_lifted_affinity_features(
+    labels: np.ndarray,
+    affinities: np.ndarray,
+    offsets,
+    lifted_uvs,
+    *,
+    compute_complex_features: bool,
+    number_of_threads: int,
+) -> np.ndarray:
+    label_array = _normalize_labels(labels)
+    affinity_array = np.asarray(affinities, dtype=np.float64)
+    if affinity_array.ndim != label_array.ndim + 1:
+        raise ValueError("affinities must have shape (channels, *labels.shape)")
+    if affinity_array.shape[1:] != label_array.shape:
+        raise ValueError(
+            "affinities spatial shape must match labels shape, got "
+            f"affinities shape={affinity_array.shape}, labels shape={label_array.shape}"
+        )
+
+    normalized_offsets = [tuple(int(value) for value in offset) for offset in offsets]
+    if len(normalized_offsets) != affinity_array.shape[0]:
+        raise ValueError(
+            "offsets length must match affinities channel count, got "
+            f"offsets length={len(normalized_offsets)}, channels={affinity_array.shape[0]}"
+        )
+    if any(len(offset) != label_array.ndim for offset in normalized_offsets):
+        raise ValueError("each offset must have length matching labels ndim")
+
+    lifted_uv_array = _as_uv_array(lifted_uvs, "lifted_uvs")
+
+    run = _LIFTED_AFFINITY_FEATURES_BY_DTYPE[label_array.dtype]
+    return run(
+        label_array,
+        np.ascontiguousarray(affinity_array),
+        normalized_offsets,
+        lifted_uv_array,
+        bool(compute_complex_features),
+        _normalize_number_of_threads(number_of_threads),
+    )
+
+
+__all__ = [
+    "COMPLEX_EDGE_FEATURE_NAMES",
+    "SIMPLE_EDGE_FEATURE_NAMES",
+    "affinity_features",
+    "affinity_features_complex",
+    "edge_map_features",
+    "edge_map_features_complex",
+    "grid_affinity_features",
+    "grid_affinity_features_with_lifted",
+    "grid_boundary_features",
+    "lifted_affinity_features",
+    "lifted_affinity_features_complex",
+    "lifted_edges_from_affinities",
+]
diff --git a/src/bioimage_cpp/graph/lifted_multicut/__init__.py b/src/bioimage_cpp/graph/lifted_multicut/__init__.py
new file mode 100644
index 0000000..0b52145
--- /dev/null
+++ b/src/bioimage_cpp/graph/lifted_multicut/__init__.py
@@ -0,0 +1,560 @@
+"""Lifted multicut objective and solvers.
+
+The lifted multicut problem extends the standard multicut with a second set
+of *lifted* edges that do not have to be present in the base graph but
+contribute to the energy via their cut status. This submodule exposes:
+
+- :class:`LiftedMulticutObjective` — base graph + base costs + lifted edges +
+  working labeling.
+- :class:`LiftedMulticutSolver` and the concrete solvers
+  :class:`LiftedGreedyAdditiveMulticut`, :class:`LiftedKernighanLinMulticut`,
+  :class:`FusionMoveLiftedMulticut`, :class:`LiftedChainedSolvers`.
+- :class:`LiftedMulticutProblem` and loaders for the benchmark lifted multicut
+  problem instances.
+- :class:`ProposalGenerator`, :class:`WatershedProposalGenerator`,
+  :class:`GreedyAdditiveProposalGenerator` re-exported from
+  :mod:`bioimage_cpp.graph.multicut` (the lifted fusion-move solver consumes
+  them).
+"""
+
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+
+import numpy as np
+
+from .. import _core
+from .._external import (
+    LiftedMulticutProblem,
+    lifted_multicut_problem_path,
+    load_lifted_multicut_problem,
+)
+from .._shared import (
+    _as_edge_costs,
+    _as_node_labels,
+    _as_uv_array,
+)
+from ..multicut import (
+    GreedyAdditiveProposalGenerator,
+    ProposalGenerator,
+    WatershedProposalGenerator,
+)
+
+
+class LiftedMulticutObjective:
+    """Lifted multicut objective.
+
+    Stores a base graph + base edge costs together with an internal *lifted
+    graph* that is a superset of the base graph (base edges occupy ids
+    ``0 .. base.number_of_edges - 1``; lifted edges follow). The energy of a
+    node labeling is the sum of base + lifted edge weights across cut edges.
+
+    The lifted edges can be supplied either as explicit ``(uvs, costs)``
+    arrays, via a ``bfs_distance=k`` constructor argument that inserts a
+    zero-weight lifted edge for every pair of nodes within ``k`` hops of each
+    other in the base graph, or by calling :meth:`set_cost` after construction.
+    """
+
+    def __init__(
+        self,
+        graph,
+        edge_costs,
+        *,
+        lifted_uvs=None,
+        lifted_costs=None,
+        bfs_distance: int | None = None,
+        overwrite_existing: bool = False,
+        initial_labels=None,
+    ):
+        # Local import to avoid a circular dependency at module-load time
+        # (``breadth_first_search`` lives in the parent ``bioimage_cpp.graph``
+        # namespace which imports this submodule).
+        from .. import breadth_first_search
+
+        # The objective holds a reference to the user's base graph (no
+        # defensive copy — the C++ ``Objective`` already keeps a const
+        # reference). The user is expected to treat the input graph as
+        # immutable while the objective is alive; mutations are visible to
+        # the objective and may produce undefined behaviour.
+        base_graph = graph
+        base_costs = _as_edge_costs(edge_costs, base_graph)
+
+        # Use the bulk constructor for the lifted graph's base portion to
+        # bypass the per-edge hash dedup that ``insert_edges`` performs.
+        if int(base_graph.number_of_edges) > 0:
+            lifted_graph = _core.UndirectedGraph.from_unique_edges(
+                int(base_graph.number_of_nodes), base_graph.uv_ids()
+            )
+        else:
+            lifted_graph = _core.UndirectedGraph(int(base_graph.number_of_nodes))
+
+        weights_list = [base_costs.copy()]
+
+        if bfs_distance is not None:
+            distance = int(bfs_distance)
+            if distance < 1:
+                raise ValueError("bfs_distance must be >= 1")
+            bfs_uvs = []
+            for source in range(int(base_graph.number_of_nodes)):
+                nodes, _ = breadth_first_search(
+                    base_graph,
+                    source,
+                    max_distance=distance,
+                    include_source=False,
+                )
+                if nodes.size == 0:
+                    continue
+                tail = nodes[nodes > source]
+                if tail.size == 0:
+                    continue
+                source_column = np.full(tail.size, source, dtype=np.uint64)
+                bfs_uvs.append(np.stack([source_column, tail], axis=1))
+            if bfs_uvs:
+                bfs_uv_array = np.ascontiguousarray(np.concatenate(bfs_uvs, axis=0))
+                _add_lifted_edges(
+                    lifted_graph,
+                    weights_list,
+                    bfs_uv_array,
+                    np.zeros(bfs_uv_array.shape[0], dtype=np.float64),
+                    overwrite_existing=overwrite_existing,
+                )
+
+        if lifted_uvs is not None or lifted_costs is not None:
+            if lifted_uvs is None or lifted_costs is None:
+                raise ValueError(
+                    "lifted_uvs and lifted_costs must be provided together"
+                )
+            uv_array = _as_uv_array(lifted_uvs, "lifted_uvs")
+            cost_array = np.asarray(lifted_costs, dtype=np.float64)
+            if cost_array.ndim != 1:
+                raise ValueError("lifted_costs must be a 1D array")
+            if cost_array.shape[0] != uv_array.shape[0]:
+                raise ValueError(
+                    "lifted_uvs and lifted_costs must have the same length, got "
+                    f"lifted_uvs.shape[0]={uv_array.shape[0]}, "
+                    f"lifted_costs.shape[0]={cost_array.shape[0]}"
+                )
+            _add_lifted_edges(
+                lifted_graph,
+                weights_list,
+                uv_array,
+                np.ascontiguousarray(cost_array),
+                overwrite_existing=overwrite_existing,
+            )
+
+        self._base_graph = base_graph
+        self._lifted_graph = lifted_graph
+        self._n_base_edges = int(base_graph.number_of_edges)
+        self._weights = np.ascontiguousarray(np.concatenate(weights_list)) \
+            if len(weights_list) > 1 else weights_list[0]
+        if initial_labels is None:
+            self._labels = np.arange(base_graph.number_of_nodes, dtype=np.uint64)
+        else:
+            self._labels = _as_node_labels(initial_labels, base_graph)
+
+    @property
+    def graph(self):
+        return self._base_graph
+
+    @property
+    def lifted_graph(self):
+        return self._lifted_graph
+
+    @property
+    def weights(self) -> np.ndarray:
+        return self._weights
+
+    @property
+    def number_of_base_edges(self) -> int:
+        return self._n_base_edges
+
+    @property
+    def number_of_lifted_edges(self) -> int:
+        return int(self._lifted_graph.number_of_edges) - self._n_base_edges
+
+    @property
+    def labels(self) -> np.ndarray:
+        return self._labels
+
+    @labels.setter
+    def labels(self, labels) -> None:
+        self._labels = _as_node_labels(labels, self._base_graph)
+
+    def set_labels(self, labels) -> None:
+        self.labels = labels
+
+    def reset_labels(self) -> None:
+        self._labels = np.arange(self._base_graph.number_of_nodes, dtype=np.uint64)
+
+    def set_cost(
+        self,
+        u: int,
+        v: int,
+        weight: float,
+        *,
+        overwrite: bool = False,
+    ) -> tuple[int, bool]:
+        """Insert or update a single lifted edge weight.
+
+        Returns ``(edge_id, is_new)`` — the lifted-graph edge id and whether a
+        new edge was inserted. If the edge already exists (as a base edge or a
+        previously inserted lifted edge), the weight is accumulated unless
+        ``overwrite=True``.
+        """
+        pre = int(self._lifted_graph.number_of_edges)
+        edge = int(self._lifted_graph.insert_edge(int(u), int(v)))
+        if int(self._lifted_graph.number_of_edges) > pre:
+            self._weights = np.concatenate(
+                [self._weights, np.asarray([float(weight)], dtype=np.float64)]
+            )
+            return edge, True
+        if overwrite:
+            self._weights[edge] = float(weight)
+        else:
+            self._weights[edge] = self._weights[edge] + float(weight)
+        return edge, False
+
+    def energy(self, labels=None) -> float:
+        label_array = (
+            self._labels if labels is None else _as_node_labels(labels, self._base_graph)
+        )
+        return float(
+            _core._lifted_multicut_energy(self._lifted_graph, self._weights, label_array)
+        )
+
+
+def _add_lifted_edges(
+    lifted_graph,
+    weights_list: list[np.ndarray],
+    lifted_uvs: np.ndarray,
+    lifted_costs: np.ndarray,
+    *,
+    overwrite_existing: bool,
+) -> None:
+    """Insert lifted edges into an UndirectedGraph and update the weights list.
+
+    Mirrors the C++ ``build_lifted_graph`` semantics: brand-new edges append
+    their cost to ``weights_list``; existing edges (duplicates of a base edge
+    or of a previously inserted lifted edge) either overwrite or accumulate
+    their weight in place. ``weights_list`` is expected to hold the current
+    weights array as its single element on entry; on exit it contains either
+    one or two ndarray entries (the existing weights plus the new tail).
+    """
+    if lifted_uvs.shape[0] == 0:
+        return
+    # In-place updates require a single flat working buffer; coalesce first.
+    if len(weights_list) > 1:
+        weights_list[:] = [np.ascontiguousarray(np.concatenate(weights_list))]
+
+    # Fast path: bulk-insert and detect uniqueness from the row count delta.
+    # For the typical case — ``lifted_uvs`` produced by
+    # ``lifted_edges_from_affinities`` or by the BFS constructor — every row
+    # is a brand-new edge, so the delta equals the input length and we can
+    # append the weights array directly. No ``find_edges`` calls are needed.
+    if not overwrite_existing:
+        pre_count = int(lifted_graph.number_of_edges)
+        lifted_graph.insert_edges(lifted_uvs)
+        post_count = int(lifted_graph.number_of_edges)
+        n_new = post_count - pre_count
+
+        if n_new == lifted_uvs.shape[0]:
+            weights_list.append(
+                np.ascontiguousarray(lifted_costs.astype(np.float64, copy=False))
+            )
+            return
+
+        # Some rows collided with existing edges or with each other. Use
+        # find_edges to recover the per-row edge id (insertion is already
+        # done; this is just a lookup).
+        edge_ids = np.asarray(lifted_graph.find_edges(lifted_uvs))
+        lifted_costs_f64 = lifted_costs.astype(np.float64, copy=False)
+        working = weights_list[0]
+
+        collision_mask = edge_ids < pre_count
+        if collision_mask.any():
+            np.add.at(
+                working,
+                edge_ids[collision_mask].astype(np.intp, copy=False),
+                lifted_costs_f64[collision_mask],
+            )
+
+        new_mask = ~collision_mask
+        if new_mask.any():
+            slot = (edge_ids[new_mask] - pre_count).astype(np.int64, copy=False)
+            new_weights = np.bincount(
+                slot, weights=lifted_costs_f64[new_mask], minlength=n_new
+            ).astype(np.float64, copy=False)
+        else:
+            new_weights = np.zeros(n_new, dtype=np.float64)
+
+        weights_list[0] = working
+        if n_new > 0:
+            weights_list.append(new_weights)
+        return
+
+    # Slow path: per-row Python loop for ``overwrite_existing=True`` (rare).
+    # Order-sensitive last-write-wins semantics on collisions.
+    working = weights_list[0]
+    new_costs: list[float] = []
+    for index in range(lifted_uvs.shape[0]):
+        u = int(lifted_uvs[index, 0])
+        v = int(lifted_uvs[index, 1])
+        weight = float(lifted_costs[index])
+        pre = int(lifted_graph.number_of_edges)
+        edge = int(lifted_graph.insert_edge(u, v))
+        if int(lifted_graph.number_of_edges) > pre:
+            new_costs.append(weight)
+        else:
+            working[edge] = weight
+    if new_costs:
+        weights_list[0] = working
+        weights_list.append(np.asarray(new_costs, dtype=np.float64))
+    else:
+        weights_list[0] = working
+
+
+class LiftedMulticutSolver(ABC):
+    """Base class for lifted multicut solvers."""
+
+    @abstractmethod
+    def optimize(self, objective: LiftedMulticutObjective) -> np.ndarray:
+        """Optimize ``objective`` and return the node labeling."""
+
+
+class LiftedGreedyAdditiveMulticut(LiftedMulticutSolver):
+    """Greedy additive edge contraction (GAEC) lifted multicut solver.
+
+    Introduced in "An efficient fusion move algorithm for the minimum cost
+    lifted multicut problem":
+    https://hci.iwr.uni-heidelberg.de/sites/default/files/publications/files/1939997197/beier_16_efficient.pdf
+    """
+
+    def __init__(
+        self,
+        *,
+        weight_stop: float = 0.0,
+        node_num_stop: float = -1.0,
+        add_noise: bool = False,
+        seed: int = 42,
+        sigma: float = 1.0,
+    ):
+        self.weight_stop = float(weight_stop)
+        self.node_num_stop = float(node_num_stop)
+        self.add_noise = bool(add_noise)
+        self.seed = int(seed)
+        self.sigma = float(sigma)
+
+    def optimize(self, objective: LiftedMulticutObjective) -> np.ndarray:
+        labels = _core._lifted_multicut_greedy_additive(
+            objective.lifted_graph,
+            objective.weights,
+            objective.number_of_base_edges,
+            self.weight_stop,
+            self.node_num_stop,
+            self.add_noise,
+            self.seed,
+            self.sigma,
+        )
+        objective.labels = labels
+        return objective.labels
+
+    def _build_cpp_sub_solver(self):
+        return _core._GreedyAdditiveLiftedMulticutSubSolver(
+            weight_stop=self.weight_stop,
+            node_num_stop=self.node_num_stop,
+            add_noise=self.add_noise,
+            seed=self.seed,
+            sigma=self.sigma,
+        )
+
+
+class LiftedKernighanLinMulticut(LiftedMulticutSolver):
+    """Kernighan-Lin lifted multicut solver.
+
+    Introduced in "Efficient decomposition of image and mesh graphs by lifted
+    multicuts":
+    http://openaccess.thecvf.com/content_iccv_2015/papers/Keuper_Efficient_Decomposition_of_ICCV_2015_paper.pdf
+    """
+
+    def __init__(
+        self,
+        *,
+        number_of_outer_iterations: int = 100,
+        epsilon: float = 1.0e-6,
+    ):
+        self.number_of_outer_iterations = int(number_of_outer_iterations)
+        if self.number_of_outer_iterations < 0:
+            raise ValueError("number_of_outer_iterations must be non-negative")
+        self.epsilon = float(epsilon)
+
+    def optimize(self, objective: LiftedMulticutObjective) -> np.ndarray:
+        initial_labels = objective.labels
+        if np.array_equal(
+            initial_labels,
+            np.arange(objective.graph.number_of_nodes, dtype=np.uint64),
+        ):
+            initial_labels = _core._lifted_multicut_greedy_additive(
+                objective.lifted_graph,
+                objective.weights,
+                objective.number_of_base_edges,
+                0.0,
+                -1.0,
+                False,
+                42,
+                1.0,
+            )
+        labels = _core._lifted_multicut_kernighan_lin(
+            objective.graph,
+            objective.lifted_graph,
+            objective.weights,
+            objective.number_of_base_edges,
+            initial_labels,
+            self.number_of_outer_iterations,
+            self.epsilon,
+        )
+        objective.labels = labels
+        return objective.labels
+
+    def _build_cpp_sub_solver(self):
+        return _core._KernighanLinLiftedMulticutSubSolver(
+            number_of_outer_iterations=self.number_of_outer_iterations,
+            epsilon=self.epsilon,
+        )
+
+
+class FusionMoveLiftedMulticut(LiftedMulticutSolver):
+    """Fusion-move lifted multicut solver.
+
+    Introduced in "An efficient fusion move algorithm for the minimum cost
+    lifted multicut problem":
+    https://hci.iwr.uni-heidelberg.de/sites/default/files/publications/files/1939997197/beier_16_efficient.pdf
+
+    Iteratively generates proposals via ``proposal_generator`` (which sees the
+    *base* graph and base edge costs), fuses them with the current best
+    labeling, and accepts improvements. Each fuse contracts the base graph by
+    agreement across the proposals, aggregates base + lifted weights onto the
+    contracted lifted-multicut subproblem, and dispatches to ``sub_solver``.
+    If ``sub_solver`` is omitted, the default sub-solver is
+    :class:`LiftedGreedyAdditiveMulticut`.
+
+    If the objective's current labels are the trivial singleton labeling, the
+    driver warm-starts with one lifted greedy-additive pass before the proposal
+    loop. The best-of safety net guarantees energy never increases across
+    iterations.
+
+    Threading: ``number_of_threads > 1`` runs ``number_of_parallel_proposals``
+    proposal generators in parallel within each iteration. Each parallel slot
+    uses an independent proposal generator with seed ``proposal_generator.seed
+    + slot_index``. By default ``number_of_parallel_proposals`` is ``2`` when
+    ``number_of_threads == 1`` and ``number_of_threads`` otherwise; pass it
+    explicitly to override.
+    """
+
+    def __init__(
+        self,
+        *,
+        proposal_generator: ProposalGenerator,
+        sub_solver: LiftedMulticutSolver | None = None,
+        number_of_iterations: int = 10,
+        stop_if_no_improvement: int = 4,
+        number_of_threads: int = 1,
+        number_of_parallel_proposals: int | None = None,
+    ):
+        if not isinstance(proposal_generator, ProposalGenerator):
+            raise TypeError("proposal_generator must inherit from ProposalGenerator")
+        if sub_solver is not None and not isinstance(sub_solver, LiftedMulticutSolver):
+            raise TypeError("sub_solver must inherit from LiftedMulticutSolver")
+        if sub_solver is not None and not hasattr(sub_solver, "_build_cpp_sub_solver"):
+            raise TypeError(
+                "sub_solver must be a built-in lifted multicut solver "
+                "(custom Python solvers are not supported as fusion-move sub-solvers)"
+            )
+        n_threads = int(number_of_threads)
+        if n_threads < 1:
+            raise ValueError("number_of_threads must be >= 1")
+        if number_of_parallel_proposals is None:
+            n_parallel = 2 if n_threads == 1 else n_threads
+        else:
+            n_parallel = int(number_of_parallel_proposals)
+        if n_parallel < 1:
+            raise ValueError("number_of_parallel_proposals must be >= 1")
+
+        self.proposal_generator = proposal_generator
+        self.sub_solver = sub_solver
+        self.number_of_iterations = int(number_of_iterations)
+        self.stop_if_no_improvement = int(stop_if_no_improvement)
+        self.number_of_threads = n_threads
+        self.number_of_parallel_proposals = n_parallel
+        if self.number_of_iterations < 0:
+            raise ValueError("number_of_iterations must be non-negative")
+        if self.stop_if_no_improvement < 1:
+            raise ValueError("stop_if_no_improvement must be >= 1")
+
+    def optimize(self, objective: LiftedMulticutObjective) -> np.ndarray:
+        n_base = objective.number_of_base_edges
+        # The base costs back the proposal generators (the lifted weights
+        # cannot drive base-graph contraction or watershed segmentation).
+        base_costs = np.ascontiguousarray(objective.weights[:n_base])
+        cpp_pgens = [
+            self.proposal_generator._build_for_thread(
+                objective.graph, base_costs, slot
+            )
+            for slot in range(self.number_of_parallel_proposals)
+        ]
+        cpp_sub_solver = (
+            None if self.sub_solver is None else self.sub_solver._build_cpp_sub_solver()
+        )
+        labels = _core._lifted_multicut_fusion_move(
+            objective.graph,
+            objective.lifted_graph,
+            objective.weights,
+            n_base,
+            objective.labels,
+            cpp_pgens,
+            cpp_sub_solver,
+            self.number_of_iterations,
+            self.stop_if_no_improvement,
+            self.number_of_threads,
+            self.number_of_parallel_proposals,
+        )
+        objective.labels = labels
+        return objective.labels
+
+
+class LiftedChainedSolvers(LiftedMulticutSolver):
+    """Chain of lifted multicut solvers run in sequence on the same objective.
+
+    Each solver's output labeling is fed to the next via the shared
+    :class:`LiftedMulticutObjective`. Typical use: ``[LiftedGreedyAdditiveMulticut(),
+    LiftedKernighanLinMulticut(...)]`` for a fast warm-start followed by a
+    local refinement.
+    """
+
+    def __init__(self, solvers):
+        self.solvers = list(solvers)
+        if len(self.solvers) == 0:
+            raise ValueError("solvers must contain at least one solver")
+        if not all(isinstance(solver, LiftedMulticutSolver) for solver in self.solvers):
+            raise TypeError("all solvers must inherit from LiftedMulticutSolver")
+
+    def optimize(self, objective: LiftedMulticutObjective) -> np.ndarray:
+        labels = objective.labels
+        for solver in self.solvers:
+            labels = solver.optimize(objective)
+        return labels
+
+
+__all__ = [
+    "FusionMoveLiftedMulticut",
+    "GreedyAdditiveProposalGenerator",
+    "LiftedChainedSolvers",
+    "LiftedGreedyAdditiveMulticut",
+    "LiftedKernighanLinMulticut",
+    "LiftedMulticutObjective",
+    "LiftedMulticutProblem",
+    "LiftedMulticutSolver",
+    "ProposalGenerator",
+    "WatershedProposalGenerator",
+    "lifted_multicut_problem_path",
+    "load_lifted_multicut_problem",
+]
diff --git a/src/bioimage_cpp/graph/multicut/__init__.py b/src/bioimage_cpp/graph/multicut/__init__.py
new file mode 100644
index 0000000..8c73fc4
--- /dev/null
+++ b/src/bioimage_cpp/graph/multicut/__init__.py
@@ -0,0 +1,511 @@
+"""Multicut objective and solvers on undirected graphs.
+
+Public surface:
+
+- :class:`MulticutObjective` — base graph + edge costs + working labeling.
+- :class:`MulticutSolver` — abstract base for solvers; concrete subclasses
+  are :class:`GreedyAdditiveMulticut`, :class:`GreedyFixationMulticut`,
+  :class:`KernighanLinMulticut`, :class:`FusionMoveMulticut`,
+  :class:`ChainedMulticutSolvers`, :class:`MulticutDecomposer`.
+- :class:`ProposalGenerator` and the concrete
+  :class:`WatershedProposalGenerator`,
+  :class:`GreedyAdditiveProposalGenerator` — proposal generators consumed by
+  fusion-move solvers (also re-exported from
+  :mod:`bioimage_cpp.graph.lifted_multicut`).
+- Loaders for the benchmark multicut problem instances.
+"""
+
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+
+import numpy as np
+
+from .. import _core
+from .._external import (
+    DEFAULT_EXTERNAL_MULTICUT_PROBLEM_PATH,
+    EXTERNAL_MULTICUT_PROBLEM_URL,
+    external_multicut_problem_path,
+    load_external_multicut_problem,
+    load_external_multicut_problem_data,
+    load_multicut_problem,
+    load_multicut_problem_data,
+    multicut_problem_path,
+)
+from .._shared import (
+    _as_edge_costs,
+    _as_node_labels,
+    _copy_graph,
+    _dense_labels,
+    _normalize_number_of_threads,
+    _subproblem_from_edges,
+)
+
+
+class MulticutObjective:
+    """Multicut objective for an undirected graph and edge costs."""
+
+    def __init__(
+        self,
+        graph,
+        edge_costs,
+        initial_labels=None,
+    ):
+        self._graph = _copy_graph(graph)
+        self._edge_costs = _as_edge_costs(edge_costs, self._graph)
+        if initial_labels is None:
+            self._labels = np.arange(self._graph.number_of_nodes, dtype=np.uint64)
+        else:
+            self._labels = _as_node_labels(initial_labels, self._graph)
+
+    @property
+    def graph(self):
+        return self._graph
+
+    @property
+    def edge_costs(self) -> np.ndarray:
+        return self._edge_costs
+
+    @property
+    def labels(self) -> np.ndarray:
+        return self._labels
+
+    @labels.setter
+    def labels(self, labels) -> None:
+        self._labels = _as_node_labels(labels, self._graph)
+
+    def set_labels(self, labels) -> None:
+        self.labels = labels
+
+    def reset_labels(self) -> None:
+        self._labels = np.arange(self._graph.number_of_nodes, dtype=np.uint64)
+
+    def energy(self, labels=None) -> float:
+        label_array = self._labels if labels is None else _as_node_labels(labels, self._graph)
+        return float(_core._multicut_energy(self._graph, self._edge_costs, label_array))
+
+
+class MulticutSolver(ABC):
+    """Base class for multicut solvers."""
+
+    @abstractmethod
+    def optimize(self, objective: MulticutObjective) -> np.ndarray:
+        """Optimize ``objective`` and return the node labeling."""
+
+
+class GreedyAdditiveMulticut(MulticutSolver):
+    """Greedy additive edge contraction (GAEC) multicut solver.
+
+    Introduced in "Fusion moves for correlation clustering":
+    http://openaccess.thecvf.com/content_cvpr_2015/papers/Beier_Fusion_Moves_for_2015_CVPR_paper.pdf
+    """
+
+    def __init__(
+        self,
+        *,
+        weight_stop: float = 0.0,
+        node_num_stop: float = -1.0,
+        add_noise: bool = False,
+        seed: int = 42,
+        sigma: float = 1.0,
+    ):
+        self.weight_stop = float(weight_stop)
+        self.node_num_stop = float(node_num_stop)
+        self.add_noise = bool(add_noise)
+        self.seed = int(seed)
+        self.sigma = float(sigma)
+
+    def optimize(self, objective: MulticutObjective) -> np.ndarray:
+        labels = _core._multicut_greedy_additive(
+            objective.graph,
+            objective.edge_costs,
+            self.weight_stop,
+            self.node_num_stop,
+            self.add_noise,
+            self.seed,
+            self.sigma,
+        )
+        objective.labels = labels
+        return objective.labels
+
+    def _build_cpp_sub_solver(self):
+        return _core._GreedyAdditiveMulticutSubSolver(
+            weight_stop=self.weight_stop,
+            node_num_stop=self.node_num_stop,
+            add_noise=self.add_noise,
+            seed=self.seed,
+            sigma=self.sigma,
+        )
+
+
+class GreedyFixationMulticut(MulticutSolver):
+    """Greedy fixation multicut solver.
+
+    Introduced in "A Comparative Study of Local Search Algorithms for
+    Correlation Clustering":
+    https://link.springer.com/chapter/10.1007/978-3-319-66709-6_9
+    """
+
+    def __init__(self, *, weight_stop: float = 0.0, node_num_stop: float = -1.0):
+        self.weight_stop = float(weight_stop)
+        self.node_num_stop = float(node_num_stop)
+
+    def optimize(self, objective: MulticutObjective) -> np.ndarray:
+        labels = _core._multicut_greedy_fixation(
+            objective.graph,
+            objective.edge_costs,
+            self.weight_stop,
+            self.node_num_stop,
+        )
+        objective.labels = labels
+        return objective.labels
+
+    def _build_cpp_sub_solver(self):
+        return _core._GreedyFixationMulticutSubSolver(
+            weight_stop=self.weight_stop,
+            node_num_stop=self.node_num_stop,
+        )
+
+
+class KernighanLinMulticut(MulticutSolver):
+    """Kernighan-Lin multicut solver.
+
+    Introduced in "An efficient heuristic procedure for partitioning graphs":
+    http://xilinx.asia/_hdl/4/eda.ee.ucla.edu/EE201A-04Spring/kl.pdf
+    """
+
+    def __init__(
+        self,
+        *,
+        number_of_outer_iterations: int = 100,
+        number_of_inner_iterations: int | None = None,
+        epsilon: float = 1.0e-6,
+    ):
+        self.number_of_outer_iterations = int(number_of_outer_iterations)
+        if self.number_of_outer_iterations < 0:
+            raise ValueError("number_of_outer_iterations must be non-negative")
+        self.number_of_inner_iterations = (
+            None if number_of_inner_iterations is None else int(number_of_inner_iterations)
+        )
+        self.epsilon = float(epsilon)
+
+    def optimize(self, objective: MulticutObjective) -> np.ndarray:
+        initial_labels = objective.labels
+        if np.array_equal(
+            initial_labels,
+            np.arange(objective.graph.number_of_nodes, dtype=np.uint64),
+        ):
+            initial_labels = _core._multicut_greedy_additive(
+                objective.graph,
+                objective.edge_costs,
+                0.0,
+                -1.0,
+                False,
+                42,
+                1.0,
+            )
+        labels = _core._multicut_kernighan_lin(
+            objective.graph,
+            objective.edge_costs,
+            initial_labels,
+            self.number_of_outer_iterations,
+            self.epsilon,
+        )
+        objective.labels = labels
+        return objective.labels
+
+    def _build_cpp_sub_solver(self):
+        return _core._KernighanLinMulticutSubSolver(
+            number_of_outer_iterations=self.number_of_outer_iterations,
+            epsilon=self.epsilon,
+        )
+
+
+class ProposalGenerator(ABC):
+    """Base class for fusion-move proposal generators.
+
+    Concrete generators carry settings on the Python side. ``_build_for_thread``
+    constructs an independent underlying C++ proposal-generator object whose
+    seed is offset by ``seed_offset`` so that parallel proposal slots produce
+    distinct, reproducible streams.
+    """
+
+    @abstractmethod
+    def _build_for_thread(
+        self,
+        graph,
+        edge_costs: np.ndarray,
+        seed_offset: int,
+    ):
+        """Construct the underlying C++ proposal generator with a seed offset."""
+
+
+class WatershedProposalGenerator(ProposalGenerator):
+    """Watershed proposal generator (nifty's fusion-move workhorse).
+
+    Per call: add Gaussian noise to edge costs, drop random seeds at the
+    endpoints of negative-cost edges, run the edge-weighted watershed.
+
+    ``n_seeds_fraction`` is the target *total seed count*: ``<= 1.0`` is a
+    fraction of ``number_of_nodes``, otherwise an absolute count. The
+    seeding loop places two seeds per iteration and runs ``n_seeds / 2``
+    times, matching nifty's ``WatershedProposalGenerator`` so the same
+    parameter value yields the same proposal density on both sides.
+    """
+
+    def __init__(
+        self,
+        *,
+        sigma: float = 1.0,
+        n_seeds_fraction: float = 0.1,
+        seed: int = 0,
+    ):
+        self.sigma = float(sigma)
+        self.n_seeds_fraction = float(n_seeds_fraction)
+        self.seed = int(seed)
+
+    def _build_for_thread(self, graph, edge_costs, seed_offset):
+        return _core._WatershedProposalGenerator(
+            graph,
+            edge_costs,
+            sigma=self.sigma,
+            n_seeds_fraction=self.n_seeds_fraction,
+            seed=self.seed + int(seed_offset),
+        )
+
+
+class GreedyAdditiveProposalGenerator(ProposalGenerator):
+    """Greedy-additive multicut proposal generator.
+
+    Per call: run the greedy-additive multicut solver with noisy edge weights;
+    the seed advances every call so successive proposals differ.
+    """
+
+    def __init__(
+        self,
+        *,
+        sigma: float = 1.0,
+        weight_stop: float = 0.0,
+        node_num_stop: float = -1.0,
+        seed: int = 0,
+    ):
+        self.sigma = float(sigma)
+        self.weight_stop = float(weight_stop)
+        self.node_num_stop = float(node_num_stop)
+        self.seed = int(seed)
+
+    def _build_for_thread(self, graph, edge_costs, seed_offset):
+        return _core._GreedyAdditiveMulticutProposalGenerator(
+            graph,
+            edge_costs,
+            sigma=self.sigma,
+            weight_stop=self.weight_stop,
+            node_num_stop=self.node_num_stop,
+            seed=self.seed + int(seed_offset),
+        )
+
+
+class FusionMoveMulticut(MulticutSolver):
+    """Fusion-move multicut solver.
+
+    Introduced in "Fusion moves for correlation clustering":
+    http://openaccess.thecvf.com/content_cvpr_2015/papers/Beier_Fusion_Moves_for_2015_CVPR_paper.pdf
+
+    Iteratively generates proposals via ``proposal_generator``, fuses them
+    with the current best labeling, and accepts improvements. The fuse step
+    solves a contracted multicut subproblem with ``sub_solver``; if omitted,
+    the default sub-solver is :class:`GreedyAdditiveMulticut`.
+
+    If the objective's current labels are the trivial singleton labeling, the
+    driver warm-starts with one greedy-additive pass before the proposal loop.
+    The best-of safety net guarantees energy never increases across iterations.
+
+    Threading: ``number_of_threads > 1`` runs ``number_of_parallel_proposals``
+    proposal generators in parallel within each iteration. Each parallel slot
+    uses an independent proposal generator with seed ``proposal_generator.seed
+    + slot_index``. By default ``number_of_parallel_proposals`` is ``2`` when
+    ``number_of_threads == 1`` and ``number_of_threads`` otherwise; pass it
+    explicitly to override.
+
+    Multi-proposal fuse: when at least two parallel pairwise fuses fail to
+    improve on the current best, a joint multi-proposal fuse is run over the
+    surviving fused candidates (matches nifty's ``ccFusionMoveBased`` stage-2
+    behaviour). With ``number_of_parallel_proposals == 2`` this stage rarely
+    triggers; it becomes useful as ``number_of_parallel_proposals`` grows.
+    """
+
+    def __init__(
+        self,
+        *,
+        proposal_generator: ProposalGenerator,
+        sub_solver: MulticutSolver | None = None,
+        number_of_iterations: int = 10,
+        stop_if_no_improvement: int = 4,
+        number_of_threads: int = 1,
+        number_of_parallel_proposals: int | None = None,
+    ):
+        if not isinstance(proposal_generator, ProposalGenerator):
+            raise TypeError("proposal_generator must inherit from ProposalGenerator")
+        if sub_solver is not None and not isinstance(sub_solver, MulticutSolver):
+            raise TypeError("sub_solver must inherit from MulticutSolver")
+        if sub_solver is not None and not hasattr(sub_solver, "_build_cpp_sub_solver"):
+            raise TypeError(
+                "sub_solver must be a built-in multicut solver "
+                "(custom Python solvers are not supported as fusion-move sub-solvers)"
+            )
+        n_threads = int(number_of_threads)
+        if n_threads < 1:
+            raise ValueError("number_of_threads must be >= 1")
+        if number_of_parallel_proposals is None:
+            n_parallel = 2 if n_threads == 1 else n_threads
+        else:
+            n_parallel = int(number_of_parallel_proposals)
+        if n_parallel < 1:
+            raise ValueError("number_of_parallel_proposals must be >= 1")
+
+        self.proposal_generator = proposal_generator
+        self.sub_solver = sub_solver
+        self.number_of_iterations = int(number_of_iterations)
+        self.stop_if_no_improvement = int(stop_if_no_improvement)
+        self.number_of_threads = n_threads
+        self.number_of_parallel_proposals = n_parallel
+        if self.number_of_iterations < 0:
+            raise ValueError("number_of_iterations must be non-negative")
+        if self.stop_if_no_improvement < 1:
+            raise ValueError("stop_if_no_improvement must be >= 1")
+
+    def optimize(self, objective: MulticutObjective) -> np.ndarray:
+        # Build one C++ proposal generator per parallel slot, each with a
+        # distinct seed offset, so parallel streams are independent and
+        # reproducible.
+        cpp_pgens = [
+            self.proposal_generator._build_for_thread(
+                objective.graph, objective.edge_costs, slot
+            )
+            for slot in range(self.number_of_parallel_proposals)
+        ]
+        cpp_sub_solver = (
+            None if self.sub_solver is None else self.sub_solver._build_cpp_sub_solver()
+        )
+        labels = _core._multicut_fusion_move(
+            objective.graph,
+            objective.edge_costs,
+            objective.labels,
+            cpp_pgens,
+            cpp_sub_solver,
+            self.number_of_iterations,
+            self.stop_if_no_improvement,
+            self.number_of_threads,
+            self.number_of_parallel_proposals,
+        )
+        objective.labels = labels
+        return objective.labels
+
+
+class ChainedMulticutSolvers(MulticutSolver):
+    def __init__(self, solvers):
+        self.solvers = list(solvers)
+        if len(self.solvers) == 0:
+            raise ValueError("solvers must contain at least one solver")
+        if not all(isinstance(solver, MulticutSolver) for solver in self.solvers):
+            raise TypeError("all solvers must inherit from MulticutSolver")
+
+    def optimize(self, objective: MulticutObjective) -> np.ndarray:
+        labels = objective.labels
+        for solver in self.solvers:
+            labels = solver.optimize(objective)
+        return labels
+
+
+class MulticutDecomposer(MulticutSolver):
+    """Decomposition-based multicut solver.
+
+    Introduced in "Break and Conquer: Efficient Correlation Clustering for
+    Image Segmentation":
+    https://link.springer.com/chapter/10.1007/978-3-642-39140-8_9
+
+    Splits the multicut problem into connected components (based on positive
+    edge costs) and solves each component independently with ``sub_solver``.
+    """
+
+    def __init__(
+        self,
+        sub_solver: MulticutSolver,
+        *,
+        fallthrough_solver: MulticutSolver | None = None,
+        number_of_threads: int = 0,
+    ):
+        if not isinstance(sub_solver, MulticutSolver):
+            raise TypeError("sub_solver must inherit from MulticutSolver")
+        if fallthrough_solver is not None and not isinstance(fallthrough_solver, MulticutSolver):
+            raise TypeError("fallthrough_solver must inherit from MulticutSolver")
+        self.sub_solver = sub_solver
+        self.fallthrough_solver = fallthrough_solver
+        self.number_of_threads = _normalize_number_of_threads(number_of_threads)
+
+    def optimize(self, objective: MulticutObjective) -> np.ndarray:
+        # Local import to avoid a circular dependency at module-load time:
+        # ``connected_components`` lives in the top-level ``bioimage_cpp.graph``
+        # namespace, which itself imports this submodule.
+        from .. import connected_components
+
+        if self.fallthrough_solver is None and isinstance(self.sub_solver, GreedyAdditiveMulticut):
+            return self.sub_solver.optimize(objective)
+
+        component_labels = connected_components(
+            objective.graph,
+            edge_mask=objective.edge_costs > 0.0,
+        )
+        number_of_components = int(component_labels.max()) + 1 if component_labels.size else 0
+        if number_of_components <= 1:
+            solver = self.fallthrough_solver or self.sub_solver
+            return solver.optimize(objective)
+
+        global_labels = np.empty(objective.graph.number_of_nodes, dtype=np.uint64)
+        label_offset = 0
+        all_uvs = objective.graph.uv_ids()
+        for component in range(number_of_components):
+            nodes = np.flatnonzero(component_labels == component).astype(np.uint64)
+            if nodes.size == 1:
+                global_labels[int(nodes[0])] = label_offset
+                label_offset += 1
+                continue
+
+            edge_ids = objective.graph.edges_from_node_list(nodes)
+            sub_graph, sub_costs = _subproblem_from_edges(
+                objective.graph.number_of_nodes,
+                nodes,
+                all_uvs[edge_ids],
+                objective.edge_costs[edge_ids],
+            )
+            sub_objective = MulticutObjective(sub_graph, sub_costs)
+            sub_labels = self.sub_solver.optimize(sub_objective)
+            sub_labels = _dense_labels(sub_labels)
+            global_labels[nodes] = sub_labels + label_offset
+            label_offset += int(sub_labels.max()) + 1
+
+        objective.labels = _dense_labels(global_labels)
+        return objective.labels
+
+
+__all__ = [
+    "ChainedMulticutSolvers",
+    "DEFAULT_EXTERNAL_MULTICUT_PROBLEM_PATH",
+    "EXTERNAL_MULTICUT_PROBLEM_URL",
+    "FusionMoveMulticut",
+    "GreedyAdditiveMulticut",
+    "GreedyAdditiveProposalGenerator",
+    "GreedyFixationMulticut",
+    "KernighanLinMulticut",
+    "MulticutDecomposer",
+    "MulticutObjective",
+    "MulticutSolver",
+    "ProposalGenerator",
+    "WatershedProposalGenerator",
+    "external_multicut_problem_path",
+    "load_external_multicut_problem",
+    "load_external_multicut_problem_data",
+    "load_multicut_problem",
+    "load_multicut_problem_data",
+    "multicut_problem_path",
+]
diff --git a/src/bioimage_cpp/graph/mutex_watershed.py b/src/bioimage_cpp/graph/mutex_watershed.py
new file mode 100644
index 0000000..69d490a
--- /dev/null
+++ b/src/bioimage_cpp/graph/mutex_watershed.py
@@ -0,0 +1,207 @@
+"""Mutex watershed clustering on undirected graphs.
+
+The functions in this submodule operate on an attractive base graph plus a
+set of explicit repulsive (mutex) constraints. They are conceptually close to
+the lifted multicut input format — the same ``(graph, edge_costs, mutex_uvs,
+mutex_costs)`` arrays can be reused — but the algorithms differ: mutex
+constraints are *hard* (a single mutex edge separates two components forever),
+whereas lifted costs are soft.
+"""
+
+from __future__ import annotations
+
+import numpy as np
+
+from .. import _core
+from ._shared import (
+    _as_1d_array,
+    _as_uv_array,
+    _resolve_weight_dtype,
+)
+
+
+_MUTEX_WATERSHED_CLUSTERING_BY_DTYPE = {
+    np.dtype("float32"): _core._mutex_watershed_clustering_float32,
+    np.dtype("float64"): _core._mutex_watershed_clustering_float64,
+}
+
+_SEMANTIC_MUTEX_WATERSHED_CLUSTERING_BY_DTYPE = {
+    np.dtype("float32"): _core._semantic_mutex_watershed_clustering_float32,
+    np.dtype("float64"): _core._semantic_mutex_watershed_clustering_float64,
+}
+
+
+def mutex_watershed_clustering(
+    graph,
+    edge_costs,
+    mutex_uvs,
+    mutex_costs,
+) -> np.ndarray:
+    """Mutex watershed clustering on an undirected graph.
+
+    Introduced in "The Mutex Watershed and its Objective: Efficient,
+    Parameter-Free Image Partitioning":
+    https://arxiv.org/pdf/1904.12654.pdf
+
+    Attractive edges come from ``graph`` (one cost per edge in
+    ``edge_costs``); repulsive long-range edges are supplied separately as
+    ``mutex_uvs`` with weights ``mutex_costs``. All edges are jointly sorted
+    by descending weight and processed in a single pass: an attractive edge
+    merges its two components unless a mutex constraint already separates
+    them; a mutex edge installs a constraint between the two current
+    components.
+
+    The input format matches
+    :class:`bioimage_cpp.graph.lifted_multicut.LiftedMulticutObjective` — the
+    same ``(graph, edge_costs, lifted_uvs, lifted_costs)`` arrays can be
+    passed here as ``(graph, edge_costs, mutex_uvs, mutex_costs)`` — though
+    the algorithms differ (mutex constraints are hard; lifted costs are
+    soft).
+
+    Parameters
+    ----------
+    graph:
+        :class:`bioimage_cpp.graph.UndirectedGraph` or
+        :class:`bioimage_cpp.graph.RegionAdjacencyGraph` defining the
+        attractive edges.
+    edge_costs:
+        1D array of length ``graph.number_of_edges``. Supported dtypes are
+        ``float32`` and ``float64``; other floating dtypes are cast to
+        ``float32``. Higher values are more attractive.
+    mutex_uvs:
+        ``(n_mutex, 2)`` uint64 array of (u, v) pairs for the mutex edges.
+    mutex_costs:
+        1D array of length ``n_mutex``. Same dtype rules as ``edge_costs``;
+        if the two dtypes differ both are promoted to ``float64``. Higher
+        values are stronger repulsions.
+
+    Returns
+    -------
+    np.ndarray
+        ``(graph.number_of_nodes,)`` uint64 array. Dense node labels in
+        ``[0, k)`` in first-occurrence order.
+    """
+    edge_cost_array = _resolve_weight_dtype(edge_costs, "edge_costs")
+    mutex_cost_array = _resolve_weight_dtype(mutex_costs, "mutex_costs")
+    # Use a single instantiation for both arrays. If the user supplied
+    # mismatched dtypes (one float32, one float64) we promote both to
+    # float64 — the wider type — rather than silently downcasting.
+    if edge_cost_array.dtype != mutex_cost_array.dtype:
+        edge_cost_array = edge_cost_array.astype(np.float64, copy=False)
+        mutex_cost_array = mutex_cost_array.astype(np.float64, copy=False)
+
+    edge_cost_array = _as_1d_array(
+        edge_cost_array,
+        edge_cost_array.dtype,
+        "edge_costs",
+        int(graph.number_of_edges),
+    )
+    mutex_uv_array = _as_uv_array(mutex_uvs, "mutex_uvs")
+    mutex_cost_array = _as_1d_array(
+        mutex_cost_array,
+        mutex_cost_array.dtype,
+        "mutex_costs",
+        int(mutex_uv_array.shape[0]),
+    )
+    run = _MUTEX_WATERSHED_CLUSTERING_BY_DTYPE[edge_cost_array.dtype]
+    return run(graph, edge_cost_array, mutex_uv_array, mutex_cost_array)
+
+
+def semantic_mutex_watershed_clustering(
+    graph,
+    edge_costs,
+    mutex_uvs,
+    mutex_costs,
+    semantic_node_classes,
+    semantic_costs,
+) -> tuple[np.ndarray, np.ndarray]:
+    """Semantic mutex watershed clustering on an undirected graph.
+
+    Introduced in "The Semantic Mutex Watershed for Efficient Bottom-Up
+    Semantic Instance Segmentation":
+    https://arxiv.org/pdf/1912.12717.pdf
+
+    Extends :func:`mutex_watershed_clustering` with a third group of edges
+    that attach semantic class labels to clusters. Two clusters carrying
+    different semantic class labels are forbidden from merging; otherwise
+    the algorithm proceeds as in the non-semantic case (attractive edges
+    merge; mutex edges block).
+
+    Parameters
+    ----------
+    graph:
+        :class:`bioimage_cpp.graph.UndirectedGraph` or
+        :class:`bioimage_cpp.graph.RegionAdjacencyGraph` defining the
+        attractive edges.
+    edge_costs:
+        1D array of length ``graph.number_of_edges``. Same dtype rules as
+        :func:`mutex_watershed_clustering`.
+    mutex_uvs:
+        ``(n_mutex, 2)`` uint64 array of (u, v) pairs for the mutex edges.
+    mutex_costs:
+        1D array of length ``n_mutex``.
+    semantic_node_classes:
+        ``(n_semantic, 2)`` uint64 array. Column 0 is a node id; column 1
+        is the semantic class id (non-negative integer). The semantic class
+        id is interpreted as a signed integer when reported back, so values
+        above ``np.iinfo(np.int64).max`` are out of range.
+    semantic_costs:
+        1D array of length ``n_semantic``. Same dtype rules as
+        ``edge_costs``; if the floating dtypes of the three weight arrays
+        do not all agree, all three are promoted to ``float64``.
+
+    Returns
+    -------
+    node_labels:
+        ``(graph.number_of_nodes,)`` uint64 array. Dense node labels in
+        ``[0, k)`` in first-occurrence order.
+    semantic_labels:
+        ``(graph.number_of_nodes,)`` int64 array. Per-node semantic class
+        id, or ``-1`` for clusters that received no semantic assignment.
+    """
+    edge_cost_array = _resolve_weight_dtype(edge_costs, "edge_costs")
+    mutex_cost_array = _resolve_weight_dtype(mutex_costs, "mutex_costs")
+    semantic_cost_array = _resolve_weight_dtype(semantic_costs, "semantic_costs")
+
+    dtypes = {edge_cost_array.dtype, mutex_cost_array.dtype, semantic_cost_array.dtype}
+    if len(dtypes) > 1:
+        edge_cost_array = edge_cost_array.astype(np.float64, copy=False)
+        mutex_cost_array = mutex_cost_array.astype(np.float64, copy=False)
+        semantic_cost_array = semantic_cost_array.astype(np.float64, copy=False)
+
+    edge_cost_array = _as_1d_array(
+        edge_cost_array,
+        edge_cost_array.dtype,
+        "edge_costs",
+        int(graph.number_of_edges),
+    )
+    mutex_uv_array = _as_uv_array(mutex_uvs, "mutex_uvs")
+    mutex_cost_array = _as_1d_array(
+        mutex_cost_array,
+        mutex_cost_array.dtype,
+        "mutex_costs",
+        int(mutex_uv_array.shape[0]),
+    )
+    semantic_uv_array = _as_uv_array(semantic_node_classes, "semantic_node_classes")
+    semantic_cost_array = _as_1d_array(
+        semantic_cost_array,
+        semantic_cost_array.dtype,
+        "semantic_costs",
+        int(semantic_uv_array.shape[0]),
+    )
+
+    run = _SEMANTIC_MUTEX_WATERSHED_CLUSTERING_BY_DTYPE[edge_cost_array.dtype]
+    return run(
+        graph,
+        edge_cost_array,
+        mutex_uv_array,
+        mutex_cost_array,
+        semantic_uv_array,
+        semantic_cost_array,
+    )
+
+
+__all__ = [
+    "mutex_watershed_clustering",
+    "semantic_mutex_watershed_clustering",
+]
diff --git a/src/bioimage_cpp/segmentation/mutex_watershed.py b/src/bioimage_cpp/segmentation/mutex_watershed.py
index 78e02e5..1927a84 100644
--- a/src/bioimage_cpp/segmentation/mutex_watershed.py
+++ b/src/bioimage_cpp/segmentation/mutex_watershed.py
@@ -30,6 +30,10 @@ def mutex_watershed(
 ) -> np.ndarray:
     """Run mutex watershed on a 2D or 3D image-derived grid graph.
 
+    Introduced in "The Mutex Watershed and its Objective: Efficient,
+    Parameter-Free Image Partitioning":
+    https://arxiv.org/pdf/1904.12654.pdf
+
     Parameters
     ----------
     affinities:
@@ -127,6 +131,10 @@ def semantic_mutex_watershed(
 ) -> tuple[np.ndarray, np.ndarray]:
     """Run semantic mutex watershed on a 2D or 3D image-derived grid graph.
 
+    Introduced in "The Semantic Mutex Watershed for Efficient Bottom-Up
+    Semantic Instance Segmentation":
+    https://arxiv.org/pdf/1912.12717.pdf
+
     The affinity array stacks three groups of channels along axis 0:
 
     1. ``[0, number_of_attractive_channels)`` — attractive grid edges.
diff --git a/tests/graph/lifted_multicut/test_external_problem.py b/tests/graph/lifted_multicut/test_external_problem.py
index 0a2603a..f2039ce 100644
--- a/tests/graph/lifted_multicut/test_external_problem.py
+++ b/tests/graph/lifted_multicut/test_external_problem.py
@@ -16,7 +16,7 @@
 
 def _load_problem():
     try:
-        return bic.graph.load_lifted_multicut_problem("2d", timeout=30)
+        return bic.graph.lifted_multicut.load_lifted_multicut_problem("2d", timeout=30)
     except (FileNotFoundError, ModuleNotFoundError, RuntimeError) as error:
         pytest.skip(str(error))
 
@@ -26,18 +26,18 @@ def test_solvers_on_2d_lifted_problem():
     graph = bic.graph.UndirectedGraph.from_edges(problem.n_nodes, problem.local_uvs)
 
     solvers = [
-        bic.graph.LiftedGreedyAdditiveMulticut(),
-        bic.graph.LiftedKernighanLinMulticut(number_of_outer_iterations=10),
-        bic.graph.LiftedChainedSolvers(
+        bic.graph.lifted_multicut.LiftedGreedyAdditiveMulticut(),
+        bic.graph.lifted_multicut.LiftedKernighanLinMulticut(number_of_outer_iterations=10),
+        bic.graph.lifted_multicut.LiftedChainedSolvers(
             [
-                bic.graph.LiftedGreedyAdditiveMulticut(),
-                bic.graph.LiftedKernighanLinMulticut(number_of_outer_iterations=10),
+                bic.graph.lifted_multicut.LiftedGreedyAdditiveMulticut(),
+                bic.graph.lifted_multicut.LiftedKernighanLinMulticut(number_of_outer_iterations=10),
             ]
         ),
     ]
 
     for solver in solvers:
-        objective = bic.graph.LiftedMulticutObjective(
+        objective = bic.graph.lifted_multicut.LiftedMulticutObjective(
             graph,
             problem.local_costs,
             lifted_uvs=problem.lifted_uvs,
diff --git a/tests/graph/lifted_multicut/test_fusion_move.py b/tests/graph/lifted_multicut/test_fusion_move.py
index e6c8105..07d9974 100644
--- a/tests/graph/lifted_multicut/test_fusion_move.py
+++ b/tests/graph/lifted_multicut/test_fusion_move.py
@@ -25,7 +25,7 @@ def _grid_lifted_problem(shape=(4, 4), bfs_distance=2, seed=0):
     base = bic.graph.UndirectedGraph.from_edges(shape[0] * shape[1], edges)
     base_costs = np.array(base_costs, dtype=np.float64)
 
-    objective = bic.graph.LiftedMulticutObjective(
+    objective = bic.graph.lifted_multicut.LiftedMulticutObjective(
         base, base_costs, bfs_distance=bfs_distance
     )
     # Reweight a handful of lifted edges to non-zero, mixed sign, so they
@@ -41,7 +41,7 @@ def _grid_lifted_problem(shape=(4, 4), bfs_distance=2, seed=0):
 
 def test_fusion_move_splits_chain_along_repulsive_lifted_edge(chain_with_lifted):
     base, base_costs, lifted_uvs, lifted_costs = chain_with_lifted
-    objective = bic.graph.LiftedMulticutObjective(
+    objective = bic.graph.lifted_multicut.LiftedMulticutObjective(
         base, base_costs, lifted_uvs=lifted_uvs, lifted_costs=lifted_costs
     )
 
@@ -50,8 +50,8 @@ def test_fusion_move_splits_chain_along_repulsive_lifted_edge(chain_with_lifted)
     # subsequent proposal/fuse loop must not regress past that.
     merged_energy = objective.energy(np.zeros(4, dtype=np.uint64))
 
-    solver = bic.graph.FusionMoveLiftedMulticut(
-        proposal_generator=bic.graph.WatershedProposalGenerator(seed=1),
+    solver = bic.graph.lifted_multicut.FusionMoveLiftedMulticut(
+        proposal_generator=bic.graph.lifted_multicut.WatershedProposalGenerator(seed=1),
         number_of_iterations=10,
     )
     labels = solver.optimize(objective)
@@ -65,15 +65,15 @@ def test_fusion_move_splits_chain_along_repulsive_lifted_edge(chain_with_lifted)
 
 def test_fusion_move_safety_net_never_regresses(chain_with_lifted):
     base, base_costs, lifted_uvs, lifted_costs = chain_with_lifted
-    objective = bic.graph.LiftedMulticutObjective(
+    objective = bic.graph.lifted_multicut.LiftedMulticutObjective(
         base, base_costs, lifted_uvs=lifted_uvs, lifted_costs=lifted_costs
     )
-    baseline = bic.graph.LiftedGreedyAdditiveMulticut().optimize(objective)
+    baseline = bic.graph.lifted_multicut.LiftedGreedyAdditiveMulticut().optimize(objective)
     baseline_energy = objective.energy(baseline)
 
     objective.reset_labels()
-    solver = bic.graph.FusionMoveLiftedMulticut(
-        proposal_generator=bic.graph.WatershedProposalGenerator(seed=7),
+    solver = bic.graph.lifted_multicut.FusionMoveLiftedMulticut(
+        proposal_generator=bic.graph.lifted_multicut.WatershedProposalGenerator(seed=7),
         number_of_iterations=8,
     )
     labels = solver.optimize(objective)
@@ -89,15 +89,15 @@ def test_fusion_move_matches_multicut_without_lifted_edges():
     )
     base_costs = np.array([5, -20, 5, 5, -20, 5, 5], dtype=np.float64)
 
-    mc_objective = bic.graph.MulticutObjective(base, base_costs)
-    mc_labels = bic.graph.FusionMoveMulticut(
-        proposal_generator=bic.graph.WatershedProposalGenerator(seed=3),
+    mc_objective = bic.graph.multicut.MulticutObjective(base, base_costs)
+    mc_labels = bic.graph.multicut.FusionMoveMulticut(
+        proposal_generator=bic.graph.lifted_multicut.WatershedProposalGenerator(seed=3),
         number_of_iterations=5,
     ).optimize(mc_objective)
 
-    lmc_objective = bic.graph.LiftedMulticutObjective(base, base_costs)
-    lmc_labels = bic.graph.FusionMoveLiftedMulticut(
-        proposal_generator=bic.graph.WatershedProposalGenerator(seed=3),
+    lmc_objective = bic.graph.lifted_multicut.LiftedMulticutObjective(base, base_costs)
+    lmc_labels = bic.graph.lifted_multicut.FusionMoveLiftedMulticut(
+        proposal_generator=bic.graph.lifted_multicut.WatershedProposalGenerator(seed=3),
         number_of_iterations=5,
     ).optimize(lmc_objective)
 
@@ -111,10 +111,10 @@ def test_fusion_move_matches_multicut_without_lifted_edges():
 def test_fusion_move_warm_starts_from_singleton():
     base = bic.graph.UndirectedGraph.from_edges(3, [[0, 1], [1, 2]])
     base_costs = np.array([2.0, 2.0], dtype=np.float64)
-    objective = bic.graph.LiftedMulticutObjective(base, base_costs)
+    objective = bic.graph.lifted_multicut.LiftedMulticutObjective(base, base_costs)
     # Singleton labels trigger the internal lifted greedy-additive warm start.
-    labels = bic.graph.FusionMoveLiftedMulticut(
-        proposal_generator=bic.graph.WatershedProposalGenerator(seed=0),
+    labels = bic.graph.lifted_multicut.FusionMoveLiftedMulticut(
+        proposal_generator=bic.graph.lifted_multicut.WatershedProposalGenerator(seed=0),
         number_of_iterations=3,
     ).optimize(objective)
     same_partition(labels, [0, 0, 0])
@@ -126,11 +126,11 @@ def test_fusion_move_keeps_base_disconnected_clusters_separate(
     base, base_costs, lifted_uvs, lifted_costs = (
         disjoint_clusters_with_attractive_lifted
     )
-    objective = bic.graph.LiftedMulticutObjective(
+    objective = bic.graph.lifted_multicut.LiftedMulticutObjective(
         base, base_costs, lifted_uvs=lifted_uvs, lifted_costs=lifted_costs
     )
-    labels = bic.graph.FusionMoveLiftedMulticut(
-        proposal_generator=bic.graph.WatershedProposalGenerator(seed=0),
+    labels = bic.graph.lifted_multicut.FusionMoveLiftedMulticut(
+        proposal_generator=bic.graph.lifted_multicut.WatershedProposalGenerator(seed=0),
         number_of_iterations=5,
     ).optimize(objective)
     # The (0, 2) lifted edge is attractive, but the base graph has no path
@@ -142,12 +142,12 @@ def test_fusion_move_keeps_base_disconnected_clusters_separate(
 def test_fusion_move_greedy_additive_proposal_generator_runs():
     objective = _grid_lifted_problem()
     baseline = objective.energy(
-        bic.graph.LiftedGreedyAdditiveMulticut().optimize(objective)
+        bic.graph.lifted_multicut.LiftedGreedyAdditiveMulticut().optimize(objective)
     )
 
     objective.reset_labels()
-    solver = bic.graph.FusionMoveLiftedMulticut(
-        proposal_generator=bic.graph.GreedyAdditiveProposalGenerator(
+    solver = bic.graph.lifted_multicut.FusionMoveLiftedMulticut(
+        proposal_generator=bic.graph.lifted_multicut.GreedyAdditiveProposalGenerator(
             seed=3, sigma=1.0
         ),
         number_of_iterations=5,
@@ -159,14 +159,14 @@ def test_fusion_move_greedy_additive_proposal_generator_runs():
 @pytest.mark.parametrize(
     "sub_solver",
     [
-        bic.graph.LiftedGreedyAdditiveMulticut(),
-        bic.graph.LiftedKernighanLinMulticut(number_of_outer_iterations=3),
+        bic.graph.lifted_multicut.LiftedGreedyAdditiveMulticut(),
+        bic.graph.lifted_multicut.LiftedKernighanLinMulticut(number_of_outer_iterations=3),
     ],
 )
 def test_fusion_move_sub_solver_pluggability(sub_solver):
     objective = _grid_lifted_problem()
-    solver = bic.graph.FusionMoveLiftedMulticut(
-        proposal_generator=bic.graph.WatershedProposalGenerator(seed=2),
+    solver = bic.graph.lifted_multicut.FusionMoveLiftedMulticut(
+        proposal_generator=bic.graph.lifted_multicut.WatershedProposalGenerator(seed=2),
         sub_solver=sub_solver,
         number_of_iterations=4,
     )
@@ -180,14 +180,14 @@ def test_fusion_move_stops_after_no_improvement():
     # threshold: the loop must terminate quickly.
     base = bic.graph.UndirectedGraph.from_edges(3, [[0, 1], [1, 2], [0, 2]])
     base_costs = np.array([2.0, 2.0, -5.0], dtype=np.float64)
-    objective = bic.graph.LiftedMulticutObjective(base, base_costs)
+    objective = bic.graph.lifted_multicut.LiftedMulticutObjective(base, base_costs)
     baseline = objective.energy(
-        bic.graph.LiftedGreedyAdditiveMulticut().optimize(objective)
+        bic.graph.lifted_multicut.LiftedGreedyAdditiveMulticut().optimize(objective)
     )
 
     objective.reset_labels()
-    solver = bic.graph.FusionMoveLiftedMulticut(
-        proposal_generator=bic.graph.WatershedProposalGenerator(seed=0),
+    solver = bic.graph.lifted_multicut.FusionMoveLiftedMulticut(
+        proposal_generator=bic.graph.lifted_multicut.WatershedProposalGenerator(seed=0),
         number_of_iterations=1000,
         stop_if_no_improvement=1,
     )
@@ -197,20 +197,20 @@ def test_fusion_move_stops_after_no_improvement():
 
 def test_fusion_move_chains_with_kernighan_lin(chain_with_lifted):
     base, base_costs, lifted_uvs, lifted_costs = chain_with_lifted
-    objective = bic.graph.LiftedMulticutObjective(
+    objective = bic.graph.lifted_multicut.LiftedMulticutObjective(
         base, base_costs, lifted_uvs=lifted_uvs, lifted_costs=lifted_costs
     )
     baseline = objective.energy(
-        bic.graph.LiftedGreedyAdditiveMulticut().optimize(objective)
+        bic.graph.lifted_multicut.LiftedGreedyAdditiveMulticut().optimize(objective)
     )
 
     objective.reset_labels()
-    solver = bic.graph.LiftedChainedSolvers([
-        bic.graph.FusionMoveLiftedMulticut(
-            proposal_generator=bic.graph.WatershedProposalGenerator(seed=11),
+    solver = bic.graph.lifted_multicut.LiftedChainedSolvers([
+        bic.graph.lifted_multicut.FusionMoveLiftedMulticut(
+            proposal_generator=bic.graph.lifted_multicut.WatershedProposalGenerator(seed=11),
             number_of_iterations=5,
         ),
-        bic.graph.LiftedKernighanLinMulticut(number_of_outer_iterations=3),
+        bic.graph.lifted_multicut.LiftedKernighanLinMulticut(number_of_outer_iterations=3),
     ])
     labels = solver.optimize(objective)
     assert objective.energy(labels) <= baseline + 1e-9
@@ -218,38 +218,38 @@ def test_fusion_move_chains_with_kernighan_lin(chain_with_lifted):
 
 def test_fusion_move_rejects_non_proposal_generator():
     with pytest.raises(TypeError, match="proposal_generator"):
-        bic.graph.FusionMoveLiftedMulticut(proposal_generator=object())
+        bic.graph.lifted_multicut.FusionMoveLiftedMulticut(proposal_generator=object())
 
 
 def test_fusion_move_rejects_non_lifted_sub_solver():
     with pytest.raises(TypeError, match="sub_solver"):
-        bic.graph.FusionMoveLiftedMulticut(
-            proposal_generator=bic.graph.WatershedProposalGenerator(),
-            sub_solver=bic.graph.GreedyAdditiveMulticut(),
+        bic.graph.lifted_multicut.FusionMoveLiftedMulticut(
+            proposal_generator=bic.graph.lifted_multicut.WatershedProposalGenerator(),
+            sub_solver=bic.graph.multicut.GreedyAdditiveMulticut(),
         )
 
 
 def test_fusion_move_rejects_zero_thread_count():
     with pytest.raises(ValueError, match="number_of_threads"):
-        bic.graph.FusionMoveLiftedMulticut(
-            proposal_generator=bic.graph.WatershedProposalGenerator(),
+        bic.graph.lifted_multicut.FusionMoveLiftedMulticut(
+            proposal_generator=bic.graph.lifted_multicut.WatershedProposalGenerator(),
             number_of_threads=0,
         )
 
 
 def test_fusion_move_rejects_zero_parallel_proposals():
     with pytest.raises(ValueError, match="number_of_parallel_proposals"):
-        bic.graph.FusionMoveLiftedMulticut(
-            proposal_generator=bic.graph.WatershedProposalGenerator(),
+        bic.graph.lifted_multicut.FusionMoveLiftedMulticut(
+            proposal_generator=bic.graph.lifted_multicut.WatershedProposalGenerator(),
             number_of_parallel_proposals=0,
         )
 
 
 def test_fusion_move_runs_on_empty_graph():
     base = bic.graph.UndirectedGraph(0)
-    objective = bic.graph.LiftedMulticutObjective(base, np.zeros(0))
-    solver = bic.graph.FusionMoveLiftedMulticut(
-        proposal_generator=bic.graph.WatershedProposalGenerator(),
+    objective = bic.graph.lifted_multicut.LiftedMulticutObjective(base, np.zeros(0))
+    solver = bic.graph.lifted_multicut.FusionMoveLiftedMulticut(
+        proposal_generator=bic.graph.lifted_multicut.WatershedProposalGenerator(),
     )
     labels = solver.optimize(objective)
     assert labels.shape == (0,)
@@ -258,12 +258,12 @@ def test_fusion_move_runs_on_empty_graph():
 def test_fusion_move_parallel_threads_match_single_threaded_safety_net():
     objective = _grid_lifted_problem(seed=11)
     baseline = objective.energy(
-        bic.graph.LiftedGreedyAdditiveMulticut().optimize(objective)
+        bic.graph.lifted_multicut.LiftedGreedyAdditiveMulticut().optimize(objective)
     )
 
     objective.reset_labels()
-    solver = bic.graph.FusionMoveLiftedMulticut(
-        proposal_generator=bic.graph.WatershedProposalGenerator(seed=11),
+    solver = bic.graph.lifted_multicut.FusionMoveLiftedMulticut(
+        proposal_generator=bic.graph.lifted_multicut.WatershedProposalGenerator(seed=11),
         number_of_threads=4,
         number_of_iterations=5,
     )
@@ -274,12 +274,12 @@ def test_fusion_move_parallel_threads_match_single_threaded_safety_net():
 def test_fusion_move_multi_proposal_runs():
     objective = _grid_lifted_problem(seed=3)
     baseline = objective.energy(
-        bic.graph.LiftedGreedyAdditiveMulticut().optimize(objective)
+        bic.graph.lifted_multicut.LiftedGreedyAdditiveMulticut().optimize(objective)
     )
 
     objective.reset_labels()
-    solver = bic.graph.FusionMoveLiftedMulticut(
-        proposal_generator=bic.graph.WatershedProposalGenerator(seed=3),
+    solver = bic.graph.lifted_multicut.FusionMoveLiftedMulticut(
+        proposal_generator=bic.graph.lifted_multicut.WatershedProposalGenerator(seed=3),
         number_of_threads=2,
         number_of_parallel_proposals=4,
         number_of_iterations=5,
@@ -291,8 +291,8 @@ def test_fusion_move_multi_proposal_runs():
 def test_fusion_move_parallel_is_deterministic_given_settings():
     def run():
         objective = _grid_lifted_problem(seed=7)
-        solver = bic.graph.FusionMoveLiftedMulticut(
-            proposal_generator=bic.graph.WatershedProposalGenerator(seed=7),
+        solver = bic.graph.lifted_multicut.FusionMoveLiftedMulticut(
+            proposal_generator=bic.graph.lifted_multicut.WatershedProposalGenerator(seed=7),
             number_of_threads=4,
             number_of_iterations=5,
         )
@@ -302,9 +302,9 @@ def run():
 
 
 def test_fusion_move_default_parallel_proposals_tracks_threads():
-    pgen = bic.graph.WatershedProposalGenerator()
-    one_thread = bic.graph.FusionMoveLiftedMulticut(proposal_generator=pgen)
-    four_threads = bic.graph.FusionMoveLiftedMulticut(
+    pgen = bic.graph.lifted_multicut.WatershedProposalGenerator()
+    one_thread = bic.graph.lifted_multicut.FusionMoveLiftedMulticut(proposal_generator=pgen)
+    four_threads = bic.graph.lifted_multicut.FusionMoveLiftedMulticut(
         proposal_generator=pgen, number_of_threads=4
     )
     assert one_thread.number_of_parallel_proposals == 2
diff --git a/tests/graph/lifted_multicut/test_greedy_additive.py b/tests/graph/lifted_multicut/test_greedy_additive.py
index ac5a881..4629b5e 100644
--- a/tests/graph/lifted_multicut/test_greedy_additive.py
+++ b/tests/graph/lifted_multicut/test_greedy_additive.py
@@ -8,11 +8,11 @@
 
 def test_greedy_additive_stops_before_lifted_repulsive_dominates(chain_with_lifted):
     base, base_costs, lifted_uvs, lifted_costs = chain_with_lifted
-    objective = bic.graph.LiftedMulticutObjective(
+    objective = bic.graph.lifted_multicut.LiftedMulticutObjective(
         base, base_costs, lifted_uvs=lifted_uvs, lifted_costs=lifted_costs
     )
 
-    labels = bic.graph.LiftedGreedyAdditiveMulticut().optimize(objective)
+    labels = bic.graph.lifted_multicut.LiftedGreedyAdditiveMulticut().optimize(objective)
     # As base edges get contracted, the lifted -10 weight is folded into the
     # remaining contracted edge to the unmerged endpoint. Once the summed
     # weight drops below `weight_stop=0` greedy stops, leaving the chain
@@ -27,11 +27,11 @@ def test_greedy_additive_keeps_base_disconnected_clusters(
     disjoint_clusters_with_attractive_lifted,
 ):
     base, base_costs, lifted_uvs, lifted_costs = disjoint_clusters_with_attractive_lifted
-    objective = bic.graph.LiftedMulticutObjective(
+    objective = bic.graph.lifted_multicut.LiftedMulticutObjective(
         base, base_costs, lifted_uvs=lifted_uvs, lifted_costs=lifted_costs
     )
 
-    labels = bic.graph.LiftedGreedyAdditiveMulticut().optimize(objective)
+    labels = bic.graph.lifted_multicut.LiftedGreedyAdditiveMulticut().optimize(objective)
     # Lifted (0, 2) is attractive but there's no base path to merge across,
     # so greedy-additive must keep the two base components separate.
     assert labels[0] == labels[1]
@@ -41,11 +41,11 @@ def test_greedy_additive_keeps_base_disconnected_clusters(
 
 def test_greedy_additive_respects_weight_stop(chain_with_lifted):
     base, base_costs, lifted_uvs, lifted_costs = chain_with_lifted
-    objective = bic.graph.LiftedMulticutObjective(
+    objective = bic.graph.lifted_multicut.LiftedMulticutObjective(
         base, base_costs, lifted_uvs=lifted_uvs, lifted_costs=lifted_costs
     )
 
-    labels = bic.graph.LiftedGreedyAdditiveMulticut(weight_stop=3.0).optimize(objective)
+    labels = bic.graph.lifted_multicut.LiftedGreedyAdditiveMulticut(weight_stop=3.0).optimize(objective)
     # Every base edge has weight 2.0, below the stop threshold, so no contractions.
     same_partition(labels, [0, 1, 2, 3])
 
@@ -62,11 +62,11 @@ def test_greedy_additive_matches_multicut_when_no_lifted_edges():
     )
     base_costs = np.array([5, -20, 5, 5, -20, 5, 5], dtype=np.float64)
 
-    mc_objective = bic.graph.MulticutObjective(base, base_costs)
-    mc_labels = bic.graph.GreedyAdditiveMulticut().optimize(mc_objective)
+    mc_objective = bic.graph.multicut.MulticutObjective(base, base_costs)
+    mc_labels = bic.graph.multicut.GreedyAdditiveMulticut().optimize(mc_objective)
 
-    lmc_objective = bic.graph.LiftedMulticutObjective(base, base_costs)
-    lmc_labels = bic.graph.LiftedGreedyAdditiveMulticut().optimize(lmc_objective)
+    lmc_objective = bic.graph.lifted_multicut.LiftedMulticutObjective(base, base_costs)
+    lmc_labels = bic.graph.lifted_multicut.LiftedGreedyAdditiveMulticut().optimize(lmc_objective)
 
     same_partition(lmc_labels, mc_labels)
 
@@ -81,8 +81,8 @@ def test_greedy_additive_lifted_attractive_merges_through_base_path():
     lifted_uvs = np.array([[0, 2]], dtype=np.uint64)
     lifted_costs = np.array([10.0], dtype=np.float64)
 
-    objective = bic.graph.LiftedMulticutObjective(
+    objective = bic.graph.lifted_multicut.LiftedMulticutObjective(
         base, base_costs, lifted_uvs=lifted_uvs, lifted_costs=lifted_costs
     )
-    labels = bic.graph.LiftedGreedyAdditiveMulticut().optimize(objective)
+    labels = bic.graph.lifted_multicut.LiftedGreedyAdditiveMulticut().optimize(objective)
     same_partition(labels, [0, 0, 0])
diff --git a/tests/graph/lifted_multicut/test_kernighan_lin.py b/tests/graph/lifted_multicut/test_kernighan_lin.py
index bdf8a65..43a1c7e 100644
--- a/tests/graph/lifted_multicut/test_kernighan_lin.py
+++ b/tests/graph/lifted_multicut/test_kernighan_lin.py
@@ -8,14 +8,14 @@
 
 def test_kl_splits_chain_along_repulsive_lifted_edge(chain_with_lifted):
     base, base_costs, lifted_uvs, lifted_costs = chain_with_lifted
-    objective = bic.graph.LiftedMulticutObjective(
+    objective = bic.graph.lifted_multicut.LiftedMulticutObjective(
         base, base_costs, lifted_uvs=lifted_uvs, lifted_costs=lifted_costs
     )
 
     # Warm-starting from a single cluster (everything merged) and running KL
     # should split the chain so that the repulsive (0, 3) lifted edge is cut.
     objective.labels = np.zeros(4, dtype=np.uint64)
-    labels = bic.graph.LiftedKernighanLinMulticut(
+    labels = bic.graph.lifted_multicut.LiftedKernighanLinMulticut(
         number_of_outer_iterations=10
     ).optimize(objective)
 
@@ -29,11 +29,11 @@ def test_kl_keeps_base_disconnected_clusters_separate(
     disjoint_clusters_with_attractive_lifted,
 ):
     base, base_costs, lifted_uvs, lifted_costs = disjoint_clusters_with_attractive_lifted
-    objective = bic.graph.LiftedMulticutObjective(
+    objective = bic.graph.lifted_multicut.LiftedMulticutObjective(
         base, base_costs, lifted_uvs=lifted_uvs, lifted_costs=lifted_costs
     )
 
-    labels = bic.graph.LiftedKernighanLinMulticut(
+    labels = bic.graph.lifted_multicut.LiftedKernighanLinMulticut(
         number_of_outer_iterations=10
     ).optimize(objective)
     # Lifted (0, 2) is attractive but the base graph offers no path between
@@ -53,13 +53,13 @@ def test_kl_matches_multicut_without_lifted_edges():
     )
     base_costs = np.array([5, -20, 5, 5, -20, 5, 5], dtype=np.float64)
 
-    mc_objective = bic.graph.MulticutObjective(base, base_costs)
-    mc_labels = bic.graph.KernighanLinMulticut(
+    mc_objective = bic.graph.multicut.MulticutObjective(base, base_costs)
+    mc_labels = bic.graph.multicut.KernighanLinMulticut(
         number_of_outer_iterations=10
     ).optimize(mc_objective)
 
-    lmc_objective = bic.graph.LiftedMulticutObjective(base, base_costs)
-    lmc_labels = bic.graph.LiftedKernighanLinMulticut(
+    lmc_objective = bic.graph.lifted_multicut.LiftedMulticutObjective(base, base_costs)
+    lmc_labels = bic.graph.lifted_multicut.LiftedKernighanLinMulticut(
         number_of_outer_iterations=10
     ).optimize(lmc_objective)
 
@@ -77,10 +77,10 @@ def test_kl_lifted_attractive_overrides_repulsive_base():
     lifted_uvs = np.array([[0, 2]], dtype=np.uint64)
     lifted_costs = np.array([10.0], dtype=np.float64)
 
-    objective = bic.graph.LiftedMulticutObjective(
+    objective = bic.graph.lifted_multicut.LiftedMulticutObjective(
         base, base_costs, lifted_uvs=lifted_uvs, lifted_costs=lifted_costs
     )
-    labels = bic.graph.LiftedKernighanLinMulticut(
+    labels = bic.graph.lifted_multicut.LiftedKernighanLinMulticut(
         number_of_outer_iterations=10
     ).optimize(objective)
 
@@ -91,9 +91,9 @@ def test_kl_lifted_attractive_overrides_repulsive_base():
 def test_kl_warm_starts_from_singleton():
     base = bic.graph.UndirectedGraph.from_edges(3, [[0, 1], [1, 2]])
     base_costs = np.array([2.0, 2.0], dtype=np.float64)
-    objective = bic.graph.LiftedMulticutObjective(base, base_costs)
+    objective = bic.graph.lifted_multicut.LiftedMulticutObjective(base, base_costs)
     # Singleton labels trigger an internal greedy-additive warm start.
-    labels = bic.graph.LiftedKernighanLinMulticut(
+    labels = bic.graph.lifted_multicut.LiftedKernighanLinMulticut(
         number_of_outer_iterations=5
     ).optimize(objective)
     same_partition(labels, [0, 0, 0])
diff --git a/tests/graph/lifted_multicut/test_objective.py b/tests/graph/lifted_multicut/test_objective.py
index 47da69a..27de7d5 100644
--- a/tests/graph/lifted_multicut/test_objective.py
+++ b/tests/graph/lifted_multicut/test_objective.py
@@ -6,7 +6,7 @@
 
 def test_objective_no_lifted_edges_matches_base(chain_with_lifted):
     base, base_costs, _, _ = chain_with_lifted
-    objective = bic.graph.LiftedMulticutObjective(base, base_costs)
+    objective = bic.graph.lifted_multicut.LiftedMulticutObjective(base, base_costs)
 
     # Default labels (singleton): every base edge is cut.
     assert objective.number_of_base_edges == int(base.number_of_edges)
@@ -14,13 +14,13 @@ def test_objective_no_lifted_edges_matches_base(chain_with_lifted):
     assert objective.energy() == pytest.approx(float(base_costs.sum()))
 
     # Same labeling under a multicut objective should yield the same energy.
-    mc_objective = bic.graph.MulticutObjective(base, base_costs)
+    mc_objective = bic.graph.multicut.MulticutObjective(base, base_costs)
     assert objective.energy() == pytest.approx(mc_objective.energy())
 
 
 def test_objective_with_lifted_edges(chain_with_lifted):
     base, base_costs, lifted_uvs, lifted_costs = chain_with_lifted
-    objective = bic.graph.LiftedMulticutObjective(
+    objective = bic.graph.lifted_multicut.LiftedMulticutObjective(
         base, base_costs, lifted_uvs=lifted_uvs, lifted_costs=lifted_costs
     )
     assert objective.number_of_base_edges == 3
@@ -40,7 +40,7 @@ def test_objective_with_lifted_edges(chain_with_lifted):
 
 def test_objective_lifted_edge_over_base_accumulates(triangle_with_lifted):
     base, base_costs, lifted_uvs, lifted_costs = triangle_with_lifted
-    objective = bic.graph.LiftedMulticutObjective(
+    objective = bic.graph.lifted_multicut.LiftedMulticutObjective(
         base, base_costs, lifted_uvs=lifted_uvs, lifted_costs=lifted_costs
     )
 
@@ -53,7 +53,7 @@ def test_objective_lifted_edge_over_base_accumulates(triangle_with_lifted):
 
 def test_objective_lifted_edge_over_base_overwrite(triangle_with_lifted):
     base, base_costs, lifted_uvs, lifted_costs = triangle_with_lifted
-    objective = bic.graph.LiftedMulticutObjective(
+    objective = bic.graph.lifted_multicut.LiftedMulticutObjective(
         base,
         base_costs,
         lifted_uvs=lifted_uvs,
@@ -68,7 +68,7 @@ def test_objective_lifted_edge_over_base_overwrite(triangle_with_lifted):
 
 def test_objective_set_cost_inserts_and_accumulates(chain_with_lifted):
     base, base_costs, _, _ = chain_with_lifted
-    objective = bic.graph.LiftedMulticutObjective(base, base_costs)
+    objective = bic.graph.lifted_multicut.LiftedMulticutObjective(base, base_costs)
 
     edge, is_new = objective.set_cost(0, 3, -5.0)
     assert is_new
@@ -88,7 +88,7 @@ def test_objective_set_cost_inserts_and_accumulates(chain_with_lifted):
 def test_objective_bfs_distance_inserts_within_k_hops(small_chain_bfs_problem):
     base, base_costs = small_chain_bfs_problem
 
-    objective = bic.graph.LiftedMulticutObjective(base, base_costs, bfs_distance=2)
+    objective = bic.graph.lifted_multicut.LiftedMulticutObjective(base, base_costs, bfs_distance=2)
     # 5-node chain at distance 2: lifted edges are (0,2), (1,3), (2,4) — 3 edges.
     assert objective.number_of_lifted_edges == 3
     lifted_uvs = objective.lifted_graph.uv_ids()[objective.number_of_base_edges:]
@@ -96,7 +96,7 @@ def test_objective_bfs_distance_inserts_within_k_hops(small_chain_bfs_problem):
     assert pairs == {(0, 2), (1, 3), (2, 4)}
 
     # Default lifted weights are zero so the energy equals the multicut energy.
-    expected = bic.graph.MulticutObjective(base, base_costs).energy()
+    expected = bic.graph.multicut.MulticutObjective(base, base_costs).energy()
     assert objective.energy() == pytest.approx(expected)
 
 
@@ -104,7 +104,7 @@ def test_objective_bfs_distance_combines_with_explicit_lifted_costs(small_chain_
     base, base_costs = small_chain_bfs_problem
     lifted_uvs = np.array([[0, 4]], dtype=np.uint64)
     lifted_costs = np.array([-3.0], dtype=np.float64)
-    objective = bic.graph.LiftedMulticutObjective(
+    objective = bic.graph.lifted_multicut.LiftedMulticutObjective(
         base,
         base_costs,
         bfs_distance=2,
@@ -118,7 +118,7 @@ def test_objective_bfs_distance_combines_with_explicit_lifted_costs(small_chain_
 
 def test_objective_labels_and_reset(chain_with_lifted):
     base, base_costs, lifted_uvs, lifted_costs = chain_with_lifted
-    objective = bic.graph.LiftedMulticutObjective(
+    objective = bic.graph.lifted_multicut.LiftedMulticutObjective(
         base, base_costs, lifted_uvs=lifted_uvs, lifted_costs=lifted_costs
     )
     objective.labels = [0, 0, 1, 1]
@@ -134,19 +134,19 @@ def test_objective_validation_errors():
     base_costs = np.array([1.0, 2.0], dtype=np.float64)
 
     with pytest.raises(ValueError, match="edge_costs"):
-        bic.graph.LiftedMulticutObjective(base, np.array([1.0], dtype=np.float64))
+        bic.graph.lifted_multicut.LiftedMulticutObjective(base, np.array([1.0], dtype=np.float64))
 
-    obj = bic.graph.LiftedMulticutObjective(base, base_costs)
+    obj = bic.graph.lifted_multicut.LiftedMulticutObjective(base, base_costs)
     with pytest.raises(ValueError, match="labels"):
         obj.labels = [0, 1]
 
     with pytest.raises(ValueError, match="lifted_uvs and lifted_costs"):
-        bic.graph.LiftedMulticutObjective(
+        bic.graph.lifted_multicut.LiftedMulticutObjective(
             base, base_costs, lifted_uvs=np.array([[0, 2]], dtype=np.uint64)
         )
 
     with pytest.raises(ValueError, match="same length"):
-        bic.graph.LiftedMulticutObjective(
+        bic.graph.lifted_multicut.LiftedMulticutObjective(
             base,
             base_costs,
             lifted_uvs=np.array([[0, 2]], dtype=np.uint64),
@@ -154,4 +154,4 @@ def test_objective_validation_errors():
         )
 
     with pytest.raises(ValueError, match="bfs_distance"):
-        bic.graph.LiftedMulticutObjective(base, base_costs, bfs_distance=0)
+        bic.graph.lifted_multicut.LiftedMulticutObjective(base, base_costs, bfs_distance=0)
diff --git a/tests/graph/multicut/test_chain_decomposer.py b/tests/graph/multicut/test_chain_decomposer.py
index a7b428d..d05b025 100644
--- a/tests/graph/multicut/test_chain_decomposer.py
+++ b/tests/graph/multicut/test_chain_decomposer.py
@@ -8,11 +8,11 @@
 
 def test_chained_multicut_solvers(chain_problem):
     graph, costs = chain_problem
-    objective = bic.graph.MulticutObjective(graph, costs)
-    solver = bic.graph.ChainedMulticutSolvers(
+    objective = bic.graph.multicut.MulticutObjective(graph, costs)
+    solver = bic.graph.multicut.ChainedMulticutSolvers(
         [
-            bic.graph.GreedyAdditiveMulticut(),
-            bic.graph.KernighanLinMulticut(number_of_outer_iterations=5),
+            bic.graph.multicut.GreedyAdditiveMulticut(),
+            bic.graph.multicut.KernighanLinMulticut(number_of_outer_iterations=5),
         ]
     )
 
@@ -24,13 +24,13 @@ def test_chained_multicut_solvers(chain_problem):
 
 def test_chained_solver_rejects_empty_chain():
     with pytest.raises(ValueError, match="at least one"):
-        bic.graph.ChainedMulticutSolvers([])
+        bic.graph.multicut.ChainedMulticutSolvers([])
 
 
 def test_multicut_decomposer_solves_positive_components():
     graph = bic.graph.UndirectedGraph.from_edges(4, [[0, 1], [1, 2], [2, 3]])
-    objective = bic.graph.MulticutObjective(graph, [1.0, -5.0, 1.0])
-    solver = bic.graph.MulticutDecomposer(bic.graph.GreedyAdditiveMulticut())
+    objective = bic.graph.multicut.MulticutObjective(graph, [1.0, -5.0, 1.0])
+    solver = bic.graph.multicut.MulticutDecomposer(bic.graph.multicut.GreedyAdditiveMulticut())
 
     labels = solver.optimize(objective)
 
@@ -39,15 +39,15 @@ def test_multicut_decomposer_solves_positive_components():
 
 
 def test_multicut_decomposer_uses_fallthrough_solver_for_single_component():
-    class SingletonSolver(bic.graph.MulticutSolver):
+    class SingletonSolver(bic.graph.multicut.MulticutSolver):
         def optimize(self, objective):
             objective.labels = np.arange(objective.graph.number_of_nodes, dtype=np.uint64)
             return objective.labels
 
     graph = bic.graph.UndirectedGraph.from_edges(2, [[0, 1]])
-    objective = bic.graph.MulticutObjective(graph, [1.0])
-    solver = bic.graph.MulticutDecomposer(
-        bic.graph.GreedyAdditiveMulticut(),
+    objective = bic.graph.multicut.MulticutObjective(graph, [1.0])
+    solver = bic.graph.multicut.MulticutDecomposer(
+        bic.graph.multicut.GreedyAdditiveMulticut(),
         fallthrough_solver=SingletonSolver(),
     )
 
@@ -58,12 +58,12 @@ def optimize(self, objective):
 
 def test_decomposer_on_external_toy_problem(external_toy_problem):
     graph, costs, _ = external_toy_problem
-    objective = bic.graph.MulticutObjective(graph, costs)
-    solver = bic.graph.MulticutDecomposer(
-        bic.graph.ChainedMulticutSolvers(
+    objective = bic.graph.multicut.MulticutObjective(graph, costs)
+    solver = bic.graph.multicut.MulticutDecomposer(
+        bic.graph.multicut.ChainedMulticutSolvers(
             [
-                bic.graph.GreedyAdditiveMulticut(),
-                bic.graph.KernighanLinMulticut(number_of_outer_iterations=10),
+                bic.graph.multicut.GreedyAdditiveMulticut(),
+                bic.graph.multicut.KernighanLinMulticut(number_of_outer_iterations=10),
             ]
         )
     )
@@ -75,8 +75,8 @@ def test_decomposer_on_external_toy_problem(external_toy_problem):
 
 def test_decomposer_energy_bound_on_grid_problem(grid_problem):
     graph, costs = grid_problem
-    objective = bic.graph.MulticutObjective(graph, costs)
-    solver = bic.graph.MulticutDecomposer(bic.graph.GreedyAdditiveMulticut())
+    objective = bic.graph.multicut.MulticutObjective(graph, costs)
+    solver = bic.graph.multicut.MulticutDecomposer(bic.graph.multicut.GreedyAdditiveMulticut())
 
     labels = solver.optimize(objective)
 
diff --git a/tests/graph/multicut/test_external_problem.py b/tests/graph/multicut/test_external_problem.py
index 78a9f7f..8e7beb4 100644
--- a/tests/graph/multicut/test_external_problem.py
+++ b/tests/graph/multicut/test_external_problem.py
@@ -5,25 +5,25 @@
 
 def test_solvers_on_full_external_problem(tmp_path):
     try:
-        graph, costs = bic.graph.load_external_multicut_problem(timeout=30)
+        graph, costs = bic.graph.multicut.load_external_multicut_problem(timeout=30)
     except (FileNotFoundError, RuntimeError) as error:
         pytest.skip(str(error))
 
     solvers = [
-        bic.graph.GreedyAdditiveMulticut(),
-        bic.graph.GreedyFixationMulticut(),
-        bic.graph.KernighanLinMulticut(number_of_outer_iterations=5),
-        bic.graph.ChainedMulticutSolvers(
+        bic.graph.multicut.GreedyAdditiveMulticut(),
+        bic.graph.multicut.GreedyFixationMulticut(),
+        bic.graph.multicut.KernighanLinMulticut(number_of_outer_iterations=5),
+        bic.graph.multicut.ChainedMulticutSolvers(
             [
-                bic.graph.GreedyAdditiveMulticut(),
-                bic.graph.KernighanLinMulticut(number_of_outer_iterations=5),
+                bic.graph.multicut.GreedyAdditiveMulticut(),
+                bic.graph.multicut.KernighanLinMulticut(number_of_outer_iterations=5),
             ]
         ),
-        bic.graph.MulticutDecomposer(bic.graph.GreedyAdditiveMulticut()),
+        bic.graph.multicut.MulticutDecomposer(bic.graph.multicut.GreedyAdditiveMulticut()),
     ]
 
     for solver in solvers:
-        objective = bic.graph.MulticutObjective(graph, costs)
+        objective = bic.graph.multicut.MulticutObjective(graph, costs)
         labels = solver.optimize(objective)
         assert labels.shape == (graph.number_of_nodes,)
         assert objective.energy(labels) <= -76900.0
diff --git a/tests/graph/multicut/test_fusion_move.py b/tests/graph/multicut/test_fusion_move.py
index c586089..cc45250 100644
--- a/tests/graph/multicut/test_fusion_move.py
+++ b/tests/graph/multicut/test_fusion_move.py
@@ -8,13 +8,13 @@
 
 def test_fuses_a_two_cluster_problem(external_toy_problem):
     graph, costs, _ = external_toy_problem
-    objective = bic.graph.MulticutObjective(graph, costs)
-    baseline = bic.graph.GreedyAdditiveMulticut().optimize(objective)
+    objective = bic.graph.multicut.MulticutObjective(graph, costs)
+    baseline = bic.graph.multicut.GreedyAdditiveMulticut().optimize(objective)
     baseline_energy = objective.energy(baseline)
 
     objective.reset_labels()
-    solver = bic.graph.FusionMoveMulticut(
-        proposal_generator=bic.graph.WatershedProposalGenerator(seed=1),
+    solver = bic.graph.multicut.FusionMoveMulticut(
+        proposal_generator=bic.graph.multicut.WatershedProposalGenerator(seed=1),
         number_of_iterations=10,
     )
     labels = solver.optimize(objective)
@@ -26,14 +26,14 @@ def test_fuses_a_two_cluster_problem(external_toy_problem):
 
 def test_safety_net_never_regresses(grid_problem):
     graph, costs = grid_problem
-    objective = bic.graph.MulticutObjective(graph, costs)
+    objective = bic.graph.multicut.MulticutObjective(graph, costs)
     baseline_energy = objective.energy(
-        bic.graph.GreedyAdditiveMulticut().optimize(objective)
+        bic.graph.multicut.GreedyAdditiveMulticut().optimize(objective)
     )
 
     objective.reset_labels()
-    solver = bic.graph.FusionMoveMulticut(
-        proposal_generator=bic.graph.WatershedProposalGenerator(seed=7),
+    solver = bic.graph.multicut.FusionMoveMulticut(
+        proposal_generator=bic.graph.multicut.WatershedProposalGenerator(seed=7),
         number_of_iterations=8,
     )
     labels = solver.optimize(objective)
@@ -45,10 +45,10 @@ def test_warm_starts_from_singleton(chain_problem):
     graph, costs = chain_problem
     # The default objective labels are the singleton (arange) labeling; the
     # driver should warm-start with greedy-additive and reach the optimum.
-    objective = bic.graph.MulticutObjective(graph, costs)
+    objective = bic.graph.multicut.MulticutObjective(graph, costs)
 
-    solver = bic.graph.FusionMoveMulticut(
-        proposal_generator=bic.graph.WatershedProposalGenerator(seed=0),
+    solver = bic.graph.multicut.FusionMoveMulticut(
+        proposal_generator=bic.graph.multicut.WatershedProposalGenerator(seed=0),
         number_of_iterations=3,
     )
     labels = solver.optimize(objective)
@@ -60,12 +60,12 @@ def test_warm_starts_from_singleton(chain_problem):
 
 def test_greedy_additive_proposal_generator_runs(grid_problem):
     graph, costs = grid_problem
-    objective = bic.graph.MulticutObjective(graph, costs)
-    baseline = objective.energy(bic.graph.GreedyAdditiveMulticut().optimize(objective))
+    objective = bic.graph.multicut.MulticutObjective(graph, costs)
+    baseline = objective.energy(bic.graph.multicut.GreedyAdditiveMulticut().optimize(objective))
 
     objective.reset_labels()
-    solver = bic.graph.FusionMoveMulticut(
-        proposal_generator=bic.graph.GreedyAdditiveProposalGenerator(seed=3, sigma=1.0),
+    solver = bic.graph.multicut.FusionMoveMulticut(
+        proposal_generator=bic.graph.multicut.GreedyAdditiveProposalGenerator(seed=3, sigma=1.0),
         number_of_iterations=5,
     )
     labels = solver.optimize(objective)
@@ -75,17 +75,17 @@ def test_greedy_additive_proposal_generator_runs(grid_problem):
 @pytest.mark.parametrize(
     "sub_solver",
     [
-        bic.graph.GreedyAdditiveMulticut(),
-        bic.graph.GreedyFixationMulticut(),
-        bic.graph.KernighanLinMulticut(number_of_outer_iterations=3),
+        bic.graph.multicut.GreedyAdditiveMulticut(),
+        bic.graph.multicut.GreedyFixationMulticut(),
+        bic.graph.multicut.KernighanLinMulticut(number_of_outer_iterations=3),
     ],
 )
 def test_sub_solver_pluggability(grid_problem, sub_solver):
     graph, costs = grid_problem
-    objective = bic.graph.MulticutObjective(graph, costs)
+    objective = bic.graph.multicut.MulticutObjective(graph, costs)
 
-    solver = bic.graph.FusionMoveMulticut(
-        proposal_generator=bic.graph.WatershedProposalGenerator(seed=2),
+    solver = bic.graph.multicut.FusionMoveMulticut(
+        proposal_generator=bic.graph.multicut.WatershedProposalGenerator(seed=2),
         sub_solver=sub_solver,
         number_of_iterations=4,
     )
@@ -100,14 +100,14 @@ def test_stops_after_no_improvement(frustrated_triangle):
     # threshold: the loop must terminate quickly without scanning all 1000
     # iterations (the test would time out otherwise).
     graph, costs = frustrated_triangle
-    objective = bic.graph.MulticutObjective(graph, costs)
+    objective = bic.graph.multicut.MulticutObjective(graph, costs)
     baseline = objective.energy(
-        bic.graph.GreedyAdditiveMulticut().optimize(objective)
+        bic.graph.multicut.GreedyAdditiveMulticut().optimize(objective)
     )
 
     objective.reset_labels()
-    solver = bic.graph.FusionMoveMulticut(
-        proposal_generator=bic.graph.WatershedProposalGenerator(seed=0),
+    solver = bic.graph.multicut.FusionMoveMulticut(
+        proposal_generator=bic.graph.multicut.WatershedProposalGenerator(seed=0),
         number_of_iterations=1000,
         stop_if_no_improvement=1,
     )
@@ -118,18 +118,18 @@ def test_stops_after_no_improvement(frustrated_triangle):
 
 def test_chains_with_kernighan_lin(external_toy_problem):
     graph, costs, _ = external_toy_problem
-    objective = bic.graph.MulticutObjective(graph, costs)
+    objective = bic.graph.multicut.MulticutObjective(graph, costs)
     baseline = objective.energy(
-        bic.graph.GreedyAdditiveMulticut().optimize(objective)
+        bic.graph.multicut.GreedyAdditiveMulticut().optimize(objective)
     )
 
     objective.reset_labels()
-    solver = bic.graph.ChainedMulticutSolvers([
-        bic.graph.FusionMoveMulticut(
-            proposal_generator=bic.graph.WatershedProposalGenerator(seed=11),
+    solver = bic.graph.multicut.ChainedMulticutSolvers([
+        bic.graph.multicut.FusionMoveMulticut(
+            proposal_generator=bic.graph.multicut.WatershedProposalGenerator(seed=11),
             number_of_iterations=5,
         ),
-        bic.graph.KernighanLinMulticut(number_of_outer_iterations=3),
+        bic.graph.multicut.KernighanLinMulticut(number_of_outer_iterations=3),
     ])
     labels = solver.optimize(objective)
     assert objective.energy(labels) <= baseline + 1e-9
@@ -137,43 +137,43 @@ def test_chains_with_kernighan_lin(external_toy_problem):
 
 def test_rejects_non_proposal_generator():
     with pytest.raises(TypeError, match="proposal_generator"):
-        bic.graph.FusionMoveMulticut(proposal_generator=object())
+        bic.graph.multicut.FusionMoveMulticut(proposal_generator=object())
 
 
 def test_rejects_zero_thread_count():
     with pytest.raises(ValueError, match="number_of_threads"):
-        bic.graph.FusionMoveMulticut(
-            proposal_generator=bic.graph.WatershedProposalGenerator(),
+        bic.graph.multicut.FusionMoveMulticut(
+            proposal_generator=bic.graph.multicut.WatershedProposalGenerator(),
             number_of_threads=0,
         )
 
 
 def test_rejects_zero_parallel_proposals():
     with pytest.raises(ValueError, match="number_of_parallel_proposals"):
-        bic.graph.FusionMoveMulticut(
-            proposal_generator=bic.graph.WatershedProposalGenerator(),
+        bic.graph.multicut.FusionMoveMulticut(
+            proposal_generator=bic.graph.multicut.WatershedProposalGenerator(),
             number_of_parallel_proposals=0,
         )
 
 
 def test_rejects_invalid_iteration_settings():
     with pytest.raises(ValueError, match="number_of_iterations"):
-        bic.graph.FusionMoveMulticut(
-            proposal_generator=bic.graph.WatershedProposalGenerator(),
+        bic.graph.multicut.FusionMoveMulticut(
+            proposal_generator=bic.graph.multicut.WatershedProposalGenerator(),
             number_of_iterations=-1,
         )
     with pytest.raises(ValueError, match="stop_if_no_improvement"):
-        bic.graph.FusionMoveMulticut(
-            proposal_generator=bic.graph.WatershedProposalGenerator(),
+        bic.graph.multicut.FusionMoveMulticut(
+            proposal_generator=bic.graph.multicut.WatershedProposalGenerator(),
             stop_if_no_improvement=0,
         )
 
 
 def test_runs_on_empty_graph():
     graph = bic.graph.UndirectedGraph(0)
-    objective = bic.graph.MulticutObjective(graph, np.zeros(0))
-    solver = bic.graph.FusionMoveMulticut(
-        proposal_generator=bic.graph.WatershedProposalGenerator(),
+    objective = bic.graph.multicut.MulticutObjective(graph, np.zeros(0))
+    solver = bic.graph.multicut.FusionMoveMulticut(
+        proposal_generator=bic.graph.multicut.WatershedProposalGenerator(),
     )
     labels = solver.optimize(objective)
     assert labels.shape == (0,)
@@ -183,13 +183,13 @@ def test_parallel_threads_match_single_threaded_safety_net(grid_problem):
     # With T>1 the loop should never regress past the greedy-additive baseline
     # (best-of safety net is enforced in C++).
     graph, costs = grid_problem
-    baseline = bic.graph.MulticutObjective(graph, costs).energy(
-        bic.graph.GreedyAdditiveMulticut().optimize(bic.graph.MulticutObjective(graph, costs))
+    baseline = bic.graph.multicut.MulticutObjective(graph, costs).energy(
+        bic.graph.multicut.GreedyAdditiveMulticut().optimize(bic.graph.multicut.MulticutObjective(graph, costs))
     )
 
-    objective = bic.graph.MulticutObjective(graph, costs)
-    solver = bic.graph.FusionMoveMulticut(
-        proposal_generator=bic.graph.WatershedProposalGenerator(seed=11),
+    objective = bic.graph.multicut.MulticutObjective(graph, costs)
+    solver = bic.graph.multicut.FusionMoveMulticut(
+        proposal_generator=bic.graph.multicut.WatershedProposalGenerator(seed=11),
         number_of_threads=4,
         number_of_iterations=5,
     )
@@ -201,13 +201,13 @@ def test_multi_proposal_runs(grid_problem):
     # number_of_parallel_proposals > 2 exercises the stage-2 joint fuse path
     # (no determinism guarantee against T=1 here — the algorithm shape changes).
     graph, costs = grid_problem
-    baseline = bic.graph.MulticutObjective(graph, costs).energy(
-        bic.graph.GreedyAdditiveMulticut().optimize(bic.graph.MulticutObjective(graph, costs))
+    baseline = bic.graph.multicut.MulticutObjective(graph, costs).energy(
+        bic.graph.multicut.GreedyAdditiveMulticut().optimize(bic.graph.multicut.MulticutObjective(graph, costs))
     )
 
-    objective = bic.graph.MulticutObjective(graph, costs)
-    solver = bic.graph.FusionMoveMulticut(
-        proposal_generator=bic.graph.WatershedProposalGenerator(seed=3),
+    objective = bic.graph.multicut.MulticutObjective(graph, costs)
+    solver = bic.graph.multicut.FusionMoveMulticut(
+        proposal_generator=bic.graph.multicut.WatershedProposalGenerator(seed=3),
         number_of_threads=2,
         number_of_parallel_proposals=4,
         number_of_iterations=5,
@@ -221,9 +221,9 @@ def test_parallel_is_deterministic_given_settings(grid_problem):
     graph, costs = grid_problem
 
     def run():
-        objective = bic.graph.MulticutObjective(graph, costs)
-        solver = bic.graph.FusionMoveMulticut(
-            proposal_generator=bic.graph.WatershedProposalGenerator(seed=7),
+        objective = bic.graph.multicut.MulticutObjective(graph, costs)
+        solver = bic.graph.multicut.FusionMoveMulticut(
+            proposal_generator=bic.graph.multicut.WatershedProposalGenerator(seed=7),
             number_of_threads=4,
             number_of_iterations=5,
         )
@@ -233,9 +233,9 @@ def run():
 
 
 def test_default_parallel_proposals_tracks_threads():
-    pgen = bic.graph.WatershedProposalGenerator()
-    one_thread = bic.graph.FusionMoveMulticut(proposal_generator=pgen)
-    four_threads = bic.graph.FusionMoveMulticut(
+    pgen = bic.graph.multicut.WatershedProposalGenerator()
+    one_thread = bic.graph.multicut.FusionMoveMulticut(proposal_generator=pgen)
+    four_threads = bic.graph.multicut.FusionMoveMulticut(
         proposal_generator=pgen, number_of_threads=4
     )
     assert one_thread.number_of_parallel_proposals == 2
@@ -243,8 +243,8 @@ def test_default_parallel_proposals_tracks_threads():
 
 
 def test_explicit_parallel_proposals_overrides_default():
-    solver = bic.graph.FusionMoveMulticut(
-        proposal_generator=bic.graph.WatershedProposalGenerator(),
+    solver = bic.graph.multicut.FusionMoveMulticut(
+        proposal_generator=bic.graph.multicut.WatershedProposalGenerator(),
         number_of_threads=4,
         number_of_parallel_proposals=8,
     )
@@ -257,9 +257,9 @@ def test_runs_on_graph_without_negative_edges(chain_problem):
     # warm-started greedy-additive result.
     graph, _ = chain_problem
     costs = np.array([1.0, 2.0, 3.0], dtype=np.float64)
-    objective = bic.graph.MulticutObjective(graph, costs)
-    solver = bic.graph.FusionMoveMulticut(
-        proposal_generator=bic.graph.WatershedProposalGenerator(seed=0),
+    objective = bic.graph.multicut.MulticutObjective(graph, costs)
+    solver = bic.graph.multicut.FusionMoveMulticut(
+        proposal_generator=bic.graph.multicut.WatershedProposalGenerator(seed=0),
         number_of_iterations=3,
     )
     labels = solver.optimize(objective)
diff --git a/tests/graph/multicut/test_greedy_additive.py b/tests/graph/multicut/test_greedy_additive.py
index 610e3d0..6185365 100644
--- a/tests/graph/multicut/test_greedy_additive.py
+++ b/tests/graph/multicut/test_greedy_additive.py
@@ -8,9 +8,9 @@
 
 def test_greedy_additive_merges_positive_components(chain_problem):
     graph, costs = chain_problem
-    objective = bic.graph.MulticutObjective(graph, costs)
+    objective = bic.graph.multicut.MulticutObjective(graph, costs)
 
-    labels = bic.graph.GreedyAdditiveMulticut().optimize(objective)
+    labels = bic.graph.multicut.GreedyAdditiveMulticut().optimize(objective)
 
     same_partition(labels, [0, 0, 0, 1])
     np.testing.assert_array_equal(objective.labels, labels)
@@ -19,26 +19,26 @@ def test_greedy_additive_merges_positive_components(chain_problem):
 
 def test_greedy_additive_respects_weight_stop(chain_problem):
     graph, costs = chain_problem
-    objective = bic.graph.MulticutObjective(graph, costs)
+    objective = bic.graph.multicut.MulticutObjective(graph, costs)
 
-    labels = bic.graph.GreedyAdditiveMulticut(weight_stop=1.5).optimize(objective)
+    labels = bic.graph.multicut.GreedyAdditiveMulticut(weight_stop=1.5).optimize(objective)
 
     same_partition(labels, [0, 1, 1, 2])
 
 
 def test_greedy_additive_on_external_toy_problem(external_toy_problem):
     graph, costs, _ = external_toy_problem
-    objective = bic.graph.MulticutObjective(graph, costs)
+    objective = bic.graph.multicut.MulticutObjective(graph, costs)
 
-    labels = bic.graph.GreedyAdditiveMulticut().optimize(objective)
+    labels = bic.graph.multicut.GreedyAdditiveMulticut().optimize(objective)
 
     assert objective.energy(labels) <= -35.0
 
 
 def test_greedy_additive_energy_bound_on_grid_problem(grid_problem):
     graph, costs = grid_problem
-    objective = bic.graph.MulticutObjective(graph, costs)
+    objective = bic.graph.multicut.MulticutObjective(graph, costs)
 
-    labels = bic.graph.GreedyAdditiveMulticut().optimize(objective)
+    labels = bic.graph.multicut.GreedyAdditiveMulticut().optimize(objective)
 
     assert objective.energy(labels) <= -20.0
diff --git a/tests/graph/multicut/test_greedy_fixation.py b/tests/graph/multicut/test_greedy_fixation.py
index c1f65a5..043cc56 100644
--- a/tests/graph/multicut/test_greedy_fixation.py
+++ b/tests/graph/multicut/test_greedy_fixation.py
@@ -3,9 +3,9 @@
 
 def test_greedy_fixation_respects_negative_constraints(frustrated_triangle):
     graph, costs = frustrated_triangle
-    objective = bic.graph.MulticutObjective(graph, costs)
+    objective = bic.graph.multicut.MulticutObjective(graph, costs)
 
-    labels = bic.graph.GreedyFixationMulticut().optimize(objective)
+    labels = bic.graph.multicut.GreedyFixationMulticut().optimize(objective)
 
     assert labels[0] != labels[2]
     assert objective.energy(labels) <= -3.0
@@ -13,26 +13,26 @@ def test_greedy_fixation_respects_negative_constraints(frustrated_triangle):
 
 def test_greedy_fixation_node_num_stop(chain_problem):
     graph, costs = chain_problem
-    objective = bic.graph.MulticutObjective(graph, costs)
+    objective = bic.graph.multicut.MulticutObjective(graph, costs)
 
-    labels = bic.graph.GreedyFixationMulticut(node_num_stop=3).optimize(objective)
+    labels = bic.graph.multicut.GreedyFixationMulticut(node_num_stop=3).optimize(objective)
 
     assert len(set(labels.tolist())) == 3
 
 
 def test_greedy_fixation_on_external_toy_problem(external_toy_problem):
     graph, costs, _ = external_toy_problem
-    objective = bic.graph.MulticutObjective(graph, costs)
+    objective = bic.graph.multicut.MulticutObjective(graph, costs)
 
-    labels = bic.graph.GreedyFixationMulticut().optimize(objective)
+    labels = bic.graph.multicut.GreedyFixationMulticut().optimize(objective)
 
     assert objective.energy(labels) <= -35.0
 
 
 def test_greedy_fixation_energy_bound_on_grid_problem(grid_problem):
     graph, costs = grid_problem
-    objective = bic.graph.MulticutObjective(graph, costs)
+    objective = bic.graph.multicut.MulticutObjective(graph, costs)
 
-    labels = bic.graph.GreedyFixationMulticut().optimize(objective)
+    labels = bic.graph.multicut.GreedyFixationMulticut().optimize(objective)
 
     assert objective.energy(labels) <= -20.0
diff --git a/tests/graph/multicut/test_kernighan_lin.py b/tests/graph/multicut/test_kernighan_lin.py
index b1cd562..82479fe 100644
--- a/tests/graph/multicut/test_kernighan_lin.py
+++ b/tests/graph/multicut/test_kernighan_lin.py
@@ -6,10 +6,10 @@
 
 def test_kernighan_lin_improves_or_preserves_energy(frustrated_triangle):
     graph, costs = frustrated_triangle
-    objective = bic.graph.MulticutObjective(graph, costs)
+    objective = bic.graph.multicut.MulticutObjective(graph, costs)
     before = objective.energy()
 
-    labels = bic.graph.KernighanLinMulticut(number_of_outer_iterations=10).optimize(objective)
+    labels = bic.graph.multicut.KernighanLinMulticut(number_of_outer_iterations=10).optimize(objective)
 
     assert objective.energy(labels) <= before
     assert labels.dtype == np.uint64
@@ -17,28 +17,28 @@ def test_kernighan_lin_improves_or_preserves_energy(frustrated_triangle):
 
 def test_kernighan_lin_finds_frustrated_triangle_optimum(frustrated_triangle):
     graph, costs = frustrated_triangle
-    objective = bic.graph.MulticutObjective(graph, costs)
+    objective = bic.graph.multicut.MulticutObjective(graph, costs)
 
-    labels = bic.graph.KernighanLinMulticut(number_of_outer_iterations=10).optimize(objective)
+    labels = bic.graph.multicut.KernighanLinMulticut(number_of_outer_iterations=10).optimize(objective)
 
     assert objective.energy(labels) == pytest.approx(-3.0)
 
 
 def test_kernighan_lin_accepts_initial_labels(chain_problem):
     graph, costs = chain_problem
-    objective = bic.graph.MulticutObjective(graph, costs, initial_labels=[0, 0, 0, 1])
+    objective = bic.graph.multicut.MulticutObjective(graph, costs, initial_labels=[0, 0, 0, 1])
     before = objective.energy()
 
-    labels = bic.graph.KernighanLinMulticut(number_of_outer_iterations=5).optimize(objective)
+    labels = bic.graph.multicut.KernighanLinMulticut(number_of_outer_iterations=5).optimize(objective)
 
     assert objective.energy(labels) <= before
 
 
 def test_kernighan_lin_on_external_toy_problem(external_toy_problem):
     graph, costs, _ = external_toy_problem
-    objective = bic.graph.MulticutObjective(graph, costs)
+    objective = bic.graph.multicut.MulticutObjective(graph, costs)
 
-    labels = bic.graph.KernighanLinMulticut(number_of_outer_iterations=20).optimize(objective)
+    labels = bic.graph.multicut.KernighanLinMulticut(number_of_outer_iterations=20).optimize(objective)
 
     # The move-chain implementation reaches the optimum on this instance.
     assert objective.energy(labels) == pytest.approx(-35.0)
@@ -46,9 +46,9 @@ def test_kernighan_lin_on_external_toy_problem(external_toy_problem):
 
 def test_kernighan_lin_energy_bound_on_grid_problem(grid_problem):
     graph, costs = grid_problem
-    objective = bic.graph.MulticutObjective(graph, costs)
+    objective = bic.graph.multicut.MulticutObjective(graph, costs)
 
-    labels = bic.graph.KernighanLinMulticut(number_of_outer_iterations=10).optimize(objective)
+    labels = bic.graph.multicut.KernighanLinMulticut(number_of_outer_iterations=10).optimize(objective)
 
     # Regression guard pinned to the move-chain implementation's converged energy.
     assert objective.energy(labels) <= -34.0
diff --git a/tests/graph/multicut/test_objective.py b/tests/graph/multicut/test_objective.py
index fde684d..a746906 100644
--- a/tests/graph/multicut/test_objective.py
+++ b/tests/graph/multicut/test_objective.py
@@ -6,21 +6,21 @@
 
 def test_multicut_objective_energy_and_validation(frustrated_triangle):
     graph, costs = frustrated_triangle
-    objective = bic.graph.MulticutObjective(graph, costs)
+    objective = bic.graph.multicut.MulticutObjective(graph, costs)
 
     assert objective.energy([0, 0, 1]) == pytest.approx(-3.0)
     objective.labels = [0, 0, 1]
     assert objective.energy() == pytest.approx(-3.0)
 
     with pytest.raises(ValueError, match="edge_costs"):
-        bic.graph.MulticutObjective(graph, [1.0, 2.0])
+        bic.graph.multicut.MulticutObjective(graph, [1.0, 2.0])
     with pytest.raises(ValueError, match="labels"):
         objective.labels = [0, 1]
 
 
 def test_multicut_objective_copies_graph_topology(chain_problem):
     graph, costs = chain_problem
-    objective = bic.graph.MulticutObjective(graph, costs)
+    objective = bic.graph.multicut.MulticutObjective(graph, costs)
 
     graph.insert_edge(0, 3)
 
@@ -33,7 +33,7 @@ def test_multicut_objective_copies_graph_topology(chain_problem):
 
 def test_multicut_objective_reset_labels(chain_problem):
     graph, costs = chain_problem
-    objective = bic.graph.MulticutObjective(graph, costs, initial_labels=[0, 0, 1, 1])
+    objective = bic.graph.multicut.MulticutObjective(graph, costs, initial_labels=[0, 0, 1, 1])
 
     objective.reset_labels()
 
diff --git a/tests/graph/multicut/test_proposal_generators.py b/tests/graph/multicut/test_proposal_generators.py
index 103c132..ac21c18 100644
--- a/tests/graph/multicut/test_proposal_generators.py
+++ b/tests/graph/multicut/test_proposal_generators.py
@@ -13,15 +13,15 @@ def test_watershed_proposal_generator_is_deterministic_given_seed():
     )
     costs = np.array([-1.0, 1.0, -1.0, 1.0, -1.0, 0.5], dtype=np.float64)
 
-    objective_a = bic.graph.MulticutObjective(graph, costs)
-    objective_b = bic.graph.MulticutObjective(graph, costs)
+    objective_a = bic.graph.multicut.MulticutObjective(graph, costs)
+    objective_b = bic.graph.multicut.MulticutObjective(graph, costs)
 
-    solver_a = bic.graph.FusionMoveMulticut(
-        proposal_generator=bic.graph.WatershedProposalGenerator(seed=42),
+    solver_a = bic.graph.multicut.FusionMoveMulticut(
+        proposal_generator=bic.graph.multicut.WatershedProposalGenerator(seed=42),
         number_of_iterations=3,
     )
-    solver_b = bic.graph.FusionMoveMulticut(
-        proposal_generator=bic.graph.WatershedProposalGenerator(seed=42),
+    solver_b = bic.graph.multicut.FusionMoveMulticut(
+        proposal_generator=bic.graph.multicut.WatershedProposalGenerator(seed=42),
         number_of_iterations=3,
     )
 
@@ -32,15 +32,15 @@ def test_watershed_proposal_generator_is_deterministic_given_seed():
 
 def test_greedy_additive_proposal_generator_is_deterministic_given_seed(grid_problem):
     graph, costs = grid_problem
-    objective_a = bic.graph.MulticutObjective(graph, costs)
-    objective_b = bic.graph.MulticutObjective(graph, costs)
+    objective_a = bic.graph.multicut.MulticutObjective(graph, costs)
+    objective_b = bic.graph.multicut.MulticutObjective(graph, costs)
 
-    solver_a = bic.graph.FusionMoveMulticut(
-        proposal_generator=bic.graph.GreedyAdditiveProposalGenerator(seed=5),
+    solver_a = bic.graph.multicut.FusionMoveMulticut(
+        proposal_generator=bic.graph.multicut.GreedyAdditiveProposalGenerator(seed=5),
         number_of_iterations=3,
     )
-    solver_b = bic.graph.FusionMoveMulticut(
-        proposal_generator=bic.graph.GreedyAdditiveProposalGenerator(seed=5),
+    solver_b = bic.graph.multicut.FusionMoveMulticut(
+        proposal_generator=bic.graph.multicut.GreedyAdditiveProposalGenerator(seed=5),
         number_of_iterations=3,
     )
     np.testing.assert_array_equal(
@@ -60,8 +60,8 @@ def test_cpp_proposal_generator_validates_costs_length():
 
 def test_proposal_generator_isinstance_check():
     assert isinstance(
-        bic.graph.WatershedProposalGenerator(), bic.graph.ProposalGenerator
+        bic.graph.multicut.WatershedProposalGenerator(), bic.graph.multicut.ProposalGenerator
     )
     assert isinstance(
-        bic.graph.GreedyAdditiveProposalGenerator(), bic.graph.ProposalGenerator
+        bic.graph.multicut.GreedyAdditiveProposalGenerator(), bic.graph.multicut.ProposalGenerator
     )
diff --git a/tests/graph/test_grid_affinity_multicut_integration.py b/tests/graph/test_grid_affinity_multicut_integration.py
index 59962df..d944dc6 100644
--- a/tests/graph/test_grid_affinity_multicut_integration.py
+++ b/tests/graph/test_grid_affinity_multicut_integration.py
@@ -42,15 +42,15 @@ def test_grid_local_affinities_drive_multicut_partition():
     offsets = [(1, 0), (0, 1)]
     affinities = _affinities_from_partition(partition, offsets)
 
-    weights, valid_edges = bic.graph.grid_affinity_features(graph, affinities, offsets)
+    weights, valid_edges = bic.graph.features.grid_affinity_features(graph, affinities, offsets)
     np.testing.assert_array_equal(valid_edges, np.ones(graph.number_of_edges, dtype=bool))
     edge_costs = weights - 0.5
 
-    objective = bic.graph.MulticutObjective(graph, edge_costs)
-    labels = bic.graph.ChainedMulticutSolvers(
+    objective = bic.graph.multicut.MulticutObjective(graph, edge_costs)
+    labels = bic.graph.multicut.ChainedMulticutSolvers(
         [
-            bic.graph.GreedyAdditiveMulticut(),
-            bic.graph.KernighanLinMulticut(number_of_outer_iterations=5),
+            bic.graph.multicut.GreedyAdditiveMulticut(),
+            bic.graph.multicut.KernighanLinMulticut(number_of_outer_iterations=5),
         ]
     ).optimize(objective)
 
@@ -66,22 +66,22 @@ def test_grid_long_range_affinities_drive_lifted_multicut_partition():
     affinities = _affinities_from_partition(partition, offsets)
 
     local_weights, valid_edges, lifted_uvs, lifted_weights, lifted_offset_ids = (
-        bic.graph.grid_affinity_features_with_lifted(graph, affinities, offsets)
+        bic.graph.features.grid_affinity_features_with_lifted(graph, affinities, offsets)
     )
     np.testing.assert_array_equal(valid_edges, np.ones(graph.number_of_edges, dtype=bool))
     assert lifted_uvs.shape[0] > 0
     assert set(np.unique(lifted_offset_ids).tolist()) == {2, 3}
 
-    objective = bic.graph.LiftedMulticutObjective(
+    objective = bic.graph.lifted_multicut.LiftedMulticutObjective(
         graph,
         local_weights - 0.5,
         lifted_uvs=lifted_uvs,
         lifted_costs=lifted_weights - 0.5,
     )
-    labels = bic.graph.LiftedChainedSolvers(
+    labels = bic.graph.lifted_multicut.LiftedChainedSolvers(
         [
-            bic.graph.LiftedGreedyAdditiveMulticut(),
-            bic.graph.LiftedKernighanLinMulticut(number_of_outer_iterations=5),
+            bic.graph.lifted_multicut.LiftedGreedyAdditiveMulticut(),
+            bic.graph.lifted_multicut.LiftedKernighanLinMulticut(number_of_outer_iterations=5),
         ]
     ).optimize(objective)
 
diff --git a/tests/graph/test_grid_features.py b/tests/graph/test_grid_features.py
index 285db49..e6fd9b3 100644
--- a/tests/graph/test_grid_features.py
+++ b/tests/graph/test_grid_features.py
@@ -8,7 +8,7 @@ def test_grid_boundary_features_2d_align_to_grid_edges():
     graph = bic.graph.GridGraph2D((2, 3))
     boundary_map = np.array([[0.0, 2.0, 4.0], [10.0, 12.0, 14.0]])
 
-    weights = bic.graph.grid_boundary_features(graph, boundary_map)
+    weights = bic.graph.features.grid_boundary_features(graph, boundary_map)
 
     assert weights.dtype == np.float64
     np.testing.assert_allclose(
@@ -23,7 +23,7 @@ def test_grid_affinity_features_2d_local_offsets():
     affinities[0] = np.array([[10.0, 11.0, 12.0], [0.0, 0.0, 0.0]])
     affinities[1] = np.array([[20.0, 21.0, 0.0], [23.0, 24.0, 0.0]])
 
-    weights, valid = bic.graph.grid_affinity_features(
+    weights, valid = bic.graph.features.grid_affinity_features(
         graph, affinities, offsets=[(1, 0), (0, 1)]
     )
 
@@ -35,7 +35,7 @@ def test_grid_affinity_features_partial_local_offsets():
     graph = bic.graph.GridGraph2D((2, 3))
     affinities = np.arange(6, dtype=np.float64).reshape(1, 2, 3)
 
-    weights, valid = bic.graph.grid_affinity_features(
+    weights, valid = bic.graph.features.grid_affinity_features(
         graph, affinities, offsets=[(1, 0)]
     )
 
@@ -50,7 +50,7 @@ def test_grid_affinity_features_with_lifted_2d():
     affinities[1] = 100.0 + np.arange(9, dtype=np.float64).reshape(3, 3)
 
     local_weights, valid, lifted_uvs, lifted_weights, lifted_offset_ids = (
-        bic.graph.grid_affinity_features_with_lifted(
+        bic.graph.features.grid_affinity_features_with_lifted(
             graph, affinities, offsets=[(1, 0), (0, 2)]
         )
     )
@@ -79,7 +79,7 @@ def test_grid_affinity_features_with_lifted_3d():
     affinities[1, 1, 0, 0] = 10.0
 
     local_weights, valid, lifted_uvs, lifted_weights, lifted_offset_ids = (
-        bic.graph.grid_affinity_features_with_lifted(
+        bic.graph.features.grid_affinity_features_with_lifted(
             graph, affinities, offsets=[(1, 0, 0), (0, 0, 2)]
         )
     )
@@ -99,7 +99,7 @@ def test_grid_affinity_features_rejects_long_range_offsets():
     affinities = np.zeros((1, 3, 3), dtype=np.float64)
 
     with pytest.raises(ValueError, match="only local offsets"):
-        bic.graph.grid_affinity_features(graph, affinities, offsets=[(0, 2)])
+        bic.graph.features.grid_affinity_features(graph, affinities, offsets=[(0, 2)])
 
 
 def test_grid_affinity_features_rejects_duplicate_local_edges():
@@ -107,7 +107,7 @@ def test_grid_affinity_features_rejects_duplicate_local_edges():
     affinities = np.zeros((2, 2, 3), dtype=np.float64)
 
     with pytest.raises(ValueError, match="duplicate local"):
-        bic.graph.grid_affinity_features(graph, affinities, offsets=[(0, 1), (0, -1)])
+        bic.graph.features.grid_affinity_features(graph, affinities, offsets=[(0, 1), (0, -1)])
 
 
 def test_grid_affinity_features_rejects_duplicate_lifted_edges():
@@ -115,7 +115,7 @@ def test_grid_affinity_features_rejects_duplicate_lifted_edges():
     affinities = np.zeros((2, 1, 3), dtype=np.float64)
 
     with pytest.raises(ValueError, match="duplicate long-range"):
-        bic.graph.grid_affinity_features_with_lifted(
+        bic.graph.features.grid_affinity_features_with_lifted(
             graph, affinities, offsets=[(0, 2), (0, -2)]
         )
 
@@ -126,7 +126,7 @@ def test_grid_boundary_features_preserves_float32_dtype():
         [[0.0, 2.0, 4.0], [10.0, 12.0, 14.0]], dtype=np.float32
     )
 
-    weights = bic.graph.grid_boundary_features(graph, boundary_map)
+    weights = bic.graph.features.grid_boundary_features(graph, boundary_map)
 
     assert weights.dtype == np.float32
     np.testing.assert_allclose(
@@ -140,7 +140,7 @@ def test_grid_affinity_features_preserves_float32_dtype():
     affinities[0] = np.array([[10.0, 11.0, 12.0], [0.0, 0.0, 0.0]], dtype=np.float32)
     affinities[1] = np.array([[20.0, 21.0, 0.0], [23.0, 24.0, 0.0]], dtype=np.float32)
 
-    weights, valid = bic.graph.grid_affinity_features(
+    weights, valid = bic.graph.features.grid_affinity_features(
         graph, affinities, offsets=[(1, 0), (0, 1)]
     )
 
@@ -159,7 +159,7 @@ def test_grid_affinity_features_with_lifted_preserves_float32_dtype():
     affinities[1] = 100.0 + np.arange(9, dtype=np.float32).reshape(3, 3)
 
     local_weights, valid, lifted_uvs, lifted_weights, _ = (
-        bic.graph.grid_affinity_features_with_lifted(
+        bic.graph.features.grid_affinity_features_with_lifted(
             graph, affinities, offsets=[(1, 0), (0, 2)]
         )
     )
@@ -185,7 +185,7 @@ def test_grid_features_integer_input_promotes_to_float64():
     graph = bic.graph.GridGraph2D((2, 3))
     boundary_map = np.array([[0, 2, 4], [10, 12, 14]], dtype=np.int32)
 
-    weights = bic.graph.grid_boundary_features(graph, boundary_map)
+    weights = bic.graph.features.grid_boundary_features(graph, boundary_map)
 
     assert weights.dtype == np.float64
 
@@ -194,10 +194,10 @@ def test_grid_feature_validation():
     graph = bic.graph.GridGraph2D((2, 3))
 
     with pytest.raises(ValueError, match="boundary_map shape"):
-        bic.graph.grid_boundary_features(graph, np.zeros((3, 2)))
+        bic.graph.features.grid_boundary_features(graph, np.zeros((3, 2)))
     with pytest.raises(ValueError, match="affinities must have shape"):
-        bic.graph.grid_affinity_features(graph, np.zeros((2, 3)), offsets=[(0, 1)])
+        bic.graph.features.grid_affinity_features(graph, np.zeros((2, 3)), offsets=[(0, 1)])
     with pytest.raises(ValueError, match="offsets length"):
-        bic.graph.grid_affinity_features(graph, np.zeros((2, 2, 3)), offsets=[(0, 1)])
+        bic.graph.features.grid_affinity_features(graph, np.zeros((2, 2, 3)), offsets=[(0, 1)])
     with pytest.raises(ValueError, match="zero offset"):
-        bic.graph.grid_affinity_features(graph, np.zeros((1, 2, 3)), offsets=[(0, 0)])
+        bic.graph.features.grid_affinity_features(graph, np.zeros((1, 2, 3)), offsets=[(0, 0)])
diff --git a/tests/graph/test_mutex_watershed_graph.py b/tests/graph/test_mutex_watershed_graph.py
index 44765af..d36e32e 100644
--- a/tests/graph/test_mutex_watershed_graph.py
+++ b/tests/graph/test_mutex_watershed_graph.py
@@ -32,7 +32,7 @@ def test_all_attractive_merges_to_one_component():
     mutex_uvs = np.zeros((0, 2), dtype=np.uint64)
     mutex_costs = np.zeros(0, dtype=np.float64)
 
-    labels = bic.graph.mutex_watershed_clustering(
+    labels = bic.graph.mutex_watershed.mutex_watershed_clustering(
         graph, edge_costs, mutex_uvs, mutex_costs
     )
 
@@ -49,7 +49,7 @@ def test_mutex_only_keeps_singletons():
     )
     mutex_costs = np.ones(6, dtype=np.float64)
 
-    labels = bic.graph.mutex_watershed_clustering(
+    labels = bic.graph.mutex_watershed.mutex_watershed_clustering(
         graph, edge_costs, mutex_uvs, mutex_costs
     )
 
@@ -65,7 +65,7 @@ def test_mutex_beats_attractive_when_higher_weight():
     mutex_uvs = np.array([[0, 2]], dtype=np.uint64)
     mutex_costs = np.array([2.0], dtype=np.float64)
 
-    labels = bic.graph.mutex_watershed_clustering(
+    labels = bic.graph.mutex_watershed.mutex_watershed_clustering(
         graph, edge_costs, mutex_uvs, mutex_costs
     )
 
@@ -82,7 +82,7 @@ def test_mutex_constraint_propagates_through_merge():
     mutex_uvs = np.array([[0, 3]], dtype=np.uint64)
     mutex_costs = np.array([5.0], dtype=np.float64)
 
-    labels = bic.graph.mutex_watershed_clustering(
+    labels = bic.graph.mutex_watershed.mutex_watershed_clustering(
         graph, edge_costs, mutex_uvs, mutex_costs
     )
 
@@ -97,7 +97,7 @@ def test_attractive_higher_than_mutex_still_merges():
     mutex_uvs = np.array([[0, 2]], dtype=np.uint64)
     mutex_costs = np.array([0.1], dtype=np.float64)
 
-    labels = bic.graph.mutex_watershed_clustering(
+    labels = bic.graph.mutex_watershed.mutex_watershed_clustering(
         graph, edge_costs, mutex_uvs, mutex_costs
     )
 
@@ -115,7 +115,7 @@ def test_lifted_multicut_inputs_accepted_unchanged():
     lifted_uvs = np.array([[0, 2], [0, 3], [1, 3]], dtype=np.uint64)
     lifted_costs = np.array([-1.0, -1.0, 0.5], dtype=np.float64)
 
-    objective = bic.graph.LiftedMulticutObjective(
+    objective = bic.graph.lifted_multicut.LiftedMulticutObjective(
         graph,
         edge_costs,
         lifted_uvs=lifted_uvs,
@@ -123,7 +123,7 @@ def test_lifted_multicut_inputs_accepted_unchanged():
     )
     assert objective is not None
 
-    labels = bic.graph.mutex_watershed_clustering(
+    labels = bic.graph.mutex_watershed.mutex_watershed_clustering(
         graph, edge_costs, lifted_uvs, lifted_costs
     )
     assert labels.shape == (4,)
@@ -145,8 +145,8 @@ def test_deterministic_across_runs():
     mutex_uvs = mutex_uvs[mutex_uvs[:, 0] != mutex_uvs[:, 1]]
     mutex_costs = rng.uniform(0.0, 1.0, size=mutex_uvs.shape[0]).astype(np.float64)
 
-    first = bic.graph.mutex_watershed_clustering(graph, edge_costs, mutex_uvs, mutex_costs)
-    second = bic.graph.mutex_watershed_clustering(graph, edge_costs, mutex_uvs, mutex_costs)
+    first = bic.graph.mutex_watershed.mutex_watershed_clustering(graph, edge_costs, mutex_uvs, mutex_costs)
+    second = bic.graph.mutex_watershed.mutex_watershed_clustering(graph, edge_costs, mutex_uvs, mutex_costs)
     np.testing.assert_array_equal(first, second)
 
 
@@ -156,7 +156,7 @@ def test_dense_label_range():
     mutex_uvs = np.zeros((0, 2), dtype=np.uint64)
     mutex_costs = np.zeros(0, dtype=np.float64)
 
-    labels = bic.graph.mutex_watershed_clustering(
+    labels = bic.graph.mutex_watershed.mutex_watershed_clustering(
         graph, edge_costs, mutex_uvs, mutex_costs
     )
 
@@ -172,7 +172,7 @@ def test_invalid_edge_costs_length_raises():
     mutex_costs = np.zeros(0, dtype=np.float64)
 
     with pytest.raises(ValueError):
-        bic.graph.mutex_watershed_clustering(graph, bad_costs, mutex_uvs, mutex_costs)
+        bic.graph.mutex_watershed.mutex_watershed_clustering(graph, bad_costs, mutex_uvs, mutex_costs)
 
 
 def test_invalid_mutex_uvs_shape_raises():
@@ -182,7 +182,7 @@ def test_invalid_mutex_uvs_shape_raises():
     mutex_costs = np.array([1.0], dtype=np.float64)
 
     with pytest.raises(ValueError):
-        bic.graph.mutex_watershed_clustering(graph, edge_costs, bad_mutex_uvs, mutex_costs)
+        bic.graph.mutex_watershed.mutex_watershed_clustering(graph, edge_costs, bad_mutex_uvs, mutex_costs)
 
 
 def test_mismatched_mutex_costs_raises():
@@ -192,7 +192,7 @@ def test_mismatched_mutex_costs_raises():
     bad_mutex_costs = np.array([1.0, 2.0], dtype=np.float64)  # expected size 1
 
     with pytest.raises(ValueError):
-        bic.graph.mutex_watershed_clustering(graph, edge_costs, mutex_uvs, bad_mutex_costs)
+        bic.graph.mutex_watershed.mutex_watershed_clustering(graph, edge_costs, mutex_uvs, bad_mutex_costs)
 
 
 def test_float32_inputs_supported():
@@ -201,7 +201,7 @@ def test_float32_inputs_supported():
     mutex_uvs = np.array([[0, 3]], dtype=np.uint64)
     mutex_costs = np.array([5.0], dtype=np.float32)
 
-    labels = bic.graph.mutex_watershed_clustering(
+    labels = bic.graph.mutex_watershed.mutex_watershed_clustering(
         graph, edge_costs, mutex_uvs, mutex_costs
     )
 
@@ -217,10 +217,10 @@ def test_float32_and_float64_agree_on_simple_problem():
     mutex_uvs = np.array([[0, 4], [1, 3]], dtype=np.uint64)
     mutex_costs = np.array([2.0, 0.1], dtype=np.float64)
 
-    labels_64 = bic.graph.mutex_watershed_clustering(
+    labels_64 = bic.graph.mutex_watershed.mutex_watershed_clustering(
         graph, edge_costs, mutex_uvs, mutex_costs
     )
-    labels_32 = bic.graph.mutex_watershed_clustering(
+    labels_32 = bic.graph.mutex_watershed.mutex_watershed_clustering(
         graph,
         edge_costs.astype(np.float32),
         mutex_uvs,
@@ -237,7 +237,7 @@ def test_mismatched_dtypes_are_promoted():
     mutex_uvs = np.array([[0, 2]], dtype=np.uint64)
     mutex_costs = np.array([2.0], dtype=np.float64)
 
-    labels = bic.graph.mutex_watershed_clustering(
+    labels = bic.graph.mutex_watershed.mutex_watershed_clustering(
         graph, edge_costs, mutex_uvs, mutex_costs
     )
     np.testing.assert_array_equal(_canonicalize(labels), np.array([0, 0, 1]))
@@ -250,7 +250,7 @@ def test_integer_edge_costs_rejected():
     mutex_costs = np.zeros(0, dtype=np.float64)
 
     with pytest.raises(TypeError):
-        bic.graph.mutex_watershed_clustering(
+        bic.graph.mutex_watershed.mutex_watershed_clustering(
             graph, bad_edge_costs, mutex_uvs, mutex_costs
         )
 
@@ -262,4 +262,4 @@ def test_out_of_range_mutex_endpoint_raises():
     mutex_costs = np.array([1.0], dtype=np.float64)
 
     with pytest.raises((ValueError, RuntimeError)):
-        bic.graph.mutex_watershed_clustering(graph, edge_costs, bad_mutex_uvs, mutex_costs)
+        bic.graph.mutex_watershed.mutex_watershed_clustering(graph, edge_costs, bad_mutex_uvs, mutex_costs)
diff --git a/tests/graph/test_rag_features.py b/tests/graph/test_rag_features.py
index 8f8c42c..15aeb08 100644
--- a/tests/graph/test_rag_features.py
+++ b/tests/graph/test_rag_features.py
@@ -9,9 +9,9 @@ def test_edge_map_features_simple():
     edge_map = np.array([[1.0, 2.0, 10.0], [3.0, 4.0, 20.0]])
     rag = bic.graph.region_adjacency_graph(labels)
 
-    features = bic.graph.edge_map_features(rag, labels, edge_map)
+    features = bic.graph.features.edge_map_features(rag, labels, edge_map)
 
-    assert tuple(bic.graph.SIMPLE_EDGE_FEATURE_NAMES) == ("mean", "size")
+    assert tuple(bic.graph.features.SIMPLE_EDGE_FEATURE_NAMES) == ("mean", "size")
     np.testing.assert_allclose(
         features,
         np.array(
@@ -29,9 +29,9 @@ def test_edge_map_features_complex():
     edge_map = np.array([[1.0, 2.0, 10.0], [3.0, 4.0, 20.0]])
     rag = bic.graph.region_adjacency_graph(labels)
 
-    features = bic.graph.edge_map_features_complex(rag, labels, edge_map)
+    features = bic.graph.features.edge_map_features_complex(rag, labels, edge_map)
 
-    assert tuple(bic.graph.COMPLEX_EDGE_FEATURE_NAMES) == (
+    assert tuple(bic.graph.features.COMPLEX_EDGE_FEATURE_NAMES) == (
         "mean",
         "median",
         "std",
@@ -73,7 +73,7 @@ def test_affinity_features_simple():
     affinities[0] = np.array([[0.0, 6.0, 0.0], [7.0, 8.0, 0.0]])
     affinities[1] = np.array([[0.0, 9.0, 0.0], [0.0, 0.0, 0.0]])
 
-    features = bic.graph.affinity_features(
+    features = bic.graph.features.affinity_features(
         rag, labels, affinities, offsets=[[0, 1], [1, 0]]
     )
 
@@ -142,7 +142,7 @@ def test_affinity_features_2d_offsets_match_reference(offsets):
     ).astype(np.float32)
 
     expected = _reference_affinity_features(labels, rag, affinities, offsets)
-    got = bic.graph.affinity_features(rag, labels, affinities, offsets=offsets)
+    got = bic.graph.features.affinity_features(rag, labels, affinities, offsets=offsets)
 
     np.testing.assert_allclose(got, expected, rtol=1e-5, atol=1e-6)
 
@@ -167,7 +167,7 @@ def test_affinity_features_3d_offsets_match_reference(offsets):
     ).astype(np.float32)
 
     expected = _reference_affinity_features(labels, rag, affinities, offsets)
-    got = bic.graph.affinity_features(rag, labels, affinities, offsets=offsets)
+    got = bic.graph.features.affinity_features(rag, labels, affinities, offsets=offsets)
 
     np.testing.assert_allclose(got, expected, rtol=1e-5, atol=1e-6)
 
@@ -178,10 +178,10 @@ def test_affinity_features_complex_parallel_matches_single_thread():
     affinities = np.arange(2 * labels.size, dtype=np.float64).reshape((2,) + labels.shape)
     offsets = [[0, 1], [1, 0]]
 
-    single_threaded = bic.graph.affinity_features_complex(
+    single_threaded = bic.graph.features.affinity_features_complex(
         rag, labels, affinities, offsets, number_of_threads=1
     )
-    parallel = bic.graph.affinity_features_complex(
+    parallel = bic.graph.features.affinity_features_complex(
         rag, labels, affinities, offsets, number_of_threads=3
     )
 
@@ -193,12 +193,12 @@ def test_rag_feature_validation():
     rag = bic.graph.region_adjacency_graph(labels)
 
     with pytest.raises(ValueError, match="edge_map shape"):
-        bic.graph.edge_map_features(rag, labels, np.ones((2, 2)))
+        bic.graph.features.edge_map_features(rag, labels, np.ones((2, 2)))
 
     with pytest.raises(ValueError, match="affinities must have shape"):
-        bic.graph.affinity_features(rag, labels, np.ones((2, 2)), offsets=[[0, 1]])
+        bic.graph.features.affinity_features(rag, labels, np.ones((2, 2)), offsets=[[0, 1]])
 
     with pytest.raises(ValueError, match="offsets length"):
-        bic.graph.affinity_features(
+        bic.graph.features.affinity_features(
             rag, labels, np.ones((2, 1, 2)), offsets=[[0, 1]]
         )
diff --git a/tests/graph/test_rag_lifted_features.py b/tests/graph/test_rag_lifted_features.py
index 6a6e50f..24707b7 100644
--- a/tests/graph/test_rag_lifted_features.py
+++ b/tests/graph/test_rag_lifted_features.py
@@ -29,7 +29,7 @@ def test_lifted_edges_discovered_via_diagonal_offset(
 ):
     # Diagonal offset (2, 2): every hit connects label 0 (top-left) with
     # label 3 (bottom-right). (0, 3) is not in the RAG -> lifted edge.
-    lifted = bic.graph.lifted_edges_from_affinities(
+    lifted = bic.graph.features.lifted_edges_from_affinities(
         four_quadrant_rag_2d, four_quadrant_labels_2d, [(2, 2)]
     )
     assert lifted.dtype == np.uint64
@@ -40,7 +40,7 @@ def test_lifted_edges_anti_diagonal_offset(
     four_quadrant_labels_2d, four_quadrant_rag_2d
 ):
     # (2, -2): connects label 1 (top-right) with label 2 (bottom-left).
-    lifted = bic.graph.lifted_edges_from_affinities(
+    lifted = bic.graph.features.lifted_edges_from_affinities(
         four_quadrant_rag_2d, four_quadrant_labels_2d, [(2, -2)]
     )
     np.testing.assert_array_equal(lifted, np.array([[1, 2]], dtype=np.uint64))
@@ -51,7 +51,7 @@ def test_lifted_edges_skip_one_hop_offsets(
 ):
     # 1-hop offsets only hit pairs that already exist in the RAG. The
     # function must skip them and return an empty result.
-    lifted = bic.graph.lifted_edges_from_affinities(
+    lifted = bic.graph.features.lifted_edges_from_affinities(
         four_quadrant_rag_2d,
         four_quadrant_labels_2d,
         [(0, 1), (1, 0), (0, -1), (-1, 0)],
@@ -62,7 +62,7 @@ def test_lifted_edges_skip_one_hop_offsets(
 def test_lifted_edges_mixed_offsets(four_quadrant_labels_2d, four_quadrant_rag_2d):
     # Mixing a long-range diagonal with 1-hop offsets: only the long-range
     # contribution produces lifted edges.
-    lifted = bic.graph.lifted_edges_from_affinities(
+    lifted = bic.graph.features.lifted_edges_from_affinities(
         four_quadrant_rag_2d,
         four_quadrant_labels_2d,
         [(0, 1), (2, 2), (1, 0), (2, -2)],
@@ -84,14 +84,14 @@ def test_lifted_edges_pair_already_in_rag_skipped():
         dtype=np.uint32,
     )
     rag = bic.graph.region_adjacency_graph(labels)
-    lifted = bic.graph.lifted_edges_from_affinities(
+    lifted = bic.graph.features.lifted_edges_from_affinities(
         rag, labels, [(2, 0), (0, 2), (2, 2)]
     )
     assert lifted.shape == (0, 2)
 
 
 def test_lifted_edges_empty_offsets(four_quadrant_labels_2d, four_quadrant_rag_2d):
-    lifted = bic.graph.lifted_edges_from_affinities(
+    lifted = bic.graph.features.lifted_edges_from_affinities(
         four_quadrant_rag_2d, four_quadrant_labels_2d, []
     )
     assert lifted.shape == (0, 2)
@@ -112,7 +112,7 @@ def test_lifted_edges_3d():
 
     # Diagonal (z, y, x) = (2, 0, 2) connects (0,*,0..1) with (2,*,2..3)
     # i.e. label 0 with label 3 -> lifted (0, 3) not in RAG.
-    lifted = bic.graph.lifted_edges_from_affinities(rag, labels, [(2, 0, 2)])
+    lifted = bic.graph.features.lifted_edges_from_affinities(rag, labels, [(2, 0, 2)])
     np.testing.assert_array_equal(lifted, np.array([[0, 3]], dtype=np.uint64))
 
 
@@ -120,20 +120,20 @@ def test_lifted_edges_dedup_across_offsets(
     four_quadrant_labels_2d, four_quadrant_rag_2d
 ):
     # (2, 2) and (-2, -2) discover the same (0, 3) pair from opposite ends.
-    lifted = bic.graph.lifted_edges_from_affinities(
+    lifted = bic.graph.features.lifted_edges_from_affinities(
         four_quadrant_rag_2d, four_quadrant_labels_2d, [(2, 2), (-2, -2)]
     )
     np.testing.assert_array_equal(lifted, np.array([[0, 3]], dtype=np.uint64))
 
 
 def test_lifted_edges_threads(four_quadrant_labels_2d, four_quadrant_rag_2d):
-    lifted_single = bic.graph.lifted_edges_from_affinities(
+    lifted_single = bic.graph.features.lifted_edges_from_affinities(
         four_quadrant_rag_2d,
         four_quadrant_labels_2d,
         [(2, 2), (2, -2)],
         number_of_threads=1,
     )
-    lifted_multi = bic.graph.lifted_edges_from_affinities(
+    lifted_multi = bic.graph.features.lifted_edges_from_affinities(
         four_quadrant_rag_2d,
         four_quadrant_labels_2d,
         [(2, 2), (2, -2)],
@@ -154,10 +154,10 @@ def test_lifted_affinity_features_basic(
     affinities[0, 1, 0] = 0.3
     affinities[0, 1, 1] = 0.4
 
-    lifted_uvs = bic.graph.lifted_edges_from_affinities(
+    lifted_uvs = bic.graph.features.lifted_edges_from_affinities(
         four_quadrant_rag_2d, four_quadrant_labels_2d, [(2, 2)]
     )
-    features = bic.graph.lifted_affinity_features(
+    features = bic.graph.features.lifted_affinity_features(
         four_quadrant_labels_2d, affinities, [(2, 2)], lifted_uvs
     )
     assert features.shape == (1, 2)
@@ -169,10 +169,10 @@ def test_lifted_affinity_features_complex_columns(
     four_quadrant_labels_2d, four_quadrant_rag_2d
 ):
     affinities = np.full((1, 4, 4), 0.5, dtype=np.float64)
-    lifted_uvs = bic.graph.lifted_edges_from_affinities(
+    lifted_uvs = bic.graph.features.lifted_edges_from_affinities(
         four_quadrant_rag_2d, four_quadrant_labels_2d, [(2, 2)]
     )
-    features = bic.graph.lifted_affinity_features_complex(
+    features = bic.graph.features.lifted_affinity_features_complex(
         four_quadrant_labels_2d, affinities, [(2, 2)], lifted_uvs
     )
     assert features.shape == (1, 12)
@@ -196,10 +196,10 @@ def test_lifted_affinity_features_skips_one_hop(
     # Channel 0 is 1-hop (should be skipped); channel 1 is the long-range
     # diagonal that defines the lifted edge.
     offsets = [(0, 1), (2, 2)]
-    lifted_uvs = bic.graph.lifted_edges_from_affinities(
+    lifted_uvs = bic.graph.features.lifted_edges_from_affinities(
         four_quadrant_rag_2d, four_quadrant_labels_2d, offsets
     )
-    features = bic.graph.lifted_affinity_features(
+    features = bic.graph.features.lifted_affinity_features(
         four_quadrant_labels_2d, affinities, offsets, lifted_uvs
     )
     # Only the 4 diagonal hits count.
@@ -225,7 +225,7 @@ def test_lifted_affinity_features_skips_local_hits():
     # in the data to make sure no accumulation happens.
     lifted_uvs = np.array([[0, 1]], dtype=np.uint64)  # actually a local edge!
     affinities = np.full((1, 3, 3), 0.9, dtype=np.float64)
-    features = bic.graph.lifted_affinity_features(
+    features = bic.graph.features.lifted_affinity_features(
         labels, affinities, [(2, 2)], lifted_uvs
     )
     # The (0,0)->(2,2) hit has matching labels, so no count.
@@ -235,7 +235,7 @@ def test_lifted_affinity_features_skips_local_hits():
 def test_lifted_affinity_features_empty_lifted_uvs(four_quadrant_labels_2d):
     affinities = np.full((1, 4, 4), 0.3, dtype=np.float64)
     empty_uvs = np.zeros((0, 2), dtype=np.uint64)
-    features = bic.graph.lifted_affinity_features(
+    features = bic.graph.features.lifted_affinity_features(
         four_quadrant_labels_2d, affinities, [(2, 2)], empty_uvs
     )
     assert features.shape == (0, 2)
@@ -245,14 +245,14 @@ def test_lifted_affinity_features_threads(
     four_quadrant_labels_2d, four_quadrant_rag_2d
 ):
     affinities = np.linspace(0.0, 1.0, num=16).reshape(1, 4, 4).astype(np.float64)
-    lifted_uvs = bic.graph.lifted_edges_from_affinities(
+    lifted_uvs = bic.graph.features.lifted_edges_from_affinities(
         four_quadrant_rag_2d, four_quadrant_labels_2d, [(2, 2)]
     )
-    single = bic.graph.lifted_affinity_features(
+    single = bic.graph.features.lifted_affinity_features(
         four_quadrant_labels_2d, affinities, [(2, 2)], lifted_uvs,
         number_of_threads=1,
     )
-    multi = bic.graph.lifted_affinity_features(
+    multi = bic.graph.features.lifted_affinity_features(
         four_quadrant_labels_2d, affinities, [(2, 2)], lifted_uvs,
         number_of_threads=4,
     )
@@ -272,8 +272,8 @@ def test_lifted_affinity_features_3d():
     rag = bic.graph.region_adjacency_graph(labels)
 
     affinities = np.full((1, 4, 1, 4), 0.42, dtype=np.float64)
-    lifted_uvs = bic.graph.lifted_edges_from_affinities(rag, labels, [(2, 0, 2)])
-    features = bic.graph.lifted_affinity_features(
+    lifted_uvs = bic.graph.features.lifted_edges_from_affinities(rag, labels, [(2, 0, 2)])
+    features = bic.graph.features.lifted_affinity_features(
         labels, affinities, [(2, 0, 2)], lifted_uvs
     )
     # (0,0,0)->(2,0,2): label 0 -> 3, hit
@@ -291,17 +291,17 @@ def test_lifted_affinity_features_validation():
     lifted_uvs = np.zeros((1, 2), dtype=np.uint64)
 
     with pytest.raises(ValueError, match="channel count"):
-        bic.graph.lifted_affinity_features(
+        bic.graph.features.lifted_affinity_features(
             labels, bad_affinities, [(2, 2)], lifted_uvs
         )
     with pytest.raises(ValueError, match="ndim"):
-        bic.graph.lifted_affinity_features(
+        bic.graph.features.lifted_affinity_features(
             labels, affinities, [(2, 2, 0)], lifted_uvs
         )
 
 
 def test_lifted_edges_validation(four_quadrant_labels_2d, four_quadrant_rag_2d):
     with pytest.raises(ValueError, match="ndim"):
-        bic.graph.lifted_edges_from_affinities(
+        bic.graph.features.lifted_edges_from_affinities(
             four_quadrant_rag_2d, four_quadrant_labels_2d, [(2, 2, 0)]
         )
diff --git a/tests/graph/test_rag_multicut_integration.py b/tests/graph/test_rag_multicut_integration.py
index 73f2389..7e48416 100644
--- a/tests/graph/test_rag_multicut_integration.py
+++ b/tests/graph/test_rag_multicut_integration.py
@@ -42,16 +42,16 @@ def test_rag_feature_multicut_projection_on_realistic_oversegmentation():
     boundary_map[boundary_pixels] = 0.92
 
     rag = bic.graph.region_adjacency_graph(oversegmentation, number_of_threads=3)
-    features = bic.graph.edge_map_features(
+    features = bic.graph.features.edge_map_features(
         rag, oversegmentation, boundary_map, number_of_threads=3
     )
     edge_costs = 0.6 - features[:, 0]
 
-    objective = bic.graph.MulticutObjective(rag, edge_costs)
-    node_labels = bic.graph.ChainedMulticutSolvers(
+    objective = bic.graph.multicut.MulticutObjective(rag, edge_costs)
+    node_labels = bic.graph.multicut.ChainedMulticutSolvers(
         [
-            bic.graph.GreedyAdditiveMulticut(),
-            bic.graph.KernighanLinMulticut(number_of_outer_iterations=5),
+            bic.graph.multicut.GreedyAdditiveMulticut(),
+            bic.graph.multicut.KernighanLinMulticut(number_of_outer_iterations=5),
         ]
     ).optimize(objective)
     projected = bic.graph.project_node_labels_to_pixels(
diff --git a/tests/graph/test_semantic_mutex_watershed_graph.py b/tests/graph/test_semantic_mutex_watershed_graph.py
index c224351..8afdd19 100644
--- a/tests/graph/test_semantic_mutex_watershed_graph.py
+++ b/tests/graph/test_semantic_mutex_watershed_graph.py
@@ -35,10 +35,10 @@ def test_without_semantic_edges_matches_regular_mutex_watershed():
     mutex_costs = np.array([5.0], dtype=np.float64)
     semantic_uvs, semantic_costs = _empty_semantic()
 
-    labels, semantic = bic.graph.semantic_mutex_watershed_clustering(
+    labels, semantic = bic.graph.mutex_watershed.semantic_mutex_watershed_clustering(
         graph, edge_costs, mutex_uvs, mutex_costs, semantic_uvs, semantic_costs
     )
-    regular_labels = bic.graph.mutex_watershed_clustering(
+    regular_labels = bic.graph.mutex_watershed.mutex_watershed_clustering(
         graph, edge_costs, mutex_uvs, mutex_costs
     )
 
@@ -60,7 +60,7 @@ def test_semantic_constraint_blocks_merge():
     semantic_node_classes = np.array([[0, 0], [2, 1]], dtype=np.uint64)
     semantic_costs = np.array([10.0, 10.0], dtype=np.float64)
 
-    labels, semantic = bic.graph.semantic_mutex_watershed_clustering(
+    labels, semantic = bic.graph.mutex_watershed.semantic_mutex_watershed_clustering(
         graph, edge_costs, mutex_uvs, mutex_costs,
         semantic_node_classes, semantic_costs,
     )
@@ -85,7 +85,7 @@ def test_semantic_label_propagates_across_merge():
     semantic_node_classes = np.array([[0, 3]], dtype=np.uint64)
     semantic_costs = np.array([10.0], dtype=np.float64)
 
-    labels, semantic = bic.graph.semantic_mutex_watershed_clustering(
+    labels, semantic = bic.graph.mutex_watershed.semantic_mutex_watershed_clustering(
         graph, edge_costs, mutex_uvs, mutex_costs,
         semantic_node_classes, semantic_costs,
     )
@@ -104,7 +104,7 @@ def test_same_semantic_label_does_not_block_merge():
     semantic_node_classes = np.array([[0, 7], [2, 7]], dtype=np.uint64)
     semantic_costs = np.array([10.0, 10.0], dtype=np.float64)
 
-    labels, semantic = bic.graph.semantic_mutex_watershed_clustering(
+    labels, semantic = bic.graph.mutex_watershed.semantic_mutex_watershed_clustering(
         graph, edge_costs, mutex_uvs, mutex_costs,
         semantic_node_classes, semantic_costs,
     )
@@ -122,7 +122,7 @@ def test_mutex_still_separates_under_semantic():
     semantic_node_classes = np.array([[0, 0], [1, 0], [2, 0]], dtype=np.uint64)
     semantic_costs = np.array([0.5, 0.5, 0.5], dtype=np.float64)
 
-    labels, semantic = bic.graph.semantic_mutex_watershed_clustering(
+    labels, semantic = bic.graph.mutex_watershed.semantic_mutex_watershed_clustering(
         graph, edge_costs, mutex_uvs, mutex_costs,
         semantic_node_classes, semantic_costs,
     )
@@ -142,7 +142,7 @@ def test_unassigned_clusters_keep_minus_one():
     semantic_node_classes = np.array([[0, 5]], dtype=np.uint64)
     semantic_costs = np.array([10.0], dtype=np.float64)
 
-    labels, semantic = bic.graph.semantic_mutex_watershed_clustering(
+    labels, semantic = bic.graph.mutex_watershed.semantic_mutex_watershed_clustering(
         graph, edge_costs, mutex_uvs, mutex_costs,
         semantic_node_classes, semantic_costs,
     )
@@ -160,7 +160,7 @@ def test_dense_label_range():
     mutex_costs = np.zeros(0, dtype=np.float64)
     semantic_uvs, semantic_costs = _empty_semantic()
 
-    labels, _ = bic.graph.semantic_mutex_watershed_clustering(
+    labels, _ = bic.graph.mutex_watershed.semantic_mutex_watershed_clustering(
         graph, edge_costs, mutex_uvs, mutex_costs, semantic_uvs, semantic_costs
     )
 
@@ -186,10 +186,10 @@ def test_deterministic_across_runs():
     semantic_node_classes = np.stack([semantic_nodes, semantic_classes], axis=1)
     semantic_costs = rng.uniform(0.0, 1.0, size=10).astype(np.float64)
 
-    first = bic.graph.semantic_mutex_watershed_clustering(
+    first = bic.graph.mutex_watershed.semantic_mutex_watershed_clustering(
         graph, edge_costs, mutex_uvs, mutex_costs, semantic_node_classes, semantic_costs
     )
-    second = bic.graph.semantic_mutex_watershed_clustering(
+    second = bic.graph.mutex_watershed.semantic_mutex_watershed_clustering(
         graph, edge_costs, mutex_uvs, mutex_costs, semantic_node_classes, semantic_costs
     )
     np.testing.assert_array_equal(first[0], second[0])
@@ -204,7 +204,7 @@ def test_float32_inputs_supported():
     semantic_node_classes = np.array([[0, 0], [2, 1]], dtype=np.uint64)
     semantic_costs = np.array([10.0, 10.0], dtype=np.float32)
 
-    labels, semantic = bic.graph.semantic_mutex_watershed_clustering(
+    labels, semantic = bic.graph.mutex_watershed.semantic_mutex_watershed_clustering(
         graph, edge_costs, mutex_uvs, mutex_costs,
         semantic_node_classes, semantic_costs,
     )
@@ -222,7 +222,7 @@ def test_mismatched_dtypes_are_promoted():
     semantic_node_classes = np.array([[0, 0], [2, 1]], dtype=np.uint64)
     semantic_costs = np.array([10.0, 10.0], dtype=np.float64)
 
-    labels, semantic = bic.graph.semantic_mutex_watershed_clustering(
+    labels, semantic = bic.graph.mutex_watershed.semantic_mutex_watershed_clustering(
         graph, edge_costs, mutex_uvs, mutex_costs,
         semantic_node_classes, semantic_costs,
     )
@@ -241,7 +241,7 @@ def test_invalid_semantic_shape_raises():
     semantic_costs = np.array([1.0], dtype=np.float64)
 
     with pytest.raises(ValueError):
-        bic.graph.semantic_mutex_watershed_clustering(
+        bic.graph.mutex_watershed.semantic_mutex_watershed_clustering(
             graph, edge_costs, mutex_uvs, mutex_costs, bad_semantic, semantic_costs
         )
 
@@ -255,7 +255,7 @@ def test_mismatched_semantic_costs_raises():
     bad_costs = np.array([1.0, 2.0], dtype=np.float64)
 
     with pytest.raises(ValueError):
-        bic.graph.semantic_mutex_watershed_clustering(
+        bic.graph.mutex_watershed.semantic_mutex_watershed_clustering(
             graph, edge_costs, mutex_uvs, mutex_costs,
             semantic_node_classes, bad_costs,
         )
@@ -270,7 +270,7 @@ def test_out_of_range_semantic_node_raises():
     semantic_costs = np.array([1.0], dtype=np.float64)
 
     with pytest.raises((ValueError, RuntimeError)):
-        bic.graph.semantic_mutex_watershed_clustering(
+        bic.graph.mutex_watershed.semantic_mutex_watershed_clustering(
             graph, edge_costs, mutex_uvs, mutex_costs,
             semantic_node_classes, semantic_costs,
         )