From 9242d7893e5caa013d80a792b36871b298b1cd97 Mon Sep 17 00:00:00 2001
From: Marvin Albert <marvin.albert@gmail.com>
Date: Tue, 9 Sep 2025 16:20:36 +0200
Subject: [PATCH 1/6] Improve parallelism of label() by making block labels
 unique using low/high bit encoding rather than additive scheme

---
 dask_image/ndmeasure/__init__.py              |  89 ++++++----
 dask_image/ndmeasure/_utils/_label.py         | 163 ++++++++++++++----
 .../test_ndmeasure/test_core.py               |  63 +++++--
 3 files changed, 236 insertions(+), 79 deletions(-)

diff --git a/dask_image/ndmeasure/__init__.py b/dask_image/ndmeasure/__init__.py
index ed539e0a..0829f4f5 100644
--- a/dask_image/ndmeasure/__init__.py
+++ b/dask_image/ndmeasure/__init__.py
@@ -11,6 +11,7 @@
 import dask.bag as db
 import dask.dataframe as dd
 import numpy as np
+from scipy.ndimage import label as ndimage_label
 
 from . import _utils
 from ._utils import _label
@@ -315,7 +316,12 @@ def histogram(image,
     return result
 
 
-def label(image, structure=None, wrap_axes=None):
+def label(
+        image,
+        structure=None,
+        wrap_axes=None,
+        produce_sequential_labels=False,
+        ):
     """
     Label features in an array.
 
@@ -334,7 +340,6 @@ def label(image, structure=None, wrap_axes=None):
             [[0,1,0],
              [1,1,1],
              [0,1,0]]
-
     wrap_axes : tuple of int, optional
         Whether labels should be wrapped across array boundaries, and if so
         which axes.
@@ -343,6 +348,11 @@ def label(image, structure=None, wrap_axes=None):
         - (0,) only wrap across the 0th axis.
         - (0, 1) wrap across the 0th and 1st axis.
         - (0, 1, 3)  wrap across 0th, 1st and 3rd axis.
+    produce_sequential_labels : bool, optional
+        If False, the returned labels are not contiguous (i.e. 0, 1, 6045, 54654).
+        This is the default behavior and has advantages for parallelism.
+        If True, the labels are relabeled to be sequential
+        and contiguous from 1 to the number of features.
 
     Returns
     -------
@@ -355,41 +365,52 @@ def label(image, structure=None, wrap_axes=None):
 
     image = da.asarray(image)
 
-    labeled_blocks = np.empty(image.numblocks, dtype=object)
+    block_labeled = image.map_blocks(
+        lambda x: ndimage_label(x, structure=structure)[0],
+        dtype=_label.LABEL_DTYPE,
+    )
+
+    # Make labels unique across blocks by encoding the block ID into the labels.
+    # We use half the bits to encode the block ID and half the bits to encode
+    # the label ID within the block.
+    block_labeled_unique = _label._make_labels_unique(
+        block_labeled, encoding_dtype=np.uint32)
 
-    # First, label each block independently, incrementing the labels in that
-    # block by the total number of labels from previous blocks. This way, each
-    # block's labels are globally unique.
-    block_iter = zip(
-        np.ndindex(*image.numblocks),
-        map(functools.partial(operator.getitem, image),
-            da.core.slices_from_chunks(image.chunks))
-    )
-    index, input_block = next(block_iter)
-    labeled_blocks[index], total = _label.block_ndi_label_delayed(input_block,
-                                                                  structure)
-    for index, input_block in block_iter:
-        labeled_block, n = _label.block_ndi_label_delayed(input_block,
-                                                          structure)
-        block_label_offset = da.where(labeled_block > 0,
-                                      total,
-                                      _label.LABEL_DTYPE.type(0))
-        labeled_block += block_label_offset
-        labeled_blocks[index] = labeled_block
-        total += n
-
-    # Put all the blocks together
-    block_labeled = da.block(labeled_blocks.tolist())
-
-    # Now, build a label connectivity graph that groups labels across blocks.
-    # We use this graph to find connected components and then relabel each
+    # Now, build a label mapping that groups labels across blocks.
+    # We use this mapping to find connected components and then relabel each
     # block according to those.
-    label_groups = _label.label_adjacency_graph(
-        block_labeled, structure, total, wrap_axes=wrap_axes
+    all_mappings = _label.label_adjacency_mapping(
+        block_labeled_unique, structure, wrap_axes=wrap_axes
     )
-    new_labeling = _label.connected_components_delayed(label_groups)
-    relabeled = _label.relabel_blocks(block_labeled, new_labeling)
-    n = da.max(relabeled)
+    mapping, n_red = _label.connected_components_delayed(all_mappings)
+
+    n_red = da.from_delayed(
+        n_red,
+        shape=(),
+        dtype=np.uint16
+    )
+
+    relabeled = _label.relabel_blocks(
+        block_labeled_unique, mapping
+    )
+
+    relabeled = relabeled.astype(_label.LABEL_DTYPE)
+
+    if produce_sequential_labels:
+        relabeled = _label.relabel_sequential(relabeled)
+        n = relabeled.max()
+        
+    else:
+        def count_labels(label_block):
+            return len(np.unique(label_block[label_block > 0]))
+
+        total = block_labeled.map_blocks(
+            lambda x: np.ones((1, ) * x.ndim, dtype=np.uint64) * count_labels(x),
+            dtype=np.uint64,
+            chunks=(1, ) * block_labeled.ndim
+        ).sum()
+
+        n = total - n_red
 
     return (relabeled, n)
 
diff --git a/dask_image/ndmeasure/_utils/_label.py b/dask_image/ndmeasure/_utils/_label.py
index 01544d48..4a6cc6d8 100644
--- a/dask_image/ndmeasure/_utils/_label.py
+++ b/dask_image/ndmeasure/_utils/_label.py
@@ -5,6 +5,7 @@
 import dask
 import dask.array as da
 import numpy as np
+import pandas as pd
 import scipy.ndimage
 import scipy.sparse
 import scipy.sparse.csgraph
@@ -25,16 +26,16 @@ def _get_connected_components_dtype():
 CONN_COMP_DTYPE = _get_connected_components_dtype()
 
 
-def relabel_blocks(block_labeled, new_labeling):
+def relabel_blocks(block_labeled, mapping):
     """
-    Relabel a block-labeled array based on ``new_labeling``.
+    Relabel a block-labeled array based on ``mapping``.
 
     Parameters
     ----------
     block_labeled : array of int
         The input label array.
-    new_labeling : 1D array of int
-        A new labeling, such that ``labeling[i] = j`` implies that
+    mapping : dict
+        A label mapping, such that ``mapping[i] = j`` implies that
         any element in ``array`` valued ``i`` should be relabeled to ``j``.
 
     Returns
@@ -42,12 +43,37 @@ def relabel_blocks(block_labeled, new_labeling):
     relabeled : array of int, same shape as ``array``
         The relabeled input array.
     """
-    new_labeling = new_labeling.astype(LABEL_DTYPE)
-    relabeled = da.map_blocks(operator.getitem,
-                              new_labeling,
-                              block_labeled,
-                              dtype=LABEL_DTYPE,
-                              chunks=block_labeled.chunks)
+    
+    def relabel_block(block, mapping):
+        # pandas replace works well for both dense and sparse mappings
+        # including large integer labels
+        s = pd.Series(block.flatten())
+        result = s.replace(mapping).to_numpy()
+        return result.reshape(block.shape)
+
+    relabeled = block_labeled.map_blocks(
+        relabel_block,
+        mapping=mapping,
+        dtype=block_labeled.dtype
+    )
+
+    return relabeled
+
+
+def relabel_sequential(labels):
+    """
+    Relabel the labels in ``labels`` to be sequential starting at 1.
+    """
+
+    u = da.unique(
+        da.concatenate(
+            [da.unique(b) for b in labels.blocks]
+            )
+        )
+    mapping = dask.delayed(lambda x: {k: v for k, v in zip(x, np.arange(len(x)))})(u)
+
+    relabeled = relabel_blocks(labels, mapping)
+
     return relabeled
 
 
@@ -115,7 +141,6 @@ def _across_block_label_grouping_delayed(face, structure):
     return da.from_delayed(grouped, shape=(2, np.nan), dtype=LABEL_DTYPE)
 
 
-@dask.delayed
 def _to_csr_matrix(i, j, n):
     """Using i and j as coo-format coordinates, return csr matrix."""
     v = np.ones_like(i)
@@ -123,14 +148,14 @@ def _to_csr_matrix(i, j, n):
     return mat.tocsr()
 
 
-def label_adjacency_graph(labels, structure, nlabels, wrap_axes=None):
+def label_adjacency_mapping(labels, structure, wrap_axes=None):
     """
     Adjacency graph of labels between chunks of ``labels``.
 
     Each chunk in ``labels`` has been labeled independently, and the labels
     in different chunks are guaranteed to be unique.
 
-    Here we construct a graph connecting labels in different chunks that
+    Here we construct a mapping connecting labels in different chunks that
     correspond to the same logical label in the global volume. This is true
     if the two labels "touch" across the block face as defined by the input
     ``structure``.
@@ -141,9 +166,6 @@ def label_adjacency_graph(labels, structure, nlabels, wrap_axes=None):
         The input labeled array, where each chunk is independently labeled.
     structure : array of bool
         Structuring element, shape (3,) * labels.ndim.
-    nlabels : delayed int
-        The total number of labels in ``labels`` *before* correcting for
-        global consistency.
     wrap_axes : tuple of int, optional
         Should labels be wrapped across array boundaries, and if so which axes.
         - (0,) only wrap over the 0th axis.
@@ -152,9 +174,9 @@ def label_adjacency_graph(labels, structure, nlabels, wrap_axes=None):
 
     Returns
     -------
-    mat : delayed scipy.sparse.csr_matrix
-        This matrix has value 1 at (i, j) if label i is connected to
-        label j in the global volume, 0 everywhere else.
+    mat : delayed dict
+        This is a mapping such that if ``mapping[i] = j``, then labels ``i``
+        and ``j`` should be merged.
     """
 
     if structure is None:
@@ -171,10 +193,8 @@ def label_adjacency_graph(labels, structure, nlabels, wrap_axes=None):
         all_mappings.append(mapped)
 
     all_mappings = da.concatenate(all_mappings, axis=1)
-    i, j = all_mappings
-    mat = _to_csr_matrix(i, j, nlabels + 1)
 
-    return mat
+    return all_mappings
 
 
 def _chunk_faces(chunks, shape, structure, wrap_axes=None):
@@ -302,13 +322,98 @@ def block_ndi_label_delayed(block, structure):
     return labeled, n
 
 
-def connected_components_delayed(csr_matrix):
+@dask.delayed(nout=2)
+def connected_components_delayed(all_mappings):
     """
-    Delayed version of scipy.sparse.csgraph.connected_components.
+    Use scipy.sparse.csgraph.connected_components to find the connected
+    components of the graph defined by ``all_mappings``.
 
-    This version only returns the (delayed) connected component labelling, not
-    the number of components.
+    Return the mapping from old labels to new labels, and the number of
+    labels that were collapsed.
     """
-    conn_comp = dask.delayed(scipy.sparse.csgraph.connected_components, nout=2)
-    return da.from_delayed(conn_comp(csr_matrix, directed=False)[1],
-                           shape=(np.nan,), dtype=CONN_COMP_DTYPE)
+
+    if not all_mappings.shape[1]:
+        return {}, 0
+
+    # relabel all_mappings to consecutive integers starting at 1
+    unique_labels, unique_inverse = np.unique(
+        all_mappings,
+        return_inverse=True)
+
+    unique_labels_new = np.arange(
+        0, len(unique_labels), dtype=all_mappings.dtype)
+
+    relabeled_mappings = unique_labels_new[unique_inverse]
+
+    i, j = relabeled_mappings
+    csr_matrix = _to_csr_matrix(i, j, len(unique_labels) + 1)
+
+    conn_comp = scipy.sparse.csgraph.connected_components
+    new_labeling = unique_labels[conn_comp(csr_matrix, directed=False)[1]]
+
+    n_red = len(unique_labels) - len(np.unique(new_labeling)) + 1
+
+    mapping = {k: v for k, v in zip(unique_labels, new_labeling)}
+
+    return mapping, n_red
+
+
+def _encode_label(label, block_id, encoding_dtype=np.uint32):
+    bit_shift = np.iinfo(encoding_dtype).bits // 2
+    nonzero = label != 0
+    label = np.array(label, dtype=encoding_dtype)
+    label[nonzero] = label[nonzero] + (block_id << bit_shift)
+    return label
+
+
+# def _decode_label(encoded_label, encoding_dtype=np.uint32, decoding_dtype=np.uint16):
+#     bit_shift = np.iinfo(encoding_dtype).bits // 2
+#     label = encoded_label & ((1 << bit_shift) - 1)
+#     # block_id = encoded_label >> bit_shift
+#     return label.astype(decoding_dtype)
+
+
+def _make_labels_unique(labels, encoding_dtype=np.uint16):
+    """
+    Make labels unique across blocks by using
+    - lower bits to encode the block ID
+    - higher bits to encode the label ID within the block
+
+    Parameters
+    ----------
+    labels: dask array
+        Array containing labels
+    encoding_dtype: numpy dtype
+        Dtype used to encode the labels.
+        Must be an unsigned integer dtype.
+    """
+
+    assert np.issubdtype(encoding_dtype, np.unsignedinteger), \
+        "encoding_dtype must be an unsigned integer dtype"
+        
+    def _unique_block_labels(
+            block, block_id=None,
+            encoding_dtype=encoding_dtype,
+            numblocks=labels.numblocks
+            ):
+        
+        if block_id is None:
+            block_id = (0,) * block.ndim
+
+        block_id = np.ravel_multi_index(
+            block_id,
+            numblocks,
+            )
+        block = block.astype(encoding_dtype)
+        block = _encode_label(block, block_id, encoding_dtype=encoding_dtype)
+
+        return block
+
+    unique_labels = da.map_blocks(_unique_block_labels,
+                                  labels,
+                                  dtype=encoding_dtype,
+                                  chunks=labels.chunks,
+                                  encoding_dtype=encoding_dtype,
+                                  )
+    
+    return unique_labels
diff --git a/tests/test_dask_image/test_ndmeasure/test_core.py b/tests/test_dask_image/test_ndmeasure/test_core.py
index 3bbdc6df..c2675f45 100644
--- a/tests/test_dask_image/test_ndmeasure/test_core.py
+++ b/tests/test_dask_image/test_ndmeasure/test_core.py
@@ -319,26 +319,36 @@ def _assert_equivalent_labeling(labels0, labels1):
     """
     matching = np.stack((labels0.ravel(), labels1.ravel()), axis=1)
     unique_matching = dask_image.ndmeasure._label._unique_axis(matching)
-    bincount0 = np.bincount(unique_matching[:, 0])
-    bincount1 = np.bincount(unique_matching[:, 1])
-    assert np.all(bincount0 == 1)
-    assert np.all(bincount1 == 1)
+    assert len(np.unique(unique_matching[:, 0])),\
+           len(np.unique(unique_matching[:, 1]))
+
+
+def assert_sequential_labeling(labels):
+    """Assert that the labels are sequential starting at 1.
+
+    I.e. the labels are in {0, 1, 2, ..., N} where 0 is background.
+    """
+    u = np.unique(labels)
+    assert len(u) == 1 + u.max()
 
 
 @pytest.mark.parametrize(
-    "seed, prob, shape, chunks, connectivity", [
-        (42, 0.4, (15, 16), (15, 16), 1),
-        (42, 0.4, (15, 16), (4, 5), 1),
-        (42, 0.4, (15, 16), (4, 5), 2),
-        (42, 0.4, (15, 16), (4, 5), None),
-        (42, 0.4, (15, 16), (8, 5), 1),
-        (42, 0.4, (15, 16), (8, 5), 2),
-        (42, 0.3, (10, 8, 6), (5, 4, 3), 1),
-        (42, 0.3, (10, 8, 6), (5, 4, 3), 2),
-        (42, 0.3, (10, 8, 6), (5, 4, 3), 3),
+    "seed, prob, shape, chunks, connectivity, sequential", [
+        (42, 0.4, (15, 16), (15, 16), 1, False),
+        (42, 0.4, (15, 16), (15, 16), 1, True),
+        (42, 0.4, (15, 16), (4, 5), 1, False),
+        (42, 0.4, (15, 16), (4, 5), 1, True),
+        (42, 0.4, (15, 16), (4, 5), 2, False),
+        (42, 0.4, (15, 16), (4, 5), None, False),
+        (42, 0.4, (15, 16), (8, 5), 1, False),
+        (42, 0.4, (15, 16), (8, 5), 2, False),
+        (42, 0.3, (10, 8, 6), (5, 4, 3), 1, False),
+        (42, 0.3, (10, 8, 6), (5, 4, 3), 1, True),
+        (42, 0.3, (10, 8, 6), (5, 4, 3), 2, False),
+        (42, 0.3, (10, 8, 6), (5, 4, 3), 3, False),
     ]
 )
-def test_label(seed, prob, shape, chunks, connectivity):
+def test_label(seed, prob, shape, chunks, connectivity, sequential):
     np.random.seed(seed)
 
     a = np.random.random(shape) < prob
@@ -350,7 +360,8 @@ def test_label(seed, prob, shape, chunks, connectivity):
         s = scipy.ndimage.generate_binary_structure(a.ndim, connectivity)
 
     a_l, a_nl = scipy.ndimage.label(a, s)
-    d_l, d_nl = dask_image.ndmeasure.label(d, s)
+    d_l, d_nl = dask_image.ndmeasure.label(
+        d, s, produce_sequential_labels=sequential)
 
     assert a_nl == d_nl.compute()
 
@@ -358,6 +369,9 @@ def test_label(seed, prob, shape, chunks, connectivity):
     assert a_l.shape == d_l.shape
     _assert_equivalent_labeling(a_l, d_l.compute())
 
+    if sequential:
+        assert_sequential_labeling(d_l.compute())
+
 
 a = np.array(
     [
@@ -854,3 +868,20 @@ def func_min_max(val):
                 assert d_r[i].compute() is None
             else:
                 assert np.allclose(a_r[i], d_r[i].compute(), equal_nan=True)
+
+
+def test_label_encoding():
+    for label in [0, 10]:
+        for block_id in [0, 1]:
+            for encoding_dtype in [np.uint8, np.uint32]:
+                bit_shift = np.iinfo(encoding_dtype).bits // 2
+                encoded = dask_image.ndmeasure._label._encode_label(
+                            label, block_id, encoding_dtype=encoding_dtype
+                        )
+                assert encoded.dtype == encoding_dtype
+                label_d, block_id_d = \
+                    dask_image.ndmeasure._label._decode_label(
+                        encoded, encoding_dtype=encoding_dtype
+                    )
+                assert label == label_d
+                assert block_id == block_id_d

From 55e393656ac66516779d91551bf3ebd224320879 Mon Sep 17 00:00:00 2001
From: Marvin Albert <marvin.albert@gmail.com>
Date: Tue, 9 Sep 2025 16:42:23 +0200
Subject: [PATCH 2/6] Refactor counting of remapped labels

---
 dask_image/ndmeasure/__init__.py      | 14 ++++++--------
 dask_image/ndmeasure/_utils/_label.py | 15 ++++++++-------
 2 files changed, 14 insertions(+), 15 deletions(-)

diff --git a/dask_image/ndmeasure/__init__.py b/dask_image/ndmeasure/__init__.py
index 0829f4f5..7ffeac21 100644
--- a/dask_image/ndmeasure/__init__.py
+++ b/dask_image/ndmeasure/__init__.py
@@ -382,13 +382,7 @@ def label(
     all_mappings = _label.label_adjacency_mapping(
         block_labeled_unique, structure, wrap_axes=wrap_axes
     )
-    mapping, n_red = _label.connected_components_delayed(all_mappings)
-
-    n_red = da.from_delayed(
-        n_red,
-        shape=(),
-        dtype=np.uint16
-    )
+    mapping = _label.connected_components_delayed(all_mappings)
 
     relabeled = _label.relabel_blocks(
         block_labeled_unique, mapping
@@ -410,7 +404,11 @@ def count_labels(label_block):
             chunks=(1, ) * block_labeled.ndim
         ).sum()
 
-        n = total - n_red
+        n = total - da.from_delayed(
+            delayed(_label.count_n_of_collapsed_labels)(mapping),
+            shape=(),
+            dtype=np.uint64
+        )
 
     return (relabeled, n)
 
diff --git a/dask_image/ndmeasure/_utils/_label.py b/dask_image/ndmeasure/_utils/_label.py
index 4a6cc6d8..56cb93ed 100644
--- a/dask_image/ndmeasure/_utils/_label.py
+++ b/dask_image/ndmeasure/_utils/_label.py
@@ -322,18 +322,17 @@ def block_ndi_label_delayed(block, structure):
     return labeled, n
 
 
-@dask.delayed(nout=2)
+@dask.delayed
 def connected_components_delayed(all_mappings):
     """
     Use scipy.sparse.csgraph.connected_components to find the connected
     components of the graph defined by ``all_mappings``.
 
-    Return the mapping from old labels to new labels, and the number of
-    labels that were collapsed.
+    Return the mapping from old labels to new labels.
     """
 
     if not all_mappings.shape[1]:
-        return {}, 0
+        return {}
 
     # relabel all_mappings to consecutive integers starting at 1
     unique_labels, unique_inverse = np.unique(
@@ -351,11 +350,13 @@ def connected_components_delayed(all_mappings):
     conn_comp = scipy.sparse.csgraph.connected_components
     new_labeling = unique_labels[conn_comp(csr_matrix, directed=False)[1]]
 
-    n_red = len(unique_labels) - len(np.unique(new_labeling)) + 1
-
     mapping = {k: v for k, v in zip(unique_labels, new_labeling)}
 
-    return mapping, n_red
+    return mapping
+
+
+def count_n_of_collapsed_labels(mapping):
+    return len(mapping.keys()) - len(set(mapping.values()))
 
 
 def _encode_label(label, block_id, encoding_dtype=np.uint32):

From d64519c13557efbb333884c079db99b7e5fc7ce7 Mon Sep 17 00:00:00 2001
From: Marvin Albert <marvin.albert@gmail.com>
Date: Tue, 9 Sep 2025 20:58:46 +0200
Subject: [PATCH 3/6] Introduce new public functions
 merge_labels_across_chunk_boundaries (includes new functionality) and
 relabel_sequential. merge_labels_across_chunk_boundaries. Refactor
 ndmeasure.label to use the new functions. Rewrite relabel_blocks to handle
 sparse and large values relabeling efficiently. Add tests for new
 functionality.

---
 dask_image/ndmeasure/__init__.py              | 141 ++++++++++--
 dask_image/ndmeasure/_utils/_label.py         | 204 +++++++++++++-----
 .../test_ndmeasure/test_core.py               | 122 +++++++++--
 3 files changed, 377 insertions(+), 90 deletions(-)

diff --git a/dask_image/ndmeasure/__init__.py b/dask_image/ndmeasure/__init__.py
index 7ffeac21..ce0f8d65 100644
--- a/dask_image/ndmeasure/__init__.py
+++ b/dask_image/ndmeasure/__init__.py
@@ -1,8 +1,8 @@
 # -*- coding: utf-8 -*-
 
-import collections
 import functools
 import operator
+import collections
 import warnings
 from dask import compute, delayed
 import dask.config as dask_config
@@ -370,47 +370,148 @@ def label(
         dtype=_label.LABEL_DTYPE,
     )
 
+    merged_labels_dict = merge_labels_across_chunk_boundaries(
+        block_labeled,
+        overlap_depth=0,
+        structure=structure,
+        wrap_axes=wrap_axes,
+        produce_sequential_labels=False,
+        )
+    
+    relabeled = merged_labels_dict['labels']
+    mapping = merged_labels_dict['mapping']
+
+    if produce_sequential_labels:
+        relabeled = relabel_sequential(relabeled)
+        n = relabeled.max()
+        
+    else:
+        def count_labels(label_block):
+            return len(np.unique(label_block[label_block > 0]))
+
+        total = block_labeled.map_blocks(
+            lambda x: np.ones((1, ) * x.ndim, dtype=np.uint64) * count_labels(x),
+            dtype=np.uint64,
+            chunks=(1, ) * block_labeled.ndim
+        ).sum()
+
+        n = total - da.from_delayed(
+            delayed(_label.count_n_of_collapsed_labels)(mapping),
+            shape=(),
+            dtype=np.uint64
+        )
+
+    return (relabeled, n)
+
+
+def merge_labels_across_chunk_boundaries(
+        labels,
+        overlap_depth=0,
+        iou_threshold=0.8,
+        structure=None,
+        wrap_axes=None,
+        trim_overlap=True,
+        produce_sequential_labels=True,
+        ):
+    """
+    Merge labels across chunk boundaries.
+
+    Each chunk in ``labels`` has been labeled independently, and the labels
+    in different chunks may overlap. This function tries to merge labels across
+    chunk boundaries using a strategy dependent on ``overlap``:
+    - If ``overlap > 0``, the overlap region between chunks is used to
+      determine which between each pair of chunks should be merged.
+    - If ``overlap == 0``, labels that touch across chunk boundaries are merged.
+
+    Parameters
+    ----------
+    labels : dask array of int
+        The input labeled array, where each chunk is independently labeled.
+    overlap_depth : int, optional
+        The size of the overlap region between chunks, e.g. as produced by
+        `dask.array.overlap` or `map_overlap`. Default is 0.
+    iou_threshold : float, optional
+        If ``overlap > 0``, the intersection-over-union (IoU) between labels
+        in the overlap region is used to determine which labels should be
+        merged. If the IoU between two labels is greater than ``iou_threshold``,
+        they are merged. Default is 0.8.
+    structure : array of bool, optional
+        Structuring element for determining connectivity. If None, a
+        cross-shaped structuring element is used.
+    wrap_axes : tuple of int, optional
+        Should labels be wrapped across array boundaries, and if so which axes.
+        - (0,) only wrap over the 0th axis.
+        - (0, 1) wrap over the 0th and 1st axis.
+        - (0, 1, 3)  wrap over 0th, 1st and 3rd axis.
+    trim_overlap : bool, optional
+        If True, the overlap regions are trimmed from the output labels.
+        Default is True.
+
+    Returns
+    -------
+    dict with keys 'labels' and 'mapping'
+        'labels': dask array of int
+            The relabeled array, where labels have been merged across chunk
+            boundaries.
+        'mapping': dict
+            A mapping from old labels to new labels.
+    """
+
     # Make labels unique across blocks by encoding the block ID into the labels.
     # We use half the bits to encode the block ID and half the bits to encode
     # the label ID within the block.
     block_labeled_unique = _label._make_labels_unique(
-        block_labeled, encoding_dtype=np.uint32)
+        labels, encoding_dtype=np.uint32)
 
     # Now, build a label mapping that groups labels across blocks.
     # We use this mapping to find connected components and then relabel each
     # block according to those.
     all_mappings = _label.label_adjacency_mapping(
-        block_labeled_unique, structure, wrap_axes=wrap_axes
+        block_labeled_unique,
+        structure,
+        wrap_axes=wrap_axes,
+        iou_threshold=iou_threshold,
+        overlap_depth=overlap_depth,
     )
+
     mapping = _label.connected_components_delayed(all_mappings)
 
-    relabeled = _label.relabel_blocks(
+    relabeled = _label.relabel(
         block_labeled_unique, mapping
     )
 
     relabeled = relabeled.astype(_label.LABEL_DTYPE)
 
+    if trim_overlap and overlap_depth > 0:
+        relabeled = da.overlap.trim_overlap(
+            relabeled, {i: overlap_depth for i in range(relabeled.ndim)},
+            boundary='none',
+        )
+
     if produce_sequential_labels:
-        relabeled = _label.relabel_sequential(relabeled)
-        n = relabeled.max()
-        
-    else:
-        def count_labels(label_block):
-            return len(np.unique(label_block[label_block > 0]))
+        relabeled = relabel_sequential(relabeled)
 
-        total = block_labeled.map_blocks(
-            lambda x: np.ones((1, ) * x.ndim, dtype=np.uint64) * count_labels(x),
-            dtype=np.uint64,
-            chunks=(1, ) * block_labeled.ndim
-        ).sum()
+    return {
+        'labels': relabeled,
+        'mapping': mapping,
+    }
 
-        n = total - da.from_delayed(
-            delayed(_label.count_n_of_collapsed_labels)(mapping),
-            shape=(),
-            dtype=np.uint64
+
+def relabel_sequential(labels):
+    """
+    Relabel the labels in ``labels`` to be sequential starting at 1.
+    """
+
+    u = da.unique(
+        da.concatenate(
+            [da.unique(b) for b in labels.blocks]
+            )
         )
+    mapping = delayed(lambda x: {k: v for k, v in zip(x, np.arange(len(x)))})(u)
 
-    return (relabeled, n)
+    relabeled = _label.relabel(labels, mapping)
+
+    return relabeled
 
 
 def labeled_comprehension(image,
diff --git a/dask_image/ndmeasure/_utils/_label.py b/dask_image/ndmeasure/_utils/_label.py
index 56cb93ed..48276ef2 100644
--- a/dask_image/ndmeasure/_utils/_label.py
+++ b/dask_image/ndmeasure/_utils/_label.py
@@ -1,7 +1,6 @@
 # -*- coding: utf-8 -*-
-
+import functools
 import operator
-
 import dask
 import dask.array as da
 import numpy as np
@@ -26,15 +25,15 @@ def _get_connected_components_dtype():
 CONN_COMP_DTYPE = _get_connected_components_dtype()
 
 
-def relabel_blocks(block_labeled, mapping):
+def relabel(labels, mapping):
     """
-    Relabel a block-labeled array based on ``mapping``.
+    Relabel a label array based on ``mapping``.
 
     Parameters
     ----------
-    block_labeled : array of int
+    labels : array of int
         The input label array.
-    mapping : dict
+    mapping : (delayed) dict
         A label mapping, such that ``mapping[i] = j`` implies that
         any element in ``array`` valued ``i`` should be relabeled to ``j``.
 
@@ -43,36 +42,34 @@ def relabel_blocks(block_labeled, mapping):
     relabeled : array of int, same shape as ``array``
         The relabeled input array.
     """
-    
-    def relabel_block(block, mapping):
-        # pandas replace works well for both dense and sparse mappings
-        # including large integer labels
-        s = pd.Series(block.flatten())
-        result = s.replace(mapping).to_numpy()
-        return result.reshape(block.shape)
 
-    relabeled = block_labeled.map_blocks(
-        relabel_block,
-        mapping=mapping,
-        dtype=block_labeled.dtype
-    )
+    relabeled = np.empty(labels.numblocks, dtype=object)
 
-    return relabeled
+    block_iter = zip(
+        np.ndindex(*labels.numblocks),
+        map(functools.partial(operator.getitem, labels),
+            da.core.slices_from_chunks(labels.chunks))
+    )
 
+    def relabel_block(block, mapping):
+        # Convert block to pandas Series for mapping
+        s = pd.Series(block.flatten())
+        # Vectorized lookup using reindex
+        relabeled_block = s.replace(mapping).to_numpy().reshape(block.shape)
+        return relabeled_block
 
-def relabel_sequential(labels):
-    """
-    Relabel the labels in ``labels`` to be sequential starting at 1.
-    """
+    for index, input_block in block_iter:
 
-    u = da.unique(
-        da.concatenate(
-            [da.unique(b) for b in labels.blocks]
-            )
+        relabeled_block = da.from_delayed(
+            dask.delayed(relabel_block)(input_block, mapping),
+            shape=input_block.shape,
+            dtype=input_block.dtype
         )
-    mapping = dask.delayed(lambda x: {k: v for k, v in zip(x, np.arange(len(x)))})(u)
 
-    relabeled = relabel_blocks(labels, mapping)
+        relabeled[index] = relabeled_block
+
+    # Put all the blocks together
+    relabeled = da.block(relabeled.tolist())
 
     return relabeled
 
@@ -86,7 +83,12 @@ def _unique_axis(a, axis=0):
     return r
 
 
-def _across_block_label_grouping(face, structure):
+def _across_block_label_grouping(
+        face, structure,
+        overlap_depth,
+        face_dims,
+        iou_threshold,
+        ):
     """
     Find a grouping of labels across block faces.
 
@@ -101,6 +103,15 @@ def _across_block_label_grouping(face, structure):
         Structuring element for the labeling of the face. This should have
         length 3 along each axis and have the same number of dimensions as
         ``face``.
+    overlap_depth : int
+        The depth of the overlap between blocks.
+    face_dims : list of int
+        The dimensions along which the face extends.
+    iou_threshold : float, optional
+        If ``overlap_depth > 0``, the intersection-over-union (IoU) between labels
+        in the overlap region is used to determine which labels should be
+        merged. If the IoU between two labels is greater than ``iou_threshold``,
+        they are merged. Default is 0.8.
 
     Returns
     -------
@@ -121,23 +132,79 @@ def _across_block_label_grouping(face, structure):
     This shows that 1-2 are connected, 2-7 are connected, and 8-9 are
     connected. The resulting graph is (1-2-7), (8-9).
     """
-    common_labels = scipy.ndimage.label(face, structure)[0]
-    matching = np.stack((common_labels.ravel(), face.ravel()), axis=1)
-    unique_matching = _unique_axis(matching)
-    valid = np.all(unique_matching, axis=1)
-    unique_valid_matching = unique_matching[valid]
-    common_labels, labels = unique_valid_matching.T
-    in_group = np.flatnonzero(np.diff(common_labels) == 0)
-    i = np.take(labels, in_group)
-    j = np.take(labels, in_group + 1)
-    grouped = np.stack((i, j), axis=0)
+
+    if not overlap_depth:
+
+        # merge touching labels
+        
+        common_labels = scipy.ndimage.label(face, structure)[0]
+        # when processing labels that don't come from ndimage.label, we need to
+        # ensure equal dtypes
+        common_labels = common_labels.astype(face.dtype)
+        matching = np.stack((common_labels.ravel(), face.ravel()), axis=1)
+        unique_matching = _unique_axis(matching)
+        valid = np.all(unique_matching, axis=1)
+        unique_valid_matching = unique_matching[valid]
+        common_labels, labels = unique_valid_matching.T
+        in_group = np.flatnonzero(np.diff(common_labels) == 0)
+        i = np.take(labels, in_group)
+        j = np.take(labels, in_group + 1)
+        grouped = np.stack((i, j), axis=0)
+
+    else:
+
+        # consider labels as grouped if intersection over union
+        # is higher than the given threshold
+
+        # overlap comparison
+        slice1 = [slice(None)] * len(face.shape)
+        slice2 = [slice(None)] * len(face.shape)
+        for dim in range(face.ndim):
+            if dim in face_dims:
+                # slice1[dim] = slice(-2 * overlap_depth, None)
+                # slice2[dim] = slice(0, 2 * overlap_depth)
+                slice1[dim] = slice(-1 * overlap_depth, None)
+                slice2[dim] = slice(0, 1 * overlap_depth)
+            else:
+                slice1[dim] = slice(None)
+                slice2[dim] = slice(None)
+
+        # get IoU based matching
+
+        face1 = face[tuple(slice1)]
+        face2 = face[tuple(slice2)]
+
+        # get IoU between all labels in face1 and face2
+        # consider only already overlapping labels
+        label_pairs = np.stack((face1.ravel(), face2.ravel()), axis=1)
+        unique_label_pairs = _unique_axis(label_pairs)
+        valid = np.all(unique_label_pairs > 0, axis=1)
+        unique_valid_label_pairs = unique_label_pairs[valid]
+        ilabels1, ilabels2 = unique_valid_label_pairs.T
+
+        matching_pairs = []
+        for l1 in ilabels1:
+            for l2 in ilabels2:
+                intersection = np.sum((face1 == l1) * (face2 == l2))
+                if intersection == 0:
+                    continue
+                union = np.sum(face1 == l1) + np.sum(face2 == l2) - intersection
+                iou = intersection / union
+                if iou > iou_threshold:
+                    matching_pairs.append((l1, l2))
+
+        grouped = np.array(matching_pairs).T if len(matching_pairs) > 0\
+            else np.zeros((2, 0), dtype=face.dtype)
+        
+        # import pdb; pdb.set_trace()
+
     return grouped
 
 
-def _across_block_label_grouping_delayed(face, structure):
+def _across_block_label_grouping_delayed(**across_block_label_grouping_kwargs):
     """Delayed version of :func:`_across_block_label_grouping`."""
     _across_block_label_grouping_ = dask.delayed(_across_block_label_grouping)
-    grouped = _across_block_label_grouping_(face, structure)
+    grouped = _across_block_label_grouping_(**across_block_label_grouping_kwargs)
     return da.from_delayed(grouped, shape=(2, np.nan), dtype=LABEL_DTYPE)
 
 
@@ -148,7 +215,13 @@ def _to_csr_matrix(i, j, n):
     return mat.tocsr()
 
 
-def label_adjacency_mapping(labels, structure, wrap_axes=None):
+def label_adjacency_mapping(
+        labels,
+        structure=None,
+        wrap_axes=None,
+        overlap_depth=None,
+        iou_threshold=0.8,
+    ):
     """
     Adjacency graph of labels between chunks of ``labels``.
 
@@ -171,6 +244,7 @@ def label_adjacency_mapping(labels, structure, wrap_axes=None):
         - (0,) only wrap over the 0th axis.
         - (0, 1) wrap over the 0th and 1st axis.
         - (0, 1, 3)  wrap over 0th, 1st and 3rd axis.
+    overlap_depth : int, optional
 
     Returns
     -------
@@ -182,14 +256,21 @@ def label_adjacency_mapping(labels, structure, wrap_axes=None):
     if structure is None:
         structure = scipy.ndimage.generate_binary_structure(labels.ndim, 1)
 
-    face_slices = _chunk_faces(
+    face_slice_infos = _chunk_faces(
         labels.chunks, labels.shape, structure, wrap_axes=wrap_axes
     )
     all_mappings = [da.empty((2, 0), dtype=LABEL_DTYPE, chunks=1)]
 
-    for face_slice in face_slices:
-        face = labels[face_slice]
-        mapped = _across_block_label_grouping_delayed(face, structure)
+    for face_slice_info in face_slice_infos:
+
+        face = labels[face_slice_info['slice']]
+        mapped = _across_block_label_grouping_delayed(
+            face=face,
+            structure=structure,
+            overlap_depth=overlap_depth,
+            face_dims=face_slice_info['dims'],
+            iou_threshold=iou_threshold
+        )
         all_mappings.append(mapped)
 
     all_mappings = da.concatenate(all_mappings, axis=1)
@@ -197,7 +278,10 @@ def label_adjacency_mapping(labels, structure, wrap_axes=None):
     return all_mappings
 
 
-def _chunk_faces(chunks, shape, structure, wrap_axes=None):
+def _chunk_faces(
+    chunks, shape, structure,
+    wrap_axes=None, overlap_depth=0
+    ):
     """
     Return slices for two-pixel-wide boundaries between chunks.
 
@@ -214,6 +298,8 @@ def _chunk_faces(chunks, shape, structure, wrap_axes=None):
         - (0,) only wrap over the 0th axis.
         - (0, 1) wrap over the 0th and 1st axis.
         - (0, 1, 3)  wrap over 0th, 1st and 3rd axis.
+    overlap_depth : int, optional
+        The depth of the overlap between blocks.
 
     Yields
     -------
@@ -279,6 +365,7 @@ def _chunk_faces(chunks, shape, structure, wrap_axes=None):
             ind_curr_block = block_summary[tuple(curr_block)]
 
             curr_slice = []
+            curr_slice_dims = []
             for dim in range(ndim):
                 # keep slice if not on boundary
                 if neigh_block[dim] == curr_block[dim]:
@@ -288,11 +375,20 @@ def _chunk_faces(chunks, shape, structure, wrap_axes=None):
                     if slices[ind_curr_block][dim].stop == shape[dim]:
                         curr_slice.append(slice(None, None, shape[dim] - 1))
                     else:
-                        curr_slice.append(slice(
-                            slices[ind_curr_block][dim].stop - 1,
-                            slices[ind_curr_block][dim].stop + 1))
-
-            yield tuple(curr_slice)
+                        if overlap_depth > 0:
+                            curr_slice.append(slice(
+                                slices[ind_curr_block][dim].stop - 1 * overlap_depth,
+                                slices[ind_curr_block][dim].stop + 1 * overlap_depth))
+                        else:
+                            curr_slice.append(slice(
+                                slices[ind_curr_block][dim].stop - 1,
+                                slices[ind_curr_block][dim].stop + 1))
+                    curr_slice_dims.append(dim)
+
+            yield {
+                'slice': tuple(curr_slice),
+                'dims': curr_slice_dims
+            }
 
 
 def block_ndi_label_delayed(block, structure):
@@ -345,7 +441,7 @@ def connected_components_delayed(all_mappings):
     relabeled_mappings = unique_labels_new[unique_inverse]
 
     i, j = relabeled_mappings
-    csr_matrix = _to_csr_matrix(i, j, len(unique_labels) + 1)
+    csr_matrix = _to_csr_matrix(i, j, len(unique_labels))
 
     conn_comp = scipy.sparse.csgraph.connected_components
     new_labeling = unique_labels[conn_comp(csr_matrix, directed=False)[1]]
diff --git a/tests/test_dask_image/test_ndmeasure/test_core.py b/tests/test_dask_image/test_ndmeasure/test_core.py
index c2675f45..ec627e04 100644
--- a/tests/test_dask_image/test_ndmeasure/test_core.py
+++ b/tests/test_dask_image/test_ndmeasure/test_core.py
@@ -319,7 +319,7 @@ def _assert_equivalent_labeling(labels0, labels1):
     """
     matching = np.stack((labels0.ravel(), labels1.ravel()), axis=1)
     unique_matching = dask_image.ndmeasure._label._unique_axis(matching)
-    assert len(np.unique(unique_matching[:, 0])),\
+    assert len(np.unique(unique_matching[:, 0])) == \
            len(np.unique(unique_matching[:, 1]))
 
 
@@ -870,18 +870,108 @@ def func_min_max(val):
                 assert np.allclose(a_r[i], d_r[i].compute(), equal_nan=True)
 
 
-def test_label_encoding():
-    for label in [0, 10]:
-        for block_id in [0, 1]:
-            for encoding_dtype in [np.uint8, np.uint32]:
-                bit_shift = np.iinfo(encoding_dtype).bits // 2
-                encoded = dask_image.ndmeasure._label._encode_label(
-                            label, block_id, encoding_dtype=encoding_dtype
-                        )
-                assert encoded.dtype == encoding_dtype
-                label_d, block_id_d = \
-                    dask_image.ndmeasure._label._decode_label(
-                        encoded, encoding_dtype=encoding_dtype
-                    )
-                assert label == label_d
-                assert block_id == block_id_d
+def test_make_labels_unique():
+
+    np.random.seed(42)
+    labels = da.random.randint(0, 1, size=(6, 6), chunks=(3, 3))
+    unique_labels = dask_image.ndmeasure._label._make_labels_unique(labels)
+
+    assert unique_labels.shape == labels.shape
+    assert len(np.unique(unique_labels.compute())) >= \
+           len(np.unique(labels.compute()))
+
+
+@pytest.mark.parametrize(
+        "ndim, overlap_depth, produce_sequential_labels",
+        [(1, 0, False),
+         (1, 1, True),
+         (2, 0, False),
+         (2, 1, True),
+         (3, 0, False),
+         (3, 1, True)]
+)
+def test_merge_labels_across_chunk_boundaries(
+    ndim, overlap_depth, produce_sequential_labels
+):
+
+    # create a segmentation ground truth
+    # e.g. in 2d:
+    # 0 0 0 0 0 0
+    # 0 1 1 1 1 0
+    # 0 1 1 1 1 0
+    # 0 2 2 2 2 0
+    # 0 2 2 2 2 0
+    # 0 0 0 0 0 0
+
+    im = np.zeros((6, ) * ndim, dtype=np.uint16)
+    im[tuple([slice(1, -1)] * ndim)] = 1
+
+    # along dimension 0 introduce two "instances"
+    # with labels 1 and 2
+    # along the remaining dimensions the labels don't change
+
+    dim = da.from_array(im, chunks=(3, ) * ndim)
+
+    def label_block_dim0(x, block_id=None):
+        return x * (block_id[0] + 1)
+
+    dim = dim.map_blocks(
+        label_block_dim0,
+        dtype=dim.dtype,
+    )
+
+    # simulate a segmentation method which works
+    # perfectly within each chunk, but cannot
+    # guarantee object identities across chunks.
+    # the segmentation is applied with different
+    # overlap depths
+
+    # input as seen by the segmentation method
+    dim_chunkwise_view = da.overlap.overlap(
+        dim,
+        depth={i: overlap_depth for i in range(ndim)},
+        boundary='none',
+    )
+
+    # simulate instance label ids that vary over chunks
+    random_factor_per_chunk = da.random.randint(
+        0,
+        1000,
+        size=dim_chunkwise_view.numblocks,
+        chunks=(1, ) * ndim,
+    )
+
+    dim_chunkwise_segmentation = da.map_blocks(
+            lambda x, y: x * y,
+            dim_chunkwise_view,
+            random_factor_per_chunk,
+            chunks=dim_chunkwise_view.chunks,
+            dtype=dim.dtype,
+        )
+
+    # merge labels across chunk boundaries
+    dim_segmentation_merged = dask_image.ndmeasure\
+        .merge_labels_across_chunk_boundaries(
+            dim_chunkwise_segmentation,
+            overlap_depth=overlap_depth
+        )['labels']
+
+    dim_segmentation_merged_c = \
+        dim_segmentation_merged.compute(scheduler='single-threaded')
+
+    if not overlap_depth:
+        # merging labels with zero overlap should merge all labels
+        # into a single connected component
+        _assert_equivalent_labeling(
+            dim_segmentation_merged_c,
+            scipy.ndimage.label(dim > 0)[0]
+        )
+    else:
+        # merging labels with overlap recovers the ground truth
+        _assert_equivalent_labeling(
+            dim.compute(),
+            dim_segmentation_merged_c
+        )
+
+    if produce_sequential_labels:
+        assert_sequential_labeling(dim_segmentation_merged_c)

From 72e7f200a6d61e528b8e9508bd110dfe9a68aeb4 Mon Sep 17 00:00:00 2001
From: Marvin Albert <marvin.albert@gmail.com>
Date: Tue, 9 Sep 2025 21:40:04 +0200
Subject: [PATCH 4/6] Fix wrong overlap_depth handling

---
 dask_image/ndmeasure/_utils/_label.py           | 17 ++++++++---------
 .../test_dask_image/test_ndmeasure/test_core.py | 17 ++++++++++-------
 2 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/dask_image/ndmeasure/_utils/_label.py b/dask_image/ndmeasure/_utils/_label.py
index 48276ef2..73c095ec 100644
--- a/dask_image/ndmeasure/_utils/_label.py
+++ b/dask_image/ndmeasure/_utils/_label.py
@@ -161,10 +161,10 @@ def _across_block_label_grouping(
         slice2 = [slice(None)] * len(face.shape)
         for dim in range(face.ndim):
             if dim in face_dims:
-                # slice1[dim] = slice(-2 * overlap_depth, None)
-                # slice2[dim] = slice(0, 2 * overlap_depth)
-                slice1[dim] = slice(-1 * overlap_depth, None)
-                slice2[dim] = slice(0, 1 * overlap_depth)
+                slice1[dim] = slice(-2 * overlap_depth, None)
+                slice2[dim] = slice(0, 2 * overlap_depth)
+                # slice1[dim] = slice(-1 * overlap_depth, None)
+                # slice2[dim] = slice(0, 1 * overlap_depth)
             else:
                 slice1[dim] = slice(None)
                 slice2[dim] = slice(None)
@@ -195,8 +195,6 @@ def _across_block_label_grouping(
 
         grouped = np.array(matching_pairs).T if len(matching_pairs) > 0\
             else np.zeros((2, 0), dtype=face.dtype)
-        
-        # import pdb; pdb.set_trace()
 
     return grouped
 
@@ -257,7 +255,8 @@ def label_adjacency_mapping(
         structure = scipy.ndimage.generate_binary_structure(labels.ndim, 1)
 
     face_slice_infos = _chunk_faces(
-        labels.chunks, labels.shape, structure, wrap_axes=wrap_axes
+        labels.chunks, labels.shape, structure,
+        wrap_axes=wrap_axes, overlap_depth=overlap_depth
     )
     all_mappings = [da.empty((2, 0), dtype=LABEL_DTYPE, chunks=1)]
 
@@ -377,8 +376,8 @@ def _chunk_faces(
                     else:
                         if overlap_depth > 0:
                             curr_slice.append(slice(
-                                slices[ind_curr_block][dim].stop - 1 * overlap_depth,
-                                slices[ind_curr_block][dim].stop + 1 * overlap_depth))
+                                slices[ind_curr_block][dim].stop - 2 * overlap_depth,
+                                slices[ind_curr_block][dim].stop + 2 * overlap_depth))
                         else:
                             curr_slice.append(slice(
                                 slices[ind_curr_block][dim].stop - 1,
diff --git a/tests/test_dask_image/test_ndmeasure/test_core.py b/tests/test_dask_image/test_ndmeasure/test_core.py
index ec627e04..3fc2dc86 100644
--- a/tests/test_dask_image/test_ndmeasure/test_core.py
+++ b/tests/test_dask_image/test_ndmeasure/test_core.py
@@ -883,12 +883,14 @@ def test_make_labels_unique():
 
 @pytest.mark.parametrize(
         "ndim, overlap_depth, produce_sequential_labels",
-        [(1, 0, False),
-         (1, 1, True),
-         (2, 0, False),
-         (2, 1, True),
-         (3, 0, False),
-         (3, 1, True)]
+        [
+            (1, 0, False),
+            (1, 1, True),
+            (2, 0, False),
+            (2, 1, True),
+            (3, 0, False),
+            (3, 1, True),
+        ]
 )
 def test_merge_labels_across_chunk_boundaries(
     ndim, overlap_depth, produce_sequential_labels
@@ -953,7 +955,8 @@ def label_block_dim0(x, block_id=None):
     dim_segmentation_merged = dask_image.ndmeasure\
         .merge_labels_across_chunk_boundaries(
             dim_chunkwise_segmentation,
-            overlap_depth=overlap_depth
+            overlap_depth=overlap_depth,
+            iou_threshold=0,
         )['labels']
 
     dim_segmentation_merged_c = \

From d2a837dcc3978e5e5226a73e6a7497f5f6281fe6 Mon Sep 17 00:00:00 2001
From: Marvin Albert <marvin.albert@gmail.com>
Date: Wed, 10 Sep 2025 14:25:29 +0200
Subject: [PATCH 5/6] Fix flake8

---
 dask_image/ndmeasure/__init__.py      | 28 ++++++++-------
 dask_image/ndmeasure/_utils/_label.py | 50 +++++++++++++++------------
 2 files changed, 44 insertions(+), 34 deletions(-)

diff --git a/dask_image/ndmeasure/__init__.py b/dask_image/ndmeasure/__init__.py
index ce0f8d65..174b0e12 100644
--- a/dask_image/ndmeasure/__init__.py
+++ b/dask_image/ndmeasure/__init__.py
@@ -349,8 +349,9 @@ def label(
         - (0, 1) wrap across the 0th and 1st axis.
         - (0, 1, 3)  wrap across 0th, 1st and 3rd axis.
     produce_sequential_labels : bool, optional
-        If False, the returned labels are not contiguous (i.e. 0, 1, 6045, 54654).
-        This is the default behavior and has advantages for parallelism.
+        If False, the returned labels are not contiguous
+        (i.e. 0, 1, 6045, 54654). This is the default behavior
+        and has advantages for parallelism.
         If True, the labels are relabeled to be sequential
         and contiguous from 1 to the number of features.
 
@@ -377,20 +378,21 @@ def label(
         wrap_axes=wrap_axes,
         produce_sequential_labels=False,
         )
-    
+
     relabeled = merged_labels_dict['labels']
     mapping = merged_labels_dict['mapping']
 
     if produce_sequential_labels:
         relabeled = relabel_sequential(relabeled)
         n = relabeled.max()
-        
+
     else:
         def count_labels(label_block):
             return len(np.unique(label_block[label_block > 0]))
 
         total = block_labeled.map_blocks(
-            lambda x: np.ones((1, ) * x.ndim, dtype=np.uint64) * count_labels(x),
+            lambda x: np.ones(
+                (1, ) * x.ndim, dtype=np.uint64) * count_labels(x),
             dtype=np.uint64,
             chunks=(1, ) * block_labeled.ndim
         ).sum()
@@ -421,7 +423,8 @@ def merge_labels_across_chunk_boundaries(
     chunk boundaries using a strategy dependent on ``overlap``:
     - If ``overlap > 0``, the overlap region between chunks is used to
       determine which between each pair of chunks should be merged.
-    - If ``overlap == 0``, labels that touch across chunk boundaries are merged.
+    - If ``overlap == 0``, labels that touch across chunk boundaries
+    are merged.
 
     Parameters
     ----------
@@ -433,8 +436,8 @@ def merge_labels_across_chunk_boundaries(
     iou_threshold : float, optional
         If ``overlap > 0``, the intersection-over-union (IoU) between labels
         in the overlap region is used to determine which labels should be
-        merged. If the IoU between two labels is greater than ``iou_threshold``,
-        they are merged. Default is 0.8.
+        merged. If the IoU between two labels is greater than
+        ``iou_threshold``, they are merged. Default is 0.8.
     structure : array of bool, optional
         Structuring element for determining connectivity. If None, a
         cross-shaped structuring element is used.
@@ -457,9 +460,9 @@ def merge_labels_across_chunk_boundaries(
             A mapping from old labels to new labels.
     """
 
-    # Make labels unique across blocks by encoding the block ID into the labels.
-    # We use half the bits to encode the block ID and half the bits to encode
-    # the label ID within the block.
+    # Make labels unique across blocks by encoding the block ID into
+    # the labels. We use half the bits to encode the block ID and half the
+    # bits to encode the label ID within the block.
     block_labeled_unique = _label._make_labels_unique(
         labels, encoding_dtype=np.uint32)
 
@@ -507,7 +510,8 @@ def relabel_sequential(labels):
             [da.unique(b) for b in labels.blocks]
             )
         )
-    mapping = delayed(lambda x: {k: v for k, v in zip(x, np.arange(len(x)))})(u)
+    mapping = delayed(
+        lambda x: {k: v for k, v in zip(x, np.arange(len(x)))})(u)
 
     relabeled = _label.relabel(labels, mapping)
 
diff --git a/dask_image/ndmeasure/_utils/_label.py b/dask_image/ndmeasure/_utils/_label.py
index 73c095ec..6ede1837 100644
--- a/dask_image/ndmeasure/_utils/_label.py
+++ b/dask_image/ndmeasure/_utils/_label.py
@@ -108,10 +108,10 @@ def _across_block_label_grouping(
     face_dims : list of int
         The dimensions along which the face extends.
     iou_threshold : float, optional
-        If ``overlap_depth > 0``, the intersection-over-union (IoU) between labels
-        in the overlap region is used to determine which labels should be
-        merged. If the IoU between two labels is greater than ``iou_threshold``,
-        they are merged. Default is 0.8.
+        If ``overlap_depth > 0``, the intersection-over-union (IoU) between
+        labels in the overlap region is used to determine which labels should
+        be merged. If the IoU between two labels is greater than
+        ``iou_threshold``, they are merged. Default is 0.8.
 
     Returns
     -------
@@ -136,7 +136,7 @@ def _across_block_label_grouping(
     if not overlap_depth:
 
         # merge touching labels
-        
+
         common_labels = scipy.ndimage.label(face, structure)[0]
         # when processing labels that don't come from ndimage.label, we need to
         # ensure equal dtypes
@@ -188,7 +188,8 @@ def _across_block_label_grouping(
                 intersection = np.sum((face1 == l1) * (face2 == l2))
                 if intersection == 0:
                     continue
-                union = np.sum(face1 == l1) + np.sum(face2 == l2) - intersection
+                union = \
+                    np.sum(face1 == l1) + np.sum(face2 == l2) - intersection
                 iou = intersection / union
                 if iou > iou_threshold:
                     matching_pairs.append((l1, l2))
@@ -202,7 +203,8 @@ def _across_block_label_grouping(
 def _across_block_label_grouping_delayed(**across_block_label_grouping_kwargs):
     """Delayed version of :func:`_across_block_label_grouping`."""
     _across_block_label_grouping_ = dask.delayed(_across_block_label_grouping)
-    grouped = _across_block_label_grouping_(**across_block_label_grouping_kwargs)
+    grouped = _across_block_label_grouping_(
+        **across_block_label_grouping_kwargs)
     return da.from_delayed(grouped, shape=(2, np.nan), dtype=LABEL_DTYPE)
 
 
@@ -214,12 +216,12 @@ def _to_csr_matrix(i, j, n):
 
 
 def label_adjacency_mapping(
-        labels,
-        structure=None,
-        wrap_axes=None,
-        overlap_depth=None,
-        iou_threshold=0.8,
-    ):
+            labels,
+            structure=None,
+            wrap_axes=None,
+            overlap_depth=None,
+            iou_threshold=0.8,
+        ):
     """
     Adjacency graph of labels between chunks of ``labels``.
 
@@ -278,9 +280,9 @@ def label_adjacency_mapping(
 
 
 def _chunk_faces(
-    chunks, shape, structure,
-    wrap_axes=None, overlap_depth=0
-    ):
+        chunks, shape, structure,
+        wrap_axes=None, overlap_depth=0
+        ):
     """
     Return slices for two-pixel-wide boundaries between chunks.
 
@@ -376,8 +378,10 @@ def _chunk_faces(
                     else:
                         if overlap_depth > 0:
                             curr_slice.append(slice(
-                                slices[ind_curr_block][dim].stop - 2 * overlap_depth,
-                                slices[ind_curr_block][dim].stop + 2 * overlap_depth))
+                                slices[ind_curr_block][dim].stop -
+                                2 * overlap_depth,
+                                slices[ind_curr_block][dim].stop +
+                                2 * overlap_depth))
                         else:
                             curr_slice.append(slice(
                                 slices[ind_curr_block][dim].stop - 1,
@@ -462,7 +466,9 @@ def _encode_label(label, block_id, encoding_dtype=np.uint32):
     return label
 
 
-# def _decode_label(encoded_label, encoding_dtype=np.uint32, decoding_dtype=np.uint16):
+# def _decode_label(
+#     encoded_label, encoding_dtype=np.uint32, decoding_dtype=np.uint16
+# ):
 #     bit_shift = np.iinfo(encoding_dtype).bits // 2
 #     label = encoded_label & ((1 << bit_shift) - 1)
 #     # block_id = encoded_label >> bit_shift
@@ -486,13 +492,13 @@ def _make_labels_unique(labels, encoding_dtype=np.uint16):
 
     assert np.issubdtype(encoding_dtype, np.unsignedinteger), \
         "encoding_dtype must be an unsigned integer dtype"
-        
+
     def _unique_block_labels(
             block, block_id=None,
             encoding_dtype=encoding_dtype,
             numblocks=labels.numblocks
             ):
-        
+
         if block_id is None:
             block_id = (0,) * block.ndim
 
@@ -511,5 +517,5 @@ def _unique_block_labels(
                                   chunks=labels.chunks,
                                   encoding_dtype=encoding_dtype,
                                   )
-    
+
     return unique_labels

From 66ede85a60ca32956ca6206c1361c4344d3623ee Mon Sep 17 00:00:00 2001
From: Marvin Albert <marvin.albert@gmail.com>
Date: Wed, 10 Sep 2025 14:56:45 +0200
Subject: [PATCH 6/6] Fix compatibility with numpy<2

---
 dask_image/ndmeasure/__init__.py      | 8 +++++---
 dask_image/ndmeasure/_utils/_label.py | 7 +++++--
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/dask_image/ndmeasure/__init__.py b/dask_image/ndmeasure/__init__.py
index 174b0e12..2291dfd4 100644
--- a/dask_image/ndmeasure/__init__.py
+++ b/dask_image/ndmeasure/__init__.py
@@ -391,18 +391,20 @@ def count_labels(label_block):
             return len(np.unique(label_block[label_block > 0]))
 
         total = block_labeled.map_blocks(
-            lambda x: np.ones(
-                (1, ) * x.ndim, dtype=np.uint64) * count_labels(x),
+            lambda x: (np.ones(
+                (1, ) * x.ndim) * count_labels(x)).astype(np.uint64),
             dtype=np.uint64,
             chunks=(1, ) * block_labeled.ndim
         ).sum()
 
-        n = total - da.from_delayed(
+        n_reduced = da.from_delayed(
             delayed(_label.count_n_of_collapsed_labels)(mapping),
             shape=(),
             dtype=np.uint64
         )
 
+        n = total - n_reduced
+
     return (relabeled, n)
 
 
diff --git a/dask_image/ndmeasure/_utils/_label.py b/dask_image/ndmeasure/_utils/_label.py
index 6ede1837..e7a7188e 100644
--- a/dask_image/ndmeasure/_utils/_label.py
+++ b/dask_image/ndmeasure/_utils/_label.py
@@ -441,7 +441,8 @@ def connected_components_delayed(all_mappings):
     unique_labels_new = np.arange(
         0, len(unique_labels), dtype=all_mappings.dtype)
 
-    relabeled_mappings = unique_labels_new[unique_inverse]
+    relabeled_mappings = unique_labels_new[unique_inverse].reshape(
+        all_mappings.shape)
 
     i, j = relabeled_mappings
     csr_matrix = _to_csr_matrix(i, j, len(unique_labels))
@@ -455,7 +456,9 @@ def connected_components_delayed(all_mappings):
 
 
 def count_n_of_collapsed_labels(mapping):
-    return len(mapping.keys()) - len(set(mapping.values()))
+    return np.array(
+        len(mapping.keys()) - len(set(mapping.values())))\
+            .astype(np.uint64)
 
 
 def _encode_label(label, block_id, encoding_dtype=np.uint32):