Merge pull request #3982 from alejoe91/remove-cast-unsigned

alejoe91 · web-flow · commit fcf2284a5d1a · 2025-06-12T19:13:12.000+02:00
Remove auto_cast_uint, cast_unsigned, and modify fix_dtype
diff --git a/src/spikeinterface/core/baserecording.py b/src/spikeinterface/core/baserecording.py
@@ -297,7 +297,6 @@ def get_traces(
         order: "C" | "F" | None = None,
         return_scaled: bool | None = None,
         return_in_uV: bool = False,
-        cast_unsigned: bool = False,
     ) -> np.ndarray:
         """Returns traces from recording.
 
@@ -320,9 +319,6 @@ def get_traces(
         return_in_uV : bool, default: False
             If True and the recording has scaling (gain_to_uV and offset_to_uV properties),
             traces are scaled to uV
-        cast_unsigned : bool, default: False
-            If True and the traces are unsigned, they are cast to integer and centered
-            (an offset of (2**nbits) is subtracted)
 
         Returns
         -------
@@ -345,17 +341,6 @@ def get_traces(
             assert order in ["C", "F"]
             traces = np.asanyarray(traces, order=order)
 
-        if cast_unsigned:
-            dtype = traces.dtype
-            # if dtype is unsigned, return centered signed signal
-            if dtype.kind == "u":
-                itemsize = dtype.itemsize
-                assert itemsize < 8, "Cannot upcast uint64!"
-                nbits = dtype.itemsize * 8
-                # upcast to int with double itemsize
-                traces = traces.astype(f"int{2 * (dtype.itemsize) * 8}") - 2 ** (nbits - 1)
-                traces = traces.astype(f"int{dtype.itemsize * 8}")
-
         # Handle deprecated return_scaled parameter
         if return_scaled is not None:
             warnings.warn(
diff --git a/src/spikeinterface/core/recording_tools.py b/src/spikeinterface/core/recording_tools.py
@@ -54,13 +54,12 @@ def read_binary_recording(file, num_channels, dtype, time_axis=0, offset=0):
 
 
 # used by write_binary_recording + ChunkRecordingExecutor
-def _init_binary_worker(recording, file_path_dict, dtype, byte_offest, cast_unsigned):
+def _init_binary_worker(recording, file_path_dict, dtype, byte_offest):
     # create a local dict per worker
     worker_ctx = {}
     worker_ctx["recording"] = recording
     worker_ctx["byte_offset"] = byte_offest
     worker_ctx["dtype"] = np.dtype(dtype)
-    worker_ctx["cast_unsigned"] = cast_unsigned
 
     file_dict = {segment_index: open(file_path, "r+") for segment_index, file_path in file_path_dict.items()}
     worker_ctx["file_dict"] = file_dict
@@ -74,7 +73,6 @@ def write_binary_recording(
     dtype: np.typing.DTypeLike = None,
     add_file_extension: bool = True,
     byte_offset: int = 0,
-    auto_cast_uint: bool = True,
     verbose: bool = False,
     **job_kwargs,
 ):
@@ -98,9 +96,6 @@ def write_binary_recording(
     byte_offset : int, default: 0
         Offset in bytes for the binary file (e.g. to write a header). This is useful in case you want to append data
         to an existing file where you wrote a header or other data before.
-    auto_cast_uint : bool, default: True
-        If True, unsigned integers are automatically cast to int if the specified dtype is signed
-        .. deprecated:: 0.103, use the `unsigned_to_signed` function instead.
     verbose : bool
         This is the verbosity of the ChunkRecordingExecutor
     {}
@@ -117,12 +112,6 @@ def write_binary_recording(
         file_path_list = [add_suffix(file_path, ["raw", "bin", "dat"]) for file_path in file_path_list]
 
     dtype = dtype if dtype is not None else recording.get_dtype()
-    if auto_cast_uint:
-        cast_unsigned = determine_cast_unsigned(recording, dtype)
-        warning_message = (
-            "auto_cast_uint is deprecated and will be removed in 0.103. Use the `unsigned_to_signed` function instead."
-        )
-        warnings.warn(warning_message, DeprecationWarning, stacklevel=2)
 
     dtype_size_bytes = np.dtype(dtype).itemsize
     num_channels = recording.get_num_channels()
@@ -144,7 +133,7 @@ def write_binary_recording(
     # use executor (loop or workers)
     func = _write_binary_chunk
     init_func = _init_binary_worker
-    init_args = (recording, file_path_dict, dtype, byte_offset, cast_unsigned)
+    init_args = (recording, file_path_dict, dtype, byte_offset)
     executor = ChunkRecordingExecutor(
         recording, func, init_func, init_args, job_name="write_binary_recording", verbose=verbose, **job_kwargs
     )
@@ -157,7 +146,6 @@ def _write_binary_chunk(segment_index, start_frame, end_frame, worker_ctx):
     recording = worker_ctx["recording"]
     dtype = worker_ctx["dtype"]
     byte_offset = worker_ctx["byte_offset"]
-    cast_unsigned = worker_ctx["cast_unsigned"]
     file = worker_ctx["file_dict"][segment_index]
 
     num_channels = recording.get_num_channels()
@@ -181,9 +169,7 @@ def _write_binary_chunk(segment_index, start_frame, end_frame, worker_ctx):
     memmap_array = np.ndarray(shape=shape, dtype=dtype, buffer=memmap_obj, offset=start_offset)
 
     # Extract the traces and store them in the memmap array
-    traces = recording.get_traces(
-        start_frame=start_frame, end_frame=end_frame, segment_index=segment_index, cast_unsigned=cast_unsigned
-    )
+    traces = recording.get_traces(start_frame=start_frame, end_frame=end_frame, segment_index=segment_index)
 
     if traces.dtype != dtype:
         traces = traces.astype(dtype, copy=False)
@@ -243,7 +229,7 @@ def write_binary_recording_file_handle(
 
 
 # used by write_memory_recording
-def _init_memory_worker(recording, arrays, shm_names, shapes, dtype, cast_unsigned):
+def _init_memory_worker(recording, arrays, shm_names, shapes, dtype):
     # create a local dict per worker
     worker_ctx = {}
     if isinstance(recording, dict):
@@ -269,7 +255,6 @@ def _init_memory_worker(recording, arrays, shm_names, shapes, dtype, cast_unsign
             arrays.append(arr)
 
     worker_ctx["arrays"] = arrays
-    worker_ctx["cast_unsigned"] = cast_unsigned
 
     return worker_ctx
 
@@ -280,17 +265,14 @@ def _write_memory_chunk(segment_index, start_frame, end_frame, worker_ctx):
     recording = worker_ctx["recording"]
     dtype = worker_ctx["dtype"]
     arr = worker_ctx["arrays"][segment_index]
-    cast_unsigned = worker_ctx["cast_unsigned"]
 
     # apply function
-    traces = recording.get_traces(
-        start_frame=start_frame, end_frame=end_frame, segment_index=segment_index, cast_unsigned=cast_unsigned
-    )
+    traces = recording.get_traces(start_frame=start_frame, end_frame=end_frame, segment_index=segment_index)
     traces = traces.astype(dtype, copy=False)
     arr[start_frame:end_frame, :] = traces
 
 
-def write_memory_recording(recording, dtype=None, verbose=False, auto_cast_uint=True, buffer_type="auto", **job_kwargs):
+def write_memory_recording(recording, dtype=None, verbose=False, buffer_type="auto", **job_kwargs):
     """
     Save the traces into numpy arrays (memory).
     try to use the SharedMemory introduce in py3.8 if n_jobs > 1
@@ -303,8 +285,6 @@ def write_memory_recording(recording, dtype=None, verbose=False, auto_cast_uint=
         Type of the saved data
     verbose : bool, default: False
         If True, output is verbose (when chunks are used)
-    auto_cast_uint : bool, default: True
-        If True, unsigned integers are automatically cast to int if the specified dtype is signed
     buffer_type : "auto" | "numpy" | "sharedmem"
     {}
 
@@ -316,10 +296,6 @@ def write_memory_recording(recording, dtype=None, verbose=False, auto_cast_uint=
 
     if dtype is None:
         dtype = recording.get_dtype()
-    if auto_cast_uint:
-        cast_unsigned = determine_cast_unsigned(recording, dtype)
-    else:
-        cast_unsigned = False
 
     # create sharedmmep
     arrays = []
@@ -352,9 +328,9 @@ def write_memory_recording(recording, dtype=None, verbose=False, auto_cast_uint=
     func = _write_memory_chunk
     init_func = _init_memory_worker
     if n_jobs > 1:
-        init_args = (recording, None, shm_names, shapes, dtype, cast_unsigned)
+        init_args = (recording, None, shm_names, shapes, dtype)
     else:
-        init_args = (recording, arrays, None, None, dtype, cast_unsigned)
+        init_args = (recording, arrays, None, None, dtype)
 
     executor = ChunkRecordingExecutor(
         recording, func, init_func, init_args, verbose=verbose, job_name="write_memory_recording", **job_kwargs
@@ -379,7 +355,6 @@ def write_to_h5_dataset_format(
     chunk_size=None,
     chunk_memory="500M",
     verbose=False,
-    auto_cast_uint=True,
     return_scaled=None,
     return_in_uV=False,
 ):
@@ -413,8 +388,6 @@ def write_to_h5_dataset_format(
         Chunk size in bytes must end with "k", "M" or "G"
     verbose : bool, default: False
         If True, output is verbose (when chunks are used)
-    auto_cast_uint : bool, default: True
-        If True, unsigned integers are automatically cast to int if the specified dtype is signed
     return_scaled : bool | None, default: None
         DEPRECATED. Use return_in_uV instead.
         If True and the recording has scaling (gain_to_uV and offset_to_uV properties),
@@ -446,10 +419,6 @@ def write_to_h5_dataset_format(
         dtype_file = recording.get_dtype()
     else:
         dtype_file = dtype
-    if auto_cast_uint:
-        cast_unsigned = determine_cast_unsigned(recording, dtype)
-    else:
-        cast_unsigned = False
 
     if single_axis:
         shape = (num_frames,)
@@ -472,7 +441,7 @@ def write_to_h5_dataset_format(
             )
             return_in_uV = return_scaled
 
-        traces = recording.get_traces(cast_unsigned=cast_unsigned, return_scaled=return_in_uV)
+        traces = recording.get_traces(return_scaled=return_in_uV)
         if dtype is not None:
             traces = traces.astype(dtype_file, copy=False)
         if time_axis == 1:
@@ -496,7 +465,6 @@ def write_to_h5_dataset_format(
                 segment_index=segment_index,
                 start_frame=i * chunk_size,
                 end_frame=min((i + 1) * chunk_size, num_frames),
-                cast_unsigned=cast_unsigned,
                 return_scaled=return_in_uV if return_scaled is None else return_scaled,
             )
             chunk_frames = traces.shape[0]
@@ -517,16 +485,6 @@ def write_to_h5_dataset_format(
     return save_path
 
 
-def determine_cast_unsigned(recording, dtype):
-    recording_dtype = np.dtype(recording.get_dtype())
-
-    if np.dtype(dtype) != recording_dtype and recording_dtype.kind == "u" and np.dtype(dtype).kind == "i":
-        cast_unsigned = True
-    else:
-        cast_unsigned = False
-    return cast_unsigned
-
-
 def get_random_recording_slices(
     recording,
     method="full_random",
diff --git a/src/spikeinterface/core/tests/test_baserecording.py b/src/spikeinterface/core/tests/test_baserecording.py
@@ -253,22 +253,6 @@ def test_BaseRecording(create_cache_folder):
     # Verify both parameters produce the same result
     assert np.array_equal(traces_float32_old, traces_float32_new)
 
-    # test cast unsigned
-    tr_u = rec_uint16.get_traces(cast_unsigned=False)
-    assert tr_u.dtype.kind == "u"
-    tr_i = rec_uint16.get_traces(cast_unsigned=True)
-    assert tr_i.dtype.kind == "i"
-    folder = cache_folder / "recording_unsigned"
-    rec_u = rec_uint16.save(folder=folder)
-    rec_u.get_dtype() == "uint16"
-    folder = cache_folder / "recording_signed"
-    rec_i = rec_uint16.save(folder=folder, dtype="int16")
-    rec_i.get_dtype() == "int16"
-    assert np.allclose(
-        rec_u.get_traces(cast_unsigned=False).astype("float") - (2**15), rec_i.get_traces().astype("float")
-    )
-    assert np.allclose(rec_u.get_traces(cast_unsigned=True), rec_i.get_traces().astype("float"))
-
     # test cast with dtype
     rec_float32 = rec_int16.astype("float32")
     assert rec_float32.get_dtype() == "float32"
@@ -361,16 +345,6 @@ def test_BaseRecording(create_cache_folder):
         assert rec2.get_annotation(annotation_name) == rec_zarr2.get_annotation(annotation_name)
         assert rec2.get_annotation(annotation_name) == rec_zarr2_loaded.get_annotation(annotation_name)
 
-    # test cast unsigned
-    rec_u = rec_uint16.save(format="zarr", folder=cache_folder / "rec_u")
-    rec_u.get_dtype() == "uint16"
-    rec_i = rec_uint16.save(format="zarr", folder=cache_folder / "rec_i", dtype="int16")
-    rec_i.get_dtype() == "int16"
-    assert np.allclose(
-        rec_u.get_traces(cast_unsigned=False).astype("float") - (2**15), rec_i.get_traces().astype("float")
-    )
-    assert np.allclose(rec_u.get_traces(cast_unsigned=True), rec_i.get_traces().astype("float"))
-
 
 def test_interleaved_probegroups():
     recording = generate_recording(durations=[1.0], num_channels=16)
diff --git a/src/spikeinterface/core/zarrextractors.py b/src/spikeinterface/core/zarrextractors.py
@@ -11,7 +11,6 @@
 from .basesorting import BaseSorting, SpikeVectorSortingSegment, minimum_spike_dtype
 from .core_tools import define_function_from_class, check_json
 from .job_tools import split_job_kwargs
-from .recording_tools import determine_cast_unsigned
 from .core_tools import is_path_remote
 
 
@@ -446,7 +445,7 @@ def add_sorting_to_zarr_group(sorting: BaseSorting, zarr_group: zarr.hierarchy.G
 
 # Recording
 def add_recording_to_zarr_group(
-    recording: BaseRecording, zarr_group: zarr.hierarchy.Group, verbose=False, auto_cast_uint=True, dtype=None, **kwargs
+    recording: BaseRecording, zarr_group: zarr.hierarchy.Group, verbose=False, dtype=None, **kwargs
 ):
     zarr_kwargs, job_kwargs = split_job_kwargs(kwargs)
 
@@ -478,7 +477,6 @@ def add_recording_to_zarr_group(
         filters=filters_traces,
         dtype=dtype,
         channel_chunk_size=channel_chunk_size,
-        auto_cast_uint=auto_cast_uint,
         verbose=verbose,
         **job_kwargs,
     )
@@ -522,7 +520,6 @@ def add_traces_to_zarr(
     compressor=None,
     filters=None,
     verbose=False,
-    auto_cast_uint=True,
     **job_kwargs,
 ):
     """
@@ -546,8 +543,6 @@ def add_traces_to_zarr(
         List of zarr filters
     verbose : bool, default: False
         If True, output is verbose (when chunks are used)
-    auto_cast_uint : bool, default: True
-        If True, unsigned integers are automatically cast to int if the specified dtype is signed
     {}
     """
     from .job_tools import (
@@ -564,10 +559,6 @@ def add_traces_to_zarr(
 
     if dtype is None:
         dtype = recording.get_dtype()
-    if auto_cast_uint:
-        cast_unsigned = determine_cast_unsigned(recording, dtype)
-    else:
-        cast_unsigned = False
 
     job_kwargs = fix_job_kwargs(job_kwargs)
     chunk_size = ensure_chunk_size(recording, **job_kwargs)
@@ -593,23 +584,22 @@ def add_traces_to_zarr(
     # use executor (loop or workers)
     func = _write_zarr_chunk
     init_func = _init_zarr_worker
-    init_args = (recording, zarr_datasets, dtype, cast_unsigned)
+    init_args = (recording, zarr_datasets, dtype)
     executor = ChunkRecordingExecutor(
         recording, func, init_func, init_args, verbose=verbose, job_name="write_zarr_recording", **job_kwargs
     )
     executor.run()
 
 
 # used by write_zarr_recording + ChunkRecordingExecutor
-def _init_zarr_worker(recording, zarr_datasets, dtype, cast_unsigned):
+def _init_zarr_worker(recording, zarr_datasets, dtype):
     import zarr
 
     # create a local dict per worker
     worker_ctx = {}
     worker_ctx["recording"] = recording
     worker_ctx["zarr_datasets"] = zarr_datasets
     worker_ctx["dtype"] = np.dtype(dtype)
-    worker_ctx["cast_unsigned"] = cast_unsigned
 
     return worker_ctx
 
@@ -622,11 +612,12 @@ def _write_zarr_chunk(segment_index, start_frame, end_frame, worker_ctx):
     recording = worker_ctx["recording"]
     dtype = worker_ctx["dtype"]
     zarr_dataset = worker_ctx["zarr_datasets"][segment_index]
-    cast_unsigned = worker_ctx["cast_unsigned"]
 
     # apply function
     traces = recording.get_traces(
-        start_frame=start_frame, end_frame=end_frame, segment_index=segment_index, cast_unsigned=cast_unsigned
+        start_frame=start_frame,
+        end_frame=end_frame,
+        segment_index=segment_index,
     )
     traces = traces.astype(dtype)
     zarr_dataset[start_frame:end_frame, :] = traces
diff --git a/src/spikeinterface/preprocessing/filter.py b/src/spikeinterface/preprocessing/filter.py
diff --git a/src/spikeinterface/preprocessing/tests/test_filter.py b/src/spikeinterface/preprocessing/tests/test_filter.py