diff --git a/src/spikeinterface/core/baserecording.py b/src/spikeinterface/core/baserecording.py
index 712d3cd6cb..14d13a6bff 100644
--- a/src/spikeinterface/core/baserecording.py
+++ b/src/spikeinterface/core/baserecording.py
@@ -297,7 +297,6 @@ def get_traces(
         order: "C" | "F" | None = None,
         return_scaled: bool | None = None,
         return_in_uV: bool = False,
-        cast_unsigned: bool = False,
     ) -> np.ndarray:
         """Returns traces from recording.
 
@@ -320,9 +319,6 @@
         return_in_uV : bool, default: False
             If True and the recording has scaling (gain_to_uV and offset_to_uV properties),
             traces are scaled to uV
-        cast_unsigned : bool, default: False
-            If True and the traces are unsigned, they are cast to integer and centered
-            (an offset of (2**nbits) is subtracted)
 
         Returns
         -------
@@ -345,17 +341,6 @@
             assert order in ["C", "F"]
             traces = np.asanyarray(traces, order=order)
 
-        if cast_unsigned:
-            dtype = traces.dtype
-            # if dtype is unsigned, return centered signed signal
-            if dtype.kind == "u":
-                itemsize = dtype.itemsize
-                assert itemsize < 8, "Cannot upcast uint64!"
-                nbits = dtype.itemsize * 8
-                # upcast to int with double itemsize
-                traces = traces.astype(f"int{2 * (dtype.itemsize) * 8}") - 2 ** (nbits - 1)
-                traces = traces.astype(f"int{dtype.itemsize * 8}")
-
         # Handle deprecated return_scaled parameter
         if return_scaled is not None:
             warnings.warn(
diff --git a/src/spikeinterface/core/recording_tools.py b/src/spikeinterface/core/recording_tools.py
index 4a3463a0df..b56bb41eaa 100644
--- a/src/spikeinterface/core/recording_tools.py
+++ b/src/spikeinterface/core/recording_tools.py
@@ -54,13 +54,12 @@ def read_binary_recording(file, num_channels, dtype, time_axis=0, offset=0):
 
 
 # used by write_binary_recording + ChunkRecordingExecutor
-def _init_binary_worker(recording, file_path_dict, dtype, byte_offest, cast_unsigned):
+def _init_binary_worker(recording, file_path_dict, dtype, byte_offest):
     # create a local dict per worker
     worker_ctx = {}
     worker_ctx["recording"] = recording
     worker_ctx["byte_offset"] = byte_offest
     worker_ctx["dtype"] = np.dtype(dtype)
-    worker_ctx["cast_unsigned"] = cast_unsigned
 
     file_dict = {segment_index: open(file_path, "r+") for segment_index, file_path in file_path_dict.items()}
     worker_ctx["file_dict"] = file_dict
@@ -74,7 +73,6 @@ def write_binary_recording(
     dtype: np.typing.DTypeLike = None,
     add_file_extension: bool = True,
     byte_offset: int = 0,
-    auto_cast_uint: bool = True,
     verbose: bool = False,
     **job_kwargs,
 ):
@@ -98,9 +96,6 @@
     byte_offset : int, default: 0
         Offset in bytes for the binary file (e.g. to write a header). This is useful in case you want to append data
        to an existing file where you wrote a header or other data before.
-    auto_cast_uint : bool, default: True
-        If True, unsigned integers are automatically cast to int if the specified dtype is signed
-        .. deprecated:: 0.103, use the `unsigned_to_signed` function instead.
    verbose : bool
        This is the verbosity of the ChunkRecordingExecutor
    {}
@@ -117,12 +112,6 @@
         file_path_list = [add_suffix(file_path, ["raw", "bin", "dat"]) for file_path in file_path_list]
 
     dtype = dtype if dtype is not None else recording.get_dtype()
-    if auto_cast_uint:
-        cast_unsigned = determine_cast_unsigned(recording, dtype)
-        warning_message = (
-            "auto_cast_uint is deprecated and will be removed in 0.103. Use the `unsigned_to_signed` function instead."
-        )
-        warnings.warn(warning_message, DeprecationWarning, stacklevel=2)
 
     dtype_size_bytes = np.dtype(dtype).itemsize
     num_channels = recording.get_num_channels()
@@ -144,7 +133,7 @@
     # use executor (loop or workers)
     func = _write_binary_chunk
     init_func = _init_binary_worker
-    init_args = (recording, file_path_dict, dtype, byte_offset, cast_unsigned)
+    init_args = (recording, file_path_dict, dtype, byte_offset)
     executor = ChunkRecordingExecutor(
         recording, func, init_func, init_args, job_name="write_binary_recording", verbose=verbose, **job_kwargs
     )
@@ -157,7 +146,6 @@ def _write_binary_chunk(segment_index, start_frame, end_frame, worker_ctx):
     recording = worker_ctx["recording"]
     dtype = worker_ctx["dtype"]
     byte_offset = worker_ctx["byte_offset"]
-    cast_unsigned = worker_ctx["cast_unsigned"]
     file = worker_ctx["file_dict"][segment_index]
 
     num_channels = recording.get_num_channels()
@@ -181,9 +169,7 @@
     memmap_array = np.ndarray(shape=shape, dtype=dtype, buffer=memmap_obj, offset=start_offset)
 
     # Extract the traces and store them in the memmap array
-    traces = recording.get_traces(
-        start_frame=start_frame, end_frame=end_frame, segment_index=segment_index, cast_unsigned=cast_unsigned
-    )
+    traces = recording.get_traces(start_frame=start_frame, end_frame=end_frame, segment_index=segment_index)
 
     if traces.dtype != dtype:
         traces = traces.astype(dtype, copy=False)
@@ -243,7 +229,7 @@ def write_binary_recording_file_handle(
 
 
 # used by write_memory_recording
-def _init_memory_worker(recording, arrays, shm_names, shapes, dtype, cast_unsigned):
+def _init_memory_worker(recording, arrays, shm_names, shapes, dtype):
     # create a local dict per worker
     worker_ctx = {}
     if isinstance(recording, dict):
@@ -269,7 +255,6 @@ def _init_memory_worker(recording, arrays, shm_names, shapes, dtype, cast_unsign
             arrays.append(arr)
 
     worker_ctx["arrays"] = arrays
-    worker_ctx["cast_unsigned"] = cast_unsigned
 
     return worker_ctx
 
@@ -280,17 +265,14 @@
 def _write_memory_chunk(segment_index, start_frame, end_frame, worker_ctx):
     recording = worker_ctx["recording"]
     dtype = worker_ctx["dtype"]
     arr = worker_ctx["arrays"][segment_index]
-    cast_unsigned = worker_ctx["cast_unsigned"]
 
     # apply function
-    traces = recording.get_traces(
-        start_frame=start_frame, end_frame=end_frame, segment_index=segment_index, cast_unsigned=cast_unsigned
-    )
+    traces = recording.get_traces(start_frame=start_frame, end_frame=end_frame, segment_index=segment_index)
     traces = traces.astype(dtype, copy=False)
     arr[start_frame:end_frame, :] = traces
 
 
-def write_memory_recording(recording, dtype=None, verbose=False, auto_cast_uint=True, buffer_type="auto", **job_kwargs):
+def write_memory_recording(recording, dtype=None, verbose=False, buffer_type="auto", **job_kwargs):
     """
     Save the traces into numpy arrays (memory).
     try to use the SharedMemory introduce in py3.8 if n_jobs > 1
@@ -303,8 +285,6 @@ def write_memory_recording(recording, dtype=None, verbose=False, auto_cast_uint=
         Type of the saved data
     verbose : bool, default: False
         If True, output is verbose (when chunks are used)
-    auto_cast_uint : bool, default: True
-        If True, unsigned integers are automatically cast to int if the specified dtype is signed
     buffer_type : "auto" | "numpy" | "sharedmem"
     {}
@@ -316,10 +296,6 @@
     if dtype is None:
         dtype = recording.get_dtype()
-    if auto_cast_uint:
-        cast_unsigned = determine_cast_unsigned(recording, dtype)
-    else:
-        cast_unsigned = False
 
     # create sharedmmep
     arrays = []
@@ -352,9 +328,9 @@
     func = _write_memory_chunk
     init_func = _init_memory_worker
     if n_jobs > 1:
-        init_args = (recording, None, shm_names, shapes, dtype, cast_unsigned)
+        init_args = (recording, None, shm_names, shapes, dtype)
     else:
-        init_args = (recording, arrays, None, None, dtype, cast_unsigned)
+        init_args = (recording, arrays, None, None, dtype)
 
     executor = ChunkRecordingExecutor(
         recording, func, init_func, init_args, verbose=verbose, job_name="write_memory_recording", **job_kwargs
@@ -379,7 +355,6 @@ def write_to_h5_dataset_format(
     chunk_size=None,
     chunk_memory="500M",
     verbose=False,
-    auto_cast_uint=True,
     return_scaled=None,
     return_in_uV=False,
 ):
@@ -413,8 +388,6 @@
         Chunk size in bytes must end with "k", "M" or "G"
     verbose : bool, default: False
         If True, output is verbose (when chunks are used)
-    auto_cast_uint : bool, default: True
-        If True, unsigned integers are automatically cast to int if the specified dtype is signed
     return_scaled : bool | None, default: None
         DEPRECATED. Use return_in_uV instead.
         If True and the recording has scaling (gain_to_uV and offset_to_uV properties),
@@ -446,10 +419,6 @@
         dtype_file = recording.get_dtype()
     else:
         dtype_file = dtype
-    if auto_cast_uint:
-        cast_unsigned = determine_cast_unsigned(recording, dtype)
-    else:
-        cast_unsigned = False
 
     if single_axis:
         shape = (num_frames,)
@@ -472,7 +441,7 @@
             )
             return_in_uV = return_scaled
 
-        traces = recording.get_traces(cast_unsigned=cast_unsigned, return_scaled=return_in_uV)
+        traces = recording.get_traces(return_scaled=return_in_uV)
         if dtype is not None:
             traces = traces.astype(dtype_file, copy=False)
         if time_axis == 1:
@@ -496,7 +465,6 @@
                 segment_index=segment_index,
                 start_frame=i * chunk_size,
                 end_frame=min((i + 1) * chunk_size, num_frames),
-                cast_unsigned=cast_unsigned,
                 return_scaled=return_in_uV if return_scaled is None else return_scaled,
             )
             chunk_frames = traces.shape[0]
@@ -517,16 +485,6 @@
     return save_path
 
 
-def determine_cast_unsigned(recording, dtype):
-    recording_dtype = np.dtype(recording.get_dtype())
-
-    if np.dtype(dtype) != recording_dtype and recording_dtype.kind == "u" and np.dtype(dtype).kind == "i":
-        cast_unsigned = True
-    else:
-        cast_unsigned = False
-    return cast_unsigned
-
-
 def get_random_recording_slices(
     recording,
     method="full_random",
diff --git a/src/spikeinterface/core/tests/test_baserecording.py b/src/spikeinterface/core/tests/test_baserecording.py
index 9f735a504e..15b1fcf366 100644
--- a/src/spikeinterface/core/tests/test_baserecording.py
+++ b/src/spikeinterface/core/tests/test_baserecording.py
@@ -253,22 +253,6 @@ def test_BaseRecording(create_cache_folder):
     # Verify both parameters produce the same result
     assert np.array_equal(traces_float32_old, traces_float32_new)
 
-    # test cast unsigned
-    tr_u = rec_uint16.get_traces(cast_unsigned=False)
-    assert tr_u.dtype.kind == "u"
-    tr_i = rec_uint16.get_traces(cast_unsigned=True)
-    assert tr_i.dtype.kind == "i"
-    folder = cache_folder / "recording_unsigned"
-    rec_u = rec_uint16.save(folder=folder)
-    rec_u.get_dtype() == "uint16"
-    folder = cache_folder / "recording_signed"
-    rec_i = rec_uint16.save(folder=folder, dtype="int16")
-    rec_i.get_dtype() == "int16"
-    assert np.allclose(
-        rec_u.get_traces(cast_unsigned=False).astype("float") - (2**15), rec_i.get_traces().astype("float")
-    )
-    assert np.allclose(rec_u.get_traces(cast_unsigned=True), rec_i.get_traces().astype("float"))
-
     # test cast with dtype
     rec_float32 = rec_int16.astype("float32")
     assert rec_float32.get_dtype() == "float32"
@@ -361,16 +345,6 @@
         assert rec2.get_annotation(annotation_name) == rec_zarr2.get_annotation(annotation_name)
         assert rec2.get_annotation(annotation_name) == rec_zarr2_loaded.get_annotation(annotation_name)
 
-    # test cast unsigned
-    rec_u = rec_uint16.save(format="zarr", folder=cache_folder / "rec_u")
-    rec_u.get_dtype() == "uint16"
-    rec_i = rec_uint16.save(format="zarr", folder=cache_folder / "rec_i", dtype="int16")
-    rec_i.get_dtype() == "int16"
-    assert np.allclose(
-        rec_u.get_traces(cast_unsigned=False).astype("float") - (2**15), rec_i.get_traces().astype("float")
-    )
-    assert np.allclose(rec_u.get_traces(cast_unsigned=True), rec_i.get_traces().astype("float"))
-
 
 def test_interleaved_probegroups():
     recording = generate_recording(durations=[1.0], num_channels=16)
diff --git a/src/spikeinterface/core/zarrextractors.py b/src/spikeinterface/core/zarrextractors.py
index d5023eb202..3013fffe4a 100644
--- a/src/spikeinterface/core/zarrextractors.py
+++ b/src/spikeinterface/core/zarrextractors.py
@@ -11,7 +11,6 @@
 from .basesorting import BaseSorting, SpikeVectorSortingSegment, minimum_spike_dtype
 from .core_tools import define_function_from_class, check_json
 from .job_tools import split_job_kwargs
-from .recording_tools import determine_cast_unsigned
 from .core_tools import is_path_remote
 
 
@@ -446,7 +445,7 @@ def add_sorting_to_zarr_group(sorting: BaseSorting, zarr_group: zarr.hierarchy.G
 
 
 # Recording
 def add_recording_to_zarr_group(
-    recording: BaseRecording, zarr_group: zarr.hierarchy.Group, verbose=False, auto_cast_uint=True, dtype=None, **kwargs
+    recording: BaseRecording, zarr_group: zarr.hierarchy.Group, verbose=False, dtype=None, **kwargs
 ):
     zarr_kwargs, job_kwargs = split_job_kwargs(kwargs)
@@ -478,7 +477,6 @@
         filters=filters_traces,
         dtype=dtype,
         channel_chunk_size=channel_chunk_size,
-        auto_cast_uint=auto_cast_uint,
         verbose=verbose,
         **job_kwargs,
     )
@@ -522,7 +520,6 @@
     compressor=None,
     filters=None,
     verbose=False,
-    auto_cast_uint=True,
     **job_kwargs,
 ):
     """
@@ -546,8 +543,6 @@
         List of zarr filters
     verbose : bool, default: False
         If True, output is verbose (when chunks are used)
-    auto_cast_uint : bool, default: True
-        If True, unsigned integers are automatically cast to int if the specified dtype is signed
     {}
     """
     from .job_tools import (
@@ -564,10 +559,6 @@
     if dtype is None:
         dtype = recording.get_dtype()
-    if auto_cast_uint:
-        cast_unsigned = determine_cast_unsigned(recording, dtype)
-    else:
-        cast_unsigned = False
 
     job_kwargs = fix_job_kwargs(job_kwargs)
     chunk_size = ensure_chunk_size(recording, **job_kwargs)
@@ -593,7 +584,7 @@
     # use executor (loop or workers)
     func = _write_zarr_chunk
     init_func = _init_zarr_worker
-    init_args = (recording, zarr_datasets, dtype, cast_unsigned)
+    init_args = (recording, zarr_datasets, dtype)
     executor = ChunkRecordingExecutor(
         recording, func, init_func, init_args, verbose=verbose, job_name="write_zarr_recording", **job_kwargs
     )
@@ -601,7 +592,7 @@
 
 
 # used by write_zarr_recording + ChunkRecordingExecutor
-def _init_zarr_worker(recording, zarr_datasets, dtype, cast_unsigned):
+def _init_zarr_worker(recording, zarr_datasets, dtype):
     import zarr
 
     # create a local dict per worker
@@ -609,7 +600,6 @@
     worker_ctx["recording"] = recording
     worker_ctx["zarr_datasets"] = zarr_datasets
     worker_ctx["dtype"] = np.dtype(dtype)
-    worker_ctx["cast_unsigned"] = cast_unsigned
 
     return worker_ctx
 
@@ -622,11 +612,12 @@
     recording = worker_ctx["recording"]
     dtype = worker_ctx["dtype"]
     zarr_dataset = worker_ctx["zarr_datasets"][segment_index]
-    cast_unsigned = worker_ctx["cast_unsigned"]
 
     # apply function
     traces = recording.get_traces(
-        start_frame=start_frame, end_frame=end_frame, segment_index=segment_index, cast_unsigned=cast_unsigned
+        start_frame=start_frame,
+        end_frame=end_frame,
+        segment_index=segment_index,
     )
     traces = traces.astype(dtype)
     zarr_dataset[start_frame:end_frame, :] = traces
diff --git a/src/spikeinterface/preprocessing/filter.py b/src/spikeinterface/preprocessing/filter.py
index 42b7090c0d..9157c450a0 100644
--- a/src/spikeinterface/preprocessing/filter.py
+++ b/src/spikeinterface/preprocessing/filter.py
@@ -399,12 +399,33 @@ def causal_filter(
 
 
 def fix_dtype(recording, dtype):
+    """
+    Fix the recording dtype for preprocessing by always returning a numpy.dtype.
+    If `dtype` is not provided, the recording dtype is returned.
+    If the dtype is unsigned, a ValueError is raised.
+
+    Parameters
+    ----------
+    recording : BaseRecording
+        The recording to fix the dtype for
+    dtype : str | numpy.dtype | None
+        A specified dtype to return as a numpy.dtype. If None, the recording dtype is used
+
+    Returns
+    -------
+    fixed_dtype : numpy.dtype
+        The fixed numpy.dtype
+    """
     if dtype is None:
         dtype = recording.get_dtype()
     dtype = np.dtype(dtype)
 
-    # if uint --> force int
+    # unsigned dtypes are not supported --> raise an informative error
     if dtype.kind == "u":
-        dtype = np.dtype(dtype.str.replace("u", "i"))
+        raise ValueError(
+            "Unsigned types are not supported, since they don't interact well with "
+            "various preprocessing steps. You can use "
+            "`spikeinterface.preprocessing.unsigned_to_signed` to convert the recording to a signed type."
+        )
 
     return dtype
diff --git a/src/spikeinterface/preprocessing/tests/test_filter.py b/src/spikeinterface/preprocessing/tests/test_filter.py
index bf723c84b9..66889bf095 100644
--- a/src/spikeinterface/preprocessing/tests/test_filter.py
+++ b/src/spikeinterface/preprocessing/tests/test_filter.py
@@ -190,24 +190,6 @@ def test_filter():
     assert np.allclose(trace0, trace1)
 
 
-def test_filter_unsigned():
-    traces = np.random.randint(1, 1000, (5000, 4), dtype="uint16")
-    rec = NumpyRecording(traces_list=traces, sampling_frequency=1000)
-    rec = rec.save()
-
-    rec2 = bandpass_filter(rec, freq_min=10.0, freq_max=300.0)
-    assert not np.issubdtype(rec2.get_dtype(), np.unsignedinteger)
-    traces2 = rec2.get_traces()
-    assert not np.issubdtype(traces2.dtype, np.unsignedinteger)
-
-    # notch filter note supported for unsigned
-    with pytest.raises(TypeError):
-        rec3 = notch_filter(rec, freq=300.0, q=10)
-
-    # this is ok
-    rec3 = notch_filter(rec, freq=300.0, q=10, dtype="float32")
-
-
 @pytest.mark.skip("OpenCL not tested")
 def test_filter_opencl():
     rec = generate_recording(
@@ -240,4 +222,3 @@
 
 if __name__ == "__main__":
     test_filter()
-    test_filter_unsigned()
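
For downstream code that relied on the removed `cast_unsigned` / `auto_cast_uint` flags, the replacement workflow is a single explicit preprocessing step. Below is a minimal sketch, assuming `unsigned_to_signed` is importable from `spikeinterface.preprocessing` (as the new `fix_dtype` error message suggests) and that it centers the signal the same way the removed `cast_unsigned=True` path did; the recording construction reuses the pattern from the deleted `test_filter_unsigned`:

```python
import numpy as np

from spikeinterface.core import NumpyRecording
from spikeinterface.preprocessing import bandpass_filter, unsigned_to_signed

# Build a small unsigned recording (same pattern as the removed test_filter_unsigned)
traces = np.random.randint(1, 1000, (5000, 4), dtype="uint16")
rec_uint = NumpyRecording(traces_list=traces, sampling_frequency=1000)

# bandpass_filter(rec_uint, ...) now raises ValueError via fix_dtype instead of
# silently remapping "u" -> "i" in the dtype string.

# Convert explicitly first. This replaces get_traces(cast_unsigned=True), which
# upcast the traces and subtracted the half-range offset 2 ** (nbits - 1).
rec_int = unsigned_to_signed(rec_uint)
assert rec_int.get_dtype().kind == "i"

# For uint16 the conversion should center the signal by 2**15, mirroring the
# equivalence the removed tests asserted against a recording saved as int16.
assert np.allclose(
    rec_int.get_traces().astype("float"),
    rec_uint.get_traces().astype("float") - 2**15,
)

rec_filtered = bandpass_filter(rec_int, freq_min=10.0, freq_max=300.0)
```

This keeps the unsigned-to-signed conversion as one explicit, inspectable node in the preprocessing chain, rather than an implicit flag threaded through `get_traces` and every write path (binary, memory, h5, zarr) as before.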