From 99e6d508831257f285c52e478d7a926bf83ac0bd Mon Sep 17 00:00:00 2001 From: zm711 <92116279+zm711@users.noreply.github.com> Date: Sat, 19 Jul 2025 13:41:38 -0400 Subject: [PATCH 1/5] first draft of unsigned_to_signed doc --- doc/how_to/index.rst | 1 + doc/how_to/physical_units.rst | 2 + doc/how_to/unsigned_to_signed.rst | 81 ++++++++++++++++++++++ src/spikeinterface/preprocessing/filter.py | 4 +- 4 files changed, 87 insertions(+), 1 deletion(-) create mode 100644 doc/how_to/unsigned_to_signed.rst diff --git a/doc/how_to/index.rst b/doc/how_to/index.rst index aa2e8ca518..675edb2d18 100644 --- a/doc/how_to/index.rst +++ b/doc/how_to/index.rst @@ -18,4 +18,5 @@ Guides on how to solve specific, short problems in SpikeInterface. Learn how to. auto_curation_training auto_curation_prediction physical_units + unsigned_to_signed customize_a_plot diff --git a/doc/how_to/physical_units.rst b/doc/how_to/physical_units.rst index d2c1743930..f98affe80c 100644 --- a/doc/how_to/physical_units.rst +++ b/doc/how_to/physical_units.rst @@ -1,3 +1,5 @@ +.. _physical_units: + Working with physical units in SpikeInterface recordings ======================================================== diff --git a/doc/how_to/unsigned_to_signed.rst b/doc/how_to/unsigned_to_signed.rst new file mode 100644 index 0000000000..03d1ebc672 --- /dev/null +++ b/doc/how_to/unsigned_to_signed.rst @@ -0,0 +1,81 @@ +Unsigned to Signed Data types +============================= + +As of version 0.103.0 SpikeInterface has changed one of its defaults for interacting with +:code:`Recording`` objects. We no longer autocast unsigned dtypes to signed implicitly. This +means that some users of SpikeInterface will need to add one additional line of code to their scripts +to explicitly handle this conversion. + + +Why this matters? +----------------- + +For those that want a deeper understanding of dtypes `NumPy provides a great explanation `_. 
+For our purposes it is important to know that many pieces of recording equipment opt to store their electrophysiological data as unsigned integers, +which provides the benefit of reduces the necessary file size. In order to convert to real units these file formats only need to store a :code:`gain` +and an :code:`offset`. Our :code:`RecordingExtractor`'s maintain the dtype that the file format utilizes, which means that some of our +:code:`RecordingExtractor`'s will have unsigned dtypes. + +The problem with using unsigned dtypes is that many types of funtions (including the ones we use from SciPy) perform poorly with unsigned integers. +This is made worse by the fact that these failures are silent (i.e. no error is triggered but the operation leads to nonsensical data). So the +solution required is to convernt unsigned integers into signed integers. Previously we did this under the hood, automatically for users that had +a :code:`Recording` object with an unsigned dtype. + +We decided, however, that implicitly performing this action was not the best course of action. So from version 0.103.0, users will now explicitly +have to perform this transformation of their data. This will help users better understand how they are processing their data during an analysis +pipeline as well as better understand the provenance of their pipeline. + + +Using :code:`unsigned_to_signed` +-------------------------------- + +For users that receive an error because their :code:`Recording` is unsigned, there is one additional step that must be done: + +.. 
code:: python + + import spikeinterface.extractors as se + import spikeinterface.preprocessing as spre + + # Intan is an example of unsigned data + recording = se.read_intan('path/to/my/file.rhd', stream_id='0') + # to get a signed version of our Recording we use the following function + recording_signed = spre.unsigned_to_signed(recording) + # we can use all functions that we used previously in our scripts + recording_filtered = spre.bandpass_filter(recording_signed) + + +Now with the signed dtype of the :code:`Recording` one can use a SpikeInterface pipeline as usual. + + +If you are curious if your :code:`Recording` is unsigned you can simply check the repr or use :code:`get_dtype()` + +.. code:: python + + # the repr automatically displays the dtype + print(recording) + # use method on the Recording object + print(recording.get_dtype()) + + + +Additional Notes +---------------- + +1) Some some sorters make use of SpikeInterface preprocessing either +within their wrappers or within their own code base. So remember to use the "signed" version of +your recording for the rest of your pipeline. + +2) Using :code:`unsigned_to_signed` in versions less than 0.103.0 does not hurt your scripts. This +option was available previously along with the implicit option. Adding this into scripts with old +versions of SpikeInterface will still work and will "future-proof" your scripts for when you +update to a version greater than or equal to 0.103.0 + +3) For additional information on units and scaling in SpikeInterface see :ref:`physical_units` + + +Bit depth +--------- + +One final important piece of information for some users is the optional :code:`bit_depth` argument that can be fed +into the :code:`unsigned_to_signed` function. This should be used in cases where the ADC bit depth does not match +the bit depth of the data type. This allows us to estimate the offset based on the bit depth of the ADC. 
diff --git a/src/spikeinterface/preprocessing/filter.py b/src/spikeinterface/preprocessing/filter.py index 9157c450a0..78542e1f37 100644 --- a/src/spikeinterface/preprocessing/filter.py +++ b/src/spikeinterface/preprocessing/filter.py @@ -423,9 +423,11 @@ def fix_dtype(recording, dtype): # if uint --> force int if dtype.kind == "u": raise ValueError( - "Unsigned types are not supported, since they don't ineract well with " + "Unsigned types are not supported, since they don't interact well with " "various preprocessing steps. You can use " "`spikeinterface.preprocessing.unsigned_to_signed` to convert the recording to a signed type." + "For more information, please see " + "https://spikeinterface.readthedocs.io/en/stable/how_to/unsigned_to_signed.html" ) return dtype From 76755ee2f35d8421fb9f3d6775ac1c4a0544f9b6 Mon Sep 17 00:00:00 2001 From: Zach McKenzie <92116279+zm711@users.noreply.github.com> Date: Sun, 20 Jul 2025 18:44:02 -0400 Subject: [PATCH 2/5] Chris' fixes Co-authored-by: Chris Halcrow <57948917+chrishalcrow@users.noreply.github.com> --- doc/how_to/unsigned_to_signed.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/how_to/unsigned_to_signed.rst b/doc/how_to/unsigned_to_signed.rst index 03d1ebc672..0fed27e2bd 100644 --- a/doc/how_to/unsigned_to_signed.rst +++ b/doc/how_to/unsigned_to_signed.rst @@ -2,7 +2,7 @@ Unsigned to Signed Data types ============================= As of version 0.103.0 SpikeInterface has changed one of its defaults for interacting with -:code:`Recording`` objects. We no longer autocast unsigned dtypes to signed implicitly. This +:code:`Recording` objects. We no longer autocast unsigned dtypes to signed implicitly. This means that some users of SpikeInterface will need to add one additional line of code to their scripts to explicitly handle this conversion. @@ -12,7 +12,7 @@ Why this matters? For those that want a deeper understanding of dtypes `NumPy provides a great explanation `_. 
For our purposes it is important to know that many pieces of recording equipment opt to store their electrophysiological data as unsigned integers, -which provides the benefit of reduces the necessary file size. In order to convert to real units these file formats only need to store a :code:`gain` +which provides the benefit of reducing the necessary file size. In order to convert to real units these file formats only need to store a :code:`gain` and an :code:`offset`. Our :code:`RecordingExtractor`'s maintain the dtype that the file format utilizes, which means that some of our :code:`RecordingExtractor`'s will have unsigned dtypes. @@ -40,7 +40,7 @@ For users that receive an error because their :code:`Recording` is unsigned, the recording = se.read_intan('path/to/my/file.rhd', stream_id='0') # to get a signed version of our Recording we use the following function recording_signed = spre.unsigned_to_signed(recording) - # we can use all functions that we used previously in our scripts + # we can now apply any preprocessing functions like normal, e.g. recording_filtered = spre.bandpass_filter(recording_signed) From f5f0bfa91e1e5ff345c9d3b582c0f4014ceeba29 Mon Sep 17 00:00:00 2001 From: zm711 <92116279+zm711@users.noreply.github.com> Date: Sun, 20 Jul 2025 18:49:27 -0400 Subject: [PATCH 3/5] more Chris requests. --- doc/how_to/unsigned_to_signed.rst | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/doc/how_to/unsigned_to_signed.rst b/doc/how_to/unsigned_to_signed.rst index 0fed27e2bd..e83b1786ac 100644 --- a/doc/how_to/unsigned_to_signed.rst +++ b/doc/how_to/unsigned_to_signed.rst @@ -56,21 +56,22 @@ If you are curious if your :code:`Recording` is unsigned you can simply check th # use method on the Recording object print(recording.get_dtype()) - +In either case, if the dtype displayed has a :code:`u`at the beginning (e.g. :code:`uint16`) then your recording is +unsigned. If it doesn't have the :code:`u` (e.g. 
:code:`int16`) then it is signed and would not need this preprocessing step. Additional Notes ---------------- -1) Some some sorters make use of SpikeInterface preprocessing either -within their wrappers or within their own code base. So remember to use the "signed" version of -your recording for the rest of your pipeline. +1) Some sorters make use of SpikeInterface preprocessing either + within their wrappers or within their own code base. So remember to use the "signed" version of + your recording for the rest of your pipeline. 2) Using :code:`unsigned_to_signed` in versions less than 0.103.0 does not hurt your scripts. This -option was available previously along with the implicit option. Adding this into scripts with old -versions of SpikeInterface will still work and will "future-proof" your scripts for when you -update to a version greater than or equal to 0.103.0 + option was available previously along with the implicit option. Adding this into scripts with old + versions of SpikeInterface will still work and will "future-proof" your scripts for when you + update to a version greater than or equal to 0.103.0. -3) For additional information on units and scaling in SpikeInterface see :ref:`physical_units` +3) For additional information on units and scaling in SpikeInterface see :ref:`physical_units`. 
Bit depth From 56abb703d97a68ca9bafeea97b460b77c3cd19fa Mon Sep 17 00:00:00 2001 From: zm711 <92116279+zm711@users.noreply.github.com> Date: Sun, 20 Jul 2025 18:52:00 -0400 Subject: [PATCH 4/5] fix rendering typo --- doc/how_to/unsigned_to_signed.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/how_to/unsigned_to_signed.rst b/doc/how_to/unsigned_to_signed.rst index e83b1786ac..c8ec731430 100644 --- a/doc/how_to/unsigned_to_signed.rst +++ b/doc/how_to/unsigned_to_signed.rst @@ -56,7 +56,7 @@ If you are curious if your :code:`Recording` is unsigned you can simply check th # use method on the Recording object print(recording.get_dtype()) -In either case, if the dtype displayed has a :code:`u`at the beginning (e.g. :code:`uint16`) then your recording is +In either case, if the dtype displayed has a :code:`u` at the beginning (e.g. :code:`uint16`) then your recording is unsigned. If it doesn't have the :code:`u` (e.g. :code:`int16`) then it is signed and would not need this preprocessing step. Additional Notes @@ -69,7 +69,7 @@ Additional Notes 2) Using :code:`unsigned_to_signed` in versions less than 0.103.0 does not hurt your scripts. This option was available previously along with the implicit option. Adding this into scripts with old versions of SpikeInterface will still work and will "future-proof" your scripts for when you - update to a version greater than or equal to 0.103.0. + update to a version greater than or equal to 0.103.0. 3) For additional information on units and scaling in SpikeInterface see :ref:`physical_units`. 
From ca4db72aa592133aa47960dfb5eedd7a52eb0f58 Mon Sep 17 00:00:00 2001 From: Alessio Buccino Date: Tue, 22 Jul 2025 10:02:12 +0200 Subject: [PATCH 5/5] Some clarifications and bit depth --- doc/how_to/unsigned_to_signed.rst | 48 ++++++++++++++++++++++--------- 1 file changed, 34 insertions(+), 14 deletions(-) diff --git a/doc/how_to/unsigned_to_signed.rst b/doc/how_to/unsigned_to_signed.rst index c8ec731430..8043833ce8 100644 --- a/doc/how_to/unsigned_to_signed.rst +++ b/doc/how_to/unsigned_to_signed.rst @@ -11,19 +11,24 @@ Why this matters? ----------------- For those that want a deeper understanding of dtypes `NumPy provides a great explanation `_. -For our purposes it is important to know that many pieces of recording equipment opt to store their electrophysiological data as unsigned integers, -which provides the benefit of reducing the necessary file size. In order to convert to real units these file formats only need to store a :code:`gain` +For our purposes it is important to know that many pieces of recording equipment opt to store their electrophysiological data as unsigned integers +(e.g., Intan, Maxwell Biosystems, 3Brain Biocam). +Similarly to signed integers, in order to convert to real units these file formats only need to store a :code:`gain` and an :code:`offset`. Our :code:`RecordingExtractor`'s maintain the dtype that the file format utilizes, which means that some of our :code:`RecordingExtractor`'s will have unsigned dtypes. -The problem with using unsigned dtypes is that many types of funtions (including the ones we use from SciPy) perform poorly with unsigned integers. +The problem with using unsigned dtypes is that many types of functions (including the ones we use from :code:`SciPy`) perform poorly with unsigned integers. This is made worse by the fact that these failures are silent (i.e. no error is triggered but the operation leads to nonsensical data). So the solution required is to convernt unsigned integers into signed integers. 
Previously we did this under the hood, automatically for users that had a :code:`Recording` object with an unsigned dtype. -We decided, however, that implicitly performing this action was not the best course of action. So from version 0.103.0, users will now explicitly -have to perform this transformation of their data. This will help users better understand how they are processing their data during an analysis -pipeline as well as better understand the provenance of their pipeline. +We decided, however, that implicitly performing this action was not the best course of action, since: + +1) *explicit* is always better than *implicit* +2) some functions would *magically* change the dtype of the :code:`Recording` object, which can cause confusion + +So from version 0.103.0, users will now explicitly have to perform this transformation of their data. This will help users better understand how they are +processing their data during an analysis pipeline as well as better understand the provenance of their pipeline. Using :code:`unsigned_to_signed` @@ -59,6 +64,29 @@ If you are curious if your :code:`Recording` is unsigned you can simply check th In either case, if the dtype displayed has a :code:`u` at the beginning (e.g. :code:`uint16`) then your recording is unsigned. If it doesn't have the :code:`u` (e.g. :code:`int16`) then it is signed and would not need this preprocessing step. + +Bit depth +--------- + +One final important piece of information for some users is the concept of bit depth, which is the number of bits used to +sample the data. The optional :code:`bit_depth` argument can be fed into the :code:`unsigned_to_signed` function. +This should be used in cases where the ADC bit depth does not match the bit depth of the data type (e.g., if the data is +stored as :code:`uint16` but the ADC is 12 bits). +Let's make a concrete example: the Biocam acquisition system from 3Brain uses a 12-bit ADC and stores the data as +:code:`uint16`. 
This means that the data is stored in a 16-bit unsigned integer format, but the actual data +only covers a 12-bit range. Therefore, the "zero" of the data is not at 0, nor at half of the :code:`uint16` range (i.e. 2^15), +but rather at 2048 (i.e., 2^11). +In this case, setting the :code:`bit_depth` argument to 12 will allow the :code:`unsigned_to_signed` function to +correctly convert the unsigned data to signed data and offset the data to be centered around 0, by subtracting 2048 +while converting the data from unsigned to signed. + +.. code:: python + + recording_unsigned = se.read_biocam('path/to/my/file.brw') + # we can now convert to signed with the correct bit depth + recording_signed = spre.unsigned_to_signed(recording_unsigned, bit_depth=12) + + Additional Notes ---------------- @@ -72,11 +100,3 @@ Additional Notes update to a version greater than or equal to 0.103.0. 3) For additional information on units and scaling in SpikeInterface see :ref:`physical_units`. - - -Bit depth ---------- - -One final important piece of information for some users is the optional :code:`bit_depth` argument that can be fed -into the :code:`unsigned_to_signed` function. This should be used in cases where the ADC bit depth does not match -the bit depth of the data type. This allows us to estimate the offset based on the bit depth of the ADC.