From f9ff667188d2b70026dcf2267c367a0e92a9ce4d Mon Sep 17 00:00:00 2001 From: JoeZiminski Date: Mon, 24 Jun 2024 19:03:06 +0100 Subject: [PATCH 001/187] Add for 'set_files'. --- src/spikeinterface/sorters/external/kilosort4.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/spikeinterface/sorters/external/kilosort4.py b/src/spikeinterface/sorters/external/kilosort4.py index a7f40a9558..92bfabbe73 100644 --- a/src/spikeinterface/sorters/external/kilosort4.py +++ b/src/spikeinterface/sorters/external/kilosort4.py @@ -205,7 +205,16 @@ def _run_from_folder(cls, sorter_output_folder, params, verbose): # NOTE: Also modifies settings in-place data_dir = "" results_dir = sorter_output_folder - filename, data_dir, results_dir, probe = set_files(settings, filename, probe, probe_name, data_dir, results_dir) + + filename, data_dir, results_dir, probe = set_files( + settings=settings, + filename=filename, + probe=probe, + probe_name=probe_name, + data_dir=data_dir, + results_dir=results_dir, + ) + if version.parse(cls.get_sorter_version()) >= version.parse("4.0.12"): ops = initialize_ops(settings, probe, recording.get_dtype(), do_CAR, invert_sign, device, False) n_chan_bin, fs, NT, nt, twav_min, chan_map, dtype, do_CAR, invert, _, _, tmin, tmax, artifact, _, _ = ( From aaa389f78243ac5c40c89f78cf282e69b591aebe Mon Sep 17 00:00:00 2001 From: JoeZiminski Date: Mon, 24 Jun 2024 19:06:30 +0100 Subject: [PATCH 002/187] Add for 'initialize_ops'. --- .../sorters/external/kilosort4.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/src/spikeinterface/sorters/external/kilosort4.py b/src/spikeinterface/sorters/external/kilosort4.py index 92bfabbe73..b723e7a2bb 100644 --- a/src/spikeinterface/sorters/external/kilosort4.py +++ b/src/spikeinterface/sorters/external/kilosort4.py @@ -216,12 +216,27 @@ def _run_from_folder(cls, sorter_output_folder, params, verbose): ) if version.parse(cls.get_sorter_version()) >= version.parse("4.0.12"): - ops = initialize_ops(settings, probe, recording.get_dtype(), do_CAR, invert_sign, device, False) + ops = initialize_ops( + settings=settings, + probe=probe, + data_dtype=recording.get_dtype(), + do_CAR=do_CAR, + invert_sign=invert_sign, + device=device, + save_preprocessed_copy=False, + ) n_chan_bin, fs, NT, nt, twav_min, chan_map, dtype, do_CAR, invert, _, _, tmin, tmax, artifact, _, _ = ( get_run_parameters(ops) ) else: - ops = initialize_ops(settings, probe, recording.get_dtype(), do_CAR, invert_sign, device) + ops = initialize_ops( + settings=settings, + probe=probe, + data_dtype=recording.get_dtype(), + do_CAR=do_CAR, + invert_sign=invert_sign, + device=device, + ) n_chan_bin, fs, NT, nt, twav_min, chan_map, dtype, do_CAR, invert, _, _, tmin, tmax, artifact = ( get_run_parameters(ops) ) From 3ea9b8da6c9c0ce3672fc2b60d551cbfa96f8552 Mon Sep 17 00:00:00 2001 From: JoeZiminski Date: Mon, 24 Jun 2024 19:07:08 +0100 Subject: [PATCH 003/187] Add for 'compute_preprocessing'. 
--- src/spikeinterface/sorters/external/kilosort4.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spikeinterface/sorters/external/kilosort4.py b/src/spikeinterface/sorters/external/kilosort4.py index b723e7a2bb..d8b1f1a60a 100644 --- a/src/spikeinterface/sorters/external/kilosort4.py +++ b/src/spikeinterface/sorters/external/kilosort4.py @@ -243,7 +243,7 @@ def _run_from_folder(cls, sorter_output_folder, params, verbose): # Set preprocessing and drift correction parameters if not params["skip_kilosort_preprocessing"]: - ops = compute_preprocessing(ops, device, tic0=tic0, file_object=file_object) + ops = compute_preprocessing(ops=ops, device=device, tic0=tic0, file_object=file_object) else: print("Skipping kilosort preprocessing.") bfile = BinaryFiltered( From 28656425eac96ef7be1573256c316c23b057f1c5 Mon Sep 17 00:00:00 2001 From: JoeZiminski Date: Mon, 24 Jun 2024 19:07:43 +0100 Subject: [PATCH 004/187] Add for 'compute_drift_correction'. --- src/spikeinterface/sorters/external/kilosort4.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spikeinterface/sorters/external/kilosort4.py b/src/spikeinterface/sorters/external/kilosort4.py index d8b1f1a60a..d187b445ef 100644 --- a/src/spikeinterface/sorters/external/kilosort4.py +++ b/src/spikeinterface/sorters/external/kilosort4.py @@ -278,7 +278,7 @@ def _run_from_folder(cls, sorter_output_folder, params, verbose): # this function applies both preprocessing and drift correction ops, bfile, st0 = compute_drift_correction( - ops, device, tic0=tic0, progress_bar=progress_bar, file_object=file_object + ops=ops, device=device, tic0=tic0, progress_bar=progress_bar, file_object=file_object ) # Sort spikes and save results From 9e0207aed2f92424e5d8d8088ce6c95de286eb38 Mon Sep 17 00:00:00 2001 From: JoeZiminski Date: Mon, 24 Jun 2024 19:11:42 +0100 Subject: [PATCH 005/187] Add for detect_spikes, cluster_spikes, save_sorting. 
--- .../sorters/external/kilosort4.py | 20 ++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/src/spikeinterface/sorters/external/kilosort4.py b/src/spikeinterface/sorters/external/kilosort4.py index d187b445ef..032f980ee2 100644 --- a/src/spikeinterface/sorters/external/kilosort4.py +++ b/src/spikeinterface/sorters/external/kilosort4.py @@ -282,14 +282,28 @@ def _run_from_folder(cls, sorter_output_folder, params, verbose): ) # Sort spikes and save results - st, tF, _, _ = detect_spikes(ops, device, bfile, tic0=tic0, progress_bar=progress_bar) - clu, Wall = cluster_spikes(st, tF, ops, device, bfile, tic0=tic0, progress_bar=progress_bar) + st, tF, _, _ = detect_spikes(ops=ops, device=device, bfile=bfile, tic0=tic0, progress_bar=progress_bar) + + clu, Wall = cluster_spikes( + st=st, tF=tF, ops=ops, device=device, bfile=bfile, tic0=tic0, progress_bar=progress_bar + ) + if params["skip_kilosort_preprocessing"]: ops["preprocessing"] = dict( hp_filter=torch.as_tensor(np.zeros(1)), whiten_mat=torch.as_tensor(np.eye(recording.get_num_channels())) ) - _ = save_sorting(ops, results_dir, st, clu, tF, Wall, bfile.imin, tic0, save_extra_vars=save_extra_vars) + _ = save_sorting( + ops=ops, + results_dir=results_dir, + st=st, + clu=clu, + tF=tF, + Wall=Wall, + imin=bfile.imin, + tic0=tic0, + save_extra_vars=save_extra_vars, + ) @classmethod def _get_result_from_folder(cls, sorter_output_folder): From b07359ff360a210ae864fbf43c23d805b9507300 Mon Sep 17 00:00:00 2001 From: JoeZiminski Date: Mon, 24 Jun 2024 19:13:46 +0100 Subject: [PATCH 006/187] Add for 'load_probe', 'RecordingExtractorAsArray'. --- src/spikeinterface/sorters/external/kilosort4.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/spikeinterface/sorters/external/kilosort4.py b/src/spikeinterface/sorters/external/kilosort4.py index 032f980ee2..ba1b10b793 100644 --- a/src/spikeinterface/sorters/external/kilosort4.py +++ b/src/spikeinterface/sorters/external/kilosort4.py @@ -176,12 +176,12 @@ def _run_from_folder(cls, sorter_output_folder, params, verbose): # load probe recording = cls.load_recording_from_folder(sorter_output_folder.parent, with_warnings=False) - probe = load_probe(probe_filename) + probe = load_probe(probe_path=probe_filename) probe_name = "" filename = "" # this internally concatenates the recording - file_object = RecordingExtractorAsArray(recording) + file_object = RecordingExtractorAsArray(recording_extractor=recording) do_CAR = params["do_CAR"] invert_sign = params["invert_sign"] From ac844e9d550624f007f851c1cc061e5c36abb002 Mon Sep 17 00:00:00 2001 From: JoeZiminski Date: Mon, 24 Jun 2024 19:47:22 +0100 Subject: [PATCH 007/187] Add for BinaryFiltered + some generate notes. --- .../sorters/external/kilosort4.py | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/spikeinterface/sorters/external/kilosort4.py b/src/spikeinterface/sorters/external/kilosort4.py index ba1b10b793..47ef328b28 100644 --- a/src/spikeinterface/sorters/external/kilosort4.py +++ b/src/spikeinterface/sorters/external/kilosort4.py @@ -17,6 +17,7 @@ class Kilosort4Sorter(BaseSorter): requires_locations = True gpu_capability = "nvidia-optional" + # Q: Should we take these directly from the KS defaults? 
https://github.com/MouseLand/Kilosort/blob/59c03b060cc8e8ac75a7f1a972a8b5c5af3f41a6/kilosort/parameters.py#L164 _default_params = { "batch_size": 60000, "nblocks": 1, @@ -25,8 +26,8 @@ class Kilosort4Sorter(BaseSorter): "do_CAR": True, "invert_sign": False, "nt": 61, - "shift": None, - "scale": None, + "shift": None, # TODO: I don't think these are passed to BinaryFiltered when preprocessing skipped. Need to distinguish version +/ 4.0.9 + "scale": None, # TODO: I don't think these are passed to BinaryFiltered when preprocessing skipped. Need to distinguish version +/ 4.0.9 "artifact_threshold": None, "nskip": 25, "whitening_range": 32, @@ -247,16 +248,16 @@ def _run_from_folder(cls, sorter_output_folder, params, verbose): else: print("Skipping kilosort preprocessing.") bfile = BinaryFiltered( - ops["filename"], - n_chan_bin, - fs, - NT, - nt, - twav_min, - chan_map, + filename=ops["filename"], + n_chan_bin=n_chan_bin, + fs=fs, + nT=NT, + nt=nt, + nt0min=twav_min, + chan_map=chan_map, hp_filter=None, device=device, - do_CAR=do_CAR, + do_CAR=do_CAR, # TODO: should this always be False if we are in skipping KS preprocessing land? invert_sign=invert, dtype=dtype, tmin=tmin, From 44835bb397a36ebfd914a6c2a8038bf3727b95e3 Mon Sep 17 00:00:00 2001 From: JoeZiminski Date: Mon, 24 Jun 2024 19:50:12 +0100 Subject: [PATCH 008/187] Update note on DEFAULT_SETTINGS. --- src/spikeinterface/sorters/external/kilosort4.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/spikeinterface/sorters/external/kilosort4.py b/src/spikeinterface/sorters/external/kilosort4.py index 47ef328b28..bcd8ddc617 100644 --- a/src/spikeinterface/sorters/external/kilosort4.py +++ b/src/spikeinterface/sorters/external/kilosort4.py @@ -18,6 +18,8 @@ class Kilosort4Sorter(BaseSorter): gpu_capability = "nvidia-optional" # Q: Should we take these directly from the KS defaults? https://github.com/MouseLand/Kilosort/blob/59c03b060cc8e8ac75a7f1a972a8b5c5af3f41a6/kilosort/parameters.py#L164 + # I see these overwrite the `DEFAULT_SETTINGS`. Do we want to do this? There is benefit to fixing on the SI side, but users switching KS version would expect + # the defaults to represent the KS version. This could lead to divergence in result between users running KS directly vs. the SI wrapper. _default_params = { "batch_size": 60000, "nblocks": 1, From 5bdc31e1ac6f2b3ecde2f2d428f4bae306dacfb3 Mon Sep 17 00:00:00 2001 From: JoeZiminski Date: Mon, 24 Jun 2024 19:59:16 +0100 Subject: [PATCH 009/187] Remove some TODO and notes. --- src/spikeinterface/sorters/external/kilosort4.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/spikeinterface/sorters/external/kilosort4.py b/src/spikeinterface/sorters/external/kilosort4.py index bcd8ddc617..cba7e65517 100644 --- a/src/spikeinterface/sorters/external/kilosort4.py +++ b/src/spikeinterface/sorters/external/kilosort4.py @@ -17,9 +17,6 @@ class Kilosort4Sorter(BaseSorter): requires_locations = True gpu_capability = "nvidia-optional" - # Q: Should we take these directly from the KS defaults? https://github.com/MouseLand/Kilosort/blob/59c03b060cc8e8ac75a7f1a972a8b5c5af3f41a6/kilosort/parameters.py#L164 - # I see these overwrite the `DEFAULT_SETTINGS`. Do we want to do this? There is benefit to fixing on the SI side, but users switching KS version would expect - # the defaults to represent the KS version. This could lead to divergence in result between users running KS directly vs. the SI wrapper. 
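# (For the record, before the notes above are dropped: a minimal sketch of the
# alternative they raise, assuming only that `kilosort.parameters.DEFAULT_SETTINGS`
# is a plain dict, as the tests later in this series treat it. The recording-dependent
# keys are stripped, mirroring what those tests do:
#
#     from kilosort.parameters import DEFAULT_SETTINGS
#
#     _default_params = {
#         k: v for k, v in DEFAULT_SETTINGS.items()
#         if k not in ("n_chan_bin", "fs", "tmin", "tmax")
#     }
# )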
 _default_params = {
     "batch_size": 60000,
     "nblocks": 1,
@@ -25,8 +25,8 @@
     "do_CAR": True,
     "invert_sign": False,
     "nt": 61,
-        "shift": None,  # TODO: I don't think these are passed to BinaryFiltered when preprocessing skipped. Need to distinguish version +/ 4.0.9
-        "scale": None,  # TODO: I don't think these are passed to BinaryFiltered when preprocessing skipped. Need to distinguish version +/ 4.0.9
+        "shift": None,
+        "scale": None,
     "artifact_threshold": None,
     "nskip": 25,
     "whitening_range": 32,

From dc848eb2f8691206826d9545927d9cf28fbcd558 Mon Sep 17 00:00:00 2001
From: JoeZiminski
Date: Wed, 26 Jun 2024 02:04:22 +0100
Subject: [PATCH 010/187] Use version to handle all KS versions, some of which
 are missing the .__version__ attribute.

---
 .../sorters/external/kilosort4.py | 21 ++++++++++++-------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/src/spikeinterface/sorters/external/kilosort4.py b/src/spikeinterface/sorters/external/kilosort4.py
index cba7e65517..ed41baeff9 100644
--- a/src/spikeinterface/sorters/external/kilosort4.py
+++ b/src/spikeinterface/sorters/external/kilosort4.py
@@ -6,6 +6,7 @@
 from ..basesorter import BaseSorter
 from .kilosortbase import KilosortBase
+from importlib.metadata import version

 PathType = Union[str, Path]

@@ -129,9 +130,8 @@ def is_installed(cls):

     @classmethod
     def get_sorter_version(cls):
-        import kilosort as ks
-
-        return ks.__version__
+        """kilosort version <4.0.10 is always reported as '4'"""
+        return version("kilosort")

     @classmethod
     def _setup_recording(cls, recording, sorter_output_folder, params, verbose):
@@ -216,6 +216,7 @@ def _run_from_folder(cls, sorter_output_folder, params, verbose):
         )

         if version.parse(cls.get_sorter_version()) >= version.parse("4.0.12"):
+            # TODO: save_preprocessed_copy added
             ops = initialize_ops(
                 settings=settings,
                 probe=probe,
@@ -225,9 +226,6 @@ def _run_from_folder(cls, sorter_output_folder, params, verbose):
                 device=device,
                 save_preprocessed_copy=False,
             )
-            n_chan_bin, fs, NT, nt, twav_min, chan_map, dtype, do_CAR, invert, _, _, tmin, tmax, artifact, _, _ = (
-                get_run_parameters(ops)
-            )
         else:
             ops = initialize_ops(
                 settings=settings,
@@ -237,6 +235,13 @@ def _run_from_folder(cls, sorter_output_folder, params, verbose):
                 invert_sign=invert_sign,
                 device=device,
             )
+
+        if version.parse(cls.get_sorter_version()) >= version.parse("4.0.11"):
+            # TODO: shift, scale added
+            n_chan_bin, fs, NT, nt, twav_min, chan_map, dtype, do_CAR, invert, _, _, tmin, tmax, artifact, _, _ = (
+                get_run_parameters(ops)
+            )
+        else:
             n_chan_bin, fs, NT, nt, twav_min, chan_map, dtype, do_CAR, invert, _, _, tmin, tmax, artifact = (
                 get_run_parameters(ops)
             )

From 69e72bf0ddfafe42577959e35eac96f184acb727 Mon Sep 17 00:00:00 2001
From: JoeZiminski
Date: Wed, 26 Jun 2024 02:06:20 +0100
Subject: [PATCH 011/187] Remove unused vars that were left over, I think from
 previous KS versions.
--- src/spikeinterface/sorters/external/kilosort4.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/spikeinterface/sorters/external/kilosort4.py b/src/spikeinterface/sorters/external/kilosort4.py index ed41baeff9..9320022a20 100644 --- a/src/spikeinterface/sorters/external/kilosort4.py +++ b/src/spikeinterface/sorters/external/kilosort4.py @@ -94,11 +94,9 @@ class Kilosort4Sorter(BaseSorter): "cluster_pcs": "Maximum number of spatiotemporal PC features used for clustering. Default value: 64.", "x_centers": "Number of x-positions to use when determining center points for template groupings. If None, this will be determined automatically by finding peaks in channel density. For 2D array type probes, we recommend specifying this so that centers are placed every few hundred microns.", "duplicate_spike_bins": "Number of bins for which subsequent spikes from the same cluster are assumed to be artifacts. A value of 0 disables this step. Default value: 7.", - "keep_good_only": "If True only 'good' units are returned", "do_correction": "If True, drift correction is performed", "save_extra_kwargs": "If True, additional kwargs are saved to the output", "skip_kilosort_preprocessing": "Can optionally skip the internal kilosort preprocessing", - "scaleproc": "int16 scaling of whitened data, if None set to 200.", "torch_device": "Select the torch device auto/cuda/cpu", } From c3b2bdda3d2f2f1db009302529c6c9b50a3781b9 Mon Sep 17 00:00:00 2001 From: JoeZiminski Date: Wed, 26 Jun 2024 12:19:16 +0100 Subject: [PATCH 012/187] Use importlib version instead of .__version__ --- src/spikeinterface/sorters/external/kilosort4.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/spikeinterface/sorters/external/kilosort4.py b/src/spikeinterface/sorters/external/kilosort4.py index 9320022a20..65f1483348 100644 --- a/src/spikeinterface/sorters/external/kilosort4.py +++ b/src/spikeinterface/sorters/external/kilosort4.py @@ -6,7 +6,6 @@ from ..basesorter import BaseSorter from .kilosortbase import KilosortBase -from importlib.metadata import version PathType = Union[str, Path] @@ -129,7 +128,10 @@ def is_installed(cls): @classmethod def get_sorter_version(cls): """kilosort version <0.0.10 is always '4' z""" - return version("kilosort") + # Note this import clashes with version! + from importlib.metadata import version as importlib_version + + return importlib_version("kilosort") @classmethod def _setup_recording(cls, recording, sorter_output_folder, params, verbose): From 52457224b0c724e5c0ee4f5d1e659ae7c3159b91 Mon Sep 17 00:00:00 2001 From: JoeZiminski Date: Wed, 26 Jun 2024 12:28:49 +0100 Subject: [PATCH 013/187] Add kilosort test script and CI workflow. 
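
The new tests can be exercised locally with the same steps the workflow below
runs in CI (the kilosort pin mirrors the single version currently listed in
the matrix):

    pip install -e .[test]
    pip install kilosort==4.0.12
    pytest -k test_kilosort4_new --durations=0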
--- .github/workflows/test_kilosort4.yml | 61 +++ .../temp_test_file_dir/test_kilosort4_new.py | 472 ++++++++++++++++++ 2 files changed, 533 insertions(+) create mode 100644 .github/workflows/test_kilosort4.yml create mode 100644 src/spikeinterface/temp_test_file_dir/test_kilosort4_new.py diff --git a/.github/workflows/test_kilosort4.yml b/.github/workflows/test_kilosort4.yml new file mode 100644 index 0000000000..8e57f79786 --- /dev/null +++ b/.github/workflows/test_kilosort4.yml @@ -0,0 +1,61 @@ +name: Testing Kilosort4 + +on: + workflow_dispatch: + schedule: + - cron: "0 12 * * 0" # Weekly on Sunday at noon UTC + pull_request: + types: [synchronize, opened, reopened] + branches: + - main + +# env: +# KACHERY_CLOUD_CLIENT_ID: ${{ secrets.KACHERY_CLOUD_CLIENT_ID }} +# KACHERY_CLOUD_PRIVATE_KEY: ${{ secrets.KACHERY_CLOUD_PRIVATE_KEY }} + +# concurrency: # Cancel previous workflows on the same pull request +# group: ${{ github.workflow }}-${{ github.ref }} +# cancel-in-progress: true + +jobs: + run: + name: ${{ matrix.os }} Python ${{ matrix.python-version }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + python-version: ["3.12"] # TODO: "3.9", # Lower and higher versions we support + os: [ubuntu-latest] # TODO: macos-13, windows-latest, + ks_version: ["4.0.12"] # TODO: add / build from pypi based on Christians PR + steps: + - uses: actions/checkout@v4 + - name: Setup Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Install packages + # TODO: maybe dont need full? + run: | + pip install -e .[test] + # git config --global user.email "CI@example.com" + # git config --global user.name "CI Almighty" + # pip install tabulate + shell: bash + + - name: Install Kilosort + run: | + pip install kilosort==${{ matrix.ks_version }} + shell: bash + + - name: Run new kilosort4 tests + # run: chmod +x .github/test_kilosort4.sh + # TODO: figure out the paths to be able to run this by calling the file directly + run: | + pytest -k test_kilosort4_new --durations=0 + shell: bash + +# TODO: pip install -e .[full,dev] is failing # +#The conflict is caused by: +# spikeinterface[docs] 0.101.0rc0 depends on datalad==0.16.2; extra == "docs" +# spikeinterface[test] 0.101.0rc0 depends on datalad>=1.0.2; extra == "test" diff --git a/src/spikeinterface/temp_test_file_dir/test_kilosort4_new.py b/src/spikeinterface/temp_test_file_dir/test_kilosort4_new.py new file mode 100644 index 0000000000..0fb9841728 --- /dev/null +++ b/src/spikeinterface/temp_test_file_dir/test_kilosort4_new.py @@ -0,0 +1,472 @@ +import copy +from typing import Any +import spikeinterface.full as si +import numpy as np +import torch +import kilosort +from kilosort.io import load_probe +import pandas as pd + +import pytest +from probeinterface.io import write_prb +from kilosort.parameters import DEFAULT_SETTINGS +from packaging.version import parse +from importlib.metadata import version + +# TODO: duplicate_spike_bins to duplicate_spike_ms +# TODO: write an issue on KS about bin! vs bin_ms! +# TODO: expose tmin and tmax +# TODO: expose save_preprocessed_copy +# TODO: make here a log of all API changes (or on kilosort4.py) +# TODO: try out longer recordings and do some benchmarking tests.. 
+# TODO: expose tmin and tmax +# There is no way to skip HP spatial filter +# might as well expose tmin and tmax +# might as well expose preprocessing save (across the two functions that use it) +# BinaryFilter added scale and shift as new arguments recently +# test with docker +# test all params once +# try and read func / class object to see kwargs +# Shift and scale are also taken as a function on BinaryFilter. Do we want to apply these even when +# do kilosort preprocessing is false? probably +# TODO: find a test case for the other annoying ones (larger recording, variable amplitude) +# TODO: test docker +# TODO: test multi-segment recording +# TODO: test do correction, skip preprocessing +# TODO: can we rename 'save_extra_kwargs' to 'save_extra_vars'. Currently untested. +# nt : # TODO: can't kilosort figure this out from sampling rate? +# TODO: also test runtimes +# TODO: test skip preprocessing separately +# TODO: the pure default case is not tested +# TODO: shift and scale - this is also added to BinaryFilter + +RUN_KILOSORT_ARGS = ["do_CAR", "invert_sign", "save_preprocessed_copy"] # TODO: ignore some of these +# "device", "progress_bar", "save_extra_vars" are not tested. "save_extra_vars" could be. + + +PARAMS_TO_TEST = [ + # Not tested + # ("torch_device", "auto") + # Stable across KS version 4.0.01 - 4.0.12 + ("change_nothing", None), + ("nblocks", 0), + ("do_CAR", False), + ("batch_size", 42743), # Q: how much do these results change with batch size? + ("Th_universal", 12), + ("Th_learned", 14), + ("invert_sign", True), + ("nt", 93), + ("nskip", 1), + ("whitening_range", 16), + ("sig_interp", 5), + ("nt0min", 25), + ("dmin", 15), + ("dminx", 16), + ("min_template_size", 15), + ("template_sizes", 10), + ("nearest_chans", 8), + ("nearest_templates", 35), + ("max_channel_distance", 5), + ("templates_from_data", False), + ("n_templates", 10), + ("n_pcs", 3), + ("Th_single_ch", 4), + ("acg_threshold", 0.001), + ("x_centers", 5), + ("duplicate_spike_bins", 5), # TODO: why is this not erroring, it is deprecated. issue on KS + ("binning_depth", 1), + ("artifact_threshold", 200), + ("ccg_threshold", 1e9), + ("cluster_downsampling", 1e9), + ("duplicate_spike_bins", 5), # TODO: this is depcrecated and changed to _ms in 4.0.13! +] + +# Update PARAMS_TO_TEST with version-dependent kwargs +if parse(version("kilosort")) >= parse("4.0.12"): + pass # TODO: expose? +# PARAMS_TO_TEST.extend( +# [ +# ("save_preprocessed_copy", False), +# ] +# ) +if parse(version("kilosort")) >= parse("4.0.11"): + PARAMS_TO_TEST.extend( + [ + ("shift", 1e9), + ("scale", -1e9), + ] + ) +if parse(version("kilosort")) == parse("4.0.9"): + # bug in 4.0.9 for "nblocks=0" + PARAMS_TO_TEST = [param for param in PARAMS_TO_TEST if param[0] != "nblocks"] + +if parse(version("kilosort")) >= parse("4.0.8"): + PARAMS_TO_TEST.extend( + [ + ("drift_smoothing", [250, 250, 250]), + ] + ) +if parse(version("kilosort")) <= parse("4.0.6"): + # AFAIK this parameter was always unused in KS (that's why it was removed) + PARAMS_TO_TEST.extend( + [ + ("cluster_pcs", 1e9), + ] + ) +if parse(version("kilosort")) <= parse("4.0.3"): + PARAMS_TO_TEST = [param for param in PARAMS_TO_TEST if param[0] not in ["x_centers", "max_channel_distance"]] + + +class TestKilosort4Long: + + # Fixtures ###### + @pytest.fixture(scope="session") + def recording_and_paths(self, tmp_path_factory): + """ """ + tmp_path = tmp_path_factory.mktemp("kilosort4_tests") + + np.random.seed(0) # TODO: check below... 
+ + recording = self._get_ground_truth_recording() + + paths = self._save_ground_truth_recording(recording, tmp_path) + + return (recording, paths) + + @pytest.fixture(scope="session") + def default_results(self, recording_and_paths): + """ """ + recording, paths = recording_and_paths + + settings, ks_format_probe = self._run_kilosort_with_kilosort(recording, paths) + + defaults_ks_output_dir = paths["session_scope_tmp_path"] / "default_ks_output" + + kilosort.run_kilosort( + settings=settings, + probe=ks_format_probe, + data_dtype="float32", + results_dir=defaults_ks_output_dir, + ) + + default_results = self._get_sorting_output(defaults_ks_output_dir) + + return default_results + + # Tests ###### + def test_params_to_test(self): + """ + Test that all parameters in PARAMS_TO_TEST are + different than the default value used in Kilosort, otherwise + there is no point to the test. + + TODO: need to use _default_params vs. DEFAULT_SETTINGS + depending on decision + + TODO: write issue on this, we hope it will be on DEFAULT_SETTINGS + TODO: duplicate_spike_ms in POSTPROCESSING but seems unused? + """ + for parameter in PARAMS_TO_TEST: + + param_key, param_value = parameter + + if param_key == "change_nothing": + continue + + if param_key not in RUN_KILOSORT_ARGS: + assert DEFAULT_SETTINGS[param_key] != param_value, f"{param_key} values should be different in test." + + def test_default_settings_all_represented(self): + """ + Test that every entry in DEFAULT_SETTINGS is tested in + PARAMS_TO_TEST, otherwise we are missing settings added + on the KS side. + """ + tested_keys = [entry[0] for entry in PARAMS_TO_TEST] + + for param_key in DEFAULT_SETTINGS: + + if param_key not in ["n_chan_bin", "fs", "tmin", "tmax"]: + assert param_key in tested_keys, f"param: {param_key} in DEFAULT SETTINGS but not tested." 
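    # A sketch of an equivalent, set-based form of the check above (it assumes
    # only the module-level PARAMS_TO_TEST and DEFAULT_SETTINGS defined in this
    # file): the set difference reports every untested key at once, rather than
    # stopping at the first failing assert.
    def test_default_settings_all_represented_set_form(self):
        tested_keys = {entry[0] for entry in PARAMS_TO_TEST}
        untested = set(DEFAULT_SETTINGS) - tested_keys - {"n_chan_bin", "fs", "tmin", "tmax"}
        assert not untested, f"params in DEFAULT_SETTINGS but not tested: {untested}"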
    @pytest.mark.parametrize("parameter", PARAMS_TO_TEST)
    def test_kilosort4(self, recording_and_paths, default_results, tmp_path, parameter):
        """ """
        recording, paths = recording_and_paths
        param_key, param_value = parameter

        kilosort_output_dir = tmp_path / "kilosort_output_dir"
        spikeinterface_output_dir = tmp_path / "spikeinterface_output_dir"

        extra_ks_settings = {}
        if param_key == "binning_depth":
            extra_ks_settings.update({"nblocks": 5})

        if param_key in RUN_KILOSORT_ARGS:
            run_kilosort_kwargs = {param_key: param_value}
        else:
            if param_key != "change_nothing":
                extra_ks_settings.update({param_key: param_value})
            run_kilosort_kwargs = {}

        settings, ks_format_probe = self._run_kilosort_with_kilosort(recording, paths, extra_ks_settings)

        kilosort.run_kilosort(
            settings=settings,
            probe=ks_format_probe,
            data_dtype="float32",
            results_dir=kilosort_output_dir,
            **run_kilosort_kwargs,
        )

        extra_si_settings = {}
        if param_key != "change_nothing":
            extra_si_settings.update({param_key: param_value})

        if param_key == "binning_depth":
            extra_si_settings.update({"nblocks": 5})

        spikeinterface_settings = self._get_spikeinterface_settings(extra_settings=extra_si_settings)
        si.run_sorter(
            "kilosort4",
            recording,
            remove_existing_folder=True,
            folder=spikeinterface_output_dir,
            **spikeinterface_settings,
        )

        results = self._get_sorting_output(kilosort_output_dir, spikeinterface_output_dir)

        assert np.array_equal(results["ks"]["st"], results["si"]["st"]), f"{param_key} spike times different"

        assert all(
            results["ks"]["clus"].iloc[:, 0] == results["si"]["clus"].iloc[:, 0]
        ), f"{param_key} cluster assignment different"
        assert all(
            results["ks"]["clus"].iloc[:, 1] == results["si"]["clus"].iloc[:, 1]
        ), f"{param_key} cluster quality different"  # TODO: check pandas, probably a better way

        # This is saved on the SI side so not an extremely
        # robust addition, but it can't hurt.
        if param_key != "change_nothing":
            ops = np.load(spikeinterface_output_dir / "sorter_output" / "ops.npy", allow_pickle=True)
            ops = ops.tolist()  # strangely this makes a dict
            assert ops[param_key] == param_value

        # Finally, check our test parameters actually change stuff!
        if parse(version("kilosort")) > parse("4.0.4"):
            self._check_test_parameters_are_actually_changing_the_output(results, default_results, param_key)

    def test_kilosort4_no_correction(self, recording_and_paths, tmp_path):
        """ """
        recording, paths = recording_and_paths

        kilosort_output_dir = tmp_path / "kilosort_output_dir"  # TODO: a lot of copying here
        spikeinterface_output_dir = tmp_path / "spikeinterface_output_dir"

        settings, ks_format_probe = self._run_kilosort_with_kilosort(recording, paths, extra_settings={"nblocks": 0})

        kilosort.run_kilosort(
            settings=settings,
            probe=ks_format_probe,
            data_dtype="float32",
            results_dir=kilosort_output_dir,
            do_CAR=True,
        )

        spikeinterface_settings = self._get_spikeinterface_settings(extra_settings={"nblocks": 6})
        si.run_sorter(
            "kilosort4",
            recording,
            remove_existing_folder=True,
            folder=spikeinterface_output_dir,
            do_correction=False,
            **spikeinterface_settings,
        )

        results = self._get_sorting_output(kilosort_output_dir, spikeinterface_output_dir)

        assert np.array_equal(results["ks"]["st"], results["si"]["st"])

        assert all(results["ks"]["clus"].iloc[:, 0] == results["si"]["clus"].iloc[:, 0])
        assert all(results["ks"]["clus"].iloc[:, 1] == results["si"]["clus"].iloc[:, 1])

    def test_kilosort4_skip_preprocessing_correction(self, tmp_path, monkeypatch):
        """ """
        recording = self._get_ground_truth_recording()

        # We need to filter and whiten the recording here, or KS takes forever.
        # Do this in a way different to KS.
        recording = si.highpass_filter(recording, 300)
        recording = si.whiten(recording, mode="local", apply_mean=False)

        paths = self._save_ground_truth_recording(recording, tmp_path)

        kilosort_default_output_dir = tmp_path / "kilosort_default_output_dir"
        kilosort_output_dir = tmp_path / "kilosort_output_dir"
        spikeinterface_output_dir = tmp_path / "spikeinterface_output_dir"

        ks_settings, ks_format_probe = self._run_kilosort_with_kilosort(recording, paths, extra_settings={"nblocks": 0})

        kilosort.run_kilosort(
            settings=ks_settings,
            probe=ks_format_probe,
            data_dtype="float32",
            results_dir=kilosort_default_output_dir,
            do_CAR=False,
        )

        # Now the tricky bit: we need to turn off preprocessing in kilosort.
        # This is not exposed by run_kilosort() arguments (at 4.0.12 at least)
        # and so we need to monkeypatch the internal functions. The easiest
        # thing to do would be to set `get_highpass_filter()` and
        # `get_whitening_matrix()` to return `None` so these steps are skipped
        # in BinaryFiltered. Unfortunately the ops saving machinery requires
        # these to be torch arrays and will error otherwise, so instead
        # we must set the filter (in frequency space) and whitening matrix
        # to unity operations so the filter and whitening do nothing. It is
        # also required to turn off motion correction to avoid some additional
        # magic KS is doing at the whitening step when motion correction is on.
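        # (Why identity operations suffice, assuming the KS4 behaviour described
        # above: BinaryFiltered applies the high-pass filter by multiplication in
        # frequency space and the whitening matrix by matrix multiplication, so
        # an all-ones filter and an identity matrix leave the data unchanged.)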
        fake_filter = np.ones(60122, dtype="float32")  # TODO: hard coded
        fake_filter = torch.from_numpy(fake_filter).to("cpu")

        fake_white_matrix = np.eye(recording.get_num_channels(), dtype="float32")
        fake_white_matrix = torch.from_numpy(fake_white_matrix).to("cpu")

        def fake_fft_highpass(*args, **kwargs):
            return fake_filter

        def fake_get_whitening_matrix(*args, **kwargs):
            return fake_white_matrix

        def fake_fftshift(X, dim):
            return X

        monkeypatch.setattr("kilosort.io.fft_highpass", fake_fft_highpass)
        monkeypatch.setattr("kilosort.preprocessing.get_whitening_matrix", fake_get_whitening_matrix)
        monkeypatch.setattr("kilosort.io.fftshift", fake_fftshift)

        kilosort.run_kilosort(
            settings=ks_settings,
            probe=ks_format_probe,
            data_dtype="float32",
            results_dir=kilosort_output_dir,
            do_CAR=False,
        )

        monkeypatch.undo()

        # Now, run kilosort through spikeinterface with the same options.
        spikeinterface_settings = self._get_spikeinterface_settings(extra_settings={"nblocks": 0})
        si.run_sorter(
            "kilosort4",
            recording,
            remove_existing_folder=True,
            folder=spikeinterface_output_dir,
            do_CAR=False,
            skip_kilosort_preprocessing=True,
            **spikeinterface_settings,
        )

        default_results = self._get_sorting_output(kilosort_default_output_dir)
        results = self._get_sorting_output(kilosort_output_dir, spikeinterface_output_dir)

        # Check that our intervention actually makes some difference to KS output
        # (or this test would do nothing). Then check SI and KS outputs with
        # preprocessing skipped are identical.
        assert not np.array_equal(default_results["ks"]["st"], results["ks"]["st"])
        assert np.array_equal(results["ks"]["st"], results["si"]["st"])

    # Helpers ######
    def _check_test_parameters_are_actually_changing_the_output(self, results, default_results, param_key):
        """ """
        if param_key not in ["artifact_threshold", "ccg_threshold", "cluster_downsampling"]:
            num_clus = np.unique(results["si"]["clus"].iloc[:, 0]).size
            num_clus_default = np.unique(default_results["ks"]["clus"].iloc[:, 0]).size

            if param_key == "change_nothing":
                # For "change_nothing", the output should match the defaults exactly.
                assert (
                    (results["si"]["st"].size == default_results["ks"]["st"].size)
                    and num_clus == num_clus_default
                    and all(results["si"]["clus"].iloc[:, 1] == default_results["ks"]["clus"].iloc[:, 1])
                ), f"{param_key} changed somehow!"
            else:
                assert (
                    (results["si"]["st"].size != default_results["ks"]["st"].size)
                    or num_clus != num_clus_default
                    or not all(results["si"]["clus"].iloc[:, 1] == default_results["ks"]["clus"].iloc[:, 1])
                ), f"{param_key} results did not change with parameter change."

    def _run_kilosort_with_kilosort(self, recording, paths, extra_settings=None):
        """ """
        # don't actually run KS here because we will overwrite the defaults!
        settings = {
            "data_dir": paths["recording_path"],
            "n_chan_bin": recording.get_num_channels(),
            "fs": recording.get_sampling_frequency(),
        }

        if extra_settings is not None:
            settings.update(extra_settings)

        ks_format_probe = load_probe(paths["probe_path"])

        return settings, ks_format_probe

    def _get_spikeinterface_settings(self, extra_settings=None):
        """ """
        # don't actually run here.
+ settings = copy.deepcopy(DEFAULT_SETTINGS) + + if extra_settings is not None: + settings.update(extra_settings) + + for name in ["n_chan_bin", "fs", "tmin", "tmax"]: # TODO: check tmin and tmax + settings.pop(name) + + return settings + + def _get_sorting_output(self, kilosort_output_dir=None, spikeinterface_output_dir=None) -> dict[str, Any]: + """ """ + results = { + "si": {}, + "ks": {}, + } + if kilosort_output_dir: + results["ks"]["st"] = np.load(kilosort_output_dir / "spike_times.npy") + results["ks"]["clus"] = pd.read_table(kilosort_output_dir / "cluster_group.tsv") + + if spikeinterface_output_dir: + results["si"]["st"] = np.load(spikeinterface_output_dir / "sorter_output" / "spike_times.npy") + results["si"]["clus"] = pd.read_table(spikeinterface_output_dir / "sorter_output" / "cluster_group.tsv") + + return results + + def _get_ground_truth_recording(self): + """ """ + # Chosen so all parameter changes to indeed change the output + num_channels = 32 + recording, _ = si.generate_ground_truth_recording( + durations=[5], + seed=0, + num_channels=num_channels, + num_units=5, + generate_sorting_kwargs=dict(firing_rates=100, refractory_period_ms=4.0), + ) + return recording + + def _save_ground_truth_recording(self, recording, tmp_path): + """ """ + paths = { + "session_scope_tmp_path": tmp_path, + "recording_path": tmp_path / "my_test_recording", + "probe_path": tmp_path / "my_test_probe.prb", + } + + recording.save(folder=paths["recording_path"], overwrite=True) + + probegroup = recording.get_probegroup() + write_prb(paths["probe_path"].as_posix(), probegroup) + + return paths From ede9dd482163728901dd118973c86d946ffd5f16 Mon Sep 17 00:00:00 2001 From: JoeZiminski Date: Wed, 26 Jun 2024 14:20:43 +0100 Subject: [PATCH 014/187] Fix save_preprocesed copy, argument mispelled. --- src/spikeinterface/sorters/external/kilosort4.py | 4 ++-- src/spikeinterface/temp_test_file_dir/test_kilosort4_new.py | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/spikeinterface/sorters/external/kilosort4.py b/src/spikeinterface/sorters/external/kilosort4.py index 65f1483348..449ddfbff1 100644 --- a/src/spikeinterface/sorters/external/kilosort4.py +++ b/src/spikeinterface/sorters/external/kilosort4.py @@ -216,7 +216,7 @@ def _run_from_folder(cls, sorter_output_folder, params, verbose): ) if version.parse(cls.get_sorter_version()) >= version.parse("4.0.12"): - # TODO: save_preprocessed_copy added + # TODO: save_preprocesed_copy added ops = initialize_ops( settings=settings, probe=probe, @@ -224,7 +224,7 @@ def _run_from_folder(cls, sorter_output_folder, params, verbose): do_CAR=do_CAR, invert_sign=invert_sign, device=device, - save_preprocessed_copy=False, + save_preprocesed_copy=False, ) else: ops = initialize_ops( diff --git a/src/spikeinterface/temp_test_file_dir/test_kilosort4_new.py b/src/spikeinterface/temp_test_file_dir/test_kilosort4_new.py index 0fb9841728..e4d48a1344 100644 --- a/src/spikeinterface/temp_test_file_dir/test_kilosort4_new.py +++ b/src/spikeinterface/temp_test_file_dir/test_kilosort4_new.py @@ -13,6 +13,7 @@ from packaging.version import parse from importlib.metadata import version +# TODO: save_preprocesed_copy is misspelled in KS4. # TODO: duplicate_spike_bins to duplicate_spike_ms # TODO: write an issue on KS about bin! vs bin_ms! 
# TODO: expose tmin and tmax From 9570c9273b4f86bd800120c6d05096ebfc82e85d Mon Sep 17 00:00:00 2001 From: JoeZiminski Date: Wed, 26 Jun 2024 14:36:55 +0100 Subject: [PATCH 015/187] Fix NT format for BinaryFiltered, double-check all again --- src/spikeinterface/sorters/external/kilosort4.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spikeinterface/sorters/external/kilosort4.py b/src/spikeinterface/sorters/external/kilosort4.py index 449ddfbff1..28a3c3ffa3 100644 --- a/src/spikeinterface/sorters/external/kilosort4.py +++ b/src/spikeinterface/sorters/external/kilosort4.py @@ -255,7 +255,7 @@ def _run_from_folder(cls, sorter_output_folder, params, verbose): filename=ops["filename"], n_chan_bin=n_chan_bin, fs=fs, - nT=NT, + NT=NT, nt=nt, nt0min=twav_min, chan_map=chan_map, From a8489a50a0d4ccaa1c6e75307b73fcae7a8c4bc2 Mon Sep 17 00:00:00 2001 From: JoeZiminski Date: Wed, 26 Jun 2024 15:25:08 +0100 Subject: [PATCH 016/187] Add CI to test all kilosort4 versions. --- .github/scripts/README.MD | 2 + .github/scripts/check_kilosort4_releases.py | 20 ++++ .../scripts/test_kilosort4_ci.py | 106 +++++++++++++++++- .github/workflows/test_kilosort4.yml | 63 ++++++----- conftest.py | 7 +- 5 files changed, 170 insertions(+), 28 deletions(-) create mode 100644 .github/scripts/README.MD create mode 100644 .github/scripts/check_kilosort4_releases.py rename src/spikeinterface/temp_test_file_dir/test_kilosort4_new.py => .github/scripts/test_kilosort4_ci.py (83%) diff --git a/.github/scripts/README.MD b/.github/scripts/README.MD new file mode 100644 index 0000000000..1d3a622aae --- /dev/null +++ b/.github/scripts/README.MD @@ -0,0 +1,2 @@ +This folder contains test scripts for running in the CI, that are not run as part of the usual +CI because they are too long / heavy. These are run on cron-jobs once per week. diff --git a/.github/scripts/check_kilosort4_releases.py b/.github/scripts/check_kilosort4_releases.py new file mode 100644 index 0000000000..3d04d6948a --- /dev/null +++ b/.github/scripts/check_kilosort4_releases.py @@ -0,0 +1,20 @@ +import os +import re +from pathlib import Path +import requests +import json + + +def get_pypi_versions(package_name): + url = f"https://pypi.org/pypi/{package_name}/json" + response = requests.get(url) + response.raise_for_status() + data = response.json() + return list(sorted(data["releases"].keys())) + + +if __name__ == "__main__": + package_name = "kilosort" + versions = get_pypi_versions(package_name) + with open(Path(os.path.realpath(__file__)).parent / "kilosort4-latest-version.json", "w") as f: + json.dump(versions, f) diff --git a/src/spikeinterface/temp_test_file_dir/test_kilosort4_new.py b/.github/scripts/test_kilosort4_ci.py similarity index 83% rename from src/spikeinterface/temp_test_file_dir/test_kilosort4_new.py rename to .github/scripts/test_kilosort4_ci.py index e4d48a1344..4684038bd0 100644 --- a/src/spikeinterface/temp_test_file_dir/test_kilosort4_new.py +++ b/.github/scripts/test_kilosort4_ci.py @@ -12,6 +12,14 @@ from kilosort.parameters import DEFAULT_SETTINGS from packaging.version import parse from importlib.metadata import version +from inspect import signature +from kilosort.run_kilosort import (set_files, initialize_ops, + compute_preprocessing, + compute_drift_correction, detect_spikes, + cluster_spikes, save_sorting, + get_run_parameters, ) +from kilosort.io import load_probe, RecordingExtractorAsArray, BinaryFiltered +from kilosort.parameters import DEFAULT_SETTINGS # TODO: save_preprocesed_copy is misspelled in KS4. 
# TODO: duplicate_spike_bins to duplicate_spike_ms @@ -190,6 +198,102 @@ def test_default_settings_all_represented(self): if param_key not in ["n_chan_bin", "fs", "tmin", "tmax"]: assert param_key in tested_keys, f"param: {param_key} in DEFAULT SETTINGS but not tested." + def test_set_files_arguments(self): + self._check_arguments( + set_files, + ["settings", "filename", "probe", "probe_name", "data_dir", "results_dir"] + ) + + def test_initialize_ops_arguments(self): + + expected_arguments = ["settings", "probe", "data_dtype", "do_CAR", "invert_sign", "device"] + + if parse(version("kilosort")) >= parse("4.0.12"): + expected_arguments.append("save_preprocesed_copy") + + self._check_arguments( + initialize_ops, + expected_arguments, + ) + + def test_compute_preprocessing_arguments(self): + self._check_arguments( + compute_preprocessing, + ["ops", "device", "tic0", "file_object"] + ) + + def test_compute_drift_location_arguments(self): + self._check_arguments( + compute_drift_correction, + ["ops", "device", "tic0", "progress_bar", "file_object"] + ) + + def test_detect_spikes_arguments(self): + self._check_arguments( + detect_spikes, + ["ops", "device", "bfile", "tic0", "progress_bar"] + ) + + + def test_cluster_spikes_arguments(self): + self._check_arguments( + cluster_spikes, + ["st", "tF", "ops", "device", "bfile", "tic0", "progress_bar"] + ) + + def test_save_sorting_arguments(self): + + expected_arguments = ["ops", "results_dir", "st", "clu", "tF", "Wall", "imin", "tic0", "save_extra_vars"] + + if parse(version("kilosort")) > parse("4.0.11"): + expected_arguments.append("save_preprocessed_copy") + + self._check_arguments( + save_sorting, + expected_arguments + ) + + def test_get_run_parameters(self): + self._check_arguments( + get_run_parameters, + ["ops"] + ) + + def test_load_probe_parameters(self): + self._check_arguments( + load_probe, + ["probe_path"] + ) + + def test_recording_extractor_as_array_arguments(self): + self._check_arguments( + RecordingExtractorAsArray, + ["recording_extractor"] + ) + + def test_binary_filtered_arguments(self): + + expected_arguments = [ + "filename", "n_chan_bin", "fs", "NT", "nt", "nt0min", + "chan_map", "hp_filter", "whiten_mat", "dshift", + "device", "do_CAR", "artifact_threshold", "invert_sign", + "dtype", "tmin", "tmax", "file_object" + ] + + if parse(version("kilosort")) >= parse("4.0.11"): + expected_arguments.pop(-1) + expected_arguments.extend(["shift", "scale", "file_object"]) + + self._check_arguments( + BinaryFiltered, + expected_arguments + ) + + def _check_arguments(self, object_, expected_arguments): + sig = signature(object_) + obj_arguments = list(sig.parameters.keys()) + assert expected_arguments == obj_arguments + @pytest.mark.parametrize("parameter", PARAMS_TO_TEST) def test_kilosort4(self, recording_and_paths, default_results, tmp_path, parameter): """ """ @@ -381,7 +485,7 @@ def fake_fftshift(X, dim): # Helpers ###### def _check_test_parameters_are_actually_changing_the_output(self, results, default_results, param_key): """ """ - if param_key not in ["artifact_threshold", "ccg_threshold", "cluster_downsampling"]: + if param_key not in ["artifact_threshold", "ccg_threshold", "cluster_downsampling", "cluster_pcs"]: num_clus = np.unique(results["si"]["clus"].iloc[:, 0]).size num_clus_default = np.unique(default_results["ks"]["clus"].iloc[:, 0]).size diff --git a/.github/workflows/test_kilosort4.yml b/.github/workflows/test_kilosort4.yml index 8e57f79786..c216be20d0 100644 --- a/.github/workflows/test_kilosort4.yml +++ 
b/.github/workflows/test_kilosort4.yml @@ -9,38 +9,56 @@ on: branches: - main -# env: -# KACHERY_CLOUD_CLIENT_ID: ${{ secrets.KACHERY_CLOUD_CLIENT_ID }} -# KACHERY_CLOUD_PRIVATE_KEY: ${{ secrets.KACHERY_CLOUD_PRIVATE_KEY }} +jobs: + versions: + runs-on: ubuntu-latest + outputs: + matrix: ${{ steps.set-matrix.outputs.matrix }} + steps: + - name: Checkout repository + uses: actions/checkout@v2 -# concurrency: # Cancel previous workflows on the same pull request -# group: ${{ github.workflow }}-${{ github.ref }} -# cancel-in-progress: true + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: 3.12 -jobs: - run: - name: ${{ matrix.os }} Python ${{ matrix.python-version }} + - name: Install dependencies + run: | + pip install requests + + - name: Fetch package versions from PyPI + run: | + python .github/scripts/check_kilosort4_releases.py + shell: bash + + - name: Set matrix data + id: set-matrix + run: | + echo "matrix=$(jq -c . < .github/scripts/kilosort4-latest-version.json)" >> $GITHUB_OUTPUT + + test: + needs: versions + name: ${{ matrix.ks_version }} runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: - python-version: ["3.12"] # TODO: "3.9", # Lower and higher versions we support - os: [ubuntu-latest] # TODO: macos-13, windows-latest, - ks_version: ["4.0.12"] # TODO: add / build from pypi based on Christians PR + python-version: ["3.12"] + os: [ubuntu-latest] + ks_version: ${{ fromJson(needs.versions.outputs.matrix) }} steps: - - uses: actions/checkout@v4 + - name: Checkout repository + uses: actions/checkout@v2 + - name: Setup Python ${{ matrix.python-version }} uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - - name: Install packages - # TODO: maybe dont need full? + - name: Install SpikeInterface run: | pip install -e .[test] - # git config --global user.email "CI@example.com" - # git config --global user.name "CI Almighty" - # pip install tabulate shell: bash - name: Install Kilosort @@ -49,13 +67,6 @@ jobs: shell: bash - name: Run new kilosort4 tests - # run: chmod +x .github/test_kilosort4.sh - # TODO: figure out the paths to be able to run this by calling the file directly run: | - pytest -k test_kilosort4_new --durations=0 + pytest .github/scripts/test_kilosort4_ci.py shell: bash - -# TODO: pip install -e .[full,dev] is failing # -#The conflict is caused by: -# spikeinterface[docs] 0.101.0rc0 depends on datalad==0.16.2; extra == "docs" -# spikeinterface[test] 0.101.0rc0 depends on datalad>=1.0.2; extra == "test" diff --git a/conftest.py b/conftest.py index c4bac6628a..8c06830d25 100644 --- a/conftest.py +++ b/conftest.py @@ -19,6 +19,7 @@ def create_cache_folder(tmp_path_factory): cache_folder = tmp_path_factory.mktemp("cache_folder") return cache_folder + def pytest_collection_modifyitems(config, items): """ This function marks (in the pytest sense) the tests according to their name and file_path location @@ -28,7 +29,11 @@ def pytest_collection_modifyitems(config, items): rootdir = Path(config.rootdir) modules_location = rootdir / "src" / "spikeinterface" for item in items: - rel_path = Path(item.fspath).relative_to(modules_location) + try: # TODO: make a note on this, check with Herberto its okay. 
rel_path = Path(item.fspath).relative_to(modules_location)
+        except:
+            continue
+
         module = rel_path.parts[0]
         if module == "sorters":
             if "internal" in rel_path.parts:

From 159e2b0a92b87ebaddedbf12cc68062bd0e5e5eb Mon Sep 17 00:00:00 2001
From: JoeZiminski
Date: Thu, 27 Jun 2024 01:33:48 +0100
Subject: [PATCH 017/187] Tidying up tests and removing comments from
 kilosort4.py.

---
 .github/scripts/test_kilosort4_ci.py          | 442 ++++++++++--------
 conftest.py                                   |   2 +-
 .../sorters/external/kilosort4.py             |  14 +-
 3 files changed, 247 insertions(+), 211 deletions(-)

diff --git a/.github/scripts/test_kilosort4_ci.py b/.github/scripts/test_kilosort4_ci.py
index 4684038bd0..8a455a41fe 100644
--- a/.github/scripts/test_kilosort4_ci.py
+++ b/.github/scripts/test_kilosort4_ci.py
@@ -1,3 +1,23 @@
+"""
+This file tests the SpikeInterface wrapper of Kilosort4. The general logic
+of the tests is:
+- Change every exposed parameter one at a time (PARAMS_TO_TEST). Check that
+  the result of the SpikeInterface wrapper and Kilosort run natively are
+  the same. The SpikeInterface wrapper is non-trivial and decomposes the
+  kilosort pipeline to allow additions such as skipping preprocessing. Therefore,
+  the idea is that it is safer to rely on the output directly rather than
+  try monkeypatching. One thing that could be better tested is parameter
+  changes when skipping KS4 preprocessing is true, because this takes a slightly
+  different path through the kilosort4.py wrapper logic.
+  This also checks that changing the parameter changes the test output from default
+  on our test case (otherwise, the test could not detect a failure). This is possible
+  for nearly all parameters, see `_check_test_parameters_are_changing_the_output()`.
+
+- Test that kilosort functions called from the `kilosort4.py` wrapper have the
+  expected input signatures.
+
+- Do some tests to check all KS4 parameters are tested against.
+"""
 import copy
 from typing import Any
 import spikeinterface.full as si
@@ -20,47 +40,21 @@
     get_run_parameters, )
 from kilosort.io import load_probe, RecordingExtractorAsArray, BinaryFiltered
 from kilosort.parameters import DEFAULT_SETTINGS
+from kilosort import preprocessing as ks_preprocessing

# TODO: save_preprocesed_copy is misspelled in KS4.
# TODO: duplicate_spike_bins to duplicate_spike_ms
# TODO: write an issue on KS about bin! vs bin_ms!
# TODO: expose tmin and tmax
# TODO: expose save_preprocessed_copy
# TODO: make here a log of all API changes (or on kilosort4.py)
# TODO: try out longer recordings and do some benchmarking tests..
# TODO: expose tmin and tmax
# There is no way to skip HP spatial filter
# might as well expose tmin and tmax
# might as well expose preprocessing save (across the two functions that use it)
# BinaryFilter added scale and shift as new arguments recently
# test with docker
# test all params once
# try and read func / class object to see kwargs
# Shift and scale are also taken as a function on BinaryFilter. Do we want to apply these even when
# do kilosort preprocessing is false? probably
# TODO: find a test case for the other annoying ones (larger recording, variable amplitude)
# TODO: test docker
# TODO: test multi-segment recording
# TODO: test do correction, skip preprocessing
# TODO: can we rename 'save_extra_kwargs' to 'save_extra_vars'. Currently untested.
# nt : # TODO: can't kilosort figure this out from sampling rate?
-# TODO: also test runtimes -# TODO: test skip preprocessing separately -# TODO: the pure default case is not tested -# TODO: shift and scale - this is also added to BinaryFilter - -RUN_KILOSORT_ARGS = ["do_CAR", "invert_sign", "save_preprocessed_copy"] # TODO: ignore some of these +RUN_KILOSORT_ARGS = ["do_CAR", "invert_sign", "save_preprocessed_copy"] # "device", "progress_bar", "save_extra_vars" are not tested. "save_extra_vars" could be. - +# Setup Params to test #### PARAMS_TO_TEST = [ # Not tested # ("torch_device", "auto") + # Stable across KS version 4.0.01 - 4.0.12 ("change_nothing", None), ("nblocks", 0), ("do_CAR", False), - ("batch_size", 42743), # Q: how much do these results change with batch size? + ("batch_size", 42743), ("Th_universal", 12), ("Th_learned", 14), ("invert_sign", True), @@ -80,14 +74,15 @@ ("n_templates", 10), ("n_pcs", 3), ("Th_single_ch", 4), - ("acg_threshold", 0.001), ("x_centers", 5), - ("duplicate_spike_bins", 5), # TODO: why is this not erroring, it is deprecated. issue on KS ("binning_depth", 1), + # Note: These don't change the results from + # default when applied to the test case. ("artifact_threshold", 200), - ("ccg_threshold", 1e9), - ("cluster_downsampling", 1e9), - ("duplicate_spike_bins", 5), # TODO: this is depcrecated and changed to _ms in 4.0.13! + ("ccg_threshold", 1e12), + ("acg_threshold", 1e12), + ("cluster_downsampling", 2), + ("duplicate_spike_bins", 5), ] # Update PARAMS_TO_TEST with version-dependent kwargs @@ -131,11 +126,13 @@ class TestKilosort4Long: # Fixtures ###### @pytest.fixture(scope="session") def recording_and_paths(self, tmp_path_factory): - """ """ + """ + Create a ground-truth recording, and save it to binary + so KS4 can run on it. Fixture is set up once and shared between + all tests. + """ tmp_path = tmp_path_factory.mktemp("kilosort4_tests") - np.random.seed(0) # TODO: check below... - recording = self._get_ground_truth_recording() paths = self._save_ground_truth_recording(recording, tmp_path) @@ -144,10 +141,17 @@ def recording_and_paths(self, tmp_path_factory): @pytest.fixture(scope="session") def default_results(self, recording_and_paths): - """ """ + """ + Because we check each parameter at a time and check the + KS4 and SpikeInterface versions match, if changing the parameter + had no effect as compared to default then the test would not test + anything. Therefore, the default results are run once and stored, + to check changing params indeed changes the results during testing. + This is possibly for nearly all parameters. + """ recording, paths = recording_and_paths - settings, ks_format_probe = self._run_kilosort_with_kilosort(recording, paths) + settings, _, ks_format_probe = self._get_kilosort_native_settings(recording, paths, "change_nothing", None) defaults_ks_output_dir = paths["session_scope_tmp_path"] / "default_ks_output" @@ -162,18 +166,46 @@ def default_results(self, recording_and_paths): return default_results - # Tests ###### - def test_params_to_test(self): + def _get_ground_truth_recording(self): """ - Test that all parameters in PARAMS_TO_TEST are - different than the default value used in Kilosort, otherwise - there is no point to the test. + A ground truth recording chosen to be as small as possible (for speed). + But contain enough information so that changing most parameters + changes the results. 
+ """ + num_channels = 32 + recording, _ = si.generate_ground_truth_recording( + durations=[5], + seed=0, + num_channels=num_channels, + num_units=5, + generate_sorting_kwargs=dict(firing_rates=100, refractory_period_ms=4.0), + ) + return recording - TODO: need to use _default_params vs. DEFAULT_SETTINGS - depending on decision + def _save_ground_truth_recording(self, recording, tmp_path): + """ + Save the recording and its probe to file, so it can be + loaded by KS4. + """ + paths = { + "session_scope_tmp_path": tmp_path, + "recording_path": tmp_path / "my_test_recording", + "probe_path": tmp_path / "my_test_probe.prb", + } - TODO: write issue on this, we hope it will be on DEFAULT_SETTINGS - TODO: duplicate_spike_ms in POSTPROCESSING but seems unused? + recording.save(folder=paths["recording_path"], overwrite=True) + + probegroup = recording.get_probegroup() + write_prb(paths["probe_path"].as_posix(), probegroup) + + return paths + + # Tests ###### + def test_params_to_test(self): + """ + Test that all values in PARAMS_TO_TEST are + different to the default values used in Kilosort, + otherwise there is no point to the test. """ for parameter in PARAMS_TO_TEST: @@ -198,6 +230,7 @@ def test_default_settings_all_represented(self): if param_key not in ["n_chan_bin", "fs", "tmin", "tmax"]: assert param_key in tested_keys, f"param: {param_key} in DEFAULT SETTINGS but not tested." + # Testing Arguments ### def test_set_files_arguments(self): self._check_arguments( set_files, @@ -205,7 +238,6 @@ def test_set_files_arguments(self): ) def test_initialize_ops_arguments(self): - expected_arguments = ["settings", "probe", "data_dtype", "do_CAR", "invert_sign", "device"] if parse(version("kilosort")) >= parse("4.0.12"): @@ -234,7 +266,6 @@ def test_detect_spikes_arguments(self): ["ops", "device", "bfile", "tic0", "progress_bar"] ) - def test_cluster_spikes_arguments(self): self._check_arguments( cluster_spikes, @@ -242,7 +273,6 @@ def test_cluster_spikes_arguments(self): ) def test_save_sorting_arguments(self): - expected_arguments = ["ops", "results_dir", "st", "clu", "tF", "Wall", "imin", "tic0", "save_extra_vars"] if parse(version("kilosort")) > parse("4.0.11"): @@ -272,7 +302,6 @@ def test_recording_extractor_as_array_arguments(self): ) def test_binary_filtered_arguments(self): - expected_arguments = [ "filename", "n_chan_bin", "fs", "NT", "nt", "nt0min", "chan_map", "hp_filter", "whiten_mat", "dshift", @@ -294,27 +323,23 @@ def _check_arguments(self, object_, expected_arguments): obj_arguments = list(sig.parameters.keys()) assert expected_arguments == obj_arguments + # Full Test #### @pytest.mark.parametrize("parameter", PARAMS_TO_TEST) def test_kilosort4(self, recording_and_paths, default_results, tmp_path, parameter): - """ """ + """ + Given a recording, paths to raw data, and a parameter to change, + run KS4 natively and within the SpikeInterface wrapper with the + new parameter value (all other values default) and + check the outputs are the same. 
+ """ recording, paths = recording_and_paths param_key, param_value = parameter + # Setup parameters for KS4 and run it natively kilosort_output_dir = tmp_path / "kilosort_output_dir" spikeinterface_output_dir = tmp_path / "spikeinterface_output_dir" - extra_ks_settings = {} - if param_key == "binning_depth": - extra_ks_settings.update({"nblocks": 5}) - - if param_key in RUN_KILOSORT_ARGS: - run_kilosort_kwargs = {param_key: param_value} - else: - if param_key != "change_nothing": - extra_ks_settings.update({param_key: param_value}) - run_kilosort_kwargs = {} - - settings, ks_format_probe = self._run_kilosort_with_kilosort(recording, paths, extra_ks_settings) + settings, run_kilosort_kwargs, ks_format_probe = self._get_kilosort_native_settings(recording, paths, param_key, param_value) kilosort.run_kilosort( settings=settings, @@ -324,14 +349,9 @@ def test_kilosort4(self, recording_and_paths, default_results, tmp_path, paramet **run_kilosort_kwargs, ) - extra_si_settings = {} - if param_key != "change_nothing": - extra_si_settings.update({param_key: param_value}) + # Setup Parameters for SI and KS4 through SI + spikeinterface_settings = self._get_spikeinterface_settings(param_key, param_value) - if param_key == "binning_depth": - extra_si_settings.update({"nblocks": 5}) - - spikeinterface_settings = self._get_spikeinterface_settings(extra_settings=extra_si_settings) si.run_sorter( "kilosort4", recording, @@ -340,36 +360,41 @@ def test_kilosort4(self, recording_and_paths, default_results, tmp_path, paramet **spikeinterface_settings, ) + # Get the results and check they match results = self._get_sorting_output(kilosort_output_dir, spikeinterface_output_dir) assert np.array_equal(results["ks"]["st"], results["si"]["st"]), f"{param_key} spike times different" + assert np.array_equal(results["ks"]["clus"], results["si"]["clus"]), f"{param_key} cluster assignment different" - assert all( - results["ks"]["clus"].iloc[:, 0] == results["si"]["clus"].iloc[:, 0] - ), f"{param_key} cluster assignment different" - assert all( - results["ks"]["clus"].iloc[:, 1] == results["si"]["clus"].iloc[:, 1] - ), f"{param_key} cluster quality different" # TODO: check pandas probably better way - - # This is saved on the SI side so not an extremely - # robust addition, but it can't hurt. + # Check the ops file in KS4 output is as expected. This is saved on the + # SI side so not an extremely robust addition, but it can't hurt. if param_key != "change_nothing": ops = np.load(spikeinterface_output_dir / "sorter_output" / "ops.npy", allow_pickle=True) ops = ops.tolist() # strangely this makes a dict assert ops[param_key] == param_value - # Finally, check out test parameters actually changes stuff! + # Finally, check out test parameters actually change the output of + # KS4, ensuring our tests are actually doing something. This is not + # done prior to 4.0.4 because a number of parameters seem to stop + # having an effect. This is probably due to small changes in their + # behaviour, and the test file chosen here. if parse(version("kilosort")) > parse("4.0.4"): - self._check_test_parameters_are_actually_changing_the_output(results, default_results, param_key) + self._check_test_parameters_are_changing_the_output(results, default_results, param_key) def test_kilosort4_no_correction(self, recording_and_paths, tmp_path): - """ """ + """ + Test the SpikeInterface wrappers `do_correction` argument. We set + `nblocks=0` for KS4 native, turning off motion correction. 
Then
+        we run KS4 through SpikeInterface with `do_correction=False` but
+        `nblocks=1` (KS4 default) - checking that `do_correction` overrides
+        this and the result matches KS4 when run without motion correction.
+        """
        recording, paths = recording_and_paths

-        kilosort_output_dir = tmp_path / "kilosort_output_dir"  # TODO: a lost of copying here
+        kilosort_output_dir = tmp_path / "kilosort_output_dir"
        spikeinterface_output_dir = tmp_path / "spikeinterface_output_dir"

-        settings, ks_format_probe = self._run_kilosort_with_kilosort(recording, paths, extra_settings={"nblocks": 0})
+        settings, _, ks_format_probe = self._get_kilosort_native_settings(recording, paths, "nblocks", 0)

        kilosort.run_kilosort(
            settings=settings,
@@ -379,7 +404,7 @@ def test_kilosort4_no_correction(self, recording_and_paths, tmp_path):
            do_CAR=True,
        )

-        spikeinterface_settings = self._get_spikeinterface_settings(extra_settings={"nblocks": 6})
+        spikeinterface_settings = self._get_spikeinterface_settings("nblocks", 1)
        si.run_sorter(
            "kilosort4",
            recording,
@@ -392,186 +417,199 @@ def test_kilosort4_no_correction(self, recording_and_paths, tmp_path):
        results = self._get_sorting_output(kilosort_output_dir, spikeinterface_output_dir)

        assert np.array_equal(results["ks"]["st"], results["si"]["st"])
+        assert np.array_equal(results["ks"]["clus"], results["si"]["clus"])
+
+    @pytest.mark.parametrize("param_to_test", [
+        ("change_nothing", None),
+        ("do_CAR", False),
+        ("batch_size", 42743),
+        ("Th_learned", 14),
+        ("dmin", 15),
+        ("max_channel_distance", 5),
+        ("n_pcs", 3),
+    ])
+    def test_kilosort4_skip_preprocessing_correction(self, tmp_path, monkeypatch, param_to_test):
+        """
+        Test that skipping KS4 preprocessing works as expected. Run
+        KS4 natively, monkeypatching the relevant preprocessing functions
+        such that preprocessing is not performed. Then run in SpikeInterface
+        with `skip_kilosort_preprocessing=True` and check the outputs match.
+
+        Run with a few randomly chosen parameters to check these are propagated
+        under this condition.

-        assert all(results["ks"]["clus"].iloc[:, 0] == results["si"]["clus"].iloc[:, 0])
-        assert all(results["ks"]["clus"].iloc[:, 1] == results["si"]["clus"].iloc[:, 1])
+        TODO
+        ----
+        It would be nice to check a few additional parameters here.
+        """
+        param_key, param_value = param_to_test

-    def test_kilosort4_skip_preprocessing_correction(self, tmp_path, monkeypatch):
-        """ """
        recording = self._get_ground_truth_recording()

        # We need to filter and whiten the recording here to KS takes forever.
-        # Do this in a way differnt to KS.
+        # Do this in a way different to KS.
        recording = si.highpass_filter(recording, 300)
        recording = si.whiten(recording, mode="local", apply_mean=False)
        paths = self._save_ground_truth_recording(recording, tmp_path)

-        kilosort_default_output_dir = tmp_path / "kilosort_default_output_dir"
        kilosort_output_dir = tmp_path / "kilosort_output_dir"
        spikeinterface_output_dir = tmp_path / "spikeinterface_output_dir"

-        ks_settings, ks_format_probe = self._run_kilosort_with_kilosort(recording, paths, extra_settings={"nblocks": 0})
+        def monkeypatch_filter_function(self, X, ops=None, ibatch=None):
+            """
+            This is a direct copy of the kilosort io.BinaryFiltered.filter
+            function, with hp_filter and whitening matrix code sections, and
+            comments removed.
This is the easiest way to monkeypatch it (a few other approaches were tried).
+            """
+            if self.chan_map is not None:
+                X = X[self.chan_map]

-        kilosort.run_kilosort(
-            settings=ks_settings,
-            probe=ks_format_probe,
-            data_dtype="float32",
-            results_dir=kilosort_default_output_dir,
-            do_CAR=False,
-        )
+            if self.invert_sign:
+                X = X * -1
+
+            X = X - X.mean(1).unsqueeze(1)
+            if self.do_CAR:
+                X = X - torch.median(X, 0)[0]
+
+            if self.hp_filter is not None:
+                pass

-        # Now the tricky bit, we need to turn off preprocessing in kilosort.
-        # This is not exposed by run_kilosort() arguments (at 4.0.12 at least)
-        # and so we need to monkeypatch the internal functions. The easiest
-        # thing to do would be to set `get_highpass_filter()` and
-        # `get_whitening_matrix()` to return `None` so these steps are skipped
-        # in BinaryFilter. Unfortunately the ops saving machinery requires
-        # these to be torch arrays and will error otherwise, so instead
-        # we must set the filter (in frequency space) and whitening matrix
-        # to unity operations so the filter and whitening do nothing. It is
-        # also required to turn off motion correection to avoid some additional
-        # magic KS is doing at the whitening step when motion correction is on.
-        fake_filter = np.ones(60122, dtype="float32")  # TODO: hard coded
-        fake_filter = torch.from_numpy(fake_filter).to("cpu")
-
-        fake_white_matrix = np.eye(recording.get_num_channels(), dtype="float32")
-        fake_white_matrix = torch.from_numpy(fake_white_matrix).to("cpu")
-
-        def fake_fft_highpass(*args, **kwargs):
-            return fake_filter
-
-        def fake_get_whitening_matrix(*args, **kwargs):
-            return fake_white_matrix
-
-        def fake_fftshift(X, dim):
+            if self.artifact_threshold < np.inf:
+                if torch.any(torch.abs(X) >= self.artifact_threshold):
+                    return torch.zeros_like(X)
+
+            if self.whiten_mat is not None:
+                pass
            return X

-        monkeypatch.setattr("kilosort.io.fft_highpass", fake_fft_highpass)
-        monkeypatch.setattr("kilosort.preprocessing.get_whitening_matrix", fake_get_whitening_matrix)
-        monkeypatch.setattr("kilosort.io.fftshift", fake_fftshift)
+        monkeypatch.setattr("kilosort.io.BinaryFiltered.filter",
+                            monkeypatch_filter_function)
+
+        ks_settings, _, ks_format_probe = self._get_kilosort_native_settings(recording, paths, param_key, param_value)
+        ks_settings["nblocks"] = 0
+
+        # Be explicit here and don't rely on defaults.
+        do_CAR = param_value if param_key == "do_CAR" else False

        kilosort.run_kilosort(
            settings=ks_settings,
            probe=ks_format_probe,
            data_dtype="float32",
            results_dir=kilosort_output_dir,
-            do_CAR=False,
+            do_CAR=do_CAR,
        )
        monkeypatch.undo()

        # Now, run kilosort through spikeinterface with the same options.
-        spikeinterface_settings = self._get_spikeinterface_settings(extra_settings={"nblocks": 0})
+        spikeinterface_settings = self._get_spikeinterface_settings(param_key, param_value)
+        spikeinterface_settings["nblocks"] = 0
+
+        do_CAR = False if param_key != "do_CAR" else spikeinterface_settings.pop("do_CAR")
+
        si.run_sorter(
            "kilosort4",
            recording,
            remove_existing_folder=True,
            folder=spikeinterface_output_dir,
-            do_CAR=False,
+            do_CAR=do_CAR,
            skip_kilosort_preprocessing=True,
            **spikeinterface_settings,
        )

-        default_results = self._get_sorting_output(kilosort_default_output_dir)
+        # There is a very slight difference caused by batching when the data is
+        # loaded from file vs. from memory. Because in this test recordings are
+        # preprocessed, there are some filter edge effects that depend on the
+        # chunking in `get_traces()`. These are all extremely close (usually
+        # just 1 spike, 1 idx different).
        results = self._get_sorting_output(kilosort_output_dir, spikeinterface_output_dir)
-
-        # Check that out intervention actually make some difference to KS output
-        # (or this test would do nothing). Then check SI and KS outputs with
-        # preprocessing skipped are identical.
-        assert not np.array_equal(default_results["ks"]["st"], results["ks"]["st"])
-        assert np.array_equal(results["ks"]["st"], results["si"]["st"])
+        assert np.allclose(results["ks"]["st"], results["si"]["st"], rtol=0, atol=1)

    # Helpers ######
-    def _check_test_parameters_are_actually_changing_the_output(self, results, default_results, param_key):
-        """ """
-        if param_key not in ["artifact_threshold", "ccg_threshold", "cluster_downsampling", "cluster_pcs"]:
-            num_clus = np.unique(results["si"]["clus"].iloc[:, 0]).size
-            num_clus_default = np.unique(default_results["ks"]["clus"].iloc[:, 0]).size
+    def _check_test_parameters_are_changing_the_output(self, results, default_results, param_key):
+        """
+        If nothing is changed, default vs. results outputs are identical.
+        Otherwise, check they are not the same. Can't figure out how to get
+        the parameters skipped below to change the results on this
+        small test file.
+        """
+        if param_key in ["acg_threshold", "ccg_threshold", "artifact_threshold", "cluster_downsampling"]:
+            return
+
+        if param_key == "change_nothing":
+            assert all(
+                default_results["ks"]["st"] == results["ks"]["st"]
+            ) and all(
+                default_results["ks"]["clus"] == results["ks"]["clus"]
+            ), f"{param_key} changed somehow!"
+        else:
+            assert not (
+                default_results["ks"]["st"].size == results["ks"]["st"].size
+            ) or not all(
+                default_results["ks"]["clus"] == results["ks"]["clus"]
+            ), f"{param_key} results did not change with parameter change."

-            if param_key == "change_nothing":
-                # TODO: lol
-                assert (
-                    (results["si"]["st"].size == default_results["ks"]["st"].size)
-                    and num_clus == num_clus_default
-                    and all(results["si"]["clus"].iloc[:, 1] == default_results["ks"]["clus"].iloc[:, 1])
-                ), f"{param_key} changed somehow!."
-            else:
-                assert (
-                    (results["si"]["st"].size != default_results["ks"]["st"].size)
-                    or num_clus != num_clus_default
-                    or not all(results["si"]["clus"].iloc[:, 1] == default_results["ks"]["clus"].iloc[:, 1])
-                ), f"{param_key} results did not change with parameter change."
-
-    def _run_kilosort_with_kilosort(self, recording, paths, extra_settings=None):
-        """ """
-        # dont actually run KS here because we will overwrite the defaults!
+    def _get_kilosort_native_settings(self, recording, paths, param_key, param_value):
+        """
+        Function to generate the settings and function inputs to run kilosort.
+        Note when `binning_depth` is used we need to set `nblocks` high to
+        get the results to change from default.
+
+        Some settings in KS4 are passed via the `settings` dict while others
+        are passed as function arguments; these are split here.
+ """ settings = { "data_dir": paths["recording_path"], "n_chan_bin": recording.get_num_channels(), "fs": recording.get_sampling_frequency(), } - if extra_settings is not None: - settings.update(extra_settings) + if param_key == "binning_depth": + settings.update({"nblocks": 5}) + + if param_key in RUN_KILOSORT_ARGS: + run_kilosort_kwargs = {param_key: param_value} + else: + if param_key != "change_nothing": + settings.update({param_key: param_value}) + run_kilosort_kwargs = {} ks_format_probe = load_probe(paths["probe_path"]) - return settings, ks_format_probe + return settings, run_kilosort_kwargs, ks_format_probe - def _get_spikeinterface_settings(self, extra_settings=None): - """ """ - # dont actually run here. + def _get_spikeinterface_settings(self, param_key, param_value): + """ + Generate settings kwargs for running KS4 in SpikeInterface. + See `_get_kilosort_native_settings()` for some details. + """ settings = copy.deepcopy(DEFAULT_SETTINGS) - if extra_settings is not None: - settings.update(extra_settings) + if param_key != "change_nothing": + settings.update({param_key: param_value}) + + if param_key == "binning_depth": + settings.update({"nblocks": 5}) - for name in ["n_chan_bin", "fs", "tmin", "tmax"]: # TODO: check tmin and tmax + for name in ["n_chan_bin", "fs", "tmin", "tmax"]: settings.pop(name) return settings def _get_sorting_output(self, kilosort_output_dir=None, spikeinterface_output_dir=None) -> dict[str, Any]: - """ """ + """ + Load the results of sorting into a dict for easy comparison. + """ results = { "si": {}, "ks": {}, } if kilosort_output_dir: results["ks"]["st"] = np.load(kilosort_output_dir / "spike_times.npy") - results["ks"]["clus"] = pd.read_table(kilosort_output_dir / "cluster_group.tsv") + results["ks"]["clus"] = np.load(kilosort_output_dir / "spike_clusters.npy") if spikeinterface_output_dir: results["si"]["st"] = np.load(spikeinterface_output_dir / "sorter_output" / "spike_times.npy") - results["si"]["clus"] = pd.read_table(spikeinterface_output_dir / "sorter_output" / "cluster_group.tsv") + results["si"]["clus"] = np.load(spikeinterface_output_dir / "sorter_output" / "spike_clusters.npy") return results - - def _get_ground_truth_recording(self): - """ """ - # Chosen so all parameter changes to indeed change the output - num_channels = 32 - recording, _ = si.generate_ground_truth_recording( - durations=[5], - seed=0, - num_channels=num_channels, - num_units=5, - generate_sorting_kwargs=dict(firing_rates=100, refractory_period_ms=4.0), - ) - return recording - - def _save_ground_truth_recording(self, recording, tmp_path): - """ """ - paths = { - "session_scope_tmp_path": tmp_path, - "recording_path": tmp_path / "my_test_recording", - "probe_path": tmp_path / "my_test_probe.prb", - } - - recording.save(folder=paths["recording_path"], overwrite=True) - - probegroup = recording.get_probegroup() - write_prb(paths["probe_path"].as_posix(), probegroup) - - return paths diff --git a/conftest.py b/conftest.py index 8c06830d25..544c2fb6cb 100644 --- a/conftest.py +++ b/conftest.py @@ -29,7 +29,7 @@ def pytest_collection_modifyitems(config, items): rootdir = Path(config.rootdir) modules_location = rootdir / "src" / "spikeinterface" for item in items: - try: # TODO: make a note on this, check with Herberto its okay. 
+    try:
+        rel_path = Path(item.fspath).relative_to(modules_location)
     except:
         continue
diff --git a/src/spikeinterface/sorters/external/kilosort4.py b/src/spikeinterface/sorters/external/kilosort4.py
index 28a3c3ffa3..8721ce1b89 100644
--- a/src/spikeinterface/sorters/external/kilosort4.py
+++ b/src/spikeinterface/sorters/external/kilosort4.py
@@ -127,8 +127,7 @@ def is_installed(cls):

     @classmethod
     def get_sorter_version(cls):
-        """kilosort version <0.0.10 is always '4' z"""
-        # Note this import clashes with version!
+        """kilosort version <0.0.10 is always '4'"""
         from importlib.metadata import version as importlib_version

         return importlib_version("kilosort")
@@ -216,7 +215,6 @@ def _run_from_folder(cls, sorter_output_folder, params, verbose):
         )

         if version.parse(cls.get_sorter_version()) >= version.parse("4.0.12"):
-            # TODO: save_preprocesed_copy added
             ops = initialize_ops(
                 settings=settings,
                 probe=probe,
@@ -237,7 +235,6 @@ def _run_from_folder(cls, sorter_output_folder, params, verbose):
             )

         if version.parse(cls.get_sorter_version()) >= version.parse("4.0.11"):
-            # TODO: shift, scaled added
             n_chan_bin, fs, NT, nt, twav_min, chan_map, dtype, do_CAR, invert, _, _, tmin, tmax, artifact, _, _ = (
                 get_run_parameters(ops)
             )
@@ -261,22 +258,23 @@ def _run_from_folder(cls, sorter_output_folder, params, verbose):
                 chan_map=chan_map,
                 hp_filter=None,
                 device=device,
-                do_CAR=do_CAR,  # TODO: should this always be False if we are in skipping KS preprocessing land?
+                do_CAR=do_CAR,
                 invert_sign=invert,
                 dtype=dtype,
-                tmin=tmin,  # TODO: exposing tmin, max?
+                tmin=tmin,
                 tmax=tmax,
                 artifact_threshold=artifact,
-                file_object=file_object,  # TODO: exposing shift, scale when skipping preprocessing?
+                file_object=file_object,
             )
             ops["preprocessing"] = dict(hp_filter=None, whiten_mat=None)
             ops["Wrot"] = torch.as_tensor(np.eye(recording.get_num_channels()))
             ops["Nbatches"] = bfile.n_batches
+            # bfile.close()  # TODO: does KS do this after preprocessing?

         np.random.seed(1)
         torch.cuda.manual_seed_all(1)
         torch.random.manual_seed(1)

-        # if not params["skip_kilosort_preprocessing"]:
+
         if not params["do_correction"]:
             print("Skipping drift correction.")
             ops["nblocks"] = 0

From 0817a5b3f10c986db04632fb979e2c30cf501dbc Mon Sep 17 00:00:00 2001
From: JoeZiminski
Date: Thu, 27 Jun 2024 02:30:19 +0100
Subject: [PATCH 018/187] Add tests to check _default_params against KS params.

---
 .github/scripts/test_kilosort4_ci.py | 25 +++++++++++++++----
 .../sorters/external/kilosort4.py    |  7 +++---
 2 files changed, 23 insertions(+), 9 deletions(-)

diff --git a/.github/scripts/test_kilosort4_ci.py b/.github/scripts/test_kilosort4_ci.py
index 8a455a41fe..ecc931781c 100644
--- a/.github/scripts/test_kilosort4_ci.py
+++ b/.github/scripts/test_kilosort4_ci.py
@@ -26,7 +26,7 @@
 import kilosort
 from kilosort.io import load_probe
 import pandas as pd
-
+from spikeinterface.sorters.external.kilosort4 import Kilosort4Sorter
 import pytest
 from probeinterface.io import write_prb
 from kilosort.parameters import DEFAULT_SETTINGS
@@ -230,6 +230,21 @@ def test_default_settings_all_represented(self):
             if param_key not in ["n_chan_bin", "fs", "tmin", "tmax"]:
                 assert param_key in tested_keys, f"param: {param_key} in DEFAULT SETTINGS but not tested."

+    def test_spikeinterface_defaults_against_kilsort(self):
+        """
+        Here check that all `_default_params` in the SpikeInterface wrapper
+        match the KS4 defaults. Don't check that every default in KS is
+        exposed in params, because they change across versions; that check
+        is instead performed against PARAMS_TO_TEST.
+ """ + params = copy.deepcopy(Kilosort4Sorter._default_params) + + for key in params.keys(): + # "artifact threshold" is set to `np.inf` if `None` in + # the body of the `Kilosort4Sorter` class. + if key in DEFAULT_SETTINGS and key not in ["artifact_threshold"]: + assert params[key] == DEFAULT_SETTINGS[key], f"{key} is not the same across versions." + # Testing Arguments ### def test_set_files_arguments(self): self._check_arguments( @@ -533,7 +548,7 @@ def _check_test_parameters_are_changing_the_output(self, results, default_result the skipped three parameters below to change the results on this small test file. """ - if param_key in ["acg_threshold", "ccg_threshold", "artifact_threshold", "cluster_downsampling"]: + if param_key in ["acg_threshold", "ccg_threshold", "artifact_threshold", "cluster_downsampling", "cluster_pcs"]: return if param_key == "change_nothing": @@ -583,7 +598,7 @@ def _get_spikeinterface_settings(self, param_key, param_value): Generate settings kwargs for running KS4 in SpikeInterface. See `_get_kilosort_native_settings()` for some details. """ - settings = copy.deepcopy(DEFAULT_SETTINGS) + settings = {} # copy.deepcopy(DEFAULT_SETTINGS) if param_key != "change_nothing": settings.update({param_key: param_value}) @@ -591,8 +606,8 @@ def _get_spikeinterface_settings(self, param_key, param_value): if param_key == "binning_depth": settings.update({"nblocks": 5}) - for name in ["n_chan_bin", "fs", "tmin", "tmax"]: - settings.pop(name) + # for name in ["n_chan_bin", "fs", "tmin", "tmax"]: + # settings.pop(name) return settings diff --git a/src/spikeinterface/sorters/external/kilosort4.py b/src/spikeinterface/sorters/external/kilosort4.py index 8721ce1b89..82c033f61d 100644 --- a/src/spikeinterface/sorters/external/kilosort4.py +++ b/src/spikeinterface/sorters/external/kilosort4.py @@ -6,6 +6,7 @@ from ..basesorter import BaseSorter from .kilosortbase import KilosortBase +from importlib.metadata import version as importlib_version PathType = Union[str, Path] @@ -35,7 +36,7 @@ class Kilosort4Sorter(BaseSorter): "drift_smoothing": [0.5, 0.5, 0.5], "nt0min": None, "dmin": None, - "dminx": 32, + "dminx": 32 if version.parse(importlib_version("kilosort")) > version.parse("4.0.0.1") else None, "min_template_size": 10, "template_sizes": 5, "nearest_chans": 10, @@ -50,7 +51,7 @@ class Kilosort4Sorter(BaseSorter): "cluster_downsampling": 20, "cluster_pcs": 64, "x_centers": None, - "duplicate_spike_bins": 7, + "duplicate_spike_bins": 7 if version.parse(importlib_version("kilosort")) >= version.parse("4.0.4") else 15, "do_correction": True, "keep_good_only": False, "save_extra_kwargs": False, @@ -128,8 +129,6 @@ def is_installed(cls): @classmethod def get_sorter_version(cls): """kilosort version <0.0.10 is always '4'""" - from importlib.metadata import version as importlib_version - return importlib_version("kilosort") @classmethod From c8779fc87dfaa6aa1d2bdb72d6fa58ed36c7da7c Mon Sep 17 00:00:00 2001 From: JoeZiminski Date: Thu, 27 Jun 2024 03:01:37 +0100 Subject: [PATCH 019/187] Skip tests where relevant, try on slightly earlier python version to avoid weird xlabel bug. 
--- .github/scripts/test_kilosort4_ci.py | 3 +++ .github/workflows/test_kilosort4.yml | 2 +- src/spikeinterface/sorters/external/kilosort4.py | 4 ++-- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/.github/scripts/test_kilosort4_ci.py b/.github/scripts/test_kilosort4_ci.py index ecc931781c..3e74fa708e 100644 --- a/.github/scripts/test_kilosort4_ci.py +++ b/.github/scripts/test_kilosort4_ci.py @@ -228,6 +228,8 @@ def test_default_settings_all_represented(self): for param_key in DEFAULT_SETTINGS: if param_key not in ["n_chan_bin", "fs", "tmin", "tmax"]: + if parse(version("kilosort")) == parse("4.0.9") and param_key == "nblocks": + continue assert param_key in tested_keys, f"param: {param_key} in DEFAULT SETTINGS but not tested." def test_spikeinterface_defaults_against_kilsort(self): @@ -434,6 +436,7 @@ def test_kilosort4_no_correction(self, recording_and_paths, tmp_path): assert np.array_equal(results["ks"]["st"], results["si"]["st"]) assert np.array_equal(results["ks"]["clus"], results["si"]["clus"]) + @pytest.mark.skipif(parse(version("kilosort")) == parse("4.0.9"), reason="nblock=0 fails on KS4=4.0.9") @pytest.mark.parametrize("param_to_test", [ ("change_nothing", None), ("do_CAR", False), diff --git a/.github/workflows/test_kilosort4.yml b/.github/workflows/test_kilosort4.yml index c216be20d0..3ad61c0d2e 100644 --- a/.github/workflows/test_kilosort4.yml +++ b/.github/workflows/test_kilosort4.yml @@ -44,7 +44,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.12"] + python-version: ["3.10"] # TODO: just checking python version is not cause of failing test. os: [ubuntu-latest] ks_version: ${{ fromJson(needs.versions.outputs.matrix) }} steps: diff --git a/src/spikeinterface/sorters/external/kilosort4.py b/src/spikeinterface/sorters/external/kilosort4.py index 82c033f61d..811a6e8452 100644 --- a/src/spikeinterface/sorters/external/kilosort4.py +++ b/src/spikeinterface/sorters/external/kilosort4.py @@ -36,7 +36,7 @@ class Kilosort4Sorter(BaseSorter): "drift_smoothing": [0.5, 0.5, 0.5], "nt0min": None, "dmin": None, - "dminx": 32 if version.parse(importlib_version("kilosort")) > version.parse("4.0.0.1") else None, + "dminx": 32 if version.parse(importlib_version("kilosort")) > version.parse("4.0.2") else None, "min_template_size": 10, "template_sizes": 5, "nearest_chans": 10, @@ -128,7 +128,7 @@ def is_installed(cls): @classmethod def get_sorter_version(cls): - """kilosort version <0.0.10 is always '4'""" + """kilosort version <4.0.10 is always '4'""" return importlib_version("kilosort") @classmethod From 867729102ee5a76f412f1a8e7c025ceefadb7bff Mon Sep 17 00:00:00 2001 From: JoeZiminski Date: Thu, 27 Jun 2024 09:37:21 +0100 Subject: [PATCH 020/187] Don't support 4.0.4 --- .github/scripts/check_kilosort4_releases.py | 7 +++++++ .github/scripts/test_kilosort4_ci.py | 3 ++- .github/workflows/test_kilosort4.yml | 2 +- src/spikeinterface/sorters/external/kilosort4.py | 5 +++++ 4 files changed, 15 insertions(+), 2 deletions(-) diff --git a/.github/scripts/check_kilosort4_releases.py b/.github/scripts/check_kilosort4_releases.py index 3d04d6948a..9572f88330 100644 --- a/.github/scripts/check_kilosort4_releases.py +++ b/.github/scripts/check_kilosort4_releases.py @@ -6,14 +6,21 @@ def get_pypi_versions(package_name): + """ + Make an API call to pypi to retrieve all + available versions of the kilosort package. 
+ """ url = f"https://pypi.org/pypi/{package_name}/json" response = requests.get(url) response.raise_for_status() data = response.json() + versions = list(sorted(data["releases"].keys())) + versions.pop(versions.index("4.0.4")) return list(sorted(data["releases"].keys())) if __name__ == "__main__": + # Get all KS4 versions from pipi and write to file. package_name = "kilosort" versions = get_pypi_versions(package_name) with open(Path(os.path.realpath(__file__)).parent / "kilosort4-latest-version.json", "w") as f: diff --git a/.github/scripts/test_kilosort4_ci.py b/.github/scripts/test_kilosort4_ci.py index 3e74fa708e..c894ed71ff 100644 --- a/.github/scripts/test_kilosort4_ci.py +++ b/.github/scripts/test_kilosort4_ci.py @@ -342,7 +342,7 @@ def _check_arguments(self, object_, expected_arguments): # Full Test #### @pytest.mark.parametrize("parameter", PARAMS_TO_TEST) - def test_kilosort4(self, recording_and_paths, default_results, tmp_path, parameter): + def test_kilosort4_main(self, recording_and_paths, default_results, tmp_path, parameter): """ Given a recording, paths to raw data, and a parameter to change, run KS4 natively and within the SpikeInterface wrapper with the @@ -398,6 +398,7 @@ def test_kilosort4(self, recording_and_paths, default_results, tmp_path, paramet if parse(version("kilosort")) > parse("4.0.4"): self._check_test_parameters_are_changing_the_output(results, default_results, param_key) + @pytest.mark.skipif(parse(version("kilosort")) == parse("4.0.9"), reason="nblock=0 fails on KS4=4.0.9") def test_kilosort4_no_correction(self, recording_and_paths, tmp_path): """ Test the SpikeInterface wrappers `do_correction` argument. We set diff --git a/.github/workflows/test_kilosort4.yml b/.github/workflows/test_kilosort4.yml index 3ad61c0d2e..03db2b6170 100644 --- a/.github/workflows/test_kilosort4.yml +++ b/.github/workflows/test_kilosort4.yml @@ -44,7 +44,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.10"] # TODO: just checking python version is not cause of failing test. + python-version: ["3.12"] # TODO: just checking python version is not cause of failing test. os: [ubuntu-latest] ks_version: ${{ fromJson(needs.versions.outputs.matrix) }} steps: diff --git a/src/spikeinterface/sorters/external/kilosort4.py b/src/spikeinterface/sorters/external/kilosort4.py index 811a6e8452..55e694a02f 100644 --- a/src/spikeinterface/sorters/external/kilosort4.py +++ b/src/spikeinterface/sorters/external/kilosort4.py @@ -163,6 +163,11 @@ def _run_from_folder(cls, sorter_output_folder, params, verbose): logging.basicConfig(level=logging.INFO) + if cls.get_sorter_version() == version.parse("4.0.4"): + raise RuntimeError( + "Kilosort version 4.0.4 is not supported" "in SpikeInterface. Please change Kilosort version." + ) + sorter_output_folder = sorter_output_folder.absolute() probe_filename = sorter_output_folder / "probe.prb" From 21caaf99bd93e7189725acc3de3079264e79d710 Mon Sep 17 00:00:00 2001 From: JoeZiminski Date: Thu, 27 Jun 2024 10:09:58 +0100 Subject: [PATCH 021/187] Remove support for versions earlier that 4.0.5. 
---
 .github/scripts/check_kilosort4_releases.py      |  5 +++--
 src/spikeinterface/sorters/external/kilosort4.py | 10 ++++++----
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/.github/scripts/check_kilosort4_releases.py b/.github/scripts/check_kilosort4_releases.py
index 9572f88330..05d8c0c614 100644
--- a/.github/scripts/check_kilosort4_releases.py
+++ b/.github/scripts/check_kilosort4_releases.py
@@ -3,7 +3,7 @@
 from pathlib import Path
 import requests
 import json
-
+from packaging.version import parse
 def get_pypi_versions(package_name):
     """
@@ -15,8 +15,9 @@ def get_pypi_versions(package_name):
     response.raise_for_status()
     data = response.json()
     versions = list(sorted(data["releases"].keys()))
+    versions = [ver for ver in versions if parse(ver) >= parse("4.0.5")]
     versions.pop(versions.index("4.0.4"))
-    return list(sorted(data["releases"].keys()))
+    return versions


 if __name__ == "__main__":
diff --git a/src/spikeinterface/sorters/external/kilosort4.py b/src/spikeinterface/sorters/external/kilosort4.py
index 55e694a02f..dba28f7244 100644
--- a/src/spikeinterface/sorters/external/kilosort4.py
+++ b/src/spikeinterface/sorters/external/kilosort4.py
@@ -36,7 +36,7 @@ class Kilosort4Sorter(BaseSorter):
         "drift_smoothing": [0.5, 0.5, 0.5],
         "nt0min": None,
         "dmin": None,
-        "dminx": 32 if version.parse(importlib_version("kilosort")) > version.parse("4.0.2") else None,
+        "dminx": 32,
         "min_template_size": 10,
         "template_sizes": 5,
         "nearest_chans": 10,
@@ -51,7 +51,7 @@ class Kilosort4Sorter(BaseSorter):
         "cluster_downsampling": 20,
         "cluster_pcs": 64,
         "x_centers": None,
-        "duplicate_spike_bins": 7 if version.parse(importlib_version("kilosort")) >= version.parse("4.0.4") else 15,
+        "duplicate_spike_bins": 7,
         "do_correction": True,
         "keep_good_only": False,
         "save_extra_kwargs": False,
@@ -163,9 +163,11 @@ def _run_from_folder(cls, sorter_output_folder, params, verbose):

         logging.basicConfig(level=logging.INFO)

-        if cls.get_sorter_version() == version.parse("4.0.4"):
+        if cls.get_sorter_version() < version.parse("4.0.5"):
             raise RuntimeError(
-                "Kilosort version 4.0.4 is not supported " "in SpikeInterface. Please change Kilosort version."
+                "Kilosort versions before 4.0.5 are not supported "
+                "in SpikeInterface. "
+                "Please upgrade Kilosort version."
             )

         sorter_output_folder = sorter_output_folder.absolute()

From 9bc18978fbb56917b0f4fe46df7c3bc531f850a4 Mon Sep 17 00:00:00 2001
From: JoeZiminski
Date: Thu, 27 Jun 2024 10:40:50 +0100
Subject: [PATCH 022/187] Add packaging to CI dependency.
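
Besides adding `packaging` to the CI install, this fixes the version guard
introduced in the previous commit: `get_sorter_version()` returns a plain
string, and ordering a str against a packaging `Version` raises a
TypeError, so both sides must be parsed. A minimal sketch of the corrected
logic (`check_minimum_ks4_version` is an illustrative helper, not code
from this diff):

    from packaging import version

    def check_minimum_ks4_version(installed: str, minimum: str = "4.0.5") -> None:
        # Parse both operands so "<" compares Version objects, not strings.
        if version.parse(installed) < version.parse(minimum):
            raise RuntimeError(f"Kilosort versions before {minimum} are not supported in SpikeInterface.")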
---
 .github/scripts/check_kilosort4_releases.py      | 1 -
 .github/workflows/test_kilosort4.yml             | 2 +-
 src/spikeinterface/sorters/external/kilosort4.py | 2 +-
 3 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/.github/scripts/check_kilosort4_releases.py b/.github/scripts/check_kilosort4_releases.py
index 05d8c0c614..de11dc974b 100644
--- a/.github/scripts/check_kilosort4_releases.py
+++ b/.github/scripts/check_kilosort4_releases.py
@@ -16,7 +16,6 @@ def get_pypi_versions(package_name):
     data = response.json()
     versions = list(sorted(data["releases"].keys()))
     versions = [ver for ver in versions if parse(ver) >= parse("4.0.5")]
-    versions.pop(versions.index("4.0.4"))
     return versions

diff --git a/.github/workflows/test_kilosort4.yml b/.github/workflows/test_kilosort4.yml
index 03db2b6170..088dd1a6a4 100644
--- a/.github/workflows/test_kilosort4.yml
+++ b/.github/workflows/test_kilosort4.yml
@@ -25,7 +25,7 @@ jobs:

       - name: Install dependencies
         run: |
-          pip install requests
+          pip install requests packaging

       - name: Fetch package versions from PyPI
         run: |
diff --git a/src/spikeinterface/sorters/external/kilosort4.py b/src/spikeinterface/sorters/external/kilosort4.py
index dba28f7244..eb1df7c455 100644
--- a/src/spikeinterface/sorters/external/kilosort4.py
+++ b/src/spikeinterface/sorters/external/kilosort4.py
@@ -163,7 +163,7 @@ def _run_from_folder(cls, sorter_output_folder, params, verbose):

         logging.basicConfig(level=logging.INFO)

-        if cls.get_sorter_version() < version.parse("4.0.5"):
+        if version.parse(cls.get_sorter_version()) < version.parse("4.0.5"):
             raise RuntimeError(
                 "Kilosort versions before 4.0.5 are not supported "
                 "in SpikeInterface. "

From 23d2c77533a2bc65791bd6d07eda9b8723133c33 Mon Sep 17 00:00:00 2001
From: JoeZiminski
Date: Thu, 27 Jun 2024 12:30:05 +0100
Subject: [PATCH 023/187] Add some more documentation to .yml

---
 .github/workflows/test_kilosort4.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/test_kilosort4.yml b/.github/workflows/test_kilosort4.yml
index 088dd1a6a4..13d70acf88 100644
--- a/.github/workflows/test_kilosort4.yml
+++ b/.github/workflows/test_kilosort4.yml
@@ -11,6 +11,8 @@ on:

 jobs:
   versions:
+    # Poll PyPI for all released KS4 versions >= 4.0.5, save to JSON
+    # and store them in a matrix for the next job.
     runs-on: ubuntu-latest
     outputs:
       matrix: ${{ steps.set-matrix.outputs.matrix }}

From 1bad6d6e3fb26b4f3e4bae9876bf25b056077280 Mon Sep 17 00:00:00 2001
From: JoeZiminski
Date: Thu, 18 Jul 2024 16:01:22 +0100
Subject: [PATCH 024/187] Remove unused rng.

---
 src/spikeinterface/generation/drifting_generator.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/src/spikeinterface/generation/drifting_generator.py b/src/spikeinterface/generation/drifting_generator.py
index b439c57c52..7f8682035c 100644
--- a/src/spikeinterface/generation/drifting_generator.py
+++ b/src/spikeinterface/generation/drifting_generator.py
@@ -348,9 +348,6 @@ def generate_drifting_recording(

     This can be helpfull for motion benchmark.
     """
-
-    rng = np.random.default_rng(seed=seed)
-
     # probe
     if generate_probe_kwargs is None:
         generate_probe_kwargs = _toy_probes[probe_name]

From 0f9c32cbdb82ce48120830fe55c88d0376a350b8 Mon Sep 17 00:00:00 2001
From: JoeZiminski
Date: Thu, 18 Jul 2024 17:25:00 +0100
Subject: [PATCH 025/187] Add 'int' type to 'upsample_vector' on 'InjectTemplatesRecording'.
---
 src/spikeinterface/core/generate.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/spikeinterface/core/generate.py b/src/spikeinterface/core/generate.py
index 62aa7f37c3..e53f8cc539 100644
--- a/src/spikeinterface/core/generate.py
+++ b/src/spikeinterface/core/generate.py
@@ -1714,7 +1714,7 @@ def __init__(
         amplitude_factor: Union[List[List[float]], List[float], float, None] = None,
         parent_recording: Union[BaseRecording, None] = None,
         num_samples: Optional[List[int]] = None,
-        upsample_vector: Union[List[int], None] = None,
+        upsample_vector: Union[List[int], int, None] = None,
         check_borders: bool = False,
     ) -> None:
         templates = np.asarray(templates)

From 73c146f29828d073af08e91a7fd45a38430cff71 Mon Sep 17 00:00:00 2001
From: JoeZiminski
Date: Thu, 18 Jul 2024 18:01:22 +0100
Subject: [PATCH 026/187] Remove some erroneous Optional type hints and convert to | on 'generate_recording'.

---
 src/spikeinterface/core/generate.py | 14 +++++++------- 
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/spikeinterface/core/generate.py b/src/spikeinterface/core/generate.py
index e53f8cc539..e9255d55cc 100644
--- a/src/spikeinterface/core/generate.py
+++ b/src/spikeinterface/core/generate.py
@@ -27,12 +27,12 @@ def _ensure_seed(seed):

 def generate_recording(
-    num_channels: Optional[int] = 2,
-    sampling_frequency: Optional[float] = 30000.0,
-    durations: Optional[List[float]] = [5.0, 2.5],
-    set_probe: Optional[bool] = True,
-    ndim: Optional[int] = 2,
-    seed: Optional[int] = None,
+    num_channels: int = 2,
+    sampling_frequency: float = 30000.0,
+    durations: List[float] = [5.0, 2.5],
+    set_probe: bool | None = True,
+    ndim: int | None = 2,
+    seed: int | None = None,
 ) -> BaseRecording:
     """
     Generate a lazy recording object.
@@ -1090,7 +1090,7 @@ def __init__(
         num_channels: int,
         sampling_frequency: float,
         durations: List[float],
-        noise_levels: float = 1.0,
+        noise_levels: float | np.array = 1.0,
         cov_matrix: Optional[np.array] = None,
         dtype: Optional[Union[np.dtype, str]] = "float32",
         seed: Optional[int] = None,

From e5701f6202106c14c4307e2e541ad8167319dfdd Mon Sep 17 00:00:00 2001
From: JoeZiminski
Date: Thu, 18 Jul 2024 18:01:31 +0100
Subject: [PATCH 027/187] Remove some erroneous Optional type hints and convert to | on 'generate_recording'.

---
 src/spikeinterface/core/generate.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/spikeinterface/core/generate.py b/src/spikeinterface/core/generate.py
index e9255d55cc..c4665a7bd5 100644
--- a/src/spikeinterface/core/generate.py
+++ b/src/spikeinterface/core/generate.py
@@ -1090,7 +1090,7 @@ def __init__(
         num_channels: int,
         sampling_frequency: float,
         durations: List[float],
-        noise_levels: float | np.array = 1.0,
+        noise_levels: float = 1.0,
         cov_matrix: Optional[np.array] = None,
         dtype: Optional[Union[np.dtype, str]] = "float32",
         seed: Optional[int] = None,

From 098f8071b1aa0e7170b04d0966fc35526839b1e9 Mon Sep 17 00:00:00 2001
From: JoeZiminski
Date: Thu, 18 Jul 2024 18:12:56 +0100
Subject: [PATCH 028/187] Convert NoiseGeneratorRecording.

---
 src/spikeinterface/core/generate.py | 14 +++++++------- 
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/spikeinterface/core/generate.py b/src/spikeinterface/core/generate.py
index c4665a7bd5..9037109549 100644
--- a/src/spikeinterface/core/generate.py
+++ b/src/spikeinterface/core/generate.py
@@ -1064,11 +1064,11 @@ class NoiseGeneratorRecording(BaseRecording):
         The number of channels.
     sampling_frequency : float
         The sampling frequency of the recorder.
     durations : List[float]
         The durations of each segment in seconds.
Note that the length of this list is the number of segments. noise_levels: float or array, default: 1 Std of the white noise (if an array, defined by per channels) - cov_matrix: np.array, default None + cov_matrix: np.array | None, default None The covariance matrix of the noise - dtype : Optional[Union[np.dtype, str]], default: "float32" + dtype : np.dtype | str |None, default: "float32" The dtype of the recording. Note that only np.float32 and np.float64 are supported. - seed : Optional[int], default: None + seed : int | None, default: None The seed for np.random.default_rng. strategy : "tile_pregenerated" or "on_the_fly" The strategy of generating noise chunk: @@ -1090,10 +1090,10 @@ def __init__( num_channels: int, sampling_frequency: float, durations: List[float], - noise_levels: float = 1.0, - cov_matrix: Optional[np.array] = None, - dtype: Optional[Union[np.dtype, str]] = "float32", - seed: Optional[int] = None, + noise_levels: float | np.array = 1.0, + cov_matrix: np.array | None = None, + dtype: np.dtype | str | None = "float32", + seed: int | None = None, strategy: Literal["tile_pregenerated", "on_the_fly"] = "tile_pregenerated", noise_block_size: int = 30000, ): From db0c30d37d443fcffad7caead7412d672e553d81 Mon Sep 17 00:00:00 2001 From: JoeZiminski Date: Thu, 18 Jul 2024 18:13:27 +0100 Subject: [PATCH 029/187] Remove duplicate noise level keys in NoiseGeneratorRecording. --- src/spikeinterface/core/generate.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/spikeinterface/core/generate.py b/src/spikeinterface/core/generate.py index 9037109549..a3e77b57f0 100644 --- a/src/spikeinterface/core/generate.py +++ b/src/spikeinterface/core/generate.py @@ -1150,7 +1150,6 @@ def __init__( "sampling_frequency": sampling_frequency, "noise_levels": noise_levels, "cov_matrix": cov_matrix, - "noise_levels": noise_levels, "dtype": dtype, "seed": seed, "strategy": strategy, From 013c834aba8ad0f0ac6a956db30bdc4a5e8b3598 Mon Sep 17 00:00:00 2001 From: JoeZiminski Date: Thu, 18 Jul 2024 18:16:44 +0100 Subject: [PATCH 030/187] substitute get_traces(). --- src/spikeinterface/core/generate.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/spikeinterface/core/generate.py b/src/spikeinterface/core/generate.py index a3e77b57f0..0d95668f2e 100644 --- a/src/spikeinterface/core/generate.py +++ b/src/spikeinterface/core/generate.py @@ -1204,9 +1204,9 @@ def get_num_samples(self) -> int: def get_traces( self, - start_frame: Union[int, None] = None, - end_frame: Union[int, None] = None, - channel_indices: Union[List, None] = None, + start_frame: int | None = None, + end_frame: int | None = None, + channel_indices: List | None = None, ) -> np.ndarray: start_frame_within_block = start_frame % self.noise_block_size end_frame_within_block = end_frame % self.noise_block_size From 0cede16b03ca2b404783806353b30591e0116d03 Mon Sep 17 00:00:00 2001 From: JoeZiminski Date: Thu, 18 Jul 2024 18:17:17 +0100 Subject: [PATCH 031/187] Remove unused argument to generate_recording_by_size. 
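
The `num_channels` argument was never used in the function body, so calls
are unaffected apart from dropping the kwarg. A sketch of usage after this
change (illustrative; the import path assumes the function is used
directly from the module this diff edits):

    from spikeinterface.core.generate import generate_recording_by_size

    # ~1 GiB of lazily generated noise traces, deterministic via the seed.
    recording = generate_recording_by_size(full_traces_size_GiB=1.0, seed=0)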
--- src/spikeinterface/core/generate.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/spikeinterface/core/generate.py b/src/spikeinterface/core/generate.py index 0d95668f2e..b48d8b40df 100644 --- a/src/spikeinterface/core/generate.py +++ b/src/spikeinterface/core/generate.py @@ -1260,7 +1260,6 @@ def get_traces( def generate_recording_by_size( full_traces_size_GiB: float, - num_channels: int = 384, seed: Optional[int] = None, strategy: Literal["tile_pregenerated", "on_the_fly"] = "tile_pregenerated", ) -> NoiseGeneratorRecording: From bbc55b48233e1cca578c69bb6af0223fc0e0c1d0 Mon Sep 17 00:00:00 2001 From: JoeZiminski Date: Thu, 18 Jul 2024 18:17:38 +0100 Subject: [PATCH 032/187] Convert 'generate_recording_by_size'. --- src/spikeinterface/core/generate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spikeinterface/core/generate.py b/src/spikeinterface/core/generate.py index b48d8b40df..41b44792be 100644 --- a/src/spikeinterface/core/generate.py +++ b/src/spikeinterface/core/generate.py @@ -1260,7 +1260,7 @@ def get_traces( def generate_recording_by_size( full_traces_size_GiB: float, - seed: Optional[int] = None, + seed: int | None = None, strategy: Literal["tile_pregenerated", "on_the_fly"] = "tile_pregenerated", ) -> NoiseGeneratorRecording: """ From 52b0052a6f2d4b4686e3d10f3937cf387d9c145a Mon Sep 17 00:00:00 2001 From: JoeZiminski Date: Thu, 18 Jul 2024 18:26:57 +0100 Subject: [PATCH 033/187] Add check for None in 'NoiseGeneratorRecordingSegment' get_traces(). --- src/spikeinterface/core/generate.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/spikeinterface/core/generate.py b/src/spikeinterface/core/generate.py index 62aa7f37c3..3db3960a8a 100644 --- a/src/spikeinterface/core/generate.py +++ b/src/spikeinterface/core/generate.py @@ -1209,6 +1209,12 @@ def get_traces( end_frame: Union[int, None] = None, channel_indices: Union[List, None] = None, ) -> np.ndarray: + + if start_frame is None: + start_frame = 0 + if end_frame is None: + end_frame = self.get_num_samples() + start_frame_within_block = start_frame % self.noise_block_size end_frame_within_block = end_frame % self.noise_block_size num_samples = end_frame - start_frame From c5af7f36b75d1f37d0e7e2d54ff81383a7928cc2 Mon Sep 17 00:00:00 2001 From: JoeZiminski Date: Thu, 18 Jul 2024 18:40:02 +0100 Subject: [PATCH 034/187] Fix type hints on InjectTemplatesRecording and convert. --- src/spikeinterface/core/generate.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/spikeinterface/core/generate.py b/src/spikeinterface/core/generate.py index 41b44792be..b9ab8f6d25 100644 --- a/src/spikeinterface/core/generate.py +++ b/src/spikeinterface/core/generate.py @@ -1694,7 +1694,7 @@ class InjectTemplatesRecording(BaseRecording): num_samples: list[int] | int | None The number of samples in the recording per segment. You can use int for mono-segment objects. - upsample_vector: np.array or None, default: None. + upsample_vector: np.array | None, default: None. When templates is 4d we can simulate a jitter. Optional the upsample_vector is the jitter index with a number per spike in range 0-templates.shape[3]. 
@@ -1708,11 +1708,11 @@ def __init__( self, sorting: BaseSorting, templates: np.ndarray, - nbefore: Union[List[int], int, None] = None, - amplitude_factor: Union[List[List[float]], List[float], float, None] = None, - parent_recording: Union[BaseRecording, None] = None, - num_samples: Optional[List[int]] = None, - upsample_vector: Union[List[int], int, None] = None, + nbefore: List[int] | int | None = None, + amplitude_factor: List[List[float]] | List[float] | float | None = None, + parent_recording: BaseRecording | None = None, + num_samples: List[int] | int | None = None, + upsample_vector: np.array | None = None, check_borders: bool = False, ) -> None: templates = np.asarray(templates) From 16bf359de2c364f9681f193601aeb46304053762 Mon Sep 17 00:00:00 2001 From: JoeZiminski Date: Thu, 18 Jul 2024 18:43:55 +0100 Subject: [PATCH 035/187] Remove bad type hint on 'InjectTemplatesRecording'. --- src/spikeinterface/core/generate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spikeinterface/core/generate.py b/src/spikeinterface/core/generate.py index b9ab8f6d25..21edae8447 100644 --- a/src/spikeinterface/core/generate.py +++ b/src/spikeinterface/core/generate.py @@ -1709,7 +1709,7 @@ def __init__( sorting: BaseSorting, templates: np.ndarray, nbefore: List[int] | int | None = None, - amplitude_factor: List[List[float]] | List[float] | float | None = None, + amplitude_factor: List[float] | float | None = None, parent_recording: BaseRecording | None = None, num_samples: List[int] | int | None = None, upsample_vector: np.array | None = None, From 1760d0f5a213356390069b387d4674fafbd314bb Mon Sep 17 00:00:00 2001 From: JoeZiminski Date: Thu, 18 Jul 2024 18:54:30 +0100 Subject: [PATCH 036/187] Fix all other cases --- src/spikeinterface/core/generate.py | 60 ++++++++++++++--------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/src/spikeinterface/core/generate.py b/src/spikeinterface/core/generate.py index 21edae8447..6fc21a34dc 100644 --- a/src/spikeinterface/core/generate.py +++ b/src/spikeinterface/core/generate.py @@ -2,7 +2,7 @@ import math import warnings import numpy as np -from typing import Union, Optional, List, Literal +from typing import List, Literal from math import ceil from .basesorting import SpikeVectorSortingSegment @@ -47,10 +47,10 @@ def generate_recording( durations: List[float], default: [5.0, 2.5] The duration in seconds of each segment in the recording, default: [5.0, 2.5]. Note that the number of segments is determined by the length of this list. - set_probe: bool, default: True - ndim : int, default: 2 + set_probe: bool | None, default: True + ndim : int | None, default: 2 The number of dimensions of the probe, default: 2. Set to 3 to make 3 dimensional probe. - seed : Optional[int] + seed : int | None, default: None A seed for the np.ramdom.default_rng function Returns @@ -253,13 +253,13 @@ def generate_sorting_to_inject( num_samples: list of size num_segments. The number of samples in all the segments of the sorting, to generate spike times covering entire the entire duration of the segments. - max_injected_per_unit: int, default 1000 + max_injected_per_unit: int, default: 1000 The maximal number of spikes injected per units. - injected_rate: float, default 0.05 + injected_rate: float, default: 0.05 The rate at which spikes are injected. - refractory_period_ms: float, default 1.5 + refractory_period_ms: float, default: 1.5 The refractory period that should not be violated while injecting new spikes. 
-    seed: int, default None
+    seed : int, default: None
         The random seed.

     Returns
@@ -313,13 +313,13 @@ class TransformSorting(BaseSorting):
     ----------
     sorting : BaseSorting
         The sorting object.
-    added_spikes_existing_units : np.array (spike_vector)
+    added_spikes_existing_units : np.array (spike_vector) | None, default: None
         The spikes that should be added to the sorting object, for existing units.
-    added_spikes_new_units: np.array (spike_vector)
+    added_spikes_new_units: np.array (spike_vector) | None, default: None
         The spikes that should be added to the sorting object, for new units.
-    new_units_ids: list
+    new_unit_ids: list[str | int] | None, default: None
         The unit_ids that should be added if spikes for new units are added.
-    refractory_period_ms : float, default None
+    refractory_period_ms : float | None, default: None
         The refractory period violation to prevent duplicates and/or
         unphysiological addition of spikes. Any spike times in added_spikes
         violating the refractory period will be discarded.
@@ -333,10 +333,10 @@ class TransformSorting(BaseSorting):
     def __init__(
         self,
         sorting: BaseSorting,
-        added_spikes_existing_units=None,
-        added_spikes_new_units=None,
-        new_unit_ids: Optional[List[Union[str, int]]] = None,
-        refractory_period_ms: Optional[float] = None,
+        added_spikes_existing_units: np.array | None = None,
+        added_spikes_new_units: np.array | None = None,
+        new_unit_ids: list[str | int] | None = None,
+        refractory_period_ms: float | None = None,
     ):
         sampling_frequency = sorting.get_sampling_frequency()
         unit_ids = list(sorting.get_unit_ids())
@@ -432,7 +432,7 @@ def add_from_sorting(sorting1: BaseSorting, sorting2: BaseSorting, refractory_pe
            The first sorting.
        sorting2: BaseSorting
            The second sorting.
-        refractory_period_ms : float, default None
+        refractory_period_ms : float, default: None
            The refractory period violation to prevent duplicates and/or
            unphysiological addition of spikes. Any spike times in added_spikes
            violating the refractory period will be discarded.
@@ -498,7 +498,7 @@ def add_from_unit_dict(
            The first sorting
        dict_list: list of dict
            A list of dict with unit_ids as keys and spike times as values.
-        refractory_period_ms : float, default None
+        refractory_period_ms : float, default: None
            The refractory period violation to prevent duplicates and/or
            unphysiological addition of spikes. Any spike times in added_spikes
            violating the refractory period will be discarded.
@@ -528,7 +528,7 @@ def from_times_labels(
        unit_ids: list or None, default: None
            The explicit list of unit_ids that should be extracted from labels_list
            If None, then it will be np.unique(labels_list).
-        refractory_period_ms : float, default None
+        refractory_period_ms : float, default: None
            The refractory period violation to prevent duplicates and/or
            unphysiological addition of spikes. Any spike times in added_spikes
            violating the refractory period will be discarded.
@@ -1064,7 +1064,7 @@ class NoiseGeneratorRecording(BaseRecording):
     noise_levels: float or array, default: 1
         Std of the white noise (if an array, defined by per channels)
-    cov_matrix: np.array | None, default None
+    cov_matrix: np.array | None, default: None
         The covariance matrix of the noise
     dtype : np.dtype | str |None, default: "float32"
@@ -1279,7 +1279,7 @@ def generate_recording_by_size(
        The size in gigabytes (GiB) of the recording.
num_channels: int Number of channels. - seed : int, default: None + seed : int | None, default: None The seed for np.random.default_rng. Returns @@ -1688,10 +1688,10 @@ class InjectTemplatesRecording(BaseRecording): Can be None (no scaling). Can be scalar all spikes have the same factor (certainly useless). Can be a vector with same shape of spike_vector of the sorting. - parent_recording: BaseRecording | None + parent_recording: BaseRecording | None, default: None The recording over which to add the templates. If None, will default to traces containing all 0. - num_samples: list[int] | int | None + num_samples: list[int] | int | None, default: None The number of samples in the recording per segment. You can use int for mono-segment objects. upsample_vector: np.array | None, default: None. @@ -1844,10 +1844,10 @@ def __init__( spike_vector: np.ndarray, templates: np.ndarray, nbefore: int, - amplitude_vector: Union[List[float], None], - upsample_vector: Union[List[float], None], - parent_recording_segment: Union[BaseRecordingSegment, None] = None, - num_samples: Union[int, None] = None, + amplitude_vector: List[float] | None, + upsample_vector: List[float] | None, + parent_recording_segment: BaseRecordingSegment | None = None, + num_samples: int | None = None, ) -> None: BaseRecordingSegment.__init__( self, @@ -1867,9 +1867,9 @@ def __init__( def get_traces( self, - start_frame: Union[int, None] = None, - end_frame: Union[int, None] = None, - channel_indices: Union[List, None] = None, + start_frame: int | None = None, + end_frame: int | None = None, + channel_indices: List | None = None, ) -> np.ndarray: if channel_indices is None: n_channels = self.templates.shape[2] From f694e30e06ffaf3895ff3cbafd060551aabedd08 Mon Sep 17 00:00:00 2001 From: JoeZiminski Date: Thu, 18 Jul 2024 18:55:40 +0100 Subject: [PATCH 037/187] List -> list. --- src/spikeinterface/core/generate.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/spikeinterface/core/generate.py b/src/spikeinterface/core/generate.py index 6fc21a34dc..ea58ab6ef8 100644 --- a/src/spikeinterface/core/generate.py +++ b/src/spikeinterface/core/generate.py @@ -2,7 +2,7 @@ import math import warnings import numpy as np -from typing import List, Literal +from typing import Literal from math import ceil from .basesorting import SpikeVectorSortingSegment @@ -29,7 +29,7 @@ def _ensure_seed(seed): def generate_recording( num_channels: int = 2, sampling_frequency: float = 30000.0, - durations: List[float] = [5.0, 2.5], + durations: list[float] = [5.0, 2.5], set_probe: bool | None = True, ndim: int | None = 2, seed: int | None = None, @@ -44,7 +44,7 @@ def generate_recording( The number of channels in the recording. sampling_frequency : float, default: 30000. (in Hz) The sampling frequency of the recording, default: 30000. - durations: List[float], default: [5.0, 2.5] + durations: list[float], default: [5.0, 2.5] The duration in seconds of each segment in the recording, default: [5.0, 2.5]. Note that the number of segments is determined by the length of this list. 
set_probe: bool | None, default: True @@ -236,7 +236,7 @@ def add_synchrony_to_sorting(sorting, sync_event_ratio=0.3, seed=None): def generate_sorting_to_inject( sorting: BaseSorting, - num_samples: List[int], + num_samples: list[int], max_injected_per_unit: int = 1000, injected_rate: float = 0.05, refractory_period_ms: float = 1.5, @@ -335,7 +335,7 @@ def __init__( sorting: BaseSorting, added_spikes_existing_units: np.array | None = None, added_spikes_new_units: np.array | None = None, - new_unit_ids: List[str | int] | None = None, + new_unit_ids: list[str | int] | None = None, refractory_period_ms: float | None = None, ): sampling_frequency = sorting.get_sampling_frequency() @@ -1060,7 +1060,7 @@ class NoiseGeneratorRecording(BaseRecording): The number of channels. sampling_frequency : float The sampling frequency of the recorder. - durations : List[float] + durations : list[float] The durations of each segment in seconds. Note that the length of this list is the number of segments. noise_levels: float or array, default: 1 Std of the white noise (if an array, defined by per channels) @@ -1089,7 +1089,7 @@ def __init__( self, num_channels: int, sampling_frequency: float, - durations: List[float], + durations: list[float], noise_levels: float | np.array = 1.0, cov_matrix: np.array | None = None, dtype: np.dtype | str | None = "float32", @@ -1206,7 +1206,7 @@ def get_traces( self, start_frame: int | None = None, end_frame: int | None = None, - channel_indices: List | None = None, + channel_indices: list | None = None, ) -> np.ndarray: start_frame_within_block = start_frame % self.noise_block_size end_frame_within_block = end_frame % self.noise_block_size @@ -1708,10 +1708,10 @@ def __init__( self, sorting: BaseSorting, templates: np.ndarray, - nbefore: List[int] | int | None = None, - amplitude_factor: List[float] | float | None = None, + nbefore: list[int] | int | None = None, + amplitude_factor: list[float] | float | None = None, parent_recording: BaseRecording | None = None, - num_samples: List[int] | int | None = None, + num_samples: list[int] | int | None = None, upsample_vector: np.array | None = None, check_borders: bool = False, ) -> None: @@ -1844,8 +1844,8 @@ def __init__( spike_vector: np.ndarray, templates: np.ndarray, nbefore: int, - amplitude_vector: List[float] | None, - upsample_vector: List[float] | None, + amplitude_vector: list[float] | None, + upsample_vector: list[float] | None, parent_recording_segment: BaseRecordingSegment | None = None, num_samples: int | None = None, ) -> None: @@ -1869,7 +1869,7 @@ def get_traces( self, start_frame: int | None = None, end_frame: int | None = None, - channel_indices: List | None = None, + channel_indices: list | None = None, ) -> np.ndarray: if channel_indices is None: n_channels = self.templates.shape[2] From 8b84711c0272ce9d35f248a3ad20c22fa3f51730 Mon Sep 17 00:00:00 2001 From: Charlie Windolf Date: Fri, 19 Jul 2024 13:20:11 -0700 Subject: [PATCH 038/187] Handle case where channel count changes from probeA to probeB --- src/spikeinterface/generation/drift_tools.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/spikeinterface/generation/drift_tools.py b/src/spikeinterface/generation/drift_tools.py index 0e4f1985c6..42b6ca99dd 100644 --- a/src/spikeinterface/generation/drift_tools.py +++ b/src/spikeinterface/generation/drift_tools.py @@ -40,9 +40,13 @@ def interpolate_templates(templates_array, source_locations, dest_locations, int source_locations = np.asarray(source_locations) dest_locations = 
np.asarray(dest_locations) if dest_locations.ndim == 2: - new_shape = templates_array.shape + new_shape = (*templates_array.shape[:2], len(dest_locations)) elif dest_locations.ndim == 3: - new_shape = (dest_locations.shape[0],) + templates_array.shape + new_shape = ( + dest_locations.shape[0], + *templates_array.shape[:2], + dest_locations.shape[1], + ) else: raise ValueError(f"Incorrect dimensions for dest_locations: {dest_locations.ndim}. Dimensions can be 2 or 3. ") From bc2cc8a965ec8871c7a8f91edde8bf104792fde5 Mon Sep 17 00:00:00 2001 From: Joe Ziminski <55797454+JoeZiminski@users.noreply.github.com> Date: Mon, 22 Jul 2024 16:22:18 +0100 Subject: [PATCH 039/187] space before colon Co-authored-by: Zach McKenzie <92116279+zm711@users.noreply.github.com> --- src/spikeinterface/core/generate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spikeinterface/core/generate.py b/src/spikeinterface/core/generate.py index ea58ab6ef8..04d2135670 100644 --- a/src/spikeinterface/core/generate.py +++ b/src/spikeinterface/core/generate.py @@ -44,7 +44,7 @@ def generate_recording( The number of channels in the recording. sampling_frequency : float, default: 30000. (in Hz) The sampling frequency of the recording, default: 30000. - durations: list[float], default: [5.0, 2.5] + durations : list[float], default: [5.0, 2.5] The duration in seconds of each segment in the recording, default: [5.0, 2.5]. Note that the number of segments is determined by the length of this list. set_probe: bool | None, default: True From ff66a3815663e9f4909a505231008d5bd1779fb7 Mon Sep 17 00:00:00 2001 From: Joe Ziminski <55797454+JoeZiminski@users.noreply.github.com> Date: Mon, 22 Jul 2024 16:22:28 +0100 Subject: [PATCH 040/187] space before colon Co-authored-by: Zach McKenzie <92116279+zm711@users.noreply.github.com> --- src/spikeinterface/core/generate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spikeinterface/core/generate.py b/src/spikeinterface/core/generate.py index 04d2135670..325008a4f2 100644 --- a/src/spikeinterface/core/generate.py +++ b/src/spikeinterface/core/generate.py @@ -259,7 +259,7 @@ def generate_sorting_to_inject( The rate at which spikes are injected. refractory_period_ms: float, default: 1.5 The refractory period that should not be violated while injecting new spikes. - seed: int, default: None + seed : int, default: None The random seed. Returns From 90b366dfe4b5fd7c4a73b858b5aff844d4703f1e Mon Sep 17 00:00:00 2001 From: Joe Ziminski <55797454+JoeZiminski@users.noreply.github.com> Date: Mon, 22 Jul 2024 16:22:58 +0100 Subject: [PATCH 041/187] space before colon Co-authored-by: Zach McKenzie <92116279+zm711@users.noreply.github.com> --- src/spikeinterface/core/generate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spikeinterface/core/generate.py b/src/spikeinterface/core/generate.py index 325008a4f2..10139918c2 100644 --- a/src/spikeinterface/core/generate.py +++ b/src/spikeinterface/core/generate.py @@ -1688,7 +1688,7 @@ class InjectTemplatesRecording(BaseRecording): Can be None (no scaling). Can be scalar all spikes have the same factor (certainly useless). Can be a vector with same shape of spike_vector of the sorting. - parent_recording: BaseRecording | None, default: None + parent_recording : BaseRecording | None, default: None The recording over which to add the templates. If None, will default to traces containing all 0. 
num_samples: list[int] | int | None, default: None From adc40e6de25a3e001a4a2d0e1385fb18dec35b4a Mon Sep 17 00:00:00 2001 From: Joe Ziminski <55797454+JoeZiminski@users.noreply.github.com> Date: Mon, 22 Jul 2024 16:23:17 +0100 Subject: [PATCH 042/187] space before colon Co-authored-by: Zach McKenzie <92116279+zm711@users.noreply.github.com> --- src/spikeinterface/core/generate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spikeinterface/core/generate.py b/src/spikeinterface/core/generate.py index 10139918c2..d1f9ff97f3 100644 --- a/src/spikeinterface/core/generate.py +++ b/src/spikeinterface/core/generate.py @@ -1694,7 +1694,7 @@ class InjectTemplatesRecording(BaseRecording): num_samples: list[int] | int | None, default: None The number of samples in the recording per segment. You can use int for mono-segment objects. - upsample_vector: np.array | None, default: None. + upsample_vector : np.array | None, default: None. When templates is 4d we can simulate a jitter. Optional the upsample_vector is the jitter index with a number per spike in range 0-templates.shape[3]. From 4c5d198b20806521b013c3ea9d37b067032edd03 Mon Sep 17 00:00:00 2001 From: Joe Ziminski <55797454+JoeZiminski@users.noreply.github.com> Date: Mon, 22 Jul 2024 16:23:32 +0100 Subject: [PATCH 043/187] space before colon Co-authored-by: Zach McKenzie <92116279+zm711@users.noreply.github.com> --- src/spikeinterface/core/generate.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/spikeinterface/core/generate.py b/src/spikeinterface/core/generate.py index d1f9ff97f3..3be5e166ab 100644 --- a/src/spikeinterface/core/generate.py +++ b/src/spikeinterface/core/generate.py @@ -1708,11 +1708,11 @@ def __init__( self, sorting: BaseSorting, templates: np.ndarray, - nbefore: list[int] | int | None = None, - amplitude_factor: list[float] | float | None = None, - parent_recording: BaseRecording | None = None, - num_samples: list[int] | int | None = None, - upsample_vector: np.array | None = None, + nbefore : list[int] | int | None = None, + amplitude_factor : list[float] | float | None = None, + parent_recording : BaseRecording | None = None, + num_samples : list[int] | int | None = None, + upsample_vector : np.array | None = None, check_borders: bool = False, ) -> None: templates = np.asarray(templates) From a9400f999a73a7997c1160a90375d91210c1a5c0 Mon Sep 17 00:00:00 2001 From: Joe Ziminski <55797454+JoeZiminski@users.noreply.github.com> Date: Mon, 22 Jul 2024 16:23:47 +0100 Subject: [PATCH 044/187] space before colon Co-authored-by: Zach McKenzie <92116279+zm711@users.noreply.github.com> --- src/spikeinterface/core/generate.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/spikeinterface/core/generate.py b/src/spikeinterface/core/generate.py index 3be5e166ab..6fc231f6f4 100644 --- a/src/spikeinterface/core/generate.py +++ b/src/spikeinterface/core/generate.py @@ -1844,10 +1844,10 @@ def __init__( spike_vector: np.ndarray, templates: np.ndarray, nbefore: int, - amplitude_vector: list[float] | None, - upsample_vector: list[float] | None, - parent_recording_segment: BaseRecordingSegment | None = None, - num_samples: int | None = None, + amplitude_vector : list[float] | None, + upsample_vector : list[float] | None, + parent_recording_segment : BaseRecordingSegment | None = None, + num_samples : int | None = None, ) -> None: BaseRecordingSegment.__init__( self, From ef4d9e39440bb2f1924b2bba023a2f8d4ebc6c5a Mon Sep 17 00:00:00 2001 From: Joe Ziminski 
<55797454+JoeZiminski@users.noreply.github.com> Date: Mon, 22 Jul 2024 16:24:01 +0100 Subject: [PATCH 045/187] space before colon Co-authored-by: Zach McKenzie <92116279+zm711@users.noreply.github.com> --- src/spikeinterface/core/generate.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/spikeinterface/core/generate.py b/src/spikeinterface/core/generate.py index 6fc231f6f4..d51fe8101c 100644 --- a/src/spikeinterface/core/generate.py +++ b/src/spikeinterface/core/generate.py @@ -1867,9 +1867,9 @@ def __init__( def get_traces( self, - start_frame: int | None = None, - end_frame: int | None = None, - channel_indices: list | None = None, + start_frame : int | None = None, + end_frame : int | None = None, + channel_indices : list | None = None, ) -> np.ndarray: if channel_indices is None: n_channels = self.templates.shape[2] From 6519ffa1e001f5abb37f07d88364af15665fb9c4 Mon Sep 17 00:00:00 2001 From: Joe Ziminski <55797454+JoeZiminski@users.noreply.github.com> Date: Mon, 22 Jul 2024 16:24:12 +0100 Subject: [PATCH 046/187] space before colon Co-authored-by: Zach McKenzie <92116279+zm711@users.noreply.github.com> --- src/spikeinterface/core/generate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spikeinterface/core/generate.py b/src/spikeinterface/core/generate.py index d51fe8101c..1098df8275 100644 --- a/src/spikeinterface/core/generate.py +++ b/src/spikeinterface/core/generate.py @@ -253,7 +253,7 @@ def generate_sorting_to_inject( num_samples: list of size num_segments. The number of samples in all the segments of the sorting, to generate spike times covering entire the entire duration of the segments. - max_injected_per_unit: int, default: 1000 + max_injected_per_unit : int, default: 1000 The maximal number of spikes injected per units. injected_rate: float, default: 0.05 The rate at which spikes are injected. 
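A note on the convention driving this run of "space before colon" commits: numpydoc parses a Parameters entry as `name : type, default: value`, with spaces around the first colon, while PEP 8 keeps `name: type = value`, with no space, in the signature itself. A minimal sketch of the two side by side (the function below is hypothetical, not taken from these diffs):

def example(durations: list[float] | None = None, seed: int | None = None) -> None:
    """
    Illustrate the docstring and signature colon conventions together.

    Parameters
    ----------
    durations : list[float] | None, default: None
        Durations in seconds for each segment.
    seed : int | None, default: None
        The seed for np.random.default_rng.
    """

The commits that follow apply exactly this split: docstrings gain the space, and signatures keep (or, after the pre-commit pass further on, regain) the PEP 8 form.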
From ab80c707be525130f5f50c33dddf1fa3868b08e0 Mon Sep 17 00:00:00 2001 From: Joe Ziminski <55797454+JoeZiminski@users.noreply.github.com> Date: Mon, 22 Jul 2024 16:24:27 +0100 Subject: [PATCH 047/187] space before colon Co-authored-by: Zach McKenzie <92116279+zm711@users.noreply.github.com> --- src/spikeinterface/core/generate.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/spikeinterface/core/generate.py b/src/spikeinterface/core/generate.py index 1098df8275..68aa558543 100644 --- a/src/spikeinterface/core/generate.py +++ b/src/spikeinterface/core/generate.py @@ -1089,11 +1089,11 @@ def __init__( self, num_channels: int, sampling_frequency: float, - durations: list[float], - noise_levels: float | np.array = 1.0, - cov_matrix: np.array | None = None, - dtype: np.dtype | str | None = "float32", - seed: int | None = None, + durations : list[float], + noise_levels : float | np.array = 1.0, + cov_matrix : np.array | None = None, + dtype : np.dtype | str | None = "float32", + seed : int | None = None, strategy: Literal["tile_pregenerated", "on_the_fly"] = "tile_pregenerated", noise_block_size: int = 30000, ): From 061a5fa47004c2aca2b5c0c28d6437a6e1af6d6a Mon Sep 17 00:00:00 2001 From: Joe Ziminski <55797454+JoeZiminski@users.noreply.github.com> Date: Mon, 22 Jul 2024 16:24:42 +0100 Subject: [PATCH 048/187] space before colon Co-authored-by: Zach McKenzie <92116279+zm711@users.noreply.github.com> --- src/spikeinterface/core/generate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spikeinterface/core/generate.py b/src/spikeinterface/core/generate.py index 68aa558543..c73954dcf5 100644 --- a/src/spikeinterface/core/generate.py +++ b/src/spikeinterface/core/generate.py @@ -1064,7 +1064,7 @@ class NoiseGeneratorRecording(BaseRecording): The durations of each segment in seconds. Note that the length of this list is the number of segments. noise_levels: float or array, default: 1 Std of the white noise (if an array, defined by per channels) - cov_matrix: np.array | None, default: None + cov_matrix : np.array | None, default: None The covariance matrix of the noise dtype : np.dtype | str |None, default: "float32" The dtype of the recording. Note that only np.float32 and np.float64 are supported. From 34d09a9d9dd2b5db4213e0f2434ffd033d102ba0 Mon Sep 17 00:00:00 2001 From: Joe Ziminski <55797454+JoeZiminski@users.noreply.github.com> Date: Mon, 22 Jul 2024 16:24:53 +0100 Subject: [PATCH 049/187] space before colon Co-authored-by: Zach McKenzie <92116279+zm711@users.noreply.github.com> --- src/spikeinterface/core/generate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spikeinterface/core/generate.py b/src/spikeinterface/core/generate.py index c73954dcf5..b57767161f 100644 --- a/src/spikeinterface/core/generate.py +++ b/src/spikeinterface/core/generate.py @@ -255,7 +255,7 @@ def generate_sorting_to_inject( covering entire the entire duration of the segments. max_injected_per_unit : int, default: 1000 The maximal number of spikes injected per units. - injected_rate: float, default: 0.05 + injected_rate : float, default: 0.05 The rate at which spikes are injected. refractory_period_ms: float, default: 1.5 The refractory period that should not be violated while injecting new spikes. 
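Worth noting while these signatures are being touched: the `list[float]` and `X | None` annotations this series standardizes on only evaluate at runtime on Python 3.9/3.10+. They stay importable on older interpreters because annotation evaluation is postponed. A self-contained sketch of that mechanism (it assumes, as the hunk offsets suggest but do not show, a `from __future__ import annotations` line at the top of generate.py):

# With PEP 563 postponed evaluation, annotations are stored as strings and
# never executed, so PEP 604 unions ("X | None") and builtin generics
# ("list[float]") in signatures cannot raise at import time.
from __future__ import annotations


def pick_durations(durations: list[float] | None = None) -> list[float]:
    # Runtime code must still avoid the new syntax on old interpreters;
    # only the annotations themselves are deferred.
    return [5.0, 2.5] if durations is None else durations


print(pick_durations())  # -> [5.0, 2.5]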
From 8257cd9a0783c48e93db69011d5960cc80ae1059 Mon Sep 17 00:00:00 2001 From: Joe Ziminski <55797454+JoeZiminski@users.noreply.github.com> Date: Mon, 22 Jul 2024 16:25:33 +0100 Subject: [PATCH 050/187] space before colon Co-authored-by: Zach McKenzie <92116279+zm711@users.noreply.github.com> --- src/spikeinterface/core/generate.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/spikeinterface/core/generate.py b/src/spikeinterface/core/generate.py index b57767161f..4abd407681 100644 --- a/src/spikeinterface/core/generate.py +++ b/src/spikeinterface/core/generate.py @@ -333,10 +333,10 @@ class TransformSorting(BaseSorting): def __init__( self, sorting: BaseSorting, - added_spikes_existing_units: np.array | None = None, - added_spikes_new_units: np.array | None = None, - new_unit_ids: list[str | int] | None = None, - refractory_period_ms: float | None = None, + added_spikes_existing_units : np.array | None = None, + added_spikes_new_units : np.array | None = None, + new_unit_ids : list[str | int] | None = None, + refractory_period_ms : float | None = None, ): sampling_frequency = sorting.get_sampling_frequency() unit_ids = list(sorting.get_unit_ids()) From c175b6b72563661a400752bece72e873e56e1abc Mon Sep 17 00:00:00 2001 From: Joe Ziminski <55797454+JoeZiminski@users.noreply.github.com> Date: Mon, 22 Jul 2024 16:25:48 +0100 Subject: [PATCH 051/187] space before colon Co-authored-by: Zach McKenzie <92116279+zm711@users.noreply.github.com> --- src/spikeinterface/core/generate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spikeinterface/core/generate.py b/src/spikeinterface/core/generate.py index 4abd407681..28cf7ec404 100644 --- a/src/spikeinterface/core/generate.py +++ b/src/spikeinterface/core/generate.py @@ -315,7 +315,7 @@ class TransformSorting(BaseSorting): The sorting object. added_spikes_existing_units : np.array (spike_vector) | None, default: None The spikes that should be added to the sorting object, for existing units. - added_spikes_new_units: np.array (spike_vector) | None, default: None + added_spikes_new_units : np.array (spike_vector) | None, default: None The spikes that should be added to the sorting object, for new units. new_units_ids: list[str, int] | None, default: None The unit_ids that should be added if spikes for new units are added. From 39d46a12aa14db08f23d18fe68f1b9a408be7bf3 Mon Sep 17 00:00:00 2001 From: Joe Ziminski <55797454+JoeZiminski@users.noreply.github.com> Date: Mon, 22 Jul 2024 16:26:04 +0100 Subject: [PATCH 052/187] space before colon Co-authored-by: Zach McKenzie <92116279+zm711@users.noreply.github.com> --- src/spikeinterface/core/generate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spikeinterface/core/generate.py b/src/spikeinterface/core/generate.py index 28cf7ec404..09db185776 100644 --- a/src/spikeinterface/core/generate.py +++ b/src/spikeinterface/core/generate.py @@ -257,7 +257,7 @@ def generate_sorting_to_inject( The maximal number of spikes injected per units. injected_rate : float, default: 0.05 The rate at which spikes are injected. - refractory_period_ms: float, default: 1.5 + refractory_period_ms : float, default: 1.5 The refractory period that should not be violated while injecting new spikes. seed : int, default: None The random seed. 
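A quick way to check that a docstring already matches the format these commits converge on is numpydoc's own validator. A usage sketch (assumes the numpydoc package is installed; the fully qualified name is only an example target):

from numpydoc.validate import validate

report = validate("spikeinterface.core.generate.generate_recording")
for code, message in report["errors"]:
    # e.g. PR01: parameter in the signature but missing from the docstring,
    #      PR02: documented parameter that does not exist in the signature.
    print(code, message)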
From 16023bba5f325017f20a91fb4f6740edd9445ed5 Mon Sep 17 00:00:00 2001 From: Joe Ziminski <55797454+JoeZiminski@users.noreply.github.com> Date: Mon, 22 Jul 2024 16:26:19 +0100 Subject: [PATCH 053/187] space before colon Co-authored-by: Zach McKenzie <92116279+zm711@users.noreply.github.com> --- src/spikeinterface/core/generate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spikeinterface/core/generate.py b/src/spikeinterface/core/generate.py index 09db185776..57f79c87ae 100644 --- a/src/spikeinterface/core/generate.py +++ b/src/spikeinterface/core/generate.py @@ -317,7 +317,7 @@ class TransformSorting(BaseSorting): The spikes that should be added to the sorting object, for existing units. added_spikes_new_units : np.array (spike_vector) | None, default: None The spikes that should be added to the sorting object, for new units. - new_units_ids: list[str, int] | None, default: None + new_units_ids : list[str, int] | None, default: None The unit_ids that should be added if spikes for new units are added. refractory_period_ms : float | None, default: None The refractory period violation to prevent duplicates and/or unphysiological addition From 259562e2d554464a29bc68c944afc4ff3a9bbb65 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 22 Jul 2024 15:29:32 +0000 Subject: [PATCH 054/187] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/spikeinterface/core/generate.py | 42 ++++++++++++++--------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/src/spikeinterface/core/generate.py b/src/spikeinterface/core/generate.py index 57f79c87ae..5749f31b10 100644 --- a/src/spikeinterface/core/generate.py +++ b/src/spikeinterface/core/generate.py @@ -333,10 +333,10 @@ class TransformSorting(BaseSorting): def __init__( self, sorting: BaseSorting, - added_spikes_existing_units : np.array | None = None, - added_spikes_new_units : np.array | None = None, - new_unit_ids : list[str | int] | None = None, - refractory_period_ms : float | None = None, + added_spikes_existing_units: np.array | None = None, + added_spikes_new_units: np.array | None = None, + new_unit_ids: list[str | int] | None = None, + refractory_period_ms: float | None = None, ): sampling_frequency = sorting.get_sampling_frequency() unit_ids = list(sorting.get_unit_ids()) @@ -1089,11 +1089,11 @@ def __init__( self, num_channels: int, sampling_frequency: float, - durations : list[float], - noise_levels : float | np.array = 1.0, - cov_matrix : np.array | None = None, - dtype : np.dtype | str | None = "float32", - seed : int | None = None, + durations: list[float], + noise_levels: float | np.array = 1.0, + cov_matrix: np.array | None = None, + dtype: np.dtype | str | None = "float32", + seed: int | None = None, strategy: Literal["tile_pregenerated", "on_the_fly"] = "tile_pregenerated", noise_block_size: int = 30000, ): @@ -1708,11 +1708,11 @@ def __init__( self, sorting: BaseSorting, templates: np.ndarray, - nbefore : list[int] | int | None = None, - amplitude_factor : list[float] | float | None = None, - parent_recording : BaseRecording | None = None, - num_samples : list[int] | int | None = None, - upsample_vector : np.array | None = None, + nbefore: list[int] | int | None = None, + amplitude_factor: list[float] | float | None = None, + parent_recording: BaseRecording | None = None, + num_samples: list[int] | int | None = None, + upsample_vector: np.array | None = None, 
check_borders: bool = False, ) -> None: templates = np.asarray(templates) @@ -1844,10 +1844,10 @@ def __init__( spike_vector: np.ndarray, templates: np.ndarray, nbefore: int, - amplitude_vector : list[float] | None, - upsample_vector : list[float] | None, - parent_recording_segment : BaseRecordingSegment | None = None, - num_samples : int | None = None, + amplitude_vector: list[float] | None, + upsample_vector: list[float] | None, + parent_recording_segment: BaseRecordingSegment | None = None, + num_samples: int | None = None, ) -> None: BaseRecordingSegment.__init__( self, @@ -1867,9 +1867,9 @@ def __init__( def get_traces( self, - start_frame : int | None = None, - end_frame : int | None = None, - channel_indices : list | None = None, + start_frame: int | None = None, + end_frame: int | None = None, + channel_indices: list | None = None, ) -> np.ndarray: if channel_indices is None: n_channels = self.templates.shape[2] From 67174c2893f4afaf767433bed223ba5906f7956f Mon Sep 17 00:00:00 2001 From: JoeZiminski Date: Mon, 22 Jul 2024 17:05:20 +0100 Subject: [PATCH 055/187] Add a few more fixes to docstrings. --- src/spikeinterface/core/generate.py | 185 +++++++++++++++------------- 1 file changed, 98 insertions(+), 87 deletions(-) diff --git a/src/spikeinterface/core/generate.py b/src/spikeinterface/core/generate.py index 57f79c87ae..a195b73aab 100644 --- a/src/spikeinterface/core/generate.py +++ b/src/spikeinterface/core/generate.py @@ -47,7 +47,7 @@ def generate_recording( durations : list[float], default: [5.0, 2.5] The duration in seconds of each segment in the recording, default: [5.0, 2.5]. Note that the number of segments is determined by the length of this list. - set_probe: bool | None, default: True + set_probe : bool | None, default: True ndim : int | None, default: 2 The number of dimensions of the probe, default: 2. Set to 3 to make 3 dimensional probe. seed : int | None, default: None @@ -188,7 +188,7 @@ def add_synchrony_to_sorting(sorting, sync_event_ratio=0.3, seed=None): ---------- sorting : BaseSorting The sorting object. - sync_event_ratio : float + sync_event_ratio : float, default: 0.3 The ratio of added synchronous spikes with respect to the total number of spikes. E.g., 0.5 means that the final sorting will have 1.5 times number of spikes, and all the extra spikes are synchronous (same sample_index), but on different units (not duplicates). @@ -250,7 +250,7 @@ def generate_sorting_to_inject( ---------- sorting : BaseSorting The sorting object. - num_samples: list of size num_segments. + num_samples : list[int] of size num_segments. The number of samples in all the segments of the sorting, to generate spike times covering entire the entire duration of the segments. 
max_injected_per_unit : int, default: 1000 @@ -333,10 +333,10 @@ class TransformSorting(BaseSorting): def __init__( self, sorting: BaseSorting, - added_spikes_existing_units : np.array | None = None, - added_spikes_new_units : np.array | None = None, - new_unit_ids : list[str | int] | None = None, - refractory_period_ms : float | None = None, + added_spikes_existing_units: np.array | None = None, + added_spikes_new_units: np.array | None = None, + new_unit_ids: list[str | int] | None = None, + refractory_period_ms: float | None = None, ): sampling_frequency = sorting.get_sampling_frequency() unit_ids = list(sorting.get_unit_ids()) @@ -428,9 +428,9 @@ def add_from_sorting(sorting1: BaseSorting, sorting2: BaseSorting, refractory_pe Parameters ---------- - sorting1: BaseSorting + sorting1 : BaseSorting The first sorting. - sorting2: BaseSorting + sorting2 : BaseSorting The second sorting. refractory_period_ms : float, default: None The refractory period violation to prevent duplicates and/or unphysiological addition @@ -484,7 +484,7 @@ def add_from_sorting(sorting1: BaseSorting, sorting2: BaseSorting, refractory_pe @staticmethod def add_from_unit_dict( - sorting1: BaseSorting, units_dict_list: dict, refractory_period_ms=None + sorting1: BaseSorting, units_dict_list: list[dict] | dict, refractory_period_ms=None ) -> "TransformSorting": """ Construct TransformSorting by adding one sorting with a @@ -494,9 +494,9 @@ def add_from_unit_dict( Parameters ---------- - sorting1: BaseSorting + sorting1 : BaseSorting The first sorting - dict_list: list of dict + dict_list : list[dict] | dict A list of dict with unit_ids as keys and spike times as values. refractory_period_ms : float, default: None The refractory period violation to prevent duplicates and/or unphysiological addition @@ -519,13 +519,15 @@ def from_times_labels( Parameters ---------- - sorting1: BaseSorting + sorting1 : BaseSorting The first sorting - times_list: list of array (or array) + times_list : list[np.array] | np.array An array of spike times (in frames). - labels_list: list of array (or array) + labels_list : list[np.array] | np.array An array of spike labels corresponding to the given times. - unit_ids: list or None, default: None + sampling_frequency : float, default: 30000. (in Hz) + The sampling frequency of the recording, default: 30000. + unit_ids : list | None, default: None The explicit list of unit_ids that should be extracted from labels_list If None, then it will be np.unique(labels_list). refractory_period_ms : float, default: None @@ -592,7 +594,7 @@ def generate_snippets( nafter=44, num_channels=2, wf_folder=None, - sampling_frequency=30000.0, # in Hz + sampling_frequency=30000.0, durations=[10.325, 3.5], #  in s for 2 segments set_probe=True, ndim=2, @@ -613,7 +615,7 @@ def generate_snippets( Number of channels. wf_folder : str | Path | None, default: None Optional folder to save the waveform snippets. If None, snippets are in memory. - sampling_frequency : float, default: 30000.0 + sampling_frequency : float, default: 30000.0 (in Hz) The sampling frequency of the snippets. ndim : int, default: 2 The number of dimensions of the probe. @@ -690,7 +692,7 @@ def synthesize_poisson_spike_vector( ---------- num_units : int, default: 20 Number of neuronal units to simulate. - sampling_frequency : float, default: 30000.0 + sampling_frequency : float, default: 30000.0 (in Hz) Sampling frequency in Hz. duration : float, default: 60.0 Duration of the simulation in seconds. 
@@ -793,20 +795,20 @@ def synthesize_random_firings( Parameters ---------- - num_units : int + num_units : int, default: 20 Number of units. - sampling_frequency : float + sampling_frequency : float, default: 30000.0 (in Hz) Sampling rate. - duration : float + duration : float, default: 60 Duration of the segment in seconds. - refractory_period_ms: float + refractory_period_ms : float, default: 4.0 Refractory period in ms. - firing_rates: float or list[float] + firing_rates : float or list[float], default: 3.0 The firing rate of each unit (in Hz). If float, all units will have the same firing rate. - add_shift_shuffle: bool, default: False + add_shift_shuffle : bool, default: False Optionally add a small shuffle on half of the spikes to make the autocorrelogram less flat. - seed: int, default: None + seed : int, default: None Seed for the generator. Returns @@ -899,12 +901,14 @@ def inject_some_duplicate_units(sorting, num=4, max_shift=5, ratio=None, seed=No ---------- sorting : Original sorting. - num : int + num : int, default: 4 Number of injected units. - max_shift : int + max_shift : int, default: 5 range of the shift in sample. - ratio: float + ratio : float | None, default: None Proportion of original spike in the injected units. + seed : int, default: None + Seed for the generator. Returns ------- @@ -1062,21 +1066,21 @@ class NoiseGeneratorRecording(BaseRecording): The sampling frequency of the recorder. durations : list[float] The durations of each segment in seconds. Note that the length of this list is the number of segments. - noise_levels: float or array, default: 1 + noise_levels : float | np.array, default: 1.0 Std of the white noise (if an array, defined by per channels) cov_matrix : np.array | None, default: None The covariance matrix of the noise - dtype : np.dtype | str |None, default: "float32" + dtype : np.dtype | str | None, default: "float32" The dtype of the recording. Note that only np.float32 and np.float64 are supported. seed : int | None, default: None The seed for np.random.default_rng. - strategy : "tile_pregenerated" or "on_the_fly" + strategy : "tile_pregenerated" | "on_the_fly", default: "tile_pregenerated" The strategy of generating noise chunk: * "tile_pregenerated": pregenerate a noise chunk of noise_block_size sample and repeat it very fast and cusume only one noise block. * "on_the_fly": generate on the fly a new noise block by combining seed + noise block index no memory preallocation but a bit more computaion (random) - noise_block_size: int + noise_block_size : int, default: 30000 Size in sample of noise block. Note @@ -1089,11 +1093,11 @@ def __init__( self, num_channels: int, sampling_frequency: float, - durations : list[float], - noise_levels : float | np.array = 1.0, - cov_matrix : np.array | None = None, - dtype : np.dtype | str | None = "float32", - seed : int | None = None, + durations: list[float], + noise_levels: float | np.array = 1.0, + cov_matrix: np.array | None = None, + dtype: np.dtype | str | None = "float32", + seed: int | None = None, strategy: Literal["tile_pregenerated", "on_the_fly"] = "tile_pregenerated", noise_block_size: int = 30000, ): @@ -1277,11 +1281,14 @@ def generate_recording_by_size( ---------- full_traces_size_GiB : float The size in gigabytes (GiB) of the recording. - num_channels: int - Number of channels. seed : int | None, default: None The seed for np.random.default_rng. 
- + strategy : "tile_pregenerated" | "on_the_fly", default: "tile_pregenerated" + The strategy of generating noise chunk: + * "tile_pregenerated": pregenerate a noise chunk of noise_block_size sample and repeat it + very fast and cusume only one noise block. + * "on_the_fly": generate on the fly a new noise block by combining seed + noise block index + no memory preallocation but a bit more computaion (random) Returns ------- GeneratorRecording @@ -1517,25 +1524,25 @@ def generate_templates( Parameters ---------- - channel_locations: np.ndarray + channel_locations : np.ndarray Channel locations. - units_locations: np.ndarray + units_locations : np.ndarray Must be 3D. - sampling_frequency: float + sampling_frequency : float Sampling frequency. - ms_before: float + ms_before : float Cut out in ms before spike peak. - ms_after: float + ms_after : float Cut out in ms after spike peak. - seed: int or None + seed : int | None A seed for random. - dtype: numpy.dtype, default: "float32" + dtype : numpy.dtype, default: "float32" Templates dtype - upsample_factor: None or int + upsample_factor : int | None, default: None If not None then template are generated upsampled by this factor. Then a new dimention (axis=3) is added to the template with intermediate inter sample representation. This allow easy random jitter by choising a template this new dim - unit_params: dict of arrays or dict of scalar of dict of tuple + unit_params : dict[np.array] | dict[float] | dict[tuple] | None, default: None An optional dict containing parameters per units. Keys are parameter names: @@ -1552,6 +1559,10 @@ def generate_templates( * array of the same length of units * scalar, then an array is created * tuple, then this difine a range for random values. + mode : "ellipsoid" | "sphere", default: "ellipsoid" + Method used to calculate the distance between unit and channel location. + Ellipoid injects some anisotropy dependent on unit shape, sphere is equivalent + to Euclidean distance. Returns ------- @@ -1672,18 +1683,18 @@ class InjectTemplatesRecording(BaseRecording): Parameters ---------- - sorting: BaseSorting + sorting : BaseSorting Sorting object containing all the units and their spike train. - templates: np.ndarray[n_units, n_samples, n_channels] or np.ndarray[n_units, n_samples, n_oversampling] + templates : np.ndarray[n_units, n_samples, n_channels] | np.ndarray[n_units, n_samples, n_oversampling] Array containing the templates to inject for all the units. Shape can be: * (num_units, num_samples, num_channels): standard case * (num_units, num_samples, num_channels, upsample_factor): case with oversample template to introduce sampling jitter. - nbefore: list[int] | int | None, default: None + nbefore : list[int] | int | None, default: None The number of samples before the peak of the template to align the spike. If None, will default to the highest peak. - amplitude_factor: list[float] | float | None, default: None + amplitude_factor : list[float] | float | None, default: None The amplitude of each spike for each unit. Can be None (no scaling). Can be scalar all spikes have the same factor (certainly useless). @@ -1691,7 +1702,7 @@ class InjectTemplatesRecording(BaseRecording): parent_recording : BaseRecording | None, default: None The recording over which to add the templates. If None, will default to traces containing all 0. - num_samples: list[int] | int | None, default: None + num_samples : list[int] | int | None, default: None The number of samples in the recording per segment. 
You can use int for mono-segment objects. upsample_vector : np.array | None, default: None. @@ -1708,11 +1719,11 @@ def __init__( self, sorting: BaseSorting, templates: np.ndarray, - nbefore : list[int] | int | None = None, - amplitude_factor : list[float] | float | None = None, - parent_recording : BaseRecording | None = None, - num_samples : list[int] | int | None = None, - upsample_vector : np.array | None = None, + nbefore: list[int] | int | None = None, + amplitude_factor: list[float] | float | None = None, + parent_recording: BaseRecording | None = None, + num_samples: list[int] | int | None = None, + upsample_vector: np.array | None = None, check_borders: bool = False, ) -> None: templates = np.asarray(templates) @@ -1844,10 +1855,10 @@ def __init__( spike_vector: np.ndarray, templates: np.ndarray, nbefore: int, - amplitude_vector : list[float] | None, - upsample_vector : list[float] | None, - parent_recording_segment : BaseRecordingSegment | None = None, - num_samples : int | None = None, + amplitude_vector: list[float] | None, + upsample_vector: list[float] | None, + parent_recording_segment: BaseRecordingSegment | None = None, + num_samples: int | None = None, ) -> None: BaseRecordingSegment.__init__( self, @@ -1867,9 +1878,9 @@ def __init__( def get_traces( self, - start_frame : int | None = None, - end_frame : int | None = None, - channel_indices : list | None = None, + start_frame: int | None = None, + end_frame: int | None = None, + channel_indices: list | None = None, ) -> np.ndarray: if channel_indices is None: n_channels = self.templates.shape[2] @@ -2040,55 +2051,55 @@ def generate_ground_truth_recording( Parameters ---------- - durations: list of float, default: [10.] + durations : list[float], default: [10.] Durations in seconds for all segments. - sampling_frequency: float, default: 25000 + sampling_frequency : float, default: 25000.0 Sampling frequency. - num_channels: int, default: 4 + num_channels : int, default: 4 Number of channels, not used when probe is given. - num_units: int, default: 10 + num_units : int, default: 10 Number of units, not used when sorting is given. - sorting: Sorting or None + sorting : Sorting | None An external sorting object. If not provide, one is genrated. - probe: Probe or None + probe : Probe | None An external Probe object. If not provided a probe is generated using generate_probe_kwargs. - generate_probe_kwargs: dict + generate_probe_kwargs : dict A dict to constuct the Probe using :py:func:`probeinterface.generate_multi_columns_probe()`. - templates: np.array or None + templates : np.array | None The templates of units. If None they are generated. Shape can be: * (num_units, num_samples, num_channels): standard case * (num_units, num_samples, num_channels, upsample_factor): case with oversample template to introduce jitter. - ms_before: float, default: 1.5 + ms_before : float, default: 1.5 Cut out in ms before spike peak. - ms_after: float, default: 3 + ms_after : float, default: 3 Cut out in ms after spike peak. - upsample_factor: None or int, default: None + upsample_factor : None | int, default: None A upsampling factor used only when templates are not provided. - upsample_vector: np.array or None + upsample_vector : np.array | None Optional the upsample_vector can given. This has the same shape as spike_vector - generate_sorting_kwargs: dict + generate_sorting_kwargs : dict When sorting is not provide, this dict is used to generated a Sorting. 
- noise_kwargs: dict + noise_kwargs : dict Dict used to generated the noise with NoiseGeneratorRecording. - generate_unit_locations_kwargs: dict + generate_unit_locations_kwargs : dict Dict used to generated template when template not provided. - generate_templates_kwargs: dict + generate_templates_kwargs : dict Dict used to generated template when template not provided. - dtype: np.dtype, default: "float32" + dtype : np.dtype, default: "float32" The dtype of the recording. - seed: int or None + seed : int | None Seed for random initialization. If None a diffrent Recording is generated at every call. Note: even with None a generated recording keep internaly a seed to regenerate the same signal after dump/load. Returns ------- - recording: Recording + recording : Recording The generated recording extractor. - sorting: Sorting + sorting : Sorting The generated sorting extractor. """ generate_templates_kwargs = generate_templates_kwargs or dict() From 535fe17e83872d983ef5fe96abde34c944d74a1d Mon Sep 17 00:00:00 2001 From: Joe Ziminski <55797454+JoeZiminski@users.noreply.github.com> Date: Wed, 24 Jul 2024 15:04:46 +0100 Subject: [PATCH 056/187] typo fix Co-authored-by: Zach McKenzie <92116279+zm711@users.noreply.github.com> --- src/spikeinterface/core/generate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spikeinterface/core/generate.py b/src/spikeinterface/core/generate.py index a195b73aab..d2a5f98fe8 100644 --- a/src/spikeinterface/core/generate.py +++ b/src/spikeinterface/core/generate.py @@ -1561,7 +1561,7 @@ def generate_templates( * tuple, then this difine a range for random values. mode : "ellipsoid" | "sphere", default: "ellipsoid" Method used to calculate the distance between unit and channel location. - Ellipoid injects some anisotropy dependent on unit shape, sphere is equivalent + Ellipsoid injects some anisotropy dependent on unit shape, sphere is equivalent to Euclidean distance. Returns From e7ce974fe0f9e5fc72b8ebe708e58ac5371e120f Mon Sep 17 00:00:00 2001 From: Joe Ziminski <55797454+JoeZiminski@users.noreply.github.com> Date: Wed, 24 Jul 2024 15:05:03 +0100 Subject: [PATCH 057/187] typo fix Co-authored-by: Zach McKenzie <92116279+zm711@users.noreply.github.com> --- src/spikeinterface/core/generate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spikeinterface/core/generate.py b/src/spikeinterface/core/generate.py index d2a5f98fe8..09965b4550 100644 --- a/src/spikeinterface/core/generate.py +++ b/src/spikeinterface/core/generate.py @@ -1286,7 +1286,7 @@ def generate_recording_by_size( strategy : "tile_pregenerated" | "on_the_fly", default: "tile_pregenerated" The strategy of generating noise chunk: * "tile_pregenerated": pregenerate a noise chunk of noise_block_size sample and repeat it - very fast and cusume only one noise block. + very fast and consume only one noise block. 
* "on_the_fly": generate on the fly a new noise block by combining seed + noise block index no memory preallocation but a bit more computaion (random) Returns From c7b5aa6810ab9019b54e10aef973d62a31f015e1 Mon Sep 17 00:00:00 2001 From: Joe Ziminski <55797454+JoeZiminski@users.noreply.github.com> Date: Wed, 24 Jul 2024 15:05:15 +0100 Subject: [PATCH 058/187] typo fix Co-authored-by: Zach McKenzie <92116279+zm711@users.noreply.github.com> --- src/spikeinterface/core/generate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spikeinterface/core/generate.py b/src/spikeinterface/core/generate.py index 09965b4550..b2ffdcd88a 100644 --- a/src/spikeinterface/core/generate.py +++ b/src/spikeinterface/core/generate.py @@ -1288,7 +1288,7 @@ def generate_recording_by_size( * "tile_pregenerated": pregenerate a noise chunk of noise_block_size sample and repeat it very fast and consume only one noise block. * "on_the_fly": generate on the fly a new noise block by combining seed + noise block index - no memory preallocation but a bit more computaion (random) + no memory preallocation but a bit more computation (random) Returns ------- GeneratorRecording From 92ea6093e2312e60212798089dc552e6b2f15d21 Mon Sep 17 00:00:00 2001 From: JoeZiminski Date: Wed, 24 Jul 2024 15:10:39 +0100 Subject: [PATCH 059/187] Move 'in Hz' to description for sampling frequency docstring. --- src/spikeinterface/core/generate.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/spikeinterface/core/generate.py b/src/spikeinterface/core/generate.py index b2ffdcd88a..73a159380c 100644 --- a/src/spikeinterface/core/generate.py +++ b/src/spikeinterface/core/generate.py @@ -42,8 +42,8 @@ def generate_recording( ---------- num_channels : int, default: 2 The number of channels in the recording. - sampling_frequency : float, default: 30000. (in Hz) - The sampling frequency of the recording, default: 30000. + sampling_frequency : float, default: 30000.0 + The sampling frequency of the recording in Hz durations : list[float], default: [5.0, 2.5] The duration in seconds of each segment in the recording, default: [5.0, 2.5]. Note that the number of segments is determined by the length of this list. @@ -105,7 +105,7 @@ def generate_sorting( num_units : int, default: 5 Number of units. sampling_frequency : float, default: 30000.0 - The sampling frequency. + The sampling frequency of the recording in Hz. durations : list, default: [10.325, 3.5] Duration of each segment in s. firing_rates : float, default: 3.0 @@ -525,8 +525,8 @@ def from_times_labels( An array of spike times (in frames). labels_list : list[np.array] | np.array An array of spike labels corresponding to the given times. - sampling_frequency : float, default: 30000. (in Hz) - The sampling frequency of the recording, default: 30000. + sampling_frequency : float, default: 30000.0 + The sampling frequency of the recording in Hz. unit_ids : list | None, default: None The explicit list of unit_ids that should be extracted from labels_list If None, then it will be np.unique(labels_list). @@ -615,8 +615,8 @@ def generate_snippets( Number of channels. wf_folder : str | Path | None, default: None Optional folder to save the waveform snippets. If None, snippets are in memory. - sampling_frequency : float, default: 30000.0 (in Hz) - The sampling frequency of the snippets. + sampling_frequency : float, default: 30000.0 + The sampling frequency of the snippets in Hz. ndim : int, default: 2 The number of dimensions of the probe. 
num_units : int, default: 5 @@ -692,7 +692,7 @@ def synthesize_poisson_spike_vector( ---------- num_units : int, default: 20 Number of neuronal units to simulate. - sampling_frequency : float, default: 30000.0 (in Hz) + sampling_frequency : float, default: 30000.0 Sampling frequency in Hz. duration : float, default: 60.0 Duration of the simulation in seconds. @@ -797,8 +797,8 @@ def synthesize_random_firings( ---------- num_units : int, default: 20 Number of units. - sampling_frequency : float, default: 30000.0 (in Hz) - Sampling rate. + sampling_frequency : float, default: 30000.0 + Sampling rate in Hz. duration : float, default: 60 Duration of the segment in seconds. refractory_period_ms : float, default: 4.0 From 2307c9b5a302c6532270a54677af0f2f139f2f49 Mon Sep 17 00:00:00 2001 From: chrishalcrow <57948917+chrishalcrow@users.noreply.github.com> Date: Thu, 25 Jul 2024 09:42:30 +0100 Subject: [PATCH 060/187] Comparison, Generation, Postprocessing, QualityMetrics, SortingComponents docstrings compliance --- src/spikeinterface/comparison/collision.py | 14 ++- src/spikeinterface/comparison/correlogram.py | 15 +++ .../comparison/groundtruthstudy.py | 6 +- src/spikeinterface/comparison/hybrid.py | 2 + .../comparison/multicomparisons.py | 10 ++ .../comparison/paircomparisons.py | 54 +++++++- .../core/analyzer_extension_core.py | 6 +- src/spikeinterface/core/generate.py | 118 ++++++++++-------- src/spikeinterface/curation/auto_merge.py | 3 + .../curation/curation_format.py | 16 +-- .../curation/curationsorting.py | 3 +- .../curation/mergeunitssorting.py | 2 +- .../curation/remove_redundant.py | 4 + .../curation/splitunitsorting.py | 5 +- .../extractors/neoextractors/maxwell.py | 2 +- .../extractors/neoextractors/plexon.py | 2 +- src/spikeinterface/generation/drift_tools.py | 12 ++ .../generation/drifting_generator.py | 2 + src/spikeinterface/generation/hybrid_tools.py | 8 +- .../generation/template_database.py | 2 + .../postprocessing/correlograms.py | 2 +- src/spikeinterface/postprocessing/isi.py | 2 +- .../postprocessing/principal_component.py | 12 +- .../postprocessing/spike_amplitudes.py | 4 +- .../postprocessing/spike_locations.py | 4 +- .../postprocessing/template_metrics.py | 8 +- .../postprocessing/template_similarity.py | 11 +- .../postprocessing/unit_locations.py | 8 +- src/spikeinterface/preprocessing/motion.py | 5 + .../qualitymetrics/misc_metrics.py | 11 +- src/spikeinterface/sorters/launcher.py | 2 + src/spikeinterface/sorters/runsorter.py | 21 +--- .../sortingcomponents/clustering/main.py | 10 +- .../sortingcomponents/matching/main.py | 14 ++- .../motion/motion_cleaner.py | 10 +- .../motion/motion_estimation.py | 16 ++- .../motion/motion_interpolation.py | 37 +++--- .../sortingcomponents/motion/motion_utils.py | 1 + .../sortingcomponents/peak_detection.py | 10 +- .../sortingcomponents/peak_localization.py | 8 +- src/spikeinterface/sortingcomponents/tools.py | 6 +- 41 files changed, 312 insertions(+), 176 deletions(-) diff --git a/src/spikeinterface/comparison/collision.py b/src/spikeinterface/comparison/collision.py index 9b455e6200..574bd16093 100644 --- a/src/spikeinterface/comparison/collision.py +++ b/src/spikeinterface/comparison/collision.py @@ -13,9 +13,19 @@ class CollisionGTComparison(GroundTruthComparison): This class needs maintenance and need a bit of refactoring. - - collision_lag : float + Parameters + ---------- + gt_sorting : SortingExtractor + The first sorting for the comparison + collision_lag : float, default 2.0 Collision lag in ms. 
+ tested_sorting : SortingExtractor
+ The second sorting for the comparison
+ nbins : int, default: 11
+ Number of collision bins
+ **kwargs : dict
+ Keyword arguments for `GroundTruthComparison`
+
 """


diff --git a/src/spikeinterface/comparison/correlogram.py b/src/spikeinterface/comparison/correlogram.py
index 5a0dd1d3a7..0cafef2c12 100644
--- a/src/spikeinterface/comparison/correlogram.py
+++ b/src/spikeinterface/comparison/correlogram.py
@@ -15,6 +15,21 @@ class CorrelogramGTComparison(GroundTruthComparison):

 This class needs maintenance and need a bit of refactoring.

+ Parameters
+ ----------
+ gt_sorting : SortingExtractor
+ The first sorting for the comparison
+ tested_sorting : SortingExtractor
+ The second sorting for the comparison
+ bin_ms : float, default: 1.0
+ Size of bin for correlograms
+ window_ms : float, default: 100.0
+ The window around the spike to compute the correlation in ms.
+ well_detected_score : float, default: 0.8
+ Agreement score above which units are well detected
+ **kwargs : dict
+ Keyword arguments for `GroundTruthComparison`
+
 """

 def __init__(self, gt_sorting, tested_sorting, window_ms=100.0, bin_ms=1.0, well_detected_score=0.8, **kwargs):
diff --git a/src/spikeinterface/comparison/groundtruthstudy.py b/src/spikeinterface/comparison/groundtruthstudy.py
index ba7268b4f0..d45956a07e 100644
--- a/src/spikeinterface/comparison/groundtruthstudy.py
+++ b/src/spikeinterface/comparison/groundtruthstudy.py
@@ -42,6 +42,11 @@ class GroundTruthStudy:
 This GroundTruthStudy have been refactor in version 0.100 to be more flexible
 than previous versions.
 Note that the underlying folder structure is not backward compatible!
+
+ Parameters
+ ----------
+ study_folder : str | Path
+ Path to folder containing `GroundTruthStudy`
 """

 def __init__(self, study_folder):
@@ -370,7 +375,6 @@ def get_metrics(self, key):
 return metrics

 def get_units_snr(self, key):
- """ """
 return self.get_metrics(key)["snr"]

 def get_performance_by_unit(self, case_keys=None):
diff --git a/src/spikeinterface/comparison/hybrid.py b/src/spikeinterface/comparison/hybrid.py
index beb9682e37..657fc73b71 100644
--- a/src/spikeinterface/comparison/hybrid.py
+++ b/src/spikeinterface/comparison/hybrid.py
@@ -48,6 +48,8 @@ class HybridUnitsRecording(InjectTemplatesRecording):
 injected_sorting_folder : str | Path | None
 If given, the injected sorting is saved to this folder.
 It must be specified if injected_sorting is None or not serialisable to file.
+ seed : int, default: None
+ Random seed for amplitude_factor

 Returns
 -------
diff --git a/src/spikeinterface/comparison/multicomparisons.py b/src/spikeinterface/comparison/multicomparisons.py
index 499004e32e..dccff6118d 100644
--- a/src/spikeinterface/comparison/multicomparisons.py
+++ b/src/spikeinterface/comparison/multicomparisons.py
@@ -44,6 +44,8 @@ class MultiSortingComparison(BaseMultiComparison, MixinSpikeTrainComparison):
 best matching two sorters
 verbose : bool, default: False
 if True, output is verbose
+ do_matching : bool, default: True
+ if True, the matching is performed when the comparison is initialized.

 Returns
 -------
@@ -319,6 +321,14 @@ class MultiTemplateComparison(BaseMultiComparison, MixinTemplateComparison):
 Minimum agreement score for a possible match
 verbose : bool, default: False
 if True, output is verbose
+ do_matching : bool, default: True
+ if True, the matching is performed when the comparison is initialized
+ support : "dense" | "union" | "intersection", default: "union"
+ The support to compute the similarity matrix.
+ num_shifts : int, default: 0 + Number of shifts to use to shift templates to maximize similarity. + similarity_method : "cosine" | "l1" | "l2", default: "cosine" + Method for the similarity matrix. Returns ------- diff --git a/src/spikeinterface/comparison/paircomparisons.py b/src/spikeinterface/comparison/paircomparisons.py index 7d5f04dfdd..9566354918 100644 --- a/src/spikeinterface/comparison/paircomparisons.py +++ b/src/spikeinterface/comparison/paircomparisons.py @@ -263,7 +263,6 @@ def __init__( gt_name=None, tested_name=None, delta_time=0.4, - sampling_frequency=None, match_score=0.5, well_detected_score=0.8, redundant_score=0.2, @@ -425,6 +424,11 @@ def get_performance(self, method="by_unit", output="pandas"): def print_performance(self, method="pooled_with_average"): """ Print performance with the selected method + + Parameters + ---------- + method : "by_unit" | "pooled_with_average", default: "pooled_with_average" + The method to compute performance """ template_txt_performance = _template_txt_performance @@ -449,6 +453,19 @@ def print_summary(self, well_detected_score=None, redundant_score=None, overmerg * how many gt units (one or several) This summary mix several performance metrics. + + Parameters + ---------- + well_detected_score : float, default: None + The agreement score above which tested units + are counted as "well detected". + redundant_score : float, default: None + The agreement score below which tested units + are counted as "false positive"" (and not "redundant"). + overmerged_score : float, default: None + Tested units with 2 or more agreement scores above "overmerged_score" + are counted as "overmerged". + """ txt = _template_summary_part1 @@ -500,6 +517,12 @@ def count_well_detected_units(self, well_detected_score): """ Count how many well detected units. kwargs are the same as get_well_detected_units. + + Parameters + ---------- + well_detected_score : float, default: None + The agreement score above which tested units + are counted as "well detected". """ return len(self.get_well_detected_units(well_detected_score=well_detected_score)) @@ -540,6 +563,12 @@ def get_false_positive_units(self, redundant_score=None): def count_false_positive_units(self, redundant_score=None): """ See get_false_positive_units(). + + Parameters + ---------- + redundant_score : float, default: None + The agreement score below which tested units + are counted as "false positive"" (and not "redundant"). """ return len(self.get_false_positive_units(redundant_score)) @@ -554,7 +583,7 @@ def get_redundant_units(self, redundant_score=None): Parameters ---------- - redundant_score=None : float, default: None + redundant_score : float, default: None The agreement score above which tested units are counted as "redundant" (and not "false positive" ). """ @@ -577,6 +606,12 @@ def get_redundant_units(self, redundant_score=None): def count_redundant_units(self, redundant_score=None): """ See get_redundant_units(). + + Parameters + ---------- + redundant_score : float, default: None + The agreement score below which tested units + are counted as "false positive"" (and not "redundant"). """ return len(self.get_redundant_units(redundant_score=redundant_score)) @@ -609,6 +644,12 @@ def get_overmerged_units(self, overmerged_score=None): def count_overmerged_units(self, overmerged_score=None): """ See get_overmerged_units(). + + Parameters + ---------- + overmerged_score : float, default: None + Tested units with 2 or more agreement scores above "overmerged_score" + are counted as "overmerged". 
""" return len(self.get_overmerged_units(overmerged_score=overmerged_score)) @@ -704,6 +745,10 @@ class TemplateComparison(BasePairComparison, MixinTemplateComparison): List of units from sorting_analyzer_1 to compare. unit_ids2 : list, default: None List of units from sorting_analyzer_2 to compare. + name1 : str, default: "sess1" + Name of first session. + name2 : str, default: "sess1" + Name of second session. similarity_method : "cosine" | "l1" | "l2", default: "cosine" Method for the similarity matrix. support : "dense" | "union" | "intersection", default: "union" @@ -712,6 +757,11 @@ class TemplateComparison(BasePairComparison, MixinTemplateComparison): Number of shifts to use to shift templates to maximize similarity. verbose : bool, default: False If True, output is verbose. + chance_score : float, default: 0.3 + Minimum agreement score to for a possible match + match_score : float, default: 0.7 + Minimum agreement score to match units + Returns ------- diff --git a/src/spikeinterface/core/analyzer_extension_core.py b/src/spikeinterface/core/analyzer_extension_core.py index ff1dc5dafa..c9cff4fb94 100644 --- a/src/spikeinterface/core/analyzer_extension_core.py +++ b/src/spikeinterface/core/analyzer_extension_core.py @@ -675,13 +675,13 @@ class ComputeNoiseLevels(AnalyzerExtension): Parameters ---------- - sorting_analyzer: SortingAnalyzer + sorting_analyzer : SortingAnalyzer A SortingAnalyzer object - **params: dict with additional parameters for the `spikeinterface.get_noise_levels()` function + **params : dict with additional parameters for the `spikeinterface.get_noise_levels()` function Returns ------- - noise_levels: np.array + noise_levels : np.array The noise level vector """ diff --git a/src/spikeinterface/core/generate.py b/src/spikeinterface/core/generate.py index ff75789aab..187103d031 100644 --- a/src/spikeinterface/core/generate.py +++ b/src/spikeinterface/core/generate.py @@ -44,10 +44,11 @@ def generate_recording( The number of channels in the recording. sampling_frequency : float, default: 30000. (in Hz) The sampling frequency of the recording, default: 30000. - durations: List[float], default: [5.0, 2.5] - The duration in seconds of each segment in the recording, default: [5.0, 2.5]. - Note that the number of segments is determined by the length of this list. - set_probe: bool, default: True + durations : List[float], default: [5.0, 2.5] + The duration in seconds of each segment in the recording. + The number of segments is determined by the length of this list. + set_probe : bool, default: True + If true, attaches probe to the returned `Recording` ndim : int, default: 2 The number of dimensions of the probe, default: 2. Set to 3 to make 3 dimensional probe. seed : Optional[int] @@ -621,6 +622,13 @@ def generate_snippets( The number of units. empty_units : list | None, default: None A list of units that will have no spikes. + durations : List[float], default: [10.325, 3.5] + The duration in seconds of each segment in the recording. + The number of segments is determined by the length of this list. + set_probe : bool, default: True + If true, attaches probe to the returned snippets object + **job_kwargs : dict, default: None + Job keyword arguments for `snippets_from_sorting` Returns ------- @@ -799,14 +807,14 @@ def synthesize_random_firings( Sampling rate. duration : float Duration of the segment in seconds. - refractory_period_ms: float + refractory_period_ms : float Refractory period in ms. 
- firing_rates: float or list[float] + firing_rates : float or list[float] The firing rate of each unit (in Hz). If float, all units will have the same firing rate. - add_shift_shuffle: bool, default: False + add_shift_shuffle : bool, default: False Optionally add a small shuffle on half of the spikes to make the autocorrelogram less flat. - seed: int, default: None + seed : int, default: None Seed for the generator. Returns @@ -903,8 +911,10 @@ def inject_some_duplicate_units(sorting, num=4, max_shift=5, ratio=None, seed=No Number of injected units. max_shift : int range of the shift in sample. - ratio: float + ratio : float Proportion of original spike in the injected units. + seed : None|int, default: None + Random seed for creating unit peak shifts. Returns ------- @@ -1062,9 +1072,9 @@ class NoiseGeneratorRecording(BaseRecording): The sampling frequency of the recorder. durations : List[float] The durations of each segment in seconds. Note that the length of this list is the number of segments. - noise_levels: float or array, default: 1 + noise_levels : float or array, default: 1 Std of the white noise (if an array, defined by per channels) - cov_matrix: np.array, default None + cov_matrix : np.array, default None The covariance matrix of the noise dtype : Optional[Union[np.dtype, str]], default: "float32" The dtype of the recording. Note that only np.float32 and np.float64 are supported. @@ -1076,7 +1086,7 @@ class NoiseGeneratorRecording(BaseRecording): very fast and cusume only one noise block. * "on_the_fly": generate on the fly a new noise block by combining seed + noise block index no memory preallocation but a bit more computaion (random) - noise_block_size: int + noise_block_size : int Size in sample of noise block. Notes @@ -1279,10 +1289,14 @@ def generate_recording_by_size( ---------- full_traces_size_GiB : float The size in gigabytes (GiB) of the recording. - num_channels: int + num_channels : int Number of channels. seed : int, default: None The seed for np.random.default_rng. + strategy : "tile_pregenerated"| "on_the_fly", default: "tile_pregenerated" + The strategy of generating noise chunk: + * "tile_pregenerated": pregenerate a noise chunk of noise_block_size sample and repeat it very fast and cusume only one noise block. + * "on_the_fly": generate on the fly a new noise block by combining seed + noise block index no memory preallocation but a bit more computaion (random) Returns ------- @@ -1519,25 +1533,25 @@ def generate_templates( Parameters ---------- - channel_locations: np.ndarray + channel_locations : np.ndarray Channel locations. - units_locations: np.ndarray + units_locations : np.ndarray Must be 3D. - sampling_frequency: float + sampling_frequency : float Sampling frequency. - ms_before: float + ms_before : float Cut out in ms before spike peak. - ms_after: float + ms_after : float Cut out in ms after spike peak. - seed: int or None + seed : int or None A seed for random. - dtype: numpy.dtype, default: "float32" + dtype : numpy.dtype, default: "float32" Templates dtype - upsample_factor: None or int + upsample_factor : None or int If not None then template are generated upsampled by this factor. Then a new dimention (axis=3) is added to the template with intermediate inter sample representation. This allow easy random jitter by choising a template this new dim - unit_params: dict of arrays or dict of scalar of dict of tuple + unit_params : dict of arrays or dict of scalar of dict of tuple An optional dict containing parameters per units. 
Keys are parameter names:
@@ -1555,6 +1569,10 @@ def generate_templates(
            * scalar, then an array is created
            * tuple, then this difine a range for random values.

+    mode : "sphere" | "ellipsoid", default: "ellipsoid"
+        Mode for how to calculate distances.
+
+
    Returns
    -------
    templates: np.array
@@ -1674,31 +1692,33 @@ class InjectTemplatesRecording(BaseRecording):

    Parameters
    ----------
-    sorting: BaseSorting
+    sorting : BaseSorting
        Sorting object containing all the units and their spike train.
-    templates: np.ndarray[n_units, n_samples, n_channels] or np.ndarray[n_units, n_samples, n_oversampling]
+    templates : np.ndarray[n_units, n_samples, n_channels] or np.ndarray[n_units, n_samples, n_oversampling]
        Array containing the templates to inject for all the units.
        Shape can be:
            * (num_units, num_samples, num_channels): standard case
            * (num_units, num_samples, num_channels, upsample_factor): case with oversample template to introduce sampling jitter.
-    nbefore: list[int] | int | None, default: None
+    nbefore : list[int] | int | None, default: None
        The number of samples before the peak of the template to align the spike.
        If None, will default to the highest peak.
-    amplitude_factor: list[float] | float | None, default: None
+    amplitude_factor : list[float] | float | None, default: None
        The amplitude of each spike for each unit.
        Can be None (no scaling).
        Can be scalar all spikes have the same factor (certainly useless).
        Can be a vector with same shape of spike_vector of the sorting.
-    parent_recording: BaseRecording | None
+    parent_recording : BaseRecording | None
        The recording over which to add the templates.
        If None, will default to traces containing all 0.
-    num_samples: list[int] | int | None
+    num_samples : list[int] | int | None
        The number of samples in the recording per segment.
        You can use int for mono-segment objects.
-    upsample_vector: np.array or None, default: None.
+    upsample_vector : np.array or None, default: None.
        When templates is 4d we can simulate a jitter.
        Optional the upsample_vector is the jitter index with a number per spike in range 0-templates.shape[3].
+    check_borders : bool, default: False
+        Checks if the borders of the templates are zero.

    Returns
    -------
@@ -2042,55 +2062,55 @@ def generate_ground_truth_recording(

    Parameters
    ----------
-    durations: list of float, default: [10.]
+    durations : list of float, default: [10.]
        Durations in seconds for all segments.
-    sampling_frequency: float, default: 25000
+    sampling_frequency : float, default: 25000
        Sampling frequency.
-    num_channels: int, default: 4
+    num_channels : int, default: 4
        Number of channels, not used when probe is given.
-    num_units: int, default: 10
+    num_units : int, default: 10
        Number of units, not used when sorting is given.
-    sorting: Sorting or None
+    sorting : Sorting or None
        An external sorting object. If not provide, one is genrated.
-    probe: Probe or None
+    probe : Probe or None
        An external Probe object. If not provided a probe is generated using generate_probe_kwargs.
-    generate_probe_kwargs: dict
+    generate_probe_kwargs : dict
        A dict to constuct the Probe using :py:func:`probeinterface.generate_multi_columns_probe()`.
-    templates: np.array or None
+    templates : np.array or None
        The templates of units.
        If None they are generated.
        Shape can be:
            * (num_units, num_samples, num_channels): standard case
            * (num_units, num_samples, num_channels, upsample_factor): case with oversample template to introduce jitter.
- ms_before: float, default: 1.5 + ms_before : float, default: 1.5 Cut out in ms before spike peak. - ms_after: float, default: 3 + ms_after : float, default: 3 Cut out in ms after spike peak. - upsample_factor: None or int, default: None + upsample_factor : None or int, default: None A upsampling factor used only when templates are not provided. - upsample_vector: np.array or None + upsample_vector : np.array or None Optional the upsample_vector can given. This has the same shape as spike_vector - generate_sorting_kwargs: dict + generate_sorting_kwargs : dict When sorting is not provide, this dict is used to generated a Sorting. - noise_kwargs: dict + noise_kwargs : dict Dict used to generated the noise with NoiseGeneratorRecording. - generate_unit_locations_kwargs: dict + generate_unit_locations_kwargs : dict Dict used to generated template when template not provided. - generate_templates_kwargs: dict + generate_templates_kwargs : dict Dict used to generated template when template not provided. - dtype: np.dtype, default: "float32" + dtype : np.dtype, default: "float32" The dtype of the recording. - seed: int or None + seed : int or None Seed for random initialization. If None a diffrent Recording is generated at every call. Note: even with None a generated recording keep internaly a seed to regenerate the same signal after dump/load. Returns ------- - recording: Recording + recording : Recording The generated recording extractor. - sorting: Sorting + sorting : Sorting The generated sorting extractor. """ generate_templates_kwargs = generate_templates_kwargs or dict() diff --git a/src/spikeinterface/curation/auto_merge.py b/src/spikeinterface/curation/auto_merge.py index 920d6713ad..1b0f287d09 100644 --- a/src/spikeinterface/curation/auto_merge.py +++ b/src/spikeinterface/curation/auto_merge.py @@ -98,6 +98,7 @@ def get_potential_auto_merge( * | "feature_neighbors": focused on finding unit pairs whose spikes are close in the feature space using kNN. | It uses the following steps: "num_spikes", "snr", "remove_contaminated", "unit_locations", | "knn", "quality_score" + If `preset` is None, you can specify the steps manually with the `steps` parameter. resolve_graph : bool, default: False If True, the function resolves the potential unit pairs to be merged into multiple-unit merges. @@ -145,6 +146,8 @@ def get_potential_auto_merge( Pontential steps : "num_spikes", "snr", "remove_contaminated", "unit_locations", "correlogram", "template_similarity", "presence_distance", "cross_contamination", "knn", "quality_score" Please check steps explanations above! + presence_distance_kwargs : None|dict, default: None + A dictionary of kwargs to be passed to compute_presence_distance(). Returns ------- diff --git a/src/spikeinterface/curation/curation_format.py b/src/spikeinterface/curation/curation_format.py index 88190a9bab..20f20b1a2f 100644 --- a/src/spikeinterface/curation/curation_format.py +++ b/src/spikeinterface/curation/curation_format.py @@ -289,7 +289,7 @@ def apply_curation( The Sorting object to apply merges. curation_dict : dict The curation dict. - censor_ms: float | None, default: None + censor_ms : float | None, default: None When applying the merges, any consecutive spikes within the `censor_ms` are removed. This can be thought of as the desired refractory period. If `censor_ms=None`, no spikes are discarded. 
new_id_strategy : "append" | "take_first", default: "append"
@@ -297,17 +297,17 @@
            * "append" : new_units_ids will be added at the end of max(sorting.unit_ids)
            * "take_first" : new_unit_ids will be the first unit_id of every list of merges
-    merging_mode : "soft" | "hard", default: "soft"
+    merging_mode : "soft" | "hard", default: "soft"
        How merges are performed for SortingAnalyzer.
        If the `merge_mode` is "soft" , merges will be approximated, with no reloading of the waveforms.
        This will lead to approximations. If `merge_mode` is "hard", recomputations are accurately
        performed, reloading waveforms if needed
    sparsity_overlap : float, default 0.75
-        The percentage of overlap that units should share in order to accept merges. If this criteria is not
-        achieved, soft merging will not be possible and an error will be raised. This is for use with a SortingAnalyzer input.
-
-    verbose:
-
-    **job_kwargs
+        The percentage of overlap that units should share in order to accept merges. If this criterion is not
+        met, soft merging will not be possible and an error will be raised. This is for use with a SortingAnalyzer input.
+    verbose : bool, default: False
+        If True, output is verbose
+    **job_kwargs : dict
+        Job keyword arguments for `merge_units`

    Returns
    -------
diff --git a/src/spikeinterface/curation/curationsorting.py b/src/spikeinterface/curation/curationsorting.py
index 702bb587f7..b4afeab547 100644
--- a/src/spikeinterface/curation/curationsorting.py
+++ b/src/spikeinterface/curation/curationsorting.py
@@ -18,7 +18,7 @@ class CurationSorting:

    Parameters
    ----------
-    sorting: BaseSorting
+    sorting : BaseSorting
        The sorting object
    properties_policy : "keep" | "remove", default: "keep"
        Policy used to propagate properties after split and merge operation. If "keep" the properties will be
@@ -26,6 +26,7 @@ class CurationSorting:
        an empty value for all the properties
    make_graph : bool
        True to keep a Networkx graph instance with the curation history
+
    Returns
    -------
    sorting : Sorting
diff --git a/src/spikeinterface/curation/mergeunitssorting.py b/src/spikeinterface/curation/mergeunitssorting.py
index 11f26ea778..df5bb7446c 100644
--- a/src/spikeinterface/curation/mergeunitssorting.py
+++ b/src/spikeinterface/curation/mergeunitssorting.py
@@ -13,7 +13,7 @@ class MergeUnitsSorting(BaseSorting):

    Parameters
    ----------
-    sorting: BaseSorting
+    sorting : BaseSorting
        The sorting object
    units_to_merge : list/tuple of lists/tuples
        A list of lists for every merge group. Each element needs to have at least two elements (two units to merge),
diff --git a/src/spikeinterface/curation/remove_redundant.py b/src/spikeinterface/curation/remove_redundant.py
index 874552f767..bf03afbb8b 100644
--- a/src/spikeinterface/curation/remove_redundant.py
+++ b/src/spikeinterface/curation/remove_redundant.py
@@ -58,6 +58,10 @@ def remove_redundant_units(
        Used when remove_strategy="highest_amplitude"
    extra_outputs : bool, default: False
        If True, will return the redundant pairs.
+    unit_peak_shifts : dict
+        Dictionary mapping the unit_id to the unit's shift (in number of samples).
+        A positive shift means the spike train is shifted back in time, while
+        a negative shift means the spike train is shifted forward.
Returns ------- diff --git a/src/spikeinterface/curation/splitunitsorting.py b/src/spikeinterface/curation/splitunitsorting.py index 33c14dfe5a..0804f637a5 100644 --- a/src/spikeinterface/curation/splitunitsorting.py +++ b/src/spikeinterface/curation/splitunitsorting.py @@ -13,9 +13,9 @@ class SplitUnitSorting(BaseSorting): Parameters ---------- - sorting: BaseSorting + sorting : BaseSorting The sorting object - parent_unit_id : int + split_unit_id : int Unit id of the unit to split indices_list : list or np.array A list of index arrays selecting the spikes to split in each segment. @@ -28,6 +28,7 @@ class SplitUnitSorting(BaseSorting): Policy used to propagate properties. If "keep" the properties will be passed to the new units (if the units_to_merge have the same value). If "remove" the new units will have an empty value for all the properties of the new unit + Returns ------- sorting : Sorting diff --git a/src/spikeinterface/extractors/neoextractors/maxwell.py b/src/spikeinterface/extractors/neoextractors/maxwell.py index 58110cf7aa..04e41433e1 100644 --- a/src/spikeinterface/extractors/neoextractors/maxwell.py +++ b/src/spikeinterface/extractors/neoextractors/maxwell.py @@ -32,7 +32,7 @@ class MaxwellRecordingExtractor(NeoBaseRecordingExtractor): Load exhaustively all annotations from neo. use_names_as_ids : bool, default: False Determines the format of the channel IDs used by the extractor. If set to True, the channel IDs will be the - names from NeoRawIO. If set to False, the channel IDs will be the ids provided by NeoRawIO. + names from NeoRawIO. If set to False, the channel IDs will be the ids provided by NeoRawIO. rec_name : str, default: None When the file contains several recordings you need to specify the one you want to extract. (rec_name='rec0000'). diff --git a/src/spikeinterface/extractors/neoextractors/plexon.py b/src/spikeinterface/extractors/neoextractors/plexon.py index 0adddc2439..eed3188d16 100644 --- a/src/spikeinterface/extractors/neoextractors/plexon.py +++ b/src/spikeinterface/extractors/neoextractors/plexon.py @@ -25,7 +25,7 @@ class PlexonRecordingExtractor(NeoBaseRecordingExtractor): Load exhaustively all annotations from neo. use_names_as_ids : bool, default: True Determines the format of the channel IDs used by the extractor. If set to True, the channel IDs will be the - names from NeoRawIO. If set to False, the channel IDs will be the ids provided by NeoRawIO. + names from NeoRawIO. If set to False, the channel IDs will be the ids provided by NeoRawIO. Example for wideband signals: names: ["WB01", "WB02", "WB03", "WB04"] diff --git a/src/spikeinterface/generation/drift_tools.py b/src/spikeinterface/generation/drift_tools.py index 0e4f1985c6..aa59de8f60 100644 --- a/src/spikeinterface/generation/drift_tools.py +++ b/src/spikeinterface/generation/drift_tools.py @@ -116,6 +116,16 @@ class DriftingTemplates(Templates): * move every templates on-the-fly, this lead to one interpolation per spike * precompute some displacements for all templates and use a discreate interpolation, for instance by step of 1um This is the same strategy used by MEArec. 
+
+    Parameters
+    ----------
+    templates_array_moved : np.array
+        Shape is (num_displacement, num_templates, num_samples, num_channels)
+    displacements : np.array
+        Displacement vector with shape (num_displacement, 2)
+    **static_kwargs : dict
+        Keyword arguments for `Templates`
    """

    def __init__(self, templates_array_moved=None, displacements=None, **static_kwargs):
@@ -306,6 +316,8 @@ class InjectDriftingTemplatesRecording(BaseRecording):
        If None, no amplitude scaling is applied.
        If scalar all spikes have the same factor (certainly useless).
        If vector, it must have the same size as the spike vector.
+    mode : str, default: "precompute"
+        Mode for how to compute templates.

    Returns
    -------
diff --git a/src/spikeinterface/generation/drifting_generator.py b/src/spikeinterface/generation/drifting_generator.py
index b439c57c52..69f1fb6375 100644
--- a/src/spikeinterface/generation/drifting_generator.py
+++ b/src/spikeinterface/generation/drifting_generator.py
@@ -194,6 +194,8 @@ def generate_displacement_vector(
    motion_list : list of dict
        List of dicts containing individual motion vector parameters.
        len(motion_list) == displacement_vectors.shape[2]
+    seed : None | int, default: None
+        Random seed for `make_one_displacement_vector`

    Returns
    -------
diff --git a/src/spikeinterface/generation/hybrid_tools.py b/src/spikeinterface/generation/hybrid_tools.py
index 0c82e496c0..12958649dd 100644
--- a/src/spikeinterface/generation/hybrid_tools.py
+++ b/src/spikeinterface/generation/hybrid_tools.py
@@ -42,6 +42,8 @@ def estimate_templates_from_recording(

    Parameters
    ----------
+    recording : BaseRecording
+        The recording to get templates from.
    ms_before : float
        The time before peaks of templates.
    ms_after : float
@@ -181,6 +183,8 @@ def scale_template_to_range(
        The minimum amplitude of the output templates after scaling.
    max_amplitude : float
        The maximum amplitude of the output templates after scaling.
+    amplitude_function : "ptp" | "min" | "max", default: "ptp"
+        The function to use to compute the amplitude of the templates. Can be "ptp", "min" or "max".

    Returns
    -------
@@ -356,10 +360,6 @@ def generate_hybrid_recording(
    are_templates_scaled : bool, default: True
        If True, the templates are assumed to be in uV, otherwise in the same unit as the recording.
        In case the recording has scaling, the templates are "unscaled" before injection.
-    ms_before : float, default: 1.5
-        Cut out in ms before spike peak.
-    ms_after : float, default: 3
-        Cut out in ms after spike peak.
    unit_locations : np.array, default: None
        The locations at which the templates should be injected. If not provided, generated
        (see generate_unit_location_kwargs).
diff --git a/src/spikeinterface/generation/template_database.py b/src/spikeinterface/generation/template_database.py
index 17d2bdf521..6d094adf11 100644
--- a/src/spikeinterface/generation/template_database.py
+++ b/src/spikeinterface/generation/template_database.py
@@ -71,6 +71,8 @@ def query_templates_from_database(template_df: "pandas.DataFrame", verbose: bool
    ----------
    template_df : pd.DataFrame
        Dataframe containing the template information, obtained by slicing/querying the output of fetch_templates_info.
+    verbose : bool, default: False
+        If True, output is verbose

    Returns
    -------
diff --git a/src/spikeinterface/postprocessing/correlograms.py b/src/spikeinterface/postprocessing/correlograms.py
index 3c65f2075c..8da1ed752a 100644
--- a/src/spikeinterface/postprocessing/correlograms.py
+++ b/src/spikeinterface/postprocessing/correlograms.py
@@ -20,7 +20,7 @@ class ComputeCorrelograms(AnalyzerExtension):

    Parameters
    ----------
-    sorting_analyzer: SortingAnalyzer
+    sorting_analyzer : SortingAnalyzer
        A SortingAnalyzer object
    window_ms : float, default: 50.0
        The window around the spike to compute the correlation in ms. For example,
diff --git a/src/spikeinterface/postprocessing/isi.py b/src/spikeinterface/postprocessing/isi.py
index fa919e11e2..542f829f21 100644
--- a/src/spikeinterface/postprocessing/isi.py
+++ b/src/spikeinterface/postprocessing/isi.py
@@ -17,7 +17,7 @@ class ComputeISIHistograms(AnalyzerExtension):

    Parameters
    ----------
-    sorting_analyzer: SortingAnalyzer
+    sorting_analyzer : SortingAnalyzer
        A SortingAnalyzer object
    window_ms : float, default: 50
        The window in ms
diff --git a/src/spikeinterface/postprocessing/principal_component.py b/src/spikeinterface/postprocessing/principal_component.py
index 99c60a5043..f1f89403c7 100644
--- a/src/spikeinterface/postprocessing/principal_component.py
+++ b/src/spikeinterface/postprocessing/principal_component.py
@@ -25,21 +25,21 @@ class ComputePrincipalComponents(AnalyzerExtension):

    Parameters
    ----------
-    sorting_analyzer: SortingAnalyzer
+    sorting_analyzer : SortingAnalyzer
        A SortingAnalyzer object
-    n_components: int, default: 5
+    n_components : int, default: 5
        Number of components fo PCA
-    mode: "by_channel_local" | "by_channel_global" | "concatenated", default: "by_channel_local"
+    mode : "by_channel_local" | "by_channel_global" | "concatenated", default: "by_channel_local"
        The PCA mode:
            - "by_channel_local": a local PCA is fitted for each channel (projection by channel)
            - "by_channel_global": a global PCA is fitted for all channels (projection by channel)
            - "concatenated": channels are concatenated and a global PCA is fitted
-    sparsity: ChannelSparsity or None, default: None
+    sparsity : ChannelSparsity or None, default: None
        The sparsity to apply to waveforms.
If sorting_analyzer is already sparse, the default sparsity will be used - whiten: bool, default: True + whiten : bool, default: True If True, waveforms are pre-whitened - dtype: dtype, default: "float32" + dtype : dtype, default: "float32" Dtype of the pc scores Examples diff --git a/src/spikeinterface/postprocessing/spike_amplitudes.py b/src/spikeinterface/postprocessing/spike_amplitudes.py index e82a9e61e4..9e8b5993b9 100644 --- a/src/spikeinterface/postprocessing/spike_amplitudes.py +++ b/src/spikeinterface/postprocessing/spike_amplitudes.py @@ -22,13 +22,13 @@ class ComputeSpikeAmplitudes(AnalyzerExtension): Parameters ---------- - sorting_analyzer: SortingAnalyzer + sorting_analyzer : SortingAnalyzer A SortingAnalyzer object ms_before : float, default: 0.5 The left window, before a peak, in milliseconds ms_after : float, default: 0.5 The right window, after a peak, in milliseconds - spike_retriver_kwargs: dict + spike_retriver_kwargs : dict A dictionary to control the behavior for getting the maximum channel for each spike This dictionary contains: diff --git a/src/spikeinterface/postprocessing/spike_locations.py b/src/spikeinterface/postprocessing/spike_locations.py index 53e55b4d1f..6995fc04da 100644 --- a/src/spikeinterface/postprocessing/spike_locations.py +++ b/src/spikeinterface/postprocessing/spike_locations.py @@ -17,13 +17,13 @@ class ComputeSpikeLocations(AnalyzerExtension): Parameters ---------- - sorting_analyzer: SortingAnalyzer + sorting_analyzer : SortingAnalyzer A SortingAnalyzer object ms_before : float, default: 0.5 The left window, before a peak, in milliseconds ms_after : float, default: 0.5 The right window, after a peak, in milliseconds - spike_retriver_kwargs: dict + spike_retriver_kwargs : dict A dictionary to control the behavior for getting the maximum channel for each spike This dictionary contains: diff --git a/src/spikeinterface/postprocessing/template_metrics.py b/src/spikeinterface/postprocessing/template_metrics.py index eef2a2f32c..31652d8afc 100644 --- a/src/spikeinterface/postprocessing/template_metrics.py +++ b/src/spikeinterface/postprocessing/template_metrics.py @@ -50,7 +50,7 @@ class ComputeTemplateMetrics(AnalyzerExtension): Parameters ---------- - sorting_analyzer: SortingAnalyzer + sorting_analyzer : SortingAnalyzer The SortingAnalyzer object metric_names : list or None, default: None List of metrics to compute (see si.postprocessing.get_template_metric_names()) @@ -58,13 +58,13 @@ class ComputeTemplateMetrics(AnalyzerExtension): Whether to use the positive ("pos") or negative ("neg") peaks to estimate extremum channels. upsampling_factor : int, default: 10 The upsampling factor to upsample the templates - sparsity: ChannelSparsity or None, default: None + sparsity : ChannelSparsity or None, default: None If None, template metrics are computed on the extremum channel only. If sparsity is given, template metrics are computed on all sparse channels of each unit. For more on generating a ChannelSparsity, see the `~spikeinterface.compute_sparsity()` function. - include_multi_channel_metrics: bool, default: False + include_multi_channel_metrics : bool, default: False Whether to compute multi-channel metrics - metrics_kwargs: dict + metrics_kwargs : dict Additional arguments to pass to the metric functions. 
Including: * recovery_window_ms: the window in ms after the peak to compute the recovery_slope, default: 0.7 * peak_relative_threshold: the relative threshold to detect positive and negative peaks, default: 0.2 diff --git a/src/spikeinterface/postprocessing/template_similarity.py b/src/spikeinterface/postprocessing/template_similarity.py index cb4cc323ad..53df14ff8a 100644 --- a/src/spikeinterface/postprocessing/template_similarity.py +++ b/src/spikeinterface/postprocessing/template_similarity.py @@ -13,25 +13,22 @@ class ComputeTemplateSimilarity(AnalyzerExtension): Similarity is defined as 1 - distance(T_1, T_2) for two templates T_1, T_2 - Parameters ---------- sorting_analyzer : SortingAnalyzer The SortingAnalyzer object method : str, default: "cosine" The method to compute the similarity. Can be in ["cosine", "l2", "l1"] + In case of "l1" or "l2", the formula used is: + - similarity = 1 - norm(T_1 - T_2)/(norm(T_1) + norm(T_2)). + In case of cosine it is: + - similarity = 1 - sum(T_1.T_2)/(norm(T_1)norm(T_2)). max_lag_ms : float, default: 0 If specified, the best distance for all given lag within max_lag_ms is kept, for every template support : "dense" | "union" | "intersection", default: "union" Support that should be considered to compute the distances between the templates, given their sparsities. Can be either ["dense", "union", "intersection"] - In case of "l1" or "l2", the formula used is: - similarity = 1 - norm(T_1 - T_2)/(norm(T_1) + norm(T_2)) - - In case of cosine this is: - similarity = 1 - sum(T_1.T_2)/(norm(T_1)norm(T_2)) - Returns ------- similarity: np.array diff --git a/src/spikeinterface/postprocessing/unit_locations.py b/src/spikeinterface/postprocessing/unit_locations.py index 818f0a8062..5d190d43f1 100644 --- a/src/spikeinterface/postprocessing/unit_locations.py +++ b/src/spikeinterface/postprocessing/unit_locations.py @@ -24,16 +24,16 @@ class ComputeUnitLocations(AnalyzerExtension): Parameters ---------- - sorting_analyzer: SortingAnalyzer + sorting_analyzer : SortingAnalyzer A SortingAnalyzer object - method: "center_of_mass" | "monopolar_triangulation" | "grid_convolution", default: "center_of_mass" + method : "center_of_mass" | "monopolar_triangulation" | "grid_convolution", default: "center_of_mass" The method to use for localization - method_kwargs: dict, default: {} + method_kwargs : dict, default: {} Other kwargs depending on the method Returns ------- - unit_locations: np.array + unit_locations : np.array unit location with shape (num_unit, 2) or (num_unit, 3) or (num_unit, 3) (with alpha) """ diff --git a/src/spikeinterface/preprocessing/motion.py b/src/spikeinterface/preprocessing/motion.py index 8d1f9bc9f3..ddb981a944 100644 --- a/src/spikeinterface/preprocessing/motion.py +++ b/src/spikeinterface/preprocessing/motion.py @@ -216,6 +216,11 @@ def get_motion_presets(): def get_motion_parameters_preset(preset): """ Get the parameters tree for a given preset for motion correction. + + Parameters + ---------- + preset : str, default: None + The preset name. See available presets using `spikeinterface.preprocessing.get_motion_presets()`. 
""" preset_params = copy.deepcopy(motion_options_preset[preset]) all_default_params = _get_default_motion_params() diff --git a/src/spikeinterface/qualitymetrics/misc_metrics.py b/src/spikeinterface/qualitymetrics/misc_metrics.py index 7465d58737..4e1136da91 100644 --- a/src/spikeinterface/qualitymetrics/misc_metrics.py +++ b/src/spikeinterface/qualitymetrics/misc_metrics.py @@ -69,7 +69,7 @@ def compute_num_spikes(sorting_analyzer, unit_ids=None, **kwargs): return num_spikes -def compute_firing_rates(sorting_analyzer, unit_ids=None, **kwargs): +def compute_firing_rates(sorting_analyzer, unit_ids=None): """ Compute the firing rate across segments. @@ -98,7 +98,7 @@ def compute_firing_rates(sorting_analyzer, unit_ids=None, **kwargs): return firing_rates -def compute_presence_ratios(sorting_analyzer, bin_duration_s=60.0, mean_fr_ratio_thresh=0.0, unit_ids=None, **kwargs): +def compute_presence_ratios(sorting_analyzer, bin_duration_s=60.0, mean_fr_ratio_thresh=0.0, unit_ids=None): """ Calculate the presence ratio, the fraction of time the unit is firing above a certain threshold. @@ -620,7 +620,7 @@ def compute_synchrony_metrics(sorting_analyzer, synchrony_sizes=(2, 4, 8), unit_ _default_params["synchrony"] = dict(synchrony_sizes=(2, 4, 8)) -def compute_firing_ranges(sorting_analyzer, bin_size_s=5, percentiles=(5, 95), unit_ids=None, **kwargs): +def compute_firing_ranges(sorting_analyzer, bin_size_s=5, percentiles=(5, 95), unit_ids=None): """ Calculate firing range, the range between the 5th and 95th percentiles of the firing rates distribution computed in non-overlapping time bins. @@ -1437,6 +1437,8 @@ def compute_sd_ratio( In this case, noise refers to the global voltage trace on the same channel as the best channel of the unit. (ideally (not implemented yet), the noise would be computed outside of spikes from the unit itself). + TODO: Take jitter into account. + Parameters ---------- sorting_analyzer : SortingAnalyzer @@ -1450,9 +1452,8 @@ def compute_sd_ratio( and will make a rough estimation of what that impact is (and remove it). unit_ids : list or None, default: None The list of unit ids to compute this metric. If None, all units are used. - **kwargs: + **kwargs : dict, default: {} Keyword arguments for computing spike amplitudes and extremum channel. - TODO: Take jitter into account. 
    Returns
    -------
diff --git a/src/spikeinterface/sorters/launcher.py b/src/spikeinterface/sorters/launcher.py
index c7127226b0..7ed5b29556 100644
--- a/src/spikeinterface/sorters/launcher.py
+++ b/src/spikeinterface/sorters/launcher.py
@@ -250,6 +250,8 @@ def run_sorter_by_property(
        Controls sorter verboseness
    docker_image : None or str, default: None
        If str run the sorter inside a container (docker) using the docker package
+    singularity_image : None or str, default: None
+        If str run the sorter inside a container (singularity) using the singularity package
    **sorter_params : keyword args
        Spike sorter specific arguments (they can be retrieved with `get_default_sorter_params(sorter_name_or_class)`)
diff --git a/src/spikeinterface/sorters/runsorter.py b/src/spikeinterface/sorters/runsorter.py
index 80608f8973..17700e7df8 100644
--- a/src/spikeinterface/sorters/runsorter.py
+++ b/src/spikeinterface/sorters/runsorter.py
@@ -96,23 +96,6 @@
        If True, the output Sorting is returned as a Sorting
    delete_container_files : bool, default: True
        If True, the container temporary files are deleted after the sorting is done
-    extra_requirements : list, default: None
-        List of extra requirements to install in the container
-    installation_mode : "auto" | "pypi" | "github" | "folder" | "dev" | "no-install", default: "auto"
-        How spikeinterface is installed in the container:
-          * "auto" : if host installation is a pip release then use "github" with tag
-            if host installation is DEV_MODE=True then use "dev"
-          * "pypi" : use pypi with pip install spikeinterface
-          * "github" : use github with `pip install git+https`
-          * "folder" : mount a folder in container and install from this one.
-            So the version in the container is a different spikeinterface version from host, useful for
-            cross checks
-          * "dev" : same as "folder", but the folder is the spikeinterface.__file__ to ensure same version as host
-          * "no-install" : do not install spikeinterface in the container because it is already installed
-    spikeinterface_version : str, default: None
-        The spikeinterface version to install in the container. If None, the current version is used
-    spikeinterface_folder_source : Path or None, default: None
-        In case of installation_mode="folder", the spikeinterface folder source to use to install in the container
    output_folder : None, default: None
        Do not use. Deprecated output function to be removed in 0.103.
    **sorter_params : keyword args
@@ -691,7 +674,9 @@ def read_sorter_folder(folder, register_recording=True, sorting_info=True, raise
    register_recording : bool, default: True
        Attach recording (when json or pickle) to the sorting
    sorting_info : bool, default: True
-        Attach sorting info to the sorting.
+        Attach sorting info to the sorting
+    raise_error : bool, default: True
+        Raise an error if the spike sorting failed
    """
    folder = Path(folder)
    log_file = folder / "spikeinterface_log.json"
diff --git a/src/spikeinterface/sortingcomponents/clustering/main.py b/src/spikeinterface/sortingcomponents/clustering/main.py
index 7381875557..99881f2f34 100644
--- a/src/spikeinterface/sortingcomponents/clustering/main.py
+++ b/src/spikeinterface/sortingcomponents/clustering/main.py
@@ -12,15 +12,15 @@ def find_cluster_from_peaks(recording, peaks, method="stupid", method_kwargs={},

    Parameters
    ----------
-    recording: RecordingExtractor
+    recording : RecordingExtractor
        The recording extractor object
-    peaks: numpy.array
+    peaks : numpy.array
        The peak vector
-    method: str
+    method : str
        Which method to use ("stupid" | "XXXX")
-    method_kwargs: dict, default: dict()
+    method_kwargs : dict, default: dict()
        Keyword arguments for the chosen method
-    extra_outputs: bool, default: False
+    extra_outputs : bool, default: False
        If True then debug is also return
        {}
diff --git a/src/spikeinterface/sortingcomponents/matching/main.py b/src/spikeinterface/sortingcomponents/matching/main.py
index 9476a0df03..88b31476a9 100644
--- a/src/spikeinterface/sortingcomponents/matching/main.py
+++ b/src/spikeinterface/sortingcomponents/matching/main.py
@@ -14,20 +14,22 @@ def find_spikes_from_templates(

    Parameters
    ----------
-    recording: RecordingExtractor
+    recording : RecordingExtractor
        The recording extractor object
-    method: "naive" | "tridesclous" | "circus" | "circus-omp" | "wobble"
+    method : "naive" | "tridesclous" | "circus" | "circus-omp" | "wobble"
        Which method to use for template matching
-    method_kwargs: dict, optional
+    method_kwargs : dict, optional
        Keyword arguments for the chosen method
-    extra_outputs: bool
+    extra_outputs : bool
        If True then method_kwargs is also returned
-    job_kwargs: dict
+    **job_kwargs : dict
        Parameters for ChunkRecordingExecutor
+    verbose : bool, default: False
+        If True, output is verbose

    Returns
    -------
-    spikes: ndarray
+    spikes : ndarray
        Spikes found from templates.
    method_kwargs:
        Optionaly returns for debug purpose.
diff --git a/src/spikeinterface/sortingcomponents/motion/motion_cleaner.py b/src/spikeinterface/sortingcomponents/motion/motion_cleaner.py
index 2fc1a281a9..87dca64496 100644
--- a/src/spikeinterface/sortingcomponents/motion/motion_cleaner.py
+++ b/src/spikeinterface/sortingcomponents/motion/motion_cleaner.py
@@ -11,16 +11,16 @@ def clean_motion_vector(motion, temporal_bins, bin_duration_s, speed_threshold=3

    Parameters
    ----------
-    motion: numpy array 2d
+    motion : numpy array 2d
        Motion estimate in um.
-    temporal_bins: numpy.array 1d
+    temporal_bins : numpy.array 1d
        temporal bins (bin center)
-    bin_duration_s: float
+    bin_duration_s : float
        bin duration in second
-    speed_threshold: float (units um/s)
+    speed_threshold : float (units um/s)
        Maximum speed treshold between 2 bins allowed.
        Expressed in um/s
-    sigma_smooth_s: None or float
+    sigma_smooth_s : None or float
        Optional smooting gaussian kernel.
    Returns
diff --git a/src/spikeinterface/sortingcomponents/motion/motion_estimation.py b/src/spikeinterface/sortingcomponents/motion/motion_estimation.py
index 0d425c98da..c75f7129aa 100644
--- a/src/spikeinterface/sortingcomponents/motion/motion_estimation.py
+++ b/src/spikeinterface/sortingcomponents/motion/motion_estimation.py
@@ -43,20 +43,19 @@ def estimate_motion(

    Parameters
    ----------
-    recording: BaseRecording
+    recording : BaseRecording
        The recording extractor
-    peaks: numpy array
+    peaks : numpy array
        Peak vector (complex dtype).
        Needed for decentralized and iterative_template methods.
-    peak_locations: numpy array
+    peak_locations : numpy array
        Complex dtype with "x", "y", "z" fields
        Needed for decentralized and iterative_template methods.
-    direction: "x" | "y" | "z", default: "y"
+    direction : "x" | "y" | "z", default: "y"
        Dimension on which the motion is estimated. "y" is depth along the probe.

    {method_doc}

-    **non-rigid section**
    rigid : bool, default: False
        Compute rigid (one motion for the entire probe) or non rigid motion
@@ -76,15 +75,14 @@ def estimate_motion(
        See win_shape
    win_margin_um : None | float, default: None
        See win_shape
-    extra_outputs: bool, default: False
+    extra_outputs : bool, default: False
        If True then return an extra dict that contains variables
        to check intermediate steps (motion_histogram, non_rigid_windows, pairwise_displacement)
-    progress_bar: bool, default: False
+    progress_bar : bool, default: False
        Display progress bar or not
-    verbose: bool, default: False
+    verbose : bool, default: False
        If True, output is verbose

-
    Returns
    -------
    motion: Motion object
diff --git a/src/spikeinterface/sortingcomponents/motion/motion_interpolation.py b/src/spikeinterface/sortingcomponents/motion/motion_interpolation.py
index 11ce11e1aa..57cc4d1371 100644
--- a/src/spikeinterface/sortingcomponents/motion/motion_interpolation.py
+++ b/src/spikeinterface/sortingcomponents/motion/motion_interpolation.py
@@ -69,23 +69,25 @@ def interpolate_motion_on_traces(
        Trace snippet (num_samples, num_channels)
    times : np.array
        Sample times in seconds for the frames of the traces snippet
-    channel_location: np.array 2d
+    channel_locations : np.array 2d
        Channel location with shape (n, 2) or (n, 3)
-    motion: Motion
+    motion : Motion
        The motion object.
-    segment_index: int or None
+    segment_index : int or None
        The segment index.
-    channel_inds: None or list
+    channel_inds : None or list
        If not None, interpolate only a subset of channels.
    interpolation_time_bin_centers_s : None or np.array
        Manually specify the time bins which the interpolation happens
        in for this segment. If None, these are the motion estimate's time bins.
-    spatial_interpolation_method: "idw" | "kriging", default: "kriging"
+    spatial_interpolation_method : "idw" | "kriging", default: "kriging"
        The spatial interpolation method used to interpolate the channel locations:
            * idw : Inverse Distance Weighing
            * kriging : kilosort2.5 like
-    spatial_interpolation_kwargs:
-        * specific option for the interpolation method
+    spatial_interpolation_kwargs : dict
+        Specific options for the interpolation method
+    dtype : np.dtype, default: None
+        The dtype of the traces. If None, inherits from the traces snippet

    Returns
    -------
@@ -237,11 +239,11 @@ class InterpolateMotionRecording(BasePreprocessor):

    Parameters
    ----------
-    recording: Recording
+    recording : Recording
        The parent recording.
-    motion: Motion
+    motion : Motion
        The motion object
-    spatial_interpolation_method: "kriging" | "idw" | "nearest", default: "kriging"
+    spatial_interpolation_method : "kriging" | "idw" | "nearest", default: "kriging"
        The spatial interpolation method used to interpolate the channel locations.
        See `spikeinterface.preprocessing.get_spatial_interpolation_kernel()` for more details.
        Choice of the method:
@@ -249,23 +251,24 @@ class InterpolateMotionRecording(BasePreprocessor):
            * "kriging" : the same one used in kilosort
            * "idw" : inverse distance weighted
            * "nearest" : use neareast channel
-    sigma_um: float, default: 20.0
+
+    sigma_um : float, default: 20.0
        Used in the "kriging" formula
-    p: int, default: 1
+    p : int, default: 1
        Used in the "kriging" formula
-    num_closest: int, default: 3
+    num_closest : int, default: 3
        Number of closest channels used by "idw" method for interpolation.
-    border_mode: "remove_channels" | "force_extrapolate" | "force_zeros", default: "remove_channels"
+    border_mode : "remove_channels" | "force_extrapolate" | "force_zeros", default: "remove_channels"
        Control how channels are handled on border:
            * "remove_channels": remove channels on the border, the recording has less channels
            * "force_extrapolate": keep all channel and force extrapolation (can lead to strange signal)
            * "force_zeros": keep all channel but set zeros when outside (force_extrapolate=False)
-    interpolation_time_bin_centers_s: np.array or list of np.array, optional
+    interpolation_time_bin_centers_s : np.array or list of np.array, optional
        Spatially interpolate each frame according to the displacement estimate at its closest bin center
        in this array. If not supplied, this is set to the motion estimate's time bin centers. If it's supplied,
        the motion estimate is interpolated to these bin centers. If you have a multi-segment recording,
        pass a list of these, one per segment.
-    interpolation_time_bin_size_s: float, optional
+    interpolation_time_bin_size_s : float, optional
        Similar to the previous argument: interpolation_time_bin_centers_s will be constructed
        by bins spaced by interpolation_time_bin_size_s. This is ignored if interpolation_time_bin_centers_s
        is supplied.
@@ -273,6 +276,8 @@ class InterpolateMotionRecording(BasePreprocessor):
        Interpolation needs to convert to a floating dtype. If dtype is supplied, that will be used.
        If the input recording is already floating and dtype=None, then its dtype is used by default.
        If the input recording is integer, then float32 is used by default.
+    **spatial_interpolation_kwargs : dict
+        Spatial interpolation kwargs for `interpolate_motion_on_traces`.

    Returns
    -------
diff --git a/src/spikeinterface/sortingcomponents/motion/motion_utils.py b/src/spikeinterface/sortingcomponents/motion/motion_utils.py
index 203bd2473b..635624cca8 100644
--- a/src/spikeinterface/sortingcomponents/motion/motion_utils.py
+++ b/src/spikeinterface/sortingcomponents/motion/motion_utils.py
@@ -17,6 +17,7 @@ class Motion:
        Motion estimate in um.
        List is the number of segment.
For each semgent : + * shape (temporal bins, spatial bins) * motion.shape[0] = temporal_bins.shape[0] * motion.shape[1] = 1 (rigid) or spatial_bins.shape[1] (non rigid) diff --git a/src/spikeinterface/sortingcomponents/peak_detection.py b/src/spikeinterface/sortingcomponents/peak_detection.py index b984853123..4fe90dd7bc 100644 --- a/src/spikeinterface/sortingcomponents/peak_detection.py +++ b/src/spikeinterface/sortingcomponents/peak_detection.py @@ -59,19 +59,19 @@ def detect_peaks( Parameters ---------- - recording: RecordingExtractor + recording : RecordingExtractor The recording extractor object. - pipeline_nodes: None or list[PipelineNode] + pipeline_nodes : None or list[PipelineNode] Optional additional PipelineNode need to computed just after detection time. This avoid reading the recording multiple times. - gather_mode: str + gather_mode : str How to gather the results: * "memory": results are returned as in-memory numpy arrays * "npy": results are stored to .npy files in `folder` - folder: str or Path + folder : str or Path If gather_mode is "npy", the folder where the files are created. - names: list + names : list List of strings with file stems associated with returns. {method_doc} diff --git a/src/spikeinterface/sortingcomponents/peak_localization.py b/src/spikeinterface/sortingcomponents/peak_localization.py index b578eb4478..4dff27e338 100644 --- a/src/spikeinterface/sortingcomponents/peak_localization.py +++ b/src/spikeinterface/sortingcomponents/peak_localization.py @@ -98,10 +98,14 @@ def localize_peaks(recording, peaks, method="center_of_mass", ms_before=0.5, ms_ Parameters ---------- - recording: RecordingExtractor + recording : RecordingExtractor The recording extractor object. - peaks: array + peaks : array Peaks array, as returned by detect_peaks() in "compact_numpy" way. 
+ ms_before : float + The number of milliseconds to include before the peak of the spike + ms_after : float + The number of milliseconds to include after the peak of the spike {method_doc} diff --git a/src/spikeinterface/sortingcomponents/tools.py b/src/spikeinterface/sortingcomponents/tools.py index facefac4c5..05552d41a9 100644 --- a/src/spikeinterface/sortingcomponents/tools.py +++ b/src/spikeinterface/sortingcomponents/tools.py @@ -20,14 +20,14 @@ def make_multi_method_doc(methods, ident=" "): doc = "" - doc += "method: " + ", ".join(f"'{method.name}'" for method in methods) + "\n" + doc += "method : " + ", ".join(f"'{method.name}'" for method in methods) + "\n" doc += ident + " Method to use.\n" for method in methods: doc += "\n" - doc += ident + f"arguments for method='{method.name}'" + doc += ident + ident + f"arguments for method='{method.name}'" for line in method.params_doc.splitlines(): - doc += ident + line + "\n" + doc += ident + ident + line + "\n" return doc From 05be8efe5334aa970ba7885fedba6c253fc8a85d Mon Sep 17 00:00:00 2001 From: jonahpearl Date: Thu, 25 Jul 2024 13:31:58 -0400 Subject: [PATCH 061/187] always split job kwargs --- src/spikeinterface/core/sortinganalyzer.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/spikeinterface/core/sortinganalyzer.py b/src/spikeinterface/core/sortinganalyzer.py index ac142405ab..cda0e10ff7 100644 --- a/src/spikeinterface/core/sortinganalyzer.py +++ b/src/spikeinterface/core/sortinganalyzer.py @@ -1209,11 +1209,7 @@ def compute_one_extension(self, extension_name, save=True, verbose=False, **kwar print(f"Deleting {child}") self.delete_extension(child) - if extension_class.need_job_kwargs: - params, job_kwargs = split_job_kwargs(kwargs) - else: - params = kwargs - job_kwargs = {} + params, job_kwargs = split_job_kwargs(kwargs) # check dependencies if extension_class.need_recording: From 40b253935fab880c53441c6356b022606fd11577 Mon Sep 17 00:00:00 2001 From: jonahpearl Date: Fri, 26 Jul 2024 08:41:00 -0400 Subject: [PATCH 062/187] fix name of principal_components ext in qm docs --- doc/modules/qualitymetrics.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/modules/qualitymetrics.rst b/doc/modules/qualitymetrics.rst index 04a302c597..f119693203 100644 --- a/doc/modules/qualitymetrics.rst +++ b/doc/modules/qualitymetrics.rst @@ -57,7 +57,7 @@ This code snippet shows how to compute quality metrics (with or without principa # with PCs (depends on "pca" in addition to the above metrics) - qm_ext = sorting_analyzer.compute(input={"pca": dict(n_components=5, mode="by_channel_local"), + qm_ext = sorting_analyzer.compute(input={"principal_components": dict(n_components=5, mode="by_channel_local"), "quality_metrics": dict(skip_pc_metrics=False)}) metrics = qm_ext.get_data() assert 'isolation_distance' in metrics.columns From 092c13c9683194819bfdf78e4c79ebb04b005344 Mon Sep 17 00:00:00 2001 From: mhhennig Date: Sat, 27 Jul 2024 16:08:54 +0100 Subject: [PATCH 063/187] added lowpass parameter, fixed verbose option --- src/spikeinterface/sorters/external/herdingspikes.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/spikeinterface/sorters/external/herdingspikes.py b/src/spikeinterface/sorters/external/herdingspikes.py index a84c05c240..f3bbb530ef 100644 --- a/src/spikeinterface/sorters/external/herdingspikes.py +++ b/src/spikeinterface/sorters/external/herdingspikes.py @@ -19,6 +19,7 @@ class HerdingspikesSorter(BaseSorter): "chunk_size": None, "rescale": True, 
"rescale_value": -1280.0, + "lowpass": True, "common_reference": "median", "spike_duration": 1.0, "amp_avg_duration": 0.4, @@ -53,6 +54,7 @@ class HerdingspikesSorter(BaseSorter): "out_file": "Path and filename to store detection and clustering results. (`str`, `HS2_detected`)", "verbose": "Print progress information. (`bool`, `True`)", "chunk_size": " Number of samples per chunk during detection. If `None`, a suitable value will be estimated. (`int`, `None`)", + "lowpass": "Enable internal low-pass filtering (simple two-step average). (`bool`, `True`)", "common_reference": "Method for common reference filtering, can be `average` or `median` (`str`, `median`)", "rescale": "Automatically re-scale the data. (`bool`, `True`)", "rescale_value": "Factor by which data is re-scaled. (`float`, `-1280.0`)", @@ -122,20 +124,21 @@ def _run_from_folder(cls, sorter_output_folder, params, verbose): hs_version = version.parse(hs.__version__) - if hs_version >= version.parse("0.4.001"): + if hs_version >= version.parse("0.4.1"): lightning_api = True else: lightning_api = False assert ( lightning_api - ), "HerdingSpikes version <0.4.001 is no longer supported. run:\n>>> pip install --upgrade herdingspikes" + ), "HerdingSpikes version <0.4.001 is no longer supported. To upgrade, run:\n>>> pip install --upgrade herdingspikes" recording = cls.load_recording_from_folder(sorter_output_folder.parent, with_warnings=False) sorted_file = str(sorter_output_folder / "HS2_sorted.hdf5") params["out_file"] = str(sorter_output_folder / "HS2_detected") p = params + p.update({"verbose": verbose}) det = hs.HSDetectionLightning(recording, p) det.DetectFromRaw() From 742068c07b324ea5c8005212c478b2cc1bf075a8 Mon Sep 17 00:00:00 2001 From: Alessio Buccino Date: Mon, 29 Jul 2024 11:26:10 +0200 Subject: [PATCH 064/187] After release --- pyproject.toml | 18 +++++++++--------- src/spikeinterface/__init__.py | 4 ++-- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 67aee92d29..71919c072b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "spikeinterface" -version = "0.101.0" +version = "0.101.1" authors = [ { name="Alessio Buccino", email="alessiop.buccino@gmail.com" }, { name="Samuel Garcia", email="sam.garcia.die@gmail.com" }, @@ -125,16 +125,16 @@ test_core = [ # for github test : probeinterface and neo from master # for release we need pypi, so this need to be commented - # "probeinterface @ git+https://github.com/SpikeInterface/probeinterface.git", - # "neo @ git+https://github.com/NeuralEnsemble/python-neo.git", + "probeinterface @ git+https://github.com/SpikeInterface/probeinterface.git", + "neo @ git+https://github.com/NeuralEnsemble/python-neo.git", ] test_extractors = [ # Functions to download data in neo test suite "pooch>=1.8.2", "datalad>=1.0.2", - # "probeinterface @ git+https://github.com/SpikeInterface/probeinterface.git", - # "neo @ git+https://github.com/NeuralEnsemble/python-neo.git", + "probeinterface @ git+https://github.com/SpikeInterface/probeinterface.git", + "neo @ git+https://github.com/NeuralEnsemble/python-neo.git", ] test_preprocessing = [ @@ -175,8 +175,8 @@ test = [ # for github test : probeinterface and neo from master # for release we need pypi, so this need to be commented - # "probeinterface @ git+https://github.com/SpikeInterface/probeinterface.git", - # "neo @ git+https://github.com/NeuralEnsemble/python-neo.git", + "probeinterface @ git+https://github.com/SpikeInterface/probeinterface.git", + "neo @ 
git+https://github.com/NeuralEnsemble/python-neo.git", ] docs = [ @@ -199,8 +199,8 @@ docs = [ "datalad>=1.0.2", # for release we need pypi, so this needs to be commented - # "probeinterface @ git+https://github.com/SpikeInterface/probeinterface.git", # We always build from the latest version - # "neo @ git+https://github.com/NeuralEnsemble/python-neo.git", # We always build from the latest version + "probeinterface @ git+https://github.com/SpikeInterface/probeinterface.git", # We always build from the latest version + "neo @ git+https://github.com/NeuralEnsemble/python-neo.git", # We always build from the latest version ] diff --git a/src/spikeinterface/__init__.py b/src/spikeinterface/__init__.py index 97fb95b623..306c12d516 100644 --- a/src/spikeinterface/__init__.py +++ b/src/spikeinterface/__init__.py @@ -30,5 +30,5 @@ # This flag must be set to False for release # This avoids using versioning that contains ".dev0" (and this is a better choice) # This is mainly useful when using run_sorter in a container and spikeinterface install -# DEV_MODE = True -DEV_MODE = False +DEV_MODE = True +# DEV_MODE = False From 0b7dc336d28dca614ea98c5abd975b8c61346fd6 Mon Sep 17 00:00:00 2001 From: "Matthias H. Hennig" Date: Mon, 29 Jul 2024 11:12:34 +0100 Subject: [PATCH 065/187] Update src/spikeinterface/sorters/external/herdingspikes.py Co-authored-by: Alessio Buccino --- src/spikeinterface/sorters/external/herdingspikes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spikeinterface/sorters/external/herdingspikes.py b/src/spikeinterface/sorters/external/herdingspikes.py index f3bbb530ef..94d66e7f86 100644 --- a/src/spikeinterface/sorters/external/herdingspikes.py +++ b/src/spikeinterface/sorters/external/herdingspikes.py @@ -131,7 +131,7 @@ def _run_from_folder(cls, sorter_output_folder, params, verbose): assert ( lightning_api - ), "HerdingSpikes version <0.4.001 is no longer supported. To upgrade, run:\n>>> pip install --upgrade herdingspikes" + ), "HerdingSpikes version <0.4.1 is no longer supported. 
To upgrade, run:\n>>> pip install --upgrade herdingspikes" recording = cls.load_recording_from_folder(sorter_output_folder.parent, with_warnings=False) From d9fb487f5f3820a0097d00ac7e233bc51608925b Mon Sep 17 00:00:00 2001 From: Alessio Buccino Date: Mon, 29 Jul 2024 12:53:56 +0200 Subject: [PATCH 066/187] Fix KS2/2.5/3 skip_kilosort_preprocessing --- src/spikeinterface/sorters/external/kilosortbase.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spikeinterface/sorters/external/kilosortbase.py b/src/spikeinterface/sorters/external/kilosortbase.py index d04128b50c..4015574280 100644 --- a/src/spikeinterface/sorters/external/kilosortbase.py +++ b/src/spikeinterface/sorters/external/kilosortbase.py @@ -146,7 +146,7 @@ def _setup_recording(cls, recording, sorter_output_folder, params, verbose): padding_start = 0 padding_end = pad padded_recording = TracePaddedRecording( - parent_recording=recording, + recording=recording, padding_start=padding_start, padding_end=padding_end, ) From 3c3cb933f7f51615e02e04335123899046708335 Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Mon, 29 Jul 2024 13:37:24 -0300 Subject: [PATCH 067/187] drop python 3.8 in pyproject --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 71919c072b..eb2c0f2fe9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ authors = [ ] description = "Python toolkit for analysis, visualization, and comparison of spike sorting output" readme = "README.md" -requires-python = ">=3.8,<4.0" +requires-python = ">=3.9,<4.0" classifiers = [ "Programming Language :: Python :: 3 :: Only", "License :: OSI Approved :: MIT License", From 36eec5718de05ffcd609aea93d0d8603d7e43298 Mon Sep 17 00:00:00 2001 From: Alessio Buccino Date: Mon, 29 Jul 2024 18:39:41 +0200 Subject: [PATCH 068/187] Fix postprocessing docs --- doc/modules/postprocessing.rst | 58 ++++++++++++++++++++-------------- 1 file changed, 35 insertions(+), 23 deletions(-) diff --git a/doc/modules/postprocessing.rst b/doc/modules/postprocessing.rst index cc2b064ed0..3c30f248c8 100644 --- a/doc/modules/postprocessing.rst +++ b/doc/modules/postprocessing.rst @@ -208,9 +208,11 @@ For dense waveforms, sparsity can also be passed as an argument. .. code-block:: python - pc = sorting_analyzer.compute(input="principal_components", - n_components=3, - mode="by_channel_local") + pc = sorting_analyzer.compute( + input="principal_components", + n_components=3, + mode="by_channel_local" + ) For more information, see :py:func:`~spikeinterface.postprocessing.compute_principal_components` @@ -243,9 +245,7 @@ each spike. .. code-block:: python - amplitudes = sorting_analyzer.compute(input="spike_amplitudes", - peak_sign="neg", - outputs="concatenated") + amplitudes = sorting_analyzer.compute(input="spike_amplitudes", peak_sign="neg") For more information, see :py:func:`~spikeinterface.postprocessing.compute_spike_amplitudes` @@ -263,15 +263,17 @@ with center of mass (:code:`method="center_of_mass"` - fast, but less accurate), .. 
code-block:: python

    spike_locations = sorting_analyzer.compute(
        input="spike_locations",
        ms_before=0.5,
        ms_after=0.5,
        spike_retriever_kwargs=dict(
            channel_from_template=True,
            radius_um=50,
            peak_sign="neg"
        ),
        method="center_of_mass"
    )

For more information, see :py:func:`~spikeinterface.postprocessing.compute_spike_locations`

@@ -329,6 +331,12 @@ Optionally, the following multi-channel metrics can be computed by setting:

    Visualization of template metrics.
    Image from `ecephys_spike_sorting `_ from the Allen Institute.

+
+.. code-block:: python
+
+    tm = sorting_analyzer.compute(input="template_metrics", include_multi_channel_metrics=True)
+
+
For more information, see :py:func:`~spikeinterface.postprocessing.compute_template_metrics`

@@ -340,10 +348,12 @@ with shape (num_units, num_units, num_bins) with all correlograms for each pair

.. code-block:: python

    ccg = sorting_analyzer.compute(
        input="correlograms",
        window_ms=50.0,
        bin_ms=1.0,
        method="auto"
    )

For more information, see :py:func:`~spikeinterface.postprocessing.compute_correlograms`

@@ -357,10 +367,12 @@ This extension computes the histograms of inter-spike-intervals. The computed ou

.. code-block:: python

    isi = sorting_analyzer.compute(
        input="isi_histograms",
        window_ms=50.0,
        bin_ms=1.0,
        method="auto"
    )

For more information, see :py:func:`~spikeinterface.postprocessing.compute_isi_histograms`

From 4b83e8b52a906826570df49c7bd29a5c26050235 Mon Sep 17 00:00:00 2001
From: Zach McKenzie <92116279+zm711@users.noreply.github.com>
Date: Mon, 29 Jul 2024 13:41:09 -0400
Subject: [PATCH 069/187] fix docstring and error

---
 src/spikeinterface/postprocessing/spike_amplitudes.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/spikeinterface/postprocessing/spike_amplitudes.py b/src/spikeinterface/postprocessing/spike_amplitudes.py
index e82a9e61e4..f158952132 100644
--- a/src/spikeinterface/postprocessing/spike_amplitudes.py
+++ b/src/spikeinterface/postprocessing/spike_amplitudes.py
@@ -44,8 +44,8 @@ class ComputeSpikeAmplitudes(AnalyzerExtension):
        The localization method to use
    method_kwargs : dict, default: dict()
        Other kwargs depending on the method.
-    outputs : "concatenated" | "by_unit", default: "concatenated"
-        The output format
+    outputs : "numpy" | "by_unit", default: "numpy"
+        The output format, either concatenated as a numpy array or separated on a per-unit basis

    Returns
    -------
@@ -148,7 +148,7 @@ def _get_data(self, outputs="numpy"):
                    amplitudes_by_units[segment_index][unit_id] = all_amplitudes[inds]
            return amplitudes_by_units
        else:
-            raise ValueError(f"Wrong .get_data(outputs={outputs})")
+            raise ValueError(f"Wrong .get_data(outputs={outputs}); possibilities are `numpy` or `by_unit`")

register_result_extension(ComputeSpikeAmplitudes)

From 983cf753ebc17a85b52e413e565225e1207ed8a4 Mon Sep 17 00:00:00 2001
From: Alessio Buccino
Date: Mon, 29 Jul 2024 20:04:38 +0200
Subject: [PATCH 070/187] Protect median against nans in get_prototype_spike

---
 src/spikeinterface/sortingcomponents/tools.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/spikeinterface/sortingcomponents/tools.py b/src/spikeinterface/sortingcomponents/tools.py
index facefac4c5..969b20c272 100644
--- a/src/spikeinterface/sortingcomponents/tools.py
+++ b/src/spikeinterface/sortingcomponents/tools.py
@@ -70,18 +70,18 @@ def extract_waveform_at_max_channel(rec, peaks, ms_before=0.5, ms_after=1.5, **j

def get_prototype_spike(recording, peaks, ms_before=0.5, ms_after=0.5, nb_peaks=1000, **job_kwargs):
+    from spikeinterface.sortingcomponents.peak_selection import select_peaks
+
    nbefore = int(ms_before * recording.sampling_frequency / 1000.0)
    nafter = int(ms_after * recording.sampling_frequency / 1000.0)

-    from spikeinterface.sortingcomponents.peak_selection import select_peaks
-
    few_peaks = select_peaks(peaks, recording=recording, method="uniform", n_peaks=nb_peaks, margin=(nbefore, nafter))
    waveforms = extract_waveform_at_max_channel(
        recording, few_peaks, ms_before=ms_before, ms_after=ms_after, **job_kwargs
    )

    with np.errstate(divide="ignore", invalid="ignore"):
-        prototype = np.median(waveforms[:, :, 0] / (np.abs(waveforms[:, nbefore, 0][:, np.newaxis])), axis=0)
+        prototype = np.nanmedian(waveforms[:, :, 0] / (np.abs(waveforms[:, nbefore, 0][:, np.newaxis])), axis=0)

    return prototype

From 4336f0d6d3ef5595a45242cab6985fa2e638fe3f Mon Sep 17 00:00:00 2001
From: JoeZiminski
Date: Mon, 29 Jul 2024 19:24:28 +0100
Subject: [PATCH 071/187] Expose 'save_preprocessed_copy' in KS4 wrapper.
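A minimal usage sketch of the option this patch exposes (an editor's
illustration, not part of the patch itself; it assumes `recording` is an
existing BaseRecording and the folder name is hypothetical):

    from spikeinterface.sorters import run_sorter

    # With save_preprocessed_copy=True, Kilosort4 keeps a pre-processed
    # (drift-corrected) copy of the data as temp_wh.dat in the results
    # directory, per the parameter description added below.
    sorting = run_sorter(
        "kilosort4",
        recording,              # an existing BaseRecording
        folder="ks4_output",    # hypothetical output location
        save_preprocessed_copy=True,
    )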
--- src/spikeinterface/sorters/external/kilosort4.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/spikeinterface/sorters/external/kilosort4.py b/src/spikeinterface/sorters/external/kilosort4.py index a7f40a9558..a904866629 100644 --- a/src/spikeinterface/sorters/external/kilosort4.py +++ b/src/spikeinterface/sorters/external/kilosort4.py @@ -56,6 +56,7 @@ class Kilosort4Sorter(BaseSorter): "save_extra_kwargs": False, "skip_kilosort_preprocessing": False, "scaleproc": None, + "save_preprocessed_copy": False, "torch_device": "auto", } @@ -98,6 +99,7 @@ class Kilosort4Sorter(BaseSorter): "save_extra_kwargs": "If True, additional kwargs are saved to the output", "skip_kilosort_preprocessing": "Can optionally skip the internal kilosort preprocessing", "scaleproc": "int16 scaling of whitened data, if None set to 200.", + "save_preprocessed_copy": "save a pre-processed copy of the data (including drift correction) to temp_wh.dat in the results directory", "torch_device": "Select the torch device auto/cuda/cpu", } @@ -186,6 +188,7 @@ def _run_from_folder(cls, sorter_output_folder, params, verbose): do_CAR = params["do_CAR"] invert_sign = params["invert_sign"] save_extra_vars = params["save_extra_kwargs"] + save_preprocessed_copy = (params["save_preprocessed_copy"],) progress_bar = None settings_ks = {k: v for k, v in params.items() if k in DEFAULT_SETTINGS} settings_ks["n_chan_bin"] = recording.get_num_channels() @@ -207,7 +210,15 @@ def _run_from_folder(cls, sorter_output_folder, params, verbose): results_dir = sorter_output_folder filename, data_dir, results_dir, probe = set_files(settings, filename, probe, probe_name, data_dir, results_dir) if version.parse(cls.get_sorter_version()) >= version.parse("4.0.12"): - ops = initialize_ops(settings, probe, recording.get_dtype(), do_CAR, invert_sign, device, False) + ops = initialize_ops( + settings, + probe, + recording.get_dtype(), + do_CAR, + invert_sign, + device, + save_preprocesed_copy=save_preprocessed_copy, + ) n_chan_bin, fs, NT, nt, twav_min, chan_map, dtype, do_CAR, invert, _, _, tmin, tmax, artifact, _, _ = ( get_run_parameters(ops) ) From dcd64b2285d6e2cb5d7d541ff0ff3bf9cc714089 Mon Sep 17 00:00:00 2001 From: zm711 <92116279+zm711@users.noreply.github.com> Date: Wed, 31 Jul 2024 08:09:27 -0400 Subject: [PATCH 072/187] some more numpydoc fixes --- .../comparison/basecomparison.py | 4 +- .../comparison/comparisontools.py | 110 +++++++++--------- .../curation/curation_format.py | 4 +- src/spikeinterface/curation/curation_tools.py | 10 +- .../curation/splitunitsorting.py | 2 +- src/spikeinterface/sorters/container_tools.py | 30 ++--- .../sorters/external/kilosort.py | 6 +- .../sorters/external/kilosort2.py | 6 +- .../sorters/external/kilosort2_5.py | 6 +- .../sorters/external/kilosort3.py | 6 +- .../sorters/external/kilosortbase.py | 6 +- 11 files changed, 95 insertions(+), 95 deletions(-) diff --git a/src/spikeinterface/comparison/basecomparison.py b/src/spikeinterface/comparison/basecomparison.py index f1d2130d38..3a39f08a7c 100644 --- a/src/spikeinterface/comparison/basecomparison.py +++ b/src/spikeinterface/comparison/basecomparison.py @@ -63,9 +63,9 @@ def compute_subgraphs(self): Computes subgraphs of connected components. 
Returns ------- - sg_object_names: list + sg_object_names : list List of sorter names for each node in the connected component subgraph - sg_units: list + sg_units : list List of unit ids for each node in the connected component subgraph """ if self.clean_graph is not None: diff --git a/src/spikeinterface/comparison/comparisontools.py b/src/spikeinterface/comparison/comparisontools.py index 87d0bf512b..814cb907ea 100644 --- a/src/spikeinterface/comparison/comparisontools.py +++ b/src/spikeinterface/comparison/comparisontools.py @@ -14,16 +14,16 @@ def count_matching_events(times1, times2, delta=10): Parameters ---------- - times1: list + times1 : list List of spike train 1 frames - times2: list + times2 : list List of spike train 2 frames - delta: int + delta : int Number of frames for considering matching events Returns ------- - matching_count: int + matching_count : int Number of matching events """ times_concat = np.concatenate((times1, times2)) @@ -45,16 +45,16 @@ def compute_agreement_score(num_matches, num1, num2): Parameters ---------- - num_matches: int + num_matches : int Number of matches - num1: int + num1 : int Number of events in spike train 1 - num2: int + num2 : int Number of events in spike train 2 Returns ------- - score: float + score : float Agreement score """ denom = num1 + num2 - num_matches @@ -71,12 +71,12 @@ def do_count_event(sorting): Parameters ---------- - sorting: SortingExtractor + sorting : SortingExtractor A sorting extractor Returns ------- - event_count: pd.Series + event_count : pd.Series Nb of spike by units. """ import pandas as pd @@ -90,14 +90,14 @@ def count_match_spikes(times1, all_times2, delta_frames): # , event_counts1, ev Parameters ---------- - times1: array + times1 : array Spike train 1 frames - all_times2: list of array + all_times2 : list of array List of spike trains from sorting 2 Returns ------- - matching_events_count: list + matching_events_count : list List of counts of matching events """ matching_event_counts = np.zeros(len(all_times2), dtype="int64") @@ -337,18 +337,18 @@ def make_agreement_scores(sorting1, sorting2, delta_frames, ensure_symmetry=True Parameters ---------- - sorting1: SortingExtractor + sorting1 : SortingExtractor The first sorting extractor - sorting2: SortingExtractor + sorting2 : SortingExtractor The second sorting extractor - delta_frames: int + delta_frames : int Number of frames to consider spikes coincident - ensure_symmetry: bool, default: True + ensure_symmetry : bool, default: True If ensure_symmetry is True, then the algo is run two times by switching sorting1 and sorting2. And the minimum of the two results is taken. Returns ------- - agreement_scores: array (float) + agreement_scores : array (float) The agreement score matrix. 
""" import pandas as pd @@ -401,16 +401,16 @@ def make_possible_match(agreement_scores, min_score): Parameters ---------- - agreement_scores: pd.DataFrame + agreement_scores : pd.DataFrame - min_score: float + min_score : float Returns ------- - best_match_12: pd.Series + best_match_12 : pd.Series - best_match_21: pd.Series + best_match_21 : pd.Series """ unit1_ids = np.array(agreement_scores.index) @@ -442,16 +442,16 @@ def make_best_match(agreement_scores, min_score): Parameters ---------- - agreement_scores: pd.DataFrame + agreement_scores : pd.DataFrame - min_score: float + min_score : float Returns ------- - best_match_12: pd.Series + best_match_12 : pd.Series - best_match_21: pd.Series + best_match_21 : pd.Series """ import pandas as pd @@ -490,14 +490,14 @@ def make_hungarian_match(agreement_scores, min_score): ---------- agreement_scores: pd.DataFrame - min_score: float + min_score : float Returns ------- - hungarian_match_12: pd.Series + hungarian_match_12 : pd.Series - hungarian_match_21: pd.Series + hungarian_match_21 : pd.Series """ import pandas as pd @@ -541,22 +541,22 @@ def do_score_labels(sorting1, sorting2, delta_frames, unit_map12, label_misclass Parameters ---------- - sorting1: SortingExtractor instance + sorting1 : SortingExtractor instance The ground truth sorting - sorting2: SortingExtractor instance + sorting2 : SortingExtractor instance The tested sorting - delta_frames: int + delta_frames : int Number of frames to consider spikes coincident - unit_map12: pd.Series + unit_map12 : pd.Series Dict of matching from sorting1 to sorting2 - label_misclassification: bool + label_misclassification : bool If True, misclassification errors are labelled Returns ------- - labels_st1: dict of lists of np.array of str + labels_st1 : dict of lists of np.array of str Contain score labels for units of sorting 1 for each segment - labels_st2: dict of lists of np.array of str + labels_st2 : dict of lists of np.array of str Contain score labels for units of sorting 2 for each segment """ unit1_ids = sorting1.get_unit_ids() @@ -647,12 +647,12 @@ def compare_spike_trains(spiketrain1, spiketrain2, delta_frames=10): Parameters ---------- - spiketrain1, spiketrain2: numpy.array + spiketrain1, spiketrain2 : numpy.array Times of spikes for the 2 spike trains. Returns ------- - lab_st1, lab_st2: numpy.array + lab_st1, lab_st2 : numpy.array Label of score for each spike """ lab_st1 = np.array(["UNPAIRED"] * len(spiketrain1)) @@ -684,19 +684,19 @@ def do_confusion_matrix(event_counts1, event_counts2, match_12, match_event_coun Parameters ---------- - event_counts1: pd.Series + event_counts1 : pd.Series Number of event per units 1 - event_counts2: pd.Series + event_counts2 : pd.Series Number of event per units 2 - match_12: pd.Series + match_12 : pd.Series Series of matching from sorting1 to sorting2. Can be the hungarian or best match. - match_event_count: pd.DataFrame + match_event_count : pd.DataFrame The match count matrix given by make_match_count_matrix Returns ------- - confusion_matrix: pd.DataFrame + confusion_matrix : pd.DataFrame The confusion matrix index are units1 reordered columns are units2 redordered @@ -746,19 +746,19 @@ def do_count_score(event_counts1, event_counts2, match_12, match_event_count): Parameters ---------- - event_counts1: pd.Series + event_counts1 : pd.Series Number of event per units 1 - event_counts2: pd.Series + event_counts2 : pd.Series Number of event per units 2 - match_12: pd.Series + match_12 : pd.Series Series of matching from sorting1 to sorting2. 
Can be the hungarian or best match.
-    match_event_count: pd.DataFrame
+    match_event_count : pd.DataFrame
         The match count matrix given by make_match_count_matrix

     Returns
     -------
-    count_score: pd.DataFrame
+    count_score : pd.DataFrame
         A table with one line per GT units and columns
         are tp/fn/fp/...
     """

@@ -837,16 +837,16 @@ def make_matching_events(times1, times2, delta):

     Parameters
     ----------
-    times1: list
+    times1 : list
         List of spike train 1 frames
-    times2: list
+    times2 : list
         List of spike train 2 frames
-    delta: int
+    delta : int
         Number of frames for considering matching events

     Returns
     -------
-    matching_event: numpy array dtype = ["index1", "index2", "delta"]
+    matching_event : numpy array dtype = ["index1", "index2", "delta"]
         1d of collision
     """
     times_concat = np.concatenate((times1, times2))
@@ -894,14 +894,14 @@ def make_collision_events(sorting, delta):

     Parameters
     ----------
-    sorting: SortingExtractor
+    sorting : SortingExtractor
        The sorting extractor object for counting collision events
-    delta: int
+    delta : int
        Number of frames for considering collision events

     Returns
     -------
-    collision_events: numpy array
+    collision_events : numpy array
            dtype =  [('index1', 'int64'), ('unit_id1', 'int64'),
                      ('index2', 'int64'), ('unit_id2', 'int64'),
                      ('delta', 'int64')]
diff --git a/src/spikeinterface/curation/curation_format.py b/src/spikeinterface/curation/curation_format.py
index 88190a9bab..babe7aac40 100644
--- a/src/spikeinterface/curation/curation_format.py
+++ b/src/spikeinterface/curation/curation_format.py
@@ -87,7 +87,7 @@ def convert_from_sortingview_curation_format_v0(sortingview_dict, destination_fo

     Returns
     -------
-    curation_dict: dict
+    curation_dict : dict
         A curation dictionary

@@ -138,7 +138,7 @@ def curation_label_to_vectors(curation_dict):

     Returns
     -------
-    labels: dict of numpy vector
+    labels : dict of numpy vector

     """
     unit_ids = list(curation_dict["unit_ids"])
diff --git a/src/spikeinterface/curation/curation_tools.py b/src/spikeinterface/curation/curation_tools.py
index 408a666613..3402638a16 100644
--- a/src/spikeinterface/curation/curation_tools.py
+++ b/src/spikeinterface/curation/curation_tools.py
@@ -106,18 +106,18 @@ def find_duplicated_spikes(

     Parameters
     ----------
-    spike_train: np.ndarray
+    spike_train : np.ndarray
         The spike train on which to look for duplicated spikes.
-    censored_period: int
+    censored_period : int
         The censored period for duplicates (in sample time).
-    method: "keep_first" |"keep_last" | "keep_first_iterative" | "keep_last_iterative" |random", default: "random"
+    method : "keep_first" | "keep_last" | "keep_first_iterative" | "keep_last_iterative" | "random", default: "random"
         Method used to remove the duplicated spikes.
-    seed: int | None
+    seed : int | None
         The seed to use if method="random".

     Returns
     -------
-    indices_of_duplicates: np.ndarray
+    indices_of_duplicates : np.ndarray
         The indices of spikes considered to be duplicates.
""" diff --git a/src/spikeinterface/curation/splitunitsorting.py b/src/spikeinterface/curation/splitunitsorting.py index 33c14dfe5a..08ab704224 100644 --- a/src/spikeinterface/curation/splitunitsorting.py +++ b/src/spikeinterface/curation/splitunitsorting.py @@ -13,7 +13,7 @@ class SplitUnitSorting(BaseSorting): Parameters ---------- - sorting: BaseSorting + sorting : BaseSorting The sorting object parent_unit_id : int Unit id of the unit to split diff --git a/src/spikeinterface/sorters/container_tools.py b/src/spikeinterface/sorters/container_tools.py index 8e03090eaf..6406919455 100644 --- a/src/spikeinterface/sorters/container_tools.py +++ b/src/spikeinterface/sorters/container_tools.py @@ -55,16 +55,16 @@ def __init__(self, mode, container_image, volumes, py_user_base, extra_kwargs): """ Parameters ---------- - mode: "docker" | "singularity" + mode : "docker" | "singularity" The container mode - container_image: str + container_image : str container image name and tag - volumes: dict + volumes : dict dict of volumes to bind - py_user_base: str + py_user_base : str Python user base folder to set as PYTHONUSERBASE env var in Singularity mode Prevents from overwriting user's packages when running pip install - extra_kwargs: dict + extra_kwargs : dict Extra kwargs to start container """ assert mode in ("docker", "singularity") @@ -180,28 +180,28 @@ def install_package_in_container( Parameters ---------- - container_client: ContainerClient + container_client : ContainerClient The container client - package_name: str + package_name : str The package name - installation_mode: str + installation_mode : str The installation mode - extra: str + extra : str Extra pip install arguments, e.g. [full] - version: str + version : str The package version to install - tag: str + tag : str The github tag to install - github_url: str + github_url : str The github url to install (needed for github mode) - container_folder_source: str + container_folder_source : str The container folder source (needed for folder mode) - verbose: bool + verbose : bool If True, print output of pip install command Returns ------- - res_output: str + res_output : str The output of the pip install command """ assert installation_mode in ("pypi", "github", "folder") diff --git a/src/spikeinterface/sorters/external/kilosort.py b/src/spikeinterface/sorters/external/kilosort.py index 56adb3b632..102703f912 100644 --- a/src/spikeinterface/sorters/external/kilosort.py +++ b/src/spikeinterface/sorters/external/kilosort.py @@ -137,14 +137,14 @@ def _get_specific_options(cls, ops, params): Parameters ---------- - ops: dict + ops : dict options data - params: dict + params : dict Custom parameters dictionary for kilosort3 Returns ---------- - ops: dict + ops : dict Final ops data """ diff --git a/src/spikeinterface/sorters/external/kilosort2.py b/src/spikeinterface/sorters/external/kilosort2.py index 0425ad5e53..cedcfe2a5e 100644 --- a/src/spikeinterface/sorters/external/kilosort2.py +++ b/src/spikeinterface/sorters/external/kilosort2.py @@ -148,14 +148,14 @@ def _get_specific_options(cls, ops, params): Parameters ---------- - ops: dict + ops : dict options data - params: dict + params : dict Custom parameters dictionary for kilosort3 Returns ---------- - ops: dict + ops : dict Final ops data """ diff --git a/src/spikeinterface/sorters/external/kilosort2_5.py b/src/spikeinterface/sorters/external/kilosort2_5.py index b3d1718d59..ea93ffde0d 100644 --- a/src/spikeinterface/sorters/external/kilosort2_5.py +++ 
b/src/spikeinterface/sorters/external/kilosort2_5.py @@ -164,14 +164,14 @@ def _get_specific_options(cls, ops, params): Parameters ---------- - ops: dict + ops : dict options data - params: dict + params : dict Custom parameters dictionary for kilosort3 Returns ---------- - ops: dict + ops : dict Final ops data """ # frequency for high pass filtering (300) diff --git a/src/spikeinterface/sorters/external/kilosort3.py b/src/spikeinterface/sorters/external/kilosort3.py index f560fd7e1e..4066948e2e 100644 --- a/src/spikeinterface/sorters/external/kilosort3.py +++ b/src/spikeinterface/sorters/external/kilosort3.py @@ -160,14 +160,14 @@ def _get_specific_options(cls, ops, params): Parameters ---------- - ops: dict + ops : dict options data - params: dict + params : dict Custom parameters dictionary for kilosort3 Returns ---------- - ops: dict + ops : dict Final ops data """ # frequency for high pass filtering (150) diff --git a/src/spikeinterface/sorters/external/kilosortbase.py b/src/spikeinterface/sorters/external/kilosortbase.py index d04128b50c..ba8b88b6bc 100644 --- a/src/spikeinterface/sorters/external/kilosortbase.py +++ b/src/spikeinterface/sorters/external/kilosortbase.py @@ -79,11 +79,11 @@ def _generate_ops_file(cls, recording, params, sorter_output_folder, binary_file Parameters ---------- - recording: BaseRecording + recording : BaseRecording The recording to generate the channel map file - params: dict + params : dict Custom parameters dictionary for kilosort - sorter_output_folder: pathlib.Path + sorter_output_folder : pathlib.Path Path object to save `ops.mat` """ ops = {} From 20b4d2fcf95171ea5304339c689170e52429d4fd Mon Sep 17 00:00:00 2001 From: JoeZiminski Date: Wed, 31 Jul 2024 15:04:57 +0100 Subject: [PATCH 073/187] Edit kilosort4.py to match the ks4 'run_sorter' function body. 
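
Because Kilosort4 only accepts `save_preprocessed_copy` in `save_sorting` from
version 4.0.12 on, the calls below are gated on the installed version. A
minimal sketch of that guard pattern (illustrative only, not part of this
patch; it assumes the `kilosort` package exposes `__version__`, which is what
the wrapper's `get_sorter_version` relies on):

    from packaging import version
    import kilosort

    # dispatch on the installed Kilosort4 version, as the wrapper does below
    if version.parse(kilosort.__version__) >= version.parse("4.0.12"):
        pass  # call save_sorting(..., save_preprocessed_copy=save_preprocessed_copy)
    else:
        pass  # fall back to the older save_sorting signature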
--- .../sorters/external/kilosort4.py | 23 ++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/src/spikeinterface/sorters/external/kilosort4.py b/src/spikeinterface/sorters/external/kilosort4.py index a904866629..16918128a2 100644 --- a/src/spikeinterface/sorters/external/kilosort4.py +++ b/src/spikeinterface/sorters/external/kilosort4.py @@ -155,7 +155,7 @@ def _run_from_folder(cls, sorter_output_folder, params, verbose): save_sorting, get_run_parameters, ) - from kilosort.io import load_probe, RecordingExtractorAsArray, BinaryFiltered + from kilosort.io import load_probe, RecordingExtractorAsArray, BinaryFiltered, save_preprocessing from kilosort.parameters import DEFAULT_SETTINGS import time @@ -188,7 +188,7 @@ def _run_from_folder(cls, sorter_output_folder, params, verbose): do_CAR = params["do_CAR"] invert_sign = params["invert_sign"] save_extra_vars = params["save_extra_kwargs"] - save_preprocessed_copy = (params["save_preprocessed_copy"],) + save_preprocessed_copy = params["save_preprocessed_copy"] progress_bar = None settings_ks = {k: v for k, v in params.items() if k in DEFAULT_SETTINGS} settings_ks["n_chan_bin"] = recording.get_num_channels() @@ -268,6 +268,9 @@ def _run_from_folder(cls, sorter_output_folder, params, verbose): ops, device, tic0=tic0, progress_bar=progress_bar, file_object=file_object ) + if save_preprocessed_copy: + save_preprocessing(results_dir / "temp_wh.dat", ops, bfile) + # Sort spikes and save results st, tF, _, _ = detect_spikes(ops, device, bfile, tic0=tic0, progress_bar=progress_bar) clu, Wall = cluster_spikes(st, tF, ops, device, bfile, tic0=tic0, progress_bar=progress_bar) @@ -276,7 +279,21 @@ def _run_from_folder(cls, sorter_output_folder, params, verbose): hp_filter=torch.as_tensor(np.zeros(1)), whiten_mat=torch.as_tensor(np.eye(recording.get_num_channels())) ) - _ = save_sorting(ops, results_dir, st, clu, tF, Wall, bfile.imin, tic0, save_extra_vars=save_extra_vars) + if version.parse(cls.get_sorter_version()) >= version.parse("4.0.12"): + _ = save_sorting( + ops, + results_dir, + st, + clu, + tF, + Wall, + bfile.imin, + tic0, + save_extra_vars=save_extra_vars, + save_preprocessed_copy=save_preprocessed_copy, + ) + else: + _ = save_sorting(ops, results_dir, st, clu, tF, Wall, bfile.imin, tic0, save_extra_vars=save_extra_vars) @classmethod def _get_result_from_folder(cls, sorter_output_folder): From c320d6c09761ea673a5c24e06ea55622997f4d9f Mon Sep 17 00:00:00 2001 From: JoeZiminski Date: Wed, 31 Jul 2024 15:18:09 +0100 Subject: [PATCH 074/187] Add clarification on typo. --- src/spikeinterface/sorters/external/kilosort4.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spikeinterface/sorters/external/kilosort4.py b/src/spikeinterface/sorters/external/kilosort4.py index 16918128a2..250c2865f9 100644 --- a/src/spikeinterface/sorters/external/kilosort4.py +++ b/src/spikeinterface/sorters/external/kilosort4.py @@ -217,7 +217,7 @@ def _run_from_folder(cls, sorter_output_folder, params, verbose): do_CAR, invert_sign, device, - save_preprocesed_copy=save_preprocessed_copy, + save_preprocesed_copy=save_preprocessed_copy, # this kwarg is correct (typo) ) n_chan_bin, fs, NT, nt, twav_min, chan_map, dtype, do_CAR, invert, _, _, tmin, tmax, artifact, _, _ = ( get_run_parameters(ops) From e51088ab0e5f56596a78fc4cfd4e9a6d50f71414 Mon Sep 17 00:00:00 2001 From: JoeZiminski Date: Wed, 31 Jul 2024 15:20:32 +0100 Subject: [PATCH 075/187] Extend param description. 
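
For reference, one way to inspect the saved copy afterwards (a sketch under
assumptions, not part of this patch: Kilosort writes `temp_wh.dat` as int16
samples laid out as (n_samples, n_channels), inside the sorter output folder):

    import numpy as np

    n_channels = recording.get_num_channels()  # the recording passed to the sorter
    flat = np.memmap("sorter_output/temp_wh.dat", dtype="int16", mode="r")
    preprocessed = flat.reshape(-1, n_channels)  # drift-corrected, whitened traces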
--- src/spikeinterface/sorters/external/kilosort4.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spikeinterface/sorters/external/kilosort4.py b/src/spikeinterface/sorters/external/kilosort4.py index 250c2865f9..6d83249653 100644 --- a/src/spikeinterface/sorters/external/kilosort4.py +++ b/src/spikeinterface/sorters/external/kilosort4.py @@ -99,7 +99,7 @@ class Kilosort4Sorter(BaseSorter): "save_extra_kwargs": "If True, additional kwargs are saved to the output", "skip_kilosort_preprocessing": "Can optionally skip the internal kilosort preprocessing", "scaleproc": "int16 scaling of whitened data, if None set to 200.", - "save_preprocessed_copy": "save a pre-processed copy of the data (including drift correction) to temp_wh.dat in the results directory", + "save_preprocessed_copy": "save a pre-processed copy of the data (including drift correction) to temp_wh.dat in the results directory and format Phy output to use that copy of the data", "torch_device": "Select the torch device auto/cuda/cpu", } From 6f63f76d0089c4458529b3653b45a85bec467374 Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Wed, 31 Jul 2024 15:44:16 -0300 Subject: [PATCH 076/187] patch widgets (#3238) Co-authored-by: Alessio Buccino --- src/spikeinterface/widgets/utils_ipywidgets.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/spikeinterface/widgets/utils_ipywidgets.py b/src/spikeinterface/widgets/utils_ipywidgets.py index e31f0e0444..0aae1777a9 100644 --- a/src/spikeinterface/widgets/utils_ipywidgets.py +++ b/src/spikeinterface/widgets/utils_ipywidgets.py @@ -10,7 +10,9 @@ def check_ipywidget_backend(): import matplotlib mpl_backend = matplotlib.get_backend() - assert "ipympl" in mpl_backend, "To use the 'ipywidgets' backend, you have to set %matplotlib widget" + assert ( + "ipympl" in mpl_backend or "widget" in mpl_backend + ), "To use the 'ipywidgets' backend, you have to set %matplotlib widget" class TimeSlider(W.HBox): From 4668ab7ca434e004f16ebfd454923c7adf3d6943 Mon Sep 17 00:00:00 2001 From: zm711 <92116279+zm711@users.noreply.github.com> Date: Thu, 1 Aug 2024 16:29:56 -0400 Subject: [PATCH 077/187] begin to add examples to docstrings --- src/spikeinterface/extractors/neoextractors/alphaomega.py | 8 +++++++- src/spikeinterface/extractors/neoextractors/axona.py | 5 +++++ src/spikeinterface/extractors/neoextractors/ced.py | 5 +++++ src/spikeinterface/extractors/neoextractors/intan.py | 8 +++++++- src/spikeinterface/extractors/neoextractors/plexon.py | 5 +++++ src/spikeinterface/extractors/neoextractors/plexon2.py | 5 +++++ .../extractors/neoextractors/spikegadgets.py | 5 +++++ src/spikeinterface/extractors/neoextractors/spikeglx.py | 7 +++++++ 8 files changed, 46 insertions(+), 2 deletions(-) diff --git a/src/spikeinterface/extractors/neoextractors/alphaomega.py b/src/spikeinterface/extractors/neoextractors/alphaomega.py index b3f671ebf3..cf47b9819c 100644 --- a/src/spikeinterface/extractors/neoextractors/alphaomega.py +++ b/src/spikeinterface/extractors/neoextractors/alphaomega.py @@ -18,7 +18,7 @@ class AlphaOmegaRecordingExtractor(NeoBaseRecordingExtractor): folder_path : str or Path-like The folder path to the AlphaOmega recordings. lsx_files : list of strings or None, default: None - A list of listings files that refers to mpx files to load. + A list of files that refers to mpx files to load. stream_id : {"RAW", "LFP", "SPK", "ACC", "AI", "UD"}, default: "RAW" If there are several streams, specify the stream id you want to load. 
stream_name : str, default: None @@ -28,6 +28,12 @@ class AlphaOmegaRecordingExtractor(NeoBaseRecordingExtractor): use_names_as_ids : bool, default: False Determines the format of the channel IDs used by the extractor. If set to True, the channel IDs will be the names from NeoRawIO. If set to False, the channel IDs will be the ids provided by NeoRawIO. + + Examples + -------- + >>> from spikeinterface.extractors import read_alphaomega + >>> recording = read_alphaomega(folder_path="alphaomega_folder") + """ NeoRawIOClass = "AlphaOmegaRawIO" diff --git a/src/spikeinterface/extractors/neoextractors/axona.py b/src/spikeinterface/extractors/neoextractors/axona.py index adfdccddd9..9de39bef2e 100644 --- a/src/spikeinterface/extractors/neoextractors/axona.py +++ b/src/spikeinterface/extractors/neoextractors/axona.py @@ -22,6 +22,11 @@ class AxonaRecordingExtractor(NeoBaseRecordingExtractor): use_names_as_ids : bool, default: False Determines the format of the channel IDs used by the extractor. If set to True, the channel IDs will be the names from NeoRawIO. If set to False, the channel IDs will be the ids provided by NeoRawIO. + + Examples + -------- + >>> from spikeinterface.extractors import read_axona + >>> recording = read_axona(file_path=r'my_data.set') """ NeoRawIOClass = "AxonaRawIO" diff --git a/src/spikeinterface/extractors/neoextractors/ced.py b/src/spikeinterface/extractors/neoextractors/ced.py index a42a2d75a5..992d1a8941 100644 --- a/src/spikeinterface/extractors/neoextractors/ced.py +++ b/src/spikeinterface/extractors/neoextractors/ced.py @@ -28,6 +28,11 @@ class CedRecordingExtractor(NeoBaseRecordingExtractor): use_names_as_ids : bool, default: False Determines the format of the channel IDs used by the extractor. If set to True, the channel IDs will be the names from NeoRawIO. If set to False, the channel IDs will be the ids provided by NeoRawIO. 
+ + Examples + -------- + >>> from spikeinterface.extractors import read_ced + >>> recording = read_ced(file_path=r'my_data.smr') """ NeoRawIOClass = "CedRawIO" diff --git a/src/spikeinterface/extractors/neoextractors/intan.py b/src/spikeinterface/extractors/neoextractors/intan.py index f0a1894f25..261472ede9 100644 --- a/src/spikeinterface/extractors/neoextractors/intan.py +++ b/src/spikeinterface/extractors/neoextractors/intan.py @@ -34,7 +34,13 @@ class IntanRecordingExtractor(NeoBaseRecordingExtractor): In Intan the ids provided by NeoRawIO are the hardware channel ids while the names are custom names given by the user - + Examples + -------- + >>> from spikeinterface.extractors import read_intan + # intan amplifier data is stored in stream_id = '0' + >>> recording = read_intan(file_path=r'my_data.rhd', stream_id='0') + # intan has multi-file formats as well, but in this case our path should point to the header file 'info.rhd' + >>> recording = read_intan(file_path=r'info.rhd', stream_id='0') """ NeoRawIOClass = "IntanRawIO" diff --git a/src/spikeinterface/extractors/neoextractors/plexon.py b/src/spikeinterface/extractors/neoextractors/plexon.py index 0adddc2439..a10c231e13 100644 --- a/src/spikeinterface/extractors/neoextractors/plexon.py +++ b/src/spikeinterface/extractors/neoextractors/plexon.py @@ -30,6 +30,11 @@ class PlexonRecordingExtractor(NeoBaseRecordingExtractor): Example for wideband signals: names: ["WB01", "WB02", "WB03", "WB04"] ids: ["0" , "1", "2", "3"] + + Examples + -------- + >>> from spikeinterface.extractors import read_plexon + >>> recording = read_plexon(file_path=r'my_data.plx') """ NeoRawIOClass = "PlexonRawIO" diff --git a/src/spikeinterface/extractors/neoextractors/plexon2.py b/src/spikeinterface/extractors/neoextractors/plexon2.py index 4434d02cc1..2f360ed864 100644 --- a/src/spikeinterface/extractors/neoextractors/plexon2.py +++ b/src/spikeinterface/extractors/neoextractors/plexon2.py @@ -28,6 +28,11 @@ class Plexon2RecordingExtractor(NeoBaseRecordingExtractor): ids: ["source3.1" , "source3.2", "source3.3", "source3.4"] all_annotations : bool, default: False Load exhaustively all annotations from neo. + + Examples + -------- + >>> from spikeinterface.extractors import read_plexon2 + >>> recording = read_plexon2(file_path=r'my_data.pl2') """ NeoRawIOClass = "Plexon2RawIO" diff --git a/src/spikeinterface/extractors/neoextractors/spikegadgets.py b/src/spikeinterface/extractors/neoextractors/spikegadgets.py index 89c457a573..e91a81398b 100644 --- a/src/spikeinterface/extractors/neoextractors/spikegadgets.py +++ b/src/spikeinterface/extractors/neoextractors/spikegadgets.py @@ -29,6 +29,11 @@ class SpikeGadgetsRecordingExtractor(NeoBaseRecordingExtractor): use_names_as_ids : bool, default: False Determines the format of the channel IDs used by the extractor. If set to True, the channel IDs will be the names from NeoRawIO. If set to False, the channel IDs will be the ids provided by NeoRawIO. 
+
+    Examples
+    --------
+    >>> from spikeinterface.extractors import read_spikegadgets
+    >>> recording = read_spikegadgets(file_path=r'my_data.rec')
     """

     NeoRawIOClass = "SpikeGadgetsRawIO"
diff --git a/src/spikeinterface/extractors/neoextractors/spikeglx.py b/src/spikeinterface/extractors/neoextractors/spikeglx.py
index cfe20bbfa6..874a65c045 100644
--- a/src/spikeinterface/extractors/neoextractors/spikeglx.py
+++ b/src/spikeinterface/extractors/neoextractors/spikeglx.py
@@ -41,6 +41,13 @@ class SpikeGLXRecordingExtractor(NeoBaseRecordingExtractor):
     use_names_as_ids : bool, default: False
         Determines the format of the channel IDs used by the extractor. If set to True, the channel IDs will be
         the names from NeoRawIO. If set to False, the channel IDs will be the ids provided by NeoRawIO.
+
+    Examples
+    --------
+    >>> from spikeinterface.extractors import read_spikeglx
+    >>> recording = read_spikeglx(folder_path=r'path_to_folder_with_data', load_sync_channel=False)
+    # we can load the sync channel, but then the probe is not loaded
+    >>> recording = read_spikeglx(folder_path=r'path_to_folder_with_data', load_sync_channel=True)
     """

     NeoRawIOClass = "SpikeGLXRawIO"

From 6e6933bfa0a18ca46a250efb1f7ffac7a7c621e6 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 5 Aug 2024 17:56:06 +0000
Subject: [PATCH 078/187] [pre-commit.ci] pre-commit autoupdate
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

updates:
- [github.com/psf/black: 24.4.2 → 24.8.0](https://github.com/psf/black/compare/24.4.2...24.8.0)
---
 .pre-commit-config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 5a58e7e8f1..4c4bd68be4 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -6,7 +6,7 @@ repos:
     - id: end-of-file-fixer
     - id: trailing-whitespace
   - repo: https://github.com/psf/black
-    rev: 24.4.2
+    rev: 24.8.0
    hooks:
    - id: black
      files: ^src/

From 7a320c74d04b929af4c2d8fcc5f6c724dcc8235a Mon Sep 17 00:00:00 2001
From: Heberto Mayorquin
Date: Tue, 6 Aug 2024 08:51:08 -0300
Subject: [PATCH 079/187] work in progress

---
 src/spikeinterface/core/base.py            | 28 ++++++++++++-----
 src/spikeinterface/core/tests/test_base.py | 35 ++++++++++++++++++++++
 2 files changed, 55 insertions(+), 8 deletions(-)

diff --git a/src/spikeinterface/core/base.py b/src/spikeinterface/core/base.py
index 72c0a2c2fe..71c03aa0b5 100644
--- a/src/spikeinterface/core/base.py
+++ b/src/spikeinterface/core/base.py
@@ -164,7 +164,7 @@ def id_to_index(self, id) -> int:
     def annotate(self, **new_annotations) -> None:
         self._annotations.update(new_annotations)

-    def set_annotation(self, annotation_key, value: Any, overwrite=False) -> None:
+    def set_annotation(self, annotation_key: str, value: Any, overwrite=False) -> None:
         """This function adds an entry to the annotations dictionary.

         Parameters
@@ -192,7 +192,7 @@ def get_preferred_mp_context(self):
         """
         return self._preferred_mp_context

-    def get_annotation(self, key, copy: bool = True) -> Any:
+    def get_annotation(self, key: str, copy: bool = True) -> Any:
         """
         Get a annotation.
Return a copy by default

@@ -205,7 +205,13 @@ def get_annotation(self, key, copy: bool = True) -> Any:
     def get_annotation_keys(self) -> List:
         return list(self._annotations.keys())

-    def set_property(self, key, values: Sequence, ids: Optional[Sequence] = None, missing_value: Any = None) -> None:
+    def set_property(
+        self,
+        key,
+        values: list | np.ndarray | tuple | None,
+        ids: list | np.ndarray | tuple | None = None,
+        missing_value: Any = None,
+    ) -> None:
         """
         Set property vector for main ids.

@@ -240,16 +246,20 @@ def set_property(self, key, values: Sequence, ids: Optional[Sequence] = None, mi
         dtype_kind = dtype.kind

         if ids is None:
-            assert values.shape[0] == size
+            assert (
+                values.shape[0] == size
+            ), f"Values must have the same size as the main ids: {size} but got array of size {values.shape[0]}"
             self._properties[key] = values
         else:
             ids = np.array(ids)
-            assert np.unique(ids).size == ids.size, "'ids' are not unique!"
+            unique_ids = np.unique(ids)
+            if unique_ids.size != ids.size:
+                non_unique_ids = [id for id in ids if np.count_nonzero(ids == id) > 1]
+                raise ValueError(f"IDs are not unique: {non_unique_ids}")

+            # This branch is used for unit and channel aggregation
             if ids.size < size:
                 if key not in self._properties:
-                    # create the property with nan or empty
-                    shape = (size,) + values.shape[1:]

                     if missing_value is None:
                         if dtype_kind not in self.default_missing_property_values.keys():
@@ -268,6 +278,8 @@ def set_property(self, key, values: Sequence, ids: Optional[Sequence] = None, mi
                         "as the values."
                     )

+                    # create the property with nan or empty
+                    shape = (size,) + values.shape[1:]
                     empty_values = np.zeros(shape, dtype=dtype)
                     empty_values[:] = missing_value
                     self._properties[key] = empty_values
@@ -285,7 +297,7 @@ def set_property(self, key, values: Sequence, ids: Optional[Sequence] = None, mi
                     self._properties[key] = np.zeros_like(values, dtype=values.dtype)
                 self._properties[key][indices] = values

-    def get_property(self, key, ids: Optional[Iterable] = None) -> np.ndarray:
+    def get_property(self, key: str, ids: Optional[Iterable] = None) -> np.ndarray:
         values = self._properties.get(key, None)
         if ids is not None and values is not None:
             inds = self.ids_to_indices(ids)
diff --git a/src/spikeinterface/core/tests/test_base.py b/src/spikeinterface/core/tests/test_base.py
index 7f55646b63..f5c4a9f7ca 100644
--- a/src/spikeinterface/core/tests/test_base.py
+++ b/src/spikeinterface/core/tests/test_base.py
@@ -94,6 +94,41 @@ def test_name_and_repr():
     assert "Hz" in rec_str


+def test_setting_properties():
+
+    num_channels = 5
+    recording = generate_recording(num_channels=5, durations=[1.0])
+    channel_ids = ["a", "b", "c", "d", "e"]
+    recording = recording.rename_channels(new_channel_ids=channel_ids)
+
+    complete_values = ["value"] * num_channels
+    recording.set_property(key="a_property", values=complete_values)
+
+    property_in_recording = recording.get_property("a_property")
+    expected_array = np.array(complete_values)
+    assert np.array_equal(property_in_recording, expected_array)
+
+    # Set property with missing values
+    incomplete_values = ["value"] * (num_channels - 1)
+    recording.set_property(key="incomplete_property", ids=channel_ids[:-1], values=incomplete_values)
+
+    property_in_recording = recording.get_property("incomplete_property")
+    expected_array = np.array(incomplete_values + [""])  # Spikeinterface defines missing values as empty strings
+    assert np.array_equal(property_in_recording, expected_array)
+
+    # Pass a missing value
+    recording.set_property(
+        key="missing_property",
ids=channel_ids[:-1], + values=incomplete_values, + missing_value="missing", + ) + + property_in_recording = recording.get_property("missing_property") + expected_array = np.array(incomplete_values + ["missing"]) + assert np.array_equal(property_in_recording, expected_array) + + if __name__ == "__main__": test_check_if_memory_serializable() test_check_if_serializable() From 6c9edfe22672a33677926918751f08c064ebdce7 Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Tue, 6 Aug 2024 09:13:03 -0300 Subject: [PATCH 080/187] fix error strings --- src/spikeinterface/core/base.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/spikeinterface/core/base.py b/src/spikeinterface/core/base.py index 71c03aa0b5..34cee5308b 100644 --- a/src/spikeinterface/core/base.py +++ b/src/spikeinterface/core/base.py @@ -229,12 +229,14 @@ def set_property( Array of values for the property ids : list/np.array, default: None List of subset of ids to set the values, default: None + if None which is the default all the ids are set or changed missing_value : object, default: None In case the property is set on a subset of values ("ids" not None), it specifies the how the missing values should be filled. The missing_value has to be specified for types int and unsigned int. """ + # This deletes the values but we have `delete_property` maybe we should eliminate this? if values is None: if key in self._properties: self._properties.pop(key) @@ -257,16 +259,15 @@ def set_property( non_unique_ids = [id for id in ids if np.count_nonzero(ids == id) > 1] raise ValueError(f"IDs are not unique: {non_unique_ids}") - # This branch is used for unit and channel aggregation + # Not clear where this branch is used, perhaps on aggregation of extractors? if ids.size < size: if key not in self._properties: if missing_value is None: if dtype_kind not in self.default_missing_property_values.keys(): - raise Exception( - "For values dtypes other than float, string, object or unicode, the missing value " - "cannot be automatically inferred. Please specify it with the 'missing_value' " - "argument." + raise ValueError( + f"Can't infer a natural missing value for dtype {dtype_kind}. " + "Please provide it with the missing_value argument" ) else: missing_value = self.default_missing_property_values[dtype_kind] @@ -286,9 +287,10 @@ def set_property( if ids.size == 0: return else: - assert dtype_kind == self._properties[key].dtype.kind, ( - "Mismatch between existing property dtype " "values dtype." - ) + existing_property = self._properties[key].dtype + assert ( + dtype_kind == existing_property.kind + ), f"Mismatch between existing property dtype {existing_property.kind} and provided values dtype {dtype_kind}." 
indices = self.ids_to_indices(ids)
             self._properties[key][indices] = values

From 4ad66b6e2147bb96c034df734616470119597be0 Mon Sep 17 00:00:00 2001
From: Heberto Mayorquin
Date: Tue, 6 Aug 2024 09:20:56 -0300
Subject: [PATCH 081/187] temporarily eliminate failing test, will add after a fix

---
 src/spikeinterface/core/tests/test_base.py | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/src/spikeinterface/core/tests/test_base.py b/src/spikeinterface/core/tests/test_base.py
index f5c4a9f7ca..24373bd04d 100644
--- a/src/spikeinterface/core/tests/test_base.py
+++ b/src/spikeinterface/core/tests/test_base.py
@@ -116,17 +116,17 @@ def test_setting_properties():
     expected_array = np.array(incomplete_values + [""])  # Spikeinterface defines missing values as empty strings
     assert np.array_equal(property_in_recording, expected_array)

-    # Pass a missing value
-    recording.set_property(
-        key="missing_property",
-        ids=channel_ids[:-1],
-        values=incomplete_values,
-        missing_value="missing",
-    )
-
-    property_in_recording = recording.get_property("missing_property")
-    expected_array = np.array(incomplete_values + ["missing"])
-    assert np.array_equal(property_in_recording, expected_array)
+    # # Pass a missing value
+    # recording.set_property(
+    #     key="missing_property",
+    #     ids=channel_ids[:-1],
+    #     values=incomplete_values,
+    #     missing_value="missing",
+    # )

+    # property_in_recording = recording.get_property("missing_property")
+    # expected_array = np.array(incomplete_values + ["missing"])
+    # assert np.array_equal(property_in_recording, expected_array)

 if __name__ == "__main__":

From 9eb82ba7b389b52356ce1d7fd264ee7e991e5c52 Mon Sep 17 00:00:00 2001
From: Heberto Mayorquin
Date: Tue, 6 Aug 2024 09:34:34 -0300
Subject: [PATCH 082/187] test general skip

---
 .../extractors/tests/test_iblextractors.py | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/src/spikeinterface/extractors/tests/test_iblextractors.py b/src/spikeinterface/extractors/tests/test_iblextractors.py
index c7c2dfacae..70d371a015 100644
--- a/src/spikeinterface/extractors/tests/test_iblextractors.py
+++ b/src/spikeinterface/extractors/tests/test_iblextractors.py
@@ -19,12 +19,15 @@ def setUpClass(cls):
         from one.api import ONE

         cls.eid = EID
-        cls.one = ONE(
-            base_url="https://openalyx.internationalbrainlab.org",
-            password="international",
-            silent=True,
-            cache_dir=None,
-        )
+        try:
+            cls.one = ONE(
base_url="https://openalyx.internationalbrainlab.org", - password="international", - silent=True, - cache_dir=None, - ) + try: + cls.one = ONE( + base_url="https://openalyx.internationalbrainlab.org", + password="international", + silent=True, + cache_dir=None, + ) + except: + pytest.skip("Skipping test due to server being down.") cls.recording = read_ibl_recording(eid=cls.eid, stream_name="probe00.ap", load_sync_channel=True, one=cls.one) cls.small_scaled_trace = cls.recording.get_traces(start_frame=5, end_frame=26, return_scaled=True) cls.small_unscaled_trace = cls.recording.get_traces( @@ -185,12 +188,15 @@ def test_ibl_sorting_extractor(self): """ from one.api import ONE - one = ONE( - base_url="https://openalyx.internationalbrainlab.org", - password="international", - silent=True, - cache_dir=None, - ) + try: + one = ONE( + base_url="https://openalyx.internationalbrainlab.org", + password="international", + silent=True, + cache_dir=None, + ) + except: + pytest.skip("Skipping test due to server being down.") sorting = read_ibl_sorting(pid=PID, one=one) assert len(sorting.unit_ids) == 733 sorting_good = read_ibl_sorting(pid=PID, good_clusters_only=True) From 97e48c41212db9a497a2280c5c35853f3ef19e74 Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Wed, 7 Aug 2024 11:14:21 -0300 Subject: [PATCH 084/187] alessio suggestion to remove values=None option --- src/spikeinterface/core/base.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/src/spikeinterface/core/base.py b/src/spikeinterface/core/base.py index 34cee5308b..eae38c2681 100644 --- a/src/spikeinterface/core/base.py +++ b/src/spikeinterface/core/base.py @@ -208,7 +208,7 @@ def get_annotation_keys(self) -> List: def set_property( self, key, - values: list | np.ndarray | tuple | None, + values: list | np.ndarray | tuple, ids: list | np.ndarray | tuple | None = None, missing_value: Any = None, ) -> None: @@ -236,12 +236,6 @@ def set_property( The missing_value has to be specified for types int and unsigned int. """ - # This deletes the values but we have `delete_property` maybe we should eliminate this? - if values is None: - if key in self._properties: - self._properties.pop(key) - return - size = self._main_ids.size values = np.asarray(values) dtype = values.dtype @@ -267,7 +261,7 @@ def set_property( if dtype_kind not in self.default_missing_property_values.keys(): raise ValueError( f"Can't infer a natural missing value for dtype {dtype_kind}. " - "Please provide it with the missing_value argument" + "Please provide it with the `missing_value` argument" ) else: missing_value = self.default_missing_property_values[dtype_kind] From 7b37ee3f45abe56debc6f05d2131f3114e94c10f Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Wed, 7 Aug 2024 17:50:54 -0300 Subject: [PATCH 085/187] revert --- src/spikeinterface/core/base.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/spikeinterface/core/base.py b/src/spikeinterface/core/base.py index eae38c2681..05e8ae3d8a 100644 --- a/src/spikeinterface/core/base.py +++ b/src/spikeinterface/core/base.py @@ -236,6 +236,11 @@ def set_property( The missing_value has to be specified for types int and unsigned int. 
""" + if values is None: + if key in self._properties: + self._properties.pop(key) + return + size = self._main_ids.size values = np.asarray(values) dtype = values.dtype @@ -261,7 +266,7 @@ def set_property( if dtype_kind not in self.default_missing_property_values.keys(): raise ValueError( f"Can't infer a natural missing value for dtype {dtype_kind}. " - "Please provide it with the `missing_value` argument" + "Please provide it with the missing_value argument" ) else: missing_value = self.default_missing_property_values[dtype_kind] From 2085cbe9a7fb74ce5d9c2c831611eb2a596de686 Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Thu, 8 Aug 2024 16:29:52 -0300 Subject: [PATCH 086/187] fix sampling frequency repr --- src/spikeinterface/core/baserecording.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/spikeinterface/core/baserecording.py b/src/spikeinterface/core/baserecording.py index e65afabaca..edcc23f339 100644 --- a/src/spikeinterface/core/baserecording.py +++ b/src/spikeinterface/core/baserecording.py @@ -96,14 +96,18 @@ def list_to_string(lst, max_size=6): def _repr_header(self): num_segments = self.get_num_segments() num_channels = self.get_num_channels() - sf_hz = self.get_sampling_frequency() - sf_khz = sf_hz / 1000 dtype = self.get_dtype() total_samples = self.get_total_samples() total_duration = self.get_total_duration() total_memory_size = self.get_total_memory_size() - sampling_frequency_repr = f"{sf_khz:0.1f}kHz" if sf_hz > 10_000.0 else f"{sf_hz:0.1f}Hz" + + sf_hz = self.get_sampling_frequency() + if not sf_hz.is_integer(): + sampling_frequency_repr = f"{sf_hz:f} Hz" + else: + # Khz for high sampling rate and Hz for LFP + sampling_frequency_repr = f"{(sf_hz/1000.0):0.1f}kHz" if sf_hz > 10_000.0 else f"{sf_hz:0.1f}Hz" txt = ( f"{self.name}: " From cd66e7ba8875161700d5523200930f9067ad2cde Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Thu, 8 Aug 2024 16:30:29 -0300 Subject: [PATCH 087/187] fix sampling repr --- src/spikeinterface/core/baserecording.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/spikeinterface/core/baserecording.py b/src/spikeinterface/core/baserecording.py index e65afabaca..edcc23f339 100644 --- a/src/spikeinterface/core/baserecording.py +++ b/src/spikeinterface/core/baserecording.py @@ -96,14 +96,18 @@ def list_to_string(lst, max_size=6): def _repr_header(self): num_segments = self.get_num_segments() num_channels = self.get_num_channels() - sf_hz = self.get_sampling_frequency() - sf_khz = sf_hz / 1000 dtype = self.get_dtype() total_samples = self.get_total_samples() total_duration = self.get_total_duration() total_memory_size = self.get_total_memory_size() - sampling_frequency_repr = f"{sf_khz:0.1f}kHz" if sf_hz > 10_000.0 else f"{sf_hz:0.1f}Hz" + + sf_hz = self.get_sampling_frequency() + if not sf_hz.is_integer(): + sampling_frequency_repr = f"{sf_hz:f} Hz" + else: + # Khz for high sampling rate and Hz for LFP + sampling_frequency_repr = f"{(sf_hz/1000.0):0.1f}kHz" if sf_hz > 10_000.0 else f"{sf_hz:0.1f}Hz" txt = ( f"{self.name}: " From 117e0f69356528fdeca417c6ae770d6c666f89b7 Mon Sep 17 00:00:00 2001 From: chrishalcrow <57948917+chrishalcrow@users.noreply.github.com> Date: Tue, 13 Aug 2024 13:00:20 +0100 Subject: [PATCH 088/187] More docstring updates --- src/spikeinterface/core/analyzer_extension_core.py | 3 ++- src/spikeinterface/postprocessing/correlograms.py | 4 ++-- src/spikeinterface/postprocessing/spike_amplitudes.py | 4 ++-- 
src/spikeinterface/postprocessing/template_metrics.py | 6 +++--- src/spikeinterface/postprocessing/unit_locations.py | 6 +++--- .../sortingcomponents/motion/motion_estimation.py | 4 +--- .../sortingcomponents/motion/motion_interpolation.py | 2 +- 7 files changed, 14 insertions(+), 15 deletions(-) diff --git a/src/spikeinterface/core/analyzer_extension_core.py b/src/spikeinterface/core/analyzer_extension_core.py index c9cff4fb94..bc5de63d07 100644 --- a/src/spikeinterface/core/analyzer_extension_core.py +++ b/src/spikeinterface/core/analyzer_extension_core.py @@ -677,7 +677,8 @@ class ComputeNoiseLevels(AnalyzerExtension): ---------- sorting_analyzer : SortingAnalyzer A SortingAnalyzer object - **params : dict with additional parameters for the `spikeinterface.get_noise_levels()` function + **kwargs : dict + Additional parameters for the `spikeinterface.get_noise_levels()` function Returns ------- diff --git a/src/spikeinterface/postprocessing/correlograms.py b/src/spikeinterface/postprocessing/correlograms.py index 8da1ed752a..7f7946f634 100644 --- a/src/spikeinterface/postprocessing/correlograms.py +++ b/src/spikeinterface/postprocessing/correlograms.py @@ -20,8 +20,8 @@ class ComputeCorrelograms(AnalyzerExtension): Parameters ---------- - sorting_analyzer : SortingAnalyzer - A SortingAnalyzer object + sorting_analyzer_or_sorting : SortingAnalyzer | Sorting + A SortingAnalyzer or Sorting object window_ms : float, default: 50.0 The window around the spike to compute the correlation in ms. For example, if 50 ms, the correlations will be computed at lags -25 ms ... 25 ms. diff --git a/src/spikeinterface/postprocessing/spike_amplitudes.py b/src/spikeinterface/postprocessing/spike_amplitudes.py index 7abd2e625e..2efac0e0d0 100644 --- a/src/spikeinterface/postprocessing/spike_amplitudes.py +++ b/src/spikeinterface/postprocessing/spike_amplitudes.py @@ -42,8 +42,8 @@ class ComputeSpikeAmplitudes(AnalyzerExtension): In case channel_from_template=False, this is the peak sign. method : "center_of_mass" | "monopolar_triangulation" | "grid_convolution", default: "center_of_mass" The localization method to use - method_kwargs : dict, default: dict() - Other kwargs depending on the method. + **method_kwargs : dict, default: {} + Kwargs which are passed to the method function. These can be found in the docstrings of `compute_center_of_mass`, `compute_grid_convolution` and `compute_monopolar_triangulation`. 
outputs : "numpy" | "by_unit", default: "numpy" The output format, either concatenated as numpy array or separated on a per unit basis diff --git a/src/spikeinterface/postprocessing/template_metrics.py b/src/spikeinterface/postprocessing/template_metrics.py index 31652d8afc..e54ff87221 100644 --- a/src/spikeinterface/postprocessing/template_metrics.py +++ b/src/spikeinterface/postprocessing/template_metrics.py @@ -77,9 +77,9 @@ class ComputeTemplateMetrics(AnalyzerExtension): * spread_threshold: the threshold to compute the spread, default: 0.2 * spread_smooth_um: the smoothing in um to compute the spread, default: 20 * column_range: the range in um in the horizontal direction to consider channels for velocity, default: None - - If None, all channels all channels are considered - - If 0 or 1, only the "column" that includes the max channel is considered - - If > 1, only channels within range (+/-) um from the max channel horizontal position are used + - If None, all channels all channels are considered + - If 0 or 1, only the "column" that includes the max channel is considered + - If > 1, only channels within range (+/-) um from the max channel horizontal position are used Returns ------- diff --git a/src/spikeinterface/postprocessing/unit_locations.py b/src/spikeinterface/postprocessing/unit_locations.py index 5d190d43f1..4029fc88c7 100644 --- a/src/spikeinterface/postprocessing/unit_locations.py +++ b/src/spikeinterface/postprocessing/unit_locations.py @@ -28,8 +28,8 @@ class ComputeUnitLocations(AnalyzerExtension): A SortingAnalyzer object method : "center_of_mass" | "monopolar_triangulation" | "grid_convolution", default: "center_of_mass" The method to use for localization - method_kwargs : dict, default: {} - Other kwargs depending on the method + **method_kwargs : dict, default: {} + Kwargs which are passed to the method function. These can be found in the docstrings of `compute_center_of_mass`, `compute_grid_convolution` and `compute_monopolar_triangulation`. Returns ------- @@ -94,7 +94,7 @@ def _run(self, verbose=False): method_kwargs.pop("method") if method not in _unit_location_methods: - raise ValueError(f"Wrong ethod for unit_locations : it should be in {list(_unit_location_methods.keys())}") + raise ValueError(f"Wrong method for unit_locations : it should be in {list(_unit_location_methods.keys())}") func = _unit_location_methods[method] self.data["unit_locations"] = func(self.sorting_analyzer, **method_kwargs) diff --git a/src/spikeinterface/sortingcomponents/motion/motion_estimation.py b/src/spikeinterface/sortingcomponents/motion/motion_estimation.py index c75f7129aa..62b120e9a0 100644 --- a/src/spikeinterface/sortingcomponents/motion/motion_estimation.py +++ b/src/spikeinterface/sortingcomponents/motion/motion_estimation.py @@ -32,8 +32,6 @@ def estimate_motion( **method_kwargs, ): """ - - Estimate motion with several possible methods. Most of methods except dredge_lfp needs peaks and after their localization. @@ -56,7 +54,6 @@ def estimate_motion( {method_doc} - rigid : bool, default: False Compute rigid (one motion for the entire probe) or non rigid motion Rigid computation is equivalent to non-rigid with only one window with rectangular shape. 
@@ -82,6 +79,7 @@ def estimate_motion(
         Display progress bar or not
     verbose : bool, default: False
         If True, output is verbose
+    **method_kwargs : dict
+        Additional kwargs forwarded to the selected estimation method

     Returns
     -------
diff --git a/src/spikeinterface/sortingcomponents/motion/motion_interpolation.py b/src/spikeinterface/sortingcomponents/motion/motion_interpolation.py
index 57cc4d1371..4912c26ca0 100644
--- a/src/spikeinterface/sortingcomponents/motion/motion_interpolation.py
+++ b/src/spikeinterface/sortingcomponents/motion/motion_interpolation.py
@@ -276,7 +276,7 @@ class InterpolateMotionRecording(BasePreprocessor):
         Interpolation needs to convert to a floating dtype. If dtype is supplied, that will be used.
         If the input recording is already floating and dtype=None, then its dtype is used by default.
         If the input recording is integer, then float32 is used by default.
-    **spatial_interpolation_kwargs: dict
+    **spatial_interpolation_kwargs : dict
         Spatial interpolation kwargs for `interpolate_motion_on_traces`.

     Returns

From 4183451e3209c370792a2ba4742ee241ef2c1213 Mon Sep 17 00:00:00 2001
From: Jonah Pearl
Date: Tue, 13 Aug 2024 12:23:41 -0400
Subject: [PATCH 089/187] allow quality and template metrics in unit table

---
 src/spikeinterface/widgets/sorting_summary.py |  9 ++-
 .../widgets/utils_sortingview.py              | 57 +++++++++++++++++--
 2 files changed, 59 insertions(+), 7 deletions(-)

diff --git a/src/spikeinterface/widgets/sorting_summary.py b/src/spikeinterface/widgets/sorting_summary.py
index 6f60e9ab9a..4f9a9b7615 100644
--- a/src/spikeinterface/widgets/sorting_summary.py
+++ b/src/spikeinterface/widgets/sorting_summary.py
@@ -43,7 +43,12 @@ class SortingSummaryWidget(BaseWidget):
         List of labels to be added to the curation table
         (sortingview backend)
     unit_table_properties : list or None, default: None
-        List of properties to be added to the unit table
+        List of properties to be added to the unit table.
+        These may be drawn from the sorting extractor, and, if available,
+        the quality_metrics and template_metrics extensions of the SortingAnalyzer.
+        See all properties available with sorting.get_property_keys(), and, if available,
+        analyzer.get_extension("quality_metrics").get_data().columns and
+        analyzer.get_extension("template_metrics").get_data().columns.
        (sortingview backend)
    """

@@ -151,7 +156,7 @@ def plot_sortingview(self, data_plot, **backend_kwargs):

         # unit ids
         v_units_table = generate_unit_table_view(
-            dp.sorting_analyzer.sorting, dp.unit_table_properties, similarity_scores=similarity_scores
+            dp.sorting_analyzer, dp.unit_table_properties, similarity_scores=similarity_scores
         )

         if dp.curation:
diff --git a/src/spikeinterface/widgets/utils_sortingview.py b/src/spikeinterface/widgets/utils_sortingview.py
index ab926a0104..1dbd4b9879 100644
--- a/src/spikeinterface/widgets/utils_sortingview.py
+++ b/src/spikeinterface/widgets/utils_sortingview.py
@@ -3,7 +3,7 @@
 import numpy as np

 from ..core.core_tools import check_json
-
+from warnings import warn

 def make_serializable(*args):
     dict_to_serialize = {int(i): a for i, a in enumerate(args)}
@@ -45,9 +45,30 @@ def handle_display_and_url(widget, view, **backend_kwargs):
     return url


-def generate_unit_table_view(sorting, unit_properties=None, similarity_scores=None):
+def generate_unit_table_view(analyzer, unit_properties=None, similarity_scores=None):
     import sortingview.views as vv

+    sorting = analyzer.sorting
+
+    # Find available unit properties from all sources
+    sorting_props = sorting.get_property_keys()
+    if analyzer.get_extension("quality_metrics") is not None:
+        qm_props = analyzer.get_extension("quality_metrics").get_data().columns
+        qm_data = analyzer.get_extension("quality_metrics").get_data()
+    else:
+        qm_props = []
+    if analyzer.get_extension("template_metrics") is not None:
+        tm_props = analyzer.get_extension("template_metrics").get_data().columns
+        tm_data = analyzer.get_extension("template_metrics").get_data()
+    else:
+        tm_props = []
+    # Check for any overlaps and warn user if any
+    all_props = sorting_props + qm_props + tm_props
+    overlap_props = [prop for prop in all_props if all_props.count(prop) > 1]
+    if len(overlap_props) > 0:
+        warn(f"Warning: Overlapping properties found in sorting, quality_metrics, and template_metrics: {overlap_props}")
+
+    # Get unit properties
     if unit_properties is None:
         ut_columns = []
         ut_rows = [vv.UnitsTableRow(unit_id=u, values={}) for u in sorting.unit_ids]
@@ -56,8 +77,20 @@ def generate_unit_table_view(sorting, unit_properties=None, similarity_scores=No
         ut_rows = []
         values = {}
         valid_unit_properties = []
+
+        # Create columns for each property
         for prop_name in unit_properties:
-            property_values = sorting.get_property(prop_name)
+
+            # Get property values from correct location
+            if prop_name in sorting_props:
+                property_values = sorting.get_property(prop_name)
+            elif prop_name in qm_props:
+                property_values = qm_data[prop_name].values
+            elif prop_name in tm_props:
+                property_values = tm_data[prop_name].values
+            else:
+                raise ValueError(f"Property '{prop_name}' not found in sorting, quality_metrics, or template_metrics")
+
             # make dtype available
             val0 = np.array(property_values[0])
             if val0.dtype.kind in ("i", "u"):
@@ -74,14 +107,26 @@ def generate_unit_table_view(sorting, unit_properties=None, similarity_scores=No
             ut_columns.append(vv.UnitsTableColumn(key=prop_name, label=prop_name, dtype=dtype))
             valid_unit_properties.append(prop_name)

+        # Create rows for each unit
         for ui, unit in enumerate(sorting.unit_ids):
             for prop_name in valid_unit_properties:
-                property_values = sorting.get_property(prop_name)
+
+                # Get property values from correct location
+                if prop_name in sorting_props:
+                    property_values = sorting.get_property(prop_name)
+                elif prop_name in qm_props:
+                    property_values = qm_data[prop_name].values
+                elif 
prop_name in tm_props: + property_values = tm_data[prop_name].values + else: + raise ValueError(f"Property '{prop_name}' not found in sorting, quality_metrics, or template_metrics") + + # Check for NaN values val0 = np.array(property_values[0]) if val0.dtype.kind == "f": if np.isnan(property_values[ui]): continue - values[prop_name] = property_values[ui] + values[prop_name] = np.format_float_positional(property_values[ui], precision=4, fractional=False) ut_rows.append(vv.UnitsTableRow(unit_id=unit, values=check_json(values))) v_units_table = vv.UnitsTable(rows=ut_rows, columns=ut_columns, similarity_scores=similarity_scores) From 1335ecfa1c14db8ae4c433dd79ced8f901891eee Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 13 Aug 2024 16:29:40 +0000 Subject: [PATCH 090/187] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/spikeinterface/widgets/sorting_summary.py | 6 +++--- src/spikeinterface/widgets/utils_sortingview.py | 16 +++++++++++----- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/src/spikeinterface/widgets/sorting_summary.py b/src/spikeinterface/widgets/sorting_summary.py index 4f9a9b7615..a113298851 100644 --- a/src/spikeinterface/widgets/sorting_summary.py +++ b/src/spikeinterface/widgets/sorting_summary.py @@ -43,11 +43,11 @@ class SortingSummaryWidget(BaseWidget): List of labels to be added to the curation table (sortingview backend) unit_table_properties : list or None, default: None - List of properties to be added to the unit table. - These may be drawn from the sorting extractor, and, if available, + List of properties to be added to the unit table. + These may be drawn from the sorting extractor, and, if available, the quality_metrics and template_metrics extensions of the SortingAnalyzer. See all properties available with sorting.get_property_keys(), and, if available, - analyzer.get_extension("quality_metrics").get_data().columns and + analyzer.get_extension("quality_metrics").get_data().columns and analyzer.get_extension("template_metrics").get_data().columns. 
(sortingview backend) """ diff --git a/src/spikeinterface/widgets/utils_sortingview.py b/src/spikeinterface/widgets/utils_sortingview.py index 1dbd4b9879..d7fd222921 100644 --- a/src/spikeinterface/widgets/utils_sortingview.py +++ b/src/spikeinterface/widgets/utils_sortingview.py @@ -5,6 +5,7 @@ from ..core.core_tools import check_json from warnings import warn + def make_serializable(*args): dict_to_serialize = {int(i): a for i, a in enumerate(args)} serializable_dict = check_json(dict_to_serialize) @@ -47,8 +48,9 @@ def handle_display_and_url(widget, view, **backend_kwargs): def generate_unit_table_view(analyzer, unit_properties=None, similarity_scores=None): import sortingview.views as vv + sorting = analyzer.sorting - + # Find available unit properties from all sources sorting_props = sorting.get_property_keys() if analyzer.get_extension("quality_metrics") is not None: @@ -66,7 +68,9 @@ def generate_unit_table_view(analyzer, unit_properties=None, similarity_scores=N all_props = sorting_props + qm_props + tm_props overlap_props = [prop for prop in all_props if all_props.count(prop) > 1] if len(overlap_props) > 0: - warn(f"Warning: Overlapping properties found in sorting, quality_metrics, and template_metrics: {overlap_props}") + warn( + f"Warning: Overlapping properties found in sorting, quality_metrics, and template_metrics: {overlap_props}" + ) # Get unit properties if unit_properties is None: @@ -92,7 +96,7 @@ def generate_unit_table_view(analyzer, unit_properties=None, similarity_scores=N property_values = tm_data[prop_name].values else: raise ValueError(f"Property '{prop_name}' not found in sorting, quality_metrics, or template_metrics") - + # make dtype available val0 = np.array(property_values[0]) if val0.dtype.kind in ("i", "u"): @@ -121,8 +125,10 @@ def generate_unit_table_view(analyzer, unit_properties=None, similarity_scores=N elif prop_name in tm_props: property_values = tm_data[prop_name].values else: - raise ValueError(f"Property '{prop_name}' not found in sorting, quality_metrics, or template_metrics") - + raise ValueError( + f"Property '{prop_name}' not found in sorting, quality_metrics, or template_metrics" + ) + # Check for NaN values val0 = np.array(property_values[0]) if val0.dtype.kind == "f": From 1b12d54267b304f8383684c5d2b60c607f4b1a65 Mon Sep 17 00:00:00 2001 From: chrishalcrow <57948917+chrishalcrow@users.noreply.github.com> Date: Wed, 14 Aug 2024 15:46:07 +0100 Subject: [PATCH 091/187] Reply to review --- src/spikeinterface/comparison/groundtruthstudy.py | 2 +- src/spikeinterface/comparison/multicomparisons.py | 10 +++++----- src/spikeinterface/comparison/paircomparisons.py | 4 ++-- src/spikeinterface/core/generate.py | 12 ++++++------ src/spikeinterface/generation/hybrid_tools.py | 2 +- .../postprocessing/template_similarity.py | 4 ++-- .../sortingcomponents/matching/main.py | 2 +- 7 files changed, 18 insertions(+), 18 deletions(-) diff --git a/src/spikeinterface/comparison/groundtruthstudy.py b/src/spikeinterface/comparison/groundtruthstudy.py index d45956a07e..8929d6983c 100644 --- a/src/spikeinterface/comparison/groundtruthstudy.py +++ b/src/spikeinterface/comparison/groundtruthstudy.py @@ -45,7 +45,7 @@ class GroundTruthStudy: Parameters ---------- - study_folder : srt | Path + study_folder : str | Path Path to folder containing `GroundTruthStudy` """ diff --git a/src/spikeinterface/comparison/multicomparisons.py b/src/spikeinterface/comparison/multicomparisons.py index dccff6118d..f7d9782a07 100644 --- a/src/spikeinterface/comparison/multicomparisons.py 
+++ b/src/spikeinterface/comparison/multicomparisons.py
@@ -43,9 +43,9 @@ class MultiSortingComparison(BaseMultiComparison, MixinSpikeTrainComparison):
         - "intersection" : spike trains are the intersection between the spike trains of the
           best matching two sorters
     verbose : bool, default: False
-        if True, output is verbose
+        If True, output is verbose
     do_matching : bool, default: True
-        if True, SOMETHING HAPPENS.
+        If True, the comparison is done when the `MultiSortingComparison` is initialized

     Returns
     -------
@@ -320,15 +320,15 @@ class MultiTemplateComparison(BaseMultiComparison, MixinTemplateComparison):
     chance_score : float, default: 0.3
         Minimum agreement score to for a possible match
     verbose : bool, default: False
-        if True, output is verbose
+        If True, output is verbose
     do_matching : bool, default: True
-        if True, IT DOES SOMETHING
+        If True, the comparison is done when the `MultiTemplateComparison` is initialized
     support : "dense" | "union" | "intersection", default: "union"
         The support to compute the similarity matrix.
     num_shifts : int, default: 0
         Number of shifts to use to shift templates to maximize similarity.
     similarity_method : "cosine" | "l1" | "l2", default: "cosine"
-        Method for the similarity matrix. 
+        Method for the similarity matrix.

     Returns
     -------
diff --git a/src/spikeinterface/comparison/paircomparisons.py b/src/spikeinterface/comparison/paircomparisons.py
index 9566354918..f5e7cdcc1f 100644
--- a/src/spikeinterface/comparison/paircomparisons.py
+++ b/src/spikeinterface/comparison/paircomparisons.py
@@ -566,7 +566,7 @@ def count_false_positive_units(self, redundant_score=None):

         Parameters
         ----------
-        redundant_score : float, default: None
+        redundant_score : float | None, default: None
            The agreement score below which tested units
            are counted as "false positive"" (and not "redundant").
         """
@@ -747,7 +747,7 @@ class TemplateComparison(BasePairComparison, MixinTemplateComparison):
        List of units from sorting_analyzer_2 to compare.
     name1 : str, default: "sess1"
        Name of first session.
-    name2 : str, default: "sess1"
+    name2 : str, default: "sess2"
        Name of second session.
     similarity_method : "cosine" | "l1" | "l2", default: "cosine"
        Method for the similarity matrix.
diff --git a/src/spikeinterface/core/generate.py b/src/spikeinterface/core/generate.py
index 187103d031..f8ab8a2d3a 100644
--- a/src/spikeinterface/core/generate.py
+++ b/src/spikeinterface/core/generate.py
@@ -44,7 +44,7 @@ def generate_recording(
         The number of channels in the recording.
     sampling_frequency : float, default: 30000. (in Hz)
         The sampling frequency of the recording, default: 30000.
-    durations : List[float], default: [5.0, 2.5]
+    durations : list[float], default: [5.0, 2.5]
         The duration in seconds of each segment in the recording.
         The number of segments is determined by the length of this list.
     set_probe : bool, default: True
@@ -1295,8 +1295,8 @@ def generate_recording_by_size(
         The seed for np.random.default_rng.
     strategy : "tile_pregenerated"| "on_the_fly", default: "tile_pregenerated"
         The strategy of generating noise chunk:
-          * "tile_pregenerated": pregenerate a noise chunk of noise_block_size sample and repeat it very fast and cusume only one noise block.
-          * "on_the_fly": generate on the fly a new noise block by combining seed + noise block index no memory preallocation but a bit more computaion (random)
+          * "tile_pregenerated": pregenerate a noise chunk of `noise_block_size` samples and repeat it quickly consuming only one noise block.
+          * "on_the_fly": generate on the fly a new noise block by combining seed + noise block index. No memory preallocation but a bit more computation (random)

     Returns
     -------
@@ -2062,9 +2062,9 @@ def generate_ground_truth_recording(

     Parameters
     ----------
-    durations : list of float, default: [10.]
+    durations : list[float], default: [10.]
         Durations in seconds for all segments.
-    sampling_frequency : float, default: 25000
+    sampling_frequency : float, default: 25000.0
         Sampling frequency.
     num_channels : int, default: 4
         Number of channels, not used when probe is given.
@@ -2085,7 +2085,7 @@ def generate_ground_truth_recording(
         * (num_units, num_samples, num_channels, upsample_factor): case with oversample template to introduce jitter.
     ms_before : float, default: 1.5
         Cut out in ms before spike peak.
-    ms_after : float, default: 3
+    ms_after : float, default: 3.0
         Cut out in ms after spike peak.
     upsample_factor : None or int, default: None
         A upsampling factor used only when templates are not provided.
diff --git a/src/spikeinterface/generation/hybrid_tools.py b/src/spikeinterface/generation/hybrid_tools.py
index 12958649dd..747389a6d7 100644
--- a/src/spikeinterface/generation/hybrid_tools.py
+++ b/src/spikeinterface/generation/hybrid_tools.py
@@ -183,7 +183,7 @@ def scale_template_to_range(
         The minimum amplitude of the output templates after scaling.
     max_amplitude : float
         The maximum amplitude of the output templates after scaling.
-    amplitude_function : "ptp" | "min" | "max", default: "pip"
+    amplitude_function : "ptp" | "min" | "max", default: "ptp"
         The function to use to compute the amplitude of the templates. Can be "ptp", "min" or "max".

     Returns
diff --git a/src/spikeinterface/postprocessing/template_similarity.py b/src/spikeinterface/postprocessing/template_similarity.py
index 53df14ff8a..27214f32e6 100644
--- a/src/spikeinterface/postprocessing/template_similarity.py
+++ b/src/spikeinterface/postprocessing/template_similarity.py
@@ -17,8 +17,8 @@ class ComputeTemplateSimilarity(AnalyzerExtension):
     ----------
     sorting_analyzer : SortingAnalyzer
         The SortingAnalyzer object
-    method : str, default: "cosine"
-        The method to compute the similarity. Can be in ["cosine", "l2", "l1"]
+    method : "cosine" | "l1" | "l2", default: "cosine"
+        The method to compute the similarity.
         In case of "l1" or "l2", the formula used is:
         - similarity = 1 - norm(T_1 - T_2)/(norm(T_1) + norm(T_2)).
In case of cosine it is: diff --git a/src/spikeinterface/sortingcomponents/matching/main.py b/src/spikeinterface/sortingcomponents/matching/main.py index 88b31476a9..fa2f7c055e 100644 --- a/src/spikeinterface/sortingcomponents/matching/main.py +++ b/src/spikeinterface/sortingcomponents/matching/main.py @@ -16,7 +16,7 @@ def find_spikes_from_templates( ---------- recording : RecordingExtractor The recording extractor object - method : "naive" | "tridesclous" | "circus" | "circus-omp" | "wobble" + method : "naive" | "tridesclous" | "circus" | "circus-omp" | "wobble", default: "naive" Which method to use for template matching method_kwargs : dict, optional Keyword arguments for the chosen method From d026d9647bc56a254cee250149008085b4f290ce Mon Sep 17 00:00:00 2001 From: Christian Tabedzki <35670232+tabedzki@users.noreply.github.com> Date: Wed, 14 Aug 2024 17:22:25 -0400 Subject: [PATCH 092/187] Add annotation for `get_event_times` which propogates for events --- src/spikeinterface/core/baseevent.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spikeinterface/core/baseevent.py b/src/spikeinterface/core/baseevent.py index 2cf92f8cea..9e1963089b 100644 --- a/src/spikeinterface/core/baseevent.py +++ b/src/spikeinterface/core/baseevent.py @@ -137,7 +137,7 @@ class BaseEventSegment(BaseSegment): def __init__(self): BaseSegment.__init__(self) - def get_event_times(self, channel_id: int | str, start_time: float, end_time: float): + def get_event_times(self, channel_id: int | str, start_time: float, end_time: float) -> np.ndarray: """Returns event timestamps of a channel in seconds Parameters ---------- From 2fefbcda40cd932f1e75a0a55988fe568f08075e Mon Sep 17 00:00:00 2001 From: Christian Tabedzki <35670232+tabedzki@users.noreply.github.com> Date: Wed, 14 Aug 2024 17:23:01 -0400 Subject: [PATCH 093/187] Added some type annotations in functions --- src/spikeinterface/comparison/comparisontools.py | 4 ++-- .../sortingcomponents/matching/wobble.py | 14 +++++++------- .../sortingcomponents/peak_localization.py | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/spikeinterface/comparison/comparisontools.py b/src/spikeinterface/comparison/comparisontools.py index 814cb907ea..4a2294e77f 100644 --- a/src/spikeinterface/comparison/comparisontools.py +++ b/src/spikeinterface/comparison/comparisontools.py @@ -8,7 +8,7 @@ import numpy as np -def count_matching_events(times1, times2, delta=10): +def count_matching_events(times1: list, times2: list, delta=10): """ Counts matching events. @@ -39,7 +39,7 @@ def count_matching_events(times1, times2, delta=10): return len(inds2) + 1 -def compute_agreement_score(num_matches, num1, num2): +def compute_agreement_score(num_matches: int, num1: int, num2: int) -> float: """ Computes agreement score. 
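(For reference, a hedged sketch of how this agreement score is conventionally defined in SpikeInterface; the formula is reconstructed from the library's usual definition, not from this diff.)

    # agreement = number of matching events over the union of both spike counts
    def agreement_score(num_matches: int, num1: int, num2: int) -> float:
        denominator = num1 + num2 - num_matches
        return num_matches / denominator if denominator > 0 else 0.0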
diff --git a/src/spikeinterface/sortingcomponents/matching/wobble.py b/src/spikeinterface/sortingcomponents/matching/wobble.py index 3b692c3bf0..2026515da0 100644 --- a/src/spikeinterface/sortingcomponents/matching/wobble.py +++ b/src/spikeinterface/sortingcomponents/matching/wobble.py @@ -522,7 +522,7 @@ def main_function(cls, traces, method_kwargs): # TODO: Replace this method with equivalent from spikeinterface @classmethod - def find_peaks(cls, objective, objective_normalized, spike_trains, params, template_data, template_meta): + def find_peaks(cls, objective, objective_normalized, spike_trains, params, template_data, template_meta)->tuple[np.ndarray, np.ndarray, np.ndarray]: """Find new peaks in the objective and update spike train accordingly. Parameters @@ -603,7 +603,7 @@ def find_peaks(cls, objective, objective_normalized, spike_trains, params, templ @classmethod def subtract_spike_train( cls, spike_train, scalings, template_data, objective, objective_normalized, params, template_meta, sparsity - ): + ) -> tuple[np.ndarray, np.ndarray]: """Subtract spike train of templates from the objective directly. Parameters @@ -662,7 +662,7 @@ def calculate_high_res_shift( template_data, params, template_meta, - ): + ) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: """Determines optimal shifts when super-resolution, scaled templates are used. Parameters @@ -746,7 +746,7 @@ def calculate_high_res_shift( return template_shift, time_shift, non_refractory_indices, scalings @classmethod - def enforce_refractory(cls, spike_train, objective, objective_normalized, params, template_meta): + def enforce_refractory(cls, spike_train, objective, objective_normalized, params, template_meta) -> tuple[np.ndarray, np.ndarray]: """Enforcing the refractory period for each unit by setting the objective to -infinity. Parameters @@ -815,7 +815,7 @@ def compute_template_norm(visible_channels, templates): return norm_squared -def compress_templates(templates, approx_rank): +def compress_templates(templates, approx_rank) -> tuple[np.ndarray, np.ndarray, np.ndarray]: """Compress templates using singular value decomposition. Parameters @@ -936,7 +936,7 @@ def convolve_templates(compressed_templates, jitter_factor, approx_rank, jittere return pairwise_convolution -def compute_objective(traces, template_data, approx_rank): +def compute_objective(traces, template_data, approx_rank) -> np.ndarray: """Compute objective by convolving templates with voltage traces. Parameters @@ -973,7 +973,7 @@ def compute_objective(traces, template_data, approx_rank): return objective -def compute_scale_amplitudes(high_resolution_conv, norm_peaks, scale_min, scale_max, amplitude_variance): +def compute_scale_amplitudes(high_resolution_conv, norm_peaks, scale_min, scale_max, amplitude_variance) -> tuple[np.ndarray, np.ndarray]: """Compute optimal amplitude scaling and the high-resolution objective resulting from scaled spikes. 
Without hard clipping, the objective can be obtained via diff --git a/src/spikeinterface/sortingcomponents/peak_localization.py b/src/spikeinterface/sortingcomponents/peak_localization.py index b578eb4478..7a544f341e 100644 --- a/src/spikeinterface/sortingcomponents/peak_localization.py +++ b/src/spikeinterface/sortingcomponents/peak_localization.py @@ -88,7 +88,7 @@ def get_localization_pipeline_nodes( return pipeline_nodes -def localize_peaks(recording, peaks, method="center_of_mass", ms_before=0.5, ms_after=0.5, **kwargs): +def localize_peaks(recording, peaks, method="center_of_mass", ms_before=0.5, ms_after=0.5, **kwargs) -> np.ndarray: """Localize peak (spike) in 2D or 3D depending the method. When a probe is 2D then: From 8d41e31a7e44f67e9c32f2d5dca656b56e9f447f Mon Sep 17 00:00:00 2001 From: Christian Tabedzki <35670232+tabedzki@users.noreply.github.com> Date: Wed, 14 Aug 2024 17:24:47 -0400 Subject: [PATCH 094/187] Ran black formatting --- .../sortingcomponents/matching/wobble.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/spikeinterface/sortingcomponents/matching/wobble.py b/src/spikeinterface/sortingcomponents/matching/wobble.py index 2026515da0..99de6fcd4e 100644 --- a/src/spikeinterface/sortingcomponents/matching/wobble.py +++ b/src/spikeinterface/sortingcomponents/matching/wobble.py @@ -522,7 +522,9 @@ def main_function(cls, traces, method_kwargs): # TODO: Replace this method with equivalent from spikeinterface @classmethod - def find_peaks(cls, objective, objective_normalized, spike_trains, params, template_data, template_meta)->tuple[np.ndarray, np.ndarray, np.ndarray]: + def find_peaks( + cls, objective, objective_normalized, spike_trains, params, template_data, template_meta + ) -> tuple[np.ndarray, np.ndarray, np.ndarray]: """Find new peaks in the objective and update spike train accordingly. Parameters @@ -746,7 +748,9 @@ def calculate_high_res_shift( return template_shift, time_shift, non_refractory_indices, scalings @classmethod - def enforce_refractory(cls, spike_train, objective, objective_normalized, params, template_meta) -> tuple[np.ndarray, np.ndarray]: + def enforce_refractory( + cls, spike_train, objective, objective_normalized, params, template_meta + ) -> tuple[np.ndarray, np.ndarray]: """Enforcing the refractory period for each unit by setting the objective to -infinity. Parameters @@ -973,7 +977,9 @@ def compute_objective(traces, template_data, approx_rank) -> np.ndarray: return objective -def compute_scale_amplitudes(high_resolution_conv, norm_peaks, scale_min, scale_max, amplitude_variance) -> tuple[np.ndarray, np.ndarray]: +def compute_scale_amplitudes( + high_resolution_conv, norm_peaks, scale_min, scale_max, amplitude_variance +) -> tuple[np.ndarray, np.ndarray]: """Compute optimal amplitude scaling and the high-resolution objective resulting from scaled spikes. 
Without hard clipping, the objective can be obtained via From 9583aae767322f1b4ab8a0b6b47057ce3bfc1d71 Mon Sep 17 00:00:00 2001 From: Christian Tabedzki <35670232+tabedzki@users.noreply.github.com> Date: Thu, 15 Aug 2024 09:02:31 -0400 Subject: [PATCH 095/187] Added sphinxcontrib-jquery --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 71919c072b..f759e839e8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -186,6 +186,7 @@ docs = [ "sphinx-design", "numpydoc", "ipython", + "sphinxcontrib-jquery", # for notebooks in the gallery "MEArec", # Use as an example From 3dc9da2d13ec13bf6b1b6c25111326181dc72cc9 Mon Sep 17 00:00:00 2001 From: Jonah Pearl Date: Thu, 15 Aug 2024 10:17:18 -0400 Subject: [PATCH 096/187] bug fix: convert props to lists --- src/spikeinterface/widgets/utils_sortingview.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/spikeinterface/widgets/utils_sortingview.py b/src/spikeinterface/widgets/utils_sortingview.py index d7fd222921..0ce372efc4 100644 --- a/src/spikeinterface/widgets/utils_sortingview.py +++ b/src/spikeinterface/widgets/utils_sortingview.py @@ -52,14 +52,14 @@ def generate_unit_table_view(analyzer, unit_properties=None, similarity_scores=N sorting = analyzer.sorting # Find available unit properties from all sources - sorting_props = sorting.get_property_keys() + sorting_props = list(sorting.get_property_keys()) if analyzer.get_extension("quality_metrics") is not None: - qm_props = analyzer.get_extension("quality_metrics").get_data().columns + qm_props = list(analyzer.get_extension("quality_metrics").get_data().columns) qm_data = analyzer.get_extension("quality_metrics").get_data() else: qm_props = [] if analyzer.get_extension("template_metrics") is not None: - tm_props = analyzer.get_extension("template_metrics").get_data().columns + tm_props = list(analyzer.get_extension("template_metrics").get_data().columns) tm_data = analyzer.get_extension("template_metrics").get_data() else: tm_props = [] From 3b77acbada0ebef08cee3203f504dc96d741d238 Mon Sep 17 00:00:00 2001 From: Robin Kim Date: Thu, 15 Aug 2024 15:09:00 -0500 Subject: [PATCH 097/187] Add no merge test --- src/spikeinterface/curation/curation_format.py | 3 ++- .../tests/sv-sorting-curation-no-merge.json | 1 + .../curation/tests/test_sortingview_curation.py | 13 +++++++++++++ 3 files changed, 16 insertions(+), 1 deletion(-) create mode 100644 src/spikeinterface/curation/tests/sv-sorting-curation-no-merge.json diff --git a/src/spikeinterface/curation/curation_format.py b/src/spikeinterface/curation/curation_format.py index babe7aac40..5a57692597 100644 --- a/src/spikeinterface/curation/curation_format.py +++ b/src/spikeinterface/curation/curation_format.py @@ -92,7 +92,8 @@ def convert_from_sortingview_curation_format_v0(sortingview_dict, destination_fo """ assert destination_format == "1" - + if "mergeGroups" not in sortingview_dict.keys(): + sortingview_dict["mergeGroups"] = [] merge_groups = sortingview_dict["mergeGroups"] merged_units = sum(merge_groups, []) if len(merged_units) > 0: diff --git a/src/spikeinterface/curation/tests/sv-sorting-curation-no-merge.json b/src/spikeinterface/curation/tests/sv-sorting-curation-no-merge.json new file mode 100644 index 0000000000..2a350340f3 --- /dev/null +++ b/src/spikeinterface/curation/tests/sv-sorting-curation-no-merge.json @@ -0,0 +1 @@ +{"labelsByUnit":{"2":["mua"],"3":["mua"],"4":["mua"],"5":["accept"],"6":["accept"],"7":["accept"],"8":["artifact"],"9":["artifact"]}} diff 
--git a/src/spikeinterface/curation/tests/test_sortingview_curation.py b/src/spikeinterface/curation/tests/test_sortingview_curation.py index bb152e7f71..24bd44a4c8 100644 --- a/src/spikeinterface/curation/tests/test_sortingview_curation.py +++ b/src/spikeinterface/curation/tests/test_sortingview_curation.py @@ -243,6 +243,18 @@ def test_label_inheritance_str(): assert np.all(sorting_include_accept.get_property("accept")) +def test_json_no_merge_curation(): + """ + Test curation with no merges using a JSON file. + """ + sorting = generate_sorting(num_units=10) + + # from curation.json + json_file = parent_folder / "sv-sorting-curation-no-merge.json" + # print(f"Sorting: {sorting.get_unit_ids()}") + sorting_curated_json = apply_sortingview_curation(sorting, uri_or_json=json_file) + + if __name__ == "__main__": # generate_sortingview_curation_dataset() # test_sha1_curation() @@ -251,3 +263,4 @@ def test_label_inheritance_str(): test_false_positive_curation() test_label_inheritance_int() test_label_inheritance_str() + test_json_no_merge_curation() From 1356d3a362174cdfdb3382e777e0f3c3af126e4e Mon Sep 17 00:00:00 2001 From: Robin Kim Date: Thu, 15 Aug 2024 15:11:39 -0500 Subject: [PATCH 098/187] Add comment describing test fail --- src/spikeinterface/curation/tests/test_sortingview_curation.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/spikeinterface/curation/tests/test_sortingview_curation.py b/src/spikeinterface/curation/tests/test_sortingview_curation.py index 24bd44a4c8..6c6dc482c3 100644 --- a/src/spikeinterface/curation/tests/test_sortingview_curation.py +++ b/src/spikeinterface/curation/tests/test_sortingview_curation.py @@ -249,10 +249,9 @@ def test_json_no_merge_curation(): """ sorting = generate_sorting(num_units=10) - # from curation.json json_file = parent_folder / "sv-sorting-curation-no-merge.json" - # print(f"Sorting: {sorting.get_unit_ids()}") sorting_curated_json = apply_sortingview_curation(sorting, uri_or_json=json_file) + # ValueError: Curation format: some labeled units are not in the unit list if __name__ == "__main__": From 0b2c237076aca81ab9920db953f8ac5fb2fdb4b4 Mon Sep 17 00:00:00 2001 From: Christian Tabedzki <35670232+tabedzki@users.noreply.github.com> Date: Fri, 16 Aug 2024 10:21:25 -0400 Subject: [PATCH 099/187] Added sphinx-rtd-theme minimum version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index f759e839e8..ed5f6e6fa7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -181,7 +181,7 @@ test = [ docs = [ "Sphinx", - "sphinx_rtd_theme", + "sphinx_rtd_theme>=1.2", "sphinx-gallery", "sphinx-design", "numpydoc", From d029f7d974020145ad6f309e05b3b1456693318b Mon Sep 17 00:00:00 2001 From: Robin Kim Date: Fri, 16 Aug 2024 11:56:39 -0500 Subject: [PATCH 100/187] Fix value error by checking first dict key type --- src/spikeinterface/curation/curation_format.py | 7 +++++-- .../curation/tests/test_sortingview_curation.py | 4 ++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/spikeinterface/curation/curation_format.py b/src/spikeinterface/curation/curation_format.py index 5a57692597..511abb7801 100644 --- a/src/spikeinterface/curation/curation_format.py +++ b/src/spikeinterface/curation/curation_format.py @@ -96,10 +96,13 @@ def convert_from_sortingview_curation_format_v0(sortingview_dict, destination_fo sortingview_dict["mergeGroups"] = [] merge_groups = sortingview_dict["mergeGroups"] merged_units = sum(merge_groups, []) - if len(merged_units) > 0: - 
unit_id_type = int if isinstance(merged_units[0], int) else str + + first_unit_id = next(iter(sortingview_dict["labelsByUnit"].keys())) + if str.isdigit(first_unit_id): + unit_id_type = int else: unit_id_type = str + all_units = [] all_labels = [] manual_labels = [] diff --git a/src/spikeinterface/curation/tests/test_sortingview_curation.py b/src/spikeinterface/curation/tests/test_sortingview_curation.py index 6c6dc482c3..945aca7937 100644 --- a/src/spikeinterface/curation/tests/test_sortingview_curation.py +++ b/src/spikeinterface/curation/tests/test_sortingview_curation.py @@ -250,13 +250,13 @@ def test_json_no_merge_curation(): sorting = generate_sorting(num_units=10) json_file = parent_folder / "sv-sorting-curation-no-merge.json" - sorting_curated_json = apply_sortingview_curation(sorting, uri_or_json=json_file) - # ValueError: Curation format: some labeled units are not in the unit list + sorting_curated = apply_sortingview_curation(sorting, uri_or_json=json_file) if __name__ == "__main__": # generate_sortingview_curation_dataset() # test_sha1_curation() + test_gh_curation() test_json_curation() test_false_positive_curation() From 18687796634d196dea9e5a9fff6568a9a3ea99ab Mon Sep 17 00:00:00 2001 From: Christian Tabedzki <35670232+tabedzki@users.noreply.github.com> Date: Fri, 16 Aug 2024 14:44:20 -0400 Subject: [PATCH 101/187] Reviewed and Annotated functions that contained 'Returns' in the docstring --- src/spikeinterface/comparison/comparisontools.py | 2 +- src/spikeinterface/core/baserecording.py | 2 +- src/spikeinterface/core/core_tools.py | 5 +++-- src/spikeinterface/core/generate.py | 2 +- src/spikeinterface/core/old_api_utils.py | 8 ++++---- src/spikeinterface/core/recording_tools.py | 2 +- src/spikeinterface/core/sortinganalyzer.py | 8 ++++---- src/spikeinterface/core/waveform_tools.py | 2 +- .../core/waveforms_extractor_backwards_compatibility.py | 2 +- src/spikeinterface/curation/auto_merge.py | 2 +- src/spikeinterface/curation/remove_excess_spikes.py | 2 +- src/spikeinterface/extractors/neoextractors/neo_utils.py | 2 +- src/spikeinterface/postprocessing/correlograms.py | 2 +- src/spikeinterface/postprocessing/template_metrics.py | 8 ++++---- src/spikeinterface/qualitymetrics/misc_metrics.py | 4 ++-- src/spikeinterface/qualitymetrics/pca_metrics.py | 4 ++-- src/spikeinterface/sorters/external/kilosort.py | 2 +- src/spikeinterface/sorters/external/kilosort2.py | 2 +- src/spikeinterface/sorters/external/kilosort2_5.py | 2 +- src/spikeinterface/sorters/external/kilosort3.py | 2 +- src/spikeinterface/sorters/sorterlist.py | 6 +++--- src/spikeinterface/sortingcomponents/matching/circus.py | 4 +++- src/spikeinterface/sortingcomponents/matching/main.py | 2 +- src/spikeinterface/sortingcomponents/motion/dredge.py | 2 +- .../sortingcomponents/motion/motion_estimation.py | 2 +- .../sortingcomponents/motion/motion_interpolation.py | 2 +- 26 files changed, 43 insertions(+), 40 deletions(-) diff --git a/src/spikeinterface/comparison/comparisontools.py b/src/spikeinterface/comparison/comparisontools.py index 4a2294e77f..d27bea6c1a 100644 --- a/src/spikeinterface/comparison/comparisontools.py +++ b/src/spikeinterface/comparison/comparisontools.py @@ -433,7 +433,7 @@ def make_possible_match(agreement_scores, min_score): return possible_match_12, possible_match_21 -def make_best_match(agreement_scores, min_score): +def make_best_match(agreement_scores, min_score) -> tuple["pd.Series", "pd.Series"]: """ Given an agreement matrix and a min_score threshold. 
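(Looping back to the curation fix above, a minimal sketch of the v0 input it now tolerates: a sortingview dict carrying only labels, mirroring the sv-sorting-curation-no-merge.json fixture.)

    from spikeinterface.curation.curation_format import convert_from_sortingview_curation_format_v0

    # no "mergeGroups" key: the converter now fills it with an empty list
    sv_dict = {"labelsByUnit": {"2": ["mua"], "5": ["accept"]}}
    curation_v1 = convert_from_sortingview_curation_format_v0(sv_dict, destination_format="1")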
return a dict a best match for each units independently of others. diff --git a/src/spikeinterface/core/baserecording.py b/src/spikeinterface/core/baserecording.py index e65afabaca..566ea51ae2 100644 --- a/src/spikeinterface/core/baserecording.py +++ b/src/spikeinterface/core/baserecording.py @@ -301,7 +301,7 @@ def get_traces( order: "C" | "F" | None = None, return_scaled: bool = False, cast_unsigned: bool = False, - ): + ) -> np.ndarray: """Returns traces from recording. Parameters diff --git a/src/spikeinterface/core/core_tools.py b/src/spikeinterface/core/core_tools.py index aad7613d01..457126f7f7 100644 --- a/src/spikeinterface/core/core_tools.py +++ b/src/spikeinterface/core/core_tools.py @@ -258,6 +258,7 @@ def set_value_in_extractor_dict(extractor_dict: dict, access_path: tuple, new_va for key in access_path[:-1]: current = current[key] current[access_path[-1]] = new_value + return current def recursive_path_modifier(d, func, target="path", copy=True) -> dict: @@ -432,7 +433,7 @@ def make_paths_relative(input_dict: dict, relative_folder: str | Path) -> dict: return output_dict -def make_paths_absolute(input_dict, base_folder): +def make_paths_absolute(input_dict, base_folder) -> dict: """ Recursively transform a dict describing an BaseExtractor to make every path absolute given a base_folder. @@ -625,7 +626,7 @@ def normal_pdf(x, mu: float = 0.0, sigma: float = 1.0): return 1 / (sigma * np.sqrt(2 * np.pi)) * np.exp(-((x - mu) ** 2) / (2 * sigma**2)) -def retrieve_importing_provenance(a_class): +def retrieve_importing_provenance(a_class) -> dict: """ Retrieve the import provenance of a class, including its import name (that consists of the class name and the module), the top-level module, and the module version. diff --git a/src/spikeinterface/core/generate.py b/src/spikeinterface/core/generate.py index ff75789aab..f7cb20b0ee 100644 --- a/src/spikeinterface/core/generate.py +++ b/src/spikeinterface/core/generate.py @@ -33,7 +33,7 @@ def generate_recording( set_probe: Optional[bool] = True, ndim: Optional[int] = 2, seed: Optional[int] = None, -) -> BaseRecording: +) -> NumpySorting: """ Generate a lazy recording object. Useful for testing API and algos. diff --git a/src/spikeinterface/core/old_api_utils.py b/src/spikeinterface/core/old_api_utils.py index ea2f20d631..53ef736208 100644 --- a/src/spikeinterface/core/old_api_utils.py +++ b/src/spikeinterface/core/old_api_utils.py @@ -88,7 +88,7 @@ def get_unit_ids(self): """ return list(self._unit_map.keys()) - def get_unit_spike_train(self, unit_id, start_frame=None, end_frame=None): + def get_unit_spike_train(self, unit_id, start_frame=None, end_frame=None) -> np.ndarray: """This function extracts spike frames from the specified unit. It will return spike frames from within three ranges: @@ -122,7 +122,7 @@ def get_unit_spike_train(self, unit_id, start_frame=None, end_frame=None): unit_id=self._unit_map[unit_id], segment_index=0, start_frame=start_frame, end_frame=end_frame ) - def get_units_spike_train(self, unit_ids=None, start_frame=None, end_frame=None): + def get_units_spike_train(self, unit_ids=None, start_frame=None, end_frame=None) -> list[np.ndarray]: """This function extracts spike frames from the specified units. 
Parameters @@ -137,7 +137,7 @@ def get_units_spike_train(self, unit_ids=None, start_frame=None, end_frame=None) Returns ------- - spike_train: numpy.ndarray + spike_train: list[numpy.ndarray] An 2D array containing all the frames for each spike in the specified units given the range of start and end frames """ @@ -146,7 +146,7 @@ def get_units_spike_train(self, unit_ids=None, start_frame=None, end_frame=None) spike_trains = [self.get_unit_spike_train(uid, start_frame, end_frame) for uid in unit_ids] return spike_trains - def get_sampling_frequency(self): + def get_sampling_frequency(self) -> float: """ It returns the sampling frequency. diff --git a/src/spikeinterface/core/recording_tools.py b/src/spikeinterface/core/recording_tools.py index b4c07e77c9..12505561f6 100644 --- a/src/spikeinterface/core/recording_tools.py +++ b/src/spikeinterface/core/recording_tools.py @@ -640,7 +640,7 @@ def get_noise_levels( method: Literal["mad", "std"] = "mad", force_recompute: bool = False, **random_chunk_kwargs, -): +) -> np.ndarray: """ Estimate noise for each channel using MAD methods. You can use standard deviation with `method="std"` diff --git a/src/spikeinterface/core/sortinganalyzer.py b/src/spikeinterface/core/sortinganalyzer.py index ac142405ab..d6a715611c 100644 --- a/src/spikeinterface/core/sortinganalyzer.py +++ b/src/spikeinterface/core/sortinganalyzer.py @@ -44,7 +44,7 @@ def create_sorting_analyzer( return_scaled=True, overwrite=False, **sparsity_kwargs, -): +) -> "SortingAnalyzer": """ Create a SortingAnalyzer by pairing a Sorting and the corresponding Recording. @@ -143,7 +143,7 @@ def create_sorting_analyzer( return sorting_analyzer -def load_sorting_analyzer(folder, load_extensions=True, format="auto"): +def load_sorting_analyzer(folder, load_extensions=True, format="auto") -> "SortingAnalyzer": """ Load a SortingAnalyzer object from disk. @@ -1099,7 +1099,7 @@ def get_num_units(self) -> int: return self.sorting.get_num_units() ## extensions zone - def compute(self, input, save=True, extension_params=None, verbose=False, **kwargs): + def compute(self, input, save=True, extension_params=None, verbose=False, **kwargs) -> "SortingAnalyzer" | None: """ Compute one extension or several extensiosn. Internally calls compute_one_extension() or compute_several_extensions() depending on the input type. @@ -1166,7 +1166,7 @@ def compute(self, input, save=True, extension_params=None, verbose=False, **kwar else: raise ValueError("SortingAnalyzer.compute() need str, dict or list") - def compute_one_extension(self, extension_name, save=True, verbose=False, **kwargs): + def compute_one_extension(self, extension_name, save=True, verbose=False, **kwargs) -> "AnalyzerExtension": """ Compute one extension. diff --git a/src/spikeinterface/core/waveform_tools.py b/src/spikeinterface/core/waveform_tools.py index 98380e955f..3affd7f0ec 100644 --- a/src/spikeinterface/core/waveform_tools.py +++ b/src/spikeinterface/core/waveform_tools.py @@ -791,7 +791,7 @@ def estimate_templates_with_accumulator( return_std: bool = False, verbose: bool = False, **job_kwargs, -): +) -> np.ndarray: """ This is a fast implementation to compute template averages and standard deviations. This is useful to estimate sparsity without the need to allocate large waveform buffers. 
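(A hedged usage sketch of the helper annotated above; the positional layout of the leading arguments is an assumption, since only the trailing parameters appear in this diff.)

    from spikeinterface.core.waveform_tools import estimate_templates_with_accumulator

    spikes = sorting.to_spike_vector()
    # nbefore / nafter are the cut-out sizes in samples around each spike
    templates = estimate_templates_with_accumulator(
        recording, spikes, sorting.unit_ids, nbefore=30, nafter=60, return_scaled=True
    )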
diff --git a/src/spikeinterface/core/waveforms_extractor_backwards_compatibility.py b/src/spikeinterface/core/waveforms_extractor_backwards_compatibility.py index da1f5a71f5..9815886bcd 100644 --- a/src/spikeinterface/core/waveforms_extractor_backwards_compatibility.py +++ b/src/spikeinterface/core/waveforms_extractor_backwards_compatibility.py @@ -348,7 +348,7 @@ def load_waveforms( with_recording: bool = True, sorting: Optional[BaseSorting] = None, output="MockWaveformExtractor", -): +) -> MockWaveformExtractor | SortingAnalyzer: """ This read an old WaveformsExtactor folder (folder or zarr) and convert it into a SortingAnalyzer or MockWaveformExtractor. diff --git a/src/spikeinterface/curation/auto_merge.py b/src/spikeinterface/curation/auto_merge.py index 920d6713ad..cea394a021 100644 --- a/src/spikeinterface/curation/auto_merge.py +++ b/src/spikeinterface/curation/auto_merge.py @@ -512,7 +512,7 @@ def smooth_correlogram(correlograms, bins, sigma_smooth_ms=0.6): return correlograms_smoothed -def get_unit_adaptive_window(auto_corr: np.ndarray, threshold: float): +def get_unit_adaptive_window(auto_corr: np.ndarray, threshold: float) -> int: """ Computes an adaptive window to correlogram (basically corresponds to the first peak). Based on a minimum threshold and minimum of second derivative. diff --git a/src/spikeinterface/curation/remove_excess_spikes.py b/src/spikeinterface/curation/remove_excess_spikes.py index d1d6b7f3cb..cceac7e8da 100644 --- a/src/spikeinterface/curation/remove_excess_spikes.py +++ b/src/spikeinterface/curation/remove_excess_spikes.py @@ -85,7 +85,7 @@ def get_unit_spike_train( return spike_train[min_spike:max_spike] -def remove_excess_spikes(sorting, recording): +def remove_excess_spikes(sorting: BaseSorting, recording): """ Remove excess spikes from the spike trains. Excess spikes are the ones exceeding a recording number of samples, for each segment. diff --git a/src/spikeinterface/extractors/neoextractors/neo_utils.py b/src/spikeinterface/extractors/neoextractors/neo_utils.py index fa7916e774..ec6aae06c9 100644 --- a/src/spikeinterface/extractors/neoextractors/neo_utils.py +++ b/src/spikeinterface/extractors/neoextractors/neo_utils.py @@ -28,7 +28,7 @@ def get_neo_streams(extractor_name, *args, **kwargs): return neo_extractor.get_streams(*args, **kwargs) -def get_neo_num_blocks(extractor_name, *args, **kwargs): +def get_neo_num_blocks(extractor_name, *args, **kwargs) -> int: """Returns the number of NEO blocks. For multi-block datasets, the `block_index` argument can be used to select which bloack to read with the `read_**extractor_name**()` function. diff --git a/src/spikeinterface/postprocessing/correlograms.py b/src/spikeinterface/postprocessing/correlograms.py index 3c65f2075c..6fef655fa7 100644 --- a/src/spikeinterface/postprocessing/correlograms.py +++ b/src/spikeinterface/postprocessing/correlograms.py @@ -137,7 +137,7 @@ def compute_correlograms( compute_correlograms.__doc__ = compute_correlograms_sorting_analyzer.__doc__ -def _make_bins(sorting, window_ms, bin_ms): +def _make_bins(sorting, window_ms, bin_ms) -> tuple[np.ndarray, int, int]: """ Create the bins for the correlogram, in samples. 
diff --git a/src/spikeinterface/postprocessing/template_metrics.py b/src/spikeinterface/postprocessing/template_metrics.py index eef2a2f32c..4e75f48276 100644 --- a/src/spikeinterface/postprocessing/template_metrics.py +++ b/src/spikeinterface/postprocessing/template_metrics.py @@ -337,7 +337,7 @@ def get_trough_and_peak_idx(template): ######################################################################################### # Single-channel metrics -def get_peak_to_valley(template_single, sampling_frequency, trough_idx=None, peak_idx=None, **kwargs): +def get_peak_to_valley(template_single, sampling_frequency, trough_idx=None, peak_idx=None, **kwargs) -> float: """ Return the peak to valley duration in seconds of input waveforms. @@ -363,7 +363,7 @@ def get_peak_to_valley(template_single, sampling_frequency, trough_idx=None, pea return ptv -def get_peak_trough_ratio(template_single, sampling_frequency=None, trough_idx=None, peak_idx=None, **kwargs): +def get_peak_trough_ratio(template_single, sampling_frequency=None, trough_idx=None, peak_idx=None, **kwargs) -> float: """ Return the peak to trough ratio of input waveforms. @@ -389,7 +389,7 @@ def get_peak_trough_ratio(template_single, sampling_frequency=None, trough_idx=N return ptratio -def get_half_width(template_single, sampling_frequency, trough_idx=None, peak_idx=None, **kwargs): +def get_half_width(template_single, sampling_frequency, trough_idx=None, peak_idx=None, **kwargs) -> float: """ Return the half width of input waveforms in seconds. @@ -889,7 +889,7 @@ def exp_decay(x, decay, amp0, offset): return exp_decay_value -def get_spread(template, channel_locations, sampling_frequency, **kwargs): +def get_spread(template, channel_locations, sampling_frequency, **kwargs) -> float: """ Compute the spread of the template amplitude over distance in units um/s. diff --git a/src/spikeinterface/qualitymetrics/misc_metrics.py b/src/spikeinterface/qualitymetrics/misc_metrics.py index 7465d58737..544765382b 100644 --- a/src/spikeinterface/qualitymetrics/misc_metrics.py +++ b/src/spikeinterface/qualitymetrics/misc_metrics.py @@ -117,7 +117,7 @@ def compute_presence_ratios(sorting_analyzer, bin_duration_s=60.0, mean_fr_ratio Returns ------- - presence_ratio : dict of flaots + presence_ratio : dict of floats The presence ratio for each unit ID. Notes @@ -514,7 +514,7 @@ def compute_sliding_rp_violations( ) -def get_synchrony_counts(spikes, synchrony_sizes, all_unit_ids): +def get_synchrony_count(spikes, synchrony_sizes, all_unit_ids): """ Compute synchrony counts, the number of simultaneous spikes with sizes `synchrony_sizes`. diff --git a/src/spikeinterface/qualitymetrics/pca_metrics.py b/src/spikeinterface/qualitymetrics/pca_metrics.py index 1c5a491bf8..7c099a2f74 100644 --- a/src/spikeinterface/qualitymetrics/pca_metrics.py +++ b/src/spikeinterface/qualitymetrics/pca_metrics.py @@ -56,7 +56,7 @@ def compute_pc_metrics( seed=None, n_jobs=1, progress_bar=False, -): +) -> dict: """ Calculate principal component derived metrics. @@ -295,7 +295,7 @@ def mahalanobis_metrics(all_pcs, all_labels, this_unit_id): return isolation_distance, l_ratio -def lda_metrics(all_pcs, all_labels, this_unit_id): +def lda_metrics(all_pcs, all_labels, this_unit_id) -> float: """ Calculate d-prime based on Linear Discriminant Analysis. 
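(Tying these metric annotations back to the unit-table change in PATCH 089, a brief sketch of computing the two metric extensions whose columns feed the table; extension names follow the SortingAnalyzer convention used throughout this series.)

    # both extensions expose a pandas DataFrame via get_data()
    sorting_analyzer.compute("template_metrics")
    sorting_analyzer.compute("quality_metrics")
    tm = sorting_analyzer.get_extension("template_metrics").get_data()
    qm = sorting_analyzer.get_extension("quality_metrics").get_data()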
diff --git a/src/spikeinterface/sorters/external/kilosort.py b/src/spikeinterface/sorters/external/kilosort.py index 102703f912..ad20408a06 100644 --- a/src/spikeinterface/sorters/external/kilosort.py +++ b/src/spikeinterface/sorters/external/kilosort.py @@ -131,7 +131,7 @@ def _check_params(cls, recording, output_folder, params): return p @classmethod - def _get_specific_options(cls, ops, params): + def _get_specific_options(cls, ops, params) -> dict: """ Adds specific options for Kilosort in the ops dict and returns the final dict diff --git a/src/spikeinterface/sorters/external/kilosort2.py b/src/spikeinterface/sorters/external/kilosort2.py index cedcfe2a5e..643769b6f9 100644 --- a/src/spikeinterface/sorters/external/kilosort2.py +++ b/src/spikeinterface/sorters/external/kilosort2.py @@ -142,7 +142,7 @@ def _check_params(cls, recording, output_folder, params): return p @classmethod - def _get_specific_options(cls, ops, params): + def _get_specific_options(cls, ops, params) -> dict: """ Adds specific options for Kilosort2 in the ops dict and returns the final dict diff --git a/src/spikeinterface/sorters/external/kilosort2_5.py b/src/spikeinterface/sorters/external/kilosort2_5.py index ea93ffde0d..df8f4e6873 100644 --- a/src/spikeinterface/sorters/external/kilosort2_5.py +++ b/src/spikeinterface/sorters/external/kilosort2_5.py @@ -158,7 +158,7 @@ def _check_params(cls, recording, output_folder, params): return p @classmethod - def _get_specific_options(cls, ops, params): + def _get_specific_options(cls, ops, params) -> dict: """ Adds specific options for Kilosort2_5 in the ops dict and returns the final dict diff --git a/src/spikeinterface/sorters/external/kilosort3.py b/src/spikeinterface/sorters/external/kilosort3.py index 4066948e2e..3681b036a2 100644 --- a/src/spikeinterface/sorters/external/kilosort3.py +++ b/src/spikeinterface/sorters/external/kilosort3.py @@ -154,7 +154,7 @@ def _check_params(cls, recording, output_folder, params): return p @classmethod - def _get_specific_options(cls, ops, params): + def _get_specific_options(cls, ops, params) -> dict: """ Adds specific options for Kilosort3 in the ops dict and returns the final dict diff --git a/src/spikeinterface/sorters/sorterlist.py b/src/spikeinterface/sorters/sorterlist.py index 105a536617..e1a6816133 100644 --- a/src/spikeinterface/sorters/sorterlist.py +++ b/src/spikeinterface/sorters/sorterlist.py @@ -76,7 +76,7 @@ def print_sorter_versions(): print(txt) -def get_default_sorter_params(sorter_name_or_class): +def get_default_sorter_params(sorter_name_or_class) -> dict: """Returns default parameters for the specified sorter. Parameters @@ -100,7 +100,7 @@ def get_default_sorter_params(sorter_name_or_class): return SorterClass.default_params() -def get_sorter_params_description(sorter_name_or_class): +def get_sorter_params_description(sorter_name_or_class) -> dict: """Returns a description of the parameters for the specified sorter. Parameters @@ -124,7 +124,7 @@ def get_sorter_params_description(sorter_name_or_class): return SorterClass.params_description() -def get_sorter_description(sorter_name_or_class): +def get_sorter_description(sorter_name_or_class) -> dict: """Returns a brief description for the specified sorter. 
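(A quick sketch of the two sorter helpers annotated above, as they are typically used together.)

    from spikeinterface.sorters import get_default_sorter_params, get_sorter_params_description

    params = get_default_sorter_params("kilosort2_5")            # dict of default values
    descriptions = get_sorter_params_description("kilosort2_5")  # dict of per-parameter help strings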
Parameters diff --git a/src/spikeinterface/sortingcomponents/matching/circus.py b/src/spikeinterface/sortingcomponents/matching/circus.py index 183fdab04c..ad7391a297 100644 --- a/src/spikeinterface/sortingcomponents/matching/circus.py +++ b/src/spikeinterface/sortingcomponents/matching/circus.py @@ -20,7 +20,9 @@ from .main import BaseTemplateMatchingEngine -def compress_templates(templates_array, approx_rank, remove_mean=True, return_new_templates=True): +def compress_templates( + templates_array, approx_rank, remove_mean=True, return_new_templates=True +) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray | None]: """Compress templates using singular value decomposition. Parameters diff --git a/src/spikeinterface/sortingcomponents/matching/main.py b/src/spikeinterface/sortingcomponents/matching/main.py index 9476a0df03..7301307ba5 100644 --- a/src/spikeinterface/sortingcomponents/matching/main.py +++ b/src/spikeinterface/sortingcomponents/matching/main.py @@ -9,7 +9,7 @@ def find_spikes_from_templates( recording, method="naive", method_kwargs={}, extra_outputs=False, verbose=False, **job_kwargs -): +) -> np.ndarray | tuple[np.ndarray, dict]: """Find spike from a recording from given templates. Parameters diff --git a/src/spikeinterface/sortingcomponents/motion/dredge.py b/src/spikeinterface/sortingcomponents/motion/dredge.py index ded4c257ab..40b229c588 100644 --- a/src/spikeinterface/sortingcomponents/motion/dredge.py +++ b/src/spikeinterface/sortingcomponents/motion/dredge.py @@ -1141,7 +1141,7 @@ def normxcorr1d( normalized=True, padding="same", conv_engine="torch", -): +) -> "torch.Tensor" | np.ndarray: """ normxcorr1d: Normalized cross-correlation, optionally weighted diff --git a/src/spikeinterface/sortingcomponents/motion/motion_estimation.py b/src/spikeinterface/sortingcomponents/motion/motion_estimation.py index 0d425c98da..bed4a8ca6f 100644 --- a/src/spikeinterface/sortingcomponents/motion/motion_estimation.py +++ b/src/spikeinterface/sortingcomponents/motion/motion_estimation.py @@ -30,7 +30,7 @@ def estimate_motion( verbose=False, margin_um=None, **method_kwargs, -): +) -> Motion | tuple[Motion, dict]: """ diff --git a/src/spikeinterface/sortingcomponents/motion/motion_interpolation.py b/src/spikeinterface/sortingcomponents/motion/motion_interpolation.py index 11ce11e1aa..19b1d9ad25 100644 --- a/src/spikeinterface/sortingcomponents/motion/motion_interpolation.py +++ b/src/spikeinterface/sortingcomponents/motion/motion_interpolation.py @@ -7,7 +7,7 @@ from spikeinterface.preprocessing.filter import fix_dtype -def correct_motion_on_peaks(peaks, peak_locations, motion, recording): +def correct_motion_on_peaks(peaks, peak_locations, motion, recording) -> np.ndarray: """ Given the output of estimate_motion(), apply inverse motion on peak locations. 
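(And a one-line sketch of the annotated helper; peaks, peak_locations and motion are assumed to come from peak detection, localization and estimate_motion respectively.)

    # returns motion-corrected peak locations as a numpy array
    corrected_locations = correct_motion_on_peaks(peaks, peak_locations, motion, recording)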
From 76186e715dae3bcd04c02e6d0bac5a7897898c03 Mon Sep 17 00:00:00 2001 From: Christian Tabedzki <35670232+tabedzki@users.noreply.github.com> Date: Fri, 16 Aug 2024 14:51:50 -0400 Subject: [PATCH 102/187] Updated comparisontools.py based off Zach's review --- src/spikeinterface/comparison/comparisontools.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/spikeinterface/comparison/comparisontools.py b/src/spikeinterface/comparison/comparisontools.py index d27bea6c1a..0ab5789d1f 100644 --- a/src/spikeinterface/comparison/comparisontools.py +++ b/src/spikeinterface/comparison/comparisontools.py @@ -3,12 +3,13 @@ """ from __future__ import annotations +from typing import Iterable import numpy as np -def count_matching_events(times1: list, times2: list, delta=10): +def count_matching_events(times1: Iterable, times2: Iterable, delta: int = 10): """ Counts matching events. From ca985072f1d87990b4e86b4c1fdc18c59f3c7869 Mon Sep 17 00:00:00 2001 From: Alessio Buccino Date: Sun, 18 Aug 2024 09:52:22 +0200 Subject: [PATCH 103/187] Add dtype in load_waveforms and analyzer.is_filtered() --- src/spikeinterface/core/sortinganalyzer.py | 3 +++ .../core/waveforms_extractor_backwards_compatibility.py | 4 ++++ src/spikeinterface/exporters/to_phy.py | 2 +- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/spikeinterface/core/sortinganalyzer.py b/src/spikeinterface/core/sortinganalyzer.py index ac142405ab..e427236e15 100644 --- a/src/spikeinterface/core/sortinganalyzer.py +++ b/src/spikeinterface/core/sortinganalyzer.py @@ -1011,6 +1011,9 @@ def has_temporary_recording(self) -> bool: def is_sparse(self) -> bool: return self.sparsity is not None + def is_filtered(self) -> bool: + return self.rec_attributes["filtered"] + def get_sorting_provenance(self): """ Get the original sorting if possible otherwise return None diff --git a/src/spikeinterface/core/waveforms_extractor_backwards_compatibility.py b/src/spikeinterface/core/waveforms_extractor_backwards_compatibility.py index da1f5a71f5..d9514c7fce 100644 --- a/src/spikeinterface/core/waveforms_extractor_backwards_compatibility.py +++ b/src/spikeinterface/core/waveforms_extractor_backwards_compatibility.py @@ -446,6 +446,10 @@ def _read_old_waveforms_extractor_binary(folder, sorting): else: rec_attributes["probegroup"] = None + if "dtype" not in rec_attributes: + warnings.warn("dtype not found in rec_attributes. 
Setting to float32") + rec_attributes["dtype"] = "float32" + # recording recording = None if (folder / "recording.json").exists(): diff --git a/src/spikeinterface/exporters/to_phy.py b/src/spikeinterface/exporters/to_phy.py index 7b3c7daab0..06041da231 100644 --- a/src/spikeinterface/exporters/to_phy.py +++ b/src/spikeinterface/exporters/to_phy.py @@ -167,7 +167,7 @@ def export_to_phy( f.write(f"dtype = '{dtype_str}'\n") f.write(f"offset = 0\n") f.write(f"sample_rate = {fs}\n") - f.write(f"hp_filtered = {sorting_analyzer.recording.is_filtered()}") + f.write(f"hp_filtered = {sorting_analyzer.is_filtered()}") # export spike_times/spike_templates/spike_clusters # here spike_labels is a remapping to unit_index From df238228d2bbe2650b4e1cb8be2ce61dbe424578 Mon Sep 17 00:00:00 2001 From: Alessio Buccino Date: Sun, 18 Aug 2024 10:45:36 +0200 Subject: [PATCH 104/187] oups --- src/spikeinterface/core/sortinganalyzer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spikeinterface/core/sortinganalyzer.py b/src/spikeinterface/core/sortinganalyzer.py index e427236e15..7a9510e72f 100644 --- a/src/spikeinterface/core/sortinganalyzer.py +++ b/src/spikeinterface/core/sortinganalyzer.py @@ -1012,7 +1012,7 @@ def is_sparse(self) -> bool: return self.sparsity is not None def is_filtered(self) -> bool: - return self.rec_attributes["filtered"] + return self.rec_attributes["is_filtered"] def get_sorting_provenance(self): """ From 2af85b3f27a5463f0ddbf306229e5f8df1298106 Mon Sep 17 00:00:00 2001 From: Alessio Buccino Date: Mon, 19 Aug 2024 17:51:07 +0200 Subject: [PATCH 105/187] Enable cloud-loading for analyzer Zarr --- src/spikeinterface/core/core_tools.py | 17 +++++++++ src/spikeinterface/core/sortinganalyzer.py | 41 ++++++++++++---------- 2 files changed, 40 insertions(+), 18 deletions(-) diff --git a/src/spikeinterface/core/core_tools.py b/src/spikeinterface/core/core_tools.py index aad7613d01..b38222391c 100644 --- a/src/spikeinterface/core/core_tools.py +++ b/src/spikeinterface/core/core_tools.py @@ -684,3 +684,20 @@ def measure_memory_allocation(measure_in_process: bool = True) -> float: memory = mem_info.total - mem_info.available return memory + + +def is_path_remote(path: str | Path) -> bool: + """ + Returns True if the path is a remote path (e.g., s3:// or gcs://). + + Parameters + ---------- + path : str or Path + The path to check. + + Returns + ------- + bool + Whether the path is a remote path. 
+ """ + return "s3://" in str(path) or "gcs://" in str(path) diff --git a/src/spikeinterface/core/sortinganalyzer.py b/src/spikeinterface/core/sortinganalyzer.py index ac142405ab..eb6233bf86 100644 --- a/src/spikeinterface/core/sortinganalyzer.py +++ b/src/spikeinterface/core/sortinganalyzer.py @@ -23,7 +23,7 @@ from .base import load_extractor from .recording_tools import check_probe_do_not_overlap, get_rec_attributes, do_recording_attributes_match -from .core_tools import check_json, retrieve_importing_provenance +from .core_tools import check_json, retrieve_importing_provenance, is_path_remote from .sorting_tools import generate_unit_ids_for_merge_group, _get_ids_after_merging from .job_tools import split_job_kwargs from .numpyextractors import NumpySorting @@ -195,6 +195,7 @@ def __init__( format=None, sparsity=None, return_scaled=True, + storage_options=None, ): # very fast init because checks are done in load and create self.sorting = sorting @@ -204,6 +205,7 @@ def __init__( self.format = format self.sparsity = sparsity self.return_scaled = return_scaled + self.storage_options = storage_options # this is used to store temporary recording self._temporary_recording = None @@ -276,17 +278,15 @@ def create( return sorting_analyzer @classmethod - def load(cls, folder, recording=None, load_extensions=True, format="auto"): + def load(cls, folder, recording=None, load_extensions=True, format="auto", storage_options=None): """ Load folder or zarr. The recording can be given if the recording location has changed. Otherwise the recording is loaded when possible. """ - folder = Path(folder) - assert folder.is_dir(), "Waveform folder does not exists" if format == "auto": # make better assumption and check for auto guess format - if folder.suffix == ".zarr": + if Path(folder).suffix == ".zarr": format = "zarr" else: format = "binary_folder" @@ -294,12 +294,18 @@ def load(cls, folder, recording=None, load_extensions=True, format="auto"): if format == "binary_folder": sorting_analyzer = SortingAnalyzer.load_from_binary_folder(folder, recording=recording) elif format == "zarr": - sorting_analyzer = SortingAnalyzer.load_from_zarr(folder, recording=recording) + sorting_analyzer = SortingAnalyzer.load_from_zarr( + folder, recording=recording, storage_options=storage_options + ) - sorting_analyzer.folder = folder + if is_path_remote(str(folder)): + sorting_analyzer.folder = folder + # in this case we only load extensions when needed + else: + sorting_analyzer.folder = Path(folder) - if load_extensions: - sorting_analyzer.load_all_saved_extension() + if load_extensions: + sorting_analyzer.load_all_saved_extension() return sorting_analyzer @@ -470,7 +476,9 @@ def load_from_binary_folder(cls, folder, recording=None): def _get_zarr_root(self, mode="r+"): import zarr - zarr_root = zarr.open(self.folder, mode=mode) + if is_path_remote(str(self.folder)): + mode = "r" + zarr_root = zarr.open(self.folder, mode=mode, storage_options=self.storage_options) return zarr_root @classmethod @@ -552,25 +560,22 @@ def create_zarr(cls, folder, sorting, recording, sparsity, return_scaled, rec_at recording_info = zarr_root.create_group("extensions") @classmethod - def load_from_zarr(cls, folder, recording=None): + def load_from_zarr(cls, folder, recording=None, storage_options=None): import zarr - folder = Path(folder) - assert folder.is_dir(), f"This folder does not exist {folder}" - - zarr_root = zarr.open(folder, mode="r") + zarr_root = zarr.open_consolidated(str(folder), mode="r", storage_options=storage_options) # load 
internal sorting in memory - # TODO propagate storage_options sorting = NumpySorting.from_sorting( - ZarrSortingExtractor(folder, zarr_group="sorting"), with_metadata=True, copy_spike_vector=True + ZarrSortingExtractor(folder, zarr_group="sorting", storage_options=storage_options), + with_metadata=True, + copy_spike_vector=True, ) # load recording if possible if recording is None: rec_dict = zarr_root["recording"][0] try: - recording = load_extractor(rec_dict, base_folder=folder) except: recording = None From 33e27b1c621aeca2a99a32d3ddf44f4a5fadf022 Mon Sep 17 00:00:00 2001 From: Alessio Buccino Date: Tue, 20 Aug 2024 09:15:51 +0200 Subject: [PATCH 106/187] Update src/spikeinterface/core/sortinganalyzer.py --- src/spikeinterface/core/sortinganalyzer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spikeinterface/core/sortinganalyzer.py b/src/spikeinterface/core/sortinganalyzer.py index eb6233bf86..45f1f881b4 100644 --- a/src/spikeinterface/core/sortinganalyzer.py +++ b/src/spikeinterface/core/sortinganalyzer.py @@ -563,7 +563,7 @@ def create_zarr(cls, folder, sorting, recording, sparsity, return_scaled, rec_at def load_from_zarr(cls, folder, recording=None, storage_options=None): import zarr - zarr_root = zarr.open_consolidated(str(folder), mode="r", storage_options=storage_options) + zarr_root = zarr.open(str(folder), mode="r", storage_options=storage_options) # load internal sorting in memory sorting = NumpySorting.from_sorting( From bc1c704dadb5c771b60ca11847d3bc2169fb4086 Mon Sep 17 00:00:00 2001 From: Alessio Buccino Date: Tue, 20 Aug 2024 12:35:09 +0200 Subject: [PATCH 107/187] Improve do_recording_attributes_match impelmentation, errors, and tests --- src/spikeinterface/core/recording_tools.py | 52 +++++++++++++++---- src/spikeinterface/core/sortinganalyzer.py | 19 +++++-- .../core/tests/test_recording_tools.py | 41 +++++++++++++++ .../core/tests/test_sortinganalyzer.py | 11 +++- ...forms_extractor_backwards_compatibility.py | 4 -- 5 files changed, 107 insertions(+), 20 deletions(-) diff --git a/src/spikeinterface/core/recording_tools.py b/src/spikeinterface/core/recording_tools.py index b4c07e77c9..cd2f563fba 100644 --- a/src/spikeinterface/core/recording_tools.py +++ b/src/spikeinterface/core/recording_tools.py @@ -1,6 +1,6 @@ from __future__ import annotations from copy import deepcopy -from typing import Literal +from typing import Literal, Tuple import warnings from pathlib import Path import os @@ -929,7 +929,9 @@ def get_rec_attributes(recording): return rec_attributes -def do_recording_attributes_match(recording1, recording2_attributes) -> bool: +def do_recording_attributes_match( + recording1: "BaseRecording", recording2_attributes: bool, check_is_filtered: bool = True, check_dtype: bool = True +) -> Tuple[bool, str]: """ Check if two recordings have the same attributes @@ -939,22 +941,52 @@ def do_recording_attributes_match(recording1, recording2_attributes) -> bool: The first recording object recording2_attributes : dict The recording attributes to test against + check_is_filtered : bool, default: True + If True, check if the recordings are filtered + check_dtype : bool, default: True + If True, check if the recordings have the same dtype Returns ------- bool True if the recordings have the same attributes + str + A string with the an exception message with attributes that do not match """ recording1_attributes = get_rec_attributes(recording1) recording2_attributes = deepcopy(recording2_attributes) recording1_attributes.pop("properties") 
recording2_attributes.pop("properties") - return ( - np.array_equal(recording1_attributes["channel_ids"], recording2_attributes["channel_ids"]) - and recording1_attributes["sampling_frequency"] == recording2_attributes["sampling_frequency"] - and recording1_attributes["num_channels"] == recording2_attributes["num_channels"] - and recording1_attributes["num_samples"] == recording2_attributes["num_samples"] - and recording1_attributes["is_filtered"] == recording2_attributes["is_filtered"] - and recording1_attributes["dtype"] == recording2_attributes["dtype"] - ) + attributes_match = True + non_matching_attrs = [] + + if not np.array_equal(recording1_attributes["channel_ids"], recording2_attributes["channel_ids"]): + attributes_match = False + non_matching_attrs.append("channel_ids") + if not recording1_attributes["sampling_frequency"] == recording2_attributes["sampling_frequency"]: + attributes_match = False + non_matching_attrs.append("sampling_frequency") + if not recording1_attributes["num_channels"] == recording2_attributes["num_channels"]: + attributes_match = False + non_matching_attrs.append("num_channels") + if not recording1_attributes["num_samples"] == recording2_attributes["num_samples"]: + attributes_match = False + non_matching_attrs.append("num_samples") + if check_is_filtered: + if not recording1_attributes["is_filtered"] == recording2_attributes["is_filtered"]: + attributes_match = False + non_matching_attrs.append("is_filtered") + # dtype is optional + if "dtype" in recording1_attributes and "dtype" in recording2_attributes: + if check_dtype: + if not recording1_attributes["dtype"] == recording2_attributes["dtype"]: + attributes_match = False + non_matching_attrs.append("dtype") + + if len(non_matching_attrs) > 0: + exception_str = f"Recordings do not match in the following attributes: {non_matching_attrs}" + else: + exception_str = "" + + return attributes_match, exception_str diff --git a/src/spikeinterface/core/sortinganalyzer.py b/src/spikeinterface/core/sortinganalyzer.py index 7a9510e72f..d034dcb46a 100644 --- a/src/spikeinterface/core/sortinganalyzer.py +++ b/src/spikeinterface/core/sortinganalyzer.py @@ -608,7 +608,9 @@ def load_from_zarr(cls, folder, recording=None): return sorting_analyzer - def set_temporary_recording(self, recording: BaseRecording): + def set_temporary_recording( + self, recording: BaseRecording, check_is_filtered: bool = True, check_dtype: bool = True + ): """ Sets a temporary recording object. This function can be useful to temporarily set a "cached" recording object that is not saved in the SortingAnalyzer object to speed up @@ -620,12 +622,19 @@ def set_temporary_recording(self, recording: BaseRecording): ---------- recording : BaseRecording The recording object to set as temporary recording. + check_is_filtered : bool, default: True + If True, check that the temporary recording is filtered in the same way as the original recording. + check_dtype : bool, default: True + If True, check that the dtype of the temporary recording is the same as the original recording. """ # check that recording is compatible - assert do_recording_attributes_match(recording, self.rec_attributes), "Recording attributes do not match." - assert np.array_equal( - recording.get_channel_locations(), self.get_channel_locations() - ), "Recording channel locations do not match." 
+ attributes_match, exception_str = do_recording_attributes_match( + recording, self.rec_attributes, check_is_filtered=check_is_filtered, check_dtype=check_dtype + ) + if not attributes_match: + raise ValueError(exception_str) + if not np.array_equal(recording.get_channel_locations(), self.get_channel_locations()): + raise ValueError("Recording channel locations do not match.") if self._recording is not None: warnings.warn("SortingAnalyzer recording is already set. The current recording is temporarily replaced.") self._temporary_recording = recording diff --git a/src/spikeinterface/core/tests/test_recording_tools.py b/src/spikeinterface/core/tests/test_recording_tools.py index d83e4d76fc..8a8fc3a358 100644 --- a/src/spikeinterface/core/tests/test_recording_tools.py +++ b/src/spikeinterface/core/tests/test_recording_tools.py @@ -17,6 +17,8 @@ get_channel_distances, get_noise_levels, order_channels_by_depth, + do_recording_attributes_match, + get_rec_attributes, ) @@ -300,6 +302,45 @@ def test_order_channels_by_depth(): assert np.array_equal(order_2d[::-1], order_2d_fliped) +def test_do_recording_attributes_match(): + recording = NoiseGeneratorRecording( + num_channels=2, durations=[10.325, 3.5], sampling_frequency=30_000, strategy="tile_pregenerated" + ) + rec_attributes = get_rec_attributes(recording) + do_match, _ = do_recording_attributes_match(recording, rec_attributes) + assert do_match + + rec_attributes = get_rec_attributes(recording) + rec_attributes["sampling_frequency"] = 1.0 + do_match, exc = do_recording_attributes_match(recording, rec_attributes) + assert not do_match + assert "sampling_frequency" in exc + + # check is_filtered options + rec_attributes = get_rec_attributes(recording) + rec_attributes["is_filtered"] = not rec_attributes["is_filtered"] + + do_match, exc = do_recording_attributes_match(recording, rec_attributes) + assert not do_match + assert "is_filtered" in exc + do_match, exc = do_recording_attributes_match(recording, rec_attributes, check_is_filtered=False) + assert do_match + + # check dtype options + rec_attributes = get_rec_attributes(recording) + rec_attributes["dtype"] = "int16" + do_match, exc = do_recording_attributes_match(recording, rec_attributes) + assert not do_match + assert "dtype" in exc + do_match, exc = do_recording_attributes_match(recording, rec_attributes, check_dtype=False) + assert do_match + + # check missing dtype + rec_attributes.pop("dtype") + do_match, exc = do_recording_attributes_match(recording, rec_attributes) + assert do_match + + if __name__ == "__main__": # Create a temporary folder using the standard library import tempfile diff --git a/src/spikeinterface/core/tests/test_sortinganalyzer.py b/src/spikeinterface/core/tests/test_sortinganalyzer.py index d89eb7fac0..9de725239d 100644 --- a/src/spikeinterface/core/tests/test_sortinganalyzer.py +++ b/src/spikeinterface/core/tests/test_sortinganalyzer.py @@ -141,9 +141,18 @@ def test_SortingAnalyzer_tmp_recording(dataset): recording_sliced = recording.channel_slice(recording.channel_ids[:-1]) # wrong channels - with pytest.raises(AssertionError): + with pytest.raises(ValueError): sorting_analyzer.set_temporary_recording(recording_sliced) + # test with different is_filtered + recording_filt = recording.clone() + recording_filt.annotate(is_filtered=False) + with pytest.raises(ValueError): + sorting_analyzer.set_temporary_recording(recording_filt) + + # thest with additional check_is_filtered + sorting_analyzer.set_temporary_recording(recording_filt, check_is_filtered=False) + def 
_check_sorting_analyzers(sorting_analyzer, original_sorting, cache_folder): diff --git a/src/spikeinterface/core/waveforms_extractor_backwards_compatibility.py b/src/spikeinterface/core/waveforms_extractor_backwards_compatibility.py index d9514c7fce..da1f5a71f5 100644 --- a/src/spikeinterface/core/waveforms_extractor_backwards_compatibility.py +++ b/src/spikeinterface/core/waveforms_extractor_backwards_compatibility.py @@ -446,10 +446,6 @@ def _read_old_waveforms_extractor_binary(folder, sorting): else: rec_attributes["probegroup"] = None - if "dtype" not in rec_attributes: - warnings.warn("dtype not found in rec_attributes. Setting to float32") - rec_attributes["dtype"] = "float32" - # recording recording = None if (folder / "recording.json").exists(): From c1239397b9bec50acf06c8b3cbc11ee93861786f Mon Sep 17 00:00:00 2001 From: Alessio Buccino Date: Tue, 20 Aug 2024 15:14:12 +0200 Subject: [PATCH 108/187] Lazy loading of zarr timestamps --- src/spikeinterface/core/baserecording.py | 7 +++---- src/spikeinterface/core/zarrextractors.py | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/spikeinterface/core/baserecording.py b/src/spikeinterface/core/baserecording.py index e65afabaca..efa6d03f56 100644 --- a/src/spikeinterface/core/baserecording.py +++ b/src/spikeinterface/core/baserecording.py @@ -811,10 +811,9 @@ def __init__(self, sampling_frequency=None, t_start=None, time_vector=None): def get_times(self): if self.time_vector is not None: - if isinstance(self.time_vector, np.ndarray): - return self.time_vector - else: - return np.array(self.time_vector) + if not isinstance(self.time_vector, np.ndarray): + self.time_vector = np.array(self.time_vector) + return self.time_vector else: time_vector = np.arange(self.get_num_samples(), dtype="float64") time_vector /= self.sampling_frequency diff --git a/src/spikeinterface/core/zarrextractors.py b/src/spikeinterface/core/zarrextractors.py index 1b9637e097..17f1ac08b3 100644 --- a/src/spikeinterface/core/zarrextractors.py +++ b/src/spikeinterface/core/zarrextractors.py @@ -66,7 +66,7 @@ def __init__(self, folder_path: Path | str, storage_options: dict | None = None) time_kwargs = {} time_vector = self._root.get(f"times_seg{segment_index}", None) if time_vector is not None: - time_kwargs["time_vector"] = time_vector[:] + time_kwargs["time_vector"] = time_vector else: if t_starts is None: t_start = None From 32568ca1a9637a7dc167dbf1a56e214dbe13cfb5 Mon Sep 17 00:00:00 2001 From: JoeZiminski Date: Tue, 20 Aug 2024 15:46:49 +0100 Subject: [PATCH 109/187] Remove run CI on main, only run on cron job. 
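KS4 compatibility only needs to track new upstream releases, so the weekly cron (plus manual dispatch) is enough and the pull-request triggers are dropped. The version matrix itself is built from PyPI; a minimal sketch of that lookup, mirroring .github/scripts/check_kilosort4_releases.py (updated later in this series):

    import requests
    from packaging.version import parse

    # ask PyPI for every published kilosort release, then keep the supported range
    data = requests.get("https://pypi.org/pypi/kilosort/json").json()
    versions = [v for v in sorted(data["releases"].keys()) if parse(v) >= parse("4.0.5")]
    print(versions)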
--- .github/workflows/test_kilosort4.yml | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/.github/workflows/test_kilosort4.yml b/.github/workflows/test_kilosort4.yml
index 13d70acf88..24b2e29440 100644
--- a/.github/workflows/test_kilosort4.yml
+++ b/.github/workflows/test_kilosort4.yml
@@ -4,10 +4,6 @@ on:
   workflow_dispatch:
   schedule:
     - cron: "0 12 * * 0" # Weekly on Sunday at noon UTC
-  pull_request:
-    types: [synchronize, opened, reopened]
-    branches:
-      - main

 jobs:
   versions:

From 8580c975e0d26db4006883da7ff2c36a58a5832a Mon Sep 17 00:00:00 2001
From: Joe Ziminski <55797454+JoeZiminski@users.noreply.github.com>
Date: Tue, 20 Aug 2024 15:47:42 +0100
Subject: [PATCH 110/187] Update .github/scripts/test_kilosort4_ci.py

--- .github/scripts/test_kilosort4_ci.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/.github/scripts/test_kilosort4_ci.py b/.github/scripts/test_kilosort4_ci.py
index c894ed71ff..10855f2120 100644
--- a/.github/scripts/test_kilosort4_ci.py
+++ b/.github/scripts/test_kilosort4_ci.py
@@ -336,6 +336,10 @@ def test_binary_filtered_arguments(self):
         )

     def _check_arguments(self, object_, expected_arguments):
+        """
+        Check that the argument signature of `object_` is as expected
+        (i.e. has not changed across kilosort versions).
+        """
         sig = signature(object_)
         obj_arguments = list(sig.parameters.keys())
         assert expected_arguments == obj_arguments

From b3c6680f859d165bc6f4e11ea8d91cfd6c95eaf1 Mon Sep 17 00:00:00 2001
From: Joe Ziminski <55797454+JoeZiminski@users.noreply.github.com>
Date: Tue, 20 Aug 2024 15:47:52 +0100
Subject: [PATCH 111/187] Update src/spikeinterface/sorters/external/kilosort4.py

--- src/spikeinterface/sorters/external/kilosort4.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/spikeinterface/sorters/external/kilosort4.py b/src/spikeinterface/sorters/external/kilosort4.py
index eb1df7c455..3f7a0f7abe 100644
--- a/src/spikeinterface/sorters/external/kilosort4.py
+++ b/src/spikeinterface/sorters/external/kilosort4.py
@@ -128,7 +128,7 @@ def is_installed(cls):

     @classmethod
     def get_sorter_version(cls):
-        """kilosort version <4.0.10 is always '4'"""
+        """kilosort.__version__ <4.0.10 is always '4'"""
         return importlib_version("kilosort")

     @classmethod

From 23c39831a9cadba7ab50c88c53536723e93fba2f Mon Sep 17 00:00:00 2001
From: Joe Ziminski <55797454+JoeZiminski@users.noreply.github.com>
Date: Tue, 20 Aug 2024 15:48:10 +0100
Subject: [PATCH 112/187] Update .github/workflows/test_kilosort4.yml

--- .github/workflows/test_kilosort4.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/test_kilosort4.yml b/.github/workflows/test_kilosort4.yml
index 24b2e29440..95fc30b0b2 100644
--- a/.github/workflows/test_kilosort4.yml
+++ b/.github/workflows/test_kilosort4.yml
@@ -42,7 +42,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.12"] # TODO: just checking python version is not cause of failing test.
+        python-version: ["3.12"]
         os: [ubuntu-latest]
         ks_version: ${{ fromJson(needs.versions.outputs.matrix) }}
     steps:

From ed9ef3251504a8d2388a5c461e5c8531113ccb09 Mon Sep 17 00:00:00 2001
From: JoeZiminski
Date: Tue, 20 Aug 2024 16:04:16 +0100
Subject: [PATCH 113/187] Fix linting.
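This only strips the trailing whitespace flagged by the linter; the signature check added in PATCH 110 is unchanged. For context, a minimal sketch of what that helper asserts, using a stand-in function (the real tests pin the actual kilosort API):

    from inspect import signature

    def check_arguments(object_, expected_arguments):
        # compare the callable's parameter names, in order, against a pinned list
        assert list(signature(object_).parameters.keys()) == expected_arguments

    def set_files(settings, filename, probe, probe_name, data_dir, results_dir):
        """Stand-in for the kilosort function whose signature is pinned."""

    check_arguments(set_files, ["settings", "filename", "probe", "probe_name", "data_dir", "results_dir"])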
--- .github/scripts/test_kilosort4_ci.py | 2 +-
 .github/workflows/test_kilosort4.yml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/scripts/test_kilosort4_ci.py b/.github/scripts/test_kilosort4_ci.py
index 10855f2120..3ac8c7dd2b 100644
--- a/.github/scripts/test_kilosort4_ci.py
+++ b/.github/scripts/test_kilosort4_ci.py
@@ -337,7 +337,7 @@ def test_binary_filtered_arguments(self):

     def _check_arguments(self, object_, expected_arguments):
         """
-        Check that the argument signature of `object_` is as expected 
+        Check that the argument signature of `object_` is as expected
         (i.e. has not changed across kilosort versions).
         """
diff --git a/.github/workflows/test_kilosort4.yml b/.github/workflows/test_kilosort4.yml
index 95fc30b0b2..390bec98be 100644
--- a/.github/workflows/test_kilosort4.yml
+++ b/.github/workflows/test_kilosort4.yml
@@ -42,7 +42,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.12"] 
+        python-version: ["3.12"]
         os: [ubuntu-latest]
         ks_version: ${{ fromJson(needs.versions.outputs.matrix) }}
     steps:

From 1f6e34c5765912ba2d9ca4b1e063274d269a3c43 Mon Sep 17 00:00:00 2001
From: Robin Kim
Date: Tue, 20 Aug 2024 11:36:35 -0500
Subject: [PATCH 114/187] Fix DeprecationWarnings by ensuring scalar extraction from distance calculations

--- src/spikeinterface/postprocessing/template_similarity.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/spikeinterface/postprocessing/template_similarity.py b/src/spikeinterface/postprocessing/template_similarity.py
index cb4cc323ad..2c017b8d00 100644
--- a/src/spikeinterface/postprocessing/template_similarity.py
+++ b/src/spikeinterface/postprocessing/template_similarity.py
@@ -153,7 +153,6 @@ def _get_data(self):
 def compute_similarity_with_templates_array(
     templates_array, other_templates_array, method, support="union", num_shifts=0, sparsity=None, other_sparsity=None
 ):
-
     import sklearn.metrics.pairwise
 
     if method == "cosine_similarity":
@@ -226,15 +225,17 @@ def compute_similarity_with_templates_array(
                     if method == "l1":
                         norm_i = np.sum(np.abs(src))
                         norm_j = np.sum(np.abs(tgt))
-                        distances[count, i, j] = sklearn.metrics.pairwise.pairwise_distances(src, tgt, metric="l1")
+                        distances[count, i, j] = sklearn.metrics.pairwise.pairwise_distances(src, tgt, metric="l1").item()
                         distances[count, i, j] /= norm_i + norm_j
                     elif method == "l2":
                         norm_i = np.linalg.norm(src, ord=2)
                         norm_j = np.linalg.norm(tgt, ord=2)
-                        distances[count, i, j] = sklearn.metrics.pairwise.pairwise_distances(src, tgt, metric="l2")
+                        distances[count, i, j] = sklearn.metrics.pairwise.pairwise_distances(src, tgt, metric="l2").item()
                         distances[count, i, j] /= norm_i + norm_j
                     else:
-                        distances[count, i, j] = sklearn.metrics.pairwise.pairwise_distances(src, tgt, metric="cosine")
+                        distances[count, i, j] = sklearn.metrics.pairwise.pairwise_distances(
+                            src, tgt, metric="cosine"
+                        ).item()
 
                     if same_array:
                         distances[count, j, i] = distances[count, i, j]

From 522260cdda14ffab5a49675bf63b9bc8c44cbec5 Mon Sep 17 00:00:00 2001
From: Alessio Buccino
Date: Tue, 20 Aug 2024 18:51:20 +0200
Subject: [PATCH 115/187] asarray and annotations

--- src/spikeinterface/core/baserecording.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/spikeinterface/core/baserecording.py b/src/spikeinterface/core/baserecording.py
index efa6d03f56..dbec8a3730 100644
--- a/src/spikeinterface/core/baserecording.py
+++ b/src/spikeinterface/core/baserecording.py
@@ -422,7 +422,7 @@ def get_time_info(self, 
segment_index=None) -> dict: return time_kwargs - def get_times(self, segment_index=None): + def get_times(self, segment_index=None) -> np.ndarray: """Get time vector for a recording segment. If the segment has a time_vector, then it is returned. Otherwise @@ -809,10 +809,9 @@ def __init__(self, sampling_frequency=None, t_start=None, time_vector=None): BaseSegment.__init__(self) - def get_times(self): + def get_times(self) -> np.ndarray: if self.time_vector is not None: - if not isinstance(self.time_vector, np.ndarray): - self.time_vector = np.array(self.time_vector) + self.time_vector = np.asarray(self.time_vector) return self.time_vector else: time_vector = np.arange(self.get_num_samples(), dtype="float64") From be0fd8afacea9508d38e65a9f89655a5e25bba57 Mon Sep 17 00:00:00 2001 From: JuanPimiento <148992347+JuanPimientoCaicedo@users.noreply.github.com> Date: Tue, 20 Aug 2024 12:52:10 -0400 Subject: [PATCH 116/187] Add causal filtering to filter.py (#3172) --- doc/api.rst | 1 + src/spikeinterface/preprocessing/filter.py | 127 ++++++++++++++-- .../preprocessing/preprocessinglist.py | 1 + .../preprocessing/tests/test_filter.py | 137 +++++++++++++++++- 4 files changed, 254 insertions(+), 12 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index 1966b48a37..42f9fec299 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -171,6 +171,7 @@ spikeinterface.preprocessing .. autofunction:: interpolate_bad_channels .. autofunction:: normalize_by_quantile .. autofunction:: notch_filter + .. autofunction:: causal_filter .. autofunction:: phase_shift .. autofunction:: rectify .. autofunction:: remove_artifacts diff --git a/src/spikeinterface/preprocessing/filter.py b/src/spikeinterface/preprocessing/filter.py index 54c5ab2b2d..a67d163d3d 100644 --- a/src/spikeinterface/preprocessing/filter.py +++ b/src/spikeinterface/preprocessing/filter.py @@ -24,10 +24,12 @@ class FilterRecording(BasePreprocessor): """ - Generic filter class based on: - - * scipy.signal.iirfilter - * scipy.signal.filtfilt or scipy.signal.sosfilt + A generic filter class based on: + For filter coefficient generation: + * scipy.signal.iirfilter + For filter application: + * scipy.signal.filtfilt or scipy.signal.sosfiltfilt when direction = "forward-backward" + * scipy.signal.lfilter or scipy.signal.sosfilt when direction = "forward" or "backward" BandpassFilterRecording is built on top of it. @@ -56,6 +58,11 @@ class FilterRecording(BasePreprocessor): - numerator/denominator : ("ba") ftype : str, default: "butter" Filter type for `scipy.signal.iirfilter` e.g. "butter", "cheby1". + direction : "forward" | "backward" | "forward-backward", default: "forward-backward" + Direction of filtering: + - "forward" - filter is applied to the timeseries in one direction, creating phase shifts + - "backward" - the timeseries is reversed, the filter is applied and filtered timeseries reversed again. Creates phase shifts in the opposite direction to "forward" + - "forward-backward" - Applies the filter in the forward and backward direction, resulting in zero-phase filtering. Note this doubles the effective filter order. 
Returns ------- @@ -75,6 +82,7 @@ def __init__( add_reflect_padding=False, coeff=None, dtype=None, + direction="forward-backward", ): import scipy.signal @@ -106,7 +114,13 @@ def __init__( for parent_segment in recording._recording_segments: self.add_recording_segment( FilterRecordingSegment( - parent_segment, filter_coeff, filter_mode, margin, dtype, add_reflect_padding=add_reflect_padding + parent_segment, + filter_coeff, + filter_mode, + margin, + dtype, + add_reflect_padding=add_reflect_padding, + direction=direction, ) ) @@ -121,14 +135,25 @@ def __init__( margin_ms=margin_ms, add_reflect_padding=add_reflect_padding, dtype=dtype.str, + direction=direction, ) class FilterRecordingSegment(BasePreprocessorSegment): - def __init__(self, parent_recording_segment, coeff, filter_mode, margin, dtype, add_reflect_padding=False): + def __init__( + self, + parent_recording_segment, + coeff, + filter_mode, + margin, + dtype, + add_reflect_padding=False, + direction="forward-backward", + ): BasePreprocessorSegment.__init__(self, parent_recording_segment) self.coeff = coeff self.filter_mode = filter_mode + self.direction = direction self.margin = margin self.add_reflect_padding = add_reflect_padding self.dtype = dtype @@ -150,11 +175,24 @@ def get_traces(self, start_frame, end_frame, channel_indices): import scipy.signal - if self.filter_mode == "sos": - filtered_traces = scipy.signal.sosfiltfilt(self.coeff, traces_chunk, axis=0) - elif self.filter_mode == "ba": - b, a = self.coeff - filtered_traces = scipy.signal.filtfilt(b, a, traces_chunk, axis=0) + if self.direction == "forward-backward": + if self.filter_mode == "sos": + filtered_traces = scipy.signal.sosfiltfilt(self.coeff, traces_chunk, axis=0) + elif self.filter_mode == "ba": + b, a = self.coeff + filtered_traces = scipy.signal.filtfilt(b, a, traces_chunk, axis=0) + else: + if self.direction == "backward": + traces_chunk = np.flip(traces_chunk, axis=0) + + if self.filter_mode == "sos": + filtered_traces = scipy.signal.sosfilt(self.coeff, traces_chunk, axis=0) + elif self.filter_mode == "ba": + b, a = self.coeff + filtered_traces = scipy.signal.lfilter(b, a, traces_chunk, axis=0) + + if self.direction == "backward": + filtered_traces = np.flip(filtered_traces, axis=0) if right_margin > 0: filtered_traces = filtered_traces[left_margin:-right_margin, :] @@ -289,6 +327,73 @@ def __init__(self, recording, freq=3000, q=30, margin_ms=5.0, dtype=None): notch_filter = define_function_from_class(source_class=NotchFilterRecording, name="notch_filter") highpass_filter = define_function_from_class(source_class=HighpassFilterRecording, name="highpass_filter") + +def causal_filter( + recording, + direction="forward", + band=[300.0, 6000.0], + btype="bandpass", + filter_order=5, + ftype="butter", + filter_mode="sos", + margin_ms=5.0, + add_reflect_padding=False, + coeff=None, + dtype=None, +): + """ + Generic causal filter built on top of the filter function. + + Parameters + ---------- + recording : Recording + The recording extractor to be re-referenced + direction : "forward" | "backward", default: "forward" + Direction of causal filter. The "backward" option flips the traces in time before applying the filter + and then flips them back. + band : float or list, default: [300.0, 6000.0] + If float, cutoff frequency in Hz for "highpass" filter type + If list. 
band (low, high) in Hz for "bandpass" filter type + btype : "bandpass" | "highpass", default: "bandpass" + Type of the filter + margin_ms : float, default: 5.0 + Margin in ms on border to avoid border effect + coeff : array | None, default: None + Filter coefficients in the filter_mode form. + dtype : dtype or None, default: None + The dtype of the returned traces. If None, the dtype of the parent recording is used + add_reflect_padding : Bool, default False + If True, uses a left and right margin during calculation. + filter_order : order + The order of the filter for `scipy.signal.iirfilter` + filter_mode : "sos" | "ba", default: "sos" + Filter form of the filter coefficients for `scipy.signal.iirfilter`: + - second-order sections ("sos") + - numerator/denominator : ("ba") + ftype : str, default: "butter" + Filter type for `scipy.signal.iirfilter` e.g. "butter", "cheby1". + + Returns + ------- + filter_recording : FilterRecording + The causal-filtered recording extractor object + """ + assert direction in ["forward", "backward"], "Direction must be either 'forward' or 'backward'" + return filter( + recording=recording, + direction=direction, + band=band, + btype=btype, + filter_order=filter_order, + ftype=ftype, + filter_mode=filter_mode, + margin_ms=margin_ms, + add_reflect_padding=add_reflect_padding, + coeff=coeff, + dtype=dtype, + ) + + bandpass_filter.__doc__ = bandpass_filter.__doc__.format(_common_filter_docs) highpass_filter.__doc__ = highpass_filter.__doc__.format(_common_filter_docs) diff --git a/src/spikeinterface/preprocessing/preprocessinglist.py b/src/spikeinterface/preprocessing/preprocessinglist.py index 149c6eb458..bdf5f2219c 100644 --- a/src/spikeinterface/preprocessing/preprocessinglist.py +++ b/src/spikeinterface/preprocessing/preprocessinglist.py @@ -12,6 +12,7 @@ notch_filter, HighpassFilterRecording, highpass_filter, + causal_filter, ) from .filter_gaussian import GaussianFilterRecording, gaussian_filter from .normalize_scale import ( diff --git a/src/spikeinterface/preprocessing/tests/test_filter.py b/src/spikeinterface/preprocessing/tests/test_filter.py index 68790b3273..9df60af3db 100644 --- a/src/spikeinterface/preprocessing/tests/test_filter.py +++ b/src/spikeinterface/preprocessing/tests/test_filter.py @@ -4,7 +4,140 @@ from spikeinterface.core import generate_recording from spikeinterface import NumpyRecording, set_global_tmp_folder -from spikeinterface.preprocessing import filter, bandpass_filter, notch_filter +from spikeinterface.preprocessing import filter, bandpass_filter, notch_filter, causal_filter + + +class TestCausalFilter: + """ + The only thing that is not tested (JZ, as of 23/07/2024) is the + propagation of margin kwargs, these are general filter params + and can be tested in an upcoming PR. + """ + + @pytest.fixture(scope="session") + def recording_and_data(self): + recording = generate_recording(durations=[1]) + raw_data = recording.get_traces() + + return (recording, raw_data) + + def test_causal_filter_main_kwargs(self, recording_and_data): + """ + Perform a test that expected output is returned under change + of all key filter-related kwargs. First run the filter in + the forward direction with options and compare it + to the expected output from scipy. + + Next, change every filter-related kwarg and set in the backwards + direction. Again check it matches expected scipy output. 
+ """ + from scipy.signal import lfilter, sosfilt + + recording, raw_data = recording_and_data + + # First, check in the forward direction with + # the default set of kwargs + options = self._get_filter_options() + + sos = self._run_iirfilter(options, recording) + + test_data = sosfilt(sos, raw_data, axis=0) + test_data.astype(recording.dtype) + + filt_data = causal_filter(recording, direction="forward", **options, margin_ms=0).get_traces() + + assert np.allclose(test_data, filt_data, rtol=0, atol=1e-6) + + # Then, change all kwargs to ensure they are propagated + # and check the backwards version. + options["band"] = [671] + options["btype"] = "highpass" + options["filter_order"] = 8 + options["ftype"] = "bessel" + options["filter_mode"] = "ba" + options["dtype"] = np.float16 + + b, a = self._run_iirfilter(options, recording) + + flip_raw = np.flip(raw_data, axis=0) + test_data = lfilter(b, a, flip_raw, axis=0) + test_data = np.flip(test_data, axis=0) + test_data = test_data.astype(options["dtype"]) + + filt_data = causal_filter(recording, direction="backward", **options, margin_ms=0).get_traces() + + assert np.allclose(test_data, filt_data, rtol=0, atol=1e-6) + + def test_causal_filter_custom_coeff(self, recording_and_data): + """ + A different path is taken when custom coeff is selected. + Therefore, explicitly test the expected outputs are obtained + when passing custom coeff, under the "ba" and "sos" conditions. + """ + from scipy.signal import lfilter, sosfilt + + recording, raw_data = recording_and_data + + options = self._get_filter_options() + options["filter_mode"] = "ba" + options["coeff"] = (np.array([0.1, 0.2, 0.3]), np.array([0.4, 0.5, 0.6])) + + # Check the custom coeff are propagated in both modes. + # First, in "ba" mode + test_data = lfilter(options["coeff"][0], options["coeff"][1], raw_data, axis=0) + test_data = test_data.astype(recording.get_dtype()) + + filt_data = causal_filter(recording, direction="forward", **options, margin_ms=0).get_traces() + + assert np.allclose(test_data, filt_data, rtol=0, atol=1e-6, equal_nan=True) + + # Next, in "sos" mode + options["filter_mode"] = "sos" + options["coeff"] = np.ones((2, 6)) + + test_data = sosfilt(options["coeff"], raw_data, axis=0) + test_data = test_data.astype(recording.get_dtype()) + + filt_data = causal_filter(recording, direction="forward", **options, margin_ms=0).get_traces() + + assert np.allclose(test_data, filt_data, rtol=0, atol=1e-6, equal_nan=True) + + def test_causal_kwarg_error_raised(self, recording_and_data): + """ + Test that passing the "forward-backward" direction results in + an error. It is is critical this error is raised, + otherwise the filter will no longer be causal. + """ + recording, raw_data = recording_and_data + + with pytest.raises(BaseException) as e: + filt_data = causal_filter(recording, direction="forward-backward") + + def _run_iirfilter(self, options, recording): + """ + Convenience function to convert Si kwarg + names to Scipy. 
+ """ + from scipy.signal import iirfilter + + return iirfilter( + N=options["filter_order"], + Wn=options["band"], + btype=options["btype"], + ftype=options["ftype"], + output=options["filter_mode"], + fs=recording.get_sampling_frequency(), + ) + + def _get_filter_options(self): + return { + "band": [300.0, 6000.0], + "btype": "bandpass", + "filter_order": 5, + "ftype": "butter", + "filter_mode": "sos", + "coeff": None, + } def test_filter(): @@ -28,6 +161,8 @@ def test_filter(): # other filtering types rec3 = filter(rec, band=500.0, btype="highpass", filter_mode="ba", filter_order=2) rec4 = notch_filter(rec, freq=3000, q=30, margin_ms=5.0) + rec5 = causal_filter(rec, direction="forward") + rec6 = causal_filter(rec, direction="backward") # filter from coefficients from scipy.signal import iirfilter From ae44b4a908855b8495d1d9807fddc73d8452b86a Mon Sep 17 00:00:00 2001 From: JoeZiminski Date: Tue, 20 Aug 2024 21:20:27 +0100 Subject: [PATCH 117/187] Remove 'save_preprocessed' test. --- .github/scripts/test_kilosort4_ci.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/.github/scripts/test_kilosort4_ci.py b/.github/scripts/test_kilosort4_ci.py index 3ac8c7dd2b..e0d1f2a504 100644 --- a/.github/scripts/test_kilosort4_ci.py +++ b/.github/scripts/test_kilosort4_ci.py @@ -85,14 +85,6 @@ ("duplicate_spike_bins", 5), ] -# Update PARAMS_TO_TEST with version-dependent kwargs -if parse(version("kilosort")) >= parse("4.0.12"): - pass # TODO: expose? -# PARAMS_TO_TEST.extend( -# [ -# ("save_preprocessed_copy", False), -# ] -# ) if parse(version("kilosort")) >= parse("4.0.11"): PARAMS_TO_TEST.extend( [ From 642eea9b2c1242000dd847701eb89dc533def6be Mon Sep 17 00:00:00 2001 From: JoeZiminski Date: Wed, 21 Aug 2024 12:55:59 +0100 Subject: [PATCH 118/187] Update KS4 versions to test on, add a warning for the next version. --- .github/scripts/check_kilosort4_releases.py | 10 +++++++++- .github/scripts/kilosort4-latest-version.json | 1 + 2 files changed, 10 insertions(+), 1 deletion(-) create mode 100644 .github/scripts/kilosort4-latest-version.json diff --git a/.github/scripts/check_kilosort4_releases.py b/.github/scripts/check_kilosort4_releases.py index de11dc974b..92e7bf277f 100644 --- a/.github/scripts/check_kilosort4_releases.py +++ b/.github/scripts/check_kilosort4_releases.py @@ -4,6 +4,7 @@ import requests import json from packaging.version import parse +import spikeinterface def get_pypi_versions(package_name): """ @@ -15,7 +16,13 @@ def get_pypi_versions(package_name): response.raise_for_status() data = response.json() versions = list(sorted(data["releases"].keys())) - versions = [ver for ver in versions if parse(ver) >= parse("4.0.5")] + + assert parse(spikeinterface.__version__) < parse("0.101.1"), ( + "Kilosort 4.0.5-12 are supported in SpikeInterface < 0.101.1." + "At version 0.101.1, this should be updated to support newer" + "kilosort verrsions." 
+ ) + versions = [ver for ver in versions if parse("4.0.12") >= parse(ver) >= parse("4.0.5")] return versions @@ -24,4 +31,5 @@ def get_pypi_versions(package_name): package_name = "kilosort" versions = get_pypi_versions(package_name) with open(Path(os.path.realpath(__file__)).parent / "kilosort4-latest-version.json", "w") as f: + print(versions) json.dump(versions, f) diff --git a/.github/scripts/kilosort4-latest-version.json b/.github/scripts/kilosort4-latest-version.json new file mode 100644 index 0000000000..03629ff842 --- /dev/null +++ b/.github/scripts/kilosort4-latest-version.json @@ -0,0 +1 @@ +["4.0.10", "4.0.11", "4.0.12", "4.0.5", "4.0.6", "4.0.7", "4.0.8", "4.0.9"] From 3ffe6dfda36b00be3e67ba181b60db7a209363d8 Mon Sep 17 00:00:00 2001 From: Matthias H Hennig Date: Sun, 25 Aug 2024 01:41:14 +0100 Subject: [PATCH 119/187] fix: download apptainer images without docker client --- src/spikeinterface/sorters/container_tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spikeinterface/sorters/container_tools.py b/src/spikeinterface/sorters/container_tools.py index 6406919455..6b194c0702 100644 --- a/src/spikeinterface/sorters/container_tools.py +++ b/src/spikeinterface/sorters/container_tools.py @@ -99,7 +99,7 @@ def __init__(self, mode, container_image, volumes, py_user_base, extra_kwargs): singularity_image = sif_file else: - docker_image = self._get_docker_image(container_image) + docker_image = Client.load('docker://'+container_image) if docker_image and len(docker_image.tags) > 0: tag = docker_image.tags[0] print(f"Building singularity image from local docker image: {tag}") From 4644dd14e4ff3dbd414a720dc9656c8b0d1faade Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 25 Aug 2024 00:43:05 +0000 Subject: [PATCH 120/187] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/spikeinterface/sorters/container_tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spikeinterface/sorters/container_tools.py b/src/spikeinterface/sorters/container_tools.py index 6b194c0702..f9611586c9 100644 --- a/src/spikeinterface/sorters/container_tools.py +++ b/src/spikeinterface/sorters/container_tools.py @@ -99,7 +99,7 @@ def __init__(self, mode, container_image, volumes, py_user_base, extra_kwargs): singularity_image = sif_file else: - docker_image = Client.load('docker://'+container_image) + docker_image = Client.load("docker://" + container_image) if docker_image and len(docker_image.tags) > 0: tag = docker_image.tags[0] print(f"Building singularity image from local docker image: {tag}") From 50643c195750f505038b535c37ba99e7b9d7031a Mon Sep 17 00:00:00 2001 From: Matthias H Hennig Date: Mon, 26 Aug 2024 09:57:50 +0100 Subject: [PATCH 121/187] Fix for ninor changes in latest Kilosort4 API --- src/spikeinterface/sorters/external/kilosort4.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/spikeinterface/sorters/external/kilosort4.py b/src/spikeinterface/sorters/external/kilosort4.py index 8499cef11f..8a81274d24 100644 --- a/src/spikeinterface/sorters/external/kilosort4.py +++ b/src/spikeinterface/sorters/external/kilosort4.py @@ -222,6 +222,7 @@ def _run_from_folder(cls, sorter_output_folder, params, verbose): probe_name=probe_name, data_dir=data_dir, results_dir=results_dir, + bad_channels=None ) if version.parse(cls.get_sorter_version()) >= version.parse("4.0.12"): @@ -232,7 +233,7 @@ def 
_run_from_folder(cls, sorter_output_folder, params, verbose): do_CAR=do_CAR, invert_sign=invert_sign, device=device, - save_preprocesed_copy=save_preprocessed_copy, # this kwarg is correct (typo) + save_preprocessed_copy=save_preprocessed_copy, # this kwarg is correct (typo) ) else: ops = initialize_ops( From d1f546b7a493d6caa40fccfddb5fa0608d7a797d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 26 Aug 2024 08:58:56 +0000 Subject: [PATCH 122/187] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/spikeinterface/sorters/external/kilosort4.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spikeinterface/sorters/external/kilosort4.py b/src/spikeinterface/sorters/external/kilosort4.py index 8a81274d24..2ec6055d9b 100644 --- a/src/spikeinterface/sorters/external/kilosort4.py +++ b/src/spikeinterface/sorters/external/kilosort4.py @@ -222,7 +222,7 @@ def _run_from_folder(cls, sorter_output_folder, params, verbose): probe_name=probe_name, data_dir=data_dir, results_dir=results_dir, - bad_channels=None + bad_channels=None, ) if version.parse(cls.get_sorter_version()) >= version.parse("4.0.12"): From d4ea5c58a662497af47ac64977dd7fdbaf20edeb Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 25 Aug 2024 00:43:05 +0000 Subject: [PATCH 123/187] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/spikeinterface/sorters/container_tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spikeinterface/sorters/container_tools.py b/src/spikeinterface/sorters/container_tools.py index 6b194c0702..f9611586c9 100644 --- a/src/spikeinterface/sorters/container_tools.py +++ b/src/spikeinterface/sorters/container_tools.py @@ -99,7 +99,7 @@ def __init__(self, mode, container_image, volumes, py_user_base, extra_kwargs): singularity_image = sif_file else: - docker_image = Client.load('docker://'+container_image) + docker_image = Client.load("docker://" + container_image) if docker_image and len(docker_image.tags) > 0: tag = docker_image.tags[0] print(f"Building singularity image from local docker image: {tag}") From 3c39b3c1035272b0e3a9a81e46292e4915eb670f Mon Sep 17 00:00:00 2001 From: Matthias H Hennig Date: Mon, 26 Aug 2024 10:11:05 +0100 Subject: [PATCH 124/187] Revert "Fix for ninor changes in latest Kilosort4 API" This reverts commit 50643c195750f505038b535c37ba99e7b9d7031a. 
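The reverted commit assumed the newest kilosort API unconditionally, which breaks the older releases this sorter still supports. Re-applying it will need a version gate; a minimal sketch of the pattern already used in this file (the exact cut-over release depends on when upstream changed the API, assumed here to be 4.0.12):

    from importlib.metadata import version as importlib_version
    from packaging import version

    # only pass kwargs that exist in the installed kilosort release
    if version.parse(importlib_version("kilosort")) >= version.parse("4.0.12"):
        extra_kwargs = {"save_preprocessed_copy": False}
    else:
        extra_kwargs = {}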
--- src/spikeinterface/sorters/external/kilosort4.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/spikeinterface/sorters/external/kilosort4.py b/src/spikeinterface/sorters/external/kilosort4.py index 8a81274d24..8499cef11f 100644 --- a/src/spikeinterface/sorters/external/kilosort4.py +++ b/src/spikeinterface/sorters/external/kilosort4.py @@ -222,7 +222,6 @@ def _run_from_folder(cls, sorter_output_folder, params, verbose): probe_name=probe_name, data_dir=data_dir, results_dir=results_dir, - bad_channels=None ) if version.parse(cls.get_sorter_version()) >= version.parse("4.0.12"): @@ -233,7 +232,7 @@ def _run_from_folder(cls, sorter_output_folder, params, verbose): do_CAR=do_CAR, invert_sign=invert_sign, device=device, - save_preprocessed_copy=save_preprocessed_copy, # this kwarg is correct (typo) + save_preprocesed_copy=save_preprocessed_copy, # this kwarg is correct (typo) ) else: ops = initialize_ops( From 0ee0c43a08711802a14323b1bc13046328b99540 Mon Sep 17 00:00:00 2001 From: chrishalcrow <57948917+chrishalcrow@users.noreply.github.com> Date: Mon, 26 Aug 2024 17:01:56 +0100 Subject: [PATCH 125/187] Add bad channels and do version check --- .../sorters/external/kilosort4.py | 64 +++++++++++-------- 1 file changed, 36 insertions(+), 28 deletions(-) diff --git a/src/spikeinterface/sorters/external/kilosort4.py b/src/spikeinterface/sorters/external/kilosort4.py index 8499cef11f..1a3ba59b54 100644 --- a/src/spikeinterface/sorters/external/kilosort4.py +++ b/src/spikeinterface/sorters/external/kilosort4.py @@ -59,6 +59,7 @@ class Kilosort4Sorter(BaseSorter): "scaleproc": None, "save_preprocessed_copy": False, "torch_device": "auto", + "bad_channels": None, } _params_description = { @@ -101,6 +102,7 @@ class Kilosort4Sorter(BaseSorter): "scaleproc": "int16 scaling of whitened data, if None set to 200.", "save_preprocessed_copy": "save a pre-processed copy of the data (including drift correction) to temp_wh.dat in the results directory and format Phy output to use that copy of the data", "torch_device": "Select the torch device auto/cuda/cpu", + "bad_channels": "A list of channel indices (rows in the binary file) that should not be included in sorting. Listing channels here is equivalent to excluding them from the probe dictionary.", } sorter_description = """Kilosort4 is a Python package for spike sorting on GPUs with template matching. @@ -110,7 +112,7 @@ class Kilosort4Sorter(BaseSorter): For more information see https://github.com/MouseLand/Kilosort""" installation_mesg = """\nTo use Kilosort4 run:\n - >>> pip install kilosort==4.0 + >>> pip install kilosort --upgrade More information on Kilosort4 at: https://github.com/MouseLand/Kilosort @@ -134,6 +136,25 @@ def get_sorter_version(cls): """kilosort.__version__ <4.0.10 is always '4'""" return importlib_version("kilosort") + @classmethod + def initialize_folder(cls, recording, output_folder, verbose, remove_existing_folder): + if not cls.is_installed(): + raise Exception( + f"The sorter {cls.sorter_name} is not installed. Please install it with:\n{cls.installation_mesg}" + ) + cls.check_sorter_version() + return super(Kilosort4Sorter, cls).initialize_folder(recording, output_folder, verbose, remove_existing_folder) + + @classmethod + def check_sorter_version(cls): + kilosort_version = version.parse(cls.get_sorter_version()) + if kilosort_version < version.parse("4.0.16"): + raise Exception( + f"""SpikeInterface only supports kilosort versions 4.0.16 and above. You are running version {kilosort_version}. 
To install the latest version, run: + >>> pip install kilosort --upgrade + """ + ) + @classmethod def _setup_recording(cls, recording, sorter_output_folder, params, verbose): from probeinterface import write_prb @@ -214,6 +235,7 @@ def _run_from_folder(cls, sorter_output_folder, params, verbose): # NOTE: Also modifies settings in-place data_dir = "" results_dir = sorter_output_folder + bad_channels = params["bad_channels"] filename, data_dir, results_dir, probe = set_files( settings=settings, @@ -222,36 +244,22 @@ def _run_from_folder(cls, sorter_output_folder, params, verbose): probe_name=probe_name, data_dir=data_dir, results_dir=results_dir, + bad_channels=bad_channels, ) - if version.parse(cls.get_sorter_version()) >= version.parse("4.0.12"): - ops = initialize_ops( - settings=settings, - probe=probe, - data_dtype=recording.get_dtype(), - do_CAR=do_CAR, - invert_sign=invert_sign, - device=device, - save_preprocesed_copy=save_preprocessed_copy, # this kwarg is correct (typo) - ) - else: - ops = initialize_ops( - settings=settings, - probe=probe, - data_dtype=recording.get_dtype(), - do_CAR=do_CAR, - invert_sign=invert_sign, - device=device, - ) + ops = initialize_ops( + settings=settings, + probe=probe, + data_dtype=recording.get_dtype(), + do_CAR=do_CAR, + invert_sign=invert_sign, + device=device, + save_preprocessed_copy=save_preprocessed_copy, # this kwarg is correct (typo) + ) - if version.parse(cls.get_sorter_version()) >= version.parse("4.0.11"): - n_chan_bin, fs, NT, nt, twav_min, chan_map, dtype, do_CAR, invert, _, _, tmin, tmax, artifact, _, _ = ( - get_run_parameters(ops) - ) - else: - n_chan_bin, fs, NT, nt, twav_min, chan_map, dtype, do_CAR, invert, _, _, tmin, tmax, artifact = ( - get_run_parameters(ops) - ) + n_chan_bin, fs, NT, nt, twav_min, chan_map, dtype, do_CAR, invert, _, _, tmin, tmax, artifact, _, _ = ( + get_run_parameters(ops) + ) # Set preprocessing and drift correction parameters if not params["skip_kilosort_preprocessing"]: From bc290ff48820cd6b50c433effd89edd86a569383 Mon Sep 17 00:00:00 2001 From: chrishalcrow <57948917+chrishalcrow@users.noreply.github.com> Date: Tue, 27 Aug 2024 09:17:12 +0100 Subject: [PATCH 126/187] remove comment about preprocesed spelling --- src/spikeinterface/sorters/external/kilosort4.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spikeinterface/sorters/external/kilosort4.py b/src/spikeinterface/sorters/external/kilosort4.py index 1a3ba59b54..7541b48201 100644 --- a/src/spikeinterface/sorters/external/kilosort4.py +++ b/src/spikeinterface/sorters/external/kilosort4.py @@ -254,7 +254,7 @@ def _run_from_folder(cls, sorter_output_folder, params, verbose): do_CAR=do_CAR, invert_sign=invert_sign, device=device, - save_preprocessed_copy=save_preprocessed_copy, # this kwarg is correct (typo) + save_preprocessed_copy=save_preprocessed_copy, ) n_chan_bin, fs, NT, nt, twav_min, chan_map, dtype, do_CAR, invert, _, _, tmin, tmax, artifact, _, _ = ( From 0df714123f63a9b27a38f7a60f7af61287fceaba Mon Sep 17 00:00:00 2001 From: Alessio Buccino Date: Tue, 27 Aug 2024 16:22:31 +0200 Subject: [PATCH 127/187] Add use_binary_file argument and logic to KS4 --- .../sorters/external/kilosort4.py | 39 ++++++++++++++++--- 1 file changed, 34 insertions(+), 5 deletions(-) diff --git a/src/spikeinterface/sorters/external/kilosort4.py b/src/spikeinterface/sorters/external/kilosort4.py index 7541b48201..4b7d0dbe6e 100644 --- a/src/spikeinterface/sorters/external/kilosort4.py +++ b/src/spikeinterface/sorters/external/kilosort4.py @@ -4,8 
+4,11 @@ from typing import Union from packaging import version -from ..basesorter import BaseSorter + +from ...core import write_binary_recording +from ..basesorter import BaseSorter, get_job_kwargs from .kilosortbase import KilosortBase +from ..basesorter import get_job_kwargs from importlib.metadata import version as importlib_version PathType = Union[str, Path] @@ -17,6 +20,7 @@ class Kilosort4Sorter(BaseSorter): sorter_name: str = "kilosort4" requires_locations = True gpu_capability = "nvidia-optional" + requires_binary_data = False _default_params = { "batch_size": 60000, @@ -60,6 +64,7 @@ class Kilosort4Sorter(BaseSorter): "save_preprocessed_copy": False, "torch_device": "auto", "bad_channels": None, + "use_binary_file": False, } _params_description = { @@ -103,6 +108,8 @@ class Kilosort4Sorter(BaseSorter): "save_preprocessed_copy": "save a pre-processed copy of the data (including drift correction) to temp_wh.dat in the results directory and format Phy output to use that copy of the data", "torch_device": "Select the torch device auto/cuda/cpu", "bad_channels": "A list of channel indices (rows in the binary file) that should not be included in sorting. Listing channels here is equivalent to excluding them from the probe dictionary.", + "use_binary_file": "If True, the Kilosort is run from a binary file. In this case, if the recording is not binary it is written to a binary file in the output folder" + "If False, the Kilosort is run on the recording object directly using the RecordingExtractorAsArray object. Default is False.", } sorter_description = """Kilosort4 is a Python package for spike sorting on GPUs with template matching. @@ -163,6 +170,16 @@ def _setup_recording(cls, recording, sorter_output_folder, params, verbose): probe_filename = sorter_output_folder / "probe.prb" write_prb(probe_filename, pg) + if params["use_binary_file"] and not recording.binary_compatible_with(time_axis=0, file_paths_lenght=1): + # local copy needed + binary_file_path = sorter_output_folder / "recording.dat" + write_binary_recording( + recording=recording, + file_paths=[binary_file_path], + **get_job_kwargs(params, verbose), + ) + params["filename"] = str(binary_file_path) + @classmethod def _run_from_folder(cls, sorter_output_folder, params, verbose): from kilosort.run_kilosort import ( @@ -207,10 +224,22 @@ def _run_from_folder(cls, sorter_output_folder, params, verbose): recording = cls.load_recording_from_folder(sorter_output_folder.parent, with_warnings=False) probe = load_probe(probe_path=probe_filename) probe_name = "" - filename = "" - # this internally concatenates the recording - file_object = RecordingExtractorAsArray(recording_extractor=recording) + if params["use_binary_file"]: + if recording.binary_compatible_with(time_axis=0, file_paths_lenght=1): + # no copy + binary_description = recording.get_binary_description() + filename = str(binary_description["file_paths"][0]) + file_object = None + else: + # a local copy has been written + filename = str(sorter_output_folder / "recording.dat") + file_object = None + else: + # this internally concatenates the recording + filename = "" + file_object = RecordingExtractorAsArray(recording_extractor=recording) + data_dtype = recording.get_dtype() do_CAR = params["do_CAR"] invert_sign = params["invert_sign"] @@ -250,7 +279,7 @@ def _run_from_folder(cls, sorter_output_folder, params, verbose): ops = initialize_ops( settings=settings, probe=probe, - data_dtype=recording.get_dtype(), + data_dtype=data_dtype, do_CAR=do_CAR, invert_sign=invert_sign, 
device=device, From 8bb4d597bb76986a7ba83e28ba3f9bc5a739ee37 Mon Sep 17 00:00:00 2001 From: Christian Tabedzki <35670232+tabedzki@users.noreply.github.com> Date: Tue, 27 Aug 2024 10:32:00 -0400 Subject: [PATCH 128/187] Fixed return values not being within a string literal --- src/spikeinterface/sortingcomponents/motion/dredge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spikeinterface/sortingcomponents/motion/dredge.py b/src/spikeinterface/sortingcomponents/motion/dredge.py index 40b229c588..e2b6b1a2bc 100644 --- a/src/spikeinterface/sortingcomponents/motion/dredge.py +++ b/src/spikeinterface/sortingcomponents/motion/dredge.py @@ -1141,7 +1141,7 @@ def normxcorr1d( normalized=True, padding="same", conv_engine="torch", -) -> "torch.Tensor" | np.ndarray: +) -> "torch.Tensor | np.ndarray": """ normxcorr1d: Normalized cross-correlation, optionally weighted From 45ae8d1471d18ed166e06625780a94d142b8848d Mon Sep 17 00:00:00 2001 From: Christian Tabedzki <35670232+tabedzki@users.noreply.github.com> Date: Tue, 27 Aug 2024 11:09:50 -0400 Subject: [PATCH 129/187] Another Union outside of a string literal --- src/spikeinterface/core/sortinganalyzer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spikeinterface/core/sortinganalyzer.py b/src/spikeinterface/core/sortinganalyzer.py index d37372a6a5..26c74e7ac0 100644 --- a/src/spikeinterface/core/sortinganalyzer.py +++ b/src/spikeinterface/core/sortinganalyzer.py @@ -1104,7 +1104,7 @@ def get_num_units(self) -> int: return self.sorting.get_num_units() ## extensions zone - def compute(self, input, save=True, extension_params=None, verbose=False, **kwargs) -> "SortingAnalyzer" | None: + def compute(self, input, save=True, extension_params=None, verbose=False, **kwargs) -> "SortingAnalyzer | None": """ Compute one extension or several extensiosn. Internally calls compute_one_extension() or compute_several_extensions() depending on the input type. 
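A note on the two annotation fixes above: unless a module uses `from __future__ import annotations`, return annotations are evaluated at import time, so the union operator must sit entirely inside the string literal. A minimal sketch of the failure mode:

    import numpy as np

    # fine: the whole union is a forward reference and is never evaluated
    def good() -> "torch.Tensor | np.ndarray":
        return np.zeros(3)

    # broken: evaluates str | type at import time and raises
    # TypeError: unsupported operand type(s) for |: 'str' and 'type'
    # def bad() -> "torch.Tensor" | np.ndarray:
    #     ...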
From 8c80ecbd1d1b3b0168b9873bbab45bc71617dbd6 Mon Sep 17 00:00:00 2001
From: Alessio Buccino
Date: Tue, 27 Aug 2024 17:14:37 +0200
Subject: [PATCH 130/187] Make InterpolateMotionRecording not JSON-serializable

---
 .../sortingcomponents/motion/motion_interpolation.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/spikeinterface/sortingcomponents/motion/motion_interpolation.py b/src/spikeinterface/sortingcomponents/motion/motion_interpolation.py
index 11ce11e1aa..2108fdf9ca 100644
--- a/src/spikeinterface/sortingcomponents/motion/motion_interpolation.py
+++ b/src/spikeinterface/sortingcomponents/motion/motion_interpolation.py
@@ -386,6 +386,10 @@ def __init__(
         )
         self.add_recording_segment(rec_segment)

+        # this object is currently not JSON-serializable because the Motion object cannot be reloaded properly
+        # see issue #3313
+        self._serializability["json"] = False
+
         self._kwargs = dict(
             recording=recording,
             motion=motion,

From 8ec11e0450d1250e2283a9e349dbf5c5be968c00 Mon Sep 17 00:00:00 2001
From: Christian Tabedzki <35670232+tabedzki@users.noreply.github.com>
Date: Tue, 27 Aug 2024 11:26:26 -0400
Subject: [PATCH 131/187] Fixed get_synchrony_counts

---
 src/spikeinterface/qualitymetrics/misc_metrics.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/spikeinterface/qualitymetrics/misc_metrics.py b/src/spikeinterface/qualitymetrics/misc_metrics.py
index 544765382b..98fc961656 100644
--- a/src/spikeinterface/qualitymetrics/misc_metrics.py
+++ b/src/spikeinterface/qualitymetrics/misc_metrics.py
@@ -514,7 +514,7 @@ def compute_sliding_rp_violations(
     )


-def get_synchrony_count(spikes, synchrony_sizes, all_unit_ids):
+def get_synchrony_counts(spikes, synchrony_sizes, all_unit_ids):
     """
     Compute synchrony counts, the number of simultaneous spikes with sizes `synchrony_sizes`.
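For orientation, the renamed helper tallies, for each unit, how many of its spikes take part in a coincident event involving at least `size` units. A simplified toy version of that computation (exact-sample coincidence over a three-unit spike train; a sketch, not the library implementation):

    import numpy as np

    # three units: all fire at sample 10, units 0 and 1 fire at 50, unit 2 alone at 90
    spike_samples = np.array([10, 10, 10, 50, 50, 90])
    spike_units = np.array([0, 1, 2, 0, 1, 2])

    for size in (2, 3):
        counts = np.zeros(3, dtype=int)
        for t in np.unique(spike_samples):
            units_here = spike_units[spike_samples == t]
            if units_here.size >= size:
                counts[np.unique(units_here)] += 1
        print(size, counts)  # size=2 -> [2 2 1], size=3 -> [1 1 1]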
From 54959577e2b0cda69eb370fc25c1534c14c92c95 Mon Sep 17 00:00:00 2001
From: Garcia Samuel
Date: Wed, 28 Aug 2024 12:53:39 +0200
Subject: [PATCH 132/187] Update src/spikeinterface/widgets/utils_sortingview.py

---
 src/spikeinterface/widgets/utils_sortingview.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/spikeinterface/widgets/utils_sortingview.py b/src/spikeinterface/widgets/utils_sortingview.py
index 0ce372efc4..269193b341 100644
--- a/src/spikeinterface/widgets/utils_sortingview.py
+++ b/src/spikeinterface/widgets/utils_sortingview.py
@@ -86,8 +86,6 @@ def generate_unit_table_view(analyzer, unit_properties=None, similarity_scores=N

     for prop_name in unit_properties:
         # Get property values from correct location
-        # import pdb
-        # pdb.set_trace()
         if prop_name in sorting_props:
             property_values = sorting.get_property(prop_name)
         elif prop_name in qm_props:

From d6f3ced15f938943f83741b6867de7e7916a3de8 Mon Sep 17 00:00:00 2001
From: Alessio Buccino
Date: Wed, 28 Aug 2024 13:09:18 +0200
Subject: [PATCH 133/187] Update src/spikeinterface/core/recording_tools.py

Co-authored-by: Zach McKenzie <92116279+zm711@users.noreply.github.com>
---
 src/spikeinterface/core/recording_tools.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/spikeinterface/core/recording_tools.py b/src/spikeinterface/core/recording_tools.py
index cd2f563fba..5833f81ff8 100644
--- a/src/spikeinterface/core/recording_tools.py
+++ b/src/spikeinterface/core/recording_tools.py
@@ -951,7 +951,7 @@ def do_recording_attributes_match(
     bool
         True if the recordings have the same attributes
     str
-        A string with the an exception message with attributes that do not match
+        A string with the exception message with the attributes that do not match
     """
     recording1_attributes = get_rec_attributes(recording1)
     recording2_attributes = deepcopy(recording2_attributes)

From 945fc15980122c7d4cd3f9b5fc67c4b40816c948 Mon Sep 17 00:00:00 2001
From: Alessio Buccino
Date: Wed, 28 Aug 2024 15:08:15 +0200
Subject: [PATCH 134/187] Suggestions from code review

---
 src/spikeinterface/core/recording_tools.py | 19 +++++--------------
 src/spikeinterface/core/sortinganalyzer.py | 8 ++------
 .../core/tests/test_recording_tools.py | 10 ----------
 .../core/tests/test_sortinganalyzer.py | 9 ---------
 4 files changed, 7 insertions(+), 39 deletions(-)

diff --git a/src/spikeinterface/core/recording_tools.py b/src/spikeinterface/core/recording_tools.py
index cd2f563fba..7cbc236eda 100644
--- a/src/spikeinterface/core/recording_tools.py
+++ b/src/spikeinterface/core/recording_tools.py
@@ -1,6 +1,6 @@
 from __future__ import annotations
 from copy import deepcopy
-from typing import Literal, Tuple
+from typing import Literal
 import warnings
 from pathlib import Path
 import os
@@ -930,8 +930,8 @@ def get_rec_attributes(recording):


 def do_recording_attributes_match(
-    recording1: "BaseRecording", recording2_attributes: bool, check_is_filtered: bool = True, check_dtype: bool = True
-) -> Tuple[bool, str]:
+    recording1: "BaseRecording", recording2_attributes: dict, check_dtype: bool = True
+) -> tuple[bool, str]:
     """
     Check if two recordings have the same attributes

@@ -941,8 +941,6 @@
         The first recording object
     recording2_attributes : dict
         The recording attributes to test against
-    check_is_filtered : bool, default: True
-        If True, check if the recordings are filtered
     check_dtype : bool, default: True
         If True, check if the recordings have the same dtype

@@ -962,31 +960,24 @@
non_matching_attrs = [] if not np.array_equal(recording1_attributes["channel_ids"], recording2_attributes["channel_ids"]): - attributes_match = False non_matching_attrs.append("channel_ids") if not recording1_attributes["sampling_frequency"] == recording2_attributes["sampling_frequency"]: - attributes_match = False non_matching_attrs.append("sampling_frequency") if not recording1_attributes["num_channels"] == recording2_attributes["num_channels"]: - attributes_match = False non_matching_attrs.append("num_channels") if not recording1_attributes["num_samples"] == recording2_attributes["num_samples"]: - attributes_match = False non_matching_attrs.append("num_samples") - if check_is_filtered: - if not recording1_attributes["is_filtered"] == recording2_attributes["is_filtered"]: - attributes_match = False - non_matching_attrs.append("is_filtered") # dtype is optional if "dtype" in recording1_attributes and "dtype" in recording2_attributes: if check_dtype: if not recording1_attributes["dtype"] == recording2_attributes["dtype"]: - attributes_match = False non_matching_attrs.append("dtype") if len(non_matching_attrs) > 0: + attributes_match = False exception_str = f"Recordings do not match in the following attributes: {non_matching_attrs}" else: + attributes_match = True exception_str = "" return attributes_match, exception_str diff --git a/src/spikeinterface/core/sortinganalyzer.py b/src/spikeinterface/core/sortinganalyzer.py index d034dcb46a..7687017db6 100644 --- a/src/spikeinterface/core/sortinganalyzer.py +++ b/src/spikeinterface/core/sortinganalyzer.py @@ -608,9 +608,7 @@ def load_from_zarr(cls, folder, recording=None): return sorting_analyzer - def set_temporary_recording( - self, recording: BaseRecording, check_is_filtered: bool = True, check_dtype: bool = True - ): + def set_temporary_recording(self, recording: BaseRecording, check_dtype: bool = True): """ Sets a temporary recording object. This function can be useful to temporarily set a "cached" recording object that is not saved in the SortingAnalyzer object to speed up @@ -622,14 +620,12 @@ def set_temporary_recording( ---------- recording : BaseRecording The recording object to set as temporary recording. - check_is_filtered : bool, default: True - If True, check that the temporary recording is filtered in the same way as the original recording. check_dtype : bool, default: True If True, check that the dtype of the temporary recording is the same as the original recording. 
""" # check that recording is compatible attributes_match, exception_str = do_recording_attributes_match( - recording, self.rec_attributes, check_is_filtered=check_is_filtered, check_dtype=check_dtype + recording, self.rec_attributes, check_dtype=check_dtype ) if not attributes_match: raise ValueError(exception_str) diff --git a/src/spikeinterface/core/tests/test_recording_tools.py b/src/spikeinterface/core/tests/test_recording_tools.py index 8a8fc3a358..23a1574f2a 100644 --- a/src/spikeinterface/core/tests/test_recording_tools.py +++ b/src/spikeinterface/core/tests/test_recording_tools.py @@ -316,16 +316,6 @@ def test_do_recording_attributes_match(): assert not do_match assert "sampling_frequency" in exc - # check is_filtered options - rec_attributes = get_rec_attributes(recording) - rec_attributes["is_filtered"] = not rec_attributes["is_filtered"] - - do_match, exc = do_recording_attributes_match(recording, rec_attributes) - assert not do_match - assert "is_filtered" in exc - do_match, exc = do_recording_attributes_match(recording, rec_attributes, check_is_filtered=False) - assert do_match - # check dtype options rec_attributes = get_rec_attributes(recording) rec_attributes["dtype"] = "int16" diff --git a/src/spikeinterface/core/tests/test_sortinganalyzer.py b/src/spikeinterface/core/tests/test_sortinganalyzer.py index 9de725239d..689073d6bf 100644 --- a/src/spikeinterface/core/tests/test_sortinganalyzer.py +++ b/src/spikeinterface/core/tests/test_sortinganalyzer.py @@ -144,15 +144,6 @@ def test_SortingAnalyzer_tmp_recording(dataset): with pytest.raises(ValueError): sorting_analyzer.set_temporary_recording(recording_sliced) - # test with different is_filtered - recording_filt = recording.clone() - recording_filt.annotate(is_filtered=False) - with pytest.raises(ValueError): - sorting_analyzer.set_temporary_recording(recording_filt) - - # thest with additional check_is_filtered - sorting_analyzer.set_temporary_recording(recording_filt, check_is_filtered=False) - def _check_sorting_analyzers(sorting_analyzer, original_sorting, cache_folder): From 463e94b74f5d20bc273cc397dad730e8f34039bf Mon Sep 17 00:00:00 2001 From: Christian Tabedzki <35670232+tabedzki@users.noreply.github.com> Date: Wed, 28 Aug 2024 09:31:45 -0400 Subject: [PATCH 135/187] Updated string literal type annotations --- src/spikeinterface/comparison/comparisontools.py | 2 +- src/spikeinterface/curation/remove_redundant.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/spikeinterface/comparison/comparisontools.py b/src/spikeinterface/comparison/comparisontools.py index 0ab5789d1f..fc41795967 100644 --- a/src/spikeinterface/comparison/comparisontools.py +++ b/src/spikeinterface/comparison/comparisontools.py @@ -434,7 +434,7 @@ def make_possible_match(agreement_scores, min_score): return possible_match_12, possible_match_21 -def make_best_match(agreement_scores, min_score) -> tuple["pd.Series", "pd.Series"]: +def make_best_match(agreement_scores, min_score) -> "tuple[pd.Series, pd.Series]": """ Given an agreement matrix and a min_score threshold. return a dict a best match for each units independently of others. 
diff --git a/src/spikeinterface/curation/remove_redundant.py b/src/spikeinterface/curation/remove_redundant.py
index 874552f767..38d47472a7 100644
--- a/src/spikeinterface/curation/remove_redundant.py
+++ b/src/spikeinterface/curation/remove_redundant.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 import numpy as np
+from spikeinterface import BaseSorting

 from spikeinterface import SortingAnalyzer

@@ -20,7 +21,7 @@ def remove_redundant_units(
     remove_strategy="minimum_shift",
     peak_sign="neg",
     extra_outputs=False,
-):
+) -> BaseSorting:
     """
     Removes redundant or duplicate units by comparing the sorting output with itself.

From 98f4845950e6c10e6a7548d8426d00e8c46addeb Mon Sep 17 00:00:00 2001
From: Garcia Samuel
Date: Wed, 28 Aug 2024 16:45:24 +0200
Subject: [PATCH 136/187] Update src/spikeinterface/core/base.py

Co-authored-by: Zach McKenzie <92116279+zm711@users.noreply.github.com>
---
 src/spikeinterface/core/base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/spikeinterface/core/base.py b/src/spikeinterface/core/base.py
index 05e8ae3d8a..74bc0c1d14 100644
--- a/src/spikeinterface/core/base.py
+++ b/src/spikeinterface/core/base.py
@@ -266,7 +266,7 @@ def set_property(
             if dtype_kind not in self.default_missing_property_values.keys():
                 raise ValueError(
                     f"Can't infer a natural missing value for dtype {dtype_kind}. "
-                    "Please provide it with the missing_value argument"
+                    "Please provide it with the `missing_value` argument"
                 )
             else:
                 missing_value = self.default_missing_property_values[dtype_kind]

From b1b6c228a1ea4d28a76c5f82d354a7172bba8958 Mon Sep 17 00:00:00 2001
From: zm711 <92116279+zm711@users.noreply.github.com>
Date: Wed, 28 Aug 2024 11:32:34 -0400
Subject: [PATCH 137/187] clean-up identity merges

---
 src/spikeinterface/curation/auto_merge.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/spikeinterface/curation/auto_merge.py b/src/spikeinterface/curation/auto_merge.py
index 920d6713ad..4cbe3958f2 100644
--- a/src/spikeinterface/curation/auto_merge.py
+++ b/src/spikeinterface/curation/auto_merge.py
@@ -361,6 +361,9 @@ def get_potential_auto_merge(
     ind1, ind2 = np.nonzero(pair_mask)
     potential_merges = list(zip(unit_ids[ind1], unit_ids[ind2]))

+    # some methods return identities, i.e. (1, 1), which we can clean up first
+    potential_merges = [(ids[0], ids[1]) for ids in potential_merges if ids[0] != ids[1]]
+
     if resolve_graph:
         potential_merges = resolve_merging_graph(sorting, potential_merges)

From 4b9f22e90b0de990cc86733b412ba8e6a2db3315 Mon Sep 17 00:00:00 2001
From: Christian Tabedzki <35670232+tabedzki@users.noreply.github.com>
Date: Wed, 28 Aug 2024 12:11:50 -0400
Subject: [PATCH 138/187] Updated the docstrings to match what the IDE says the functions return

---
 src/spikeinterface/comparison/comparisontools.py | 10 +++++-----
 src/spikeinterface/curation/auto_merge.py | 2 +-
 .../postprocessing/amplitude_scalings.py | 2 +-
 src/spikeinterface/qualitymetrics/misc_metrics.py | 2 +-
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/spikeinterface/comparison/comparisontools.py b/src/spikeinterface/comparison/comparisontools.py
index fc41795967..d35ed87aac 100644
--- a/src/spikeinterface/comparison/comparisontools.py
+++ b/src/spikeinterface/comparison/comparisontools.py
@@ -233,8 +233,8 @@ def make_match_count_matrix(sorting1, sorting2, delta_frames, ensure_symmetry=Fa
     And the minimum of the two results is taken.
     Returns
     -------
-    matching_matrix : ndarray
-        A 2D numpy array of shape `(num_units_train1, num_units_train2)`.
Each element `[i, j]` represents + matching_matrix : pd.DataFrame + A 2D pandas DataFrame of shape `(num_units_train1, num_units_train2)`. Each element `[i, j]` represents the count of matching spike pairs between unit `i` from `spike_frames_train1` and unit `j` from `spike_frames_train2`. Notes @@ -349,7 +349,7 @@ def make_agreement_scores(sorting1, sorting2, delta_frames, ensure_symmetry=True And the minimum of the two results is taken. Returns ------- - agreement_scores : array (float) + agreement_scores : pd.DataFrame The agreement score matrix. """ import pandas as pd @@ -409,9 +409,9 @@ def make_possible_match(agreement_scores, min_score): Returns ------- - best_match_12 : pd.Series + best_match_12 : dict[NDArray] - best_match_21 : pd.Series + best_match_21 : dict[NDArray] """ unit1_ids = np.array(agreement_scores.index) diff --git a/src/spikeinterface/curation/auto_merge.py b/src/spikeinterface/curation/auto_merge.py index cea394a021..cc823fe314 100644 --- a/src/spikeinterface/curation/auto_merge.py +++ b/src/spikeinterface/curation/auto_merge.py @@ -754,7 +754,7 @@ def compute_presence_distance(sorting, pair_mask, num_samples=None, **presence_d Returns ------- - potential_merges : list + potential_merges : NDArray The list of potential merges """ diff --git a/src/spikeinterface/postprocessing/amplitude_scalings.py b/src/spikeinterface/postprocessing/amplitude_scalings.py index 0d57aec21e..298953c94a 100644 --- a/src/spikeinterface/postprocessing/amplitude_scalings.py +++ b/src/spikeinterface/postprocessing/amplitude_scalings.py @@ -456,7 +456,7 @@ def find_collisions(spikes, spikes_within_margin, delta_collision_samples, spars Returns ------- - collision_spikes_dict: np.array + collision_spikes_dict: dict A dictionary with collisions. The key is the index of the spike with collision, the value is an array of overlapping spikes, including the spike itself at position 0. """ diff --git a/src/spikeinterface/qualitymetrics/misc_metrics.py b/src/spikeinterface/qualitymetrics/misc_metrics.py index 98fc961656..2d4ae94d68 100644 --- a/src/spikeinterface/qualitymetrics/misc_metrics.py +++ b/src/spikeinterface/qualitymetrics/misc_metrics.py @@ -529,7 +529,7 @@ def get_synchrony_counts(spikes, synchrony_sizes, all_unit_ids): Returns ------- - synchrony_counts : dict + synchrony_counts : np.ndarray The synchrony counts for the synchrony sizes. 
References From 06f1c744a59943174086963372ce893cd4360996 Mon Sep 17 00:00:00 2001 From: Jonah Pearl Date: Wed, 28 Aug 2024 18:32:25 -0400 Subject: [PATCH 139/187] fix zarr folder suffix handling --- src/spikeinterface/core/base.py | 5 ++--- src/spikeinterface/core/core_tools.py | 7 +++++++ src/spikeinterface/core/sortinganalyzer.py | 15 +++++++-------- 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/src/spikeinterface/core/base.py b/src/spikeinterface/core/base.py index 74bc0c1d14..1fa218851b 100644 --- a/src/spikeinterface/core/base.py +++ b/src/spikeinterface/core/base.py @@ -17,6 +17,7 @@ from .globals import get_global_tmp_folder, is_set_global_tmp_folder from .core_tools import ( check_json, + clean_zarr_folder_name, is_dict_extractor, SIJsonEncoder, make_paths_relative, @@ -1061,9 +1062,7 @@ def save_to_zarr( print(f"Use zarr_path={zarr_path}") else: if storage_options is None: - folder = Path(folder) - if folder.suffix != ".zarr": - folder = folder.parent / f"{folder.stem}.zarr" + folder = clean_zarr_folder_name(folder) if folder.is_dir() and overwrite: shutil.rmtree(folder) zarr_path = folder diff --git a/src/spikeinterface/core/core_tools.py b/src/spikeinterface/core/core_tools.py index b38222391c..f398928757 100644 --- a/src/spikeinterface/core/core_tools.py +++ b/src/spikeinterface/core/core_tools.py @@ -153,6 +153,13 @@ def check_json(dictionary: dict) -> dict: return json.loads(json_string) +def clean_zarr_folder_name(folder): + folder = Path(folder) + if folder.suffix != ".zarr": + folder = folder.parent / f"{folder.stem}.zarr" + return folder + + def add_suffix(file_path, possible_suffix): file_path = Path(file_path) if isinstance(possible_suffix, str): diff --git a/src/spikeinterface/core/sortinganalyzer.py b/src/spikeinterface/core/sortinganalyzer.py index fa4547d272..be85eaf343 100644 --- a/src/spikeinterface/core/sortinganalyzer.py +++ b/src/spikeinterface/core/sortinganalyzer.py @@ -23,7 +23,7 @@ from .base import load_extractor from .recording_tools import check_probe_do_not_overlap, get_rec_attributes, do_recording_attributes_match -from .core_tools import check_json, retrieve_importing_provenance, is_path_remote +from .core_tools import check_json, retrieve_importing_provenance, is_path_remote, clean_zarr_folder_name from .sorting_tools import generate_unit_ids_for_merge_group, _get_ids_after_merging from .job_tools import split_job_kwargs from .numpyextractors import NumpySorting @@ -111,6 +111,8 @@ def create_sorting_analyzer( sparsity off (or give external sparsity) like this. """ if format != "memory": + if format == "zarr": + folder = clean_zarr_folder_name(folder) if Path(folder).is_dir(): if not overwrite: raise ValueError(f"Folder already exists {folder}! 
Use overwrite=True to overwrite it.") @@ -269,6 +271,8 @@ def create( sorting_analyzer = cls.load_from_binary_folder(folder, recording=recording) sorting_analyzer.folder = Path(folder) elif format == "zarr": + assert folder is not None, "For format='zarr' folder must be provided" + folder = clean_zarr_folder_name(folder) cls.create_zarr(folder, sorting, recording, sparsity, return_scaled, rec_attributes=None) sorting_analyzer = cls.load_from_zarr(folder, recording=recording) sorting_analyzer.folder = Path(folder) @@ -487,10 +491,7 @@ def create_zarr(cls, folder, sorting, recording, sparsity, return_scaled, rec_at import zarr import numcodecs - folder = Path(folder) - # force zarr sufix - if folder.suffix != ".zarr": - folder = folder.parent / f"{folder.stem}.zarr" + folder = clean_zarr_folder_name(folder) if folder.is_dir(): raise ValueError(f"Folder already exists {folder}") @@ -768,9 +769,7 @@ def _save_or_select_or_merge( elif format == "zarr": assert folder is not None, "For format='zarr' folder must be provided" - folder = Path(folder) - if folder.suffix != ".zarr": - folder = folder.parent / f"{folder.stem}.zarr" + folder = clean_zarr_folder_name(folder) SortingAnalyzer.create_zarr( folder, sorting_provenance, recording, sparsity, self.return_scaled, self.rec_attributes ) From 68c244afca4b746526deaa26c7527b88919a5060 Mon Sep 17 00:00:00 2001 From: Jonah Pearl Date: Wed, 28 Aug 2024 18:36:38 -0400 Subject: [PATCH 140/187] fix load entry point --- src/spikeinterface/core/sortinganalyzer.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/spikeinterface/core/sortinganalyzer.py b/src/spikeinterface/core/sortinganalyzer.py index be85eaf343..b0eac17aa7 100644 --- a/src/spikeinterface/core/sortinganalyzer.py +++ b/src/spikeinterface/core/sortinganalyzer.py @@ -164,6 +164,8 @@ def load_sorting_analyzer(folder, load_extensions=True, format="auto"): The loaded SortingAnalyzer """ + if format == "zarr": + folder = clean_zarr_folder_name(folder) return SortingAnalyzer.load(folder, load_extensions=load_extensions, format=format) From 5f98660b2c75bcfb631ca47a88fa756211d8128a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 28 Aug 2024 23:00:52 +0000 Subject: [PATCH 141/187] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/spikeinterface/core/core_tools.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/spikeinterface/core/core_tools.py b/src/spikeinterface/core/core_tools.py index f398928757..b3595dddf2 100644 --- a/src/spikeinterface/core/core_tools.py +++ b/src/spikeinterface/core/core_tools.py @@ -154,10 +154,10 @@ def check_json(dictionary: dict) -> dict: def clean_zarr_folder_name(folder): - folder = Path(folder) - if folder.suffix != ".zarr": - folder = folder.parent / f"{folder.stem}.zarr" - return folder + folder = Path(folder) + if folder.suffix != ".zarr": + folder = folder.parent / f"{folder.stem}.zarr" + return folder def add_suffix(file_path, possible_suffix): From c992ca67fa33c806273781822a531e1fb50ee5e1 Mon Sep 17 00:00:00 2001 From: Jonah Pearl Date: Wed, 28 Aug 2024 19:00:48 -0400 Subject: [PATCH 142/187] catch more entry points --- src/spikeinterface/core/sortinganalyzer.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/spikeinterface/core/sortinganalyzer.py b/src/spikeinterface/core/sortinganalyzer.py index b0eac17aa7..fbf0307498 100644 --- a/src/spikeinterface/core/sortinganalyzer.py +++ 
b/src/spikeinterface/core/sortinganalyzer.py @@ -830,6 +830,8 @@ def save_as(self, format="memory", folder=None) -> "SortingAnalyzer": format : "memory" | "binary_folder" | "zarr", default: "memory" The new backend format to use """ + if format == "zarr": + folder = clean_zarr_folder_name(folder) return self._save_or_select_or_merge(format=format, folder=folder) def select_units(self, unit_ids, format="memory", folder=None) -> "SortingAnalyzer": @@ -855,6 +857,8 @@ def select_units(self, unit_ids, format="memory", folder=None) -> "SortingAnalyz The newly create sorting_analyzer with the selected units """ # TODO check that unit_ids are in same order otherwise many extension do handle it properly!!!! + if format == "zarr": + folder = clean_zarr_folder_name(folder) return self._save_or_select_or_merge(format=format, folder=folder, unit_ids=unit_ids) def remove_units(self, remove_unit_ids, format="memory", folder=None) -> "SortingAnalyzer": @@ -881,6 +885,8 @@ def remove_units(self, remove_unit_ids, format="memory", folder=None) -> "Sortin """ # TODO check that unit_ids are in same order otherwise many extension do handle it properly!!!! unit_ids = self.unit_ids[~np.isin(self.unit_ids, remove_unit_ids)] + if format == "zarr": + folder = clean_zarr_folder_name(folder) return self._save_or_select_or_merge(format=format, folder=folder, unit_ids=unit_ids) def merge_units( @@ -939,6 +945,9 @@ def merge_units( The newly create `SortingAnalyzer` with the selected units """ + if format == "zarr": + folder = clean_zarr_folder_name(folder) + assert merging_mode in ["soft", "hard"], "Merging mode should be either soft or hard" if len(merge_unit_groups) == 0: From ddad9036625bba553164dbfbf92ad43d21002e86 Mon Sep 17 00:00:00 2001 From: Julien Verplanken Date: Thu, 29 Aug 2024 08:58:18 +0200 Subject: [PATCH 143/187] fix print_summary for reporting when not exhaustive_gt --- src/spikeinterface/comparison/paircomparisons.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/spikeinterface/comparison/paircomparisons.py b/src/spikeinterface/comparison/paircomparisons.py index 7d5f04dfdd..248af3d7e0 100644 --- a/src/spikeinterface/comparison/paircomparisons.py +++ b/src/spikeinterface/comparison/paircomparisons.py @@ -455,13 +455,13 @@ def print_summary(self, well_detected_score=None, redundant_score=None, overmerg d = dict( num_gt=len(self.unit1_ids), num_tested=len(self.unit2_ids), - num_well_detected=self.count_well_detected_units(well_detected_score), - num_redundant=self.count_redundant_units(redundant_score), - num_overmerged=self.count_overmerged_units(overmerged_score), + num_well_detected=self.count_well_detected_units(well_detected_score) ) if self.exhaustive_gt: txt = txt + _template_summary_part2 + d["num_redundant"] = self.count_redundant_units(redundant_score) + d["num_overmerged"] = self.count_overmerged_units(overmerged_score) d["num_false_positive_units"] = self.count_false_positive_units() d["num_bad"] = self.count_bad_units() @@ -676,11 +676,11 @@ def count_units_categories( GT num_units: {num_gt} TESTED num_units: {num_tested} num_well_detected: {num_well_detected} -num_redundant: {num_redundant} -num_overmerged: {num_overmerged} """ -_template_summary_part2 = """num_false_positive_units {num_false_positive_units} +_template_summary_part2 = """num_redundant: {num_redundant} +num_overmerged: {num_overmerged} +num_false_positive_units {num_false_positive_units} num_bad: {num_bad} """ From 3dbba3870b40b1fe4684f792a9222fa23a882275 Mon Sep 17 00:00:00 2001 From: 
"pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 29 Aug 2024 07:17:16 +0000 Subject: [PATCH 144/187] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/spikeinterface/comparison/paircomparisons.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spikeinterface/comparison/paircomparisons.py b/src/spikeinterface/comparison/paircomparisons.py index 248af3d7e0..fa5ec2d3d0 100644 --- a/src/spikeinterface/comparison/paircomparisons.py +++ b/src/spikeinterface/comparison/paircomparisons.py @@ -455,7 +455,7 @@ def print_summary(self, well_detected_score=None, redundant_score=None, overmerg d = dict( num_gt=len(self.unit1_ids), num_tested=len(self.unit2_ids), - num_well_detected=self.count_well_detected_units(well_detected_score) + num_well_detected=self.count_well_detected_units(well_detected_score), ) if self.exhaustive_gt: From 10f08b4293ee87c89b7886d2a3809c56d74fabbe Mon Sep 17 00:00:00 2001 From: Alessio Buccino Date: Thu, 29 Aug 2024 10:35:21 +0200 Subject: [PATCH 145/187] Propagate storage_options to load_sorting_analyzer --- src/spikeinterface/core/sortinganalyzer.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/spikeinterface/core/sortinganalyzer.py b/src/spikeinterface/core/sortinganalyzer.py index fa4547d272..aef31790f6 100644 --- a/src/spikeinterface/core/sortinganalyzer.py +++ b/src/spikeinterface/core/sortinganalyzer.py @@ -143,7 +143,7 @@ def create_sorting_analyzer( return sorting_analyzer -def load_sorting_analyzer(folder, load_extensions=True, format="auto"): +def load_sorting_analyzer(folder, load_extensions=True, format="auto", storage_options=None): """ Load a SortingAnalyzer object from disk. @@ -155,6 +155,9 @@ def load_sorting_analyzer(folder, load_extensions=True, format="auto"): Load all extensions or not. format : "auto" | "binary_folder" | "zarr" The format of the folder. + storage_options : dict | None, default: None + The storage options to specify credentials to remote zarr bucket. + For open buckets, it doesn't need to be specified. Returns ------- @@ -162,7 +165,7 @@ def load_sorting_analyzer(folder, load_extensions=True, format="auto"): The loaded SortingAnalyzer """ - return SortingAnalyzer.load(folder, load_extensions=load_extensions, format=format) + return SortingAnalyzer.load(folder, load_extensions=load_extensions, format=format, storage_options=storage_options) class SortingAnalyzer: From c410a52cca743f50ef53d4e31a1f779df99cbeea Mon Sep 17 00:00:00 2001 From: Alessio Buccino Date: Thu, 29 Aug 2024 11:15:34 +0200 Subject: [PATCH 146/187] Add load_sorting_analyzer_or_waveforms function --- src/spikeinterface/core/__init__.py | 6 +++++- .../waveforms_extractor_backwards_compatibility.py | 10 ++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/src/spikeinterface/core/__init__.py b/src/spikeinterface/core/__init__.py index 674f1ac463..ead7007920 100644 --- a/src/spikeinterface/core/__init__.py +++ b/src/spikeinterface/core/__init__.py @@ -166,4 +166,8 @@ # Important not for compatibility!! 
# This will be uncommented after 0.100
-from .waveforms_extractor_backwards_compatibility import extract_waveforms, load_waveforms
+from .waveforms_extractor_backwards_compatibility import (
+    extract_waveforms,
+    load_waveforms,
+    load_sorting_analyzer_or_waveforms,
+)

diff --git a/src/spikeinterface/core/waveforms_extractor_backwards_compatibility.py b/src/spikeinterface/core/waveforms_extractor_backwards_compatibility.py
index da1f5a71f5..c07bf57c4a 100644
--- a/src/spikeinterface/core/waveforms_extractor_backwards_compatibility.py
+++ b/src/spikeinterface/core/waveforms_extractor_backwards_compatibility.py
@@ -343,6 +343,16 @@ def get_template(
     return templates[0]


+def load_sorting_analyzer_or_waveforms(folder, sorting=None):
+    folder = Path(folder)
+    if folder.suffix == ".zarr":
+        return load_sorting_analyzer(folder)
+    elif (folder / "spikeinterface_info.json").exists():
+        return load_sorting_analyzer(folder)
+    else:
+        return load_waveforms(folder, sorting=sorting, output="SortingAnalyzer")
+
+
 def load_waveforms(
     folder,
     with_recording: bool = True,

From 6606b61d52bee40425f514831f23f5896de1040b Mon Sep 17 00:00:00 2001
From: Alessio Buccino
Date: Thu, 29 Aug 2024 15:22:20 +0200
Subject: [PATCH 147/187] Fix widgets tests and add test on unit_table_properties

---
 .../widgets/tests/test_widgets.py | 15 ++++
 .../widgets/utils_sortingview.py | 48 ++++++++++++-------
 2 files changed, 46 insertions(+), 17 deletions(-)

diff --git a/src/spikeinterface/widgets/tests/test_widgets.py b/src/spikeinterface/widgets/tests/test_widgets.py
index e06c79ad2f..debcd52085 100644
--- a/src/spikeinterface/widgets/tests/test_widgets.py
+++ b/src/spikeinterface/widgets/tests/test_widgets.py
@@ -539,6 +539,21 @@ def test_plot_sorting_summary(self):
                 backend=backend,
                 **self.backend_kwargs[backend],
             )
+            # add unit_properties
+            sw.plot_sorting_summary(
+                self.sorting_analyzer_sparse,
+                unit_table_properties=["firing_rate", "snr"],
+                backend=backend,
+                **self.backend_kwargs[backend],
+            )
+            # adding a missing property should raise a warning
+            with self.assertWarns(UserWarning):
+                sw.plot_sorting_summary(
+                    self.sorting_analyzer_sparse,
+                    unit_table_properties=["missing_property"],
+                    backend=backend,
+                    **self.backend_kwargs[backend],
+                )

     def test_plot_agreement_matrix(self):
         possible_backends = list(sw.AgreementMatrixWidget.get_possible_backends())

diff --git a/src/spikeinterface/widgets/utils_sortingview.py b/src/spikeinterface/widgets/utils_sortingview.py
index 0ce372efc4..7a9dc47826 100644
--- a/src/spikeinterface/widgets/utils_sortingview.py
+++ b/src/spikeinterface/widgets/utils_sortingview.py
@@ -2,6 +2,7 @@

 import numpy as np

+from ..core import SortingAnalyzer, BaseSorting
 from ..core.core_tools import check_json
 from warnings import warn

@@ -46,26 +47,42 @@ def handle_display_and_url(widget, view, **backend_kwargs):
     return url


-def generate_unit_table_view(analyzer, unit_properties=None, similarity_scores=None):
+def generate_unit_table_view(
+    sorting_or_sorting_analyzer: SortingAnalyzer | BaseSorting,
+    unit_properties: list[str] | None = None,
+    similarity_scores: np.ndarray | None = None,
+):
     import sortingview.views as vv

-    sorting = analyzer.sorting
+    if isinstance(sorting_or_sorting_analyzer, SortingAnalyzer):
+        analyzer = sorting_or_sorting_analyzer
+        sorting = analyzer.sorting
+    else:
+        sorting = sorting_or_sorting_analyzer
+        analyzer = None

     # Find available unit properties from all sources
     sorting_props = list(sorting.get_property_keys())
-    if analyzer.get_extension("quality_metrics") is not None:
- qm_props = list(analyzer.get_extension("quality_metrics").get_data().columns) - qm_data = analyzer.get_extension("quality_metrics").get_data() + if analyzer is not None: + if analyzer.get_extension("quality_metrics") is not None: + qm_props = list(analyzer.get_extension("quality_metrics").get_data().columns) + qm_data = analyzer.get_extension("quality_metrics").get_data() + else: + qm_props = [] + if analyzer.get_extension("template_metrics") is not None: + tm_props = list(analyzer.get_extension("template_metrics").get_data().columns) + tm_data = analyzer.get_extension("template_metrics").get_data() + else: + tm_props = [] + # Check for any overlaps and warn user if any + all_props = sorting_props + qm_props + tm_props else: + all_props = sorting_props qm_props = [] - if analyzer.get_extension("template_metrics") is not None: - tm_props = list(analyzer.get_extension("template_metrics").get_data().columns) - tm_data = analyzer.get_extension("template_metrics").get_data() - else: tm_props = [] + qm_data = None + tm_data = None - # Check for any overlaps and warn user if any - all_props = sorting_props + qm_props + tm_props overlap_props = [prop for prop in all_props if all_props.count(prop) > 1] if len(overlap_props) > 0: warn( @@ -93,7 +110,8 @@ def generate_unit_table_view(analyzer, unit_properties=None, similarity_scores=N elif prop_name in tm_props: property_values = tm_data[prop_name].values else: - raise ValueError(f"Property '{prop_name}' not found in sorting, quality_metrics, or template_metrics") + warn(f"Property '{prop_name}' not found in sorting, quality_metrics, or template_metrics") + continue # make dtype available val0 = np.array(property_values[0]) @@ -106,7 +124,7 @@ def generate_unit_table_view(analyzer, unit_properties=None, similarity_scores=N elif val0.dtype.kind == "b": dtype = "bool" else: - print(f"Unsupported dtype {val0.dtype} for property {prop_name}. Skipping") + warn(f"Unsupported dtype {val0.dtype} for property {prop_name}. Skipping") continue ut_columns.append(vv.UnitsTableColumn(key=prop_name, label=prop_name, dtype=dtype)) valid_unit_properties.append(prop_name) @@ -122,10 +140,6 @@ def generate_unit_table_view(analyzer, unit_properties=None, similarity_scores=N property_values = qm_data[prop_name].values elif prop_name in tm_props: property_values = tm_data[prop_name].values - else: - raise ValueError( - f"Property '{prop_name}' not found in sorting, quality_metrics, or template_metrics" - ) # Check for NaN values val0 = np.array(property_values[0]) From 3b9922e648a0dfc8132ab41b817b36f1bce03f4f Mon Sep 17 00:00:00 2001 From: Alessio Buccino Date: Sat, 31 Aug 2024 10:00:25 +0200 Subject: [PATCH 148/187] Install widgets dependencies for widget tests! 
--- .github/workflows/all-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/all-tests.yml b/.github/workflows/all-tests.yml index 8317d7bec4..b695c7d627 100644 --- a/.github/workflows/all-tests.yml +++ b/.github/workflows/all-tests.yml @@ -202,7 +202,7 @@ jobs: shell: bash if: env.RUN_WIDGETS_TESTS == 'true' run: | - pip install -e .[full] + pip install -e .[full,widgets] ./.github/run_tests.sh widgets --no-virtual-env - name: Test exporters From 4d39df51c8b23e35e4f4e35b063f1daa0422b6ec Mon Sep 17 00:00:00 2001 From: Alessio Buccino Date: Sat, 31 Aug 2024 10:07:43 +0200 Subject: [PATCH 149/187] Add docstring and API --- doc/api.rst | 13 +++++++++++++ ...waveforms_extractor_backwards_compatibility.py | 15 +++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/doc/api.rst b/doc/api.rst index 42f9fec299..77ec895a97 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -73,6 +73,19 @@ Low-level .. autoclass:: ChunkRecordingExecutor + +Back-compatibility with ``WaveformExtraxctor`` (version < 0.101.0) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. automodule:: spikeinterface.core + :noindex: + + .. autofunction:: extract_waveforms + .. autofunction:: load_waveforms + .. autofunction:: load_sorting_analyzer_or_waveforms + + + spikeinterface.extractors ------------------------- diff --git a/src/spikeinterface/core/waveforms_extractor_backwards_compatibility.py b/src/spikeinterface/core/waveforms_extractor_backwards_compatibility.py index c07bf57c4a..749ec7c1f0 100644 --- a/src/spikeinterface/core/waveforms_extractor_backwards_compatibility.py +++ b/src/spikeinterface/core/waveforms_extractor_backwards_compatibility.py @@ -344,6 +344,21 @@ def get_template( def load_sorting_analyzer_or_waveforms(folder, sorting=None): + """ + Load a SortingAnalyzer from either a newly saved SortingAnalyzer folder or an old WaveformExtractor folder. + + Parameters + ---------- + folder: str | Path + The folder to the sorting analyzer or waveform extractor + sorting: BaseSorting | None, default: None + The sorting object to instantiate with the SortingAnalyzer (only used for old WaveformExtractor) + + Returns + ------- + sorting_analyzer: SortingAnalyzer + The returned SortingAnalyzer. + """ folder = Path(folder) if folder.suffix == ".zarr": return load_sorting_analyzer(folder) From 227b0e71d11c20f8bbbe0014456d80c50075fead Mon Sep 17 00:00:00 2001 From: Alessio Buccino Date: Sat, 31 Aug 2024 10:12:12 +0200 Subject: [PATCH 150/187] Update KS4 versions --- .github/scripts/check_kilosort4_releases.py | 2 +- .github/workflows/test_kilosort4.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/scripts/check_kilosort4_releases.py b/.github/scripts/check_kilosort4_releases.py index 92e7bf277f..5544224d8d 100644 --- a/.github/scripts/check_kilosort4_releases.py +++ b/.github/scripts/check_kilosort4_releases.py @@ -22,7 +22,7 @@ def get_pypi_versions(package_name): "At version 0.101.1, this should be updated to support newer" "kilosort verrsions." 
) - versions = [ver for ver in versions if parse("4.0.12") >= parse(ver) >= parse("4.0.5")] + versions = [ver for ver in versions if parse(ver) >= parse("4.0.16")] return versions diff --git a/.github/workflows/test_kilosort4.yml b/.github/workflows/test_kilosort4.yml index 390bec98be..6c58c76813 100644 --- a/.github/workflows/test_kilosort4.yml +++ b/.github/workflows/test_kilosort4.yml @@ -7,7 +7,7 @@ on: jobs: versions: - # Poll Pypi for all released KS4 versions >4.0.4, save to JSON + # Poll Pypi for all released KS4 versions >4.0.16, save to JSON # and store them in a matrix for the next job. runs-on: ubuntu-latest outputs: From 7501eda2fe8184817feebf6c9ec2df75d51e379f Mon Sep 17 00:00:00 2001 From: Alessio Buccino Date: Mon, 2 Sep 2024 15:22:21 +0200 Subject: [PATCH 151/187] Update doc/api.rst Co-authored-by: Zach McKenzie <92116279+zm711@users.noreply.github.com> --- doc/api.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/api.rst b/doc/api.rst index 77ec895a97..6bb9b39091 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -74,7 +74,7 @@ Low-level .. autoclass:: ChunkRecordingExecutor -Back-compatibility with ``WaveformExtraxctor`` (version < 0.101.0) +Back-compatibility with ``WaveformExtractor`` (version < 0.101.0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. automodule:: spikeinterface.core From c23d53032d406f6b386773338c751af71f4e1be4 Mon Sep 17 00:00:00 2001 From: Alessio Buccino Date: Mon, 2 Sep 2024 18:52:18 +0200 Subject: [PATCH 152/187] Update actions and always use binary if recording is binary --- .github/scripts/check_kilosort4_releases.py | 7 +- .github/scripts/test_kilosort4_ci.py | 189 ++++++++---------- .github/workflows/test_kilosort4.yml | 7 +- .../sorters/external/kilosort4.py | 54 +++-- 4 files changed, 112 insertions(+), 145 deletions(-) diff --git a/.github/scripts/check_kilosort4_releases.py b/.github/scripts/check_kilosort4_releases.py index 5544224d8d..7a6368f3cf 100644 --- a/.github/scripts/check_kilosort4_releases.py +++ b/.github/scripts/check_kilosort4_releases.py @@ -16,12 +16,7 @@ def get_pypi_versions(package_name): response.raise_for_status() data = response.json() versions = list(sorted(data["releases"].keys())) - - assert parse(spikeinterface.__version__) < parse("0.101.1"), ( - "Kilosort 4.0.5-12 are supported in SpikeInterface < 0.101.1." - "At version 0.101.1, this should be updated to support newer" - "kilosort verrsions." - ) + # Filter out versions that are less than 4.0.16 versions = [ver for ver in versions if parse(ver) >= parse("4.0.16")] return versions diff --git a/.github/scripts/test_kilosort4_ci.py b/.github/scripts/test_kilosort4_ci.py index e0d1f2a504..c7853a2add 100644 --- a/.github/scripts/test_kilosort4_ci.py +++ b/.github/scripts/test_kilosort4_ci.py @@ -18,6 +18,7 @@ - Do some tests to check all KS4 parameters are tested against. 
""" + import copy from typing import Any import spikeinterface.full as si @@ -33,11 +34,16 @@ from packaging.version import parse from importlib.metadata import version from inspect import signature -from kilosort.run_kilosort import (set_files, initialize_ops, - compute_preprocessing, - compute_drift_correction, detect_spikes, - cluster_spikes, save_sorting, - get_run_parameters, ) +from kilosort.run_kilosort import ( + set_files, + initialize_ops, + compute_preprocessing, + compute_drift_correction, + detect_spikes, + cluster_spikes, + save_sorting, + get_run_parameters, +) from kilosort.io import load_probe, RecordingExtractorAsArray, BinaryFiltered from kilosort.parameters import DEFAULT_SETTINGS from kilosort import preprocessing as ks_preprocessing @@ -49,8 +55,7 @@ PARAMS_TO_TEST = [ # Not tested # ("torch_device", "auto") - - # Stable across KS version 4.0.01 - 4.0.12 + # Stable across KS version 4.0.16 - 4.0.X (?) ("change_nothing", None), ("nblocks", 0), ("do_CAR", False), @@ -83,38 +88,21 @@ ("acg_threshold", 1e12), ("cluster_downsampling", 2), ("duplicate_spike_bins", 5), + ("drift_smoothing", [250, 250, 250]), + ("bad_channels", None), + ("save_preprocessed_copy", False), ] -if parse(version("kilosort")) >= parse("4.0.11"): - PARAMS_TO_TEST.extend( - [ - ("shift", 1e9), - ("scale", -1e9), - ] - ) -if parse(version("kilosort")) == parse("4.0.9"): - # bug in 4.0.9 for "nblocks=0" - PARAMS_TO_TEST = [param for param in PARAMS_TO_TEST if param[0] != "nblocks"] -if parse(version("kilosort")) >= parse("4.0.8"): - PARAMS_TO_TEST.extend( - [ - ("drift_smoothing", [250, 250, 250]), - ] - ) -if parse(version("kilosort")) <= parse("4.0.6"): - # AFAIK this parameter was always unused in KS (that's why it was removed) - PARAMS_TO_TEST.extend( - [ - ("cluster_pcs", 1e9), - ] - ) -if parse(version("kilosort")) <= parse("4.0.3"): - PARAMS_TO_TEST = [param for param in PARAMS_TO_TEST if param[0] not in ["x_centers", "max_channel_distance"]] +# if parse(version("kilosort")) >= parse("4.0.X"): +# PARAMS_TO_TEST.extend( +# [ +# ("new_param", new_values), +# ] +# ) class TestKilosort4Long: - # Fixtures ###### @pytest.fixture(scope="session") def recording_and_paths(self, tmp_path_factory): @@ -200,7 +188,6 @@ def test_params_to_test(self): otherwise there is no point to the test. 
""" for parameter in PARAMS_TO_TEST: - param_key, param_value = parameter if param_key == "change_nothing": @@ -218,7 +205,6 @@ def test_default_settings_all_represented(self): tested_keys = [entry[0] for entry in PARAMS_TO_TEST] for param_key in DEFAULT_SETTINGS: - if param_key not in ["n_chan_bin", "fs", "tmin", "tmax"]: if parse(version("kilosort")) == parse("4.0.9") and param_key == "nblocks": continue @@ -241,16 +227,18 @@ def test_spikeinterface_defaults_against_kilsort(self): # Testing Arguments ### def test_set_files_arguments(self): - self._check_arguments( - set_files, - ["settings", "filename", "probe", "probe_name", "data_dir", "results_dir"] - ) + self._check_arguments(set_files, ["settings", "filename", "probe", "probe_name", "data_dir", "results_dir"]) def test_initialize_ops_arguments(self): - expected_arguments = ["settings", "probe", "data_dtype", "do_CAR", "invert_sign", "device"] - - if parse(version("kilosort")) >= parse("4.0.12"): - expected_arguments.append("save_preprocesed_copy") + expected_arguments = [ + "settings", + "probe", + "data_dtype", + "do_CAR", + "invert_sign", + "device", + "save_preprocessed_copy", + ] self._check_arguments( initialize_ops, @@ -258,28 +246,16 @@ def test_initialize_ops_arguments(self): ) def test_compute_preprocessing_arguments(self): - self._check_arguments( - compute_preprocessing, - ["ops", "device", "tic0", "file_object"] - ) + self._check_arguments(compute_preprocessing, ["ops", "device", "tic0", "file_object"]) def test_compute_drift_location_arguments(self): - self._check_arguments( - compute_drift_correction, - ["ops", "device", "tic0", "progress_bar", "file_object"] - ) + self._check_arguments(compute_drift_correction, ["ops", "device", "tic0", "progress_bar", "file_object"]) def test_detect_spikes_arguments(self): - self._check_arguments( - detect_spikes, - ["ops", "device", "bfile", "tic0", "progress_bar"] - ) + self._check_arguments(detect_spikes, ["ops", "device", "bfile", "tic0", "progress_bar"]) def test_cluster_spikes_arguments(self): - self._check_arguments( - cluster_spikes, - ["st", "tF", "ops", "device", "bfile", "tic0", "progress_bar"] - ) + self._check_arguments(cluster_spikes, ["st", "tF", "ops", "device", "bfile", "tic0", "progress_bar"]) def test_save_sorting_arguments(self): expected_arguments = ["ops", "results_dir", "st", "clu", "tF", "Wall", "imin", "tic0", "save_extra_vars"] @@ -287,50 +263,47 @@ def test_save_sorting_arguments(self): if parse(version("kilosort")) > parse("4.0.11"): expected_arguments.append("save_preprocessed_copy") - self._check_arguments( - save_sorting, - expected_arguments - ) + self._check_arguments(save_sorting, expected_arguments) def test_get_run_parameters(self): - self._check_arguments( - get_run_parameters, - ["ops"] - ) + self._check_arguments(get_run_parameters, ["ops"]) def test_load_probe_parameters(self): - self._check_arguments( - load_probe, - ["probe_path"] - ) + self._check_arguments(load_probe, ["probe_path"]) def test_recording_extractor_as_array_arguments(self): - self._check_arguments( - RecordingExtractorAsArray, - ["recording_extractor"] - ) + self._check_arguments(RecordingExtractorAsArray, ["recording_extractor"]) def test_binary_filtered_arguments(self): expected_arguments = [ - "filename", "n_chan_bin", "fs", "NT", "nt", "nt0min", - "chan_map", "hp_filter", "whiten_mat", "dshift", - "device", "do_CAR", "artifact_threshold", "invert_sign", - "dtype", "tmin", "tmax", "file_object" + "filename", + "n_chan_bin", + "fs", + "NT", + "nt", + "nt0min", + "chan_map", 
+ "hp_filter", + "whiten_mat", + "dshift", + "device", + "do_CAR", + "artifact_threshold", + "invert_sign", + "dtype", + "tmin", + "tmax", + "shift", + "scale", + "file_object", ] - if parse(version("kilosort")) >= parse("4.0.11"): - expected_arguments.pop(-1) - expected_arguments.extend(["shift", "scale", "file_object"]) - - self._check_arguments( - BinaryFiltered, - expected_arguments - ) + self._check_arguments(BinaryFiltered, expected_arguments) def _check_arguments(self, object_, expected_arguments): """ Check that the argument signature of `object_` is as expected - (i..e has not changed across kilosort versions). + (i.e. has not changed across kilosort versions). """ sig = signature(object_) obj_arguments = list(sig.parameters.keys()) @@ -352,7 +325,9 @@ def test_kilosort4_main(self, recording_and_paths, default_results, tmp_path, pa kilosort_output_dir = tmp_path / "kilosort_output_dir" spikeinterface_output_dir = tmp_path / "spikeinterface_output_dir" - settings, run_kilosort_kwargs, ks_format_probe = self._get_kilosort_native_settings(recording, paths, param_key, param_value) + settings, run_kilosort_kwargs, ks_format_probe = self._get_kilosort_native_settings( + recording, paths, param_key, param_value + ) kilosort.run_kilosort( settings=settings, @@ -434,15 +409,18 @@ def test_kilosort4_no_correction(self, recording_and_paths, tmp_path): assert np.array_equal(results["ks"]["clus"], results["si"]["clus"]) @pytest.mark.skipif(parse(version("kilosort")) == parse("4.0.9"), reason="nblock=0 fails on KS4=4.0.9") - @pytest.mark.parametrize("param_to_test", [ - ("change_nothing", None), - ("do_CAR", False), - ("batch_size", 42743), - ("Th_learned", 14), - ("dmin", 15), - ("max_channel_distance", 5), - ("n_pcs", 3), - ]) + @pytest.mark.parametrize( + "param_to_test", + [ + ("change_nothing", None), + ("do_CAR", False), + ("batch_size", 42743), + ("Th_learned", 14), + ("dmin", 15), + ("max_channel_distance", 5), + ("n_pcs", 3), + ], + ) def test_kilosort4_skip_preprocessing_correction(self, tmp_path, monkeypatch, param_to_test): """ Test that skipping KS4 preprocessing works as expected. Run @@ -498,8 +476,7 @@ def monkeypatch_filter_function(self, X, ops=None, ibatch=None): pass return X - monkeypatch.setattr("kilosort.io.BinaryFiltered.filter", - monkeypatch_filter_function) + monkeypatch.setattr("kilosort.io.BinaryFiltered.filter", monkeypatch_filter_function) ks_settings, _, ks_format_probe = self._get_kilosort_native_settings(recording, paths, param_key, param_value) ks_settings["nblocks"] = 0 @@ -552,15 +529,11 @@ def _check_test_parameters_are_changing_the_output(self, results, default_result return if param_key == "change_nothing": - assert all( - default_results["ks"]["st"] == results["ks"]["st"] - ) and all( + assert all(default_results["ks"]["st"] == results["ks"]["st"]) and all( default_results["ks"]["clus"] == results["ks"]["clus"] ), f"{param_key} changed somehow!." else: - assert not ( - default_results["ks"]["st"].size == results["ks"]["st"].size - ) or not all( + assert not (default_results["ks"]["st"].size == results["ks"]["st"].size) or not all( default_results["ks"]["clus"] == results["ks"]["clus"] ), f"{param_key} results did not change with parameter change." @@ -598,7 +571,7 @@ def _get_spikeinterface_settings(self, param_key, param_value): Generate settings kwargs for running KS4 in SpikeInterface. See `_get_kilosort_native_settings()` for some details. 
""" - settings = {} # copy.deepcopy(DEFAULT_SETTINGS) + settings = {} # copy.deepcopy(DEFAULT_SETTINGS) if param_key != "change_nothing": settings.update({param_key: param_value}) @@ -606,7 +579,7 @@ def _get_spikeinterface_settings(self, param_key, param_value): if param_key == "binning_depth": settings.update({"nblocks": 5}) - # for name in ["n_chan_bin", "fs", "tmin", "tmax"]: + # for name in ["n_chan_bin", "fs", "tmin", "tmax"]: # settings.pop(name) return settings diff --git a/.github/workflows/test_kilosort4.yml b/.github/workflows/test_kilosort4.yml index 6c58c76813..b8930c8ccc 100644 --- a/.github/workflows/test_kilosort4.yml +++ b/.github/workflows/test_kilosort4.yml @@ -14,16 +14,17 @@ jobs: matrix: ${{ steps.set-matrix.outputs.matrix }} steps: - name: Checkout repository - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Set up Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v5 with: python-version: 3.12 - name: Install dependencies run: | pip install requests packaging + pip install . - name: Fetch package versions from PyPI run: | @@ -47,7 +48,7 @@ jobs: ks_version: ${{ fromJson(needs.versions.outputs.matrix) }} steps: - name: Checkout repository - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Setup Python ${{ matrix.python-version }} uses: actions/setup-python@v5 diff --git a/src/spikeinterface/sorters/external/kilosort4.py b/src/spikeinterface/sorters/external/kilosort4.py index 4b7d0dbe6e..d4f0a26b3c 100644 --- a/src/spikeinterface/sorters/external/kilosort4.py +++ b/src/spikeinterface/sorters/external/kilosort4.py @@ -108,8 +108,8 @@ class Kilosort4Sorter(BaseSorter): "save_preprocessed_copy": "save a pre-processed copy of the data (including drift correction) to temp_wh.dat in the results directory and format Phy output to use that copy of the data", "torch_device": "Select the torch device auto/cuda/cpu", "bad_channels": "A list of channel indices (rows in the binary file) that should not be included in sorting. Listing channels here is equivalent to excluding them from the probe dictionary.", - "use_binary_file": "If True, the Kilosort is run from a binary file. In this case, if the recording is not binary it is written to a binary file in the output folder" - "If False, the Kilosort is run on the recording object directly using the RecordingExtractorAsArray object. Default is False.", + "use_binary_file": "If True and the recording is not binary compatible, then Kilosort is written to a binary file in the output folder. If False, the Kilosort is run on the recording object directly using the RecordingExtractorAsArray object. " + "If the recording is binary compatible, then the sorter will always use the binary file. Default is False.", } sorter_description = """Kilosort4 is a Python package for spike sorting on GPUs with template matching. 
@@ -225,20 +225,21 @@ def _run_from_folder(cls, sorter_output_folder, params, verbose): probe = load_probe(probe_path=probe_filename) probe_name = "" - if params["use_binary_file"]: - if recording.binary_compatible_with(time_axis=0, file_paths_lenght=1): - # no copy - binary_description = recording.get_binary_description() - filename = str(binary_description["file_paths"][0]) - file_object = None - else: - # a local copy has been written - filename = str(sorter_output_folder / "recording.dat") - file_object = None + if recording.binary_compatible_with(time_axis=0, file_paths_lenght=1): + # no copy + binary_description = recording.get_binary_description() + filename = str(binary_description["file_paths"][0]) + file_object = None + elif params["use_binary_file"]: + # a local copy has been written + filename = str(sorter_output_folder / "recording.dat") + file_object = None else: - # this internally concatenates the recording + # the recording is not binary compatible and no binary copy has been written. + # in this case, we use the RecordingExtractorAsArray object filename = "" file_object = RecordingExtractorAsArray(recording_extractor=recording) + data_dtype = recording.get_dtype() do_CAR = params["do_CAR"] @@ -346,21 +347,18 @@ def _run_from_folder(cls, sorter_output_folder, params, verbose): hp_filter=torch.as_tensor(np.zeros(1)), whiten_mat=torch.as_tensor(np.eye(recording.get_num_channels())) ) - if version.parse(cls.get_sorter_version()) >= version.parse("4.0.12"): - _ = save_sorting( - ops=ops, - results_dir=results_dir, - st=st, - clu=clu, - tF=tF, - Wall=Wall, - imin=bfile.imin, - tic0=tic0, - save_extra_vars=save_extra_vars, - save_preprocessed_copy=save_preprocessed_copy, - ) - else: - _ = save_sorting(ops, results_dir, st, clu, tF, Wall, bfile.imin, tic0, save_extra_vars=save_extra_vars) + _ = save_sorting( + ops=ops, + results_dir=results_dir, + st=st, + clu=clu, + tF=tF, + Wall=Wall, + imin=bfile.imin, + tic0=tic0, + save_extra_vars=save_extra_vars, + save_preprocessed_copy=save_preprocessed_copy, + ) @classmethod def _get_result_from_folder(cls, sorter_output_folder): From f9dfa04190eaba160d5beec1c74300aaa2989fcc Mon Sep 17 00:00:00 2001 From: Alessio Buccino Date: Tue, 3 Sep 2024 07:35:14 +0200 Subject: [PATCH 153/187] Add highpass_cutoff and fix KS tests --- .github/scripts/test_kilosort4_ci.py | 29 ++++++++++++------- .../sorters/external/kilosort4.py | 4 ++- 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/.github/scripts/test_kilosort4_ci.py b/.github/scripts/test_kilosort4_ci.py index c7853a2add..96a037876f 100644 --- a/.github/scripts/test_kilosort4_ci.py +++ b/.github/scripts/test_kilosort4_ci.py @@ -21,19 +21,21 @@ import copy from typing import Any -import spikeinterface.full as si import numpy as np import torch import kilosort from kilosort.io import load_probe import pandas as pd -from spikeinterface.sorters.external.kilosort4 import Kilosort4Sorter import pytest -from probeinterface.io import write_prb -from kilosort.parameters import DEFAULT_SETTINGS from packaging.version import parse from importlib.metadata import version from inspect import signature + +import spikeinterface.full as si +from spikeinterface.sorters.external.kilosort4 import Kilosort4Sorter +from probeinterface.io import write_prb + +from kilosort.parameters import DEFAULT_SETTINGS from kilosort.run_kilosort import ( set_files, initialize_ops, @@ -66,6 +68,7 @@ ("nt", 93), ("nskip", 1), ("whitening_range", 16), + ("highpass_cutoff", 200), ("sig_interp", 5), ("nt0min", 25), 
("dmin", 15), @@ -87,10 +90,11 @@ ("ccg_threshold", 1e12), ("acg_threshold", 1e12), ("cluster_downsampling", 2), - ("duplicate_spike_bins", 5), + ("duplicate_spike_ms", 0.3), ("drift_smoothing", [250, 250, 250]), - ("bad_channels", None), ("save_preprocessed_copy", False), + ("shift", 0), + ("scale", 1), ] @@ -194,7 +198,10 @@ def test_params_to_test(self): continue if param_key not in RUN_KILOSORT_ARGS: - assert DEFAULT_SETTINGS[param_key] != param_value, f"{param_key} values should be different in test." + assert DEFAULT_SETTINGS[param_key] != param_value, ( + f"{param_key} values should be different in test: " + f"{param_value} vs. {DEFAULT_SETTINGS[param_key]}" + ) def test_default_settings_all_represented(self): """ @@ -227,7 +234,7 @@ def test_spikeinterface_defaults_against_kilsort(self): # Testing Arguments ### def test_set_files_arguments(self): - self._check_arguments(set_files, ["settings", "filename", "probe", "probe_name", "data_dir", "results_dir"]) + self._check_arguments(set_files, ["settings", "filename", "probe", "probe_name", "data_dir", "results_dir", "bad_channels"]) def test_initialize_ops_arguments(self): expected_arguments = [ @@ -249,13 +256,13 @@ def test_compute_preprocessing_arguments(self): self._check_arguments(compute_preprocessing, ["ops", "device", "tic0", "file_object"]) def test_compute_drift_location_arguments(self): - self._check_arguments(compute_drift_correction, ["ops", "device", "tic0", "progress_bar", "file_object"]) + self._check_arguments(compute_drift_correction, ["ops", "device", "tic0", "progress_bar", "file_object", "clear_cache"]) def test_detect_spikes_arguments(self): - self._check_arguments(detect_spikes, ["ops", "device", "bfile", "tic0", "progress_bar"]) + self._check_arguments(detect_spikes, ["ops", "device", "bfile", "tic0", "progress_bar", "clear_cache"]) def test_cluster_spikes_arguments(self): - self._check_arguments(cluster_spikes, ["st", "tF", "ops", "device", "bfile", "tic0", "progress_bar"]) + self._check_arguments(cluster_spikes, ["st", "tF", "ops", "device", "bfile", "tic0", "progress_bar", "clear_cache"]) def test_save_sorting_arguments(self): expected_arguments = ["ops", "results_dir", "st", "clu", "tF", "Wall", "imin", "tic0", "save_extra_vars"] diff --git a/src/spikeinterface/sorters/external/kilosort4.py b/src/spikeinterface/sorters/external/kilosort4.py index d4f0a26b3c..b0ba054e2d 100644 --- a/src/spikeinterface/sorters/external/kilosort4.py +++ b/src/spikeinterface/sorters/external/kilosort4.py @@ -35,6 +35,7 @@ class Kilosort4Sorter(BaseSorter): "artifact_threshold": None, "nskip": 25, "whitening_range": 32, + "highpass_cutoff": 300, "binning_depth": 5, "sig_interp": 20, "drift_smoothing": [0.5, 0.5, 0.5], @@ -55,7 +56,7 @@ class Kilosort4Sorter(BaseSorter): "cluster_downsampling": 20, "cluster_pcs": 64, "x_centers": None, - "duplicate_spike_bins": 7, + "duplicate_spike_ms": 0.25, "do_correction": True, "keep_good_only": False, "save_extra_kwargs": False, @@ -80,6 +81,7 @@ class Kilosort4Sorter(BaseSorter): "artifact_threshold": "If a batch contains absolute values above this number, it will be zeroed out under the assumption that a recording artifact is present. By default, the threshold is infinite (so that no zeroing occurs). Default value: None.", "nskip": "Batch stride for computing whitening matrix. Default value: 25.", "whitening_range": "Number of nearby channels used to estimate the whitening matrix. Default value: 32.", + "highpass_cutoff": "High-pass filter cutoff frequency in Hz. 
Default value: 300.", "binning_depth": "For drift correction, vertical bin size in microns used for 2D histogram. Default value: 5.", "sig_interp": "For drift correction, sigma for interpolation (spatial standard deviation). Approximate smoothness scale in units of microns. Default value: 20.", "drift_smoothing": "Amount of gaussian smoothing to apply to the spatiotemporal drift estimation, for x,y,time axes in units of registration blocks (for x,y axes) and batch size (for time axis). The x,y smoothing has no effect for `nblocks = 1`.", From 1964f86b45e9bb96750b65bd84774f0d60595d5b Mon Sep 17 00:00:00 2001 From: Alessio Buccino Date: Tue, 3 Sep 2024 08:00:00 +0200 Subject: [PATCH 154/187] test ks4 on ks4 changes --- .github/workflows/test_kilosort4.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/test_kilosort4.yml b/.github/workflows/test_kilosort4.yml index b8930c8ccc..5a8259726e 100644 --- a/.github/workflows/test_kilosort4.yml +++ b/.github/workflows/test_kilosort4.yml @@ -4,6 +4,9 @@ on: workflow_dispatch: schedule: - cron: "0 12 * * 0" # Weekly on Sunday at noon UTC + push: + paths: + - '**/kilosort4.py' jobs: versions: From 8e9995d843ee7b0a43cbd877a6fb01a0c1fb7cb8 Mon Sep 17 00:00:00 2001 From: Alessio Buccino Date: Tue, 3 Sep 2024 08:02:44 +0200 Subject: [PATCH 155/187] Move testing scripts into scripts folder --- .github/run_tests.sh | 2 +- .github/{ => scripts}/build_job_summary.py | 0 .github/{ => scripts}/determine_testing_environment.py | 0 .github/{ => scripts}/import_test.py | 0 .github/workflows/all-tests.yml | 2 +- .github/workflows/core-test.yml | 2 +- .github/workflows/full-test-with-codecov.yml | 2 +- .github/workflows/test_imports.yml | 4 ++-- 8 files changed, 6 insertions(+), 6 deletions(-) rename .github/{ => scripts}/build_job_summary.py (100%) rename .github/{ => scripts}/determine_testing_environment.py (100%) rename .github/{ => scripts}/import_test.py (100%) diff --git a/.github/run_tests.sh b/.github/run_tests.sh index 558e0b64d3..02eb6ab8a1 100644 --- a/.github/run_tests.sh +++ b/.github/run_tests.sh @@ -10,5 +10,5 @@ fi pytest -m "$MARKER" -vv -ra --durations=0 --durations-min=0.001 | tee report.txt; test ${PIPESTATUS[0]} -eq 0 || exit 1 echo "# Timing profile of ${MARKER}" >> $GITHUB_STEP_SUMMARY -python $GITHUB_WORKSPACE/.github/build_job_summary.py report.txt >> $GITHUB_STEP_SUMMARY +python $GITHUB_WORKSPACE/.github/scripts/build_job_summary.py report.txt >> $GITHUB_STEP_SUMMARY rm report.txt diff --git a/.github/build_job_summary.py b/.github/scripts/build_job_summary.py similarity index 100% rename from .github/build_job_summary.py rename to .github/scripts/build_job_summary.py diff --git a/.github/determine_testing_environment.py b/.github/scripts/determine_testing_environment.py similarity index 100% rename from .github/determine_testing_environment.py rename to .github/scripts/determine_testing_environment.py diff --git a/.github/import_test.py b/.github/scripts/import_test.py similarity index 100% rename from .github/import_test.py rename to .github/scripts/import_test.py diff --git a/.github/workflows/all-tests.yml b/.github/workflows/all-tests.yml index 8317d7bec4..5b583934ef 100644 --- a/.github/workflows/all-tests.yml +++ b/.github/workflows/all-tests.yml @@ -50,7 +50,7 @@ jobs: shell: bash run: | changed_files="${{ steps.changed-files.outputs.all_changed_files }}" - python .github/determine_testing_environment.py $changed_files + python .github/scripts/determine_testing_environment.py $changed_files - name: Display 
testing environment shell: bash diff --git a/.github/workflows/core-test.yml b/.github/workflows/core-test.yml index a513d48f3b..1dbf0f5109 100644 --- a/.github/workflows/core-test.yml +++ b/.github/workflows/core-test.yml @@ -39,7 +39,7 @@ jobs: pip install tabulate echo "# Timing profile of core tests in ${{matrix.os}}" >> $GITHUB_STEP_SUMMARY # Outputs markdown summary to standard output - python ./.github/build_job_summary.py report.txt >> $GITHUB_STEP_SUMMARY + python ./.github/scripts/build_job_summary.py report.txt >> $GITHUB_STEP_SUMMARY cat $GITHUB_STEP_SUMMARY rm report.txt shell: bash # Necessary for pipeline to work on windows diff --git a/.github/workflows/full-test-with-codecov.yml b/.github/workflows/full-test-with-codecov.yml index ab4a083ae1..6a222f5e25 100644 --- a/.github/workflows/full-test-with-codecov.yml +++ b/.github/workflows/full-test-with-codecov.yml @@ -47,7 +47,7 @@ jobs: source ${{ github.workspace }}/test_env/bin/activate pytest -m "not sorters_external" --cov=./ --cov-report xml:./coverage.xml -vv -ra --durations=0 | tee report_full.txt; test ${PIPESTATUS[0]} -eq 0 || exit 1 echo "# Timing profile of full tests" >> $GITHUB_STEP_SUMMARY - python ./.github/build_job_summary.py report_full.txt >> $GITHUB_STEP_SUMMARY + python ./.github/scripts/build_job_summary.py report_full.txt >> $GITHUB_STEP_SUMMARY cat $GITHUB_STEP_SUMMARY rm report_full.txt - uses: codecov/codecov-action@v4 diff --git a/.github/workflows/test_imports.yml b/.github/workflows/test_imports.yml index d39fc37242..a2631f6eb7 100644 --- a/.github/workflows/test_imports.yml +++ b/.github/workflows/test_imports.yml @@ -34,7 +34,7 @@ jobs: echo "## OS: ${{ matrix.os }}" >> $GITHUB_STEP_SUMMARY echo "---" >> $GITHUB_STEP_SUMMARY echo "### Import times when only installing only core dependencies " >> $GITHUB_STEP_SUMMARY - python ./.github/import_test.py >> $GITHUB_STEP_SUMMARY + python ./.github/scripts/import_test.py >> $GITHUB_STEP_SUMMARY shell: bash # Necessary for pipeline to work on windows - name: Install in full mode run: | @@ -44,5 +44,5 @@ jobs: # Add a header to separate the two profiles echo "---" >> $GITHUB_STEP_SUMMARY echo "### Import times when installing full dependencies in " >> $GITHUB_STEP_SUMMARY - python ./.github/import_test.py >> $GITHUB_STEP_SUMMARY + python ./.github/scripts/import_test.py >> $GITHUB_STEP_SUMMARY shell: bash # Necessary for pipeline to work on windows From 219bee4ed768b5de1543e5e797d56973e8ac2664 Mon Sep 17 00:00:00 2001 From: Alessio Buccino Date: Tue, 3 Sep 2024 08:47:33 +0200 Subject: [PATCH 156/187] change trigger --- .github/workflows/test_kilosort4.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test_kilosort4.yml b/.github/workflows/test_kilosort4.yml index 5a8259726e..42e6140917 100644 --- a/.github/workflows/test_kilosort4.yml +++ b/.github/workflows/test_kilosort4.yml @@ -4,7 +4,7 @@ on: workflow_dispatch: schedule: - cron: "0 12 * * 0" # Weekly on Sunday at noon UTC - push: + pull_request: paths: - '**/kilosort4.py' From e26e143c0c2ec6af79e509e1d09c6601717b93cf Mon Sep 17 00:00:00 2001 From: Alessio Buccino Date: Tue, 3 Sep 2024 08:51:38 +0200 Subject: [PATCH 157/187] Remove last conditions on prior ks versions --- .github/scripts/test_kilosort4_ci.py | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/.github/scripts/test_kilosort4_ci.py b/.github/scripts/test_kilosort4_ci.py index 96a037876f..009a2c447c 100644 --- a/.github/scripts/test_kilosort4_ci.py +++ 
b/.github/scripts/test_kilosort4_ci.py @@ -213,8 +213,6 @@ def test_default_settings_all_represented(self): for param_key in DEFAULT_SETTINGS: if param_key not in ["n_chan_bin", "fs", "tmin", "tmax"]: - if parse(version("kilosort")) == parse("4.0.9") and param_key == "nblocks": - continue assert param_key in tested_keys, f"param: {param_key} in DEFAULT SETTINGS but not tested." def test_spikeinterface_defaults_against_kilsort(self): @@ -267,8 +265,7 @@ def test_cluster_spikes_arguments(self): def test_save_sorting_arguments(self): expected_arguments = ["ops", "results_dir", "st", "clu", "tF", "Wall", "imin", "tic0", "save_extra_vars"] - if parse(version("kilosort")) > parse("4.0.11"): - expected_arguments.append("save_preprocessed_copy") + expected_arguments.append("save_preprocessed_copy") self._check_arguments(save_sorting, expected_arguments) @@ -369,14 +366,9 @@ def test_kilosort4_main(self, recording_and_paths, default_results, tmp_path, pa assert ops[param_key] == param_value # Finally, check out test parameters actually change the output of - # KS4, ensuring our tests are actually doing something. This is not - # done prior to 4.0.4 because a number of parameters seem to stop - # having an effect. This is probably due to small changes in their - # behaviour, and the test file chosen here. - if parse(version("kilosort")) > parse("4.0.4"): - self._check_test_parameters_are_changing_the_output(results, default_results, param_key) - - @pytest.mark.skipif(parse(version("kilosort")) == parse("4.0.9"), reason="nblock=0 fails on KS4=4.0.9") + # KS4, ensuring our tests are actually doing something. + self._check_test_parameters_are_changing_the_output(results, default_results, param_key) + def test_kilosort4_no_correction(self, recording_and_paths, tmp_path): """ Test the SpikeInterface wrappers `do_correction` argument. We set @@ -415,7 +407,6 @@ def test_kilosort4_no_correction(self, recording_and_paths, tmp_path): assert np.array_equal(results["ks"]["st"], results["si"]["st"]) assert np.array_equal(results["ks"]["clus"], results["si"]["clus"]) - @pytest.mark.skipif(parse(version("kilosort")) == parse("4.0.9"), reason="nblock=0 fails on KS4=4.0.9") @pytest.mark.parametrize( "param_to_test", [ From 9c338dd9688455e7595433849399e7ee313301b0 Mon Sep 17 00:00:00 2001 From: Alessio Buccino Date: Tue, 3 Sep 2024 09:35:19 +0200 Subject: [PATCH 158/187] Fix KS parameters in tests --- .github/scripts/test_kilosort4_ci.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/.github/scripts/test_kilosort4_ci.py b/.github/scripts/test_kilosort4_ci.py index 009a2c447c..0593534010 100644 --- a/.github/scripts/test_kilosort4_ci.py +++ b/.github/scripts/test_kilosort4_ci.py @@ -90,11 +90,9 @@ ("ccg_threshold", 1e12), ("acg_threshold", 1e12), ("cluster_downsampling", 2), - ("duplicate_spike_ms", 0.3), ("drift_smoothing", [250, 250, 250]), - ("save_preprocessed_copy", False), - ("shift", 0), - ("scale", 1), + # Not tested beacuse with ground truth data it doesn't change the results + # ("duplicate_spike_ms", 0.3), ] @@ -210,6 +208,8 @@ def test_default_settings_all_represented(self): on the KS side. 
""" tested_keys = [entry[0] for entry in PARAMS_TO_TEST] + additional_non_tested_keys = ["shift", "scale", "save_preprocessed_copy", "duplicate_spike_ms"] + tested_keys += additional_non_tested_keys for param_key in DEFAULT_SETTINGS: if param_key not in ["n_chan_bin", "fs", "tmin", "tmax"]: @@ -407,6 +407,11 @@ def test_kilosort4_no_correction(self, recording_and_paths, tmp_path): assert np.array_equal(results["ks"]["st"], results["si"]["st"]) assert np.array_equal(results["ks"]["clus"], results["si"]["clus"]) + + def test_kilosort4_use_binary_file(self, recording_and_paths, tmp_path): + # TODO + pass + @pytest.mark.parametrize( "param_to_test", [ From 87fbe55a7118b8e741ad9b801cba50f11cdc379a Mon Sep 17 00:00:00 2001 From: Alessio Buccino Date: Tue, 3 Sep 2024 10:47:01 +0200 Subject: [PATCH 159/187] More cleanup of KS4 tests --- .github/scripts/test_kilosort4_ci.py | 265 +++++++++++++-------------- 1 file changed, 128 insertions(+), 137 deletions(-) diff --git a/.github/scripts/test_kilosort4_ci.py b/.github/scripts/test_kilosort4_ci.py index 0593534010..35946a5a56 100644 --- a/.github/scripts/test_kilosort4_ci.py +++ b/.github/scripts/test_kilosort4_ci.py @@ -10,31 +10,27 @@ changes when skipping KS4 preprocessing is true, because this takes a slightly different path through the kilosort4.py wrapper logic. This also checks that changing the parameter changes the test output from default - on our test case (otherwise, the test could not detect a failure). This is possible - for nearly all parameters, see `_check_test_parameters_are_changing_the_output()`. + on our test case (otherwise, the test could not detect a failure). - Test that kilosort functions called from `kilosort4.py` wrapper have the expected input signatures - Do some tests to check all KS4 parameters are tested against. """ - +import pytest import copy from typing import Any +from inspect import signature + import numpy as np import torch -import kilosort -from kilosort.io import load_probe -import pandas as pd -import pytest -from packaging.version import parse -from importlib.metadata import version -from inspect import signature import spikeinterface.full as si +from spikeinterface.core.testing import check_sortings_equal from spikeinterface.sorters.external.kilosort4 import Kilosort4Sorter from probeinterface.io import write_prb +import kilosort from kilosort.parameters import DEFAULT_SETTINGS from kilosort.run_kilosort import ( set_files, @@ -47,59 +43,62 @@ get_run_parameters, ) from kilosort.io import load_probe, RecordingExtractorAsArray, BinaryFiltered -from kilosort.parameters import DEFAULT_SETTINGS -from kilosort import preprocessing as ks_preprocessing + RUN_KILOSORT_ARGS = ["do_CAR", "invert_sign", "save_preprocessed_copy"] # "device", "progress_bar", "save_extra_vars" are not tested. "save_extra_vars" could be. # Setup Params to test #### -PARAMS_TO_TEST = [ - # Not tested - # ("torch_device", "auto") - # Stable across KS version 4.0.16 - 4.0.X (?) 
- ("change_nothing", None), - ("nblocks", 0), - ("do_CAR", False), - ("batch_size", 42743), - ("Th_universal", 12), - ("Th_learned", 14), - ("invert_sign", True), - ("nt", 93), - ("nskip", 1), - ("whitening_range", 16), - ("highpass_cutoff", 200), - ("sig_interp", 5), - ("nt0min", 25), - ("dmin", 15), - ("dminx", 16), - ("min_template_size", 15), - ("template_sizes", 10), - ("nearest_chans", 8), - ("nearest_templates", 35), - ("max_channel_distance", 5), - ("templates_from_data", False), - ("n_templates", 10), - ("n_pcs", 3), - ("Th_single_ch", 4), - ("x_centers", 5), - ("binning_depth", 1), - # Note: These don't change the results from - # default when applied to the test case. - ("artifact_threshold", 200), - ("ccg_threshold", 1e12), - ("acg_threshold", 1e12), - ("cluster_downsampling", 2), - ("drift_smoothing", [250, 250, 250]), - # Not tested beacuse with ground truth data it doesn't change the results - # ("duplicate_spike_ms", 0.3), +PARAMS_TO_TEST_DICT = { + "nblocks": 0, + "do_CAR": False, + "batch_size": 42743, + "Th_universal": 12, + "Th_learned": 14, + "invert_sign": True, + "nt": 93, + "nskip": 1, + "whitening_range": 16, + "highpass_cutoff": 200, + "sig_interp": 5, + "nt0min": 25, + "dmin": 15, + "dminx": 16, + "min_template_size": 15, + "template_sizes": 10, + "nearest_chans": 8, + "nearest_templates": 35, + "max_channel_distance": 5, + "templates_from_data": False, + "n_templates": 10, + "n_pcs": 3, + "Th_single_ch": 4, + "x_centers": 5, + "binning_depth": 1, + "drift_smoothing": [250, 250, 250], + "artifact_threshold": 200, + "ccg_threshold": 1e12, + "acg_threshold": 1e12, + "cluster_downsampling": 2, + "duplicate_spike_ms": 0.3, +} + +PARAMS_TO_TEST = list(PARAMS_TO_TEST_DICT.keys()) + +PARAMETERS_NOT_AFFECTING_RESULTS = [ + "artifact_threshold", + "ccg_threshold", + "acg_threshold", + "cluster_downsampling", + "cluster_pcs", + "duplicate_spike_ms" # this is because gorund-truth spikes don't have violations ] - +# THIS IS A PLACEHOLDER FOR FUTURE PARAMS TO TEST # if parse(version("kilosort")) >= parse("4.0.X"): -# PARAMS_TO_TEST.extend( +# PARAMS_TO_TEST_DICT.update( # [ -# ("new_param", new_values), +# {"new_param": new_value}, # ] # ) @@ -122,7 +121,7 @@ def recording_and_paths(self, tmp_path_factory): return (recording, paths) @pytest.fixture(scope="session") - def default_results(self, recording_and_paths): + def default_kilosort_sorting(self, recording_and_paths): """ Because we check each parameter at a time and check the KS4 and SpikeInterface versions match, if changing the parameter @@ -133,7 +132,7 @@ def default_results(self, recording_and_paths): """ recording, paths = recording_and_paths - settings, _, ks_format_probe = self._get_kilosort_native_settings(recording, paths, "change_nothing", None) + settings, _, ks_format_probe = self._get_kilosort_native_settings(recording, paths, None, None) defaults_ks_output_dir = paths["session_scope_tmp_path"] / "default_ks_output" @@ -144,9 +143,8 @@ def default_results(self, recording_and_paths): results_dir=defaults_ks_output_dir, ) - default_results = self._get_sorting_output(defaults_ks_output_dir) + return si.read_kilosort(defaults_ks_output_dir) - return default_results def _get_ground_truth_recording(self): """ @@ -185,16 +183,11 @@ def _save_ground_truth_recording(self, recording, tmp_path): # Tests ###### def test_params_to_test(self): """ - Test that all values in PARAMS_TO_TEST are + Test that all values in PARAMS_TO_TEST_DICT are different to the default values used in Kilosort, otherwise there is no point to 
the test.
         """
-        for parameter in PARAMS_TO_TEST:
-            param_key, param_value = parameter
-
-            if param_key == "change_nothing":
-                continue
-
+        for param_key, param_value in PARAMS_TO_TEST_DICT.items():
             if param_key not in RUN_KILOSORT_ARGS:
                 assert DEFAULT_SETTINGS[param_key] != param_value, (
                     f"{param_key} values should be different in test: "
@@ -207,8 +200,8 @@ def test_default_settings_all_represented(self):
         PARAMS_TO_TEST, otherwise we are missing settings added
         on the KS side.
         """
-        tested_keys = [entry[0] for entry in PARAMS_TO_TEST]
-        additional_non_tested_keys = ["shift", "scale", "save_preprocessed_copy", "duplicate_spike_ms"]
+        tested_keys = PARAMS_TO_TEST
+        additional_non_tested_keys = ["shift", "scale", "save_preprocessed_copy"]
         tested_keys += additional_non_tested_keys

@@ -315,7 +308,7 @@ def _check_arguments(self, object_, expected_arguments):

     # Full Test ####
     @pytest.mark.parametrize("parameter", PARAMS_TO_TEST)
-    def test_kilosort4_main(self, recording_and_paths, default_results, tmp_path, parameter):
+    def test_kilosort4_main(self, recording_and_paths, default_kilosort_sorting, tmp_path, parameter):
         """
         Given a recording, paths to raw data, and a parameter to change,
         run KS4 natively and within the SpikeInterface wrapper with the
         check the outputs are the same.
         """
         recording, paths = recording_and_paths
-        param_key, param_value = parameter
+        param_key = parameter
+        param_value = PARAMS_TO_TEST_DICT[param_key]

         # Setup parameters for KS4 and run it natively
         kilosort_output_dir = tmp_path / "kilosort_output_dir"
@@ -340,11 +334,12 @@ def test_kilosort4_main(self, recording_and_paths, default_results, tmp_path, pa
             results_dir=kilosort_output_dir,
             **run_kilosort_kwargs,
         )
+        sorting_ks4 = si.read_kilosort(kilosort_output_dir)

         # Setup Parameters for SI and KS4 through SI
         spikeinterface_settings = self._get_spikeinterface_settings(param_key, param_value)

-        si.run_sorter(
+        sorting_si = si.run_sorter(
             "kilosort4",
             recording,
             remove_existing_folder=True,
@@ -353,21 +348,19 @@ def test_kilosort4_main(self, recording_and_paths, default_results, tmp_path, pa
         )

         # Get the results and check they match
-        results = self._get_sorting_output(kilosort_output_dir, spikeinterface_output_dir)
-
-        assert np.array_equal(results["ks"]["st"], results["si"]["st"]), f"{param_key} spike times different"
-        assert np.array_equal(results["ks"]["clus"], results["si"]["clus"]), f"{param_key} cluster assignment different"
+        check_sortings_equal(sorting_ks4, sorting_si)

         # Check the ops file in KS4 output is as expected. This is saved on the
         # SI side so not an extremely robust addition, but it can't hurt.
-        if param_key != "change_nothing":
-            ops = np.load(spikeinterface_output_dir / "sorter_output" / "ops.npy", allow_pickle=True)
-            ops = ops.tolist()  # strangely this makes a dict
-            assert ops[param_key] == param_value
+        ops = np.load(spikeinterface_output_dir / "sorter_output" / "ops.npy", allow_pickle=True)
+        ops = ops.tolist()  # strangely this makes a dict
+        assert ops[param_key] == param_value

         # Finally, check out test parameters actually change the output of
-        # KS4, ensuring our tests are actually doing something.
-        self._check_test_parameters_are_changing_the_output(results, default_results, param_key)
+        # KS4, ensuring our tests are actually doing something (except for some params).
+ if param_key not in PARAMETERS_NOT_AFFECTING_RESULTS: + with pytest.raises(AssertionError): + check_sortings_equal(default_kilosort_sorting, sorting_si) def test_kilosort4_no_correction(self, recording_and_paths, tmp_path): """ @@ -391,9 +384,10 @@ def test_kilosort4_no_correction(self, recording_and_paths, tmp_path): results_dir=kilosort_output_dir, do_CAR=True, ) + sorting_ks = si.read_kilosort(kilosort_output_dir) spikeinterface_settings = self._get_spikeinterface_settings("nblocks", 1) - si.run_sorter( + sorting_si = si.run_sorter( "kilosort4", recording, remove_existing_folder=True, @@ -401,21 +395,46 @@ def test_kilosort4_no_correction(self, recording_and_paths, tmp_path): do_correction=False, **spikeinterface_settings, ) + check_sortings_equal(sorting_ks, sorting_si) - results = self._get_sorting_output(kilosort_output_dir, spikeinterface_output_dir) - assert np.array_equal(results["ks"]["st"], results["si"]["st"]) - assert np.array_equal(results["ks"]["clus"], results["si"]["clus"]) + def test_use_binary_file(self, tmp_path): + """ + Test that the SpikeInterface wrapper can run KS4 using a binary file as input or directly + from the recording. + """ + recording = self._get_ground_truth_recording() + recording_bin = recording.save() + # run with SI wrapper + sorting_ks4 = si.run_sorter( + "kilosort4", + recording, + folder = tmp_path / "spikeinterface_output_dir_wrapper", + use_binary_file=False, + remove_existing_folder=True, + ) + sorting_ks4_bin = si.run_sorter( + "kilosort4", + recording_bin, + folder = tmp_path / "spikeinterface_output_dir_bin", + use_binary_file=False, + remove_existing_folder=True, + ) + sorting_ks4_non_bin = si.run_sorter( + "kilosort4", + recording, + folder = tmp_path / "spikeinterface_output_dir_non_bin", + use_binary_file=True, + remove_existing_folder=True, + ) - def test_kilosort4_use_binary_file(self, recording_and_paths, tmp_path): - # TODO - pass + check_sortings_equal(sorting_ks4, sorting_ks4_bin) + check_sortings_equal(sorting_ks4, sorting_ks4_non_bin) @pytest.mark.parametrize( "param_to_test", [ - ("change_nothing", None), ("do_CAR", False), ("batch_size", 42743), ("Th_learned", 14), @@ -496,6 +515,7 @@ def monkeypatch_filter_function(self, X, ops=None, ibatch=None): ) monkeypatch.undo() + si.read_kilosort(kilosort_output_dir) # Now, run kilosort through spikeinterface with the same options. spikeinterface_settings = self._get_spikeinterface_settings(param_key, param_value) @@ -517,29 +537,17 @@ def monkeypatch_filter_function(self, X, ops=None, ibatch=None): # memory file. Because in this test recordings are preprocessed, there are # some filter edge effects that depend on the chunking in `get_traces()`. # These are all extremely close (usually just 1 spike, 1 idx different). - results = self._get_sorting_output(kilosort_output_dir, spikeinterface_output_dir) + results = {} + results["ks"] = {} + results["ks"]["st"] = np.load(kilosort_output_dir / "spike_times.npy") + results["ks"]["clus"] = np.load(kilosort_output_dir / "spike_clusters.npy") + results["si"] = {} + results["si"]["st"] = np.load(spikeinterface_output_dir / "sorter_output" / "spike_times.npy") + results["si"]["clus"] = np.load(spikeinterface_output_dir / "sorter_output" / "spike_clusters.npy") assert np.allclose(results["ks"]["st"], results["si"]["st"], rtol=0, atol=1) - # Helpers ###### - def _check_test_parameters_are_changing_the_output(self, results, default_results, param_key): - """ - If nothing is changed, default vs. results outputs are identical. 
- Otherwise, check they are not the same. Can't figure out how to get - the skipped three parameters below to change the results on this - small test file. - """ - if param_key in ["acg_threshold", "ccg_threshold", "artifact_threshold", "cluster_downsampling", "cluster_pcs"]: - return - - if param_key == "change_nothing": - assert all(default_results["ks"]["st"] == results["ks"]["st"]) and all( - default_results["ks"]["clus"] == results["ks"]["clus"] - ), f"{param_key} changed somehow!." - else: - assert not (default_results["ks"]["st"].size == results["ks"]["st"].size) or not all( - default_results["ks"]["clus"] == results["ks"]["clus"] - ), f"{param_key} results did not change with parameter change." + ##### Helpers ###### def _get_kilosort_native_settings(self, recording, paths, param_key, param_value): """ Function to generate the settings and function inputs to run kilosort. @@ -554,16 +562,18 @@ def _get_kilosort_native_settings(self, recording, paths, param_key, param_value "n_chan_bin": recording.get_num_channels(), "fs": recording.get_sampling_frequency(), } + run_kilosort_kwargs = {} - if param_key == "binning_depth": - settings.update({"nblocks": 5}) + if param_key is not None: + if param_key == "binning_depth": + settings.update({"nblocks": 5}) - if param_key in RUN_KILOSORT_ARGS: - run_kilosort_kwargs = {param_key: param_value} - else: - if param_key != "change_nothing": - settings.update({param_key: param_value}) - run_kilosort_kwargs = {} + if param_key in RUN_KILOSORT_ARGS: + run_kilosort_kwargs = {param_key: param_value} + else: + if param_key != "change_nothing": + settings.update({param_key: param_value}) + run_kilosort_kwargs = {} ks_format_probe = load_probe(paths["probe_path"]) @@ -576,31 +586,12 @@ def _get_spikeinterface_settings(self, param_key, param_value): """ settings = {} # copy.deepcopy(DEFAULT_SETTINGS) - if param_key != "change_nothing": - settings.update({param_key: param_value}) - if param_key == "binning_depth": settings.update({"nblocks": 5}) + settings.update({param_key: param_value}) + # for name in ["n_chan_bin", "fs", "tmin", "tmax"]: # settings.pop(name) return settings - - def _get_sorting_output(self, kilosort_output_dir=None, spikeinterface_output_dir=None) -> dict[str, Any]: - """ - Load the results of sorting into a dict for easy comparison. - """ - results = { - "si": {}, - "ks": {}, - } - if kilosort_output_dir: - results["ks"]["st"] = np.load(kilosort_output_dir / "spike_times.npy") - results["ks"]["clus"] = np.load(kilosort_output_dir / "spike_clusters.npy") - - if spikeinterface_output_dir: - results["si"]["st"] = np.load(spikeinterface_output_dir / "sorter_output" / "spike_times.npy") - results["si"]["clus"] = np.load(spikeinterface_output_dir / "sorter_output" / "spike_clusters.npy") - - return results From 10b7e1adc68c51fb784c3529f3638b3ab0d9de3d Mon Sep 17 00:00:00 2001 From: Alessio Buccino Date: Tue, 3 Sep 2024 12:20:20 +0200 Subject: [PATCH 160/187] Remove last change_nothing --- .github/scripts/test_kilosort4_ci.py | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/.github/scripts/test_kilosort4_ci.py b/.github/scripts/test_kilosort4_ci.py index 35946a5a56..dbd8135b9a 100644 --- a/.github/scripts/test_kilosort4_ci.py +++ b/.github/scripts/test_kilosort4_ci.py @@ -17,6 +17,7 @@ - Do some tests to check all KS4 parameters are tested against. 
""" + import pytest import copy from typing import Any @@ -91,7 +92,7 @@ "acg_threshold", "cluster_downsampling", "cluster_pcs", - "duplicate_spike_ms" # this is because gorund-truth spikes don't have violations + "duplicate_spike_ms", # this is because gorund-truth spikes don't have violations ] # THIS IS A PLACEHOLDER FOR FUTURE PARAMS TO TEST @@ -145,7 +146,6 @@ def default_kilosort_sorting(self, recording_and_paths): return si.read_kilosort(defaults_ks_output_dir) - def _get_ground_truth_recording(self): """ A ground truth recording chosen to be as small as possible (for speed). @@ -225,7 +225,9 @@ def test_spikeinterface_defaults_against_kilsort(self): # Testing Arguments ### def test_set_files_arguments(self): - self._check_arguments(set_files, ["settings", "filename", "probe", "probe_name", "data_dir", "results_dir", "bad_channels"]) + self._check_arguments( + set_files, ["settings", "filename", "probe", "probe_name", "data_dir", "results_dir", "bad_channels"] + ) def test_initialize_ops_arguments(self): expected_arguments = [ @@ -247,13 +249,17 @@ def test_compute_preprocessing_arguments(self): self._check_arguments(compute_preprocessing, ["ops", "device", "tic0", "file_object"]) def test_compute_drift_location_arguments(self): - self._check_arguments(compute_drift_correction, ["ops", "device", "tic0", "progress_bar", "file_object", "clear_cache"]) + self._check_arguments( + compute_drift_correction, ["ops", "device", "tic0", "progress_bar", "file_object", "clear_cache"] + ) def test_detect_spikes_arguments(self): self._check_arguments(detect_spikes, ["ops", "device", "bfile", "tic0", "progress_bar", "clear_cache"]) def test_cluster_spikes_arguments(self): - self._check_arguments(cluster_spikes, ["st", "tF", "ops", "device", "bfile", "tic0", "progress_bar", "clear_cache"]) + self._check_arguments( + cluster_spikes, ["st", "tF", "ops", "device", "bfile", "tic0", "progress_bar", "clear_cache"] + ) def test_save_sorting_arguments(self): expected_arguments = ["ops", "results_dir", "st", "clu", "tF", "Wall", "imin", "tic0", "save_extra_vars"] @@ -397,7 +403,6 @@ def test_kilosort4_no_correction(self, recording_and_paths, tmp_path): ) check_sortings_equal(sorting_ks, sorting_si) - def test_use_binary_file(self, tmp_path): """ Test that the SpikeInterface wrapper can run KS4 using a binary file as input or directly @@ -410,21 +415,21 @@ def test_use_binary_file(self, tmp_path): sorting_ks4 = si.run_sorter( "kilosort4", recording, - folder = tmp_path / "spikeinterface_output_dir_wrapper", + folder=tmp_path / "spikeinterface_output_dir_wrapper", use_binary_file=False, remove_existing_folder=True, ) sorting_ks4_bin = si.run_sorter( "kilosort4", recording_bin, - folder = tmp_path / "spikeinterface_output_dir_bin", + folder=tmp_path / "spikeinterface_output_dir_bin", use_binary_file=False, remove_existing_folder=True, ) sorting_ks4_non_bin = si.run_sorter( "kilosort4", recording, - folder = tmp_path / "spikeinterface_output_dir_non_bin", + folder=tmp_path / "spikeinterface_output_dir_non_bin", use_binary_file=True, remove_existing_folder=True, ) @@ -546,7 +551,6 @@ def monkeypatch_filter_function(self, X, ops=None, ibatch=None): results["si"]["clus"] = np.load(spikeinterface_output_dir / "sorter_output" / "spike_clusters.npy") assert np.allclose(results["ks"]["st"], results["si"]["st"], rtol=0, atol=1) - ##### Helpers ###### def _get_kilosort_native_settings(self, recording, paths, param_key, param_value): """ @@ -571,8 +575,7 @@ def _get_kilosort_native_settings(self, recording, paths, 
param_key, param_value if param_key in RUN_KILOSORT_ARGS: run_kilosort_kwargs = {param_key: param_value} else: - if param_key != "change_nothing": - settings.update({param_key: param_value}) + settings.update({param_key: param_value}) run_kilosort_kwargs = {} ks_format_probe = load_probe(paths["probe_path"]) From 007b64de84ce0ad432d980064e9226d9cc83df39 Mon Sep 17 00:00:00 2001 From: Alessio Buccino Date: Tue, 3 Sep 2024 17:02:55 +0200 Subject: [PATCH 161/187] Allow use_binary_file=None (default) and add delete_recording_dat param --- .github/scripts/test_kilosort4_ci.py | 31 +++++++-- .../sorters/external/kilosort4.py | 65 ++++++++++++------- 2 files changed, 68 insertions(+), 28 deletions(-) diff --git a/.github/scripts/test_kilosort4_ci.py b/.github/scripts/test_kilosort4_ci.py index dbd8135b9a..61c10fd8e8 100644 --- a/.github/scripts/test_kilosort4_ci.py +++ b/.github/scripts/test_kilosort4_ci.py @@ -415,27 +415,46 @@ def test_use_binary_file(self, tmp_path): sorting_ks4 = si.run_sorter( "kilosort4", recording, - folder=tmp_path / "spikeinterface_output_dir_wrapper", - use_binary_file=False, + folder=tmp_path / "ks4_output_si_wrapper_default", + use_binary_file=None, remove_existing_folder=True, ) sorting_ks4_bin = si.run_sorter( "kilosort4", recording_bin, - folder=tmp_path / "spikeinterface_output_dir_bin", + folder=tmp_path / "ks4_output_bin_default", + use_binary_file=None, + remove_existing_folder=True, + ) + sorting_ks4_force_binary = si.run_sorter( + "kilosort4", + recording, + folder=tmp_path / "ks4_output_force_bin", + use_binary_file=True, + remove_existing_folder=True, + ) + assert not (tmp_path / "ks4_output_force_bin" / "sorter_output" / "recording.dat").exists() + sorting_ks4_force_non_binary = si.run_sorter( + "kilosort4", + recording_bin, + folder=tmp_path / "ks4_output_force_wrapper", use_binary_file=False, remove_existing_folder=True, ) - sorting_ks4_non_bin = si.run_sorter( + # test deleting recording.dat + sorting_ks4_force_binary_keep = si.run_sorter( "kilosort4", recording, - folder=tmp_path / "spikeinterface_output_dir_non_bin", + folder=tmp_path / "ks4_output_force_bin_keep", use_binary_file=True, + delete_recording_dat=False, remove_existing_folder=True, ) + assert (tmp_path / "ks4_output_force_bin_keep" / "sorter_output" / "recording.dat").exists() check_sortings_equal(sorting_ks4, sorting_ks4_bin) - check_sortings_equal(sorting_ks4, sorting_ks4_non_bin) + check_sortings_equal(sorting_ks4, sorting_ks4_force_binary) + check_sortings_equal(sorting_ks4, sorting_ks4_force_non_binary) @pytest.mark.parametrize( "param_to_test", diff --git a/src/spikeinterface/sorters/external/kilosort4.py b/src/spikeinterface/sorters/external/kilosort4.py index b0ba054e2d..8a15642af4 100644 --- a/src/spikeinterface/sorters/external/kilosort4.py +++ b/src/spikeinterface/sorters/external/kilosort4.py @@ -65,7 +65,8 @@ class Kilosort4Sorter(BaseSorter): "save_preprocessed_copy": False, "torch_device": "auto", "bad_channels": None, - "use_binary_file": False, + "use_binary_file": None, + "delete_recording_dat": True, } _params_description = { @@ -110,8 +111,10 @@ class Kilosort4Sorter(BaseSorter): "save_preprocessed_copy": "save a pre-processed copy of the data (including drift correction) to temp_wh.dat in the results directory and format Phy output to use that copy of the data", "torch_device": "Select the torch device auto/cuda/cpu", "bad_channels": "A list of channel indices (rows in the binary file) that should not be included in sorting. 
Listing channels here is equivalent to excluding them from the probe dictionary.", - "use_binary_file": "If True and the recording is not binary compatible, then Kilosort is written to a binary file in the output folder. If False, the Kilosort is run on the recording object directly using the RecordingExtractorAsArray object. " - "If the recording is binary compatible, then the sorter will always use the binary file. Default is False.", + "use_binary_file": "If True then Kilosort is run using a binary file. In this case, if the input recording is not binaru compatible, it is written to a binary file in the output folder. " + "If False then Kilosort is run on the recording object directly using the RecordingExtractorAsArray object. If None, then if the recording is binary compatible, the sorter will use the binary file, otherwise the RecordingExtractorAsArray. " + "Default is None.", + "delete_recording_dat": "If True, if a temporary binary file is created, it is deleted after the sorting is done. Default is True.", } sorter_description = """Kilosort4 is a Python package for spike sorting on GPUs with template matching. @@ -172,15 +175,16 @@ def _setup_recording(cls, recording, sorter_output_folder, params, verbose): probe_filename = sorter_output_folder / "probe.prb" write_prb(probe_filename, pg) - if params["use_binary_file"] and not recording.binary_compatible_with(time_axis=0, file_paths_lenght=1): - # local copy needed - binary_file_path = sorter_output_folder / "recording.dat" - write_binary_recording( - recording=recording, - file_paths=[binary_file_path], - **get_job_kwargs(params, verbose), - ) - params["filename"] = str(binary_file_path) + if params["use_binary_file"]: + if not recording.binary_compatible_with(time_axis=0, file_paths_lenght=1): + # local copy needed + binary_file_path = sorter_output_folder / "recording.dat" + write_binary_recording( + recording=recording, + file_paths=[binary_file_path], + **get_job_kwargs(params, verbose), + ) + params["filename"] = str(binary_file_path) @classmethod def _run_from_folder(cls, sorter_output_folder, params, verbose): @@ -227,18 +231,30 @@ def _run_from_folder(cls, sorter_output_folder, params, verbose): probe = load_probe(probe_path=probe_filename) probe_name = "" - if recording.binary_compatible_with(time_axis=0, file_paths_lenght=1): - # no copy - binary_description = recording.get_binary_description() - filename = str(binary_description["file_paths"][0]) - file_object = None + if params["use_binary_file"] is None: + if recording.binary_compatible_with(time_axis=0, file_paths_lenght=1): + # no copy + binary_description = recording.get_binary_description() + filename = str(binary_description["file_paths"][0]) + file_object = None + else: + # the recording is not binary compatible and no binary copy has been written. 
+ # in this case, we use the RecordingExtractorAsArray object + filename = "" + file_object = RecordingExtractorAsArray(recording_extractor=recording) elif params["use_binary_file"]: - # a local copy has been written - filename = str(sorter_output_folder / "recording.dat") - file_object = None + # here we force the use of a binary file + if recording.binary_compatible_with(time_axis=0, file_paths_lenght=1): + # no copy + binary_description = recording.get_binary_description() + filename = str(binary_description["file_paths"][0]) + file_object = None + else: + # a local copy has been written + filename = str(sorter_output_folder / "recording.dat") + file_object = None else: - # the recording is not binary compatible and no binary copy has been written. - # in this case, we use the RecordingExtractorAsArray object + # here we force the use of the RecordingExtractorAsArray object filename = "" file_object = RecordingExtractorAsArray(recording_extractor=recording) @@ -362,6 +378,11 @@ def _run_from_folder(cls, sorter_output_folder, params, verbose): save_preprocessed_copy=save_preprocessed_copy, ) + if params["delete_recording_dat"]: + # only delete dat file if it was created by the wrapper + if (sorter_output_folder / "recording.dat").is_file(): + (sorter_output_folder / "recording.dat").unlink() + @classmethod def _get_result_from_folder(cls, sorter_output_folder): return KilosortBase._get_result_from_folder(sorter_output_folder) From 056c5d248f984aaf0ff3ccca9b30e61866c16a6d Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Tue, 3 Sep 2024 19:53:08 -0600 Subject: [PATCH 162/187] improve docstring api --- src/spikeinterface/comparison/paircomparisons.py | 4 +++- src/spikeinterface/sorters/runsorter.py | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/spikeinterface/comparison/paircomparisons.py b/src/spikeinterface/comparison/paircomparisons.py index fa5ec2d3d0..67a655a67c 100644 --- a/src/spikeinterface/comparison/paircomparisons.py +++ b/src/spikeinterface/comparison/paircomparisons.py @@ -224,7 +224,9 @@ class GroundTruthComparison(BasePairSorterComparison): tested_name : : str, default: None The name of sorter 2 delta_time : float, default: 0.4 - Number of ms to consider coincident spikes + Number of ms to consider coincident spikes. + This means that two spikes are considered simultaneous if they are within delta_time of each other or + mathematically abs(spike1_time - spike2_time) <= delta_time. 
match_score : float, default: 0.5 Minimum agreement score to match units chance_score : float, default: 0.1 diff --git a/src/spikeinterface/sorters/runsorter.py b/src/spikeinterface/sorters/runsorter.py index 80608f8973..a9fb64d87b 100644 --- a/src/spikeinterface/sorters/runsorter.py +++ b/src/spikeinterface/sorters/runsorter.py @@ -272,7 +272,9 @@ def run_sorter_local( # only classmethod call not instance (stateless at instance level but state is in folder) folder = SorterClass.initialize_folder(recording, folder, verbose, remove_existing_folder) SorterClass.set_params_to_folder(recording, folder, sorter_params, verbose) + # This writes parameters and recording to binary and can happen in the host SorterClass.setup_recording(recording, folder, verbose=verbose) + # This NEEDS to happen in the docker because of dependencies SorterClass.run_from_folder(folder, raise_error, verbose) if with_output: sorting = SorterClass.get_result_from_folder(folder, register_recording=True, sorting_info=True) From 46c6342d13b92d9dceb6b07bee780f6a726aa85b Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Tue, 3 Sep 2024 20:17:32 -0600 Subject: [PATCH 163/187] fix streaming extractor condition --- .github/workflows/all-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/all-tests.yml b/.github/workflows/all-tests.yml index b695c7d627..2a50c976a5 100644 --- a/.github/workflows/all-tests.yml +++ b/.github/workflows/all-tests.yml @@ -139,7 +139,7 @@ jobs: - name: Test streaming extractors shell: bash - if: env.RUN_STREAMING_EXTRACTORS_TESTS + if: env.RUN_STREAMING_EXTRACTORS_TESTS == 'true' run: | pip install -e .[streaming_extractors,test_extractors] ./.github/run_tests.sh "streaming_extractors" --no-virtual-env From f399f6ecc84ba0da3e33f5aefe99fa0248a7a578 Mon Sep 17 00:00:00 2001 From: Alessio Buccino Date: Wed, 4 Sep 2024 09:48:40 +0200 Subject: [PATCH 164/187] Update .github/scripts/test_kilosort4_ci.py Co-authored-by: Chris Halcrow <57948917+chrishalcrow@users.noreply.github.com> --- .github/scripts/test_kilosort4_ci.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/scripts/test_kilosort4_ci.py b/.github/scripts/test_kilosort4_ci.py index 61c10fd8e8..df4cb64216 100644 --- a/.github/scripts/test_kilosort4_ci.py +++ b/.github/scripts/test_kilosort4_ci.py @@ -92,7 +92,7 @@ "acg_threshold", "cluster_downsampling", "cluster_pcs", - "duplicate_spike_ms", # this is because gorund-truth spikes don't have violations + "duplicate_spike_ms", # this is because ground-truth spikes don't have violations ] # THIS IS A PLACEHOLDER FOR FUTURE PARAMS TO TEST From 464c6e3d59531cf8dd4126038c4ff23f9a33b69a Mon Sep 17 00:00:00 2001 From: Alessio Buccino Date: Wed, 4 Sep 2024 14:19:22 +0200 Subject: [PATCH 165/187] Update src/spikeinterface/sorters/external/kilosort4.py Co-authored-by: Zach McKenzie <92116279+zm711@users.noreply.github.com> --- src/spikeinterface/sorters/external/kilosort4.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spikeinterface/sorters/external/kilosort4.py b/src/spikeinterface/sorters/external/kilosort4.py index 8a15642af4..183f26d86c 100644 --- a/src/spikeinterface/sorters/external/kilosort4.py +++ b/src/spikeinterface/sorters/external/kilosort4.py @@ -111,7 +111,7 @@ class Kilosort4Sorter(BaseSorter): "save_preprocessed_copy": "save a pre-processed copy of the data (including drift correction) to temp_wh.dat in the results directory and format Phy output to use that copy of the data", "torch_device": 
"Select the torch device auto/cuda/cpu", "bad_channels": "A list of channel indices (rows in the binary file) that should not be included in sorting. Listing channels here is equivalent to excluding them from the probe dictionary.", - "use_binary_file": "If True then Kilosort is run using a binary file. In this case, if the input recording is not binaru compatible, it is written to a binary file in the output folder. " + "use_binary_file": "If True then Kilosort is run using a binary file. In this case, if the input recording is not binary compatible, it is written to a binary file in the output folder. " "If False then Kilosort is run on the recording object directly using the RecordingExtractorAsArray object. If None, then if the recording is binary compatible, the sorter will use the binary file, otherwise the RecordingExtractorAsArray. " "Default is None.", "delete_recording_dat": "If True, if a temporary binary file is created, it is deleted after the sorting is done. Default is True.", From 0ed4876dfedb175ca08c90d94c8a1d3e215d0586 Mon Sep 17 00:00:00 2001 From: Alessio Buccino Date: Wed, 4 Sep 2024 14:25:57 +0200 Subject: [PATCH 166/187] Extend check on clus --- .github/scripts/test_kilosort4_ci.py | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/scripts/test_kilosort4_ci.py b/.github/scripts/test_kilosort4_ci.py index df4cb64216..1da2f2ba92 100644 --- a/.github/scripts/test_kilosort4_ci.py +++ b/.github/scripts/test_kilosort4_ci.py @@ -569,6 +569,7 @@ def monkeypatch_filter_function(self, X, ops=None, ibatch=None): results["si"]["st"] = np.load(spikeinterface_output_dir / "sorter_output" / "spike_times.npy") results["si"]["clus"] = np.load(spikeinterface_output_dir / "sorter_output" / "spike_clusters.npy") assert np.allclose(results["ks"]["st"], results["si"]["st"], rtol=0, atol=1) + assert np.array_equal(results["ks"]["clus"], results["si"]["clus"]) ##### Helpers ###### def _get_kilosort_native_settings(self, recording, paths, param_key, param_value): From 149c5d4daba621909af454d433bdde09d83bec56 Mon Sep 17 00:00:00 2001 From: Alessio Buccino Date: Wed, 4 Sep 2024 14:49:23 +0200 Subject: [PATCH 167/187] Apply suggestions from code review Co-authored-by: Zach McKenzie <92116279+zm711@users.noreply.github.com> --- src/spikeinterface/comparison/paircomparisons.py | 2 +- src/spikeinterface/sorters/runsorter.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/spikeinterface/comparison/paircomparisons.py b/src/spikeinterface/comparison/paircomparisons.py index 67a655a67c..ec1ad7753d 100644 --- a/src/spikeinterface/comparison/paircomparisons.py +++ b/src/spikeinterface/comparison/paircomparisons.py @@ -225,7 +225,7 @@ class GroundTruthComparison(BasePairSorterComparison): The name of sorter 2 delta_time : float, default: 0.4 Number of ms to consider coincident spikes. - This means that two spikes are considered simultaneous if they are within delta_time of each other or + This means that two spikes are considered simultaneous if they are within `delta_time` of each other or mathematically abs(spike1_time - spike2_time) <= delta_time. 
match_score : float, default: 0.5 Minimum agreement score to match units diff --git a/src/spikeinterface/sorters/runsorter.py b/src/spikeinterface/sorters/runsorter.py index a9fb64d87b..6a24c8814b 100644 --- a/src/spikeinterface/sorters/runsorter.py +++ b/src/spikeinterface/sorters/runsorter.py @@ -272,7 +272,7 @@ def run_sorter_local( # only classmethod call not instance (stateless at instance level but state is in folder) folder = SorterClass.initialize_folder(recording, folder, verbose, remove_existing_folder) SorterClass.set_params_to_folder(recording, folder, sorter_params, verbose) - # This writes parameters and recording to binary and can happen in the host + # This writes parameters and recording to binary and could ideally happen in the host SorterClass.setup_recording(recording, folder, verbose=verbose) # This NEEDS to happen in the docker because of dependencies SorterClass.run_from_folder(folder, raise_error, verbose) From 9cd537d7474b9342ddbb5ad62502810fe8308c62 Mon Sep 17 00:00:00 2001 From: Alessio Buccino Date: Wed, 4 Sep 2024 15:35:53 +0200 Subject: [PATCH 168/187] Add BaseRecording.reset_times() function --- src/spikeinterface/core/baserecording.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/spikeinterface/core/baserecording.py b/src/spikeinterface/core/baserecording.py index fe670cbf3a..d40ab021b2 100644 --- a/src/spikeinterface/core/baserecording.py +++ b/src/spikeinterface/core/baserecording.py @@ -495,6 +495,14 @@ def set_times(self, times, segment_index=None, with_warning=True): "Use this carefully!" ) + def reset_times(self): + """Reset times in-memory for all segments that have a time vector.""" + for segment_index in range(self.get_num_segments()): + if self.has_time_vector(segment_index): + rs = self._recording_segments[segment_index] + rs.t_start = None + rs.time_vector = None + def sample_index_to_time(self, sample_ind, segment_index=None): """ Transform sample index into time in seconds From 3c9a3e432356e0d7d4fe1a0ed9e9b1b3c661bc08 Mon Sep 17 00:00:00 2001 From: Alessio Buccino Date: Wed, 4 Sep 2024 15:38:31 +0200 Subject: [PATCH 169/187] Add test --- src/spikeinterface/core/tests/test_baserecording.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/spikeinterface/core/tests/test_baserecording.py b/src/spikeinterface/core/tests/test_baserecording.py index 682881af8a..3758fc3b43 100644 --- a/src/spikeinterface/core/tests/test_baserecording.py +++ b/src/spikeinterface/core/tests/test_baserecording.py @@ -289,6 +289,11 @@ def test_BaseRecording(create_cache_folder): rec3 = load_extractor(folder) assert np.allclose(times1, rec3.get_times(1)) + # reset times + rec.reset_times() + for segm in range(num_seg): + assert not rec.has_time_vector(segment_index=segm) + # test 3d probe rec_3d = generate_recording(ndim=3, num_channels=30) locations_3d = rec_3d.get_property("location") From d18f48f7ca9a3168c15fdd896377a64c56695f9c Mon Sep 17 00:00:00 2001 From: Alessio Buccino Date: Wed, 4 Sep 2024 15:45:29 +0200 Subject: [PATCH 170/187] Add extra protection for template metrix --- .../postprocessing/template_metrics.py | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/src/spikeinterface/postprocessing/template_metrics.py b/src/spikeinterface/postprocessing/template_metrics.py index e54ff87221..9d21e56611 100644 --- a/src/spikeinterface/postprocessing/template_metrics.py +++ b/src/spikeinterface/postprocessing/template_metrics.py @@ -8,11 +8,9 @@ import numpy as np import warnings -from typing import Optional 
 from copy import deepcopy

 from ..core.sortinganalyzer import register_result_extension, AnalyzerExtension
-from ..core import ChannelSparsity
 from ..core.template_tools import get_template_extremum_channel
 from ..core.template_tools import get_dense_templates_array

@@ -238,13 +236,17 @@ def _compute_metrics(self, sorting_analyzer, unit_ids=None, verbose=False, **job

         for metric_name in metrics_single_channel:
             func = _metric_name_to_func[metric_name]
-            value = func(
-                template_upsampled,
-                sampling_frequency=sampling_frequency_up,
-                trough_idx=trough_idx,
-                peak_idx=peak_idx,
-                **self.params["metrics_kwargs"],
-            )
+            try:
+                value = func(
+                    template_upsampled,
+                    sampling_frequency=sampling_frequency_up,
+                    trough_idx=trough_idx,
+                    peak_idx=peak_idx,
+                    **self.params["metrics_kwargs"],
+                )
+            except Exception as e:
+                warnings.warn(f"Error computing metric {metric_name} for unit {unit_id}: {e}")
+                value = np.nan
             template_metrics.at[index, metric_name] = value

         # compute metrics multi_channel

From f0744d5e45437b93a27f6c252dc615057e316b8e Mon Sep 17 00:00:00 2001
From: tabedzki <35670232+tabedzki@users.noreply.github.com>
Date: Wed, 4 Sep 2024 10:04:02 -0400
Subject: [PATCH 171/187] Update src/spikeinterface/core/sortinganalyzer.py

Co-authored-by: Alessio Buccino
---
 src/spikeinterface/core/sortinganalyzer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/spikeinterface/core/sortinganalyzer.py b/src/spikeinterface/core/sortinganalyzer.py
index 9051a6b8af..d7ea1f98f1 100644
--- a/src/spikeinterface/core/sortinganalyzer.py
+++ b/src/spikeinterface/core/sortinganalyzer.py
@@ -1122,7 +1122,7 @@ def get_num_units(self) -> int:
         return self.sorting.get_num_units()

     ## extensions zone
-    def compute(self, input, save=True, extension_params=None, verbose=False, **kwargs) -> "SortingAnalyzer | None":
+    def compute(self, input, save=True, extension_params=None, verbose=False, **kwargs) -> "AnalyzerExtension | None":
         """
         Compute one extension or several extensions.
         Internally calls compute_one_extension() or compute_several_extensions() depending on the input type.

From 6bdf0bf11f123072ea9dc24db16ffab621c01fa9 Mon Sep 17 00:00:00 2001
From: tabedzki <35670232+tabedzki@users.noreply.github.com>
Date: Wed, 4 Sep 2024 11:43:26 -0400
Subject: [PATCH 172/187] Update src/spikeinterface/curation/remove_excess_spikes.py

Co-authored-by: Alessio Buccino
---
 src/spikeinterface/curation/remove_excess_spikes.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/spikeinterface/curation/remove_excess_spikes.py b/src/spikeinterface/curation/remove_excess_spikes.py
index cceac7e8da..0d70e264a9 100644
--- a/src/spikeinterface/curation/remove_excess_spikes.py
+++ b/src/spikeinterface/curation/remove_excess_spikes.py
@@ -85,7 +85,7 @@ def get_unit_spike_train(
         return spike_train[min_spike:max_spike]


-def remove_excess_spikes(sorting: BaseSorting, recording):
+def remove_excess_spikes(sorting: BaseSorting, recording: BaseRecording):
     """
     Remove excess spikes from the spike trains.
     Excess spikes are the ones exceeding a recording number of samples, for each segment.
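
A minimal usage sketch of the curation helper whose signature is annotated in the patch above; the toy recording/sorting pair is generated here only for illustration, and any matched recording and sorting works:

    import spikeinterface.full as si
    from spikeinterface.curation import remove_excess_spikes

    # toy paired recording/sorting, for illustration only
    recording, sorting = si.generate_ground_truth_recording(durations=[5.0], num_units=3, seed=0)

    # drop spikes whose sample index falls beyond the end of the
    # corresponding recording segment
    clean_sorting = remove_excess_spikes(sorting=sorting, recording=recording)
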
From ae1b6d843cef7eefb50f77bbda156ed8ca821b60 Mon Sep 17 00:00:00 2001 From: Christian Tabedzki <35670232+tabedzki@users.noreply.github.com> Date: Wed, 4 Sep 2024 11:56:25 -0400 Subject: [PATCH 173/187] Removed Iterable from the function type hint --- src/spikeinterface/comparison/comparisontools.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/spikeinterface/comparison/comparisontools.py b/src/spikeinterface/comparison/comparisontools.py index d35ed87aac..9b5304b0a7 100644 --- a/src/spikeinterface/comparison/comparisontools.py +++ b/src/spikeinterface/comparison/comparisontools.py @@ -3,13 +3,10 @@ """ from __future__ import annotations -from typing import Iterable - - import numpy as np -def count_matching_events(times1: Iterable, times2: Iterable, delta: int = 10): +def count_matching_events(times1, times2: np.ndarray | list, delta: int = 10): """ Counts matching events. From e6b99da61eee1b896409c987339b5833c9bfcf68 Mon Sep 17 00:00:00 2001 From: Christian Tabedzki <35670232+tabedzki@users.noreply.github.com> Date: Wed, 4 Sep 2024 12:44:01 -0400 Subject: [PATCH 174/187] Removed return of current in --- src/spikeinterface/core/core_tools.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/spikeinterface/core/core_tools.py b/src/spikeinterface/core/core_tools.py index 4fbba9f57b..380562dbd5 100644 --- a/src/spikeinterface/core/core_tools.py +++ b/src/spikeinterface/core/core_tools.py @@ -257,15 +257,13 @@ def set_value_in_extractor_dict(extractor_dict: dict, access_path: tuple, new_va Returns ------- - dict - The modified dictionary + None """ current = extractor_dict for key in access_path[:-1]: current = current[key] current[access_path[-1]] = new_value - return current def recursive_path_modifier(d, func, target="path", copy=True) -> dict: From 5953d6cdaac993928988a715725b824edf11d00a Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Wed, 4 Sep 2024 18:25:57 -0600 Subject: [PATCH 175/187] quality in phy to string --- src/spikeinterface/extractors/phykilosortextractors.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/spikeinterface/extractors/phykilosortextractors.py b/src/spikeinterface/extractors/phykilosortextractors.py index 7ffbc166de..a75ce97198 100644 --- a/src/spikeinterface/extractors/phykilosortextractors.py +++ b/src/spikeinterface/extractors/phykilosortextractors.py @@ -159,7 +159,8 @@ def __init__( self.set_property(key="original_cluster_id", values=cluster_info[prop_name]) elif prop_name == "group": # rename group property to 'quality' - self.set_property(key="quality", values=cluster_info[prop_name]) + values = cluster_info[prop_name].values.astype("str") + self.set_property(key="quality", values=values) else: if load_all_cluster_properties: # pandas loads strings with empty values as objects with NaNs From 7d50fc5e238c75d9db490174e4d156f73aa697eb Mon Sep 17 00:00:00 2001 From: Alessio Buccino Date: Thu, 5 Sep 2024 10:19:18 +0200 Subject: [PATCH 176/187] Improve reset_times docstring --- src/spikeinterface/core/baserecording.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/spikeinterface/core/baserecording.py b/src/spikeinterface/core/baserecording.py index d40ab021b2..766429f8c9 100644 --- a/src/spikeinterface/core/baserecording.py +++ b/src/spikeinterface/core/baserecording.py @@ -496,7 +496,11 @@ def set_times(self, times, segment_index=None, with_warning=True): ) def reset_times(self): - """Reset times in-memory for all segments that have a time vector.""" +
""" + Reset times in-memory for all segments that have a time vector. + If the timestamps come from a file, the files won't be modified, but only the in-memory + attributes of the recording objects are deleted. + """ for segment_index in range(self.get_num_segments()): if self.has_time_vector(segment_index): rs = self._recording_segments[segment_index] From ef491b8b8647dcb24e471f43585067455ff10bd3 Mon Sep 17 00:00:00 2001 From: Alessio Buccino Date: Thu, 5 Sep 2024 10:31:16 +0200 Subject: [PATCH 177/187] Load phy channel_group as group --- src/spikeinterface/extractors/phykilosortextractors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spikeinterface/extractors/phykilosortextractors.py b/src/spikeinterface/extractors/phykilosortextractors.py index a75ce97198..bc143ff33a 100644 --- a/src/spikeinterface/extractors/phykilosortextractors.py +++ b/src/spikeinterface/extractors/phykilosortextractors.py @@ -153,7 +153,7 @@ def __init__( self.extra_requirements.append("pandas") for prop_name in cluster_info.columns: - if prop_name in ["chan_grp", "ch_group"]: + if prop_name in ["chan_grp", "ch_group", "channel_group"]: self.set_property(key="group", values=cluster_info[prop_name]) elif prop_name == "cluster_id": self.set_property(key="original_cluster_id", values=cluster_info[prop_name]) From 8fbf100dfc0be3032a85f02a0cd857a42edea53a Mon Sep 17 00:00:00 2001 From: Alessio Buccino Date: Thu, 5 Sep 2024 13:50:29 +0200 Subject: [PATCH 178/187] Expose clear_cache argument in KS4 --- .github/scripts/test_kilosort4_ci.py | 24 +++++++++++++++++++ .../sorters/external/kilosort4.py | 23 +++++++++++++++--- 2 files changed, 44 insertions(+), 3 deletions(-) diff --git a/.github/scripts/test_kilosort4_ci.py b/.github/scripts/test_kilosort4_ci.py index 1da2f2ba92..6eeb71f1dd 100644 --- a/.github/scripts/test_kilosort4_ci.py +++ b/.github/scripts/test_kilosort4_ci.py @@ -368,6 +368,30 @@ def test_kilosort4_main(self, recording_and_paths, default_kilosort_sorting, tmp with pytest.raises(AssertionError): check_sortings_equal(default_kilosort_sorting, sorting_si) + def test_clear_cache(self, recording_and_paths, tmp_path): + """ + Test clear_cache parameter in kilosort4.run_kilosort + """ + recording, paths = recording_and_paths + + spikeinterface_output_dir = tmp_path / "spikeinterface_output_clear" + sorting_si_clear = si.run_sorter( + "kilosort4", + recording, + remove_existing_folder=True, + folder=spikeinterface_output_dir, + clear_cache=True + ) + spikeinterface_output_dir = tmp_path / "spikeinterface_output_no_clear" + sorting_si_no_clear = si.run_sorter( + "kilosort4", + recording, + remove_existing_folder=True, + folder=spikeinterface_output_dir, + clear_cache=False + ) + check_sortings_equal(sorting_si_clear, sorting_si_no_clear) + def test_kilosort4_no_correction(self, recording_and_paths, tmp_path): """ Test the SpikeInterface wrappers `do_correction` argument.
We set diff --git a/src/spikeinterface/sorters/external/kilosort4.py b/src/spikeinterface/sorters/external/kilosort4.py index 183f26d86c..4a8c9d1782 100644 --- a/src/spikeinterface/sorters/external/kilosort4.py +++ b/src/spikeinterface/sorters/external/kilosort4.py @@ -65,6 +65,7 @@ class Kilosort4Sorter(BaseSorter): "save_preprocessed_copy": False, "torch_device": "auto", "bad_channels": None, + "clear_cache": False, "use_binary_file": None, "delete_recording_dat": True, } @@ -111,6 +112,7 @@ class Kilosort4Sorter(BaseSorter): "save_preprocessed_copy": "save a pre-processed copy of the data (including drift correction) to temp_wh.dat in the results directory and format Phy output to use that copy of the data", "torch_device": "Select the torch device auto/cuda/cpu", "bad_channels": "A list of channel indices (rows in the binary file) that should not be included in sorting. Listing channels here is equivalent to excluding them from the probe dictionary.", + "clear_cache": "If True, force pytorch to free up memory reserved for its cache in between memory-intensive operations. Note that setting `clear_cache=True` is NOT recommended unless you encounter GPU out-of-memory errors, since this can result in slower sorting.", "use_binary_file": "If True then Kilosort is run using a binary file. In this case, if the input recording is not binary compatible, it is written to a binary file in the output folder. " "If False then Kilosort is run on the recording object directly using the RecordingExtractorAsArray object. If None, then if the recording is binary compatible, the sorter will use the binary file, otherwise the RecordingExtractorAsArray. " "Default is None.", @@ -284,6 +286,7 @@ def _run_from_folder(cls, sorter_output_folder, params, verbose): data_dir = "" results_dir = sorter_output_folder bad_channels = params["bad_channels"] + clear_cache = params["clear_cache"] filename, data_dir, results_dir, probe = set_files( settings=settings, @@ -347,17 +350,31 @@ def _run_from_folder(cls, sorter_output_folder, params, verbose): # this function applies both preprocessing and drift correction ops, bfile, st0 = compute_drift_correction( - ops=ops, device=device, tic0=tic0, progress_bar=progress_bar, file_object=file_object + ops=ops, + device=device, + tic0=tic0, + progress_bar=progress_bar, + file_object=file_object, + clear_cache=clear_cache, ) if save_preprocessed_copy: save_preprocessing(results_dir / "temp_wh.dat", ops, bfile) # Sort spikes and save results - st, tF, _, _ = detect_spikes(ops=ops, device=device, bfile=bfile, tic0=tic0, progress_bar=progress_bar) + st, tF, _, _ = detect_spikes( + ops=ops, device=device, bfile=bfile, tic0=tic0, progress_bar=progress_bar, clear_cache=clear_cache + ) clu, Wall = cluster_spikes( - st=st, tF=tF, ops=ops, device=device, bfile=bfile, tic0=tic0, progress_bar=progress_bar + st=st, + tF=tF, + ops=ops, + device=device, + bfile=bfile, + tic0=tic0, + progress_bar=progress_bar, + clear_cache=clear_cache, ) if params["skip_kilosort_preprocessing"]: From fd61bb6cd0baa65efd5fde22af95da9c80c9d8cf Mon Sep 17 00:00:00 2001 From: Alessio Buccino Date: Thu, 5 Sep 2024 14:58:27 +0200 Subject: [PATCH 179/187] Explicitly add (spikeinterface parameter) to KS4 param description --- .../sorters/external/kilosort4.py | 23 ++++++++++--------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/src/spikeinterface/sorters/external/kilosort4.py b/src/spikeinterface/sorters/external/kilosort4.py index 4a8c9d1782..e73ac2cb6c 100644 --- 
a/src/spikeinterface/sorters/external/kilosort4.py +++ b/src/spikeinterface/sorters/external/kilosort4.py @@ -57,15 +57,15 @@ class Kilosort4Sorter(BaseSorter): "cluster_pcs": 64, "x_centers": None, "duplicate_spike_ms": 0.25, - "do_correction": True, - "keep_good_only": False, - "save_extra_kwargs": False, - "skip_kilosort_preprocessing": False, "scaleproc": None, "save_preprocessed_copy": False, "torch_device": "auto", "bad_channels": None, "clear_cache": False, + "save_extra_vars": False, + "do_correction": True, + "keep_good_only": False, + "skip_kilosort_preprocessing": False, "use_binary_file": None, "delete_recording_dat": True, } @@ -105,18 +105,19 @@ class Kilosort4Sorter(BaseSorter): "cluster_pcs": "Maximum number of spatiotemporal PC features used for clustering. Default value: 64.", "x_centers": "Number of x-positions to use when determining center points for template groupings. If None, this will be determined automatically by finding peaks in channel density. For 2D array type probes, we recommend specifying this so that centers are placed every few hundred microns.", "duplicate_spike_bins": "Number of bins for which subsequent spikes from the same cluster are assumed to be artifacts. A value of 0 disables this step. Default value: 7.", - "do_correction": "If True, drift correction is performed", - "save_extra_kwargs": "If True, additional kwargs are saved to the output", - "skip_kilosort_preprocessing": "Can optionally skip the internal kilosort preprocessing", + "save_extra_vars": "If True, additional kwargs are saved to the output", "scaleproc": "int16 scaling of whitened data, if None set to 200.", - "save_preprocessed_copy": "save a pre-processed copy of the data (including drift correction) to temp_wh.dat in the results directory and format Phy output to use that copy of the data", + "save_preprocessed_copy": "Save a pre-processed copy of the data (including drift correction) to temp_wh.dat in the results directory and format Phy output to use that copy of the data", "torch_device": "Select the torch device auto/cuda/cpu", "bad_channels": "A list of channel indices (rows in the binary file) that should not be included in sorting. Listing channels here is equivalent to excluding them from the probe dictionary.", "clear_cache": "If True, force pytorch to free up memory reserved for its cache in between memory-intensive operations. Note that setting `clear_cache=True` is NOT recommended unless you encounter GPU out-of-memory errors, since this can result in slower sorting.", + "do_correction": "If True, drift correction is performed. Default is True. (spikeinterface parameter)", + "skip_kilosort_preprocessing": "Can optionally skip the internal kilosort preprocessing. (spikeinterface parameter)", + "keep_good_only": "If True, only the units labeled as 'good' by Kilosort are returned in the output. (spikeinterface parameter)", "use_binary_file": "If True then Kilosort is run using a binary file. In this case, if the input recording is not binary compatible, it is written to a binary file in the output folder. " "If False then Kilosort is run on the recording object directly using the RecordingExtractorAsArray object. If None, then if the recording is binary compatible, the sorter will use the binary file, otherwise the RecordingExtractorAsArray. " - "Default is None.", - "delete_recording_dat": "If True, if a temporary binary file is created, it is deleted after the sorting is done. Default is True.", + "Default is None. 
(spikeinterface parameter)", + "delete_recording_dat": "If True, if a temporary binary file is created, it is deleted after the sorting is done. Default is True. (spikeinterface parameter)", } sorter_description = """Kilosort4 is a Python package for spike sorting on GPUs with template matching. @@ -264,7 +265,7 @@ def _run_from_folder(cls, sorter_output_folder, params, verbose): do_CAR = params["do_CAR"] invert_sign = params["invert_sign"] - save_extra_vars = params["save_extra_kwargs"] + save_extra_vars = params["save_extra_vars"] save_preprocessed_copy = params["save_preprocessed_copy"] progress_bar = None settings_ks = {k: v for k, v in params.items() if k in DEFAULT_SETTINGS} From df9efc9a39e8534daa81cb39efa9db3c1d51518b Mon Sep 17 00:00:00 2001 From: Alessio Buccino Date: Thu, 5 Sep 2024 15:11:57 +0200 Subject: [PATCH 180/187] Minor typing fixes --- src/spikeinterface/core/core_tools.py | 2 +- src/spikeinterface/core/recording_tools.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/spikeinterface/core/core_tools.py b/src/spikeinterface/core/core_tools.py index 380562dbd5..b3a857d158 100644 --- a/src/spikeinterface/core/core_tools.py +++ b/src/spikeinterface/core/core_tools.py @@ -201,7 +201,7 @@ def is_dict_extractor(d: dict) -> bool: extractor_dict_element = namedtuple(typename="extractor_dict_element", field_names=["value", "name", "access_path"]) -def extractor_dict_iterator(extractor_dict: dict) -> Generator[extractor_dict_element]: +def extractor_dict_iterator(extractor_dict: dict) -> Generator[extractor_dict_element, None, None]: """ Iterator for recursive traversal of a dictionary. This function explores the dictionary recursively and yields the path to each value along with the value itself. diff --git a/src/spikeinterface/core/recording_tools.py b/src/spikeinterface/core/recording_tools.py index b3eda360e7..34be7153b7 100644 --- a/src/spikeinterface/core/recording_tools.py +++ b/src/spikeinterface/core/recording_tools.py @@ -69,7 +69,7 @@ def _init_binary_worker(recording, file_path_dict, dtype, byte_offest, cast_unsi def write_binary_recording( recording: "BaseRecording", file_paths: list[Path | str] | Path | str, - dtype: np.ndtype = None, + dtype: np.dtype = None, add_file_extension: bool = True, byte_offset: int = 0, auto_cast_uint: bool = True, From daed5f1c976725efc02576f034974b5cfb737bdf Mon Sep 17 00:00:00 2001 From: Alessio Buccino Date: Fri, 6 Sep 2024 17:45:16 +0200 Subject: [PATCH 181/187] Fix time handling test memory --- src/spikeinterface/core/tests/test_time_handling.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/src/spikeinterface/core/tests/test_time_handling.py b/src/spikeinterface/core/tests/test_time_handling.py index 1b570091be..52afd9d216 100644 --- a/src/spikeinterface/core/tests/test_time_handling.py +++ b/src/spikeinterface/core/tests/test_time_handling.py @@ -49,19 +49,14 @@ def _get_time_vector_recording(self, raw_recording): spaced timeseries data. Return the original recording, recording with time vectors added and list including the added time vectors.
""" - times_recording = copy.deepcopy(raw_recording) + times_recording = raw_recording.clone() all_time_vectors = [] for segment_index in range(raw_recording.get_num_segments()): t_start = segment_index + 1 * 100 + t_stop = t_start + raw_recording.get_duration(segment_index) + segment_index + 1 - some_small_increasing_numbers = np.arange(times_recording.get_num_samples(segment_index)) * ( - 1 / times_recording.get_sampling_frequency() - ) - - offsets = np.cumsum(some_small_increasing_numbers) - time_vector = t_start + times_recording.get_times(segment_index) + offsets - + time_vector = np.linspace(t_start, t_stop, raw_recording.get_num_samples(segment_index)) all_time_vectors.append(time_vector) times_recording.set_times(times=time_vector, segment_index=segment_index) @@ -285,6 +280,7 @@ def test_sorting_analyzer_get_durations_from_recording(self, time_vector_recordi """ _, times_recording, _ = time_vector_recording + durations = [times_recording.get_duration(s) for s in range(times_recording.get_num_segments())] sorting = si.generate_sorting( durations=[times_recording.get_duration(s) for s in range(times_recording.get_num_segments())] ) From f032b1bd73b55e4f338c02764ae7d639f1c77438 Mon Sep 17 00:00:00 2001 From: Alessio Buccino Date: Fri, 6 Sep 2024 17:49:16 +0200 Subject: [PATCH 182/187] Update src/spikeinterface/core/tests/test_time_handling.py --- src/spikeinterface/core/tests/test_time_handling.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/spikeinterface/core/tests/test_time_handling.py b/src/spikeinterface/core/tests/test_time_handling.py index 52afd9d216..a129316ee7 100644 --- a/src/spikeinterface/core/tests/test_time_handling.py +++ b/src/spikeinterface/core/tests/test_time_handling.py @@ -280,7 +280,6 @@ def test_sorting_analyzer_get_durations_from_recordi """ _, times_recording, _ = time_vector_recording - durations = [times_recording.get_duration(s) for s in range(times_recording.get_num_segments())] sorting = si.generate_sorting( durations=[times_recording.get_duration(s) for s in range(times_recording.get_num_segments())] ) From ea13bcb9996e4894e7d9ea1be49fe6a2c5dee6c8 Mon Sep 17 00:00:00 2001 From: Alessio Buccino Date: Fri, 6 Sep 2024 19:16:27 +0200 Subject: [PATCH 183/187] Add protection for multi-channel metrics (thanks Chris) --- .../qualitymetrics/quality_metric_calculator.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/spikeinterface/qualitymetrics/quality_metric_calculator.py b/src/spikeinterface/qualitymetrics/quality_metric_calculator.py index 0c7cf25237..cdf6151e95 100644 --- a/src/spikeinterface/qualitymetrics/quality_metric_calculator.py +++ b/src/spikeinterface/qualitymetrics/quality_metric_calculator.py @@ -164,7 +164,10 @@ def _compute_metrics(self, sorting_analyzer, unit_ids=None, verbose=False, **job pc_metric_names = [k for k in metric_names if k in _possible_pc_metric_names] if len(pc_metric_names) > 0 and not self.params["skip_pc_metrics"]: if not sorting_analyzer.has_extension("principal_components"): - raise ValueError("waveform_principal_component must be provied") + raise ValueError( + "To compute principal components-based metrics, the principal components " + "extension must be computed first."
+ ) pc_metrics = compute_pc_metrics( sorting_analyzer, unit_ids=non_empty_unit_ids, From 4e000ed041b11a9f2195691caf0bcb39bca4a500 Mon Sep 17 00:00:00 2001 From: Alessio Buccino Date: Fri, 6 Sep 2024 19:24:07 +0200 Subject: [PATCH 184/187] same for multi-channel --- .../postprocessing/template_metrics.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/spikeinterface/postprocessing/template_metrics.py b/src/spikeinterface/postprocessing/template_metrics.py index 9d21e56611..726ec49558 100644 --- a/src/spikeinterface/postprocessing/template_metrics.py +++ b/src/spikeinterface/postprocessing/template_metrics.py @@ -276,12 +276,16 @@ def _compute_metrics(self, sorting_analyzer, unit_ids=None, verbose=False, **job sampling_frequency_up = sampling_frequency func = _metric_name_to_func[metric_name] - value = func( - template_upsampled, - channel_locations=channel_locations_sparse, - sampling_frequency=sampling_frequency_up, - **self.params["metrics_kwargs"], - ) + try: + value = func( + template_upsampled, + channel_locations=channel_locations_sparse, + sampling_frequency=sampling_frequency_up, + **self.params["metrics_kwargs"], + ) + except Exception as e: + warnings.warn(f"Error computing metric {metric_name} for unit {unit_id}: {e}") + value = np.nan template_metrics.at[index, metric_name] = value return template_metrics From 1fc89b4a128f4584fc560f963200b076734d2654 Mon Sep 17 00:00:00 2001 From: Alessio Buccino Date: Fri, 6 Sep 2024 19:33:33 +0200 Subject: [PATCH 185/187] Reset-times: segment must have either time vector or sampling frequency --- src/spikeinterface/core/baserecording.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/spikeinterface/core/baserecording.py b/src/spikeinterface/core/baserecording.py index f7918be7b0..225f070d9d 100644 --- a/src/spikeinterface/core/baserecording.py +++ b/src/spikeinterface/core/baserecording.py @@ -506,6 +506,7 @@ def reset_times(self): rs = self._recording_segments[segment_index] rs.t_start = None rs.time_vector = None + rs.sampling_frequency = self.sampling_frequency def sample_index_to_time(self, sample_ind, segment_index=None): """ From a3fef7055d3702481cee99f17821f910b84921c3 Mon Sep 17 00:00:00 2001 From: Alessio Buccino Date: Mon, 9 Sep 2024 17:54:01 +0200 Subject: [PATCH 186/187] Reset times also sets t_start to None --- src/spikeinterface/core/baserecording.py | 9 +++++---- src/spikeinterface/core/tests/test_baserecording.py | 4 ++++ 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/spikeinterface/core/baserecording.py b/src/spikeinterface/core/baserecording.py index 225f070d9d..3e5e43b528 100644 --- a/src/spikeinterface/core/baserecording.py +++ b/src/spikeinterface/core/baserecording.py @@ -497,16 +497,17 @@ def set_times(self, times, segment_index=None, with_warning=True): def reset_times(self): """ - Reset times in-memory for all segments that have a time vector. + Reset time information in-memory for all segments that have a time vector. If the timestamps come from a file, the files won't be modified, but only the in-memory attributes of the recording objects are deleted. Also `t_start` is set to None and the segment's sampling frequency is set to the recording's sampling frequency.
""" for segment_index in range(self.get_num_segments()): if self.has_time_vector(segment_index): rs = self._recording_segments[segment_index] - rs.t_start = None rs.time_vector = None - rs.sampling_frequency = self.sampling_frequency + rs.t_start = None + rs.sampling_frequency = self.sampling_frequency def sample_index_to_time(self, sample_ind, segment_index=None): """ diff --git a/src/spikeinterface/core/tests/test_baserecording.py b/src/spikeinterface/core/tests/test_baserecording.py index 3758fc3b43..9c354510ac 100644 --- a/src/spikeinterface/core/tests/test_baserecording.py +++ b/src/spikeinterface/core/tests/test_baserecording.py @@ -292,7 +292,11 @@ def test_BaseRecording(create_cache_folder): # reset times rec.reset_times() for segm in range(num_seg): + time_info = rec.get_time_info(segment_index=segm) assert not rec.has_time_vector(segment_index=segm) + assert time_info["t_start"] is None + assert time_info["time_vector"] is None + assert time_info["sampling_frequency"] == rec.sampling_frequency # test 3d probe rec_3d = generate_recording(ndim=3, num_channels=30) From 5775c36a5309a7b1b48698940ca83551bc4053f7 Mon Sep 17 00:00:00 2001 From: Alessio Buccino Date: Tue, 10 Sep 2024 10:23:11 +0200 Subject: [PATCH 187/187] Update src/spikeinterface/core/recording_tools.py --- src/spikeinterface/core/recording_tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spikeinterface/core/recording_tools.py b/src/spikeinterface/core/recording_tools.py index 34be7153b7..0ec5449bae 100644 --- a/src/spikeinterface/core/recording_tools.py +++ b/src/spikeinterface/core/recording_tools.py @@ -69,7 +69,7 @@ def _init_binary_worker(recording, file_path_dict, dtype, byte_offest, cast_unsi def write_binary_recording( recording: "BaseRecording", file_paths: list[Path | str] | Path | str, - dtype: np.dtype = None, + dtype: np.typing.DTypeLike = None, add_file_extension: bool = True, byte_offset: int = 0, auto_cast_uint: bool = True,
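Taken together, PATCH 176, 185, and 186 define the final `reset_times` behavior, and PATCH 178 threads the new `clear_cache` flag through the Kilosort4 wrapper. Below is a minimal usage sketch of the resulting behavior, assuming a spikeinterface build that includes these patches; the recording size and the 100 s offset are illustrative, not taken from the patches.

import numpy as np
from spikeinterface.core import generate_recording

# Illustrative recording: 4 channels, one 2-second segment (assumed sizes).
rec = generate_recording(num_channels=4, durations=[2.0])
fs = rec.sampling_frequency

# Attach an explicit time vector to segment 0 (the 100 s offset is arbitrary).
times = 100.0 + np.arange(rec.get_num_samples(segment_index=0)) / fs
rec.set_times(times=times, segment_index=0)
assert rec.has_time_vector(segment_index=0)

# reset_times() clears the in-memory time vector and t_start and restores the
# segment's sampling frequency; timestamp files on disk are never modified.
rec.reset_times()
time_info = rec.get_time_info(segment_index=0)
assert time_info["time_vector"] is None
assert time_info["t_start"] is None
assert time_info["sampling_frequency"] == fs

The `clear_cache` flag is forwarded from the sorter parameters to `compute_drift_correction`, `detect_spikes`, and `cluster_spikes`. A sketch of how a user would enable it, assuming Kilosort4 is installed (the output folder name is hypothetical):

from spikeinterface.sorters import run_sorter

# clear_cache=True forces pytorch to free its reserved cache between
# memory-intensive steps; per the parameter description, enable it only when
# hitting GPU out-of-memory errors, since it can slow down sorting.
sorting = run_sorter(
    "kilosort4",
    rec,
    folder="ks4_output",
    remove_existing_folder=True,
    clear_cache=True,
)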