Merge branch 'main' into bencmarks_components
yger authored Jun 14, 2024
2 parents ef67e80 + bd89c99 commit 2b4b2f9
Showing 213 changed files with 2,944 additions and 2,246 deletions.
39 changes: 27 additions & 12 deletions .github/import_test.py
@@ -18,37 +18,52 @@

n_samples = 10
# Note that the symbols at the end are for centering the table
-markdown_output = f"## \n\n| Imported Module ({n_samples=}) | Importing Time (seconds) | Standard Deviation (seconds) |\n| :--: | :--------------: | :------------------: |\n"
+markdown_output = f"## \n\n| Imported Module ({n_samples=}) | Importing Time (seconds) | Standard Deviation (seconds) | Times List (seconds) |\n| :--: | :--------------: | :------------------: | :-------------: |\n"

exceptions = []

for import_statement in import_statement_list:
    time_taken_list = []
    for _ in range(n_samples):
        script_to_execute = (
            f"import timeit \n"
            f"import_statement = '{import_statement}' \n"
            f"time_taken = timeit.timeit(import_statement, number=1) \n"
            f"print(time_taken) \n"
        )

        result = subprocess.run(["python", "-c", script_to_execute], capture_output=True, text=True)

        if result.returncode != 0:
            error_message = (
-                f"Error when running {import_statement} \n"
-                f"Error in subprocess: {result.stderr.strip()}\n"
+                f"Error when running {import_statement} \n" f"Error in subprocess: {result.stderr.strip()}\n"
            )
            exceptions.append(error_message)
            break

        time_taken = float(result.stdout.strip())
        time_taken_list.append(time_taken)

+    for time in time_taken_list:
+        import_time_threshold = 2.0  # Most of the times are sub-second but there are outliers
+        if time >= import_time_threshold:
+            exceptions.append(
+                f"Importing {import_statement} took: {time:.2f} s. Should be <: {import_time_threshold} s."
+            )
+            break

    if time_taken_list:
-        avg_time_taken = sum(time_taken_list) / len(time_taken_list)
-        std_dev_time_taken = math.sqrt(sum((x - avg_time_taken) ** 2 for x in time_taken_list) / len(time_taken_list))
-        markdown_output += f"| `{import_statement}` | {avg_time_taken:.2f} | {std_dev_time_taken:.2f} |\n"
+        avg_time = sum(time_taken_list) / len(time_taken_list)
+        std_time = math.sqrt(sum((x - avg_time) ** 2 for x in time_taken_list) / len(time_taken_list))
+        times_list_str = ", ".join(f"{time:.2f}" for time in time_taken_list)
+        markdown_output += f"| `{import_statement}` | {avg_time:.2f} | {std_time:.2f} | {times_list_str} |\n"

+        import_time_threshold = 1.0
+        if avg_time > import_time_threshold:
+            exceptions.append(
+                f"Importing {import_statement} took: {avg_time:.2f} s on average. Should be <: {import_time_threshold} s."
+            )

if exceptions:
    raise Exception("\n".join(exceptions))
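
A note on the approach above: each import is timed in a fresh interpreter via subprocess because a second import inside the same process would hit the sys.modules cache and report near-zero time. A minimal, self-contained sketch of that technique (illustrative only, not part of this commit; the helper name is invented):

    import statistics
    import subprocess
    import sys

    def time_import_in_fresh_interpreter(module_name, n_samples=3):
        """Time `import module_name` in a clean interpreter on each run so
        the sys.modules cache does not hide the real import cost."""
        script = f"import timeit; print(timeit.timeit('import {module_name}', number=1))"
        times = []
        for _ in range(n_samples):
            result = subprocess.run(
                [sys.executable, "-c", script], capture_output=True, text=True, check=True
            )
            times.append(float(result.stdout.strip()))
        return times

    times = time_import_in_fresh_interpreter("json")
    print(f"mean={statistics.mean(times):.4f} s, std={statistics.pstdev(times):.4f} s")
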
7 changes: 3 additions & 4 deletions .github/workflows/test_imports.yml
@@ -22,13 +22,13 @@ jobs:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
-         python-version: "3.10"
+         python-version: "3.11"
      - name: Install Spikeinterface with only core dependencies
        run: |
          git config --global user.email "[email protected]"
          git config --global user.name "CI Almighty"
          python -m pip install -U pip  # Official recommended way
-         pip install -e .  # This should install core only
+         pip install .  # This should install core only
      - name: Profile Imports
        run: |
          echo "## OS: ${{ matrix.os }}" >> $GITHUB_STEP_SUMMARY
@@ -38,8 +38,7 @@
        shell: bash  # Necessary for pipeline to work on windows
      - name: Install in full mode
        run: |
-         python -m pip install -U pip  # Official recommended way
-         pip install -e .[full]
+         pip install .[full]
      - name: Profile Imports with full
        run: |
          # Add a header to separate the two profiles
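
For context on the Profile Imports steps: anything appended to the file named by the GITHUB_STEP_SUMMARY environment variable is rendered as Markdown on the workflow summary page, which is where the table produced by import_test.py ends up. A minimal sketch of the same mechanism from Python (illustrative, not part of this commit):

    import os

    # GITHUB_STEP_SUMMARY is set by the GitHub Actions runner; appending
    # Markdown to that file adds it to the job summary page
    summary_file = os.environ["GITHUB_STEP_SUMMARY"]
    with open(summary_file, "a") as f:
        f.write("| Module | Import time (s) |\n| :--: | :--: |\n")
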
23 changes: 4 additions & 19 deletions conftest.py
@@ -14,16 +14,10 @@
              "widgets", "exporters", "sortingcomponents", "generation"]


-# define global test folder
-def pytest_sessionstart(session):
-    # setup_stuff
-    pytest.global_test_folder = Path(__file__).parent / "test_folder"
-    if pytest.global_test_folder.is_dir():
-        shutil.rmtree(pytest.global_test_folder)
-    pytest.global_test_folder.mkdir()
-
-    for mark_name in mark_names:
-        (pytest.global_test_folder / mark_name).mkdir()
+@pytest.fixture(scope="module")
+def create_cache_folder(tmp_path_factory):
+    cache_folder = tmp_path_factory.mktemp("cache_folder")
+    return cache_folder

def pytest_collection_modifyitems(config, items):
"""
@@ -45,12 +45,3 @@ def pytest_collection_modifyitems(config, items):
item.add_marker("sorters")
else:
item.add_marker(module)



def pytest_sessionfinish(session, exitstatus):
# teardown_stuff only if tests passed
# We don't delete the test folder in the CI because it was causing problems with the code coverage.
if exitstatus == 0:
if pytest.global_test_folder.is_dir() and not ON_GITHUB:
shutil.rmtree(pytest.global_test_folder)
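
The new create_cache_folder fixture replaces the old session-wide test_folder: tmp_path_factory hands each test module its own temporary directory, which pytest cleans up automatically, so no teardown hook is needed. A hypothetical test consuming it might look like this (illustrative, not part of this commit):

    # any test can request the fixture by naming it as an argument
    def test_saves_to_cache(create_cache_folder):
        cache_folder = create_cache_folder  # a pathlib.Path unique to this test module
        output_file = cache_folder / "results.txt"
        output_file.write_text("spike counts")
        assert output_file.exists()
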
1 change: 1 addition & 0 deletions doc/conf.py
@@ -67,6 +67,7 @@
    'numpydoc',
    'sphinx.ext.autosectionlabel',
    'sphinx_design',
+   'sphinxcontrib.jquery',
    "sphinx.ext.intersphinx",
    "sphinx.ext.extlinks",
    "IPython.sphinxext.ipython_directive",
13 changes: 7 additions & 6 deletions doc/development/development.rst
@@ -152,7 +152,7 @@ for providing parameters, however is a little different. The project prefers the

.. code-block:: bash

-    parameter_name: type, default: default_value
+    parameter_name : type, default: default_value

This allows users to quickly understand the type of data that should be input into a function as well as whether a default is supplied. A full example would be:
@@ -165,21 +165,22 @@ This allows users to quickly understand the type of data that should be input in
    Parameters
    ----------
-    param_a: dict
+    param_a : dict
        A dictionary containing the data
-    param_b: int, default: 5
+    param_b : int, default: 5
        A scaling factor to be applied to the data
-    param_c: "mean" | "median", default: "mean"
+    param_c : "mean" | "median", default: "mean"
        What to calculate on the data

    Returns
    -------
-    great_data: dict
+    great_data : dict
        A dictionary of the processed data
    """

-Note that in this example we demonstrate two other docstring conventions followed by SpikeInterface. First, that all string arguments should be presented
+There should be a space between each parameter and the colon following it. This is necessary for using the `numpydoc validator <https://numpydoc.readthedocs.io/en/latest/validation.html>`_.
+In the above example we demonstrate two other docstring conventions followed by SpikeInterface. First, that all string arguments should be presented
with double quotes. This is the same stylistic convention followed by Black and enforced by the pre-commit for the repo. Second, when a parameter is a
string with a limited number of values (e.g. :code:`mean` and :code:`median`), rather than give the type a value of :code:`str`, please list the possible strings
so that the user knows what the options are.
3 changes: 2 additions & 1 deletion doc/how_to/index.rst
@@ -1,5 +1,5 @@
How to Guides
-=========
+=============

Guides on how to solve specific, short problems in SpikeInterface. Learn how to...

@@ -12,3 +12,4 @@ Guides on how to solve specific, short problems in SpikeInterface. Learn how to.
   load_matlab_data
   combine_recordings
   process_by_channel_group
+   load_your_data_into_sorting
154 changes: 154 additions & 0 deletions doc/how_to/load_your_data_into_sorting.rst
@@ -0,0 +1,154 @@
Load Your Own Data into a Sorting
=================================

Why make a :code:`Sorting`?

SpikeInterface contains pre-built readers for the output of many common sorters.
However, what if you have sorting output that is not in a standard format (e.g. an
old csv file)? If this is the case you can make your own Sorting object to load
your data into SpikeInterface. This means you can still easily apply various
downstream analyses to your results (e.g. building correlograms or generating
a :code:`SortingAnalyzer`).

The Sorting object is a core object within SpikeInterface that acts as a convenient
way to interface with sorting results, no matter which sorter was used to generate
them. **At a fundamental level it is a series of spike times, a series of labels
for each unit, and a sampling frequency for transforming frames to time.** Below, we will show you how
to take your existing data and load it as a SpikeInterface :code:`Sorting` object.


Reading a standard spike sorting format into a :code:`Sorting`
-------------------------------------------------------------

For most spike sorting output formats the :code:`Sorting` is automatically generated. For example, one could do:

.. code-block:: python

    from spikeinterface.extractors import read_phy

    # For kilosort/phy output files we can use read_phy
    # most formats will have a read_xx that can be used
    phy_sorting = read_phy('path/to/folder')
And voilà, you now have your :code:`Sorting` object generated and can use it for further analysis. For all the
currently supported formats see :ref:`compatible_formats`.



Loading your own data into a :code:`Sorting`
-------------------------------------------


A :code:`Sorting` contains important information about your spike trains, including:

* spike times: the peaks of the extracellular potentials expressed in samples/frames; these can
  be converted to seconds under the hood using the sampling_frequency
* spike labels: the neuron id for each spike, also called cluster ids or unit ids;
  stored as the :code:`unit_ids` in SpikeInterface
* sampling_frequency: the rate at which the recording equipment was run. Note this is the
  frequency and not the period. This value allows for switching from samples/frames to seconds,
  as shown in the short sketch below
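
As a quick illustration of the frames-to-seconds conversion (plain Python, just to show the arithmetic; SpikeInterface does this for you under the hood):

.. code-block:: python

    sampling_frequency = 30_000.0  # samples (frames) per second
    spike_frames = [1000, 12000, 15000, 22000]

    # time in seconds = frame index / sampling frequency
    spike_times_s = [frame / sampling_frequency for frame in spike_frames]
    # -> [0.0333..., 0.4, 0.5, 0.7333...]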


There are 3 options for loading your own data into a sorting object:

With lists of spike trains and spike labels
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

In this case we need a list of spike times, a list of unit labels, the sampling_frequency, and optionally unit_ids
if you want specific labels to be used (in that case the :code:`Sorting` is created based only on
the requested unit_ids).

.. code-block:: python

    import numpy as np
    from spikeinterface.core import NumpySorting

    # in this case we are making a monosegment sorting
    # we have four spikes that are spread among two neurons
    my_sorting = NumpySorting.from_times_labels(
        times_list=[
            np.array([1000, 12000, 15000, 22000])  # Note these are samples/frames, not times in seconds
        ],
        labels_list=[
            np.array(["a", "b", "a", "b"])
        ],
        sampling_frequency=30_000.0
    )

With a unit dictionary
^^^^^^^^^^^^^^^^^^^^^^

We can also use a dictionary where each unit is a key and its spike times are the values.
This is entered either as a list of dicts, with each dict being a segment, or as a single
dict for a monosegment sorting. We still need to separately specify the sampling_frequency.

.. code-block:: python

    from spikeinterface.core import NumpySorting

    my_sorting = NumpySorting.from_unit_dict(
        units_dict_list={
            '0': [1000, 15000],
            '1': [12000, 22000],
        },
        sampling_frequency=30_000.0
    )
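
Once created, the :code:`Sorting` can be inspected right away. A small sketch (assuming the two-unit example above):

.. code-block:: python

    print(my_sorting.unit_ids)  # the unit labels, here '0' and '1'
    print(my_sorting.get_num_segments())  # 1, since this is a monosegment sorting

    # spike frames for one unit; divide by sampling_frequency to get seconds
    spike_train = my_sorting.get_unit_spike_train(unit_id='0')
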
With Neo SpikeTrains
^^^^^^^^^^^^^^^^^^^^

Finally, since SpikeInterface is tightly integrated with the Neo project, you can create
a sorting from :code:`Neo.SpikeTrain` objects. See the :doc:`Neo documentation <neo:index>` for more information on
using :code:`Neo.SpikeTrain` objects.

.. code-block:: python

    from spikeinterface.core import NumpySorting

    # neo_spiketrain is a Neo SpikeTrain object
    my_sorting = NumpySorting.from_neo_spiketrain_list(
        neo_spiketrain,
        sampling_frequency=30_000.0,
    )

Loading multisegment data into a :code:`Sorting`
-----------------------------------------------

One of the great advantages of SpikeInterface :code:`Sorting` objects is that they can also handle
multisegment recordings and sortings (e.g. you have a baseline, stimulus, and post-stimulus period). The
exact same machinery can be used to generate your sorting, but in this case we give a list of several arrays instead of
a list containing a single array. Let's go through one example using :code:`from_times_labels`:

.. code-block:: python

    import numpy as np
    from spikeinterface.core import NumpySorting

    # in this case we are making a three-segment sorting
    # we have four spikes that are spread among two neurons
    # in each segment
    my_sorting = NumpySorting.from_times_labels(
        times_list=[
            np.array([1000, 12000, 15000, 22000]),
            np.array([30000, 33000, 41000, 47000]),
            np.array([50000, 53000, 64000, 70000]),
        ],
        labels_list=[
            np.array([0, 1, 0, 1]),
            np.array([0, 0, 1, 1]),
            np.array([1, 0, 1, 0]),
        ],
        sampling_frequency=30_000.0
    )
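
With multiple segments, spike trains are retrieved per segment, for example (a sketch based on the three-segment sorting above):

.. code-block:: python

    # spikes of unit 0 in the second segment (segment_index is 0-based)
    spike_train = my_sorting.get_unit_spike_train(unit_id=0, segment_index=1)
    # -> array([30000, 33000])
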
Next steps
----------

Now that we've created a :code:`Sorting` object you can combine it with a :code:`Recording` to make a
:ref:`SortingAnalyzer<sphx_glr_tutorials_core_plot_4_sorting_analyzer.py>`
or start visualizing using plotting functions from our widgets module, such as
:py:func:`~spikeinterface.widgets.plot_crosscorrelograms`.
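
For instance, a minimal sketch of both next steps (assuming you already have a matching :code:`recording` object):

.. code-block:: python

    from spikeinterface import create_sorting_analyzer
    from spikeinterface.widgets import plot_crosscorrelograms

    analyzer = create_sorting_analyzer(sorting=my_sorting, recording=recording)
    plot_crosscorrelograms(my_sorting)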
1 change: 1 addition & 0 deletions doc/index.rst
@@ -58,6 +58,7 @@ SpikeInterface is made of several modules to deal with different aspects of the
   development/development
   whatisnew
   authors
+   references


Other resources