From 7011166771af5f284a905f78400bfb233864164e Mon Sep 17 00:00:00 2001
From: zm711 <92116279+zm711@users.noreply.github.com>
Date: Wed, 6 Mar 2024 13:28:30 -0500
Subject: [PATCH 1/2] sortinganalyzer doc cleanup

---
 .../plot_5_comparison_sorter_weaknesses.py    | 20 ++++-----
 .../core/plot_1_recording_extractor.py        |  2 +-
 .../core/plot_3_handle_probe_info.py          |  4 +-
 .../core/plot_4_sorting_analyzer.py           | 43 ++++++++++---------
 .../plot_5_append_concatenate_segments.py     |  4 +-
 .../qualitymetrics/plot_4_curation.py         |  4 +-
 .../widgets/plot_3_waveforms_gallery.py       |  8 ++--
 pyproject.toml                                |  2 +-
 8 files changed, 44 insertions(+), 43 deletions(-)

diff --git a/examples/modules_gallery/comparison/plot_5_comparison_sorter_weaknesses.py b/examples/modules_gallery/comparison/plot_5_comparison_sorter_weaknesses.py
index 562b174a31..c588ee82cb 100644
--- a/examples/modules_gallery/comparison/plot_5_comparison_sorter_weaknesses.py
+++ b/examples/modules_gallery/comparison/plot_5_comparison_sorter_weaknesses.py
@@ -17,17 +17,17 @@
   * several units are merged into one units (overmerged units)
 
 
-To demonstrate this the script `generate_erroneous_sorting.py` generate a ground truth sorting with 10 units.
+To demonstrate this the script `generate_erroneous_sorting.py` generates a ground truth sorting with 10 units.
 We duplicate the results and modify it a bit to inject some "errors":
 
   * unit 1 2 are perfect
   * unit 3 4 have medium agreement
-  * unit 5 6 are over merge
-  * unit 7 is over split in 2 part
+  * unit 5 6 are overmerged
+  * unit 7 is oversplit in 2 parts
   * unit 8 is redundant 3 times
   * unit 9 is missing
-  * unit 10 have low agreement
-  * some units in tested do not exist at all in GT (15, 16, 17)
+  * unit 10 has low agreement
+  * some units in the tested data do not exist at all in GT (15, 16, 17)
 
 """
 
@@ -46,15 +46,15 @@
 
 
 ##############################################################################
-# Here the agreement matrix
+# Here is the agreement matrix
 
 sorting_true, sorting_err = generate_erroneous_sorting()
 comp = compare_sorter_to_ground_truth(sorting_true, sorting_err, exhaustive_gt=True)
 sw.plot_agreement_matrix(comp, ordered=False)
 
 ##############################################################################
-# Here the same matrix but **ordered**
-# It is now quite trivial to check that fake injected errors are enlighted here.
+# Here is the same matrix but **ordered**
+# It is now quite trivial to check that the fake injected errors are highlighted here.
 
 sw.plot_agreement_matrix(comp, ordered=True)
 
@@ -81,13 +81,13 @@
 
 
 ##############################################################################
-# Here we can explore **"bad units"** units that a mixed a several possible errors.
+# Here we can explore **"bad units"**: units that have a mix of several possible errors.
 
 print("bad", comp.get_bad_units())
 
 
 ##############################################################################
-# There is a convenient function to summary everything.
+# Here is a convenient function to summarize everything.
 
 comp.print_summary(well_detected_score=0.75, redundant_score=0.2, overmerged_score=0.2)
 
diff --git a/examples/modules_gallery/core/plot_1_recording_extractor.py b/examples/modules_gallery/core/plot_1_recording_extractor.py
index aa59abd76d..e7d773e9e6 100644
--- a/examples/modules_gallery/core/plot_1_recording_extractor.py
+++ b/examples/modules_gallery/core/plot_1_recording_extractor.py
@@ -48,7 +48,7 @@
 ##############################################################################
 # We can now print properties that the :code:`RecordingExtractor` retrieves from the underlying recording.
 
-print(f"Number of channels = {recording.get_channel_ids()}")
+print(f"Number of channels = {len(recording.get_channel_ids())}")
 print(f"Sampling frequency = {recording.get_sampling_frequency()} Hz")
 print(f"Number of segments= {recording.get_num_segments()}")
 print(f"Number of timepoints in seg0= {recording.get_num_frames(segment_index=0)}")
diff --git a/examples/modules_gallery/core/plot_3_handle_probe_info.py b/examples/modules_gallery/core/plot_3_handle_probe_info.py
index 75b2b56be8..157efb683f 100644
--- a/examples/modules_gallery/core/plot_3_handle_probe_info.py
+++ b/examples/modules_gallery/core/plot_3_handle_probe_info.py
@@ -22,7 +22,7 @@
 print(recording)
 
 ###############################################################################
-# This generator already contain a probe object that you can retrieve
+# This generator already contains a probe object that you can retrieve
 # directly and plot:
 
 probe = recording.get_probe()
@@ -33,7 +33,7 @@
 plot_probe(probe)
 
 ###############################################################################
-# You can also overwrite the probe. In this case you need to manually make
+# You can also overwrite the probe. In this case you need to manually set
 # the wiring (e.g. virtually connect each electrode to the recording device).
 # Let's use a probe from Cambridge Neurotech with 32 channels:
 
diff --git a/examples/modules_gallery/core/plot_4_sorting_analyzer.py b/examples/modules_gallery/core/plot_4_sorting_analyzer.py
index 20dc078197..381d3aec00 100644
--- a/examples/modules_gallery/core/plot_4_sorting_analyzer.py
+++ b/examples/modules_gallery/core/plot_4_sorting_analyzer.py
@@ -3,26 +3,26 @@
 ===============
 
 SpikeInterface provides an object to gather a Recording and a Sorting to make
-analyzer and visualization of the sorting : :py:class:`~spikeinterface.core.SortingAnalyzer`.
+analysis and visualization of the sorting : :py:class:`~spikeinterface.core.SortingAnalyzer`.
 
 This :py:class:`~spikeinterface.core.SortingAnalyzer` class:
 
   * is the first step for all post post processing, quality metrics, and visualization.
-  * gather a recording and a sorting
-  * can be sparse or dense : all channel are used for all units or not.
+  * gathers a recording and a sorting
+  * can be sparse or dense (i.e. whether all channels are used for all units or not).
   * handle a list of "extensions"
-  * "core extensions" are the one to extract some waveforms to compute templates:
+  * "core extensions" are the ones to extract some waveforms to compute templates:
     * "random_spikes" : select randomly a subset of spikes per unit
     * "waveforms" : extract waveforms per unit
-    * "templates": compute template using average or median
-    * "noise_levels" : compute noise level from traces (usefull to get snr of units)
+    * "templates": compute templates using average or median
+    * "noise_levels" : compute noise levels from traces (useful to get the snr of units)
   * can be in memory or persistent to disk (2 formats binary/npy or zarr)
 
-More extesions are available in `spikeinterface.postprocessing` like "principal_components", "spike_amplitudes",
+More extensions are available in `spikeinterface.postprocessing` like "principal_components", "spike_amplitudes",
 "unit_lcations", ...
 
 
-Here the how!
+Here is the how!
 """
 
 import matplotlib.pyplot as plt
@@ -46,11 +46,11 @@
 recording = se.MEArecRecordingExtractor(local_path)
 print(recording)
 sorting = se.MEArecSortingExtractor(local_path)
-print(recording)
+print(sorting)
 
 ###############################################################################
 # The MEArec dataset already contains a probe object that you can retrieve
-# an plot:
+# and plot:
 
 probe = recording.get_probe()
 print(probe)
@@ -68,22 +68,22 @@
 print(analyzer)
 
 ###############################################################################
-# A :py:class:`~spikeinterface.core.SortingAnalyzer` object can be persistane to disk
+# A :py:class:`~spikeinterface.core.SortingAnalyzer` object can be persisted to disk
 # when using format="binary_folder" or format="zarr"
 
 folder = "analyzer_folder"
 analyzer = create_sorting_analyzer(sorting=sorting, recording=recording, format="binary_folder", folder=folder)
 print(analyzer)
 
-# then it can be load back
+# then it can be loaded back
 analyzer = load_sorting_analyzer(folder)
 print(analyzer)
 
 ###############################################################################
-# No extension are computed yet.
+# No extensions are computed yet.
 # Lets compute the most basic ones : select some random spikes per units,
-# extract waveforms (sparse in this examples) and compute templates.
-# You can see that printing the object indicate which extension are computed yet.
+# extract waveforms (sparse in this example) and compute templates.
+# You can see that printing the object indicates which extensions are already computed.
 
 analyzer.compute(
     "random_spikes",
@@ -103,14 +103,14 @@
     "waveforms", ms_before=1.0, ms_after=2.0, return_scaled=True, n_jobs=8, chunk_duration="1s", progress_bar=True
 )
 
-# which is equivalent of this
+# which is equivalent to this:
 job_kwargs = dict(n_jobs=8, chunk_duration="1s", progress_bar=True)
 analyzer.compute("waveforms", ms_before=1.0, ms_after=2.0, return_scaled=True, **job_kwargs)
 
 
 ###############################################################################
 # Each extension can retrieve some data
-# For instance "waveforms" extension can retrieve wavfroms per units
+# For instance the "waveforms" extension can retrieve waveforms per unit
 # which is a numpy array of shape (num_spikes, num_sample, num_channel):
 
 ext_wf = analyzer.get_extension("waveforms")
@@ -134,7 +134,7 @@
 
 
 ###############################################################################
-# This can be plot easily.
+# This can be plotted easily.
 
 for unit_index, unit_id in enumerate(analyzer.unit_ids[:3]):
     fig, ax = plt.subplots()
@@ -144,14 +144,15 @@
 
 
 ###############################################################################
-# The SortingAnalyzer can be saved as to another format using save_as()
-# So the computation can be done with format="memory" and
+# The SortingAnalyzer can be saved to another format using save_as()
+# So the computation can be done with format="memory" and then saved to disk
+# in the zarr format by using save_as()
 
 analyzer.save_as(folder="analyzer.zarr", format="zarr")
 
 
 ###############################################################################
-# The SortingAnalyzer offer also select_units() method wich allows to export
+# The SortingAnalyzer also offeres select_units() method wich allows exporting
 # only some relevant units for instance to a new SortingAnalyzer instance.
 
 analyzer_some_units = analyzer.select_units(
diff --git a/examples/modules_gallery/core/plot_5_append_concatenate_segments.py b/examples/modules_gallery/core/plot_5_append_concatenate_segments.py
index b67a1ff0c2..5b2858edd6 100644
--- a/examples/modules_gallery/core/plot_5_append_concatenate_segments.py
+++ b/examples/modules_gallery/core/plot_5_append_concatenate_segments.py
@@ -4,11 +4,11 @@
 Append and/or concatenate segments
 ===================================
 
-Sometimes a recording can be split in several subparts, for instance a baseline and an intervention.
+Sometimes a recording can be split into several subparts, for instance a baseline and an intervention.
 
 Similarly to `NEO <https://github.com/NeuralEnsemble/python-neo>`_ we define each subpart as a "segment".
 
-SpikeInterface has tools to manipulate these segments. There are two ways:
+SpikeInterface has tools to interacct with these segments. There are two ways:
 
   1. :py:func:`~spikeinterface.core.append_recordings()` and :py:func:`~spikeinterface.core.append_sortings()`
 
diff --git a/examples/modules_gallery/qualitymetrics/plot_4_curation.py b/examples/modules_gallery/qualitymetrics/plot_4_curation.py
index f625914191..6a9253c093 100644
--- a/examples/modules_gallery/qualitymetrics/plot_4_curation.py
+++ b/examples/modules_gallery/qualitymetrics/plot_4_curation.py
@@ -32,8 +32,8 @@
 # Create SortingAnalyzer
 # -----------------------
 #
-# For this example, we will need a :code:`SortingAnalyzer` and some extension
-# to be computed fist
+# For this example, we will need a :code:`SortingAnalyzer` and some extensions
+# to be computed first
 
 
 analyzer = si.create_sorting_analyzer(sorting=sorting, recording=recording, format="memory")
diff --git a/examples/modules_gallery/widgets/plot_3_waveforms_gallery.py b/examples/modules_gallery/widgets/plot_3_waveforms_gallery.py
index fc4a7775d2..2845dcc62c 100644
--- a/examples/modules_gallery/widgets/plot_3_waveforms_gallery.py
+++ b/examples/modules_gallery/widgets/plot_3_waveforms_gallery.py
@@ -17,7 +17,7 @@
 #  from the repo 'https://gin.g-node.org/NeuralEnsemble/ephy_testing_data'
 
 local_path = si.download_dataset(remote_path="mearec/mearec_test_10s.h5")
-recording, sorting = si.read_mearec(local_path)
+recording, sorting = se.read_mearec(local_path)
 print(recording)
 print(sorting)
 
@@ -25,8 +25,8 @@
 # Extract spike waveforms
 # -----------------------
 #
-# For convenience, metrics are computed on the WaveformExtractor object that gather recording/sorting and
-# extracted waveforms in a single object
+# For convenience, metrics are computed on the SortingAnalyzer object that gathers recording/sorting and
+# the extracted waveforms in a single object
 
 
 analyzer = si.create_sorting_analyzer(sorting=sorting, recording=recording, format="memory")
@@ -72,7 +72,7 @@
 # plot_unit_waveform_density_map()
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 #
-# This is your best friend to check over merge
+# This is your best friend to check for overmerge
 
 unit_ids = sorting.unit_ids[:4]
 sw.plot_unit_waveforms_density_map(analyzer, unit_ids=unit_ids, figsize=(14, 8))
diff --git a/pyproject.toml b/pyproject.toml
index 804c89178e..5c3a82848f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -24,6 +24,7 @@ dependencies = [
     "threadpoolctl>=3.0.0",
     "tqdm",
     "zarr>=0.2.16",
+    "xarray",
     "neo>=0.13.0",
     "probeinterface>=0.2.21",
 ]
@@ -89,7 +90,6 @@ streaming_extractors = [
 full = [
     "h5py",
     "pandas",
-    "xarray",
     "scipy",
     "scikit-learn",
     "networkx",

From b74127ebe492b99232dbededa21390825ea6434b Mon Sep 17 00:00:00 2001
From: zm711 <92116279+zm711@users.noreply.github.com>
Date: Wed, 6 Mar 2024 14:31:52 -0500
Subject: [PATCH 2/2] add xarray to docs instead + couple more typos

---
 examples/modules_gallery/core/plot_4_sorting_analyzer.py    | 6 +++---
 .../core/plot_5_append_concatenate_segments.py              | 2 +-
 pyproject.toml                                              | 3 ++-
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/examples/modules_gallery/core/plot_4_sorting_analyzer.py b/examples/modules_gallery/core/plot_4_sorting_analyzer.py
index 381d3aec00..d2be8be1d4 100644
--- a/examples/modules_gallery/core/plot_4_sorting_analyzer.py
+++ b/examples/modules_gallery/core/plot_4_sorting_analyzer.py
@@ -2,8 +2,8 @@
 SortingAnalyzer
 ===============
 
-SpikeInterface provides an object to gather a Recording and a Sorting to make
-analysis and visualization of the sorting : :py:class:`~spikeinterface.core.SortingAnalyzer`.
+SpikeInterface provides an object to gather a Recording and a Sorting to perform various
+analyses and visualizations of the sorting : :py:class:`~spikeinterface.core.SortingAnalyzer`.
 
 This :py:class:`~spikeinterface.core.SortingAnalyzer` class:
 
@@ -152,7 +152,7 @@
 
 
 ###############################################################################
-# The SortingAnalyzer also offeres select_units() method wich allows exporting
+# The SortingAnalyzer also offers a select_units() method which allows exporting
 # only some relevant units for instance to a new SortingAnalyzer instance.
 
 analyzer_some_units = analyzer.select_units(
diff --git a/examples/modules_gallery/core/plot_5_append_concatenate_segments.py b/examples/modules_gallery/core/plot_5_append_concatenate_segments.py
index 5b2858edd6..5cb1cccb6f 100644
--- a/examples/modules_gallery/core/plot_5_append_concatenate_segments.py
+++ b/examples/modules_gallery/core/plot_5_append_concatenate_segments.py
@@ -8,7 +8,7 @@
 
 Similarly to `NEO <https://github.com/NeuralEnsemble/python-neo>`_ we define each subpart as a "segment".
 
-SpikeInterface has tools to interacct with these segments. There are two ways:
+SpikeInterface has tools to interact with these segments. There are two ways:
 
   1. :py:func:`~spikeinterface.core.append_recordings()` and :py:func:`~spikeinterface.core.append_sortings()`
 
diff --git a/pyproject.toml b/pyproject.toml
index 5c3a82848f..e7b9a98427 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -24,7 +24,6 @@ dependencies = [
     "threadpoolctl>=3.0.0",
     "tqdm",
     "zarr>=0.2.16",
-    "xarray",
     "neo>=0.13.0",
     "probeinterface>=0.2.21",
 ]
@@ -90,6 +89,7 @@ streaming_extractors = [
 full = [
     "h5py",
     "pandas",
+    "xarray",
     "scipy",
     "scikit-learn",
     "networkx",
@@ -169,6 +169,7 @@ docs = [
     "pandas", # in the modules gallery comparison tutorial
     "hdbscan>=0.8.33",   # For sorters spykingcircus2 + tridesclous
     "numba", # For many postprocessing functions
+    "xarray", # For use of SortingAnalyzer zarr format
     # for release we need pypi, so this needs to be commented
     "probeinterface @ git+https://github.com/SpikeInterface/probeinterface.git",  # We always build from the latest version
     "neo @ git+https://github.com/NeuralEnsemble/python-neo.git",  # We always build from the latest version