From 299077e878619ea9e7af6d83ae1cf1a5278e354f Mon Sep 17 00:00:00 2001
From: Heberto Mayorquin <h.mayorquin@gmail.com>
Date: Wed, 28 Jun 2023 22:13:08 +0200
Subject: [PATCH 01/10] failing test

---
 src/spikeinterface/core/core_tools.py         |  25 ++++
 .../tests/test_binaryrecordingextractor.py    | 114 +++++++++++++++++-
 .../core/tests/test_generate.py               |  29 +----
 3 files changed, 139 insertions(+), 29 deletions(-)

diff --git a/src/spikeinterface/core/core_tools.py b/src/spikeinterface/core/core_tools.py
index 316d8f79a2..3a02b6f71c 100644
--- a/src/spikeinterface/core/core_tools.py
+++ b/src/spikeinterface/core/core_tools.py
@@ -922,3 +922,28 @@ def convert_bytes_to_str(byte_value: int) -> str:
         byte_value /= 1024
         i += 1
     return f"{byte_value:.2f} {suffixes[i]}"
+
+
+def measure_memory_allocation(measure_in_process: bool = True) -> float:
+    """
+    A local utility to measure memory allocation at a specific point in time.
+    Can measure either the process resident memory or system wide memory available
+
+    Uses psutil package.
+
+    Parameters
+    ----------
+    measure_in_process : bool, True by default
+        Measure memory allocation in the current process only, if false then measures at the system
+        level.
+    """
+    import psutil
+
+    if measure_in_process:
+        process = psutil.Process()
+        memory = process.memory_info().rss
+    else:
+        mem_info = psutil.virtual_memory()
+        memory = mem_info.total - mem_info.available
+
+    return memory
diff --git a/src/spikeinterface/core/tests/test_binaryrecordingextractor.py b/src/spikeinterface/core/tests/test_binaryrecordingextractor.py
index 1d2c6e4c21..16001325ae 100644
--- a/src/spikeinterface/core/tests/test_binaryrecordingextractor.py
+++ b/src/spikeinterface/core/tests/test_binaryrecordingextractor.py
@@ -4,6 +4,8 @@
 
 from spikeinterface.core import BinaryRecordingExtractor
 from spikeinterface.core.numpyextractors import NumpyRecording
+from spikeinterface.core.core_tools import measure_memory_allocation
+from spikeinterface.core.generate import GeneratorRecording
 
 if hasattr(pytest, "global_test_folder"):
     cache_folder = pytest.global_test_folder / "core"
@@ -35,9 +37,10 @@ def test_BinaryRecordingExtractor():
 def test_round_trip(tmp_path):
     num_channels = 10
     num_samples = 50
-    traces_list = [np.ones(shape=(num_samples, num_channels), dtype="int32")]
+
+    traces = np.arange(num_channels * num_samples, dtype="int16").reshape(num_samples, num_channels)
     sampling_frequency = 30_000.0
-    recording = NumpyRecording(traces_list=traces_list, sampling_frequency=sampling_frequency)
+    recording = NumpyRecording(traces_list=[traces], sampling_frequency=sampling_frequency)
 
     file_path = tmp_path / "test_BinaryRecordingExtractor.raw"
     dtype = recording.get_dtype()
@@ -59,5 +62,112 @@ def test_round_trip(tmp_path):
     np.allclose(smaller_traces, binary_smaller_traces)
 
 
+@pytest.fixture(scope="module")
+def folder_with_binary_files(tmpdir_factory):
+    tmp_path = Path(tmpdir_factory.mktemp("spike_interface_test"))
+    folder = tmp_path / "test_binary_recording"
+    num_channels = 32
+    sampling_frequency = 30_000.0
+    dtype = "float32"
+    recording = GeneratorRecording(
+        durations=[3600],
+        sampling_frequency=sampling_frequency,
+        num_channels=num_channels,
+        dtype=dtype,
+    )
+    dtype = recording.get_dtype()
+    recording.save(folder=folder, overwrite=True)
+
+    return folder
+
+
+def test_memory_effcienty(folder_with_binary_files):
+    folder = folder_with_binary_files
+    num_channels = 32
+    sampling_frequency = 30_000.0
+    dtype = "float32"
+
+    file_paths = [folder / "traces_cached_seg0.raw"]
+    recorder_binary = BinaryRecordingExtractor(
+        num_chan=num_channels,
+        file_paths=file_paths,
+        sampling_frequency=sampling_frequency,
+        dtype=dtype,
+    )
+
+    memory_before_traces_bytes = measure_memory_allocation()
+    traces = recorder_binary.get_traces(start_frame=1000, end_frame=10_000)
+    memory_after_traces_bytes = measure_memory_allocation()
+    traces_size_bytes = traces.nbytes
+
+    expected_memory_usage = memory_before_traces_bytes + traces_size_bytes
+    expected_memory_usage_GiB = expected_memory_usage / 1024**3
+    memory_after_traces_bytes_GiB = memory_after_traces_bytes / 1024**3
+    assert expected_memory_usage_GiB == pytest.approx(memory_after_traces_bytes_GiB, rel=0.1)
+
+
+def measure_peak_memory_usage():
+    """
+    Measure the peak memory usage in bytes for the current process.
+
+    The `resource.getrusage(resource.RUSAGE_SELF).ru_maxrss` command is used to get the peak memory usage.
+    The `ru_maxrss` attribute represents the maximum resident set size used (in kilobytes on Linux and bytes on MacOS),
+    which is the maximum memory used by the process since it was started.
+
+    This function only works on Unix systems (including Linux and MacOS).
+
+    Returns
+    -------
+    int
+        Peak memory usage in bytes.
+
+    Raises
+    ------
+    NotImplementedError
+        If the function is called on a Windows system.
+    """
+
+    import sys
+    import resource
+
+    if sys.platform == "win32":
+        raise NotImplementedError("Function cannot be used on Windows")
+
+    mem_usage = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
+
+    # If ru_maxrss returns memory in kilobytes (like on Linux), convert to bytes
+    if hasattr(resource, "RLIMIT_AS"):
+        mem_usage = mem_usage * 1024
+
+    return mem_usage
+
+
+def test_peak_memory_usage(folder_with_binary_files):
+    folder = folder_with_binary_files
+    num_channels = 32
+    sampling_frequency = 30_000.0
+    dtype = "float32"
+
+    file_paths = [folder / "traces_cached_seg0.raw"]
+    recorder_binary = BinaryRecordingExtractor(
+        num_chan=num_channels,
+        file_paths=file_paths,
+        sampling_frequency=sampling_frequency,
+        dtype=dtype,
+    )
+
+    memory_before_traces_bytes = measure_memory_allocation()
+    traces = recorder_binary.get_traces(start_frame=1000, end_frame=10_000)
+    traces_size_bytes = traces.nbytes
+
+    expected_memory_usage = memory_before_traces_bytes + traces_size_bytes
+    peak_memory_GiB = measure_peak_memory_usage() / 1024**3
+    expected_memory_usage_GiB = expected_memory_usage / 1024**3
+    assert expected_memory_usage_GiB == pytest.approx(peak_memory_GiB, rel=0.1)
+
+    print("Expected memory usage: {:.2f} GiB".format(expected_memory_usage_GiB))
+    print(f"Peak memory usage: {peak_memory_GiB:.2f} GiB")
+
+
 if __name__ == "__main__":
     test_BinaryRecordingExtractor()
diff --git a/src/spikeinterface/core/tests/test_generate.py b/src/spikeinterface/core/tests/test_generate.py
index 873105e115..45ed791ab3 100644
--- a/src/spikeinterface/core/tests/test_generate.py
+++ b/src/spikeinterface/core/tests/test_generate.py
@@ -1,44 +1,19 @@
 import pytest
-import psutil
 
 import numpy as np
 
 from spikeinterface.core.generate import GeneratorRecording, generate_lazy_recording
-from spikeinterface.core.core_tools import convert_bytes_to_str
+from spikeinterface.core.core_tools import convert_bytes_to_str, measure_memory_allocation
 
 mode_list = GeneratorRecording.available_modes
 
 
-def measure_memory_allocation(measure_in_process: bool = True) -> float:
-    """
-    A local utility to measure memory allocation at a specific point in time.
-    Can measure either the process resident memory or system wide memory available
-
-    Uses psutil package.
-
-    Parameters
-    ----------
-    measure_in_process : bool, True by default
-        Mesure memory allocation in the current process only, if false then measures at the system
-        level.
-    """
-
-    if measure_in_process:
-        process = psutil.Process()
-        memory = process.memory_info().rss
-    else:
-        mem_info = psutil.virtual_memory()
-        memory = mem_info.total - mem_info.available
-
-    return memory
-
-
 @pytest.mark.parametrize("mode", mode_list)
 def test_lazy_random_recording(mode):
     # Test that get_traces does not consume more memory than allocated.
 
     bytes_to_MiB_factor = 1024**2
-    relative_tolerance = 0.05  # relative tolerance of 5 per cent
+    relative_tolerance = 0.01  # relative tolerance of 1 per cent
 
     sampling_frequency = 30000  # Hz
     durations = [2.0]

From 628b0a884a25233bed4baf56418dd4d3a3d0adc1 Mon Sep 17 00:00:00 2001
From: Heberto Mayorquin <h.mayorquin@gmail.com>
Date: Thu, 29 Jun 2023 10:54:11 +0200
Subject: [PATCH 02/10] passing tests

---
 .../core/binaryrecordingextractor.py          | 68 +++++++++++++------
 .../tests/test_binaryrecordingextractor.py    | 68 +++++++++++++++----
 2 files changed, 101 insertions(+), 35 deletions(-)

diff --git a/src/spikeinterface/core/binaryrecordingextractor.py b/src/spikeinterface/core/binaryrecordingextractor.py
index c04a1c6ec7..deadcc2624 100644
--- a/src/spikeinterface/core/binaryrecordingextractor.py
+++ b/src/spikeinterface/core/binaryrecordingextractor.py
@@ -155,25 +155,17 @@ def get_binary_description(self):
 
 
 class BinaryRecordingSegment(BaseRecordingSegment):
-    def __init__(self, datfile, sampling_frequency, t_start, num_chan, dtype, time_axis, file_offset):
+    def __init__(self, file_path, sampling_frequency, t_start, num_chan, dtype, time_axis, file_offset):
         BaseRecordingSegment.__init__(self, sampling_frequency=sampling_frequency, t_start=t_start)
         self.num_chan = num_chan
         self.dtype = np.dtype(dtype)
         self.file_offset = file_offset
         self.time_axis = time_axis
-        self.datfile = datfile
-        self.file = open(self.datfile, "r")
-        self.num_samples = (Path(datfile).stat().st_size - file_offset) // (num_chan * np.dtype(dtype).itemsize)
-        if self.time_axis == 0:
-            self.shape = (self.num_samples, self.num_chan)
-        else:
-            self.shape = (self.num_chan, self.num_samples)
-
-        byte_offset = self.file_offset
-        dtype_size_bytes = self.dtype.itemsize
-        data_size_bytes = dtype_size_bytes * self.num_samples * self.num_chan
-        self.memmap_offset, self.array_offset = divmod(byte_offset, mmap.ALLOCATIONGRANULARITY)
-        self.memmap_length = data_size_bytes + self.array_offset
+        self.file_path = file_path
+        self.file = open(self.file_path, "rb")
+        self.elements_per_sample = self.num_chan * self.dtype.itemsize
+        self.data_size_in_bytes = Path(file_path).stat().st_size - file_offset
+        self.num_samples = self.data_size_in_bytes // self.elements_per_sample
 
     def get_num_samples(self) -> int:
         """Returns the number of samples in this signal block
@@ -189,23 +181,55 @@ def get_traces(
         end_frame: Union[int, None] = None,
         channel_indices: Union[List, None] = None,
     ) -> np.ndarray:
-        length = self.memmap_length
-        memmap_offset = self.memmap_offset
+        if start_frame is None:
+            start_frame = 0
+
+        if end_frame is None:
+            end_frame = self.get_num_samples()
+
+        if end_frame > self.get_num_samples():
+            raise ValueError(f"end_frame {end_frame} is larger than the number of samples {self.get_num_samples()}")
+
+        dtype_size_bytes = np.dtype(self.dtype).itemsize
+        elements_per_sample = self.num_chan * dtype_size_bytes
+
+        # Calculate byte offsets for start and end frames
+        start_byte = self.file_offset + start_frame * elements_per_sample
+        end_byte = self.file_offset + end_frame * elements_per_sample
+
+        # Calculate the length of the data chunk to load into memory
+        length = end_byte - start_byte
+
+        # The mmap offset must be a multiple of mmap.ALLOCATIONGRANULARITY
+        memmap_offset, start_offset = divmod(start_byte, mmap.ALLOCATIONGRANULARITY)
+        memmap_offset *= mmap.ALLOCATIONGRANULARITY
+
+        # Adjust the length so it includes the extra data from rounding down the memmap offset to a multiple of ALLOCATIONGRANULARITY
+        length += start_offset
+
+        # Create the mmap object
         memmap_obj = mmap.mmap(self.file.fileno(), length=length, access=mmap.ACCESS_READ, offset=memmap_offset)
 
-        array = np.ndarray.__new__(
-            np.ndarray,
-            shape=self.shape,
+        # Create a numpy array using the mmap object as the buffer
+        # Note that the shape must be recalculated based on the new data chunk
+        if self.time_axis == 0:
+            shape = ((end_frame - start_frame), self.num_chan)
+        else:
+            shape = (self.num_chan, (end_frame - start_frame))
+
+        array = np.ndarray(
+            shape=shape,
             dtype=self.dtype,
             buffer=memmap_obj,
-            order="C",
-            offset=self.array_offset,
+            offset=start_offset,
         )
 
         if self.time_axis == 1:
             array = array.T
 
-        traces = array[start_frame:end_frame]
+        # Now the entire array should correspond to the data between start_frame and end_frame, so we can use it directly
+        traces = array
+
         if channel_indices is not None:
             traces = traces[:, channel_indices]
 
diff --git a/src/spikeinterface/core/tests/test_binaryrecordingextractor.py b/src/spikeinterface/core/tests/test_binaryrecordingextractor.py
index 16001325ae..ed0b2922e7 100644
--- a/src/spikeinterface/core/tests/test_binaryrecordingextractor.py
+++ b/src/spikeinterface/core/tests/test_binaryrecordingextractor.py
@@ -52,10 +52,12 @@ def test_round_trip(tmp_path):
         file_paths=file_path, sampling_frequency=sampling_frequency, num_chan=num_chan, dtype=dtype
     )
 
+    # Test for full traces
     assert np.allclose(recording.get_traces(), binary_recorder.get_traces())
 
-    start_frame = 200
-    end_frame = 500
+    # Test for a sub-set of the traces
+    start_frame = 20
+    end_frame = 40
     smaller_traces = recording.get_traces(start_frame=start_frame, end_frame=end_frame)
     binary_smaller_traces = binary_recorder.get_traces(start_frame=start_frame, end_frame=end_frame)
 
@@ -70,7 +72,7 @@ def folder_with_binary_files(tmpdir_factory):
     sampling_frequency = 30_000.0
     dtype = "float32"
     recording = GeneratorRecording(
-        durations=[3600],
+        durations=[1.0],
         sampling_frequency=sampling_frequency,
         num_channels=num_channels,
         dtype=dtype,
@@ -81,14 +83,53 @@ def folder_with_binary_files(tmpdir_factory):
     return folder
 
 
+def test_sequential_reading_of_small_traces(folder_with_binary_files):
+    folder = folder_with_binary_files
+    num_channels = 32
+    sampling_frequency = 30_000.0
+    dtype = "float32"
+
+    file_paths = [folder / "traces_cached_seg0.raw"]
+    recording = BinaryRecordingExtractor(
+        num_chan=num_channels,
+        file_paths=file_paths,
+        sampling_frequency=sampling_frequency,
+        dtype=dtype,
+    )
+
+    full_traces = recording.get_traces()
+
+    # Test for a sub-set of the traces
+    start_frame = 10
+    end_frame = 15
+    small_traces = recording.get_traces(start_frame=start_frame, end_frame=end_frame)
+    expected_traces = full_traces[start_frame:end_frame, :]
+    assert np.allclose(small_traces, expected_traces)
+
+    # Test for a sub-set of the traces
+    start_frame = 1000
+    end_frame = 1100
+    small_traces = recording.get_traces(start_frame=start_frame, end_frame=end_frame)
+    expected_traces = full_traces[start_frame:end_frame, :]
+    assert np.allclose(small_traces, expected_traces)
+
+    # Test for a sub-set of the traces
+    start_frame = 10_000
+    end_frame = 11_000
+    small_traces = recording.get_traces(start_frame=start_frame, end_frame=end_frame)
+    expected_traces = full_traces[start_frame:end_frame, :]
+    assert np.allclose(small_traces, expected_traces)
+
+
 def test_memory_effcienty(folder_with_binary_files):
+    "This test that memory is freed afte reading the traces"
     folder = folder_with_binary_files
     num_channels = 32
     sampling_frequency = 30_000.0
     dtype = "float32"
 
     file_paths = [folder / "traces_cached_seg0.raw"]
-    recorder_binary = BinaryRecordingExtractor(
+    recording = BinaryRecordingExtractor(
         num_chan=num_channels,
         file_paths=file_paths,
         sampling_frequency=sampling_frequency,
@@ -96,14 +137,14 @@ def test_memory_effcienty(folder_with_binary_files):
     )
 
     memory_before_traces_bytes = measure_memory_allocation()
-    traces = recorder_binary.get_traces(start_frame=1000, end_frame=10_000)
+    traces = recording.get_traces(start_frame=1000, end_frame=10_000)
     memory_after_traces_bytes = measure_memory_allocation()
     traces_size_bytes = traces.nbytes
 
     expected_memory_usage = memory_before_traces_bytes + traces_size_bytes
     expected_memory_usage_GiB = expected_memory_usage / 1024**3
     memory_after_traces_bytes_GiB = memory_after_traces_bytes / 1024**3
-    assert expected_memory_usage_GiB == pytest.approx(memory_after_traces_bytes_GiB, rel=0.1)
+    assert memory_after_traces_bytes_GiB == pytest.approx(expected_memory_usage_GiB, rel=0.1)
 
 
 def measure_peak_memory_usage():
@@ -143,13 +184,14 @@ def measure_peak_memory_usage():
 
 
 def test_peak_memory_usage(folder_with_binary_files):
+    "This tests that there are no spikes in memory usage when reading traces."
     folder = folder_with_binary_files
     num_channels = 32
     sampling_frequency = 30_000.0
     dtype = "float32"
 
     file_paths = [folder / "traces_cached_seg0.raw"]
-    recorder_binary = BinaryRecordingExtractor(
+    recording = BinaryRecordingExtractor(
         num_chan=num_channels,
         file_paths=file_paths,
         sampling_frequency=sampling_frequency,
@@ -157,16 +199,16 @@ def test_peak_memory_usage(folder_with_binary_files):
     )
 
     memory_before_traces_bytes = measure_memory_allocation()
-    traces = recorder_binary.get_traces(start_frame=1000, end_frame=10_000)
+    traces = recording.get_traces(start_frame=1000, end_frame=2000)
     traces_size_bytes = traces.nbytes
 
     expected_memory_usage = memory_before_traces_bytes + traces_size_bytes
-    peak_memory_GiB = measure_peak_memory_usage() / 1024**3
-    expected_memory_usage_GiB = expected_memory_usage / 1024**3
-    assert expected_memory_usage_GiB == pytest.approx(peak_memory_GiB, rel=0.1)
+    peak_memory_MiB = measure_peak_memory_usage() / 1024**2
+    expected_memory_usage_MiB = expected_memory_usage / 1024**2
+    assert expected_memory_usage_MiB == pytest.approx(peak_memory_MiB, rel=0.1)
 
-    print("Expected memory usage: {:.2f} GiB".format(expected_memory_usage_GiB))
-    print(f"Peak memory usage: {peak_memory_GiB:.2f} GiB")
+    print("Expected memory usage: {:.2f} MiB".format(expected_memory_usage_MiB))
+    print(f"Peak memory usage: {peak_memory_MiB:.2f} MiB")
 
 
 if __name__ == "__main__":

From 59e82a13b590809abfe8803111e4e584e5f155e2 Mon Sep 17 00:00:00 2001
From: Heberto Mayorquin <h.mayorquin@gmail.com>
Date: Tue, 4 Jul 2023 20:49:52 +0200
Subject: [PATCH 03/10] merging

---
 .../core/binaryrecordingextractor.py          | 26 ++++++++-----------
 1 file changed, 11 insertions(+), 15 deletions(-)

diff --git a/src/spikeinterface/core/binaryrecordingextractor.py b/src/spikeinterface/core/binaryrecordingextractor.py
index 5b588a31a2..2cbc52f4b4 100644
--- a/src/spikeinterface/core/binaryrecordingextractor.py
+++ b/src/spikeinterface/core/binaryrecordingextractor.py
@@ -175,9 +175,9 @@ def __init__(self, file_path, sampling_frequency, t_start, num_channels, dtype,
         self.time_axis = time_axis
         self.file_path = file_path
         self.file = open(self.file_path, "rb")
-        self.elements_per_sample = self.num_chan * self.dtype.itemsize
+        self.bytes_per_sample = self.num_channels * self.dtype.itemsize
         self.data_size_in_bytes = Path(file_path).stat().st_size - file_offset
-        self.num_samples = self.data_size_in_bytes // self.elements_per_sample
+        self.num_samples = self.data_size_in_bytes // self.bytes_per_sample
 
     def get_num_samples(self) -> int:
         """Returns the number of samples in this signal block
@@ -202,12 +202,9 @@ def get_traces(
         if end_frame > self.get_num_samples():
             raise ValueError(f"end_frame {end_frame} is larger than the number of samples {self.get_num_samples()}")
 
-        dtype_size_bytes = np.dtype(self.dtype).itemsize
-        elements_per_sample = self.num_chan * dtype_size_bytes
-
         # Calculate byte offsets for start and end frames
-        start_byte = self.file_offset + start_frame * elements_per_sample
-        end_byte = self.file_offset + end_frame * elements_per_sample
+        start_byte = self.file_offset + start_frame * self.bytes_per_sample
+        end_byte = self.file_offset + end_frame * self.bytes_per_sample
 
         # Calculate the length of the data chunk to load into memory
         length = end_byte - start_byte
@@ -216,7 +213,8 @@ def get_traces(
         memmap_offset, start_offset = divmod(start_byte, mmap.ALLOCATIONGRANULARITY)
         memmap_offset *= mmap.ALLOCATIONGRANULARITY
 
-        # Adjust the length so it includes the extra data from rounding down the memmap offset to a multiple of ALLOCATIONGRANULARITY
+        # Adjust the length so it includes the extra data from rounding down
+        # the memmap offset to a multiple of ALLOCATIONGRANULARITY
         length += start_offset
 
         # Create the mmap object
@@ -225,11 +223,12 @@ def get_traces(
         # Create a numpy array using the mmap object as the buffer
         # Note that the shape must be recalculated based on the new data chunk
         if self.time_axis == 0:
-            shape = ((end_frame - start_frame), self.num_chan)
+            shape = ((end_frame - start_frame), self.num_channels)
         else:
-            shape = (self.num_chan, (end_frame - start_frame))
+            shape = (self.num_channels, (end_frame - start_frame))
 
-        array = np.ndarray(
+        # Now the entire array should correspond to the data between start_frame and end_frame, so we can use it directly
+        traces = np.ndarray(
             shape=shape,
             dtype=self.dtype,
             buffer=memmap_obj,
@@ -237,10 +236,7 @@ def get_traces(
         )
 
         if self.time_axis == 1:
-            array = array.T
-
-        # Now the entire array should correspond to the data between start_frame and end_frame, so we can use it directly
-        traces = array
+            traces = traces.T
 
         if channel_indices is not None:
             traces = traces[:, channel_indices]

From 4d4c55e140014ea79947910883f3ef360fe0f723 Mon Sep 17 00:00:00 2001
From: Heberto Mayorquin <h.mayorquin@gmail.com>
Date: Tue, 4 Jul 2023 21:03:53 +0200
Subject: [PATCH 04/10] refactor tests

---
 .../tests/test_binaryrecordingextractor.py    | 26 ++++++++++++++-----
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/src/spikeinterface/core/tests/test_binaryrecordingextractor.py b/src/spikeinterface/core/tests/test_binaryrecordingextractor.py
index 16cd01141e..a73cc8d1f4 100644
--- a/src/spikeinterface/core/tests/test_binaryrecordingextractor.py
+++ b/src/spikeinterface/core/tests/test_binaryrecordingextractor.py
@@ -1,6 +1,8 @@
 import pytest
 import numpy as np
 from pathlib import Path
+import sys
+import resource
 
 from spikeinterface.core import BinaryRecordingExtractor
 from spikeinterface.core.numpyextractors import NumpyRecording
@@ -91,6 +93,7 @@ def folder_with_binary_files(tmpdir_factory):
 
 
 def test_sequential_reading_of_small_traces(folder_with_binary_files):
+    # Test that memmap is read correctly when pointing to specific frames
     folder = folder_with_binary_files
     num_channels = 32
     sampling_frequency = 30_000.0
@@ -151,6 +154,15 @@ def test_memory_effcienty(folder_with_binary_files):
     expected_memory_usage = memory_before_traces_bytes + traces_size_bytes
     expected_memory_usage_GiB = expected_memory_usage / 1024**3
     memory_after_traces_bytes_GiB = memory_after_traces_bytes / 1024**3
+
+    ratio = memory_after_traces_bytes_GiB / expected_memory_usage_GiB
+
+    assertion_msg = (
+        f"Peak memory {memory_after_traces_bytes_GiB} GiB usage is {ratio:.2f} times"
+        f"the expected memory usage of {expected_memory_usage_GiB} GiB."
+    )
+    assert ratio <= 1.05, assertion_msg
+
     assert memory_after_traces_bytes_GiB == pytest.approx(expected_memory_usage_GiB, rel=0.1)
 
 
@@ -175,9 +187,6 @@ def measure_peak_memory_usage():
         If the function is called on a Windows system.
     """
 
-    import sys
-    import resource
-
     if sys.platform == "win32":
         raise NotImplementedError("Function cannot be used on Windows")
 
@@ -190,6 +199,7 @@ def measure_peak_memory_usage():
     return mem_usage
 
 
+@pytest.mark.skipif(sys.platform == "win32", reason="Don't know how to calculate peak memory on widnows")
 def test_peak_memory_usage(folder_with_binary_files):
     "This tests that there are no spikes in memory usage when reading traces."
     folder = folder_with_binary_files
@@ -212,10 +222,12 @@ def test_peak_memory_usage(folder_with_binary_files):
     expected_memory_usage = memory_before_traces_bytes + traces_size_bytes
     peak_memory_MiB = measure_peak_memory_usage() / 1024**2
     expected_memory_usage_MiB = expected_memory_usage / 1024**2
-    assert expected_memory_usage_MiB == pytest.approx(peak_memory_MiB, rel=0.1)
-
-    print("Expected memory usage: {:.2f} MiB".format(expected_memory_usage_MiB))
-    print(f"Peak memory usage: {peak_memory_MiB:.2f} MiB")
+    ratio = peak_memory_MiB / expected_memory_usage_MiB
+    assertion_msg = (
+        f"Peak memory {peak_memory_MiB} MiB usage is {ratio:.2f} times"
+        f"the expected memory usage of {expected_memory_usage_MiB} MiB."
+    )
+    assert ratio <= 1.05, assertion_msg
 
 
 if __name__ == "__main__":

From 2ea7f1bde6ec38c43f1a747ca43f4765a49cb97a Mon Sep 17 00:00:00 2001
From: Heberto Mayorquin <h.mayorquin@gmail.com>
Date: Tue, 4 Jul 2023 21:06:44 +0200
Subject: [PATCH 05/10] window import

---
 .../core/tests/test_binaryrecordingextractor.py            | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/spikeinterface/core/tests/test_binaryrecordingextractor.py b/src/spikeinterface/core/tests/test_binaryrecordingextractor.py
index a73cc8d1f4..a62974c833 100644
--- a/src/spikeinterface/core/tests/test_binaryrecordingextractor.py
+++ b/src/spikeinterface/core/tests/test_binaryrecordingextractor.py
@@ -2,7 +2,6 @@
 import numpy as np
 from pathlib import Path
 import sys
-import resource
 
 from spikeinterface.core import BinaryRecordingExtractor
 from spikeinterface.core.numpyextractors import NumpyRecording
@@ -188,7 +187,9 @@ def measure_peak_memory_usage():
     """
 
     if sys.platform == "win32":
-        raise NotImplementedError("Function cannot be used on Windows")
+        raise NotImplementedError("Resource module not available on Windows")
+
+    import resource
 
     mem_usage = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
 
@@ -199,7 +200,7 @@ def measure_peak_memory_usage():
     return mem_usage
 
 
-@pytest.mark.skipif(sys.platform == "win32", reason="Don't know how to calculate peak memory on widnows")
+@pytest.mark.skipif(sys.platform == "win32", reason="resource module not available on Windows")
 def test_peak_memory_usage(folder_with_binary_files):
     "This tests that there are no spikes in memory usage when reading traces."
     folder = folder_with_binary_files

From 80a41525b1bb583df8a05a34c50baaab1529f63d Mon Sep 17 00:00:00 2001
From: Heberto Mayorquin <h.mayorquin@gmail.com>
Date: Mon, 11 Sep 2023 11:56:12 +0200
Subject: [PATCH 06/10] update generator recording

---
 .../core/tests/test_binaryrecordingextractor.py               | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/spikeinterface/core/tests/test_binaryrecordingextractor.py b/src/spikeinterface/core/tests/test_binaryrecordingextractor.py
index a62974c833..6f63dad576 100644
--- a/src/spikeinterface/core/tests/test_binaryrecordingextractor.py
+++ b/src/spikeinterface/core/tests/test_binaryrecordingextractor.py
@@ -6,7 +6,7 @@
 from spikeinterface.core import BinaryRecordingExtractor
 from spikeinterface.core.numpyextractors import NumpyRecording
 from spikeinterface.core.core_tools import measure_memory_allocation
-from spikeinterface.core.generate import GeneratorRecording
+from spikeinterface.core.generate import NoiseGeneratorRecording
 
 if hasattr(pytest, "global_test_folder"):
     cache_folder = pytest.global_test_folder / "core"
@@ -79,7 +79,7 @@ def folder_with_binary_files(tmpdir_factory):
     num_channels = 32
     sampling_frequency = 30_000.0
     dtype = "float32"
-    recording = GeneratorRecording(
+    recording = NoiseGeneratorRecording(
         durations=[1.0],
         sampling_frequency=sampling_frequency,
         num_channels=num_channels,

From 25b125ece7c465f74e0cd8e100278604d8b9d124 Mon Sep 17 00:00:00 2001
From: Heberto Mayorquin <h.mayorquin@gmail.com>
Date: Mon, 11 Sep 2023 11:58:46 +0200
Subject: [PATCH 07/10] add missing import

---
 src/spikeinterface/core/tests/test_generate.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/spikeinterface/core/tests/test_generate.py b/src/spikeinterface/core/tests/test_generate.py
index abcbd9c4e2..2b4e65980f 100644
--- a/src/spikeinterface/core/tests/test_generate.py
+++ b/src/spikeinterface/core/tests/test_generate.py
@@ -18,6 +18,7 @@
 
 
 from spikeinterface.core.core_tools import convert_bytes_to_str, measure_memory_allocation
+from spikeinterface.core.testing import check_recordings_equal
 
 strategy_list = ["tile_pregenerated", "on_the_fly"]
 

From 757e939b8c3e45d12c952d073c9e22215453b9cc Mon Sep 17 00:00:00 2001
From: Heberto Mayorquin <h.mayorquin@gmail.com>
Date: Thu, 14 Dec 2023 15:33:25 +0100
Subject: [PATCH 08/10] maybe tests

---
 .../tests/test_binaryrecordingextractor.py    | 198 +++++++++---------
 1 file changed, 99 insertions(+), 99 deletions(-)

diff --git a/src/spikeinterface/core/tests/test_binaryrecordingextractor.py b/src/spikeinterface/core/tests/test_binaryrecordingextractor.py
index 6f63dad576..03b1927b33 100644
--- a/src/spikeinterface/core/tests/test_binaryrecordingextractor.py
+++ b/src/spikeinterface/core/tests/test_binaryrecordingextractor.py
@@ -130,105 +130,105 @@ def test_sequential_reading_of_small_traces(folder_with_binary_files):
     assert np.allclose(small_traces, expected_traces)
 
 
-def test_memory_effcienty(folder_with_binary_files):
-    "This test that memory is freed afte reading the traces"
-    folder = folder_with_binary_files
-    num_channels = 32
-    sampling_frequency = 30_000.0
-    dtype = "float32"
-
-    file_paths = [folder / "traces_cached_seg0.raw"]
-    recording = BinaryRecordingExtractor(
-        num_chan=num_channels,
-        file_paths=file_paths,
-        sampling_frequency=sampling_frequency,
-        dtype=dtype,
-    )
-
-    memory_before_traces_bytes = measure_memory_allocation()
-    traces = recording.get_traces(start_frame=1000, end_frame=10_000)
-    memory_after_traces_bytes = measure_memory_allocation()
-    traces_size_bytes = traces.nbytes
-
-    expected_memory_usage = memory_before_traces_bytes + traces_size_bytes
-    expected_memory_usage_GiB = expected_memory_usage / 1024**3
-    memory_after_traces_bytes_GiB = memory_after_traces_bytes / 1024**3
-
-    ratio = memory_after_traces_bytes_GiB / expected_memory_usage_GiB
-
-    assertion_msg = (
-        f"Peak memory {memory_after_traces_bytes_GiB} GiB usage is {ratio:.2f} times"
-        f"the expected memory usage of {expected_memory_usage_GiB} GiB."
-    )
-    assert ratio <= 1.05, assertion_msg
-
-    assert memory_after_traces_bytes_GiB == pytest.approx(expected_memory_usage_GiB, rel=0.1)
-
-
-def measure_peak_memory_usage():
-    """
-    Measure the peak memory usage in bytes for the current process.
-
-    The `resource.getrusage(resource.RUSAGE_SELF).ru_maxrss` command is used to get the peak memory usage.
-    The `ru_maxrss` attribute represents the maximum resident set size used (in kilobytes on Linux and bytes on MacOS),
-    which is the maximum memory used by the process since it was started.
-
-    This function only works on Unix systems (including Linux and MacOS).
-
-    Returns
-    -------
-    int
-        Peak memory usage in bytes.
-
-    Raises
-    ------
-    NotImplementedError
-        If the function is called on a Windows system.
-    """
-
-    if sys.platform == "win32":
-        raise NotImplementedError("Resource module not available on Windows")
-
-    import resource
-
-    mem_usage = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
-
-    # If ru_maxrss returns memory in kilobytes (like on Linux), convert to bytes
-    if hasattr(resource, "RLIMIT_AS"):
-        mem_usage = mem_usage * 1024
-
-    return mem_usage
-
-
-@pytest.mark.skipif(sys.platform == "win32", reason="resource module not available on Windows")
-def test_peak_memory_usage(folder_with_binary_files):
-    "This tests that there are no spikes in memory usage when reading traces."
-    folder = folder_with_binary_files
-    num_channels = 32
-    sampling_frequency = 30_000.0
-    dtype = "float32"
-
-    file_paths = [folder / "traces_cached_seg0.raw"]
-    recording = BinaryRecordingExtractor(
-        num_chan=num_channels,
-        file_paths=file_paths,
-        sampling_frequency=sampling_frequency,
-        dtype=dtype,
-    )
-
-    memory_before_traces_bytes = measure_memory_allocation()
-    traces = recording.get_traces(start_frame=1000, end_frame=2000)
-    traces_size_bytes = traces.nbytes
-
-    expected_memory_usage = memory_before_traces_bytes + traces_size_bytes
-    peak_memory_MiB = measure_peak_memory_usage() / 1024**2
-    expected_memory_usage_MiB = expected_memory_usage / 1024**2
-    ratio = peak_memory_MiB / expected_memory_usage_MiB
-    assertion_msg = (
-        f"Peak memory {peak_memory_MiB} MiB usage is {ratio:.2f} times"
-        f"the expected memory usage of {expected_memory_usage_MiB} MiB."
-    )
-    assert ratio <= 1.05, assertion_msg
+# def test_memory_effcienty(folder_with_binary_files):
+#     "This test that memory is freed afte reading the traces"
+#     folder = folder_with_binary_files
+#     num_channels = 32
+#     sampling_frequency = 30_000.0
+#     dtype = "float32"
+
+#     file_paths = [folder / "traces_cached_seg0.raw"]
+#     recording = BinaryRecordingExtractor(
+#         num_chan=num_channels,
+#         file_paths=file_paths,
+#         sampling_frequency=sampling_frequency,
+#         dtype=dtype,
+#     )
+
+#     memory_before_traces_bytes = measure_memory_allocation()
+#     traces = recording.get_traces(start_frame=1000, end_frame=10_000)
+#     memory_after_traces_bytes = measure_memory_allocation()
+#     traces_size_bytes = traces.nbytes
+
+#     expected_memory_usage = memory_before_traces_bytes + traces_size_bytes
+#     expected_memory_usage_GiB = expected_memory_usage / 1024**3
+#     memory_after_traces_bytes_GiB = memory_after_traces_bytes / 1024**3
+
+#     ratio = memory_after_traces_bytes_GiB / expected_memory_usage_GiB
+
+#     assertion_msg = (
+#         f"Peak memory {memory_after_traces_bytes_GiB} GiB usage is {ratio:.2f} times"
+#         f"the expected memory usage of {expected_memory_usage_GiB} GiB."
+#     )
+#     assert ratio <= 1.05, assertion_msg
+
+#     assert memory_after_traces_bytes_GiB == pytest.approx(expected_memory_usage_GiB, rel=0.1)
+
+
+# def measure_peak_memory_usage():
+#     """
+#     Measure the peak memory usage in bytes for the current process.
+
+#     The `resource.getrusage(resource.RUSAGE_SELF).ru_maxrss` command is used to get the peak memory usage.
+#     The `ru_maxrss` attribute represents the maximum resident set size used (in kilobytes on Linux and bytes on MacOS),
+#     which is the maximum memory used by the process since it was started.
+
+#     This function only works on Unix systems (including Linux and MacOS).
+
+#     Returns
+#     -------
+#     int
+#         Peak memory usage in bytes.
+
+#     Raises
+#     ------
+#     NotImplementedError
+#         If the function is called on a Windows system.
+#     """
+
+#     if sys.platform == "win32":
+#         raise NotImplementedError("Resource module not available on Windows")
+
+#     import resource
+
+#     mem_usage = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
+
+#     # If ru_maxrss returns memory in kilobytes (like on Linux), convert to bytes
+#     if hasattr(resource, "RLIMIT_AS"):
+#         mem_usage = mem_usage * 1024
+
+#     return mem_usage
+
+
+# @pytest.mark.skipif(sys.platform == "win32", reason="resource module not available on Windows")
+# def test_peak_memory_usage(folder_with_binary_files):
+#     "This tests that there are no spikes in memory usage when reading traces."
+#     folder = folder_with_binary_files
+#     num_channels = 32
+#     sampling_frequency = 30_000.0
+#     dtype = "float32"
+
+#     file_paths = [folder / "traces_cached_seg0.raw"]
+#     recording = BinaryRecordingExtractor(
+#         num_chan=num_channels,
+#         file_paths=file_paths,
+#         sampling_frequency=sampling_frequency,
+#         dtype=dtype,
+#     )
+
+#     memory_before_traces_bytes = measure_memory_allocation()
+#     traces = recording.get_traces(start_frame=1000, end_frame=2000)
+#     traces_size_bytes = traces.nbytes
+
+#     expected_memory_usage = memory_before_traces_bytes + traces_size_bytes
+#     peak_memory_MiB = measure_peak_memory_usage() / 1024**2
+#     expected_memory_usage_MiB = expected_memory_usage / 1024**2
+#     ratio = peak_memory_MiB / expected_memory_usage_MiB
+#     assertion_msg = (
+#         f"Peak memory {peak_memory_MiB} MiB usage is {ratio:.2f} times"
+#         f"the expected memory usage of {expected_memory_usage_MiB} MiB."
+#     )
+#     assert ratio <= 1.05, assertion_msg
 
 
 if __name__ == "__main__":

From 091042052165e5292b70c934b6da190bcb413b21 Mon Sep 17 00:00:00 2001
From: Heberto Mayorquin <h.mayorquin@gmail.com>
Date: Thu, 6 Jun 2024 12:05:14 -0600
Subject: [PATCH 09/10] remove commented-out memory usage tests

---
 .../tests/test_binaryrecordingextractor.py    | 101 ------------------
 1 file changed, 101 deletions(-)

diff --git a/src/spikeinterface/core/tests/test_binaryrecordingextractor.py b/src/spikeinterface/core/tests/test_binaryrecordingextractor.py
index 30435df820..b0fab7a579 100644
--- a/src/spikeinterface/core/tests/test_binaryrecordingextractor.py
+++ b/src/spikeinterface/core/tests/test_binaryrecordingextractor.py
@@ -125,106 +125,5 @@ def test_sequential_reading_of_small_traces(folder_with_binary_files):
     assert np.allclose(small_traces, expected_traces)
 
 
-# def test_memory_effcienty(folder_with_binary_files):
-#     "This test that memory is freed afte reading the traces"
-#     folder = folder_with_binary_files
-#     num_channels = 32
-#     sampling_frequency = 30_000.0
-#     dtype = "float32"
-
-#     file_paths = [folder / "traces_cached_seg0.raw"]
-#     recording = BinaryRecordingExtractor(
-#         num_chan=num_channels,
-#         file_paths=file_paths,
-#         sampling_frequency=sampling_frequency,
-#         dtype=dtype,
-#     )
-
-#     memory_before_traces_bytes = measure_memory_allocation()
-#     traces = recording.get_traces(start_frame=1000, end_frame=10_000)
-#     memory_after_traces_bytes = measure_memory_allocation()
-#     traces_size_bytes = traces.nbytes
-
-#     expected_memory_usage = memory_before_traces_bytes + traces_size_bytes
-#     expected_memory_usage_GiB = expected_memory_usage / 1024**3
-#     memory_after_traces_bytes_GiB = memory_after_traces_bytes / 1024**3
-
-#     ratio = memory_after_traces_bytes_GiB / expected_memory_usage_GiB
-
-#     assertion_msg = (
-#         f"Peak memory {memory_after_traces_bytes_GiB} GiB usage is {ratio:.2f} times"
-#         f"the expected memory usage of {expected_memory_usage_GiB} GiB."
-#     )
-#     assert ratio <= 1.05, assertion_msg
-
-#     assert memory_after_traces_bytes_GiB == pytest.approx(expected_memory_usage_GiB, rel=0.1)
-
-
-# def measure_peak_memory_usage():
-#     """
-#     Measure the peak memory usage in bytes for the current process.
-
-#     The `resource.getrusage(resource.RUSAGE_SELF).ru_maxrss` command is used to get the peak memory usage.
-#     The `ru_maxrss` attribute represents the maximum resident set size used (in kilobytes on Linux and bytes on MacOS),
-#     which is the maximum memory used by the process since it was started.
-
-#     This function only works on Unix systems (including Linux and MacOS).
-
-#     Returns
-#     -------
-#     int
-#         Peak memory usage in bytes.
-
-#     Raises
-#     ------
-#     NotImplementedError
-#         If the function is called on a Windows system.
-#     """
-
-#     if sys.platform == "win32":
-#         raise NotImplementedError("Resource module not available on Windows")
-
-#     import resource
-
-#     mem_usage = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
-
-#     # If ru_maxrss returns memory in kilobytes (like on Linux), convert to bytes
-#     if hasattr(resource, "RLIMIT_AS"):
-#         mem_usage = mem_usage * 1024
-
-#     return mem_usage
-
-
-# @pytest.mark.skipif(sys.platform == "win32", reason="resource module not available on Windows")
-# def test_peak_memory_usage(folder_with_binary_files):
-#     "This tests that there are no spikes in memory usage when reading traces."
-#     folder = folder_with_binary_files
-#     num_channels = 32
-#     sampling_frequency = 30_000.0
-#     dtype = "float32"
-
-#     file_paths = [folder / "traces_cached_seg0.raw"]
-#     recording = BinaryRecordingExtractor(
-#         num_chan=num_channels,
-#         file_paths=file_paths,
-#         sampling_frequency=sampling_frequency,
-#         dtype=dtype,
-#     )
-
-#     memory_before_traces_bytes = measure_memory_allocation()
-#     traces = recording.get_traces(start_frame=1000, end_frame=2000)
-#     traces_size_bytes = traces.nbytes
-
-#     expected_memory_usage = memory_before_traces_bytes + traces_size_bytes
-#     peak_memory_MiB = measure_peak_memory_usage() / 1024**2
-#     expected_memory_usage_MiB = expected_memory_usage / 1024**2
-#     ratio = peak_memory_MiB / expected_memory_usage_MiB
-#     assertion_msg = (
-#         f"Peak memory {peak_memory_MiB} MiB usage is {ratio:.2f} times"
-#         f"the expected memory usage of {expected_memory_usage_MiB} MiB."
-#     )
-#     assert ratio <= 1.05, assertion_msg
-
-
 if __name__ == "__main__":
     test_BinaryRecordingExtractor()

From a0d8097cc69d8efca1443ce5e8edb3152285e640 Mon Sep 17 00:00:00 2001
From: Heberto Mayorquin <h.mayorquin@gmail.com>
Date: Fri, 5 Jul 2024 08:27:51 -0600
Subject: [PATCH 10/10] Update
 src/spikeinterface/core/binaryrecordingextractor.py

Co-authored-by: Garcia Samuel <sam.garcia.die@gmail.com>
---
 src/spikeinterface/core/binaryrecordingextractor.py | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/src/spikeinterface/core/binaryrecordingextractor.py b/src/spikeinterface/core/binaryrecordingextractor.py
index 3733c1f0c3..f91d8165df 100644
--- a/src/spikeinterface/core/binaryrecordingextractor.py
+++ b/src/spikeinterface/core/binaryrecordingextractor.py
@@ -193,14 +193,6 @@ def get_traces(
         end_frame: int | None = None,
         channel_indices: list | None = None,
     ) -> np.ndarray:
-        if start_frame is None:
-            start_frame = 0
-
-        if end_frame is None:
-            end_frame = self.get_num_samples()
-
-        if end_frame > self.get_num_samples():
-            raise ValueError(f"end_frame {end_frame} is larger than the number of samples {self.get_num_samples()}")
 
         # Calculate byte offsets for start and end frames
         start_byte = self.file_offset + start_frame * self.bytes_per_sample