From 299077e878619ea9e7af6d83ae1cf1a5278e354f Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Wed, 28 Jun 2023 22:13:08 +0200 Subject: [PATCH 01/10] failing test --- src/spikeinterface/core/core_tools.py | 25 ++++ .../tests/test_binaryrecordingextractor.py | 114 +++++++++++++++++- .../core/tests/test_generate.py | 29 +---- 3 files changed, 139 insertions(+), 29 deletions(-) diff --git a/src/spikeinterface/core/core_tools.py b/src/spikeinterface/core/core_tools.py index 316d8f79a2..3a02b6f71c 100644 --- a/src/spikeinterface/core/core_tools.py +++ b/src/spikeinterface/core/core_tools.py @@ -922,3 +922,28 @@ def convert_bytes_to_str(byte_value: int) -> str: byte_value /= 1024 i += 1 return f"{byte_value:.2f} {suffixes[i]}" + + +def measure_memory_allocation(measure_in_process: bool = True) -> float: + """ + A local utility to measure memory allocation at a specific point in time. + Can measure either the process resident memory or system wide memory available + + Uses psutil package. + + Parameters + ---------- + measure_in_process : bool, True by default + Mesure memory allocation in the current process only, if false then measures at the system + level. + """ + import psutil + + if measure_in_process: + process = psutil.Process() + memory = process.memory_info().rss + else: + mem_info = psutil.virtual_memory() + memory = mem_info.total - mem_info.available + + return memory diff --git a/src/spikeinterface/core/tests/test_binaryrecordingextractor.py b/src/spikeinterface/core/tests/test_binaryrecordingextractor.py index 1d2c6e4c21..16001325ae 100644 --- a/src/spikeinterface/core/tests/test_binaryrecordingextractor.py +++ b/src/spikeinterface/core/tests/test_binaryrecordingextractor.py @@ -4,6 +4,8 @@ from spikeinterface.core import BinaryRecordingExtractor from spikeinterface.core.numpyextractors import NumpyRecording +from spikeinterface.core.core_tools import measure_memory_allocation +from spikeinterface.core.generate import GeneratorRecording if hasattr(pytest, "global_test_folder"): cache_folder = pytest.global_test_folder / "core" @@ -35,9 +37,10 @@ def test_BinaryRecordingExtractor(): def test_round_trip(tmp_path): num_channels = 10 num_samples = 50 - traces_list = [np.ones(shape=(num_samples, num_channels), dtype="int32")] + + traces = np.arange(num_channels * num_samples, dtype="int16").reshape(num_samples, num_channels) sampling_frequency = 30_000.0 - recording = NumpyRecording(traces_list=traces_list, sampling_frequency=sampling_frequency) + recording = NumpyRecording(traces_list=[traces], sampling_frequency=sampling_frequency) file_path = tmp_path / "test_BinaryRecordingExtractor.raw" dtype = recording.get_dtype() @@ -59,5 +62,112 @@ def test_round_trip(tmp_path): np.allclose(smaller_traces, binary_smaller_traces) +@pytest.fixture(scope="module") +def folder_with_binary_files(tmpdir_factory): + tmp_path = Path(tmpdir_factory.mktemp("spike_interface_test")) + folder = tmp_path / "test_binary_recording" + num_channels = 32 + sampling_frequency = 30_000.0 + dtype = "float32" + recording = GeneratorRecording( + durations=[3600], + sampling_frequency=sampling_frequency, + num_channels=num_channels, + dtype=dtype, + ) + dtype = recording.get_dtype() + recording.save(folder=folder, overwrite=True) + + return folder + + +def test_memory_effcienty(folder_with_binary_files): + folder = folder_with_binary_files + num_channels = 32 + sampling_frequency = 30_000.0 + dtype = "float32" + + file_paths = [folder / "traces_cached_seg0.raw"] + recorder_binary = BinaryRecordingExtractor( + num_chan=num_channels, + file_paths=file_paths, + sampling_frequency=sampling_frequency, + dtype=dtype, + ) + + memory_before_traces_bytes = measure_memory_allocation() + traces = recorder_binary.get_traces(start_frame=1000, end_frame=10_000) + memory_after_traces_bytes = measure_memory_allocation() + traces_size_bytes = traces.nbytes + + expected_memory_usage = memory_before_traces_bytes + traces_size_bytes + expected_memory_usage_GiB = expected_memory_usage / 1024**3 + memory_after_traces_bytes_GiB = memory_after_traces_bytes / 1024**3 + assert expected_memory_usage_GiB == pytest.approx(memory_after_traces_bytes_GiB, rel=0.1) + + +def measure_peak_memory_usage(): + """ + Measure the peak memory usage in bytes for the current process. + + The `resource.getrusage(resource.RUSAGE_SELF).ru_maxrss` command is used to get the peak memory usage. + The `ru_maxrss` attribute represents the maximum resident set size used (in kilobytes on Linux and bytes on MacOS), + which is the maximum memory used by the process since it was started. + + This function only works on Unix systems (including Linux and MacOS). + + Returns + ------- + int + Peak memory usage in bytes. + + Raises + ------ + NotImplementedError + If the function is called on a Windows system. + """ + + import sys + import resource + + if sys.platform == "win32": + raise NotImplementedError("Function cannot be used on Windows") + + mem_usage = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss + + # If ru_maxrss returns memory in kilobytes (like on Linux), convert to bytes + if hasattr(resource, "RLIMIT_AS"): + mem_usage = mem_usage * 1024 + + return mem_usage + + +def test_peak_memory_usage(folder_with_binary_files): + folder = folder_with_binary_files + num_channels = 32 + sampling_frequency = 30_000.0 + dtype = "float32" + + file_paths = [folder / "traces_cached_seg0.raw"] + recorder_binary = BinaryRecordingExtractor( + num_chan=num_channels, + file_paths=file_paths, + sampling_frequency=sampling_frequency, + dtype=dtype, + ) + + memory_before_traces_bytes = measure_memory_allocation() + traces = recorder_binary.get_traces(start_frame=1000, end_frame=10_000) + traces_size_bytes = traces.nbytes + + expected_memory_usage = memory_before_traces_bytes + traces_size_bytes + peak_memory_GiB = measure_peak_memory_usage() / 1024**3 + expected_memory_usage_GiB = expected_memory_usage / 1024**3 + assert expected_memory_usage_GiB == pytest.approx(peak_memory_GiB, rel=0.1) + + print("Expected memory usage: {:.2f} GiB".format(expected_memory_usage_GiB)) + print(f"Peak memory usage: {peak_memory_GiB:.2f} GiB") + + if __name__ == "__main__": test_BinaryRecordingExtractor() diff --git a/src/spikeinterface/core/tests/test_generate.py b/src/spikeinterface/core/tests/test_generate.py index 873105e115..45ed791ab3 100644 --- a/src/spikeinterface/core/tests/test_generate.py +++ b/src/spikeinterface/core/tests/test_generate.py @@ -1,44 +1,19 @@ import pytest -import psutil import numpy as np from spikeinterface.core.generate import GeneratorRecording, generate_lazy_recording -from spikeinterface.core.core_tools import convert_bytes_to_str +from spikeinterface.core.core_tools import convert_bytes_to_str, measure_memory_allocation mode_list = GeneratorRecording.available_modes -def measure_memory_allocation(measure_in_process: bool = True) -> float: - """ - A local utility to measure memory allocation at a specific point in time. - Can measure either the process resident memory or system wide memory available - - Uses psutil package. - - Parameters - ---------- - measure_in_process : bool, True by default - Mesure memory allocation in the current process only, if false then measures at the system - level. - """ - - if measure_in_process: - process = psutil.Process() - memory = process.memory_info().rss - else: - mem_info = psutil.virtual_memory() - memory = mem_info.total - mem_info.available - - return memory - - @pytest.mark.parametrize("mode", mode_list) def test_lazy_random_recording(mode): # Test that get_traces does not consume more memory than allocated. bytes_to_MiB_factor = 1024**2 - relative_tolerance = 0.05 # relative tolerance of 5 per cent + relative_tolerance = 0.01 # relative tolerance of 5 per cent sampling_frequency = 30000 # Hz durations = [2.0] From 628b0a884a25233bed4baf56418dd4d3a3d0adc1 Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Thu, 29 Jun 2023 10:54:11 +0200 Subject: [PATCH 02/10] passing tests --- .../core/binaryrecordingextractor.py | 68 +++++++++++++------ .../tests/test_binaryrecordingextractor.py | 68 +++++++++++++++---- 2 files changed, 101 insertions(+), 35 deletions(-) diff --git a/src/spikeinterface/core/binaryrecordingextractor.py b/src/spikeinterface/core/binaryrecordingextractor.py index c04a1c6ec7..deadcc2624 100644 --- a/src/spikeinterface/core/binaryrecordingextractor.py +++ b/src/spikeinterface/core/binaryrecordingextractor.py @@ -155,25 +155,17 @@ def get_binary_description(self): class BinaryRecordingSegment(BaseRecordingSegment): - def __init__(self, datfile, sampling_frequency, t_start, num_chan, dtype, time_axis, file_offset): + def __init__(self, file_path, sampling_frequency, t_start, num_chan, dtype, time_axis, file_offset): BaseRecordingSegment.__init__(self, sampling_frequency=sampling_frequency, t_start=t_start) self.num_chan = num_chan self.dtype = np.dtype(dtype) self.file_offset = file_offset self.time_axis = time_axis - self.datfile = datfile - self.file = open(self.datfile, "r") - self.num_samples = (Path(datfile).stat().st_size - file_offset) // (num_chan * np.dtype(dtype).itemsize) - if self.time_axis == 0: - self.shape = (self.num_samples, self.num_chan) - else: - self.shape = (self.num_chan, self.num_samples) - - byte_offset = self.file_offset - dtype_size_bytes = self.dtype.itemsize - data_size_bytes = dtype_size_bytes * self.num_samples * self.num_chan - self.memmap_offset, self.array_offset = divmod(byte_offset, mmap.ALLOCATIONGRANULARITY) - self.memmap_length = data_size_bytes + self.array_offset + self.file_path = file_path + self.file = open(self.file_path, "rb") + self.elements_per_sample = self.num_chan * self.dtype.itemsize + self.data_size_in_bytes = Path(file_path).stat().st_size - file_offset + self.num_samples = self.data_size_in_bytes // self.elements_per_sample def get_num_samples(self) -> int: """Returns the number of samples in this signal block @@ -189,23 +181,55 @@ def get_traces( end_frame: Union[int, None] = None, channel_indices: Union[List, None] = None, ) -> np.ndarray: - length = self.memmap_length - memmap_offset = self.memmap_offset + if start_frame is None: + start_frame = 0 + + if end_frame is None: + end_frame = self.get_num_samples() + + if end_frame > self.get_num_samples(): + raise ValueError(f"end_frame {end_frame} is larger than the number of samples {self.get_num_samples()}") + + dtype_size_bytes = np.dtype(self.dtype).itemsize + elements_per_sample = self.num_chan * dtype_size_bytes + + # Calculate byte offsets for start and end frames + start_byte = self.file_offset + start_frame * elements_per_sample + end_byte = self.file_offset + end_frame * elements_per_sample + + # Calculate the length of the data chunk to load into memory + length = end_byte - start_byte + + # The mmap offset must be a multiple of mmap.ALLOCATIONGRANULARITY + memmap_offset, start_offset = divmod(start_byte, mmap.ALLOCATIONGRANULARITY) + memmap_offset *= mmap.ALLOCATIONGRANULARITY + + # Adjust the length so it includes the extra data from rounding down the memmap offset to a multiple of ALLOCATIONGRANULARITY + length += start_offset + + # Create the mmap object memmap_obj = mmap.mmap(self.file.fileno(), length=length, access=mmap.ACCESS_READ, offset=memmap_offset) - array = np.ndarray.__new__( - np.ndarray, - shape=self.shape, + # Create a numpy array using the mmap object as the buffer + # Note that the shape must be recalculated based on the new data chunk + if self.time_axis == 0: + shape = ((end_frame - start_frame), self.num_chan) + else: + shape = (self.num_chan, (end_frame - start_frame)) + + array = np.ndarray( + shape=shape, dtype=self.dtype, buffer=memmap_obj, - order="C", - offset=self.array_offset, + offset=start_offset, ) if self.time_axis == 1: array = array.T - traces = array[start_frame:end_frame] + # Now the entire array should correspond to the data between start_frame and end_frame, so we can use it directly + traces = array + if channel_indices is not None: traces = traces[:, channel_indices] diff --git a/src/spikeinterface/core/tests/test_binaryrecordingextractor.py b/src/spikeinterface/core/tests/test_binaryrecordingextractor.py index 16001325ae..ed0b2922e7 100644 --- a/src/spikeinterface/core/tests/test_binaryrecordingextractor.py +++ b/src/spikeinterface/core/tests/test_binaryrecordingextractor.py @@ -52,10 +52,12 @@ def test_round_trip(tmp_path): file_paths=file_path, sampling_frequency=sampling_frequency, num_chan=num_chan, dtype=dtype ) + # Test for full traces assert np.allclose(recording.get_traces(), binary_recorder.get_traces()) - start_frame = 200 - end_frame = 500 + # Ttest for a sub-set of the traces + start_frame = 20 + end_frame = 40 smaller_traces = recording.get_traces(start_frame=start_frame, end_frame=end_frame) binary_smaller_traces = binary_recorder.get_traces(start_frame=start_frame, end_frame=end_frame) @@ -70,7 +72,7 @@ def folder_with_binary_files(tmpdir_factory): sampling_frequency = 30_000.0 dtype = "float32" recording = GeneratorRecording( - durations=[3600], + durations=[1.0], sampling_frequency=sampling_frequency, num_channels=num_channels, dtype=dtype, @@ -81,14 +83,53 @@ def folder_with_binary_files(tmpdir_factory): return folder +def test_sequential_reading_of_small_traces(folder_with_binary_files): + folder = folder_with_binary_files + num_channels = 32 + sampling_frequency = 30_000.0 + dtype = "float32" + + file_paths = [folder / "traces_cached_seg0.raw"] + recording = BinaryRecordingExtractor( + num_chan=num_channels, + file_paths=file_paths, + sampling_frequency=sampling_frequency, + dtype=dtype, + ) + + full_traces = recording.get_traces() + + # Test for a sub-set of the traces + start_frame = 10 + end_frame = 15 + small_traces = recording.get_traces(start_frame=start_frame, end_frame=end_frame) + expected_traces = full_traces[start_frame:end_frame, :] + assert np.allclose(small_traces, expected_traces) + + # Test for a sub-set of the traces + start_frame = 1000 + end_frame = 1100 + small_traces = recording.get_traces(start_frame=start_frame, end_frame=end_frame) + expected_traces = full_traces[start_frame:end_frame, :] + assert np.allclose(small_traces, expected_traces) + + # Test for a sub-set of the traces + start_frame = 10_000 + end_frame = 11_000 + small_traces = recording.get_traces(start_frame=start_frame, end_frame=end_frame) + expected_traces = full_traces[start_frame:end_frame, :] + assert np.allclose(small_traces, expected_traces) + + def test_memory_effcienty(folder_with_binary_files): + "This test that memory is freed afte reading the traces" folder = folder_with_binary_files num_channels = 32 sampling_frequency = 30_000.0 dtype = "float32" file_paths = [folder / "traces_cached_seg0.raw"] - recorder_binary = BinaryRecordingExtractor( + recording = BinaryRecordingExtractor( num_chan=num_channels, file_paths=file_paths, sampling_frequency=sampling_frequency, @@ -96,14 +137,14 @@ def test_memory_effcienty(folder_with_binary_files): ) memory_before_traces_bytes = measure_memory_allocation() - traces = recorder_binary.get_traces(start_frame=1000, end_frame=10_000) + traces = recording.get_traces(start_frame=1000, end_frame=10_000) memory_after_traces_bytes = measure_memory_allocation() traces_size_bytes = traces.nbytes expected_memory_usage = memory_before_traces_bytes + traces_size_bytes expected_memory_usage_GiB = expected_memory_usage / 1024**3 memory_after_traces_bytes_GiB = memory_after_traces_bytes / 1024**3 - assert expected_memory_usage_GiB == pytest.approx(memory_after_traces_bytes_GiB, rel=0.1) + assert memory_after_traces_bytes_GiB == pytest.approx(expected_memory_usage_GiB, rel=0.1) def measure_peak_memory_usage(): @@ -143,13 +184,14 @@ def measure_peak_memory_usage(): def test_peak_memory_usage(folder_with_binary_files): + "This tests that there are no spikes in memory usage when reading traces." folder = folder_with_binary_files num_channels = 32 sampling_frequency = 30_000.0 dtype = "float32" file_paths = [folder / "traces_cached_seg0.raw"] - recorder_binary = BinaryRecordingExtractor( + recording = BinaryRecordingExtractor( num_chan=num_channels, file_paths=file_paths, sampling_frequency=sampling_frequency, @@ -157,16 +199,16 @@ def test_peak_memory_usage(folder_with_binary_files): ) memory_before_traces_bytes = measure_memory_allocation() - traces = recorder_binary.get_traces(start_frame=1000, end_frame=10_000) + traces = recording.get_traces(start_frame=1000, end_frame=2000) traces_size_bytes = traces.nbytes expected_memory_usage = memory_before_traces_bytes + traces_size_bytes - peak_memory_GiB = measure_peak_memory_usage() / 1024**3 - expected_memory_usage_GiB = expected_memory_usage / 1024**3 - assert expected_memory_usage_GiB == pytest.approx(peak_memory_GiB, rel=0.1) + peak_memory_MiB = measure_peak_memory_usage() / 1024**2 + expected_memory_usage_MiB = expected_memory_usage / 1024**2 + assert expected_memory_usage_MiB == pytest.approx(peak_memory_MiB, rel=0.1) - print("Expected memory usage: {:.2f} GiB".format(expected_memory_usage_GiB)) - print(f"Peak memory usage: {peak_memory_GiB:.2f} GiB") + print("Expected memory usage: {:.2f} MiB".format(expected_memory_usage_MiB)) + print(f"Peak memory usage: {peak_memory_MiB:.2f} MiB") if __name__ == "__main__": From 59e82a13b590809abfe8803111e4e584e5f155e2 Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Tue, 4 Jul 2023 20:49:52 +0200 Subject: [PATCH 03/10] merging --- .../core/binaryrecordingextractor.py | 26 ++++++++----------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/src/spikeinterface/core/binaryrecordingextractor.py b/src/spikeinterface/core/binaryrecordingextractor.py index 5b588a31a2..2cbc52f4b4 100644 --- a/src/spikeinterface/core/binaryrecordingextractor.py +++ b/src/spikeinterface/core/binaryrecordingextractor.py @@ -175,9 +175,9 @@ def __init__(self, file_path, sampling_frequency, t_start, num_channels, dtype, self.time_axis = time_axis self.file_path = file_path self.file = open(self.file_path, "rb") - self.elements_per_sample = self.num_chan * self.dtype.itemsize + self.bytes_per_sample = self.num_channels * self.dtype.itemsize self.data_size_in_bytes = Path(file_path).stat().st_size - file_offset - self.num_samples = self.data_size_in_bytes // self.elements_per_sample + self.num_samples = self.data_size_in_bytes // self.bytes_per_sample def get_num_samples(self) -> int: """Returns the number of samples in this signal block @@ -202,12 +202,9 @@ def get_traces( if end_frame > self.get_num_samples(): raise ValueError(f"end_frame {end_frame} is larger than the number of samples {self.get_num_samples()}") - dtype_size_bytes = np.dtype(self.dtype).itemsize - elements_per_sample = self.num_chan * dtype_size_bytes - # Calculate byte offsets for start and end frames - start_byte = self.file_offset + start_frame * elements_per_sample - end_byte = self.file_offset + end_frame * elements_per_sample + start_byte = self.file_offset + start_frame * self.bytes_per_sample + end_byte = self.file_offset + end_frame * self.bytes_per_sample # Calculate the length of the data chunk to load into memory length = end_byte - start_byte @@ -216,7 +213,8 @@ def get_traces( memmap_offset, start_offset = divmod(start_byte, mmap.ALLOCATIONGRANULARITY) memmap_offset *= mmap.ALLOCATIONGRANULARITY - # Adjust the length so it includes the extra data from rounding down the memmap offset to a multiple of ALLOCATIONGRANULARITY + # Adjust the length so it includes the extra data from rounding down + # the memmap offset to a multiple of ALLOCATIONGRANULARITY length += start_offset # Create the mmap object @@ -225,11 +223,12 @@ def get_traces( # Create a numpy array using the mmap object as the buffer # Note that the shape must be recalculated based on the new data chunk if self.time_axis == 0: - shape = ((end_frame - start_frame), self.num_chan) + shape = ((end_frame - start_frame), self.num_channels) else: - shape = (self.num_chan, (end_frame - start_frame)) + shape = (self.num_channels, (end_frame - start_frame)) - array = np.ndarray( + # Now the entire array should correspond to the data between start_frame and end_frame, so we can use it directly + traces = np.ndarray( shape=shape, dtype=self.dtype, buffer=memmap_obj, @@ -237,10 +236,7 @@ def get_traces( ) if self.time_axis == 1: - array = array.T - - # Now the entire array should correspond to the data between start_frame and end_frame, so we can use it directly - traces = array + traces = traces.T if channel_indices is not None: traces = traces[:, channel_indices] From 4d4c55e140014ea79947910883f3ef360fe0f723 Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Tue, 4 Jul 2023 21:03:53 +0200 Subject: [PATCH 04/10] refactor tests --- .../tests/test_binaryrecordingextractor.py | 26 ++++++++++++++----- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/src/spikeinterface/core/tests/test_binaryrecordingextractor.py b/src/spikeinterface/core/tests/test_binaryrecordingextractor.py index 16cd01141e..a73cc8d1f4 100644 --- a/src/spikeinterface/core/tests/test_binaryrecordingextractor.py +++ b/src/spikeinterface/core/tests/test_binaryrecordingextractor.py @@ -1,6 +1,8 @@ import pytest import numpy as np from pathlib import Path +import sys +import resource from spikeinterface.core import BinaryRecordingExtractor from spikeinterface.core.numpyextractors import NumpyRecording @@ -91,6 +93,7 @@ def folder_with_binary_files(tmpdir_factory): def test_sequential_reading_of_small_traces(folder_with_binary_files): + # Test that memmap is readed correctly when pointing to specific frames folder = folder_with_binary_files num_channels = 32 sampling_frequency = 30_000.0 @@ -151,6 +154,15 @@ def test_memory_effcienty(folder_with_binary_files): expected_memory_usage = memory_before_traces_bytes + traces_size_bytes expected_memory_usage_GiB = expected_memory_usage / 1024**3 memory_after_traces_bytes_GiB = memory_after_traces_bytes / 1024**3 + + ratio = memory_after_traces_bytes_GiB / expected_memory_usage_GiB + + assertion_msg = ( + f"Peak memory {memory_after_traces_bytes_GiB} GiB usage is {ratio:.2f} times" + f"the expected memory usage of {expected_memory_usage_GiB} GiB." + ) + assert ratio <= 1.05, assertion_msg + assert memory_after_traces_bytes_GiB == pytest.approx(expected_memory_usage_GiB, rel=0.1) @@ -175,9 +187,6 @@ def measure_peak_memory_usage(): If the function is called on a Windows system. """ - import sys - import resource - if sys.platform == "win32": raise NotImplementedError("Function cannot be used on Windows") @@ -190,6 +199,7 @@ def measure_peak_memory_usage(): return mem_usage +@pytest.mark.skipif(sys.platform == "win32", reason="Don't know how to calculate peak memory on widnows") def test_peak_memory_usage(folder_with_binary_files): "This tests that there are no spikes in memory usage when reading traces." folder = folder_with_binary_files @@ -212,10 +222,12 @@ def test_peak_memory_usage(folder_with_binary_files): expected_memory_usage = memory_before_traces_bytes + traces_size_bytes peak_memory_MiB = measure_peak_memory_usage() / 1024**2 expected_memory_usage_MiB = expected_memory_usage / 1024**2 - assert expected_memory_usage_MiB == pytest.approx(peak_memory_MiB, rel=0.1) - - print("Expected memory usage: {:.2f} MiB".format(expected_memory_usage_MiB)) - print(f"Peak memory usage: {peak_memory_MiB:.2f} MiB") + ratio = peak_memory_MiB / expected_memory_usage_MiB + assertion_msg = ( + f"Peak memory {peak_memory_MiB} MiB usage is {ratio:.2f} times" + f"the expected memory usage of {expected_memory_usage_MiB} MiB." + ) + assert ratio <= 1.05, assertion_msg if __name__ == "__main__": From 2ea7f1bde6ec38c43f1a747ca43f4765a49cb97a Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Tue, 4 Jul 2023 21:06:44 +0200 Subject: [PATCH 05/10] window import --- .../core/tests/test_binaryrecordingextractor.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/spikeinterface/core/tests/test_binaryrecordingextractor.py b/src/spikeinterface/core/tests/test_binaryrecordingextractor.py index a73cc8d1f4..a62974c833 100644 --- a/src/spikeinterface/core/tests/test_binaryrecordingextractor.py +++ b/src/spikeinterface/core/tests/test_binaryrecordingextractor.py @@ -2,7 +2,6 @@ import numpy as np from pathlib import Path import sys -import resource from spikeinterface.core import BinaryRecordingExtractor from spikeinterface.core.numpyextractors import NumpyRecording @@ -188,7 +187,9 @@ def measure_peak_memory_usage(): """ if sys.platform == "win32": - raise NotImplementedError("Function cannot be used on Windows") + raise NotImplementedError("Resource module not available on Windows") + + import resource mem_usage = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss @@ -199,7 +200,7 @@ def measure_peak_memory_usage(): return mem_usage -@pytest.mark.skipif(sys.platform == "win32", reason="Don't know how to calculate peak memory on widnows") +@pytest.mark.skipif(sys.platform == "win32", reason="resource module not available on Windows") def test_peak_memory_usage(folder_with_binary_files): "This tests that there are no spikes in memory usage when reading traces." folder = folder_with_binary_files From 80a41525b1bb583df8a05a34c50baaab1529f63d Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Mon, 11 Sep 2023 11:56:12 +0200 Subject: [PATCH 06/10] update generator recording --- .../core/tests/test_binaryrecordingextractor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/spikeinterface/core/tests/test_binaryrecordingextractor.py b/src/spikeinterface/core/tests/test_binaryrecordingextractor.py index a62974c833..6f63dad576 100644 --- a/src/spikeinterface/core/tests/test_binaryrecordingextractor.py +++ b/src/spikeinterface/core/tests/test_binaryrecordingextractor.py @@ -6,7 +6,7 @@ from spikeinterface.core import BinaryRecordingExtractor from spikeinterface.core.numpyextractors import NumpyRecording from spikeinterface.core.core_tools import measure_memory_allocation -from spikeinterface.core.generate import GeneratorRecording +from spikeinterface.core.generate import NoiseGeneratorRecording if hasattr(pytest, "global_test_folder"): cache_folder = pytest.global_test_folder / "core" @@ -79,7 +79,7 @@ def folder_with_binary_files(tmpdir_factory): num_channels = 32 sampling_frequency = 30_000.0 dtype = "float32" - recording = GeneratorRecording( + recording = NoiseGeneratorRecording( durations=[1.0], sampling_frequency=sampling_frequency, num_channels=num_channels, From 25b125ece7c465f74e0cd8e100278604d8b9d124 Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Mon, 11 Sep 2023 11:58:46 +0200 Subject: [PATCH 07/10] add missing import --- src/spikeinterface/core/tests/test_generate.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/spikeinterface/core/tests/test_generate.py b/src/spikeinterface/core/tests/test_generate.py index abcbd9c4e2..2b4e65980f 100644 --- a/src/spikeinterface/core/tests/test_generate.py +++ b/src/spikeinterface/core/tests/test_generate.py @@ -18,6 +18,7 @@ from spikeinterface.core.core_tools import convert_bytes_to_str, measure_memory_allocation +from spikeinterface.core.testing import check_recordings_equal strategy_list = ["tile_pregenerated", "on_the_fly"] From 757e939b8c3e45d12c952d073c9e22215453b9cc Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Thu, 14 Dec 2023 15:33:25 +0100 Subject: [PATCH 08/10] maybe tests --- .../tests/test_binaryrecordingextractor.py | 198 +++++++++--------- 1 file changed, 99 insertions(+), 99 deletions(-) diff --git a/src/spikeinterface/core/tests/test_binaryrecordingextractor.py b/src/spikeinterface/core/tests/test_binaryrecordingextractor.py index 6f63dad576..03b1927b33 100644 --- a/src/spikeinterface/core/tests/test_binaryrecordingextractor.py +++ b/src/spikeinterface/core/tests/test_binaryrecordingextractor.py @@ -130,105 +130,105 @@ def test_sequential_reading_of_small_traces(folder_with_binary_files): assert np.allclose(small_traces, expected_traces) -def test_memory_effcienty(folder_with_binary_files): - "This test that memory is freed afte reading the traces" - folder = folder_with_binary_files - num_channels = 32 - sampling_frequency = 30_000.0 - dtype = "float32" - - file_paths = [folder / "traces_cached_seg0.raw"] - recording = BinaryRecordingExtractor( - num_chan=num_channels, - file_paths=file_paths, - sampling_frequency=sampling_frequency, - dtype=dtype, - ) - - memory_before_traces_bytes = measure_memory_allocation() - traces = recording.get_traces(start_frame=1000, end_frame=10_000) - memory_after_traces_bytes = measure_memory_allocation() - traces_size_bytes = traces.nbytes - - expected_memory_usage = memory_before_traces_bytes + traces_size_bytes - expected_memory_usage_GiB = expected_memory_usage / 1024**3 - memory_after_traces_bytes_GiB = memory_after_traces_bytes / 1024**3 - - ratio = memory_after_traces_bytes_GiB / expected_memory_usage_GiB - - assertion_msg = ( - f"Peak memory {memory_after_traces_bytes_GiB} GiB usage is {ratio:.2f} times" - f"the expected memory usage of {expected_memory_usage_GiB} GiB." - ) - assert ratio <= 1.05, assertion_msg - - assert memory_after_traces_bytes_GiB == pytest.approx(expected_memory_usage_GiB, rel=0.1) - - -def measure_peak_memory_usage(): - """ - Measure the peak memory usage in bytes for the current process. - - The `resource.getrusage(resource.RUSAGE_SELF).ru_maxrss` command is used to get the peak memory usage. - The `ru_maxrss` attribute represents the maximum resident set size used (in kilobytes on Linux and bytes on MacOS), - which is the maximum memory used by the process since it was started. - - This function only works on Unix systems (including Linux and MacOS). - - Returns - ------- - int - Peak memory usage in bytes. - - Raises - ------ - NotImplementedError - If the function is called on a Windows system. - """ - - if sys.platform == "win32": - raise NotImplementedError("Resource module not available on Windows") - - import resource - - mem_usage = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss - - # If ru_maxrss returns memory in kilobytes (like on Linux), convert to bytes - if hasattr(resource, "RLIMIT_AS"): - mem_usage = mem_usage * 1024 - - return mem_usage - - -@pytest.mark.skipif(sys.platform == "win32", reason="resource module not available on Windows") -def test_peak_memory_usage(folder_with_binary_files): - "This tests that there are no spikes in memory usage when reading traces." - folder = folder_with_binary_files - num_channels = 32 - sampling_frequency = 30_000.0 - dtype = "float32" - - file_paths = [folder / "traces_cached_seg0.raw"] - recording = BinaryRecordingExtractor( - num_chan=num_channels, - file_paths=file_paths, - sampling_frequency=sampling_frequency, - dtype=dtype, - ) - - memory_before_traces_bytes = measure_memory_allocation() - traces = recording.get_traces(start_frame=1000, end_frame=2000) - traces_size_bytes = traces.nbytes - - expected_memory_usage = memory_before_traces_bytes + traces_size_bytes - peak_memory_MiB = measure_peak_memory_usage() / 1024**2 - expected_memory_usage_MiB = expected_memory_usage / 1024**2 - ratio = peak_memory_MiB / expected_memory_usage_MiB - assertion_msg = ( - f"Peak memory {peak_memory_MiB} MiB usage is {ratio:.2f} times" - f"the expected memory usage of {expected_memory_usage_MiB} MiB." - ) - assert ratio <= 1.05, assertion_msg +# def test_memory_effcienty(folder_with_binary_files): +# "This test that memory is freed afte reading the traces" +# folder = folder_with_binary_files +# num_channels = 32 +# sampling_frequency = 30_000.0 +# dtype = "float32" + +# file_paths = [folder / "traces_cached_seg0.raw"] +# recording = BinaryRecordingExtractor( +# num_chan=num_channels, +# file_paths=file_paths, +# sampling_frequency=sampling_frequency, +# dtype=dtype, +# ) + +# memory_before_traces_bytes = measure_memory_allocation() +# traces = recording.get_traces(start_frame=1000, end_frame=10_000) +# memory_after_traces_bytes = measure_memory_allocation() +# traces_size_bytes = traces.nbytes + +# expected_memory_usage = memory_before_traces_bytes + traces_size_bytes +# expected_memory_usage_GiB = expected_memory_usage / 1024**3 +# memory_after_traces_bytes_GiB = memory_after_traces_bytes / 1024**3 + +# ratio = memory_after_traces_bytes_GiB / expected_memory_usage_GiB + +# assertion_msg = ( +# f"Peak memory {memory_after_traces_bytes_GiB} GiB usage is {ratio:.2f} times" +# f"the expected memory usage of {expected_memory_usage_GiB} GiB." +# ) +# assert ratio <= 1.05, assertion_msg + +# assert memory_after_traces_bytes_GiB == pytest.approx(expected_memory_usage_GiB, rel=0.1) + + +# def measure_peak_memory_usage(): +# """ +# Measure the peak memory usage in bytes for the current process. + +# The `resource.getrusage(resource.RUSAGE_SELF).ru_maxrss` command is used to get the peak memory usage. +# The `ru_maxrss` attribute represents the maximum resident set size used (in kilobytes on Linux and bytes on MacOS), +# which is the maximum memory used by the process since it was started. + +# This function only works on Unix systems (including Linux and MacOS). + +# Returns +# ------- +# int +# Peak memory usage in bytes. + +# Raises +# ------ +# NotImplementedError +# If the function is called on a Windows system. +# """ + +# if sys.platform == "win32": +# raise NotImplementedError("Resource module not available on Windows") + +# import resource + +# mem_usage = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss + +# # If ru_maxrss returns memory in kilobytes (like on Linux), convert to bytes +# if hasattr(resource, "RLIMIT_AS"): +# mem_usage = mem_usage * 1024 + +# return mem_usage + + +# @pytest.mark.skipif(sys.platform == "win32", reason="resource module not available on Windows") +# def test_peak_memory_usage(folder_with_binary_files): +# "This tests that there are no spikes in memory usage when reading traces." +# folder = folder_with_binary_files +# num_channels = 32 +# sampling_frequency = 30_000.0 +# dtype = "float32" + +# file_paths = [folder / "traces_cached_seg0.raw"] +# recording = BinaryRecordingExtractor( +# num_chan=num_channels, +# file_paths=file_paths, +# sampling_frequency=sampling_frequency, +# dtype=dtype, +# ) + +# memory_before_traces_bytes = measure_memory_allocation() +# traces = recording.get_traces(start_frame=1000, end_frame=2000) +# traces_size_bytes = traces.nbytes + +# expected_memory_usage = memory_before_traces_bytes + traces_size_bytes +# peak_memory_MiB = measure_peak_memory_usage() / 1024**2 +# expected_memory_usage_MiB = expected_memory_usage / 1024**2 +# ratio = peak_memory_MiB / expected_memory_usage_MiB +# assertion_msg = ( +# f"Peak memory {peak_memory_MiB} MiB usage is {ratio:.2f} times" +# f"the expected memory usage of {expected_memory_usage_MiB} MiB." +# ) +# assert ratio <= 1.05, assertion_msg if __name__ == "__main__": From 091042052165e5292b70c934b6da190bcb413b21 Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Thu, 6 Jun 2024 12:05:14 -0600 Subject: [PATCH 09/10] remove unused tests --- .../tests/test_binaryrecordingextractor.py | 101 ------------------ 1 file changed, 101 deletions(-) diff --git a/src/spikeinterface/core/tests/test_binaryrecordingextractor.py b/src/spikeinterface/core/tests/test_binaryrecordingextractor.py index 30435df820..b0fab7a579 100644 --- a/src/spikeinterface/core/tests/test_binaryrecordingextractor.py +++ b/src/spikeinterface/core/tests/test_binaryrecordingextractor.py @@ -125,106 +125,5 @@ def test_sequential_reading_of_small_traces(folder_with_binary_files): assert np.allclose(small_traces, expected_traces) -# def test_memory_effcienty(folder_with_binary_files): -# "This test that memory is freed afte reading the traces" -# folder = folder_with_binary_files -# num_channels = 32 -# sampling_frequency = 30_000.0 -# dtype = "float32" - -# file_paths = [folder / "traces_cached_seg0.raw"] -# recording = BinaryRecordingExtractor( -# num_chan=num_channels, -# file_paths=file_paths, -# sampling_frequency=sampling_frequency, -# dtype=dtype, -# ) - -# memory_before_traces_bytes = measure_memory_allocation() -# traces = recording.get_traces(start_frame=1000, end_frame=10_000) -# memory_after_traces_bytes = measure_memory_allocation() -# traces_size_bytes = traces.nbytes - -# expected_memory_usage = memory_before_traces_bytes + traces_size_bytes -# expected_memory_usage_GiB = expected_memory_usage / 1024**3 -# memory_after_traces_bytes_GiB = memory_after_traces_bytes / 1024**3 - -# ratio = memory_after_traces_bytes_GiB / expected_memory_usage_GiB - -# assertion_msg = ( -# f"Peak memory {memory_after_traces_bytes_GiB} GiB usage is {ratio:.2f} times" -# f"the expected memory usage of {expected_memory_usage_GiB} GiB." -# ) -# assert ratio <= 1.05, assertion_msg - -# assert memory_after_traces_bytes_GiB == pytest.approx(expected_memory_usage_GiB, rel=0.1) - - -# def measure_peak_memory_usage(): -# """ -# Measure the peak memory usage in bytes for the current process. - -# The `resource.getrusage(resource.RUSAGE_SELF).ru_maxrss` command is used to get the peak memory usage. -# The `ru_maxrss` attribute represents the maximum resident set size used (in kilobytes on Linux and bytes on MacOS), -# which is the maximum memory used by the process since it was started. - -# This function only works on Unix systems (including Linux and MacOS). - -# Returns -# ------- -# int -# Peak memory usage in bytes. - -# Raises -# ------ -# NotImplementedError -# If the function is called on a Windows system. -# """ - -# if sys.platform == "win32": -# raise NotImplementedError("Resource module not available on Windows") - -# import resource - -# mem_usage = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss - -# # If ru_maxrss returns memory in kilobytes (like on Linux), convert to bytes -# if hasattr(resource, "RLIMIT_AS"): -# mem_usage = mem_usage * 1024 - -# return mem_usage - - -# @pytest.mark.skipif(sys.platform == "win32", reason="resource module not available on Windows") -# def test_peak_memory_usage(folder_with_binary_files): -# "This tests that there are no spikes in memory usage when reading traces." -# folder = folder_with_binary_files -# num_channels = 32 -# sampling_frequency = 30_000.0 -# dtype = "float32" - -# file_paths = [folder / "traces_cached_seg0.raw"] -# recording = BinaryRecordingExtractor( -# num_chan=num_channels, -# file_paths=file_paths, -# sampling_frequency=sampling_frequency, -# dtype=dtype, -# ) - -# memory_before_traces_bytes = measure_memory_allocation() -# traces = recording.get_traces(start_frame=1000, end_frame=2000) -# traces_size_bytes = traces.nbytes - -# expected_memory_usage = memory_before_traces_bytes + traces_size_bytes -# peak_memory_MiB = measure_peak_memory_usage() / 1024**2 -# expected_memory_usage_MiB = expected_memory_usage / 1024**2 -# ratio = peak_memory_MiB / expected_memory_usage_MiB -# assertion_msg = ( -# f"Peak memory {peak_memory_MiB} MiB usage is {ratio:.2f} times" -# f"the expected memory usage of {expected_memory_usage_MiB} MiB." -# ) -# assert ratio <= 1.05, assertion_msg - - if __name__ == "__main__": test_BinaryRecordingExtractor() From a0d8097cc69d8efca1443ce5e8edb3152285e640 Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Fri, 5 Jul 2024 08:27:51 -0600 Subject: [PATCH 10/10] Update src/spikeinterface/core/binaryrecordingextractor.py Co-authored-by: Garcia Samuel --- src/spikeinterface/core/binaryrecordingextractor.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/spikeinterface/core/binaryrecordingextractor.py b/src/spikeinterface/core/binaryrecordingextractor.py index 3733c1f0c3..f91d8165df 100644 --- a/src/spikeinterface/core/binaryrecordingextractor.py +++ b/src/spikeinterface/core/binaryrecordingextractor.py @@ -193,14 +193,6 @@ def get_traces( end_frame: int | None = None, channel_indices: list | None = None, ) -> np.ndarray: - if start_frame is None: - start_frame = 0 - - if end_frame is None: - end_frame = self.get_num_samples() - - if end_frame > self.get_num_samples(): - raise ValueError(f"end_frame {end_frame} is larger than the number of samples {self.get_num_samples()}") # Calculate byte offsets for start and end frames start_byte = self.file_offset + start_frame * self.bytes_per_sample