[Backend Config ~ Reference ~ DO NOT MERGE] move H5DataIO from add_* functions and methods to dataset configuration #475

Closed · wants to merge 14 commits
5 changes: 3 additions & 2 deletions CHANGELOG.md
@@ -1,9 +1,10 @@
# (Upcoming)
# 0.4.0 (Upcoming)

### Features

* Create separate `.add_to_nwbfile` method for all DataInterfaces. [PR #455](https://github.com/catalystneuro/neuroconv/pull/455)

* Removed HDF5-specific dataset configuration from NWB assembly methods (e.g., `.add_*`) and moved it to
`.configure_datasets()`. [PR #475](https://github.com/catalystneuro/neuroconv/pull/475)
* Added stream control with the `stream_name` argument to the `NeuralynxRecordingExtractor`. [PR #369](https://github.com/catalystneuro/neuroconv/pull/369)


66 changes: 63 additions & 3 deletions src/neuroconv/basedatainterface.py
@@ -2,11 +2,16 @@
import warnings
from abc import ABC, abstractmethod
from pathlib import Path
from typing import List, Optional
from typing import Dict, List, Literal, Optional, Tuple

from pynwb import NWBFile
from pynwb import NWBContainer, NWBFile

from .tools.nwb_helpers import make_nwbfile_from_metadata, make_or_load_nwbfile
from .tools.nwb_helpers import (
backend_configs,
configure_datasets,
make_nwbfile_from_metadata,
make_or_load_nwbfile,
)
from .utils import get_schema_from_method_signature, load_dict_from_file
from .utils.dict import DeepDict

@@ -51,12 +56,60 @@ def create_nwbfile(self, metadata=None, **conversion_options):
def add_to_nwbfile(self, nwbfile: NWBFile, **conversion_options):
raise NotImplementedError()

@staticmethod
def configure_datasets(
nwbfile: NWBFile,
backend: Literal["hdf5", "zarr"] = "hdf5",
dataset_configurations: Optional[Dict[Tuple[NWBContainer, str], dict]] = None,
):
"""
Apply dataset configurations. Use the default configuration for the backend if not specified. Modifies the
NWBfile in place.

Parameters
----------
nwbfile : NWBFile
backend : {"hdf5", "zarr"}
dataset_configurations : dict, optional
Dict of the form `(nwb_container_object, field): data_io_kwargs`
To specify that no DataIO configuration should be applied, use `(nwb_container_object, field): None`

"""
configure_datasets(nwbfile=nwbfile, backend=backend, dataset_configurations=dataset_configurations)

def write(
self,
nwbfile: NWBFile,
backend: Literal["hdf5", "zarr"] = "hdf5",
dataset_configurations: Optional[Dict[Tuple[NWBContainer, str], dict]] = None,
):
"""
Write the in-memory NWBfile to disk.

Parameters
----------
nwbfile: NWBFile
backend : {"hdf5", "zarr"}, default: "hdf5"
Backend to use for loading and/or saving the NWB file.
dataset_configurations : dict, optional
Dict of the form `(nwb_container_object, field): data_io_kwargs`
To specify that no DataIO configuration should be applied, use `(nwb_container_object, field): None`

"""

if dataset_configurations is not None:
self.configure_datasets(nwbfile=nwbfile, backend=backend, dataset_configurations=dataset_configurations)
with backend_configs[backend].nwb_io as io:
io.write(nwbfile)

def run_conversion(
self,
nwbfile_path: Optional[str] = None,
nwbfile: Optional[NWBFile] = None,
metadata: Optional[dict] = None,
overwrite: bool = False,
backend: Literal["hdf5", "zarr"] = "hdf5",
dataset_configurations: Optional[dict] = None,
**conversion_options,
):
"""
@@ -74,6 +127,11 @@ def run_conversion(
overwrite : bool, default: False
Whether to overwrite the NWBFile if one exists at the nwbfile_path.
The default is False (append mode).
backend : {"hdf5", "zarr"}, default: "hdf5"
Backend to use for loading and/or saving the NWB file.
dataset_configurations : dict, optional
Dict of the form `(nwb_container_object, field): data_io_kwargs`.
To specify that no DataIO configuration should be applied, use `(nwb_container_object, field): None`.
"""
if nwbfile_path is None:
warnings.warn(
@@ -88,5 +146,7 @@
metadata=metadata,
overwrite=overwrite,
verbose=getattr(self, "verbose", False),
backend=backend,
) as nwbfile_out:
self.add_to_nwbfile(nwbfile_out, metadata=metadata, **conversion_options)
self.configure_datasets(nwbfile_out, backend=backend, dataset_configurations=dataset_configurations)
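
For orientation, a minimal usage sketch of the API introduced above. The interface class, file path, and series name are hypothetical; the dict shape follows the docstrings, and `compression`/`compression_opts` are standard h5py keyword arguments passed through to the DataIO layer.

```python
from neuroconv.datainterfaces import SpikeGLXRecordingInterface  # any DataInterface works

interface = SpikeGLXRecordingInterface(file_path="session_imec0.ap.bin")  # hypothetical path
metadata = interface.get_metadata()

# Build the NWBFile in memory, then attach per-dataset I/O settings.
nwbfile = interface.create_nwbfile(metadata=metadata)
electrical_series = nwbfile.acquisition["ElectricalSeriesRaw"]  # hypothetical object name
interface.configure_datasets(
    nwbfile=nwbfile,
    backend="hdf5",
    dataset_configurations={
        (electrical_series, "data"): dict(compression="gzip", compression_opts=4),
        (electrical_series, "timestamps"): None,  # leave this dataset unwrapped
    },
)

# Or let run_conversion apply the backend's default configuration in one call.
interface.run_conversion(nwbfile_path="session.nwb", metadata=metadata, overwrite=True, backend="hdf5")
```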
4 changes: 0 additions & 4 deletions src/neuroconv/datainterfaces/behavior/audio/audiointerface.py
@@ -160,7 +160,6 @@ def add_to_nwbfile(
stub_frames: int = 1000,
write_as: Literal["stimulus", "acquisition"] = "stimulus",
iterator_options: Optional[dict] = None,
compression_options: Optional[dict] = None,
overwrite: bool = False,
verbose: bool = True,
):
@@ -177,8 +176,6 @@
"stimulus" or as "acquisition".
iterator_options : dict, optional
Dictionary of options for the SliceableDataChunkIterator.
compression_options : dict, optional
Dictionary of options for compressing the data for H5DataIO.
overwrite : bool, default: False
verbose : bool, default: True

@@ -220,7 +217,6 @@
write_as=write_as,
starting_time=starting_times[file_index],
iterator_options=iterator_options,
compression_options=compression_options,
)

return nwbfile
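
Under the new scheme, the audio interface call above no longer takes `compression_options`; a hedged sketch of the updated pattern (the file path, series name, and `buffer_gb` option are assumptions):

```python
from neuroconv.datainterfaces import AudioInterface

interface = AudioInterface(file_paths=["trial1.wav"])  # hypothetical file
metadata = interface.get_metadata()
nwbfile = interface.create_nwbfile(metadata=metadata)

# Iteration is still tuned here; compression has moved to configure_datasets.
interface.add_to_nwbfile(nwbfile, metadata=metadata, write_as="stimulus", iterator_options=dict(buffer_gb=1.0))
series = nwbfile.stimulus["AcousticWaveformSeries"]  # hypothetical name
interface.configure_datasets(nwbfile, dataset_configurations={(series, "data"): dict(compression="gzip")})
```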
src/neuroconv/datainterfaces/behavior/video/videodatainterface.py
@@ -4,7 +4,6 @@

import numpy as np
import psutil
from hdmf.backends.hdf5.h5_utils import H5DataIO
from hdmf.data_utils import DataChunkIterator
from pynwb import NWBFile
from pynwb.image import ImageSeries
@@ -400,14 +399,7 @@ def add_to_nwbfile(
pbar.update(1)
iterable = video

# Wrap data for compression
wrapped_io_data = H5DataIO(
iterable,
compression=compression,
compression_opts=compression_options,
chunks=chunks,
)
image_series_kwargs.update(data=wrapped_io_data)
image_series_kwargs.update(data=iterable)

if timing_type == "starting_time and rate":
starting_time = (
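
With the `H5DataIO` wrapper gone, the `ImageSeries` receives the raw iterable and compression is deferred to the configuration step. A sketch of applying gzip plus chunking afterward; the container name and chunk shape are hypothetical:

```python
# After the interface has populated the in-memory NWBFile:
image_series = nwbfile.acquisition["Video: trial1"]  # hypothetical ImageSeries name
interface.configure_datasets(
    nwbfile,
    backend="hdf5",
    dataset_configurations={
        (image_series, "data"): dict(compression="gzip", chunks=(32, 480, 640, 3)),  # frames, height, width, RGB
    },
)
```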
src/neuroconv/datainterfaces/ecephys/baselfpextractorinterface.py
@@ -3,7 +3,6 @@
from pynwb import NWBFile

from .baserecordingextractorinterface import BaseRecordingExtractorInterface
from ...utils import FilePathType


class BaseLFPExtractorInterface(BaseRecordingExtractorInterface):
@@ -39,8 +38,6 @@ def add_to_nwbfile(
starting_time=starting_time,
write_as=write_as,
write_electrical_series=write_electrical_series,
compression=compression,
compression_opts=compression_opts,
iterator_type=iterator_type,
iterator_opts=iterator_opts,
)
src/neuroconv/datainterfaces/ecephys/baserecordingextractorinterface.py
@@ -274,8 +274,6 @@ def add_to_nwbfile(
starting_time: Optional[float] = None,
write_as: Literal["raw", "lfp", "processed"] = "raw",
write_electrical_series: bool = True,
compression: Optional[str] = None,
compression_opts: Optional[int] = None,
iterator_type: str = "v2",
iterator_opts: Optional[dict] = None,
):
@@ -300,11 +298,6 @@
write_electrical_series : bool, default: True
Electrical series are written in acquisition. If False, only device, electrode_groups,
and electrodes are written to NWB.
compression : {'gzip', 'lzf', None}
Type of compression to use.
Set to None to disable all compression.
compression_opts : int, default: 4
Only applies to compression="gzip". Controls the level of the GZIP.
iterator_type : {'v2', 'v1'}
The type of DataChunkIterator to use.
'v1' is the original DataChunkIterator of the hdmf data_utils.
@@ -345,8 +338,6 @@
write_as=write_as,
write_electrical_series=write_electrical_series,
es_key=self.es_key,
compression=compression,
compression_opts=compression_opts,
iterator_type=iterator_type,
iterator_opts=iterator_opts,
)
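
The removed `compression`/`compression_opts` arguments map directly onto `data_io_kwargs`; for example, the previous `compression="lzf"` behavior might now be requested as follows (series name hypothetical):

```python
electrical_series = nwbfile.acquisition["ElectricalSeriesRaw"]  # hypothetical name
interface.configure_datasets(
    nwbfile,
    dataset_configurations={(electrical_series, "data"): dict(compression="lzf")},  # lzf takes no opts
)
```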
src/neuroconv/datainterfaces/ophys/basesegmentationextractorinterface.py
@@ -86,7 +86,6 @@ def add_to_nwbfile(
include_roi_acceptance: bool = True,
mask_type: Optional[str] = "image", # Literal["image", "pixel", "voxel"]
iterator_options: Optional[dict] = None,
compression_options: Optional[dict] = None,
):
"""

@@ -119,8 +118,6 @@
Defaults to 'image'.
iterator_options : dict, optional
The options to use when iterating over the image masks of the segmentation extractor.
compression_options : dict, optional
The options to use when compressing the image masks of the segmentation extractor.

Returns
-------
@@ -142,5 +139,4 @@
include_roi_acceptance=include_roi_acceptance,
mask_type=mask_type,
iterator_options=iterator_options,
compression_options=compression_options,
)
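
Image masks follow the same pattern: the column holding them is itself an `NWBContainer` (a `VectorData`), so it can be keyed directly. A heavily hedged sketch; the processing-module path and object names are assumptions:

```python
plane_segmentation = nwbfile.processing["ophys"]["ImageSegmentation"]["PlaneSegmentation"]  # assumed path
image_mask_column = plane_segmentation["image_mask"]
interface.configure_datasets(
    nwbfile,
    dataset_configurations={(image_mask_column, "data"): dict(compression="gzip")},
)
```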
7 changes: 1 addition & 6 deletions src/neuroconv/tools/audio/audio.py
@@ -1,7 +1,6 @@
from typing import Literal, Optional
from warnings import warn

from hdmf.backends.hdf5 import H5DataIO
from pynwb import NWBFile

from neuroconv.tools.hdmf import SliceableDataChunkIterator
@@ -16,7 +15,6 @@ def add_acoustic_waveform_series(
starting_time: float = 0.0,
write_as: Literal["stimulus", "acquisition"] = "stimulus",
iterator_options: Optional[dict] = None,
compression_options: Optional[dict] = None,
) -> NWBFile:
"""

@@ -42,8 +40,6 @@
"stimulus" or as "acquisition".
iterator_options : dict, optional
Dictionary of options for the SliceableDataChunkIterator.
compression_options : dict, optional
Dictionary of options for compressing the data for H5DataIO.

Returns
-------
@@ -56,7 +52,6 @@
"acquisition",
], "Acoustic series can be written either as 'stimulus' or 'acquisition'."

compression_options = compression_options or dict(compression="gzip")
iterator_options = iterator_options or dict()

container = nwbfile.acquisition if write_as == "acquisition" else nwbfile.stimulus
@@ -68,7 +63,7 @@
acoustic_waveform_series_kwargs = dict(
rate=float(rate),
starting_time=starting_time,
data=H5DataIO(SliceableDataChunkIterator(data=acoustic_series, **iterator_options), **compression_options),
data=SliceableDataChunkIterator(data=acoustic_series, **iterator_options),
)

# Add metadata
6 changes: 1 addition & 5 deletions src/neuroconv/tools/neo/neo.py
@@ -8,7 +8,6 @@
import neo.io.baseio
import numpy as np
import pynwb
from hdmf.backends.hdf5 import H5DataIO

from ..nwb_helpers import add_device_from_metadata
from ...utils import OptionalFilePathType
@@ -313,10 +312,7 @@ def add_icephys_recordings(
name=response_name,
description=f"Response to: {session_stimulus_type}",
electrode=electrode,
data=H5DataIO(
data=neo_reader.get_analogsignal_chunk(block_index=0, seg_index=si, channel_indexes=ei),
compression=compression,
),
data=neo_reader.get_analogsignal_chunk(block_index=0, seg_index=si, channel_indexes=ei),
starting_time=starting_time,
rate=sampling_rate,
conversion=response_conversion * response_gain,
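
Since the raw analog-signal chunk is now stored uncompressed at assembly time, a caller could sweep compression over every acquisition series afterward. A sketch using the helper this PR exports from `neuroconv.tools.nwb_helpers`, assuming the responses landed in `nwbfile.acquisition`:

```python
from neuroconv.tools.nwb_helpers import configure_datasets

# Apply gzip to the data of every response series added by add_icephys_recordings (assumed layout).
dataset_configurations = {
    (series, "data"): dict(compression="gzip") for series in nwbfile.acquisition.values()
}
configure_datasets(nwbfile=nwbfile, backend="hdf5", dataset_configurations=dataset_configurations)
```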