Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Modify dset/attr builders based on sidecar JSON #677

Draft
wants to merge 27 commits into
base: dev
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
9e4ba60
Add first at reading sidecar modifications
rly Nov 11, 2021
1f53919
Pretty-print json
rly Nov 11, 2021
dafc650
Update to work if json is not present
rly Nov 11, 2021
de5fefe
Refactor BuilderUpdater functionality to sep class
rly Nov 11, 2021
3f1f8f2
Merge branch 'dev' into sidecar_mods
rly Nov 30, 2021
036fa1e
Handle changing sub-dataset attr, add sidecar fields
rly Nov 30, 2021
b4b5419
Use semantic versioning in version label
rly Nov 30, 2021
151c69d
Add jsonschema for sidecar json
rly Dec 1, 2021
32d1397
Add validation to read
rly Dec 1, 2021
933ef40
Update to use new schema. More tests needed
rly Dec 7, 2021
393e5b3
Update tests (more to do)
rly Dec 8, 2021
2fda06d
Add description, author, and contact to sidecar JSON, fix tests
rly Dec 8, 2021
28c6893
Merge branch 'dev' into sidecar_mods
rly Jan 25, 2022
6da168d
Merge branch 'dev' into sidecar_mods
rly Apr 11, 2022
618ab1c
Merge branch 'dev' of https://github.com/hdmf-dev/hdmf into sidecar_mods
rly Apr 11, 2022
393ffdf
Merge branch 'sidecar_mods' of https://github.com/hdmf-dev/hdmf into …
rly Apr 11, 2022
729e989
Update documentation, refactor, and add test cases
rly Apr 12, 2022
ecd244d
Update
rly Apr 12, 2022
168f4a9
Add link to sidecar json schema
rly Apr 12, 2022
1c57573
Add examples to doc
rly Apr 12, 2022
62ed248
Update sidecar.rst
rly Apr 12, 2022
7078ca1
Merge branch 'dev' into sidecar_mods
rly Apr 21, 2022
9faf7a2
Update sidecar.rst
rly Apr 21, 2022
827d61d
Update docs/source/sidecar.rst
rly Apr 21, 2022
ef22dc5
Update sidecar.rst
rly Apr 21, 2022
2bb7185
Merge branch 'dev' into sidecar_mods
rly Aug 31, 2022
fee5245
Merge branch 'dev' into sidecar_mods
rly Nov 29, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 62 additions & 0 deletions src/hdmf/backends/hdf5/h5tools.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import json
import logging
import os.path
import warnings
Expand Down Expand Up @@ -514,6 +515,7 @@ def read_builder(self):
if f_builder is None:
f_builder = self.__read_group(self.__file, ROOT_NAME, ignore=ignore)
self.__read[self.__file] = f_builder
self.update_builder_from_sidecar(f_builder)
return f_builder

def __set_written(self, builder):
Expand Down Expand Up @@ -1549,3 +1551,63 @@ def set_dataio(cls, **kwargs):
"""
cargs, ckwargs = fmt_docval_args(H5DataIO.__init__, kwargs)
return H5DataIO(*cargs, **ckwargs)

@docval(
    {'name': 'f_builder', 'type': GroupBuilder, 'doc': 'A GroupBuilder representing the main file object.'},
    returns='The same input GroupBuilder, now modified.',
    rtype='GroupBuilder'
)
def update_builder_from_sidecar(self, **kwargs):
    """Apply the changes listed in the sidecar JSON file to ``f_builder`` in place.

    The sidecar file has the same path as the open HDF5 file, with the suffix
    replaced by ``.json``. If no such file exists, the builder is returned
    untouched.

    The JSON document must have a top-level ``versions`` list. Each version may
    carry a ``changes`` list, and each change is a dict with:

    * ``object_id`` -- object ID of the builder to modify (required)
    * ``relative_path`` -- name of an attribute on that builder, name of a
      dataset inside that builder (groups only), or absent/empty to target the
      data of a dataset builder itself
    * ``new_value`` -- the replacement value (required)

    Changes are applied in file order, so later versions override earlier ones.

    :raises ValueError: if ``relative_path`` does not resolve to an attribute
        or dataset of the targeted builder.
    :raises KeyError: if a required key is missing from the JSON, or if
        ``object_id`` does not match any builder in ``f_builder``.
    """
    # NOTE(review): suggestion raised in code review -- a ``post_read_builder``
    # function on HDMFIO itself could provide a standard place for I/O backends
    # to update builders after read.

    # in-place update of the builder
    # the sidecar json will have the same name as the file but have suffix .json
    f_builder = getargs('f_builder', kwargs)
    sidecar_path = Path(self.__file.filename).with_suffix('.json')
    if not sidecar_path.is_file():
        # Nothing to apply; still honor the documented return value.
        return f_builder

    with open(sidecar_path, 'r', encoding='utf-8') as f:
        versions = json.load(f)['versions']

    builder_map = self.__get_object_id_map(f_builder)
    for version_dict in versions:
        # A version without a "changes" entry simply contributes no changes.
        for change_dict in version_dict.get('changes') or ():
            object_id = change_dict['object_id']
            relative_path = change_dict.get('relative_path')
            new_value = change_dict['new_value']

            builder = builder_map[object_id]
            if relative_path in builder.attributes:
                # TODO handle different dtypes
                builder.attributes[relative_path] = new_value
            elif isinstance(builder, GroupBuilder):
                obj = builder.get(relative_path)
                if isinstance(obj, DatasetBuilder):  # update data in sub-DatasetBuilder
                    self.__update_dataset_builder(obj, new_value)
                else:
                    raise ValueError("Relative path '%s' not recognized as a dataset or attribute"
                                     % relative_path)
            else:  # DatasetBuilder has object_id
                if not relative_path:  # update data
                    self.__update_dataset_builder(builder, new_value)
                else:
                    raise ValueError("Relative path '%s' not recognized as None or attribute"
                                     % relative_path)
                # TODO handle compound dtypes

    return f_builder

def __update_dataset_builder(self, dset_builder, value):
    """Replace the ``'data'`` entry of ``dset_builder`` with ``value``.

    The entry is written by item assignment on the builder; no dtype
    conversion or validation of ``value`` is performed here.
    """
    # TODO handle different dtypes
    dset_builder['data'] = value

def __get_object_id_map(self, builder):
    """Return a dict mapping object ID to builder for ``builder`` and its descendants.

    The hierarchy is walked iteratively with an explicit stack. Only builders
    whose ``attributes`` contain an ``'object_id'`` key are recorded. Children
    (sub-groups, then datasets) are visited only for GroupBuilder instances.
    """
    id_to_builder = {}
    pending = [builder]
    while pending:
        current = pending.pop()
        attrs = current.attributes
        if 'object_id' in attrs:
            id_to_builder[attrs['object_id']] = current
        if not isinstance(current, GroupBuilder):
            continue
        # Push sub-groups first, then datasets, matching the visiting order
        # that results from popping off the end of the stack.
        pending.extend(current.groups.values())
        pending.extend(current.datasets.values())
    return id_to_builder
Empty file added tests/unit/io_tests/__init__.py
Empty file.
162 changes: 162 additions & 0 deletions tests/unit/io_tests/test_sidecar.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
import json
import os

from hdmf import Container
from hdmf.backends.hdf5.h5tools import HDF5IO
from hdmf.build import BuildManager, TypeMap, ObjectMapper
from hdmf.spec import AttributeSpec, DatasetSpec, GroupSpec, SpecCatalog, SpecNamespace, NamespaceCatalog
from hdmf.testing import TestCase
from hdmf.utils import getargs, docval


class TestBasic(TestCase):
    """Check that values in a sidecar JSON file override what is read from the HDF5 file."""

    def setUp(self):
        """Write a two-level Foo hierarchy to HDF5 and a sidecar JSON describing changes to it."""
        self.h5_path = "./tests/unit/io_tests/test_sidecar.h5"
        foo2 = Foo('sub_foo', [-1, -2, -3], 'OLD', [-17])
        foo1 = Foo('foo1', [1, 2, 3], 'old', [17], foo2)
        with HDF5IO(self.h5_path, manager=_get_manager(), mode='w') as io:
            io.write(foo1)

        version2 = {
            "label": "version 2",
            "description": "change attr1 from 'old' to 'my experiment' and my_data from [1, 2, 3] to [4, 5, 6, 7]",
            "changes": [
                {
                    "object_id": foo1.object_id,
                    "relative_path": "attr1",
                    "new_value": "my experiment"
                },
                {
                    "object_id": foo1.object_id,
                    "relative_path": "my_data",
                    "new_value": [4, 5, 6, 7]
                }
            ]
        }

        version3 = {
            "label": "version 3",
            "description": "change sub_foo/my_data from [-1, -2, -3] to [[0]]",
            "changes": [
                {
                    "object_id": foo2.object_id,
                    "relative_path": "my_data",
                    "new_value": [[0]]
                }
            ]
        }

        sidecar = dict()
        sidecar["versions"] = [version2, version3]

        # Same base name as the HDF5 file, with a .json suffix.
        self.json_path = "./tests/unit/io_tests/test_sidecar.json"
        with open(self.json_path, 'w') as outfile:
            json.dump(sidecar, outfile, indent=4)

    def tearDown(self):
        """Remove the HDF5 file and the sidecar JSON created in setUp."""
        if os.path.exists(self.h5_path):
            os.remove(self.h5_path)
        if os.path.exists(self.json_path):
            os.remove(self.json_path)

    def test_update_builder(self):
        """Reading the file yields the values from the sidecar, not the ones stored in HDF5."""
        # Use the context manager so the file handle is released before
        # tearDown tries to delete the file.
        with HDF5IO(self.h5_path, 'r', manager=_get_manager()) as io:
            foo1 = io.read()
            self.assertEqual(foo1.attr1, "my experiment")
            self.assertEqual(foo1.my_data, [4, 5, 6, 7])
            self.assertEqual(foo1.sub_foo.my_data, [[0]])


class Foo(Container):
    """Test container with one dataset, two attributes and an optional child Foo."""

    @docval({'name': 'name', 'type': str, 'doc': 'the name of this Foo'},
            {'name': 'my_data', 'type': ('array_data', 'data'), 'doc': 'a 1-D integer dataset'},
            {'name': 'attr1', 'type': str, 'doc': 'a string attribute'},
            {'name': 'attr2', 'type': ('array_data', 'data'), 'doc': 'a 1-D integer attribute'},
            {'name': 'sub_foo', 'type': 'Foo', 'doc': 'a child Foo', 'default': None})
    def __init__(self, **kwargs):
        name, my_data, attr1, attr2, sub_foo = getargs('name', 'my_data', 'attr1', 'attr2', 'sub_foo', kwargs)
        super().__init__(name=name)
        self.__data, self.__attr1, self.__attr2 = my_data, attr1, attr2
        self.__sub_foo = sub_foo
        if sub_foo is None:
            return
        # The child must carry the fixed name 'sub_foo'; on read the mapping will not work otherwise.
        assert sub_foo.name == 'sub_foo'
        sub_foo.parent = self

    @property
    def my_data(self):
        """The dataset values passed at construction."""
        return self.__data

    @property
    def attr1(self):
        """The string attribute passed at construction."""
        return self.__attr1

    @property
    def attr2(self):
        """The integer attribute values passed at construction."""
        return self.__attr2

    @property
    def sub_foo(self):
        """The child Foo, or None if there is none."""
        return self.__sub_foo


def _get_manager():
    """Create a BuildManager whose type map knows the 'Foo' data type and its mapper.

    The 'Foo' group spec has an optional child group named 'sub_foo' (also a
    'Foo'), a 'my_data' integer dataset carrying an 'attr2' integer attribute,
    and an 'attr1' text attribute. The spec is registered in a namespace named
    'test_core'.
    """
    child_group_spec = GroupSpec(
        doc='a child Foo',
        data_type_inc='Foo',
        name='sub_foo',
        quantity='?',
    )
    attr2_spec = AttributeSpec(
        name='attr2',
        doc='a 1-D integer attribute',
        dtype='int',
        shape=[None, ],
    )
    dataset_spec = DatasetSpec(
        doc='a 1-D integer dataset',
        dtype='int',
        name='my_data',
        shape=[None, ],
        attributes=[attr2_spec],
    )
    attr1_spec = AttributeSpec(name='attr1', doc='a string attribute', dtype='text')
    foo_spec = GroupSpec(
        doc='A test group specification with a data type',
        data_type_def='Foo',
        groups=[child_group_spec],
        datasets=[dataset_spec],
        attributes=[attr1_spec],
    )

    class FooMapper(ObjectMapper):
        """Remap 'attr2' attribute on Foo container to 'my_data' dataset spec > 'attr2' attribute spec."""
        def __init__(self, spec):
            super().__init__(spec)
            nested_attr_spec = spec.get_dataset('my_data').get_attribute('attr2')
            self.map_spec('attr2', nested_attr_spec)

    namespace_name = 'test_core'

    spec_catalog = SpecCatalog()
    spec_catalog.register_spec(foo_spec, 'test.yaml')

    namespace_catalog = NamespaceCatalog()
    namespace_catalog.add_namespace(
        namespace_name,
        SpecNamespace(
            doc='a test namespace',
            name=namespace_name,
            schema=[{'source': 'test.yaml'}],
            version='0.1.0',
            catalog=spec_catalog
        ),
    )

    type_map = TypeMap(namespace_catalog)
    type_map.register_container_type(namespace_name, 'Foo', Foo)
    type_map.register_map(Foo, FooMapper)
    return BuildManager(type_map)