Skip to content

Commit

Permalink
Metadata for BIDS assets
Browse files Browse the repository at this point in the history
  • Loading branch information
jwodder committed Aug 3, 2022
1 parent 5d30d49 commit ab4733f
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 56 deletions.
65 changes: 64 additions & 1 deletion dandi/files/bids.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,23 @@

from collections import defaultdict
from dataclasses import dataclass, field
from datetime import datetime
from pathlib import Path
from threading import Lock
from typing import Optional
from typing import Any, Optional
import weakref

from dandischema.models import BareAsset

from .bases import GenericAsset, LocalFileAsset, NWBAsset
from .zarr import ZarrAsset
from ..metadata import add_common_metadata, prepare_metadata
from ..misctypes import Digest

# Translation table from BIDS key names to the DANDI metadata field names
# they correspond to.  Only the keys listed here are translated.
BIDS_TO_DANDI = {
    "subject": "subject_id",
    "session": "session_id",
}


@dataclass
Expand All @@ -29,6 +39,11 @@ class BIDSDatasetDescriptionAsset(LocalFileAsset):
#: dataset, keyed by `bids_path` properties; populated by `_validate()`
_asset_errors: Optional[dict[str, list[str]]] = None

#: Asset metadata (in the form of a `dict` of BareAsset fields) for
#: individual assets in the dataset, keyed by `bids_path` properties;
#: populated by `_validate()`
_asset_metadata: Optional[dict[str, dict[str, Any]]] = None

#: Threading lock needed in case multiple assets are validated in parallel
#: during upload
_lock: Lock = field(init=False, default_factory=Lock, repr=False, compare=False)
Expand Down Expand Up @@ -68,8 +83,21 @@ def _validate(self) -> None:
self._asset_errors[bids_path].append(
"File not matched by any regex schema entry"
)
self._asset_metadata = defaultdict(dict)
for meta in results["match_listing"]:
bids_path = (
Path(meta.pop("path")).relative_to(self.bids_root).as_posix()
)
meta = {
BIDS_TO_DANDI[k]: v
for k, v in meta.items()
if k in BIDS_TO_DANDI
}
# meta["bids_schema_version"] = results["bids_schema_version"]
self._asset_metadata[bids_path] = prepare_metadata(meta)

def get_asset_errors(self, asset: BIDSAsset) -> list[str]:
""":meta private:"""
self._validate()
errors: list[str] = []
if self._dataset_errors:
Expand All @@ -78,6 +106,12 @@ def get_asset_errors(self, asset: BIDSAsset) -> list[str]:
errors.extend(self._asset_errors[asset.bids_path])
return errors

def get_asset_metadata(self, asset: BIDSAsset) -> dict[str, Any]:
    """
    Return the metadata stored for ``asset``, looked up by its
    ``bids_path``.

    ``_validate()`` is called first; the per-asset metadata mapping is
    expected to be populated once it returns.

    :meta private:
    """
    self._validate()
    per_asset = self._asset_metadata
    # Narrow the Optional type: the mapping must be set after _validate().
    assert per_asset is not None
    return per_asset[asset.bids_path]

def get_validation_errors(
self,
schema_version: Optional[str] = None,
Expand All @@ -87,6 +121,13 @@ def get_validation_errors(
assert self._dataset_errors is not None
return list(self._dataset_errors)

def get_metadata(
    self,
    digest: Optional[Digest] = None,
    ignore_errors: bool = True,
) -> BareAsset:
    """
    Placeholder: metadata generation is not implemented here.

    Neither ``digest`` nor ``ignore_errors`` is used.

    :raises NotImplementedError: always
    """
    raise NotImplementedError()


@dataclass
class BIDSAsset(LocalFileAsset):
Expand Down Expand Up @@ -131,6 +172,17 @@ def get_validation_errors(
) -> list[str]:
return self.bids_dataset_description.get_asset_errors(self)

def get_metadata(
    self,
    digest: Optional[Digest] = None,
    ignore_errors: bool = True,
) -> BareAsset:
    """
    Build the `BareAsset` metadata for this file.

    The metadata that the dataset description holds for this asset is
    combined with the common file metadata added by
    `add_common_metadata()` and with this asset's ``path``.

    :param digest: passed through to `add_common_metadata()`
    :param ignore_errors: accepted but not used by this implementation
    :return: a `BareAsset` constructed from the combined fields
    """
    # Work on a copy.  The dict returned by get_asset_metadata() is the
    # record kept by the dataset description, and the statements below
    # write into it; mutating it in place would leak file-specific fields
    # (path, digest, ...) into that stored record.
    # NOTE(review): this is a shallow copy -- presumably
    # add_common_metadata() only assigns top-level keys; confirm.
    metadata = dict(self.bids_dataset_description.get_asset_metadata(self))
    # Nothing is extracted from the file here, so start and end coincide.
    start_time = end_time = datetime.now().astimezone()
    add_common_metadata(metadata, self.filepath, start_time, end_time, digest)
    metadata["path"] = self.path
    return BareAsset(**metadata)


class NWBBIDSAsset(BIDSAsset, NWBAsset):
"""An NWB file in a BIDS dataset"""
Expand All @@ -144,6 +196,17 @@ def get_validation_errors(
self, schema_version, devel_debug
) + BIDSAsset.get_validation_errors(self)

def get_metadata(
    self,
    digest: Optional[Digest] = None,
    ignore_errors: bool = True,
) -> BareAsset:
    """
    Build the `BareAsset` metadata for an NWB file inside a BIDS dataset.

    Metadata is produced by both parent implementations and merged:
    fields from `NWBAsset.get_metadata()` that are not ``None`` take
    precedence over the fields from `BIDSAsset.get_metadata()`.

    :param digest: forwarded to both parent implementations
    :param ignore_errors: forwarded to both parent implementations
    :return: a `BareAsset` constructed from the merged fields
    """
    # Forward the caller's arguments to the BIDS side as well, so that both
    # halves of the merge are computed from the same inputs.
    bids_metadata = BIDSAsset.get_metadata(self, digest, ignore_errors)
    nwb_metadata = NWBAsset.get_metadata(self, digest, ignore_errors)
    return BareAsset(
        # Later entries win; exclude_none keeps unset NWB fields from
        # overwriting values supplied by the BIDS side.
        **{**bids_metadata.dict(), **nwb_metadata.dict(exclude_none=True)}
    )


class ZarrBIDSAsset(BIDSAsset, ZarrAsset):
"""A Zarr directory in a BIDS dataset"""
Expand Down
55 changes: 0 additions & 55 deletions dandi/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

from datetime import datetime, timedelta
from functools import lru_cache
import itertools
import os
import os.path as op
from pathlib import Path
Expand Down Expand Up @@ -44,48 +43,6 @@

lgr = get_logger()

# Remove hard-coding when current version fallback is merged.

# Translation table from BIDS key names to the DANDI metadata field names
# they correspond to.
BIDS_TO_DANDI = {
    "subject": "subject_id",
    "session": "session_id",
}


def _rename_bids_keys(bids_metadata, mapping=BIDS_TO_DANDI):
    """Return a new dict with BIDS key names translated to DANDI ones.

    Every key of ``bids_metadata`` that appears in ``mapping`` is replaced by
    the name it maps to; keys not present in ``mapping`` are kept unchanged.
    Values are carried over as they are, and the input dict is not modified.
    """
    renamed = {}
    for bids_key, value in bids_metadata.items():
        renamed[mapping.get(bids_key, bids_key)] = value
    return renamed


def _path_in_bids(
    check_path, bids_marker="dataset_description.json", end_marker="dandiset.yaml"
):
    """Determine whether a path is a member of a BIDS dataset.

    The search starts at ``check_path`` itself and then moves up through its
    parent directories.  At each level the BIDS marker is tested before the
    end marker, so a directory holding both counts as a BIDS root.

    Parameters
    ----------
    check_path: str or Path
        Path at which the upward search starts.
    bids_marker: str, optional
        String giving a filename, the existence of which in a directory will
        mark it as a BIDS dataset root directory.
    end_marker: str, optional
        String giving a filename, the existence of which in a directory will
        end the search.

    Returns
    -------
    bool
        True if a BIDS marker is found before any end marker; False if an
        end marker is found first or neither marker is found at any level.
    """
    start = Path(check_path)
    for level in (start, *start.parents):
        bids_file = level / bids_marker
        if bids_file.is_file() or bids_file.is_symlink():
            return True
        stop_file = level / end_marker
        if stop_file.is_file() or stop_file.is_symlink():
            return False
    return False


# Disable this for clean hacking
@metadata_cache.memoize_path
Expand Down Expand Up @@ -115,18 +72,6 @@ def get_metadata(path: Union[str, Path]) -> Optional[dict]:
lgr.debug("Failed to get metadata for %s: %s", path, exc)
return None

# Somewhat less fragile search than previous proposals,
# could still be augmented with `_is_nwb` to disambiguate both cases
# at the detection level.
if _path_in_bids(path):
from .validate import validate_bids

_meta = validate_bids(path)
meta = _meta["match_listing"][0]
meta["bids_schema_version"] = _meta["bids_schema_version"]
meta = _rename_bids_keys(meta)
return meta

if nwb_has_external_links(path):
raise NotImplementedError(
f"NWB files with external links are not supported: {path}"
Expand Down

0 comments on commit ab4733f

Please sign in to comment.