Refactor CuBIDS to support file collections #308

Open · wants to merge 11 commits into base: main
3 changes: 2 additions & 1 deletion cubids/constants.py
@@ -5,7 +5,8 @@
# but both are hardcoded in the relevant function.
ID_VARS = set(["KeyGroup", "ParamGroup", "FilePath"])
# Entities that should not be used to group parameter sets
-NON_KEY_ENTITIES = set(["subject", "session", "extension"])
+FILE_COLLECTION_ENTITIES = set(["echo", "part", "flip", "mt", "inv"])
+NON_KEY_ENTITIES = set(["subject", "session", "run", "extension"]).union(FILE_COLLECTION_ENTITIES)
# Multi-dimensional keys SliceTiming XXX: what is this line about?
# List of metadata fields and parameters (calculated by CuBIDS)
# Not sure what this specific list is used for.
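To make the new grouping behavior concrete, a minimal standalone sketch that mirrors the definitions above:

    # Mirrors cubids/constants.py after this change.
    FILE_COLLECTION_ENTITIES = set(["echo", "part", "flip", "mt", "inv"])
    NON_KEY_ENTITIES = set(["subject", "session", "run", "extension"]).union(FILE_COLLECTION_ENTITIES)

    # "run" and the file-collection entities are now excluded from key groups:
    assert "run" in NON_KEY_ENTITIES
    assert {"echo", "part", "flip", "mt", "inv"} <= NON_KEY_ENTITIES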
815 changes: 444 additions & 371 deletions cubids/cubids.py

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion cubids/data/config.yml
@@ -172,7 +172,7 @@ derived_params:
ImageOrientation:
suggest_variant_rename: yes
# These fields reflect relationships between images.
-# Not modality specific
+# Not datatype-specific
relational_params:
FieldmapKey:
# can be
15 changes: 8 additions & 7 deletions cubids/metadata_merge.py
@@ -238,7 +238,7 @@ def merge_json_into_json(from_file, to_file, raise_on_error=False):
if not merged_metadata == orig_dest_metadata:
print("OVERWRITING", to_file)
with open(to_file, "w") as tofw:
-json.dump(merged_metadata, tofw, indent=4)
+json.dump(merged_metadata, tofw, indent=4, sort_keys=True)

return 0
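A minimal illustration of what sort_keys=True buys here: serialization becomes deterministic, so re-running a merge over unchanged metadata rewrites sidecars byte-identically instead of reshuffling keys.

    import json

    metadata = {"RepetitionTime": 2.0, "EchoTime": 0.03}
    # Keys come out in sorted order regardless of insertion order:
    print(json.dumps(metadata, indent=4, sort_keys=True))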

@@ -259,9 +259,12 @@ def get_acq_dictionary():
acq_dict = {}
acq_dict["subject"] = {"Description": "Participant ID"}
acq_dict["session"] = {"Description": "Session ID"}
-docs = " https://cubids.readthedocs.io/en/latest/about.html#definitions"
-desc = "Acquisition Group. See Read the Docs for more information"
-acq_dict["AcqGroup"] = {"Description": desc + docs}
+acq_dict["AcqGroup"] = {
+    "Description": (
+        "Acquisition Group. See Read the Docs for more information "
+        "https://cubids.readthedocs.io/en/latest/about.html#definitions"
+    )
+}

return acq_dict

@@ -290,9 +293,7 @@ def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level):

config.set_option("extension_initial_dot", True)

-files_df = pd.read_table(
-    files_tsv,
-)
+files_df = pd.read_table(files_tsv)
acq_groups = defaultdict(list)
for _, row in files_df.iterrows():
file_entities = parse_file_entities(row.FilePath)
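For orientation, parse_file_entities (from pybids) is what turns each FilePath into the entity dictionary used for grouping; an illustrative call on a hypothetical path:

    from bids.layout import parse_file_entities

    entities = parse_file_entities("sub-01/ses-1/func/sub-01_ses-1_task-rest_bold.nii.gz")
    # -> {'subject': '01', 'session': '1', 'task': 'rest',
    #     'datatype': 'func', 'suffix': 'bold', 'extension': '.nii.gz'}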
2 changes: 1 addition & 1 deletion cubids/tests/test_bond.py
@@ -453,7 +453,7 @@ def test_tsv_merge_changes(tmp_path):
renamed = True
new_keys = applied["KeyGroup"].tolist()
for row in range(len(orig)):
-if orig.loc[row, "Modality"] != "fmap":
+if orig.loc[row, "Datatype"] != "fmap":
if (
str(orig.loc[row, "RenameKeyGroup"]) != "nan"
and str(orig.loc[row, "RenameKeyGroup"]) not in new_keys
86 changes: 86 additions & 0 deletions cubids/utils.py
@@ -1,8 +1,14 @@
"""Miscellaneous utility functions for CuBIDS."""

import copy
import re
from pathlib import Path

from bids.layout import Query
from bids.utils import listify

from cubids.constants import FILE_COLLECTION_ENTITIES


def _get_container_type(image_name):
"""Get and return the container type.
@@ -31,3 +37,83 @@ def _get_container_type(image_name):
return "docker"

raise Exception("Unable to determine the container type of " + image_name)


def resolve_bids_uri(uri, root, dataset_links={}):
"""Resolve a BIDS URI to an absolute path.

Parameters
----------
uri : :obj:`str`
The BIDS URI to resolve.
root : :obj:`pathlib.Path`
The root directory of the BIDS dataset.
dataset_links : :obj:`dict`, optional
A dictionary of dataset links.
The keys are the names of the datasets,
and the values are the paths to the root of the dataset.
The paths can be either absolute or relative to the root of the current dataset.

Returns
-------
:obj:`str`
The absolute path to the file or directory specified by the URI.
"""
if uri.startswith("bids::"):
# This is a relative path from the root
path = root / uri[6:]
elif uri.startswith("bids:"):
# More advanced BIDS URIs
dataset_name, relative_path = uri[5:].split(":", 1)
if dataset_name not in dataset_links:
raise ValueError(f"Dataset '{dataset_name}' not found in dataset_links")

dataset_link = dataset_links[dataset_name]
if dataset_link.startswith("file://"):
# Direct file link
dataset_link = Path(dataset_link[7:])
elif dataset_link.startswith("doi:"):
# Remote link using a DOI
raise NotImplementedError("doi URIs are not yet supported.")
else:
# Relative path from the root
dataset_link = root / dataset_link

path = dataset_link / relative_path

return str(path.absolute())
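A usage sketch for the function above (paths and the dataset-link name are hypothetical):

    root = Path("/data/my_dataset")
    links = {"deriv": "derivatives/fmriprep"}

    resolve_bids_uri("bids::sub-01/anat/sub-01_T1w.nii.gz", root)
    # -> '/data/my_dataset/sub-01/anat/sub-01_T1w.nii.gz'

    resolve_bids_uri("bids:deriv:sub-01/anat/sub-01_desc-preproc_T1w.nii.gz", root, links)
    # -> '/data/my_dataset/derivatives/fmriprep/sub-01/anat/sub-01_desc-preproc_T1w.nii.gz'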


def patch_collection_entities(entities):
"""Patch the entities of a collection.

Parameters
----------
entities : :obj:`dict`
The entities of the collection.

Returns
-------
:obj:`dict`
The patched entities.
"""
out_entities = copy.deepcopy(dict(entities))
for entity in FILE_COLLECTION_ENTITIES:
updated_values = listify(out_entities.get(entity, []))
updated_values.append(Query.NONE)
out_entities[entity] = updated_values

return out_entities
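The effect on a layout query, with a hypothetical input: each file-collection entity is widened into a list that also matches Query.NONE (entity absent), so a layout.get() call still returns companions that lack the entity.

    entities = {"subject": "01", "suffix": "bold", "echo": 1}
    patch_collection_entities(entities)
    # -> {"subject": "01", "suffix": "bold",
    #     "echo": [1, Query.NONE],
    #     "part": [Query.NONE], "flip": [Query.NONE],
    #     "mt": [Query.NONE], "inv": [Query.NONE]}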


def find_file(entities, layout):
"""Find a single file associated with the given entities."""
file_candidates = layout.get(return_type="file", **entities)
if len(file_candidates) > 1:
file_str = "\n\t" + "\n\t".join(file_candidates)
raise ValueError(f"Multiple associated files found:{file_str}")
elif len(file_candidates) == 1:
return file_candidates[0]
else:
return None
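An illustrative lookup, assuming a pybids BIDSLayout named layout (the entities are hypothetical):

    entities = {"subject": "01", "datatype": "dwi", "suffix": "dwi", "extension": ".bval"}
    bval_file = find_file(entities, layout)  # a path string, or None if nothing matches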
4 changes: 1 addition & 3 deletions cubids/workflows.py
@@ -287,9 +287,7 @@ def group(bids_dir, container, acq_group_level, config, output_prefix):
acq_group_level=acq_group_level,
grouping_config=config,
)
-bod.get_tsvs(
-    str(output_prefix),
-)
+bod.get_tsvs(str(output_prefix))
sys.exit(0)

# Run it through a container
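For context, this code path backs the grouping command; an illustrative invocation (paths are hypothetical; check cubids group --help for the current arguments):

    cubids group /path/to/bids_dir /path/to/outputs/v0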
4 changes: 2 additions & 2 deletions docs/usage.rst
@@ -67,7 +67,7 @@ and therefore both MRI image type and acquisition specific—
each BIDS session directory contains images that belong to a set of Parameter Groups.
CuBIDS assigns each session, or set of Parameter Groups,
to an Acquisition Group such that all sessions in an Acquisition Group possess an identical set of
-scan acquisitions and metadata parameters across all image modalities present in the dataset.
+scan acquisitions and metadata parameters across all image datatypes present in the dataset.
We find Acquisition Groups to be a particularly useful categorization of BIDS data,
as they identify homogeneous sets of sessions (not individual scans) in a large dataset.
They are also useful for expediting the testing of pipelines;
@@ -255,7 +255,7 @@ In addition to facilitating curation of large, heterogeneous BIDS datasets,
``CuBIDS`` also prepares datasets for testing BIDS Apps.
This portion of the ``CuBIDS`` workflow relies on the concept of the Acquisition Group:
a set of sessions that have identical scan types and metadata across all imaging
-modalities present in the session set.
+datatypes present in the session set.
Specifically, ``cubids copy-exemplars`` copies one subject from each
Acquisition Group into a separate directory,
which we call an ``Exemplar Dataset``.
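An illustrative invocation (paths are hypothetical; see ``cubids copy-exemplars --help`` for the full argument list):

    cubids copy-exemplars /path/to/full_dataset /path/to/exemplar_dataset /path/to/v0_AcqGrouping.tsv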