Refactor CuBIDS to support file collections #308

Open · wants to merge 11 commits into base: main
3 changes: 2 additions & 1 deletion cubids/constants.py
@@ -5,7 +5,8 @@
# but both are hardcoded in the relevant function.
ID_VARS = set(["KeyGroup", "ParamGroup", "FilePath"])
# Entities that should not be used to group parameter sets
-NON_KEY_ENTITIES = set(["subject", "session", "extension"])
+FILE_COLLECTION_ENTITIES = set(["echo", "part", "flip", "mt", "inv"])
+NON_KEY_ENTITIES = set(["subject", "session", "run", "extension"]).union(FILE_COLLECTION_ENTITIES)
# Multi-dimensional keys SliceTiming XXX: what is this line about?
# List of metadata fields and parameters (calculated by CuBIDS)
# Not sure what this specific list is used for.
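To make the new grouping behavior concrete, a minimal standalone sketch that mirrors the definitions above:

    # Mirrors cubids/constants.py after this change.
    FILE_COLLECTION_ENTITIES = set(["echo", "part", "flip", "mt", "inv"])
    NON_KEY_ENTITIES = set(["subject", "session", "run", "extension"]).union(FILE_COLLECTION_ENTITIES)

    # "run" and the file-collection entities are now excluded from key groups:
    assert "run" in NON_KEY_ENTITIES
    assert {"echo", "part", "flip", "mt", "inv"} <= NON_KEY_ENTITIES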
815 changes: 444 additions & 371 deletions cubids/cubids.py

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion cubids/data/config.yml
@@ -172,7 +172,7 @@ derived_params:
ImageOrientation:
suggest_variant_rename: yes
# These fields reflect relationships between images.
-# Not modality specific
+# Not datatype-specific
relational_params:
FieldmapKey:
# can be
15 changes: 8 additions & 7 deletions cubids/metadata_merge.py
@@ -238,7 +238,7 @@ def merge_json_into_json(from_file, to_file, raise_on_error=False):
if not merged_metadata == orig_dest_metadata:
print("OVERWRITING", to_file)
with open(to_file, "w") as tofw:
-json.dump(merged_metadata, tofw, indent=4)
+json.dump(merged_metadata, tofw, indent=4, sort_keys=True)

return 0
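A minimal illustration of what sort_keys=True buys here: serialization becomes deterministic, so re-running a merge over unchanged metadata rewrites sidecars byte-identically instead of reshuffling keys.

    import json

    metadata = {"RepetitionTime": 2.0, "EchoTime": 0.03}
    # Keys come out in sorted order regardless of insertion order:
    print(json.dumps(metadata, indent=4, sort_keys=True))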

@@ -259,9 +259,12 @@ def get_acq_dictionary():
acq_dict = {}
acq_dict["subject"] = {"Description": "Participant ID"}
acq_dict["session"] = {"Description": "Session ID"}
-docs = " https://cubids.readthedocs.io/en/latest/about.html#definitions"
-desc = "Acquisition Group. See Read the Docs for more information"
-acq_dict["AcqGroup"] = {"Description": desc + docs}
+acq_dict["AcqGroup"] = {
+    "Description": (
+        "Acquisition Group. See Read the Docs for more information "
+        "https://cubids.readthedocs.io/en/latest/about.html#definitions"
+    )
+}

return acq_dict

@@ -290,9 +293,7 @@ def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level):

config.set_option("extension_initial_dot", True)

-files_df = pd.read_table(
-    files_tsv,
-)
+files_df = pd.read_table(files_tsv)
acq_groups = defaultdict(list)
for _, row in files_df.iterrows():
file_entities = parse_file_entities(row.FilePath)
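For orientation, parse_file_entities (from pybids) is what turns each FilePath into the entity dictionary used for grouping; an illustrative call on a hypothetical path:

    from bids.layout import parse_file_entities

    entities = parse_file_entities("sub-01/ses-1/func/sub-01_ses-1_task-rest_bold.nii.gz")
    # -> {'subject': '01', 'session': '1', 'task': 'rest',
    #     'datatype': 'func', 'suffix': 'bold', 'extension': '.nii.gz'}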
2 changes: 1 addition & 1 deletion cubids/tests/test_bond.py
@@ -453,7 +453,7 @@ def test_tsv_merge_changes(tmp_path):
renamed = True
new_keys = applied["KeyGroup"].tolist()
for row in range(len(orig)):
-if orig.loc[row, "Modality"] != "fmap":
+if orig.loc[row, "Datatype"] != "fmap":
if (
str(orig.loc[row, "RenameKeyGroup"]) != "nan"
and str(orig.loc[row, "RenameKeyGroup"]) not in new_keys
86 changes: 86 additions & 0 deletions cubids/utils.py
@@ -1,8 +1,14 @@
"""Miscellaneous utility functions for CuBIDS."""

import copy
import re
from pathlib import Path

from bids.layout import Query
from bids.utils import listify

from cubids.constants import FILE_COLLECTION_ENTITIES


def _get_container_type(image_name):
"""Get and return the container type.
@@ -31,3 +37,83 @@ def _get_container_type(image_name):
return "docker"

raise Exception("Unable to determine the container type of " + image_name)


def resolve_bids_uri(uri, root, dataset_links={}):
"""Resolve a BIDS URI to an absolute path.

Parameters
----------
uri : :obj:`str`
The BIDS URI to resolve.
root : :obj:`pathlib.Path`
The root directory of the BIDS dataset.
dataset_links : :obj:`dict`, optional
A dictionary of dataset links.
The keys are the names of the datasets,
and the values are the paths to the root of the dataset.
The paths can be either absolute or relative to the root of the current dataset.

Returns
-------
:obj:`str`
The absolute path to the file or directory specified by the URI.
"""
if uri.startswith("bids::"):
# This is a relative path from the root
path = root / uri[6:]
elif uri.startswith("bids:"):
# More advanced BIDS URIs
dataset_name, relative_path = uri[5:].split(":", 1)
if dataset_name not in dataset_links:
raise ValueError(f"Dataset '{dataset_name}' not found in dataset_links")

dataset_link = dataset_links[dataset_name]
if dataset_link.startswith("file://"):
# Direct file link
dataset_link = Path(dataset_link[7:])
elif dataset_link.startswith("doi:"):
# Remote link using a DOI
raise NotImplementedError("doi URIs are not yet supported.")
else:
# Relative path from the root
dataset_link = root / dataset_link

path = dataset_link / relative_path

return str(path.absolute())
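A usage sketch for the function above (paths and the dataset-link name are hypothetical):

    root = Path("/data/my_dataset")
    links = {"deriv": "derivatives/fmriprep"}

    resolve_bids_uri("bids::sub-01/anat/sub-01_T1w.nii.gz", root)
    # -> '/data/my_dataset/sub-01/anat/sub-01_T1w.nii.gz'

    resolve_bids_uri("bids:deriv:sub-01/anat/sub-01_desc-preproc_T1w.nii.gz", root, links)
    # -> '/data/my_dataset/derivatives/fmriprep/sub-01/anat/sub-01_desc-preproc_T1w.nii.gz'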


def patch_collection_entities(entities):
"""Patch the entities of a collection.

Parameters
----------
entities : :obj:`dict`
The entities of the collection.

Returns
-------
:obj:`dict`
The patched entities.
"""
out_entities = copy.deepcopy(dict(entities))
for entity in FILE_COLLECTION_ENTITIES:
updated_values = listify(out_entities.get(entity, []))
updated_values.append(Query.NONE)
out_entities[entity] = updated_values

return out_entities
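The effect on a layout query, with a hypothetical input: each file-collection entity is widened into a list that also matches Query.NONE (entity absent), so a layout.get() call still returns companions that lack the entity.

    entities = {"subject": "01", "suffix": "bold", "echo": 1}
    patch_collection_entities(entities)
    # -> {"subject": "01", "suffix": "bold",
    #     "echo": [1, Query.NONE],
    #     "part": [Query.NONE], "flip": [Query.NONE],
    #     "mt": [Query.NONE], "inv": [Query.NONE]}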


def find_file(entities, layout):
"""Find a single file associated with the given entities."""
file_candidates = layout.get(return_type="file", **entities)
if len(file_candidates) > 1:
file_str = "\n\t" + "\n\t".join(file_candidates)
raise ValueError(f"Multiple associated files found:{file_str}")
elif len(file_candidates) == 1:
return file_candidates[0]
else:
return None
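An illustrative lookup, assuming a pybids BIDSLayout named layout (the entities are hypothetical):

    entities = {"subject": "01", "datatype": "dwi", "suffix": "dwi", "extension": ".bval"}
    bval_file = find_file(entities, layout)  # a path string, or None if nothing matches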
4 changes: 1 addition & 3 deletions cubids/workflows.py
@@ -287,9 +287,7 @@ def group(bids_dir, container, acq_group_level, config, output_prefix):
acq_group_level=acq_group_level,
grouping_config=config,
)
-bod.get_tsvs(
-    str(output_prefix),
-)
+bod.get_tsvs(str(output_prefix))
sys.exit(0)

# Run it through a container
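For context, this code path backs the grouping command; an illustrative invocation (paths are hypothetical; check cubids group --help for the current arguments):

    cubids group /path/to/bids_dir /path/to/outputs/v0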
4 changes: 2 additions & 2 deletions docs/usage.rst
@@ -67,7 +67,7 @@ and therefore both MRI image type and acquisition specific—
each BIDS session directory contains images that belong to a set of Parameter Groups.
CuBIDS assigns each session, or set of Parameter Groups,
to an Acquisition Group such that all sessions in an Acquisition Group possess an identical set of
-scan acquisitions and metadata parameters across all image modalities present in the dataset.
+scan acquisitions and metadata parameters across all image datatypes present in the dataset.
We find Acquisition Groups to be a particularly useful categorization of BIDS data,
as they identify homogeneous sets of sessions (not individual scans) in a large dataset.
They are also useful for expediting the testing of pipelines;
@@ -255,7 +255,7 @@ In addition to facilitating curation of large, heterogeneous BIDS datasets,
``CuBIDS`` also prepares datasets for testing BIDS Apps.
This portion of the ``CuBIDS`` workflow relies on the concept of the Acquisition Group:
a set of sessions that have identical scan types and metadata across all imaging
-modalities present in the session set.
+datatypes present in the session set.
Specifically, ``cubids copy-exemplars`` copies one subject from each
Acquisition Group into a separate directory,
which we call an ``Exemplar Dataset``.
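An illustrative invocation (paths are hypothetical; see ``cubids copy-exemplars --help`` for the full argument list):

    cubids copy-exemplars /path/to/full_dataset /path/to/exemplar_dataset /path/to/v0_AcqGrouping.tsv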