Commit ca8929f

Keep working.

tsalo committed Feb 20, 2024
1 parent c81bd2e commit ca8929f

Showing 3 changed files with 95 additions and 31 deletions.
5 changes: 2 additions & 3 deletions cubids/constants.py
@@ -5,9 +5,8 @@
 # but both are hardcoded in the relevant function.
 ID_VARS = set(["KeyGroup", "ParamGroup", "FilePath"])
 # Entities that should not be used to group parameter sets
-NON_KEY_ENTITIES = set(
-    ["subject", "session", "echo", "part", "flip", "mt", "inv", "run", "extension"]
-)
+FILE_COLLECTION_ENTITIES = set(["echo", "part", "flip", "mt", "inv"])
+NON_KEY_ENTITIES = set(["subject", "session", "run", "extension"]).union(FILE_COLLECTION_ENTITIES)
 # Multi-dimensional keys SliceTiming XXX: what is this line about?
 # List of metadata fields and parameters (calculated by CuBIDS)
 # Not sure what this specific list is used for.
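The split is behavior-preserving: the file-collection entities (echo, part, flip, mt, inv) now live in their own constant, and the union reproduces the original nine-entity set. A quick sketch checking the equivalence:

    FILE_COLLECTION_ENTITIES = set(["echo", "part", "flip", "mt", "inv"])
    NON_KEY_ENTITIES = set(["subject", "session", "run", "extension"]).union(FILE_COLLECTION_ENTITIES)

    # Same members as the old hard-coded set.
    assert NON_KEY_ENTITIES == {
        "subject", "session", "echo", "part", "flip", "mt", "inv", "run", "extension"
    }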
80 changes: 52 additions & 28 deletions cubids/cubids.py
@@ -24,7 +24,7 @@
 from cubids.config import load_config
 from cubids.constants import ID_VARS, NON_KEY_ENTITIES
 from cubids.metadata_merge import check_merging_operations, group_by_acquisition_sets
-from cubids.utils import resolve_bids_uri
+from cubids.utils import find_file, patch_collection_entities, resolve_bids_uri
 
 warnings.simplefilter(action="ignore", category=FutureWarning)
 bids.config.set_option("extension_initial_dot", True)
@@ -399,6 +399,16 @@ def apply_tsv_changes(self, summary_tsv, files_tsv, new_prefix, raise_on_error=T
             # generate new filenames according to new key group
             self.change_filename(file_path, new_entities)
 
+        renaming_df = pd.DataFrame(
+            columns=["original", "renamed"],
+            data=list(map(list, zip(*[self.old_filenames, self.new_filenames]))),
+        )
+        renaming_df.to_csv(
+            os.path.join(self.path, f"code/CuBIDS/{new_prefix}_renaming.tsv"),
+            index=False,
+            sep="\t",
+        )
+
         # create string of mv command ; mv command for dlapi.run
         for from_file, to_file in zip(self.old_filenames, self.new_filenames):
             if Path(from_file).exists():
@@ -711,12 +721,17 @@ def _purge_associations(self, scans):
             if_scans.append(_get_intended_for_reference(self.path + scan))
 
         # XXX: Session folders are not guaranteed to exist.
-        for path in Path(self.path).rglob("sub-*/*/fmap/*.json"):
-            # json_file = self.layout.get_file(str(path))
+        fmap_files = self.layout.get(
+            return_type="file",
+            datatype="fmap",
+            extension=[".json"],
+        )
+        for fmap_file in fmap_files:
+            # json_file = self.layout.get_file(fmap_file)
             # data = json_file.get_dict()
-            data = get_sidecar_metadata(str(path))
+            data = get_sidecar_metadata(fmap_file)
             if data == "Erroneous sidecar":
-                print("Error parsing sidecar: ", str(path))
+                print(f"Error parsing sidecar: {fmap_file}")
                 continue
 
             # remove scan references in the IntendedFor
@@ -728,7 +743,7 @@ def _purge_associations(self, scans):
data["IntendedFor"].remove(item)

# update the json with the new data dictionary
_update_json(str(path), data)
_update_json(fmap_file, data)

# save IntendedFor purges so that you can datalad run the
# remove association file commands on a clean dataset
@@ -741,35 +756,44 @@
         self.reset_bids_layout()
 
         # NOW WE WANT TO PURGE ALL ASSOCIATIONS
-
         to_remove = []
 
-        for path in Path(self.path).rglob("sub-*/**/*.nii.gz"):
-            if str(path) in scans:
-                # bids_file = self.layout.get_file(str(path))
+        nifti_files = self.layout.get(return_type="file", extension=["nii", "nii.gz"])
+        for nifti_file in nifti_files:
+            nifti_entities = self.layout.get_file(nifti_file).entities
+            if nifti_file in scans:
+                # bids_file = self.layout.get_file(nifti_file)
                 # associations = bids_file.get_associations()
-                associations = self.get_nifti_associations(str(path))
+                associations = self.get_nifti_associations(nifti_file)
                 for assoc in associations:
                     to_remove.append(assoc)
                     # filepath = assoc.path
 
-                # ensure association is not an IntendedFor reference!
-                if ".nii" not in str(path):
-                    if "/dwi/" in str(path):
-                        # add the bval and bvec if there
-                        if Path(img_to_new_ext(str(path), ".bval")).exists():
-                            to_remove.append(img_to_new_ext(str(path), ".bval"))
-                        if Path(img_to_new_ext(str(path), ".bvec")).exists():
-                            to_remove.append(img_to_new_ext(str(path), ".bvec"))
-
-                    if "/func/" in str(path):
-                        # add tsvs
-                        tsv = img_to_new_ext(str(path), ".tsv").replace("_bold", "_events")
-                        if Path(tsv).exists():
-                            to_remove.append(tsv)
-                        # add tsv json (if exists)
-                        if Path(tsv.replace(".tsv", ".json")).exists():
-                            to_remove.append(tsv.replace(".tsv", ".json"))
+                if nifti_entities["datatype"] == "dwi":
+                    # add the bval and bvec if there
+                    temp_entities = patch_collection_entities(nifti_entities)
+                    temp_entities["extension"] = "bval"
+                    bval_file = find_file(temp_entities, self.layout)
+                    if bval_file:
+                        to_remove.append(bval_file)
+
+                    temp_entities["extension"] = "bvec"
+                    bvec_file = find_file(temp_entities, self.layout)
+                    if bvec_file:
+                        to_remove.append(bvec_file)
+
+                if nifti_entities["datatype"] == "func":
+                    temp_entities = patch_collection_entities(nifti_entities)
+                    temp_entities["suffix"] = "events"
+                    temp_entities["extension"] = "tsv"
+                    events_tsv = find_file(temp_entities, self.layout)
+                    if events_tsv:
+                        to_remove.append(events_tsv)
+
+                    temp_entities["extension"] = "json"
+                    events_json = find_file(temp_entities, self.layout)
+                    if events_json:
+                        to_remove.append(events_json)
 
         to_remove += scans
 
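A note on the new renaming audit: apply_tsv_changes now records every old-to-new filename pair in code/CuBIDS/<prefix>_renaming.tsv before issuing the moves. A minimal sketch of reading it back, assuming an illustrative dataset root and the illustrative prefix "v0":

    import os

    import pandas as pd

    dataset_path = "/path/to/bids"  # illustrative
    renaming_df = pd.read_csv(
        os.path.join(dataset_path, "code/CuBIDS/v0_renaming.tsv"),
        sep="\t",
    )
    # One row per renamed file, columns "original" and "renamed".
    for _, row in renaming_df.iterrows():
        print(f"{row['original']} -> {row['renamed']}")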
41 changes: 41 additions & 0 deletions cubids/utils.py
@@ -1,8 +1,14 @@
"""Miscellaneous utility functions for CuBIDS."""

import copy
import re
from pathlib import Path

from bids.layout import Query
from bids.utils import listify

from cubids.constants import FILE_COLLECTION_ENTITIES


def _get_container_type(image_name):
"""Get and return the container type.
@@ -76,3 +82,38 @@ def resolve_bids_uri(uri, root, dataset_links={}):
     path = dataset_link / relative_path
 
     return str(path.absolute())
+
+
+def patch_collection_entities(entities):
+    """Patch the entities of a collection.
+
+    Parameters
+    ----------
+    entities : :obj:`dict`
+        The entities of the collection.
+
+    Returns
+    -------
+    :obj:`dict`
+        The patched entities.
+    """
+    out_entities = copy.deepcopy(dict(entities))
+    for entity in FILE_COLLECTION_ENTITIES:
+        updated_values = listify(out_entities.get(entity, []))
+        updated_values.append(Query.NONE)
+        out_entities[entity] = updated_values
+
+    return out_entities
+
+
+def find_file(entities, layout):
+    """Find a single file associated with the given entities."""
+    file_candidates = layout.get(return_type="file", **entities)
+    if len(file_candidates) > 1:
+        file_str = "\n\t" + "\n\t".join(file_candidates)
+        raise ValueError(f"Multiple associated files found:{file_str}")
+    elif len(file_candidates) == 1:
+        bvec_file = file_candidates[0]
+        return bvec_file
+    else:
+        return None
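
Taken together, the two helpers implement the lookup pattern used in _purge_associations above: patch the file-collection entities so a query tolerates their absence, then resolve the query to at most one file. A minimal usage sketch, assuming a pybids BIDSLayout over an illustrative dataset path:

    from bids.layout import BIDSLayout

    from cubids.utils import find_file, patch_collection_entities

    layout = BIDSLayout("/path/to/bids")  # illustrative path
    bold_file = layout.get(return_type="file", suffix="bold", extension="nii.gz")[0]

    # Appending Query.NONE to echo/part/flip/mt/inv lets a multi-echo BOLD run
    # match its single, echo-less events file.
    entities = patch_collection_entities(layout.get_file(bold_file).entities)
    entities["suffix"] = "events"
    entities["extension"] = "tsv"
    events_tsv = find_file(entities, layout)  # a path, or None if no match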
