From 96a484b4925ec729eb972f3ba21b935943d59a1e Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Tue, 30 Jan 2024 10:02:02 -0500 Subject: [PATCH] Add ASL fields to config (#282) * Add ASL fields. * Update constants.py * Keep working on docstrings. * Update config.py * Use PathExists and IsFile for param types. * Remove type from boolean parameter. * More work. * Update some docs. * Add support for m0scan, aslcontext, asllabeling. * Update cubids.py * Update cubids.py * Try fixing PyPi deployment job. * Run black. * Minor changes. * Update cubids.py * Update cubids.py * Address review. --- AUTHORS.rst | 14 +- CONTRIBUTING.rst | 61 +++--- HISTORY.rst | 23 +++ README.rst | 12 +- cubids/cli.py | 153 ++++++++++---- cubids/config.py | 7 +- cubids/constants.py | 9 +- cubids/cubids.py | 424 +++++++++++++++++++++++++++----------- cubids/data/config.yml | 41 +++- cubids/metadata_merge.py | 94 ++++++++- cubids/tests/test_bond.py | 1 + cubids/tests/utils.py | 1 + cubids/utils.py | 19 +- cubids/validator.py | 53 ++++- cubids/workflows.py | 153 +++++++++----- pyproject.toml | 13 +- 16 files changed, 805 insertions(+), 273 deletions(-) diff --git a/AUTHORS.rst b/AUTHORS.rst index 81d4937b1..eb67a8e3e 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -4,14 +4,18 @@ Credits Development Lead ---------------- -* Sydney Covitz +* Taylor Salo Contributors ------------ -* Matt Cieslak - -* Tinashe Tapera +* Matt Cieslak Principal Investigator ---------------------- -* Theodore Satterthwaite +* Theodore Satterthwaite + +Previous Contributors +--------------------- +* Sydney Covitz (previous development lead) + +* Tinashe Tapera diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index e4f95ef1a..65a6e2ec6 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -4,8 +4,8 @@ Contributing ============ -Contributions are welcome, and they are greatly appreciated! Every little bit -helps, and credit will always be given. +Contributions are welcome, and they are greatly appreciated! +Every little bit helps, and credit will always be given. You can contribute in many ways: @@ -26,21 +26,20 @@ If you are reporting a bug, please include: Fix Bugs ~~~~~~~~ -Look through the GitHub issues for bugs. Anything tagged with "bug" and "help -wanted" is open to whoever wants to implement it. +Look through the GitHub issues for bugs. +Anything tagged with "bug" and "help wanted" is open to whoever wants to implement it. Implement Features ~~~~~~~~~~~~~~~~~~ -Look through the GitHub issues for features. Anything tagged with "enhancement" -and "help wanted" is open to whoever wants to implement it. +Look through the GitHub issues for features. +Anything tagged with "enhancement" and "help wanted" is open to whoever wants to implement it. Write Documentation ~~~~~~~~~~~~~~~~~~~ -CuBIDS could always use more documentation, whether as part of the -official CuBIDS docs, in docstrings, or even on the web in blog posts, -articles, and such. +CuBIDS could always use more documentation, whether as part of the official CuBIDS docs, +in docstrings, or even on the web in blog posts, articles, and such. Submit Feedback ~~~~~~~~~~~~~~~ @@ -59,60 +58,62 @@ Get Started! Ready to contribute? Here's how to set up `cubids` for local development. -1. Fork the `cubids` repo on GitHub. -2. Clone your fork locally:: +1. Fork the `cubids` repo on GitHub. +2. Clone your fork locally:: $ git clone git@github.com:your_name_here/cubids.git -3. Install your local copy into a virtualenv. 
Assuming you have virtualenvwrapper installed, this is how you set up your fork for local development:: +3. Install your local copy into a virtualenv. + Assuming you have virtualenvwrapper installed, + this is how you set up your fork for local development:: $ mkvirtualenv cubids $ cd cubids/ $ python setup.py develop -4. Create a branch for local development:: +4. Create a branch for local development:: $ git checkout -b name-of-your-bugfix-or-feature Now you can make your changes locally. -5. When you're done making changes, check that your changes pass flake8 and the - tests, including testing other Python versions with tox:: +5. When you're done making changes, check that your changes pass flake8 and the + tests, including testing other Python versions with tox:: $ flake8 cubids tests $ python setup.py test or pytest $ tox - To get flake8 and tox, just pip install them into your virtualenv. + To get flake8 and tox, just pip install them into your virtualenv. -6. Commit your changes and push your branch to GitHub:: +6. Commit your changes and push your branch to GitHub:: $ git add . $ git commit -m "Your detailed description of your changes." $ git push origin name-of-your-bugfix-or-feature -7. Submit a pull request through the GitHub website. +7. Submit a pull request through the GitHub website. Pull Request Guidelines ----------------------- Before you submit a pull request, check that it meets these guidelines: -1. The pull request should include tests. -2. If the pull request adds functionality, the docs should be updated. Put - your new functionality into a function with a docstring, and add the - feature to the list in README.rst. -3. The pull request should work for Python 3.5, 3.6, 3.7 and 3.8, and for PyPy. Check - https://circleci.com/gh/PennLINC/CuBIDS - and make sure that the tests pass for all supported Python versions. +1. The pull request should include tests. +2. If the pull request adds functionality, the docs should be updated. Put + your new functionality into a function with a docstring, and add the + feature to the list in README.rst. +3. The pull request should work for Python 3.5, 3.6, 3.7 and 3.8, and for PyPy. + Check https://circleci.com/gh/PennLINC/CuBIDS + and make sure that the tests pass for all supported Python versions. Tips ---- To run a subset of tests:: -$ cd PATH/TO/LOCAL/CuBIDS/CLONE -$ py.test -sv --pdb tests + $ cd PATH/TO/LOCAL/CuBIDS/CLONE + $ py.test -sv --pdb tests Deploying @@ -122,8 +123,8 @@ A reminder for the maintainers on how to deploy. Make sure all your changes are committed (including an entry in HISTORY.rst). Then run:: -$ bump2version patch # possible: major / minor / patch -$ git push -$ git push --tags + $ bump2version patch # possible: major / minor / patch + $ git push + $ git push --tags CircleCI will then deploy to PyPI if tests pass. 
diff --git a/HISTORY.rst b/HISTORY.rst index 0d1123c89..03ec01d9f 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -2,6 +2,29 @@ History ======= +1.0.2 (2023-09-07) +------------------ + +* Add image orientation by @scovitz in https://github.com/PennLINC/CuBIDS/pull/205 +* review feedback milestone: adding code/CuBIDS option and converting CSVs to TSVs by @scovitz in https://github.com/PennLINC/CuBIDS/pull/217 +* Reviewer feedback incorporated into docs and pybids layout update by @scovitz in https://github.com/PennLINC/CuBIDS/pull/227 +* Data dictionaries by @scovitz in https://github.com/PennLINC/CuBIDS/pull/230 +* No index metadata by @scovitz in https://github.com/PennLINC/CuBIDS/pull/231 +* updated _update_json to no longer use pybids by @scovitz in https://github.com/PennLINC/CuBIDS/pull/232 +* Minor tune ups: codespell'ing (fixes + tox + CI (github actions)), remove of unintended to be committed 2 files by @yarikoptic in https://github.com/PennLINC/CuBIDS/pull/239 +* ENH: Make "NumVolumes" an integer for 3D images by @cookpa in https://github.com/PennLINC/CuBIDS/pull/211 +* adding note about fmap renamekeygroups by @megardn in https://github.com/PennLINC/CuBIDS/pull/140 +* Update usage.rst by @megardn in https://github.com/PennLINC/CuBIDS/pull/138 +* printing erroneous jsons and only rounding float parameters by @scovitz in https://github.com/PennLINC/CuBIDS/pull/257 + +New Contributors +````````````````` +* @yarikoptic made their first contribution in https://github.com/PennLINC/CuBIDS/pull/239 +* @cookpa made their first contribution in https://github.com/PennLINC/CuBIDS/pull/211 +* @megardn made their first contribution in https://github.com/PennLINC/CuBIDS/pull/140 + +**Full Changelog**: https://github.com/PennLINC/CuBIDS/compare/v1.0.1...1.0.2 + 0.1.0 (2020-10-07) ------------------ diff --git a/README.rst b/README.rst index 36a2d0167..db0440b24 100644 --- a/README.rst +++ b/README.rst @@ -18,16 +18,18 @@ About Curation of BIDS, or ``CuBIDS``, is a workflow and software package designed to facilitate reproducible curation of neuroimaging `BIDS `_ datasets. -CuBIDS breaks down BIDS dataset curation into four main components and addresses each one using -various command line programs complete with version control capabilities. These components are not necessarily linear but all are critical -in the process of preparing BIDS data for successful preprocessing and analysis pipeline runs. +CuBIDS breaks down BIDS dataset curation into four main components and addresses each one using +various command line programs complete with version control capabilities. +These components are not necessarily linear but all are critical +in the process of preparing BIDS data for successful preprocessing and analysis pipeline runs. 1. CuBIDS facilitates the validation of BIDS data. - 2. CuBIDS visualizes and summarizes the heterogeneity in a BIDS dataset. + 2. CuBIDS visualizes and summarizes the heterogeneity in a BIDS dataset. 3. CuBIDS helps users test pipelines on the entire parameter space of a BIDS dataset. 4. CuBIDS allows users to perform metadata-based quality control on their BIDS data. .. 
image:: https://github.com/PennLINC/CuBIDS/raw/main/docs/_static/cubids_workflow.png :width: 600 -For full documentation, please visit our `ReadTheDocs `_ \ No newline at end of file +For full documentation, please visit our +`ReadTheDocs `_ \ No newline at end of file diff --git a/cubids/cli.py b/cubids/cli.py index a55cecb0d..6fde0885e 100644 --- a/cubids/cli.py +++ b/cubids/cli.py @@ -1,8 +1,10 @@ """Console script for cubids.""" + import argparse import logging import os import warnings +from functools import partial from pathlib import Path from cubids import workflows @@ -14,14 +16,31 @@ logging.getLogger("datalad").setLevel(logging.ERROR) +def _path_exists(path, parser): + """Ensure a given path exists.""" + if path is None or not Path(path).exists(): + raise parser.error(f"Path does not exist: <{path}>.") + return Path(path).absolute() + + +def _is_file(path, parser): + """Ensure a given path exists and it is a file.""" + path = _path_exists(path, parser) + if not path.is_file(): + raise parser.error(f"Path should point to a file (or symlink of file): <{path}>.") + return path + + def _parse_validate(): parser = argparse.ArgumentParser( description="cubids-validate: Wrapper around the official BIDS Validator", formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) + PathExists = partial(_path_exists, parser=parser) + parser.add_argument( "bids_dir", - type=Path, + type=PathExists, action="store", help=( "the root of a BIDS dataset. It should contain " @@ -93,12 +112,19 @@ def _parse_bids_sidecar_merge(): description=("bids-sidecar-merge: merge critical keys from one sidecar to another"), formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) - parser.add_argument("from_json", type=Path, action="store", help="Source json file.") + IsFile = partial(_is_file, parser=parser) + + parser.add_argument( + "from_json", + type=IsFile, + action="store", + help="Source json file.", + ) parser.add_argument( "to_json", - type=Path, + type=IsFile, action="store", - help=("destination json. This file will have data from `from_json` copied into it."), + help="destination json. This file will have data from `from_json` copied into it.", ) return parser @@ -120,9 +146,11 @@ def _parse_group(): description="cubids-group: find key and parameter groups in BIDS", formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) + PathExists = partial(_path_exists, parser=parser) + parser.add_argument( "bids_dir", - type=Path, + type=PathExists, action="store", help=( "the root of a BIDS dataset. It should contain " @@ -156,7 +184,14 @@ def _parse_group(): help=("Level at which acquisition groups are created options: 'subject' or 'session'"), ) parser.add_argument( - "--config", action="store", type=Path, help="path to a config file for grouping" + "--config", + action="store", + type=PathExists, + default=None, + help=( + "Path to a config file for grouping. " + "If not provided, then the default config file from CuBIDS will be used." + ), ) return parser @@ -177,9 +212,12 @@ def _parse_apply(): description=("cubids-apply: apply the changes specified in a tsv to a BIDS directory"), formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) + PathExists = partial(_path_exists, parser=parser) + IsFile = partial(_is_file, parser=parser) + parser.add_argument( "bids_dir", - type=Path, + type=PathExists, action="store", help=( "the root of a BIDS dataset. 
It should contain " @@ -188,7 +226,7 @@ def _parse_apply(): ) parser.add_argument( "edited_summary_tsv", - type=Path, + type=IsFile, action="store", help=( "path to the _summary.tsv that has been edited " @@ -200,7 +238,7 @@ def _parse_apply(): ) parser.add_argument( "files_tsv", - type=Path, + type=IsFile, action="store", help=( "path to the _files.tsv that has been edited " @@ -226,6 +264,7 @@ def _parse_apply(): parser.add_argument( "--use-datalad", action="store_true", + default=False, help="ensure that there are no untracked changes before finding groups", ) parser.add_argument( @@ -241,7 +280,14 @@ def _parse_apply(): help=("Level at which acquisition groups are created options: 'subject' or 'session'"), ) parser.add_argument( - "--config", action="store", type=Path, help="path to a config file for grouping" + "--config", + action="store", + type=IsFile, + default=None, + help=( + "Path to a config file for grouping. " + "If not provided, then the default config file from CuBIDS will be used." + ), ) return parser @@ -263,16 +309,22 @@ def _parse_datalad_save(): description=("cubids-datalad-save: perform a DataLad save on a BIDS directory"), formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) + PathExists = partial(_path_exists, parser=parser) + parser.add_argument( "bids_dir", - type=Path, + type=PathExists, action="store", help=( "the root of a BIDS dataset. It should contain " "sub-X directories and dataset_description.json" ), ) - parser.add_argument("-m", action="store", help="message for this commit") + parser.add_argument( + "-m", + action="store", + help="message for this commit", + ) parser.add_argument( "--container", action="store", @@ -299,9 +351,11 @@ def _parse_undo(): description="cubids-undo: revert most recent commit", formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) + PathExists = partial(_path_exists, parser=parser) + parser.add_argument( "bids_dir", - type=Path, + type=PathExists, action="store", help=( "the root of a BIDS dataset. It should contain " @@ -336,41 +390,56 @@ def _parse_copy_exemplars(): ), formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) + PathExists = partial(_path_exists, parser=parser) + IsFile = partial(_is_file, parser=parser) + parser.add_argument( "bids_dir", - type=Path, + type=PathExists, action="store", - help="path to the root of a BIDS dataset. " - "It should contain sub-X directories and " - "dataset_description.json.", + help=( + "path to the root of a BIDS dataset. " + "It should contain sub-X directories and " + "dataset_description.json." + ), ) parser.add_argument( "exemplars_dir", - type=Path, + type=PathExists, action="store", - help="absolute path to the root of a BIDS dataset " - "containing one subject from each Acquisition Group. " - "It should contain sub-X directories and " - "dataset_description.json.", + help=( + "absolute path to the root of a BIDS dataset " + "containing one subject from each Acquisition Group. " + "It should contain sub-X directories and " + "dataset_description.json." 
+ ), ) parser.add_argument( "exemplars_tsv", - type=Path, + type=IsFile, action="store", - help="absolute path to the .tsv file that lists one " - "subject from each Acquisition Group " - "(*_AcqGrouping.tsv from the cubids-group output)", + help=( + "absolute path to the .tsv file that lists one " + "subject from each Acquisition Group " + "(*_AcqGrouping.tsv from the cubids-group output)" + ), ) parser.add_argument( - "--use-datalad", action="store_true", help="check exemplar dataset into DataLad" + "--use-datalad", + action="store_true", + default=False, + help="check exemplar dataset into DataLad", ) parser.add_argument( "--min-group-size", action="store", default=1, - help="minimum number of subjects an Acquisition Group " - "must have in order to be included in the exemplar " - "dataset ", + type=int, + help=( + "minimum number of subjects an Acquisition Group " + "must have in order to be included in the exemplar " + "dataset " + ), required=False, ) # parser.add_argument('--include-groups', @@ -408,9 +477,11 @@ def _parse_add_nifti_info(): ), formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) + PathExists = partial(_path_exists, parser=parser) + parser.add_argument( "bids_dir", - type=Path, + type=PathExists, action="store", help=( "absolute path to the root of a BIDS dataset. " @@ -421,11 +492,13 @@ def _parse_add_nifti_info(): parser.add_argument( "--use-datalad", action="store_true", + default=False, help="ensure that there are no untracked changes before finding groups", ) parser.add_argument( "--force-unlock", action="store_true", + default=False, help="unlock dataset before adding nifti info ", ) parser.add_argument( @@ -453,9 +526,12 @@ def _parse_purge(): description="cubids-purge: purge associations from the dataset", formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) + PathExists = partial(_path_exists, parser=parser) + IsFile = partial(_is_file, parser=parser) + parser.add_argument( "bids_dir", - type=Path, + type=PathExists, action="store", help=( "path to the root of a BIDS dataset. " @@ -465,13 +541,14 @@ def _parse_purge(): ) parser.add_argument( "scans", - type=Path, + type=IsFile, action="store", help="path to the txt file of scans whose associations should be purged.", ) parser.add_argument( "--use-datalad", action="store_true", + default=False, help="ensure that there are no untracked changes before finding groups", ) parser.add_argument( @@ -498,9 +575,11 @@ def _parse_remove_metadata_fields(): description="cubids-remove-metadata-fields: delete fields from metadata", formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) + PathExists = partial(_path_exists, parser=parser) + parser.add_argument( "bids_dir", - type=Path, + type=PathExists, action="store", help=( "the root of a BIDS dataset. It should contain " @@ -524,6 +603,7 @@ def _parse_remove_metadata_fields(): def _enter_remove_metadata_fields(argv=None): + """Set entrypoint for "cubids-remove-metadata-fields" CLI.""" warnings.warn( "cubids-remove-metadata-fields is deprecated and will be removed in the future. 
" "Please use cubids remove-metadata-fields.", @@ -536,13 +616,16 @@ def _enter_remove_metadata_fields(argv=None): def _parse_print_metadata_fields(): + """Create the parser for the "cubids print-metadata-fields" command.""" parser = argparse.ArgumentParser( description="cubids-print-metadata-fields: print all unique metadata fields", formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) + PathExists = partial(_path_exists, parser=parser) + parser.add_argument( "bids_dir", - type=Path, + type=PathExists, action="store", help=( "the root of a BIDS dataset. It should contain " diff --git a/cubids/config.py b/cubids/config.py index 59fd5a157..5d4948336 100644 --- a/cubids/config.py +++ b/cubids/config.py @@ -1,6 +1,4 @@ -""" -Functions for configuring CuBIDS -""" +"""Functions for configuring CuBIDS.""" from pathlib import Path @@ -9,8 +7,7 @@ def load_config(config_file): - """Loads a YAML file containing a configuration for param groups.""" - + """Load a YAML file containing a configuration for param groups.""" if config_file is None: config_file = Path(pkgrf("cubids", "data/config.yml")) diff --git a/cubids/constants.py b/cubids/constants.py index 21c8982cd..ec24b6691 100644 --- a/cubids/constants.py +++ b/cubids/constants.py @@ -1,7 +1,14 @@ """Constants for CuBIDS.""" + +# Names of identifier variables. +# Used to place KeyGroup and ParamGroup at the beginning of a dataframe, +# but both are hardcoded in the relevant function. ID_VARS = set(["KeyGroup", "ParamGroup", "FilePath"]) +# Entities that should not be used to group parameter sets NON_KEY_ENTITIES = set(["subject", "session", "extension"]) -# Multi-dimensional keys SliceTiming +# Multi-dimensional keys SliceTiming XXX: what is this line about? +# List of metadata fields and parameters (calculated by CuBIDS) +# Not sure what this specific list is used for. IMAGING_PARAMS = set( [ "ParallelReductionFactorInPlane", diff --git a/cubids/cubids.py b/cubids/cubids.py index 842215e0c..9195163aa 100644 --- a/cubids/cubids.py +++ b/cubids/cubids.py @@ -1,4 +1,5 @@ """Main module.""" + import csv import json import os @@ -31,7 +32,58 @@ class CuBIDS(object): """The main CuBIDS class. - TODO: Complete docstring. + Parameters + ---------- + data_root : :obj:`str` + Path to the root of the BIDS dataset. + use_datalad : :obj:`bool`, optional + If True, use datalad to track changes to the BIDS dataset. + Default is False. + acq_group_level : :obj:`str`, optional + The level at which to group scans. Default is "subject". + grouping_config : :obj:`str`, optional + Path to the grouping config file. + Default is None, in which case the default config in CuBIDS is used. + force_unlock : :obj:`bool`, optional + If True, force unlock all files in the BIDS dataset. + Default is False. + + Attributes + ---------- + path : :obj:`str` + Path to the root of the BIDS dataset. + _layout : :obj:`bids.layout.BIDSLayout` + The BIDSLayout object. + keys_files : :obj:`dict` + A dictionary of key groups and the files that belong to them. + fieldmaps_cached : :obj:`bool` + If True, the fieldmaps have been cached. + datalad_ready : :obj:`bool` + If True, the datalad dataset has been initialized. + datalad_handle : :obj:`datalad.api.Dataset` + The datalad dataset handle. + old_filenames : :obj:`list` + A list of old filenames. + new_filenames : :obj:`list` + A list of new filenames. + IF_rename_paths : :obj:`list` + A list of IntendedFor paths that have been renamed. + grouping_config : :obj:`dict` + The grouping config dictionary. 
+ acq_group_level : :obj:`str` + The level at which to group scans. + scans_txt : :obj:`str` + Path to the .txt file that lists the scans + you want to be deleted from the dataset, along + with their associations. + force_unlock : :obj:`bool` + If True, force unlock all files in the BIDS dataset. + cubids_code_dir : :obj:`bool` + If True, the CuBIDS code directory exists. + data_dict : :obj:`dict` + A data dictionary for TSV outputs. + use_datalad : :obj:`bool` + If True, use datalad to track changes to the BIDS dataset. """ def __init__( @@ -68,7 +120,7 @@ def __init__( def layout(self): """Return the BIDSLayout object. - TODO: Complete docstring. + If the BIDSLayout object has not been created, create it. """ if self._layout is None: # print("SETTING LAYOUT OBJECT") @@ -79,7 +131,12 @@ def layout(self): def reset_bids_layout(self, validate=False): """Reset the BIDS layout. - TODO: Complete docstring. + This sets the ``_layout`` attribute to a new :obj:`bids.layout.BIDSLayout` object. + + Parameters + ---------- + validate : :obj:`bool`, optional + If True, validate the BIDS dataset. Default is False. """ # create BIDS Layout Indexer class @@ -99,7 +156,16 @@ def reset_bids_layout(self, validate=False): def create_cubids_code_dir(self): """Create CuBIDS code directory. - TODO: Complete docstring. + This creates the CuBIDS code directory at self.path/code/CuBIDS. + + Returns + ------- + :obj:`str` + Path to the CuBIDS code directory. + + Notes + ----- + Why not use ``os.makedirs``? """ # check if BIDS_ROOT/code/CuBIDS exists if not self.cubids_code_dir: @@ -109,7 +175,12 @@ def create_cubids_code_dir(self): return self.cubids_code_dir def init_datalad(self): - """Initialize a datalad Dataset at self.path.""" + """Initialize a datalad Dataset at self.path. + + This creates a datalad dataset at self.path and sets the + ``datalad_ready`` attribute to True. + It also sets the ``datalad_handle`` attribute to the datalad.Dataset object. + """ self.datalad_ready = True self.datalad_handle = dlapi.Dataset(self.path) @@ -138,7 +209,18 @@ def datalad_save(self, message=None): raise Exception("Failed to save in DataLad") def is_datalad_clean(self): - """If True, no changes are detected in the datalad dataset.""" + """If True, no changes are detected in the datalad dataset. + + Returns + ------- + :obj:`bool` + True if the datalad dataset is clean, False otherwise. + + Raises + ------ + Exception + If datalad has not been initialized. + """ if not self.datalad_ready: raise Exception("Datalad not initialized, can't determine status") statuses = set([status["state"] for status in self.datalad_handle.status()]) @@ -148,6 +230,11 @@ def datalad_undo_last_commit(self): """Revert the most recent commit, remove it from history. Uses git reset --hard to revert to the previous commit. + + Raises + ------ + Exception + If there are untracked changes in the datalad dataset. """ if not self.is_datalad_clean(): raise Exception("Untracked changes present. Run clear_untracked_changes first") @@ -166,12 +253,14 @@ def add_nifti_info(self): # ignore all dot directories if "/." 
in str(path): continue + if str(path).endswith(".nii") or str(path).endswith(".nii.gz"): try: img = nb.load(str(path)) except Exception: print("Empty Nifti File: ", str(path)) continue + # get important info from niftis obliquity = np.any(nb.affines.obliquity(img.affine) > 1e-4) voxel_sizes = img.header.get_zooms() @@ -208,11 +297,13 @@ def add_nifti_info(self): orient = nb.orientations.aff2axcodes(img.affine) joined = "".join(orient) + "+" data["ImageOrientation"] = joined + with open(sidecar, "w") as file: json.dump(data, file, indent=4) if self.use_datalad: self.datalad_save(message="Added nifti info to sidecars") + self.reset_bids_layout() def apply_tsv_changes(self, summary_tsv, files_tsv, new_prefix, raise_on_error=True): @@ -224,10 +315,14 @@ def apply_tsv_changes(self, summary_tsv, files_tsv, new_prefix, raise_on_error=T Parameters ---------- - summary_tsv - files_tsv - new_prefix + summary_tsv : :obj:`str` + Path to the edited summary tsv file. + files_tsv : :obj:`str` + Path to the edited files tsv file. + new_prefix : :obj:`str` + Path prefix to the new tsv files. raise_on_error : :obj:`bool` + If True, raise an error if the MergeInto column contains invalid merges. """ # reset lists of old and new filenames self.old_filenames = [] @@ -319,16 +414,13 @@ def apply_tsv_changes(self, summary_tsv, files_tsv, new_prefix, raise_on_error=T full_cmd = "\n".join(merge_commands + move_ops) if full_cmd: - # write full_cmd to a .sh file - # Open file for writing - fileObject = open(new_prefix + "_full_cmd.sh", "w") - fileObject.write("#!/bin/bash\n") - fileObject.write(full_cmd) - # Close the file - fileObject.close() - renames = new_prefix + "_full_cmd.sh" + # write full_cmd to a .sh file + with open(renames, "w") as fo: + fo.write("#!/bin/bash\n") + fo.write(full_cmd) + if self.use_datalad: # first check if IntendedFor renames need to be saved if not self.is_datalad_clean(): @@ -366,19 +458,22 @@ def change_filename(self, filepath, entities): Parameters ---------- - filepath : str - Path prefix to a file in the affected key group change - entities : dictionary - A pybids dictionary of entities parsed from the new key - group name. + filepath : :obj:`str` + Path prefix to a file in the affected key group change. + entities : :obj:`dict` + A pybids dictionary of entities parsed from the new key group name. + + Notes + ----- + This is the function I need to spend the most time on, since it has entities hardcoded. """ exts = Path(filepath).suffixes - old_ext = "" - for ext in exts: - old_ext += ext + old_ext = "".join(exts) suffix = entities["suffix"] entity_file_keys = [] + + # Entities that may be in the filename? file_keys = ["task", "acquisition", "direction", "reconstruction", "run"] for key in file_keys: @@ -390,6 +485,7 @@ def change_filename(self, filepath, entities): sub_ses = sub + "_" + ses if "run" in list(entities.keys()) and "run-0" in filepath: + # XXX: This adds an extra leading zero to run. entities["run"] = "0" + str(entities["run"]) filename = "_".join([f"{key}-{entities[key]}" for key in entity_file_keys]) @@ -401,28 +497,31 @@ def change_filename(self, filepath, entities): if len(filename) > 0: filename = sub_ses + "_" + filename + "_" + suffix + old_ext else: - filename = sub_ses + filename + "_" + suffix + old_ext + raise ValueError(f"Could not construct new filename for {filepath}") # CHECK TO SEE IF DATATYPE CHANGED + # datatype may be overridden/changed if the original file is located in the wrong folder. 
dtypes = ["anat", "func", "perf", "fmap", "dwi"] - old = "" + dtype_orig = "" for dtype in dtypes: if dtype in filepath: - old = dtype + dtype_orig = dtype if "datatype" in entities.keys(): - dtype = entities["datatype"] - if entities["datatype"] != old: + dtype_new = entities["datatype"] + if entities["datatype"] != dtype_orig: print("WARNING: DATATYPE CHANGE DETECETD") else: - dtype = old - new_path = str(self.path) + "/" + sub + "/" + ses + "/" + dtype + "/" + filename + dtype_new = dtype_orig + + # Construct the new filename + new_path = str(self.path) + "/" + sub + "/" + ses + "/" + dtype_new + "/" + filename - # add the scan path + new path to the lists of old, new filenames + # Add the scan path + new path to the lists of old, new filenames self.old_filenames.append(filepath) self.new_filenames.append(new_path) - # NOW NEED TO RENAME ASSOCIATIONS + # NOW NEED TO RENAME ASSOCIATED FILES # bids_file = self.layout.get_file(filepath) bids_file = filepath # associations = bids_file.get_associations() @@ -439,34 +538,35 @@ def change_filename(self, filepath, entities): self.new_filenames.append(new_ext_path) # MAKE SURE THESE AREN'T COVERED BY get_associations!!! + # Update DWI-specific files if "/dwi/" in filepath: # add the bval and bvec if there - if ( - Path(img_to_new_ext(filepath, ".bval")).exists() - and img_to_new_ext(filepath, ".bval") not in self.old_filenames - ): - self.old_filenames.append(img_to_new_ext(filepath, ".bval")) - self.new_filenames.append(img_to_new_ext(new_path, ".bval")) - - if ( - Path(img_to_new_ext(filepath, ".bvec")).exists() - and img_to_new_ext(filepath, ".bvec") not in self.old_filenames - ): - self.old_filenames.append(img_to_new_ext(filepath, ".bvec")) - self.new_filenames.append(img_to_new_ext(new_path, ".bvec")) - + bval_old = img_to_new_ext(filepath, ".bval") + bval_new = img_to_new_ext(new_path, ".bval") + if Path(bval_old).exists() and bval_old not in self.old_filenames: + self.old_filenames.append(bval_old) + self.new_filenames.append(bval_new) + + bvec_old = img_to_new_ext(filepath, ".bvec") + bvec_new = img_to_new_ext(new_path, ".bvec") + if Path(bvec_old).exists() and bvec_old not in self.old_filenames: + self.old_filenames.append(bvec_old) + self.new_filenames.append(bvec_new) + + # Update func-specific files # now rename _events and _physio files! 
old_suffix = parse_file_entities(filepath)["suffix"] scan_end = "_" + old_suffix + old_ext if "_task-" in filepath: old_events = filepath.replace(scan_end, "_events.tsv") - old_ejson = filepath.replace(scan_end, "_events.json") if Path(old_events).exists(): self.old_filenames.append(old_events) new_scan_end = "_" + suffix + old_ext new_events = new_path.replace(new_scan_end, "_events.tsv") self.new_filenames.append(new_events) + + old_ejson = filepath.replace(scan_end, "_events.json") if Path(old_ejson).exists(): self.old_filenames.append(old_ejson) new_scan_end = "_" + suffix + old_ext @@ -480,37 +580,63 @@ def change_filename(self, filepath, entities): new_physio = new_path.replace(new_scan_end, "_physio.tsv.gz") self.new_filenames.append(new_physio) + # Update ASL-specific files + if "/perf/" in filepath: + old_context = filepath.replace(scan_end, "_aslcontext.tsv") + if Path(old_context).exists(): + self.old_filenames.append(old_context) + new_scan_end = "_" + suffix + old_ext + new_context = new_path.replace(new_scan_end, "_aslcontext.tsv") + self.new_filenames.append(new_context) + + old_m0scan = filepath.replace(scan_end, "_m0scan.nii.gz") + if Path(old_m0scan).exists(): + self.old_filenames.append(old_m0scan) + new_scan_end = "_" + suffix + old_ext + new_m0scan = new_path.replace(new_scan_end, "_m0scan.nii.gz") + self.new_filenames.append(new_m0scan) + + old_mjson = filepath.replace(scan_end, "_m0scan.json") + if Path(old_mjson).exists(): + self.old_filenames.append(old_mjson) + new_scan_end = "_" + suffix + old_ext + new_mjson = new_path.replace(new_scan_end, "_m0scan.json") + self.new_filenames.append(new_mjson) + + old_labeling = filepath.replace(scan_end, "_asllabeling.jpg") + if Path(old_labeling).exists(): + self.old_filenames.append(old_labeling) + new_scan_end = "_" + suffix + old_ext + new_labeling = new_path.replace(new_scan_end, "_asllabeling.jpg") + self.new_filenames.append(new_labeling) + # RENAME INTENDED FORS! ses_path = self.path + "/" + sub + "/" + ses - for path in Path(ses_path).rglob("fmap/*.json"): - self.IF_rename_paths.append(str(path)) - # json_file = self.layout.get_file(str(path)) + files_with_if = [] + files_with_if += Path(ses_path).rglob("fmap/*.json") + files_with_if += Path(ses_path).rglob("perf/*_m0scan.json") + for path_with_if in files_with_if: + filename_with_if = str(path_with_if) + self.IF_rename_paths.append(filename_with_if) + # json_file = self.layout.get_file(filename_with_if) # data = json_file.get_dict() - data = get_sidecar_metadata(str(path)) + data = get_sidecar_metadata(filename_with_if) if data == "Erroneous sidecar": - print("Error parsing sidecar: ", str(path)) + print("Error parsing sidecar: ", filename_with_if) continue if "IntendedFor" in data.keys(): - # check if IntendedFor field is a str or list - if isinstance(data["IntendedFor"], str): - if data["IntendedFor"] == _get_intended_for_reference(filepath): - # replace old filename with new one (overwrite string) - data["IntendedFor"] = _get_intended_for_reference(new_path) - - # update the json with the new data dictionary - _update_json(str(path), data) - - if isinstance(data["IntendedFor"], list): - for item in data["IntendedFor"]: - if item in _get_intended_for_reference(filepath): - # remove old filename - data["IntendedFor"].remove(item) - # add new filename - data["IntendedFor"].append(_get_intended_for_reference(new_path)) - - # update the json with the new data dictionary - _update_json(str(path), data) + # Coerce IntendedFor to a list. 
+ data["IntendedFor"] = listify(data["IntendedFor"]) + for item in data["IntendedFor"]: + if item in _get_intended_for_reference(filepath): + # remove old filename + data["IntendedFor"].remove(item) + # add new filename + data["IntendedFor"].append(_get_intended_for_reference(new_path)) + + # update the json with the new data dictionary + _update_json(filename_with_if, data) # save IntendedFor purges so that you can datalad run the # remove association file commands on a clean dataset @@ -529,16 +655,18 @@ def copy_exemplars(self, exemplars_dir, exemplars_tsv, min_group_size): Parameters ---------- - exemplars_dir : str + exemplars_dir : :obj:`str` path to the directory that will contain one subject from each Acqusition Group (*_AcqGrouping.tsv) example path: /Users/Covitz/tsvs/CCNP_Acq_Groups/ - exemplars_tsv : str + exemplars_tsv : :obj:`str` path to the .tsv file that lists one subject from each Acqusition Group (*_AcqGrouping.tsv from the cubids-group output) example path: /Users/Covitz/tsvs/CCNP_Acq_Grouping.tsv - min_group_size + min_group_size : :obj:`int` + Minimum number of subjects in an acq group for it to be included + in the exemplar dataset. """ # create the exemplar ds if self.use_datalad: @@ -558,11 +686,11 @@ def copy_exemplars(self, exemplars_dir, exemplars_tsv, min_group_size): subs = pd.read_table(exemplars_tsv) # if min group size flag set, drop acq groups with less than min - if int(min_group_size) > 1: + if min_group_size > 1: for row in range(len(subs)): acq_group = subs.loc[row, "AcqGroup"] size = int(subs["AcqGroup"].value_counts()[acq_group]) - if size < int(min_group_size): + if size < min_group_size: subs = subs.drop([row]) # get one sub from each acq group @@ -612,7 +740,13 @@ def purge(self, scans_txt): self._purge_associations(scans) def _purge_associations(self, scans): - """Purge field map JSONs' IntendedFor references.""" + """Purge field map JSONs' IntendedFor references. + + Parameters + ---------- + scans : :obj:`list` of :obj:`str` + List of file paths to remove from field map JSONs. 
+ """ # truncate all paths to intendedfor reference format # sub, ses, modality only (no self.path) if_scans = [] @@ -629,20 +763,14 @@ def _purge_associations(self, scans): # remove scan references in the IntendedFor if "IntendedFor" in data.keys(): - # check if IntendedFor field value is a list or a string - if isinstance(data["IntendedFor"], str): - if data["IntendedFor"] in if_scans: - data["IntendedFor"] = [] - # update the json with the new data dictionary - _update_json(str(path), data) + data["IntendedFor"] = listify(data["IntendedFor"]) - if isinstance(data["IntendedFor"], list): - for item in data["IntendedFor"]: - if item in if_scans: - data["IntendedFor"].remove(item) + for item in data["IntendedFor"]: + if item in if_scans: + data["IntendedFor"].remove(item) - # update the json with the new data dictionary - _update_json(str(path), data) + # update the json with the new data dictionary + _update_json(str(path), data) # save IntendedFor purges so that you can datalad run the # remove association file commands on a clean dataset @@ -675,6 +803,7 @@ def _purge_associations(self, scans): to_remove.append(img_to_new_ext(str(path), ".bval")) if Path(img_to_new_ext(str(path), ".bvec")).exists(): to_remove.append(img_to_new_ext(str(path), ".bvec")) + if "/func/" in str(path): # add tsvs tsv = img_to_new_ext(str(path), ".tsv").replace("_bold", "_events") @@ -683,6 +812,7 @@ def _purge_associations(self, scans): # add tsv json (if exists) if Path(tsv.replace(".tsv", ".json")).exists(): to_remove.append(tsv.replace(".tsv", ".json")) + to_remove += scans # create rm commands for all files that need to be purged @@ -699,11 +829,10 @@ def _purge_associations(self, scans): path_prefix = str(Path(self.path).parent) - fileObject = open(path_prefix + "/" + "_full_cmd.sh", "w") - fileObject.write("#!/bin/bash\n") - fileObject.write(full_cmd) - # Close the file - fileObject.close() + with open(path_prefix + "/" + "_full_cmd.sh", "w") as fo: + fo.write("#!/bin/bash\n") + fo.write(full_cmd) + if self.scans_txt: cmt = f"Purged scans listed in {self.scans_txt} from dataset" else: @@ -725,13 +854,18 @@ def _purge_associations(self, scans): print("Not running any association removals") def get_nifti_associations(self, nifti): - """Get nifti associations.""" + """Get nifti associations. + + This uses globbing to find files with the same path, entities, and suffix as the NIfTI, + but with a different extension. 
+ """ # get all assocation files of a nifti image no_ext_file = str(nifti).split("/")[-1].split(".")[0] associations = [] - for path in Path(self.path).rglob("sub-*/**/*.*"): - if no_ext_file in str(path) and ".nii.gz" not in str(path): + for path in Path(self.path).rglob(f"sub-*/**/{no_ext_file}.*"): + if ".nii.gz" not in str(path): associations.append(str(path)) + return associations def _cache_fieldmaps(self): @@ -809,6 +943,7 @@ def get_param_groups_from_key_group(self, key_group): for mod in modalities: if mod in filepath: modality = mod.replace("/", "").replace("/", "") + if modality == "": print("Unusual Modality Detected") modality = "other" @@ -1125,20 +1260,20 @@ def get_tsvs(self, path_prefix): summary_dict = self.get_data_dictionary(summary) # Save data dictionaires as JSONs - with open(path_prefix + "_files.json", "w") as outfile: + with open(f"{path_prefix}_files.json", "w") as outfile: json.dump(files_dict, outfile, indent=4) - with open(path_prefix + "_summary.json", "w") as outfile: + with open(f"{path_prefix}_summary.json", "w") as outfile: json.dump(summary_dict, outfile, indent=4) - big_df.to_csv(path_prefix + "_files.tsv", sep="\t", index=False) + big_df.to_csv(f"{path_prefix}_files.tsv", sep="\t", index=False) - summary.to_csv(path_prefix + "_summary.tsv", sep="\t", index=False) + summary.to_csv(f"{path_prefix}_summary.tsv", sep="\t", index=False) # Calculate the acq groups - group_by_acquisition_sets(path_prefix + "_files.tsv", path_prefix, self.acq_group_level) + group_by_acquisition_sets(f"{path_prefix}_files.tsv", path_prefix, self.acq_group_level) - print("CuBIDS detected " + str(len(summary)) + " Parameter Groups.") + print(f"CuBIDS detected {len(summary)} Parameter Groups.") def get_key_groups(self): """Identify the key groups for the bids dataset.""" @@ -1215,6 +1350,7 @@ def remove_metadata_fields(self, fields_to_remove): if ".git" not in str(json_file): with open(json_file, "r") as jsonr: metadata = json.load(jsonr) + offending_keys = remove_fields.intersection(metadata.keys()) # Quit if there are none in there if not offending_keys: @@ -1294,21 +1430,21 @@ def _get_param_groups( Parameters ---------- - files : list + files : :obj:`list` of :obj:`str` List of file names - fieldmap_lookup : defaultdict + fieldmap_lookup : :obj:`dict` mapping of filename strings relative to the bids root (e.g. "sub-X/ses-Y/func/sub-X_ses-Y_task-rest_bold.nii.gz") - grouping_config : dict + grouping_config : :obj:`dict` configuration for defining parameter groups Returns ------- - labeled_files : pd.DataFrame + labeled_files : :obj:`pandas.DataFrame` A data frame with one row per file where the ParamGroup column indicates which group each scan is a part of. - param_groups_with_counts : pd.DataFrame - A data frame with param group summaries + param_groups_with_counts : :obj:`pandas.DataFrame` + A data frame with param group summaries. 
""" if not files: print("WARNING: no files for", key_group_name) @@ -1393,7 +1529,7 @@ def _get_param_groups( # get the subset of columns to drop duplicates by check_cols = [] for col in list(df.columns): - if "Cluster_" + col not in list(df.columns) and col != "FilePath": + if f"Cluster_{col}" not in list(df.columns) and col != "FilePath": check_cols.append(col) # Find the unique ParamGroups and assign ID numbers in "ParamGroup"\ @@ -1446,13 +1582,14 @@ def _get_param_groups( def round_params(param_group_df, config, modality): - """Round parameters.""" + """Round columns' values in DataFrame according to requested precision.""" to_format = config["sidecar_params"][modality] to_format.update(config["derived_params"][modality]) for column_name, column_fmt in to_format.items(): if column_name not in param_group_df: continue + if "precision" in column_fmt: if isinstance(param_group_df[column_name], float): param_group_df[column_name] = param_group_df[column_name].round( @@ -1465,7 +1602,7 @@ def round_params(param_group_df, config, modality): def get_sidecar_metadata(json_file): """Get all metadata values in a file's sidecar. - Transform json dictionary to python dictionary. + Transform json dictionary to Python dictionary. """ try: with open(json_file) as json_file: @@ -1477,13 +1614,51 @@ def get_sidecar_metadata(json_file): def format_params(param_group_df, config, modality): - """Run AgglomerativeClustering on param groups and add columns to dataframe.""" + """Run AgglomerativeClustering on param groups and add columns to dataframe. + + Parameters + ---------- + param_group_df : :obj:`pandas.DataFrame` + A data frame with one row per file where the ParamGroup column + indicates which group each scan is a part of. + config : :obj:`dict` + Configuration for defining parameter groups. + This dictionary has two keys: ``'sidecar_params'`` and ``'derived_params'``. + modality : :obj:`str` + Modality of the scan. + This is used to select the correct configuration from the config dict. + + Returns + ------- + param_group_df : :obj:`pandas.DataFrame` + An updated version of the input data frame, + with a new column added for each element in the modality's + ``'sidecar_params'`` and ``'derived_params'`` dictionaries. + The new columns will have the name ``'Cluster_' + column_name``, + and will contain the cluster labels for each parameter group. + + Notes + ----- + ``'sidecar_params'`` is a dictionary of dictionaries, where keys are modalities. + The modality-wise dictionary's keys are names of BIDS fields to directly include + in the Parameter Groupings, + and the values describe the parameters by which those BIDS' fields are compared. + For example, + {"RepetitionTime": {"tolerance": 0.000001, "precision": 6, "suggest_variant_rename": True} + means that the RepetitionTime field should be compared across files and flagged as a + variant if it differs from others by 0.000001 or more. + + ``'derived_params'`` is a dictionary of dictionaries, where keys are modalities. + The modality-wise dictionary's keys are names of BIDS fields to derive from the + NIfTI header and include in the Parameter Groupings. 
+ """ to_format = config["sidecar_params"][modality] to_format.update(config["derived_params"][modality]) for column_name, column_fmt in to_format.items(): if column_name not in param_group_df: continue + if "tolerance" in column_fmt and len(param_group_df) > 1: array = param_group_df[column_name].to_numpy().reshape(-1, 1) @@ -1495,12 +1670,13 @@ def format_params(param_group_df, config, modality): clustering = AgglomerativeClustering( n_clusters=None, distance_threshold=tolerance, linkage="complete" ).fit(array) + for i in range(len(array)): if array[i, 0] == -999: array[i, 0] = np.nan # now add clustering_labels as a column - param_group_df["Cluster_" + column_name] = clustering.labels_ + param_group_df[f"Cluster_{column_name}"] = clustering.labels_ return param_group_df @@ -1510,6 +1686,12 @@ def _order_columns(df): This ensures that KeyGroup and ParamGroup are the first two columns, FilePath is the last, and the others are sorted alphabetically. + + Notes + ----- + This is the only place where the constant ID_VARS is used, + and the strings in that constant are hardcoded here, + so we might not need that constant at all. """ cols = set(df.columns.to_list()) non_id_cols = cols - ID_VARS @@ -1523,12 +1705,18 @@ def _order_columns(df): def img_to_new_ext(img_path, new_ext): - """Convert img to new extension.""" + """Convert img to new extension. + + Notes + ----- + The hardcoded suffix associated with each extension may not be comprehensive. + BIDS has been extended a lot in recent years. + """ # handle .tsv edge case if new_ext == ".tsv": # take out suffix return img_path.rpartition("_")[0] + "_events" + new_ext - if new_ext == ".tsv.gz": + elif new_ext == ".tsv.gz": return img_path.rpartition("_")[0] + "_physio" + new_ext else: return img_path.replace(".nii.gz", "").replace(".nii", "") + new_ext diff --git a/cubids/data/config.yml b/cubids/data/config.yml index ec7eb8ef7..eb442399f 100644 --- a/cubids/data/config.yml +++ b/cubids/data/config.yml @@ -1,28 +1,38 @@ # These are non-BIDS fields that can be added by CuBIDS derived_params: anat: + # Number of voxels in first dimension Dim1Size: suggest_variant_rename: yes + # Number of voxels in second dimension Dim2Size: suggest_variant_rename: yes + # Number of voxels in third dimension Dim3Size: suggest_variant_rename: yes + # Number of slice time values NSliceTimes: suggest_variant_rename: yes + # Number of volumes NumVolumes: suggest_variant_rename: yes + # Boolean indicating oblique acquisition Obliquity: suggest_variant_rename: yes + # String describing image orientation (e.g., LAS+) ImageOrientation: suggest_variant_rename: yes + # Size of voxels in first dimension, in mm VoxelSizeDim1: tolerance: 0.001 precision: 3 suggest_variant_rename: yes + # Size of voxels in second dimension, in mm VoxelSizeDim2: tolerance: 0.001 precision: 3 suggest_variant_rename: yes + # Size of voxels in third dimension, in mm VoxelSizeDim3: tolerance: 0.001 precision: 3 @@ -333,6 +343,18 @@ sidecar_params: precision: 6 suggest_variant_rename: yes perf: + ArterialSpinLabelingType: + suggest_variant_rename: yes + BackgroundSuppression: + suggest_variant_rename: yes + BackgroundSuppressionNumberPulses: + suggest_variant_rename: yes + BolusCutOffFlag: + suggest_variant_rename: yes + BolusCutOffTechnique: + suggest_variant_rename: yes + CASLType: + suggest_variant_rename: yes EchoTime: tolerance: 0.001 precision: 3 @@ -343,6 +365,17 @@ sidecar_params: suggest_variant_rename: yes FlipAngle: suggest_variant_rename: yes + LabelingDistance: + tolerance: 0.1 + 
suggest_variant_rename: yes + LabelingEfficiency: + tolerance: 0.001 + precision: 3 + suggest_variant_rename: yes + LookLocker: + suggest_variant_rename: yes + M0Type: + suggest_variant_rename: yes MultibandAccelerationFactor: suggest_variant_rename: yes NumberOfVolumesDiscardedByScanner: @@ -355,6 +388,10 @@ sidecar_params: suggest_variant_rename: yes PartialFourier: suggest_variant_rename: yes + PASLType: + suggest_variant_rename: yes + PCASLType: + suggest_variant_rename: yes PhaseEncodingDirection: suggest_variant_rename: yes RepetitionTime: @@ -367,6 +404,8 @@ sidecar_params: tolerance: 0.001 precision: 3 suggest_variant_rename: yes + VascularCrushing: + suggest_variant_rename: yes VolumeTiming: tolerance: 0.000001 precision: 6 @@ -409,4 +448,4 @@ sidecar_params: VolumeTiming: tolerance: 0.000001 precision: 6 - suggest_variant_rename: yes \ No newline at end of file + suggest_variant_rename: yes diff --git a/cubids/metadata_merge.py b/cubids/metadata_merge.py index b542e691c..5bd3c9579 100644 --- a/cubids/metadata_merge.py +++ b/cubids/metadata_merge.py @@ -1,4 +1,5 @@ """Tools for merging metadata.""" + import json from collections import defaultdict from copy import deepcopy @@ -15,7 +16,24 @@ def check_merging_operations(action_tsv, raise_on_error=False): """Check that the merges in an action tsv are possible. - To be mergeable the + Parameters + ---------- + action_tsv : :obj:`str` + Path to the action tsv file. + raise_on_error : :obj:`bool`, optional + Whether to raise an exception if there are errors. + + Returns + ------- + ok_merges : :obj:`list` + List of tuples of ok merges. + deletions : :obj:`list` + List of tuples of deletions. + + Raises + ------ + :obj:`Exception` + If there are errors and ``raise_on_error`` is ``True``. """ actions = pd.read_table(action_tsv) ok_merges = [] @@ -45,8 +63,10 @@ def _check_sdc_cols(meta1, meta2): print("going to delete ", dest_param_key) deletions.append(dest_param_key) continue + if not source_row.shape[0] == 1: raise Exception("Could not identify a unique source group") + source_metadata = source_row.iloc[0].to_dict() merge_id = (source_param_key, dest_param_key) # Check for compatible fieldmaps @@ -59,6 +79,7 @@ def _check_sdc_cols(meta1, meta2): ): overwrite_merges.append(merge_id) continue + # add to the list of ok merges if there are no conflicts ok_merges.append(merge_id) @@ -89,7 +110,9 @@ def _check_sdc_cols(meta1, meta2): if overwrite_merges or sdc_incompatible: if raise_on_error: raise Exception(error_message) + print(error_message) + return ok_merges, deletions @@ -99,6 +122,25 @@ def merge_without_overwrite(source_meta, dest_meta_orig, raise_on_error=False): Here, "safe" means that no non-NaN values in `dest_meta` are overwritten by the merge. If any overwrites occur an empty dictionary is returned. + + Parameters + ---------- + source_meta : :obj:`dict` + The metadata to merge from. + dest_meta_orig : :obj:`dict` + The metadata to merge into. + raise_on_error : :obj:`bool`, optional + Whether to raise an exception if there are errors. + + Returns + ------- + :obj:`dict` + The merged metadata. + + Raises + ------ + :obj:`Exception` + If there are errors and ``raise_on_error`` is ``True``. 
""" # copy the original json params dest_meta = deepcopy(dest_meta_orig) @@ -111,6 +153,7 @@ def merge_without_overwrite(source_meta, dest_meta_orig, raise_on_error=False): % (source_meta.get("NSliceTimes"), source_meta.get("NSliceTimes")) ) return {} + for parameter in DIRECT_IMAGING_PARAMS: source_value = source_meta.get(parameter, nan) dest_value = dest_meta.get(parameter, nan) @@ -131,6 +174,7 @@ def merge_without_overwrite(source_meta, dest_meta_orig, raise_on_error=False): return {} dest_meta[parameter] = source_value + return dest_meta @@ -155,7 +199,24 @@ def print_merges(merge_list): def merge_json_into_json(from_file, to_file, raise_on_error=False): - """Merge imaging metadata into JSON.""" + """Merge imaging metadata into JSON. + + Parameters + ---------- + from_file : :obj:`str` + Path to the JSON file to merge from. + to_file : :obj:`str` + Path to the JSON file to merge into. + raise_on_error : :obj:`bool`, optional + Whether to raise an exception if there are errors. + Defaults to ``False``. + + Returns + ------- + :obj:`int` + Exit code. + Either 255 if there was an error or 0 if there was not. + """ print(f"Merging imaging metadata from {from_file} to {to_file}") with open(from_file, "r") as fromf: source_metadata = json.load(fromf) @@ -165,7 +226,9 @@ def merge_json_into_json(from_file, to_file, raise_on_error=False): orig_dest_metadata = deepcopy(dest_metadata) merged_metadata = merge_without_overwrite( - source_metadata, dest_metadata, raise_on_error=raise_on_error + source_metadata, + dest_metadata, + raise_on_error=raise_on_error, ) if not merged_metadata: @@ -185,12 +248,12 @@ def get_acq_dictionary(): Parameters ---------- - df: Pandas DataFrame - Pre export TSV that will be converted to a json dictionary + df : :obj:`pandas.DataFrame` + Pre export TSV that will be converted to a json dictionary. Returns ------- - acq_dict: dictionary + acq_dict : :obj:`dict` Python dictionary in BIDS data dictionary format """ acq_dict = {} @@ -204,7 +267,24 @@ def get_acq_dictionary(): def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level): - """Find unique sets of Key/Param groups across subjects.""" + """Find unique sets of Key/Param groups across subjects. + + This writes out the following files: + - _AcqGrouping.tsv: A tsv with the mapping of subject/session to + acquisition group. + - _AcqGrouping.json: A data dictionary for the AcqGrouping.tsv. + - _AcqGroupInfo.txt: A text file with the summary of acquisition. + - _AcqGroupInfo.json: A data dictionary for the AcqGroupInfo.txt. + + Parameters + ---------- + files_tsv : :obj:`str` + Path to the files tsv. + output_prefix : :obj:`str` + Prefix for output files. + acq_group_level : {"subject", "session"} + Level at which to group acquisitions. 
+ """ from bids import config from bids.layout import parse_file_entities diff --git a/cubids/tests/test_bond.py b/cubids/tests/test_bond.py index 5a0c3aac1..15bfc35f0 100644 --- a/cubids/tests/test_bond.py +++ b/cubids/tests/test_bond.py @@ -1,4 +1,5 @@ """Tests for `cubids` package.""" + import json import os import subprocess diff --git a/cubids/tests/utils.py b/cubids/tests/utils.py index 6f786e46f..22263f9ba 100644 --- a/cubids/tests/utils.py +++ b/cubids/tests/utils.py @@ -1,4 +1,5 @@ """Utility functions for CuBIDS' tests.""" + import hashlib import json import os diff --git a/cubids/utils.py b/cubids/utils.py index 56704d8c6..09c02e349 100644 --- a/cubids/utils.py +++ b/cubids/utils.py @@ -1,10 +1,27 @@ """Miscellaneous utility functions for CuBIDS.""" + import re from pathlib import Path def _get_container_type(image_name): - """Get and return the container type.""" + """Get and return the container type. + + Parameters + ---------- + image_name : :obj:`str` + The name of the container image. + + Returns + ------- + :obj:`str` + The container type, either "docker" or "singularity". + + Raises + ------ + :obj:`Exception` + If the container type cannot be determined. + """ # If it's a file on disk, it must be a singularity image if Path(image_name).exists(): return "singularity" diff --git a/cubids/validator.py b/cubids/validator.py index 3f6700417..01dad11c8 100644 --- a/cubids/validator.py +++ b/cubids/validator.py @@ -1,4 +1,5 @@ """Methods for validating BIDS datasets.""" + import glob import json import logging @@ -54,7 +55,18 @@ def build_subject_paths(bids_dir): def run_validator(call): - """Run the validator with subprocess.""" + """Run the validator with subprocess. + + Parameters + ---------- + call : :obj:`list` + List of strings to pass to subprocess.run(). + + Returns + ------- + :obj:`subprocess.CompletedProcess` + The result of the subprocess call. + """ # if verbose: # logger.info("Running the validator with call:") # logger.info('\"' + ' '.join(call) + '\"') @@ -68,15 +80,30 @@ def parse_validator_output(output): Parameters ---------- - path : string + output : :obj:`str` Path to JSON file of BIDS validator output Returns ------- - Pandas DataFrame + df : :obj:`pandas.DataFrame` + Dataframe of validator output. """ def get_nested(dct, *keys): + """Get a nested value from a dictionary. + + Parameters + ---------- + dct : :obj:`dict` + Dictionary to get value from. + keys : :obj:`list` + List of keys to get value from. + + Returns + ------- + :obj:`dict` + The nested value. + """ for key in keys: try: dct = dct[key] @@ -89,6 +116,18 @@ def get_nested(dct, *keys): issues = data["issues"] def parse_issue(issue_dict): + """Parse a single issue from the validator output. + + Parameters + ---------- + issue_dict : :obj:`dict` + Dictionary of issue. + + Returns + ------- + return_dict : :obj:`dict` + Dictionary of parsed issue. + """ return_dict = {} return_dict["files"] = [ get_nested(x, "file", "relativePath") for x in issue_dict.get("files", "") @@ -117,7 +156,13 @@ def parse_issue(issue_dict): def get_val_dictionary(): - """Get value dictionary.""" + """Get value dictionary. + + Returns + ------- + val_dict : dict + Dictionary of values. 
+ """ val_dict = {} val_dict["files"] = {"Description": "File with warning orerror"} val_dict["type"] = {"Description": "BIDS validation warning or error"} diff --git a/cubids/workflows.py b/cubids/workflows.py index 37793981a..7984216b6 100644 --- a/cubids/workflows.py +++ b/cubids/workflows.py @@ -1,4 +1,5 @@ """First order workflows in CuBIDS.""" + import json import logging import os @@ -42,12 +43,18 @@ def validate( Parameters ---------- - bids_dir - output_prefix - container - sequential - sequential_subjects - ignore_nifti_headers + bids_dir : :obj:`pathlib.Path` + Path to the BIDS directory. + output_prefix : :obj:`pathlib.Path` + Output filename prefix. + container : :obj:`str` + Container in which to run the workflow. + sequential : :obj:`bool` + Run the validator sequentially. + sequential_subjects : :obj:`list` of :obj:`str` + Filter the sequential run to only include the listed subjects. + ignore_nifti_headers : :obj:`bool` + Ignore NIfTI headers when validating. """ # check status of output_prefix, absolute or relative? abs_path_output = True @@ -55,10 +62,10 @@ def validate( # not an absolute path --> put in code/CuBIDS dir abs_path_output = False # check if code/CuBIDS dir exists - if not Path(str(bids_dir) + "/code/CuBIDS").is_dir(): + if not (bids_dir / "code" / "CuBIDS").is_dir(): # if not, create it - subprocess.run(["mkdir", str(bids_dir) + "/code"]) - subprocess.run(["mkdir", str(bids_dir) + "/code/CuBIDS/"]) + subprocess.run(["mkdir", str(bids_dir / "code")]) + subprocess.run(["mkdir", str(bids_dir / "code" / "CuBIDS")]) # Run directly from python using subprocess if container is None: @@ -262,11 +269,16 @@ def group(bids_dir, container, acq_group_level, config, output_prefix): Parameters ---------- - bids_dir - container - acq_group_level - config - output_prefix + bids_dir : :obj:`pathlib.Path` + Path to the BIDS directory. + container : :obj:`str` + Container in which to run the workflow. + acq_group_level : {"subject", "session"} + Level at which acquisition groups are created. + config : :obj:`pathlib.Path` + Path to the grouping config file. + output_prefix : :obj:`pathlib.Path` + Output filename prefix. """ # Run directly from python using if container is None: @@ -357,16 +369,26 @@ def apply( Parameters ---------- - bids_dir - use_datalad - acq_group_level - config - edited_summary_tsv - edited_tsv_prefix - files_tsv - new_tsv_prefix - output_prefix - container + bids_dir : :obj:`pathlib.Path` + Path to the BIDS directory. + use_datalad : :obj:`bool` + Use datalad to track changes. + acq_group_level : {"subject", "session"} + Level at which acquisition groups are created. + config : :obj:`pathlib.Path` + Path to the grouping config file. + edited_summary_tsv : :obj:`pathlib.Path` + Path to the edited summary tsv. + edited_tsv_prefix : :obj:`pathlib.Path` + Path to the edited tsv prefix. + files_tsv : :obj:`pathlib.Path` + Path to the files tsv. + new_tsv_prefix : :obj:`pathlib.Path` + Path to the new tsv prefix. + output_prefix : :obj:`pathlib.Path` + Output filename prefix. + container : :obj:`str` + Container in which to run the workflow. """ # Run directly from python using if container is None: @@ -471,18 +493,17 @@ def apply( sys.exit(proc.returncode) -def datalad_save( - bids_dir, - container, - m, -): +def datalad_save(bids_dir, container, m): """Perform datalad save. Parameters ---------- - bids_dir - container - m + bids_dir : :obj:`pathlib.Path` + Path to the BIDS directory. + container : :obj:`str` + Container in which to run the workflow. 
+ m : :obj:`str` + Commit message. """ # Run directly from python using if container is None: @@ -532,8 +553,10 @@ def undo(bids_dir, container): Parameters ---------- - bids_dir - container + bids_dir : :obj:`pathlib.Path` + Path to the BIDS directory. + container : :obj:`str` + Container in which to run the workflow. """ # Run directly from python using if container is None: @@ -587,13 +610,20 @@ def copy_exemplars( Parameters ---------- - bids_dir - container - use_datalad - exemplars_dir - exemplars_tsv - min_group_size - force_unlock + bids_dir : :obj:`pathlib.Path` + Path to the BIDS directory. + container : :obj:`str` + Container in which to run the workflow. + use_datalad : :obj:`bool` + Use datalad to track changes. + exemplars_dir : :obj:`pathlib.Path` + Path to the directory where the exemplars will be saved. + exemplars_tsv : :obj:`pathlib.Path` + Path to the tsv file with the exemplars. + min_group_size : :obj:`int` + Minimum number of subjects in a group to be considered for exemplar. + force_unlock : :obj:`bool` + Force unlock the dataset. """ # Run directly from python using if container is None: @@ -640,8 +670,10 @@ def copy_exemplars( if force_unlock: cmd.append("--force-unlock") + if min_group_size: cmd.append("--min-group-size") + elif container_type == "singularity": cmd = [ "singularity", @@ -675,10 +707,14 @@ def add_nifti_info(bids_dir, container, use_datalad, force_unlock): Parameters ---------- - bids_dir - container - use_datalad - force_unlock + bids_dir : :obj:`pathlib.Path` + Path to the BIDS directory. + container : :obj:`str` + Container in which to run the workflow. + use_datalad : :obj:`bool` + Use datalad to track changes. + force_unlock : :obj:`bool` + Force unlock the dataset. """ # Run directly from python using if container is None: @@ -739,10 +775,14 @@ def purge(bids_dir, container, use_datalad, scans): Parameters ---------- - bids_dir - container - use_datalad - scans + bids_dir : :obj:`pathlib.Path` + Path to the BIDS directory. + container : :obj:`str` + Container in which to run the workflow. + use_datalad : :obj:`bool` + Use datalad to track changes. + scans : :obj:`pathlib.Path` + Path to the scans tsv. """ # Run directly from python using if container is None: @@ -801,9 +841,12 @@ def remove_metadata_fields(bids_dir, container, fields): Parameters ---------- - bids_dir - container - fields + bids_dir : :obj:`pathlib.Path` + Path to the BIDS directory. + container : :obj:`str` + Container in which to run the workflow. + fields : :obj:`list` of :obj:`str` + List of fields to remove. """ # Run directly from python if container is None: @@ -849,8 +892,10 @@ def print_metadata_fields(bids_dir, container): Parameters ---------- - bids_dir - container + bids_dir : :obj:`pathlib.Path` + Path to the BIDS directory. + container : :obj:`str` + Container in which to run the workflow. """ # Run directly from python if container is None: diff --git a/pyproject.toml b/pyproject.toml index 804b05f71..8ce72d518 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,16 +21,15 @@ classifiers = [ license = {file = "LICENSE"} requires-python = ">=3.8" dependencies = [ - "pyyaml", - "pybids", - "pandas", - "tqdm", + "datalad>=0.13.5,!=0.17.3,!=0.17.0,!=0.16.1", + "jinja2 < 3.1", "numpy", + "pandas", + "pybids", + "pyyaml", "scikit-learn", - "datalad>=0.13.5,!=0.17.3,!=0.17.0,!=0.16.1", + "tqdm", "wrapt<2,>=1.10", - "Sphinx", - "jinja2 < 3.1", ] dynamic = ["version"]
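
The merge helper documented above can be exercised directly from Python. The sketch below is illustrative only: the sidecar paths are placeholders rather than files touched by this patch, and the example relies solely on the ``merge_json_into_json`` signature and return codes documented in ``cubids/metadata_merge.py``::

    from cubids.metadata_merge import merge_json_into_json

    # Placeholder sidecar paths -- substitute any two BIDS JSON sidecars.
    source_json = "sub-01/func/sub-01_task-rest_run-01_bold.json"
    dest_json = "sub-01/func/sub-01_task-rest_run-02_bold.json"

    # Merges source metadata into the destination without overwriting
    # conflicting values; per the docstring, returns 0 on success and
    # 255 on error.
    exit_code = merge_json_into_json(source_json, dest_json, raise_on_error=False)
    if exit_code != 0:
        print("merge_json_into_json reported an error")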
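
Similarly, the acquisition-group outputs listed in the ``group_by_acquisition_sets`` docstring can be produced from an existing files TSV. The file names here are placeholders; only the documented signature, the ``{"subject", "session"}`` grouping levels, and the output naming from the docstring are assumed::

    from cubids.metadata_merge import group_by_acquisition_sets

    # "v0_files.tsv" is a placeholder for a files TSV produced by CuBIDS grouping.
    group_by_acquisition_sets(
        files_tsv="v0_files.tsv",
        output_prefix="v0",
        acq_group_level="subject",  # or "session"
    )
    # Per the docstring, this writes v0_AcqGrouping.tsv/.json and
    # v0_AcqGroupInfo.txt/.json alongside the given prefix.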