From b8f5d47d64ba86506880a58249929ab13000c54a Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Wed, 13 Sep 2023 14:53:40 -0400 Subject: [PATCH] Use black and isort to autoformat the main package (#266) * Add linting job. * Run black and isort. * Keep working on formatting. * Forgot to install the linters... * Wrong code path. * Address style issues in cubids. * Address style issues in validator. * Update metadata_merge.py --- .github/workflows/lint.yml | 8 +- cubids/__init__.py | 10 +- cubids/cli.py | 1369 ++++++++++++++++---------- cubids/config.py | 8 +- cubids/constants.py | 39 +- cubids/cubids.py | 1012 +++++++++---------- cubids/metadata_merge.py | 182 ++-- cubids/validator.py | 86 +- notebooks/Key_and_Param_Groups.ipynb | 2 +- pyproject.toml | 49 + setup.cfg | 8 + tests/test_bond.py | 16 +- 12 files changed, 1573 insertions(+), 1216 deletions(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 472f0115c..a02f531ce 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -19,8 +19,8 @@ jobs: python-version: '3.7' - name: Install dependencies run: | - python -m pip install --upgrade pip - python -m pip install --upgrade tox + pip install flake8 flake8-absolute-import flake8-black flake8-docstrings \ + flake8-isort flake8-pyproject flake8-unused-arguments \ + flake8-use-fstring pep8-naming - name: Run linters - run: | - tox -e codespell + run: python -m flake8 cubids diff --git a/cubids/__init__.py b/cubids/__init__.py index a46eef713..dd11a6c3c 100644 --- a/cubids/__init__.py +++ b/cubids/__init__.py @@ -1,7 +1,11 @@ """Top-level package for CuBIDS.""" __author__ = """PennLINC""" -__email__ = 'PennLINC@gmail.com' -__version__ = '0.1.0' +__email__ = "PennLINC@gmail.com" +__version__ = "0.1.0" -from .cubids import CuBIDS +from cubids.cubids import CuBIDS + +__all__ = [ + "CuBIDS", +] diff --git a/cubids/cli.py b/cubids/cli.py index 5aae9dbed..7e82f1cd9 100644 --- a/cubids/cli.py +++ b/cubids/cli.py @@ -1,109 +1,134 @@ """Console script for cubids.""" -import warnings import argparse -import subprocess +import json +import logging import os -import sys import re -import logging -import tempfile -import tqdm import shutil -import json -import pandas as pd -from cubids import CuBIDS +import subprocess +import sys +import tempfile +import warnings from pathlib import Path -from .validator import (build_validator_call, - run_validator, parse_validator_output, - build_subject_paths, get_val_dictionary) -from .metadata_merge import merge_json_into_json -warnings.simplefilter(action='ignore', category=FutureWarning) +import pandas as pd +import tqdm + +from cubids import CuBIDS +from cubids.metadata_merge import merge_json_into_json +from cubids.validator import ( + build_subject_paths, + build_validator_call, + get_val_dictionary, + parse_validator_output, + run_validator, +) + +warnings.simplefilter(action="ignore", category=FutureWarning) logging.basicConfig(level=logging.INFO) -logger = logging.getLogger('cubids-cli') -GIT_CONFIG = os.path.join(os.path.expanduser("~"), '.gitconfig') -logging.getLogger('datalad').setLevel(logging. ERROR) +logger = logging.getLogger("cubids-cli") +GIT_CONFIG = os.path.join(os.path.expanduser("~"), ".gitconfig") +logging.getLogger("datalad").setLevel(logging.ERROR) def cubids_validate(): - '''Command Line Interface function for running the bids validator.''' - + """Run the bids validator.""" parser = argparse.ArgumentParser( - description="cubids-validate: Wrapper around the official " - "BIDS Validator", - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('bids_dir', - type=Path, - action='store', - help='the root of a BIDS dataset. It should contain ' - 'sub-X directories and dataset_description.json') - parser.add_argument('output_prefix', - type=Path, - action='store', - help='file prefix to which tabulated validator output ' - 'is written. If users pass in just a filename prefix ' - 'e.g. V1, then CuBIDS will put the validation ' - 'output in bids_dir/code/CuBIDS. If the user ' - 'specifies a path (e.g. /Users/scovitz/BIDS/V1) ' - 'then output files will go to the specified location.') - parser.add_argument('--sequential', - action='store_true', - default=False, - help='Run the BIDS validator sequentially ' - 'on each subject.', - required=False) - parser.add_argument('--container', - action='store', - help='Docker image tag or Singularity image file.', - default=None) - parser.add_argument('--ignore_nifti_headers', - action='store_true', - default=False, - help='Disregard NIfTI header content during' - ' validation', - required=False) - parser.add_argument('--ignore_subject_consistency', - action='store_true', - default=True, - help='Skip checking that any given file for one' - ' subject is present for all other subjects', - required=False) - parser.add_argument('--sequential-subjects', - action='store', - default=None, - help='List: Filter the sequential run to only include' - ' the listed subjects. e.g. --sequential-subjects ' - 'sub-01 sub-02 sub-03', - nargs='+', - required=False) + description="cubids-validate: Wrapper around the official BIDS Validator", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser.add_argument( + "bids_dir", + type=Path, + action="store", + help=( + "the root of a BIDS dataset. It should contain " + "sub-X directories and dataset_description.json" + ), + ) + parser.add_argument( + "output_prefix", + type=Path, + action="store", + help=( + "file prefix to which tabulated validator output " + "is written. If users pass in just a filename prefix " + "e.g. V1, then CuBIDS will put the validation " + "output in bids_dir/code/CuBIDS. If the user " + "specifies a path (e.g. /Users/scovitz/BIDS/V1) " + "then output files will go to the specified location." + ), + ) + parser.add_argument( + "--sequential", + action="store_true", + default=False, + help="Run the BIDS validator sequentially on each subject.", + required=False, + ) + parser.add_argument( + "--container", + action="store", + help="Docker image tag or Singularity image file.", + default=None, + ) + parser.add_argument( + "--ignore_nifti_headers", + action="store_true", + default=False, + help="Disregard NIfTI header content during validation", + required=False, + ) + parser.add_argument( + "--ignore_subject_consistency", + action="store_true", + default=True, + help=( + "Skip checking that any given file for one " + "subject is present for all other subjects" + ), + required=False, + ) + parser.add_argument( + "--sequential-subjects", + action="store", + default=None, + help=( + "List: Filter the sequential run to only include " + "the listed subjects. e.g. --sequential-subjects " + "sub-01 sub-02 sub-03" + ), + nargs="+", + required=False, + ) opts = parser.parse_args() # check status of output_prefix, absolute or relative? abs_path_output = True - if '/' not in str(opts.output_prefix): + if "/" not in str(opts.output_prefix): # not an absolute path --> put in code/CuBIDS dir abs_path_output = False # check if code/CuBIDS dir exists - if not Path(str(opts.bids_dir) + '/code/CuBIDS').is_dir(): + if not Path(str(opts.bids_dir) + "/code/CuBIDS").is_dir(): # if not, create it - subprocess.run(['mkdir', str(opts.bids_dir) + '/code']) - subprocess.run(['mkdir', str(opts.bids_dir) + '/code/CuBIDS/']) + subprocess.run(["mkdir", str(opts.bids_dir) + "/code"]) + subprocess.run(["mkdir", str(opts.bids_dir) + "/code/CuBIDS/"]) # Run directly from python using subprocess if opts.container is None: - if not opts.sequential: # run on full dataset - call = build_validator_call(str(opts.bids_dir), - opts.ignore_nifti_headers, - opts.ignore_subject_consistency) + call = build_validator_call( + str(opts.bids_dir), + opts.ignore_nifti_headers, + opts.ignore_subject_consistency, + ) ret = run_validator(call) # parse the string output - parsed = parse_validator_output(ret.stdout.decode('UTF-8')) + parsed = parse_validator_output(ret.stdout.decode("UTF-8")) if parsed.shape[1] < 1: - logger.info("No issues/warnings parsed, your dataset" - " is BIDS valid.") + logger.info("No issues/warnings parsed, your dataset is BIDS valid.") sys.exit(0) else: logger.info("BIDS issues/warnings found in the dataset") @@ -115,16 +140,18 @@ def cubids_validate(): val_tsv = str(opts.output_prefix) + "_validation.tsv" else: - val_tsv = str(opts.bids_dir) \ - + '/code/CuBIDS/' \ - + str(opts.output_prefix) \ - + "_validation.tsv" + val_tsv = ( + str(opts.bids_dir) + + "/code/CuBIDS/" + + str(opts.output_prefix) + + "_validation.tsv" + ) parsed.to_csv(val_tsv, sep="\t", index=False) # build validation data dictionary json sidecar - val_dict = get_val_dictionary(parsed) - val_json = val_tsv.replace('tsv', 'json') + val_dict = get_val_dictionary() + val_json = val_tsv.replace("tsv", "json") with open(val_json, "w") as outfile: json.dump(val_dict, outfile, indent=4) @@ -145,17 +172,15 @@ def cubids_validate(): parsed = [] if opts.sequential_subjects: - subjects_dict = {k: v for k, v in subjects_dict.items() - if k in opts.sequential_subjects} - assert len(list(subjects_dict.keys())) > 1, ("No subjects found" - " in filter") + subjects_dict = { + k: v for k, v in subjects_dict.items() if k in opts.sequential_subjects + } + assert len(list(subjects_dict.keys())) > 1, "No subjects found in filter" for subject, files_list in tqdm.tqdm(subjects_dict.items()): - # logger.info(" ".join(["Processing subject:", subject])) # create a temporary directory and symlink the data with tempfile.TemporaryDirectory() as tmpdirname: for fi in files_list: - # cut the path down to the subject label bids_start = fi.find(subject) @@ -166,41 +191,37 @@ def cubids_validate(): else: bids_folder = Path(fi[bids_start:]).parent - fi_tmpdir = tmpdirname + '/' + str(bids_folder) + fi_tmpdir = tmpdirname + "/" + str(bids_folder) if not os.path.exists(fi_tmpdir): os.makedirs(fi_tmpdir) - output = fi_tmpdir + '/' + str(Path(fi).name) + output = fi_tmpdir + "/" + str(Path(fi).name) shutil.copy2(fi, output) # run the validator nifti_head = opts.ignore_nifti_headers subj_consist = opts.ignore_subject_consistency - call = build_validator_call(tmpdirname, - nifti_head, - subj_consist) + call = build_validator_call(tmpdirname, nifti_head, subj_consist) ret = run_validator(call) # parse output if ret.returncode != 0: - logger.error("Errors returned " - "from validator run, parsing now") + logger.error("Errors returned from validator run, parsing now") # parse the output and add to list if it returns a df - decoded = ret.stdout.decode('UTF-8') + decoded = ret.stdout.decode("UTF-8") tmp_parse = parse_validator_output(decoded) if tmp_parse.shape[1] > 1: - tmp_parse['subject'] = subject + tmp_parse["subject"] = subject parsed.append(tmp_parse) # concatenate the parsed data and exit if len(parsed) < 1: - logger.info("No issues/warnings parsed, your dataset" - " is BIDS valid.") + logger.info("No issues/warnings parsed, your dataset is BIDS valid.") sys.exit(0) else: parsed = pd.concat(parsed, axis=0) - subset = parsed.columns.difference(['subject']) + subset = parsed.columns.difference(["subject"]) parsed = parsed.drop_duplicates(subset=subset) logger.info("BIDS issues/warnings found in the dataset") @@ -210,16 +231,18 @@ def cubids_validate(): if abs_path_output: val_tsv = str(opts.output_prefix) + "_validation.tsv" else: - val_tsv = str(opts.bids_dir) \ - + '/code/CuBIDS/' \ - + str(opts.output_prefix) \ - + "_validation.tsv" + val_tsv = ( + str(opts.bids_dir) + + "/code/CuBIDS/" + + str(opts.output_prefix) + + "_validation.tsv" + ) parsed.to_csv(val_tsv, sep="\t", index=False) # build validation data dictionary json sidecar - val_dict = get_val_dictionary(parsed) - val_json = val_tsv.replace('tsv', 'json') + val_dict = get_val_dictionary() + val_json = val_tsv.replace("tsv", "json") with open(val_json, "w") as outfile: json.dump(val_dict, outfile, indent=4) @@ -235,97 +258,130 @@ def cubids_validate(): output_dir_link_t = str(opts.output_prefix.parent.absolute()) + ":/tsv:rw" output_dir_link_j = str(opts.output_prefix.parent.absolute()) + ":/json:rw" linked_output_prefix_t = "/tsv/" + opts.output_prefix.name - if container_type == 'docker': - cmd = ['docker', 'run', '--rm', '-v', bids_dir_link, - '-v', GIT_CONFIG+":/root/.gitconfig", - '-v', output_dir_link_t, - '-v', output_dir_link_j, - '--entrypoint', 'cubids-validate', opts.container, - '/bids', linked_output_prefix_t] + if container_type == "docker": + cmd = [ + "docker", + "run", + "--rm", + "-v", + bids_dir_link, + "-v", + GIT_CONFIG + ":/root/.gitconfig", + "-v", + output_dir_link_t, + "-v", + output_dir_link_j, + "--entrypoint", + "cubids-validate", + opts.container, + "/bids", + linked_output_prefix_t, + ] if opts.ignore_nifti_headers: - cmd.append('--ignore_nifti_headers') + cmd.append("--ignore_nifti_headers") if opts.ignore_subject_consistency: - cmd.append('--ignore_subject_consistency') - elif container_type == 'singularity': - cmd = ['singularity', 'exec', '--cleanenv', - '-B', bids_dir_link, - '-B', output_dir_link_t, - '-B', output_dir_link_j, - opts.container, 'cubids-validate', '/bids', - linked_output_prefix_t] + cmd.append("--ignore_subject_consistency") + elif container_type == "singularity": + cmd = [ + "singularity", + "exec", + "--cleanenv", + "-B", + bids_dir_link, + "-B", + output_dir_link_t, + "-B", + output_dir_link_j, + opts.container, + "cubids-validate", + "/bids", + linked_output_prefix_t, + ] if opts.ignore_nifti_headers: - cmd.append('--ignore_nifti_headers') + cmd.append("--ignore_nifti_headers") if opts.ignore_subject_consistency: - cmd.append('--ignore_subject_consistency') + cmd.append("--ignore_subject_consistency") if opts.sequential: - cmd.append('--sequential') + cmd.append("--sequential") - print("RUNNING: " + ' '.join(cmd)) + print("RUNNING: " + " ".join(cmd)) proc = subprocess.run(cmd) sys.exit(proc.returncode) def bids_sidecar_merge(): + """Merge critical keys from one sidecar to another.""" parser = argparse.ArgumentParser( - description="bids-sidecar-merge: merge critical keys from one " - "sidecar to another", - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('from_json', - type=Path, - action='store', - help='Source json file.') - parser.add_argument('to_json', - type=Path, - action='store', - help='destination json. This file will have data ' - 'from `from_json` copied into it.') + description=("bids-sidecar-merge: merge critical keys from one sidecar to another"), + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser.add_argument("from_json", type=Path, action="store", help="Source json file.") + parser.add_argument( + "to_json", + type=Path, + action="store", + help=("destination json. This file will have data from `from_json` copied into it."), + ) opts = parser.parse_args() - merge_status = merge_json_into_json(opts.from_json, opts.to_json, - raise_on_error=False) + merge_status = merge_json_into_json(opts.from_json, opts.to_json, raise_on_error=False) sys.exit(merge_status) def cubids_group(): - '''Command Line Interface function for finding key and param groups.''' - + """Find key and param groups.""" parser = argparse.ArgumentParser( description="cubids-group: find key and parameter groups in BIDS", - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('bids_dir', - type=Path, - action='store', - help='the root of a BIDS dataset. It should contain ' - 'sub-X directories and dataset_description.json') - parser.add_argument('output_prefix', - type=Path, - action='store', - help='file prefix to which a _summary.tsv, _files.tsv ' - '_AcqGrouping.tsv, and _AcqGroupInfo.txt, are ' - 'written. If users pass in just a filename prefix ' - 'e.g. V1, then CuBIDS will put the four grouping ' - 'outputs in bids_dir/code/CuBIDS. If the user ' - 'specifies a path (e.g. /Users/scovitz/BIDS/V1 ' - 'then output files will go to the specified location.') - parser.add_argument('--container', - action='store', - help='Docker image tag or Singularity image file.') - parser.add_argument('--acq-group-level', - default='subject', - action='store', - help='Level at which acquisition groups are created ' - 'options: "subject" or "session"') - parser.add_argument('--config', - action='store', - type=Path, - help='path to a config file for grouping') + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser.add_argument( + "bids_dir", + type=Path, + action="store", + help=( + "the root of a BIDS dataset. It should contain " + "sub-X directories and dataset_description.json" + ), + ) + parser.add_argument( + "output_prefix", + type=Path, + action="store", + help=( + "file prefix to which a _summary.tsv, _files.tsv " + "_AcqGrouping.tsv, and _AcqGroupInfo.txt, are " + "written. If users pass in just a filename prefix " + "e.g. V1, then CuBIDS will put the four grouping " + "outputs in bids_dir/code/CuBIDS. If the user " + "specifies a path (e.g. /Users/scovitz/BIDS/V1 " + "then output files will go to the specified location." + ), + ) + parser.add_argument( + "--container", + action="store", + help="Docker image tag or Singularity image file.", + ) + parser.add_argument( + "--acq-group-level", + default="subject", + action="store", + help=("Level at which acquisition groups are created " 'options: "subject" or "session"'), + ) + parser.add_argument( + "--config", action="store", type=Path, help="path to a config file for grouping" + ) opts = parser.parse_args() # Run directly from python using if opts.container is None: - bod = CuBIDS(data_root=str(opts.bids_dir), - acq_group_level=opts.acq_group_level, - grouping_config=opts.config) - bod.get_TSVs(str(opts.output_prefix),) + bod = CuBIDS( + data_root=str(opts.bids_dir), + acq_group_level=opts.acq_group_level, + grouping_config=opts.config, + ) + bod.get_tsvs( + str(opts.output_prefix), + ) sys.exit(0) # Run it through a container @@ -335,165 +391,225 @@ def cubids_group(): apply_config = opts.config is not None if apply_config: - input_config_dir_link = str( - opts.config.parent.absolute()) + ":/in_config:ro" + input_config_dir_link = str(opts.config.parent.absolute()) + ":/in_config:ro" linked_input_config = "/in_config/" + opts.config.name linked_output_prefix = "/tsv/" + opts.output_prefix.name - if container_type == 'docker': - cmd = ['docker', 'run', '--rm', '-v', bids_dir_link, - '-v', GIT_CONFIG+":/root/.gitconfig", - '-v', output_dir_link, - '--entrypoint', 'cubids-group', - opts.container, '/bids', linked_output_prefix] + if container_type == "docker": + cmd = [ + "docker", + "run", + "--rm", + "-v", + bids_dir_link, + "-v", + GIT_CONFIG + ":/root/.gitconfig", + "-v", + output_dir_link, + "--entrypoint", + "cubids-group", + opts.container, + "/bids", + linked_output_prefix, + ] if apply_config: - cmd.insert(3, '-v') + cmd.insert(3, "-v") cmd.insert(4, input_config_dir_link) - cmd += ['--config', linked_input_config] - - elif container_type == 'singularity': - cmd = ['singularity', 'exec', '--cleanenv', - '-B', bids_dir_link, - '-B', output_dir_link, - opts.container, 'cubids-group', - '/bids', linked_output_prefix] + cmd += ["--config", linked_input_config] + + elif container_type == "singularity": + cmd = [ + "singularity", + "exec", + "--cleanenv", + "-B", + bids_dir_link, + "-B", + output_dir_link, + opts.container, + "cubids-group", + "/bids", + linked_output_prefix, + ] if apply_config: - cmd.insert(3, '-B') + cmd.insert(3, "-B") cmd.insert(4, input_config_dir_link) - cmd += ['--config', linked_input_config] + cmd += ["--config", linked_input_config] if opts.acq_group_level: cmd.append("--acq-group-level") cmd.append(str(opts.acq_group_level)) - print("RUNNING: " + ' '.join(cmd)) + print("RUNNING: " + " ".join(cmd)) proc = subprocess.run(cmd) sys.exit(proc.returncode) def cubids_apply(): - ''' Command Line Interface funciton for applying the tsv changes.''' - + """Apply the tsv changes.""" parser = argparse.ArgumentParser( - description="cubids-apply: apply the changes specified in a tsv " - "to a BIDS directory", - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('bids_dir', - type=Path, - action='store', - help='the root of a BIDS dataset. It should contain ' - 'sub-X directories and dataset_description.json') - parser.add_argument('edited_summary_tsv', - type=Path, - action='store', - help='path to the _summary.tsv that has been edited ' - 'in the MergeInto and RenameKeyGroup columns. If the ' - ' summary table is located in the code/CuBIDS ' - 'directory, then users can just pass the summary tsv ' - 'filename instead of the full path to the tsv') - parser.add_argument('files_tsv', - type=Path, - action='store', - help='path to the _files.tsv that has been edited ' - 'in the MergeInto and RenameKeyGroup columns. If the ' - ' files table is located in the code/CuBIDS ' - 'directory, then users can just pass the files tsv ' - 'filename instead of the full path to the tsv') - parser.add_argument('new_tsv_prefix', - type=Path, - action='store', - help='file prefix for writing the post-apply grouping ' - 'outputs. If users pass in just a filename prefix ' - 'e.g. V2, then CuBIDS will put the four grouping ' - 'outputs in bids_dir/code/CuBIDS. If the user ' - 'specifies a path (e.g. /Users/scovitz/BIDS/V2 ' - 'then output files will go to the specified location.') - parser.add_argument('--use-datalad', - action='store_true', - help='ensure that there are no untracked changes ' - 'before finding groups') - parser.add_argument('--container', - action='store', - help='Docker image tag or Singularity image file.') - parser.add_argument('--acq-group-level', - default='subject', - action='store', - help='Level at which acquisition groups are created ' - 'options: "subject" or "session"') - parser.add_argument('--config', - action='store', - type=Path, - help='path to a config file for grouping') + description=("cubids-apply: apply the changes specified in a tsv to a BIDS directory"), + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser.add_argument( + "bids_dir", + type=Path, + action="store", + help=( + "the root of a BIDS dataset. It should contain " + "sub-X directories and dataset_description.json" + ), + ) + parser.add_argument( + "edited_summary_tsv", + type=Path, + action="store", + help=( + "path to the _summary.tsv that has been edited " + "in the MergeInto and RenameKeyGroup columns. If the " + " summary table is located in the code/CuBIDS " + "directory, then users can just pass the summary tsv " + "filename instead of the full path to the tsv" + ), + ) + parser.add_argument( + "files_tsv", + type=Path, + action="store", + help=( + "path to the _files.tsv that has been edited " + "in the MergeInto and RenameKeyGroup columns. If the " + "files table is located in the code/CuBIDS " + "directory, then users can just pass the files tsv " + "filename instead of the full path to the tsv" + ), + ) + parser.add_argument( + "new_tsv_prefix", + type=Path, + action="store", + help=( + "file prefix for writing the post-apply grouping " + "outputs. If users pass in just a filename prefix " + "e.g. V2, then CuBIDS will put the four grouping " + "outputs in bids_dir/code/CuBIDS. If the user " + "specifies a path (e.g. /Users/scovitz/BIDS/V2 " + "then output files will go to the specified location." + ), + ) + parser.add_argument( + "--use-datalad", + action="store_true", + help="ensure that there are no untracked changes before finding groups", + ) + parser.add_argument( + "--container", + action="store", + help="Docker image tag or Singularity image file.", + ) + parser.add_argument( + "--acq-group-level", + default="subject", + action="store", + help=("Level at which acquisition groups are created " 'options: "subject" or "session"'), + ) + parser.add_argument( + "--config", action="store", type=Path, help="path to a config file for grouping" + ) opts = parser.parse_args() # Run directly from python using if opts.container is None: - bod = CuBIDS(data_root=str(opts.bids_dir), - use_datalad=opts.use_datalad, - acq_group_level=opts.acq_group_level, - grouping_config=opts.config) + bod = CuBIDS( + data_root=str(opts.bids_dir), + use_datalad=opts.use_datalad, + acq_group_level=opts.acq_group_level, + grouping_config=opts.config, + ) if opts.use_datalad: if not bod.is_datalad_clean(): raise Exception("Untracked change in " + str(opts.bids_dir)) - bod.apply_tsv_changes(str(opts.edited_summary_tsv), - str(opts.files_tsv), - str(opts.new_tsv_prefix), - raise_on_error=False) + bod.apply_tsv_changes( + str(opts.edited_summary_tsv), + str(opts.files_tsv), + str(opts.new_tsv_prefix), + raise_on_error=False, + ) sys.exit(0) # Run it through a container container_type = _get_container_type(opts.container) bids_dir_link = str(opts.bids_dir.absolute()) + ":/bids" - input_summary_tsv_dir_link = str( - opts.edited_tsv_prefix.parent.absolute()) + ":/in_summary_tsv:ro" - input_files_tsv_dir_link = str( - opts.edited_tsv_prefix.parent.absolute()) + ":/in_files_tsv:ro" - output_tsv_dir_link = str( - opts.new_tsv_prefix.parent.absolute()) + ":/out_tsv:rw" + input_summary_tsv_dir_link = ( + str(opts.edited_tsv_prefix.parent.absolute()) + ":/in_summary_tsv:ro" + ) + input_files_tsv_dir_link = str(opts.edited_tsv_prefix.parent.absolute()) + ":/in_files_tsv:ro" + output_tsv_dir_link = str(opts.new_tsv_prefix.parent.absolute()) + ":/out_tsv:rw" # FROM BOND-GROUP apply_config = opts.config is not None if apply_config: - input_config_dir_link = str( - opts.config.parent.absolute()) + ":/in_config:ro" + input_config_dir_link = str(opts.config.parent.absolute()) + ":/in_config:ro" linked_input_config = "/in_config/" + opts.config.name linked_output_prefix = "/tsv/" + opts.output_prefix.name #### - - linked_input_summary_tsv = "/in_summary_tsv/" \ - + opts.edited_summary_tsv.name + linked_input_summary_tsv = "/in_summary_tsv/" + opts.edited_summary_tsv.name linked_input_files_tsv = "/in_files_tsv/" + opts.files_tsv.name linked_output_prefix = "/out_tsv/" + opts.new_tsv_prefix.name - if container_type == 'docker': - cmd = ['docker', 'run', '--rm', - '-v', bids_dir_link, - '-v', GIT_CONFIG+":/root/.gitconfig", - '-v', input_summary_tsv_dir_link, - '-v', input_files_tsv_dir_link, - '-v', output_tsv_dir_link, - '--entrypoint', 'cubids-apply', - opts.container, '/bids', linked_input_summary_tsv, - linked_input_files_tsv, linked_output_prefix] + if container_type == "docker": + cmd = [ + "docker", + "run", + "--rm", + "-v", + bids_dir_link, + "-v", + GIT_CONFIG + ":/root/.gitconfig", + "-v", + input_summary_tsv_dir_link, + "-v", + input_files_tsv_dir_link, + "-v", + output_tsv_dir_link, + "--entrypoint", + "cubids-apply", + opts.container, + "/bids", + linked_input_summary_tsv, + linked_input_files_tsv, + linked_output_prefix, + ] if apply_config: - cmd.insert(3, '-v') + cmd.insert(3, "-v") cmd.insert(4, input_config_dir_link) - cmd += ['--config', linked_input_config] - - elif container_type == 'singularity': - cmd = ['singularity', 'exec', '--cleanenv', - '-B', bids_dir_link, - '-B', input_summary_tsv_dir_link, - '-B', input_files_tsv_dir_link, - '-B', output_tsv_dir_link, - opts.container, 'cubids-apply', - '/bids', linked_input_summary_tsv, - linked_input_files_tsv, linked_output_prefix] + cmd += ["--config", linked_input_config] + + elif container_type == "singularity": + cmd = [ + "singularity", + "exec", + "--cleanenv", + "-B", + bids_dir_link, + "-B", + input_summary_tsv_dir_link, + "-B", + input_files_tsv_dir_link, + "-B", + output_tsv_dir_link, + opts.container, + "cubids-apply", + "/bids", + linked_input_summary_tsv, + linked_input_files_tsv, + linked_output_prefix, + ] if apply_config: - cmd.insert(3, '-B') + cmd.insert(3, "-B") cmd.insert(4, input_config_dir_link) - cmd += ['--config', linked_input_config] + cmd += ["--config", linked_input_config] if opts.use_datalad: cmd.append("--use-datalad") @@ -502,29 +618,32 @@ def cubids_apply(): cmd.append("--acq-group-level") cmd.append(str(opts.acq_group_level)) - print("RUNNING: " + ' '.join(cmd)) + print("RUNNING: " + " ".join(cmd)) proc = subprocess.run(cmd) sys.exit(proc.returncode) def cubids_datalad_save(): - ''' Command Line Interfcae function for performing datalad save.''' - + """Perform datalad save.""" parser = argparse.ArgumentParser( - description="cubids-datalad-save: perform a DataLad save on a BIDS " - "directory", - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('bids_dir', - type=Path, - action='store', - help='the root of a BIDS dataset. It should contain ' - 'sub-X directories and dataset_description.json') - parser.add_argument('-m', - action='store', - help='message for this commit') - parser.add_argument('--container', - action='store', - help='Docker image tag or Singularity image file.') + description=("cubids-datalad-save: perform a DataLad save on a BIDS directory"), + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser.add_argument( + "bids_dir", + type=Path, + action="store", + help=( + "the root of a BIDS dataset. It should contain " + "sub-X directories and dataset_description.json" + ), + ) + parser.add_argument("-m", action="store", help="message for this commit") + parser.add_argument( + "--container", + action="store", + help="Docker image tag or Singularity image file.", + ) opts = parser.parse_args() # Run directly from python using @@ -536,35 +655,60 @@ def cubids_datalad_save(): # Run it through a container container_type = _get_container_type(opts.container) bids_dir_link = str(opts.bids_dir.absolute()) + ":/bids" - if container_type == 'docker': - cmd = ['docker', 'run', '--rm', '-v', bids_dir_link, - '-v', GIT_CONFIG+":/root/.gitconfig", - '--entrypoint', 'cubids-datalad-save', - opts.container, '/bids', '-m', opts.m] - elif container_type == 'singularity': - cmd = ['singularity', 'exec', '--cleanenv', - '-B', bids_dir_link, - opts.container, 'cubids-datalad-save', - '/bids', '-m', opts.m] - print("RUNNING: " + ' '.join(cmd)) + if container_type == "docker": + cmd = [ + "docker", + "run", + "--rm", + "-v", + bids_dir_link, + "-v", + GIT_CONFIG + ":/root/.gitconfig", + "--entrypoint", + "cubids-datalad-save", + opts.container, + "/bids", + "-m", + opts.m, + ] + elif container_type == "singularity": + cmd = [ + "singularity", + "exec", + "--cleanenv", + "-B", + bids_dir_link, + opts.container, + "cubids-datalad-save", + "/bids", + "-m", + opts.m, + ] + print("RUNNING: " + " ".join(cmd)) proc = subprocess.run(cmd) sys.exit(proc.returncode) def cubids_undo(): - ''' Command Line Interface function for reverting a commit.''' - + """Revert the most recent commit.""" parser = argparse.ArgumentParser( description="cubids-undo: revert most recent commit", - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('bids_dir', - type=Path, - action='store', - help='the root of a BIDS dataset. It should contain ' - 'sub-X directories and dataset_description.json') - parser.add_argument('--container', - action='store', - help='Docker image tag or Singularity image file.') + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser.add_argument( + "bids_dir", + type=Path, + action="store", + help=( + "the root of a BIDS dataset. It should contain " + "sub-X directories and dataset_description.json" + ), + ) + parser.add_argument( + "--container", + action="store", + help="Docker image tag or Singularity image file.", + ) opts = parser.parse_args() # Run directly from python using @@ -576,56 +720,90 @@ def cubids_undo(): # Run it through a container container_type = _get_container_type(opts.container) bids_dir_link = str(opts.bids_dir.absolute()) + ":/bids" - if container_type == 'docker': - cmd = ['docker', 'run', '--rm', '-v', bids_dir_link, - '-v', GIT_CONFIG+":/root/.gitconfig", - '--entrypoint', 'cubids-undo', - opts.container, '/bids'] - elif container_type == 'singularity': - cmd = ['singularity', 'exec', '--cleanenv', - '-B', bids_dir_link, - opts.container, 'cubids-undo', '/bids'] - print("RUNNING: " + ' '.join(cmd)) + if container_type == "docker": + cmd = [ + "docker", + "run", + "--rm", + "-v", + bids_dir_link, + "-v", + GIT_CONFIG + ":/root/.gitconfig", + "--entrypoint", + "cubids-undo", + opts.container, + "/bids", + ] + elif container_type == "singularity": + cmd = [ + "singularity", + "exec", + "--cleanenv", + "-B", + bids_dir_link, + opts.container, + "cubids-undo", + "/bids", + ] + print("RUNNING: " + " ".join(cmd)) proc = subprocess.run(cmd) sys.exit(proc.returncode) def cubids_copy_exemplars(): - ''' Command Line Interface function for purging scan associations.''' - + """Create and save a directory with one subject from each acquisition group.""" parser = argparse.ArgumentParser( - description="cubids-copy-exemplars: create and save a directory with " - " one subject from each Acquisition Group in the BIDS dataset", - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('bids_dir', - type=Path, - action='store', - help='path to the root of a BIDS dataset. ' - 'It should contain sub-X directories and ' - 'dataset_description.json.') - parser.add_argument('exemplars_dir', - type=Path, - action='store', - help='absolute path to the root of a BIDS dataset ' - 'containing one subject from each Acquisition Group. ' - 'It should contain sub-X directories and ' - 'dataset_description.json.') - parser.add_argument('exemplars_tsv', - type=Path, - action='store', - help='absolute path to the .tsv file that lists one ' - 'subject from each Acqusition Group ' - '(*_AcqGrouping.tsv from the cubids-group output)') - parser.add_argument('--use-datalad', - action='store_true', - help='check exemplar dataset into DataLad') - parser.add_argument('--min-group-size', - action='store', - default=1, - help='minimum number of subjects an Acquisition Group ' - 'must have in order to be included in the exemplar ' - 'dataset ', - required=False) + description=( + "cubids-copy-exemplars: create and save a directory with " + "one subject from each Acquisition Group in the BIDS dataset" + ), + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser.add_argument( + "bids_dir", + type=Path, + action="store", + help=( + "path to the root of a BIDS dataset. " + "It should contain sub-X directories and " + "dataset_description.json." + ), + ) + parser.add_argument( + "exemplars_dir", + type=Path, + action="store", + help=( + "absolute path to the root of a BIDS dataset " + "containing one subject from each Acquisition Group. " + "It should contain sub-X directories and " + "dataset_description.json." + ), + ) + parser.add_argument( + "exemplars_tsv", + type=Path, + action="store", + help=( + "absolute path to the .tsv file that lists one " + "subject from each Acqusition Group " + "(*_AcqGrouping.tsv from the cubids-group output)" + ), + ) + parser.add_argument( + "--use-datalad", action="store_true", help="check exemplar dataset into DataLad" + ) + parser.add_argument( + "--min-group-size", + action="store", + default=1, + help=( + "minimum number of subjects an Acquisition Group " + "must have in order to be included in the exemplar " + "dataset " + ), + required=False, + ) # parser.add_argument('--include-groups', # action='store', # nargs='+', @@ -633,23 +811,28 @@ def cubids_copy_exemplars(): # help='only include an exemplar subject from these ' # 'listed Acquisition Groups in the exemplar dataset ', # required=False) - parser.add_argument('--container', - action='store', - help='Docker image tag or Singularity image file.') + parser.add_argument( + "--container", + action="store", + help="Docker image tag or Singularity image file.", + ) opts = parser.parse_args() # Run directly from python using if opts.container is None: - bod = CuBIDS(data_root=str(opts.bids_dir), - use_datalad=opts.use_datalad) + bod = CuBIDS(data_root=str(opts.bids_dir), use_datalad=opts.use_datalad) if opts.use_datalad: if not bod.is_datalad_clean(): - raise Exception("Untracked changes. Need to save " - + str(opts.bids_dir) + - " before coyping exemplars") - bod.copy_exemplars(str(opts.exemplars_dir), str(opts.exemplars_tsv), - min_group_size=opts.min_group_size, - raise_on_error=True) + raise Exception( + "Untracked changes. Need to save " + + str(opts.bids_dir) + + " before coyping exemplars" + ) + bod.copy_exemplars( + str(opts.exemplars_dir), + str(opts.exemplars_tsv), + min_group_size=opts.min_group_size, + ) sys.exit(0) # Run it through a container @@ -657,65 +840,101 @@ def cubids_copy_exemplars(): bids_dir_link = str(opts.bids_dir.absolute()) + ":/bids:ro" exemplars_dir_link = str(opts.exemplars_dir.absolute()) + ":/exemplars:ro" exemplars_tsv_link = str(opts.exemplars_tsv.absolute()) + ":/in_tsv:ro" - if container_type == 'docker': - cmd = ['docker', 'run', '--rm', '-v', bids_dir_link, - '-v', exemplars_dir_link, - '-v', GIT_CONFIG+":/root/.gitconfig", - '-v', exemplars_tsv_link, '--entrypoint', - 'cubids-copy-exemplars', - opts.container, '/bids', '/exemplars', '/in_tsv'] + if container_type == "docker": + cmd = [ + "docker", + "run", + "--rm", + "-v", + bids_dir_link, + "-v", + exemplars_dir_link, + "-v", + GIT_CONFIG + ":/root/.gitconfig", + "-v", + exemplars_tsv_link, + "--entrypoint", + "cubids-copy-exemplars", + opts.container, + "/bids", + "/exemplars", + "/in_tsv", + ] if opts.force_unlock: - cmd.append('--force-unlock') + cmd.append("--force-unlock") if opts.min_group_size: - cmd.append('--min-group-size') - elif container_type == 'singularity': - cmd = ['singularity', 'exec', '--cleanenv', - '-B', bids_dir_link, - '-B', exemplars_dir_link, - '-B', exemplars_tsv_link, opts.container, - 'cubids-copy-exemplars', - '/bids', '/exemplars', '/in_tsv'] + cmd.append("--min-group-size") + elif container_type == "singularity": + cmd = [ + "singularity", + "exec", + "--cleanenv", + "-B", + bids_dir_link, + "-B", + exemplars_dir_link, + "-B", + exemplars_tsv_link, + opts.container, + "cubids-copy-exemplars", + "/bids", + "/exemplars", + "/in_tsv", + ] if opts.force_unlock: - cmd.append('--force-unlock') + cmd.append("--force-unlock") if opts.min_group_size: - cmd.append('--min-group-size') + cmd.append("--min-group-size") - print("RUNNING: " + ' '.join(cmd)) + print("RUNNING: " + " ".join(cmd)) proc = subprocess.run(cmd) sys.exit(proc.returncode) def cubids_add_nifti_info(): - ''' Command Line Interface function for purging scan associations.''' - + """Add information from nifti files to the dataset's sidecars.""" parser = argparse.ArgumentParser( - description="cubids-add-nifti-info: Add information from nifti" - "files to the sidecars of each dataset", - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('bids_dir', - type=Path, - action='store', - help='absolute path to the root of a BIDS dataset. ' - 'It should contain sub-X directories and ' - 'dataset_description.json.') - parser.add_argument('--use-datalad', - action='store_true', - help='ensure that there are no untracked changes ' - 'before finding groups') - parser.add_argument('--force-unlock', - action='store_true', - help='unlock dataset before adding nifti info ') - parser.add_argument('--container', - action='store', - help='Docker image tag or Singularity image file.') + description=( + "cubids-add-nifti-info: Add information from nifti" + "files to the sidecars of each dataset" + ), + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser.add_argument( + "bids_dir", + type=Path, + action="store", + help=( + "absolute path to the root of a BIDS dataset. " + "It should contain sub-X directories and " + "dataset_description.json." + ), + ) + parser.add_argument( + "--use-datalad", + action="store_true", + help="ensure that there are no untracked changes before finding groups", + ) + parser.add_argument( + "--force-unlock", + action="store_true", + help="unlock dataset before adding nifti info ", + ) + parser.add_argument( + "--container", + action="store", + help="Docker image tag or Singularity image file.", + ) opts = parser.parse_args() # Run directly from python using if opts.container is None: - bod = CuBIDS(data_root=str(opts.bids_dir), - use_datalad=opts.use_datalad, - force_unlock=opts.force_unlock) + bod = CuBIDS( + data_root=str(opts.bids_dir), + use_datalad=opts.use_datalad, + force_unlock=opts.force_unlock, + ) if opts.use_datalad: if not bod.is_datalad_clean(): raise Exception("Untracked change in " + str(opts.bids_dir)) @@ -727,83 +946,122 @@ def cubids_add_nifti_info(): # Run it through a container container_type = _get_container_type(opts.container) bids_dir_link = str(opts.bids_dir.absolute()) + ":/bids:ro" - if container_type == 'docker': - cmd = ['docker', 'run', '--rm', '-v', bids_dir_link, - '-v', GIT_CONFIG+":/root/.gitconfig", - '--entrypoint', 'cubids-add-nifti-info', - opts.container, '/bids'] + if container_type == "docker": + cmd = [ + "docker", + "run", + "--rm", + "-v", + bids_dir_link, + "-v", + GIT_CONFIG + ":/root/.gitconfig", + "--entrypoint", + "cubids-add-nifti-info", + opts.container, + "/bids", + ] if opts.force_unlock: - cmd.append('--force-unlock') - elif container_type == 'singularity': - cmd = ['singularity', 'exec', '--cleanenv', - '-B', bids_dir_link, - opts.container, 'cubids-add-nifti-info', - '/bids'] + cmd.append("--force-unlock") + elif container_type == "singularity": + cmd = [ + "singularity", + "exec", + "--cleanenv", + "-B", + bids_dir_link, + opts.container, + "cubids-add-nifti-info", + "/bids", + ] if opts.force_unlock: - cmd.append('--force-unlock') + cmd.append("--force-unlock") - print("RUNNING: " + ' '.join(cmd)) + print("RUNNING: " + " ".join(cmd)) proc = subprocess.run(cmd) sys.exit(proc.returncode) def cubids_purge(): - ''' Command Line Interface function for purging scan associations.''' - + """Purge scan associations.""" parser = argparse.ArgumentParser( description="cubids-purge: purge associations from the dataset", - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('bids_dir', - type=Path, - action='store', - help='path to the root of a BIDS dataset. ' - 'It should contain sub-X directories and ' - 'dataset_description.json.') - parser.add_argument('scans', - type=Path, - action='store', - help='path to the txt file of scans whose ' - 'associations should be purged.') - parser.add_argument('--use-datalad', - action='store_true', - help='ensure that there are no untracked changes ' - 'before finding groups') - parser.add_argument('--container', - action='store', - help='Docker image tag or Singularity image file.') + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser.add_argument( + "bids_dir", + type=Path, + action="store", + help=( + "path to the root of a BIDS dataset. " + "It should contain sub-X directories and " + "dataset_description.json." + ), + ) + parser.add_argument( + "scans", + type=Path, + action="store", + help="path to the txt file of scans whose associations should be purged.", + ) + parser.add_argument( + "--use-datalad", + action="store_true", + help="ensure that there are no untracked changes before finding groups", + ) + parser.add_argument( + "--container", + action="store", + help="Docker image tag or Singularity image file.", + ) opts = parser.parse_args() # Run directly from python using if opts.container is None: - bod = CuBIDS(data_root=str(opts.bids_dir), - use_datalad=opts.use_datalad) + bod = CuBIDS(data_root=str(opts.bids_dir), use_datalad=opts.use_datalad) if opts.use_datalad: if not bod.is_datalad_clean(): raise Exception("Untracked change in " + str(opts.bids_dir)) - bod.purge(str(opts.scans), raise_on_error=False) + bod.purge(str(opts.scans)) sys.exit(0) # Run it through a container container_type = _get_container_type(opts.container) bids_dir_link = str(opts.bids_dir.absolute()) + ":/bids" - input_scans_link = str( - opts.scans.parent.absolute()) + ":/in_scans:ro" - if container_type == 'docker': - cmd = ['docker', 'run', '--rm', - '-v', bids_dir_link, - '-v', GIT_CONFIG+":/root/.gitconfig", - '-v', input_scans_link, - '--entrypoint', 'cubids-purge', - opts.container, '/bids', input_scans_link] - - elif container_type == 'singularity': - cmd = ['singularity', 'exec', '--cleanenv', - '-B', bids_dir_link, - '-B', input_scans_link, - opts.container, 'cubids-purge', - '/bids', input_scans_link] - print("RUNNING: " + ' '.join(cmd)) + input_scans_link = str(opts.scans.parent.absolute()) + ":/in_scans:ro" + if container_type == "docker": + cmd = [ + "docker", + "run", + "--rm", + "-v", + bids_dir_link, + "-v", + GIT_CONFIG + ":/root/.gitconfig", + "-v", + input_scans_link, + "--entrypoint", + "cubids-purge", + opts.container, + "/bids", + input_scans_link, + ] + + elif container_type == "singularity": + cmd = [ + "singularity", + "exec", + "--cleanenv", + "-B", + bids_dir_link, + "-B", + input_scans_link, + opts.container, + "cubids-purge", + "/bids", + input_scans_link, + ] + print("RUNNING: " + " ".join(cmd)) if opts.use_datalad: cmd.append("--use-datalad") proc = subprocess.run(cmd) @@ -811,26 +1069,32 @@ def cubids_purge(): def cubids_remove_metadata_fields(): - ''' Command Line Interface function for deteling fields from metadata.''' - + """Delete fields from metadata.""" parser = argparse.ArgumentParser( - description="cubids-remove-metadata-fields: delete fields from " - "metadata", - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('bids_dir', - type=Path, - action='store', - help='the root of a BIDS dataset. It should contain ' - 'sub-X directories and dataset_description.json') - parser.add_argument('--fields', - nargs='+', - action='store', - default=[], - help='space-separated list of metadata fields to ' - 'remove.') - parser.add_argument('--container', - action='store', - help='Docker image tag or Singularity image file.') + description="cubids-remove-metadata-fields: delete fields from metadata", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser.add_argument( + "bids_dir", + type=Path, + action="store", + help=( + "the root of a BIDS dataset. It should contain " + "sub-X directories and dataset_description.json" + ), + ) + parser.add_argument( + "--fields", + nargs="+", + action="store", + default=[], + help="space-separated list of metadata fields to remove.", + ) + parser.add_argument( + "--container", + action="store", + help="Docker image tag or Singularity image file.", + ) opts = parser.parse_args() # Run directly from python @@ -842,35 +1106,56 @@ def cubids_remove_metadata_fields(): # Run it through a container container_type = _get_container_type(opts.container) bids_dir_link = str(opts.bids_dir.absolute()) + ":/bids:rw" - if container_type == 'docker': - cmd = ['docker', 'run', '--rm', '-v', bids_dir_link, - '--entrypoint', 'cubids-remove-metadata-fields', - opts.container, '/bids', '--fields'] + opts.fields - elif container_type == 'singularity': - cmd = ['singularity', 'exec', '--cleanenv', - '-B', bids_dir_link, - opts.container, 'cubids-remove-metadata-fields', - '/bids', '--fields'] + opts.fields - print("RUNNING: " + ' '.join(cmd)) + if container_type == "docker": + cmd = [ + "docker", + "run", + "--rm", + "-v", + bids_dir_link, + "--entrypoint", + "cubids-remove-metadata-fields", + opts.container, + "/bids", + "--fields", + ] + opts.fields + elif container_type == "singularity": + cmd = [ + "singularity", + "exec", + "--cleanenv", + "-B", + bids_dir_link, + opts.container, + "cubids-remove-metadata-fields", + "/bids", + "--fields", + ] + opts.fields + print("RUNNING: " + " ".join(cmd)) proc = subprocess.run(cmd) sys.exit(proc.returncode) def cubids_print_metadata_fields(): - '''Command Line Interface function that prints unique metadata fields.''' - + """Print unique metadata fields.""" parser = argparse.ArgumentParser( - description="cubids-print-metadata-fields: print all unique " - "metadata fields", - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('bids_dir', - type=Path, - action='store', - help='the root of a BIDS dataset. It should contain ' - 'sub-X directories and dataset_description.json') - parser.add_argument('--container', - action='store', - help='Docker image tag or Singularity image file.') + description="cubids-print-metadata-fields: print all unique metadata fields", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser.add_argument( + "bids_dir", + type=Path, + action="store", + help=( + "the root of a BIDS dataset. It should contain " + "sub-X directories and dataset_description.json" + ), + ) + parser.add_argument( + "--container", + action="store", + help="Docker image tag or Singularity image file.", + ) opts = parser.parse_args() # Run directly from python @@ -883,23 +1168,36 @@ def cubids_print_metadata_fields(): # Run it through a container container_type = _get_container_type(opts.container) bids_dir_link = str(opts.bids_dir.absolute()) + ":/bids:ro" - if container_type == 'docker': - cmd = ['docker', 'run', '--rm', '-v', bids_dir_link, - '--entrypoint', 'cubids-print-metadata-fields', - opts.container, '/bids'] - elif container_type == 'singularity': - cmd = ['singularity', 'exec', '--cleanenv', - '-B', bids_dir_link, - opts.container, 'cubids-print-metadata-fields', - '/bids'] - print("RUNNING: " + ' '.join(cmd)) + if container_type == "docker": + cmd = [ + "docker", + "run", + "--rm", + "-v", + bids_dir_link, + "--entrypoint", + "cubids-print-metadata-fields", + opts.container, + "/bids", + ] + elif container_type == "singularity": + cmd = [ + "singularity", + "exec", + "--cleanenv", + "-B", + bids_dir_link, + opts.container, + "cubids-print-metadata-fields", + "/bids", + ] + print("RUNNING: " + " ".join(cmd)) proc = subprocess.run(cmd) sys.exit(proc.returncode) def _get_container_type(image_name): - '''Gets and returns the container type.''' - + """Get and return the container type.""" # If it's a file on disk, it must be a singularity image if Path(image_name).exists(): return "singularity" @@ -908,5 +1206,4 @@ def _get_container_type(image_name): if re.match(r"(?:.+\/)?([^:]+)(?::.+)?", image_name): return "docker" - raise Exception("Unable to determine the container type of " - + image_name) + raise Exception("Unable to determine the container type of " + image_name) diff --git a/cubids/config.py b/cubids/config.py index 377bd9352..59fd5a157 100644 --- a/cubids/config.py +++ b/cubids/config.py @@ -2,14 +2,14 @@ Functions for configuring CuBIDS """ -from pkg_resources import resource_filename as pkgrf -import yaml from pathlib import Path +import yaml +from pkg_resources import resource_filename as pkgrf + def load_config(config_file): - """Loads a YAML file containing a configuration for param groups. - """ + """Loads a YAML file containing a configuration for param groups.""" if config_file is None: config_file = Path(pkgrf("cubids", "data/config.yml")) diff --git a/cubids/constants.py b/cubids/constants.py index a54ba33cf..21c8982cd 100644 --- a/cubids/constants.py +++ b/cubids/constants.py @@ -1,13 +1,32 @@ +"""Constants for CuBIDS.""" ID_VARS = set(["KeyGroup", "ParamGroup", "FilePath"]) NON_KEY_ENTITIES = set(["subject", "session", "extension"]) # Multi-dimensional keys SliceTiming -IMAGING_PARAMS = set([ - "ParallelReductionFactorInPlane", "ParallelAcquisitionTechnique", - "ParallelAcquisitionTechnique", "PartialFourier", "PhaseEncodingDirection", - "EffectiveEchoSpacing", "TotalReadoutTime", "EchoTime", - "SliceEncodingDirection", "DwellTime", "FlipAngle", - "MultibandAccelerationFactor", "RepetitionTime", - "VolumeTiming", "NumberOfVolumesDiscardedByScanner", - "NumberOfVolumesDiscardedByUser", "Obliquity", "VoxelSizeDim1", - "VoxelSizeDim2", "VoxelSizeDim3", "Dim1Size", "Dim2Size", "Dim3Size", - "NumVolumes"]) +IMAGING_PARAMS = set( + [ + "ParallelReductionFactorInPlane", + "ParallelAcquisitionTechnique", + "ParallelAcquisitionTechnique", + "PartialFourier", + "PhaseEncodingDirection", + "EffectiveEchoSpacing", + "TotalReadoutTime", + "EchoTime", + "SliceEncodingDirection", + "DwellTime", + "FlipAngle", + "MultibandAccelerationFactor", + "RepetitionTime", + "VolumeTiming", + "NumberOfVolumesDiscardedByScanner", + "NumberOfVolumesDiscardedByUser", + "Obliquity", + "VoxelSizeDim1", + "VoxelSizeDim2", + "VoxelSizeDim3", + "Dim1Size", + "Dim2Size", + "Dim3Size", + "NumVolumes", + ] +) diff --git a/cubids/cubids.py b/cubids/cubids.py index bae9236bf..09400eb5d 100644 --- a/cubids/cubids.py +++ b/cubids/cubids.py @@ -1,36 +1,47 @@ """Main module.""" -import warnings -from collections import defaultdict -import subprocess -import bids -import bids.layout -import json import csv +import json import os import re +import subprocess +import warnings +from collections import defaultdict from pathlib import Path -from bids.layout import parse_file_entities -from bids.utils import listify +from shutil import copyfile, copytree + +import bids +import bids.layout +import datalad.api as dlapi +import nibabel as nb import numpy as np import pandas as pd -import nibabel as nb -import datalad.api as dlapi -from shutil import copytree, copyfile +from bids.layout import parse_file_entities +from bids.utils import listify from sklearn.cluster import AgglomerativeClustering from tqdm import tqdm -from .constants import ID_VARS, NON_KEY_ENTITIES -from .config import load_config -from .metadata_merge import ( - check_merging_operations, group_by_acquisition_sets) -warnings.simplefilter(action='ignore', category=FutureWarning) -bids.config.set_option('extension_initial_dot', True) + +from cubids.config import load_config +from cubids.constants import ID_VARS, NON_KEY_ENTITIES +from cubids.metadata_merge import check_merging_operations, group_by_acquisition_sets + +warnings.simplefilter(action="ignore", category=FutureWarning) +bids.config.set_option("extension_initial_dot", True) class CuBIDS(object): + """The main CuBIDS class. - def __init__(self, data_root, use_datalad=False, acq_group_level='subject', - grouping_config=None, force_unlock=False): + TODO: Complete docstring. + """ + def __init__( + self, + data_root, + use_datalad=False, + acq_group_level="subject", + grouping_config=None, + force_unlock=False, + ): self.path = os.path.abspath(data_root) self._layout = None self.keys_files = {} @@ -44,17 +55,21 @@ def __init__(self, data_root, use_datalad=False, acq_group_level='subject', self.acq_group_level = acq_group_level self.scans_txt = None # txt file of scans to purge (for purge only) self.force_unlock = force_unlock # force unlock for add-nifti-info - self.cubids_code_dir = Path(self.path + '/code/CuBIDS').is_dir() + self.cubids_code_dir = Path(self.path + "/code/CuBIDS").is_dir() self.data_dict = {} # data dictionary for TSV outputs self.use_datalad = use_datalad # True if flag set, False if flag unset if self.use_datalad: self.init_datalad() - if self.acq_group_level == 'session': + if self.acq_group_level == "session": NON_KEY_ENTITIES.remove("session") @property def layout(self): + """Return the BIDSLayout object. + + TODO: Complete docstring. + """ if self._layout is None: # print("SETTING LAYOUT OBJECT") self.reset_bids_layout() @@ -62,73 +77,71 @@ def layout(self): return self._layout def reset_bids_layout(self, validate=False): + """Reset the BIDS layout. + + TODO: Complete docstring. + """ # create BIDS Layout Indexer class - ignores = ["code", "stimuli", "sourcedata", "models", - re.compile(r'^\.'), re.compile(r'/\.')] + ignores = [ + "code", + "stimuli", + "sourcedata", + "models", + re.compile(r"^\."), + re.compile(r"/\."), + ] - indexer = bids.BIDSLayoutIndexer(validate=validate, ignore=ignores, - index_metadata=False) + indexer = bids.BIDSLayoutIndexer(validate=validate, ignore=ignores, index_metadata=False) - self._layout = bids.BIDSLayout(self.path, - validate=validate, - indexer=indexer) + self._layout = bids.BIDSLayout(self.path, validate=validate, indexer=indexer) def create_cubids_code_dir(self): + """Create CuBIDS code directory. + + TODO: Complete docstring. + """ # check if BIDS_ROOT/code/CuBIDS exists if not self.cubids_code_dir: - subprocess.run(['mkdir', self.path + '/code']) - subprocess.run(['mkdir', self.path + '/code/CuBIDS/']) + subprocess.run(["mkdir", self.path + "/code"]) + subprocess.run(["mkdir", self.path + "/code/CuBIDS/"]) self.cubids_code_dir = True return self.cubids_code_dir def init_datalad(self): - """Initializes a datalad Dataset at self.path. - - Parameters: - ----------- - - save: bool - Run datalad save to add any untracked files - message: str or None - Message to add to - """ + """Initialize a datalad Dataset at self.path.""" self.datalad_ready = True self.datalad_handle = dlapi.Dataset(self.path) if not self.datalad_handle.is_installed(): - self.datalad_handle = dlapi.create(self.path, - cfg_proc='text2git', - force=True, - annex=True) + self.datalad_handle = dlapi.create( + self.path, cfg_proc="text2git", force=True, annex=True + ) def datalad_save(self, message=None): - """Performs a DataLad Save operation on the BIDS tree. + """Perform a DataLad Save operation on the BIDS tree. Additionally a check for an active datalad handle and that the status of all objects after the save is "ok". Parameters: ----------- - message : str or None - Commit message to use with datalad save + message : str or None + Commit message to use with datalad save. """ - if not self.datalad_ready: - raise Exception( - "DataLad has not been initialized. use datalad_init()") + raise Exception("DataLad has not been initialized. use datalad_init()") + statuses = self.datalad_handle.save(message=message or "CuBIDS Save") - saved_status = set([status['status'] for status in statuses]) + saved_status = set([status["status"] for status in statuses]) if not saved_status == set(["ok"]): raise Exception("Failed to save in DataLad") def is_datalad_clean(self): """If True, no changes are detected in the datalad dataset.""" if not self.datalad_ready: - raise Exception( - "Datalad not initialized, can't determine status") - statuses = set([status['state'] for status in - self.datalad_handle.status()]) + raise Exception("Datalad not initialized, can't determine status") + statuses = set([status["state"] for status in self.datalad_handle.status()]) return statuses == set(["clean"]) def datalad_undo_last_commit(self): @@ -137,14 +150,12 @@ def datalad_undo_last_commit(self): Uses git reset --hard to revert to the previous commit. """ if not self.is_datalad_clean(): - raise Exception("Untracked changes present. " - "Run clear_untracked_changes first") - reset_proc = subprocess.run( - ["git", "reset", "--hard", "HEAD~1"], cwd=self.path) + raise Exception("Untracked changes present. " "Run clear_untracked_changes first") + reset_proc = subprocess.run(["git", "reset", "--hard", "HEAD~1"], cwd=self.path) reset_proc.check_returncode() - def add_nifti_info(self, raise_on_error=True): - """Adds info from nifti files to json sidecars.""" + def add_nifti_info(self): + """Add info from nifti files to json sidecars.""" # check if force_unlock is set if self.force_unlock: # CHANGE TO SUBPROCESS.CALL IF NOT BLOCKING @@ -153,7 +164,7 @@ def add_nifti_info(self, raise_on_error=True): # loop through all niftis in the bids dir for path in Path(self.path).rglob("sub-*/**/*.*"): # ignore all dot directories - if '/.' in str(path): + if "/." in str(path): continue if str(path).endswith(".nii") or str(path).endswith(".nii.gz"): try: @@ -162,12 +173,11 @@ def add_nifti_info(self, raise_on_error=True): print("Empty Nifti File: ", str(path)) continue # get important info from niftis - obliquity = np.any(nb.affines.obliquity(img.affine) - > 1e-4) + obliquity = np.any(nb.affines.obliquity(img.affine) > 1e-4) voxel_sizes = img.header.get_zooms() matrix_dims = img.shape # add nifti info to corresponding sidecars​ - sidecar = img_to_new_ext(str(path), '.json') + sidecar = img_to_new_ext(str(path), ".json") if Path(sidecar).exists(): try: with open(sidecar) as f: @@ -196,81 +206,67 @@ def add_nifti_info(self, raise_on_error=True): data["NumVolumes"] = 1 if "ImageOrientation" not in data.keys(): orient = nb.orientations.aff2axcodes(img.affine) - joined = ''.join(orient) + '+' + joined = "".join(orient) + "+" data["ImageOrientation"] = joined - with open(sidecar, 'w') as file: + with open(sidecar, "w") as file: json.dump(data, file, indent=4) if self.use_datalad: self.datalad_save(message="Added nifti info to sidecars") self.reset_bids_layout() - def apply_tsv_changes(self, summary_tsv, files_tsv, new_prefix, - raise_on_error=True): - """Applies changes documented in the edited _summary tsv - and generates the new tsv files. + def apply_tsv_changes(self, summary_tsv, files_tsv, new_prefix, raise_on_error=True): + """Apply changes documented in the edited summary tsv and generate the new tsv files. This function looks at the RenameKeyGroup and MergeInto columns and modifies the bids datset according to the specified changs. - Parameters: - ----------- - orig_prefix : str - Path prefix and file stem for the original - _summary and _files tsvs. - For example, if orig_prefix is - '/cbica/projects/HBN/old_tsvs' then the paths to - the summary and files tsvs will be - '/cbica/projects/HBN/old_tsvs_summary.tsv' and - '/cbica/projects/HBN/old_tsvs_files.tsv' respectively. - new_prefix : str - Path prefix and file stem for the new summary and - files tsvs. + Parameters + ---------- + summary_tsv + files_tsv + new_prefix + raise_on_error : :obj:`bool` """ # reset lists of old and new filenames self.old_filenames = [] self.new_filenames = [] - if '/' not in str(summary_tsv): + if "/" not in str(summary_tsv): if not self.cubids_code_dir: self.create_cubids_code_dir() - summary_tsv = self.path + '/code/CuBIDS/' + summary_tsv - if '/' not in str(files_tsv): + summary_tsv = self.path + "/code/CuBIDS/" + summary_tsv + + if "/" not in str(files_tsv): if not self.cubids_code_dir: self.create_cubids_code_dir() - files_tsv = self.path + '/code/CuBIDS/' + files_tsv + files_tsv = self.path + "/code/CuBIDS/" + files_tsv summary_df = pd.read_table(summary_tsv) files_df = pd.read_table(files_tsv) # Check that the MergeInto column only contains valid merges - ok_merges, deletions = check_merging_operations( - summary_tsv, raise_on_error=raise_on_error) + ok_merges, deletions = check_merging_operations(summary_tsv, raise_on_error=raise_on_error) merge_commands = [] for source_id, dest_id in ok_merges: - dest_files = files_df.loc[ - (files_df[["ParamGroup", "KeyGroup"]] == dest_id).all(1)] - source_files = files_df.loc[ - (files_df[["ParamGroup", "KeyGroup"]] == source_id).all(1)] + dest_files = files_df.loc[(files_df[["ParamGroup", "KeyGroup"]] == dest_id).all(1)] + source_files = files_df.loc[(files_df[["ParamGroup", "KeyGroup"]] == source_id).all(1)] # Get a source json file img_full_path = self.path + source_files.iloc[0].FilePath - source_json = img_to_new_ext(img_full_path, '.json') + source_json = img_to_new_ext(img_full_path, ".json") for dest_nii in dest_files.FilePath: - dest_json = img_to_new_ext(self.path + dest_nii, '.json') + dest_json = img_to_new_ext(self.path + dest_nii, ".json") if Path(dest_json).exists() and Path(source_json).exists(): - merge_commands.append( - 'bids-sidecar-merge %s %s' - % (source_json, dest_json)) + merge_commands.append(f"bids-sidecar-merge {source_json} {dest_json}") # Get the delete commands # delete_commands = [] to_remove = [] for rm_id in deletions: - files_to_rm = files_df.loc[ - (files_df[["ParamGroup", "KeyGroup"]] == rm_id).all(1)] + files_to_rm = files_df.loc[(files_df[["ParamGroup", "KeyGroup"]] == rm_id).all(1)] for rm_me in files_to_rm.FilePath: if Path(self.path + rm_me).exists(): @@ -285,12 +281,11 @@ def apply_tsv_changes(self, summary_tsv, files_tsv, new_prefix, move_ops = [] # return if nothing to change if len(change_keys_df) > 0: - key_groups = {} for i in range(len(change_keys_df)): - new_key = change_keys_df.iloc[i]['RenameKeyGroup'] - old_key_param = change_keys_df.iloc[i]['KeyParamGroup'] + new_key = change_keys_df.iloc[i]["RenameKeyGroup"] + old_key_param = change_keys_df.iloc[i]["KeyParamGroup"] # add to dictionary key_groups[old_key_param] = new_key @@ -299,14 +294,12 @@ def apply_tsv_changes(self, summary_tsv, files_tsv, new_prefix, to_change = list(key_groups.keys()) for row in range(len(files_df)): - file_path = self.path + files_df.loc[row, 'FilePath'] - if Path(file_path).exists() and '/fmap/' not in file_path: - - key_param_group = files_df.loc[row, 'KeyParamGroup'] + file_path = self.path + files_df.loc[row, "FilePath"] + if Path(file_path).exists() and "/fmap/" not in file_path: + key_param_group = files_df.loc[row, "KeyParamGroup"] if key_param_group in to_change: - - orig_key_param = files_df.loc[row, 'KeyParamGroup'] + orig_key_param = files_df.loc[row, "KeyParamGroup"] new_key = key_groups[orig_key_param] @@ -316,15 +309,14 @@ def apply_tsv_changes(self, summary_tsv, files_tsv, new_prefix, self.change_filename(file_path, new_entities) # create string of mv command ; mv command for dlapi.run - for from_file, to_file in zip(self.old_filenames, - self.new_filenames): - + for from_file, to_file in zip(self.old_filenames, self.new_filenames): if Path(from_file).exists(): # if using datalad, we want to git mv instead of mv if self.use_datalad: - move_ops.append('git mv %s %s' % (from_file, to_file)) + move_ops.append(f"git mv {from_file} {to_file}") else: - move_ops.append('mv %s %s' % (from_file, to_file)) + move_ops.append(f"mv {from_file} {to_file}") + full_cmd = "\n".join(merge_commands + move_ops) if full_cmd: # write full_cmd to a .sh file @@ -335,10 +327,9 @@ def apply_tsv_changes(self, summary_tsv, files_tsv, new_prefix, # Close the file fileObject.close() - renames = new_prefix + '_full_cmd.sh' + renames = new_prefix + "_full_cmd.sh" if self.use_datalad: - # first check if IntendedFor renames need to be saved if not self.is_datalad_clean(): s1 = "Renamed IntendedFor references to " @@ -351,80 +342,81 @@ def apply_tsv_changes(self, summary_tsv, files_tsv, new_prefix, rename_commit = s1 + s2 - self.datalad_handle.run(cmd=["bash", renames], - message=rename_commit) + self.datalad_handle.run(cmd=["bash", renames], message=rename_commit) else: - subprocess.run(["bash", renames], - stdout=subprocess.PIPE, - cwd=str(Path(new_prefix).parent)) + subprocess.run( + ["bash", renames], + stdout=subprocess.PIPE, + cwd=str(Path(new_prefix).parent), + ) else: print("Not running any commands") self.reset_bids_layout() - self.get_TSVs(new_prefix) + self.get_tsvs(new_prefix) # remove renames file that gets created under the hood - subprocess.run(['rm', '-rf', 'renames']) + subprocess.run(["rm", "-rf", "renames"]) def change_filename(self, filepath, entities): - """Applies changes to a filename based on the renamed - key groups. + """Apply changes to a filename based on the renamed key groups. + This function takes into account the new key group names and renames all files whose key group names changed. - Parameters: - ----------- - filepath : str - Path prefix to a file in the affected key group change - entities : dictionary - A pybids dictionary of entities parsed from the new key - group name. + + Parameters + ---------- + filepath : str + Path prefix to a file in the affected key group change + entities : dictionary + A pybids dictionary of entities parsed from the new key + group name. """ exts = Path(filepath).suffixes old_ext = "" for ext in exts: old_ext += ext - suffix = entities['suffix'] + suffix = entities["suffix"] entity_file_keys = [] - file_keys = ['task', 'acquisition', 'direction', - 'reconstruction', 'run'] + file_keys = ["task", "acquisition", "direction", "reconstruction", "run"] for key in file_keys: if key in list(entities.keys()): entity_file_keys.append(key) - sub = get_key_name(filepath, 'sub') - ses = get_key_name(filepath, 'ses') - sub_ses = sub + '_' + ses + sub = get_key_name(filepath, "sub") + ses = get_key_name(filepath, "ses") + sub_ses = sub + "_" + ses - if 'run' in list(entities.keys()) and 'run-0' in filepath: - entities['run'] = '0' + str(entities['run']) + if "run" in list(entities.keys()) and "run-0" in filepath: + entities["run"] = "0" + str(entities["run"]) - filename = "_".join(["{}-{}".format(key, entities[key]) - for key in entity_file_keys]) - filename = filename.replace('acquisition', 'acq') \ - .replace('direction', 'dir') \ - .replace('reconstruction', 'rec') + filename = "_".join([f"{key}-{entities[key]}" for key in entity_file_keys]) + filename = ( + filename.replace("acquisition", "acq") + .replace("direction", "dir") + .replace("reconstruction", "rec") + ) if len(filename) > 0: - filename = sub_ses + '_' + filename + '_' + suffix + old_ext + filename = sub_ses + "_" + filename + "_" + suffix + old_ext else: - filename = sub_ses + filename + '_' + suffix + old_ext + filename = sub_ses + filename + "_" + suffix + old_ext # CHECK TO SEE IF DATATYPE CHANGED - dtypes = ['anat', 'func', 'perf', 'fmap', 'dwi'] - old = '' + dtypes = ["anat", "func", "perf", "fmap", "dwi"] + old = "" for dtype in dtypes: if dtype in filepath: old = dtype - if 'datatype' in entities.keys(): - dtype = entities['datatype'] - if entities['datatype'] != old: + if "datatype" in entities.keys(): + dtype = entities["datatype"] + if entities["datatype"] != old: print("WARNING: DATATYPE CHANGE DETECETD") else: dtype = old - new_path = str(self.path) + '/' + sub + '/' + ses \ - + '/' + dtype + '/' + filename + new_path = str(self.path) + "/" + sub + "/" + ses + "/" + dtype + "/" + filename # add the scan path + new path to the lists of old, new filenames self.old_filenames.append(filepath) @@ -441,89 +433,81 @@ def change_filename(self, filepath, entities): # print("FILE: ", filepath) # print("ASSOC: ", assoc.path) # ensure assoc not an IntendedFor reference - if '.nii' not in str(assoc_path): + if ".nii" not in str(assoc_path): self.old_filenames.append(assoc_path) - new_ext_path = img_to_new_ext(new_path, - ''.join(Path(assoc_path) - .suffixes)) + new_ext_path = img_to_new_ext(new_path, "".join(Path(assoc_path).suffixes)) self.new_filenames.append(new_ext_path) # MAKE SURE THESE AREN'T COVERED BY get_associations!!! - if '/dwi/' in filepath: + if "/dwi/" in filepath: # add the bval and bvec if there - if Path(img_to_new_ext(filepath, '.bval')).exists() \ - and img_to_new_ext(filepath, '.bval') \ - not in self.old_filenames: - self.old_filenames.append(img_to_new_ext(filepath, - '.bval')) - self.new_filenames.append(img_to_new_ext(new_path, - '.bval')) - - if Path(img_to_new_ext(filepath, '.bvec')).exists() \ - and img_to_new_ext(filepath, '.bvec') \ - not in self.old_filenames: - self.old_filenames.append(img_to_new_ext(filepath, - '.bvec')) - self.new_filenames.append(img_to_new_ext(new_path, - '.bvec')) + if ( + Path(img_to_new_ext(filepath, ".bval")).exists() + and img_to_new_ext(filepath, ".bval") not in self.old_filenames + ): + self.old_filenames.append(img_to_new_ext(filepath, ".bval")) + self.new_filenames.append(img_to_new_ext(new_path, ".bval")) + + if ( + Path(img_to_new_ext(filepath, ".bvec")).exists() + and img_to_new_ext(filepath, ".bvec") not in self.old_filenames + ): + self.old_filenames.append(img_to_new_ext(filepath, ".bvec")) + self.new_filenames.append(img_to_new_ext(new_path, ".bvec")) # now rename _events and _physio files! - old_suffix = parse_file_entities(filepath)['suffix'] - scan_end = '_' + old_suffix + old_ext + old_suffix = parse_file_entities(filepath)["suffix"] + scan_end = "_" + old_suffix + old_ext - if '_task-' in filepath: - old_events = filepath.replace(scan_end, '_events.tsv') - old_ejson = filepath.replace(scan_end, '_events.json') + if "_task-" in filepath: + old_events = filepath.replace(scan_end, "_events.tsv") + old_ejson = filepath.replace(scan_end, "_events.json") if Path(old_events).exists(): self.old_filenames.append(old_events) - new_scan_end = '_' + suffix + old_ext - new_events = new_path.replace(new_scan_end, '_events.tsv') + new_scan_end = "_" + suffix + old_ext + new_events = new_path.replace(new_scan_end, "_events.tsv") self.new_filenames.append(new_events) if Path(old_ejson).exists(): self.old_filenames.append(old_ejson) - new_scan_end = '_' + suffix + old_ext - new_ejson = new_path.replace(new_scan_end, '_events.json') + new_scan_end = "_" + suffix + old_ext + new_ejson = new_path.replace(new_scan_end, "_events.json") self.new_filenames.append(new_ejson) - old_physio = filepath.replace(scan_end, '_physio.tsv.gz') + old_physio = filepath.replace(scan_end, "_physio.tsv.gz") if Path(old_physio).exists(): self.old_filenames.append(old_physio) - new_scan_end = '_' + suffix + old_ext - new_physio = new_path.replace(new_scan_end, '_physio.tsv.gz') + new_scan_end = "_" + suffix + old_ext + new_physio = new_path.replace(new_scan_end, "_physio.tsv.gz") self.new_filenames.append(new_physio) # RENAME INTENDED FORS! - ses_path = self.path + '/' + sub + '/' + ses + ses_path = self.path + "/" + sub + "/" + ses for path in Path(ses_path).rglob("fmap/*.json"): self.IF_rename_paths.append(str(path)) # json_file = self.layout.get_file(str(path)) # data = json_file.get_dict() data = get_sidecar_metadata(str(path)) if data == "Erroneous sidecar": - print('Error parsing sidecar: ', str(path)) + print("Error parsing sidecar: ", str(path)) continue - if 'IntendedFor' in data.keys(): + if "IntendedFor" in data.keys(): # check if IntendedFor field is a str or list - if isinstance(data['IntendedFor'], str): - if data['IntendedFor'] == \ - _get_intended_for_reference(filepath): + if isinstance(data["IntendedFor"], str): + if data["IntendedFor"] == _get_intended_for_reference(filepath): # replace old filename with new one (overwrite string) - data['IntendedFor'] = \ - _get_intended_for_reference(new_path) + data["IntendedFor"] = _get_intended_for_reference(new_path) # update the json with the new data dictionary _update_json(str(path), data) - if isinstance(data['IntendedFor'], list): - for item in data['IntendedFor']: + if isinstance(data["IntendedFor"], list): + for item in data["IntendedFor"]: if item in _get_intended_for_reference(filepath): - # remove old filename - data['IntendedFor'].remove(item) + data["IntendedFor"].remove(item) # add new filename - data['IntendedFor'].append( - _get_intended_for_reference(new_path)) + data["IntendedFor"].append(_get_intended_for_reference(new_path)) # update the json with the new data dictionary _update_json(str(path), data) @@ -534,32 +518,41 @@ def change_filename(self, filepath, entities): # if not self.is_datalad_clean(): # self.datalad_save(message="Renamed IntendedFors") # self.reset_bids_layout() - # else: - # print("No IntendedFor References to Rename") + # else: + # print("No IntendedFor References to Rename") + + def copy_exemplars(self, exemplars_dir, exemplars_tsv, min_group_size): + """Copy one subject from each Acquisition Group into a new directory for testing preps. - def copy_exemplars(self, exemplars_dir, exemplars_tsv, min_group_size, - raise_on_error=True): - """Copies one subject from each Acquisition Group into a new directory - for testing *preps, raises an error if the subjects are not unlocked, + Raises an error if the subjects are not unlocked, unlocks each subject before copying if --force_unlock is set. - Parameters: - ----------- - exemplars_dir: str - path to the directory that will contain one subject - from each Acqusition Gorup (*_AcqGrouping.tsv) - example path: /Users/Covitz/tsvs/CCNP_Acq_Groups/ - - exemplars_tsv: str - path to the .tsv file that lists one subject - from each Acqusition Group (*_AcqGrouping.tsv - from the cubids-group output) - example path: /Users/Covitz/tsvs/CCNP_Acq_Grouping.tsv + Parameters + ---------- + exemplars_dir : str + path to the directory that will contain one subject + from each Acqusition Group (*_AcqGrouping.tsv) + example path: /Users/Covitz/tsvs/CCNP_Acq_Groups/ + exemplars_tsv : str + path to the .tsv file that lists one subject + from each Acqusition Group (*_AcqGrouping.tsv + from the cubids-group output) + example path: /Users/Covitz/tsvs/CCNP_Acq_Grouping.tsv + min_group_size """ # create the exemplar ds if self.use_datalad: - subprocess.run(['datalad', '--log-level', 'error', 'create', '-c', - 'text2git', exemplars_dir]) + subprocess.run( + [ + "datalad", + "--log-level", + "error", + "create", + "-c", + "text2git", + exemplars_dir, + ] + ) # load the exemplars tsv subs = pd.read_table(exemplars_tsv) @@ -567,8 +560,8 @@ def copy_exemplars(self, exemplars_dir, exemplars_tsv, min_group_size, # if min group size flag set, drop acq groups with less than min if int(min_group_size) > 1: for row in range(len(subs)): - acq_group = subs.loc[row, 'AcqGroup'] - size = int(subs['AcqGroup'].value_counts()[acq_group]) + acq_group = subs.loc[row, "AcqGroup"] + size = int(subs["AcqGroup"].value_counts()[acq_group]) if size < int(min_group_size): subs = subs.drop([row]) @@ -576,52 +569,50 @@ def copy_exemplars(self, exemplars_dir, exemplars_tsv, min_group_size, unique = subs.drop_duplicates(subset=["AcqGroup"]) # cast list to a set to drop duplicates, then convert back to list - unique_subs = list(set(unique['subject'].tolist())) + unique_subs = list(set(unique["subject"].tolist())) for subid in unique_subs: - source = str(self.path) + '/' + subid - dest = exemplars_dir + '/' + subid + source = str(self.path) + "/" + subid + dest = exemplars_dir + "/" + subid # Copy the content of source to destination copytree(source, dest) # Copy the dataset_description.json - copyfile(str(self.path) + '/' + 'dataset_description.json', - exemplars_dir + '/' + 'dataset_description.json') + copyfile( + str(self.path) + "/" + "dataset_description.json", + exemplars_dir + "/" + "dataset_description.json", + ) s1 = "Copied one subject from each Acquisition Group " s2 = "into the Exemplar Dataset" msg = s1 + s2 if self.use_datalad: - subprocess.run(['datalad', 'save', '-d', exemplars_dir, - '-m', msg]) - - def purge(self, scans_txt, raise_on_error=True): - """Purges all associations of desired scans from a bids dataset. - - Parameters: - ----------- - scans_txt: str - path to the .txt file that lists the scans - you want to be deleted from the dataset, along - with thier associations. - example path: /Users/Covitz/CCNP/scans_to_delete.txt + subprocess.run(["datalad", "save", "-d", exemplars_dir, "-m", msg]) + + def purge(self, scans_txt): + """Purge all associations of desired scans from a bids dataset. + + Parameters + ---------- + scans_txt : str + path to the .txt file that lists the scans + you want to be deleted from the dataset, along + with thier associations. + example path: /Users/Covitz/CCNP/scans_to_delete.txt """ - self.scans_txt = scans_txt scans = [] - with open(scans_txt, 'r') as fd: + with open(scans_txt, "r") as fd: reader = csv.reader(fd) for row in reader: - scans.append(self.path + '/' + str(row[0])) + scans.append(self.path + "/" + str(row[0])) # check to ensure scans are all real files in the ds! self._purge_associations(scans) def _purge_associations(self, scans): - - # PURGE FMAP JSONS' INTENDED FOR REFERENCES - + """Purge field map JSONs' IntendedFor references.""" # truncate all paths to intendedfor reference format # sub, ses, modality only (no self.path) if_scans = [] @@ -629,27 +620,26 @@ def _purge_associations(self, scans): if_scans.append(_get_intended_for_reference(self.path + scan)) for path in Path(self.path).rglob("sub-*/*/fmap/*.json"): - # json_file = self.layout.get_file(str(path)) # data = json_file.get_dict() data = get_sidecar_metadata(str(path)) if data == "Erroneous sidecar": - print('Error parsing sidecar: ', str(path)) + print("Error parsing sidecar: ", str(path)) continue # remove scan references in the IntendedFor - if 'IntendedFor' in data.keys(): + if "IntendedFor" in data.keys(): # check if IntendedFor field value is a list or a string - if isinstance(data['IntendedFor'], str): - if data['IntendedFor'] in if_scans: - data['IntendedFor'] = [] + if isinstance(data["IntendedFor"], str): + if data["IntendedFor"] in if_scans: + data["IntendedFor"] = [] # update the json with the new data dictionary _update_json(str(path), data) - if isinstance(data['IntendedFor'], list): - for item in data['IntendedFor']: + if isinstance(data["IntendedFor"], list): + for item in data["IntendedFor"]: if item in if_scans: - data['IntendedFor'].remove(item) + data["IntendedFor"].remove(item) # update the json with the new data dictionary _update_json(str(path), data) @@ -669,7 +659,6 @@ def _purge_associations(self, scans): to_remove = [] for path in Path(self.path).rglob("sub-*/**/*.nii.gz"): - if str(path) in scans: # bids_file = self.layout.get_file(str(path)) # associations = bids_file.get_associations() @@ -679,23 +668,21 @@ def _purge_associations(self, scans): # filepath = assoc.path # ensure association is not an IntendedFor reference! - if '.nii' not in str(path): - - if '/dwi/' in str(path): + if ".nii" not in str(path): + if "/dwi/" in str(path): # add the bval and bvec if there - if Path(img_to_new_ext(str(path), '.bval')).exists(): - to_remove.append(img_to_new_ext(str(path), '.bval')) - if Path(img_to_new_ext(str(path), '.bvec')).exists(): - to_remove.append(img_to_new_ext(str(path), '.bvec')) - if '/func/' in str(path): + if Path(img_to_new_ext(str(path), ".bval")).exists(): + to_remove.append(img_to_new_ext(str(path), ".bval")) + if Path(img_to_new_ext(str(path), ".bvec")).exists(): + to_remove.append(img_to_new_ext(str(path), ".bvec")) + if "/func/" in str(path): # add tsvs - tsv = img_to_new_ext(str(path), '.tsv').replace( - '_bold', '_events') + tsv = img_to_new_ext(str(path), ".tsv").replace("_bold", "_events") if Path(tsv).exists(): to_remove.append(tsv) # add tsv json (if exists) - if Path(tsv.replace('.tsv', '.json')).exists(): - to_remove.append(tsv.replace('.tsv', '.json')) + if Path(tsv.replace(".tsv", ".json")).exists(): + to_remove.append(tsv.replace(".tsv", ".json")) to_remove += scans # create rm commands for all files that need to be purged @@ -707,7 +694,6 @@ def _purge_associations(self, scans): # datalad run the file deletions (purges) full_cmd = "\n".join(purge_commands) if full_cmd: - # write full_cmd to a .sh file # Open file for writing @@ -719,56 +705,54 @@ def _purge_associations(self, scans): # Close the file fileObject.close() if self.scans_txt: - cmt = "Purged scans listed in %s from dataset" % self.scans_txt + cmt = f"Purged scans listed in {self.scans_txt} from dataset" else: cmt = "Purged Parameter Groups marked for removal" - purge_file = path_prefix + "/" + '_full_cmd.sh' + + purge_file = path_prefix + "/" + "_full_cmd.sh" if self.use_datalad: - self.datalad_handle.run(cmd=["bash", purge_file], - message=cmt) + self.datalad_handle.run(cmd=["bash", purge_file], message=cmt) else: - subprocess.run(["bash", path_prefix + "/" + "_full_cmd.sh"], - stdout=subprocess.PIPE, - cwd=path_prefix) + subprocess.run( + ["bash", path_prefix + "/" + "_full_cmd.sh"], + stdout=subprocess.PIPE, + cwd=path_prefix, + ) + self.reset_bids_layout() + else: print("Not running any association removals") def get_nifti_associations(self, nifti): + """Get nifti associations.""" # get all assocation files of a nifti image - no_ext_file = str(nifti).split('/')[-1].split('.')[0] + no_ext_file = str(nifti).split("/")[-1].split(".")[0] associations = [] for path in Path(self.path).rglob("sub-*/**/*.*"): - if no_ext_file in str(path) and '.nii.gz' not in str(path): + if no_ext_file in str(path) and ".nii.gz" not in str(path): associations.append(str(path)) return associations def _cache_fieldmaps(self): - """Searches all fieldmaps and creates a lookup for each file. - - Returns: - ----------- - misfits : list - A list of fmap filenames for whom CuBIDS has not detected - an IntnededFor. - """ - - suffix = '(phase1|phasediff|epi|fieldmap)' - fmap_files = self.layout.get(suffix=suffix, regex_search=True, - extension=['.nii.gz', '.nii']) + """Search all fieldmaps and create a lookup for each file.""" + suffix = "(phase1|phasediff|epi|fieldmap)" + fmap_files = self.layout.get( + suffix=suffix, regex_search=True, extension=[".nii.gz", ".nii"] + ) misfits = [] files_to_fmaps = defaultdict(list) for fmap_file in tqdm(fmap_files): # intentions = listify(fmap_file.get_metadata().get("IntendedFor")) - fmap_json = img_to_new_ext(fmap_file.path, '.json') + fmap_json = img_to_new_ext(fmap_file.path, ".json") metadata = get_sidecar_metadata(fmap_json) if metadata == "Erroneous sidecar": - print('Error parsing sidecar: ', str(fmap_json)) + print("Error parsing sidecar: ", str(fmap_json)) continue if_list = metadata.get("IntendedFor") intentions = listify(if_list) - subject_prefix = "sub-%s" % fmap_file.entities['subject'] + subject_prefix = f"sub-{fmap_file.entities['subject']}" if intentions is not None: for intended_for in intentions: @@ -787,28 +771,28 @@ def _cache_fieldmaps(self): return misfits def get_param_groups_from_key_group(self, key_group): - """Splits key groups into param groups based on json metadata. - - Parameters: - ----------- - key_group : str - Key group name. - - Returns: - ----------- - ret : tuple of two DataFrames - 1. A data frame with one row per file where the ParamGroup - column indicates the group to which each scan belongs. - 2. A data frame with param group summaries + """Split key groups into param groups based on json metadata. + + Parameters + ---------- + key_group : str + Key group name. + + Returns + ------- + ret : tuple of two DataFrames + 1. A data frame with one row per file where the ParamGroup + column indicates the group to which each scan belongs. + 2. A data frame with param group summaries """ if not self.fieldmaps_cached: - raise Exception( - "Fieldmaps must be cached to find parameter groups.") + raise Exception("Fieldmaps must be cached to find parameter groups.") key_entities = _key_group_to_entities(key_group) key_entities["extension"] = ".nii[.gz]*" - matching_files = self.layout.get(return_type="file", scope="self", - regex_search=True, **key_entities) + matching_files = self.layout.get( + return_type="file", scope="self", regex_search=True, **key_entities + ) # ensure files who's entities contain key_entities but include other # entities do not also get added to matching_files @@ -820,18 +804,23 @@ def get_param_groups_from_key_group(self, key_group): to_include.append(filepath) # get the modality associated with the key group - modalities = ['/dwi/', '/anat/', '/func/', '/perf/', '/fmap/'] - modality = '' + modalities = ["/dwi/", "/anat/", "/func/", "/perf/", "/fmap/"] + modality = "" for mod in modalities: if mod in filepath: - modality = mod.replace('/', '').replace('/', '') - if modality == '': + modality = mod.replace("/", "").replace("/", "") + if modality == "": print("Unusual Modality Detected") - modality = 'other' + modality = "other" ret = _get_param_groups( - to_include, self.layout, self.fieldmap_lookup, key_group, - self.grouping_config, modality, self.keys_files) + to_include, + self.fieldmap_lookup, + key_group, + self.grouping_config, + modality, + self.keys_files, + ) if ret == "erroneous sidecar found": return "erroneous sidecar found" @@ -843,28 +832,25 @@ def get_param_groups_from_key_group(self, key_group): return tup_ret def create_data_dictionary(self): - - sidecar_params = self.grouping_config.get('sidecar_params') + """Create a data dictionary.""" + sidecar_params = self.grouping_config.get("sidecar_params") for mod in sidecar_params.keys(): mod_dict = sidecar_params[mod] for s_param in mod_dict.keys(): if s_param not in self.data_dict.keys(): - self.data_dict[s_param] = {"Description": - "Scanning Parameter"} + self.data_dict[s_param] = {"Description": "Scanning Parameter"} - relational_params = self.grouping_config.get('relational_params') + relational_params = self.grouping_config.get("relational_params") for r_param in relational_params.keys(): if r_param not in self.data_dict.keys(): - self.data_dict[r_param] = {"Description": - "Scanning Parameter"} + self.data_dict[r_param] = {"Description": "Scanning Parameter"} - derived_params = self.grouping_config.get('derived_params') + derived_params = self.grouping_config.get("derived_params") for mod in derived_params.keys(): mod_dict = derived_params[mod] for d_param in mod_dict.keys(): if d_param not in self.data_dict.keys(): - self.data_dict[d_param] = {"Description": - "NIfTI Header Parameter"} + self.data_dict[d_param] = {"Description": "NIfTI Header Parameter"} # Manually add non-sidecar columns/descriptions to data_dict desc1 = "Column where users mark groups to manually check" @@ -905,24 +891,18 @@ def create_data_dictionary(self): self.data_dict["KeyParamGroup"]["Description"] = desc91 + desc92 def get_data_dictionary(self, df): - """Creates a BIDS data dictionary from dataframe columns + """Create a BIDS data dictionary from dataframe columns. - Parameters: - ----------- - - name: str - Data dictionary name (should be identical to filename of TSV) - - df: Pandas DataFrame - Pre export TSV that will be converted to a json dictionary + Parameters + ---------- + df : Pandas DataFrame + Pre export TSV that will be converted to a json dictionary - Returns: - ----------- - - data_dict: dictionary - Python dictionary in BIDS data dictionary format + Returns + ------- + data_dict : dictionary + Python dictionary in BIDS data dictionary format """ - json_dict = {} # Build column dictionary @@ -952,15 +932,17 @@ def get_data_dictionary(self, df): return json_dict def get_param_groups_dataframes(self): - '''Creates DataFrames of files x param groups and a summary''' - + """Create DataFrames of files x param groups and a summary.""" key_groups = self.get_key_groups() labeled_files = [] param_group_summaries = [] for key_group in key_groups: try: - labeled_file_params, param_summary, modality = \ - self.get_param_groups_from_key_group(key_group) + ( + labeled_file_params, + param_summary, + modality, + ) = self.get_param_groups_from_key_group(key_group) except Exception: continue if labeled_file_params is None: @@ -972,23 +954,20 @@ def get_param_groups_dataframes(self): # make Filepaths relative to bids dir for row in range(len(big_df)): - long_name = big_df.loc[row, 'FilePath'] - big_df.loc[row, 'FilePath'] = long_name.replace(self.path, '') + long_name = big_df.loc[row, "FilePath"] + big_df.loc[row, "FilePath"] = long_name.replace(self.path, "") - summary = _order_columns(pd.concat(param_group_summaries, - ignore_index=True)) + summary = _order_columns(pd.concat(param_group_summaries, ignore_index=True)) # create new col that strings key and param group together - summary["KeyParamGroup"] = summary["KeyGroup"] \ - + '__' + summary["ParamGroup"].map(str) + summary["KeyParamGroup"] = summary["KeyGroup"] + "__" + summary["ParamGroup"].map(str) # move this column to the front of the dataframe key_param_col = summary.pop("KeyParamGroup") summary.insert(0, "KeyParamGroup", key_param_col) # do the same for the files df - big_df["KeyParamGroup"] = big_df["KeyGroup"] \ - + '__' + big_df["ParamGroup"].map(str) + big_df["KeyParamGroup"] = big_df["KeyGroup"] + "__" + big_df["ParamGroup"].map(str) # move this column to the front of the dataframe key_param_col = big_df.pop("KeyParamGroup") @@ -1002,38 +981,37 @@ def get_param_groups_dataframes(self): # Now automate suggested rename based on variant params # loop though imaging and derived param keys - sidecar = self.grouping_config.get('sidecar_params') + sidecar = self.grouping_config.get("sidecar_params") sidecar = sidecar[modality] - relational = self.grouping_config.get('relational_params') + relational = self.grouping_config.get("relational_params") # list of columns names that we account for in suggested renaming - summary['RenameKeyGroup'] = summary['RenameKeyGroup'].apply(str) + summary["RenameKeyGroup"] = summary["RenameKeyGroup"].apply(str) rename_cols = [] tolerance_cols = [] for col in sidecar.keys(): - if 'suggest_variant_rename' in sidecar[col].keys(): - if sidecar[col]['suggest_variant_rename'] \ - and col in summary.columns: + if "suggest_variant_rename" in sidecar[col].keys(): + if sidecar[col]["suggest_variant_rename"] and col in summary.columns: rename_cols.append(col) - if 'tolerance' in sidecar[col].keys(): + if "tolerance" in sidecar[col].keys(): tolerance_cols.append(col) # deal with Fmap! - if 'FieldmapKey' in relational: - if 'suggest_variant_rename' in relational['FieldmapKey'].keys(): - if relational['FieldmapKey']['suggest_variant_rename']: + if "FieldmapKey" in relational: + if "suggest_variant_rename" in relational["FieldmapKey"].keys(): + if relational["FieldmapKey"]["suggest_variant_rename"]: # check if 'bool' or 'columns' - if relational['FieldmapKey']['display_mode'] == 'bool': + if relational["FieldmapKey"]["display_mode"] == "bool": rename_cols.append("HasFieldmap") # deal with IntendedFor Key! - if 'IntendedForKey' in relational: - if 'suggest_variant_rename' in relational['IntendedForKey'].keys(): - if relational['FieldmapKey']['suggest_variant_rename']: + if "IntendedForKey" in relational: + if "suggest_variant_rename" in relational["IntendedForKey"].keys(): + if relational["FieldmapKey"]["suggest_variant_rename"]: # check if 'bool' or 'columns' - if relational['IntendedForKey']['display_mode'] == 'bool': + if relational["IntendedForKey"]["display_mode"] == "bool": rename_cols.append("UsedAsFieldmap") dom_dict = {} @@ -1044,7 +1022,7 @@ def get_param_groups_dataframes(self): # summary.at[row, "NumVolumes"] = 1.0 # if dominant group identified - if str(summary.loc[row, 'ParamGroup']) == '1': + if str(summary.loc[row, "ParamGroup"]) == "1": val = {} # grab col, all vals send to dict key = summary.loc[row, "KeyGroup"] @@ -1058,7 +1036,7 @@ def get_param_groups_dataframes(self): # check to see if renaming has already happened renamed = False entities = _key_group_to_entities(summary.loc[row, "KeyGroup"]) - if 'VARIANT' in summary.loc[row, 'KeyGroup']: + if "VARIANT" in summary.loc[row, "KeyGroup"]: renamed = True # if NumVolumes is nan, set to 1.0 @@ -1067,81 +1045,79 @@ def get_param_groups_dataframes(self): # summary.at[row, "NumVolumes"] = 1.0 if summary.loc[row, "ParamGroup"] != 1 and not renamed: - acq_str = 'VARIANT' + acq_str = "VARIANT" # now we know we have a deviant param group # check if TR is same as param group 1 key = summary.loc[row, "KeyGroup"] for col in rename_cols: summary[col] = summary[col].apply(str) if summary.loc[row, col] != dom_dict[key][col]: - - if col == 'HasFieldmap': - if dom_dict[key][col] == 'True': - acq_str = acq_str + 'NoFmap' + if col == "HasFieldmap": + if dom_dict[key][col] == "True": + acq_str = acq_str + "NoFmap" else: - acq_str = acq_str + 'HasFmap' - elif col == 'UsedAsFieldmap': - if dom_dict[key][col] == 'True': - acq_str = acq_str + 'Unused' + acq_str = acq_str + "HasFmap" + elif col == "UsedAsFieldmap": + if dom_dict[key][col] == "True": + acq_str = acq_str + "Unused" else: - acq_str = acq_str + 'IsUsed' + acq_str = acq_str + "IsUsed" else: acq_str = acq_str + col - if acq_str == 'VARIANT': - acq_str = acq_str + 'Other' + if acq_str == "VARIANT": + acq_str = acq_str + "Other" - if 'acquisition' in entities.keys(): - acq = 'acquisition-%s' % entities['acquisition'] + acq_str + if "acquisition" in entities.keys(): + acq = f"acquisition-{entities['acquisition'] + acq_str}" new_name = summary.loc[row, "KeyGroup"].replace( - 'acquisition-%s' % entities['acquisition'], acq) + f"acquisition-{entities['acquisition']}", + acq, + ) else: - acq = 'acquisition-%s' % acq_str - new_name = acq + '_' + summary.loc[row, "KeyGroup"] + acq = f"acquisition-{acq_str}" + new_name = acq + "_" + summary.loc[row, "KeyGroup"] - summary.at[row, 'RenameKeyGroup'] = new_name + summary.at[row, "RenameKeyGroup"] = new_name # convert all "nan" to empty str # so they don't show up in the summary tsv - if summary.loc[row, "RenameKeyGroup"] == 'nan': - summary.at[row, "RenameKeyGroup"] = '' + if summary.loc[row, "RenameKeyGroup"] == "nan": + summary.at[row, "RenameKeyGroup"] = "" for col in rename_cols: - if summary.loc[row, col] == 'nan': - summary.at[row, col] = '' + if summary.loc[row, col] == "nan": + summary.at[row, col] = "" return (big_df, summary) - def get_TSVs(self, path_prefix): - """Creates the _summary and _files tsvs for the bids dataset. + def get_tsvs(self, path_prefix): + """Create the _summary and _files tsvs for the bids dataset. - Parameters: - ----------- - prefix_path: str - prefix of the path to the directory where you want - to save your tsvs - example path: /Users/Covitz/PennLINC/RBC/CCNP/ + Parameters + ---------- + path_prefix : str + prefix of the path to the directory where you want + to save your tsvs + example path: /Users/Covitz/PennLINC/RBC/CCNP/ """ - self._cache_fieldmaps() # check if path_prefix is absolute or relative # if relative, put output in BIDS_ROOT/code/CuBIDS/ dir - if '/' not in path_prefix: + if "/" not in path_prefix: # path is relative # first check if code/CuBIDS dir exits # if not, create it self.create_cubids_code_dir() # send outputs to code/CuBIDS in BIDS tree - path_prefix = self.path + '/code/CuBIDS/' + path_prefix + path_prefix = self.path + "/code/CuBIDS/" + path_prefix big_df, summary = self.get_param_groups_dataframes() - summary = summary.sort_values(by=['Modality', 'KeyGroupCount'], - ascending=[True, False]) - big_df = big_df.sort_values(by=['Modality', 'KeyGroupCount'], - ascending=[True, False]) + summary = summary.sort_values(by=["Modality", "KeyGroupCount"], ascending=[True, False]) + big_df = big_df.sort_values(by=["Modality", "KeyGroupCount"], ascending=[True, False]) # Create json dictionaries for summary and files tsvs self.create_data_dictionary() @@ -1160,14 +1136,12 @@ def get_TSVs(self, path_prefix): summary.to_csv(path_prefix + "_summary.tsv", sep="\t", index=False) # Calculate the acq groups - group_by_acquisition_sets(path_prefix + "_files.tsv", path_prefix, - self.acq_group_level) + group_by_acquisition_sets(path_prefix + "_files.tsv", path_prefix, self.acq_group_level) print("CuBIDS detected " + str(len(summary)) + " Parameter Groups.") def get_key_groups(self): - '''Identifies the key groups for the bids dataset''' - + """Identify the key groups for the bids dataset.""" # reset self.keys_files self.keys_files = {} @@ -1175,7 +1149,7 @@ def get_key_groups(self): for path in Path(self.path).rglob("sub-*/**/*.*"): # ignore all dot directories - if '/.' in str(path): + if "/." in str(path): continue if str(path).endswith(".nii") or str(path).endswith(".nii.gz"): @@ -1185,26 +1159,28 @@ def get_key_groups(self): ret = _file_to_key_group(path) if ret not in self.keys_files.keys(): - self.keys_files[ret] = [] self.keys_files[ret].append(path) return sorted(key_groups) - def change_metadata(self, filters, pattern, metadata): + def change_metadata(self, filters, metadata): + """Change metadata. - files_to_change = self.layout.get(return_type='object', **filters) + NOTE: Appears unused. + """ + files_to_change = self.layout.get(return_type="object", **filters) for bidsfile in files_to_change: # get the sidecar file # bidsjson_file = bidsfile.get_associations() - bidsjson_file = img_to_new_ext(str(bidsfile), '.json') + bidsjson_file = img_to_new_ext(str(bidsfile), ".json") if not bidsjson_file: print("NO JSON FILES FOUND IN ASSOCIATIONS") continue - json_file = [x for x in bidsjson_file if 'json' in x.filename] + json_file = [x for x in bidsjson_file if "json" in x.filename] if not len(json_file) == 1: print("FOUND IRREGULAR ASSOCIATIONS") @@ -1219,25 +1195,24 @@ def change_metadata(self, filters, pattern, metadata): _update_json(json_file.path, sidecar) def get_all_metadata_fields(self): - ''' Returns all metadata fields in a bids directory''' - + """Return all metadata fields in a bids directory.""" found_fields = set() for json_file in Path(self.path).rglob("*.json"): - if '.git' not in str(json_file): + if ".git" not in str(json_file): with open(json_file, "r") as jsonr: metadata = json.load(jsonr) found_fields.update(metadata.keys()) return sorted(found_fields) def remove_metadata_fields(self, fields_to_remove): - '''Removes specific fields from all metadata files.''' - + """Remove specific fields from all metadata files.""" remove_fields = set(fields_to_remove) if not remove_fields: return + for json_file in tqdm(Path(self.path).rglob("*.json")): # Check for offending keys in the json file - if '.git' not in str(json_file): + if ".git" not in str(json_file): with open(json_file, "r") as jsonr: metadata = json.load(jsonr) offending_keys = remove_fields.intersection(metadata.keys()) @@ -1254,99 +1229,98 @@ def remove_metadata_fields(self, fields_to_remove): # # # # FOR TESTING # # # # def get_filenames(self): + """Get filenames.""" return self.keys_files def get_fieldmap_lookup(self): + """Get fieldmap lookup.""" return self.fieldmap_lookup def get_layout(self): + """Get layout.""" return self.layout -def _validateJSON(json_file): +def _validate_json(): + """Validate a JSON file's contents. + + This is currently not implemented, but would accept metadata as its param. + """ # TODO: implement this or delete ??? return True def _update_json(json_file, metadata): - - if _validateJSON(metadata): - with open(json_file, 'w', encoding='utf-8') as f: + if _validate_json(): + with open(json_file, "w", encoding="utf-8") as f: json.dump(metadata, f, ensure_ascii=False, indent=4) else: print("INVALID JSON DATA") def _key_group_to_entities(key_group): - '''Splits a key_group name into a pybids dictionary of entities.''' - + """Split a key_group name into a pybids dictionary of entities.""" return dict([group.split("-") for group in key_group.split("_")]) def _entities_to_key_group(entities): - '''Converts a pybids entities dictionary into a key group name.''' - + """Convert a pybids entities dictionary into a key group name.""" group_keys = sorted(entities.keys() - NON_KEY_ENTITIES) - return "_".join( - ["{}-{}".format(key, entities[key]) for key in group_keys]) + return "_".join([f"{key}-{entities[key]}" for key in group_keys]) def _file_to_key_group(filename): - '''Identifies and returns the key group of a bids valid filename.''' - + """Identify and return the key group of a bids valid filename.""" entities = parse_file_entities(str(filename)) return _entities_to_key_group(entities) def _get_intended_for_reference(scan): - return '/'.join(Path(scan).parts[-3:]) - + return "/".join(Path(scan).parts[-3:]) -def _get_param_groups(files, layout, fieldmap_lookup, key_group_name, - grouping_config, modality, keys_files): - """Finds a list of *parameter groups* from a list of files. +def _get_param_groups( + files, + fieldmap_lookup, + key_group_name, + grouping_config, + modality, + keys_files, +): + """Find a list of *parameter groups* from a list of files. For each file in `files`, find critical parameters for metadata. Then find unique sets of these critical parameters. - Parameters: - ----------- + Parameters + ---------- files : list List of file names - - layout : bids.BIDSLayout - PyBIDS BIDSLayout object where `files` come from - fieldmap_lookup : defaultdict mapping of filename strings relative to the bids root (e.g. "sub-X/ses-Y/func/sub-X_ses-Y_task-rest_bold.nii.gz") - grouping_config : dict configuration for defining parameter groups - Returns: - -------- + Returns + ------- labeled_files : pd.DataFrame A data frame with one row per file where the ParamGroup column indicates which group each scan is a part of. - param_groups_with_counts : pd.DataFrame A data frame with param group summaries - """ - if not files: print("WARNING: no files for", key_group_name) return None, None # Split the config into separate parts - imaging_params = grouping_config.get('sidecar_params', {}) + imaging_params = grouping_config.get("sidecar_params", {}) imaging_params = imaging_params[modality] - relational_params = grouping_config.get('relational_params', {}) + relational_params = grouping_config.get("relational_params", {}) - derived_params = grouping_config.get('derived_params') + derived_params = grouping_config.get("derived_params") derived_params = derived_params[modality] imaging_params.update(derived_params) @@ -1356,9 +1330,9 @@ def _get_param_groups(files, layout, fieldmap_lookup, key_group_name, for path in files: # metadata = layout.get_metadata(path) - metadata = get_sidecar_metadata(img_to_new_ext(path, '.json')) + metadata = get_sidecar_metadata(img_to_new_ext(path, ".json")) if metadata == "Erroneous sidecar": - print('Error parsing sidecar: ', img_to_new_ext(path, '.json')) + print("Error parsing sidecar: ", img_to_new_ext(path, ".json")) else: intentions = metadata.get("IntendedFor", []) slice_times = metadata.get("SliceTiming", []) @@ -1368,20 +1342,20 @@ def _get_param_groups(files, layout, fieldmap_lookup, key_group_name, example_data["KeyGroup"] = key_group_name # Get the fieldmaps out and add their types - if 'FieldmapKey' in relational_params: - fieldmap_types = sorted([_file_to_key_group(fmap.path) for - fmap in fieldmap_lookup[path]]) + if "FieldmapKey" in relational_params: + fieldmap_types = sorted( + [_file_to_key_group(fmap.path) for fmap in fieldmap_lookup[path]] + ) # check if config says columns or bool - if relational_params['FieldmapKey']['display_mode'] == \ - 'bool': + if relational_params["FieldmapKey"]["display_mode"] == "bool": if len(fieldmap_types) > 0: - example_data['HasFieldmap'] = True + example_data["HasFieldmap"] = True else: - example_data['HasFieldmap'] = False + example_data["HasFieldmap"] = False else: for fmap_num, fmap_type in enumerate(fieldmap_types): - example_data['FieldmapKey%02d' % fmap_num] = fmap_type + example_data[f"FieldmapKey{fmap_num:02d}"] = fmap_type # Add the number of slice times specified if "NSliceTimes" in derived_params: @@ -1391,22 +1365,19 @@ def _get_param_groups(files, layout, fieldmap_lookup, key_group_name, # If it's a fieldmap, see what key group it's intended to correct if "IntendedForKey" in relational_params: - intended_key_groups = sorted([_file_to_key_group(intention) for - intention in intentions]) + intended_key_groups = sorted( + [_file_to_key_group(intention) for intention in intentions] + ) # check if config says columns or bool - if relational_params['IntendedForKey']['display_mode'] == \ - 'bool': + if relational_params["IntendedForKey"]["display_mode"] == "bool": if len(intended_key_groups) > 0: example_data["UsedAsFieldmap"] = True else: example_data["UsedAsFieldmap"] = False else: - for intention_num, intention_key_group in \ - enumerate(intended_key_groups): - example_data[ - "IntendedForKey%02d" % intention_num] = \ - intention_key_group + for intention_num, intention_key_group in enumerate(intended_key_groups): + example_data[f"IntendedForKey{intention_num:02d}"] = intention_key_group dfs.append(example_data) @@ -1422,12 +1393,12 @@ def _get_param_groups(files, layout, fieldmap_lookup, key_group_name, # get the subset of columns to drop duplicates by check_cols = [] for col in list(df.columns): - if "Cluster_" + col not in list(df.columns) and col != 'FilePath': + if "Cluster_" + col not in list(df.columns) and col != "FilePath": check_cols.append(col) # Find the unique ParamGroups and assign ID numbers in "ParamGroup"\ try: - deduped = df.drop('FilePath', axis=1) + deduped = df.drop("FilePath", axis=1) except Exception: return "erroneous sidecar found" @@ -1446,56 +1417,56 @@ def _get_param_groups(files, layout, fieldmap_lookup, key_group_name, value_counts = labeled_files.ParamGroup.value_counts() param_group_counts = pd.DataFrame( - {"Counts": value_counts.to_numpy(), - "ParamGroup": value_counts.index.to_numpy()}) + {"Counts": value_counts.to_numpy(), "ParamGroup": value_counts.index.to_numpy()} + ) - param_groups_with_counts = pd.merge( - deduped, param_group_counts, on=["ParamGroup"]) + param_groups_with_counts = pd.merge(deduped, param_group_counts, on=["ParamGroup"]) # Sort by counts and relabel the param groups - param_groups_with_counts.sort_values(by=['Counts'], inplace=True, - ascending=False) - param_groups_with_counts["ParamGroup"] = np.arange( - param_groups_with_counts.shape[0]) + 1 + param_groups_with_counts.sort_values(by=["Counts"], inplace=True, ascending=False) + param_groups_with_counts["ParamGroup"] = np.arange(param_groups_with_counts.shape[0]) + 1 # Send the new, ordered param group ids to the files list - ordered_labeled_files = pd.merge(df, param_groups_with_counts, - on=check_cols, suffixes=('_x', '')) + ordered_labeled_files = pd.merge( + df, param_groups_with_counts, on=check_cols, suffixes=("_x", "") + ) # sort ordered_labeled_files by param group - ordered_labeled_files.sort_values(by=['Counts'], inplace=True, - ascending=False) + ordered_labeled_files.sort_values(by=["Counts"], inplace=True, ascending=False) # now get rid of cluster cols from deduped and df for col in list(ordered_labeled_files.columns): - if col.startswith('Cluster_'): + if col.startswith("Cluster_"): ordered_labeled_files = ordered_labeled_files.drop(col, axis=1) - param_groups_with_counts = param_groups_with_counts.drop(col, - axis=1) - if col.endswith('_x'): + param_groups_with_counts = param_groups_with_counts.drop(col, axis=1) + if col.endswith("_x"): ordered_labeled_files = ordered_labeled_files.drop(col, axis=1) return ordered_labeled_files, param_groups_with_counts def round_params(param_group_df, config, modality): - to_format = config['sidecar_params'][modality] - to_format.update(config['derived_params'][modality]) + """Round parameters.""" + to_format = config["sidecar_params"][modality] + to_format.update(config["derived_params"][modality]) for column_name, column_fmt in to_format.items(): if column_name not in param_group_df: continue - if 'precision' in column_fmt: + if "precision" in column_fmt: if isinstance(param_group_df[column_name], float): - param_group_df[column_name] = \ - param_group_df[column_name].round(column_fmt['precision']) + param_group_df[column_name] = param_group_df[column_name].round( + column_fmt["precision"] + ) return param_group_df def get_sidecar_metadata(json_file): - # get all metadata values in a file's sidecar - # transform json dictionary to python dictionary + """Get all metadata values in a file's sidecar. + + Transform json dictionary to python dictionary. + """ try: with open(json_file) as json_file: data = json.load(json_file) @@ -1506,40 +1477,40 @@ def get_sidecar_metadata(json_file): def format_params(param_group_df, config, modality): - '''Run AgglomerativeClustering on param groups, add columns to dataframe''' - - to_format = config['sidecar_params'][modality] - to_format.update(config['derived_params'][modality]) + """Run AgglomerativeClustering on param groups and add columns to dataframe.""" + to_format = config["sidecar_params"][modality] + to_format.update(config["derived_params"][modality]) for column_name, column_fmt in to_format.items(): if column_name not in param_group_df: continue - if 'tolerance' in column_fmt and len(param_group_df) > 1: + if "tolerance" in column_fmt and len(param_group_df) > 1: array = param_group_df[column_name].to_numpy().reshape(-1, 1) for i in range(len(array)): if np.isnan(array[i, 0]): array[i, 0] = -999 - tolerance = to_format[column_name]['tolerance'] - clustering = AgglomerativeClustering(n_clusters=None, - distance_threshold=tolerance, - linkage='complete').fit(array) + tolerance = to_format[column_name]["tolerance"] + clustering = AgglomerativeClustering( + n_clusters=None, distance_threshold=tolerance, linkage="complete" + ).fit(array) for i in range(len(array)): if array[i, 0] == -999: array[i, 0] = np.nan # now add clustering_labels as a column - param_group_df['Cluster_' + column_name] = clustering.labels_ + param_group_df["Cluster_" + column_name] = clustering.labels_ return param_group_df def _order_columns(df): - '''Organizes columns of the summary and files DataFrames so that - KeyGroup and ParamGroup are the first two columns, FilePath is - the last, and the others are sorted alphabetically.''' + """Organize columns of the summary and files DataFrames. + This ensures that KeyGroup and ParamGroup are the first two columns, + FilePath is the last, and the others are sorted alphabetically. + """ cols = set(df.columns.to_list()) non_id_cols = cols - ID_VARS new_columns = ["KeyGroup", "ParamGroup"] + sorted(non_id_cols) @@ -1552,19 +1523,20 @@ def _order_columns(df): def img_to_new_ext(img_path, new_ext): + """Convert img to new extension.""" # handle .tsv edge case - if new_ext == '.tsv': + if new_ext == ".tsv": # take out suffix - return img_path.rpartition('_')[0] + '_events' + new_ext - if new_ext == '.tsv.gz': - return img_path.rpartition('_')[0] + '_physio' + new_ext + return img_path.rpartition("_")[0] + "_events" + new_ext + if new_ext == ".tsv.gz": + return img_path.rpartition("_")[0] + "_physio" + new_ext else: return img_path.replace(".nii.gz", "").replace(".nii", "") + new_ext def get_key_name(path, key): - # given a filepath and BIDS key name, return value + """Given a filepath and BIDS key name, return value.""" parts = Path(path).parts for part in parts: - if part.startswith(key + '-'): + if part.startswith(key + "-"): return part diff --git a/cubids/metadata_merge.py b/cubids/metadata_merge.py index 6f6af47cf..439eb2224 100644 --- a/cubids/metadata_merge.py +++ b/cubids/metadata_merge.py @@ -1,16 +1,19 @@ -"""Main module.""" +"""Tools for merging metadata.""" import json from collections import defaultdict +from copy import deepcopy +from math import isnan, nan + import numpy as np import pandas as pd -from copy import deepcopy -from math import nan, isnan -from .constants import IMAGING_PARAMS + +from cubids.constants import IMAGING_PARAMS + DIRECT_IMAGING_PARAMS = IMAGING_PARAMS - set(["NSliceTimes"]) def check_merging_operations(action_tsv, raise_on_error=False): - """Checks that the merges in an action tsv are possible. + """Check that the merges in an action tsv are possible. To be mergable the """ @@ -20,21 +23,23 @@ def check_merging_operations(action_tsv, raise_on_error=False): overwrite_merges = [] sdc_incompatible = [] - sdc_cols = set([col for col in actions.columns if - col.startswith("IntendedForKey") or - col.startswith("FieldmapKey")]) + sdc_cols = set( + [ + col + for col in actions.columns + if col.startswith("IntendedForKey") or col.startswith("FieldmapKey") + ] + ) def _check_sdc_cols(meta1, meta2): - return {key: meta1[key] for key in sdc_cols} == \ - {key: meta2[key] for key in sdc_cols} + return {key: meta1[key] for key in sdc_cols} == {key: meta2[key] for key in sdc_cols} - needs_merge = actions[np.isfinite(actions['MergeInto'])] + needs_merge = actions[np.isfinite(actions["MergeInto"])] for _, row_needs_merge in needs_merge.iterrows(): source_param_key = tuple(row_needs_merge[["MergeInto", "KeyGroup"]]) dest_param_key = tuple(row_needs_merge[["ParamGroup", "KeyGroup"]]) dest_metadata = row_needs_merge.to_dict() - source_row = actions.loc[ - (actions[["ParamGroup", "KeyGroup"]] == source_param_key).all(1)] + source_row = actions.loc[(actions[["ParamGroup", "KeyGroup"]] == source_param_key).all(1)] if source_param_key[0] == 0: print("going to delete ", dest_param_key) @@ -49,29 +54,36 @@ def _check_sdc_cols(meta1, meta2): sdc_incompatible.append(merge_id) continue - if not merge_without_overwrite(source_metadata, dest_metadata, - raise_on_error=raise_on_error): + if not merge_without_overwrite( + source_metadata, dest_metadata, raise_on_error=raise_on_error + ): overwrite_merges.append(merge_id) continue # add to the list of ok merges if there are no conflicts ok_merges.append(merge_id) - error_message = "\n\nProblems were found in the requested merge.\n" \ - "===========================================\n\n" + error_message = ( + "\n\nProblems were found in the requested merge.\n" + "===========================================\n\n" + ) if sdc_incompatible: - error_message += "Some merges are incompatible due to differing " \ - "distortion correction strategies. Check that " \ - "fieldmaps exist and have the correct " \ - "\"IntendedFor\" in their sidecars. These merges " \ - "could not be completed:\n" + error_message += ( + "Some merges are incompatible due to differing " + "distortion correction strategies. Check that " + "fieldmaps exist and have the correct " + '"IntendedFor" in their sidecars. These merges ' + "could not be completed:\n" + ) error_message += print_merges(sdc_incompatible) + "\n\n" if overwrite_merges: - error_message += "Some merges are incompatible because the metadata " \ - "in the destination json conflicts with the values " \ - "in the source json. Merging should only be used " \ - "to fill in missing metadata. The following " \ - "merges could not be completed:\n\n" + error_message += ( + "Some merges are incompatible because the metadata " + "in the destination json conflicts with the values " + "in the source json. Merging should only be used " + "to fill in missing metadata. The following " + "merges could not be completed:\n\n" + ) error_message += print_merges(overwrite_merges) if overwrite_merges or sdc_incompatible: @@ -82,7 +94,7 @@ def _check_sdc_cols(meta1, meta2): def merge_without_overwrite(source_meta, dest_meta_orig, raise_on_error=False): - """Performs a safe metadata copy. + """Perform a safe metadata copy. Here, "safe" means that no non-NaN values in `dest_meta` are overwritten by the merge. If any overwrites occur an empty @@ -93,10 +105,11 @@ def merge_without_overwrite(source_meta, dest_meta_orig, raise_on_error=False): if not source_meta.get("NSliceTimes") == dest_meta.get("NSliceTimes"): if raise_on_error: - raise Exception("Value for NSliceTimes is %d in destination " - "but %d in source" - % (source_meta.get("NSliceTimes"), - source_meta.get("NSliceTimes"))) + raise Exception( + "Value for NSliceTimes is %d in destination " + "but %d in source" + % (source_meta.get("NSliceTimes"), source_meta.get("NSliceTimes")) + ) return {} for parameter in DIRECT_IMAGING_PARAMS: source_value = source_meta.get(parameter, nan) @@ -110,34 +123,40 @@ def merge_without_overwrite(source_meta, dest_meta_orig, raise_on_error=False): # need to figure out if we can merge if not is_nan(dest_value) and source_value != dest_value: if raise_on_error: - raise Exception("Value for %s is %s in destination " - "but %s in source" - % (parameter, str(dest_value), - str(source_value))) + raise Exception( + f"Value for {parameter} is {dest_value} in destination " + f"but {source_value} in source" + ) + return {} + dest_meta[parameter] = source_value return dest_meta def is_nan(val): - '''Returns True if val is nan''' + """Return True if val is NaN.""" if not isinstance(val, float): return False + return isnan(val) def print_merges(merge_list): - """Print formatted text of merges""" - return "\n\t" + "\n\t".join( - ["%s \n\t\t-> %s" % ("%s:%d" % src_id[::-1], - "%s:%d" % dest_id[::-1]) for - src_id, dest_id in merge_list]) + """Print formatted text of merges.""" + merge_strings = [] + for src_id, dest_id in merge_list: + src_id_str = f"{src_id[-1]}:{src_id[0]}" + dest_id_str = f"{dest_id[-1]}:{dest_id[0]}" + merge_str = f"{src_id_str} \n\t\t-> {dest_id_str}" + merge_strings.append(merge_str) + + return "\n\t" + "\n\t".join(merge_strings) -def merge_json_into_json(from_file, to_file, - raise_on_error=False): - print("Merging imaging metadata from %s to %s" - % (from_file, to_file)) +def merge_json_into_json(from_file, to_file, raise_on_error=False): + """Merge imaging metadata into JSON.""" + print(f"Merging imaging metadata from {from_file} to {to_file}") with open(from_file, "r") as fromf: source_metadata = json.load(fromf) @@ -146,7 +165,8 @@ def merge_json_into_json(from_file, to_file, orig_dest_metadata = deepcopy(dest_metadata) merged_metadata = merge_without_overwrite( - source_metadata, dest_metadata, raise_on_error=raise_on_error) + source_metadata, dest_metadata, raise_on_error=raise_on_error + ) if not merged_metadata: return 255 @@ -160,20 +180,18 @@ def merge_json_into_json(from_file, to_file, return 0 -def get_acq_dictionary(df): - """Creates a BIDS data dictionary from dataframe columns - - Parameters: - ----------- - - df: Pandas DataFrame - Pre export TSV that will be converted to a json dictionary +def get_acq_dictionary(): + """Create a BIDS data dictionary from dataframe columns. - Returns: - ----------- + Parameters + ---------- + df: Pandas DataFrame + Pre export TSV that will be converted to a json dictionary - acq_dict: dictionary - Python dictionary in BIDS data dictionary format + Returns + ------- + acq_dict: dictionary + Python dictionary in BIDS data dictionary format """ acq_dict = {} acq_dict["subject"] = {"Description": "Participant ID"} @@ -186,25 +204,25 @@ def get_acq_dictionary(df): def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level): - '''Finds unique sets of Key/Param groups across subjects. - ''' - from bids.layout import parse_file_entities + """Find unique sets of Key/Param groups across subjects.""" from bids import config - config.set_option('extension_initial_dot', True) + from bids.layout import parse_file_entities + + config.set_option("extension_initial_dot", True) - files_df = pd.read_table(files_tsv, ) + files_df = pd.read_table( + files_tsv, + ) acq_groups = defaultdict(list) for _, row in files_df.iterrows(): file_entities = parse_file_entities(row.FilePath) - if acq_group_level == 'subject': - acq_id = (file_entities.get("subject"), - file_entities.get("session")) + if acq_group_level == "subject": + acq_id = (file_entities.get("subject"), file_entities.get("session")) acq_groups[acq_id].append((row.KeyGroup, row.ParamGroup)) else: acq_id = (file_entities.get("subject"), None) - acq_groups[acq_id].append((row.KeyGroup, row.ParamGroup, - file_entities.get("session"))) + acq_groups[acq_id].append((row.KeyGroup, row.ParamGroup, file_entities.get("session"))) # Map the contents to a list of subjects/sessions contents_to_subjects = defaultdict(list) @@ -225,38 +243,34 @@ def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level): acq_group_info = [] for groupnum, content_id_row in enumerate(descending_order, start=1): content_id = content_ids[content_id_row] - acq_group_info.append( - (groupnum, content_id_counts[content_id_row]) + content_id) + acq_group_info.append((groupnum, content_id_counts[content_id_row]) + content_id) for subject, session in contents_to_subjects[content_id]: grouped_sub_sess.append( - {"subject": 'sub-' + subject, - "session": session, - "AcqGroup": groupnum}) + {"subject": "sub-" + subject, "session": session, "AcqGroup": groupnum} + ) # Write the mapping of subject/session to acq_group_df = pd.DataFrame(grouped_sub_sess) - acq_group_df.to_csv(output_prefix + "_AcqGrouping.tsv", sep="\t", - index=False) + acq_group_df.to_csv(output_prefix + "_AcqGrouping.tsv", sep="\t", index=False) # Create data dictionary for acq group tsv - acq_dict = get_acq_dictionary(acq_group_df) + acq_dict = get_acq_dictionary() with open(output_prefix + "_AcqGrouping.json", "w") as outfile: json.dump(acq_dict, outfile, indent=4) # Write the summary of acq groups to a text file with open(output_prefix + "_AcqGroupInfo.txt", "w") as infotxt: - infotxt.write( - "\n".join([" ".join(map(str, line)) for line in acq_group_info])) + infotxt.write("\n".join([" ".join(map(str, line)) for line in acq_group_info])) # Create and save AcqGroupInfo data dictionary header_dict = {} - header_dict['Long Description'] = 'Acquisition Group Info' - description = 'https://cubids.readthedocs.io/en/latest/usage.html' - header_dict['Description'] = description - header_dict['Version'] = 'CuBIDS v1.0.5' + header_dict["Long Description"] = "Acquisition Group Info" + description = "https://cubids.readthedocs.io/en/latest/usage.html" + header_dict["Description"] = description + header_dict["Version"] = "CuBIDS v1.0.5" acq_info_dict = {} - acq_info_dict['AcqGroupInfo.txt Data Dictionary'] = header_dict + acq_info_dict["AcqGroupInfo.txt Data Dictionary"] = header_dict with open(output_prefix + "_AcqGroupInfo.json", "w") as outfile: json.dump(acq_info_dict, outfile, indent=4) diff --git a/cubids/validator.py b/cubids/validator.py index 9f48e3402..40a130b8c 100644 --- a/cubids/validator.py +++ b/cubids/validator.py @@ -1,24 +1,25 @@ -import subprocess +"""Methods for validating BIDS datasets.""" +import glob import json import logging import os -import glob import pathlib +import subprocess + import pandas as pd -logger = logging.getLogger('cubids-cli') +logger = logging.getLogger("cubids-cli") def build_validator_call(path, ignore_headers=False, ignore_subject=True): - """Build a subprocess command to the bids validator""" - + """Build a subprocess command to the bids validator.""" # build docker call - command = ['bids-validator', '--verbose', '--json'] + command = ["bids-validator", "--verbose", "--json"] if ignore_headers: - command.append('--ignoreNiftiHeaders') + command.append("--ignoreNiftiHeaders") if ignore_subject: - command.append('--ignoreSubjectConsistency') + command.append("--ignoreSubjectConsistency") command.append(path) @@ -26,8 +27,7 @@ def build_validator_call(path, ignore_headers=False, ignore_subject=True): def build_subject_paths(bids_dir): - """Build a list of BIDS dirs with 1 subject each""" - + """Build a list of BIDS dirs with 1 subject each.""" bids_dir = str(bids_dir) if not bids_dir.endswith("/"): bids_dir += "/" @@ -39,10 +39,7 @@ def build_subject_paths(bids_dir): subjects = glob.glob(bids_dir) if len(subjects) < 1: - - raise ValueError("Couldn't find any subjects " - "in the specified directory:\n" + - bids_dir) + raise ValueError("Couldn't find any subjects " "in the specified directory:\n" + bids_dir) subjects_dict = {} @@ -50,34 +47,34 @@ def build_subject_paths(bids_dir): purepath = pathlib.PurePath(sub) sub_label = purepath.name - files = [x for x in glob.glob(sub + '**', recursive=True) - if os.path.isfile(x)] + files = [x for x in glob.glob(sub + "**", recursive=True) if os.path.isfile(x)] files.extend(root_files) subjects_dict[sub_label] = files return subjects_dict -def run_validator(call, verbose=True): - """Run the validator with subprocess""" +def run_validator(call): + """Run the validator with subprocess.""" # if verbose: # logger.info("Running the validator with call:") # logger.info('\"' + ' '.join(call) + '\"') - ret = subprocess.run(call, stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - return (ret) + ret = subprocess.run(call, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + return ret def parse_validator_output(output): - """Parse the JSON output of the BIDS validator into a pandas dataframe - Parameters: - ----------- - - path : string - Path to JSON file of BIDS validator output + """Parse the JSON output of the BIDS validator into a pandas dataframe. + + Parameters + ---------- + path : string + Path to JSON file of BIDS validator output + Returns - ----------- - - Pandas DataFrame + ------- + Pandas DataFrame """ def get_nested(dct, *keys): @@ -90,33 +87,29 @@ def get_nested(dct, *keys): data = json.loads(output) - issues = data['issues'] + issues = data["issues"] def parse_issue(issue_dict): - return_dict = {} - return_dict['files'] = [ - get_nested(x, 'file', 'relativePath') - for x in issue_dict.get('files', '') - ] - return_dict['type'] = issue_dict.get('key' '') - return_dict['severity'] = issue_dict.get('severity', '') - return_dict['description'] = issue_dict.get('reason', '') - return_dict['code'] = issue_dict.get('code', '') - return_dict['url'] = issue_dict.get('helpUrl', '') - - return (return_dict) + return_dict["files"] = [ + get_nested(x, "file", "relativePath") for x in issue_dict.get("files", "") + ] + return_dict["type"] = issue_dict.get("key" "") + return_dict["severity"] = issue_dict.get("severity", "") + return_dict["description"] = issue_dict.get("reason", "") + return_dict["code"] = issue_dict.get("code", "") + return_dict["url"] = issue_dict.get("helpUrl", "") - df = pd.DataFrame() + return return_dict - for warn in issues['warnings']: + df = pd.DataFrame() + for warn in issues["warnings"]: parsed = parse_issue(warn) parsed = pd.DataFrame(parsed) df = pd.concat([df, parsed], ignore_index=True) - for err in issues['errors']: - + for err in issues["errors"]: parsed = parse_issue(err) parsed = pd.DataFrame(parsed) df = pd.concat([df, parsed], ignore_index=True) @@ -124,7 +117,8 @@ def parse_issue(issue_dict): return df -def get_val_dictionary(df): +def get_val_dictionary(): + """Get value dictionary.""" val_dict = {} val_dict["files"] = {"Description": "File with warning orerror"} val_dict["type"] = {"Description": "BIDS validation warning or error"} diff --git a/notebooks/Key_and_Param_Groups.ipynb b/notebooks/Key_and_Param_Groups.ipynb index 2a8ab35b5..94c49359e 100644 --- a/notebooks/Key_and_Param_Groups.ipynb +++ b/notebooks/Key_and_Param_Groups.ipynb @@ -119,7 +119,7 @@ "# generating tsvs\n", "\n", "bod = CuBIDS(data_root2)\n", - "out = bod.get_TSVs('/Users/scovitz/CuBIDS/notebooks/NewTests')\n", + "out = bod.get_tsvs('/Users/scovitz/CuBIDS/notebooks/NewTests')\n", "out" ] }, diff --git a/pyproject.toml b/pyproject.toml index 10b94d6a1..83bada467 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,2 +1,51 @@ [build-system] requires = ["setuptools >= 40.8.0", "wheel"] + +# +# Developer tool configurations +# + +[tool.black] +line-length = 99 +target-version = ['py38'] +include = '\.pyi?$' +exclude = ''' +( + /( + \.eggs # exclude a few common directories in the + | \.git # root of the project + | \.github + | \.hg + | \.pytest_cache + | _build + | build + | dist + )/ + | versioneer.py + | cubids/_version.py +) +''' + +[tool.isort] +profile = "black" +multi_line_output = 3 +src_paths = ["isort", "test"] +known_local_folder = ["cubids"] + +[tool.flake8] +max-line-length = "99" +doctests = "False" +exclude = [ + "*build/", + "cubids/_version.py", + "cubids/_warnings.py", + "cubids/config.py", + "cubids/data/", + "cubids/tests/", + "cubids/utils/sentry.py", +] +ignore = ["D107", "E203", "E402", "E722", "W503", "N803", "N806", "N815"] +per-file-ignores = [ + "**/__init__.py : F401", + "docs/conf.py : E265", +] diff --git a/setup.cfg b/setup.cfg index 8f0f2ab0f..31b4cb7fd 100644 --- a/setup.cfg +++ b/setup.cfg @@ -46,6 +46,14 @@ test_requires = wheel==0.33.6 watchdog==0.9.0 flake8==3.7.8 + flake8-absolute-import + flake8-black + flake8-docstrings + flake8-isort + flake8-pyproject + flake8-unused-arguments + flake8-use-fstring + pep8-naming tox==3.14.0 coverage==4.5.4 Sphinx==2.2.0 diff --git a/tests/test_bond.py b/tests/test_bond.py index 033e1bae0..f9fda0d06 100644 --- a/tests/test_bond.py +++ b/tests/test_bond.py @@ -89,7 +89,7 @@ def test_copy_exemplars(tmp_path): data_root = get_data(tmp_path) bod = CuBIDS(data_root / "complete", use_datalad=True) tsv_prefix = str(tmp_path / "tsvs") - bod.get_TSVs(tsv_prefix) + bod.get_tsvs(tsv_prefix) acq_group_tsv = tsv_prefix + "_AcqGrouping.tsv" print("ACQ GROUP PATH: ", acq_group_tsv) exemplars_dir = str(tmp_path / "exemplars") @@ -203,7 +203,7 @@ def test_add_nifti_info_datalad(tmp_path): data_root = get_data(tmp_path) bod = CuBIDS(data_root / "complete", use_datalad=True, force_unlock=True) tsv_prefix = str(tmp_path / "tsvs") - bod.get_TSVs(tsv_prefix) + bod.get_tsvs(tsv_prefix) summary_tsv = tsv_prefix + "_summary.tsv" summary_df = pd.read_table(summary_tsv) l_cols = summary_df.columns.tolist() @@ -224,7 +224,7 @@ def test_add_nifti_info_datalad(tmp_path): assert 'ImageOrientation' in found_fields # nifti_tsv_prefix = str(tmp_path / "nifti_tsvs") - # bod.get_TSVs(nifti_tsv_prefix) + # bod.get_tsvs(nifti_tsv_prefix) # nifti_summary_tsv = nifti_tsv_prefix + "_summary.tsv" # nifti_summary_df = pd.read_table(nifti_summary_tsv) # nifti_l_cols = nifti_summary_df.columns.tolist() @@ -248,7 +248,7 @@ def test_add_nifti_info_no_datalad(tmp_path): assert 'ImageOrientation' in found_fields # tsv_prefix = str(tmp_path / "tsvs") - # bod.get_TSVs(tsv_prefix) + # bod.get_tsvs(tsv_prefix) # summary_tsv = tsv_prefix + "_summary.tsv" # summary_df = pd.read_table(summary_tsv) # l_cols = summary_df.columns.tolist() @@ -262,7 +262,7 @@ def test_tsv_merge_no_datalad(tmp_path): # Get an initial grouping summary and files list tsv_prefix = str(tmp_path / "originals") - bod.get_TSVs(tsv_prefix) + bod.get_tsvs(tsv_prefix) original_summary_tsv = tsv_prefix + "_summary.tsv" original_files_tsv = tsv_prefix + "_files.tsv" @@ -329,7 +329,7 @@ def test_tsv_merge_changes(tmp_path): # Get an initial grouping summary and files list tsv_prefix = str(tmp_path / "originals") - bod.get_TSVs(tsv_prefix) + bod.get_tsvs(tsv_prefix) original_summary_tsv = tsv_prefix + "_summary.tsv" original_files_tsv = tsv_prefix + "_files.tsv" @@ -653,7 +653,7 @@ def test_apply_tsv_changes(tmp_path): complete_cubids = CuBIDS(data_root / "complete", use_datalad=True) complete_cubids.datalad_save() - complete_cubids.get_TSVs(str(tmp_path / "originals")) + complete_cubids.get_tsvs(str(tmp_path / "originals")) # give tsv with no changes (make sure it does nothing) complete_cubids.apply_tsv_changes(str(tmp_path / "originals_summary.tsv"), @@ -757,7 +757,7 @@ def test_session_apply(tmp_path): ses_cubids = CuBIDS(data_root / "inconsistent", acq_group_level='session', use_datalad=True) - ses_cubids.get_TSVs(str(tmp_path / "originals")) + ses_cubids.get_tsvs(str(tmp_path / "originals")) # give tsv and make sure 'session' is in summary both pre and post apply ses_cubids.apply_tsv_changes(str(tmp_path / "originals_summary.tsv"),