diff --git a/pyproject.toml b/pyproject.toml
index d9557018..dc452272 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -31,6 +31,7 @@ dependencies = [
     "nireports ~= 23.2.2",
     "niworkflows >=1.9,<= 1.10",
     "numpy <= 1.26.3",
+    "packaging",  # for version string parsing
     "pandas < 2.0.0",
     "psutil <= 5.9.8",
     "pyAFQ == 1.3.2",
diff --git a/qsirecon/cli/run.py b/qsirecon/cli/run.py
index 44a6d3d7..5aea076e 100644
--- a/qsirecon/cli/run.py
+++ b/qsirecon/cli/run.py
@@ -179,7 +179,7 @@ def main():
     write_derivative_description(
         config.execution.bids_dir,
         config.execution.output_dir,
-        # dataset_links=config.execution.dataset_links,
+        dataset_links=config.execution.dataset_links,
     )
     write_bidsignore(config.execution.output_dir)
@@ -192,6 +192,19 @@ def main():
             config.execution.output_dir / "derivatives" / f"qsirecon-{qsirecon_suffix}"
         )
 
+        # Add other pipeline-specific suffixes to the dataset links
+        other_suffixes = [s for s in qsirecon_suffixes if s != qsirecon_suffix]
+        dataset_links = config.execution.dataset_links.copy()
+        dataset_links["qsirecon"] = str(config.execution.output_dir)
+        dataset_links.update(
+            {
+                f"qsirecon-{s}": str(
+                    config.execution.output_dir / "derivatives" / f"qsirecon-{s}"
+                )
+                for s in other_suffixes
+            }
+        )
+
         # Copy the boilerplate files
         copy_boilerplate(config.execution.output_dir, suffix_dir)
@@ -206,7 +219,7 @@ def main():
         write_derivative_description(
             config.execution.bids_dir,
             suffix_dir,
-            # dataset_links=config.execution.dataset_links,
+            dataset_links=dataset_links,
        )
         write_bidsignore(suffix_dir)
diff --git a/qsirecon/config.py b/qsirecon/config.py
index 155f92a5..281b72a6 100644
--- a/qsirecon/config.py
+++ b/qsirecon/config.py
@@ -435,6 +435,8 @@ class execution(_Config):
     """A dictionary of dataset links to be used to track Sources in sidecars."""
     aggr_ses_reports = 4  # TODO: Change to None when implemented on command line
     """Maximum number of sessions aggregated in one subject's visual report."""
+    dataset_links = {}
+    """A dictionary of dataset links to be used to track Sources in sidecars."""
 
     _layout = None
@@ -451,6 +453,7 @@ class execution(_Config):
         "output_dir",
         "templateflow_home",
         "work_dir",
+        "dataset_links",
     )
 
     @classmethod
@@ -516,9 +519,12 @@ def _process_value(value):
                     cls.bids_filters[acq][k] = _process_value(v)
 
         dataset_links = {
-            "raw": cls.bids_dir,
+            "preprocessed": cls.bids_dir,
             "templateflow": Path(TF_LAYOUT.root),
         }
+        if cls.fs_subjects_dir:
+            dataset_links["freesurfer"] = cls.fs_subjects_dir
+
         for deriv_name, deriv_path in cls.derivatives.items():
             dataset_links[deriv_name] = deriv_path
         cls.dataset_links = dataset_links
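Taken together, the `config.py` and `run.py` changes above build one link map per pipeline suffix. Here is a minimal sketch of the resulting mapping; the paths and the two suffix names (`dsistudio`, `tortoise`) are hypothetical:

```python
# Sketch only: all paths and suffix names below are made up for illustration.
from pathlib import Path

output_dir = Path("/out/qsirecon")

# Built in config.execution.init(): QSIRecon's input is preprocessed
# derivatives, hence the "preprocessed" key (previously "raw").
base_links = {
    "preprocessed": "/data/qsiprep",
    "templateflow": "/home/user/.cache/templateflow",
    "freesurfer": "/data/freesurfer",  # only added when fs_subjects_dir is set
}

# Extended in cli/run.py for each pipeline-specific output dataset:
qsirecon_suffixes = ["dsistudio", "tortoise"]
for qsirecon_suffix in qsirecon_suffixes:
    other_suffixes = [s for s in qsirecon_suffixes if s != qsirecon_suffix]
    dataset_links = base_links.copy()
    dataset_links["qsirecon"] = str(output_dir)
    dataset_links.update(
        {
            f"qsirecon-{s}": str(output_dir / "derivatives" / f"qsirecon-{s}")
            for s in other_suffixes
        }
    )
    # e.g. for "dsistudio": links to the main dataset plus "qsirecon-tortoise"
    print(qsirecon_suffix, dataset_links)
```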
diff --git a/qsirecon/utils/bids.py b/qsirecon/utils/bids.py
index 21173374..041a3956 100644
--- a/qsirecon/utils/bids.py
+++ b/qsirecon/utils/bids.py
@@ -32,6 +32,7 @@
 from bids import BIDSLayout
 from nipype.pipeline import engine as pe
+from packaging.version import Version
 
 from .. import config
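The new `packaging` dependency is used in the hunk below to compare `.public` versions, which drop the local segment (everything after `+`), so rebuilds of the same release do not trigger the version-mismatch warning. For instance:

```python
from packaging.version import Version

# .public strips the local segment ("+g1f2e3d4") but keeps
# release, pre-, post-, and dev-release parts.
assert Version("1.0.0+g1f2e3d4").public == "1.0.0"
assert Version("1.0.0").public == Version("1.0.0+g1f2e3d4").public  # same release
assert Version("1.0.0").public != Version("1.0.1").public           # different release
```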
Overwriting.") + + if key == "templateflow": + value = "https://github.com/templateflow/templateflow" + + dset_desc["DatasetLinks"][key] = str(value) + + out_dset_description = os.path.join(deriv_dir, "dataset_description.json") + if os.path.isfile(out_dset_description): + with open(out_dset_description, "r") as fo: + old_dset_desc = json.load(fo) + + old_version = old_dset_desc["GeneratedBy"][0]["Version"] + if Version(__version__).public != Version(old_version).public: + config.loggers.utils.warning( + f"Previous output generated by version {old_version} found." + ) - # Keys deriving from source dataset - fname = os.path.join(bids_dir, "dataset_description.json") - if os.path.exists(fname): - with open(fname) as fobj: - orig_desc = json.load(fobj) else: - orig_desc = {} - - if "DatasetDOI" in orig_desc: - desc["SourceDatasetsURLs"] = ["https://doi.org/{}".format(orig_desc["DatasetDOI"])] - if "License" in orig_desc: - desc["License"] = orig_desc["License"] - - with open(os.path.join(deriv_dir, "dataset_description.json"), "w") as fobj: - json.dump(desc, fobj, indent=4) + with open(out_dset_description, "w") as fo: + json.dump(dset_desc, fo, indent=4, sort_keys=True) def write_bidsignore(deriv_dir):