Skip to content

Commit

Permalink
Add DatasetLinks to dataset_description.json (#77)
Browse files Browse the repository at this point in the history
* Add dataset links.

* Fix stuff.

* Add Freesurfer folder.

* Update bids.py
  • Loading branch information
tsalo authored Aug 22, 2024
1 parent 0c9bdce commit 4aff38a
Show file tree
Hide file tree
Showing 4 changed files with 98 additions and 44 deletions.
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ dependencies = [
"nireports ~= 23.2.2",
"niworkflows >=1.9,<= 1.10",
"numpy <= 1.26.3",
"packaging", # for version string parsing
"pandas < 2.0.0",
"psutil <= 5.9.8",
"pyAFQ == 1.3.2",
Expand Down
17 changes: 15 additions & 2 deletions qsirecon/cli/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ def main():
write_derivative_description(
config.execution.bids_dir,
config.execution.output_dir,
# dataset_links=config.execution.dataset_links,
dataset_links=config.execution.dataset_links,
)
write_bidsignore(config.execution.output_dir)

Expand All @@ -192,6 +192,19 @@ def main():
config.execution.output_dir / "derivatives" / f"qsirecon-{qsirecon_suffix}"
)

# Add other pipeline-specific suffixes to the dataset links
other_suffixes = [s for s in qsirecon_suffixes if s != qsirecon_suffix]
dataset_links = config.execution.dataset_links.copy()
dataset_links["qsirecon"] = str(config.execution.output_dir)
dataset_links.update(
{
f"qsirecon-{s}": str(
config.execution.output_dir / "derivatives" / f"qsirecon-{s}"
)
for s in other_suffixes
}
)

# Copy the boilerplate files
copy_boilerplate(config.execution.output_dir, suffix_dir)

Expand All @@ -206,7 +219,7 @@ def main():
write_derivative_description(
config.execution.bids_dir,
suffix_dir,
# dataset_links=config.execution.dataset_links,
dataset_links=dataset_links,
)
write_bidsignore(suffix_dir)

Expand Down
8 changes: 7 additions & 1 deletion qsirecon/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,8 @@ class execution(_Config):
"""A dictionary of dataset links to be used to track Sources in sidecars."""
aggr_ses_reports = 4 # TODO: Change to None when implemented on command line
"""Maximum number of sessions aggregated in one subject's visual report."""
dataset_links = {}
"""A dictionary of dataset links to be used to track Sources in sidecars."""

_layout = None

Expand All @@ -451,6 +453,7 @@ class execution(_Config):
"output_dir",
"templateflow_home",
"work_dir",
"dataset_links",
)

@classmethod
Expand Down Expand Up @@ -516,9 +519,12 @@ def _process_value(value):
cls.bids_filters[acq][k] = _process_value(v)

dataset_links = {
"raw": cls.bids_dir,
"preprocessed": cls.bids_dir,
"templateflow": Path(TF_LAYOUT.root),
}
if cls.fs_subjects_dir:
dataset_links["freesurfer"] = cls.fs_subjects_dir

for deriv_name, deriv_path in cls.derivatives.items():
dataset_links[deriv_name] = deriv_path
cls.dataset_links = dataset_links
Expand Down
116 changes: 75 additions & 41 deletions qsirecon/utils/bids.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@

from bids import BIDSLayout
from nipype.pipeline import engine as pe
from packaging.version import Version

from .. import config

Expand Down Expand Up @@ -141,59 +142,92 @@ def collect_participants(bids_dir, participant_label=None, strict=False, bids_va
return found_label


def write_derivative_description(bids_dir, deriv_dir, dataset_links=None):
    """Write ``dataset_description.json`` for a QSIRecon output dataset.

    The description of the input dataset is loaded from ``bids_dir`` (an empty
    description is used, with a warning, when the file is missing), updated
    with QSIRecon provenance, and written to ``deriv_dir``.

    Parameters
    ----------
    bids_dir : str or path-like
        Input dataset directory. Its ``dataset_description.json`` seeds the
        output description, and the directory itself is recorded as the
        ``preprocessed`` entry of ``DatasetLinks``.
    deriv_dir : str or path-like
        Directory in which ``dataset_description.json`` is written.
    dataset_links : dict or None, optional
        Mapping of dataset-link name to location. Values are converted with
        ``str``. The ``templateflow`` key is always recorded as the
        TemplateFlow GitHub URL instead of the value passed in.

    Raises
    ------
    ValueError
        If the input description declares a ``DatasetType`` other than
        ``"derivative"``.
    """
    from packaging.version import InvalidVersion

    from qsirecon import __version__

    # Keys deriving from source dataset
    orig_dset_description = os.path.join(bids_dir, "dataset_description.json")
    if os.path.exists(orig_dset_description):
        with open(orig_dset_description) as fobj:
            dset_desc = json.load(fobj)
    else:
        config.loggers.utils.warning(f"Dataset description DNE: {orig_dset_description}")
        dset_desc = {}

    # Check if the dataset type is derivative
    if "DatasetType" not in dset_desc:
        config.loggers.utils.warning(
            f"DatasetType key not in {orig_dset_description}. Assuming 'derivative'."
        )
        dset_desc["DatasetType"] = "derivative"
    elif dset_desc["DatasetType"] != "derivative":
        raise ValueError(
            f"DatasetType key in {orig_dset_description} is not 'derivative'. "
            "QSIRecon only works on derivative datasets."
        )

    dset_desc["Name"] = "QSIRecon output"
    download_url = f"https://github.com/PennLINC/qsirecon/archive/{__version__}.tar.gz"
    # QSIRecon goes first so that GeneratedBy[0] always describes this run;
    # entries inherited from the input dataset are kept after it.
    dset_desc["GeneratedBy"] = [
        {
            "Name": "qsirecon",
            "Version": __version__,
            "CodeURL": download_url,
        },
        *dset_desc.get("GeneratedBy", []),
    ]
    dset_desc["HowToAcknowledge"] = "Include the generated boilerplate in the methods section."

    # Keys that can only be set by environment
    if "QSIRECON_DOCKER_TAG" in os.environ:
        dset_desc["GeneratedBy"][0]["Container"] = {
            "Type": "docker",
            "Tag": f"pennlinc/qsirecon:{os.environ['QSIRECON_DOCKER_TAG']}",
        }
    elif "QSIRECON_SINGULARITY_URL" in os.environ:
        dset_desc["GeneratedBy"][0]["Container"] = {
            "Type": "singularity",
            "URI": os.environ["QSIRECON_SINGULARITY_URL"],
        }

    if "DatasetDOI" in dset_desc:
        dset_desc["SourceDatasetsURLs"] = [f"https://doi.org/{dset_desc['DatasetDOI']}"]

    # Add DatasetLinks. Resolve the final value of every link first, so that
    # the "Overwriting" warning is only emitted when a link inherited from the
    # input description really changes (not because this function set
    # 'preprocessed' itself a moment earlier).
    new_links = {"preprocessed": str(bids_dir)}
    for key, value in (dataset_links or {}).items():
        if key == "templateflow":
            # Record a portable URL rather than a machine-specific path.
            value = "https://github.com/templateflow/templateflow"

        new_links[key] = str(value)

    links = dset_desc.setdefault("DatasetLinks", {})
    for key, value in new_links.items():
        if key in links and links[key] != value:
            config.loggers.utils.warning(f"'{key}' is already a dataset link. Overwriting.")

        links[key] = value

    out_dset_description = os.path.join(deriv_dir, "dataset_description.json")
    if os.path.isfile(out_dset_description):
        with open(out_dset_description, "r") as fo:
            old_dset_desc = json.load(fo)

        # The existing file may lack GeneratedBy/Version or hold a version
        # string that cannot be parsed; that must not block writing the new
        # description.
        try:
            old_version = old_dset_desc["GeneratedBy"][0]["Version"]
            version_changed = Version(__version__).public != Version(old_version).public
        except (KeyError, IndexError, TypeError, InvalidVersion):
            config.loggers.utils.warning(
                f"Could not determine the version that generated {out_dset_description}."
            )
        else:
            if version_changed:
                config.loggers.utils.warning(
                    f"Previous output generated by version {old_version} found."
                )

    with open(out_dset_description, "w") as fo:
        json.dump(dset_desc, fo, indent=4, sort_keys=True)


def write_bidsignore(deriv_dir):
Expand Down

0 comments on commit 4aff38a

Please sign in to comment.