From 1ee09d27944e4b69f54e32e80161c7b72fe59e96 Mon Sep 17 00:00:00 2001 From: Alejandro de la Vega Date: Mon, 19 Dec 2022 17:50:58 -0600 Subject: [PATCH 1/4] Simplify installation code by loading from pyNS --- neuroscout_cli/commands/get.py | 97 ++++++++-------------------------- 1 file changed, 21 insertions(+), 76 deletions(-) diff --git a/neuroscout_cli/commands/get.py b/neuroscout_cli/commands/get.py index 5987308..32c60db 100644 --- a/neuroscout_cli/commands/get.py +++ b/neuroscout_cli/commands/get.py @@ -2,16 +2,16 @@ import tarfile import logging import sys +import datalad from pathlib import Path from shutil import copy from packaging import version from neuroscout_cli.commands.base import Command from neuroscout_cli import __version__ as VERSION -from datalad.api import install, get -import datalad from bids.utils import convert_JSON -from ..tools.convert import check_convert_model +from ..tools.convert import check_convert_mode +from pyns.fetch_utils import fetch_preproc datalad.ui.ui.set_backend('console') @@ -32,7 +32,7 @@ def __init__(self, options): self.main_dir.mkdir(parents=True, exist_ok=True) self.bundle_dir.mkdir(parents=True, exist_ok=True) - def download_bundle(self, no_get=False): + def download(self, no_get=False, bundle_only=False): """ Download analysis bundle and setup preproc dir """ # If tarball doesn't exist, download it bundle_tarball = self.bundle_dir / f'{self.bundle_id}.tar.gz' @@ -58,81 +58,25 @@ def download_bundle(self, no_get=False): (self.bundle_dir / 'model.json').absolute() ) # Convert if necessary - self.dataset_dir = self.download_dir / self.resources['dataset_name'] - - # Install DataLad dataset if dataset_dir does not exist - if not self.dataset_dir.exists() and not no_get: - # Use datalad to install the preproc dataset - install(source=self.resources['preproc_address'], - path=str(self.dataset_dir)) - - for option in ['preproc', 'fmriprep']: - if (self.dataset_dir / option).exists(): - self.preproc_dir = (self.dataset_dir / option).absolute() - break - else: - self.preproc_dir = self.dataset_dir - - return 0 - - def download_data(self, no_get=False): - """ Use DataLad to download necessary data to disk """ + # Load model with self.model_path.open() as f: model = convert_JSON(json.load(f)) - try: - # Custom logic to fetch and avoid indexing dataset - paths = [] - - # Custom logic to fetch relevant files - # Avoiding PyBIDS for peformance gains in indexing - tasks = model['input'].get('task', '') - if not isinstance(tasks, list): - subjects = [tasks] - tasks = [ f'task-{t}*' for t in tasks] - - subjects = model['input'].get('subject', ['*']) - if not isinstance(subjects, list): - subjects = [subjects] - - run_ids = model['input'].get('run', ['']) - if not isinstance(run_ids, list): - runs = [run_ids] - runs = [f'run-{r}*' if r else r for r in run_ids] - runs += [f'run-{str(r).zfill(2)}*' if r else r for r in run_ids] - runs = list(set(runs)) - - for sub in subjects: - for run in runs: - for task in tasks: - pre = f'sub-{sub}/**/func/*{task}{run}space-MNI152NLin2009cAsym*' - paths += list(self.preproc_dir.glob(pre + 'preproc*.nii.gz')) - paths += list(self.preproc_dir.glob(pre + 'brain_mask.nii.gz')) - - if not paths: - raise Exception("No images suitable for download.") - - # Get all JSON files - paths += list(self.preproc_dir.rglob('*.json')) - - # Get with DataLad - if not no_get: - get([str(p) for p in paths], dataset=self.dataset_dir, jobs=self.options['datalad_jobs']) + self.preproc_dir, paths = fetch_preproc( + self.resources['dataset_name'], self.dataset_dir, no_get=no_get, + preproc_address=self.resources['preproc_address'], + datalad_jobs=self.options.get('datalad_jobs', -1), **model['input']) - except Exception as exp: - if hasattr(exp, 'failed'): - message = exp.failed[0]['message'] - raise ValueError("Datalad failed. Reason: {}".format(message)) - else: - raise exp + self.dataset_dir = self.download_dir / self.resources['dataset_name'] - # Copy meta-data to root of preproc_dir - meta = list(self.bundle_dir.glob('task-*json'))[0] - if not (self.preproc_dir/ meta.parts[-1]).exists(): - copy(meta, self.preproc_dir) + if not bundle_only: + # Copy meta-data to root of preproc_dir + meta = list(self.bundle_dir.glob('task-*json'))[0] + if not (self.preproc_dir/ meta.parts[-1]).exists(): + copy(meta, self.preproc_dir) return 0 - + def _check_version(self): # Check version req = self.resources.get('version_required', 0.3) @@ -149,10 +93,11 @@ def _check_version(self): sys.exit(1) def run(self, no_get=False): - retcode = self.download_bundle(no_get=no_get) - - if not self.options.get('bundle_only', False): - retcode = self.download_data(no_get=no_get) + bundle_only = self.options.get('bundle_only', False) + if bundle_only: + no_get = True + + retcode = self.download(no_get=no_get, bundle_only=bundle_only) return retcode From 9c7e92fe518e8726a5c99a763d6f4504781d7bb0 Mon Sep 17 00:00:00 2001 From: Alejandro de la Vega Date: Mon, 19 Dec 2022 18:08:48 -0600 Subject: [PATCH 2/4] Set dataset_dir --- neuroscout_cli/commands/get.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/neuroscout_cli/commands/get.py b/neuroscout_cli/commands/get.py index 32c60db..8f647b3 100644 --- a/neuroscout_cli/commands/get.py +++ b/neuroscout_cli/commands/get.py @@ -67,7 +67,7 @@ def download(self, no_get=False, bundle_only=False): preproc_address=self.resources['preproc_address'], datalad_jobs=self.options.get('datalad_jobs', -1), **model['input']) - self.dataset_dir = self.download_dir / self.resources['dataset_name'] + self.dataset_dir = self.preproc_dir.parent if not bundle_only: # Copy meta-data to root of preproc_dir From bfa61a025598c6747379afd768d708ccd6dcbb22 Mon Sep 17 00:00:00 2001 From: Alejandro de la Vega Date: Mon, 19 Dec 2022 18:10:29 -0600 Subject: [PATCH 3/4] Simplify bundle only --- neuroscout_cli/commands/get.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/neuroscout_cli/commands/get.py b/neuroscout_cli/commands/get.py index 8f647b3..d12a2bc 100644 --- a/neuroscout_cli/commands/get.py +++ b/neuroscout_cli/commands/get.py @@ -32,7 +32,7 @@ def __init__(self, options): self.main_dir.mkdir(parents=True, exist_ok=True) self.bundle_dir.mkdir(parents=True, exist_ok=True) - def download(self, no_get=False, bundle_only=False): + def download(self, no_get=False): """ Download analysis bundle and setup preproc dir """ # If tarball doesn't exist, download it bundle_tarball = self.bundle_dir / f'{self.bundle_id}.tar.gz' @@ -69,7 +69,7 @@ def download(self, no_get=False, bundle_only=False): self.dataset_dir = self.preproc_dir.parent - if not bundle_only: + if not no_get: # Copy meta-data to root of preproc_dir meta = list(self.bundle_dir.glob('task-*json'))[0] if not (self.preproc_dir/ meta.parts[-1]).exists(): @@ -96,8 +96,7 @@ def run(self, no_get=False): bundle_only = self.options.get('bundle_only', False) if bundle_only: no_get = True - - retcode = self.download(no_get=no_get, bundle_only=bundle_only) + retcode = self.download(no_get=no_get) return retcode From b8daa74d576e998c13730bf841b3b3fb4070263f Mon Sep 17 00:00:00 2001 From: Alejandro de la Vega Date: Mon, 19 Dec 2022 18:34:35 -0600 Subject: [PATCH 4/4] Update function name --- neuroscout_cli/commands/get.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/neuroscout_cli/commands/get.py b/neuroscout_cli/commands/get.py index d12a2bc..f427754 100644 --- a/neuroscout_cli/commands/get.py +++ b/neuroscout_cli/commands/get.py @@ -11,7 +11,7 @@ from neuroscout_cli import __version__ as VERSION from bids.utils import convert_JSON from ..tools.convert import check_convert_mode -from pyns.fetch_utils import fetch_preproc +from pyns.fetch_utils import fetch_images datalad.ui.ui.set_backend('console') @@ -62,10 +62,11 @@ def download(self, no_get=False): with self.model_path.open() as f: model = convert_JSON(json.load(f)) - self.preproc_dir, paths = fetch_preproc( + self.preproc_dir, paths = fetch_images( self.resources['dataset_name'], self.dataset_dir, no_get=no_get, preproc_address=self.resources['preproc_address'], - datalad_jobs=self.options.get('datalad_jobs', -1), **model['input']) + datalad_jobs=self.options.get('datalad_jobs', -1), + fetch_json=True, fetch_brain_mask=True, **model['input']) self.dataset_dir = self.preproc_dir.parent