From a838c46a1fcb8fff1e93bd72c19c00ed0c4eb2d6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Boris=20Cl=C3=A9net?= <117362283+bclenet@users.noreply.github.com>
Date: Thu, 5 Oct 2023 11:24:02 +0200
Subject: [PATCH 1/2] Task information (#117)

* [BUG] inside unit_tests workflow
* [ENH] A class to parse task data
* [TEST] making tests pass
* [DOC] update for narps_open.data
---
 docs/data.md                             | 36 +++++++++++++++
 narps_open/data/task.py                  | 27 +++++++++++
 tests/data/test_task.py                  | 57 ++++++++++++++++++++++++
 tests/test_data/data/task/task-info.json | 12 +++++
 4 files changed, 132 insertions(+)
 create mode 100644 narps_open/data/task.py
 create mode 100644 tests/data/test_task.py
 create mode 100644 tests/test_data/data/task/task-info.json

diff --git a/docs/data.md b/docs/data.md
index 1e6b4fc3..e2e84da1 100644
--- a/docs/data.md
+++ b/docs/data.md
@@ -94,3 +94,39 @@ python narps_open/utils/results -r -t 2T6S C88N L1A8
 The collections are also available [here](https://zenodo.org/record/3528329/) as one release on Zenodo that you can download.
 
 Each team results collection is kept in the `data/results/orig` directory, in a folder using the pattern `_` (e.g.: `4881_2T6S` for the 2T6S team).
+
+## Access NARPS data
+
+Inside `narps_open.data`, several modules allow parsing data from the NARPS dataset, making it easier to use inside the NARPS Open Pipelines project. These are:
+
+### `narps_open.data.description`
+Get the textual description of the pipelines, as written by the teams (see [docs/description.md](/docs/description.md)).
+
+### `narps_open.data.results`
+Get the result collections, as described earlier in this file.
+
+### `narps_open.data.participants`
+Get the participants data (parses the `data/original/ds001734/participants.tsv` file), as well as participant subsets used to perform analyses on lower numbers of images.
+
+### `narps_open.data.task`
+Get information about the task (parses the `data/original/ds001734/task-MGT_bold.json` file). Here is an example of how to use it:
+
+```python
+from narps_open.data.task import TaskInformation
+
+task_info = TaskInformation() # task_info is a dict
+
+# All available keys
+print(task_info.keys())
+# dict_keys(['TaskName', 'Manufacturer', 'ManufacturersModelName', 'MagneticFieldStrength', 'RepetitionTime', 'EchoTime', 'FlipAngle', 'MultibandAccelerationFactor', 'EffectiveEchoSpacing', 'SliceTiming', 'BandwidthPerPixelPhaseEncode', 'PhaseEncodingDirection', 'TaskDescription', 'CogAtlasID', 'NumberOfSlices', 'AcquisitionTime', 'TotalReadoutTime'])
+
+# Original data
+print(task_info['TaskName'])
+print(task_info['Manufacturer'])
+print(task_info['RepetitionTime']) # And so on ...
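+# Any other key listed above can be read the same way, e.g. the slice timing list
+# from which the derived NumberOfSlices value (printed below) is computed
+print(task_info['SliceTiming'])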
+
+# Derived data
+print(task_info['NumberOfSlices'])
+print(task_info['AcquisitionTime'])
+print(task_info['TotalReadoutTime'])
+```
diff --git a/narps_open/data/task.py b/narps_open/data/task.py
new file mode 100644
index 00000000..f3e86803
--- /dev/null
+++ b/narps_open/data/task.py
@@ -0,0 +1,27 @@
+#!/usr/bin/python
+# coding: utf-8
+
+""" A module to parse task data from NARPS for the narps_open package """
+
+from os.path import join
+from json import load
+
+from narps_open.utils.configuration import Configuration
+from narps_open.utils.singleton import SingletonMeta
+
+class TaskInformation(dict, metaclass=SingletonMeta):
+    """ This class provides access to information about the task performed in NARPS """
+
+    task_information_file = join(Configuration()['directories']['dataset'], 'task-MGT_bold.json')
+
+    def __init__(self):
+        super().__init__()
+
+        # Load information from the task-MGT_bold.json file
+        with open(self.task_information_file, 'rb') as file:
+            self.update(load(file))
+
+        # Compute derived information
+        self['NumberOfSlices'] = len(self['SliceTiming'])
+        self['AcquisitionTime'] = self['RepetitionTime'] / self['NumberOfSlices']
+        self['TotalReadoutTime'] = self['NumberOfSlices'] * self['EffectiveEchoSpacing']
diff --git a/tests/data/test_task.py b/tests/data/test_task.py
new file mode 100644
index 00000000..8b6860dd
--- /dev/null
+++ b/tests/data/test_task.py
@@ -0,0 +1,57 @@
+#!/usr/bin/python
+# coding: utf-8
+
+""" Tests of the 'narps_open.data.task' module.
+
+Launch this test with PyTest
+
+Usage:
+======
+    pytest -q test_task.py
+    pytest -q test_task.py -k
+"""
+from os.path import join
+
+from pytest import mark, fixture
+
+from narps_open.utils.configuration import Configuration
+import narps_open.data.task as task
+
+@fixture(scope='function', autouse=True)
+def mock_task_data(mocker):
+    """ Patch the task information file of TaskInformation, so that tests use mocked task data """
+    mocker.patch.object(
+        task.TaskInformation, 'task_information_file',
+        join(Configuration()['directories']['test_data'], 'data', 'task', 'task-info.json')
+        )
+
+class TestTaskInformation:
+    """ A class that contains all the unit tests for the TaskInformation class."""
+
+    @staticmethod
+    @mark.unit_test
+    def test_accessing():
+        """ Check that task information is reachable """
+
+        assert task.TaskInformation()['RepetitionTime'] == 1
+        assert len(task.TaskInformation()['SliceTiming']) == 6
+
+    @staticmethod
+    @mark.unit_test
+    def test_singleton():
+        """ Check that TaskInformation is a singleton. """
+
+        obj1 = task.TaskInformation()
+        obj2 = task.TaskInformation()
+
+        assert id(obj1) == id(obj2)
+
+    @staticmethod
+    @mark.unit_test
+    def test_derived():
+        """ Test the derived values of a TaskInformation object """
+
+        task_info = task.TaskInformation()
+        assert task_info['NumberOfSlices'] == 6
+        assert task_info['AcquisitionTime'] == 1 / 6
+        assert task_info['TotalReadoutTime'] == 12
diff --git a/tests/test_data/data/task/task-info.json b/tests/test_data/data/task/task-info.json
new file mode 100644
index 00000000..7927183d
--- /dev/null
+++ b/tests/test_data/data/task/task-info.json
@@ -0,0 +1,12 @@
+{
+    "RepetitionTime": 1,
+    "EffectiveEchoSpacing": 2,
+    "SliceTiming": [
+        0,
+        0.4375,
+        0.875,
+        0.3125,
+        0.75,
+        0.1875
+    ]
+}
\ No newline at end of file

From 5b375cce9ebe89ea1ba19de2f9f59ed7fad6a5bb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Boris=20Cl=C3=A9net?= <117362283+bclenet@users.noreply.github.com>
Date: Thu, 5 Oct 2023 11:25:11 +0200
Subject: [PATCH 2/2] Credits section + install documentation (#91)

* [BUG] inside unit_tests workflow
* Adding names of contributors
* [DOC] simplifying the install doc
* [DOC] simplifying the install doc
* [DOC] simplifying the environment doc
* [DOC] simplifying the environment doc
* [DOC] typo correction
---
 INSTALL.md          |  91 +++++++++++++++++-----------------
 README.md           |   4 +-
 docs/environment.md | 116 +++++++++++++++++++------------------------
 docs/running.md     |  58 +++++++++++-----------
 4 files changed, 127 insertions(+), 142 deletions(-)

diff --git a/INSTALL.md b/INSTALL.md
index f1589893..b6142cc0 100644
--- a/INSTALL.md
+++ b/INSTALL.md
@@ -1,82 +1,85 @@
 # How to install NARPS Open Pipelines ?
 
-## 1 - Get the code
+## 1 - Fork the repository
 
-First, [fork](https://docs.github.com/en/get-started/quickstart/fork-a-repo) the repository, so you have your own working copy of it.
+[Fork](https://docs.github.com/en/get-started/quickstart/fork-a-repo) the repository, so you have your own working copy of it.
 
-Then, you have two options to [clone](https://docs.github.com/en/repositories/creating-and-managing-repositories/cloning-a-repository) the project :
+## 2 - Clone the code
 
-### Option 1: Using DataLad (recommended)
+First, install [Datalad](https://www.datalad.org/). This will allow you to access the NARPS data easily, as it is included in the repository as [datalad subdatasets](http://handbook.datalad.org/en/latest/basics/101-106-nesting.html).
 
-Cloning the fork using [Datalad](https://www.datalad.org/) will allow you to get the code as well as "links" to the data, because the NARPS data is bundled in this repository as [datalad subdatasets](http://handbook.datalad.org/en/latest/basics/101-106-nesting.html).
+Then, [clone](https://docs.github.com/en/repositories/creating-and-managing-repositories/cloning-a-repository) the project:
 
 ```bash
+# Replace YOUR_GITHUB_USERNAME in the following command.
 datalad install --recursive https://github.com/YOUR_GITHUB_USERNAME/narps_open_pipelines.git
 ```
 
-### Option 2: Using Git
-
-Cloning the fork using [git](https://git-scm.com/) ; by doing this, you will only get the code.
-
-```bash
-git clone https://github.com/YOUR_GITHUB_USERNAME/narps_open_pipelines.git
-```
+> [!WARNING]
+> It is still possible to clone the fork using [git](https://git-scm.com/), but by doing this, you will only get the code.
+> ```bash
+> # Replace YOUR_GITHUB_USERNAME in the following command.
+> git clone https://github.com/YOUR_GITHUB_USERNAME/narps_open_pipelines.git
+> ```
 
-## 2 - Get the data
+## 3 - Get the data
 
-Ignore this step if you used DataLad (option 1) in the previous step.
+Now that you have cloned the repository using Datalad, you are able to get the data:
 
-Otherwise, there are several ways to get the data.
+```bash
+# Move inside the root directory of the repository.
+cd narps_open_pipelines
+
+# Select the data you want to download. Here is an example that gets the data of the first 4 subjects.
+datalad get data/original/ds001734/sub-00[1-4] -J 12
+datalad get data/original/ds001734/derivatives/fmriprep/sub-00[1-4] -J 12
+```
+
+> [!NOTE]
+> For further information and alternatives on how to get the data, see the corresponding documentation page [docs/data.md](docs/data.md).
 
-## 3 - Set up the environment
+## 4 - Set up the environment
 
-The Narps Open Pipelines project is build upon several dependencies, such as [Nipype](https://nipype.readthedocs.io/en/latest/) but also the original software packages used by the pipelines (SPM, FSL, AFNI...).
-
-To facilitate this step, we created a Docker container based on [Neurodocker](https://github.com/ReproNim/neurodocker) that contains the necessary Python packages and software. To install the Docker image, two options are available.
-
-### Option 1: Using Dockerhub
+[Install Docker](https://docs.docker.com/engine/install/) then pull the Docker image:
 
 ```bash
 docker pull elodiegermani/open_pipeline:latest
 ```
 
-The image should install itself. Once it's done you can check the image is available on your system:
+Once it's done, you can check that the image is available on your system:
 
 ```bash
 docker images
+  REPOSITORY                              TAG       IMAGE ID       CREATED        SIZE
  docker.io/elodiegermani/open_pipeline   latest    0f3c74d28406   9 months ago   22.7 GB
 ```
 
-### Option 2: Using a Dockerfile
+> [!NOTE]
+> Feel free to read the documentation page [docs/environment.md](docs/environment.md) to get further information about this environment.
+
+## 5 - Run the project
+
+Start a Docker container from the Docker image:
+
+```bash
+# Replace PATH_TO_THE_REPOSITORY in the following command (e.g.: with /home/user/dev/narps_open_pipelines/)
+docker run -it -v PATH_TO_THE_REPOSITORY:/home/neuro/code/ elodiegermani/open_pipeline
+```
 
-The Dockerfile used to create the image stored on DockerHub is available at the root of the repository ([Dockerfile](Dockerfile)). But you might want to personalize this Dockerfile. To do so, change the command below that will generate a new Dockerfile:
+Install NARPS Open Pipelines inside the container:
 
 ```bash
-docker run --rm repronim/neurodocker:0.7.0 generate docker \
-    --base neurodebian:stretch-non-free --pkg-manager apt \
-    --install git \
-    --fsl version=6.0.3 \
-    --afni version=latest method=binaries install_r=true install_r_pkgs=true install_python2=true install_python3=true \
-    --spm12 version=r7771 method=binaries \
-    --user=neuro \
-    --workdir /home \
-    --miniconda create_env=neuro \
-      conda_install="python=3.8 traits jupyter nilearn graphviz nipype scikit-image" \
-      pip_install="matplotlib" \
-      activate=True \
-    --env LD_LIBRARY_PATH="/opt/miniconda-latest/envs/neuro:$LD_LIBRARY_PATH" \
-    --run-bash "source activate neuro" \
-    --user=root \
-    --run 'chmod 777 -Rf /home' \
-    --run 'chown -R neuro /home' \
-    --user=neuro \
-    --run 'mkdir -p ~/.jupyter && echo c.NotebookApp.ip = \"0.0.0.0\" > ~/.jupyter/jupyter_notebook_config.py' > Dockerfile
+source activate neuro
+cd /home/neuro/code/
+pip install .
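+# Optional (assumption: the project setup supports it): developers who plan to edit the code
+# may prefer an editable install, so local changes are picked up without reinstalling
+# pip install -e .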
 ```
 
-When you are satisfied with your Dockerfile, just build the image:
+Finally, you are able to run pipelines:
 
 ```bash
-docker build --tag [name_of_the_image] - < Dockerfile
+python narps_open/runner.py
+  usage: runner.py [-h] -t TEAM (-r RSUBJECTS | -s SUBJECTS [SUBJECTS ...] | -n NSUBJECTS) [-g | -f] [-c]
 ```
 
-When the image is built, follow the instructions in [docs/environment.md](docs/environment.md) to start the environment from it.
+> [!NOTE]
+> For further information, read the documentation page [docs/running.md](docs/running.md).
diff --git a/README.md b/README.md
index 7ad3172c..20125d83 100644
--- a/README.md
+++ b/README.md
@@ -72,6 +72,6 @@ This project is developed in the Empenn team by Boris Clenet, Elodie Germani, Je
 In addition, this project was presented and received contributions during the following events:
   - OHBM Brainhack 2022 (June 2022): Elodie Germani, Arshitha Basavaraj, Trang Cao, Rémi Gau, Anna Menacher, Camille Maumet.
-  - e-ReproNim FENS NENS Cluster Brainhack:
-  - OHBM Brainhack 2023 (July 2023):
+  - e-ReproNim FENS NENS Cluster Brainhack (June 2023): Liz Bushby, Boris Clénet, Michael Dayan, Aimee Westbrook.
+  - OHBM Brainhack 2023 (July 2023): Arshitha Basavaraj, Boris Clénet, Rémi Gau, Élodie Germani, Yaroslav Halchenko, Camille Maumet, Paul Taylor.
   - ORIGAMI lab hackathon (Sept 2023):
diff --git a/docs/environment.md b/docs/environment.md
index 98addd6a..edab9b4d 100644
--- a/docs/environment.md
+++ b/docs/environment.md
@@ -1,100 +1,82 @@
-# Set up the environment to run pipelines
+# About the environment of NARPS Open Pipelines
 
-## Run a docker container :whale:
+## The Docker container :whale:
 
-Start a container using the command below:
+The NARPS Open Pipelines project is built upon several dependencies, such as [Nipype](https://nipype.readthedocs.io/en/latest/) but also the original software packages used by the pipelines (SPM, FSL, AFNI...). Therefore, we created a Docker container based on [Neurodocker](https://github.com/ReproNim/neurodocker) that contains these software dependencies.
 
-```bash
-docker run -ti \
-    -p 8888:8888 \
-    elodiegermani/open_pipeline
-```
+The simplest way to start the container is to use the command below:
 
-On this command line, you need to add volumes to be able to link with your local files (original dataset and git repository). If you stored the original dataset in `data/original`, just make a volume with the `narps_open_pipelines` directory:
+```bash
+docker run -it elodiegermani/open_pipeline
+```
 
-```bash
-docker run -ti \
-    -p 8888:8888 \
-    -v /users/egermani/Documents/narps_open_pipelines:/home/ \
-    elodiegermani/open_pipeline
-```
+From this command line, you need to add volumes to link the container with your local files (the code repository):
 
-If it is in another directory, make a second volume with the path to your dataset:
+```bash
+# Replace PATH_TO_THE_REPOSITORY in the following command (e.g.: with /home/user/dev/narps_open_pipelines/)
+docker run -it \
+    -v PATH_TO_THE_REPOSITORY:/home/neuro/code/ \
+    elodiegermani/open_pipeline
+```
 
-```bash
-docker run -ti \
-    -p 8888:8888 \
-    -v /Users/egermani/Documents/narps_open_pipelines:/home/ \
-    -v /Users/egermani/Documents/data/NARPS/:/data/ \
-    elodiegermani/open_pipeline
-```
+## Use Jupyter with the container
 
-After that, your container will be launched!
+If you wish to use [Jupyter](https://jupyter.org/) to run the code, port forwarding is needed:
 
-## Other useful docker commands
+```bash
+docker run -it \
+    -v PATH_TO_THE_REPOSITORY:/home/neuro/code/ \
+    -p 8888:8888 \
+    elodiegermani/open_pipeline
+```
 
-### START A CONTAINER
+Then, from inside the container:
 
-```bash
-docker start [name_of_the_container]
-```
+```bash
+jupyter notebook --port=8888 --no-browser --ip=0.0.0.0
+```
 
-### VERIFY A CONTAINER IS IN THE LIST
+You can now access Jupyter using the address provided by the command line.
 
-```bash
-docker ps
-```
+> [!NOTE]
+> Find useful information on the [Docker documentation page](https://docs.docker.com/get-started/). Here is a [cheat sheet with Docker commands](https://docs.docker.com/get-started/docker_cheatsheet.pdf).
 
-### EXECUTE BASH OR ATTACH YOUR CONTAINER
+## Create a custom Docker image
 
-```bash
-docker exec -ti [name_of_the_container] bash
-```
+The `elodiegermani/open_pipeline` Docker image is based on [Neurodocker](https://github.com/ReproNim/neurodocker). It was created using the following command line:
 
-**OR**
-
-```bash
-docker attach [name_of_the_container]
-```
-
-## Useful commands inside the container
-
-### ACTIVATE CONDA ENVIRONMENT
-
-```bash
-source activate neuro
-```
-
-### LAUNCH JUPYTER NOTEBOOK
-
-```bash
-jupyter notebook --port=8888 --no-browser --ip=0.0.0.0
-```
-
-## If you did not use your container for a while
-
-Verify it still runs :
-
-```bash
-docker ps -l
-```
-
-If your container is in the list, run :
-
-```bash
-docker start [name_of_the_container]
-```
-
-Else, relaunch it with :
-
-```bash
-docker run -ti \
-    -p 8888:8888 \
-    -v /home/egermani:/home \
-    [name_of_the_image]
-```
+```bash
+docker run --rm repronim/neurodocker:0.7.0 generate docker \
+    --base neurodebian:stretch-non-free --pkg-manager apt \
+    --install git \
+    --fsl version=6.0.3 \
+    --afni version=latest method=binaries install_r=true install_r_pkgs=true install_python2=true install_python3=true \
+    --spm12 version=r7771 method=binaries \
+    --user=neuro \
+    --workdir /home \
+    --miniconda create_env=neuro \
+      conda_install="python=3.8 traits jupyter nilearn graphviz nipype scikit-image" \
+      pip_install="matplotlib" \
+      activate=True \
+    --env LD_LIBRARY_PATH="/opt/miniconda-latest/envs/neuro:$LD_LIBRARY_PATH" \
+    --run-bash "source activate neuro" \
+    --user=root \
+    --run 'chmod 777 -Rf /home' \
+    --run 'chown -R neuro /home' \
+    --user=neuro \
+    --run 'mkdir -p ~/.jupyter && echo c.NotebookApp.ip = \"0.0.0.0\" > ~/.jupyter/jupyter_notebook_config.py' > Dockerfile
+```
+
+If you wish to create your own custom environment, make changes to these parameters and build your custom image from the generated Dockerfile:
+
+```bash
+# Replace IMAGE_NAME in the following command
+docker build --tag IMAGE_NAME - < Dockerfile
+```
 
-### To use SPM inside the container, use this command at the beginning of your script:
+## Good to know
+
+To use SPM inside the container, use this command at the beginning of your script:
 
 ```python
 from nipype.interfaces import spm
diff --git a/docs/running.md b/docs/running.md
index 6344c042..eb614eef 100644
--- a/docs/running.md
+++ b/docs/running.md
@@ -1,6 +1,33 @@
-# :running: How to run NARPS open pipelines ?
+# How to run NARPS open pipelines ? :running:
 
-## Using the `PipelineRunner`
+## Using the runner application
+
+The `narps_open.runner` module allows running pipelines from the command line:
+
+```bash
+python narps_open/runner.py -h
+  usage: runner.py [-h] -t TEAM (-r RANDOM | -s SUBJECTS [SUBJECTS ...]) [-g | -f]
+
+  Run the pipelines from NARPS.
+
+  options:
+    -h, --help                  show this help message and exit
+    -t TEAM, --team TEAM        the team ID
+    -r RANDOM, --random RANDOM  the number of subjects to be randomly selected
+    -s SUBJECTS [SUBJECTS ...], --subjects SUBJECTS [SUBJECTS ...]  a list of subjects
+    -g, --group                 run the group level only
+    -f, --first                 run the first levels only (preprocessing + subjects + runs)
+    -c, --check                 check pipeline outputs (runner is not launched)
+
+python narps_open/runner.py -t 2T6S -s 001 006 020 100
+python narps_open/runner.py -t 2T6S -r 4
+python narps_open/runner.py -t 2T6S -r 4 -f
+python narps_open/runner.py -t 2T6S -r 4 -f -c # Check the output files without launching the runner
+```
+
+In this use case, the paths where the outputs are stored and where the dataset is located are picked by the runner from the [configuration](docs/configuration.md).
+
+## Using the `PipelineRunner` object
 
 The class `PipelineRunner` is available from the `narps_open.runner` module. You can use it from inside python code, as follows :
 
@@ -35,30 +62,3 @@ runner.start(True, True)
 runner.get_missing_first_level_outputs()
 runner.get_missing_group_level_outputs()
 ```
-
-## Using the runner application
-
-The `narps_open.runner` module also allows to run pipelines from the command line :
-
-```bash
-python narps_open/runner.py -h
-  usage: runner.py [-h] -t TEAM (-r RANDOM | -s SUBJECTS [SUBJECTS ...]) [-g | -f]
-
-  Run the pipelines from NARPS.
-
-  options:
-    -h, --help                  show this help message and exit
-    -t TEAM, --team TEAM        the team ID
-    -r RANDOM, --random RANDOM  the number of subjects to be randomly selected
-    -s SUBJECTS [SUBJECTS ...], --subjects SUBJECTS [SUBJECTS ...]  a list of subjects
-    -g, --group                 run the group level only
-    -f, --first                 run the first levels only (preprocessing + subjects + runs)
-    -c, --check                 check pipeline outputs (runner is not launched)
-
-python narps_open/runner.py -t 2T6S -s 001 006 020 100
-python narps_open/runner.py -t 2T6S -r 4
-python narps_open/runner.py -t 2T6S -r 4 -f
-python narps_open/runner.py -t 2T6S -r 4 -f -c # Check the output files without launching the runner
-```
-
-In this usecase, the paths where to store the outputs and to the dataset are picked by the runner from the [configuration](docs/configuration.md).