From a838c46a1fcb8fff1e93bd72c19c00ed0c4eb2d6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Boris=20Cl=C3=A9net?= <117362283+bclenet@users.noreply.github.com>
Date: Thu, 5 Oct 2023 11:24:02 +0200
Subject: [PATCH 1/2] Task information (#117)

* [BUG] inside unit_tests workflow
* [ENH] A class to parse task data
* [TEST] making tests pass
* [DOC] update for narps_open.data
---
 docs/data.md                             | 36 +++++++++++++++
 narps_open/data/task.py                  | 27 +++++++++++
 tests/data/test_task.py                  | 57 ++++++++++++++++++++++++
 tests/test_data/data/task/task-info.json | 12 +++++
 4 files changed, 132 insertions(+)
 create mode 100644 narps_open/data/task.py
 create mode 100644 tests/data/test_task.py
 create mode 100644 tests/test_data/data/task/task-info.json

diff --git a/docs/data.md b/docs/data.md
index 1e6b4fc3..e2e84da1 100644
--- a/docs/data.md
+++ b/docs/data.md
@@ -94,3 +94,39 @@ python narps_open/utils/results -r -t 2T6S C88N L1A8
 The collections are also available [here](https://zenodo.org/record/3528329/) as one release on Zenodo that you can download.
 
 Each team results collection is kept in the `data/results/orig` directory, in a folder using the pattern `_` (e.g.: `4881_2T6S` for the 2T6S team).
+
+## Access NARPS data
+
+Inside `narps_open.data`, several modules allow parsing data from the NARPS dataset, making it easier to use inside the NARPS Open Pipelines project. These are:
+
+### `narps_open.data.description`
+Get the textual description of the pipelines, as written by the teams (see [docs/description.md](/docs/description.md)).
+
+### `narps_open.data.results`
+Get the result collections, as described earlier in this file.
+
+### `narps_open.data.participants`
+Get the participants data (parses the `data/original/ds001734/participants.tsv` file), as well as participant subsets used to perform analyses on lower numbers of images.
+
+### `narps_open.data.task`
+Get information about the task (parses the `data/original/ds001734/task-MGT_bold.json` file). Here is an example of how to use it:
+
+```python
+from narps_open.data.task import TaskInformation
+
+task_info = TaskInformation() # task_info is a dict
+
+# All available keys
+print(task_info.keys())
+# dict_keys(['TaskName', 'Manufacturer', 'ManufacturersModelName', 'MagneticFieldStrength', 'RepetitionTime', 'EchoTime', 'FlipAngle', 'MultibandAccelerationFactor', 'EffectiveEchoSpacing', 'SliceTiming', 'BandwidthPerPixelPhaseEncode', 'PhaseEncodingDirection', 'TaskDescription', 'CogAtlasID', 'NumberOfSlices', 'AcquisitionTime', 'TotalReadoutTime'])
+
+# Original data
+print(task_info['TaskName'])
+print(task_info['Manufacturer'])
+print(task_info['RepetitionTime']) # And so on ...
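+# Any other key listed above can be read the same way, e.g. the slice timing list
+# from which the derived NumberOfSlices value (printed below) is computed
+print(task_info['SliceTiming'])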
+
+# Derived data
+print(task_info['NumberOfSlices'])
+print(task_info['AcquisitionTime'])
+print(task_info['TotalReadoutTime'])
+```
diff --git a/narps_open/data/task.py b/narps_open/data/task.py
new file mode 100644
index 00000000..f3e86803
--- /dev/null
+++ b/narps_open/data/task.py
@@ -0,0 +1,27 @@
+#!/usr/bin/python
+# coding: utf-8
+
+""" A module to parse task data from NARPS for the narps_open package """
+
+from os.path import join
+from json import load
+
+from narps_open.utils.configuration import Configuration
+from narps_open.utils.singleton import SingletonMeta
+
+class TaskInformation(dict, metaclass=SingletonMeta):
+    """ This class provides access to information about the task performed in NARPS """
+
+    task_information_file = join(Configuration()['directories']['dataset'], 'task-MGT_bold.json')
+
+    def __init__(self):
+        super().__init__()
+
+        # Load information from the task-MGT_bold.json file
+        with open(self.task_information_file, 'rb') as file:
+            self.update(load(file))
+
+        # Compute derived information
+        self['NumberOfSlices'] = len(self['SliceTiming'])
+        self['AcquisitionTime'] = self['RepetitionTime'] / self['NumberOfSlices']
+        self['TotalReadoutTime'] = self['NumberOfSlices'] * self['EffectiveEchoSpacing']
diff --git a/tests/data/test_task.py b/tests/data/test_task.py
new file mode 100644
index 00000000..8b6860dd
--- /dev/null
+++ b/tests/data/test_task.py
@@ -0,0 +1,57 @@
+#!/usr/bin/python
+# coding: utf-8
+
+""" Tests of the 'narps_open.data.task' module.
+
+Launch this test with PyTest
+
+Usage:
+======
+    pytest -q test_task.py
+    pytest -q test_task.py -k
+"""
+from os.path import join
+
+from pytest import mark, fixture
+
+from narps_open.utils.configuration import Configuration
+import narps_open.data.task as task
+
+@fixture(scope='function', autouse=True)
+def mock_task_data(mocker):
+    """ Patch the task information file of TaskInformation, so that tests use mocked task data """
+    mocker.patch.object(
+        task.TaskInformation, 'task_information_file',
+        join(Configuration()['directories']['test_data'], 'data', 'task', 'task-info.json')
+        )
+
+class TestTaskInformation:
+    """ A class that contains all the unit tests for the TaskInformation class."""
+
+    @staticmethod
+    @mark.unit_test
+    def test_accessing():
+        """ Check that task information is reachable """
+
+        assert task.TaskInformation()['RepetitionTime'] == 1
+        assert len(task.TaskInformation()['SliceTiming']) == 6
+
+    @staticmethod
+    @mark.unit_test
+    def test_singleton():
+        """ Check that TaskInformation is a singleton. """
+
+        obj1 = task.TaskInformation()
+        obj2 = task.TaskInformation()
+
+        assert id(obj1) == id(obj2)
+
+    @staticmethod
+    @mark.unit_test
+    def test_derived():
+        """ Test the derived values of a TaskInformation object """
+
+        task_info = task.TaskInformation()
+        assert task_info['NumberOfSlices'] == 6
+        assert task_info['AcquisitionTime'] == 1 / 6
+        assert task_info['TotalReadoutTime'] == 12
diff --git a/tests/test_data/data/task/task-info.json b/tests/test_data/data/task/task-info.json
new file mode 100644
index 00000000..7927183d
--- /dev/null
+++ b/tests/test_data/data/task/task-info.json
@@ -0,0 +1,12 @@
+{
+    "RepetitionTime": 1,
+    "EffectiveEchoSpacing": 2,
+    "SliceTiming": [
+        0,
+        0.4375,
+        0.875,
+        0.3125,
+        0.75,
+        0.1875
+    ]
+}
\ No newline at end of file

From 5b375cce9ebe89ea1ba19de2f9f59ed7fad6a5bb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Boris=20Cl=C3=A9net?= <117362283+bclenet@users.noreply.github.com>
Date: Thu, 5 Oct 2023 11:25:11 +0200
Subject: [PATCH 2/2] Credits section + install documentation (#91)

* [BUG] inside unit_tests workflow
* Adding names of contributors
* [DOC] simplifying the install doc
* [DOC] simplifying the install doc
* [DOC] simplifying the environment doc
* [DOC] simplifying the environment doc
* [DOC] typo correction
---
 INSTALL.md          |  91 +++++++++++++++++-----------------
 README.md           |   4 +-
 docs/environment.md | 116 +++++++++++++++++++------------------------
 docs/running.md     |  58 +++++++++++-----------
 4 files changed, 127 insertions(+), 142 deletions(-)

diff --git a/INSTALL.md b/INSTALL.md
index f1589893..b6142cc0 100644
--- a/INSTALL.md
+++ b/INSTALL.md
@@ -1,82 +1,85 @@
 # How to install NARPS Open Pipelines ?
 
-## 1 - Get the code
+## 1 - Fork the repository
 
-First, [fork](https://docs.github.com/en/get-started/quickstart/fork-a-repo) the repository, so you have your own working copy of it.
+[Fork](https://docs.github.com/en/get-started/quickstart/fork-a-repo) the repository, so you have your own working copy of it.
 
-Then, you have two options to [clone](https://docs.github.com/en/repositories/creating-and-managing-repositories/cloning-a-repository) the project :
+## 2 - Clone the code
 
-### Option 1: Using DataLad (recommended)
+First, install [Datalad](https://www.datalad.org/). This will allow you to access the NARPS data easily, as it is included in the repository as [datalad subdatasets](http://handbook.datalad.org/en/latest/basics/101-106-nesting.html).
 
-Cloning the fork using [Datalad](https://www.datalad.org/) will allow you to get the code as well as "links" to the data, because the NARPS data is bundled in this repository as [datalad subdatasets](http://handbook.datalad.org/en/latest/basics/101-106-nesting.html).
+Then, [clone](https://docs.github.com/en/repositories/creating-and-managing-repositories/cloning-a-repository) the project:
 
 ```bash
+# Replace YOUR_GITHUB_USERNAME in the following command.
 datalad install --recursive https://github.com/YOUR_GITHUB_USERNAME/narps_open_pipelines.git
 ```
 
-### Option 2: Using Git
-
-Cloning the fork using [git](https://git-scm.com/) ; by doing this, you will only get the code.
-
-```bash
-git clone https://github.com/YOUR_GITHUB_USERNAME/narps_open_pipelines.git
-```
+> [!WARNING]
+> It is still possible to clone the fork using [git](https://git-scm.com/), but by doing this, you will only get the code.
+> ```bash
+> # Replace YOUR_GITHUB_USERNAME in the following command.
+> git clone https://github.com/YOUR_GITHUB_USERNAME/narps_open_pipelines.git
+> ```
 
-## 2 - Get the data
+## 3 - Get the data
 
-Ignore this step if you used DataLad (option 1) in the previous step.
+Now that you have cloned the repository using Datalad, you are able to get the data:
 
-Otherwise, there are several ways to get the data.
+```bash
+# Move inside the root directory of the repository.
+cd narps_open_pipelines
+
+# Select the data you want to download. Here is an example that gets the data of the first 4 subjects.
+datalad get data/original/ds001734/sub-00[1-4] -J 12
+datalad get data/original/ds001734/derivatives/fmriprep/sub-00[1-4] -J 12
+```
+
+> [!NOTE]
+> For further information and alternatives on how to get the data, see the corresponding documentation page [docs/data.md](docs/data.md).
 
-## 3 - Set up the environment
+## 4 - Set up the environment
 
-The Narps Open Pipelines project is build upon several dependencies, such as [Nipype](https://nipype.readthedocs.io/en/latest/) but also the original software packages used by the pipelines (SPM, FSL, AFNI...).
-
-To facilitate this step, we created a Docker container based on [Neurodocker](https://github.com/ReproNim/neurodocker) that contains the necessary Python packages and software. To install the Docker image, two options are available.
-
-### Option 1: Using Dockerhub
+[Install Docker](https://docs.docker.com/engine/install/) then pull the Docker image:
 
 ```bash
 docker pull elodiegermani/open_pipeline:latest
 ```
 
-The image should install itself. Once it's done you can check the image is available on your system:
+Once it's done, you can check that the image is available on your system:
 
 ```bash
 docker images
+  REPOSITORY                              TAG       IMAGE ID       CREATED        SIZE
  docker.io/elodiegermani/open_pipeline   latest    0f3c74d28406   9 months ago   22.7 GB
 ```
 
-### Option 2: Using a Dockerfile
+> [!NOTE]
+> Feel free to read the documentation page [docs/environment.md](docs/environment.md) to get further information about this environment.
+
+## 5 - Run the project
+
+Start a Docker container from the Docker image:
+
+```bash
+# Replace PATH_TO_THE_REPOSITORY in the following command (e.g.: with /home/user/dev/narps_open_pipelines/)
+docker run -it -v PATH_TO_THE_REPOSITORY:/home/neuro/code/ elodiegermani/open_pipeline
+```
 
-The Dockerfile used to create the image stored on DockerHub is available at the root of the repository ([Dockerfile](Dockerfile)). But you might want to personalize this Dockerfile. To do so, change the command below that will generate a new Dockerfile:
+Install NARPS Open Pipelines inside the container:
 
 ```bash
-docker run --rm repronim/neurodocker:0.7.0 generate docker \
-    --base neurodebian:stretch-non-free --pkg-manager apt \
-    --install git \
-    --fsl version=6.0.3 \
-    --afni version=latest method=binaries install_r=true install_r_pkgs=true install_python2=true install_python3=true \
-    --spm12 version=r7771 method=binaries \
-    --user=neuro \
-    --workdir /home \
-    --miniconda create_env=neuro \
-      conda_install="python=3.8 traits jupyter nilearn graphviz nipype scikit-image" \
-      pip_install="matplotlib" \
-      activate=True \
-    --env LD_LIBRARY_PATH="/opt/miniconda-latest/envs/neuro:$LD_LIBRARY_PATH" \
-    --run-bash "source activate neuro" \
-    --user=root \
-    --run 'chmod 777 -Rf /home' \
-    --run 'chown -R neuro /home' \
-    --user=neuro \
-    --run 'mkdir -p ~/.jupyter && echo c.NotebookApp.ip = \"0.0.0.0\" > ~/.jupyter/jupyter_notebook_config.py' > Dockerfile
+source activate neuro
+cd /home/neuro/code/
+pip install .
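+# Optional (assumption: the project setup supports it): developers who plan to edit the code
+# may prefer an editable install, so local changes are picked up without reinstalling
+# pip install -e .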
 ```
 
-When you are satisfied with your Dockerfile, just build the image:
+Finally, you are able to run pipelines:
 
 ```bash
-docker build --tag [name_of_the_image] - < Dockerfile
+python narps_open/runner.py
+  usage: runner.py [-h] -t TEAM (-r RSUBJECTS | -s SUBJECTS [SUBJECTS ...] | -n NSUBJECTS) [-g | -f] [-c]
 ```
 
-When the image is built, follow the instructions in [docs/environment.md](docs/environment.md) to start the environment from it.
+> [!NOTE]
+> For further information, read the documentation page [docs/running.md](docs/running.md).
diff --git a/README.md b/README.md
index 7ad3172c..20125d83 100644
--- a/README.md
+++ b/README.md
@@ -72,6 +72,6 @@ This project is developed in the Empenn team by Boris Clenet, Elodie Germani, Je
 In addition, this project was presented and received contributions during the following events:
   - OHBM Brainhack 2022 (June 2022): Elodie Germani, Arshitha Basavaraj, Trang Cao, Rémi Gau, Anna Menacher, Camille Maumet.
-  - e-ReproNim FENS NENS Cluster Brainhack:
-  - OHBM Brainhack 2023 (July 2023):
+  - e-ReproNim FENS NENS Cluster Brainhack (June 2023): Liz Bushby, Boris Clénet, Michael Dayan, Aimee Westbrook.
+  - OHBM Brainhack 2023 (July 2023): Arshitha Basavaraj, Boris Clénet, Rémi Gau, Élodie Germani, Yaroslav Halchenko, Camille Maumet, Paul Taylor.
   - ORIGAMI lab hackathon (Sept 2023):
diff --git a/docs/environment.md b/docs/environment.md
index 98addd6a..edab9b4d 100644
--- a/docs/environment.md
+++ b/docs/environment.md
@@ -1,100 +1,82 @@
-# Set up the environment to run pipelines
+# About the environment of NARPS Open Pipelines
 
-## Run a docker container :whale:
+## The Docker container :whale:
 
-Start a container using the command below:
+The NARPS Open Pipelines project is built upon several dependencies, such as [Nipype](https://nipype.readthedocs.io/en/latest/) but also the original software packages used by the pipelines (SPM, FSL, AFNI...). Therefore, we created a Docker container based on [Neurodocker](https://github.com/ReproNim/neurodocker) that contains these software dependencies.
 
-```bash
-docker run -ti \
-    -p 8888:8888 \
-    elodiegermani/open_pipeline
-```
+The simplest way to start the container is to use the command below:
 
-On this command line, you need to add volumes to be able to link with your local files (original dataset and git repository). If you stored the original dataset in `data/original`, just make a volume with the `narps_open_pipelines` directory:
+```bash
+docker run -it elodiegermani/open_pipeline
+```
 
-```bash
-docker run -ti \
-    -p 8888:8888 \
-    -v /users/egermani/Documents/narps_open_pipelines:/home/ \
-    elodiegermani/open_pipeline
-```
+From this command line, you need to add volumes to link the container with your local files (the code repository):
 
-If it is in another directory, make a second volume with the path to your dataset:
+```bash
+# Replace PATH_TO_THE_REPOSITORY in the following command (e.g.: with /home/user/dev/narps_open_pipelines/)
+docker run -it \
+    -v PATH_TO_THE_REPOSITORY:/home/neuro/code/ \
+    elodiegermani/open_pipeline
+```
 
-```bash
-docker run -ti \
-    -p 8888:8888 \
-    -v /Users/egermani/Documents/narps_open_pipelines:/home/ \
-    -v /Users/egermani/Documents/data/NARPS/:/data/ \
-    elodiegermani/open_pipeline
-```
+## Use Jupyter with the container
 
-After that, your container will be launched!
+If you wish to use [Jupyter](https://jupyter.org/) to run the code, port forwarding is needed:
 
-## Other useful docker commands
+```bash
+docker run -it \
+    -v PATH_TO_THE_REPOSITORY:/home/neuro/code/ \
+    -p 8888:8888 \
+    elodiegermani/open_pipeline
+```
 
-### START A CONTAINER
+Then, from inside the container:
 
-```bash
-docker start [name_of_the_container]
-```
+```bash
+jupyter notebook --port=8888 --no-browser --ip=0.0.0.0
+```
 
-### VERIFY A CONTAINER IS IN THE LIST
+You can now access Jupyter using the address provided by the command line.
 
-```bash
-docker ps
-```
+> [!NOTE]
+> Find useful information on the [Docker documentation page](https://docs.docker.com/get-started/). Here is a [cheat sheet with Docker commands](https://docs.docker.com/get-started/docker_cheatsheet.pdf).
 
-### EXECUTE BASH OR ATTACH YOUR CONTAINER
+## Create a custom Docker image
 
-```bash
-docker exec -ti [name_of_the_container] bash
-```
+The `elodiegermani/open_pipeline` Docker image is based on [Neurodocker](https://github.com/ReproNim/neurodocker). It was created using the following command line:
 
-**OR**
-
-```bash
-docker attach [name_of_the_container]
-```
-
-## Useful commands inside the container
-
-### ACTIVATE CONDA ENVIRONMENT
-
-```bash
-source activate neuro
-```
-
-### LAUNCH JUPYTER NOTEBOOK
-
-```bash
-jupyter notebook --port=8888 --no-browser --ip=0.0.0.0
-```
-
-## If you did not use your container for a while
-
-Verify it still runs :
-
-```bash
-docker ps -l
-```
-
-If your container is in the list, run :
-
-```bash
-docker start [name_of_the_container]
-```
-
-Else, relaunch it with :
-
-```bash
-docker run -ti \
-    -p 8888:8888 \
-    -v /home/egermani:/home \
-    [name_of_the_image]
-```
+```bash
+docker run --rm repronim/neurodocker:0.7.0 generate docker \
+    --base neurodebian:stretch-non-free --pkg-manager apt \
+    --install git \
+    --fsl version=6.0.3 \
+    --afni version=latest method=binaries install_r=true install_r_pkgs=true install_python2=true install_python3=true \
+    --spm12 version=r7771 method=binaries \
+    --user=neuro \
+    --workdir /home \
+    --miniconda create_env=neuro \
+      conda_install="python=3.8 traits jupyter nilearn graphviz nipype scikit-image" \
+      pip_install="matplotlib" \
+      activate=True \
+    --env LD_LIBRARY_PATH="/opt/miniconda-latest/envs/neuro:$LD_LIBRARY_PATH" \
+    --run-bash "source activate neuro" \
+    --user=root \
+    --run 'chmod 777 -Rf /home' \
+    --run 'chown -R neuro /home' \
+    --user=neuro \
+    --run 'mkdir -p ~/.jupyter && echo c.NotebookApp.ip = \"0.0.0.0\" > ~/.jupyter/jupyter_notebook_config.py' > Dockerfile
+```
+
+If you wish to create your own custom environment, make changes to these parameters and build your custom image from the generated Dockerfile:
+
+```bash
+# Replace IMAGE_NAME in the following command
+docker build --tag IMAGE_NAME - < Dockerfile
+```
 
-### To use SPM inside the container, use this command at the beginning of your script:
+## Good to know
+
+To use SPM inside the container, use this command at the beginning of your script:
 
 ```python
 from nipype.interfaces import spm
diff --git a/docs/running.md b/docs/running.md
index 6344c042..eb614eef 100644
--- a/docs/running.md
+++ b/docs/running.md
@@ -1,6 +1,33 @@
-# :running: How to run NARPS open pipelines ?
+# How to run NARPS open pipelines ? :running:
 
-## Using the `PipelineRunner`
+## Using the runner application
+
+The `narps_open.runner` module allows running pipelines from the command line:
+
+```bash
+python narps_open/runner.py -h
+  usage: runner.py [-h] -t TEAM (-r RANDOM | -s SUBJECTS [SUBJECTS ...]) [-g | -f]
+
+  Run the pipelines from NARPS.
+
+  options:
+    -h, --help                  show this help message and exit
+    -t TEAM, --team TEAM        the team ID
+    -r RANDOM, --random RANDOM  the number of subjects to be randomly selected
+    -s SUBJECTS [SUBJECTS ...], --subjects SUBJECTS [SUBJECTS ...]  a list of subjects
+    -g, --group                 run the group level only
+    -f, --first                 run the first levels only (preprocessing + subjects + runs)
+    -c, --check                 check pipeline outputs (runner is not launched)
+
+python narps_open/runner.py -t 2T6S -s 001 006 020 100
+python narps_open/runner.py -t 2T6S -r 4
+python narps_open/runner.py -t 2T6S -r 4 -f
+python narps_open/runner.py -t 2T6S -r 4 -f -c # Check the output files without launching the runner
+```
+
+In this use case, the paths where the outputs are stored and where the dataset is located are picked by the runner from the [configuration](docs/configuration.md).
+
+## Using the `PipelineRunner` object
 
 The class `PipelineRunner` is available from the `narps_open.runner` module. You can use it from inside python code, as follows :
 
@@ -35,30 +62,3 @@ runner.start(True, True)
 runner.get_missing_first_level_outputs()
 runner.get_missing_group_level_outputs()
 ```
-
-## Using the runner application
-
-The `narps_open.runner` module also allows to run pipelines from the command line :
-
-```bash
-python narps_open/runner.py -h
-  usage: runner.py [-h] -t TEAM (-r RANDOM | -s SUBJECTS [SUBJECTS ...]) [-g | -f]
-
-  Run the pipelines from NARPS.
-
-  options:
-    -h, --help                  show this help message and exit
-    -t TEAM, --team TEAM        the team ID
-    -r RANDOM, --random RANDOM  the number of subjects to be randomly selected
-    -s SUBJECTS [SUBJECTS ...], --subjects SUBJECTS [SUBJECTS ...]  a list of subjects
-    -g, --group                 run the group level only
-    -f, --first                 run the first levels only (preprocessing + subjects + runs)
-    -c, --check                 check pipeline outputs (runner is not launched)
-
-python narps_open/runner.py -t 2T6S -s 001 006 020 100
-python narps_open/runner.py -t 2T6S -r 4
-python narps_open/runner.py -t 2T6S -r 4 -f
-python narps_open/runner.py -t 2T6S -r 4 -f -c # Check the output files without launching the runner
-```
-
-In this usecase, the paths where to store the outputs and to the dataset are picked by the runner from the [configuration](docs/configuration.md).