From 1ae9ce16e5125b57583ccf0272918c918a3e78f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Boris=20Cl=C3=A9net?= <117362283+bclenet@users.noreply.github.com> Date: Fri, 12 Apr 2024 09:16:26 +0200 Subject: [PATCH 1/2] L7J7 reproduction (#189) * Starting repro + testing for L7J7 * Pipeline implemented * Extent thresholds for clustering * COntrasts naming * Hypo names * Hypo names * Er vs Ei in group comp * Output file names --- narps_open/pipelines/__init__.py | 2 +- narps_open/pipelines/team_L7J7.py | 672 ++++++++++++++++++ tests/pipelines/test_team_L7J7.py | 122 ++++ .../pipelines/team_L7J7/confounds.tsv | 3 + 4 files changed, 798 insertions(+), 1 deletion(-) create mode 100644 narps_open/pipelines/team_L7J7.py create mode 100644 tests/pipelines/test_team_L7J7.py create mode 100644 tests/test_data/pipelines/team_L7J7/confounds.tsv diff --git a/narps_open/pipelines/__init__.py b/narps_open/pipelines/__init__.py index 4294853c..87a8dc70 100644 --- a/narps_open/pipelines/__init__.py +++ b/narps_open/pipelines/__init__.py @@ -55,7 +55,7 @@ 'K9P0': None, 'L1A8': None, 'L3V8': None, - 'L7J7': None, + 'L7J7': 'PipelineTeamL7J7', 'L9G5': None, 'O03M': None, 'O21U': None, diff --git a/narps_open/pipelines/team_L7J7.py b/narps_open/pipelines/team_L7J7.py new file mode 100644 index 00000000..973384f2 --- /dev/null +++ b/narps_open/pipelines/team_L7J7.py @@ -0,0 +1,672 @@ +#!/usr/bin/python +# coding: utf-8 + +""" Write the work of NARPS' team L7J7 using Nipype """ + +from os.path import join +from itertools import product + +from nipype import Workflow, Node, MapNode +from nipype.interfaces.utility import IdentityInterface, Function +from nipype.interfaces.io import SelectFiles, DataSink +from nipype.interfaces.spm import ( + Smooth, + OneSampleTTestDesign, EstimateModel, EstimateContrast, + Level1Design, TwoSampleTTestDesign, Threshold + ) +from nipype.algorithms.modelgen import SpecifySPMModel +from nipype.algorithms.misc import Gunzip + +from narps_open.pipelines import Pipeline +from narps_open.data.task import TaskInformation +from narps_open.data.participants import get_group +from narps_open.core.interfaces import InterfaceFactory +from narps_open.core.common import list_intersection, elements_in_string, clean_list +from narps_open.utils.configuration import Configuration + +class PipelineTeamL7J7(Pipeline): + """ A class that defines the pipeline of team L7J7. """ + + def __init__(self): + super().__init__() + self.fwhm = 6.0 + self.team_id = 'L7J7' + self.contrast_list = ['0001', '0002'] + self.subject_level_contrasts = [ + ('effect_of_gain', 'T', ['gamble', 'gamblexgain^1', 'gamblexloss^1'], [0, 1, 0]), + ('effect_of_loss', 'T', ['gamble', 'gamblexgain^1', 'gamblexloss^1'], [0, 0, 1]) + ] + + def get_preprocessing(self): + """ No preprocessing has been done by team L7J7 """ + return None + + def get_run_level_analysis(self): + """ No run level analysis has been done by team L7J7 """ + return None + + # @staticmethod # Starting python 3.10, staticmethod should be used here + # Otherwise it produces a TypeError: 'staticmethod' object is not callable + def get_subject_information(event_files: list): + """ Create Bunchs for SpecifySPMModel. + + Parameters : + - event_files: list of str, list of events files (one per run) for the subject + + Returns : + - subject_info : list of Bunch for 1st level analysis. 
+ """ + + from nipype.interfaces.base import Bunch + + onsets = {} + durations = {} + weights_gain = {} + weights_loss = {} + + subject_info = [] + + for run_id, event_file in enumerate(event_files): + + trial_key = f'gamble_run{run_id + 1}' + gain_key = f'gain_run{run_id + 1}' + loss_key = f'loss_run{run_id + 1}' + + onsets.update({trial_key: []}) + durations.update({trial_key: []}) + weights_gain.update({gain_key: []}) + weights_loss.update({loss_key: []}) + + with open(event_file, 'rt') as file: + next(file) # skip the header + + for line in file: + info = line.strip().split() + onsets[trial_key].append(float(info[0])) + durations[trial_key].append(float(info[1])) + weights_gain[gain_key].append(float(info[2])) + weights_loss[loss_key].append(float(info[3])) + + # Create a Bunch per run, i.e. cond1_run1, cond2_run1, etc. + subject_info.append( + Bunch( + conditions = ['gamble'], + onsets = [onsets[trial_key]], + durations = [durations[trial_key]], + amplitudes = None, + tmod = None, + pmod = [Bunch( + name = ['gain', 'loss'], + poly = [1, 1], + param = [weights_gain[gain_key], weights_loss[loss_key]] + )], + regressor_names = None, + regressors = None + )) + + return subject_info + + # @staticmethod # Starting python 3.10, staticmethod should be used here + # Otherwise it produces a TypeError: 'staticmethod' object is not callable + def get_confounds_file(filepath, subject_id, run_id, working_dir): + """ + Create a new tsv files with only desired confounds per subject per run. + Also computes the first derivative of the motion parameters. + + Parameters : + - filepath : path to the subject confounds file + - subject_id : related subject id + - run_id : related run id + - working_dir: str, name of the directory for intermediate results + + Return : + - confounds_file : path to new file containing only desired confounds + """ + from os import makedirs + from os.path import join + + from pandas import DataFrame, read_csv + from numpy import array, transpose + + # Open original confounds file + data_frame = read_csv(filepath, sep = '\t', header=0) + + # Extract confounds we want to use for the model + retained_parameters = DataFrame(transpose(array([ + data_frame['X'], data_frame['Y'], data_frame['Z'], + data_frame['RotX'], data_frame['RotY'], data_frame['RotZ'] + ]))) + + # Write confounds to a file + confounds_file = join(working_dir, 'confounds_files', + f'confounds_file_sub-{subject_id}_run-{run_id}.tsv') + + makedirs(join(working_dir, 'confounds_files'), exist_ok = True) + + with open(confounds_file, 'w', encoding = 'utf-8') as writer: + writer.write(retained_parameters.to_csv( + sep = '\t', index = False, header = False, na_rep = '0.0')) + + return confounds_file + + def get_subject_level_analysis(self): + """ + Create the subject level analysis workflow. 
+ + Returns: + - subject_level : nipype.WorkFlow + """ + # Initialize preprocessing workflow to connect nodes along the way + subject_level = Workflow( + base_dir = self.directories.working_dir, name = 'subject_level' + ) + + # Identity interface Node - to iterate over subject_id and run + info_source = Node( + IdentityInterface(fields = ['subject_id']), + name = 'info_source') + info_source.iterables = [('subject_id', self.subject_list)] + + # Select files from derivatives + templates = { + 'func': join('derivatives', 'fmriprep', 'sub-{subject_id}', 'func', + 'sub-{subject_id}_task-MGT_run-*_bold_space-MNI152NLin2009cAsym_preproc.nii.gz'), + 'confounds' : join('derivatives', 'fmriprep', 'sub-{subject_id}', 'func', + 'sub-{subject_id}_task-MGT_run-*_bold_confounds.tsv'), + 'events': join('sub-{subject_id}', 'func', + 'sub-{subject_id}_task-MGT_run-*_events.tsv') + } + select_files = Node(SelectFiles(templates), name = 'select_files') + select_files.inputs.base_directory = self.directories.dataset_dir + select_files.inputs.sort_filelist = True + subject_level.connect(info_source, 'subject_id', select_files, 'subject_id') + + # Gunzip - gunzip files because SPM do not use .nii.gz files + gunzip = MapNode(Gunzip(), name = 'gunzip', iterfield=['in_file']) + subject_level.connect(select_files, 'func', gunzip, 'in_file') + + # Smoothing - smooth the func data + smooth = Node(Smooth(), name = 'smooth') + smooth.inputs.fwhm = self.fwhm + smooth.overwrite = False + subject_level.connect(gunzip, 'out_file', smooth, 'in_files') + + # Function node get_subject_info - get subject specific condition information + subject_info = Node(Function( + function = self.get_subject_information, + input_names = ['event_files'], + output_names = ['subject_info'] + ), + name = 'subject_info') + subject_level.connect(select_files, 'events', subject_info, 'event_files') + + # Function node get_confounds_file - Generate confounds files + confounds = MapNode(Function( + function = self.get_confounds_file, + input_names = ['filepath', 'subject_id', 'run_id', 'working_dir'], + output_names = ['confounds_file']), + name = 'confounds', iterfield = ['filepath', 'run_id']) + confounds.inputs.working_dir = self.directories.working_dir + confounds.inputs.run_id = self.run_list + subject_level.connect(info_source, 'subject_id', confounds, 'subject_id') + subject_level.connect(select_files, 'confounds', confounds, 'filepath') + + specify_model = Node(SpecifySPMModel(), name = 'specify_model') + specify_model.inputs.concatenate_runs = False + specify_model.inputs.input_units = 'secs' + specify_model.inputs.output_units = 'secs' + specify_model.inputs.time_repetition = TaskInformation()['RepetitionTime'] + specify_model.inputs.high_pass_filter_cutoff = 128 + specify_model.overwrite = False + subject_level.connect(subject_info, 'subject_info', specify_model, 'subject_info') + subject_level.connect(confounds, 'confounds_file', specify_model, 'realignment_parameters') + subject_level.connect(smooth, 'smoothed_files', specify_model, 'functional_runs') + + model_design = Node(Level1Design(), name = 'model_design') + model_design.inputs.bases = {'hrf': {'derivs': [0, 0]}} + model_design.inputs.timing_units = 'secs' + model_design.inputs.interscan_interval = TaskInformation()['RepetitionTime'] + model_design.overwrite = False + subject_level.connect(specify_model, 'session_info', model_design, 'session_info') + + model_estimate = Node(EstimateModel(), name = 'model_estimate') + model_estimate.inputs.estimation_method = {'Classical': 1} + 
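+ # Note: 'Classical' estimation is SPM's standard Restricted Maximum Likelihood (ReML) method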
model_estimate.overwrite = False + subject_level.connect(model_design, 'spm_mat_file', model_estimate, 'spm_mat_file') + + contrast_estimate = Node(EstimateContrast(), name = 'contraste_estimate') + contrast_estimate.inputs.contrasts = self.subject_level_contrasts + contrast_estimate.config = {'execution': {'remove_unnecessary_outputs': False}} + contrast_estimate.overwrite = False + subject_level.connect(model_estimate, 'spm_mat_file', contrast_estimate, 'spm_mat_file') + subject_level.connect(model_estimate, 'beta_images', contrast_estimate, 'beta_images') + subject_level.connect( + model_estimate, 'residual_image', contrast_estimate, 'residual_image') + + # DataSink - store the wanted results in the wanted repository + data_sink = Node(DataSink(), name = 'data_sink') + data_sink.inputs.base_directory = self.directories.output_dir + subject_level.connect( + contrast_estimate, 'con_images', data_sink, f'{subject_level.name}.@con_images') + subject_level.connect( + contrast_estimate, 'spm_mat_file', data_sink, f'{subject_level.name}.@spm_mat_file') + + # Remove large files, if requested + if Configuration()['pipelines']['remove_unused_data']: + + # Remove Node - Remove gunzip files once they are no longer needed + remove_gunzip = MapNode( + InterfaceFactory.create('remove_parent_directory'), + name = 'remove_gunzip', + iterfield = ['file_name'] + ) + + # Remove Node - Remove smoothed files once they are no longer needed + remove_smooth = MapNode( + InterfaceFactory.create('remove_parent_directory'), + name = 'remove_smooth', + iterfield = ['file_name'] + ) + + # Add connections + subject_level.connect([ + (smooth, remove_gunzip, [('smoothed_files', '_')]), + (gunzip, remove_gunzip, [('out_file', 'file_name')]), + (data_sink, remove_smooth, [('out_file', '_')]), + (smooth, remove_smooth, [('smoothed_files', 'file_name')]) + ]) + + + return subject_level + + def get_subject_level_outputs(self): + """ Return the names of the files the subject level analysis is supposed to generate. """ + + templates = [join( + self.directories.output_dir, + 'subject_level', '_subject_id_{subject_id}', f'con_{contrast_id}.nii')\ + for contrast_id in self.contrast_list] + templates += [join( + self.directories.output_dir, + 'subject_level', '_subject_id_{subject_id}', 'SPM.mat')] + + # Format with subject_ids + return_list = [] + for template in templates: + return_list += [template.format(subject_id = s) for s in self.subject_list] + + return return_list + + def get_group_level_analysis(self): + """ + Return all workflows for the group level analysis. + + Returns; + - a list of nipype.WorkFlow + """ + + return [ + self.get_group_level_analysis_single_group('equalRange'), + self.get_group_level_analysis_single_group('equalIndifference'), + self.get_group_level_analysis_group_comparison() + ] + + def get_group_level_analysis_single_group(self, method): + """ + Return a workflow for the group level analysis in the single group case. 
+ + Parameters: + - method: one of 'equalRange', 'equalIndifference' + + Returns: + - group_level_analysis: nipype.WorkFlow + """ + # Compute the number of participants used to do the analysis + nb_subjects = len(self.subject_list) + + # Infosource - a function free node to iterate over the list of subject names + info_source = Node(IdentityInterface(fields=['contrast_id']), + name = 'info_source') + info_source.iterables = [('contrast_id', self.contrast_list)] + + # Select files from subject level analysis + templates = { + 'contrasts': join(self.directories.output_dir, + 'subject_level', '_subject_id_*', 'con_{contrast_id}.nii'), + } + select_files = Node(SelectFiles(templates), name = 'select_files') + select_files.inputs.sort_filelist = True + select_files.inputs.base_directory = self.directories.dataset_dir + + # Datasink - save important files + data_sink = Node(DataSink(), name = 'data_sink') + data_sink.inputs.base_directory = self.directories.output_dir + + # Function Node get_group_subjects + # Get subjects in the group and in the subject_list + get_group_subjects = Node(Function( + function = list_intersection, + input_names = ['list_1', 'list_2'], + output_names = ['out_list'] + ), + name = 'get_group_subjects' + ) + get_group_subjects.inputs.list_1 = get_group(method) + get_group_subjects.inputs.list_2 = self.subject_list + + # Create a function to complete the subject ids out from the get_equal_*_subjects nodes + # If not complete, subject id '001' in search patterns + # would match all contrast files with 'con_0001.nii'. + complete_subject_ids = lambda l : [f'_subject_id_{a}' for a in l] + + # Function Node elements_in_string + # Get contrast files for required subjects + # Note : using a MapNode with elements_in_string requires using clean_list to remove + # None values from the out_list + get_contrasts = MapNode(Function( + function = elements_in_string, + input_names = ['input_str', 'elements'], + output_names = ['out_list'] + ), + name = 'get_contrasts', iterfield = 'input_str' + ) + + # One Sample T-Test Design - creates one sample T-Test Design + onesamplettestdes = Node(OneSampleTTestDesign(), name = 'onesampttestdes') + + # EstimateModel - estimate the parameters of the model + # Even for second level it should be 'Classical': 1. 
+ level2estimate = Node(EstimateModel(), name = 'level2estimate') + level2estimate.inputs.estimation_method = {'Classical': 1} + + # EstimateContrast - estimates simple group contrast + level2conestimate = Node(EstimateContrast(), name = 'level2conestimate') + level2conestimate.inputs.group_contrast = True + level2conestimate.inputs.contrasts = [ + ['Group', 'T', ['mean'], [1]], ['Group', 'T', ['mean'], [-1]]] + + # Threshold Node - Create thresholded maps + threshold = MapNode(Threshold(), name = 'threshold', + iterfield = ['stat_image', 'contrast_index']) + threshold.inputs.use_fwe_correction = True + threshold.inputs.height_threshold_type = 'p-value' + threshold.inputs.force_activation = False + threshold.inputs.height_threshold = 0.05 + threshold.inputs.contrast_index = [1, 2] + + # Create the group level workflow + group_level_analysis = Workflow( + base_dir = self.directories.working_dir, + name = f'group_level_analysis_{method}_nsub_{nb_subjects}') + group_level_analysis.connect([ + (info_source, select_files, [('contrast_id', 'contrast_id')]), + (select_files, get_contrasts, [('contrasts', 'input_str')]), + (get_group_subjects, get_contrasts, [ + (('out_list', complete_subject_ids), 'elements') + ]), + (get_contrasts, onesamplettestdes, [ + (('out_list', clean_list), 'in_files') + ]), + #(select_files, onesamplettestdes, [('mask', 'explicit_mask_file')]), + (onesamplettestdes, level2estimate, [('spm_mat_file', 'spm_mat_file')]), + (level2estimate, level2conestimate, [ + ('spm_mat_file', 'spm_mat_file'), + ('beta_images', 'beta_images'), + ('residual_image', 'residual_image') + ]), + (level2conestimate, threshold, [ + ('spm_mat_file', 'spm_mat_file'), + ('spmT_images', 'stat_image') + ]), + (level2estimate, data_sink, [ + ('mask_image', f'{group_level_analysis.name}.@mask')]), + (level2conestimate, data_sink, [ + ('spm_mat_file', f'{group_level_analysis.name}.@spm_mat'), + ('spmT_images', f'{group_level_analysis.name}.@T'), + ('con_images', f'{group_level_analysis.name}.@con')]), + (threshold, data_sink, [ + ('thresholded_map', f'{group_level_analysis.name}.@thresh')]) + ]) + + return group_level_analysis + + def get_group_level_analysis_group_comparison(self): + """ + Return a workflow for the group level analysis in the group comparison case. 
+ + Returns: + - group_level_analysis: nipype.WorkFlow + """ + # Compute the number of participants used to do the analysis + nb_subjects = len(self.subject_list) + + # Infosource - a function free node to iterate over the list of subject names + info_source = Node(IdentityInterface(fields=['contrast_id']), + name = 'info_source') + info_source.iterables = [('contrast_id', self.contrast_list)] + + # Select files from subject level analysis + templates = { + 'contrasts': join(self.directories.output_dir, + 'subject_level', '_subject_id_*', 'con_{contrast_id}.nii'), + #'mask': join('derivatives/fmriprep/gr_mask_tmax.nii') + } + select_files = Node(SelectFiles(templates), name = 'select_files') + select_files.inputs.sort_filelist = True + select_files.inputs.base_directory = self.directories.dataset_dir + + # Datasink - save important files + data_sink = Node(DataSink(), name = 'data_sink') + data_sink.inputs.base_directory = self.directories.output_dir + + # Function Node get_group_subjects + # Get subjects in the group and in the subject_list + get_equal_indifference_subjects = Node(Function( + function = list_intersection, + input_names = ['list_1', 'list_2'], + output_names = ['out_list'] + ), + name = 'get_equal_indifference_subjects' + ) + get_equal_indifference_subjects.inputs.list_1 = get_group('equalIndifference') + get_equal_indifference_subjects.inputs.list_2 = self.subject_list + + # Function Node get_group_subjects + # Get subjects in the group and in the subject_list + get_equal_range_subjects = Node(Function( + function = list_intersection, + input_names = ['list_1', 'list_2'], + output_names = ['out_list'] + ), + name = 'get_equal_range_subjects' + ) + get_equal_range_subjects.inputs.list_1 = get_group('equalRange') + get_equal_range_subjects.inputs.list_2 = self.subject_list + + # Create a function to complete the subject ids out from the get_equal_*_subjects nodes + # If not complete, subject id '001' in search patterns + # would match all contrast files with 'con_0001.nii'. + complete_subject_ids = lambda l : [f'_subject_id_{a}' for a in l] + + # Function Node elements_in_string + # Get contrast files for required subjects + # Note : using a MapNode with elements_in_string requires using clean_list to remove + # None values from the out_list + get_equal_indifference_contrasts = MapNode(Function( + function = elements_in_string, + input_names = ['input_str', 'elements'], + output_names = ['out_list'] + ), + name = 'get_equal_indifference_contrasts', iterfield = 'input_str' + ) + get_equal_range_contrasts = MapNode(Function( + function = elements_in_string, + input_names = ['input_str', 'elements'], + output_names = ['out_list'] + ), + name = 'get_equal_range_contrasts', iterfield = 'input_str' + ) + + # Two Sample T-Test Design + twosampttest = Node(TwoSampleTTestDesign(), name = 'twosampttest') + + # EstimateModel - estimate the parameters of the model + # Even for second level it should be 'Classical': 1. 
+ level2estimate = Node(EstimateModel(), name = 'level2estimate') + level2estimate.inputs.estimation_method = {'Classical': 1} + + # EstimateContrast - estimates simple group contrast + level2conestimate = Node(EstimateContrast(), name = 'level2conestimate') + level2conestimate.inputs.group_contrast = True + level2conestimate.inputs.contrasts = [ + ['Eq range vs Eq indiff in loss', 'T', ['Group_{1}', 'Group_{2}'], [-1, 1]] + ] + + # Threshold Node - Create thresholded maps + threshold = Node(Threshold(), name = 'threshold') + threshold.inputs.use_fwe_correction = True + threshold.inputs.height_threshold_type = 'p-value' + threshold.inputs.force_activation = False + threshold.inputs.height_threshold = 0.05 + threshold.inputs.contrast_index = 1 + + # Create the group level workflow + group_level_analysis = Workflow( + base_dir = self.directories.working_dir, + name = f'group_level_analysis_groupComp_nsub_{nb_subjects}') + group_level_analysis.connect([ + (info_source, select_files, [('contrast_id', 'contrast_id')]), + (select_files, get_equal_range_contrasts, [('contrasts', 'input_str')]), + (select_files, get_equal_indifference_contrasts, [('contrasts', 'input_str')]), + (get_equal_range_subjects, get_equal_range_contrasts, [ + (('out_list', complete_subject_ids), 'elements') + ]), + (get_equal_indifference_subjects, get_equal_indifference_contrasts, [ + (('out_list', complete_subject_ids), 'elements') + ]), + (get_equal_range_contrasts, twosampttest, [ + (('out_list', clean_list), 'group1_files') + ]), + (get_equal_indifference_contrasts, twosampttest, [ + (('out_list', clean_list), 'group2_files') + ]), + #(select_files, twosampttest, [('mask', 'explicit_mask_file')]), + (twosampttest, level2estimate, [('spm_mat_file', 'spm_mat_file')]), + (level2estimate, level2conestimate, [ + ('spm_mat_file', 'spm_mat_file'), + ('beta_images', 'beta_images'), + ('residual_image', 'residual_image') + ]), + (level2conestimate, threshold, [ + ('spm_mat_file', 'spm_mat_file'), + ('spmT_images', 'stat_image') + ]), + (level2estimate, data_sink, [ + ('mask_image', f'{group_level_analysis.name}.@mask')]), + (level2conestimate, data_sink, [ + ('spm_mat_file', f'{group_level_analysis.name}.@spm_mat'), + ('spmT_images', f'{group_level_analysis.name}.@T'), + ('con_images', f'{group_level_analysis.name}.@con')]), + (threshold, data_sink, [ + ('thresholded_map', f'{group_level_analysis.name}.@thresh')]) + ]) + + return group_level_analysis + + def get_group_level_outputs(self): + """ Return all names for the files the group level analysis is supposed to generate. 
""" + + # Handle equalRange and equalIndifference + parameters = { + 'contrast_id': self.contrast_list, + 'method': ['equalRange', 'equalIndifference'], + 'file': [ + 'con_0001.nii', 'con_0002.nii', 'mask.nii', 'SPM.mat', + 'spmT_0001.nii', 'spmT_0002.nii', + join('_threshold0', 'spmT_0001_thr.nii'), join('_threshold1', 'spmT_0002_thr.nii') + ], + 'nb_subjects' : [str(len(self.subject_list))] + } + + parameter_sets = product(*parameters.values()) + template = join( + self.directories.output_dir, + 'group_level_analysis_{method}_nsub_{nb_subjects}', + '_contrast_id_{contrast_id}', + '{file}' + ) + return_list = [template.format(**dict(zip(parameters.keys(), parameter_values)))\ + for parameter_values in parameter_sets] + + # Handle groupComp + parameters = { + 'contrast_id': self.contrast_list, + 'method': ['groupComp'], + 'file': [ + 'con_0001.nii', 'mask.nii', 'SPM.mat', 'spmT_0001.nii', 'spmT_0001_thr.nii' + ], + 'nb_subjects' : [str(len(self.subject_list))] + } + parameter_sets = product(*parameters.values()) + template = join( + self.directories.output_dir, + 'group_level_analysis_{method}_nsub_{nb_subjects}', + '_contrast_id_{contrast_id}', + '{file}' + ) + return_list += [template.format(**dict(zip(parameters.keys(), parameter_values)))\ + for parameter_values in parameter_sets] + + return return_list + + def get_hypotheses_outputs(self): + """ Return all hypotheses output file names. """ + nb_sub = len(self.subject_list) + files = [ + # Hypothesis 1 + join(f'group_level_analysis_equalIndifference_nsub_{nb_sub}', + '_contrast_id_0001', '_threshold0', 'spmT_0001_thr.nii'), + join(f'group_level_analysis_equalIndifference_nsub_{nb_sub}', + '_contrast_id_0001', 'spmT_0001.nii'), + # Hypothesis 2 + join(f'group_level_analysis_equalRange_nsub_{nb_sub}', + '_contrast_id_0001', '_threshold0', 'spmT_0001_thr.nii'), + join(f'group_level_analysis_equalRange_nsub_{nb_sub}', + '_contrast_id_0001', 'spmT_0001.nii'), + # Hypothesis 3 + join(f'group_level_analysis_equalIndifference_nsub_{nb_sub}', + '_contrast_id_0001', '_threshold0', 'spmT_0001_thr.nii'), + join(f'group_level_analysis_equalIndifference_nsub_{nb_sub}', + '_contrast_id_0001', 'spmT_0001.nii'), + # Hypothesis 4 + join(f'group_level_analysis_equalRange_nsub_{nb_sub}', + '_contrast_id_0001', '_threshold0', 'spmT_0001_thr.nii'), + join(f'group_level_analysis_equalRange_nsub_{nb_sub}', + '_contrast_id_0001', 'spmT_0001.nii'), + # Hypothesis 5 + join(f'group_level_analysis_equalIndifference_nsub_{nb_sub}', + '_contrast_id_0002', '_threshold1', 'spmT_0001_thr.nii'), + join(f'group_level_analysis_equalIndifference_nsub_{nb_sub}', + '_contrast_id_0002', 'spmT_0001.nii'), + # Hypothesis 6 + join(f'group_level_analysis_equalRange_nsub_{nb_sub}', + '_contrast_id_0002', '_threshold1', 'spmT_0001_thr.nii'), + join(f'group_level_analysis_equalRange_nsub_{nb_sub}', + '_contrast_id_0002', 'spmT_0001.nii'), + # Hypothesis 7 + join(f'group_level_analysis_equalIndifference_nsub_{nb_sub}', + '_contrast_id_0002', '_threshold0', 'spmT_0001_thr.nii'), + join(f'group_level_analysis_equalIndifference_nsub_{nb_sub}', + '_contrast_id_0002', 'spmT_0001.nii'), + # Hypothesis 8 + join(f'group_level_analysis_equalRange_nsub_{nb_sub}', + '_contrast_id_0002', '_threshold0', 'spmT_0001_thr.nii'), + join(f'group_level_analysis_equalRange_nsub_{nb_sub}', + '_contrast_id_0002', 'spmT_0001.nii'), + # Hypothesis 9 + join(f'group_level_analysis_groupComp_nsub_{nb_sub}', + '_contrast_id_0002', '_threshold0', 'spmT_0001_thr.nii'), + 
join(f'group_level_analysis_groupComp_nsub_{nb_sub}', + '_contrast_id_0002', 'spmT_0001.nii') + ] + return [join(self.directories.output_dir, f) for f in files] diff --git a/tests/pipelines/test_team_L7J7.py b/tests/pipelines/test_team_L7J7.py new file mode 100644 index 00000000..e84e32c2 --- /dev/null +++ b/tests/pipelines/test_team_L7J7.py @@ -0,0 +1,122 @@ +#!/usr/bin/python +# coding: utf-8 + +""" Tests of the 'narps_open.pipelines.team_L7J7' module. + +Launch this test with PyTest + +Usage: +====== + pytest -q test_team_L7J7.py + pytest -q test_team_L7J7.py -k +""" +from os.path import join, exists +from filecmp import cmp + +from pytest import helpers, mark +from nipype import Workflow +from nipype.interfaces.base import Bunch + +from narps_open.utils.configuration import Configuration +from narps_open.pipelines.team_L7J7 import PipelineTeamL7J7 + +class TestPipelinesTeamL7J7: + """ A class that contains all the unit tests for the PipelineTeamL7J7 class.""" + + @staticmethod + @mark.unit_test + def test_create(): + """ Test the creation of a PipelineTeamL7J7 object """ + + pipeline = PipelineTeamL7J7() + + # 1 - check the parameters + assert pipeline.fwhm == 6.0 + assert pipeline.team_id == 'L7J7' + + # 2 - check workflows + assert pipeline.get_preprocessing() is None + assert pipeline.get_run_level_analysis() is None + assert isinstance(pipeline.get_subject_level_analysis(), Workflow) + group_level = pipeline.get_group_level_analysis() + assert len(group_level) == 3 + for sub_workflow in group_level: + assert isinstance(sub_workflow, Workflow) + + @staticmethod + @mark.unit_test + def test_outputs(): + """ Test the expected outputs of a PipelineTeamL7J7 object """ + pipeline = PipelineTeamL7J7() + # 1 - 1 subject outputs + pipeline.subject_list = ['001'] + helpers.test_pipeline_outputs(pipeline, [0, 0, 3, 8*2*2 + 5*2, 18]) + + # 2 - 4 subjects outputs + pipeline.subject_list = ['001', '002', '003', '004'] + helpers.test_pipeline_outputs(pipeline, [0, 0, 12, 8*2*2 + 5*2, 18]) + + @staticmethod + @mark.unit_test + def test_subject_information(): + """ Test the get_subject_information method """ + + # Get test files + test_file = join(Configuration()['directories']['test_data'], 'pipelines', 'events.tsv') + info = PipelineTeamL7J7.get_subject_information([test_file, test_file]) + + # Compare bunches to expected + bunch = info[0] + assert isinstance(bunch, Bunch) + assert bunch.conditions == ['gamble'] + helpers.compare_float_2d_arrays(bunch.onsets, [[4.071, 11.834, 19.535, 27.535, 36.435]]) + helpers.compare_float_2d_arrays(bunch.durations, [[4.0, 4.0, 4.0, 4.0, 4.0]]) + assert bunch.amplitudes is None + assert bunch.tmod is None + assert bunch.pmod[0].name == ['gain', 'loss'] + assert bunch.pmod[0].poly == [1, 1] + helpers.compare_float_2d_arrays(bunch.pmod[0].param, + [[14.0, 34.0, 38.0, 10.0, 16.0], [6.0, 14.0, 19.0, 15.0, 17.0]]) + assert bunch.regressor_names is None + assert bunch.regressors is None + + bunch = info[1] + assert isinstance(bunch, Bunch) + assert bunch.conditions == ['gamble'] + helpers.compare_float_2d_arrays(bunch.onsets, [[4.071, 11.834, 19.535, 27.535, 36.435]]) + helpers.compare_float_2d_arrays(bunch.durations, [[4.0, 4.0, 4.0, 4.0, 4.0]]) + assert bunch.amplitudes is None + assert bunch.tmod is None + assert bunch.pmod[0].name == ['gain', 'loss'] + assert bunch.pmod[0].poly == [1, 1] + helpers.compare_float_2d_arrays(bunch.pmod[0].param, + [[14.0, 34.0, 38.0, 10.0, 16.0], [6.0, 14.0, 19.0, 15.0, 17.0]]) + assert bunch.regressor_names is None + assert 
bunch.regressors is None + + @staticmethod + @mark.unit_test + def test_confounds_file(temporary_data_dir): + """ Test the get_confounds_file method """ + + confounds_file = join( + Configuration()['directories']['test_data'], 'pipelines', 'confounds.tsv') + reference_file = join( + Configuration()['directories']['test_data'], 'pipelines', 'team_L7J7', 'confounds.tsv') + + # Get new confounds file + PipelineTeamL7J7.get_confounds_file(confounds_file, 'sid', 'rid', temporary_data_dir) + + # Check confounds file was created + created_confounds_file = join( + temporary_data_dir, 'confounds_files', 'confounds_file_sub-sid_run-rid.tsv') + assert exists(created_confounds_file) + + # Check contents + assert cmp(reference_file, created_confounds_file) + + @staticmethod + @mark.pipeline_test + def test_execution(): + """ Test the execution of a PipelineTeamL7J7 and compare results """ + helpers.test_pipeline_evaluation('L7J7') diff --git a/tests/test_data/pipelines/team_L7J7/confounds.tsv b/tests/test_data/pipelines/team_L7J7/confounds.tsv new file mode 100644 index 00000000..cf63c178 --- /dev/null +++ b/tests/test_data/pipelines/team_L7J7/confounds.tsv @@ -0,0 +1,3 @@ +0.0 0.0 0.0 0.0 -0.0 0.0 +-0.00996895 -0.0313444 -3.00931e-06 0.00132687 -0.000384193 -0.00016819 +-2.56954e-05 -0.00923735 0.0549667 0.000997278 -0.00019745 -0.000398988 From 695668a9d72e782b7062f67b3efaa1406e9acf0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Boris=20Cl=C3=A9net?= <117362283+bclenet@users.noreply.github.com> Date: Tue, 16 Apr 2024 16:45:37 +0200 Subject: [PATCH 2/2] Adding participants exclusions in narps_open_runner (#194) * Adding a command line tool showing the correlation results of a pipeline execution * [DOC] install doc about correlation command line tool [skip ci] * Modifications on runner * Correlation main + exclusions in runner --- INSTALL.md | 6 +++ narps_open/runner.py | 16 +++++- .../__init__.py} | 0 narps_open/utils/correlation/__main__.py | 53 +++++++++++++++++++ setup.py | 1 + tests/conftest.py | 18 +++---- 6 files changed, 84 insertions(+), 10 deletions(-) rename narps_open/utils/{correlation.py => correlation/__init__.py} (100%) create mode 100644 narps_open/utils/correlation/__main__.py diff --git a/INSTALL.md b/INSTALL.md index e9f124ba..28936287 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -95,6 +95,7 @@ Finally, you are able to use the scripts of the project : * `narps_open_runner`: run pipelines * `narps_open_tester`: run a pipeline and test its results against original ones from the team +* `narps_open_correlations`: compute and display correlation between results and original ones from the team * `narps_description`: get the textual description made by a team * `narps_results`: download the original results from teams * `narps_open_status`: get status information about the development process of the pipelines @@ -107,6 +108,10 @@ narps_open_runner -t 2T6S -n 40 # and produces a report with correlation values. narps_open_tester -t 08MQ +# Compute the correlation values between results of 2T6S reproduction on 60 subjects with original ones +# WARNING : 2T6S must have been previously computed with a group of 60 subjects +narps_open_correlations -t 2T6S -n 60 + # Get the description of team C88N in markdown formatting narps_description -t C88N --md @@ -121,6 +126,7 @@ narps_open_status --json > For further information about these command line tools, read the corresponding documentation pages. 
> * `narps_open_runner` : [docs/running.md](docs/running.md)
> * `narps_open_tester` : [docs/testing.md](docs/testing.md#command-line-tool)
> * `narps_open_correlations` : [docs/correlation.md](docs/correlation.md#command-line-tool)
> * `narps_description` : [docs/description.md](docs/description.md)
> * `narps_results` : [docs/data.md](docs/data.md#results-from-narps-teams)
> * `narps_open_status` : [docs/status.md](docs/status.md)
diff --git a/narps_open/runner.py b/narps_open/runner.py
index bf557ba0..597d1144 100644
--- a/narps_open/runner.py
+++ b/narps_open/runner.py
@@ -178,8 +178,15 @@ def main():
 help='run the first levels only (preprocessing + subjects + runs)')
 parser.add_argument('-c', '--check', action='store_true', required=False,
 help='check pipeline outputs (runner is not launched)')
+ parser.add_argument('-e', '--exclusions', action='store_true', required=False,
+ help='run the analyses without the excluded subjects')
 arguments = parser.parse_args()
+ # Check arguments
+ if arguments.exclusions and not arguments.nsubjects:
+ print('Argument -e/--exclusions only works with -n/--nsubjects')
+ return
+
 # Initialize a PipelineRunner
 runner = PipelineRunner(team_id = arguments.team)
 runner.pipeline.directories.dataset_dir = Configuration()['directories']['dataset']
@@ -193,7 +200,14 @@ def main():
 elif arguments.rsubjects is not None:
 runner.random_nb_subjects = int(arguments.rsubjects)
 else:
- runner.nb_subjects = int(arguments.nsubjects)
+ if arguments.exclusions:
+ # Intersection between the requested subset and the list of not excluded subjects
+ runner.subjects = list(
+ set(get_participants_subset(int(arguments.nsubjects)))
+ & set(get_participants(arguments.team))
+ )
+ else:
+ runner.nb_subjects = int(arguments.nsubjects)
 # Check data
 if arguments.check:
diff --git a/narps_open/utils/correlation.py b/narps_open/utils/correlation/__init__.py
similarity index 100%
rename from narps_open/utils/correlation.py
rename to narps_open/utils/correlation/__init__.py
diff --git a/narps_open/utils/correlation/__main__.py b/narps_open/utils/correlation/__main__.py
new file mode 100644
index 00000000..d086499b
--- /dev/null
+++ b/narps_open/utils/correlation/__main__.py
@@ -0,0 +1,53 @@
+#!/usr/bin/python
+# coding: utf-8
+
+""" A command line tool for the narps_open.utils.correlation module """
+
+from os.path import join
+from argparse import ArgumentParser
+
+from narps_open.data.results import ResultsCollection
+from narps_open.utils.configuration import Configuration
+from narps_open.utils.correlation import get_correlation_coefficient
+from narps_open.pipelines import get_implemented_pipelines
+from narps_open.runner import PipelineRunner
+
+def main():
+ """ Entry-point for the command line tool narps_open_correlations """
+
+ # Parse arguments
+ parser = ArgumentParser(description = 'Compare reproduced files to original results.')
+ parser.add_argument('-t', '--team', type = str, required = True,
+ help = 'the team ID', choices = get_implemented_pipelines())
+ parser.add_argument('-n', '--nsubjects', type=str, required = True,
+ help='the number of subjects to be selected')
+ arguments = parser.parse_args()
+
+ # Initialize pipeline
+ runner = PipelineRunner(arguments.team)
+ runner.pipeline.directories.dataset_dir = Configuration()['directories']['dataset']
+ runner.pipeline.directories.results_dir = Configuration()['directories']['reproduced_results']
+ runner.pipeline.directories.set_output_dir_with_team_id(arguments.team)
runner.pipeline.directories.set_working_dir_with_team_id(arguments.team) + runner.nb_subjects = arguments.nsubjects + + # Indices and keys to the unthresholded maps + indices = list(range(1, 18, 2)) + + # Retrieve the paths to the reproduced files + reproduced_files = runner.pipeline.get_hypotheses_outputs() + reproduced_files = [reproduced_files[i] for i in indices] + + # Retrieve the paths to the results files + collection = ResultsCollection(arguments.team) + file_keys = [f'hypo{h}_unthresh.nii.gz' for h in range(1,10)] + results_files = [join(collection.directory, k) for k in file_keys] + + # Compute the correlation coefficients + print([ + get_correlation_coefficient(reproduced_file, results_file) + for reproduced_file, results_file in zip(reproduced_files, results_files) + ]) + +if __name__ == '__main__': + main() diff --git a/setup.py b/setup.py index b17409b6..e3c65bb0 100644 --- a/setup.py +++ b/setup.py @@ -71,6 +71,7 @@ 'narps_open_runner = narps_open.runner:main', 'narps_open_tester = narps_open.tester:main', 'narps_open_status = narps_open.utils.status:main', + 'narps_open_correlations = narps_open.utils.correlation.__main__:main', 'narps_description = narps_open.data.description.__main__:main', 'narps_results = narps_open.data.results.__main__:main' ] diff --git a/tests/conftest.py b/tests/conftest.py index f12f77a0..3e5570ff 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -22,6 +22,7 @@ from narps_open.utils.correlation import get_correlation_coefficient from narps_open.utils.configuration import Configuration from narps_open.data.results import ResultsCollection +from narps_open.data.participants import get_participants_subset # Init configuration, to ensure it is in testing mode Configuration(config_type='testing') @@ -88,13 +89,12 @@ def test_pipeline_execution( TODO : how to keep intermediate files of the low level for the next numbers of subjects ? - keep intermediate levels : boolean in PipelineRunner """ - # A list of number of subject to iterate over - nb_subjects_list = list(range( - Configuration()['testing']['pipelines']['nb_subjects_per_group'], - nb_subjects, - Configuration()['testing']['pipelines']['nb_subjects_per_group']) - ) - nb_subjects_list.append(nb_subjects) + # Create subdivisions of the requested subject list + nb_subjects_per_group = Configuration()['testing']['pipelines']['nb_subjects_per_group'] + all_subjects = get_participants_subset(nb_subjects) + subjects_lists = [] + for index in range(0, len(all_subjects), nb_subjects_per_group): + subjects_lists.append(all_subjects[index:index+nb_subjects_per_group]) # Initialize the pipeline runner = PipelineRunner(team_id) @@ -104,8 +104,8 @@ def test_pipeline_execution( runner.pipeline.directories.set_working_dir_with_team_id(team_id) # Run first level by (small) sub-groups of subjects - for subjects in nb_subjects_list: - runner.nb_subjects = subjects + for subjects_list in subjects_lists: + runner.subjects = subjects_list # Run as long as there are missing files after first level (with a max number of trials) # TODO : this is a workaround
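Taken together, the new `-e/--exclusions` runner option and the `narps_open_correlations` entry point rely on the same few calls. The following is a minimal Python sketch, not part of the patch, assuming the narps_open API as used above; the team ID and subject count are placeholders, and the corresponding reproduced maps and downloaded original results must already exist.

```python
# Minimal sketch (placeholders: team ID and subject count) combining the
# exclusion handling of narps_open_runner -e with the correlation helpers.
from os.path import join

from narps_open.runner import PipelineRunner
from narps_open.data.participants import get_participants, get_participants_subset
from narps_open.data.results import ResultsCollection
from narps_open.utils.configuration import Configuration
from narps_open.utils.correlation import get_correlation_coefficient

team_id = 'L7J7'  # placeholder team
nb_subjects = 40  # placeholder subset size

# Keep only the requested subjects that were not excluded by the team,
# as narps_open_runner -e does
subjects = list(
    set(get_participants_subset(nb_subjects)) & set(get_participants(team_id)))

# Point the pipeline at the directories used for the reproduction
runner = PipelineRunner(team_id = team_id)
runner.pipeline.directories.dataset_dir = Configuration()['directories']['dataset']
runner.pipeline.directories.results_dir = Configuration()['directories']['reproduced_results']
runner.pipeline.directories.set_output_dir_with_team_id(team_id)
runner.pipeline.directories.set_working_dir_with_team_id(team_id)
runner.subjects = subjects

# Unthresholded maps are the odd-indexed entries of get_hypotheses_outputs()
reproduced_files = runner.pipeline.get_hypotheses_outputs()[1::2]

# Original team results, as downloaded with narps_results
collection = ResultsCollection(team_id)
results_files = [
    join(collection.directory, f'hypo{hyp}_unthresh.nii.gz') for hyp in range(1, 10)]

# One correlation coefficient per hypothesis
print([
    get_correlation_coefficient(reproduced, original)
    for reproduced, original in zip(reproduced_files, results_files)])
```

The set intersection mirrors the runner's `-e` behaviour: the requested subject subset is filtered against the participants the team did not exclude before any output paths are built.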