From 6e481775ffb310b080a1c78a5aa0e60934d8a43a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Boris=20Cl=C3=A9net?= <117362283+bclenet@users.noreply.github.com> Date: Fri, 5 Jan 2024 16:00:10 +0100 Subject: [PATCH 1/3] J7F9 refactorisation (#80) * [REFAC] pipeline J7F9 to conform with the Pipeline class * [TEST] init a test for conftest.py * [REFAC] adding analyses outputs * [TEST] updating tests for J7F9 * Cleaning connections * [TEST] bug with pipelines/test_utils * [BUG] issue with iterating over dicts * [BUG] replace mkdir by makedirs in get_parameters * [BUG] inside unit_tests workflow * [TEST] testing the conftest module * [REFAC] [skip ci] * [TEST] test get_confounds method [skip ci] * [TEST] test get_confounds method [skip ci] * [PEP8] [skip ci] * [BUG] runs inputs of get_subject_information no longer needed [skip ci] * [BUG] get_confounds input [skip ci] * [BUG] get_confounds input [skip ci] * Cleaning code [skip ci] * [BUG] real subject ids for search patterns [skip ci] * Consistent Bunches along runs as output of subject_information [skip ci] * [BUG] with subject_information Bunch [skip ci] * Bug with group level [skip ci] * Remove large files earlier in the process [skip ci] * [TEST] exhaustive testing of get_subject_information for J7F9 [skip ci] * [BUG] get_contrasts_2 iterfield [skip ci] --- narps_open/pipelines/__init__.py | 2 +- narps_open/pipelines/team_J7F9.py | 1133 +++++++++-------- tests/pipelines/test_team_J7F9.py | 209 +++ tests/test_data/pipelines/confounds.tsv | 4 + .../pipelines/team_J7F9/confounds.tsv | 3 + .../pipelines/team_J7F9/events_resp.tsv | 5 + 6 files changed, 853 insertions(+), 503 deletions(-) create mode 100644 tests/pipelines/test_team_J7F9.py create mode 100644 tests/test_data/pipelines/confounds.tsv create mode 100644 tests/test_data/pipelines/team_J7F9/confounds.tsv create mode 100644 tests/test_data/pipelines/team_J7F9/events_resp.tsv diff --git a/narps_open/pipelines/__init__.py b/narps_open/pipelines/__init__.py index 
6bbd2889..e0dba921 100644 --- a/narps_open/pipelines/__init__.py +++ b/narps_open/pipelines/__init__.py @@ -51,7 +51,7 @@ 'I52Y': None, 'I9D6': None, 'IZ20': None, - 'J7F9': None, + 'J7F9': 'PipelineTeamJ7F9', 'K9P0': None, 'L1A8': None, 'L3V8': None, diff --git a/narps_open/pipelines/team_J7F9.py b/narps_open/pipelines/team_J7F9.py index 98d74488..d04e66e2 100644 --- a/narps_open/pipelines/team_J7F9.py +++ b/narps_open/pipelines/team_J7F9.py @@ -1,508 +1,637 @@ -from nipype.interfaces.spm import (Coregister, Smooth, OneSampleTTestDesign, EstimateModel, EstimateContrast, - Level1Design, TwoSampleTTestDesign, RealignUnwarp, - Normalize12, NewSegment, FieldMap) -from nipype.interfaces.fsl import ExtractROI -from nipype.interfaces.spm import Threshold -from nipype.algorithms.modelgen import SpecifySPMModel +#!/usr/bin/python +# coding: utf-8 + +""" Write the work of NARPS' team J7F9 using Nipype """ + +from os.path import join +from itertools import product + +from nipype import Workflow, Node, MapNode from nipype.interfaces.utility import IdentityInterface, Function from nipype.interfaces.io import SelectFiles, DataSink +from nipype.interfaces.spm import ( + Smooth, Level1Design, OneSampleTTestDesign, TwoSampleTTestDesign, + EstimateModel, EstimateContrast, Threshold + ) +from nipype.algorithms.modelgen import SpecifySPMModel from nipype.algorithms.misc import Gunzip -from nipype import Workflow, Node, MapNode, JoinNode -from nipype.interfaces.base import Bunch - -from os.path import join as opj -import os -import json - -def get_subject_infos(event_files, runs): - ''' - Create Bunchs for specifySPMModel. - - Parameters : - - event_files: list of str, list of events files (one per run) for the subject - - runs: list of str, list of runs to use - - Returns : - - subject_info : list of Bunch for 1st level analysis. 
- ''' - from nipype.interfaces.base import Bunch - import numpy as np - - cond_names = ['trial', 'missed'] - onset = {} - duration = {} - weights_gain = {} - weights_loss = {} - - for r in range(len(runs)): # Loop over number of runs. - onset.update({s + '_run' + str(r+1) : [] for s in cond_names}) # creates dictionary items with empty lists - duration.update({s + '_run' + str(r+1) : [] for s in cond_names}) - weights_gain.update({'gain_run' + str(r+1) : []}) - weights_loss.update({'loss_run' + str(r+1) : []}) - - for r, run in enumerate(runs): - - f_events = event_files[r] - - with open(f_events, 'rt') as f: - next(f) # skip the header - - for line in f: - info = line.strip().split() - - for cond in cond_names: - val = cond + '_run' + str(r+1) # trial_run1 - val_gain = 'gain_run' + str(r+1) # gain_run1 - val_loss = 'loss_run' + str(r+1) # loss_run1 - if cond =='trial': - onset[val].append(float(info[0])) # onsets for trial_run1 - duration[val].append(float(0)) - weights_gain[val_gain].append(float(info[2])) # weights gain for trial_run1 - weights_loss[val_loss].append(float(info[3])) # weights loss for trial_run1 - elif cond=='missed': - if float(info[4]) < 0.1 or str(info[5]) == 'NoResp': - onset[val].append(float(info[0])) - duration[val].append(float(0)) - - - for gain_val in weights_gain.keys(): - weights_gain[gain_val] = weights_gain[gain_val] - np.mean(weights_gain[gain_val]) - weights_gain[gain_val] = weights_gain[gain_val].tolist() - - for loss_val in weights_loss.keys(): - weights_loss[loss_val] = weights_loss[loss_val] - np.mean(weights_loss[loss_val]) - weights_loss[loss_val] = weights_loss[loss_val].tolist() - - # Bunching is done per run, i.e. trial_run1, trial_run2, etc. 
- # But names must not have '_run1' etc because we concatenate runs - subject_info = [] - for r in range(len(runs)): - - if len(onset['missed_run' + str(r+1)]) ==0: - cond_names = ['trial'] - - cond = [c + '_run' + str(r+1) for c in cond_names] - gain = 'gain_run' + str(r+1) - loss = 'loss_run' + str(r+1) - - subject_info.insert(r, - Bunch(conditions=cond_names, - onsets=[onset[k] for k in cond], - durations=[duration[k] for k in cond], - amplitudes=None, - tmod=None, - pmod=[Bunch(name=['gain', 'loss'], - poly=[1, 1], - param=[weights_gain[gain], - weights_loss[loss]])], - regressor_names=None, - regressors=None)) - - return subject_info - -def get_contrasts(subject_id): - ''' - Create the list of tuples that represents contrasts. - Each contrast is in the form : - (Name,Stat,[list of condition names],[weights on those conditions]) - ''' - # list of condition names - conditions = ['trial', 'trialxgain^1', 'trialxloss^1'] - - # create contrasts - trial = ('trial', 'T', conditions, [1, 0, 0]) - - effect_gain = ('effect_of_gain', 'T', conditions, [0, 1, 0]) - - effect_loss = ('effect_of_loss', 'T', conditions, [0, 0, 1]) - - # contrast list - contrasts = [trial, effect_gain, effect_loss] - - return contrasts - - -def get_parameters_file(filepaths, subject_id, result_dir, working_dir): - ''' - Create new tsv files with only desired parameters per subject per run. - - Parameters : - - filepaths : paths to subject parameters file (i.e. one per run) - - subject_id : subject for whom the 1st level analysis is made - - result_dir: str, directory where results will be stored - - working_dir: str, name of the sub-directory for intermediate results - - Return : - - parameters_file : paths to new files containing only desired parameters. 
- ''' - import pandas as pd - import numpy as np - from os.path import join as opj - import os - - if not isinstance(filepaths, list): - filepaths = [filepaths] - parameters_file = [] - - for i, file in enumerate(filepaths): - df = pd.read_csv(file, sep = '\t', header=0) - temp_list = np.array([df['X'], df['Y'], df['Z'], - df['RotX'], df['RotY'], df['RotZ'], - df['CSF'], df['WhiteMatter'], df['GlobalSignal']]) # Parameters we want to use for the model - retained_parameters = pd.DataFrame(np.transpose(temp_list)) - new_path =opj(result_dir, working_dir, 'parameters_file', f"parameters_file_sub-{subject_id}_run0{str(i+1)}.tsv") - if not os.path.isdir(opj(result_dir, working_dir, 'parameters_file')): - os.mkdir(opj(result_dir, working_dir, 'parameters_file')) - writer = open(new_path, "w") - writer.write(retained_parameters.to_csv(sep = '\t', index = False, header = False, na_rep = '0.0')) - writer.close() - - parameters_file.append(new_path) - - return parameters_file - -def rm_gunzip_files(files, subject_id, result_dir, working_dir): - import shutil - from os.path import join as opj - - gunzip_dir = opj(result_dir, working_dir, 'l1_analysis', f"_subject_id_{subject_id}", 'gunzip_func') - - try: - shutil.rmtree(gunzip_dir) - except OSError as e: - print(e) - else: - print("The directory is deleted successfully") - - return files - -def rm_smoothed_files(files, subject_id, result_dir, working_dir): - import shutil - from os.path import join as opj - - smooth_dir = opj(result_dir, working_dir, 'l1_analysis', f"_subject_id_{subject_id}", 'smooth') - - try: - shutil.rmtree(smooth_dir) - except OSError as e: - print(e) - else: - print("The directory is deleted successfully") - - return files - - -def get_l1_analysis(subject_list, TR, fwhm, run_list, exp_dir, result_dir, working_dir, output_dir): - """ - Returns the first level analysis workflow. 
- - Parameters: - - exp_dir: str, directory where raw data are stored - - result_dir: str, directory where results will be stored - - working_dir: str, name of the sub-directory for intermediate results - - output_dir: str, name of the sub-directory for final results - - subject_list: list of str, list of subject for which you want to do the analysis - - run_list: list of str, list of runs for which you want to do the analysis - - fwhm: float, fwhm for smoothing step - - TR: float, time repetition used during acquisition - - Returns: - - l1_analysis : Nipype WorkFlow - """ - # Infosource Node - To iterate on subjects - infosource = Node(IdentityInterface(fields = ['subject_id', 'exp_dir', 'result_dir', - 'working_dir', 'run_list'], - exp_dir = exp_dir, result_dir = result_dir, working_dir = working_dir, - run_list = run_list), - name = 'infosource') - - infosource.iterables = [('subject_id', subject_list)] - - # Templates to select files node - param_file = opj('derivatives', 'fmriprep', 'sub-{subject_id}', 'func', - 'sub-{subject_id}_task-MGT_run-*_bold_confounds.tsv') - - func_file = opj('derivatives', 'fmriprep', 'sub-{subject_id}', 'func', - 'sub-{subject_id}_task-MGT_run-*_bold_space-MNI152NLin2009cAsym_preproc.nii.gz') - - event_files = opj('sub-{subject_id}', 'func', 'sub-{subject_id}_task-MGT_run-*_events.tsv') - - template = {'param' : param_file, 'func' : func_file, 'event' : event_files} - - # SelectFiles node - to select necessary files - selectfiles = Node(SelectFiles(template, base_directory=exp_dir), name = 'selectfiles') - - # DataSink Node - store the wanted results in the wanted repository - datasink = Node(DataSink(base_directory=result_dir, container=output_dir), name='datasink') - - # GUNZIP NODE : SPM do not use .nii.gz files - gunzip_func = MapNode(Gunzip(), name = 'gunzip_func', iterfield = ['in_file']) - - ## Smoothing node - smooth = Node(Smooth(fwhm = fwhm), name = 'smooth') - - # Get Subject Info - get subject specific condition 
information - subject_infos = Node(Function(input_names=['event_files', 'runs'], - output_names=['subject_info'], - function=get_subject_infos), - name='subject_infos') - - # SpecifyModel - Generates SPM-specific Model - specify_model = Node(SpecifySPMModel(concatenate_runs = True, input_units = 'secs', output_units = 'secs', - time_repetition = TR, high_pass_filter_cutoff = 128), name='specify_model') - - # Level1Design - Generates an SPM design matrix - l1_design = Node(Level1Design(bases = {'hrf': {'derivs': [0, 0]}}, timing_units = 'secs', - interscan_interval = TR), name='l1_design') - - # EstimateModel - estimate the parameters of the model - l1_estimate = Node(EstimateModel(estimation_method={'Classical': 1}), - name="l1_estimate") - - # Node contrasts to get contrasts - contrasts = Node(Function(function=get_contrasts, - input_names=['subject_id'], - output_names=['contrasts']), - name='contrasts') - - # Node parameters to get parameters files - parameters = Node(Function(function=get_parameters_file, - input_names=['filepaths', 'subject_id', 'result_dir', 'working_dir'], - output_names=['parameters_file']), - name='parameters') - - # EstimateContrast - estimates contrasts - contrast_estimate = Node(EstimateContrast(), name="contrast_estimate") - - remove_gunzip_files = Node(Function(input_names = ['files', 'subject_id', 'result_dir', 'working_dir'], - output_names = ['files'], - function = rm_gunzip_files), name = 'remove_gunzip_files') - - remove_smoothed_files = Node(Function(input_names = ['files', 'subject_id', 'result_dir', 'working_dir'], - output_names = ['files'], - function = rm_smoothed_files), name = 'remove_smoothed_files') - - # Create l1 analysis workflow and connect its nodes - l1_analysis = Workflow(base_dir = opj(result_dir, working_dir), name = "l1_analysis") - - l1_analysis.connect([(infosource, selectfiles, [('subject_id', 'subject_id')]), - (infosource, subject_infos, [('exp_dir', 'exp_dir'), ('run_list', 'runs')]), - (infosource, 
contrasts, [('subject_id', 'subject_id')]), - (infosource, remove_gunzip_files, [('subject_id', 'subject_id'), ('result_dir', 'result_dir'), - ('working_dir', 'working_dir')]), - (infosource, remove_smoothed_files, [('subject_id', 'subject_id'), - ('result_dir', 'result_dir'), - ('working_dir', 'working_dir')]), - (subject_infos, specify_model, [('subject_info', 'subject_info')]), - (contrasts, contrast_estimate, [('contrasts', 'contrasts')]), - (selectfiles, parameters, [('param', 'filepaths')]), - (selectfiles, subject_infos, [('event', 'event_files')]), - (infosource, parameters, [('subject_id', 'subject_id'), ('result_dir', 'result_dir'), - ('working_dir', 'working_dir')]), - (selectfiles, gunzip_func, [('func', 'in_file')]), - (gunzip_func, smooth, [('out_file', 'in_files')]), - (smooth, remove_gunzip_files, [('smoothed_files', 'files')]), - (remove_gunzip_files, specify_model, [('files', 'functional_runs')]), - (parameters, specify_model, [('parameters_file', 'realignment_parameters')]), - (specify_model, l1_design, [('session_info', 'session_info')]), - (l1_design, l1_estimate, [('spm_mat_file', 'spm_mat_file')]), - (l1_estimate, contrast_estimate, [('spm_mat_file', 'spm_mat_file'), - ('beta_images', 'beta_images'), - ('residual_image', 'residual_image')]), - (contrast_estimate, datasink, [('con_images', 'l1_analysis.@con_images'), - ('spmT_images', 'l1_analysis.@spmT_images'), - ('spm_mat_file', 'l1_analysis.@spm_mat_file')]), - (contrast_estimate, remove_smoothed_files, [('spmT_images', 'files')]) - ]) - - return l1_analysis - - -def get_subset_contrasts(file_list, method, subject_list, participants_file): - ''' - Parameters : - - file_list : original file list selected by selectfiles node - - subject_list : list of subject IDs that are in the wanted group for the analysis - - participants_file: str, file containing participants characteristics - - method: str, one of "equalRange", "equalIndifference" or "groupComp" - - This function return the file list 
containing only the files belonging to subject in the wanted group. - ''' - equalIndifference_id = [] - equalRange_id = [] - equalIndifference_files = [] - equalRange_files = [] - - with open(participants_file, 'rt') as f: - next(f) # skip the header - - for line in f: - info = line.strip().split() - - if info[0][-3:] in subject_list and info[1] == "equalIndifference": - equalIndifference_id.append(info[0][-3:]) - elif info[0][-3:] in subject_list and info[1] == "equalRange": - equalRange_id.append(info[0][-3:]) - - for file in file_list: - sub_id = file.split('/') - if sub_id[-1][-7:-4] in equalIndifference_id: - equalIndifference_files.append(file) - elif sub_id[-1][-7:-4] in equalRange_id: - equalRange_files.append(file) - - return equalIndifference_id, equalRange_id, equalIndifference_files, equalRange_files - - -def get_l2_analysis(subject_list, n_sub, contrast_list, method, exp_dir, output_dir, working_dir, result_dir, data_dir): - """ - Returns the 2nd level of analysis workflow. 
- - Parameters: - - exp_dir: str, directory where raw data are stored - - result_dir: str, directory where results will be stored - - working_dir: str, name of the sub-directory for intermediate results - - output_dir: str, name of the sub-directory for final results - - subject_list: list of str, list of subject for which you want to do the preprocessing - - contrast_list: list of str, list of contrasts to analyze - - n_sub: float, number of subjects used to do the analysis - - method: one of "equalRange", "equalIndifference" or "groupComp" - - Returns: - - l2_analysis: Nipype WorkFlow - """ - # Infosource - a function free node to iterate over the list of subject names - infosource_groupanalysis = Node(IdentityInterface(fields=['contrast_id', 'subjects'], - subjects = subject_list), - name="infosource_groupanalysis") - - infosource_groupanalysis.iterables = [('contrast_id', contrast_list)] - - # SelectFiles - contrast_file = opj(data_dir, 'NARPS-J7F9', "hypo_*_{contrast_id}_con_sub-*.nii") - - participants_file = opj(exp_dir, 'participants.tsv') - - templates = {'contrast' : contrast_file, 'participants' : participants_file} - - selectfiles_groupanalysis = Node(SelectFiles(templates, base_directory=result_dir, force_list= True), - name="selectfiles_groupanalysis") - - # Datasink node : to save important files - datasink_groupanalysis = Node(DataSink(base_directory = result_dir, container = output_dir), - name = 'datasink_groupanalysis') - - # Node to select subset of contrasts - sub_contrasts = Node(Function(input_names = ['file_list', 'method', 'subject_list', 'participants_file'], - output_names = ['equalIndifference_id', 'equalRange_id', 'equalIndifference_files', 'equalRange_files'], - function = get_subset_contrasts), - name = 'sub_contrasts') - - sub_contrasts.inputs.method = method - - ## Estimate model - estimate_model = Node(EstimateModel(estimation_method={'Classical':1}), name = "estimate_model") - - ## Estimate contrasts - estimate_contrast = 
Node(EstimateContrast(group_contrast=True), - name = "estimate_contrast") - - ## Create thresholded maps - threshold = MapNode(Threshold(use_fwe_correction=False, height_threshold = 0.001, - extent_fdr_p_threshold = 0.05, use_topo_fdr = False, force_activation = True), name = "threshold", iterfield = ["stat_image", "contrast_index"]) - - l2_analysis = Workflow(base_dir = opj(result_dir, working_dir), name = f"l2_analysis_{method}_nsub_{n_sub}") - - l2_analysis.connect([(infosource_groupanalysis, selectfiles_groupanalysis, [('contrast_id', 'contrast_id')]), - (infosource_groupanalysis, sub_contrasts, [('subjects', 'subject_list')]), - (selectfiles_groupanalysis, sub_contrasts, [('contrast', 'file_list'), ('participants', 'participants_file')]), - (estimate_model, estimate_contrast, [('spm_mat_file', 'spm_mat_file'), - ('residual_image', 'residual_image'), - ('beta_images', 'beta_images')]), - (estimate_contrast, threshold, [('spm_mat_file', 'spm_mat_file'), - ('spmT_images', 'stat_image')]), - (estimate_model, datasink_groupanalysis, [('mask_image', f"l2_analysis_{method}_nsub_{n_sub}.@mask")]), - (estimate_contrast, datasink_groupanalysis, [('spm_mat_file', f"l2_analysis_{method}_nsub_{n_sub}.@spm_mat"), - ('spmT_images', f"l2_analysis_{method}_nsub_{n_sub}.@T"), - ('con_images', f"l2_analysis_{method}_nsub_{n_sub}.@con")]), - (threshold, datasink_groupanalysis, [('thresholded_map', f"l2_analysis_{method}_nsub_{n_sub}.@thresh")])]) - - if method=='equalRange' or method=='equalIndifference': - contrasts = [('Group', 'T', ['mean'], [1]), ('Group', 'T', ['mean'], [-1])] - ## Specify design matrix - one_sample_t_test_design = Node(OneSampleTTestDesign(), name = "one_sample_t_test_design") - - l2_analysis.connect([(sub_contrasts, one_sample_t_test_design, [(f"{method}_files", 'in_files')]), - (one_sample_t_test_design, estimate_model, [('spm_mat_file', 'spm_mat_file')])]) - - threshold.inputs.contrast_index = [1, 2] - threshold.synchronize = True - elif method == 
'groupComp': - contrasts = [('Eq range vs Eq indiff in loss', 'T', ['Group_{1}', 'Group_{2}'], [1, -1])] - # Node for the design matrix - two_sample_t_test_design = Node(TwoSampleTTestDesign(unequal_variance=True), name = 'two_sample_t_test_design') - - l2_analysis.connect([(sub_contrasts, two_sample_t_test_design, [('equalRange_files', "group1_files"), - ('equalIndifference_files', 'group2_files')]), - (two_sample_t_test_design, estimate_model, [("spm_mat_file", "spm_mat_file")])]) - - threshold.inputs.contrast_index = [1] +from narps_open.pipelines import Pipeline +from narps_open.data.task import TaskInformation +from narps_open.data.participants import get_group +from narps_open.core.common import ( + remove_file, list_intersection, elements_in_string, clean_list + ) + +class PipelineTeamJ7F9(Pipeline): + """ A class that defines the pipeline of team J7F9. """ + + def __init__(self): + super().__init__() + self.fwhm = 8.0 + self.team_id = 'J7F9' + self.contrast_list = ['0001', '0002', '0003'] + self.subject_level_contrasts = [ + ['trial', 'T', ['trial', 'trialxgain^1', 'trialxloss^1'], [1, 0, 0]], + ['effect_of_gain', 'T', ['trial', 'trialxgain^1', 'trialxloss^1'], [0, 1, 0]], + ['effect_of_loss', 'T', ['trial', 'trialxgain^1', 'trialxloss^1'], [0, 0, 1]] + ] + + def get_preprocessing(self): + """ No preprocessing has been done by team J7F9 """ + return None + + def get_run_level_analysis(self): + """ No run level analysis has been done by team J7F9 """ + return None + + def get_subject_information(event_files): + """ + Create Bunchs for specifySPMModel. + + Parameters : + - event_files: list of str, list of events files (one per run) for the subject + + Returns : + - subject_information : list of Bunch for subject level analysis. 
+ """ + from numpy import mean, ravel + from nipype.interfaces.base import Bunch + + subject_information = [] + + # Create empty dicts + onsets = {} + durations = {} + weights_gain = {} + weights_loss = {} + onsets_missed = {} + durations_missed = {} + + # Run list + run_list = [str(r).zfill(2) for r in range(1, len(event_files) + 1)] + + # Parse event file + for run_id, event_file in zip(run_list, event_files): + + # Init empty lists inside dictionaries + onsets[run_id] = [] + durations[run_id] = [] + weights_gain[run_id] = [] + weights_loss[run_id] = [] + onsets_missed[run_id] = [] + durations_missed[run_id] = [] + + with open(event_file, 'rt') as file: + next(file) # skip the header + + for line in file: + info = line.strip().split() + + # Trials + onsets[run_id].append(float(info[0])) + durations[run_id].append(0.0) + weights_gain[run_id].append(float(info[2])) + weights_loss[run_id].append(float(info[3])) + + # Missed trials + if float(info[4]) < 0.1 or 'NoResp' in info[5]: + onsets_missed[run_id].append(float(info[0])) + durations_missed[run_id].append(0.0) + + # Compute mean weight values across all runs + all_weights_gain = [] + for value in weights_gain.values(): + all_weights_gain += value + mean_gain_weight = mean(all_weights_gain) + + all_weights_loss = [] + for value in weights_loss.values(): + all_weights_loss += value + mean_loss_weight = mean(all_weights_loss) + + # Check if there are any missed trials across all runs + missed_trials = any(t for t in onsets_missed.values()) + + # Create one Bunch per run + for run_id in run_list: + + # Mean center gain and loss weights + for element_id, element in enumerate(weights_gain[run_id]): + weights_gain[run_id][element_id] = element - mean_gain_weight + for element_id, element in enumerate(weights_loss[run_id]): + weights_loss[run_id][element_id] = element - mean_loss_weight + + # Fill Bunch + subject_information.append( + Bunch( + conditions = ['trial', 'missed'] if missed_trials else ['trial'], + onsets = 
[onsets[run_id], onsets_missed[run_id]] if missed_trials\ + else [onsets[run_id]], + durations = [durations[run_id], durations_missed[run_id]] if missed_trials\ + else [durations[run_id]], + amplitudes = None, + tmod = None, + pmod = [ + Bunch( + name = ['gain', 'loss'], + poly = [1, 1], + param = [weights_gain[run_id], weights_loss[run_id]] + ) + ], + regressor_names = None, + regressors = None + ) + ) + + return subject_information + + def get_confounds_file(filepath, subject_id, run_id, working_dir): + """ + Create a new tsv file with only desired confounds per subject per run. + + Parameters : + - filepath : path to the subject confounds file + - subject_id : related subject id + - run_id : related run id + - working_dir: str, name of the directory for intermediate results + + Return : + - confounds_file : path to new file containing only desired confounds + """ + from os import makedirs + from os.path import join + + from pandas import DataFrame, read_csv + from numpy import array, transpose + + # Open original confounds file + data_frame = read_csv(filepath, sep = '\t', header=0) + + # Extract confounds we want to use for the model + retained_parameters = DataFrame(transpose(array([ + data_frame['X'], data_frame['Y'], data_frame['Z'], + data_frame['RotX'], data_frame['RotY'], data_frame['RotZ'], + data_frame['CSF'], data_frame['WhiteMatter'], data_frame['GlobalSignal']]))) + + # Write confounds to a file + confounds_file = join(working_dir, 'confounds_files', + f'confounds_file_sub-{subject_id}_run-{run_id}.tsv') + + makedirs(join(working_dir, 'confounds_files'), exist_ok = True) + + with open(confounds_file, 'w', encoding = 'utf-8') as writer: + writer.write(retained_parameters.to_csv( + sep = '\t', index = False, header = False, na_rep = '0.0')) + + return confounds_file + + def get_subject_level_analysis(self): + """ + Create the subject level analysis workflow. 
+ + Returns: + - subject_level_analysis : nipype.WorkFlow + """ + # Infosource Node - To iterate on subjects + information_source = Node(IdentityInterface( + fields = ['subject_id']), + name = 'information_source') + information_source.iterables = [('subject_id', self.subject_list)] + + # Templates to select files node + template = { + 'confounds' : join('derivatives', 'fmriprep', 'sub-{subject_id}', 'func', + 'sub-{subject_id}_task-MGT_run-*_bold_confounds.tsv'), + 'func' : join('derivatives', 'fmriprep', 'sub-{subject_id}', 'func', + 'sub-{subject_id}_task-MGT_run-*_bold_space-MNI152NLin2009cAsym_preproc.nii.gz'), + 'event' : join('sub-{subject_id}', 'func', + 'sub-{subject_id}_task-MGT_run-*_events.tsv') + } + + # SelectFiles - to select necessary files + select_files = Node(SelectFiles(template), name = 'select_files') + select_files.inputs.base_directory = self.directories.dataset_dir + + # DataSink - store the wanted results in the wanted repository + data_sink = Node(DataSink(), name = 'data_sink') + data_sink.inputs.base_directory = self.directories.output_dir + + # Gunzip - gunzip files because SPM do not use .nii.gz files + gunzip_func = MapNode(Gunzip(), + name = 'gunzip_func', + iterfield = ['in_file']) + + # Smooth - smoothing node + smoothing = Node(Smooth(), name = 'smoothing') + smoothing.inputs.fwhm = self.fwhm + + # Function node get_subject_information - get subject specific condition information + subject_information = Node(Function( + function = self.get_subject_information, + input_names = ['event_files'], + output_names = ['subject_info']), + name = 'subject_information') + + # SpecifyModel - generates SPM-specific Model + specify_model = Node(SpecifySPMModel(), name = 'specify_model') + specify_model.inputs.concatenate_runs = True + specify_model.inputs.input_units = 'secs' + specify_model.inputs.output_units = 'secs' + specify_model.inputs.time_repetition = TaskInformation()['RepetitionTime'] + specify_model.inputs.high_pass_filter_cutoff = 
128 + + # Level1Design - Generates an SPM design matrix + model_design = Node(Level1Design(), name = 'model_design') + model_design.inputs.bases = {'hrf': {'derivs': [0, 0]}} + model_design.inputs.timing_units = 'secs' + model_design.inputs.interscan_interval = TaskInformation()['RepetitionTime'] + + # EstimateModel - estimate the parameters of the model + model_estimate = Node(EstimateModel(), name = 'model_estimate') + model_estimate.inputs.estimation_method = {'Classical': 1} + + # Function node get_confounds_file - get confounds files + confounds = MapNode(Function( + function = self.get_confounds_file, + input_names = ['filepath', 'subject_id', 'run_id', 'working_dir'], + output_names = ['confounds_file']), + name = 'confounds', iterfield = ['filepath', 'run_id']) + confounds.inputs.working_dir = self.directories.working_dir + confounds.inputs.run_id = self.run_list + + # EstimateContrast - estimates contrasts + contrast_estimate = Node(EstimateContrast(), name = 'contrast_estimate') + contrast_estimate.inputs.contrasts = self.subject_level_contrasts + + # Function node remove_gunzip_files - remove output of the gunzip node + remove_gunzip_files = MapNode(Function( + function = remove_file, + input_names = ['_', 'file_name'], + output_names = []), + name = 'remove_gunzip_files', iterfield = 'file_name') + + # Function node remove_smoothed_files - remove output of the smoothing node + remove_smoothed_files = MapNode(Function( + function = remove_file, + input_names = ['_', 'file_name'], + output_names = []), + name = 'remove_smoothed_files', iterfield = 'file_name') + + # Create l1 analysis workflow and connect its nodes + subject_level_analysis = Workflow( + base_dir = self.directories.working_dir, + name = 'subject_level_analysis' + ) + subject_level_analysis.connect([ + (information_source, select_files, [('subject_id', 'subject_id')]), + (subject_information, specify_model, [('subject_info', 'subject_info')]), + (select_files, confounds, [('confounds', 
'filepath')]), + (select_files, subject_information, [('event', 'event_files')]), + (information_source, confounds, [('subject_id', 'subject_id')]), + (select_files, gunzip_func, [('func', 'in_file')]), + (gunzip_func, smoothing, [('out_file', 'in_files')]), + (gunzip_func, remove_gunzip_files, [('out_file', 'file_name')]), + (smoothing, remove_gunzip_files, [('smoothed_files', '_')]), + (smoothing, remove_smoothed_files, [('smoothed_files', 'file_name')]), + (smoothing, specify_model, [('smoothed_files', 'functional_runs')]), + (specify_model, remove_smoothed_files, [('session_info', '_')]), + (confounds, specify_model, [('confounds_file', 'realignment_parameters')]), + (specify_model, model_design, [('session_info', 'session_info')]), + (model_design, model_estimate, [('spm_mat_file', 'spm_mat_file')]), + (model_estimate, contrast_estimate, [ + ('spm_mat_file', 'spm_mat_file'), + ('beta_images', 'beta_images'), + ('residual_image', 'residual_image')]), + (contrast_estimate, data_sink, [ + ('con_images', 'subject_level_analysis.@con_images'), + ('spmT_images', 'subject_level_analysis.@spmT_images'), + ('spm_mat_file', 'subject_level_analysis.@spm_mat_file') + ]) + ]) + + return subject_level_analysis + + def get_subject_level_outputs(self): + """ Return the names of the files the subject level analysis is supposed to generate. 
""" # Contrast maps + templates = [join( + self.directories.output_dir, + 'subject_level_analysis', '_subject_id_{subject_id}', f'con_{contrast_id}.nii')\ + for contrast_id in self.contrast_list] + + # SPM.mat file + templates += [join( + self.directories.output_dir, + 'subject_level_analysis', '_subject_id_{subject_id}', 'SPM.mat')] + + # spmT maps + templates += [join( + self.directories.output_dir, + 'subject_level_analysis', '_subject_id_{subject_id}', f'spmT_{contrast_id}.nii')\ + for contrast_id in self.contrast_list] + + # Format with subject_ids + return_list = [] + for template in templates: + return_list += [template.format(subject_id = s) for s in self.subject_list] + + return return_list + + def get_group_level_analysis(self): + """ + Return all workflows for the group level analysis. + + Returns: + - a list of nipype.WorkFlow + """ + + methods = ['equalRange', 'equalIndifference', 'groupComp'] + return [self.get_group_level_analysis_sub_workflow(method) for method in methods] + + def get_group_level_analysis_sub_workflow(self, method): + """ + Return a workflow for the group level analysis. 
+ + Parameters: + - method: one of 'equalRange', 'equalIndifference' or 'groupComp' + + Returns: + - group_level_analysis: nipype.WorkFlow + """ + # Compute the number of participants used to do the analysis + nb_subjects = len(self.subject_list) + + # Infosource - a function free node to iterate over the list of subject names + information_source = Node( + IdentityInterface( + fields=['contrast_id']), + name='information_source') + information_source.iterables = [('contrast_id', self.contrast_list)] + + # SelectFiles + templates = { + # Contrasts for all participants + 'contrasts' : join(self.directories.output_dir, + 'subject_level_analysis', '_subject_id_*', 'con_{contrast_id}.nii') + } + + select_files = Node(SelectFiles(templates), name = 'select_files') + select_files.inputs.base_directory = self.directories.results_dir + select_files.inputs.force_lists = True + + # Datasink - save important files + data_sink = Node(DataSink(), name = 'data_sink') + data_sink.inputs.base_directory = self.directories.output_dir + + # Function Node get_equal_range_subjects + # Get subjects in the equalRange group and in the subject_list + get_equal_range_subjects = Node(Function( + function = list_intersection, + input_names = ['list_1', 'list_2'], + output_names = ['out_list'] + ), + name = 'get_equal_range_subjects' + ) + get_equal_range_subjects.inputs.list_1 = get_group('equalRange') + get_equal_range_subjects.inputs.list_2 = self.subject_list + + # Function Node get_equal_indifference_subjects + # Get subjects in the equalIndifference group and in the subject_list + get_equal_indifference_subjects = Node(Function( + function = list_intersection, + input_names = ['list_1', 'list_2'], + output_names = ['out_list'] + ), + name = 'get_equal_indifference_subjects' + ) + get_equal_indifference_subjects.inputs.list_1 = get_group('equalIndifference') + get_equal_indifference_subjects.inputs.list_2 = self.subject_list + + # Create a function to complete the subject ids out from the 
get_equal_*_subjects nodes + # If not complete, subject id '001' in search patterns + # would match all contrast files with 'con_0001.nii'. + complete_subject_ids = lambda l : [f'_subject_id_{a}' for a in l] + + # Function Node elements_in_string + # Get contrast files for required subjects + # Note : using a MapNode with elements_in_string requires using clean_list to remove + # None values from the out_list + get_contrasts = MapNode(Function( + function = elements_in_string, + input_names = ['input_str', 'elements'], + output_names = ['out_list'] + ), + name = 'get_contrasts', iterfield = 'input_str' + ) + + # Estimate model + estimate_model = Node(EstimateModel(), name = 'estimate_model') + estimate_model.inputs.estimation_method = {'Classical':1} + + # Estimate contrasts + estimate_contrast = Node(EstimateContrast(), name = 'estimate_contrast') + estimate_contrast.inputs.group_contrast = True + + ## Create thresholded maps + threshold = MapNode(Threshold(), + name = 'threshold', + iterfield = ['stat_image', 'contrast_index']) + threshold.inputs.use_fwe_correction = False + threshold.inputs.height_threshold = 0.001 + threshold.inputs.extent_fdr_p_threshold = 0.05 + threshold.inputs.use_topo_fdr = False + threshold.inputs.force_activation = True threshold.synchronize = True - estimate_contrast.inputs.contrasts = contrasts - - return l2_analysis - - -def reorganize_results(result_dir, output_dir, n_sub, team_ID): - """ - Reorganize the results to analyze them. 
- - Parameters: - - result_dir: str, directory where results will be stored - - output_dir: str, name of the sub-directory for final results - - n_sub: float, number of subject used for the analysis - - team_ID: str, ID of the team to reorganize results - - """ - from os.path import join as opj - import os - import shutil - import gzip - - h1 = opj(result_dir, output_dir, f"l2_analysis_equalIndifference_nsub_{n_sub}", '_contrast_id_gain') - h2 = opj(result_dir, output_dir, f"l2_analysis_equalRange_nsub_{n_sub}", '_contrast_id_gain') - h3 = opj(result_dir, output_dir, f"l2_analysis_equalIndifference_nsub_{n_sub}", '_contrast_id_gain') - h4 = opj(result_dir, output_dir, f"l2_analysis_equalRange_nsub_{n_sub}", '_contrast_id_gain') - h5 = opj(result_dir, output_dir, f"l2_analysis_equalIndifference_nsub_{n_sub}", '_contrast_id_loss') - h6 = opj(result_dir, output_dir, f"l2_analysis_equalRange_nsub_{n_sub}", '_contrast_id_loss') - h7 = opj(result_dir, output_dir, f"l2_analysis_equalIndifference_nsub_{n_sub}", '_contrast_id_loss') - h8 = opj(result_dir, output_dir, f"l2_analysis_equalRange_nsub_{n_sub}", '_contrast_id_loss') - h9 = opj(result_dir, output_dir, f"l2_analysis_groupComp_nsub_{n_sub}", '_contrast_id_loss') - - h = [h1, h2, h3, h4, h5, h6, h7, h8, h9] - - repro_unthresh = [opj(filename, "spmT_0002.nii") if i in [4, 5] else opj(filename, - "spmT_0001.nii") for i, filename in enumerate(h)] - - repro_thresh = [opj(filename, "_threshold1", - "spmT_0002_thr.nii") if i in [4, 5] else opj(filename, - "_threshold0", "spmT_0001_thr.nii") for i, filename in enumerate(h)] - - if not os.path.isdir(opj(result_dir, "NARPS-reproduction")): - os.mkdir(opj(result_dir, "NARPS-reproduction")) - - for i, filename in enumerate(repro_unthresh): - f_in = filename - f_out = opj(result_dir, "NARPS-reproduction", f"team_{team_ID}_nsub_{n_sub}_hypo{i+1}_unthresholded.nii") - shutil.copyfile(f_in, f_out) - - for i, filename in enumerate(repro_thresh): - f_in = filename - f_out = 
opj(result_dir, "NARPS-reproduction", f"team_{team_ID}_nsub_{n_sub}_hypo{i+1}_thresholded.nii") - shutil.copyfile(f_in, f_out) - - print(f"Results files of team {team_ID} reorganized.") - - + group_level_analysis = Workflow( + base_dir = self.directories.working_dir, + name = f'group_level_analysis_{method}_nsub_{nb_subjects}') + group_level_analysis.connect([ + (information_source, select_files, [('contrast_id', 'contrast_id')]), + (select_files, get_contrasts, [('contrasts', 'input_str')]), + (estimate_model, estimate_contrast, [ + ('spm_mat_file', 'spm_mat_file'), + ('residual_image', 'residual_image'), + ('beta_images', 'beta_images')]), + (estimate_contrast, threshold, [ + ('spm_mat_file', 'spm_mat_file'), + ('spmT_images', 'stat_image')]), + (estimate_model, data_sink, [ + ('mask_image', f'group_level_analysis_{method}_nsub_{nb_subjects}.@mask')]), + (estimate_contrast, data_sink, [ + ('spm_mat_file', f'group_level_analysis_{method}_nsub_{nb_subjects}.@spm_mat'), + ('spmT_images', f'group_level_analysis_{method}_nsub_{nb_subjects}.@T'), + ('con_images', f'group_level_analysis_{method}_nsub_{nb_subjects}.@con')]), + (threshold, data_sink, [ + ('thresholded_map', f'group_level_analysis_{method}_nsub_{nb_subjects}.@thresh')])]) + + if method in ('equalRange', 'equalIndifference'): + estimate_contrast.inputs.contrasts = [ + ('Group', 'T', ['mean'], [1]), + ('Group', 'T', ['mean'], [-1]) + ] + + threshold.inputs.contrast_index = [1, 2] + + # Specify design matrix + one_sample_t_test_design = Node(OneSampleTTestDesign(), + name = 'one_sample_t_test_design') + group_level_analysis.connect([ + (get_contrasts, one_sample_t_test_design, [ + (('out_list', clean_list), 'in_files') + ]), + (one_sample_t_test_design, estimate_model, [('spm_mat_file', 'spm_mat_file')]) + ]) + + if method == 'equalRange': + group_level_analysis.connect([ + (get_equal_range_subjects, get_contrasts, [ + (('out_list', complete_subject_ids), 'elements') + ]) + ]) + + elif method == 
'equalIndifference': + group_level_analysis.connect([ + (get_equal_indifference_subjects, get_contrasts, [ + (('out_list', complete_subject_ids), 'elements') + ]) + ]) + + elif method == 'groupComp': + estimate_contrast.inputs.contrasts = [ + ('Eq range vs Eq indiff in loss', 'T', ['Group_{1}', 'Group_{2}'], [1, -1]) + ] + + threshold.inputs.contrast_index = [1] + + # Function Node elements_in_string + # Get contrast files for required subjects + # Note : using a MapNode with elements_in_string requires using clean_list to remove + # None values from the out_list + get_contrasts_2 = MapNode(Function( + function = elements_in_string, + input_names = ['input_str', 'elements'], + output_names = ['out_list'] + ), + name = 'get_contrasts_2', iterfield = 'input_str' + ) + + # Specify design matrix + two_sample_t_test_design = Node(TwoSampleTTestDesign(), + name = 'two_sample_t_test_design') + two_sample_t_test_design.inputs.unequal_variance = True + + group_level_analysis.connect([ + (select_files, get_contrasts_2, [('contrasts', 'input_str')]), + (get_equal_range_subjects, get_contrasts, [ + (('out_list', complete_subject_ids), 'elements') + ]), + (get_equal_indifference_subjects, get_contrasts_2, [ + (('out_list', complete_subject_ids), 'elements') + ]), + (get_contrasts, two_sample_t_test_design, [ + (('out_list', clean_list), 'group1_files') + ]), + (get_contrasts_2, two_sample_t_test_design, [ + (('out_list', clean_list), 'group2_files') + ]), + (two_sample_t_test_design, estimate_model, [('spm_mat_file', 'spm_mat_file')]) + ]) + + return group_level_analysis + + def get_group_level_outputs(self): + """ Return all names for the files the group level analysis is supposed to generate. 
""" + + # Handle equalRange and equalIndifference + parameters = { + 'contrast_id': self.contrast_list, + 'method': ['equalRange', 'equalIndifference'], + 'file': [ + 'con_0001.nii', 'con_0002.nii', 'mask.nii', 'SPM.mat', + 'spmT_0001.nii', 'spmT_0002.nii', + join('_threshold0', 'spmT_0001_thr.nii'), join('_threshold1', 'spmT_0002_thr.nii') + ], + 'nb_subjects' : [str(len(self.subject_list))] + } + parameter_sets = product(*parameters.values()) + template = join( + self.directories.output_dir, + 'group_level_analysis_{method}_nsub_{nb_subjects}', + '_contrast_id_{contrast_id}', + '{file}' + ) + + return_list = [template.format(**dict(zip(parameters.keys(), parameter_values)))\ + for parameter_values in parameter_sets] + + # Handle groupComp + parameters = { + 'contrast_id': self.contrast_list, + 'method': ['groupComp'], + 'file': [ + 'con_0001.nii', 'mask.nii', 'SPM.mat', 'spmT_0001.nii', + join('_threshold0', 'spmT_0001_thr.nii') + ], + 'nb_subjects' : [str(len(self.subject_list))] + } + parameter_sets = product(*parameters.values()) + template = join( + self.directories.output_dir, + 'group_level_analysis_{method}_nsub_{nb_subjects}', + '_contrast_id_{contrast_id}', + '{file}' + ) + + return_list += [template.format(**dict(zip(parameters.keys(), parameter_values)))\ + for parameter_values in parameter_sets] + + return return_list + + def get_hypotheses_outputs(self): + """ Return all hypotheses output file names. + Note that hypotheses 5 to 8 correspond to the maps given by the team in their results ; + but they are not fully consistent with the hypotheses definitions as expected by NARPS. 
+ """ + nb_sub = len(self.subject_list) + files = [ + join(f'group_level_analysis_equalIndifference_nsub_{nb_sub}', + '_contrast_id_0002', '_threshold0', 'spmT_0001_thr.nii'), + join(f'group_level_analysis_equalIndifference_nsub_{nb_sub}', + '_contrast_id_0002', 'spmT_0001.nii'), + join(f'group_level_analysis_equalRange_nsub_{nb_sub}', + '_contrast_id_0002', '_threshold0', 'spmT_0001_thr.nii'), + join(f'group_level_analysis_equalRange_nsub_{nb_sub}', + '_contrast_id_0002', 'spmT_0001.nii'), + join(f'group_level_analysis_equalIndifference_nsub_{nb_sub}', + '_contrast_id_0002', '_threshold0', 'spmT_0001_thr.nii'), + join(f'group_level_analysis_equalIndifference_nsub_{nb_sub}', + '_contrast_id_0002', 'spmT_0001.nii'), + join(f'group_level_analysis_equalRange_nsub_{nb_sub}', + '_contrast_id_0002', '_threshold0', 'spmT_0001_thr.nii'), + join(f'group_level_analysis_equalRange_nsub_{nb_sub}', + '_contrast_id_0002', 'spmT_0001.nii'), + join(f'group_level_analysis_equalIndifference_nsub_{nb_sub}', + '_contrast_id_0003', '_threshold1', 'spmT_0002_thr.nii'), + join(f'group_level_analysis_equalIndifference_nsub_{nb_sub}', + '_contrast_id_0003', 'spmT_0002.nii'), + join(f'group_level_analysis_equalRange_nsub_{nb_sub}', + '_contrast_id_0003', '_threshold1', 'spmT_0001_thr.nii'), + join(f'group_level_analysis_equalRange_nsub_{nb_sub}', + '_contrast_id_0003', 'spmT_0001.nii'), + join(f'group_level_analysis_equalIndifference_nsub_{nb_sub}', + '_contrast_id_0003', '_threshold0', 'spmT_0001_thr.nii'), + join(f'group_level_analysis_equalIndifference_nsub_{nb_sub}', + '_contrast_id_0003', 'spmT_0001.nii'), + join(f'group_level_analysis_equalRange_nsub_{nb_sub}', + '_contrast_id_0003', '_threshold0', 'spmT_0002_thr.nii'), + join(f'group_level_analysis_equalRange_nsub_{nb_sub}', + '_contrast_id_0003', 'spmT_0002.nii'), + join(f'group_level_analysis_groupComp_nsub_{nb_sub}', + '_contrast_id_0003', '_threshold0', 'spmT_0001_thr.nii'), + join(f'group_level_analysis_groupComp_nsub_{nb_sub}', 
+ '_contrast_id_0003', 'spmT_0001.nii') + ] + return [join(self.directories.output_dir, f) for f in files] diff --git a/tests/pipelines/test_team_J7F9.py b/tests/pipelines/test_team_J7F9.py new file mode 100644 index 00000000..706c0b56 --- /dev/null +++ b/tests/pipelines/test_team_J7F9.py @@ -0,0 +1,209 @@ +#!/usr/bin/python +# coding: utf-8 + +""" Tests of the 'narps_open.pipelines.team_J7F9' module. + +Launch this test with PyTest + +Usage: +====== + pytest -q test_team_J7F9.py + pytest -q test_team_J7F9.py -k +""" +from os import mkdir +from os.path import join, exists +from shutil import rmtree +from filecmp import cmp + +from pytest import helpers, mark, fixture +from numpy import isclose +from nipype import Workflow +from nipype.interfaces.base import Bunch + +from narps_open.utils.configuration import Configuration +from narps_open.pipelines.team_J7F9 import PipelineTeamJ7F9 + +TEMPORARY_DIR = join(Configuration()['directories']['test_runs'], 'test_J7F9') + +@fixture +def remove_test_dir(): + """ A fixture to remove temporary directory created by tests """ + + rmtree(TEMPORARY_DIR, ignore_errors = True) + mkdir(TEMPORARY_DIR) + yield # test runs here + rmtree(TEMPORARY_DIR, ignore_errors = True) + +def compare_float_2d_arrays(array_1, array_2): + """ Assert array_1 and array_2 are close enough """ + + assert len(array_1) == len(array_2) + for reference_array, test_array in zip(array_1, array_2): + assert len(reference_array) == len(test_array) + assert isclose(reference_array, test_array).all() + +class TestPipelinesTeamJ7F9: + """ A class that contains all the unit tests for the PipelineTeamJ7F9 class.""" + + @staticmethod + @mark.unit_test + def test_create(): + """ Test the creation of a PipelineTeamJ7F9 object """ + + pipeline = PipelineTeamJ7F9() + + # 1 - check the parameters + assert pipeline.fwhm == 8.0 + assert pipeline.team_id == 'J7F9' + + # 2 - check workflows + assert pipeline.get_preprocessing() is None + assert 
pipeline.get_run_level_analysis() is None + assert isinstance(pipeline.get_subject_level_analysis(), Workflow) + group_level = pipeline.get_group_level_analysis() + + assert len(group_level) == 3 + for sub_workflow in group_level: + assert isinstance(sub_workflow, Workflow) + + @staticmethod + @mark.unit_test + def test_outputs(): + """ Test the expected outputs of a PipelineTeamJ7F9 object """ + pipeline = PipelineTeamJ7F9() + # 1 - 1 subject outputs + pipeline.subject_list = ['001'] + assert len(pipeline.get_preprocessing_outputs()) == 0 + assert len(pipeline.get_run_level_outputs()) == 0 + assert len(pipeline.get_subject_level_outputs()) == 7 + assert len(pipeline.get_group_level_outputs()) == 63 + assert len(pipeline.get_hypotheses_outputs()) == 18 + + # 2 - 4 subjects outputs + pipeline.subject_list = ['001', '002', '003', '004'] + assert len(pipeline.get_preprocessing_outputs()) == 0 + assert len(pipeline.get_run_level_outputs()) == 0 + assert len(pipeline.get_subject_level_outputs()) == 28 + assert len(pipeline.get_group_level_outputs()) == 63 + assert len(pipeline.get_hypotheses_outputs()) == 18 + + @staticmethod + @mark.unit_test + def test_subject_information(): + """ Test the get_subject_information method """ + + # Get test files + test_file = join(Configuration()['directories']['test_data'], 'pipelines', 'events.tsv') + test_file_2 = join(Configuration()['directories']['test_data'], + 'pipelines', 'team_J7F9', 'events_resp.tsv') + + # Prepare several scenarii + info_missed = PipelineTeamJ7F9.get_subject_information([test_file, test_file]) + info_ok = PipelineTeamJ7F9.get_subject_information([test_file_2, test_file_2]) + info_half = PipelineTeamJ7F9.get_subject_information([test_file_2, test_file]) + + # Compare bunches to expected + bunch = info_missed[0] + assert isinstance(bunch, Bunch) + assert bunch.conditions == ['trial', 'missed'] + compare_float_2d_arrays(bunch.onsets, [[4.071, 11.834, 19.535, 27.535, 36.435], [19.535]]) + 
compare_float_2d_arrays(bunch.durations, [[0.0, 0.0, 0.0, 0.0, 0.0], [0.0]]) + assert bunch.amplitudes == None + assert bunch.tmod == None + assert bunch.pmod[0].name == ['gain', 'loss'] + assert bunch.pmod[0].poly == [1, 1] + compare_float_2d_arrays(bunch.pmod[0].param, [[-8.4, 11.6, 15.6, -12.4, -6.4], [-8.2, -0.2, 4.8, 0.8, 2.8]]) + assert bunch.regressor_names == None + assert bunch.regressors == None + + bunch = info_missed[1] + assert isinstance(bunch, Bunch) + assert bunch.conditions == ['trial', 'missed'] + compare_float_2d_arrays(bunch.onsets, [[4.071, 11.834, 19.535, 27.535, 36.435], [19.535]]) + compare_float_2d_arrays(bunch.durations, [[0.0, 0.0, 0.0, 0.0, 0.0], [0.0]]) + assert bunch.amplitudes == None + assert bunch.tmod == None + assert bunch.pmod[0].name == ['gain', 'loss'] + assert bunch.pmod[0].poly == [1, 1] + compare_float_2d_arrays(bunch.pmod[0].param, [[-8.4, 11.6, 15.6, -12.4, -6.4], [-8.2, -0.2, 4.8, 0.8, 2.8]]) + assert bunch.regressor_names == None + assert bunch.regressors == None + + bunch = info_ok[0] + assert isinstance(bunch, Bunch) + assert bunch.conditions == ['trial'] + compare_float_2d_arrays(bunch.onsets, [[4.071, 11.834, 27.535, 36.435]]) + compare_float_2d_arrays(bunch.durations, [[0.0, 0.0, 0.0, 0.0]]) + assert bunch.amplitudes == None + assert bunch.tmod == None + assert bunch.pmod[0].name == ['gain', 'loss'] + assert bunch.pmod[0].poly == [1, 1] + compare_float_2d_arrays(bunch.pmod[0].param, [[-4.5, 15.5, -8.5, -2.5], [-7.0, 1.0, 2.0, 4.0]]) + assert bunch.regressor_names == None + assert bunch.regressors == None + + bunch = info_ok[1] + assert isinstance(bunch, Bunch) + assert bunch.conditions == ['trial'] + compare_float_2d_arrays(bunch.onsets, [[4.071, 11.834, 27.535, 36.435]]) + compare_float_2d_arrays(bunch.durations, [[0.0, 0.0, 0.0, 0.0]]) + assert bunch.amplitudes == None + assert bunch.tmod == None + assert bunch.pmod[0].name == ['gain', 'loss'] + assert bunch.pmod[0].poly == [1, 1] + 
compare_float_2d_arrays(bunch.pmod[0].param, [[-4.5, 15.5, -8.5, -2.5], [-7.0, 1.0, 2.0, 4.0]]) + assert bunch.regressor_names == None + assert bunch.regressors == None + + bunch = info_half[0] + assert isinstance(bunch, Bunch) + assert bunch.conditions == ['trial', 'missed'] + compare_float_2d_arrays(bunch.onsets, [[4.071, 11.834, 27.535, 36.435], []]) + compare_float_2d_arrays(bunch.durations, [[0.0, 0.0, 0.0, 0.0], []]) + assert bunch.amplitudes == None + assert bunch.tmod == None + assert bunch.pmod[0].name == ['gain', 'loss'] + assert bunch.pmod[0].poly == [1, 1] + compare_float_2d_arrays(bunch.pmod[0].param, [[-6.666666666666668, 13.333333333333332, -10.666666666666668, -4.666666666666668], [-7.666666666666666, 0.3333333333333339, 1.333333333333334, 3.333333333333334]]) + assert bunch.regressor_names == None + assert bunch.regressors == None + + bunch = info_half[1] + assert isinstance(bunch, Bunch) + assert bunch.conditions == ['trial', 'missed'] + compare_float_2d_arrays(bunch.onsets, [[4.071, 11.834, 19.535, 27.535, 36.435], [19.535]]) + compare_float_2d_arrays(bunch.durations, [[0.0, 0.0, 0.0, 0.0, 0.0], [0.0]]) + assert bunch.amplitudes == None + assert bunch.tmod == None + assert bunch.pmod[0].name == ['gain', 'loss'] + assert bunch.pmod[0].poly == [1, 1] + compare_float_2d_arrays(bunch.pmod[0].param, [[-6.666666666666668, 13.333333333333332, 17.333333333333332, -10.666666666666668, -4.666666666666668], [-7.666666666666666, 0.3333333333333339, 5.333333333333334, 1.333333333333334, 3.333333333333334]]) + assert bunch.regressor_names == None + assert bunch.regressors == None + + @staticmethod + @mark.unit_test + def test_confounds_file(remove_test_dir): + """ Test the get_confounds_file method """ + + confounds_file = join( + Configuration()['directories']['test_data'], 'pipelines', 'confounds.tsv') + reference_file = join( + Configuration()['directories']['test_data'], 'pipelines', 'team_J7F9', 'confounds.tsv') + + # Get new confounds file + 
PipelineTeamJ7F9.get_confounds_file(confounds_file, 'sid', 'rid', TEMPORARY_DIR) + + # Check confounds file was created + created_confounds_file = join( + TEMPORARY_DIR, 'confounds_files', 'confounds_file_sub-sid_run-rid.tsv') + assert exists(created_confounds_file) + + # Check contents + assert cmp(reference_file, created_confounds_file) + + @staticmethod + @mark.pipeline_test + def test_execution(): + """ Test the execution of a PipelineTeamJ7F9 and compare results """ + helpers.test_pipeline_evaluation('J7F9') diff --git a/tests/test_data/pipelines/confounds.tsv b/tests/test_data/pipelines/confounds.tsv new file mode 100644 index 00000000..f49d4fea --- /dev/null +++ b/tests/test_data/pipelines/confounds.tsv @@ -0,0 +1,4 @@ +CSF WhiteMatter GlobalSignal stdDVARS non-stdDVARS vx-wisestdDVARS FramewiseDisplacement tCompCor00 tCompCor01 tCompCor02 tCompCor03 tCompCor04 tCompCor05 aCompCor00 aCompCor01 aCompCor02 aCompCor03 aCompCor04 aCompCor05 Cosine00 Cosine01 Cosine02 Cosine03 Cosine04 Cosine05 NonSteadyStateOutlier00 X Y Z RotX RotY RotZ +6551.281999999999 6476.4653 9874.576 n/a n/a n/a n/a 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 -0.0 0.0 +6484.7285 6473.4890000000005 9830.212 1.09046686 52.78273392 1.05943739 0.13527900930999998 0.0263099209 -0.0673065879 0.0934882554 -0.0079328884 0.0338007737 -0.011491083999999999 -0.042411347099999996 0.027736422900000002 0.0453303087 -0.07022609490000001 0.0963618709 -0.0200867957 0.0665186088 0.0665174038 0.0665153954 0.0665125838 0.0665089688 0.06650455059999999 0.0 -0.00996895 -0.0313444 -3.00931e-06 0.00132687 -0.000384193 -0.00016819 +6441.5337 6485.7256 9821.212 1.07520139 52.04382706 1.03821933 0.12437666391 -0.0404820317 0.034150583 0.13661184210000002 0.0745358691 -0.0054829985999999995 -0.0217322686 0.046214115199999996 0.005774624 -0.043909359800000006 -0.075619539 0.17546891539999998 -0.0345256763 0.0665153954 0.06650455059999999 0.06648647719999999 
0.0664611772 0.0664286533 0.0663889091 0.0 -2.56954e-05 -0.00923735 0.0549667 0.000997278 -0.00019745 -0.000398988 diff --git a/tests/test_data/pipelines/team_J7F9/confounds.tsv b/tests/test_data/pipelines/team_J7F9/confounds.tsv new file mode 100644 index 00000000..81d3ecd9 --- /dev/null +++ b/tests/test_data/pipelines/team_J7F9/confounds.tsv @@ -0,0 +1,3 @@ +0.0 0.0 0.0 0.0 -0.0 0.0 6551.281999999999 6476.4653 9874.576 +-0.00996895 -0.0313444 -3.00931e-06 0.00132687 -0.000384193 -0.00016819 6484.7285 6473.4890000000005 9830.212 +-2.56954e-05 -0.00923735 0.0549667 0.000997278 -0.00019745 -0.000398988 6441.5337 6485.7256 9821.212 diff --git a/tests/test_data/pipelines/team_J7F9/events_resp.tsv b/tests/test_data/pipelines/team_J7F9/events_resp.tsv new file mode 100644 index 00000000..dd5ea1a5 --- /dev/null +++ b/tests/test_data/pipelines/team_J7F9/events_resp.tsv @@ -0,0 +1,5 @@ +onset duration gain loss RT participant_response +4.071 4 14 6 2.388 weakly_accept +11.834 4 34 14 2.289 strongly_accept +27.535 4 10 15 2.08 strongly_reject +36.435 4 16 17 2.288 weakly_reject \ No newline at end of file From 5f7ba4246b3b1e4df077abf91f8b1e7cd5fc61ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Boris=20Cl=C3=A9net?= <117362283+bclenet@users.noreply.github.com> Date: Tue, 9 Jan 2024 17:02:54 +0100 Subject: [PATCH 2/3] Documentation fixes (#108) * [BUG] inside unit_tests workflow * [DOC] fix some broken links * [DOC] adding template for pipeline testing * [DOC] adding template for pipeline testing * About implemented_pipelines * Deal with test template * [DOC] new readme for the doc * Changes in README.md * [DOC] slight changes to docs/README.md * Add links to past events * Changes in readme.md * fMRI trail * Adding trail description in contribution guide * Separate trails in contribution guide * [TEST] Solving pytest issues with template test --- CONTRIBUTING.md | 92 +++++++++---------- README.md | 26 +++--- docs/README.md | 25 ++--- docs/ci-cd.md | 4 +- docs/data.md | 2 +- 
docs/pipelines.md | 27 +++++- docs/testing.md | 2 +- pytest.ini | 2 +- tests/pipelines/templates/template_test.py | 102 +++++++++++++++++++++ 9 files changed, 202 insertions(+), 80 deletions(-) create mode 100644 tests/pipelines/templates/template_test.py diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 7429acdd..98b31c06 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,74 +1,70 @@ # How to contribute to NARPS Open Pipelines ? For the reproductions, we are especially looking for contributors with the following profiles: - - 👩‍🎤 SPM, FSL, AFNI or nistats has no secrets for you? You know this fMRI analysis software by heart 💓. Please help us by reproducing the corresponding NARPS pipelines. 👣 after step 1, follow the fMRI expert trail. - - 🧑‍🎤 You are a nipype guru? 👣 after step 1, follow the nipype expert trail. + - `🧠 fMRI soft` SPM, FSL, AFNI or nistats has no secrets for you ; you know one of these fMRI analysis tools by :heart:. + - `🐍 Python` You are a Python guru, willing to use [Nipype](https://nipype.readthedocs.io/en/latest/). -# Step 1: Choose a pipeline to reproduce :keyboard: -:thinking: Not sure which pipeline to start with ? 🚦The [pipeline dashboard](https://github.com/Inria-Empenn/narps_open_pipelines/wiki/pipeline_status) provides the progress status for each pipeline. You can pick any pipeline that is in red (not started). +In the following, read the instruction sections where the badge corresponding to your profile appears. -Need more information to make a decision? The `narps_open.utils.description` module of the project, as described [in the documentation](/docs/description.md) provides easy access to all the info we have on each pipeline. +## 1 - Choose a pipeline +`🧠 fMRI soft` `🐍 Python` -When you are ready, [start an issue](https://github.com/Inria-Empenn/narps_open_pipelines/issues/new/choose) and choose **Pipeline reproduction**! +Not sure which pipeline to start with :thinking:? 
The [pipeline dashboard](https://github.com/Inria-Empenn/narps_open_pipelines/wiki/pipeline_status) provides the progress status for each pipeline. You can pick any pipeline that is not fully reproduced, i.e.: not started :red_circle: or in progress :orange_circle: . -# Step 2: Reproduction +> [!NOTE] +> Need more information to make a decision? The `narps_open.utils.description` module of the project, as described [in the documentation](/docs/description.md) provides easy access to all the info we have on each pipeline. -## 🧑‍🎤 NiPype trail +## 2 - Interact using issues +`🧠 fMRI soft` `🐍 Python` -We created templates with modifications to make and holes to fill to create a pipeline. You can find them in [`narps_open/pipelines/templates`](/narps_open/pipelines/templates). +Browse [issues](https://github.com/Inria-Empenn/narps_open_pipelines/issues/) before starting a new one. If the pipeline is :orange_circle:, the associated issues are listed on the [pipeline dashboard](https://github.com/Inria-Empenn/narps_open_pipelines/wiki/pipeline_status). -If you feel it could be better explained, do not hesitate to suggest modifications for the templates. +You can either: +* comment on an existing issue with details or your findings about the pipeline; +* [start an issue](https://github.com/Inria-Empenn/narps_open_pipelines/issues/new/choose) and choose **Pipeline reproduction**. -Feel free to have a look to the following pipelines, these are examples : -| team_id | softwares | fmriprep used ? | pipeline file | -| --- | --- | --- | --- | -| 2T6S | SPM | Yes | [/narps_open/pipelines/team_2T6S.py](/narps_open/pipelines/team_2T6S.py) | -| X19V | FSL | Yes | [/narps_open/pipelines/team_X19V.py](/narps_open/pipelines/team_2T6S.py) | +> [!WARNING] +> As soon as the issue is marked as `🏁 status: ready for dev` you can proceed to the next step. -## 👩‍🎤 fMRI software trail +## 3 - Use pull requests +`🐍 Python` -... +1. 
[Fork](https://docs.github.com/en/get-started/quickstart/fork-a-repo) the repository; +2. create a separate branch for the issue you're working on (do not make changes to the default branch of your fork). +3. push your work to the branch as soon as possible; +4. visit [this page](https://github.com/Inria-Empenn/narps_open_pipelines/pulls) to start a draft pull request. -## Find or propose an issue :clipboard: -Issues are very important for this project. If you want to contribute, you can either **comment an existing issue** or **proposing a new issue**. +> [!WARNING] +> Make sure you create a **Draft Pull Request** as described [here](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request-from-a-fork), and please stick to the description of the pull request template as much as possible. -### Answering an existing issue :label: -To answer an existing issue, make a new comment with the following information: - - Your name and/or github username - - The step you want to contribute to - - The approximate time needed +## 4 - Reproduce pipeline -### Proposing a new issue :bulb: -In order to start a new issue, click [here](https://github.com/Inria-Empenn/narps_open_pipelines/issues/new/choose) and choose the type of issue you want: - - **Feature request** if you aim at improving the project with your ideas ; - - **Bug report** if you encounter a problem or identified a bug ; - - **Classic issue** to ask question, give feedbacks... +### Translate the pipeline description into code +`🐍 Python` -Some issues are (probably) already open, please browse them before starting a new one. If your issue was already reported, you may want complete it with details or other circumstances in which a problem appear. +Write your code and push it to the branch. Make sure you perform all the items of the pull request checklist. 
-## Pull Requests :inbox_tray: -Pull requests are the best way to get your ideas into this repository and to solve the problems as fast as possible. +From the description provided by the team you chose, write Nipype workflows that match the steps performed by the teams (preprocessing, run level analysis, subject level analysis, group level analysis). -### Make A Branch :deciduous_tree: -Create a separate branch for each issue you're working on. Do not make changes to the default branch (e.g. master, develop) of your fork. +We created templates with modifications to make and holes to fill to help you with that. Find them in [`narps_open/pipelines/templates`](/narps_open/pipelines/templates). -### Push Your Code :outbox_tray: -Push your code as soon as possible. +> [!TIP] +> Have a look at the already reproduced pipelines, as examples : +> | team_id | softwares | fmriprep used ? | pipeline file | +> | --- | --- | --- | --- | +> | Q6O0 | SPM | Yes | [/narps_open/pipelines/team_Q6O0.py](/narps_open/pipelines/team_Q6O0.py) | -### Create the Pull Request (PR) :inbox_tray: -Once you pushed your first lines of code to the branch in your fork, visit [this page](https://github.com/Inria-Empenn/narps_open_pipelines/pulls) to start creating a PR for the NARPS Open Pipelines project. +Once your work is ready, you may add a reviewer to your pull request, as described [here](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/requesting-a-pull-request-review). Please turn your draft pull request into a *regular* pull request, by clicking **Ready for review** in the pull request page. -:warning: Please create a **Draft Pull Request** as described [here](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request-from-a-fork), and please stick to the PR description template as much as possible.
+### Run the pipeline and produce evidence +`🧠 fMRI soft` -Continue writing your code and push to the same branch. Make sure you perform all the items of the PR checklist. +From the description provided by the team you chose, perform the analysis on the associated software to get as much metadata (logs, configuration files, and other relevant files for reproducibility) as possible from the execution. Complementary hints and comments on the process would definitely be welcome, to enrich the description (e.g.: relevant parameters not written in the description, etc.). -### Request Review :disguised_face: -Once your PR is ready, you may add a reviewer to your PR, as described [here](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/requesting-a-pull-request-review) in the GitHub documentation. +In particular, these files contain valuable information about model design: +* for FSL pipelines, `design.fsf` setup files coming from FEAT ; +* for SPM pipelines, `matlabbatch` files. -Please turn your Draft Pull Request into a "regular" Pull Request, by clicking **Ready for review** in the Pull Request page. +You can attach these files as comments on the pipeline reproduction issue. -**:wave: Thank you in advance for contributing to the project!** - -## Additional resources - - - git and Gitub: general guidelines can be found [here](https://docs.github.com/en/get-started/quickstart/contributing-to-projects) in the GitHub documentation. +**:wave: Thank you for contributing to the project!** diff --git a/README.md b/README.md index c042855d..7172e25b 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# The NARPS Open Pipelines project +# NARPS Open Pipelines

@@ -15,8 +15,6 @@

-## Project presentation - **The goal of the NARPS Open Pipelines project is to create a codebase reproducing the 70 pipelines of the NARPS study (Botvinik-Nezer et al., 2020) and share this as an open resource for the community**. We base our reproductions on the [original descriptions provided by the teams](https://github.com/poldrack/narps/blob/1.0.1/ImageAnalyses/metadata_files/analysis_pipelines_for_analysis.xlsx) and test the quality of the reproductions by comparing our results with the original results published on NeuroVault. @@ -25,16 +23,17 @@ We base our reproductions on the [original descriptions provided by the teams](h ## Contributing -NARPS open pipelines uses [nipype](https://nipype.readthedocs.io/en/latest/index.html) as a workflow manager and provides a series of templates and examples to help reproduce the different teams’ analysis. +There are many ways you can contribute 🤗 :wave: Any help is welcome ! + +NARPS Open Pipelines uses [nipype](https://nipype.readthedocs.io/en/latest/index.html) as a workflow manager and provides a series of templates and examples to help reproduce the different teams’ analyses. Nevertheless, knowing Python or Nipype is not required to take part in the project. -There are many ways you can contribute 🤗 :wave: Any help is welcome ! Follow the guidelines in [CONTRIBUTING.md](/CONTRIBUTING.md) if you wish to get involved ! -### Installation +## Using the codebase -To get the pipelines running, please follow the installation steps in [INSTALL.md](/INSTALL.md) +To get the pipelines running, please follow the installation steps in [INSTALL.md](/INSTALL.md). -## Getting started -If you are interested in using the codebase to run the pipelines, see the [user documentation (work-in-progress)]. +If you are interested in using the codebase, see the user documentation in [docs](/docs/) (work-in-progress). 
## References @@ -52,7 +51,8 @@ This project is supported by Région Bretagne (Boost MIND) and by Inria (Explora This project is developed in the Empenn team by Boris Clenet, Elodie Germani, Jeremy Lefort-Besnard and Camille Maumet with contributions by Rémi Gau. In addition, this project was presented and received contributions during the following events: - - OHBM Brainhack 2022 (June 2022): Elodie Germani, Arshitha Basavaraj, Trang Cao, Rémi Gau, Anna Menacher, Camille Maumet. - - e-ReproNim FENS NENS Cluster Brainhack (June 2023) : Liz Bushby, Boris Clénet, Michael Dayan, Aimee Westbrook. - - OHBM Brainhack 2023 (July 2023): Arshitha Basavaraj, Boris Clénet, Rémi Gau, Élodie Germani, Yaroslav Halchenko, Camille Maumet, Paul Taylor. - - ORIGAMI lab hackathon (Sept 2023): + - [OHBM Brainhack 2022](https://ohbm.github.io/hackathon2022/) (June 2022): Elodie Germani, Arshitha Basavaraj, Trang Cao, Rémi Gau, Anna Menacher, Camille Maumet. + - [e-ReproNim FENS NENS Cluster Brainhack](https://repro.school/2023-e-repronim-brainhack/) (June 2023) : Liz Bushby, Boris Clénet, Michael Dayan, Aimee Westbrook. + - [OHBM Brainhack 2023](https://ohbm.github.io/hackathon2023/) (July 2023): Arshitha Basavaraj, Boris Clénet, Rémi Gau, Élodie Germani, Yaroslav Halchenko, Camille Maumet, Paul Taylor. + - [ORIGAMI lab](https://neurodatascience.github.io/) hackathon (September 2023): + - [Brainhack Marseille 2023](https://brainhack-marseille.github.io/) (December 2023): diff --git a/docs/README.md b/docs/README.md index f9f6d193..f909aa0e 100644 --- a/docs/README.md +++ b/docs/README.md @@ -2,14 +2,17 @@ :mega: This is the starting point for the documentation of the NARPS open pipelines project. 
-Here are the available topics : - -* :runner: [running](/docs/running.md) tells you how to run pipelines in NARPS open pipelines -* :brain: [data](/docs/data.md) contains instructions to handle the data needed by the project -* :hammer_and_wrench: [environment](/docs/environment.md) contains instructions to handle the software environment needed by the project -* :goggles: [description](/docs/description.md) tells you how to get convenient descriptions of the pipelines, as written by the teams involved in NARPS. -* :microscope: [testing](/docs/testing.md) details the testing features of the project, i.e.: how is the code tested ? -* :package: [ci-cd](/docs/ci-cd.md) contains the information on how continuous integration and delivery (knowned as CI/CD) is set up. -* :writing_hand: [pipeline](/docs/pipelines.md) tells you all you need to know in order to write pipelines -* :compass: [core](/docs/core.md) a list of helpful functions when writing pipelines -* :vertical_traffic_light: [status](/docs/status.md) contains the information on how to get the work progress status for a pipeline. +## Use the project +* :brain: [data](/docs/data.md) - Handle the needed data. +* :hammer_and_wrench: [environment](/docs/environment.md) - Handle the software environment. +* :rocket: [running](/docs/running.md) - Launch pipelines ! + +## Contribute to the code +* :goggles: [description](/docs/description.md) - Conveniently access descriptions of the pipelines, as written by the teams involved in NARPS. +* :writing_hand: [pipelines](/docs/pipelines.md) - How to write pipelines. +* :compass: [core](/docs/core.md) - Helpful functions for writing pipelines. +* :microscope: [testing](/docs/testing.md) - How to test the code. + +## For maintainers +* :vertical_traffic_light: [status](/docs/status.md) - Work progress status for pipelines. +* :package: [ci-cd](/docs/ci-cd.md) - Continuous Integration and Delivery (a.k.a. CI/CD). 
diff --git a/docs/ci-cd.md b/docs/ci-cd.md index c292eed1..92175866 100644 --- a/docs/ci-cd.md +++ b/docs/ci-cd.md @@ -35,10 +35,10 @@ For now, the following workflows are set up: | Name / File | What does it do ? | When is it launched ? | Where does it run ? | How can I see the results ? | | ----------- | ----------- | ----------- | ----------- | ----------- | | [code_quality](/.github/workflows/code_quality.yml) | A static analysis of the python code (see the [testing](/docs/testing.md) topic of the documentation for more information). | For every push or pull_request if there are changes on `.py` files. | On GitHub servers. | Outputs (logs of pylint) are stored as [downloadable artifacts](https://docs.github.com/en/actions/managing-workflow-runs/downloading-workflow-artifacts) during 15 days after the push. | -| [codespell](/.github/workflows/codespell.yml) | A static analysis of the text files for commonly made typos using [codespell](codespell-project/codespell: check code for common misspellings). | For every push or pull_request to the `maint` branch. | On GitHub servers. | Outputs (logs of codespell) are stored as [downloadable artifacts](https://docs.github.com/en/actions/managing-workflow-runs/downloading-workflow-artifacts) during 15 days after the push. | +| [codespell](/.github/workflows/codespell.yml) | A static analysis of the text files for commonly made typos using [codespell](https://github.com/codespell-project/codespell). | For every push or pull_request to the `main` branch. | On GitHub servers. | Typos are displayed in the workflow summary. | | [pipeline_tests](/.github/workflows/pipelines.yml) | Runs all the tests for changed pipelines. | For every push or pull_request, if a pipeline file changed. | On Empenn runners. | Outputs (logs of pytest) are stored as downloadable artifacts during 15 days after the push. | | [test_changes](/.github/workflows/test_changes.yml) | It runs all the changed tests for the project. 
| For every push or pull_request, if a test file changed. | On Empenn runners. | Outputs (logs of pytest) are stored as downloadable artifacts during 15 days after the push. | -| [unit_testing](/.github/workflows/unit_testing.yml) | It runs all the unit tests for the project (see the [testing](/docs/testing.md) topic of the documentation for more information). | For every push or pull_request, if a file changed inside `narps_open/`, or a file related to test execution. | On GitHub servers. | Outputs (logs of pytest) are stored as downloadable artifacts during 15 days after the push. | +| [unit_testing](/.github/workflows/unit_testing.yml) | It runs all the unit tests for the project (see the [testing](/docs/testing.md) topic of the documentation for more information). | For every push or pull_request, if a file changed inside `narps_open/`, or a file related to test execution. | On Empenn runners. | Outputs (logs of pytest) are stored as downloadable artifacts during 15 days after the push. | ### Cache diff --git a/docs/data.md b/docs/data.md index e2e84da1..c5b55fba 100644 --- a/docs/data.md +++ b/docs/data.md @@ -2,7 +2,7 @@ The datasets used for the project can be downloaded using one of the two options below. -The path to these datasets must conform with the information located in the configuration file you plan to use (cf. [documentation about configuration](docs/configuration.md)). By default, these paths are in the repository: +The path to these datasets must conform with the information located in the configuration file you plan to use (cf. [documentation about configuration](/docs/configuration.md)). By default, these paths are in the repository: * `data/original/`: original data from NARPS * `data/results/`: results from NARPS teams diff --git a/docs/pipelines.md b/docs/pipelines.md index fb7d2afc..d59f9a9e 100644 --- a/docs/pipelines.md +++ b/docs/pipelines.md @@ -5,6 +5,7 @@ Here are a few principles you should know before creating a pipeline. 
Further in Please apply these principles in the following order. ## Create a file containing the pipeline + The pipeline must be contained in a single file named `narps_open/pipelines/team_<team_id>.py`. ## Inherit from `Pipeline` @@ -89,7 +90,8 @@ def get_group_level_outputs(self): """ Return the names of the files the group level analysis is supposed to generate. """ ``` -:warning: Do not declare the method if no files are generated by the corresponding step. For example, if no preprocessing was done by the team, the `get_preprocessing_outputs` method must not be implemented. +> [!WARNING] +> Do not declare the method if no files are generated by the corresponding step. For example, if no preprocessing was done by the team, the `get_preprocessing_outputs` method must not be implemented. You should use other pipeline attributes to generate the lists of outputs dynamically. E.g.: @@ -124,8 +126,27 @@ As explained before, all pipeline inherit from the `narps_open.pipelines.Pipelin * `fwhm` : full width at half maximum for the smoothing kernel (in mm) : * `tr` : repetition time of the fMRI acquisition (equals 1.0s) +## Set your pipeline as implemented + +Inside `narps_open/pipelines/__init__.py`, set the pipeline as implemented. I.e.: if the pipeline you reproduce is 2T6S, update the line : + +```python + '2T6S': None, +``` + +with : + +```python + '2T6S': 'PipelineTeam2T6S', +``` + +inside the `implemented_pipelines` dictionary. + ## Test your pipeline -First have a look at the [testing topic of the documentation](./docs/testing.md). It explains how testing works for inside project and how you should write the tests related to your pipeline. +First have a look at the [testing page of the documentation](/docs/testing.md). It explains how testing works for the project and how you should write the tests related to your pipeline. + +All tests must be contained in a single file named `tests/pipelines/test_team_<team_id>.py`. 
You can start by copy-pasting the template file : [tests/pipelines/templates/template_test.py](/tests/pipelines/templates/template_test.py) inside the `tests/pipelines/` directory, renaming it accordingly. Then, follow the instructions and tips inside the template and don't forget to replace `XXXX` with the actual team id, inside the document. -Feel free to have a look at [tests/pipelines/test_team_2T6S.py](./tests/pipelines/test_team_2T6S.py), which is the file containing all the automatic tests for the 2T6S pipeline : it gives a good example. +> [!NOTE] +> Feel free to have a look at [tests/pipelines/test_team_2T6S.py](/tests/pipelines/test_team_2T6S.py), which is the file containing all the automatic tests for the 2T6S pipeline : it gives an example. diff --git a/docs/testing.md b/docs/testing.md index 2bd96584..5294ea9b 100644 --- a/docs/testing.md +++ b/docs/testing.md @@ -55,7 +55,7 @@ Use pytest [markers](https://docs.pytest.org/en/7.1.x/example/markers.html) to i | Type of test | marker | Description | | ----------- | ----------- | ----------- | | unit tests | `unit_test` | Unitary test a method/function | -| pipeline tests | `pieline_test` | Compute a whole pipeline and check its outputs are close enough with the team's results | +| pipeline tests | `pipeline_test` | Compute a whole pipeline and check its outputs are close enough with the team's results | ## Save time by downsampling data diff --git a/pytest.ini b/pytest.ini index 14522dc7..f949712a 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,5 +1,5 @@ [pytest] -addopts = --strict-markers +addopts = --strict-markers --ignore=tests/pipelines/templates/ testpaths = tests markers = diff --git a/tests/pipelines/templates/template_test.py b/tests/pipelines/templates/template_test.py new file mode 100644 index 00000000..0c3683fd --- /dev/null +++ b/tests/pipelines/templates/template_test.py @@ -0,0 +1,102 @@ +#!/usr/bin/python +# coding: utf-8 + +""" This template can be used to test a pipeline. 
+ + - Replace all occurrences of XXXX by the actual id of the team. + - All lines starting with [INFO], are meant to help you during the reproduction, + these can be removed eventually. + - Also remove lines starting with [TODO], once you did what they suggested. + - Remove this docstring once you are done with coding the tests. +""" + +""" Tests of the 'narps_open.pipelines.team_XXXX' module. + +Launch this test with PyTest + +Usage: +====== + pytest -q test_team_XXXX.py + pytest -q test_team_XXXX.py -k <selected_test> +""" + +# [INFO] About these imports : +# [INFO] - pytest.helpers allows using the helpers registered in tests/conftest.py +# [INFO] - pytest.mark allows categorizing tests as unitary or pipeline tests +from pytest import helpers, mark + +# [INFO] Only for type testing +from nipype import Workflow + +# [INFO] Of course, import the class you want to test, here the Pipeline class for the team XXXX +from narps_open.pipelines.team_XXXX import PipelineTeamXXXX + +# [INFO] All tests should be contained in the following class, in order to sort them. +class TestPipelinesTeamXXXX: + """ A class that contains all the unit tests for the PipelineTeamXXXX class.""" + + # [TODO] Write one or several unit_test (and mark them as such) + # [TODO] ideally for each method of the class you test. 
+ + # [INFO] Here is one example for the __init__() method + @staticmethod + @mark.unit_test + def test_create(): + """ Test the creation of a PipelineTeamXXXX object """ + + pipeline = PipelineTeamXXXX() + assert pipeline.fwhm == 8.0 + assert pipeline.team_id == 'XXXX' + + # [INFO] Here is one example for the methods returning workflows + @staticmethod + @mark.unit_test + def test_workflows(): + """ Test the workflows of a PipelineTeamXXXX object """ + + pipeline = PipelineTeamXXXX() + assert pipeline.get_preprocessing() is None + assert pipeline.get_run_level_analysis() is None + assert isinstance(pipeline.get_subject_level_analysis(), Workflow) + group_level = pipeline.get_group_level_analysis() + + assert len(group_level) == 3 + for sub_workflow in group_level: + assert isinstance(sub_workflow, Workflow) + + # [INFO] Here is one example for the methods returning outputs + @staticmethod + @mark.unit_test + def test_outputs(): + """ Test the expected outputs of a PipelineTeamXXXX object """ + pipeline = PipelineTeamXXXX() + + # 1 - 1 subject outputs + pipeline.subject_list = ['001'] + assert len(pipeline.get_preprocessing_outputs()) == 0 + assert len(pipeline.get_run_level_outputs()) == 0 + assert len(pipeline.get_subject_level_outputs()) == 7 + assert len(pipeline.get_group_level_outputs()) == 63 + assert len(pipeline.get_hypotheses_outputs()) == 18 + + # 2 - 4 subjects outputs + pipeline.subject_list = ['001', '002', '003', '004'] + assert len(pipeline.get_preprocessing_outputs()) == 0 + assert len(pipeline.get_run_level_outputs()) == 0 + assert len(pipeline.get_subject_level_outputs()) == 28 + assert len(pipeline.get_group_level_outputs()) == 63 + assert len(pipeline.get_hypotheses_outputs()) == 18 + + # [TODO] Feel free to add other methods, e.g. 
to test the custom node functions of the pipeline + + # [TODO] Write one pipeline_test (and mark it as such) + + # [INFO] The pipeline_test will most likely be exactly written this way : + @staticmethod + @mark.pipeline_test + def test_execution(): + """ Test the execution of a PipelineTeamXXXX and compare results """ + + # [INFO] We use the `test_pipeline_evaluation` helper which is responsible for running the + # [INFO] pipeline, iterating over subjects and comparing output with expected results. + helpers.test_pipeline_evaluation('XXXX') From d13e25f256ff53f07c5e072bbbc441a36aa90c1f Mon Sep 17 00:00:00 2001 From: Camille Maumet Date: Tue, 9 Jan 2024 17:35:03 +0100 Subject: [PATCH 3/3] Datalad instructions in INSTALL.md (#136) Link to install information in datalad handbook (rather than datalad homepage): https://handbook.datalad.org/en/latest/intro/installation.html#install-datalad as it includes more detailed instructions (in particular to use brew for macOS M1) --- INSTALL.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/INSTALL.md b/INSTALL.md index b6142cc0..3d78cd3b 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -6,7 +6,7 @@ ## 2 - Clone the code -First, install [Datalad](https://www.datalad.org/). This will allow you to access the NARPS data easily, as it is included in the repository as [datalad subdatasets](http://handbook.datalad.org/en/latest/basics/101-106-nesting.html). +First, install [Datalad](https://handbook.datalad.org/en/latest/intro/installation.html#install-datalad). This will allow you to access the NARPS data easily, as it is included in the repository as [datalad subdatasets](http://handbook.datalad.org/en/latest/basics/101-106-nesting.html). Then, [clone](https://docs.github.com/en/repositories/creating-and-managing-repositories/cloning-a-repository) the project :