From 25891232871c1642dccd549129a0afc484e974e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Boris=20Cl=C3=A9net?= <117362283+bclenet@users.noreply.github.com> Date: Thu, 21 Sep 2023 11:04:55 +0200 Subject: [PATCH] 2T6S pipeline reproduction (#75) * Dummy change to test 2T6S pipeline on CI with rectified results data * [REFAC] 2T6S pipeline * [BUG] cleaning connections * [BUG] cleaning connections * [TEST] init a test for conftest.py * [BUG] inside unit_tests workflow * [TEST] testing the conftest module * Issue with parameters dir creation * Bug with makedirs import * Workaround to change hypotheses 5&8 files * Change correlation threshold values --- narps_open/pipelines/team_2T6S.py | 134 ++++++++++-------- .../utils/configuration/testing_config.toml | 2 +- 2 files changed, 77 insertions(+), 59 deletions(-) diff --git a/narps_open/pipelines/team_2T6S.py b/narps_open/pipelines/team_2T6S.py index 42aa344f..6c42201a 100755 --- a/narps_open/pipelines/team_2T6S.py +++ b/narps_open/pipelines/team_2T6S.py @@ -1,7 +1,7 @@ #!/usr/bin/python # coding: utf-8 -""" Write the work of NARPS' team 2T6S using Nipype """ +""" Write the work of NARPS team 2T6S using Nipype """ from os.path import join from itertools import product @@ -78,8 +78,10 @@ def get_subject_infos(event_files, runs): duration[val].append(float(info[4])) # durations for trial (rpz by RT) else: # trial with no response : duration of 4 s duration[val].append(float(4)) - weights_gain[val_gain].append(float(info[2])) # weights gain for trial_run1 - weights_loss[val_loss].append(-1.0 * float(info[3])) # weights loss for trial_run1 + # weights gain for trial_run1 + weights_gain[val_gain].append(float(info[2])) + # weights loss for trial_run1 + weights_loss[val_loss].append(-1.0 * float(info[3])) # Bunching is done per run, i.e. trial_run1, trial_run2, etc. # But names must not have '_run1' etc because we concatenate runs @@ -141,11 +143,11 @@ def get_parameters_file(filepaths, subject_id, working_dir): Return : - parameters_file : paths to new files containing only desired parameters. """ - from os import mkdir + from os import makedirs from os.path import join, isdir - import pandas as pd - import numpy as np + from pandas import read_csv, DataFrame + from numpy import array, transpose # Handle the case where filepaths is a single path (str) if not isinstance(filepaths, list): @@ -154,21 +156,20 @@ def get_parameters_file(filepaths, subject_id, working_dir): # Create the parameters files parameters_file = [] for file_id, file in enumerate(filepaths): - data_frame = pd.read_csv(file, sep = '\t', header=0) + data_frame = read_csv(file, sep = '\t', header=0) # Extract parameters we want to use for the model - temp_list = np.array([ + temp_list = array([ data_frame['X'], data_frame['Y'], data_frame['Z'], data_frame['RotX'], data_frame['RotY'], data_frame['RotZ']]) - retained_parameters = pd.DataFrame(np.transpose(temp_list)) + retained_parameters = DataFrame(transpose(temp_list)) # Write parameters to a parameters file # TODO : warning !!! filepaths must be ordered (1,2,3,4) for the following code to work - new_path =join(working_dir, 'parameters_file', + new_path = join(working_dir, 'parameters_file', f'parameters_file_sub-{subject_id}_run-{str(file_id + 1).zfill(2)}.tsv') - if not isdir(join(working_dir, 'parameters_file')): - mkdir(join(working_dir, 'parameters_file')) + makedirs(join(working_dir, 'parameters_file'), exist_ok = True) with open(new_path, 'w') as writer: writer.write(retained_parameters.to_csv( @@ -187,11 +188,11 @@ def remove_gunzip_files(_, subject_id, working_dir): Parameters: - _: Node input only used for triggering the Node - - subject_id: str, TODO - - working_id: str, TODO + - subject_id: str, subject id from which to remove the unzipped file + - working_dir: str, path to the working directory """ - from shutil import rmtree from os.path import join + from shutil import rmtree try: rmtree(join(working_dir, 'l1_analysis', f'_subject_id_{subject_id}', 'gunzip_func')) @@ -209,11 +210,11 @@ def remove_smoothed_files(_, subject_id, working_dir): Parameters: - _: Node input only used for triggering the Node - - subject_id: str, TODO - - working_id: str, TODO + - subject_id: str, subject id from which to remove the smoothed file + - working_dir: str, path to the working directory """ - from shutil import rmtree from os.path import join + from shutil import rmtree try: rmtree(join(working_dir, 'l1_analysis', f'_subject_id_{subject_id}', 'smooth')) @@ -231,11 +232,7 @@ def get_subject_level_analysis(self): """ # Infosource Node - To iterate on subjects infosource = Node(IdentityInterface( - fields = ['subject_id', 'dataset_dir', 'results_dir', 'working_dir', 'run_list'], - dataset_dir = self.directories.dataset_dir, - results_dir = self.directories.results_dir, - working_dir = self.directories.working_dir, - run_list = self.run_list), + fields = ['subject_id']), name = 'infosource') infosource.iterables = [('subject_id', self.subject_list)] @@ -275,6 +272,7 @@ def get_subject_level_analysis(self): input_names = ['event_files', 'runs'], output_names = ['subject_info']), name = 'subject_infos') + subject_infos.inputs.runs = self.run_list # SpecifyModel - generates SPM-specific Model specify_model = Node(SpecifySPMModel( @@ -332,16 +330,13 @@ def get_subject_level_analysis(self): l1_analysis = Workflow(base_dir = self.directories.working_dir, name = 'l1_analysis') l1_analysis.connect([ (infosource, selectfiles, [('subject_id', 'subject_id')]), - (infosource, subject_infos, [('run_list', 'runs')]), (infosource, remove_gunzip_files, [('subject_id', 'subject_id')]), (infosource, remove_smoothed_files, [('subject_id', 'subject_id')]), + (infosource, parameters, [('subject_id', 'subject_id')]), (subject_infos, specify_model, [('subject_info', 'subject_info')]), (contrasts, contrast_estimate, [('contrasts', 'contrasts')]), (selectfiles, parameters, [('param', 'filepaths')]), (selectfiles, subject_infos, [('event', 'event_files')]), - (infosource, parameters, [ - ('subject_id', 'subject_id'), - ('working_dir', 'working_dir')]), (selectfiles, gunzip_func, [('func', 'in_file')]), (gunzip_func, smooth, [('out_file', 'in_files')]), (smooth, specify_model, [('smoothed_files', 'functional_runs')]), @@ -401,8 +396,10 @@ def get_subset_contrasts(file_list, subject_list, participants_file): Returns : - equal_indifference_id : a list of subject ids in the equalIndifference group - equal_range_id : a list of subject ids in the equalRange group - - equal_indifference_files : a subset of file_list corresponding to subjects in the equalIndifference group - - equal_range_files : a subset of file_list corresponding to subjects in the equalRange group + - equal_indifference_files : a subset of file_list corresponding to + subjects in the equalIndifference group + - equal_range_files : a subset of file_list corresponding to + subjects in the equalRange group """ equal_indifference_id = [] equal_range_id = [] @@ -454,8 +451,7 @@ def get_group_level_analysis_sub_workflow(self, method): # Infosource - iterate over the list of contrasts infosource_groupanalysis = Node( IdentityInterface( - fields = ['contrast_id', 'subjects'], - subjects = self.subject_list), + fields = ['contrast_id', 'subjects']), name = 'infosource_groupanalysis') infosource_groupanalysis.iterables = [('contrast_id', self.contrast_list)] @@ -469,7 +465,7 @@ def get_group_level_analysis_sub_workflow(self, method): } selectfiles_groupanalysis = Node(SelectFiles( - templates, base_directory=self.directories.results_dir, force_list= True), + templates, base_directory = self.directories.results_dir, force_list = True), name = 'selectfiles_groupanalysis') # Datasink - save important files @@ -481,14 +477,14 @@ def get_group_level_analysis_sub_workflow(self, method): # Function node get_subset_contrasts - select subset of contrasts sub_contrasts = Node(Function( function = self.get_subset_contrasts, - input_names = ['file_list', 'method', 'subject_list', 'participants_file'], + input_names = ['file_list', 'subject_list', 'participants_file'], output_names = [ 'equalIndifference_id', 'equalRange_id', 'equalIndifference_files', 'equalRange_files']), name = 'sub_contrasts') - sub_contrasts.inputs.method = method + sub_contrasts.inputs.subject_list = self.subject_list # Estimate model estimate_model = Node(EstimateModel( @@ -513,8 +509,6 @@ def get_group_level_analysis_sub_workflow(self, method): l2_analysis.connect([ (infosource_groupanalysis, selectfiles_groupanalysis, [ ('contrast_id', 'contrast_id')]), - (infosource_groupanalysis, sub_contrasts, [ - ('subjects', 'subject_list')]), (selectfiles_groupanalysis, sub_contrasts, [ ('contrast', 'file_list'), ('participants', 'participants_file')]), @@ -618,29 +612,53 @@ def get_group_level_outputs(self): return return_list def get_hypotheses_outputs(self): - """ Return all hypotheses output file names. - Note that hypotheses 5 to 8 correspond to the maps given by the team in their results ; - but they are not fully consistent with the hypotheses definitions as expected by NARPS. - """ + """ Return all hypotheses output file names. """ nb_sub = len(self.subject_list) files = [ - join(f'l2_analysis_equalIndifference_nsub_{nb_sub}', '_contrast_id_0002', '_threshold0', 'spmT_0001_thr.nii'), - join(f'l2_analysis_equalIndifference_nsub_{nb_sub}', '_contrast_id_0002', 'spmT_0001.nii'), - join(f'l2_analysis_equalRange_nsub_{nb_sub}', '_contrast_id_0002', '_threshold0', 'spmT_0001_thr.nii'), - join(f'l2_analysis_equalRange_nsub_{nb_sub}', '_contrast_id_0002', 'spmT_0001.nii'), - join(f'l2_analysis_equalIndifference_nsub_{nb_sub}', '_contrast_id_0002', '_threshold0', 'spmT_0001_thr.nii'), - join(f'l2_analysis_equalIndifference_nsub_{nb_sub}', '_contrast_id_0002', 'spmT_0001.nii'), - join(f'l2_analysis_equalRange_nsub_{nb_sub}', '_contrast_id_0002', '_threshold0', 'spmT_0001_thr.nii'), - join(f'l2_analysis_equalRange_nsub_{nb_sub}', '_contrast_id_0002', 'spmT_0001.nii'), - join(f'l2_analysis_equalIndifference_nsub_{nb_sub}', '_contrast_id_0003', '_threshold1', 'spmT_0002_thr.nii'), - join(f'l2_analysis_equalIndifference_nsub_{nb_sub}', '_contrast_id_0003', 'spmT_0002.nii'), - join(f'l2_analysis_equalRange_nsub_{nb_sub}', '_contrast_id_0003', '_threshold1', 'spmT_0001_thr.nii'), - join(f'l2_analysis_equalRange_nsub_{nb_sub}', '_contrast_id_0003', 'spmT_0001.nii'), - join(f'l2_analysis_equalIndifference_nsub_{nb_sub}', '_contrast_id_0003', '_threshold0', 'spmT_0001_thr.nii'), - join(f'l2_analysis_equalIndifference_nsub_{nb_sub}', '_contrast_id_0003', 'spmT_0001.nii'), - join(f'l2_analysis_equalRange_nsub_{nb_sub}', '_contrast_id_0003', '_threshold0', 'spmT_0002_thr.nii'), - join(f'l2_analysis_equalRange_nsub_{nb_sub}', '_contrast_id_0003', 'spmT_0002.nii'), - join(f'l2_analysis_groupComp_nsub_{nb_sub}', '_contrast_id_0003', '_threshold0', 'spmT_0001_thr.nii'), - join(f'l2_analysis_groupComp_nsub_{nb_sub}', '_contrast_id_0003', 'spmT_0001.nii') + # Hypothesis 1 + join(f'l2_analysis_equalIndifference_nsub_{nb_sub}', + '_contrast_id_0002', '_threshold0', 'spmT_0001_thr.nii'), + join(f'l2_analysis_equalIndifference_nsub_{nb_sub}', + '_contrast_id_0002', 'spmT_0001.nii'), + # Hypothesis 2 + join(f'l2_analysis_equalRange_nsub_{nb_sub}', + '_contrast_id_0002', '_threshold0', 'spmT_0001_thr.nii'), + join(f'l2_analysis_equalRange_nsub_{nb_sub}', + '_contrast_id_0002', 'spmT_0001.nii'), + # Hypothesis 3 + join(f'l2_analysis_equalIndifference_nsub_{nb_sub}', + '_contrast_id_0002', '_threshold0', 'spmT_0001_thr.nii'), + join(f'l2_analysis_equalIndifference_nsub_{nb_sub}', + '_contrast_id_0002', 'spmT_0001.nii'), + # Hypothesis 4 + join(f'l2_analysis_equalRange_nsub_{nb_sub}', + '_contrast_id_0002', '_threshold0', 'spmT_0001_thr.nii'), + join(f'l2_analysis_equalRange_nsub_{nb_sub}', + '_contrast_id_0002', 'spmT_0001.nii'), + # Hypothesis 5 + join(f'l2_analysis_equalIndifference_nsub_{nb_sub}', + '_contrast_id_0003', '_threshold0', 'spmT_0001_thr.nii'), + join(f'l2_analysis_equalIndifference_nsub_{nb_sub}', + '_contrast_id_0003', 'spmT_0001.nii'), + # Hypothesis 6 + join(f'l2_analysis_equalRange_nsub_{nb_sub}', + '_contrast_id_0003', '_threshold1', 'spmT_0002_thr.nii'), + join(f'l2_analysis_equalRange_nsub_{nb_sub}', + '_contrast_id_0003', 'spmT_0002.nii'), + # Hypothesis 7 + join(f'l2_analysis_equalIndifference_nsub_{nb_sub}', + '_contrast_id_0003', '_threshold0', 'spmT_0001_thr.nii'), + join(f'l2_analysis_equalIndifference_nsub_{nb_sub}', + '_contrast_id_0003', 'spmT_0001.nii'), + # Hypothesis 8 + join(f'l2_analysis_equalRange_nsub_{nb_sub}', + '_contrast_id_0003', '_threshold1', 'spmT_0002_thr.nii'), + join(f'l2_analysis_equalRange_nsub_{nb_sub}', + '_contrast_id_0003', 'spmT_0002.nii'), + # Hypothesis 9 + join(f'l2_analysis_groupComp_nsub_{nb_sub}', + '_contrast_id_0003', '_threshold0', 'spmT_0001_thr.nii'), + join(f'l2_analysis_groupComp_nsub_{nb_sub}', + '_contrast_id_0003', 'spmT_0001.nii') ] return [join(self.directories.output_dir, f) for f in files] diff --git a/narps_open/utils/configuration/testing_config.toml b/narps_open/utils/configuration/testing_config.toml index ec0ab8d1..b1fb28ba 100644 --- a/narps_open/utils/configuration/testing_config.toml +++ b/narps_open/utils/configuration/testing_config.toml @@ -19,4 +19,4 @@ neurovault_naming = true # true if results files are saved using the neurovault [testing] [testing.pipelines] -correlation_thresholds = [0.30, 0.70, 0.80, 0.85, 0.93] # Correlation between reproduced hypotheses files and results, respectively for [20, 40, 60, 80, 108] subjects. +correlation_thresholds = [0.30, 0.70, 0.79, 0.85, 0.93] # Correlation between reproduced hypotheses files and results, respectively for [20, 40, 60, 80, 108] subjects.