Merge branch 'main' into documentation

bclenet · Oct 13, 2023 · c10e3a4 · c10e3a4
2 parents 7c3b6df + 91dc744
commit c10e3a4
Show file tree

Hide file tree

Showing 8 changed files with 133 additions and 7 deletions.
diff --git a/docs/description.md b/docs/description.md
@@ -6,20 +6,22 @@ It is a conversion into tsv format (tab-separated values) of the [original .xlsx
 
 The file `narps_open/data/description/analysis_pipelines_derived_descriptions.tsv` contains for each team a set of programmatically usable data based on the textual descriptions of the previous file. This data is available in the `derived` sub dictionary (see examples hereafter).
 
+The file `narps_open/data/description/analysis_pipelines_comments.tsv` contains, for each team, a set of comments made by the NARPS Open Pipelines team about the reproducibility of the pipeline and its exclusion from the NARPS analysis. This data is available in the `comments` sub dictionary (see examples hereafter).
+
 The class `TeamDescription` of module `narps_open.data.description` acts as a parser for these three files.
 
-You can use the command-line tool as so. Option `-t` is for the team id, option `-d` allows to print only one of the sub parts of the description among : `general`, `exclusions`, `preprocessing`, `analysis`, and `categorized_for_analysis`. Options `--json` and `--md` allow to choose the export format you prefer between JSON and Markdown.
+You can use the command-line tool as follows. Option `-t` is for the team id; option `-d` prints only one of the sub-parts of the description, among: `general`, `exclusions`, `preprocessing`, `analysis`, `categorized_for_analysis`, `derived`, and `comments`. Options `--json` and `--md` let you choose the export format, either JSON or Markdown.
 
 ```bash
 python narps_open/data/description -h
-# usage: __init__.py [-h] -t TEAM [-d {general,exclusions,preprocessing,analysis,categorized_for_analysis,derived}]
+# usage: __init__.py [-h] -t TEAM [-d {general,exclusions,preprocessing,analysis,categorized_for_analysis,derived,comments}]
 #
 # Get description of a NARPS pipeline.
 #
 # options:
 #   -h, --help            show this help message and exit
 #   -t TEAM, --team TEAM  the team ID
-#   -d {general,exclusions,preprocessing,analysis,categorized_for_analysis,derived}, --dictionary {general,exclusions,preprocessing,analysis,categorized_for_analysis,derived}
+#   -d {general,exclusions,preprocessing,analysis,categorized_for_analysis,derived,comments}, --dictionary {general,exclusions,preprocessing,analysis,categorized_for_analysis,derived,comments}
 #                         the sub dictionary of team description
 #  --json                output team description as JSON
 #  --md                  output team description as Markdown
@@ -91,6 +93,7 @@ description.preprocessing
 description.analysis
 description.categorized_for_analysis
 description.derived
+description.comments
 # Access values of sub dictionaries
 description.general['teamID']
 # Other keys in general are: ['teamID', 'NV_collection_link', 'results_comments', 'preregistered', 'link_preregistration_form', 'regions_definition', 'softwares', 'general_comments']

diff --git a/narps_open/data/description/__init__.py b/narps_open/data/description/__init__.py
@@ -20,6 +20,9 @@ class TeamDescription(dict):
     derived_description_file = join(
         files('narps_open.data.description'),
         'analysis_pipelines_derived_descriptions.tsv')
+    comments_description_file = join(
+        files('narps_open.data.description'),
+        'analysis_pipelines_comments.tsv')
 
     def __init__(self, team_id):
         super().__init__()
@@ -59,6 +62,11 @@ def derived(self) -> dict:
         """ Getter for the sub dictionary containing derived team description """
         return self._get_sub_dict('derived')
 
+    @property
+    def comments(self) -> dict:
+        """ Getter for the sub dictionary containing comments for NARPS Open Pipeline """
+        return self._get_sub_dict('comments')
+
     def markdown(self):
         """ Return the team description as a string formatted in markdown """
         return_string = f'# NARPS team description : {self.team_id}\n'
@@ -69,7 +77,8 @@ def markdown(self):
             self.preprocessing,
             self.analysis,
             self.categorized_for_analysis,
-            self.derived
+            self.derived,
+            self.comments
             ]
 
         names = [
@@ -78,7 +87,8 @@ def markdown(self):
             'Preprocessing',
             'Analysis',
             'Categorized for analysis',
-            'Derived'
+            'Derived',
+            'Comments'
             ]
 
         for dictionary, name in zip(dictionaries, names):
@@ -175,3 +185,29 @@ def _load(self):
             if not found:
                 raise AttributeError(f'Team {self.team_id}\
                     was not found in the derived description.')
+
+        # Parsing third file : self.comments_description_file
+        with open(self.comments_description_file, newline='', encoding='utf-8') as csv_file:
+            # Prepare first line (whose elements are second part of the keys)
+            first_line = csv_file.readline().replace('\n','').split('\t')
+
+            # Read the rest of the file as a dict
+            reader = DictReader(
+                csv_file,
+                fieldnames = ['comments.' + k2 for k2 in first_line],
+                delimiter = '\t'
+                )
+
+            # Update self with the key/value pairs from the file
+            found = False
+            for row in reader:
+                if row['comments.teamID'] == self.team_id:
+                    found = True
+                    row.pop('comments.teamID', None) # Remove useless 'comments.teamID' key
+                    self.update(row)
+                    break
+
+            # If team id was not found in the file
+            if not found:
+                raise AttributeError(f'Team {self.team_id}\
+                    was not found in the comments description.')
diff --git a/narps_open/data/description/__main__.py b/narps_open/data/description/__main__.py
@@ -19,7 +19,8 @@
         'preprocessing',
         'analysis',
         'categorized_for_analysis',
-        'derived'
+        'derived',
+        'comments'
         ],
     help='the sub dictionary of team description')
 formats = parser.add_mutually_exclusive_group(required = False)
@@ -49,5 +50,7 @@
         print(dumps(information.categorized_for_analysis, indent = 4))
     elif arguments.dictionary == 'derived':
         print(dumps(information.derived, indent = 4))
+    elif arguments.dictionary == 'comments':
+        print(dumps(information.comments, indent = 4))
     else:
         print(dumps(information, indent = 4))
diff --git a/narps_open/data/description/analysis_pipelines_comments.tsv b/narps_open/data/description/analysis_pipelines_comments.tsv
@@ -0,0 +1,71 @@
+teamID	excluded_from_narps_analysis	exclusion_comment	reproducibility	reproducibility_comment
+50GV	no	N/A	?	Uses custom software (Denoiser)
+9Q6R	no	N/A		
+O21U	no	N/A		
+U26C	no	N/A		
+43FJ	no	N/A		
+C88N	no	N/A		
+4TQ6	yes	Resampled image offset and too large compared to template.		
+T54A	no	N/A		
+2T6S	no	N/A		
+L7J7	no	N/A		
+0JO0	no	N/A		
+X1Y5	no	N/A		
+51PW	no	N/A		
+6VV2	no	N/A		
+O6R6	no	N/A		
+C22U	no	N/A	?	Custom Matlab script for white matter PCA confounds
+3PQ2	no	N/A		
+UK24	no	N/A		
+4SZ2	yes	Resampled image offset from template brain.		
+9T8E	no	N/A		
+94GU	no	N/A	?	Multiple software dependencies : SPM + ART + TAPAS + Matlab.
+I52Y	no	N/A		
+5G9K	no	N/A	?	? 
+2T7P	yes	Missing thresholded images.	?	?
+UI76	no	N/A		
+B5I6	no	N/A		
+V55J	yes	Bad histogram : very small values.		
+X19V	no	N/A		
+0C7Q	yes	Appears to be a p-value distribution, with slight excursions below and above zero.		
+R5K7	no	N/A		
+0I4U	no	N/A		
+3C6G	no	N/A		
+R9K3	no	N/A		
+O03M	no	N/A		
+08MQ	no	N/A		
+80GC	no	N/A		
+J7F9	no	N/A		
+R7D1	no	N/A		
+Q58J	yes	Bad histogram : bimodal, zero-inflated with a second distribution centered around 5.		
+L3V8	yes	Rejected due to large amount of missing brain in center.		
+SM54	no	N/A		
+1KB2	no	N/A		
+0H5E	yes	Rejected due to large amount of missing brain in center.		
+P5F3	yes	Rejected due to large amounts of missing data across brain.		
+Q6O0	no	N/A		
+R42Q	no	N/A	?	Uses fMRIflows, a custom software based on NiPype.
+L9G5	no	N/A		
+DC61	no	N/A		
+E3B6	yes	Bad histogram : very long tail, with substantial inflation at a value just below zero.		
+16IN	no	N/A	?	Multiple software dependencies : matlab + SPM + FSL + R + TExPosition + neuroim
+46CD	no	N/A		
+6FH5	yes	Missing much of the central brain.		
+K9P0	no	N/A		
+9U7M	no	N/A		
+VG39	no	N/A		
+1K0E	yes	Used surface-based analysis, only provided data for cortical ribbon.	?	?
+X1Z4	yes	Used surface-based analysis, only provided data for cortical ribbon.	?	Multiple software dependencies : FSL + fmriprep + ciftify + HCP workbench + Freesurfer + ANTs
+I9D6	no	N/A		
+E6R3	no	N/A		
+27SS	no	N/A		
+B23O	no	N/A		
+AO86	no	N/A		
+L1A8	yes	Resampled image much smaller than template brain.	?	?
+IZ20	no	N/A		
+3TR7	no	N/A		
+98BT	yes	Rejected due to very bad normalization.		
+XU70	no	N/A	?	Uses custom software : FSL + 4drealign
+0ED6	no	N/A	?	? 
+I07H	yes	Bad histogram : bimodal, with second distribution centered around 2.5.		
+1P0Y	no	N/A		
diff --git a/setup.py b/setup.py
@@ -60,6 +60,7 @@
     data_files = [
         ('narps_open/utils/configuration', ['narps_open/utils/configuration/default_config.toml']),
         ('narps_open/utils/configuration', ['narps_open/utils/configuration/testing_config.toml']),
+        ('narps_open/data/description', ['narps_open/data/description/analysis_pipelines_comments.tsv']),
         ('narps_open/data/description', ['narps_open/data/description/analysis_pipelines_derived_descriptions.tsv']),
         ('narps_open/data/description', ['narps_open/data/description/analysis_pipelines_full_descriptions.tsv'])
     ]

diff --git a/tests/data/test_description.py b/tests/data/test_description.py
@@ -55,6 +55,7 @@ def test_arguments_properties():
         assert description['analysis.RT_modeling'] == 'duration'
         assert description['categorized_for_analysis.analysis_SW_with_version'] == 'SPM12'
         assert description['derived.func_fwhm'] == '8'
+        assert description['comments.excluded_from_narps_analysis'] == 'no'
 
         # 4 - Check properties
         assert isinstance(description.general, dict)
@@ -63,6 +64,7 @@ def test_arguments_properties():
         assert isinstance(description.analysis, dict)
         assert isinstance(description.categorized_for_analysis, dict)
         assert isinstance(description.derived, dict)
+        assert isinstance(description.comments, dict)
 
         assert list(description.general.keys()) == [
             'teamID',
@@ -82,6 +84,7 @@ def test_arguments_properties():
         assert description.analysis['RT_modeling'] == 'duration'
         assert description.categorized_for_analysis['analysis_SW_with_version'] == 'SPM12'
         assert description.derived['func_fwhm'] == '8'
+        assert description.comments['excluded_from_narps_analysis'] == 'no'
 
         # 6 - Test another team
         description = TeamDescription('9Q6R')

diff --git a/tests/test_data/data/description/test_markdown.md b/tests/test_data/data/description/test_markdown.md
@@ -96,3 +96,8 @@ Model EVs (2): eq_indiff, eq_range
 * `excluded_participants` : 018, 030, 088, 100
 * `func_fwhm` : 5
 * `con_fwhm` : 
+## Comments
+* `excluded_from_narps_analysis` : no
+* `exclusion_comment` : N/A
+* `reproducibility` : 
+* `reproducibility_comment` : 
diff --git a/tests/test_data/data/description/test_str.json b/tests/test_data/data/description/test_str.json
@@ -53,5 +53,9 @@
     "derived.n_participants": "104",
     "derived.excluded_participants": "018, 030, 088, 100",
     "derived.func_fwhm": "5",
-    "derived.con_fwhm": ""
+    "derived.con_fwhm": "",
+    "comments.excluded_from_narps_analysis": "no",
+    "comments.exclusion_comment": "N/A",
+    "comments.reproducibility": "",
+    "comments.reproducibility_comment": ""
 }