Skip to content

Commit

Permalink
Update trackers to use manifest and handle tar archives (nipoppy#181)
Browse files Browse the repository at this point in the history
* use manifest instead of doughnut to get participants to track

* fix identical bagel still being written

* Check `tar` and `tar.gz` extensions

* add HAS_DATATYPE__{} columns to bagel

* fix warning in pd.DataFrame.compare()

* change logic for UNAVAILABLE status based on manifest

* commit dashboard schema

* fix warning about df_bagel_old_full not being defined

* refactor based on Nikhil comments in Slack meeting
  • Loading branch information
michellewang authored Nov 17, 2023
1 parent 8993be3 commit 2a08ab8
Show file tree
Hide file tree
Showing 2 changed files with 195 additions and 51 deletions.
94 changes: 94 additions & 0 deletions nipoppy/trackers/bagel_schema.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
{
"GLOBAL_COLUMNS": {
"participant_id": {
"Description": "Participant identifier within a given dataset.",
"dtype": "str",
"IsRequired": true,
"IsPrefixedColumn": false
},
"bids_id": {
"Description": "BIDS dataset identifier for a participant, if available/different from the participant_id.",
"dtype": "str",
"IsRequired": false,
"IsPrefixedColumn": false
},
"session": {
"Description": "Participant session ID.",
"dtype": "str",
"IsRequired": true,
"IsPrefixedColumn": false
},
"has_mri_data": {
"Description": "Whether or not participant had MRI data acquired in a given session.",
"dtype": "bool",
"IsRequired": false,
"Range": [true, false],
"IsPrefixedColumn": false
},
"HAS_DATATYPE__": {
"Description": "Whether or not participant session has specified raw BIDS datatype. Column suffix should correspond to a specific BIDS subdirectory. e.g., 'HAS_DATATYPE__anat'",
"dtype": "bool",
"IsRequired": false,
"Range": [true, false],
"IsPrefixedColumn": true
},
"HAS_IMAGE__": {
"Description": "Whether or not participant session has specified imaging file. Column suffix should correspond to a BIDS file suffix. e.g. 'HAS_IMAGE__T1w'",
"dtype": "bool",
"IsRequired": false,
"Range": [true, false],
"IsPrefixedColumn": true
},
"pipeline_name": {
"Description": "Name of a pipeline that was run for the participant, if applicable. Example value: 'freesurfer'",
"dtype": "str",
"IsRequired": true,
"MissingValue": "UNAVAILABLE",
"IsPrefixedColumn": false
},
"pipeline_version": {
"Description": "Version of pipeline that was run. Must have a value if the value for 'pipeline_name' is not 'UNAVAILABLE'. Example value: '7.3.0'",
"dtype": "str",
"IsRequired": true,
"MissingValue": "UNAVAILABLE",
"IsPrefixedColumn": false
},
"pipeline_starttime": {
"Description": "Date/time that pipeline run was started. In format of 'YYYY-MM-DD HH:MM:SS'.",
"dtype": "str",
"IsRequired": true,
"MissingValue": "UNAVAILABLE",
"IsPrefixedColumn": false
},
"pipeline_endtime": {
"Description": "Date/time that pipeline run ended. In format of 'YYYY-MM-DD HH:MM:SS'.",
"dtype": "str",
"IsRequired": false,
"MissingValue": "UNAVAILABLE",
"IsPrefixedColumn": false
}
},
"PIPELINE_STATUS_COLUMNS": {
"pipeline_complete": {
"Description": "Status of pipeline run. 'SUCCESS': All stages of pipeline (as configured by user) finished successfully (all expected pipeline output files are present). 'FAIL': At least one stage of the pipeline failed. 'INCOMPLETE': Pipeline has not yet been run for the participant or at least one stage is unfinished/still running. 'UNAVAILABLE': Relevant data modality for pipeline not available for participant.",
"dtype": "str",
"IsRequired": true,
"Range": ["SUCCESS", "FAIL", "INCOMPLETE", "UNAVAILABLE"],
"IsPrefixedColumn": false
},
"PHASE__": {
"Description": "Completion status of tracker-specified phase/subworkflow of a pipeline. This prefix must be followed by a second prefix that is a composite of {pipeline_name}-{pipeline_version} to be grouped to the relevant pipeline. e.g., 'PHASE__fmriprep-20.2.7__func'. Each phase may correspond to a specific output subdirectory, and may be associated with multiple related output files. If phase and stage columns are both present, each phase is expected to correspond to >= 1 stage. 'SUCCESS': All output files corresponding to phase are present. 'FAIL': At least one output file of phase is missing. This status may be used to indicate that the phase crashed. 'INCOMPLETE': Output files for phase are not present. This status may be used to indicate that the phase was not configured for the run (e.g., if it corresponds to a specific derivative type). 'UNAVAILABLE': Relevant data modality for pipeline not available for participant. '' (no value): Specified phase not part of pipeline described by current row/record.",
"dtype": "str",
"IsRequired": false,
"Range": ["SUCCESS", "FAIL", "INCOMPLETE", "UNAVAILABLE", ""],
"IsPrefixedColumn": true
},
"STAGE__": {
"Description": "Completion status of tracker-specified stage of a pipeline. This prefix must be followed by a second prefix that is a composite of {pipeline_name}-{pipeline_version} to be grouped to the relevant pipeline. e.g., 'STAGE__fmriprep-20.2.7__space-MNI152Lin_res-1'. Each stage may correspond to a single output file or a few linked outputs expected to always coexist. If phase and stage columns are both present, each phase is expected to correspond to >= 1 stage. 'SUCCESS': All output files corresponding to stage are present. 'FAIL': At least one output file of stage is missing. This status may be used to indicate that the stage crashed. 'INCOMPLETE': Output files for stage are not present. This status may be used to indicate that this stage was not configured for the run. 'UNAVAILABLE': Relevant data modality for pipeline not available for participant. '' (no value): Specified stage not part of pipeline described by current row/record.",
"dtype": "str",
"IsRequired": false,
"Range": ["SUCCESS", "FAIL", "INCOMPLETE", "UNAVAILABLE", ""],
"IsPrefixedColumn": true
}
}
}
Loading

0 comments on commit 2a08ab8

Please sign in to comment.