diff --git a/.vscode/launch.json b/.vscode/launch.json index 15932eb4..6b116a90 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -56,10 +56,16 @@ // "--type_session", "multi-ses", // "--type_system", "sge" // ] + // "args": [ + // "--project-root", + // "/home/faird/zhaoc/data/test_babs_multi-ses_toybidsapp", + // "--container-config-yaml-file", + // "/home/faird/zhaoc/babs_tests/notebooks/bidsapp-toybidsapp-0-0-7_task-rawBIDS_system-slurm_cluster-MSI_egConfig.yaml", + // "--job-account" + // ] "args": [ "--project-root", - "/home/faird/zhaoc/data/test_babs_multi-ses_toybidsapp", - "--container-config-yaml-file", - "/home/faird/zhaoc/babs_tests/notebooks/bidsapp-toybidsapp-0-0-7_task-rawBIDS_system-slurm_cluster-MSI_egConfig.yaml", + "/home/faird/zhaoc/data/test_babs_single-ses_PNC_fmriprep_sloppy", "--job-account" ] // "args": [ diff --git a/babs/babs.py b/babs/babs.py index 1a25b894..0180d75e 100644 --- a/babs/babs.py +++ b/babs/babs.py @@ -1179,6 +1179,8 @@ def babs_status(self, flags_resubmit, # `create_job_status_csv(self)` has been called in `babs_status()` # in `cli.py` + from .constants import MSG_NO_ALERT_IN_LOGS + # Load the csv file lock_path = self.job_status_path_abs + ".lock" lock = FileLock(lock_path) @@ -1256,7 +1258,7 @@ def babs_status(self, flags_resubmit, # NOTE: in theory can skip failed jobs in previous round, # but making assigning variables hard; so not to skip # if df_job.at[i_job, "is_failed"] is not True: # np.nan or False - alert_message_in_log_files, if_no_alert_in_log = \ + alert_message_in_log_files, if_no_alert_in_log, if_found_log_files = \ get_alert_message_in_log_files(config_msg_alert, log_fn) # ^^ the function will handle even if `config_msg_alert=None` df_job_updated.at[i_job, "alert_message"] = \ @@ -1436,6 +1438,15 @@ def babs_status(self, flags_resubmit, df_job_updated.at[i_job, "job_state_code"] = np.nan df_job_updated.at[i_job, "duration"] = np.nan # ROADMAP: ^^ get duration via `qacct` + if if_found_log_files == False: # bool or np.nan + # If there are no log files, the alert message would be 'np.nan'; + # however, this is a failed job, so it should have log files, + # unless it was killed by the user when pending.
+ # change the 'alert_message' to no alert in logs, + # so that when reporting job status, + # info from job accounting will be reported + df_job_updated.at[i_job, "alert_message"] = \ + MSG_NO_ALERT_IN_LOGS # check the log file: # TODO ^^ @@ -1575,7 +1586,7 @@ def babs_status(self, flags_resubmit, get_last_line(o_fn) # Check if any alert message in log files for this job: # this is to update `alert_message` in case user changes configs in yaml - alert_message_in_log_files, if_no_alert_in_log = \ + alert_message_in_log_files, if_no_alert_in_log, _ = \ get_alert_message_in_log_files(config_msg_alert, log_fn) # ^^ the function will handle even if `config_msg_alert=None` df_job_updated.at[i_job, "alert_message"] = \ @@ -2542,7 +2553,9 @@ def generate_bash_participant_job(self, bash_path, input_ds, type_session, # Write into the bash file: bash_file = open(bash_path, "a") # open in append mode - bash_file.write("#!/bin/bash\n") + # NOTE: do not automatically generate the interpreting shell here; + # instead, let users specify it in the container config yaml file + # using `interpreting_shell` # Cluster resources requesting: cmd_bashhead_resources = generate_bashhead_resources(system, self.config) @@ -2745,7 +2758,9 @@ def generate_bash_test_job(self, folder_check_setup, # Write into the bash file: bash_file = open(fn_call_test_job, "a") # open in append mode - bash_file.write("#!/bin/bash\n") + # NOTE: do not automatically generate the interpreting shell here; + # instead, let users specify it in the container config yaml file + # using `interpreting_shell` # Cluster resources requesting: cmd_bashhead_resources = generate_bashhead_resources(system, self.config) diff --git a/babs/cli.py b/babs/cli.py index 0f3e9f0b..faa5947d 100644 --- a/babs/cli.py +++ b/babs/cli.py @@ -493,8 +493,10 @@ def babs_status_cli(): action='store_true', # ^^ if `--job-account` is specified, args.job_account = True; otherwise, False help="Whether to account failed jobs, which may take some time." - " If `--resubmit failed` or `--resubmit-job` for this failed job is also requested," - " this `--job-account` will be skipped.") + " When using ``--job-account``, please also add ``--container_config_yaml_file``." + " If ``--resubmit failed`` or ``--resubmit-job`` (for some failed jobs)" + " is also requested," + " this ``--job-account`` will be skipped.") return parser diff --git a/babs/dict_cluster_systems.yaml b/babs/dict_cluster_systems.yaml index 29392207..3027a717 100644 --- a/babs/dict_cluster_systems.yaml +++ b/babs/dict_cluster_systems.yaml @@ -3,18 +3,22 @@ # format: <key>: "<format string>" # placeholder "$VALUE" will be replaced by the real value provided by the user. +# For 'interpreting_shell': nothing else will be added by BABS +# For other keys: a cluster-type-specific prefix will be added, +# e.g., '#$ ' for SGE clusters +# e.g., '#SBATCH ' for Slurm clusters sge: - interpreting_shell: "-S $VALUE" # "-S /bin/bash" on cubic + interpreting_shell: "#!$VALUE" # "#!/bin/bash" on cubic hard_memory_limit: "-l h_vmem=$VALUE" # "-l h_vmem=25G" on cubic soft_memory_limit: "-l s_vmem=$VALUE" # "-l s_vmem=23.5G" on cubic temporary_disk_space: "-l tmpfree=$VALUE" # "-l tmpfree=200G" on cubic number_of_cpus: "-pe threaded $VALUE" # "-pe threaded N" or a range: "-pe threaded N-M", N<M + Naming convention of the example container config YAML files: `eg_<bidsapp-version>_<task>_<system>_<cluster>.yaml` + * `<bidsapp-version>`: BIDS App name and version + * `<task>`: For what application of the BIDS App? Full run? Sloppy mode?
+ * `<system>`: `sge` or `slurm` + * `<cluster>`: name of the example cluster where the YAML file was tested diff --git a/notebooks/docs_developer.md b/notebooks/docs_developer.md index be466229..8c97bf90 100644 --- a/notebooks/docs_developer.md +++ b/notebooks/docs_developer.md @@ -74,12 +74,29 @@ when `print(df)` by python: # Testing ## Create pending, failed, or stalled jobs Change/Add these in `participant_job.sh`: -- failed: add `-l h_rt=0:0:20` (hard runtime limit is 20 sec) +- failed: see next section - pending: increase `-l h_vmem` and `-l s_vmem`; increase `-pe threaded N` - stalled (`eqw`): see Bergman email 12/20/22 After these changes, `datalad save -m "message"` and `datalad push --to input` +## Create failed cases for testing `babs-status` failed job auditing +* Add `sleep 3600` to `container_zip.sh`; make sure you `datalad save` the changes +* Change the hard runtime limit to 20 min (on SGE: `-l h_rt=0:20:00`) +* Create failed cases: + * when the job is pending, manually kill it + * For Slurm clusters: with `--job-account`, you'll see a normal message in the `State` column of `sacct` + * For SGE clusters: you'll see a warning that `qacct` failed for this job - this is normal. See PR #98 for more details. + * when the job is running, manually kill it + * wait until the job runs out of time and is killed by the cluster + * if you don't want to wait that long, just set the hard runtime limit to a very low value, e.g., 20 sec +* Perform job auditing using `--container-config-yaml-file`: + * add some messages to `alert_log_messages` that can be found in these "failed" jobs' logs - for testing purposes + * they can even be normal messages that also appear in successful jobs +* Perform job auditing using `--job-account` (and `--container-config-yaml-file`): + * delete `alert_log_messages` from the yaml file; + * now you should see job accounting information for these failed jobs + # Terminology - ".o": standard output stream of the job diff --git a/notebooks/example_container_fmriprep_anatonly.yaml b/notebooks/eg_fmriprep-20-2-3_anatonly_sge_cubic.yaml similarity index 100% rename from notebooks/example_container_fmriprep_anatonly.yaml rename to notebooks/eg_fmriprep-20-2-3_anatonly_sge_cubic.yaml diff --git a/notebooks/example_container_fmriprep.yaml b/notebooks/eg_fmriprep-20-2-3_full_sge_cubic.yaml similarity index 100% rename from notebooks/example_container_fmriprep.yaml rename to notebooks/eg_fmriprep-20-2-3_full_sge_cubic.yaml diff --git a/notebooks/example_container_fmriprep_ingressed_fs.yaml b/notebooks/eg_fmriprep-20-2-3_ingressed-fs_sge_cubic.yaml similarity index 100% rename from notebooks/example_container_fmriprep_ingressed_fs.yaml rename to notebooks/eg_fmriprep-20-2-3_ingressed-fs_sge_cubic.yaml diff --git a/notebooks/example_container_fmriprep_sloppyFlag.yaml b/notebooks/eg_fmriprep-20-2-3_sloppy_sge_cubic.yaml similarity index 60% rename from notebooks/example_container_fmriprep_sloppyFlag.yaml rename to notebooks/eg_fmriprep-20-2-3_sloppy_sge_cubic.yaml index 4ea199ea..81c5919f 100644 --- a/notebooks/example_container_fmriprep_sloppyFlag.yaml +++ b/notebooks/eg_fmriprep-20-2-3_sloppy_sge_cubic.yaml @@ -1,27 +1,33 @@ -# This is an example config yaml file for fMRIPrep version 20.2.3, -# but in `--sloppy` mode, i.e., test mode, and there is no FreeSurfer recon either. -# This should NOT be used to produce real outputs you need.
+# This is an example config yaml file for: +# BIDS App: fMRIPrep ("fmriprep") +# BIDS App version: 20.2.3 +# Task: `--sloppy` mode + without FreeSurfer reconstruction +# Which system: SGE +# Tested on which cluster: Penn Med CUBIC cluster +# fMRIPrep's Docker image is publicly available at: https://hub.docker.com/r/nipreps/fmriprep/ -# Warning!!! +# WARNING!!! # This is only an example, which may not necessarily fit your purpose, # or be an optimized solution for your case, -# or be compatible to the fMRIPrep version you're using. +# or be compatible to the BIDS App version you're using. # Therefore, please change and tailor it for your case before use it!!! +# WARNING!!! +# We'll use `--sloppy` testing mode of fMRIPrep. +# Therefore this YAML file should only be used for testing purpose. +# You should NOT use this YAML file to generate formal results! # Arguments when executing the BIDS App using `singularity run`: -# You should not have flags of : `--participant-label`, or `--bids-filter-file`! singularity_run: -w: "$BABS_TMPDIR" # this is a placeholder. To be changed to `${PWD}/.git/tmp/wkdir` --n_cpus: '1' --stop-on-first-crash: "" - --fs-license-file: "/cbica/projects/BABS/software/FreeSurfer/license.txt" # path to FS license file + --fs-license-file: "/cbica/projects/BABS/software/FreeSurfer/license.txt" # [FIX ME] path to FS license file --skip-bids-validation: Null # Null or NULL is also a placeholder - --output-spaces: "MNI152NLin6Asym:res-2 MNI152NLin2009cAsym" # e.g., two output spaces + --output-spaces: "MNI152NLin6Asym:res-2" # for two output spaces: e.g., "MNI152NLin6Asym:res-2 MNI152NLin2009cAsym" --force-bbr: "" - #--cifti-output: 91k # ADD THIS WHEN FULL RUN! -v: '-v' # this is for double "-v" - --sloppy: '' # TEST RUN! - --fs-no-reconall: '' # TEST RUN! + --sloppy: '' # WARNING: use this only when testing + --fs-no-reconall: '' # WARNING: use this only when testing # Output foldername(s) to be zipped, and the BIDS App version to be included in the zip filename(s): # per `--fs-no-reconall`, there won't be an output folder called `freesurfer` @@ -29,9 +35,9 @@ zip_foldernames: fmriprep: "20-2-3" # folder 'fmriprep' will be zipped into 'sub-xx_ses-yy_fmriprep-20-2-3.zip' cluster_resources: - interpreting_shell: /bin/bash # "-S /bin/bash" on cubic - hard_memory_limit: 25G # "-l h_vmem=25G" on cubic - temporary_disk_space: 200G # "-l tmpfree=200G" on cubic # this is highly-recommended on cubic + interpreting_shell: /bin/bash # --> "#!/bin/bash" + hard_memory_limit: 25G # --> "#$ -l h_vmem=25G" + temporary_disk_space: 200G # --> "#$ -l tmpfree=200G" # this is highly-recommended on CUBIC cluster customized_text: | #$ -R y #$ -l hostname=!compute-fed* @@ -42,8 +48,8 @@ cluster_resources: # they will be used as preambles in `participant_job.sh` # the commands should not be quoted! script_preamble: | - source ${CONDA_PREFIX}/bin/activate mydatalad # Penn Med CUBIC cluster; replace 'mydatalad' with your conda env name - echo "I am running BABS." # this is an example command to show how to add another line; not necessary to include. + source ${CONDA_PREFIX}/bin/activate mydatalad # [FIX ME] Penn Med CUBIC cluster; replace 'mydatalad' with your conda env name + echo "I am running BABS." # [FIX ME] this is an example command to show how to add another line; not necessary to include. 
# ^^ conda env above: where the scripts generated by BABS will run # not necessary the same one for running `babs-init` @@ -51,7 +57,7 @@ script_preamble: | # for MSI, might need to add command e.g., "module_load" # Where to run the jobs: -job_compute_space: "${CBICA_TMPDIR}" # Penn Med CUBIC cluster tmp space +job_compute_space: "${CBICA_TMPDIR}" # [FIX ME] Penn Med CUBIC cluster tmp space # Below is to filter out subjects (or sessions) # right now we only filter based on unzipped dataset diff --git a/notebooks/eg_fmriprep-20-2-3_sloppy_slurm_msi.yaml b/notebooks/eg_fmriprep-20-2-3_sloppy_slurm_msi.yaml new file mode 100644 index 00000000..0d877816 --- /dev/null +++ b/notebooks/eg_fmriprep-20-2-3_sloppy_slurm_msi.yaml @@ -0,0 +1,82 @@ +# This is an example config yaml file for: +# BIDS App: fMRIPrep ("fmriprep") +# BIDS App version: 20.2.3 +# Task: `--sloppy` mode + without FreeSurfer reconstruction +# Which system: Slurm +# Tested on which cluster: MSI cluster +# fMRIPrep's Docker image is publicly available at: https://hub.docker.com/r/nipreps/fmriprep/ + +# WARNING!!! +# This is only an example, which may not necessarily fit your purpose, +# or be an optimized solution for your case, +# or be compatible to the BIDS App version you're using. +# Therefore, please change and tailor it for your case before use it!!! +# WARNING!!! +# We'll use `--sloppy` testing mode of fMRIPrep. +# Therefore this YAML file should only be used for testing purpose. +# You should NOT use this YAML file to generate formal results! + +# Arguments when executing the BIDS App using `singularity run`: +singularity_run: + -w: "$BABS_TMPDIR" # this is a placeholder. BABS will replace it with `${PWD}/.git/tmp/wkdir` + --n_cpus: '1' + --stop-on-first-crash: "" + --fs-license-file: "/home/faird/zhaoc/software/FreeSurfer/license.txt" # [FIX ME] path to FS license file + --skip-bids-validation: Null # Null or NULL is also a placeholder + --output-spaces: "MNI152NLin6Asym:res-2" # for two output spaces: e.g., "MNI152NLin6Asym:res-2 MNI152NLin2009cAsym" + --force-bbr: "" + -v: '-v' # this is for double "-v" + --sloppy: '' # WARNING: use this only when testing + --fs-no-reconall: '' # WARNING: use this only when testing + +# Output foldername(s) to be zipped, and the BIDS App version to be included in the zip filename(s): +# per `--fs-no-reconall`, there won't be an output folder called `freesurfer` +zip_foldernames: + fmriprep: "20-2-3" # folder 'fmriprep' will be zipped into 'sub-xx_ses-yy_fmriprep-20-2-3.zip' + +cluster_resources: + interpreting_shell: "/bin/bash -l" # --> "#!/bin/bash -l" + # number_of_cpus: "1" # --> "#SBATCH --cpus-per-task=1" + hard_memory_limit: 25G # --> "#SBATCH --mem=25G" + temporary_disk_space: 200G # --> "#SBATCH --tmp=200G" + hard_runtime_limit: "72:00:00" # --> "--time=72:00:00", i.e., 3 days. Should NOT large than partition's time limit! + customized_text: | + #SBATCH -p k40 +# Other choices of job partitions on MSI: amd2tb,ram256g,v100,k40 +# Notes: Above `customized_text` is MSI Slurm cluster specific. +# So it may not be relevant for other clusters + +# Users need to add their customized bash command below, +# they will be used as preambles in `participant_job.sh` +# the commands should not be quoted! +script_preamble: | + source /home/faird/shared/code/external/envs/miniconda3/load_miniconda3.sh # [FIX ME] MSI cluster faird group. Replace filepath with yours. 
+ conda activate babs # [FIX ME] replace 'babs' with your env variable name + +# ^^ conda env above: where the scripts generated by BABS will run +# not necessary the same one for running `babs-init` +# ^^ based on what you need on your cluster; some people even don't use `conda`... +# for MSI, might need to add command e.g., "module_load" + +# Where to run the jobs: +job_compute_space: "/tmp" # [FIX ME] MSI cluster + +# Below is to filter out subjects (or sessions) +# right now we only filter based on unzipped dataset +required_files: + $INPUT_DATASET_#1: + - "func/*_bold.nii*" + - "anat/*_T1w.nii*" + +# Alert messages that might be found in log files of failed jobs: +# These messages may be helpful for debugging errors in failed jobs. +alert_log_messages: + stdout: + - "Exception: No T1w images found for" # probably not needed, after setting `required_files` + - "Excessive topologic defect encountered" + - "Cannot allocate memory" + - "mris_curvature_stats: Could not open file" + - "Numerical result out of range" + - "fMRIPrep failed" + # stderr: + # - "xxxxx" diff --git a/notebooks/example_container_fmriprepfake.yaml b/notebooks/eg_fmriprepfake-0-1-1_full_sge_cubic.yaml similarity index 100% rename from notebooks/example_container_fmriprepfake.yaml rename to notebooks/eg_fmriprepfake-0-1-1_full_sge_cubic.yaml diff --git a/notebooks/eg_qsiprep-0-16-0RC3_sloppy_sge_cubic.yaml b/notebooks/eg_qsiprep-0-16-0RC3_sloppy_sge_cubic.yaml new file mode 100644 index 00000000..43e9ec53 --- /dev/null +++ b/notebooks/eg_qsiprep-0-16-0RC3_sloppy_sge_cubic.yaml @@ -0,0 +1,60 @@ +# This is an example config yaml file for: +# BIDS App: QSIPrep ("qsiprep") +# BIDS App version: 0.16.0RC3 +# Task: `--sloppy` mode +# Which system: SGE +# Tested on which cluster: Penn Med CUBIC cluster +# QSIPrep's Docker image is publicly available at: https://hub.docker.com/r/pennbbl/qsiprep + +# WARNING!!! +# This is only an example, which may not necessarily fit your purpose, +# or be an optimized solution for your case, +# or be compatible to the BIDS App version you're using. +# Therefore, please change and tailor it for your case before use it!!! +# WARNING!!! +# We'll use `--sloppy` testing mode of QSIPrep. +# Therefore this YAML file should only be used for testing purpose. +# You should NOT use this YAML file to generate formal results! + +singularity_run: + -v: "-v" + -w: "$BABS_TMPDIR" # this is a placeholder. 
To be changed to `${PWD}/.git/tmp/wkdir` + --n_cpus: "$NSLOTS" # `$NSLOTS` can be recognized by SGE + --omp-nthreads: "3" # without this, only run single-threaded jobs (N=`--n_cpus`) at once + --stop-on-first-crash: "" + --fs-license-file: "/cbica/projects/BABS/software/FreeSurfer/license.txt" # [FIX ME] path to FS license file + --skip-bids-validation: Null # Null or NULL is also a placeholder + --unringing-method: "mrdegibbs" + --output-resolution: "2.0" + --sloppy: "" # WARNING: only use this when testing + --hmc-model: "none" # WARNING: only use this when testing + --dwi-only: "" # WARNING: only use this when testing + +# Output foldername(s) to be zipped, and the BIDS App version to be included in the zip filename(s): +zip_foldernames: + qsiprep: "0-16-0RC3" # folder 'qsiprep' will be zipped into 'sub-xx_ses-yy_qsiprep-0-16-0RC3.zip' + +cluster_resources: + interpreting_shell: /bin/bash # --> "#!/bin/bash" + hard_memory_limit: 32G # --> `#$ -l h_vmem=32G` + temporary_disk_space: 200G # --> `#$ -l tmpfree=200G` + number_of_cpus: "6" # --> `#$ -pe threaded 6` + customized_text: | + #$ -R y + #$ -l hostname=!compute-fed* +# Notes: Above `customized_text` is Penn Med CUBIC cluster specific. +# So it's probably not relevant for other clusters + +# Users need to add their customized bash command below, +# they will be used as preambles in `participant_job.sh` +# the commands should not be quoted! +script_preamble: | + source ${CONDA_PREFIX}/bin/activate mydatalad # [FIX ME] Penn Med CUBIC cluster; replace 'mydatalad' with your conda env name + echo "I am running BABS." # [FIX ME] this is an example command to show how to add another line; not necessary to include. + +# Where to run the jobs: +job_compute_space: "${CBICA_TMPDIR}" # [FIX ME] Penn Med CUBIC cluster tmp space + +required_files: + $INPUT_DATASET_#1: + - "dwi/*_dwi.nii*" # QSIPrep diff --git a/notebooks/eg_qsiprep-0-16-0RC3_sloppy_slurm_msi.yaml b/notebooks/eg_qsiprep-0-16-0RC3_sloppy_slurm_msi.yaml new file mode 100644 index 00000000..e292b7bf --- /dev/null +++ b/notebooks/eg_qsiprep-0-16-0RC3_sloppy_slurm_msi.yaml @@ -0,0 +1,63 @@ +# This is an example config yaml file for: +# BIDS App: QSIPrep ("qsiprep") +# BIDS App version: 0.16.0RC3 +# Task: `--sloppy` mode +# Which system: Slurm +# Tested on which cluster: MSI cluster +# QSIPrep's Docker image is publicly available at: https://hub.docker.com/r/pennbbl/qsiprep + +# WARNING!!! +# This is only an example, which may not necessarily fit your purpose, +# or be an optimized solution for your case, +# or be compatible to the BIDS App version you're using. +# Therefore, please change and tailor it for your case before use it!!! +# WARNING!!! +# We'll use `--sloppy` testing mode of QSIPrep. +# Therefore this YAML file should only be used for testing purpose. +# You should NOT use this YAML file to generate formal results! + +singularity_run: + -v: "-v" + -w: "$BABS_TMPDIR" # this is a placeholder. 
BABS will replace it with `${PWD}/.git/tmp/wkdir` + --n_cpus: "$SLURM_CPUS_PER_TASK" # Slurm env variable, taking value from `--cpus-per-task`, i.e., "number_of_cpus" in section "cluster_resources" + --omp-nthreads: "3" # without this, only run single-threaded jobs (N=`--n_cpus`) at once + --stop-on-first-crash: "" + --fs-license-file: "/home/faird/zhaoc/software/FreeSurfer/license.txt" # [FIX ME] path to FS license file + --skip-bids-validation: Null # Null or NULL is also a placeholder + --unringing-method: "mrdegibbs" + --output-resolution: "2.0" + --sloppy: "" # WARNING: only use this when testing + --hmc-model: "none" # WARNING: only use this when testing + --dwi-only: "" # WARNING: only use this when testing + +# Output foldername(s) to be zipped, and the BIDS App version to be included in the zip filename(s): +zip_foldernames: + qsiprep: "0-16-0RC3" # folder 'qsiprep' will be zipped into 'sub-xx_ses-yy_qsiprep-0-16-0RC3.zip' + +cluster_resources: + interpreting_shell: "/bin/bash -l" # --> "#!/bin/bash -l" + hard_memory_limit: 32G # --> "#SBATCH --mem=32G" + temporary_disk_space: 200G # --> "#SBATCH --tmp=200G" + number_of_cpus: "6" # --> "#SBATCH --cpus-per-task=6" + hard_runtime_limit: "48:00:00" # --> "--time=48:00:00", i.e., 2 days. Should NOT large than partition's time limit! + customized_text: | + #SBATCH -p ram256g +# Other choices of job partitions on MSI: amd2tb,ram256g,v100,k40 +# Notes: Above `customized_text` is MSI Slurm cluster specific. +# So it may not be relevant for other clusters + +# Users need to add their customized bash command below, +# they will be used as preambles in `participant_job.sh` +# the commands should not be quoted! +script_preamble: | + source /home/faird/shared/code/external/envs/miniconda3/load_miniconda3.sh # [FIX ME] MSI cluster faird group. Replace filepath with yours. + conda activate babs # [FIX ME] replace 'babs' with your env variable name + +# Where to run the jobs: +job_compute_space: "/tmp" # [FIX ME] MSI cluster + +# Below is to filter out subjects (or sessions) +# right now we only filter based on unzipped dataset +required_files: + $INPUT_DATASET_#1: + - "dwi/*_dwi.nii*" # QSIPrep diff --git a/notebooks/example_container_toybidsapp.yaml b/notebooks/eg_toybidsapp-0-0-7_rawBIDS_sge_cubic.yaml similarity index 100% rename from notebooks/example_container_toybidsapp.yaml rename to notebooks/eg_toybidsapp-0-0-7_rawBIDS_sge_cubic.yaml diff --git a/notebooks/eg_toybidsapp-0-0-7_rawBIDS_slurm_msi.yaml b/notebooks/eg_toybidsapp-0-0-7_rawBIDS_slurm_msi.yaml new file mode 100644 index 00000000..fb4df255 --- /dev/null +++ b/notebooks/eg_toybidsapp-0-0-7_rawBIDS_slurm_msi.yaml @@ -0,0 +1,59 @@ +# This is an example config yaml file for: +# BIDS App: toy BIDS App ("toy_bids_app") +# BIDS App version: 0.0.7 +# Task: for raw BIDS dataset ("unzipped") +# Which system: Slurm +# Tested on which cluster: MSI cluster +# Toy BIDS App's Docker image is publicly available at: https://hub.docker.com/r/pennlinc/toy_bids_app +# This BIDS App counts number of non-hidden files in a subject's (or a session's) folder; +# More details please see: https://github.com/PennLINC/babs_tests/blob/main/docker/README.md#toy-bids-app-toy_bids_app +# How to prepare a container DataLad dataset of this toy BIDS App? See docs here: https://pennlinc-babs.readthedocs.io/en/latest/preparation_container.html + +# Warning!!! 
+# This is only an example, which may not necessarily fit your purpose, +# or be an optimized solution for your case, +# or be compatible to the BIDS App version you're using. +# Therefore, please change and tailor it for your case before use it!!! + +singularity_run: + --no-zipped: "" # for raw BIDS dataset + --dummy: "2" # this is a dummy variable, accepting values + -v: "" # this is also a dummy variable, not accepting values + +# Output foldername(s) to be zipped, and the BIDS App version to be included in the zip filename(s): +zip_foldernames: + toybidsapp: "0-0-7" # folder 'toybidsapp' will be zipped into 'sub-xx_ses-yy_toybidsapp-0-0-7.zip' + +cluster_resources: + interpreting_shell: "/bin/bash -l" # --> "#!/bin/bash -l" + number_of_cpus: "1" # --> "#SBATCH --cpus-per-task=1" + temporary_disk_space: 20G # --> "#SBATCH --tmp=20G" + hard_memory_limit: 2G # --> "#SBATCH --mem=2G" + hard_runtime_limit: "20" # --> "--time=20", i.e., 20min. Should NOT large than partition's time limit! + customized_text: | + #SBATCH -p ram256g +# Notes: Above `customized_text` is MSI Slurm cluster specific. +# So it may not be relevant for other clusters +# Other things to test: +# #SBATCH --ntasks=1 +# #SBATCH -p amd2tb,ram256g,v100,k40 + +# Users need to add their customized bash command below, +# they will be used as preambles in `participant_job.sh` +# the commands should not be quoted! +script_preamble: | + source /home/faird/shared/code/external/envs/miniconda3/load_miniconda3.sh # [FIX ME] MSI cluster faird group. Replace filepath with yours. + conda activate babs # [FIX ME] replace 'babs' with your env variable name + +# Where to run the jobs: +job_compute_space: "/tmp" # [FIX ME] MSI cluster + +# 'required_files' section is not needed for toy BIDS App. + +# Alert messages that might be found in log files of failed jobs: +# These messages may be helpful for debugging errors in failed jobs. +alert_log_messages: + stdout: + - "xxxx" # this is a dummy alert message. + stderr: + - "xxx" # this is a dummy alert message. diff --git a/notebooks/example_container_zipped_toybidsapp.yaml b/notebooks/eg_toybidsapp-0-0-7_zipped_sge_cubic.yaml similarity index 100% rename from notebooks/example_container_zipped_toybidsapp.yaml rename to notebooks/eg_toybidsapp-0-0-7_zipped_sge_cubic.yaml diff --git a/notebooks/example_container_xcpd.yaml b/notebooks/eg_xcpd-0-3-0_full_sge_cubic.yaml similarity index 100% rename from notebooks/example_container_xcpd.yaml rename to notebooks/eg_xcpd-0-3-0_full_sge_cubic.yaml diff --git a/notebooks/example_container_qsiprep.yaml b/notebooks/example_container_qsiprep.yaml deleted file mode 100644 index c93183d9..00000000 --- a/notebooks/example_container_qsiprep.yaml +++ /dev/null @@ -1,56 +0,0 @@ -# Below is example config yaml file for QSIPrep: - -cli_call: | - -v -v \ - -w ${PWD}/.git/tmp/wkdir \ - --n_cpus $NSLOTS \ - --stop-on-first-crash \ - --fs-license-file code/license.txt \ - --skip-bids-validation \ - --unringing-method mrdegibbs \ - --output-resolution 2.0 - -# You should not have flags of : `--participant-label`, or `--bids-filter-file`! - -singularity_run: - -v: "-v" - -w: "$BABS_TMPDIR" # this is a placeholder. To be changed to `${PWD}/.git/tmp/wkdir` - --n_cpus: "$NSLOTS" # `$NSLOTS` can be recognized by SGE; if you're using Slurm clusters please change to Slurm version! 
- --omp-nthreads: "3" # without this, only run single-threaded jobs (N=`--n_cpus`) at once - --stop-on-first-crash: "" - --fs-license-file: "/cbica/projects/BABS/software/FreeSurfer/license.txt" # path to FS license file - --skip-bids-validation: Null # Null or NULL is also a placeholder - --unringing-method: "mrdegibbs" - --output-resolution: "2.0" - --sloppy: "" # ADD THIS WHEN TESTING - --hmc-model: "none" # ADD THIS WHEN TESTING - --dwi-only: "" # ADD THIS WHEN TESTING - -# Output foldername(s) to be zipped, and the BIDS App version to be included in the zip filename(s): -zip_foldernames: - qsiprep: "0-16-0RC3" # folder 'qsiprep' will be zipped into 'sub-xx_ses-yy_qsiprep-0-16-0RC3.zip' - -cluster_resources: - interpreting_shell: /bin/bash # `#$ -S /bin/bash` on cubic - hard_memory_limit: 32G # `#$ -l h_vmem=32G` on cubic - temporary_disk_space: 200G # `#$ -l tmpfree=200G` on cubic - number_of_cpus: "6" # `#$ -pe threaded 6` on cubic - customized_text: | - #$ -R y - #$ -l hostname=!compute-fed* -# Notes: Above `customized_text` is Penn Med CUBIC cluster specific. -# So it's probably not relevant for other clusters - -# Users need to add their customized bash command below, -# they will be used as preambles in `participant_job.sh` -# the commands should not be quoted! -script_preamble: | - source ${CONDA_PREFIX}/bin/activate mydatalad # Penn Med CUBIC cluster; replace 'mydatalad' with your conda env name - echo "I am running BABS." # this is an example command to show how to add another line; not necessary to include. - -# Where to run the jobs: -job_compute_space: "${CBICA_TMPDIR}" # Penn Med CUBIC cluster tmp space - -required_files: - $INPUT_DATASET_#1: - - "dwi/*_dwi.nii*" # QSIPrep diff --git a/notebooks/show_babs_init_InputBIDS.ipynb b/notebooks/show_babs_init_InputBIDS.ipynb index 337a68d1..aa6d1b9b 100644 --- a/notebooks/show_babs_init_InputBIDS.ipynb +++ b/notebooks/show_babs_init_InputBIDS.ipynb @@ -42,18 +42,34 @@ "cell_type": "code", "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/cbica/projects/BABS/babs/notebooks/eg_qsiprep-0-16-0RC3_sloppy_sge_cubic.yaml\n" + ] + } + ], "source": [ - "# This notebook only accepts toybidsapp, qsiprep or fmriprep\n", + "# This notebook only accepts toybidsapp_rawBIDS, qsiprep or fmriprep\n", "# or fmriprep_anatonly or 'fmriprep_sloppyFlag' or fmriprepfake (input ds: BIDS)\n", "# ++++++++++++++++++++++++++++++++++\n", - "flag_instance = \"toybidsapp\"\n", + "bidsapp = \"qsiprep\"\n", + "task_name = \"sloppy\" # for fmriprep: 'anatonly', 'sloppy'; for toybidsapp: 'rawBIDS'\n", "type_session = \"multi-ses\"\n", - "which_dataset = \"toy_fake\" # \"toy_fake\", \"toy_real\", \"HBN\"\n", + "which_dataset = \"toy_real\" # \"toy_fake\", \"toy_real\", \"HBN\", \"PNC\"\n", "\n", - "flag_where = \"cubic\" # \"cubic\" or \"local\"\n", + "flag_where = \"cubic\" # \"cubic\" or \"local\" or \"msi\"\n", + "type_system = \"sge\" # \"sge\" or \"slurm\"\n", "# ++++++++++++++++++++++++++++++++++\n", "\n", + "# sanity checks:\n", + "if flag_where == \"cubic\":\n", + " assert type_system == \"sge\"\n", + "elif flag_where == \"msi\":\n", + " assert type_system == \"slurm\"\n", + "\n", "# where:\n", "if flag_where == \"cubic\":\n", " where_root = \"/cbica/projects/BABS\"\n", @@ -63,15 +79,21 @@ " where_root = \"/Users/chenyzh/Desktop/Research/Satterthwaite_Lab/datalad_wrapper\"\n", " where_project = op.join(where_root, \"data\")\n", " where_notebooks = op.join(where_root, 
\"babs/notebooks\")\n", + "elif flag_where == \"msi\":\n", + " where_root = \"/home/faird/zhaoc\"\n", + " where_project = \"/home/faird/zhaoc/data\"\n", + " where_notebooks = op.join(where_root, \"babs/notebooks\")\n", "else:\n", " raise Exception(\"not valid `flag_where`!\")\n", "\n", "# Input dataset:\n", "if which_dataset == \"toy_fake\":\n", " if type_session == \"multi-ses\":\n", - " input_ds = op.join(where_project, \"w2nu3\")\n", + " input_ds = \"https://osf.io/w2nu3/\"\n", + " # input_ds = op.join(where_project, \"w2nu3\")\n", " elif type_session == \"single-ses\":\n", - " input_ds = op.join(where_project, \"t8urc\")\n", + " input_ds = \t\"https://osf.io/t8urc/\"\n", + " # input_ds = op.join(where_project, \"t8urc\")\n", "elif which_dataset == \"toy_real\": # real data:\n", " if type_session == \"multi-ses\":\n", " # input_ds = \"/cbica/projects/RBC/chenying_practice/data_for_babs/NKI/data_hashedID_bids\"\n", @@ -79,34 +101,34 @@ " elif type_session == \"single-ses\":\n", " raise Exception(\"not supported yet!\")\n", "elif which_dataset == \"HBN\": # HBN data:\n", - " if type_session == \"single-ses\":\n", + " assert type_session == \"single-ses\"\n", + " if flag_where == \"cubic\":\n", " input_ds = \"/cbica/projects/BABS/data/rawdata_HBN\" # datalad sibling in BABS cubic project\n", - " elif type_session == \"multi-ses\":\n", - " raise Exception(\"HBN is a single-ses data!\")\n", + " elif flag_where == \"msi\":\n", + " input_ds = \"/home/faird/zhaoc/data/HBN_BIDS\"\n", + "elif which_dataset == \"PNC\": # PNC data:\n", + " assert type_session == \"single-ses\"\n", + " input_ds = \"/home/faird/zhaoc/data/PNC_BIDS\" # cloned from RBC github account\n", "\n", "\n", "project_name = \"test_babs_\" + type_session\n", "# Based on which dataset:\n", - "if which_dataset == \"HBN\": # HBN data:\n", - " project_name += \"_HBN\"\n", + "if which_dataset in [\"HBN\", \"PNC\"]: # specific real dataset\n", + " project_name += \"_\" + which_dataset\n", "# Based on which BIDS App:\n", - "if flag_instance == \"toybidsapp\":\n", + "if bidsapp == \"toybidsapp\":\n", " input_cli = [[\"BIDS\", input_ds]]\n", - " bidsapp = \"toybidsapp\"\n", " container_name = bidsapp + \"-0-0-7\"\n", - "elif flag_instance in [\"fmriprep\", \"fmriprep_anatonly\", \"fmriprep_sloppyFlag\"]:\n", - " bidsapp = \"fmriprep\"\n", + "elif bidsapp in [\"fmriprep\", \"fmriprep_anatonly\", \"fmriprep_sloppy\"]:\n", " container_name = bidsapp + \"-20-2-3\"\n", - "elif flag_instance == \"qsiprep\":\n", - " bidsapp = \"qsiprep\"\n", + "elif bidsapp == \"qsiprep\":\n", " container_name = bidsapp + \"-0-16-0RC3\"\n", - "elif flag_instance == \"fmriprepfake\":\n", - " bidsapp = \"fmriprepfake\"\n", + "elif bidsapp == \"fmriprepfake\":\n", " container_name = bidsapp + \"-0-1-1\"\n", "else:\n", - " raise Exception(\"`flag_instance` is not QSIPrep or fMRIPrep!\")\n", + " raise Exception(\"Invalid `flag_instance`!\")\n", "\n", - "project_name += \"_\" + flag_instance\n", + "project_name += \"_\" + bidsapp + \"_\" + task_name\n", "\n", "\n", "# Container:\n", @@ -114,7 +136,20 @@ "if flag_where == \"local\":\n", " # container_ds += \"-docker\" # add \"docker\" at the end\n", " container_ds = op.join(where_project, \"toybidsapp-container-docker\")\n", - "container_config_yaml_file = op.join(where_notebooks, \"example_container_\" + flag_instance + \".yaml\")\n", + "\n", + "container_config_yaml_file = op.join(where_notebooks, \"eg_\" + container_name\n", + " + \"_\" + task_name + \"_\" + type_system)\n", + "if flag_where in [\"cubic\", 
\"msi\"]:\n", + " container_config_yaml_file += \"_\" + flag_where\n", + "else:\n", + " if type_system == \"sge\":\n", + " container_config_yaml_file += \"_\" + \"cubic\"\n", + " elif type_system == \"slurm\":\n", + " container_config_yaml_file += \"_\" + \"msi\"\n", + "container_config_yaml_file += \".yaml\"\n", + "print(container_config_yaml_file)\n", + "assert op.exists(container_config_yaml_file)\n", + "# container_config_yaml_file = op.join(where_notebooks, \"example_container_\" + flag_instance + \".yaml\")\n", "\n", "# list_sub_file = op.join(where_notebooks, \"initial_sub_list_\" + type_session + \".csv\")\n", "list_sub_file = None" @@ -139,11 +174,11 @@ "The command to execute:\n", "babs-init \\\n", "\t--where_project /cbica/projects/BABS/data \\\n", - "\t--project_name test_babs_multi-ses_toybidsapp \\\n", - "\t--input BIDS /cbica/projects/BABS/data/w2nu3 \\\n", - "\t--container_ds /cbica/projects/BABS/data/toybidsapp-container \\\n", - "\t--container_name toybidsapp-0-0-7 \\\n", - "\t--container_config_yaml_file /cbica/projects/BABS/babs/notebooks/example_container_toybidsapp.yaml \\\n", + "\t--project_name test_babs_multi-ses_qsiprep_sloppy \\\n", + "\t--input BIDS /cbica/projects/BABS/data/testdata_NKI/data_hashedID_bids \\\n", + "\t--container_ds /cbica/projects/BABS/data/qsiprep-container \\\n", + "\t--container_name qsiprep-0-16-0RC3 \\\n", + "\t--container_config_yaml_file /cbica/projects/BABS/babs/notebooks/eg_qsiprep-0-16-0RC3_sloppy_sge_cubic.yaml \\\n", "\t--type_session multi-ses \\\n", "\t--type_system sge\n", "WARNING: make sure you've changed `--fs-license-file` value in YAML file if you use it!!!\n" @@ -161,7 +196,7 @@ "cmd += \"\\t\" + \"--container_name \" + container_name + \" \\\\\\n\"\n", "cmd += \"\\t\" + \"--container_config_yaml_file \" + container_config_yaml_file + \" \\\\\\n\"\n", "cmd += \"\\t\" + \"--type_session \" + type_session + \" \\\\\\n\"\n", - "cmd += \"\\t\" + \"--type_system \" + \"sge\"\n", + "cmd += \"\\t\" + \"--type_system \" + type_system\n", "\n", "print(\"The command to execute:\")\n", "print(cmd)\n", diff --git a/tests/get_data.py b/tests/get_data.py index 42e02848..e0551404 100644 --- a/tests/get_data.py +++ b/tests/get_data.py @@ -194,6 +194,59 @@ def container_ds_path(where_now, tmp_path_factory): return origin_container_ds +def get_container_config_yaml_filename(which_bidsapp, + which_input, if_two_input, + type_system): + """ + This is to get the container's config YAML file name, + depending on the BIDS App and if there are two inputs (for fMRIPrep) + + Parameters: + ------------- + which_bidsapp: str + name of the bidsapp + which_input: str + "BIDS" for raw BIDS + "fmriprep" for zipped BIDS derivates + if_two_input: bool + whether there are two input BIDS datasets + type_system: str + "sge" or "slurm" + + Returns: + ----------- + container_config_yaml_filename: str + the filename, without the path. 
+ """ + dict_cluster_name = {'sge': 'cubic', + 'slurm': 'msi'} + dict_bidsapp_version = {"qsiprep": "0-16-0RC3", + "fmriprep": "20-2-3", + "toybidsapp": "0-0-7"} + dict_task_name = {"qsiprep": 'sloppy', + "fmriprep": "full", + "toybidsapp": "rawBIDS"} + + # bidsapp and its version: + container_config_yaml_filename = "eg_" + which_bidsapp + "-" \ + + dict_bidsapp_version[which_bidsapp] + + # task: + container_config_yaml_filename += "_" + if (which_bidsapp == "fmriprep") & if_two_input: + container_config_yaml_filename += "ingressed-fs" + elif (which_bidsapp == "toybidsapp") & (which_input == "fmriprep"): + # the input is zipped BIDS derivatives: + container_config_yaml_filename += "zipped" + else: + container_config_yaml_filename += dict_task_name[which_bidsapp] + + # cluster system type and example name: + container_config_yaml_filename += "_" + type_system + "_" \ + + dict_cluster_name[type_system] + ".yaml" + + return container_config_yaml_filename + def if_command_installed(cmd): """ diff --git a/tests/test_babs_check_setup.py b/tests/test_babs_check_setup.py index 5940aa9e..5799434a 100644 --- a/tests/test_babs_check_setup.py +++ b/tests/test_babs_check_setup.py @@ -16,6 +16,7 @@ container_ds_path, where_now, if_circleci, + get_container_config_yaml_filename, __location__, INFO_2ND_INPUT_DATA, LIST_WHICH_BIDSAPP, @@ -80,8 +81,12 @@ def test_babs_check_setup( project_root = op.join(where_project, project_name) container_name = which_bidsapp + "-" + TOYBIDSAPP_VERSION_DASH container_config_yaml_filename = "example_container_" + which_bidsapp + ".yaml" + container_config_yaml_filename = \ + get_container_config_yaml_filename(which_bidsapp, which_input, if_two_input=False, + type_system="sge") # TODO: also test slurm! container_config_yaml_file = op.join(op.dirname(__location__), "notebooks", container_config_yaml_filename) + assert op.exists(container_config_yaml_file) # below are all correct options: babs_init_opts = argparse.Namespace( diff --git a/tests/test_babs_init.py b/tests/test_babs_init.py index c691339d..b56643ab 100644 --- a/tests/test_babs_init.py +++ b/tests/test_babs_init.py @@ -16,6 +16,7 @@ container_ds_path, where_now, if_circleci, + get_container_config_yaml_filename, __location__, INFO_2ND_INPUT_DATA, LIST_WHICH_BIDSAPP, @@ -71,6 +72,8 @@ def test_babs_init(which_bidsapp, which_input, type_session, if_input_local, if_ Path to the container datalad dataset if_circleci: fixture; bool Whether currently in CircleCI + + TODO: add `type_system` and to test out Slurm version! """ # Sanity checks: assert which_bidsapp in LIST_WHICH_BIDSAPP @@ -94,12 +97,12 @@ def test_babs_init(which_bidsapp, which_input, type_session, if_input_local, if_ # Preparation of freesurfer: for fmriprep and qsiprep: # check if `--fs-license-file` is included in YAML file: - container_config_yaml_filename = "example_container_" + which_bidsapp + ".yaml" - if (which_bidsapp == "fmriprep") & if_two_input: - container_config_yaml_filename = \ - "example_container_" + which_bidsapp + "_ingressed_fs.yaml" + container_config_yaml_filename = \ + get_container_config_yaml_filename(which_bidsapp, which_input, if_two_input, + type_system="sge") # TODO: also test slurm! container_config_yaml_file = op.join(op.dirname(__location__), "notebooks", container_config_yaml_filename) + assert op.exists(container_config_yaml_file) container_config_yaml = read_yaml(container_config_yaml_file) if "--fs-license-file" in container_config_yaml["singularity_run"]: