From d6974c5f75e3f13d937d66721ac346d5cd9d1081 Mon Sep 17 00:00:00 2001 From: Chenying Zhao Date: Mon, 22 May 2023 15:57:21 -0500 Subject: [PATCH 01/14] remove duplicated interpreting shell directives; add eg yaml fille for toy bids app on slurm --- babs/babs.py | 4 +- babs/dict_cluster_systems.yaml | 10 +++- babs/utils.py | 34 +++++++++-- ...IDS_system-slurm_cluster-MSI_egConfig.yaml | 58 +++++++++++++++++++ 4 files changed, 96 insertions(+), 10 deletions(-) create mode 100644 notebooks/bidsapp-toybidsapp-0-0-7_task-rawBIDS_system-slurm_cluster-MSI_egConfig.yaml diff --git a/babs/babs.py b/babs/babs.py index 1a25b894..b0017e35 100644 --- a/babs/babs.py +++ b/babs/babs.py @@ -2542,7 +2542,9 @@ def generate_bash_participant_job(self, bash_path, input_ds, type_session, # Write into the bash file: bash_file = open(bash_path, "a") # open in append mode - bash_file.write("#!/bin/bash\n") + # NOTE: not to automatically generate the interpreting shell; + # instead, let users specify it in the container config yaml file + # using `interpreting_shell` # Cluster resources requesting: cmd_bashhead_resources = generate_bashhead_resources(system, self.config) diff --git a/babs/dict_cluster_systems.yaml b/babs/dict_cluster_systems.yaml index 29392207..3027a717 100644 --- a/babs/dict_cluster_systems.yaml +++ b/babs/dict_cluster_systems.yaml @@ -3,18 +3,22 @@ # format: : "" # placeholder "$VALUE" will be replaced by the real value provided by the user. +# For 'interpreting_shell': nothing else will be added by BABS +# For other keys: cluster-type-specific prefix will be added +# e.g., '#$ ' for SGE clusters +# e.g., '#SBATCH ' for Slurm clusters sge: - interpreting_shell: "-S $VALUE" # "-S /bin/bash" on cubic + interpreting_shell: "#!$VALUE" # "#!/bin/bash" on cubic hard_memory_limit: "-l h_vmem=$VALUE" # "-l h_vmem=25G" on cubic soft_memory_limit: "-l s_vmem=$VALUE" # "-l s_vmem=23.5G" on cubic temporary_disk_space: "-l tmpfree=$VALUE" # "-l tmpfree=200G" on cubic number_of_cpus: "-pe threaded $VALUE" # "-pe threaded N" or a range: "-pe threaded N-M", N Date: Mon, 22 May 2023 17:43:52 -0500 Subject: [PATCH 02/14] also update intepreting shell when babs-check-setup --job-test --- babs/babs.py | 4 +- babs/utils.py | 4 +- ...g_toybidsapp-0-0-7_rawBIDS_slurm_msi.yaml} | 15 ++-- notebooks/show_babs_init_InputBIDS.ipynb | 89 +++++++++++++------ 4 files changed, 73 insertions(+), 39 deletions(-) rename notebooks/{bidsapp-toybidsapp-0-0-7_task-rawBIDS_system-slurm_cluster-MSI_egConfig.yaml => eg_toybidsapp-0-0-7_rawBIDS_slurm_msi.yaml} (87%) diff --git a/babs/babs.py b/babs/babs.py index b0017e35..c427599f 100644 --- a/babs/babs.py +++ b/babs/babs.py @@ -2747,7 +2747,9 @@ def generate_bash_test_job(self, folder_check_setup, # Write into the bash file: bash_file = open(fn_call_test_job, "a") # open in append mode - bash_file.write("#!/bin/bash\n") + # NOTE: not to automatically generate the interpreting shell; + # instead, let users specify it in the container config yaml file + # using `interpreting_shell` # Cluster resources requesting: cmd_bashhead_resources = generate_bashhead_resources(system, self.config) diff --git a/babs/utils.py b/babs/utils.py index c2357861..2e9ee6b1 100644 --- a/babs/utils.py +++ b/babs/utils.py @@ -682,8 +682,8 @@ def generate_one_bashhead_resources(system, key, value): def generate_bashhead_resources(system, config): """ - This is to generate the head of the bash file - for requesting cluster resources. 
+ This is to generate the directives ("head of the bash file") + for requesting cluster resources, specifying interpreting shell, etc. Parameters: ------------ diff --git a/notebooks/bidsapp-toybidsapp-0-0-7_task-rawBIDS_system-slurm_cluster-MSI_egConfig.yaml b/notebooks/eg_toybidsapp-0-0-7_rawBIDS_slurm_msi.yaml similarity index 87% rename from notebooks/bidsapp-toybidsapp-0-0-7_task-rawBIDS_system-slurm_cluster-MSI_egConfig.yaml rename to notebooks/eg_toybidsapp-0-0-7_rawBIDS_slurm_msi.yaml index 80d73001..ceadacf0 100644 --- a/notebooks/bidsapp-toybidsapp-0-0-7_task-rawBIDS_system-slurm_cluster-MSI_egConfig.yaml +++ b/notebooks/eg_toybidsapp-0-0-7_rawBIDS_slurm_msi.yaml @@ -25,17 +25,18 @@ zip_foldernames: toybidsapp: "0-0-7" # folder 'toybidsapp' will be zipped into 'sub-xx_ses-yy_toybidsapp-0-0-7.zip' cluster_resources: - number_of_cpus: '1' # "--cpus-per-task=$VALUE" - temporary_disk_space: 20g - hard_memory_limit: 2g # on MSI, use 'g' for GB + interpreting_shell: "/bin/bash -l" # --> "#!/bin/bash -l" + number_of_cpus: "1" # --> "#SBATCH --cpus-per-task=1" + temporary_disk_space: 20G # --> "#SBATCH --tmp=20G" + hard_memory_limit: 2G # --> "#SBATCH --mem=2G" customized_text: | #SBATCH --time=20 - #SBATCH --ntasks=2 - #SBATCH -p amd2tb,ram256g,v100,k40 -# interpreting_shell: /bin/bash -l # Slurm: better to add `-l` # needs fix in `interpreting_shell` - issue #94 + #SBATCH -p ram256g # Notes: Above `customized_text` is MSI Slurm cluster specific. # So it may not be relevant for other clusters - +# Other things to test: +# #SBATCH --ntasks=1 +# #SBATCH -p amd2tb,ram256g,v100,k40 # Users need to add their customized bash command below, # they will be used as preambles in `participant_job.sh` diff --git a/notebooks/show_babs_init_InputBIDS.ipynb b/notebooks/show_babs_init_InputBIDS.ipynb index 337a68d1..c4e7e009 100644 --- a/notebooks/show_babs_init_InputBIDS.ipynb +++ b/notebooks/show_babs_init_InputBIDS.ipynb @@ -9,7 +9,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -40,20 +40,36 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 10, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/home/faird/zhaoc/babs/notebooks/eg_toybidsapp-0-0-7_rawBIDS_slurm_msi.yaml\n" + ] + } + ], "source": [ - "# This notebook only accepts toybidsapp, qsiprep or fmriprep\n", + "# This notebook only accepts toybidsapp_rawBIDS, qsiprep or fmriprep\n", "# or fmriprep_anatonly or 'fmriprep_sloppyFlag' or fmriprepfake (input ds: BIDS)\n", "# ++++++++++++++++++++++++++++++++++\n", - "flag_instance = \"toybidsapp\"\n", + "bidsapp = \"toybidsapp\"\n", + "task_name = \"rawBIDS\" # for fmriprep: 'anatonly', 'sloppyFlag'; for toybidsapp: 'rawBIDS'\n", "type_session = \"multi-ses\"\n", "which_dataset = \"toy_fake\" # \"toy_fake\", \"toy_real\", \"HBN\"\n", "\n", - "flag_where = \"cubic\" # \"cubic\" or \"local\"\n", + "flag_where = \"msi\" # \"cubic\" or \"local\" or \"msi\"\n", + "type_system = \"slurm\" # \"sge\" or \"slurm\"\n", "# ++++++++++++++++++++++++++++++++++\n", "\n", + "# sanity checks:\n", + "if flag_where == \"cubic\":\n", + " assert type_system == \"sge\"\n", + "elif flag_where == \"msi\":\n", + " assert type_system == \"slurm\"\n", + "\n", "# where:\n", "if flag_where == \"cubic\":\n", " where_root = \"/cbica/projects/BABS\"\n", @@ -63,15 +79,21 @@ " where_root = \"/Users/chenyzh/Desktop/Research/Satterthwaite_Lab/datalad_wrapper\"\n", " 
where_project = op.join(where_root, \"data\")\n", " where_notebooks = op.join(where_root, \"babs/notebooks\")\n", + "elif flag_where == \"msi\":\n", + " where_root = \"/home/faird/zhaoc\"\n", + " where_project = \"/home/faird/zhaoc/data\"\n", + " where_notebooks = op.join(where_root, \"babs/notebooks\")\n", "else:\n", " raise Exception(\"not valid `flag_where`!\")\n", "\n", "# Input dataset:\n", "if which_dataset == \"toy_fake\":\n", " if type_session == \"multi-ses\":\n", - " input_ds = op.join(where_project, \"w2nu3\")\n", + " input_ds = \"https://osf.io/w2nu3/\"\n", + " # input_ds = op.join(where_project, \"w2nu3\")\n", " elif type_session == \"single-ses\":\n", - " input_ds = op.join(where_project, \"t8urc\")\n", + " input_ds = \t\"https://osf.io/t8urc/\"\n", + " # input_ds = op.join(where_project, \"t8urc\")\n", "elif which_dataset == \"toy_real\": # real data:\n", " if type_session == \"multi-ses\":\n", " # input_ds = \"/cbica/projects/RBC/chenying_practice/data_for_babs/NKI/data_hashedID_bids\"\n", @@ -90,23 +112,19 @@ "if which_dataset == \"HBN\": # HBN data:\n", " project_name += \"_HBN\"\n", "# Based on which BIDS App:\n", - "if flag_instance == \"toybidsapp\":\n", + "if bidsapp == \"toybidsapp\":\n", " input_cli = [[\"BIDS\", input_ds]]\n", - " bidsapp = \"toybidsapp\"\n", " container_name = bidsapp + \"-0-0-7\"\n", - "elif flag_instance in [\"fmriprep\", \"fmriprep_anatonly\", \"fmriprep_sloppyFlag\"]:\n", - " bidsapp = \"fmriprep\"\n", + "elif bidsapp in [\"fmriprep\", \"fmriprep_anatonly\", \"fmriprep_sloppyFlag\"]:\n", " container_name = bidsapp + \"-20-2-3\"\n", - "elif flag_instance == \"qsiprep\":\n", - " bidsapp = \"qsiprep\"\n", + "elif bidsapp == \"qsiprep\":\n", " container_name = bidsapp + \"-0-16-0RC3\"\n", - "elif flag_instance == \"fmriprepfake\":\n", - " bidsapp = \"fmriprepfake\"\n", + "elif bidsapp == \"fmriprepfake\":\n", " container_name = bidsapp + \"-0-1-1\"\n", "else:\n", - " raise Exception(\"`flag_instance` is not QSIPrep or fMRIPrep!\")\n", + " raise Exception(\"Invalid `flag_instance`!\")\n", "\n", - "project_name += \"_\" + flag_instance\n", + "project_name += \"_\" + bidsapp + \"_\" + task_name\n", "\n", "\n", "# Container:\n", @@ -114,7 +132,20 @@ "if flag_where == \"local\":\n", " # container_ds += \"-docker\" # add \"docker\" at the end\n", " container_ds = op.join(where_project, \"toybidsapp-container-docker\")\n", - "container_config_yaml_file = op.join(where_notebooks, \"example_container_\" + flag_instance + \".yaml\")\n", + "\n", + "container_config_yaml_file = op.join(where_notebooks, \"eg_\" + container_name\n", + " + \"_\" + task_name + \"_\" + type_system)\n", + "if flag_where in [\"cubic\", \"msi\"]:\n", + " container_config_yaml_file += \"_\" + flag_where\n", + "else:\n", + " if type_system == \"sge\":\n", + " container_config_yaml_file += \"_\" + \"cubic\"\n", + " elif type_system == \"slurm\":\n", + " container_config_yaml_file += \"_\" + \"msi\"\n", + "container_config_yaml_file += \".yaml\"\n", + "print(container_config_yaml_file)\n", + "assert op.exists(container_config_yaml_file)\n", + "# container_config_yaml_file = op.join(where_notebooks, \"example_container_\" + flag_instance + \".yaml\")\n", "\n", "# list_sub_file = op.join(where_notebooks, \"initial_sub_list_\" + type_session + \".csv\")\n", "list_sub_file = None" @@ -129,7 +160,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -138,14 +169,14 @@ "text": [ "The command to execute:\n", "babs-init \\\n", - 
"\t--where_project /cbica/projects/BABS/data \\\n", - "\t--project_name test_babs_multi-ses_toybidsapp \\\n", - "\t--input BIDS /cbica/projects/BABS/data/w2nu3 \\\n", - "\t--container_ds /cbica/projects/BABS/data/toybidsapp-container \\\n", + "\t--where_project /home/faird/zhaoc/data \\\n", + "\t--project_name test_babs_multi-ses_toybidsapp_rawBIDS \\\n", + "\t--input BIDS https://osf.io/w2nu3/ \\\n", + "\t--container_ds /home/faird/zhaoc/data/toybidsapp-container \\\n", "\t--container_name toybidsapp-0-0-7 \\\n", - "\t--container_config_yaml_file /cbica/projects/BABS/babs/notebooks/example_container_toybidsapp.yaml \\\n", + "\t--container_config_yaml_file /home/faird/zhaoc/babs/notebooks/eg_toybidsapp-0-0-7_rawBIDS_slurm_msi.yaml \\\n", "\t--type_session multi-ses \\\n", - "\t--type_system sge\n", + "\t--type_system slurm\n", "WARNING: make sure you've changed `--fs-license-file` value in YAML file if you use it!!!\n" ] } @@ -161,7 +192,7 @@ "cmd += \"\\t\" + \"--container_name \" + container_name + \" \\\\\\n\"\n", "cmd += \"\\t\" + \"--container_config_yaml_file \" + container_config_yaml_file + \" \\\\\\n\"\n", "cmd += \"\\t\" + \"--type_session \" + type_session + \" \\\\\\n\"\n", - "cmd += \"\\t\" + \"--type_system \" + \"sge\"\n", + "cmd += \"\\t\" + \"--type_system \" + type_system\n", "\n", "print(\"The command to execute:\")\n", "print(cmd)\n", @@ -180,7 +211,7 @@ ], "metadata": { "kernelspec": { - "display_name": "mydatalad", + "display_name": "babs", "language": "python", "name": "python3" }, @@ -199,7 +230,7 @@ "orig_nbformat": 4, "vscode": { "interpreter": { - "hash": "abcc7813313a81f6f916a4574498d1c2de65ad7fdfeb04d04cdf237cdcbdda8b" + "hash": "2538d15ebb217aff7ed13fa29cc6f5f706af190e6008d76f30d7ce8c1383d79a" } } }, From 6afd4dadd397db87659f0bef7e4d9f71ccc65b3e Mon Sep 17 00:00:00 2001 From: Chenying Zhao Date: Tue, 23 May 2023 11:14:05 -0500 Subject: [PATCH 03/14] add yaml files for slurm --- babs/utils.py | 1 + .../eg_fmriprep-20-2-3_sloppy_slurm_msi.yaml | 82 +++++++++++++++++++ ...eg_qsiprep-0-16-0RC3_sloppy_slurm_msi.yaml | 60 ++++++++++++++ ...eg_toybidsapp-0-0-7_rawBIDS_slurm_msi.yaml | 8 +- notebooks/show_babs_init_InputBIDS.ipynb | 37 +++++---- 5 files changed, 167 insertions(+), 21 deletions(-) create mode 100644 notebooks/eg_fmriprep-20-2-3_sloppy_slurm_msi.yaml create mode 100644 notebooks/eg_qsiprep-0-16-0RC3_sloppy_slurm_msi.yaml diff --git a/babs/utils.py b/babs/utils.py index 2e9ee6b1..4973c3d5 100644 --- a/babs/utils.py +++ b/babs/utils.py @@ -2095,6 +2095,7 @@ def _check_job_account_slurm(job_id_str, job_name, username_lowercase): stdout=subprocess.PIPE ) # ref: https://slurm.schedmd.com/sacct.html + # also based on ref: https://github.com/ComputeCanada/slurm_utils/blob/master/sacct-all.py proc_sacct.check_returncode() # even if the job does not exist, there will still be printed msg from sacct, diff --git a/notebooks/eg_fmriprep-20-2-3_sloppy_slurm_msi.yaml b/notebooks/eg_fmriprep-20-2-3_sloppy_slurm_msi.yaml new file mode 100644 index 00000000..4c8d19c7 --- /dev/null +++ b/notebooks/eg_fmriprep-20-2-3_sloppy_slurm_msi.yaml @@ -0,0 +1,82 @@ +# This is an example config yaml file for: +# BIDS App: fMRIPrep ("fmriprep") +# BIDS App version: 20.2.3 +# Task: `--sloppy` mode + without FreeSurfer reconstruction +# Which system: Slurm +# Tested on which cluster: MSI cluster +# fMRIPrep's Docker image is publicly available at: https://hub.docker.com/r/nipreps/fmriprep/ + +# WARNING!!! 
+# This is only an example, which may not necessarily fit your purpose,
+# or be an optimized solution for your case,
+# or be compatible with the BIDS App version you're using.
+# Therefore, please change and tailor it for your case before using it!!!
+# WARNING!!!
+# We'll use `--sloppy` testing mode of fMRIPrep.
+# Therefore this YAML file should only be used for testing purposes.
+# You should NOT use this YAML file to generate formal results!
+
+# Arguments when executing the BIDS App using `singularity run`:
+singularity_run:
+    -w: "$BABS_TMPDIR"   # this is a placeholder. To be changed to `${PWD}/.git/tmp/wkdir`
+    --n_cpus: '1'
+    --stop-on-first-crash: ""
+    --fs-license-file: "/home/faird/zhaoc/software/FreeSurfer/license.txt"   # [FIX ME] path to FS license file
+    --skip-bids-validation: Null   # Null or NULL is also a placeholder
+    --output-spaces: "MNI152NLin6Asym:res-2"   # for two output spaces: e.g., "MNI152NLin6Asym:res-2 MNI152NLin2009cAsym"
+    --force-bbr: ""
+    -v: '-v'   # this is for double "-v"
+    --sloppy: ''   # WARNING: use this only when testing
+    --fs-no-reconall: ''   # WARNING: use this only when testing
+
+# Output foldername(s) to be zipped, and the BIDS App version to be included in the zip filename(s):
+# per `--fs-no-reconall`, there won't be an output folder called `freesurfer`
+zip_foldernames:
+    fmriprep: "20-2-3"   # folder 'fmriprep' will be zipped into 'sub-xx_ses-yy_fmriprep-20-2-3.zip'
+
+cluster_resources:
+    interpreting_shell: "/bin/bash -l"   # --> "#!/bin/bash -l"
+    # number_of_cpus: "1"   # --> "#SBATCH --cpus-per-task=1"
+    hard_memory_limit: 25G   # --> "#SBATCH --mem=25G"
+    temporary_disk_space: 200G   # --> "#SBATCH --tmp=200G"
+    hard_runtime_limit: "5-0"   # --> "--time=5-0", i.e., 5 days
+    customized_text: |
+        #SBATCH -p k40
+# Other choices of job partitions on MSI: amd2tb,ram256g,v100,k40
+# Notes: Above `customized_text` is MSI Slurm cluster specific.
+# So it may not be relevant for other clusters
+
+# Users need to add their customized bash command below,
+# they will be used as preambles in `participant_job.sh`
+# the commands should not be quoted!
+script_preamble: |
+    source /home/faird/shared/code/external/envs/miniconda3/load_miniconda3.sh   # [FIX ME] MSI cluster faird group. Replace filepath with yours.
+    conda activate babs   # [FIX ME] replace 'babs' with your env variable name
+
+# ^^ conda env above: where the scripts generated by BABS will run
+# not necessarily the same one for running `babs-init`
+# ^^ based on what you need on your cluster; some people even don't use `conda`...
+# for MSI, might need to add command e.g., "module_load"
+
+# Where to run the jobs:
+job_compute_space: "/tmp"   # [FIX ME] MSI cluster
+
+# Below is to filter out subjects (or sessions)
+# right now we only filter based on unzipped dataset
+required_files:
+    $INPUT_DATASET_#1:
+        - "func/*_bold.nii*"
+        - "anat/*_T1w.nii*"
+
+# Alert messages that might be found in log files of failed jobs:
+# These messages may be helpful for debugging errors in failed jobs.
+alert_log_messages:
+    stdout:
+        - "Exception: No T1w images found for"   # probably not needed, after setting `required_files`
+        - "Excessive topologic defect encountered"
+        - "Cannot allocate memory"
+        - "mris_curvature_stats: Could not open file"
+        - "Numerical result out of range"
+        - "fMRIPrep failed"
+    # stderr:
+    # - "xxxxx"
diff --git a/notebooks/eg_qsiprep-0-16-0RC3_sloppy_slurm_msi.yaml b/notebooks/eg_qsiprep-0-16-0RC3_sloppy_slurm_msi.yaml
new file mode 100644
index 00000000..766b65e1
--- /dev/null
+++ b/notebooks/eg_qsiprep-0-16-0RC3_sloppy_slurm_msi.yaml
@@ -0,0 +1,60 @@
+# This is an example config yaml file for:
+#   BIDS App: QSIPrep ("qsiprep")
+#   BIDS App version: 0.16.0RC3
+#   Task: `--sloppy` mode
+#   Which system: Slurm
+#   Tested on which cluster: MSI cluster
+# QSIPrep's Docker image is publicly available at: https://hub.docker.com/r/pennbbl/qsiprep
+
+# WARNING!!!
+# This is only an example, which may not necessarily fit your purpose,
+# or be an optimized solution for your case,
+# or be compatible with the BIDS App version you're using.
+# Therefore, please change and tailor it for your case before using it!!!
+# WARNING!!!
+# We'll use `--sloppy` testing mode of QSIPrep.
+# Therefore this YAML file should only be used for testing purposes.
+# You should NOT use this YAML file to generate formal results!
+
+singularity_run:
+    -v: "-v"
+    -w: "$BABS_TMPDIR"   # this is a placeholder. To be changed to `${PWD}/.git/tmp/wkdir`
+    --n_cpus: "$NSLOTS"   # `$NSLOTS` can be recognized by SGE; if you're using Slurm clusters please change to Slurm version!
+    --omp-nthreads: "3"   # without this, only run single-threaded jobs (N=`--n_cpus`) at once
+    --stop-on-first-crash: ""
+    --fs-license-file: "/home/faird/zhaoc/software/FreeSurfer/license.txt"   # [FIX ME] path to FS license file
+    --skip-bids-validation: Null   # Null or NULL is also a placeholder
+    --unringing-method: "mrdegibbs"
+    --output-resolution: "2.0"
+    --sloppy: ""   # WARNING: only use this when testing
+    --hmc-model: "none"   # WARNING: only use this when testing
+    --dwi-only: ""   # WARNING: only use this when testing
+
+# Output foldername(s) to be zipped, and the BIDS App version to be included in the zip filename(s):
+zip_foldernames:
+    qsiprep: "0-16-0RC3"   # folder 'qsiprep' will be zipped into 'sub-xx_ses-yy_qsiprep-0-16-0RC3.zip'
+
+cluster_resources:
+    interpreting_shell: /bin/bash   # `#$ -S /bin/bash` on cubic
+    hard_memory_limit: 32G   # `#$ -l h_vmem=32G` on cubic
+    temporary_disk_space: 200G   # `#$ -l tmpfree=200G` on cubic
+    number_of_cpus: "6"   # `#$ -pe threaded 6` on cubic
+    customized_text: |
+        #$ -R y
+        #$ -l hostname=!compute-fed*
+# Notes: Above `customized_text` is Penn Med CUBIC cluster specific.
+# So it's probably not relevant for other clusters
+
+# Users need to add their customized bash command below,
+# they will be used as preambles in `participant_job.sh`
+# the commands should not be quoted!
+script_preamble: |
+    source ${CONDA_PREFIX}/bin/activate mydatalad   # Penn Med CUBIC cluster; replace 'mydatalad' with your conda env name
+    echo "I am running BABS."   # this is an example command to show how to add another line; not necessary to include.
+ +# Where to run the jobs: +job_compute_space: "${CBICA_TMPDIR}" # Penn Med CUBIC cluster tmp space + +required_files: + $INPUT_DATASET_#1: + - "dwi/*_dwi.nii*" # QSIPrep diff --git a/notebooks/eg_toybidsapp-0-0-7_rawBIDS_slurm_msi.yaml b/notebooks/eg_toybidsapp-0-0-7_rawBIDS_slurm_msi.yaml index ceadacf0..2bd0f44a 100644 --- a/notebooks/eg_toybidsapp-0-0-7_rawBIDS_slurm_msi.yaml +++ b/notebooks/eg_toybidsapp-0-0-7_rawBIDS_slurm_msi.yaml @@ -12,7 +12,7 @@ # Warning!!! # This is only an example, which may not necessarily fit your purpose, # or be an optimized solution for your case, -# or be compatible to the toy BIDS App version you're using. +# or be compatible to the BIDS App version you're using. # Therefore, please change and tailor it for your case before use it!!! singularity_run: @@ -42,11 +42,11 @@ cluster_resources: # they will be used as preambles in `participant_job.sh` # the commands should not be quoted! script_preamble: | - source /home/faird/shared/code/external/envs/miniconda3/load_miniconda3.sh # MSI cluster faird group. Replace filepath with yours. - conda activate babs # replace 'babs' with your env variable name + source /home/faird/shared/code/external/envs/miniconda3/load_miniconda3.sh # [FIX ME] MSI cluster faird group. Replace filepath with yours. + conda activate babs # [FIX ME] replace 'babs' with your env variable name # Where to run the jobs: -job_compute_space: "/tmp" # MSI cluster +job_compute_space: "/tmp" # [FIX ME] MSI cluster # 'required_files' section is not needed for toy BIDS App. diff --git a/notebooks/show_babs_init_InputBIDS.ipynb b/notebooks/show_babs_init_InputBIDS.ipynb index c4e7e009..18614358 100644 --- a/notebooks/show_babs_init_InputBIDS.ipynb +++ b/notebooks/show_babs_init_InputBIDS.ipynb @@ -9,7 +9,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -40,14 +40,14 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "/home/faird/zhaoc/babs/notebooks/eg_toybidsapp-0-0-7_rawBIDS_slurm_msi.yaml\n" + "/home/faird/zhaoc/babs/notebooks/eg_fmriprep-20-2-3_sloppy_slurm_msi.yaml\n" ] } ], @@ -55,10 +55,10 @@ "# This notebook only accepts toybidsapp_rawBIDS, qsiprep or fmriprep\n", "# or fmriprep_anatonly or 'fmriprep_sloppyFlag' or fmriprepfake (input ds: BIDS)\n", "# ++++++++++++++++++++++++++++++++++\n", - "bidsapp = \"toybidsapp\"\n", - "task_name = \"rawBIDS\" # for fmriprep: 'anatonly', 'sloppyFlag'; for toybidsapp: 'rawBIDS'\n", - "type_session = \"multi-ses\"\n", - "which_dataset = \"toy_fake\" # \"toy_fake\", \"toy_real\", \"HBN\"\n", + "bidsapp = \"fmriprep\"\n", + "task_name = \"sloppy\" # for fmriprep: 'anatonly', 'sloppy'; for toybidsapp: 'rawBIDS'\n", + "type_session = \"single-ses\"\n", + "which_dataset = \"PNC\" # \"toy_fake\", \"toy_real\", \"HBN\", \"PNC\"\n", "\n", "flag_where = \"msi\" # \"cubic\" or \"local\" or \"msi\"\n", "type_system = \"slurm\" # \"sge\" or \"slurm\"\n", @@ -105,17 +105,20 @@ " input_ds = \"/cbica/projects/BABS/data/rawdata_HBN\" # datalad sibling in BABS cubic project\n", " elif type_session == \"multi-ses\":\n", " raise Exception(\"HBN is a single-ses data!\")\n", + "elif which_dataset == \"PNC\": # PNC data:\n", + " assert type_session == \"single-ses\"\n", + " input_ds = \"/home/faird/zhaoc/data/PNC_BIDS\" # cloned from RBC github account\n", "\n", "\n", "project_name = \"test_babs_\" + type_session\n", "# Based on which 
dataset:\n", - "if which_dataset == \"HBN\": # HBN data:\n", - " project_name += \"_HBN\"\n", + "if which_dataset in [\"HBN\", \"PNC\"]: # specific real dataset\n", + " project_name += \"_\" + which_dataset\n", "# Based on which BIDS App:\n", "if bidsapp == \"toybidsapp\":\n", " input_cli = [[\"BIDS\", input_ds]]\n", " container_name = bidsapp + \"-0-0-7\"\n", - "elif bidsapp in [\"fmriprep\", \"fmriprep_anatonly\", \"fmriprep_sloppyFlag\"]:\n", + "elif bidsapp in [\"fmriprep\", \"fmriprep_anatonly\", \"fmriprep_sloppy\"]:\n", " container_name = bidsapp + \"-20-2-3\"\n", "elif bidsapp == \"qsiprep\":\n", " container_name = bidsapp + \"-0-16-0RC3\"\n", @@ -160,7 +163,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -170,12 +173,12 @@ "The command to execute:\n", "babs-init \\\n", "\t--where_project /home/faird/zhaoc/data \\\n", - "\t--project_name test_babs_multi-ses_toybidsapp_rawBIDS \\\n", - "\t--input BIDS https://osf.io/w2nu3/ \\\n", - "\t--container_ds /home/faird/zhaoc/data/toybidsapp-container \\\n", - "\t--container_name toybidsapp-0-0-7 \\\n", - "\t--container_config_yaml_file /home/faird/zhaoc/babs/notebooks/eg_toybidsapp-0-0-7_rawBIDS_slurm_msi.yaml \\\n", - "\t--type_session multi-ses \\\n", + "\t--project_name test_babs_single-ses_fmriprep_sloppy \\\n", + "\t--input BIDS /home/faird/zhaoc/data/PNC_BIDS \\\n", + "\t--container_ds /home/faird/zhaoc/data/fmriprep-container \\\n", + "\t--container_name fmriprep-20-2-3 \\\n", + "\t--container_config_yaml_file /home/faird/zhaoc/babs/notebooks/eg_fmriprep-20-2-3_sloppy_slurm_msi.yaml \\\n", + "\t--type_session single-ses \\\n", "\t--type_system slurm\n", "WARNING: make sure you've changed `--fs-license-file` value in YAML file if you use it!!!\n" ] From fc34bc1f2a6dd8fa3081dc7fe59e56bd4759d496 Mon Sep 17 00:00:00 2001 From: Chenying Zhao Date: Tue, 23 May 2023 15:35:59 -0500 Subject: [PATCH 04/14] env var for ncpus; reduce time limit; prep qsiprep yaml for slurm cluster --- docs/source/preparation_config_yaml_file.rst | 17 ++++++----- .../eg_fmriprep-20-2-3_sloppy_slurm_msi.yaml | 4 +-- ...eg_qsiprep-0-16-0RC3_sloppy_slurm_msi.yaml | 29 ++++++++++--------- notebooks/show_babs_init_InputBIDS.ipynb | 6 ++-- 4 files changed, 30 insertions(+), 26 deletions(-) diff --git a/docs/source/preparation_config_yaml_file.rst b/docs/source/preparation_config_yaml_file.rst index 757cf8d5..e036ca0b 100644 --- a/docs/source/preparation_config_yaml_file.rst +++ b/docs/source/preparation_config_yaml_file.rst @@ -239,18 +239,19 @@ Advanced - Manual of writing section ``singularity_run`` * Can I use a job environment variable, e.g., number of CPUs? - * Yes you can! For number of CPUs (e.g., ``--n_cpus`` in QSIPrep), for *SGE* clusters, - you can use environment variable ``$NSLOTS``, and you can specify it as:: + * Yes you can! For number of CPUs (e.g., ``--n_cpus`` in QSIPrep), + if you also use ``number_of_cpus`` in **cluster_resources** section (see below), + then you can use environment variable for this Singularity run argument. + * For *SGE* clusters, you can use environment variable ``$NSLOTS``, and you can specify it as:: --n_cpus: "$NSLOTS" - as long as you also set ``number_of_cpus`` in **cluster_resources** section (see below). - - * :octicon:`alert-fill` :bdg-warning:`warning` However *Slurm* clusters probably have different environment variable name - for this - please check out its manual! 
+ * For *Slurm* clusters, you can use environment variable ``$NSLOTS``, and you can specify it as:: -.. developer's note: for Slurm it might be ``$SLURM_NTASKS`` (below ref), however did not find for MSI cluster.. -.. ref: https://docs.mpcdf.mpg.de/doc/computing/clusters/aux/migration-from-sge-to-slurm + --n_cpus: "$SLURM_CPUS_PER_TASK" + +.. developer's note: for Slurm: ref: https://login.scg.stanford.edu/faqs/cores/ +.. other ref: https://docs.mpcdf.mpg.de/doc/computing/clusters/aux/migration-from-sge-to-slurm * When **more than one** input BIDS dataset: You need to specify which dataset goes to the positional argument ``input_dataset`` in the BIDS App, which dataset goes to another named argument. diff --git a/notebooks/eg_fmriprep-20-2-3_sloppy_slurm_msi.yaml b/notebooks/eg_fmriprep-20-2-3_sloppy_slurm_msi.yaml index 4c8d19c7..0d877816 100644 --- a/notebooks/eg_fmriprep-20-2-3_sloppy_slurm_msi.yaml +++ b/notebooks/eg_fmriprep-20-2-3_sloppy_slurm_msi.yaml @@ -18,7 +18,7 @@ # Arguments when executing the BIDS App using `singularity run`: singularity_run: - -w: "$BABS_TMPDIR" # this is a placeholder. To be changed to `${PWD}/.git/tmp/wkdir` + -w: "$BABS_TMPDIR" # this is a placeholder. BABS will replace it with `${PWD}/.git/tmp/wkdir` --n_cpus: '1' --stop-on-first-crash: "" --fs-license-file: "/home/faird/zhaoc/software/FreeSurfer/license.txt" # [FIX ME] path to FS license file @@ -39,7 +39,7 @@ cluster_resources: # number_of_cpus: "1" # --> "#SBATCH --cpus-per-task=1" hard_memory_limit: 25G # --> "#SBATCH --mem=25G" temporary_disk_space: 200G # --> "#SBATCH --tmp=200G" - hard_runtime_limit: "5-0" # --> "--time=5-0", i.e., 5 days + hard_runtime_limit: "72:00:00" # --> "--time=72:00:00", i.e., 3 days. Should NOT large than partition's time limit! customized_text: | #SBATCH -p k40 # Other choices of job partitions on MSI: amd2tb,ram256g,v100,k40 diff --git a/notebooks/eg_qsiprep-0-16-0RC3_sloppy_slurm_msi.yaml b/notebooks/eg_qsiprep-0-16-0RC3_sloppy_slurm_msi.yaml index 766b65e1..1d06e3d5 100644 --- a/notebooks/eg_qsiprep-0-16-0RC3_sloppy_slurm_msi.yaml +++ b/notebooks/eg_qsiprep-0-16-0RC3_sloppy_slurm_msi.yaml @@ -18,8 +18,8 @@ singularity_run: -v: "-v" - -w: "$BABS_TMPDIR" # this is a placeholder. To be changed to `${PWD}/.git/tmp/wkdir` - --n_cpus: "$NSLOTS" # `$NSLOTS` can be recognized by SGE; if you're using Slurm clusters please change to Slurm version! + -w: "$BABS_TMPDIR" # this is a placeholder. BABS will replace it with `${PWD}/.git/tmp/wkdir` + --n_cpus: "$SLURM_CPUS_PER_TASK" # Slurm env variable, taking value from `--cpus-per-task`, i.e., "number_of_cpus" in section "cluster_resources" --omp-nthreads: "3" # without this, only run single-threaded jobs (N=`--n_cpus`) at once --stop-on-first-crash: "" --fs-license-file: "/home/faird/zhaoc/software/FreeSurfer/license.txt" # [FIX ME] path to FS license file @@ -35,26 +35,29 @@ zip_foldernames: qsiprep: "0-16-0RC3" # folder 'qsiprep' will be zipped into 'sub-xx_ses-yy_qsiprep-0-16-0RC3.zip' cluster_resources: - interpreting_shell: /bin/bash # `#$ -S /bin/bash` on cubic - hard_memory_limit: 32G # `#$ -l h_vmem=32G` on cubic - temporary_disk_space: 200G # `#$ -l tmpfree=200G` on cubic - number_of_cpus: "6" # `#$ -pe threaded 6` on cubic + interpreting_shell: /bin/bash # --> "#!/bin/bash -l" + hard_memory_limit: 32G # --> "#SBATCH --mem=32G" + temporary_disk_space: 200G # --> "#SBATCH --tmp=200G" + number_of_cpus: "6" # --> "#SBATCH --cpus-per-task=6" + hard_runtime_limit: "48:00:00" # --> "--time=48:00:00", i.e., 2 days. 
Should NOT large than partition's time limit! customized_text: | - #$ -R y - #$ -l hostname=!compute-fed* -# Notes: Above `customized_text` is Penn Med CUBIC cluster specific. -# So it's probably not relevant for other clusters + #SBATCH -p ram256g +# Other choices of job partitions on MSI: amd2tb,ram256g,v100,k40 +# Notes: Above `customized_text` is MSI Slurm cluster specific. +# So it may not be relevant for other clusters # Users need to add their customized bash command below, # they will be used as preambles in `participant_job.sh` # the commands should not be quoted! script_preamble: | - source ${CONDA_PREFIX}/bin/activate mydatalad # Penn Med CUBIC cluster; replace 'mydatalad' with your conda env name - echo "I am running BABS." # this is an example command to show how to add another line; not necessary to include. + source /home/faird/shared/code/external/envs/miniconda3/load_miniconda3.sh # [FIX ME] MSI cluster faird group. Replace filepath with yours. + conda activate babs # [FIX ME] replace 'babs' with your env variable name # Where to run the jobs: -job_compute_space: "${CBICA_TMPDIR}" # Penn Med CUBIC cluster tmp space +job_compute_space: "/tmp" # [FIX ME] MSI cluster +# Below is to filter out subjects (or sessions) +# right now we only filter based on unzipped dataset required_files: $INPUT_DATASET_#1: - "dwi/*_dwi.nii*" # QSIPrep diff --git a/notebooks/show_babs_init_InputBIDS.ipynb b/notebooks/show_babs_init_InputBIDS.ipynb index 18614358..09e55e03 100644 --- a/notebooks/show_babs_init_InputBIDS.ipynb +++ b/notebooks/show_babs_init_InputBIDS.ipynb @@ -40,7 +40,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -163,7 +163,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -173,7 +173,7 @@ "The command to execute:\n", "babs-init \\\n", "\t--where_project /home/faird/zhaoc/data \\\n", - "\t--project_name test_babs_single-ses_fmriprep_sloppy \\\n", + "\t--project_name test_babs_single-ses_PNC_fmriprep_sloppy \\\n", "\t--input BIDS /home/faird/zhaoc/data/PNC_BIDS \\\n", "\t--container_ds /home/faird/zhaoc/data/fmriprep-container \\\n", "\t--container_name fmriprep-20-2-3 \\\n", From 33eec30a3b481f53a245e22925efb266f75fa414 Mon Sep 17 00:00:00 2001 From: Chenying Zhao Date: Tue, 23 May 2023 15:36:09 -0500 Subject: [PATCH 05/14] fix parsing sacct; fix job report when a job was canceled during pending; --- .vscode/launch.json | 10 ++++++++-- babs/babs.py | 15 +++++++++++++-- babs/utils.py | 20 ++++++++++++++++---- 3 files changed, 37 insertions(+), 8 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 15932eb4..6b116a90 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -56,10 +56,16 @@ // "--type_session", "multi-ses", // "--type_system", "sge" // ] + // "args": [ + // "--project-root", + // "/home/faird/zhaoc/data/test_babs_multi-ses_toybidsapp", + // "--container-config-yaml-file", + // "/home/faird/zhaoc/babs_tests/notebooks/bidsapp-toybidsapp-0-0-7_task-rawBIDS_system-slurm_cluster-MSI_egConfig.yaml", + // "--job-account" + // ] "args": [ "--project-root", - "/home/faird/zhaoc/data/test_babs_multi-ses_toybidsapp""--container-config-yaml-file", - "/home/faird/zhaoc/babs_tests/notebooks/bidsapp-toybidsapp-0-0-7_task-rawBIDS_system-slurm_cluster-MSI_egConfig.yaml", + "/home/faird/zhaoc/data/test_babs_single-ses_PNC_fmriprep_sloppy", "--job-account" ] // "args": [ diff --git a/babs/babs.py b/babs/babs.py index 
c427599f..0180d75e 100644 --- a/babs/babs.py +++ b/babs/babs.py @@ -1179,6 +1179,8 @@ def babs_status(self, flags_resubmit, # `create_job_status_csv(self)` has been called in `babs_status()` # in `cli.py` + from .constants import MSG_NO_ALERT_IN_LOGS + # Load the csv file lock_path = self.job_status_path_abs + ".lock" lock = FileLock(lock_path) @@ -1256,7 +1258,7 @@ def babs_status(self, flags_resubmit, # NOTE: in theory can skip failed jobs in previous round, # but making assigning variables hard; so not to skip # if df_job.at[i_job, "is_failed"] is not True: # np.nan or False - alert_message_in_log_files, if_no_alert_in_log = \ + alert_message_in_log_files, if_no_alert_in_log, if_found_log_files = \ get_alert_message_in_log_files(config_msg_alert, log_fn) # ^^ the function will handle even if `config_msg_alert=None` df_job_updated.at[i_job, "alert_message"] = \ @@ -1436,6 +1438,15 @@ def babs_status(self, flags_resubmit, df_job_updated.at[i_job, "job_state_code"] = np.nan df_job_updated.at[i_job, "duration"] = np.nan # ROADMAP: ^^ get duration via `qacct` + if if_found_log_files == False: # bool or np.nan + # If there is no log files, the alert message would be 'np.nan'; + # however this is a failed job, so it should have log files, + # unless it was killed by the user when pending. + # change the 'alert_message' to no alert in logs, + # so that when reporting job status, + # info from job accounting will be reported + df_job_updated.at[i_job, "alert_message"] = \ + MSG_NO_ALERT_IN_LOGS # check the log file: # TODO ^^ @@ -1575,7 +1586,7 @@ def babs_status(self, flags_resubmit, get_last_line(o_fn) # Check if any alert message in log files for this job: # this is to update `alert_message` in case user changes configs in yaml - alert_message_in_log_files, if_no_alert_in_log = \ + alert_message_in_log_files, if_no_alert_in_log, _ = \ get_alert_message_in_log_files(config_msg_alert, log_fn) # ^^ the function will handle even if `config_msg_alert=None` df_job_updated.at[i_job, "alert_message"] = \ diff --git a/babs/utils.py b/babs/utils.py index 4973c3d5..9e3943a6 100644 --- a/babs/utils.py +++ b/babs/utils.py @@ -1948,6 +1948,9 @@ def get_alert_message_in_log_files(config_msg_alert, log_fn): When `alert_message` is `msg_no_alert`, or is `np.nan` (`if_valid_alert_msg=False`), this is True; Otherwise, any other message, this is False + if_found_log_files: bool or np.nan + np.nan if `config_msg_alert` is None, as it's unknown whether log files exist or not + Otherwise, True or False based on if any log files were found Notes: ----------------- @@ -1960,15 +1963,18 @@ def get_alert_message_in_log_files(config_msg_alert, log_fn): msg_no_alert = MSG_NO_ALERT_IN_LOGS if_valid_alert_msg = True # by default, `alert_message` is valid (i.e., not np.nan) # this is to avoid check `np.isnan(alert_message)`, as `np.isnan(str)` causes error. 
+ if_found_log_files = np.nan if config_msg_alert is None: alert_message = np.nan if_valid_alert_msg = False + if_found_log_files = np.nan # unknown if log files exist or not else: o_fn = log_fn.replace("*", 'o') e_fn = log_fn.replace("*", 'e') if op.exists(o_fn) or op.exists(e_fn): # either exists: + if_found_log_files = True found_message = False alert_message = msg_no_alert @@ -1999,6 +2005,7 @@ def get_alert_message_in_log_files(config_msg_alert, log_fn): break # no need to go to next log file else: # neither o_fn nor e_fn exists yet: + if_found_log_files = False alert_message = np.nan if_valid_alert_msg = False @@ -2008,7 +2015,7 @@ def get_alert_message_in_log_files(config_msg_alert, log_fn): else: # `alert_message`: np.nan or any other message: if_no_alert_in_log = False - return alert_message, if_no_alert_in_log + return alert_message, if_no_alert_in_log, if_found_log_files def get_username(): """ @@ -2100,14 +2107,19 @@ def _check_job_account_slurm(job_id_str, job_name, username_lowercase): proc_sacct.check_returncode() # even if the job does not exist, there will still be printed msg from sacct, # at least a header. So `check_returncode()` should always succeed. - msg_l = proc_sacct.stdout.decode('utf-8').split("\n") + msg_l = proc_sacct.stdout.decode('utf-8').split("\n") # all lines from `sacct` + # 1st line: column names + # 2nd and forward lines: job information + # ^^ if using `--parsable2` and `--delimiter`, there is no 2nd line of "----" dashes + # Usually there are more than one job lines; + # However if the job was manually killed when pending, then there will only be one job line. msg_head = msg_l[0].split(the_delimiter) # list of column names # Check if there is any problem when calling `sacct` for this job: if "State" not in msg_head or "JobID" not in msg_head or "JobName" not in msg_head: if_no_sacct = True - if len(msg_l) <= 2 or msg_l[2] == '': - # if there is only header (len <= 2 or the 3rd element is empty): + if len(msg_l) <= 1 or msg_l[1] == '': + # if there is only header (len <= 1 or the 2nd element is empty): if_no_sacct = True if if_no_sacct: # there is no information about this job in sacct: From 3d7bead34ca93e9a250aea4975225bc3872b5858 Mon Sep 17 00:00:00 2001 From: Chenying Zhao Date: Tue, 23 May 2023 16:58:10 -0400 Subject: [PATCH 06/14] add example commands for babs-status; update docs for --job-account --- babs/cli.py | 6 +++-- docs/source/babs-status.rst | 49 ++++++++++++++++++++++++++++++++++--- 2 files changed, 50 insertions(+), 5 deletions(-) diff --git a/babs/cli.py b/babs/cli.py index 0f3e9f0b..faa5947d 100644 --- a/babs/cli.py +++ b/babs/cli.py @@ -493,8 +493,10 @@ def babs_status_cli(): action='store_true', # ^^ if `--job-account` is specified, args.job_account = True; otherwise, False help="Whether to account failed jobs, which may take some time." - " If `--resubmit failed` or `--resubmit-job` for this failed job is also requested," - " this `--job-account` will be skipped.") + " When using ``--job-account``, please also add ``--container_config_yaml_file``." 
+ " If ``--resubmit failed`` or ``--resubmit-job`` (for some failed jobs)" + " is also requested," + " this ``--job-account`` will be skipped.") return parser diff --git a/docs/source/babs-status.rst b/docs/source/babs-status.rst index 57a1dd82..6c0d8ae1 100644 --- a/docs/source/babs-status.rst +++ b/docs/source/babs-status.rst @@ -1,9 +1,52 @@ -************************************* +################################################## ``babs-status``: Check job status -************************************* +################################################## + +.. contents:: Table of Contents + +********************** +Command-Line Arguments +********************** .. argparse:: :ref: babs.cli.babs_status_cli :prog: babs-status :nodefault: - :nodefaultconst: \ No newline at end of file + :nodefaultconst: + + +********************** +Example commands +********************** + +Basic use: you'll only get job summary (number of jobs finished/pending/running/failed): + +.. code-block:: bash + + babs-status \ + --project-root /path/to/my_BABS_project + +Failed job auditing: only using alert messages in log files: + +.. code-block:: bash + + babs-status \ + --project-root /path/to/my_BABS_project \ + --container-config-yaml-file /path/to/container_config.yaml + +Failed job auditing: using alert messages in log files + performing job account for jobs +without alert messages in log files: + +.. code-block:: bash + + babs-status \ + --project-root /path/to/my_BABS_project \ + --container-config-yaml-file /path/to/container_config.yaml \ + --job-account + +When using ``--job-account``, you should also use ``--container-config-yaml-file``. + +.. developer's note: seems like if only using `--job-account` without `--container-config-yaml-file`, +.. although job account commands will be called (taking more time), +.. it won't report the message e.g., "Among job(s) that are failed and don't have alert message in log files:" +.. This is probably because the "alert_message" was cleared up, so no job has "BABS: No alert message found in log files." 
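For reference on Slurm clusters, the ``--job-account`` auditing described above boils down to a ``sacct`` query similar to the sketch below. This is an illustrative sketch, not the exact call inside ``_check_job_account_slurm()``: the job ID is a placeholder, and the delimiter and column list are assumptions chosen to match the columns the parser looks for (``JobID``, ``JobName``, ``State``).

    # hypothetical example: query one job's accounting record in a parseable form
    job_id="1234567"                      # placeholder job ID
    sacct -j "${job_id}" \
        --format=JobID,JobName,State \
        --parsable2 --delimiter=','
    # With `--parsable2` there is no "----" separator line: line 1 is the header,
    # and the following line(s) are the job and its steps. A job cancelled while
    # still pending may produce only a single job line.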
From 068840cd13cab0a0b9340810add57db6085f8449 Mon Sep 17 00:00:00 2001 From: Chenying Zhao Date: Tue, 23 May 2023 17:19:27 -0500 Subject: [PATCH 07/14] minor changes in yaml file; use HBN and qsiprep to test out slurm version --- ...eg_qsiprep-0-16-0RC3_sloppy_slurm_msi.yaml | 2 +- notebooks/show_babs_init_InputBIDS.ipynb | 23 ++++++++++--------- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/notebooks/eg_qsiprep-0-16-0RC3_sloppy_slurm_msi.yaml b/notebooks/eg_qsiprep-0-16-0RC3_sloppy_slurm_msi.yaml index 1d06e3d5..fd316b54 100644 --- a/notebooks/eg_qsiprep-0-16-0RC3_sloppy_slurm_msi.yaml +++ b/notebooks/eg_qsiprep-0-16-0RC3_sloppy_slurm_msi.yaml @@ -35,7 +35,7 @@ zip_foldernames: qsiprep: "0-16-0RC3" # folder 'qsiprep' will be zipped into 'sub-xx_ses-yy_qsiprep-0-16-0RC3.zip' cluster_resources: - interpreting_shell: /bin/bash # --> "#!/bin/bash -l" + interpreting_shell: "/bin/bash -l" # --> "#!/bin/bash -l" hard_memory_limit: 32G # --> "#SBATCH --mem=32G" temporary_disk_space: 200G # --> "#SBATCH --tmp=200G" number_of_cpus: "6" # --> "#SBATCH --cpus-per-task=6" diff --git a/notebooks/show_babs_init_InputBIDS.ipynb b/notebooks/show_babs_init_InputBIDS.ipynb index 09e55e03..50ed52de 100644 --- a/notebooks/show_babs_init_InputBIDS.ipynb +++ b/notebooks/show_babs_init_InputBIDS.ipynb @@ -47,7 +47,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "/home/faird/zhaoc/babs/notebooks/eg_fmriprep-20-2-3_sloppy_slurm_msi.yaml\n" + "/home/faird/zhaoc/babs/notebooks/eg_qsiprep-0-16-0RC3_sloppy_slurm_msi.yaml\n" ] } ], @@ -55,10 +55,10 @@ "# This notebook only accepts toybidsapp_rawBIDS, qsiprep or fmriprep\n", "# or fmriprep_anatonly or 'fmriprep_sloppyFlag' or fmriprepfake (input ds: BIDS)\n", "# ++++++++++++++++++++++++++++++++++\n", - "bidsapp = \"fmriprep\"\n", + "bidsapp = \"qsiprep\"\n", "task_name = \"sloppy\" # for fmriprep: 'anatonly', 'sloppy'; for toybidsapp: 'rawBIDS'\n", "type_session = \"single-ses\"\n", - "which_dataset = \"PNC\" # \"toy_fake\", \"toy_real\", \"HBN\", \"PNC\"\n", + "which_dataset = \"HBN\" # \"toy_fake\", \"toy_real\", \"HBN\", \"PNC\"\n", "\n", "flag_where = \"msi\" # \"cubic\" or \"local\" or \"msi\"\n", "type_system = \"slurm\" # \"sge\" or \"slurm\"\n", @@ -101,10 +101,11 @@ " elif type_session == \"single-ses\":\n", " raise Exception(\"not supported yet!\")\n", "elif which_dataset == \"HBN\": # HBN data:\n", - " if type_session == \"single-ses\":\n", + " assert type_session == \"single-ses\"\n", + " if flag_where == \"cubic\":\n", " input_ds = \"/cbica/projects/BABS/data/rawdata_HBN\" # datalad sibling in BABS cubic project\n", - " elif type_session == \"multi-ses\":\n", - " raise Exception(\"HBN is a single-ses data!\")\n", + " elif flag_where == \"msi\":\n", + " input_ds = \"/home/faird/zhaoc/data/HBN_BIDS\"\n", "elif which_dataset == \"PNC\": # PNC data:\n", " assert type_session == \"single-ses\"\n", " input_ds = \"/home/faird/zhaoc/data/PNC_BIDS\" # cloned from RBC github account\n", @@ -173,11 +174,11 @@ "The command to execute:\n", "babs-init \\\n", "\t--where_project /home/faird/zhaoc/data \\\n", - "\t--project_name test_babs_single-ses_PNC_fmriprep_sloppy \\\n", - "\t--input BIDS /home/faird/zhaoc/data/PNC_BIDS \\\n", - "\t--container_ds /home/faird/zhaoc/data/fmriprep-container \\\n", - "\t--container_name fmriprep-20-2-3 \\\n", - "\t--container_config_yaml_file /home/faird/zhaoc/babs/notebooks/eg_fmriprep-20-2-3_sloppy_slurm_msi.yaml \\\n", + "\t--project_name test_babs_single-ses_HBN_qsiprep_sloppy \\\n", + "\t--input BIDS 
/home/faird/zhaoc/data/HBN_BIDS \\\n", + "\t--container_ds /home/faird/zhaoc/data/qsiprep-container \\\n", + "\t--container_name qsiprep-0-16-0RC3 \\\n", + "\t--container_config_yaml_file /home/faird/zhaoc/babs/notebooks/eg_qsiprep-0-16-0RC3_sloppy_slurm_msi.yaml \\\n", "\t--type_session single-ses \\\n", "\t--type_system slurm\n", "WARNING: make sure you've changed `--fs-license-file` value in YAML file if you use it!!!\n" From fc5df03e14d25eeeed53dba018c90a43c394516b Mon Sep 17 00:00:00 2001 From: Chenying Zhao Date: Tue, 23 May 2023 18:46:23 -0400 Subject: [PATCH 08/14] doc update: add slurm version of directives --- docs/source/preparation_config_yaml_file.rst | 50 +++++++++++--------- 1 file changed, 27 insertions(+), 23 deletions(-) diff --git a/docs/source/preparation_config_yaml_file.rst b/docs/source/preparation_config_yaml_file.rst index e036ca0b..9fac1b48 100644 --- a/docs/source/preparation_config_yaml_file.rst +++ b/docs/source/preparation_config_yaml_file.rst @@ -417,7 +417,7 @@ Example section **cluster_resources** for ``QSIPrep``:: temporary_disk_space: 200G number_of_cpus: "6" -These will be turned into options in the preambles of ``participant_job.sh`` on an SGE cluster +These will be turned into options in the directives (at the beginning) of ``participant_job.sh`` on an SGE cluster (this script could be found at: ``/path/to/my_BABS_project/analysis/code``) shown as below:: #!/bin/bash @@ -448,28 +448,32 @@ The second row in each cell, which is also in (), is an example. .. - ``interpreting_shell: /bin/bash`` .. - ``-S /bin/bash`` -+------------------------------------------+---------------------------------------+ -| | Section ``cluster_resources`` in YAML | | Generated preamble for SGE clusters | -| | (example key-value) | | (example outcome) | -+==========================================+=======================================+ -| | ``interpreting_shell: $VALUE`` | | ``-S $VALUE`` | -| | (``interpreting_shell: /bin/bash``) | | (``-S /bin/bash``) | -+------------------------------------------+---------------------------------------+ -| | ``hard_memory_limit: $VALUE`` | | ``-l h_vmem=$VALUE`` | -| | (``hard_memory_limit: 25G``) | | (``-l h_vmem=25G``) | -+------------------------------------------+---------------------------------------+ -| | ``soft_memory_limit: $VALUE`` | | ``-l s_vmem=$VALUE`` | -| | (``soft_memory_limit: 23.5G``) | | (``-l s_vmem=23.5G``) | -+------------------------------------------+---------------------------------------+ -| | ``temporary_disk_space: $VALUE`` | | ``-l tmpfree=$VALUE`` | -| | (``temporary_disk_space: 200G``) | | (``-l tmpfree=200G``) | -+------------------------------------------+---------------------------------------+ -| | ``number_of_cpus: "$VALUE"`` | | ``-pe threaded $VALUE`` | -| | (``number_of_cpus: "6"``) | | (``-pe threaded 6``) | -+------------------------------------------+---------------------------------------+ -| | ``hard_runtime_limit: "$VALUE"`` | | ``-l h_rt=$VALUE`` | -| | (``hard_runtime_limit: "24:00:00"``) | | (``-l h_rt=24:00:00``) | -+------------------------------------------+---------------------------------------+ +.. developer's note: actually the width is not working here.... +.. 
table:: + :widths: 60 40 40 + + +------------------------------------------+------------------------------------------+-------------------------------------------+ + | | Section ``cluster_resources`` in YAML | | Generated directives for SGE clusters | | Generated directives for Slurm clusters | + | | (example key-value) | | (example outcome) | | (example outcome) | + +==========================================+==========================================+===========================================+ + | | ``interpreting_shell: $VALUE`` | | ``#!$VALUE`` | | ``#!$VALUE`` | + | | (``interpreting_shell: /bin/bash``) | | (``#!/bin/bash``) | | (``#!/bin/bash``) | + +------------------------------------------+------------------------------------------+-------------------------------------------+ + | | ``hard_memory_limit: $VALUE`` | | ``#$ -l h_vmem=$VALUE`` | | ``#SBATCH --mem=$VALUE`` | + | | (``hard_memory_limit: 25G``) | | (``#$ -l h_vmem=25G``) | | (``#SBATCH --mem=25G``) | + +------------------------------------------+------------------------------------------+-------------------------------------------+ + | | ``soft_memory_limit: $VALUE`` | | ``#$ -l s_vmem=$VALUE`` | Not applicable. | + | | (``soft_memory_limit: 23.5G``) | | (``#$ -l s_vmem=23.5G``) | | + +------------------------------------------+------------------------------------------+-------------------------------------------+ + | | ``temporary_disk_space: $VALUE`` | | ``#$ -l tmpfree=$VALUE`` | | ``#SBATCH --tmp=$VALUE`` | + | | (``temporary_disk_space: 200G``) | | (``#$ -l tmpfree=200G``) | | (``#SBATCH --tmp=200G``) | + +------------------------------------------+------------------------------------------+-------------------------------------------+ + | | ``number_of_cpus: "$VALUE"`` | | ``#$ -pe threaded $VALUE`` | | ``#SBATCH --cpus-per-task=$VALUE`` | + | | (``number_of_cpus: "6"``) | | (``#$ -pe threaded 6``) | | (``#SBATCH --cpus-per-task=6``) | + +------------------------------------------+------------------------------------------+-------------------------------------------+ + | | ``hard_runtime_limit: "$VALUE"`` | | ``#$ -l h_rt=$VALUE`` | | ``#SBATCH --time=$VALUE`` | + | | (``hard_runtime_limit: "24:00:00"``) | | (``#$ -l h_rt=24:00:00``) | | (``#SBATCH --time=24:00:00``) | + +------------------------------------------+------------------------------------------+-------------------------------------------+ If you cannot find the one you want in the above table, you can still add it by ``customized_text``. Below is an example for SGE cluster:: From a5a93837aa51858f187249831b5dea8ff557c6f8 Mon Sep 17 00:00:00 2001 From: Chenying Zhao Date: Tue, 23 May 2023 19:09:12 -0400 Subject: [PATCH 09/14] minor changes in eg toybidsapp yaml file; --- docs/source/preparation_config_yaml_file.rst | 1 + notebooks/eg_toybidsapp-0-0-7_rawBIDS_slurm_msi.yaml | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/source/preparation_config_yaml_file.rst b/docs/source/preparation_config_yaml_file.rst index 9fac1b48..5d834b03 100644 --- a/docs/source/preparation_config_yaml_file.rst +++ b/docs/source/preparation_config_yaml_file.rst @@ -449,6 +449,7 @@ The second row in each cell, which is also in (), is an example. .. - ``-S /bin/bash`` .. developer's note: actually the width is not working here.... +.. tried `||` and `| |` for each row's beginning but did not help... .. 
table:: :widths: 60 40 40 diff --git a/notebooks/eg_toybidsapp-0-0-7_rawBIDS_slurm_msi.yaml b/notebooks/eg_toybidsapp-0-0-7_rawBIDS_slurm_msi.yaml index 2bd0f44a..fb4df255 100644 --- a/notebooks/eg_toybidsapp-0-0-7_rawBIDS_slurm_msi.yaml +++ b/notebooks/eg_toybidsapp-0-0-7_rawBIDS_slurm_msi.yaml @@ -29,8 +29,8 @@ cluster_resources: number_of_cpus: "1" # --> "#SBATCH --cpus-per-task=1" temporary_disk_space: 20G # --> "#SBATCH --tmp=20G" hard_memory_limit: 2G # --> "#SBATCH --mem=2G" + hard_runtime_limit: "20" # --> "--time=20", i.e., 20min. Should NOT large than partition's time limit! customized_text: | - #SBATCH --time=20 #SBATCH -p ram256g # Notes: Above `customized_text` is MSI Slurm cluster specific. # So it may not be relevant for other clusters From 03cd31ec675654f7a5a0782df0af4353b12c099f Mon Sep 17 00:00:00 2001 From: Chenying Zhao Date: Wed, 24 May 2023 21:33:36 -0400 Subject: [PATCH 10/14] add to developer's doc: how to test job auditing; add additional hint for failed qacct; --- babs/utils.py | 3 +++ notebooks/docs_developer.md | 19 ++++++++++++++++++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/babs/utils.py b/babs/utils.py index 9e3943a6..1d324616 100644 --- a/babs/utils.py +++ b/babs/utils.py @@ -2224,6 +2224,9 @@ def _check_job_account_sge(job_id_str, job_name, username_lowercase): print("Hint: check if the job is still in the queue, e.g., in state of qw, r, etc") print("Hint: check if the username used for submitting this job" + " was not current username '" + username_lowercase + "'") + print("Hint: check if the job was killed during pending state") + # ^^ for SGE cluster: job manually killed during pending: `qacct` will fail: + # "error: job id xxx not found" msg_toreturn = msg_failed_to_call_qacct return msg_toreturn diff --git a/notebooks/docs_developer.md b/notebooks/docs_developer.md index be466229..8c97bf90 100644 --- a/notebooks/docs_developer.md +++ b/notebooks/docs_developer.md @@ -74,12 +74,29 @@ when `print(df)` by python: # Testing ## Create pending, failed, or stalled jobs Change/Add these in `participant_job.sh`: -- failed: add `-l h_rt=0:0:20` (hard runtime limit is 20 sec) +- failed: see next section - pending: increase `-l h_vmem` and `-l s_vmem`; increase `-pe threaded N` - stalled (`eqw`): see Bergman email 12/20/22 After these changes, `datalad save -m "message"` and `datalad push --to input` +## Create failed cases for testing `babs-status` failed job auditing +* Add `sleep 3600` to `container_zip.sh`; make sure you `datalad save` the changes +* Change hard runtime limit to 20min (on SGE: `-l h_rt=0:20:00`) +* Create failed cases: + * when the job is pending, manually kill it + * For Slurm cluster: you'll see normal msg from `State` column of `sacct` msg when `--job-account` + * For SGE cluster: you'll see warning that `qacct` failed for this job - this is normal. See PR #98 for more details. 
+ * when the job is running, manually kill it + * wait until the job is running out of time, killed by the cluster + * if you don't want to wait for that long, just set the hard runtime limit to very low value, e.g., 20 sec +* Perform job auditing using `--container-config-yaml-file`: + * add some msg into the `alert_log_messages`, which can be seen in the "failed" jobs - for testing purpose + * although they can be normal msg seen in successful jobs +* Perform job auditing using `--job-account` (and `--container-config-yaml-file`): + * delete the `alert_log_messages` from the yaml file; + * Now, you should see job account for these failed jobs + # Terminology - ".o": standard output stream of the job From 25f7ed56ae42ff8eb9170976d88734c2dff6eb9b Mon Sep 17 00:00:00 2001 From: Chenying Zhao Date: Thu, 25 May 2023 16:03:49 -0400 Subject: [PATCH 11/14] rename yaml files for fmriprep/qsiprep; update comments --- ... eg_fmriprep-20-2-3_sloppy_sge_cubic.yaml} | 40 +++++++------ ...eg_qsiprep-0-16-0RC3_sloppy_sge_cubic.yaml | 60 +++++++++++++++++++ ...eg_qsiprep-0-16-0RC3_sloppy_slurm_msi.yaml | 2 +- notebooks/example_container_qsiprep.yaml | 56 ----------------- notebooks/show_babs_init_InputBIDS.ipynb | 32 +++++----- 5 files changed, 100 insertions(+), 90 deletions(-) rename notebooks/{example_container_fmriprep_sloppyFlag.yaml => eg_fmriprep-20-2-3_sloppy_sge_cubic.yaml} (60%) create mode 100644 notebooks/eg_qsiprep-0-16-0RC3_sloppy_sge_cubic.yaml delete mode 100644 notebooks/example_container_qsiprep.yaml diff --git a/notebooks/example_container_fmriprep_sloppyFlag.yaml b/notebooks/eg_fmriprep-20-2-3_sloppy_sge_cubic.yaml similarity index 60% rename from notebooks/example_container_fmriprep_sloppyFlag.yaml rename to notebooks/eg_fmriprep-20-2-3_sloppy_sge_cubic.yaml index 4ea199ea..81c5919f 100644 --- a/notebooks/example_container_fmriprep_sloppyFlag.yaml +++ b/notebooks/eg_fmriprep-20-2-3_sloppy_sge_cubic.yaml @@ -1,27 +1,33 @@ -# This is an example config yaml file for fMRIPrep version 20.2.3, -# but in `--sloppy` mode, i.e., test mode, and there is no FreeSurfer recon either. -# This should NOT be used to produce real outputs you need. +# This is an example config yaml file for: +# BIDS App: fMRIPrep ("fmriprep") +# BIDS App version: 20.2.3 +# Task: `--sloppy` mode + without FreeSurfer reconstruction +# Which system: SGE +# Tested on which cluster: Penn Med CUBIC cluster +# fMRIPrep's Docker image is publicly available at: https://hub.docker.com/r/nipreps/fmriprep/ -# Warning!!! +# WARNING!!! # This is only an example, which may not necessarily fit your purpose, # or be an optimized solution for your case, -# or be compatible to the fMRIPrep version you're using. +# or be compatible to the BIDS App version you're using. # Therefore, please change and tailor it for your case before use it!!! +# WARNING!!! +# We'll use `--sloppy` testing mode of fMRIPrep. +# Therefore this YAML file should only be used for testing purpose. +# You should NOT use this YAML file to generate formal results! # Arguments when executing the BIDS App using `singularity run`: -# You should not have flags of : `--participant-label`, or `--bids-filter-file`! singularity_run: -w: "$BABS_TMPDIR" # this is a placeholder. 
To be changed to `${PWD}/.git/tmp/wkdir` --n_cpus: '1' --stop-on-first-crash: "" - --fs-license-file: "/cbica/projects/BABS/software/FreeSurfer/license.txt" # path to FS license file + --fs-license-file: "/cbica/projects/BABS/software/FreeSurfer/license.txt" # [FIX ME] path to FS license file --skip-bids-validation: Null # Null or NULL is also a placeholder - --output-spaces: "MNI152NLin6Asym:res-2 MNI152NLin2009cAsym" # e.g., two output spaces + --output-spaces: "MNI152NLin6Asym:res-2" # for two output spaces: e.g., "MNI152NLin6Asym:res-2 MNI152NLin2009cAsym" --force-bbr: "" - #--cifti-output: 91k # ADD THIS WHEN FULL RUN! -v: '-v' # this is for double "-v" - --sloppy: '' # TEST RUN! - --fs-no-reconall: '' # TEST RUN! + --sloppy: '' # WARNING: use this only when testing + --fs-no-reconall: '' # WARNING: use this only when testing # Output foldername(s) to be zipped, and the BIDS App version to be included in the zip filename(s): # per `--fs-no-reconall`, there won't be an output folder called `freesurfer` @@ -29,9 +35,9 @@ zip_foldernames: fmriprep: "20-2-3" # folder 'fmriprep' will be zipped into 'sub-xx_ses-yy_fmriprep-20-2-3.zip' cluster_resources: - interpreting_shell: /bin/bash # "-S /bin/bash" on cubic - hard_memory_limit: 25G # "-l h_vmem=25G" on cubic - temporary_disk_space: 200G # "-l tmpfree=200G" on cubic # this is highly-recommended on cubic + interpreting_shell: /bin/bash # --> "#!/bin/bash" + hard_memory_limit: 25G # --> "#$ -l h_vmem=25G" + temporary_disk_space: 200G # --> "#$ -l tmpfree=200G" # this is highly-recommended on CUBIC cluster customized_text: | #$ -R y #$ -l hostname=!compute-fed* @@ -42,8 +48,8 @@ cluster_resources: # they will be used as preambles in `participant_job.sh` # the commands should not be quoted! script_preamble: | - source ${CONDA_PREFIX}/bin/activate mydatalad # Penn Med CUBIC cluster; replace 'mydatalad' with your conda env name - echo "I am running BABS." # this is an example command to show how to add another line; not necessary to include. + source ${CONDA_PREFIX}/bin/activate mydatalad # [FIX ME] Penn Med CUBIC cluster; replace 'mydatalad' with your conda env name + echo "I am running BABS." # [FIX ME] this is an example command to show how to add another line; not necessary to include. # ^^ conda env above: where the scripts generated by BABS will run # not necessary the same one for running `babs-init` @@ -51,7 +57,7 @@ script_preamble: | # for MSI, might need to add command e.g., "module_load" # Where to run the jobs: -job_compute_space: "${CBICA_TMPDIR}" # Penn Med CUBIC cluster tmp space +job_compute_space: "${CBICA_TMPDIR}" # [FIX ME] Penn Med CUBIC cluster tmp space # Below is to filter out subjects (or sessions) # right now we only filter based on unzipped dataset diff --git a/notebooks/eg_qsiprep-0-16-0RC3_sloppy_sge_cubic.yaml b/notebooks/eg_qsiprep-0-16-0RC3_sloppy_sge_cubic.yaml new file mode 100644 index 00000000..43e9ec53 --- /dev/null +++ b/notebooks/eg_qsiprep-0-16-0RC3_sloppy_sge_cubic.yaml @@ -0,0 +1,60 @@ +# This is an example config yaml file for: +# BIDS App: QSIPrep ("qsiprep") +# BIDS App version: 0.16.0RC3 +# Task: `--sloppy` mode +# Which system: SGE +# Tested on which cluster: Penn Med CUBIC cluster +# QSIPrep's Docker image is publicly available at: https://hub.docker.com/r/pennbbl/qsiprep + +# WARNING!!! +# This is only an example, which may not necessarily fit your purpose, +# or be an optimized solution for your case, +# or be compatible to the BIDS App version you're using. 
+# Therefore, please change and tailor it for your case before use it!!! +# WARNING!!! +# We'll use `--sloppy` testing mode of QSIPrep. +# Therefore this YAML file should only be used for testing purpose. +# You should NOT use this YAML file to generate formal results! + +singularity_run: + -v: "-v" + -w: "$BABS_TMPDIR" # this is a placeholder. To be changed to `${PWD}/.git/tmp/wkdir` + --n_cpus: "$NSLOTS" # `$NSLOTS` can be recognized by SGE + --omp-nthreads: "3" # without this, only run single-threaded jobs (N=`--n_cpus`) at once + --stop-on-first-crash: "" + --fs-license-file: "/cbica/projects/BABS/software/FreeSurfer/license.txt" # [FIX ME] path to FS license file + --skip-bids-validation: Null # Null or NULL is also a placeholder + --unringing-method: "mrdegibbs" + --output-resolution: "2.0" + --sloppy: "" # WARNING: only use this when testing + --hmc-model: "none" # WARNING: only use this when testing + --dwi-only: "" # WARNING: only use this when testing + +# Output foldername(s) to be zipped, and the BIDS App version to be included in the zip filename(s): +zip_foldernames: + qsiprep: "0-16-0RC3" # folder 'qsiprep' will be zipped into 'sub-xx_ses-yy_qsiprep-0-16-0RC3.zip' + +cluster_resources: + interpreting_shell: /bin/bash # --> "#!/bin/bash" + hard_memory_limit: 32G # --> `#$ -l h_vmem=32G` + temporary_disk_space: 200G # --> `#$ -l tmpfree=200G` + number_of_cpus: "6" # --> `#$ -pe threaded 6` + customized_text: | + #$ -R y + #$ -l hostname=!compute-fed* +# Notes: Above `customized_text` is Penn Med CUBIC cluster specific. +# So it's probably not relevant for other clusters + +# Users need to add their customized bash command below, +# they will be used as preambles in `participant_job.sh` +# the commands should not be quoted! +script_preamble: | + source ${CONDA_PREFIX}/bin/activate mydatalad # [FIX ME] Penn Med CUBIC cluster; replace 'mydatalad' with your conda env name + echo "I am running BABS." # [FIX ME] this is an example command to show how to add another line; not necessary to include. + +# Where to run the jobs: +job_compute_space: "${CBICA_TMPDIR}" # [FIX ME] Penn Med CUBIC cluster tmp space + +required_files: + $INPUT_DATASET_#1: + - "dwi/*_dwi.nii*" # QSIPrep diff --git a/notebooks/eg_qsiprep-0-16-0RC3_sloppy_slurm_msi.yaml b/notebooks/eg_qsiprep-0-16-0RC3_sloppy_slurm_msi.yaml index fd316b54..e292b7bf 100644 --- a/notebooks/eg_qsiprep-0-16-0RC3_sloppy_slurm_msi.yaml +++ b/notebooks/eg_qsiprep-0-16-0RC3_sloppy_slurm_msi.yaml @@ -35,7 +35,7 @@ zip_foldernames: qsiprep: "0-16-0RC3" # folder 'qsiprep' will be zipped into 'sub-xx_ses-yy_qsiprep-0-16-0RC3.zip' cluster_resources: - interpreting_shell: "/bin/bash -l" # --> "#!/bin/bash -l" + interpreting_shell: "/bin/bash -l" # --> "#!/bin/bash -l" hard_memory_limit: 32G # --> "#SBATCH --mem=32G" temporary_disk_space: 200G # --> "#SBATCH --tmp=200G" number_of_cpus: "6" # --> "#SBATCH --cpus-per-task=6" diff --git a/notebooks/example_container_qsiprep.yaml b/notebooks/example_container_qsiprep.yaml deleted file mode 100644 index c93183d9..00000000 --- a/notebooks/example_container_qsiprep.yaml +++ /dev/null @@ -1,56 +0,0 @@ -# Below is example config yaml file for QSIPrep: - -cli_call: | - -v -v \ - -w ${PWD}/.git/tmp/wkdir \ - --n_cpus $NSLOTS \ - --stop-on-first-crash \ - --fs-license-file code/license.txt \ - --skip-bids-validation \ - --unringing-method mrdegibbs \ - --output-resolution 2.0 - -# You should not have flags of : `--participant-label`, or `--bids-filter-file`! 
- -singularity_run: - -v: "-v" - -w: "$BABS_TMPDIR" # this is a placeholder. To be changed to `${PWD}/.git/tmp/wkdir` - --n_cpus: "$NSLOTS" # `$NSLOTS` can be recognized by SGE; if you're using Slurm clusters please change to Slurm version! - --omp-nthreads: "3" # without this, only run single-threaded jobs (N=`--n_cpus`) at once - --stop-on-first-crash: "" - --fs-license-file: "/cbica/projects/BABS/software/FreeSurfer/license.txt" # path to FS license file - --skip-bids-validation: Null # Null or NULL is also a placeholder - --unringing-method: "mrdegibbs" - --output-resolution: "2.0" - --sloppy: "" # ADD THIS WHEN TESTING - --hmc-model: "none" # ADD THIS WHEN TESTING - --dwi-only: "" # ADD THIS WHEN TESTING - -# Output foldername(s) to be zipped, and the BIDS App version to be included in the zip filename(s): -zip_foldernames: - qsiprep: "0-16-0RC3" # folder 'qsiprep' will be zipped into 'sub-xx_ses-yy_qsiprep-0-16-0RC3.zip' - -cluster_resources: - interpreting_shell: /bin/bash # `#$ -S /bin/bash` on cubic - hard_memory_limit: 32G # `#$ -l h_vmem=32G` on cubic - temporary_disk_space: 200G # `#$ -l tmpfree=200G` on cubic - number_of_cpus: "6" # `#$ -pe threaded 6` on cubic - customized_text: | - #$ -R y - #$ -l hostname=!compute-fed* -# Notes: Above `customized_text` is Penn Med CUBIC cluster specific. -# So it's probably not relevant for other clusters - -# Users need to add their customized bash command below, -# they will be used as preambles in `participant_job.sh` -# the commands should not be quoted! -script_preamble: | - source ${CONDA_PREFIX}/bin/activate mydatalad # Penn Med CUBIC cluster; replace 'mydatalad' with your conda env name - echo "I am running BABS." # this is an example command to show how to add another line; not necessary to include. 
- -# Where to run the jobs: -job_compute_space: "${CBICA_TMPDIR}" # Penn Med CUBIC cluster tmp space - -required_files: - $INPUT_DATASET_#1: - - "dwi/*_dwi.nii*" # QSIPrep diff --git a/notebooks/show_babs_init_InputBIDS.ipynb b/notebooks/show_babs_init_InputBIDS.ipynb index 50ed52de..aa6d1b9b 100644 --- a/notebooks/show_babs_init_InputBIDS.ipynb +++ b/notebooks/show_babs_init_InputBIDS.ipynb @@ -40,14 +40,14 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "/home/faird/zhaoc/babs/notebooks/eg_qsiprep-0-16-0RC3_sloppy_slurm_msi.yaml\n" + "/cbica/projects/BABS/babs/notebooks/eg_qsiprep-0-16-0RC3_sloppy_sge_cubic.yaml\n" ] } ], @@ -57,11 +57,11 @@ "# ++++++++++++++++++++++++++++++++++\n", "bidsapp = \"qsiprep\"\n", "task_name = \"sloppy\" # for fmriprep: 'anatonly', 'sloppy'; for toybidsapp: 'rawBIDS'\n", - "type_session = \"single-ses\"\n", - "which_dataset = \"HBN\" # \"toy_fake\", \"toy_real\", \"HBN\", \"PNC\"\n", + "type_session = \"multi-ses\"\n", + "which_dataset = \"toy_real\" # \"toy_fake\", \"toy_real\", \"HBN\", \"PNC\"\n", "\n", - "flag_where = \"msi\" # \"cubic\" or \"local\" or \"msi\"\n", - "type_system = \"slurm\" # \"sge\" or \"slurm\"\n", + "flag_where = \"cubic\" # \"cubic\" or \"local\" or \"msi\"\n", + "type_system = \"sge\" # \"sge\" or \"slurm\"\n", "# ++++++++++++++++++++++++++++++++++\n", "\n", "# sanity checks:\n", @@ -164,7 +164,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -173,14 +173,14 @@ "text": [ "The command to execute:\n", "babs-init \\\n", - "\t--where_project /home/faird/zhaoc/data \\\n", - "\t--project_name test_babs_single-ses_HBN_qsiprep_sloppy \\\n", - "\t--input BIDS /home/faird/zhaoc/data/HBN_BIDS \\\n", - "\t--container_ds /home/faird/zhaoc/data/qsiprep-container \\\n", + "\t--where_project /cbica/projects/BABS/data \\\n", + "\t--project_name test_babs_multi-ses_qsiprep_sloppy \\\n", + "\t--input BIDS /cbica/projects/BABS/data/testdata_NKI/data_hashedID_bids \\\n", + "\t--container_ds /cbica/projects/BABS/data/qsiprep-container \\\n", "\t--container_name qsiprep-0-16-0RC3 \\\n", - "\t--container_config_yaml_file /home/faird/zhaoc/babs/notebooks/eg_qsiprep-0-16-0RC3_sloppy_slurm_msi.yaml \\\n", - "\t--type_session single-ses \\\n", - "\t--type_system slurm\n", + "\t--container_config_yaml_file /cbica/projects/BABS/babs/notebooks/eg_qsiprep-0-16-0RC3_sloppy_sge_cubic.yaml \\\n", + "\t--type_session multi-ses \\\n", + "\t--type_system sge\n", "WARNING: make sure you've changed `--fs-license-file` value in YAML file if you use it!!!\n" ] } @@ -215,7 +215,7 @@ ], "metadata": { "kernelspec": { - "display_name": "babs", + "display_name": "mydatalad", "language": "python", "name": "python3" }, @@ -234,7 +234,7 @@ "orig_nbformat": 4, "vscode": { "interpreter": { - "hash": "2538d15ebb217aff7ed13fa29cc6f5f706af190e6008d76f30d7ce8c1383d79a" + "hash": "abcc7813313a81f6f916a4574498d1c2de65ad7fdfeb04d04cdf237cdcbdda8b" } } }, From 055312122153c6ccf20632ebb047a0527062dcd3 Mon Sep 17 00:00:00 2001 From: Chenying Zhao Date: Thu, 25 May 2023 18:03:08 -0400 Subject: [PATCH 12/14] rename example yaml files; fix yaml file names in pytest; --- notebooks/README.md | 8 +++ ...g_fmriprep-20-2-3_anatonly_sge_cubic.yaml} | 0 ...=> eg_fmriprep-20-2-3_full_sge_cubic.yaml} | 0 ...riprep-20-2-3_ingressed-fs_sge_cubic.yaml} | 0 ...eg_fmriprepfake-0-1-1_full_sge_cubic.yaml} | 0 
...g_toybidsapp-0-0-7_rawBIDS_sge_cubic.yaml} | 0 ...eg_toybidsapp-0-0-7_zipped_sge_cubic.yaml} | 0 ...yaml => eg_xcpd-0-3-0_full_sge_cubic.yaml} | 0 tests/get_data.py | 53 +++++++++++++++++++ tests/test_babs_check_setup.py | 5 ++ tests/test_babs_init.py | 11 ++-- 11 files changed, 73 insertions(+), 4 deletions(-) create mode 100644 notebooks/README.md rename notebooks/{example_container_fmriprep_anatonly.yaml => eg_fmriprep-20-2-3_anatonly_sge_cubic.yaml} (100%) rename notebooks/{example_container_fmriprep.yaml => eg_fmriprep-20-2-3_full_sge_cubic.yaml} (100%) rename notebooks/{example_container_fmriprep_ingressed_fs.yaml => eg_fmriprep-20-2-3_ingressed-fs_sge_cubic.yaml} (100%) rename notebooks/{example_container_fmriprepfake.yaml => eg_fmriprepfake-0-1-1_full_sge_cubic.yaml} (100%) rename notebooks/{example_container_toybidsapp.yaml => eg_toybidsapp-0-0-7_rawBIDS_sge_cubic.yaml} (100%) rename notebooks/{example_container_zipped_toybidsapp.yaml => eg_toybidsapp-0-0-7_zipped_sge_cubic.yaml} (100%) rename notebooks/{example_container_xcpd.yaml => eg_xcpd-0-3-0_full_sge_cubic.yaml} (100%) diff --git a/notebooks/README.md b/notebooks/README.md new file mode 100644 index 00000000..60cb3e4a --- /dev/null +++ b/notebooks/README.md @@ -0,0 +1,8 @@ +# Notebooks folder + +## Example container configuration YAML files +* Naming convension: `eg____.yaml` + * ``: BIDS App name and version + * ``: For what application of BIDS App? Full run? Sloppy mode? + * ``: `sge` or `slurm` + * ``: name of example cluster where the YAML file was tested diff --git a/notebooks/example_container_fmriprep_anatonly.yaml b/notebooks/eg_fmriprep-20-2-3_anatonly_sge_cubic.yaml similarity index 100% rename from notebooks/example_container_fmriprep_anatonly.yaml rename to notebooks/eg_fmriprep-20-2-3_anatonly_sge_cubic.yaml diff --git a/notebooks/example_container_fmriprep.yaml b/notebooks/eg_fmriprep-20-2-3_full_sge_cubic.yaml similarity index 100% rename from notebooks/example_container_fmriprep.yaml rename to notebooks/eg_fmriprep-20-2-3_full_sge_cubic.yaml diff --git a/notebooks/example_container_fmriprep_ingressed_fs.yaml b/notebooks/eg_fmriprep-20-2-3_ingressed-fs_sge_cubic.yaml similarity index 100% rename from notebooks/example_container_fmriprep_ingressed_fs.yaml rename to notebooks/eg_fmriprep-20-2-3_ingressed-fs_sge_cubic.yaml diff --git a/notebooks/example_container_fmriprepfake.yaml b/notebooks/eg_fmriprepfake-0-1-1_full_sge_cubic.yaml similarity index 100% rename from notebooks/example_container_fmriprepfake.yaml rename to notebooks/eg_fmriprepfake-0-1-1_full_sge_cubic.yaml diff --git a/notebooks/example_container_toybidsapp.yaml b/notebooks/eg_toybidsapp-0-0-7_rawBIDS_sge_cubic.yaml similarity index 100% rename from notebooks/example_container_toybidsapp.yaml rename to notebooks/eg_toybidsapp-0-0-7_rawBIDS_sge_cubic.yaml diff --git a/notebooks/example_container_zipped_toybidsapp.yaml b/notebooks/eg_toybidsapp-0-0-7_zipped_sge_cubic.yaml similarity index 100% rename from notebooks/example_container_zipped_toybidsapp.yaml rename to notebooks/eg_toybidsapp-0-0-7_zipped_sge_cubic.yaml diff --git a/notebooks/example_container_xcpd.yaml b/notebooks/eg_xcpd-0-3-0_full_sge_cubic.yaml similarity index 100% rename from notebooks/example_container_xcpd.yaml rename to notebooks/eg_xcpd-0-3-0_full_sge_cubic.yaml diff --git a/tests/get_data.py b/tests/get_data.py index 42e02848..e0551404 100644 --- a/tests/get_data.py +++ b/tests/get_data.py @@ -194,6 +194,59 @@ def container_ds_path(where_now, tmp_path_factory): return 
origin_container_ds +def get_container_config_yaml_filename(which_bidsapp, + which_input, if_two_input, + type_system): + """ + This is to get the container's config YAML file name, + depending on the BIDS App and if there are two inputs (for fMRIPrep) + + Parameters: + ------------- + which_bidsapp: str + name of the bidsapp + which_input: str + "BIDS" for raw BIDS + "fmriprep" for zipped BIDS derivates + if_two_input: bool + whether there are two input BIDS datasets + type_system: str + "sge" or "slurm" + + Returns: + ----------- + container_config_yaml_filename: str + the filename, without the path. + """ + dict_cluster_name = {'sge': 'cubic', + 'slurm': 'msi'} + dict_bidsapp_version = {"qsiprep": "0-16-0RC3", + "fmriprep": "20-2-3", + "toybidsapp": "0-0-7"} + dict_task_name = {"qsiprep": 'sloppy', + "fmriprep": "full", + "toybidsapp": "rawBIDS"} + + # bidsapp and its version: + container_config_yaml_filename = "eg_" + which_bidsapp + "-" \ + + dict_bidsapp_version[which_bidsapp] + + # task: + container_config_yaml_filename += "_" + if (which_bidsapp == "fmriprep") & if_two_input: + container_config_yaml_filename += "ingressed-fs" + elif (which_bidsapp == "toybidsapp") & (which_input == "fmriprep"): + # the input is zipped BIDS derivatives: + container_config_yaml_filename += "zipped" + else: + container_config_yaml_filename += dict_task_name[which_bidsapp] + + # cluster system type and example name: + container_config_yaml_filename += "_" + type_system + "_" \ + + dict_cluster_name[type_system] + ".yaml" + + return container_config_yaml_filename + def if_command_installed(cmd): """ diff --git a/tests/test_babs_check_setup.py b/tests/test_babs_check_setup.py index 5940aa9e..5799434a 100644 --- a/tests/test_babs_check_setup.py +++ b/tests/test_babs_check_setup.py @@ -16,6 +16,7 @@ container_ds_path, where_now, if_circleci, + get_container_config_yaml_filename, __location__, INFO_2ND_INPUT_DATA, LIST_WHICH_BIDSAPP, @@ -80,8 +81,12 @@ def test_babs_check_setup( project_root = op.join(where_project, project_name) container_name = which_bidsapp + "-" + TOYBIDSAPP_VERSION_DASH container_config_yaml_filename = "example_container_" + which_bidsapp + ".yaml" + container_config_yaml_filename = \ + get_container_config_yaml_filename(which_bidsapp, which_input, if_two_input=False, + type_system="sge") # TODO: also test slurm! container_config_yaml_file = op.join(op.dirname(__location__), "notebooks", container_config_yaml_filename) + assert op.exists(container_config_yaml_file) # below are all correct options: babs_init_opts = argparse.Namespace( diff --git a/tests/test_babs_init.py b/tests/test_babs_init.py index c691339d..b56643ab 100644 --- a/tests/test_babs_init.py +++ b/tests/test_babs_init.py @@ -16,6 +16,7 @@ container_ds_path, where_now, if_circleci, + get_container_config_yaml_filename, __location__, INFO_2ND_INPUT_DATA, LIST_WHICH_BIDSAPP, @@ -71,6 +72,8 @@ def test_babs_init(which_bidsapp, which_input, type_session, if_input_local, if_ Path to the container datalad dataset if_circleci: fixture; bool Whether currently in CircleCI + + TODO: add `type_system` and to test out Slurm version! 
""" # Sanity checks: assert which_bidsapp in LIST_WHICH_BIDSAPP @@ -94,12 +97,12 @@ def test_babs_init(which_bidsapp, which_input, type_session, if_input_local, if_ # Preparation of freesurfer: for fmriprep and qsiprep: # check if `--fs-license-file` is included in YAML file: - container_config_yaml_filename = "example_container_" + which_bidsapp + ".yaml" - if (which_bidsapp == "fmriprep") & if_two_input: - container_config_yaml_filename = \ - "example_container_" + which_bidsapp + "_ingressed_fs.yaml" + container_config_yaml_filename = \ + get_container_config_yaml_filename(which_bidsapp, which_input, if_two_input, + type_system="sge") # TODO: also test slurm! container_config_yaml_file = op.join(op.dirname(__location__), "notebooks", container_config_yaml_filename) + assert op.exists(container_config_yaml_file) container_config_yaml = read_yaml(container_config_yaml_file) if "--fs-license-file" in container_config_yaml["singularity_run"]: From e1c46679a46f0849e1f785a83ee756f9da5b8180 Mon Sep 17 00:00:00 2001 From: Chenying Zhao Date: Thu, 25 May 2023 18:05:49 -0400 Subject: [PATCH 13/14] fix typo --- notebooks/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/notebooks/README.md b/notebooks/README.md index 60cb3e4a..c21c6e9f 100644 --- a/notebooks/README.md +++ b/notebooks/README.md @@ -1,7 +1,7 @@ # Notebooks folder ## Example container configuration YAML files -* Naming convension: `eg____.yaml` +* Naming convention: `eg____.yaml` * ``: BIDS App name and version * ``: For what application of BIDS App? Full run? Sloppy mode? * ``: `sge` or `slurm` From e3057faa94da835cdd668d5c235c1800a3f110f1 Mon Sep 17 00:00:00 2001 From: Chenying Zhao Date: Thu, 25 May 2023 18:56:17 -0400 Subject: [PATCH 14/14] note for interpreting_shell --- docs/source/preparation_config_yaml_file.rst | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/docs/source/preparation_config_yaml_file.rst b/docs/source/preparation_config_yaml_file.rst index 5d834b03..3e93c588 100644 --- a/docs/source/preparation_config_yaml_file.rst +++ b/docs/source/preparation_config_yaml_file.rst @@ -421,7 +421,6 @@ These will be turned into options in the directives (at the beginning) of ``part (this script could be found at: ``/path/to/my_BABS_project/analysis/code``) shown as below:: #!/bin/bash - #$ -S /bin/bash #$ -l h_vmem=32G #$ -l tmpfree=200G #$ -pe threaded 6 @@ -430,6 +429,13 @@ For example, a job requires no more than 32 GB of memory, i.e., on SGE clusters, ``-l h_vmem=32G``. You may simply specify: ``hard_memory_limit: 32G``. +.. warning:: + Make sure you add ``interpreting_shell``! + It is very important. + For SGE, you might need: ``interpreting_shell: /bin/bash``; + For Slurm, you might need: ``interpreting_shell: /bin/bash -l``. + Check what it should be like in the manual of your cluster! + The table below lists all the named cluster resources requests that BABS supports. You may not need all of them. BABS will replace ``$VALUE`` with the value you provide.