Skip to content

Commit

Permalink
Merge pull request #116 from PennLINC/enh/mk_output_folder
Browse files Browse the repository at this point in the history
[ENH] Add an option of creating a sub-folder in `<output_dir>` to zip all derivatives
  • Loading branch information
Chenying Zhao authored Jul 25, 2023
2 parents e04a216 + 873871f commit 5e408e5
Show file tree
Hide file tree
Showing 28 changed files with 1,512 additions and 256 deletions.
20 changes: 13 additions & 7 deletions babs/babs.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
generate_cmd_set_envvar,
generate_cmd_filterfile,
generate_cmd_singularityRun_from_config, generate_cmd_unzip_inputds,
get_info_zip_foldernames,
generate_cmd_zipping_from_config,
validate_type_session,
validate_type_system,
Expand Down Expand Up @@ -2257,17 +2258,17 @@ def generate_bash_run_bidsapp(self, bash_path, input_ds, type_session):
When writing `singularity run` part, each chunk to write should start with " \\" + "\n\t",
meaning, starting with space, a backward slash, a return, and a tab.
"""
from .constants import PATH_FS_LICENSE_IN_CONTAINER
from .constants import PATH_FS_LICENSE_IN_CONTAINER, OUTPUT_MAIN_FOLDERNAME

type_session = validate_type_session(type_session)
output_foldername = "outputs" # foldername of BIDS App outputs

# Check if the folder exists; if not, create it:
bash_dir = op.dirname(bash_path)
if not op.exists(bash_dir):
os.makedirs(bash_dir)

# check if `self.config` from the YAML file contains information we need:
# 1. check `singularity_run` section:
if "singularity_run" not in self.config:
# sanity check: there should be only one input ds
# otherwise need to specify in this section:
Expand Down Expand Up @@ -2295,10 +2296,14 @@ def generate_bash_run_bidsapp(self, bash_path, input_ds, type_session):
cmd_singularity_flags, flag_fs_license, path_fs_license, singuRun_input_dir = \
generate_cmd_singularityRun_from_config(self.config, input_ds)

print()

# TODO: also incorporate the `call-fmt` in `datalad containers-add`

# 2. check `zip_foldernames` section:
dict_zip_foldernames, if_mk_output_folder, path_output_folder = \
get_info_zip_foldernames(self.config)

print()

# Check if the bash file already exists:
if op.exists(bash_path):
os.remove(bash_path) # remove it
Expand Down Expand Up @@ -2386,7 +2391,7 @@ def generate_bash_run_bidsapp(self, bash_path, input_ds, type_session):
cmd_head_singularityRun += " \\" + "\n\t"
cmd_head_singularityRun += singuRun_input_dir # inputs/data/<name>
cmd_head_singularityRun += " \\" + "\n\t"
cmd_head_singularityRun += output_foldername # output folder
cmd_head_singularityRun += path_output_folder # defined above

# currently all BIDS Apps support the `participant` positional argument:
cmd_head_singularityRun += " \\" + "\n\t"
Expand All @@ -2411,15 +2416,16 @@ def generate_bash_run_bidsapp(self, bash_path, input_ds, type_session):
print(cmd_head_singularityRun + cmd_singularity_flags)

# Zip:
cmd_zip = generate_cmd_zipping_from_config(self.config, type_session, output_foldername)
cmd_zip = generate_cmd_zipping_from_config(dict_zip_foldernames, type_session)
bash_file.write(cmd_zip)

# Delete folders and files:
"""
rm -rf prep .git/tmp/wkdir
rm ${filterfile}
"""
cmd_clean = "rm -rf " + output_foldername + " " + ".git/tmp/wkdir" + "\n"
cmd_clean = "rm -rf " + OUTPUT_MAIN_FOLDERNAME + " " + ".git/tmp/wkdir" + "\n"
# ^^ rm the entire output folder `outputs`
if flag_filterfile is True:
cmd_clean += "rm ${filterfile}" + " \n"

Expand Down
5 changes: 5 additions & 0 deletions babs/constants.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
MSG_NO_ALERT_IN_LOGS = "BABS: No alert message found in log files."
CHECK_MARK = u'\N{check mark}' # can be used by print(CHECK_MARK)
PATH_FS_LICENSE_IN_CONTAINER = "/SGLR/FREESURFER_HOME/license.txt"

# The upper layer of the output folder - BABS expects it to contain sub-folders to zip:
OUTPUT_MAIN_FOLDERNAME = "outputs"
# Placeholder for creating a sub-folder to hold all outputs:
PLACEHOLDER_MK_SUB_OUTPUT_FOLDER = "$TO_CREATE_FOLDER"
138 changes: 110 additions & 28 deletions babs/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -442,20 +442,105 @@ def generate_cmd_set_envvar(env_var_name):

return cmd, env_var_value, env_var_value_in_container


def generate_cmd_zipping_from_config(config, type_session, output_foldername="outputs"):
def get_info_zip_foldernames(config):
    """
    This is to get information from `zip_foldernames` section
    in the container configuration YAML file.
    Note that users have option to request creating a sub-folder in `outputs` folder,
    if the BIDS App does not do so (e.g., fMRIPrep new BIDS output layout).

    Information:
    1. foldernames to zip
    2. whether the user requests creating a sub-folder
    3. path to the output dir to be used in the `singularity run`

    Parameters:
    ------------
    config: dictionary
        attribute `config` in class Container;
        got from `read_container_config_yaml()`.
        It is NOT modified by this function (a copy of the section is used),
        so calling this function repeatedly on the same `config` gives the same result.

    Returns:
    ---------
    dict_zip_foldernames: dict
        `config["zip_foldernames"]` w/ placeholder key/value pair removed.
        The placeholder is removed regardless of its value,
        so it is never treated as a foldername to zip.
    if_mk_folder: bool
        whether requested to create a sub-folder in `outputs`.
    path_output_folder: str
        output folder used in `singularity run` of the BIDS App.
        see examples below.

    Raises:
    --------
    Exception
        if section `zip_foldernames` is missing or is not a mapping;
        or if the placeholder is "true" but the number of foldernames provided is not one.

    Examples `path_output_folder` of BIDS App:
    -------------------------------------------------
    In `zip_foldernames` section:
    1. No placeholder: outputs
    2. placeholder = true & 1 folder: outputs/<foldername>

    Notes:
    ----------
    In fact, we use `OUTPUT_MAIN_FOLDERNAME` to define the 'outputs' string.
    """

    from .constants import OUTPUT_MAIN_FOLDERNAME, PLACEHOLDER_MK_SUB_OUTPUT_FOLDER

    # Sanity check: this section should exist:
    if "zip_foldernames" not in config:
        raise Exception("The `container_config_yaml_file` does not contain"
                        + " the section `zip_foldernames`. Please add this section!")

    section = config["zip_foldernames"]
    # An empty section in YAML is loaded as `None`; give a clear message
    # instead of an opaque TypeError later:
    if not isinstance(section, dict):
        raise Exception("The section `zip_foldernames` in `container_config_yaml_file`"
                        + " should contain pairs of foldername and version string,"
                        + " but got: " + repr(section))

    # Work on a shallow copy, so that the caller's `config` is left untouched:
    dict_zip_foldernames = dict(section)

    # Check if placeholder to make a sub-folder in `outputs` folder:
    # there cannot be two placeholders (w/ same strings);
    # otherwise error when loading yaml file
    if_mk_folder = False
    if PLACEHOLDER_MK_SUB_OUTPUT_FOLDER in dict_zip_foldernames:
        # always remove the placeholder: it is not a foldername to zip
        value = dict_zip_foldernames.pop(PLACEHOLDER_MK_SUB_OUTPUT_FOLDER)
        # `str()` so that both the string "true" and an unquoted YAML `true`
        # (loaded as python bool `True`) are accepted:
        if_mk_folder = str(value).lower() == "true"

    if if_mk_folder:
        # sanity check: we expect only one output folder to create:
        n_folders = len(dict_zip_foldernames)
        if n_folders == 0:  # only placeholder was provided:
            raise Exception("Only placeholder '" + PLACEHOLDER_MK_SUB_OUTPUT_FOLDER + "'"
                            + " is provided in section 'zip_foldernames'."
                            + " You should also provide"
                            + " a name of output folder to create and zip.")
        if n_folders > 1:  # more than one foldernames provided:
            raise Exception("You ask BABS to create more than one output folder,"
                            + " but BABS can only create one output folder."
                            + " Please only keep one of them in 'zip_foldernames' section.")

    # Generate the output folder path:
    path_output_folder = OUTPUT_MAIN_FOLDERNAME
    if if_mk_folder:
        # there is only one folder (checked above); keys are foldernames w/o version number
        the_folder = next(iter(dict_zip_foldernames))
        path_output_folder += "/" + the_folder

    return dict_zip_foldernames, if_mk_folder, path_output_folder


def generate_cmd_zipping_from_config(dict_zip_foldernames, type_session):
"""
This is to generate bash command to zip BIDS App outputs.
Parameters:
------------
dict_zip_foldernames: dictionary
`config["zip_foldernames"]` w/ placeholder key/value pair removed.
got from `get_info_zip_foldernames()`.
type_session: str
"multi-ses" or "single-ses"
output_foldername: str
the foldername of the outputs of BIDS App; default is "outputs".
Returns:
---------
Expand All @@ -464,37 +549,34 @@ def generate_cmd_zipping_from_config(config, type_session, output_foldername="ou
based on section `zip_foldernames` in the yaml file.
"""

from .constants import OUTPUT_MAIN_FOLDERNAME

# cd to output folder:
cmd = "cd " + output_foldername + "\n"
cmd = "cd " + OUTPUT_MAIN_FOLDERNAME + "\n"

# 7z:
if type_session == "multi-ses":
str_sesid = "_${sesid}"
else:
str_sesid = ""

if "zip_foldernames" in config:
value_temp = ""
temp = 0

for key, value in config["zip_foldernames"].items():
# each key is a foldername to be zipped;
# each value is the version string;
temp = temp + 1
if (temp != 1) & (value_temp != value): # not matching last value
warnings.warn("In section `zip_foldernames` in `container_config_yaml_file`: \n"
"The version string of '" + key + "': '" + value + "'"
+ " does not match with the last version string; "
+ "we suggest using the same version string across all foldernames.")
value_temp = value

cmd += "7z a ../${subid}" + str_sesid + "_" + \
key + "-" + value + ".zip" + " " + key + "\n"
# e.g., 7z a ../${subid}_${sesid}_fmriprep-0-0-0.zip fmriprep # this is multi-ses

else: # the yaml file does not have the section `zip_foldernames`:
raise Exception("The `container_config_yaml_file` does not contain"
+ " the section `zip_foldernames`. Please add this section!")
# start to generate 7z commands:
value_temp = ""
temp = 0
for key, value in dict_zip_foldernames.items():
# each key is a foldername to be zipped;
# each value is the version string;
temp = temp + 1
if (temp != 1) & (value_temp != value): # not matching last value
warnings.warn("In section `zip_foldernames` in `container_config_yaml_file`: \n"
"The version string of '" + key + "': '" + value + "'"
+ " does not match with the last version string; "
+ "we suggest using the same version string across all foldernames.")
value_temp = value

cmd += "7z a ../${subid}" + str_sesid + "_" + \
key + "-" + value + ".zip" + " " + key + "\n"
# e.g., 7z a ../${subid}_${sesid}_fmriprep-0-0-0.zip fmriprep # this is multi-ses

# return to original dir:
cmd += "cd ..\n"
Expand Down
Loading

0 comments on commit 5e408e5

Please sign in to comment.