diff --git a/scripts/cp-dataset-histos.sh b/scripts/cp-dataset-histos.sh index e912afa..a3dc1a9 100755 --- a/scripts/cp-dataset-histos.sh +++ b/scripts/cp-dataset-histos.sh @@ -4,20 +4,24 @@ set -euo pipefail ####################################################################################### # This script automates the copying of histogram files from dataset directories # into a specified destination directory, while preserving the original directory -# structure, like: +# structure. For example: +# BASE_PATH: '.../sim/IceCube/2023/generated' +# source dataset dir: '.../sim/IceCube/2023/generated/neutrino-generator/22645' +# destination dataset dir: '$DEST_DIR/sim/IceCube/2023/generated/neutrino-generator/22645' # # Usage: -# ./copy-histograms.sh <BASE_PATH> <DEST_DIR> +# ./cp-dataset-histos.sh <BASE_PATH> <DEST_DIR> [--force] # # Arguments: -# <BASE_PATH> - The root path under which all dataset directories are located. +# <BASE_PATH> - The path under which all requested dataset directories are located. # <DEST_DIR> - Destination directory where histogram files will be copied. +# [--force] - Optional flag to overwrite existing histogram files in the destination. 
# ####################################################################################### # Check args if [ "$#" -lt 2 ]; then - echo "Usage: $0 " + echo "Usage: $0 [--force]" exit 1 fi @@ -25,11 +29,21 @@ BASE_PATH=$(realpath "$1") DEST_DIR=$(realpath "$2") SIM="sim" +# Parse optional --force flag +FORCE=false +for arg in "$@"; do + if [[ $arg == "--force" ]]; then + FORCE=true + break + fi +done + # Ensure the destination directory exists mkdir -p "$DEST_DIR" ####################################################################################### # Calculate the depth of dataset directories relative to BASE_PATH + depth_to_datasets=$(python -m simprod_histogram.calc_depth_to_dataset_dirs "$BASE_PATH" 2>&1) ####################################################################################### @@ -39,7 +53,8 @@ cp_histo() { # Copies all histogram files (*.histo.hdf5) from a given dataset directory # to a destination directory, preserving the directory structure relative # to a specified base path (SIM). If multiple histogram files exist in the - # dataset directory, they are all copied. + # dataset directory, they are all copied. Existing files can be overwritten + # if the --force flag is specified. local src_dataset_dir="$1" # Check for histo files @@ -61,17 +76,25 @@ cp_histo() { # Loop through all matching files and copy them local histo_file for histo_file in "${histo_files[@]}"; do - if [ -f "$dest_dataset_dir/$(basename "$histo_file")" ]; then - echo "Histogram file already exists at $dest_dataset_dir (will not overwrite)" - else - cp "$histo_file" "$dest_dataset_dir/" - echo "Copied $histo_file to $dest_dataset_dir" + local dest_file="$dest_dataset_dir/$(basename "$histo_file")" + # check if this overwrites a file + if [ -f "$dest_file" ]; then + if [ "$FORCE" == true ]; then + echo "Overwriting: $dest_file..." + # cp below + else + echo "Histogram file already exists at $dest_dataset_dir (will not overwrite)" + continue + fi fi + # cp! 
+ cp "$histo_file" "$dest_file" + echo "Copied $histo_file to $dest_dataset_dir" done } -export -f copy_histo_file -export BASE_PATH DEST_DIR SIM +export -f cp_histo +export BASE_PATH DEST_DIR SIM FORCE # Use find with -exec to copy files from each dataset find "$BASE_PATH" \ diff --git a/scripts/cp-job-histos.sh b/scripts/cp-job-histos.sh index 4db36da..f36ac78 100755 --- a/scripts/cp-job-histos.sh +++ b/scripts/cp-job-histos.sh @@ -42,9 +42,12 @@ DATASET_DIR=$(realpath "$1") # Determine if the --dryrun flag is provided DRYRUN=false -if [ "$#" -gt 1 ] && [ "$2" == "--dryrun" ]; then - DRYRUN=true -fi +for arg in "$@"; do + if [[ $arg == "--dryrun" ]]; then + DRYRUN=true + break + fi +done ########################################################################################