"""Calculate the filetree depth from dirpath to the dataset directories.

See calculate() below for more info.

When run as a script (``python -m simprod_histogram.calc_depth_to_dataset_dirs
DIRPATH``) the depth is printed to stdout so that a calling shell script can
capture it with command substitution.
"""

import sys
from pathlib import Path
from typing import Optional, Sequence


def calculate(dirpath: str) -> int:
    """Calculate the filetree depth from dirpath to the dataset directories.

    Assumes the naming convention (segment names are illustrative):
        .../sim/IceCube/<year>/<stage>/<generator>/<dataset>

    i.e. a dataset directory sits exactly five path segments below 'sim'.

    Examples:
        .../sim/IceCube/2023/generated/neutrino-generator/22645 -> depth=0
        .../sim/IceCube/2023/generated/neutrino-generator/      -> depth=1
        .../sim/IceCube/2023/generated/                         -> depth=2

    Note:
        This does not enforce that the dirpath be rooted at /data/sim, so it
        allows both:
            1. using 'realpath' (ex: /data/sim/IceCube/... -> /mnt/lfs6/sim/IceCube/...)
            2. running in a testbed directory (ex: /home/eevans/test/data/sim/IceCube/...)

        The *first* path segment that equals 'sim' is taken as the base.

    Args:
        dirpath: the directory path to inspect; it is only parsed, never
            accessed on disk.

    Returns:
        The number of directory levels between dirpath and the dataset
        directories (0 means dirpath is itself a dataset directory).

    Raises:
        ValueError: if dirpath has no 'sim' segment, or if it points deeper
            than a dataset directory.
    """
    # Keep the 'str' parameter untouched; work on a separate Path object.
    path = Path(dirpath)

    SIM = "sim"  # as in '/data/sim' (or a local tree '/home/.../sim/...')
    N_SEGMENTS_BASE_TO_DATASET = 5

    try:
        base_index = path.parts.index(SIM)
    except ValueError:
        # tuple.index()'s own message adds nothing useful, so suppress it.
        raise ValueError(
            f"Path {path} does not contain the base identifier {SIM}/"
        ) from None
    segments_after_base = path.parts[base_index + 1 :]

    depth = N_SEGMENTS_BASE_TO_DATASET - len(segments_after_base)
    if depth < 0:
        raise ValueError(
            f"Path {path} is too specific; the user can supply up to a dataset dir"
        )

    return depth


def main(argv: Optional[Sequence[str]] = None) -> None:
    """Print the depth for the single DIRPATH command-line argument.

    Args:
        argv: the arguments *after* the program name; defaults to
            ``sys.argv[1:]``.

    The result is written to stdout because the calling shell script reads it
    via ``$(python -m simprod_histogram.calc_depth_to_dataset_dirs ...)``;
    without the print, the captured value would be empty.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) != 1:
        # sys.exit(<str>) writes the message to stderr and exits non-zero.
        sys.exit(
            "usage: python -m simprod_histogram.calc_depth_to_dataset_dirs DIRPATH"
        )
    print(calculate(args[0]))


if __name__ == "__main__":
    main()