Skip to content

Commit

Permalink
add calc_depth_to_dataset_dirs.py
Browse files Browse the repository at this point in the history
  • Loading branch information
ric-evans committed Nov 21, 2024
1 parent a49bc87 commit 91f7e7b
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 19 deletions.
20 changes: 1 addition & 19 deletions scripts/sample-each-dataset.sh
Original file line number Diff line number Diff line change
Expand Up @@ -58,25 +58,7 @@ python -m virtualenv $PYVENV
# ex: /data/sim/IceCube/2023/generated/neutrino-generator/22645 -> depth=0
# ex: /data/sim/IceCube/2023/generated/neutrino-generator/ -> depth=1
# ex: /data/sim/IceCube/2023/generated/ -> depth=2
# NOTE(review): this hunk is reported as "1 addition & 19 deletions", so the
# inline `python3 -c` snippet below appears to be the removed implementation and
# the `python -m ...` call after it the replacement -- confirm against the diff.
#
# Inline version: counts the path segments after 'sim' in $BASE_PATH and prints
# how many levels remain until the dataset directories. Because of `2>&1`, the
# variable receives stderr as well as stdout, so on failure it holds the error
# text instead of a number.
depth_to_datasets=$(python3 -c "
from pathlib import Path
import sys
path = Path(sys.argv[1])
SIM = 'sim'
N_SEGMENTS_BASE_TO_DATASET = 5
try:
base_index = list(path.parts).index(SIM)
except ValueError:
raise ValueError(f'Path {path} does not contain the base identifier {SIM}/')
segments_after_base = path.parts[base_index + 1:]
depth = N_SEGMENTS_BASE_TO_DATASET - len(segments_after_base)
if depth < 0:
raise ValueError(f'Path {path} is too specific; the user can supply up to a dataset dir')
print(depth)
" "$BASE_PATH" 2>&1)
# Module version of the same calculation. This relies on the module writing the
# depth to stdout when run with `-m`; stderr is captured too (`2>&1`).
depth_to_datasets=$(python -m simprod_histogram.calc_depth_to_dataset_dirs "$BASE_PATH" 2>&1)

#######################################################################################
# Run!
Expand Down
49 changes: 49 additions & 0 deletions simprod_histogram/calc_depth_to_dataset_dirs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
"""Calculate the filetree depth from dirpath to the dataset directories.
See calculate() below for more info.
"""

import sys
from pathlib import Path


def calculate(dirpath: str) -> int:
    """Calculate the filetree depth from dirpath to the dataset directories.

    Assumes the naming convention:
        .../sim/IceCube/<year>/<generated>/<neutrino-generator>/<dataset_id>

    Examples:
        .../sim/IceCube/2023/generated/neutrino-generator/22645 -> depth=0
        .../sim/IceCube/2023/generated/neutrino-generator/ -> depth=1
        .../sim/IceCube/2023/generated/ -> depth=2

    Note:
        This does not enforce that the dirpath be rooted at /data/sim, so it
        allows both:
            1. using 'realpath' (ex: /data/sim/IceCube/... -> /mnt/lfs6/sim/IceCube/...)
            2. running in a testbed directory (ex: /home/eevans/test/data/sim/IceCube/...)

        The first path segment equal to 'sim' is treated as the base.

    Args:
        dirpath: Directory path located at, or above, a dataset directory.

    Returns:
        The number of directory levels between dirpath and the dataset
        directories (0 when dirpath is itself a dataset directory).

    Raises:
        ValueError: If dirpath has no 'sim' segment, or if it points deeper
            than a dataset directory.
    """
    # Keep the caller's argument untouched; work on a separate Path object.
    path = Path(dirpath)

    SIM = "sim"  # as in '/data/sim' (or a local tree '/home/.../sim/...')
    N_SEGMENTS_BASE_TO_DATASET = 5

    try:
        # Path.parts is a tuple, which already supports .index()
        base_index = path.parts.index(SIM)
    except ValueError:
        # 'from None': the tuple.index() error adds nothing for the user.
        raise ValueError(
            f"Path {path} does not contain the base identifier {SIM}/"
        ) from None
    segments_after_base = path.parts[base_index + 1 :]

    depth = N_SEGMENTS_BASE_TO_DATASET - len(segments_after_base)
    if depth < 0:
        raise ValueError(
            f"Path {path} is too specific; the user can supply up to a dataset dir"
        )

    return depth


if __name__ == "__main__":
    # Write the depth to stdout. Callers that run this module with `python -m`
    # and capture its output (command substitution) would otherwise receive
    # nothing, because calculate() only returns the value.
    print(calculate(sys.argv[1]))

0 comments on commit 91f7e7b

Please sign in to comment.