From 57271d866e420132f56ed6fde87f6173d2cc3fba Mon Sep 17 00:00:00 2001 From: rkansal47 Date: Wed, 24 Jul 2024 02:26:34 -0700 Subject: [PATCH] max files --- src/HHbbVV/run_utils.py | 4 ++++ src/condor/submit.py | 8 +++++++- src/condor/submit_configs/skimmer_24_07_24_signal_lp.yaml | 3 ++- src/condor/submit_from_yaml.py | 1 + 4 files changed, 14 insertions(+), 2 deletions(-) diff --git a/src/HHbbVV/run_utils.py b/src/HHbbVV/run_utils.py index 780b614e..36e9773b 100644 --- a/src/HHbbVV/run_utils.py +++ b/src/HHbbVV/run_utils.py @@ -111,6 +111,7 @@ def get_fileset( starti: int = 0, endi: int = -1, get_num_files: bool = False, + max_files: int = None, coffea_casa: str = False, ): if processor.startswith("trigger"): @@ -140,6 +141,9 @@ def get_fileset( fileset[sample] = {} for subsample, fnames in sample_set.items(): fileset[sample][subsample] = len(fnames) + if max_files is not None: + print("Max files") + fileset[sample][subsample] = min(fileset[sample][subsample], max_files) else: # return all files per subsample diff --git a/src/condor/submit.py b/src/condor/submit.py index 0e72855b..0fd9cb77 100755 --- a/src/condor/submit.py +++ b/src/condor/submit.py @@ -55,7 +55,12 @@ def main(args): print("CONDOR work dir: ", local_dir) fileset = run_utils.get_fileset( - args.processor, args.year, args.samples, args.subsamples, get_num_files=True + args.processor, + args.year, + args.samples, + args.subsamples, + get_num_files=True, + max_files=args.max_files, ) print(f"fileset: {fileset}") @@ -150,6 +155,7 @@ def parse_args(parser): help="test run or not - test run means only 2 jobs per sample will be created", ) parser.add_argument("--files-per-job", default=20, help="# files per condor job", type=int) + parser.add_argument("--max-files", default=None, help="max total files to run over", type=int) run_utils.add_bool_arg( parser, "submit", default=False, help="submit files as well as create them" diff --git a/src/condor/submit_configs/skimmer_24_07_24_signal_lp.yaml b/src/condor/submit_configs/skimmer_24_07_24_signal_lp.yaml index 0a94e801..1f197468 100644 --- a/src/condor/submit_configs/skimmer_24_07_24_signal_lp.yaml +++ b/src/condor/submit_configs/skimmer_24_07_24_signal_lp.yaml @@ -17,7 +17,8 @@ "VBF_HHTobbVV_CV_1_C2V_1_C3_2", ], "files_per_job": 20, + "max_files": 60, "chunksize": 80000, - "maxchunks": 20 + # "maxchunks": 20 }, } } diff --git a/src/condor/submit_from_yaml.py b/src/condor/submit_from_yaml.py index 680971b5..ce129d53 100644 --- a/src/condor/submit_from_yaml.py +++ b/src/condor/submit_from_yaml.py @@ -52,6 +52,7 @@ def add_bool_arg(parser, name, help, default=False, no_name=None): args.subsamples = sdict.get("subsamples", []) args.files_per_job = sdict["files_per_job"] args.njets = sdict.get("njets", 2) + args.max_files = sdict.get("max_files", None) args.maxchunks = sdict.get("maxchunks", 0) args.chunksize = sdict.get("chunksize", 10000)