From 215d7dc99396d709a0fca043255dec7170bb44bc Mon Sep 17 00:00:00 2001 From: Steph Merritt Date: Wed, 7 Aug 2024 16:50:00 +0100 Subject: [PATCH 1/3] Fixing unit tests when sorcha-addons is installed --- tests/activity/test_activity_registration.py | 2 +- tests/lightcurves/test_lightcurve_registration.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/activity/test_activity_registration.py b/tests/activity/test_activity_registration.py index 418c6e97..cdd56d02 100644 --- a/tests/activity/test_activity_registration.py +++ b/tests/activity/test_activity_registration.py @@ -9,7 +9,7 @@ def test_register_subclasses(): output = register_activity_subclasses() - + update_activity_subclasses() # if sorcha-addons is installed we need to update the subclasses assert output == CA_METHODS diff --git a/tests/lightcurves/test_lightcurve_registration.py b/tests/lightcurves/test_lightcurve_registration.py index 261d5e9b..5c2c53b6 100644 --- a/tests/lightcurves/test_lightcurve_registration.py +++ b/tests/lightcurves/test_lightcurve_registration.py @@ -9,7 +9,7 @@ def test_register_subclasses(): output = register_lc_subclasses() - + update_lc_subclasses() # if sorcha-addons is installed we need to update the subclasses assert output == LC_METHODS From 5d7de49e0a388e8bfd56fcdee75dfa99ff62998c Mon Sep 17 00:00:00 2001 From: Steph Merritt Date: Wed, 7 Aug 2024 18:22:00 +0100 Subject: [PATCH 2/3] Implementing subset code. 
--- src/sorcha/modules/PPCommandLineParser.py | 16 ++++++++++++ src/sorcha/sorcha.py | 26 +++++++++++++++++--- src/sorcha/utilities/diffTestUtils.py | 20 +++++++++++++++ src/sorcha/utilities/sorchaArguments.py | 5 ++++ src/sorcha_cmdline/run.py | 8 ++++++ tests/ephemeris/test_ephemeris_generation.py | 1 + tests/ephemeris/test_pixdict.py | 1 + tests/sorcha/test_PPCommandLineParser.py | 5 +++- tests/sorcha/test_PPConfigParser.py | 1 + tests/sorcha/test_sorchaArguments.py | 1 + 10 files changed, 80 insertions(+), 4 deletions(-) diff --git a/src/sorcha/modules/PPCommandLineParser.py b/src/sorcha/modules/PPCommandLineParser.py index c6f903e8..16601d6a 100644 --- a/src/sorcha/modules/PPCommandLineParser.py +++ b/src/sorcha/modules/PPCommandLineParser.py @@ -2,6 +2,7 @@ import sys import logging import glob +import re from .PPConfigParser import PPFindFileOrExit, PPFindDirectoryOrExit @@ -64,6 +65,21 @@ def PPCommandLineParser(args): cmd_args_dict["outpath"] = PPFindFileOrExit(args.o, "-o, --outfile") cmd_args_dict["pointing_database"] = PPFindFileOrExit(args.pd, "-pd, --pointing_database") + if args.process_subset: + m = re.match(r"^(\d+)/(\d+)$", args.process_subset) + if m is None: + sys.exit("--process-subset: the argument must be in form of /") + + split, nsplits = int(m.group(1)), int(m.group(2)) + if nsplits <= 0: + pplogger.error("--process-subset: the number of splits must be >= 1") + sys.exit("--process-subset: the number of splits must be >= 1") + if split < 1 or split > nsplits: + pplogger.error("--process-subset: the chosen splits must be between 1 and (inclusive).") + sys.exit("--process-subset: the chosen splits must be between 1 and (inclusive).") + + cmd_args_dict["process_subset"] = (split, nsplits) + if args.cp: cmd_args_dict["complex_physical_parameters"] = PPFindFileOrExit( args.cp, "-cp, --complex_physical_parameters" diff --git a/src/sorcha/sorcha.py b/src/sorcha/sorcha.py index a9ceb6d2..7da1710c 100755 --- a/src/sorcha/sorcha.py +++ 
b/src/sorcha/sorcha.py @@ -162,12 +162,31 @@ def runLSSTSimulation(args, configs): loopCounter = 0 lastChunk = False + # Find the number of objects in the input file. FIXME: This assumes the + # input file has a header, and has no empty or comment lines. ii = -1 with open(args.orbinfile) as f: for ii, l in enumerate(f): pass lenf = ii + split, nsplits = args.process_subset + print(split, nsplits) + if nsplits > 1: + # calculate the [beginning, end) indices. For example + # np.linspace(0, 100, 3+1, dtype=int) + # --> array([ 0, 33, 66, 100]) + edges = np.linspace(0, lenf, nsplits + 1, dtype=int) + b, e = edges[split - 1], edges[split] + lenf = e - b + + # fast-forward to the requested split + at = 0 + while at < b: + bs = min(at + configs["size_serial_chunk"], b) + reader.read_aux_block(block_size=bs) + at += bs + footprint = None if configs["camera_model"] == "footprint": verboselog("Creating sensor footprint object for filtering") @@ -180,11 +199,12 @@ def runLSSTSimulation(args, configs): # Processing begins, all processing is done for chunks if configs["ephemerides_type"].casefold() == "external": + bs = min(endChunk, lenf) - startChunk verboselog("Reading in chunk of orbits and associated ephemeris from an external file") - observations = reader.read_block(block_size=configs["size_serial_chunk"]) + observations = reader.read_block(block_size=bs) else: - verboselog("Ingest chunk of orbits") - orbits_df = reader.read_aux_block(block_size=configs["size_serial_chunk"]) + bs = min(endChunk, lenf) - startChunk + orbits_df = reader.read_aux_block(block_size=bs) verboselog("Starting ephemeris generation") observations = create_ephemeris(orbits_df, filterpointing, args, configs) verboselog("Ephemeris generation completed") diff --git a/src/sorcha/utilities/diffTestUtils.py b/src/sorcha/utilities/diffTestUtils.py index 017b7d4e..9b83eadc 100644 --- a/src/sorcha/utilities/diffTestUtils.py +++ b/src/sorcha/utilities/diffTestUtils.py @@ -55,6 +55,7 @@ def 
compare_result_files(test_output, golden_output): "outfilestem": f"out_end2end", "verbose": False, "stats": None, + "process_subset": (1, 1), } WITH_EPHEMERIS_ARGS = { @@ -67,6 +68,7 @@ def compare_result_files(test_output, golden_output): "outfilestem": f"out_end2end_with_ephemeris_generation", "verbose": False, "stats": None, + "process_subset": (1, 1), } CHUNKED_ARGS = { @@ -79,6 +81,7 @@ def compare_result_files(test_output, golden_output): "outfilestem": f"out_end2end_chunked", "verbose": False, "stats": None, + "process_subset": (1, 1), } UNCHUNKED_ARGS = { @@ -91,6 +94,7 @@ def compare_result_files(test_output, golden_output): "outfilestem": f"out_end2end_unchunked", "verbose": False, "stats": None, + "process_subset": (1, 1), } @@ -104,6 +108,20 @@ def compare_result_files(test_output, golden_output): "outfilestem": f"verification_output", "verbose": False, "stats": None, + "process_subset": (1, 1), +} + +PROCESS_SUBSET_ARGS = { + "paramsinput": get_demo_filepath("sspp_testset_colours.txt"), + "orbinfile": get_demo_filepath("sspp_testset_orbits.des"), + "oifoutput": get_demo_filepath("example_oif_output.txt"), + "configfile": get_demo_filepath("PPConfig_test.ini"), + "pointing_database": get_demo_filepath("baseline_v2.0_1yr.db"), + "surveyname": "rubin_sim", + "outfilestem": f"out_end2end_subset", + "verbose": False, + "stats": None, + "process_subset": (2, 10), } @@ -135,6 +153,8 @@ def override_seed_and_run(outpath, arg_set="baseline"): cmd_args_dict = UNCHUNKED_ARGS elif arg_set == "truth": cmd_args_dict = VERIFICATION_TRUTH + elif arg_set == "subset": + cmd_args_dict = PROCESS_SUBSET_ARGS else: raise ValueError( f"Unknown arg set name, {arg_set}. Must be one of: 'baseline', 'with_ephemeris', 'truth'." 
diff --git a/src/sorcha/utilities/sorchaArguments.py b/src/sorcha/utilities/sorchaArguments.py index 384072ab..b374f50b 100644 --- a/src/sorcha/utilities/sorchaArguments.py +++ b/src/sorcha/utilities/sorchaArguments.py @@ -3,6 +3,7 @@ import time from os import path, urandom import logging +from typing import Tuple from sorcha.modules.PPModuleRNG import PerModuleRNG from sorcha.modules.PPGetLogger import PPGetLogger @@ -31,6 +32,9 @@ class sorchaArguments: surveyname: str = "" """name of the survey (`rubin_sim` is only one implemented currently)""" + process_subset: Tuple[int, int] = (1, 1) + """the subset of the file to process, in form of (split, nsplits)""" + complex_parameters: str = "" """optional, extra complex physical parameter input files""" @@ -73,6 +77,7 @@ def read_from_dict(self, args): self.ar_data_file_path = args.get("ar_data_path") self.verbose = args["verbose"] self.stats = args["stats"] + self.process_subset = args["process_subset"] self.surveyname = args["surveyname"] diff --git a/src/sorcha_cmdline/run.py b/src/sorcha_cmdline/run.py index fdafa3af..7d0b8d9b 100644 --- a/src/sorcha_cmdline/run.py +++ b/src/sorcha_cmdline/run.py @@ -115,6 +115,14 @@ def main(): dest="st", default=None, ) + optional.add_argument( + "--process-subset", + help="Process a subset of the input objects. Specify in form of <split>/<nsplits>, where <nsplits> is the number of chunks into which" + " the input will be divided, and <split> is the (1-based) chunk for to be processed here. 
For example, writing 3/5 with a catalog" + " of 100 objects will process objects with (0-based) indices [40, 60).", + type=str, + default="1/1", + ) args = parser.parse_args() diff --git a/tests/ephemeris/test_ephemeris_generation.py b/tests/ephemeris/test_ephemeris_generation.py index ebb65cd8..52c75911 100644 --- a/tests/ephemeris/test_ephemeris_generation.py +++ b/tests/ephemeris/test_ephemeris_generation.py @@ -53,6 +53,7 @@ def test_ephemeris_end2end(single_synthetic_pointing, tmp_path): "outfilestem": f"out_400k", "verbose": False, "stats": None, + "process_subset": (1, 1), } pplogger = PPGetLogger(cmd_args_dict["outpath"]) diff --git a/tests/ephemeris/test_pixdict.py b/tests/ephemeris/test_pixdict.py index 7d22debb..6c508d9f 100644 --- a/tests/ephemeris/test_pixdict.py +++ b/tests/ephemeris/test_pixdict.py @@ -62,6 +62,7 @@ def test_pixeldict(tmp_path): "outfilestem": f"out_400k", "verbose": False, "stats": None, + "process_subset": (1, 1), } args = sorchaArguments(cmd_args_dict) diff --git a/tests/sorcha/test_PPCommandLineParser.py b/tests/sorcha/test_PPCommandLineParser.py index 7c48958d..6c007cd7 100644 --- a/tests/sorcha/test_PPCommandLineParser.py +++ b/tests/sorcha/test_PPCommandLineParser.py @@ -5,7 +5,7 @@ class args: - def __init__(self, cp, t="testout", o="./", f=False): + def __init__(self, cp, t="testout", o="./", f=False, process_subset=(1 / 1)): self.p = get_test_filepath("testcolour.txt") self.ob = get_test_filepath("testorb.des") self.er = get_test_filepath("oiftestoutput.txt") @@ -20,6 +20,7 @@ def __init__(self, cp, t="testout", o="./", f=False): self.f = f self.ar = None self.st = "test.csv" + self.process_subset = "1/1" def test_PPCommandLineParser(): @@ -41,6 +42,7 @@ def test_PPCommandLineParser(): "ar_data_path": None, "output_ephemeris_file": None, "stats": "test.csv", + "process_subset": (1, 1), } cmd_dict_2 = PPCommandLineParser(args(get_test_filepath("testcomet.txt"))) @@ -58,6 +60,7 @@ def test_PPCommandLineParser(): 
"ar_data_path": None, "output_ephemeris_file": None, "stats": "test.csv", + "process_subset": (1, 1), } with open(os.path.join(tmp_path, "dummy_file.txt"), "w") as _: diff --git a/tests/sorcha/test_PPConfigParser.py b/tests/sorcha/test_PPConfigParser.py index 6053dad1..ab648c1a 100644 --- a/tests/sorcha/test_PPConfigParser.py +++ b/tests/sorcha/test_PPConfigParser.py @@ -249,6 +249,7 @@ def test_PPPrintConfigsToLog(tmp_path): "verbose": True, "seed": 24601, "stats": None, + "process_subset": (1, 1), } args = sorchaArguments(cmd_args) diff --git a/tests/sorcha/test_sorchaArguments.py b/tests/sorcha/test_sorchaArguments.py index 8957986d..8da399f9 100644 --- a/tests/sorcha/test_sorchaArguments.py +++ b/tests/sorcha/test_sorchaArguments.py @@ -14,6 +14,7 @@ "verbose": False, "pointing_database": get_demo_filepath("baseline_v2.0_1yr.db"), "stats": "./test.csv", + "process_subset": (1, 1), } From dbce46f431a3c55db246e7865e449b5d26434d84 Mon Sep 17 00:00:00 2001 From: Steph Merritt Date: Wed, 7 Aug 2024 18:47:58 +0100 Subject: [PATCH 3/3] Adding unit tests. 
--- src/sorcha/utilities/diffTestUtils.py | 2 +- tests/sorcha/test_PPCommandLineParser.py | 25 ++++++++++++++++++++++-- tests/sorcha/test_demo_process_subset.py | 23 ++++++++++++++++++++++ 3 files changed, 47 insertions(+), 3 deletions(-) create mode 100644 tests/sorcha/test_demo_process_subset.py diff --git a/src/sorcha/utilities/diffTestUtils.py b/src/sorcha/utilities/diffTestUtils.py index 50c04211..a196130b 100644 --- a/src/sorcha/utilities/diffTestUtils.py +++ b/src/sorcha/utilities/diffTestUtils.py @@ -115,7 +115,7 @@ def compare_result_files(test_output, golden_output): "paramsinput": get_demo_filepath("sspp_testset_colours.txt"), "orbinfile": get_demo_filepath("sspp_testset_orbits.des"), "oifoutput": get_demo_filepath("example_oif_output.txt"), - "configfile": get_demo_filepath("PPConfig_test.ini"), + "configfile": get_test_filepath("PPConfig_goldens_test.ini"), "pointing_database": get_demo_filepath("baseline_v2.0_1yr.db"), "surveyname": "rubin_sim", "outfilestem": f"out_end2end_subset", diff --git a/tests/sorcha/test_PPCommandLineParser.py b/tests/sorcha/test_PPCommandLineParser.py index 6c007cd7..13808387 100644 --- a/tests/sorcha/test_PPCommandLineParser.py +++ b/tests/sorcha/test_PPCommandLineParser.py @@ -5,7 +5,7 @@ class args: - def __init__(self, cp, t="testout", o="./", f=False, process_subset=(1 / 1)): + def __init__(self, cp, t="testout", o="./", f=False, process_subset=("1/1")): self.p = get_test_filepath("testcolour.txt") self.ob = get_test_filepath("testorb.des") self.er = get_test_filepath("oiftestoutput.txt") @@ -20,7 +20,7 @@ def __init__(self, cp, t="testout", o="./", f=False, process_subset=(1 / 1)): self.f = f self.ar = None self.st = "test.csv" - self.process_subset = "1/1" + self.process_subset = process_subset def test_PPCommandLineParser(): @@ -77,3 +77,24 @@ def test_PPCommandLineParser(): assert not os.path.isfile(os.path.join(tmp_path, "dummy_file.txt")) return + + +def test_PPCommandLineParser_subset(): + from 
sorcha.modules.PPCommandLineParser import PPCommandLineParser + + tmp_path = os.path.dirname(get_test_filepath("test_input_fullobs.csv")) + + with pytest.raises(SystemExit) as e: + _ = PPCommandLineParser(args(False, process_subset="3/1")) + + assert e.value.code == "--process-subset: the chosen splits must be between 1 and (inclusive)." + + with pytest.raises(SystemExit) as e2: + _ = PPCommandLineParser(args(False, process_subset="-1/1")) + + assert e2.value.code == "--process-subset: the argument must be in form of /" + + with pytest.raises(SystemExit) as e3: + _ = PPCommandLineParser(args(False, process_subset="1/0")) + + assert e3.value.code == "--process-subset: the number of splits must be >= 1" diff --git a/tests/sorcha/test_demo_process_subset.py b/tests/sorcha/test_demo_process_subset.py new file mode 100644 index 00000000..8018c377 --- /dev/null +++ b/tests/sorcha/test_demo_process_subset.py @@ -0,0 +1,23 @@ +import os +import tempfile +import pandas as pd + +from sorcha.utilities.dataUtilitiesForTests import get_demo_filepath +from sorcha.utilities.diffTestUtils import override_seed_and_run + + +def test_demo_process_subset(): + """This tests the --process-subset command line option, where only a chunk of + the input files are run through Sorcha. It is a full end-to-end test + with all randomised elements turned off for a quick test. + """ + + with tempfile.TemporaryDirectory() as dir_name: + override_seed_and_run(dir_name, arg_set="subset") + res_file = os.path.join(dir_name, "out_end2end_subset.csv") + assert os.path.isfile(res_file) + + subset_data = pd.read_csv(res_file) + + assert len(subset_data["ObjID"].unique()) == 1 + assert subset_data["ObjID"].unique()[0] == "2010_TC209"