Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Including code to process a subset of inputs, fixing unit tests #998

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions src/sorcha/modules/PPCommandLineParser.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import sys
import logging
import glob
import re
from .PPConfigParser import PPFindFileOrExit, PPFindDirectoryOrExit


Expand Down Expand Up @@ -64,6 +65,21 @@ def PPCommandLineParser(args):
cmd_args_dict["outpath"] = PPFindFileOrExit(args.o, "-o, --outfile")
cmd_args_dict["pointing_database"] = PPFindFileOrExit(args.pd, "-pd, --pointing_database")

# Parse --process-subset ("<split>/<nsplits>") into a (split, nsplits) tuple
# of ints. Every failure is both logged and used as the exit message.
if args.process_subset:
    # fullmatch anchors both ends and, unlike "$", rejects a trailing newline
    m = re.fullmatch(r"(\d+)/(\d+)", args.process_subset)
    if m is None:
        msg = "--process-subset: the argument must be in form of <split>/<nsplits>"
        pplogger.error(msg)
        sys.exit(msg)

    split, nsplits = int(m.group(1)), int(m.group(2))
    if nsplits <= 0:
        msg = "--process-subset: the number of splits must be >= 1"
        pplogger.error(msg)
        sys.exit(msg)
    if not 1 <= split <= nsplits:
        msg = "--process-subset: the chosen splits must be between 1 and <nsplits> (inclusive)."
        pplogger.error(msg)
        sys.exit(msg)

    cmd_args_dict["process_subset"] = (split, nsplits)
else:
    # No subset requested: process the whole input, so that the key is
    # always present for consumers that read it unconditionally.
    cmd_args_dict["process_subset"] = (1, 1)

if args.cp:
cmd_args_dict["complex_physical_parameters"] = PPFindFileOrExit(
args.cp, "-cp, --complex_physical_parameters"
Expand Down
26 changes: 22 additions & 4 deletions src/sorcha/sorcha.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,12 +161,31 @@ def runLSSTSimulation(args, configs):
endChunk = 0
loopCounter = 0

# Find the number of objects in the input file. FIXME: This assumes the
# input file has a header, and has no empty or comment lines.
with open(args.orbinfile) as f:
    # (number of lines) - 1 for the assumed header; an empty file gives -1
    lenf = sum(1 for _ in f) - 1

split, nsplits = args.process_subset
if nsplits > 1:
    # calculate the [beginning, end) indices. For example
    #   np.linspace(0, 100, 3+1, dtype=int)
    #   --> array([  0,  33,  66, 100])
    edges = np.linspace(0, lenf, nsplits + 1, dtype=int)
    # convert to plain ints so block sizes passed on are not numpy scalars
    b, e = int(edges[split - 1]), int(edges[split])
    lenf = e - b

    # Fast-forward to the requested split by reading and discarding the
    # first b objects. Each read is capped at one serial chunk AND at the
    # number of objects still to skip, so exactly b objects are consumed.
    # NOTE(review): only the aux reader is advanced here -- confirm this is
    # also sufficient when ephemerides come from an external file.
    at = 0
    while at < b:
        bs = min(configs["size_serial_chunk"], b - at)
        reader.read_aux_block(block_size=bs)
        at += bs

footprint = None
if configs["camera_model"] == "footprint":
verboselog("Creating sensor footprint object for filtering")
Expand All @@ -178,13 +197,12 @@ def runLSSTSimulation(args, configs):
verboselog("Working on objects {}-{}".format(startChunk, endChunk))

# Processing begins, all processing is done for chunks
bs = min(endChunk, lenf) - startChunk
if configs["ephemerides_type"].casefold() == "external":
verboselog("Reading in chunk of orbits and associated ephemeris from an external file")
observations = reader.read_block(block_size=configs["size_serial_chunk"])
observations.to_csv("post_readin_ephem_nonprimary.csv")
observations = reader.read_block(block_size=bs)
else:
verboselog("Ingest chunk of orbits")
orbits_df = reader.read_aux_block(block_size=configs["size_serial_chunk"])
orbits_df = reader.read_aux_block(block_size=bs)
verboselog("Starting ephemeris generation")
observations = create_ephemeris(orbits_df, filterpointing, args, configs)
verboselog("Ephemeris generation completed")
Expand Down
20 changes: 20 additions & 0 deletions src/sorcha/utilities/diffTestUtils.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ def compare_result_files(test_output, golden_output):
"outfilestem": f"out_end2end",
"verbose": False,
"stats": None,
"process_subset": (1, 1),
}

WITH_EPHEMERIS_ARGS = {
Expand All @@ -67,6 +68,7 @@ def compare_result_files(test_output, golden_output):
"outfilestem": f"out_end2end_with_ephemeris_generation",
"verbose": False,
"stats": None,
"process_subset": (1, 1),
}

CHUNKED_ARGS = {
Expand All @@ -79,6 +81,7 @@ def compare_result_files(test_output, golden_output):
"outfilestem": f"out_end2end_chunked",
"verbose": False,
"stats": None,
"process_subset": (1, 1),
}

UNCHUNKED_ARGS = {
Expand All @@ -91,6 +94,7 @@ def compare_result_files(test_output, golden_output):
"outfilestem": f"out_end2end_unchunked",
"verbose": False,
"stats": None,
"process_subset": (1, 1),
}


Expand All @@ -104,6 +108,20 @@ def compare_result_files(test_output, golden_output):
"outfilestem": f"verification_output",
"verbose": False,
"stats": None,
"process_subset": (1, 1),
}

# Command-line argument set for the end-to-end test of the process-subset
# option; override_seed_and_run selects it with arg_set="subset".
PROCESS_SUBSET_ARGS = {
"paramsinput": get_demo_filepath("sspp_testset_colours.txt"),
"orbinfile": get_demo_filepath("sspp_testset_orbits.des"),
"oifoutput": get_demo_filepath("example_oif_output.txt"),
"configfile": get_test_filepath("PPConfig_goldens_test.ini"),
"pointing_database": get_demo_filepath("baseline_v2.0_1yr.db"),
"surveyname": "rubin_sim",
"outfilestem": f"out_end2end_subset",
"verbose": False,
"stats": None,
# (split, nsplits): process only the 2nd of 10 splits of the input objects
"process_subset": (2, 10),
}


Expand Down Expand Up @@ -135,6 +153,8 @@ def override_seed_and_run(outpath, arg_set="baseline"):
cmd_args_dict = UNCHUNKED_ARGS
elif arg_set == "truth":
cmd_args_dict = VERIFICATION_TRUTH
elif arg_set == "subset":
cmd_args_dict = PROCESS_SUBSET_ARGS
else:
raise ValueError(
f"Unknown arg set name, {arg_set}. Must be one of: 'baseline', 'with_ephemeris', 'truth'."
Expand Down
5 changes: 5 additions & 0 deletions src/sorcha/utilities/sorchaArguments.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import time
from os import path, urandom
import logging
from typing import Tuple

from sorcha.modules.PPModuleRNG import PerModuleRNG
from sorcha.modules.PPGetLogger import PPGetLogger
Expand Down Expand Up @@ -31,6 +32,9 @@ class sorchaArguments:
surveyname: str = ""
"""name of the survey (`rubin_sim` is only one implemented currently)"""

process_subset: Tuple[int, int] = (1, 1)
"""the subset of the file to process, in form of (split, nsplits)"""

complex_parameters: str = ""
"""optional, extra complex physical parameter input files"""

Expand Down Expand Up @@ -73,6 +77,7 @@ def read_from_dict(self, args):
self.ar_data_file_path = args.get("ar_data_path")
self.verbose = args["verbose"]
self.stats = args["stats"]
self.process_subset = args["process_subset"]

self.surveyname = args["surveyname"]

Expand Down
8 changes: 8 additions & 0 deletions src/sorcha_cmdline/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,14 @@ def main():
dest="st",
default=None,
)
# --process-subset is accepted here as a raw "<split>/<nsplits>" string
# (default "1/1", i.e. the whole input); no validation happens at this point.
optional.add_argument(
    "--process-subset",
    help="Process a subset of the input objects. Specify in form of <split>/<nsplits>, where <nsplits> is the number of chunks into which"
    " the input will be divided, and <split> is the (1-based) chunk to be processed here. For example, writing 3/5 with a catalog"
    " of 100 objects will process objects with (0-based) indices [40, 60).",
    type=str,
    default="1/1",
)

args = parser.parse_args()

Expand Down
2 changes: 1 addition & 1 deletion tests/activity/test_activity_registration.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

def test_register_subclasses():
output = register_activity_subclasses()

update_activity_subclasses() # if sorcha-addons is installed we need to update the subclasses
assert output == CA_METHODS


Expand Down
1 change: 1 addition & 0 deletions tests/ephemeris/test_ephemeris_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ def test_ephemeris_end2end(single_synthetic_pointing, tmp_path):
"outfilestem": f"out_400k",
"verbose": False,
"stats": None,
"process_subset": (1, 1),
}

pplogger = PPGetLogger(cmd_args_dict["outpath"])
Expand Down
1 change: 1 addition & 0 deletions tests/ephemeris/test_pixdict.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ def test_pixeldict(tmp_path):
"outfilestem": f"out_400k",
"verbose": False,
"stats": None,
"process_subset": (1, 1),
}

args = sorchaArguments(cmd_args_dict)
Expand Down
2 changes: 1 addition & 1 deletion tests/lightcurves/test_lightcurve_registration.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

def test_register_subclasses():
output = register_lc_subclasses()

update_lc_subclasses() # if sorcha-addons is installed we need to update the subclasses
assert output == LC_METHODS


Expand Down
26 changes: 25 additions & 1 deletion tests/sorcha/test_PPCommandLineParser.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@


class args:
def __init__(self, cp, t="testout", o="./", f=False):
def __init__(self, cp, t="testout", o="./", f=False, process_subset=("1/1")):
self.p = get_test_filepath("testcolour.txt")
self.ob = get_test_filepath("testorb.des")
self.er = get_test_filepath("oiftestoutput.txt")
Expand All @@ -20,6 +20,7 @@ def __init__(self, cp, t="testout", o="./", f=False):
self.f = f
self.ar = None
self.st = "test.csv"
self.process_subset = process_subset


def test_PPCommandLineParser():
Expand All @@ -41,6 +42,7 @@ def test_PPCommandLineParser():
"ar_data_path": None,
"output_ephemeris_file": None,
"stats": "test.csv",
"process_subset": (1, 1),
}

cmd_dict_2 = PPCommandLineParser(args(get_test_filepath("testcomet.txt")))
Expand All @@ -58,6 +60,7 @@ def test_PPCommandLineParser():
"ar_data_path": None,
"output_ephemeris_file": None,
"stats": "test.csv",
"process_subset": (1, 1),
}

with open(os.path.join(tmp_path, "dummy_file.txt"), "w") as _:
Expand All @@ -74,3 +77,24 @@ def test_PPCommandLineParser():
assert not os.path.isfile(os.path.join(tmp_path, "dummy_file.txt"))

return


def test_PPCommandLineParser_subset():
    """Check that invalid --process-subset values abort with the expected exit message."""
    from sorcha.modules.PPCommandLineParser import PPCommandLineParser

    # invalid argument value -> message passed to sys.exit
    expected_exit_messages = {
        # split larger than nsplits
        "3/1": "--process-subset: the chosen splits must be between 1 and <nsplits> (inclusive).",
        # not of the form <digits>/<digits>
        "-1/1": "--process-subset: the argument must be in form of <split>/<nsplits>",
        # zero splits
        "1/0": "--process-subset: the number of splits must be >= 1",
    }

    for subset, message in expected_exit_messages.items():
        with pytest.raises(SystemExit) as e:
            _ = PPCommandLineParser(args(False, process_subset=subset))

        assert e.value.code == message
1 change: 1 addition & 0 deletions tests/sorcha/test_PPConfigParser.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,7 @@ def test_PPPrintConfigsToLog(tmp_path):
"verbose": True,
"seed": 24601,
"stats": None,
"process_subset": (1, 1),
}

args = sorchaArguments(cmd_args)
Expand Down
23 changes: 23 additions & 0 deletions tests/sorcha/test_demo_process_subset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import os
import tempfile
import pandas as pd

from sorcha.utilities.dataUtilitiesForTests import get_demo_filepath
from sorcha.utilities.diffTestUtils import override_seed_and_run


def test_demo_process_subset():
    """End-to-end test of the --process-subset command line option.

    Runs Sorcha with the "subset" argument set, so that only one chunk of the
    input files is processed, and checks that the output holds exactly the
    one expected object. All randomised elements are turned off for a quick
    test.
    """

    with tempfile.TemporaryDirectory() as out_dir:
        override_seed_and_run(out_dir, arg_set="subset")

        result_path = os.path.join(out_dir, "out_end2end_subset.csv")
        assert os.path.isfile(result_path)

        # read the results before the temporary directory is cleaned up
        results = pd.read_csv(result_path)

    object_ids = results["ObjID"].unique()
    assert len(object_ids) == 1
    assert object_ids[0] == "2010_TC209"
1 change: 1 addition & 0 deletions tests/sorcha/test_sorchaArguments.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
"verbose": False,
"pointing_database": get_demo_filepath("baseline_v2.0_1yr.db"),
"stats": "./test.csv",
"process_subset": (1, 1),
}


Expand Down