From cd69ca50d03aa4a1f3aab16a2f4e5c45a3fbed6d Mon Sep 17 00:00:00 2001 From: JulienPeloton Date: Thu, 24 Oct 2024 08:19:39 +0200 Subject: [PATCH 1/2] Include lower bounds when filtering for hostless --- bin/hostless_detection.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/bin/hostless_detection.py b/bin/hostless_detection.py index 616700a0..efa1cc76 100644 --- a/bin/hostless_detection.py +++ b/bin/hostless_detection.py @@ -117,9 +117,16 @@ def main(): F.col("cutoutTemplate.stampData").alias("cutoutTemplate"), ] - cond_science = df["kstest_static"][0] <= 0.5 - cond_template = df["kstest_static"][1] <= 0.85 - pdf = df.filter(cond_science).filter(cond_template).select(cols_).toPandas() + cond_science_low = df["kstest_static"][0] >= 0. + cond_science_high = df["kstest_static"][0] <= 0.5 + cond_template_low = df["kstest_static"][1] >= 0. + cond_template_high = df["kstest_static"][1] <= 0.85 + + pdf = df\ + .filter(cond_science_low & cond_science_high)\ + .filter(cond_template_low & cond_template_high)\ + .select(cols_)\ + .toPandas() # load hostless IDs past_ids = read_past_ids(args.hostless_folder) From 2c66a7060711da6cdfa2a9fc61976e71f4ed6afe Mon Sep 17 00:00:00 2001 From: JulienPeloton Date: Thu, 24 Oct 2024 08:24:53 +0200 Subject: [PATCH 2/2] PEP8 --- bin/hostless_detection.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/bin/hostless_detection.py b/bin/hostless_detection.py index efa1cc76..40c2114a 100644 --- a/bin/hostless_detection.py +++ b/bin/hostless_detection.py @@ -117,16 +117,17 @@ def main(): F.col("cutoutTemplate.stampData").alias("cutoutTemplate"), ] - cond_science_low = df["kstest_static"][0] >= 0. + cond_science_low = df["kstest_static"][0] >= 0.0 cond_science_high = df["kstest_static"][0] <= 0.5 - cond_template_low = df["kstest_static"][1] >= 0. + cond_template_low = df["kstest_static"][1] >= 0.0 cond_template_high = df["kstest_static"][1] <= 0.85 - pdf = df\ - .filter(cond_science_low & cond_science_high)\ - .filter(cond_template_low & cond_template_high)\ - .select(cols_)\ + pdf = ( + df.filter(cond_science_low & cond_science_high) + .filter(cond_template_low & cond_template_high) + .select(cols_) .toPandas() + ) # load hostless IDs past_ids = read_past_ids(args.hostless_folder)