From 8f93922d2ea9288186be8dd80cf37456192209d9 Mon Sep 17 00:00:00 2001
From: Zwart <jzwart@usgs.gov>
Date: Thu, 8 Feb 2024 13:01:59 -0800
Subject: [PATCH 1/4] changing ref branch

---
 .github/workflows/scoring.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/scoring.yaml b/.github/workflows/scoring.yaml
index c0b95407cf..9bdbc7c52c 100644
--- a/.github/workflows/scoring.yaml
+++ b/.github/workflows/scoring.yaml
@@ -19,6 +19,8 @@ jobs:
     #container: eco4cast/rocker-neon4cast:latest
     steps:
       - uses: actions/checkout@v3
+        with:
+          ref: score4casts # TODO: change this to prod when happy
 
       - name: Install
         shell: Rscript {0}

From 64032037c85bba7b099bd6fbfd2b264b62b2fdb7 Mon Sep 17 00:00:00 2001
From: Zwart <jzwart@usgs.gov>
Date: Thu, 8 Feb 2024 13:02:19 -0800
Subject: [PATCH 2/4] updating scoring script

---
 scoring/scoring.R | 24 +++++++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/scoring/scoring.R b/scoring/scoring.R
index 7eef5c4f8b..5a02d0ce29 100644
--- a/scoring/scoring.R
+++ b/scoring/scoring.R
@@ -81,7 +81,7 @@ furrr::future_walk(1:nrow(variable_duration), function(k, variable_duration, con
                                     schema = arrow::schema(
                                       project_id = arrow::string(),
                                       site_id = arrow::string(),
-                                      datetime = arrow::timestamp(unit = "ns", timezone = "UTC"),
+                                      datetime = arrow::timestamp(unit = "ns"), # was: timezone = "UTC" (dropped in this change)
                                       duration = arrow::string(),
                                       #depth_m = arrow::float(), #project_specific
                                       variable = arrow::string(),
@@ -96,8 +96,26 @@ furrr::future_walk(1:nrow(variable_duration), function(k, variable_duration, con
   curr_duration <- duration
   curr_project_id <- project_id
 
-  groupings <- arrow::open_dataset(s3_inv) |>
-    dplyr::filter(variable == curr_variable, duration == curr_duration) |>
+  groupings <- arrow::open_dataset(s3_inv,
+                                   schema = arrow::schema(
+                                     duration = arrow::string(),
+                                     model_id = arrow::string(),
+                                     site_id = arrow::string(),
+                                     reference_date = arrow::date32(),
+                                     variable = arrow::string(),
+                                     date = arrow::date32(),
+                                     project_id = arrow::string(),
+                                     pub_date = arrow::date32(),
+                                     path = arrow::string(),
+                                     path_full = arrow::string(),
+                                     path_summaries = arrow::string(),
+                                     endpoint = arrow::string(),
+                                     latitude = arrow::float(),
+                                     longitude = arrow::float(),
+                                   )) |>
+    dplyr::filter(variable == curr_variable,
+                  duration == curr_duration,
+                  project_id == curr_project_id) |>
     dplyr::select(-site_id) |>
     dplyr::collect() |>
     dplyr::distinct() |>

From 4871798fe3daade0d7a073f7aa821c4372487ece Mon Sep 17 00:00:00 2001
From: Zwart <jzwart@usgs.gov>
Date: Thu, 8 Feb 2024 13:50:15 -0800
Subject: [PATCH 3/4] update score inventory script and remove the
 delete scores script

---
 scoring/build_score_inventory.R |  4 ++--
 scoring/delete_scores.R         | 34 ---------------------------------
 2 files changed, 2 insertions(+), 36 deletions(-)
 delete mode 100644 scoring/delete_scores.R

diff --git a/scoring/build_score_inventory.R b/scoring/build_score_inventory.R
index fba2215c1a..399354fe4e 100644
--- a/scoring/build_score_inventory.R
+++ b/scoring/build_score_inventory.R
@@ -10,6 +10,7 @@ inventory_df <- arrow::open_dataset(s3) |>
   mutate(reference_date = lubridate::as_date(reference_datetime),
          date = lubridate::as_date(datetime),
          pub_date = lubridate::as_date(pub_datetime)) |>
+  filter(project_id == config$project_id) |>
   distinct(duration, model_id, site_id, reference_date, variable, date, project_id, pub_date) |>
   collect() |>
   mutate(path = glue::glue("{bucket}/parquet/project_id={project_id}/duration={duration}/variable={variable}"),
@@ -18,8 +19,7 @@ inventory_df <- arrow::open_dataset(s3) |>
 
 sites <- readr::read_csv(config$site_table,
                          show_col_types = FALSE) |>
-  select(field_site_id, latitude, longitude) |>
-  rename(site_id = field_site_id)
+  select(site_id, latitude, longitude)
 
 inventory_df <- dplyr::left_join(inventory_df, sites,
                                  by = "site_id")
diff --git a/scoring/delete_scores.R b/scoring/delete_scores.R
deleted file mode 100644
index ea06c01a8b..0000000000
--- a/scoring/delete_scores.R
+++ /dev/null
@@ -1,34 +0,0 @@
-df <- aws.s3::get_bucket_df(bucket = "bio230121-bucket01",
-                            prefix = "vera4cast/scores/",
-                            region =  "renc",
-                            base_url = "osn.xsede.org",
-                   key = Sys.getenv("OSN_KEY"),
-                   secret = Sys.getenv("OSN_SECRET"))
-
-for(i in 1:nrow(df)){
-
-  aws.s3::delete_object(object = df$Key[i],
-                     bucket = "bio230121-bucket01",
-                     region = "renc",
-                     base_url = "osn.xsede.org",
-                     key = Sys.getenv("OSN_KEY"),
-                     secret = Sys.getenv("OSN_SECRET"))
-}
-
-df <- aws.s3::get_bucket_df(bucket = "bio230121-bucket01",
-                            prefix = "vera4cast/prov/",
-                            region =  "renc",
-                            base_url = "osn.xsede.org",
-                            key = Sys.getenv("OSN_KEY"),
-                            secret = Sys.getenv("OSN_SECRET"))
-
-for(i in 1:nrow(df)){
-
-  aws.s3::delete_object(object = df$Key[i],
-                        bucket = "bio230121-bucket01",
-                        region = "renc",
-                        base_url = "osn.xsede.org",
-                        key = Sys.getenv("OSN_KEY"),
-                        secret = Sys.getenv("OSN_SECRET"))
-}
-

From 3a8ed9c3f816f2eacc0436628618a1134c21bb88 Mon Sep 17 00:00:00 2001
From: Zwart <jzwart@usgs.gov>
Date: Thu, 8 Feb 2024 13:57:35 -0800
Subject: [PATCH 4/4] update cron and ref branch

---
 .github/workflows/scoring.yaml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/scoring.yaml b/.github/workflows/scoring.yaml
index 9bdbc7c52c..765d56a80b 100644
--- a/.github/workflows/scoring.yaml
+++ b/.github/workflows/scoring.yaml
@@ -1,8 +1,8 @@
 # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
 # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 on:
-  #schedule:
-  #  - cron: '0 0 * * *'
+  schedule:
+    - cron: '0 0 */3 * *'
   workflow_dispatch:
 
 name: scoring
@@ -20,7 +20,7 @@ jobs:
     steps:
       - uses: actions/checkout@v3
         with:
-          ref: score4casts # TODO: change this to prod when happy
+          ref: prod
 
       - name: Install
         shell: Rscript {0}