From 8f93922d2ea9288186be8dd80cf37456192209d9 Mon Sep 17 00:00:00 2001 From: Zwart Date: Thu, 8 Feb 2024 13:01:59 -0800 Subject: [PATCH 1/4] changing ref branch --- .github/workflows/scoring.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/scoring.yaml b/.github/workflows/scoring.yaml index c0b95407cf..9bdbc7c52c 100644 --- a/.github/workflows/scoring.yaml +++ b/.github/workflows/scoring.yaml @@ -19,6 +19,8 @@ jobs: #container: eco4cast/rocker-neon4cast:latest steps: - uses: actions/checkout@v3 + with: + ref: score4casts # TODO: change this to prod when happy - name: Install shell: Rscript {0} From 64032037c85bba7b099bd6fbfd2b264b62b2fdb7 Mon Sep 17 00:00:00 2001 From: Zwart Date: Thu, 8 Feb 2024 13:02:19 -0800 Subject: [PATCH 2/4] updating scoring script --- scoring/scoring.R | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/scoring/scoring.R b/scoring/scoring.R index 7eef5c4f8b..5a02d0ce29 100644 --- a/scoring/scoring.R +++ b/scoring/scoring.R @@ -81,7 +81,7 @@ furrr::future_walk(1:nrow(variable_duration), function(k, variable_duration, con schema = arrow::schema( project_id = arrow::string(), site_id = arrow::string(), - datetime = arrow::timestamp(unit = "ns", timezone = "UTC"), + datetime = arrow::timestamp(unit = "ns"), # timezone = "UTC"), duration = arrow::string(), #depth_m = arrow::float(), #project_specific variable = arrow::string(), @@ -96,8 +96,26 @@ furrr::future_walk(1:nrow(variable_duration), function(k, variable_duration, con curr_duration <- duration curr_project_id <- project_id - groupings <- arrow::open_dataset(s3_inv) |> - dplyr::filter(variable == curr_variable, duration == curr_duration) |> + groupings <- arrow::open_dataset(s3_inv, + schema = arrow::schema( + duration = arrow::string(), + model_id = arrow::string(), + site_id = arrow::string(), + reference_date = arrow::date32(), + variable = arrow::string(), + date = arrow::date32(), + project_id = arrow::string(), + pub_date = arrow::date32(), + path = arrow::string(), + path_full = arrow::string(), + path_summaries = arrow::string(), + endpoint = arrow::string(), + latitude = arrow::float(), + longitude = arrow::float(), + )) |> + dplyr::filter(variable == curr_variable, + duration == curr_duration, + project_id == curr_project_id) |> dplyr::select(-site_id) |> dplyr::collect() |> dplyr::distinct() |> From 4871798fe3daade0d7a073f7aa821c4372487ece Mon Sep 17 00:00:00 2001 From: Zwart Date: Thu, 8 Feb 2024 13:50:15 -0800 Subject: [PATCH 3/4] update to score inventory script and getting rid of delete score script --- scoring/build_score_inventory.R | 4 ++-- scoring/delete_scores.R | 34 --------------------------------- 2 files changed, 2 insertions(+), 36 deletions(-) delete mode 100644 scoring/delete_scores.R diff --git a/scoring/build_score_inventory.R b/scoring/build_score_inventory.R index fba2215c1a..399354fe4e 100644 --- a/scoring/build_score_inventory.R +++ b/scoring/build_score_inventory.R @@ -10,6 +10,7 @@ inventory_df <- arrow::open_dataset(s3) |> mutate(reference_date = lubridate::as_date(reference_datetime), date = lubridate::as_date(datetime), pub_date = lubridate::as_date(pub_datetime)) |> + filter(project_id == config$project_id) |> distinct(duration, model_id, site_id, reference_date, variable, date, project_id, pub_date) |> collect() |> mutate(path = glue::glue("{bucket}/parquet/project_id={project_id}/duration={duration}/variable={variable}"), @@ -18,8 +19,7 @@ inventory_df <- arrow::open_dataset(s3) |> sites <- readr::read_csv(config$site_table, show_col_types = FALSE) |> - select(field_site_id, latitude, longitude) |> - rename(site_id = field_site_id) + select(site_id, latitude, longitude) inventory_df <- dplyr::left_join(inventory_df, sites, by = "site_id") diff --git a/scoring/delete_scores.R b/scoring/delete_scores.R deleted file mode 100644 index ea06c01a8b..0000000000 --- a/scoring/delete_scores.R +++ /dev/null @@ -1,34 +0,0 @@ -df <- aws.s3::get_bucket_df(bucket = "bio230121-bucket01", - prefix = "vera4cast/scores/", - region = "renc", - base_url = "osn.xsede.org", - key = Sys.getenv("OSN_KEY"), - secret = Sys.getenv("OSN_SECRET")) - -for(i in 1:nrow(df)){ - - aws.s3::delete_object(object = df$Key[i], - bucket = "bio230121-bucket01", - region = "renc", - base_url = "osn.xsede.org", - key = Sys.getenv("OSN_KEY"), - secret = Sys.getenv("OSN_SECRET")) -} - -df <- aws.s3::get_bucket_df(bucket = "bio230121-bucket01", - prefix = "vera4cast/prov/", - region = "renc", - base_url = "osn.xsede.org", - key = Sys.getenv("OSN_KEY"), - secret = Sys.getenv("OSN_SECRET")) - -for(i in 1:nrow(df)){ - - aws.s3::delete_object(object = df$Key[i], - bucket = "bio230121-bucket01", - region = "renc", - base_url = "osn.xsede.org", - key = Sys.getenv("OSN_KEY"), - secret = Sys.getenv("OSN_SECRET")) -} - From 3a8ed9c3f816f2eacc0436628618a1134c21bb88 Mon Sep 17 00:00:00 2001 From: Zwart Date: Thu, 8 Feb 2024 13:57:35 -0800 Subject: [PATCH 4/4] update cron and ref branch --- .github/workflows/scoring.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/scoring.yaml b/.github/workflows/scoring.yaml index 9bdbc7c52c..765d56a80b 100644 --- a/.github/workflows/scoring.yaml +++ b/.github/workflows/scoring.yaml @@ -1,8 +1,8 @@ # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help on: - #schedule: - # - cron: '0 0 * * *' + schedule: + - cron: '0 0 */3 * *' workflow_dispatch: name: scoring @@ -20,7 +20,7 @@ jobs: steps: - uses: actions/checkout@v3 with: - ref: score4casts # TODO: change this to prod when happy + ref: prod - name: Install shell: Rscript {0}