Skip to content

Commit

Permalink
Merge pull request #37 from eco4cast/main
Browse files (browse the repository at this point in the history)
main to prod
  • Loading branch information
jzwart authored Feb 8, 2024
2 parents 9e2c207 + 56ece95 commit c7dc405
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 41 deletions.
6 changes: 4 additions & 2 deletions .github/workflows/scoring.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
on:
#schedule:
# - cron: '0 0 * * *'
schedule:
- cron: '0 0 */3 * *'
workflow_dispatch:

name: scoring
Expand All @@ -19,6 +19,8 @@ jobs:
#container: eco4cast/rocker-neon4cast:latest
steps:
- uses: actions/checkout@v3
with:
ref: prod

- name: Install
shell: Rscript {0}
Expand Down
4 changes: 2 additions & 2 deletions scoring/build_score_inventory.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ inventory_df <- arrow::open_dataset(s3) |>
mutate(reference_date = lubridate::as_date(reference_datetime),
date = lubridate::as_date(datetime),
pub_date = lubridate::as_date(pub_datetime)) |>
filter(project_id == config$project_id) |>
distinct(duration, model_id, site_id, reference_date, variable, date, project_id, pub_date) |>
collect() |>
mutate(path = glue::glue("{bucket}/parquet/project_id={project_id}/duration={duration}/variable={variable}"),
Expand All @@ -18,8 +19,7 @@ inventory_df <- arrow::open_dataset(s3) |>

sites <- readr::read_csv(config$site_table,
show_col_types = FALSE) |>
select(field_site_id, latitude, longitude) |>
rename(site_id = field_site_id)
select(site_id, latitude, longitude)

inventory_df <- dplyr::left_join(inventory_df, sites,
by = "site_id")
Expand Down
34 changes: 0 additions & 34 deletions scoring/delete_scores.R

This file was deleted.

24 changes: 21 additions & 3 deletions scoring/scoring.R
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ furrr::future_walk(1:nrow(variable_duration), function(k, variable_duration, con
schema = arrow::schema(
project_id = arrow::string(),
site_id = arrow::string(),
datetime = arrow::timestamp(unit = "ns", timezone = "UTC"),
datetime = arrow::timestamp(unit = "ns"), # timezone = "UTC"),
duration = arrow::string(),
#depth_m = arrow::float(), #project_specific
variable = arrow::string(),
Expand All @@ -96,8 +96,26 @@ furrr::future_walk(1:nrow(variable_duration), function(k, variable_duration, con
curr_duration <- duration
curr_project_id <- project_id

groupings <- arrow::open_dataset(s3_inv) |>
dplyr::filter(variable == curr_variable, duration == curr_duration) |>
groupings <- arrow::open_dataset(s3_inv,
schema = arrow::schema(
duration = arrow::string(),
model_id = arrow::string(),
site_id = arrow::string(),
reference_date = arrow::date32(),
variable = arrow::string(),
date = arrow::date32(),
project_id = arrow::string(),
pub_date = arrow::date32(),
path = arrow::string(),
path_full = arrow::string(),
path_summaries = arrow::string(),
endpoint = arrow::string(),
latitude = arrow::float(),
longitude = arrow::float(),
)) |>
dplyr::filter(variable == curr_variable,
duration == curr_duration,
project_id == curr_project_id) |>
dplyr::select(-site_id) |>
dplyr::collect() |>
dplyr::distinct() |>
Expand Down

0 comments on commit c7dc405

Please sign in to comment.