diff --git a/drivers/generate_stage3.R b/drivers/generate_stage3.R index d70a426f70..7635372077 100644 --- a/drivers/generate_stage3.R +++ b/drivers/generate_stage3.R @@ -2,7 +2,7 @@ library(minioclient) library(gdalcubes) library(gefs4cast) -source("https://raw.githubusercontent.com/eco4cast/neon4cast/main/R/to_hourly.R") +source("drivers/to_hourly.R") config <- yaml::read_yaml("challenge_configuration.yaml") driver_bucket <- stringr::word(config$driver_bucket, 1, sep = "/") @@ -21,8 +21,7 @@ df <- arrow::open_dataset("pseudo") |> site_list <- readr::read_csv(paste0("https://github.com/eco4cast/usgsrc4cast-ci/", "raw/prod/USGS_site_metadata.csv"), - show_col_types = FALSE) |> - dplyr::pull(site_id) + show_col_types = FALSE) s3_stage3 <- gefs4cast::gefs_s3_dir(product = "stage3", @@ -32,7 +31,7 @@ s3_stage3 <- gefs4cast::gefs_s3_dir(product = "stage3", future::plan("future::multisession", workers = 8) -furrr::future_walk(site_list, function(curr_site_id){ +furrr::future_walk(dplyr::pull(site_list, site_id), function(curr_site_id){ df <- arrow::open_dataset("pseudo") |> dplyr::filter(variable %in% c("PRES","TMP","RH","UGRD","VGRD","APCP","DSWRF","DLWRF")) |> @@ -46,7 +45,9 @@ furrr::future_walk(site_list, function(curr_site_id){ print(curr_site_id) df |> - to_hourly(use_solar_geom = TRUE, psuedo = TRUE) |> + to_hourly(site_list = dplyr::select(site_list, site_id, latitude, longitude), + use_solar_geom = TRUE, + psuedo = TRUE) |> dplyr::mutate(ensemble = as.numeric(stringr::str_sub(ensemble, start = 4, end = 5))) |> dplyr::rename(parameter = ensemble) |> arrow::write_dataset(path = s3, partitioning = "site_id") diff --git a/drivers/update_stage3.R b/drivers/update_stage3.R index 05d86b0fea..bd5625a0a1 100644 --- a/drivers/update_stage3.R +++ b/drivers/update_stage3.R @@ -1,11 +1,10 @@ library(gdalcubes) library(gefs4cast) -source("https://raw.githubusercontent.com/eco4cast/neon4cast/main/R/to_hourly.R") +source("drivers/to_hourly.R") site_list <- readr::read_csv(paste0("https://github.com/eco4cast/usgsrc4cast-ci/", "raw/prod/USGS_site_metadata.csv"), - show_col_types = FALSE) |> - dplyr::pull(site_id) + show_col_types = FALSE) Sys.setenv("GEFS_VERSION"="v12") @@ -15,7 +14,7 @@ driver_path <- stringr::word(config$driver_bucket, 2, -1, sep = "/") future::plan("future::multisession", workers = 8) -furrr::future_walk(site_list, function(curr_site_id){ +furrr::future_walk(dplyr::pull(site_list, site_id), function(curr_site_id){ print(curr_site_id) @@ -50,7 +49,9 @@ furrr::future_walk(site_list, function(curr_site_id){ if(nrow(psuedo_df) > 0){ df2 <- psuedo_df |> - to_hourly(use_solar_geom = TRUE, psuedo = TRUE) |> + to_hourly(site_list = dplyr::select(site_list, site_id, latitude, longitude), + use_solar_geom = TRUE, + psuedo = TRUE) |> dplyr::mutate(ensemble = as.numeric(stringr::str_sub(ensemble, start = 4, end = 5))) |> dplyr::rename(parameter = ensemble)