diff --git a/catalog/forecasts/forecast_models.R b/catalog/forecasts/forecast_models.R index 7c964342df..334d8a33b1 100644 --- a/catalog/forecasts/forecast_models.R +++ b/catalog/forecasts/forecast_models.R @@ -63,6 +63,9 @@ forecast_data_df <- arrow::open_dataset(forecast_s3) |> theme_models <- forecast_data_df |> distinct(model_id) +forecast_sites <- forecast_data_df |> + distinct(site_id) + forecast_date_range <- forecast_data_df |> dplyr::summarise(min(date),max(date)) forecast_min_date <- forecast_date_range$`min(date)` forecast_max_date <- forecast_date_range$`max(date)` @@ -84,31 +87,32 @@ stac4cast::build_forecast_scores(table_schema = forecast_theme_df, link_items = stac4cast::generate_group_values(group_values = names(config$variable_groups)), thumbnail_link = catalog_config$forecasts_thumbnail, thumbnail_title = catalog_config$forecasts_thumbnail_title, + group_sites = forecast_sites$site_id, model_child = TRUE) ## create separate JSON for model landing page -if (!dir.exists(paste0(catalog_config$forecast_path,"models"))){ - dir.create(paste0(catalog_config$forecast_path,"models")) -} - -stac4cast::build_group_variables(table_schema = forecast_theme_df, - table_description = forecast_description_create, - start_date = forecast_min_date, - end_date = forecast_max_date, - id_value = "models", - description_string = build_description, - about_string = catalog_config$about_string, - about_title = catalog_config$about_title, - dashboard_string = catalog_config$dashboard_url, - dashboard_title = catalog_config$dashboard_title, - theme_title = "Models", - destination_path = paste0(catalog_config$forecast_path,"models"), - aws_download_path = catalog_config$aws_download_path_forecasts, - group_var_items = stac4cast::generate_model_items(model_list = theme_models$model_id), - thumbnail_link = 'pending', - thumbnail_title = 'pending', - group_var_vector = NULL, - group_sites = NULL) +# if (!dir.exists(paste0(catalog_config$forecast_path,"models"))){ +# dir.create(paste0(catalog_config$forecast_path,"models")) +# } + +# stac4cast::build_group_variables(table_schema = forecast_theme_df, +# table_description = forecast_description_create, +# start_date = forecast_min_date, +# end_date = forecast_max_date, +# id_value = "models", +# description_string = build_description, +# about_string = catalog_config$about_string, +# about_title = catalog_config$about_title, +# dashboard_string = catalog_config$dashboard_url, +# dashboard_title = catalog_config$dashboard_title, +# theme_title = "Models", +# destination_path = paste0(catalog_config$forecast_path,"models"), +# aws_download_path = catalog_config$aws_download_path_forecasts, +# group_var_items = stac4cast::generate_model_items(model_list = theme_models$model_id), +# thumbnail_link = 'pending', +# thumbnail_title = 'pending', +# group_var_vector = NULL, +# group_sites = NULL) ## CREATE MODELS variable_gsheet <- gsheet2tbl(config$target_metadata_gsheet) @@ -119,8 +123,18 @@ variable_gsheet <- gsheet2tbl(config$target_metadata_gsheet) # registered_model_id <- googlesheets4::read_sheet(config$model_metadata_gsheet) # read in model metadata and filter for the relevant project -registered_model_id <- gsheet2tbl(config$model_metadata_gsheet) |> - filter(`What forecasting challenge are you registering for?` == config$project_id) +# registered_model_id <- gsheet2tbl(config$model_metadata_gsheet) |> +# filter(`What forecasting challenge are you registering for?` == config$project_id) + +gsheet_read <- gsheet2tbl(config$model_metadata_gsheet) +gsheet_read$row_non_na <- rowSums(!is.na(gsheet_read)) + +registered_model_id <- gsheet_read |> + filter(`What forecasting challenge are you registering for?` == config$project_id) |> + rename(project_id = `What forecasting challenge are you registering for?`) |> + arrange(row_non_na) |> + distinct(model_id, project_id, .keep_all = TRUE)#|> +#filter(row_non_na > 20) ## estimate based on current number of rows assuming everything (minus model and project) are empty forecast_sites <- c() @@ -158,6 +172,12 @@ for (m in theme_models$model_id){ idx = which(registered_model_id$model_id == m) + if (is.null(registered_model_id$`Web link to model code`[idx])){ + model_code_link <- 'https://projects.ecoforecast.org/neon4cast-ci/' + } else{ + model_code_link <- registered_model_id$`Web link to model code`[idx] + } + stac4cast::build_model(model_id = m, team_name = registered_model_id$`Long name of the model (can include spaces)`[idx], model_description = registered_model_id[idx,"Describe your modeling approach in your own words."][[1]], @@ -169,14 +189,13 @@ for (m in theme_models$model_id){ site_table = catalog_config$site_metadata_url, model_documentation = registered_model_id, destination_path = paste0(catalog_config$forecast_path,"models/model_items"), - aws_download_path = config$forecasts_bucket, # CHANGE THIS BUCKET NAME + aws_download_path = catalog_config$aws_download_path_forecasts, collection_name = 'forecasts', thumbnail_image_name = NULL, table_schema = forecast_theme_df, table_description = forecast_description_create, full_var_df = model_vars, - #code_web_link = registered_model_id$`Web link to model code`[idx], - code_web_link = 'pending') + code_web_link = model_code_link) } @@ -204,106 +223,137 @@ for (i in 1:length(config$variable_groups)){ ## organize variable groups dir.create(paste0(catalog_config$forecast_path,names(config$variable_groups[i]))) } - for(j in 1:length(config$variable_groups[[i]]$variable)){ # FOR EACH VARIABLE WITHIN A MODEL GROUP + # match variable with full name in gsheet + var_gsheet_arrange <- variable_gsheet |> + arrange(duration) - ## restructure variable names - var_values <- config$variable_groups[[i]]$variable - var_name <- config$variable_groups[[i]]$variable[j] - print(var_name) + var_values <- names(config$variable_groups[[i]]$group_vars) - # check data and skip if no data found - var_data_check <- forecast_data_df |> - filter(variable == var_name) - - if (nrow(var_data_check) == 0){ - print('No data available for variable') - next - } - - duration_name <- config$variable_groups[[i]]$duration[j] - - # match variable with full name in gsheet - #var_name_full <- variable_gsheet[which(variable_gsheet$`"official" targets name` == var_values),1][[1]] - var_name_full <- variable_gsheet[which(variable_gsheet$`"official" targets name` %in% var_values),1][[1]] - - - - ## create new vector to store duration names - duration_values <- config$variable_groups[[i]]$duration - duration_values[which(duration_values == 'P1D')] <- 'Daily' - duration_values[which(duration_values == 'PT1H')] <- 'Hourly' - duration_values[which(duration_values == 'PT30M')] <- '30min' - duration_values[which(duration_values == 'P1W')] <- 'Weekly' - - #var_name_combined_list <- paste0(var_values, '_',duration_values) - var_name_combined_list <- paste0(duration_values,'_',var_name_full) - - ## CREATE VARIABLE GROUP JSONS - group_description <- paste0('This page includes variables for the ',names(config$variable_groups[i]),' group.') - - ## find group sites - find_group_sites <- forecast_data_df |> - filter(variable %in% var_values) |> - distinct(site_id) - - stac4cast::build_group_variables(table_schema = forecast_theme_df, - #theme_id = names(config$variable_groups[i]), - table_description = forecast_description_create, - start_date = forecast_min_date, - end_date = forecast_max_date, - id_value = names(config$variable_groups[i]), - description_string = group_description, - about_string = catalog_config$about_string, - about_title = catalog_config$about_title, - dashboard_string = catalog_config$dashboard_url, - dashboard_title = catalog_config$dashboard_title, - theme_title = names(config$variable_groups[i]), - destination_path = paste0(catalog_config$forecast_path,names(config$variable_groups[i])), - aws_download_path = catalog_config$aws_download_path_forecasts, - group_var_items = stac4cast::generate_group_variable_items(variables = var_name_combined_list), - thumbnail_link = config$variable_groups[[i]]$thumbnail_link, - thumbnail_title = config$variable_groups[[i]]$thumbnail_title, - group_var_vector = unique(var_values), - group_sites = find_group_sites$site_id) - - if (!dir.exists(paste0(catalog_config$forecast_path,names(config$variable_groups)[i],'/',var_name_combined_list[j]))){ - dir.create(paste0(catalog_config$forecast_path,names(config$variable_groups)[i],'/',var_name_combined_list[j])) - } - - var_data <- forecast_data_df |> - filter(variable == var_name, - duration == duration_name) - - var_date_range <- var_data |> dplyr::summarise(min(date),max(date)) - var_min_date <- var_date_range$`min(date)` - var_max_date <- var_date_range$`max(date)` - - var_models <- var_data |> distinct(model_id) - - find_var_sites <- forecast_data_df |> - filter(variable == var_name) |> - distinct(site_id) - - var_description <- paste0('This page includes all models for the ',var_name_combined_list[j],' variable.') - - stac4cast::build_group_variables(table_schema = forecast_theme_df, - table_description = forecast_description_create, - start_date = var_min_date, - end_date = var_max_date, - id_value = var_name_combined_list[j], - description_string = var_description, - about_string = catalog_config$about_string, - about_title = catalog_config$about_title, - dashboard_string = catalog_config$dashboard_url, - dashboard_title = catalog_config$dashboard_title, - theme_title = var_name_combined_list[j], - destination_path = file.path(catalog_config$forecast_path,names(config$variable_groups)[i],var_name_combined_list[j]), - aws_download_path = var_data$path[1], - group_var_items = stac4cast::generate_variable_model_items(model_list = var_models$model_id), - thumbnail_link = 'pending', - thumbnail_title = 'pending', - group_var_vector = NULL, - group_sites = find_var_sites$site_id) + var_name_full <- var_gsheet_arrange[which(var_gsheet_arrange$`"official" targets name` %in% var_values),1][[1]] - } -} + ## CREATE VARIABLE GROUP JSONS + group_description <- paste0('This page includes variables for the ',names(config$variable_groups[i]),' group.') + + ## find group sites + find_group_sites <- forecast_data_df |> + filter(variable %in% var_values) |> + distinct(site_id) + + ## create empty vector to track publication information + citation_build <- c() + doi_build <- c() + + ## create empty vector to track variable information + variable_name_build <- c() + + for(j in 1:length(config$variable_groups[[i]]$group_vars)){ # FOR EACH VARIABLE WITHIN A MODEL GROUP + + + var_name <- names(config$variable_groups[[i]]$group_vars[j]) + print(var_name) + + for (k in 1:length(config$variable_groups[[i]]$group_vars[[j]]$duration)){ + duration_value <- config$variable_groups[[i]]$group_vars[[j]]$duration[k] + print(duration_value) + + ## save original duration name for reference + duration_name <- config$variable_groups[[i]]$group_vars[[j]]$duration[k] + + ## create formal variable name + duration_value[which(duration_value == 'P1D')] <- 'Daily' + duration_value[which(duration_value == 'PT1H')] <- 'Hourly' + duration_value[which(duration_value == 'PT30M')] <- '30min' + duration_value[which(duration_value == 'P1W')] <- 'Weekly' + + var_formal_name <- paste0(duration_value,'_',var_name_full[j]) + + # check data and skip if no data found + var_data_check <- forecast_data_df |> + filter(variable == var_name) + + if (nrow(var_data_check) == 0){ + print('No data available for variable') + next + } + + if (!dir.exists(file.path(catalog_config$forecast_path,names(config$variable_groups)[i],var_formal_name))){ + dir.create(file.path(catalog_config$forecast_path,names(config$variable_groups)[i],var_formal_name)) + } + + var_data <- forecast_data_df |> + filter(variable == var_name, + duration == duration_name) + + var_date_range <- var_data |> dplyr::summarise(min(date),max(date)) + var_min_date <- var_date_range$`min(date)` + var_max_date <- var_date_range$`max(date)` + + var_models <- var_data |> distinct(model_id) + + find_var_sites <- forecast_data_df |> + filter(variable == var_name) |> + distinct(site_id) + + var_description <- paste0('This page includes all models for the ',var_formal_name,' variable.') + + var_path <- var_data$path[1] + + ## build lists for creating publication items + var_citations <- config$variable_groups[[i]]$group_vars[[j]]$var_citation + doi_citations <- config$variable_groups[[i]]$group_vars[[j]]$var_doi + + #update group list of publication information + citation_build <- append(citation_build, var_citations) + doi_build <- append(doi_build, doi_citations) + + variable_name_build <- append(variable_name_build, var_formal_name) + + variable_name_build <- append(variable_name_build, var_formal_name) + + stac4cast::build_group_variables(table_schema = forecast_data_df, + #theme_id = var_formal_name[j], + table_description = forecast_description_create, + start_date = var_min_date, + end_date = var_max_date, + id_value = var_formal_name, + description_string = var_description, + about_string = catalog_config$about_string, + about_title = catalog_config$about_title, + dashboard_string = catalog_config$dashboard_url, + dashboard_title = catalog_config$dashboard_title, + theme_title = var_formal_name, + destination_path = file.path(catalog_config$forecast_path,names(config$variable_groups)[i],var_formal_name), + aws_download_path = var_path, + group_var_items = stac4cast::generate_variable_model_items(model_list = var_models$model_id), + thumbnail_link = 'pending', + thumbnail_title = 'pending', + group_var_vector = NULL, + group_sites = find_var_sites$site_id, + citation_values = var_citations, + doi_values = config$variable_groups[[i]]$group_vars[[j]]$var_doi) + } ## end duration loop + + } ## end variable loop + + ## BUILD THE GROUP PAGES WITH UPDATED VAR/PUB INFORMATION + stac4cast::build_group_variables(table_schema = forecast_data_df, + table_description = forecast_description_create, + start_date = forecast_min_date, + end_date = forecast_max_date, + id_value = names(config$variable_groups)[i], + description_string = group_description, + about_string = catalog_config$about_string, + about_title = catalog_config$about_title, + dashboard_string = catalog_config$dashboard_url, + dashboard_title = catalog_config$dashboard_title, + theme_title = names(config$variable_groups[i]), + destination_path = file.path(catalog_config$scores_path,names(config$variable_groups)[i]), + aws_download_path = catalog_config$aws_download_path_scores, + group_var_items = stac4cast::generate_group_variable_items(variables = variable_name_build), + thumbnail_link = config$variable_groups[[i]]$thumbnail_link, + thumbnail_title = config$variable_groups[[i]]$thumbnail_title, + group_var_vector = unique(var_values), + group_sites = find_group_sites$site_id, + citation_values = citation_build, + doi_values = doi_build) +} # end group loop diff --git a/challenge_configuration.yaml b/challenge_configuration.yaml index e7646beb0c..7a176cde98 100644 --- a/challenge_configuration.yaml +++ b/challenge_configuration.yaml @@ -52,6 +52,9 @@ variable_groups: duration: ['P1D'] thumbnail_link: 'https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/s3fs-public/thumbnails/image/Back-b.jpg' thumbnail_title: 'USGS Streamgage' + group_vars: + chla: + duration: "P1D" # Forecast catalog configuration catalog_config: @@ -87,6 +90,6 @@ catalog_config: base_image_path: 'https://data.ecoforecast.org/usgsrc4cast-catalog' citation_doi: "https://doi.org/10.1002/fee.2616" citation_text: "Thomas, R.Q., C. Boettiger, C.C. Carey, M.C. Dietze, L.R. Johnson, M.A. Kenney, J.S. Mclachlan, J.A. Peters, E.R. Sokol, J.F. Weltzin, A. Willson, W.M. Woelmer, and Challenge Contributors. 2023. The NEON Ecological Forecasting Challenge. Frontiers in Ecology and Environment 21: 112-113." - dashboard_url: "https://projects.ecoforecast.org/usgsrc4cast-docs/" + dashboard_url: "https://projects.ecoforecast.org/usgsrc4cast-ci/" dashboard_title: "EFI-USGS River Chlorophyll Forecast Challenge Dashboard" site_metadata_url: 'https://raw.githubusercontent.com/eco4cast/usgsrc4cast-ci/main/USGS_site_metadata.csv'