Review notes for this patch. Text before the first "diff --git" header is
ignored by `git apply` and `patch`, so these notes do not affect the change.

Changed during review:
  * after-validate-submission.yaml: the changed-file list is handed to the
    shell through an environment variable instead of being expanded inside
    the script text, and the JSON array is split into one path per line with
    jq before counting, so `retro` / `conventional` count files and the RETRO
    loop runs once per file.
  * scripts/retro_ensemble.R: fail fast when no file argument is given, pick
    the reader with `if`/`else`, and drop `stringsAsFactors`, which
    readr::read_csv() does not accept.
  * Hunk line counts were updated for the edits above. The `index` blob
    hashes are the original ones and no longer match the edited content.

Flagged, not changed (confirm before merging):
  * Packages, schemas, the admin.json repository owner and the push target
    all point at kjsato/* forks and feature branches.
  * scripts/ensemble.R pins curr_origin_date to "2024-03-24" and switches
    dir_path / data_path to local_path.
  * Indentation and blank context lines were reconstructed from a
    whitespace-collapsed copy of the patch; verify against the target files.

diff --git a/.github/workflows/after-validate-submission.yaml b/.github/workflows/after-validate-submission.yaml
index f05b7f45..d96c3e8b 100644
--- a/.github/workflows/after-validate-submission.yaml
+++ b/.github/workflows/after-validate-submission.yaml
@@ -25,6 +25,11 @@ jobs:
         with:
           install-r: false
           use-public-rspm: true
+      - name: Get list of changed files
+        id: files
+        uses: lots0logs/gh-action-get-changed-files@2.1.4
+        with:
+          token: ${{ secrets.GITHUB_TOKEN }}
       - name: Install system dependencies
         run: |
           sudo apt-get update
@@ -61,17 +66,35 @@ jobs:
           Rscript -e 'remotes::install_github("Infectious-Disease-Modeling-Hubs/hubVis")'
       - name: Install hubUtils
         run: |
-          Rscript -e 'remotes::install_github("Infectious-Disease-Modeling-Hubs/hubUtils")'
+          Rscript -e 'remotes::install_github("kjsato/hubUtils@enhancement/v3-utils", upgrade = "never")'
       - name: Install hubEnsembles
         run: |
           Rscript -e 'remotes::install_github("Infectious-Disease-Modeling-Hubs/hubEnsembles")'
       - name: Install hubData
         run: |
-          Rscript -e 'remotes::install_github("Infectious-Disease-Modeling-Hubs/hubData")'
+          Rscript -e 'remotes::install_github("kjsato/hubData@feature/handle-samples", upgrade = "never")'
+      - name: Check if RETRO files are present
+        id: check_files
+        env:
+          CHANGED_FILES: ${{ steps.files.outputs.all }}
+        run: |
+          # The changed-files output is a JSON array on one line; write one
+          # path per line so grep -c counts files instead of that single line.
+          printf '%s' "$CHANGED_FILES" | jq -r '.[]' > "$RUNNER_TEMP/changed_files.txt"
+          echo "retro=$(grep 'model-output' "$RUNNER_TEMP/changed_files.txt" | grep -c '.*-RETRO.*\.\(csv\|parquet\)')" >> "$GITHUB_ENV"
+          echo "conventional=$(grep -c -v '.*-RETRO.*\.\(csv\|parquet\)' "$RUNNER_TEMP/changed_files.txt")" >> "$GITHUB_ENV"
       - name: Run R script
         run: |
           cd ./scripts
           Rscript ensemble.R
+        if: env.conventional != '0'
+      - name: Run retro_ensemble.R
+        run: |
+          cd ./scripts
+          for file in $(grep 'model-output' "$RUNNER_TEMP/changed_files.txt" | grep '.*-RETRO.*\.\(csv\|parquet\)'); do
+            Rscript retro_ensemble.R "$file"
+          done
+        if: env.retro != '0'

       - name: Commit and push new files to root repository
         run: |
@@ -90,8 +113,8 @@ jobs:
       - name: Checkout and push to another repository
         uses: actions/checkout@v2
         with:
-          #repository: kjsato/rsv-forecast-hub_data
-          repository: HopkinsIDD/rsv-forecast-hub_data
+          repository: kjsato/rsv-forecast-hub_data
+          #repository: HopkinsIDD/rsv-forecast-hub_data
           token: ${{ secrets.KJ3_PATC }}
           path: ./rsv-forecast-hub_data
           fetch-depth: 2
diff --git a/.github/workflows/validate-submission.yaml b/.github/workflows/validate-submission.yaml
index c45ba943..0731db34 100644
--- a/.github/workflows/validate-submission.yaml
+++ b/.github/workflows/validate-submission.yaml
@@ -33,10 +33,19 @@ jobs:
           path: ${{ env.R_LIBS_USER }}
           key: ${{ runner.os }}-r-${{ hashFiles('DESCRIPTION') }}
           restore-keys: ${{ runner.os }}-r-
+      - name: Install hubUtils from specific branch
+        run: |
+          Rscript -e 'install.packages("remotes")'
+          Rscript -e 'remotes::install_github("kjsato/hubUtils@enhancement/v3-utils", upgrade = "never")'
+      - name: Install hubAdmin from specific branch
+        run: |
+          Rscript -e 'remotes::install_github("kjsato/hubAdmin@feature/sample-support", upgrade = "never")'
+      - name: Install hubData from specific branch
+        run: |
+          Rscript -e 'remotes::install_github("kjsato/hubData@feature/handle-samples", upgrade = "never")'
       - name: Install HubValidations
         run: |
-          install.packages("remotes")
-          remotes::install_github("Infectious-Disease-Modeling-Hubs/hubValidations", upgrade = "always")
+          remotes::install_github("kjsato/hubValidations@patch4conversion_origin_date", upgrade = "never")
         shell: Rscript {0}

       - name: Run validations
diff --git a/hub-config/admin.json b/hub-config/admin.json
index 07679e3c..3d2bbddd 100644
--- a/hub-config/admin.json
+++ b/hub-config/admin.json
@@ -1,12 +1,16 @@
 {
-    "schema_version": "https://raw.githubusercontent.com/Infectious-Disease-Modeling-Hubs/schemas/main/v2.0.0/admin-schema.json",
-    "name": "Simple Forecast Hub",
-    "maintainer": "Consortium of Infectious Disease Modeling Hubs",
+    "schema_version": "https://raw.githubusercontent.com/kjsato/schemas/br-v3.0.0/v3.0.0/admin-schema.json",
+    "name": "US RSV Forecast Hub",
+    "maintainer": "Johns Hopkins University, Infectious Disease Dynamics Group",
     "contact": {
         "name": "K. Sato",
         "email": "ksato8@jh.edu"
     },
-    "repository_url": "https://github.com/Infectious-Disease-Modeling-Hubs/hubTemplate",
+    "repository": {
+        "host": "github",
+        "owner": "kjsato",
+        "name": "rsv-forecast-hub"
+    },
     "file_format": ["csv", "parquet"],
     "timezone": "US/Eastern"
 }
diff --git a/hub-config/tasks.json b/hub-config/tasks.json
index 054a7d72..8714001e 100644
--- a/hub-config/tasks.json
+++ b/hub-config/tasks.json
@@ -1,7 +1,8 @@
 {
-    "schema_version": "https://raw.githubusercontent.com/Infectious-Disease-Modeling-Hubs/schemas/main/v2.0.0/tasks-schema.json",
+    "schema_version": "https://raw.githubusercontent.com/kjsato/schemas/br-v3.0.0/v3.0.0/tasks-schema.json",
     "rounds": [
         {
+            "round_name": "retrospective round",
             "round_id_from_variable": true,
             "round_id": "origin_date",
             "model_tasks": [
@@ -10,6 +11,7 @@
                         "origin_date": {
                             "required": null,
                             "optional": [
+                                "2023-10-18",
                                 "2023-10-22", "2023-10-29", "2023-11-12", "2023-11-19", "2023-11-26",
                                 "2023-12-03", "2023-12-10", "2023-12-17", "2023-12-24", "2023-12-31",
                                 "2024-01-07", "2024-01-14", "2024-01-21", "2024-01-28",
@@ -147,6 +149,7 @@
                         "origin_date": {
                             "required": null,
                             "optional": [
+                                "2023-10-18",
                                 "2023-10-22", "2023-10-29", "2023-11-12", "2023-11-19", "2023-11-26",
                                 "2023-12-03", "2023-12-10", "2023-12-17", "2023-12-24", "2023-12-31",
                                 "2024-01-07", "2024-01-14", "2024-01-21", "2024-01-28",
@@ -230,13 +233,15 @@
                     },
                     "output_type": {
                         "sample":{
-                            "output_type_id":{
-                                "required": null,
-                                "optional":[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100]
+                            "output_type_id_params":{
+                                "is_required": true,
+                                "type": "integer",
+                                "min_samples_per_task": 100,
+                                "max_samples_per_task": 100
                             },
                             "value":{
-                                "type":"double",
-                                "minimum":0
+                                "type":"integer",
+                                "minimum":0
                             }
                         }
                     },
@@ -271,7 +276,7 @@
             "submissions_due": {
                 "relative_to": "origin_date",
                 "start": -6,
-                "end": 100
+                "end": 252
             }
         }
     ]
diff --git a/model-metadata/teamsam-modelple.yaml b/model-metadata/teamsam-modelple.yaml
new file mode 100644
index 00000000..1de0f2dd
--- /dev/null
+++ b/model-metadata/teamsam-modelple.yaml
@@ -0,0 +1,32 @@
+schema_version: "https://raw.githubusercontent.com/Infectious-Disease-Modeling-Hubs/schemas/main/v2.0.0/admin-schema.json"
+team_name: "Team Sam"
+team_abbr: "teamsam"
+model_name: "Model Ple"
+model_abbr: "modelple"
+model_contributors: [
+  {
+    "name": "Smith J",
+    "affiliation": "Affiliation",
+    "email": "ksato8@jh.edu"
+  },
+]
+data_inputs: "Description of Data Inputs"
+methods: "Short description of the model"
+methods_long: "Long description of the model"
+
+#model_details: [
+#  {
+#    "data_inputs": "Description of Data Inputs"
+#  },
+#  {
+#    "methods": "Short description of the model"
+#  },
+#  {
+#    "methods_long": "Long description of the model"
+#  }
+#]
+license: "MIT"
+model_version: "1.0"
+website_url: "https://url_to_team2_modelb_website.com"
+team_funding: "funding information"
+citation: "Gibson GC , Reich NG , Sheldon D. Real-time mechanistic bayesian forecasts of Covid-19 mortality. medRxiv. 2020. https://doi.org/10.1101/2020.12.22.20248736"
diff --git a/model-output/CU-RSV_SVIRS/2024-05-12-CU-RSV_SVIRS.parquet b/model-output/CU-RSV_SVIRS/2024-05-12-CU-RSV_SVIRS.parquet
index d199cbc7..63cbe6cd 100644
Binary files a/model-output/CU-RSV_SVIRS/2024-05-12-CU-RSV_SVIRS.parquet and b/model-output/CU-RSV_SVIRS/2024-05-12-CU-RSV_SVIRS.parquet differ
diff --git a/model-output/teamsam-modelple/2023-11-05-teamsam-modelple-RETRO.parquet b/model-output/teamsam-modelple/2023-11-05-teamsam-modelple-RETRO.parquet
new file mode 100644
index 00000000..6e9e29fa
Binary files /dev/null and b/model-output/teamsam-modelple/2023-11-05-teamsam-modelple-RETRO.parquet differ
diff --git a/model-output/teamsam-modelple/2023-11-12-teamsam-modelple-RETRO.parquet b/model-output/teamsam-modelple/2023-11-12-teamsam-modelple-RETRO.parquet
new file mode 100644
index 00000000..a395d764
Binary files /dev/null and b/model-output/teamsam-modelple/2023-11-12-teamsam-modelple-RETRO.parquet differ
diff --git a/model-output/teamsam-modelple/2023-11-19-teamsam-modelple-RETRO.parquet b/model-output/teamsam-modelple/2023-11-19-teamsam-modelple-RETRO.parquet
new file mode 100644
index 00000000..e57017be
Binary files /dev/null and b/model-output/teamsam-modelple/2023-11-19-teamsam-modelple-RETRO.parquet differ
diff --git a/scripts/ensemble.R b/scripts/ensemble.R
index 6af19f44..69032f02 100644
--- a/scripts/ensemble.R
+++ b/scripts/ensemble.R
@@ -1,11 +1,11 @@
 ## ensemble.R customized for rsv-forecast-hub, split from rsv-forecast-hub_data

 local_path <- paste0(dirname(here::here()))
-dir_path <- file.path(local_path, "rsv-forecast-hub/")
-data_path <- file.path(local_path, "rsv-forecast-hub/")
-print(local_path)
-#dir_path <- local_path
-#data_path <- local_path
+#dir_path <- file.path(local_path, "rsv-forecast-hub-kjsato/")
+#data_path <- file.path(local_path, "rsv-forecast-hub-kjsato/")
+#print(local_path)
+dir_path <- local_path
+data_path <- local_path
 print(dir_path)

 ## ----lib-ens, include=FALSE---------------------------------------------------
@@ -29,8 +29,8 @@ library(jsonlite)

 dates_archive <- unlist(jsonlite::read_json(file.path(dir_path, "hub-config/tasks.json"))$rounds[[1]]$model_tasks[[1]]$task_ids$origin_date$optional)
 dates_archive <- dates_archive[as.Date(dates_archive) <= Sys.Date()]
-curr_origin_date <- as.Date(max(dates_archive, na.rm = TRUE))
-#curr_origin_date <- as.Date("2024-03-17")
+#curr_origin_date <- as.Date(max(dates_archive, na.rm = TRUE))
+curr_origin_date <- as.Date("2024-03-24")


 ## ----prep_ens, include=FALSE--------------------------------------------------
diff --git a/scripts/retro_ensemble.R b/scripts/retro_ensemble.R
new file mode 100644
index 00000000..251fc21d
--- /dev/null
+++ b/scripts/retro_ensemble.R
@@ -0,0 +1,77 @@
+## retro_ensemble.R customized for rsv-forecast-hub
+
+# Get the file_path from the command line arguments
+args <- commandArgs(trailingOnly = TRUE)
+if (length(args) < 1) {
+  stop("Usage: Rscript retro_ensemble.R <path-to-RETRO-file>")
+}
+retro_file_path <- args[1]
+
+local_path <- paste0(dirname(here::here()))
+dir_path <- file.path(local_path, "rsv-forecast-hub/")
+data_path <- file.path(local_path, "rsv-forecast-hub/")
+print(local_path)
+#dir_path <- local_path
+#data_path <- local_path
+print(dir_path)
+
+library(hubUtils)
+library(hubData)
+library(hubEnsembles)
+library(dplyr)
+library(purrr)
+library(jsonlite)
+
+hub_path <- dir_path
+print(hub_path)
+hub_con <- connect_hub(hub_path)
+
+loc_data <- readr::read_csv(file.path(dir_path, "auxiliary-data/location_census/locations.csv"))
+
+output_path <- file.path(dir_path, "model-output")
+
+# Extract date from file name
+curr_origin_date <- as.Date(gsub("^(\\d{4}-\\d{2}-\\d{2}).*", "\\1", basename(retro_file_path)))
+
+# Get all RETRO files (maybe needed consideration for the case of a combo without RETRO files)
+file_paths <- list.files(output_path, pattern = "-RETRO\\.parquet$|-RETRO\\.csv$", full.names = TRUE, recursive = TRUE)
+file_paths <- file_paths[grepl(curr_origin_date, file_paths)]
+print(file_paths)
+
+# read the files, and concatenate all the data frames with adding the team name in "model_id" column
+projection_data_all <- file_paths %>%
+  map_df(~{
+    # func selection according to the input file format
+    read_fun <- if (grepl("\\.parquet$", .x)) arrow::read_parquet else readr::read_csv
+
+    # read data (no stringsAsFactors: readr::read_csv() has no such argument)
+    data <- read_fun(.x)
+
+    # check if 'origin_date' column exists
+    if (!"origin_date" %in% names(data)) {
+      print(paste("File", .x, "does not contain 'origin_date' column"))
+    }
+
+    # append the team name in "model_id"
+    data$model_id <- basename(dirname(.x))
+
+    # return data
+    data
+  })
+head(projection_data_all)
+
+# Prepare data
+projection_data_all <- dplyr::mutate(projection_data_all,
+                                     target_date = as.Date(origin_date) + (horizon * 7) - 1)
+projection_data_all <- as_model_out_tbl(projection_data_all)
+
+round <- projection_data_all %>%
+  dplyr::filter(origin_date == as.Date(curr_origin_date)) %>%
+  dplyr::collect()
+
+# Generate ensemble
+round_ens <- hubEnsembles::simple_ensemble(round)
+
+# Save ensemble
+dir.create(file.path(dir_path, "model-output", "hub-ensemble"), showWarnings = FALSE, recursive = TRUE)
+arrow::write_parquet(round_ens, file.path(dir_path, "model-output", "hub-ensemble", paste0(curr_origin_date, "-hub-ensemble-RETRO.parquet")))