Skip to content

Commit

Permalink
Merge pull request #76 from kjsato/main-v3-2
Browse files Browse the repository at this point in the history
Main v3 2
  • Loading branch information
kjsato authored Jun 5, 2024
2 parents dfa92bb + ba9da57 commit 079b063
Show file tree
Hide file tree
Showing 11 changed files with 166 additions and 24 deletions.
26 changes: 22 additions & 4 deletions .github/workflows/after-validate-submission.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@ jobs:
with:
install-r: false
use-public-rspm: true
- name: Get list of changed files
id: files
uses: lots0logs/[email protected]
with:
token: ${{ secrets.GITHUB_TOKEN }}
- name: Install system dependencies
run: |
sudo apt-get update
Expand Down Expand Up @@ -61,17 +66,30 @@ jobs:
Rscript -e 'remotes::install_github("Infectious-Disease-Modeling-Hubs/hubVis")'
- name: Install hubUtils
run: |
Rscript -e 'remotes::install_github("Infectious-Disease-Modeling-Hubs/hubUtils")'
Rscript -e 'remotes::install_github("kjsato/hubUtils@enhancement/v3-utils", upgrade = "never")'
- name: Install hubEnsembles
run: |
Rscript -e 'remotes::install_github("Infectious-Disease-Modeling-Hubs/hubEnsembles")'
- name: Install hubData
run: |
Rscript -e 'remotes::install_github("Infectious-Disease-Modeling-Hubs/hubData")'
Rscript -e 'remotes::install_github("kjsato/hubData@feature/handle-samples", upgrade = "never")'
- name: Check if RETRO files are present
id: check_files
run: |
echo 'retro='$(echo '${{ steps.files.outputs.all }}' | grep 'model-output' | grep -c '.*-RETRO.*\.\(csv\|parquet\)') >> $GITHUB_ENV
echo 'conventional='$(echo '${{ steps.files.outputs.all }}' | grep -c -v '.*-RETRO.*\.\(csv\|parquet\)') >> $GITHUB_ENV
- name: Run R script
run: |
cd ./scripts
Rscript ensemble.R
if: env.conventional != '0'
- name: Run retro_ensemble.R
run: |
cd ./scripts
for file in $(echo '${{ steps.files.outputs.all }}' | grep 'model-output' | grep '.*-RETRO.*\.\(csv\|parquet\)'); do
Rscript retro_ensemble.R $file
done
if: env.retro != '0'
- name: Commit and push new files to root repository
run: |
Expand All @@ -90,8 +108,8 @@ jobs:
- name: Checkout and push to another repository
uses: actions/checkout@v2
with:
#repository: kjsato/rsv-forecast-hub_data
repository: HopkinsIDD/rsv-forecast-hub_data
repository: kjsato/rsv-forecast-hub_data
#repository: HopkinsIDD/rsv-forecast-hub_data
token: ${{ secrets.KJ3_PATC }}
path: ./rsv-forecast-hub_data
fetch-depth: 2
Expand Down
13 changes: 11 additions & 2 deletions .github/workflows/validate-submission.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,19 @@ jobs:
path: ${{ env.R_LIBS_USER }}
key: ${{ runner.os }}-r-${{ hashFiles('DESCRIPTION') }}
restore-keys: ${{ runner.os }}-r-
- name: Install hubUtils from specific branch
run: |
Rscript -e 'install.packages("remotes")'
Rscript -e 'remotes::install_github("kjsato/hubUtils@enhancement/v3-utils", upgrade = "never")'
- name: Install hubAdmin from specific branch
run: |
Rscript -e 'remotes::install_github("kjsato/hubAdmin@feature/sample-support", upgrade = "never")'
- name: Install hubData from specific branch
run: |
Rscript -e 'remotes::install_github("kjsato/hubData@feature/handle-samples", upgrade = "never")'
- name: Install HubValidations
run: |
install.packages("remotes")
remotes::install_github("Infectious-Disease-Modeling-Hubs/hubValidations", upgrade = "always")
remotes::install_github("kjsato/hubValidations@patch4conversion_origin_date", upgrade = "never")
shell: Rscript {0}

- name: Run validations
Expand Down
12 changes: 8 additions & 4 deletions hub-config/admin.json
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
{
"schema_version": "https://raw.githubusercontent.com/Infectious-Disease-Modeling-Hubs/schemas/main/v2.0.0/admin-schema.json",
"name": "Simple Forecast Hub",
"maintainer": "Consortium of Infectious Disease Modeling Hubs",
"schema_version": "https://raw.githubusercontent.com/kjsato/schemas/br-v3.0.0/v3.0.0/admin-schema.json",
"name": "US RSV Forecast Hub",
"maintainer": "Johns Hopkins University, Infectious Disease Dynamics Group",
"contact": {
"name": "K. Sato",
"email": "[email protected]"
},
"repository_url": "https://github.com/Infectious-Disease-Modeling-Hubs/hubTemplate",
"repository": {
"host": "github",
"owner": "kjsato",
"name": "rsv-forecast-hub"
},
"file_format": ["csv", "parquet"],
"timezone": "US/Eastern"
}
19 changes: 12 additions & 7 deletions hub-config/tasks.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
{
"schema_version": "https://raw.githubusercontent.com/Infectious-Disease-Modeling-Hubs/schemas/main/v2.0.0/tasks-schema.json",
"schema_version": "https://raw.githubusercontent.com/kjsato/schemas/br-v3.0.0/v3.0.0/tasks-schema.json",
"rounds": [
{
"round_name": "retrospective round",
"round_id_from_variable": true,
"round_id": "origin_date",
"model_tasks": [
Expand All @@ -10,6 +11,7 @@
"origin_date": {
"required": null,
"optional": [
"2023-10-18", "2023-10-22", "2023-10-29",
"2023-11-12", "2023-11-19", "2023-11-26", "2023-12-03",
"2023-12-10", "2023-12-17", "2023-12-24", "2023-12-31",
"2024-01-07", "2024-01-14", "2024-01-21", "2024-01-28",
Expand Down Expand Up @@ -147,6 +149,7 @@
"origin_date": {
"required": null,
"optional": [
"2023-10-18", "2023-10-22", "2023-10-29",
"2023-11-12", "2023-11-19", "2023-11-26", "2023-12-03",
"2023-12-10", "2023-12-17", "2023-12-24", "2023-12-31",
"2024-01-07", "2024-01-14", "2024-01-21", "2024-01-28",
Expand Down Expand Up @@ -230,13 +233,15 @@
},
"output_type": {
"sample":{
"output_type_id":{
"required": null,
"optional":[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100]
"output_type_id_params":{
"is_required": true,
"type": "integer",
"min_samples_per_task": 100,
"max_samples_per_task": 100
},
"value":{
"type":"double",
"minimum":0
"type":"integer",
"minimum":0
}
}
},
Expand Down Expand Up @@ -271,7 +276,7 @@
"submissions_due": {
"relative_to": "origin_date",
"start": -6,
"end": 100
"end": 252
}
}
]
Expand Down
32 changes: 32 additions & 0 deletions model-metadata/teamsam-modelple.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Model metadata record for team "teamsam" / model "modelple".
# Most descriptive values below (data_inputs, methods, methods_long,
# affiliation, team_funding) are generic placeholder strings.
#
# NOTE(review): schema_version points at the v2.0.0 *admin* schema, while the
# hub-config files in this same commit reference v3.0.0 schemas — confirm
# which schema URL a model-metadata file is expected to carry.
schema_version: "https://raw.githubusercontent.com/Infectious-Disease-Modeling-Hubs/schemas/main/v2.0.0/admin-schema.json"
# Team and model identifiers; the file name is "<team_abbr>-<model_abbr>.yaml".
team_name: "Team Sam"
team_abbr: "teamsam"
model_name: "Model Ple"
model_abbr: "modelple"
# Contributors, written as a YAML flow sequence of mappings.
model_contributors: [
{
"name": "Smith J",
"affiliation": "Affiliation",
"email": "[email protected]"
},
]
# Model description fields, kept as flat top-level keys.
data_inputs: "Description of Data Inputs"
methods: "Short description of the model"
methods_long: "Long description of the model"

# Alternative nested layout for the three description fields above,
# left commented out.
#model_details: [
# {
# "data_inputs": "Description of Data Inputs"
# },
# {
# "methods": "Short description of the model"
# },
# {
# "methods_long": "Long description of the model"
# }
#]
license: "MIT"
model_version: "1.0"
# NOTE(review): website_url and citation look like template values — confirm
# they are intended for this model.
website_url: "https://url_to_team2_modelb_website.com"
team_funding: "funding information"
citation: "Gibson GC , Reich NG , Sheldon D. Real-time mechanistic bayesian forecasts of Covid-19 mortality. medRxiv. 2020. https://doi.org/10.1101/2020.12.22.20248736"
Binary file modified model-output/CU-RSV_SVIRS/2024-05-12-CU-RSV_SVIRS.parquet
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
14 changes: 7 additions & 7 deletions scripts/ensemble.R
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
## ensemble.R customized for rsv-forecast-hub, split from rsv-forecast-hub_data

local_path <- paste0(dirname(here::here()))
dir_path <- file.path(local_path, "rsv-forecast-hub/")
data_path <- file.path(local_path, "rsv-forecast-hub/")
print(local_path)
#dir_path <- local_path
#data_path <- local_path
#dir_path <- file.path(local_path, "rsv-forecast-hub-kjsato/")
#data_path <- file.path(local_path, "rsv-forecast-hub-kjsato/")
#print(local_path)
dir_path <- local_path
data_path <- local_path
print(dir_path)

## ----lib-ens, include=FALSE---------------------------------------------------
Expand All @@ -29,8 +29,8 @@ library(jsonlite)
dates_archive <- unlist(jsonlite::read_json(file.path(dir_path, "hub-config/tasks.json"))$rounds[[1]]$model_tasks[[1]]$task_ids$origin_date$optional)
dates_archive <- dates_archive[as.Date(dates_archive) <= Sys.Date()]

curr_origin_date <- as.Date(max(dates_archive, na.rm = TRUE))
#curr_origin_date <- as.Date("2024-03-17")
#curr_origin_date <- as.Date(max(dates_archive, na.rm = TRUE))
curr_origin_date <- as.Date("2024-03-24")

## ----prep_ens, include=FALSE--------------------------------------------------

Expand Down
74 changes: 74 additions & 0 deletions scripts/retro_ensemble.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
## retro_ensemble.R customized for rsv-forecast-hub
##
## Builds a retrospective ("RETRO") hub ensemble for one origin date.
##
## Usage:
##   Rscript retro_ensemble.R <path-or-name-of-a-RETRO-submission-file>
##
## Only the file name of the argument is used: its leading "YYYY-MM-DD" is
## taken as the origin date. Every "*-RETRO.parquet" / "*-RETRO.csv" file under
## model-output/ whose name starts with that date is read, tagged with its
## parent directory name as `model_id`, combined, and passed to
## hubEnsembles::simple_ensemble(). The result is written to
## model-output/hub-ensemble/<date>-hub-ensemble-RETRO.parquet.

library(hubUtils)
library(hubData)
library(hubEnsembles)
library(dplyr)
library(purrr)
library(jsonlite)

# ---- Command-line input ------------------------------------------------------

args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 1L || is.na(args[1]) || !nzchar(args[1])) {
  stop(
    "Usage: Rscript retro_ensemble.R <RETRO submission file>",
    call. = FALSE
  )
}
retro_file_path <- args[1]

# ---- Paths -------------------------------------------------------------------

# The hub is expected in a "rsv-forecast-hub" directory next to the project
# root reported by here::here().
# NOTE(review): scripts/ensemble.R appears to resolve the hub root differently
# (using `local_path` directly) — confirm which layout the CI checkout uses.
local_path <- dirname(here::here())
dir_path <- file.path(local_path, "rsv-forecast-hub/")
print(local_path)
print(dir_path)

hub_path <- dir_path
print(hub_path)
# The connection is not used further below; presumably it is kept so that a
# missing or invalid hub configuration fails early — TODO confirm.
hub_con <- connect_hub(hub_path)

output_path <- file.path(dir_path, "model-output")
ensemble_dir <- file.path(output_path, "hub-ensemble")

# ---- Origin date -------------------------------------------------------------

# Extract the leading YYYY-MM-DD from the file name and validate it, so a
# malformed argument produces a clear error instead of an NA date.
retro_file_name <- basename(retro_file_path)
date_prefix <- regmatches(
  retro_file_name,
  regexpr("^\\d{4}-\\d{2}-\\d{2}", retro_file_name)
)
if (length(date_prefix) != 1L) {
  stop(
    "File name does not start with a YYYY-MM-DD date: ", retro_file_name,
    call. = FALSE
  )
}
curr_origin_date <- as.Date(date_prefix, format = "%Y-%m-%d")
if (is.na(curr_origin_date)) {
  stop("Invalid origin date in file name: ", retro_file_name, call. = FALSE)
}

# ---- Collect RETRO submissions for this date ---------------------------------

file_paths <- list.files(
  output_path,
  pattern = "-RETRO\\.parquet$|-RETRO\\.csv$",
  full.names = TRUE,
  recursive = TRUE
)
# Match the date as a literal prefix of the file name (not as a regular
# expression against the whole path).
is_curr_date <- startsWith(basename(file_paths), format(curr_origin_date))
# The ensemble written by a previous run of this script also matches the
# RETRO pattern; exclude it so the ensemble is never fed back into itself.
is_ensemble <- basename(dirname(file_paths)) == "hub-ensemble"
file_paths <- file_paths[is_curr_date & !is_ensemble]
print(file_paths)

if (length(file_paths) == 0L) {
  stop(
    "No RETRO model-output files found for origin date ",
    format(curr_origin_date),
    call. = FALSE
  )
}

# ---- Read and combine --------------------------------------------------------

# Read one RETRO submission (parquet or csv) and tag it with its model id,
# taken from the name of the directory that contains the file. Files without
# an `origin_date` column are skipped with a warning (returns NULL).
read_retro_file <- function(path) {
  # Plain if/else: ifelse() cannot return a function.
  data <- if (grepl("\\.parquet$", path)) {
    arrow::read_parquet(path)
  } else {
    readr::read_csv(path)
  }

  if (!"origin_date" %in% names(data)) {
    warning(
      "File ", path, " does not contain 'origin_date' column; skipping",
      call. = FALSE
    )
    return(NULL)
  }

  # Normalise to Date so csv and parquet inputs can be row-bound together.
  data$origin_date <- as.Date(data$origin_date)
  data$model_id <- basename(dirname(path))
  data
}

# bind_rows() drops the NULL entries produced for skipped files.
projection_data_all <- file_paths %>%
  purrr::map(read_retro_file) %>%
  dplyr::bind_rows()

if (nrow(projection_data_all) == 0L) {
  stop(
    "No usable RETRO data for origin date ", format(curr_origin_date),
    call. = FALSE
  )
}
print(head(projection_data_all))

# ---- Prepare data ------------------------------------------------------------

projection_data_all <- projection_data_all %>%
  dplyr::mutate(target_date = as.Date(origin_date) + (horizon * 7) - 1) %>%
  as_model_out_tbl()

# Named `round_data` rather than `round` to avoid masking base::round().
round_data <- projection_data_all %>%
  dplyr::filter(origin_date == curr_origin_date)

# ---- Generate and save ensemble ----------------------------------------------

round_ens <- hubEnsembles::simple_ensemble(round_data)

dir.create(ensemble_dir, showWarnings = FALSE, recursive = TRUE)
arrow::write_parquet(
  round_ens,
  file.path(
    ensemble_dir,
    paste0(curr_origin_date, "-hub-ensemble-RETRO.parquet")
  )
)

0 comments on commit 079b063

Please sign in to comment.