forked from brendandaisy/tripledemic-cdc-forecasts
-
Notifications
You must be signed in to change notification settings - Fork 0
/
read-flu-data.R
52 lines (44 loc) · 2.39 KB
/
read-flu-data.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
library(tidyverse)
library(RSocrata)
#' Fetch confirmed influenza hospital admissions from healthdata.gov
#'
#' Downloads the state-level time series from the healthdata.gov Socrata
#' resource `g62h-syeh`, keeps the
#' `previous_day_admission_influenza_confirmed` column (renamed to `value`),
#' appends a national total labelled `"US"`, joins the FluSight location
#' table, and optionally aggregates the daily counts to epidemiological weeks.
#'
#' @param temporal_resolution A single value. `"weekly"` (the default) returns
#'   one row per location and complete epiweek; any other value returns the
#'   daily data.
#' @param na.rm Passed to `sum()` when totalling across states (US row) and
#'   across the days of an epiweek.
#' @param start_date Earliest report date to keep; anything `as.Date()` can
#'   convert. The default, `"2021-10-01"`, keeps the full 2021-22 season. Use
#'   `"2022-02-02"` to start when flu reporting became mandatory.
#'
#' @return For `"weekly"`: a data frame with columns `date` (latest date in
#'   the epiweek), `location`, `location_name`, `population`, `value` (weekly
#'   sum) and `weekly_rate` (`value` per 100,000 population), sorted by
#'   descending `date`. Only epiweeks with exactly 7 daily rows are kept.
#'   Otherwise: the daily data with columns `state`, `date`, `value`,
#'   `epiweek`, `epiyear` plus the joined location columns.
fetch_flu <- function(temporal_resolution = "weekly",
                      na.rm = TRUE,
                      start_date = "2021-10-01") {
  # Fail fast on malformed arguments, before any download is attempted.
  stopifnot(
    length(temporal_resolution) == 1L,
    !is.na(temporal_resolution)
  )
  cutoff_date <- as.Date(start_date)
  stopifnot(length(cutoff_date) == 1L, !is.na(cutoff_date))

  # FluSight location table; the first four columns are used below as
  # abbreviation, location, location_name and population.
  locations <- readr::read_csv(
    file = paste0(
      "https://raw.githubusercontent.com/cdcepi/FluSight-forecast-hub/",
      "main/auxiliary-data/locations.csv"
    ),
    col_select = 1:4
  )

  # Read data from healthdata.gov. `date` is converted to Date *before* the
  # cutoff filter so the comparison is Date-vs-Date regardless of the class
  # read.socrata() returns for that column.
  # NOTE(review): as.Date() on a date-time uses UTC by default; confirm the
  # returned class/time zone if off-by-one-day dates are ever observed.
  health_data <- RSocrata::read.socrata(
    url = "https://healthdata.gov/resource/g62h-syeh.json"
  ) %>%
    dplyr::mutate(date = as.Date(date)) %>%
    dplyr::filter(date >= cutoff_date)

  # Remove VI and AS as they are not included for FluSight, keep only the
  # necessary variables and add epiweek and epiyear.
  recent_data <- health_data %>%
    dplyr::filter(!state %in% c("VI", "AS")) %>%
    dplyr::select(
      state,
      date,
      value = previous_day_admission_influenza_confirmed
    ) %>%
    dplyr::mutate(
      value = as.numeric(value),
      epiweek = lubridate::epiweek(date),
      epiyear = lubridate::epiyear(date)
    )

  # National total: sum over all remaining states for each day.
  us_data <- recent_data %>%
    dplyr::group_by(date, epiweek, epiyear) %>%
    dplyr::summarise(value = sum(value, na.rm = na.rm), .groups = "drop") %>%
    dplyr::mutate(state = "US")

  # Bind state and US rows, then attach location metadata. The relationship
  # guard errors if an abbreviation appears more than once in `locations`,
  # which would otherwise silently duplicate rows.
  full_data <- dplyr::bind_rows(recent_data, us_data) %>%
    dplyr::left_join(
      locations,
      by = dplyr::join_by("state" == "abbreviation"),
      relationship = "many-to-one"
    )

  if (temporal_resolution == "weekly") {
    # Convert daily counts to weekly sums and compute the weekly rate per
    # 100,000 population. Weeks with fewer (or more) than 7 daily rows are
    # dropped so partial weeks never appear as artificially low counts.
    full_data %>%
      dplyr::group_by(
        state, epiweek, epiyear, location, location_name, population
      ) %>%
      dplyr::summarise(
        value = sum(value, na.rm = na.rm),
        date = max(date),
        num_days = dplyr::n(),
        .groups = "drop"
      ) %>%
      dplyr::filter(num_days == 7L) %>%
      dplyr::mutate(weekly_rate = (value * 100000) / population) %>%
      dplyr::select(
        date, location, location_name, population, value, weekly_rate
      ) %>%
      dplyr::arrange(dplyr::desc(date))
  } else {
    # If daily data is wanted, the joined daily table is already final.
    full_data
  }
}