-
Notifications
You must be signed in to change notification settings - Fork 11
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Compute percentiles for daily values and their colors #45
Changes from 1 commit
7206599
bd04778
16cec6f
6c3fa3e
b1a783a
919cabc
30ba280
9c0b0c1
a9d6e60
f9fc643
f4cb8ec
c1ebb51
af242e2
8d09f41
be8a91a
afaa891
5e82f3b
def11ed
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,8 +16,8 @@ fetch_dv_sites <- function(ind_file, dates){ | |
endDate = dates$end, | ||
parameterCd = "00060", | ||
statCd = "00003") %>% | ||
dplyr::distinct() %>% | ||
dplyr::pull(site_no) %>% | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Consider swapping the above two lines. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. True, but There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh, right. |
||
unique() %>% | ||
c(sites) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is it possible that some sites show up in multiple HUCs? I don't know how that would happen, but since you're still seeing duplicates after many other fixes... |
||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,20 +1,19 @@ | ||
#' @title Compute the color for each daily value percentile | ||
#' | ||
#' | ||
#' @param ind_file character file name where the output should be saved | ||
#' @param dv_stats_ind indicator file for the data.frame of dv_data | ||
#' @param color_palette list of colors to use for the color ramp (from viz_config.yml) | ||
process_dv_stat_colors <- function(ind_file, dv_stats_ind, color_palette){ | ||
|
||
dv_stats <- readRDS(scipiper::sc_retrieve(dv_stats_ind, remake_file = '2_process.yml')) | ||
col_fun <- colorRamp(color_palette) | ||
|
||
# just removing NA percentiles for now | ||
dv_stats_with_color <- dv_stats %>% | ||
filter(!is.na(per)) %>% | ||
dv_stats_with_color <- dv_stats %>% | ||
filter(!is.na(per)) %>% | ||
mutate(color = rgb(col_fun(per), maxColorValue = 255)) # don't know how necessary maxColorValue is | ||
|
||
# Write the data file and the indicator file | ||
data_file <- scipiper::as_data_file(ind_file) | ||
saveRDS(dv_stats_with_color, data_file) | ||
scipiper::gd_put(ind_file, data_file) | ||
saveRDS(dv_stats_with_color, scipiper::as_data_file(ind_file)) | ||
scipiper::gd_put(ind_file) | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,41 +1,43 @@ | ||
#' @title Calculate the stat category for each gage's discharge value | ||
#' | ||
#' | ||
#' @param ind_file character file name where the output should be saved | ||
#' @param dv_data_ind indicator file for the data.frame of dv_data | ||
#' @param site_stats_clean_ind indicator file for the data.frame of dv stats for each site | ||
#' @param dates object from viz_config.yml that specifies dates as string | ||
#' @param percentiles character vector of the types of stats to include, i.e. `c("10", "75")` | ||
#' @param percentiles character vector of the types of stats to include, i.e. `c("10", "75")` | ||
#' will return the 10th and 75th percentiles (from viz_config.yml) | ||
process_dv_stats <- function(ind_file, dv_data_ind, site_stats_clean_ind, dates, percentiles){ | ||
|
||
dv_data <- readRDS(scipiper::sc_retrieve(dv_data_ind, remake_file = '1_fetch.yml')) | ||
site_stats <- readRDS(scipiper::sc_retrieve(site_stats_clean_ind, remake_file = '2_process.yml')) | ||
|
||
# breakdown date into month & day pairs | ||
dv_data_md <- dv_data %>% | ||
dv_data_md <- dv_data %>% | ||
dplyr::mutate(month_nu = as.numeric(format(dateTime, "%m")), | ||
day_nu = as.numeric(format(dateTime, "%d"))) | ||
|
||
# merge stats with the dv data | ||
# merge still results in extra rows - 24 extra to be exact | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think I was seeing those with the test data you shared - they're duplicates in dv_data_md, right? They might be resolved by the suggestion above to call There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. even after adding that step, I am still getting 24 extra observations There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmmm. Can you figure out which ones they are? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Working on this FYI! |
||
dv_with_stats <- left_join(dv_data_md, site_stats, by = c("site_no", "month_nu", "day_nu")) | ||
|
||
stat_colnames <- sprintf("p%s_va", percentiles) | ||
stat_perc <- as.numeric(percentiles)/100 | ||
|
||
int_per <- function(df){ | ||
|
||
interpolate_percentile <- function(df){ | ||
# This function takes the current daily value and interpolates it's percentile based | ||
# on the percentiles for the matching site and day of the year | ||
df <- select(df, "dv_val", one_of(stat_colnames)) | ||
out <- rep(NA, nrow(df)) | ||
|
||
for (i in 1:length(out)){ | ||
dv_val <- df$dv_val[i] | ||
df_i <- slice(df, i) %>% | ||
select(-dv_val) %>% | ||
tidyr::gather(stat_name, stat_value) %>% | ||
|
||
df_i <- slice(df, i) %>% | ||
select(-dv_val) %>% | ||
tidyr::gather(stat_name, stat_value) %>% | ||
mutate(stat_value = as.numeric(stat_value), | ||
stat_type = as.numeric(gsub("p|_va", "", stat_name))/100) | ||
|
||
y <- df_i$stat_type | ||
x <- df_i$stat_value | ||
nas <- is.na(x) | ||
|
@@ -52,17 +54,17 @@ process_dv_stats <- function(ind_file, dv_data_ind, site_stats_clean_ind, dates, | |
} | ||
} | ||
return(out) | ||
|
||
} | ||
dv_stats <- dv_with_stats %>% | ||
mutate(dv_val = Flow) %>% | ||
filter_(sprintf("!is.na(%s)", stat_colnames[1]), | ||
sprintf("!is.na(%s)", tail(stat_colnames,1)), | ||
|
||
dv_stats <- dv_with_stats %>% | ||
mutate(dv_val = Flow) %>% | ||
filter_(sprintf("!is.na(%s)", stat_colnames[1]), | ||
sprintf("!is.na(%s)", tail(stat_colnames,1)), | ||
sprintf("!is.na(%s)", "dv_val")) %>% | ||
mutate(per = int_per(.)) %>% | ||
mutate(per = interpolate_percentile(.)) %>% | ||
select(site_no, dateTime, dv_val, per, p50_va) | ||
|
||
# Write the data file and the indicator file | ||
data_file <- scipiper::as_data_file(ind_file) | ||
saveRDS(dv_stats, data_file) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Other duplicates may be discoverable here if you look for rows that are distinct just among the columns agency_cd, site_no, and dateTime (e.g., check the results of
dup_dv <- dv_data %>% group_by(site_no, month_nu, day_nu) %>% summarize(n=n()) %>% filter(n > 1) %>% left_join(dv_data, by=c('site_no','month_nu','day_nu'))
)