From 720659933c587e71865ace8a740a5fa49b322c1e Mon Sep 17 00:00:00 2001 From: Carr Date: Mon, 5 Nov 2018 15:49:12 -0600 Subject: [PATCH 01/14] merge conflicts --- 2_process.yml | 11 +++++++++++ 2_process/src/choose_timesteps.R | 2 +- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/2_process.yml b/2_process.yml index d6ebd71..f7568d9 100644 --- a/2_process.yml +++ b/2_process.yml @@ -15,6 +15,7 @@ file_extensions: sources: - 2_process/src/choose_timesteps.R - 2_process/src/process_site_locations_to_sp.R + - 2_process/src/process_dv_stats.R targets: @@ -22,6 +23,7 @@ targets: depends: - 2_process/out/timesteps.rds.ind - 2_process/out/site_locations_sp.rds.ind + - 2_process/out/dv_stats.rds.ind # -- config -- proj_str: @@ -39,3 +41,12 @@ targets: 2_process/out/site_locations_sp.rds: command: gd_get('2_process/out/site_locations_sp.rds.ind') + 2_process/out/dv_stats.rds.ind: + command: process_dv_stats( + ind_file = target_name, + dv_data_ind = '1_fetch/out/dv_data.rds.ind', + site_stats_ind = '1_fetch/out/site_stats.rds.ind', + dates = dates, + percentiles = percentiles) + 2_process/out/dv_stats.rds: + command: gd_get('2_process/out/dv_stats.rds.ind') diff --git a/2_process/src/choose_timesteps.R b/2_process/src/choose_timesteps.R index af5fe4b..9299a47 100644 --- a/2_process/src/choose_timesteps.R +++ b/2_process/src/choose_timesteps.R @@ -1,5 +1,5 @@ choose_timesteps <- function(ind_file, dates) { - timesteps <- seq(as.POSIXct(dates$start, tz = "UTC"), as.POSIXct(dates$end, tz = "UTC"), by = 'hours') + timesteps <- seq(as.POSIXct(dates$start, tz = "UTC"), as.POSIXct(dates$end, tz = "UTC"), by = 'days') data_file <- as_data_file(ind_file) saveRDS(timesteps, data_file) gd_put(ind_file, data_file) From bd0477897534566c8af94b1a15bccfa49967fce1 Mon Sep 17 00:00:00 2001 From: Carr Date: Tue, 6 Nov 2018 10:49:20 -0600 Subject: [PATCH 02/14] update how dv_data and site_stats are fetched --- 1_fetch.yml | 4 +- 1_fetch/out/dv_data.rds.ind | 2 +- 1_fetch/out/site_stats.rds.ind | 3 +- 1_fetch/src/fetch_dv_data.R | 37 +++++++++++-------- 1_fetch/src/fetch_site_stats.R | 17 ++++++++- .../MV9mZXRjaC9vdXQvZHZfZGF0YS5yZHMuaW5k.yml | 7 ++-- 6 files changed, 46 insertions(+), 24 deletions(-) diff --git a/1_fetch.yml b/1_fetch.yml index fdfa71e..b41c4f8 100644 --- a/1_fetch.yml +++ b/1_fetch.yml @@ -4,6 +4,7 @@ packages: - dataRetrieval - dplyr - scipiper + - tidyr - yaml file_extensions: @@ -66,7 +67,8 @@ targets: command: fetch_dv_data( ind_file = target_name, sites_ind = '1_fetch/out/sites.rds.ind', - dates = dates) + dates = dates, + request_limit = request_limit) 1_fetch/out/dv_data.rds: command: gd_get('1_fetch/out/dv_data.rds.ind') diff --git a/1_fetch/out/dv_data.rds.ind b/1_fetch/out/dv_data.rds.ind index 97b7f26..e3e7f9f 100644 --- a/1_fetch/out/dv_data.rds.ind +++ b/1_fetch/out/dv_data.rds.ind @@ -1,2 +1,2 @@ -hash: 1526c1af515d9c34c65bfda095715f4d +hash: 59ddf61dcaec1f0d16aa1e81bb853d2e diff --git a/1_fetch/out/site_stats.rds.ind b/1_fetch/out/site_stats.rds.ind index 94dddfc..92fca9f 100644 --- a/1_fetch/out/site_stats.rds.ind +++ b/1_fetch/out/site_stats.rds.ind @@ -1,2 +1,3 @@ -hash: 4481da677798c03c96badb7d7303ac09 +warning: dry_put=TRUE; not actually pushed +hash: 8fe396018783020a2d6e412032d140bf diff --git a/1_fetch/src/fetch_dv_data.R b/1_fetch/src/fetch_dv_data.R index 2d30307..26985a0 100644 --- a/1_fetch/src/fetch_dv_data.R +++ b/1_fetch/src/fetch_dv_data.R @@ -3,31 +3,36 @@ #' @param ind_file character file name where the output should be saved #' @param sites_ind indicator file for the vector of site numbers #' @param dates object from viz_config.yml that specifies dates as string -fetch_dv_data <- function(ind_file, sites_ind, dates){ +#' @param request_limit number indicating how many sites to include per dataRetrieval request (from viz_config.yml) +fetch_dv_data <- function(ind_file, sites_ind, dates, request_limit){ sites <- readRDS(scipiper::sc_retrieve(sites_ind, remake_file = '1_fetch.yml')) - dv_sites_data <- lapply(sites, FUN = function(x){ - d <- dataRetrieval::readNWISdata( - service="dv", - site = x, - parameterCd = "00060", - startDate = dates$start, - endDate = dates$end) %>% + req_bks <- seq(1, length(sites), by=request_limit) + dv_data <- data.frame() + for(i in req_bks) { + last_site <- i+request_limit-1 + get_sites <- sites[i:last_site] + data_i <- dataRetrieval::readNWISdata( + service = "dv", + site = get_sites, + parameterCd = "00060", + startDate = dates$start, + endDate = dates$end) %>% dataRetrieval::renameNWISColumns() - - if(nrow(d) > 0 && any(names(d) == "Flow")) { - d[, c("dateTime", "Flow")] # keep only dateTime and Flow columns + + if(nrow(data_i) > 0 && any(names(data_i) == "Flow")) { + data_i <- data_i[, c("site_no", "dateTime", "Flow")] # keep only dateTime and Flow columns } else { - NULL # no data returned situation + data_i <- NULL # no data returned situation } - }) - - names(dv_sites_data) <- sites + dv_data <- rbind(dv_data, data_i) + print(paste("Completed", last_site, "of", length(sites))) + } # Write the data file and the indicator file data_file <- scipiper::as_data_file(ind_file) - saveRDS(dv_sites_data, data_file) + saveRDS(dv_data, data_file) scipiper::gd_put(ind_file, data_file) } diff --git a/1_fetch/src/fetch_site_stats.R b/1_fetch/src/fetch_site_stats.R index f5ba8b9..1f02c89 100644 --- a/1_fetch/src/fetch_site_stats.R +++ b/1_fetch/src/fetch_site_stats.R @@ -20,13 +20,26 @@ fetch_site_stats <- function(ind_file, sites_ind, request_limit, percentiles){ parameterCd = "00060", statReportType="daily", statType=paste0("P", percentiles) - )) + )) %>% + dplyr::select(-agency_cd, -parameter_cd, -ts_id, -loc_web_ds) stat_data <- rbind(stat_data, current_sites) print(paste("Completed", last_site, "of", length(sites))) } + # For duplicated site stats, pick the result with the more recent end_yr + # E.g. Site number 12010000 has two sets of stats for some of it's data + # filter by January 1 and you will see one set from 1930 - 2003 and one + # from 1930 - 2018. Filter so that only the 2018 one is used. + stat_data_unique <- stat_data %>% + tidyr::unite(mashed, site_no, month_nu, day_nu) %>% + dplyr::group_by(mashed) %>% + dplyr::filter(end_yr == max(end_yr)) %>% + dplyr::ungroup() %>% + tidyr::separate(mashed, c("site_no", "month_nu", "day_nu"), sep = "_") %>% + dplyr::mutate(month_nu = as.numeric(month_nu), day_nu = as.numeric(day_nu)) + # Write the data file and the indicator file data_file <- scipiper::as_data_file(ind_file) - saveRDS(stat_data, data_file) + saveRDS(stat_data_unique, data_file) scipiper::gd_put(ind_file, data_file) } diff --git a/build/status/MV9mZXRjaC9vdXQvZHZfZGF0YS5yZHMuaW5k.yml b/build/status/MV9mZXRjaC9vdXQvZHZfZGF0YS5yZHMuaW5k.yml index f0d6bec..e7cd48b 100644 --- a/build/status/MV9mZXRjaC9vdXQvZHZfZGF0YS5yZHMuaW5k.yml +++ b/build/status/MV9mZXRjaC9vdXQvZHZfZGF0YS5yZHMuaW5k.yml @@ -1,13 +1,14 @@ version: 0.3.0 name: 1_fetch/out/dv_data.rds.ind type: file -hash: 05742a4b41d7e71d56f2bd660559dd8b -time: 2018-11-05 18:05:27 UTC +hash: ed2b014edf914a2b3ec011a6359b74fa +time: 2018-11-05 22:14:25 UTC depends: 1_fetch/out/sites.rds.ind: f18e7145934e7f8e0f6cd3f05c7bc519 dates: c87840fa717587715effdfedc24e8fca + request_limit: 71db8a6cad03244e6e50f0ad8bc95a65 fixed: df6396a8b26184192ecb1f70ffb6bfc3 code: functions: - fetch_dv_data: 7ecfcaa25b7b767408d92f2a7ddb170a + fetch_dv_data: 14ab9455bf45ef4063299133b7a62e8e From 16cec6f962d2e8bcbaa8cc75ba19fc81f615b973 Mon Sep 17 00:00:00 2001 From: Carr Date: Tue, 6 Nov 2018 14:42:49 -0600 Subject: [PATCH 03/14] fix dv_data fetch to remove duplicates --- 1_fetch/src/fetch_dv_data.R | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/1_fetch/src/fetch_dv_data.R b/1_fetch/src/fetch_dv_data.R index 26985a0..1c096d1 100644 --- a/1_fetch/src/fetch_dv_data.R +++ b/1_fetch/src/fetch_dv_data.R @@ -13,13 +13,15 @@ fetch_dv_data <- function(ind_file, sites_ind, dates, request_limit){ for(i in req_bks) { last_site <- i+request_limit-1 get_sites <- sites[i:last_site] - data_i <- dataRetrieval::readNWISdata( - service = "dv", - site = get_sites, - parameterCd = "00060", - startDate = dates$start, - endDate = dates$end) %>% - dataRetrieval::renameNWISColumns() + data_i <- + dataRetrieval::readNWISdata( + service = "dv", + statCd = "00003", # need this to avoid NAs + site = get_sites, + parameterCd = "00060", + startDate = dates$start, + endDate = dates$end) %>% + dataRetrieval::renameNWISColumns() if(nrow(data_i) > 0 && any(names(data_i) == "Flow")) { data_i <- data_i[, c("site_no", "dateTime", "Flow")] # keep only dateTime and Flow columns @@ -31,8 +33,10 @@ fetch_dv_data <- function(ind_file, sites_ind, dates, request_limit){ print(paste("Completed", last_site, "of", length(sites))) } + dv_data_unique <- dplyr::distinct(dv_data) # need this to avoid some duplicates + # Write the data file and the indicator file data_file <- scipiper::as_data_file(ind_file) - saveRDS(dv_data, data_file) + saveRDS(dv_data_unique, data_file) scipiper::gd_put(ind_file, data_file) } From 6c3fa3e23bccb29a493c941ea230f9a5206b13c5 Mon Sep 17 00:00:00 2001 From: Carr Date: Tue, 6 Nov 2018 14:44:08 -0600 Subject: [PATCH 04/14] add process step for getting unique site stats --- 1_fetch/src/fetch_site_stats.R | 14 +------------ 2_process.yml | 11 +++++++++- 2_process/src/process_site_stats.R | 33 ++++++++++++++++++++++++++++++ 3 files changed, 44 insertions(+), 14 deletions(-) create mode 100644 2_process/src/process_site_stats.R diff --git a/1_fetch/src/fetch_site_stats.R b/1_fetch/src/fetch_site_stats.R index 1f02c89..8cb8f41 100644 --- a/1_fetch/src/fetch_site_stats.R +++ b/1_fetch/src/fetch_site_stats.R @@ -26,20 +26,8 @@ fetch_site_stats <- function(ind_file, sites_ind, request_limit, percentiles){ print(paste("Completed", last_site, "of", length(sites))) } - # For duplicated site stats, pick the result with the more recent end_yr - # E.g. Site number 12010000 has two sets of stats for some of it's data - # filter by January 1 and you will see one set from 1930 - 2003 and one - # from 1930 - 2018. Filter so that only the 2018 one is used. - stat_data_unique <- stat_data %>% - tidyr::unite(mashed, site_no, month_nu, day_nu) %>% - dplyr::group_by(mashed) %>% - dplyr::filter(end_yr == max(end_yr)) %>% - dplyr::ungroup() %>% - tidyr::separate(mashed, c("site_no", "month_nu", "day_nu"), sep = "_") %>% - dplyr::mutate(month_nu = as.numeric(month_nu), day_nu = as.numeric(day_nu)) - # Write the data file and the indicator file data_file <- scipiper::as_data_file(ind_file) - saveRDS(stat_data_unique, data_file) + saveRDS(stat_data, data_file) scipiper::gd_put(ind_file, data_file) } diff --git a/2_process.yml b/2_process.yml index f7568d9..d644ea5 100644 --- a/2_process.yml +++ b/2_process.yml @@ -15,6 +15,7 @@ file_extensions: sources: - 2_process/src/choose_timesteps.R - 2_process/src/process_site_locations_to_sp.R + - 2_process/src/process_site_stats.R - 2_process/src/process_dv_stats.R targets: @@ -23,6 +24,7 @@ targets: depends: - 2_process/out/timesteps.rds.ind - 2_process/out/site_locations_sp.rds.ind + - 2_process/out/site_stats_clean.rds.ind - 2_process/out/dv_stats.rds.ind # -- config -- @@ -41,11 +43,18 @@ targets: 2_process/out/site_locations_sp.rds: command: gd_get('2_process/out/site_locations_sp.rds.ind') + 2_process/out/site_stats_clean.rds.ind: + command: process_site_stats( + ind_file = target_name, + site_stats_ind = '1_fetch/out/site_stats.rds.ind') + 2_process/out/site_stats_clean.rds: + command: gd_get('2_process/out/site_stats_clean.rds.ind') + 2_process/out/dv_stats.rds.ind: command: process_dv_stats( ind_file = target_name, dv_data_ind = '1_fetch/out/dv_data.rds.ind', - site_stats_ind = '1_fetch/out/site_stats.rds.ind', + site_stats_clean_ind = '2_process/out/site_stats_clean.rds.ind', dates = dates, percentiles = percentiles) 2_process/out/dv_stats.rds: diff --git a/2_process/src/process_site_stats.R b/2_process/src/process_site_stats.R new file mode 100644 index 0000000..c30444f --- /dev/null +++ b/2_process/src/process_site_stats.R @@ -0,0 +1,33 @@ +#' @title Clean up the site statistics data to eliminate duplicates +#' +#' @param ind_file character file name where the output should be saved +#' @param site_stats_ind indicator file for the data frame of site statistics +process_site_stats <- function(ind_file, site_stats_ind){ + + stat_data <- readRDS(scipiper::sc_retrieve(site_stats_ind, remake_file = '1_fetch.yml')) + + # For duplicated site stats, pick the result with the more recent end_yr + # E.g. Site number 12010000 has two sets of stats for some of it's data + # filter by January 1 and you will see one set from 1930 - 2003 and one + # from 1930 - 2018. Filter so that only the 2018 one is used. + stat_data_unique <- stat_data %>% + dplyr::mutate(nyears = end_yr - begin_yr) %>% + tidyr::unite(mashed, site_no, month_nu, day_nu) %>% + dplyr::distinct() %>% # some of the stats are literally exact copies + dplyr::group_by(mashed) %>% + dplyr::mutate(same_window = any(duplicated(nyears))) %>% + dplyr::filter(ifelse(!same_window, + # pick the stat that has more years of data + nyears == max(nyears), + # if there are > 1 with the same number of years, + # pick the more recent stat + end_yr == max(end_yr))) %>% + dplyr::ungroup() %>% + tidyr::separate(mashed, c("site_no", "month_nu", "day_nu"), sep = "_") %>% + dplyr::mutate(month_nu = as.numeric(month_nu), day_nu = as.numeric(day_nu)) + + # Write the data file and the indicator file + data_file <- scipiper::as_data_file(ind_file) + saveRDS(stat_data_unique, data_file) + scipiper::gd_put(ind_file, data_file) +} From b1a783aa55e4ada9a201798572d75f541038ec4c Mon Sep 17 00:00:00 2001 From: Carr Date: Tue, 6 Nov 2018 14:44:57 -0600 Subject: [PATCH 05/14] calculate the percentiles for all daily values --- 2_process/src/process_dv_stats.R | 70 ++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 2_process/src/process_dv_stats.R diff --git a/2_process/src/process_dv_stats.R b/2_process/src/process_dv_stats.R new file mode 100644 index 0000000..3d35a22 --- /dev/null +++ b/2_process/src/process_dv_stats.R @@ -0,0 +1,70 @@ +#' @title Calculate the stat category for each gage's discharge value +#' +#' @param ind_file character file name where the output should be saved +#' @param dv_data_ind indicator file for the data.frame of dv_data +#' @param site_stats_clean_ind indicator file for the data.frame of dv stats for each site +#' @param dates object from viz_config.yml that specifies dates as string +#' @param percentiles character vector of the types of stats to include, i.e. `c("10", "75")` +#' will return the 10th and 75th percentiles (from viz_config.yml) +process_dv_stats <- function(ind_file, dv_data_ind, site_stats_clean_ind, dates, percentiles){ + + dv_data <- readRDS(scipiper::sc_retrieve(dv_data_ind, remake_file = '1_fetch.yml')) + site_stats <- readRDS(scipiper::sc_retrieve(site_stats_clean_ind, remake_file = '2_process.yml')) + + # breakdown date into month & day pairs + dv_data_md <- dv_data %>% + dplyr::mutate(month_nu = as.numeric(format(dateTime, "%m")), + day_nu = as.numeric(format(dateTime, "%d"))) + + # merge stats with the dv data + # merge still results in extra rows - 24 extra to be exact + dv_with_stats <- left_join(dv_data_md, site_stats, by = c("site_no", "month_nu", "day_nu")) + + stat_colnames <- sprintf("p%s_va", percentiles) + stat_perc <- as.numeric(percentiles)/100 + + int_per <- function(df){ + df <- select(df, "dv_val", one_of(stat_colnames)) + out <- rep(NA, nrow(df)) + + for (i in 1:length(out)){ + dv_val <- df$dv_val[i] + + df_i <- slice(df, i) %>% + select(-dv_val) %>% + tidyr::gather(stat_name, stat_value) %>% + mutate(stat_value = as.numeric(stat_value), + stat_type = as.numeric(gsub("p|_va", "", stat_name))/100) + + y <- df_i$stat_type + x <- df_i$stat_value + nas <- is.na(x) + x <- x[!nas] + y <- y[!nas] + if (length(unique(x)) < 2){ + out[i] <- NA + } else if (dv_val < x[1L]){ # the first and last *have* to be numbers per filtering criteria + out[i] <- head(stat_perc, 1) + } else if (dv_val > tail(x, 1L)){ + out[i] <- tail(stat_perc, 1) + } else { + out[i] <- approx(x, y, xout = dv_val)$y + } + } + return(out) + + } + + dv_stats <- dv_with_stats %>% + mutate(dv_val = Flow) %>% + filter_(sprintf("!is.na(%s)", stat_colnames[1]), + sprintf("!is.na(%s)", tail(stat_colnames,1)), + sprintf("!is.na(%s)", "dv_val")) %>% + mutate(per = int_per(.)) %>% + select(site_no, dateTime, dv_val, per, p50_va) + + # Write the data file and the indicator file + data_file <- scipiper::as_data_file(ind_file) + saveRDS(dv_stats, data_file) + scipiper::gd_put(ind_file, data_file) +} From 919cabca035895640d02993049c0aac99fd43716 Mon Sep 17 00:00:00 2001 From: Carr Date: Tue, 6 Nov 2018 14:45:42 -0600 Subject: [PATCH 06/14] calculate colors based on the dv percentiles --- 2_process.yml | 12 ++++++++++++ 2_process/src/process_dv_stat_colors.R | 20 ++++++++++++++++++++ viz_config.yml | 1 + 3 files changed, 33 insertions(+) create mode 100644 2_process/src/process_dv_stat_colors.R diff --git a/2_process.yml b/2_process.yml index d644ea5..e179d2e 100644 --- a/2_process.yml +++ b/2_process.yml @@ -17,6 +17,7 @@ sources: - 2_process/src/process_site_locations_to_sp.R - 2_process/src/process_site_stats.R - 2_process/src/process_dv_stats.R + - 2_process/src/process_dv_stat_colors.R targets: @@ -26,10 +27,13 @@ targets: - 2_process/out/site_locations_sp.rds.ind - 2_process/out/site_stats_clean.rds.ind - 2_process/out/dv_stats.rds.ind + - 2_process/out/dv_stat_colors.rds.ind # -- config -- proj_str: command: viz_config[[I('projection')]] + color_palette: + command: viz_config[[I('color_palette')]] 2_process/out/timesteps.rds.ind: command: choose_timesteps(target_name, dates = dates) @@ -59,3 +63,11 @@ targets: percentiles = percentiles) 2_process/out/dv_stats.rds: command: gd_get('2_process/out/dv_stats.rds.ind') + + 2_process/out/dv_stat_colors.rds.ind: + command: process_dv_stat_colors( + ind_file = target_name, + dv_stats_ind = '2_process/out/dv_stats.rds.ind', + color_palette = color_palette) + 2_process/out/dv_stat_colors.rds: + command: gd_get('2_process/out/dv_stat_colors.rds.ind') diff --git a/2_process/src/process_dv_stat_colors.R b/2_process/src/process_dv_stat_colors.R new file mode 100644 index 0000000..7e3d65a --- /dev/null +++ b/2_process/src/process_dv_stat_colors.R @@ -0,0 +1,20 @@ +#' @title Compute the color for each daily value percentile +#' +#' @param ind_file character file name where the output should be saved +#' @param dv_stats_ind indicator file for the data.frame of dv_data +#' @param color_palette list of colors to use for the color ramp (from viz_config.yml) +process_dv_stat_colors <- function(ind_file, dv_stats_ind, color_palette){ + + dv_stats <- readRDS(scipiper::sc_retrieve(dv_stats_ind, remake_file = '2_process.yml')) + col_fun <- colorRamp(color_palette) + + # just removing NA percentiles for now + dv_stats_with_color <- dv_stats %>% + filter(!is.na(per)) %>% + mutate(color = rgb(col_fun(per), maxColorValue = 255)) # don't know how necessary maxColorValue is + + # Write the data file and the indicator file + data_file <- scipiper::as_data_file(ind_file) + saveRDS(dv_stats_with_color, data_file) + scipiper::gd_put(ind_file, data_file) +} diff --git a/viz_config.yml b/viz_config.yml index cadbcb4..28735a6 100644 --- a/viz_config.yml +++ b/viz_config.yml @@ -15,6 +15,7 @@ request_limit: 10 percentiles: ["05","10","20","25","50","75","80","90","95"] # styling +color_palette: ['#ca0020','#f4a582','#efefef','#efefef','#92c5de','#034064'] gage_line_col: "#3c829c" gage_norm_col: "#4BA3C3" legend_text: From 9c0b0c1cd341cd6b0a484ee458f0b4c9a85edc15 Mon Sep 17 00:00:00 2001 From: Carr Date: Tue, 6 Nov 2018 14:51:52 -0600 Subject: [PATCH 07/14] delete extra Rproj file --- vizstorm-GIF.Rproj | 17 ----------------- 1 file changed, 17 deletions(-) delete mode 100644 vizstorm-GIF.Rproj diff --git a/vizstorm-GIF.Rproj b/vizstorm-GIF.Rproj deleted file mode 100644 index 97b47c3..0000000 --- a/vizstorm-GIF.Rproj +++ /dev/null @@ -1,17 +0,0 @@ -Version: 1.0 - -RestoreWorkspace: No -SaveWorkspace: No -AlwaysSaveHistory: Default - -EnableCodeIndexing: Yes -UseSpacesForTab: Yes -NumSpacesForTab: 2 -Encoding: UTF-8 - -RnwWeave: knitr -LaTeX: pdfLaTeX - -AutoAppendNewline: Yes -StripTrailingWhitespace: Yes -LineEndingConversion: Posix From f9fc6435b18638112b118dca2441ad9a0a6ab63f Mon Sep 17 00:00:00 2001 From: Carr Date: Tue, 6 Nov 2018 17:06:54 -0600 Subject: [PATCH 08/14] a few more measures to request unique data only --- 1_fetch/src/fetch_dv_data.R | 23 +++++++++++++---------- 1_fetch/src/fetch_dv_sites.R | 19 ++++++++++--------- 1_fetch/src/fetch_site_stats.R | 17 ++++++++++------- 3 files changed, 33 insertions(+), 26 deletions(-) diff --git a/1_fetch/src/fetch_dv_data.R b/1_fetch/src/fetch_dv_data.R index 1c096d1..5b3ac65 100644 --- a/1_fetch/src/fetch_dv_data.R +++ b/1_fetch/src/fetch_dv_data.R @@ -1,40 +1,43 @@ #' @title Download the discharge from NWIS for each dv gage -#' +#' #' @param ind_file character file name where the output should be saved #' @param sites_ind indicator file for the vector of site numbers #' @param dates object from viz_config.yml that specifies dates as string #' @param request_limit number indicating how many sites to include per dataRetrieval request (from viz_config.yml) fetch_dv_data <- function(ind_file, sites_ind, dates, request_limit){ - + sites <- readRDS(scipiper::sc_retrieve(sites_ind, remake_file = '1_fetch.yml')) - + req_bks <- seq(1, length(sites), by=request_limit) dv_data <- data.frame() for(i in req_bks) { last_site <- i+request_limit-1 + if(i == tail(req_bks, 1) && last_site > length(sites)) { + last_site <- length(sites) + } get_sites <- sites[i:last_site] - data_i <- + data_i <- dataRetrieval::readNWISdata( service = "dv", statCd = "00003", # need this to avoid NAs site = get_sites, parameterCd = "00060", startDate = dates$start, - endDate = dates$end) %>% - dataRetrieval::renameNWISColumns() - + endDate = dates$end) %>% + dataRetrieval::renameNWISColumns() + if(nrow(data_i) > 0 && any(names(data_i) == "Flow")) { data_i <- data_i[, c("site_no", "dateTime", "Flow")] # keep only dateTime and Flow columns } else { data_i <- NULL # no data returned situation } - + dv_data <- rbind(dv_data, data_i) print(paste("Completed", last_site, "of", length(sites))) } - + dv_data_unique <- dplyr::distinct(dv_data) # need this to avoid some duplicates - + # Write the data file and the indicator file data_file <- scipiper::as_data_file(ind_file) saveRDS(dv_data_unique, data_file) diff --git a/1_fetch/src/fetch_dv_sites.R b/1_fetch/src/fetch_dv_sites.R index 853a24b..0fc7b80 100644 --- a/1_fetch/src/fetch_dv_sites.R +++ b/1_fetch/src/fetch_dv_sites.R @@ -1,25 +1,26 @@ #' @title Fetch appropriate daily value sites from NWIS -#' +#' #' @param ind_file character file name where the output should be saved #' @param dates object from viz_config.yml that specifies dates as string fetch_dv_sites <- function(ind_file, dates){ hucs <- zeroPad(1:21, 2) # all hucs - + sites <- c() for(huc in hucs){ - sites <- + sites <- dataRetrieval::whatNWISdata( - huc = huc, - service = "dv", + huc = huc, + service = "dv", startDate = dates$start, endDate = dates$end, - parameterCd = "00060", - statCd = "00003") %>% - dplyr::pull(site_no) %>% + parameterCd = "00060", + statCd = "00003") %>% + dplyr::distinct() %>% + dplyr::pull(site_no) %>% c(sites) } - + # Write the data file and the indicator file data_file <- scipiper::as_data_file(ind_file) saveRDS(sites, data_file) diff --git a/1_fetch/src/fetch_site_stats.R b/1_fetch/src/fetch_site_stats.R index 8cb8f41..01c1c83 100644 --- a/1_fetch/src/fetch_site_stats.R +++ b/1_fetch/src/fetch_site_stats.R @@ -1,31 +1,34 @@ #' @title Get the discharge quantiles for each dv gage -#' +#' #' @param ind_file character file name where the output should be saved #' @param sites_ind indicator file for the vector of site numbers #' @param request_limit number indicating how many sites to include per dataRetrieval request (from viz_config.yml) -#' @param percentiles character vector of the types of stats to include, i.e. `c("10", "75")` +#' @param percentiles character vector of the types of stats to include, i.e. `c("10", "75")` #' will return the 10th and 75th percentiles (from viz_config.yml) fetch_site_stats <- function(ind_file, sites_ind, request_limit, percentiles){ - + sites <- readRDS(scipiper::sc_retrieve(sites_ind, remake_file = '1_fetch.yml')) - + req_bks <- seq(1, length(sites), by=request_limit) stat_data <- data.frame() for(i in req_bks) { last_site <- i+request_limit-1 + if(i == tail(req_bks, 1) && last_site > length(sites)) { + last_site <- length(sites) + } get_sites <- sites[i:last_site] current_sites <- suppressMessages( dataRetrieval::readNWISstat( siteNumbers = get_sites, - parameterCd = "00060", + parameterCd = "00060", statReportType="daily", statType=paste0("P", percentiles) - )) %>% + )) %>% dplyr::select(-agency_cd, -parameter_cd, -ts_id, -loc_web_ds) stat_data <- rbind(stat_data, current_sites) print(paste("Completed", last_site, "of", length(sites))) } - + # Write the data file and the indicator file data_file <- scipiper::as_data_file(ind_file) saveRDS(stat_data, data_file) From f4cb8ec293953ae918182ebcdea390528e9c9c30 Mon Sep 17 00:00:00 2001 From: Carr Date: Tue, 6 Nov 2018 17:12:49 -0600 Subject: [PATCH 09/14] run scmake(remake_file='2_process.yml') --- 1_fetch/out/dv_data.rds.ind | 2 +- 1_fetch/out/site_locations.rds.ind | 2 +- 1_fetch/out/site_stats.rds.ind | 3 +-- 1_fetch/out/sites.rds.ind | 2 +- 2_process/out/dv_stat_colors.rds.ind | 2 ++ 2_process/out/dv_stats.rds.ind | 2 ++ 2_process/out/site_locations_sp.rds.ind | 2 +- 2_process/out/site_stats_clean.rds.ind | 2 ++ 2_process/out/timesteps.rds.ind | 2 +- .../MV9mZXRjaC9vdXQvZHZfZGF0YS5yZHMuaW5k.yml | 8 ++++---- ...ZXRjaC9vdXQvc2l0ZV9sb2NhdGlvbnMucmRzLmluZA.yml | 6 +++--- .../status/MV9mZXRjaC9vdXQvc2l0ZXMucmRzLmluZA.yml | 6 +++--- ...Ml9wcm9jZXNzL291dC90aW1lc3RlcHMucmRzLmluZA.yml | 6 +++--- ...9jZXNzL291dC9kdl9zdGF0X2NvbG9ycy5yZHMuaW5k.yml | 13 +++++++++++++ .../Ml9wcm9jZXNzL291dC9kdl9zdGF0cy5yZHMuaW5k.yml | 15 +++++++++++++++ ...NzL291dC9zaXRlX2xvY2F0aW9uc19zcC5yZHMuaW5k.yml | 6 +++--- 16 files changed, 56 insertions(+), 23 deletions(-) create mode 100644 2_process/out/dv_stat_colors.rds.ind create mode 100644 2_process/out/dv_stats.rds.ind create mode 100644 2_process/out/site_stats_clean.rds.ind create mode 100644 build/status/Ml9wcm9jZXNzL291dC9kdl9zdGF0X2NvbG9ycy5yZHMuaW5k.yml create mode 100644 build/status/Ml9wcm9jZXNzL291dC9kdl9zdGF0cy5yZHMuaW5k.yml diff --git a/1_fetch/out/dv_data.rds.ind b/1_fetch/out/dv_data.rds.ind index e3e7f9f..a1e28a9 100644 --- a/1_fetch/out/dv_data.rds.ind +++ b/1_fetch/out/dv_data.rds.ind @@ -1,2 +1,2 @@ -hash: 59ddf61dcaec1f0d16aa1e81bb853d2e +hash: 9d304e4bada2c2c115e4c664d129823b diff --git a/1_fetch/out/site_locations.rds.ind b/1_fetch/out/site_locations.rds.ind index f7eb39e..aac3b11 100644 --- a/1_fetch/out/site_locations.rds.ind +++ b/1_fetch/out/site_locations.rds.ind @@ -1,2 +1,2 @@ -hash: a2002214eb302dee23e475fa6379df85 +hash: bdf819be202ded812280a27a0c45c9de diff --git a/1_fetch/out/site_stats.rds.ind b/1_fetch/out/site_stats.rds.ind index 92fca9f..6b10657 100644 --- a/1_fetch/out/site_stats.rds.ind +++ b/1_fetch/out/site_stats.rds.ind @@ -1,3 +1,2 @@ -warning: dry_put=TRUE; not actually pushed -hash: 8fe396018783020a2d6e412032d140bf +hash: f9de080a31361f1bd22a2a6b59f746cb diff --git a/1_fetch/out/sites.rds.ind b/1_fetch/out/sites.rds.ind index 36d7d70..e0adaa2 100644 --- a/1_fetch/out/sites.rds.ind +++ b/1_fetch/out/sites.rds.ind @@ -1,2 +1,2 @@ -hash: 53ac4d4371319321a9840cb6801ca326 +hash: 2e5992fb50c4fd9cd43fe4b6ac5335a6 diff --git a/2_process/out/dv_stat_colors.rds.ind b/2_process/out/dv_stat_colors.rds.ind new file mode 100644 index 0000000..0d3a028 --- /dev/null +++ b/2_process/out/dv_stat_colors.rds.ind @@ -0,0 +1,2 @@ +hash: 2fffd2ca1a8032c5709770869fab3bbe + diff --git a/2_process/out/dv_stats.rds.ind b/2_process/out/dv_stats.rds.ind new file mode 100644 index 0000000..79fe93f --- /dev/null +++ b/2_process/out/dv_stats.rds.ind @@ -0,0 +1,2 @@ +hash: 4167874102333723dd8b0191c52a612d + diff --git a/2_process/out/site_locations_sp.rds.ind b/2_process/out/site_locations_sp.rds.ind index d96951a..08ae153 100644 --- a/2_process/out/site_locations_sp.rds.ind +++ b/2_process/out/site_locations_sp.rds.ind @@ -1,2 +1,2 @@ -hash: 432df1d7c12cc9ba0a42c544be5b3c33 +hash: 0d690807b3803cbb87ba1818fec9ce33 diff --git a/2_process/out/site_stats_clean.rds.ind b/2_process/out/site_stats_clean.rds.ind new file mode 100644 index 0000000..e309f02 --- /dev/null +++ b/2_process/out/site_stats_clean.rds.ind @@ -0,0 +1,2 @@ +hash: 5854dcede0a39113e95d3fecce8982de + diff --git a/2_process/out/timesteps.rds.ind b/2_process/out/timesteps.rds.ind index ffdb864..e65ea58 100644 --- a/2_process/out/timesteps.rds.ind +++ b/2_process/out/timesteps.rds.ind @@ -1,2 +1,2 @@ -hash: 0bcfda07c078bd4ad4d3c9b995e50712 +hash: 0a7f4eb0123a895c664e964592d19f8c diff --git a/build/status/MV9mZXRjaC9vdXQvZHZfZGF0YS5yZHMuaW5k.yml b/build/status/MV9mZXRjaC9vdXQvZHZfZGF0YS5yZHMuaW5k.yml index e7cd48b..4c8ee01 100644 --- a/build/status/MV9mZXRjaC9vdXQvZHZfZGF0YS5yZHMuaW5k.yml +++ b/build/status/MV9mZXRjaC9vdXQvZHZfZGF0YS5yZHMuaW5k.yml @@ -1,14 +1,14 @@ version: 0.3.0 name: 1_fetch/out/dv_data.rds.ind type: file -hash: ed2b014edf914a2b3ec011a6359b74fa -time: 2018-11-05 22:14:25 UTC +hash: 6e08dc56a248b9a7b52a5255f71f1b16 +time: 2018-11-06 22:59:45 UTC depends: - 1_fetch/out/sites.rds.ind: f18e7145934e7f8e0f6cd3f05c7bc519 + 1_fetch/out/sites.rds.ind: d0daf83d52f6bcd684e8aee83f5d44f4 dates: c87840fa717587715effdfedc24e8fca request_limit: 71db8a6cad03244e6e50f0ad8bc95a65 fixed: df6396a8b26184192ecb1f70ffb6bfc3 code: functions: - fetch_dv_data: 14ab9455bf45ef4063299133b7a62e8e + fetch_dv_data: 39dac13ad9fec1f57bd6a49eb20ede5e diff --git a/build/status/MV9mZXRjaC9vdXQvc2l0ZV9sb2NhdGlvbnMucmRzLmluZA.yml b/build/status/MV9mZXRjaC9vdXQvc2l0ZV9sb2NhdGlvbnMucmRzLmluZA.yml index 2462028..4de9c0f 100644 --- a/build/status/MV9mZXRjaC9vdXQvc2l0ZV9sb2NhdGlvbnMucmRzLmluZA.yml +++ b/build/status/MV9mZXRjaC9vdXQvc2l0ZV9sb2NhdGlvbnMucmRzLmluZA.yml @@ -1,10 +1,10 @@ version: 0.3.0 name: 1_fetch/out/site_locations.rds.ind type: file -hash: 1361f1b1aad0236c52b3059f5c145eea -time: 2018-11-06 14:53:28 UTC +hash: 9f46596111066903b3a9a736d3b4492e +time: 2018-11-06 21:38:59 UTC depends: - 1_fetch/out/sites.rds.ind: f4ac12b049dc443782c13d4a040a3509 + 1_fetch/out/sites.rds.ind: d0daf83d52f6bcd684e8aee83f5d44f4 fixed: 815cf73829504ede512d11cef2f0b77a code: functions: diff --git a/build/status/MV9mZXRjaC9vdXQvc2l0ZXMucmRzLmluZA.yml b/build/status/MV9mZXRjaC9vdXQvc2l0ZXMucmRzLmluZA.yml index 711a0fa..71b28e2 100644 --- a/build/status/MV9mZXRjaC9vdXQvc2l0ZXMucmRzLmluZA.yml +++ b/build/status/MV9mZXRjaC9vdXQvc2l0ZXMucmRzLmluZA.yml @@ -1,12 +1,12 @@ version: 0.3.0 name: 1_fetch/out/sites.rds.ind type: file -hash: f4ac12b049dc443782c13d4a040a3509 -time: 2018-11-06 14:53:11 UTC +hash: d0daf83d52f6bcd684e8aee83f5d44f4 +time: 2018-11-06 21:38:33 UTC depends: dates: c87840fa717587715effdfedc24e8fca fixed: 7b38a85f946fecf4cde480abb9a88a9b code: functions: - fetch_dv_sites: 62a83ab6784d9283e1b3830c054fc745 + fetch_dv_sites: 19cbacd0abd7ed1ffead1c05b508be8e diff --git a/build/status/Ml9wcm9jZXNzL291dC90aW1lc3RlcHMucmRzLmluZA.yml b/build/status/Ml9wcm9jZXNzL291dC90aW1lc3RlcHMucmRzLmluZA.yml index 87b7172..0cf92b6 100644 --- a/build/status/Ml9wcm9jZXNzL291dC90aW1lc3RlcHMucmRzLmluZA.yml +++ b/build/status/Ml9wcm9jZXNzL291dC90aW1lc3RlcHMucmRzLmluZA.yml @@ -1,12 +1,12 @@ version: 0.3.0 name: 2_process/out/timesteps.rds.ind type: file -hash: b0324e95071e687ffc507ec9637f139c -time: 2018-11-05 21:16:31 UTC +hash: 018692333545699616a5989502e00e1e +time: 2018-11-06 23:11:46 UTC depends: dates: c87840fa717587715effdfedc24e8fca fixed: a69af68e368ac84d7c9edf47648c5800 code: functions: - choose_timesteps: fb979f896d1352971c7fc36b2952068f + choose_timesteps: e6f3a7a602f698496712d896ae30e841 diff --git a/build/status/Ml9wcm9jZXNzL291dC9kdl9zdGF0X2NvbG9ycy5yZHMuaW5k.yml b/build/status/Ml9wcm9jZXNzL291dC9kdl9zdGF0X2NvbG9ycy5yZHMuaW5k.yml new file mode 100644 index 0000000..f1ea018 --- /dev/null +++ b/build/status/Ml9wcm9jZXNzL291dC9kdl9zdGF0X2NvbG9ycy5yZHMuaW5k.yml @@ -0,0 +1,13 @@ +version: 0.3.0 +name: 2_process/out/dv_stat_colors.rds.ind +type: file +hash: 2f58f593049eaf07a44d8a98e3ec94c8 +time: 2018-11-06 23:03:56 UTC +depends: + 2_process/out/dv_stats.rds.ind: 1f4c09a9405993b4b102355ad1b3b1c9 + color_palette: ff2cbc2b3f91933a46f1062a4703a9ac +fixed: 486c649635ebbc0a9b69d6a88fe562b4 +code: + functions: + process_dv_stat_colors: 68c74329ca3519a344f7d75090ec23a9 + diff --git a/build/status/Ml9wcm9jZXNzL291dC9kdl9zdGF0cy5yZHMuaW5k.yml b/build/status/Ml9wcm9jZXNzL291dC9kdl9zdGF0cy5yZHMuaW5k.yml new file mode 100644 index 0000000..17fc808 --- /dev/null +++ b/build/status/Ml9wcm9jZXNzL291dC9kdl9zdGF0cy5yZHMuaW5k.yml @@ -0,0 +1,15 @@ +version: 0.3.0 +name: 2_process/out/dv_stats.rds.ind +type: file +hash: 1f4c09a9405993b4b102355ad1b3b1c9 +time: 2018-11-06 23:08:10 UTC +depends: + 1_fetch/out/dv_data.rds.ind: 6e08dc56a248b9a7b52a5255f71f1b16 + 2_process/out/site_stats_clean.rds.ind: bcba9e0879d5f487deb4945806eae5e7 + dates: c87840fa717587715effdfedc24e8fca + percentiles: af632b99e36448cfb51ec4b40b8bfeb9 +fixed: d68b248e742da518189e972bd02d3d7b +code: + functions: + process_dv_stats: a29541eed8fbee524c3f87ba900d3246 + diff --git a/build/status/Ml9wcm9jZXNzL291dC9zaXRlX2xvY2F0aW9uc19zcC5yZHMuaW5k.yml b/build/status/Ml9wcm9jZXNzL291dC9zaXRlX2xvY2F0aW9uc19zcC5yZHMuaW5k.yml index e482137..05850e7 100644 --- a/build/status/Ml9wcm9jZXNzL291dC9zaXRlX2xvY2F0aW9uc19zcC5yZHMuaW5k.yml +++ b/build/status/Ml9wcm9jZXNzL291dC9zaXRlX2xvY2F0aW9uc19zcC5yZHMuaW5k.yml @@ -1,10 +1,10 @@ version: 0.3.0 name: 2_process/out/site_locations_sp.rds.ind type: file -hash: 44dfc826c656ab1b32dfcede0ef5db05 -time: 2018-11-05 21:16:34 UTC +hash: e87ce152422fadaebf04e6087611f51c +time: 2018-11-06 23:11:49 UTC depends: - 1_fetch/out/site_locations.rds.ind: 204284312d62f416dacdb22ace61f845 + 1_fetch/out/site_locations.rds.ind: 9f46596111066903b3a9a736d3b4492e fixed: 4509b4a06686ca31c1874b601471c247 code: functions: From af242e28e5fba9eb7ac25a6850e3f6f29c7112dd Mon Sep 17 00:00:00 2001 From: Carr Date: Wed, 7 Nov 2018 10:38:56 -0600 Subject: [PATCH 10/14] fixes based on aappling-usgs review --- 1_fetch/src/fetch_dv_data.R | 7 ++-- 1_fetch/src/fetch_dv_sites.R | 2 +- 1_fetch/src/fetch_site_stats.R | 5 +-- 2_process.yml | 6 ++-- 2_process/src/process_dv_stat_colors.R | 17 +++++---- 2_process/src/process_dv_stats.R | 48 ++++++++++++++------------ viz_config.yml | 2 +- 7 files changed, 41 insertions(+), 46 deletions(-) diff --git a/1_fetch/src/fetch_dv_data.R b/1_fetch/src/fetch_dv_data.R index 5b3ac65..ec97a1b 100644 --- a/1_fetch/src/fetch_dv_data.R +++ b/1_fetch/src/fetch_dv_data.R @@ -11,10 +11,7 @@ fetch_dv_data <- function(ind_file, sites_ind, dates, request_limit){ req_bks <- seq(1, length(sites), by=request_limit) dv_data <- data.frame() for(i in req_bks) { - last_site <- i+request_limit-1 - if(i == tail(req_bks, 1) && last_site > length(sites)) { - last_site <- length(sites) - } + last_site <- min(i+request_limit-1, length(sites)) get_sites <- sites[i:last_site] data_i <- dataRetrieval::readNWISdata( @@ -33,7 +30,7 @@ fetch_dv_data <- function(ind_file, sites_ind, dates, request_limit){ } dv_data <- rbind(dv_data, data_i) - print(paste("Completed", last_site, "of", length(sites))) + message(paste("Completed", last_site, "of", length(sites))) } dv_data_unique <- dplyr::distinct(dv_data) # need this to avoid some duplicates diff --git a/1_fetch/src/fetch_dv_sites.R b/1_fetch/src/fetch_dv_sites.R index 0fc7b80..99912a8 100644 --- a/1_fetch/src/fetch_dv_sites.R +++ b/1_fetch/src/fetch_dv_sites.R @@ -16,8 +16,8 @@ fetch_dv_sites <- function(ind_file, dates){ endDate = dates$end, parameterCd = "00060", statCd = "00003") %>% - dplyr::distinct() %>% dplyr::pull(site_no) %>% + unique() %>% c(sites) } diff --git a/1_fetch/src/fetch_site_stats.R b/1_fetch/src/fetch_site_stats.R index 01c1c83..4e6ce74 100644 --- a/1_fetch/src/fetch_site_stats.R +++ b/1_fetch/src/fetch_site_stats.R @@ -12,10 +12,7 @@ fetch_site_stats <- function(ind_file, sites_ind, request_limit, percentiles){ req_bks <- seq(1, length(sites), by=request_limit) stat_data <- data.frame() for(i in req_bks) { - last_site <- i+request_limit-1 - if(i == tail(req_bks, 1) && last_site > length(sites)) { - last_site <- length(sites) - } + last_site <- min(i+request_limit-1, length(sites)) get_sites <- sites[i:last_site] current_sites <- suppressMessages( dataRetrieval::readNWISstat( diff --git a/2_process.yml b/2_process.yml index b1032b6..a22c164 100644 --- a/2_process.yml +++ b/2_process.yml @@ -34,8 +34,8 @@ targets: # -- config -- proj_str: command: viz_config[[I('projection')]] - color_palette: - command: viz_config[[I('color_palette')]] + sites_color_palette: + command: viz_config[[I('sites_color_palette')]] 2_process/out/timesteps.rds.ind: command: choose_timesteps(target_name, dates = dates) @@ -64,7 +64,7 @@ targets: command: process_dv_stat_colors( ind_file = target_name, dv_stats_ind = '2_process/out/dv_stats.rds.ind', - color_palette = color_palette) + color_palette = sites_color_palette) 2_process/out/dv_stat_colors.rds: command: gd_get('2_process/out/dv_stat_colors.rds.ind') diff --git a/2_process/src/process_dv_stat_colors.R b/2_process/src/process_dv_stat_colors.R index 7e3d65a..2a98ab2 100644 --- a/2_process/src/process_dv_stat_colors.R +++ b/2_process/src/process_dv_stat_colors.R @@ -1,20 +1,19 @@ #' @title Compute the color for each daily value percentile -#' +#' #' @param ind_file character file name where the output should be saved #' @param dv_stats_ind indicator file for the data.frame of dv_data #' @param color_palette list of colors to use for the color ramp (from viz_config.yml) process_dv_stat_colors <- function(ind_file, dv_stats_ind, color_palette){ - + dv_stats <- readRDS(scipiper::sc_retrieve(dv_stats_ind, remake_file = '2_process.yml')) col_fun <- colorRamp(color_palette) - + # just removing NA percentiles for now - dv_stats_with_color <- dv_stats %>% - filter(!is.na(per)) %>% + dv_stats_with_color <- dv_stats %>% + filter(!is.na(per)) %>% mutate(color = rgb(col_fun(per), maxColorValue = 255)) # don't know how necessary maxColorValue is - + # Write the data file and the indicator file - data_file <- scipiper::as_data_file(ind_file) - saveRDS(dv_stats_with_color, data_file) - scipiper::gd_put(ind_file, data_file) + saveRDS(dv_stats_with_color, scipiper::as_data_file(ind_file)) + scipiper::gd_put(ind_file) } diff --git a/2_process/src/process_dv_stats.R b/2_process/src/process_dv_stats.R index 3d35a22..0d98139 100644 --- a/2_process/src/process_dv_stats.R +++ b/2_process/src/process_dv_stats.R @@ -1,41 +1,43 @@ #' @title Calculate the stat category for each gage's discharge value -#' +#' #' @param ind_file character file name where the output should be saved #' @param dv_data_ind indicator file for the data.frame of dv_data #' @param site_stats_clean_ind indicator file for the data.frame of dv stats for each site #' @param dates object from viz_config.yml that specifies dates as string -#' @param percentiles character vector of the types of stats to include, i.e. `c("10", "75")` +#' @param percentiles character vector of the types of stats to include, i.e. `c("10", "75")` #' will return the 10th and 75th percentiles (from viz_config.yml) process_dv_stats <- function(ind_file, dv_data_ind, site_stats_clean_ind, dates, percentiles){ - + dv_data <- readRDS(scipiper::sc_retrieve(dv_data_ind, remake_file = '1_fetch.yml')) site_stats <- readRDS(scipiper::sc_retrieve(site_stats_clean_ind, remake_file = '2_process.yml')) - + # breakdown date into month & day pairs - dv_data_md <- dv_data %>% + dv_data_md <- dv_data %>% dplyr::mutate(month_nu = as.numeric(format(dateTime, "%m")), day_nu = as.numeric(format(dateTime, "%d"))) - + # merge stats with the dv data # merge still results in extra rows - 24 extra to be exact dv_with_stats <- left_join(dv_data_md, site_stats, by = c("site_no", "month_nu", "day_nu")) - + stat_colnames <- sprintf("p%s_va", percentiles) stat_perc <- as.numeric(percentiles)/100 - - int_per <- function(df){ + + interpolate_percentile <- function(df){ + # This function takes the current daily value and interpolates it's percentile based + # on the percentiles for the matching site and day of the year df <- select(df, "dv_val", one_of(stat_colnames)) out <- rep(NA, nrow(df)) - + for (i in 1:length(out)){ dv_val <- df$dv_val[i] - - df_i <- slice(df, i) %>% - select(-dv_val) %>% - tidyr::gather(stat_name, stat_value) %>% + + df_i <- slice(df, i) %>% + select(-dv_val) %>% + tidyr::gather(stat_name, stat_value) %>% mutate(stat_value = as.numeric(stat_value), stat_type = as.numeric(gsub("p|_va", "", stat_name))/100) - + y <- df_i$stat_type x <- df_i$stat_value nas <- is.na(x) @@ -52,17 +54,17 @@ process_dv_stats <- function(ind_file, dv_data_ind, site_stats_clean_ind, dates, } } return(out) - + } - - dv_stats <- dv_with_stats %>% - mutate(dv_val = Flow) %>% - filter_(sprintf("!is.na(%s)", stat_colnames[1]), - sprintf("!is.na(%s)", tail(stat_colnames,1)), + + dv_stats <- dv_with_stats %>% + mutate(dv_val = Flow) %>% + filter_(sprintf("!is.na(%s)", stat_colnames[1]), + sprintf("!is.na(%s)", tail(stat_colnames,1)), sprintf("!is.na(%s)", "dv_val")) %>% - mutate(per = int_per(.)) %>% + mutate(per = interpolate_percentile(.)) %>% select(site_no, dateTime, dv_val, per, p50_va) - + # Write the data file and the indicator file data_file <- scipiper::as_data_file(ind_file) saveRDS(dv_stats, data_file) diff --git a/viz_config.yml b/viz_config.yml index f54b99f..0597619 100644 --- a/viz_config.yml +++ b/viz_config.yml @@ -16,7 +16,7 @@ percentiles: ["05","10","20","25","50","75","80","90","95"] # styling background_col: "gray90" -color_palette: ['#ca0020','#f4a582','#efefef','#efefef','#92c5de','#034064'] +sites_color_palette: ['#ca0020','#f4a582','#efefef','#efefef','#92c5de','#034064'] gage_line_col: "#3c829c" gage_norm_col: "#4BA3C3" legend_text: From 8d09f419deecf2663174d1d96ad9f9644ac01ed7 Mon Sep 17 00:00:00 2001 From: Carr Date: Wed, 7 Nov 2018 11:24:26 -0600 Subject: [PATCH 11/14] build fetch sites, fetch stats, and process stats_clean --- 1_fetch/out/site_stats.rds.ind | 2 +- 1_fetch/out/sites.rds.ind | 2 +- 2_process/out/site_stats_clean.rds.ind | 2 +- .../MV9mZXRjaC9vdXQvc2l0ZV9zdGF0cy5yZHMuaW5k.yml | 14 ++++++++++++++ .../status/MV9mZXRjaC9vdXQvc2l0ZXMucmRzLmluZA.yml | 6 +++--- ...ZXNzL291dC9zaXRlX3N0YXRzX2NsZWFuLnJkcy5pbmQ.yml | 12 ++++++++++++ 6 files changed, 32 insertions(+), 6 deletions(-) create mode 100644 build/status/MV9mZXRjaC9vdXQvc2l0ZV9zdGF0cy5yZHMuaW5k.yml create mode 100644 build/status/Ml9wcm9jZXNzL291dC9zaXRlX3N0YXRzX2NsZWFuLnJkcy5pbmQ.yml diff --git a/1_fetch/out/site_stats.rds.ind b/1_fetch/out/site_stats.rds.ind index 6b10657..a06446f 100644 --- a/1_fetch/out/site_stats.rds.ind +++ b/1_fetch/out/site_stats.rds.ind @@ -1,2 +1,2 @@ -hash: f9de080a31361f1bd22a2a6b59f746cb +hash: 64a273b80a416f15a80da2ec7d4f05eb diff --git a/1_fetch/out/sites.rds.ind b/1_fetch/out/sites.rds.ind index e0adaa2..e4d9b92 100644 --- a/1_fetch/out/sites.rds.ind +++ b/1_fetch/out/sites.rds.ind @@ -1,2 +1,2 @@ -hash: 2e5992fb50c4fd9cd43fe4b6ac5335a6 +hash: 643764a5a20f82946253c41b1e503a5d diff --git a/2_process/out/site_stats_clean.rds.ind b/2_process/out/site_stats_clean.rds.ind index e309f02..01844ef 100644 --- a/2_process/out/site_stats_clean.rds.ind +++ b/2_process/out/site_stats_clean.rds.ind @@ -1,2 +1,2 @@ -hash: 5854dcede0a39113e95d3fecce8982de +hash: ad0e59822f9a265a44ebf6c143494c06 diff --git a/build/status/MV9mZXRjaC9vdXQvc2l0ZV9zdGF0cy5yZHMuaW5k.yml b/build/status/MV9mZXRjaC9vdXQvc2l0ZV9zdGF0cy5yZHMuaW5k.yml new file mode 100644 index 0000000..d991c1e --- /dev/null +++ b/build/status/MV9mZXRjaC9vdXQvc2l0ZV9zdGF0cy5yZHMuaW5k.yml @@ -0,0 +1,14 @@ +version: 0.3.0 +name: 1_fetch/out/site_stats.rds.ind +type: file +hash: b9340a9d65c4260474b5599d24673ff4 +time: 2018-11-07 17:04:27 UTC +depends: + 1_fetch/out/sites.rds.ind: 6e931c8c143913423fcabedde591610a + request_limit: 71db8a6cad03244e6e50f0ad8bc95a65 + percentiles: af632b99e36448cfb51ec4b40b8bfeb9 +fixed: a447e0f033c83c25b92148c0bb37a510 +code: + functions: + fetch_site_stats: f39288bbea2f64f36863f8bd58c35794 + diff --git a/build/status/MV9mZXRjaC9vdXQvc2l0ZXMucmRzLmluZA.yml b/build/status/MV9mZXRjaC9vdXQvc2l0ZXMucmRzLmluZA.yml index 71b28e2..be93876 100644 --- a/build/status/MV9mZXRjaC9vdXQvc2l0ZXMucmRzLmluZA.yml +++ b/build/status/MV9mZXRjaC9vdXQvc2l0ZXMucmRzLmluZA.yml @@ -1,12 +1,12 @@ version: 0.3.0 name: 1_fetch/out/sites.rds.ind type: file -hash: d0daf83d52f6bcd684e8aee83f5d44f4 -time: 2018-11-06 21:38:33 UTC +hash: 6e931c8c143913423fcabedde591610a +time: 2018-11-07 16:42:52 UTC depends: dates: c87840fa717587715effdfedc24e8fca fixed: 7b38a85f946fecf4cde480abb9a88a9b code: functions: - fetch_dv_sites: 19cbacd0abd7ed1ffead1c05b508be8e + fetch_dv_sites: 6417ebb7220e31604e4bb72a44725e3e diff --git a/build/status/Ml9wcm9jZXNzL291dC9zaXRlX3N0YXRzX2NsZWFuLnJkcy5pbmQ.yml b/build/status/Ml9wcm9jZXNzL291dC9zaXRlX3N0YXRzX2NsZWFuLnJkcy5pbmQ.yml new file mode 100644 index 0000000..9f0e186 --- /dev/null +++ b/build/status/Ml9wcm9jZXNzL291dC9zaXRlX3N0YXRzX2NsZWFuLnJkcy5pbmQ.yml @@ -0,0 +1,12 @@ +version: 0.3.0 +name: 2_process/out/site_stats_clean.rds.ind +type: file +hash: 9d2874c468177b9b78d90759d47d26e3 +time: 2018-11-07 17:11:46 UTC +depends: + 1_fetch/out/site_stats.rds.ind: b9340a9d65c4260474b5599d24673ff4 +fixed: 6280d5ff6dc38f409141e1fe1d0bd609 +code: + functions: + process_site_stats: 768f3ae86828278f318b2556e065cb5a + From be8a91abd788d5ad07d6677dc1c0eca9151ddc59 Mon Sep 17 00:00:00 2001 From: Carr Date: Wed, 7 Nov 2018 11:33:08 -0600 Subject: [PATCH 12/14] fetch dv data --- 1_fetch/out/dv_data.rds.ind | 2 +- build/status/MV9mZXRjaC9vdXQvZHZfZGF0YS5yZHMuaW5k.yml | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/1_fetch/out/dv_data.rds.ind b/1_fetch/out/dv_data.rds.ind index a1e28a9..fc8171a 100644 --- a/1_fetch/out/dv_data.rds.ind +++ b/1_fetch/out/dv_data.rds.ind @@ -1,2 +1,2 @@ -hash: 9d304e4bada2c2c115e4c664d129823b +hash: b9097707855bed84ababea6e279bf73c diff --git a/build/status/MV9mZXRjaC9vdXQvZHZfZGF0YS5yZHMuaW5k.yml b/build/status/MV9mZXRjaC9vdXQvZHZfZGF0YS5yZHMuaW5k.yml index 4c8ee01..ce6d65f 100644 --- a/build/status/MV9mZXRjaC9vdXQvZHZfZGF0YS5yZHMuaW5k.yml +++ b/build/status/MV9mZXRjaC9vdXQvZHZfZGF0YS5yZHMuaW5k.yml @@ -1,14 +1,14 @@ version: 0.3.0 name: 1_fetch/out/dv_data.rds.ind type: file -hash: 6e08dc56a248b9a7b52a5255f71f1b16 -time: 2018-11-06 22:59:45 UTC +hash: 637f8fb258fd00a213028166afe4628c +time: 2018-11-07 17:32:43 UTC depends: - 1_fetch/out/sites.rds.ind: d0daf83d52f6bcd684e8aee83f5d44f4 + 1_fetch/out/sites.rds.ind: 6e931c8c143913423fcabedde591610a dates: c87840fa717587715effdfedc24e8fca request_limit: 71db8a6cad03244e6e50f0ad8bc95a65 fixed: df6396a8b26184192ecb1f70ffb6bfc3 code: functions: - fetch_dv_data: 39dac13ad9fec1f57bd6a49eb20ede5e + fetch_dv_data: b940f3d2730cc9abba27e858c6aaf535 From afaa89160d3badf5565e9d85247f5faecac00d56 Mon Sep 17 00:00:00 2001 From: Carr Date: Wed, 7 Nov 2018 11:39:54 -0600 Subject: [PATCH 13/14] build dv stats and colors steps --- 2_process/out/dv_stat_colors.rds.ind | 2 +- 2_process/out/dv_stats.rds.ind | 2 +- ...l9wcm9jZXNzL291dC9kdl9zdGF0X2NvbG9ycy5yZHMuaW5k.yml | 10 +++++----- .../Ml9wcm9jZXNzL291dC9kdl9zdGF0cy5yZHMuaW5k.yml | 10 +++++----- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/2_process/out/dv_stat_colors.rds.ind b/2_process/out/dv_stat_colors.rds.ind index 0d3a028..935f0df 100644 --- a/2_process/out/dv_stat_colors.rds.ind +++ b/2_process/out/dv_stat_colors.rds.ind @@ -1,2 +1,2 @@ -hash: 2fffd2ca1a8032c5709770869fab3bbe +hash: 2d6859e8e3aa3e660fe89227c7ee7e36 diff --git a/2_process/out/dv_stats.rds.ind b/2_process/out/dv_stats.rds.ind index 79fe93f..b128b22 100644 --- a/2_process/out/dv_stats.rds.ind +++ b/2_process/out/dv_stats.rds.ind @@ -1,2 +1,2 @@ -hash: 4167874102333723dd8b0191c52a612d +hash: 0dffd490e10569053d4900287b3783dd diff --git a/build/status/Ml9wcm9jZXNzL291dC9kdl9zdGF0X2NvbG9ycy5yZHMuaW5k.yml b/build/status/Ml9wcm9jZXNzL291dC9kdl9zdGF0X2NvbG9ycy5yZHMuaW5k.yml index f1ea018..59db30e 100644 --- a/build/status/Ml9wcm9jZXNzL291dC9kdl9zdGF0X2NvbG9ycy5yZHMuaW5k.yml +++ b/build/status/Ml9wcm9jZXNzL291dC9kdl9zdGF0X2NvbG9ycy5yZHMuaW5k.yml @@ -1,13 +1,13 @@ version: 0.3.0 name: 2_process/out/dv_stat_colors.rds.ind type: file -hash: 2f58f593049eaf07a44d8a98e3ec94c8 -time: 2018-11-06 23:03:56 UTC +hash: 8c59cd6937ee13c4ef7b3e31ac7521b0 +time: 2018-11-07 17:39:26 UTC depends: - 2_process/out/dv_stats.rds.ind: 1f4c09a9405993b4b102355ad1b3b1c9 - color_palette: ff2cbc2b3f91933a46f1062a4703a9ac + 2_process/out/dv_stats.rds.ind: 720501f733596a258a2475bf416e9dde + sites_color_palette: ff2cbc2b3f91933a46f1062a4703a9ac fixed: 486c649635ebbc0a9b69d6a88fe562b4 code: functions: - process_dv_stat_colors: 68c74329ca3519a344f7d75090ec23a9 + process_dv_stat_colors: 525e74d3cc1839c1a5134d07723cf261 diff --git a/build/status/Ml9wcm9jZXNzL291dC9kdl9zdGF0cy5yZHMuaW5k.yml b/build/status/Ml9wcm9jZXNzL291dC9kdl9zdGF0cy5yZHMuaW5k.yml index 17fc808..d7247e6 100644 --- a/build/status/Ml9wcm9jZXNzL291dC9kdl9zdGF0cy5yZHMuaW5k.yml +++ b/build/status/Ml9wcm9jZXNzL291dC9kdl9zdGF0cy5yZHMuaW5k.yml @@ -1,15 +1,15 @@ version: 0.3.0 name: 2_process/out/dv_stats.rds.ind type: file -hash: 1f4c09a9405993b4b102355ad1b3b1c9 -time: 2018-11-06 23:08:10 UTC +hash: 720501f733596a258a2475bf416e9dde +time: 2018-11-07 17:38:43 UTC depends: - 1_fetch/out/dv_data.rds.ind: 6e08dc56a248b9a7b52a5255f71f1b16 - 2_process/out/site_stats_clean.rds.ind: bcba9e0879d5f487deb4945806eae5e7 + 1_fetch/out/dv_data.rds.ind: 637f8fb258fd00a213028166afe4628c + 2_process/out/site_stats_clean.rds.ind: 9d2874c468177b9b78d90759d47d26e3 dates: c87840fa717587715effdfedc24e8fca percentiles: af632b99e36448cfb51ec4b40b8bfeb9 fixed: d68b248e742da518189e972bd02d3d7b code: functions: - process_dv_stats: a29541eed8fbee524c3f87ba900d3246 + process_dv_stats: cf13f7d546c0c55fa754cf4d6ed299a8 From def11edd2f9fa5002ffc30bba13e9020bd00b40e Mon Sep 17 00:00:00 2001 From: Carr Date: Wed, 7 Nov 2018 11:48:47 -0600 Subject: [PATCH 14/14] remove site_locations_sp stuff and rebuild --- 1_fetch/out/site_locations.rds.ind | 2 +- 2_process.yml | 2 -- 2_process/out/site_locations_sp.rds.ind | 2 -- ...V9mZXRjaC9vdXQvc2l0ZV9sb2NhdGlvbnMucmRzLmluZA.yml | 6 +++--- ...jZXNzL291dC9zaXRlX2xvY2F0aW9uc19zcC5yZHMuaW5k.yml | 12 ------------ 5 files changed, 4 insertions(+), 20 deletions(-) delete mode 100644 2_process/out/site_locations_sp.rds.ind delete mode 100644 build/status/Ml9wcm9jZXNzL291dC9zaXRlX2xvY2F0aW9uc19zcC5yZHMuaW5k.yml diff --git a/1_fetch/out/site_locations.rds.ind b/1_fetch/out/site_locations.rds.ind index aac3b11..5491740 100644 --- a/1_fetch/out/site_locations.rds.ind +++ b/1_fetch/out/site_locations.rds.ind @@ -1,2 +1,2 @@ -hash: bdf819be202ded812280a27a0c45c9de +hash: d25d6b36a48a4c9ffbb95997df49802e diff --git a/2_process.yml b/2_process.yml index 282fe95..e125585 100644 --- a/2_process.yml +++ b/2_process.yml @@ -15,7 +15,6 @@ file_extensions: sources: - 2_process/src/choose_timesteps.R - - 2_process/src/process_site_locations_to_sp.R - 2_process/src/process_site_stats.R - 2_process/src/process_dv_stats.R - 2_process/src/process_dv_stat_colors.R @@ -27,7 +26,6 @@ targets: 2_process: depends: - 2_process/out/timesteps.rds.ind - - 2_process/out/site_locations_sp.rds.ind - 2_process/out/site_stats_clean.rds.ind - 2_process/out/dv_stats.rds.ind - 2_process/out/dv_stat_colors.rds.ind diff --git a/2_process/out/site_locations_sp.rds.ind b/2_process/out/site_locations_sp.rds.ind deleted file mode 100644 index 08ae153..0000000 --- a/2_process/out/site_locations_sp.rds.ind +++ /dev/null @@ -1,2 +0,0 @@ -hash: 0d690807b3803cbb87ba1818fec9ce33 - diff --git a/build/status/MV9mZXRjaC9vdXQvc2l0ZV9sb2NhdGlvbnMucmRzLmluZA.yml b/build/status/MV9mZXRjaC9vdXQvc2l0ZV9sb2NhdGlvbnMucmRzLmluZA.yml index 4de9c0f..c55bd06 100644 --- a/build/status/MV9mZXRjaC9vdXQvc2l0ZV9sb2NhdGlvbnMucmRzLmluZA.yml +++ b/build/status/MV9mZXRjaC9vdXQvc2l0ZV9sb2NhdGlvbnMucmRzLmluZA.yml @@ -1,10 +1,10 @@ version: 0.3.0 name: 1_fetch/out/site_locations.rds.ind type: file -hash: 9f46596111066903b3a9a736d3b4492e -time: 2018-11-06 21:38:59 UTC +hash: ea313a908c5ddf657622f2047f2c43c4 +time: 2018-11-07 17:47:35 UTC depends: - 1_fetch/out/sites.rds.ind: d0daf83d52f6bcd684e8aee83f5d44f4 + 1_fetch/out/sites.rds.ind: 6e931c8c143913423fcabedde591610a fixed: 815cf73829504ede512d11cef2f0b77a code: functions: diff --git a/build/status/Ml9wcm9jZXNzL291dC9zaXRlX2xvY2F0aW9uc19zcC5yZHMuaW5k.yml b/build/status/Ml9wcm9jZXNzL291dC9zaXRlX2xvY2F0aW9uc19zcC5yZHMuaW5k.yml deleted file mode 100644 index 05850e7..0000000 --- a/build/status/Ml9wcm9jZXNzL291dC9zaXRlX2xvY2F0aW9uc19zcC5yZHMuaW5k.yml +++ /dev/null @@ -1,12 +0,0 @@ -version: 0.3.0 -name: 2_process/out/site_locations_sp.rds.ind -type: file -hash: e87ce152422fadaebf04e6087611f51c -time: 2018-11-06 23:11:49 UTC -depends: - 1_fetch/out/site_locations.rds.ind: 9f46596111066903b3a9a736d3b4492e -fixed: 4509b4a06686ca31c1874b601471c247 -code: - functions: - process_site_locations_to_sp: 4042453f628e196259ed3b930455c159 -