DOI-USGS · lindsayplatt · Nov 7, 2018 · Nov 5, 2018 · Nov 6, 2018 · Nov 6, 2018
diff --git a/1_fetch/src/fetch_dv_data.R b/1_fetch/src/fetch_dv_data.R
@@ -11,10 +11,7 @@ fetch_dv_data <- function(ind_file, sites_ind, dates, request_limit){
   req_bks <- seq(1, length(sites), by=request_limit)
   dv_data <- data.frame()
   for(i in req_bks) {
-    last_site <- i+request_limit-1
-    if(i == tail(req_bks, 1) && last_site > length(sites)) {
-      last_site <- length(sites)
-    }
+    last_site <- min(i+request_limit-1, length(sites))
     get_sites <- sites[i:last_site]
     data_i <-
       dataRetrieval::readNWISdata(
@@ -33,7 +30,7 @@ fetch_dv_data <- function(ind_file, sites_ind, dates, request_limit){
     }
 
     dv_data <- rbind(dv_data, data_i)
-    print(paste("Completed", last_site, "of", length(sites)))
+    message(paste("Completed", last_site, "of", length(sites)))
   }
 
   dv_data_unique <- dplyr::distinct(dv_data) # need this to avoid some duplicates

diff --git a/1_fetch/src/fetch_dv_sites.R b/1_fetch/src/fetch_dv_sites.R
@@ -16,8 +16,8 @@ fetch_dv_sites <- function(ind_file, dates){
         endDate = dates$end,
         parameterCd = "00060",
         statCd = "00003") %>%
-      dplyr::distinct() %>%
       dplyr::pull(site_no) %>%
+      unique() %>%
       c(sites)
   }
 

diff --git a/1_fetch/src/fetch_site_stats.R b/1_fetch/src/fetch_site_stats.R
@@ -12,10 +12,7 @@ fetch_site_stats <- function(ind_file, sites_ind, request_limit, percentiles){
   req_bks <- seq(1, length(sites), by=request_limit)
   stat_data <- data.frame()
   for(i in req_bks) {
-    last_site <- i+request_limit-1
-    if(i == tail(req_bks, 1) && last_site > length(sites)) {
-      last_site <- length(sites)
-    }
+    last_site <- min(i+request_limit-1, length(sites))
     get_sites <- sites[i:last_site]
     current_sites <- suppressMessages(
       dataRetrieval::readNWISstat(

diff --git a/2_process.yml b/2_process.yml
@@ -34,8 +34,8 @@ targets:
   # -- config --
   proj_str:
     command: viz_config[[I('projection')]]
-  color_palette:
-    command: viz_config[[I('color_palette')]]
+  sites_color_palette:
+    command: viz_config[[I('sites_color_palette')]]
 
   2_process/out/timesteps.rds.ind:
     command: choose_timesteps(target_name, dates = dates)
@@ -64,7 +64,7 @@ targets:
     command: process_dv_stat_colors(
       ind_file = target_name,
       dv_stats_ind = '2_process/out/dv_stats.rds.ind',
-      color_palette = color_palette)
+      color_palette = sites_color_palette)
   2_process/out/dv_stat_colors.rds:
     command: gd_get('2_process/out/dv_stat_colors.rds.ind')
 

diff --git a/2_process/src/process_dv_stat_colors.R b/2_process/src/process_dv_stat_colors.R
@@ -1,20 +1,19 @@
 #' @title Compute the color for each daily value percentile
-#' 
+#'
 #' @param ind_file character file name where the output should be saved
 #' @param dv_stats_ind indicator file for the data.frame of dv_data
 #' @param color_palette list of colors to use for the color ramp (from viz_config.yml)
 process_dv_stat_colors <- function(ind_file, dv_stats_ind, color_palette){
-  
+
   dv_stats <- readRDS(scipiper::sc_retrieve(dv_stats_ind, remake_file = '2_process.yml'))
   col_fun <- colorRamp(color_palette)
-  
+
   # just removing NA percentiles for now
-  dv_stats_with_color <- dv_stats %>% 
-    filter(!is.na(per)) %>% 
+  dv_stats_with_color <- dv_stats %>%
+    filter(!is.na(per)) %>%
     mutate(color = rgb(col_fun(per), maxColorValue = 255)) # don't know how necessary maxColorValue is
-  
+
   # Write the data file and the indicator file
-  data_file <- scipiper::as_data_file(ind_file)
-  saveRDS(dv_stats_with_color, data_file)
-  scipiper::gd_put(ind_file, data_file)
+  saveRDS(dv_stats_with_color, scipiper::as_data_file(ind_file))
+  scipiper::gd_put(ind_file)
 }
diff --git a/2_process/src/process_dv_stats.R b/2_process/src/process_dv_stats.R
@@ -1,41 +1,43 @@
 #' @title Calculate the stat category for each gage's discharge value
-#' 
+#'
 #' @param ind_file character file name where the output should be saved
 #' @param dv_data_ind indicator file for the data.frame of dv_data
 #' @param site_stats_clean_ind indicator file for the data.frame of dv stats for each site
 #' @param dates object from viz_config.yml that specifies dates as string
-#' @param percentiles character vector of the types of stats to include, i.e. `c("10", "75")` 
+#' @param percentiles character vector of the types of stats to include, e.g. `c("10", "75")`
 #' will return the 10th and 75th percentiles (from viz_config.yml)
 process_dv_stats <- function(ind_file, dv_data_ind, site_stats_clean_ind, dates, percentiles){
-  
+
   dv_data <- readRDS(scipiper::sc_retrieve(dv_data_ind, remake_file = '1_fetch.yml'))
   site_stats <- readRDS(scipiper::sc_retrieve(site_stats_clean_ind, remake_file = '2_process.yml'))
-  
+
   # breakdown date into month & day pairs
-  dv_data_md <- dv_data %>% 
+  dv_data_md <- dv_data %>%
     dplyr::mutate(month_nu = as.numeric(format(dateTime, "%m")),
                   day_nu = as.numeric(format(dateTime, "%d")))
-  
+
   # merge stats with the dv data
   # merge still results in extra rows - 24 extra to be exact
   dv_with_stats <- left_join(dv_data_md, site_stats, by = c("site_no", "month_nu", "day_nu"))
-  
+
   stat_colnames <- sprintf("p%s_va", percentiles)
   stat_perc <- as.numeric(percentiles)/100
-
-  int_per <- function(df){
+
+  interpolate_percentile <- function(df){
+    # This function takes the current daily value and interpolates its percentile based
+    # on the percentiles for the matching site and day of the year
     df <- select(df, "dv_val", one_of(stat_colnames))
     out <- rep(NA, nrow(df))
-    
+
     for (i in 1:length(out)){
       dv_val <- df$dv_val[i]
-      
-      df_i <- slice(df, i) %>% 
-        select(-dv_val) %>% 
-        tidyr::gather(stat_name, stat_value) %>% 
+
+      df_i <- slice(df, i) %>%
+        select(-dv_val) %>%
+        tidyr::gather(stat_name, stat_value) %>%
         mutate(stat_value = as.numeric(stat_value),
                stat_type = as.numeric(gsub("p|_va", "", stat_name))/100)
-      
+
       y <- df_i$stat_type
       x <- df_i$stat_value
       nas <- is.na(x)
@@ -52,17 +54,17 @@ process_dv_stats <- function(ind_file, dv_data_ind, site_stats_clean_ind, dates,
       }
     }
     return(out)
-    
+
   }
-  
-  dv_stats <- dv_with_stats %>% 
-    mutate(dv_val = Flow) %>% 
-    filter_(sprintf("!is.na(%s)", stat_colnames[1]), 
-            sprintf("!is.na(%s)", tail(stat_colnames,1)), 
+
+  dv_stats <- dv_with_stats %>%
+    mutate(dv_val = Flow) %>%
+    filter_(sprintf("!is.na(%s)", stat_colnames[1]),
+            sprintf("!is.na(%s)", tail(stat_colnames,1)),
             sprintf("!is.na(%s)", "dv_val")) %>%
-    mutate(per = int_per(.)) %>% 
+    mutate(per = interpolate_percentile(.)) %>%
     select(site_no, dateTime, dv_val, per, p50_va)
-  
+
   # Write the data file and the indicator file
   data_file <- scipiper::as_data_file(ind_file)
   saveRDS(dv_stats, data_file)

diff --git a/viz_config.yml b/viz_config.yml
@@ -16,7 +16,7 @@ percentiles: ["05","10","20","25","50","75","80","90","95"]
 
 # styling
 background_col: "gray90"
-color_palette: ['#ca0020','#f4a582','#efefef','#efefef','#92c5de','#034064']
+sites_color_palette: ['#ca0020','#f4a582','#efefef','#efefef','#92c5de','#034064']
 gage_line_col: "#3c829c"
 gage_norm_col: "#4BA3C3"
 legend_text: