Merge pull request #61 from miguelcleon/master

resolves issues #16 and #17.
DOI-USGS · Jun 15, 2015 · c8b4776 · c8b4776
2 parents 3f27dab + 13b3f1c
commit c8b4776
Show file tree

Hide file tree

Showing 14 changed files with 110 additions and 78 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -85,10 +85,20 @@ importFrom(car,durbinWatsonTest)
 importFrom(dplyr,"%>%")
 importFrom(dplyr,group_by_)
 importFrom(dplyr,summarise)
+importFrom(ggplot2,aes)
+importFrom(ggplot2,aes_string)
+importFrom(ggplot2,geom_histogram)
 importFrom(ggplot2,geom_line)
+importFrom(ggplot2,geom_point)
 importFrom(ggplot2,ggplot)
+importFrom(ggplot2,scale_colour_manual)
+importFrom(ggplot2,scale_linetype_manual)
+importFrom(ggplot2,scale_shape_manual)
 importFrom(ggplot2,theme_bw)
+importFrom(ggplot2,xlab)
 importFrom(ggplot2,ylab)
+importFrom(lubridate,is.Date)
+importFrom(lubridate,is.POSIXt)
 importFrom(lubridate,tz)
 importFrom(methods,setClass)
 importFrom(methods,setMethod)

diff --git a/R/diagnostics.R b/R/diagnostics.R
@@ -17,8 +17,10 @@
 #' @param tol time step tolerance, the accepatable amount of difference between time steps to still consider them regular.
 #' @return TRUE if the time steps in \code{dates} are identical to within the 
 #'   tolerance set by {tol}, FALSE otherwise.
+#' @importFrom ggplot2 ggplot aes geom_histogram xlab
 #' @export
 isTimestepRegular <- function(dates, hist=TRUE, tol=.Machine$double.eps^0.5, handler=stop) {
+  TimeInterval  <- '.ggplot.var'
   time_diffs <- diff(dates)
   is_irregular <- (length(unique(time_diffs)) > 1) & (diff(range(time_diffs)) > tol)
   if(is_irregular) {

diff --git a/R/loadComp.R b/R/loadComp.R
@@ -397,7 +397,7 @@ predictSolute.loadComp <- function(
 #'   points are needed to make this estimation with precision.
 #' @export
 #' @family estimateMSE
-estimateMSE.loadComp <- function(load.model, n.iter=100, method="parametric", rho) {
+estimateMSE.loadComp <- function(load.model, n.iter=100, method="parametric", rho, ...) {
 
   # Pull out the resid.model for easier access
   resid.model <- load.model@fit@resid.model
@@ -424,9 +424,11 @@ estimateMSE.loadComp <- function(load.model, n.iter=100, method="parametric", rh
   # Pre-calculate the conversion factors to make preds and obs in "conc" or "flux" format. Calls to formatPreds are expensive, so do them infrequently.
   is_flux_format <- resid.model@pred.format == "flux"
   if(is_flux_format) {
-    conc_factor <- formatPreds(rep(1, nrow(resid.model@data)), from=resid.model@pred.format, to="conc", newdata=resid.model@data, metadata=resid.model@metadata, attach.units=FALSE)
+    conc_factor <- formatPreds(rep(1, nrow(resid.model@data)), from.format=resid.model@pred.format, 
+                               to.format="conc", newdata=resid.model@data, metadata=resid.model@metadata, attach.units=FALSE)
   } else {
-    flux_factor <- formatPreds(rep(1, nrow(resid.model@data)), from=resid.model@pred.format, to="flux", newdata=resid.model@data, metadata=resid.model@metadata, attach.units=FALSE)
+    flux_factor <- formatPreds(rep(1, nrow(resid.model@data)), from.format=resid.model@pred.format, 
+                               to.format="flux", newdata=resid.model@data, metadata=resid.model@metadata, attach.units=FALSE)
   }
 
   # Loop, each time resampling coefficients and calculating the leave-n-out 

diff --git a/R/loadInterp.R b/R/loadInterp.R
@@ -339,7 +339,7 @@ predictSolute.loadInterp <- function(
 #'   observations (TRUE) or without replacement (FALSE)?
 #' @export
 #' @family estimateMSE
-estimateMSE.loadInterp <- function(load.model, n.out, n.iter=floor(nrow(getFittingData(load.model))/n.out), replace) {
+estimateMSE.loadInterp <- function(load.model, n.out, n.iter=floor(nrow(getFittingData(load.model))/n.out), replace, ...) {
 
   # Validate args
   replace <- match.arg.loadflex(replace, c(TRUE, FALSE))
@@ -365,11 +365,13 @@ estimateMSE.loadInterp <- function(load.model, n.out, n.iter=floor(nrow(getFitti
   is_flux_format <- load.model@pred.format == "flux"
   if(is_flux_format) {
     conc_factor <- tryCatch(
-      formatPreds(rep(1, nrow(load.model@data)), from=load.model@pred.format, to="conc", newdata=load.model@data, metadata=load.model@metadata, attach.units=FALSE),
+      formatPreds(rep(1, nrow(load.model@data)), from.format=load.model@pred.format, 
+                  to.format="conc", newdata=load.model@data, metadata=load.model@metadata, attach.units=FALSE),
       error=function(e) rep(NA, nrow(load.model@data)))
   } else {
     flux_factor <- tryCatch(
-      formatPreds(rep(1, nrow(load.model@data)), from=load.model@pred.format, to="flux", newdata=load.model@data, metadata=load.model@metadata, attach.units=FALSE),
+      formatPreds(rep(1, nrow(load.model@data)), from.format=load.model@pred.format, 
+                  to.format="flux", newdata=load.model@data, metadata=load.model@metadata, attach.units=FALSE),
       error=function(e) rep(NA, nrow(load.model@data)))
   }
 

diff --git a/R/loadLm.R b/R/loadLm.R
@@ -88,7 +88,6 @@ loadLm <- function(formula, pred.format=c("flux","conc"),
                    data, metadata, fitting_function=NULL, 
                    y.trans.function=NULL, retrans.function=exp,
                    store=c("data","fitting.function"), ylog) {
-
   # Validate arguments
   pred.format <- match.arg.loadflex(pred.format)
   store <- match.arg.loadflex(store, choices=c("data","fitting.function"), several.ok=TRUE)
@@ -361,7 +360,6 @@ resampleCoefficients.lm <- function(fit) {
 #' @family simulateSolute
 simulateSolute.loadLm <- function(load.model, flux.or.conc=c("flux","conc"), newdata, 
                                   method=c("parametric", "non-parametric"), from.interval=c("confidence", "prediction"), rho, ...) {
-
   # Validate arguments
   flux.or.conc <- match.arg.loadflex(flux.or.conc)
   from.interval <- match.arg.loadflex(from.interval, c("confidence","prediction"))
@@ -404,6 +402,7 @@ simulateSolute.loadLm <- function(load.model, flux.or.conc=c("flux","conc"), new
     # residuals to the original; it seems not to.
     noise <- as.numeric(arima.sim(model=list(order=c(1,0,0), ar=rho), n=nrow(newdata)))
     # Normalize the noise to have the same sd as the original residuals. Is that reasonable?
+    s.hat <- summary(load.model@fit)$sigma
     noise <- noise * s.hat/sd(noise)
     # Add the noise back into the predictions, first converting back to linear space
     fitting.preds <- exp(log(fitting.preds) + noise)

diff --git a/R/loadReg.R b/R/loadReg.R
@@ -20,23 +20,23 @@ checkRloadestStatus <- function() {
 #' #' 
 #' @inheritParams getMetadata 
 #' @importFrom rloadest loadReg
-#' @param fit a loadReg object
+#' @param load.model a loadReg object
 #' @export
 #' @return Object of class "metadata" with slots modified according to the
 #'   metadata contained in load.model
 #' @family getMetadata
-getMetadata.loadReg <- function(fit) {
+getMetadata.loadReg <- function(load.model) {
 
   metadata(
-    constituent=fit$constituent,
-    flow=fit$flow,
+    constituent=load.model$constituent,
+    flow=load.model$flow,
     load.rate="",
-    dates=fit$dates,
-    conc.units=fit$conc.units, 
-    flow.units=fit$flow.units, 
-    load.units=fit$load.units, 
-    load.rate.units=paste0(fit$load.units,"/day"),
-    station=fit$station)
+    dates=load.model$dates,
+    conc.units=load.model$conc.units, 
+    flow.units=load.model$flow.units, 
+    load.units=load.model$load.units, 
+    load.rate.units=paste0(load.model$load.units,"/day"),
+    station=load.model$station)
 
 }
 

diff --git a/R/loadReg2.R b/R/loadReg2.R
@@ -393,7 +393,6 @@ predictSolute.loadReg2 <- function(
 #' @family simulateSolute
 simulateSolute.loadReg2 <- function(load.model, flux.or.conc=c("flux","conc"), newdata, 
                                     method=c("parametric", "non-parametric"), from.interval=c("confidence", "prediction"), rho, ...) {
-
   # Validate arguments
   flux.or.conc <- match.arg.loadflex(flux.or.conc)
   from.interval <- match.arg.loadflex(from.interval, c("confidence","prediction"))
@@ -439,7 +438,9 @@ simulateSolute.loadReg2 <- function(load.model, flux.or.conc=c("flux","conc"), n
     # residuals to the original; it seems not to.
     noise <- as.numeric(arima.sim(model=list(order=c(1,0,0), ar=rho), n=nrow(newdata)))
     # Normalize the noise to have the same sd as the original residuals. Is that reasonable?
-    noise <- noise * s.hat/sd(noise)
+    error <- "print.function"
+    error("need to calculate s.hat")
+    #noise <- noise * s.hat/sd(noise)
     # Add the noise back into the predictions, first converting back to linear space
     fitting.preds <- exp(log(fitting.preds) + noise)
   }

diff --git a/R/plots.R b/R/plots.R
@@ -57,12 +57,11 @@ plotObservationsCM <- function(...) {
 #'   regression results
 #' @param xrange, a user provded range for the xvalues (dates)
 #' @param verbose flag for to print for debugging
-#' 
-#' @importFrom ggplot2 ggplot geom_line ylab theme_bw
+#' @importFrom ggplot2 ggplot aes aes_string xlab geom_point geom_line ylab theme_bw scale_colour_manual scale_linetype_manual scale_shape_manual
 plotCM <- function(flux.or.conc, show.observations, load.model, finalloads, observations=NULL, 
                    composite=TRUE, linear.interpolation=FALSE, regression=TRUE,  xrange="none", 
                    dateField="Date", verbose=FALSE) {
-
+  value <- yday<- variable <- '.ggplot.var'
   # Check to make sure the argument is one of the two currently accepted choices
   match.arg.loadflex(flux.or.conc)
 
@@ -102,7 +101,7 @@ plotCM <- function(flux.or.conc, show.observations, load.model, finalloads, obse
   # adding to plotsols, we'll actually create the data.frame right here - it
   # will be a data.frame with only one column, called DATE.
 
-
+  DATE <- "plotsols.var"
   plotsols <- setNames(finalloads[date_col], "DATE")
   if(verbose){
     print("plotsols ----")
@@ -270,19 +269,20 @@ plotLoadResidualsCM <- function(...){
 #'   
 #' @param load.model use this to pull names of date column and constituent name
 #' @param observations these are the observations to plot the residuals for
+#' @param residuals the observation residuals 
 #' @param dateField name of the date field in observations
 #' @param verbose debugging flag
 #' @param day.of.year flag if true will plot residuals by the day of year to,
-#'   can help user look for seasonality
-plotResidualsCM <- function(type, load.model, observations, dateField="Date", verbose=FALSE,  xObservations=FALSE, xFlow=FALSE, day.of.year=FALSE){
-
+#' @importFrom ggplot2 ggplot aes_string geom_point  xlab
+plotResidualsCM <- function(type, load.model, observations,resids, dateField="Date", verbose=FALSE,  xObservations=FALSE, xFlow=FALSE, day.of.year=FALSE){
+  yday<- '.ggplot.var'
   dateName <- load.model$dates
   soluteName <- load.model$constituent
   flowName <- load.model$flow
   date_col <- names(which(sapply(c(dateField,  "Date",  dateName,"Period"), function(field) { !is.null(observations[[field]]) } ))[1])
   estName <- ""
   if(type=="conc"){
-    resids <- predictSoluteFromRegression("conc", load.model, observations)
+    #resids <- predictSoluteFromRegression("conc", load.model, observations)
     names(resids)[which(names(resids)==paste0(date_col))] <- "Date"
     names(resids)[which(names(resids)=="Conc")] <- "Conc_Est"
     resids$Conc_Obs <- observations[match(resids$Date, observations[[date_col]]),paste0(soluteName)]
@@ -344,10 +344,12 @@ plotResidualsCM <- function(type, load.model, observations, dateField="Date", ve
 #' @param plotsols A data.frame with a DATE column
 #' @return same data.frame but with DATE field converted to POSIXct or lt
 #' @keywords internal
+#' @importFrom lubridate is.POSIXt is.Date
 transformDates <- function(plotsols){
   # Convert to POSIXct if feasible. This will require some thought to figure out
   # which sorts of Period formats can be generated by predLoad and predConc. In
   # the meantime, here we address just a couple of the possibilities.
+  DATE<- '.transform.var'
   plotsols <- transform(
     plotsols, 
     DATE={

diff --git a/R/predictors.R b/R/predictors.R
@@ -57,55 +57,56 @@
 #'   2013 Hydrology course (http://user.engineering.uiowa.edu/~flood)
 #' @family predictors
 #' @keywords internal
+#' @importFrom lubridate is.POSIXt is.Date
 getPred_baseflow <- function(data, metadata, method=c("hysep","1p digital filter","2p digital filter"), 
                              da, select, alpha, BFImax, ...) {
-
+  #hysep<- 'method.var'
   method <- match.arg(method)
   Flows <- getCol(metadata, data, "flow", FALSE)
   Dates <- getCol(metadata, data, "date", FALSE)
 
   switch(
     method,
-    "hysep"={
-      # I've made hysep unavailable, at least for now, because it requires an
-      # additional USGS-R dependency that may not be used much. Can we provide a
-      # helper function to help the user prepare/process data for/from
-      # DVstats::hysep() rather than wrapping that function here?
-
-      # This method wraps the DVstats::hysep() approach for separating baseflow and stormflow.
-      # Advantage: already implemented. Disadvantage: requires that dates are in the
-      # Date format, effectively limiting the resolution to <=1 observation per day.
-
-      # roxygen2 documentation: 
-      #   method=="hysep": Uses the HYSEP model, and specifically the hysep function 
-      #   from the USGS package DVstats. This package must be installed before the 
-      #   method can be used. To install from the USGS GitHub page, call 
-      #   \code{library(devtools); install_github("DVstats",user="USGS-R")}. When using
-      #   the hysep() method, the user will likely want to choose among methods 
-      #   available within hysep(), which are specified by setting the \code{select} 
-      #   argument; see the parameter description for \code{select} for details. This 
-      #   method requires specification of the \code{da} and \code{select} parameters. 
-      #   The \code{hysep} method further requires that dates can be specified as dates
-      #   without hours, minutes, or seconds, i.e., that the time step is a multiple of
-      #   1 day.  
-      #
-      #   importFrom DVstats hysep
-
-      # install_github("USGS-R/DVstats")
-      # library(DVstats)
-
-      if(is.Date(Dates)) {
-        DateDates <- Dates
-      } else if(is.POSIXt(Dates)) {
-        warning("hysep() requires dates in Date format; coercing to Date")
-        DateDates <- as.Date(Dates, tz=Sys.timezone() )
-      } else if(is.character(Dates)) {
-        DateDates <- as.Date()
-      }
-
-      # ChoptankFlow$BaseQ <- with(ChoptankFlow, 
-      baseflow <- hysep(Flows, Dates, da=da, select=select, ...)$BaseQ
-    },
+#     "hysep"={
+#       # I've made hysep unavailable, at least for now, because it requires an
+#       # additional USGS-R dependency that may not be used much. Can we provide a
+#       # helper function to help the user prepare/process data for/from
+#       # DVstats::hysep() rather than wrapping that function here?
+#       
+#       # This method wraps the DVstats::hysep() approach for separating baseflow and stormflow.
+#       # Advantage: already implemented. Disadvantage: requires that dates are in the
+#       # Date format, effectively limiting the resolution to <=1 observation per day.
+#       
+#       # roxygen2 documentation: 
+#       #   method=="hysep": Uses the HYSEP model, and specifically the hysep function 
+#       #   from the USGS package DVstats. This package must be installed before the 
+#       #   method can be used. To install from the USGS GitHub page, call 
+#       #   \code{library(devtools); install_github("DVstats",user="USGS-R")}. When using
+#       #   the hysep() method, the user will likely want to choose among methods 
+#       #   available within hysep(), which are specified by setting the \code{select} 
+#       #   argument; see the parameter description for \code{select} for details. This 
+#       #   method requires specification of the \code{da} and \code{select} parameters. 
+#       #   The \code{hysep} method further requires that dates can be specified as dates
+#       #   without hours, minutes, or seconds, i.e., that the time step is a multiple of
+#       #   1 day.  
+#       #
+#       #   importFrom DVstats hysep
+#       
+#       # install_github("USGS-R/DVstats")
+#       # library(DVstats)
+#       
+#       if(is.Date(Dates)) {
+#         DateDates <- Dates
+#       } else if(is.POSIXt(Dates)) {
+#         warning("hysep() requires dates in Date format; coercing to Date")
+#         DateDates <- as.Date(Dates, tz=Sys.timezone() )
+#       } else if(is.character(Dates)) {
+#         DateDates <- as.Date()
+#       }
+#       
+#       # ChoptankFlow$BaseQ <- with(ChoptankFlow, 
+#       baseflow <- hysep(Flows, Dates, da=da, select=select, ...)$BaseQ
+#     },
     "1p digital filter"={
       # Implements the digital filter described at
       # http://user.engineering.uiowa.edu/~flood/handouts/HO-L17-Baseflow-Separation.pdf

diff --git a/R/unit.conversions.R b/R/unit.conversions.R
@@ -46,6 +46,7 @@ NULL
 #' @keywords data units internal
 generateUnitsData <- function() {
   # valid.metadata.units
+
   valid.metadata.units <- rbind(
     data.frame(
       dimension="volume",
@@ -126,6 +127,8 @@ generateUnitsData <- function() {
     ))
   ), c("numerator", "denominator", "value"))
   # Append the reverse conversions
+  numerator<-denominator<-value<-".transform.var"
+
   unit.conversions <- rbind(
     unit.conversions,
     transform(unit.conversions, 
@@ -164,14 +167,15 @@ generateUnitsData <- function() {
 #' validMetadataUnits("g", unit.type="flow.units") # FALSE
 validMetadataUnits <- function(unitstr, unit.type=c("ANY","flow.units","conc.units","load.units","load.rate.units")) {
   unit.type <- match.arg(unit.type)
-  
+
   # Parse the unit string into numerator and denominator strings
   unitpieces <- separate_units(unitbundle(unitstr))
   numerator <- get_units(unitbundle(unitpieces[which(unitpieces$Power > 0),]))
   denominator <- get_units(1/unitbundle(unitpieces[which(unitpieces$Power < 0),]))
 
   # A useful helper function: returns TRUE iif oneunit is present in valid.metadata.units
   # and has a dimension that's in dims
+  unit <- "subset.var"
   hasDim <- function(oneunit, dims) {
     unit_row <- subset(valid.metadata.units, unit==oneunit)
     if(nrow(unit_row) != 1) {
@@ -182,7 +186,7 @@ validMetadataUnits <- function(unitstr, unit.type=c("ANY","flow.units","conc.uni
       return(TRUE)
     }
   }
-  
+
   # The numerator (and denominator if it exists) should have specific dimensions
   # for each units type. Check.
   switch(
@@ -217,6 +221,7 @@ validMetadataUnits <- function(unitstr, unit.type=c("ANY","flow.units","conc.uni
 #' loadflex:::translateFreeformToUnitted("mg L^-1") # "mg L^-1"
 translateFreeformToUnitted <- function(freeform.units, attach.units=FALSE) {
   # Quick escape if our work is already done.
+
   if(validMetadataUnits(freeform.units, "ANY")) {
     return(if(attach.units) unitbundle(freeform.units) else get_units(unitbundle(freeform.units)))
   }
@@ -227,6 +232,7 @@ translateFreeformToUnitted <- function(freeform.units, attach.units=FALSE) {
   unitslist <- strsplit(units, "/")
   # for each bundle of units, find each element in the dictionary and translate if needed
   #data(freeform.unit.translations)
+  old <- "tidyunit.var"
   units <- lapply(unitslist, function(units) {
     units <- lapply(units, function(unit) {
       tidyunit <- gsub("^ +| +$", "", unit) # strip surrounding spaces
@@ -273,6 +279,7 @@ flowconcToFluxConversion <- function(flow.units, conc.units, load.rate.units, at
   ## Code inspired by rloadest::loadConvFactor code by DLLorenz and ldecicco
   ## Makes heavy use of unitted package by A Appling
 
+
   # Translate units - goes quickly if they're good already
   flow.units <- translateFreeformToUnitted(flow.units, TRUE)
   conc.units <- translateFreeformToUnitted(conc.units, TRUE)
@@ -292,6 +299,7 @@ flowconcToFluxConversion <- function(flow.units, conc.units, load.rate.units, at
   # Identify the right components of the multiplier. Components that are
   # unavailable will be omitted from the multipliers data.frame
   #data(unit.conversions)
+  numerator<-denominator<- "rbind.var"
   multipliers <- rbind(
     # Convert to mg/day
     numer_to_mg = subset(unit.conversions, numerator == "mg" & denominator %in% fcu_numerstrs),