piecewise trends and descriptions
Nicholas Clark committed Nov 21, 2023
1 parent 89cdaef commit ff37826
Showing 16 changed files with 391 additions and 86 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
@@ -1,6 +1,6 @@
Package: mvgam
Title: Multivariate (Dynamic) Generalized Additive Models
Version: 1.0.7
Version: 1.0.8
Date: 2023-11-01
Authors@R:
person("Nicholas J", "Clark", , "[email protected]", role = c("aut", "cre"),
8 changes: 8 additions & 0 deletions R/forecast.mvgam.R
@@ -744,6 +744,14 @@ forecast_draws = function(object,
time = data_test$time,
cap = suppressWarnings(linkfun(data_test$cap,
link = family$link)))

# Check that capacities remain valid after transformation to the link scale
if(any(is.na(cap$cap)) | any(is.infinite(cap$cap))){
stop(paste0('Missing or infinite values found for some "cap" terms\n',
'after transforming to the ',
family$link, ' link scale'),
call. = FALSE)
}

} else {
cap <- NULL
}
42 changes: 21 additions & 21 deletions R/get_mvgam_priors.R
@@ -732,27 +732,27 @@ get_mvgam_priors = function(formula,

if(trend_model %in% c('PWlinear', 'PWlogistic')){
# Need to fix this as a next priority
trend_df <- NULL
# trend_df <- data.frame(param_name = c('vector[n_series] k_trend;',
# 'vector[n_series] m_trend;'),
# param_length = length(unique(data_train$series)),
# param_info = c('base trend growth rates',
# 'trend offset parameters'),
# prior = c('k_trend ~ std_normal();',
# 'm_trend ~ student_t(3, 0, 2.5);'),
# example_change = c(paste0(
# 'k ~ normal(',
# round(runif(min = -1, max = 1, n = 1), 2),
# ', ',
# round(runif(min = 0.1, max = 1, n = 1), 2),
# ');'),
# paste0(
# 'm ~ normal(',
# round(runif(min = -1, max = 1, n = 1), 2),
# ', ',
# round(runif(min = 0.1, max = 1, n = 1), 2),
# ');')))
#
# trend_df <- NULL
trend_df <- data.frame(param_name = c('vector[n_series] k_trend;',
'vector[n_series] m_trend;'),
param_length = length(unique(data_train$series)),
param_info = c('base trend growth rates',
'trend offset parameters'),
prior = c('k_trend ~ std_normal();',
'm_trend ~ student_t(3, 0, 2.5);'),
example_change = c(paste0(
'k ~ normal(',
round(runif(min = -1, max = 1, n = 1), 2),
', ',
round(runif(min = 0.1, max = 1, n = 1), 2),
');'),
paste0(
'm ~ normal(',
round(runif(min = -1, max = 1, n = 1), 2),
', ',
round(runif(min = 0.1, max = 1, n = 1), 2),
');')))


}
if(trend_model == 'GP'){
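
The k_trend and m_trend priors defined above surface to users through get_mvgam_priors(), whose
output can be edited and passed back to mvgam() via its priors argument. A rough sketch of that
workflow (here dat is a placeholder for a long-format data frame with y, time and series columns):

# Inspect the default priors for a piecewise linear trend
p <- get_mvgam_priors(y ~ 0,
                      data = dat,
                      family = poisson(),
                      trend_model = 'PWlinear')

# Tighten the prior on the base growth rates, then refit with the modified priors
p$prior[p$param_name == 'vector[n_series] k_trend;'] <- 'k_trend ~ normal(0, 0.5);'
mod <- mvgam(y ~ 0,
             data = dat,
             family = poisson(),
             trend_model = 'PWlinear',
             priors = p)
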
4 changes: 2 additions & 2 deletions R/index-mvgam.R
@@ -138,7 +138,7 @@ variables.mvgam = function(x, ...){
'ar1', 'ar2',
'ar3', 'A',
'Sigma', 'error', 'theta',
'k_trend', 'delta', 'm_trend'), collapse = '|'),
'k_trend', 'delta_trend', 'm_trend'), collapse = '|'),
parnames) &
!grepl('sigma_obs', parnames, fixed = TRUE) &
!grepl('sigma_raw', parnames, fixed = TRUE))){
@@ -147,7 +147,7 @@
'ar1', 'ar2',
'ar3', 'A',
'Sigma', 'error', 'theta',
'k_trend', 'delta', 'm_trend'), collapse = '|'),
'k_trend', 'delta_trend', 'm_trend'), collapse = '|'),
parnames) &
!grepl('sigma_obs', parnames, fixed = TRUE) &
!grepl('sigma_raw', parnames, fixed = TRUE)
99 changes: 80 additions & 19 deletions R/mvgam.R
@@ -78,10 +78,11 @@
#' \item `'AR2'` or `AR(p = 2)`
#' \item `'AR3'` or `AR(p = 3)`
#' \item `'VAR1'` or `VAR()` (only available in \code{Stan})
#' \item `'PWlogistic'`, `'PWlinear'` or `PW()` (only available in \code{Stan})
#' \item `'GP'` (Gaussian Process with squared exponential kernel;
#'only available in \code{Stan})}
#'
#'For all types apart from `'GP'`, moving average and/or correlated
#'For all types apart from `'GP'` and `PW()`, moving average and/or correlated
#'process error terms can also be estimated (for example, `RW(cor = TRUE)` will set up a
#'multivariate Random Walk if `n_series > 1`). See [mvgam_trends] for more details
#'@param trend_map Optional `data.frame` specifying which series should depend on which latent
@@ -170,7 +171,12 @@
#'\cr
#'\cr
#'*Formula syntax*: Details of the formula syntax used by \pkg{mvgam} can be found in
#'\code{\link[mgcv]{formula.gam}}.
#'\code{\link[mgcv]{formula.gam}}. Note that it is possible to supply an empty formula where
#'there are no predictors or intercepts in the observation model (i.e. `y ~ 0` or `y ~ -1`).
#'In this case, an intercept-only observation model will be set up but the intercept coefficient
#'will be fixed at zero. This can be handy if you wish to fit pure State-Space models where
#'the variation in the dynamic trend controls the average expectation, and/or where intercepts
#'are non-identifiable (as in piecewise trends; see the examples below).
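#'As a minimal sketch (assuming a long-format data frame `dat` containing `y`, `time` and
#'`series` columns), `mvgam(y ~ 0, trend_model = PW(), family = poisson(), data = dat)` drops
#'the observation-model intercept and lets the piecewise trend carry the average expectation.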
#'\cr
#'\cr
#'*Families and link functions*: Details of families supported by \pkg{mvgam} can be found in
@@ -354,20 +360,20 @@
#' trend = c(1,1,2))
#'
#' # Fit the model using AR1 trends
#' mod1 <- mvgam(y ~ s(season, bs = 'cc'),
#' mod <- mvgam(y ~ s(season, bs = 'cc'),
#' trend_map = trend_map,
#' trend_model = 'AR1',
#' data = mod_data,
#' return_model_data = TRUE)
#'
#' # The mapping matrix is now supplied as data to the model in the 'Z' element
#' mod$model_data$Z
#' code(mod1)
#' code(mod)
#'
#' # The first two series share an identical latent trend; the third is different
#' plot(mod1, type = 'trend', series = 1)
#' plot(mod1, type = 'trend', series = 2)
#' plot(mod1, type = 'trend', series = 3)
#' plot(mod, type = 'trend', series = 1)
#' plot(mod, type = 'trend', series = 2)
#' plot(mod, type = 'trend', series = 3)
#'
#' # Example of how to use dynamic coefficients
#' # Simulate a time-varying coefficient for the effect of temperature
@@ -418,7 +424,7 @@
#'
#' # Fit a model that includes the offset in the linear predictor as well as
#' # hierarchical seasonal smooths
#' mod1 <- mvgam(formula = y ~ offset(offset) +
#' mod <- mvgam(formula = y ~ offset(offset) +
#' s(series, bs = 're') +
#' s(season, bs = 'cc') +
#' s(season, by = series, m = 1, k = 5),
@@ -428,25 +434,25 @@
#'
#' # Inspect the model file to see the modification to the linear predictor
#' # (eta)
#' mod1$model_file
#' mod$model_file
#'
#' # Forecasts for the first two series will differ in magnitude
#' layout(matrix(1:2, ncol = 2))
#' plot(mod1, type = 'forecast', series = 1, newdata = dat$data_test,
#' plot(mod, type = 'forecast', series = 1, newdata = dat$data_test,
#' ylim = c(0, 75))
#' plot(mod1, type = 'forecast', series = 2, newdata = dat$data_test,
#' plot(mod, type = 'forecast', series = 2, newdata = dat$data_test,
#' ylim = c(0, 75))
#' layout(1)
#'
#' # Changing the offset for the testing data should lead to changes in
#' # the forecast
#' dat$data_test$offset <- dat$data_test$offset - 2
#' plot(mod1, 'forecast', newdata = dat$data_test)
#' plot(mod, 'forecast', newdata = dat$data_test)
#'
#' # Relative Risks can be computed by fixing the offset to the same value
#' # for each series
#' dat$data_test$offset <- rep(1, NROW(dat$data_test))
#' preds_rr <- predict(mod1, type = 'link', newdata = dat$data_test)
#' preds_rr <- predict(mod, type = 'link', newdata = dat$data_test)
#' series1_inds <- which(dat$data_test$series == 'series_1')
#' series2_inds <- which(dat$data_test$series == 'series_2')
#'
@@ -467,6 +473,66 @@
#' }
#' layout(1)
#'
#' # Example of logistic growth with possible changepoints
#' # Simple logistic growth model
#' dNt = function(r, N, k){
#' r * N * (k - N)
#' }
#'
#' # Iterate growth through time
#' Nt = function(r, N, t, k) {
#' for (i in 1:(t - 1)) {
#'
#' # population at next time step is current population + growth,
#' # but we introduce several 'shocks' as changepoints
#' if(i %in% c(5, 15, 25, 41, 45, 60, 80)){
#' N[i + 1] <- max(1, N[i] + dNt(r + runif(1, -0.1, 0.1),
#' N[i], k))
#' } else {
#' N[i + 1] <- max(1, N[i] + dNt(r, N[i], k))
#' }
#' }
#' N
#' }
#'
#' # Simulate expected values
#' set.seed(11)
#' expected <- Nt(0.004, 2, 100, 30)
#' plot(expected, xlab = 'Time')
#'
#' # Take Poisson draws
#' y <- rpois(100, expected)
#' plot(y, xlab = 'Time')
#'
#' # Assemble the data into a data.frame for modelling. We set a
#' # fixed carrying capacity of 35 for this example, but note that
#' # this value does not need to be the same at every timepoint
#' mod_data <- data.frame(y = y,
#' time = 1:100,
#' cap = 35,
#' series = as.factor('series_1'))
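#'
#' # The capacity is also allowed to vary over time; for instance, a seasonally
#' # varying capacity could be supplied instead (sketch, not run here):
#' # mod_data$cap <- 35 + 5 * sin(2 * pi * mod_data$time / 50)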
#' plot_mvgam_series(data = mod_data)
#'
#' # The intercept is nonidentifiable when using piecewise
#' # trends because the trend functions have their own offset
#' # parameters 'm'; it is recommended to always drop intercepts
#' # when using these trend models
#' mod <- mvgam(y ~ 0,
#' trend_model = PW(growth = 'logistic'),
#' family = poisson(),
#' data = mod_data)
#' summary(mod)
#'
#' # Plot the posterior hindcast
#' plot(mod, type = 'forecast')
#'
#' # View the changepoints with ggplot2 utilities
#' library(ggplot2)
#' mcmc_plot(mod, variable = 'delta_trend',
#' regex = TRUE) +
#' scale_y_discrete(labels = mod$trend_model$changepoints) +
#' labs(y = 'Potential changepoint',
#' x = 'Rate change')
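#'
#' # Forecasting ahead requires 'cap' values in newdata as well; a rough sketch
#' # that simply carries the same capacity forward for 10 further timesteps
#' newdata <- data.frame(y = NA,
#'                       time = 101:110,
#'                       cap = 35,
#'                       series = as.factor('series_1'))
#' fc <- forecast(mod, newdata = newdata)
#' plot(fc)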
#' }
#'@export

@@ -754,12 +820,7 @@ mvgam = function(formula,
family = family,
use_lv = use_lv,
n_lv = n_lv,
trend_model =
if(trend_model %in% c('PWlinear', 'PWlogistic')){
'RW'
} else {
trend_model
},
trend_model = trend_model,
trend_map = trend_map,
drift = drift)

58 changes: 52 additions & 6 deletions R/piecewise_trends.R
@@ -15,11 +15,36 @@
#' Large values will allow many changepoints and a more flexible trend, while
#' small values will allow few changepoints. Default is `0.05`.
#' @param growth Character string specifying either 'linear' or 'logistic' growth of
#' the trend. If 'logistic', a variable labelled 'cap' MUST be in \code{data} to specify the
#' maximum saturation point for the trend (see examples in \code{\link{mvgam}} for details).
#' the trend. If 'logistic', a variable labelled `cap` MUST be in \code{data} to specify the
#' maximum saturation point for the trend (see details and examples in \code{\link{mvgam}} for
#' more information).
#' Default is 'linear'.
#' @return An object of class \code{mvgam_trend}, which contains a list of
#' arguments to be interpreted by the parsing functions in \code{mvgam}
#' @details
#'*Offsets and intercepts*:
#' For each of these trend models, an offset parameter is included in the trend
#' estimation process. This parameter will be incredibly difficult to identify
#' if you also include an intercept in the observation formula. For that reason,
#' it is highly recommended that you drop the intercept from the formula
#' (i.e. `y ~ x + 0` or `y ~ x - 1`, where `x` represents your optional predictor terms).
#'\cr
#'\cr
#'*Logistic growth and the cap variable*:
#' When forecasting growth, there is often some maximum achievable point that
#' a time series can reach: for example, total market size, total population size
#' or carrying capacity in population dynamics. It can be advantageous for the forecast
#' to saturate at or near this point so that predictions are more sensible.
#' This function allows you to make forecasts using a logistic growth trend model
#' with a specified carrying capacity. Note that this capacity does not need to be static
#' over time; it can vary with each series x timepoint combination if necessary. But you
#' must supply a `cap` value for each observation in the data when using `growth = 'logistic'`.
#' For observation families that use a non-identity link function, the `cap` value will
#' be internally transformed to the link scale (i.e. your specified `cap` will be
#' log-transformed if you are using a `poisson()` or `nb()` family). It is therefore important
#' that you specify the `cap` values on the scale of your outcome. Note also that
#' no missing values are allowed in `cap`.
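#'
#' As a minimal sketch (assuming a long-format data frame `dat` with a count outcome `y`,
#' plus `time` and `series` columns), one could set `dat$cap <- 50` and then call
#' `mvgam(y ~ 0, trend_model = PW(growth = 'logistic'), family = poisson(), data = dat)`;
#' the capacity of 50 is supplied on the outcome (count) scale and will be log-transformed
#' internally because `poisson()` uses a log link.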
#'
#' @rdname piecewise_trends
#' @export
PW = function(n_changepoints = 10,
@@ -64,15 +89,27 @@ add_piecewise = function(model_file,
if(!(exists('cap', where = data_train))) {
stop('Capacities must be supplied as a variable named "cap" for logistic growth',
call. = FALSE)
}

# Capacities must be supplied for every observation
if(any(is.na(data_train$cap))){
stop('Missing values found for some "cap" terms',
call. = FALSE)
}

# Matrix of capacities per series (these must operate on the link scale)
all_caps <- data.frame(series = as.numeric(data_train$series),
time = data_train$time,
cap = suppressWarnings(linkfun(data_train$cap,
link = family$link))) %>%
dplyr::arrange(time, series)

# Check that capacities remain valid after transformation to the link scale
if(any(is.na(all_caps$cap)) | any(is.infinite(all_caps$cap))){
stop(paste0('Missing or infinite values found for some "cap" terms\n',
'after transforming to the ',
family$link, ' link scale'),
call. = FALSE)
}

if(!is.null(data_test)){
if(!(exists('cap', where = data_test))) {
stop('Capacities must also be supplied in "newdata" for logistic growth predictions',
Expand All @@ -85,6 +122,13 @@ add_piecewise = function(model_file,
cap = suppressWarnings(linkfun(data_test$cap,
link = family$link)))) %>%
dplyr::arrange(time, series)

# Apply the same link-scale check to capacities supplied in newdata
if(any(is.na(all_caps$cap)) | any(is.infinite(all_caps$cap))){
stop(paste0('Missing or infinite values found for some "cap" terms\n',
'after transforming to the ',
family$link, ' link scale'),
call. = FALSE)
}
}

cap <- matrix(NA, nrow = length(unique(all_caps$time)),
@@ -317,7 +361,7 @@ add_piecewise = function(model_file,
"// trend offset parameters\n",
"vector[n_series] m_trend;\n\n",
"// trend rate adjustments per series\n",
"matrix[n_changepoints, n_series] delta;\n")
"matrix[n_changepoints, n_series] delta_trend;\n")
model_file <- readLines(textConnection(model_file), n = -1)

# Update the transformed parameters block
@@ -333,9 +377,9 @@
'// trend estimates\n',
'for (s in 1 : n_series) {\n',
if(trend_model == 'PWlogistic'){
'trend[1 : n, s] = logistic_trend(k_trend[s], m_trend[s], to_vector(delta[,s]), time, to_vector(cap[,s]), A, t_change, n_changepoints);\n'
'trend[1 : n, s] = logistic_trend(k_trend[s], m_trend[s], to_vector(delta_trend[,s]), time, to_vector(cap[,s]), A, t_change, n_changepoints);\n'
} else {
'trend[1 : n, s] = linear_trend(k_trend[s], m_trend[s], to_vector(delta[,s]), time, A, t_change);\n'
'trend[1 : n, s] = linear_trend(k_trend[s], m_trend[s], to_vector(delta_trend[,s]), time, A, t_change);\n'
},

'}\n')
@@ -353,13 +397,15 @@
paste0('// trend parameter priors\n',
'm_trend ~ student_t(3, 0, 2.5);\n',
'k_trend ~ std_normal();\n',
'to_vector(delta) ~ double_exponential(0, changepoint_scale);\n',
'to_vector(delta_trend) ~ double_exponential(0, changepoint_scale);\n',
model_file[grep("// likelihood functions", model_file, fixed = TRUE) - 1])
model_file <- readLines(textConnection(model_file), n = -1)

# Update the generated quantities block
model_file <- model_file[-grep("vector[n_series] tau;", model_file, fixed = TRUE)]
tau_start <- grep("tau[s] = pow(sigma[s], -2.0);", model_file, fixed = TRUE) - 1
model_file <- model_file[-c(tau_start:(tau_start + 2))]
model_file <- readLines(textConnection(model_file), n = -1)

#### Return ####
return(list(model_file = model_file,