piecewise trends and descriptions
Nicholas Clark committed Nov 21, 2023
1 parent 89cdaef commit ff37826
Showing 16 changed files with 391 additions and 86 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
@@ -1,6 +1,6 @@
Package: mvgam
Title: Multivariate (Dynamic) Generalized Additive Models
Version: 1.0.7
Version: 1.0.8
Date: 2023-11-01
Authors@R:
person("Nicholas J", "Clark", , "[email protected]", role = c("aut", "cre"),
8 changes: 8 additions & 0 deletions R/forecast.mvgam.R
@@ -744,6 +744,14 @@ forecast_draws = function(object,
time = data_test$time,
cap = suppressWarnings(linkfun(data_test$cap,
link = family$link)))

# Check that capacities remain valid after transformation to the link scale
if(any(is.na(cap$cap)) | any(is.infinite(cap$cap))){
stop(paste0('Missing or infinite values found for some "cap" terms\n',
'after transforming to the ',
family$link, ' link scale'),
call. = FALSE)
}

} else {
cap <- NULL
}
42 changes: 21 additions & 21 deletions R/get_mvgam_priors.R
@@ -732,27 +732,27 @@ get_mvgam_priors = function(formula,

if(trend_model %in% c('PWlinear', 'PWlogistic')){
# Need to fix this as a next priority
trend_df <- NULL
# trend_df <- data.frame(param_name = c('vector[n_series] k_trend;',
# 'vector[n_series] m_trend;'),
# param_length = length(unique(data_train$series)),
# param_info = c('base trend growth rates',
# 'trend offset parameters'),
# prior = c('k_trend ~ std_normal();',
# 'm_trend ~ student_t(3, 0, 2.5);'),
# example_change = c(paste0(
# 'k ~ normal(',
# round(runif(min = -1, max = 1, n = 1), 2),
# ', ',
# round(runif(min = 0.1, max = 1, n = 1), 2),
# ');'),
# paste0(
# 'm ~ normal(',
# round(runif(min = -1, max = 1, n = 1), 2),
# ', ',
# round(runif(min = 0.1, max = 1, n = 1), 2),
# ');')))
#
# trend_df <- NULL
trend_df <- data.frame(param_name = c('vector[n_series] k_trend;',
'vector[n_series] m_trend;'),
param_length = length(unique(data_train$series)),
param_info = c('base trend growth rates',
'trend offset parameters'),
prior = c('k_trend ~ std_normal();',
'm_trend ~ student_t(3, 0, 2.5);'),
example_change = c(paste0(
'k ~ normal(',
round(runif(min = -1, max = 1, n = 1), 2),
', ',
round(runif(min = 0.1, max = 1, n = 1), 2),
');'),
paste0(
'm ~ normal(',
round(runif(min = -1, max = 1, n = 1), 2),
', ',
round(runif(min = 0.1, max = 1, n = 1), 2),
');')))


}
if(trend_model == 'GP'){
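
The k_trend and m_trend priors defined above surface to users through get_mvgam_priors(), whose
output can be edited and passed back to mvgam() via its priors argument. A rough sketch of that
workflow (here dat is a placeholder for a long-format data frame with y, time and series columns):

# Inspect the default priors for a piecewise linear trend
p <- get_mvgam_priors(y ~ 0,
                      data = dat,
                      family = poisson(),
                      trend_model = 'PWlinear')

# Tighten the prior on the base growth rates, then refit with the modified priors
p$prior[p$param_name == 'vector[n_series] k_trend;'] <- 'k_trend ~ normal(0, 0.5);'
mod <- mvgam(y ~ 0,
             data = dat,
             family = poisson(),
             trend_model = 'PWlinear',
             priors = p)
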
4 changes: 2 additions & 2 deletions R/index-mvgam.R
@@ -138,7 +138,7 @@ variables.mvgam = function(x, ...){
'ar1', 'ar2',
'ar3', 'A',
'Sigma', 'error', 'theta',
'k_trend', 'delta', 'm_trend'), collapse = '|'),
'k_trend', 'delta_trend', 'm_trend'), collapse = '|'),
parnames) &
!grepl('sigma_obs', parnames, fixed = TRUE) &
!grepl('sigma_raw', parnames, fixed = TRUE))){
@@ -147,7 +147,7 @@
'ar1', 'ar2',
'ar3', 'A',
'Sigma', 'error', 'theta',
'k_trend', 'delta', 'm_trend'), collapse = '|'),
'k_trend', 'delta_trend', 'm_trend'), collapse = '|'),
parnames) &
!grepl('sigma_obs', parnames, fixed = TRUE) &
!grepl('sigma_raw', parnames, fixed = TRUE)
99 changes: 80 additions & 19 deletions R/mvgam.R
@@ -78,10 +78,11 @@
#' \item `'AR2'` or `AR(p = 2)`
#' \item `'AR3'` or `AR(p = 3)`
#' \item `'VAR1'` or `VAR()` (only available in \code{Stan})
#' \item `'PWlogistic'`, `'PWlinear'` or `PW()` (only available in \code{Stan})
#' \item `'GP'` (Gaussian Process with squared exponential kernel;
#'only available in \code{Stan})}
#'
#'For all types apart from `'GP'`, moving average and/or correlated
#'For all types apart from `'GP'` and `PW()`, moving average and/or correlated
#'process error terms can also be estimated (for example, `RW(cor = TRUE)` will set up a
#'multivariate Random Walk if `n_series > 1`). See [mvgam_trends] for more details
#'@param trend_map Optional `data.frame` specifying which series should depend on which latent
@@ -170,7 +171,12 @@
#'\cr
#'\cr
#'*Formula syntax*: Details of the formula syntax used by \pkg{mvgam} can be found in
#'\code{\link[mgcv]{formula.gam}}.
#'\code{\link[mgcv]{formula.gam}}. Note that it is possible to supply an empty formula where
#'there are no predictors or intercepts in the observation model (i.e. `y ~ 0` or `y ~ -1`).
#'In this case, an intercept-only observation model will be set up but the intercept coefficient
#'will be fixed at zero. This can be handy if you wish to fit pure State-Space models where
#'the variation in the dynamic trend controls the average expectation, and/or where intercepts
#'are non-identifiable (as in piecewise trends; see the examples below).
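#'As a minimal sketch (assuming a long-format data frame `dat` containing `y`, `time` and
#'`series` columns), `mvgam(y ~ 0, trend_model = PW(), family = poisson(), data = dat)` drops
#'the observation-model intercept and lets the piecewise trend carry the average expectation.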
#'\cr
#'\cr
#'*Families and link functions*: Details of families supported by \pkg{mvgam} can be found in
@@ -354,20 +360,20 @@
#' trend = c(1,1,2))
#'
#' # Fit the model using AR1 trends
#' mod1 <- mvgam(y ~ s(season, bs = 'cc'),
#' mod <- mvgam(y ~ s(season, bs = 'cc'),
#' trend_map = trend_map,
#' trend_model = 'AR1',
#' data = mod_data,
#' return_model_data = TRUE)
#'
#' # The mapping matrix is now supplied as data to the model in the 'Z' element
#' mod$model_data$Z
#' code(mod1)
#' code(mod)
#'
#' # The first two series share an identical latent trend; the third is different
#' plot(mod1, type = 'trend', series = 1)
#' plot(mod1, type = 'trend', series = 2)
#' plot(mod1, type = 'trend', series = 3)
#' plot(mod, type = 'trend', series = 1)
#' plot(mod, type = 'trend', series = 2)
#' plot(mod, type = 'trend', series = 3)
#'
#' # Example of how to use dynamic coefficients
#' # Simulate a time-varying coefficient for the effect of temperature
@@ -418,7 +424,7 @@
#'
#' # Fit a model that includes the offset in the linear predictor as well as
#' # hierarchical seasonal smooths
#' mod1 <- mvgam(formula = y ~ offset(offset) +
#' mod <- mvgam(formula = y ~ offset(offset) +
#' s(series, bs = 're') +
#' s(season, bs = 'cc') +
#' s(season, by = series, m = 1, k = 5),
@@ -428,25 +434,25 @@
#'
#' # Inspect the model file to see the modification to the linear predictor
#' # (eta)
#' mod1$model_file
#' mod$model_file
#'
#' # Forecasts for the first two series will differ in magnitude
#' layout(matrix(1:2, ncol = 2))
#' plot(mod1, type = 'forecast', series = 1, newdata = dat$data_test,
#' plot(mod, type = 'forecast', series = 1, newdata = dat$data_test,
#' ylim = c(0, 75))
#' plot(mod1, type = 'forecast', series = 2, newdata = dat$data_test,
#' plot(mod, type = 'forecast', series = 2, newdata = dat$data_test,
#' ylim = c(0, 75))
#' layout(1)
#'
#' # Changing the offset for the testing data should lead to changes in
#' # the forecast
#' dat$data_test$offset <- dat$data_test$offset - 2
#' plot(mod1, 'forecast', newdata = dat$data_test)
#' plot(mod, 'forecast', newdata = dat$data_test)
#'
#' # Relative Risks can be computed by fixing the offset to the same value
#' # for each series
#' dat$data_test$offset <- rep(1, NROW(dat$data_test))
#' preds_rr <- predict(mod1, type = 'link', newdata = dat$data_test)
#' preds_rr <- predict(mod, type = 'link', newdata = dat$data_test)
#' series1_inds <- which(dat$data_test$series == 'series_1')
#' series2_inds <- which(dat$data_test$series == 'series_2')
#'
@@ -467,6 +473,66 @@
#' }
#' layout(1)
#'
#' # Example of logistic growth with possible changepoints
#' # Simple logistic growth model
#' dNt = function(r, N, k){
#' r * N * (k - N)
#' }
#'
#' # Iterate growth through time
#' Nt = function(r, N, t, k) {
#' for (i in 1:(t - 1)) {
#'
#' # population at next time step is current population + growth,
#' # but we introduce several 'shocks' as changepoints
#' if(i %in% c(5, 15, 25, 41, 45, 60, 80)){
#' N[i + 1] <- max(1, N[i] + dNt(r + runif(1, -0.1, 0.1),
#' N[i], k))
#' } else {
#' N[i + 1] <- max(1, N[i] + dNt(r, N[i], k))
#' }
#' }
#' N
#' }
#'
#' # Simulate expected values
#' set.seed(11)
#' expected <- Nt(0.004, 2, 100, 30)
#' plot(expected, xlab = 'Time')
#'
#' # Take Poisson draws
#' y <- rpois(100, expected)
#' plot(y, xlab = 'Time')
#'
#' # Assemble the data into a data.frame for modelling. We set a
#' # fixed carrying capacity of 35 for this example, but note that
#' # this value does not need to be the same at every timepoint
#' mod_data <- data.frame(y = y,
#' time = 1:100,
#' cap = 35,
#' series = as.factor('series_1'))
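#'
#' # The capacity is also allowed to vary over time; for instance, a seasonally
#' # varying capacity could be supplied instead (sketch, not run here):
#' # mod_data$cap <- 35 + 5 * sin(2 * pi * mod_data$time / 50)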
#' plot_mvgam_series(data = mod_data)
#'
#' # The intercept is nonidentifiable when using piecewise
#' # trends because the trend functions have their own offset
#' # parameters 'm'; it is recommended to always drop intercepts
#' # when using these trend models
#' mod <- mvgam(y ~ 0,
#' trend_model = PW(growth = 'logistic'),
#' family = poisson(),
#' data = mod_data)
#' summary(mod)
#'
#' # Plot the posterior hindcast
#' plot(mod, type = 'forecast')
#'
#' # View the changepoints with ggplot2 utilities
#' library(ggplot2)
#' mcmc_plot(mod, variable = 'delta_trend',
#' regex = TRUE) +
#' scale_y_discrete(labels = mod$trend_model$changepoints) +
#' labs(y = 'Potential changepoint',
#' x = 'Rate change')
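#'
#' # Forecasting ahead requires 'cap' values in newdata as well; a rough sketch
#' # that simply carries the same capacity forward for 10 further timesteps
#' newdata <- data.frame(y = NA,
#'                       time = 101:110,
#'                       cap = 35,
#'                       series = as.factor('series_1'))
#' fc <- forecast(mod, newdata = newdata)
#' plot(fc)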
#' }
#'@export

@@ -754,12 +820,7 @@ mvgam = function(formula,
family = family,
use_lv = use_lv,
n_lv = n_lv,
trend_model =
if(trend_model %in% c('PWlinear', 'PWlogistic')){
'RW'
} else {
trend_model
},
trend_model = trend_model,
trend_map = trend_map,
drift = drift)

58 changes: 52 additions & 6 deletions R/piecewise_trends.R
@@ -15,11 +15,36 @@
#' Large values will allow many changepoints and a more flexible trend, while
#' small values will allow few changepoints. Default is `0.05`.
#' @param growth Character string specifying either 'linear' or 'logistic' growth of
#' the trend. If 'logistic', a variable labelled 'cap' MUST be in \code{data} to specify the
#' maximum saturation point for the trend (see examples in \code{\link{mvgam}} for details).
#' the trend. If 'logistic', a variable labelled `cap` MUST be in \code{data} to specify the
#' maximum saturation point for the trend (see details and examples in \code{\link{mvgam}} for
#' more information).
#' Default is 'linear'.
#' @return An object of class \code{mvgam_trend}, which contains a list of
#' arguments to be interpreted by the parsing functions in \code{mvgam}
#' @details
#'*Offsets and intercepts*:
#' For each of these trend models, an offset parameter is included in the trend
#' estimation process. This parameter will be incredibly difficult to identify
#' if you also include an intercept in the observation formula. For that reason,
#' it is highly recommended that you drop the intercept from the formula
#' (i.e. `y ~ x + 0` or `y ~ x - 1`, where `x` represents your optional predictor terms).
#'\cr
#'\cr
#'*Logistic growth and the cap variable*:
#' When forecasting growth, there is often some maximum achievable point that
#' a time series can reach: for example, total market size, total population size
#' or carrying capacity in population dynamics. It can be advantageous for the forecast
#' to saturate at or near this point so that predictions are more sensible.
#' This function allows you to make forecasts using a logistic growth trend model
#' with a specified carrying capacity. Note that this capacity does not need to be static
#' over time; it can vary with each series x timepoint combination if necessary. But you
#' must supply a `cap` value for each observation in the data when using `growth = 'logistic'`.
#' For observation families that use a non-identity link function, the `cap` value will
#' be internally transformed to the link scale (i.e. your specified `cap` will be
#' log-transformed if you are using a `poisson()` or `nb()` family). It is therefore important
#' that you specify the `cap` values on the scale of your outcome. Note also that
#' no missing values are allowed in `cap`.
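#'
#' As a minimal sketch (assuming a long-format data frame `dat` with a count outcome `y`,
#' plus `time` and `series` columns), one could set `dat$cap <- 50` and then call
#' `mvgam(y ~ 0, trend_model = PW(growth = 'logistic'), family = poisson(), data = dat)`;
#' the capacity of 50 is supplied on the outcome (count) scale and will be log-transformed
#' internally because `poisson()` uses a log link.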
#'
#' @rdname piecewise_trends
#' @export
PW = function(n_changepoints = 10,
@@ -64,15 +89,27 @@ add_piecewise = function(model_file,
if(!(exists('cap', where = data_train))) {
stop('Capacities must be supplied as a variable named "cap" for logistic growth',
call. = FALSE)
}

# Capacities must be supplied for every observation
if(any(is.na(data_train$cap))){
stop('Missing values found for some "cap" terms',
call. = FALSE)
}

# Matrix of capacities per series (these must operate on the link scale)
all_caps <- data.frame(series = as.numeric(data_train$series),
time = data_train$time,
cap = suppressWarnings(linkfun(data_train$cap,
link = family$link))) %>%
dplyr::arrange(time, series)

# Check that capacities remain valid after transformation to the link scale
if(any(is.na(all_caps$cap)) | any(is.infinite(all_caps$cap))){
stop(paste0('Missing or infinite values found for some "cap" terms\n',
'after transforming to the ',
family$link, ' link scale'),
call. = FALSE)
}

if(!is.null(data_test)){
if(!(exists('cap', where = data_test))) {
stop('Capacities must also be supplied in "newdata" for logistic growth predictions',
Expand All @@ -85,6 +122,13 @@ add_piecewise = function(model_file,
cap = suppressWarnings(linkfun(data_test$cap,
link = family$link)))) %>%
dplyr::arrange(time, series)

# Apply the same link-scale check to capacities supplied in newdata
if(any(is.na(all_caps$cap)) | any(is.infinite(all_caps$cap))){
stop(paste0('Missing or infinite values found for some "cap" terms\n',
'after transforming to the ',
family$link, ' link scale'),
call. = FALSE)
}
}

cap <- matrix(NA, nrow = length(unique(all_caps$time)),
@@ -317,7 +361,7 @@ add_piecewise = function(model_file,
"// trend offset parameters\n",
"vector[n_series] m_trend;\n\n",
"// trend rate adjustments per series\n",
"matrix[n_changepoints, n_series] delta;\n")
"matrix[n_changepoints, n_series] delta_trend;\n")
model_file <- readLines(textConnection(model_file), n = -1)

# Update the transformed parameters block
@@ -333,9 +377,9 @@
'// trend estimates\n',
'for (s in 1 : n_series) {\n',
if(trend_model == 'PWlogistic'){
'trend[1 : n, s] = logistic_trend(k_trend[s], m_trend[s], to_vector(delta[,s]), time, to_vector(cap[,s]), A, t_change, n_changepoints);\n'
'trend[1 : n, s] = logistic_trend(k_trend[s], m_trend[s], to_vector(delta_trend[,s]), time, to_vector(cap[,s]), A, t_change, n_changepoints);\n'
} else {
'trend[1 : n, s] = linear_trend(k_trend[s], m_trend[s], to_vector(delta[,s]), time, A, t_change);\n'
'trend[1 : n, s] = linear_trend(k_trend[s], m_trend[s], to_vector(delta_trend[,s]), time, A, t_change);\n'
},

'}\n')
@@ -353,13 +397,15 @@
paste0('// trend parameter priors\n',
'm_trend ~ student_t(3, 0, 2.5);\n',
'k_trend ~ std_normal();\n',
'to_vector(delta) ~ double_exponential(0, changepoint_scale);\n',
'to_vector(delta_trend) ~ double_exponential(0, changepoint_scale);\n',
model_file[grep("// likelihood functions", model_file, fixed = TRUE) - 1])
model_file <- readLines(textConnection(model_file), n = -1)

# Update the generated quantities block
model_file <- model_file[-grep("vector[n_series] tau;", model_file, fixed = TRUE)]
tau_start <- grep("tau[s] = pow(sigma[s], -2.0);", model_file, fixed = TRUE) - 1
model_file <- model_file[-c(tau_start:(tau_start + 2))]
model_file <- readLines(textConnection(model_file), n = -1)

#### Return ####
return(list(model_file = model_file,