diff --git a/R/add_stan_data.R b/R/add_stan_data.R
new file mode 100644
index 00000000..9afe3e8f
--- /dev/null
+++ b/R/add_stan_data.R
@@ -0,0 +1,308 @@
+#' Add remaining data, model and parameter blocks to a Stan model
+#'
+#'
+#' @export
+#' @param jags_file Prepared JAGS mvgam model file
+#' @param stan_file Incomplete Stan model file to be edited
+#' @param jags_data Prepared mvgam data for JAGS modelling
+#' @return A `list` containing the updated Stan model and model data
+add_stan_data = function(jags_file, stan_file, jags_data, family = 'poisson'){
+
+  #### Modify the Stan file ####
+  # Update lines associated with particular family
+  if(family == 'poisson'){
+
+  }
+
+  if(family == 'nb'){
+    stan_file[grep('// raw basis', stan_file) + 2] <-
+  '\n// negative binomial overdispersion\nvector<lower=0>[n_series] r_inv;\n'
+
+    stan_file[grep('// priors for smoothing', stan_file) + 2] <-
+    '\n// priors for overdispersion parameters\nr_inv ~ normal(0, 10);\n'
+
+      to_negbin <- gsub('poisson_log', 'neg_binomial_2',
+           stan_file[grep('y[i, s] ~ poisson', stan_file, fixed = T)])
+      stan_file[grep('y[i, s] ~ poisson', stan_file, fixed = T)] <-
+      gsub(');', ', inv(r_inv[s]));', to_negbin)
+
+      add_exp_open <- gsub('\\(eta', '(exp(eta',
+                      stan_file[grep('y[i, s] ~ neg_binomial', stan_file, fixed = T)])
+      add_exp_cl <- gsub('],', ']),',
+                         add_exp_open)
+      stan_file[grep('y[i, s] ~ neg_binomial', stan_file, fixed = T)] <-
+        add_exp_cl
+
+      stan_file[grep('// posterior predictions', stan_file, fixed = T) - 1] <-
+        paste0('matrix[n, n_series] r_vec;\n',
+               'vector[n_series] r;\n',
+               'r = inv(r_inv);\n',
+               'for (s in 1:n_series) {\n',
+               'r_vec[1:n,s] = rep_vector(inv(r_inv[s]), n);\n}\n')
+
+      to_negbin <- gsub('poisson_log_rng', 'neg_binomial_2_rng',
+                        stan_file[grep('ypred = poisson_log_rng', stan_file, fixed = T)])
+      stan_file[grep('ypred = poisson_log_rng', stan_file, fixed = T)] <-
+        gsub(');', ', to_vector(r_vec));', to_negbin)
+
+      add_exp_open <- gsub('\\(eta', '(exp(eta',
+                           stan_file[grep('ypred = neg_binomial', stan_file, fixed = T)])
+
+      if(any(grepl('to_vector(trend)', stan_file, fixed = T))){
+        add_exp_cl <- gsub('to_vector(trend)', 'to_vector(trend))',
+                           add_exp_open, fixed = T)
+      } else {
+        add_exp_cl <- gsub('eta)', 'eta,),',
+                           add_exp_open)
+      }
+
+      stan_file[grep('ypred = neg_binomial', stan_file, fixed = T)] <-
+        add_exp_cl
+
+    stan_file <- readLines(textConnection(stan_file), n = -1)
+  }
+
+  # Get dimensions and numbers of smooth terms
+  snames <- names(jags_data)[grep('S.*', names(jags_data))]
+  smooth_dims <- matrix(NA, ncol = 2, nrow = length(snames))
+  for(i in 1:length(snames)){
+    smooth_dims[i,] <- dim(jags_data[[snames[i]]])
+  }
+
+  # Insert the data block for the model
+  smooth_penalty_data <- vector()
+  for(i in 1:length(snames)){
+    smooth_penalty_data[i] <- paste0('matrix[', smooth_dims[i, 1],
+                                     ',',
+                                     smooth_dims[i, 2], '] ',
+                                     snames[i],
+                                     '; // mgcv smooth penalty matrix ', snames[i])
+  }
+
+  # Search for any non-contiguous indices that sometimes are used by mgcv
+  if(any(grep('in c\\(', jags_file))){
+    add_idxs <- TRUE
+    seq_character = function(x){
+      all_nums <- as.numeric(unlist(strsplit(x, ':')))
+      if(length(all_nums) > 1){
+        out <- seq(all_nums[1], all_nums[2])
+      } else {
+        out <- all_nums
+      }
+      out
+    }
+
+    idx_locations <- grep('in c\\(', jags_file)
+    idx_vals <- list()
+    idx_data <- vector()
+    for(i in 1:length(idx_locations)){
+      list_vals <- unlist(strsplit(gsub('^.*c\\(*|\\s*).*$', '', jags_file[idx_locations[i]]), ','))
+      idx_vals[[i]] <- unlist(lapply(list_vals, seq_character))
+      idx_data[i] <- paste0('int idx', i, '[', length(idx_vals[[i]]), ']; // discontiguous index values')
+      jags_file[idx_locations][i] <- sub("in.*\\)\\)", paste0("in idx", i, ')'), jags_file[idx_locations][i])
+    }
+
+    # Update the Stan data block
+    stan_file[grep('##insert data',
+                         stan_file)] <- paste0('//Stan code generated by package mvgam\n',
+                                                     'data {',
+                                                     '\n',
+                                                     paste0(idx_data, collapse = '\n'), '\n',
+                                                     'int<lower=0> total_obs; // total number of observations\n',
+                                                     'int<lower=0> n; // number of timepoints per series\n',
+                                                     'int<lower=0> n_sp; // number of smoothing parameters\n',
+                                                     'int<lower=0> n_series; // number of series\n',
+                                                     'int<lower=0> num_basis; // total number of basis coefficients\n',
+                                                     'vector[num_basis] zero; // priro locations for basis coefficients\n',
+                                                     'matrix[num_basis, total_obs] X; // transposed mgcv GAM design matrix\n',
+                                                     'int<lower=0> ytimes[n, n_series]; // time-ordered matrix (which col in X belongs to each [time, series] observation?)\n',
+                                                     paste0(smooth_penalty_data, collapse = '\n'), '\n',
+                                                     'int<lower=0, upper=1> y_observed[n, n_series]; // indices of missing vs observed\n',
+                                                     'int<lower=-1> y[n, n_series]; // time-ordered observations, with -1 indicating missing\n',
+                                                     '}\n')
+  } else {
+    add_idxs <- FALSE
+    stan_file[grep('##insert data',
+                         stan_file)] <- paste0('//Stan code generated by package mvgam\n',
+                                                     'data {',
+                                                     '\n',
+                                                     'int<lower=0> total_obs; // total number of observations\n',
+                                                     'int<lower=0> n; // number of timepoints per series\n',
+                                                     'int<lower=0> n_sp; // number of smoothing parameters\n',
+                                                     'int<lower=0> n_series; // number of series\n',
+                                                     'int<lower=0> num_basis; // total number of basis coefficients\n',
+                                                     'vector[num_basis] zero; // prior locations for basis coefficients\n',
+                                                     'matrix[num_basis, total_obs] X; // transposed mgcv GAM design matrix\n',
+                                                     'int<lower=0> ytimes[n, n_series]; // time-ordered matrix (which col in X belongs to each [time, series] observation?)\n',
+                                                     paste0(smooth_penalty_data, collapse = '\n'), '\n',
+                                                     'int<lower=0, upper=1> y_observed[n, n_series]; // indices of missing vs observed\n',
+                                                     'int<lower=-1> y[n, n_series]; // time-ordered observations, with -1 indicating missing\n',
+                                                     '}\n')
+  }
+  stan_file <- readLines(textConnection(stan_file), n = -1)
+
+  # Modify the model block to include each smooth term
+  smooths_start <- grep('## GAM-specific priors', jags_file) + 1
+  smooths_end <- grep('## smoothing parameter priors...', jags_file) - 1
+  jags_smooth_text <- jags_file[smooths_start:smooths_end]
+  jags_smooth_text <- gsub('##', '//', jags_smooth_text)
+  jags_smooth_text <- gsub('dexp', 'exponential', jags_smooth_text)
+
+  K_starts <- grep('K.* <- ', jags_smooth_text)
+  for(i in 1:length(K_starts)){
+    jags_smooth_text[K_starts[i]+1] <- gsub('\\bb\\b', 'b_raw',
+                                            gsub('dmnorm', 'multi_normal_prec',
+                                                 paste0(gsub('K.*',
+                                                             trimws(gsub('K.* <- ', '',
+                                                                         jags_smooth_text[K_starts[i]])),
+                                                             jags_smooth_text[K_starts[i]+1]), ')')))
+  }
+  jags_smooth_text <- jags_smooth_text[-K_starts]
+  if(any(grep('b\\[i\\] <- b_raw', jags_smooth_text))){
+    jags_smooth_text <- jags_smooth_text[-grep('b\\[i\\] <- b_raw', jags_smooth_text)]
+  }
+  jags_smooth_text <- gsub('dnorm', 'normal', jags_smooth_text)
+  jags_smooth_text <- gsub('  ', ' ', jags_smooth_text)
+  jags_smooth_text[-grep('//|\\}|\\{', jags_smooth_text)] <-
+    paste0(jags_smooth_text[-grep('//|\\}|\\{', jags_smooth_text)], ';')
+  jags_smooth_text <- gsub(') }', '); }', jags_smooth_text)
+  jags_smooth_text <- gsub('}', '}\n', jags_smooth_text)
+  jags_smooth_text[(grep('//',
+                         jags_smooth_text) - 1)[-1]] <-
+    paste0(jags_smooth_text[(grep('//',
+                                  jags_smooth_text) - 1)[-1]], '\n')
+  stan_file[grep('##insert smooths',
+                       stan_file)] <- paste0(jags_smooth_text, collapse = '\n')
+  stan_file <- readLines(textConnection(stan_file), n = -1)
+
+  # Deal with any random effect priors
+  if(any(grep('b_raw\\[i\\] ~', stan_file))){
+    b_raw_string <- paste0(stan_file[grep('b_raw\\[i\\] ~', stan_file)-1], collapse = ',')
+    n_b_raw <- max(as.numeric(unlist(regmatches(b_raw_string,
+                                                gregexpr("[[:digit:]]+",
+                                                         b_raw_string)))))
+
+    n_sigma_raw <- max(as.numeric(unlist(regmatches(grep('sigma_raw', stan_file, value = T),
+                                                    gregexpr("[[:digit:]]+",
+                                                             grep('sigma_raw',
+                                                                  stan_file, value = T))))))
+
+
+    stan_file <- stan_file[-grep('mu_raw.* ~ ', stan_file)]
+    stan_file <- stan_file[-grep('<- mu_raw', stan_file)]
+    stan_file <- stan_file[-grep('sigma_raw.* ~ ', stan_file)]
+    stan_file[grep('model \\{', stan_file)] <-
+      paste0('model {\n// prior for random effect population variances\nsigma_raw ~ exponential(0.5);\n\n',
+             '// prior for random effect population means\nmu_raw ~ normal(0, 1);\n')
+
+    stan_file[grep('parameters \\{', stan_file)[1] + 2] <-
+      paste0(stan_file[grep('parameters \\{', stan_file)[1] + 2],
+             '\n',
+             '\n// random effect variances\n',
+             paste0('vector<lower=0>[',n_sigma_raw,'] sigma_raw', ';\n', collapse = ''),
+             '\n',
+             paste0('vector<lower=0>[',n_sigma_raw,'] mu_raw', ';\n', collapse = ''))
+
+    b_raw_text <- vector()
+    b_raw_indices <- grep('b_raw\\[i\\] ~', stan_file)
+    for(i in 1:length(b_raw_indices)){
+
+      b_raw_text[i] <- paste0('for (i in ', as.numeric(sub('.*(?=.$)', '',
+                                                           sub("\\:.*", "",
+                                                               stan_file[b_raw_indices[i] - 1]), perl=T)),
+                              ':', as.numeric(substr(sub(".*\\:", "",
+                                                         stan_file[b_raw_indices[i]-1]),
+                                                     1, 1)),') {\nb[i] <- mu_raw[', i, '] + b_raw[i] * sigma_raw[',i,
+                              '];\n}')
+    }
+
+    # If parametric coefficients are included, they'll come before random effects
+    min_re_betas <- as.numeric(sub('.*(?=.$)', '',
+                                   sub("\\:.*", "",
+                                       stan_file[b_raw_indices[1]-1]), perl=T))
+    if(min_re_betas > 1){
+      b_raw_text <- c(paste0('\nfor (i in 1:',
+                             min_re_betas - 1, ') {\nb[i] <- b_raw[i];\n}'),
+                      b_raw_text,
+                      paste0('\nfor (i in ',  n_b_raw+1,':num_basis) {\nb[i] <- b_raw[i];\n}\n'))
+    } else {
+      b_raw_text <- c(b_raw_text,
+                      paste0('\nfor (i in ',  n_b_raw+1,':num_basis) {\nb[i] <- b_raw[i];\n}\n'))
+    }
+
+    stan_file[grep('// basis coefficients', stan_file) + 2] <- paste0(b_raw_text,
+                                                                                  collapse = '\n')
+    stan_file <- readLines(textConnection(stan_file), n = -1)
+
+    # If no random effects, betas are equal to beta_raws
+  } else {
+    stan_file[grep('// basis coefficients', stan_file) + 2] <-
+      paste0('\nfor (i in ','1:num_basis) {\nb[i] <- b_raw[i];\n}')
+    stan_file <- readLines(textConnection(stan_file), n = -1)
+  }
+
+  # Update parametric effect priors
+  if(any(grep('// parametric effect', stan_file))){
+    stan_file[grep('// parametric effect', stan_file) + 1] <-
+      paste0('for (i in ',
+
+             as.numeric(sub('.*(?=.$)', '',
+                            sub("\\:.*", "",
+                                stan_file[grep('// parametric effect', stan_file) + 1]), perl=T)),
+             ':', as.numeric(substr(sub(".*\\:", "",
+                                        stan_file[grep('// parametric effect', stan_file) + 1]),
+                                    1, 1)),
+             ') {\nb_raw[i] ~ normal(0, 1);\n}')
+    stan_file <- readLines(textConnection(stan_file), n = -1)
+  }
+  unlink('base_gam_stan.txt')
+  stan_file <- readLines(textConnection(stan_file), n = -1)
+
+  # Final tidying of the Stan model for readability
+  clean_up <- vector()
+  for(x in 1:length(stan_file)){
+    clean_up[x] <- stan_file[x-1] == "" & stan_file[x] == ""
+  }
+  clean_up[is.na(clean_up)] <- FALSE
+  stan_file <- stan_file[!clean_up]
+
+
+  #### Modify the Stan data list ####
+  # Create matrix representing whether an observation was missing or not
+  y_observed <- matrix(NA, ncol = NCOL(jags_data$y),
+                       nrow = NROW(jags_data$y))
+  for (i in 1:dim(jags_data$y)[1]) {
+    for (s in 1:dim(jags_data$y)[2]) {
+      if (is.na(jags_data$y[i, s])) {
+        y_observed[i, s] = 0
+      } else {
+        y_observed[i, s] = 1
+      }
+    }
+  }
+
+  # Use -1 for any missing observations so Stan doesn't throw errors due to NAs
+  y <- jags_data$y
+  y[is.na(y)] <- -1
+
+  # The data list for Stan
+  stan_data <- jags_data
+  stan_data$y <- y
+  stan_data$y_observed <- y_observed
+  stan_data$X <- t(stan_data$X)
+  stan_data$total_obs <- NCOL(stan_data$X)
+  stan_data$num_basis <- NROW(stan_data$X)
+  stan_data$n_sp <- as.numeric(sub('\\) \\{', '',
+                                   sub('for \\(i in 1\\:', '',
+                                       jags_file[grep('lambda\\[i\\] ~ ',
+                                                       trimws(jags_file)) - 1])))
+
+  # Add discontiguous index values if required
+  if(add_idxs){
+    names(idx_vals) <- paste0('idx', seq_len(length(idx_vals)))
+    stan_data <- append(stan_data, idx_vals)
+  }
+
+  return(list(stan_file = stan_file,
+              model_data = stan_data))
+}
diff --git a/R/mvgam.R b/R/mvgam.R
new file mode 100644
index 00000000..e60688db
--- /dev/null
+++ b/R/mvgam.R
@@ -0,0 +1,1040 @@
+#'Fit a Bayesian dynamic GAM to a univariate or multivariate set of discrete time series
+#'
+#'This function estimates the posterior distribution for Generalised Additive Models (GAMs) that can include
+#'smooth functions, specified in the GAM formula, as well as latent temporal processes, specified by trend_model.
+#'There are currently two options for specifying the structures of the trends (either as latent
+#'dynamic factors to capture trend dependencies among series in a reduced dimension format, or as independent trends)
+#'
+#'
+#'@param formula A \code{character} string specifying the GAM formula. These are exactly like the formula
+#'for a GLM except that smooth terms, s, te, ti and t2, can be added to the right hand side
+#'to specify that the linear predictor depends on smooth functions of predictors (or linear functionals of these).
+#'@param knots An optional \code{list} containing user specified knot values to be used for basis construction.
+#'For most bases the user simply supplies the knots to be used, which must match up with the k value supplied
+#'(note that the number of knots is not always just k). Different terms can use different numbers of knots,
+#'unless they share a covariate.
+#'@param data_train A \code{dataframe} or \code{list} containing the model response variable and covariates
+#'required by the GAM \code{formula}. Should include columns:
+#''y' (the discrete outcomes; \code{NA}s allowed)
+#''series' (character or factor index of the series IDs)
+#''time' (numeric index of the time point for each observation).
+#'Any other variables to be included in the linear predictor of \code{formula} must also be present
+#'@param data_test Optional \code{dataframe} or \code{list} of test data containing at least 'series' and 'time'
+#'in addition to any other variables included in the linear predictor of \code{formula}. If included, the
+#'observations in \code{data_test} will be set to \code{NA} when fitting the model so that posterior
+#'simulations can be obtained
+#'@param run_model \code{logical}. If \code{FALSE}, the model is not fitted but instead the function will
+#'return the model file and the data / initial values that are needed to fit the \code{JAGS} model
+#'@param prior_simulation \code{logical}. If \code{TRUE}, no observations are fed to the model, and instead
+#'simulations from prior distributions are returned
+#'@param return_model_data \code{logical}. If \code{TRUE}, the list of data that is needed to fit the
+#'model is returned, along with the initial values for smooth and AR parameters, once the model is fitted.
+#'This will be helpful if users wish to modify the model file to add
+#'other stochastic elements that are not currently avaiable in \code{mvgam}. Default is \code{FALSE} to reduce
+#'the size of the returned object, unless \code{run_model == FALSE}
+#'@param family \code{character}. Must be either 'nb' (for Negative Binomial), 'tw' (for Tweedie) or 'poisson'
+#'@param use_lv \code{logical}. If \code{TRUE}, use dynamic factors to estimate series'
+#'latent trends in a reduced dimension format. If \code{FALSE}, estimate independent latent trends for each series
+#'@param n_lv \code{integer} the number of latent dynamic factors to use if \code{use_lv == TRUE}.
+#'Cannot be \code{>n_series}. Defaults arbitrarily to \code{min(5, floor(n_series / 2))}
+#'@param trend_model \code{character} specifying the time series dynamics for the latent trend. Options are:
+#''None' (no latent trend component; i.e. the GAM component is all that contributes to the linear predictor,
+#'and the observation process is the only source of error; similarly to what is estimated by \code{\link[mcgv]{gam}}),
+#''RW' (random walk with possible drift),
+#''AR1' (AR1 model with intercept),
+#''AR2' (AR2 model with intercept) or
+#''AR3' (AR3 model with intercept) or
+#''GP' (Gaussian process with squared exponential kernel; currently under development and
+#'only available for estimation in stan)
+#'@param drift \code{logical} estimate a drift parameter in the latent trend components. Useful if the latent
+#'trend is expected to broadly follow a non-zero slope. Note that if the latent trend is more or less stationary,
+#'the drift parameter can become unidentifiable, especially if an intercept term is included in the GAM linear
+#'predictor (which it is by default when calling \code{\link[mcgv]{jagam}}). Therefore this defaults to \code{FALSE}
+#'@param chains \code{integer} specifying the number of parallel chains for the model
+#'@param burnin \code{integer} specifying the number of iterations of the Markov chain to run during
+#'adaptive mode to tune sampling algorithms
+#'@param n_samples \code{integer} specifying the number of iterations of the Markov chain to run for
+#'sampling the posterior distribution
+#'@param thin Thinning interval for monitors
+#'@param parallel \code{logical} specifying whether multiple cores should be used for
+#'for generating \code{JAGS} simulations in parallel. If \code{TRUE}, the number of cores to use will be
+#'\code{min(c(chains, parallel::detectCores() - 1))}
+#'@param phi_prior \code{character} specifying (in JAGS syntax) the prior distribution for the drift terms/intercepts
+#'in the latent trends
+#'@param ar_prior \code{character} specifying (in JAGS syntax) the prior distribution for the AR terms
+#'in the latent trends
+#'@param r_prior \code{character} specifying (in JAGS syntax) the prior distribution for the Negative Binomial
+#'overdispersion parameters. Note that this prior acts on the SQUARE ROOT of \code{r}, which is convenient
+#'for inducing a complexity-penalising prior model whereby the observation process reduces to a Poisson
+#'as the sampled parameter approaches \code{0}. Ignored if family is Poisson or Tweedie
+#'@param twdis_prior \code{character} specifying (in JAGS syntax) the prior distribution for the Tweedie
+#'overdispersion parameters. Ignored if family is Poisson or Negative Binomial
+#'@param sigma_prior \code{character} specifying (in JAGS syntax) the prior distributions for the independent gaussian
+#'variances used for the latent trends (ignored if \code{use_lv == TRUE})
+#'@param upper_bounds Optional \code{vector} of \code{integer} values specifying upper limits for each series. If supplied,
+#'this generates a modified likelihood where values above the bound are given a likelihood of zero. Note this modification
+#'is computationally expensive in \code{JAGS} but can lead to better estimates when true bounds exist. Default is to remove
+#'truncation entirely (i.e. there is no upper bound for each series)
+#'@param use_stan Logical. If \code{TRUE} and if \code{rstan} is installed, the model will be compiled and sampled using
+#'the Hamiltonian Monte Carlo with a call to \code{\link[rstan]{stan}}. Note that this functionality is still in development and
+#'not all options that are available in \code{JAGS} can be used, including: no option for a Tweedie family and no option for
+#'dynamic factor trends. However, as \code{rstan} can estimate Hilbert base approximate gaussian processes, which
+#'are much more computationally tractable than full GPs for time series with `>100` observations, estimation
+#'in \code{rstan} can support latent GP trends while estimation in \code{JAGS} cannot
+#'@param jags_path Optional character vector specifying the path to the location of the `JAGS` executable (.exe) to use
+#'for modelling if `use_stan == FALSE`. If missing, the path will be recovered from a call to \code{\link[runjags]{findjags}}
+#'
+#'@details Dynamic GAMs are useful when we wish to predict future values from time series that show temporal dependence
+#'but we do not want to rely on extrapolating from a smooth term (which can sometimes lead to unpredictable and unrealistic behaviours).
+#'In addition, smooths can often try to wiggle excessively to capture any autocorrelation that is present in a time series,
+#'which exacerbates the problem of forecasting ahead. As GAMs are very naturally viewed through a Bayesian lens, and we often
+#'must model time series that show complex distributional features and missing data, parameters for `mvgam` models are estimated
+#'in a Bayesian framework using Markov Chain Monte Carlo.
+#'\cr
+#'\cr
+#'*Priors*: A \code{\link[mcgv]{jagam}} model file is generated from \code{formula} and modified to include any latent
+#'temporal processes. Prior distributions for most important model parameters can be altered by the user to inspect model
+#'sensitivities to given priors. Note that latent trends are estimated on the log scale so choose tau, AR and phi priors
+#'accordingly. However more control over the model specification can be accomplished by first using `mvgam` as a
+#'baseline, then editing the returned model accordingly. The model file can be edited and run outside
+#'of `mvgam` by setting \code{run_model = TRUE} and this is encouraged for complex modelling tasks
+#'\cr
+#'\cr
+#'*Random effects*: For any smooth terms using the random effect basis (\code{\link[mcgv]{smooth.construct.re.smooth.spec}}),
+#'a non-centred parameterisation is automatically employed to avoid degeneracies that are common in hierarchical models.
+#'Note however that centred versions may perform better for series that are particularly informative, so as with any
+#'foray into Bayesian modelling, it is worth building an understanding of the model's assumptions and limitations by following a
+#'principled workflow.
+#'\cr
+#'\cr
+#'*Overdispersion parameters*: When more than one series is included in \code{data_train} and an overdispersed
+#'exponential family is used, by default the overdispersion parameters (`r` for Poisson, `twdis` for Tweedie) are
+#'estimated independently for each series. Note that for Tweedie
+#'models, estimating the power parameter `p` alongside the overdispersion parameter
+#'`twdis` and the smooth coefficients is very challenging for noisy data, introducing some difficult posterior geometries.
+#'The `p` parameter is therefore fixed at `1.5` (i.e. a so-called Geometric Poisson model).
+#'\cr
+#'\cr
+#'*Factor regularisation*: When using a dynamic factor model for the trends, factor precisions are given
+#'regularized penalty priors to theoretically allow some factors to be dropped from the model by squeezing increasing
+#'factors' variances to zero. This is done to help protect against selecting too many latent factors than are needed to
+#'capture dependencies in the data, so it can often be advantageous to set `n_lv`` to a slightly larger number. However
+#'larger numbers of factors do come with additional computational costs so these should be balanced as well.
+#'\cr
+#'\cr
+#'*Residuals*: For each series, randomized quantile (i.e. Dunn-Smyth) residuals are calculated for inspecting model diagnostics
+#'If the fitted model is appropriate then Dunn-Smyth residuals will be standard normal in distribution and no
+#'autocorrelation will be evident. When a particular observation is missing, the residual is calculated by comparing independent
+#'draws from the model's posterior distribution
+#'
+#'@author Nicholas J Clark
+#'
+#'@seealso \code{\link[rjags]{jags.model}}, \code{\link[mcgv]{jagam}}, \code{\link[mcgv]{gam}}
+#'@return A \code{list} object of class \code{mvgam} containing model output, the text representation of the model file,
+#'the mgcv model output (for easily generating simulations at
+#'unsampled covariate values), Dunn-Smyth residuals for each series and key information needed
+#'for other functions in the package
+#'
+#'@export
+
+mvgam = function(formula,
+                   knots,
+                   data_train,
+                   data_test,
+                   run_model = TRUE,
+                   prior_simulation = FALSE,
+                   return_model_data = FALSE,
+                   family = 'poisson',
+                   use_lv = FALSE,
+                   n_lv,
+                   trend_model = 'RW',
+                   drift = FALSE,
+                   chains = 2,
+                   burnin = 5000,
+                   n_samples = 2000,
+                   thin = 4,
+                   parallel = TRUE,
+                   phi_prior,
+                   ar_prior,
+                   r_prior,
+                   twdis_prior,
+                   sigma_prior,
+                   upper_bounds,
+                   use_stan = FALSE,
+                   jags_path){
+
+  # Check arguments
+  trend_model <- match.arg(arg = trend_model, choices = c("None", "RW", "AR1", "AR2", "AR3", "GP"))
+  family <- match.arg(arg = family, choices = c("nb", "poisson", "tw"))
+
+  if(chains == 1){
+    stop('number of chains must be >1')
+  }
+  if(sign(chains) != 1){
+    stop('argument "chains" must be a positive integer',
+         call. = FALSE)
+  } else {
+    if(chains%%1 != 0){
+      stop('argument "chains" must be a positive integer',
+           call. = FALSE)
+    }
+  }
+
+  if(sign(burnin) != 1){
+    stop('argument "burnin" must be a positive integer',
+         call. = FALSE)
+  } else {
+    if(burnin%%1 != 0){
+      stop('argument "burnin" must be a positive integer',
+           call. = FALSE)
+    }
+  }
+
+  # JAGS cannot support latent GP trends as there is no easy way to use Hilbert base
+  # approximation to reduce the computational demands
+  if(!use_stan & trend_model == 'GP'){
+    if(!require(rstan)){
+      stop('gaussian process trends not yet supported for JAGS and cannot find rstan library')
+    } else {
+      warning('gaussian process trends not yet supported for JAGS; reverting to stan')
+      use_stan <- TRUE
+    }
+  }
+
+  # If Stan is to be used, make sure it is installed
+  if(use_stan){
+    if(!require(rstan)){
+      warning('rstan library not installed; reverting to JAGS')
+      use_stan <- FALSE
+    }
+  }
+
+  # Stan can only handle certain options in the development phase
+  if(use_stan & use_lv){
+    warning('dynamic factor trends not yet supported for stan; reverting to JAGS')
+    use_stan <- FALSE
+  }
+
+  if(use_stan & family == 'tw'){
+    warning('Tweedie family not yet supported for stan; reverting to JAGS')
+    use_stan <- FALSE
+  }
+
+  # If the model is to be run in JAGS, make sure the JAGS software can be located
+  if(!use_stan){
+    if(run_model){
+      if(missing(jags_path)){
+        jags_path <- runjags::findjags()
+      }
+
+      # Code borrowed from the runjags package
+      jags_status <- runjags::testjags(jags_path, silent = TRUE)
+      if(!jags_status$JAGS.available){
+        if(jags_status$os=="windows"){
+          # Try it again - sometimes this helps
+          Sys.sleep(0.2)
+          jags_status <- runjags::testjags(jags_path, silent = TRUE)
+        }
+
+        if(!jags_status$JAGS.available){
+          cat("Unable to call JAGS using '", jags_path,
+              "'\nTry specifying the path to the JAGS binary as the jags_path argument, or installing the rjags package.\nUse the runjags::testjags() function for more detailed diagnostics.\n", sep="")
+          stop("Unable to call JAGS", call. = FALSE)
+        }
+      }
+    }
+  }
+
+  # Add series factor variable if missing
+  if(class(data_train)[1] != 'list'){
+  if(!'series' %in% colnames(data_train)){
+    data_train$series <- factor('series1')
+    if(!missing(data_test)){
+      data_test$series <- factor('series1')
+    }
+  }
+
+  # Must be able to index by time; it is too dangerous to 'guess' as this could make a huge
+    # impact on resulting estimates / inferences
+  if(!'time' %in% colnames(data_train)){
+    stop('data_train does not contain a "time" column')
+  }
+    }
+
+  if(class(data_train)[1] == 'list'){
+    if(!'series' %in% names(data_train)){
+      data_train$series <- factor('series1')
+      if(!missing(data_test)){
+        data_test$series <- factor('series1')
+      }
+    }
+
+    if(!'time' %in% names(data_train)){
+      stop('data_train does not contain a "time" column')
+    }
+  }
+
+  # Number of latent variables cannot be greater than number of series
+  if(use_lv){
+    if(missing(n_lv)){
+      n_lv <- min(2, floor(length(unique(data_train$series)) / 2))
+    }
+    if(n_lv > length(unique(data_train$series))){
+      stop('number of latent variables cannot be greater than number of series')
+    }
+  }
+
+  # No point in latent variables if trend model is None
+  if(trend_model == 'None' & use_lv){
+    use_lv <- FALSE
+    warning('No point in latent variables if trend model is None; changing use_lv to FALSE')
+  }
+
+  # Ensure outcome is labelled 'y'
+  form_terms <- terms(formula(formula))
+  if(dimnames(attr(form_terms, 'factors'))[[1]][1] != 'y'){
+    stop('Outcome variable must be named "y"')
+  }
+
+  # If there are missing values in y, use predictions from an initial mgcv model to fill
+  # these in so that initial values to maintain the true size of the training dataset
+  orig_y <- data_train$y
+
+    if(!missing(knots)){
+
+      # Estimate the GAM model using mgcv so that the linear predictor matrix can be easily calculated
+      # when simulating from the JAGS model later on
+      if(family == 'nb'){
+        ss_gam <- mgcv::gam(formula(formula),
+                            data = data_train,
+                            method = "REML",
+                            family = nb(),
+                            drop.unused.levels = FALSE,
+                            knots = knots,
+                            control = list(nthreads = parallel::detectCores()-1,
+                                           maxit = 100))
+      } else if(family == 'poisson'){
+        ss_gam <- mgcv::gam(formula(formula),
+                            data = data_train,
+                            method = "REML",
+                            family = poisson(),
+                            drop.unused.levels = FALSE,
+                            knots = knots,
+                            control = list(nthreads = parallel::detectCores()-1,
+                                           maxit = 100))
+      } else {
+        ss_gam <- mgcv::gam(formula(formula),
+                            data = data_train,
+                            method = "REML",
+                            family = tw(),
+                            drop.unused.levels = FALSE,
+                            knots = knots,
+                            control = list(nthreads = parallel::detectCores()-1,
+                                           maxit = 100))
+      }
+
+    } else {
+      if(family == 'nb'){
+        ss_gam <- mgcv::gam(formula(formula),
+                            data = data_train,
+                            method = "REML",
+                            family = nb(),
+                            drop.unused.levels = FALSE,
+                            control = list(nthreads = parallel::detectCores()-1))
+      } else if(family == 'poisson'){
+        ss_gam <- mgcv::gam(formula(formula),
+                            data = data_train,
+                            method = "REML",
+                            family = poisson(),
+                            drop.unused.levels = FALSE,
+                            control = list(nthreads = parallel::detectCores()-1))
+      } else {
+        ss_gam <- mgcv::gam(formula(formula),
+                            data = data_train,
+                            method = "REML",
+                            family = tw(),
+                            drop.unused.levels = FALSE,
+                            control = list(nthreads = parallel::detectCores()-1))
+      }
+
+    }
+
+  # Fill in missing observations in data_train so the size of the dataset is correct when
+  # building the JAGS model
+  data_train$y[which(is.na(data_train$y))] <- round(predict(ss_gam,
+                                                              newdata = data_train,
+                                                              type = 'response'), 0)[which(is.na(data_train$y))]
+
+
+  # Make jags file and appropriate data structures; note this has to use Poisson but the
+  # resulting JAGS file will be modified to accomodate the specified response distribution accordingly
+  if(!missing(knots)){
+    ss_jagam <- mgcv::jagam(formula,
+                            data = data_train,
+                            family = poisson(),
+                            drop.unused.levels = FALSE,
+                            file = 'base_gam.txt',
+                            sp.prior = 'gamma',
+                            diagonalize = F,
+                            knots = knots)
+  } else {
+    ss_jagam <- mgcv::jagam(formula,
+                            data = data_train,
+                            family = poisson(),
+                            drop.unused.levels = FALSE,
+                            file = 'base_gam.txt',
+                            sp.prior = 'gamma',
+                            diagonalize = F)
+  }
+
+  # Update initial values of lambdas using the full estimates from the
+  # fitted bam model to speed convergence; remove initial betas so that the
+  # chains can start in very different regions of the parameter space
+  ss_jagam$jags.ini$b <- NULL
+
+  if(length(ss_gam$sp) == length(ss_jagam$jags.ini$lambda)){
+    ss_jagam$jags.ini$lambda <- ss_gam$sp
+    ss_jagam$jags.ini$lambda[log(ss_jagam$jags.ini$lambda) > 10] <- exp(10)
+  }
+
+  # Fill y with NAs if this is a simulation from the priors
+  if(prior_simulation){
+    data_train$y <- rep(NA, length(data_train$y))
+  } else {
+    data_train$y <- orig_y
+  }
+
+  # Read in the base (unmodified) jags model file
+  base_model <- suppressWarnings(readLines('base_gam.txt'))
+
+  # Remove lines from the linear predictor section
+  lines_remove <- c(1:grep('## response', base_model))
+  base_model <- base_model[-lines_remove]
+
+  # Any parametric effects in the gam (particularly the intercept) need to get more sensible priors to ensure they
+  # do not directly compete with the latent trends
+  if(any(grepl('Parametric effect priors', base_model))){
+
+    in_parenth <- regmatches(base_model[grep('Parametric effect priors',
+                               base_model) + 1],
+               gregexpr( "(?<=\\().+?(?=\\))", base_model[grep('Parametric effect priors',
+                                                               base_model) + 1], perl = T))[[1]][1]
+    n_terms <- as.numeric(sub(".*:", "", in_parenth))
+    ss_jagam$jags.data$p_coefs <- coef(ss_gam)[1:n_terms]
+
+    rmvn <- function(n,mu,sig) {
+      L <- mroot(sig);m <- ncol(L);
+      t(mu + L%*%matrix(rnorm(m*n),m,n))
+    }
+
+    beta_sims <- rmvn(1000, coef(ss_gam), ss_gam$Vp)
+    ss_jagam$jags.data$p_taus <- apply(as.matrix(beta_sims[,1:n_terms]),
+                                       2, function(x) 1 / sd(x))
+
+    base_model[grep('Parametric effect priors',
+                      base_model) + 1] <- paste0('  for (i in 1:',
+                                                 n_terms,
+                                                 ') { b[i] ~ dnorm(p_coefs[i], p_taus[i]) }')
+    base_model[grep('Parametric effect priors',
+                    base_model)] <- c('  ## parametric effect priors (regularised for identifiability)')
+  }
+
+  # For any random effect smooths, use the non-centred parameterisation to avoid degeneracies
+  smooth_labs <- do.call(rbind, lapply(seq_along(ss_gam$smooth), function(x){
+    data.frame(label = ss_gam$smooth[[x]]$label, class = class(ss_gam$smooth[[x]])[1])
+  }))
+
+  if(any(smooth_labs$class == 'random.effect')){
+    re_smooths <- smooth_labs %>%
+      dplyr::filter(class == 'random.effect') %>%
+      dplyr::pull(label)
+
+    for(i in 1:length(re_smooths)){
+      in_parenth <- regmatches(base_model[grep(re_smooths[i],
+                                               base_model, fixed = T) + 1],
+                               gregexpr( "(?<=\\().+?(?=\\))", base_model[grep(re_smooths[i],
+                                                                               base_model, fixed = T) + 1],
+                                         perl = T))[[1]][1]
+      n_terms <- as.numeric(sub(".*:", "", in_parenth))
+      n_start <- as.numeric(strsplit(sub(".*\\(", "", in_parenth), ':')[[1]][1])
+      base_model[grep(re_smooths[i],
+                      base_model, fixed = T) + 1] <- paste0('  for (i in ', n_start, ':',
+                                                            n_terms,
+                                                            ') {\n   b_raw[i] ~ dnorm(0, 1)\n',
+                                                            'b[i] <- ',
+                                                            paste0('mu_raw', i), ' + b_raw[i] * ',
+                                                            paste0('sigma_raw', i), '\n  }\n  ',
+                                                            paste0('sigma_raw', i), ' ~ dexp(1)\n',
+                                                            paste0('mu_raw', i), ' ~ dnorm(0, 1)')
+      base_model[grep(re_smooths[i],
+                      base_model, fixed = T)] <- paste0('  ## prior (non-centred) for ', re_smooths[i], '...')
+    }
+
+  }
+
+  base_model[grep('smoothing parameter priors',
+                  base_model)] <- c('   ## smoothing parameter priors...')
+
+  # Add replacement lines for priors, trends and the linear predictor
+  fil <- tempfile(fileext = ".xt")
+  modification <- add_base_dgam_lines(use_lv)
+  cat(c(readLines(textConnection(modification)), base_model), file = fil,
+      sep = "\n")
+  model_file <- trimws(readLines(fil, n = -1))
+
+  # Update prior distributions
+  if(!missing(phi_prior)){
+    model_file[grep('phi\\[s\\] ~', model_file)] <- paste0(' phi[s] ~ ', phi_prior)
+  }
+
+  if(!missing(ar_prior)){
+    model_file[grep('ar1\\[s\\] ~', model_file)] <- paste0(' ar1[s] ~ ', ar_prior)
+    model_file[grep('ar2\\[s\\] ~', model_file)] <- paste0(' ar2[s] ~ ', ar_prior)
+    model_file[grep('ar3\\[s\\] ~', model_file)] <- paste0(' ar3[s] ~ ', ar_prior)
+  }
+
+  if(!missing(sigma_prior)){
+    model_file[grep('sigma\\[s\\] ~', model_file)] <- paste0(' sigma[s] ~ ', sigma_prior)
+  }
+
+  # Modify observation distribution lines
+  if(family == 'tw'){
+    model_file <- add_tweedie_lines(model_file, upper_bounds = upper_bounds,
+                                    twdis_prior = twdis_prior)
+
+  } else if(family == 'poisson'){
+    model_file <- add_poisson_lines(model_file, upper_bounds = upper_bounds)
+
+  } else {
+    if(missing(upper_bounds)){
+      model_file[grep('y\\[i, s\\] ~', model_file)] <- '  y[i, s] ~ dnegbin(rate[i, s], r[s])'
+      model_file[grep('ypred\\[i, s\\] ~', model_file)] <- '  ypred[i, s] ~ dnegbin(rate[i, s], r[s])'
+    }
+
+    if(!missing(r_prior)){
+      model_file[grep('r_raw\\[s\\] ~', model_file)] <- paste0('r_raw[s] ~ ', r_prior)
+    }
+  }
+
+  # Modify lines needed for the specified trend model
+  model_file <- add_trend_lines(model_file, stan = F,
+                                use_lv = use_lv,
+                                trend_model = trend_model,
+                                drift = drift)
+
+  # Use informative priors based on the fitted mgcv model to speed convergence
+  # and eliminate searching over strange parameter spaces
+  if(length(ss_gam$sp) == length(ss_jagam$jags.ini$lambda)){
+    model_file[grep('lambda\\[i\\] ~', model_file)] <- '   lambda[i] ~ dexp(1/sp[i])T(0.0001, )'
+  } else {
+    model_file[grep('lambda\\[i\\] ~', model_file)] <- '   lambda[i] ~ dexp(0.05)T(0.0001, )'
+  }
+
+  # Final tidying of the JAGS model for readability
+  clean_up <- vector()
+  for(x in 1:length(model_file)){
+    clean_up[x] <- model_file[x-1] == "" & model_file[x] == ""
+  }
+  clean_up[is.na(clean_up)] <- FALSE
+  model_file <- model_file[!clean_up]
+
+  model_file_jags <- textConnection(model_file)
+
+  # Covariate dataframe including training and testing observations
+  if(!missing(data_test)){
+    X <- data.frame(rbind(ss_jagam$jags.data$X,
+                          predict(ss_gam, newdata = data_test, type = 'lpmatrix')))
+
+    # Add a time variable
+    if(class(data_train)[1] == 'list'){
+      temp_dat_train <- data.frame(time = data_train$time,
+                                   series = data_train$series)
+      temp_dat_test <- data.frame(time = data_test$time,
+                                  series = data_test$series)
+
+      X$time <- rbind(temp_dat_train, temp_dat_test) %>%
+        dplyr::left_join(rbind(temp_dat_train, temp_dat_test) %>%
+                           dplyr::select(time) %>%
+                           dplyr::distinct() %>%
+                           dplyr::arrange(time) %>%
+                           dplyr::mutate(time = dplyr::row_number()),
+                         by = c('time')) %>%
+        dplyr::pull(time)
+
+      # Add a series identifier variable
+      X$series <- as.numeric(rbind(temp_dat_train, temp_dat_test)$series)
+
+      # Add an outcome variable
+      X$outcome <- c(orig_y, rep(NA, NROW(temp_dat_test)))
+
+    } else {
+
+    X$time <- rbind(data_train, data_test[,1:ncol(data_train)]) %>%
+      dplyr::left_join(rbind(data_train, data_test[,1:ncol(data_train)]) %>%
+                         dplyr::select(time) %>%
+                         dplyr::distinct() %>%
+                         dplyr::arrange(time) %>%
+                         dplyr::mutate(time = dplyr::row_number()),
+                       by = c('time')) %>%
+      dplyr::pull(time)
+
+    # Add a series identifier variable
+    X$series <- as.numeric(rbind(data_train, data_test)$series)
+
+    # Add an outcome variable
+    X$outcome <- c(data_train$y, rep(NA, NROW(data_test)))
+    }
+
+  } else {
+    X <- data.frame(ss_jagam$jags.data$X)
+
+    if(class(data_train)[1] == 'list'){
+      temp_dat <- data.frame(time = data_train$time)
+      X$time <- temp_dat %>%
+        dplyr::left_join(temp_dat %>%
+                           dplyr::select(time) %>%
+                           dplyr::distinct() %>%
+                           dplyr::arrange(time) %>%
+                           dplyr::mutate(time = dplyr::row_number()),
+                         by = c('r')) %>%
+        dplyr::pull(time)
+    } else {
+      X$time <- data_train %>%
+        dplyr::left_join(data_train %>%
+                           dplyr::select(time) %>%
+                           dplyr::distinct() %>%
+                           dplyr::arrange(time) %>%
+                           dplyr::mutate(time = dplyr::row_number()),
+                         by = c('time')) %>%
+        dplyr::pull(time)
+    }
+
+    X$outcome <- c(data_train$y)
+    X$series <- as.numeric(data_train$series)
+  }
+
+  # Arrange by time then by series
+  X %>%
+    dplyr::arrange(time, series) -> X
+
+  # Matrix of indices in X that correspond to timepoints for each series
+  ytimes <- matrix(NA, nrow = length(unique(X$time)), ncol = length(unique(X$series)))
+  for(i in 1:length(unique(X$series))){
+    ytimes[,i] <- which(X$series == i)
+  }
+  ss_jagam$jags.data$ytimes <- ytimes
+
+  # Matrix of outcomes in X that correspond to each series at each timepoint
+  ys_mat <- matrix(NA, nrow = NROW(ytimes), ncol = NCOL(ytimes))
+  for(i in 1:length(unique(X$series))){
+    ys_mat[,i] <- X$outcome[which(X$series == i)]
+  }
+  ss_jagam$jags.data$y <- ys_mat
+
+  # Other necessary variables for JAGS
+  ss_jagam$jags.data$n <- NROW(ytimes)
+  ss_jagam$jags.data$n_series <- NCOL(ytimes)
+  ss_jagam$jags.data$X <- as.matrix(X %>%
+                                     dplyr::select(-time, -series, -outcome))
+  if(!missing(upper_bounds)){
+    ss_jagam$jags.data$upper_bound <- upper_bounds
+  }
+
+  if(length(ss_gam$sp) == length(ss_jagam$jags.ini$lambda)){
+    ss_jagam$jags.data$sp <- ss_gam$sp
+  }
+
+  # Machine epsilon for minimum allowable non-zero rate
+  if(family == 'nb'){
+    ss_jagam$jags.data$min_eps <- .Machine$double.eps
+  }
+
+  # Number of latent variables to use
+  if(use_lv){
+      if(missing(n_lv)){
+        ss_jagam$jags.data$n_lv <- min(2, floor(ss_jagam$jags.data$n_series / 2))
+      } else {
+        ss_jagam$jags.data$n_lv <- n_lv
+        ss_jagam$jags.ini$X1 <- rep(1, n_lv)
+        ss_jagam$jags.ini$X2 <- 1
+      }
+      if(ss_jagam$jags.data$n_lv > ss_jagam$jags.data$n_series){
+        stop('Number of latent variables cannot be greater than number of series')
+      }
+  }
+
+  if(missing(upper_bounds)){
+    upper_bounds <- NULL
+  }
+
+  if(use_lv){
+    n_lv <- ss_jagam$jags.data$n_lv
+  } else {
+    n_lv <- NULL
+  }
+
+  # Add information about the call and necessary data structures to the model file
+  # Get dimensions and numbers of smooth terms
+  snames <- names(ss_jagam$jags.data)[grep('S.*', names(ss_jagam$jags.data))]
+  smooth_dims <- matrix(NA, ncol = 2, nrow = length(snames))
+  for(i in 1:length(snames)){
+    smooth_dims[i,] <- dim(ss_jagam$jags.data[[snames[i]]])
+  }
+  smooth_penalty_data <- vector()
+  for(i in 1:length(snames)){
+    smooth_penalty_data[i] <- paste0('matrix ',
+                                     snames[i],
+                                     ';  mgcv smooth penalty matrix ', snames[i])
+  }
+
+  if('sp' %in% names(ss_jagam$jags.data)){
+    if(length(ss_jagam$jags.data$sp) == 1){
+      sp_data <- c(paste0('real sp;  inverse exponential location prior for smoothing parameter ',
+                        paste0(names(ss_jagam$jags.data$sp))),
+                   '___________values ranging 5 - 50 are a good start')
+    } else {
+      sp_data <- c(paste0('vector sp;  inverse exponential location priors for smoothing parameters: ',
+                        paste0(names(ss_jagam$jags.data$sp), collapse = '; ')),
+                   '___________values ranging 5 - 50 are a good start')
+    }
+
+  } else {
+    sp_data <- NULL
+  }
+
+  if('p_coefs' %in% names(ss_jagam$jags.data)){
+    parametric_ldata <- paste0('vector p_coefs;  vector (length = ',
+                               length(ss_jagam$jags.data$p_coefs),
+                               ') of prior Gaussian means for parametric effects')
+  } else {
+    parametric_ldata <- NULL
+  }
+
+  if('p_taus' %in% names(ss_jagam$jags.data)){
+    parametric_tdata <- paste0('vector p_taus;  vector (length = ',
+                               length(ss_jagam$jags.data$p_coefs),
+                               ') of prior Gaussian precisions for parametric effects')
+  } else {
+    parametric_tdata <- NULL
+  }
+
+  model_file <- c('JAGS code generated by package mvgam',
+                       '\n',
+                       'GAM formula:',
+                  gsub('\"', '', paste(formula[2],formula[3],sep=' ~ ')),
+                       '\n',
+                  'Trend model:',
+                  trend_model,
+                  '\n',
+                  'Required data:',
+                  'integer n;  number of timepoints per series\n',
+                  'integer n_series;  number of series\n',
+                  'matrix y;  time-ordered observations of dimension n x n_series (missing values allowed)\n',
+                  'matrix ytimes;  time-ordered n x n_series matrix (which row in X belongs to each [time, series] observation?)\n',
+                  'matrix X;  mgcv GAM design matrix of dimension (n x n_series) x basis dimension\n',
+                  paste0(smooth_penalty_data),
+                  'vector zero; prior basis coefficient locations vector of length ncol(X)\n',
+                  paste0(parametric_ldata),
+                  paste0(parametric_tdata),
+                  sp_data,
+                  '\n',
+                  model_file)
+
+  if(!run_model){
+    # Return only the model file and all data / inits needed to run the model
+    # outside of mvgam
+    unlink('base_gam.txt')
+    if(use_stan){
+      # Import the base Stan model file
+      modification <- add_base_dgam_lines(stan = TRUE)
+      unlink('base_gam_stan.txt')
+      cat(modification, file = 'base_gam_stan.txt', sep = '\n', append = T)
+      base_stan_model <- trimws(suppressWarnings(readLines('base_gam_stan.txt')))
+      unlink('base_gam_stan.txt')
+
+      # Add necessary trend structure
+      base_stan_model <- add_trend_lines(model_file = base_stan_model,
+                                         stan = T,
+                                         trend_model = trend_model,
+                                         drift = drift)
+
+      # Add remaining data, model and parameters blocks to the Stan model file;
+      # gather Stan data structure
+      stan_objects <- add_stan_data(jags_file = trimws(model_file),
+                                    stan_file = base_stan_model,
+                                    jags_data = ss_jagam$jags.data,
+                                    family = family)
+
+      output <- structure(list(call = formula,
+                               family = dplyr::case_when(family == 'tw' ~ 'Tweedie',
+                                                         family == 'poisson' ~ 'Poisson',
+                                                         TRUE ~ 'Negative Binomial'),
+                               trend_model = trend_model,
+                               drift = drift,
+                               pregam = ss_jagam$pregam,
+                               model_file = stan_objects$stan_file,
+                               model_data = stan_objects$model_data,
+                               mgcv_model = ss_gam,
+                               sp_names = names(ss_jagam$pregam$lsp0),
+                               ytimes = ytimes,
+                               use_lv = use_lv,
+                               n_lv = n_lv,
+                               upper_bounds = upper_bounds,
+                               obs_data = data_train,
+                               fit_engine = 'stan'),
+                          class = 'mvgam_prefit')
+    } else {
+      output <- structure(list(call = formula,
+                               family = dplyr::case_when(family == 'tw' ~ 'Tweedie',
+                                                         family == 'poisson' ~ 'Poisson',
+                                                         TRUE ~ 'Negative Binomial'),
+                               trend_model = trend_model,
+                               drift = drift,
+                               pregam = ss_jagam$pregam,
+                               model_file = trimws(model_file),
+                               model_data = ss_jagam$jags.data,
+                               mgcv_model = ss_gam,
+                               sp_names = names(ss_jagam$pregam$lsp0),
+                               ytimes = ytimes,
+                               use_lv = use_lv,
+                               n_lv = n_lv,
+                               upper_bounds = upper_bounds,
+                               obs_data = data_train,
+                               fit_engine = 'jags'),
+                          class = 'mvgam_prefit')
+    }
+
+  } else {
+
+    if(use_stan){
+      fit_engine <- 'stan'
+      # Import the base Stan model file
+      modification <- add_base_dgam_lines(stan = TRUE)
+      unlink('base_gam_stan.txt')
+      cat(modification, file = 'base_gam_stan.txt', sep = '\n', append = T)
+      base_stan_model <- trimws(suppressWarnings(readLines('base_gam_stan.txt')))
+      unlink('base_gam_stan.txt')
+
+      # Add necessary trend structure
+      base_stan_model <- add_trend_lines(model_file = base_stan_model,
+                                         stan = T,
+                                         trend_model = trend_model,
+                                         drift = drift)
+
+      # Add remaining data, model and parameters blocks to the Stan model file;
+      # gather Stan data structure
+      stan_objects <- add_stan_data(jags_file = trimws(model_file),
+                                    stan_file = base_stan_model,
+                                    jags_data = ss_jagam$jags.data,
+                                    family = family)
+      model_data <- stan_objects$model_data
+
+      # Should never call library in a function, but just doing this for now while
+      # developing stan functionality!!
+      require(rstan)
+      options(mc.cores = parallel::detectCores())
+
+      # Sensible inits needed for the betas
+      if(trend_model != 'None'){
+        initf1 <- function() {
+          list(b_raw = runif(stan_objects$model_data$num_basis, -0.25, 0.25))
+        }
+      } else {
+        initf1 <- function() {
+          list(b_raw = runif(stan_objects$model_data$num_basis, -0.25, 0.25),
+               sigma = runif(stan_objects$model_data$n_series, 0.075, 1))
+        }
+      }
+      inits <- initf1
+
+      # Fit the model in stan
+      if(trend_model == 'GP'){
+        stan_control <- list(max_treedepth = 12)
+      } else {
+        stan_control <- list(max_treedepth = 10)
+      }
+
+      fit1 <- stan(model_code = stan_objects$stan_file,
+                   iter = burnin + 1000,
+                   chains = chains,
+                   data = stan_objects$model_data,
+                   cores = min(c(chains, parallel::detectCores() - 1)),
+                   init = initf1,
+                   verbose = F,
+                   control = stan_control,
+                   pars = get_monitor_pars(family = family,
+                                           use_lv = use_lv,
+                                           trend_model = trend_model,
+                                           drift = drift),
+                   refresh = 500)
+
+      # Use Michael Betancourt's utility functions for checking diagnostics
+      #source('https://raw.githubusercontent.com/betanalpha/knitr_case_studies/master/factor_modeling/stan_utility.R')
+      check_all_diagnostics(fit1)
+
+      out_gam_mod <- fit1
+    }
+
+    if(!use_stan){
+      fit_engine <- 'jags'
+      model_data <- ss_jagam$jags.data
+
+      # Set monitor parameters and initial values
+      param <- get_monitor_pars(family, use_lv, trend_model, drift)
+
+      initlist <- replicate(chains, ss_jagam$jags.ini,
+                            simplify = FALSE)
+      inits <- initlist
+      runjags::runjags.options(silent.jags = TRUE, silent.runjags = TRUE)
+      n.burn <- burnin
+
+      # Initiate adaptation of the model for the full burnin period. This is necessary as JAGS
+      # will take a while to optimise the samplers, so long adaptation with little 'burnin'
+      # is more crucial than little adaptation but long 'burnin' https://mmeredith.net/blog/2016/Adapt_or_burn.htm
+      unlink('base_gam.txt')
+      cat(model_file, file = 'base_gam.txt', sep = '\n', append = T)
+      if(parallel){
+        cl <- parallel::makePSOCKcluster(min(c(chains, parallel::detectCores() - 1)))
+        setDefaultCluster(cl)
+        gam_mod <- runjags::run.jags(model = 'base_gam.txt',
+                                     data = ss_jagam$jags.data,
+                                     modules = 'glm',
+                                     inits = initlist,
+                                     n.chains = chains,
+                                     # Rely on long adaptation to tune samplers appropriately
+                                     adapt = max(1000, n.burn - 1000),
+                                     burnin = 1000,
+                                     sample = n_samples,
+                                     jags = jags_path,
+                                     thin = thin,
+                                     method = "rjparallel",
+                                     monitor = c(param, 'deviance'),
+                                     silent.jags = TRUE,
+                                     cl = cl)
+        stopCluster(cl)
+      } else {
+        gam_mod <- runjags::run.jags(model = 'base_gam.txt',
+                                     data = ss_jagam$jags.data,
+                                     modules = 'glm',
+                                     inits = initlist,
+                                     n.chains = chains,
+                                     # Rely on long adaptation to tune samplers appropriately
+                                     adapt = max(1000, n.burn - 1000),
+                                     burnin = 1000,
+                                     sample = n_samples,
+                                     jags = jags_path,
+                                     thin = thin,
+                                     method = "rjags",
+                                     monitor = c(param, 'deviance'),
+                                     silent.jags = TRUE)
+      }
+      out_gam_mod <- coda::as.mcmc.list(gam_mod)
+    }
+
+  unlink('base_gam.txt')
+  unlink(fil)
+
+  # Get Dunn-Smyth Residual distributions for each series
+  series_resids <- get_mvgam_resids(object = list(
+    model_output = out_gam_mod,
+    fit_engine = fit_engine,
+    family = dplyr::case_when(family == 'tw' ~ 'Tweedie',
+                              family == 'poisson' ~ 'Poisson',
+                              TRUE ~ 'Negative Binomial'),
+    obs_data = data_train,
+    ytimes = ytimes),
+    n_cores = min(c(parallel::detectCores() - 1, NCOL(ytimes))))
+
+  # Create a jam object and get smooth penalty names in more interpretable format
+  ## Modified sim2jam function; takes simulation output
+  ## and a pregam object from jagam, and attempts to create a fake gam object suitable
+  ## for calculating estimated degrees of freedom
+  create_jam <- function(out_gam_mod, pregam) {
+
+    b <- t(MCMCvis::MCMCchains(out_gam_mod, 'b'))
+    pregam$Vp <- cov(t(b))
+    pregam$coefficients <- rowMeans(b)
+    pregam$sig2 <- 1
+
+    # Compute estimated degrees of freedom
+    eta <- pregam$X %*% pregam$coefficients
+    mu <- pregam$family$linkinv(eta)
+    w <- as.numeric(pregam$w * pregam$family$mu.eta(eta)^2 / pregam$family$variance(mu))
+    XWX <- t(pregam$X) %*% (w*pregam$X)
+
+    pregam$edf <- rowSums(pregam$Vp*t(XWX)) / pregam$sig2
+    class(pregam) <- "jam"
+    pregam
+  }
+
+  jam <- create_jam(out_gam_mod, ss_jagam$pregam)
+  jam$sp <- exp(colMeans(MCMCvis::MCMCchains(out_gam_mod, 'rho')))
+
+  name_starts <- unlist(purrr:::map(jam$smooth, 'first.sp'))
+  name_ends <- unlist(purrr:::map(jam$smooth, 'last.sp'))
+
+  rho_names <- unlist(lapply(seq(1:length(ss_gam$smooth)), function(i){
+
+    number_seq <- seq(1:(1 + name_ends[i] - name_starts[i]))
+    number_seq[1] <- ''
+
+    paste0(rep(ss_gam$smooth[[i]]$label,
+               length(number_seq)),
+           number_seq)
+  }))
+
+  if(use_stan){
+    model_file <- stan_objects$stan_file
+  } else {
+    model_file <- trimws(model_file)
+  }
+
+  if(return_model_data){
+    output <- structure(list(call = formula,
+                             family = dplyr::case_when(family == 'tw' ~ 'Tweedie',
+                                                       family == 'poisson' ~ 'Poisson',
+                                                       TRUE ~ 'Negative Binomial'),
+                             trend_model = trend_model,
+                             drift = drift,
+                             pregam = ss_jagam$pregam,
+                             model_output = out_gam_mod,
+                             model_file = model_file,
+                             sp_names = rho_names,
+                             model_data = model_data,
+                             inits = inits,
+                             mgcv_model = ss_gam,
+                             jam_model = jam,
+                             ytimes = ytimes,
+                             resids = series_resids,
+                             use_lv = use_lv,
+                             n_lv = n_lv,
+                             upper_bounds = upper_bounds,
+                             obs_data = data_train,
+                             fit_engine = fit_engine),
+                        class = 'mvgam')
+  } else {
+    output <- structure(list(call = formula,
+                             family = dplyr::case_when(family == 'tw' ~ 'Tweedie',
+                                                       family == 'poisson' ~ 'Poisson',
+                                                       TRUE ~ 'Negative Binomial'),
+                             trend_model = trend_model,
+                             drift = drift,
+                             pregam = ss_jagam$pregam,
+                             model_output = out_gam_mod,
+                             model_file = model_file,
+                             sp_names = rho_names,
+                             mgcv_model = ss_gam,
+                             jam_model = jam,
+                             ytimes = ytimes,
+                             resids = series_resids,
+                             use_lv = use_lv,
+                             n_lv = n_lv,
+                             upper_bounds = upper_bounds,
+                             obs_data = data_train,
+                             fit_engine = fit_engine),
+                        class = 'mvgam')
+  }
+  }
+
+  return(output)
+}
diff --git a/R/stan_checks.R b/R/stan_checks.R
new file mode 100644
index 00000000..fef45551
--- /dev/null
+++ b/R/stan_checks.R
@@ -0,0 +1,198 @@
+#' Check transitions that ended with a divergence
+#' @param fit A stanfit object
+#' @param quiet Logical (verbose or not?)
+#' @details Utility function written by Michael Betancourt (https://betanalpha.github.io/)
+#' @noRd
+check_div <- function(fit, quiet=FALSE) {
+  sampler_params <- rstan::get_sampler_params(fit, inc_warmup=FALSE)
+  divergent <- do.call(rbind, sampler_params)[,'divergent__']
+  n = sum(divergent)
+  N = length(divergent)
+
+  if (!quiet) print(sprintf('%s of %s iterations ended with a divergence (%s%%)',
+                            n, N, 100 * n / N))
+  if (n > 0) {
+    if (!quiet) print('  Try running with larger adapt_delta to remove the divergences')
+    if (quiet) return(FALSE)
+  } else {
+    if (quiet) return(TRUE)
+  }
+}
+
+#' Check transitions that ended prematurely due to maximum tree depth limit
+#' @param fit A stanfit object
+#' @param quiet Logical (verbose or not?)
+#' @details Utility function written by Michael Betancourt (https://betanalpha.github.io/)
+#' @noRd
+check_treedepth <- function(fit, max_depth = 10, quiet=FALSE) {
+  sampler_params <- rstan::get_sampler_params(fit, inc_warmup=FALSE)
+  treedepths <- do.call(rbind, sampler_params)[,'treedepth__']
+  n = length(treedepths[sapply(treedepths, function(x) x == max_depth)])
+  N = length(treedepths)
+
+  if (!quiet)
+    print(sprintf('%s of %s iterations saturated the maximum tree depth of %s (%s%%)',
+                  n, N, max_depth, 100 * n / N))
+
+  if (n > 0) {
+    if (!quiet) print('  Run again with max_treedepth set to a larger value to avoid saturation')
+    if (quiet) return(FALSE)
+  } else {
+    if (quiet) return(TRUE)
+  }
+}
+
+#' Check the energy fraction of missing information (E-FMI)
+#' @param fit A stanfit object
+#' @param quiet Logical (verbose or not?)
+#' @details Utility function written by Michael Betancourt (https://betanalpha.github.io/)
+#' @noRd
+check_energy <- function(fit, quiet=FALSE) {
+  sampler_params <- rstan::get_sampler_params(fit, inc_warmup=FALSE)
+  no_warning <- TRUE
+  for (n in 1:length(sampler_params)) {
+    energies = sampler_params[n][[1]][,'energy__']
+    numer = sum(diff(energies)**2) / length(energies)
+    denom = var(energies)
+    if (numer / denom < 0.2) {
+      if (!quiet) print(sprintf('Chain %s: E-FMI = %s', n, numer / denom))
+      no_warning <- FALSE
+    }
+  }
+  if (no_warning) {
+    if (!quiet) print('E-FMI indicated no pathological behavior')
+    if (quiet) return(TRUE)
+  } else {
+    if (!quiet) print('  E-FMI below 0.2 indicates you may need to reparameterize your model')
+    if (quiet) return(FALSE)
+  }
+}
+
+#' Check the effective sample size per iteration
+#' @param fit A stanfit object
+#' @param quiet Logical (verbose or not?)
+#' @details Utility function written by Michael Betancourt (https://betanalpha.github.io/)
+#' @noRd
+check_n_eff <- function(fit, quiet=FALSE) {
+  fit_summary <- rstan::summary(fit, probs = c(0.5))$summary
+  N <- dim(fit_summary)[[1]]
+
+  iter <- dim(rstan:::extract(fit)[[1]])[[1]]
+
+  no_warning <- TRUE
+  for (n in 1:N) {
+    if (is.nan(fit_summary[,'n_eff'][n])) {
+      if (!quiet) print(sprintf('n_eff for parameter %s is NaN!',
+                                rownames(fit_summary)[n]))
+      no_warning <- FALSE
+    } else {
+      ratio <- fit_summary[,'n_eff'][n] / iter
+      if (ratio < 0.001) {
+        if (!quiet) print(sprintf('n_eff / iter for parameter %s is %s!',
+                                  rownames(fit_summary)[n], ratio))
+        no_warning <- FALSE
+      }
+    }
+  }
+  if (no_warning) {
+    if (!quiet) print('n_eff / iter looks reasonable for all parameters')
+    if (quiet) return(TRUE)
+  }
+  else {
+    if (!quiet) print('  n_eff / iter below 0.001 indicates that the effective sample size has likely been overestimated')
+    if (quiet) return(FALSE)
+  }
+}
+
+#' Check the potential scale reduction factors
+#' @param fit A stanfit object
+#' @param quiet Logical (verbose or not?)
+#' @details Utility function written by Michael Betancourt (https://betanalpha.github.io/)
+#' @noRd
+check_rhat <- function(fit, quiet=FALSE) {
+  fit_summary <- rstan::summary(fit, probs = c(0.5))$summary
+  N <- dim(fit_summary)[[1]]
+
+  no_warning <- TRUE
+  for (n in 1:N) {
+    rhat <- fit_summary[,'Rhat'][n]
+    if (rhat > 1.1 || is.infinite(rhat) || is.nan(rhat)) {
+      if (!quiet) print(sprintf('Rhat for parameter %s is %s!',
+                                rownames(fit_summary)[n], rhat))
+      no_warning <- FALSE
+    }
+  }
+  if (no_warning) {
+    if (!quiet) print('Rhat looks reasonable for all parameters')
+    if (quiet) return(TRUE)
+  } else {
+    if (!quiet) print('  Rhat above 1.1 indicates that the chains very likely have not mixed')
+    if (quiet) return(FALSE)
+  }
+}
+
+#' Run all diagnostic checks
+#' @param fit A stanfit object
+#' @param quiet Logical (verbose or not?)
+#' @details Utility function written by Michael Betancourt (https://betanalpha.github.io/)
+#' @noRd
+check_all_diagnostics <- function(fit, quiet=FALSE) {
+  if (!quiet) {
+    check_n_eff(fit)
+    check_rhat(fit)
+    check_div(fit)
+    check_treedepth(fit)
+    check_energy(fit)
+  } else {
+    warning_code <- 0
+
+    if (!check_n_eff(fit, quiet=TRUE))
+      warning_code <- bitwOr(warning_code, bitwShiftL(1, 0))
+    if (!check_rhat(fit, quiet=TRUE))
+      warning_code <- bitwOr(warning_code, bitwShiftL(1, 1))
+    if (!check_div(fit, quiet=TRUE))
+      warning_code <- bitwOr(warning_code, bitwShiftL(1, 2))
+    if (!check_treedepth(fit, quiet=TRUE))
+      warning_code <- bitwOr(warning_code, bitwShiftL(1, 3))
+    if (!check_energy(fit, quiet=TRUE))
+      warning_code <- bitwOr(warning_code, bitwShiftL(1, 4))
+
+    return(warning_code)
+  }
+}
+
+#' Parse warnings
+#' @param warning_code Type of warning code to generate
+#' @details Utility function written by Michael Betancourt (https://betanalpha.github.io/)
+#' @noRd
+parse_warning_code <- function(warning_code) {
+  if (bitwAnd(warning_code, bitwShiftL(1, 0)))
+    print("n_eff / iteration warning")
+  if (bitwAnd(warning_code, bitwShiftL(1, 1)))
+    print("rhat warning")
+  if (bitwAnd(warning_code, bitwShiftL(1, 2)))
+    print("divergence warning")
+  if (bitwAnd(warning_code, bitwShiftL(1, 3)))
+    print("treedepth warning")
+  if (bitwAnd(warning_code, bitwShiftL(1, 4)))
+    print("energy warning")
+}
+
+#' Return parameter arrays separated into divergent and non-divergent transitions
+#' @param fit A stanfit object
+#' @details Utility function written by Michael Betancourt (https://betanalpha.github.io/)
+#' @noRd
+partition_div <- function(fit) {
+  nom_params <- rstan:::extract(fit, permuted=FALSE)
+  n_chains <- dim(nom_params)[2]
+  params <- as.data.frame(do.call(rbind, lapply(1:n_chains, function(n) nom_params[,n,])))
+
+  sampler_params <- get_sampler_params(fit, inc_warmup=FALSE)
+  divergent <- do.call(rbind, sampler_params)[,'divergent__']
+  params$divergent <- divergent
+
+  div_params <- params[params$divergent == 1,]
+  nondiv_params <- params[params$divergent == 0,]
+
+  return(list(div_params, nondiv_params))
+}
diff --git a/man/add_stan_data.Rd b/man/add_stan_data.Rd
new file mode 100644
index 00000000..a881da6d
--- /dev/null
+++ b/man/add_stan_data.Rd
@@ -0,0 +1,21 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/add_stan_data.R
+\name{add_stan_data}
+\alias{add_stan_data}
+\title{Add remaining data, model and parameter blocks to a Stan model}
+\usage{
+add_stan_data(jags_file, stan_file, jags_data, family = "poisson")
+}
+\arguments{
+\item{jags_file}{Prepared JAGS mvgam model file}
+
+\item{stan_file}{Incomplete Stan model file to be edited}
+
+\item{jags_data}{Prepared mvgam data for JAGS modelling}
+}
+\value{
+A \code{list} containing the updated Stan model and model data
+}
+\description{
+Add remaining data, model and parameter blocks to a Stan model
+}
diff --git a/man/mvgam.Rd b/man/mvgam.Rd
new file mode 100644
index 00000000..9315480f
--- /dev/null
+++ b/man/mvgam.Rd
@@ -0,0 +1,199 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/mvgam.R
+\name{mvgam}
+\alias{mvgam}
+\title{Fit a Bayesian dynamic GAM to a univariate or multivariate set of discrete time series}
+\usage{
+mvgam(
+  formula,
+  knots,
+  data_train,
+  data_test,
+  run_model = TRUE,
+  prior_simulation = FALSE,
+  return_model_data = FALSE,
+  family = "poisson",
+  use_lv = FALSE,
+  n_lv,
+  trend_model = "RW",
+  drift = FALSE,
+  chains = 2,
+  burnin = 5000,
+  n_samples = 2000,
+  thin = 4,
+  parallel = TRUE,
+  phi_prior,
+  ar_prior,
+  r_prior,
+  twdis_prior,
+  sigma_prior,
+  upper_bounds,
+  use_stan = FALSE,
+  jags_path
+)
+}
+\arguments{
+\item{formula}{A \code{character} string specifying the GAM formula. These are exactly like the formula
+for a GLM except that smooth terms, s, te, ti and t2, can be added to the right hand side
+to specify that the linear predictor depends on smooth functions of predictors (or linear functionals of these).}
+
+\item{knots}{An optional \code{list} containing user specified knot values to be used for basis construction.
+For most bases the user simply supplies the knots to be used, which must match up with the k value supplied
+(note that the number of knots is not always just k). Different terms can use different numbers of knots,
+unless they share a covariate.}
+
+\item{data_train}{A \code{dataframe} or \code{list} containing the model response variable and covariates
+required by the GAM \code{formula}. Should include columns:
+'y' (the discrete outcomes; \code{NA}s allowed)
+'series' (character or factor index of the series IDs)
+'time' (numeric index of the time point for each observation).
+Any other variables to be included in the linear predictor of \code{formula} must also be present}
+
+\item{data_test}{Optional \code{dataframe} or \code{list} of test data containing at least 'series' and 'time'
+in addition to any other variables included in the linear predictor of \code{formula}. If included, the
+observations in \code{data_test} will be set to \code{NA} when fitting the model so that posterior
+simulations can be obtained}
+
+\item{run_model}{\code{logical}. If \code{FALSE}, the model is not fitted but instead the function will
+return the model file and the data / initial values that are needed to fit the \code{JAGS} model}
+
+\item{prior_simulation}{\code{logical}. If \code{TRUE}, no observations are fed to the model, and instead
+simulations from prior distributions are returned}
+
+\item{return_model_data}{\code{logical}. If \code{TRUE}, the list of data that is needed to fit the
+model is returned, along with the initial values for smooth and AR parameters, once the model is fitted.
+This will be helpful if users wish to modify the model file to add
+other stochastic elements that are not currently avaiable in \code{mvgam}. Default is \code{FALSE} to reduce
+the size of the returned object, unless \code{run_model == FALSE}}
+
+\item{family}{\code{character}. Must be either 'nb' (for Negative Binomial), 'tw' (for Tweedie) or 'poisson'}
+
+\item{use_lv}{\code{logical}. If \code{TRUE}, use dynamic factors to estimate series'
+latent trends in a reduced dimension format. If \code{FALSE}, estimate independent latent trends for each series}
+
+\item{n_lv}{\code{integer} the number of latent dynamic factors to use if \code{use_lv == TRUE}.
+Cannot be \code{>n_series}. Defaults arbitrarily to \code{min(5, floor(n_series / 2))}}
+
+\item{trend_model}{\code{character} specifying the time series dynamics for the latent trend. Options are:
+'None' (no latent trend component; i.e. the GAM component is all that contributes to the linear predictor,
+and the observation process is the only source of error; similarly to what is estimated by \code{\link[mcgv]{gam}}),
+'RW' (random walk with possible drift),
+'AR1' (AR1 model with intercept),
+'AR2' (AR2 model with intercept) or
+'AR3' (AR3 model with intercept) or
+'GP' (Gaussian process with squared exponential kernel; currently under development and
+only available for estimation in stan)}
+
+\item{drift}{\code{logical} estimate a drift parameter in the latent trend components. Useful if the latent
+trend is expected to broadly follow a non-zero slope. Note that if the latent trend is more or less stationary,
+the drift parameter can become unidentifiable, especially if an intercept term is included in the GAM linear
+predictor (which it is by default when calling \code{\link[mcgv]{jagam}}). Therefore this defaults to \code{FALSE}}
+
+\item{chains}{\code{integer} specifying the number of parallel chains for the model}
+
+\item{burnin}{\code{integer} specifying the number of iterations of the Markov chain to run during
+adaptive mode to tune sampling algorithms}
+
+\item{n_samples}{\code{integer} specifying the number of iterations of the Markov chain to run for
+sampling the posterior distribution}
+
+\item{thin}{Thinning interval for monitors}
+
+\item{parallel}{\code{logical} specifying whether multiple cores should be used for
+for generating \code{JAGS} simulations in parallel. If \code{TRUE}, the number of cores to use will be
+\code{min(c(chains, parallel::detectCores() - 1))}}
+
+\item{phi_prior}{\code{character} specifying (in JAGS syntax) the prior distribution for the drift terms/intercepts
+in the latent trends}
+
+\item{ar_prior}{\code{character} specifying (in JAGS syntax) the prior distribution for the AR terms
+in the latent trends}
+
+\item{r_prior}{\code{character} specifying (in JAGS syntax) the prior distribution for the Negative Binomial
+overdispersion parameters. Note that this prior acts on the SQUARE ROOT of \code{r}, which is convenient
+for inducing a complexity-penalising prior model whereby the observation process reduces to a Poisson
+as the sampled parameter approaches \code{0}. Ignored if family is Poisson or Tweedie}
+
+\item{twdis_prior}{\code{character} specifying (in JAGS syntax) the prior distribution for the Tweedie
+overdispersion parameters. Ignored if family is Poisson or Negative Binomial}
+
+\item{sigma_prior}{\code{character} specifying (in JAGS syntax) the prior distributions for the independent gaussian
+variances used for the latent trends (ignored if \code{use_lv == TRUE})}
+
+\item{upper_bounds}{Optional \code{vector} of \code{integer} values specifying upper limits for each series. If supplied,
+this generates a modified likelihood where values above the bound are given a likelihood of zero. Note this modification
+is computationally expensive in \code{JAGS} but can lead to better estimates when true bounds exist. Default is to remove
+truncation entirely (i.e. there is no upper bound for each series)}
+
+\item{use_stan}{Logical. If \code{TRUE} and if \code{rstan} is installed, the model will be compiled and sampled using
+the Hamiltonian Monte Carlo with a call to \code{\link[rstan]{stan}}. Note that this functionality is still in development and
+not all options that are available in \code{JAGS} can be used, including: no option for a Tweedie family and no option for
+dynamic factor trends. However, as \code{rstan} can estimate Hilbert base approximate gaussian processes, which
+are much more computationally tractable than full GPs for time series with \verb{>100} observations, estimation
+in \code{rstan} can support latent GP trends while estimation in \code{JAGS} cannot}
+
+\item{jags_path}{Optional character vector specifying the path to the location of the \code{JAGS} executable (.exe) to use
+for modelling if \code{use_stan == FALSE}. If missing, the path will be recovered from a call to \code{\link[runjags]{findjags}}}
+}
+\value{
+A \code{list} object of class \code{mvgam} containing model output, the text representation of the model file,
+the mgcv model output (for easily generating simulations at
+unsampled covariate values), Dunn-Smyth residuals for each series and key information needed
+for other functions in the package
+}
+\description{
+This function estimates the posterior distribution for Generalised Additive Models (GAMs) that can include
+smooth functions, specified in the GAM formula, as well as latent temporal processes, specified by trend_model.
+There are currently two options for specifying the structures of the trends (either as latent
+dynamic factors to capture trend dependencies among series in a reduced dimension format, or as independent trends)
+}
+\details{
+Dynamic GAMs are useful when we wish to predict future values from time series that show temporal dependence
+but we do not want to rely on extrapolating from a smooth term (which can sometimes lead to unpredictable and unrealistic behaviours).
+In addition, smooths can often try to wiggle excessively to capture any autocorrelation that is present in a time series,
+which exacerbates the problem of forecasting ahead. As GAMs are very naturally viewed through a Bayesian lens, and we often
+must model time series that show complex distributional features and missing data, parameters for \code{mvgam} models are estimated
+in a Bayesian framework using Markov Chain Monte Carlo.
+\cr
+\cr
+\emph{Priors}: A \code{\link[mcgv]{jagam}} model file is generated from \code{formula} and modified to include any latent
+temporal processes. Prior distributions for most important model parameters can be altered by the user to inspect model
+sensitivities to given priors. Note that latent trends are estimated on the log scale so choose tau, AR and phi priors
+accordingly. However more control over the model specification can be accomplished by first using \code{mvgam} as a
+baseline, then editing the returned model accordingly. The model file can be edited and run outside
+of \code{mvgam} by setting \code{run_model = TRUE} and this is encouraged for complex modelling tasks
+\cr
+\cr
+\emph{Random effects}: For any smooth terms using the random effect basis (\code{\link[mcgv]{smooth.construct.re.smooth.spec}}),
+a non-centred parameterisation is automatically employed to avoid degeneracies that are common in hierarchical models.
+Note however that centred versions may perform better for series that are particularly informative, so as with any
+foray into Bayesian modelling, it is worth building an understanding of the model's assumptions and limitations by following a
+principled workflow.
+\cr
+\cr
+\emph{Overdispersion parameters}: When more than one series is included in \code{data_train} and an overdispersed
+exponential family is used, by default the overdispersion parameters (\code{r} for Poisson, \code{twdis} for Tweedie) are
+estimated independently for each series. Note that for Tweedie
+models, estimating the power parameter \code{p} alongside the overdispersion parameter
+\code{twdis} and the smooth coefficients is very challenging for noisy data, introducing some difficult posterior geometries.
+The \code{p} parameter is therefore fixed at \code{1.5} (i.e. a so-called Geometric Poisson model).
+\cr
+\cr
+\emph{Factor regularisation}: When using a dynamic factor model for the trends, factor precisions are given
+regularized penalty priors to theoretically allow some factors to be dropped from the model by squeezing increasing
+factors' variances to zero. This is done to help protect against selecting too many latent factors than are needed to
+capture dependencies in the data, so it can often be advantageous to set `n_lv`` to a slightly larger number. However
+larger numbers of factors do come with additional computational costs so these should be balanced as well.
+\cr
+\cr
+\emph{Residuals}: For each series, randomized quantile (i.e. Dunn-Smyth) residuals are calculated for inspecting model diagnostics
+If the fitted model is appropriate then Dunn-Smyth residuals will be standard normal in distribution and no
+autocorrelation will be evident. When a particular observation is missing, the residual is calculated by comparing independent
+draws from the model's posterior distribution
+}
+\seealso{
+\code{\link[rjags]{jags.model}}, \code{\link[mcgv]{jagam}}, \code{\link[mcgv]{gam}}
+}
+\author{
+Nicholas J Clark
+}