Skip to content

Commit

Permalink
beta binomial additions
Browse files Browse the repository at this point in the history
  • Loading branch information
Nicholas Clark committed Mar 20, 2024
1 parent 79572c2 commit 8839452
Show file tree
Hide file tree
Showing 54 changed files with 627 additions and 242 deletions.
93 changes: 93 additions & 0 deletions R/add_binomial.R
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,51 @@ add_binomial = function(formula,
trials <- NULL
}

# Update parameters block
if(family_char == 'beta_binomial'){
model_file[grep("vector[num_basis] b_raw;",
model_file, fixed = TRUE)] <-
paste0("vector[num_basis] b_raw;\n",
"vector<lower=0>[n_series] phi;")
model_file <- readLines(textConnection(model_file), n = -1)
}

# Update functions block
if(family_char == 'beta_binomial'){
if(any(grepl('functions {', model_file, fixed = TRUE))){
model_file[grep('functions {', model_file, fixed = TRUE)] <-
paste0("functions {\n",
"vector rep_each(vector x, int K) {\n",
"int N = rows(x);\n",
"vector[N * K] y;\n",
"int pos = 1;\n",
"for (n in 1 : N) {\n",
"for (k in 1 : K) {\n",
"y[pos] = x[n];\n",
"pos += 1;\n",
"}\n",
"}\n",
"return y;\n",
"}")
} else {
model_file[grep('Stan model code', model_file)] <-
paste0('// Stan model code generated by package mvgam\n',
'functions {\n',
"vector rep_each(vector x, int K) {\n",
"int N = rows(x);\n",
"vector[N * K] y;\n",
"int pos = 1;\n",
"for (n in 1 : N) {\n",
"for (k in 1 : K) {\n",
"y[pos] = x[n];\n",
"pos += 1;\n",
"}\n",
"}\n",
"return y;\n",
"}\n}")
}
}

# Update model block
if(family_char == 'binomial'){
if(any(grepl("flat_ys ~ poisson_log_glm(flat_xs,",
Expand All @@ -92,6 +137,38 @@ add_binomial = function(formula,
}
}

if(family_char == 'beta_binomial'){
model_file[grep("model {", model_file, fixed = TRUE)] <-
paste0("model {\n",
"// priors for Beta dispersion parameters\n",
"phi ~ gamma(0.01, 0.01);")
if(any(grepl("flat_ys ~ poisson_log_glm(flat_xs,",
model_file, fixed = TRUE))){
linenum <- grep("flat_ys ~ poisson_log_glm(flat_xs,",
model_file, fixed = TRUE)
model_file[linenum] <-
paste0("vector[n_nonmissing] flat_phis;\n",
"flat_phis = rep_each(phi, n)[obs_ind];\n",
"flat_ys ~ beta_binomial(flat_trials_train, inv_logit(flat_xs * b) .* flat_phis, (1 - inv_logit(flat_xs * b)) .* flat_phis);")

model_file <- model_file[-(linenum + 1)]
model_file <- readLines(textConnection(model_file), n = -1)
}

if(any(grepl("flat_ys ~ poisson_log_glm(append_col(flat_xs, flat_trends),",
model_file, fixed = TRUE))){
linenum <- grep("flat_ys ~ poisson_log_glm(append_col(flat_xs, flat_trends),",
model_file, fixed = TRUE)
model_file[linenum] <-
paste0("vector[n_nonmissing] flat_phis;\n",
"flat_phis = rep_each(phi, n)[obs_ind];\n",
"flat_ys ~ beta_binomial(flat_trials_train, inv_logit(append_col(flat_xs, flat_trends) * b) .* flat_phis, (1 - inv_logit(append_col(flat_xs, flat_trends) * b)) .* flat_phis);")

model_file <- model_file[-(linenum + 1)]
model_file <- readLines(textConnection(model_file), n = -1)
}
}

if(family_char == 'bernoulli'){
if(any(grepl("flat_ys ~ poisson_log_glm(flat_xs,",
model_file, fixed = TRUE))){
Expand All @@ -116,6 +193,22 @@ add_binomial = function(formula,
"ypred[1:n, s] = binomial_rng(flat_trials[ytimes[1:n, s]], inv_logit(mus[1:n, s]));"
}

if(family_char == 'beta_binomial'){
model_file[grep("vector[total_obs] eta;", model_file, fixed = TRUE)] <-
paste0("vector[total_obs] eta;\n",
"matrix[n, n_series] phi_vec;")

model_file[grep("eta = X * b;", model_file, fixed = TRUE)] <-
paste0("eta = X * b;;\n",
"for (s in 1 : n_series) {\n",
"phi_vec[1 : n, s] = rep_vector(phi[s], n);\n",
"}")

model_file[grep("ypred[1:n, s] = poisson_log_rng(mus[1:n, s]);" ,
model_file, fixed = TRUE)] <-
"ypred[1:n, s] = beta_binomial_rng(flat_trials[ytimes[1:n, s]], inv_logit(mus[1:n, s]) .* phi_vec[1:n, s], (1 - inv_logit(mus[1:n, s])) .* phi_vec[1:n, s]);"
}

if(family_char == 'bernoulli'){
model_file[grep("ypred[1:n, s] = poisson_log_rng(mus[1:n, s]);" ,
model_file, fixed = TRUE)] <-
Expand Down
17 changes: 17 additions & 0 deletions R/conditional_effects.R
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,23 @@ conditional_effects.mvgam = function(x,
type <- match.arg(type, c('response', 'link',
'detection', 'latent_N',
'expected'))

# Can't plot points or rugs with binomial models due to the
# cbind syntax
if(rug){
if(x$family %in% c('binomial', 'beta_binomial')){
rug <- FALSE
warning('Cannot show observation rug for binomial models')
}
}

if(points){
if(x$family %in% c('binomial', 'beta_binomial')){
points <- FALSE
warning('Cannot show observation points for binomial models')
}
}

if(type == 'response'){
if(points){
points <- 0.5
Expand Down
135 changes: 131 additions & 4 deletions R/families.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#' @importFrom stats make.link dgamma pgamma rgamma qnorm plnorm runif pbeta dlnorm dpois
#' @importFrom stats pnorm ppois plogis gaussian poisson Gamma dnbinom rnbinom dnorm dbeta
#' @importFrom stats binomial rbinom pbinom dbinom qbinom qlogis
#' @importFrom brms lognormal student bernoulli rstudent_t qstudent_t dstudent_t pstudent_t
#' @importFrom brms lognormal student bernoulli rstudent_t qstudent_t dstudent_t pstudent_t dbeta_binomial rbeta_binomial pbeta_binomial
#' @param link a specification for the family link function. At present these cannot
#' be changed
#' @details \code{mvgam} currently supports the following standard observation families:
Expand Down Expand Up @@ -192,6 +192,33 @@ student_t = function(link = 'identity'){
#' conf_level = 0.5) +
#' ylab('Observed abundance') +
#' theme_classic()
#'
#' # Example showcasing how cbind() is needed for Binomial observations
#' # Simulate two time series of Binomial trials
#' trials <- sample(c(20:25), 50, replace = TRUE)
#' x <- rnorm(50)
#' detprob1 <- plogis(-0.5 + 0.9*x)
#' detprob2 <- plogis(-0.1 -0.7*x)
#' dat <- rbind(data.frame(y = rbinom(n = 50, size = trials, prob = detprob1),
#' time = 1:50,
#' series = 'series1',
#' x = x,
#' ntrials = trials),
#' data.frame(y = rbinom(n = 50, size = trials, prob = detprob2),
#' time = 1:50,
#' series = 'series2',
#' x = x,
#' ntrials = trials))
#' dat <- dplyr::mutate(dat, series = as.factor(series))
#' dat <- dplyr::arrange(dat, time, series)
#'
#' # Fit a model using the binomial() family; must specify observations
#' # and number of trials in the cbind() wrapper
#' mod <- mvgam(cbind(y, ntrials) ~ series + s(x, by = series),
#' family = binomial(),
#' data = dat)
#' summary(mod)
#'
#' }
#' @export
nmix = function(link = 'log'){
Expand All @@ -209,6 +236,7 @@ family_char_choices = function(){
c('negative binomial',
"poisson",
"binomial",
'beta_binomial',
"bernoulli",
"nmix",
"tweedie",
Expand Down Expand Up @@ -541,7 +569,7 @@ mvgam_predict = function(Xp,
} else if(type == 'variance'){
mu <- plogis(as.vector((matrix(Xp, ncol = NCOL(Xp)) %*%
betas) + attr(Xp, 'model.offset')))
out <- mu * (1 - mu)
out <- as.vector(family_pars$trials) * mu * (1 - mu)
} else {
out <- plogis(((matrix(Xp, ncol = NCOL(Xp)) %*%
betas)) +
Expand All @@ -550,6 +578,45 @@ mvgam_predict = function(Xp,
}
}

# Beta_Binomial observations (requires arguments 'trials' and 'phi')
if(family == 'beta_binomial'){
if(type == 'link'){
out <- as.vector((matrix(Xp, ncol = NCOL(Xp)) %*%
betas) + attr(Xp, 'model.offset'))

if(density){
out <- dbeta_binomial(truth,
mu = plogis(out),
phi = as.vector(family_pars$phi),
size = as.vector(family_pars$trials),
log = TRUE)
}

} else if(type == 'response'){
out <- rbeta_binomial(n = NROW(Xp),
mu = plogis(((matrix(Xp, ncol = NCOL(Xp)) %*%
betas)) +
attr(Xp, 'model.offset')),
phi = as.vector(family_pars$phi),
size = as.vector(family_pars$trials))
} else if(type == 'variance'){
mu <- plogis(as.vector((matrix(Xp, ncol = NCOL(Xp)) %*%
betas) + attr(Xp, 'model.offset')))
# https://en.wikipedia.org/wiki/Beta-binomial_distribution
alpha <- mu * as.vector(family_pars$phi)
beta <- (1 - mu) * as.vector(family_pars$phi)
p <- 1 / (alpha + beta + 1)
n <- as.vector(family_pars$trials)
out <- ((n * p) * (1 - p)) * ((alpha + beta + n) / (alpha + beta + 1))

} else {
out <- plogis(((matrix(Xp, ncol = NCOL(Xp)) %*%
betas)) +
attr(Xp, 'model.offset')) *
as.vector(family_pars$trials)
}
}

# Negative Binomial observations (requires argument 'phi')
if(family == 'negative binomial'){
if(type == 'link'){
Expand Down Expand Up @@ -710,7 +777,7 @@ family_to_mgcvfam = function(family){
mgcv::Tweedie(p = 1.5, link = 'log')
} else if(family$family == 'nmix'){
poisson()
} else if(family$family == 'bernoulli'){
} else if(family$family %in% c('bernoulli', 'beta_binomial')){
binomial()
} else {
family
Expand Down Expand Up @@ -781,6 +848,7 @@ family_par_names = function(family){
}

if(family %in% c('beta',
'beta_binomial',
'negative binomial',
'tweedie')){
out <- c('phi')
Expand Down Expand Up @@ -921,6 +989,21 @@ family_prior_info = function(family, use_stan, data){
)))
}

if(family == 'beta_binomial'){
prior_df <- data.frame(param_name = c('vector<lower=0>[n_series] phi;'),
param_length = length(unique(data$series)),
param_info = c('Beta Binomial precision parameter'),
prior = c('phi ~ gamma(0.01, 0.01);'),
example_change = c(
paste0(
'phi ~ normal(',
round(runif(min = -1, max = 1, n = 1), 2),
', ',
round(runif(min = 0.1, max = 1, n = 1), 2),
');'
)))
}

if(family == 'Gamma'){
prior_df <- data.frame(param_name = c('vector<lower=0>[n_series] shape;'),
param_length = length(unique(data$series)),
Expand Down Expand Up @@ -1045,6 +1128,37 @@ ds_resids_binomial = function(truth, fitted, draw, N){
dsres_out
}

#' @noRd
ds_resids_beta_binomial = function(truth, fitted, draw, N, phi){
na_obs <- is.na(truth)
a_obs <- pbeta_binomial(ifelse(as.vector(truth[!na_obs]) - 1.e-6 > 0,
as.vector(truth[!na_obs]) - 1.e-6,
0),
size = N[!na_obs],
mu = fitted[!na_obs],
phi = phi[!na_obs])
b_obs <- pbeta_binomial(ifelse(as.vector(truth[!na_obs]),
as.vector(truth[!na_obs]),
0),
size = N[!na_obs],
mu = fitted[!na_obs],
phi = phi[!na_obs])
u_obs <- runif(n = length(truth[!na_obs]),
min = pmin(a_obs, b_obs),
max = pmax(a_obs, b_obs))

if(any(is.na(truth))){
u <- vector(length = length(truth))
u[na_obs] <- NaN
u[!na_obs] <- u_obs
} else {
u <- u_obs
}
dsres_out <- qnorm(u)
dsres_out[is.infinite(dsres_out)] <- NaN
dsres_out
}

#' @noRd
ds_resids_nb = function(truth, fitted, draw, size){
na_obs <- is.na(truth)
Expand Down Expand Up @@ -1529,7 +1643,7 @@ dsresids_vec = function(object){
N <- mcmc_chains(object$model_output, 'latent_ypred')[,1:last_train, drop = FALSE]
}

if(family == 'binomial'){
if(family %in% c('binomial', 'beta_binomial')){
p <- plogis(mcmc_chains(object$model_output, 'mus')[,1:last_train,
drop = FALSE])
N <- as.vector(attr(object$mgcv_model, 'trials'))[1:length(truth)]
Expand Down Expand Up @@ -1573,6 +1687,19 @@ dsresids_vec = function(object){
N = as.vector(N_mat)),
nrow = NROW(preds))
}

if(family == 'beta_binomial'){
N_mat <- matrix(rep(N, NROW(preds)),
nrow = NROW(preds),
byrow = TRUE)
resids <- matrix(ds_resids_beta_binomial(truth = as.vector(truth_mat),
fitted = as.vector(p),
draw = 1,
N = as.vector(N_mat),
phi = family_extracts$phi),
nrow = NROW(preds))
}

if(family == 'bernoulli'){
resids <- matrix(ds_resids_binomial(truth = as.vector(truth_mat),
fitted = as.vector(preds),
Expand Down
10 changes: 5 additions & 5 deletions R/forecast.mvgam.R
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ forecast.mvgam = function(object, newdata, data_test,
names(par_extracts) <- names(family_pars)

# Add trial information if this is a Binomial model
if(object$family == 'binomial'){
if(object$family %in% c('binomial', 'beta_binomial')){
trials <- as.vector(matrix(rep(as.vector(attr(object$mgcv_model, 'trials')[,series]),
NROW(mus)),
nrow = NROW(mus),
Expand Down Expand Up @@ -438,7 +438,7 @@ forecast.mvgam = function(object, newdata, data_test,
names(par_extracts) <- names(family_pars)

# Add trial information if this is a Binomial model
if(object$family == 'binomial'){
if(object$family %in% c('binomial', 'beta_binomial')){
trials <- as.vector(matrix(rep(as.vector(attr(object$mgcv_model, 'trials')[,series]),
NROW(mus)),
nrow = NROW(mus),
Expand Down Expand Up @@ -534,7 +534,7 @@ forecast.mvgam = function(object, newdata, data_test,
names(par_extracts) <- names(family_pars)

# Add trial information if this is a Binomial model
if(object$family == 'binomial'){
if(object$family %in% c('binomial', 'beta_binomial')){
trials <- as.vector(matrix(rep(as.vector(attr(object$mgcv_model, 'trials')[1:last_train, series]),
NROW(mus)),
nrow = NROW(mus),
Expand Down Expand Up @@ -915,7 +915,7 @@ forecast_draws = function(object,
family_pars <- extract_family_pars(object = object, newdata = data_test)

# Add trial information if this is a Binomial model
if(object$family == 'binomial'){
if(object$family %in% c('binomial', 'beta_binomial')){
resp_terms <- as.character(terms(formula(object$formula))[[2]])
resp_terms <- resp_terms[-grepl('cbind', resp_terms)]
trial_name <- resp_terms[2]
Expand Down Expand Up @@ -1108,7 +1108,7 @@ forecast_draws = function(object,
names(family_extracts) <- names(family_pars)

# Add trial information if this is a Binomial model
if(family == 'binomial'){
if(family %in% c('binomial', 'beta_binomial')){
family_extracts$trials <- trials[,series]
}

Expand Down
Loading

0 comments on commit 8839452

Please sign in to comment.