From 80d6e7beabdf859e1fd857b6ab341d3cb3133eab Mon Sep 17 00:00:00 2001 From: quantifish Date: Mon, 5 Aug 2024 20:31:21 +1200 Subject: [PATCH] adding all new stuff - more to do --- DESCRIPTION | 4 ++-- NAMESPACE | 5 ----- R/get-coefs.R | 27 ++++++++++------------- R/get-influ.R | 13 +++++------ R/helper-functions.R | 46 --------------------------------------- R/influ.R | 1 - R/influ2.R | 1 - R/plot-cdi.R | 16 ++++++-------- _pkgdown.yml | 4 +--- man/get_marginal.Rd | 3 --- man/influ-package.Rd | 1 - man/influ2.Rd | 1 - man/inv_logit.Rd | 14 ------------ man/logistic.Rd | 24 -------------------- man/logit.Rd | 21 ------------------ man/plot_bayesian_cdi.Rd | 3 --- tests/testthat/test-cdi.R | 7 ++---- vignettes/influ2.Rmd | 42 ++++++++++++++++++----------------- 18 files changed, 50 insertions(+), 183 deletions(-) delete mode 100644 man/inv_logit.Rd delete mode 100644 man/logistic.Rd delete mode 100644 man/logit.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 3a30e9e..c9f2ec2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: influ2 Title: Influence plots -Version: 0.0.0 +Version: 1.0.0 Authors@R: person(given = "Darcy", family = "Webber", @@ -38,4 +38,4 @@ Suggests: bayesplot LazyData: false VignetteBuilder: knitr -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.2 diff --git a/NAMESPACE b/NAMESPACE index 201d55f..07694bf 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -14,9 +14,6 @@ export(get_marginal) export(get_unstandarsied) export(id_var_type) export(influ_app) -export(inv_logit) -export(logistic) -export(logit) export(plot_bayesian_cdi) export(plot_bayesian_cdi2) export(plot_bubble) @@ -57,11 +54,9 @@ importFrom(stats,coefficients) importFrom(stats,fitted) importFrom(stats,median) importFrom(stats,model.matrix) -importFrom(stats,plogis) importFrom(stats,poly) importFrom(stats,ppoints) importFrom(stats,qgamma) -importFrom(stats,qlogis) importFrom(stats,qnorm) importFrom(stats,qqplot) importFrom(stats,quantile) diff --git a/R/get-coefs.R b/R/get-coefs.R index bcb3bed..1c8b1fc 100644 --- a/R/get-coefs.R +++ b/R/get-coefs.R @@ -3,9 +3,6 @@ #' @param fit An object of class \code{brmsfit}. #' @param var The variable to obtain. #' @return A \code{data.frame}. -#' -#' @author Darcy Webber \email{darcy@quantifish.co.nz} -#' #' @importFrom reshape2 melt #' @importFrom readr parse_number #' @importFrom nlme fixef ranef @@ -124,7 +121,6 @@ get_coefs_raw <- function(fit, var = "area") { #' @param hurdle If a hurdle model then use the hurdle. #' @param transform if the coefficients should be transformed using the link function. #' @return A \code{data.frame}. -#' #' @importFrom reshape2 melt #' @importFrom readr parse_number #' @importFrom nlme fixef ranef @@ -132,15 +128,14 @@ get_coefs_raw <- function(fit, var = "area") { #' @import dplyr #' @export #' -get_coefs <- function(fit, var = "area", +get_coefs <- function(fit, + var = "area", normalise = TRUE, hurdle = FALSE, transform = FALSE) { if (!is.brmsfit(fit)) stop("fit is not an object of class brmsfit.") - type <- id_var_type(fit = fit, xfocus = var, hurdle = hurdle) - is_poly <- FALSE if (any(grepl("\\(1 \\|", var))) { @@ -156,8 +151,7 @@ get_coefs <- function(fit, var = "area", mutate(variable = gsub(".*\\[|\\]", "", variable)) %>% mutate(variable = gsub(",Intercept", "", variable)) if (!str_detect(var, ":")) { - ps <- ps %>% - filter(!str_detect(variable, ":")) + ps <- ps %>% filter(!str_detect(variable, ":")) } } else { # Population-level effects @@ -182,11 +176,8 @@ get_coefs <- function(fit, var = "area", ps <- ps %>% filter(!str_detect(variable, ":")) } - # mutate(variable = paste0(var, gregexpr("[[:digit:]]+", variable))) - # mutate(variable = paste0(var, parse_number(as.character(variable)))) - # unique(ps$variable) - # tail(ps) } + # unique(ps$variable) # Check to see if this is a polynomial if (any(grepl("poly", ps$variable))) { @@ -203,8 +194,7 @@ get_coefs <- function(fit, var = "area", filter(grepl("hu", variable)) %>% mutate(variable = gsub("hu_", "", variable)) } else { - ps <- ps %>% - filter(!grepl("hu", variable)) + ps <- ps %>% filter(!grepl("hu", variable)) } } @@ -232,7 +222,12 @@ get_coefs <- function(fit, var = "area", value = 0) coefs <- rbind(ps0, ps) } else { - coefs <- ps + data <- fit$data + data[,var] <- paste0(var, data[,var]) + ps0 <- data.frame(iteration = 1:max(ps$iteration), + variable = unique(data[,var])[!unique(data[,var]) %in% unique(ps$variable)], + value = 0) + coefs <- rbind(ps0, ps) } # Arrange by vessel coefficient if vessel chosen diff --git a/R/get-influ.R b/R/get-influ.R index bb9a502..c499cdb 100644 --- a/R/get-influ.R +++ b/R/get-influ.R @@ -18,8 +18,7 @@ get_influ2 <- function(fit, if (!is.brmsfit(fit)) stop("fit is not an object of class brmsfit.") # Model data - data <- fit$data %>% - mutate(id = 1:n()) + data <- fit$data %>% mutate(id = 1:n()) y <- names(data)[1] # Identify the type of variable we are dealing with @@ -59,10 +58,8 @@ get_influ2 <- function(fit, sdata <- standata(fit) X <- sdata$Xs Z <- sdata$Zs_1_1 - s_coefs <- coefs %>% - filter(!str_detect(.data$variable, "bs_|sds_")) - coefs <- coefs %>% - filter(str_detect(.data$variable, "bs_")) + s_coefs <- coefs %>% filter(!str_detect(.data$variable, "bs_|sds_")) + coefs <- coefs %>% filter(str_detect(.data$variable, "bs_")) } # Do the matrix multiplication @@ -82,7 +79,7 @@ get_influ2 <- function(fit, } influ_var <- melt(Xbeta) %>% - rename(iteration = .data$Var1, id = .data$Var2) %>% + rename(iteration = Var1, id = Var2) %>% left_join(data, by = "id") influ_rho <- influ_var %>% group_by(.data$iteration) %>% @@ -153,7 +150,7 @@ get_influ <- function(fit, group = c("fishing_year", "area"), hurdle = FALSE) { } influ_var <- melt(Xbeta) %>% - rename(iteration = .data$Var1, id = .data$Var2) %>% + rename(iteration = Var1, id = .data$Var2) %>% left_join(data, by = "id") influ_rho <- influ_var %>% group_by(.data$iteration) %>% diff --git a/R/helper-functions.R b/R/helper-functions.R index 58b6f02..380917a 100644 --- a/R/helper-functions.R +++ b/R/helper-functions.R @@ -73,32 +73,6 @@ get_first_term <- function(fit) { } -#' Logit -#' -#' @inheritParams stats::qlogis -#' @return the density. -#' @importFrom stats qlogis -#' @export -#' -logit <- function(p, location = 0, scale = 1, lower.tail = TRUE, log.p = FALSE) { - qlogis(p = p, location = location, scale = scale, lower.tail = lower.tail, log.p = log.p) -} - - -#' Logistic -#' -#' @param q vector of quantiles. -#' @inheritParams stats::plogis -#' @param log.p logical; if TRUE, probabilities p are given as log(p). -#' @return gives the distribution function. -#' @importFrom stats plogis -#' @export -#' -logistic <- function(q, location = 0, scale = 1, lower.tail = TRUE, log.p = FALSE) { - plogis(q = q, location = location, scale = scale, lower.tail = lower.tail, log.p = log.p) -} - - #' Identify the variable type #' #' @param fit An object of class \code{brmsfit}. @@ -150,23 +124,3 @@ id_var_type <- function(fit, xfocus, hurdle = FALSE) { geo_mean <- function(a) { prod(a)^(1.0 / length(a)) } - - -#' Inverse logit -#' -#' @param a a vector. -#' @export -#' -inv_logit <- function(a) { - exp(a) / (exp(a) + 1.0) -} - - -#' logit -#' -#' @param p a vector. -#' @export -#' -logit <- function(p) { - log(p / (1.0 - p)) -} diff --git a/R/influ.R b/R/influ.R index 9d13991..7ded201 100644 --- a/R/influ.R +++ b/R/influ.R @@ -27,7 +27,6 @@ #' This document provides minimal documentation and the best way to learn how to use this package is through the vignette: #' "An example of how to use the 'influ' package" #' -#' @docType package #' @name influ-package #' @aliases influ #' @title Influence in linear models diff --git a/R/influ2.R b/R/influ2.R index 5500481..cd02b8c 100644 --- a/R/influ2.R +++ b/R/influ2.R @@ -22,7 +22,6 @@ #' understanding fisheries catch-per-unit-effort standardizations. \emph{ICES Journal of Marine Science}. #' \code{doi:10.1093/icesjms/fsr174} #' -#' @docType package #' @name influ2 #' NULL diff --git a/R/plot-cdi.R b/R/plot-cdi.R index 0e28887..eda69f0 100644 --- a/R/plot-cdi.R +++ b/R/plot-cdi.R @@ -23,11 +23,7 @@ #' the same for all and raw, but the legend will change from numbers of records to a proportion. #' @param ... Further arguments passed to nothing. #' @return a \code{ggplot} object. -#' -#' @author Darcy Webber \email{darcy@quantifish.co.nz} -#' #' @seealso \code{\link{get_coefs}}, \code{\link{get_influ}}, \code{\link{plot_bubble}} -#' #' @importFrom gtable is.gtable gtable_filter #' @importFrom stats poly #' @importFrom tidyselect all_of @@ -49,7 +45,6 @@ plot_bayesian_cdi <- function(fit, sum_by = "row", ...) { if (!is.brmsfit(fit)) stop("fit is not an object of class brmsfit.") - if (is.null(xlab)) xlab <- xfocus if (is.null(ylab)) ylab <- yfocus y_coefs <- "Conditional effect" @@ -58,10 +53,15 @@ plot_bayesian_cdi <- function(fit, type <- id_var_type(fit = fit, xfocus = xfocus, hurdle = hurdle) # Posterior samples of coefficients + # if (type %in% c("random_effect")) { if (type %in% c("fixed_effect", "random_effect")) { coefs <- get_coefs(fit = fit, var = xfocus, hurdle = hurdle) } else { - coefs <- get_marginal(fit = fit, var = xfocus) # this would plot the marginal/conditional effect, but if it is a hurdle model it ignores the hurdle bit + # this would plot the marginal/conditional effect, but if it is a hurdle model it ignores the hurdle bit + coefs <- get_marginal(fit = fit, var = xfocus)# %>% + # mutate(value = log(value)) + # library(marginaleffects) + # pred <- predictions(model = fit) # coefs <- get_coefs_raw(fit = fit, var = xfocus) } @@ -73,9 +73,7 @@ plot_bayesian_cdi <- function(fit, # Model data if (is.numeric(coefs$variable)) { - data <- fit$data %>% - select(all_of(c(yfocus, xfocus))) - + data <- fit$data %>% select(all_of(c(yfocus, xfocus))) length.out <- 15 dmin <- min(data[,xfocus]) dmax <- max(data[,xfocus]) diff --git a/_pkgdown.yml b/_pkgdown.yml index 106bf45..b02a747 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -15,11 +15,9 @@ reference: contents: - influ2 - starts_with("get_") - - logit - - inv_logit - - logistic - id_var_type - geo_mean + - influ_app - title: influ2 plots desc: Plots and tables from the new influ2 R package. diff --git a/man/get_marginal.Rd b/man/get_marginal.Rd index ab3b817..5c96f9d 100644 --- a/man/get_marginal.Rd +++ b/man/get_marginal.Rd @@ -17,6 +17,3 @@ A \code{data.frame}. \description{ Get model coefficients new version } -\author{ -Darcy Webber \email{darcy@quantifish.co.nz} -} diff --git a/man/influ-package.Rd b/man/influ-package.Rd index f48ba4d..10d572a 100644 --- a/man/influ-package.Rd +++ b/man/influ-package.Rd @@ -1,6 +1,5 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/influ.R -\docType{package} \name{influ-package} \alias{influ-package} \alias{influ} diff --git a/man/influ2.Rd b/man/influ2.Rd index ee4c6b7..5f427ce 100644 --- a/man/influ2.Rd +++ b/man/influ2.Rd @@ -1,6 +1,5 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/influ2.R -\docType{package} \name{influ2} \alias{influ2} \title{Influence plots} diff --git a/man/inv_logit.Rd b/man/inv_logit.Rd deleted file mode 100644 index a67392b..0000000 --- a/man/inv_logit.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/helper-functions.R -\name{inv_logit} -\alias{inv_logit} -\title{Inverse logit} -\usage{ -inv_logit(a) -} -\arguments{ -\item{a}{a vector.} -} -\description{ -Inverse logit -} diff --git a/man/logistic.Rd b/man/logistic.Rd deleted file mode 100644 index e0b3df8..0000000 --- a/man/logistic.Rd +++ /dev/null @@ -1,24 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/helper-functions.R -\name{logistic} -\alias{logistic} -\title{Logistic} -\usage{ -logistic(q, location = 0, scale = 1, lower.tail = TRUE, log.p = FALSE) -} -\arguments{ -\item{q}{vector of quantiles.} - -\item{location, scale}{location and scale parameters.} - -\item{lower.tail}{logical; if TRUE (default), probabilities are - \eqn{P[X \le x]}, otherwise, \eqn{P[X > x]}.} - -\item{log.p}{logical; if TRUE, probabilities p are given as log(p).} -} -\value{ -gives the distribution function. -} -\description{ -Logistic -} diff --git a/man/logit.Rd b/man/logit.Rd deleted file mode 100644 index 3a55a3a..0000000 --- a/man/logit.Rd +++ /dev/null @@ -1,21 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/helper-functions.R -\name{logit} -\alias{logit} -\title{Logit} -\usage{ -logit(p) - -logit(p) -} -\arguments{ -\item{p}{a vector.} -} -\value{ -the density. -} -\description{ -Logit - -logit -} diff --git a/man/plot_bayesian_cdi.Rd b/man/plot_bayesian_cdi.Rd index 07c247b..7e46776 100644 --- a/man/plot_bayesian_cdi.Rd +++ b/man/plot_bayesian_cdi.Rd @@ -63,6 +63,3 @@ The CDI plot presents the coefficients for the variable of interest (top-left pa \seealso{ \code{\link{get_coefs}}, \code{\link{get_influ}}, \code{\link{plot_bubble}} } -\author{ -Darcy Webber \email{darcy@quantifish.co.nz} -} diff --git a/tests/testthat/test-cdi.R b/tests/testthat/test-cdi.R index 980d04c..a1182fc 100644 --- a/tests/testthat/test-cdi.R +++ b/tests/testthat/test-cdi.R @@ -76,11 +76,8 @@ test_that("this matches Nokome Bentley's influ package", { c2a <- get_coefs(fit = brm1, var = "year", normalise = FALSE, transform = FALSE) %>% filter(variable != "year2000") %>% group_by(variable) %>% - summarise(Estimate = mean(value), - Est.Error = sd(value), - Q5 = quantile(value, probs = 0.05), - Q50 = quantile(value, probs = 0.5), - Q95 = quantile(value, probs = 0.95)) + summarise(Estimate = mean(value), Est.Error = sd(value), + Q5 = quantile(value, probs = 0.05), Q50 = quantile(value, probs = 0.5), Q95 = quantile(value, probs = 0.95)) c2b <- fixef(object = brm1, probs = c(0.05, 0.95)) %>% data.frame() %>% diff --git a/vignettes/influ2.Rmd b/vignettes/influ2.Rmd index 4963e5e..e87bdb3 100644 --- a/vignettes/influ2.Rmd +++ b/vignettes/influ2.Rmd @@ -70,7 +70,8 @@ plot_bubble(df = lobsters_per_pot, group = c("year", "month")) + ``` ```{r plot-bubble-2, echo=TRUE, fig.height=6, fig.width=6, message=FALSE, fig.cap = "Distribution of data by year and month, also coloured by month."} -plot_bubble(df = lobsters_per_pot, group = c("year", "month"), fill = "month") + +plot_bubble(df = lobsters_per_pot, group = c("year", "month"), + fill = "month") + labs(x = "Month", y = "Year") + theme(legend.position = "none") ``` @@ -89,6 +90,7 @@ fit1 <- brm(lobsters ~ year + month + poly(soak, 3), data = lobsters_per_pot, family = poisson, chains = 2, iter = 1500, refresh = 0, seed = 42, file = "fit1", file_refit = "never") + fit2 <- brm(lobsters ~ year + (1 | month) + s(depth, k = 3), data = lobsters_per_pot, family = negbinomial(), control = list(adapt_delta = 0.99), @@ -98,8 +100,8 @@ fit2 <- brm(lobsters ~ year + (1 | month) + s(depth, k = 3), ```{r add-criterion} criterion <- c("loo", "waic", "bayes_R2") -fit1 <- add_criterion(fit1, criterion = criterion, file = "fit1") -fit2 <- add_criterion(fit2, criterion = criterion, file = "fit2") +fit1 <- add_criterion(x = fit1, criterion = criterion, file = "fit1") +fit2 <- add_criterion(x = fit2, criterion = criterion, file = "fit2") ``` Fit a model using glm and generate influence statistics using the original `influ` package @@ -109,7 +111,7 @@ fit_glm <- glm(lobsters ~ year + month + poly(soak, 3), data = lobsters_per_pot, family = poisson) ``` -```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE, fig.cap = "The original CDI plot from the influ package."} +```{r influ1-setup, echo=TRUE, fig.height=6, fig.width=6, message=FALSE, fig.cap = "The original CDI plot from the influ package."} myInfl <- Influence$new(model = fit_glm) myInfl$init() myInfl$calc() @@ -122,7 +124,7 @@ cdi_month <- plot_bayesian_cdi(fit = fit1, xfocus = "month", yfocus = "year", cdi_month ``` -```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE, fig.cap = "The original step-plot from the influ package."} +```{r influ1-step, echo=TRUE, fig.height=6, fig.width=6, message=FALSE, fig.cap = "The original step-plot from the influ package."} myInfl$stepPlot() ``` @@ -138,7 +140,7 @@ plot_step(fits = fits) plot_index(fit = fit2) ``` -```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE} +```{r influ1-plot, echo=TRUE, fig.height=6, fig.width=6, message=FALSE} myInfl$stanPlot() ``` @@ -158,18 +160,18 @@ i_month <- get_influ(fit = fit1, group = c("year", "month")) %>% group_by(year) %>% summarise(value = mean(delta)) %>% mutate(variable = "month") -i_soak <- get_influ(fit = fit1, group = c("year", "soak")) %>% - group_by(year) %>% - summarise(value = mean(delta)) %>% - mutate(variable = "poly(soak, 3)") -i2 <- rbind(i_month, i_soak) - -ggplot(data = i2, aes(x = year, y = exp(value))) + - geom_point(data = i1) + - geom_line(aes(colour = variable, group = variable)) + - facet_wrap(variable ~ ., ncol = 1) + - labs(x = "Year", y = "Influence") + - theme(legend.position = "none") +# i_soak <- get_influ(fit = fit1, group = c("year", "soak")) %>% +# group_by(year) %>% +# summarise(value = mean(delta)) %>% +# mutate(variable = "poly(soak, 3)") +# i2 <- bind_rows(i_month, i_soak) +# +# ggplot(data = i2, aes(x = year, y = exp(value))) + +# geom_point(data = i1) + +# geom_line(aes(colour = variable, group = variable)) + +# facet_wrap(variable ~ ., ncol = 1) + +# labs(x = "Year", y = "Influence") + +# theme(legend.position = "none") ``` # Functions for exploring models @@ -229,7 +231,7 @@ plot_index(fit = fit1) The `influ2` package is based on the original `influ` package which was developed to generate influence plots. The `influ` package has functions that use outputs from the `glm` function. The `influ2` package was developed to use outputs from `brms` and relies heavily on the R packages `ggplot2` and `dplyr`. This vignette showcases the `influ2` package with a focus on model diagnostics including posterior predictive distributions, residuals, and quantile-quantile plots. ```{r get-yrep} -yrep <- posterior_predict(fit1, ndraws = 100) +yrep <- posterior_predict(object = fit1, ndraws = 100) ``` ```{r plot-bars, echo=TRUE, fig.height=6, fig.width=6, message=FALSE, fig.cap = "Comparison of the empirical distribution of the data (y) to the distributions of simulated/replicated data (yrep) from the posterior predictive distribution."} @@ -278,7 +280,7 @@ names(pred) <- paste0("pred.", names(pred)) resid <- residuals(fit) %>% data.frame() names(resid) <- paste0("resid.", names(resid)) -df <- cbind(resid, pred, lobsters_per_pot) +df <- bind_cols(resid, pred, lobsters_per_pot) ggplot(data = df, aes(x = year, y = .data$resid.Estimate)) + geom_pointrange(aes(ymin = .data$resid.Q2.5, ymax = .data$resid.Q97.5), alpha = 0.75) +