From 80d6e7beabdf859e1fd857b6ab341d3cb3133eab Mon Sep 17 00:00:00 2001
From: quantifish <darcy@quantifish.co.nz>
Date: Mon, 5 Aug 2024 20:31:21 +1200
Subject: [PATCH] adding all new stuff - more to do

---
 DESCRIPTION               |  4 ++--
 NAMESPACE                 |  5 -----
 R/get-coefs.R             | 27 ++++++++++-------------
 R/get-influ.R             | 13 +++++------
 R/helper-functions.R      | 46 ---------------------------------------
 R/influ.R                 |  1 -
 R/influ2.R                |  1 -
 R/plot-cdi.R              | 16 ++++++--------
 _pkgdown.yml              |  4 +---
 man/get_marginal.Rd       |  3 ---
 man/influ-package.Rd      |  1 -
 man/influ2.Rd             |  1 -
 man/inv_logit.Rd          | 14 ------------
 man/logistic.Rd           | 24 --------------------
 man/logit.Rd              | 21 ------------------
 man/plot_bayesian_cdi.Rd  |  3 ---
 tests/testthat/test-cdi.R |  7 ++----
 vignettes/influ2.Rmd      | 42 ++++++++++++++++++-----------------
 18 files changed, 50 insertions(+), 183 deletions(-)
 delete mode 100644 man/inv_logit.Rd
 delete mode 100644 man/logistic.Rd
 delete mode 100644 man/logit.Rd

diff --git a/DESCRIPTION b/DESCRIPTION
index 3a30e9e..c9f2ec2 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: influ2
 Title: Influence plots
-Version: 0.0.0
+Version: 1.0.0
 Authors@R: 
     person(given = "Darcy",
            family = "Webber",
@@ -38,4 +38,4 @@ Suggests:
     bayesplot
 LazyData: false
 VignetteBuilder: knitr
-RoxygenNote: 7.2.3
+RoxygenNote: 7.3.2
diff --git a/NAMESPACE b/NAMESPACE
index 201d55f..07694bf 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -14,9 +14,6 @@ export(get_marginal)
 export(get_unstandarsied)
 export(id_var_type)
 export(influ_app)
-export(inv_logit)
-export(logistic)
-export(logit)
 export(plot_bayesian_cdi)
 export(plot_bayesian_cdi2)
 export(plot_bubble)
@@ -57,11 +54,9 @@ importFrom(stats,coefficients)
 importFrom(stats,fitted)
 importFrom(stats,median)
 importFrom(stats,model.matrix)
-importFrom(stats,plogis)
 importFrom(stats,poly)
 importFrom(stats,ppoints)
 importFrom(stats,qgamma)
-importFrom(stats,qlogis)
 importFrom(stats,qnorm)
 importFrom(stats,qqplot)
 importFrom(stats,quantile)
diff --git a/R/get-coefs.R b/R/get-coefs.R
index bcb3bed..1c8b1fc 100644
--- a/R/get-coefs.R
+++ b/R/get-coefs.R
@@ -3,9 +3,6 @@
 #' @param fit An object of class \code{brmsfit}.
 #' @param var The variable to obtain.
 #' @return A \code{data.frame}.
-#' 
-#' @author Darcy Webber \email{darcy@quantifish.co.nz}
-#' 
 #' @importFrom reshape2 melt
 #' @importFrom readr parse_number
 #' @importFrom nlme fixef ranef
@@ -124,7 +121,6 @@ get_coefs_raw <- function(fit, var = "area") {
 #' @param hurdle If a hurdle model then use the hurdle.
 #' @param transform if the coefficients should be transformed using the link function.
 #' @return A \code{data.frame}.
-#' 
 #' @importFrom reshape2 melt
 #' @importFrom readr parse_number
 #' @importFrom nlme fixef ranef
@@ -132,15 +128,14 @@ get_coefs_raw <- function(fit, var = "area") {
 #' @import dplyr
 #' @export
 #' 
-get_coefs <- function(fit, var = "area", 
+get_coefs <- function(fit, 
+                      var = "area", 
                       normalise = TRUE, 
                       hurdle = FALSE, 
                       transform = FALSE) {
   
   if (!is.brmsfit(fit)) stop("fit is not an object of class brmsfit.")
-  
   type <- id_var_type(fit = fit, xfocus = var, hurdle = hurdle)
-  
   is_poly <- FALSE
   
   if (any(grepl("\\(1 \\|", var))) {
@@ -156,8 +151,7 @@ get_coefs <- function(fit, var = "area",
       mutate(variable = gsub(".*\\[|\\]", "", variable)) %>%
       mutate(variable = gsub(",Intercept", "", variable))
     if (!str_detect(var, ":")) {
-      ps <- ps %>%
-        filter(!str_detect(variable, ":"))
+      ps <- ps %>% filter(!str_detect(variable, ":"))
     }
   } else {
     # Population-level effects
@@ -182,11 +176,8 @@ get_coefs <- function(fit, var = "area",
       ps <- ps %>%
         filter(!str_detect(variable, ":"))
     }
-      # mutate(variable = paste0(var, gregexpr("[[:digit:]]+", variable)))
-      # mutate(variable = paste0(var, parse_number(as.character(variable))))
-    # unique(ps$variable)
-    # tail(ps)
   }
+  # unique(ps$variable)
   
   # Check to see if this is a polynomial
   if (any(grepl("poly", ps$variable))) {
@@ -203,8 +194,7 @@ get_coefs <- function(fit, var = "area",
         filter(grepl("hu", variable)) %>%
         mutate(variable = gsub("hu_", "", variable))
     } else {
-      ps <- ps %>%
-        filter(!grepl("hu", variable))
+      ps <- ps %>% filter(!grepl("hu", variable))
     }
   }
   
@@ -232,7 +222,12 @@ get_coefs <- function(fit, var = "area",
                       value = 0)
     coefs <- rbind(ps0, ps)
   } else {
-    coefs <- ps
+    data <- fit$data
+    data[,var] <- paste0(var, data[,var])
+    ps0 <- data.frame(iteration = 1:max(ps$iteration),
+                      variable = unique(data[,var])[!unique(data[,var]) %in% unique(ps$variable)],
+                      value = 0)
+    coefs <- rbind(ps0, ps)
   }
 
   # Arrange by vessel coefficient if vessel chosen
diff --git a/R/get-influ.R b/R/get-influ.R
index bb9a502..c499cdb 100644
--- a/R/get-influ.R
+++ b/R/get-influ.R
@@ -18,8 +18,7 @@ get_influ2 <- function(fit,
   if (!is.brmsfit(fit)) stop("fit is not an object of class brmsfit.")
   
   # Model data
-  data <- fit$data %>%
-    mutate(id = 1:n())
+  data <- fit$data %>% mutate(id = 1:n())
   y <- names(data)[1]
   
   # Identify the type of variable we are dealing with
@@ -59,10 +58,8 @@ get_influ2 <- function(fit,
     sdata <- standata(fit)
     X <- sdata$Xs
     Z <- sdata$Zs_1_1
-    s_coefs <- coefs %>%
-      filter(!str_detect(.data$variable, "bs_|sds_"))
-    coefs <- coefs %>%
-      filter(str_detect(.data$variable, "bs_"))
+    s_coefs <- coefs %>% filter(!str_detect(.data$variable, "bs_|sds_"))
+    coefs <- coefs %>% filter(str_detect(.data$variable, "bs_"))
   }
 
   # Do the matrix multiplication
@@ -82,7 +79,7 @@ get_influ2 <- function(fit,
   }
   
   influ_var <- melt(Xbeta) %>%
-    rename(iteration = .data$Var1, id = .data$Var2) %>%
+    rename(iteration = Var1, id = Var2) %>%
     left_join(data, by = "id")
   influ_rho <- influ_var %>%
     group_by(.data$iteration) %>%
@@ -153,7 +150,7 @@ get_influ <- function(fit, group = c("fishing_year", "area"), hurdle = FALSE) {
   }
   
   influ_var <- melt(Xbeta) %>%
-    rename(iteration = .data$Var1, id = .data$Var2) %>%
+    rename(iteration = Var1, id = .data$Var2) %>%
     left_join(data, by = "id")
   influ_rho <- influ_var %>%
     group_by(.data$iteration) %>%
diff --git a/R/helper-functions.R b/R/helper-functions.R
index 58b6f02..380917a 100644
--- a/R/helper-functions.R
+++ b/R/helper-functions.R
@@ -73,32 +73,6 @@ get_first_term <- function(fit) {
 }
 
 
-#' Logit
-#' 
-#' @inheritParams stats::qlogis
-#' @return the density.
-#' @importFrom stats qlogis
-#' @export
-#' 
-logit <- function(p, location = 0, scale = 1, lower.tail = TRUE, log.p = FALSE) {
-  qlogis(p = p, location = location, scale = scale, lower.tail = lower.tail, log.p = log.p)
-}
-
-
-#' Logistic
-#' 
-#' @param q vector of quantiles.
-#' @inheritParams stats::plogis
-#' @param log.p logical; if TRUE, probabilities p are given as log(p).
-#' @return gives the distribution function.
-#' @importFrom stats plogis
-#' @export
-#' 
-logistic <- function(q, location = 0, scale = 1, lower.tail = TRUE, log.p = FALSE) {
-  plogis(q = q, location = location, scale = scale, lower.tail = lower.tail, log.p = log.p)
-}
-
-
 #' Identify the variable type
 #' 
 #' @param fit An object of class \code{brmsfit}.
@@ -150,23 +124,3 @@ id_var_type <- function(fit, xfocus, hurdle = FALSE) {
 geo_mean <- function(a) {
   prod(a)^(1.0 / length(a))
 }
-
-
-#' Inverse logit
-#' 
-#' @param a a vector.
-#' @export
-#' 
-inv_logit <- function(a) {
-  exp(a) / (exp(a) + 1.0)
-}
-
-
-#' logit
-#' 
-#' @param p a vector.
-#' @export
-#' 
-logit <- function(p) {
-  log(p / (1.0 - p))
-}
diff --git a/R/influ.R b/R/influ.R
index 9d13991..7ded201 100644
--- a/R/influ.R
+++ b/R/influ.R
@@ -27,7 +27,6 @@
 #' This document provides minimal documentation and the best way to learn how to use this package is through the vignette:
 #'  "An example of how to use the 'influ' package"
 #'
-#' @docType package
 #' @name influ-package
 #' @aliases influ
 #' @title Influence in linear models 
diff --git a/R/influ2.R b/R/influ2.R
index 5500481..cd02b8c 100644
--- a/R/influ2.R
+++ b/R/influ2.R
@@ -22,7 +22,6 @@
 #' understanding fisheries catch-per-unit-effort standardizations. \emph{ICES Journal of Marine Science}. 
 #' \code{doi:10.1093/icesjms/fsr174}
 #' 
-#' @docType package
 #' @name influ2
 #' 
 NULL
diff --git a/R/plot-cdi.R b/R/plot-cdi.R
index 0e28887..eda69f0 100644
--- a/R/plot-cdi.R
+++ b/R/plot-cdi.R
@@ -23,11 +23,7 @@
 #'   the same for all and raw, but the legend will change from numbers of records to a proportion.
 #' @param ... Further arguments passed to nothing.
 #' @return a \code{ggplot} object.
-#' 
-#' @author Darcy Webber \email{darcy@quantifish.co.nz}
-#' 
 #' @seealso \code{\link{get_coefs}}, \code{\link{get_influ}}, \code{\link{plot_bubble}}
-#' 
 #' @importFrom gtable is.gtable gtable_filter
 #' @importFrom stats poly
 #' @importFrom tidyselect all_of
@@ -49,7 +45,6 @@ plot_bayesian_cdi <- function(fit,
                               sum_by = "row", ...) {
   
   if (!is.brmsfit(fit)) stop("fit is not an object of class brmsfit.")
-  
   if (is.null(xlab)) xlab <- xfocus
   if (is.null(ylab)) ylab <- yfocus
   y_coefs <- "Conditional effect"
@@ -58,10 +53,15 @@ plot_bayesian_cdi <- function(fit,
   type <- id_var_type(fit = fit, xfocus = xfocus, hurdle = hurdle)
   
   # Posterior samples of coefficients
+  # if (type %in% c("random_effect")) {
   if (type %in% c("fixed_effect", "random_effect")) {
     coefs <- get_coefs(fit = fit, var = xfocus, hurdle = hurdle)
   } else {
-    coefs <- get_marginal(fit = fit, var = xfocus) # this would plot the marginal/conditional effect, but if it is a hurdle model it ignores the hurdle bit
+    # this would plot the marginal/conditional effect, but if it is a hurdle model it ignores the hurdle bit
+    coefs <- get_marginal(fit = fit, var = xfocus)# %>%
+      # mutate(value = log(value))
+    # library(marginaleffects)
+    # pred <- predictions(model = fit)
     # coefs <- get_coefs_raw(fit = fit, var = xfocus)
   }
   
@@ -73,9 +73,7 @@ plot_bayesian_cdi <- function(fit,
   
   # Model data
   if (is.numeric(coefs$variable)) {
-    data <- fit$data %>%
-      select(all_of(c(yfocus, xfocus)))
-    
+    data <- fit$data %>% select(all_of(c(yfocus, xfocus)))
     length.out <- 15
     dmin <- min(data[,xfocus])
     dmax <- max(data[,xfocus])
diff --git a/_pkgdown.yml b/_pkgdown.yml
index 106bf45..b02a747 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -15,11 +15,9 @@ reference:
   contents:
   - influ2
   - starts_with("get_")
-  - logit
-  - inv_logit
-  - logistic
   - id_var_type
   - geo_mean
+  - influ_app
 
 - title: influ2 plots
   desc: Plots and tables from the new influ2 R package.
diff --git a/man/get_marginal.Rd b/man/get_marginal.Rd
index ab3b817..5c96f9d 100644
--- a/man/get_marginal.Rd
+++ b/man/get_marginal.Rd
@@ -17,6 +17,3 @@ A \code{data.frame}.
 \description{
 Get model coefficients new version
 }
-\author{
-Darcy Webber \email{darcy@quantifish.co.nz}
-}
diff --git a/man/influ-package.Rd b/man/influ-package.Rd
index f48ba4d..10d572a 100644
--- a/man/influ-package.Rd
+++ b/man/influ-package.Rd
@@ -1,6 +1,5 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/influ.R
-\docType{package}
 \name{influ-package}
 \alias{influ-package}
 \alias{influ}
diff --git a/man/influ2.Rd b/man/influ2.Rd
index ee4c6b7..5f427ce 100644
--- a/man/influ2.Rd
+++ b/man/influ2.Rd
@@ -1,6 +1,5 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/influ2.R
-\docType{package}
 \name{influ2}
 \alias{influ2}
 \title{Influence plots}
diff --git a/man/inv_logit.Rd b/man/inv_logit.Rd
deleted file mode 100644
index a67392b..0000000
--- a/man/inv_logit.Rd
+++ /dev/null
@@ -1,14 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/helper-functions.R
-\name{inv_logit}
-\alias{inv_logit}
-\title{Inverse logit}
-\usage{
-inv_logit(a)
-}
-\arguments{
-\item{a}{a vector.}
-}
-\description{
-Inverse logit
-}
diff --git a/man/logistic.Rd b/man/logistic.Rd
deleted file mode 100644
index e0b3df8..0000000
--- a/man/logistic.Rd
+++ /dev/null
@@ -1,24 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/helper-functions.R
-\name{logistic}
-\alias{logistic}
-\title{Logistic}
-\usage{
-logistic(q, location = 0, scale = 1, lower.tail = TRUE, log.p = FALSE)
-}
-\arguments{
-\item{q}{vector of quantiles.}
-
-\item{location, scale}{location and scale parameters.}
-
-\item{lower.tail}{logical; if TRUE (default), probabilities are
-    \eqn{P[X \le x]}, otherwise, \eqn{P[X > x]}.}
-
-\item{log.p}{logical; if TRUE, probabilities p are given as log(p).}
-}
-\value{
-gives the distribution function.
-}
-\description{
-Logistic
-}
diff --git a/man/logit.Rd b/man/logit.Rd
deleted file mode 100644
index 3a55a3a..0000000
--- a/man/logit.Rd
+++ /dev/null
@@ -1,21 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/helper-functions.R
-\name{logit}
-\alias{logit}
-\title{Logit}
-\usage{
-logit(p)
-
-logit(p)
-}
-\arguments{
-\item{p}{a vector.}
-}
-\value{
-the density.
-}
-\description{
-Logit
-
-logit
-}
diff --git a/man/plot_bayesian_cdi.Rd b/man/plot_bayesian_cdi.Rd
index 07c247b..7e46776 100644
--- a/man/plot_bayesian_cdi.Rd
+++ b/man/plot_bayesian_cdi.Rd
@@ -63,6 +63,3 @@ The CDI plot presents the coefficients for the variable of interest (top-left pa
 \seealso{
 \code{\link{get_coefs}}, \code{\link{get_influ}}, \code{\link{plot_bubble}}
 }
-\author{
-Darcy Webber \email{darcy@quantifish.co.nz}
-}
diff --git a/tests/testthat/test-cdi.R b/tests/testthat/test-cdi.R
index 980d04c..a1182fc 100644
--- a/tests/testthat/test-cdi.R
+++ b/tests/testthat/test-cdi.R
@@ -76,11 +76,8 @@ test_that("this matches Nokome Bentley's influ package", {
   c2a <- get_coefs(fit = brm1, var = "year", normalise = FALSE, transform = FALSE) %>%
     filter(variable != "year2000") %>%
     group_by(variable) %>%
-    summarise(Estimate = mean(value), 
-              Est.Error = sd(value), 
-              Q5 = quantile(value, probs = 0.05), 
-              Q50 = quantile(value, probs = 0.5), 
-              Q95 = quantile(value, probs = 0.95))
+    summarise(Estimate = mean(value), Est.Error = sd(value), 
+              Q5 = quantile(value, probs = 0.05), Q50 = quantile(value, probs = 0.5), Q95 = quantile(value, probs = 0.95))
   
   c2b <- fixef(object = brm1, probs = c(0.05, 0.95)) %>%
     data.frame() %>%
diff --git a/vignettes/influ2.Rmd b/vignettes/influ2.Rmd
index 4963e5e..e87bdb3 100644
--- a/vignettes/influ2.Rmd
+++ b/vignettes/influ2.Rmd
@@ -70,7 +70,8 @@ plot_bubble(df = lobsters_per_pot, group = c("year", "month")) +
 ```
 
 ```{r plot-bubble-2, echo=TRUE, fig.height=6, fig.width=6, message=FALSE, fig.cap = "Distribution of data by year and month, also coloured by month."}
-plot_bubble(df = lobsters_per_pot, group = c("year", "month"), fill = "month") +
+plot_bubble(df = lobsters_per_pot, group = c("year", "month"), 
+            fill = "month") +
   labs(x = "Month", y = "Year") +
   theme(legend.position = "none")
 ```
@@ -89,6 +90,7 @@ fit1 <- brm(lobsters ~ year + month + poly(soak, 3),
             data = lobsters_per_pot, family = poisson, 
             chains = 2, iter = 1500, refresh = 0, seed = 42, 
             file = "fit1", file_refit = "never")
+
 fit2 <- brm(lobsters ~ year + (1 | month) + s(depth, k = 3), 
             data = lobsters_per_pot, family = negbinomial(), 
             control = list(adapt_delta = 0.99),
@@ -98,8 +100,8 @@ fit2 <- brm(lobsters ~ year + (1 | month) + s(depth, k = 3),
 
 ```{r add-criterion}
 criterion <- c("loo", "waic", "bayes_R2")
-fit1 <- add_criterion(fit1, criterion = criterion, file = "fit1")
-fit2 <- add_criterion(fit2, criterion = criterion, file = "fit2")
+fit1 <- add_criterion(x = fit1, criterion = criterion, file = "fit1")
+fit2 <- add_criterion(x = fit2, criterion = criterion, file = "fit2")
 ```
 
 Fit a model using glm and generate influence statistics using the original `influ` package
@@ -109,7 +111,7 @@ fit_glm <- glm(lobsters ~ year + month + poly(soak, 3),
                data = lobsters_per_pot, family = poisson)
 ```
 
-```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE, fig.cap = "The original CDI plot from the influ package."}
+```{r influ1-setup, echo=TRUE, fig.height=6, fig.width=6, message=FALSE, fig.cap = "The original CDI plot from the influ package."}
 myInfl <- Influence$new(model = fit_glm)
 myInfl$init()
 myInfl$calc()
@@ -122,7 +124,7 @@ cdi_month <- plot_bayesian_cdi(fit = fit1, xfocus = "month", yfocus = "year",
 cdi_month
 ```
 
-```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE, fig.cap = "The original step-plot from the influ package."}
+```{r influ1-step, echo=TRUE, fig.height=6, fig.width=6, message=FALSE, fig.cap = "The original step-plot from the influ package."}
 myInfl$stepPlot()
 ```
 
@@ -138,7 +140,7 @@ plot_step(fits = fits)
 plot_index(fit = fit2)
 ```
 
-```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE}
+```{r influ1-plot, echo=TRUE, fig.height=6, fig.width=6, message=FALSE}
 myInfl$stanPlot()
 ```
 
@@ -158,18 +160,18 @@ i_month <- get_influ(fit = fit1, group = c("year", "month")) %>%
   group_by(year) %>%
   summarise(value = mean(delta)) %>%
   mutate(variable = "month")
-i_soak <- get_influ(fit = fit1, group = c("year", "soak")) %>%
-  group_by(year) %>%
-  summarise(value = mean(delta)) %>%
-  mutate(variable = "poly(soak, 3)")
-i2 <- rbind(i_month, i_soak)
-
-ggplot(data = i2, aes(x = year, y = exp(value))) +
-  geom_point(data = i1) +
-  geom_line(aes(colour = variable, group = variable)) +
-  facet_wrap(variable ~ ., ncol = 1) +
-  labs(x = "Year", y = "Influence") +
-  theme(legend.position = "none")
+# i_soak <- get_influ(fit = fit1, group = c("year", "soak")) %>%
+#   group_by(year) %>%
+#   summarise(value = mean(delta)) %>%
+#   mutate(variable = "poly(soak, 3)")
+# i2 <- bind_rows(i_month, i_soak)
+# 
+# ggplot(data = i2, aes(x = year, y = exp(value))) +
+#   geom_point(data = i1) +
+#   geom_line(aes(colour = variable, group = variable)) +
+#   facet_wrap(variable ~ ., ncol = 1) +
+#   labs(x = "Year", y = "Influence") +
+#   theme(legend.position = "none")
 ```
 
 # Functions for exploring models
@@ -229,7 +231,7 @@ plot_index(fit = fit1)
 The `influ2` package is based on the original `influ` package which was developed to generate influence plots. The `influ` package has functions that use outputs from the `glm` function. The `influ2` package was developed to use outputs from `brms` and relies heavily on the R packages `ggplot2` and `dplyr`. This vignette showcases the `influ2` package with a focus on model diagnostics including posterior predictive distributions, residuals, and quantile-quantile plots.
 
 ```{r get-yrep}
-yrep <- posterior_predict(fit1, ndraws = 100)
+yrep <- posterior_predict(object = fit1, ndraws = 100)
 ```
 
 ```{r plot-bars, echo=TRUE, fig.height=6, fig.width=6, message=FALSE, fig.cap = "Comparison of the empirical distribution of the data (y) to the distributions of simulated/replicated data (yrep) from the posterior predictive distribution."}
@@ -278,7 +280,7 @@ names(pred) <- paste0("pred.", names(pred))
 resid <- residuals(fit) %>% data.frame()
 names(resid) <- paste0("resid.", names(resid))
 
-df <- cbind(resid, pred, lobsters_per_pot)
+df <- bind_cols(resid, pred, lobsters_per_pot)
 
 ggplot(data = df, aes(x = year, y = .data$resid.Estimate)) +
   geom_pointrange(aes(ymin = .data$resid.Q2.5, ymax = .data$resid.Q97.5), alpha = 0.75) +