diff --git a/CRAN-SUBMISSION b/CRAN-SUBMISSION index abb3c037a..4a0f8ec1b 100644 --- a/CRAN-SUBMISSION +++ b/CRAN-SUBMISSION @@ -1,3 +1,3 @@ -Version: 0.12.2 -Date: 2024-07-17 21:02:38 UTC -SHA: d4c45126ca666644785dc64d2af6b87eee9ca39b +Version: 0.12.3 +Date: 2024-09-02 16:10:51 UTC +SHA: dfbe03fd4961ee9049d5169275248a4ef7a5a21e diff --git a/DESCRIPTION b/DESCRIPTION index b1268ff38..dbab9d1c7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,28 +1,28 @@ Type: Package Package: performance Title: Assessment of Regression Models Performance -Version: 0.12.2 +Version: 0.12.4.2 Authors@R: c(person(given = "Daniel", family = "Lüdecke", role = c("aut", "cre"), email = "d.luedecke@uke.de", - comment = c(ORCID = "0000-0002-8895-3206", Twitter = "@strengejacke")), + comment = c(ORCID = "0000-0002-8895-3206")), person(given = "Dominique", family = "Makowski", role = c("aut", "ctb"), email = "dom.makowski@gmail.com", - comment = c(ORCID = "0000-0001-5375-9967", Twitter = "@Dom_Makowski")), + comment = c(ORCID = "0000-0001-5375-9967")), person(given = "Mattan S.", family = "Ben-Shachar", role = c("aut", "ctb"), email = "matanshm@post.bgu.ac.il", - comment = c(ORCID = "0000-0002-4287-4801", Twitter = "@mattansb")), + comment = c(ORCID = "0000-0002-4287-4801")), person(given = "Indrajeet", family = "Patil", role = c("aut", "ctb"), email = "patilindrajeet.science@gmail.com", - comment = c(ORCID = "0000-0003-1995-6531", Twitter = "@patilindrajeets")), + comment = c(ORCID = "0000-0003-1995-6531")), person(given = "Philip", family = "Waggoner", role = c("aut", "ctb"), @@ -32,12 +32,12 @@ Authors@R: family = "Wiernik", role = c("aut", "ctb"), email = "brenton@wiernik.org", - comment = c(ORCID = "0000-0001-9560-6336", Twitter = "@bmwiernik")), + comment = c(ORCID = "0000-0001-9560-6336")), person(given = "Rémi", family = "Thériault", role = c("aut", "ctb"), email = "remi.theriault@mail.mcgill.ca", - comment = c(ORCID = "0000-0003-4315-6788", Twitter = "@rempsyc")), + comment = c(ORCID = "0000-0003-4315-6788")), person(given = "Vincent", family = "Arel-Bundock", email = "vincent.arel-bundock@umontreal.ca", @@ -48,11 +48,15 @@ Authors@R: role = "rev"), person(given = "gjo11", role = "rev"), - person("Etienne", - "Bacher", , - "etienne.bacher@protonmail.com", + person(given = "Etienne", + family = "Bacher", , + email = "etienne.bacher@protonmail.com", role = "ctb", - comment = c(ORCID = "0000-0002-9271-5075"))) + comment = c(ORCID = "0000-0002-9271-5075")), + person(given = "Joseph", + family = "Luchman", + role = "ctb", + comment = c(ORCID = "0000-0002-8886-9717"))) Maintainer: Daniel Lüdecke Description: Utilities for computing measures to assess model quality, which are not directly provided by R's 'base' or 'stats' packages. 
@@ -69,9 +73,9 @@ BugReports: https://github.com/easystats/performance/issues Depends: R (>= 3.6) Imports: - bayestestR (>= 0.13.2), - insight (>= 0.20.2), - datawizard (>= 0.10.0), + bayestestR (>= 0.15.0), + insight (>= 0.20.5), + datawizard (>= 0.13.0), stats, utils Suggests: @@ -89,8 +93,9 @@ Suggests: CompQuadForm, correlation, cplm, + dagitty, dbscan, - DHARMa, + DHARMa (>= 0.4.7), estimatr, fixest, flextable, @@ -98,7 +103,8 @@ Suggests: gamm4, geomtextpath, ggplot2, - glmmTMB, + ggdag, + glmmTMB (>= 1.1.10), graphics, Hmisc, httr2, @@ -124,7 +130,7 @@ Suggests: nonnest2, ordinal, parallel, - parameters (>= 0.21.6), + parameters (>= 0.22.0), patchwork, pscl, psych, @@ -137,7 +143,7 @@ Suggests: rstanarm, rstantools, sandwich, - see (>= 0.8.2), + see (>= 0.9.0), survey, survival, testthat (>= 3.2.1), @@ -155,3 +161,4 @@ Config/Needs/website: r-lib/pkgdown, easystats/easystatstemplate Config/rcmdcheck/ignore-inconsequential-notes: true +Remotes: easystats/insight diff --git a/NAMESPACE b/NAMESPACE index 0827931d6..d35d82547 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -170,12 +170,14 @@ S3method(model_performance,censReg) S3method(model_performance,clm) S3method(model_performance,clm2) S3method(model_performance,coxph) +S3method(model_performance,coxph_weightit) S3method(model_performance,default) S3method(model_performance,felm) S3method(model_performance,fixest) S3method(model_performance,fixest_multi) S3method(model_performance,flexsurvreg) S3method(model_performance,glm) +S3method(model_performance,glm_weightit) S3method(model_performance,glmmTMB) S3method(model_performance,glmmadmb) S3method(model_performance,glmx) @@ -197,9 +199,11 @@ S3method(model_performance,mixor) S3method(model_performance,mlogit) S3method(model_performance,model_fit) S3method(model_performance,multinom) +S3method(model_performance,multinom_weightit) S3method(model_performance,negbinirr) S3method(model_performance,negbinmfx) S3method(model_performance,nestedLogit) +S3method(model_performance,ordinal_weightit) S3method(model_performance,plm) S3method(model_performance,poissonirr) S3method(model_performance,poissonmfx) @@ -269,6 +273,7 @@ S3method(plot,binned_residuals) S3method(plot,check_autocorrelation) S3method(plot,check_clusterstructure) S3method(plot,check_collinearity) +S3method(plot,check_dag) S3method(plot,check_distribution) S3method(plot,check_distribution_numeric) S3method(plot,check_heteroscedasticity) @@ -289,6 +294,7 @@ S3method(print,binned_residuals) S3method(print,check_autocorrelation) S3method(print,check_collinearity) S3method(print,check_concurvity) +S3method(print,check_dag) S3method(print,check_distribution) S3method(print,check_distribution_numeric) S3method(print,check_heterogeneity_bias) @@ -484,6 +490,7 @@ S3method(r2_mcfadden,serp) S3method(r2_mcfadden,truncreg) S3method(r2_mcfadden,vglm) S3method(r2_mckelvey,default) +S3method(r2_mlm,mlm) S3method(r2_nagelkerke,BBreg) S3method(r2_nagelkerke,DirichletRegModel) S3method(r2_nagelkerke,bife) @@ -538,12 +545,14 @@ S3method(test_vuong,default) S3method(test_wald,ListNestedRegressions) S3method(test_wald,ListNonNestedRegressions) S3method(test_wald,default) +export(as.dag) export(binned_residuals) export(check_autocorrelation) export(check_clusterstructure) export(check_collinearity) export(check_concurvity) export(check_convergence) +export(check_dag) export(check_distribution) export(check_factorstructure) export(check_heterogeneity_bias) @@ -557,7 +566,6 @@ export(check_multimodal) export(check_normality) export(check_outliers) 
export(check_overdispersion) -export(check_posterior_predictions) export(check_predictions) export(check_residuals) export(check_singularity) @@ -593,7 +601,6 @@ export(performance_rmse) export(performance_roc) export(performance_rse) export(performance_score) -export(posterior_predictive_check) export(print_html) export(print_md) export(r2) @@ -606,6 +613,7 @@ export(r2_loo) export(r2_loo_posterior) export(r2_mcfadden) export(r2_mckelvey) +export(r2_mlm) export(r2_nagelkerke) export(r2_nakagawa) export(r2_posterior) diff --git a/NEWS.md b/NEWS.md index cb9cc26bd..9eb5ded26 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,38 @@ +# performance 0.12.5 + +## Breaking changes + +* Deprecated arguments and alias-function-names have been removed. + +## Changes + +* Increased accuracy for `check_convergence()` for *glmmTMB* models. + +# performance 0.12.4 + +## Changes + +* `check_dag()` now also checks for colliders, and suggests removing it in the + printed output. + +* Minor revisions to the printed output of `check_dag()`. + +## Bug fixes + +* Fixed failing tests that broke due to changes in latest *glmmTMB* update. + +# performance 0.12.3 + +## New functions + +* `check_dag()`, to check DAGs for correct adjustment sets. + +## Changes + +* `check_heterogeneity_bias()` gets a `nested` argument. Furthermore, `by` can + specify more than one variable, meaning that nested or cross-classified + model designs can also be tested for heterogeneity bias. + # performance 0.12.2 Patch release, to ensure that _performance_ runs with older version of diff --git a/R/check_convergence.R b/R/check_convergence.R index 6f7628a12..f60a7e298 100644 --- a/R/check_convergence.R +++ b/R/check_convergence.R @@ -98,9 +98,10 @@ check_convergence.merMod <- function(x, tolerance = 0.001, ...) { #' @export -check_convergence.glmmTMB <- function(x, ...) { +check_convergence.glmmTMB <- function(x, tolerance = 0.001, ...) { # https://github.com/glmmTMB/glmmTMB/issues/275 - isTRUE(x$sdr$pdHess) + # https://stackoverflow.com/q/79110546/2094622 + isTRUE(all.equal(x$fit$convergence, 0, tolerance = tolerance)) && isTRUE(x$sdr$pdHess) } diff --git a/R/check_dag.R b/R/check_dag.R new file mode 100644 index 000000000..c8b53533f --- /dev/null +++ b/R/check_dag.R @@ -0,0 +1,583 @@ +#' @title Check correct model adjustment for identifying causal effects +#' @name check_dag +#' +#' @description The purpose of `check_dag()` is to build, check and visualize +#' your model based on directed acyclic graphs (DAG). The function checks if a +#' model is correctly adjusted for identifying specific relationships of +#' variables, especially directed (maybe also "causal") effects for given +#' exposures on an outcome. In case of incorrect adjustments, the function +#' suggests the minimal required variables that should be adjusted for (sometimes +#' also called "controlled for"), i.e. variables that *at least* need to be +#' included in the model. Depending on the goal of the analysis, it is still +#' possible to add more variables to the model than just the minimally required +#' adjustment sets. +#' +#' `check_dag()` is a convenient wrapper around `ggdag::dagify()`, +#' `dagitty::adjustmentSets()` and `dagitty::adjustedNodes()` to check correct +#' adjustment sets. It returns a **dagitty** object that can be visualized with +#' `plot()`. `as.dag()` is a small convenient function to return the +#' dagitty-string, which can be used for the online-tool from the +#' dagitty-website. +#' +#' @param ... 
One or more formulas, which are converted into **dagitty** syntax. +#' The first element may also be a model object. If a model object is provided, its +#' formula is used as the first formula, and all independent variables will be used +#' for the `adjusted` argument. See 'Details' and 'Examples'. +#' @param outcome Name of the dependent variable (outcome), as character string +#' or as formula. Must be a valid name from the formulas provided in `...`. If +#' not set, the first dependent variable from the formulas is used. +#' @param exposure Name of the exposure variable (as character string or +#' formula), for which the direct and total causal effect on the `outcome` +#' should be checked. Must be a valid name from the formulas provided in `...`. +#' If not set, the first independent variable from the formulas is used. +#' @param adjusted A character vector or formula with names of variables that +#' are adjusted for in the model, e.g. `adjusted = c("x1", "x2")` or +#' `adjusted = ~ x1 + x2`. If a model object is provided in `...`, any values in +#' `adjusted` will be overwritten by the model's independent variables. +#' @param latent A character vector with names of latent variables in the model. +#' @param effect Character string, indicating which effect to check. Can be +#' `"all"` (default), `"total"`, or `"direct"`. +#' @param coords Coordinates of the variables when plotting the DAG. The +#' coordinates can be provided in three different ways: +#' +#' - a list with two elements, `x` and `y`, which both are named vectors of +#' numerics. The names correspond to the variable names in the DAG, and the +#' values for `x` and `y` indicate the x/y coordinates in the plot. +#' - a list with elements that correspond to the variables in the DAG. Each +#' element is a numeric vector of length two with x- and y-coordinate. +#' - a data frame with three columns: `x`, `y` and `name` (which contains the +#' variable names). +#' +#' See 'Examples'. +#' @param x An object of class `check_dag`, as returned by `check_dag()`. +#' +#' @section Specifying the DAG formulas: +#' +#' The formulas have the following syntax: +#' +#' - One-directed paths: On the *left-hand-side* is the name of the variable +#' that causal effects point to (direction of the arrows, in dagitty-language). +#' On the *right-hand-side* are all variables from which causal effects are assumed +#' to come. For example, for the formula `Y ~ X1 + X2`, paths directed from +#' both `X1` and `X2` to `Y` are assumed. +#' +#' - Bi-directed paths: Use `~~` to indicate bi-directed paths. For example, +#' `Y ~~ X` indicates that the path between `Y` and `X` is bi-directed, and +#' the arrow points in both directions. Bi-directed paths often indicate +#' unmeasured cause, or unmeasured confounding, of the two involved variables. +#' +#' @section Minimally required adjustments: +#' +#' The function checks if the model is correctly adjusted for identifying the +#' direct and total effects of the exposure on the outcome. If the model is +#' correctly specified, no adjustment is needed to estimate the direct effect. +#' If the model is not correctly specified, the function suggests the minimally +#' required variables that should be adjusted for. The function distinguishes +#' between direct and total effects, and checks if the model is correctly +#' adjusted for both. If the model is cyclic, the function stops and suggests +#' removing cycles from the model.
+#' +#' Note that it may sometimes be necessary to try out different combinations +#' of suggested adjustments, because `check_dag()` cannot always detect whether +#' _at least_ one of several variables is required, or whether adjustments should +#' be done for _all_ listed variables. It can be useful to copy the dagitty-code +#' (using `as.dag()`, which prints the dagitty-string into the console) into +#' the dagitty-website and play around with different adjustments. +#' +#' @section Direct and total effects: +#' +#' The direct effect of an exposure on an outcome is the effect that is not +#' mediated by any other variable in the model. The total effect is the sum of +#' the direct and indirect effects. The function checks if the model is correctly +#' adjusted for identifying the direct and total effects of the exposure on the +#' outcome. +#' +#' @section Why are DAGs important - the Table 2 fallacy: +#' +#' Correctly thinking about and identifying the relationships between variables +#' is important when it comes to reporting coefficients from regression models +#' that mutually adjust for "confounders" or include covariates. Different +#' coefficients might have different interpretations, depending on their +#' relationship to other variables in the model. Sometimes, a regression +#' coefficient represents the direct effect of an exposure on an outcome, but +#' sometimes it must be interpreted as a total effect, due to the involvement +#' of mediating effects. This problem is also called the "Table 2 fallacy" +#' (_Westreich and Greenland 2013_). DAGs help to visualize and thereby focus on +#' the relationships of variables in a regression model, making it easier to detect missing +#' adjustments or over-adjustment. +#' +#' @return An object of class `check_dag`, which can be visualized with `plot()`. +#' The returned object also inherits from class `dagitty` and thus can be used +#' with all functions from the **ggdag** and **dagitty** packages. +#' +#' @references +#' - Rohrer, J. M. (2018). Thinking clearly about correlations and causation: +#' Graphical causal models for observational data. Advances in Methods and +#' Practices in Psychological Science, 1(1), 27–42. \doi{10.1177/2515245917745629} +#' +#' - Westreich, D., & Greenland, S. (2013). The Table 2 Fallacy: Presenting and +#' Interpreting Confounder and Modifier Coefficients. American Journal of +#' Epidemiology, 177(4), 292–298.
\doi{10.1093/aje/kws412} +#' +#' @examplesIf require("ggdag", quietly = TRUE) && require("dagitty", quietly = TRUE) && require("see", quietly = TRUE) +#' # no adjustment needed +#' check_dag( +#' y ~ x + b, +#' outcome = "y", +#' exposure = "x" +#' ) +#' +#' # incorrect adjustment +#' dag <- check_dag( +#' y ~ x + b + c, +#' x ~ b, +#' outcome = "y", +#' exposure = "x" +#' ) +#' dag +#' plot(dag) +#' +#' # After adjusting for `b`, the model is correctly specified +#' dag <- check_dag( +#' y ~ x + b + c, +#' x ~ b, +#' outcome = "y", +#' exposure = "x", +#' adjusted = "b" +#' ) +#' dag +#' +#' # using formula interface for arguments "outcome", "exposure" and "adjusted" +#' check_dag( +#' y ~ x + b + c, +#' x ~ b, +#' outcome = ~y, +#' exposure = ~x, +#' adjusted = ~ b + c +#' ) +#' +#' # if not provided, "outcome" is taken from first formula, same for "exposure" +#' # thus, we can simplify the above expression to +#' check_dag( +#' y ~ x + b + c, +#' x ~ b, +#' adjusted = ~ b + c +#' ) +#' +#' # use specific layout for the DAG +#' dag <- check_dag( +#' score ~ exp + b + c, +#' exp ~ b, +#' outcome = "score", +#' exposure = "exp", +#' coords = list( +#' # x-coordinates for all nodes +#' x = c(score = 5, exp = 4, b = 3, c = 3), +#' # y-coordinates for all nodes +#' y = c(score = 3, exp = 3, b = 2, c = 4) +#' ) +#' ) +#' plot(dag) +#' +#' # alternative way of providing the coordinates +#' dag <- check_dag( +#' score ~ exp + b + c, +#' exp ~ b, +#' outcome = "score", +#' exposure = "exp", +#' coords = list( +#' # x/y coordinates for each node +#' score = c(5, 3), +#' exp = c(4, 3), +#' b = c(3, 2), +#' c = c(3, 4) +#' ) +#' ) +#' plot(dag) +#' +#' # Objects returned by `check_dag()` can be used with "ggdag" or "dagitty" +#' ggdag::ggdag_status(dag) +#' +#' # Using a model object to extract information about outcome, +#' # exposure and adjusted variables +#' data(mtcars) +#' m <- lm(mpg ~ wt + gear + disp + cyl, data = mtcars) +#' dag <- check_dag( +#' m, +#' wt ~ disp + cyl, +#' wt ~ am +#' ) +#' dag +#' plot(dag) +#' @export +check_dag <- function(..., + outcome = NULL, + exposure = NULL, + adjusted = NULL, + latent = NULL, + effect = c("all", "total", "direct"), + coords = NULL) { + insight::check_if_installed( + c("ggdag", "dagitty"), + reason = "to check correct adjustments for identifying causal effects." + ) + + effect <- match.arg(effect) + + # retrieve formulas + formulas <- list(...) 
+ + # check if first object is a model object, and convert to formula + if (insight::is_regression_model(formulas[[1]])) { + vars <- insight::find_variables( + formulas[[1]], + effects = "fixed", + component = "conditional", + flatten = FALSE + ) + formulas[[1]] <- stats::as.formula( + paste(vars$response, "~", paste(vars$conditional, collapse = "+")) + ) + # if we have a model, we *always* overwrite adjusted + if (!is.null(adjusted)) { + insight::format_alert("The `adjusted` argument will be overwritten by all independent variables from the model.") # nolint + } + adjusted <- vars$conditional + } + + # if outcome is not set, use first dependent variable + if (is.null(outcome)) { + outcome <- insight::find_response(formulas[[1]]) + } + + # if exposure is not set, use first independent variable + if (is.null(exposure)) { + exposure <- insight::find_variables( + formulas[[1]], + effects = "fixed", + component = "conditional", + flatten = FALSE + )$conditional[1] + } + + # handle formula interface - if "outcome", "exposure" or "adjusted" are + # provided as formulas, convert to character here + if (inherits(outcome, "formula")) { + outcome <- all.vars(outcome) + } + if (inherits(exposure, "formula")) { + exposure <- all.vars(exposure) + } + if (inherits(adjusted, "formula")) { + adjusted <- all.vars(adjusted) + } + + # process coords-argument + coords <- .process_coords(coords) + + # convert to dag + dag_args <- c(formulas, list( + exposure = exposure, + outcome = outcome, + latent = latent, + coords = coords + )) + dag <- do.call(ggdag::dagify, dag_args) + + # add adjustments + if (!is.null(adjusted)) { + dag <- .adjust_dag(dag, adjusted) + } + + .finalize_dag(dag, effect, outcome, exposure, adjusted) +} + + +# helper ---------------------------------------------------------------------- + +.finalize_dag <- function(dag, effect, outcome, exposure, adjusted) { + # check for cyclic DAG + cycles <- unlist(dagitty::findCycle(dag)) + + # stop if cyclic + if (!is.null(cycles)) { + insight::format_error(paste0( + "Model is cyclic. Causal effects can't be determined for cyclic models. Please remove cycles from the model. 
To do so, check following variables: ", # nolint + datawizard::text_concatenate(unique(cycles)) + )) + } + + # data for checking effects + checks <- lapply(c("direct", "total"), function(x) { + adjustment_set <- unlist(dagitty::adjustmentSets(dag, effect = x), use.names = FALSE) + adjustment_nodes <- unlist(dagitty::adjustedNodes(dag), use.names = FALSE) + minimal_adjustments <- as.list(dagitty::adjustmentSets(dag, effect = x)) + collider <- adjustment_nodes[vapply(adjustment_nodes, ggdag::is_collider, logical(1), .dag = dag, downstream = FALSE)] # nolint + if (length(collider)) { + # if we *have* colliders, remove them from minimal adjustments + minimal_adjustments <- lapply(minimal_adjustments, setdiff, y = collider) + } else { + # if we don't have colliders, set to NULL + collider <- NULL + } + list( + # no adjustment needed when + # - required and current adjustment sets are NULL + # - AND we have no collider in current adjustments + adjustment_not_needed = is.null(adjustment_set) && is.null(adjustment_nodes) && is.null(collider), + # incorrect adjustment when + # - required is NULL and current adjustment not NULL + # - OR we have a collider in current adjustments + incorrectly_adjusted = (is.null(adjustment_set) && !is.null(adjustment_nodes)) || (!is.null(collider) && collider %in% adjustment_nodes), # nolint + current_adjustments = adjustment_nodes, + minimal_adjustments = minimal_adjustments, + collider = collider + ) + }) + + attr(dag, "effect") <- effect + attr(dag, "outcome") <- outcome + attr(dag, "exposure") <- exposure + attr(dag, "adjusted") <- adjusted + attr(dag, "adjustment_sets") <- checks[[1]]$current_adjustments + attr(dag, "collider") <- checks[[1]]$collider + # remove collider from sub-attributes + checks[[1]]$collider <- NULL + checks[[2]]$collider <- NULL + attr(dag, "check_direct") <- insight::compact_list(checks[[1]]) + attr(dag, "check_total") <- insight::compact_list(checks[[2]]) + + class(dag) <- c(c("check_dag", "see_check_dag"), class(dag)) + + dag +} + + +.adjust_dag <- function(dag, adjusted) { + for (i in adjusted) { + # first option, we just have the variable name + dag <- gsub(paste0("\n", i, "\n"), paste0("\n", i, " [adjusted]\n"), dag, fixed = TRUE) + # second option, we have the variable name with a [pos] tag when the user + # provided coords + dag <- gsub(paste0("\n", i, " [pos="), paste0("\n", i, " [adjusted,pos="), dag, fixed = TRUE) + } + dag +} + + +.process_coords <- function(coords) { + # check if the coords are not provided as list with x/y elements, but instead + # as list x/y coordinates for each element. 
This means "coords" is provided as + # + # coords <- list( + # score = c(5, 3), + # exp = c(4, 3), + # b = c(3, 2), + # c = c(3, 4) + # ) + # + # but we want + # + # coords = list( + # x = c(score = 5, exp = 4, b = 3, c = 3), + # y = c(score = 3, exp = 3, b = 2, c = 4) + # ) + # + # we have to check that it's not a data frame and that it is a list - + # a value like `ggdag::time_ordered_coords()` is a function, not a list + if (!is.null(coords) && !is.data.frame(coords) && is.list(coords) && (length(coords) != 2 || !identical(names(coords), c("x", "y")))) { # nolint + # transform list into data frame, split x and y coordinates into columns + coords <- datawizard::rownames_as_column( + stats::setNames(as.data.frame(do.call(rbind, coords)), c("x", "y")), + "name" + ) + # reorder + coords <- coords[c("x", "y", "name")] + } + coords +} + + +# methods -------------------------------------------------------------------- + +#' @rdname check_dag +#' @export +as.dag <- function(x, ...) { + if (!inherits(x, "check_dag")) { + insight::format_error("Input is not of class `check_dag`.") + } + cat(as.character(x)) +} + + +#' @export +print.check_dag <- function(x, ...) { + effect <- attributes(x)$effect + collider <- attributes(x)$collider + + # header + cat(insight::print_color("# Check for correct adjustment sets", "blue")) + + # model specification + exposure_outcome_text <- paste0( + "\n- Outcome: ", attributes(x)$outcome, + "\n- Exposure", ifelse(length(attributes(x)$exposure) > 1, "s", ""), + ": ", datawizard::text_concatenate(attributes(x)$exposure) + ) + + # add information on adjustments + if (!is.null(attributes(x)$adjustment_sets)) { + exposure_outcome_text <- paste0( + exposure_outcome_text, + "\n- Adjustment", + ifelse(length(attributes(x)$adjustment_sets) > 1, "s", ""), + ": ", datawizard::text_concatenate(attributes(x)$adjustment_sets) + ) + } + + # add information on colliders + if (!is.null(collider)) { + exposure_outcome_text <- paste0( + exposure_outcome_text, + "\n- Collider", + ifelse(length(collider) > 1, "s", ""), + ": ", insight::color_text(datawizard::text_concatenate(collider), "cyan") + ) + } + + cat(exposure_outcome_text) + cat("\n\n") + + # minimal adjustment sets for direct and total effect identical?
+ # Then print only once + if (identical(attributes(x)$check_direct$minimal_adjustments, attributes(x)$check_total$minimal_adjustments)) { + .print_dag_results(attributes(x)$check_direct, x, "direct and total", "all", collider) + } else { + for (i in c("direct", "total")) { + if (i == "direct") { + out <- attributes(x)$check_direct + } else { + out <- attributes(x)$check_total + } + .print_dag_results(out, x, i, effect, collider) + } + } +} + +.print_dag_results <- function(out, x, i, effect, collider = NULL) { + # missing adjustments - minimal_adjustment can be a list of different + # options for minimal adjustments, so we check here if any of the minimal + # adjustments are currently sufficient + sufficient_adjustments <- vapply(out$minimal_adjustments, function(min_adj) { + !is.null(out$current_adjustments) && all(min_adj %in% out$current_adjustments) + }, logical(1)) + + # build message with check results for effects ----------------------- + + if (isTRUE(out$adjustment_not_needed)) { + # Scenario 1: no adjustment needed + msg <- paste0( + insight::color_text("Model is correctly specified.", "green"), + "\nNo adjustment needed to estimate the ", i, " effect of ", + datawizard::text_concatenate(attributes(x)$exposure, enclose = "`"), + " on `", + attributes(x)$outcome, + "`." + ) + } else if (!is.null(collider)) { + # Scenario 2: adjusted for collider + msg <- paste0( + insight::color_text("Incorrectly adjusted!", "red"), + "\nYour model adjusts for a potential collider. To estimate the ", i, " effect, do ", + insight::color_text("not", "italic"), + " adjust for ", + insight::color_text(datawizard::text_concatenate(collider, enclose = "`"), "cyan"), + " to avoid collider bias. It is recommended to double-check for collider bias on the dagitty-website." + ) + } else if (isTRUE(out$incorrectly_adjusted)) { + # Scenario 3: incorrectly adjusted, adjustments where none is allowed + msg <- paste0( + insight::color_text("Incorrectly adjusted!", "red"), + "\nTo estimate the ", i, " effect, do ", + insight::color_text("not", "italic"), + " adjust for ", + ifelse(length(out$current_adjustments) > 1, "some or all of ", ""), + insight::color_text(datawizard::text_concatenate(out$current_adjustments, enclose = "`"), "red"), + "." + ) + } else if (any(sufficient_adjustments)) { + # Scenario 4: correct adjustment + msg <- paste0( + insight::color_text("Model is correctly specified.", "green"), + "\nAll minimal sufficient adjustments to estimate the ", i, " effect were done." + ) + } else { + # Scenario 5: missing adjustments + msg <- paste0( + insight::color_text("Incorrectly adjusted!", "red"), + "\nTo estimate the ", i, " effect, ", + insight::color_text("at least", "italic"), + " adjust for " + ) + # we may have multiple valid adjustment sets - handle this here + if (length(out$minimal_adjustments) > 1) { + msg <- paste0( + msg, + "one of the following sets:\n", + insight::color_text( + paste( + "-", + unlist(lapply(out$minimal_adjustments, paste, collapse = ", "), use.names = FALSE), + collapse = "\n" + ), + "yellow" + ), + "." + ) + current_str <- "\nCurrently" + } else { + msg <- paste0( + msg, + insight::color_text(datawizard::text_concatenate( + unlist(out$minimal_adjustments, use.names = FALSE), + enclose = "`" + ), "yellow"), + "."
+ ) + current_str <- " Currently" + } + if (is.null(out$current_adjustments)) { + msg <- paste0(msg, current_str, ", the model does not adjust for any variables.") + } else { + msg <- paste0( + msg, current_str, ", the model only adjusts for ", + datawizard::text_concatenate(out$current_adjustments, enclose = "`"), + "." + ) + # check if we could identify missing variables, and if so, add them to the message + missing_vars <- setdiff(unlist(out$minimal_adjustments), out$current_adjustments) + if (length(missing_vars) > 0) { + msg <- paste0( + msg, " You possibly also need to adjust for ", + insight::color_text(datawizard::text_concatenate(missing_vars, enclose = "`"), "green"), + " to block biasing paths." + ) + } + } + } + + if (effect %in% c("all", i)) { + cat(insight::print_color(insight::format_message( + paste0("Identification of ", i, " effects\n\n") + ), "blue")) + cat(msg) + cat("\n\n") + } +} + + +#' @export +plot.check_dag <- function(x, ...) { + insight::check_if_installed("see", "to plot DAG") + NextMethod() +} diff --git a/R/check_heterogeneity_bias.R b/R/check_heterogeneity_bias.R index 424bf8b5b..2414873cc 100644 --- a/R/check_heterogeneity_bias.R +++ b/R/check_heterogeneity_bias.R @@ -9,9 +9,24 @@ #' that should be checked. If `x` is a mixed model object, this argument #' will be ignored. #' @param by Character vector (or formula) with the name of the variable that -#' indicates the group- or cluster-ID. If `x` is a model object, this -#' argument will be ignored. -#' @param group Deprecated. Use `by` instead. +#' indicates the group- or cluster-ID. For cross-classified or nested designs, +#' `by` can also identify two or more variables as group- or cluster-IDs. If +#' the data is nested and should be treated as such, set `nested = TRUE`. Else, +#' if `by` defines two or more variables and `nested = FALSE`, a cross-classified +#' design is assumed. If `x` is a model object, this argument will be ignored. +#' +#' For nested designs, `by` can be: +#' - a character vector with the names of the variables that indicate the +#' levels, ordered from *highest* level to *lowest* (e.g. +#' `by = c("L4", "L3", "L2")`). +#' - a character vector with variable names in the format `by = "L4/L3/L2"`, +#' where the levels are separated by `/`. +#' +#' See also section _De-meaning for cross-classified designs_ and +#' _De-meaning for nested designs_ below. +#' @param nested Logical, if `TRUE`, the data is treated as nested. If `FALSE`, +#' the data is treated as cross-classified. Only applies if `by` contains more +#' than one variable. #' #' @seealso #' For further details, read the vignette @@ -23,20 +38,14 @@ #' Modeling of Time-Series Cross-Sectional and Panel Data. Political Science #' Research and Methods, 3(1), 133–153. #' -#' @examplesIf insight::check_if_installed("datawizard", minimum_version = "0.12.0", quietly = TRUE) +#' @examples #' data(iris) #' iris$ID <- sample(1:4, nrow(iris), replace = TRUE) # fake-ID #' check_heterogeneity_bias(iris, select = c("Sepal.Length", "Petal.Length"), by = "ID") #' @export -check_heterogeneity_bias <- function(x, select = NULL, by = NULL, group = NULL) { +check_heterogeneity_bias <- function(x, select = NULL, by = NULL, nested = FALSE) { insight::check_if_installed("datawizard", minimum_version = "0.12.0") - ## TODO: deprecate later - if (!is.null(group)) { - insight::format_warning("Argument `group` is deprecated and will be removed in a future release.
Please use `by` instead.") # nolint - by <- group - } - if (insight::is_model(x)) { by <- insight::find_random(x, split_nested = TRUE, flatten = TRUE) if (is.null(by)) { @@ -54,8 +63,14 @@ check_heterogeneity_bias <- function(x, select = NULL, by = NULL, group = NULL) my_data <- x } - unique_groups <- .n_unique(my_data[[by]]) - combinations <- expand.grid(select, by) + # for nested designs? + if (nested) { + # separate level-indicators with "/", as supported by datawizard + by <- paste(by, collapse = "/") + } + + # create all combinations that should be checked + combinations <- expand.grid(select, by[1]) result <- Map(function(predictor, id) { # demean predictor @@ -72,7 +87,7 @@ check_heterogeneity_bias <- function(x, select = NULL, by = NULL, group = NULL) } else { NULL } - }, as.character(combinations[[1]]), as.character(combinations[[2]])) + }, as.character(combinations[[1]]), by) out <- unlist(insight::compact_list(result), use.names = FALSE) diff --git a/R/check_model_diagnostics.R b/R/check_model_diagnostics.R index 431a2bc0f..8897c685e 100644 --- a/R/check_model_diagnostics.R +++ b/R/check_model_diagnostics.R @@ -460,7 +460,7 @@ if (!is.na(match(faminfo$family, c("binomial", "poisson", "truncated_poisson")))) { return(1) } - betad <- model$fit$par["betad"] + betad <- model$fit$par["betadisp"] switch(faminfo$family, gaussian = exp(0.5 * betad), Gamma = exp(-0.5 * betad), diff --git a/R/check_predictions.R b/R/check_predictions.R index fbc3b5eaf..c903f4309 100644 --- a/R/check_predictions.R +++ b/R/check_predictions.R @@ -423,26 +423,6 @@ pp_check.glmmTMB <- #' S3method(bayesplot::pp_check, BFBayesFactor) - -# aliases -------------------------- - -#' @rdname check_predictions -#' @export -posterior_predictive_check <- function(object, ...) { - .Deprecated("check_predictions()") - check_predictions(object, ...) -} - -#' @rdname check_predictions -#' @export -check_posterior_predictions <- function(object, ...) { - .Deprecated("check_predictions()") - check_predictions(object, ...) -} - - - - # methods ----------------------- diff --git a/R/check_singularity.R b/R/check_singularity.R index e2e369f61..86de4fcce 100644 --- a/R/check_singularity.R +++ b/R/check_singularity.R @@ -97,6 +97,7 @@ #' ) #' check_singularity(model) #' +#' \dontrun{ #' # Fixing singularity issues using priors in glmmTMB #' # Example taken from `vignette("priors", package = "glmmTMB")` #' dat <- readRDS(system.file( @@ -120,7 +121,7 @@ #' model_with_priors <- update(model, priors = prior) #' # no singular fit #' check_singularity(model_with_priors) -#' +#' } #' @export check_singularity <- function(x, tolerance = 1e-5, ...) { UseMethod("check_singularity") diff --git a/R/model_performance.lm.R b/R/model_performance.lm.R index c797b396f..0199b4ce4 100644 --- a/R/model_performance.lm.R +++ b/R/model_performance.lm.R @@ -3,10 +3,10 @@ #' Compute indices of model performance for regression models. #' #' @param model A model. -#' @param metrics Can be `"all"`, `"common"` or a character vector of -#' metrics to be computed (one or more of `"AIC"`, `"AICc"`, `"BIC"`, `"R2"`, -#' `"R2_adj"`, `"RMSE"`, `"SIGMA"`, `"LOGLOSS"`, `"PCP"`, `"SCORE"`). -#' `"common"` will compute AIC, BIC, R2 and RMSE. +#' @param metrics Can be `"all"`, `"common"` or a character vector of metrics to +#' be computed (one or more of `"AIC"`, `"AICc"`, `"BIC"`, `"R2"`, `"R2_adj"`, +#' `"RMSE"`, `"SIGMA"`, `"LOGLOSS"`, `"PCP"`, `"SCORE"`). `"common"` will +#' compute AIC, BIC, R2 and RMSE. #' @param verbose Toggle off warnings. #' @param ... 
Arguments passed to or from other methods. +#' @@ -209,6 +209,18 @@ model_performance.lm_robust <- model_performance.lm #' @export model_performance.multinom <- model_performance.lm +#' @export +model_performance.multinom_weightit <- model_performance.lm + +#' @export +model_performance.ordinal_weightit <- model_performance.lm + +#' @export +model_performance.coxph_weightit <- model_performance.lm + +#' @export +model_performance.glm_weightit <- model_performance.lm + #' @export model_performance.plm <- model_performance.lm diff --git a/R/performance_aicc.R b/R/performance_aicc.R index 3c433161f..74041ac1b 100644 --- a/R/performance_aicc.R +++ b/R/performance_aicc.R @@ -28,6 +28,9 @@ #' scale. To get back to the original scale, the likelihood of the model is #' multiplied by the Jacobian/derivative of the transformation. #' +#' In case it is not possible to return the corrected AIC value, a warning +#' is given that the corrected log-likelihood value could not be computed. +#' #' @references #' - Akaike, H. (1973) Information theory as an extension of the maximum #' likelihood principle. In: Second International Symposium on Information @@ -52,6 +55,13 @@ #' # performance_aic() correctly detects transformed response and #' # returns corrected AIC #' performance_aic(model) +#' +#' \dontrun{ +#' # there are a few exceptions where the corrected log-likelihood values +#' # cannot be returned. The following example gives a warning. +#' model <- lm(1 / mpg ~ factor(cyl), mtcars) +#' performance_aic(model) +#' } #' @export performance_aicc <- function(x, ...) { UseMethod("performance_aicc") @@ -256,8 +266,6 @@ performance_aicc.rma <- function(x, ...) { } - - # jacobian / derivate for log models and other transformations ---------------- @@ -265,68 +273,13 @@ performance_aicc.rma <- function(x, ...)
{ .adjust_ic_jacobian <- function(model, ic) { response_transform <- insight::find_transformation(model) if (!is.null(ic) && !is.null(response_transform) && !identical(response_transform, "identity")) { - adjustment <- .safe(.ll_analytic_adjustment(model, insight::get_weights(model, na_rm = TRUE))) + adjustment <- .safe(insight::get_loglikelihood_adjustment( + model, + insight::get_weights(model, remove_na = TRUE) + )) if (!is.null(adjustment)) { ic <- ic - 2 * adjustment } } ic } - - -# copied from `insight` -.ll_analytic_adjustment <- function(x, model_weights = NULL) { - tryCatch( - { - trans <- insight::find_transformation(x) - switch(trans, - identity = .weighted_sum(log(insight::get_response(x)), w = model_weights), - log = .weighted_sum(log(1 / insight::get_response(x)), w = model_weights), - log1p = .weighted_sum(log(1 / (insight::get_response(x) + 1)), w = model_weights), - log2 = .weighted_sum(log(1 / (insight::get_response(x) * log(2))), w = model_weights), - log10 = .weighted_sum(log(1 / (insight::get_response(x) * log(10))), w = model_weights), - exp = .weighted_sum(insight::get_response(x), w = model_weights), - expm1 = .weighted_sum((insight::get_response(x) - 1), w = model_weights), - sqrt = .weighted_sum(log(0.5 / sqrt(insight::get_response(x))), w = model_weights), - .ll_jacobian_adjustment(x, model_weights) - ) - }, - error = function(e) { - NULL - } - ) -} - - -# this function calculates the adjustment for the log-likelihood of a model -# with transformed response -.ll_jacobian_adjustment <- function(model, weights = NULL) { - tryCatch( - { - trans <- insight::get_transformation(model)$transformation - .weighted_sum(log( - diag(attr(with( - insight::get_data(model, verbose = FALSE), - stats::numericDeriv( - expr = quote(trans( - get(insight::find_response(model)) - )), - theta = insight::find_response(model) - ) - ), "gradient")) - ), weights) - }, - error = function(e) { - NULL - } - ) -} - - -.weighted_sum <- function(x, w = NULL, ...) { - if (is.null(w)) { - mean(x) * length(x) - } else { - stats::weighted.mean(x, w) * length(x) - } -} diff --git a/R/print-methods.R b/R/print-methods.R index e3cdbbfed..4b4a81aa7 100644 --- a/R/print-methods.R +++ b/R/print-methods.R @@ -68,23 +68,39 @@ print.r2_pseudo <- function(x, digits = 3, ...) { #' @export print.r2_mlm <- function(x, digits = 3, ...) { model_type <- attr(x, "model_type") - if (!is.null(model_type)) { - insight::print_color(sprintf("# R2 for %s Regression\n\n", model_type), "blue") + is_multivar_r2 <- all(names(x) == c("Symmetric Rxy", "Asymmetric Pxy")) + if (!is.null(model_type) && !is_multivar_r2) { + insight::print_color( + sprintf("# R2 for %s Regression\n\n", model_type), + "blue" + ) + } else if (!is.null(model_type) && is_multivar_r2) { + insight::print_color( + sprintf("# Multivariate R2 for %s Regression\n", model_type), + "blue" + ) } else { insight::print_color("# R2\n\n", "blue") } - for (i in names(x)) { - insight::print_color(sprintf("## %s\n", i), "cyan") - out <- paste0( - c( - sprintf(" R2: %.*f", digits, x[[i]]$R2), - sprintf(" adj. R2: %.*f", digits, x[[i]]$R2_adjusted) - ), - collapse = "\n" - ) - cat(out) + if (is_multivar_r2) { + cat(sprintf(" Symmetric Rxy: %.*f", digits, x[["Symmetric Rxy"]])) + cat("\n") + cat(sprintf("Asymmetric Pxy: %.*f", digits, x[["Asymmetric Pxy"]])) cat("\n\n") + } else { + for (i in names(x)) { + insight::print_color(sprintf("## %s\n", i), "cyan") + out <- paste( + c( + sprintf(" R2: %.*f", digits, x[[i]]$R2), + sprintf(" adj. 
R2: %.*f", digits, x[[i]]$R2_adjusted) + ), + collapse = "\n" + ) + cat(out) + cat("\n\n") + } } invisible(x) } @@ -110,7 +126,10 @@ print.r2_bayes <- function(x, digits = 3, ...) { ci = attributes(x)$CI$R2_Bayes_marginal$CI, digits = digits ) - out <- paste0(c(out, sprintf(" Marginal R2: %.*f (%s)", digits, x$R2_Bayes_marginal, r2_marginal_ci)), collapse = "\n") + out <- paste(c( + out, + sprintf(" Marginal R2: %.*f (%s)", digits, x$R2_Bayes_marginal, r2_marginal_ci) + ), collapse = "\n") } cat(out) @@ -139,7 +158,10 @@ print.r2_loo <- function(x, digits = 3, ...) { ci = attributes(x)$CI$R2_loo_marginal$CI, digits = digits ) - out <- paste0(c(out, sprintf(" Marginal R2: %.*f (%s)", digits, x$R2_loo_marginal, r2_marginal_ci)), collapse = "\n") + out <- paste(c( + out, + sprintf(" Marginal R2: %.*f (%s)", digits, x$R2_loo_marginal, r2_marginal_ci) + ), collapse = "\n") } cat(out) diff --git a/R/r2.R b/R/r2.R index dbfc437fc..d62a0c477 100644 --- a/R/r2.R +++ b/R/r2.R @@ -10,6 +10,9 @@ #' (`TRUE`) or not (`FALSE`)? #' @param ci Confidence interval level, as scalar. If `NULL` (default), no #' confidence intervals for R2 are calculated. +#' @param multivariate Logical. Should multiple R2 values be reported as +#' separated by response (FALSE) or should a single R2 be reported as +#' combined across responses computed by [`r2_mlm`] (TRUE). #' @param ... Arguments passed down to the related r2-methods. #' @inheritParams r2_nakagawa #' @@ -31,7 +34,7 @@ #' @seealso #' [`r2_bayes()`], [`r2_coxsnell()`], [`r2_kullback()`], [`r2_loo()`], #' [`r2_mcfadden()`], [`r2_nagelkerke()`], [`r2_nakagawa()`], [`r2_tjur()`], -#' [`r2_xu()`] and [`r2_zeroinflated()`]. +#' [`r2_xu()`], [`r2_zeroinflated()`], and [`r2_mlm()`]. #' #' @examplesIf require("lme4") #' # Pseudo r-quared for GLM @@ -245,24 +248,28 @@ r2.aov <- function(model, ci = NULL, ...) { structure(class = "r2_generic", out) } - +#' @rdname r2 #' @export -r2.mlm <- function(model, ...) { - model_summary <- summary(model) +r2.mlm <- function(model, multivariate = TRUE, ...) { + if (multivariate) { + out <- r2_mlm(model) + } else { + model_summary <- summary(model) - out <- lapply(names(model_summary), function(i) { - tmp <- list( - R2 = model_summary[[i]]$r.squared, - R2_adjusted = model_summary[[i]]$adj.r.squared, - Response = sub("Response ", "", i, fixed = TRUE) - ) - names(tmp$R2) <- "R2" - names(tmp$R2_adjusted) <- "adjusted R2" - names(tmp$Response) <- "Response" - tmp - }) + out <- lapply(names(model_summary), function(i) { + tmp <- list( + R2 = model_summary[[i]]$r.squared, + R2_adjusted = model_summary[[i]]$adj.r.squared, + Response = sub("Response ", "", i, fixed = TRUE) + ) + names(tmp$R2) <- "R2" + names(tmp$R2_adjusted) <- "adjusted R2" + names(tmp$Response) <- "Response" + tmp + }) - names(out) <- names(model_summary) + names(out) <- names(model_summary) + } attr(out, "model_type") <- "Multivariate Linear" structure(class = "r2_mlm", out) diff --git a/R/r2_coxsnell.R b/R/r2_coxsnell.R index 3db7197b7..36df504ec 100644 --- a/R/r2_coxsnell.R +++ b/R/r2_coxsnell.R @@ -8,12 +8,12 @@ #' @param ... Currently not used. #' #' @details -#' This index was proposed by *Cox and Snell (1989, pp. 208-9)* and, -#' apparently independently, by *Magee (1990)*; but had been suggested -#' earlier for binary response models by *Maddala (1983)*. However, this -#' index achieves a maximum of less than 1 for discrete models (i.e. 
models -#' whose likelihood is a product of probabilities) which have a maximum of 1, -#' instead of densities, which can become infinite *(Nagelkerke, 1991)*. +#' This index was proposed by *Cox and Snell (1989, pp. 208-9)* and, apparently +#' independently, by *Magee (1990)*; but had been suggested earlier for binary +#' response models by *Maddala (1983)*. However, this index achieves a maximum +#' of less than 1 for discrete models (i.e. models whose likelihood is a product +#' of probabilities) which have a maximum of 1, instead of densities, which can +#' become infinite *(Nagelkerke, 1991)*. #' #' @return A named vector with the R2 value. #' @@ -24,12 +24,12 @@ #' @references #' - Cox, D. R., Snell, E. J. (1989). Analysis of binary data (Vol. 32). #' Monographs on Statistics and Applied Probability. -#' - Magee, L. (1990). R 2 measures based on Wald and likelihood ratio -#' joint significance tests. The American Statistician, 44(3), 250-253. +#' - Magee, L. (1990). R 2 measures based on Wald and likelihood ratio joint +#' significance tests. The American Statistician, 44(3), 250-253. #' - Maddala, G. S. (1986). Limited-dependent and qualitative variables in #' econometrics (No. 3). Cambridge university press. -#' - Nagelkerke, N. J. (1991). A note on a general definition of the -#' coefficient of determination. Biometrika, 78(3), 691-692. +#' - Nagelkerke, N. J. (1991). A note on a general definition of the coefficient +#' of determination. Biometrika, 78(3), 691-692. #' #' @export r2_coxsnell <- function(model, ...) { diff --git a/R/r2_mlm.R b/R/r2_mlm.R new file mode 100644 index 000000000..240701439 --- /dev/null +++ b/R/r2_mlm.R @@ -0,0 +1,85 @@ +#' @title Multivariate R2 +#' @name r2_mlm +#' +#' @description +#' Calculates two multivariate R2 values for multivariate linear regression. +#' +#' @param model Multivariate linear regression model. +#' @param ... Currently not used. +#' +#' @details +#' The two indexes returned summarize model fit for the set of predictors +#' given the system of responses. As compared to the default +#' [r2][performance::r2] index for multivariate linear models, the indexes +#' returned by this function provide a single fit value collapsed across +#' all responses. +#' +#' The two returned indexes were proposed by *Van den Burg and Lewis (1988)* +#' as an extension of the metrics proposed by *Cramer and Nicewander (1979)*. +#' Of the numerous indexes proposed across these two papers, only two metrics, +#' the \eqn{R_{xy}} and \eqn{P_{xy}}, are recommended for use +#' by *Azen and Budescu (2006)*. +#' +#' For a multivariate linear regression with \eqn{p} predictors and +#' \eqn{q} responses where \eqn{p > q}, the \eqn{R_{xy}} index is +#' computed as: +#' +#' \deqn{R_{xy} = 1 - \prod_{i=1}^p (1 - \rho_i^2)} +#' +#' Where \eqn{\rho} is a canonical variate from a +#' [canonical correlation][cancor] between the predictors and responses. +#' This metric is symmetric and its value does not change when the roles of +#' the variables as predictors or responses are swapped. +#' +#' The \eqn{P_{xy}} is computed as: +#' +#' \deqn{P_{xy} = \frac{q - trace(\bf{S}_{\bf{YY}}^{-1}\bf{S}_{\bf{YY|X}})}{q}} +#' +#' Where \eqn{\bf{S}_{\bf{YY}}} is the matrix of response covariances and +#' \eqn{\bf{S}_{\bf{YY|X}}} is the matrix of residual covariances given +#' the predictors. This metric is asymmetric and can change +#' depending on which variables are considered predictors versus responses. +#' +#' @return A named vector with the R2 values. 
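# Illustrative sketch (not part of the package code): how the two indices
# described above map onto R code. It assumes a fitted multivariate `lm`
# object `model`, as in the examples below, and should agree with
# `r2_mlm(model)`.
model <- lm(cbind(qsec, drat) ~ wt + mpg + cyl, data = mtcars)

# Symmetric Rxy: one minus the product of (1 - rho_i^2) over the
# canonical correlations between predictors and responses
rho2 <- stats::cancor(insight::get_predictors(model), insight::get_response(model))$cor^2
R_xy <- 1 - prod(1 - rho2)

# Asymmetric Pxy: based on the response and residual covariance matrices
resp_cov <- stats::cov(insight::get_response(model))
resid_cov <- stats::cov(stats::residuals(model))
q <- ncol(insight::get_response(model))
P_xy <- (q - sum(diag(solve(resp_cov) %*% resid_cov))) / q

c("Symmetric Rxy" = R_xy, "Asymmetric Pxy" = P_xy)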
+#' +#' @examples +#' model <- lm(cbind(qsec, drat) ~ wt + mpg + cyl, data = mtcars) +#' r2_mlm(model) +#' +#' model_swap <- lm(cbind(wt, mpg, cyl) ~ qsec + drat, data = mtcars) +#' r2_mlm(model_swap) +#' +#' @references +#' - Azen, R., & Budescu, D. V. (2006). Comparing predictors in +#' multivariate regression models: An extension of dominance analysis. +#' Journal of Educational and Behavioral Statistics, 31(2), 157-180. +#' - Cramer, E. M., & Nicewander, W. A. (1979). Some symmetric, +#' invariant measures of multivariate association. Psychometrika, 44, 43-54. +#' - Van den Burg, W., & Lewis, C. (1988). Some properties of two +#' measures of multivariate association. Psychometrika, 53, 109-122. +#' +#' @author Joseph Luchman +#' +#' @export +r2_mlm <- function(model, ...) { + UseMethod("r2_mlm") +} + +# methods --------------------------- + +#' @export +r2_mlm.mlm <- function(model, verbose = TRUE, ...) { + rho2_vec <- 1 - stats::cancor( + insight::get_predictors(model), + insight::get_response(model) + )$cor^2 + R_xy <- 1 - Reduce(`*`, rho2_vec, 1) + + resid_cov <- stats::cov(residuals(model)) + resp_cov <- stats::cov(insight::get_response(model)) + qq <- ncol(insight::get_response(model)) + V_xy <- qq - sum(diag(solve(resp_cov) %*% resid_cov)) + P_xy <- V_xy / qq + + c("Symmetric Rxy" = R_xy, "Asymmetric Pxy" = P_xy) +} diff --git a/R/r2_nakagawa.R b/R/r2_nakagawa.R index 1246a444a..aeb4df1c1 100644 --- a/R/r2_nakagawa.R +++ b/R/r2_nakagawa.R @@ -30,7 +30,7 @@ #' - Bernoulli (logistic) regression #' - Binomial regression (with other than binary outcomes) #' - Poisson and Quasi-Poisson regression -#' - Negative binomial regression (including nbinom1 and nbinom2 families) +#' - Negative binomial regression (including nbinom1, nbinom2 and nbinom12 families) #' - Gaussian regression (linear models) #' - Gamma regression #' - Tweedie regression @@ -44,6 +44,7 @@ #' - Compound Poisson regression #' - Generalized Poisson regression #' - Log-normal regression +#' - Skew-normal regression #' #' Extracting variance components for models with zero-inflation part is not #' straightforward, because it is not definitely clear how the distribution-specific diff --git a/R/simulate_residuals.R b/R/simulate_residuals.R index 7312c196f..1f58218c0 100644 --- a/R/simulate_residuals.R +++ b/R/simulate_residuals.R @@ -65,9 +65,10 @@ #' @export simulate_residuals <- function(x, iterations = 250, ...) { insight::check_if_installed("DHARMa") - # TODO (low priority): Note that DHARMa::simulateResiduals(x, ...) does its own checks for whether - # or not the model passed to it is supported, do we want to use this or do our - # own checks so we can supply our own error message? + # TODO (low priority): Note that DHARMa::simulateResiduals(x, ...) does its + # own checks for whether or not the model passed to it is supported, do we + # want to use this or do our own checks so we can supply our own error + # message? 
if (iterations < 2) { insight::format_error("`iterations` must be at least 2.") } diff --git a/README.md b/README.md index 5153170da..fb7bc0ee9 100644 --- a/README.md +++ b/README.md @@ -146,8 +146,8 @@ model <- stan_glmer( r2(model) #> # Bayesian R2 with Compatibility Interval #> -#> Conditional R2: 0.953 (95% CI [0.942, 0.964]) -#> Marginal R2: 0.826 (95% CI [0.724, 0.900]) +#> Conditional R2: 0.954 (95% CI [0.951, 0.957]) +#> Marginal R2: 0.414 (95% CI [0.204, 0.644]) library(lme4) model <- lmer(Reaction ~ Days + (1 + Days | Subject), data = sleepstudy) diff --git a/cran-comments.md b/cran-comments.md index 0fbbade1c..abc3f9611 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1 +1 @@ -This is a patch release that fixes failing CRAN checks for Mac OS old-rel. +Maintenance release. diff --git a/inst/WORDLIST b/inst/WORDLIST index e5e90d844..9c77a11fd 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -9,6 +9,7 @@ Ankerst Archimbaud Arel Asq +Azen BCI BFBayesFactor BMJ @@ -27,6 +28,7 @@ Breunig Breusch BRM Bryk +Budescu Bundock Burnham Byrne @@ -39,13 +41,17 @@ Chisq CochransQ CompQuadForm Concurvity +Confounder +Cramer Cribari Cronbach's Crujeiras Csaki +DAGs DBSCAN DOI Datenerhebung +De Delacre Deskriptivstatistische DHARMa @@ -160,8 +166,10 @@ Neto's Nondegenerate Nordhausen Normed +Nicewander ORCID OSF +OSX Olkin PNFI Pek @@ -180,6 +188,7 @@ Raudenbush Raykov Revelle Rodríguez +Rohrer Rousseeuw Routledge SEM @@ -222,6 +231,7 @@ Vuong Vuong's WAIC Weisberg +Westreich Windmeijer Winsorization Witten @@ -232,6 +242,7 @@ Zavoinas Zhou Zomeren Zuur +acyclic afex al analyse @@ -250,7 +261,10 @@ brms brmsfit cauchy clusterable +confounder +confounders concurvity +dagitty datawizard dbscan der @@ -267,6 +281,7 @@ fixest fpsyg gam geoms +ggdag ggplot gjo glm diff --git a/man/check_dag.Rd b/man/check_dag.Rd new file mode 100644 index 000000000..e366ff07a --- /dev/null +++ b/man/check_dag.Rd @@ -0,0 +1,250 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/check_dag.R +\name{check_dag} +\alias{check_dag} +\alias{as.dag} +\title{Check correct model adjustment for identifying causal effects} +\usage{ +check_dag( + ..., + outcome = NULL, + exposure = NULL, + adjusted = NULL, + latent = NULL, + effect = c("all", "total", "direct"), + coords = NULL +) + +as.dag(x, ...) +} +\arguments{ +\item{...}{One or more formulas, which are converted into \strong{dagitty} syntax. +The first element may also be a model object. If a model object is provided, its +formula is used as the first formula, and all independent variables will be used +for the \code{adjusted} argument. See 'Details' and 'Examples'.} + +\item{outcome}{Name of the dependent variable (outcome), as character string +or as formula. Must be a valid name from the formulas provided in \code{...}. If +not set, the first dependent variable from the formulas is used.} + +\item{exposure}{Name of the exposure variable (as character string or +formula), for which the direct and total causal effect on the \code{outcome} +should be checked. Must be a valid name from the formulas provided in \code{...}. +If not set, the first independent variable from the formulas is used.} + +\item{adjusted}{A character vector or formula with names of variables that +are adjusted for in the model, e.g. \code{adjusted = c("x1", "x2")} or +\code{adjusted = ~ x1 + x2}.
If a model object is provided in \code{...}, any values in +\code{adjusted} will be overwritten by the model's independent variables.} + +\item{latent}{A character vector with names of latent variables in the model.} + +\item{effect}{Character string, indicating which effect to check. Can be +\code{"all"} (default), \code{"total"}, or \code{"direct"}.} + +\item{coords}{Coordinates of the variables when plotting the DAG. The +coordinates can be provided in three different ways: +\itemize{ +\item a list with two elements, \code{x} and \code{y}, which both are named vectors of +numerics. The names correspond to the variable names in the DAG, and the +values for \code{x} and \code{y} indicate the x/y coordinates in the plot. +\item a list with elements that correspond to the variables in the DAG. Each +element is a numeric vector of length two with x- and y-coordinate. +\item a data frame with three columns: \code{x}, \code{y} and \code{name} (which contains the +variable names). +} + +See 'Examples'.} + +\item{x}{An object of class \code{check_dag}, as returned by \code{check_dag()}.} +} +\value{ +An object of class \code{check_dag}, which can be visualized with \code{plot()}. +The returned object also inherits from class \code{dagitty} and thus can be used +with all functions from the \strong{ggdag} and \strong{dagitty} packages. +} +\description{ +The purpose of \code{check_dag()} is to build, check and visualize +your model based on directed acyclic graphs (DAG). The function checks if a +model is correctly adjusted for identifying specific relationships of +variables, especially directed (maybe also "causal") effects for given +exposures on an outcome. In case of incorrect adjustments, the function +suggests the minimal required variables that should be adjusted for (sometimes +also called "controlled for"), i.e. variables that \emph{at least} need to be +included in the model. Depending on the goal of the analysis, it is still +possible to add more variables to the model than just the minimally required +adjustment sets. + +\code{check_dag()} is a convenient wrapper around \code{ggdag::dagify()}, +\code{dagitty::adjustmentSets()} and \code{dagitty::adjustedNodes()} to check correct +adjustment sets. It returns a \strong{dagitty} object that can be visualized with +\code{plot()}. \code{as.dag()} is a small convenient function to return the +dagitty-string, which can be used for the online-tool from the +dagitty-website. +} +\section{Specifying the DAG formulas}{ + + +The formulas have the following syntax: +\itemize{ +\item One-directed paths: On the \emph{left-hand-side} is the name of the variable +that causal effects point to (direction of the arrows, in dagitty-language). +On the \emph{right-hand-side} are all variables from which causal effects are assumed +to come. For example, for the formula \code{Y ~ X1 + X2}, paths directed from +both \code{X1} and \code{X2} to \code{Y} are assumed. +\item Bi-directed paths: Use \verb{~~} to indicate bi-directed paths. For example, +\code{Y ~~ X} indicates that the path between \code{Y} and \code{X} is bi-directed, and +the arrow points in both directions. Bi-directed paths often indicate +unmeasured cause, or unmeasured confounding, of the two involved variables. +} +} + +\section{Minimally required adjustments}{ + + +The function checks if the model is correctly adjusted for identifying the +direct and total effects of the exposure on the outcome. If the model is +correctly specified, no adjustment is needed to estimate the direct effect.
+If the model is not correctly specified, the function suggests the minimally +required variables that should be adjusted for. The function distinguishes +between direct and total effects, and checks if the model is correctly +adjusted for both. If the model is cyclic, the function stops and suggests +to remove cycles from the model. + +Note that it sometimes could be necessary to try out different combinations +of suggested adjustments, because \code{check_dag()} can not always detect whether +\emph{at least} one of several variables is required, or whether adjustments should +be done for \emph{all} listed variables. It can be useful to copy the dagitty-code +(using \code{as.dag()}, which prints the dagitty-string into the console) into +the dagitty-website and play around with different adjustments. +} + +\section{Direct and total effects}{ + + +The direct effect of an exposure on an outcome is the effect that is not +mediated by any other variable in the model. The total effect is the sum of +the direct and indirect effects. The function checks if the model is correctly +adjusted for identifying the direct and total effects of the exposure on the +outcome. +} + +\section{Why are DAGs important - the Table 2 fallacy}{ + + +Correctly thinking about and identifying the relationships between variables +is important when it comes to reporting coefficients from regression models +that mutually adjust for "confounders" or include covariates. Different +coefficients might have different interpretations, depending on their +relationship to other variables in the model. Sometimes, a regression +coefficient represents the direct effect of an exposure on an outcome, but +sometimes it must be interpreted as total effect, due to the involvement +of mediating effects. This problem is also called "Table 2 fallacy" +(\emph{Westreich and Greenland 2013}). DAG helps visualizing and thereby focusing +the relationships of variables in a regression model to detect missing +adjustments or over-adjustment. 
+} + +\examples{ +\dontshow{if (require("ggdag", quietly = TRUE) && require("dagitty", quietly = TRUE) && require("see", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +# no adjustment needed +check_dag( + y ~ x + b, + outcome = "y", + exposure = "x" +) + +# incorrect adjustment +dag <- check_dag( + y ~ x + b + c, + x ~ b, + outcome = "y", + exposure = "x" +) +dag +plot(dag) + +# After adjusting for `b`, the model is correctly specified +dag <- check_dag( + y ~ x + b + c, + x ~ b, + outcome = "y", + exposure = "x", + adjusted = "b" +) +dag + +# using formula interface for arguments "outcome", "exposure" and "adjusted" +check_dag( + y ~ x + b + c, + x ~ b, + outcome = ~y, + exposure = ~x, + adjusted = ~ b + c +) + +# if not provided, "outcome" is taken from first formula, same for "exposure" +# thus, we can simplify the above expression to +check_dag( + y ~ x + b + c, + x ~ b, + adjusted = ~ b + c +) + +# use specific layout for the DAG +dag <- check_dag( + score ~ exp + b + c, + exp ~ b, + outcome = "score", + exposure = "exp", + coords = list( + # x-coordinates for all nodes + x = c(score = 5, exp = 4, b = 3, c = 3), + # y-coordinates for all nodes + y = c(score = 3, exp = 3, b = 2, c = 4) + ) +) +plot(dag) + +# alternative way of providing the coordinates +dag <- check_dag( + score ~ exp + b + c, + exp ~ b, + outcome = "score", + exposure = "exp", + coords = list( + # x/y coordinates for each node + score = c(5, 3), + exp = c(4, 3), + b = c(3, 2), + c = c(3, 4) + ) +) +plot(dag) + +# Objects returned by `check_dag()` can be used with "ggdag" or "dagitty" +ggdag::ggdag_status(dag) + +# Using a model object to extract information about outcome, +# exposure and adjusted variables +data(mtcars) +m <- lm(mpg ~ wt + gear + disp + cyl, data = mtcars) +dag <- check_dag( + m, + wt ~ disp + cyl, + wt ~ am +) +dag +plot(dag) +\dontshow{\}) # examplesIf} +} +\references{ +\itemize{ +\item Rohrer, J. M. (2018). Thinking clearly about correlations and causation: +Graphical causal models for observational data. Advances in Methods and +Practices in Psychological Science, 1(1), 27–42. \doi{10.1177/2515245917745629} +\item Westreich, D., & Greenland, S. (2013). The Table 2 Fallacy: Presenting and +Interpreting Confounder and Modifier Coefficients. American Journal of +Epidemiology, 177(4), 292–298. \doi{10.1093/aje/kws412} +} +} diff --git a/man/check_heterogeneity_bias.Rd b/man/check_heterogeneity_bias.Rd index 46f9f70a5..db18ae7ef 100644 --- a/man/check_heterogeneity_bias.Rd +++ b/man/check_heterogeneity_bias.Rd @@ -4,7 +4,7 @@ \alias{check_heterogeneity_bias} \title{Check model predictor for heterogeneity bias} \usage{ -check_heterogeneity_bias(x, select = NULL, by = NULL, group = NULL) +check_heterogeneity_bias(x, select = NULL, by = NULL, nested = FALSE) } \arguments{ \item{x}{A data frame or a mixed model object.} @@ -14,10 +14,27 @@ that should be checked. If \code{x} is a mixed model object, this argument will be ignored.} \item{by}{Character vector (or formula) with the name of the variable that -indicates the group- or cluster-ID. If \code{x} is a model object, this -argument will be ignored.} +indicates the group- or cluster-ID. For cross-classified or nested designs, +\code{by} can also identify two or more variables as group- or cluster-IDs. If +the data is nested and should be treated as such, set \code{nested = TRUE}. Else, +if \code{by} defines two or more variables and \code{nested = FALSE}, a cross-classified +design is assumed. 
If \code{x} is a model object, this argument will be ignored. -\item{group}{Deprecated. Use \code{by} instead.} +For nested designs, \code{by} can be: +\itemize{ +\item a character vector with the name of the variable that indicates the +levels, ordered from \emph{highest} level to \emph{lowest} (e.g. +\code{by = c("L4", "L3", "L2")}. +\item a character vector with variable names in the format \code{by = "L4/L3/L2"}, +where the levels are separated by \code{/}. +} + +See also section \emph{De-meaning for cross-classified designs} and +\emph{De-meaning for nested designs} below.} + +\item{nested}{Logical, if \code{TRUE}, the data is treated as nested. If \code{FALSE}, +the data is treated as cross-classified. Only applies if \code{by} contains more +than one variable.} } \description{ \code{check_heterogeneity_bias()} checks if model predictors or variables may @@ -25,11 +42,9 @@ cause a heterogeneity bias, i.e. if variables have a within- and/or between-effect (\emph{Bell and Jones, 2015}). } \examples{ -\dontshow{if (insight::check_if_installed("datawizard", minimum_version = "0.12.0", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} data(iris) iris$ID <- sample(1:4, nrow(iris), replace = TRUE) # fake-ID check_heterogeneity_bias(iris, select = c("Sepal.Length", "Petal.Length"), by = "ID") -\dontshow{\}) # examplesIf} } \references{ \itemize{ diff --git a/man/check_predictions.Rd b/man/check_predictions.Rd index 148df6994..6c13fc0d5 100644 --- a/man/check_predictions.Rd +++ b/man/check_predictions.Rd @@ -3,8 +3,6 @@ \name{check_predictions} \alias{check_predictions} \alias{check_predictions.default} -\alias{posterior_predictive_check} -\alias{check_posterior_predictions} \title{Posterior predictive checks} \usage{ check_predictions(object, ...) @@ -19,10 +17,6 @@ check_predictions(object, ...) verbose = TRUE, ... ) - -posterior_predictive_check(object, ...) - -check_posterior_predictions(object, ...) 
} \arguments{ \item{object}{A statistical model.} diff --git a/man/check_singularity.Rd b/man/check_singularity.Rd index cfe2dca0c..49015d0c7 100644 --- a/man/check_singularity.Rd +++ b/man/check_singularity.Rd @@ -92,6 +92,7 @@ model <- lme4::lmer( ) check_singularity(model) +\dontrun{ # Fixing singularity issues using priors in glmmTMB # Example taken from `vignette("priors", package = "glmmTMB")` dat <- readRDS(system.file( @@ -115,6 +116,7 @@ prior <- data.frame( model_with_priors <- update(model, priors = prior) # no singular fit check_singularity(model_with_priors) +} \dontshow{\}) # examplesIf} } \references{ diff --git a/man/icc.Rd b/man/icc.Rd index 071e1b617..8b38ece25 100644 --- a/man/icc.Rd +++ b/man/icc.Rd @@ -212,7 +212,7 @@ should be accurate and reliable for following mixed models or model families: \item Bernoulli (logistic) regression \item Binomial regression (with other than binary outcomes) \item Poisson and Quasi-Poisson regression -\item Negative binomial regression (including nbinom1 and nbinom2 families) +\item Negative binomial regression (including nbinom1, nbinom2 and nbinom12 families) \item Gaussian regression (linear models) \item Gamma regression \item Tweedie regression @@ -227,6 +227,7 @@ Following model families are not yet validated, but should work: \item Compound Poisson regression \item Generalized Poisson regression \item Log-normal regression +\item Skew-normal regression } Extracting variance components for models with zero-inflation part is not diff --git a/man/model_performance.lm.Rd b/man/model_performance.lm.Rd index 208d9b1c4..11a820fff 100644 --- a/man/model_performance.lm.Rd +++ b/man/model_performance.lm.Rd @@ -9,10 +9,10 @@ \arguments{ \item{model}{A model.} -\item{metrics}{Can be \code{"all"}, \code{"common"} or a character vector of -metrics to be computed (one or more of \code{"AIC"}, \code{"AICc"}, \code{"BIC"}, \code{"R2"}, -\code{"R2_adj"}, \code{"RMSE"}, \code{"SIGMA"}, \code{"LOGLOSS"}, \code{"PCP"}, \code{"SCORE"}). -\code{"common"} will compute AIC, BIC, R2 and RMSE.} +\item{metrics}{Can be \code{"all"}, \code{"common"} or a character vector of metrics to +be computed (one or more of \code{"AIC"}, \code{"AICc"}, \code{"BIC"}, \code{"R2"}, \code{"R2_adj"}, +\code{"RMSE"}, \code{"SIGMA"}, \code{"LOGLOSS"}, \code{"PCP"}, \code{"SCORE"}). \code{"common"} will +compute AIC, BIC, R2 and RMSE.} \item{verbose}{Toggle off warnings.} diff --git a/man/performance-package.Rd b/man/performance-package.Rd index b3f1d5a3f..4644e6443 100644 --- a/man/performance-package.Rd +++ b/man/performance-package.Rd @@ -34,16 +34,16 @@ Useful links: } \author{ -\strong{Maintainer}: Daniel Lüdecke \email{d.luedecke@uke.de} (\href{https://orcid.org/0000-0002-8895-3206}{ORCID}) (@strengejacke) +\strong{Maintainer}: Daniel Lüdecke \email{d.luedecke@uke.de} (\href{https://orcid.org/0000-0002-8895-3206}{ORCID}) Authors: \itemize{ - \item Dominique Makowski \email{dom.makowski@gmail.com} (\href{https://orcid.org/0000-0001-5375-9967}{ORCID}) (@Dom_Makowski) [contributor] - \item Mattan S. Ben-Shachar \email{matanshm@post.bgu.ac.il} (\href{https://orcid.org/0000-0002-4287-4801}{ORCID}) (@mattansb) [contributor] - \item Indrajeet Patil \email{patilindrajeet.science@gmail.com} (\href{https://orcid.org/0000-0003-1995-6531}{ORCID}) (@patilindrajeets) [contributor] + \item Dominique Makowski \email{dom.makowski@gmail.com} (\href{https://orcid.org/0000-0001-5375-9967}{ORCID}) [contributor] + \item Mattan S. 
Ben-Shachar \email{matanshm@post.bgu.ac.il} (\href{https://orcid.org/0000-0002-4287-4801}{ORCID}) [contributor] + \item Indrajeet Patil \email{patilindrajeet.science@gmail.com} (\href{https://orcid.org/0000-0003-1995-6531}{ORCID}) [contributor] \item Philip Waggoner \email{philip.waggoner@gmail.com} (\href{https://orcid.org/0000-0002-7825-7573}{ORCID}) [contributor] - \item Brenton M. Wiernik \email{brenton@wiernik.org} (\href{https://orcid.org/0000-0001-9560-6336}{ORCID}) (@bmwiernik) [contributor] - \item Rémi Thériault \email{remi.theriault@mail.mcgill.ca} (\href{https://orcid.org/0000-0003-4315-6788}{ORCID}) (@rempsyc) [contributor] + \item Brenton M. Wiernik \email{brenton@wiernik.org} (\href{https://orcid.org/0000-0001-9560-6336}{ORCID}) [contributor] + \item Rémi Thériault \email{remi.theriault@mail.mcgill.ca} (\href{https://orcid.org/0000-0003-4315-6788}{ORCID}) [contributor] } Other contributors: @@ -52,6 +52,7 @@ Other contributors: \item Martin Jullum [reviewer] \item gjo11 [reviewer] \item Etienne Bacher \email{etienne.bacher@protonmail.com} (\href{https://orcid.org/0000-0002-9271-5075}{ORCID}) [contributor] + \item Joseph Luchman (\href{https://orcid.org/0000-0002-8886-9717}{ORCID}) [contributor] } } diff --git a/man/performance_aicc.Rd b/man/performance_aicc.Rd index 0bf2120b7..b1d9a1255 100644 --- a/man/performance_aicc.Rd +++ b/man/performance_aicc.Rd @@ -46,6 +46,9 @@ incorporates a correction for small sample sizes. unlike \code{stats::AIC()}, returns the "corrected" AIC value on the original scale. To get back to the original scale, the likelihood of the model is multiplied by the Jacobian/derivative of the transformation. + +In case it is not possible to return the corrected AIC value, a warning +is given that the corrected log-likelihood value could not be computed. } \examples{ m <- lm(mpg ~ wt + cyl + gear + disp, data = mtcars) @@ -63,6 +66,13 @@ AIC(model) # performance_aic() correctly detects transformed response and # returns corrected AIC performance_aic(model) + +\dontrun{ +# there are a few exceptions where the corrected log-likelihood values +# cannot be returned. The following exampe gives a warning. +model <- lm(1 / mpg ~ factor(cyl), mtcars) +performance_aic(model) +} } \references{ \itemize{ diff --git a/man/r2.Rd b/man/r2.Rd index bf783e8d9..a8d514908 100644 --- a/man/r2.Rd +++ b/man/r2.Rd @@ -3,6 +3,7 @@ \name{r2} \alias{r2} \alias{r2.default} +\alias{r2.mlm} \alias{r2.merMod} \title{Compute the model's R2} \usage{ @@ -10,6 +11,8 @@ r2(model, ...) \method{r2}{default}(model, ci = NULL, verbose = TRUE, ...) +\method{r2}{mlm}(model, multivariate = TRUE, ...) + \method{r2}{merMod}(model, ci = NULL, tolerance = 1e-05, ...) } \arguments{ @@ -23,6 +26,10 @@ confidence intervals for R2 are calculated.} \item{verbose}{Logical. Should details about R2 and CI methods be given (\code{TRUE}) or not (\code{FALSE})?} +\item{multivariate}{Logical. Should multiple R2 values be reported as +separated by response (FALSE) or should a single R2 be reported as +combined across responses computed by \code{\link{r2_mlm}} (TRUE).} + \item{tolerance}{Tolerance for singularity check of random effects, to decide whether to compute random effect variances for the conditional r-squared or not. Indicates up to which value the convergence result is accepted. 
When @@ -70,5 +77,5 @@ r2(model) \seealso{ \code{\link[=r2_bayes]{r2_bayes()}}, \code{\link[=r2_coxsnell]{r2_coxsnell()}}, \code{\link[=r2_kullback]{r2_kullback()}}, \code{\link[=r2_loo]{r2_loo()}}, \code{\link[=r2_mcfadden]{r2_mcfadden()}}, \code{\link[=r2_nagelkerke]{r2_nagelkerke()}}, \code{\link[=r2_nakagawa]{r2_nakagawa()}}, \code{\link[=r2_tjur]{r2_tjur()}}, -\code{\link[=r2_xu]{r2_xu()}} and \code{\link[=r2_zeroinflated]{r2_zeroinflated()}}. +\code{\link[=r2_xu]{r2_xu()}}, \code{\link[=r2_zeroinflated]{r2_zeroinflated()}}, and \code{\link[=r2_mlm]{r2_mlm()}}. } diff --git a/man/r2_coxsnell.Rd b/man/r2_coxsnell.Rd index 2d53cb628..55dfaac1a 100644 --- a/man/r2_coxsnell.Rd +++ b/man/r2_coxsnell.Rd @@ -18,12 +18,12 @@ A named vector with the R2 value. Calculates the pseudo-R2 value based on the proposal from \emph{Cox & Snell (1989)}. } \details{ -This index was proposed by \emph{Cox and Snell (1989, pp. 208-9)} and, -apparently independently, by \emph{Magee (1990)}; but had been suggested -earlier for binary response models by \emph{Maddala (1983)}. However, this -index achieves a maximum of less than 1 for discrete models (i.e. models -whose likelihood is a product of probabilities) which have a maximum of 1, -instead of densities, which can become infinite \emph{(Nagelkerke, 1991)}. +This index was proposed by \emph{Cox and Snell (1989, pp. 208-9)} and, apparently +independently, by \emph{Magee (1990)}; but had been suggested earlier for binary +response models by \emph{Maddala (1983)}. However, this index achieves a maximum +of less than 1 for discrete models (i.e. models whose likelihood is a product +of probabilities) which have a maximum of 1, instead of densities, which can +become infinite \emph{(Nagelkerke, 1991)}. } \examples{ model <- glm(vs ~ wt + mpg, data = mtcars, family = "binomial") @@ -34,11 +34,11 @@ r2_coxsnell(model) \itemize{ \item Cox, D. R., Snell, E. J. (1989). Analysis of binary data (Vol. 32). Monographs on Statistics and Applied Probability. -\item Magee, L. (1990). R 2 measures based on Wald and likelihood ratio -joint significance tests. The American Statistician, 44(3), 250-253. +\item Magee, L. (1990). R 2 measures based on Wald and likelihood ratio joint +significance tests. The American Statistician, 44(3), 250-253. \item Maddala, G. S. (1986). Limited-dependent and qualitative variables in econometrics (No. 3). Cambridge university press. -\item Nagelkerke, N. J. (1991). A note on a general definition of the -coefficient of determination. Biometrika, 78(3), 691-692. +\item Nagelkerke, N. J. (1991). A note on a general definition of the coefficient +of determination. Biometrika, 78(3), 691-692. } } diff --git a/man/r2_mlm.Rd b/man/r2_mlm.Rd new file mode 100644 index 000000000..50b5389fd --- /dev/null +++ b/man/r2_mlm.Rd @@ -0,0 +1,74 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/r2_mlm.R +\name{r2_mlm} +\alias{r2_mlm} +\title{Multivariate R2} +\usage{ +r2_mlm(model, ...) +} +\arguments{ +\item{model}{Multivariate linear regression model.} + +\item{...}{Currently not used.} +} +\value{ +A named vector with the R2 values. +} +\description{ +Calculates two multivariate R2 values for multivariate linear regression. +} +\details{ +The two indexes returned summarize model fit for the set of predictors +given the system of responses. As compared to the default +\link[=r2]{r2} index for multivariate linear models, the indexes +returned by this function provide a single fit value collapsed across +all responses. 
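A brief sketch of how the per-response and collapsed interfaces relate (the model mirrors the examples used elsewhere in this diff; the index names follow the new tests):

library(performance)

model <- lm(cbind(qsec, drat) ~ wt + mpg + cyl, data = mtcars)

r2(model, multivariate = FALSE)  # one R2 per response, as before
r2(model, multivariate = TRUE)   # single values collapsed across responses
r2_mlm(model)                    # both indexes: "Symmetric Rxy" and "Asymmetric Pxy"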
+ +The two returned indexes were proposed by \emph{Van den Burg and Lewis (1988)} +as an extension of the metrics proposed by \emph{Cramer and Nicewander (1979)}. +Of the numerous indexes proposed across these two papers, only two metrics, +the \eqn{R_{xy}} and \eqn{P_{xy}}, are recommended for use +by \emph{Azen and Budescu (2006)}. + +For a multivariate linear regression with \eqn{p} predictors and +\eqn{q} responses where \eqn{p > q}, the \eqn{R_{xy}} index is +computed as: + +\deqn{R_{xy} = 1 - \prod_{i=1}^p (1 - \rho_i^2)} + +Where \eqn{\rho} is a canonical variate from a +\link[=cancor]{canonical correlation} between the predictors and responses. +This metric is symmetric and its value does not change when the roles of +the variables as predictors or responses are swapped. + +The \eqn{P_{xy}} is computed as: + +\deqn{P_{xy} = \frac{q - trace(\bf{S}_{\bf{YY}}^{-1}\bf{S}_{\bf{YY|X}})}{q}} + +Where \eqn{\bf{S}_{\bf{YY}}} is the matrix of response covariances and +\eqn{\bf{S}_{\bf{YY|X}}} is the matrix of residual covariances given +the predictors. This metric is asymmetric and can change +depending on which variables are considered predictors versus responses. +} +\examples{ +model <- lm(cbind(qsec, drat) ~ wt + mpg + cyl, data = mtcars) +r2_mlm(model) + +model_swap <- lm(cbind(wt, mpg, cyl) ~ qsec + drat, data = mtcars) +r2_mlm(model_swap) + +} +\references{ +\itemize{ +\item Azen, R., & Budescu, D. V. (2006). Comparing predictors in +multivariate regression models: An extension of dominance analysis. +Journal of Educational and Behavioral Statistics, 31(2), 157-180. +\item Cramer, E. M., & Nicewander, W. A. (1979). Some symmetric, +invariant measures of multivariate association. Psychometrika, 44, 43-54. +\item Van den Burg, W., & Lewis, C. (1988). Some properties of two +measures of multivariate association. Psychometrika, 53, 109-122. +} +} +\author{ +Joseph Luchman +} diff --git a/man/r2_nakagawa.Rd b/man/r2_nakagawa.Rd index 169af5709..75f2de3d0 100644 --- a/man/r2_nakagawa.Rd +++ b/man/r2_nakagawa.Rd @@ -111,7 +111,7 @@ should be accurate and reliable for following mixed models or model families: \item Bernoulli (logistic) regression \item Binomial regression (with other than binary outcomes) \item Poisson and Quasi-Poisson regression -\item Negative binomial regression (including nbinom1 and nbinom2 families) +\item Negative binomial regression (including nbinom1, nbinom2 and nbinom12 families) \item Gaussian regression (linear models) \item Gamma regression \item Tweedie regression @@ -126,6 +126,7 @@ Following model families are not yet validated, but should work: \item Compound Poisson regression \item Generalized Poisson regression \item Log-normal regression +\item Skew-normal regression } Extracting variance components for models with zero-inflation part is not diff --git a/tests/testthat/_snaps/check_dag.md b/tests/testthat/_snaps/check_dag.md new file mode 100644 index 000000000..779ec5a71 --- /dev/null +++ b/tests/testthat/_snaps/check_dag.md @@ -0,0 +1,275 @@ +# check_dag + + Code + print(dag) + Output + # Check for correct adjustment sets + - Outcome: y + - Exposure: x + + Identification of direct and total effects + + Model is correctly specified. + No adjustment needed to estimate the direct and total effect of `x` on `y`. + + +--- + + Code + print(dag) + Output + # Check for correct adjustment sets + - Outcome: y + - Exposure: x + - Adjustment: b + + Identification of direct and total effects + + Model is correctly specified. 
+ All minimal sufficient adjustments to estimate the direct and total effect were done. + + +--- + + Code + print(dag) + Output + # Check for correct adjustment sets + - Outcome: y + - Exposure: x + + Identification of direct and total effects + + Incorrectly adjusted! + To estimate the direct and total effect, at least adjust for `b`. Currently, the model does not adjust for any variables. + + +--- + + Code + print(dag) + Output + # Check for correct adjustment sets + - Outcome: y + - Exposure: x + - Adjustment: c + + Identification of direct and total effects + + Incorrectly adjusted! + To estimate the direct and total effect, at least adjust for `b` and `c`. Currently, the model only adjusts for `c`. You possibly also need to adjust for `b` to block biasing paths. + + +--- + + Code + print(dag) + Output + # Check for correct adjustment sets + - Outcome: y + - Exposure: x + - Adjustment: c + + Identification of direct and total effects + + Incorrectly adjusted! + To estimate the direct and total effect, at least adjust for `b` and `c`. Currently, the model only adjusts for `c`. You possibly also need to adjust for `b` to block biasing paths. + + +--- + + Code + print(dag) + Output + # Check for correct adjustment sets + - Outcome: mpg + - Exposure: wt + - Adjustments: cyl, disp and gear + + Identification of direct and total effects + + Model is correctly specified. + All minimal sufficient adjustments to estimate the direct and total effect were done. + + +# check_dag, multiple adjustment sets + + Code + print(dag) + Output + # Check for correct adjustment sets + - Outcome: exam + - Exposure: podcast + + Identification of direct and total effects + + Incorrectly adjusted! + To estimate the direct and total effect, at least adjust for one of the following sets: + - alertness, prepared + - alertness, skills_course + - mood, prepared + - mood, skills_course. + Currently, the model does not adjust for any variables. + + +--- + + Code + print(dag) + Output + # Check for correct adjustment sets + - Outcome: exam + - Exposure: podcast + - Adjustments: alertness and prepared + + Identification of direct and total effects + + Model is correctly specified. + All minimal sufficient adjustments to estimate the direct and total effect were done. + + +--- + + Code + print(dag) + Output + # Check for correct adjustment sets + - Outcome: exam + - Exposure: podcast + - Adjustment: alertness + + Identification of direct and total effects + + Incorrectly adjusted! + To estimate the direct and total effect, at least adjust for one of the following sets: + - alertness, prepared + - alertness, skills_course. + Currently, the model only adjusts for `alertness`. You possibly also need to adjust for `prepared` and `skills_course` to block biasing paths. + + +# check_dag, different adjustements for total and direct + + Code + print(dag) + Output + # Check for correct adjustment sets + - Outcome: outcome + - Exposure: exposure + + Identification of direct effects + + Incorrectly adjusted! + To estimate the direct effect, at least adjust for `x1` and `x2`. Currently, the model does not adjust for any variables. + + Identification of total effects + + Incorrectly adjusted! + To estimate the total effect, at least adjust for `x1`. Currently, the model does not adjust for any variables. + + +--- + + Code + print(dag) + Output + # Check for correct adjustment sets + - Outcome: outcome + - Exposure: exposure + - Adjustment: x1 + + Identification of direct effects + + Incorrectly adjusted! 
+ To estimate the direct effect, at least adjust for `x1` and `x2`. Currently, the model only adjusts for `x1`. You possibly also need to adjust for `x2` to block biasing paths. + + Identification of total effects + + Model is correctly specified. + All minimal sufficient adjustments to estimate the total effect were done. + + +--- + + Code + print(dag) + Output + # Check for correct adjustment sets + - Outcome: outcome + - Exposure: exposure + - Adjustment: x2 + + Identification of direct effects + + Incorrectly adjusted! + To estimate the direct effect, at least adjust for `x1` and `x2`. Currently, the model only adjusts for `x2`. You possibly also need to adjust for `x1` to block biasing paths. + + Identification of total effects + + Incorrectly adjusted! + To estimate the total effect, do not adjust for `x2`. + + +--- + + Code + print(dag) + Output + # Check for correct adjustment sets + - Outcome: outcome + - Exposure: exposure + - Adjustments: x1 and x2 + + Identification of direct effects + + Model is correctly specified. + All minimal sufficient adjustments to estimate the direct effect were done. + + Identification of total effects + + Incorrectly adjusted! + To estimate the total effect, do not adjust for some or all of `x1` and `x2`. + + +# check_dag, collider bias + + Code + print(dag) + Output + # Check for correct adjustment sets + - Outcome: SMD_ICD11 + - Exposure: agegroup + - Adjustments: edgroup3, gender_kid, pss4_kid_sum_2sd and residence + + Identification of direct effects + + Incorrectly adjusted! + To estimate the direct effect, at least adjust for `edgroup3`, `gender_kid`, `pss4_kid_sum_2sd`, `residence` and `sm_h_total_kid`. Currently, the model only adjusts for `edgroup3`, `gender_kid`, `pss4_kid_sum_2sd` and `residence`. You possibly also need to adjust for `sm_h_total_kid` to block biasing paths. + + Identification of total effects + + Model is correctly specified. + All minimal sufficient adjustments to estimate the total effect were done. + + +--- + + Code + print(dag) + Output + # Check for correct adjustment sets + - Outcome: SMD_ICD11 + - Exposure: agegroup + - Adjustments: edgroup3, gender_kid, pss4_kid_sum_2sd, residence and sm_h_total_kid + - Collider: sm_h_total_kid + + Identification of direct effects + + Incorrectly adjusted! + Your model adjusts for a potential collider. To estimate the direct effect, do not adjust for `sm_h_total_kid` to avoid collider-bias. It is recommended to double-check for the collider-bias on the dagitty-website. + + Identification of total effects + + Incorrectly adjusted! + Your model adjusts for a potential collider. To estimate the total effect, do not adjust for `sm_h_total_kid` to avoid collider-bias. It is recommended to double-check for the collider-bias on the dagitty-website. 
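A minimal, hypothetical collider illustration (much simpler than the snapshot above; the exact wording of the printed output may differ):

library(performance)

# `z` is a common effect of exposure `x` and outcome `y` (a collider);
# adjusting for it opens a biasing path, so check_dag() should advise
# against including it.
dag <- check_dag(
  y ~ x,
  z ~ x + y,
  outcome = "y",
  exposure = "x",
  adjusted = "z"
)
dag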
+ + diff --git a/tests/testthat/test-check_collinearity.R b/tests/testthat/test-check_collinearity.R index ba456ae6f..d62aec75d 100644 --- a/tests/testthat/test-check_collinearity.R +++ b/tests/testthat/test-check_collinearity.R @@ -24,7 +24,6 @@ test_that("check_collinearity, correct order in print", { test_that("check_collinearity", { - skip_if(getRversion() > "4.3.3") skip_if_not_installed("glmmTMB") skip_if_not(getRversion() >= "4.0.0") @@ -51,7 +50,6 @@ test_that("check_collinearity", { test_that("check_collinearity", { - skip_if(getRversion() > "4.3.3") skip_if_not_installed("glmmTMB") skip_if_not(getRversion() >= "4.0.0") diff --git a/tests/testthat/test-check_convergence.R b/tests/testthat/test-check_convergence.R index 0bfadc8a6..8897b785d 100644 --- a/tests/testthat/test-check_convergence.R +++ b/tests/testthat/test-check_convergence.R @@ -29,7 +29,6 @@ test_that("check_convergence", { test_that("check_convergence, glmmTMB", { - skip_if(getRversion() > "4.3.3") skip_if_not_installed("glmmTMB") data(iris) model <- suppressWarnings(glmmTMB::glmmTMB( diff --git a/tests/testthat/test-check_dag.R b/tests/testthat/test-check_dag.R new file mode 100644 index 000000000..0d6efa9d6 --- /dev/null +++ b/tests/testthat/test-check_dag.R @@ -0,0 +1,194 @@ +skip_on_cran() +skip_if_not_installed("ggdag") +skip_if_not_installed("dagitty") + +test_that("check_dag", { + dag <- check_dag( + y ~ x + b, + outcome = "y", + exposure = "x" + ) + expect_snapshot(print(dag)) + dag <- check_dag( + y ~ x + b, + outcome = "y", + exposure = "x", + adjusted = "b" + ) + expect_snapshot(print(dag)) + dag <- check_dag( + y ~ x + b + c, + x ~ b, + outcome = "y", + exposure = "x" + ) + expect_snapshot(print(dag)) + dag <- check_dag( + y ~ x + b + c, + x ~ b, + outcome = "y", + exposure = "x", + adjusted = "c" + ) + expect_snapshot(print(dag)) + dag <- check_dag( + y ~ x + b + c + d, + x ~ b, + x ~ c, + outcome = "y", + exposure = "x", + adjusted = "c" + ) + expect_snapshot(print(dag)) + data(mtcars) + m <- lm(mpg ~ wt + gear + disp + cyl, data = mtcars) + dag <- check_dag( + m, + wt ~ disp + cyl, + wt ~ am + ) + expect_snapshot(print(dag)) +}) + +test_that("check_dag, cylic error", { + expect_error( + check_dag( + y ~ x + b + c + d, + x ~ c + d, + b ~ x, + b ~ y, + outcome = "y", + exposure = "x", + adjusted = "c" + ), + regex = "Model is cyclic" + ) +}) + + +test_that("check_dag, multiple adjustment sets", { + dag <- check_dag( + podcast ~ mood + humor + skills_course, + alertness ~ mood, + mood ~ humor, + prepared ~ skills_course, + exam ~ alertness + prepared, + coords = ggdag::time_ordered_coords(), + exposure = "podcast", + outcome = "exam" + ) + expect_snapshot(print(dag)) + dag <- check_dag( + podcast ~ mood + humor + skills_course, + alertness ~ mood, + mood ~ humor, + prepared ~ skills_course, + exam ~ alertness + prepared, + adjusted = c("alertness", "prepared"), + exposure = "podcast", + outcome = "exam", + coords = ggdag::time_ordered_coords() + ) + expect_snapshot(print(dag)) + dag <- check_dag( + podcast ~ mood + humor + skills_course, + alertness ~ mood, + mood ~ humor, + prepared ~ skills_course, + exam ~ alertness + prepared, + coords = ggdag::time_ordered_coords(), + adjusted = "alertness", + exposure = "podcast", + outcome = "exam" + ) + expect_snapshot(print(dag)) +}) + + +test_that("check_dag, different adjustements for total and direct", { + dag <- check_dag( + outcome ~ exposure + x1 + x2, + x2 ~ exposure, + exposure ~ x1, + outcome = "outcome", + exposure = "exposure" + ) + 
expect_snapshot(print(dag)) + + dag <- check_dag( + outcome ~ exposure + x1 + x2, + x2 ~ exposure, + exposure ~ x1, + adjusted = "x1", + outcome = "outcome", + exposure = "exposure" + ) + expect_snapshot(print(dag)) + + dag <- check_dag( + outcome ~ exposure + x1 + x2, + x2 ~ exposure, + exposure ~ x1, + adjusted = "x2", + outcome = "outcome", + exposure = "exposure" + ) + expect_snapshot(print(dag)) + + dag <- check_dag( + outcome ~ exposure + x1 + x2, + x2 ~ exposure, + exposure ~ x1, + adjusted = c("x1", "x2"), + outcome = "outcome", + exposure = "exposure" + ) + expect_snapshot(print(dag)) +}) + + +test_that("check_dag, collider bias", { + dag <- check_dag( + SMD_ICD11 ~ agegroup + gender_kid + edgroup3 + residence + pss4_kid_sum_2sd + sm_h_total_kid, + pss4_kid_sum_2sd ~ gender_kid, + sm_h_total_kid ~ gender_kid + agegroup, + adjusted = c( + "agegroup", "gender_kid", "edgroup3", "residence", + "pss4_kid_sum_2sd" + ), + outcome = "SMD_ICD11", + exposure = "agegroup" + ) + expect_snapshot(print(dag)) + + dag <- check_dag( + SMD_ICD11 ~ agegroup + gender_kid + edgroup3 + residence + pss4_kid_sum_2sd + sm_h_total_kid, + pss4_kid_sum_2sd ~ gender_kid, + sm_h_total_kid ~ gender_kid + agegroup, + adjusted = c( + "agegroup", "gender_kid", "edgroup3", "residence", + "pss4_kid_sum_2sd", "sm_h_total_kid" + ), + outcome = "SMD_ICD11", + exposure = "agegroup" + ) + expect_snapshot(print(dag)) +}) + + +test_that("check_dag, formula-interface", { + dag <- check_dag( + y ~ x + b + c, + x ~ b, + outcome = ~y, + exposure = ~x, + adjusted = ~ b + c + ) + expect_identical(attributes(dag)$adjusted, c("b", "c")) + dag2 <- check_dag( + y ~ x + b + c, + x ~ b, + adjusted = ~ b + c + ) + expect_identical(dag, dag2) +}) diff --git a/tests/testthat/test-check_heterogeneity_bias.R b/tests/testthat/test-check_heterogeneity_bias.R index 429dcfc0c..9701a7c03 100644 --- a/tests/testthat/test-check_heterogeneity_bias.R +++ b/tests/testthat/test-check_heterogeneity_bias.R @@ -1,5 +1,5 @@ test_that("check_heterogeneity_bias", { - skip_if_not_installed("datawizard", minimum_version = "0.12.0") + skip_if_not_installed("datawizard") data(iris) set.seed(123) iris$ID <- sample.int(4, nrow(iris), replace = TRUE) # fake-ID @@ -32,3 +32,19 @@ test_that("check_heterogeneity_bias", { "Possible heterogeneity bias due to following predictors: Petal\\.Length, Petal\\.Width, Species" ) }) + +test_that("check_heterogeneity_bias", { + skip_if_not_installed("datawizard", minimum_version = "0.12.3") + data(efc, package = "datawizard") + dat <- na.omit(efc) + dat$e42dep <- factor(dat$e42dep) + dat$c172code <- factor(dat$c172code) + + out <- check_heterogeneity_bias( + dat, + select = "c12hour", + by = c("e42dep", "c172code"), + nested = TRUE + ) + expect_equal(out, "c12hour", ignore_attr = TRUE) +}) diff --git a/tests/testthat/test-check_model.R b/tests/testthat/test-check_model.R index 097f089a5..2b1141d26 100644 --- a/tests/testthat/test-check_model.R +++ b/tests/testthat/test-check_model.R @@ -52,7 +52,6 @@ test_that("`check_model()` works for quantreg", { }) test_that("`check_model()` warnings for tweedie", { - skip_if(getRversion() > "4.3.3") skip_if_not_installed("glmmTMB") skip_if_not_installed("lme4") data(sleepstudy, package = "lme4") diff --git a/tests/testthat/test-check_normality.R b/tests/testthat/test-check_normality.R index 44b461aea..f166c6f03 100644 --- a/tests/testthat/test-check_normality.R +++ b/tests/testthat/test-check_normality.R @@ -35,7 +35,6 @@ test_that("check_normality | afex", { }) test_that("check_normality 
| glmmTMB", { - skip_if(getRversion() > "4.3.3") skip_if_not_installed("glmmTMB") skip_if_not(getRversion() >= "4.0.0") diff --git a/tests/testthat/test-check_overdispersion.R b/tests/testthat/test-check_overdispersion.R index 06cc95dd0..2930322bb 100644 --- a/tests/testthat/test-check_overdispersion.R +++ b/tests/testthat/test-check_overdispersion.R @@ -1,5 +1,4 @@ test_that("check_overdispersion, glmmTMB-poisson", { - skip_if(getRversion() > "4.3.3") skip_if_not_installed("glmmTMB") skip_if_not(getRversion() >= "4.0.0") data(Salamanders, package = "glmmTMB") @@ -50,7 +49,6 @@ test_that("check_overdispersion, glmmTMB-poisson", { test_that("check_overdispersion, glmmTMB-poisson mixed", { - skip_if(getRversion() > "4.3.3") skip_if_not_installed("glmmTMB") skip_if_not(getRversion() >= "4.0.0") data(Salamanders, package = "glmmTMB") @@ -78,7 +76,6 @@ test_that("check_overdispersion, glmmTMB-poisson mixed", { test_that("check_overdispersion, zero-inflated and negbin", { - skip_if(getRversion() > "4.3.3") skip_if_not_installed("glmmTMB") skip_if_not_installed("DHARMa") skip_if_not(getRversion() >= "4.0.0") @@ -183,7 +180,6 @@ test_that("check_overdispersion, MASS::negbin", { test_that("check_overdispersion, genpois", { - skip_if(getRversion() > "4.3.3") skip_if_not_installed("glmmTMB") skip_if_not_installed("DHARMa") skip_if_not(getRversion() >= "4.0.0") diff --git a/tests/testthat/test-check_predictions.R b/tests/testthat/test-check_predictions.R index 04bc1d2cd..142a8b429 100644 --- a/tests/testthat/test-check_predictions.R +++ b/tests/testthat/test-check_predictions.R @@ -35,7 +35,6 @@ test_that("check_predictions", { test_that("check_predictions, glmmTMB", { - skip_if(getRversion() > "4.3.3") skip_if_not_installed("glmmTMB") data(mtcars) model <- glmmTMB::glmmTMB(vs ~ disp, data = mtcars, family = binomial()) diff --git a/tests/testthat/test-check_residuals.R b/tests/testthat/test-check_residuals.R index 9be029c0c..dbaebe447 100644 --- a/tests/testthat/test-check_residuals.R +++ b/tests/testthat/test-check_residuals.R @@ -19,13 +19,13 @@ test_that("check_residuals and simulate_residuals", { # check raw residuals expect_equal( head(residuals(res)), - c(0.55349, 0.44012, 0.39826, 0.9825, 0.90753, 0.05809), + c(0.01815, 0.7062, 0.56076, 0.19282, 0.31423, 0.10115), tolerance = 1e-4, ignore_attr = TRUE ) expect_equal( head(residuals(res, quantile_function = stats::qnorm)), - c(0.13448, -0.15068, -0.25785, 2.10826, 1.3257, -1.57097), + c(-2.09365, 0.54232, 0.1529, -0.86756, -0.4839, -1.27504), tolerance = 1e-4, ignore_attr = TRUE ) @@ -57,10 +57,10 @@ test_that("check_residuals and simulate_residuals", { # check_residuals out <- check_residuals(res) - expect_equal(out, 0.01884602, ignore_attr = TRUE, tolerance = 1e-4) + expect_equal(out, 0.02574286, ignore_attr = TRUE, tolerance = 1e-4) expect_identical( capture.output(print(out)), - "Warning: Non-uniformity of simulated residuals detected (p = 0.019)." + "Warning: Non-uniformity of simulated residuals detected (p = 0.026)." 
) expect_error(simulate_residuals(m, iterations = 1), "`iterations` must be") diff --git a/tests/testthat/test-check_singularity.R b/tests/testthat/test-check_singularity.R index dc0d56964..8a07b85e0 100644 --- a/tests/testthat/test-check_singularity.R +++ b/tests/testthat/test-check_singularity.R @@ -29,7 +29,7 @@ test_that("check_singularity", { newparam = list( beta = 0, theta = rep(0, 21), - betad = 0 + betadisp = 0 ) )[[1]] expect_warning(expect_warning({ diff --git a/tests/testthat/test-check_zeroinflation.R b/tests/testthat/test-check_zeroinflation.R index a7ff7cbbd..53f8af9f5 100644 --- a/tests/testthat/test-check_zeroinflation.R +++ b/tests/testthat/test-check_zeroinflation.R @@ -1,5 +1,4 @@ test_that("check_zeroinflation", { - skip_if(getRversion() > "4.3.3") skip_if_not_installed("glmmTMB") set.seed(123) data(Salamanders, package = "glmmTMB") @@ -22,7 +21,6 @@ test_that("check_zeroinflation", { test_that("check_zeroinflation, glmmTMB with and without zero-inflation component", { - skip_if(getRversion() > "4.3.3") skip_if_not_installed("glmmTMB") skip_if_not_installed("DHARMa") set.seed(123) @@ -109,27 +107,26 @@ test_that("check_zeroinflation, glmer.nb", { test_that("check_zeroinflation, glmmTMB nbinom", { - skip_if(getRversion() > "4.3.3") skip_if_not_installed("glmmTMB") skip_if_not_installed("DHARMa") skip_on_cran() + data(Salamanders, package = "glmmTMB") + m <- glmmTMB::glmmTMB( + count ~ spp + mined + (1 | site), + family = glmmTMB::nbinom1(), + data = Salamanders + ) set.seed(1234) - dat <- DHARMa::createData(sampleSize = 1000) - fit <- suppressWarnings(glmmTMB::glmmTMB( - observedResponse ~ Environment1 + (1 | group), - data = dat, - family = glmmTMB::nbinom1() - )) expect_equal( - check_zeroinflation(fit), + check_zeroinflation(m), structure( list( - predicted.zeros = 462, - observed.zeros = 482L, - ratio = 0.95850622406639, + predicted.zeros = 389, + observed.zeros = 387L, + ratio = 1.00635658914729, tolerance = 0.1, - p.value = 0.776 + p.value = 0.944 ), class = "check_zi" ), @@ -166,7 +163,6 @@ test_that("check_zeroinflation, MASS::negbin", { test_that("check_zeroinflation, genpois", { - skip_if(getRversion() > "4.3.3") skip_if_not_installed("glmmTMB") skip_if_not_installed("DHARMa") skip_if_not(getRversion() >= "4.0.0") diff --git a/tests/testthat/test-icc.R b/tests/testthat/test-icc.R index 82da6a3eb..73f8b6e3f 100644 --- a/tests/testthat/test-icc.R +++ b/tests/testthat/test-icc.R @@ -121,7 +121,7 @@ test_that("icc", { test_that("icc, glmmTMB 1.1.9+", { - skip_if_not_installed("glmmTMB", minimum_version = "1.1.9") + skip_if_not_installed("glmmTMB") set.seed(101) dd <- data.frame( z = rnorm(1000), diff --git a/tests/testthat/test-model_performance.bayesian.R b/tests/testthat/test-model_performance.bayesian.R index ff78ca3cb..a62f4bf40 100644 --- a/tests/testthat/test-model_performance.bayesian.R +++ b/tests/testthat/test-model_performance.bayesian.R @@ -60,7 +60,7 @@ test_that("model_performance.brmsfit", { expect_equal(perf$R2, 0.954538, tolerance = 1e-3) expect_equal(perf$R2_adjusted, 0.9529004, tolerance = 1e-3) expect_equal(perf$ELPD, -70.40493, tolerance = 1e-3) - expect_identical(colnames(perf), c( + expect_named(perf, c( "ELPD", "ELPD_SE", "LOOIC", "LOOIC_SE", "WAIC", "R2", "R2_marginal", "R2_adjusted", "R2_adjusted_marginal", "ICC", "RMSE", "Sigma" )) diff --git a/tests/testthat/test-r2.R b/tests/testthat/test-r2.R index f521b3aab..6b4ac0b05 100644 --- a/tests/testthat/test-r2.R +++ b/tests/testthat/test-r2.R @@ -46,8 +46,7 @@ skip_if_not_installed("withr") 
withr::with_environment( new.env(), test_that("r2 glmmTMB, no ranef", { - skip_if(getRversion() > "4.3.3") - skip_if_not_installed("glmmTMB") + skip_if_not_installed("glmmTMB", minimum_version = "1.1.10") data(Owls, package = "glmmTMB") # linear --------------------------------------------------------------- m <- glmmTMB::glmmTMB(NegPerChick ~ BroodSize + ArrivalTime, data = Owls) @@ -80,6 +79,7 @@ withr::with_environment( out2 <- r2(m2) expect_equal(out[[1]], out2[[1]], tolerance = 1e-3, ignore_attr = TRUE) # zero-inflated -------------------------------------------------------------- + skip_if_not(packageVersion("glmmTMB") > "1.1.10") skip_if_not_installed("pscl") data(bioChemists, package = "pscl") m <- glmmTMB::glmmTMB( diff --git a/tests/testthat/test-r2_mlm.R b/tests/testthat/test-r2_mlm.R new file mode 100644 index 000000000..a532b57a7 --- /dev/null +++ b/tests/testthat/test-r2_mlm.R @@ -0,0 +1,9 @@ +test_that("r2_mlm_Rxy", { + model <- lm(cbind(qsec, drat) ~ wt + mpg, data = mtcars) + expect_equal(r2_mlm(model)[["Symmetric Rxy"]], 0.68330688076502, tolerance = 1e-3) +}) + +test_that("r2_mlm_Pxy", { + model <- lm(cbind(qsec, drat) ~ wt + mpg, data = mtcars) + expect_equal(r2_mlm(model)[["Asymmetric Pxy"]], 0.407215267524997, tolerance = 1e-3) +})
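The two indexes tested above can also be reproduced by hand from the formulas documented for `r2_mlm()`; a sketch as a cross-check (the internal implementation may differ in details such as scaling, so small numerical deviations are possible):

library(performance)

model <- lm(cbind(qsec, drat) ~ wt + mpg, data = mtcars)
X <- as.matrix(mtcars[, c("wt", "mpg")])    # predictors
Y <- as.matrix(mtcars[, c("qsec", "drat")]) # responses

# Symmetric R_xy = 1 - prod(1 - rho_i^2), rho_i = canonical correlations
rho <- stats::cancor(X, Y)$cor
R_xy <- 1 - prod(1 - rho^2)

# Asymmetric P_xy = (q - trace(S_YY^-1 S_YY|X)) / q
q <- ncol(Y)
S_yy <- cov(Y)                 # response covariances
S_res <- cov(residuals(model)) # residual covariances given the predictors
P_xy <- (q - sum(diag(solve(S_yy) %*% S_res))) / q

c(R_xy = R_xy, P_xy = P_xy)
r2_mlm(model) # expected: Symmetric Rxy ~ 0.683, Asymmetric Pxy ~ 0.407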