From 37056e37b77d736c1859985756f338f992dac987 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 21 Jul 2024 15:02:30 +0200 Subject: [PATCH 01/41] update readme --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 5153170da..fb7bc0ee9 100644 --- a/README.md +++ b/README.md @@ -146,8 +146,8 @@ model <- stan_glmer( r2(model) #> # Bayesian R2 with Compatibility Interval #> -#> Conditional R2: 0.953 (95% CI [0.942, 0.964]) -#> Marginal R2: 0.826 (95% CI [0.724, 0.900]) +#> Conditional R2: 0.954 (95% CI [0.951, 0.957]) +#> Marginal R2: 0.414 (95% CI [0.204, 0.644]) library(lme4) model <- lmer(Reaction ~ Days + (1 + Days | Subject), data = sleepstudy) From b2be78b6e80e28befd9c3e63cd70def3dfc132d6 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sat, 3 Aug 2024 21:26:09 +0200 Subject: [PATCH 02/41] Wrapper around ggdag and dagitty (#761) --- DESCRIPTION | 6 +- NAMESPACE | 6 + NEWS.md | 6 + R/check_dag.R | 299 +++++++++++++++++++++++++++++ inst/WORDLIST | 4 + man/check_dag.Rd | 103 ++++++++++ tests/testthat/_snaps/check_dag.md | 92 +++++++++ tests/testthat/test-check_dag.R | 49 +++++ 8 files changed, 563 insertions(+), 2 deletions(-) create mode 100644 R/check_dag.R create mode 100644 man/check_dag.Rd create mode 100644 tests/testthat/_snaps/check_dag.md create mode 100644 tests/testthat/test-check_dag.R diff --git a/DESCRIPTION b/DESCRIPTION index aa64c172c..d8f21a485 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: performance Title: Assessment of Regression Models Performance -Version: 0.12.2 +Version: 0.12.2.1 Authors@R: c(person(given = "Daniel", family = "Lüdecke", @@ -89,6 +89,7 @@ Suggests: CompQuadForm, correlation, cplm, + dagitty, dbscan, DHARMa, estimatr, @@ -97,7 +98,7 @@ Suggests: forecast, ftExtra, gamm4, - ggplot2, + ggdag, glmmTMB, graphics, Hmisc, @@ -155,3 +156,4 @@ Config/Needs/website: r-lib/pkgdown, easystats/easystatstemplate Config/rcmdcheck/ignore-inconsequential-notes: true +Remotes: easystats/see#352 diff --git a/NAMESPACE b/NAMESPACE index 0827931d6..4d59423a9 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -49,6 +49,8 @@ S3method(check_convergence,"_glm") S3method(check_convergence,default) S3method(check_convergence,glmmTMB) S3method(check_convergence,merMod) +S3method(check_dag,dagitty) +S3method(check_dag,default) S3method(check_distribution,default) S3method(check_distribution,numeric) S3method(check_heteroscedasticity,default) @@ -269,6 +271,7 @@ S3method(plot,binned_residuals) S3method(plot,check_autocorrelation) S3method(plot,check_clusterstructure) S3method(plot,check_collinearity) +S3method(plot,check_dag) S3method(plot,check_distribution) S3method(plot,check_distribution_numeric) S3method(plot,check_heteroscedasticity) @@ -289,6 +292,7 @@ S3method(print,binned_residuals) S3method(print,check_autocorrelation) S3method(print,check_collinearity) S3method(print,check_concurvity) +S3method(print,check_dag) S3method(print,check_distribution) S3method(print,check_distribution_numeric) S3method(print,check_heterogeneity_bias) @@ -538,12 +542,14 @@ S3method(test_vuong,default) S3method(test_wald,ListNestedRegressions) S3method(test_wald,ListNonNestedRegressions) S3method(test_wald,default) +export(as.dag) export(binned_residuals) export(check_autocorrelation) export(check_clusterstructure) export(check_collinearity) export(check_concurvity) export(check_convergence) +export(check_dag) export(check_distribution) export(check_factorstructure) export(check_heterogeneity_bias) diff --git a/NEWS.md b/NEWS.md index cb9cc26bd..fa8d7451d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,9 @@ +# performance 0.12.3 + +## New functions + +* `check_dag()`, to check DAGs for correct adjustment sets. + # performance 0.12.2 Patch release, to ensure that _performance_ runs with older version of diff --git a/R/check_dag.R b/R/check_dag.R new file mode 100644 index 000000000..215f5f89d --- /dev/null +++ b/R/check_dag.R @@ -0,0 +1,299 @@ +#' @title Check correct model adjustment for identifying causal effects +#' @name check_dag +#' +#' @description `check_dag()` checks if a model is correctly adjusted for +#' identifying causal effects. It returns a **dagitty** object that can be +#' visualized with `plot()`. `check_dag()` is a convenient wrapper around +#' `ggdag::dagify()`, which used `dagitty::adjustmentSets()` and +#' `dagitty::adjustedNodes()` to check if the model is correctly adjusted for +#' identifying causal (direct and total) effects of a given exposure on the +#' outcome. `as.dag()` is a small convenient function to return the dagitty-string, +#' which can be used for the online-tool from the dagitty-website. +#' +#' @param ... One or more formulas, which are converted into **dagitty** syntax. +#' First element may also be model object. If a model objects is provided, its +#' formula is used as first formula. See 'Details'. +#' @param outcome Name of the dependent variable (outcome), as character string. +#' Must be a valid name from the formulas. If not set, the first dependent +#' variable from the formulas is used. +#' @param exposure Name of the exposure variable (as character string), for +#' which the direct and total causal effect on the `outcome` should be checked. +#' Must be a valid name from the formulas. If not set, the first independent +#' variable from the formulas is used. +#' @param adjusted A character vector with names of variables that are adjusted +#' for in the model. +#' @param latent A character vector with names of latent variables in the model. +#' @param effect Character string, indicating which effect to check. Can be +#' `"all"` (default), `"total"`, or `"direct"`. +#' @param x An object of class `check_dag`, as returned by `check_dag()`. +#' +#' @details +#' The formulas have following syntax: +#' +#' - One-directed paths: On the *left-hand-side* is the name of the variables +#' where causal effects point to (direction of the arrows, in dagitty-language). +#' On the *right-hand-side* are all variables where causal effects are assumed +#' to come from. For example, the formula `Y ~ X1 + X2`, paths directed from +#' both `X1` and `X2` to `Y` are assumed. +#' +#' - Bi-directed paths: Use `~~` to indicate bi-directed paths. For example, +#' `Y ~~ X` indicates that the path between `Y` and `X` is bi-directed, and +#' the arrow points in both directions. +#' +#' @return An object of class `check_dag`, which can be visualized with `plot()`. +#' The returned object also inherits from class `dagitty` and thus can be used +#' with all functions from the **ggdag** and **dagitty** packages. +#' +#' @examplesIf require("ggdag", quietly = TRUE) && require("dagitty", quietly = TRUE) && require("see", quietly = TRUE) +#' # no adjustment needed +#' check_dag( +#' y ~ x + b, +#' outcome = "y", +#' exposure = "x" +#' ) +#' +#' # incorrect adjustment +#' dag <- check_dag( +#' y ~ x + b + c, +#' x ~ b, +#' outcome = "y", +#' exposure = "x" +#' ) +#' dag +#' plot(dag) +#' +#' # After adjusting for `b`, the model is correctly specified +#' dag <- check_dag( +#' y ~ x + b + c, +#' x ~ b, +#' outcome = "y", +#' exposure = "x", +#' adjusted = "b" +#' ) +#' dag +#' +#' # Objects returned by `check_dag()` can be used with "ggdag" or "dagitty" +#' ggdag::ggdag_status(dag) +#' @export +check_dag <- function(..., + outcome = NULL, + exposure = NULL, + adjusted = NULL, + latent = NULL, + effect = c("all", "total", "direct")) { + UseMethod("check_dag") +} + + +#' @export +check_dag.dagitty <- function(..., + outcome = NULL, + exposure = NULL, + adjusted = NULL, + latent = NULL, + effect = c("all", "total", "direct")) { + insight::format_error("This function is not yet implemented.") +} + + +#' @export +check_dag.default <- function(..., + outcome = NULL, + exposure = NULL, + adjusted = NULL, + latent = NULL, + effect = c("all", "total", "direct")) { + insight::check_if_installed( + c("ggdag", "dagitty"), + reason = "to check correct adjustments for identifying causal effects." + ) + + effect <- match.arg(effect) + + # retrieve formulas + formulas <- list(...) + + # check if first object is a model object, and convert to formula + if (insight::is_regression_model(formulas[[1]])) { + vars <- insight::find_variables( + formulas[[1]], + effects = "fixed", + component = "conditional", + flatten = FALSE + ) + formulas[[1]] <- stats::as.formula( + paste(vars$response, "~", paste(vars$conditional, collapse = "+")) + ) + } + + # if outcome is not set, use first dependent variable + if (is.null(outcome)) { + outcome <- insight::find_response(formulas[[1]]) + } + + # if exposure is not set, use first independent variable + if (is.null(exposure)) { + exposure <- insight::find_variables( + formulas[[1]], + effects = "fixed", + component = "conditional", + flatten = FALSE + )$conditional[1] + } + + # convert to dag + dag_args <- c(formulas, list(exposure = exposure, outcome = outcome, latent = latent)) + dag <- do.call(ggdag::dagify, dag_args) + + # add adjustments + if (!is.null(adjusted)) { + dag <- .adjust_dag(dag, adjusted) + } + + .finalize_dag(dag, effect, outcome, exposure, adjusted) +} + + +# helper ---------------------------------------------------------------------- + +.finalize_dag <- function(dag, effect, outcome, exposure, adjusted) { + # check for cyclic DAG + cycles <- unlist(dagitty::findCycle(dag)) + + # stop if cyclic + if (!is.null(cycles)) { + insight::format_error(paste0( + "Model is cyclic. Causal effects can't be determined for cyclic models. Please remove cycles from the model. To do so, check following variables: ", # nolint + datawizard::text_concatenate(unique(cycles)) + )) + } + + # data for checking effects + checks <- lapply(c("direct", "total"), function(x) { + adjustment_set <- unlist(dagitty::adjustmentSets(dag, effect = x), use.names = FALSE) + adjustment_nodes <- unlist(dagitty::adjustedNodes(dag), use.names = FALSE) + list( + adjustment_not_needed = is.null(adjustment_set) && is.null(adjustment_nodes), + incorrectly_adjusted = is.null(adjustment_set) && !is.null(adjustment_nodes), + current_adjustments = adjustment_nodes, + minimal_adjustments = adjustment_set + ) + }) + + attr(dag, "effect") <- effect + attr(dag, "outcome") <- outcome + attr(dag, "exposure") <- exposure + attr(dag, "adjusted") <- adjusted + attr(dag, "check_direct") <- insight::compact_list(checks[[1]]) + attr(dag, "check_total") <- insight::compact_list(checks[[2]]) + + class(dag) <- c(c("check_dag", "see_check_dag"), class(dag)) + + dag +} + + +.adjust_dag <- function(dag, adjusted) { + for (i in adjusted) { + dag <- gsub(paste0("\n", i, "\n"), paste0("\n", i, " [adjusted]\n"), dag) + } + dag +} + + +# methods -------------------------------------------------------------------- + +#' @rdname check_dag +#' @export +as.dag <- function(x, ...) { + if (!inherits(x, "check_dag")) { + insight::format_error("Input is not of class `check_dag.") + } + cat(as.character(x)) +} + + +#' @export +print.check_dag <- function(x, ...) { + effect <- attributes(x)$effect + + for (i in c("direct", "total")) { + if (i == "direct") { + out <- attributes(x)$check_direct + } else { + out <- attributes(x)$check_total + } + + exposure_outcome_text <- paste0( + "\n- Outcome: ", attributes(x)$outcome, + "\n- Exposure", ifelse(length(attributes(x)$exposure) > 1, "s", ""), + ": ", datawizard::text_concatenate(attributes(x)$exposure) + ) + + # build message with check results for effects ----------------------- + + if (isTRUE(out$adjustment_not_needed)) { + # Scenario 1: no adjustment needed + msg <- paste0( + insight::color_text("Model is correctly specified.", "green"), + exposure_outcome_text, + "\n\nNo adjustment needed to estimate the ", i, " effect of ", + datawizard::text_concatenate(attributes(x)$exposure), + " on ", + attributes(x)$outcome, + "." + ) + } else if (isTRUE(out$incorrectly_adjusted)) { + # Scenario 2: incorrectly adjusted, adjustments where none is allowed + msg <- paste0( + insight::color_text("Incorrectly adjusted!", "red"), + exposure_outcome_text, + "\n\nTo estimate the ", i, " effect, do ", + insight::color_text("not", "italic"), + " adjust for: ", + datawizard::text_concatenate(out$current_adjustments), + "." + ) + } else if (length(out$current_adjustments) != length(out$minimal_adjustment)) { + # Scenario 3: missing adjustments + msg <- paste0( + insight::color_text("Incorrectly adjusted!", "red"), + exposure_outcome_text, + "\n\nTo estimate the ", i, " effect, ", + insight::color_text("also", "italic"), + " adjust for: ", + insight::color_text(datawizard::text_concatenate(out$minimal_adjustments), "yellow"), + "." + ) + if (is.null(out$current_adjustments)) { + msg <- paste0(msg, "\nCurrently, the model does not adjust for any variables.") + } else { + msg <- paste0( + msg, "\nCurrently, the model currently only adjusts for ", + insight::color_text(datawizard::text_concatenate(out$current_adjustments), "yellow"), "." + ) + } + } else { + # Scenario 4: correct adjustment + msg <- paste0( + insight::color_text("Model is correctly specified.", "green"), + exposure_outcome_text, + "\n\nAll minimal sufficient adjustments to estimate the ", i, " effect were done." + ) + } + + if (effect %in% c("all", i)) { + cat(insight::print_color(insight::format_message( + paste0("# Correct adjustments for identifying {.i ", i, "} effects\n\n") + ), "blue")) + cat(msg) + cat("\n\n") + } + } +} + +#' @export +plot.check_dag <- function(x, ...) { + insight::check_if_installed("see", "to plot DAG") + NextMethod() +} diff --git a/inst/WORDLIST b/inst/WORDLIST index e5e90d844..9817a3a47 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -43,6 +43,7 @@ Cribari Cronbach's Crujeiras Csaki +DAGs DBSCAN DOI Datenerhebung @@ -162,6 +163,7 @@ Nordhausen Normed ORCID OSF +OSX Olkin PNFI Pek @@ -251,6 +253,7 @@ brmsfit cauchy clusterable concurvity +dagitty datawizard dbscan der @@ -267,6 +270,7 @@ fixest fpsyg gam geoms +ggdag ggplot gjo glm diff --git a/man/check_dag.Rd b/man/check_dag.Rd new file mode 100644 index 000000000..b0039a142 --- /dev/null +++ b/man/check_dag.Rd @@ -0,0 +1,103 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/check_dag.R +\name{check_dag} +\alias{check_dag} +\alias{as.dag} +\title{Check correct model adjustment for identifying causal effects} +\usage{ +check_dag( + ..., + outcome = NULL, + exposure = NULL, + adjusted = NULL, + latent = NULL, + effect = c("all", "total", "direct") +) + +as.dag(x, ...) +} +\arguments{ +\item{...}{One or more formulas, which are converted into \strong{dagitty} syntax. +First element may also be model object. If a model objects is provided, its +formula is used as first formula. See 'Details'.} + +\item{outcome}{Name of the dependent variable (outcome), as character string. +Must be a valid name from the formulas. If not set, the first dependent +variable from the formulas is used.} + +\item{exposure}{Name of the exposure variable (as character string), for +which the direct and total causal effect on the \code{outcome} should be checked. +Must be a valid name from the formulas. If not set, the first independent +variable from the formulas is used.} + +\item{adjusted}{A character vector with names of variables that are adjusted +for in the model.} + +\item{latent}{A character vector with names of latent variables in the model.} + +\item{effect}{Character string, indicating which effect to check. Can be +\code{"all"} (default), \code{"total"}, or \code{"direct"}.} + +\item{x}{An object of class \code{check_dag}, as returned by \code{check_dag()}.} +} +\value{ +An object of class \code{check_dag}, which can be visualized with \code{plot()}. +The returned object also inherits from class \code{dagitty} and thus can be used +with all functions from the \strong{ggdag} and \strong{dagitty} packages. +} +\description{ +\code{check_dag()} checks if a model is correctly adjusted for +identifying causal effects. It returns a \strong{dagitty} object that can be +visualized with \code{plot()}. \code{check_dag()} is a convenient wrapper around +\code{ggdag::dagify()}, which used \code{dagitty::adjustmentSets()} and +\code{dagitty::adjustedNodes()} to check if the model is correctly adjusted for +identifying causal (direct and total) effects of a given exposure on the +outcome. \code{as.dag()} is a small convenient function to return the dagitty-string, +which can be used for the online-tool from the dagitty-website. +} +\details{ +The formulas have following syntax: +\itemize{ +\item One-directed paths: On the \emph{left-hand-side} is the name of the variables +where causal effects point to (direction of the arrows, in dagitty-language). +On the \emph{right-hand-side} are all variables where causal effects are assumed +to come from. For example, the formula \code{Y ~ X1 + X2}, paths directed from +both \code{X1} and \code{X2} to \code{Y} are assumed. +\item Bi-directed paths: Use \verb{~~} to indicate bi-directed paths. For example, +\code{Y ~~ X} indicates that the path between \code{Y} and \code{X} is bi-directed, and +the arrow points in both directions. +} +} +\examples{ +\dontshow{if (require("ggdag", quietly = TRUE) && require("dagitty", quietly = TRUE) && require("see", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +# no adjustment needed +check_dag( + y ~ x + b, + outcome = "y", + exposure = "x" +) + +# incorrect adjustment +dag <- check_dag( + y ~ x + b + c, + x ~ b, + outcome = "y", + exposure = "x" +) +dag +plot(dag) + +# After adjusting for `b`, the model is correctly specified +dag <- check_dag( + y ~ x + b + c, + x ~ b, + outcome = "y", + exposure = "x", + adjusted = "b" +) +dag + +# Objects returned by `check_dag()` can be used with "ggdag" or "dagitty" +ggdag::ggdag_status(dag) +\dontshow{\}) # examplesIf} +} diff --git a/tests/testthat/_snaps/check_dag.md b/tests/testthat/_snaps/check_dag.md new file mode 100644 index 000000000..a3c38cb1a --- /dev/null +++ b/tests/testthat/_snaps/check_dag.md @@ -0,0 +1,92 @@ +# check_dag + + Code + print(dag) + Output + # Correct adjustments for identifying direct effects + + Model is correctly specified. + - Outcome: y + - Exposure: x + + No adjustment needed to estimate the direct effect of x on y. + + # Correct adjustments for identifying total effects + + Model is correctly specified. + - Outcome: y + - Exposure: x + + No adjustment needed to estimate the total effect of x on y. + + +--- + + Code + print(dag) + Output + # Correct adjustments for identifying direct effects + + Model is correctly specified. + - Outcome: y + - Exposure: x + + All minimal sufficient adjustments to estimate the direct effect were done. + + # Correct adjustments for identifying total effects + + Model is correctly specified. + - Outcome: y + - Exposure: x + + All minimal sufficient adjustments to estimate the total effect were done. + + +--- + + Code + print(dag) + Output + # Correct adjustments for identifying direct effects + + Incorrectly adjusted! + - Outcome: y + - Exposure: x + + To estimate the direct effect, also adjust for: b. + Currently, the model does not adjust for any variables. + + # Correct adjustments for identifying total effects + + Incorrectly adjusted! + - Outcome: y + - Exposure: x + + To estimate the total effect, also adjust for: b. + Currently, the model does not adjust for any variables. + + +--- + + Code + print(dag) + Output + # Correct adjustments for identifying direct effects + + Incorrectly adjusted! + - Outcome: y + - Exposure: x + + To estimate the direct effect, also adjust for: b and c. + Currently, the model currently only adjusts for c. + + # Correct adjustments for identifying total effects + + Incorrectly adjusted! + - Outcome: y + - Exposure: x + + To estimate the total effect, also adjust for: b and c. + Currently, the model currently only adjusts for c. + + diff --git a/tests/testthat/test-check_dag.R b/tests/testthat/test-check_dag.R new file mode 100644 index 000000000..458226251 --- /dev/null +++ b/tests/testthat/test-check_dag.R @@ -0,0 +1,49 @@ +skip_on_cran() +skip_if_not_installed("ggdag") +skip_if_not_installed("dagitty") + +test_that("check_dag", { + dag <- check_dag( + y ~ x + b, + outcome = "y", + exposure = "x" + ) + expect_snapshot(print(dag)) + dag <- check_dag( + y ~ x + b, + outcome = "y", + exposure = "x", + adjusted = "b" + ) + expect_snapshot(print(dag)) + dag <- check_dag( + y ~ x + b + c, + x ~ b, + outcome = "y", + exposure = "x" + ) + expect_snapshot(print(dag)) + dag <- check_dag( + y ~ x + b + c, + x ~ b, + outcome = "y", + exposure = "x", + adjusted = "c" + ) + expect_snapshot(print(dag)) +}) + +test_that("check_dag, cylic error", { + expect_error( + check_dag( + y ~ x + b + c + d, + x ~ c + d, + b ~ x, + b ~ y, + outcome = "y", + exposure = "x", + adjusted = "c" + ), + regex = "Model is cyclic" + ) +}) From 98b1c33a3b9746f91d6e76302c74d83a95aa2426 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sat, 3 Aug 2024 21:25:04 +0200 Subject: [PATCH 03/41] use see dev --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index d8f21a485..77e6629c0 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -156,4 +156,4 @@ Config/Needs/website: r-lib/pkgdown, easystats/easystatstemplate Config/rcmdcheck/ignore-inconsequential-notes: true -Remotes: easystats/see#352 +Remotes: easystats/see From e138366ebeff8209c3eefcc1e29127f76e896b36 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sat, 3 Aug 2024 23:10:43 +0200 Subject: [PATCH 04/41] trigger pkgdown --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 77e6629c0..caf34bded 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: performance Title: Assessment of Regression Models Performance -Version: 0.12.2.1 +Version: 0.12.2.2 Authors@R: c(person(given = "Daniel", family = "Lüdecke", From ac866856127324f3cf814e17cf5acb9c317a5581 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 4 Aug 2024 00:41:08 +0200 Subject: [PATCH 05/41] snaps --- DESCRIPTION | 2 +- R/check_dag.R | 16 ++++++++-------- tests/testthat/_snaps/check_dag.md | 16 ++++++++-------- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index caf34bded..f29707608 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: performance Title: Assessment of Regression Models Performance -Version: 0.12.2.2 +Version: 0.12.2.3 Authors@R: c(person(given = "Daniel", family = "Lüdecke", diff --git a/R/check_dag.R b/R/check_dag.R index 215f5f89d..5b11ec7f3 100644 --- a/R/check_dag.R +++ b/R/check_dag.R @@ -238,10 +238,10 @@ print.check_dag <- function(x, ...) { insight::color_text("Model is correctly specified.", "green"), exposure_outcome_text, "\n\nNo adjustment needed to estimate the ", i, " effect of ", - datawizard::text_concatenate(attributes(x)$exposure), - " on ", + datawizard::text_concatenate(attributes(x)$exposure, enclose = "`"), + " on `", attributes(x)$outcome, - "." + "`." ) } else if (isTRUE(out$incorrectly_adjusted)) { # Scenario 2: incorrectly adjusted, adjustments where none is allowed @@ -250,8 +250,8 @@ print.check_dag <- function(x, ...) { exposure_outcome_text, "\n\nTo estimate the ", i, " effect, do ", insight::color_text("not", "italic"), - " adjust for: ", - datawizard::text_concatenate(out$current_adjustments), + " adjust for ", + datawizard::text_concatenate(out$current_adjustments, enclose = "`"), "." ) } else if (length(out$current_adjustments) != length(out$minimal_adjustment)) { @@ -261,8 +261,8 @@ print.check_dag <- function(x, ...) { exposure_outcome_text, "\n\nTo estimate the ", i, " effect, ", insight::color_text("also", "italic"), - " adjust for: ", - insight::color_text(datawizard::text_concatenate(out$minimal_adjustments), "yellow"), + " adjust for ", + insight::color_text(datawizard::text_concatenate(out$minimal_adjustments, enclose = "`"), "yellow"), "." ) if (is.null(out$current_adjustments)) { @@ -270,7 +270,7 @@ print.check_dag <- function(x, ...) { } else { msg <- paste0( msg, "\nCurrently, the model currently only adjusts for ", - insight::color_text(datawizard::text_concatenate(out$current_adjustments), "yellow"), "." + insight::color_text(datawizard::text_concatenate(out$current_adjustments, enclose = "`"), "yellow"), "." ) } } else { diff --git a/tests/testthat/_snaps/check_dag.md b/tests/testthat/_snaps/check_dag.md index a3c38cb1a..e28c825c3 100644 --- a/tests/testthat/_snaps/check_dag.md +++ b/tests/testthat/_snaps/check_dag.md @@ -9,7 +9,7 @@ - Outcome: y - Exposure: x - No adjustment needed to estimate the direct effect of x on y. + No adjustment needed to estimate the direct effect of `x` on `y`. # Correct adjustments for identifying total effects @@ -17,7 +17,7 @@ - Outcome: y - Exposure: x - No adjustment needed to estimate the total effect of x on y. + No adjustment needed to estimate the total effect of `x` on `y`. --- @@ -53,7 +53,7 @@ - Outcome: y - Exposure: x - To estimate the direct effect, also adjust for: b. + To estimate the direct effect, also adjust for `b`. Currently, the model does not adjust for any variables. # Correct adjustments for identifying total effects @@ -62,7 +62,7 @@ - Outcome: y - Exposure: x - To estimate the total effect, also adjust for: b. + To estimate the total effect, also adjust for `b`. Currently, the model does not adjust for any variables. @@ -77,8 +77,8 @@ - Outcome: y - Exposure: x - To estimate the direct effect, also adjust for: b and c. - Currently, the model currently only adjusts for c. + To estimate the direct effect, also adjust for `b` and `c`. + Currently, the model currently only adjusts for `c`. # Correct adjustments for identifying total effects @@ -86,7 +86,7 @@ - Outcome: y - Exposure: x - To estimate the total effect, also adjust for: b and c. - Currently, the model currently only adjusts for c. + To estimate the total effect, also adjust for `b` and `c`. + Currently, the model currently only adjusts for `c`. From 9e4111b56f0de1e82f05bbd769202f7474841b2d Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 4 Aug 2024 00:47:02 +0200 Subject: [PATCH 06/41] trigger pkgdown --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index f29707608..801a0514d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: performance Title: Assessment of Regression Models Performance -Version: 0.12.2.3 +Version: 0.12.2.4 Authors@R: c(person(given = "Daniel", family = "Lüdecke", From 9817070c0800082408ab4193305a2c9154039a26 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 4 Aug 2024 01:06:17 +0200 Subject: [PATCH 07/41] updates --- DESCRIPTION | 2 +- R/check_dag.R | 33 ++++++++++++++++++++++++++++-- man/check_dag.Rd | 18 ++++++++++++++-- tests/testthat/_snaps/check_dag.md | 28 +++++++++++++++++++++++++ tests/testthat/test-check_dag.R | 9 ++++++++ 5 files changed, 85 insertions(+), 5 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 801a0514d..9158e5179 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: performance Title: Assessment of Regression Models Performance -Version: 0.12.2.4 +Version: 0.12.2.5 Authors@R: c(person(given = "Daniel", family = "Lüdecke", diff --git a/R/check_dag.R b/R/check_dag.R index 5b11ec7f3..842508f10 100644 --- a/R/check_dag.R +++ b/R/check_dag.R @@ -12,7 +12,8 @@ #' #' @param ... One or more formulas, which are converted into **dagitty** syntax. #' First element may also be model object. If a model objects is provided, its -#' formula is used as first formula. See 'Details'. +#' formula is used as first formula, and all independent variables will be used +#' for the `adjusted` argument. See 'Details' and 'Examples'. #' @param outcome Name of the dependent variable (outcome), as character string. #' Must be a valid name from the formulas. If not set, the first dependent #' variable from the formulas is used. @@ -21,7 +22,8 @@ #' Must be a valid name from the formulas. If not set, the first independent #' variable from the formulas is used. #' @param adjusted A character vector with names of variables that are adjusted -#' for in the model. +#' for in the model. If a model object is provided in `...`, any values in +#' `adjusted` will be overwritten by the model's independent variables. #' @param latent A character vector with names of latent variables in the model. #' @param effect Character string, indicating which effect to check. Can be #' `"all"` (default), `"total"`, or `"direct"`. @@ -74,6 +76,18 @@ #' #' # Objects returned by `check_dag()` can be used with "ggdag" or "dagitty" #' ggdag::ggdag_status(dag) +#' +#' # Using a model object to extract information about outcome, +#' # exposure and adjusted variables +#' data(mtcars) +#' m <- lm(mpg ~ wt + gear + disp + cyl, data = mtcars) +#' dag <- check_dag( +#' m, +#' wt ~ disp + cyl, +#' wt ~ am +#' ) +#' dag +#' plot(dag) #' @export check_dag <- function(..., outcome = NULL, @@ -124,6 +138,11 @@ check_dag.default <- function(..., formulas[[1]] <- stats::as.formula( paste(vars$response, "~", paste(vars$conditional, collapse = "+")) ) + # if we have a model, we *always* overwrite adjusted + if (!is.null(adjusted)) { + insight::format_alert("The `adjusted` argument will be overwritten by all independent variables from the model.") # nolint + } + adjusted <- vars$conditional } # if outcome is not set, use first dependent variable @@ -230,6 +249,16 @@ print.check_dag <- function(x, ...) { ": ", datawizard::text_concatenate(attributes(x)$exposure) ) + # add information on adjustments + if (!is.null(out$current_adjustments)) { + exposure_outcome_text <- paste0( + exposure_outcome_text, + "\n- Adjustment", + ifelse(length(out$current_adjustments) > 1, "s", ""), + ": ", datawizard::text_concatenate(out$current_adjustments) + ) + } + # build message with check results for effects ----------------------- if (isTRUE(out$adjustment_not_needed)) { diff --git a/man/check_dag.Rd b/man/check_dag.Rd index b0039a142..281a62ef9 100644 --- a/man/check_dag.Rd +++ b/man/check_dag.Rd @@ -19,7 +19,8 @@ as.dag(x, ...) \arguments{ \item{...}{One or more formulas, which are converted into \strong{dagitty} syntax. First element may also be model object. If a model objects is provided, its -formula is used as first formula. See 'Details'.} +formula is used as first formula, and all independent variables will be used +for the \code{adjusted} argument. See 'Details' and 'Examples'.} \item{outcome}{Name of the dependent variable (outcome), as character string. Must be a valid name from the formulas. If not set, the first dependent @@ -31,7 +32,8 @@ Must be a valid name from the formulas. If not set, the first independent variable from the formulas is used.} \item{adjusted}{A character vector with names of variables that are adjusted -for in the model.} +for in the model. If a model object is provided in \code{...}, any values in +\code{adjusted} will be overwritten by the model's independent variables.} \item{latent}{A character vector with names of latent variables in the model.} @@ -99,5 +101,17 @@ dag # Objects returned by `check_dag()` can be used with "ggdag" or "dagitty" ggdag::ggdag_status(dag) + +# Using a model object to extract information about outcome, +# exposure and adjusted variables +data(mtcars) +m <- lm(mpg ~ wt + gear + disp + cyl, data = mtcars) +dag <- check_dag( + m, + wt ~ disp + cyl, + wt ~ am +) +dag +plot(dag) \dontshow{\}) # examplesIf} } diff --git a/tests/testthat/_snaps/check_dag.md b/tests/testthat/_snaps/check_dag.md index e28c825c3..16432d033 100644 --- a/tests/testthat/_snaps/check_dag.md +++ b/tests/testthat/_snaps/check_dag.md @@ -30,6 +30,7 @@ Model is correctly specified. - Outcome: y - Exposure: x + - Adjustment: b All minimal sufficient adjustments to estimate the direct effect were done. @@ -38,6 +39,7 @@ Model is correctly specified. - Outcome: y - Exposure: x + - Adjustment: b All minimal sufficient adjustments to estimate the total effect were done. @@ -76,6 +78,7 @@ Incorrectly adjusted! - Outcome: y - Exposure: x + - Adjustment: c To estimate the direct effect, also adjust for `b` and `c`. Currently, the model currently only adjusts for `c`. @@ -85,8 +88,33 @@ Incorrectly adjusted! - Outcome: y - Exposure: x + - Adjustment: c To estimate the total effect, also adjust for `b` and `c`. Currently, the model currently only adjusts for `c`. +--- + + Code + print(dag) + Output + # Correct adjustments for identifying direct effects + + Model is correctly specified. + - Outcome: mpg + - Exposure: wt + - Adjustments: cyl, disp and gear + + All minimal sufficient adjustments to estimate the direct effect were done. + + # Correct adjustments for identifying total effects + + Model is correctly specified. + - Outcome: mpg + - Exposure: wt + - Adjustments: cyl, disp and gear + + All minimal sufficient adjustments to estimate the total effect were done. + + diff --git a/tests/testthat/test-check_dag.R b/tests/testthat/test-check_dag.R index 458226251..efc7c95bf 100644 --- a/tests/testthat/test-check_dag.R +++ b/tests/testthat/test-check_dag.R @@ -31,6 +31,15 @@ test_that("check_dag", { adjusted = "c" ) expect_snapshot(print(dag)) + data(mtcars) + m <- lm(mpg ~ wt + gear + disp + cyl, data = mtcars) + dag <- check_dag( + m, + wt ~ disp + cyl, + wt ~ am + ) + dag + expect_snapshot(print(dag)) }) test_that("check_dag, cylic error", { From 3f7af632ee824acf196e2d6d5889422dafb3551b Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 4 Aug 2024 10:00:21 +0200 Subject: [PATCH 08/41] update print, docs --- DESCRIPTION | 2 +- R/check_dag.R | 103 +++++++++++++++++++---------- man/check_dag.Rd | 54 ++++++++++++--- tests/testthat/_snaps/check_dag.md | 78 ++++++++++++---------- tests/testthat/test-check_dag.R | 9 +++ 5 files changed, 166 insertions(+), 80 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 9158e5179..494c87973 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: performance Title: Assessment of Regression Models Performance -Version: 0.12.2.5 +Version: 0.12.2.6 Authors@R: c(person(given = "Daniel", family = "Lüdecke", diff --git a/R/check_dag.R b/R/check_dag.R index 842508f10..451499de1 100644 --- a/R/check_dag.R +++ b/R/check_dag.R @@ -1,14 +1,18 @@ #' @title Check correct model adjustment for identifying causal effects #' @name check_dag #' -#' @description `check_dag()` checks if a model is correctly adjusted for -#' identifying causal effects. It returns a **dagitty** object that can be -#' visualized with `plot()`. `check_dag()` is a convenient wrapper around -#' `ggdag::dagify()`, which used `dagitty::adjustmentSets()` and -#' `dagitty::adjustedNodes()` to check if the model is correctly adjusted for -#' identifying causal (direct and total) effects of a given exposure on the -#' outcome. `as.dag()` is a small convenient function to return the dagitty-string, -#' which can be used for the online-tool from the dagitty-website. +#' @description The purpose of `check_dag()` is to build, check and visualize +#' your model based on directed acyclic graphs (DAG). The function checks if a +#' model is correctly adjusted for identifying specific relationships of +#' variables, especiall directed (maybe also "causal") effects for given +#' exposures on an outcome. It returns a **dagitty** object that can be +#' visualized with `plot()`. +#' +#' `check_dag()` is a convenient wrapper around `ggdag::dagify()`, +#' `dagitty::adjustmentSets()` and `dagitty::adjustedNodes()` to check correct +#' adjustment sets. `as.dag()` is a small convenient function to return the +#' dagitty-string, which can be used for the online-tool from the +#' dagitty-website. #' #' @param ... One or more formulas, which are converted into **dagitty** syntax. #' First element may also be model object. If a model objects is provided, its @@ -29,7 +33,8 @@ #' `"all"` (default), `"total"`, or `"direct"`. #' @param x An object of class `check_dag`, as returned by `check_dag()`. #' -#' @details +#' @section Specifying the DAG formulas: +#' #' The formulas have following syntax: #' #' - One-directed paths: On the *left-hand-side* is the name of the variables @@ -40,12 +45,36 @@ #' #' - Bi-directed paths: Use `~~` to indicate bi-directed paths. For example, #' `Y ~~ X` indicates that the path between `Y` and `X` is bi-directed, and -#' the arrow points in both directions. +#' the arrow points in both directions. Bi-directed paths often indicate +#' unmeasured cause, or umeasured confounding, of the two involved variables. +#' +#' @section Why are DAGs important - the Table 2 fallacy: +#' +#' Correctly thinking about and identifying the relationships between variables +#' is important when it comes to reporting coefficients from regression models +#' that mutually adjust for "confounders" or include covariates. Different +#' coefficients might have different interpretations, depending on their +#' relationship to other variables in the model. Sometimes, a regression +#' coefficient represents the direct effect of an exposure on an outcome, but +#' sometimes it must be interpreted as total effect, due to the involvement +#' of mediating effects. This problem is also called "Table 2 fallacy" +#' (_Westreich and Greenland 2013_). DAG helps visualizing and thereby focusing +#' the relationships of variables in a regression model to detect missing +#' adjustments or over-adjustment. #' #' @return An object of class `check_dag`, which can be visualized with `plot()`. #' The returned object also inherits from class `dagitty` and thus can be used #' with all functions from the **ggdag** and **dagitty** packages. #' +#' @references +#' - Rohrer, J. M. (2018). Thinking clearly about correlations and causation: +#' Graphical causal models for observational data. Advances in Methods and +#' Practices in Psychological Science, 1(1), 27–42. \doi{10.1177/2515245917745629} +#' +#' - Westreich, D., & Greenland, S. (2013). The Table 2 Fallacy: Presenting and +#' Interpreting Confounder and Modifier Coefficients. American Journal of +#' Epidemiology, 177(4), 292–298. \doi{10.1093/aje/kws412} +#' #' @examplesIf require("ggdag", quietly = TRUE) && require("dagitty", quietly = TRUE) && require("see", quietly = TRUE) #' # no adjustment needed #' check_dag( @@ -203,6 +232,7 @@ check_dag.default <- function(..., attr(dag, "outcome") <- outcome attr(dag, "exposure") <- exposure attr(dag, "adjusted") <- adjusted + attr(dag, "adjustment_sets") <- checks[[1]]$current_adjustments attr(dag, "check_direct") <- insight::compact_list(checks[[1]]) attr(dag, "check_total") <- insight::compact_list(checks[[2]]) @@ -236,6 +266,29 @@ as.dag <- function(x, ...) { print.check_dag <- function(x, ...) { effect <- attributes(x)$effect + # header + cat(insight::print_color("# Check for correct adjustment sets", "blue")) + + # model specification + exposure_outcome_text <- paste0( + "\n- Outcome: ", attributes(x)$outcome, + "\n- Exposure", ifelse(length(attributes(x)$exposure) > 1, "s", ""), + ": ", datawizard::text_concatenate(attributes(x)$exposure) + ) + + # add information on adjustments + if (!is.null(attributes(x)$adjustment_sets)) { + exposure_outcome_text <- paste0( + exposure_outcome_text, + "\n- Adjustment", + ifelse(length(attributes(x)$adjustment_sets) > 1, "s", ""), + ": ", datawizard::text_concatenate(attributes(x)$adjustment_sets) + ) + } + + cat(exposure_outcome_text) + cat("\n\n") + for (i in c("direct", "total")) { if (i == "direct") { out <- attributes(x)$check_direct @@ -243,30 +296,13 @@ print.check_dag <- function(x, ...) { out <- attributes(x)$check_total } - exposure_outcome_text <- paste0( - "\n- Outcome: ", attributes(x)$outcome, - "\n- Exposure", ifelse(length(attributes(x)$exposure) > 1, "s", ""), - ": ", datawizard::text_concatenate(attributes(x)$exposure) - ) - - # add information on adjustments - if (!is.null(out$current_adjustments)) { - exposure_outcome_text <- paste0( - exposure_outcome_text, - "\n- Adjustment", - ifelse(length(out$current_adjustments) > 1, "s", ""), - ": ", datawizard::text_concatenate(out$current_adjustments) - ) - } - # build message with check results for effects ----------------------- if (isTRUE(out$adjustment_not_needed)) { # Scenario 1: no adjustment needed msg <- paste0( insight::color_text("Model is correctly specified.", "green"), - exposure_outcome_text, - "\n\nNo adjustment needed to estimate the ", i, " effect of ", + "\nNo adjustment needed to estimate the ", i, " effect of ", datawizard::text_concatenate(attributes(x)$exposure, enclose = "`"), " on `", attributes(x)$outcome, @@ -276,8 +312,7 @@ print.check_dag <- function(x, ...) { # Scenario 2: incorrectly adjusted, adjustments where none is allowed msg <- paste0( insight::color_text("Incorrectly adjusted!", "red"), - exposure_outcome_text, - "\n\nTo estimate the ", i, " effect, do ", + "\nTo estimate the ", i, " effect, do ", insight::color_text("not", "italic"), " adjust for ", datawizard::text_concatenate(out$current_adjustments, enclose = "`"), @@ -287,8 +322,7 @@ print.check_dag <- function(x, ...) { # Scenario 3: missing adjustments msg <- paste0( insight::color_text("Incorrectly adjusted!", "red"), - exposure_outcome_text, - "\n\nTo estimate the ", i, " effect, ", + "\nTo estimate the ", i, " effect, ", insight::color_text("also", "italic"), " adjust for ", insight::color_text(datawizard::text_concatenate(out$minimal_adjustments, enclose = "`"), "yellow"), @@ -306,14 +340,13 @@ print.check_dag <- function(x, ...) { # Scenario 4: correct adjustment msg <- paste0( insight::color_text("Model is correctly specified.", "green"), - exposure_outcome_text, - "\n\nAll minimal sufficient adjustments to estimate the ", i, " effect were done." + "\nAll minimal sufficient adjustments to estimate the ", i, " effect were done." ) } if (effect %in% c("all", i)) { cat(insight::print_color(insight::format_message( - paste0("# Correct adjustments for identifying {.i ", i, "} effects\n\n") + paste0("Identification of {.i ", i, "} effects\n\n") ), "blue")) cat(msg) cat("\n\n") diff --git a/man/check_dag.Rd b/man/check_dag.Rd index 281a62ef9..2b259f478 100644 --- a/man/check_dag.Rd +++ b/man/check_dag.Rd @@ -48,16 +48,22 @@ The returned object also inherits from class \code{dagitty} and thus can be used with all functions from the \strong{ggdag} and \strong{dagitty} packages. } \description{ -\code{check_dag()} checks if a model is correctly adjusted for -identifying causal effects. It returns a \strong{dagitty} object that can be -visualized with \code{plot()}. \code{check_dag()} is a convenient wrapper around -\code{ggdag::dagify()}, which used \code{dagitty::adjustmentSets()} and -\code{dagitty::adjustedNodes()} to check if the model is correctly adjusted for -identifying causal (direct and total) effects of a given exposure on the -outcome. \code{as.dag()} is a small convenient function to return the dagitty-string, -which can be used for the online-tool from the dagitty-website. +The purpose of \code{check_dag()} is to build, check and visualize +your model based on directed acyclic graphs (DAG). The function checks if a +model is correctly adjusted for identifying specific relationships of +variables, especiall directed (maybe also "causal") effects for given +exposures on an outcome. It returns a \strong{dagitty} object that can be +visualized with \code{plot()}. + +\code{check_dag()} is a convenient wrapper around \code{ggdag::dagify()}, +\code{dagitty::adjustmentSets()} and \code{dagitty::adjustedNodes()} to check correct +adjustment sets. \code{as.dag()} is a small convenient function to return the +dagitty-string, which can be used for the online-tool from the +dagitty-website. } -\details{ +\section{Specifying the DAG formulas}{ + + The formulas have following syntax: \itemize{ \item One-directed paths: On the \emph{left-hand-side} is the name of the variables @@ -67,9 +73,27 @@ to come from. For example, the formula \code{Y ~ X1 + X2}, paths directed from both \code{X1} and \code{X2} to \code{Y} are assumed. \item Bi-directed paths: Use \verb{~~} to indicate bi-directed paths. For example, \code{Y ~~ X} indicates that the path between \code{Y} and \code{X} is bi-directed, and -the arrow points in both directions. +the arrow points in both directions. Bi-directed paths often indicate +unmeasured cause, or umeasured confounding, of the two involved variables. } } + +\section{Why are DAGs important - the Table 2 fallacy}{ + + +Correctly thinking about and identifying the relationships between variables +is important when it comes to reporting coefficients from regression models +that mutually adjust for "confounders" or include covariates. Different +coefficients might have different interpretations, depending on their +relationship to other variables in the model. Sometimes, a regression +coefficient represents the direct effect of an exposure on an outcome, but +sometimes it must be interpreted as total effect, due to the involvement +of mediating effects. This problem is also called "Table 2 fallacy" +(\emph{Westreich and Greenland 2013}). DAG helps visualizing and thereby focusing +the relationships of variables in a regression model to detect missing +adjustments or over-adjustment. +} + \examples{ \dontshow{if (require("ggdag", quietly = TRUE) && require("dagitty", quietly = TRUE) && require("see", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} # no adjustment needed @@ -115,3 +139,13 @@ dag plot(dag) \dontshow{\}) # examplesIf} } +\references{ +\itemize{ +\item Rohrer, J. M. (2018). Thinking clearly about correlations and causation: +Graphical causal models for observational data. Advances in Methods and +Practices in Psychological Science, 1(1), 27–42. \doi{10.1177/2515245917745629} +\item Westreich, D., & Greenland, S. (2013). The Table 2 Fallacy: Presenting and +Interpreting Confounder and Modifier Coefficients. American Journal of +Epidemiology, 177(4), 292–298. \doi{10.1093/aje/kws412} +} +} diff --git a/tests/testthat/_snaps/check_dag.md b/tests/testthat/_snaps/check_dag.md index 16432d033..d3b44a397 100644 --- a/tests/testthat/_snaps/check_dag.md +++ b/tests/testthat/_snaps/check_dag.md @@ -3,20 +3,18 @@ Code print(dag) Output - # Correct adjustments for identifying direct effects - - Model is correctly specified. + # Check for correct adjustment sets - Outcome: y - Exposure: x + Identification of direct effects + + Model is correctly specified. No adjustment needed to estimate the direct effect of `x` on `y`. - # Correct adjustments for identifying total effects + Identification of total effects Model is correctly specified. - - Outcome: y - - Exposure: x - No adjustment needed to estimate the total effect of `x` on `y`. @@ -25,22 +23,19 @@ Code print(dag) Output - # Correct adjustments for identifying direct effects - - Model is correctly specified. + # Check for correct adjustment sets - Outcome: y - Exposure: x - Adjustment: b + Identification of direct effects + + Model is correctly specified. All minimal sufficient adjustments to estimate the direct effect were done. - # Correct adjustments for identifying total effects + Identification of total effects Model is correctly specified. - - Outcome: y - - Exposure: x - - Adjustment: b - All minimal sufficient adjustments to estimate the total effect were done. @@ -49,21 +44,19 @@ Code print(dag) Output - # Correct adjustments for identifying direct effects - - Incorrectly adjusted! + # Check for correct adjustment sets - Outcome: y - Exposure: x + Identification of direct effects + + Incorrectly adjusted! To estimate the direct effect, also adjust for `b`. Currently, the model does not adjust for any variables. - # Correct adjustments for identifying total effects + Identification of total effects Incorrectly adjusted! - - Outcome: y - - Exposure: x - To estimate the total effect, also adjust for `b`. Currently, the model does not adjust for any variables. @@ -73,23 +66,43 @@ Code print(dag) Output - # Correct adjustments for identifying direct effects - - Incorrectly adjusted! + # Check for correct adjustment sets - Outcome: y - Exposure: x - Adjustment: c + Identification of direct effects + + Incorrectly adjusted! To estimate the direct effect, also adjust for `b` and `c`. Currently, the model currently only adjusts for `c`. - # Correct adjustments for identifying total effects + Identification of total effects Incorrectly adjusted! + To estimate the total effect, also adjust for `b` and `c`. + Currently, the model currently only adjusts for `c`. + + +--- + + Code + print(dag) + Output + # Check for correct adjustment sets - Outcome: y - Exposure: x - Adjustment: c + Identification of direct effects + + Incorrectly adjusted! + To estimate the direct effect, also adjust for `b` and `c`. + Currently, the model currently only adjusts for `c`. + + Identification of total effects + + Incorrectly adjusted! To estimate the total effect, also adjust for `b` and `c`. Currently, the model currently only adjusts for `c`. @@ -99,22 +112,19 @@ Code print(dag) Output - # Correct adjustments for identifying direct effects - - Model is correctly specified. + # Check for correct adjustment sets - Outcome: mpg - Exposure: wt - Adjustments: cyl, disp and gear + Identification of direct effects + + Model is correctly specified. All minimal sufficient adjustments to estimate the direct effect were done. - # Correct adjustments for identifying total effects + Identification of total effects Model is correctly specified. - - Outcome: mpg - - Exposure: wt - - Adjustments: cyl, disp and gear - All minimal sufficient adjustments to estimate the total effect were done. diff --git a/tests/testthat/test-check_dag.R b/tests/testthat/test-check_dag.R index efc7c95bf..7d834b60a 100644 --- a/tests/testthat/test-check_dag.R +++ b/tests/testthat/test-check_dag.R @@ -31,6 +31,15 @@ test_that("check_dag", { adjusted = "c" ) expect_snapshot(print(dag)) + dag <- check_dag( + y ~ x + b + c + d, + x ~ b, + x ~ c, + outcome = "y", + exposure = "x", + adjusted = "c" + ) + expect_snapshot(print(dag)) data(mtcars) m <- lm(mpg ~ wt + gear + disp + cyl, data = mtcars) dag <- check_dag( From 1682d4dd0d0a94ccfb89e33a36101cf501d3a051 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 4 Aug 2024 10:19:15 +0200 Subject: [PATCH 09/41] typos, wordlist --- R/check_dag.R | 6 +++--- inst/WORDLIST | 6 ++++++ man/check_dag.Rd | 4 ++-- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/R/check_dag.R b/R/check_dag.R index 451499de1..becc39750 100644 --- a/R/check_dag.R +++ b/R/check_dag.R @@ -4,7 +4,7 @@ #' @description The purpose of `check_dag()` is to build, check and visualize #' your model based on directed acyclic graphs (DAG). The function checks if a #' model is correctly adjusted for identifying specific relationships of -#' variables, especiall directed (maybe also "causal") effects for given +#' variables, especially directed (maybe also "causal") effects for given #' exposures on an outcome. It returns a **dagitty** object that can be #' visualized with `plot()`. #' @@ -46,7 +46,7 @@ #' - Bi-directed paths: Use `~~` to indicate bi-directed paths. For example, #' `Y ~~ X` indicates that the path between `Y` and `X` is bi-directed, and #' the arrow points in both directions. Bi-directed paths often indicate -#' unmeasured cause, or umeasured confounding, of the two involved variables. +#' unmeasured cause, or unmeasured confounding, of the two involved variables. #' #' @section Why are DAGs important - the Table 2 fallacy: #' @@ -236,7 +236,7 @@ check_dag.default <- function(..., attr(dag, "check_direct") <- insight::compact_list(checks[[1]]) attr(dag, "check_total") <- insight::compact_list(checks[[2]]) - class(dag) <- c(c("check_dag", "see_check_dag"), class(dag)) + class(dag) <- c(c("check_dag", "see_check_dag"), class(dag)) dag } diff --git a/inst/WORDLIST b/inst/WORDLIST index 9817a3a47..2366773d8 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -39,6 +39,7 @@ Chisq CochransQ CompQuadForm Concurvity +Confounder Cribari Cronbach's Crujeiras @@ -182,6 +183,7 @@ Raudenbush Raykov Revelle Rodríguez +Rohrer Rousseeuw Routledge SEM @@ -224,6 +226,7 @@ Vuong Vuong's WAIC Weisberg +Westreich Windmeijer Winsorization Witten @@ -234,6 +237,7 @@ Zavoinas Zhou Zomeren Zuur +acyclic afex al analyse @@ -252,6 +256,8 @@ brms brmsfit cauchy clusterable +confounder +confounders concurvity dagitty datawizard diff --git a/man/check_dag.Rd b/man/check_dag.Rd index 2b259f478..278dcfd10 100644 --- a/man/check_dag.Rd +++ b/man/check_dag.Rd @@ -51,7 +51,7 @@ with all functions from the \strong{ggdag} and \strong{dagitty} packages. The purpose of \code{check_dag()} is to build, check and visualize your model based on directed acyclic graphs (DAG). The function checks if a model is correctly adjusted for identifying specific relationships of -variables, especiall directed (maybe also "causal") effects for given +variables, especially directed (maybe also "causal") effects for given exposures on an outcome. It returns a \strong{dagitty} object that can be visualized with \code{plot()}. @@ -74,7 +74,7 @@ both \code{X1} and \code{X2} to \code{Y} are assumed. \item Bi-directed paths: Use \verb{~~} to indicate bi-directed paths. For example, \code{Y ~~ X} indicates that the path between \code{Y} and \code{X} is bi-directed, and the arrow points in both directions. Bi-directed paths often indicate -unmeasured cause, or umeasured confounding, of the two involved variables. +unmeasured cause, or unmeasured confounding, of the two involved variables. } } From 30639548b1cc3ba7b0941d102ebfb1c7acf3a624 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 4 Aug 2024 18:47:43 +0200 Subject: [PATCH 10/41] add coords argument --- DESCRIPTION | 2 +- NAMESPACE | 2 -- R/check_dag.R | 51 +++++++++++++++++++++++++----------------------- man/check_dag.Rd | 23 +++++++++++++++++++++- 4 files changed, 50 insertions(+), 28 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 494c87973..08854ca1b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: performance Title: Assessment of Regression Models Performance -Version: 0.12.2.6 +Version: 0.12.2.7 Authors@R: c(person(given = "Daniel", family = "Lüdecke", diff --git a/NAMESPACE b/NAMESPACE index 4d59423a9..e0543f2b8 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -49,8 +49,6 @@ S3method(check_convergence,"_glm") S3method(check_convergence,default) S3method(check_convergence,glmmTMB) S3method(check_convergence,merMod) -S3method(check_dag,dagitty) -S3method(check_dag,default) S3method(check_distribution,default) S3method(check_distribution,numeric) S3method(check_heteroscedasticity,default) diff --git a/R/check_dag.R b/R/check_dag.R index becc39750..66fae0c1c 100644 --- a/R/check_dag.R +++ b/R/check_dag.R @@ -31,6 +31,10 @@ #' @param latent A character vector with names of latent variables in the model. #' @param effect Character string, indicating which effect to check. Can be #' `"all"` (default), `"total"`, or `"direct"`. +#' @param coords A list with two elements, `x` and `y`, which both are named +#' vectors of numerics. The names correspond to the variable names in the DAG, +#' and the values for `x` and `y` indicate the x/y coordinates in the plot. +#' See 'Examples'. #' @param x An object of class `check_dag`, as returned by `check_dag()`. #' #' @section Specifying the DAG formulas: @@ -103,6 +107,21 @@ #' ) #' dag #' +#' # use specific layout for the DAG +#' dag <- check_dag( +#' score ~ exp + b + c, +#' exp ~ b, +#' outcome = "score", +#' exposure = "exp", +#' coords = list( +#' # x-coordinates for all nodes +#' x = c(score = 5, exp = 4, b = 3, c = 3), +#' # y-coordinates for all nodes +#' y = c(score = 3, exp = 3, b = 2, c = 4) +#' ) +#' ) +#' plot(dag) +#' #' # Objects returned by `check_dag()` can be used with "ggdag" or "dagitty" #' ggdag::ggdag_status(dag) #' @@ -123,29 +142,8 @@ check_dag <- function(..., exposure = NULL, adjusted = NULL, latent = NULL, - effect = c("all", "total", "direct")) { - UseMethod("check_dag") -} - - -#' @export -check_dag.dagitty <- function(..., - outcome = NULL, - exposure = NULL, - adjusted = NULL, - latent = NULL, - effect = c("all", "total", "direct")) { - insight::format_error("This function is not yet implemented.") -} - - -#' @export -check_dag.default <- function(..., - outcome = NULL, - exposure = NULL, - adjusted = NULL, - latent = NULL, - effect = c("all", "total", "direct")) { + effect = c("all", "total", "direct"), + coords = NULL) { insight::check_if_installed( c("ggdag", "dagitty"), reason = "to check correct adjustments for identifying causal effects." @@ -190,7 +188,12 @@ check_dag.default <- function(..., } # convert to dag - dag_args <- c(formulas, list(exposure = exposure, outcome = outcome, latent = latent)) + dag_args <- c(formulas, list( + exposure = exposure, + outcome = outcome, + latent = latent, + coords = coords + )) dag <- do.call(ggdag::dagify, dag_args) # add adjustments diff --git a/man/check_dag.Rd b/man/check_dag.Rd index 278dcfd10..0ba8729d1 100644 --- a/man/check_dag.Rd +++ b/man/check_dag.Rd @@ -11,7 +11,8 @@ check_dag( exposure = NULL, adjusted = NULL, latent = NULL, - effect = c("all", "total", "direct") + effect = c("all", "total", "direct"), + coords = NULL ) as.dag(x, ...) @@ -40,6 +41,11 @@ for in the model. If a model object is provided in \code{...}, any values in \item{effect}{Character string, indicating which effect to check. Can be \code{"all"} (default), \code{"total"}, or \code{"direct"}.} +\item{coords}{A list with two elements, \code{x} and \code{y}, which both are named +vectors of numerics. The names correspond to the variable names in the DAG, +and the values for \code{x} and \code{y} indicate the x/y coordinates in the plot. +See 'Examples'.} + \item{x}{An object of class \code{check_dag}, as returned by \code{check_dag()}.} } \value{ @@ -123,6 +129,21 @@ dag <- check_dag( ) dag +# use specific layout for the DAG +dag <- check_dag( + score ~ exp + b + c, + exp ~ b, + outcome = "score", + exposure = "exp", + coords = list( + # x-coordinates for all nodes + x = c(score = 5, exp = 4, b = 3, c = 3), + # y-coordinates for all nodes + y = c(score = 3, exp = 3, b = 2, c = 4) + ) +) +plot(dag) + # Objects returned by `check_dag()` can be used with "ggdag" or "dagitty" ggdag::ggdag_status(dag) From 9bdccc7aa0290930e948ac2cb09c6b490aa951f0 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 4 Aug 2024 20:07:47 +0200 Subject: [PATCH 11/41] wording, remove repetition --- DESCRIPTION | 2 +- R/check_dag.R | 2 +- tests/testthat/_snaps/check_dag.md | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 08854ca1b..1354d9678 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: performance Title: Assessment of Regression Models Performance -Version: 0.12.2.7 +Version: 0.12.2.8 Authors@R: c(person(given = "Daniel", family = "Lüdecke", diff --git a/R/check_dag.R b/R/check_dag.R index 66fae0c1c..0f29863af 100644 --- a/R/check_dag.R +++ b/R/check_dag.R @@ -335,7 +335,7 @@ print.check_dag <- function(x, ...) { msg <- paste0(msg, "\nCurrently, the model does not adjust for any variables.") } else { msg <- paste0( - msg, "\nCurrently, the model currently only adjusts for ", + msg, "\nCurrently, the model only adjusts for ", insight::color_text(datawizard::text_concatenate(out$current_adjustments, enclose = "`"), "yellow"), "." ) } diff --git a/tests/testthat/_snaps/check_dag.md b/tests/testthat/_snaps/check_dag.md index d3b44a397..f3c4dfcbc 100644 --- a/tests/testthat/_snaps/check_dag.md +++ b/tests/testthat/_snaps/check_dag.md @@ -75,13 +75,13 @@ Incorrectly adjusted! To estimate the direct effect, also adjust for `b` and `c`. - Currently, the model currently only adjusts for `c`. + Currently, the model only adjusts for `c`. Identification of total effects Incorrectly adjusted! To estimate the total effect, also adjust for `b` and `c`. - Currently, the model currently only adjusts for `c`. + Currently, the model only adjusts for `c`. --- @@ -98,13 +98,13 @@ Incorrectly adjusted! To estimate the direct effect, also adjust for `b` and `c`. - Currently, the model currently only adjusts for `c`. + Currently, the model only adjusts for `c`. Identification of total effects Incorrectly adjusted! To estimate the total effect, also adjust for `b` and `c`. - Currently, the model currently only adjusts for `c`. + Currently, the model only adjusts for `c`. --- From e225fdda04e87f310158db79500745692b4dc09d Mon Sep 17 00:00:00 2001 From: Daniel Date: Sat, 10 Aug 2024 11:02:39 +0200 Subject: [PATCH 12/41] Fix issues with multiple possible adjustments (#762) --- DESCRIPTION | 2 +- R/check_dag.R | 46 +++++++++++++++++++++++---- tests/testthat/_snaps/check_dag.md | 51 ++++++++++++++++++++++++++++++ tests/testthat/test-check_dag.R | 28 +++++++++++++++- 4 files changed, 119 insertions(+), 8 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 1354d9678..4cdb94862 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: performance Title: Assessment of Regression Models Performance -Version: 0.12.2.8 +Version: 0.12.2.9 Authors@R: c(person(given = "Daniel", family = "Lüdecke", diff --git a/R/check_dag.R b/R/check_dag.R index 0f29863af..b78090a6f 100644 --- a/R/check_dag.R +++ b/R/check_dag.R @@ -227,7 +227,7 @@ check_dag <- function(..., adjustment_not_needed = is.null(adjustment_set) && is.null(adjustment_nodes), incorrectly_adjusted = is.null(adjustment_set) && !is.null(adjustment_nodes), current_adjustments = adjustment_nodes, - minimal_adjustments = adjustment_set + minimal_adjustments = as.list(dagitty::adjustmentSets(dag, effect = x)) ) }) @@ -247,7 +247,11 @@ check_dag <- function(..., .adjust_dag <- function(dag, adjusted) { for (i in adjusted) { - dag <- gsub(paste0("\n", i, "\n"), paste0("\n", i, " [adjusted]\n"), dag) + # first option, we just have the variable name + dag <- gsub(paste0("\n", i, "\n"), paste0("\n", i, " [adjusted]\n"), dag, fixed = TRUE) + # second option, we have the variable name with a [pos] tag when the user + # provided coords + dag <- gsub(paste0("\n", i, " [pos="), paste0("\n", i, " [adjusted,pos="), dag, fixed = TRUE) } dag } @@ -299,6 +303,13 @@ print.check_dag <- function(x, ...) { out <- attributes(x)$check_total } + # missing adjustements - minimal_adjustment can be a list of different + # options for minimal adjustements, so we check here if any of the minimal + # adjustements are currently sufficient + missing_adjustments <- vapply(out$minimal_adjustments, function(i) { + !is.null(out$current_adjustments) && all(i %in% out$current_adjustments) + }, logical(1)) + # build message with check results for effects ----------------------- if (isTRUE(out$adjustment_not_needed)) { @@ -321,16 +332,39 @@ print.check_dag <- function(x, ...) { datawizard::text_concatenate(out$current_adjustments, enclose = "`"), "." ) - } else if (length(out$current_adjustments) != length(out$minimal_adjustment)) { + } else if (!any(missing_adjustments)) { # Scenario 3: missing adjustments msg <- paste0( insight::color_text("Incorrectly adjusted!", "red"), "\nTo estimate the ", i, " effect, ", insight::color_text("also", "italic"), - " adjust for ", - insight::color_text(datawizard::text_concatenate(out$minimal_adjustments, enclose = "`"), "yellow"), - "." + " adjust for " ) + # we may have multiple valid adjustment sets - handle this here + if (length(out$minimal_adjustments) > 1) { + msg <- paste0( + msg, + "one of the following sets:\n", + insight::color_text( + paste( + "-", + unlist(lapply(out$minimal_adjustments, paste, collapse = ", "), use.names = FALSE), + collapse = "\n" + ), + "yellow" + ), + "." + ) + } else { + msg <- paste0( + msg, + insight::color_text(datawizard::text_concatenate( + unlist(out$minimal_adjustments, use.names = FALSE), + enclose = "`" + ), "yellow"), + "." + ) + } if (is.null(out$current_adjustments)) { msg <- paste0(msg, "\nCurrently, the model does not adjust for any variables.") } else { diff --git a/tests/testthat/_snaps/check_dag.md b/tests/testthat/_snaps/check_dag.md index f3c4dfcbc..3a5c07da8 100644 --- a/tests/testthat/_snaps/check_dag.md +++ b/tests/testthat/_snaps/check_dag.md @@ -128,3 +128,54 @@ All minimal sufficient adjustments to estimate the total effect were done. +# check_dag, multiple adjustment sets + + Code + print(dag) + Output + # Check for correct adjustment sets + - Outcome: exam + - Exposure: podcast + + Identification of direct effects + + Incorrectly adjusted! + To estimate the direct effect, also adjust for one of the following sets: + - alertness, prepared + - alertness, skills_course + - mood, prepared + - mood, skills_course. + Currently, the model does not adjust for any variables. + + Identification of total effects + + Incorrectly adjusted! + To estimate the total effect, also adjust for one of the following sets: + - alertness, prepared + - alertness, skills_course + - mood, prepared + - mood, skills_course. + Currently, the model does not adjust for any variables. + + +--- + + Code + print(dag) + Output + # Check for correct adjustment sets + - Outcome: exam + - Exposure: podcast + - Adjustments: alertness and prepared + + Identification of direct effects + + Model is correctly specified. + All minimal sufficient adjustments to estimate the direct effect were done. + + Identification of total effects + + Model is correctly specified. + All minimal sufficient adjustments to estimate the total effect were done. + + diff --git a/tests/testthat/test-check_dag.R b/tests/testthat/test-check_dag.R index 7d834b60a..efbffac30 100644 --- a/tests/testthat/test-check_dag.R +++ b/tests/testthat/test-check_dag.R @@ -47,7 +47,6 @@ test_that("check_dag", { wt ~ disp + cyl, wt ~ am ) - dag expect_snapshot(print(dag)) }) @@ -65,3 +64,30 @@ test_that("check_dag, cylic error", { regex = "Model is cyclic" ) }) + + +test_that("check_dag, multiple adjustment sets", { + dag <- check_dag( + podcast ~ mood + humor + skills_course, + alertness ~ mood, + mood ~ humor, + prepared ~ skills_course, + exam ~ alertness + prepared, + coords = ggdag::time_ordered_coords(), + exposure = "podcast", + outcome = "exam" + ) + expect_snapshot(print(dag)) + dag <- check_dag( + podcast ~ mood + humor + skills_course, + alertness ~ mood, + mood ~ humor, + prepared ~ skills_course, + exam ~ alertness + prepared, + adjusted = c("alertness", "prepared"), + exposure = "podcast", + outcome = "exam", + coords = ggdag::time_ordered_coords() + ) + expect_snapshot(print(dag)) +}) From 691300188482345f4406cfac505a4721293562ee Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 11 Aug 2024 12:17:56 +0200 Subject: [PATCH 13/41] docs --- R/check_dag.R | 26 +++++++++++++++++++++++--- man/check_dag.Rd | 28 ++++++++++++++++++++++++++-- 2 files changed, 49 insertions(+), 5 deletions(-) diff --git a/R/check_dag.R b/R/check_dag.R index b78090a6f..fd2ee04c2 100644 --- a/R/check_dag.R +++ b/R/check_dag.R @@ -5,8 +5,10 @@ #' your model based on directed acyclic graphs (DAG). The function checks if a #' model is correctly adjusted for identifying specific relationships of #' variables, especially directed (maybe also "causal") effects for given -#' exposures on an outcome. It returns a **dagitty** object that can be -#' visualized with `plot()`. +#' exposures on an outcome. In case of incorrect adjustments, the function +#' suggest the minimal required variables that should be adjusted for (sometimes +#' also called "controlled for"), i.e. that need to be included in the model. +#' It returns a **dagitty** object that can be visualized with `plot()`. #' #' `check_dag()` is a convenient wrapper around `ggdag::dagify()`, #' `dagitty::adjustmentSets()` and `dagitty::adjustedNodes()` to check correct @@ -52,6 +54,24 @@ #' the arrow points in both directions. Bi-directed paths often indicate #' unmeasured cause, or unmeasured confounding, of the two involved variables. #' +#' @section Minimally required adjustments: +#' +#' The function checks if the model is correctly adjusted for identifying the +#' direct and total effects of the exposure on the outcome. If the model is +#' correctly specified, no adjustment is needed to estimate the direct effect. +#' If the model is not correctly specified, the function suggests the minimal +#' required variables that should be adjusted for. The function distinguishes +#' between direct and total effects, and checks if the model is correctly +#' adjusted for both. If the model is cyclic, the function stops and suggests +#' to remove cycles from the model. +#' +#' @section Direct and total effects: +#' The direct effect of an exposure on an outcome is the effect that is not +#' mediated by any other variable in the model. The total effect is the sum of +#' the direct and indirect effects. The function checks if the model is correctly +#' adjusted for identifying the direct and total effects of the exposure on the +#' outcome. +#' #' @section Why are DAGs important - the Table 2 fallacy: #' #' Correctly thinking about and identifying the relationships between variables @@ -332,7 +352,7 @@ print.check_dag <- function(x, ...) { datawizard::text_concatenate(out$current_adjustments, enclose = "`"), "." ) - } else if (!any(missing_adjustments)) { + } else if (!any(missing_adjustments)) { # nolint # Scenario 3: missing adjustments msg <- paste0( insight::color_text("Incorrectly adjusted!", "red"), diff --git a/man/check_dag.Rd b/man/check_dag.Rd index 0ba8729d1..98e254f94 100644 --- a/man/check_dag.Rd +++ b/man/check_dag.Rd @@ -58,8 +58,10 @@ The purpose of \code{check_dag()} is to build, check and visualize your model based on directed acyclic graphs (DAG). The function checks if a model is correctly adjusted for identifying specific relationships of variables, especially directed (maybe also "causal") effects for given -exposures on an outcome. It returns a \strong{dagitty} object that can be -visualized with \code{plot()}. +exposures on an outcome. In case of incorrect adjustments, the function +suggest the minimal required variables that should be adjusted for (sometimes +also called "controlled for"), i.e. that need to be included in the model. +It returns a \strong{dagitty} object that can be visualized with \code{plot()}. \code{check_dag()} is a convenient wrapper around \code{ggdag::dagify()}, \code{dagitty::adjustmentSets()} and \code{dagitty::adjustedNodes()} to check correct @@ -84,6 +86,28 @@ unmeasured cause, or unmeasured confounding, of the two involved variables. } } +\section{Minimally required adjustments}{ + + +The function checks if the model is correctly adjusted for identifying the +direct and total effects of the exposure on the outcome. If the model is +correctly specified, no adjustment is needed to estimate the direct effect. +If the model is not correctly specified, the function suggests the minimal +required variables that should be adjusted for. The function distinguishes +between direct and total effects, and checks if the model is correctly +adjusted for both. If the model is cyclic, the function stops and suggests +to remove cycles from the model. +} + +\section{Direct and total effects}{ + +The direct effect of an exposure on an outcome is the effect that is not +mediated by any other variable in the model. The total effect is the sum of +the direct and indirect effects. The function checks if the model is correctly +adjusted for identifying the direct and total effects of the exposure on the +outcome. +} + \section{Why are DAGs important - the Table 2 fallacy}{ From 7d010cca31820cfd1b8dc46679ece9405f98e553 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 11 Aug 2024 21:05:24 +0100 Subject: [PATCH 14/41] Ptint DAG result only once if identical (#763) --- DESCRIPTION | 2 +- R/check_dag.R | 171 +++++++++++++++-------------- man/check_dag.Rd | 1 + tests/testthat/_snaps/check_dag.md | 80 +++----------- 4 files changed, 109 insertions(+), 145 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 4cdb94862..5c0cc0415 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: performance Title: Assessment of Regression Models Performance -Version: 0.12.2.9 +Version: 0.12.2.10 Authors@R: c(person(given = "Daniel", family = "Lüdecke", diff --git a/R/check_dag.R b/R/check_dag.R index fd2ee04c2..e197e7b86 100644 --- a/R/check_dag.R +++ b/R/check_dag.R @@ -66,6 +66,7 @@ #' to remove cycles from the model. #' #' @section Direct and total effects: +#' #' The direct effect of an exposure on an outcome is the effect that is not #' mediated by any other variable in the model. The total effect is the sum of #' the direct and indirect effects. The function checks if the model is correctly @@ -316,101 +317,111 @@ print.check_dag <- function(x, ...) { cat(exposure_outcome_text) cat("\n\n") - for (i in c("direct", "total")) { - if (i == "direct") { - out <- attributes(x)$check_direct - } else { - out <- attributes(x)$check_total + # minimal adjustment sets for direct and total effect identical? + # Then print only once + if (identical(attributes(x)$check_direct$minimal_adjustments, attributes(x)$check_total$minimal_adjustments)) { + .print_dag_results(attributes(x)$check_direct, x, "direct and total", "all") + } else { + for (i in c("direct", "total")) { + if (i == "direct") { + out <- attributes(x)$check_direct + } else { + out <- attributes(x)$check_total + } + .print_dag_results(out, x, i, effect) } + } +} - # missing adjustements - minimal_adjustment can be a list of different - # options for minimal adjustements, so we check here if any of the minimal - # adjustements are currently sufficient - missing_adjustments <- vapply(out$minimal_adjustments, function(i) { - !is.null(out$current_adjustments) && all(i %in% out$current_adjustments) - }, logical(1)) - - # build message with check results for effects ----------------------- - - if (isTRUE(out$adjustment_not_needed)) { - # Scenario 1: no adjustment needed - msg <- paste0( - insight::color_text("Model is correctly specified.", "green"), - "\nNo adjustment needed to estimate the ", i, " effect of ", - datawizard::text_concatenate(attributes(x)$exposure, enclose = "`"), - " on `", - attributes(x)$outcome, - "`." - ) - } else if (isTRUE(out$incorrectly_adjusted)) { - # Scenario 2: incorrectly adjusted, adjustments where none is allowed +.print_dag_results <- function(out, x, i, effect) { + # missing adjustements - minimal_adjustment can be a list of different + # options for minimal adjustements, so we check here if any of the minimal + # adjustments are currently sufficient + sufficient_adjustments <- vapply(out$minimal_adjustments, function(min_adj) { + !is.null(out$current_adjustments) && all(min_adj %in% out$current_adjustments) + }, logical(1)) + + # build message with check results for effects ----------------------- + + if (isTRUE(out$adjustment_not_needed)) { + # Scenario 1: no adjustment needed + msg <- paste0( + insight::color_text("Model is correctly specified.", "green"), + "\nNo adjustment needed to estimate the ", i, " effect of ", + datawizard::text_concatenate(attributes(x)$exposure, enclose = "`"), + " on `", + attributes(x)$outcome, + "`." + ) + } else if (isTRUE(out$incorrectly_adjusted)) { + # Scenario 2: incorrectly adjusted, adjustments where none is allowed + msg <- paste0( + insight::color_text("Incorrectly adjusted!", "red"), + "\nTo estimate the ", i, " effect, do ", + insight::color_text("not", "italic"), + " adjust for ", + datawizard::text_concatenate(out$current_adjustments, enclose = "`"), + "." + ) + } else if (any(sufficient_adjustments)) { + # Scenario 3: correct adjustment + msg <- paste0( + insight::color_text("Model is correctly specified.", "green"), + "\nAll minimal sufficient adjustments to estimate the ", i, " effect were done." + ) + } else { + # Scenario 4: missing adjustments + msg <- paste0( + insight::color_text("Incorrectly adjusted!", "red"), + "\nTo estimate the ", i, " effect, ", + insight::color_text("at least", "italic"), + " adjust for " + ) + # we may have multiple valid adjustment sets - handle this here + if (length(out$minimal_adjustments) > 1) { msg <- paste0( - insight::color_text("Incorrectly adjusted!", "red"), - "\nTo estimate the ", i, " effect, do ", - insight::color_text("not", "italic"), - " adjust for ", - datawizard::text_concatenate(out$current_adjustments, enclose = "`"), + msg, + "one of the following sets:\n", + insight::color_text( + paste( + "-", + unlist(lapply(out$minimal_adjustments, paste, collapse = ", "), use.names = FALSE), + collapse = "\n" + ), + "yellow" + ), "." ) - } else if (!any(missing_adjustments)) { # nolint - # Scenario 3: missing adjustments + } else { msg <- paste0( - insight::color_text("Incorrectly adjusted!", "red"), - "\nTo estimate the ", i, " effect, ", - insight::color_text("also", "italic"), - " adjust for " + msg, + insight::color_text(datawizard::text_concatenate( + unlist(out$minimal_adjustments, use.names = FALSE), + enclose = "`" + ), "yellow"), + "." ) - # we may have multiple valid adjustment sets - handle this here - if (length(out$minimal_adjustments) > 1) { - msg <- paste0( - msg, - "one of the following sets:\n", - insight::color_text( - paste( - "-", - unlist(lapply(out$minimal_adjustments, paste, collapse = ", "), use.names = FALSE), - collapse = "\n" - ), - "yellow" - ), - "." - ) - } else { - msg <- paste0( - msg, - insight::color_text(datawizard::text_concatenate( - unlist(out$minimal_adjustments, use.names = FALSE), - enclose = "`" - ), "yellow"), - "." - ) - } - if (is.null(out$current_adjustments)) { - msg <- paste0(msg, "\nCurrently, the model does not adjust for any variables.") - } else { - msg <- paste0( - msg, "\nCurrently, the model only adjusts for ", - insight::color_text(datawizard::text_concatenate(out$current_adjustments, enclose = "`"), "yellow"), "." - ) - } + } + if (is.null(out$current_adjustments)) { + msg <- paste0(msg, "\nCurrently, the model does not adjust for any variables.") } else { - # Scenario 4: correct adjustment msg <- paste0( - insight::color_text("Model is correctly specified.", "green"), - "\nAll minimal sufficient adjustments to estimate the ", i, " effect were done." + msg, "\nCurrently, the model only adjusts for ", + insight::color_text(datawizard::text_concatenate(out$current_adjustments, enclose = "`"), "yellow"), "." ) } + } - if (effect %in% c("all", i)) { - cat(insight::print_color(insight::format_message( - paste0("Identification of {.i ", i, "} effects\n\n") - ), "blue")) - cat(msg) - cat("\n\n") - } + if (effect %in% c("all", i)) { + cat(insight::print_color(insight::format_message( + paste0("Identification of ", i, " effects\n\n") + ), "blue")) + cat(msg) + cat("\n\n") } } + #' @export plot.check_dag <- function(x, ...) { insight::check_if_installed("see", "to plot DAG") diff --git a/man/check_dag.Rd b/man/check_dag.Rd index 98e254f94..c5153ffb0 100644 --- a/man/check_dag.Rd +++ b/man/check_dag.Rd @@ -101,6 +101,7 @@ to remove cycles from the model. \section{Direct and total effects}{ + The direct effect of an exposure on an outcome is the effect that is not mediated by any other variable in the model. The total effect is the sum of the direct and indirect effects. The function checks if the model is correctly diff --git a/tests/testthat/_snaps/check_dag.md b/tests/testthat/_snaps/check_dag.md index 3a5c07da8..2f2548a78 100644 --- a/tests/testthat/_snaps/check_dag.md +++ b/tests/testthat/_snaps/check_dag.md @@ -7,15 +7,10 @@ - Outcome: y - Exposure: x - Identification of direct effects + Identification of direct and total effects Model is correctly specified. - No adjustment needed to estimate the direct effect of `x` on `y`. - - Identification of total effects - - Model is correctly specified. - No adjustment needed to estimate the total effect of `x` on `y`. + No adjustment needed to estimate the direct and total effect of `x` on `y`. --- @@ -28,15 +23,10 @@ - Exposure: x - Adjustment: b - Identification of direct effects - - Model is correctly specified. - All minimal sufficient adjustments to estimate the direct effect were done. - - Identification of total effects + Identification of direct and total effects Model is correctly specified. - All minimal sufficient adjustments to estimate the total effect were done. + All minimal sufficient adjustments to estimate the direct and total effect were done. --- @@ -48,16 +38,10 @@ - Outcome: y - Exposure: x - Identification of direct effects - - Incorrectly adjusted! - To estimate the direct effect, also adjust for `b`. - Currently, the model does not adjust for any variables. - - Identification of total effects + Identification of direct and total effects Incorrectly adjusted! - To estimate the total effect, also adjust for `b`. + To estimate the direct and total effect, at least adjust for `b`. Currently, the model does not adjust for any variables. @@ -71,16 +55,10 @@ - Exposure: x - Adjustment: c - Identification of direct effects - - Incorrectly adjusted! - To estimate the direct effect, also adjust for `b` and `c`. - Currently, the model only adjusts for `c`. - - Identification of total effects + Identification of direct and total effects Incorrectly adjusted! - To estimate the total effect, also adjust for `b` and `c`. + To estimate the direct and total effect, at least adjust for `b` and `c`. Currently, the model only adjusts for `c`. @@ -94,16 +72,10 @@ - Exposure: x - Adjustment: c - Identification of direct effects - - Incorrectly adjusted! - To estimate the direct effect, also adjust for `b` and `c`. - Currently, the model only adjusts for `c`. - - Identification of total effects + Identification of direct and total effects Incorrectly adjusted! - To estimate the total effect, also adjust for `b` and `c`. + To estimate the direct and total effect, at least adjust for `b` and `c`. Currently, the model only adjusts for `c`. @@ -117,15 +89,10 @@ - Exposure: wt - Adjustments: cyl, disp and gear - Identification of direct effects - - Model is correctly specified. - All minimal sufficient adjustments to estimate the direct effect were done. - - Identification of total effects + Identification of direct and total effects Model is correctly specified. - All minimal sufficient adjustments to estimate the total effect were done. + All minimal sufficient adjustments to estimate the direct and total effect were done. # check_dag, multiple adjustment sets @@ -137,20 +104,10 @@ - Outcome: exam - Exposure: podcast - Identification of direct effects - - Incorrectly adjusted! - To estimate the direct effect, also adjust for one of the following sets: - - alertness, prepared - - alertness, skills_course - - mood, prepared - - mood, skills_course. - Currently, the model does not adjust for any variables. - - Identification of total effects + Identification of direct and total effects Incorrectly adjusted! - To estimate the total effect, also adjust for one of the following sets: + To estimate the direct and total effect, at least adjust for one of the following sets: - alertness, prepared - alertness, skills_course - mood, prepared @@ -168,14 +125,9 @@ - Exposure: podcast - Adjustments: alertness and prepared - Identification of direct effects - - Model is correctly specified. - All minimal sufficient adjustments to estimate the direct effect were done. - - Identification of total effects + Identification of direct and total effects Model is correctly specified. - All minimal sufficient adjustments to estimate the total effect were done. + All minimal sufficient adjustments to estimate the direct and total effect were done. From fb421797d96ed265ffb0126ce975366b09129ff0 Mon Sep 17 00:00:00 2001 From: Daniel Date: Mon, 12 Aug 2024 20:11:29 +0200 Subject: [PATCH 15/41] docs, add tests --- DESCRIPTION | 2 +- R/check_dag.R | 13 +++-- man/check_dag.Rd | 13 +++-- tests/testthat/_snaps/check_dag.md | 87 ++++++++++++++++++++++++++++++ tests/testthat/test-check_dag.R | 42 +++++++++++++++ 5 files changed, 146 insertions(+), 11 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 5c0cc0415..c0345d5ec 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: performance Title: Assessment of Regression Models Performance -Version: 0.12.2.10 +Version: 0.12.2.11 Authors@R: c(person(given = "Daniel", family = "Lüdecke", diff --git a/R/check_dag.R b/R/check_dag.R index e197e7b86..0a6411056 100644 --- a/R/check_dag.R +++ b/R/check_dag.R @@ -6,13 +6,16 @@ #' model is correctly adjusted for identifying specific relationships of #' variables, especially directed (maybe also "causal") effects for given #' exposures on an outcome. In case of incorrect adjustments, the function -#' suggest the minimal required variables that should be adjusted for (sometimes -#' also called "controlled for"), i.e. that need to be included in the model. -#' It returns a **dagitty** object that can be visualized with `plot()`. +#' suggests the minimal required variables that should be adjusted for (sometimes +#' also called "controlled for"), i.e. variables that *at least* need to be +#' included in the model. Depending on the goal of the analysis, it is still +#' possible to add more variables to the model than just the minimally required +#' adjustment sets. #' #' `check_dag()` is a convenient wrapper around `ggdag::dagify()`, #' `dagitty::adjustmentSets()` and `dagitty::adjustedNodes()` to check correct -#' adjustment sets. `as.dag()` is a small convenient function to return the +#' adjustment sets. It returns a **dagitty** object that can be visualized with +#' `plot()`. `as.dag()` is a small convenient function to return the #' dagitty-string, which can be used for the online-tool from the #' dagitty-website. #' @@ -59,7 +62,7 @@ #' The function checks if the model is correctly adjusted for identifying the #' direct and total effects of the exposure on the outcome. If the model is #' correctly specified, no adjustment is needed to estimate the direct effect. -#' If the model is not correctly specified, the function suggests the minimal +#' If the model is not correctly specified, the function suggests the minimally #' required variables that should be adjusted for. The function distinguishes #' between direct and total effects, and checks if the model is correctly #' adjusted for both. If the model is cyclic, the function stops and suggests diff --git a/man/check_dag.Rd b/man/check_dag.Rd index c5153ffb0..266a62ec7 100644 --- a/man/check_dag.Rd +++ b/man/check_dag.Rd @@ -59,13 +59,16 @@ your model based on directed acyclic graphs (DAG). The function checks if a model is correctly adjusted for identifying specific relationships of variables, especially directed (maybe also "causal") effects for given exposures on an outcome. In case of incorrect adjustments, the function -suggest the minimal required variables that should be adjusted for (sometimes -also called "controlled for"), i.e. that need to be included in the model. -It returns a \strong{dagitty} object that can be visualized with \code{plot()}. +suggests the minimal required variables that should be adjusted for (sometimes +also called "controlled for"), i.e. variables that \emph{at least} need to be +included in the model. Depending on the goal of the analysis, it is still +possible to add more variables to the model than just the minimally required +adjustment sets. \code{check_dag()} is a convenient wrapper around \code{ggdag::dagify()}, \code{dagitty::adjustmentSets()} and \code{dagitty::adjustedNodes()} to check correct -adjustment sets. \code{as.dag()} is a small convenient function to return the +adjustment sets. It returns a \strong{dagitty} object that can be visualized with +\code{plot()}. \code{as.dag()} is a small convenient function to return the dagitty-string, which can be used for the online-tool from the dagitty-website. } @@ -92,7 +95,7 @@ unmeasured cause, or unmeasured confounding, of the two involved variables. The function checks if the model is correctly adjusted for identifying the direct and total effects of the exposure on the outcome. If the model is correctly specified, no adjustment is needed to estimate the direct effect. -If the model is not correctly specified, the function suggests the minimal +If the model is not correctly specified, the function suggests the minimally required variables that should be adjusted for. The function distinguishes between direct and total effects, and checks if the model is correctly adjusted for both. If the model is cyclic, the function stops and suggests diff --git a/tests/testthat/_snaps/check_dag.md b/tests/testthat/_snaps/check_dag.md index 2f2548a78..47420aa89 100644 --- a/tests/testthat/_snaps/check_dag.md +++ b/tests/testthat/_snaps/check_dag.md @@ -131,3 +131,90 @@ All minimal sufficient adjustments to estimate the direct and total effect were done. +# check_dag, different adjustements for total and direct + + Code + print(dag) + Output + # Check for correct adjustment sets + - Outcome: outcome + - Exposure: exposure + + Identification of direct effects + + Incorrectly adjusted! + To estimate the direct effect, at least adjust for `x1` and `x2`. + Currently, the model does not adjust for any variables. + + Identification of total effects + + Incorrectly adjusted! + To estimate the total effect, at least adjust for `x1`. + Currently, the model does not adjust for any variables. + + +--- + + Code + print(dag) + Output + # Check for correct adjustment sets + - Outcome: outcome + - Exposure: exposure + - Adjustment: x1 + + Identification of direct effects + + Incorrectly adjusted! + To estimate the direct effect, at least adjust for `x1` and `x2`. + Currently, the model only adjusts for `x1`. + + Identification of total effects + + Model is correctly specified. + All minimal sufficient adjustments to estimate the total effect were done. + + +--- + + Code + print(dag) + Output + # Check for correct adjustment sets + - Outcome: outcome + - Exposure: exposure + - Adjustment: x2 + + Identification of direct effects + + Incorrectly adjusted! + To estimate the direct effect, at least adjust for `x1` and `x2`. + Currently, the model only adjusts for `x2`. + + Identification of total effects + + Incorrectly adjusted! + To estimate the total effect, do not adjust for `x2`. + + +--- + + Code + print(dag) + Output + # Check for correct adjustment sets + - Outcome: outcome + - Exposure: exposure + - Adjustments: x1 and x2 + + Identification of direct effects + + Model is correctly specified. + All minimal sufficient adjustments to estimate the direct effect were done. + + Identification of total effects + + Incorrectly adjusted! + To estimate the total effect, do not adjust for `x1` and `x2`. + + diff --git a/tests/testthat/test-check_dag.R b/tests/testthat/test-check_dag.R index efbffac30..1ac94d520 100644 --- a/tests/testthat/test-check_dag.R +++ b/tests/testthat/test-check_dag.R @@ -91,3 +91,45 @@ test_that("check_dag, multiple adjustment sets", { ) expect_snapshot(print(dag)) }) + + +test_that("check_dag, different adjustements for total and direct", { + dag <- check_dag( + outcome ~ exposure + x1 + x2, + x2 ~ exposure, + exposure ~ x1, + outcome = "outcome", + exposure = "exposure" + ) + expect_snapshot(print(dag)) + + dag <- check_dag( + outcome ~ exposure + x1 + x2, + x2 ~ exposure, + exposure ~ x1, + adjusted = "x1", + outcome = "outcome", + exposure = "exposure" + ) + expect_snapshot(print(dag)) + + dag <- check_dag( + outcome ~ exposure + x1 + x2, + x2 ~ exposure, + exposure ~ x1, + adjusted = "x2", + outcome = "outcome", + exposure = "exposure" + ) + expect_snapshot(print(dag)) + + dag <- check_dag( + outcome ~ exposure + x1 + x2, + x2 ~ exposure, + exposure ~ x1, + adjusted = c("x1", "x2"), + outcome = "outcome", + exposure = "exposure" + ) + expect_snapshot(print(dag)) +}) From 852c1d9af91680efeaaeda599babcbe38f824d76 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 25 Aug 2024 19:03:36 +0200 Subject: [PATCH 16/41] update `check_heterogeneity_bias()` (#765) --- DESCRIPTION | 4 ++-- NEWS.md | 6 ++++++ R/check_heterogeneity_bias.R | 34 +++++++++++++++++++++++++++------ man/check_heterogeneity_bias.Rd | 31 +++++++++++++++++++++++++++--- 4 files changed, 64 insertions(+), 11 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index c0345d5ec..ee591ae47 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: performance Title: Assessment of Regression Models Performance -Version: 0.12.2.11 +Version: 0.12.2.12 Authors@R: c(person(given = "Daniel", family = "Lüdecke", @@ -156,4 +156,4 @@ Config/Needs/website: r-lib/pkgdown, easystats/easystatstemplate Config/rcmdcheck/ignore-inconsequential-notes: true -Remotes: easystats/see +Remotes: easystats/see, easystats/insight diff --git a/NEWS.md b/NEWS.md index fa8d7451d..7be029839 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,6 +4,12 @@ * `check_dag()`, to check DAGs for correct adjustment sets. +## Changes + +* `check_heterogeneity_bias()` gets a `nested` argument. Furthermore, `by` can + specify more than one variable, meaning that nested or cross-classified + model designs can also be tested for heterogeneity bias. + # performance 0.12.2 Patch release, to ensure that _performance_ runs with older version of diff --git a/R/check_heterogeneity_bias.R b/R/check_heterogeneity_bias.R index 424bf8b5b..3c9b502ce 100644 --- a/R/check_heterogeneity_bias.R +++ b/R/check_heterogeneity_bias.R @@ -9,8 +9,24 @@ #' that should be checked. If `x` is a mixed model object, this argument #' will be ignored. #' @param by Character vector (or formula) with the name of the variable that -#' indicates the group- or cluster-ID. If `x` is a model object, this -#' argument will be ignored. +#' indicates the group- or cluster-ID. For cross-classified or nested designs, +#' `by` can also identify two or more variables as group- or cluster-IDs. If +#' the data is nested and should be treated as such, set `nested = TRUE`. Else, +#' if `by` defines two or more variables and `nested = FALSE`, a cross-classified +#' design is assumed. If `x` is a model object, this argument will be ignored. +#' +#' For nested designs, `by` can be: +#' - a character vector with the name of the variable that indicates the +#' levels, ordered from *highest* level to *lowest* (e.g. +#' `by = c("L4", "L3", "L2")`. +#' - a character vector with variable names in the format `by = "L4/L3/L2"`, +#' where the levels are separated by `/`. +#' +#' See also section _De-meaning for cross-classified designs_ and +#' _De-meaning for nested designs_ below. +#' @param nested Logical, if `TRUE`, the data is treated as nested. If `FALSE`, +#' the data is treated as cross-classified. Only applies if `by` contains more +#' than one variable. #' @param group Deprecated. Use `by` instead. #' #' @seealso @@ -28,7 +44,7 @@ #' iris$ID <- sample(1:4, nrow(iris), replace = TRUE) # fake-ID #' check_heterogeneity_bias(iris, select = c("Sepal.Length", "Petal.Length"), by = "ID") #' @export -check_heterogeneity_bias <- function(x, select = NULL, by = NULL, group = NULL) { +check_heterogeneity_bias <- function(x, select = NULL, by = NULL, nested = FALSE, group = NULL) { insight::check_if_installed("datawizard", minimum_version = "0.12.0") ## TODO: deprecate later @@ -54,8 +70,14 @@ check_heterogeneity_bias <- function(x, select = NULL, by = NULL, group = NULL) my_data <- x } - unique_groups <- .n_unique(my_data[[by]]) - combinations <- expand.grid(select, by) + # for nested designs? + if (nested) { + # separate level-indicators with "/", as supported by datawizard + by <- paste(by, collapse = "/") + } + + # create all combinations that should be checked + combinations <- expand.grid(select, by[1]) result <- Map(function(predictor, id) { # demean predictor @@ -72,7 +94,7 @@ check_heterogeneity_bias <- function(x, select = NULL, by = NULL, group = NULL) } else { NULL } - }, as.character(combinations[[1]]), as.character(combinations[[2]])) + }, as.character(combinations[[1]]), by) out <- unlist(insight::compact_list(result), use.names = FALSE) diff --git a/man/check_heterogeneity_bias.Rd b/man/check_heterogeneity_bias.Rd index 46f9f70a5..228d26510 100644 --- a/man/check_heterogeneity_bias.Rd +++ b/man/check_heterogeneity_bias.Rd @@ -4,7 +4,13 @@ \alias{check_heterogeneity_bias} \title{Check model predictor for heterogeneity bias} \usage{ -check_heterogeneity_bias(x, select = NULL, by = NULL, group = NULL) +check_heterogeneity_bias( + x, + select = NULL, + by = NULL, + nested = FALSE, + group = NULL +) } \arguments{ \item{x}{A data frame or a mixed model object.} @@ -14,8 +20,27 @@ that should be checked. If \code{x} is a mixed model object, this argument will be ignored.} \item{by}{Character vector (or formula) with the name of the variable that -indicates the group- or cluster-ID. If \code{x} is a model object, this -argument will be ignored.} +indicates the group- or cluster-ID. For cross-classified or nested designs, +\code{by} can also identify two or more variables as group- or cluster-IDs. If +the data is nested and should be treated as such, set \code{nested = TRUE}. Else, +if \code{by} defines two or more variables and \code{nested = FALSE}, a cross-classified +design is assumed. If \code{x} is a model object, this argument will be ignored. + +For nested designs, \code{by} can be: +\itemize{ +\item a character vector with the name of the variable that indicates the +levels, ordered from \emph{highest} level to \emph{lowest} (e.g. +\code{by = c("L4", "L3", "L2")}. +\item a character vector with variable names in the format \code{by = "L4/L3/L2"}, +where the levels are separated by \code{/}. +} + +See also section \emph{De-meaning for cross-classified designs} and +\emph{De-meaning for nested designs} below.} + +\item{nested}{Logical, if \code{TRUE}, the data is treated as nested. If \code{FALSE}, +the data is treated as cross-classified. Only applies if \code{by} contains more +than one variable.} \item{group}{Deprecated. Use \code{by} instead.} } From d27281c0edcd5d742f52d79e83f9d31121cf8bcc Mon Sep 17 00:00:00 2001 From: Joseph Luchman Date: Wed, 28 Aug 2024 06:16:42 -0500 Subject: [PATCH 17/41] Adding `r2_mlm()` and possible update to `r2()` to work with it (#764) Co-authored-by: Daniel --- DESCRIPTION | 8 +++- NAMESPACE | 2 + R/print-methods.R | 40 ++++++++++++----- R/r2.R | 40 ++++++++++------- R/r2_mlm.R | 85 ++++++++++++++++++++++++++++++++++++ inst/WORDLIST | 4 ++ man/performance-package.Rd | 1 + man/r2.Rd | 9 +++- man/r2_mlm.Rd | 74 +++++++++++++++++++++++++++++++ tests/testthat/test-r2_mlm.R | 9 ++++ 10 files changed, 241 insertions(+), 31 deletions(-) create mode 100644 R/r2_mlm.R create mode 100644 man/r2_mlm.Rd create mode 100644 tests/testthat/test-r2_mlm.R diff --git a/DESCRIPTION b/DESCRIPTION index ee591ae47..593afc5b1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: performance Title: Assessment of Regression Models Performance -Version: 0.12.2.12 +Version: 0.12.2.13 Authors@R: c(person(given = "Daniel", family = "Lüdecke", @@ -52,7 +52,11 @@ Authors@R: "Bacher", , "etienne.bacher@protonmail.com", role = "ctb", - comment = c(ORCID = "0000-0002-9271-5075"))) + comment = c(ORCID = "0000-0002-9271-5075")), + person(given = "Joseph", + family = "Luchman", + role = "ctb", + comment = c(ORCID = "0000-0002-8886-9717"))) Maintainer: Daniel Lüdecke Description: Utilities for computing measures to assess model quality, which are not directly provided by R's 'base' or 'stats' packages. diff --git a/NAMESPACE b/NAMESPACE index e0543f2b8..67add6a04 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -486,6 +486,7 @@ S3method(r2_mcfadden,serp) S3method(r2_mcfadden,truncreg) S3method(r2_mcfadden,vglm) S3method(r2_mckelvey,default) +S3method(r2_mlm,mlm) S3method(r2_nagelkerke,BBreg) S3method(r2_nagelkerke,DirichletRegModel) S3method(r2_nagelkerke,bife) @@ -610,6 +611,7 @@ export(r2_loo) export(r2_loo_posterior) export(r2_mcfadden) export(r2_mckelvey) +export(r2_mlm) export(r2_nagelkerke) export(r2_nakagawa) export(r2_posterior) diff --git a/R/print-methods.R b/R/print-methods.R index e3cdbbfed..1509188fa 100644 --- a/R/print-methods.R +++ b/R/print-methods.R @@ -68,23 +68,39 @@ print.r2_pseudo <- function(x, digits = 3, ...) { #' @export print.r2_mlm <- function(x, digits = 3, ...) { model_type <- attr(x, "model_type") - if (!is.null(model_type)) { - insight::print_color(sprintf("# R2 for %s Regression\n\n", model_type), "blue") + is_multivar_r2 <- all(names(x) == c("Symmetric Rxy", "Asymmetric Pxy")) + if (!is.null(model_type) && !is_multivar_r2) { + insight::print_color( + sprintf("# R2 for %s Regression\n\n", model_type), + "blue" + ) + } else if (!is.null(model_type) && is_multivar_r2) { + insight::print_color( + sprintf("# Multivariate R2 for %s Regression\n", model_type), + "blue" + ) } else { insight::print_color("# R2\n\n", "blue") } - for (i in names(x)) { - insight::print_color(sprintf("## %s\n", i), "cyan") - out <- paste0( - c( - sprintf(" R2: %.*f", digits, x[[i]]$R2), - sprintf(" adj. R2: %.*f", digits, x[[i]]$R2_adjusted) - ), - collapse = "\n" - ) - cat(out) + if (is_multivar_r2) { + cat(sprintf(" Symmetric Rxy: %.*f", digits, x[["Symmetric Rxy"]])) + cat("\n") + cat(sprintf("Asymmetric Pxy: %.*f", digits, x[["Asymmetric Pxy"]])) cat("\n\n") + } else { + for (i in names(x)) { + insight::print_color(sprintf("## %s\n", i), "cyan") + out <- paste( + c( + sprintf(" R2: %.*f", digits, x[[i]]$R2), + sprintf(" adj. R2: %.*f", digits, x[[i]]$R2_adjusted) + ), + collapse = "\n" + ) + cat(out) + cat("\n\n") + } } invisible(x) } diff --git a/R/r2.R b/R/r2.R index dbfc437fc..aeddd56ce 100644 --- a/R/r2.R +++ b/R/r2.R @@ -10,6 +10,9 @@ #' (`TRUE`) or not (`FALSE`)? #' @param ci Confidence interval level, as scalar. If `NULL` (default), no #' confidence intervals for R2 are calculated. +#' @param multivariate Logical. Should multiple R2 values be reported as +#' separated by response (FALSE) or should a single R2 be reported as +#' combined across responses computed by [`r2_mlm`] (TRUE). #' @param ... Arguments passed down to the related r2-methods. #' @inheritParams r2_nakagawa #' @@ -31,7 +34,7 @@ #' @seealso #' [`r2_bayes()`], [`r2_coxsnell()`], [`r2_kullback()`], [`r2_loo()`], #' [`r2_mcfadden()`], [`r2_nagelkerke()`], [`r2_nakagawa()`], [`r2_tjur()`], -#' [`r2_xu()`] and [`r2_zeroinflated()`]. +#' [`r2_xu()`], [`r2_zeroinflated()`], and [`r2_mlm()`]. #' #' @examplesIf require("lme4") #' # Pseudo r-quared for GLM @@ -245,24 +248,29 @@ r2.aov <- function(model, ci = NULL, ...) { structure(class = "r2_generic", out) } - +#' @rdname r2 #' @export -r2.mlm <- function(model, ...) { - model_summary <- summary(model) +r2.mlm <- function(model, multivariate = TRUE, ...) { - out <- lapply(names(model_summary), function(i) { - tmp <- list( - R2 = model_summary[[i]]$r.squared, - R2_adjusted = model_summary[[i]]$adj.r.squared, - Response = sub("Response ", "", i, fixed = TRUE) - ) - names(tmp$R2) <- "R2" - names(tmp$R2_adjusted) <- "adjusted R2" - names(tmp$Response) <- "Response" - tmp - }) + if (multivariate) { + out <- r2_mlm(model) + } else { + model_summary <- summary(model) + + out <- lapply(names(model_summary), function(i) { + tmp <- list( + R2 = model_summary[[i]]$r.squared, + R2_adjusted = model_summary[[i]]$adj.r.squared, + Response = sub("Response ", "", i, fixed = TRUE) + ) + names(tmp$R2) <- "R2" + names(tmp$R2_adjusted) <- "adjusted R2" + names(tmp$Response) <- "Response" + tmp + }) - names(out) <- names(model_summary) + names(out) <- names(model_summary) + } attr(out, "model_type") <- "Multivariate Linear" structure(class = "r2_mlm", out) diff --git a/R/r2_mlm.R b/R/r2_mlm.R new file mode 100644 index 000000000..0f08ad235 --- /dev/null +++ b/R/r2_mlm.R @@ -0,0 +1,85 @@ +#' @title Multivariate R2 +#' @name r2_mlm +#' +#' @description +#' Calculates two multivariate R2 values for multivariate linear regression. +#' +#' @param model Multivariate linear regression model. +#' @param ... Currently not used. +#' +#' @details +#' The two indexes returned summarize model fit for the set of predictors +#' given the system of responses. As compared to the default +#' [r2][performance::r2] index for multivariate linear models, the indexes +#' returned by this function provide a single fit value collapsed across +#' all responses. +#' +#' The two returned indexes were proposed by *Van den Burg and Lewis (1988)* +#' as an extension of the metrics proposed by *Cramer and Nicewander (1979)*. +#' Of the numerous indexes proposed across these two papers, only two metrics, +#' the \eqn{R_{xy}} and \eqn{P_{xy}}, are recommended for use +#' by *Azen and Budescu (2006)*. +#' +#' For a multivariate linear regression with \eqn{p} predictors and +#' \eqn{q} responses where \eqn{p > q}, the \eqn{R_{xy}} index is +#' computed as: +#' +#' \deqn{R_{xy} = 1 - \prod_{i=1}^p (1 - \rho_i^2)} +#' +#' Where \eqn{\rho} is a canonical variate from a +#' [canonical correlation][cancor] between the predictors and responses. +#' This metric is symmetric and its value does not change when the roles of +#' the variables as predictors or responses are swapped. +#' +#' The \eqn{P_{xy}} is computed as: +#' +#' \deqn{P_{xy} = \frac{q - trace(\bf{S}_{\bf{YY}}^{-1}\bf{S}_{\bf{YY|X}})}{q}} +#' +#' Where \eqn{\bf{S}_{\bf{YY}}} is the matrix of response covariances and +#' \eqn{\bf{S}_{\bf{YY|X}}} is the matrix of residual covariances given +#' the predictors. This metric is asymmetric and can change +#' depending on which variables are considered predictors versus responses. +#' +#' @return A named vector with the R2 values. +#' +#' @examples +#' model <- lm(cbind(qsec, drat) ~ wt + mpg + cyl, data = mtcars) +#' r2_mlm(model) +#' +#' model_swap <- lm(cbind(wt, mpg, cyl) ~ qsec + drat, data = mtcars) +#' r2_mlm(model_swap) +#' +#' @references +#' - Azen, R., & Budescu, D. V. (2006). Comparing predictors in +#' multivariate regression models: An extension of dominance analysis. +#' Journal of Educational and Behavioral Statistics, 31(2), 157-180. +#'- Cramer, E. M., & Nicewander, W. A. (1979). Some symmetric, +#' invariant measures of multivariate association. Psychometrika, 44, 43-54. +#' - Van den Burg, W., & Lewis, C. (1988). Some properties of two +#' measures of multivariate association. Psychometrika, 53, 109-122. +#' +#' @author Joseph Luchman +#' +#' @export +r2_mlm <- function(model, ...) { + UseMethod("r2_mlm") +} + +# methods --------------------------- + +#' @export +r2_mlm.mlm <- function(model, verbose = TRUE, ...) { + rho2_vec <- 1 - stats::cancor( + insight::get_predictors(model), + insight::get_response(model) + )$cor^2 + R_xy <- 1 - Reduce(`*`, rho2_vec, 1) + + resid_cov <- stats::cov(residuals(model)) + resp_cov <- stats::cov(insight::get_response(model)) + qq <- ncol(insight::get_response(model)) + V_xy <- qq - sum(diag(solve(resp_cov) %*% resid_cov)) + P_xy <- V_xy / qq + + c("Symmetric Rxy" = R_xy, "Asymmetric Pxy" = P_xy) +} diff --git a/inst/WORDLIST b/inst/WORDLIST index 2366773d8..2989faf72 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -9,6 +9,7 @@ Ankerst Archimbaud Arel Asq +Azen BCI BFBayesFactor BMJ @@ -27,6 +28,7 @@ Breunig Breusch BRM Bryk +Budescu Bundock Burnham Byrne @@ -40,6 +42,7 @@ CochransQ CompQuadForm Concurvity Confounder +Cramer Cribari Cronbach's Crujeiras @@ -162,6 +165,7 @@ Neto's Nondegenerate Nordhausen Normed +Nicewander ORCID OSF OSX diff --git a/man/performance-package.Rd b/man/performance-package.Rd index b3f1d5a3f..f7a05c369 100644 --- a/man/performance-package.Rd +++ b/man/performance-package.Rd @@ -52,6 +52,7 @@ Other contributors: \item Martin Jullum [reviewer] \item gjo11 [reviewer] \item Etienne Bacher \email{etienne.bacher@protonmail.com} (\href{https://orcid.org/0000-0002-9271-5075}{ORCID}) [contributor] + \item Joseph Luchman (\href{https://orcid.org/0000-0002-8886-9717}{ORCID}) [contributor] } } diff --git a/man/r2.Rd b/man/r2.Rd index bf783e8d9..a8d514908 100644 --- a/man/r2.Rd +++ b/man/r2.Rd @@ -3,6 +3,7 @@ \name{r2} \alias{r2} \alias{r2.default} +\alias{r2.mlm} \alias{r2.merMod} \title{Compute the model's R2} \usage{ @@ -10,6 +11,8 @@ r2(model, ...) \method{r2}{default}(model, ci = NULL, verbose = TRUE, ...) +\method{r2}{mlm}(model, multivariate = TRUE, ...) + \method{r2}{merMod}(model, ci = NULL, tolerance = 1e-05, ...) } \arguments{ @@ -23,6 +26,10 @@ confidence intervals for R2 are calculated.} \item{verbose}{Logical. Should details about R2 and CI methods be given (\code{TRUE}) or not (\code{FALSE})?} +\item{multivariate}{Logical. Should multiple R2 values be reported as +separated by response (FALSE) or should a single R2 be reported as +combined across responses computed by \code{\link{r2_mlm}} (TRUE).} + \item{tolerance}{Tolerance for singularity check of random effects, to decide whether to compute random effect variances for the conditional r-squared or not. Indicates up to which value the convergence result is accepted. When @@ -70,5 +77,5 @@ r2(model) \seealso{ \code{\link[=r2_bayes]{r2_bayes()}}, \code{\link[=r2_coxsnell]{r2_coxsnell()}}, \code{\link[=r2_kullback]{r2_kullback()}}, \code{\link[=r2_loo]{r2_loo()}}, \code{\link[=r2_mcfadden]{r2_mcfadden()}}, \code{\link[=r2_nagelkerke]{r2_nagelkerke()}}, \code{\link[=r2_nakagawa]{r2_nakagawa()}}, \code{\link[=r2_tjur]{r2_tjur()}}, -\code{\link[=r2_xu]{r2_xu()}} and \code{\link[=r2_zeroinflated]{r2_zeroinflated()}}. +\code{\link[=r2_xu]{r2_xu()}}, \code{\link[=r2_zeroinflated]{r2_zeroinflated()}}, and \code{\link[=r2_mlm]{r2_mlm()}}. } diff --git a/man/r2_mlm.Rd b/man/r2_mlm.Rd new file mode 100644 index 000000000..50b5389fd --- /dev/null +++ b/man/r2_mlm.Rd @@ -0,0 +1,74 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/r2_mlm.R +\name{r2_mlm} +\alias{r2_mlm} +\title{Multivariate R2} +\usage{ +r2_mlm(model, ...) +} +\arguments{ +\item{model}{Multivariate linear regression model.} + +\item{...}{Currently not used.} +} +\value{ +A named vector with the R2 values. +} +\description{ +Calculates two multivariate R2 values for multivariate linear regression. +} +\details{ +The two indexes returned summarize model fit for the set of predictors +given the system of responses. As compared to the default +\link[=r2]{r2} index for multivariate linear models, the indexes +returned by this function provide a single fit value collapsed across +all responses. + +The two returned indexes were proposed by \emph{Van den Burg and Lewis (1988)} +as an extension of the metrics proposed by \emph{Cramer and Nicewander (1979)}. +Of the numerous indexes proposed across these two papers, only two metrics, +the \eqn{R_{xy}} and \eqn{P_{xy}}, are recommended for use +by \emph{Azen and Budescu (2006)}. + +For a multivariate linear regression with \eqn{p} predictors and +\eqn{q} responses where \eqn{p > q}, the \eqn{R_{xy}} index is +computed as: + +\deqn{R_{xy} = 1 - \prod_{i=1}^p (1 - \rho_i^2)} + +Where \eqn{\rho} is a canonical variate from a +\link[=cancor]{canonical correlation} between the predictors and responses. +This metric is symmetric and its value does not change when the roles of +the variables as predictors or responses are swapped. + +The \eqn{P_{xy}} is computed as: + +\deqn{P_{xy} = \frac{q - trace(\bf{S}_{\bf{YY}}^{-1}\bf{S}_{\bf{YY|X}})}{q}} + +Where \eqn{\bf{S}_{\bf{YY}}} is the matrix of response covariances and +\eqn{\bf{S}_{\bf{YY|X}}} is the matrix of residual covariances given +the predictors. This metric is asymmetric and can change +depending on which variables are considered predictors versus responses. +} +\examples{ +model <- lm(cbind(qsec, drat) ~ wt + mpg + cyl, data = mtcars) +r2_mlm(model) + +model_swap <- lm(cbind(wt, mpg, cyl) ~ qsec + drat, data = mtcars) +r2_mlm(model_swap) + +} +\references{ +\itemize{ +\item Azen, R., & Budescu, D. V. (2006). Comparing predictors in +multivariate regression models: An extension of dominance analysis. +Journal of Educational and Behavioral Statistics, 31(2), 157-180. +\item Cramer, E. M., & Nicewander, W. A. (1979). Some symmetric, +invariant measures of multivariate association. Psychometrika, 44, 43-54. +\item Van den Burg, W., & Lewis, C. (1988). Some properties of two +measures of multivariate association. Psychometrika, 53, 109-122. +} +} +\author{ +Joseph Luchman +} diff --git a/tests/testthat/test-r2_mlm.R b/tests/testthat/test-r2_mlm.R new file mode 100644 index 000000000..a532b57a7 --- /dev/null +++ b/tests/testthat/test-r2_mlm.R @@ -0,0 +1,9 @@ +test_that("r2_mlm_Rxy", { + model <- lm(cbind(qsec, drat) ~ wt + mpg, data = mtcars) + expect_equal(r2_mlm(model)[["Symmetric Rxy"]], 0.68330688076502, tolerance = 1e-3) +}) + +test_that("r2_mlm_Pxy", { + model <- lm(cbind(qsec, drat) ~ wt + mpg, data = mtcars) + expect_equal(r2_mlm(model)[["Asymmetric Pxy"]], 0.407215267524997, tolerance = 1e-3) +}) From 11a881bc077a35953b5196391d3a30a967846e76 Mon Sep 17 00:00:00 2001 From: Daniel Date: Wed, 28 Aug 2024 15:26:52 +0200 Subject: [PATCH 18/41] lintr, spell --- R/print-methods.R | 10 ++++++++-- inst/WORDLIST | 1 + 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/R/print-methods.R b/R/print-methods.R index 1509188fa..4b4a81aa7 100644 --- a/R/print-methods.R +++ b/R/print-methods.R @@ -126,7 +126,10 @@ print.r2_bayes <- function(x, digits = 3, ...) { ci = attributes(x)$CI$R2_Bayes_marginal$CI, digits = digits ) - out <- paste0(c(out, sprintf(" Marginal R2: %.*f (%s)", digits, x$R2_Bayes_marginal, r2_marginal_ci)), collapse = "\n") + out <- paste(c( + out, + sprintf(" Marginal R2: %.*f (%s)", digits, x$R2_Bayes_marginal, r2_marginal_ci) + ), collapse = "\n") } cat(out) @@ -155,7 +158,10 @@ print.r2_loo <- function(x, digits = 3, ...) { ci = attributes(x)$CI$R2_loo_marginal$CI, digits = digits ) - out <- paste0(c(out, sprintf(" Marginal R2: %.*f (%s)", digits, x$R2_loo_marginal, r2_marginal_ci)), collapse = "\n") + out <- paste(c( + out, + sprintf(" Marginal R2: %.*f (%s)", digits, x$R2_loo_marginal, r2_marginal_ci) + ), collapse = "\n") } cat(out) diff --git a/inst/WORDLIST b/inst/WORDLIST index 2989faf72..9c77a11fd 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -51,6 +51,7 @@ DAGs DBSCAN DOI Datenerhebung +De Delacre Deskriptivstatistische DHARMa From b9d4917c760574279e142b182f90a3bd50b85cf8 Mon Sep 17 00:00:00 2001 From: Daniel Date: Mon, 2 Sep 2024 20:04:01 +0200 Subject: [PATCH 19/41] Prepare CRAN submission (#767) --- CRAN-SUBMISSION | 6 +++--- DESCRIPTION | 3 +-- NAMESPACE | 4 ++++ R/check_dag.R | 2 +- R/check_singularity.R | 3 ++- R/model_performance.lm.R | 20 +++++++++++++++---- R/r2.R | 1 - R/r2_coxsnell.R | 20 +++++++++---------- R/r2_mlm.R | 4 ++-- cran-comments.md | 2 +- man/check_dag.Rd | 2 +- man/check_singularity.Rd | 2 ++ man/model_performance.lm.Rd | 8 ++++---- man/r2_coxsnell.Rd | 20 +++++++++---------- .../testthat/test-check_heterogeneity_bias.R | 16 +++++++++++++++ .../test-model_performance.bayesian.R | 2 +- 16 files changed, 74 insertions(+), 41 deletions(-) diff --git a/CRAN-SUBMISSION b/CRAN-SUBMISSION index abb3c037a..4a0f8ec1b 100644 --- a/CRAN-SUBMISSION +++ b/CRAN-SUBMISSION @@ -1,3 +1,3 @@ -Version: 0.12.2 -Date: 2024-07-17 21:02:38 UTC -SHA: d4c45126ca666644785dc64d2af6b87eee9ca39b +Version: 0.12.3 +Date: 2024-09-02 16:10:51 UTC +SHA: dfbe03fd4961ee9049d5169275248a4ef7a5a21e diff --git a/DESCRIPTION b/DESCRIPTION index 593afc5b1..bf2622588 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: performance Title: Assessment of Regression Models Performance -Version: 0.12.2.13 +Version: 0.12.3 Authors@R: c(person(given = "Daniel", family = "Lüdecke", @@ -160,4 +160,3 @@ Config/Needs/website: r-lib/pkgdown, easystats/easystatstemplate Config/rcmdcheck/ignore-inconsequential-notes: true -Remotes: easystats/see, easystats/insight diff --git a/NAMESPACE b/NAMESPACE index 67add6a04..08f6bcc15 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -170,12 +170,14 @@ S3method(model_performance,censReg) S3method(model_performance,clm) S3method(model_performance,clm2) S3method(model_performance,coxph) +S3method(model_performance,coxph_weightit) S3method(model_performance,default) S3method(model_performance,felm) S3method(model_performance,fixest) S3method(model_performance,fixest_multi) S3method(model_performance,flexsurvreg) S3method(model_performance,glm) +S3method(model_performance,glm_weightit) S3method(model_performance,glmmTMB) S3method(model_performance,glmmadmb) S3method(model_performance,glmx) @@ -197,9 +199,11 @@ S3method(model_performance,mixor) S3method(model_performance,mlogit) S3method(model_performance,model_fit) S3method(model_performance,multinom) +S3method(model_performance,multinom_weightit) S3method(model_performance,negbinirr) S3method(model_performance,negbinmfx) S3method(model_performance,nestedLogit) +S3method(model_performance,ordinal_weightit) S3method(model_performance,plm) S3method(model_performance,poissonirr) S3method(model_performance,poissonmfx) diff --git a/R/check_dag.R b/R/check_dag.R index 0a6411056..dcfe8f33c 100644 --- a/R/check_dag.R +++ b/R/check_dag.R @@ -103,7 +103,7 @@ #' Interpreting Confounder and Modifier Coefficients. American Journal of #' Epidemiology, 177(4), 292–298. \doi{10.1093/aje/kws412} #' -#' @examplesIf require("ggdag", quietly = TRUE) && require("dagitty", quietly = TRUE) && require("see", quietly = TRUE) +#' @examplesIf require("ggdag", quietly = TRUE) && require("dagitty", quietly = TRUE) && require("see", quietly = TRUE) && packageVersion("see") > "0.8.5" #' # no adjustment needed #' check_dag( #' y ~ x + b, diff --git a/R/check_singularity.R b/R/check_singularity.R index e2e369f61..86de4fcce 100644 --- a/R/check_singularity.R +++ b/R/check_singularity.R @@ -97,6 +97,7 @@ #' ) #' check_singularity(model) #' +#' \dontrun{ #' # Fixing singularity issues using priors in glmmTMB #' # Example taken from `vignette("priors", package = "glmmTMB")` #' dat <- readRDS(system.file( @@ -120,7 +121,7 @@ #' model_with_priors <- update(model, priors = prior) #' # no singular fit #' check_singularity(model_with_priors) -#' +#' } #' @export check_singularity <- function(x, tolerance = 1e-5, ...) { UseMethod("check_singularity") diff --git a/R/model_performance.lm.R b/R/model_performance.lm.R index c797b396f..0199b4ce4 100644 --- a/R/model_performance.lm.R +++ b/R/model_performance.lm.R @@ -3,10 +3,10 @@ #' Compute indices of model performance for regression models. #' #' @param model A model. -#' @param metrics Can be `"all"`, `"common"` or a character vector of -#' metrics to be computed (one or more of `"AIC"`, `"AICc"`, `"BIC"`, `"R2"`, -#' `"R2_adj"`, `"RMSE"`, `"SIGMA"`, `"LOGLOSS"`, `"PCP"`, `"SCORE"`). -#' `"common"` will compute AIC, BIC, R2 and RMSE. +#' @param metrics Can be `"all"`, `"common"` or a character vector of metrics to +#' be computed (one or more of `"AIC"`, `"AICc"`, `"BIC"`, `"R2"`, `"R2_adj"`, +#' `"RMSE"`, `"SIGMA"`, `"LOGLOSS"`, `"PCP"`, `"SCORE"`). `"common"` will +#' compute AIC, BIC, R2 and RMSE. #' @param verbose Toggle off warnings. #' @param ... Arguments passed to or from other methods. #' @@ -209,6 +209,18 @@ model_performance.lm_robust <- model_performance.lm #' @export model_performance.multinom <- model_performance.lm +#' @export +model_performance.multinom_weightit <- model_performance.lm + +#' @export +model_performance.ordinal_weightit <- model_performance.lm + +#' @export +model_performance.coxph_weightit <- model_performance.lm + +#' @export +model_performance.glm_weightit <- model_performance.lm + #' @export model_performance.plm <- model_performance.lm diff --git a/R/r2.R b/R/r2.R index aeddd56ce..d62a0c477 100644 --- a/R/r2.R +++ b/R/r2.R @@ -251,7 +251,6 @@ r2.aov <- function(model, ci = NULL, ...) { #' @rdname r2 #' @export r2.mlm <- function(model, multivariate = TRUE, ...) { - if (multivariate) { out <- r2_mlm(model) } else { diff --git a/R/r2_coxsnell.R b/R/r2_coxsnell.R index 3db7197b7..36df504ec 100644 --- a/R/r2_coxsnell.R +++ b/R/r2_coxsnell.R @@ -8,12 +8,12 @@ #' @param ... Currently not used. #' #' @details -#' This index was proposed by *Cox and Snell (1989, pp. 208-9)* and, -#' apparently independently, by *Magee (1990)*; but had been suggested -#' earlier for binary response models by *Maddala (1983)*. However, this -#' index achieves a maximum of less than 1 for discrete models (i.e. models -#' whose likelihood is a product of probabilities) which have a maximum of 1, -#' instead of densities, which can become infinite *(Nagelkerke, 1991)*. +#' This index was proposed by *Cox and Snell (1989, pp. 208-9)* and, apparently +#' independently, by *Magee (1990)*; but had been suggested earlier for binary +#' response models by *Maddala (1983)*. However, this index achieves a maximum +#' of less than 1 for discrete models (i.e. models whose likelihood is a product +#' of probabilities) which have a maximum of 1, instead of densities, which can +#' become infinite *(Nagelkerke, 1991)*. #' #' @return A named vector with the R2 value. #' @@ -24,12 +24,12 @@ #' @references #' - Cox, D. R., Snell, E. J. (1989). Analysis of binary data (Vol. 32). #' Monographs on Statistics and Applied Probability. -#' - Magee, L. (1990). R 2 measures based on Wald and likelihood ratio -#' joint significance tests. The American Statistician, 44(3), 250-253. +#' - Magee, L. (1990). R 2 measures based on Wald and likelihood ratio joint +#' significance tests. The American Statistician, 44(3), 250-253. #' - Maddala, G. S. (1986). Limited-dependent and qualitative variables in #' econometrics (No. 3). Cambridge university press. -#' - Nagelkerke, N. J. (1991). A note on a general definition of the -#' coefficient of determination. Biometrika, 78(3), 691-692. +#' - Nagelkerke, N. J. (1991). A note on a general definition of the coefficient +#' of determination. Biometrika, 78(3), 691-692. #' #' @export r2_coxsnell <- function(model, ...) { diff --git a/R/r2_mlm.R b/R/r2_mlm.R index 0f08ad235..240701439 100644 --- a/R/r2_mlm.R +++ b/R/r2_mlm.R @@ -53,7 +53,7 @@ #' - Azen, R., & Budescu, D. V. (2006). Comparing predictors in #' multivariate regression models: An extension of dominance analysis. #' Journal of Educational and Behavioral Statistics, 31(2), 157-180. -#'- Cramer, E. M., & Nicewander, W. A. (1979). Some symmetric, +#' - Cramer, E. M., & Nicewander, W. A. (1979). Some symmetric, #' invariant measures of multivariate association. Psychometrika, 44, 43-54. #' - Van den Burg, W., & Lewis, C. (1988). Some properties of two #' measures of multivariate association. Psychometrika, 53, 109-122. @@ -73,7 +73,7 @@ r2_mlm.mlm <- function(model, verbose = TRUE, ...) { insight::get_predictors(model), insight::get_response(model) )$cor^2 - R_xy <- 1 - Reduce(`*`, rho2_vec, 1) + R_xy <- 1 - Reduce(`*`, rho2_vec, 1) resid_cov <- stats::cov(residuals(model)) resp_cov <- stats::cov(insight::get_response(model)) diff --git a/cran-comments.md b/cran-comments.md index 0fbbade1c..abc3f9611 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1 +1 @@ -This is a patch release that fixes failing CRAN checks for Mac OS old-rel. +Maintance release. diff --git a/man/check_dag.Rd b/man/check_dag.Rd index 266a62ec7..c33444f60 100644 --- a/man/check_dag.Rd +++ b/man/check_dag.Rd @@ -129,7 +129,7 @@ adjustments or over-adjustment. } \examples{ -\dontshow{if (require("ggdag", quietly = TRUE) && require("dagitty", quietly = TRUE) && require("see", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (require("ggdag", quietly = TRUE) && require("dagitty", quietly = TRUE) && require("see", quietly = TRUE) && packageVersion("see") > "0.8.5") (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} # no adjustment needed check_dag( y ~ x + b, diff --git a/man/check_singularity.Rd b/man/check_singularity.Rd index cfe2dca0c..49015d0c7 100644 --- a/man/check_singularity.Rd +++ b/man/check_singularity.Rd @@ -92,6 +92,7 @@ model <- lme4::lmer( ) check_singularity(model) +\dontrun{ # Fixing singularity issues using priors in glmmTMB # Example taken from `vignette("priors", package = "glmmTMB")` dat <- readRDS(system.file( @@ -115,6 +116,7 @@ prior <- data.frame( model_with_priors <- update(model, priors = prior) # no singular fit check_singularity(model_with_priors) +} \dontshow{\}) # examplesIf} } \references{ diff --git a/man/model_performance.lm.Rd b/man/model_performance.lm.Rd index 208d9b1c4..11a820fff 100644 --- a/man/model_performance.lm.Rd +++ b/man/model_performance.lm.Rd @@ -9,10 +9,10 @@ \arguments{ \item{model}{A model.} -\item{metrics}{Can be \code{"all"}, \code{"common"} or a character vector of -metrics to be computed (one or more of \code{"AIC"}, \code{"AICc"}, \code{"BIC"}, \code{"R2"}, -\code{"R2_adj"}, \code{"RMSE"}, \code{"SIGMA"}, \code{"LOGLOSS"}, \code{"PCP"}, \code{"SCORE"}). -\code{"common"} will compute AIC, BIC, R2 and RMSE.} +\item{metrics}{Can be \code{"all"}, \code{"common"} or a character vector of metrics to +be computed (one or more of \code{"AIC"}, \code{"AICc"}, \code{"BIC"}, \code{"R2"}, \code{"R2_adj"}, +\code{"RMSE"}, \code{"SIGMA"}, \code{"LOGLOSS"}, \code{"PCP"}, \code{"SCORE"}). \code{"common"} will +compute AIC, BIC, R2 and RMSE.} \item{verbose}{Toggle off warnings.} diff --git a/man/r2_coxsnell.Rd b/man/r2_coxsnell.Rd index 2d53cb628..55dfaac1a 100644 --- a/man/r2_coxsnell.Rd +++ b/man/r2_coxsnell.Rd @@ -18,12 +18,12 @@ A named vector with the R2 value. Calculates the pseudo-R2 value based on the proposal from \emph{Cox & Snell (1989)}. } \details{ -This index was proposed by \emph{Cox and Snell (1989, pp. 208-9)} and, -apparently independently, by \emph{Magee (1990)}; but had been suggested -earlier for binary response models by \emph{Maddala (1983)}. However, this -index achieves a maximum of less than 1 for discrete models (i.e. models -whose likelihood is a product of probabilities) which have a maximum of 1, -instead of densities, which can become infinite \emph{(Nagelkerke, 1991)}. +This index was proposed by \emph{Cox and Snell (1989, pp. 208-9)} and, apparently +independently, by \emph{Magee (1990)}; but had been suggested earlier for binary +response models by \emph{Maddala (1983)}. However, this index achieves a maximum +of less than 1 for discrete models (i.e. models whose likelihood is a product +of probabilities) which have a maximum of 1, instead of densities, which can +become infinite \emph{(Nagelkerke, 1991)}. } \examples{ model <- glm(vs ~ wt + mpg, data = mtcars, family = "binomial") @@ -34,11 +34,11 @@ r2_coxsnell(model) \itemize{ \item Cox, D. R., Snell, E. J. (1989). Analysis of binary data (Vol. 32). Monographs on Statistics and Applied Probability. -\item Magee, L. (1990). R 2 measures based on Wald and likelihood ratio -joint significance tests. The American Statistician, 44(3), 250-253. +\item Magee, L. (1990). R 2 measures based on Wald and likelihood ratio joint +significance tests. The American Statistician, 44(3), 250-253. \item Maddala, G. S. (1986). Limited-dependent and qualitative variables in econometrics (No. 3). Cambridge university press. -\item Nagelkerke, N. J. (1991). A note on a general definition of the -coefficient of determination. Biometrika, 78(3), 691-692. +\item Nagelkerke, N. J. (1991). A note on a general definition of the coefficient +of determination. Biometrika, 78(3), 691-692. } } diff --git a/tests/testthat/test-check_heterogeneity_bias.R b/tests/testthat/test-check_heterogeneity_bias.R index 429dcfc0c..e01ecf3ff 100644 --- a/tests/testthat/test-check_heterogeneity_bias.R +++ b/tests/testthat/test-check_heterogeneity_bias.R @@ -32,3 +32,19 @@ test_that("check_heterogeneity_bias", { "Possible heterogeneity bias due to following predictors: Petal\\.Length, Petal\\.Width, Species" ) }) + +test_that("check_heterogeneity_bias", { + skip_if_not_installed("datawizard", minimum_version = "0.12.3") + data(efc, package = "datawizard") + dat <- na.omit(efc) + dat$e42dep <- factor(dat$e42dep) + dat$c172code <- factor(dat$c172code) + + out <- check_heterogeneity_bias( + dat, + select = "c12hour", + by = c("e42dep", "c172code"), + nested = TRUE + ) + expect_equal(out, "c12hour", ignore_attr = TRUE) +}) diff --git a/tests/testthat/test-model_performance.bayesian.R b/tests/testthat/test-model_performance.bayesian.R index ff78ca3cb..a62f4bf40 100644 --- a/tests/testthat/test-model_performance.bayesian.R +++ b/tests/testthat/test-model_performance.bayesian.R @@ -60,7 +60,7 @@ test_that("model_performance.brmsfit", { expect_equal(perf$R2, 0.954538, tolerance = 1e-3) expect_equal(perf$R2_adjusted, 0.9529004, tolerance = 1e-3) expect_equal(perf$ELPD, -70.40493, tolerance = 1e-3) - expect_identical(colnames(perf), c( + expect_named(perf, c( "ELPD", "ELPD_SE", "LOOIC", "LOOIC_SE", "WAIC", "R2", "R2_marginal", "R2_adjusted", "R2_adjusted_marginal", "ICC", "RMSE", "Sigma" )) From 1d730222adde464ed9a70c5d83d29927d0ab5832 Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 13 Sep 2024 15:32:59 +0200 Subject: [PATCH 20/41] revise print (#768) --- DESCRIPTION | 2 +- NEWS.md | 9 ++++ R/check_dag.R | 77 +++++++++++++++++++++++++----- tests/testthat/_snaps/check_dag.md | 64 +++++++++++++++++++------ tests/testthat/test-check_dag.R | 28 +++++++++++ 5 files changed, 154 insertions(+), 26 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index bf2622588..f74f6e7f1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: performance Title: Assessment of Regression Models Performance -Version: 0.12.3 +Version: 0.12.3.1 Authors@R: c(person(given = "Daniel", family = "Lüdecke", diff --git a/NEWS.md b/NEWS.md index 7be029839..81a8c5ad2 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,12 @@ +# performance 0.12.4 + +## Changes + +* `check_dag()` now also checks for colliders, and suggests removing it in the + printed output. + +* Minor revisions to the printed output of `check_dag()`. + # performance 0.12.3 ## New functions diff --git a/R/check_dag.R b/R/check_dag.R index dcfe8f33c..5c6ba3462 100644 --- a/R/check_dag.R +++ b/R/check_dag.R @@ -247,11 +247,29 @@ check_dag <- function(..., checks <- lapply(c("direct", "total"), function(x) { adjustment_set <- unlist(dagitty::adjustmentSets(dag, effect = x), use.names = FALSE) adjustment_nodes <- unlist(dagitty::adjustedNodes(dag), use.names = FALSE) + minimal_adjustments <- as.list(dagitty::adjustmentSets(dag, effect = x)) + collider <- adjustment_nodes[vapply(adjustment_nodes, ggdag::is_collider, logical(1), .dag = dag)] + if (!length(collider)) { + # if we don't have colliders, set to NULL + collider <- NULL + } else { + # if we *have* colliders, remove them from minimal adjustments + minimal_adjustments <- lapply(minimal_adjustments, function(ma) { + setdiff(ma, collider) + }) + } list( - adjustment_not_needed = is.null(adjustment_set) && is.null(adjustment_nodes), - incorrectly_adjusted = is.null(adjustment_set) && !is.null(adjustment_nodes), + # no adjustment needed when + # - required and current adjustment sets are NULL + # - AND we have no collider in current adjustments + adjustment_not_needed = is.null(adjustment_set) && is.null(adjustment_nodes) && is.null(collider), + # incorrect adjustment when + # - required is NULL and current adjustment not NULL + # - OR we have a collider in current adjustments + incorrectly_adjusted = (is.null(adjustment_set) && !is.null(adjustment_nodes)) || (!is.null(collider) && collider %in% adjustment_nodes), current_adjustments = adjustment_nodes, - minimal_adjustments = as.list(dagitty::adjustmentSets(dag, effect = x)) + minimal_adjustments = minimal_adjustments, + collider = collider ) }) @@ -260,6 +278,10 @@ check_dag <- function(..., attr(dag, "exposure") <- exposure attr(dag, "adjusted") <- adjusted attr(dag, "adjustment_sets") <- checks[[1]]$current_adjustments + attr(dag, "collider") <- checks[[1]]$collider + # remove collider from sub-attributes + checks[[1]]$collider <- NULL + checks[[2]]$collider <- NULL attr(dag, "check_direct") <- insight::compact_list(checks[[1]]) attr(dag, "check_total") <- insight::compact_list(checks[[2]]) @@ -296,6 +318,7 @@ as.dag <- function(x, ...) { #' @export print.check_dag <- function(x, ...) { effect <- attributes(x)$effect + collider <- attributes(x)$collider # header cat(insight::print_color("# Check for correct adjustment sets", "blue")) @@ -317,6 +340,16 @@ print.check_dag <- function(x, ...) { ) } + # add information on colliders + if (!is.null(collider)) { + exposure_outcome_text <- paste0( + exposure_outcome_text, + "\n- Collider", + ifelse(length(collider) > 1, "s", ""), + ": ", insight::color_text(datawizard::text_concatenate(collider), "cyan") + ) + } + cat(exposure_outcome_text) cat("\n\n") @@ -331,12 +364,12 @@ print.check_dag <- function(x, ...) { } else { out <- attributes(x)$check_total } - .print_dag_results(out, x, i, effect) + .print_dag_results(out, x, i, effect, collider) } } } -.print_dag_results <- function(out, x, i, effect) { +.print_dag_results <- function(out, x, i, effect, collider = NULL) { # missing adjustements - minimal_adjustment can be a list of different # options for minimal adjustements, so we check here if any of the minimal # adjustments are currently sufficient @@ -356,8 +389,18 @@ print.check_dag <- function(x, ...) { attributes(x)$outcome, "`." ) + } else if (!is.null(collider)) { + # Scenario 2: adjusted for (downstream) collider + msg <- paste0( + insight::color_text("Incorrectly adjusted!", "red"), + "\nYour model adjusts for a (downstream) collider, ", + insight::color_text(datawizard::text_concatenate(collider, enclose = "`"), "cyan"), + ". To estimate the ", i, " effect, do ", + insight::color_text("not", "italic"), + " adjust for it, to avoid collider-bias." + ) } else if (isTRUE(out$incorrectly_adjusted)) { - # Scenario 2: incorrectly adjusted, adjustments where none is allowed + # Scenario 3: incorrectly adjusted, adjustments where none is allowed msg <- paste0( insight::color_text("Incorrectly adjusted!", "red"), "\nTo estimate the ", i, " effect, do ", @@ -367,13 +410,13 @@ print.check_dag <- function(x, ...) { "." ) } else if (any(sufficient_adjustments)) { - # Scenario 3: correct adjustment + # Scenario 4: correct adjustment msg <- paste0( insight::color_text("Model is correctly specified.", "green"), "\nAll minimal sufficient adjustments to estimate the ", i, " effect were done." ) } else { - # Scenario 4: missing adjustments + # Scenario 5: missing adjustments msg <- paste0( insight::color_text("Incorrectly adjusted!", "red"), "\nTo estimate the ", i, " effect, ", @@ -395,6 +438,7 @@ print.check_dag <- function(x, ...) { ), "." ) + current_str <- "\nCurrently" } else { msg <- paste0( msg, @@ -404,14 +448,25 @@ print.check_dag <- function(x, ...) { ), "yellow"), "." ) + current_str <- " Currently" } if (is.null(out$current_adjustments)) { - msg <- paste0(msg, "\nCurrently, the model does not adjust for any variables.") + msg <- paste0(msg, current_str, ", the model does not adjust for any variables.") } else { msg <- paste0( - msg, "\nCurrently, the model only adjusts for ", - insight::color_text(datawizard::text_concatenate(out$current_adjustments, enclose = "`"), "yellow"), "." + msg, current_str, ", the model only adjusts for ", + datawizard::text_concatenate(out$current_adjustments, enclose = "`"), + "." ) + # check if we could identify missing variables, and if so, add them to the message + missing_vars <- setdiff(unlist(out$minimal_adjustments), out$current_adjustments) + if (length(missing_vars) > 0) { + msg <- paste0( + msg, " You possibly also need to adjust for ", + insight::color_text(datawizard::text_concatenate(missing_vars, enclose = "`"), "yellow"), + " to block biasing paths." + ) + } } } diff --git a/tests/testthat/_snaps/check_dag.md b/tests/testthat/_snaps/check_dag.md index 47420aa89..146986c7d 100644 --- a/tests/testthat/_snaps/check_dag.md +++ b/tests/testthat/_snaps/check_dag.md @@ -41,8 +41,7 @@ Identification of direct and total effects Incorrectly adjusted! - To estimate the direct and total effect, at least adjust for `b`. - Currently, the model does not adjust for any variables. + To estimate the direct and total effect, at least adjust for `b`. Currently, the model does not adjust for any variables. --- @@ -58,8 +57,7 @@ Identification of direct and total effects Incorrectly adjusted! - To estimate the direct and total effect, at least adjust for `b` and `c`. - Currently, the model only adjusts for `c`. + To estimate the direct and total effect, at least adjust for `b` and `c`. Currently, the model only adjusts for `c`. You possibly also need to adjust for `b` to block biasing paths. --- @@ -75,8 +73,7 @@ Identification of direct and total effects Incorrectly adjusted! - To estimate the direct and total effect, at least adjust for `b` and `c`. - Currently, the model only adjusts for `c`. + To estimate the direct and total effect, at least adjust for `b` and `c`. Currently, the model only adjusts for `c`. You possibly also need to adjust for `b` to block biasing paths. --- @@ -143,14 +140,12 @@ Identification of direct effects Incorrectly adjusted! - To estimate the direct effect, at least adjust for `x1` and `x2`. - Currently, the model does not adjust for any variables. + To estimate the direct effect, at least adjust for `x1` and `x2`. Currently, the model does not adjust for any variables. Identification of total effects Incorrectly adjusted! - To estimate the total effect, at least adjust for `x1`. - Currently, the model does not adjust for any variables. + To estimate the total effect, at least adjust for `x1`. Currently, the model does not adjust for any variables. --- @@ -166,8 +161,7 @@ Identification of direct effects Incorrectly adjusted! - To estimate the direct effect, at least adjust for `x1` and `x2`. - Currently, the model only adjusts for `x1`. + To estimate the direct effect, at least adjust for `x1` and `x2`. Currently, the model only adjusts for `x1`. You possibly also need to adjust for `x2` to block biasing paths. Identification of total effects @@ -188,8 +182,7 @@ Identification of direct effects Incorrectly adjusted! - To estimate the direct effect, at least adjust for `x1` and `x2`. - Currently, the model only adjusts for `x2`. + To estimate the direct effect, at least adjust for `x1` and `x2`. Currently, the model only adjusts for `x2`. You possibly also need to adjust for `x1` to block biasing paths. Identification of total effects @@ -218,3 +211,46 @@ To estimate the total effect, do not adjust for `x1` and `x2`. +# check_dag, collider bias + + Code + print(dag) + Output + # Check for correct adjustment sets + - Outcome: SMD_ICD11 + - Exposure: agegroup + - Adjustments: edgroup3, gender_kid, pss4_kid_sum_2sd and residence + + Identification of direct effects + + Incorrectly adjusted! + To estimate the direct effect, at least adjust for `edgroup3`, `gender_kid`, `pss4_kid_sum_2sd`, `residence` and `sm_h_total_kid`. Currently, the model only adjusts for `edgroup3`, `gender_kid`, `pss4_kid_sum_2sd` and `residence`. You possibly also need to adjust for `sm_h_total_kid` to block biasing paths. + + Identification of total effects + + Model is correctly specified. + All minimal sufficient adjustments to estimate the total effect were done. + + +--- + + Code + print(dag) + Output + # Check for correct adjustment sets + - Outcome: SMD_ICD11 + - Exposure: agegroup + - Adjustments: edgroup3, gender_kid, pss4_kid_sum_2sd, residence and sm_h_total_kid + - Collider: sm_h_total_kid + + Identification of direct effects + + Incorrectly adjusted! + Your model adjusts for a (downstream) collider, `sm_h_total_kid`. To estimate the direct effect, do not adjust for it, to avoid collider-bias. + + Identification of total effects + + Incorrectly adjusted! + Your model adjusts for a (downstream) collider, `sm_h_total_kid`. To estimate the total effect, do not adjust for it, to avoid collider-bias. + + diff --git a/tests/testthat/test-check_dag.R b/tests/testthat/test-check_dag.R index 1ac94d520..7bde379f1 100644 --- a/tests/testthat/test-check_dag.R +++ b/tests/testthat/test-check_dag.R @@ -133,3 +133,31 @@ test_that("check_dag, different adjustements for total and direct", { ) expect_snapshot(print(dag)) }) + +test_that("check_dag, collider bias", { + dag <- check_dag( + SMD_ICD11 ~ agegroup + gender_kid + edgroup3 + residence + pss4_kid_sum_2sd + sm_h_total_kid, + pss4_kid_sum_2sd ~ gender_kid, + sm_h_total_kid ~ gender_kid + agegroup, + adjusted = c( + "agegroup", "gender_kid", "edgroup3", "residence", + "pss4_kid_sum_2sd" + ), + outcome = "SMD_ICD11", + exposure = "agegroup" + ) + expect_snapshot(print(dag)) + + dag <- check_dag( + SMD_ICD11 ~ agegroup + gender_kid + edgroup3 + residence + pss4_kid_sum_2sd + sm_h_total_kid, + pss4_kid_sum_2sd ~ gender_kid, + sm_h_total_kid ~ gender_kid + agegroup, + adjusted = c( + "agegroup", "gender_kid", "edgroup3", "residence", + "pss4_kid_sum_2sd", "sm_h_total_kid" + ), + outcome = "SMD_ICD11", + exposure = "agegroup" + ) + expect_snapshot(print(dag)) +}) From 38d5a038e021fd3cf6dc0498d372d14b84d7d99b Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 13 Sep 2024 15:40:28 +0200 Subject: [PATCH 21/41] add snapshot --- R/check_dag.R | 4 ++-- tests/testthat/_snaps/check_dag.md | 19 +++++++++++++++++++ tests/testthat/test-check_dag.R | 12 ++++++++++++ 3 files changed, 33 insertions(+), 2 deletions(-) diff --git a/R/check_dag.R b/R/check_dag.R index 5c6ba3462..e0535a0dd 100644 --- a/R/check_dag.R +++ b/R/check_dag.R @@ -406,7 +406,7 @@ print.check_dag <- function(x, ...) { "\nTo estimate the ", i, " effect, do ", insight::color_text("not", "italic"), " adjust for ", - datawizard::text_concatenate(out$current_adjustments, enclose = "`"), + insight::color_text(datawizard::text_concatenate(out$current_adjustments, enclose = "`"), "red"), "." ) } else if (any(sufficient_adjustments)) { @@ -463,7 +463,7 @@ print.check_dag <- function(x, ...) { if (length(missing_vars) > 0) { msg <- paste0( msg, " You possibly also need to adjust for ", - insight::color_text(datawizard::text_concatenate(missing_vars, enclose = "`"), "yellow"), + insight::color_text(datawizard::text_concatenate(missing_vars, enclose = "`"), "green"), " to block biasing paths." ) } diff --git a/tests/testthat/_snaps/check_dag.md b/tests/testthat/_snaps/check_dag.md index 146986c7d..0c3b3233d 100644 --- a/tests/testthat/_snaps/check_dag.md +++ b/tests/testthat/_snaps/check_dag.md @@ -128,6 +128,25 @@ All minimal sufficient adjustments to estimate the direct and total effect were done. +--- + + Code + print(dag) + Output + # Check for correct adjustment sets + - Outcome: exam + - Exposure: podcast + - Adjustment: alertness + + Identification of direct and total effects + + Incorrectly adjusted! + To estimate the direct and total effect, at least adjust for one of the following sets: + - alertness, prepared + - alertness, skills_course. + Currently, the model only adjusts for `alertness`. You possibly also need to adjust for `prepared` and `skills_course` to block biasing paths. + + # check_dag, different adjustements for total and direct Code diff --git a/tests/testthat/test-check_dag.R b/tests/testthat/test-check_dag.R index 7bde379f1..c3d22b32a 100644 --- a/tests/testthat/test-check_dag.R +++ b/tests/testthat/test-check_dag.R @@ -90,6 +90,18 @@ test_that("check_dag, multiple adjustment sets", { coords = ggdag::time_ordered_coords() ) expect_snapshot(print(dag)) + dag <- check_dag( + podcast ~ mood + humor + skills_course, + alertness ~ mood, + mood ~ humor, + prepared ~ skills_course, + exam ~ alertness + prepared, + coords = ggdag::time_ordered_coords(), + adjusted = "alertness", + exposure = "podcast", + outcome = "exam" + ) + expect_snapshot(print(dag)) }) From 7d094b7f52c26ff994362e991f600391d5c9940d Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 13 Sep 2024 18:53:56 +0200 Subject: [PATCH 22/41] minor --- DESCRIPTION | 2 +- R/check_dag.R | 8 ++++---- tests/testthat/_snaps/check_dag.md | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index f74f6e7f1..58ce17d59 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: performance Title: Assessment of Regression Models Performance -Version: 0.12.3.1 +Version: 0.12.3.2 Authors@R: c(person(given = "Daniel", family = "Lüdecke", diff --git a/R/check_dag.R b/R/check_dag.R index e0535a0dd..0b812788f 100644 --- a/R/check_dag.R +++ b/R/check_dag.R @@ -248,7 +248,7 @@ check_dag <- function(..., adjustment_set <- unlist(dagitty::adjustmentSets(dag, effect = x), use.names = FALSE) adjustment_nodes <- unlist(dagitty::adjustedNodes(dag), use.names = FALSE) minimal_adjustments <- as.list(dagitty::adjustmentSets(dag, effect = x)) - collider <- adjustment_nodes[vapply(adjustment_nodes, ggdag::is_collider, logical(1), .dag = dag)] + collider <- adjustment_nodes[vapply(adjustment_nodes, ggdag::is_collider, logical(1), .dag = dag, downstream = FALSE)] if (!length(collider)) { # if we don't have colliders, set to NULL collider <- NULL @@ -356,7 +356,7 @@ print.check_dag <- function(x, ...) { # minimal adjustment sets for direct and total effect identical? # Then print only once if (identical(attributes(x)$check_direct$minimal_adjustments, attributes(x)$check_total$minimal_adjustments)) { - .print_dag_results(attributes(x)$check_direct, x, "direct and total", "all") + .print_dag_results(attributes(x)$check_direct, x, "direct and total", "all", collider) } else { for (i in c("direct", "total")) { if (i == "direct") { @@ -390,10 +390,10 @@ print.check_dag <- function(x, ...) { "`." ) } else if (!is.null(collider)) { - # Scenario 2: adjusted for (downstream) collider + # Scenario 2: adjusted for collider msg <- paste0( insight::color_text("Incorrectly adjusted!", "red"), - "\nYour model adjusts for a (downstream) collider, ", + "\nYour model adjusts for a collider, ", insight::color_text(datawizard::text_concatenate(collider, enclose = "`"), "cyan"), ". To estimate the ", i, " effect, do ", insight::color_text("not", "italic"), diff --git a/tests/testthat/_snaps/check_dag.md b/tests/testthat/_snaps/check_dag.md index 0c3b3233d..c7f577515 100644 --- a/tests/testthat/_snaps/check_dag.md +++ b/tests/testthat/_snaps/check_dag.md @@ -265,11 +265,11 @@ Identification of direct effects Incorrectly adjusted! - Your model adjusts for a (downstream) collider, `sm_h_total_kid`. To estimate the direct effect, do not adjust for it, to avoid collider-bias. + Your model adjusts for a collider, `sm_h_total_kid`. To estimate the direct effect, do not adjust for it, to avoid collider-bias. Identification of total effects Incorrectly adjusted! - Your model adjusts for a (downstream) collider, `sm_h_total_kid`. To estimate the total effect, do not adjust for it, to avoid collider-bias. + Your model adjusts for a collider, `sm_h_total_kid`. To estimate the total effect, do not adjust for it, to avoid collider-bias. From 61da662ca35612e0e39d1c2247032914af0bbf44 Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 13 Sep 2024 19:19:06 +0200 Subject: [PATCH 23/41] docs, wording --- R/check_dag.R | 16 ++++++++++++---- man/check_dag.Rd | 7 +++++++ tests/testthat/_snaps/check_dag.md | 6 +++--- 3 files changed, 22 insertions(+), 7 deletions(-) diff --git a/R/check_dag.R b/R/check_dag.R index 0b812788f..f15c2b7ad 100644 --- a/R/check_dag.R +++ b/R/check_dag.R @@ -68,6 +68,13 @@ #' adjusted for both. If the model is cyclic, the function stops and suggests #' to remove cycles from the model. #' +#' Note that it sometimes could be necessary to try out different combinations +#' of suggested adjustments, because `check_dag()` can not always detect whether +#' _at least_ one of several variables is required, or whether adjustments should +#' be done for _all_ listed variables. It can be useful to copy the dagitty-code +#' (using `as.dag()`, which prints the dagitty-string into the console) into +#' the dagitty-website and play around with different adjustments. +#' #' @section Direct and total effects: #' #' The direct effect of an exposure on an outcome is the effect that is not @@ -393,11 +400,11 @@ print.check_dag <- function(x, ...) { # Scenario 2: adjusted for collider msg <- paste0( insight::color_text("Incorrectly adjusted!", "red"), - "\nYour model adjusts for a collider, ", - insight::color_text(datawizard::text_concatenate(collider, enclose = "`"), "cyan"), - ". To estimate the ", i, " effect, do ", + "\nYour model adjusts for a potential collider. To estimate the ", i, " effect, do ", insight::color_text("not", "italic"), - " adjust for it, to avoid collider-bias." + " adjust for ", + insight::color_text(datawizard::text_concatenate(collider, enclose = "`"), "cyan"), + " to avoid collider-bias. It is recommended to double-check for the collider-bias on the dagitty-website." ) } else if (isTRUE(out$incorrectly_adjusted)) { # Scenario 3: incorrectly adjusted, adjustments where none is allowed @@ -406,6 +413,7 @@ print.check_dag <- function(x, ...) { "\nTo estimate the ", i, " effect, do ", insight::color_text("not", "italic"), " adjust for ", + ifelse(length(out$current_adjustments) > 1, "some or all of ", ""), insight::color_text(datawizard::text_concatenate(out$current_adjustments, enclose = "`"), "red"), "." ) diff --git a/man/check_dag.Rd b/man/check_dag.Rd index c33444f60..f6a8199b5 100644 --- a/man/check_dag.Rd +++ b/man/check_dag.Rd @@ -100,6 +100,13 @@ required variables that should be adjusted for. The function distinguishes between direct and total effects, and checks if the model is correctly adjusted for both. If the model is cyclic, the function stops and suggests to remove cycles from the model. + +Note that it sometimes could be necessary to try out different combinations +of suggested adjustments, because \code{check_dag()} can not always detect whether +\emph{at least} one of several variables is required, or whether adjustments should +be done for \emph{all} listed variables. It can be useful to copy the dagitty-code +(using \code{as.dag()}, which prints the dagitty-string into the console) into +the dagitty-website and play around with different adjustments. } \section{Direct and total effects}{ diff --git a/tests/testthat/_snaps/check_dag.md b/tests/testthat/_snaps/check_dag.md index c7f577515..779ec5a71 100644 --- a/tests/testthat/_snaps/check_dag.md +++ b/tests/testthat/_snaps/check_dag.md @@ -227,7 +227,7 @@ Identification of total effects Incorrectly adjusted! - To estimate the total effect, do not adjust for `x1` and `x2`. + To estimate the total effect, do not adjust for some or all of `x1` and `x2`. # check_dag, collider bias @@ -265,11 +265,11 @@ Identification of direct effects Incorrectly adjusted! - Your model adjusts for a collider, `sm_h_total_kid`. To estimate the direct effect, do not adjust for it, to avoid collider-bias. + Your model adjusts for a potential collider. To estimate the direct effect, do not adjust for `sm_h_total_kid` to avoid collider-bias. It is recommended to double-check for the collider-bias on the dagitty-website. Identification of total effects Incorrectly adjusted! - Your model adjusts for a collider, `sm_h_total_kid`. To estimate the total effect, do not adjust for it, to avoid collider-bias. + Your model adjusts for a potential collider. To estimate the total effect, do not adjust for `sm_h_total_kid` to avoid collider-bias. It is recommended to double-check for the collider-bias on the dagitty-website. From 4da79ddecdc052dc3c018f03dd764d48e59cbbe0 Mon Sep 17 00:00:00 2001 From: Daniel Date: Wed, 18 Sep 2024 14:19:26 +0200 Subject: [PATCH 24/41] allow formula interface --- R/check_dag.R | 43 +++++++++++++++++++++++++-------- man/check_dag.Rd | 31 ++++++++++++++++-------- tests/testthat/test-check_dag.R | 13 ++++++++++ 3 files changed, 67 insertions(+), 20 deletions(-) diff --git a/R/check_dag.R b/R/check_dag.R index f15c2b7ad..7cdb7f4a5 100644 --- a/R/check_dag.R +++ b/R/check_dag.R @@ -23,16 +23,17 @@ #' First element may also be model object. If a model objects is provided, its #' formula is used as first formula, and all independent variables will be used #' for the `adjusted` argument. See 'Details' and 'Examples'. -#' @param outcome Name of the dependent variable (outcome), as character string. -#' Must be a valid name from the formulas. If not set, the first dependent -#' variable from the formulas is used. -#' @param exposure Name of the exposure variable (as character string), for -#' which the direct and total causal effect on the `outcome` should be checked. -#' Must be a valid name from the formulas. If not set, the first independent -#' variable from the formulas is used. -#' @param adjusted A character vector with names of variables that are adjusted -#' for in the model. If a model object is provided in `...`, any values in -#' `adjusted` will be overwritten by the model's independent variables. +#' @param outcome Name of the dependent variable (outcome), as character string +#' or as formula. Must be a valid name from the formulas provided in `...`. If +#' not set, the first dependent variable from the formulas is used. +#' @param exposure Name of the exposure variable (as character string or +#' formula), for which the direct and total causal effect on the `outcome` +#' should be checked. Must be a valid name from the formulas provided in `...`. +#' If not set, the first independent variable from the formulas is used. +#' @param adjusted A character vector or formula with names of variables that +#' are adjusted for in the model. If a model object is provided in `...`, any +#' values in `adjusted` will be overwritten by the model's independent +#' variables. #' @param latent A character vector with names of latent variables in the model. #' @param effect Character string, indicating which effect to check. Can be #' `"all"` (default), `"total"`, or `"direct"`. @@ -138,6 +139,16 @@ #' ) #' dag #' +#' # using formula interface for arguments "outcome", "exposure" and "adjusted" +#' check_dag( +#' y ~ x + b + c, +#' x ~ b, +#' outcome = ~y, +#' exposure = ~x, +#' adjusted = ~ b + c +#' ) +#' dag +#' #' # use specific layout for the DAG #' dag <- check_dag( #' score ~ exp + b + c, @@ -218,6 +229,18 @@ check_dag <- function(..., )$conditional[1] } + # handle formula interface - if "outcome", "exposure" or "adjusted" are + # provided as formulas, convert to character here + if (inherits(outcome, "formula")) { + outcome <- all.vars(outcome) + } + if (inherits(exposure, "formula")) { + exposure <- all.vars(exposure) + } + if (inherits(adjusted, "formula")) { + adjusted <- all.vars(adjusted) + } + # convert to dag dag_args <- c(formulas, list( exposure = exposure, diff --git a/man/check_dag.Rd b/man/check_dag.Rd index f6a8199b5..a6e2c0ea3 100644 --- a/man/check_dag.Rd +++ b/man/check_dag.Rd @@ -23,18 +23,19 @@ First element may also be model object. If a model objects is provided, its formula is used as first formula, and all independent variables will be used for the \code{adjusted} argument. See 'Details' and 'Examples'.} -\item{outcome}{Name of the dependent variable (outcome), as character string. -Must be a valid name from the formulas. If not set, the first dependent -variable from the formulas is used.} +\item{outcome}{Name of the dependent variable (outcome), as character string +or as formula. Must be a valid name from the formulas provided in \code{...}. If +not set, the first dependent variable from the formulas is used.} -\item{exposure}{Name of the exposure variable (as character string), for -which the direct and total causal effect on the \code{outcome} should be checked. -Must be a valid name from the formulas. If not set, the first independent -variable from the formulas is used.} +\item{exposure}{Name of the exposure variable (as character string or +formula), for which the direct and total causal effect on the \code{outcome} +should be checked. Must be a valid name from the formulas provided in \code{...}. +If not set, the first independent variable from the formulas is used.} -\item{adjusted}{A character vector with names of variables that are adjusted -for in the model. If a model object is provided in \code{...}, any values in -\code{adjusted} will be overwritten by the model's independent variables.} +\item{adjusted}{A character vector or formula with names of variables that +are adjusted for in the model. If a model object is provided in \code{...}, any +values in \code{adjusted} will be overwritten by the model's independent +variables.} \item{latent}{A character vector with names of latent variables in the model.} @@ -164,6 +165,16 @@ dag <- check_dag( ) dag +# using formula interface for arguments "outcome", "exposure" and "adjusted" +check_dag( + y ~ x + b + c, + x ~ b, + outcome = ~y, + exposure = ~x, + adjusted = ~ b + c +) +dag + # use specific layout for the DAG dag <- check_dag( score ~ exp + b + c, diff --git a/tests/testthat/test-check_dag.R b/tests/testthat/test-check_dag.R index c3d22b32a..12153cf18 100644 --- a/tests/testthat/test-check_dag.R +++ b/tests/testthat/test-check_dag.R @@ -146,6 +146,7 @@ test_that("check_dag, different adjustements for total and direct", { expect_snapshot(print(dag)) }) + test_that("check_dag, collider bias", { dag <- check_dag( SMD_ICD11 ~ agegroup + gender_kid + edgroup3 + residence + pss4_kid_sum_2sd + sm_h_total_kid, @@ -173,3 +174,15 @@ test_that("check_dag, collider bias", { ) expect_snapshot(print(dag)) }) + + +test_that("check_dag, formula-interface", { + dag <- check_dag( + y ~ x + b + c, + x ~ b, + outcome = ~y, + exposure = ~x, + adjusted = ~ b + c + ) + expect_identical(attributes(dag)$adjusted, c("b", "c")) +}) From 0799d692afb79e479f3c93d55f35a74ed7ab5683 Mon Sep 17 00:00:00 2001 From: Daniel Date: Wed, 18 Sep 2024 14:20:41 +0200 Subject: [PATCH 25/41] docs --- R/check_dag.R | 6 +++--- man/check_dag.Rd | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/R/check_dag.R b/R/check_dag.R index 7cdb7f4a5..2d7bdb42b 100644 --- a/R/check_dag.R +++ b/R/check_dag.R @@ -31,9 +31,9 @@ #' should be checked. Must be a valid name from the formulas provided in `...`. #' If not set, the first independent variable from the formulas is used. #' @param adjusted A character vector or formula with names of variables that -#' are adjusted for in the model. If a model object is provided in `...`, any -#' values in `adjusted` will be overwritten by the model's independent -#' variables. +#' are adjusted for in the model, e.g. `adjusted = c("x1", "x2")` or +#' `adjusted = ~ x1 + x2`. If a model object is provided in `...`, any values in +#' `adjusted` will be overwritten by the model's independent variables. #' @param latent A character vector with names of latent variables in the model. #' @param effect Character string, indicating which effect to check. Can be #' `"all"` (default), `"total"`, or `"direct"`. diff --git a/man/check_dag.Rd b/man/check_dag.Rd index a6e2c0ea3..f7c74ee18 100644 --- a/man/check_dag.Rd +++ b/man/check_dag.Rd @@ -33,9 +33,9 @@ should be checked. Must be a valid name from the formulas provided in \code{...} If not set, the first independent variable from the formulas is used.} \item{adjusted}{A character vector or formula with names of variables that -are adjusted for in the model. If a model object is provided in \code{...}, any -values in \code{adjusted} will be overwritten by the model's independent -variables.} +are adjusted for in the model, e.g. \code{adjusted = c("x1", "x2")} or +\code{adjusted = ~ x1 + x2}. If a model object is provided in \code{...}, any values in +\code{adjusted} will be overwritten by the model's independent variables.} \item{latent}{A character vector with names of latent variables in the model.} From 700311f57fec58c8ef0fdd6888f88f31413e3c64 Mon Sep 17 00:00:00 2001 From: Daniel Date: Wed, 18 Sep 2024 14:20:53 +0200 Subject: [PATCH 26/41] version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 58ce17d59..842122309 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: performance Title: Assessment of Regression Models Performance -Version: 0.12.3.2 +Version: 0.12.3.3 Authors@R: c(person(given = "Daniel", family = "Lüdecke", From 28c881f77252e8f917c10f2487ae314102c2e88d Mon Sep 17 00:00:00 2001 From: Daniel Date: Wed, 18 Sep 2024 14:23:49 +0200 Subject: [PATCH 27/41] add test --- R/check_dag.R | 9 ++++++++- man/check_dag.Rd | 9 ++++++++- tests/testthat/test-check_dag.R | 6 ++++++ 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/R/check_dag.R b/R/check_dag.R index 2d7bdb42b..7a64ae4d0 100644 --- a/R/check_dag.R +++ b/R/check_dag.R @@ -147,7 +147,14 @@ #' exposure = ~x, #' adjusted = ~ b + c #' ) -#' dag +#' +#' # if not provided, "outcome" is taken from first formula, same for "exposure" +#' # thus, we can simplify the above expression to +#' check_dag( +#' y ~ x + b + c, +#' x ~ b, +#' adjusted = ~ b + c +#' ) #' #' # use specific layout for the DAG #' dag <- check_dag( diff --git a/man/check_dag.Rd b/man/check_dag.Rd index f7c74ee18..fa7e92207 100644 --- a/man/check_dag.Rd +++ b/man/check_dag.Rd @@ -173,7 +173,14 @@ check_dag( exposure = ~x, adjusted = ~ b + c ) -dag + +# if not provided, "outcome" is taken from first formula, same for "exposure" +# thus, we can simplify the above expression to +check_dag( + y ~ x + b + c, + x ~ b, + adjusted = ~ b + c +) # use specific layout for the DAG dag <- check_dag( diff --git a/tests/testthat/test-check_dag.R b/tests/testthat/test-check_dag.R index 12153cf18..0d6efa9d6 100644 --- a/tests/testthat/test-check_dag.R +++ b/tests/testthat/test-check_dag.R @@ -185,4 +185,10 @@ test_that("check_dag, formula-interface", { adjusted = ~ b + c ) expect_identical(attributes(dag)$adjusted, c("b", "c")) + dag2 <- check_dag( + y ~ x + b + c, + x ~ b, + adjusted = ~ b + c + ) + expect_identical(dag, dag2) }) From 077a76e250371c7a910cf395bfb9f48805c01ead Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 24 Sep 2024 10:10:58 +0200 Subject: [PATCH 28/41] Update `DESCRIPTION` to use latest 'easystats' dependencies (#766) Co-authored-by: IndrajeetPatil <11330453+IndrajeetPatil@users.noreply.github.com> --- DESCRIPTION | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 842122309..f5cf4d81e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -73,9 +73,9 @@ BugReports: https://github.com/easystats/performance/issues Depends: R (>= 3.6) Imports: - bayestestR (>= 0.13.2), - insight (>= 0.20.2), - datawizard (>= 0.10.0), + bayestestR (>= 0.14.0), + insight (>= 0.20.3), + datawizard (>= 0.12.2), stats, utils Suggests: From 0a047a7febccec49bd984862ba5436e721221aaf Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 27 Sep 2024 09:48:46 +0200 Subject: [PATCH 29/41] docs --- R/r2_nakagawa.R | 3 ++- man/icc.Rd | 3 ++- man/r2_nakagawa.Rd | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/R/r2_nakagawa.R b/R/r2_nakagawa.R index 1246a444a..aeb4df1c1 100644 --- a/R/r2_nakagawa.R +++ b/R/r2_nakagawa.R @@ -30,7 +30,7 @@ #' - Bernoulli (logistic) regression #' - Binomial regression (with other than binary outcomes) #' - Poisson and Quasi-Poisson regression -#' - Negative binomial regression (including nbinom1 and nbinom2 families) +#' - Negative binomial regression (including nbinom1, nbinom2 and nbinom12 families) #' - Gaussian regression (linear models) #' - Gamma regression #' - Tweedie regression @@ -44,6 +44,7 @@ #' - Compound Poisson regression #' - Generalized Poisson regression #' - Log-normal regression +#' - Skew-normal regression #' #' Extracting variance components for models with zero-inflation part is not #' straightforward, because it is not definitely clear how the distribution-specific diff --git a/man/icc.Rd b/man/icc.Rd index 071e1b617..8b38ece25 100644 --- a/man/icc.Rd +++ b/man/icc.Rd @@ -212,7 +212,7 @@ should be accurate and reliable for following mixed models or model families: \item Bernoulli (logistic) regression \item Binomial regression (with other than binary outcomes) \item Poisson and Quasi-Poisson regression -\item Negative binomial regression (including nbinom1 and nbinom2 families) +\item Negative binomial regression (including nbinom1, nbinom2 and nbinom12 families) \item Gaussian regression (linear models) \item Gamma regression \item Tweedie regression @@ -227,6 +227,7 @@ Following model families are not yet validated, but should work: \item Compound Poisson regression \item Generalized Poisson regression \item Log-normal regression +\item Skew-normal regression } Extracting variance components for models with zero-inflation part is not diff --git a/man/r2_nakagawa.Rd b/man/r2_nakagawa.Rd index 169af5709..75f2de3d0 100644 --- a/man/r2_nakagawa.Rd +++ b/man/r2_nakagawa.Rd @@ -111,7 +111,7 @@ should be accurate and reliable for following mixed models or model families: \item Bernoulli (logistic) regression \item Binomial regression (with other than binary outcomes) \item Poisson and Quasi-Poisson regression -\item Negative binomial regression (including nbinom1 and nbinom2 families) +\item Negative binomial regression (including nbinom1, nbinom2 and nbinom12 families) \item Gaussian regression (linear models) \item Gamma regression \item Tweedie regression @@ -126,6 +126,7 @@ Following model families are not yet validated, but should work: \item Compound Poisson regression \item Generalized Poisson regression \item Log-normal regression +\item Skew-normal regression } Extracting variance components for models with zero-inflation part is not From f1d79148a6a6178afbac8e5fa3e4e77ee0bf7905 Mon Sep 17 00:00:00 2001 From: Daniel Date: Mon, 30 Sep 2024 13:23:57 +0200 Subject: [PATCH 30/41] Fix failing glmmTMB tests (#769) --- DESCRIPTION | 5 +++-- NEWS.md | 4 ++++ R/check_model_diagnostics.R | 2 +- tests/testthat/test-check_collinearity.R | 2 -- tests/testthat/test-check_convergence.R | 1 - tests/testthat/test-check_model.R | 1 - tests/testthat/test-check_normality.R | 1 - tests/testthat/test-check_overdispersion.R | 4 ---- tests/testthat/test-check_predictions.R | 1 - tests/testthat/test-check_singularity.R | 2 +- tests/testthat/test-check_zeroinflation.R | 4 ---- tests/testthat/test-r2.R | 19 ++++++++++--------- 12 files changed, 19 insertions(+), 27 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index f5cf4d81e..5d629a4b9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: performance Title: Assessment of Regression Models Performance -Version: 0.12.3.3 +Version: 0.12.3.4 Authors@R: c(person(given = "Daniel", family = "Lüdecke", @@ -103,7 +103,7 @@ Suggests: ftExtra, gamm4, ggdag, - glmmTMB, + glmmTMB (>= 1.1.10), graphics, Hmisc, httr2, @@ -160,3 +160,4 @@ Config/Needs/website: r-lib/pkgdown, easystats/easystatstemplate Config/rcmdcheck/ignore-inconsequential-notes: true +Remotes: easystats/insight, easystats/bayestestR, easystats/see, easystats/parameters diff --git a/NEWS.md b/NEWS.md index 81a8c5ad2..4f0984b48 100644 --- a/NEWS.md +++ b/NEWS.md @@ -7,6 +7,10 @@ * Minor revisions to the printed output of `check_dag()`. +## Bug fixes + +* Fixed failing tests that broke due to changes in latest *glmmTMB* update. + # performance 0.12.3 ## New functions diff --git a/R/check_model_diagnostics.R b/R/check_model_diagnostics.R index 431a2bc0f..8897c685e 100644 --- a/R/check_model_diagnostics.R +++ b/R/check_model_diagnostics.R @@ -460,7 +460,7 @@ if (!is.na(match(faminfo$family, c("binomial", "poisson", "truncated_poisson")))) { return(1) } - betad <- model$fit$par["betad"] + betad <- model$fit$par["betadisp"] switch(faminfo$family, gaussian = exp(0.5 * betad), Gamma = exp(-0.5 * betad), diff --git a/tests/testthat/test-check_collinearity.R b/tests/testthat/test-check_collinearity.R index ba456ae6f..d62aec75d 100644 --- a/tests/testthat/test-check_collinearity.R +++ b/tests/testthat/test-check_collinearity.R @@ -24,7 +24,6 @@ test_that("check_collinearity, correct order in print", { test_that("check_collinearity", { - skip_if(getRversion() > "4.3.3") skip_if_not_installed("glmmTMB") skip_if_not(getRversion() >= "4.0.0") @@ -51,7 +50,6 @@ test_that("check_collinearity", { test_that("check_collinearity", { - skip_if(getRversion() > "4.3.3") skip_if_not_installed("glmmTMB") skip_if_not(getRversion() >= "4.0.0") diff --git a/tests/testthat/test-check_convergence.R b/tests/testthat/test-check_convergence.R index 0bfadc8a6..8897b785d 100644 --- a/tests/testthat/test-check_convergence.R +++ b/tests/testthat/test-check_convergence.R @@ -29,7 +29,6 @@ test_that("check_convergence", { test_that("check_convergence, glmmTMB", { - skip_if(getRversion() > "4.3.3") skip_if_not_installed("glmmTMB") data(iris) model <- suppressWarnings(glmmTMB::glmmTMB( diff --git a/tests/testthat/test-check_model.R b/tests/testthat/test-check_model.R index 097f089a5..2b1141d26 100644 --- a/tests/testthat/test-check_model.R +++ b/tests/testthat/test-check_model.R @@ -52,7 +52,6 @@ test_that("`check_model()` works for quantreg", { }) test_that("`check_model()` warnings for tweedie", { - skip_if(getRversion() > "4.3.3") skip_if_not_installed("glmmTMB") skip_if_not_installed("lme4") data(sleepstudy, package = "lme4") diff --git a/tests/testthat/test-check_normality.R b/tests/testthat/test-check_normality.R index 44b461aea..f166c6f03 100644 --- a/tests/testthat/test-check_normality.R +++ b/tests/testthat/test-check_normality.R @@ -35,7 +35,6 @@ test_that("check_normality | afex", { }) test_that("check_normality | glmmTMB", { - skip_if(getRversion() > "4.3.3") skip_if_not_installed("glmmTMB") skip_if_not(getRversion() >= "4.0.0") diff --git a/tests/testthat/test-check_overdispersion.R b/tests/testthat/test-check_overdispersion.R index 06cc95dd0..2930322bb 100644 --- a/tests/testthat/test-check_overdispersion.R +++ b/tests/testthat/test-check_overdispersion.R @@ -1,5 +1,4 @@ test_that("check_overdispersion, glmmTMB-poisson", { - skip_if(getRversion() > "4.3.3") skip_if_not_installed("glmmTMB") skip_if_not(getRversion() >= "4.0.0") data(Salamanders, package = "glmmTMB") @@ -50,7 +49,6 @@ test_that("check_overdispersion, glmmTMB-poisson", { test_that("check_overdispersion, glmmTMB-poisson mixed", { - skip_if(getRversion() > "4.3.3") skip_if_not_installed("glmmTMB") skip_if_not(getRversion() >= "4.0.0") data(Salamanders, package = "glmmTMB") @@ -78,7 +76,6 @@ test_that("check_overdispersion, glmmTMB-poisson mixed", { test_that("check_overdispersion, zero-inflated and negbin", { - skip_if(getRversion() > "4.3.3") skip_if_not_installed("glmmTMB") skip_if_not_installed("DHARMa") skip_if_not(getRversion() >= "4.0.0") @@ -183,7 +180,6 @@ test_that("check_overdispersion, MASS::negbin", { test_that("check_overdispersion, genpois", { - skip_if(getRversion() > "4.3.3") skip_if_not_installed("glmmTMB") skip_if_not_installed("DHARMa") skip_if_not(getRversion() >= "4.0.0") diff --git a/tests/testthat/test-check_predictions.R b/tests/testthat/test-check_predictions.R index 04bc1d2cd..142a8b429 100644 --- a/tests/testthat/test-check_predictions.R +++ b/tests/testthat/test-check_predictions.R @@ -35,7 +35,6 @@ test_that("check_predictions", { test_that("check_predictions, glmmTMB", { - skip_if(getRversion() > "4.3.3") skip_if_not_installed("glmmTMB") data(mtcars) model <- glmmTMB::glmmTMB(vs ~ disp, data = mtcars, family = binomial()) diff --git a/tests/testthat/test-check_singularity.R b/tests/testthat/test-check_singularity.R index dc0d56964..8a07b85e0 100644 --- a/tests/testthat/test-check_singularity.R +++ b/tests/testthat/test-check_singularity.R @@ -29,7 +29,7 @@ test_that("check_singularity", { newparam = list( beta = 0, theta = rep(0, 21), - betad = 0 + betadisp = 0 ) )[[1]] expect_warning(expect_warning({ diff --git a/tests/testthat/test-check_zeroinflation.R b/tests/testthat/test-check_zeroinflation.R index a7ff7cbbd..38a5c7726 100644 --- a/tests/testthat/test-check_zeroinflation.R +++ b/tests/testthat/test-check_zeroinflation.R @@ -1,5 +1,4 @@ test_that("check_zeroinflation", { - skip_if(getRversion() > "4.3.3") skip_if_not_installed("glmmTMB") set.seed(123) data(Salamanders, package = "glmmTMB") @@ -22,7 +21,6 @@ test_that("check_zeroinflation", { test_that("check_zeroinflation, glmmTMB with and without zero-inflation component", { - skip_if(getRversion() > "4.3.3") skip_if_not_installed("glmmTMB") skip_if_not_installed("DHARMa") set.seed(123) @@ -109,7 +107,6 @@ test_that("check_zeroinflation, glmer.nb", { test_that("check_zeroinflation, glmmTMB nbinom", { - skip_if(getRversion() > "4.3.3") skip_if_not_installed("glmmTMB") skip_if_not_installed("DHARMa") skip_on_cran() @@ -166,7 +163,6 @@ test_that("check_zeroinflation, MASS::negbin", { test_that("check_zeroinflation, genpois", { - skip_if(getRversion() > "4.3.3") skip_if_not_installed("glmmTMB") skip_if_not_installed("DHARMa") skip_if_not(getRversion() >= "4.0.0") diff --git a/tests/testthat/test-r2.R b/tests/testthat/test-r2.R index f521b3aab..d7ade6b29 100644 --- a/tests/testthat/test-r2.R +++ b/tests/testthat/test-r2.R @@ -46,8 +46,7 @@ skip_if_not_installed("withr") withr::with_environment( new.env(), test_that("r2 glmmTMB, no ranef", { - skip_if(getRversion() > "4.3.3") - skip_if_not_installed("glmmTMB") + skip_if_not_installed("glmmTMB", minimum_version = "1.1.10") data(Owls, package = "glmmTMB") # linear --------------------------------------------------------------- m <- glmmTMB::glmmTMB(NegPerChick ~ BroodSize + ArrivalTime, data = Owls) @@ -89,14 +88,16 @@ withr::with_environment( data = bioChemists ) out <- r2(m) - expect_equal(out[[1]], 0.1797549, tolerance = 1e-3, ignore_attr = TRUE) + expect_equal(out[[1]], 0.14943, tolerance = 1e-3, ignore_attr = TRUE) + ## FIXME: since glmmTMB 1.1.10(?) Pearson residuals differ and results + ## are no longer identical, see https://github.com/glmmTMB/glmmTMB/issues/1101 # validate against pscl::zeroinfl - m2 <- pscl::zeroinfl( - art ~ fem + mar + kid5 + ment | kid5 + phd, - data = bioChemists - ) - out2 <- r2(m2) - expect_equal(out[[1]], out2[[1]], tolerance = 1e-3, ignore_attr = TRUE) + # m2 <- pscl::zeroinfl( + # art ~ fem + mar + kid5 + ment | kid5 + phd, + # data = bioChemists + # ) + # out2 <- r2(m2) + # expect_equal(out[[1]], out2[[1]], tolerance = 1e-3, ignore_attr = TRUE) # Gamma -------------------------------------------------------------- clotting <<- data.frame( u = c(5, 10, 15, 20, 30, 40, 60, 80, 100), From e24bdd83b3e69f41c80b2596dcb69fbd39f2fd7f Mon Sep 17 00:00:00 2001 From: Daniel Date: Mon, 30 Sep 2024 14:05:23 +0200 Subject: [PATCH 31/41] desc --- DESCRIPTION | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 5d629a4b9..0e042c1db 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -48,9 +48,9 @@ Authors@R: role = "rev"), person(given = "gjo11", role = "rev"), - person("Etienne", - "Bacher", , - "etienne.bacher@protonmail.com", + person(given = "Etienne", + family = "Bacher", , + email = "etienne.bacher@protonmail.com", role = "ctb", comment = c(ORCID = "0000-0002-9271-5075")), person(given = "Joseph", @@ -160,4 +160,3 @@ Config/Needs/website: r-lib/pkgdown, easystats/easystatstemplate Config/rcmdcheck/ignore-inconsequential-notes: true -Remotes: easystats/insight, easystats/bayestestR, easystats/see, easystats/parameters From 868144c09801877c52f58236fd09437b791d55a5 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 1 Oct 2024 06:57:28 +0200 Subject: [PATCH 32/41] Update `DESCRIPTION` to use latest 'easystats' dependencies (#770) Co-authored-by: IndrajeetPatil <11330453+IndrajeetPatil@users.noreply.github.com> --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 0e042c1db..a6e2a3069 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -74,8 +74,8 @@ Depends: R (>= 3.6) Imports: bayestestR (>= 0.14.0), - insight (>= 0.20.3), - datawizard (>= 0.12.2), + insight (>= 0.20.4), + datawizard (>= 0.12.3), stats, utils Suggests: From 22971d617995a2f39831224e89529595e87acc0c Mon Sep 17 00:00:00 2001 From: Daniel Date: Tue, 1 Oct 2024 22:06:52 +0200 Subject: [PATCH 33/41] new arg --- R/performance_aicc.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/performance_aicc.R b/R/performance_aicc.R index 3c433161f..52cf3679c 100644 --- a/R/performance_aicc.R +++ b/R/performance_aicc.R @@ -265,7 +265,7 @@ performance_aicc.rma <- function(x, ...) { .adjust_ic_jacobian <- function(model, ic) { response_transform <- insight::find_transformation(model) if (!is.null(ic) && !is.null(response_transform) && !identical(response_transform, "identity")) { - adjustment <- .safe(.ll_analytic_adjustment(model, insight::get_weights(model, na_rm = TRUE))) + adjustment <- .safe(.ll_analytic_adjustment(model, insight::get_weights(model, remove_na = TRUE))) if (!is.null(adjustment)) { ic <- ic - 2 * adjustment } From 35cd5d5006c23e98a578b501bcf5f9eabc7cbe8e Mon Sep 17 00:00:00 2001 From: Daniel Date: Tue, 1 Oct 2024 22:46:40 +0200 Subject: [PATCH 34/41] lintr --- R/check_dag.R | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/R/check_dag.R b/R/check_dag.R index 7a64ae4d0..272cc5814 100644 --- a/R/check_dag.R +++ b/R/check_dag.R @@ -285,15 +285,13 @@ check_dag <- function(..., adjustment_set <- unlist(dagitty::adjustmentSets(dag, effect = x), use.names = FALSE) adjustment_nodes <- unlist(dagitty::adjustedNodes(dag), use.names = FALSE) minimal_adjustments <- as.list(dagitty::adjustmentSets(dag, effect = x)) - collider <- adjustment_nodes[vapply(adjustment_nodes, ggdag::is_collider, logical(1), .dag = dag, downstream = FALSE)] - if (!length(collider)) { + collider <- adjustment_nodes[vapply(adjustment_nodes, ggdag::is_collider, logical(1), .dag = dag, downstream = FALSE)] # nolint + if (length(collider)) { + # if we *have* colliders, remove them from minimal adjustments + minimal_adjustments <- lapply(minimal_adjustments, setdiff, y = collider) + } else { # if we don't have colliders, set to NULL collider <- NULL - } else { - # if we *have* colliders, remove them from minimal adjustments - minimal_adjustments <- lapply(minimal_adjustments, function(ma) { - setdiff(ma, collider) - }) } list( # no adjustment needed when @@ -303,7 +301,7 @@ check_dag <- function(..., # incorrect adjustment when # - required is NULL and current adjustment not NULL # - OR we have a collider in current adjustments - incorrectly_adjusted = (is.null(adjustment_set) && !is.null(adjustment_nodes)) || (!is.null(collider) && collider %in% adjustment_nodes), + incorrectly_adjusted = (is.null(adjustment_set) && !is.null(adjustment_nodes)) || (!is.null(collider) && collider %in% adjustment_nodes), # nolint current_adjustments = adjustment_nodes, minimal_adjustments = minimal_adjustments, collider = collider From 8d5742ed418f579e50370f7e35dcf70997a7dbb6 Mon Sep 17 00:00:00 2001 From: Daniel Date: Wed, 2 Oct 2024 01:05:41 +0200 Subject: [PATCH 35/41] remove twitter handle --- DESCRIPTION | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index a6e2a3069..9c2f7f969 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -7,22 +7,22 @@ Authors@R: family = "Lüdecke", role = c("aut", "cre"), email = "d.luedecke@uke.de", - comment = c(ORCID = "0000-0002-8895-3206", Twitter = "@strengejacke")), + comment = c(ORCID = "0000-0002-8895-3206")), person(given = "Dominique", family = "Makowski", role = c("aut", "ctb"), email = "dom.makowski@gmail.com", - comment = c(ORCID = "0000-0001-5375-9967", Twitter = "@Dom_Makowski")), + comment = c(ORCID = "0000-0001-5375-9967")), person(given = "Mattan S.", family = "Ben-Shachar", role = c("aut", "ctb"), email = "matanshm@post.bgu.ac.il", - comment = c(ORCID = "0000-0002-4287-4801", Twitter = "@mattansb")), + comment = c(ORCID = "0000-0002-4287-4801")), person(given = "Indrajeet", family = "Patil", role = c("aut", "ctb"), email = "patilindrajeet.science@gmail.com", - comment = c(ORCID = "0000-0003-1995-6531", Twitter = "@patilindrajeets")), + comment = c(ORCID = "0000-0003-1995-6531")), person(given = "Philip", family = "Waggoner", role = c("aut", "ctb"), @@ -32,12 +32,12 @@ Authors@R: family = "Wiernik", role = c("aut", "ctb"), email = "brenton@wiernik.org", - comment = c(ORCID = "0000-0001-9560-6336", Twitter = "@bmwiernik")), + comment = c(ORCID = "0000-0001-9560-6336")), person(given = "Rémi", family = "Thériault", role = c("aut", "ctb"), email = "remi.theriault@mail.mcgill.ca", - comment = c(ORCID = "0000-0003-4315-6788", Twitter = "@rempsyc")), + comment = c(ORCID = "0000-0003-4315-6788")), person(given = "Vincent", family = "Arel-Bundock", email = "vincent.arel-bundock@umontreal.ca", From c5a22290808f2200569e980592dc05d84cf4b9f9 Mon Sep 17 00:00:00 2001 From: Daniel Date: Wed, 2 Oct 2024 01:07:52 +0200 Subject: [PATCH 36/41] Update test based on glmmTMB fix (#771) --- DESCRIPTION | 3 +- R/check_dag.R | 68 +++++++++++++++++-- R/check_heterogeneity_bias.R | 2 +- man/check_dag.Rd | 33 +++++++-- man/check_heterogeneity_bias.Rd | 2 - .../testthat/test-check_heterogeneity_bias.R | 2 +- tests/testthat/test-icc.R | 2 +- tests/testthat/test-r2.R | 17 +++-- 8 files changed, 106 insertions(+), 23 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index a6e2a3069..8680fd5b9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -142,7 +142,7 @@ Suggests: rstanarm, rstantools, sandwich, - see (>= 0.8.2), + see (>= 0.9.0), survey, survival, testthat (>= 3.2.1), @@ -160,3 +160,4 @@ Config/Needs/website: r-lib/pkgdown, easystats/easystatstemplate Config/rcmdcheck/ignore-inconsequential-notes: true +Remotes: glmmTMB/glmmTMB/glmmTMB#1102 diff --git a/R/check_dag.R b/R/check_dag.R index 272cc5814..c8b53533f 100644 --- a/R/check_dag.R +++ b/R/check_dag.R @@ -37,9 +37,17 @@ #' @param latent A character vector with names of latent variables in the model. #' @param effect Character string, indicating which effect to check. Can be #' `"all"` (default), `"total"`, or `"direct"`. -#' @param coords A list with two elements, `x` and `y`, which both are named -#' vectors of numerics. The names correspond to the variable names in the DAG, -#' and the values for `x` and `y` indicate the x/y coordinates in the plot. +#' @param coords Coordinates of the variables when plotting the DAG. The +#' coordinates can be provided in three different ways: +#' +#' - a list with two elements, `x` and `y`, which both are named vectors of +#' numerics. The names correspond to the variable names in the DAG, and the +#' values for `x` and `y` indicate the x/y coordinates in the plot. +#' - a list with elements that correspond to the variables in the DAG. Each +#' element is a numeric vector of length two with x- and y-coordinate. +#' - a data frame with three columns: `x`, `y` and `name` (which contains the +#' variable names). +#' #' See 'Examples'. #' @param x An object of class `check_dag`, as returned by `check_dag()`. #' @@ -111,7 +119,7 @@ #' Interpreting Confounder and Modifier Coefficients. American Journal of #' Epidemiology, 177(4), 292–298. \doi{10.1093/aje/kws412} #' -#' @examplesIf require("ggdag", quietly = TRUE) && require("dagitty", quietly = TRUE) && require("see", quietly = TRUE) && packageVersion("see") > "0.8.5" +#' @examplesIf require("ggdag", quietly = TRUE) && require("dagitty", quietly = TRUE) && require("see", quietly = TRUE) #' # no adjustment needed #' check_dag( #' y ~ x + b, @@ -171,6 +179,22 @@ #' ) #' plot(dag) #' +#' # alternative way of providing the coordinates +#' dag <- check_dag( +#' score ~ exp + b + c, +#' exp ~ b, +#' outcome = "score", +#' exposure = "exp", +#' coords = list( +#' # x/y coordinates for each node +#' score = c(5, 3), +#' exp = c(4, 3), +#' b = c(3, 2), +#' c = c(3, 4) +#' ) +#' ) +#' plot(dag) +#' #' # Objects returned by `check_dag()` can be used with "ggdag" or "dagitty" #' ggdag::ggdag_status(dag) #' @@ -248,6 +272,9 @@ check_dag <- function(..., adjusted <- all.vars(adjusted) } + # process coords-argument + coords <- .process_coords(coords) + # convert to dag dag_args <- c(formulas, list( exposure = exposure, @@ -338,6 +365,39 @@ check_dag <- function(..., } +.process_coords <- function(coords) { + # check if the coords are not provided as list with x/y elements, but instead + # as list x/y coordinates for each element. This means, "coords" is provided as + # + # coords <- list( + # score = c(5, 3), + # exp = c(4, 3), + # b = c(3, 2), + # c = c(3, 4) + # ) + # + # but we want + # + # coords = list( + # x = c(score = 5, exp = 4, b = 3, c = 3), + # y = c(score = 3, exp = 3, b = 2, c = 4) + # ) + # + # we have to check that it's not a data frame and that it is a list - + # values like `ggdag::time_ordered_coords()` returns a function, not a list + if (!is.null(coords) && !is.data.frame(coords) && is.list(coords) && (length(coords) != 2 || !identical(names(coords), c("x", "y")))) { # nolint + # transform list into data frame, split x and y coordinates into columns + coords <- datawizard::rownames_as_column( + stats::setNames(as.data.frame(do.call(rbind, coords)), c("x", "y")), + "name" + ) + # reorder + coords <- coords[c("x", "y", "name")] + } + coords +} + + # methods -------------------------------------------------------------------- #' @rdname check_dag diff --git a/R/check_heterogeneity_bias.R b/R/check_heterogeneity_bias.R index 3c9b502ce..755f47dfa 100644 --- a/R/check_heterogeneity_bias.R +++ b/R/check_heterogeneity_bias.R @@ -39,7 +39,7 @@ #' Modeling of Time-Series Cross-Sectional and Panel Data. Political Science #' Research and Methods, 3(1), 133–153. #' -#' @examplesIf insight::check_if_installed("datawizard", minimum_version = "0.12.0", quietly = TRUE) +#' @examples #' data(iris) #' iris$ID <- sample(1:4, nrow(iris), replace = TRUE) # fake-ID #' check_heterogeneity_bias(iris, select = c("Sepal.Length", "Petal.Length"), by = "ID") diff --git a/man/check_dag.Rd b/man/check_dag.Rd index fa7e92207..e366ff07a 100644 --- a/man/check_dag.Rd +++ b/man/check_dag.Rd @@ -42,9 +42,18 @@ are adjusted for in the model, e.g. \code{adjusted = c("x1", "x2")} or \item{effect}{Character string, indicating which effect to check. Can be \code{"all"} (default), \code{"total"}, or \code{"direct"}.} -\item{coords}{A list with two elements, \code{x} and \code{y}, which both are named -vectors of numerics. The names correspond to the variable names in the DAG, -and the values for \code{x} and \code{y} indicate the x/y coordinates in the plot. +\item{coords}{Coordinates of the variables when plotting the DAG. The +coordinates can be provided in three different ways: +\itemize{ +\item a list with two elements, \code{x} and \code{y}, which both are named vectors of +numerics. The names correspond to the variable names in the DAG, and the +values for \code{x} and \code{y} indicate the x/y coordinates in the plot. +\item a list with elements that correspond to the variables in the DAG. Each +element is a numeric vector of length two with x- and y-coordinate. +\item a data frame with three columns: \code{x}, \code{y} and \code{name} (which contains the +variable names). +} + See 'Examples'.} \item{x}{An object of class \code{check_dag}, as returned by \code{check_dag()}.} @@ -137,7 +146,7 @@ adjustments or over-adjustment. } \examples{ -\dontshow{if (require("ggdag", quietly = TRUE) && require("dagitty", quietly = TRUE) && require("see", quietly = TRUE) && packageVersion("see") > "0.8.5") (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (require("ggdag", quietly = TRUE) && require("dagitty", quietly = TRUE) && require("see", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} # no adjustment needed check_dag( y ~ x + b, @@ -197,6 +206,22 @@ dag <- check_dag( ) plot(dag) +# alternative way of providing the coordinates +dag <- check_dag( + score ~ exp + b + c, + exp ~ b, + outcome = "score", + exposure = "exp", + coords = list( + # x/y coordinates for each node + score = c(5, 3), + exp = c(4, 3), + b = c(3, 2), + c = c(3, 4) + ) +) +plot(dag) + # Objects returned by `check_dag()` can be used with "ggdag" or "dagitty" ggdag::ggdag_status(dag) diff --git a/man/check_heterogeneity_bias.Rd b/man/check_heterogeneity_bias.Rd index 228d26510..40b2b66ca 100644 --- a/man/check_heterogeneity_bias.Rd +++ b/man/check_heterogeneity_bias.Rd @@ -50,11 +50,9 @@ cause a heterogeneity bias, i.e. if variables have a within- and/or between-effect (\emph{Bell and Jones, 2015}). } \examples{ -\dontshow{if (insight::check_if_installed("datawizard", minimum_version = "0.12.0", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} data(iris) iris$ID <- sample(1:4, nrow(iris), replace = TRUE) # fake-ID check_heterogeneity_bias(iris, select = c("Sepal.Length", "Petal.Length"), by = "ID") -\dontshow{\}) # examplesIf} } \references{ \itemize{ diff --git a/tests/testthat/test-check_heterogeneity_bias.R b/tests/testthat/test-check_heterogeneity_bias.R index e01ecf3ff..9701a7c03 100644 --- a/tests/testthat/test-check_heterogeneity_bias.R +++ b/tests/testthat/test-check_heterogeneity_bias.R @@ -1,5 +1,5 @@ test_that("check_heterogeneity_bias", { - skip_if_not_installed("datawizard", minimum_version = "0.12.0") + skip_if_not_installed("datawizard") data(iris) set.seed(123) iris$ID <- sample.int(4, nrow(iris), replace = TRUE) # fake-ID diff --git a/tests/testthat/test-icc.R b/tests/testthat/test-icc.R index 82da6a3eb..73f8b6e3f 100644 --- a/tests/testthat/test-icc.R +++ b/tests/testthat/test-icc.R @@ -121,7 +121,7 @@ test_that("icc", { test_that("icc, glmmTMB 1.1.9+", { - skip_if_not_installed("glmmTMB", minimum_version = "1.1.9") + skip_if_not_installed("glmmTMB") set.seed(101) dd <- data.frame( z = rnorm(1000), diff --git a/tests/testthat/test-r2.R b/tests/testthat/test-r2.R index d7ade6b29..6b4ac0b05 100644 --- a/tests/testthat/test-r2.R +++ b/tests/testthat/test-r2.R @@ -79,6 +79,7 @@ withr::with_environment( out2 <- r2(m2) expect_equal(out[[1]], out2[[1]], tolerance = 1e-3, ignore_attr = TRUE) # zero-inflated -------------------------------------------------------------- + skip_if_not(packageVersion("glmmTMB") > "1.1.10") skip_if_not_installed("pscl") data(bioChemists, package = "pscl") m <- glmmTMB::glmmTMB( @@ -88,16 +89,14 @@ withr::with_environment( data = bioChemists ) out <- r2(m) - expect_equal(out[[1]], 0.14943, tolerance = 1e-3, ignore_attr = TRUE) - ## FIXME: since glmmTMB 1.1.10(?) Pearson residuals differ and results - ## are no longer identical, see https://github.com/glmmTMB/glmmTMB/issues/1101 + expect_equal(out[[1]], 0.1797549, tolerance = 1e-3, ignore_attr = TRUE) # validate against pscl::zeroinfl - # m2 <- pscl::zeroinfl( - # art ~ fem + mar + kid5 + ment | kid5 + phd, - # data = bioChemists - # ) - # out2 <- r2(m2) - # expect_equal(out[[1]], out2[[1]], tolerance = 1e-3, ignore_attr = TRUE) + m2 <- pscl::zeroinfl( + art ~ fem + mar + kid5 + ment | kid5 + phd, + data = bioChemists + ) + out2 <- r2(m2) + expect_equal(out[[1]], out2[[1]], tolerance = 1e-3, ignore_attr = TRUE) # Gamma -------------------------------------------------------------- clotting <<- data.frame( u = c(5, 10, 15, 20, 30, 40, 60, 80, 100), From 6b1020eb802072348253dcbcab38c27a9c2ea495 Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 18 Oct 2024 20:39:52 +0200 Subject: [PATCH 37/41] Prepare CRAN submission (#772) --- DESCRIPTION | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 883790eab..826abc7b3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: performance Title: Assessment of Regression Models Performance -Version: 0.12.3.4 +Version: 0.12.4 Authors@R: c(person(given = "Daniel", family = "Lüdecke", @@ -73,9 +73,9 @@ BugReports: https://github.com/easystats/performance/issues Depends: R (>= 3.6) Imports: - bayestestR (>= 0.14.0), - insight (>= 0.20.4), - datawizard (>= 0.12.3), + bayestestR (>= 0.15.0), + insight (>= 0.20.5), + datawizard (>= 0.13.0), stats, utils Suggests: @@ -160,4 +160,3 @@ Config/Needs/website: r-lib/pkgdown, easystats/easystatstemplate Config/rcmdcheck/ignore-inconsequential-notes: true -Remotes: glmmTMB/glmmTMB/glmmTMB#1102 From 8b52f6cabf7ed3c855bc27fc77f09d984a4c79a7 Mon Sep 17 00:00:00 2001 From: Daniel Date: Mon, 21 Oct 2024 10:24:28 +0200 Subject: [PATCH 38/41] update test, due to changes in DHARMa::createData() --- tests/testthat/test-check_residuals.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/testthat/test-check_residuals.R b/tests/testthat/test-check_residuals.R index 9be029c0c..dbaebe447 100644 --- a/tests/testthat/test-check_residuals.R +++ b/tests/testthat/test-check_residuals.R @@ -19,13 +19,13 @@ test_that("check_residuals and simulate_residuals", { # check raw residuals expect_equal( head(residuals(res)), - c(0.55349, 0.44012, 0.39826, 0.9825, 0.90753, 0.05809), + c(0.01815, 0.7062, 0.56076, 0.19282, 0.31423, 0.10115), tolerance = 1e-4, ignore_attr = TRUE ) expect_equal( head(residuals(res, quantile_function = stats::qnorm)), - c(0.13448, -0.15068, -0.25785, 2.10826, 1.3257, -1.57097), + c(-2.09365, 0.54232, 0.1529, -0.86756, -0.4839, -1.27504), tolerance = 1e-4, ignore_attr = TRUE ) @@ -57,10 +57,10 @@ test_that("check_residuals and simulate_residuals", { # check_residuals out <- check_residuals(res) - expect_equal(out, 0.01884602, ignore_attr = TRUE, tolerance = 1e-4) + expect_equal(out, 0.02574286, ignore_attr = TRUE, tolerance = 1e-4) expect_identical( capture.output(print(out)), - "Warning: Non-uniformity of simulated residuals detected (p = 0.019)." + "Warning: Non-uniformity of simulated residuals detected (p = 0.026)." ) expect_error(simulate_residuals(m, iterations = 1), "`iterations` must be") From 90d000834997fa1a593afc8db5d0628c23c2b113 Mon Sep 17 00:00:00 2001 From: Daniel Date: Tue, 22 Oct 2024 10:41:12 +0200 Subject: [PATCH 39/41] docs --- R/performance_aicc.R | 10 ++++++++++ man/performance-package.Rd | 12 ++++++------ man/performance_aicc.Rd | 10 ++++++++++ 3 files changed, 26 insertions(+), 6 deletions(-) diff --git a/R/performance_aicc.R b/R/performance_aicc.R index 52cf3679c..bc1f3e0ab 100644 --- a/R/performance_aicc.R +++ b/R/performance_aicc.R @@ -28,6 +28,9 @@ #' scale. To get back to the original scale, the likelihood of the model is #' multiplied by the Jacobian/derivative of the transformation. #' +#' In case it is not possible to return the corrected AIC value, a waring +#' is given that the corrected log-likelihood value could not be computed. +#' #' @references #' - Akaike, H. (1973) Information theory as an extension of the maximum #' likelihood principle. In: Second International Symposium on Information @@ -52,6 +55,13 @@ #' # performance_aic() correctly detects transformed response and #' # returns corrected AIC #' performance_aic(model) +#' +#' \dontrun{ +#' # there are a few exceptions where the corrected log-likelihood values +#' # cannot be returned. The following exampe gives a warning. +#' model <- lm(1 / mpg ~ factor(cyl), mtcars) +#' performance_aic(model) +#' } #' @export performance_aicc <- function(x, ...) { UseMethod("performance_aicc") diff --git a/man/performance-package.Rd b/man/performance-package.Rd index f7a05c369..4644e6443 100644 --- a/man/performance-package.Rd +++ b/man/performance-package.Rd @@ -34,16 +34,16 @@ Useful links: } \author{ -\strong{Maintainer}: Daniel Lüdecke \email{d.luedecke@uke.de} (\href{https://orcid.org/0000-0002-8895-3206}{ORCID}) (@strengejacke) +\strong{Maintainer}: Daniel Lüdecke \email{d.luedecke@uke.de} (\href{https://orcid.org/0000-0002-8895-3206}{ORCID}) Authors: \itemize{ - \item Dominique Makowski \email{dom.makowski@gmail.com} (\href{https://orcid.org/0000-0001-5375-9967}{ORCID}) (@Dom_Makowski) [contributor] - \item Mattan S. Ben-Shachar \email{matanshm@post.bgu.ac.il} (\href{https://orcid.org/0000-0002-4287-4801}{ORCID}) (@mattansb) [contributor] - \item Indrajeet Patil \email{patilindrajeet.science@gmail.com} (\href{https://orcid.org/0000-0003-1995-6531}{ORCID}) (@patilindrajeets) [contributor] + \item Dominique Makowski \email{dom.makowski@gmail.com} (\href{https://orcid.org/0000-0001-5375-9967}{ORCID}) [contributor] + \item Mattan S. Ben-Shachar \email{matanshm@post.bgu.ac.il} (\href{https://orcid.org/0000-0002-4287-4801}{ORCID}) [contributor] + \item Indrajeet Patil \email{patilindrajeet.science@gmail.com} (\href{https://orcid.org/0000-0003-1995-6531}{ORCID}) [contributor] \item Philip Waggoner \email{philip.waggoner@gmail.com} (\href{https://orcid.org/0000-0002-7825-7573}{ORCID}) [contributor] - \item Brenton M. Wiernik \email{brenton@wiernik.org} (\href{https://orcid.org/0000-0001-9560-6336}{ORCID}) (@bmwiernik) [contributor] - \item Rémi Thériault \email{remi.theriault@mail.mcgill.ca} (\href{https://orcid.org/0000-0003-4315-6788}{ORCID}) (@rempsyc) [contributor] + \item Brenton M. Wiernik \email{brenton@wiernik.org} (\href{https://orcid.org/0000-0001-9560-6336}{ORCID}) [contributor] + \item Rémi Thériault \email{remi.theriault@mail.mcgill.ca} (\href{https://orcid.org/0000-0003-4315-6788}{ORCID}) [contributor] } Other contributors: diff --git a/man/performance_aicc.Rd b/man/performance_aicc.Rd index 0bf2120b7..529ee53aa 100644 --- a/man/performance_aicc.Rd +++ b/man/performance_aicc.Rd @@ -46,6 +46,9 @@ incorporates a correction for small sample sizes. unlike \code{stats::AIC()}, returns the "corrected" AIC value on the original scale. To get back to the original scale, the likelihood of the model is multiplied by the Jacobian/derivative of the transformation. + +In case it is not possible to return the corrected AIC value, a waring +is given that the corrected log-likelihood value could not be computed. } \examples{ m <- lm(mpg ~ wt + cyl + gear + disp, data = mtcars) @@ -63,6 +66,13 @@ AIC(model) # performance_aic() correctly detects transformed response and # returns corrected AIC performance_aic(model) + +\dontrun{ +# there are a few exceptions where the corrected log-likelihood values +# cannot be returned. The following exampe gives a warning. +model <- lm(1 / mpg ~ factor(cyl), mtcars) +performance_aic(model) +} } \references{ \itemize{ From c8c81200c91a7339fcc65314081895de12078a71 Mon Sep 17 00:00:00 2001 From: Daniel Date: Tue, 22 Oct 2024 11:05:53 +0200 Subject: [PATCH 40/41] DRY --- DESCRIPTION | 3 +- R/performance_aicc.R | 65 +++----------------------------------------- 2 files changed, 6 insertions(+), 62 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 826abc7b3..2b7fe815a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: performance Title: Assessment of Regression Models Performance -Version: 0.12.4 +Version: 0.12.4.1 Authors@R: c(person(given = "Daniel", family = "Lüdecke", @@ -160,3 +160,4 @@ Config/Needs/website: r-lib/pkgdown, easystats/easystatstemplate Config/rcmdcheck/ignore-inconsequential-notes: true +Remotes: easystats/insight diff --git a/R/performance_aicc.R b/R/performance_aicc.R index bc1f3e0ab..eb10bc40f 100644 --- a/R/performance_aicc.R +++ b/R/performance_aicc.R @@ -266,8 +266,6 @@ performance_aicc.rma <- function(x, ...) { } - - # jacobian / derivate for log models and other transformations ---------------- @@ -275,68 +273,13 @@ performance_aicc.rma <- function(x, ...) { .adjust_ic_jacobian <- function(model, ic) { response_transform <- insight::find_transformation(model) if (!is.null(ic) && !is.null(response_transform) && !identical(response_transform, "identity")) { - adjustment <- .safe(.ll_analytic_adjustment(model, insight::get_weights(model, remove_na = TRUE))) + adjustment <- .safe(insight::get_loglikelihood_adjustment( + model, + insight::get_weights(model, remove_na = TRUE) + )) if (!is.null(adjustment)) { ic <- ic - 2 * adjustment } } ic } - - -# copied from `insight` -.ll_analytic_adjustment <- function(x, model_weights = NULL) { - tryCatch( - { - trans <- insight::find_transformation(x) - switch(trans, - identity = .weighted_sum(log(insight::get_response(x)), w = model_weights), - log = .weighted_sum(log(1 / insight::get_response(x)), w = model_weights), - log1p = .weighted_sum(log(1 / (insight::get_response(x) + 1)), w = model_weights), - log2 = .weighted_sum(log(1 / (insight::get_response(x) * log(2))), w = model_weights), - log10 = .weighted_sum(log(1 / (insight::get_response(x) * log(10))), w = model_weights), - exp = .weighted_sum(insight::get_response(x), w = model_weights), - expm1 = .weighted_sum((insight::get_response(x) - 1), w = model_weights), - sqrt = .weighted_sum(log(0.5 / sqrt(insight::get_response(x))), w = model_weights), - .ll_jacobian_adjustment(x, model_weights) - ) - }, - error = function(e) { - NULL - } - ) -} - - -# this function calculates the adjustment for the log-likelihood of a model -# with transformed response -.ll_jacobian_adjustment <- function(model, weights = NULL) { - tryCatch( - { - trans <- insight::get_transformation(model)$transformation - .weighted_sum(log( - diag(attr(with( - insight::get_data(model, verbose = FALSE), - stats::numericDeriv( - expr = quote(trans( - get(insight::find_response(model)) - )), - theta = insight::find_response(model) - ) - ), "gradient")) - ), weights) - }, - error = function(e) { - NULL - } - ) -} - - -.weighted_sum <- function(x, w = NULL, ...) { - if (is.null(w)) { - mean(x) * length(x) - } else { - stats::weighted.mean(x, w) * length(x) - } -} From 0aa349233fd2ba62399f5caf16cbe89511b999ac Mon Sep 17 00:00:00 2001 From: Daniel Date: Tue, 22 Oct 2024 12:59:00 +0200 Subject: [PATCH 41/41] Remove deprecated functions and arguments (#774) --- DESCRIPTION | 6 +++--- NAMESPACE | 2 -- NEWS.md | 10 ++++++++++ R/check_convergence.R | 5 +++-- R/check_heterogeneity_bias.R | 9 +-------- R/check_predictions.R | 20 -------------------- R/performance_aicc.R | 2 +- R/simulate_residuals.R | 7 ++++--- man/check_heterogeneity_bias.Rd | 10 +--------- man/check_predictions.Rd | 6 ------ man/performance_aicc.Rd | 2 +- tests/testthat/test-check_zeroinflation.R | 22 +++++++++++----------- 12 files changed, 35 insertions(+), 66 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 2b7fe815a..abac13053 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: performance Title: Assessment of Regression Models Performance -Version: 0.12.4.1 +Version: 0.12.4.2 Authors@R: c(person(given = "Daniel", family = "Lüdecke", @@ -95,7 +95,7 @@ Suggests: cplm, dagitty, dbscan, - DHARMa, + DHARMa (>= 0.4.7), estimatr, fixest, flextable, @@ -129,7 +129,7 @@ Suggests: nonnest2, ordinal, parallel, - parameters (>= 0.21.6), + parameters (>= 0.22.0), patchwork, pscl, psych, diff --git a/NAMESPACE b/NAMESPACE index 08f6bcc15..d35d82547 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -566,7 +566,6 @@ export(check_multimodal) export(check_normality) export(check_outliers) export(check_overdispersion) -export(check_posterior_predictions) export(check_predictions) export(check_residuals) export(check_singularity) @@ -602,7 +601,6 @@ export(performance_rmse) export(performance_roc) export(performance_rse) export(performance_score) -export(posterior_predictive_check) export(print_html) export(print_md) export(r2) diff --git a/NEWS.md b/NEWS.md index 4f0984b48..9eb5ded26 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,13 @@ +# performance 0.12.5 + +## Breaking changes + +* Deprecated arguments and alias-function-names have been removed. + +## Changes + +* Increased accuracy for `check_convergence()` for *glmmTMB* models. + # performance 0.12.4 ## Changes diff --git a/R/check_convergence.R b/R/check_convergence.R index 6f7628a12..f60a7e298 100644 --- a/R/check_convergence.R +++ b/R/check_convergence.R @@ -98,9 +98,10 @@ check_convergence.merMod <- function(x, tolerance = 0.001, ...) { #' @export -check_convergence.glmmTMB <- function(x, ...) { +check_convergence.glmmTMB <- function(x, tolerance = 0.001, ...) { # https://github.com/glmmTMB/glmmTMB/issues/275 - isTRUE(x$sdr$pdHess) + # https://stackoverflow.com/q/79110546/2094622 + isTRUE(all.equal(x$fit$convergence, 0, tolerance = tolerance)) && isTRUE(x$sdr$pdHess) } diff --git a/R/check_heterogeneity_bias.R b/R/check_heterogeneity_bias.R index 755f47dfa..2414873cc 100644 --- a/R/check_heterogeneity_bias.R +++ b/R/check_heterogeneity_bias.R @@ -27,7 +27,6 @@ #' @param nested Logical, if `TRUE`, the data is treated as nested. If `FALSE`, #' the data is treated as cross-classified. Only applies if `by` contains more #' than one variable. -#' @param group Deprecated. Use `by` instead. #' #' @seealso #' For further details, read the vignette @@ -44,15 +43,9 @@ #' iris$ID <- sample(1:4, nrow(iris), replace = TRUE) # fake-ID #' check_heterogeneity_bias(iris, select = c("Sepal.Length", "Petal.Length"), by = "ID") #' @export -check_heterogeneity_bias <- function(x, select = NULL, by = NULL, nested = FALSE, group = NULL) { +check_heterogeneity_bias <- function(x, select = NULL, by = NULL, nested = FALSE) { insight::check_if_installed("datawizard", minimum_version = "0.12.0") - ## TODO: deprecate later - if (!is.null(group)) { - insight::format_warning("Argument `group` is deprecated and will be removed in a future release. Please use `by` instead.") # nolint - by <- group - } - if (insight::is_model(x)) { by <- insight::find_random(x, split_nested = TRUE, flatten = TRUE) if (is.null(by)) { diff --git a/R/check_predictions.R b/R/check_predictions.R index fbc3b5eaf..c903f4309 100644 --- a/R/check_predictions.R +++ b/R/check_predictions.R @@ -423,26 +423,6 @@ pp_check.glmmTMB <- #' S3method(bayesplot::pp_check, BFBayesFactor) - -# aliases -------------------------- - -#' @rdname check_predictions -#' @export -posterior_predictive_check <- function(object, ...) { - .Deprecated("check_predictions()") - check_predictions(object, ...) -} - -#' @rdname check_predictions -#' @export -check_posterior_predictions <- function(object, ...) { - .Deprecated("check_predictions()") - check_predictions(object, ...) -} - - - - # methods ----------------------- diff --git a/R/performance_aicc.R b/R/performance_aicc.R index eb10bc40f..74041ac1b 100644 --- a/R/performance_aicc.R +++ b/R/performance_aicc.R @@ -28,7 +28,7 @@ #' scale. To get back to the original scale, the likelihood of the model is #' multiplied by the Jacobian/derivative of the transformation. #' -#' In case it is not possible to return the corrected AIC value, a waring +#' In case it is not possible to return the corrected AIC value, a warning #' is given that the corrected log-likelihood value could not be computed. #' #' @references diff --git a/R/simulate_residuals.R b/R/simulate_residuals.R index 7312c196f..1f58218c0 100644 --- a/R/simulate_residuals.R +++ b/R/simulate_residuals.R @@ -65,9 +65,10 @@ #' @export simulate_residuals <- function(x, iterations = 250, ...) { insight::check_if_installed("DHARMa") - # TODO (low priority): Note that DHARMa::simulateResiduals(x, ...) does its own checks for whether - # or not the model passed to it is supported, do we want to use this or do our - # own checks so we can supply our own error message? + # TODO (low priority): Note that DHARMa::simulateResiduals(x, ...) does its + # own checks for whether or not the model passed to it is supported, do we + # want to use this or do our own checks so we can supply our own error + # message? if (iterations < 2) { insight::format_error("`iterations` must be at least 2.") } diff --git a/man/check_heterogeneity_bias.Rd b/man/check_heterogeneity_bias.Rd index 40b2b66ca..db18ae7ef 100644 --- a/man/check_heterogeneity_bias.Rd +++ b/man/check_heterogeneity_bias.Rd @@ -4,13 +4,7 @@ \alias{check_heterogeneity_bias} \title{Check model predictor for heterogeneity bias} \usage{ -check_heterogeneity_bias( - x, - select = NULL, - by = NULL, - nested = FALSE, - group = NULL -) +check_heterogeneity_bias(x, select = NULL, by = NULL, nested = FALSE) } \arguments{ \item{x}{A data frame or a mixed model object.} @@ -41,8 +35,6 @@ See also section \emph{De-meaning for cross-classified designs} and \item{nested}{Logical, if \code{TRUE}, the data is treated as nested. If \code{FALSE}, the data is treated as cross-classified. Only applies if \code{by} contains more than one variable.} - -\item{group}{Deprecated. Use \code{by} instead.} } \description{ \code{check_heterogeneity_bias()} checks if model predictors or variables may diff --git a/man/check_predictions.Rd b/man/check_predictions.Rd index 148df6994..6c13fc0d5 100644 --- a/man/check_predictions.Rd +++ b/man/check_predictions.Rd @@ -3,8 +3,6 @@ \name{check_predictions} \alias{check_predictions} \alias{check_predictions.default} -\alias{posterior_predictive_check} -\alias{check_posterior_predictions} \title{Posterior predictive checks} \usage{ check_predictions(object, ...) @@ -19,10 +17,6 @@ check_predictions(object, ...) verbose = TRUE, ... ) - -posterior_predictive_check(object, ...) - -check_posterior_predictions(object, ...) } \arguments{ \item{object}{A statistical model.} diff --git a/man/performance_aicc.Rd b/man/performance_aicc.Rd index 529ee53aa..b1d9a1255 100644 --- a/man/performance_aicc.Rd +++ b/man/performance_aicc.Rd @@ -47,7 +47,7 @@ unlike \code{stats::AIC()}, returns the "corrected" AIC value on the original scale. To get back to the original scale, the likelihood of the model is multiplied by the Jacobian/derivative of the transformation. -In case it is not possible to return the corrected AIC value, a waring +In case it is not possible to return the corrected AIC value, a warning is given that the corrected log-likelihood value could not be computed. } \examples{ diff --git a/tests/testthat/test-check_zeroinflation.R b/tests/testthat/test-check_zeroinflation.R index 38a5c7726..53f8af9f5 100644 --- a/tests/testthat/test-check_zeroinflation.R +++ b/tests/testthat/test-check_zeroinflation.R @@ -111,22 +111,22 @@ test_that("check_zeroinflation, glmmTMB nbinom", { skip_if_not_installed("DHARMa") skip_on_cran() + data(Salamanders, package = "glmmTMB") + m <- glmmTMB::glmmTMB( + count ~ spp + mined + (1 | site), + family = glmmTMB::nbinom1(), + data = Salamanders + ) set.seed(1234) - dat <- DHARMa::createData(sampleSize = 1000) - fit <- suppressWarnings(glmmTMB::glmmTMB( - observedResponse ~ Environment1 + (1 | group), - data = dat, - family = glmmTMB::nbinom1() - )) expect_equal( - check_zeroinflation(fit), + check_zeroinflation(m), structure( list( - predicted.zeros = 462, - observed.zeros = 482L, - ratio = 0.95850622406639, + predicted.zeros = 389, + observed.zeros = 387L, + ratio = 1.00635658914729, tolerance = 0.1, - p.value = 0.776 + p.value = 0.944 ), class = "check_zi" ),