From ce079b72052da52b217cd303fe428288705578c4 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 24 Nov 2024 19:25:11 +0100 Subject: [PATCH] Check for influential observations of GLM w/o numeric variables (#779) --- DESCRIPTION | 2 +- NEWS.md | 5 +++ R/check_outliers.R | 50 +++++++++++----------------- tests/testthat/test-check_outliers.R | 10 ++++++ 4 files changed, 36 insertions(+), 31 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index b1ce379bf..b6f58b233 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: performance Title: Assessment of Regression Models Performance -Version: 0.12.4.7 +Version: 0.12.4.8 Authors@R: c(person(given = "Daniel", family = "Lüdecke", diff --git a/NEWS.md b/NEWS.md index 1bddce2ce..a181937d8 100644 --- a/NEWS.md +++ b/NEWS.md @@ -13,6 +13,11 @@ * Increased accuracy for `check_convergence()` for *glmmTMB* models. +## Bug fixes + +* `check_outliers()` did not warn that no numeric variables were found when only + the response variable was numeric, but all relevant predictors were not. + # performance 0.12.4 ## Changes diff --git a/R/check_outliers.R b/R/check_outliers.R index 7729f9931..ffb2d633f 100644 --- a/R/check_outliers.R +++ b/R/check_outliers.R @@ -378,42 +378,23 @@ check_outliers.default <- function(x, # Check args if (all(method == "all")) { method <- c( - "zscore_robust", - "iqr", - "ci", - "cook", - "pareto", - "mahalanobis", - "mahalanobis_robust", - "mcd", - "ics", - "optics", - "lof" + "zscore_robust", "iqr", "ci", "cook", "pareto", "mahalanobis", + "mahalanobis_robust", "mcd", "ics", "optics", "lof" ) } method <- match.arg( method, c( - "zscore", - "zscore_robust", - "iqr", - "ci", - "hdi", - "eti", - "bci", - "cook", - "pareto", - "mahalanobis", - "mahalanobis_robust", - "mcd", - "ics", - "optics", - "lof" + "zscore", "zscore_robust", "iqr", "ci", "hdi", "eti", "bci", "cook", + "pareto", "mahalanobis", "mahalanobis_robust", "mcd", "ics", "optics", "lof" ), several.ok = TRUE ) + # Get model information + m_info <- insight::model_info(x) + # Get data my_data <- insight::get_data(x, verbose = FALSE) @@ -427,8 +408,17 @@ check_outliers.default <- function(x, ) } - # Remove non-numerics - my_data <- datawizard::data_select(my_data, select = is.numeric, verbose = FALSE) + # Remove non-numerics, but in case of binomial, only check predictors + if (m_info$is_binomial) { + model_predictors <- unique(insight::find_predictors(x, flatten = TRUE)) + } else { + model_predictors <- colnames(my_data) + } + my_data <- datawizard::data_select( + my_data[model_predictors], + select = is.numeric, + verbose = FALSE + ) # check if any data left if (is.null(my_data) || ncol(my_data) == 0) { @@ -468,7 +458,7 @@ check_outliers.default <- function(x, } # Cook - if ("cook" %in% method && !insight::model_info(x)$is_bayesian && !inherits(x, "bife")) { + if ("cook" %in% method && !m_info$is_bayesian && !inherits(x, "bife")) { data_cook <- .check_outliers_cook( x, threshold = thresholds$cook @@ -508,7 +498,7 @@ check_outliers.default <- function(x, } # Pareto - if ("pareto" %in% method && insight::model_info(x)$is_bayesian) { + if ("pareto" %in% method && m_info$is_bayesian) { data_pareto <- .check_outliers_pareto( x, threshold = thresholds$pareto diff --git a/tests/testthat/test-check_outliers.R b/tests/testthat/test-check_outliers.R index 6aa64516f..f037950c8 100644 --- a/tests/testthat/test-check_outliers.R +++ b/tests/testthat/test-check_outliers.R @@ -361,6 +361,16 @@ test_that("check_outliers with invald data", { }) +test_that("check_outliers on numeric data only", { + data(mtcars) + # all predictors categorical + mtcars$wt <- as.factor(mtcars$wt) + mtcars$mpg <- as.factor(mtcars$mpg) + model <- glm(vs ~ wt + mpg, data = mtcars, family = "binomial") + expect_error(check_outliers(model), regex = "No numeric") +}) + + test_that("check_outliers with DHARMa", { skip_if_not_installed("DHARMa") mt1 <- mtcars[, c(1, 3, 4)]