From b9b940d35de4e5ec9cff6ace0e57a4a03501a003 Mon Sep 17 00:00:00 2001 From: Daniel Sjoberg Date: Tue, 23 Jan 2024 11:45:11 -0800 Subject: [PATCH] adding proportion CIs --- DESCRIPTION | 8 +- NAMESPACE | 2 + R/ard_proportion_ci.R | 79 ++++++++++++ R/cardx-package.R | 1 + R/standalone-checks.R | 155 ++++++++++++++++++++++++ man/ard_proportion_ci.Rd | 51 ++++++++ man/check_binary.Rd | 22 ++++ man/check_class.Rd | 48 ++++++++ man/check_class_data_frame.Rd | 43 +++++++ man/check_length.Rd | 43 +++++++ man/check_not_missing.Rd | 34 ++++++ man/check_range.Rd | 37 ++++++ tests/testthat/test-ard_proportion_ci.R | 59 +++++++++ 13 files changed, 580 insertions(+), 2 deletions(-) create mode 100644 R/ard_proportion_ci.R create mode 100644 R/standalone-checks.R create mode 100644 man/ard_proportion_ci.Rd create mode 100644 man/check_binary.Rd create mode 100644 man/check_class.Rd create mode 100644 man/check_class_data_frame.Rd create mode 100644 man/check_length.Rd create mode 100644 man/check_not_missing.Rd create mode 100644 man/check_range.Rd create mode 100644 tests/testthat/test-ard_proportion_ci.R diff --git a/DESCRIPTION b/DESCRIPTION index f60692998..3831905c2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -12,9 +12,13 @@ BugReports: https://github.com/insightsengineering/cardx/issues Depends: R (>= 4.0) Imports: - cards (>= 0.0.0.9012) + cli (>= 3.6.1), + cards (>= 0.0.0.9012), + dplyr (>= 1.1.2), + rlang (>= 1.1.1) Suggests: - testthat (>= 3.2.0) + testthat (>= 3.2.0), + withr Remotes: insightsengineering/cards Config/Needs/website: insightsengineering/nesttemplate diff --git a/NAMESPACE b/NAMESPACE index 6ae926839..2f30e7952 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,2 +1,4 @@ # Generated by roxygen2: do not edit by hand +export(ard_proportion_ci) +import(rlang) diff --git a/R/ard_proportion_ci.R b/R/ard_proportion_ci.R new file mode 100644 index 000000000..7de51ae5e --- /dev/null +++ b/R/ard_proportion_ci.R @@ -0,0 +1,79 @@ +#' Proportion ARD Statistics +#' +#' `r lifecycle::badge('experimental')`\cr +#' Calculate confidence intervals for proportions. +#' +#' @inheritParams cards::ard_categorical +#' @param variables ([`tidy-select`][dplyr::dplyr_tidy_select])\cr +#' columns to include in summaries. Columns must be class `` +#' or `` values coded as `c(0, 1)`. +#' @param by ([`tidy-select`][dplyr::dplyr_tidy_select])\cr +#' columns to stratify calculations by +#' @param conf.level (`numeric`)\cr +#' a scalar in `(0, 1)` indicating the confidence level. +#' Default is `0.95` +#' @param method (`string`)\cr +#' string indicating the type of confidence interval to calculate. +#' Must be one of `r formals(ard_proportion_ci)[["method"]] |> eval() |> shQuote()`. +#' See `?proportion_ci` for details. +#' @param strata,weights,max.iterations arguments passed to `proportion_ci_strat_wilson()`, +#' when `method = 'strat_wilson'` +#' +#' @return an ARD data frame +#' @export +#' +#' @examples +#' ard_proportion_ci(mtcars, variables = c(vs, am), method = "wilson") +ard_proportion_ci <- function(data, variables, by = dplyr::group_vars(data), + conf.level = 0.95, + strata, + weights = NULL, + max.iterations = 10, + method = c("waldcc", "wald", "clopper-pearson", + "wilson", "wilsoncc", + "strat_wilson", "strat_wilsoncc", + "agresti-coull", "jeffreys")) { + # process inputs ------------------------------------------------------------- + cards::process_selectors(data, variables = {{ variables }}, by = {{ by }}) + method <- arg_match(method) + if (method %in% c("strat_wilson", "strat_wilsoncc")) { + cards::process_selectors(data, strata = strata) + check_scalar(strata) + } + + # calculate confidence intervals --------------------------------------------- + cards::ard_complex( + data = data, + variables = {{ variables }}, + by = {{ by }}, + statistics = + ~list( + prop_ci = + switch( + method, + "waldcc" = \(x, ...) proportion_ci_wald(x, conf.level = conf.level, correct = TRUE), + "wald" = \(x, ...) proportion_ci_wald(x, conf.level = conf.level, correct = FALSE), + "wilsoncc" = \(x, ...) proportion_ci_wilson(x, conf.level = conf.level, correct = TRUE), + "wilson" = \(x, ...) proportion_ci_wilson(x, conf.level = conf.level, correct = FALSE), + "clopper-pearson" = \(x, ...) proportion_ci_clopper_pearson(x, conf.level = conf.level), + "agresti-coull" = \(x, ...) proportion_ci_agresti_coull(x, conf.level = conf.level), + "jeffreys" = \(x, ...) proportion_ci_jeffreys(x, conf.level = conf.level), + "strat_wilsoncc" = \(x, data, ...) { + proportion_ci_strat_wilson(x, strata = data[[strata]], weights = weights, + max.iterations = max.iterations, + conf.level = conf.level, correct = TRUE) + }, + "strat_wilson" = \(x, data, ...) { + proportion_ci_strat_wilson(x, strata = data[[strata]], weights = weights, + max.iterations = max.iterations, + conf.level = conf.level, correct = FALSE) + } + ) + ) + ) |> + dplyr::mutate( + context = "proportion_ci" + ) +} + + diff --git a/R/cardx-package.R b/R/cardx-package.R index a65cf6430..637bc249e 100644 --- a/R/cardx-package.R +++ b/R/cardx-package.R @@ -1,4 +1,5 @@ #' @keywords internal +#' @import rlang "_PACKAGE" ## usethis namespace: start diff --git a/R/standalone-checks.R b/R/standalone-checks.R new file mode 100644 index 000000000..374cc654b --- /dev/null +++ b/R/standalone-checks.R @@ -0,0 +1,155 @@ +# THIS SCRIPT MUST OPERATE AS A STANDALONE SCRIPT +# DO NOT USE IMPORTED FUNCTIONS AND ONLY USE rlang AND cli NAMESPACING FOR CHECKS +# nocov start + + +#' Check Class +#' +#' @param class (`character`)\cr +#' character vector or string indicating accepted classes. +#' Passed to `inherits(what=class)` +#' @param x `(object)`\cr +#' object to check +#' @param allow_null (`logical(1)`)\cr +#' Logical indicating whether a NULL value will pass the test. +#' Default is `FALSE` +#' @param arg_name (`string`)\cr +#' string indicating the label/symbol of the object being checked. +#' Default is `rlang::caller_arg(x)` +#' @inheritParams cli::cli_abort +#' @keywords internal +check_class <- function(x, class, allow_null = FALSE, + arg_name = rlang::caller_arg(x), call = parent.frame()) { + # include NULL class as acceptable if allow_null is TRUE + if (isTRUE(allow_null) && is.null(x)) return(invisible()) + + if (!inherits(x, class)) { + cli::cli_abort("The {.arg {arg_name}} argument must be class {.cls {class}}.", call = call) + } + invisible() +} + +#' Check Class Data Frame +#' +#' @inheritParams check_class +#' @keywords internal +check_class_data_frame <- function(x, allow_null = FALSE, + arg_name = rlang::caller_arg(x), call = parent.frame()) { + check_class(x = x, class = "data.frame", allow_null = allow_null, + arg_name = arg_name, call = call) +} + +#' Check Argument not Missing +#' +#' @inheritParams check_class +#' @keywords internal +check_not_missing <- function(x, arg_name = caller_arg(x), call = parent.frame()) { + if (missing(x)) { + cli::cli_abort("The {.arg {arg_name}} argument cannot be missing.", call = call) + } + invisible() +} + +#' Check Length +#' +#' @param msg (`string`)\cr +#' string passed to `cli::cli_abort(message=)` +#' @param length (`integer(1)`)\cr +#' integer specifying the required length +#' @inheritParams check_class +#' @keywords internal +#' @name check_length +NULL + +#' @rdname check_length +check_length <- function(x, length, arg_name = caller_arg(x), call = parent.frame()) { + if (length(x) != length) { + cli::cli_abort("The {.arg {arg_name}} argument must be length {.val {length}}.", call = call) + } + invisible() +} + +#' @rdname check_length +check_scalar <- function(x, arg_name = caller_arg(x), call = parent.frame()) { + check_length(x = x, length = 1L, arg_name = arg_name, call = call) +} + +#' Check Range +#' +#' @param x numeric scalar to check +#' @param range numeric vector of length two +#' @param include_bounds logical of length two indicating whether to allow +#' the lower and upper bounds +#' @param scalar logical indicating whether `x` must be a scalar +#' @param msg string passed to `cli::cli_abort(message=)` +#' +#' @return invisible +#' @keywords internal +check_range <- function(x, + range, + include_bounds = c(FALSE, FALSE), + arg_name = caller_arg(x), + scalar = FALSE, + msg = paste( + "The {.arg {arg_name}} argument must be in the interval", + "{.code {ifelse(include_bounds[1], '[', '(')}{range[1]},", + "{range[2]}{ifelse(include_bounds[2], ']', ')')}}."), + call = parent.frame()) { + if (isTRUE(scalar)) { + check_scalar(x, arg_name = arg_name) + } + + print_error <- FALSE + # check input is numeric + if (!is.numeric(x)) { + print_error <- TRUE + } + + # check the lower bound of range + if (isFALSE(print_error) && isTRUE(include_bounds[1]) && any(x < range[1])) { + print_error <- TRUE + } + if (isFALSE(print_error) && isFALSE(include_bounds[1]) && any(x <= range[1])) { + print_error <- TRUE + } + + # check upper bound of range + if (isFALSE(print_error) && isTRUE(include_bounds[2]) && any(x > range[2])) { + print_error <- TRUE + } + if (isFALSE(print_error) && isFALSE(include_bounds[2]) && any(x >= range[2])) { + print_error <- TRUE + } + + # print error + if (print_error) { + cli::cli_abort(msg, call = call) + } + + invisible() +} + + +#' Check Binary +#' +#' Checks if a column in a data frame is binary, +#' that is, if the column is class `` or +#' `` and coded as `c(0, 1)` +#' +#' @param x a vector +#' @param call call environment +#' +#' @return invisible +#' @keywords internal +check_binary <- function(x, arg_name = caller_arg(x), call = parent.frame()) { + if (!is.logical(x) && !(is_integerish(x) && is_empty(setdiff(x, c(0, 1, NA))))) { + paste("Expecting column {.arg {arg_name}} to be either {.cls logical}", + "or {.cls {c('numeric', 'integer')}} coded as {.val {c(0, 1)}}.") |> + cli::cli_abort(call = call) + } + + invisible() +} + +# nocov end + diff --git a/man/ard_proportion_ci.Rd b/man/ard_proportion_ci.Rd new file mode 100644 index 000000000..bc95c6224 --- /dev/null +++ b/man/ard_proportion_ci.Rd @@ -0,0 +1,51 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ard_proportion_ci.R +\name{ard_proportion_ci} +\alias{ard_proportion_ci} +\title{Proportion ARD Statistics} +\usage{ +ard_proportion_ci( + data, + variables, + by = dplyr::group_vars(data), + conf.level = 0.95, + strata, + weights = NULL, + max.iterations = 10, + method = c("waldcc", "wald", "clopper-pearson", "wilson", "wilsoncc", "strat_wilson", + "strat_wilsoncc", "agresti-coull", "jeffreys") +) +} +\arguments{ +\item{data}{(\code{data.frame})\cr +a data frame} + +\item{variables}{(\code{\link[dplyr:dplyr_tidy_select]{tidy-select}})\cr +columns to include in summaries. Columns must be class \verb{} +or \verb{} values coded as \code{c(0, 1)}.} + +\item{by}{(\code{\link[dplyr:dplyr_tidy_select]{tidy-select}})\cr +columns to stratify calculations by} + +\item{conf.level}{(\code{numeric})\cr +a scalar in \verb{(0, 1)} indicating the confidence level. +Default is \code{0.95}} + +\item{strata, weights, max.iterations}{arguments passed to \code{proportion_ci_strat_wilson()}, +when \code{method = 'strat_wilson'}} + +\item{method}{(\code{string})\cr +string indicating the type of confidence interval to calculate. +Must be one of 'waldcc', 'wald', 'clopper-pearson', 'wilson', 'wilsoncc', 'strat_wilson', 'strat_wilsoncc', 'agresti-coull', 'jeffreys'. +See \code{?proportion_ci} for details.} +} +\value{ +an ARD data frame +} +\description{ +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}\cr +Calculate confidence intervals for proportions. +} +\examples{ +ard_proportion_ci(mtcars, variables = c(vs, am), method = "wilson") +} diff --git a/man/check_binary.Rd b/man/check_binary.Rd new file mode 100644 index 000000000..c5e6600b8 --- /dev/null +++ b/man/check_binary.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/standalone-checks.R +\name{check_binary} +\alias{check_binary} +\title{Check Binary} +\usage{ +check_binary(x, arg_name = caller_arg(x), call = parent.frame()) +} +\arguments{ +\item{x}{a vector} + +\item{call}{call environment} +} +\value{ +invisible +} +\description{ +Checks if a column in a data frame is binary, +that is, if the column is class \verb{} or +\verb{} and coded as \code{c(0, 1)} +} +\keyword{internal} diff --git a/man/check_class.Rd b/man/check_class.Rd new file mode 100644 index 000000000..295ca6294 --- /dev/null +++ b/man/check_class.Rd @@ -0,0 +1,48 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/standalone-checks.R +\name{check_class} +\alias{check_class} +\title{Check Class} +\usage{ +check_class( + x, + class, + allow_null = FALSE, + arg_name = rlang::caller_arg(x), + call = parent.frame() +) +} +\arguments{ +\item{x}{\code{(object)}\cr +object to check} + +\item{class}{(\code{character})\cr +character vector or string indicating accepted classes. +Passed to \code{inherits(what=class)}} + +\item{allow_null}{(\code{logical(1)})\cr +Logical indicating whether a NULL value will pass the test. +Default is \code{FALSE}} + +\item{arg_name}{(\code{string})\cr +string indicating the label/symbol of the object being checked. +Default is \code{rlang::caller_arg(x)}} + +\item{call}{The execution environment of a currently running +function, e.g. \code{call = caller_env()}. The corresponding function +call is retrieved and mentioned in error messages as the source +of the error. + +You only need to supply \code{call} when throwing a condition from a +helper function which wouldn't be relevant to mention in the +message. + +Can also be \code{NULL} or a \link[rlang:topic-defuse]{defused function call} to +respectively not display any call or hard-code a code to display. + +For more information about error calls, see \ifelse{html}{\link[rlang:topic-error-call]{Including function calls in error messages}}{\link[rlang:topic-error-call]{Including function calls in error messages}}.} +} +\description{ +Check Class +} +\keyword{internal} diff --git a/man/check_class_data_frame.Rd b/man/check_class_data_frame.Rd new file mode 100644 index 000000000..01b7128b3 --- /dev/null +++ b/man/check_class_data_frame.Rd @@ -0,0 +1,43 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/standalone-checks.R +\name{check_class_data_frame} +\alias{check_class_data_frame} +\title{Check Class Data Frame} +\usage{ +check_class_data_frame( + x, + allow_null = FALSE, + arg_name = rlang::caller_arg(x), + call = parent.frame() +) +} +\arguments{ +\item{x}{\code{(object)}\cr +object to check} + +\item{allow_null}{(\code{logical(1)})\cr +Logical indicating whether a NULL value will pass the test. +Default is \code{FALSE}} + +\item{arg_name}{(\code{string})\cr +string indicating the label/symbol of the object being checked. +Default is \code{rlang::caller_arg(x)}} + +\item{call}{The execution environment of a currently running +function, e.g. \code{call = caller_env()}. The corresponding function +call is retrieved and mentioned in error messages as the source +of the error. + +You only need to supply \code{call} when throwing a condition from a +helper function which wouldn't be relevant to mention in the +message. + +Can also be \code{NULL} or a \link[rlang:topic-defuse]{defused function call} to +respectively not display any call or hard-code a code to display. + +For more information about error calls, see \ifelse{html}{\link[rlang:topic-error-call]{Including function calls in error messages}}{\link[rlang:topic-error-call]{Including function calls in error messages}}.} +} +\description{ +Check Class Data Frame +} +\keyword{internal} diff --git a/man/check_length.Rd b/man/check_length.Rd new file mode 100644 index 000000000..61343ad1c --- /dev/null +++ b/man/check_length.Rd @@ -0,0 +1,43 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/standalone-checks.R +\name{check_length} +\alias{check_length} +\alias{check_scalar} +\title{Check Length} +\usage{ +check_length(x, length, arg_name = caller_arg(x), call = parent.frame()) + +check_scalar(x, arg_name = caller_arg(x), call = parent.frame()) +} +\arguments{ +\item{x}{\code{(object)}\cr +object to check} + +\item{length}{(\code{integer(1)})\cr +integer specifying the required length} + +\item{arg_name}{(\code{string})\cr +string indicating the label/symbol of the object being checked. +Default is \code{rlang::caller_arg(x)}} + +\item{call}{The execution environment of a currently running +function, e.g. \code{call = caller_env()}. The corresponding function +call is retrieved and mentioned in error messages as the source +of the error. + +You only need to supply \code{call} when throwing a condition from a +helper function which wouldn't be relevant to mention in the +message. + +Can also be \code{NULL} or a \link[rlang:topic-defuse]{defused function call} to +respectively not display any call or hard-code a code to display. + +For more information about error calls, see \ifelse{html}{\link[rlang:topic-error-call]{Including function calls in error messages}}{\link[rlang:topic-error-call]{Including function calls in error messages}}.} + +\item{msg}{(\code{string})\cr +string passed to \code{cli::cli_abort(message=)}} +} +\description{ +Check Length +} +\keyword{internal} diff --git a/man/check_not_missing.Rd b/man/check_not_missing.Rd new file mode 100644 index 000000000..0ad724ffb --- /dev/null +++ b/man/check_not_missing.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/standalone-checks.R +\name{check_not_missing} +\alias{check_not_missing} +\title{Check Argument not Missing} +\usage{ +check_not_missing(x, arg_name = caller_arg(x), call = parent.frame()) +} +\arguments{ +\item{x}{\code{(object)}\cr +object to check} + +\item{arg_name}{(\code{string})\cr +string indicating the label/symbol of the object being checked. +Default is \code{rlang::caller_arg(x)}} + +\item{call}{The execution environment of a currently running +function, e.g. \code{call = caller_env()}. The corresponding function +call is retrieved and mentioned in error messages as the source +of the error. + +You only need to supply \code{call} when throwing a condition from a +helper function which wouldn't be relevant to mention in the +message. + +Can also be \code{NULL} or a \link[rlang:topic-defuse]{defused function call} to +respectively not display any call or hard-code a code to display. + +For more information about error calls, see \ifelse{html}{\link[rlang:topic-error-call]{Including function calls in error messages}}{\link[rlang:topic-error-call]{Including function calls in error messages}}.} +} +\description{ +Check Argument not Missing +} +\keyword{internal} diff --git a/man/check_range.Rd b/man/check_range.Rd new file mode 100644 index 000000000..edff80054 --- /dev/null +++ b/man/check_range.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/standalone-checks.R +\name{check_range} +\alias{check_range} +\title{Check Range} +\usage{ +check_range( + x, + range, + include_bounds = c(FALSE, FALSE), + arg_name = caller_arg(x), + scalar = FALSE, + msg = paste("The {.arg {arg_name}} argument must be in the interval", + "{.code {ifelse(include_bounds[1], '[', '(')}{range[1]},", + "{range[2]}{ifelse(include_bounds[2], ']', ')')}}."), + call = parent.frame() +) +} +\arguments{ +\item{x}{numeric scalar to check} + +\item{range}{numeric vector of length two} + +\item{include_bounds}{logical of length two indicating whether to allow +the lower and upper bounds} + +\item{scalar}{logical indicating whether \code{x} must be a scalar} + +\item{msg}{string passed to \code{cli::cli_abort(message=)}} +} +\value{ +invisible +} +\description{ +Check Range +} +\keyword{internal} diff --git a/tests/testthat/test-ard_proportion_ci.R b/tests/testthat/test-ard_proportion_ci.R new file mode 100644 index 000000000..3f147271c --- /dev/null +++ b/tests/testthat/test-ard_proportion_ci.R @@ -0,0 +1,59 @@ +test_that("ard_proportion_ci() works", { + # testing the easy methods together + expect_error( + c("waldcc", "wald", "clopper-pearson", + "wilson", "wilsoncc", "agresti-coull", "jeffreys") |> + lapply( + \(x) { + ard_proportion_ci( + data = mtcars, + variables = c(am, vs), + method = x + ) + } + ), + NA + ) +}) + +test_that("ard_proportion_ci(method='strat_wilson') works", { + withr::local_seed(1) + rsp <- c( + sample(c(TRUE, FALSE), size = 40, prob = c(3 / 4, 1 / 4), replace = TRUE), + sample(c(TRUE, FALSE), size = 40, prob = c(1 / 2, 1 / 2), replace = TRUE) + ) + grp <- factor(rep(c("A", "B"), each = 40), levels = c("B", "A")) + strata_data <- data.frame( + "f1" = sample(c("a", "b"), 80, TRUE), + "f2" = sample(c("x", "y", "z"), 80, TRUE), + stringsAsFactors = TRUE + ) + + weights <- 1:6 / sum(1:6) + + expect_snapshot( + ard_proportion_ci( + data = data.frame( + rsp = rsp, + strata = interaction(strata_data) + ), + variables = rsp, + strata = strata, + weights = weights, + method = 'strat_wilson' + ) + ) + + expect_snapshot( + ard_proportion_ci( + data = data.frame( + rsp = rsp, + strata = interaction(strata_data) + ), + variables = rsp, + strata = strata, + weights = weights, + method = 'strat_wilsoncc' + ) + ) +})