diff --git a/DESCRIPTION b/DESCRIPTION index 0bb53e1fb..893937f56 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: cardx Title: Extra Analysis Results Data Utilities -Version: 0.0.0.9027 +Version: 0.0.0.9031 Authors@R: c( person("Daniel", "Sjoberg", , "sjobergd@gene.com", role = c("aut", "cre")), person("F. Hoffmann-La Roche AG", role = c("cph", "fnd")) @@ -15,12 +15,15 @@ Imports: cards (>= 0.0.0.9024), cli (>= 3.6.1), dplyr (>= 1.1.2), + glue (>= 1.6.2), rlang (>= 1.1.1), tidyr (>= 1.3.0) Suggests: broom (>= 1.0.5), + broom.helpers (>= 1.13.0), spelling, - testthat (>= 3.2.0) + testthat (>= 3.2.0), + withr Remotes: insightsengineering/cards Config/Needs/website: insightsengineering/nesttemplate diff --git a/NAMESPACE b/NAMESPACE index 384996f27..7d10b9a41 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,12 +1,17 @@ # Generated by roxygen2: do not edit by hand +S3method(ard_regression,default) export("%>%") export(all_of) export(any_of) export(ard_aov) +export(ard_chisqtest) +export(ard_fishertest) export(ard_onewaytest) export(ard_paired_ttest) export(ard_paired_wilcoxtest) +export(ard_proportion_ci) +export(ard_regression) export(ard_ttest) export(ard_wilcoxtest) export(contains) @@ -16,6 +21,12 @@ export(last_col) export(matches) export(num_range) export(one_of) +export(proportion_ci_agresti_coull) +export(proportion_ci_clopper_pearson) +export(proportion_ci_jeffreys) +export(proportion_ci_strat_wilson) +export(proportion_ci_wald) +export(proportion_ci_wilson) export(starts_with) export(where) import(rlang) diff --git a/NEWS.md b/NEWS.md index 98d6880c7..0f989d422 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# cardx 0.0.0.9027 +# cardx 0.0.0.9031 ### New Features * New package! diff --git a/R/ard_chisqtest.R b/R/ard_chisqtest.R new file mode 100644 index 000000000..4528d83e8 --- /dev/null +++ b/R/ard_chisqtest.R @@ -0,0 +1,57 @@ +#' ARD Chi-squared Test +#' +#' @description +#' Analysis results data for Pearson's Chi-squared Test. +#' Calculated with `chisq.test(x = data[[variable]], y = data[[by]], ...)` +#' +#' +#' @param data (`data.frame`)\cr +#' a data frame. +#' @param by,variable ([`tidy-select`][dplyr::dplyr_tidy_select])\cr +#' column names to compare +#' @param ... additional arguments passed to `fisher.test(...)` +#' +#' @return ARD data frame +#' @export +#' +#' @examples +#' cards::ADSL |> +#' ard_chisqtest(by = "ARM", variable = "AGEGR1") +ard_chisqtest <- function(data, by, variable, ...) { + # check installed packages --------------------------------------------------- + cards::check_pkg_installed("broom.helpers", reference_pkg = "cards") + + # check/process inputs ------------------------------------------------------- + check_not_missing(data) + check_not_missing(variable) + check_not_missing(by) + check_class_data_frame(x = data) + cards::process_selectors(data, by = {{ by }}, variable = {{ variable }}) + check_scalar(by) + check_scalar(variable) + + # build ARD ------------------------------------------------------------------ + cards::tidy_as_ard( + lst_tidy = + cards::eval_capture_conditions( + stats::chisq.test(x = data[[variable]], y = data[[by]], ...) |> + broom::tidy() + ), + tidy_result_names = c("statistic", "p.value", "parameter", "method"), + fun_args_to_record = + c("correct", "p", "rescale.p", "simulate.p.value", "B"), + formals = formals(stats::chisq.test), + passed_args = dots_list(...), + lst_ard_columns = list(group1 = by, variable = variable, context = "chisqtest") + ) |> + dplyr::mutate( + .after = "stat_name", + stat_label = + dplyr::case_when( + .data$stat_name %in% "statistic" ~ "X-squared Statistic", + .data$stat_name %in% "p.value" ~ "p-value", + .data$stat_name %in% "parameter" ~ "Degrees of Freedom", + TRUE ~ .data$stat_name, + ) + ) +} diff --git a/R/ard_fishertest.R b/R/ard_fishertest.R new file mode 100644 index 000000000..04f7f5ff0 --- /dev/null +++ b/R/ard_fishertest.R @@ -0,0 +1,59 @@ +#' ARD Fisher's Exact Test +#' +#' @description +#' Analysis results data for Fisher's Exact Test. +#' Calculated with `fisher.test(x = data[[variable]], y = data[[by]], ...)` +#' +#' +#' @param data (`data.frame`)\cr +#' a data frame. +#' @param by,variable ([`tidy-select`][dplyr::dplyr_tidy_select])\cr +#' column names to compare +#' @param ... additional arguments passed to `fisher.test(...)` +#' +#' @return ARD data frame +#' @export +#' +#' @examples +#' cards::ADSL[1:30, ] |> +#' ard_fishertest(by = "ARM", variable = "AGEGR1") +ard_fishertest <- function(data, by, variable, ...) { + # check installed packages --------------------------------------------------- + cards::check_pkg_installed("broom.helpers", reference_pkg = "cardx") + + # check/process inputs ------------------------------------------------------- + check_not_missing(data) + check_not_missing(variable) + check_not_missing(by) + check_class_data_frame(x = data) + cards::process_selectors(data, by = {{ by }}, variable = {{ variable }}) + check_scalar(by) + check_scalar(variable) + + # build ARD ------------------------------------------------------------------ + cards::tidy_as_ard( + lst_tidy = + cards::eval_capture_conditions( + stats::fisher.test(x = data[[variable]], y = data[[by]], ...) |> + broom::tidy() + ), + tidy_result_names = + c("estimate", "p.value", "conf.low", "conf.high", "method", "alternative"), + fun_args_to_record = + c( + "workspace", "hybrid", "hybridPars", "control", "or", + "conf.int", "conf.level", "simulate.p.value", "B" + ), + formals = formals(stats::fisher.test), + passed_args = dots_list(...), + lst_ard_columns = list(group1 = by, variable = variable, context = "fishertest") + ) |> + dplyr::mutate( + .after = "stat_name", + stat_label = + dplyr::case_when( + .data$stat_name %in% "p.value" ~ "p-value", + TRUE ~ .data$stat_name, + ) + ) +} diff --git a/R/ard_proportion_ci.R b/R/ard_proportion_ci.R new file mode 100644 index 000000000..1b341548e --- /dev/null +++ b/R/ard_proportion_ci.R @@ -0,0 +1,82 @@ +#' ARD Proportion Confidence Intervals +#' +#' `r lifecycle::badge('experimental')`\cr +#' Calculate confidence intervals for proportions. +#' +#' @inheritParams cards::ard_categorical +#' @param variables ([`tidy-select`][dplyr::dplyr_tidy_select])\cr +#' columns to include in summaries. Columns must be class `` +#' or `` values coded as `c(0, 1)`. +#' @param by ([`tidy-select`][dplyr::dplyr_tidy_select])\cr +#' columns to stratify calculations by +#' @param conf.level (`numeric`)\cr +#' a scalar in `(0, 1)` indicating the confidence level. +#' Default is `0.95` +#' @param method (`string`)\cr +#' string indicating the type of confidence interval to calculate. +#' Must be one of `r formals(ard_proportion_ci)[["method"]] |> eval() |> shQuote()`. +#' See `?proportion_ci` for details. +#' @param strata,weights,max.iterations arguments passed to `proportion_ci_strat_wilson()`, +#' when `method='strat_wilson'` +#' +#' @return an ARD data frame +#' @export +#' +#' @examples +#' ard_proportion_ci(mtcars, variables = c(vs, am), method = "wilson") +ard_proportion_ci <- function(data, variables, by = dplyr::group_vars(data), + conf.level = 0.95, + strata, + weights = NULL, + max.iterations = 10, + method = c( + "waldcc", "wald", "clopper-pearson", + "wilson", "wilsoncc", + "strat_wilson", "strat_wilsoncc", + "agresti-coull", "jeffreys" + )) { + # process inputs ------------------------------------------------------------- + cards::process_selectors(data, variables = {{ variables }}, by = {{ by }}) + method <- arg_match(method) + if (method %in% c("strat_wilson", "strat_wilsoncc")) { + cards::process_selectors(data, strata = strata) + check_scalar(strata) + } + + # calculate confidence intervals --------------------------------------------- + cards::ard_complex( + data = data, + variables = {{ variables }}, + by = {{ by }}, + statistics = + ~ list( + prop_ci = + switch(method, + "waldcc" = \(x, ...) proportion_ci_wald(x, conf.level = conf.level, correct = TRUE), + "wald" = \(x, ...) proportion_ci_wald(x, conf.level = conf.level, correct = FALSE), + "wilsoncc" = \(x, ...) proportion_ci_wilson(x, conf.level = conf.level, correct = TRUE), + "wilson" = \(x, ...) proportion_ci_wilson(x, conf.level = conf.level, correct = FALSE), + "clopper-pearson" = \(x, ...) proportion_ci_clopper_pearson(x, conf.level = conf.level), + "agresti-coull" = \(x, ...) proportion_ci_agresti_coull(x, conf.level = conf.level), + "jeffreys" = \(x, ...) proportion_ci_jeffreys(x, conf.level = conf.level), + "strat_wilsoncc" = \(x, data, ...) { + proportion_ci_strat_wilson(x, + strata = data[[strata]], weights = weights, + max.iterations = max.iterations, + conf.level = conf.level, correct = TRUE + ) + }, + "strat_wilson" = \(x, data, ...) { + proportion_ci_strat_wilson(x, + strata = data[[strata]], weights = weights, + max.iterations = max.iterations, + conf.level = conf.level, correct = FALSE + ) + } + ) + ) + ) |> + dplyr::mutate( + context = "proportion_ci" + ) +} diff --git a/R/ard_regression.R b/R/ard_regression.R new file mode 100644 index 000000000..2b74bbbca --- /dev/null +++ b/R/ard_regression.R @@ -0,0 +1,81 @@ +#' Regression ARD +#' +#' Function takes a regression model object and converts it to a ARD +#' structure using the `broom.helpers` package. +#' +#' @param x regression model object +#' @param tidy_fun (`function`)\cr +#' a tidier. Default is [`broom.helpers::tidy_with_broom_or_parameters`] +#' @param ... Arguments passed to `broom.helpers::tidy_plus_plus()` +#' +#' @return data frame +#' @name ard_regression +#' +#' @examples +#' lm(AGE ~ ARM, data = cards::ADSL) |> +#' ard_regression(add_estimate_to_reference_rows = TRUE) +NULL + +#' @rdname ard_regression +#' @export +ard_regression <- function(x, ...) { + UseMethod("ard_regression") +} + +#' @rdname ard_regression +#' @export +ard_regression.default <- function(x, tidy_fun = broom.helpers::tidy_with_broom_or_parameters, ...) { + # check installed packages --------------------------------------------------- + cards::check_pkg_installed("broom.helpers", reference_pkg = "cards") + + # check inputs --------------------------------------------------------------- + check_not_missing(x, "model") + + # summarize model ------------------------------------------------------------ + broom.helpers::tidy_plus_plus( + model = x, + tidy_fun = tidy_fun, + ... + ) |> + dplyr::mutate( + variable_level = dplyr::if_else(.data$var_type %in% "continuous", NA_character_, .data$label), + dplyr::across(-c("variable", "variable_level"), .fns = as.list) + ) |> + tidyr::pivot_longer( + cols = -c("variable", "variable_level"), + names_to = "stat_name", + values_to = "statistic" + ) |> + dplyr::filter(map_lgl(.data$statistic, Negate(is.na))) |> + dplyr::mutate( + statistic_fmt_fn = + lapply( + .data$statistic, + function(x) { + switch(is.integer(x), 0L) %||% # styler: off + switch(is.numeric(x), 1L) # styler: off + } + ), + context = "regression", + stat_label = + dplyr::case_when( + .data$stat_name %in% "var_label" ~ "Label", + .data$stat_name %in% "var_class" ~ "Class", + .data$stat_name %in% "var_type" ~ "Type", + .data$stat_name %in% "var_nlevels" ~ "N Levels", + .data$stat_name %in% "contrasts_type" ~ "Contrast Type", + .data$stat_name %in% "label" ~ "Level Label", + .data$stat_name %in% "n_obs" ~ "N Obs.", + .data$stat_name %in% "n_event" ~ "N Events", + .data$stat_name %in% "exposure" ~ "Exposure Time", + .data$stat_name %in% "estimate" ~ "Coefficient", + .data$stat_name %in% "std.error" ~ "Standard Error", + .data$stat_name %in% "p.value" ~ "p-value", + .data$stat_name %in% "conf.low" ~ "CI Lower Bound", + .data$stat_name %in% "conf.high" ~ "CI Upper Bound", + TRUE ~ .data$stat_name + ) + ) |> + cards::tidy_ard_column_order() %>% + {structure(., class = c("card", class(.)))} # styler: off +} diff --git a/R/cardx-package.R b/R/cardx-package.R index 637bc249e..644703b55 100644 --- a/R/cardx-package.R +++ b/R/cardx-package.R @@ -5,3 +5,5 @@ ## usethis namespace: start ## usethis namespace: end NULL + +utils::globalVariables(c(".")) diff --git a/R/proportion_ci.R b/R/proportion_ci.R new file mode 100644 index 000000000..961b74594 --- /dev/null +++ b/R/proportion_ci.R @@ -0,0 +1,405 @@ +#' Functions for Calculating Proportion Confidence Intervals +#' +#' Functions to calculate different proportion confidence intervals for use in `ard_proportion()`. +#' +#' @inheritParams ard_proportion_ci +#' @param x vector of a binary values, i.e. a logical vector, or numeric with values `c(0, 1)` +#' @return Confidence interval of a proportion. +#' +#' @name proportion_ci +#' @examples +#' x <- c( +#' TRUE, TRUE, TRUE, TRUE, TRUE, +#' FALSE, FALSE, FALSE, FALSE, FALSE +#' ) +#' +#' proportion_ci_wald(x, conf.level = 0.9) +#' proportion_ci_wilson(x, correct = TRUE) +#' proportion_ci_clopper_pearson(x) +#' proportion_ci_agresti_coull(x) +#' proportion_ci_jeffreys(x) +NULL + +#' @describeIn proportion_ci Calculates the Wald interval by following the usual textbook definition +#' for a single proportion confidence interval using the normal approximation. +#' +#' @param correct (`logical`)\cr apply continuity correction. +#' +#' @export +proportion_ci_wald <- function(x, conf.level = 0.95, correct = FALSE) { + # check inputs --------------------------------------------------------------- + check_not_missing(x) + check_binary(x) + check_range(conf.level, range = c(0, 1), include_bounds = c(FALSE, FALSE), scalar = TRUE) + check_class(x = correct, "logical") + check_scalar(correct) + + x <- stats::na.omit(x) + + n <- length(x) + p_hat <- mean(x) + z <- stats::qnorm((1 + conf.level) / 2) + q_hat <- 1 - p_hat + correction_factor <- ifelse(correct, 1 / (2 * n), 0) + + err <- z * sqrt(p_hat * q_hat) / sqrt(n) + correction_factor + l_ci <- max(0, p_hat - err) + u_ci <- min(1, p_hat + err) + + list( + N = n, + estimate = p_hat, + conf.low = l_ci, + conf.high = u_ci, + conf.level = conf.level, + method = + glue::glue("Wald Confidence Interval {ifelse(correct, 'with', 'without')} continuity correction") + ) +} + + +#' @describeIn proportion_ci Calculates the Wilson interval by calling [stats::prop.test()]. +#' Also referred to as Wilson score interval. +#' +#' @export +proportion_ci_wilson <- function(x, conf.level = 0.95, correct = FALSE) { + cards::check_pkg_installed("broom", reference_pkg = "cards") + # check inputs --------------------------------------------------------------- + check_not_missing(x) + check_binary(x) + check_class(x = correct, "logical") + check_scalar(correct) + check_range(conf.level, range = c(0, 1), include_bounds = c(FALSE, FALSE), scalar = TRUE) + + x <- stats::na.omit(x) + + n <- length(x) + y <- stats::prop.test(x = sum(x), n = n, correct = correct, conf.level = conf.level) + + list(N = n, conf.level = conf.level) |> + utils::modifyList(val = broom::tidy(y) |> as.list()) |> + utils::modifyList( + list( + method = + glue::glue("Wilson Confidence Interval {ifelse(correct, 'with', 'without')} continuity correction") + ) + ) +} + +#' @describeIn proportion_ci Calculates the Clopper-Pearson interval by calling [stats::binom.test()]. +#' Also referred to as the `exact` method. +#' @export +proportion_ci_clopper_pearson <- function(x, conf.level = 0.95) { + cards::check_pkg_installed("broom", reference_pkg = "cards") + # check inputs --------------------------------------------------------------- + check_not_missing(x) + check_binary(x) + check_range(conf.level, range = c(0, 1), include_bounds = c(FALSE, FALSE), scalar = TRUE) + + x <- stats::na.omit(x) + n <- length(x) + + y <- stats::binom.test(x = sum(x), n = n, conf.level = conf.level) + + list(N = n, conf.level = conf.level) |> + utils::modifyList(val = broom::tidy(y) |> as.list()) |> + utils::modifyList(list(method = "Clopper-Pearson Confidence Interval")) +} + +#' @describeIn proportion_ci Calculates the `Agresti-Coull` interval (created by `Alan Agresti` and `Brent Coull`) by +#' (for 95% CI) adding two successes and two failures to the data and then using the Wald formula to construct a CI. +#' @export +proportion_ci_agresti_coull <- function(x, conf.level = 0.95) { + # check inputs --------------------------------------------------------------- + check_not_missing(x) + check_binary(x) + check_range(conf.level, range = c(0, 1), include_bounds = c(FALSE, FALSE), scalar = TRUE) + + x <- stats::na.omit(x) + + n <- length(x) + x_sum <- sum(x) + z <- stats::qnorm((1 + conf.level) / 2) + + # Add here both z^2 / 2 successes and failures. + x_sum_tilde <- x_sum + z^2 / 2 + n_tilde <- n + z^2 + + # Then proceed as with the Wald interval. + p_tilde <- x_sum_tilde / n_tilde + q_tilde <- 1 - p_tilde + err <- z * sqrt(p_tilde * q_tilde) / sqrt(n_tilde) + l_ci <- max(0, p_tilde - err) + u_ci <- min(1, p_tilde + err) + + list( + N = n, + estimate = mean(x), + conf.low = l_ci, + conf.high = u_ci, + conf.level = conf.level, + method = "Agresti-Coull Confidence Interval" + ) +} + +#' @describeIn proportion_ci Calculates the Jeffreys interval, an equal-tailed interval based on the +#' non-informative Jeffreys prior for a binomial proportion. +#' @export +proportion_ci_jeffreys <- function(x, conf.level = 0.95) { + # check inputs --------------------------------------------------------------- + check_not_missing(x) + check_binary(x) + check_range(conf.level, range = c(0, 1), include_bounds = c(FALSE, FALSE), scalar = TRUE) + x <- stats::na.omit(x) + + n <- length(x) + x_sum <- sum(x) + + alpha <- 1 - conf.level + l_ci <- ifelse( + x_sum == 0, + 0, + stats::qbeta(alpha / 2, x_sum + 0.5, n - x_sum + 0.5) + ) + + u_ci <- ifelse( + x_sum == n, + 1, + stats::qbeta(1 - alpha / 2, x_sum + 0.5, n - x_sum + 0.5) + ) + + list( + N = n, + estimate = mean(x), + conf.low = l_ci, + conf.high = u_ci, + conf.level = conf.level, + method = glue::glue("Jeffreys Interval") + ) +} + + +#' @describeIn proportion_ci Calculates the stratified Wilson confidence +#' interval for unequal proportions as described in +#' Xin YA, Su XG. Stratified Wilson and Newcombe confidence intervals +#' for multiple binomial proportions. _Statistics in Biopharmaceutical Research_. 2010;2(3). +#' +#' @param strata (`factor`)\cr variable with one level per stratum and same length as `x`. +#' @param weights (`numeric` or `NULL`)\cr weights for each level of the strata. If `NULL`, they are +#' estimated using the iterative algorithm that +#' minimizes the weighted squared length of the confidence interval. +#' @param max.iterations (`count`)\cr maximum number of iterations for the iterative procedure used +#' to find estimates of optimal weights. +#' @param correct (`flag`)\cr include the continuity correction. For further information, see for example +#' [stats::prop.test()]. +#' +#' @examples +#' # Stratified Wilson confidence interval with unequal probabilities +#' +#' set.seed(1) +#' rsp <- sample(c(TRUE, FALSE), 100, TRUE) +#' strata_data <- data.frame( +#' "f1" = sample(c("a", "b"), 100, TRUE), +#' "f2" = sample(c("x", "y", "z"), 100, TRUE), +#' stringsAsFactors = TRUE +#' ) +#' strata <- interaction(strata_data) +#' n_strata <- ncol(table(rsp, strata)) # Number of strata +#' +#' proportion_ci_strat_wilson( +#' x = rsp, strata = strata, +#' conf.level = 0.90 +#' ) +#' +#' # Not automatic setting of weights +#' proportion_ci_strat_wilson( +#' x = rsp, strata = strata, +#' weights = rep(1 / n_strata, n_strata), +#' conf.level = 0.90 +#' ) +#' +#' @export +proportion_ci_strat_wilson <- function(x, + strata, + weights = NULL, + conf.level = 0.95, + max.iterations = 10L, + correct = FALSE) { + # check inputs --------------------------------------------------------------- + check_not_missing(x) + check_not_missing(strata) + check_binary(x) + check_class(correct, "logical") + check_scalar(correct) + check_class(strata, "factor") + check_range(conf.level, range = c(0, 1), scalar = TRUE) + + # remove missing values from x and strata + is_na <- is.na(x) | is.na(strata) + x <- x[!is_na] + strata <- strata[!is_na] + if (!inherits(x, "logical")) x <- as.logical(x) + # check all TRUE/FALSE, if so, not calculable + if (all(x) || all(!x)) { + cli::cli_abort("All values in {.arg x} argument are either {.code TRUE} or {.code FALSE} and CI is not estimable.") + } + + tbl <- table(factor(x, levels = c(FALSE, TRUE)), strata, useNA = "no") + n_strata <- length(unique(strata)) + + # Checking the weights and maximum number of iterations. + do_iter <- FALSE + if (is.null(weights)) { + weights <- rep(1 / n_strata, n_strata) # Initialization for iterative procedure + do_iter <- TRUE + + # Iteration parameters + if (!is_scalar_integerish(max.iterations) || max.iterations < 1) { + cli::cli_abort("Argument {.arg max.iterations} must be a positive integer.") + } + } + check_range(weights, range = c(0, 1), include_bounds = c(TRUE, TRUE)) + sum_weights <- sum(weights) |> + round() |> + as.integer() + if (sum_weights != 1L || abs(sum_weights - sum(weights)) > sqrt(.Machine$double.eps)) { + cli::cli_abort("The sum of the {.arg weights} argument must be {.val {1L}}") + } + + xs <- tbl["TRUE", ] + ns <- colSums(tbl) + use_stratum <- (ns > 0) + ns <- ns[use_stratum] + xs <- xs[use_stratum] + ests <- xs / ns + vars <- ests * (1 - ests) / ns + + strata_qnorm <- .strata_normal_quantile(vars, weights, conf.level) + + # Iterative setting of weights if they were not passed in `weights` argument + weights_new <- if (do_iter) { + .update_weights_strat_wilson(vars, strata_qnorm, weights, ns, max.iterations, conf.level)$weights + } else { + weights + } + + strata_conf.level <- 2 * stats::pnorm(strata_qnorm) - 1 + + ci_by_strata <- Map( + function(x, n) { + # Classic Wilson's confidence interval + suppressWarnings(stats::prop.test(x, n, correct = correct, conf.level = strata_conf.level)$conf.int) + }, + x = xs, + n = ns + ) + lower_by_strata <- sapply(ci_by_strata, "[", 1L) + upper_by_strata <- sapply(ci_by_strata, "[", 2L) + + lower <- sum(weights_new * lower_by_strata) + upper <- sum(weights_new * upper_by_strata) + + # Return values + list( + N = length(x), + estimate = mean(x), + conf.low = lower, + conf.high = upper, + conf.level = conf.level, + weights = if (do_iter) weights_new else NULL, + method = + glue::glue("Stratified Wilson Confidence Interval {ifelse(correct, 'with', 'without')} continuity correction") + ) |> + compact() +} + +#' Helper Function for the Estimation of Stratified Quantiles +#' +#' This function wraps the estimation of stratified percentiles when we assume +#' the approximation for large numbers. This is necessary only in the case +#' proportions for each strata are unequal. +#' +#' @inheritParams proportion_ci_strat_wilson +#' +#' @return Stratified quantile. +#' +#' @seealso [proportion_ci_strat_wilson()] +#' +#' @keywords internal +#' @examples +#' strata_data <- table(data.frame( +#' "f1" = sample(c(TRUE, FALSE), 100, TRUE), +#' "f2" = sample(c("x", "y", "z"), 100, TRUE), +#' stringsAsFactors = TRUE +#' )) +#' ns <- colSums(strata_data) +#' ests <- strata_data["TRUE", ] / ns +#' vars <- ests * (1 - ests) / ns +#' weights <- rep(1 / length(ns), length(ns)) +#' +#' cardx:::.strata_normal_quantile(vars, weights, 0.95) +.strata_normal_quantile <- function(vars, weights, conf.level) { + summands <- weights^2 * vars + # Stratified quantile + sqrt(sum(summands)) / sum(sqrt(summands)) * stats::qnorm((1 + conf.level) / 2) +} + +#' Helper Function for the Estimation of Weights for `proportion_ci_strat_wilson()` +#' +#' This function wraps the iteration procedure that allows you to estimate +#' the weights for each proportional strata. This assumes to minimize the +#' weighted squared length of the confidence interval. +#' +#' @keywords internal +#' @inheritParams proportion_ci_strat_wilson +#' @param vars (`numeric`)\cr normalized proportions for each strata. +#' @param strata_qnorm (`numeric`)\cr initial estimation with identical weights of the quantiles. +#' @param initial_weights (`numeric`)\cr initial weights used to calculate `strata_qnorm`. This can +#' be optimized in the future if we need to estimate better initial weights. +#' @param n_per_strata (`numeric`)\cr number of elements in each strata. +#' @param max.iterations (`count`)\cr maximum number of iterations to be tried. Convergence is always checked. +#' @param tol (`number`)\cr tolerance threshold for convergence. +#' +#' @return A `list` of 3 elements: `n_it`, `weights`, and `diff_v`. +#' +#' @seealso For references and details see [`proportion_ci_strat_wilson()`]. +#' +#' @examples +#' vs <- c(0.011, 0.013, 0.012, 0.014, 0.017, 0.018) +#' sq <- 0.674 +#' ws <- rep(1 / length(vs), length(vs)) +#' ns <- c(22, 18, 17, 17, 14, 12) +#' +#' cardx:::.update_weights_strat_wilson(vs, sq, ws, ns, 100, 0.95, 0.001) +.update_weights_strat_wilson <- function(vars, + strata_qnorm, + initial_weights, + n_per_strata, + max.iterations = 50, + conf.level = 0.95, + tol = 0.001) { + it <- 0 + diff_v <- NULL + + while (it < max.iterations) { + it <- it + 1 + weights_new_t <- (1 + strata_qnorm^2 / n_per_strata)^2 + weights_new_b <- (vars + strata_qnorm^2 / (4 * n_per_strata^2)) + weights_new <- weights_new_t / weights_new_b + weights_new <- weights_new / sum(weights_new) + strata_qnorm <- .strata_normal_quantile(vars, weights_new, conf.level) + diff_v <- c(diff_v, sum(abs(weights_new - initial_weights))) + if (diff_v[length(diff_v)] < tol) break + initial_weights <- weights_new + } + + if (it == max.iterations) { + warning("The heuristic to find weights did not converge with max.iterations = ", max.iterations) + } + + list( + "n_it" = it, + "weights" = weights_new, + "diff_v" = diff_v + ) +} diff --git a/_pkgdown.yml b/_pkgdown.yml index f73e71c75..ef0402299 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -25,6 +25,17 @@ reference: - subtitle: "Inference" - contents: - ard_aov + - ard_chisqtest + - ard_fishertest - ard_onewaytest - ard_ttest - ard_wilcoxtest + + - subtitle: "Estimation" + - contents: + - ard_proportion_ci + - ard_regression + + - title: "Helpers" + - contents: + - proportion_ci diff --git a/inst/WORDLIST b/inst/WORDLIST index 22b5c0ddb..14073fb50 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -1,3 +1,20 @@ ARD +Biopharmaceutical +Clopper Hoffmann +Jeffreys +Newcombe +Su +XG +Xin +agresti +clopper +coull funder +jeffreys +pearson +strat +wald +waldcc +wilson +wilsoncc diff --git a/man/ard_chisqtest.Rd b/man/ard_chisqtest.Rd new file mode 100644 index 000000000..2e2d2e190 --- /dev/null +++ b/man/ard_chisqtest.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ard_chisqtest.R +\name{ard_chisqtest} +\alias{ard_chisqtest} +\title{ARD Chi-squared Test} +\usage{ +ard_chisqtest(data, by, variable, ...) +} +\arguments{ +\item{data}{(\code{data.frame})\cr +a data frame.} + +\item{by, variable}{(\code{\link[dplyr:dplyr_tidy_select]{tidy-select}})\cr +column names to compare} + +\item{...}{additional arguments passed to \code{fisher.test(...)}} +} +\value{ +ARD data frame +} +\description{ +Analysis results data for Pearson's Chi-squared Test. +Calculated with \code{chisq.test(x = data[[variable]], y = data[[by]], ...)} +} +\examples{ +cards::ADSL |> + ard_chisqtest(by = "ARM", variable = "AGEGR1") +} diff --git a/man/ard_fishertest.Rd b/man/ard_fishertest.Rd new file mode 100644 index 000000000..93800255b --- /dev/null +++ b/man/ard_fishertest.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ard_fishertest.R +\name{ard_fishertest} +\alias{ard_fishertest} +\title{ARD Fisher's Exact Test} +\usage{ +ard_fishertest(data, by, variable, ...) +} +\arguments{ +\item{data}{(\code{data.frame})\cr +a data frame.} + +\item{by, variable}{(\code{\link[dplyr:dplyr_tidy_select]{tidy-select}})\cr +column names to compare} + +\item{...}{additional arguments passed to \code{fisher.test(...)}} +} +\value{ +ARD data frame +} +\description{ +Analysis results data for Fisher's Exact Test. +Calculated with \code{fisher.test(x = data[[variable]], y = data[[by]], ...)} +} +\examples{ +cards::ADSL[1:30, ] |> + ard_fishertest(by = "ARM", variable = "AGEGR1") +} diff --git a/man/ard_proportion_ci.Rd b/man/ard_proportion_ci.Rd new file mode 100644 index 000000000..0b821060e --- /dev/null +++ b/man/ard_proportion_ci.Rd @@ -0,0 +1,51 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ard_proportion_ci.R +\name{ard_proportion_ci} +\alias{ard_proportion_ci} +\title{ARD Proportion Confidence Intervals} +\usage{ +ard_proportion_ci( + data, + variables, + by = dplyr::group_vars(data), + conf.level = 0.95, + strata, + weights = NULL, + max.iterations = 10, + method = c("waldcc", "wald", "clopper-pearson", "wilson", "wilsoncc", "strat_wilson", + "strat_wilsoncc", "agresti-coull", "jeffreys") +) +} +\arguments{ +\item{data}{(\code{data.frame})\cr +a data frame} + +\item{variables}{(\code{\link[dplyr:dplyr_tidy_select]{tidy-select}})\cr +columns to include in summaries. Columns must be class \verb{} +or \verb{} values coded as \code{c(0, 1)}.} + +\item{by}{(\code{\link[dplyr:dplyr_tidy_select]{tidy-select}})\cr +columns to stratify calculations by} + +\item{conf.level}{(\code{numeric})\cr +a scalar in \verb{(0, 1)} indicating the confidence level. +Default is \code{0.95}} + +\item{strata, weights, max.iterations}{arguments passed to \code{proportion_ci_strat_wilson()}, +when \code{method='strat_wilson'}} + +\item{method}{(\code{string})\cr +string indicating the type of confidence interval to calculate. +Must be one of 'waldcc', 'wald', 'clopper-pearson', 'wilson', 'wilsoncc', 'strat_wilson', 'strat_wilsoncc', 'agresti-coull', 'jeffreys'. +See \code{?proportion_ci} for details.} +} +\value{ +an ARD data frame +} +\description{ +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}\cr +Calculate confidence intervals for proportions. +} +\examples{ +ard_proportion_ci(mtcars, variables = c(vs, am), method = "wilson") +} diff --git a/man/ard_regression.Rd b/man/ard_regression.Rd new file mode 100644 index 000000000..1f6895cf0 --- /dev/null +++ b/man/ard_regression.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ard_regression.R +\name{ard_regression} +\alias{ard_regression} +\alias{ard_regression.default} +\title{Regression ARD} +\usage{ +ard_regression(x, ...) + +\method{ard_regression}{default}(x, tidy_fun = broom.helpers::tidy_with_broom_or_parameters, ...) +} +\arguments{ +\item{x}{regression model object} + +\item{...}{Arguments passed to \code{broom.helpers::tidy_plus_plus()}} + +\item{tidy_fun}{(\code{function})\cr +a tidier. Default is \code{\link[broom.helpers:tidy_with_broom_or_parameters]{broom.helpers::tidy_with_broom_or_parameters}}} +} +\value{ +data frame +} +\description{ +Function takes a regression model object and converts it to a ARD +structure using the \code{broom.helpers} package. +} +\examples{ +lm(AGE ~ ARM, data = cards::ADSL) |> + ard_regression(add_estimate_to_reference_rows = TRUE) +} diff --git a/man/dot-strata_normal_quantile.Rd b/man/dot-strata_normal_quantile.Rd new file mode 100644 index 000000000..7b9b145cf --- /dev/null +++ b/man/dot-strata_normal_quantile.Rd @@ -0,0 +1,42 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/proportion_ci.R +\name{.strata_normal_quantile} +\alias{.strata_normal_quantile} +\title{Helper Function for the Estimation of Stratified Quantiles} +\usage{ +.strata_normal_quantile(vars, weights, conf.level) +} +\arguments{ +\item{weights}{(\code{numeric} or \code{NULL})\cr weights for each level of the strata. If \code{NULL}, they are +estimated using the iterative algorithm that +minimizes the weighted squared length of the confidence interval.} + +\item{conf.level}{(\code{numeric})\cr +a scalar in \verb{(0, 1)} indicating the confidence level. +Default is \code{0.95}} +} +\value{ +Stratified quantile. +} +\description{ +This function wraps the estimation of stratified percentiles when we assume +the approximation for large numbers. This is necessary only in the case +proportions for each strata are unequal. +} +\examples{ +strata_data <- table(data.frame( + "f1" = sample(c(TRUE, FALSE), 100, TRUE), + "f2" = sample(c("x", "y", "z"), 100, TRUE), + stringsAsFactors = TRUE +)) +ns <- colSums(strata_data) +ests <- strata_data["TRUE", ] / ns +vars <- ests * (1 - ests) / ns +weights <- rep(1 / length(ns), length(ns)) + +cardx:::.strata_normal_quantile(vars, weights, 0.95) +} +\seealso{ +\code{\link[=proportion_ci_strat_wilson]{proportion_ci_strat_wilson()}} +} +\keyword{internal} diff --git a/man/dot-update_weights_strat_wilson.Rd b/man/dot-update_weights_strat_wilson.Rd new file mode 100644 index 000000000..9b2893de3 --- /dev/null +++ b/man/dot-update_weights_strat_wilson.Rd @@ -0,0 +1,54 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/proportion_ci.R +\name{.update_weights_strat_wilson} +\alias{.update_weights_strat_wilson} +\title{Helper Function for the Estimation of Weights for \code{proportion_ci_strat_wilson()}} +\usage{ +.update_weights_strat_wilson( + vars, + strata_qnorm, + initial_weights, + n_per_strata, + max.iterations = 50, + conf.level = 0.95, + tol = 0.001 +) +} +\arguments{ +\item{vars}{(\code{numeric})\cr normalized proportions for each strata.} + +\item{strata_qnorm}{(\code{numeric})\cr initial estimation with identical weights of the quantiles.} + +\item{initial_weights}{(\code{numeric})\cr initial weights used to calculate \code{strata_qnorm}. This can +be optimized in the future if we need to estimate better initial weights.} + +\item{n_per_strata}{(\code{numeric})\cr number of elements in each strata.} + +\item{max.iterations}{(\code{count})\cr maximum number of iterations to be tried. Convergence is always checked.} + +\item{conf.level}{(\code{numeric})\cr +a scalar in \verb{(0, 1)} indicating the confidence level. +Default is \code{0.95}} + +\item{tol}{(\code{number})\cr tolerance threshold for convergence.} +} +\value{ +A \code{list} of 3 elements: \code{n_it}, \code{weights}, and \code{diff_v}. +} +\description{ +This function wraps the iteration procedure that allows you to estimate +the weights for each proportional strata. This assumes to minimize the +weighted squared length of the confidence interval. +} +\examples{ +vs <- c(0.011, 0.013, 0.012, 0.014, 0.017, 0.018) +sq <- 0.674 +ws <- rep(1 / length(vs), length(vs)) +ns <- c(22, 18, 17, 17, 14, 12) + +cardx:::.update_weights_strat_wilson(vs, sq, ws, ns, 100, 0.95, 0.001) +} +\seealso{ +For references and details see \code{\link[=proportion_ci_strat_wilson]{proportion_ci_strat_wilson()}}. +} +\keyword{internal} diff --git a/man/proportion_ci.Rd b/man/proportion_ci.Rd new file mode 100644 index 000000000..d19e9dbe5 --- /dev/null +++ b/man/proportion_ci.Rd @@ -0,0 +1,115 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/proportion_ci.R +\name{proportion_ci} +\alias{proportion_ci} +\alias{proportion_ci_wald} +\alias{proportion_ci_wilson} +\alias{proportion_ci_clopper_pearson} +\alias{proportion_ci_agresti_coull} +\alias{proportion_ci_jeffreys} +\alias{proportion_ci_strat_wilson} +\title{Functions for Calculating Proportion Confidence Intervals} +\usage{ +proportion_ci_wald(x, conf.level = 0.95, correct = FALSE) + +proportion_ci_wilson(x, conf.level = 0.95, correct = FALSE) + +proportion_ci_clopper_pearson(x, conf.level = 0.95) + +proportion_ci_agresti_coull(x, conf.level = 0.95) + +proportion_ci_jeffreys(x, conf.level = 0.95) + +proportion_ci_strat_wilson( + x, + strata, + weights = NULL, + conf.level = 0.95, + max.iterations = 10L, + correct = FALSE +) +} +\arguments{ +\item{x}{vector of a binary values, i.e. a logical vector, or numeric with values \code{c(0, 1)}} + +\item{conf.level}{(\code{numeric})\cr +a scalar in \verb{(0, 1)} indicating the confidence level. +Default is \code{0.95}} + +\item{correct}{(\code{flag})\cr include the continuity correction. For further information, see for example +\code{\link[stats:prop.test]{stats::prop.test()}}.} + +\item{strata}{(\code{factor})\cr variable with one level per stratum and same length as \code{x}.} + +\item{weights}{(\code{numeric} or \code{NULL})\cr weights for each level of the strata. If \code{NULL}, they are +estimated using the iterative algorithm that +minimizes the weighted squared length of the confidence interval.} + +\item{max.iterations}{(\code{count})\cr maximum number of iterations for the iterative procedure used +to find estimates of optimal weights.} +} +\value{ +Confidence interval of a proportion. +} +\description{ +Functions to calculate different proportion confidence intervals for use in \code{ard_proportion()}. +} +\section{Functions}{ +\itemize{ +\item \code{proportion_ci_wald()}: Calculates the Wald interval by following the usual textbook definition +for a single proportion confidence interval using the normal approximation. + +\item \code{proportion_ci_wilson()}: Calculates the Wilson interval by calling \code{\link[stats:prop.test]{stats::prop.test()}}. +Also referred to as Wilson score interval. + +\item \code{proportion_ci_clopper_pearson()}: Calculates the Clopper-Pearson interval by calling \code{\link[stats:binom.test]{stats::binom.test()}}. +Also referred to as the \code{exact} method. + +\item \code{proportion_ci_agresti_coull()}: Calculates the \code{Agresti-Coull} interval (created by \verb{Alan Agresti} and \verb{Brent Coull}) by +(for 95\% CI) adding two successes and two failures to the data and then using the Wald formula to construct a CI. + +\item \code{proportion_ci_jeffreys()}: Calculates the Jeffreys interval, an equal-tailed interval based on the +non-informative Jeffreys prior for a binomial proportion. + +\item \code{proportion_ci_strat_wilson()}: Calculates the stratified Wilson confidence +interval for unequal proportions as described in +Xin YA, Su XG. Stratified Wilson and Newcombe confidence intervals +for multiple binomial proportions. \emph{Statistics in Biopharmaceutical Research}. 2010;2(3). + +}} +\examples{ +x <- c( + TRUE, TRUE, TRUE, TRUE, TRUE, + FALSE, FALSE, FALSE, FALSE, FALSE +) + +proportion_ci_wald(x, conf.level = 0.9) +proportion_ci_wilson(x, correct = TRUE) +proportion_ci_clopper_pearson(x) +proportion_ci_agresti_coull(x) +proportion_ci_jeffreys(x) +# Stratified Wilson confidence interval with unequal probabilities + +set.seed(1) +rsp <- sample(c(TRUE, FALSE), 100, TRUE) +strata_data <- data.frame( + "f1" = sample(c("a", "b"), 100, TRUE), + "f2" = sample(c("x", "y", "z"), 100, TRUE), + stringsAsFactors = TRUE +) +strata <- interaction(strata_data) +n_strata <- ncol(table(rsp, strata)) # Number of strata + +proportion_ci_strat_wilson( + x = rsp, strata = strata, + conf.level = 0.90 +) + +# Not automatic setting of weights +proportion_ci_strat_wilson( + x = rsp, strata = strata, + weights = rep(1 / n_strata, n_strata), + conf.level = 0.90 +) + +} diff --git a/tests/testthat/_snaps/ard_chisqtest.md b/tests/testthat/_snaps/ard_chisqtest.md new file mode 100644 index 000000000..140469664 --- /dev/null +++ b/tests/testthat/_snaps/ard_chisqtest.md @@ -0,0 +1,17 @@ +# shuffle_ard fills missing group levels if the group is meaningful + + Code + as.data.frame(cards::shuffle_ard(cards::bind_ard(ard_chisqtest(data = adsl_sub, + by = "ARM", variable = "AGEGR1"), ard_chisqtest(data = adsl_sub, by = "SEX", + variable = "AGEGR1")))) + Output + ARM SEX variable context stat_name statistic + 1 Overall ARM AGEGR1 chisqtest statistic 5.079442e+00 + 2 Overall ARM AGEGR1 chisqtest p.value 7.888842e-02 + 3 Overall ARM AGEGR1 chisqtest parameter 2.000000e+00 + 4 Overall ARM AGEGR1 chisqtest B 2.000000e+03 + 5 Overall SEX AGEGR1 chisqtest statistic 1.039442e+00 + 6 Overall SEX AGEGR1 chisqtest p.value 5.946864e-01 + 7 Overall SEX AGEGR1 chisqtest parameter 2.000000e+00 + 8 Overall SEX AGEGR1 chisqtest B 2.000000e+03 + diff --git a/tests/testthat/_snaps/ard_proportion_ci.md b/tests/testthat/_snaps/ard_proportion_ci.md new file mode 100644 index 000000000..3050d5027 --- /dev/null +++ b/tests/testthat/_snaps/ard_proportion_ci.md @@ -0,0 +1,34 @@ +# ard_proportion_ci(method='strat_wilson') works + + Code + ard_proportion_ci_strat_wilson + Message + {cards} data frame: 6 x 8 + Output + variable context stat_name stat_label statistic statistic_fmt_fn + 1 rsp proporti… N N 80 0 + 2 rsp proporti… estimate estimate 0.625 1 + 3 rsp proporti… conf.low conf.low 0.487 1 + 4 rsp proporti… conf.high conf.high 0.719 1 + 5 rsp proporti… conf.level conf.lev… 0.95 1 + 6 rsp proporti… method method Stratifi… 1 + Message + i 2 more variables: warning, error + +--- + + Code + ard_proportion_ci_strat_wilsoncc + Message + {cards} data frame: 6 x 8 + Output + variable context stat_name stat_label statistic statistic_fmt_fn + 1 rsp proporti… N N 80 0 + 2 rsp proporti… estimate estimate 0.625 1 + 3 rsp proporti… conf.low conf.low 0.448 1 + 4 rsp proporti… conf.high conf.high 0.753 1 + 5 rsp proporti… conf.level conf.lev… 0.95 1 + 6 rsp proporti… method method Stratifi… 1 + Message + i 2 more variables: warning, error + diff --git a/tests/testthat/_snaps/proportion_ci.md b/tests/testthat/_snaps/proportion_ci.md new file mode 100644 index 000000000..5e9f0732e --- /dev/null +++ b/tests/testthat/_snaps/proportion_ci.md @@ -0,0 +1,682 @@ +# check the proportion_ci_*() functions work + + Code + wilson_dbl <- proportion_ci_wilson(x_dbl, conf.level = 0.9, correct = FALSE) + +--- + + Code + wilsoncc_dbl <- proportion_ci_wilson(x_dbl, conf.level = 0.9, correct = TRUE) + +--- + + Code + wilson_lgl <- proportion_ci_wilson(x_lgl, conf.level = 0.9, correct = FALSE) + +--- + + Code + proportion_ci_wilson(x_rsp, conf.level = 0.9) + Output + $N + [1] 10 + + $conf.level + [1] 0.9 + + $estimate + p + 0.5 + + $statistic + X-squared + 0 + + $p.value + [1] 1 + + $parameter + df + 1 + + $conf.low + [1] 0.2692718 + + $conf.high + [1] 0.7307282 + + $method + Wilson Confidence Interval without continuity correction + + $alternative + [1] "two.sided" + + +--- + + Code + proportion_ci_wilson(x_true) + Output + $N + [1] 32 + + $conf.level + [1] 0.95 + + $estimate + p + 1 + + $statistic + X-squared + 32 + + $p.value + [1] 1.541726e-08 + + $parameter + df + 1 + + $conf.low + [1] 0.8928208 + + $conf.high + [1] 1 + + $method + Wilson Confidence Interval without continuity correction + + $alternative + [1] "two.sided" + + +--- + + Code + proportion_ci_wilson(x_false) + Output + $N + [1] 32 + + $conf.level + [1] 0.95 + + $estimate + p + 0 + + $statistic + X-squared + 32 + + $p.value + [1] 1.541726e-08 + + $parameter + df + 1 + + $conf.low + [1] 0 + + $conf.high + [1] 0.1071792 + + $method + Wilson Confidence Interval without continuity correction + + $alternative + [1] "two.sided" + + +--- + + Code + wald_dbl <- proportion_ci_wald(x_dbl, conf.level = 0.9, correct = FALSE) + +--- + + Code + waldcc_dbl <- proportion_ci_wald(x_dbl, conf.level = 0.9, correct = TRUE) + +--- + + Code + wald_lgl <- proportion_ci_wald(x_lgl, conf.level = 0.9, correct = FALSE) + +--- + + Code + proportion_ci_wald(x_rsp, conf.level = 0.95, correct = TRUE) + Output + $N + [1] 10 + + $estimate + [1] 0.5 + + $conf.low + [1] 0.1401025 + + $conf.high + [1] 0.8598975 + + $conf.level + [1] 0.95 + + $method + Wald Confidence Interval with continuity correction + + +--- + + Code + proportion_ci_wald(x_true) + Output + $N + [1] 32 + + $estimate + [1] 1 + + $conf.low + [1] 1 + + $conf.high + [1] 1 + + $conf.level + [1] 0.95 + + $method + Wald Confidence Interval without continuity correction + + +--- + + Code + proportion_ci_wald(x_false) + Output + $N + [1] 32 + + $estimate + [1] 0 + + $conf.low + [1] 0 + + $conf.high + [1] 0 + + $conf.level + [1] 0.95 + + $method + Wald Confidence Interval without continuity correction + + +--- + + Code + clopper_pearson_dbl <- proportion_ci_clopper_pearson(x_dbl, conf.level = 0.9) + +--- + + Code + clopper_pearson_lgl <- proportion_ci_clopper_pearson(x_lgl, conf.level = 0.9) + +--- + + Code + proportion_ci_clopper_pearson(x_rsp, conf.level = 0.95) + Output + $N + [1] 10 + + $conf.level + [1] 0.95 + + $estimate + probability of success + 0.5 + + $statistic + number of successes + 5 + + $p.value + [1] 1 + + $parameter + number of trials + 10 + + $conf.low + [1] 0.187086 + + $conf.high + [1] 0.812914 + + $method + [1] "Clopper-Pearson Confidence Interval" + + $alternative + [1] "two.sided" + + +--- + + Code + proportion_ci_wilson(x_true) + Output + $N + [1] 32 + + $conf.level + [1] 0.95 + + $estimate + p + 1 + + $statistic + X-squared + 32 + + $p.value + [1] 1.541726e-08 + + $parameter + df + 1 + + $conf.low + [1] 0.8928208 + + $conf.high + [1] 1 + + $method + Wilson Confidence Interval without continuity correction + + $alternative + [1] "two.sided" + + +--- + + Code + proportion_ci_wilson(x_false) + Output + $N + [1] 32 + + $conf.level + [1] 0.95 + + $estimate + p + 0 + + $statistic + X-squared + 32 + + $p.value + [1] 1.541726e-08 + + $parameter + df + 1 + + $conf.low + [1] 0 + + $conf.high + [1] 0.1071792 + + $method + Wilson Confidence Interval without continuity correction + + $alternative + [1] "two.sided" + + +--- + + Code + agresti_coull_dbl <- proportion_ci_agresti_coull(x_dbl, conf.level = 0.9) + +--- + + Code + agresti_coull_lgl <- proportion_ci_agresti_coull(x_lgl, conf.level = 0.9) + +--- + + Code + proportion_ci_agresti_coull(x_rsp, conf.level = 0.95) + Output + $N + [1] 10 + + $estimate + [1] 0.5 + + $conf.low + [1] 0.2365931 + + $conf.high + [1] 0.7634069 + + $conf.level + [1] 0.95 + + $method + [1] "Agresti-Coull Confidence Interval" + + +--- + + Code + proportion_ci_agresti_coull(x_true) + Output + $N + [1] 32 + + $estimate + [1] 1 + + $conf.low + [1] 0.8726819 + + $conf.high + [1] 1 + + $conf.level + [1] 0.95 + + $method + [1] "Agresti-Coull Confidence Interval" + + +--- + + Code + proportion_ci_agresti_coull(x_false) + Output + $N + [1] 32 + + $estimate + [1] 0 + + $conf.low + [1] 0 + + $conf.high + [1] 0.1273181 + + $conf.level + [1] 0.95 + + $method + [1] "Agresti-Coull Confidence Interval" + + +--- + + Code + jeffreys_dbl <- proportion_ci_jeffreys(x_dbl, conf.level = 0.9) + +--- + + Code + jeffreys_lgl <- proportion_ci_jeffreys(x_lgl, conf.level = 0.9) + +--- + + Code + proportion_ci_jeffreys(x_rsp, conf.level = 0.95) + Output + $N + [1] 10 + + $estimate + [1] 0.5 + + $conf.low + [1] 0.2235287 + + $conf.high + [1] 0.7764713 + + $conf.level + [1] 0.95 + + $method + Jeffreys Interval + + +--- + + Code + proportion_ci_jeffreys(x_true) + Output + $N + [1] 32 + + $estimate + [1] 1 + + $conf.low + [1] 0.9250722 + + $conf.high + [1] 1 + + $conf.level + [1] 0.95 + + $method + Jeffreys Interval + + +--- + + Code + proportion_ci_jeffreys(x_false) + Output + $N + [1] 32 + + $estimate + [1] 0 + + $conf.low + [1] 0 + + $conf.high + [1] 0.07492776 + + $conf.level + [1] 0.95 + + $method + Jeffreys Interval + + +--- + + Code + proportion_ci_wilson(x_dbl, conf.level = c(0.9, 0.9)) + Condition + Error in `check_range()`: + ! The `conf.level` argument must be length 1. + +--- + + Code + proportion_ci_wilson(mtcars$cyl) + Condition + Error in `proportion_ci_wilson()`: + ! Expecting column `x` to be either or coded as 0 and 1. + +# check the proportion_ci_strat_wilson() function works + + Code + proportion_ci_strat_wilson(x = rsp, strata = strata, weights = weights, + correct = FALSE) + Output + $N + [1] 80 + + $estimate + [1] 0.625 + + $conf.low + [1] 0.4867191 + + $conf.high + [1] 0.7186381 + + $conf.level + [1] 0.95 + + $method + Stratified Wilson Confidence Interval without continuity correction + + +--- + + Code + proportion_ci_strat_wilson(x = rsp, strata = strata, weights = weights, + correct = TRUE) + Output + $N + [1] 80 + + $estimate + [1] 0.625 + + $conf.low + [1] 0.4482566 + + $conf.high + [1] 0.7531474 + + $conf.level + [1] 0.95 + + $method + Stratified Wilson Confidence Interval with continuity correction + + +--- + + Code + proportion_ci_strat_wilson(x = as.numeric(rsp), strata = strata, weights = weights) + Output + $N + [1] 80 + + $estimate + [1] 0.625 + + $conf.low + [1] 0.4867191 + + $conf.high + [1] 0.7186381 + + $conf.level + [1] 0.95 + + $method + Stratified Wilson Confidence Interval without continuity correction + + +--- + + Code + proportion_ci_strat_wilson(x = as.numeric(rsp), strata = strata) + Output + $N + [1] 80 + + $estimate + [1] 0.625 + + $conf.low + [1] 0.5242016 + + $conf.high + [1] 0.7268788 + + $conf.level + [1] 0.95 + + $weights + a.x b.x a.y b.y a.z b.z + 0.2111332 0.1890860 0.1180990 0.1544903 0.1737106 0.1534809 + + $method + Stratified Wilson Confidence Interval without continuity correction + + +--- + + Code + proportion_ci_strat_wilson(x = rep_len(TRUE, length(rsp)), strata = strata, + weights = weights) + Condition + Error in `proportion_ci_strat_wilson()`: + ! All values in `x` argument are either `TRUE` or `FALSE` and CI is not estimable. + +--- + + Code + proportion_ci_strat_wilson(x = rep_len(FALSE, length(rsp)), strata = strata, + weights = weights) + Condition + Error in `proportion_ci_strat_wilson()`: + ! All values in `x` argument are either `TRUE` or `FALSE` and CI is not estimable. + +--- + + Code + proportion_ci_strat_wilson(x = as.numeric(rsp), strata = strata, + max.iterations = -1) + Condition + Error in `proportion_ci_strat_wilson()`: + ! Argument `max.iterations` must be a positive integer. + +--- + + Code + proportion_ci_strat_wilson(x = as.numeric(rsp), strata = strata, + max.iterations = -1) + Condition + Error in `proportion_ci_strat_wilson()`: + ! Argument `max.iterations` must be a positive integer. + +--- + + Code + proportion_ci_strat_wilson(x = as.numeric(rsp), strata = strata, weights = weights + + pi / 5) + Condition + Error in `proportion_ci_strat_wilson()`: + ! The sum of the `weights` argument must be 1 + +--- + + Code + proportion_ci_strat_wilson(x = as.numeric(rsp), strata = strata, weights = weights + + pi) + Condition + Error in `proportion_ci_strat_wilson()`: + ! The `weights` argument must be in the interval `[0, 1]`. + diff --git a/tests/testthat/test-ard_chisqtest.R b/tests/testthat/test-ard_chisqtest.R new file mode 100644 index 000000000..08d37a85a --- /dev/null +++ b/tests/testthat/test-ard_chisqtest.R @@ -0,0 +1,39 @@ +test_that("ard_chisqtest() works", { + expect_error( + ard_chisqtest <- + cards::ADSL |> + ard_chisqtest(by = ARM, variable = AGEGR1), + NA + ) + + expect_equal( + ard_chisqtest |> + cards::get_ard_statistics(stat_name %in% c("statistic", "p.value")), + with(cards::ADSL, chisq.test(AGEGR1, ARM)) |> + broom::tidy() |> + dplyr::select(statistic, p.value) |> + unclass(), + ignore_attr = TRUE + ) +}) + +test_that("shuffle_ard fills missing group levels if the group is meaningful", { + adsl_sub <- cards::ADSL |> dplyr::filter(ARM %in% unique(ARM)[1:2]) + + expect_snapshot( + cards::bind_ard( + ard_chisqtest( + data = adsl_sub, + by = "ARM", + variable = "AGEGR1" + ), + ard_chisqtest( + data = adsl_sub, + by = "SEX", + variable = "AGEGR1" + ) + ) |> + cards::shuffle_ard() |> + as.data.frame() + ) +}) diff --git a/tests/testthat/test-ard_fishertest.R b/tests/testthat/test-ard_fishertest.R new file mode 100644 index 000000000..873a3a69e --- /dev/null +++ b/tests/testthat/test-ard_fishertest.R @@ -0,0 +1,18 @@ +test_that("ard_fishertest() works", { + expect_error( + ard_fishertest <- + cards::ADSL[1:20, ] |> + ard_fishertest(by = ARM, variable = AGEGR1), + NA + ) + + expect_equal( + ard_fishertest |> + cards::get_ard_statistics(stat_name %in% c("p.value", "method")), + with(cards::ADSL[1:20, ], fisher.test(AGEGR1, ARM)) |> + broom::tidy() |> + dplyr::select(p.value, method) |> + unclass(), + ignore_attr = TRUE + ) +}) diff --git a/tests/testthat/test-ard_proportion_ci.R b/tests/testthat/test-ard_proportion_ci.R new file mode 100644 index 000000000..97bb0b272 --- /dev/null +++ b/tests/testthat/test-ard_proportion_ci.R @@ -0,0 +1,67 @@ +test_that("ard_proportion_ci() works", { + # testing the easy methods together + expect_error( + c( + "waldcc", "wald", "clopper-pearson", + "wilson", "wilsoncc", "agresti-coull", "jeffreys" + ) |> + lapply( + \(x) { + ard_proportion_ci( + data = mtcars, + variables = c(am, vs), + method = x + ) + } + ), + NA + ) +}) + +test_that("ard_proportion_ci(method='strat_wilson') works", { + withr::local_seed(1) + rsp <- c( + sample(c(TRUE, FALSE), size = 40, prob = c(3 / 4, 1 / 4), replace = TRUE), + sample(c(TRUE, FALSE), size = 40, prob = c(1 / 2, 1 / 2), replace = TRUE) + ) + grp <- factor(rep(c("A", "B"), each = 40), levels = c("B", "A")) + strata_data <- data.frame( + "f1" = sample(c("a", "b"), 80, TRUE), + "f2" = sample(c("x", "y", "z"), 80, TRUE), + stringsAsFactors = TRUE + ) + + weights <- 1:6 / sum(1:6) + + expect_error( + ard_proportion_ci_strat_wilson <- + ard_proportion_ci( + data = data.frame( + rsp = rsp, + strata = interaction(strata_data) + ), + variables = rsp, + strata = strata, + weights = weights, + method = "strat_wilson" + ), + NA + ) + expect_snapshot(ard_proportion_ci_strat_wilson) + + expect_error( + ard_proportion_ci_strat_wilsoncc <- + ard_proportion_ci( + data = data.frame( + rsp = rsp, + strata = interaction(strata_data) + ), + variables = rsp, + strata = strata, + weights = weights, + method = "strat_wilsoncc" + ), + NA + ) + expect_snapshot(ard_proportion_ci_strat_wilsoncc) +}) diff --git a/tests/testthat/test-ard_regression.R b/tests/testthat/test-ard_regression.R new file mode 100644 index 000000000..c6ed8c548 --- /dev/null +++ b/tests/testthat/test-ard_regression.R @@ -0,0 +1,10 @@ +test_that("ard_regression() works", { + expect_snapshot( + lm(AGE ~ ARM, data = cards::ADSL) |> + ard_regression(add_estimate_to_reference_rows = TRUE) |> + dplyr::mutate( + statistic = lapply(statistic, function(x) ifelse(is.numeric(x), cards::round5(x, 3), x)) + ) |> + as.data.frame() + ) +}) diff --git a/tests/testthat/test-proportion_ci.R b/tests/testthat/test-proportion_ci.R new file mode 100644 index 000000000..5c34eaedb --- /dev/null +++ b/tests/testthat/test-proportion_ci.R @@ -0,0 +1,175 @@ +test_that("check the proportion_ci_*() functions work", { + # setting vectors to test + x_dbl <- c(NA, mtcars$vs) + x_lgl <- as.numeric(x_dbl) + x_rsp <- c(TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE) + x_true <- rep_len(TRUE, 32) + x_false <- rep_len(FALSE, 32) + + # check Wilson CIs ---------------------------------------------------------- + expect_snapshot( + wilson_dbl <- proportion_ci_wilson(x_dbl, conf.level = 0.9, correct = FALSE) + ) + expect_equal( + wilson_dbl[c("conf.low", "conf.high")], + prop.test(x = sum(x_dbl, na.rm = TRUE), n = 32, conf.level = 0.9, correct = FALSE)$conf.int |> + as.list() |> + setNames(c("conf.low", "conf.high")) + ) + + expect_snapshot( + wilsoncc_dbl <- proportion_ci_wilson(x_dbl, conf.level = 0.9, correct = TRUE) + ) + expect_equal( + wilsoncc_dbl[c("conf.low", "conf.high")], + prop.test(x = sum(x_dbl, na.rm = TRUE), n = 32, conf.level = 0.9, correct = TRUE)$conf.int |> + as.list() |> + setNames(c("conf.low", "conf.high")) + ) + + expect_snapshot( + wilson_lgl <- proportion_ci_wilson(x_lgl, conf.level = 0.9, correct = FALSE) + ) + expect_equal( + wilson_lgl[c("conf.low", "conf.high")], + prop.test(x = sum(x_lgl, na.rm = TRUE), n = 32, conf.level = 0.9, correct = FALSE)$conf.int |> + as.list() |> + setNames(c("conf.low", "conf.high")) + ) + + expect_snapshot(proportion_ci_wilson(x_rsp, conf.level = 0.9)) + expect_snapshot(proportion_ci_wilson(x_true)) + expect_snapshot(proportion_ci_wilson(x_false)) + + # check Wald CIs ---------------------------------------------------------- + expect_snapshot( + wald_dbl <- proportion_ci_wald(x_dbl, conf.level = 0.9, correct = FALSE) + ) + expect_snapshot( + waldcc_dbl <- proportion_ci_wald(x_dbl, conf.level = 0.9, correct = TRUE) + ) + expect_snapshot( + wald_lgl <- proportion_ci_wald(x_lgl, conf.level = 0.9, correct = FALSE) + ) + + expect_snapshot(proportion_ci_wald(x_rsp, conf.level = 0.95, correct = TRUE)) + expect_snapshot(proportion_ci_wald(x_true)) + expect_snapshot(proportion_ci_wald(x_false)) + + # check Clopper-Pearson CIs ---------------------------------------------------------- + expect_snapshot( + clopper_pearson_dbl <- proportion_ci_clopper_pearson(x_dbl, conf.level = 0.9) + ) + expect_equal( + clopper_pearson_dbl[c("conf.low", "conf.high")], + binom.test(x = sum(x_dbl, na.rm = TRUE), n = 32, conf.level = 0.9)$conf.int |> + as.list() |> + setNames(c("conf.low", "conf.high")) + ) + + expect_snapshot( + clopper_pearson_lgl <- proportion_ci_clopper_pearson(x_lgl, conf.level = 0.9) + ) + expect_equal( + clopper_pearson_lgl[c("conf.low", "conf.high")], + binom.test(x = sum(x_lgl, na.rm = TRUE), n = 32, conf.level = 0.9)$conf.int |> + as.list() |> + setNames(c("conf.low", "conf.high")) + ) + + expect_snapshot(proportion_ci_clopper_pearson(x_rsp, conf.level = 0.95)) + expect_snapshot(proportion_ci_wilson(x_true)) + expect_snapshot(proportion_ci_wilson(x_false)) + + # check Agresti-Coull CIs ---------------------------------------------------------- + expect_snapshot( + agresti_coull_dbl <- proportion_ci_agresti_coull(x_dbl, conf.level = 0.9) + ) + expect_snapshot( + agresti_coull_lgl <- proportion_ci_agresti_coull(x_lgl, conf.level = 0.9) + ) + + expect_snapshot(proportion_ci_agresti_coull(x_rsp, conf.level = 0.95)) + expect_snapshot(proportion_ci_agresti_coull(x_true)) + expect_snapshot(proportion_ci_agresti_coull(x_false)) + + # check Jeffreys CIs ---------------------------------------------------------- + expect_snapshot( + jeffreys_dbl <- proportion_ci_jeffreys(x_dbl, conf.level = 0.9) + ) + expect_snapshot( + jeffreys_lgl <- proportion_ci_jeffreys(x_lgl, conf.level = 0.9) + ) + + expect_snapshot(proportion_ci_jeffreys(x_rsp, conf.level = 0.95)) + expect_snapshot(proportion_ci_jeffreys(x_true)) + expect_snapshot(proportion_ci_jeffreys(x_false)) + + # error messaging ------------------------------------------------------------ + expect_snapshot( + proportion_ci_wilson(x_dbl, conf.level = c(0.9, 0.9)), + error = TRUE + ) + + expect_snapshot( + error = TRUE, + proportion_ci_wilson(mtcars$cyl) + ) +}) + +test_that("check the proportion_ci_strat_wilson() function works", { + # check Stratified Wilson CIs ---------------------------------------------------------- + withr::local_seed(1) + rsp <- c( + sample(c(TRUE, FALSE), size = 40, prob = c(3 / 4, 1 / 4), replace = TRUE), + sample(c(TRUE, FALSE), size = 40, prob = c(1 / 2, 1 / 2), replace = TRUE) + ) + grp <- factor(rep(c("A", "B"), each = 40), levels = c("B", "A")) + strata_data <- data.frame( + "f1" = sample(c("a", "b"), 80, TRUE), + "f2" = sample(c("x", "y", "z"), 80, TRUE), + stringsAsFactors = TRUE + ) + strata <- interaction(strata_data) + weights <- 1:6 / sum(1:6) + + expect_snapshot( + proportion_ci_strat_wilson(x = rsp, strata = strata, weights = weights, correct = FALSE) + ) + expect_snapshot( + proportion_ci_strat_wilson(x = rsp, strata = strata, weights = weights, correct = TRUE) + ) + expect_snapshot( + proportion_ci_strat_wilson(x = as.numeric(rsp), strata = strata, weights = weights) + ) + expect_snapshot( + proportion_ci_strat_wilson(x = as.numeric(rsp), strata = strata) + ) + + + # checking error messaging + expect_snapshot( + proportion_ci_strat_wilson(x = rep_len(TRUE, length(rsp)), strata = strata, weights = weights), + error = TRUE + ) + expect_snapshot( + proportion_ci_strat_wilson(x = rep_len(FALSE, length(rsp)), strata = strata, weights = weights), + error = TRUE + ) + expect_snapshot( + proportion_ci_strat_wilson(x = as.numeric(rsp), strata = strata, max.iterations = -1), + error = TRUE + ) + expect_snapshot( + proportion_ci_strat_wilson(x = as.numeric(rsp), strata = strata, max.iterations = -1), + error = TRUE + ) + expect_snapshot( + proportion_ci_strat_wilson(x = as.numeric(rsp), strata = strata, weights = weights + pi / 5), + error = TRUE + ) + expect_snapshot( + proportion_ci_strat_wilson(x = as.numeric(rsp), strata = strata, weights = weights + pi), + error = TRUE + ) +})