Merge branch 'main' into 103-require-new-version-of-effectsize

insightsengineering · May 31, 2024 · 63a2f17 · 63a2f17
2 parents 3ca1b74 + 19c0ee8
commit 63a2f17
Show file tree

Hide file tree

Showing 53 changed files with 1,500 additions and 254 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: cardx
 Title: Extra Analysis Results Data Utilities
-Version: 0.1.0.9042
+Version: 0.1.0.9051
 Authors@R: c(
     person("Daniel", "Sjoberg", , "[email protected]", role = c("aut", "cre")),
     person("Abinaya", "Yogasekaram", , "[email protected]", role = "aut"),
@@ -18,7 +18,7 @@ BugReports: https://github.com/insightsengineering/cardx/issues
 Depends:
     R (>= 4.1)
 Imports:
-    cards (>= 0.1.0.9014),
+    cards (>= 0.1.0.9032),
     cli (>= 3.6.1),
     dplyr (>= 1.1.2),
     glue (>= 1.6.2),
@@ -33,13 +33,13 @@ Suggests:
     effectsize (>= 0.8.8),
     emmeans (>= 1.7.3),
     geepack (>= 1.3.2),
-    ggsurvfit (>= 1.0.0),
+    ggsurvfit (>= 1.1.0),
     lme4 (>= 1.1-31),
     parameters (>= 0.20.2),
     smd (>= 0.6.6),
     spelling,
     survey (>= 4.1),
-    survival (>= 3.2-11),
+    survival (>= 3.6-4),
     testthat (>= 3.2.0),
     withr (>= 2.5.0)
 Remotes: 

diff --git a/NAMESPACE b/NAMESPACE
@@ -1,5 +1,7 @@
 # Generated by roxygen2: do not edit by hand
 
+S3method(ard_attributes,survey.design)
+S3method(ard_continuous,survey.design)
 S3method(ard_regression,default)
 S3method(ard_stats_anova,anova)
 S3method(ard_stats_anova,data.frame)
@@ -9,13 +11,18 @@ export("%>%")
 export(all_of)
 export(any_of)
 export(ard_aod_wald_test)
+export(ard_attributes)
 export(ard_car_anova)
 export(ard_car_vif)
+export(ard_categorical)
+export(ard_continuous)
+export(ard_dichotomous)
 export(ard_effectsize_cohens_d)
 export(ard_effectsize_hedges_g)
 export(ard_effectsize_paired_cohens_d)
 export(ard_effectsize_paired_hedges_g)
 export(ard_emmeans_mean_difference)
+export(ard_missing)
 export(ard_proportion_ci)
 export(ard_regression)
 export(ard_regression_basic)
@@ -33,19 +40,22 @@ export(ard_stats_paired_t_test)
 export(ard_stats_paired_wilcox_test)
 export(ard_stats_prop_test)
 export(ard_stats_t_test)
+export(ard_stats_t_test_onesample)
 export(ard_stats_wilcox_test)
+export(ard_stats_wilcox_test_onesample)
 export(ard_survey_svychisq)
-export(ard_survey_svycontinuous)
 export(ard_survey_svyranktest)
 export(ard_survey_svyttest)
 export(ard_survival_survdiff)
 export(ard_survival_survfit)
+export(ard_survival_survfit_diff)
 export(bt)
 export(bt_strip)
 export(construct_model)
 export(contains)
 export(ends_with)
 export(everything)
+export(is_binary)
 export(last_col)
 export(matches)
 export(num_range)
@@ -60,6 +70,11 @@ export(reformulate2)
 export(starts_with)
 export(where)
 import(rlang)
+importFrom(cards,ard_attributes)
+importFrom(cards,ard_categorical)
+importFrom(cards,ard_continuous)
+importFrom(cards,ard_dichotomous)
+importFrom(cards,ard_missing)
 importFrom(dplyr,"%>%")
 importFrom(dplyr,across)
 importFrom(dplyr,all_of)

diff --git a/NEWS.md b/NEWS.md
@@ -1,4 +1,4 @@
-# cardx 0.1.0.9042
+# cardx 0.1.0.9051
 
 ### Breaking Changes
 
@@ -18,32 +18,38 @@ ard_moodtest()          -> ard_stats_mood_test()
 
 ### New Features
 
+* The `ard_proportion_ci(value)` argument has been added. Previously, only binary variables (0/1 or TRUE/FALSE) could be summarized. When a value is not supplied, each level of the variable is summarized independently. By default, binary variables will have the 1/TRUE level summarized.
+
 * Added the following functions for calculating Analysis Results Data (ARD).
   - `ard_stats_aov()` for calculating ANOVA results using `stats::aov()`. (#3)
   - `ard_stats_anova()` for calculating ANOVA results using `stats::anova()`. (#12) 
   - `ard_stats_mcnemar_test_long()` for McNemar's test from long data using `stats::mcnemar.test()`. 
   - `ard_aod_wald_test()` for calculating Wald Tests for regression models using `aod::wald.test()`. (#84)
   - `ard_car_anova()` for calculating ANOVA results using `car::Anova()`. (#3)
+  - `ard_car_vif()` for calculating the variance inflation factor using `car::vif()`. (#10)
   - `ard_stats_oneway_test()` for calculating ANOVA results using `stats::oneway.test()`. (#3)
   - `ard_effectsize_cohens_d()`, `ard_effectsize_paired_cohens_d()`, `ard_effectsize_hedges_g()`, and `ard_effectsize_paired_hedges_g()` for standardized differences using `effectsize::cohens_d()` and `effectsize::hedges_g()`. (#50)
-  - `ard_stats_prop_test()` for tests of proportions using `stats::prop.test()`. (#64)
-  - `ard_regression_basic()` for basic regression models. The function focuses on matching terms to underlying variables names. (#46)
+  - `ard_emmeans_mean_difference()` for calculating the least-squares mean differences using the {emmeans} package. (#34)
   - `ard_smd_smd()` for calculating standardized mean differences using `smd::smd()`. (#4)
   - `ard_survival_survfit()` for survival analyses using `survival::survfit()`. (#43)
-  - `ard_survey_svycontinuous()` for calculating univariate summary statistics from weighted/survey data using many functions from the {survey} package. (#68)
+  - `ard_continuous.survey.design()` for calculating univariate summary statistics from weighted/survey data using many functions from the {survey} package. (#68)
+  - `ard_attributes.survey.design()` for summarizing labels and attributes from weighted/survey data.
   - `ard_survey_svychisq()` for weighted/survey chi-squared test using `survey::svychisq()`. (#72)
   - `ard_survey_svyttest()` for weighted/survey t-tests using `survey::svyttest()`. (#70)
   - `ard_survey_svyranktest()` for weighted/survey rank tests using `survey::svyranktest()`. (#71)
-  - `ard_car_vif()` for calculating the variance inflation factor using `car::vif()`. (#10)
-  - `ard_emmeans_mean_difference()` for calculating the least-squares mean differences using the {emmeans} package. (#34)
+  - `ard_survival_survdiff()` for creating results from `survival::survdiff()`. (#113)
+  - `ard_stats_prop_test()` for tests of proportions using `stats::prop.test()`. (#64)
+  - `ard_stats_t_test_onesample()` for calculating one-sample t-test results.
+  - `ard_stats_wilcox_test_onesample()` for calculating one-sample Wilcoxon test results.
+  - `ard_regression_basic()` for basic regression models. The function focuses on matching terms to underlying variable names. (#46)
 
 * Updated functions `ard_stats_t_test()`, `ard_stats_paired_t_test()`, `ard_stats_wilcox_test()`, `ard_stats_paired_wilcox_test()`, `ard_stats_chisq_test()`, `ard_stats_fisher_test()`, `ard_stats_kruskal_test()`, `ard_stats_mcnemar_test()`, and `ard_stats_mood_test()` to accept multiple variables at once. Independent tests are calculated for each variable. The `variable` argument is renamed to `variables`. (#77)
 
 * Updated `ard_stats_t_test()` and `ard_stats_wilcox_test()` to no longer require the `by` argument, which yields central estimates with their confidence intervals. (#82)
 
 * Imported cli call environment functions from `https://github.com/ddsjoberg/standalone/blob/main/R/standalone-cli_call_env.R` and implemented `set_cli_abort_call` in user-facing functions. (#111)
 
-* Added `ard_survival_survdiff()` for creating results from `survival::survdiff()`. (#113)
+* Added model construction helpers, `construct_model()`, `reformulate2()`, `bt()`, and `bt_strip()`.
 
 # cardx 0.1.0
 

diff --git a/R/ard_attributes.survey.design.R b/R/ard_attributes.survey.design.R
@@ -0,0 +1,37 @@
+#' ARD Attributes
+#'
+#' @description
+#' Add variable attributes to an ARD data frame.
+#' - The `label` attribute will be added for all columns, and when no label
+#'   is specified and no label has been set for a column using the `label=` argument,
+#'   the column name will be placed in the label statistic.
+#' - The `class` attribute will also be returned for all columns.
+#' - Any other attribute returned by `attributes()` will also be added, e.g. factor levels.
+#'
+#' @rdname ard_attributes
+#' @param data (`survey.design`)\cr
+#'   a design object often created with [`survey::svydesign()`].
+#' @param variables ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
+#'   variables to include
+#' @param label (named `list`)\cr
+#'   named list of variable labels, e.g. `list(cyl = "No. Cylinders")`.
+#'   Default is `NULL`
+#' @inheritParams rlang::args_dots_empty
+#'
+#' @return an ARD data frame of class 'card'
+#' @export
+#'
+#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "survey", reference_pkg = "cardx"))
+#' data(api, package = "survey")
+#' dclus1 <- survey::svydesign(id = ~dnum, weights = ~pw, data = apiclus1, fpc = ~fpc)
+#'
+#' ard_attributes(
+#'   data = dclus1,
+#'   variables = c(sname, dname),
+#'   label = list(sname = "School Name", dname = "District Name")
+#' )
+ard_attributes.survey.design <- function(data, variables = everything(), label = NULL, ...) {
+  set_cli_abort_call()
+
+  cards::ard_attributes(data = data[["variables"]], variables = {{ variables }}, label = label, ...)
+}
diff --git a/R/ard_survey_svycontinuous.R b/R/ard_continuous.survey.design.R
@@ -23,6 +23,7 @@
 #'   the list element is either a named list or a list of formulas defining the
 #'   statistic labels, e.g. `everything() ~ list(mean = "Mean", sd = "SD")` or
 #'   `everything() ~ list(mean ~ "Mean", sd ~ "SD")`.
+#' @inheritParams rlang::args_dots_empty
 #'
 #' @section statistic argument:
 #'
@@ -38,16 +39,18 @@
 #' data(api, package = "survey")
 #' dclus1 <- survey::svydesign(id = ~dnum, weights = ~pw, data = apiclus1, fpc = ~fpc)
 #'
-#' ard_survey_svycontinuous(
+#' ard_continuous(
 #'   data = dclus1,
 #'   variables = api00,
 #'   by = stype
 #' )
-ard_survey_svycontinuous <- function(data, variables, by = NULL,
-                                     statistic = everything() ~ c("median", "p25", "p75"),
-                                     fmt_fn = NULL,
-                                     stat_label = NULL) {
+ard_continuous.survey.design <- function(data, variables, by = NULL,
+                                         statistic = everything() ~ c("median", "p25", "p75"),
+                                         fmt_fn = NULL,
+                                         stat_label = NULL,
+                                         ...) {
   set_cli_abort_call()
+  check_dots_empty()
 
   # check installed packages ---------------------------------------------------
   check_pkg_installed(pkg = "survey", reference_pkg = "cardx")
@@ -68,7 +71,7 @@ ard_survey_svycontinuous <- function(data, variables, by = NULL,
   )
   cards::fill_formula_selectors(
     data$variables[variables],
-    statistic = formals(ard_survey_svycontinuous)[["statistic"]] |> eval()
+    statistic = formals(asNamespace("cardx")[["ard_continuous.survey.design"]])[["statistic"]] |> eval()
   )
   cards::check_list_elements(
     x = statistic,
@@ -190,7 +193,7 @@ accepted_svy_stats <- function(expand_quantiles = TRUE) {
   else if (stat_name %in% "max") args <- list(FUN = \(x, design, na.rm, ...) max(design$variables[[all.vars(x)]], na.rm = na.rm))
   # define functions for the quantiles
   else if (stat_name %in% c("median", paste0("p", 0:100))) {
-    quantile <- ifelse(stat_name %in% "median", 0.5, substr(stat_name, 2, nchar(stat_name)) |> as.numeric() %>% `/`(100))
+    quantile <- ifelse(stat_name %in% "median", 0.5, as.numeric(substr(stat_name, 2, nchar(stat_name))) / 100)
     # univariate results are returned in a different format from stratified.
     args <-
       if (is_empty(by)) list(FUN = \(...) survey::svyquantile(...)[[1]], quantiles = quantile)

diff --git a/R/ard_emmeans_mean_difference.R b/R/ard_emmeans_mean_difference.R
@@ -70,7 +70,7 @@ ard_emmeans_mean_difference <- function(data, formula, method,
   # construct primary model ----------------------------------------------------
   mod <-
     construct_model(
-      x = data, formula = formula, method = method,
+      data = data, formula = formula, method = method,
       method.args = {{ method.args }},
       package = package, env = caller_env()
     )

diff --git a/R/ard_proportion_ci.R b/R/ard_proportion_ci.R
@@ -18,23 +18,35 @@
 #'   See `?proportion_ci` for details.
 #' @param strata,weights,max.iterations arguments passed to `proportion_ci_strat_wilson()`,
 #'   when `method='strat_wilson'`
+#' @param value ([`formula-list-selector`][syntax])\cr
+#'   function will calculate the CIs for all levels of the variables specified.
+#'   Use this argument to instead request only a single level be summarized.
+#'   Default is `list(where(is_binary) ~ 1L, where(is.logical) ~ TRUE)`, where
+#'   columns coded as `0`/`1` and `TRUE`/`FALSE` will summarize the `1` and `TRUE` levels.
 #'
 #' @return an ARD data frame
 #' @export
 #'
 #' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "broom", reference_pkg = "cardx"))
+#' # compute CI for binary variables
 #' ard_proportion_ci(mtcars, variables = c(vs, am), method = "wilson")
-ard_proportion_ci <- function(data, variables, by = dplyr::group_vars(data),
-                              conf.level = 0.95,
-                              strata,
-                              weights = NULL,
-                              max.iterations = 10,
+#'
+#' # compute CIs for each level of a categorical variable
+#' ard_proportion_ci(mtcars, variables = cyl, method = "jeffreys")
+ard_proportion_ci <- function(data,
+                              variables,
+                              by = dplyr::group_vars(data),
                               method = c(
                                 "waldcc", "wald", "clopper-pearson",
                                 "wilson", "wilsoncc",
                                 "strat_wilson", "strat_wilsoncc",
                                 "agresti-coull", "jeffreys"
-                              )) {
+                              ),
+                              conf.level = 0.95,
+                              value = list(where(is_binary) ~ 1L, where(is.logical) ~ TRUE),
+                              strata = NULL,
+                              weights = NULL,
+                              max.iterations = 10) {
   set_cli_abort_call()
 
   # check installed packages ---------------------------------------------------
@@ -47,8 +59,43 @@ ard_proportion_ci <- function(data, variables, by = dplyr::group_vars(data),
     cards::process_selectors(data, strata = strata)
     check_scalar(strata)
   }
+  cards::process_formula_selectors(
+    data[variables],
+    value = value
+  )
 
   # calculate confidence intervals ---------------------------------------------
+  map(
+    variables,
+    function(variable) {
+      levels <- .unique_values_sort(data, variable = variable, value = value[[variable]])
+
+      .calculate_ard_proportion(
+        data = .as_dummy(data, variable = variable, levels = levels, by = by, strata = strata),
+        variables = c(everything(), -all_of(c(by, strata))),
+        by = all_of(by),
+        method = method,
+        conf.level = conf.level,
+        strata = strata,
+        weights = weights,
+        max.iterations = max.iterations
+      ) %>%
+        # merge in the variable levels
+        dplyr::left_join(
+          dplyr::select(., "variable") |>
+            dplyr::distinct() |>
+            dplyr::mutate(variable_level = as.list(.env$levels)),
+          by = "variable"
+        ) |>
+        # rename variable column
+        dplyr::mutate(variable = .env$variable) |>
+        dplyr::relocate("variable_level", .after = "variable")
+    }
+  ) |>
+    dplyr::bind_rows()
+}
+
+.calculate_ard_proportion <- function(data, variables, by, method, conf.level, strata, weights, max.iterations) {
   cards::ard_complex(
     data = data,
     variables = {{ variables }},
@@ -85,3 +132,35 @@ ard_proportion_ci <- function(data, variables, by = dplyr::group_vars(data),
       context = "proportion_ci"
     )
 }
+
+.unique_values_sort <- function(data, variable, value = NULL) {
+  unique_levels <-
+    # styler: off
+    if (is.logical(data[[variable]])) c(TRUE, FALSE)
+    else if (is.factor(data[[variable]])) factor(levels(data[[variable]]), levels = levels(data[[variable]]))
+    else unique(data[[variable]]) |> sort()
+  # styler: on
+
+  if (!is_empty(value) && !value %in% unique_levels) {
+    cli::cli_warn(
+      c("A value of {.code value={.val {value}}} for variable {.val {variable}}
+         was passed, but is not one of the observed levels: {.val {unique_levels}}.",
+        i = "This may be an error.",
+        i = "If value is a valid, convert variable to factor with all levels specified to avoid this message."
+      )
+    )
+  }
+  if (!is_empty(value)) {
+    unique_levels <- value
+  }
+
+  unique_levels
+}
+
+.as_dummy <- function(data, variable, levels, by, strata) {
+  # define dummy variables and return tibble
+  map(levels, ~ data[[variable]] == .x) |>
+    set_names(paste0("this_is_not_a_column_name_anyone_would_choose_", variable, "_", levels, "...")) %>%
+    {dplyr::tibble(!!!.)} |> # styler: off
+    dplyr::bind_cols(data[c(by, strata)])
+}
diff --git a/R/ard_stats_anova.R b/R/ard_stats_anova.R
@@ -122,7 +122,7 @@ ard_stats_anova.data.frame <- function(x,
         lapply(
           formulas,
           function(formula) {
-            construct_model(x = x, formula = formula, method = method, method.args = {{ method.args }}, package = package)
+            construct_model(data = x, formula = formula, method = method, method.args = {{ method.args }}, package = package)
           }
         )