insightsengineering · ddsjoberg · Mar 2, 2024 · Feb 21, 2024 · Feb 21, 2024 · Feb 21, 2024
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -3,6 +3,7 @@ Title: Extra Analysis Results Data Utilities
 Version: 0.0.0.9044
 Authors@R: c(
     person("Daniel", "Sjoberg", , "[email protected]", role = c("aut", "cre")),
+    person("Abinaya", "Yogasekaram", , "[email protected]", role = c("aut")),
     person("F. Hoffmann-La Roche AG", role = c("cph", "fnd"))
   )
 Description: Extra Analysis Results Data (ARD) functions. The package
@@ -23,6 +24,8 @@ Imports:
 Suggests:
     broom (>= 1.0.5),
     broom.helpers (>= 1.13.0),
+    effectsize (>= 0.6.0),
+    parameters (>= 0.20.2),
     spelling,
     testthat (>= 3.2.0),
     withr

diff --git a/NAMESPACE b/NAMESPACE
@@ -5,10 +5,14 @@ export("%>%")
 export(all_of)
 export(any_of)
 export(ard_chisqtest)
+export(ard_cohens_d)
 export(ard_fishertest)
+export(ard_hedges_g)
 export(ard_kruskaltest)
 export(ard_mcnemartest)
 export(ard_moodtest)
+export(ard_paired_cohens_d)
+export(ard_paired_hedges_g)
 export(ard_paired_ttest)
 export(ard_paired_wilcoxtest)
 export(ard_proportion_ci)

diff --git a/R/ard_cohens_d.R b/R/ard_cohens_d.R
@@ -0,0 +1,171 @@
+#' ARD Cohen's D Test
+#'
+#' @description
+#' Analysis results data for paired and non-paired Cohen's D Effect Size Test
+#' using [`effectsize::cohens_d()`].
+#'
+#' @param data (`data.frame`)\cr
+#'   a data frame. See below for details.
+#' @param by ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
+#'   column name to compare by. Must be a categorical variable with exactly two levels.
+#' @param variable ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
+#'   column name to be compared. Must be a continuous variable.
+#' @param id ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
+#'   column name of the subject or participant ID
+#' @param ... arguments passed to `effectsize::cohens_d(...)`
+#'
+#' @return ARD data frame
+#' @name ard_cohens_d
+#'
+#' @details
+#' For the `ard_cohens_d()` function, the data is expected to be one row per subject.
+#' The data is passed as `effectsize::cohens_d(data[[variable]]~data[[by]], data, paired = FALSE, ...)`.
+#'
+#' For the `ard_paired_cohens_d()` function, the data is expected to be one row
+#' per subject per by level. Before the effect size is calculated, the data are
+#' reshaped to a wide format to be one row per subject.
+#' The data are then passed as
+#' `effectsize::cohens_d(x = data_wide[[<by level 1>]], y = data_wide[[<by level 2>]], paired = TRUE, ...)`.
+#'
+#' @examples
+#' cards::ADSL |>
+#'   dplyr::filter(ARM %in% c("Placebo", "Xanomeline High Dose")) |>
+#'   ard_cohens_d(by = ARM, variable = AGE)
+#'
+#' # constructing a paired data set,
+#' # where patients receive both treatments
+#' cards::ADSL[c("ARM", "AGE")] |>
+#'   dplyr::filter(ARM %in% c("Placebo", "Xanomeline High Dose")) |>
+#'   dplyr::mutate(.by = ARM, USUBJID = dplyr::row_number()) |>
+#'   dplyr::arrange(USUBJID, ARM) |>
+#'   dplyr::group_by(USUBJID) |>
+#'   dplyr::filter(dplyr::n() > 1) |>
+#'   ard_paired_cohens_d(by = ARM, variable = AGE, id = USUBJID)
+NULL
+
+#' @rdname ard_cohens_d
+#' @export
+ard_cohens_d <- function(data, by, variable, ...) {
+  # check installed packages ---------------------------------------------------
+  cards::check_pkg_installed("effectsize", reference_pkg = "cardx")
+  cards::check_pkg_installed("parameters", reference_pkg = "cardx")
+
+  # check/process inputs -------------------------------------------------------
+  check_not_missing(data)
+  check_not_missing(variable)
+  check_not_missing(by)
+  check_data_frame(data)
+  data <- dplyr::ungroup(data)
+  cards::process_selectors(data, by = {{ by }}, variable = {{ variable }})
+  check_scalar(by)
+  check_scalar(variable)
+
+  # build ARD ------------------------------------------------------------------
+  .format_cohens_d_results(
+    by = by,
+    variable = variable,
+    lst_tidy =
+      cards::eval_capture_conditions(
+        effectsize::cohens_d(data[[variable]] ~ data[[by]], data = data, paired = FALSE, ...) |>
+          parameters::standardize_names(style = "broom")
+      ),
+    paired = FALSE,
+    ...
+  )
+}
+
+
+#' @rdname ard_cohens_d
+#' @export
+ard_paired_cohens_d <- function(data, by, variable, id, ...) {
+  # check installed packages ---------------------------------------------------
+  cards::check_pkg_installed("effectsize", reference_pkg = "cardx")
+  cards::check_pkg_installed("parameters", reference_pkg = "cardx")
+  # check/process inputs -------------------------------------------------------
+  check_not_missing(data)
+  check_not_missing(variable)
+  check_not_missing(by)
+  check_not_missing(id)
+  check_data_frame(data)
+  data <- dplyr::ungroup(data)
+  cards::process_selectors(data, by = {{ by }}, variable = {{ variable }}, id = {{ id }})
+  check_scalar(by)
+  check_scalar(variable)
+  check_scalar(id)
+
+  # build ARD ------------------------------------------------------------------
+  .format_cohens_d_results(
+    by = by,
+    variable = variable,
+    lst_tidy =
+      cards::eval_capture_conditions({
+        # adding this reshape inside the eval, so if there is an error it's captured in the ARD object
+        data_wide <- .paired_data_pivot_wider(data, by = by, variable = variable, id = id)
+        # perform paired cohen's d test
+        effectsize::cohens_d(x = data_wide[["by1"]], y = data_wide[["by2"]], paired = TRUE, ...) |>
+          parameters::standardize_names(style = "broom")
+      }),
+    paired = TRUE,
+    ...
+  )
+}
+
+.df_effectsize_stat_labels <- function() {
+  dplyr::tribble(
+    ~stat_name, ~stat_label,
+    "estimate", "Effect Size Estimate",
+    "conf.low", "CI Lower Bound",
+    "conf.high", "CI Upper Bound",
+    "conf.level", "CI Confidence Level",
+    "mu", "H0 Mean",
+    "paired", "Paired test",
+    "pooled_sd", "Pooled Standard Deviation",
+    "alternative", "Alternative Hypothesis"
+  )
+}
+
+
+#' Convert Cohen's D Test to ARD
+#'
+#' @inheritParams cards::tidy_as_ard
+#' @inheritParams effectsize::cohens_d
+#' @param by (`string`)\cr by column name
+#' @param variable (`string`)\cr variable column name
+#' @param ... passed to `cohens_d(...)`
+#'
+#' @return ARD data frame
+#' @keywords internal
+#' @examples
+#' cardx:::.format_cohens_d_results(
+#'   by = "ARM",
+#'   variable = "AGE",
+#'   paired = FALSE,
+#'   lst_tidy =
+#'     cards::eval_capture_conditions(
+#'       effectsize::hedges_g(data[[variable]] ~ data[[by]], paired = FALSE) |>
+#'         parameters::standardize_names(style = "broom")
+#'     )
+#' )
+.format_cohens_d_results <- function(by, variable, lst_tidy, paired, ...) {
+  # build ARD ------------------------------------------------------------------
+  ret <-
+    cards::tidy_as_ard(
+      lst_tidy = lst_tidy,
+      tidy_result_names = c(
+        "estimate", "conf.level", "conf.low", "conf.high"
+      ),
+      fun_args_to_record = c("mu", "paired", "pooled_sd", "alternative"),
+      formals = formals(asNamespace("effectsize")[["cohens_d"]]),
+      passed_args = c(list(paired = paired), dots_list(...)),
+      lst_ard_columns = list(group1 = by, variable = variable, context = "cohens_d")
+    )
+
+  # add the stat label ---------------------------------------------------------
+  ret |>
+    dplyr::left_join(
+      .df_effectsize_stat_labels(),
+      by = "stat_name"
+    ) |>
+    dplyr::mutate(stat_label = dplyr::coalesce(.data$stat_label, .data$stat_name)) |>
+    cards::tidy_ard_column_order()
+}
diff --git a/R/ard_hedges_g.R b/R/ard_hedges_g.R
@@ -0,0 +1,154 @@
+#' ARD Hedge's G Test
+#'
+#' @description
+#' Analysis results data for paired and non-paired Hedge's G Effect Size Test
+#' using [`effectsize::hedges_g()`].
+#'
+#' @param data (`data.frame`)\cr
+#'   a data frame. See below for details.
+#' @param by ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
+#'   column name to compare by. Must be a categorical variable with exactly two levels.
+#' @param variable ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
+#'   column name to be compared. Must be a continuous variable.
+#' @param id ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
+#'   column name of the subject or participant ID
+#' @param ... arguments passed to `effectsize::hedges_g(...)`
+#'
+#' @return ARD data frame
+#' @name ard_hedges_g
+#'
+#' @details
+#' For the `ard_hedges_g()` function, the data is expected to be one row per subject.
+#' The data is passed as `effectsize::hedges_g(data[[variable]]~data[[by]], data, paired = FALSE, ...)`.
+#'
+#' For the `ard_paired_hedges_g()` function, the data is expected to be one row
+#' per subject per by level. Before the effect size is calculated, the data are
+#' reshaped to a wide format to be one row per subject.
+#' The data are then passed as
+#' `effectsize::hedges_g(x = data_wide[[<by level 1>]], y = data_wide[[<by level 2>]], paired = TRUE, ...)`.
+#'
+#' @examples
+#' cards::ADSL |>
+#'   dplyr::filter(ARM %in% c("Placebo", "Xanomeline High Dose")) |>
+#'   ard_hedges_g(by = ARM, variable = AGE)
+#'
+#' # constructing a paired data set,
+#' # where patients receive both treatments
+#' cards::ADSL[c("ARM", "AGE")] |>
+#'   dplyr::filter(ARM %in% c("Placebo", "Xanomeline High Dose")) |>
+#'   dplyr::mutate(.by = ARM, USUBJID = dplyr::row_number()) |>
+#'   dplyr::arrange(USUBJID, ARM) |>
+#'   dplyr::group_by(USUBJID) |>
+#'   dplyr::filter(dplyr::n() > 1) |>
+#'   ard_paired_hedges_g(by = ARM, variable = AGE, id = USUBJID)
+NULL
+
+#' @rdname ard_hedges_g
+#' @export
+ard_hedges_g <- function(data, by, variable, ...) {
+  # check installed packages ---------------------------------------------------
+  cards::check_pkg_installed("effectsize", reference_pkg = "cardx")
+  cards::check_pkg_installed("parameters", reference_pkg = "cardx")
+
+  # check/process inputs -------------------------------------------------------
+  check_not_missing(data)
+  check_data_frame(data)
+  data <- dplyr::ungroup(data)
+  cards::process_selectors(data, by = {{ by }}, variable = {{ variable }})
+  check_scalar(by)
+  check_scalar(variable)
+
+  # build ARD ------------------------------------------------------------------
+  .format_hedges_g_results(
+    by = by,
+    variable = variable,
+    lst_tidy =
+      cards::eval_capture_conditions(
+        effectsize::hedges_g(data[[variable]] ~ data[[by]], paired = FALSE, ...) |>
+          parameters::standardize_names(style = "broom")
+      ),
+    paired = FALSE,
+    ...
+  )
+}
+
+#' @rdname ard_hedges_g
+#' @export
+ard_paired_hedges_g <- function(data, by, variable, id, ...) {
+  # check installed packages ---------------------------------------------------
+  cards::check_pkg_installed("effectsize", reference_pkg = "cardx")
+  cards::check_pkg_installed("parameters", reference_pkg = "cardx")
+
+  # check/process inputs -------------------------------------------------------
+  check_not_missing(data)
+  check_not_missing(variable)
+  check_not_missing(by)
+  check_not_missing(id)
+  check_data_frame(data)
+  data <- dplyr::ungroup(data)
+  cards::process_selectors(data, by = {{ by }}, variable = {{ variable }}, id = {{ id }})
+  check_scalar(by)
+  check_scalar(variable)
+  check_scalar(id)
+
+  # build ARD ------------------------------------------------------------------
+  .format_hedges_g_results(
+    by = by,
+    variable = variable,
+    lst_tidy =
+      cards::eval_capture_conditions({
+        # adding this reshape inside the eval, so if there is an error it's captured in the ARD object
+        data_wide <- .paired_data_pivot_wider(data, by = by, variable = variable, id = id)
+        # perform paired cohen's d test
+        effectsize::hedges_g(x = data_wide[["by1"]], y = data_wide[["by2"]], paired = TRUE, ...) |>
+          parameters::standardize_names(style = "broom")
+      }),
+    paired = TRUE,
+    ...
+  )
+}
+
+#' Convert Hedge's G Test to ARD
+#'
+#' @inheritParams cards::tidy_as_ard
+#' @inheritParams effectsize::hedges_g
+#' @param by (`string`)\cr by column name
+#' @param variable (`string`)\cr variable column name
+#' @param ... passed to `hedges_g(...)`
+#'
+#' @return ARD data frame
+#' @keywords internal
+#' @examples
+#' cardx:::.format_hedges_g_results(
+#'   by = "ARM",
+#'   variable = "AGE",
+#'   paired = FALSE,
+#'   lst_tidy =
+#'     cards::eval_capture_conditions(
+#'       effectsize::hedges_g(data[[variable]] ~ data[[by]], paired = FALSE) |>
+#'         parameters::standardize_names(style = "broom")
+#'     )
+#' )
+.format_hedges_g_results <- function(by, variable, lst_tidy, paired, ...) {
+  # build ARD ------------------------------------------------------------------
+  ret <-
+    cards::tidy_as_ard(
+      lst_tidy = lst_tidy,
+      tidy_result_names = c(
+        "estimate", "conf.level", "conf.low", "conf.high"
+      ),
+      fun_args_to_record = c("mu", "paired", "pooled_sd", "alternative"),
+      formals = formals(asNamespace("effectsize")[["hedges_g"]]),
+      passed_args = c(list(paired = paired), dots_list(...)),
+      lst_ard_columns = list(group1 = by, variable = variable, context = "hedges_g")
+    )
+
+  # add the stat label ---------------------------------------------------------
+  ret |>
+    dplyr::left_join(
+      .df_effectsize_stat_labels(),
+      by = "stat_name"
+    ) |>
+    dplyr::mutate(stat_label = dplyr::coalesce(.data$stat_label, .data$stat_name)) |>
+    cards::tidy_ard_column_order()
+}
diff --git a/_pkgdown.yml b/_pkgdown.yml
@@ -34,6 +34,8 @@ reference:
 
   - subtitle: "Estimation"
   - contents:
+      - ard_cohens_d
+      - ard_hedges_g
       - ard_proportion_ci
       - ard_regression
       - ard_regression_basic