From 1a585990e71296e60b5b9805eba3408d2aaeac2f Mon Sep 17 00:00:00 2001
From: Daniel Sjoberg <danield.sjoberg@gmail.com>
Date: Sat, 20 Apr 2024 16:18:26 -0700
Subject: [PATCH] Adding `ard_emmeans_mean_difference()` (#130)

**What changes are proposed in this pull request?**
* Adding `ard_emmeans_mean_difference()`. (#34)

Provide more detail here as needed.

**Reference GitHub issue associated with pull request.** _e.g., 'closes
#<issue number>'_
closes #34


--------------------------------------------------------------------------------

Pre-review Checklist (if item does not apply, mark is as complete)
- [ ] **All** GitHub Action workflows pass with a :white_check_mark:
- [ ] PR branch has pulled the most recent updates from master branch:
`usethis::pr_merge_main()`
- [ ] If a bug was fixed, a unit test was added.
- [ ] If a new `ard_*()` function was added, it passes the ARD
structural checks from `cards::check_ard_structure()`.
- [ ] If a new `ard_*()` function was added, `set_cli_abort_call()` has
been set.
- [ ] If a new `ard_*()` function was added and it depends on another
package (such as, `broom`), `is_pkg_installed("broom", reference_pkg =
"cardx")` has been set in the function call and the following added to
the roxygen comments: `@examplesIf
do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "broom"",
reference_pkg = "cardx"))`
- [ ] Code coverage is suitable for any new functions/features
(generally, 100% coverage for new code): `devtools::test_coverage()`

Reviewer Checklist (if item does not apply, mark is as complete)

- [ ] If a bug was fixed, a unit test was added.
- [ ] Code coverage is suitable for any new functions/features:
`devtools::test_coverage()`

When the branch is ready to be merged:
- [ ] Update `NEWS.md` with the changes from this pull request under the
heading "`# cardx (development version)`". If there is an issue
associated with the pull request, reference it in parentheses at the end
update (see `NEWS.md` for examples).
- [ ] **All** GitHub Action workflows pass with a :white_check_mark:
- [ ] Approve Pull Request
- [ ] Merge the PR. Please use "Squash and merge" or "Rebase and merge".
---
 .github/workflows/R-CMD-check.yaml            |   2 +-
 DESCRIPTION                                   |   1 +
 NAMESPACE                                     |   1 +
 NEWS.md                                       |   1 +
 R/ard_emmeans_mean_difference.R               | 134 ++++++++++++++++++
 _pkgdown.yml                                  |   1 +
 inst/WORDLIST                                 |   1 +
 man/ard_emmeans_mean_difference.Rd            |  80 +++++++++++
 .../test-ard_emmeans_mean_difference.R        |  86 +++++++++++
 9 files changed, 306 insertions(+), 1 deletion(-)
 create mode 100644 R/ard_emmeans_mean_difference.R
 create mode 100644 man/ard_emmeans_mean_difference.Rd
 create mode 100644 tests/testthat/test-ard_emmeans_mean_difference.R
diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
index 50de31520..a2dee80c7 100644
--- a/.github/workflows/R-CMD-check.yaml
+++ b/.github/workflows/R-CMD-check.yaml
@@ -29,7 +29,7 @@ jobs:
 
           - {os: ubuntu-latest,   r: 'devel', http-user-agent: 'release'}
           - {os: ubuntu-latest,   r: 'release'}
-          - {os: ubuntu-latest,   r: 'oldrel-1'}
+          # - {os: ubuntu-latest,   r: 'oldrel-1'}
           # - {os: ubuntu-latest,   r: 'oldrel-2'}
           # - {os: ubuntu-latest,   r: 'oldrel-3'}
           # - {os: ubuntu-latest,   r: 'oldrel-4'}
diff --git a/DESCRIPTION b/DESCRIPTION
index 8dca0f8a9..760f487f5 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -30,6 +30,7 @@ Suggests:
     broom.helpers (>= 1.15.0),
     car (>= 3.0-11),
     effectsize (>= 0.6.0),
+    emmeans (>= 1.7.3),
     geepack (>= 1.3.2),
     ggsurvfit (>= 1.0.0),
     lme4 (>= 1.1-31),
diff --git a/NAMESPACE b/NAMESPACE
index b971519aa..d1e1abdba 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -15,6 +15,7 @@ export(ard_effectsize_cohens_d)
 export(ard_effectsize_hedges_g)
 export(ard_effectsize_paired_cohens_d)
 export(ard_effectsize_paired_hedges_g)
+export(ard_emmeans_mean_difference)
 export(ard_proportion_ci)
 export(ard_regression)
 export(ard_regression_basic)
diff --git a/NEWS.md b/NEWS.md
index 16b19cd94..4a5ce8978 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -35,6 +35,7 @@ ard_moodtest()          -> ard_stats_mood_test()
   - `ard_survey_svyttest()` for weighted/survey t-tests using `survey::svyttest()`. (#70)
   - `ard_survey_svyranktest()` for weighted/survey rank tests using `survey::svyranktest()`. (#71)
   - `ard_car_vif()` for calculating the variance inflation factor using `car::vif()`. (#10)
+  - `ard_emmeans_mean_difference()` for calculating the least-squares mean differences using the {emmeans} package. (#34)
 
 * Updated functions `ard_stats_t_test()`, `ard_stats_paired_t_test()`, `ard_stats_wilcox_test()`, `ard_stats_paired_wilcox_test()`, `ard_stats_chisq_test()`, `ard_stats_fisher_test()`, `ard_stats_kruskal_test()`, `ard_stats_mcnemar_test()`, and `ard_stats_mood_test()` to accept multiple variables at once. Independent tests are calculated for each variable. The `variable` argument is renamed to `variables`. (#77)
 
diff --git a/R/ard_emmeans_mean_difference.R b/R/ard_emmeans_mean_difference.R
new file mode 100644
index 000000000..b4e2fa5e9
--- /dev/null
+++ b/R/ard_emmeans_mean_difference.R
@@ -0,0 +1,134 @@
+#' ARD for LS Mean Difference
+#'
+#' @description
+#' This function calculates least-squares mean differences using the 'emmeans'
+#' package using the following
+#'
+#' ```r
+#' emmeans::emmeans(object = <regression model>, specs = ~ <primary covariate>) |>
+#'   emmeans::contrast(method = "pairwise") |>
+#'   summary(infer = TRUE, level = <confidence level>)
+#' ```
+#'
+#' The arguments `data`, `formula`, `method`, `method.args`, `package` are used
+#' to construct the regression model via `cardx::construct_model()`.
+#'
+#' @param data (`data.frame`/`survey.design`)\cr
+#'   a data frame or survey design object
+#' @inheritParams construct_model
+#' @param response_type (`string`)
+#'   string indicating whether the model outcome is `'continuous'`
+#'   or `'binary'`. When `'binary'`, the call to `emmeans::emmeans()` is
+#'   supplemented with argument `regrid="response"`.
+#' @param conf.level (scalar `numeric`)\cr
+#'   confidence level for confidence interval. Default is `0.95`.
+#' @param primary_covariate (`string`)\cr
+#'   string indicating the primary covariate (typically the dichotomous treatment variable).
+#'   Default is the first covariate listed in the formula.
+#'
+#' @return ARD data frame
+#' @export
+#'
+#' @examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "emmeans", reference_pkg = "cardx"))
+#' ard_emmeans_mean_difference(
+#'   data = mtcars,
+#'   formula = mpg ~ am + cyl,
+#'   method = "lm"
+#' )
+#'
+#' ard_emmeans_mean_difference(
+#'   data = mtcars,
+#'   formula = vs ~ am + mpg,
+#'   method = "glm",
+#'   method.args = list(family = binomial),
+#'   response_type = "binary"
+#' )
+ard_emmeans_mean_difference <- function(data, formula, method,
+                                        method.args = list(),
+                                        package = "base",
+                                        response_type = c("continuous", "binary"),
+                                        conf.level = 0.95,
+                                        primary_covariate =
+                                          stats::terms(formula) |>
+                                            attr("term.labels") |>
+                                            getElement(1L)) {
+  set_cli_abort_call()
+
+  # check package installation -------------------------------------------------
+  check_pkg_installed(c("emmeans", package), reference_pkg = "cardx")
+  check_not_missing(data)
+  check_not_missing(formula)
+  check_not_missing(method)
+  check_class(data, c("data.frame", "survey.design"))
+  check_class(formula, cls = "formula")
+  check_string(package)
+  check_string(primary_covariate)
+  check_scalar(conf.level)
+  check_range(conf.level, range = c(0, 1))
+  response_type <- arg_match(response_type, error_call = get_cli_abort_call())
+
+  # construct primary model ----------------------------------------------------
+  mod <-
+    construct_model(
+      x = data, formula = formula, method = method,
+      method.args = {{ method.args }},
+      package = package, env = caller_env()
+    )
+
+  # emmeans --------------------------------------------------------------------
+  emmeans_args <- list(object = mod, specs = reformulate2(primary_covariate))
+  if (response_type %in% "binary") emmeans_args <- c(emmeans_args, list(regrid = "response"))
+  emmeans <-
+    withr::with_namespace(
+      package = "emmeans",
+      code = do.call("emmeans", args = emmeans_args)
+    )
+
+  df_results <-
+    emmeans |>
+    emmeans::contrast(method = "pairwise") |>
+    summary(infer = TRUE, level = conf.level)
+
+  # convert results to ARD format ----------------------------------------------
+  df_results |>
+    dplyr::as_tibble() |>
+    dplyr::rename(
+      conf.low = any_of("asymp.LCL"),
+      conf.high = any_of("asymp.UCL"),
+      conf.low = any_of("lower.CL"),
+      conf.high = any_of("upper.CL")
+    ) %>%
+    dplyr::select(
+      variable_level = "contrast",
+      "estimate",
+      std.error = "SE", "df",
+      "conf.low", "conf.high", "p.value"
+    ) %>%
+    dplyr::mutate(
+      conf.level = .env$conf.level,
+      method =
+        ifelse(
+          length(attr(stats::terms(formula), "term.labels") |> discard(~ startsWith(., "1 |"))) == 1L,
+          "Least-squares mean difference",
+          "Least-squares adjusted mean difference"
+        ),
+      across(everything(), as.list),
+      variable = "contrast",
+      group1 = .env$primary_covariate
+    ) |>
+    tidyr::pivot_longer(
+      cols = -c("group1", "variable", "variable_level"),
+      names_to = "stat_name",
+      values_to = "stat"
+    ) |>
+    dplyr::left_join(.df_ttest_stat_labels(primary_covariate), by = "stat_name") |>
+    dplyr::mutate(
+      context = "emmeans_mean_difference",
+      stat_label = dplyr::coalesce(.data$stat_label, .data$stat_name),
+      warning = list(NULL),
+      error = list(NULL),
+      fmt_fn = map(.data$stat, \(.x) if (is.numeric(.x)) 1L else NULL) # styler: off
+    ) |>
+    cards::tidy_ard_column_order() %>%
+    {structure(., class = c("card", class(.)))} # styler: off
+}
diff --git a/_pkgdown.yml b/_pkgdown.yml
index 9dace7a4a..e22a3e83d 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -47,6 +47,7 @@ reference:
       - ard_car_vif
       - ard_effectsize_cohens_d
       - ard_effectsize_hedges_g
+      - ard_emmeans_mean_difference
       - ard_proportion_ci
       - ard_regression
       - ard_regression_basic
diff --git a/inst/WORDLIST b/inst/WORDLIST
index 9ffe51b9f..073fb6ff4 100644
--- a/inst/WORDLIST
+++ b/inst/WORDLIST
@@ -24,6 +24,7 @@ clopper
 coull
 de
 deff
+emmeans
 funder
 jeffreys
 pearson
diff --git a/man/ard_emmeans_mean_difference.Rd b/man/ard_emmeans_mean_difference.Rd
new file mode 100644
index 000000000..2a7c9ffa9
--- /dev/null
+++ b/man/ard_emmeans_mean_difference.Rd
@@ -0,0 +1,80 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/ard_emmeans_mean_difference.R
+\name{ard_emmeans_mean_difference}
+\alias{ard_emmeans_mean_difference}
+\title{ARD for LS Mean Difference}
+\usage{
+ard_emmeans_mean_difference(
+  data,
+  formula,
+  method,
+  method.args = list(),
+  package = "base",
+  response_type = c("continuous", "binary"),
+  conf.level = 0.95,
+  primary_covariate = getElement(attr(stats::terms(formula), "term.labels"), 1L)
+)
+}
+\arguments{
+\item{data}{(\code{data.frame}/\code{survey.design})\cr
+a data frame or survey design object}
+
+\item{formula}{(\code{formula})\cr
+a formula}
+
+\item{method}{(\code{string})\cr
+string naming the function to be called, e.g. \code{"glm"}.
+If function belongs to a library that is not attached, the package name
+must be specified in the \code{package} argument.}
+
+\item{method.args}{(named \code{list})\cr
+named list of arguments that will be passed to \code{fn}.}
+
+\item{package}{(\code{string})\cr
+string of package name that will be temporarily loaded when function
+specified in \code{method} is executed.}
+
+\item{response_type}{(\code{string})
+string indicating whether the model outcome is \code{'continuous'}
+or \code{'binary'}. When \code{'binary'}, the call to \code{emmeans::emmeans()} is
+supplemented with argument \code{regrid="response"}.}
+
+\item{conf.level}{(scalar \code{numeric})\cr
+confidence level for confidence interval. Default is \code{0.95}.}
+
+\item{primary_covariate}{(\code{string})\cr
+string indicating the primary covariate (typically the dichotomous treatment variable).
+Default is the first covariate listed in the formula.}
+}
+\value{
+ARD data frame
+}
+\description{
+This function calculates least-squares mean differences using the 'emmeans'
+package using the following
+
+\if{html}{\out{<div class="sourceCode r">}}\preformatted{emmeans::emmeans(object = <regression model>, specs = ~ <primary covariate>) |>
+  emmeans::contrast(method = "pairwise") |>
+  summary(infer = TRUE, level = <confidence level>)
+}\if{html}{\out{</div>}}
+
+The arguments \code{data}, \code{formula}, \code{method}, \code{method.args}, \code{package} are used
+to construct the regression model via \code{cardx::construct_model()}.
+}
+\examples{
+\dontshow{if (do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "emmeans", reference_pkg = "cardx"))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+ard_emmeans_mean_difference(
+  data = mtcars,
+  formula = mpg ~ am + cyl,
+  method = "lm"
+)
+
+ard_emmeans_mean_difference(
+  data = mtcars,
+  formula = vs ~ am + mpg,
+  method = "glm",
+  method.args = list(family = binomial),
+  response_type = "binary"
+)
+\dontshow{\}) # examplesIf}
+}
diff --git a/tests/testthat/test-ard_emmeans_mean_difference.R b/tests/testthat/test-ard_emmeans_mean_difference.R
new file mode 100644
index 000000000..bb25c6785
--- /dev/null
+++ b/tests/testthat/test-ard_emmeans_mean_difference.R
@@ -0,0 +1,86 @@
+skip_if_not(is_pkg_installed(c("emmeans", "survey", "lme4"), reference_pkg = "cardx"))
+
+test_that("ard_emmeans_mean_difference() works", {
+  expect_error(
+    ard_emmeans_mean_difference <-
+      ard_emmeans_mean_difference(
+        data = mtcars,
+        formula = vs ~ am + mpg,
+        method = "glm",
+        method.args = list(family = binomial),
+        response_type = "binary"
+      ),
+    NA
+  )
+  expect_equal(
+    cards::get_ard_statistics(ard_emmeans_mean_difference, stat_name %in% "method"),
+    list(method = "Least-squares adjusted mean difference")
+  )
+  expect_equal(
+    cards::get_ard_statistics(ard_emmeans_mean_difference, stat_name %in% "estimate") |>
+      unlist() |>
+      unname(),
+    glm(vs ~ am + mpg, data = mtcars, family = binomial) |>
+      emmeans::emmeans(specs = ~am, regrid = "response") |>
+      emmeans::contrast(method = "pairwise") |>
+      summary(infer = TRUE) |>
+      getElement("estimate")
+  )
+
+
+  expect_error(
+    ard_emmeans_mean_difference_lme4 <-
+      ard_emmeans_mean_difference(
+        data = mtcars,
+        formula = vs ~ am + (1 | cyl),
+        method = "glmer",
+        method.args = list(family = binomial),
+        package = "lme4",
+        response_type = "binary"
+      ),
+    NA
+  )
+  expect_equal(
+    cards::get_ard_statistics(ard_emmeans_mean_difference_lme4, stat_name %in% "method"),
+    list(method = "Least-squares mean difference")
+  )
+  expect_equal(
+    cards::get_ard_statistics(ard_emmeans_mean_difference_lme4, stat_name %in% "estimate") |>
+      unlist() |>
+      unname(),
+    lme4::glmer(vs ~ am + (1 | cyl), data = mtcars, family = binomial) |>
+      emmeans::emmeans(specs = ~am, regrid = "response") |>
+      emmeans::contrast(method = "pairwise") |>
+      summary(infer = TRUE) |>
+      getElement("estimate")
+  )
+
+
+  #styler: off
+  expect_error({
+    data(api, package = "survey")
+    ard_emmeans_mean_difference_svy <-
+      survey::svydesign(id = ~dnum, weights = ~pw, data = apiclus1, fpc = ~fpc) |>
+      ard_emmeans_mean_difference(
+        formula = api00 ~ sch.wide,
+        method = "svyglm",
+        package = "survey"
+      )},
+    NA
+  )
+  # styler: on
+  expect_equal(
+    cards::get_ard_statistics(ard_emmeans_mean_difference_svy, stat_name %in% "method"),
+    list(method = "Least-squares mean difference")
+  )
+  expect_equal(
+    cards::get_ard_statistics(ard_emmeans_mean_difference_svy, stat_name %in% "estimate") |>
+      unlist() |>
+      unname(),
+    survey::svyglm(api00 ~ sch.wide, design = survey::svydesign(id = ~dnum, weights = ~pw, data = apiclus1, fpc = ~fpc)) |>
+      emmeans::emmeans(specs = ~sch.wide, regrid = "response") |>
+      emmeans::contrast(method = "pairwise") |>
+      summary(infer = TRUE) |>
+      getElement("estimate")
+  )
+})