From 39ccf3f0cd68401f8a6b3d524470cb3b994fe13f Mon Sep 17 00:00:00 2001
From: Davide Garolini <dgarolini@gmail.com>
Date: Wed, 21 Feb 2024 19:07:53 +0100
Subject: [PATCH] McNemar's statistical test addition: `ard_mcnemartest` (#51)

**What changes are proposed in this pull request?**
* Added `ard_mcnemartest()` statistical test function.

Closes #40

* I am wondering if we need to have `ccorrect = TRUE` as default,
following the default in stats, and if we should make this option more
apparent in the case we change this default.

* Also, I do not know if it would be relevant but I used an utility
function to find dichotomies in a data-set that could result useful as a
tool if there is not one already doing this:
``` r
# Assuming your data.frame is named df
df <- data.frame(
  A = c(1, 0, 1, 0), # dichotomous
  B = c(1, 2, 3, 4), # not dichotomous
  C = c("Yes", "No", "Yes", "No"), # dichotomous
  D = c(TRUE, FALSE, TRUE, FALSE) # dichotomous
)

# Function to find dichotomous columns
find_dichotomous_columns <- function(df) {
  dichotomous_columns <- c()
  for (col_name in names(df)) {
    if (length(unique(df[[col_name]])) == 2) {
      dichotomous_columns <- c(dichotomous_columns, col_name)
    }
  }
  return(dichotomous_columns)
}

# Find and display dichotomous columns
dichotomous_columns <- find_dichotomous_columns(df)
print(dichotomous_columns)
#> [1] "A" "C" "D"
di_cols <- find_dichotomous_columns(cards::ADSL)
print(di_cols)
#>  [1] "SEX"      "ETHNIC"   "EFFFL"    "COMP8FL"  "COMP16FL" "COMP24FL"
#>  [7] "DISCONFL" "DSRAEFL"  "DTHFL"    "DURDSGR1"
```

<sup>Created on 2024-02-14 with [reprex
v2.1.0](https://reprex.tidyverse.org)</sup>

* `.paired_data_pivot_wider` is in the Wilcoxon test. Should we have it
as a tool in a dedicated file? It looks general to me.

--------------------------------------------------------------------------------

Pre-review Checklist (if item does not apply, mark is as complete)
- [x] **All** GitHub Action workflows pass with a :white_check_mark:
- [x] PR branch has pulled the most recent updates from master branch:
`usethis::pr_merge_main()`
- [x] If a bug was fixed, a unit test was added.
- [x] Code coverage is suitable for any new functions/features
(generally, 100% coverage for new code): `devtools::test_coverage()`
- [x] Request a reviewer

Reviewer Checklist (if item does not apply, mark is as complete)

- [ ] If a bug was fixed, a unit test was added.
- [ ] Run `pkgdown::build_site()`. Check the R console for errors, and
review the rendered website.
- [ ] Code coverage is suitable for any new functions/features:
`devtools::test_coverage()`

When the branch is ready to be merged:
- [ ] Update `NEWS.md` with the changes from this pull request under the
heading "`# cards (development version)`". If there is an issue
associated with the pull request, reference it in parentheses at the end
update (see `NEWS.md` for examples).
- [ ] **All** GitHub Action workflows pass with a :white_check_mark:
- [ ] Approve Pull Request
- [ ] Merge the PR. Please use "Squash and merge" or "Rebase and merge".

---------

Signed-off-by: Davide Garolini <davide.garolini@roche.com>
Signed-off-by: Davide Garolini <dgarolini@gmail.com>
Signed-off-by: Daniel Sjoberg <danield.sjoberg@gmail.com>
Co-authored-by: Abinaya Yogasekaram <73252787+ayogasekaram@users.noreply.github.com>
Co-authored-by: Daniel Sjoberg <danield.sjoberg@gmail.com>
---
 NAMESPACE                             |   1 +
 R/ard_mcnemartest.R                   | 108 ++++++++++++++++++++++++++
 R/ard_wilcoxtest.R                    |  12 ++-
 _pkgdown.yml                          |   1 +
 inst/WORDLIST                         |   1 +
 man/ard_mcnemartest.Rd                |  36 +++++++++
 man/dot-format_mcnemartest_results.Rd |  38 +++++++++
 man/dot-format_wilcoxtest_results.Rd  |   8 +-
 tests/testthat/test-ard_mcnemartest.R |  40 ++++++++++
 9 files changed, 242 insertions(+), 3 deletions(-)
 create mode 100644 R/ard_mcnemartest.R
 create mode 100644 man/ard_mcnemartest.Rd
 create mode 100644 man/dot-format_mcnemartest_results.Rd
 create mode 100644 tests/testthat/test-ard_mcnemartest.R
diff --git a/NAMESPACE b/NAMESPACE
index d58df76b4..6d710dda8 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -7,6 +7,7 @@ export(any_of)
 export(ard_chisqtest)
 export(ard_fishertest)
 export(ard_kruskaltest)
+export(ard_mcnemartest)
 export(ard_paired_ttest)
 export(ard_paired_wilcoxtest)
 export(ard_proportion_ci)
diff --git a/R/ard_mcnemartest.R b/R/ard_mcnemartest.R
new file mode 100644
index 000000000..168a8f51c
--- /dev/null
+++ b/R/ard_mcnemartest.R
@@ -0,0 +1,108 @@
+#' ARD McNemar's Test
+#'
+#' @description
+#' Analysis results data for McNemar's statistical test.
+#'
+#' @param data (`data.frame`)\cr
+#'   a data frame. See below for details.
+#' @param by ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
+#'   column name to compare by.
+#' @param variable ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
+#'   column name to be compared.
+#' @param ... arguments passed to `stats::mcnemar.test(...)`
+#'
+#' @return ARD data frame
+#' @name ard_mcnemartest
+#'
+#' @details
+#' For the `ard_mcnemartest()` function, the data is expected to be one row per subject.
+#' The data is passed as `stats::mcnemar.test(x = data[[variable]], y = data[[by]], ...)`.
+#' Please use `table(x = data[[variable]], y = data[[by]])` to check the contingency table.
+#'
+#' @examples
+#' cards::ADSL |>
+#'   ard_mcnemartest(by = "SEX", variable = "EFFFL")
+#'
+NULL
+
+#' @rdname ard_mcnemartest
+#' @export
+ard_mcnemartest <- function(data, by, variable, ...) {
+  # check installed packages ---------------------------------------------------
+  cards::check_pkg_installed("broom", reference_pkg = "cardx")
+
+  # check/process inputs -------------------------------------------------------
+  check_not_missing(data)
+  check_not_missing(variable)
+  check_not_missing(by)
+  check_class_data_frame(x = data)
+  data <- dplyr::ungroup(data)
+  cards::process_selectors(data, by = {{ by }}, variable = {{ variable }})
+  check_scalar(by)
+  check_scalar(variable)
+
+  # build ARD ------------------------------------------------------------------
+  .format_mcnemartest_results(
+    by = by,
+    variable = variable,
+    lst_tidy =
+      cards::eval_capture_conditions(
+        stats::mcnemar.test(x = data[[variable]], y = data[[by]], ...) |>
+          broom::tidy()
+      ),
+    ...
+  )
+}
+
+#' Convert McNemar's test to ARD
+#'
+#' @inheritParams cards::tidy_as_ard
+#' @inheritParams stats::mcnemar.test
+#' @param by (`string`)\cr by column name
+#' @param variable (`string`)\cr variable column name
+#' @param ... passed to `stats::mcnemar.test(...)`
+#'
+#' @return ARD data frame
+#'
+#' @examples
+#' cardx:::.format_mcnemartest_results(
+#'   by = "ARM",
+#'   variable = "AGE",
+#'   lst_tidy =
+#'     cards::eval_capture_conditions(
+#'       stats::mcnemar.test(cards::ADSL[["SEX"]], cards::ADSL[["EFFFL"]]) |>
+#'         broom::tidy()
+#'     )
+#' )
+#'
+#' @keywords internal
+.format_mcnemartest_results <- function(by, variable, lst_tidy, ...) {
+  # build ARD ------------------------------------------------------------------
+  ret <-
+    cards::tidy_as_ard(
+      lst_tidy = lst_tidy,
+      tidy_result_names = c("statistic", "p.value", "method"),
+      fun_args_to_record = c("correct"),
+      formals = formals(asNamespace("stats")[["mcnemar.test"]]),
+      passed_args = dots_list(...),
+      lst_ard_columns = list(group1 = by, variable = variable, context = "mcnemartest")
+    )
+
+  # add the stat label ---------------------------------------------------------
+  ret |>
+    dplyr::left_join(
+      .df_mcnemar_stat_labels(),
+      by = "stat_name"
+    ) |>
+    dplyr::mutate(stat_label = dplyr::coalesce(.data$stat_label, .data$stat_name)) |>
+    cards::tidy_ard_column_order()
+}
+
+.df_mcnemar_stat_labels <- function() {
+  dplyr::tribble(
+    ~stat_name, ~stat_label,
+    "statistic", "X-squared Statistic",
+    "parameter", "Degrees of Freedom",
+    "p.value", "p-value",
+  )
+}
diff --git a/R/ard_wilcoxtest.R b/R/ard_wilcoxtest.R
index 3ed5a1e4a..071c77ba8 100644
--- a/R/ard_wilcoxtest.R
+++ b/R/ard_wilcoxtest.R
@@ -105,17 +105,23 @@ ard_paired_wilcoxtest <- function(data, by, variable, id, ...) {
   )
 }
 
+
 #' Convert Wilcoxon test to ARD
 #'
 #' @inheritParams cards::tidy_as_ard
 #' @inheritParams stats::wilcox.test
 #' @param by (`string`)\cr by column name
 #' @param variable (`string`)\cr variable column name
-#' @param ... passed to `wilcox.test(...)`
+#' @param ... passed to `stats::wilcox.test(...)`
 #'
 #' @return ARD data frame
-#' @keywords internal
+#'
 #' @examples
+#' # Pre-processing ADSL to have grouping factor (ARM here) with 2 levels
+#' ADSL <- cards::ADSL |>
+#'   dplyr::filter(ARM %in% c("Placebo", "Xanomeline High Dose")) |>
+#'   ard_wilcoxtest(by = "ARM", variable = "AGE")
+#'
 #' cardx:::.format_wilcoxtest_results(
 #'   by = "ARM",
 #'   variable = "AGE",
@@ -126,6 +132,8 @@ ard_paired_wilcoxtest <- function(data, by, variable, id, ...) {
 #'         broom::tidy()
 #'     )
 #' )
+#'
+#' @keywords internal
 .format_wilcoxtest_results <- function(by, variable, lst_tidy, paired, ...) {
   # build ARD ------------------------------------------------------------------
   ret <-
diff --git a/_pkgdown.yml b/_pkgdown.yml
index b3c97b7c0..e23c70a05 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -27,6 +27,7 @@ reference:
       - ard_chisqtest
       - ard_fishertest
       - ard_kruskaltest
+      - ard_mcnemartest
       - ard_ttest
       - ard_wilcoxtest
 
diff --git a/inst/WORDLIST b/inst/WORDLIST
index 1a602fd4c..6aabb79bb 100644
--- a/inst/WORDLIST
+++ b/inst/WORDLIST
@@ -5,6 +5,7 @@ Clopper
 Codecov
 Hoffmann
 Jeffreys
+McNemar's
 Lifecycle
 Newcombe
 Su
diff --git a/man/ard_mcnemartest.Rd b/man/ard_mcnemartest.Rd
new file mode 100644
index 000000000..7fdfafceb
--- /dev/null
+++ b/man/ard_mcnemartest.Rd
@@ -0,0 +1,36 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/ard_mcnemartest.R
+\name{ard_mcnemartest}
+\alias{ard_mcnemartest}
+\title{ARD McNemar's Test}
+\usage{
+ard_mcnemartest(data, by, variable, ...)
+}
+\arguments{
+\item{data}{(\code{data.frame})\cr
+a data frame. See below for details.}
+
+\item{by}{(\code{\link[dplyr:dplyr_tidy_select]{tidy-select}})\cr
+column name to compare by.}
+
+\item{variable}{(\code{\link[dplyr:dplyr_tidy_select]{tidy-select}})\cr
+column name to be compared.}
+
+\item{...}{arguments passed to \code{stats::mcnemar.test(...)}}
+}
+\value{
+ARD data frame
+}
+\description{
+Analysis results data for McNemar's statistical test.
+}
+\details{
+For the \code{ard_mcnemartest()} function, the data is expected to be one row per subject.
+The data is passed as \code{stats::mcnemar.test(x = data[[variable]], y = data[[by]], ...)}.
+Please use \code{table(x = data[[variable]], y = data[[by]])} to check the contingency table.
+}
+\examples{
+cards::ADSL |>
+  ard_mcnemartest(by = "SEX", variable = "EFFFL")
+
+}
diff --git a/man/dot-format_mcnemartest_results.Rd b/man/dot-format_mcnemartest_results.Rd
new file mode 100644
index 000000000..a809a791c
--- /dev/null
+++ b/man/dot-format_mcnemartest_results.Rd
@@ -0,0 +1,38 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/ard_mcnemartest.R
+\name{.format_mcnemartest_results}
+\alias{.format_mcnemartest_results}
+\title{Convert McNemar's test to ARD}
+\usage{
+.format_mcnemartest_results(by, variable, lst_tidy, ...)
+}
+\arguments{
+\item{by}{(\code{string})\cr by column name}
+
+\item{variable}{(\code{string})\cr variable column name}
+
+\item{lst_tidy}{(named \code{list})\cr
+list of tidied results constructed with \code{\link[cards:eval_capture_conditions]{eval_capture_conditions()}},
+e.g. \code{eval_capture_conditions(t.test(mtcars$mpg ~ mtcars$am) |> broom::tidy())}.}
+
+\item{...}{passed to \code{stats::mcnemar.test(...)}}
+}
+\value{
+ARD data frame
+}
+\description{
+Convert McNemar's test to ARD
+}
+\examples{
+cardx:::.format_mcnemartest_results(
+  by = "ARM",
+  variable = "AGE",
+  lst_tidy =
+    cards::eval_capture_conditions(
+      stats::mcnemar.test(cards::ADSL[["SEX"]], cards::ADSL[["EFFFL"]]) |>
+        broom::tidy()
+    )
+)
+
+}
+\keyword{internal}
diff --git a/man/dot-format_wilcoxtest_results.Rd b/man/dot-format_wilcoxtest_results.Rd
index 55318dba0..10f70950e 100644
--- a/man/dot-format_wilcoxtest_results.Rd
+++ b/man/dot-format_wilcoxtest_results.Rd
@@ -17,7 +17,7 @@ e.g. \code{eval_capture_conditions(t.test(mtcars$mpg ~ mtcars$am) |> broom::tidy
 
 \item{paired}{a logical indicating whether you want a paired test.}
 
-\item{...}{passed to \code{wilcox.test(...)}}
+\item{...}{passed to \code{stats::wilcox.test(...)}}
 }
 \value{
 ARD data frame
@@ -26,6 +26,11 @@ ARD data frame
 Convert Wilcoxon test to ARD
 }
 \examples{
+# Pre-processing ADSL to have grouping factor (ARM here) with 2 levels
+ADSL <- cards::ADSL |>
+  dplyr::filter(ARM \%in\% c("Placebo", "Xanomeline High Dose")) |>
+  ard_wilcoxtest(by = "ARM", variable = "AGE")
+
 cardx:::.format_wilcoxtest_results(
   by = "ARM",
   variable = "AGE",
@@ -36,5 +41,6 @@ cardx:::.format_wilcoxtest_results(
         broom::tidy()
     )
 )
+
 }
 \keyword{internal}
diff --git a/tests/testthat/test-ard_mcnemartest.R b/tests/testthat/test-ard_mcnemartest.R
new file mode 100644
index 000000000..de4f4e582
--- /dev/null
+++ b/tests/testthat/test-ard_mcnemartest.R
@@ -0,0 +1,40 @@
+test_that("ard_mcnemartest() works", {
+  expect_error(
+    ard_mcnemartest <-
+      cards::ADSL |>
+      ard_mcnemartest(by = SEX, variable = EFFFL),
+    NA
+  )
+
+  expect_equal(
+    ard_mcnemartest |>
+      cards::get_ard_statistics(stat_name %in% c("statistic", "p.value", "parameter", "method")),
+    stats::mcnemar.test(cards::ADSL[["SEX"]], cards::ADSL[["EFFFL"]], correct = TRUE) |>
+      broom::tidy() |>
+      unclass(),
+    ignore_attr = TRUE
+  )
+
+  # errors are properly handled
+  expect_equal(
+    cards::ADSL |>
+      ard_mcnemartest(by = ARM, variable = AGE, correct = FALSE) |>
+      dplyr::pull(error) |>
+      getElement(1L),
+    "'x' and 'y' must have the same number of levels (minimum 2)"
+  )
+
+  # non-syntactic column names work too
+  ADSL_tmp <- cards::ADSL |>
+    dplyr::rename("if" = AGE, "_c d" = EFFFL)
+
+  expect_equal(
+    cards::ADSL |>
+      dplyr::rename(`Planned Tx` = TRT01P, `Age Group` = AGEGR1) |>
+      ard_mcnemartest(by = `Planned Tx`, variable = `Age Group`) |>
+      cards::get_ard_statistics(),
+    cards::ADSL |>
+      ard_mcnemartest(by = TRT01P, variable = AGEGR1) |>
+      cards::get_ard_statistics()
+  )
+})