From 39ccf3f0cd68401f8a6b3d524470cb3b994fe13f Mon Sep 17 00:00:00 2001 From: Davide Garolini Date: Wed, 21 Feb 2024 19:07:53 +0100 Subject: [PATCH] McNemar's statistical test addition: `ard_mcnemartest` (#51) **What changes are proposed in this pull request?** * Added `ard_mcnemartest()` statistical test function. Closes #40 * I am wondering if we need to have `ccorrect = TRUE` as default, following the default in stats, and if we should make this option more apparent in the case we change this default. * Also, I do not know if it would be relevant but I used an utility function to find dichotomies in a data-set that could result useful as a tool if there is not one already doing this: ``` r # Assuming your data.frame is named df df <- data.frame( A = c(1, 0, 1, 0), # dichotomous B = c(1, 2, 3, 4), # not dichotomous C = c("Yes", "No", "Yes", "No"), # dichotomous D = c(TRUE, FALSE, TRUE, FALSE) # dichotomous ) # Function to find dichotomous columns find_dichotomous_columns <- function(df) { dichotomous_columns <- c() for (col_name in names(df)) { if (length(unique(df[[col_name]])) == 2) { dichotomous_columns <- c(dichotomous_columns, col_name) } } return(dichotomous_columns) } # Find and display dichotomous columns dichotomous_columns <- find_dichotomous_columns(df) print(dichotomous_columns) #> [1] "A" "C" "D" di_cols <- find_dichotomous_columns(cards::ADSL) print(di_cols) #> [1] "SEX" "ETHNIC" "EFFFL" "COMP8FL" "COMP16FL" "COMP24FL" #> [7] "DISCONFL" "DSRAEFL" "DTHFL" "DURDSGR1" ``` Created on 2024-02-14 with [reprex v2.1.0](https://reprex.tidyverse.org) * `.paired_data_pivot_wider` is in the Wilcoxon test. Should we have it as a tool in a dedicated file? It looks general to me. -------------------------------------------------------------------------------- Pre-review Checklist (if item does not apply, mark is as complete) - [x] **All** GitHub Action workflows pass with a :white_check_mark: - [x] PR branch has pulled the most recent updates from master branch: `usethis::pr_merge_main()` - [x] If a bug was fixed, a unit test was added. - [x] Code coverage is suitable for any new functions/features (generally, 100% coverage for new code): `devtools::test_coverage()` - [x] Request a reviewer Reviewer Checklist (if item does not apply, mark is as complete) - [ ] If a bug was fixed, a unit test was added. - [ ] Run `pkgdown::build_site()`. Check the R console for errors, and review the rendered website. - [ ] Code coverage is suitable for any new functions/features: `devtools::test_coverage()` When the branch is ready to be merged: - [ ] Update `NEWS.md` with the changes from this pull request under the heading "`# cards (development version)`". If there is an issue associated with the pull request, reference it in parentheses at the end update (see `NEWS.md` for examples). - [ ] **All** GitHub Action workflows pass with a :white_check_mark: - [ ] Approve Pull Request - [ ] Merge the PR. Please use "Squash and merge" or "Rebase and merge". --------- Signed-off-by: Davide Garolini Signed-off-by: Davide Garolini Signed-off-by: Daniel Sjoberg Co-authored-by: Abinaya Yogasekaram <73252787+ayogasekaram@users.noreply.github.com> Co-authored-by: Daniel Sjoberg --- NAMESPACE | 1 + R/ard_mcnemartest.R | 108 ++++++++++++++++++++++++++ R/ard_wilcoxtest.R | 12 ++- _pkgdown.yml | 1 + inst/WORDLIST | 1 + man/ard_mcnemartest.Rd | 36 +++++++++ man/dot-format_mcnemartest_results.Rd | 38 +++++++++ man/dot-format_wilcoxtest_results.Rd | 8 +- tests/testthat/test-ard_mcnemartest.R | 40 ++++++++++ 9 files changed, 242 insertions(+), 3 deletions(-) create mode 100644 R/ard_mcnemartest.R create mode 100644 man/ard_mcnemartest.Rd create mode 100644 man/dot-format_mcnemartest_results.Rd create mode 100644 tests/testthat/test-ard_mcnemartest.R diff --git a/NAMESPACE b/NAMESPACE index d58df76b4..6d710dda8 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -7,6 +7,7 @@ export(any_of) export(ard_chisqtest) export(ard_fishertest) export(ard_kruskaltest) +export(ard_mcnemartest) export(ard_paired_ttest) export(ard_paired_wilcoxtest) export(ard_proportion_ci) diff --git a/R/ard_mcnemartest.R b/R/ard_mcnemartest.R new file mode 100644 index 000000000..168a8f51c --- /dev/null +++ b/R/ard_mcnemartest.R @@ -0,0 +1,108 @@ +#' ARD McNemar's Test +#' +#' @description +#' Analysis results data for McNemar's statistical test. +#' +#' @param data (`data.frame`)\cr +#' a data frame. See below for details. +#' @param by ([`tidy-select`][dplyr::dplyr_tidy_select])\cr +#' column name to compare by. +#' @param variable ([`tidy-select`][dplyr::dplyr_tidy_select])\cr +#' column name to be compared. +#' @param ... arguments passed to `stats::mcnemar.test(...)` +#' +#' @return ARD data frame +#' @name ard_mcnemartest +#' +#' @details +#' For the `ard_mcnemartest()` function, the data is expected to be one row per subject. +#' The data is passed as `stats::mcnemar.test(x = data[[variable]], y = data[[by]], ...)`. +#' Please use `table(x = data[[variable]], y = data[[by]])` to check the contingency table. +#' +#' @examples +#' cards::ADSL |> +#' ard_mcnemartest(by = "SEX", variable = "EFFFL") +#' +NULL + +#' @rdname ard_mcnemartest +#' @export +ard_mcnemartest <- function(data, by, variable, ...) { + # check installed packages --------------------------------------------------- + cards::check_pkg_installed("broom", reference_pkg = "cardx") + + # check/process inputs ------------------------------------------------------- + check_not_missing(data) + check_not_missing(variable) + check_not_missing(by) + check_class_data_frame(x = data) + data <- dplyr::ungroup(data) + cards::process_selectors(data, by = {{ by }}, variable = {{ variable }}) + check_scalar(by) + check_scalar(variable) + + # build ARD ------------------------------------------------------------------ + .format_mcnemartest_results( + by = by, + variable = variable, + lst_tidy = + cards::eval_capture_conditions( + stats::mcnemar.test(x = data[[variable]], y = data[[by]], ...) |> + broom::tidy() + ), + ... + ) +} + +#' Convert McNemar's test to ARD +#' +#' @inheritParams cards::tidy_as_ard +#' @inheritParams stats::mcnemar.test +#' @param by (`string`)\cr by column name +#' @param variable (`string`)\cr variable column name +#' @param ... passed to `stats::mcnemar.test(...)` +#' +#' @return ARD data frame +#' +#' @examples +#' cardx:::.format_mcnemartest_results( +#' by = "ARM", +#' variable = "AGE", +#' lst_tidy = +#' cards::eval_capture_conditions( +#' stats::mcnemar.test(cards::ADSL[["SEX"]], cards::ADSL[["EFFFL"]]) |> +#' broom::tidy() +#' ) +#' ) +#' +#' @keywords internal +.format_mcnemartest_results <- function(by, variable, lst_tidy, ...) { + # build ARD ------------------------------------------------------------------ + ret <- + cards::tidy_as_ard( + lst_tidy = lst_tidy, + tidy_result_names = c("statistic", "p.value", "method"), + fun_args_to_record = c("correct"), + formals = formals(asNamespace("stats")[["mcnemar.test"]]), + passed_args = dots_list(...), + lst_ard_columns = list(group1 = by, variable = variable, context = "mcnemartest") + ) + + # add the stat label --------------------------------------------------------- + ret |> + dplyr::left_join( + .df_mcnemar_stat_labels(), + by = "stat_name" + ) |> + dplyr::mutate(stat_label = dplyr::coalesce(.data$stat_label, .data$stat_name)) |> + cards::tidy_ard_column_order() +} + +.df_mcnemar_stat_labels <- function() { + dplyr::tribble( + ~stat_name, ~stat_label, + "statistic", "X-squared Statistic", + "parameter", "Degrees of Freedom", + "p.value", "p-value", + ) +} diff --git a/R/ard_wilcoxtest.R b/R/ard_wilcoxtest.R index 3ed5a1e4a..071c77ba8 100644 --- a/R/ard_wilcoxtest.R +++ b/R/ard_wilcoxtest.R @@ -105,17 +105,23 @@ ard_paired_wilcoxtest <- function(data, by, variable, id, ...) { ) } + #' Convert Wilcoxon test to ARD #' #' @inheritParams cards::tidy_as_ard #' @inheritParams stats::wilcox.test #' @param by (`string`)\cr by column name #' @param variable (`string`)\cr variable column name -#' @param ... passed to `wilcox.test(...)` +#' @param ... passed to `stats::wilcox.test(...)` #' #' @return ARD data frame -#' @keywords internal +#' #' @examples +#' # Pre-processing ADSL to have grouping factor (ARM here) with 2 levels +#' ADSL <- cards::ADSL |> +#' dplyr::filter(ARM %in% c("Placebo", "Xanomeline High Dose")) |> +#' ard_wilcoxtest(by = "ARM", variable = "AGE") +#' #' cardx:::.format_wilcoxtest_results( #' by = "ARM", #' variable = "AGE", @@ -126,6 +132,8 @@ ard_paired_wilcoxtest <- function(data, by, variable, id, ...) { #' broom::tidy() #' ) #' ) +#' +#' @keywords internal .format_wilcoxtest_results <- function(by, variable, lst_tidy, paired, ...) { # build ARD ------------------------------------------------------------------ ret <- diff --git a/_pkgdown.yml b/_pkgdown.yml index b3c97b7c0..e23c70a05 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -27,6 +27,7 @@ reference: - ard_chisqtest - ard_fishertest - ard_kruskaltest + - ard_mcnemartest - ard_ttest - ard_wilcoxtest diff --git a/inst/WORDLIST b/inst/WORDLIST index 1a602fd4c..6aabb79bb 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -5,6 +5,7 @@ Clopper Codecov Hoffmann Jeffreys +McNemar's Lifecycle Newcombe Su diff --git a/man/ard_mcnemartest.Rd b/man/ard_mcnemartest.Rd new file mode 100644 index 000000000..7fdfafceb --- /dev/null +++ b/man/ard_mcnemartest.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ard_mcnemartest.R +\name{ard_mcnemartest} +\alias{ard_mcnemartest} +\title{ARD McNemar's Test} +\usage{ +ard_mcnemartest(data, by, variable, ...) +} +\arguments{ +\item{data}{(\code{data.frame})\cr +a data frame. See below for details.} + +\item{by}{(\code{\link[dplyr:dplyr_tidy_select]{tidy-select}})\cr +column name to compare by.} + +\item{variable}{(\code{\link[dplyr:dplyr_tidy_select]{tidy-select}})\cr +column name to be compared.} + +\item{...}{arguments passed to \code{stats::mcnemar.test(...)}} +} +\value{ +ARD data frame +} +\description{ +Analysis results data for McNemar's statistical test. +} +\details{ +For the \code{ard_mcnemartest()} function, the data is expected to be one row per subject. +The data is passed as \code{stats::mcnemar.test(x = data[[variable]], y = data[[by]], ...)}. +Please use \code{table(x = data[[variable]], y = data[[by]])} to check the contingency table. +} +\examples{ +cards::ADSL |> + ard_mcnemartest(by = "SEX", variable = "EFFFL") + +} diff --git a/man/dot-format_mcnemartest_results.Rd b/man/dot-format_mcnemartest_results.Rd new file mode 100644 index 000000000..a809a791c --- /dev/null +++ b/man/dot-format_mcnemartest_results.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ard_mcnemartest.R +\name{.format_mcnemartest_results} +\alias{.format_mcnemartest_results} +\title{Convert McNemar's test to ARD} +\usage{ +.format_mcnemartest_results(by, variable, lst_tidy, ...) +} +\arguments{ +\item{by}{(\code{string})\cr by column name} + +\item{variable}{(\code{string})\cr variable column name} + +\item{lst_tidy}{(named \code{list})\cr +list of tidied results constructed with \code{\link[cards:eval_capture_conditions]{eval_capture_conditions()}}, +e.g. \code{eval_capture_conditions(t.test(mtcars$mpg ~ mtcars$am) |> broom::tidy())}.} + +\item{...}{passed to \code{stats::mcnemar.test(...)}} +} +\value{ +ARD data frame +} +\description{ +Convert McNemar's test to ARD +} +\examples{ +cardx:::.format_mcnemartest_results( + by = "ARM", + variable = "AGE", + lst_tidy = + cards::eval_capture_conditions( + stats::mcnemar.test(cards::ADSL[["SEX"]], cards::ADSL[["EFFFL"]]) |> + broom::tidy() + ) +) + +} +\keyword{internal} diff --git a/man/dot-format_wilcoxtest_results.Rd b/man/dot-format_wilcoxtest_results.Rd index 55318dba0..10f70950e 100644 --- a/man/dot-format_wilcoxtest_results.Rd +++ b/man/dot-format_wilcoxtest_results.Rd @@ -17,7 +17,7 @@ e.g. \code{eval_capture_conditions(t.test(mtcars$mpg ~ mtcars$am) |> broom::tidy \item{paired}{a logical indicating whether you want a paired test.} -\item{...}{passed to \code{wilcox.test(...)}} +\item{...}{passed to \code{stats::wilcox.test(...)}} } \value{ ARD data frame @@ -26,6 +26,11 @@ ARD data frame Convert Wilcoxon test to ARD } \examples{ +# Pre-processing ADSL to have grouping factor (ARM here) with 2 levels +ADSL <- cards::ADSL |> + dplyr::filter(ARM \%in\% c("Placebo", "Xanomeline High Dose")) |> + ard_wilcoxtest(by = "ARM", variable = "AGE") + cardx:::.format_wilcoxtest_results( by = "ARM", variable = "AGE", @@ -36,5 +41,6 @@ cardx:::.format_wilcoxtest_results( broom::tidy() ) ) + } \keyword{internal} diff --git a/tests/testthat/test-ard_mcnemartest.R b/tests/testthat/test-ard_mcnemartest.R new file mode 100644 index 000000000..de4f4e582 --- /dev/null +++ b/tests/testthat/test-ard_mcnemartest.R @@ -0,0 +1,40 @@ +test_that("ard_mcnemartest() works", { + expect_error( + ard_mcnemartest <- + cards::ADSL |> + ard_mcnemartest(by = SEX, variable = EFFFL), + NA + ) + + expect_equal( + ard_mcnemartest |> + cards::get_ard_statistics(stat_name %in% c("statistic", "p.value", "parameter", "method")), + stats::mcnemar.test(cards::ADSL[["SEX"]], cards::ADSL[["EFFFL"]], correct = TRUE) |> + broom::tidy() |> + unclass(), + ignore_attr = TRUE + ) + + # errors are properly handled + expect_equal( + cards::ADSL |> + ard_mcnemartest(by = ARM, variable = AGE, correct = FALSE) |> + dplyr::pull(error) |> + getElement(1L), + "'x' and 'y' must have the same number of levels (minimum 2)" + ) + + # non-syntactic column names work too + ADSL_tmp <- cards::ADSL |> + dplyr::rename("if" = AGE, "_c d" = EFFFL) + + expect_equal( + cards::ADSL |> + dplyr::rename(`Planned Tx` = TRT01P, `Age Group` = AGEGR1) |> + ard_mcnemartest(by = `Planned Tx`, variable = `Age Group`) |> + cards::get_ard_statistics(), + cards::ADSL |> + ard_mcnemartest(by = TRT01P, variable = AGEGR1) |> + cards::get_ard_statistics() + ) +})