From 7fd9326e8ebb4dbe77efef76b4c51203c50f5c14 Mon Sep 17 00:00:00 2001
From: Emily de la Rua <emily.de_la_rua@contractors.roche.com>
Date: Wed, 21 Feb 2024 21:09:21 -0500
Subject: [PATCH] Add tests, clean up

---
 R/ard_survfit.R                      | 135 ++++++++++++---------------
 man/ard_survfit.Rd                   |   6 +-
 man/dot-process_survfit_probs.Rd     |   6 +-
 man/dot-process_survfit_time.Rd      |   7 +-
 tests/testthat/_snaps/ard_survfit.md |  74 +++++++++++++++
 tests/testthat/test-ard_survfit.R    |  21 +++++
 6 files changed, 162 insertions(+), 87 deletions(-)
 create mode 100644 tests/testthat/_snaps/ard_survfit.md
 create mode 100644 tests/testthat/test-ard_survfit.R

diff --git a/R/ard_survfit.R b/R/ard_survfit.R
index 7d550a139..0f97cfad7 100644
--- a/R/ard_survfit.R
+++ b/R/ard_survfit.R
@@ -18,7 +18,9 @@
 #' @name ard_survfit
 #'
 #' @details
-#' Only one of either the `times` or `probs` parameters can be specified.
+#' * Only one of either the `times` or `probs` parameters can be specified.
+#' * Times should be provided using the same scale as the time variable used to fit the provided
+#'   survival fit model.
 #'
 #' @examplesIf broom.helpers::.assert_package("survival", pkg_search = "cardx", boolean = TRUE)
 #' library(survival)
@@ -38,23 +40,18 @@ ard_survfit <- function(x, times = NULL, probs = NULL, reverse = FALSE) {
 
   # check/process inputs -------------------------------------------------------
   check_not_missing(x)
-  check_range(probs, c(0, 1))
   check_binary(reverse)
-
+  if (!is.null(probs)) check_range(probs, c(0, 1))
   if (!all(inherits(x, "survfit"))) {
     cli::cli_abort(
       "The {.arg x} argument must be class {.cls survfit} created using the {.fun survival::survfit} function."
     )
   }
   if (sum(is.null(times), is.null(probs)) != 1) {
-    cli::cli_abort(
-      "One and only one of {.arg times} and {.arg probs} must be specified."
-    )
+    cli::cli_abort("One and only one of {.arg times} and {.arg probs} must be specified.")
   }
   if (reverse && !is.null(probs)) {
-    cli::cli_inform(
-      "The {.code reverse=TRUE} argument is ignored for survival quantile estimation."
-    )
+    cli::cli_inform("The {.code reverse=TRUE} argument is ignored for survival quantile estimation.")
   }
 
   # build ARD ------------------------------------------------------------------
@@ -76,82 +73,75 @@ ard_survfit <- function(x, times = NULL, probs = NULL, reverse = FALSE) {
 #' @return a tibble
 #'
 #' @examples
-#' cardx:::.format_survfit_time(
-#'   survfit(Surv(AVAL, CNSR) ~ TRTA, cards::ADTTE),
-#'   times = c(60, 180),
-#'   reverse = FALSE
-#' )
+#' survfit(Surv(AVAL, CNSR) ~ TRTA, cards::ADTTE) |>
+#'   cardx:::.process_survfit_time(times = c(60, 180), reverse = FALSE)
 #'
 #' @keywords internal
 .process_survfit_time <- function(x, times, reverse) {
-  tidy <- broom::tidy(x)
-
-  strata <- intersect("strata", names(tidy)) %>%
-    list() %>%
-    compact()
+  # tidy survfit results
+  tidy_x <- broom::tidy(x)
 
   # adding time 0 to data frame
-  tidy <- tidy %>%
+  tidy_x <- tidy_x %>%
     # make strata a fct to preserve ordering
-    mutate_at(vars(strata), ~ factor(., levels = unique(.))) %>%
-    # if CI is missing, and SE is 0, make the CI the estimate
-    mutate_at(
-      vars("conf.high", "conf.low"),
+    dplyr::mutate_at(dplyr::vars(strata), ~ factor(., levels = unique(.))) %>%
+    # if CI is missing and SE is 0, use estimate as the CI
+    dplyr::mutate_at(
+      dplyr::vars("conf.high", "conf.low"),
       ~ ifelse(is.na(.) & .data$std.error == 0, .data$estimate, .)
     ) %>%
-    select(any_of(c("time", "estimate", "conf.high", "conf.low", "strata"))) %>%
+    dplyr::select(dplyr::any_of(c("time", "estimate", "conf.high", "conf.low", "strata"))) %>%
     # add data for time 0
-    bind_rows(
-      group_by(., strata) %>%
-        slice(1) %>%
-        mutate(
+    dplyr::bind_rows(
+      dplyr::group_by(., strata) %>%
+        dplyr::slice(1) %>%
+        dplyr::mutate(
           time = 0,
           estimate = 1,
           conf.low = 1,
           conf.high = 1
         )
     ) %>%
-    ungroup()
+    dplyr::ungroup()
 
   # get requested estimates
-  df_stat <- tidy %>%
+  df_stat <- tidy_x %>%
     # find max time
-    group_by(., strata) %>%
-    mutate(time_max = max(.data$time)) %>%
-    ungroup() %>%
+    dplyr::group_by(., strata) %>%
+    dplyr::mutate(time_max = max(.data$time)) %>%
+    dplyr::ungroup() %>%
     # add requested timepoints
-    full_join(
-      select(tidy, strata) %>%
-        distinct() %>%
-        mutate(
+    dplyr::full_join(
+      dplyr::select(tidy_x, strata) %>%
+        dplyr::distinct() %>%
+        dplyr::mutate(
           time = list(.env$times),
           col_name = list(paste("stat", seq_len(length(.env$times)), sep = "_"))
         ) %>%
-        unnest(cols = c("time", "col_name")),
-      by = unlist(c(strata, "time"))
+        tidyr::unnest(cols = c("time", "col_name")),
+      by = unlist(c("strata", "time"))
     ) %>%
     # if user-specifed time is unobserved, fill estimate with previous value
-    arrange(strata, .data$time) %>%
-    group_by(strata) %>%
+    dplyr::arrange(.data$strata, .data$time) %>%
+    dplyr::group_by(.data$strata) %>%
     tidyr::fill(
       "estimate", "conf.high", "conf.low", "time_max",
       .direction = "down"
     ) %>%
-    ungroup() %>%
+    dplyr::ungroup() %>%
     # keep only user-specified times
-    filter(!is.na(.data$col_name)) %>%
+    dplyr::filter(!is.na(.data$col_name)) %>%
     # if user-specified time is after max time, make estimate NA
-    mutate_at(
-      vars("estimate", "conf.high", "conf.low"),
+    dplyr::mutate_at(
+      dplyr::vars("estimate", "conf.high", "conf.low"),
       ~ ifelse(.data$time > .data$time_max, NA_real_, .)
     ) %>%
-    select(-c(time_max, col_name))
+    dplyr::select(-c(time_max, col_name))
 
   # reverse probs if requested
   if (reverse) {
-    df_stat <-
-      df_stat %>%
-      mutate_at(vars("estimate", "conf.low", "conf.high"), ~ 1 - .) %>%
+    df_stat <- df_stat %>%
+      dplyr::mutate_at(vars("estimate", "conf.low", "conf.high"), ~ 1 - .) %>%
       dplyr::rename(conf.low = "conf.high", conf.high = "conf.low")
   }
 
@@ -166,19 +156,11 @@ ard_survfit <- function(x, times = NULL, probs = NULL, reverse = FALSE) {
 #' @return a tibble
 #'
 #' @examples
-#' cardx:::.format_survfit_probs(
-#'   survfit(Surv(AVAL, CNSR) ~ TRTA, cards::ADTTE),
-#'   probs = c(0.25, 0.5, 0.75)
-#' )
+#' survfit(Surv(AVAL, CNSR) ~ TRTA, cards::ADTTE) |>
+#'   cardx:::.process_survfit_probs(probs = c(0.25, 0.75))
 #'
 #' @keywords internal
 .process_survfit_probs <- function(x, probs) {
-  tidy <- broom::tidy(x)
-
-  strata <- intersect("strata", names(tidy)) %>%
-    list() %>%
-    compact()
-
   # calculate survival quantiles and add estimates to df
   df_stat <- purrr::map2_dfr(
     probs,
@@ -187,8 +169,9 @@ ard_survfit <- function(x, times = NULL, probs = NULL, reverse = FALSE) {
       as.data.frame() %>%
       tibble::rownames_to_column() %>%
       set_names(c("strata", "estimate", "conf.low", "conf.high")) %>%
-      mutate(prob = .x)
-  )
+      dplyr::mutate(prob = .x)
+  ) %>%
+    tibble::tibble()
 
   df_stat
 }
@@ -207,32 +190,30 @@ ard_survfit <- function(x, times = NULL, probs = NULL, reverse = FALSE) {
 #' @keywords internal
 .format_survfit_results <- function(tidy_survfit) {
   ret <- tidy_survfit %>%
-    mutate(across(
-      any_of(c("estimate", "conf.high", "conf.low", "time", "prob")), ~ as.list(.)
+    dplyr::mutate(across(
+      dplyr::any_of(c("estimate", "conf.high", "conf.low", "time", "prob")), ~ as.list(.)
     )) %>%
-    pivot_longer(
+    tidyr::pivot_longer(
       cols = any_of(c("estimate", "conf.high", "conf.low", "time", "prob")),
       names_to = "stat_name",
       values_to = "stat"
     ) %>%
-    separate_wider_delim(strata, "=", names = c("variable", "variable_level"))
+    tidyr::separate_wider_delim(strata, "=", names = c("variable", "variable_level")) %>%
+    dplyr::arrange(variable, variable_level)
 
-  ret |>
+  ret %>%
     dplyr::left_join(
       .df_survfit_stat_labels(),
       by = "stat_name"
-    ) |>
+    ) %>%
     dplyr::mutate(
-      fmt_fn =
-        lapply(
-          .data$stat,
-          function(x) {
-            switch(is.integer(x), 0L) %||% switch(is.numeric(x), 1L)
-          }
-        ),
+      fmt_fn = lapply(
+        .data$stat,
+        function(x) switch(is.integer(x), 0L) %||% switch(is.numeric(x), 1L)
+      ),
       context = "survival",
       stat_label = dplyr::coalesce(.data$stat_label, .data$stat_name)
-    ) |>
+    ) %>%
     cards::tidy_ard_column_order() %>%
     structure(., class = c("card", class(.)))
 }
@@ -245,6 +226,6 @@ ard_survfit <- function(x, times = NULL, probs = NULL, reverse = FALSE) {
     "conf.high", "CI Upper Bound",
     "conf.level", "CI Confidence Level",
     "prob", "Quantile",
-    "times", "Time"
+    "time", "Time"
   )
 }
diff --git a/man/ard_survfit.Rd b/man/ard_survfit.Rd
index 2efab2687..0f51efdcb 100644
--- a/man/ard_survfit.Rd
+++ b/man/ard_survfit.Rd
@@ -28,7 +28,11 @@ Analysis results data for survival quantiles and x-year survival estimates, extr
 from a \code{\link[survival:survfit]{survival::survfit()}} model.
 }
 \details{
-Only one of either the \code{times} or \code{probs} parameters can be specified.
+\itemize{
+\item Only one of either the \code{times} or \code{probs} parameters can be specified.
+\item Times should be provided using the same scale as the time variable used to fit the provided
+survival fit model.
+}
 }
 \examples{
 \dontshow{if (broom.helpers::.assert_package("survival", pkg_search = "cardx", boolean = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
diff --git a/man/dot-process_survfit_probs.Rd b/man/dot-process_survfit_probs.Rd
index 32c7825d8..675a80ca6 100644
--- a/man/dot-process_survfit_probs.Rd
+++ b/man/dot-process_survfit_probs.Rd
@@ -20,10 +20,8 @@ a tibble
 Process Survival Fit For Quantile Estimates
 }
 \examples{
-cardx:::.format_survfit_probs(
-  survfit(Surv(AVAL, CNSR) ~ TRTA, cards::ADTTE),
-  probs = c(0.25, 0.5, 0.75)
-)
+survfit(Surv(AVAL, CNSR) ~ TRTA, cards::ADTTE) |>
+  cardx:::.process_survfit_probs(probs = c(0.25, 0.75))
 
 }
 \keyword{internal}
diff --git a/man/dot-process_survfit_time.Rd b/man/dot-process_survfit_time.Rd
index b608091f4..7cdc54de2 100644
--- a/man/dot-process_survfit_time.Rd
+++ b/man/dot-process_survfit_time.Rd
@@ -24,11 +24,8 @@ a tibble
 Process Survival Fit For Time Estimates
 }
 \examples{
-cardx:::.format_survfit_time(
-  survfit(Surv(AVAL, CNSR) ~ TRTA, cards::ADTTE),
-  times = c(60, 180),
-  reverse = FALSE
-)
+survfit(Surv(AVAL, CNSR) ~ TRTA, cards::ADTTE) |>
+  cardx:::.process_survfit_time(times = c(60, 180), reverse = FALSE)
 
 }
 \keyword{internal}
diff --git a/tests/testthat/_snaps/ard_survfit.md b/tests/testthat/_snaps/ard_survfit.md
new file mode 100644
index 000000000..edd1977e2
--- /dev/null
+++ b/tests/testthat/_snaps/ard_survfit.md
@@ -0,0 +1,74 @@
+# ard_survfit() works with times provided
+
+    Code
+      print(dplyr::mutate(ard_survfit(survfit(Surv(AVAL, CNSR) ~ TRTA, cards::ADTTE),
+      times = c(60, 180)), stat = lapply(stat, function(x) ifelse(is.numeric(x),
+      cards::round5(x, 3), x))), n = Inf)
+    Message
+      {cards} data frame: 24 x 7
+    Output
+         variable variable_level stat_name stat_label  stat fmt_fn
+      1      TRTA        Placebo  estimate  Survival… 0.893      1
+      2      TRTA        Placebo conf.high  CI Upper… 0.966      1
+      3      TRTA        Placebo  conf.low  CI Lower… 0.825      1
+      4      TRTA        Placebo      time       Time    60      1
+      5      TRTA        Placebo  estimate  Survival… 0.651      1
+      6      TRTA        Placebo conf.high  CI Upper… 0.783      1
+      7      TRTA        Placebo  conf.low  CI Lower… 0.541      1
+      8      TRTA        Placebo      time       Time   180      1
+      9      TRTA      Xanomeli…  estimate  Survival… 0.694      1
+      10     TRTA      Xanomeli… conf.high  CI Upper… 0.849      1
+      11     TRTA      Xanomeli…  conf.low  CI Lower… 0.568      1
+      12     TRTA      Xanomeli…      time       Time    60      1
+      13     TRTA      Xanomeli…  estimate  Survival… 0.262      1
+      14     TRTA      Xanomeli… conf.high  CI Upper… 0.749      1
+      15     TRTA      Xanomeli…  conf.low  CI Lower… 0.092      1
+      16     TRTA      Xanomeli…      time       Time   180      1
+      17     TRTA      Xanomeli…  estimate  Survival… 0.732      1
+      18     TRTA      Xanomeli… conf.high  CI Upper… 0.878      1
+      19     TRTA      Xanomeli…  conf.low  CI Lower…  0.61      1
+      20     TRTA      Xanomeli…      time       Time    60      1
+      21     TRTA      Xanomeli…  estimate  Survival… 0.381      1
+      22     TRTA      Xanomeli… conf.high  CI Upper… 0.743      1
+      23     TRTA      Xanomeli…  conf.low  CI Lower… 0.195      1
+      24     TRTA      Xanomeli…      time       Time   180      1
+    Message
+      i 1 more variable: context
+
+# ard_survfit() works with probs provided
+
+    Code
+      print(dplyr::mutate(ard_survfit(survfit(Surv(AVAL, CNSR) ~ TRTA, cards::ADTTE),
+      probs = c(0.25, 0.75)), stat = lapply(stat, function(x) ifelse(is.numeric(x),
+      cards::round5(x, 3), x))), n = Inf)
+    Message
+      {cards} data frame: 24 x 7
+    Output
+         variable variable_level stat_name stat_label stat fmt_fn
+      1      TRTA        Placebo  estimate  Survival…  142      1
+      2      TRTA        Placebo conf.high  CI Upper…  181      1
+      3      TRTA        Placebo  conf.low  CI Lower…   70      1
+      4      TRTA        Placebo      prob   Quantile 0.25      1
+      5      TRTA        Placebo  estimate  Survival…  184      1
+      6      TRTA        Placebo conf.high  CI Upper…  191      1
+      7      TRTA        Placebo  conf.low  CI Lower…  183      1
+      8      TRTA        Placebo      prob   Quantile 0.75      1
+      9      TRTA      Xanomeli…  estimate  Survival…   44      1
+      10     TRTA      Xanomeli… conf.high  CI Upper…  180      1
+      11     TRTA      Xanomeli…  conf.low  CI Lower…   22      1
+      12     TRTA      Xanomeli…      prob   Quantile 0.25      1
+      13     TRTA      Xanomeli…  estimate  Survival…  188      1
+      14     TRTA      Xanomeli… conf.high  CI Upper…   NA      1
+      15     TRTA      Xanomeli…  conf.low  CI Lower…  167      1
+      16     TRTA      Xanomeli…      prob   Quantile 0.75      1
+      17     TRTA      Xanomeli…  estimate  Survival…   49      1
+      18     TRTA      Xanomeli… conf.high  CI Upper…  180      1
+      19     TRTA      Xanomeli…  conf.low  CI Lower…   37      1
+      20     TRTA      Xanomeli…      prob   Quantile 0.25      1
+      21     TRTA      Xanomeli…  estimate  Survival…  184      1
+      22     TRTA      Xanomeli… conf.high  CI Upper…   NA      1
+      23     TRTA      Xanomeli…  conf.low  CI Lower…  180      1
+      24     TRTA      Xanomeli…      prob   Quantile 0.75      1
+    Message
+      i 1 more variable: context
+
diff --git a/tests/testthat/test-ard_survfit.R b/tests/testthat/test-ard_survfit.R
new file mode 100644
index 000000000..f961c354f
--- /dev/null
+++ b/tests/testthat/test-ard_survfit.R
@@ -0,0 +1,21 @@
+test_that("ard_survfit() works with times provided", {
+  expect_snapshot(
+    survfit(Surv(AVAL, CNSR) ~ TRTA, cards::ADTTE) |>
+      ard_survfit(times = c(60, 180)) |>
+      dplyr::mutate(
+        stat = lapply(stat, function(x) ifelse(is.numeric(x), cards::round5(x, 3), x))
+      ) |>
+      print(n = Inf)
+  )
+})
+
+test_that("ard_survfit() works with probs provided", {
+  expect_snapshot(
+    survfit(Surv(AVAL, CNSR) ~ TRTA, cards::ADTTE) |>
+      ard_survfit(probs = c(0.25, 0.75)) |>
+      dplyr::mutate(
+        stat = lapply(stat, function(x) ifelse(is.numeric(x), cards::round5(x, 3), x))
+      ) |>
+      print(n = Inf)
+  )
+})