Update ard_survival_survfit (#152)

**What changes are proposed in this pull request?** * Updated `ard_survival_survfit` to return `std.error` and `n.risk` statistics. (#139 ) @ddsjoberg as I currently have it, `std.error` gets transformed along with `estimate`, `conf.low`, and `conf.high` (with options `"survival"`, `"cumhaz"`, and `"risk"`), but if you think it's more informative to always return the untransformed `std.error` that can be updated. Closes #139 -------------------------------------------------------------------------------- Pre-review Checklist (if item does not apply, mark is as complete) - [x] **All** GitHub Action workflows pass with a ✅ - [x] PR branch has pulled the most recent updates from master branch: `usethis::pr_merge_main()` - [x] If a bug was fixed, a unit test was added. - [x] If a new `ard_*()` function was added, it passes the ARD structural checks from `cards::check_ard_structure()`. - [x] If a new `ard_*()` function was added, `set_cli_abort_call()` has been set. - [x] If a new `ard_*()` function was added and it depends on another package (such as, `broom`), `is_pkg_installed("broom", reference_pkg = "cardx")` has been set in the function call and the following added to the roxygen comments: `@examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "broom"", reference_pkg = "cardx"))` - [x] Code coverage is suitable for any new functions/features (generally, 100% coverage for new code): `devtools::test_coverage()` Reviewer Checklist (if item does not apply, mark is as complete) - [ ] If a bug was fixed, a unit test was added. - [ ] Code coverage is suitable for any new functions/features: `devtools::test_coverage()` When the branch is ready to be merged: - [ ] Update `NEWS.md` with the changes from this pull request under the heading "`# cardx (development version)`". If there is an issue associated with the pull request, reference it in parentheses at the end update (see `NEWS.md` for examples). - [ ] **All** GitHub Action workflows pass with a ✅ - [ ] Approve Pull Request - [ ] Merge the PR. Please use "Squash and merge" or "Rebase and merge".
insightsengineering · May 23, 2024 · 819dab4 · 819dab4
1 parent db48b77
commit 819dab4
Show file tree

Hide file tree

Showing 3 changed files with 154 additions and 119 deletions.
diff --git a/R/ard_survival_survfit.R b/R/ard_survival_survfit.R
@@ -111,6 +111,8 @@ ard_survival_survfit <- function(x, times = NULL, probs = NULL, type = NULL) {
 #'
 #' @inheritParams cards::tidy_as_ard
 #' @inheritParams ard_survival_survfit
+#' @param start.time (`numeric`)\cr
+#'   default starting time. See [survival::survfit0()] for more details.
 #'
 #' @return a `tibble`
 #'
@@ -119,42 +121,39 @@ ard_survival_survfit <- function(x, times = NULL, probs = NULL, type = NULL) {
 #'   cardx:::.process_survfit_time(times = c(60, 180), type = "risk")
 #'
 #' @keywords internal
-.process_survfit_time <- function(x, times, type) {
-  # tidy survfit results
-  tidy_x <- broom::tidy(x)
+.process_survfit_time <- function(x, times, type, start.time = NULL) {
+  # add start time
+  min_time <- min(x$time)
+  if (is.null(start.time) && min_time < 0) {
+    cli::cli_inform(paste(
+      "The {.arg start.time} argument has not been set and negative times have been observed. Please set start",
+      "time via the {.arg start.time} argument, otherwise the minimum observed time will be used by default."
+    ))
+    start.time <- min_time
+  } else if (is.null(start.time)) {
+    start.time <- 0
+  }
+  x <- survival::survfit0(x, start.time) %>%
+    summary(times)
 
   # process competing risks/multi-state models
-  multi_state <- inherits(x, "survfitms")
+  multi_state <- inherits(x, "summary.survfitms")
 
-  if (multi_state == TRUE) {
+  if (multi_state) {
     # selecting state to show
-    state <- setdiff(unique(tidy_x$state), "(s0)")[[1]]
+    state <- setdiff(unique(x$states), "(s0)")[[1]]
     cli::cli_inform("Multi-state model detected. Showing probabilities into state '{state}'.")
-    tidy_x <- dplyr::filter(tidy_x, .data$state == .env$state)
+    x$n.risk <- x$n.risk[, 1]
+    ms_cols <- c("pstate", "std.err", "upper", "lower")
+    state_col <- which(colnames(x$pstate) == state)
+    x[ms_cols] <- lapply(x[ms_cols], function(m) m[, state_col])
+    x$surv <- x$pstate
   }
 
-  # adding time 0 to data frame
-  tidy_x <- tidy_x %>%
-    # make strata a fct to preserve ordering
-    dplyr::mutate(dplyr::across(dplyr::any_of("strata"), ~ factor(., levels = unique(.)))) %>%
-    # if CI is missing and SE is 0, use estimate as the CI
-    dplyr::mutate_at(
-      dplyr::vars("conf.high", "conf.low"),
-      ~ ifelse(is.na(.) & .data$std.error == 0, .data$estimate, .)
-    ) %>%
-    dplyr::select(dplyr::any_of(c("time", "estimate", "conf.high", "conf.low", "strata"))) %>%
-    # add data for time 0
-    dplyr::bind_rows(
-      dplyr::group_by_at(., dplyr::vars(dplyr::any_of("strata"))) %>%
-        dplyr::slice(1) %>%
-        dplyr::mutate(
-          time = 0,
-          estimate = ifelse(multi_state, 0, 1),
-          conf.low = ifelse(multi_state, 0, 1),
-          conf.high = ifelse(multi_state, 0, 1)
-        )
-    ) %>%
-    dplyr::ungroup()
+  # tidy survfit results
+  x_cols <- intersect(names(x), c("time", "n.risk", "surv", "std.err", "upper", "lower", "strata"))
+  tidy_x <- data.frame(x[x_cols]) %>%
+    dplyr::rename(estimate = "surv", std.error = "std.err", conf.high = "upper", conf.low = "lower")
 
   strat <- "strata" %in% names(tidy_x)
 
@@ -182,16 +181,7 @@ ard_survival_survfit <- function(x, times = NULL, probs = NULL, type = NULL) {
   }
 
   df_stat <- df_stat %>%
-    # if user-specifed time is unobserved, fill estimate with previous value
     dplyr::arrange(.data$time) %>%
-    dplyr::group_by_at(dplyr::vars(dplyr::any_of("strata"))) %>%
-    tidyr::fill(
-      "estimate", "conf.high", "conf.low", "time_max",
-      .direction = "down"
-    ) %>%
-    dplyr::ungroup() %>%
-    # keep only user-specified times
-    dplyr::filter(!is.na(.data$col_name)) %>%
     # if user-specified time is after max time, make estimate NA
     dplyr::mutate_at(
       dplyr::vars("estimate", "conf.high", "conf.low"),
@@ -236,7 +226,7 @@ ard_survival_survfit <- function(x, times = NULL, probs = NULL, type = NULL) {
       as.data.frame() %>%
       set_names(c("estimate", "conf.low", "conf.high")) %>%
       dplyr::mutate(strata = row.names(.)) %>%
-      dplyr::select(dplyr::any_of(c("strata", "estimate", "conf.low", "conf.high"))) %>%
+      dplyr::select(dplyr::any_of(c("n.risk", "strata", "estimate", "std.error", "conf.low", "conf.high"))) %>%
       dplyr::mutate(prob = .x)
   ) %>%
     dplyr::bind_rows() %>%
@@ -293,10 +283,10 @@ extract_multi_strata <- function(x, df_stat) {
 
   ret <- tidy_survfit %>%
     dplyr::mutate(dplyr::across(
-      dplyr::any_of(c("estimate", "conf.high", "conf.low", "time", "prob")), ~ as.list(.)
+      dplyr::any_of(c("n.risk", "estimate", "std.error", "conf.high", "conf.low", "time", "prob")), ~ as.list(.)
     )) %>%
     tidyr::pivot_longer(
-      cols = dplyr::any_of(c("estimate", "conf.high", "conf.low")),
+      cols = dplyr::any_of(c("n.risk", "estimate", "std.error", "conf.high", "conf.low")),
       names_to = "stat_name",
       values_to = "stat"
     ) %>%
@@ -342,7 +332,9 @@ extract_multi_strata <- function(x, df_stat) {
 .df_survfit_stat_labels <- function() {
   dplyr::tribble(
     ~stat_name, ~stat_label,
+    "n.risk", "Number of Subjects at Risk",
     "estimate", "Survival Probability",
+    "std.error", "Standard Error (untransformed)",
     "conf.low", "CI Lower Bound",
     "conf.high", "CI Upper Bound",
     "conf.level", "CI Confidence Level",

diff --git a/man/dot-process_survfit_time.Rd b/man/dot-process_survfit_time.Rd