From 1e21f297bd9a813e3d5d268b44f365f1bbb7fb09 Mon Sep 17 00:00:00 2001 From: Emily de la Rua <59304861+edelarua@users.noreply.github.com> Date: Tue, 31 Oct 2023 08:27:23 -0400 Subject: [PATCH] Refactor `count_occurrences` (#1076) Fixes #1073 --- NAMESPACE | 2 + NEWS.md | 2 + R/count_occurrences.R | 151 +++++++++++++++--- R/riskdiff.R | 16 +- R/summarize_functions.R | 1 + R/utils_default_stats_formats_labels.R | 88 +++++++++- man/afun_riskdiff.Rd | 12 +- man/count_occurrences.Rd | 86 +++++++--- man/default_stats_formats_labels.Rd | 29 +++- man/summarize_functions.Rd | 1 + tests/testthat/_snaps/count_occurrences.md | 99 ++++++++++++ .../utils_default_stats_formats_labels.md | 20 ++- tests/testthat/test-count_occurrences.R | 122 +++++++++++++- .../test-utils_default_stats_formats_labels.R | 44 +++++ 14 files changed, 603 insertions(+), 70 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index f09604b8be..0f9539a37b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -129,6 +129,7 @@ export(g_lineplot) export(g_step) export(g_waterfall) export(get_formats_from_stats) +export(get_indents_from_stats) export(get_labels_from_stats) export(get_smooths) export(get_stats) @@ -268,6 +269,7 @@ export(summarize_coxreg) export(summarize_glm_count) export(summarize_logistic) export(summarize_num_patients) +export(summarize_occurrences) export(summarize_occurrences_by_grade) export(summarize_patients_events_in_cols) export(summarize_patients_exposure_in_cols) diff --git a/NEWS.md b/NEWS.md index e242de45cb..3e224caf59 100644 --- a/NEWS.md +++ b/NEWS.md @@ -7,6 +7,8 @@ ### New Features * Added the `na_str` argument to `analyze` & `summarize_row_groups` wrapper functions `count_abnormal`, `count_abnormal_by_baseline`, `count_abnormal_by_marked`, `count_abnormal_by_worst_grade`, `count_abnormal_lab_worsen_by_baseline`, `count_cumulative`, `count_missed_doses`, `count_occurrences`, `count_occurrences_by_grade`, `summarize_occurrences_by_grade`, `summarize_patients_events_in_cols`, `count_patients_with_event`, `count_patients_with_flags`, `count_values`, `estimate_multinomial_response`, `estimate_proportion`, `h_tab_one_biomarker`, `estimate_incidence_rate`, `logistic_summary_by_flag`, `estimate_odds_ratio`, `estimate_proportion_diff`, `test_proportion_diff`, `summarize_ancova`, `summarize_change`, `summarize_glm_count`, `summarize_num_patients`, `analyze_num_patients`, `summarize_patients_exposure_in_cols`, `coxph_pairwise`, `tabulate_survival_subgroups`, `surv_time`, and `surv_timepoint`. +* Added function `get_indents_from_stats` to format and return indent modifiers for a given set of statistics. +* Added summarize function version of `count_occurrences` analyze function, `summarize_occurrences`. ### Enhancements * Added formatting function `format_count_fraction_lt10` for formatting `count_fraction` with special consideration when count is less than 10. diff --git a/R/count_occurrences.R b/R/count_occurrences.R index 19d34e5dfe..677458f416 100644 --- a/R/count_occurrences.R +++ b/R/count_occurrences.R @@ -111,10 +111,7 @@ s_count_occurrences <- function(df, #' * `a_count_occurrences()` returns the corresponding list with formatted [rtables::CellValue()]. #' #' @examples -#' # We need to ungroup `count_fraction` first so that the `rtables` formatting -#' # function `format_count_fraction()` can be applied correctly. -#' afun <- make_afun(a_count_occurrences, .ungroup_stats = c("count", "count_fraction", "fraction")) -#' afun( +#' a_count_occurrences( #' df, #' .N_col = N_per_col, #' .df_row = df, @@ -123,10 +120,61 @@ s_count_occurrences <- function(df, #' ) #' #' @export -a_count_occurrences <- make_afun( - s_count_occurrences, - .formats = c(count = "xx", count_fraction = format_count_fraction_fixed_dp, fraction = format_fraction_fixed_dp) -) +a_count_occurrences <- function(df, + labelstr = "", + id = "USUBJID", + denom = c("N_col", "n"), + drop = TRUE, + .N_col, # nolint + .var = NULL, + .df_row = NULL, + .stats = NULL, + .formats = NULL, + .labels = NULL, + .indent_mods = NULL, + na_str = NA_character_) { + denom <- match.arg(denom) + x_stats <- s_count_occurrences( + df = df, denom = denom, .N_col = .N_col, .df_row = .df_row, drop = drop, .var = .var, id = id + ) + if (is.null(unlist(x_stats))) { + return(NULL) + } + x_lvls <- names(x_stats[[1]]) + + # Fill in with formatting defaults if needed + .stats <- get_stats("count_occurrences", stats_in = .stats) + .formats <- get_formats_from_stats(.stats, .formats) + .labels <- get_labels_from_stats(.stats, .labels, row_nms = x_lvls) + .indent_mods <- get_indents_from_stats(.stats, .indent_mods, row_nms = x_lvls) + + if ("count_fraction_fixed_dp" %in% .stats) x_stats[["count_fraction_fixed_dp"]] <- x_stats[["count_fraction"]] + x_stats <- x_stats[.stats] + + # Ungroup statistics with values for each level of x + x_ungrp <- ungroup_stats(x_stats, .formats, list(), list()) + x_stats <- x_ungrp[["x"]] + .formats <- x_ungrp[[".formats"]] + + # Auto format handling + fmt_is_auto <- vapply(.formats, function(ii) is.character(ii) && ii == "auto", logical(1)) + if (any(fmt_is_auto)) { + res_l_auto <- x_stats[fmt_is_auto] + tmp_dt_var <- .df_row[[.var]] # xxx this can be extended for the WHOLE data or single facets + .formats[fmt_is_auto] <- lapply(seq_along(res_l_auto), function(rla) { + format_auto(tmp_dt_var, names(res_l_auto)[rla]) + }) + } + + in_rows( + .list = x_stats, + .formats = .formats, + .names = .labels, + .labels = .labels, + .indent_mods = .indent_mods, + .format_na_strs = na_str + ) +} #' @describeIn count_occurrences Layout-creating function which can take statistics function arguments #' and additional format arguments. This function is a wrapper for [rtables::analyze()]. @@ -147,7 +195,8 @@ a_count_occurrences <- make_afun( #' "MH1", "MH2", "MH1", "MH1", "MH1", "MH3", #' "MH2", "MH2", "MH3", "MH1", "MH2", "MH4" #' ), -#' ARM = rep(c("A", "B"), each = 6) +#' ARM = rep(c("A", "B"), each = 6), +#' SEX = c("F", "F", "M", "M", "M", "M", "F", "F", "F", "M", "M", "F") #' ) #' df_adsl <- df %>% #' select(USUBJID, ARM) %>% @@ -174,36 +223,32 @@ count_occurrences <- function(lyt, nested = TRUE, ..., table_names = vars, - .stats = "count_fraction", + .stats = "count_fraction_fixed_dp", .formats = NULL, .labels = NULL, .indent_mods = NULL) { checkmate::assert_flag(riskdiff) - afun <- make_afun( - a_count_occurrences, - .stats = .stats, - .formats = .formats, - .labels = .labels, - .indent_mods = .indent_mods, - .ungroup_stats = .stats + extra_args <- list( + .stats = .stats, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods, na_str = na_str ) - extra_args <- if (isFALSE(riskdiff)) { - list(...) + if (isFALSE(riskdiff)) { + extra_args <- c(extra_args, list(...)) } else { - list( - afun = list("s_count_occurrences" = afun), - .stats = .stats, - .indent_mods = .indent_mods, - s_args = list(...) + extra_args <- c( + extra_args, + list( + afun = list("s_count_occurrences" = a_count_occurrences), + s_args = list(...) + ) ) } analyze( lyt = lyt, vars = vars, - afun = ifelse(isFALSE(riskdiff), afun, afun_riskdiff), + afun = ifelse(isFALSE(riskdiff), a_count_occurrences, afun_riskdiff), var_labels = var_labels, show_labels = show_labels, table_names = table_names, @@ -212,3 +257,59 @@ count_occurrences <- function(lyt, extra_args = extra_args ) } + +#' @describeIn count_occurrences Layout-creating function which can take content function arguments +#' and additional format arguments. This function is a wrapper for [rtables::summarize_row_groups()]. +#' +#' @return +#' * `summarize_occurrences()` returns a layout object suitable for passing to further layouting functions, +#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted content rows +#' containing the statistics from `s_count_occurrences()` to the table layout. +#' +#' @examples +#' # Layout creating function with custom format. +#' basic_table() %>% +#' add_colcounts() %>% +#' split_rows_by("SEX", child_labels = "visible") %>% +#' summarize_occurrences( +#' var = "MHDECOD", +#' .formats = c("count_fraction" = "xx.xx (xx.xx%)") +#' ) %>% +#' build_table(df, alt_counts_df = df_adsl) +#' +#' @export +summarize_occurrences <- function(lyt, + var, + riskdiff = FALSE, + na_str = NA_character_, + ..., + .stats = "count_fraction_fixed_dp", + .formats = NULL, + .indent_mods = NULL, + .labels = NULL) { + checkmate::assert_flag(riskdiff) + + extra_args <- list( + .stats = .stats, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods, na_str = na_str + ) + + if (isFALSE(riskdiff)) { + extra_args <- c(extra_args, list(...)) + } else { + extra_args <- c( + extra_args, + list( + afun = list("s_count_occurrences" = a_count_occurrences), + s_args = list(...) + ) + ) + } + + summarize_row_groups( + lyt = lyt, + var = var, + cfun = ifelse(isFALSE(riskdiff), a_count_occurrences, afun_riskdiff), + na_str = na_str, + extra_args = extra_args + ) +} diff --git a/R/riskdiff.R b/R/riskdiff.R index b908f45c67..8a86be6dce 100644 --- a/R/riskdiff.R +++ b/R/riskdiff.R @@ -78,7 +78,10 @@ afun_riskdiff <- function(df, .spl_context, .all_col_counts, .stats, - .indent_mods, + .formats = NULL, + .labels = NULL, + .indent_mods = NULL, + na_str = NA_character_, afun, s_args = list()) { if (!any(grepl("riskdiff", names(.spl_context)))) { @@ -89,8 +92,10 @@ afun_riskdiff <- function(df, } checkmate::assert_list(afun, len = 1, types = "function") checkmate::assert_named(afun) - - afun_args <- list(.var = .var, .df_row = .df_row, .N_row = .N_row, denom = "N_col", labelstr = labelstr) + afun_args <- list( + .var = .var, .df_row = .df_row, .N_row = .N_row, denom = "N_col", labelstr = labelstr, + .stats = .stats, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods, na_str = na_str + ) afun_args <- afun_args[intersect(names(afun_args), names(as.list(args(afun[[1]]))))] if ("denom" %in% names(s_args)) afun_args[["denom"]] <- NULL @@ -113,8 +118,9 @@ afun_riskdiff <- function(df, cur_var <- tail(.spl_context$cur_col_split[[1]], 1) # Apply statistics function to arm X and arm Y data - s_x <- do.call(names(afun), args = c(list(df = df[df[[cur_var]] == arm_x, ], .N_col = N_col_x), afun_args, s_args)) - s_y <- do.call(names(afun), args = c(list(df = df[df[[cur_var]] == arm_y, ], .N_col = N_col_y), afun_args, s_args)) + s_args <- c(s_args, afun_args[intersect(names(afun_args), names(as.list(args(names(afun)))))]) + s_x <- do.call(names(afun), args = c(list(df = df[df[[cur_var]] == arm_x, ], .N_col = N_col_x), s_args)) + s_y <- do.call(names(afun), args = c(list(df = df[df[[cur_var]] == arm_y, ], .N_col = N_col_y), s_args)) # Get statistic name and row names stat <- ifelse("count_fraction" %in% names(s_x), "count_fraction", "unique") diff --git a/R/summarize_functions.R b/R/summarize_functions.R index 9004ba626d..58d7d7461b 100644 --- a/R/summarize_functions.R +++ b/R/summarize_functions.R @@ -10,6 +10,7 @@ #' * [h_tab_one_biomarker()] (probably to deprecate) #' * [logistic_summary_by_flag()] #' * [summarize_num_patients()] +#' * [summarize_occurrences()] #' * [summarize_occurrences_by_grade()] #' * [summarize_patients_events_in_cols()] #' * [summarize_patients_exposure_in_cols()] diff --git a/R/utils_default_stats_formats_labels.R b/R/utils_default_stats_formats_labels.R index 789442b99c..5adda079fa 100644 --- a/R/utils_default_stats_formats_labels.R +++ b/R/utils_default_stats_formats_labels.R @@ -68,7 +68,7 @@ get_stats <- function(method_groups = "analyze_vars_numeric", stats_in = NULL, a for (mgi in method_groups) { # Main switcher out_tmp <- switch(mgi, - "count_occurrences" = c("count", "count_fraction_fixed_dp", "fraction"), + "count_occurrences" = c("count", "count_fraction", "count_fraction_fixed_dp", "fraction"), "summarize_num_patients" = c("unique", "nonunique", "unique_count"), "analyze_vars_counts" = c("n", "count", "count_fraction", "n_blq"), "analyze_vars_numeric" = c( @@ -189,7 +189,12 @@ get_formats_from_stats <- function(stats, formats_in = NULL) { #' @describeIn default_stats_formats_labels Get labels from vector of statistical methods. #' -#' @param labels_in (named `vector`) \cr inserted labels to replace defaults. +#' @param labels_in (named `vector`)\cr inserted labels to replace defaults. +#' @param row_nms (`character`)\cr row names. Levels of a `factor` or `character` variable, each +#' of which the statistics in `.stats` will be calculated for. If this parameter is set, these +#' variable levels will be used as the defaults, and the names of the given custom values should +#' correspond to levels (or have format `statistic.level`) instead of statistics. Can also be +#' variable names if rows correspond to different variables instead of levels. Defaults to `NULL`. #' #' @return #' * `get_labels_from_stats()` returns a named character vector of default labels (if present @@ -207,8 +212,9 @@ get_formats_from_stats <- function(stats, formats_in = NULL) { #' get_labels_from_stats(all_cnt_occ, labels_in = list("fraction" = c("Some more fractions"))) #' #' @export -get_labels_from_stats <- function(stats, labels_in = NULL) { +get_labels_from_stats <- function(stats, labels_in = NULL, row_nms = NULL) { checkmate::assert_character(stats, min.len = 1) + checkmate::assert_character(row_nms, null.ok = TRUE) # It may be a list if (checkmate::test_list(labels_in, null.ok = TRUE)) { checkmate::assert_list(labels_in, null.ok = TRUE) @@ -217,12 +223,22 @@ get_labels_from_stats <- function(stats, labels_in = NULL) { checkmate::assert_character(labels_in, null.ok = TRUE) } - which_lbl <- match(stats, names(tern_default_labels)) + if (!is.null(row_nms)) { + ret <- rep(row_nms, length(stats)) + out <- setNames(ret, paste(rep(stats, each = length(row_nms)), ret, sep = ".")) - ret <- vector("character", length = length(stats)) # it needs to be a character vector - ret[!is.na(which_lbl)] <- tern_default_labels[which_lbl[!is.na(which_lbl)]] + if (!is.null(labels_in)) { + lvl_lbls <- intersect(names(labels_in), row_nms) + for (i in lvl_lbls) out[paste(stats, i, sep = ".")] <- labels_in[[i]] + } + } else { + which_lbl <- match(stats, names(tern_default_labels)) - out <- setNames(ret, stats) + ret <- vector("character", length = length(stats)) # it needs to be a character vector + ret[!is.na(which_lbl)] <- tern_default_labels[which_lbl[!is.na(which_lbl)]] + + out <- setNames(ret, stats) + } # Modify some with custom labels if (!is.null(labels_in)) { @@ -234,6 +250,62 @@ get_labels_from_stats <- function(stats, labels_in = NULL) { out } +#' @describeIn default_stats_formats_labels Format indent modifiers for a given vector/list of statistics. +#' +#' @param indents_in (named `vector`)\cr inserted indent modifiers to replace defaults (default is `0L`). +#' +#' @return +#' * `get_indents_from_stats()` returns a single indent modifier value to apply to all rows +#' or a named numeric vector of indent modifiers (if present, otherwise `NULL`). +#' +#' @examples +#' get_indents_from_stats(all_cnt_occ, indents_in = 3L) +#' get_indents_from_stats(all_cnt_occ, indents_in = list(count = 2L, count_fraction = 5L)) +#' get_indents_from_stats( +#' all_cnt_occ, +#' indents_in = list(a = 2L, count.a = 1L, count.b = 5L), row_nms = c("a", "b") +#' ) +#' +#' @export +get_indents_from_stats <- function(stats, indents_in = NULL, row_nms = NULL) { + checkmate::assert_character(stats, min.len = 1) + checkmate::assert_character(row_nms, null.ok = TRUE) + # It may be a list + if (checkmate::test_list(indents_in, null.ok = TRUE)) { + checkmate::assert_list(indents_in, null.ok = TRUE) + # Or it may be a vector of integers + } else { + checkmate::assert_integerish(indents_in, null.ok = TRUE) + } + + if (is.null(names(indents_in)) && length(indents_in) == 1) { + out <- rep(indents_in, length(stats) * if (!is.null(row_nms)) length(row_nms) else 1) + return(out) + } + + if (!is.null(row_nms)) { + ret <- rep(0L, length(stats) * length(row_nms)) + out <- setNames(ret, paste(rep(stats, each = length(row_nms)), rep(row_nms, length(stats)), sep = ".")) + + if (!is.null(indents_in)) { + lvl_lbls <- intersect(names(indents_in), row_nms) + for (i in lvl_lbls) out[paste(stats, i, sep = ".")] <- indents_in[[i]] + } + } else { + ret <- rep(0L, length(stats)) + out <- setNames(ret, stats) + } + + # Modify some with custom labels + if (!is.null(indents_in)) { + # Stats is the main + common_names <- intersect(names(out), names(indents_in)) + out[common_names] <- indents_in[common_names] + } + + out +} + #' @describeIn default_stats_formats_labels Named list of default formats for `tern`. #' @format #' * `tern_default_formats` is a list of available formats, named after their relevant @@ -284,11 +356,13 @@ tern_default_formats <- c( #' @export tern_default_labels <- c( # list of labels -> sorted? xxx it should be not relevant due to match + fraction = "fraction", unique = "Number of patients with at least one event", nonunique = "Number of events", n = "n", count = "count", count_fraction = "count_fraction", + count_fraction_fixed_dp = "count_fraction", n_blq = "n_blq", sum = "Sum", mean = "Mean", diff --git a/man/afun_riskdiff.Rd b/man/afun_riskdiff.Rd index 5b69b18e9b..372d5732bc 100644 --- a/man/afun_riskdiff.Rd +++ b/man/afun_riskdiff.Rd @@ -14,7 +14,10 @@ afun_riskdiff( .spl_context, .all_col_counts, .stats, - .indent_mods, + .formats = NULL, + .labels = NULL, + .indent_mods = NULL, + na_str = NA_character_, afun, s_args = list() ) @@ -45,9 +48,16 @@ taken from \code{alt_counts_df} if specified (see \code{\link[rtables:build_tabl \item{.stats}{(\code{character})\cr statistics to select for the table.} +\item{.formats}{(named \code{character} or \code{list})\cr formats for the statistics. See Details in \code{analyze_vars} for more +information on the \code{"auto"} setting.} + +\item{.labels}{(named \code{character})\cr labels for the statistics (without indent).} + \item{.indent_mods}{(named \code{integer})\cr indent modifiers for the labels. Defaults to 0, which corresponds to the unmodified default behavior. Can be negative.} +\item{na_str}{(\code{string})\cr string used to replace all \code{NA} or empty values in the output.} + \item{afun}{(named \code{list})\cr A named list containing one name-value pair where the name corresponds to the name of the statistics function that should be used in calculations and the value is the corresponding analysis function.} diff --git a/man/count_occurrences.Rd b/man/count_occurrences.Rd index b5f2c99f49..f7c5f5d156 100644 --- a/man/count_occurrences.Rd +++ b/man/count_occurrences.Rd @@ -4,6 +4,7 @@ \alias{count_occurrences} \alias{s_count_occurrences} \alias{a_count_occurrences} +\alias{summarize_occurrences} \title{Occurrence Counts} \usage{ s_count_occurrences( @@ -18,12 +19,18 @@ s_count_occurrences( a_count_occurrences( df, + labelstr = "", + id = "USUBJID", denom = c("N_col", "n"), - .N_col, - .df_row, drop = TRUE, - .var = "MHDECOD", - id = "USUBJID" + .N_col, + .var = NULL, + .df_row = NULL, + .stats = NULL, + .formats = NULL, + .labels = NULL, + .indent_mods = NULL, + na_str = NA_character_ ) count_occurrences( @@ -36,11 +43,23 @@ count_occurrences( nested = TRUE, ..., table_names = vars, - .stats = "count_fraction", + .stats = "count_fraction_fixed_dp", .formats = NULL, .labels = NULL, .indent_mods = NULL ) + +summarize_occurrences( + lyt, + var, + riskdiff = FALSE, + na_str = NA_character_, + ..., + .stats = "count_fraction_fixed_dp", + .formats = NULL, + .indent_mods = NULL, + .labels = NULL +) } \arguments{ \item{df}{(\code{data.frame})\cr data set containing all analysis variables.} @@ -59,11 +78,27 @@ passed by \code{rtables}.} \item{drop}{(\code{flag})\cr should non appearing occurrence levels be dropped from the resulting table. Note that in that case the remaining occurrence levels in the table are sorted alphabetically.} -\item{.var}{(\code{string})\cr single variable name that is passed by \code{rtables} when requested +\item{.var, var}{(\code{string})\cr single variable name that is passed by \code{rtables} when requested by a statistics function.} \item{id}{(\code{string})\cr subject variable name.} +\item{labelstr}{(\code{character})\cr label of the level of the parent split currently being summarized +(must be present as second argument in Content Row Functions). See \code{\link[rtables:summarize_row_groups]{rtables::summarize_row_groups()}} +for more information.} + +\item{.stats}{(\code{character})\cr statistics to select for the table.} + +\item{.formats}{(named \code{character} or \code{list})\cr formats for the statistics. See Details in \code{analyze_vars} for more +information on the \code{"auto"} setting.} + +\item{.labels}{(named \code{character})\cr labels for the statistics (without indent).} + +\item{.indent_mods}{(named \code{integer})\cr indent modifiers for the labels. Defaults to 0, which corresponds to the +unmodified default behavior. Can be negative.} + +\item{na_str}{(\code{string})\cr string used to replace all \code{NA} or empty values in the output.} + \item{lyt}{(\code{layout})\cr input layout where analyses will be added to.} \item{vars}{(\code{character})\cr variable names for the primary analysis variable to be iterated over.} @@ -76,8 +111,6 @@ by a statistics function.} used as \code{split_fun} in the prior column split of the table layout, specifying which columns should be compared. See \code{\link[=stat_propdiff_ci]{stat_propdiff_ci()}} for details on risk difference calculation.} -\item{na_str}{(\code{string})\cr string used to replace all \code{NA} or empty values in the output.} - \item{nested}{(\code{flag})\cr whether this layout instruction should be applied within the existing layout structure \emph{if possible} (\code{TRUE}, the default) or as a new top-level element (\code{FALSE}). Ignored if it would nest a split. underneath analyses, which is not allowed.} @@ -86,16 +119,6 @@ underneath analyses, which is not allowed.} \item{table_names}{(\code{character})\cr this can be customized in case that the same \code{vars} are analyzed multiple times, to avoid warnings from \code{rtables}.} - -\item{.stats}{(\code{character})\cr statistics to select for the table.} - -\item{.formats}{(named \code{character} or \code{list})\cr formats for the statistics. See Details in \code{analyze_vars} for more -information on the \code{"auto"} setting.} - -\item{.labels}{(named \code{character})\cr labels for the statistics (without indent).} - -\item{.indent_mods}{(named \code{integer})\cr indent modifiers for the labels. Defaults to 0, which corresponds to the -unmodified default behavior. Can be negative.} } \value{ \itemize{ @@ -116,6 +139,12 @@ unmodified default behavior. Can be negative.} or to \code{\link[rtables:build_table]{rtables::build_table()}}. Adding this function to an \code{rtable} layout will add formatted rows containing the statistics from \code{s_count_occurrences()} to the table layout. } + +\itemize{ +\item \code{summarize_occurrences()} returns a layout object suitable for passing to further layouting functions, +or to \code{\link[rtables:build_table]{rtables::build_table()}}. Adding this function to an \code{rtable} layout will add formatted content rows +containing the statistics from \code{s_count_occurrences()} to the table layout. +} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} @@ -136,6 +165,9 @@ in \code{count_occurrences()}. \item \code{count_occurrences()}: Layout-creating function which can take statistics function arguments and additional format arguments. This function is a wrapper for \code{\link[rtables:analyze]{rtables::analyze()}}. +\item \code{summarize_occurrences()}: Layout-creating function which can take content function arguments +and additional format arguments. This function is a wrapper for \code{\link[rtables:summarize_row_groups]{rtables::summarize_row_groups()}}. + }} \note{ By default, occurrences which don't appear in a given row split are dropped from the table and @@ -160,10 +192,7 @@ s_count_occurrences( id = "USUBJID" ) -# We need to ungroup `count_fraction` first so that the `rtables` formatting -# function `format_count_fraction()` can be applied correctly. -afun <- make_afun(a_count_occurrences, .ungroup_stats = c("count", "count_fraction", "fraction")) -afun( +a_count_occurrences( df, .N_col = N_per_col, .df_row = df, @@ -181,7 +210,8 @@ df <- data.frame( "MH1", "MH2", "MH1", "MH1", "MH1", "MH3", "MH2", "MH2", "MH3", "MH1", "MH2", "MH4" ), - ARM = rep(c("A", "B"), each = 6) + ARM = rep(c("A", "B"), each = 6), + SEX = c("F", "F", "M", "M", "M", "M", "F", "F", "F", "M", "M", "F") ) df_adsl <- df \%>\% select(USUBJID, ARM) \%>\% @@ -198,4 +228,14 @@ lyt \%>\% build_table(df, alt_counts_df = df_adsl) \%>\% prune_table() +# Layout creating function with custom format. +basic_table() \%>\% + add_colcounts() \%>\% + split_rows_by("SEX", child_labels = "visible") \%>\% + summarize_occurrences( + var = "MHDECOD", + .formats = c("count_fraction" = "xx.xx (xx.xx\%)") + ) \%>\% + build_table(df, alt_counts_df = df_adsl) + } diff --git a/man/default_stats_formats_labels.Rd b/man/default_stats_formats_labels.Rd index abb81c2614..84997cbde5 100644 --- a/man/default_stats_formats_labels.Rd +++ b/man/default_stats_formats_labels.Rd @@ -6,6 +6,7 @@ \alias{get_stats} \alias{get_formats_from_stats} \alias{get_labels_from_stats} +\alias{get_indents_from_stats} \alias{tern_default_formats} \alias{tern_default_labels} \alias{summary_formats} @@ -32,7 +33,9 @@ get_stats( get_formats_from_stats(stats, formats_in = NULL) -get_labels_from_stats(stats, labels_in = NULL) +get_labels_from_stats(stats, labels_in = NULL, row_nms = NULL) + +get_indents_from_stats(stats, indents_in = NULL, row_nms = NULL) tern_default_formats @@ -66,7 +69,15 @@ methods.} \item{formats_in}{(named \code{vector}) \cr inserted formats to replace defaults. It can be a character vector from \code{\link[formatters:list_formats]{formatters::list_valid_format_labels()}} or a custom format function.} -\item{labels_in}{(named \code{vector}) \cr inserted labels to replace defaults.} +\item{labels_in}{(named \code{vector})\cr inserted labels to replace defaults.} + +\item{row_nms}{(\code{character})\cr row names. Levels of a \code{factor} or \code{character} variable, each +of which the statistics in \code{.stats} will be calculated for. If this parameter is set, these +variable levels will be used as the defaults, and the names of the given custom values should +correspond to levels (or have format \code{statistic.level}) instead of statistics. Can also be +variable names if rows correspond to different variables instead of levels. Defaults to \code{NULL}.} + +\item{indents_in}{(named \code{vector})\cr inserted indent modifiers to replace defaults (default is \code{0L}).} \item{type}{(\code{flag})\cr is it going to be \code{"numeric"} or \code{"counts"}?} @@ -105,6 +116,11 @@ statistics by setting \code{indent_mods_custom} to a single integer value.} otherwise \code{NULL}). } +\itemize{ +\item \code{get_indents_from_stats()} returns a single indent modifier value to apply to all rows +or a named numeric vector of indent modifiers (if present, otherwise \code{NULL}). +} + \itemize{ \item \code{summary_formats()} returns a named \code{vector} of default statistic formats for the given data type. } @@ -139,6 +155,8 @@ present \code{NULL} is returned. \item \code{get_labels_from_stats()}: Get labels from vector of statistical methods. +\item \code{get_indents_from_stats()}: Format indent modifiers for a given vector/list of statistics. + \item \code{tern_default_formats}: Named list of default formats for \code{tern}. \item \code{tern_default_labels}: \code{character} vector that contains default labels @@ -199,6 +217,13 @@ get_labels_from_stats(all_cnt_occ) get_labels_from_stats(all_cnt_occ, labels_in = c("fraction" = "Fraction")) get_labels_from_stats(all_cnt_occ, labels_in = list("fraction" = c("Some more fractions"))) +get_indents_from_stats(all_cnt_occ, indents_in = 3L) +get_indents_from_stats(all_cnt_occ, indents_in = list(count = 2L, count_fraction = 5L)) +get_indents_from_stats( + all_cnt_occ, + indents_in = list(a = 2L, count.a = 1L, count.b = 5L), row_nms = c("a", "b") +) + summary_formats() summary_formats(type = "counts", include_pval = TRUE) diff --git a/man/summarize_functions.Rd b/man/summarize_functions.Rd index bf3e5b0c74..e87a8e0d81 100644 --- a/man/summarize_functions.Rd +++ b/man/summarize_functions.Rd @@ -12,6 +12,7 @@ to add summary rows to a given table layout: \item \code{\link[=h_tab_one_biomarker]{h_tab_one_biomarker()}} (probably to deprecate) \item \code{\link[=logistic_summary_by_flag]{logistic_summary_by_flag()}} \item \code{\link[=summarize_num_patients]{summarize_num_patients()}} +\item \code{\link[=summarize_occurrences]{summarize_occurrences()}} \item \code{\link[=summarize_occurrences_by_grade]{summarize_occurrences_by_grade()}} \item \code{\link[=summarize_patients_events_in_cols]{summarize_patients_events_in_cols()}} \item \code{\link[=summarize_patients_exposure_in_cols]{summarize_patients_exposure_in_cols()}} diff --git a/tests/testthat/_snaps/count_occurrences.md b/tests/testthat/_snaps/count_occurrences.md index aae53d747c..1f2a4eb280 100644 --- a/tests/testthat/_snaps/count_occurrences.md +++ b/tests/testthat/_snaps/count_occurrences.md @@ -82,6 +82,23 @@ +# a_count_occurrences works with custom input. + + Code + res + Output + RowsVerticalSection (in_rows) object print method: + ---------------------------- + row_name formatted_cell indent_mod row_label + 1 Level: a 3 1 Level: a + 2 LVL B 1 2 LVL B + 3 Count of c 1 0 Count of c + 4 Missing D 0 3 Missing D + 5 Level: a 3 (60%) 1 Level: a + 6 LVL B 1 (20%) 2 LVL B + 7 c 1 (20%) 0 c + 8 Missing D 0 (0%) 0 Missing D + # count_occurrences functions as expected with valid input and default arguments Code @@ -184,3 +201,85 @@ dcd D.1.1.4.2 10 (8.2%) 5 (5.2%) 7 (7.1%) 3.0 (-3.5 - 9.6) 6 (7.5%) 8 (10.0%) 9 (14.1%) -2.5 (-11.2 - 6.2) dcd D.2.1.5.3 12 (9.8%) 10 (10.3%) 9 (9.2%) -0.5 (-8.5 - 7.6) 9 (11.2%) 10 (12.5%) 3 (4.7%) -1.2 (-11.3 - 8.8) +# summarize_occurrences functions as expected with valid input and default arguments + + Code + res + Output + A B + (N=5) (N=4) + ————————————————————————————— + F + MH1 1 (20.0%) 0 + MH2 1 (20.0%) 1 (25.0%) + MH3 0 1 (25.0%) + MH4 0 1 (25.0%) + M + MH1 2 (40.0%) 1 (25.0%) + MH2 0 1 (25.0%) + MH3 1 (20.0%) 0 + +# summarize_occurrences works as expected with risk difference column + + Code + res + Output + A: Drug X B: Placebo C: Combination Risk Difference (%) (95% CI) + (N=202) (N=177) (N=162) (N=379) + ————————————————————————————————————————————————————————————————————————————————— + F + LOW 12 (5.9%) 11 (6.2%) 10 (6.2%) -0.3 (-5.1 - 4.6) + MEDIUM 11 (5.4%) 13 (7.3%) 10 (6.2%) -1.9 (-6.9 - 3.1) + HIGH 10 (5.0%) 7 (4.0%) 7 (4.3%) 1.0 (-3.2 - 5.1) + M + LOW 12 (5.9%) 5 (2.8%) 6 (3.7%) 3.1 (-1.0 - 7.2) + MEDIUM 10 (5.0%) 9 (5.1%) 4 (2.5%) -0.1 (-4.5 - 4.3) + HIGH 4 (2.0%) 12 (6.8%) 11 (6.8%) -4.8 (-9.0 - -0.6) + +--- + + Code + res + Output + A: Drug X B: Placebo C: Combination Risk Difference (%) (95% CI) + (N=202) (N=177) (N=162) (N=379) + ———————————————————————————————————————————————————————————————————————————————————————— + F + LOW 10 6 8 1.6 (-2.4 - 5.6) + MEDIUM 9 11 9 -1.8 (-6.3 - 2.8) + HIGH 6 6 7 -0.4 (-4.0 - 3.1) + LOW 10 (5.0%) 6 (3.4%) 8 (4.9%) 1.6 (-2.4 - 5.6) + MEDIUM 9 (4.5%) 11 (6.2%) 9 (5.6%) -1.8 (-6.3 - 2.8) + HIGH 6 (3.0%) 6 (3.4%) 7 (4.3%) -0.4 (-4.0 - 3.1) + LOW 10/202 (5.0%) 6/177 (3.4%) 8/162 (4.9%) 1.6 (-2.4 - 5.6) + MEDIUM 9/202 (4.5%) 11/177 (6.2%) 9/162 (5.6%) -1.8 (-6.3 - 2.8) + HIGH 6/202 (3.0%) 6/177 (3.4%) 7/162 (4.3%) -0.4 (-4.0 - 3.1) + M + LOW 7 5 6 0.6 (-2.9 - 4.2) + MEDIUM 9 7 4 0.5 (-3.5 - 4.5) + HIGH 4 11 8 -4.2 (-8.3 - -0.2) + LOW 7 (3.5%) 5 (2.8%) 6 (3.7%) 0.6 (-2.9 - 4.2) + MEDIUM 9 (4.5%) 7 (4.0%) 4 (2.5%) 0.5 (-3.5 - 4.5) + HIGH 4 (2.0%) 11 (6.2%) 8 (4.9%) -4.2 (-8.3 - -0.2) + LOW 7/202 (3.5%) 5/177 (2.8%) 6/162 (3.7%) 0.6 (-2.9 - 4.2) + MEDIUM 9/202 (4.5%) 7/177 (4.0%) 4/162 (2.5%) 0.5 (-3.5 - 4.5) + HIGH 4/202 (2.0%) 11/177 (6.2%) 8/162 (4.9%) -4.2 (-8.3 - -0.2) + +--- + + Code + res + Output + A B C + A: Drug X B: Placebo C: Combination Risk Difference (%) (95% CI) A: Drug X B: Placebo C: Combination Risk Difference (%) (95% CI) A: Drug X B: Placebo C: Combination Risk Difference (%) (95% CI) + (N=64) (N=67) (N=62) (N=131) (N=78) (N=64) (N=45) (N=142) (N=60) (N=46) (N=55) (N=106) + ——————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————— + F + LOW 6 (9.4%) 4 (6.0%) 1 (1.6%) 3.4 (-5.7 - 12.5) 3 (3.8%) 5 (7.8%) 6 (13.3%) -4.0 (-11.8 - 3.9) 3 (5.0%) 2 (4.3%) 3 (5.5%) 0.7 (-7.4 - 8.7) + MEDIUM 1 (1.6%) 4 (6.0%) 4 (6.5%) -4.4 (-10.8 - 2.0) 6 (7.7%) 4 (6.2%) 2 (4.4%) 1.4 (-6.9 - 9.8) 4 (6.7%) 5 (10.9%) 4 (7.3%) -4.2 (-15.2 - 6.8) + HIGH 3 (4.7%) 4 (6.0%) 3 (4.8%) -1.3 (-9.0 - 6.4) 3 (3.8%) 2 (3.1%) 0 0.7 (-5.3 - 6.8) 4 (6.7%) 1 (2.2%) 4 (7.3%) 4.5 (-3.1 - 12.1) + M + LOW 1 (1.6%) 2 (3.0%) 2 (3.2%) -1.4 (-6.5 - 3.7) 7 (9.0%) 2 (3.1%) 3 (6.7%) 5.8 (-1.8 - 13.5) 4 (6.7%) 1 (2.2%) 1 (1.8%) 4.5 (-3.1 - 12.1) + MEDIUM 5 (7.8%) 2 (3.0%) 1 (1.6%) 4.8 (-2.9 - 12.6) 2 (2.6%) 3 (4.7%) 2 (4.4%) -2.1 (-8.4 - 4.1) 3 (5.0%) 4 (8.7%) 1 (1.8%) -3.7 (-13.5 - 6.1) + HIGH 2 (3.1%) 5 (7.5%) 5 (8.1%) -4.3 (-11.9 - 3.3) 2 (2.6%) 5 (7.8%) 3 (6.7%) -5.2 (-12.7 - 2.2) 0 2 (4.3%) 3 (5.5%) -4.3 (-10.2 - 1.5) + diff --git a/tests/testthat/_snaps/utils_default_stats_formats_labels.md b/tests/testthat/_snaps/utils_default_stats_formats_labels.md index 4a9831a8fc..fdf98b4bc9 100644 --- a/tests/testthat/_snaps/utils_default_stats_formats_labels.md +++ b/tests/testthat/_snaps/utils_default_stats_formats_labels.md @@ -3,8 +3,8 @@ Code res Output - [1] "count" "count_fraction_fixed_dp" - [3] "fraction" + [1] "count" "count_fraction" + [3] "count_fraction_fixed_dp" "fraction" --- @@ -36,8 +36,20 @@ Code res Output - count count_fraction_fixed_dp fraction - "count" "" "" + count count_fraction count_fraction_fixed_dp + "count" "count_fraction" "count_fraction" + fraction + "fraction" + +# get_indents_from_stats works as expected + + Code + res + Output + count count_fraction count_fraction_fixed_dp + 0 0 0 + fraction + 0 # summary_formats works as expected diff --git a/tests/testthat/test-count_occurrences.R b/tests/testthat/test-count_occurrences.R index 46677852a8..eb0f75df63 100644 --- a/tests/testthat/test-count_occurrences.R +++ b/tests/testthat/test-count_occurrences.R @@ -65,6 +65,50 @@ testthat::test_that("s_count_occurrences functions as expected when requesting d testthat::expect_snapshot(res) }) +testthat::test_that("a_count_occurrences works with healthy input.", { + options("width" = 100) + + # factor input + df <- data.frame( + id = factor(1:5), + x = factor(c("a", "a", "b", "c", "a")) + ) + result <- a_count_occurrences( + df = df, .N_col = 10, .stats = get_stats("count_occurrences"), .var = "x", id = "id", .df_row = df + ) + res_out <- testthat::expect_silent(result) + + # character input + df <- data.frame( + id = factor(1:5), + x = c("a", "a", "b", "c", "a") + ) + result <- a_count_occurrences( + df = df, .N_col = 10, .stats = get_stats("count_occurrences"), .var = "x", id = "id", .df_row = df + ) + res_out <- testthat::expect_silent(result) +}) + +testthat::test_that("a_count_occurrences works with custom input.", { + options("width" = 100) + + df <- data.frame( + id = factor(1:5), + x = factor(c("a", "a", "b", "c", "a"), levels = c("a", "b", "c", "d")) + ) + + result <- a_count_occurrences( + df = df, .df_row = df, .var = "x", id = "id", .N_col = 5, + .stats = c("count", "count_fraction"), drop = FALSE, + .formats = c(count_fraction = "xx (xx%)"), + .labels = c(a = "Level: a", b = "LVL B", count.c = "Count of c", d = "Missing D"), + .indent_mods = list(a = 1L, b = 2L, count.d = 3L) + ) + + res <- testthat::expect_silent(result) + testthat::expect_snapshot(res) +}) + testthat::test_that("count_occurrences functions as expected with valid input and default arguments", { df <- data.frame( USUBJID = as.character(c(1, 1, 2, 4, 4, 4, 6, 6, 6, 7, 7, 8)), @@ -84,7 +128,7 @@ testthat::test_that("count_occurrences functions as expected with valid input an add_colcounts() %>% count_occurrences(vars = "MHDECOD") - result <- rtable_object <- lyt %>% + result <- lyt %>% build_table(df, alt_counts_df = df_adsl) res <- testthat::expect_silent(result) @@ -110,7 +154,7 @@ testthat::test_that("count_occurrences functions as expected with label row spec show_labels = "visible" ) - result <- rtable_object <- lyt %>% + result <- lyt %>% build_table(df, alt_counts_df = df_adsl) res <- testthat::expect_silent(result) @@ -136,7 +180,7 @@ testthat::test_that("count_occurrences works as expected with risk difference co count_occurrences( vars = "AEDECOD", riskdiff = TRUE, - .stats = c("count", "count_fraction", "fraction"), + .stats = c("count", "count_fraction_fixed_dp", "fraction"), id = "SITEID" ) %>% build_table(tern_ex_adae) @@ -157,3 +201,75 @@ testthat::test_that("count_occurrences works as expected with risk difference co res <- testthat::expect_silent(result) testthat::expect_snapshot(res) }) + +testthat::test_that("summarize_occurrences functions as expected with valid input and default arguments", { + df <- data.frame( + USUBJID = as.character(c(1, 1, 2, 4, 4, 4, 6, 6, 6, 7, 7, 8)), + MHDECOD = factor( + c("MH1", "MH2", "MH1", "MH1", "MH1", "MH3", "MH2", "MH2", "MH3", "MH1", "MH2", "MH4"), + levels = c("MH1", "MH2", "MH3", "MH4", "MHX") + ), + ARM = rep(c("A", "B"), each = 6), + SEX = c("F", "F", "M", "M", "M", "M", "F", "F", "F", "M", "M", "F") + ) + df_adsl <- data.frame( + USUBJID = 1:9, + ARM = rep(c("A", "B"), c(5, 4)) + ) + + lyt <- basic_table() %>% + split_cols_by("ARM") %>% + split_rows_by("SEX", child_labels = "visible") %>% + add_colcounts() %>% + summarize_occurrences(var = "MHDECOD") + + result <- lyt %>% + build_table(df, alt_counts_df = df_adsl) + + res <- testthat::expect_silent(result) + testthat::expect_snapshot(res) +}) + +testthat::test_that("summarize_occurrences works as expected with risk difference column", { + # One statistic + result <- basic_table(show_colcounts = TRUE) %>% + split_cols_by("ARM", split_fun = add_riskdiff("A: Drug X", "B: Placebo")) %>% + split_rows_by("SEX", child_labels = "visible") %>% + summarize_occurrences( + var = "BMRKR2", + riskdiff = TRUE + ) %>% + build_table(tern_ex_adae) + + res <- testthat::expect_silent(result) + testthat::expect_snapshot(res) + + # Multiple statistics, different id var + result <- basic_table(show_colcounts = TRUE) %>% + split_cols_by("ARM", split_fun = add_riskdiff("A: Drug X", "B: Placebo")) %>% + split_rows_by("SEX", child_labels = "visible") %>% + summarize_occurrences( + var = "BMRKR2", + riskdiff = TRUE, + .stats = c("count", "count_fraction_fixed_dp", "fraction"), + id = "SITEID" + ) %>% + build_table(tern_ex_adae) + + res <- testthat::expect_silent(result) + testthat::expect_snapshot(res) + + # Nested column splits + result <- basic_table(show_colcounts = TRUE) %>% + split_cols_by("STRATA1") %>% + split_cols_by("ARM", split_fun = add_riskdiff("A: Drug X", "B: Placebo")) %>% + split_rows_by("SEX", child_labels = "visible") %>% + summarize_occurrences( + var = "BMRKR2", + riskdiff = TRUE + ) %>% + build_table(tern_ex_adae) + + res <- testthat::expect_silent(result) + testthat::expect_snapshot(res) +}) diff --git a/tests/testthat/test-utils_default_stats_formats_labels.R b/tests/testthat/test-utils_default_stats_formats_labels.R index 99ebc2f278..a0585f79ff 100644 --- a/tests/testthat/test-utils_default_stats_formats_labels.R +++ b/tests/testthat/test-utils_default_stats_formats_labels.R @@ -138,6 +138,50 @@ testthat::test_that("get_labels_from_stats works as expected", { ), stats_to_do ) + + # with row_nms + testthat::expect_identical( + get_labels_from_stats( + stats = c("count", "count_fraction"), + labels_in = c("c" = "Lvl c:", "count_fraction.a" = "CF: A", "count.b" = "Count of b"), + row_nms = c("a", "b", "c") + ), + c( + count.a = "a", count.b = "Count of b", count.c = "Lvl c:", + count_fraction.a = "CF: A", count_fraction.b = "b", count_fraction.c = "Lvl c:" + ) + ) +}) + +testthat::test_that("get_indents_from_stats works as expected", { + sts <- get_stats("count_occurrences") + res <- testthat::expect_silent(get_indents_from_stats(sts)) + testthat::expect_snapshot(res) + + testthat::expect_identical(get_indents_from_stats("count", NULL)[["count"]], 0L) + testthat::expect_identical(get_indents_from_stats(c("count"), 3L), 3L) + + # integer vector + stats_to_do <- c("count" = 3L, "mean" = 6L) + testthat::expect_identical( + get_indents_from_stats(c(names(stats_to_do), "n"), + indents_in = stats_to_do + ), + c(stats_to_do, n = 0L) + ) + + # with row_nms + testthat::expect_identical( + get_indents_from_stats( + stats = c("count", "count_fraction"), + indents_in = c("c" = 3L, "count_fraction.a" = 1L, "count.b" = 2L), + row_nms = c("a", "b", "c") + ), + c( + count.a = 0L, count.b = 2L, count.c = 3L, + count_fraction.a = 1L, count_fraction.b = 0L, count_fraction.c = 3L + ) + ) }) testthat::test_that("summary_formats works as expected", {