From 1e21f297bd9a813e3d5d268b44f365f1bbb7fb09 Mon Sep 17 00:00:00 2001 From: Emily de la Rua <59304861+edelarua@users.noreply.github.com> Date: Tue, 31 Oct 2023 08:27:23 -0400 Subject: [PATCH 1/4] Refactor `count_occurrences` (#1076) Fixes #1073 --- NAMESPACE | 2 + NEWS.md | 2 + R/count_occurrences.R | 151 +++++++++++++++--- R/riskdiff.R | 16 +- R/summarize_functions.R | 1 + R/utils_default_stats_formats_labels.R | 88 +++++++++- man/afun_riskdiff.Rd | 12 +- man/count_occurrences.Rd | 86 +++++++--- man/default_stats_formats_labels.Rd | 29 +++- man/summarize_functions.Rd | 1 + tests/testthat/_snaps/count_occurrences.md | 99 ++++++++++++ .../utils_default_stats_formats_labels.md | 20 ++- tests/testthat/test-count_occurrences.R | 122 +++++++++++++- .../test-utils_default_stats_formats_labels.R | 44 +++++ 14 files changed, 603 insertions(+), 70 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index f09604b8be..0f9539a37b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -129,6 +129,7 @@ export(g_lineplot) export(g_step) export(g_waterfall) export(get_formats_from_stats) +export(get_indents_from_stats) export(get_labels_from_stats) export(get_smooths) export(get_stats) @@ -268,6 +269,7 @@ export(summarize_coxreg) export(summarize_glm_count) export(summarize_logistic) export(summarize_num_patients) +export(summarize_occurrences) export(summarize_occurrences_by_grade) export(summarize_patients_events_in_cols) export(summarize_patients_exposure_in_cols) diff --git a/NEWS.md b/NEWS.md index e242de45cb..3e224caf59 100644 --- a/NEWS.md +++ b/NEWS.md @@ -7,6 +7,8 @@ ### New Features * Added the `na_str` argument to `analyze` & `summarize_row_groups` wrapper functions `count_abnormal`, `count_abnormal_by_baseline`, `count_abnormal_by_marked`, `count_abnormal_by_worst_grade`, `count_abnormal_lab_worsen_by_baseline`, `count_cumulative`, `count_missed_doses`, `count_occurrences`, `count_occurrences_by_grade`, `summarize_occurrences_by_grade`, `summarize_patients_events_in_cols`, `count_patients_with_event`, `count_patients_with_flags`, `count_values`, `estimate_multinomial_response`, `estimate_proportion`, `h_tab_one_biomarker`, `estimate_incidence_rate`, `logistic_summary_by_flag`, `estimate_odds_ratio`, `estimate_proportion_diff`, `test_proportion_diff`, `summarize_ancova`, `summarize_change`, `summarize_glm_count`, `summarize_num_patients`, `analyze_num_patients`, `summarize_patients_exposure_in_cols`, `coxph_pairwise`, `tabulate_survival_subgroups`, `surv_time`, and `surv_timepoint`. +* Added function `get_indents_from_stats` to format and return indent modifiers for a given set of statistics. +* Added summarize function version of `count_occurrences` analyze function, `summarize_occurrences`. ### Enhancements * Added formatting function `format_count_fraction_lt10` for formatting `count_fraction` with special consideration when count is less than 10. diff --git a/R/count_occurrences.R b/R/count_occurrences.R index 19d34e5dfe..677458f416 100644 --- a/R/count_occurrences.R +++ b/R/count_occurrences.R @@ -111,10 +111,7 @@ s_count_occurrences <- function(df, #' * `a_count_occurrences()` returns the corresponding list with formatted [rtables::CellValue()]. #' #' @examples -#' # We need to ungroup `count_fraction` first so that the `rtables` formatting -#' # function `format_count_fraction()` can be applied correctly. -#' afun <- make_afun(a_count_occurrences, .ungroup_stats = c("count", "count_fraction", "fraction")) -#' afun( +#' a_count_occurrences( #' df, #' .N_col = N_per_col, #' .df_row = df, @@ -123,10 +120,61 @@ s_count_occurrences <- function(df, #' ) #' #' @export -a_count_occurrences <- make_afun( - s_count_occurrences, - .formats = c(count = "xx", count_fraction = format_count_fraction_fixed_dp, fraction = format_fraction_fixed_dp) -) +a_count_occurrences <- function(df, + labelstr = "", + id = "USUBJID", + denom = c("N_col", "n"), + drop = TRUE, + .N_col, # nolint + .var = NULL, + .df_row = NULL, + .stats = NULL, + .formats = NULL, + .labels = NULL, + .indent_mods = NULL, + na_str = NA_character_) { + denom <- match.arg(denom) + x_stats <- s_count_occurrences( + df = df, denom = denom, .N_col = .N_col, .df_row = .df_row, drop = drop, .var = .var, id = id + ) + if (is.null(unlist(x_stats))) { + return(NULL) + } + x_lvls <- names(x_stats[[1]]) + + # Fill in with formatting defaults if needed + .stats <- get_stats("count_occurrences", stats_in = .stats) + .formats <- get_formats_from_stats(.stats, .formats) + .labels <- get_labels_from_stats(.stats, .labels, row_nms = x_lvls) + .indent_mods <- get_indents_from_stats(.stats, .indent_mods, row_nms = x_lvls) + + if ("count_fraction_fixed_dp" %in% .stats) x_stats[["count_fraction_fixed_dp"]] <- x_stats[["count_fraction"]] + x_stats <- x_stats[.stats] + + # Ungroup statistics with values for each level of x + x_ungrp <- ungroup_stats(x_stats, .formats, list(), list()) + x_stats <- x_ungrp[["x"]] + .formats <- x_ungrp[[".formats"]] + + # Auto format handling + fmt_is_auto <- vapply(.formats, function(ii) is.character(ii) && ii == "auto", logical(1)) + if (any(fmt_is_auto)) { + res_l_auto <- x_stats[fmt_is_auto] + tmp_dt_var <- .df_row[[.var]] # xxx this can be extended for the WHOLE data or single facets + .formats[fmt_is_auto] <- lapply(seq_along(res_l_auto), function(rla) { + format_auto(tmp_dt_var, names(res_l_auto)[rla]) + }) + } + + in_rows( + .list = x_stats, + .formats = .formats, + .names = .labels, + .labels = .labels, + .indent_mods = .indent_mods, + .format_na_strs = na_str + ) +} #' @describeIn count_occurrences Layout-creating function which can take statistics function arguments #' and additional format arguments. This function is a wrapper for [rtables::analyze()]. @@ -147,7 +195,8 @@ a_count_occurrences <- make_afun( #' "MH1", "MH2", "MH1", "MH1", "MH1", "MH3", #' "MH2", "MH2", "MH3", "MH1", "MH2", "MH4" #' ), -#' ARM = rep(c("A", "B"), each = 6) +#' ARM = rep(c("A", "B"), each = 6), +#' SEX = c("F", "F", "M", "M", "M", "M", "F", "F", "F", "M", "M", "F") #' ) #' df_adsl <- df %>% #' select(USUBJID, ARM) %>% @@ -174,36 +223,32 @@ count_occurrences <- function(lyt, nested = TRUE, ..., table_names = vars, - .stats = "count_fraction", + .stats = "count_fraction_fixed_dp", .formats = NULL, .labels = NULL, .indent_mods = NULL) { checkmate::assert_flag(riskdiff) - afun <- make_afun( - a_count_occurrences, - .stats = .stats, - .formats = .formats, - .labels = .labels, - .indent_mods = .indent_mods, - .ungroup_stats = .stats + extra_args <- list( + .stats = .stats, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods, na_str = na_str ) - extra_args <- if (isFALSE(riskdiff)) { - list(...) + if (isFALSE(riskdiff)) { + extra_args <- c(extra_args, list(...)) } else { - list( - afun = list("s_count_occurrences" = afun), - .stats = .stats, - .indent_mods = .indent_mods, - s_args = list(...) + extra_args <- c( + extra_args, + list( + afun = list("s_count_occurrences" = a_count_occurrences), + s_args = list(...) + ) ) } analyze( lyt = lyt, vars = vars, - afun = ifelse(isFALSE(riskdiff), afun, afun_riskdiff), + afun = ifelse(isFALSE(riskdiff), a_count_occurrences, afun_riskdiff), var_labels = var_labels, show_labels = show_labels, table_names = table_names, @@ -212,3 +257,59 @@ count_occurrences <- function(lyt, extra_args = extra_args ) } + +#' @describeIn count_occurrences Layout-creating function which can take content function arguments +#' and additional format arguments. This function is a wrapper for [rtables::summarize_row_groups()]. +#' +#' @return +#' * `summarize_occurrences()` returns a layout object suitable for passing to further layouting functions, +#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted content rows +#' containing the statistics from `s_count_occurrences()` to the table layout. +#' +#' @examples +#' # Layout creating function with custom format. +#' basic_table() %>% +#' add_colcounts() %>% +#' split_rows_by("SEX", child_labels = "visible") %>% +#' summarize_occurrences( +#' var = "MHDECOD", +#' .formats = c("count_fraction" = "xx.xx (xx.xx%)") +#' ) %>% +#' build_table(df, alt_counts_df = df_adsl) +#' +#' @export +summarize_occurrences <- function(lyt, + var, + riskdiff = FALSE, + na_str = NA_character_, + ..., + .stats = "count_fraction_fixed_dp", + .formats = NULL, + .indent_mods = NULL, + .labels = NULL) { + checkmate::assert_flag(riskdiff) + + extra_args <- list( + .stats = .stats, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods, na_str = na_str + ) + + if (isFALSE(riskdiff)) { + extra_args <- c(extra_args, list(...)) + } else { + extra_args <- c( + extra_args, + list( + afun = list("s_count_occurrences" = a_count_occurrences), + s_args = list(...) + ) + ) + } + + summarize_row_groups( + lyt = lyt, + var = var, + cfun = ifelse(isFALSE(riskdiff), a_count_occurrences, afun_riskdiff), + na_str = na_str, + extra_args = extra_args + ) +} diff --git a/R/riskdiff.R b/R/riskdiff.R index b908f45c67..8a86be6dce 100644 --- a/R/riskdiff.R +++ b/R/riskdiff.R @@ -78,7 +78,10 @@ afun_riskdiff <- function(df, .spl_context, .all_col_counts, .stats, - .indent_mods, + .formats = NULL, + .labels = NULL, + .indent_mods = NULL, + na_str = NA_character_, afun, s_args = list()) { if (!any(grepl("riskdiff", names(.spl_context)))) { @@ -89,8 +92,10 @@ afun_riskdiff <- function(df, } checkmate::assert_list(afun, len = 1, types = "function") checkmate::assert_named(afun) - - afun_args <- list(.var = .var, .df_row = .df_row, .N_row = .N_row, denom = "N_col", labelstr = labelstr) + afun_args <- list( + .var = .var, .df_row = .df_row, .N_row = .N_row, denom = "N_col", labelstr = labelstr, + .stats = .stats, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods, na_str = na_str + ) afun_args <- afun_args[intersect(names(afun_args), names(as.list(args(afun[[1]]))))] if ("denom" %in% names(s_args)) afun_args[["denom"]] <- NULL @@ -113,8 +118,9 @@ afun_riskdiff <- function(df, cur_var <- tail(.spl_context$cur_col_split[[1]], 1) # Apply statistics function to arm X and arm Y data - s_x <- do.call(names(afun), args = c(list(df = df[df[[cur_var]] == arm_x, ], .N_col = N_col_x), afun_args, s_args)) - s_y <- do.call(names(afun), args = c(list(df = df[df[[cur_var]] == arm_y, ], .N_col = N_col_y), afun_args, s_args)) + s_args <- c(s_args, afun_args[intersect(names(afun_args), names(as.list(args(names(afun)))))]) + s_x <- do.call(names(afun), args = c(list(df = df[df[[cur_var]] == arm_x, ], .N_col = N_col_x), s_args)) + s_y <- do.call(names(afun), args = c(list(df = df[df[[cur_var]] == arm_y, ], .N_col = N_col_y), s_args)) # Get statistic name and row names stat <- ifelse("count_fraction" %in% names(s_x), "count_fraction", "unique") diff --git a/R/summarize_functions.R b/R/summarize_functions.R index 9004ba626d..58d7d7461b 100644 --- a/R/summarize_functions.R +++ b/R/summarize_functions.R @@ -10,6 +10,7 @@ #' * [h_tab_one_biomarker()] (probably to deprecate) #' * [logistic_summary_by_flag()] #' * [summarize_num_patients()] +#' * [summarize_occurrences()] #' * [summarize_occurrences_by_grade()] #' * [summarize_patients_events_in_cols()] #' * [summarize_patients_exposure_in_cols()] diff --git a/R/utils_default_stats_formats_labels.R b/R/utils_default_stats_formats_labels.R index 789442b99c..5adda079fa 100644 --- a/R/utils_default_stats_formats_labels.R +++ b/R/utils_default_stats_formats_labels.R @@ -68,7 +68,7 @@ get_stats <- function(method_groups = "analyze_vars_numeric", stats_in = NULL, a for (mgi in method_groups) { # Main switcher out_tmp <- switch(mgi, - "count_occurrences" = c("count", "count_fraction_fixed_dp", "fraction"), + "count_occurrences" = c("count", "count_fraction", "count_fraction_fixed_dp", "fraction"), "summarize_num_patients" = c("unique", "nonunique", "unique_count"), "analyze_vars_counts" = c("n", "count", "count_fraction", "n_blq"), "analyze_vars_numeric" = c( @@ -189,7 +189,12 @@ get_formats_from_stats <- function(stats, formats_in = NULL) { #' @describeIn default_stats_formats_labels Get labels from vector of statistical methods. #' -#' @param labels_in (named `vector`) \cr inserted labels to replace defaults. +#' @param labels_in (named `vector`)\cr inserted labels to replace defaults. +#' @param row_nms (`character`)\cr row names. Levels of a `factor` or `character` variable, each +#' of which the statistics in `.stats` will be calculated for. If this parameter is set, these +#' variable levels will be used as the defaults, and the names of the given custom values should +#' correspond to levels (or have format `statistic.level`) instead of statistics. Can also be +#' variable names if rows correspond to different variables instead of levels. Defaults to `NULL`. #' #' @return #' * `get_labels_from_stats()` returns a named character vector of default labels (if present @@ -207,8 +212,9 @@ get_formats_from_stats <- function(stats, formats_in = NULL) { #' get_labels_from_stats(all_cnt_occ, labels_in = list("fraction" = c("Some more fractions"))) #' #' @export -get_labels_from_stats <- function(stats, labels_in = NULL) { +get_labels_from_stats <- function(stats, labels_in = NULL, row_nms = NULL) { checkmate::assert_character(stats, min.len = 1) + checkmate::assert_character(row_nms, null.ok = TRUE) # It may be a list if (checkmate::test_list(labels_in, null.ok = TRUE)) { checkmate::assert_list(labels_in, null.ok = TRUE) @@ -217,12 +223,22 @@ get_labels_from_stats <- function(stats, labels_in = NULL) { checkmate::assert_character(labels_in, null.ok = TRUE) } - which_lbl <- match(stats, names(tern_default_labels)) + if (!is.null(row_nms)) { + ret <- rep(row_nms, length(stats)) + out <- setNames(ret, paste(rep(stats, each = length(row_nms)), ret, sep = ".")) - ret <- vector("character", length = length(stats)) # it needs to be a character vector - ret[!is.na(which_lbl)] <- tern_default_labels[which_lbl[!is.na(which_lbl)]] + if (!is.null(labels_in)) { + lvl_lbls <- intersect(names(labels_in), row_nms) + for (i in lvl_lbls) out[paste(stats, i, sep = ".")] <- labels_in[[i]] + } + } else { + which_lbl <- match(stats, names(tern_default_labels)) - out <- setNames(ret, stats) + ret <- vector("character", length = length(stats)) # it needs to be a character vector + ret[!is.na(which_lbl)] <- tern_default_labels[which_lbl[!is.na(which_lbl)]] + + out <- setNames(ret, stats) + } # Modify some with custom labels if (!is.null(labels_in)) { @@ -234,6 +250,62 @@ get_labels_from_stats <- function(stats, labels_in = NULL) { out } +#' @describeIn default_stats_formats_labels Format indent modifiers for a given vector/list of statistics. +#' +#' @param indents_in (named `vector`)\cr inserted indent modifiers to replace defaults (default is `0L`). +#' +#' @return +#' * `get_indents_from_stats()` returns a single indent modifier value to apply to all rows +#' or a named numeric vector of indent modifiers (if present, otherwise `NULL`). +#' +#' @examples +#' get_indents_from_stats(all_cnt_occ, indents_in = 3L) +#' get_indents_from_stats(all_cnt_occ, indents_in = list(count = 2L, count_fraction = 5L)) +#' get_indents_from_stats( +#' all_cnt_occ, +#' indents_in = list(a = 2L, count.a = 1L, count.b = 5L), row_nms = c("a", "b") +#' ) +#' +#' @export +get_indents_from_stats <- function(stats, indents_in = NULL, row_nms = NULL) { + checkmate::assert_character(stats, min.len = 1) + checkmate::assert_character(row_nms, null.ok = TRUE) + # It may be a list + if (checkmate::test_list(indents_in, null.ok = TRUE)) { + checkmate::assert_list(indents_in, null.ok = TRUE) + # Or it may be a vector of integers + } else { + checkmate::assert_integerish(indents_in, null.ok = TRUE) + } + + if (is.null(names(indents_in)) && length(indents_in) == 1) { + out <- rep(indents_in, length(stats) * if (!is.null(row_nms)) length(row_nms) else 1) + return(out) + } + + if (!is.null(row_nms)) { + ret <- rep(0L, length(stats) * length(row_nms)) + out <- setNames(ret, paste(rep(stats, each = length(row_nms)), rep(row_nms, length(stats)), sep = ".")) + + if (!is.null(indents_in)) { + lvl_lbls <- intersect(names(indents_in), row_nms) + for (i in lvl_lbls) out[paste(stats, i, sep = ".")] <- indents_in[[i]] + } + } else { + ret <- rep(0L, length(stats)) + out <- setNames(ret, stats) + } + + # Modify some with custom labels + if (!is.null(indents_in)) { + # Stats is the main + common_names <- intersect(names(out), names(indents_in)) + out[common_names] <- indents_in[common_names] + } + + out +} + #' @describeIn default_stats_formats_labels Named list of default formats for `tern`. #' @format #' * `tern_default_formats` is a list of available formats, named after their relevant @@ -284,11 +356,13 @@ tern_default_formats <- c( #' @export tern_default_labels <- c( # list of labels -> sorted? xxx it should be not relevant due to match + fraction = "fraction", unique = "Number of patients with at least one event", nonunique = "Number of events", n = "n", count = "count", count_fraction = "count_fraction", + count_fraction_fixed_dp = "count_fraction", n_blq = "n_blq", sum = "Sum", mean = "Mean", diff --git a/man/afun_riskdiff.Rd b/man/afun_riskdiff.Rd index 5b69b18e9b..372d5732bc 100644 --- a/man/afun_riskdiff.Rd +++ b/man/afun_riskdiff.Rd @@ -14,7 +14,10 @@ afun_riskdiff( .spl_context, .all_col_counts, .stats, - .indent_mods, + .formats = NULL, + .labels = NULL, + .indent_mods = NULL, + na_str = NA_character_, afun, s_args = list() ) @@ -45,9 +48,16 @@ taken from \code{alt_counts_df} if specified (see \code{\link[rtables:build_tabl \item{.stats}{(\code{character})\cr statistics to select for the table.} +\item{.formats}{(named \code{character} or \code{list})\cr formats for the statistics. See Details in \code{analyze_vars} for more +information on the \code{"auto"} setting.} + +\item{.labels}{(named \code{character})\cr labels for the statistics (without indent).} + \item{.indent_mods}{(named \code{integer})\cr indent modifiers for the labels. Defaults to 0, which corresponds to the unmodified default behavior. Can be negative.} +\item{na_str}{(\code{string})\cr string used to replace all \code{NA} or empty values in the output.} + \item{afun}{(named \code{list})\cr A named list containing one name-value pair where the name corresponds to the name of the statistics function that should be used in calculations and the value is the corresponding analysis function.} diff --git a/man/count_occurrences.Rd b/man/count_occurrences.Rd index b5f2c99f49..f7c5f5d156 100644 --- a/man/count_occurrences.Rd +++ b/man/count_occurrences.Rd @@ -4,6 +4,7 @@ \alias{count_occurrences} \alias{s_count_occurrences} \alias{a_count_occurrences} +\alias{summarize_occurrences} \title{Occurrence Counts} \usage{ s_count_occurrences( @@ -18,12 +19,18 @@ s_count_occurrences( a_count_occurrences( df, + labelstr = "", + id = "USUBJID", denom = c("N_col", "n"), - .N_col, - .df_row, drop = TRUE, - .var = "MHDECOD", - id = "USUBJID" + .N_col, + .var = NULL, + .df_row = NULL, + .stats = NULL, + .formats = NULL, + .labels = NULL, + .indent_mods = NULL, + na_str = NA_character_ ) count_occurrences( @@ -36,11 +43,23 @@ count_occurrences( nested = TRUE, ..., table_names = vars, - .stats = "count_fraction", + .stats = "count_fraction_fixed_dp", .formats = NULL, .labels = NULL, .indent_mods = NULL ) + +summarize_occurrences( + lyt, + var, + riskdiff = FALSE, + na_str = NA_character_, + ..., + .stats = "count_fraction_fixed_dp", + .formats = NULL, + .indent_mods = NULL, + .labels = NULL +) } \arguments{ \item{df}{(\code{data.frame})\cr data set containing all analysis variables.} @@ -59,11 +78,27 @@ passed by \code{rtables}.} \item{drop}{(\code{flag})\cr should non appearing occurrence levels be dropped from the resulting table. Note that in that case the remaining occurrence levels in the table are sorted alphabetically.} -\item{.var}{(\code{string})\cr single variable name that is passed by \code{rtables} when requested +\item{.var, var}{(\code{string})\cr single variable name that is passed by \code{rtables} when requested by a statistics function.} \item{id}{(\code{string})\cr subject variable name.} +\item{labelstr}{(\code{character})\cr label of the level of the parent split currently being summarized +(must be present as second argument in Content Row Functions). See \code{\link[rtables:summarize_row_groups]{rtables::summarize_row_groups()}} +for more information.} + +\item{.stats}{(\code{character})\cr statistics to select for the table.} + +\item{.formats}{(named \code{character} or \code{list})\cr formats for the statistics. See Details in \code{analyze_vars} for more +information on the \code{"auto"} setting.} + +\item{.labels}{(named \code{character})\cr labels for the statistics (without indent).} + +\item{.indent_mods}{(named \code{integer})\cr indent modifiers for the labels. Defaults to 0, which corresponds to the +unmodified default behavior. Can be negative.} + +\item{na_str}{(\code{string})\cr string used to replace all \code{NA} or empty values in the output.} + \item{lyt}{(\code{layout})\cr input layout where analyses will be added to.} \item{vars}{(\code{character})\cr variable names for the primary analysis variable to be iterated over.} @@ -76,8 +111,6 @@ by a statistics function.} used as \code{split_fun} in the prior column split of the table layout, specifying which columns should be compared. See \code{\link[=stat_propdiff_ci]{stat_propdiff_ci()}} for details on risk difference calculation.} -\item{na_str}{(\code{string})\cr string used to replace all \code{NA} or empty values in the output.} - \item{nested}{(\code{flag})\cr whether this layout instruction should be applied within the existing layout structure \emph{if possible} (\code{TRUE}, the default) or as a new top-level element (\code{FALSE}). Ignored if it would nest a split. underneath analyses, which is not allowed.} @@ -86,16 +119,6 @@ underneath analyses, which is not allowed.} \item{table_names}{(\code{character})\cr this can be customized in case that the same \code{vars} are analyzed multiple times, to avoid warnings from \code{rtables}.} - -\item{.stats}{(\code{character})\cr statistics to select for the table.} - -\item{.formats}{(named \code{character} or \code{list})\cr formats for the statistics. See Details in \code{analyze_vars} for more -information on the \code{"auto"} setting.} - -\item{.labels}{(named \code{character})\cr labels for the statistics (without indent).} - -\item{.indent_mods}{(named \code{integer})\cr indent modifiers for the labels. Defaults to 0, which corresponds to the -unmodified default behavior. Can be negative.} } \value{ \itemize{ @@ -116,6 +139,12 @@ unmodified default behavior. Can be negative.} or to \code{\link[rtables:build_table]{rtables::build_table()}}. Adding this function to an \code{rtable} layout will add formatted rows containing the statistics from \code{s_count_occurrences()} to the table layout. } + +\itemize{ +\item \code{summarize_occurrences()} returns a layout object suitable for passing to further layouting functions, +or to \code{\link[rtables:build_table]{rtables::build_table()}}. Adding this function to an \code{rtable} layout will add formatted content rows +containing the statistics from \code{s_count_occurrences()} to the table layout. +} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} @@ -136,6 +165,9 @@ in \code{count_occurrences()}. \item \code{count_occurrences()}: Layout-creating function which can take statistics function arguments and additional format arguments. This function is a wrapper for \code{\link[rtables:analyze]{rtables::analyze()}}. +\item \code{summarize_occurrences()}: Layout-creating function which can take content function arguments +and additional format arguments. This function is a wrapper for \code{\link[rtables:summarize_row_groups]{rtables::summarize_row_groups()}}. + }} \note{ By default, occurrences which don't appear in a given row split are dropped from the table and @@ -160,10 +192,7 @@ s_count_occurrences( id = "USUBJID" ) -# We need to ungroup `count_fraction` first so that the `rtables` formatting -# function `format_count_fraction()` can be applied correctly. -afun <- make_afun(a_count_occurrences, .ungroup_stats = c("count", "count_fraction", "fraction")) -afun( +a_count_occurrences( df, .N_col = N_per_col, .df_row = df, @@ -181,7 +210,8 @@ df <- data.frame( "MH1", "MH2", "MH1", "MH1", "MH1", "MH3", "MH2", "MH2", "MH3", "MH1", "MH2", "MH4" ), - ARM = rep(c("A", "B"), each = 6) + ARM = rep(c("A", "B"), each = 6), + SEX = c("F", "F", "M", "M", "M", "M", "F", "F", "F", "M", "M", "F") ) df_adsl <- df \%>\% select(USUBJID, ARM) \%>\% @@ -198,4 +228,14 @@ lyt \%>\% build_table(df, alt_counts_df = df_adsl) \%>\% prune_table() +# Layout creating function with custom format. +basic_table() \%>\% + add_colcounts() \%>\% + split_rows_by("SEX", child_labels = "visible") \%>\% + summarize_occurrences( + var = "MHDECOD", + .formats = c("count_fraction" = "xx.xx (xx.xx\%)") + ) \%>\% + build_table(df, alt_counts_df = df_adsl) + } diff --git a/man/default_stats_formats_labels.Rd b/man/default_stats_formats_labels.Rd index abb81c2614..84997cbde5 100644 --- a/man/default_stats_formats_labels.Rd +++ b/man/default_stats_formats_labels.Rd @@ -6,6 +6,7 @@ \alias{get_stats} \alias{get_formats_from_stats} \alias{get_labels_from_stats} +\alias{get_indents_from_stats} \alias{tern_default_formats} \alias{tern_default_labels} \alias{summary_formats} @@ -32,7 +33,9 @@ get_stats( get_formats_from_stats(stats, formats_in = NULL) -get_labels_from_stats(stats, labels_in = NULL) +get_labels_from_stats(stats, labels_in = NULL, row_nms = NULL) + +get_indents_from_stats(stats, indents_in = NULL, row_nms = NULL) tern_default_formats @@ -66,7 +69,15 @@ methods.} \item{formats_in}{(named \code{vector}) \cr inserted formats to replace defaults. It can be a character vector from \code{\link[formatters:list_formats]{formatters::list_valid_format_labels()}} or a custom format function.} -\item{labels_in}{(named \code{vector}) \cr inserted labels to replace defaults.} +\item{labels_in}{(named \code{vector})\cr inserted labels to replace defaults.} + +\item{row_nms}{(\code{character})\cr row names. Levels of a \code{factor} or \code{character} variable, each +of which the statistics in \code{.stats} will be calculated for. If this parameter is set, these +variable levels will be used as the defaults, and the names of the given custom values should +correspond to levels (or have format \code{statistic.level}) instead of statistics. Can also be +variable names if rows correspond to different variables instead of levels. Defaults to \code{NULL}.} + +\item{indents_in}{(named \code{vector})\cr inserted indent modifiers to replace defaults (default is \code{0L}).} \item{type}{(\code{flag})\cr is it going to be \code{"numeric"} or \code{"counts"}?} @@ -105,6 +116,11 @@ statistics by setting \code{indent_mods_custom} to a single integer value.} otherwise \code{NULL}). } +\itemize{ +\item \code{get_indents_from_stats()} returns a single indent modifier value to apply to all rows +or a named numeric vector of indent modifiers (if present, otherwise \code{NULL}). +} + \itemize{ \item \code{summary_formats()} returns a named \code{vector} of default statistic formats for the given data type. } @@ -139,6 +155,8 @@ present \code{NULL} is returned. \item \code{get_labels_from_stats()}: Get labels from vector of statistical methods. +\item \code{get_indents_from_stats()}: Format indent modifiers for a given vector/list of statistics. + \item \code{tern_default_formats}: Named list of default formats for \code{tern}. \item \code{tern_default_labels}: \code{character} vector that contains default labels @@ -199,6 +217,13 @@ get_labels_from_stats(all_cnt_occ) get_labels_from_stats(all_cnt_occ, labels_in = c("fraction" = "Fraction")) get_labels_from_stats(all_cnt_occ, labels_in = list("fraction" = c("Some more fractions"))) +get_indents_from_stats(all_cnt_occ, indents_in = 3L) +get_indents_from_stats(all_cnt_occ, indents_in = list(count = 2L, count_fraction = 5L)) +get_indents_from_stats( + all_cnt_occ, + indents_in = list(a = 2L, count.a = 1L, count.b = 5L), row_nms = c("a", "b") +) + summary_formats() summary_formats(type = "counts", include_pval = TRUE) diff --git a/man/summarize_functions.Rd b/man/summarize_functions.Rd index bf3e5b0c74..e87a8e0d81 100644 --- a/man/summarize_functions.Rd +++ b/man/summarize_functions.Rd @@ -12,6 +12,7 @@ to add summary rows to a given table layout: \item \code{\link[=h_tab_one_biomarker]{h_tab_one_biomarker()}} (probably to deprecate) \item \code{\link[=logistic_summary_by_flag]{logistic_summary_by_flag()}} \item \code{\link[=summarize_num_patients]{summarize_num_patients()}} +\item \code{\link[=summarize_occurrences]{summarize_occurrences()}} \item \code{\link[=summarize_occurrences_by_grade]{summarize_occurrences_by_grade()}} \item \code{\link[=summarize_patients_events_in_cols]{summarize_patients_events_in_cols()}} \item \code{\link[=summarize_patients_exposure_in_cols]{summarize_patients_exposure_in_cols()}} diff --git a/tests/testthat/_snaps/count_occurrences.md b/tests/testthat/_snaps/count_occurrences.md index aae53d747c..1f2a4eb280 100644 --- a/tests/testthat/_snaps/count_occurrences.md +++ b/tests/testthat/_snaps/count_occurrences.md @@ -82,6 +82,23 @@ +# a_count_occurrences works with custom input. + + Code + res + Output + RowsVerticalSection (in_rows) object print method: + ---------------------------- + row_name formatted_cell indent_mod row_label + 1 Level: a 3 1 Level: a + 2 LVL B 1 2 LVL B + 3 Count of c 1 0 Count of c + 4 Missing D 0 3 Missing D + 5 Level: a 3 (60%) 1 Level: a + 6 LVL B 1 (20%) 2 LVL B + 7 c 1 (20%) 0 c + 8 Missing D 0 (0%) 0 Missing D + # count_occurrences functions as expected with valid input and default arguments Code @@ -184,3 +201,85 @@ dcd D.1.1.4.2 10 (8.2%) 5 (5.2%) 7 (7.1%) 3.0 (-3.5 - 9.6) 6 (7.5%) 8 (10.0%) 9 (14.1%) -2.5 (-11.2 - 6.2) dcd D.2.1.5.3 12 (9.8%) 10 (10.3%) 9 (9.2%) -0.5 (-8.5 - 7.6) 9 (11.2%) 10 (12.5%) 3 (4.7%) -1.2 (-11.3 - 8.8) +# summarize_occurrences functions as expected with valid input and default arguments + + Code + res + Output + A B + (N=5) (N=4) + ————————————————————————————— + F + MH1 1 (20.0%) 0 + MH2 1 (20.0%) 1 (25.0%) + MH3 0 1 (25.0%) + MH4 0 1 (25.0%) + M + MH1 2 (40.0%) 1 (25.0%) + MH2 0 1 (25.0%) + MH3 1 (20.0%) 0 + +# summarize_occurrences works as expected with risk difference column + + Code + res + Output + A: Drug X B: Placebo C: Combination Risk Difference (%) (95% CI) + (N=202) (N=177) (N=162) (N=379) + ————————————————————————————————————————————————————————————————————————————————— + F + LOW 12 (5.9%) 11 (6.2%) 10 (6.2%) -0.3 (-5.1 - 4.6) + MEDIUM 11 (5.4%) 13 (7.3%) 10 (6.2%) -1.9 (-6.9 - 3.1) + HIGH 10 (5.0%) 7 (4.0%) 7 (4.3%) 1.0 (-3.2 - 5.1) + M + LOW 12 (5.9%) 5 (2.8%) 6 (3.7%) 3.1 (-1.0 - 7.2) + MEDIUM 10 (5.0%) 9 (5.1%) 4 (2.5%) -0.1 (-4.5 - 4.3) + HIGH 4 (2.0%) 12 (6.8%) 11 (6.8%) -4.8 (-9.0 - -0.6) + +--- + + Code + res + Output + A: Drug X B: Placebo C: Combination Risk Difference (%) (95% CI) + (N=202) (N=177) (N=162) (N=379) + ———————————————————————————————————————————————————————————————————————————————————————— + F + LOW 10 6 8 1.6 (-2.4 - 5.6) + MEDIUM 9 11 9 -1.8 (-6.3 - 2.8) + HIGH 6 6 7 -0.4 (-4.0 - 3.1) + LOW 10 (5.0%) 6 (3.4%) 8 (4.9%) 1.6 (-2.4 - 5.6) + MEDIUM 9 (4.5%) 11 (6.2%) 9 (5.6%) -1.8 (-6.3 - 2.8) + HIGH 6 (3.0%) 6 (3.4%) 7 (4.3%) -0.4 (-4.0 - 3.1) + LOW 10/202 (5.0%) 6/177 (3.4%) 8/162 (4.9%) 1.6 (-2.4 - 5.6) + MEDIUM 9/202 (4.5%) 11/177 (6.2%) 9/162 (5.6%) -1.8 (-6.3 - 2.8) + HIGH 6/202 (3.0%) 6/177 (3.4%) 7/162 (4.3%) -0.4 (-4.0 - 3.1) + M + LOW 7 5 6 0.6 (-2.9 - 4.2) + MEDIUM 9 7 4 0.5 (-3.5 - 4.5) + HIGH 4 11 8 -4.2 (-8.3 - -0.2) + LOW 7 (3.5%) 5 (2.8%) 6 (3.7%) 0.6 (-2.9 - 4.2) + MEDIUM 9 (4.5%) 7 (4.0%) 4 (2.5%) 0.5 (-3.5 - 4.5) + HIGH 4 (2.0%) 11 (6.2%) 8 (4.9%) -4.2 (-8.3 - -0.2) + LOW 7/202 (3.5%) 5/177 (2.8%) 6/162 (3.7%) 0.6 (-2.9 - 4.2) + MEDIUM 9/202 (4.5%) 7/177 (4.0%) 4/162 (2.5%) 0.5 (-3.5 - 4.5) + HIGH 4/202 (2.0%) 11/177 (6.2%) 8/162 (4.9%) -4.2 (-8.3 - -0.2) + +--- + + Code + res + Output + A B C + A: Drug X B: Placebo C: Combination Risk Difference (%) (95% CI) A: Drug X B: Placebo C: Combination Risk Difference (%) (95% CI) A: Drug X B: Placebo C: Combination Risk Difference (%) (95% CI) + (N=64) (N=67) (N=62) (N=131) (N=78) (N=64) (N=45) (N=142) (N=60) (N=46) (N=55) (N=106) + ——————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————— + F + LOW 6 (9.4%) 4 (6.0%) 1 (1.6%) 3.4 (-5.7 - 12.5) 3 (3.8%) 5 (7.8%) 6 (13.3%) -4.0 (-11.8 - 3.9) 3 (5.0%) 2 (4.3%) 3 (5.5%) 0.7 (-7.4 - 8.7) + MEDIUM 1 (1.6%) 4 (6.0%) 4 (6.5%) -4.4 (-10.8 - 2.0) 6 (7.7%) 4 (6.2%) 2 (4.4%) 1.4 (-6.9 - 9.8) 4 (6.7%) 5 (10.9%) 4 (7.3%) -4.2 (-15.2 - 6.8) + HIGH 3 (4.7%) 4 (6.0%) 3 (4.8%) -1.3 (-9.0 - 6.4) 3 (3.8%) 2 (3.1%) 0 0.7 (-5.3 - 6.8) 4 (6.7%) 1 (2.2%) 4 (7.3%) 4.5 (-3.1 - 12.1) + M + LOW 1 (1.6%) 2 (3.0%) 2 (3.2%) -1.4 (-6.5 - 3.7) 7 (9.0%) 2 (3.1%) 3 (6.7%) 5.8 (-1.8 - 13.5) 4 (6.7%) 1 (2.2%) 1 (1.8%) 4.5 (-3.1 - 12.1) + MEDIUM 5 (7.8%) 2 (3.0%) 1 (1.6%) 4.8 (-2.9 - 12.6) 2 (2.6%) 3 (4.7%) 2 (4.4%) -2.1 (-8.4 - 4.1) 3 (5.0%) 4 (8.7%) 1 (1.8%) -3.7 (-13.5 - 6.1) + HIGH 2 (3.1%) 5 (7.5%) 5 (8.1%) -4.3 (-11.9 - 3.3) 2 (2.6%) 5 (7.8%) 3 (6.7%) -5.2 (-12.7 - 2.2) 0 2 (4.3%) 3 (5.5%) -4.3 (-10.2 - 1.5) + diff --git a/tests/testthat/_snaps/utils_default_stats_formats_labels.md b/tests/testthat/_snaps/utils_default_stats_formats_labels.md index 4a9831a8fc..fdf98b4bc9 100644 --- a/tests/testthat/_snaps/utils_default_stats_formats_labels.md +++ b/tests/testthat/_snaps/utils_default_stats_formats_labels.md @@ -3,8 +3,8 @@ Code res Output - [1] "count" "count_fraction_fixed_dp" - [3] "fraction" + [1] "count" "count_fraction" + [3] "count_fraction_fixed_dp" "fraction" --- @@ -36,8 +36,20 @@ Code res Output - count count_fraction_fixed_dp fraction - "count" "" "" + count count_fraction count_fraction_fixed_dp + "count" "count_fraction" "count_fraction" + fraction + "fraction" + +# get_indents_from_stats works as expected + + Code + res + Output + count count_fraction count_fraction_fixed_dp + 0 0 0 + fraction + 0 # summary_formats works as expected diff --git a/tests/testthat/test-count_occurrences.R b/tests/testthat/test-count_occurrences.R index 46677852a8..eb0f75df63 100644 --- a/tests/testthat/test-count_occurrences.R +++ b/tests/testthat/test-count_occurrences.R @@ -65,6 +65,50 @@ testthat::test_that("s_count_occurrences functions as expected when requesting d testthat::expect_snapshot(res) }) +testthat::test_that("a_count_occurrences works with healthy input.", { + options("width" = 100) + + # factor input + df <- data.frame( + id = factor(1:5), + x = factor(c("a", "a", "b", "c", "a")) + ) + result <- a_count_occurrences( + df = df, .N_col = 10, .stats = get_stats("count_occurrences"), .var = "x", id = "id", .df_row = df + ) + res_out <- testthat::expect_silent(result) + + # character input + df <- data.frame( + id = factor(1:5), + x = c("a", "a", "b", "c", "a") + ) + result <- a_count_occurrences( + df = df, .N_col = 10, .stats = get_stats("count_occurrences"), .var = "x", id = "id", .df_row = df + ) + res_out <- testthat::expect_silent(result) +}) + +testthat::test_that("a_count_occurrences works with custom input.", { + options("width" = 100) + + df <- data.frame( + id = factor(1:5), + x = factor(c("a", "a", "b", "c", "a"), levels = c("a", "b", "c", "d")) + ) + + result <- a_count_occurrences( + df = df, .df_row = df, .var = "x", id = "id", .N_col = 5, + .stats = c("count", "count_fraction"), drop = FALSE, + .formats = c(count_fraction = "xx (xx%)"), + .labels = c(a = "Level: a", b = "LVL B", count.c = "Count of c", d = "Missing D"), + .indent_mods = list(a = 1L, b = 2L, count.d = 3L) + ) + + res <- testthat::expect_silent(result) + testthat::expect_snapshot(res) +}) + testthat::test_that("count_occurrences functions as expected with valid input and default arguments", { df <- data.frame( USUBJID = as.character(c(1, 1, 2, 4, 4, 4, 6, 6, 6, 7, 7, 8)), @@ -84,7 +128,7 @@ testthat::test_that("count_occurrences functions as expected with valid input an add_colcounts() %>% count_occurrences(vars = "MHDECOD") - result <- rtable_object <- lyt %>% + result <- lyt %>% build_table(df, alt_counts_df = df_adsl) res <- testthat::expect_silent(result) @@ -110,7 +154,7 @@ testthat::test_that("count_occurrences functions as expected with label row spec show_labels = "visible" ) - result <- rtable_object <- lyt %>% + result <- lyt %>% build_table(df, alt_counts_df = df_adsl) res <- testthat::expect_silent(result) @@ -136,7 +180,7 @@ testthat::test_that("count_occurrences works as expected with risk difference co count_occurrences( vars = "AEDECOD", riskdiff = TRUE, - .stats = c("count", "count_fraction", "fraction"), + .stats = c("count", "count_fraction_fixed_dp", "fraction"), id = "SITEID" ) %>% build_table(tern_ex_adae) @@ -157,3 +201,75 @@ testthat::test_that("count_occurrences works as expected with risk difference co res <- testthat::expect_silent(result) testthat::expect_snapshot(res) }) + +testthat::test_that("summarize_occurrences functions as expected with valid input and default arguments", { + df <- data.frame( + USUBJID = as.character(c(1, 1, 2, 4, 4, 4, 6, 6, 6, 7, 7, 8)), + MHDECOD = factor( + c("MH1", "MH2", "MH1", "MH1", "MH1", "MH3", "MH2", "MH2", "MH3", "MH1", "MH2", "MH4"), + levels = c("MH1", "MH2", "MH3", "MH4", "MHX") + ), + ARM = rep(c("A", "B"), each = 6), + SEX = c("F", "F", "M", "M", "M", "M", "F", "F", "F", "M", "M", "F") + ) + df_adsl <- data.frame( + USUBJID = 1:9, + ARM = rep(c("A", "B"), c(5, 4)) + ) + + lyt <- basic_table() %>% + split_cols_by("ARM") %>% + split_rows_by("SEX", child_labels = "visible") %>% + add_colcounts() %>% + summarize_occurrences(var = "MHDECOD") + + result <- lyt %>% + build_table(df, alt_counts_df = df_adsl) + + res <- testthat::expect_silent(result) + testthat::expect_snapshot(res) +}) + +testthat::test_that("summarize_occurrences works as expected with risk difference column", { + # One statistic + result <- basic_table(show_colcounts = TRUE) %>% + split_cols_by("ARM", split_fun = add_riskdiff("A: Drug X", "B: Placebo")) %>% + split_rows_by("SEX", child_labels = "visible") %>% + summarize_occurrences( + var = "BMRKR2", + riskdiff = TRUE + ) %>% + build_table(tern_ex_adae) + + res <- testthat::expect_silent(result) + testthat::expect_snapshot(res) + + # Multiple statistics, different id var + result <- basic_table(show_colcounts = TRUE) %>% + split_cols_by("ARM", split_fun = add_riskdiff("A: Drug X", "B: Placebo")) %>% + split_rows_by("SEX", child_labels = "visible") %>% + summarize_occurrences( + var = "BMRKR2", + riskdiff = TRUE, + .stats = c("count", "count_fraction_fixed_dp", "fraction"), + id = "SITEID" + ) %>% + build_table(tern_ex_adae) + + res <- testthat::expect_silent(result) + testthat::expect_snapshot(res) + + # Nested column splits + result <- basic_table(show_colcounts = TRUE) %>% + split_cols_by("STRATA1") %>% + split_cols_by("ARM", split_fun = add_riskdiff("A: Drug X", "B: Placebo")) %>% + split_rows_by("SEX", child_labels = "visible") %>% + summarize_occurrences( + var = "BMRKR2", + riskdiff = TRUE + ) %>% + build_table(tern_ex_adae) + + res <- testthat::expect_silent(result) + testthat::expect_snapshot(res) +}) diff --git a/tests/testthat/test-utils_default_stats_formats_labels.R b/tests/testthat/test-utils_default_stats_formats_labels.R index 99ebc2f278..a0585f79ff 100644 --- a/tests/testthat/test-utils_default_stats_formats_labels.R +++ b/tests/testthat/test-utils_default_stats_formats_labels.R @@ -138,6 +138,50 @@ testthat::test_that("get_labels_from_stats works as expected", { ), stats_to_do ) + + # with row_nms + testthat::expect_identical( + get_labels_from_stats( + stats = c("count", "count_fraction"), + labels_in = c("c" = "Lvl c:", "count_fraction.a" = "CF: A", "count.b" = "Count of b"), + row_nms = c("a", "b", "c") + ), + c( + count.a = "a", count.b = "Count of b", count.c = "Lvl c:", + count_fraction.a = "CF: A", count_fraction.b = "b", count_fraction.c = "Lvl c:" + ) + ) +}) + +testthat::test_that("get_indents_from_stats works as expected", { + sts <- get_stats("count_occurrences") + res <- testthat::expect_silent(get_indents_from_stats(sts)) + testthat::expect_snapshot(res) + + testthat::expect_identical(get_indents_from_stats("count", NULL)[["count"]], 0L) + testthat::expect_identical(get_indents_from_stats(c("count"), 3L), 3L) + + # integer vector + stats_to_do <- c("count" = 3L, "mean" = 6L) + testthat::expect_identical( + get_indents_from_stats(c(names(stats_to_do), "n"), + indents_in = stats_to_do + ), + c(stats_to_do, n = 0L) + ) + + # with row_nms + testthat::expect_identical( + get_indents_from_stats( + stats = c("count", "count_fraction"), + indents_in = c("c" = 3L, "count_fraction.a" = 1L, "count.b" = 2L), + row_nms = c("a", "b", "c") + ), + c( + count.a = 0L, count.b = 2L, count.c = 3L, + count_fraction.a = 1L, count_fraction.b = 0L, count_fraction.c = 3L + ) + ) }) testthat::test_that("summary_formats works as expected", { From 9c4b9b3a520a5c4c7bd04f5d55ec1fa0f8b1ac33 Mon Sep 17 00:00:00 2001 From: edelarua Date: Tue, 31 Oct 2023 12:28:42 +0000 Subject: [PATCH 2/4] [skip actions] Bump version to 0.9.2.9003 --- .pre-commit-config.yaml | 2 +- DESCRIPTION | 4 ++-- NEWS.md | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b665e99129..bfa7d8a4f5 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,7 +6,7 @@ default_language_version: python: python3 repos: - repo: https://github.com/lorenzwalthert/precommit - rev: v0.3.2.9023 + rev: v0.3.2.9025 hooks: - id: style-files name: Style code with `styler` diff --git a/DESCRIPTION b/DESCRIPTION index 1a333c4210..259eea2904 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: tern Title: Create Common TLGs Used in Clinical Trials -Version: 0.9.2.9002 -Date: 2023-10-27 +Version: 0.9.2.9003 +Date: 2023-10-31 Authors@R: c( person("Joe", "Zhu", , "joe.zhu@roche.com", role = c("aut", "cre")), person("Daniel", "Sabanés Bové", , "daniel.sabanes_bove@roche.com", role = "aut"), diff --git a/NEWS.md b/NEWS.md index 3e224caf59..7a271bae5f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# tern 0.9.2.9002 +# tern 0.9.2.9003 ### Miscellaneous * Specified minimal version of package dependencies. From 5e8cfd1634f2743358fd15085168098a77fc2d93 Mon Sep 17 00:00:00 2001 From: Davide Garolini Date: Tue, 31 Oct 2023 14:37:57 +0100 Subject: [PATCH 3/4] Add split functions for `ref_group` position (#1111) # Pull Request Fixes #1085 --------- Signed-off-by: Davide Garolini Co-authored-by: stoilovs Co-authored-by: Jana Stoilova <43623360+anajens@users.noreply.github.com> Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com> --- DESCRIPTION | 1 + NAMESPACE | 2 + NEWS.md | 5 + R/utils_split_funs.R | 154 +++++++++++++++++++++++ _pkgdown.yml | 1 + man/utils_split_funs.Rd | 106 ++++++++++++++++ tests/testthat/_snaps/utils_split_fun.md | 77 ++++++++++++ tests/testthat/test-utils_split_fun.R | 141 +++++++++++++++++++++ 8 files changed, 487 insertions(+) create mode 100644 R/utils_split_funs.R create mode 100644 man/utils_split_funs.Rd create mode 100644 tests/testthat/_snaps/utils_split_fun.md create mode 100644 tests/testthat/test-utils_split_fun.R diff --git a/DESCRIPTION b/DESCRIPTION index 259eea2904..167711ac52 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -167,3 +167,4 @@ Collate: 'utils_factor.R' 'utils_grid.R' 'utils_rtables.R' + 'utils_split_funs.R' diff --git a/NAMESPACE b/NAMESPACE index 0f9539a37b..c6396b4d69 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -219,6 +219,7 @@ export(has_fractions_difference) export(imputation_rule) export(keep_content_rows) export(keep_rows) +export(level_order) export(logistic_regression_cols) export(logistic_summary_by_flag) export(month2day) @@ -236,6 +237,7 @@ export(prop_strat_wilson) export(prop_wald) export(prop_wilson) export(reapply_varlabels) +export(ref_group_position) export(s_compare) export(s_count_occurrences) export(s_count_occurrences_by_grade) diff --git a/NEWS.md b/NEWS.md index 7a271bae5f..848780369a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,10 @@ # tern 0.9.2.9003 +### New Features +* Added `ref_group_position` function to place the reference group facet last, first or at a certain position. +* Added `keep_level_order` split function to retain original order of levels in a split. +* Added `level_order` split function to reorder manually the levels. + ### Miscellaneous * Specified minimal version of package dependencies. diff --git a/R/utils_split_funs.R b/R/utils_split_funs.R new file mode 100644 index 0000000000..0516c26965 --- /dev/null +++ b/R/utils_split_funs.R @@ -0,0 +1,154 @@ +#' Custom Split Functions +#' +#' @description `r lifecycle::badge("stable")` +#' +#' Collection of useful functions that are expanding on the core list of functions +#' provided by `rtables`. See [rtables::custom_split_funs] and [rtables::make_split_fun()] +#' for more information on how to make a custom split function. All these functions +#' work with [split_rows_by()] argument `split_fun` to modify the way the split +#' happens. For other split functions, consider consulting [`rtables::split_funcs`]. +#' +#' @seealso [rtables::make_split_fun()] +#' +#' @name utils_split_funs +NULL + +#' @describeIn utils_split_funs split function to place reference group facet at a specific position +#' during post-processing stage. +#' +#' @param position (`string` or `integer`)\cr should it be `"first"` or `"last"` or in a specific position? +#' +#' @return +#' * `ref_group_position` returns an utility function that puts the reference group +#' as first, last or at a certain position and needs to be assigned to `split_fun`. +#' +#' @examples +#' library(dplyr) +#' +#' dat <- data.frame( +#' x = factor(letters[1:5], levels = letters[5:1]), +#' y = 1:5 +#' ) +#' +#' # With rtables layout functions +#' basic_table() %>% +#' split_cols_by("x", ref_group = "c", split_fun = ref_group_position("last")) %>% +#' analyze("y") %>% +#' build_table(dat) +#' +#' # With tern layout funcitons +#' adtte_f <- tern_ex_adtte %>% +#' filter(PARAMCD == "OS") %>% +#' mutate( +#' AVAL = day2month(AVAL), +#' is_event = CNSR == 0 +#' ) +#' +#' basic_table() %>% +#' split_cols_by(var = "ARMCD", ref_group = "ARM B", split_fun = ref_group_position("first")) %>% +#' add_colcounts() %>% +#' surv_time( +#' vars = "AVAL", +#' var_labels = "Survival Time (Months)", +#' is_event = "is_event", +#' ) %>% +#' build_table(df = adtte_f) +#' +#' basic_table() %>% +#' split_cols_by(var = "ARMCD", ref_group = "ARM B", split_fun = ref_group_position(2)) %>% +#' add_colcounts() %>% +#' surv_time( +#' vars = "AVAL", +#' var_labels = "Survival Time (Months)", +#' is_event = "is_event", +#' ) %>% +#' build_table(df = adtte_f) +#' +#' @export +ref_group_position <- function(position = "first") { + make_split_fun( + post = list( + function(splret, spl, fulldf) { + if (!"ref_group_value" %in% methods::slotNames(spl)) { + stop("Reference group is undefined.") + } + + spl_var <- rtables:::spl_payload(spl) + fulldf[[spl_var]] <- factor(fulldf[[spl_var]]) + init_lvls <- levels(fulldf[[spl_var]]) + + if (!all(names(splret$values) %in% init_lvls)) { + stop("This split function does not work with combination facets.") + } + + ref_group_pos <- which(init_lvls == rtables:::spl_ref_group(spl)) + pos_choices <- c("first", "last") + if (checkmate::test_choice(position, pos_choices) && position == "first") { + pos <- 0 + } else if (checkmate::test_choice(position, pos_choices) && position == "last") { + pos <- length(init_lvls) + } else if (checkmate::test_int(position, lower = 1, upper = length(init_lvls))) { + pos <- position - 1 + } else { + stop("Wrong input for ref group position. It must be 'first', 'last', or a integer.") + } + + reord_lvls <- append(init_lvls[-ref_group_pos], init_lvls[ref_group_pos], after = pos) + ord <- match(reord_lvls, names(splret$values)) + + make_split_result( + splret$values[ord], + splret$datasplit[ord], + splret$labels[ord] + ) + } + ) + ) +} + +#' @describeIn utils_split_funs split function to change level order based on a `integer` +#' vector or a `character` vector that represent the split variable's factor levels. +#' +#' @param order (`character` or `integer`)\cr vector of ordering indexes for the split facets. +#' +#' @return +#' * `level_order` returns an utility function that changes the original levels' order, +#' depending on input `order` and split levels. +#' +#' @examples +#' # level_order -------- +#' # Even if default would bring ref_group first, the original order puts it last +#' basic_table() %>% +#' split_cols_by("Species", split_fun = level_order(c(1, 3, 2))) %>% +#' analyze("Sepal.Length") %>% +#' build_table(iris) +#' +#' # character vector +#' new_order <- level_order(levels(iris$Species)[c(1, 3, 2)]) +#' basic_table() %>% +#' split_cols_by("Species", ref_group = "virginica", split_fun = new_order) %>% +#' analyze("Sepal.Length") %>% +#' build_table(iris) +#' +#' @export +level_order <- function(order) { + make_split_fun( + post = list( + function(splret, spl, fulldf) { + if (checkmate::test_integerish(order)) { + checkmate::assert_integerish(order, lower = 1, upper = length(splret$values)) + ord <- order + } else { + checkmate::assert_character(order, len = length(splret$values)) + checkmate::assert_set_equal(order, names(splret$values), ordered = FALSE) + ord <- match(order, names(splret$values)) + } + make_split_result( + splret$values[ord], + splret$datasplit[ord], + splret$labels[ord] + ) + } + ) + ) +} diff --git a/_pkgdown.yml b/_pkgdown.yml index 49d7ea7614..22388323ce 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -114,6 +114,7 @@ reference: - split_cols_by_groups - to_string_matrix - groups_list_to_df + - utils_split_funs - title: rtables Formatting Functions desc: These functions provide customized formatting rules to work with the diff --git a/man/utils_split_funs.Rd b/man/utils_split_funs.Rd new file mode 100644 index 0000000000..b1c718b608 --- /dev/null +++ b/man/utils_split_funs.Rd @@ -0,0 +1,106 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils_split_funs.R +\name{utils_split_funs} +\alias{utils_split_funs} +\alias{ref_group_position} +\alias{level_order} +\title{Custom Split Functions} +\usage{ +ref_group_position(position = "first") + +level_order(order) +} +\arguments{ +\item{position}{(\code{string} or \code{integer})\cr should it be \code{"first"} or \code{"last"} or in a specific position?} + +\item{order}{(\code{character} or \code{integer})\cr vector of ordering indexes for the split facets.} +} +\value{ +\itemize{ +\item \code{ref_group_position} returns an utility function that puts the reference group +as first, last or at a certain position and needs to be assigned to \code{split_fun}. +} + +\itemize{ +\item \code{level_order} returns an utility function that changes the original levels' order, +depending on input \code{order} and split levels. +} +} +\description{ +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} + +Collection of useful functions that are expanding on the core list of functions +provided by \code{rtables}. See \link[rtables:custom_split_funs]{rtables::custom_split_funs} and \code{\link[rtables:make_split_fun]{rtables::make_split_fun()}} +for more information on how to make a custom split function. All these functions +work with \code{\link[=split_rows_by]{split_rows_by()}} argument \code{split_fun} to modify the way the split +happens. For other split functions, consider consulting \code{\link[rtables:split_funcs]{rtables::split_funcs}}. +} +\section{Functions}{ +\itemize{ +\item \code{ref_group_position()}: split function to place reference group facet at a specific position +during post-processing stage. + +\item \code{level_order()}: split function to change level order based on a \code{integer} +vector or a \code{character} vector that represent the split variable's factor levels. + +}} +\examples{ +library(dplyr) + +dat <- data.frame( + x = factor(letters[1:5], levels = letters[5:1]), + y = 1:5 +) + +# With rtables layout functions +basic_table() \%>\% + split_cols_by("x", ref_group = "c", split_fun = ref_group_position("last")) \%>\% + analyze("y") \%>\% + build_table(dat) + +# With tern layout funcitons +adtte_f <- tern_ex_adtte \%>\% + filter(PARAMCD == "OS") \%>\% + mutate( + AVAL = day2month(AVAL), + is_event = CNSR == 0 + ) + +basic_table() \%>\% + split_cols_by(var = "ARMCD", ref_group = "ARM B", split_fun = ref_group_position("first")) \%>\% + add_colcounts() \%>\% + surv_time( + vars = "AVAL", + var_labels = "Survival Time (Months)", + is_event = "is_event", + ) \%>\% + build_table(df = adtte_f) + +basic_table() \%>\% + split_cols_by(var = "ARMCD", ref_group = "ARM B", split_fun = ref_group_position(2)) \%>\% + add_colcounts() \%>\% + surv_time( + vars = "AVAL", + var_labels = "Survival Time (Months)", + is_event = "is_event", + ) \%>\% + build_table(df = adtte_f) + +# level_order -------- +# Even if default would bring ref_group first, the original order puts it last +basic_table() \%>\% + split_cols_by("Species", split_fun = level_order(c(1, 3, 2))) \%>\% + analyze("Sepal.Length") \%>\% + build_table(iris) + +# character vector +new_order <- level_order(levels(iris$Species)[c(1, 3, 2)]) +basic_table() \%>\% + split_cols_by("Species", ref_group = "virginica", split_fun = new_order) \%>\% + analyze("Sepal.Length") \%>\% + build_table(iris) + +} +\seealso{ +\code{\link[rtables:make_split_fun]{rtables::make_split_fun()}} +} diff --git a/tests/testthat/_snaps/utils_split_fun.md b/tests/testthat/_snaps/utils_split_fun.md new file mode 100644 index 0000000000..9a8269e0c0 --- /dev/null +++ b/tests/testthat/_snaps/utils_split_fun.md @@ -0,0 +1,77 @@ +# analyze_vars works as expected with ref_group_position last split fun + + Code + res[3:4, ] + Output + ARM A ARM B ARM C + (N=69) (N=73) (N=58) + ———————————————————————————————————————————————— + Mean (SD) 34.1 (6.8) 35.8 (7.1) 36.1 (7.4) + Median 32.8 35.4 36.2 + +# compare_vars works as expected with ref_group first split fun + + Code + res[1:2, ] + Output + ARM B ARM A ARM C + (N=73) (N=69) (N=58) + ———————————————————————————————————————————————— + n 73 69 58 + Mean (SD) 35.8 (7.1) 34.1 (6.8) 36.1 (7.4) + +# summarize_ancova works as expected with ref_group position split fun + + Code + res[1:2, ] + Output + ARM A ARM B ARM C + (N=69) (N=73) (N=58) + ———————————————————————————————————————————————— + Unadjusted comparison + n 69 73 58 + +# binary endpoint layouts work as expected with ref_group_position last split fun + + Code + res + Output + A: Drug X C: Combination B: Placebo + (N=69) (N=58) (N=73) + ——————————————————————————————————————————————————————————————————————————————————————— + Odds Ratio (95% CI) 2.47 (1.22 - 5.01) 2.29 (1.10 - 4.78) + Difference in Response rate (%) 20.5 19.0 + 95% CI (Wald, with correction) (3.6, 37.3) (1.2, 36.8) + p-value (Chi-Squared Test) 0.0113 0.0263 + +# time to event layouts works as expected with ref_group_position last split fun + + Code + res + Output + ARM A ARM C ARM B + (N=69) (N=58) (N=73) + ——————————————————————————————————————————————————————————————————————————————————— + CoxPH + p-value (log-rank) 0.0159 0.1820 + Hazard Ratio 0.58 1.31 + 95% CI (0.37, 0.91) (0.88, 1.95) + 6 Months + Patients remaining at risk 49 39 46 + Event Free Rate (%) 85.29 71.87 71.55 + 95% CI (76.38, 94.19) (60.15, 83.58) (60.96, 82.14) + Difference in Event Free Rate 13.74 0.31 + 95% CI (-0.10, 27.57) (-15.47, 16.10) + p-value (Z-test) 0.0517 0.9688 + +# summarize_ancova works as expected with ref_group_position last split fun + + Code + res + Output + ARM A ARM C ARM B + (N=69) (N=58) (N=73) + ————————————————————————————————————————————————————— + Unadjusted rate (per year) + Rate 8.2061 7.8551 9.1554 + diff --git a/tests/testthat/test-utils_split_fun.R b/tests/testthat/test-utils_split_fun.R new file mode 100644 index 0000000000..ab14f817b3 --- /dev/null +++ b/tests/testthat/test-utils_split_fun.R @@ -0,0 +1,141 @@ +testthat::test_that("ref_group_position last split fun gives error when ref group is undefined", { + lyt <- basic_table() %>% + split_cols_by(var = "ARMCD", split_fun = ref_group_position("last")) %>% + analyze("AGE") + + testthat::expect_error(build_table(lyt, df = tern_ex_adsl)) +}) + +testthat::test_that("analyze_vars works as expected with ref_group_position last split fun", { + # Default behavior + result <- basic_table() %>% + split_cols_by(var = "ARMCD", ref_group = "ARM C") %>% + add_colcounts() %>% + analyze_vars(c("AGE", "STRATA2")) %>% + build_table(df = tern_ex_adsl) + + res <- testthat::expect_silent(result) + testthat::expect_identical(names(res), c("ARM A", "ARM B", "ARM C")) + + result <- basic_table() %>% + split_cols_by(var = "ARMCD", ref_group = "ARM C", split_fun = ref_group_position("last")) %>% + add_colcounts() %>% + analyze_vars(c("AGE", "STRATA2")) %>% + build_table(df = tern_ex_adsl) + + res <- testthat::expect_silent(result) + testthat::expect_snapshot(res[3:4, ]) +}) + +testthat::test_that("compare_vars works as expected with ref_group first split fun", { + result <- basic_table() %>% + split_cols_by(var = "ARMCD", ref_group = "ARM B", split_fun = ref_group_position("first")) %>% + add_colcounts() %>% + compare_vars("AGE") %>% + build_table(df = tern_ex_adsl) + + res <- testthat::expect_silent(result) + testthat::expect_identical(names(res), c("ARM B", "ARM A", "ARM C")) + testthat::expect_snapshot(res[1:2, ]) +}) + +testthat::test_that("summarize_ancova works as expected with ref_group position split fun", { + result <- basic_table() %>% + split_cols_by(var = "ARMCD", ref_group = "ARM B", split_fun = ref_group_position(2)) %>% + add_colcounts() %>% + summarize_ancova( + vars = "BMRKR1", + variables = list(arm = "ARM"), + var_labels = "Unadjusted comparison", + conf_level = 0.95 + ) %>% + build_table(tern_ex_adsl) + + res <- testthat::expect_silent(result) + testthat::expect_identical(names(res), c("ARM A", "ARM B", "ARM C")) + testthat::expect_snapshot(res[1:2, ]) +}) + +testthat::test_that("binary endpoint layouts work as expected with ref_group_position last split fun", { + adrs_f <- tern_ex_adrs %>% + dplyr::filter(PARAMCD == "INVET") %>% + dplyr::mutate(is_rsp = AVALC %in% c("CR", "PR")) + + result <- basic_table() %>% + split_cols_by(var = "ARM", ref_group = "B: Placebo", split_fun = ref_group_position("last")) %>% + add_colcounts() %>% + estimate_odds_ratio(vars = "is_rsp") %>% + estimate_proportion_diff(vars = "is_rsp", table_names = "prop_diff") %>% + test_proportion_diff(vars = "is_rsp", table_names = "test_prop_diff") %>% + build_table(adrs_f) + + res <- testthat::expect_silent(result) + testthat::expect_identical(names(res), c("A: Drug X", "C: Combination", "B: Placebo")) + testthat::expect_snapshot(res) +}) + +testthat::test_that("time to event layouts works as expected with ref_group_position last split fun", { + adtte_f <- tern_ex_adtte %>% + dplyr::filter(PARAMCD == "PFS") %>% + dplyr::mutate( + AVAL = day2month(AVAL), + is_event = CNSR == 0 + ) + + result <- basic_table() %>% + split_cols_by(var = "ARMCD", ref_group = "ARM B", split_fun = ref_group_position("last")) %>% + add_colcounts() %>% + coxph_pairwise( + vars = "AVAL", + is_event = "is_event" + ) %>% + surv_timepoint( + vars = "AVAL", + var_labels = "Months", + time_point = 6, + is_event = "is_event", + method = "both" + ) %>% + build_table(adtte_f) + + res <- testthat::expect_silent(result) + testthat::expect_snapshot(res) +}) + +testthat::test_that("summarize_ancova works as expected with ref_group_position last split fun", { + anl <- tern_ex_adtte %>% filter(PARAMCD == "TNE") + + result <- basic_table() %>% + split_cols_by(var = "ARMCD", ref_group = "ARM B", split_fun = ref_group_position("last")) %>% + add_colcounts() %>% + summarize_glm_count( + vars = "AVAL", + variables = list(arm = "ARM", offset = "lgTMATRSK", covariates = NULL), + conf_level = 0.95, + distribution = "poisson", + rate_mean_method = "emmeans", + var_labels = "Unadjusted rate (per year)", + .stats = c("rate"), + .labels = c(rate = "Rate") + ) %>% + build_table(anl) + + res <- testthat::expect_silent(result) + testthat::expect_snapshot(res) +}) + +testthat::test_that("level_order works for integerish and characters", { + tbl_int <- basic_table() %>% + split_cols_by("Species", split_fun = level_order(c(1, 3, 2))) %>% + analyze("Sepal.Length") %>% + build_table(iris) + + # character vector + new_order <- level_order(levels(iris$Species)[c(1, 3, 2)]) + tbl_chr <- basic_table() %>% + split_cols_by("Species", ref_group = "virginica", split_fun = new_order) %>% + analyze("Sepal.Length") %>% + build_table(iris) + + testthat::expect_identical(toString(tbl_int), toString(tbl_chr)) +}) From 413f6d95afb93e7646021cf17a93ffd6a21f708a Mon Sep 17 00:00:00 2001 From: Melkiades Date: Tue, 31 Oct 2023 13:39:23 +0000 Subject: [PATCH 4/4] [skip actions] Bump version to 0.9.2.9004 --- DESCRIPTION | 2 +- NEWS.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 167711ac52..ce03a75a2d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: tern Title: Create Common TLGs Used in Clinical Trials -Version: 0.9.2.9003 +Version: 0.9.2.9004 Date: 2023-10-31 Authors@R: c( person("Joe", "Zhu", , "joe.zhu@roche.com", role = c("aut", "cre")), diff --git a/NEWS.md b/NEWS.md index 848780369a..6f234c8432 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# tern 0.9.2.9003 +# tern 0.9.2.9004 ### New Features * Added `ref_group_position` function to place the reference group facet last, first or at a certain position.