From 12de1fb3000342c2ea2d142dfe7c608803627b06 Mon Sep 17 00:00:00 2001 From: Davide Garolini Date: Thu, 7 Nov 2024 22:48:57 +0100 Subject: [PATCH] Refactor `summarize_change()` (#1347) Fixes #1345 --------- Co-authored-by: shajoezhu Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: Joe Zhu --- NEWS.md | 8 +- R/summarize_change.R | 139 ++++++++++++----- R/utils_default_stats_formats_labels.R | 69 +++++++-- R/utils_rtables.R | 65 ++++++++ man/default_stats_formats_labels.Rd | 7 +- man/summarize_change.Rd | 40 +++-- man/util_handling_additional_fun_params.Rd | 45 ++++++ tests/testthat/_snaps/summarize_change.md | 17 +++ tests/testthat/test-summarize_change.R | 58 +++++++ .../test-utils_default_stats_formats_labels.R | 11 +- vignettes/tern_functions_guide.Rmd | 142 ++++++++++++++++++ 11 files changed, 527 insertions(+), 74 deletions(-) create mode 100644 man/util_handling_additional_fun_params.Rd create mode 100644 vignettes/tern_functions_guide.Rmd diff --git a/NEWS.md b/NEWS.md index ee34e76c3c..fdb80c33ad 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,15 +4,19 @@ * Added the `denom` parameter to `s_count_cumulative()`, `s_count_missed_doses()`, and `s_count_occurrences_by_grade()`. * Added `"N_row"` as an optional input to `denom` in `s_count_occurrences()`. * Refactored `a_count_occurrences_by_grade()`, `a_count_patients_with_event()`, and `a_count_patients_with_flags()` to no longer use `make_afun()`. - -### Enhancements * Added `rel_height_plot` parameter to `g_lineplot()` to control the line plot height relative to annotation table height. * Updated the `table_font_size` parameter of `g_lineplot()` to control the size of all text in the annotation table, including labels. * Added `as_list` parameter to `g_lineplot()` to allow users to return the line plot and annotation table elements as a list instead of stacked for more complex customization. +* Refactored `summarize_change()` to work without `make_afun()` and access all additional function parameter. +* Added vignette "Understanding `tern` functions" for future reference. ### Bug Fixes * Fixed bug in `a_summary()` causing non-unique `row_name` values to occur when multiple statistics are selected for count variables. +### Miscellaneous +* Reverted deprecation of quick get functions `summary_formats()` and `summary_labels()`. Added disclaimer about underlying use of `get_stats`. +* Corrected handling of extra arguments and `NA` for `summarize_change()`. + # tern 0.9.6 ### Enhancements diff --git a/R/summarize_change.R b/R/summarize_change.R index bf76ca7424..832d8647b1 100644 --- a/R/summarize_change.R +++ b/R/summarize_change.R @@ -29,11 +29,12 @@ NULL #' an error will be thrown. #' #' @keywords internal -s_change_from_baseline <- function(df, - .var, - variables, - na.rm = TRUE, # nolint - ...) { +s_change_from_baseline <- function(df, ...) { + # s_summary should get na.rm + args_list <- list(...) + .var <- args_list[[".var"]] + variables <- args_list[["variables"]] + checkmate::assert_numeric(df[[variables$value]]) checkmate::assert_numeric(df[[.var]]) checkmate::assert_logical(df[[variables$baseline_flag]]) @@ -48,7 +49,7 @@ s_change_from_baseline <- function(df, if (is.logical(combined) && identical(length(combined), 0L)) { combined <- numeric(0) } - s_summary(combined, na.rm = na.rm, ...) + s_summary(combined, ...) } #' @describeIn summarize_change Formatted analysis function which is used as `afun` in `summarize_change()`. @@ -57,25 +58,56 @@ s_change_from_baseline <- function(df, #' * `a_change_from_baseline()` returns the corresponding list with formatted [rtables::CellValue()]. #' #' @keywords internal -a_change_from_baseline <- make_afun( - s_change_from_baseline, - .formats = c( - n = "xx", - mean_sd = "xx.xx (xx.xx)", - mean_se = "xx.xx (xx.xx)", - median = "xx.xx", - range = "xx.xx - xx.xx", - mean_ci = "(xx.xx, xx.xx)", - median_ci = "(xx.xx, xx.xx)", - mean_pval = "xx.xx" - ), - .labels = c( - mean_sd = "Mean (SD)", - mean_se = "Mean (SE)", - median = "Median", - range = "Min - Max" +a_change_from_baseline <- function(df, + ..., + .stats = NULL, + .formats = NULL, + .labels = NULL, + .indent_mods = NULL) { + # Check if there are user-defined functions + default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) + .stats <- default_and_custom_stats_list$default_stats + custom_stat_functions <- default_and_custom_stats_list$custom_stats + + # Adding automatically extra parameters to the statistic function (see ?rtables::additional_fun_params) + extra_afun_params <- retrieve_extra_afun_params( + names(list(...)$.additional_fun_parameters) + ) + x_stats <- .apply_stat_functions( + default_stat_fnc = s_change_from_baseline, + custom_stat_fnc_list = custom_stat_functions, + args_list = c( + df = list(df), + extra_afun_params, + list(...) + ) + ) + + # Fill in with formatting defaults if needed + .stats <- c( + get_stats("analyze_vars_numeric", stats_in = .stats), + names(custom_stat_functions) # Additional stats from custom functions ) -) + .formats <- get_formats_from_stats(.stats, .formats) + .labels <- get_labels_from_stats(.stats, .labels) + .indent_mods <- get_indents_from_stats(.stats, .indent_mods) + + # Auto format handling + .formats <- apply_auto_formatting( + .formats, + x_stats, + extra_afun_params$.df_row, + extra_afun_params$.var + ) + + in_rows( + .list = x_stats[.stats], + .formats = .formats, + .names = names(.labels), + .labels = .labels, + .indent_mods = .indent_mods + ) +} #' @describeIn summarize_change Layout-creating function which can take statistics function arguments #' and additional format arguments. This function is a wrapper for [rtables::analyze()]. @@ -91,7 +123,7 @@ a_change_from_baseline <- make_afun( #' @examples #' library(dplyr) #' -#' ## Fabricate dataset +#' # Fabricate dataset #' dta_test <- data.frame( #' USUBJID = rep(1:6, each = 3), #' AVISIT = rep(paste0("V", 1:3), 6), @@ -119,31 +151,60 @@ a_change_from_baseline <- make_afun( summarize_change <- function(lyt, vars, variables, + var_labels = vars, na_str = default_na_str(), + na_rm = TRUE, nested = TRUE, - ..., + show_labels = "default", table_names = vars, + section_div = NA_character_, + ..., .stats = c("n", "mean_sd", "median", "range"), - .formats = NULL, - .labels = NULL, + .formats = c( + n = "xx", + mean_sd = "xx.xx (xx.xx)", + mean_se = "xx.xx (xx.xx)", + median = "xx.xx", + range = "xx.xx - xx.xx", + mean_ci = "(xx.xx, xx.xx)", + median_ci = "(xx.xx, xx.xx)", + mean_pval = "xx.xx" + ), + .labels = c( + mean_sd = "Mean (SD)", + mean_se = "Mean (SE)", + median = "Median", + range = "Min - Max" + ), .indent_mods = NULL) { - extra_args <- list(variables = variables, ...) + # Extra args must contain .stats, .formats, .labels, .indent_mods - sent to the analysis level + extra_args <- list(".stats" = .stats) + if (!is.null(.formats)) extra_args[[".formats"]] <- .formats + if (!is.null(.labels)) extra_args[[".labels"]] <- .labels + if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods - afun <- make_afun( - a_change_from_baseline, - .stats = .stats, - .formats = .formats, - .labels = .labels, - .indent_mods = .indent_mods + # Adding additional arguments to the analysis function (depends on the specific call) + extra_args <- c(extra_args, "variables" = list(variables), ...) + + # Adding all additional information from layout to analysis functions (see ?rtables::additional_fun_params) + extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) + formals(a_change_from_baseline) <- c( + formals(a_change_from_baseline), + extra_args[[".additional_fun_parameters"]] ) + # Main analysis call - Nothing with .* -> these should be dedicated to the analysis function analyze( - lyt, - vars, - afun = afun, + lyt = lyt, + vars = vars, + var_labels = var_labels, + afun = a_change_from_baseline, na_str = na_str, nested = nested, extra_args = extra_args, - table_names = table_names + inclNAs = na_rm, # adds na.rm = TRUE to the analysis function + show_labels = show_labels, + table_names = table_names, + section_div = section_div ) } diff --git a/R/utils_default_stats_formats_labels.R b/R/utils_default_stats_formats_labels.R index bd3871b4b8..b97acf4a08 100644 --- a/R/utils_default_stats_formats_labels.R +++ b/R/utils_default_stats_formats_labels.R @@ -121,6 +121,57 @@ get_stats <- function(method_groups = "analyze_vars_numeric", stats_in = NULL, a out } +# Utility function used to separate custom stats (user-defined functions) from defaults +.split_std_from_custom_stats <- function(stats_in) { + out <- list(default_stats = NULL, custom_stats = NULL) + if (is.list(stats_in)) { + is_custom_fnc <- sapply(stats_in, is.function) + checkmate::assert_list(stats_in[is_custom_fnc], types = "function", names = "named") + out[["custom_stats"]] <- stats_in[is_custom_fnc] + out[["default_stats"]] <- unlist(stats_in[!is_custom_fnc]) + } else { + out[["default_stats"]] <- stats_in + } + + out +} + +# Utility function to apply statistical functions +.apply_stat_functions <- function(default_stat_fnc, custom_stat_fnc_list, args_list) { + # Default checks + checkmate::assert_function(default_stat_fnc) + checkmate::assert_list(custom_stat_fnc_list, types = "function", null.ok = TRUE, names = "named") + checkmate::assert_list(args_list) + + # Checking custom stats have same formals + if (!is.null(custom_stat_fnc_list)) { + fundamental_call_to_data <- names(formals(default_stat_fnc))[[1]] + for (fnc in custom_stat_fnc_list) { + if (!identical(names(formals(fnc))[[1]], fundamental_call_to_data)) { + stop( + "The first parameter of a custom statistical function needs to be the same (it can be `df` or `x`) ", + "as the default statistical function. In this case your custom function has ", names(formals(fnc))[[1]], + " as first parameter, while the default function has ", fundamental_call_to_data, "." + ) + } + if (!any(names(formals(fnc)) == "...")) { + stop( + "The custom statistical function needs to have `...` as a parameter to accept additional arguments. ", + "In this case your custom function does not have `...`." + ) + } + } + } + + # Merging + stat_fnc_list <- c(default_stat_fnc, custom_stat_fnc_list) + + # Applying + out <- unlist(lapply(stat_fnc_list, function(fnc) do.call(fnc, args = args_list)), recursive = FALSE) + + out +} + #' @describeIn default_stats_formats_labels Get formats corresponding to a list of statistics. #' To check available defaults see `tern::tern_default_formats` list. #' @@ -496,9 +547,7 @@ tern_default_labels <- c( rate_ratio = "Adjusted Rate Ratio" ) -# To deprecate --------- - -#' @describeIn default_stats_formats_labels `r lifecycle::badge("deprecated")` +#' @describeIn default_stats_formats_labels `r lifecycle::badge("stable")` #' Quick function to retrieve default formats for summary statistics: #' [analyze_vars()] and [analyze_vars_in_cols()] principally. #' @@ -513,20 +562,20 @@ tern_default_labels <- c( #' #' @export summary_formats <- function(type = "numeric", include_pval = FALSE) { - lifecycle::deprecate_warn( - "0.9.6", "summary_formats()", - details = 'Use get_formats_from_stats(get_stats("analyze_vars_numeric", add_pval = include_pval)) instead' - ) met_grp <- paste0(c("analyze_vars", type), collapse = "_") get_formats_from_stats(get_stats(met_grp, add_pval = include_pval)) } -#' @describeIn default_stats_formats_labels `r lifecycle::badge("deprecated")` +#' @describeIn default_stats_formats_labels `r lifecycle::badge("stable")` #' Quick function to retrieve default labels for summary statistics. #' Returns labels of descriptive statistics which are understood by `rtables`. Similar to `summary_formats`. #' #' @param include_pval (`flag`)\cr same as the `add_pval` argument in [get_stats()]. #' +#' @details +#' `summary_*` quick get functions for labels or formats uses `get_stats` and `get_labels_from_stats` or +#' `get_formats_from_stats` respectively to retrieve relevant information. +#' #' @return #' * `summary_labels` returns a named `vector` of default statistic labels for the given data type. #' @@ -536,10 +585,6 @@ summary_formats <- function(type = "numeric", include_pval = FALSE) { #' #' @export summary_labels <- function(type = "numeric", include_pval = FALSE) { - lifecycle::deprecate_warn( - "0.9.6", "summary_formats()", - details = 'Use get_labels_from_stats(get_stats("analyze_vars_numeric", add_pval = include_pval)) instead' - ) met_grp <- paste0(c("analyze_vars", type), collapse = "_") get_labels_from_stats(get_stats(met_grp, add_pval = include_pval)) } diff --git a/R/utils_rtables.R b/R/utils_rtables.R index 61dd4b5f3e..26c45543db 100644 --- a/R/utils_rtables.R +++ b/R/utils_rtables.R @@ -467,3 +467,68 @@ set_default_na_str <- function(na_str) { checkmate::assert_character(na_str, len = 1, null.ok = TRUE) options("tern_default_na_str" = na_str) } + + +#' Utilities to handle extra arguments in analysis functions +#' +#' @description `r lifecycle::badge("stable")` +#' Important additional parameters, useful to modify behavior of analysis and summary +#' functions are listed in [rtables::additional_fun_params]. With these utility functions +#' we can retrieve a curated list of these parameters from the environment, and pass them +#' to the analysis functions with dedicated `...`; notice that the final `s_*` function +#' will get them through argument matching. +#' +#' @param extra_afun_params (`list`)\cr list of additional parameters (`character`) to be +#' retrieved from the environment. Curated list is present in [rtables::additional_fun_params]. +#' @param add_alt_df (`logical`)\cr if `TRUE`, the function will also add `.alt_df` and `.alt_df_row` +#' parameters. +#' +#' @name util_handling_additional_fun_params +NULL + +#' @describeIn util_handling_additional_fun_params Retrieve additional parameters from the environment. +#' +#' @return +#' * `retrieve_extra_afun_params` returns a list of the values of the parameters in the environment. +#' +#' @keywords internal +retrieve_extra_afun_params <- function(extra_afun_params) { + out <- list() + for (extra_param in extra_afun_params) { + out <- c(out, list(get(extra_param, envir = parent.frame()))) + } + setNames(out, extra_afun_params) +} + +#' @describeIn util_handling_additional_fun_params Curated list of additional parameters for +#' analysis functions. Please check [rtables::additional_fun_params] for precise descriptions. +#' +#' @return +#' * `get_additional_afun_params` returns a list of additional parameters. +#' +#' @keywords internal +get_additional_afun_params <- function(add_alt_df = FALSE) { + out_list <- list( + .N_col = integer(), + .N_total = integer(), + .N_row = integer(), + .df_row = data.frame(), + .var = character(), + .ref_group = character(), + .ref_full = vector(mode = "numeric"), + .in_ref_col = logical(), + .spl_context = data.frame(), + .all_col_exprs = vector(mode = "expression"), + .all_col_counts = vector(mode = "integer") + ) + + if (isTRUE(add_alt_df)) { + out_list <- c( + out_list, + .alt_df_row = data.frame(), + .alt_df = data.frame() + ) + } + + out_list +} diff --git a/man/default_stats_formats_labels.Rd b/man/default_stats_formats_labels.Rd index 228288a6f8..3fbcad8fad 100644 --- a/man/default_stats_formats_labels.Rd +++ b/man/default_stats_formats_labels.Rd @@ -120,6 +120,9 @@ seen in \code{\link[=analyze_vars]{analyze_vars()}}. See notes to understand why } \details{ Current choices for \code{type} are \code{counts} and \code{numeric} for \code{\link[=analyze_vars]{analyze_vars()}} and affect \code{get_stats()}. + +\verb{summary_*} quick get functions for labels or formats uses \code{get_stats} and \code{get_labels_from_stats} or +\code{get_formats_from_stats} respectively to retrieve relevant information. } \section{Functions}{ \itemize{ @@ -142,11 +145,11 @@ It defaults to 0L for all values. \item \code{tern_default_labels}: Named \code{character} vector of default labels for \code{tern}. -\item \code{summary_formats()}: \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} +\item \code{summary_formats()}: \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} Quick function to retrieve default formats for summary statistics: \code{\link[=analyze_vars]{analyze_vars()}} and \code{\link[=analyze_vars_in_cols]{analyze_vars_in_cols()}} principally. -\item \code{summary_labels()}: \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} +\item \code{summary_labels()}: \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} Quick function to retrieve default labels for summary statistics. Returns labels of descriptive statistics which are understood by \code{rtables}. Similar to \code{summary_formats}. diff --git a/man/summarize_change.Rd b/man/summarize_change.Rd index 40b05f423c..b37cdd761c 100644 --- a/man/summarize_change.Rd +++ b/man/summarize_change.Rd @@ -10,19 +10,33 @@ summarize_change( lyt, vars, variables, + var_labels = vars, na_str = default_na_str(), + na_rm = TRUE, nested = TRUE, - ..., + show_labels = "default", table_names = vars, + section_div = NA_character_, + ..., .stats = c("n", "mean_sd", "median", "range"), - .formats = NULL, - .labels = NULL, + .formats = c(n = "xx", mean_sd = "xx.xx (xx.xx)", mean_se = "xx.xx (xx.xx)", median = + "xx.xx", range = "xx.xx - xx.xx", mean_ci = "(xx.xx, xx.xx)", median_ci = + "(xx.xx, xx.xx)", mean_pval = "xx.xx"), + .labels = c(mean_sd = "Mean (SD)", mean_se = "Mean (SE)", median = "Median", range = + "Min - Max"), .indent_mods = NULL ) -s_change_from_baseline(df, .var, variables, na.rm = TRUE, ...) +s_change_from_baseline(df, ...) -a_change_from_baseline(df, .var, variables, na.rm = TRUE, ...) +a_change_from_baseline( + df, + ..., + .stats = NULL, + .formats = NULL, + .labels = NULL, + .indent_mods = NULL +) } \arguments{ \item{lyt}{(\code{PreDataTableLayouts})\cr layout that analyses will be added to.} @@ -31,17 +45,24 @@ a_change_from_baseline(df, .var, variables, na.rm = TRUE, ...) \item{variables}{(named \code{list} of \code{string})\cr list of additional analysis variables.} +\item{var_labels}{(\code{character})\cr variable labels.} + \item{na_str}{(\code{string})\cr string used to replace all \code{NA} or empty values in the output.} \item{nested}{(\code{flag})\cr whether this layout instruction should be applied within the existing layout structure _if possible (\code{TRUE}, the default) or as a new top-level element (\code{FALSE}). Ignored if it would nest a split. underneath analyses, which is not allowed.} -\item{...}{additional arguments for the lower level functions.} +\item{show_labels}{(\code{string})\cr label visibility: one of "default", "visible" and "hidden".} \item{table_names}{(\code{character})\cr this can be customized in the case that the same \code{vars} are analyzed multiple times, to avoid warnings from \code{rtables}.} +\item{section_div}{(\code{string})\cr string which should be repeated as a section divider after each group +defined by this split instruction, or \code{NA_character_} (the default) for no section divider.} + +\item{...}{additional arguments for the lower level functions.} + \item{.stats}{(\code{character})\cr statistics to select for the table. Options are: \verb{'n', 'sum', 'mean', 'sd', 'se', 'mean_sd', 'mean_se', 'mean_ci', 'mean_sei', 'mean_sdi', 'mean_pval', 'median', 'mad', 'median_ci', 'quantiles', 'iqr', 'range', 'min', 'max', 'median_range', 'cv', 'geom_mean', 'geom_mean_ci', 'geom_cv'}} @@ -55,11 +76,6 @@ information on the \code{"auto"} setting.} unmodified default behavior. Can be negative.} \item{df}{(\code{data.frame})\cr data set containing all analysis variables.} - -\item{.var}{(\code{string})\cr single variable name that is passed by \code{rtables} when requested -by a statistics function.} - -\item{na.rm}{(\code{flag})\cr whether \code{NA} values should be removed from \code{x} prior to analysis.} } \value{ \itemize{ @@ -108,7 +124,7 @@ an error will be thrown. \examples{ library(dplyr) -## Fabricate dataset +# Fabricate dataset dta_test <- data.frame( USUBJID = rep(1:6, each = 3), AVISIT = rep(paste0("V", 1:3), 6), diff --git a/man/util_handling_additional_fun_params.Rd b/man/util_handling_additional_fun_params.Rd new file mode 100644 index 0000000000..ce95abe6dc --- /dev/null +++ b/man/util_handling_additional_fun_params.Rd @@ -0,0 +1,45 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils_rtables.R +\name{util_handling_additional_fun_params} +\alias{util_handling_additional_fun_params} +\alias{retrieve_extra_afun_params} +\alias{get_additional_afun_params} +\title{Utilities to handle extra arguments in analysis functions} +\usage{ +retrieve_extra_afun_params(extra_afun_params) + +get_additional_afun_params(add_alt_df = FALSE) +} +\arguments{ +\item{extra_afun_params}{(\code{list})\cr list of additional parameters (\code{character}) to be +retrieved from the environment. Curated list is present in \link[rtables:additional_fun_params]{rtables::additional_fun_params}.} + +\item{add_alt_df}{(\code{logical})\cr if \code{TRUE}, the function will also add \code{.alt_df} and \code{.alt_df_row} +parameters.} +} +\value{ +\itemize{ +\item \code{retrieve_extra_afun_params} returns a list of the values of the parameters in the environment. +} + +\itemize{ +\item \code{get_additional_afun_params} returns a list of additional parameters. +} +} +\description{ +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} +Important additional parameters, useful to modify behavior of analysis and summary +functions are listed in \link[rtables:additional_fun_params]{rtables::additional_fun_params}. With these utility functions +we can retrieve a curated list of these parameters from the environment, and pass them +to the analysis functions with dedicated \code{...}; notice that the final \verb{s_*} function +will get them through argument matching. +} +\section{Functions}{ +\itemize{ +\item \code{retrieve_extra_afun_params()}: Retrieve additional parameters from the environment. + +\item \code{get_additional_afun_params()}: Curated list of additional parameters for +analysis functions. Please check \link[rtables:additional_fun_params]{rtables::additional_fun_params} for precise descriptions. + +}} +\keyword{internal} diff --git a/tests/testthat/_snaps/summarize_change.md b/tests/testthat/_snaps/summarize_change.md index b0e8774d89..9752712464 100644 --- a/tests/testthat/_snaps/summarize_change.md +++ b/tests/testthat/_snaps/summarize_change.md @@ -499,3 +499,20 @@ Median -2.00 Min - Max -2.00 - -2.00 +# summarize_change works with custom statistical functions + + Code + res + Output + all obs + ——————————————————— + V1 + n 3 + my_stat 1.00 + V2 + n 3 + my_stat 0.83 + V3 + n 3 + my_stat 0.67 + diff --git a/tests/testthat/test-summarize_change.R b/tests/testthat/test-summarize_change.R index 4c8d13a92e..adf60ffc68 100644 --- a/tests/testthat/test-summarize_change.R +++ b/tests/testthat/test-summarize_change.R @@ -77,3 +77,61 @@ testthat::test_that("summarize_change works as expected", { res <- testthat::expect_silent(result) testthat::expect_snapshot(res) }) + + +testthat::test_that("summarize_change works with custom statistical functions", { + dta_test <- data.frame( + USUBJID = rep(1:6, each = 3), + AVISIT = rep(paste0("V", 1:3), 6), + AVAL = c(9:1, rep(NA, 9)) + ) %>% + dplyr::mutate( + ABLFLL = AVISIT == "V1" + ) %>% + dplyr::group_by(USUBJID) %>% + dplyr::mutate( + BLVAL = AVAL[ABLFLL], + CHG = AVAL - BLVAL + ) %>% + dplyr::ungroup() + + testthat::expect_error( + basic_table() %>% + split_rows_by("AVISIT") %>% + summarize_change( + "CHG", + variables = list(value = "AVAL", baseline_flag = "ABLFLL"), + .stats = c("n", "my_stat" = function(x) mean(x)) + ) %>% + build_table(dta_test), + "custom function has x as first parameter, while the default function has df" + ) + testthat::expect_error( + basic_table() %>% + split_rows_by("AVISIT") %>% + summarize_change( + "CHG", + variables = list(value = "AVAL", baseline_flag = "ABLFLL"), + .stats = c("n", "my_stat" = function(df) mean(df$AVAL)) + ) %>% + build_table(dta_test), + "The custom statistical function needs to have " + ) + + result <- basic_table() %>% + split_rows_by("AVISIT") %>% + summarize_change( + "CHG", + variables = list(value = "AVAL", baseline_flag = "ABLFLL"), + .stats = c("n", "my_stat" = function(df, ...) { + a <- mean(df$AVAL, na.rm = TRUE) + b <- list(...)$.N_row + a / b + }), + .formats = c("my_stat" = function(x, ...) sprintf("%.2f", x)) + ) %>% + build_table(dta_test) + + res <- testthat::expect_silent(result) + testthat::expect_snapshot(res) +}) diff --git a/tests/testthat/test-utils_default_stats_formats_labels.R b/tests/testthat/test-utils_default_stats_formats_labels.R index f040eb0ef2..9583faac18 100644 --- a/tests/testthat/test-utils_default_stats_formats_labels.R +++ b/tests/testthat/test-utils_default_stats_formats_labels.R @@ -199,24 +199,21 @@ testthat::test_that("labels_use_control works as expected", { }) testthat::test_that("summary_formats works as expected", { - testthat::expect_warning( - result <- summary_formats() %>% - unlist() # More compact fruition - ) + result <- summary_formats() %>% unlist() # More compact fruition res <- testthat::expect_silent(result) testthat::expect_snapshot(res) - testthat::expect_warning(result <- summary_formats(type = "counts", include_pval = TRUE)) + result <- summary_formats(type = "counts", include_pval = TRUE) testthat::expect_true(all(result[c("n", "count", "n_blq")] == "xx.")) testthat::expect_identical(result[["pval_counts"]], "x.xxxx | (<0.0001)") }) testthat::test_that("summary_labels works as expected", { - testthat::expect_warning(result <- summary_labels()) + result <- summary_labels() res <- testthat::expect_silent(result) testthat::expect_snapshot(res) - testthat::expect_warning(result <- summary_labels(type = "counts", include_pval = TRUE)) + result <- summary_labels(type = "counts", include_pval = TRUE) res <- testthat::expect_silent(result) testthat::expect_snapshot(res) }) diff --git a/vignettes/tern_functions_guide.Rmd b/vignettes/tern_functions_guide.Rmd new file mode 100644 index 0000000000..6dc8e2377f --- /dev/null +++ b/vignettes/tern_functions_guide.Rmd @@ -0,0 +1,142 @@ +--- +title: "Understanding `tern` functions" +date: "2024-11-04" +output: + rmarkdown::html_document: + theme: "spacelab" + highlight: "kate" + toc: true + toc_float: true +vignette: > + %\VignetteIndexEntry{Understanding `tern` functions} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +editor_options: + markdown: + wrap: 72 +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +## Understanding `tern` functions + +Every function in the `tern` package is designed to have a certain structure that can cooperate well with every user's need, while maintaining a consistent and predictable behavior. This document will guide you through an example function in the package, explaining the purpose of many of its building blocks and how they can be used. + +As we recently worked on it we will consider `summarize_change()` as an example. This function is used to calculate the change from a baseline value for a given variable. A realistic example can be found in [`LBT03`](https://insightsengineering.github.io/tlg-catalog/stable/tables/lab-results/lbt03.html) from the TLG-catalog. + +`summarize_change()` is the main function that is available to the user. You can find lists of these functions in `?tern::analyze_functions`. All of these are build around `rtables::analyze()` function, which is the core analysis function in `rtables`. All these wrapper functions call specific analysis functions (always written as `a_*`) that are meant to handle the statistic functions (always written as `s_*`) and format the results with the `rtables::in_row()` function. We can summarize this structure as follows: + +`summarize_change()` (1)-> `a_change_from_baseline()` (2)-> [`s_change_from_baseline()` + `rtables::in_row()`] + +The main questions that may arise are: + +1. Handling of `NA`. +2. Handling of formats. +3. Additional statistics. + +Data set and library loading. +```{r} +library(dplyr) +library(tern) + +## Fabricate dataset +dta_test <- data.frame( + USUBJID = rep(1:6, each = 3), + AVISIT = rep(paste0("V", 1:3), 6), + ARM = rep(LETTERS[1:3], rep(6, 3)), + AVAL = c(9:1, rep(NA, 9)) +) %>% + mutate(ABLFLL = AVISIT == "V1") %>% + group_by(USUBJID) %>% + mutate( + BLVAL = AVAL[ABLFLL], + CHG = AVAL - BLVAL + ) %>% + ungroup() +``` + +Classic use of `summarize_change()`. +```{r} +fix_layout <- basic_table() %>% + split_cols_by("ARM") %>% + split_rows_by("AVISIT") + + +# Dealing with NAs: na_rm = TRUE +fix_layout %>% + summarize_change("CHG", variables = list(value = "AVAL", baseline_flag = "ABLFLL")) %>% + build_table(dta_test) %>% + print() + +# Dealing with NAs: na_rm = FALSE +fix_layout %>% + summarize_change("CHG", variables = list(value = "AVAL", baseline_flag = "ABLFLL"), na_rm = FALSE) %>% + build_table(dta_test) %>% + print() + +# changing the NA string (it is done on all levels) +fix_layout %>% + summarize_change("CHG", variables = list(value = "AVAL", baseline_flag = "ABLFLL"), na_str = "my_na") %>% + build_table(dta_test) %>% + print() +``` + +`.formats`, `.labels`, and `.indent_mods` depend on the names of `.stats`. Here is how you can change the default formatting. + +```{r} +# changing n count format and label and indentation +fix_layout %>% + summarize_change("CHG", + variables = list(value = "AVAL", baseline_flag = "ABLFLL"), + .stats = c("n", "mean"), # reducing the number of stats for visual appreciation + .formats = c(n = "xx.xx"), + .labels = c(n = "NnNn"), + .indent_mods = c(n = 5), na_str = "nA" + ) %>% + build_table(dta_test) %>% + print() +``` +What if I want something special for the format? + +```{r} +# changing n count format and label and indentation +fix_layout %>% + summarize_change("CHG", + variables = list(value = "AVAL", baseline_flag = "ABLFLL"), + .stats = c("n", "mean"), # reducing the number of stats for visual appreciation + .formats = c(n = function(x, ...) as.character(x * 100)) + ) %>% # Note you need ...!!! + build_table(dta_test) %>% + print() +``` + +Adding a custom statistic (and custom format): +```{r} +# changing n count format and label and indentation +fix_layout %>% + summarize_change( + "CHG", + variables = list(value = "AVAL", baseline_flag = "ABLFLL"), + .stats = c("n", "my_stat" = function(df, ...) { + a <- mean(df$AVAL, na.rm = TRUE) + b <- list(...)$.N_row # It has access at all `?rtables::additional_fun_params` + a / b + }), + .formats = c("my_stat" = function(x, ...) sprintf("%.2f", x)) + ) %>% + build_table(dta_test) +``` + + +## For Developers + +In all of these layers there are specific parameters that need to be available, and, while `rtables` has multiple way to handle formatting and `NA` values, we had to decide how to correctly handle these and additional extra arguments. We follow the following scheme: + +Level 1: `summarize_change()`: all parameters without a starting dot `.*` are used or added to `extra_args`. Specifically, here we solve `NA` values by using `inclNAs` option in `rtables::analyze()`. This will add to `...` `na.rm = inclNAs`. Also `na_str` is here set. We may want to be statistic dependent in the future, but we still need to think how to accomplish that. We add the `rtables::additional_fun_params` to the analysis function so to make them available as `...` in the next level. + +Level 2: `a_change_from_baseline()`: all parameters starting with a dot `.` are used. Mainly `.stats`, `.formats`, `.labels`, and `.indent_mods` are used. We also add `extra_afun_params` to the `...` list for the statistical function. Notice the handling for additional parameters in the `do.call()` function.