Skip to content

Commit

Permalink
Refactor summarize_change() (#1347)
Browse files Browse the repository at this point in the history
Fixes #1345

---------

Co-authored-by: shajoezhu <[email protected]>
Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: Joe Zhu <[email protected]>
  • Loading branch information
4 people authored Nov 7, 2024
1 parent 4b9e6a6 commit 12de1fb
Show file tree
Hide file tree
Showing 11 changed files with 527 additions and 74 deletions.
8 changes: 6 additions & 2 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,19 @@
* Added the `denom` parameter to `s_count_cumulative()`, `s_count_missed_doses()`, and `s_count_occurrences_by_grade()`.
* Added `"N_row"` as an optional input to `denom` in `s_count_occurrences()`.
* Refactored `a_count_occurrences_by_grade()`, `a_count_patients_with_event()`, and `a_count_patients_with_flags()` to no longer use `make_afun()`.

### Enhancements
* Added `rel_height_plot` parameter to `g_lineplot()` to control the line plot height relative to annotation table height.
* Updated the `table_font_size` parameter of `g_lineplot()` to control the size of all text in the annotation table, including labels.
* Added `as_list` parameter to `g_lineplot()` to allow users to return the line plot and annotation table elements as a list instead of stacked for more complex customization.
* Refactored `summarize_change()` to work without `make_afun()` and access all additional function parameter.
* Added vignette "Understanding `tern` functions" for future reference.

### Bug Fixes
* Fixed bug in `a_summary()` causing non-unique `row_name` values to occur when multiple statistics are selected for count variables.

### Miscellaneous
* Reverted deprecation of quick get functions `summary_formats()` and `summary_labels()`. Added disclaimer about underlying use of `get_stats`.
* Corrected handling of extra arguments and `NA` for `summarize_change()`.

# tern 0.9.6

### Enhancements
Expand Down
139 changes: 100 additions & 39 deletions R/summarize_change.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,12 @@ NULL
#' an error will be thrown.
#'
#' @keywords internal
s_change_from_baseline <- function(df,
.var,
variables,
na.rm = TRUE, # nolint
...) {
s_change_from_baseline <- function(df, ...) {
# s_summary should get na.rm
args_list <- list(...)
.var <- args_list[[".var"]]
variables <- args_list[["variables"]]

checkmate::assert_numeric(df[[variables$value]])
checkmate::assert_numeric(df[[.var]])
checkmate::assert_logical(df[[variables$baseline_flag]])
Expand All @@ -48,7 +49,7 @@ s_change_from_baseline <- function(df,
if (is.logical(combined) && identical(length(combined), 0L)) {
combined <- numeric(0)
}
s_summary(combined, na.rm = na.rm, ...)
s_summary(combined, ...)
}

#' @describeIn summarize_change Formatted analysis function which is used as `afun` in `summarize_change()`.
Expand All @@ -57,25 +58,56 @@ s_change_from_baseline <- function(df,
#' * `a_change_from_baseline()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @keywords internal
a_change_from_baseline <- make_afun(
s_change_from_baseline,
.formats = c(
n = "xx",
mean_sd = "xx.xx (xx.xx)",
mean_se = "xx.xx (xx.xx)",
median = "xx.xx",
range = "xx.xx - xx.xx",
mean_ci = "(xx.xx, xx.xx)",
median_ci = "(xx.xx, xx.xx)",
mean_pval = "xx.xx"
),
.labels = c(
mean_sd = "Mean (SD)",
mean_se = "Mean (SE)",
median = "Median",
range = "Min - Max"
a_change_from_baseline <- function(df,
...,
.stats = NULL,
.formats = NULL,
.labels = NULL,
.indent_mods = NULL) {
# Check if there are user-defined functions
default_and_custom_stats_list <- .split_std_from_custom_stats(.stats)
.stats <- default_and_custom_stats_list$default_stats
custom_stat_functions <- default_and_custom_stats_list$custom_stats

# Adding automatically extra parameters to the statistic function (see ?rtables::additional_fun_params)
extra_afun_params <- retrieve_extra_afun_params(
names(list(...)$.additional_fun_parameters)
)
x_stats <- .apply_stat_functions(
default_stat_fnc = s_change_from_baseline,
custom_stat_fnc_list = custom_stat_functions,
args_list = c(
df = list(df),
extra_afun_params,
list(...)
)
)

# Fill in with formatting defaults if needed
.stats <- c(
get_stats("analyze_vars_numeric", stats_in = .stats),
names(custom_stat_functions) # Additional stats from custom functions
)
)
.formats <- get_formats_from_stats(.stats, .formats)
.labels <- get_labels_from_stats(.stats, .labels)
.indent_mods <- get_indents_from_stats(.stats, .indent_mods)

# Auto format handling
.formats <- apply_auto_formatting(
.formats,
x_stats,
extra_afun_params$.df_row,
extra_afun_params$.var
)

in_rows(
.list = x_stats[.stats],
.formats = .formats,
.names = names(.labels),
.labels = .labels,
.indent_mods = .indent_mods
)
}

#' @describeIn summarize_change Layout-creating function which can take statistics function arguments
#' and additional format arguments. This function is a wrapper for [rtables::analyze()].
Expand All @@ -91,7 +123,7 @@ a_change_from_baseline <- make_afun(
#' @examples
#' library(dplyr)
#'
#' ## Fabricate dataset
#' # Fabricate dataset
#' dta_test <- data.frame(
#' USUBJID = rep(1:6, each = 3),
#' AVISIT = rep(paste0("V", 1:3), 6),
Expand Down Expand Up @@ -119,31 +151,60 @@ a_change_from_baseline <- make_afun(
summarize_change <- function(lyt,
vars,
variables,
var_labels = vars,
na_str = default_na_str(),
na_rm = TRUE,
nested = TRUE,
...,
show_labels = "default",
table_names = vars,
section_div = NA_character_,
...,
.stats = c("n", "mean_sd", "median", "range"),
.formats = NULL,
.labels = NULL,
.formats = c(
n = "xx",
mean_sd = "xx.xx (xx.xx)",
mean_se = "xx.xx (xx.xx)",
median = "xx.xx",
range = "xx.xx - xx.xx",
mean_ci = "(xx.xx, xx.xx)",
median_ci = "(xx.xx, xx.xx)",
mean_pval = "xx.xx"
),
.labels = c(
mean_sd = "Mean (SD)",
mean_se = "Mean (SE)",
median = "Median",
range = "Min - Max"
),
.indent_mods = NULL) {
extra_args <- list(variables = variables, ...)
# Extra args must contain .stats, .formats, .labels, .indent_mods - sent to the analysis level
extra_args <- list(".stats" = .stats)
if (!is.null(.formats)) extra_args[[".formats"]] <- .formats
if (!is.null(.labels)) extra_args[[".labels"]] <- .labels
if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods

afun <- make_afun(
a_change_from_baseline,
.stats = .stats,
.formats = .formats,
.labels = .labels,
.indent_mods = .indent_mods
# Adding additional arguments to the analysis function (depends on the specific call)
extra_args <- c(extra_args, "variables" = list(variables), ...)

# Adding all additional information from layout to analysis functions (see ?rtables::additional_fun_params)
extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE)
formals(a_change_from_baseline) <- c(
formals(a_change_from_baseline),
extra_args[[".additional_fun_parameters"]]
)

# Main analysis call - Nothing with .* -> these should be dedicated to the analysis function
analyze(
lyt,
vars,
afun = afun,
lyt = lyt,
vars = vars,
var_labels = var_labels,
afun = a_change_from_baseline,
na_str = na_str,
nested = nested,
extra_args = extra_args,
table_names = table_names
inclNAs = na_rm, # adds na.rm = TRUE to the analysis function
show_labels = show_labels,
table_names = table_names,
section_div = section_div
)
}
69 changes: 57 additions & 12 deletions R/utils_default_stats_formats_labels.R
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,57 @@ get_stats <- function(method_groups = "analyze_vars_numeric", stats_in = NULL, a
out
}

# Utility function used to separate custom stats (user-defined functions) from defaults
.split_std_from_custom_stats <- function(stats_in) {
out <- list(default_stats = NULL, custom_stats = NULL)
if (is.list(stats_in)) {
is_custom_fnc <- sapply(stats_in, is.function)
checkmate::assert_list(stats_in[is_custom_fnc], types = "function", names = "named")
out[["custom_stats"]] <- stats_in[is_custom_fnc]
out[["default_stats"]] <- unlist(stats_in[!is_custom_fnc])
} else {
out[["default_stats"]] <- stats_in
}

out
}

# Utility function to apply statistical functions
.apply_stat_functions <- function(default_stat_fnc, custom_stat_fnc_list, args_list) {
# Default checks
checkmate::assert_function(default_stat_fnc)
checkmate::assert_list(custom_stat_fnc_list, types = "function", null.ok = TRUE, names = "named")
checkmate::assert_list(args_list)

# Checking custom stats have same formals
if (!is.null(custom_stat_fnc_list)) {
fundamental_call_to_data <- names(formals(default_stat_fnc))[[1]]
for (fnc in custom_stat_fnc_list) {
if (!identical(names(formals(fnc))[[1]], fundamental_call_to_data)) {
stop(
"The first parameter of a custom statistical function needs to be the same (it can be `df` or `x`) ",
"as the default statistical function. In this case your custom function has ", names(formals(fnc))[[1]],
" as first parameter, while the default function has ", fundamental_call_to_data, "."
)
}
if (!any(names(formals(fnc)) == "...")) {
stop(
"The custom statistical function needs to have `...` as a parameter to accept additional arguments. ",
"In this case your custom function does not have `...`."
)
}
}
}

# Merging
stat_fnc_list <- c(default_stat_fnc, custom_stat_fnc_list)

# Applying
out <- unlist(lapply(stat_fnc_list, function(fnc) do.call(fnc, args = args_list)), recursive = FALSE)

out
}

#' @describeIn default_stats_formats_labels Get formats corresponding to a list of statistics.
#' To check available defaults see `tern::tern_default_formats` list.
#'
Expand Down Expand Up @@ -496,9 +547,7 @@ tern_default_labels <- c(
rate_ratio = "Adjusted Rate Ratio"
)

# To deprecate ---------

#' @describeIn default_stats_formats_labels `r lifecycle::badge("deprecated")`
#' @describeIn default_stats_formats_labels `r lifecycle::badge("stable")`
#' Quick function to retrieve default formats for summary statistics:
#' [analyze_vars()] and [analyze_vars_in_cols()] principally.
#'
Expand All @@ -513,20 +562,20 @@ tern_default_labels <- c(
#'
#' @export
summary_formats <- function(type = "numeric", include_pval = FALSE) {
lifecycle::deprecate_warn(
"0.9.6", "summary_formats()",
details = 'Use get_formats_from_stats(get_stats("analyze_vars_numeric", add_pval = include_pval)) instead'
)
met_grp <- paste0(c("analyze_vars", type), collapse = "_")
get_formats_from_stats(get_stats(met_grp, add_pval = include_pval))
}

#' @describeIn default_stats_formats_labels `r lifecycle::badge("deprecated")`
#' @describeIn default_stats_formats_labels `r lifecycle::badge("stable")`
#' Quick function to retrieve default labels for summary statistics.
#' Returns labels of descriptive statistics which are understood by `rtables`. Similar to `summary_formats`.
#'
#' @param include_pval (`flag`)\cr same as the `add_pval` argument in [get_stats()].
#'
#' @details
#' `summary_*` quick get functions for labels or formats uses `get_stats` and `get_labels_from_stats` or
#' `get_formats_from_stats` respectively to retrieve relevant information.
#'
#' @return
#' * `summary_labels` returns a named `vector` of default statistic labels for the given data type.
#'
Expand All @@ -536,10 +585,6 @@ summary_formats <- function(type = "numeric", include_pval = FALSE) {
#'
#' @export
summary_labels <- function(type = "numeric", include_pval = FALSE) {
lifecycle::deprecate_warn(
"0.9.6", "summary_formats()",
details = 'Use get_labels_from_stats(get_stats("analyze_vars_numeric", add_pval = include_pval)) instead'
)
met_grp <- paste0(c("analyze_vars", type), collapse = "_")
get_labels_from_stats(get_stats(met_grp, add_pval = include_pval))
}
65 changes: 65 additions & 0 deletions R/utils_rtables.R
Original file line number Diff line number Diff line change
Expand Up @@ -467,3 +467,68 @@ set_default_na_str <- function(na_str) {
checkmate::assert_character(na_str, len = 1, null.ok = TRUE)
options("tern_default_na_str" = na_str)
}


#' Utilities to handle extra arguments in analysis functions
#'
#' @description `r lifecycle::badge("stable")`
#' Important additional parameters, useful to modify behavior of analysis and summary
#' functions are listed in [rtables::additional_fun_params]. With these utility functions
#' we can retrieve a curated list of these parameters from the environment, and pass them
#' to the analysis functions with dedicated `...`; notice that the final `s_*` function
#' will get them through argument matching.
#'
#' @param extra_afun_params (`list`)\cr list of additional parameters (`character`) to be
#' retrieved from the environment. Curated list is present in [rtables::additional_fun_params].
#' @param add_alt_df (`logical`)\cr if `TRUE`, the function will also add `.alt_df` and `.alt_df_row`
#' parameters.
#'
#' @name util_handling_additional_fun_params
NULL

#' @describeIn util_handling_additional_fun_params Retrieve additional parameters from the environment.
#'
#' @return
#' * `retrieve_extra_afun_params` returns a list of the values of the parameters in the environment.
#'
#' @keywords internal
retrieve_extra_afun_params <- function(extra_afun_params) {
out <- list()
for (extra_param in extra_afun_params) {
out <- c(out, list(get(extra_param, envir = parent.frame())))
}
setNames(out, extra_afun_params)
}

#' @describeIn util_handling_additional_fun_params Curated list of additional parameters for
#' analysis functions. Please check [rtables::additional_fun_params] for precise descriptions.
#'
#' @return
#' * `get_additional_afun_params` returns a list of additional parameters.
#'
#' @keywords internal
get_additional_afun_params <- function(add_alt_df = FALSE) {
out_list <- list(
.N_col = integer(),
.N_total = integer(),
.N_row = integer(),
.df_row = data.frame(),
.var = character(),
.ref_group = character(),
.ref_full = vector(mode = "numeric"),
.in_ref_col = logical(),
.spl_context = data.frame(),
.all_col_exprs = vector(mode = "expression"),
.all_col_counts = vector(mode = "integer")
)

if (isTRUE(add_alt_df)) {
out_list <- c(
out_list,
.alt_df_row = data.frame(),
.alt_df = data.frame()
)
}

out_list
}
Loading

0 comments on commit 12de1fb

Please sign in to comment.