Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor summarize_change() #1347

Merged
merged 20 commits into from
Nov 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,19 @@
* Added the `denom` parameter to `s_count_cumulative()`, `s_count_missed_doses()`, and `s_count_occurrences_by_grade()`.
* Added `"N_row"` as an optional input to `denom` in `s_count_occurrences()`.
* Refactored `a_count_occurrences_by_grade()`, `a_count_patients_with_event()`, and `a_count_patients_with_flags()` to no longer use `make_afun()`.

### Enhancements
* Added `rel_height_plot` parameter to `g_lineplot()` to control the line plot height relative to annotation table height.
* Updated the `table_font_size` parameter of `g_lineplot()` to control the size of all text in the annotation table, including labels.
* Added `as_list` parameter to `g_lineplot()` to allow users to return the line plot and annotation table elements as a list instead of stacked for more complex customization.
* Refactored `summarize_change()` to work without `make_afun()` and access all additional function parameter.
* Added vignette "Understanding `tern` functions" for future reference.

### Bug Fixes
* Fixed bug in `a_summary()` causing non-unique `row_name` values to occur when multiple statistics are selected for count variables.

### Miscellaneous
* Reverted deprecation of quick get functions `summary_formats()` and `summary_labels()`. Added disclaimer about underlying use of `get_stats`.
* Corrected handling of extra arguments and `NA` for `summarize_change()`.

# tern 0.9.6

### Enhancements
Expand Down
139 changes: 100 additions & 39 deletions R/summarize_change.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,12 @@ NULL
#' an error will be thrown.
#'
#' @keywords internal
s_change_from_baseline <- function(df,
.var,
variables,
na.rm = TRUE, # nolint
...) {
s_change_from_baseline <- function(df, ...) {
# s_summary should get na.rm
args_list <- list(...)
.var <- args_list[[".var"]]
variables <- args_list[["variables"]]

checkmate::assert_numeric(df[[variables$value]])
checkmate::assert_numeric(df[[.var]])
checkmate::assert_logical(df[[variables$baseline_flag]])
Expand All @@ -48,7 +49,7 @@ s_change_from_baseline <- function(df,
if (is.logical(combined) && identical(length(combined), 0L)) {
combined <- numeric(0)
}
s_summary(combined, na.rm = na.rm, ...)
s_summary(combined, ...)
}

#' @describeIn summarize_change Formatted analysis function which is used as `afun` in `summarize_change()`.
Expand All @@ -57,25 +58,56 @@ s_change_from_baseline <- function(df,
#' * `a_change_from_baseline()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @keywords internal
a_change_from_baseline <- make_afun(
s_change_from_baseline,
.formats = c(
n = "xx",
mean_sd = "xx.xx (xx.xx)",
mean_se = "xx.xx (xx.xx)",
median = "xx.xx",
range = "xx.xx - xx.xx",
mean_ci = "(xx.xx, xx.xx)",
median_ci = "(xx.xx, xx.xx)",
mean_pval = "xx.xx"
),
.labels = c(
mean_sd = "Mean (SD)",
mean_se = "Mean (SE)",
median = "Median",
range = "Min - Max"
a_change_from_baseline <- function(df,
...,
.stats = NULL,
.formats = NULL,
.labels = NULL,
.indent_mods = NULL) {
# Check if there are user-defined functions
default_and_custom_stats_list <- .split_std_from_custom_stats(.stats)
.stats <- default_and_custom_stats_list$default_stats
custom_stat_functions <- default_and_custom_stats_list$custom_stats

# Adding automatically extra parameters to the statistic function (see ?rtables::additional_fun_params)
extra_afun_params <- retrieve_extra_afun_params(
names(list(...)$.additional_fun_parameters)
)
x_stats <- .apply_stat_functions(
default_stat_fnc = s_change_from_baseline,
custom_stat_fnc_list = custom_stat_functions,
args_list = c(
df = list(df),
extra_afun_params,
list(...)
)
)

# Fill in with formatting defaults if needed
.stats <- c(
get_stats("analyze_vars_numeric", stats_in = .stats),
names(custom_stat_functions) # Additional stats from custom functions
)
)
.formats <- get_formats_from_stats(.stats, .formats)
.labels <- get_labels_from_stats(.stats, .labels)
.indent_mods <- get_indents_from_stats(.stats, .indent_mods)

# Auto format handling
.formats <- apply_auto_formatting(
.formats,
x_stats,
extra_afun_params$.df_row,
extra_afun_params$.var
)

in_rows(
.list = x_stats[.stats],
.formats = .formats,
.names = names(.labels),
.labels = .labels,
.indent_mods = .indent_mods
)
}

#' @describeIn summarize_change Layout-creating function which can take statistics function arguments
#' and additional format arguments. This function is a wrapper for [rtables::analyze()].
Expand All @@ -91,7 +123,7 @@ a_change_from_baseline <- make_afun(
#' @examples
#' library(dplyr)
#'
#' ## Fabricate dataset
#' # Fabricate dataset
#' dta_test <- data.frame(
#' USUBJID = rep(1:6, each = 3),
#' AVISIT = rep(paste0("V", 1:3), 6),
Expand Down Expand Up @@ -119,31 +151,60 @@ a_change_from_baseline <- make_afun(
summarize_change <- function(lyt,
vars,
variables,
var_labels = vars,
na_str = default_na_str(),
na_rm = TRUE,
nested = TRUE,
...,
show_labels = "default",
table_names = vars,
section_div = NA_character_,
...,
.stats = c("n", "mean_sd", "median", "range"),
.formats = NULL,
.labels = NULL,
.formats = c(
Melkiades marked this conversation as resolved.
Show resolved Hide resolved
n = "xx",
mean_sd = "xx.xx (xx.xx)",
mean_se = "xx.xx (xx.xx)",
median = "xx.xx",
range = "xx.xx - xx.xx",
mean_ci = "(xx.xx, xx.xx)",
median_ci = "(xx.xx, xx.xx)",
mean_pval = "xx.xx"
),
.labels = c(
mean_sd = "Mean (SD)",
mean_se = "Mean (SE)",
median = "Median",
range = "Min - Max"
),
.indent_mods = NULL) {
extra_args <- list(variables = variables, ...)
# Extra args must contain .stats, .formats, .labels, .indent_mods - sent to the analysis level
extra_args <- list(".stats" = .stats)
if (!is.null(.formats)) extra_args[[".formats"]] <- .formats
if (!is.null(.labels)) extra_args[[".labels"]] <- .labels
if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods

afun <- make_afun(
a_change_from_baseline,
.stats = .stats,
.formats = .formats,
.labels = .labels,
.indent_mods = .indent_mods
# Adding additional arguments to the analysis function (depends on the specific call)
extra_args <- c(extra_args, "variables" = list(variables), ...)

# Adding all additional information from layout to analysis functions (see ?rtables::additional_fun_params)
extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE)
formals(a_change_from_baseline) <- c(
formals(a_change_from_baseline),
extra_args[[".additional_fun_parameters"]]
)

# Main analysis call - Nothing with .* -> these should be dedicated to the analysis function
analyze(
lyt,
vars,
afun = afun,
lyt = lyt,
vars = vars,
var_labels = var_labels,
afun = a_change_from_baseline,
na_str = na_str,
nested = nested,
extra_args = extra_args,
table_names = table_names
inclNAs = na_rm, # adds na.rm = TRUE to the analysis function
show_labels = show_labels,
table_names = table_names,
section_div = section_div
)
}
69 changes: 57 additions & 12 deletions R/utils_default_stats_formats_labels.R
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,57 @@ get_stats <- function(method_groups = "analyze_vars_numeric", stats_in = NULL, a
out
}

# Utility function used to separate custom stats (user-defined functions) from defaults
.split_std_from_custom_stats <- function(stats_in) {
out <- list(default_stats = NULL, custom_stats = NULL)
if (is.list(stats_in)) {
is_custom_fnc <- sapply(stats_in, is.function)
checkmate::assert_list(stats_in[is_custom_fnc], types = "function", names = "named")
out[["custom_stats"]] <- stats_in[is_custom_fnc]
out[["default_stats"]] <- unlist(stats_in[!is_custom_fnc])
} else {
out[["default_stats"]] <- stats_in
}

out
}

# Utility function to apply statistical functions
.apply_stat_functions <- function(default_stat_fnc, custom_stat_fnc_list, args_list) {
# Default checks
checkmate::assert_function(default_stat_fnc)
checkmate::assert_list(custom_stat_fnc_list, types = "function", null.ok = TRUE, names = "named")
checkmate::assert_list(args_list)

# Checking custom stats have same formals
if (!is.null(custom_stat_fnc_list)) {
fundamental_call_to_data <- names(formals(default_stat_fnc))[[1]]
for (fnc in custom_stat_fnc_list) {
if (!identical(names(formals(fnc))[[1]], fundamental_call_to_data)) {
stop(
"The first parameter of a custom statistical function needs to be the same (it can be `df` or `x`) ",
"as the default statistical function. In this case your custom function has ", names(formals(fnc))[[1]],
" as first parameter, while the default function has ", fundamental_call_to_data, "."
)
}
if (!any(names(formals(fnc)) == "...")) {
stop(
"The custom statistical function needs to have `...` as a parameter to accept additional arguments. ",
"In this case your custom function does not have `...`."
)
}
}
}

# Merging
stat_fnc_list <- c(default_stat_fnc, custom_stat_fnc_list)

# Applying
out <- unlist(lapply(stat_fnc_list, function(fnc) do.call(fnc, args = args_list)), recursive = FALSE)

out
}

#' @describeIn default_stats_formats_labels Get formats corresponding to a list of statistics.
#' To check available defaults see `tern::tern_default_formats` list.
#'
Expand Down Expand Up @@ -496,9 +547,7 @@ tern_default_labels <- c(
rate_ratio = "Adjusted Rate Ratio"
)

# To deprecate ---------

#' @describeIn default_stats_formats_labels `r lifecycle::badge("deprecated")`
#' @describeIn default_stats_formats_labels `r lifecycle::badge("stable")`
#' Quick function to retrieve default formats for summary statistics:
#' [analyze_vars()] and [analyze_vars_in_cols()] principally.
#'
Expand All @@ -513,20 +562,20 @@ tern_default_labels <- c(
#'
#' @export
summary_formats <- function(type = "numeric", include_pval = FALSE) {
lifecycle::deprecate_warn(
"0.9.6", "summary_formats()",
details = 'Use get_formats_from_stats(get_stats("analyze_vars_numeric", add_pval = include_pval)) instead'
)
met_grp <- paste0(c("analyze_vars", type), collapse = "_")
get_formats_from_stats(get_stats(met_grp, add_pval = include_pval))
}

#' @describeIn default_stats_formats_labels `r lifecycle::badge("deprecated")`
#' @describeIn default_stats_formats_labels `r lifecycle::badge("stable")`
#' Quick function to retrieve default labels for summary statistics.
#' Returns labels of descriptive statistics which are understood by `rtables`. Similar to `summary_formats`.
#'
#' @param include_pval (`flag`)\cr same as the `add_pval` argument in [get_stats()].
#'
#' @details
#' `summary_*` quick get functions for labels or formats uses `get_stats` and `get_labels_from_stats` or
#' `get_formats_from_stats` respectively to retrieve relevant information.
#'
#' @return
#' * `summary_labels` returns a named `vector` of default statistic labels for the given data type.
#'
Expand All @@ -536,10 +585,6 @@ summary_formats <- function(type = "numeric", include_pval = FALSE) {
#'
#' @export
summary_labels <- function(type = "numeric", include_pval = FALSE) {
lifecycle::deprecate_warn(
"0.9.6", "summary_formats()",
details = 'Use get_labels_from_stats(get_stats("analyze_vars_numeric", add_pval = include_pval)) instead'
)
met_grp <- paste0(c("analyze_vars", type), collapse = "_")
get_labels_from_stats(get_stats(met_grp, add_pval = include_pval))
}
65 changes: 65 additions & 0 deletions R/utils_rtables.R
Original file line number Diff line number Diff line change
Expand Up @@ -467,3 +467,68 @@ set_default_na_str <- function(na_str) {
checkmate::assert_character(na_str, len = 1, null.ok = TRUE)
options("tern_default_na_str" = na_str)
}


#' Utilities to handle extra arguments in analysis functions
#'
#' @description `r lifecycle::badge("stable")`
#' Important additional parameters, useful to modify behavior of analysis and summary
#' functions are listed in [rtables::additional_fun_params]. With these utility functions
#' we can retrieve a curated list of these parameters from the environment, and pass them
#' to the analysis functions with dedicated `...`; notice that the final `s_*` function
#' will get them through argument matching.
#'
#' @param extra_afun_params (`list`)\cr list of additional parameters (`character`) to be
#' retrieved from the environment. Curated list is present in [rtables::additional_fun_params].
#' @param add_alt_df (`logical`)\cr if `TRUE`, the function will also add `.alt_df` and `.alt_df_row`
#' parameters.
#'
#' @name util_handling_additional_fun_params
NULL

#' @describeIn util_handling_additional_fun_params Retrieve additional parameters from the environment.
#'
#' @return
#' * `retrieve_extra_afun_params` returns a list of the values of the parameters in the environment.
#'
#' @keywords internal
retrieve_extra_afun_params <- function(extra_afun_params) {
out <- list()
for (extra_param in extra_afun_params) {
out <- c(out, list(get(extra_param, envir = parent.frame())))
}
setNames(out, extra_afun_params)
}

#' @describeIn util_handling_additional_fun_params Curated list of additional parameters for
#' analysis functions. Please check [rtables::additional_fun_params] for precise descriptions.
#'
#' @return
#' * `get_additional_afun_params` returns a list of additional parameters.
#'
#' @keywords internal
get_additional_afun_params <- function(add_alt_df = FALSE) {
Melkiades marked this conversation as resolved.
Show resolved Hide resolved
out_list <- list(
.N_col = integer(),
.N_total = integer(),
.N_row = integer(),
.df_row = data.frame(),
.var = character(),
.ref_group = character(),
.ref_full = vector(mode = "numeric"),
.in_ref_col = logical(),
.spl_context = data.frame(),
.all_col_exprs = vector(mode = "expression"),
.all_col_counts = vector(mode = "integer")
)

if (isTRUE(add_alt_df)) {
out_list <- c(
out_list,
.alt_df_row = data.frame(),
.alt_df = data.frame()
)
}

out_list
}
Loading
Loading