diff --git a/DESCRIPTION b/DESCRIPTION index fbed0782dd..93106d81ca 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -167,3 +167,4 @@ Collate: 'utils_factor.R' 'utils_grid.R' 'utils_rtables.R' + 'utils_split_funs.R' diff --git a/NAMESPACE b/NAMESPACE index f09604b8be..0837a52d8a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -217,7 +217,9 @@ export(has_fraction_in_cols) export(has_fractions_difference) export(imputation_rule) export(keep_content_rows) +export(keep_level_order) export(keep_rows) +export(level_order) export(logistic_regression_cols) export(logistic_summary_by_flag) export(month2day) @@ -235,6 +237,7 @@ export(prop_strat_wilson) export(prop_wald) export(prop_wilson) export(reapply_varlabels) +export(ref_group_position) export(s_compare) export(s_count_occurrences) export(s_count_occurrences_by_grade) diff --git a/NEWS.md b/NEWS.md index 6a44910ec0..4b3892a2f2 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,10 @@ # tern 0.9.2.9001 +### New Features +* Added `ref_group_position` function to place the reference group facet last, first or at a certain position. +* Added `keep_level_order` split function to retain original order of levels in a split. +* Added `level_order` split function to reorder manually the levels. + ### Miscellaneous * Specified minimal version of package dependencies. diff --git a/R/utils_split_funs.R b/R/utils_split_funs.R new file mode 100644 index 0000000000..131f41a882 --- /dev/null +++ b/R/utils_split_funs.R @@ -0,0 +1,185 @@ +#' Custom Split Functions +#' +#' @description `r lifecycle::badge("stable")` +#' +#' Collection of useful functions that are expanding on the core list of functions +#' provided by `rtables`. See [rtables::custom_split_funs] and [rtables::make_split_fun()] +#' for more information on how to make a custom split function. All these functions +#' work with [split_rows_by()] argument `split_fun` to modify the way the split +#' happens. For other split functions, consider consulting [`rtables::split_funcs`]. +#' +#' @inheritParams rtables::split_funcs +#' @param .spl_context (`data.frame`) \cr detailed description of the current split (or subsetting). +#' Please consider consulting [rtables::spl_context] for more information. +#' +#' @seealso [rtables::make_split_fun()] +#' +#' @name utils_split_funs +NULL + +#' @describeIn utils_split_funs split function to place reference group facet at a specific position +#' during post-processing stage. +#' +#' @param position (`string` or `integer`)\cr should it be `"first"` or `"last"` or in a specific position? +#' +#' @return +#' * `ref_group_position` returns an utility function that puts the reference group +#' as first, last or at a certain position and needs to be assigned to `split_fun`. +#' +#' @examples +#' library(dplyr) +#' +#' dat <- data.frame( +#' x = factor(letters[1:5], levels = letters[5:1]), +#' y = 1:5 +#' ) +#' +#' # With rtables layout functions +#' basic_table() %>% +#' split_cols_by("x", ref_group = "c", split_fun = ref_group_position("last")) %>% +#' analyze("y") %>% +#' build_table(dat) +#' +#' # With tern layout funcitons +#' adtte_f <- tern_ex_adtte %>% +#' filter(PARAMCD == "OS") %>% +#' mutate( +#' AVAL = day2month(AVAL), +#' is_event = CNSR == 0 +#' ) +#' +#' basic_table() %>% +#' split_cols_by(var = "ARMCD", ref_group = "ARM B", split_fun = ref_group_position("first")) %>% +#' add_colcounts() %>% +#' surv_time( +#' vars = "AVAL", +#' var_labels = "Survival Time (Months)", +#' is_event = "is_event", +#' ) %>% +#' build_table(df = adtte_f) +#' +#' basic_table() %>% +#' split_cols_by(var = "ARMCD", ref_group = "ARM B", split_fun = ref_group_position(2)) %>% +#' add_colcounts() %>% +#' surv_time( +#' vars = "AVAL", +#' var_labels = "Survival Time (Months)", +#' is_event = "is_event", +#' ) %>% +#' build_table(df = adtte_f) +#' +#' @export +ref_group_position <- function(position = "first") { + make_split_fun( + post = list( + function(splret, spl, fulldf) { + if (!"ref_group_value" %in% slotNames(spl)) { + stop("Reference group is undefined.") + } + + spl_var <- rtables:::spl_payload(spl) + fulldf[[spl_var]] <- factor(fulldf[[spl_var]]) + init_lvls <- levels(fulldf[[spl_var]]) + + if (!all(names(splret$values) %in% init_lvls)) { + stop("This split function does not work with combination facets.") + } + + ref_group_pos <- which(init_lvls == rtables:::spl_ref_group(spl)) + pos_choices <- c("first", "last") + if (checkmate::test_choice(position, pos_choices) && position == "first") { + pos <- 0 + } else if (checkmate::test_choice(position, pos_choices) && position == "last") { + pos <- length(init_lvls) + } else if (checkmate::test_int(position, lower = 1, upper = length(init_lvls))) { + pos <- position - 1 + } else { + stop("Wrong input for ref group position. It must be 'first', 'last', or a integer.") + } + + reord_lvls <- append(init_lvls[-ref_group_pos], init_lvls[ref_group_pos], after = pos) + ord <- match(reord_lvls, names(splret$values)) + + make_split_result( + splret$values[ord], + splret$datasplit[ord], + splret$labels[ord] + ) + } + ) + ) +} +#' @describeIn utils_split_funs split function to keep original order of factor +#' levels in the split. +#' +#' @return +#' * `keep_level_order` returns an utility function that keeps the original levels'. +#' It needs to be assigned to `split_fun`. +#' +#' @examples +#' # keep_level_order -------- +#' # Even if default would bring ref_group first, the original order puts it last +#' basic_table() %>% +#' split_cols_by("Species", ref_group = "virginica", split_fun = keep_level_order) %>% +#' analyze("Sepal.Length") %>% +#' build_table(iris) +#' +#' @export +keep_level_order <- make_split_fun( + post = list( + function(splret, spl, fulldf, ...) { + ord <- order(names(splret$values)) + make_split_result( + splret$values[ord], + splret$datasplit[ord], + splret$labels[ord] + ) + } + ) +) +#' @describeIn utils_split_funs split function to change level order based on a `integer` +#' vector or a `character` vector that represent the split variable's factor levels. +#' +#' @param order (`character` or `integer`)\cr vector of ordering indexes for the split facets. +#' +#' @return +#' * `keep_level_order` returns an utility function that changes the original levels' order, +#' depending on input `order` and split levels. +#' +#' @examples +#' # level_order -------- +#' # Even if default would bring ref_group first, the original order puts it last +#' basic_table() %>% +#' split_cols_by("Species", split_fun = level_order(c(1, 3, 2))) %>% +#' analyze("Sepal.Length") %>% +#' build_table(iris) +#' +#' # character vector +#' new_order <- level_order(levels(iris$Species)[c(1, 3, 2)]) +#' basic_table() %>% +#' split_cols_by("Species", ref_group = "virginica", split_fun = new_order) %>% +#' analyze("Sepal.Length") %>% +#' build_table(iris) +#' +#' @export +level_order <- function(order) { + make_split_fun( + post = list( + function(splret, spl, fulldf, ...) { + if (checkmate::test_integerish(order)) { + checkmate::assert_integerish(order, lower = 1, upper = length(splret$values)) + ord <- order + } else { + checkmate::assert_character(order, len = length(splret$values)) + checkmate::assert_set_equal(order, names(splret$values), ordered = FALSE) + ord <- match(order, names(splret$values)) + } + make_split_result( + splret$values[ord], + splret$datasplit[ord], + splret$labels[ord] + ) + } + ) + ) +} diff --git a/_pkgdown.yml b/_pkgdown.yml index 49d7ea7614..22388323ce 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -114,6 +114,7 @@ reference: - split_cols_by_groups - to_string_matrix - groups_list_to_df + - utils_split_funs - title: rtables Formatting Functions desc: These functions provide customized formatting rules to work with the diff --git a/man/utils_split_funs.Rd b/man/utils_split_funs.Rd new file mode 100644 index 0000000000..9cc703228b --- /dev/null +++ b/man/utils_split_funs.Rd @@ -0,0 +1,147 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils_split_funs.R +\name{utils_split_funs} +\alias{utils_split_funs} +\alias{ref_group_position} +\alias{keep_level_order} +\alias{level_order} +\title{Custom Split Functions} +\usage{ +ref_group_position(position = "first") + +keep_level_order( + df, + spl, + vals = NULL, + labels = NULL, + trim = FALSE, + .spl_context +) + +level_order(order) +} +\arguments{ +\item{position}{(\code{string} or \code{integer})\cr should it be \code{"first"} or \code{"last"} or in a specific position?} + +\item{df}{dataset (\code{data.frame} or \code{tibble})} + +\item{spl}{A Split object defining a partitioning or analysis/tabulation of +the data.} + +\item{vals}{ANY. For internal use only.} + +\item{labels}{character. Labels to use for the remaining levels instead of +the existing ones.} + +\item{trim}{logical(1). Should splits corresponding with 0 observations be +kept when tabulating.} + +\item{.spl_context}{(\code{data.frame}) \cr detailed description of the current split (or subsetting). +Please consider consulting \link[rtables:spl_context]{rtables::spl_context} for more information.} + +\item{order}{(\code{character} or \code{integer})\cr vector of ordering indexes for the split facets.} +} +\value{ +\itemize{ +\item \code{ref_group_position} returns an utility function that puts the reference group +as first, last or at a certain position and needs to be assigned to \code{split_fun}. +} + +\itemize{ +\item \code{keep_level_order} returns an utility function that keeps the original levels'. +It needs to be assigned to \code{split_fun}. +} + +\itemize{ +\item \code{keep_level_order} returns an utility function that changes the original levels' order, +depending on input \code{order} and split levels. +} +} +\description{ +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} + +Collection of useful functions that are expanding on the core list of functions +provided by \code{rtables}. See \link[rtables:custom_split_funs]{rtables::custom_split_funs} and \code{\link[rtables:make_split_fun]{rtables::make_split_fun()}} +for more information on how to make a custom split function. All these functions +work with \code{\link[=split_rows_by]{split_rows_by()}} argument \code{split_fun} to modify the way the split +happens. For other split functions, consider consulting \code{\link[rtables:split_funcs]{rtables::split_funcs}}. +} +\section{Functions}{ +\itemize{ +\item \code{ref_group_position()}: split function to place reference group facet at a specific position +during post-processing stage. + +\item \code{keep_level_order()}: split function to keep original order of factor +levels in the split. + +\item \code{level_order()}: split function to change level order based on a \code{integer} +vector or a \code{character} vector that represent the split variable's factor levels. + +}} +\examples{ +library(dplyr) + +dat <- data.frame( + x = factor(letters[1:5], levels = letters[5:1]), + y = 1:5 +) + +# With rtables layout functions +basic_table() \%>\% + split_cols_by("x", ref_group = "c", split_fun = ref_group_position("last")) \%>\% + analyze("y") \%>\% + build_table(dat) + +# With tern layout funcitons +adtte_f <- tern_ex_adtte \%>\% + filter(PARAMCD == "OS") \%>\% + mutate( + AVAL = day2month(AVAL), + is_event = CNSR == 0 + ) + +basic_table() \%>\% + split_cols_by(var = "ARMCD", ref_group = "ARM B", split_fun = ref_group_position("first")) \%>\% + add_colcounts() \%>\% + surv_time( + vars = "AVAL", + var_labels = "Survival Time (Months)", + is_event = "is_event", + ) \%>\% + build_table(df = adtte_f) + +basic_table() \%>\% + split_cols_by(var = "ARMCD", ref_group = "ARM B", split_fun = ref_group_position(2)) \%>\% + add_colcounts() \%>\% + surv_time( + vars = "AVAL", + var_labels = "Survival Time (Months)", + is_event = "is_event", + ) \%>\% + build_table(df = adtte_f) + +# keep_level_order -------- +# Even if default would bring ref_group first, the original order puts it last +basic_table() \%>\% + split_cols_by("Species", ref_group = "virginica", split_fun = keep_level_order) \%>\% + analyze("Sepal.Length") \%>\% + build_table(iris) + +# level_order -------- +# Even if default would bring ref_group first, the original order puts it last +basic_table() \%>\% + split_cols_by("Species", split_fun = level_order(c(1, 3, 2))) \%>\% + analyze("Sepal.Length") \%>\% + build_table(iris) + +# character vector +new_order <- level_order(levels(iris$Species)[c(1, 3, 2)]) +basic_table() \%>\% + split_cols_by("Species", ref_group = "virginica", split_fun = new_order) \%>\% + analyze("Sepal.Length") \%>\% + build_table(iris) + +} +\seealso{ +\code{\link[rtables:make_split_fun]{rtables::make_split_fun()}} +} diff --git a/tests/testthat/_snaps/utils_split_fun.md b/tests/testthat/_snaps/utils_split_fun.md new file mode 100644 index 0000000000..9a8269e0c0 --- /dev/null +++ b/tests/testthat/_snaps/utils_split_fun.md @@ -0,0 +1,77 @@ +# analyze_vars works as expected with ref_group_position last split fun + + Code + res[3:4, ] + Output + ARM A ARM B ARM C + (N=69) (N=73) (N=58) + ———————————————————————————————————————————————— + Mean (SD) 34.1 (6.8) 35.8 (7.1) 36.1 (7.4) + Median 32.8 35.4 36.2 + +# compare_vars works as expected with ref_group first split fun + + Code + res[1:2, ] + Output + ARM B ARM A ARM C + (N=73) (N=69) (N=58) + ———————————————————————————————————————————————— + n 73 69 58 + Mean (SD) 35.8 (7.1) 34.1 (6.8) 36.1 (7.4) + +# summarize_ancova works as expected with ref_group position split fun + + Code + res[1:2, ] + Output + ARM A ARM B ARM C + (N=69) (N=73) (N=58) + ———————————————————————————————————————————————— + Unadjusted comparison + n 69 73 58 + +# binary endpoint layouts work as expected with ref_group_position last split fun + + Code + res + Output + A: Drug X C: Combination B: Placebo + (N=69) (N=58) (N=73) + ——————————————————————————————————————————————————————————————————————————————————————— + Odds Ratio (95% CI) 2.47 (1.22 - 5.01) 2.29 (1.10 - 4.78) + Difference in Response rate (%) 20.5 19.0 + 95% CI (Wald, with correction) (3.6, 37.3) (1.2, 36.8) + p-value (Chi-Squared Test) 0.0113 0.0263 + +# time to event layouts works as expected with ref_group_position last split fun + + Code + res + Output + ARM A ARM C ARM B + (N=69) (N=58) (N=73) + ——————————————————————————————————————————————————————————————————————————————————— + CoxPH + p-value (log-rank) 0.0159 0.1820 + Hazard Ratio 0.58 1.31 + 95% CI (0.37, 0.91) (0.88, 1.95) + 6 Months + Patients remaining at risk 49 39 46 + Event Free Rate (%) 85.29 71.87 71.55 + 95% CI (76.38, 94.19) (60.15, 83.58) (60.96, 82.14) + Difference in Event Free Rate 13.74 0.31 + 95% CI (-0.10, 27.57) (-15.47, 16.10) + p-value (Z-test) 0.0517 0.9688 + +# summarize_ancova works as expected with ref_group_position last split fun + + Code + res + Output + ARM A ARM C ARM B + (N=69) (N=58) (N=73) + ————————————————————————————————————————————————————— + Unadjusted rate (per year) + Rate 8.2061 7.8551 9.1554 + diff --git a/tests/testthat/test-utils_split_fun.R b/tests/testthat/test-utils_split_fun.R new file mode 100644 index 0000000000..7d48042bde --- /dev/null +++ b/tests/testthat/test-utils_split_fun.R @@ -0,0 +1,152 @@ +testthat::test_that("ref_group_position last split fun gives error when ref group is undefined", { + lyt <- basic_table() %>% + split_cols_by(var = "ARMCD", split_fun = ref_group_position("last")) %>% + analyze("AGE") + + testthat::expect_error(build_table(lyt, df = tern_ex_adsl)) +}) + +testthat::test_that("analyze_vars works as expected with ref_group_position last split fun", { + # Default behavior + result <- basic_table() %>% + split_cols_by(var = "ARMCD", ref_group = "ARM C") %>% + add_colcounts() %>% + analyze_vars(c("AGE", "STRATA2")) %>% + build_table(df = tern_ex_adsl) + + res <- testthat::expect_silent(result) + testthat::expect_identical(names(res), c("ARM C", "ARM A", "ARM B")) + + result <- basic_table() %>% + split_cols_by(var = "ARMCD", ref_group = "ARM C", split_fun = ref_group_position("last")) %>% + add_colcounts() %>% + analyze_vars(c("AGE", "STRATA2")) %>% + build_table(df = tern_ex_adsl) + + res <- testthat::expect_silent(result) + testthat::expect_snapshot(res[3:4, ]) + + # keep_level_order + result_ordered <- basic_table() %>% + split_cols_by(var = "ARMCD", ref_group = "ARM B", split_fun = keep_level_order) %>% + add_colcounts() %>% + analyze_vars(c("AGE", "STRATA2")) %>% + build_table(df = tern_ex_adsl) + + res_ordered <- testthat::expect_silent(result_ordered) + + testthat::expect_identical(toString(res), toString(res_ordered)) +}) + +testthat::test_that("compare_vars works as expected with ref_group first split fun", { + result <- basic_table() %>% + split_cols_by(var = "ARMCD", ref_group = "ARM B", split_fun = ref_group_position("first")) %>% + add_colcounts() %>% + compare_vars("AGE") %>% + build_table(df = tern_ex_adsl) + + res <- testthat::expect_silent(result) + testthat::expect_identical(names(res), c("ARM B", "ARM A", "ARM C")) + testthat::expect_snapshot(res[1:2, ]) +}) + +testthat::test_that("summarize_ancova works as expected with ref_group position split fun", { + result <- basic_table() %>% + split_cols_by(var = "ARMCD", ref_group = "ARM B", split_fun = ref_group_position(2)) %>% + add_colcounts() %>% + summarize_ancova( + vars = "BMRKR1", + variables = list(arm = "ARM"), + var_labels = "Unadjusted comparison", + conf_level = 0.95 + ) %>% + build_table(tern_ex_adsl) + + res <- testthat::expect_silent(result) + testthat::expect_identical(names(res), c("ARM A", "ARM B", "ARM C")) + testthat::expect_snapshot(res[1:2, ]) +}) + +testthat::test_that("binary endpoint layouts work as expected with ref_group_position last split fun", { + adrs_f <- tern_ex_adrs %>% + dplyr::filter(PARAMCD == "INVET") %>% + dplyr::mutate(is_rsp = AVALC %in% c("CR", "PR")) + + result <- basic_table() %>% + split_cols_by(var = "ARM", ref_group = "B: Placebo", split_fun = ref_group_position("last")) %>% + add_colcounts() %>% + estimate_odds_ratio(vars = "is_rsp") %>% + estimate_proportion_diff(vars = "is_rsp", table_names = "prop_diff") %>% + test_proportion_diff(vars = "is_rsp", table_names = "test_prop_diff") %>% + build_table(adrs_f) + + res <- testthat::expect_silent(result) + testthat::expect_identical(names(res), c("A: Drug X", "C: Combination", "B: Placebo")) + testthat::expect_snapshot(res) +}) + +testthat::test_that("time to event layouts works as expected with ref_group_position last split fun", { + adtte_f <- tern_ex_adtte %>% + dplyr::filter(PARAMCD == "PFS") %>% + dplyr::mutate( + AVAL = day2month(AVAL), + is_event = CNSR == 0 + ) + + result <- basic_table() %>% + split_cols_by(var = "ARMCD", ref_group = "ARM B", split_fun = ref_group_position("last")) %>% + add_colcounts() %>% + coxph_pairwise( + vars = "AVAL", + is_event = "is_event" + ) %>% + surv_timepoint( + vars = "AVAL", + var_labels = "Months", + time_point = 6, + is_event = "is_event", + method = "both" + ) %>% + build_table(adtte_f) + + res <- testthat::expect_silent(result) + testthat::expect_snapshot(res) +}) + +testthat::test_that("summarize_ancova works as expected with ref_group_position last split fun", { + anl <- tern_ex_adtte %>% filter(PARAMCD == "TNE") + + result <- basic_table() %>% + split_cols_by(var = "ARMCD", ref_group = "ARM B", split_fun = ref_group_position("last")) %>% + add_colcounts() %>% + summarize_glm_count( + vars = "AVAL", + variables = list(arm = "ARM", offset = "lgTMATRSK", covariates = NULL), + conf_level = 0.95, + distribution = "poisson", + rate_mean_method = "emmeans", + var_labels = "Unadjusted rate (per year)", + .stats = c("rate"), + .labels = c(rate = "Rate") + ) %>% + build_table(anl) + + res <- testthat::expect_silent(result) + testthat::expect_snapshot(res) +}) + +testthat::test_that("level_order works for integerish and characters", { + tbl_int <- basic_table() %>% + split_cols_by("Species", split_fun = level_order(c(1, 3, 2))) %>% + analyze("Sepal.Length") %>% + build_table(iris) + + # character vector + new_order <- level_order(levels(iris$Species)[c(1, 3, 2)]) + tbl_chr <- basic_table() %>% + split_cols_by("Species", ref_group = "virginica", split_fun = new_order) %>% + analyze("Sepal.Length") %>% + build_table(iris) + + testthat::expect_identical(toString(tbl_int), toString(tbl_chr)) +})