diff --git a/NAMESPACE b/NAMESPACE index a53328608..412daa2b5 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -20,6 +20,7 @@ export("header_section_div<-") export("horizontal_sep<-") export("indent_mod<-") export("label_at_path<-") +export("obj_stat_names<-") export("ref_index<-") export("ref_symbol<-") export("row_footnotes<-") @@ -125,6 +126,7 @@ export(manual_cols) export(no_colinfo) export(non_ref_rcell) export(obj_avar) +export(obj_stat_names) export(pag_tt_indices) export(paginate_table) export(path_enriched_df) @@ -221,6 +223,7 @@ exportMethods("obj_format<-") exportMethods("obj_label<-") exportMethods("obj_na_str<-") exportMethods("obj_name<-") +exportMethods("obj_stat_names<-") exportMethods("prov_footer<-") exportMethods("ref_index<-") exportMethods("ref_symbol<-") @@ -267,6 +270,7 @@ exportMethods(obj_format) exportMethods(obj_label) exportMethods(obj_na_str) exportMethods(obj_name) +exportMethods(obj_stat_names) exportMethods(prov_footer) exportMethods(rbind) exportMethods(rbind2) diff --git a/NEWS.md b/NEWS.md index 07d3c5060..75b750512 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,7 +4,8 @@ * Experimental pagination is now possible in `tt_as_flextable()` and `export_as_docx()`. * Added handling of widths in `tt_as_flextable()`. Now it is possible to change column widths for `.docx` exports. * Initialized vignette about quality control outputs of `as_result_df()`. - * Initialized parameter `make_ard` output for single-line statistical outputs. + * Completed parameter `make_ard` output for single-line statistical outputs. + * Added `stat_names` to `rcell()` to be used by `as_result_df(make_ard = TRUE)`. ### Miscellaneous * Split `docx` document generation to the new package [`rtables.officer`](https://github.com/insightsengineering/rtables.officer). diff --git a/R/00tabletrees.R b/R/00tabletrees.R index 26eac6a83..2f5ebfab1 100644 --- a/R/00tabletrees.R +++ b/R/00tabletrees.R @@ -1933,7 +1933,7 @@ RefFootnote <- function(note, index = NA_integer_, symbol = NA_character_) { ## indent_mod: indent modifier to be used for parent row CellValue <- function(val, format = NULL, colspan = 1L, label = NULL, indent_mod = NULL, footnotes = NULL, - align = NULL, format_na_str = NULL) { + align = NULL, format_na_str = NULL, stat_names = NA_character_) { if (is.null(colspan)) { colspan <- 1L } @@ -1957,6 +1957,7 @@ CellValue <- function(val, format = NULL, colspan = 1L, label = NULL, indent_mod = indent_mod, footnotes = footnotes, align = align, format_na_str = format_na_str, + stat_names = stat_names, class = "CellValue" ) ret diff --git a/R/colby_constructors.R b/R/colby_constructors.R index d31472a4e..3cda0e84a 100644 --- a/R/colby_constructors.R +++ b/R/colby_constructors.R @@ -1492,7 +1492,8 @@ setMethod( } ret <- rcell(cnt, format = format, - label = label + label = label, + stat_names = "n" ) ret } @@ -1515,11 +1516,11 @@ setMethod( cnt <- sum(!is.na(df)) } ## the formatter does the *100 so we don't here. - ## TODO name elements of this so that ARD generation has access to them - ## ret <- rcell(c(n = cnt, pct = cnt / .N_col), + ## Elements are named with stat_names so that ARD generation has access to them ret <- rcell(c(cnt, cnt / .N_col), format = format, - label = label + label = label, + stat_names = c("n", "p") ) ret } diff --git a/R/tree_accessors.R b/R/tree_accessors.R index 462feb3d9..9b3b3b573 100644 --- a/R/tree_accessors.R +++ b/R/tree_accessors.R @@ -4322,3 +4322,31 @@ setMethod( obj } ) + +# stat_names for ARD ----------------------------------------------------------- +# +#' @rdname int_methods +#' @export +setGeneric("obj_stat_names", function(obj) standardGeneric("obj_stat_names")) +# +#' @rdname int_methods +#' @export +setGeneric("obj_stat_names<-", function(obj, value) standardGeneric("obj_stat_names<-")) + +#' @rdname int_methods +#' @export +setMethod("obj_stat_names<-", "CellValue", function(obj, value) { + attr(obj, "stat_names") <- value + obj +}) + +#' @rdname int_methods +#' @export +setMethod("obj_stat_names", "CellValue", function(obj) attr(obj, "stat_names")) + +#' @rdname int_methods +#' @export +setMethod( + "obj_stat_names", "RowsVerticalSection", + function(obj) lapply(obj, obj_stat_names) +) diff --git a/R/tt_afun_utils.R b/R/tt_afun_utils.R index 5ebd80992..585deffd8 100644 --- a/R/tt_afun_utils.R +++ b/R/tt_afun_utils.R @@ -10,11 +10,18 @@ #' @param label (`string` or `NULL`)\cr label. If non-`NULL`, it will be looked at when determining row labels. #' @param colspan (`integer(1)`)\cr column span value. #' @param footnotes (`list` or `NULL`)\cr referential footnote messages for the cell. +#' @param stat_names (`character` or `NA`)\cr names for the statistics in the cell. It can be a vector of strings. +#' If `NA`, statistic names are not specified. #' #' @inherit CellValue return #' #' @note Currently column spanning is only supported for defining header structure. #' +#' @examples +#' rcell(1, format = "xx.x") +#' rcell(c(1, 2), format = c("xx - xx")) +#' rcell(c(1, 2), stat_names = c("Rand1", "Rand2")) +#' #' @rdname rcell #' @export rcell <- function(x, @@ -24,7 +31,9 @@ rcell <- function(x, indent_mod = NULL, footnotes = NULL, align = NULL, - format_na_str = NULL) { + format_na_str = NULL, + stat_names = NULL) { + checkmate::assert_character(stat_names, null.ok = TRUE) if (!is.null(align)) { check_aligns(align) } @@ -47,6 +56,9 @@ rcell <- function(x, if (!is.null(format_na_str)) { obj_na_str(x) <- format_na_str } + if (!is.null(stat_names)) { + obj_stat_names(x) <- stat_names + } ret <- x } else { if (is.null(label)) { @@ -66,7 +78,8 @@ rcell <- function(x, label = label, indent_mod = indent_mod, footnotes = footnotes, - format_na_str = format_na_str + format_na_str = format_na_str, + stat_names = stat_names %||% NA_character_ ) # RefFootnote(footnote)) } if (!is.null(align)) { @@ -113,6 +126,9 @@ non_ref_rcell <- function(x, is_ref, format = NULL, colspan = 1L, #' @param .aligns (`character` or `NULL`)\cr alignments for the cells. Standard for `NULL` is `"center"`. #' See [formatters::list_valid_aligns()] for currently supported alignments. #' @param .format_na_strs (`character` or `NULL`)\cr NA strings for the cells. +#' @param .stat_names (`list`)\cr names for the statistics in the cells. +#' It can be a vector of values. If `list(NULL)`, statistic names are not specified and will +#' appear as `NA`. #' #' @note In post-processing, referential footnotes can also be added using row and column #' paths with [`fnotes_at_path<-`]. @@ -126,6 +142,12 @@ non_ref_rcell <- function(x, is_ref, format = NULL, colspan = 1L, #' in_rows(1, 2, 3, .names = c("a", "b", "c")) #' in_rows(1, 2, 3, .labels = c("a", "b", "c")) #' in_rows(1, 2, 3, .names = c("a", "b", "c"), .labels = c("AAA", "BBB", "CCC")) +#' in_rows( +#' .list = list(a = c(NA, NA)), +#' .formats = "xx - xx", +#' .format_na_strs = list(c("asda", "lkjklj")) +#' ) +#' in_rows(.list = list(a = c(NA, NA)), .format_na_strs = c("asda", "lkjklj")) #' #' in_rows(.list = list(a = 1, b = 2, c = 3)) #' in_rows(1, 2, .list = list(3), .names = c("a", "b", "c")) @@ -150,7 +172,8 @@ in_rows <- function(..., .list = NULL, .names = NULL, .cell_footnotes = list(NULL), .row_footnotes = list(NULL), .aligns = NULL, - .format_na_strs = NULL) { + .format_na_strs = NULL, + .stat_names = list(NULL)) { if (is.function(.formats)) { .formats <- list(.formats) } @@ -172,11 +195,12 @@ in_rows <- function(..., .list = NULL, .names = NULL, length(.formats) > 0 || length(.names) > 0 || length(.indent_mods) > 0 || - length(.format_na_strs) > 0 + length(.format_na_strs) > 0 || + (!all(is.na(.stat_names)) && length(.stat_names) > 0) ) { stop( "in_rows got 0 rows but length >0 of at least one of ", - ".labels, .formats, .names, .indent_mods, .format_na_strs. ", + ".labels, .formats, .names, .indent_mods, .format_na_strs, .stat_names. ", "Does your analysis/summary function handle the 0 row ", "df/length 0 x case?" ) @@ -208,11 +232,13 @@ in_rows <- function(..., .list = NULL, .names = NULL, if (is.null(.aligns)) { .aligns <- list(NULL) } + l2 <- mapply(rcell, x = l, format = .formats, footnotes = .cell_footnotes %||% list(NULL), align = .aligns, format_na_str = .format_na_strs %||% list(NULL), + stat_names = .stat_names %||% list(NULL), SIMPLIFY = FALSE ) } diff --git a/R/tt_as_df.R b/R/tt_as_df.R index 409ad660a..df1727fe3 100644 --- a/R/tt_as_df.R +++ b/R/tt_as_df.R @@ -63,21 +63,9 @@ as_result_df <- function(tt, spec = NULL, } if (is.null(spec)) { - raw_cvals <- cell_values(tt) - ## if the table has one row and multiple columns, sometimes the cell values returns a list of the cell values - ## rather than a list of length 1 representing the single row. This is bad but may not be changeable - ## at this point. - if (nrow(tt) == 1 && length(raw_cvals) > 1) { - raw_cvals <- list(raw_cvals) - } - - # Flatten the list of lists (rows) of cell values into a data frame - cellvals <- as.data.frame(do.call(rbind, raw_cvals)) - row.names(cellvals) <- NULL - - if (nrow(tt) == 1 && ncol(tt) == 1) { - colnames(cellvals) <- names(raw_cvals) - } + # raw values + rawvals <- cell_values(tt) + cellvals <- .make_df_from_raw_data(rawvals, nr = nrow(tt), nc = ncol(tt)) if (data_format %in% c("strings", "numeric")) { # we keep previous calculations to check the format of the data @@ -104,9 +92,6 @@ as_result_df <- function(tt, spec = NULL, } rdf <- make_row_df(tt) - cinfo_df <- col_info(tt) - ci_coltree <- coltree(cinfo_df) - column_split_names <- .get_column_split_name(ci_coltree) # used only in make_ard df <- rdf[, c("name", "label", "abs_rownumber", "path", "reprint_inds", "node_class")] # Removing initial root elements from path (out of the loop -> right maxlen) @@ -142,6 +127,21 @@ as_result_df <- function(tt, spec = NULL, ) } + # Fix for content rows analysis variable label + if (any(ret$node_class == "ContentRow")) { + where_to <- which(ret$node_class == "ContentRow") + for (crow_i in where_to) { + # For each Content row, extract the row split that is used as analysis variable + tmp_tbl <- ret[crow_i, , drop = FALSE] + na_labels <- lapply(tmp_tbl, is.na) %>% unlist(use.names = FALSE) + group_to_take <- colnames(tmp_tbl[, !na_labels]) + group_to_take <- group_to_take[grep("^group[0-9]+$", group_to_take)] + + # Final assignment of each Content row to its correct analysis label + ret$avar_name[crow_i] <- ret[[group_to_take[length(group_to_take)]]][crow_i] + } + } + # If we want to expand colnames if (expand_colnames) { col_name_structure <- .get_formatted_colnames(clayout(tt)) @@ -176,27 +176,88 @@ as_result_df <- function(tt, spec = NULL, # ARD part for one stat per row if (make_ard) { + cinfo_df <- col_info(tt) + ci_coltree <- coltree(cinfo_df) + column_split_names <- .get_column_split_name(ci_coltree) # used only in make_ard + # Unnecessary columns ret_tmp <- ret[, !colnames(ret) %in% c("row_num", "is_group_summary", "node_class")] + n_row_groups <- sapply(colnames(ret), function(x) { + if (grepl("^group", x)) { + # Extract the number after "group" using regex + return(as.numeric(sub("group(\\d+).*", "\\1", x))) + } else { + return(0) # Return 0 if no "group" is found + } + }) %>% + max() # Indexes of real columns (visible in the output, but no row names) only_col_indexes <- seq(which(colnames(ret_tmp) == "label_name") + 1, ncol(ret_tmp)) # Core row names col_label_rows <- grepl("", ret_tmp$avar_name)) core_row_names <- ret_tmp[!col_label_rows, -only_col_indexes] + colnames_to_rename <- colnames(core_row_names) %in% c("avar_name", "row_name", "label_name") + # instead of avar_name row_name label_name ("variable_label" is not present in ARDs) + colnames(core_row_names)[colnames_to_rename] <- c("variable", "variable_level", "variable_label") + + # Adding stats_names if present + raw_stat_names <- .get_stat_names_from_table(tt, add_labrows = keep_label_rows) + cell_stat_names <- .make_df_from_raw_data(raw_stat_names, nr = nrow(tt), nc = ncol(tt)) # Moving colnames to rows (flattening) ret_w_cols <- NULL + # Looping on statistical columns for (col_i in only_col_indexes) { - tmp_ret_by_col_i <- cbind( - group1 = column_split_names[[ret_tmp[, col_i][[1]]]], - group1_level = ret_tmp[, col_i][[1]], - # instead of avar_name row_name label_name ("variable_label" is not present in ARDs) - setNames(core_row_names, c("variable", "variable_level", "variable_label")), # missing stat_name xxx - stat = I(setNames(ret_tmp[!col_label_rows, col_i], NULL)) + # Making row splits into row specifications (group1 group1_level) + current_col_split_level <- unlist(ret_tmp[seq_len(number_of_col_splits), col_i], use.names = FALSE) + flattened_cols_names <- .c_alternated(column_split_names[[1]][[1]], current_col_split_level) + names(flattened_cols_names) <- .c_alternated( + paste0("group", seq_along(column_split_names[[1]][[1]]) + n_row_groups), + paste0("group", seq_along(current_col_split_level) + n_row_groups, "_level") ) + if (n_row_groups > 0) { + tmp_core_ret_by_col_i <- cbind( + core_row_names[, seq(n_row_groups * 2)], + t(data.frame(flattened_cols_names)), + core_row_names[, -seq(n_row_groups * 2)], + row.names = NULL + ) + } else { + tmp_core_ret_by_col_i <- cbind( + t(data.frame(flattened_cols_names)), + core_row_names, + row.names = NULL + ) + } + + # retrieving stat names and stats + stat_name <- setNames(cell_stat_names[, col_i - min(only_col_indexes) + 1, drop = TRUE], NULL) + stat <- setNames(ret_tmp[!col_label_rows, col_i, drop = TRUE], NULL) + necessary_stat_lengths <- sapply(stat, length) + + # Truncating or adding NA if stat names has more or less elements than stats + stat_name <- lapply(seq_along(stat_name), function(sn_i) { + stat_name[[sn_i]][seq_len(necessary_stat_lengths[sn_i])] + }) + + # unnesting stat_name and stat + tmp_ret_by_col_i <- NULL + for (row_i in seq_along(stat)) { + tmp_ret_by_col_i <- rbind( + tmp_ret_by_col_i, + cbind( + tmp_core_ret_by_col_i[row_i, ], + stat_name = stat_name[[row_i]], + stat = stat[[row_i]], + row.names = NULL + ) + ) + } + ret_w_cols <- rbind(ret_w_cols, tmp_ret_by_col_i) } @@ -220,12 +281,46 @@ as_result_df <- function(tt, spec = NULL, out } +# Helper function used to structure the raw values into a dataframe +.make_df_from_raw_data <- function(raw_vals, nr, nc) { + ## if the table has one row and multiple columns, sometimes the cell values returns a list of the cell values + ## rather than a list of length 1 representing the single row. This is bad but may not be changeable + ## at this point. + if (nr == 1 && length(raw_vals) > 1) { + raw_vals <- list(raw_vals) + } + + # Flatten the list of lists (rows) of cell values into a data frame + cellvals <- as.data.frame(do.call(rbind, raw_vals)) + row.names(cellvals) <- NULL + + if (nr == 1 && nc == 1) { + colnames(cellvals) <- names(raw_vals) + } + + cellvals +} + +# Is there a better alternative? +.c_alternated <- function(v1, v2) { + unlist(mapply(c, v1, v2, SIMPLIFY = FALSE)) +} + +# Amazing helper function to get the statistic names from row cells! +.get_stat_names_from_table <- function(tt, add_labrows = FALSE) { + # omit_labrows # omit label rows + rows <- collect_leaves(tt, incl.cont = TRUE, add.labrows = add_labrows) + lapply(rows, function(ri) { + lapply(row_cells(ri), obj_stat_names) + }) +} + # Helper function to get column split names .get_column_split_name <- function(ci_coltree) { # ci stands for column information if (is(ci_coltree, "LayoutAxisTree")) { kids <- tree_children(ci_coltree) - return(unlist(lapply(kids, .get_column_split_name))) + return(lapply(kids, .get_column_split_name)) } sapply(pos_splits(tree_pos(ci_coltree)), spl_payload) } @@ -270,7 +365,7 @@ make_result_df_md_colnames <- function(maxlen) { spllen <- floor((maxlen - 2) / 2) ret <- character() if (spllen > 0) { - ret <- paste(c("spl_var", "spl_value"), rep(seq_len(spllen), rep(2, spllen)), sep = "_") + ret <- paste("group", rep(seq_len(spllen), each = 2), c("", "_level"), sep = "") } ret <- c(ret, c("avar_name", "row_name", "label_name", "row_num", "is_group_summary", "node_class")) } diff --git a/R/utils.R b/R/utils.R index 84441850b..933e160c7 100644 --- a/R/utils.R +++ b/R/utils.R @@ -31,21 +31,21 @@ setGeneric("simple_analysis", function(x, ...) standardGeneric("simple_analysis" #' @exportMethod simple_analysis setMethod( "simple_analysis", "numeric", - function(x, ...) in_rows("Mean" = rcell(mean(x, ...), format = "xx.xx")) + function(x, ...) in_rows("Mean" = rcell(mean(x, ...), stat_names = "mean", format = "xx.xx")) ) #' @rdname rtinner #' @exportMethod simple_analysis setMethod( "simple_analysis", "logical", - function(x, ...) in_rows("Count" = rcell(sum(x, ...), format = "xx")) + function(x, ...) in_rows("Count" = rcell(sum(x, ...), stat_names = "n", format = "xx")) ) #' @rdname rtinner #' @exportMethod simple_analysis setMethod( "simple_analysis", "factor", - function(x, ...) in_rows(.list = as.list(table(x))) + function(x, ...) in_rows(.list = as.list(table(x)), .stat_names = "n") ) #' @rdname rtinner diff --git a/inst/WORDLIST b/inst/WORDLIST index 913654388..bcfac9e44 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -1,3 +1,5 @@ +ARD +ARDs Bové CRAN's Carreras diff --git a/man/CellValue.Rd b/man/CellValue.Rd index e2627590d..3c96fbd90 100644 --- a/man/CellValue.Rd +++ b/man/CellValue.Rd @@ -12,7 +12,8 @@ CellValue( indent_mod = NULL, footnotes = NULL, align = NULL, - format_na_str = NULL + format_na_str = NULL, + stat_names = NA_character_ ) } \arguments{ @@ -37,6 +38,9 @@ corresponds to the unmodified default behavior.} \item{format_na_str}{(\code{string})\cr string which should be displayed when formatted if this cell's value(s) are all \code{NA}.} + +\item{stat_names}{(\code{character} or \code{NA})\cr names for the statistics in the cell. It can be a vector of strings. +If \code{NA}, statistic names are not specified.} } \value{ An object representing the value within a single cell within a populated table. The underlying structure diff --git a/man/in_rows.Rd b/man/in_rows.Rd index 7ad68b0e9..665638cc2 100644 --- a/man/in_rows.Rd +++ b/man/in_rows.Rd @@ -14,7 +14,8 @@ in_rows( .cell_footnotes = list(NULL), .row_footnotes = list(NULL), .aligns = NULL, - .format_na_strs = NULL + .format_na_strs = NULL, + .stat_names = list(NULL) ) } \arguments{ @@ -38,6 +39,10 @@ in_rows( See \code{\link[formatters:list_formats]{formatters::list_valid_aligns()}} for currently supported alignments.} \item{.format_na_strs}{(\code{character} or \code{NULL})\cr NA strings for the cells.} + +\item{.stat_names}{(\code{list})\cr names for the statistics in the cells. +It can be a vector of values. If \code{list(NULL)}, statistic names are not specified and will +appear as \code{NA}.} } \value{ A \code{RowsVerticalSection} object (or \code{NULL}). The details of this object should be considered an @@ -54,6 +59,12 @@ paths with \code{\link{fnotes_at_path<-}}. in_rows(1, 2, 3, .names = c("a", "b", "c")) in_rows(1, 2, 3, .labels = c("a", "b", "c")) in_rows(1, 2, 3, .names = c("a", "b", "c"), .labels = c("AAA", "BBB", "CCC")) +in_rows( + .list = list(a = c(NA, NA)), + .formats = "xx - xx", + .format_na_strs = list(c("asda", "lkjklj")) +) +in_rows(.list = list(a = c(NA, NA)), .format_na_strs = c("asda", "lkjklj")) in_rows(.list = list(a = 1, b = 2, c = 3)) in_rows(1, 2, .list = list(3), .names = c("a", "b", "c")) diff --git a/man/int_methods.Rd b/man/int_methods.Rd index 1cb0ce878..e67995f18 100644 --- a/man/int_methods.Rd +++ b/man/int_methods.Rd @@ -351,6 +351,11 @@ \alias{ref_msg,RefFootnote-method} \alias{fnotes_at_path<-,VTableTree,character-method} \alias{fnotes_at_path<-,VTableTree,NULL-method} +\alias{obj_stat_names} +\alias{obj_stat_names<-} +\alias{obj_stat_names<-,CellValue-method} +\alias{obj_stat_names,CellValue-method} +\alias{obj_stat_names,RowsVerticalSection-method} \alias{rbind2,VTableNodeInfo,missing-method} \alias{tt_at_path,VTableTree-method} \alias{tt_at_path<-,VTableTree,ANY,VTableTree-method} @@ -1118,6 +1123,16 @@ spl_varnames(object) <- value \S4method{fnotes_at_path}{VTableTree,NULL}(obj, rowpath = NULL, colpath = NULL, reset_idx = TRUE) <- value +obj_stat_names(obj) + +obj_stat_names(obj) <- value + +\S4method{obj_stat_names}{CellValue}(obj) <- value + +\S4method{obj_stat_names}{CellValue}(obj) + +\S4method{obj_stat_names}{RowsVerticalSection}(obj) + \S4method{rbind2}{VTableNodeInfo,missing}(x, y) \S4method{tt_at_path}{VTableTree}(tt, path, ...) diff --git a/man/rcell.Rd b/man/rcell.Rd index 8df42204d..06a17c7d6 100644 --- a/man/rcell.Rd +++ b/man/rcell.Rd @@ -13,7 +13,8 @@ rcell( indent_mod = NULL, footnotes = NULL, align = NULL, - format_na_str = NULL + format_na_str = NULL, + stat_names = NULL ) non_ref_rcell( @@ -50,6 +51,9 @@ corresponds to the unmodified default behavior.} \item{format_na_str}{(\code{string})\cr string which should be displayed when formatted if this cell's value(s) are all \code{NA}.} +\item{stat_names}{(\code{character} or \code{NA})\cr names for the statistics in the cell. It can be a vector of strings. +If \code{NA}, statistic names are not specified.} + \item{is_ref}{(\code{flag})\cr whether function is being used in the reference column (i.e. \code{.in_ref_col} should be passed to this argument).} @@ -69,3 +73,9 @@ be passed the value of \code{.in_ref_col} when it is used. \note{ Currently column spanning is only supported for defining header structure. } +\examples{ +rcell(1, format = "xx.x") +rcell(c(1, 2), format = c("xx - xx")) +rcell(c(1, 2), stat_names = c("Rand1", "Rand2")) + +} diff --git a/tests/testthat/test-result_data_frame.R b/tests/testthat/test-result_data_frame.R index 0e34647fb..01889620d 100644 --- a/tests/testthat/test-result_data_frame.R +++ b/tests/testthat/test-result_data_frame.R @@ -18,7 +18,7 @@ test_that("Result Data Frame generation works v0", { expect_identical( names(result_df)[1:5], - c("spl_var_1", "spl_value_1", "spl_var_2", "spl_value_2", "avar_name") + c("group1", "group1_level", "group2", "group2_level", "avar_name") ) ## handle multiple analyses @@ -238,3 +238,248 @@ test_that("as_result_df works fine with empty tables and no character(0) is allo "a" ) }) + +test_that("make_ard produces realistic ARD output with as_result_df", { + # Testing fundamental getters/setters + rc <- rcell(c(1, 2), stat_names = c("Rand1", "Rand2")) + expect_equal(obj_stat_names(rc), c("Rand1", "Rand2")) + + rc_row <- in_rows( + .list = list(a = c(NA, 1), b = c(1, NA)), + .formats = c("xx - xx", "xx.x - xx.x"), + .format_na_strs = list(c("asda", "lkjklj")), + .stat_names = list(c("A", "B"), c("B", "C")) # if c("A", "B") one each row, if single list duplicated + ) + + expect_equal( + list("a" = c("A", "B"), "b" = c("B", "C")), # now it is named + lapply(rc_row, obj_stat_names) + ) + + # Lets make a custom function and check ARDs + mean_sd_custom <- function(x) { + mean <- mean(x, na.rm = FALSE) + sd <- sd(x, na.rm = FALSE) + + rcell(c(mean, sd), + label = "Mean (SD)", format = "xx.x (xx.x)", + stat_names = c("Mean", "SD") + ) + } + counts_percentage_custom <- function(x) { + cnts <- table(x) + out <- lapply(cnts, function(x) { + perc <- x / sum(cnts) + rcell(c(x, perc), format = "xx. (xx.%)") + }) + in_rows( + .list = as.list(out), .labels = names(cnts), + .stat_names = list(c("Count", "Percentage")) + ) + } + + lyt <- basic_table(show_colcounts = TRUE, colcount_format = "N=xx") %>% + split_cols_by("ARM", split_fun = keep_split_levels(c("A: Drug X", "B: Placebo"))) %>% + analyze(vars = "AGE", afun = mean_sd_custom) %>% + analyze(vars = "SEX", afun = counts_percentage_custom) + + tbl <- build_table(lyt, ex_adsl) + ard_out <- as_result_df(tbl, make_ard = TRUE) + + # Numeric output + expect_equal( + ard_out[2, , drop = TRUE], + list( + group1 = "ARM", + group1_level = "A: Drug X", + variable = "AGE", + variable_level = "Mean (SD)", + variable_label = "Mean (SD)", + stat_name = "SD", + stat = 6.553326 + ), + tolerance = 10e-6 + ) + + # Percentage output + expect_equal( + ard_out[14, , drop = TRUE], + list( + group1 = "ARM", + group1_level = "B: Placebo", + variable = "SEX", + variable_level = "F", + variable_label = "F", + stat_name = "Percentage", + stat = 0.5746269 + ), + tolerance = 10e-6 + ) + + # Default values + lyt <- basic_table() %>% + split_cols_by("ARM") %>% + analyze(c("AGE", "SEX")) + + tbl <- build_table(lyt, ex_adsl) + ard_out <- as_result_df(tbl, make_ard = TRUE) + + expect_equal(unique(ard_out$stat_name), c("mean", "n")) +}) + +test_that("make_ard works with multiple row levels", { + lyt <- basic_table() %>% + split_rows_by("STRATA1") %>% + split_rows_by("STRATA2") %>% + split_cols_by("ARM") %>% + analyze(c("AGE", "SEX")) + + tbl <- build_table(lyt, ex_adsl) + ard_out <- as_result_df(tbl, make_ard = TRUE) + + expect_equal(unique(ard_out$stat_name), c("mean", "n")) + expect_contains(colnames(ard_out), c("group2", "group2_level")) + + # Count output + expect_equal( + ard_out[90, , drop = TRUE], + list( + group1 = "STRATA1", + group1_level = "C", + group2 = "STRATA2", + group2_level = "S2", + group3 = "ARM", + group3_level = "C: Combination", + variable = "SEX", + variable_level = "UNDIFFERENTIATED", + variable_label = "UNDIFFERENTIATED", + stat_name = "n", + stat = 0 + ), + tolerance = 10e-6 + ) +}) + +test_that("make_ard works with multiple column levels", { + lyt <- basic_table() %>% + split_rows_by("STRATA1") %>% + split_cols_by("ARM") %>% + split_cols_by("STRATA2") %>% + analyze(c("AGE", "SEX")) + + tbl <- build_table(lyt, ex_adsl) + ard_out <- as_result_df(tbl, make_ard = TRUE) + + expect_equal(unique(ard_out$stat_name), c("mean", "n")) + expect_contains(colnames(ard_out), c("group1", "group1_level")) + expect_contains(colnames(ard_out), c("group2", "group2_level")) + + # Count output + expect_equal( + ard_out[16, , drop = TRUE], + list( + group1 = "STRATA1", + group1_level = "A", + group2 = "ARM", + group2_level = "A: Drug X", + group3 = "STRATA2", + group3_level = "S2", + variable = "AGE", + variable_level = "Mean", + variable_label = "Mean", + stat_name = "mean", + stat = 34.4 + ), + tolerance = 10e-6 + ) +}) + +test_that("make_ard works with summarize_row_groups", { + lyt <- basic_table() %>% + split_rows_by("STRATA2") %>% + summarize_row_groups() %>% + split_rows_by("ARM") %>% + split_cols_by("ARM") %>% + split_cols_by("STRATA1") %>% + analyze(c("AGE", "SEX")) + + tbl <- build_table(lyt, ex_adsl) + ard_out <- as_result_df(tbl, make_ard = TRUE) + + expect_contains(unique(ard_out$stat_name), c("mean", "n", "p")) + expect_contains(colnames(ard_out), c("group1", "group1_level")) + expect_contains(colnames(ard_out), c("group2", "group2_level")) + + # label row output + expect_equal( + ard_out[1, , drop = TRUE], + list( + group1 = "STRATA2", + group1_level = "S1", + group2 = NA_character_, + group2_level = NA_character_, + group3 = "ARM", + group3_level = "A: Drug X", + group4 = "STRATA1", + group4_level = "A", + variable = "STRATA2", + variable_level = "S1", + variable_label = "S1", + stat_name = "n", + stat = 18 + ), + tolerance = 10e-6 + ) + + # Testing different placements of summarize_row_groups + lyt <- basic_table() %>% + split_rows_by("STRATA2") %>% + split_rows_by("ARM") %>% + summarize_row_groups() %>% + split_cols_by("ARM") %>% + split_cols_by("STRATA1") %>% + analyze(c("AGE", "SEX")) + + tbl <- build_table(lyt, ex_adsl) + ard_out <- as_result_df(tbl, make_ard = TRUE) + + # label row output + expect_equal( + ard_out[1, , drop = TRUE], + list( + group1 = "STRATA2", + group1_level = "S1", + group2 = "ARM", + group2_level = "A: Drug X", + group3 = "ARM", + group3_level = "A: Drug X", + group4 = "STRATA1", + group4_level = "A", + variable = "ARM", + variable_level = "A: Drug X", + variable_label = "A: Drug X", + stat_name = "n", + stat = 18 + ), + tolerance = 10e-6 + ) +}) + +test_that("make_ard works if there are no stat_names", { + mean_sd_custom <- function(x) { + mean <- mean(x, na.rm = FALSE) + sd <- sd(x, na.rm = FALSE) + + rcell(c(mean, sd), + label = "Mean (SD)", format = "xx.x (xx.x)" + ) + } + + lyt <- basic_table(show_colcounts = TRUE, colcount_format = "N=xx") %>% + split_cols_by("ARM", split_fun = keep_split_levels(c("A: Drug X", "B: Placebo"))) %>% + analyze(vars = "AGE", afun = mean_sd_custom) + + tbl <- build_table(lyt, ex_adsl) + + expect_equal(as_result_df(tbl, make_ard = TRUE)$stat_name, rep(NA_character_, 4)) +}) diff --git a/vignettes/ard_how_to.Rmd b/vignettes/ard_how_to.Rmd index 22bf7d118..5d08cd2cd 100644 --- a/vignettes/ard_how_to.Rmd +++ b/vignettes/ard_how_to.Rmd @@ -1,10 +1,10 @@ --- -title: "How to generate QC-ready result data frames from tables" +title: "Generating QC-Ready Result Data Frames (ARDs) from Tables" author: "Davide Garolini" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > - %\VignetteIndexEntry{How to generate QC-ready result data frames from tables} + %\VignetteIndexEntry{Generating QC-Ready Result Data Frames (ARDs) from Tables} %\VignetteEncoding{UTF-8} %\VignetteEngine{knitr::rmarkdown} editor_options: @@ -29,64 +29,139 @@ knitr::opts_chunk$set( knitr::opts_chunk$set(comment = "#") ``` - ```{css, echo=FALSE} .reveal .r code { white-space: pre; } ``` + # Disclaimer -This vignette is a work in progress. +This vignette is a work in progress and subject to change. -## Create the example table +## Creating an Example Table -First of all we need a table to retrieve all the necessary information. Borrowing one from the [vignette](https://insightsengineering.github.io/rtables/latest-tag/articles/clinical_trials.html) about clinical trials. +In order to generate an ARD (Analysis Results Dataset), we first need to create a table from which all the necessary information will be retrieved. We will borrow a simple table from [this vignette](https://insightsengineering.github.io/rtables/latest-tag/articles/clinical_trials.html) about clinical trials. ```{r} library(rtables) ADSL <- ex_adsl # Example ADSL dataset + +# Very simple table +lyt <- basic_table() %>% + split_cols_by("ARM") %>% + analyze(c("AGE", "SEX")) + +tbl <- build_table(lyt, ADSL) +tbl +``` + +## Converting the Table to a Result Data Frame (ARD) + +The `as_result_df()` function is used to convert a table to a result data frame. The result data frame is a data frame that contains the result of the summary table and is ready to be used for quality control purposes. This may be customized according to different standards. + +Let's see how we can produce different result `data.frame`s. The following outputs can be returned by setting different parameters in the `as_results_df()` function, and these results can be transformed back into a table using the `df_to_tt()` function. + +```{r} +as_result_df(tbl) + +as_result_df(tbl, data_format = "strings") +as_result_df(tbl, simplify = TRUE) +as_result_df(tbl, simplify = TRUE, keep_label_rows = TRUE) +as_result_df(tbl, simplify = TRUE, keep_label_rows = TRUE, expand_colnames = TRUE) +``` + +Now let's generate our final ARD output, which is ready to be used for quality control purposes. + +```{r} +as_result_df(tbl, make_ard = TRUE) +``` + +## Customizing the Output + +`as_result_df()` and ARDs depend on the content of the table, so it is possible to modify the table to customize the output. For example, we can add some user-defined statistics with custom names: + +```{r} +# rcell and in_rows are the core of any analysis function +rc <- rcell(c(1, 2), stat_names = c("Rand1", "Rand2")) +print(obj_stat_names(rc)) # c("Rand1", "Rand2") + +rc_row <- in_rows( + .list = list(a = c(NA, 1), b = c(1, NA)), + .formats = c("xx - xx", "xx.x - xx.x"), + .format_na_strs = list(c("asda", "lkjklj")), + .stat_names = list(c("A", "B"), c("B", "C")) +) + +# Only a getter for this object +print(obj_stat_names(rc_row)) # list(a = c("A", "B"), b = c("B", "C")) + +# if c("A", "B"), one for each row +# if single list, duplicated +rc_row <- in_rows( + .list = list(a = c(NA, 1), b = c(1, NA)), + .formats = c("xx - xx", "xx.x - xx.x"), + .format_na_strs = list(c("asda", "lkjklj")), + .stat_names = c("A", "B") +) +print(obj_stat_names(rc_row)) # c("A", "B") # one for each row +print(lapply(rc_row, obj_stat_names)) # identical to above + row names + +rc_row <- in_rows( + .list = list(a = c(NA, 1), b = c(1, NA)), + .formats = c("xx - xx", "xx.x - xx.x"), + .format_na_strs = list(c("asda", "lkjklj")), + .stat_names = list(c("A", "B")) # It is duplicated, check it yourself! +) +``` + +Let's put it into practice: + +```{r} mean_sd_custom <- function(x) { mean <- mean(x, na.rm = FALSE) sd <- sd(x, na.rm = FALSE) - rcell(c(mean, sd), label = "Mean (SD)", format = "xx.x (xx.x)") + rcell(c(mean, sd), + label = "Mean (SD)", format = "xx.x (xx.x)" # , + # stat_names = c("Mean", "SD") + ) } counts_percentage_custom <- function(x) { - # browser() cnts <- table(x) out <- lapply(cnts, function(x) { perc <- x / sum(cnts) rcell(c(x, perc), format = "xx. (xx.%)") }) - in_rows(.list = as.list(out), .labels = names(cnts)) + in_rows( + .list = as.list(out), .labels = names(cnts), + .stat_names = list(c("Count", "Percentage")) + ) } lyt <- basic_table(show_colcounts = TRUE, colcount_format = "N=xx") %>% - # split_rows_by("STRATA1", split_fun = keep_split_levels(c("A"))) %>% - # split_cols_by("STRATA2") %>% split_cols_by("ARM", split_fun = keep_split_levels(c("A: Drug X", "B: Placebo"))) %>% analyze(vars = "AGE", afun = mean_sd_custom) %>% analyze(vars = "SEX", afun = counts_percentage_custom) -tbl <- build_table(lyt, ADSL) -tbl +tbl <- build_table(lyt, ex_adsl) + +as_result_df(tbl, make_ard = TRUE) ``` -## Convert the table to a result data frame +# More Complex Outputs -The `as_result_df` function is the one that converts a table to a result data frame. The result data frame is a data frame that contains the result of the summary table and is ready to be used for quality control purposes. This may differ for different standard and lets see how to produce different outputs. Final goal is having clearly one result for row. Lets play with different options. +Let's add hierarchical row and column splits: ```{r} -as_result_df(tbl) +lyt <- basic_table() %>% + split_rows_by("STRATA2") %>% + summarize_row_groups() %>% + split_cols_by("ARM") %>% + split_cols_by("STRATA1") %>% + analyze(c("AGE", "SEX")) -as_result_df(tbl, data_format = "strings") -as_result_df(tbl, simplify = TRUE) -as_result_df(tbl, simplify = TRUE, keep_label_rows = TRUE) -as_result_df(tbl, simplify = TRUE, keep_label_rows = TRUE, expand_colnames = TRUE) -``` +tbl <- build_table(lyt, ex_adsl) -Now lets get the final `ARD` output. This is the one that is ready to be used for quality control purposes. -```{r} as_result_df(tbl, make_ard = TRUE) ```