From c4018e7724062038728c98a72571b42bd8220936 Mon Sep 17 00:00:00 2001 From: Davide Garolini Date: Wed, 29 Nov 2023 09:55:08 +0100 Subject: [PATCH 1/2] Direct formatted output from `as_result_df` (#793) * better docs * order and addition of features on top of v0_experimental * expanded tests * fixes * news * styling * adding stringi * news * integer fix --- DESCRIPTION | 5 +- NEWS.md | 4 + R/tt_export.R | 508 +++++++++++++++--------- R/tt_from_df.R | 5 + R/utils.R | 18 +- man/as_result_df.Rd | 39 -- man/data.frame_export.Rd | 88 ++++ man/df_to_tt.Rd | 1 + man/path_enriched_df.Rd | 37 -- man/result_df_specs.Rd | 17 - man/tsv_io.Rd | 8 +- tests/testthat/test-result_data_frame.R | 58 ++- 12 files changed, 500 insertions(+), 288 deletions(-) delete mode 100644 man/as_result_df.Rd create mode 100644 man/data.frame_export.Rd delete mode 100644 man/path_enriched_df.Rd delete mode 100644 man/result_df_specs.Rd diff --git a/DESCRIPTION b/DESCRIPTION index c67122221..919a4fa9a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -34,13 +34,14 @@ Depends: methods, R (>= 2.10) Imports: + checkmate (>= 2.1.0), grid, htmltools (>= 0.5.4), - stats + stats, + stringi (>= 1.6) Suggests: broom (>= 0.7.10), car (>= 3.0-13), - checkmate (>= 2.1.0), dplyr (>= 1.0.5), flextable (>= 0.8.4), knitr (>= 1.42), diff --git a/NEWS.md b/NEWS.md index 009aafbe4..8c4e446a7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -10,6 +10,8 @@ * Added `header_section_div` setters and getters for layout and table objects along with related `basic_table` parameter. * Added `na_str` argument to `analyze_colvars` to set custom string to print in place of missing values. + * Added flat `data.frame` outputs for `as_result_df()` via flag parameters `as_viewer`, `as_strings`, and + `expand_colnames`. ### Bug Fixes * Fixed a bug that was failing when wrapping and section dividers were used at the same time. @@ -22,6 +24,8 @@ * Started deprecation cycle for `col_fnotes_here` to be replaced with `col_footnotes`. * Exported `section_div` methods now have a dedicated documentation page that is visible to users. * When tables are exported as `txt`, they preserve the horizontal separator of the table. + * Added imports on `stringi` and `checkmate` as they are fundamental packages for string handling and + argument checking. ## rtables 0.6.5 ### New Features diff --git a/R/tt_export.R b/R/tt_export.R index 10fde5cd6..932b8bd6e 100644 --- a/R/tt_export.R +++ b/R/tt_export.R @@ -1,13 +1,11 @@ #' @importFrom tools file_ext NULL -### XXX is the stuff in this file correct or should we be exporting *formatted* values to -### meet the needs of consumers of this? Do we ened to support both? #' Create Enriched flat value table with paths #' -#' +#' @description #' This function creates a flat tabular file of cell values and -#' corresponding paths via \code{\link{path_enriched_df}}. I then +#' corresponding paths via [path_enriched_df()]. I then #' writes that data.frame out as a `tsv` file. #' #' By default (i.e. when \code{value_func} is not specified, @@ -20,25 +18,23 @@ NULL #' #' @inheritParams gen_args #' @param file character(1). The path of the file to written to or read from. -#' @inheritParams path_enriched_df +#' @inheritParams data.frame_export +#' #' @return \code{NULL} silently for \code{export_as_tsv}, a data.frame with #' re-constituted list values for \code{export_as_tsv}. -#' @export +#' +#' @seealso [path_enriched_df()] for the underlying function that does the work. +#' #' @rdname tsv_io #' @importFrom utils write.table read.table - +#' @export export_as_tsv <- function(tt, file = NULL, path_fun = collapse_path, value_fun = collapse_values) { df <- path_enriched_df(tt, path_fun = path_fun, value_fun = value_fun) write.table(df, file, sep = "\t") } - - -.collapse_char <- "|" -.collapse_char_esc <- "\\|" - -##' @export -##' @rdname tsv_io +#' @rdname tsv_io +#' @export import_from_tsv <- function(file) { rawdf <- read.table(file, header = TRUE, sep = "\t") as.data.frame(lapply( @@ -53,52 +49,267 @@ import_from_tsv <- function(file) { )) } -collapse_path <- function(paths) { - if (is.list(paths)) { - return(vapply(paths, collapse_path, "")) - } - paste(paths, collapse = .collapse_char) -} +### Migrated to formatters. -collapse_values <- function(colvals) { - if (!is.list(colvals)) { ## || all(vapply(colvals, length, 1L) == 1)) - return(colvals) - } else if (all(vapply(colvals, length, 1L) == 1)) { - return(unlist(colvals)) - } - vapply(colvals, paste, "", collapse = .collapse_char) -} +#' @importFrom formatters export_as_txt +#' +#' @examples +#' lyt <- basic_table() %>% +#' split_cols_by("ARM") %>% +#' analyze(c("AGE", "BMRKR2", "COUNTRY")) +#' +#' tbl <- build_table(lyt, ex_adsl) +#' +#' cat(export_as_txt(tbl, file = NULL, paginate = TRUE, lpp = 8)) +#' +#' \dontrun{ +#' tf <- tempfile(fileext = ".txt") +#' export_as_txt(tbl, file = tf) +#' system2("cat", tf) +#' } +#' +#' @export +formatters::export_as_txt + +# data.frame output ------------------------------------------------------------ -#' Transform `TableTree` object to Path-Enriched data.frame +#' Generate a Result Data Frame +#' +#' @description +#' Collection of utilities to exctract `data.frame` from `TableTree` objects. #' #' @inheritParams gen_args -#' @param path_fun function. Function to transform paths into single-string -#' row/column names. -#' @param value_fun function. Function to transform cell values into cells of -#' the data.frame. Defaults to \code{collapse_values} which creates strings -#' where multi-valued cells are collapsed together, separated by \code{|}. -#' @export -#' @return A data frame of \code{tt}'s cell values (processed by -#' \code{value_fun}, with columns named by the full column paths (processed by -#' \code{path_fun} and an additional \code{row_path} column with the row paths -#' (processed by by \code{path_fun}). -#' @examples +#' @param spec character(1). The specification to use to +#' extract the result data frame. See details +#' @param simplify logical(1). If \code{TRUE}, the result data frame will have only visible +#' labels and result columns. +#' @param ... Passed to spec-specific result data frame conversion function. Currently it can be +#' one or more of the following parameters (valid only for `v0_experimental` spec for now): +#' - `expand_colnames`: when `TRUE`, the result data frame will have expanded column names above the usual +#' output. This is useful when the result data frame is used for further processing. +#' - `simplify`: when `TRUE`, the result data frame will have only visible labels and result columns. +#' - `as_strings`: when `TRUE`, the result data frame will have all values as strings, as they appear +#' in the final table (it can also be retrieved from `matrix_form(tt)$strings`). This is also true for +#' column counts if `expand_colnames = TRUE`. +#' - `as_viewer`: when `TRUE`, the result data frame will have all values as they appear in the final table, +#' i.e. with the same precision and numbers, but in easy-to-use numeric form. +#' +#' @details `as_result_df()`: Result data frame specifications may differ in the exact information +#' they include and the form in which they represent it. Specifications whose names end in "_experimental" +#' are subject to change without notice, but specifications without the "_experimental" +#' suffix will remain available \emph{including any bugs in their construction} indefinitely. #' +#' @examples #' lyt <- basic_table() %>% #' split_cols_by("ARM") %>% +#' split_rows_by("STRATA1") %>% #' analyze(c("AGE", "BMRKR2")) #' #' tbl <- build_table(lyt, ex_adsl) -#' path_enriched_df(tbl) -path_enriched_df <- function(tt, path_fun = collapse_path, value_fun = collapse_values) { +#' as_result_df(tbl) +#' +#' @name data.frame_export +#' @export +as_result_df <- function(tt, spec = "v0_experimental", simplify = FALSE, ...) { + checkmate::assert_class(tt, "VTableTree") + checkmate::assert_string(spec) + checkmate::assert_flag(simplify) + + if (nrow(tt) == 0) { + return(sanitize_table_struct(tt)) + } + + result_df_fun <- lookup_result_df_specfun(spec) + out <- result_df_fun(tt, ...) + + if (simplify) { + out <- .simplify_result_df(out) + } + + out +} + +# Function that selects specific outputs from the result data frame +.simplify_result_df <- function(df) { + col_df <- colnames(df) + row_names_col <- which(col_df == "row_name") + result_cols <- seq(which(col_df == "node_class") + 1, length(col_df)) + + df[, c(row_names_col, result_cols)] +} + +# Not used in rtables +# .split_colwidths <- function(ptabs, nctot, colwidths) { +# ret <- list() +# i <- 1L +# +# rlw <- colwidths[1] +# colwidths <- colwidths[-1] +# donenc <- 0 +# while (donenc < nctot) { +# curnc <- NCOL(ptabs[[i]]) +# ret[[i]] <- c(rlw, colwidths[seq_len(curnc)]) +# colwidths <- colwidths[-1 * seq_len(curnc)] +# donenc <- donenc + curnc +# i <- i + 1 +# } +# ret +# } + +#' @describeIn data.frame_export list of functions that extract result data frames from \code{TableTree}s. +#' +#' @return `result_df_specs()`: returns a named list of result data frame extraction functions by "specification". +#' +#' @examples +#' result_df_specs() +#' +#' @export +result_df_specs <- function() { + list(v0_experimental = result_df_v0_experimental) +} + +lookup_result_df_specfun <- function(spec) { + if (!(spec %in% names(result_df_specs()))) { + stop( + "unrecognized result data frame specification: ", + spec, + "If that specification is correct you may need to update your version of rtables" + ) + } + result_df_specs()[[spec]] +} + +result_df_v0_experimental <- function(tt, + as_viewer = FALSE, + as_strings = FALSE, + expand_colnames = FALSE) { + checkmate::assert_flag(as_viewer) + checkmate::assert_flag(as_strings) + checkmate::assert_flag(expand_colnames) + + raw_cvals <- cell_values(tt) + ## if the table has one row and multiple columns, sometimes the cell values returns a list of the cell values + ## rather than a list of length 1 representing the single row. This is bad but may not be changeable + ## at this point. + if (nrow(tt) == 1 && length(raw_cvals) > 1) { + raw_cvals <- list(raw_cvals) + } + + cellvals <- as.data.frame(do.call(rbind, raw_cvals)) + row.names(cellvals) <- NULL + + if (nrow(tt) == 1 && ncol(tt) == 1) { + colnames(cellvals) <- names(raw_cvals) + } + + if (as_viewer || as_strings) { + # we keep previous calculations to check the format of the data + mf_tt <- matrix_form(tt) + mf_result_chars <- mf_strings(mf_tt)[-seq_len(mf_nlheader(mf_tt)), -1] + mf_result_chars <- .remove_empty_elements(mf_result_chars) + mf_result_numeric <- as.data.frame( + .make_numeric_char_mf(mf_result_chars) + ) + mf_result_chars <- as.data.frame(mf_result_chars) + if (!setequal(dim(mf_result_numeric), dim(cellvals)) || + !setequal(dim(mf_result_chars), dim(cellvals))) { + stop( + "The extracted numeric data.frame does not have the same dimension of the", + " cell values extracted with cell_values(). This is a bug. Please report it." + ) # nocov + } + if (as_strings) { + colnames(mf_result_chars) <- colnames(cellvals) + cellvals <- mf_result_chars + } else { + colnames(mf_result_numeric) <- colnames(cellvals) + cellvals <- mf_result_numeric + } + } + rdf <- make_row_df(tt) - cdf <- make_col_df(tt) - cvs <- as.data.frame(do.call(rbind, cell_values(tt))) - cvs <- as.data.frame(lapply(cvs, value_fun)) - row.names(cvs) <- NULL - colnames(cvs) <- path_fun(cdf$path) - preppaths <- path_fun(rdf[rdf$node_class != "LabelRow", ]$path) - cbind.data.frame(row_path = preppaths, cvs) + + df <- cbind( + rdf[ + rdf$node_class != "LabelRow", + c("name", "label", "abs_rownumber", "path", "reprint_inds", "node_class") + ], + cellvals + ) + + maxlen <- max(lengths(df$path)) + metadf <- do.call( + rbind.data.frame, + lapply( + seq_len(NROW(df)), + function(ii) { + handle_rdf_row(df[ii, ], maxlen = maxlen) + } + ) + ) + + ret <- cbind( + metadf[metadf$node_class != "LabelRow", ], + cellvals + ) + + # If we want to expand colnames + if (expand_colnames) { + col_name_structure <- .get_formatted_colnames(clayout(tt)) + number_of_non_data_cols <- which(colnames(ret) == "node_class") + if (NCOL(ret) - number_of_non_data_cols != NCOL(col_name_structure)) { + stop( + "When expanding colnames structure, we were not able to find the same", + " number of columns as in the result data frame. This is a bug. Please report it." + ) # nocov + } + + buffer_rows_for_colnames <- matrix( + rep("", number_of_non_data_cols * NROW(col_name_structure)), + nrow = NROW(col_name_structure) + ) + + header_colnames_matrix <- cbind(buffer_rows_for_colnames, data.frame(col_name_structure)) + colnames(header_colnames_matrix) <- colnames(ret) + + count_row <- NULL + if (disp_ccounts(tt)) { + ccounts <- col_counts(tt) + if (as_strings) { + ccounts <- mf_strings(mf_tt)[mf_nlheader(mf_tt), ] + ccounts <- .remove_empty_elements(ccounts) + } + count_row <- c(rep("", number_of_non_data_cols), ccounts) + header_colnames_matrix <- rbind(header_colnames_matrix, count_row) + } + ret <- rbind(header_colnames_matrix, ret) + } + + ret +} + +.remove_empty_elements <- function(char_df) { + if (is.null(dim(char_df))) { + return(char_df[nzchar(char_df, keepNA = TRUE)]) + } + + apply(char_df, 2, function(col_i) col_i[nzchar(col_i, keepNA = TRUE)]) +} + +# Helper function to make the character matrix numeric +.make_numeric_char_mf <- function(char_df) { + if (is.null(dim(char_df))) { + return(as.numeric(stringi::stri_extract_all(char_df, regex = "\\d+.\\d+|\\d+"))) + } + + ret <- apply(char_df, 2, function(col_i) { + lapply( + stringi::stri_extract_all(col_i, regex = "\\d+.\\d+|\\d+"), + as.numeric + ) + }) + + do.call(cbind, ret) } do_label_row <- function(rdfrow, maxlen) { @@ -109,7 +320,6 @@ do_label_row <- function(rdfrow, maxlen) { ) } - make_result_df_md_colnames <- function(maxlen) { spllen <- floor((maxlen - 2) / 2) ret <- character() @@ -119,7 +329,6 @@ make_result_df_md_colnames <- function(maxlen) { ret <- c(ret, c("avar_name", "row_name", "row_num", "is_group_summary", "node_class")) } - do_content_row <- function(rdfrow, maxlen) { pth <- rdfrow$path[[1]] @@ -150,7 +359,6 @@ do_data_row <- function(rdfrow, maxlen) { ) } - handle_rdf_row <- function(rdfrow, maxlen) { nclass <- rdfrow$node_class if (rdfrow$path[[1]][1] == "root") { @@ -166,126 +374,76 @@ handle_rdf_row <- function(rdfrow, maxlen) { setNames(ret, make_result_df_md_colnames(maxlen)) } - -#' Result Data Frame Specifications -#' -#' @return a named list of result data frame extraction functions by "specification" -#' @export -#' @examples -#' result_df_specs() -result_df_specs <- function() { - list(v0_experimental = result_df_v0_experimental) -} - -lookup_result_df_specfun <- function(spec) { - if (!(spec %in% names(result_df_specs()))) { - stop( - "unrecognized result data frame specification: ", - spec, - "If that specification is correct you may need to update your version of rtables" - ) +# Helper recurrent function to get the column names for the result data frame from the VTableTree +.get_formatted_colnames <- function(clyt) { + ret <- obj_label(clyt) + if (!nzchar(ret)) { + ret <- NULL } - result_df_specs()[[spec]] -} - -result_df_v0_experimental <- function(tt) { - raw_cvals <- cell_values(tt) - ## if the table has one row and multiple columns, sometimes the cell values returns a list of the cell values - ## rather than a list of length 1 reprsenting the single row. This is bad but may not be changable - ## at this point. - if (nrow(tt) == 1 && length(raw_cvals) > 1) { - raw_cvals <- list(raw_cvals) + if (is.null(tree_children(clyt))) { + return(ret) + } else { + ret <- rbind(ret, lapply(tree_children(clyt), .get_formatted_colnames) %>% do.call(cbind, .)) + colnames(ret) <- NULL + rownames(ret) <- NULL + return(ret) } - cellvals <- as.data.frame(do.call(rbind, raw_cvals)) - row.names(cellvals) <- NULL - rdf <- make_row_df(tt) - df <- cbind( - rdf[ - rdf$node_class != "LabelRow", - c("name", "label", "abs_rownumber", "path", "reprint_inds", "node_class") - ], - cellvals - ) - maxlen <- max(lengths(df$path)) - metadf <- do.call(rbind.data.frame, lapply( - seq_len(NROW(df)), - function(ii) handle_rdf_row(df[ii, ], maxlen = maxlen) - )) - cbind( - metadf[metadf$node_class != "LabelRow", ], - cellvals - ) } -#' Generate a Result Data Frame +#' @describeIn data.frame_export transform `TableTree` object to Path-Enriched `data.frame`. #' -#' @param tt `VTableTree`. The table. -#' @param spec character(1). The specification to use to -#' extract the result data frame. See details -#' @param ... Passed to spec-specific result data frame conversion function. +#' @param path_fun function. Function to transform paths into single-string +#' row/column names. +#' @param value_fun function. Function to transform cell values into cells of +#' the data.frame. Defaults to \code{collapse_values} which creates strings +#' where multi-valued cells are collapsed together, separated by \code{|}. #' -#' @details Result data frame specifications may differ in the exact information they include and -#' the form in which they represent it. Specifications whose names end in "_experimental" -#' are subject to change without notice, but specifications without the "_experimental" -#' suffix will remain available \emph{including any bugs in their construction} indefinitely. +#' @return `path_enriched_df()`: returns a data frame of \code{tt}'s cell values (processed by +#' \code{value_fun}, with columns named by the full column paths (processed by +#' \code{path_fun} and an additional \code{row_path} column with the row paths +#' (processed by by \code{path_fun}). #' -#' @note This function may eventually be migrated to a separate package, and so should -#' not be called via `::` -#' @export #' @examples -#' #' lyt <- basic_table() %>% #' split_cols_by("ARM") %>% -#' split_rows_by("STRATA1") %>% #' analyze(c("AGE", "BMRKR2")) #' #' tbl <- build_table(lyt, ex_adsl) -#' as_result_df(tbl) -as_result_df <- function(tt, spec = "v0_experimental", ...) { - result_df_fun <- lookup_result_df_specfun(spec) - result_df_fun(tt, ...) +#' path_enriched_df(tbl) +#' +#' @export +path_enriched_df <- function(tt, path_fun = collapse_path, value_fun = collapse_values) { + rdf <- make_row_df(tt) + cdf <- make_col_df(tt) + cvs <- as.data.frame(do.call(rbind, cell_values(tt))) + cvs <- as.data.frame(lapply(cvs, value_fun)) + row.names(cvs) <- NULL + colnames(cvs) <- path_fun(cdf$path) + preppaths <- path_fun(rdf[rdf$node_class != "LabelRow", ]$path) + cbind.data.frame(row_path = preppaths, cvs) } -.split_colwidths <- function(ptabs, nctot, colwidths) { - ret <- list() - i <- 1L - - rlw <- colwidths[1] - colwidths <- colwidths[-1] - donenc <- 0 - while (donenc < nctot) { - curnc <- NCOL(ptabs[[i]]) - ret[[i]] <- c(rlw, colwidths[seq_len(curnc)]) - colwidths <- colwidths[-1 * seq_len(curnc)] - donenc <- donenc + curnc - i <- i + 1 +.collapse_char <- "|" +.collapse_char_esc <- "\\|" + +collapse_path <- function(paths) { + if (is.list(paths)) { + return(vapply(paths, collapse_path, "")) } - ret + paste(paths, collapse = .collapse_char) } -### Migrated to formatters. - -#' @importFrom formatters export_as_txt -#' -#' @examples -#' lyt <- basic_table() %>% -#' split_cols_by("ARM") %>% -#' analyze(c("AGE", "BMRKR2", "COUNTRY")) -#' -#' tbl <- build_table(lyt, ex_adsl) -#' -#' cat(export_as_txt(tbl, file = NULL, paginate = TRUE, lpp = 8)) -#' -#' \dontrun{ -#' tf <- tempfile(fileext = ".txt") -#' export_as_txt(tbl, file = tf) -#' system2("cat", tf) -#' } -#' -#' @export -formatters::export_as_txt +collapse_values <- function(colvals) { + if (!is.list(colvals)) { ## || all(vapply(colvals, length, 1L) == 1)) + return(colvals) + } else if (all(vapply(colvals, length, 1L) == 1)) { + return(unlist(colvals)) + } + vapply(colvals, paste, "", collapse = .collapse_char) +} +# pdf output ------------------------------------------------------------------- #' Export as PDF #' #' The PDF output is based on the ASCII output created with `toString` @@ -462,6 +620,22 @@ export_as_pdf <- function(tt, lpp = lpp, cpp = cpp ) } + +# only used in pagination +.tab_to_colpath_set <- function(tt) { + vapply( + collect_leaves(coltree(tt)), + function(y) paste(pos_to_path(tree_pos(y)), collapse = " "), + "" + ) +} +.figure_out_colinds <- function(subtab, fulltab) { + match( + .tab_to_colpath_set(subtab), + .tab_to_colpath_set(fulltab) + ) +} + # Flextable and docx ----------------------------------------------------------- #' Export as word document #' @@ -707,7 +881,7 @@ tt_to_flextable <- function(tt, tf_wrap = !is.null(cpp), max_width = cpp, total_width = 10) { - check_required_packages(c("flextable", "checkmate")) + check_required_packages("flextable") if (!inherits(tt, "VTableTree")) { stop("Input table is not an rtables' object.") } @@ -912,7 +1086,7 @@ theme_docx_default <- function(tt = NULL, # Option for more complicated stuff bold_manual = NULL, border = flextable::fp_border_default(width = 0.5)) { function(flx) { - check_required_packages(c("flextable", "checkmate")) + check_required_packages("flextable") if (!inherits(flx, "flextable")) { stop(sprintf( "Function `%s` supports only flextable objects.", @@ -1074,29 +1248,3 @@ apply_alignments <- function(flx, aligns_df, part) { flx } - -# only used in pagination -.tab_to_colpath_set <- function(tt) { - vapply( - collect_leaves(coltree(tt)), - function(y) paste(pos_to_path(tree_pos(y)), collapse = " "), - "" - ) -} -.figure_out_colinds <- function(subtab, fulltab) { - match( - .tab_to_colpath_set(subtab), - .tab_to_colpath_set(fulltab) - ) -} - -check_required_packages <- function(pkgs) { - for (pkgi in pkgs) { - if (!requireNamespace(pkgi)) { - stop( - "This function requires the ", pkgi, " package. ", - "Please install it if you wish to use it" - ) - } - } -} diff --git a/R/tt_from_df.R b/R/tt_from_df.R index 3e2a0dfa3..111255c7a 100644 --- a/R/tt_from_df.R +++ b/R/tt_from_df.R @@ -1,9 +1,13 @@ #' Create `ElementaryTable` from data.frame +#' #' @param df data.frame. +#' #' @return an \code{ElementaryTable} object with unnested columns corresponding to #' \code{names(df)} and row labels corresponding to \code{row.names(df)} +#' #' @examples #' df_to_tt(mtcars) +#' #' @export df_to_tt <- function(df) { colnms <- colnames(df) @@ -14,5 +18,6 @@ df_to_tt <- function(df) { rni <- if (havern) rnames[i] else "" do.call(rrow, c(list(row.name = rni), unclass(df[i, ]))) }) + ElementaryTable(kids = kids, cinfo = cinfo) } diff --git a/R/utils.R b/R/utils.R index 0633cbecf..3b9cfc245 100644 --- a/R/utils.R +++ b/R/utils.R @@ -11,8 +11,6 @@ is_rtable <- function(x) { is(x, "VTableTree") } - - # nocov start ## is each object in a collection from a class are <- function(object_collection, class2) { @@ -51,7 +49,6 @@ is_logical_vector_modif <- function(x, min_length = 1) { } # nocov end - # Shorthand for functions that take df as first parameter .takes_df <- function(f) { func_takes(f, "df", is_first = TRUE) @@ -79,11 +76,11 @@ func_takes <- function(func, params, is_first = FALSE) { #' Translate spl_context to Path for display in error messages #' -#' #' @param ctx data.frame. The `spl_context` data.frame where the error occurred #' #' @return A character string containing a description of the row path corresponding #' to the `ctx` +#' #' @export spl_context_to_disp_path <- function(ctx) { ## this can happen in the first split in column space, but @@ -107,3 +104,16 @@ spl_context_to_disp_path <- function(ctx) { paste_vec <- function(vec) { paste0('c("', paste(vec, collapse = '", "'), '")') } + +# Utility for checking if a package is installed +check_required_packages <- function(pkgs) { + for (pkgi in pkgs) { + if (!requireNamespace(pkgi)) { + stop( + "This function requires the ", pkgi, " package. ", + "Please install it if you wish to use it" + ) + } + } +} + diff --git a/man/as_result_df.Rd b/man/as_result_df.Rd deleted file mode 100644 index 54768f5ec..000000000 --- a/man/as_result_df.Rd +++ /dev/null @@ -1,39 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/tt_export.R -\name{as_result_df} -\alias{as_result_df} -\title{Generate a Result Data Frame} -\usage{ -as_result_df(tt, spec = "v0_experimental", ...) -} -\arguments{ -\item{tt}{\code{VTableTree}. The table.} - -\item{spec}{character(1). The specification to use to -extract the result data frame. See details} - -\item{...}{Passed to spec-specific result data frame conversion function.} -} -\description{ -Generate a Result Data Frame -} -\details{ -Result data frame specifications may differ in the exact information they include and -the form in which they represent it. Specifications whose names end in "_experimental" -are subject to change without notice, but specifications without the "_experimental" -suffix will remain available \emph{including any bugs in their construction} indefinitely. -} -\note{ -This function may eventually be migrated to a separate package, and so should -not be called via \code{::} -} -\examples{ - -lyt <- basic_table() \%>\% - split_cols_by("ARM") \%>\% - split_rows_by("STRATA1") \%>\% - analyze(c("AGE", "BMRKR2")) - -tbl <- build_table(lyt, ex_adsl) -as_result_df(tbl) -} diff --git a/man/data.frame_export.Rd b/man/data.frame_export.Rd new file mode 100644 index 000000000..97d0011aa --- /dev/null +++ b/man/data.frame_export.Rd @@ -0,0 +1,88 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tt_export.R +\name{data.frame_export} +\alias{data.frame_export} +\alias{as_result_df} +\alias{result_df_specs} +\alias{path_enriched_df} +\title{Generate a Result Data Frame} +\usage{ +as_result_df(tt, spec = "v0_experimental", simplify = FALSE, ...) + +result_df_specs() + +path_enriched_df(tt, path_fun = collapse_path, value_fun = collapse_values) +} +\arguments{ +\item{tt}{\code{TableTree} (or related class). A \code{TableTree} object representing a +populated table.} + +\item{spec}{character(1). The specification to use to +extract the result data frame. See details} + +\item{simplify}{logical(1). If \code{TRUE}, the result data frame will have only visible +labels and result columns.} + +\item{...}{Passed to spec-specific result data frame conversion function. Currently it can be +one or more of the following parameters (valid only for \code{v0_experimental} spec for now): +\itemize{ +\item \code{expand_colnames}: when \code{TRUE}, the result data frame will have expanded column names above the usual +output. This is useful when the result data frame is used for further processing. +\item \code{simplify}: when \code{TRUE}, the result data frame will have only visible labels and result columns. +\item \code{as_strings}: when \code{TRUE}, the result data frame will have all values as strings, as they appear +in the final table (it can also be retrieved from \code{matrix_form(tt)$strings}). This is also true for +column counts if \code{expand_colnames = TRUE}. +\item \code{as_viewer}: when \code{TRUE}, the result data frame will have all values as they appear in the final table, +i.e. with the same precision and numbers, but in easy-to-use numeric form. +}} + +\item{path_fun}{function. Function to transform paths into single-string +row/column names.} + +\item{value_fun}{function. Function to transform cell values into cells of +the data.frame. Defaults to \code{collapse_values} which creates strings +where multi-valued cells are collapsed together, separated by \code{|}.} +} +\value{ +\code{result_df_specs()}: returns a named list of result data frame extraction functions by "specification". + +\code{path_enriched_df()}: returns a data frame of \code{tt}'s cell values (processed by +\code{value_fun}, with columns named by the full column paths (processed by +\code{path_fun} and an additional \code{row_path} column with the row paths +(processed by by \code{path_fun}). +} +\description{ +Collection of utilities to exctract \code{data.frame} from \code{TableTree} objects. +} +\details{ +\code{as_result_df()}: Result data frame specifications may differ in the exact information +they include and the form in which they represent it. Specifications whose names end in "_experimental" +are subject to change without notice, but specifications without the "_experimental" +suffix will remain available \emph{including any bugs in their construction} indefinitely. +} +\section{Functions}{ +\itemize{ +\item \code{result_df_specs()}: list of functions that extract result data frames from \code{TableTree}s. + +\item \code{path_enriched_df()}: transform \code{TableTree} object to Path-Enriched \code{data.frame}. + +}} +\examples{ +lyt <- basic_table() \%>\% + split_cols_by("ARM") \%>\% + split_rows_by("STRATA1") \%>\% + analyze(c("AGE", "BMRKR2")) + +tbl <- build_table(lyt, ex_adsl) +as_result_df(tbl) + +result_df_specs() + +lyt <- basic_table() \%>\% + split_cols_by("ARM") \%>\% + analyze(c("AGE", "BMRKR2")) + +tbl <- build_table(lyt, ex_adsl) +path_enriched_df(tbl) + +} diff --git a/man/df_to_tt.Rd b/man/df_to_tt.Rd index 51feb47fe..6911d128c 100644 --- a/man/df_to_tt.Rd +++ b/man/df_to_tt.Rd @@ -18,4 +18,5 @@ Create \code{ElementaryTable} from data.frame } \examples{ df_to_tt(mtcars) + } diff --git a/man/path_enriched_df.Rd b/man/path_enriched_df.Rd deleted file mode 100644 index d83da44cc..000000000 --- a/man/path_enriched_df.Rd +++ /dev/null @@ -1,37 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/tt_export.R -\name{path_enriched_df} -\alias{path_enriched_df} -\title{Transform \code{TableTree} object to Path-Enriched data.frame} -\usage{ -path_enriched_df(tt, path_fun = collapse_path, value_fun = collapse_values) -} -\arguments{ -\item{tt}{\code{TableTree} (or related class). A \code{TableTree} object representing a -populated table.} - -\item{path_fun}{function. Function to transform paths into single-string -row/column names.} - -\item{value_fun}{function. Function to transform cell values into cells of -the data.frame. Defaults to \code{collapse_values} which creates strings -where multi-valued cells are collapsed together, separated by \code{|}.} -} -\value{ -A data frame of \code{tt}'s cell values (processed by -\code{value_fun}, with columns named by the full column paths (processed by -\code{path_fun} and an additional \code{row_path} column with the row paths -(processed by by \code{path_fun}). -} -\description{ -Transform \code{TableTree} object to Path-Enriched data.frame -} -\examples{ - -lyt <- basic_table() \%>\% - split_cols_by("ARM") \%>\% - analyze(c("AGE", "BMRKR2")) - -tbl <- build_table(lyt, ex_adsl) -path_enriched_df(tbl) -} diff --git a/man/result_df_specs.Rd b/man/result_df_specs.Rd deleted file mode 100644 index 0715abdd3..000000000 --- a/man/result_df_specs.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/tt_export.R -\name{result_df_specs} -\alias{result_df_specs} -\title{Result Data Frame Specifications} -\usage{ -result_df_specs() -} -\value{ -a named list of result data frame extraction functions by "specification" -} -\description{ -Result Data Frame Specifications -} -\examples{ -result_df_specs() -} diff --git a/man/tsv_io.Rd b/man/tsv_io.Rd index 50e58ec04..8149d469c 100644 --- a/man/tsv_io.Rd +++ b/man/tsv_io.Rd @@ -33,10 +33,9 @@ re-constituted list values for \code{export_as_tsv}. } \description{ This function creates a flat tabular file of cell values and -corresponding paths via \code{\link{path_enriched_df}}. I then +corresponding paths via \code{\link[=path_enriched_df]{path_enriched_df()}}. I then writes that data.frame out as a \code{tsv} file. -} -\details{ + By default (i.e. when \code{value_func} is not specified, List columns where at least one value has length > 1 are collapsed to character vectors by collapsing the list element with \code{"|"}. @@ -46,3 +45,6 @@ There is currently no round-trip capability for this type of export. You can read values exported this way back in via \code{import_from_tsv} but you will receive only the data.frame version back, NOT a \code{TableTree}. } +\seealso{ +\code{\link[=path_enriched_df]{path_enriched_df()}} for the underlying function that does the work. +} diff --git a/tests/testthat/test-result_data_frame.R b/tests/testthat/test-result_data_frame.R index 7591a64ed..6a40be997 100644 --- a/tests/testthat/test-result_data_frame.R +++ b/tests/testthat/test-result_data_frame.R @@ -1,6 +1,5 @@ context("Result Data Frames") - test_that("Result Data Frame generation works v0", { ## change here (only) when v0 is crystalized (no longer experimental) spec_version <- "v0_experimental" @@ -50,20 +49,67 @@ test_that("Result Data Frame generation works v0", { result_df3 <- as_result_df(tbl3, spec_version) expect_identical(nrow(result_df3), 1L) - + ## test labels when no row splits lyt4 <- basic_table() %>% split_cols_by("ARM") %>% analyze(c("AGE", "SEX")) - - tbl4 <- build_table(lyt4, DM) + + tbl4 <- build_table(lyt4, DM) result_df4 <- as_result_df(tbl4) - + expect_identical( names(result_df4), c( - "avar_name", "row_name", "row_num", "is_group_summary", + "avar_name", "row_name", "row_num", "is_group_summary", "node_class", "A: Drug X", "B: Placebo", "C: Combination" ) ) }) + +test_that("as_result_df works with visual output (as_viewer)", { + lyt <- make_big_lyt() + tbl <- build_table(lyt, rawdat) + + res <- expect_silent(as_result_df(tbl, simplify = TRUE, as_viewer = TRUE)) + expect_equal(res$ARM1.M[[1]], c(116.0, 45.3)) + + res <- expect_silent(as_result_df(tbl, simplify = TRUE, as_viewer = TRUE, as_strings = TRUE)) + expect_equal(res$ARM1.M[[1]], "116 (45.3%)") + + mf <- matrix_form(tbl) + string_tbl <- mf_strings(mf)[-seq_len(mf_nlheader(mf)), ] + string_tbl <- string_tbl[nzchar(string_tbl[, 2]), ] + colnames(string_tbl) <- colnames(res) + expect_equal(res, data.frame(string_tbl)) + + res <- expect_silent(as_result_df(tbl, simplify = TRUE, as_strings = TRUE, expand_colnames = TRUE)) + string_tbl <- mf_strings(mf) + string_tbl <- data.frame(string_tbl[nzchar(string_tbl[, 2]), ]) + colnames(string_tbl) <- colnames(res) + string_tbl$row_name[seq_len(mf_nlheader(mf))] <- res$row_name[seq_len(mf_nlheader(mf))] + expect_equal(res, string_tbl) + + expect_silent(basic_table() %>% build_table(DM) %>% as_result_df()) + + tbl <- basic_table(show_colcounts = TRUE) %>% + analyze("BMRKR1") %>% + build_table(DM) + expect_equal(as_result_df(tbl)$`all obs`, 5.851948, tolerance = 1e-6) + expect_equal( + as_result_df(tbl, as_viewer = TRUE)$`all obs`, + as.numeric(as_result_df(tbl, as_strings = TRUE)$`all obs`) + ) + expect_equal(as_result_df(tbl, expand_colnames = TRUE)$`all obs`[2], "356") + expect_equal(as_result_df(tbl, expand_colnames = TRUE, as_strings = TRUE)$`all obs`[2], "(N=356)") + + + # Test for integer extraction and ranges + lyt <- basic_table() %>% + split_cols_by("ARM") %>% + split_rows_by("STRATA1") %>% + analyze("AGE", afun = function(x) list(a = mean(x), b = range(x))) + + tbl <- build_table(lyt, ex_adsl) + expect_equal(as_result_df(tbl, simplify = TRUE, as_viewer = TRUE)[2, 2][[1]], c(24, 46)) +}) From 2fb0fd6eaab7c97d39902061e128559f5dbcfe48 Mon Sep 17 00:00:00 2001 From: Melkiades Date: Wed, 29 Nov 2023 08:56:12 +0000 Subject: [PATCH 2/2] [skip actions] Bump version to 0.6.5.9018 --- DESCRIPTION | 4 ++-- NEWS.md | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 919a4fa9a..fd246a932 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: rtables Title: Reporting Tables -Version: 0.6.5.9017 -Date: 2023-11-28 +Version: 0.6.5.9018 +Date: 2023-11-29 Authors@R: c( person("Gabriel", "Becker", , "gabembecker@gmail.com", role = "aut", comment = "Original creator of the package"), diff --git a/NEWS.md b/NEWS.md index 8c4e446a7..78cb853e4 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -## rtables 0.6.5.9017 +## rtables 0.6.5.9018 ### New Features * Removed `ref_group` reordering in column splits so not to change the order. * Added `bold` argument to `as_html` to bold specified elements, and `header_sep_line`