From 2b18a77cfdbbb026ec7fd48381bbbff694790f83 Mon Sep 17 00:00:00 2001 From: Melkiades Date: Tue, 28 Nov 2023 11:49:21 +0100 Subject: [PATCH 1/9] better docs --- R/tt_export.R | 372 +++++++++++++++++++-------------------- R/tt_from_df.R | 5 + R/utils.R | 18 +- man/as_result_df.Rd | 39 ---- man/data.frame_export.Rd | 74 ++++++++ man/df_to_tt.Rd | 1 + man/path_enriched_df.Rd | 37 ---- man/result_df_specs.Rd | 17 -- man/tsv_io.Rd | 8 +- 9 files changed, 282 insertions(+), 289 deletions(-) delete mode 100644 man/as_result_df.Rd create mode 100644 man/data.frame_export.Rd delete mode 100644 man/path_enriched_df.Rd delete mode 100644 man/result_df_specs.Rd diff --git a/R/tt_export.R b/R/tt_export.R index 10fde5cd6..849a1c577 100644 --- a/R/tt_export.R +++ b/R/tt_export.R @@ -1,13 +1,11 @@ #' @importFrom tools file_ext NULL -### XXX is the stuff in this file correct or should we be exporting *formatted* values to -### meet the needs of consumers of this? Do we ened to support both? #' Create Enriched flat value table with paths #' -#' +#' @description #' This function creates a flat tabular file of cell values and -#' corresponding paths via \code{\link{path_enriched_df}}. I then +#' corresponding paths via [path_enriched_df()]. I then #' writes that data.frame out as a `tsv` file. #' #' By default (i.e. when \code{value_func} is not specified, @@ -20,25 +18,23 @@ NULL #' #' @inheritParams gen_args #' @param file character(1). The path of the file to written to or read from. -#' @inheritParams path_enriched_df +#' @inheritParams data.frame_export +#' #' @return \code{NULL} silently for \code{export_as_tsv}, a data.frame with #' re-constituted list values for \code{export_as_tsv}. -#' @export +#' +#' @seealso [path_enriched_df()] for the underlying function that does the work. +#' #' @rdname tsv_io #' @importFrom utils write.table read.table - +#' @export export_as_tsv <- function(tt, file = NULL, path_fun = collapse_path, value_fun = collapse_values) { df <- path_enriched_df(tt, path_fun = path_fun, value_fun = value_fun) write.table(df, file, sep = "\t") } - - -.collapse_char <- "|" -.collapse_char_esc <- "\\|" - -##' @export -##' @rdname tsv_io +#' @rdname tsv_io +#' @export import_from_tsv <- function(file) { rawdf <- read.table(file, header = TRUE, sep = "\t") as.data.frame(lapply( @@ -53,52 +49,129 @@ import_from_tsv <- function(file) { )) } -collapse_path <- function(paths) { - if (is.list(paths)) { - return(vapply(paths, collapse_path, "")) - } - paste(paths, collapse = .collapse_char) -} +### Migrated to formatters. -collapse_values <- function(colvals) { - if (!is.list(colvals)) { ## || all(vapply(colvals, length, 1L) == 1)) - return(colvals) - } else if (all(vapply(colvals, length, 1L) == 1)) { - return(unlist(colvals)) - } - vapply(colvals, paste, "", collapse = .collapse_char) -} +#' @importFrom formatters export_as_txt +#' +#' @examples +#' lyt <- basic_table() %>% +#' split_cols_by("ARM") %>% +#' analyze(c("AGE", "BMRKR2", "COUNTRY")) +#' +#' tbl <- build_table(lyt, ex_adsl) +#' +#' cat(export_as_txt(tbl, file = NULL, paginate = TRUE, lpp = 8)) +#' +#' \dontrun{ +#' tf <- tempfile(fileext = ".txt") +#' export_as_txt(tbl, file = tf) +#' system2("cat", tf) +#' } +#' +#' @export +formatters::export_as_txt + +# data.frame output ------------------------------------------------------------ -#' Transform `TableTree` object to Path-Enriched data.frame +#' Generate a Result Data Frame +#' +#' @description +#' Collection of utilities to exctract `data.frame` from `TableTree` objects. #' #' @inheritParams gen_args -#' @param path_fun function. Function to transform paths into single-string -#' row/column names. -#' @param value_fun function. Function to transform cell values into cells of -#' the data.frame. Defaults to \code{collapse_values} which creates strings -#' where multi-valued cells are collapsed together, separated by \code{|}. -#' @export -#' @return A data frame of \code{tt}'s cell values (processed by -#' \code{value_fun}, with columns named by the full column paths (processed by -#' \code{path_fun} and an additional \code{row_path} column with the row paths -#' (processed by by \code{path_fun}). -#' @examples +#' @param spec character(1). The specification to use to +#' extract the result data frame. See details +#' @param ... Passed to spec-specific result data frame conversion function. +#' +#' @details `as_result_df()`: Result data frame specifications may differ in the exact information +#' they include and the form in which they represent it. Specifications whose names end in "_experimental" +#' are subject to change without notice, but specifications without the "_experimental" +#' suffix will remain available \emph{including any bugs in their construction} indefinitely. #' +#' @examples #' lyt <- basic_table() %>% #' split_cols_by("ARM") %>% +#' split_rows_by("STRATA1") %>% #' analyze(c("AGE", "BMRKR2")) #' #' tbl <- build_table(lyt, ex_adsl) -#' path_enriched_df(tbl) -path_enriched_df <- function(tt, path_fun = collapse_path, value_fun = collapse_values) { +#' as_result_df(tbl) +#' +#' @name data.frame_export +#' @export +as_result_df <- function(tt, spec = "v0_experimental", ...) { + result_df_fun <- lookup_result_df_specfun(spec) + result_df_fun(tt, ...) +} + +# Not used in rtables +# .split_colwidths <- function(ptabs, nctot, colwidths) { +# ret <- list() +# i <- 1L +# +# rlw <- colwidths[1] +# colwidths <- colwidths[-1] +# donenc <- 0 +# while (donenc < nctot) { +# curnc <- NCOL(ptabs[[i]]) +# ret[[i]] <- c(rlw, colwidths[seq_len(curnc)]) +# colwidths <- colwidths[-1 * seq_len(curnc)] +# donenc <- donenc + curnc +# i <- i + 1 +# } +# ret +# } + +#' @describeIn data.frame_export list of functions that extract result data frames from \code{TableTree}s. +#' +#' @return `result_df_specs()`: returns a named list of result data frame extraction functions by "specification". +#' +#' @examples +#' result_df_specs() +#' +#' @export +result_df_specs <- function() { + list(v0_experimental = result_df_v0_experimental) +} + +lookup_result_df_specfun <- function(spec) { + if (!(spec %in% names(result_df_specs()))) { + stop( + "unrecognized result data frame specification: ", + spec, + "If that specification is correct you may need to update your version of rtables" + ) + } + result_df_specs()[[spec]] +} + +result_df_v0_experimental <- function(tt) { + raw_cvals <- cell_values(tt) + ## if the table has one row and multiple columns, sometimes the cell values returns a list of the cell values + ## rather than a list of length 1 reprsenting the single row. This is bad but may not be changable + ## at this point. + if (nrow(tt) == 1 && length(raw_cvals) > 1) { + raw_cvals <- list(raw_cvals) + } + cellvals <- as.data.frame(do.call(rbind, raw_cvals)) + row.names(cellvals) <- NULL rdf <- make_row_df(tt) - cdf <- make_col_df(tt) - cvs <- as.data.frame(do.call(rbind, cell_values(tt))) - cvs <- as.data.frame(lapply(cvs, value_fun)) - row.names(cvs) <- NULL - colnames(cvs) <- path_fun(cdf$path) - preppaths <- path_fun(rdf[rdf$node_class != "LabelRow", ]$path) - cbind.data.frame(row_path = preppaths, cvs) + df <- cbind( + rdf[ + rdf$node_class != "LabelRow", + c("name", "label", "abs_rownumber", "path", "reprint_inds", "node_class") + ], + cellvals + ) + maxlen <- max(lengths(df$path)) + metadf <- do.call(rbind.data.frame, lapply( + seq_len(NROW(df)), + function(ii) handle_rdf_row(df[ii, ], maxlen = maxlen) + )) + cbind( + metadf[metadf$node_class != "LabelRow", ], + cellvals + ) } do_label_row <- function(rdfrow, maxlen) { @@ -109,7 +182,6 @@ do_label_row <- function(rdfrow, maxlen) { ) } - make_result_df_md_colnames <- function(maxlen) { spllen <- floor((maxlen - 2) / 2) ret <- character() @@ -119,14 +191,13 @@ make_result_df_md_colnames <- function(maxlen) { ret <- c(ret, c("avar_name", "row_name", "row_num", "is_group_summary", "node_class")) } - do_content_row <- function(rdfrow, maxlen) { pth <- rdfrow$path[[1]] - + contpos <- which(pth == "@content") - + seq_before <- seq_len(contpos - 1) - + c( as.list(pth[seq_before]), replicate(maxlen - contpos, list(NA_character_)), list(tail(pth, 1)), @@ -150,7 +221,6 @@ do_data_row <- function(rdfrow, maxlen) { ) } - handle_rdf_row <- function(rdfrow, maxlen) { nclass <- rdfrow$node_class if (rdfrow$path[[1]][1] == "root") { @@ -158,134 +228,68 @@ handle_rdf_row <- function(rdfrow, maxlen) { maxlen <- maxlen - 1 } ret <- switch(nclass, - LabelRow = do_label_row(rdfrow, maxlen), - ContentRow = do_content_row(rdfrow, maxlen), - DataRow = do_data_row(rdfrow, maxlen), - stop("Unrecognized node type in row dataframe, unable to generate result data frame") + LabelRow = do_label_row(rdfrow, maxlen), + ContentRow = do_content_row(rdfrow, maxlen), + DataRow = do_data_row(rdfrow, maxlen), + stop("Unrecognized node type in row dataframe, unable to generate result data frame") ) setNames(ret, make_result_df_md_colnames(maxlen)) } - -#' Result Data Frame Specifications +#' @describeIn data.frame_export transform `TableTree` object to Path-Enriched `data.frame`. #' -#' @return a named list of result data frame extraction functions by "specification" -#' @export -#' @examples -#' result_df_specs() -result_df_specs <- function() { - list(v0_experimental = result_df_v0_experimental) -} - -lookup_result_df_specfun <- function(spec) { - if (!(spec %in% names(result_df_specs()))) { - stop( - "unrecognized result data frame specification: ", - spec, - "If that specification is correct you may need to update your version of rtables" - ) - } - result_df_specs()[[spec]] -} - -result_df_v0_experimental <- function(tt) { - raw_cvals <- cell_values(tt) - ## if the table has one row and multiple columns, sometimes the cell values returns a list of the cell values - ## rather than a list of length 1 reprsenting the single row. This is bad but may not be changable - ## at this point. - if (nrow(tt) == 1 && length(raw_cvals) > 1) { - raw_cvals <- list(raw_cvals) - } - cellvals <- as.data.frame(do.call(rbind, raw_cvals)) - row.names(cellvals) <- NULL - rdf <- make_row_df(tt) - df <- cbind( - rdf[ - rdf$node_class != "LabelRow", - c("name", "label", "abs_rownumber", "path", "reprint_inds", "node_class") - ], - cellvals - ) - maxlen <- max(lengths(df$path)) - metadf <- do.call(rbind.data.frame, lapply( - seq_len(NROW(df)), - function(ii) handle_rdf_row(df[ii, ], maxlen = maxlen) - )) - cbind( - metadf[metadf$node_class != "LabelRow", ], - cellvals - ) -} - -#' Generate a Result Data Frame -#' -#' @param tt `VTableTree`. The table. -#' @param spec character(1). The specification to use to -#' extract the result data frame. See details -#' @param ... Passed to spec-specific result data frame conversion function. +#' @param path_fun function. Function to transform paths into single-string +#' row/column names. +#' @param value_fun function. Function to transform cell values into cells of +#' the data.frame. Defaults to \code{collapse_values} which creates strings +#' where multi-valued cells are collapsed together, separated by \code{|}. #' -#' @details Result data frame specifications may differ in the exact information they include and -#' the form in which they represent it. Specifications whose names end in "_experimental" -#' are subject to change without notice, but specifications without the "_experimental" -#' suffix will remain available \emph{including any bugs in their construction} indefinitely. +#' @return `path_enriched_df()`: returns a data frame of \code{tt}'s cell values (processed by +#' \code{value_fun}, with columns named by the full column paths (processed by +#' \code{path_fun} and an additional \code{row_path} column with the row paths +#' (processed by by \code{path_fun}). #' -#' @note This function may eventually be migrated to a separate package, and so should -#' not be called via `::` -#' @export #' @examples -#' #' lyt <- basic_table() %>% #' split_cols_by("ARM") %>% -#' split_rows_by("STRATA1") %>% #' analyze(c("AGE", "BMRKR2")) #' #' tbl <- build_table(lyt, ex_adsl) -#' as_result_df(tbl) -as_result_df <- function(tt, spec = "v0_experimental", ...) { - result_df_fun <- lookup_result_df_specfun(spec) - result_df_fun(tt, ...) +#' path_enriched_df(tbl) +#' +#' @export +path_enriched_df <- function(tt, path_fun = collapse_path, value_fun = collapse_values) { + rdf <- make_row_df(tt) + cdf <- make_col_df(tt) + cvs <- as.data.frame(do.call(rbind, cell_values(tt))) + cvs <- as.data.frame(lapply(cvs, value_fun)) + row.names(cvs) <- NULL + colnames(cvs) <- path_fun(cdf$path) + preppaths <- path_fun(rdf[rdf$node_class != "LabelRow", ]$path) + cbind.data.frame(row_path = preppaths, cvs) } -.split_colwidths <- function(ptabs, nctot, colwidths) { - ret <- list() - i <- 1L - - rlw <- colwidths[1] - colwidths <- colwidths[-1] - donenc <- 0 - while (donenc < nctot) { - curnc <- NCOL(ptabs[[i]]) - ret[[i]] <- c(rlw, colwidths[seq_len(curnc)]) - colwidths <- colwidths[-1 * seq_len(curnc)] - donenc <- donenc + curnc - i <- i + 1 +.collapse_char <- "|" +.collapse_char_esc <- "\\|" + +collapse_path <- function(paths) { + if (is.list(paths)) { + return(vapply(paths, collapse_path, "")) } - ret + paste(paths, collapse = .collapse_char) } -### Migrated to formatters. - -#' @importFrom formatters export_as_txt -#' -#' @examples -#' lyt <- basic_table() %>% -#' split_cols_by("ARM") %>% -#' analyze(c("AGE", "BMRKR2", "COUNTRY")) -#' -#' tbl <- build_table(lyt, ex_adsl) -#' -#' cat(export_as_txt(tbl, file = NULL, paginate = TRUE, lpp = 8)) -#' -#' \dontrun{ -#' tf <- tempfile(fileext = ".txt") -#' export_as_txt(tbl, file = tf) -#' system2("cat", tf) -#' } -#' -#' @export -formatters::export_as_txt +collapse_values <- function(colvals) { + if (!is.list(colvals)) { ## || all(vapply(colvals, length, 1L) == 1)) + return(colvals) + } else if (all(vapply(colvals, length, 1L) == 1)) { + return(unlist(colvals)) + } + vapply(colvals, paste, "", collapse = .collapse_char) +} +# pdf output ------------------------------------------------------------------- #' Export as PDF #' #' The PDF output is based on the ASCII output created with `toString` @@ -462,6 +466,22 @@ export_as_pdf <- function(tt, lpp = lpp, cpp = cpp ) } + +# only used in pagination +.tab_to_colpath_set <- function(tt) { + vapply( + collect_leaves(coltree(tt)), + function(y) paste(pos_to_path(tree_pos(y)), collapse = " "), + "" + ) +} +.figure_out_colinds <- function(subtab, fulltab) { + match( + .tab_to_colpath_set(subtab), + .tab_to_colpath_set(fulltab) + ) +} + # Flextable and docx ----------------------------------------------------------- #' Export as word document #' @@ -1074,29 +1094,3 @@ apply_alignments <- function(flx, aligns_df, part) { flx } - -# only used in pagination -.tab_to_colpath_set <- function(tt) { - vapply( - collect_leaves(coltree(tt)), - function(y) paste(pos_to_path(tree_pos(y)), collapse = " "), - "" - ) -} -.figure_out_colinds <- function(subtab, fulltab) { - match( - .tab_to_colpath_set(subtab), - .tab_to_colpath_set(fulltab) - ) -} - -check_required_packages <- function(pkgs) { - for (pkgi in pkgs) { - if (!requireNamespace(pkgi)) { - stop( - "This function requires the ", pkgi, " package. ", - "Please install it if you wish to use it" - ) - } - } -} diff --git a/R/tt_from_df.R b/R/tt_from_df.R index 3e2a0dfa3..111255c7a 100644 --- a/R/tt_from_df.R +++ b/R/tt_from_df.R @@ -1,9 +1,13 @@ #' Create `ElementaryTable` from data.frame +#' #' @param df data.frame. +#' #' @return an \code{ElementaryTable} object with unnested columns corresponding to #' \code{names(df)} and row labels corresponding to \code{row.names(df)} +#' #' @examples #' df_to_tt(mtcars) +#' #' @export df_to_tt <- function(df) { colnms <- colnames(df) @@ -14,5 +18,6 @@ df_to_tt <- function(df) { rni <- if (havern) rnames[i] else "" do.call(rrow, c(list(row.name = rni), unclass(df[i, ]))) }) + ElementaryTable(kids = kids, cinfo = cinfo) } diff --git a/R/utils.R b/R/utils.R index 0633cbecf..3b9cfc245 100644 --- a/R/utils.R +++ b/R/utils.R @@ -11,8 +11,6 @@ is_rtable <- function(x) { is(x, "VTableTree") } - - # nocov start ## is each object in a collection from a class are <- function(object_collection, class2) { @@ -51,7 +49,6 @@ is_logical_vector_modif <- function(x, min_length = 1) { } # nocov end - # Shorthand for functions that take df as first parameter .takes_df <- function(f) { func_takes(f, "df", is_first = TRUE) @@ -79,11 +76,11 @@ func_takes <- function(func, params, is_first = FALSE) { #' Translate spl_context to Path for display in error messages #' -#' #' @param ctx data.frame. The `spl_context` data.frame where the error occurred #' #' @return A character string containing a description of the row path corresponding #' to the `ctx` +#' #' @export spl_context_to_disp_path <- function(ctx) { ## this can happen in the first split in column space, but @@ -107,3 +104,16 @@ spl_context_to_disp_path <- function(ctx) { paste_vec <- function(vec) { paste0('c("', paste(vec, collapse = '", "'), '")') } + +# Utility for checking if a package is installed +check_required_packages <- function(pkgs) { + for (pkgi in pkgs) { + if (!requireNamespace(pkgi)) { + stop( + "This function requires the ", pkgi, " package. ", + "Please install it if you wish to use it" + ) + } + } +} + diff --git a/man/as_result_df.Rd b/man/as_result_df.Rd deleted file mode 100644 index 54768f5ec..000000000 --- a/man/as_result_df.Rd +++ /dev/null @@ -1,39 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/tt_export.R -\name{as_result_df} -\alias{as_result_df} -\title{Generate a Result Data Frame} -\usage{ -as_result_df(tt, spec = "v0_experimental", ...) -} -\arguments{ -\item{tt}{\code{VTableTree}. The table.} - -\item{spec}{character(1). The specification to use to -extract the result data frame. See details} - -\item{...}{Passed to spec-specific result data frame conversion function.} -} -\description{ -Generate a Result Data Frame -} -\details{ -Result data frame specifications may differ in the exact information they include and -the form in which they represent it. Specifications whose names end in "_experimental" -are subject to change without notice, but specifications without the "_experimental" -suffix will remain available \emph{including any bugs in their construction} indefinitely. -} -\note{ -This function may eventually be migrated to a separate package, and so should -not be called via \code{::} -} -\examples{ - -lyt <- basic_table() \%>\% - split_cols_by("ARM") \%>\% - split_rows_by("STRATA1") \%>\% - analyze(c("AGE", "BMRKR2")) - -tbl <- build_table(lyt, ex_adsl) -as_result_df(tbl) -} diff --git a/man/data.frame_export.Rd b/man/data.frame_export.Rd new file mode 100644 index 000000000..38a19d7a3 --- /dev/null +++ b/man/data.frame_export.Rd @@ -0,0 +1,74 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tt_export.R +\name{data.frame_export} +\alias{data.frame_export} +\alias{as_result_df} +\alias{result_df_specs} +\alias{path_enriched_df} +\title{Generate a Result Data Frame} +\usage{ +as_result_df(tt, spec = "v0_experimental", ...) + +result_df_specs() + +path_enriched_df(tt, path_fun = collapse_path, value_fun = collapse_values) +} +\arguments{ +\item{tt}{\code{TableTree} (or related class). A \code{TableTree} object representing a +populated table.} + +\item{spec}{character(1). The specification to use to +extract the result data frame. See details} + +\item{...}{Passed to spec-specific result data frame conversion function.} + +\item{path_fun}{function. Function to transform paths into single-string +row/column names.} + +\item{value_fun}{function. Function to transform cell values into cells of +the data.frame. Defaults to \code{collapse_values} which creates strings +where multi-valued cells are collapsed together, separated by \code{|}.} +} +\value{ +\code{result_df_specs()}: returns a named list of result data frame extraction functions by "specification". + +\code{path_enriched_df()}: returns a data frame of \code{tt}'s cell values (processed by +\code{value_fun}, with columns named by the full column paths (processed by +\code{path_fun} and an additional \code{row_path} column with the row paths +(processed by by \code{path_fun}). +} +\description{ +Collection of utilities to exctract \code{data.frame} from \code{TableTree} objects. +} +\details{ +\code{as_result_df()}: Result data frame specifications may differ in the exact information +they include and the form in which they represent it. Specifications whose names end in "_experimental" +are subject to change without notice, but specifications without the "_experimental" +suffix will remain available \emph{including any bugs in their construction} indefinitely. +} +\section{Functions}{ +\itemize{ +\item \code{result_df_specs()}: list of functions that extract result data frames from \code{TableTree}s. + +\item \code{path_enriched_df()}: transform \code{TableTree} object to Path-Enriched \code{data.frame}. + +}} +\examples{ +lyt <- basic_table() \%>\% + split_cols_by("ARM") \%>\% + split_rows_by("STRATA1") \%>\% + analyze(c("AGE", "BMRKR2")) + +tbl <- build_table(lyt, ex_adsl) +as_result_df(tbl) + +result_df_specs() + +lyt <- basic_table() \%>\% + split_cols_by("ARM") \%>\% + analyze(c("AGE", "BMRKR2")) + +tbl <- build_table(lyt, ex_adsl) +path_enriched_df(tbl) + +} diff --git a/man/df_to_tt.Rd b/man/df_to_tt.Rd index 51feb47fe..6911d128c 100644 --- a/man/df_to_tt.Rd +++ b/man/df_to_tt.Rd @@ -18,4 +18,5 @@ Create \code{ElementaryTable} from data.frame } \examples{ df_to_tt(mtcars) + } diff --git a/man/path_enriched_df.Rd b/man/path_enriched_df.Rd deleted file mode 100644 index d83da44cc..000000000 --- a/man/path_enriched_df.Rd +++ /dev/null @@ -1,37 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/tt_export.R -\name{path_enriched_df} -\alias{path_enriched_df} -\title{Transform \code{TableTree} object to Path-Enriched data.frame} -\usage{ -path_enriched_df(tt, path_fun = collapse_path, value_fun = collapse_values) -} -\arguments{ -\item{tt}{\code{TableTree} (or related class). A \code{TableTree} object representing a -populated table.} - -\item{path_fun}{function. Function to transform paths into single-string -row/column names.} - -\item{value_fun}{function. Function to transform cell values into cells of -the data.frame. Defaults to \code{collapse_values} which creates strings -where multi-valued cells are collapsed together, separated by \code{|}.} -} -\value{ -A data frame of \code{tt}'s cell values (processed by -\code{value_fun}, with columns named by the full column paths (processed by -\code{path_fun} and an additional \code{row_path} column with the row paths -(processed by by \code{path_fun}). -} -\description{ -Transform \code{TableTree} object to Path-Enriched data.frame -} -\examples{ - -lyt <- basic_table() \%>\% - split_cols_by("ARM") \%>\% - analyze(c("AGE", "BMRKR2")) - -tbl <- build_table(lyt, ex_adsl) -path_enriched_df(tbl) -} diff --git a/man/result_df_specs.Rd b/man/result_df_specs.Rd deleted file mode 100644 index 0715abdd3..000000000 --- a/man/result_df_specs.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/tt_export.R -\name{result_df_specs} -\alias{result_df_specs} -\title{Result Data Frame Specifications} -\usage{ -result_df_specs() -} -\value{ -a named list of result data frame extraction functions by "specification" -} -\description{ -Result Data Frame Specifications -} -\examples{ -result_df_specs() -} diff --git a/man/tsv_io.Rd b/man/tsv_io.Rd index 50e58ec04..8149d469c 100644 --- a/man/tsv_io.Rd +++ b/man/tsv_io.Rd @@ -33,10 +33,9 @@ re-constituted list values for \code{export_as_tsv}. } \description{ This function creates a flat tabular file of cell values and -corresponding paths via \code{\link{path_enriched_df}}. I then +corresponding paths via \code{\link[=path_enriched_df]{path_enriched_df()}}. I then writes that data.frame out as a \code{tsv} file. -} -\details{ + By default (i.e. when \code{value_func} is not specified, List columns where at least one value has length > 1 are collapsed to character vectors by collapsing the list element with \code{"|"}. @@ -46,3 +45,6 @@ There is currently no round-trip capability for this type of export. You can read values exported this way back in via \code{import_from_tsv} but you will receive only the data.frame version back, NOT a \code{TableTree}. } +\seealso{ +\code{\link[=path_enriched_df]{path_enriched_df()}} for the underlying function that does the work. +} From 9cf72b8a3f29f7458a4408866ffec231e04e4a82 Mon Sep 17 00:00:00 2001 From: Melkiades Date: Tue, 28 Nov 2023 14:16:21 +0100 Subject: [PATCH 2/9] order and addition of features on top of v0_experimental --- DESCRIPTION | 2 +- R/tt_export.R | 162 ++++++++++++++++++++++-- man/data.frame_export.Rd | 18 ++- tests/testthat/test-result_data_frame.R | 7 +- 4 files changed, 171 insertions(+), 18 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 3d5c2ea71..552d670e7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -34,13 +34,13 @@ Depends: methods, R (>= 2.10) Imports: + checkmate (>= 2.1.0), grid, htmltools (>= 0.5.4), stats Suggests: broom (>= 0.7.10), car (>= 3.0-13), - checkmate (>= 2.1.0), dplyr (>= 1.0.5), flextable (>= 0.8.4), knitr (>= 1.42), diff --git a/R/tt_export.R b/R/tt_export.R index 849a1c577..9b64a0fd9 100644 --- a/R/tt_export.R +++ b/R/tt_export.R @@ -80,8 +80,19 @@ formatters::export_as_txt #' #' @inheritParams gen_args #' @param spec character(1). The specification to use to -#' extract the result data frame. See details -#' @param ... Passed to spec-specific result data frame conversion function. +#' extract the result data frame. See details +#' @param simplify logical(1). If \code{TRUE}, the result data frame will have only visible +#' labels and result columns. +#' @param ... Passed to spec-specific result data frame conversion function. Currently it can be +#' one or more of the following parameters (valid only for `v0_experimental` spec for now): +#' - `expanded_colnames`: when `TRUE`, the result data frame will have expanded column names above the usual +#' output. This is useful when the result data frame is used for further processing. +#' - `simplify`: when `TRUE`, the result data frame will have only visible labels and result columns. +#' - `as_strings`: when `TRUE`, the result data frame will have all values as strings, as they appear +#' in the final table (it can also be retrieved from `matrix_form(tt)$strings`). This is also true for +#' column counts if `expanded_colnames = TRUE`. +#' - `as_viewer`: when `TRUE`, the result data frame will have all values as they appear in the final table, +#' i.e. with the same precision and numbers, but in easy-to-use numeric form. #' #' @details `as_result_df()`: Result data frame specifications may differ in the exact information #' they include and the form in which they represent it. Specifications whose names end in "_experimental" @@ -99,9 +110,28 @@ formatters::export_as_txt #' #' @name data.frame_export #' @export -as_result_df <- function(tt, spec = "v0_experimental", ...) { +as_result_df <- function(tt, spec = "v0_experimental", simplify = FALSE, ...) { + checkmate::assert_class(tt, "VTableTree") + checkmate::assert_string(spec) + checkmate::assert_flag(simplify) + result_df_fun <- lookup_result_df_specfun(spec) - result_df_fun(tt, ...) + out <- result_df_fun(tt, ...) + + if (simplify) { + out <- .simplify_result_df(out) + } + + out +} + +# Function that selects specific outputs from the result data frame +.simplify_result_df <- function(df) { + col_df <- colnames(df) + row_names_col <- which(col_df == "row_name") + result_cols <- seq(which(col_df == "node_class") + 1, length(col_df)) + + df[, c(row_names_col, result_cols)] } # Not used in rtables @@ -122,7 +152,7 @@ as_result_df <- function(tt, spec = "v0_experimental", ...) { # ret # } -#' @describeIn data.frame_export list of functions that extract result data frames from \code{TableTree}s. +#' @describeIn data.frame_export list of functions that extract result data frames from \code{TableTree}s. #' #' @return `result_df_specs()`: returns a named list of result data frame extraction functions by "specification". #' @@ -145,17 +175,47 @@ lookup_result_df_specfun <- function(spec) { result_df_specs()[[spec]] } -result_df_v0_experimental <- function(tt) { +result_df_v0_experimental <- function(tt, + as_viewer = FALSE, + as_strings = FALSE, + expanded_colnames = FALSE) { + checkmate::assert_flag(as_viewer) + checkmate::assert_flag(as_string) + checkmate::assert_flag(expanded_colnames) + raw_cvals <- cell_values(tt) ## if the table has one row and multiple columns, sometimes the cell values returns a list of the cell values - ## rather than a list of length 1 reprsenting the single row. This is bad but may not be changable + ## rather than a list of length 1 representing the single row. This is bad but may not be changeable ## at this point. if (nrow(tt) == 1 && length(raw_cvals) > 1) { raw_cvals <- list(raw_cvals) } + cellvals <- as.data.frame(do.call(rbind, raw_cvals)) row.names(cellvals) <- NULL + + if (as_viewer || as_strings) { + # we keep previous calculations to check the format of the data + mf_tt <- matrix_form(tt) + mf_result_chars <- mf_strings(mf_tt)[-seq_len(mf_nlheader(mf_tt)), -1] + mf_result_chars <- .remove_empty_elements(mf_result_chars) + mf_result_numeric <- .make_numeric_char_mf(mf_result_chars) + if (!setequal(dim(mf_result_numeric), dim(cellvals)) || + !setequal(dim(mf_result_chars), dim(cellvals))) { + stop("The extracted numeric data.frame does not have the same dimension of the", + " cell values extracted with cell_values(). This is a bug. Please report it.") # nocov + } + if (as_strings) { + colnames(mf_result_chars) <- colnames(cellvals) + cellvals <- mf_result_chars + } else { + colnames(mf_result_numeric) <- colnames(cellvals) + cellvals <- mf_result_numeric + } + } + rdf <- make_row_df(tt) + df <- cbind( rdf[ rdf$node_class != "LabelRow", @@ -163,15 +223,76 @@ result_df_v0_experimental <- function(tt) { ], cellvals ) + maxlen <- max(lengths(df$path)) - metadf <- do.call(rbind.data.frame, lapply( - seq_len(NROW(df)), - function(ii) handle_rdf_row(df[ii, ], maxlen = maxlen) - )) - cbind( + metadf <- do.call( + rbind.data.frame, + lapply( + seq_len(NROW(df)), + function(ii) + handle_rdf_row(df[ii, ], maxlen = maxlen) + ) + ) + + ret <- cbind( metadf[metadf$node_class != "LabelRow", ], cellvals ) + + # If we want to expand colnames + if (expanded_colnames) { + col_name_structure <- .get_formatted_colnames(clayout(tt)) + number_of_non_data_cols <- which(colnames(ret) == "node_class") + if (NCOL(ret) - number_of_non_data_cols != NCOL(col_name_structure)) { + stop("When expanding colnames structure, we were not able to find the same", + " number of columns as in the result data frame. This is a bug. Please report it.") # nocov + } + + buffer_rows_for_colnames <- matrix( + rep("", number_of_non_data_cols * NROW(col_name_structure)), + nrow = NROW(col_name_structure) + ) + + header_colnames_matrix <- cbind(buffer_rows_for_colnames, data.frame(col_name_structure)) + colnames(header_colnames_matrix) <- colnames(ret) + + count_row <- NULL + if (disp_ccounts(tt)) { + ccounts <- col_counts(tt) + if (as_strings) { + ccounts <- mf_strings(mf_tt)[mf_nlheader(mf_tt), ] + ccounts <- .remove_empty_elements(ccounts) + } + count_row <- c(rep("", number_of_non_data_cols), ccounts) + header_colnames_matrix <- rbind(header_colnames_matrix, count_row) + } + ret <- rbind(header_colnames_matrix, ret) + } + + ret +} + +.remove_empty_elements <- function(char_df) { + if (is.null(dim(char_df))) { + return(char_df[nzchar(char_df, keepNA = TRUE)]) + } + + apply(char_df, 2, function(col_i) col_i[nzchar(col_i, keepNA = TRUE)]) +} + +# Helper function to make the character matrix numeric +.make_numeric_char_mf <- function(char_df) { + if (is.null(dim(char_df))) { + return(as.numeric(stringi::stri_extract_all(char_df, regex = "\\d+.\\d+"))) + } + + apply(char_df, 2, function(col_i) { + lapply( + stringi::stri_extract_all(col_i, regex = "\\d+.\\d+"), + as.numeric + ) + }) %>% + do.call(cbind, .) } do_label_row <- function(rdfrow, maxlen) { @@ -236,6 +357,19 @@ handle_rdf_row <- function(rdfrow, maxlen) { setNames(ret, make_result_df_md_colnames(maxlen)) } +# Helper recurrent function to get the column names for the result data frame from the VTableTree +.get_formatted_colnames <- function(clyt) { + ret <- names(clyt) + if (is.null(tree_children(clyt))) { + return(ret) + } else { + ret <- rbind(ret, lapply(tree_children(clyt), .get_formatted_colnames) %>% do.call(c, .)) + colnames(ret) <- NULL + rownames(ret) <- NULL + return(ret) + } +} + #' @describeIn data.frame_export transform `TableTree` object to Path-Enriched `data.frame`. #' #' @param path_fun function. Function to transform paths into single-string @@ -727,7 +861,7 @@ tt_to_flextable <- function(tt, tf_wrap = !is.null(cpp), max_width = cpp, total_width = 10) { - check_required_packages(c("flextable", "checkmate")) + check_required_packages("flextable") if (!inherits(tt, "VTableTree")) { stop("Input table is not an rtables' object.") } @@ -932,7 +1066,7 @@ theme_docx_default <- function(tt = NULL, # Option for more complicated stuff bold_manual = NULL, border = flextable::fp_border_default(width = 0.5)) { function(flx) { - check_required_packages(c("flextable", "checkmate")) + check_required_packages("flextable") if (!inherits(flx, "flextable")) { stop(sprintf( "Function `%s` supports only flextable objects.", diff --git a/man/data.frame_export.Rd b/man/data.frame_export.Rd index 38a19d7a3..6cc1b3bd1 100644 --- a/man/data.frame_export.Rd +++ b/man/data.frame_export.Rd @@ -7,7 +7,7 @@ \alias{path_enriched_df} \title{Generate a Result Data Frame} \usage{ -as_result_df(tt, spec = "v0_experimental", ...) +as_result_df(tt, spec = "v0_experimental", simplify = FALSE, ...) result_df_specs() @@ -20,7 +20,21 @@ populated table.} \item{spec}{character(1). The specification to use to extract the result data frame. See details} -\item{...}{Passed to spec-specific result data frame conversion function.} +\item{simplify}{logical(1). If \code{TRUE}, the result data frame will have only visible +labels and result columns.} + +\item{...}{Passed to spec-specific result data frame conversion function. Currently it can be +one or more of the following parameters (valid only for \code{v0_experimental} spec for now): +\itemize{ +\item \code{expanded_colnames}: when \code{TRUE}, the result data frame will have expanded column names above the usual +output. This is useful when the result data frame is used for further processing. +\item \code{simplify}: when \code{TRUE}, the result data frame will have only visible labels and result columns. +\item \code{as_strings}: when \code{TRUE}, the result data frame will have all values as strings, as they appear +in the final table (it can also be retrieved from \code{matrix_form(tt)$strings}). This is also true for +column counts if \code{expanded_colnames = TRUE}. +\item \code{as_viewer}: when \code{TRUE}, the result data frame will have all values as they appear in the final table, +i.e. with the same precision and numbers, but in easy-to-use numeric form. +}} \item{path_fun}{function. Function to transform paths into single-string row/column names.} diff --git a/tests/testthat/test-result_data_frame.R b/tests/testthat/test-result_data_frame.R index 7591a64ed..1c5c7523d 100644 --- a/tests/testthat/test-result_data_frame.R +++ b/tests/testthat/test-result_data_frame.R @@ -1,6 +1,5 @@ context("Result Data Frames") - test_that("Result Data Frame generation works v0", { ## change here (only) when v0 is crystalized (no longer experimental) spec_version <- "v0_experimental" @@ -67,3 +66,9 @@ test_that("Result Data Frame generation works v0", { ) ) }) + +test_that("as_result_df works with visual output (as_viewer)", { + lyt <- make_big_lyt() + tbl <- build_table(lyt, rawdat) + as_result_df(tbl, simplify = TRUE, as_viewer = TRUE) +}) \ No newline at end of file From 2781a77fe9fd75fa023511228f0e03c663ca199d Mon Sep 17 00:00:00 2001 From: Melkiades Date: Tue, 28 Nov 2023 15:06:24 +0100 Subject: [PATCH 3/9] expanded tests --- R/tt_export.R | 25 +++++++++++++-------- tests/testthat/test-result_data_frame.R | 29 ++++++++++++++++++++++++- 2 files changed, 44 insertions(+), 10 deletions(-) diff --git a/R/tt_export.R b/R/tt_export.R index 9b64a0fd9..1f957840b 100644 --- a/R/tt_export.R +++ b/R/tt_export.R @@ -85,12 +85,12 @@ formatters::export_as_txt #' labels and result columns. #' @param ... Passed to spec-specific result data frame conversion function. Currently it can be #' one or more of the following parameters (valid only for `v0_experimental` spec for now): -#' - `expanded_colnames`: when `TRUE`, the result data frame will have expanded column names above the usual +#' - `expand_colnames`: when `TRUE`, the result data frame will have expanded column names above the usual #' output. This is useful when the result data frame is used for further processing. #' - `simplify`: when `TRUE`, the result data frame will have only visible labels and result columns. #' - `as_strings`: when `TRUE`, the result data frame will have all values as strings, as they appear #' in the final table (it can also be retrieved from `matrix_form(tt)$strings`). This is also true for -#' column counts if `expanded_colnames = TRUE`. +#' column counts if `expand_colnames = TRUE`. #' - `as_viewer`: when `TRUE`, the result data frame will have all values as they appear in the final table, #' i.e. with the same precision and numbers, but in easy-to-use numeric form. #' @@ -115,6 +115,10 @@ as_result_df <- function(tt, spec = "v0_experimental", simplify = FALSE, ...) { checkmate::assert_string(spec) checkmate::assert_flag(simplify) + if (nrow(tt) == 0) { + return(sanitize_table_struct(tt)) + } + result_df_fun <- lookup_result_df_specfun(spec) out <- result_df_fun(tt, ...) @@ -169,7 +173,7 @@ lookup_result_df_specfun <- function(spec) { stop( "unrecognized result data frame specification: ", spec, - "If that specification is correct you may need to update your version of rtables" + "If that specification is correct you may need to update your version of rtables" ) } result_df_specs()[[spec]] @@ -178,10 +182,10 @@ lookup_result_df_specfun <- function(spec) { result_df_v0_experimental <- function(tt, as_viewer = FALSE, as_strings = FALSE, - expanded_colnames = FALSE) { + expand_colnames = FALSE) { checkmate::assert_flag(as_viewer) - checkmate::assert_flag(as_string) - checkmate::assert_flag(expanded_colnames) + checkmate::assert_flag(as_strings) + checkmate::assert_flag(expand_colnames) raw_cvals <- cell_values(tt) ## if the table has one row and multiple columns, sometimes the cell values returns a list of the cell values @@ -240,7 +244,7 @@ result_df_v0_experimental <- function(tt, ) # If we want to expand colnames - if (expanded_colnames) { + if (expand_colnames) { col_name_structure <- .get_formatted_colnames(clayout(tt)) number_of_non_data_cols <- which(colnames(ret) == "node_class") if (NCOL(ret) - number_of_non_data_cols != NCOL(col_name_structure)) { @@ -359,11 +363,14 @@ handle_rdf_row <- function(rdfrow, maxlen) { # Helper recurrent function to get the column names for the result data frame from the VTableTree .get_formatted_colnames <- function(clyt) { - ret <- names(clyt) + ret <- obj_label(clyt) + if (!nzchar(ret)) { + ret <- NULL + } if (is.null(tree_children(clyt))) { return(ret) } else { - ret <- rbind(ret, lapply(tree_children(clyt), .get_formatted_colnames) %>% do.call(c, .)) + ret <- rbind(ret, lapply(tree_children(clyt), .get_formatted_colnames) %>% do.call(cbind, .)) colnames(ret) <- NULL rownames(ret) <- NULL return(ret) diff --git a/tests/testthat/test-result_data_frame.R b/tests/testthat/test-result_data_frame.R index 1c5c7523d..10eacc1d9 100644 --- a/tests/testthat/test-result_data_frame.R +++ b/tests/testthat/test-result_data_frame.R @@ -70,5 +70,32 @@ test_that("Result Data Frame generation works v0", { test_that("as_result_df works with visual output (as_viewer)", { lyt <- make_big_lyt() tbl <- build_table(lyt, rawdat) - as_result_df(tbl, simplify = TRUE, as_viewer = TRUE) + + res <- expect_silent(as_result_df(tbl, simplify = TRUE, as_viewer = TRUE)) + expect_equal(res$ARM1.M[[1]], c(116.0, 45.3)) + + res <- expect_silent(as_result_df(tbl, simplify = TRUE, as_viewer = TRUE, as_strings = TRUE)) + expect_equal(res$ARM1.M[[1]], "116 (45.3%)") + + mf <- matrix_form(tbl) + string_tbl <- mf_strings(mf)[-seq_len(mf_nlheader(mf)),] + string_tbl <- string_tbl[nzchar(string_tbl[, 2]), ] + colnames(string_tbl) <- colnames(res) + expect_equal(res, data.frame(string_tbl)) + + res <- expect_silent(as_result_df(tbl, simplify = TRUE, as_strings = TRUE, expand_colnames = TRUE)) + string_tbl <- mf_strings(mf) + string_tbl <- data.frame(string_tbl[nzchar(string_tbl[, 2]), ]) + colnames(string_tbl) <- colnames(res) + string_tbl$row_name[seq_len(mf_nlheader(mf))] <- res$row_name[seq_len(mf_nlheader(mf))] + expect_equal(res, string_tbl) + + expect_silent(basic_table() %>% build_table(DM) %>% as_result_df()) + tbl <- basic_table() %>% analyze("BMRKR1") %>% build_table(DM) + + expect_equal(as_result_df(tbl)$V1, 5.851948, tolerance = 1e-6) # V1? + + # as_result_df(tbl, as_strings = TRUE) + # as_result_df(tbl, as_viewer = TRUE) + as_result_df(tbl, expand_colnames = TRUE) }) \ No newline at end of file From 62d299e8b4357c322e9d6203ecfc506654c963c2 Mon Sep 17 00:00:00 2001 From: Melkiades Date: Tue, 28 Nov 2023 15:54:30 +0100 Subject: [PATCH 4/9] fixes --- R/tt_export.R | 11 +++++++++-- man/data.frame_export.Rd | 4 ++-- tests/testthat/test-result_data_frame.R | 16 +++++++++------- 3 files changed, 20 insertions(+), 11 deletions(-) diff --git a/R/tt_export.R b/R/tt_export.R index 1f957840b..92be3fe42 100644 --- a/R/tt_export.R +++ b/R/tt_export.R @@ -194,16 +194,23 @@ result_df_v0_experimental <- function(tt, if (nrow(tt) == 1 && length(raw_cvals) > 1) { raw_cvals <- list(raw_cvals) } - + cellvals <- as.data.frame(do.call(rbind, raw_cvals)) row.names(cellvals) <- NULL + + if (nrow(tt) == 1 && ncol(tt) == 1) { + colnames(cellvals) <- names(raw_cvals) + } if (as_viewer || as_strings) { # we keep previous calculations to check the format of the data mf_tt <- matrix_form(tt) mf_result_chars <- mf_strings(mf_tt)[-seq_len(mf_nlheader(mf_tt)), -1] mf_result_chars <- .remove_empty_elements(mf_result_chars) - mf_result_numeric <- .make_numeric_char_mf(mf_result_chars) + mf_result_numeric <- as.data.frame( + .make_numeric_char_mf(mf_result_chars) + ) + mf_result_chars <- as.data.frame(mf_result_chars) if (!setequal(dim(mf_result_numeric), dim(cellvals)) || !setequal(dim(mf_result_chars), dim(cellvals))) { stop("The extracted numeric data.frame does not have the same dimension of the", diff --git a/man/data.frame_export.Rd b/man/data.frame_export.Rd index 6cc1b3bd1..97d0011aa 100644 --- a/man/data.frame_export.Rd +++ b/man/data.frame_export.Rd @@ -26,12 +26,12 @@ labels and result columns.} \item{...}{Passed to spec-specific result data frame conversion function. Currently it can be one or more of the following parameters (valid only for \code{v0_experimental} spec for now): \itemize{ -\item \code{expanded_colnames}: when \code{TRUE}, the result data frame will have expanded column names above the usual +\item \code{expand_colnames}: when \code{TRUE}, the result data frame will have expanded column names above the usual output. This is useful when the result data frame is used for further processing. \item \code{simplify}: when \code{TRUE}, the result data frame will have only visible labels and result columns. \item \code{as_strings}: when \code{TRUE}, the result data frame will have all values as strings, as they appear in the final table (it can also be retrieved from \code{matrix_form(tt)$strings}). This is also true for -column counts if \code{expanded_colnames = TRUE}. +column counts if \code{expand_colnames = TRUE}. \item \code{as_viewer}: when \code{TRUE}, the result data frame will have all values as they appear in the final table, i.e. with the same precision and numbers, but in easy-to-use numeric form. }} diff --git a/tests/testthat/test-result_data_frame.R b/tests/testthat/test-result_data_frame.R index 10eacc1d9..e678b0c3a 100644 --- a/tests/testthat/test-result_data_frame.R +++ b/tests/testthat/test-result_data_frame.R @@ -91,11 +91,13 @@ test_that("as_result_df works with visual output (as_viewer)", { expect_equal(res, string_tbl) expect_silent(basic_table() %>% build_table(DM) %>% as_result_df()) - tbl <- basic_table() %>% analyze("BMRKR1") %>% build_table(DM) - expect_equal(as_result_df(tbl)$V1, 5.851948, tolerance = 1e-6) # V1? - - # as_result_df(tbl, as_strings = TRUE) - # as_result_df(tbl, as_viewer = TRUE) - as_result_df(tbl, expand_colnames = TRUE) -}) \ No newline at end of file + tbl <- basic_table(show_colcounts = TRUE) %>% analyze("BMRKR1") %>% build_table(DM) + expect_equal(as_result_df(tbl)$`all obs`, 5.851948, tolerance = 1e-6) + expect_equal( + as_result_df(tbl, as_viewer = TRUE)$`all obs`, + as.numeric(as_result_df(tbl, as_strings = TRUE)$`all obs`) + ) + expect_equal(as_result_df(tbl, expand_colnames = TRUE)$`all obs`[2], "356") + expect_equal(as_result_df(tbl, expand_colnames = TRUE, as_strings = TRUE)$`all obs`[2], "(N=356)") +}) From 2be3123a9551c91d7c0750a3b08fd369f49544d6 Mon Sep 17 00:00:00 2001 From: Melkiades Date: Tue, 28 Nov 2023 15:57:50 +0100 Subject: [PATCH 5/9] news --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index a8741a6d1..098d0200e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -10,6 +10,8 @@ * Added `header_section_div` setters and getters for layout and table objects along with related `basic_table` parameter. * Added `na_str` argument to `analyze_colvars` to set custom string to print in place of missing values. + * Added flat `data.frame` outputs for `as_result_df()` via flag parameters `as_viewer`, `as_strings`, and + `expand_colnames`. ### Bug Fixes * Fixed a bug that was failing when wrapping and section dividers were used at the same time. From cb0db2efd67ef44659e11e22ed18e181bea7a9cc Mon Sep 17 00:00:00 2001 From: Melkiades Date: Tue, 28 Nov 2023 16:07:08 +0100 Subject: [PATCH 6/9] styling --- R/tt_export.R | 105 +++++++++++++----------- tests/testthat/test-result_data_frame.R | 32 ++++---- 2 files changed, 72 insertions(+), 65 deletions(-) diff --git a/R/tt_export.R b/R/tt_export.R index 92be3fe42..d3add1eec 100644 --- a/R/tt_export.R +++ b/R/tt_export.R @@ -19,12 +19,12 @@ NULL #' @inheritParams gen_args #' @param file character(1). The path of the file to written to or read from. #' @inheritParams data.frame_export -#' +#' #' @return \code{NULL} silently for \code{export_as_tsv}, a data.frame with #' re-constituted list values for \code{export_as_tsv}. -#' +#' #' @seealso [path_enriched_df()] for the underlying function that does the work. -#' +#' #' @rdname tsv_io #' @importFrom utils write.table read.table #' @export @@ -74,9 +74,9 @@ formatters::export_as_txt # data.frame output ------------------------------------------------------------ #' Generate a Result Data Frame -#' +#' #' @description -#' Collection of utilities to exctract `data.frame` from `TableTree` objects. +#' Collection of utilities to exctract `data.frame` from `TableTree` objects. #' #' @inheritParams gen_args #' @param spec character(1). The specification to use to @@ -94,7 +94,7 @@ formatters::export_as_txt #' - `as_viewer`: when `TRUE`, the result data frame will have all values as they appear in the final table, #' i.e. with the same precision and numbers, but in easy-to-use numeric form. #' -#' @details `as_result_df()`: Result data frame specifications may differ in the exact information +#' @details `as_result_df()`: Result data frame specifications may differ in the exact information #' they include and the form in which they represent it. Specifications whose names end in "_experimental" #' are subject to change without notice, but specifications without the "_experimental" #' suffix will remain available \emph{including any bugs in their construction} indefinitely. @@ -107,25 +107,25 @@ formatters::export_as_txt #' #' tbl <- build_table(lyt, ex_adsl) #' as_result_df(tbl) -#' +#' #' @name data.frame_export #' @export as_result_df <- function(tt, spec = "v0_experimental", simplify = FALSE, ...) { checkmate::assert_class(tt, "VTableTree") checkmate::assert_string(spec) checkmate::assert_flag(simplify) - + if (nrow(tt) == 0) { return(sanitize_table_struct(tt)) } - + result_df_fun <- lookup_result_df_specfun(spec) out <- result_df_fun(tt, ...) - + if (simplify) { out <- .simplify_result_df(out) } - + out } @@ -134,7 +134,7 @@ as_result_df <- function(tt, spec = "v0_experimental", simplify = FALSE, ...) { col_df <- colnames(df) row_names_col <- which(col_df == "row_name") result_cols <- seq(which(col_df == "node_class") + 1, length(col_df)) - + df[, c(row_names_col, result_cols)] } @@ -142,7 +142,7 @@ as_result_df <- function(tt, spec = "v0_experimental", simplify = FALSE, ...) { # .split_colwidths <- function(ptabs, nctot, colwidths) { # ret <- list() # i <- 1L -# +# # rlw <- colwidths[1] # colwidths <- colwidths[-1] # donenc <- 0 @@ -156,13 +156,13 @@ as_result_df <- function(tt, spec = "v0_experimental", simplify = FALSE, ...) { # ret # } -#' @describeIn data.frame_export list of functions that extract result data frames from \code{TableTree}s. +#' @describeIn data.frame_export list of functions that extract result data frames from \code{TableTree}s. #' #' @return `result_df_specs()`: returns a named list of result data frame extraction functions by "specification". -#' +#' #' @examples #' result_df_specs() -#' +#' #' @export result_df_specs <- function() { list(v0_experimental = result_df_v0_experimental) @@ -179,14 +179,14 @@ lookup_result_df_specfun <- function(spec) { result_df_specs()[[spec]] } -result_df_v0_experimental <- function(tt, - as_viewer = FALSE, - as_strings = FALSE, +result_df_v0_experimental <- function(tt, + as_viewer = FALSE, + as_strings = FALSE, expand_colnames = FALSE) { checkmate::assert_flag(as_viewer) checkmate::assert_flag(as_strings) checkmate::assert_flag(expand_colnames) - + raw_cvals <- cell_values(tt) ## if the table has one row and multiple columns, sometimes the cell values returns a list of the cell values ## rather than a list of length 1 representing the single row. This is bad but may not be changeable @@ -201,7 +201,7 @@ result_df_v0_experimental <- function(tt, if (nrow(tt) == 1 && ncol(tt) == 1) { colnames(cellvals) <- names(raw_cvals) } - + if (as_viewer || as_strings) { # we keep previous calculations to check the format of the data mf_tt <- matrix_form(tt) @@ -211,10 +211,12 @@ result_df_v0_experimental <- function(tt, .make_numeric_char_mf(mf_result_chars) ) mf_result_chars <- as.data.frame(mf_result_chars) - if (!setequal(dim(mf_result_numeric), dim(cellvals)) || - !setequal(dim(mf_result_chars), dim(cellvals))) { - stop("The extracted numeric data.frame does not have the same dimension of the", - " cell values extracted with cell_values(). This is a bug. Please report it.") # nocov + if (!setequal(dim(mf_result_numeric), dim(cellvals)) || + !setequal(dim(mf_result_chars), dim(cellvals))) { + stop( + "The extracted numeric data.frame does not have the same dimension of the", + " cell values extracted with cell_values(). This is a bug. Please report it." + ) # nocov } if (as_strings) { colnames(mf_result_chars) <- colnames(cellvals) @@ -224,9 +226,9 @@ result_df_v0_experimental <- function(tt, cellvals <- mf_result_numeric } } - + rdf <- make_row_df(tt) - + df <- cbind( rdf[ rdf$node_class != "LabelRow", @@ -234,39 +236,42 @@ result_df_v0_experimental <- function(tt, ], cellvals ) - + maxlen <- max(lengths(df$path)) metadf <- do.call( - rbind.data.frame, + rbind.data.frame, lapply( seq_len(NROW(df)), - function(ii) + function(ii) { handle_rdf_row(df[ii, ], maxlen = maxlen) + } ) ) - + ret <- cbind( metadf[metadf$node_class != "LabelRow", ], cellvals ) - + # If we want to expand colnames if (expand_colnames) { col_name_structure <- .get_formatted_colnames(clayout(tt)) number_of_non_data_cols <- which(colnames(ret) == "node_class") if (NCOL(ret) - number_of_non_data_cols != NCOL(col_name_structure)) { - stop("When expanding colnames structure, we were not able to find the same", - " number of columns as in the result data frame. This is a bug. Please report it.") # nocov + stop( + "When expanding colnames structure, we were not able to find the same", + " number of columns as in the result data frame. This is a bug. Please report it." + ) # nocov } - + buffer_rows_for_colnames <- matrix( - rep("", number_of_non_data_cols * NROW(col_name_structure)), + rep("", number_of_non_data_cols * NROW(col_name_structure)), nrow = NROW(col_name_structure) ) - + header_colnames_matrix <- cbind(buffer_rows_for_colnames, data.frame(col_name_structure)) colnames(header_colnames_matrix) <- colnames(ret) - + count_row <- NULL if (disp_ccounts(tt)) { ccounts <- col_counts(tt) @@ -279,7 +284,7 @@ result_df_v0_experimental <- function(tt, } ret <- rbind(header_colnames_matrix, ret) } - + ret } @@ -287,7 +292,7 @@ result_df_v0_experimental <- function(tt, if (is.null(dim(char_df))) { return(char_df[nzchar(char_df, keepNA = TRUE)]) } - + apply(char_df, 2, function(col_i) col_i[nzchar(col_i, keepNA = TRUE)]) } @@ -296,13 +301,13 @@ result_df_v0_experimental <- function(tt, if (is.null(dim(char_df))) { return(as.numeric(stringi::stri_extract_all(char_df, regex = "\\d+.\\d+"))) } - + apply(char_df, 2, function(col_i) { lapply( stringi::stri_extract_all(col_i, regex = "\\d+.\\d+"), as.numeric ) - }) %>% + }) %>% do.call(cbind, .) } @@ -325,11 +330,11 @@ make_result_df_md_colnames <- function(maxlen) { do_content_row <- function(rdfrow, maxlen) { pth <- rdfrow$path[[1]] - + contpos <- which(pth == "@content") - + seq_before <- seq_len(contpos - 1) - + c( as.list(pth[seq_before]), replicate(maxlen - contpos, list(NA_character_)), list(tail(pth, 1)), @@ -360,10 +365,10 @@ handle_rdf_row <- function(rdfrow, maxlen) { maxlen <- maxlen - 1 } ret <- switch(nclass, - LabelRow = do_label_row(rdfrow, maxlen), - ContentRow = do_content_row(rdfrow, maxlen), - DataRow = do_data_row(rdfrow, maxlen), - stop("Unrecognized node type in row dataframe, unable to generate result data frame") + LabelRow = do_label_row(rdfrow, maxlen), + ContentRow = do_content_row(rdfrow, maxlen), + DataRow = do_data_row(rdfrow, maxlen), + stop("Unrecognized node type in row dataframe, unable to generate result data frame") ) setNames(ret, make_result_df_md_colnames(maxlen)) } @@ -404,7 +409,7 @@ handle_rdf_row <- function(rdfrow, maxlen) { #' #' tbl <- build_table(lyt, ex_adsl) #' path_enriched_df(tbl) -#' +#' #' @export path_enriched_df <- function(tt, path_fun = collapse_path, value_fun = collapse_values) { rdf <- make_row_df(tt) diff --git a/tests/testthat/test-result_data_frame.R b/tests/testthat/test-result_data_frame.R index e678b0c3a..6037c6a55 100644 --- a/tests/testthat/test-result_data_frame.R +++ b/tests/testthat/test-result_data_frame.R @@ -49,19 +49,19 @@ test_that("Result Data Frame generation works v0", { result_df3 <- as_result_df(tbl3, spec_version) expect_identical(nrow(result_df3), 1L) - + ## test labels when no row splits lyt4 <- basic_table() %>% split_cols_by("ARM") %>% analyze(c("AGE", "SEX")) - - tbl4 <- build_table(lyt4, DM) + + tbl4 <- build_table(lyt4, DM) result_df4 <- as_result_df(tbl4) - + expect_identical( names(result_df4), c( - "avar_name", "row_name", "row_num", "is_group_summary", + "avar_name", "row_name", "row_num", "is_group_summary", "node_class", "A: Drug X", "B: Placebo", "C: Combination" ) ) @@ -70,32 +70,34 @@ test_that("Result Data Frame generation works v0", { test_that("as_result_df works with visual output (as_viewer)", { lyt <- make_big_lyt() tbl <- build_table(lyt, rawdat) - + res <- expect_silent(as_result_df(tbl, simplify = TRUE, as_viewer = TRUE)) expect_equal(res$ARM1.M[[1]], c(116.0, 45.3)) - + res <- expect_silent(as_result_df(tbl, simplify = TRUE, as_viewer = TRUE, as_strings = TRUE)) expect_equal(res$ARM1.M[[1]], "116 (45.3%)") - + mf <- matrix_form(tbl) - string_tbl <- mf_strings(mf)[-seq_len(mf_nlheader(mf)),] + string_tbl <- mf_strings(mf)[-seq_len(mf_nlheader(mf)), ] string_tbl <- string_tbl[nzchar(string_tbl[, 2]), ] colnames(string_tbl) <- colnames(res) expect_equal(res, data.frame(string_tbl)) - + res <- expect_silent(as_result_df(tbl, simplify = TRUE, as_strings = TRUE, expand_colnames = TRUE)) string_tbl <- mf_strings(mf) string_tbl <- data.frame(string_tbl[nzchar(string_tbl[, 2]), ]) colnames(string_tbl) <- colnames(res) string_tbl$row_name[seq_len(mf_nlheader(mf))] <- res$row_name[seq_len(mf_nlheader(mf))] expect_equal(res, string_tbl) - + expect_silent(basic_table() %>% build_table(DM) %>% as_result_df()) - - tbl <- basic_table(show_colcounts = TRUE) %>% analyze("BMRKR1") %>% build_table(DM) - expect_equal(as_result_df(tbl)$`all obs`, 5.851948, tolerance = 1e-6) + + tbl <- basic_table(show_colcounts = TRUE) %>% + analyze("BMRKR1") %>% + build_table(DM) + expect_equal(as_result_df(tbl)$`all obs`, 5.851948, tolerance = 1e-6) expect_equal( - as_result_df(tbl, as_viewer = TRUE)$`all obs`, + as_result_df(tbl, as_viewer = TRUE)$`all obs`, as.numeric(as_result_df(tbl, as_strings = TRUE)$`all obs`) ) expect_equal(as_result_df(tbl, expand_colnames = TRUE)$`all obs`[2], "356") From 0fb23c773f06f4d7d554e096f5febfce498b8bc3 Mon Sep 17 00:00:00 2001 From: Melkiades Date: Tue, 28 Nov 2023 16:36:33 +0100 Subject: [PATCH 7/9] adding stringi --- DESCRIPTION | 3 ++- R/tt_export.R | 7 ++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 177260bae..919a4fa9a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -37,7 +37,8 @@ Imports: checkmate (>= 2.1.0), grid, htmltools (>= 0.5.4), - stats + stats, + stringi (>= 1.6) Suggests: broom (>= 0.7.10), car (>= 3.0-13), diff --git a/R/tt_export.R b/R/tt_export.R index d3add1eec..6a7d1ec94 100644 --- a/R/tt_export.R +++ b/R/tt_export.R @@ -302,13 +302,14 @@ result_df_v0_experimental <- function(tt, return(as.numeric(stringi::stri_extract_all(char_df, regex = "\\d+.\\d+"))) } - apply(char_df, 2, function(col_i) { + ret <- apply(char_df, 2, function(col_i) { lapply( stringi::stri_extract_all(col_i, regex = "\\d+.\\d+"), as.numeric ) - }) %>% - do.call(cbind, .) + }) + + do.call(cbind, ret) } do_label_row <- function(rdfrow, maxlen) { From 6afbf55ee35279acc58a49173e447896531be459 Mon Sep 17 00:00:00 2001 From: Melkiades Date: Tue, 28 Nov 2023 16:38:26 +0100 Subject: [PATCH 8/9] news --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index 290224fba..8c4e446a7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -24,6 +24,8 @@ * Started deprecation cycle for `col_fnotes_here` to be replaced with `col_footnotes`. * Exported `section_div` methods now have a dedicated documentation page that is visible to users. * When tables are exported as `txt`, they preserve the horizontal separator of the table. + * Added imports on `stringi` and `checkmate` as they are fundamental packages for string handling and + argument checking. ## rtables 0.6.5 ### New Features From 9a53fd794ab5628234bd510e4198fcdcb98da508 Mon Sep 17 00:00:00 2001 From: Melkiades Date: Tue, 28 Nov 2023 21:15:16 +0100 Subject: [PATCH 9/9] integer fix --- R/tt_export.R | 4 ++-- tests/testthat/test-result_data_frame.R | 10 ++++++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/R/tt_export.R b/R/tt_export.R index 6a7d1ec94..932b8bd6e 100644 --- a/R/tt_export.R +++ b/R/tt_export.R @@ -299,12 +299,12 @@ result_df_v0_experimental <- function(tt, # Helper function to make the character matrix numeric .make_numeric_char_mf <- function(char_df) { if (is.null(dim(char_df))) { - return(as.numeric(stringi::stri_extract_all(char_df, regex = "\\d+.\\d+"))) + return(as.numeric(stringi::stri_extract_all(char_df, regex = "\\d+.\\d+|\\d+"))) } ret <- apply(char_df, 2, function(col_i) { lapply( - stringi::stri_extract_all(col_i, regex = "\\d+.\\d+"), + stringi::stri_extract_all(col_i, regex = "\\d+.\\d+|\\d+"), as.numeric ) }) diff --git a/tests/testthat/test-result_data_frame.R b/tests/testthat/test-result_data_frame.R index 6037c6a55..6a40be997 100644 --- a/tests/testthat/test-result_data_frame.R +++ b/tests/testthat/test-result_data_frame.R @@ -102,4 +102,14 @@ test_that("as_result_df works with visual output (as_viewer)", { ) expect_equal(as_result_df(tbl, expand_colnames = TRUE)$`all obs`[2], "356") expect_equal(as_result_df(tbl, expand_colnames = TRUE, as_strings = TRUE)$`all obs`[2], "(N=356)") + + + # Test for integer extraction and ranges + lyt <- basic_table() %>% + split_cols_by("ARM") %>% + split_rows_by("STRATA1") %>% + analyze("AGE", afun = function(x) list(a = mean(x), b = range(x))) + + tbl <- build_table(lyt, ex_adsl) + expect_equal(as_result_df(tbl, simplify = TRUE, as_viewer = TRUE)[2, 2][[1]], c(24, 46)) })