From 325aed03c61f0b50b4c46c6737b2d379023a4e3f Mon Sep 17 00:00:00 2001 From: waddella Date: Thu, 11 Oct 2018 23:28:41 +0200 Subject: [PATCH 01/21] change rtabulate to have col_N argument instead of col_total argument --- DESCRIPTION | 4 +- NAMESPACE | 1 + NEWS.md | 2 +- R/rtabulate.R | 153 ++++++++++++++++++++++++++---------- man/col_N.Rd | 45 +++++++++++ man/rtabulate.data.frame.Rd | 10 ++- man/rtabulate.factor.Rd | 11 +-- man/rtabulate.logical.Rd | 12 +-- man/rtabulate.numeric.Rd | 15 ++-- 9 files changed, 185 insertions(+), 68 deletions(-) create mode 100644 man/col_N.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 7e4f1436f..6031ca8e6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: rtables Title: Reporting Tables -Version: 0.1.0.5 +Version: 0.1.0.6 Authors@R: c( person("Adrian", "Waddell", email = "adrian.waddell@roche.com", role = c("aut", "cre")) ) @@ -18,7 +18,7 @@ Suggests: License: Apache License 2.0 | file LICENSE Encoding: UTF-8 LazyData: true -RoxygenNote: 6.0.1 +RoxygenNote: 6.1.0 VignetteBuilder: knitr URL: https://github.com/roche/rtables, https://roche.github.io/rtables/ BugReports: https://github.com/roche/rtables/issues diff --git a/NAMESPACE b/NAMESPACE index f5aeab82d..76f5ccf9c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -27,6 +27,7 @@ export("header<-") export(Viewer) export(as.rtable) export(as_html) +export(col_N) export(compare_rtables) export(format_rcell) export(header) diff --git a/NEWS.md b/NEWS.md index 7d544020a..1a9a28063 100644 --- a/NEWS.md +++ b/NEWS.md @@ -3,7 +3,7 @@ * added `sprintf_format` for formatting rcells (thanks to Doug Kelkhoff for the suggestion) * added `"(N=xx)"` and `">999.9"` format labels -* `rtabulate` has now an argument `col_total` and shows by default the numbers of elements associated with a column +* `rtabulate` has now an argument `col_N` and the function `col_N()` ## rtable 0.1.0 diff --git a/R/rtabulate.R b/R/rtabulate.R index 4e4b2c6c9..80245affa 100644 --- a/R/rtabulate.R +++ b/R/rtabulate.R @@ -36,6 +36,7 @@ no_by <- function(name) { structure(name, class = "no_by") } + #' Check if object inherits from the \code{no_by} Class #' #' Functions to test inheritance on \code{no_by} @@ -49,20 +50,63 @@ is.no_by <- function(x) { is(x, "no_by") } -rtabulate_header <- function(col_by, Ntot, N=NULL, format="(N=xx)") { - if (!is.factor(col_by) && !is.no_by(col_by)) stop("col_by is required to be a factor or no_by object") + +#' Query the column total +#' +#' If the \code{col_N} argument was specified in \code{rtabulate} then this +#' function returns the value for each cell data. +#' +#' @param x cell data object of \code{FUN} in \code{\link{rtabulate}} +#' +#' @export +#' +#' @examples +#' +#' x <- 1:3 +#' y <- factor(letters[1:3]) +#' +#' rtabulate(x, col_by = y, function(x) x) +#' +#' rtabulate(x, col_by = y, function(x) x, col_N = table(y)) +#' +#' rtabulate(x, col_by = y, function(x) col_N(x), col_N = c(8, 3, 7)) +#' +#' rtabulate(factor(LETTERS[1:3]), factor(letters[1:3]), function(x) col_N(x), col_N = c(8, 3, 7)) +#' +#' +#' df <- expand.grid(aaa = factor(c("A", "B")), bbb = factor(c("X", "Y", "Z"))) +#' df <- rbind(df, df) +#' df$val <- 1:nrow(df) +#' +#' rtabulate( +#' x = df, +#' row_by_var = "aaa", +#' col_by_var = "bbb", +#' FUN = function(x) { +#' col_N(x) +#' }, +#' col_N = c(8, 3, 7) +#' ) +#' +#' +col_N <- function(x) { + attr(x, "col_N") +} + +rtabulate_header <- function(col_by, col_N, format="(N=xx)") { + + if (!is.null(col_N) && length(col_N) != nlevels(col_by) && !is.numeric(col_N)) { + stop("col_N") + } lvls <- if (is.no_by(col_by)) as.vector(col_by) else levels(col_by) - if (is.null(format)) { + if (is.null(col_N)) { rheader(lvls) } else { - if (is.null(N)) { - N <- if (is.no_by(col_by)) Ntot else table(col_by) - } rheader( rrowl("", lvls), - rrowl("", unname(N), format = format) + rrowl("", unname(col_N), format = format) ) } } @@ -73,14 +117,11 @@ rtabulate_header <- function(col_by, Ntot, N=NULL, format="(N=xx)") { # # see parameter descrition for rtabulate.numeric # -rtabulate_default <- function(x, col_by = no_by("col_1"), FUN = NULL, ..., row_data_arg=FALSE, - format = NULL, row.name = "", indent = 0, col_total = "(N=xx)") { +rtabulate_default <- function(x, col_by = no_by("col_1"), FUN, ..., row_data_arg = FALSE, + format = NULL, row.name = "", indent = 0, col_N = NULL) { - if (is.null(FUN)) stop("FUN is required") - - stop_if_has_na(col_by) - - tbl_header <- rtabulate_header(col_by, length(x), format=col_total) + force(FUN) + check_stop_col_by(col_by) xs <- if (is.no_by(col_by)) { setNames(list(x), col_by) @@ -88,6 +129,10 @@ rtabulate_default <- function(x, col_by = no_by("col_1"), FUN = NULL, ..., row_d if (length(x) != length(col_by)) stop("dimension missmatch x and col_by") split(x, col_by, drop = FALSE) } + + if (!is.null(col_N)) { + xs <- Map(function(xi, N) structure(xi, col_N = N), xs, col_N) + } col_data <- if (row_data_arg) { lapply(xs, FUN, x, ...) @@ -97,16 +142,18 @@ rtabulate_default <- function(x, col_by = no_by("col_1"), FUN = NULL, ..., row_d rr <- rrowl(row.name = row.name, col_data, format = format, indent = indent) + tbl_header <- rtabulate_header(col_by, col_N) + rtable(header = tbl_header, rr) } #' tabulate a numeric vector -#' +#' #' by default the \code{\link[stats]{fivenum}} function is applied to the #' vectors associated to the cells -#' -#' +#' +#' #' @inheritParams rrow #' @param x a vecor #' @param col_by a \code{\link{factor}} of length \code{nrow(x)} that defines @@ -116,25 +163,26 @@ rtabulate_default <- function(x, col_by = no_by("col_1"), FUN = NULL, ..., row_d #' set to \code{TRUE} the a second argument with the row data is passed to #' \code{FUN} #' @param ... arguments passed to \code{FUN} -#' @param row_data_arg call \code{FUN} with the row data as the second argument +#' @param row_data_arg call \code{FUN} with the row data as the second argument #' @param format if \code{FUN} does not return a formatted \code{\link{rcell}} #' then the \code{format} is applied #' @param row.name if \code{NULL} then the \code{FUN} argument is deparsed and #' used as \code{row.name} of the \code{\link{rrow}} -#' @param col_total a format string for displaying the number of elements in the -#' column header. If \code{NULL} then no header row for the column is +#' @param col_N The column total for each column. If specified then +#' \code{\link{col_N}()} can be used on the cell data in order to retrieve the +#' column total. If \code{NULL} then no header row for the column is #' displayed. -#' +#' #' @inherit rtabulate return -#' +#' #' @export -#' -#' @examples +#' +#' @examples #' #' rtabulate(iris$Sepal.Length) -#' +#' #' rtabulate(iris$Sepal.Length, col_by = no_by("Sepal.Length")) -#' +#' #' with(iris, rtabulate(x = Sepal.Length, col_by = Species, row.name = "fivenum")) #' #' SL <- iris$Sepal.Length @@ -145,17 +193,17 @@ rtabulate_default <- function(x, col_by = no_by("col_1"), FUN = NULL, ..., row_d #' rtabulate(SL, Sp, median, row.name = "Median"), #' rtabulate(SL, Sp, range, format = "xx.xx - xx.xx", row.name = "Min - Max") #' ) -#' -#' +#' +#' #' rtabulate.numeric <- function(x, col_by = no_by("col_1"), FUN = mean, ..., row_data_arg = FALSE, format = NULL, row.name = NULL, - indent = 0, col_total = "(N=xx)") { + indent = 0, col_N = NULL) { if (is.null(row.name)) row.name <- paste0(deparse(substitute(FUN))) rtabulate_default(x = x, col_by = col_by, FUN = FUN, ..., row_data_arg = row_data_arg, format = format, row.name = row.name, indent = indent, - col_total = col_total) + col_N = col_N) } #' tabulate a logical vector @@ -182,19 +230,19 @@ rtabulate.numeric <- function(x, col_by = no_by("col_1"), FUN = mean, ..., #' )) #' rtabulate.logical <- function(x, col_by = no_by("col_1"), - FUN = function(x) sum(x) * c(1, 1/length(x)), + FUN = sum, ..., row_data_arg = FALSE, - format = "xx.xx (xx.xx%)", + format = NULL, row.name = "", indent = 0, - col_total = "(N=xx)" + col_N = NULL ) { if (is.null(row.name)) row.name <- paste0(deparse(substitute(FUN))) rtabulate_default(x = x, col_by = col_by, FUN = FUN, ..., row_data_arg = row_data_arg, format = format, row.name = row.name, indent = indent, - col_total = col_total) + col_N = col_N) } #' Tabulate Factors @@ -242,16 +290,15 @@ rtabulate.factor <- function(x, ..., row_col_data_args = FALSE, useNA = c("no", "ifany", "always"), - format = "xx", + format = NULL, indent = 0, - col_total = "(N=xx)") { + col_N = NULL) { - stop_if_has_na(col_by) + force(FUN) + check_stop_col_by(col_by) useNA <- match.arg(useNA) - tbl_header <- rtabulate_header(col_by, length(x), format=col_total) - if (useNA %in% c("ifany", "always")) { if (any("" %in% levels(x))) stop("cannot use useNA='ifany' or 'always' if there any levels called ") @@ -270,6 +317,8 @@ rtabulate.factor <- function(x, row_data_list <- split(x, x, drop = FALSE) + + # cell_data = list(row1 = list(col1, col2, ...), row2 = list(col1, col2, ...), ...) cell_data <- if (is.no_by(col_by)) { lapply(row_data_list, function(row_i) setNames(list(row_i), col_by)) } else { @@ -284,6 +333,10 @@ rtabulate.factor <- function(x, }) } + if (!is.null(col_N)) { + cell_data <- lapply(cell_data, function(row_i) Map(function(xi, N) structure(xi, col_N = N), row_i, col_N)) + } + rrow_data <- if (!row_col_data_args) { lapply(cell_data, function(row_i) lapply(row_i, FUN, ...)) } else { @@ -308,6 +361,8 @@ rtabulate.factor <- function(x, rrows <- Map(function(row, rowname) rrowl(rowname, row, format = format, indent = indent), rrow_data, names(rrow_data)) + tbl_header <- rtabulate_header(col_by, col_N) + rtablel(header = tbl_header, rrows) } @@ -397,15 +452,14 @@ rtabulate.data.frame <- function(x, FUN = nrow, ..., row_col_data_args = FALSE, - format = "xx", + format = NULL, indent = 0, - col_total = "(N=xx)") { + col_N = NULL) { if (!is.no_by(row_by_var) && !is.factor(x[[row_by_var]])) stop("x[[row_by_var]] currently needs to be a factor") if (!is.no_by(col_by_var) && !is.factor(x[[col_by_var]])) stop("x[[col_by_var]] currently needs to be a factor") - tbl_header <- rtabulate_header(if (is.no_by(col_by_var)) col_by_var else x[[col_by_var]], nrow(x), format=col_total) - + row_data <- if (is.no_by(row_by_var)) { setNames(list(x), row_by_var) } else { @@ -424,6 +478,11 @@ rtabulate.data.frame <- function(x, lapply(row_data, function(row_i) split(row_i, row_i[[col_by_var]], drop = FALSE)) } + + if (!is.null(col_N)) { + cell_data <- lapply(cell_data, function(row_i) Map(function(xi, N) structure(xi, col_N = N), row_i, col_N)) + } + rrow_data <- if (!row_col_data_args) { lapply(cell_data, function(row_i) lapply(row_i, FUN, ...)) } else { @@ -443,11 +502,19 @@ rtabulate.data.frame <- function(x, rrowl(row.name = rowname, row_dat, format = format, indent = indent) }, rrow_data, names(rrow_data)) + tbl_header <- rtabulate_header(if (is.no_by(col_by_var)) col_by_var else x[[col_by_var]], col_N) + rtablel(header = tbl_header, rrows) } +check_stop_col_by <- function(col_by) { + if (!is.factor(col_by) && !is.no_by(col_by)) stop("col_by is required to be a factor or no_by object") + if (any(is.na(col_by))) stop("col_by does currently not support any NAs") +} + + stop_if_has_na <- function(x) { if (any(is.na(x))) stop(paste0(deparse(substitute(x)), " does currently not support any NAs")) } diff --git a/man/col_N.Rd b/man/col_N.Rd new file mode 100644 index 000000000..a5e4bb106 --- /dev/null +++ b/man/col_N.Rd @@ -0,0 +1,45 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/rtabulate.R +\name{col_N} +\alias{col_N} +\title{Query the column total} +\usage{ +col_N(x) +} +\arguments{ +\item{x}{cell data object of \code{FUN} in \code{\link{rtabulate}}} +} +\description{ +If the \code{col_N} argument was specified in \code{rtabulate} then this +function returns the value for each cell data. +} +\examples{ + +x <- 1:3 +y <- factor(letters[1:3]) + +rtabulate(x, col_by = y, function(x) x) + +rtabulate(x, col_by = y, function(x) x, col_N = table(y)) + +rtabulate(x, col_by = y, function(x) col_N(x), col_N = c(8, 3, 7)) + +rtabulate(factor(LETTERS[1:3]), factor(letters[1:3]), function(x) col_N(x), col_N = c(8, 3, 7)) + + +df <- expand.grid(aaa = factor(c("A", "B")), bbb = factor(c("X", "Y", "Z"))) +df <- rbind(df, df) +df$val <- 1:nrow(df) + +rtabulate( + x = df, + row_by_var = "aaa", + col_by_var = "bbb", + FUN = function(x) { + col_N(x) + }, + col_N = c(8, 3, 7) +) + + +} diff --git a/man/rtabulate.data.frame.Rd b/man/rtabulate.data.frame.Rd index 2f4777a1f..31db44709 100644 --- a/man/rtabulate.data.frame.Rd +++ b/man/rtabulate.data.frame.Rd @@ -5,8 +5,9 @@ \title{Split data.frame and apply functions} \usage{ \method{rtabulate}{data.frame}(x, row_by_var = no_by("row_1"), - col_by_var = no_by("col_1"), FUN = nrow, ..., row_col_data_args = FALSE, - format = "xx", indent = 0, col_total = "(N=xx)") + col_by_var = no_by("col_1"), FUN = nrow, ..., + row_col_data_args = FALSE, format = NULL, indent = 0, + col_N = NULL) } \arguments{ \item{x}{a vecor} @@ -31,8 +32,9 @@ then the \code{format} is applied} \item{indent}{non-negative integer where 0 means that the row should not be indented} -\item{col_total}{a format string for displaying the number of elements in the -column header. If \code{NULL} then no header row for the column is +\item{col_N}{The column total for each column. If specified then +\code{\link{col_N}()} can be used on the cell data in order to retrieve the +column total. If \code{NULL} then no header row for the column is displayed.} } \value{ diff --git a/man/rtabulate.factor.Rd b/man/rtabulate.factor.Rd index dec2b0c3f..043940bc3 100644 --- a/man/rtabulate.factor.Rd +++ b/man/rtabulate.factor.Rd @@ -4,9 +4,9 @@ \alias{rtabulate.factor} \title{Tabulate Factors} \usage{ -\method{rtabulate}{factor}(x, col_by = no_by("col_1"), FUN = length, ..., - row_col_data_args = FALSE, useNA = c("no", "ifany", "always"), - format = "xx", indent = 0, col_total = "(N=xx)") +\method{rtabulate}{factor}(x, col_by = no_by("col_1"), FUN = length, + ..., row_col_data_args = FALSE, useNA = c("no", "ifany", "always"), + format = NULL, indent = 0, col_N = NULL) } \arguments{ \item{x}{a vecor} @@ -35,8 +35,9 @@ then the \code{format} is applied} \item{indent}{non-negative integer where 0 means that the row should not be indented} -\item{col_total}{a format string for displaying the number of elements in the -column header. If \code{NULL} then no header row for the column is +\item{col_N}{The column total for each column. If specified then +\code{\link{col_N}()} can be used on the cell data in order to retrieve the +column total. If \code{NULL} then no header row for the column is displayed.} } \value{ diff --git a/man/rtabulate.logical.Rd b/man/rtabulate.logical.Rd index 9f029dda1..bf8916873 100644 --- a/man/rtabulate.logical.Rd +++ b/man/rtabulate.logical.Rd @@ -4,10 +4,9 @@ \alias{rtabulate.logical} \title{tabulate a logical vector} \usage{ -\method{rtabulate}{logical}(x, col_by = no_by("col_1"), FUN = function(x) - sum(x) * c(1, 1/length(x)), ..., row_data_arg = FALSE, - format = "xx.xx (xx.xx\%)", row.name = "", indent = 0, - col_total = "(N=xx)") +\method{rtabulate}{logical}(x, col_by = no_by("col_1"), FUN = sum, ..., + row_data_arg = FALSE, format = NULL, row.name = "", indent = 0, + col_N = NULL) } \arguments{ \item{x}{a vecor} @@ -33,8 +32,9 @@ used as \code{row.name} of the \code{\link{rrow}}} \item{indent}{non-negative integer where 0 means that the row should not be indented} -\item{col_total}{a format string for displaying the number of elements in the -column header. If \code{NULL} then no header row for the column is +\item{col_N}{The column total for each column. If specified then +\code{\link{col_N}()} can be used on the cell data in order to retrieve the +column total. If \code{NULL} then no header row for the column is displayed.} } \value{ diff --git a/man/rtabulate.numeric.Rd b/man/rtabulate.numeric.Rd index 37b691dff..8f183ca5a 100644 --- a/man/rtabulate.numeric.Rd +++ b/man/rtabulate.numeric.Rd @@ -4,9 +4,9 @@ \alias{rtabulate.numeric} \title{tabulate a numeric vector} \usage{ -\method{rtabulate}{numeric}(x, col_by = no_by("col_1"), FUN = mean, ..., - row_data_arg = FALSE, format = NULL, row.name = NULL, indent = 0, - col_total = "(N=xx)") +\method{rtabulate}{numeric}(x, col_by = no_by("col_1"), FUN = mean, + ..., row_data_arg = FALSE, format = NULL, row.name = NULL, + indent = 0, col_N = NULL) } \arguments{ \item{x}{a vecor} @@ -32,8 +32,9 @@ used as \code{row.name} of the \code{\link{rrow}}} \item{indent}{non-negative integer where 0 means that the row should not be indented} -\item{col_total}{a format string for displaying the number of elements in the -column header. If \code{NULL} then no header row for the column is +\item{col_N}{The column total for each column. If specified then +\code{\link{col_N}()} can be used on the cell data in order to retrieve the +column total. If \code{NULL} then no header row for the column is displayed.} } \value{ @@ -46,9 +47,9 @@ vectors associated to the cells \examples{ rtabulate(iris$Sepal.Length) - + rtabulate(iris$Sepal.Length, col_by = no_by("Sepal.Length")) - + with(iris, rtabulate(x = Sepal.Length, col_by = Species, row.name = "fivenum")) SL <- iris$Sepal.Length From 8aacd7aaf37da21d31b0d65c05b75facf15c1cbe Mon Sep 17 00:00:00 2001 From: waddella Date: Sat, 20 Oct 2018 20:23:18 +0200 Subject: [PATCH 02/21] add example --- R/rtabulate.R | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/R/rtabulate.R b/R/rtabulate.R index 80245affa..013004872 100644 --- a/R/rtabulate.R +++ b/R/rtabulate.R @@ -284,6 +284,12 @@ rtabulate.logical <- function(x, col_by = no_by("col_1"), #' #' rtabulate(sl5, iris$Species) #' +#' +#' x <- factor(LETTERS[1:4], levels = LETTERS[1:4]) +#' col_by <- factor(c("a", "a", "b", "c"), levels = letters[1:4]) +#' rtabulate(x, col_by, length) +#' +#' rtabulate.factor <- function(x, col_by = no_by("col_1"), FUN = length, From 9fd1627e43991951cb267cd4d75446ece48cfdbe Mon Sep 17 00:00:00 2001 From: waddella Date: Mon, 22 Oct 2018 19:17:52 +0200 Subject: [PATCH 03/21] fix bug in rtabulate.factor where factor(col_by) dropped the levels of col_by --- R/rtabulate.R | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/R/rtabulate.R b/R/rtabulate.R index 013004872..fd2432d4d 100644 --- a/R/rtabulate.R +++ b/R/rtabulate.R @@ -285,9 +285,10 @@ rtabulate.logical <- function(x, col_by = no_by("col_1"), #' rtabulate(sl5, iris$Species) #' #' -#' x <- factor(LETTERS[1:4], levels = LETTERS[1:4]) -#' col_by <- factor(c("a", "a", "b", "c"), levels = letters[1:4]) -#' rtabulate(x, col_by, length) +#' +#' rtabulate(x = factor(c("X", "Y"), c("X", "Y")), col_by = factor(c("a", "a"), c("a", "b")), FUN = length) +#' +#' rtabulate(factor(c("Y", "Y"), c("X", "Y")), factor(c("b", "b"), c("a", "b")), length) #' #' rtabulate.factor <- function(x, @@ -305,9 +306,9 @@ rtabulate.factor <- function(x, useNA <- match.arg(useNA) + if (any("" %in% levels(x))) stop("factor with level '' is not valid in rtabulate.factor") + if (useNA %in% c("ifany", "always")) { - if (any("" %in% levels(x))) stop("cannot use useNA='ifany' or 'always' if there any levels called ") - if (useNA == "always" || any(is.na(x))) { levels(x) <- c(levels(x), "") x[is.na(x)] <- "" @@ -329,10 +330,9 @@ rtabulate.factor <- function(x, lapply(row_data_list, function(row_i) setNames(list(row_i), col_by)) } else { if (length(x) != length(col_by)) stop("dimension missmatch x and col_by") - df <- data.frame( x = x, - col_by = factor(col_by) + col_by = col_by ) lapply(split(df, df$x, drop = FALSE), function(row_i) { split(row_i$x, row_i$col_by, drop = FALSE) From 3955f5c91839e69479233dbc3eb29b4cc4b3faee Mon Sep 17 00:00:00 2001 From: waddella Date: Mon, 22 Oct 2018 19:18:31 +0200 Subject: [PATCH 04/21] add unit tests for rtabulate --- tests/testthat/test-rtabulate.R | 40 +++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 tests/testthat/test-rtabulate.R diff --git a/tests/testthat/test-rtabulate.R b/tests/testthat/test-rtabulate.R new file mode 100644 index 000000000..8db900fe4 --- /dev/null +++ b/tests/testthat/test-rtabulate.R @@ -0,0 +1,40 @@ +context("rtabulate") + +test_that("rtabulate length tests", { + + cells <- function(x) as.vector(unlist(x)) # current implementation + + cb <- letters[1:2] + cbf <- factor(cb, levels = cb) + + rt_test <- function(x) { + tbl <- rtabulate(x, cbf, length) + expect_true(all(cells(tbl) == 1), "subset fail") + + tbl <- rtabulate(x[1], cbf[1], length) + expect_identical(cells(tbl), c(1L, 0L), "0 count") + + tbl <- rtabulate(numeric(0), cbf[c(FALSE, FALSE)], length) + expect_identical(cells(tbl), c(0L, 0L), "0 count 2") + } + + + rt_test(seq_along(cb)) # numeric + rt_test(rep(TRUE, length(cb))) # logical + + # factor + + tbl <- rtabulate(cbf, cbf) + expect_identical(cells(tbl), c(1L, 0L, 0L, 1L), "factor") + + tbl <- rtabulate(factor(c("X", "X"), c("X", "Y")), cbf, length) + expect_identical(cells(tbl), c(1L, 1L, 0L, 0L), "factor") + + tbl <- rtabulate(factor(c("X", "Y"), c("X", "Y")), factor(c("a", "a"), c("a", "b")), length) + expect_identical(cells(tbl), c(1L, 0L, 1L, 0L), "factor") + + tbl <- rtabulate(factor(c("Y", "Y"), c("X", "Y")), factor(c("b", "b"), c("a", "b")), length) + expect_identical(cells(tbl), c(0L, 0L, 0L, 2L), "factor") + + +}) From 74ce7965329a395ce46650d00199cc4329c82b2f Mon Sep 17 00:00:00 2001 From: waddella Date: Fri, 26 Oct 2018 17:27:20 +0200 Subject: [PATCH 05/21] refactor rtablulate: * remove row_data_arg, row_col_data_arg, col_N arguments * add col_wise_args * change row_by_var and col_by_var in rtabulate.data.frame to row_by and col_by --- NAMESPACE | 3 +- R/rtabulate.R | 369 ++++++++++++++++-------------------- man/col_N.Rd | 45 ----- man/rtabulate.data.frame.Rd | 85 ++++----- man/rtabulate.factor.Rd | 38 ++-- man/rtabulate.logical.Rd | 23 +-- man/rtabulate.numeric.Rd | 16 +- 7 files changed, 246 insertions(+), 333 deletions(-) delete mode 100644 man/col_N.Rd diff --git a/NAMESPACE b/NAMESPACE index 76f5ccf9c..94d4d5906 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -10,6 +10,7 @@ S3method(as_html,rrow) S3method(as_html,rtable) S3method(dim,rheader) S3method(dim,rtable) +S3method(levels,no_by) S3method(names,rtable) S3method(print,rcell) S3method(print,rrow) @@ -27,10 +28,10 @@ export("header<-") export(Viewer) export(as.rtable) export(as_html) -export(col_N) export(compare_rtables) export(format_rcell) export(header) +export(header_add_N) export(indented_row.names) export(is.no_by) export(list_rcell_format_labels) diff --git a/R/rtabulate.R b/R/rtabulate.R index fd2432d4d..03b455ba4 100644 --- a/R/rtabulate.R +++ b/R/rtabulate.R @@ -50,66 +50,8 @@ is.no_by <- function(x) { is(x, "no_by") } - -#' Query the column total -#' -#' If the \code{col_N} argument was specified in \code{rtabulate} then this -#' function returns the value for each cell data. -#' -#' @param x cell data object of \code{FUN} in \code{\link{rtabulate}} -#' #' @export -#' -#' @examples -#' -#' x <- 1:3 -#' y <- factor(letters[1:3]) -#' -#' rtabulate(x, col_by = y, function(x) x) -#' -#' rtabulate(x, col_by = y, function(x) x, col_N = table(y)) -#' -#' rtabulate(x, col_by = y, function(x) col_N(x), col_N = c(8, 3, 7)) -#' -#' rtabulate(factor(LETTERS[1:3]), factor(letters[1:3]), function(x) col_N(x), col_N = c(8, 3, 7)) -#' -#' -#' df <- expand.grid(aaa = factor(c("A", "B")), bbb = factor(c("X", "Y", "Z"))) -#' df <- rbind(df, df) -#' df$val <- 1:nrow(df) -#' -#' rtabulate( -#' x = df, -#' row_by_var = "aaa", -#' col_by_var = "bbb", -#' FUN = function(x) { -#' col_N(x) -#' }, -#' col_N = c(8, 3, 7) -#' ) -#' -#' -col_N <- function(x) { - attr(x, "col_N") -} - -rtabulate_header <- function(col_by, col_N, format="(N=xx)") { - - if (!is.null(col_N) && length(col_N) != nlevels(col_by) && !is.numeric(col_N)) { - stop("col_N") - } - - lvls <- if (is.no_by(col_by)) as.vector(col_by) else levels(col_by) - - if (is.null(col_N)) { - rheader(lvls) - } else { - rheader( - rrowl("", lvls), - rrowl("", unname(col_N), format = format) - ) - } -} +levels.no_by <- function(x) as.vector(x) # rtabulate default for vectors # @@ -117,37 +59,41 @@ rtabulate_header <- function(col_by, col_N, format="(N=xx)") { # # see parameter descrition for rtabulate.numeric # -rtabulate_default <- function(x, col_by = no_by("col_1"), FUN, ..., row_data_arg = FALSE, - format = NULL, row.name = "", indent = 0, col_N = NULL) { +rtabulate_default <- function(x, col_by = no_by("col_1"), FUN, ..., + format = NULL, row.name = "", indent = 0, + col_wise_args = NULL) { force(FUN) - check_stop_col_by(col_by) + check_stop_col_by(col_by, col_wise_args) - xs <- if (is.no_by(col_by)) { + column_data <- if (is.no_by(col_by)) { setNames(list(x), col_by) } else { if (length(x) != length(col_by)) stop("dimension missmatch x and col_by") split(x, col_by, drop = FALSE) } - if (!is.null(col_N)) { - xs <- Map(function(xi, N) structure(xi, col_N = N), xs, col_N) - } - - col_data <- if (row_data_arg) { - lapply(xs, FUN, x, ...) + cells <- if (is.null(col_wise_args)) { + + lapply(column_data, FUN, ...) + } else { - lapply(xs, FUN, ...) + + dots <- list(...) + args <- lapply(seq_len(nlevels(col_by)), function(i) c(dots, lapply(col_wise_args, `[[`, i))) + + Map(function(xi, argsi) { + do.call(FUN, c(list(xi), argsi)) + }, column_data, args) } - rr <- rrowl(row.name = row.name, col_data, format = format, indent = indent) - - tbl_header <- rtabulate_header(col_by, col_N) - - rtable(header = tbl_header, rr) + rr <- rrowl(row.name = row.name, cells, format = format, indent = indent) + rtable(header = levels(col_by), rr) } + + #' tabulate a numeric vector #' #' by default the \code{\link[stats]{fivenum}} function is applied to the @@ -193,17 +139,28 @@ rtabulate_default <- function(x, col_by = no_by("col_1"), FUN, ..., row_data_arg #' rtabulate(SL, Sp, median, row.name = "Median"), #' rtabulate(SL, Sp, range, format = "xx.xx - xx.xx", row.name = "Min - Max") #' ) -#' -#' +#' +#' +#' x <- 1:100 +#' cb <- factor(rep(LETTERS[1:3], c(20, 30, 50))) +#' +#' rtabulate( +#' x = x, col_by = cb, FUN = function(x, N) list(mean(x), sd(x), N), +#' format = sprintf_format("%.2f (%.2f) and %i"), row.name = "Mean (SD) and N", +#' col_wise_args = list(N = table(cb)) +#' ) #' rtabulate.numeric <- function(x, col_by = no_by("col_1"), FUN = mean, ..., - row_data_arg = FALSE, format = NULL, row.name = NULL, - indent = 0, col_N = NULL) { - if (is.null(row.name)) row.name <- paste0(deparse(substitute(FUN))) - rtabulate_default(x = x, col_by = col_by, FUN = FUN, ..., - row_data_arg = row_data_arg, format = format, - row.name = row.name, indent = indent, - col_N = col_N) + format = NULL, row.name = NULL, + indent = 0, col_wise_args = NULL) { + + if (is.null(row.name)) row.name <- paste(deparse(substitute(FUN)), collapse = ";") + + rtabulate_default( + x = x, col_by = col_by, FUN = FUN, ..., + format = format, row.name = row.name, indent = indent, + col_wise_args = col_wise_args + ) } #' tabulate a logical vector @@ -217,32 +174,39 @@ rtabulate.numeric <- function(x, col_by = no_by("col_1"), FUN = mean, ..., #' @examples #' rtabulate(iris$Species == "setosa") #' -#' rtabulate(iris$Species == "setosa", no_by("Species"), row.name = "n (n/N)") +#' rtabulate(iris$Species == "setosa", no_by("Species"), +#' FUN = function(x, N) list(sum(x), sum(x)/N), +#' row.name = "n (n/N)", +#' col_wise_args = list(N = 150)) #' #' # default: percentages equal \code{TRUE} #' with(iris, rtabulate(Sepal.Length < 5, Species, row.name = "Sepal.Length < 5")) #' #' # precentages with proportion of cell number of \code{TRUE}s to overvall #' # number of \code{TRUE}s -#' with(iris, rtabulate(Sepal.Length < 5, Species, row.name = "Sepal.Length < 5", -#' FUN = function(cell_data, row_data) sum(cell_data) * c(1, 1/sum(row_data)), -#' row_data_arg = TRUE +#' with(iris, rtabulate(Sepal.Length < 5, Species, +#' FUN = function(xi, N) sum(xi) * c(1, 1/N), +#' format = "xx.xx (xx.xx)", +#' row.name = "Sepal.Length < 5", +#' col_wise_args = list(N = table(cb)) #' )) #' rtabulate.logical <- function(x, col_by = no_by("col_1"), FUN = sum, ..., - row_data_arg = FALSE, format = NULL, - row.name = "", + row.name = NULL, indent = 0, - col_N = NULL + col_wise_args = NULL ) { - if (is.null(row.name)) row.name <- paste0(deparse(substitute(FUN))) - rtabulate_default(x = x, col_by = col_by, FUN = FUN, ..., - row_data_arg = row_data_arg, format = format, - row.name = row.name, indent = indent, - col_N = col_N) + + if (is.null(row.name)) row.name <- paste(deparse(substitute(FUN)), collapse = ";") + + rtabulate_default( + x = x, col_by = col_by, FUN = FUN, ..., + format = format, row.name = row.name, indent = indent, + col_wise_args = col_wise_args + ) } #' Tabulate Factors @@ -271,15 +235,15 @@ rtabulate.logical <- function(x, col_by = no_by("col_1"), #' rtabulate(iris$Species, col_by=sl5) #' #' rtabulate(iris$Species, col_by=sl5, -#' FUN = function(cell_data, row_data, col_data) { +#' FUN = function(cell_data, N) { #' if (length(cell_data) > 10) { -#' length(cell_data) * c(1, 1/length(col_data)) +#' length(cell_data) * c(1, 1/N) #' } else { #' rcell("-", format = "xx") #' } #' }, -#' row_col_data_args = TRUE, -#' format = "xx (xx.xx%)" +#' format = "xx (xx.xx%)", +#' col_wise_args = list(N = table(sl5)) #' ) #' #' rtabulate(sl5, iris$Species) @@ -291,18 +255,25 @@ rtabulate.logical <- function(x, col_by = no_by("col_1"), #' rtabulate(factor(c("Y", "Y"), c("X", "Y")), factor(c("b", "b"), c("a", "b")), length) #' #' +#' rtabulate( +#' x = factor(c("Y", "Y"), c("X", "Y")), +#' col_by = factor(c("b", "b"), c("a", "b")), +#' FUN = function(x, N) list(length(x), N), +#' col_wise_args = list(N = c(1,2)) +#' ) +#' +#' rtabulate.factor <- function(x, col_by = no_by("col_1"), FUN = length, ..., - row_col_data_args = FALSE, useNA = c("no", "ifany", "always"), format = NULL, indent = 0, - col_N = NULL) { + col_wise_args = NULL) { force(FUN) - check_stop_col_by(col_by) + check_stop_col_by(col_by, col_wise_args) useNA <- match.arg(useNA) @@ -321,13 +292,11 @@ rtabulate.factor <- function(x, levels(x) <- gsub("^$", "-", levels(x)) warning("'' levels were turned into level -") } - - row_data_list <- split(x, x, drop = FALSE) # cell_data = list(row1 = list(col1, col2, ...), row2 = list(col1, col2, ...), ...) - cell_data <- if (is.no_by(col_by)) { - lapply(row_data_list, function(row_i) setNames(list(row_i), col_by)) + cell_data_by_row <- if (is.no_by(col_by)) { + lapply(split(x, x, drop = FALSE), function(row_i) setNames(list(row_i), col_by)) } else { if (length(x) != length(col_by)) stop("dimension missmatch x and col_by") df <- data.frame( @@ -339,37 +308,27 @@ rtabulate.factor <- function(x, }) } - if (!is.null(col_N)) { - cell_data <- lapply(cell_data, function(row_i) Map(function(xi, N) structure(xi, col_N = N), row_i, col_N)) - } - rrow_data <- if (!row_col_data_args) { - lapply(cell_data, function(row_i) lapply(row_i, FUN, ...)) + cells_by_row <- if (is.null(col_wise_args)) { + + lapply(cell_data_by_row, function(row_i) lapply(row_i, FUN, ...)) + } else { - col_data_list <- if (is.no_by(col_by)) { - setNames(list(x), col_by) - } else { - split(x, col_by, drop = FALSE) - } - - rrow_data_tmp <- lapply(1:length(row_data_list), function(i) { - rrow_data_i <- lapply(1:length(col_data_list), function(j) { - FUN(cell_data[[i]][[j]], row_data_list[[i]], col_data_list[[j]], ...) - }) - names(rrow_data_i) <- names(col_data_list) - rrow_data_i + dots <- list(...) + args <- lapply(seq_len(nlevels(col_by)), function(i) c(dots, lapply(col_wise_args, `[[`, i))) + + lapply(cell_data_by_row, function(row_i) { + Map(function(xi, argsi) { + do.call(FUN, c(list(xi), argsi)) + }, row_i, args) }) - names(rrow_data_tmp) <- names(row_data_list) - rrow_data_tmp } rrows <- Map(function(row, rowname) rrowl(rowname, row, format = format, indent = indent), - rrow_data, names(rrow_data)) + cells_by_row, names(cells_by_row)) - tbl_header <- rtabulate_header(col_by, col_N) - - rtablel(header = tbl_header, rrows) + rtablel(header = levels(col_by), rrows) } @@ -394,8 +353,8 @@ rtabulate.factor <- function(x, #' #' rtabulate( #' x = df, -#' row_by_var = "aaa", -#' col_by_var = "bbb", +#' row_by = df$aaa, +#' col_by = df$bbb, #' FUN = function(x) { #' sum(x$val) #' } @@ -403,121 +362,119 @@ rtabulate.factor <- function(x, #' #' rtabulate( #' x = iris, -#' row_by_var = no_by("sum"), -#' col_by_var = "Species", +#' row_by = no_by("sum"), +#' col_by = iris$Species, #' FUN = function(x) sum(x$Sepal.Length) #' ) #' #' rtabulate( #' x = iris, -#' row_by_var = "Species", -#' col_by_var = no_by("sum"), +#' row_by = iris$Species, +#' col_by = no_by("sum"), #' FUN = function(x) sum(x$Sepal.Length) #' ) #' -#' tbl <- rtabulate( -#' x = iris, -#' FUN = function(cell_data) c(sum(cell_data$Sepal.Length), sd(cell_data$Sepal.Length)), -#' format = "xx.xx (xx.xx%)" -#' ) -#' -#' tbl -#' -#' row.names(tbl) -#' row.names(tbl) <- "Sum of Sepal Length" -#' -#' tbl -#' -#' iris2 <- iris -#' iris2$fsl5 <- factor(iris$Sepal.Length > 5, levels = c(TRUE, FALSE), +#' fsl5 <- factor(iris$Sepal.Length > 5, levels = c(TRUE, FALSE), #' labels = c("S.L > 5", "S.L <= 5")) #' #' tbl <- rtabulate( -#' x = iris2, -#' row_by_var = "fsl5", -#' col_by_var = "Species", -#' FUN = function(x_cell, x_row, x_col) { +#' x = iris, +#' row_by = fsl5, +#' col_by = iris$Species, +#' FUN = function(x_cell) { #' if (nrow(x_cell) < 10) { #' rcell("-") #' } else { #' fit <- lm(Sepal.Length ~ Petal.Width, data = x_cell) -#' m_col <- mean(x_col$Sepal.Length) -#' m_row <- mean(x_row$Sepal.Length) -#' -#' rcell(list(fit, m_col, m_row), format = function(x, output) { -#' paste("df:", x[[1]]$df.residual,", and", round(x[[2]],1), ", and", round(x[[3]],2)) +#' +#' rcell(list(fit), format = function(x, output) { +#' paste("df:", x[[1]]$df.residual) #' }) #' } -#' } , -#' row_col_data_args = TRUE +#' } #' ) #' tbl +#' +#' +#' +#' rtabulate( +#' x = iris, +#' row_by = fsl5, +#' col_by = iris$Species, +#' FUN = function(x_cell, N) { +#' N +#' }, +#' col_wise_args = list(N = c(10, 100, 200)) +#' ) +#' +#' +#' rtabulate.data.frame <- function(x, - row_by_var = no_by("row_1"), - col_by_var = no_by("col_1"), - FUN = nrow, + row_by, + col_by, + FUN, ..., - row_col_data_args = FALSE, format = NULL, indent = 0, - col_N = NULL) { - - if (!is.no_by(row_by_var) && !is.factor(x[[row_by_var]])) stop("x[[row_by_var]] currently needs to be a factor") - if (!is.no_by(col_by_var) && !is.factor(x[[col_by_var]])) stop("x[[col_by_var]] currently needs to be a factor") - - - row_data <- if (is.no_by(row_by_var)) { - setNames(list(x), row_by_var) - } else { - split(x, x[[row_by_var]], drop = FALSE) - } + col_wise_args = NULL) { - col_data <- if (is.no_by(col_by_var)) { - setNames(list(x), col_by_var) - } else { - split(x, x[[col_by_var]], drop = FALSE) - } + force(FUN) + check_stop_col_by(col_by, col_wise_args) + check_stop_col_by(row_by) - cell_data <- if (is.no_by(col_by_var)) { - lapply(row_data, function(row_i) setNames(list(row_i), col_by_var)) + + # list(row1 = list(c1, c2, ...), row2 = list(c1, c2, ...), ...) + cell_data <- if (!is.no_by(row_by) && !is.no_by(col_by)) { + xs <- split(x, row_by, drop = FALSE) + cs <- split(col_by, row_by, drop = FALSE) + setNames(Map(function(xi, col_by_i) split(xi, col_by_i), xs, cs), levels(row_by)) + } else if (is.no_by(row_by) && !is.no_by(col_by)) { + setNames(list(split(x, col_by, drop = FALSE)), row_by) + } else if (!is.no_by(row_by) && is.no_by(col_by)) { + lapply(split(x, row_by, drop = FALSE), function(xi) list(xi)) + } else if (is.no_by(row_by) && is.no_by(col_by)) { + setNames(list(list(x)), row_by) } else { - lapply(row_data, function(row_i) split(row_i, row_i[[col_by_var]], drop = FALSE)) - } - - - if (!is.null(col_N)) { - cell_data <- lapply(cell_data, function(row_i) Map(function(xi, N) structure(xi, col_N = N), row_i, col_N)) + stop("unexpected col_by & row_by combination") } + - rrow_data <- if (!row_col_data_args) { + cells_by_row <- if (is.null(col_wise_args)) { lapply(cell_data, function(row_i) lapply(row_i, FUN, ...)) } else { - rrow_data_tmp <- lapply(1:length(row_data), function(i) { - rrow_data_i <- lapply(1:length(col_data), function(j) { - FUN(cell_data[[i]][[j]], row_data[[i]], col_data[[j]], ...) - }) - names(rrow_data_i) <- names(col_data) - rrow_data_i + dots <- list(...) + args <- lapply(seq_len(nlevels(col_by)), function(i) c(dots, lapply(col_wise_args, `[[`, i))) + + lapply(cell_data, function(row_i) { + Map(function(xi, argsi) { + do.call(FUN, c(list(xi), argsi)) + }, row_i, args) }) - names(rrow_data_tmp) <- names(row_data) - rrow_data_tmp + } - rrows <- Map(function(row_dat, rowname) { - rrowl(row.name = rowname, row_dat, format = format, indent = indent) - }, rrow_data, names(rrow_data)) - - tbl_header <- rtabulate_header(if (is.no_by(col_by_var)) col_by_var else x[[col_by_var]], col_N) + rrows <- Map(function(cells_row, rowname) { + rrowl(row.name = rowname, cells_row, format = format, indent = indent) + }, cells_by_row, names(cells_by_row)) - rtablel(header = tbl_header, rrows) + rtablel(header = levels(col_by), rrows) } -check_stop_col_by <- function(col_by) { +check_stop_col_by <- function(col_by, col_wise_args = NULL) { if (!is.factor(col_by) && !is.no_by(col_by)) stop("col_by is required to be a factor or no_by object") if (any(is.na(col_by))) stop("col_by does currently not support any NAs") + + if (!is.null(col_wise_args)) { + if (!is.list(col_wise_args)) + stop("col_wise_args needs to be either a list or NULL") + + if (!all(vapply(col_wise_args, length, numeric(1)) == nlevels(col_by))) + stop("not all elements in col_wise_args have length ", nlevels(col_by)) + } + TRUE } diff --git a/man/col_N.Rd b/man/col_N.Rd deleted file mode 100644 index a5e4bb106..000000000 --- a/man/col_N.Rd +++ /dev/null @@ -1,45 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/rtabulate.R -\name{col_N} -\alias{col_N} -\title{Query the column total} -\usage{ -col_N(x) -} -\arguments{ -\item{x}{cell data object of \code{FUN} in \code{\link{rtabulate}}} -} -\description{ -If the \code{col_N} argument was specified in \code{rtabulate} then this -function returns the value for each cell data. -} -\examples{ - -x <- 1:3 -y <- factor(letters[1:3]) - -rtabulate(x, col_by = y, function(x) x) - -rtabulate(x, col_by = y, function(x) x, col_N = table(y)) - -rtabulate(x, col_by = y, function(x) col_N(x), col_N = c(8, 3, 7)) - -rtabulate(factor(LETTERS[1:3]), factor(letters[1:3]), function(x) col_N(x), col_N = c(8, 3, 7)) - - -df <- expand.grid(aaa = factor(c("A", "B")), bbb = factor(c("X", "Y", "Z"))) -df <- rbind(df, df) -df$val <- 1:nrow(df) - -rtabulate( - x = df, - row_by_var = "aaa", - col_by_var = "bbb", - FUN = function(x) { - col_N(x) - }, - col_N = c(8, 3, 7) -) - - -} diff --git a/man/rtabulate.data.frame.Rd b/man/rtabulate.data.frame.Rd index 31db44709..ea13ffb5c 100644 --- a/man/rtabulate.data.frame.Rd +++ b/man/rtabulate.data.frame.Rd @@ -4,17 +4,15 @@ \alias{rtabulate.data.frame} \title{Split data.frame and apply functions} \usage{ -\method{rtabulate}{data.frame}(x, row_by_var = no_by("row_1"), - col_by_var = no_by("col_1"), FUN = nrow, ..., - row_col_data_args = FALSE, format = NULL, indent = 0, - col_N = NULL) +\method{rtabulate}{data.frame}(x, row_by, col_by, FUN, ..., + format = NULL, indent = 0, col_wise_args = NULL) } \arguments{ \item{x}{a vecor} -\item{row_by_var}{name of factor variable in \code{x}} - -\item{col_by_var}{name of factor variable in \code{x}} +\item{col_by}{a \code{\link{factor}} of length \code{nrow(x)} that defines +which levels in \code{col_by} define a column. If data should not be split +into columns use the \code{\link{no_by}} function.} \item{FUN}{a function that processes the cell data, if \code{row_data_arg} is set to \code{TRUE} the a second argument with the row data is passed to @@ -22,20 +20,15 @@ set to \code{TRUE} the a second argument with the row data is passed to \item{...}{arguments passed to \code{FUN}} -\item{row_col_data_args}{boolean, if \code{TRUE} then \code{FUN} is called -with the first three arguments being the cell, row, and column data, -respectively} - \item{format}{if \code{FUN} does not return a formatted \code{\link{rcell}} then the \code{format} is applied} \item{indent}{non-negative integer where 0 means that the row should not be indented} -\item{col_N}{The column total for each column. If specified then -\code{\link{col_N}()} can be used on the cell data in order to retrieve the -column total. If \code{NULL} then no header row for the column is -displayed.} +\item{row_by_var}{name of factor variable in \code{x}} + +\item{col_by_var}{name of factor variable in \code{x}} } \value{ an \code{\link{rtable}} project @@ -50,8 +43,8 @@ df$val <- 1:nrow(df) rtabulate( x = df, - row_by_var = "aaa", - col_by_var = "bbb", + row_by = df$aaa, + col_by = df$bbb, FUN = function(x) { sum(x$val) } @@ -59,53 +52,51 @@ rtabulate( rtabulate( x = iris, - row_by_var = no_by("sum"), - col_by_var = "Species", + row_by = no_by("sum"), + col_by = iris$Species, FUN = function(x) sum(x$Sepal.Length) ) rtabulate( x = iris, - row_by_var = "Species", - col_by_var = no_by("sum"), + row_by = iris$Species, + col_by = no_by("sum"), FUN = function(x) sum(x$Sepal.Length) ) -tbl <- rtabulate( - x = iris, - FUN = function(cell_data) c(sum(cell_data$Sepal.Length), sd(cell_data$Sepal.Length)), - format = "xx.xx (xx.xx\%)" -) - -tbl - -row.names(tbl) -row.names(tbl) <- "Sum of Sepal Length" - -tbl - -iris2 <- iris -iris2$fsl5 <- factor(iris$Sepal.Length > 5, levels = c(TRUE, FALSE), +fsl5 <- factor(iris$Sepal.Length > 5, levels = c(TRUE, FALSE), labels = c("S.L > 5", "S.L <= 5")) tbl <- rtabulate( - x = iris2, - row_by_var = "fsl5", - col_by_var = "Species", - FUN = function(x_cell, x_row, x_col) { + x = iris, + row_by = fsl5, + col_by = iris$Species, + FUN = function(x_cell) { if (nrow(x_cell) < 10) { rcell("-") } else { fit <- lm(Sepal.Length ~ Petal.Width, data = x_cell) - m_col <- mean(x_col$Sepal.Length) - m_row <- mean(x_row$Sepal.Length) - - rcell(list(fit, m_col, m_row), format = function(x, output) { - paste("df:", x[[1]]$df.residual,", and", round(x[[2]],1), ", and", round(x[[3]],2)) + + rcell(list(fit), format = function(x, output) { + paste("df:", x[[1]]$df.residual) }) } - } , - row_col_data_args = TRUE + } ) tbl + + + +rtabulate( + x = iris, + row_by = fsl5, + col_by = iris$Species, + FUN = function(x_cell, N) { + N + }, + col_wise_args = list(N = c(10, 100, 200)) +) + + + } diff --git a/man/rtabulate.factor.Rd b/man/rtabulate.factor.Rd index 2ead82b83..b1cba69a6 100644 --- a/man/rtabulate.factor.Rd +++ b/man/rtabulate.factor.Rd @@ -5,8 +5,8 @@ \title{Tabulate Factors} \usage{ \method{rtabulate}{factor}(x, col_by = no_by("col_1"), FUN = length, - ..., row_col_data_args = FALSE, useNA = c("no", "ifany", "always"), - format = NULL, indent = 0, col_N = NULL) + ..., useNA = c("no", "ifany", "always"), format = NULL, indent = 0, + col_wise_args = NULL) } \arguments{ \item{x}{a vecor} @@ -21,10 +21,6 @@ set to \code{TRUE} the a second argument with the row data is passed to \item{...}{arguments passed to \code{FUN}} -\item{row_col_data_args}{boolean, if \code{TRUE} then \code{FUN} is called -with the first three arguments being the cell, row, and column data, -respectively} - \item{useNA}{boolean, if \code{TRUE} then \code{NA} values in \code{x} get turned into a factor level \code{"NA"}, if \code{FALSE} then the \code{NA} values in \code{x} get dropped.} @@ -35,10 +31,9 @@ then the \code{format} is applied} \item{indent}{non-negative integer where 0 means that the row should not be indented} -\item{col_N}{The column total for each column. If specified then -\code{\link{col_N}()} can be used on the cell data in order to retrieve the -column total. If \code{NULL} then no header row for the column is -displayed.} +\item{row_col_data_args}{boolean, if \code{TRUE} then \code{FUN} is called +with the first three arguments being the cell, row, and column data, +respectively} } \value{ an \code{\link{rtable}} project @@ -58,23 +53,32 @@ sl5 <- factor(iris$Sepal.Length > 5, levels = c(TRUE, FALSE), rtabulate(iris$Species, col_by=sl5) rtabulate(iris$Species, col_by=sl5, - FUN = function(cell_data, row_data, col_data) { + FUN = function(cell_data, N) { if (length(cell_data) > 10) { - length(cell_data) * c(1, 1/length(col_data)) + length(cell_data) * c(1, 1/N) } else { rcell("-", format = "xx") } }, - row_col_data_args = TRUE, - format = "xx (xx.xx\%)" + format = "xx (xx.xx\%)", + col_wise_args = list(N = table(sl5)) ) rtabulate(sl5, iris$Species) -x <- factor(LETTERS[1:4], levels = LETTERS[1:4]) -col_by <- factor(c("a", "a", "b", "c"), levels = letters[1:4]) -rtabulate(x, col_by, length) + +rtabulate(x = factor(c("X", "Y"), c("X", "Y")), col_by = factor(c("a", "a"), c("a", "b")), FUN = length) + +rtabulate(factor(c("Y", "Y"), c("X", "Y")), factor(c("b", "b"), c("a", "b")), length) + + +rtabulate( + x = factor(c("Y", "Y"), c("X", "Y")), + col_by = factor(c("b", "b"), c("a", "b")), + FUN = function(x, N) list(length(x), N), + col_wise_args = list(N = c(1,2)) +) } diff --git a/man/rtabulate.logical.Rd b/man/rtabulate.logical.Rd index bf8916873..945c50e0d 100644 --- a/man/rtabulate.logical.Rd +++ b/man/rtabulate.logical.Rd @@ -5,8 +5,7 @@ \title{tabulate a logical vector} \usage{ \method{rtabulate}{logical}(x, col_by = no_by("col_1"), FUN = sum, ..., - row_data_arg = FALSE, format = NULL, row.name = "", indent = 0, - col_N = NULL) + format = NULL, row.name = NULL, indent = 0, col_wise_args = NULL) } \arguments{ \item{x}{a vecor} @@ -21,8 +20,6 @@ set to \code{TRUE} the a second argument with the row data is passed to \item{...}{arguments passed to \code{FUN}} -\item{row_data_arg}{call \code{FUN} with the row data as the second argument} - \item{format}{if \code{FUN} does not return a formatted \code{\link{rcell}} then the \code{format} is applied} @@ -31,11 +28,6 @@ used as \code{row.name} of the \code{\link{rrow}}} \item{indent}{non-negative integer where 0 means that the row should not be indented} - -\item{col_N}{The column total for each column. If specified then -\code{\link{col_N}()} can be used on the cell data in order to retrieve the -column total. If \code{NULL} then no header row for the column is -displayed.} } \value{ an \code{\link{rtable}} project @@ -46,16 +38,21 @@ tabulate a logical vector \examples{ rtabulate(iris$Species == "setosa") -rtabulate(iris$Species == "setosa", no_by("Species"), row.name = "n (n/N)") +rtabulate(iris$Species == "setosa", no_by("Species"), + FUN = function(x, N) list(sum(x), sum(x)/N), + row.name = "n (n/N)", + col_wise_args = list(N = 150)) # default: percentages equal \\code{TRUE} with(iris, rtabulate(Sepal.Length < 5, Species, row.name = "Sepal.Length < 5")) # precentages with proportion of cell number of \\code{TRUE}s to overvall # number of \\code{TRUE}s -with(iris, rtabulate(Sepal.Length < 5, Species, row.name = "Sepal.Length < 5", - FUN = function(cell_data, row_data) sum(cell_data) * c(1, 1/sum(row_data)), - row_data_arg = TRUE +with(iris, rtabulate(Sepal.Length < 5, Species, + FUN = function(xi, N) sum(xi) * c(1, 1/N), + format = "xx.xx (xx.xx)", + row.name = "Sepal.Length < 5", + col_wise_args = list(N = table(cb)) )) } diff --git a/man/rtabulate.numeric.Rd b/man/rtabulate.numeric.Rd index 8f183ca5a..d0810153c 100644 --- a/man/rtabulate.numeric.Rd +++ b/man/rtabulate.numeric.Rd @@ -5,8 +5,8 @@ \title{tabulate a numeric vector} \usage{ \method{rtabulate}{numeric}(x, col_by = no_by("col_1"), FUN = mean, - ..., row_data_arg = FALSE, format = NULL, row.name = NULL, - indent = 0, col_N = NULL) + ..., format = NULL, row.name = NULL, indent = 0, + col_wise_args = NULL) } \arguments{ \item{x}{a vecor} @@ -21,8 +21,6 @@ set to \code{TRUE} the a second argument with the row data is passed to \item{...}{arguments passed to \code{FUN}} -\item{row_data_arg}{call \code{FUN} with the row data as the second argument} - \item{format}{if \code{FUN} does not return a formatted \code{\link{rcell}} then the \code{format} is applied} @@ -32,6 +30,8 @@ used as \code{row.name} of the \code{\link{rrow}}} \item{indent}{non-negative integer where 0 means that the row should not be indented} +\item{row_data_arg}{call \code{FUN} with the row data as the second argument} + \item{col_N}{The column total for each column. If specified then \code{\link{col_N}()} can be used on the cell data in order to retrieve the column total. If \code{NULL} then no header row for the column is @@ -62,5 +62,13 @@ rbind( ) +x <- 1:100 +cb <- factor(rep(LETTERS[1:3], c(20, 30, 50))) + +rtabulate( + x = x, col_by = cb, FUN = function(x, N) list(mean(x), sd(x), N), + format = sprintf_format("\%.2f (\%.2f) and \%i"), row.name = "Mean (SD) and N", + col_wise_args = list(N = table(cb)) +) } From bb640e4a5f7b03a537b4a47cf3ebd1dfa13b0f1d Mon Sep 17 00:00:00 2001 From: waddella Date: Fri, 26 Oct 2018 17:27:29 +0200 Subject: [PATCH 06/21] add header_add_N function --- R/header_add_N.R | 26 ++++++++++++++++++++++++++ man/header_add_N.Rd | 24 ++++++++++++++++++++++++ 2 files changed, 50 insertions(+) create mode 100644 R/header_add_N.R create mode 100644 man/header_add_N.Rd diff --git a/R/header_add_N.R b/R/header_add_N.R new file mode 100644 index 000000000..0559e545f --- /dev/null +++ b/R/header_add_N.R @@ -0,0 +1,26 @@ + +#' add N=xx to header +#' @export +#' +#' @examples +#' +#' tbl <- rtable( +#' header = letters[1:3], +#' rrow("X", 1, 2, 3), +#' rrow("Y", 4, 5, 6) +#' ) +#' +#' tbl +#' +#' header_add_N(tbl, 1:3) +#' +header_add_N <- function(x, N) { + is(x, "rtable") || stop("x is expected to be an rtable") + length(N) == ncol(x) || stop("dimension missmatch") + + header(x) <- rheader( + header(x)[[1]], + rrowl("", N, format = "(N=xx)") + ) + x +} diff --git a/man/header_add_N.Rd b/man/header_add_N.Rd new file mode 100644 index 000000000..e211de8dc --- /dev/null +++ b/man/header_add_N.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/header_add_N.R +\name{header_add_N} +\alias{header_add_N} +\title{add N=xx to header} +\usage{ +header_add_N(x, N) +} +\description{ +add N=xx to header +} +\examples{ + +tbl <- rtable( + header = letters[1:3], + rrow("X", 1, 2, 3), + rrow("Y", 4, 5, 6) +) + +tbl + +header_add_N(tbl, 1:3) + +} From 1efb58e0be2fad81f73088bf8639014b37428d30 Mon Sep 17 00:00:00 2001 From: stoilovs Date: Mon, 12 Nov 2018 19:00:35 +0100 Subject: [PATCH 07/21] Update rtabulate and header_add_N documentation. --- R/header_add_N.R | 10 ++++- R/rtabulate.R | 88 +++++++++++++++++++------------------ man/header_add_N.Rd | 11 ++++- man/levels.no_by.Rd | 14 ++++++ man/no_by.Rd | 13 ++++-- man/rtabulate.Rd | 4 +- man/rtabulate.data.frame.Rd | 22 ++++------ man/rtabulate.factor.Rd | 28 +++++------- man/rtabulate.logical.Rd | 24 +++++----- man/rtabulate.numeric.Rd | 23 ++++------ 10 files changed, 131 insertions(+), 106 deletions(-) create mode 100644 man/levels.no_by.Rd diff --git a/R/header_add_N.R b/R/header_add_N.R index 0559e545f..049da7ad4 100644 --- a/R/header_add_N.R +++ b/R/header_add_N.R @@ -1,5 +1,13 @@ -#' add N=xx to header +#' Add N=xx to header +#' +#' Helper function used to add the population total (N) in the +#' column header of \code{\link{rtable}} object. +#' +#' @param x \code{rtable} +#' @param N vector with counts to be displayed in the header. The +#' length must match the number of columns in \code{x} +#' #' @export #' #' @examples diff --git a/R/rtabulate.R b/R/rtabulate.R index 03b455ba4..dab0a5c29 100644 --- a/R/rtabulate.R +++ b/R/rtabulate.R @@ -5,13 +5,13 @@ #' \code{\link{rtable}}s. Conceptually the \code{rtabulate} has it's origin in #' \code{\link{tapply}}. #' -#' The data is split into cell-data and a function can be specified that return +#' The data is split into cell-data and a function can be specified that returns #' a data structre (or \code{\link{rcell}}). #' #' @param x either a vector or \code{data.frame} #' @param ... arguments passed to methods #' -#' @return an \code{\link{rtable}} project +#' @return an \code{\link{rtable}} object #' #' @author Adrian Waddell #' @@ -22,16 +22,21 @@ rtabulate <- function(x, ...) { } -#' Do not split data into columns or row in \code{rtabulate} +#' Do not split data into columns in \code{rtabulate} #' -#' \code{\link{rtabulate}} has the arguments \code{col_by} and \code{row_by} +#' \code{\link{rtabulate}} has the argument \code{col_by} #' which can either take a vector or if no splitting is needed the return value -#' of \code{no_by}. +#' of \code{no_by}. Using \code{no_by} creates a table with a single +#' column. #' -#' @param name row name or column name +#' @param name character column name to display in the table header #' #' @export #' +#' @examples +#' +#' rtabulate(iris$Species, col_by = no_by("Total")) + no_by <- function(name) { structure(name, class = "no_by") } @@ -50,12 +55,19 @@ is.no_by <- function(x) { is(x, "no_by") } +#' Access levels attribute for an object of \code{no_by} Class +#' +#' @param x \code{no_by} class object +#' #' @export -levels.no_by <- function(x) as.vector(x) +#' +levels.no_by <- function(x) { + as.vector(x) +} # rtabulate default for vectors # -# this method is used for vectors of type \code{logical} and \code{numeric} +# This method is used for vectors of type \code{logical} and \code{numeric} # # see parameter descrition for rtabulate.numeric # @@ -94,10 +106,9 @@ rtabulate_default <- function(x, col_by = no_by("col_1"), FUN, ..., -#' tabulate a numeric vector +#' Tabulate a numeric vector #' -#' by default the \code{\link[stats]{fivenum}} function is applied to the -#' vectors associated to the cells +#' By default each cell reports the mean based on the associated vector. #' #' #' @inheritParams rrow @@ -105,19 +116,15 @@ rtabulate_default <- function(x, col_by = no_by("col_1"), FUN, ..., #' @param col_by a \code{\link{factor}} of length \code{nrow(x)} that defines #' which levels in \code{col_by} define a column. If data should not be split #' into columns use the \code{\link{no_by}} function. -#' @param FUN a function that processes the cell data, if \code{row_data_arg} is -#' set to \code{TRUE} the a second argument with the row data is passed to -#' \code{FUN} +#' @param FUN a function that processes the cell data #' @param ... arguments passed to \code{FUN} -#' @param row_data_arg call \code{FUN} with the row data as the second argument #' @param format if \code{FUN} does not return a formatted \code{\link{rcell}} #' then the \code{format} is applied #' @param row.name if \code{NULL} then the \code{FUN} argument is deparsed and #' used as \code{row.name} of the \code{\link{rrow}} -#' @param col_N The column total for each column. If specified then -#' \code{\link{col_N}()} can be used on the cell data in order to retrieve the -#' column total. If \code{NULL} then no header row for the column is -#' displayed. +#' @param col_wise_args a list containing vectors with data for each column that +#' is passed to \code{FUN}. The length and order of each vector must match the +#' levels in \code{col_by}. See examples. #' #' @inherit rtabulate return #' @@ -129,7 +136,7 @@ rtabulate_default <- function(x, col_by = no_by("col_1"), FUN, ..., #' #' rtabulate(iris$Sepal.Length, col_by = no_by("Sepal.Length")) #' -#' with(iris, rtabulate(x = Sepal.Length, col_by = Species, row.name = "fivenum")) +#' with(iris, rtabulate(x = Sepal.Length, col_by = Species, row.name = "mean")) #' #' SL <- iris$Sepal.Length #' Sp <- iris$Species @@ -140,7 +147,6 @@ rtabulate_default <- function(x, col_by = no_by("col_1"), FUN, ..., #' rtabulate(SL, Sp, range, format = "xx.xx - xx.xx", row.name = "Min - Max") #' ) #' -#' #' x <- 1:100 #' cb <- factor(rep(LETTERS[1:3], c(20, 30, 50))) #' @@ -163,7 +169,9 @@ rtabulate.numeric <- function(x, col_by = no_by("col_1"), FUN = mean, ..., ) } -#' tabulate a logical vector +#' Tabulate a logical vector +#' +#' By default each cell reports the number of \code{TRUE} observations from the associated vector. #' #' @inheritParams rtabulate.numeric #' @@ -179,16 +187,16 @@ rtabulate.numeric <- function(x, col_by = no_by("col_1"), FUN = mean, ..., #' row.name = "n (n/N)", #' col_wise_args = list(N = 150)) #' -#' # default: percentages equal \code{TRUE} +#' # default FUN is number of observations equal to TRUE #' with(iris, rtabulate(Sepal.Length < 5, Species, row.name = "Sepal.Length < 5")) #' -#' # precentages with proportion of cell number of \code{TRUE}s to overvall -#' # number of \code{TRUE}s +#' # Custom FUN: number of TRUE records in a cell and precentages based on number of records +#' # in each column #' with(iris, rtabulate(Sepal.Length < 5, Species, #' FUN = function(xi, N) sum(xi) * c(1, 1/N), -#' format = "xx.xx (xx.xx)", +#' format = "xx.xx (xx.xx%)", #' row.name = "Sepal.Length < 5", -#' col_wise_args = list(N = table(cb)) +#' col_wise_args = list(N = table(Species)) #' )) #' rtabulate.logical <- function(x, col_by = no_by("col_1"), @@ -211,13 +219,14 @@ rtabulate.logical <- function(x, col_by = no_by("col_1"), #' Tabulate Factors #' +#' By default each cell reports the number of observations in +#' each level of \code{x}. +#' #' @inheritParams rtabulate.numeric -#' @param row_col_data_args boolean, if \code{TRUE} then \code{FUN} is called -#' with the first three arguments being the cell, row, and column data, -#' respectively -#' @param useNA boolean, if \code{TRUE} then \code{NA} values in \code{x} get -#' turned into a factor level \code{"NA"}, if \code{FALSE} then the \code{NA} -#' values in \code{x} get dropped. +#' @param useNA either one of ("no", "ifany", "always"). If \code{"no"} then \code{NA} values +#' in \code{x} get dropped. When \code{"ifany"} is used a row for \code{NA} values is +#' included in the summary if any \code{NA}s exist in \code{x}. For option \code{"always"} +#' \code{NA} values are always included in the summary even if none exist in \code{x}. #' #' @inherit rtabulate return #' @@ -233,6 +242,7 @@ rtabulate.logical <- function(x, col_by = no_by("col_1"), #' labels = c("S.L > 5", "S.L <= 5")) #' #' rtabulate(iris$Species, col_by=sl5) +#' rtabulate(sl5, iris$Species) #' #' rtabulate(iris$Species, col_by=sl5, #' FUN = function(cell_data, N) { @@ -246,15 +256,10 @@ rtabulate.logical <- function(x, col_by = no_by("col_1"), #' col_wise_args = list(N = table(sl5)) #' ) #' -#' rtabulate(sl5, iris$Species) -#' -#' -#' #' rtabulate(x = factor(c("X", "Y"), c("X", "Y")), col_by = factor(c("a", "a"), c("a", "b")), FUN = length) #' #' rtabulate(factor(c("Y", "Y"), c("X", "Y")), factor(c("b", "b"), c("a", "b")), length) #' -#' #' rtabulate( #' x = factor(c("Y", "Y"), c("X", "Y")), #' col_by = factor(c("b", "b"), c("a", "b")), @@ -337,8 +342,9 @@ rtabulate.factor <- function(x, #' Split data.frame and apply functions #' #' @inheritParams rtabulate.factor -#' @param row_by_var name of factor variable in \code{x} -#' @param col_by_var name of factor variable in \code{x} +#' @param x data.frame +#' @param row_by name of factor variable in \code{x} +#' @param col_by name of factor variable in \code{x} #' #' #' @inherit rtabulate return @@ -395,8 +401,6 @@ rtabulate.factor <- function(x, #' ) #' tbl #' -#' -#' #' rtabulate( #' x = iris, #' row_by = fsl5, diff --git a/man/header_add_N.Rd b/man/header_add_N.Rd index e211de8dc..36134f57b 100644 --- a/man/header_add_N.Rd +++ b/man/header_add_N.Rd @@ -2,12 +2,19 @@ % Please edit documentation in R/header_add_N.R \name{header_add_N} \alias{header_add_N} -\title{add N=xx to header} +\title{Add N=xx to header} \usage{ header_add_N(x, N) } +\arguments{ +\item{x}{\code{rtable}} + +\item{N}{vector with counts to be displayed in the header. The +length must match the number of columns in \code{x}} +} \description{ -add N=xx to header +Helper function used to add the population total (N) in the +column header of \code{\link{rtable}} object. } \examples{ diff --git a/man/levels.no_by.Rd b/man/levels.no_by.Rd new file mode 100644 index 000000000..1094d5cf0 --- /dev/null +++ b/man/levels.no_by.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/rtabulate.R +\name{levels.no_by} +\alias{levels.no_by} +\title{Access levels attribute for an object of \code{no_by} Class} +\usage{ +\method{levels}{no_by}(x) +} +\arguments{ +\item{x}{\code{no_by} class object} +} +\description{ +Access levels attribute for an object of \code{no_by} Class +} diff --git a/man/no_by.Rd b/man/no_by.Rd index ca0eb016f..47396f508 100644 --- a/man/no_by.Rd +++ b/man/no_by.Rd @@ -2,15 +2,20 @@ % Please edit documentation in R/rtabulate.R \name{no_by} \alias{no_by} -\title{Do not split data into columns or row in \code{rtabulate}} +\title{Do not split data into columns in \code{rtabulate}} \usage{ no_by(name) } \arguments{ -\item{name}{row name or column name} +\item{name}{character column name to display in the table header} } \description{ -\code{\link{rtabulate}} has the arguments \code{col_by} and \code{row_by} +\code{\link{rtabulate}} has the argument \code{col_by} which can either take a vector or if no splitting is needed the return value -of \code{no_by}. +of \code{no_by}. Using \code{no_by} creates a table with a single +column. +} +\examples{ + +rtabulate(iris$Species, col_by = no_by("Total")) } diff --git a/man/rtabulate.Rd b/man/rtabulate.Rd index 9d447c3d7..45893ea8f 100644 --- a/man/rtabulate.Rd +++ b/man/rtabulate.Rd @@ -12,7 +12,7 @@ rtabulate(x, ...) \item{...}{arguments passed to methods} } \value{ -an \code{\link{rtable}} project +an \code{\link{rtable}} object } \description{ \code{rtablulate} provides a number of methods to derive @@ -20,7 +20,7 @@ an \code{\link{rtable}} project \code{\link{tapply}}. } \details{ -The data is split into cell-data and a function can be specified that return +The data is split into cell-data and a function can be specified that returns a data structre (or \code{\link{rcell}}). } \author{ diff --git a/man/rtabulate.data.frame.Rd b/man/rtabulate.data.frame.Rd index ea13ffb5c..4699e5392 100644 --- a/man/rtabulate.data.frame.Rd +++ b/man/rtabulate.data.frame.Rd @@ -8,15 +8,13 @@ format = NULL, indent = 0, col_wise_args = NULL) } \arguments{ -\item{x}{a vecor} +\item{x}{data.frame} -\item{col_by}{a \code{\link{factor}} of length \code{nrow(x)} that defines -which levels in \code{col_by} define a column. If data should not be split -into columns use the \code{\link{no_by}} function.} +\item{row_by}{name of factor variable in \code{x}} -\item{FUN}{a function that processes the cell data, if \code{row_data_arg} is -set to \code{TRUE} the a second argument with the row data is passed to -\code{FUN}} +\item{col_by}{name of factor variable in \code{x}} + +\item{FUN}{a function that processes the cell data} \item{...}{arguments passed to \code{FUN}} @@ -26,12 +24,12 @@ then the \code{format} is applied} \item{indent}{non-negative integer where 0 means that the row should not be indented} -\item{row_by_var}{name of factor variable in \code{x}} - -\item{col_by_var}{name of factor variable in \code{x}} +\item{col_wise_args}{a list containing vectors with data for each column that +is passed to \code{FUN}. The length and order of each vector must match the +levels in \code{col_by}. See examples.} } \value{ -an \code{\link{rtable}} project +an \code{\link{rtable}} object } \description{ Split data.frame and apply functions @@ -85,8 +83,6 @@ tbl <- rtabulate( ) tbl - - rtabulate( x = iris, row_by = fsl5, diff --git a/man/rtabulate.factor.Rd b/man/rtabulate.factor.Rd index b1cba69a6..97a896a7c 100644 --- a/man/rtabulate.factor.Rd +++ b/man/rtabulate.factor.Rd @@ -15,15 +15,14 @@ which levels in \code{col_by} define a column. If data should not be split into columns use the \code{\link{no_by}} function.} -\item{FUN}{a function that processes the cell data, if \code{row_data_arg} is -set to \code{TRUE} the a second argument with the row data is passed to -\code{FUN}} +\item{FUN}{a function that processes the cell data} \item{...}{arguments passed to \code{FUN}} -\item{useNA}{boolean, if \code{TRUE} then \code{NA} values in \code{x} get -turned into a factor level \code{"NA"}, if \code{FALSE} then the \code{NA} -values in \code{x} get dropped.} +\item{useNA}{either one of ("no", "ifany", "always"). If \code{"no"} then \code{NA} values +in \code{x} get dropped. When \code{"ifany"} is used a row for \code{NA} values is +included in the summary if any \code{NA}s exist in \code{x}. For option \code{"always"} +\code{NA} values are always included in the summary even if none exist in \code{x}.} \item{format}{if \code{FUN} does not return a formatted \code{\link{rcell}} then the \code{format} is applied} @@ -31,15 +30,16 @@ then the \code{format} is applied} \item{indent}{non-negative integer where 0 means that the row should not be indented} -\item{row_col_data_args}{boolean, if \code{TRUE} then \code{FUN} is called -with the first three arguments being the cell, row, and column data, -respectively} +\item{col_wise_args}{a list containing vectors with data for each column that +is passed to \code{FUN}. The length and order of each vector must match the +levels in \code{col_by}. See examples.} } \value{ -an \code{\link{rtable}} project +an \code{\link{rtable}} object } \description{ -Tabulate Factors +By default each cell reports the number of observations in +each level of \code{x}. } \examples{ @@ -51,6 +51,7 @@ sl5 <- factor(iris$Sepal.Length > 5, levels = c(TRUE, FALSE), labels = c("S.L > 5", "S.L <= 5")) rtabulate(iris$Species, col_by=sl5) +rtabulate(sl5, iris$Species) rtabulate(iris$Species, col_by=sl5, FUN = function(cell_data, N) { @@ -64,15 +65,10 @@ rtabulate(iris$Species, col_by=sl5, col_wise_args = list(N = table(sl5)) ) -rtabulate(sl5, iris$Species) - - - rtabulate(x = factor(c("X", "Y"), c("X", "Y")), col_by = factor(c("a", "a"), c("a", "b")), FUN = length) rtabulate(factor(c("Y", "Y"), c("X", "Y")), factor(c("b", "b"), c("a", "b")), length) - rtabulate( x = factor(c("Y", "Y"), c("X", "Y")), col_by = factor(c("b", "b"), c("a", "b")), diff --git a/man/rtabulate.logical.Rd b/man/rtabulate.logical.Rd index 945c50e0d..8a87bfac9 100644 --- a/man/rtabulate.logical.Rd +++ b/man/rtabulate.logical.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/rtabulate.R \name{rtabulate.logical} \alias{rtabulate.logical} -\title{tabulate a logical vector} +\title{Tabulate a logical vector} \usage{ \method{rtabulate}{logical}(x, col_by = no_by("col_1"), FUN = sum, ..., format = NULL, row.name = NULL, indent = 0, col_wise_args = NULL) @@ -14,9 +14,7 @@ which levels in \code{col_by} define a column. If data should not be split into columns use the \code{\link{no_by}} function.} -\item{FUN}{a function that processes the cell data, if \code{row_data_arg} is -set to \code{TRUE} the a second argument with the row data is passed to -\code{FUN}} +\item{FUN}{a function that processes the cell data} \item{...}{arguments passed to \code{FUN}} @@ -28,12 +26,16 @@ used as \code{row.name} of the \code{\link{rrow}}} \item{indent}{non-negative integer where 0 means that the row should not be indented} + +\item{col_wise_args}{a list containing vectors with data for each column that +is passed to \code{FUN}. The length and order of each vector must match the +levels in \code{col_by}. See examples.} } \value{ -an \code{\link{rtable}} project +an \code{\link{rtable}} object } \description{ -tabulate a logical vector +By default each cell reports the number of \code{TRUE} observations from the associated vector. } \examples{ rtabulate(iris$Species == "setosa") @@ -43,16 +45,16 @@ rtabulate(iris$Species == "setosa", no_by("Species"), row.name = "n (n/N)", col_wise_args = list(N = 150)) -# default: percentages equal \\code{TRUE} +# default FUN is number of observations equal to TRUE with(iris, rtabulate(Sepal.Length < 5, Species, row.name = "Sepal.Length < 5")) -# precentages with proportion of cell number of \\code{TRUE}s to overvall -# number of \\code{TRUE}s +# Custom FUN: number of TRUE records in a cell and precentages based on number of records +# in each column with(iris, rtabulate(Sepal.Length < 5, Species, FUN = function(xi, N) sum(xi) * c(1, 1/N), - format = "xx.xx (xx.xx)", + format = "xx.xx (xx.xx\%)", row.name = "Sepal.Length < 5", - col_wise_args = list(N = table(cb)) + col_wise_args = list(N = table(Species)) )) } diff --git a/man/rtabulate.numeric.Rd b/man/rtabulate.numeric.Rd index d0810153c..f252af1b7 100644 --- a/man/rtabulate.numeric.Rd +++ b/man/rtabulate.numeric.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/rtabulate.R \name{rtabulate.numeric} \alias{rtabulate.numeric} -\title{tabulate a numeric vector} +\title{Tabulate a numeric vector} \usage{ \method{rtabulate}{numeric}(x, col_by = no_by("col_1"), FUN = mean, ..., format = NULL, row.name = NULL, indent = 0, @@ -15,9 +15,7 @@ which levels in \code{col_by} define a column. If data should not be split into columns use the \code{\link{no_by}} function.} -\item{FUN}{a function that processes the cell data, if \code{row_data_arg} is -set to \code{TRUE} the a second argument with the row data is passed to -\code{FUN}} +\item{FUN}{a function that processes the cell data} \item{...}{arguments passed to \code{FUN}} @@ -30,19 +28,15 @@ used as \code{row.name} of the \code{\link{rrow}}} \item{indent}{non-negative integer where 0 means that the row should not be indented} -\item{row_data_arg}{call \code{FUN} with the row data as the second argument} - -\item{col_N}{The column total for each column. If specified then -\code{\link{col_N}()} can be used on the cell data in order to retrieve the -column total. If \code{NULL} then no header row for the column is -displayed.} +\item{col_wise_args}{a list containing vectors with data for each column that +is passed to \code{FUN}. The length and order of each vector must match the +levels in \code{col_by}. See examples.} } \value{ -an \code{\link{rtable}} project +an \code{\link{rtable}} object } \description{ -by default the \code{\link[stats]{fivenum}} function is applied to the -vectors associated to the cells +By default each cell reports the mean based on the associated vector. } \examples{ @@ -50,7 +44,7 @@ rtabulate(iris$Sepal.Length) rtabulate(iris$Sepal.Length, col_by = no_by("Sepal.Length")) -with(iris, rtabulate(x = Sepal.Length, col_by = Species, row.name = "fivenum")) +with(iris, rtabulate(x = Sepal.Length, col_by = Species, row.name = "mean")) SL <- iris$Sepal.Length Sp <- iris$Species @@ -61,7 +55,6 @@ rbind( rtabulate(SL, Sp, range, format = "xx.xx - xx.xx", row.name = "Min - Max") ) - x <- 1:100 cb <- factor(rep(LETTERS[1:3], c(20, 30, 50))) From c6be22710022638b0abd634a77989570cc701b11 Mon Sep 17 00:00:00 2001 From: waddella Date: Wed, 21 Nov 2018 23:47:28 +0100 Subject: [PATCH 08/21] improve speed of rbind.rtable and add gap argument --- NAMESPACE | 1 + R/accessors_modifiers.R | 61 ++++-- R/utils.R | 11 +- docs/dev/articles/rtables.html | 2 +- docs/dev/articles/rtabulate.html | 67 +++---- docs/dev/news/index.html | 3 +- docs/dev/reference/header_add_N.html | 194 +++++++++++++++++++ docs/dev/reference/index.html | 6 +- docs/dev/reference/levels.no_by.html | 168 ++++++++++++++++ docs/dev/reference/no_by.html | 29 ++- docs/dev/reference/rtabulate.data.frame.html | 126 +++++------- docs/dev/reference/rtabulate.factor.html | 97 +++++----- docs/dev/reference/rtabulate.html | 4 +- docs/dev/reference/rtabulate.logical.html | 73 ++++--- docs/dev/reference/rtabulate.numeric.html | 56 +++--- man/rbind.rtable.Rd | 16 +- man/rbindl_rtable.Rd | 16 ++ vignettes/rtables.Rmd | 2 - vignettes/rtabulate.Rmd | 75 ++++--- 19 files changed, 696 insertions(+), 311 deletions(-) create mode 100644 docs/dev/reference/header_add_N.html create mode 100644 docs/dev/reference/levels.no_by.html create mode 100644 man/rbindl_rtable.Rd diff --git a/NAMESPACE b/NAMESPACE index 94d4d5906..cd59526aa 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -36,6 +36,7 @@ export(indented_row.names) export(is.no_by) export(list_rcell_format_labels) export(no_by) +export(rbindl_rtable) export(rcell) export(rheader) export(rrow) diff --git a/R/accessors_modifiers.R b/R/accessors_modifiers.R index b4db76762..7749cf05f 100644 --- a/R/accessors_modifiers.R +++ b/R/accessors_modifiers.R @@ -243,9 +243,13 @@ set_rrow_attrs <- function(rrow, row.name, indent) { rrow } -#' stack rtable objects +#' Stack rtable objects +#' +#' Note that the columns order are not mached by the header: the first table +#' header is taken as the reference. #' #' @param ... \code{\link{rtable}} objects +#' @param gap number of empty rows to add between tables #' #' @return an \code{\link{rtable}} object #' @@ -280,25 +284,52 @@ set_rrow_attrs <- function(rrow, row.name, indent) { #' )) #' #' tbl <- rbind(mtbl, mtbl2) -#' #' tbl #' -rbind.rtable <- function(...) { - +#' tbl <- rbind(mtbl, mtbl2, gap = 1) +#' tbl +#' +#' tbl <- rbind(mtbl, mtbl2, gap = 2) +#' tbl +#' +rbind.rtable <- function(..., gap = 0) { dots <- Filter(Negate(is.null), list(...)) + rbindl_rtable(dots, gap = gap) +} + +#' Stack a list of rtables +#' +#' See \code{\link{rbind.rtable}} for details +#' +#' @param x a list of rtable objects +#' @inheritParams rbind.rtable +#' +#' @export +#' +rbindl_rtable <- function(x, gap = 0) { - if (!are(dots, "rtable")) stop("not all elements are of type rtable") - - headers <- lapply(dots, header) - ref_header <- headers[[1]] + stopifnot(are(x, "rtable")) + stopifnot(length(x) > 0) + stopifnot(is.numeric(gap), gap >= 0) - same_headers <- vapply(headers[-1], function(h) { - identical(h, ref_header) - }, logical(1)) + if (!num_all_equal(vapply(x, ncol, numeric(1))) || + !num_all_equal(vapply(x, nrow, numeric(1)))) { + stop("dimension missmatch between tables") + } - if (!all(same_headers)) stop("not all rtables have the same header") + tbl <- if (gap != 0) { + gap_rows <- replicate(gap, rrow(), simplify = FALSE) + Reduce(function(tbl1, tbl2) c(tbl1, gap_rows, tbl2), x) + } else { + unlist(x, recursive = FALSE) + } - body <- unlist(dots, recursive = FALSE) + ref_header <- header(x[[1]]) + class(tbl) <- "rtable" + attr(tbl, "header") <- ref_header + attr(tbl, "nrow") <- length(tbl) + attr(tbl, "ncol") <- ncol(ref_header) - rtablel(header = ref_header, body) -} + tbl +} + diff --git a/R/utils.R b/R/utils.R index c82d65640..d77272dc0 100644 --- a/R/utils.R +++ b/R/utils.R @@ -3,4 +3,13 @@ ## is each object in a collection from a class are <- function(object_collection, class2) { all(vapply(object_collection, is, logical(1), class2)) -} \ No newline at end of file +} + +num_all_equal <- function(x, tol = .Machine$double.eps ^ 0.5) { + stopifnot(is.numeric(x)) + + if (length(x) == 1) return(TRUE) + + y <- range(x) / mean(x) + isTRUE(all.equal(y[1], y[2], tolerance = tol)) +} diff --git a/docs/dev/articles/rtables.html b/docs/dev/articles/rtables.html index a6c046809..536b55a90 100644 --- a/docs/dev/articles/rtables.html +++ b/docs/dev/articles/rtables.html @@ -86,7 +86,7 @@

Introduction to rtables

Adrian Waddell

-

2018-10-22

+

2018-11-19

Source: vignettes/rtables.Rmd diff --git a/docs/dev/articles/rtabulate.html b/docs/dev/articles/rtabulate.html index a71f2e958..bd2230d58 100644 --- a/docs/dev/articles/rtabulate.html +++ b/docs/dev/articles/rtabulate.html @@ -98,7 +98,7 @@

3/21/2018

Introduction

-

rtabulate provides a number of methods to compute rtable tables. The idea behind rtabulate is to have a similar function to tapply which returns rtables instead of lists. For example, for the iris data we might be interested in the average Sepal.Length for each Species which can be answered with tapply as follows:

+

rtabulate provides a number of methods to compute rtable tables. The idea behind rtabulate is to have a similar function to tapply which returns rtables instead of lists or vectors. For example, for the iris data we might be interested in the average Sepal.Length for each Species which can be answered with tapply as follows:

attach(iris)
 tapply(X = Sepal.Length, INDEX = Species, FUN = mean)
##     setosa versicolor  virginica 
@@ -106,13 +106,11 @@ 

and to get the same numbers with rtabulate we can run

rtabulate(x = Sepal.Length, col_by = Species, FUN = mean)
##               setosa          versicolor        virginica 
-##               (N=50)            (N=50)            (N=50)  
 ## ----------------------------------------------------------
 ## mean          5.006             5.936             6.588

Because rtables also provide formatting options we can pass that info to rtabulate

rtabulate(x = Sepal.Length, col_by = Species, FUN = mean, format = "xx.xx")
##               setosa          versicolor        virginica 
-##               (N=50)            (N=50)            (N=50)  
 ## ----------------------------------------------------------
 ## mean           5.01              5.94              6.59

This vignette will provide an overview of rtabulate and show how different types of tables can be derived with it.

@@ -133,7 +131,6 @@

If x should not be split then the no_by function can be used to specify a column name (as the returned object is still an rtable with one column):

rtabulate(Sepal.Length, no_by("All Data"), sum)
##            All Data
-##            (N=150) 
 ## -------------------
 ## sum         876.5
@@ -145,7 +142,6 @@

rtabulate.numeric returns an rtable with one row and columns according to the levels of col_by.

rtabulate(x = Sepal.Length, col_by = Species, FUN = median)
##                 setosa          versicolor        virginica 
-##                 (N=50)            (N=50)            (N=50)  
 ## ------------------------------------------------------------
 ## median            5                5.9               6.5

@@ -157,9 +153,8 @@

rtabulate(x = is_SL_below_avg, col_by = Species, row.name = "count (percentage)")

##                             setosa          versicolor        virginica 
-##                             (N=50)            (N=50)            (N=50)  
 ## ------------------------------------------------------------------------
-## count (percentage)        50 (100%)          24 (48%)          6 (12%)
+## count (percentage) 50 24 6

By default FUN=function(x) sum(x) * c(1, 1/length(x)).

@@ -168,7 +163,6 @@

rtabulate.factor returns an rtable with one row per factor level of x.

rtabulate(x = esoph$agegp, col_by = esoph$alcgp)
##              0-39g/day          40-79           80-119            120+   
-##               (N=23)           (N=23)           (N=21)           (N=21)  
 ## -------------------------------------------------------------------------
 ## 25-34            4                4                3                4    
 ## 35-44            4                4                4                3    
@@ -180,7 +174,7 @@ 

Tabulation based on Data Frames

-

rtabulate.data.frame has a data.frame as x argument and the col_by_var and row_by_var need to specify which factor variables with x should be used to split the data for the table rows and columns. By default the argument to FUN is then a subset of x that is associated with a particular table cell. Let’s look at the CO2 dataset:

+

rtabulate.data.frame has a data.frame as x argument and the col_by and row_by need to specify how to split the data for the table rows and columns. The argument to FUN is then a subset of x that is associated with a particular table cell. Let’s look at the CO2 dataset:

head(CO2)
##   Plant   Type  Treatment conc uptake
 ## 1   Qn1 Quebec nonchilled   95   16.0
@@ -190,50 +184,35 @@ 

## 5 Qn1 Quebec nonchilled 500 35.3 ## 6 Qn1 Quebec nonchilled 675 39.2

Say we want to calculate the total uptake for each Type and Treatment

-
rtabulate(x = CO2, row_by_var = "Type", col_by_var = "Treatment",
+
rtabulate(x = CO2, row_by = CO2$Type, col_by = CO2$Treatment,
           FUN = function(xi) sum(xi$uptake))
##                    nonchilled         chilled  
-##                      (N=42)            (N=42)  
 ## -----------------------------------------------
 ## Quebec                742              666.8   
 ## Mississippi           545              332.1
-
+

-Cell Data, Row Data, Column Data based Tabulation

-

rtabulate.numeric and rtabulate.logical have the argument row_data_arg which is by default set to FALSE. If set to TRUE then FUN receives a second argument with a copy of the x argument (as the row-associated data is the whole data for a numeric and logical vector).

-
rtabulate(
-  Sepal.Length, Species,
-  FUN = function(x_cell, x_row) c(length(x_cell), length(x_row)),
-  row_data_arg = TRUE,
-  format = "xx / xx", 
-  row.name = "length of cell and row data"
-)
-
##                                      setosa          versicolor        virginica 
-##                                      (N=50)            (N=50)            (N=50)  
-## ---------------------------------------------------------------------------------
-## length of cell and row data         50 / 150          50 / 150          50 / 150
-

Next, for rtabulate.factor and rtabulate.data.frame there is an row_col_data_args which when set to TRUE then the FUN function receives three arguments, the data that is associated with a table cell, row, column, respectively.

-
mtcars2 <- mtcars
-mtcars2$gear <- factor(mtcars2$gear)
-mtcars2$carb <- factor(mtcars2$carb)
+Column Wise Arguments

+

The arguments passed to the elipsis ... in rtabulate are optional arguments to FUN. This is the same behaviour as for the arguments passed to ... in lapply and tapply. This is useful, for example, when passing the na.rm argument to mean

+
x <- c(1, 2, 3, NA, 3, 2)
+cb <- factor(c("A", "B", "A", "B", "B", "A"))
 
-rtabulate(
-  x = mtcars2, row_by_var = "carb", col_by_var = "gear",
-  FUN = function(x_cell, x_row, x_col) c(nrow(x_cell), nrow(x_row), nrow(x_col)),
-  row_col_data_args = TRUE,
-  format = NULL
-)
-
##              3                4                5    
-##           (N=15)           (N=12)            (N=5)  
-## ----------------------------------------------------
-## 1        3, 7, 15         4, 7, 12          0, 7, 5 
-## 2        4, 10, 15        4, 10, 12        2, 10, 5 
-## 3        3, 3, 15         0, 3, 12          0, 3, 5 
-## 4        5, 10, 15        4, 10, 12        1, 10, 5 
-## 6        0, 1, 15         0, 1, 12          1, 1, 5 
-## 8        0, 1, 15         0, 1, 12          1, 1, 5
+rtabulate(x, cb, mean, na.rm = TRUE)

+
##              A          B 
+## --------------------------
+## mean         2         2.5
+

Often, howewer, it is also useful to iterate over multiple collections as for example is the case with the ... in mapply and Map. That is, given a collection x and y with each p elements, then we would like to evaluate FUN(x[i], y[i]) for i in 1 to p. In rtabulate this can be achieved with the collections wrapped in a named list and passed to the col_wise_args. For example:

+
x <- c(1, 2, 3, NA, 3, 2)
+cb <- factor(c("A", "A", "A", "B", "B", "A"), levels = c("A", "B"))
+
+rtabulate(x, cb, FUN = function(xi, na.rm, N) {
+  list(mean(xi, na.rm=na.rm), N)
+}, na.rm = TRUE, col_wise_args = list(N = table(cb)), row.name = "mean, N")
+
##                 A           B  
+## -------------------------------
+## mean, N        2, 4        3, 2

Note that format=NULL is equivalent to paste(x, collapse = ", ") on the cell data structure.

diff --git a/docs/dev/news/index.html b/docs/dev/news/index.html index 100cd70d2..89bf5a6ad 100644 --- a/docs/dev/news/index.html +++ b/docs/dev/news/index.html @@ -133,7 +133,8 @@

  • added sprintf_format for formatting rcells (thanks to Doug Kelkhoff for the suggestion)
  • added "(N=xx)" and ">999.9" format labels
  • -rtabulate has now an argument col_total and shows by default the numbers of elements associated with a column
  • +rtabulate has now an argument col_N and the function col_N() +
    diff --git a/docs/dev/reference/header_add_N.html b/docs/dev/reference/header_add_N.html new file mode 100644 index 000000000..f056987ae --- /dev/null +++ b/docs/dev/reference/header_add_N.html @@ -0,0 +1,194 @@ + + + + + + + + +Add N=xx to header — header_add_N • rtables + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + +
    + +
    +
    + + +
    + +

    Helper function used to add the population total (N) in the +column header of rtable object.

    + +
    + +
    header_add_N(x, N)
    + +

    Arguments

    + + + + + + + + + + +
    x

    rtable

    N

    vector with counts to be displayed in the header. The +length must match the number of columns in x

    + + +

    Examples

    +
    +tbl <- rtable( + header = letters[1:3], + rrow("X", 1, 2, 3), + rrow("Y", 4, 5, 6) +) + +tbl
    #> a b c +#> ---------------------------- +#> X 1 2 3 +#> Y 4 5 6
    +header_add_N(tbl, 1:3)
    #> a b c +#> (N=1) (N=2) (N=3) +#> ---------------------------------------- +#> X 1 2 3 +#> Y 4 5 6
    +
    +
    + +
    + +
    + + +
    +

    Site built with pkgdown.

    +
    + +
    +
    + + + + + + diff --git a/docs/dev/reference/index.html b/docs/dev/reference/index.html index 8e37d26a5..063c83ec9 100644 --- a/docs/dev/reference/index.html +++ b/docs/dev/reference/index.html @@ -268,19 +268,19 @@

    rtabulate(<logical>)

    -

    tabulate a logical vector

    +

    Tabulate a logical vector

    rtabulate(<numeric>)

    -

    tabulate a numeric vector

    +

    Tabulate a numeric vector

    no_by()

    -

    Do not split data into columns or row in rtabulate

    +

    Do not split data into columns in rtabulate

    diff --git a/docs/dev/reference/levels.no_by.html b/docs/dev/reference/levels.no_by.html new file mode 100644 index 000000000..81fee521d --- /dev/null +++ b/docs/dev/reference/levels.no_by.html @@ -0,0 +1,168 @@ + + + + + + + + +Access levels attribute for an object of <code>no_by</code> Class — levels.no_by • rtables + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + +
    + +
    +
    + + +
    + +

    Access levels attribute for an object of no_by Class

    + +
    + +
    # S3 method for no_by
    +levels(x)
    + +

    Arguments

    + + + + + + +
    x

    no_by class object

    + + +
    + +
    + +
    + + +
    +

    Site built with pkgdown.

    +
    + +
    +
    + + + + + + diff --git a/docs/dev/reference/no_by.html b/docs/dev/reference/no_by.html index 988fa19b6..da4743219 100644 --- a/docs/dev/reference/no_by.html +++ b/docs/dev/reference/no_by.html @@ -6,7 +6,7 @@ -Do not split data into columns or row in <code>rtabulate</code> — no_by • rtables +Do not split data into columns in <code>rtabulate</code> — no_by • rtables @@ -30,11 +30,12 @@ - + - +of no_by. Using no_by creates a table with a single +column." /> @@ -117,16 +118,17 @@
    -

    rtabulate has the arguments col_by and row_by +

    rtabulate has the argument col_by which can either take a vector or if no splitting is needed the return value -of no_by.

    +of no_by. Using no_by creates a table with a single +column.

    @@ -137,17 +139,26 @@

    Arg name -

    row name or column name

    +

    character column name to display in the table header

    +

    Examples

    +
    +rtabulate(iris$Species, col_by = no_by("Total"))
    #> Total +#> ----------------------- +#> setosa 50 +#> versicolor 50 +#> virginica 50
    diff --git a/docs/dev/reference/rtabulate.data.frame.html b/docs/dev/reference/rtabulate.data.frame.html index ffb426caa..d22aa5d05 100644 --- a/docs/dev/reference/rtabulate.data.frame.html +++ b/docs/dev/reference/rtabulate.data.frame.html @@ -127,42 +127,32 @@

    Split data.frame and apply functions

    # S3 method for data.frame
    -rtabulate(x, row_by_var = no_by("row_1"),
    -  col_by_var = no_by("col_1"), FUN = nrow, ...,
    -  row_col_data_args = FALSE, format = "xx", indent = 0,
    -  col_total = "(N=xx)")
    +rtabulate(x, row_by, col_by, FUN, ..., + format = NULL, indent = 0, col_wise_args = NULL)

    Arguments

    - + - + - + - + - - - - - - + +
    x

    a vecor

    data.frame

    row_by_varrow_by

    name of factor variable in x

    col_by_varcol_by

    name of factor variable in x

    FUN

    a function that processes the cell data, if row_data_arg is -set to TRUE the a second argument with the row data is passed to -FUN

    a function that processes the cell data

    ...

    arguments passed to FUN

    row_col_data_args

    boolean, if TRUE then FUN is called -with the first three arguments being the cell, row, and column data, -respectively

    format

    if FUN does not return a formatted rcell @@ -174,16 +164,16 @@

    Arg indented

    col_total

    a format string for displaying the number of elements in the -column header. If NULL then no header row for the column is -displayed.

    col_wise_args

    a list containing vectors with data for each column that +is passed to FUN. The length and order of each vector must match the +levels in col_by. See examples.

    Value

    -

    an rtable project

    +

    an rtable object

    Examples

    @@ -193,81 +183,71 @@

    Examp rtabulate( x = df, - row_by_var = "aaa", - col_by_var = "bbb", + row_by = df$aaa, + col_by = df$bbb, FUN = function(x) { sum(x$val) } -)
    #> X Y Z -#> (N=4) (N=4) (N=4) -#> ---------------------------------------- -#> A 8 12 16 -#> B 10 14 18
    +)
    #> X Y Z +#> ------------------------------- +#> A 8 12 16 +#> B 10 14 18
    rtabulate( x = iris, - row_by_var = no_by("sum"), - col_by_var = "Species", + row_by = no_by("sum"), + col_by = iris$Species, FUN = function(x) sum(x$Sepal.Length) )
    #> setosa versicolor virginica -#> (N=50) (N=50) (N=50) #> --------------------------------------------------------- #> sum 250.3 296.8 329.4
    rtabulate( x = iris, - row_by_var = "Species", - col_by_var = no_by("sum"), + row_by = iris$Species, + col_by = no_by("sum"), FUN = function(x) sum(x$Sepal.Length) -)
    #> sum -#> (N=150) -#> ------------------------- -#> setosa 250.3 -#> versicolor 296.8 -#> virginica 329.4
    -tbl <- rtabulate( - x = iris, - FUN = function(cell_data) c(sum(cell_data$Sepal.Length), sd(cell_data$Sepal.Length)), - format = "xx.xx (xx.xx%)" -) - -tbl
    #> col_1 -#> (N=150) -#> --------------------------- -#> row_1 876.5 (82.81%)
    -row.names(tbl)
    #> row_1 -#> "row_1"
    row.names(tbl) <- "Sum of Sepal Length" - -tbl
    #> col_1 -#> (N=150) -#> ----------------------------------------- -#> Sum of Sepal Length 876.5 (82.81%)
    -iris2 <- iris -iris2$fsl5 <- factor(iris$Sepal.Length > 5, levels = c(TRUE, FALSE), +)
    #> sum +#> ----------------------- +#> setosa 250.3 +#> versicolor 296.8 +#> virginica 329.4
    +fsl5 <- factor(iris$Sepal.Length > 5, levels = c(TRUE, FALSE), labels = c("S.L > 5", "S.L <= 5")) tbl <- rtabulate( - x = iris2, - row_by_var = "fsl5", - col_by_var = "Species", - FUN = function(x_cell, x_row, x_col) { + x = iris, + row_by = fsl5, + col_by = iris$Species, + FUN = function(x_cell) { if (nrow(x_cell) < 10) { rcell("-") } else { fit <- lm(Sepal.Length ~ Petal.Width, data = x_cell) - m_col <- mean(x_col$Sepal.Length) - m_row <- mean(x_row$Sepal.Length) - rcell(list(fit, m_col, m_row), format = function(x, output) { - paste("df:", x[[1]]$df.residual,", and", round(x[[2]],1), ", and", round(x[[3]],2)) + rcell(list(fit), format = function(x, output) { + paste("df:", x[[1]]$df.residual) }) } - } , - row_col_data_args = TRUE + } ) -tbl
    #> setosa versicolor virginica -#> (N=50) (N=50) (N=50) -#> ----------------------------------------------------------------------------------------------------------------- -#> S.L > 5 df: 20 , and 5 , and 6.13 df: 45 , and 5.9 , and 6.13 df: 47 , and 6.6 , and 6.13 -#> S.L <= 5 df: 26 , and 5 , and 4.79 - -
    +tbl
    #> setosa versicolor virginica +#> -------------------------------------------------------------- +#> S.L > 5 df: 20 df: 45 df: 47 +#> S.L <= 5 df: 26 - -
    +rtabulate( + x = iris, + row_by = fsl5, + col_by = iris$Species, + FUN = function(x_cell, N) { + N + }, + col_wise_args = list(N = c(10, 100, 200)) +)
    #> setosa versicolor virginica +#> -------------------------------------------------------------- +#> S.L > 5 10 100 200 +#> S.L <= 5 10 100 200
    + + +