diff --git a/NEWS.md b/NEWS.md index 83e7cd9a6..9804a7209 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,9 @@ ## rtables 0.6.6.9001 +### Miscellaneous + * Added new chapter to Developer Guide on `rtables` internals about the printing machinery. + * `expand_newlines = FALSE` now works for all the secondary elements of the table (e.g. titles and footers). + ## rtables 0.6.6 ### New Features * Removed `ref_group` reordering in column splits so not to change the order. diff --git a/R/tt_toString.R b/R/tt_toString.R index 2f46d5510..37d5e95ae 100644 --- a/R/tt_toString.R +++ b/R/tt_toString.R @@ -55,7 +55,7 @@ setMethod("toString", "VTableTree", function(x, toString( matrix_form(x, indent_rownames = TRUE, - indent_size = indent_size + indent_size = indent_size # Only modifies the rownames in matrix_form ), widths = widths, col_gap = col_gap, hsep = hsep, @@ -132,6 +132,7 @@ table_shell_str <- function(tt, widths = NULL, col_gap = 3, hsep = default_hsep( ) } +## matrix_form ----------------------------------------------------------------- #' Transform `rtable` to a list of matrices which can be used for outputting #' @@ -140,7 +141,7 @@ table_shell_str <- function(tt, widths = NULL, col_gap = 3, hsep = default_hsep( #' #' @inheritParams gen_args #' @param indent_rownames logical(1), if TRUE the column with the row names in -#' the `strings` matrix of has indented row names (strings pre-fixed) +#' the `strings` matrix has indented row names (strings pre-fixed) #' @param expand_newlines logical(1). Should the matrix form generated #' expand rows whose values contain newlines into multiple #' 'physical' rows (as they will appear when rendered into @@ -196,10 +197,14 @@ setMethod( expand_newlines = TRUE, indent_size = 2) { stopifnot(is(obj, "VTableTree")) - header_content <- .tbl_header_mat(obj) # first col are for row.names - + + header_content <- .tbl_header_mat(obj) # first col are for row.names or topleft info + nr_header <- nrow(header_content$body) # colcounts were added in .tbl_header_mat + + # Summary of row contents - reprint_inds specifies which rows to reprint (hence the grouping) sr <- make_row_df(obj) - + + # With get_formatted_cells we get relevant information inside the table tree body_content_strings <- if (NROW(sr) == 0) { character() } else { @@ -211,7 +216,8 @@ setMethod( } else { cbind("", get_formatted_cells(obj, shell = TRUE)) } - + + # Takes the flatten spans for each row and repeats them according to the number of elements tsptmp <- lapply(collect_leaves(obj, TRUE, TRUE), function(rr) { sp <- row_cspans(rr) rep(sp, times = sp) @@ -232,27 +238,21 @@ setMethod( body <- rbind(header_content$body, body_content_strings) + # Init column format for header (empty if not for column counts) hdr_fmt_blank <- matrix("", nrow = nrow(header_content$body), ncol = ncol(header_content$body) ) + # If column counts are displayed, add column count format if (disp_ccounts(obj)) { hdr_fmt_blank[nrow(hdr_fmt_blank), ] <- c("", rep(colcount_format(obj), ncol(obj))) } - ## if(disp_ccounts(obj)) { - ## formats <- rbind(matrix("", nrow = nrow(header_content$body) - 1L, - ## ncol = ncol(header_content$body)), - - ## formats_strings) - ## } else { - ## formats <- rbind(header_content$body, formats_strings) - ## } + formats <- rbind(hdr_fmt_blank, formats_strings) spans <- rbind(header_content$span, body_spans) row.names(spans) <- NULL - ## unused??? space <- matrix(rep(0, length(body)), nrow = nrow(body)) aligns <- rbind( matrix(rep("center", length(header_content$body)), nrow = nrow(header_content$body) @@ -262,11 +262,7 @@ setMethod( aligns[, 1] <- "left" # row names and topleft (still needed for topleft) - ## if (any(apply(body, c(1, 2), function(x) grepl("\n", x, fixed = TRUE)))) - ## stop("no \\n allowed at the moment") - - - nr_header <- nrow(header_content$body) + # Main indentation of the table rownames if (indent_rownames) { body[, 1] <- indent_string(body[, 1], c(rep(0, nr_header), sr$indent), incr = indent_size @@ -275,7 +271,8 @@ setMethod( incr = indent_size ) } - + + # Handling of references in header and body col_ref_strs <- matrix(vapply(header_content$footnotes, function(x) { if (length(x) == 0) { "" @@ -284,7 +281,6 @@ setMethod( } }, ""), ncol = ncol(body)) body_ref_strs <- get_ref_matrix(obj) - body <- matrix( paste0( body, @@ -296,20 +292,21 @@ setMethod( nrow = nrow(body), ncol = ncol(body) ) + # Solve \n in titles if (any(grepl("\n", all_titles(obj)))) { if (any(grepl("\n", main_title(obj)))) { - tmp_title_vec <- .quick_handle_nl(main_title(obj)) + tmp_title_vec <- .quick_handle_nl(main_title(obj), expand_newlines) main_title(obj) <- tmp_title_vec[1] - subtitles(obj) <- c(tmp_title_vec[-1], .quick_handle_nl(subtitles(obj))) + subtitles(obj) <- c(tmp_title_vec[-1], .quick_handle_nl(subtitles(obj), expand_newlines)) } else { - subtitles(obj) <- .quick_handle_nl(subtitles(obj)) + subtitles(obj) <- .quick_handle_nl(subtitles(obj), expand_newlines) } } # Solve \n in footers - main_footer(obj) <- .quick_handle_nl(main_footer(obj)) - prov_footer(obj) <- .quick_handle_nl(prov_footer(obj)) + main_footer(obj) <- .quick_handle_nl(main_footer(obj), expand_newlines) + prov_footer(obj) <- .quick_handle_nl(prov_footer(obj), expand_newlines) # xxx \n in page titles are not working atm (I think) # ref_fnotes <- strsplit(get_formatted_fnotes(obj), "\n", fixed = TRUE) @@ -343,8 +340,8 @@ setMethod( } ) -.quick_handle_nl <- function(str_v) { - if (any(grepl("\n", str_v))) { +.quick_handle_nl <- function(str_v, expand_newlines) { + if (any(grepl("\n", str_v)) && isTRUE(expand_newlines)) { return(unlist(strsplit(str_v, "\n", fixed = TRUE))) } else { return(str_v) @@ -574,8 +571,7 @@ get_formatted_fnotes <- function(tt) { }) ) - - + # Information about coulumn counts is set here from cinfo if (disp_ccounts(cinfo)) { counts <- col_counts(cinfo) cformat <- colcount_format(cinfo) @@ -604,6 +600,7 @@ get_formatted_fnotes <- function(tt) { fnote <- rbind(fnote, rep(list(list()), nc)) } + # topleft information is set here from cinfo tl <- top_left(cinfo) lentl <- length(tl) nli <- nrow(body) @@ -617,8 +614,11 @@ get_formatted_fnotes <- function(tt) { # We want topleft alignment that goes to the bottom! tl <- c(rep("", nli - lentl), tl) } + + # Final output is a list of strings, spans, and footnotes list( - body = cbind(tl, body, deparse.level = 0), span = cbind(1, span), + body = cbind(tl, body, deparse.level = 0), + span = cbind(1, span), footnotes = cbind(list(list()), fnote) ) } diff --git a/man/matrix_form-VTableTree-method.Rd b/man/matrix_form-VTableTree-method.Rd index b511ba77f..a8cefc51f 100644 --- a/man/matrix_form-VTableTree-method.Rd +++ b/man/matrix_form-VTableTree-method.Rd @@ -15,7 +15,7 @@ \item{obj}{ANY. The object for the accessor to access or modify} \item{indent_rownames}{logical(1), if TRUE the column with the row names in -the \code{strings} matrix of has indented row names (strings pre-fixed)} +the \code{strings} matrix has indented row names (strings pre-fixed)} \item{expand_newlines}{logical(1). Should the matrix form generated expand rows whose values contain newlines into multiple diff --git a/vignettes/dev-guide/dg_notes.Rmd b/vignettes/dev-guide/dg_notes.Rmd index e35cd703f..b5e8143ed 100644 --- a/vignettes/dev-guide/dg_notes.Rmd +++ b/vignettes/dev-guide/dg_notes.Rmd @@ -18,6 +18,7 @@ knitr::opts_chunk$set(echo = TRUE) ## Disclaimer + This is a collection of notes divided by issues and it is a working document that will end up being a dev vignette one day. diff --git a/vignettes/dev-guide/dg_printing.Rmd b/vignettes/dev-guide/dg_printing.Rmd new file mode 100644 index 000000000..e4c5b19ea --- /dev/null +++ b/vignettes/dev-guide/dg_printing.Rmd @@ -0,0 +1,390 @@ +--- +title: "Printing Machinery" +author: "Davide Garolini" +date: '`r Sys.Date()`' +output: + html_document: + theme: spacelab + toc: true + toc_float: + collapsed: false +editor_options: + chunk_output_type: console +--- + +```{r setup, include=FALSE} +knitr::opts_chunk$set(echo = TRUE) +``` + + +## Disclaimer + +In comparison to other entries of the dev-guide, this is intended to keep track of the general concepts and processing pipeline behind the printing machinery. It is not intended to be a complete documentation of the machinery itself, but rather a collection of notes that can be used to understand the machinery and its internals. Hence, be aware that this is a working document that captures a snapshot of the machinery at a certain point in time. It is not meant to be fully maintained, but it can be used as a starting point for one. Differently from other parts of the dev-guide, this will contain the current state of `rlistings`' printing machinery, which is often a simplified version of the machinery that is used in `rtables`. + + +### How `print` works + +Lets track down what is going under the hood when a standard table is printed. The following is the code that is executed when a table is printed: + +```{r} +library(rtables) +library(dplyr) +lyt <- basic_table() %>% + split_rows_by("SEX", split_fun = keep_split_levels(c("F", "M"))) %>% + split_cols_by("ARM") %>% + analyze("BMRKR1") %>% + print() +tbl <- build_table(lyt, ex_adsl) %>% + print() +``` + +We see that also a layout object (`PreDataTableLayouts`) is created and printed. This is because `print` is a generic function that dispatches to different methods depending on the class of the object. In this case, the S4 class of the object is `PreDataTableLayouts` and the method that is called is `print`. In the case of {rtables} the method is dispatched towards the `show` method of the class `PreDataTableLayouts`. It can be found by searching `A Pre-data Table Layout` into {rtables} source code. I think R dispatcher for `print` methods looks for `show` S4 methods instead if there are no S3 or S4 `print` methods available. Indeed, this is the code that is executed: + +```{r, eval=FALSE} +setMethod( + "show", "PreDataTableLayouts", + function(object) { + cat("A Pre-data Table Layout\n") + cat("\nColumn-Split Structure:\n") + docat_predataxis(object@col_layout) + cat("\nRow-Split Structure:\n") + docat_predataxis(object@row_layout) + cat("\n") + invisible(object) + } +) +``` + +This was evident if we searched for methods associated with the class `PreDataTableLayouts`, where only `show` is connected to a sort of printing machinery: + +```{r, eval=TRUE} +methods(class = "PreDataTableLayouts") +``` + +Now, lets see the same for our result table `tbl`: + +```{r} +class(tbl) %>% print() + +getClass("TableTree") %>% print() # Main object representing a table in {rtables} + +methods(class = "TableTree") %>% print() # more than 70 methods but no print method +``` + +Again, the class itself has only the `show` method. Nonetheless, if you search for `VTableTree"` you will find the `print` method for the `TableTree` class. This is because `VTableTree` is a virtual class that is inherited by `TableTree` and is almost identical to the `show` method for `TableTree` objects. All the different statements in this case (`show` or `print`) do the same thing, i.e. they call `toString` and `cat` on the object. Hence, we know that every table is printed by `toString` with `\n` as separator for different lines so that `cat` renders it in its final format. + +### From `matrix_form` to `toString` + +If we have source code of `formatters`, `rtables`, and `rlistings` in our local we can search for `"toString"` S4 method definition across these source folders. We will find generics in `formatters` and three different `setMethod(...)`. `toString` is properly defined in `formatters`, but it is also present in `rlistings` and`rtables`. Let's take a look at the latter first. + +```{r, eval=FALSE} +setMethod("toString", "VTableTree", function(x, + widths = NULL, + col_gap = 3, + hsep = horizontal_sep(x), + indent_size = 2, + tf_wrap = FALSE, + max_width = NULL) { + toString( + matrix_form(x, + indent_rownames = TRUE, + indent_size = indent_size # Only modifies the rownames in matrix_form + ), + widths = widths, col_gap = col_gap, + hsep = hsep, + tf_wrap = tf_wrap, + max_width = max_width + ) +}) +``` + +This is only a wrapper/dispatcher to the core toString function in `formatters`, beside the `indent_size` specification. This is based on the "rendering-ready" class `MatrixPrintForm` that is produced by `matrix_form`. The latter is the first core transformation that we need to know to understand the printing process. All exporters and printers are based on `MatrixPrintForm` objects, hence any bug or problem needs to tracked down to this function or `toString`. If we take a look at `toString` for `"listing_df"` in `rlistings`, we will find a shallow wrapper that dispatches to `MatrixPrintForm` objects: + +```{r, eval=FALSE} +setMethod("toString", "listing_df", function(x, ...) { + toString(matrix_form(x), ...) +}) +``` + +Hence lets take a look at `"matrix_form"` (if there are quotes, it is an S4 function from now on). Beside generics and self calls (`setMethod("matrix_form", "MatrixPrintForm", [...] obj)`), `rlistings` and `rtables` have their own "constructor" of `MatrixPrintForm` (the real one can be found in `formatters`). Let's start with the latter `"matrix_form"` which is dispatched when dealing with `VTableTree`s. +```{r, eval=FALSE} +# Entering matrix_form for VTableTree +trace("matrix_form", signature = "VTableTree", tracer = browser, exit = browser) +matrix_form(tbl) +untrace("matrix_form", signature = "VTableTree") +``` + +Now lets see the newly commented code for `matrix_form`. With `#->` I will comment some suggestions for further understandings. + +```{r, eval=FALSE} +setMethod( + "matrix_form", "VTableTree", + function(obj, + indent_rownames = FALSE, + expand_newlines = TRUE, + indent_size = 2) { + stopifnot(is(obj, "VTableTree")) + + #-> Read .tbl_header_mat and subfunctions (based largely on cinfo) it can help for understanding + # column structure and how it is printed (we can add a description of this process xxx) + # Note: it contains the display of column counts directives and specifics + header_content <- .tbl_header_mat(obj) # first col are for row.names or topleft info + nr_header <- nrow(header_content$body) # colcounts were added in .tbl_header_mat + + #-> As before, reading this function can help understanding how the content of the table is transformed + # in row content and how the structure of the table is preserved in a compact manner. It is complex + # function as it is a recursive one with the different dispatcher but following how different section_div + # are printed (with the dedicated assignment function) can help understanding the table structure and its + # row-wise transformation. + # Summary of row contents - reprint_inds specifies which rows to reprint (hence the grouping) + sr <- make_row_df(obj) + + # With get_formatted_cells we get relevant information inside the table tree + body_content_strings <- if (NROW(sr) == 0) { + character() + } else { + #-> get_formatted_cells is an interesting function to understand the structure of the table as + # it is design to extract only the "data" of the table as strings. Note how the label rows are + # taken from make_row_df instead. Check shell = TRUE afterwards to see how the format are retrieved. + cbind(as.character(sr$label), get_formatted_cells(obj)) + } + + formats_strings <- if (NROW(sr) == 0) { + character() + } else { + cbind("", get_formatted_cells(obj, shell = TRUE)) + } + + #-> Here spans are extracted for each row. Spans are rarely modified beyond its standard values. + # Takes the flatten spans for each row and repeats them according to the number elements + tsptmp <- lapply(collect_leaves(obj, TRUE, TRUE), function(rr) { + sp <- row_cspans(rr) + rep(sp, times = sp) + }) + + ## the 1 is for row labels + body_spans <- if (nrow(obj) > 0) { + cbind(1L, do.call(rbind, tsptmp)) + } else { + matrix(1, nrow = 0, ncol = ncol(obj) + 1) + } + + body_aligns <- if (NROW(sr) == 0) { + character() + } else { + cbind("left", get_cell_aligns(obj)) #-> extracts align values for each cell + } + + body <- rbind(header_content$body, body_content_strings) + + # Init column format for header (empty if not for column counts) + hdr_fmt_blank <- matrix("", + nrow = nrow(header_content$body), + ncol = ncol(header_content$body) + ) + # If column counts are displayed, add column count format + if (disp_ccounts(obj)) { + hdr_fmt_blank[nrow(hdr_fmt_blank), ] <- c("", rep(colcount_format(obj), ncol(obj))) + } + + formats <- rbind(hdr_fmt_blank, formats_strings) + + spans <- rbind(header_content$span, body_spans) + row.names(spans) <- NULL + + aligns <- rbind( + matrix(rep("center", length(header_content$body)), + nrow = nrow(header_content$body) + ), + body_aligns + ) + + aligns[, 1] <- "left" # row names and topleft (still needed for topleft) + + # Main indentation of the table rownames #-> Main indentation facility + if (indent_rownames) { + body[, 1] <- indent_string(body[, 1], c(rep(0, nr_header), sr$indent), + incr = indent_size + ) + formats[, 1] <- indent_string(formats[, 1], c(rep(0, nr_header), sr$indent), + incr = indent_size + ) + } + + #-> referential strings are added to the table. get_ref_matrix is the core of this process + # along with format_fnote_ref that in this case is used to format the reference string and their + # indices. Note that the footnotes for the header is taken from the output of .tbl_header_mat + # Handling of references in header and body + col_ref_strs <- matrix(vapply(header_content$footnotes, function(x) { + if (length(x) == 0) { + "" + } else { + paste(vapply(x, format_fnote_ref, ""), collapse = " ") + } + }, ""), ncol = ncol(body)) + body_ref_strs <- get_ref_matrix(obj) + body <- matrix( + paste0( + body, + rbind( + col_ref_strs, #-> col_ref_strs are added to the body as a separate section + body_ref_strs + ) + ), + nrow = nrow(body), + ncol = ncol(body) + ) + + # Solve \n in titles + if (any(grepl("\n", all_titles(obj)))) { + if (any(grepl("\n", main_title(obj)))) { + tmp_title_vec <- .quick_handle_nl(main_title(obj)) + main_title(obj) <- tmp_title_vec[1] + subtitles(obj) <- c(tmp_title_vec[-1], .quick_handle_nl(subtitles(obj))) + } else { + subtitles(obj) <- .quick_handle_nl(subtitles(obj)) + } + } + + # Solve \n in footers + main_footer(obj) <- .quick_handle_nl(main_footer(obj)) + prov_footer(obj) <- .quick_handle_nl(prov_footer(obj)) + + + #-> this is still under development as indicated by xxx. The idea is to allow \n also in peculiar + # cases, such as page titles and referential footnotes. The latter are resolved in toString (pagination + # will not count them as more than one line each), while for the former we do not have any coverage yet. + # xxx \n in page titles are not working atm (I think) + # ref_fnotes <- strsplit(get_formatted_fnotes(obj), "\n", fixed = TRUE) + ref_fnotes <- get_formatted_fnotes(obj) # pagination will not count extra lines coming from here + pag_titles <- page_titles(obj) + + MatrixPrintForm( + strings = body, #-> FUNDAMENTAL: this is the matrix that contains all the cell strings + spans = spans, + aligns = aligns, + formats = formats, + ## display = display, purely a function of spans, handled in constructor now + row_info = sr, #-> FUNDAMENTAL: this is the data.frame that contains all the information about the rows + # it is the most complex data brought forward into toString + ## line_grouping handled internally now line_grouping = 1:nrow(body), + ref_fnotes = ref_fnotes, + nlines_header = nr_header, ## this is fixed internally + nrow_header = nr_header, + expand_newlines = expand_newlines, + has_rowlabs = TRUE, + has_topleft = TRUE, #-> I think topleft material is handled later in toString + main_title = main_title(obj), + subtitles = subtitles(obj), + page_titles = pag_titles, + main_footer = main_footer(obj), + prov_footer = prov_footer(obj), + table_inset = table_inset(obj), + header_section_div = header_section_div(obj), + horizontal_sep = horizontal_sep(obj), + indent_size = indent_size + ) + } +) +``` + +Now lets see the `matrix_form` in `rlistings`: +```{r, eval=FALSE} +library(rlistings) +lsting <- as_listing(mtcars) +trace("matrix_form", signature = "listing_df", tracer = browser, exit = browser) +mf <- matrix_form(lsting) +untrace("matrix_form", signature = "listing_df") +``` + + +```{r, eval=FALSE} +setMethod( + "matrix_form", "listing_df", + rix_form <- function(obj, indent_rownames = FALSE) { #-> I have no idea why here there is an assignment xxx + ## we intentionally silently ignore indent_rownames because listings have + ## no rownames, but formatters::vert_pag_indices calls matrix_form(obj, TRUE) + ## unconditionally. + + + # Keeping only displayed columns + cols <- attr(obj, "listing_dispcols") # this is the list of columns to be displayed + listing <- obj[, cols] + atts <- attributes(obj) + atts$names <- cols + attributes(listing) <- atts + keycols <- get_keycols(listing) + + + bodymat <- matrix("", + nrow = nrow(listing), + ncol = ncol(listing) + ) + + colnames(bodymat) <- names(listing) + + + # Print only first appearer of key columns if repeated + curkey <- "" + for (i in seq_along(keycols)) { + kcol <- keycols[i] + kcolvec <- listing[[kcol]] + #-> format_value transforms the values of the column into strings + kcolvec <- vapply(kcolvec, format_value, "", format = obj_format(kcolvec), na_str = obj_na_str(kcolvec)) + curkey <- paste0(curkey, kcolvec) + disp <- c(TRUE, tail(curkey, -1) != head(curkey, -1)) #-> This condition only show the first appearer of a key + bodymat[disp, kcol] <- kcolvec[disp] + } + + # Print all other columns directly + nonkeycols <- setdiff(names(listing), keycols) + if (length(nonkeycols) > 0) { + for (nonk in nonkeycols) { + vec <- listing[[nonk]] + vec <- vapply(vec, format_value, "", format = obj_format(vec), na_str = obj_na_str(vec)) + bodymat[, nonk] <- vec + } + } + + + fullmat <- rbind( + var_labels(listing, fill = TRUE), # Extracts the variable labels + bodymat + ) + + colaligns <- rbind( + rep("center", length(cols)), # Col names are always centered? + matrix(sapply(listing, obj_align), + ncol = length(cols), + nrow = nrow(fullmat) - 1, + byrow = TRUE + ) + ) + + MatrixPrintForm( + strings = fullmat, + spans = matrix(1, + nrow = nrow(fullmat), + ncol = ncol(fullmat) + ), + ref_fnotes = list(), + aligns = colaligns, + formats = matrix(1, + nrow = nrow(fullmat), + ncol = ncol(fullmat) + ), + row_info = make_row_df(obj), + nlines_header = 1, ## XXX this is probably wrong!!! + nrow_header = 1, + has_topleft = FALSE, + has_rowlabs = FALSE, + expand_newlines = TRUE, # Always expand newlines, but this happens later!! XXX to fix + main_title = main_title(obj), + subtitles = subtitles(obj), + page_titles = page_titles(obj), + main_footer = main_footer(obj), + prov_footer = prov_footer(obj) + ) + } +) +``` \ No newline at end of file