Skip to content

Commit

Permalink
remove chunks + merge_expression_srv (#77)
Browse files Browse the repository at this point in the history
* remove datasets and chunks

* docs

* update examples

* deprecate data_merge_xxx

* docs

* Update R/data_merge_module.R

Co-authored-by: Dawid Kałędkowski <[email protected]>

* Update R/get_dplyr_call.R

Co-authored-by: Pawel Rucki <[email protected]>

* checks

* update

* tests

* pkgdown

* simplification

* Update R/data_merge_module.R

Co-authored-by: Dawid Kałędkowski <[email protected]>

* NEWS

* docs

* docs

* review Nik

Co-authored-by: Dawid Kałędkowski <[email protected]>
Co-authored-by: Pawel Rucki <[email protected]>
  • Loading branch information
3 people authored Jul 1, 2022
1 parent 2e86f50 commit 2140417
Show file tree
Hide file tree
Showing 25 changed files with 1,370 additions and 347 deletions.
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ export(is.choices_selected)
export(is_single_dataset)
export(list_extract_spec)
export(merge_datasets)
export(merge_expression_module)
export(merge_expression_srv)
export(no_selected_as_NULL)
export(resolve_delayed)
export(select_spec)
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
### New features

* `data_extract_ui`, `data_extract_srv`, `data_extract_multiple_srv` can be initialized by the list of (optionally `reactive`) `data.frame` objects.
* Added new modules `merge_expression_srv` and `merge_expression_module`, updates of `data_merge_srv` and `data_merge_module`
(which will be deprecated in future releases) respectively, where `datasets` argument takes a list of (optionally `reactive`) `data.frame` objects and a new argument `join_keys`.

# teal.transform 0.1.1

Expand Down
53 changes: 25 additions & 28 deletions R/data_extract_module.R
Original file line number Diff line number Diff line change
Expand Up @@ -257,8 +257,8 @@ check_data_extract_spec_react <- function(datasets, data_extract) {
#' @inheritParams shiny::moduleServer
#' @param datasets (`FilteredData` or `list` of `reactive` or non-`reactive` `data.frame`)\cr
#' object containing data either in the form of [teal.slice::FilteredData] or as a list of `data.frame`.
#' When passing a list of non-reactive `data.frame`s, they are converted to reactive `data.frame`s internally.
#' When passing a list of reactive or non-reactive `data.frame`s, the argument `keys` is required also.
#' When passing a list of non-reactive `data.frame` objects, they are converted to reactive `data.frame`s internally.
#' When passing a list of reactive or non-reactive `data.frame` objects, the argument `join_keys` is required also.
#' @param data_extract_spec (`data_extract_spec` or a list of `data_extract_spec`)\cr
#' A list of data filter and select information constructed by [data_extract_spec].
#' @param ...
Expand Down Expand Up @@ -395,31 +395,30 @@ data_extract_srv.FilteredData <- function(id, datasets, data_extract_spec, ...)
reactive(datasets$get_data(dataname = x, filtered = TRUE))
})

key_list <- sapply(X = datasets$datanames(), simplify = FALSE, FUN = function(x) {
datasets$get_keys(dataname = x)
})
join_keys_list <- datasets$get_join_keys()

filter_and_select_reactive <- data_extract_srv(
id = NULL,
datasets = data_list,
data_extract_spec = data_extract_spec,
keys = key_list
join_keys = join_keys_list
)
filter_and_select_reactive
}
)
}

#' @rdname data_extract_srv
#' @param keys (`list`) of keys per dataset in `datasets`
#' @param join_keys (named `list` or `NULL`) of keys per dataset in `datasets`
#' @export
data_extract_srv.list <- function(id, datasets, data_extract_spec, keys = NULL, ...) {
data_extract_srv.list <- function(id, datasets, data_extract_spec, join_keys = NULL, ...) {
checkmate::assert_list(datasets, types = c("reactive", "data.frame"), names = "named")
checkmate::assert_list(keys, "character", names = "named", null.ok = TRUE)
if (length(join_keys) == 0) join_keys <- NULL
checkmate::assert_list(join_keys, names = "named", null.ok = TRUE)
checkmate::assert(
.var.name = "keys",
checkmate::check_names(names(keys), subset.of = names(datasets)),
checkmate::check_null(keys)
.var.name = "join_keys",
checkmate::check_names(names(join_keys), subset.of = names(datasets)),
checkmate::check_null(join_keys)
)

moduleServer(
Expand All @@ -429,6 +428,9 @@ data_extract_srv.list <- function(id, datasets, data_extract_spec, keys = NULL,
"data_extract_srv.list initialized with datasets: { paste(names(datasets), collapse = ', ') }."
)

# get keys out of join_keys
keys <- sapply(names(datasets), simplify = FALSE, function(x) join_keys[[x]][[x]])

# convert to list of reactives
datasets <- sapply(X = datasets, simplify = FALSE, FUN = function(x) {
if (is.reactive(x)) x else reactive(x)
Expand Down Expand Up @@ -473,6 +475,7 @@ data_extract_srv.list <- function(id, datasets, data_extract_spec, keys = NULL,
input$dataset
}
})

filter_and_select_reactive <- reactive({
if (is.null(dataname())) {
NULL
Expand Down Expand Up @@ -589,29 +592,23 @@ data_extract_multiple_srv.FilteredData <- function(data_extract, datasets, ...)
reactive(datasets$get_data(dataname = x, filtered = TRUE))
})

key_list <- sapply(X = datasets$datanames(), simplify = FALSE, FUN = function(x) {
datasets$get_keys(dataname = x)
})
data_extract_multiple_srv(data_extract = data_extract, datasets = data_list, keys = key_list)
join_keys_list <- datasets$get_join_keys()
data_extract_multiple_srv(data_extract = data_extract, datasets = data_list, join_keys = join_keys_list)
}

#' @rdname data_extract_multiple_srv
#' @param keys (`list`) of keys per dataset in `datasets`
#' @param join_keys (named `list` or `NULL`) of join keys per dataset in `datasets`.
#' @export
data_extract_multiple_srv.list <- function(data_extract, datasets, keys = NULL, ...) {
data_extract_multiple_srv.list <- function(data_extract, datasets, join_keys = NULL, ...) {
checkmate::assert_list(datasets, types = c("reactive", "data.frame"), names = "named")
checkmate::assert_list(keys, "character", names = "named", null.ok = TRUE)
if (length(join_keys) == 0) join_keys <- NULL
checkmate::assert_list(join_keys, names = "named", null.ok = TRUE)
checkmate::assert(
.var.name = "keys",
checkmate::check_names(names(keys), subset.of = names(datasets)),
checkmate::check_null(keys)
.var.name = "join_keys",
checkmate::check_names(names(join_keys), subset.of = names(datasets)),
checkmate::check_null(join_keys)
)

# convert to list of reactives
datasets <- sapply(X = datasets, simplify = FALSE, FUN = function(x) {
if (is.reactive(x)) x else reactive(x)
})

logger::log_trace(
"data_extract_multiple_srv.list initialized with dataset: { paste(names(datasets), collapse = ', ') }."
)
Expand All @@ -627,7 +624,7 @@ data_extract_multiple_srv.list <- function(data_extract, datasets, keys = NULL,
id = x,
data_extract_spec = data_extract[[x]],
datasets = datasets,
keys = keys
join_keys = join_keys
)
}
)
Expand Down
34 changes: 22 additions & 12 deletions R/data_merge_module.R
Original file line number Diff line number Diff line change
Expand Up @@ -101,15 +101,6 @@ data_merge_module <- function(datasets,
logger::log_trace("data_merge_module called with: { paste(datasets$datanames(), collapse = ', ') } datasets.")

checkmate::assert_list(data_extract)
stopifnot(
all(vapply(
data_extract,
function(x) {
inherits(x, "data_extract_spec") || all(vapply(x, inherits, logical(1), "data_extract_spec"))
},
logical(1)
))
)

selector_list <- data_extract_multiple_srv(data_extract, datasets)

Expand Down Expand Up @@ -243,16 +234,35 @@ data_merge_srv <- function(id = "merge_id",
reactive({
checkmate::assert_list(selector_list(), names = "named", types = "reactive")
merge_fun_name <- if (inherits(merge_function, "reactive")) merge_function() else merge_function

datasets_list <- sapply(
datasets$datanames(),
simplify = FALSE,
function(x) reactive(datasets$get_data(x, filtered = TRUE))
)
join_keys <- datasets$get_join_keys()
check_merge_function(merge_fun_name)

ds <- Filter(Negate(is.null), lapply(selector_list(), function(x) x()))
validate(need(length(ds) > 0, "At least one dataset needs to be selected"))
merge_datasets(
ds,
datasets = datasets,
merged_data <- merge_datasets(
selector_list = ds,
datasets = datasets_list,
join_keys = join_keys,
merge_function = merge_fun_name,
anl_name = anl_name
)
ch <- teal.code::chunks_new()
datasets_list_nr <- sapply(datasets$datanames(), simplify = FALSE, datasets$get_data, filtered = TRUE)
teal.code::chunks_reset(envir = list2env(datasets_list_nr), chunks = ch)
for (chunk in merged_data$expr) teal.code::chunks_push(expression = chunk, chunks = ch)
teal.code::chunks_safe_eval(chunks = ch)
merged_data$data <- reactive({
ch$get(anl_name)
})
merged_data$chunks <- ch
merged_data$expr <- paste(merged_data$expr, collapse = "\n")
merged_data
})
}
)
Expand Down
11 changes: 6 additions & 5 deletions R/get_dplyr_call.R
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ get_dplyr_call_data <- function(selector_list, join_keys = list()) {
#' @param idx optional (\code{integer}) current selector index in all selectors list
#' @param dplyr_call_data (\code{list}) simplified selectors with aggregated set of filters,
#' selections, reshapes etc. All necessary data for merging
#' @param datasets (\code{NULL} or \code{FilteredData}).
#' @param data (`NULL` or named `list`).
#'
#' @return (\code{call}) filter, select, rename and reshape call
#' @keywords internal
Expand Down Expand Up @@ -206,7 +206,7 @@ get_dplyr_call <- function(selector_list,
logger::log_trace(
paste(
"get_dplyr_call called with:",
"{ paste(datasets$datanames(), collapse = ', ') } datasets;",
"{ paste(names(datasets), collapse = ', ') } datasets;",
"{ paste(names(selector_list), collapse = ', ') } selectors."
)
)
Expand Down Expand Up @@ -266,7 +266,7 @@ get_select_call <- function(select) {
#'
#' @param filter (\code{list}) Either list of lists or list with \code{select} and \code{selected} items.
#' @param dataname (\code{NULL} or \code{character}) name of dataset.
#' @param datasets (\code{NULL} or \code{FilteredData}).
#' @param datasets (\code{NULL} or \code{named `list`}).
#' @return (\code{call}) \code{dplyr} filter call
#' @keywords internal
#'
Expand All @@ -286,21 +286,22 @@ get_filter_call <- function(filter, dataname = NULL, datasets = NULL) {
"{ paste(sapply(filter, function(x) x$columns), collapse = ', ') } filters."
)
)
checkmate::assert_list(datasets, types = "reactive", names = "named", null.ok = TRUE)
if (is.null(filter)) {
return(NULL)
}

stopifnot((!is.null(dataname) && is.null(datasets)) ||
(is.null(dataname) && is.null(datasets)) ||
(!is.null(datasets) && isTRUE(dataname %in% datasets$datanames())))
(!is.null(datasets) && isTRUE(dataname %in% names(datasets))))

get_filter_call_internal <- function(filter, dataname, datasets) {
if (rlang::is_empty(filter$selected)) {
return(FALSE)
}

keys <- filter$columns
datas_vars <- if (!is.null(datasets)) datasets$get_data(dataname, filtered = TRUE) else NULL
datas_vars <- if (!is.null(datasets)) datasets[[dataname]]() else NULL

if (!is.null(datas_vars)) {
u_variables <- unique(apply(datas_vars[, keys, drop = FALSE], 1, function(x) paste(x, collapse = "-")))
Expand Down
5 changes: 4 additions & 1 deletion R/get_merge_call.R
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,10 @@ get_anl_relabel_call <- function(columns_source, datasets, anl_name = "ANL") {
if (rlang::is_empty(column_names)) {
return(NULL)
}
column_labels <- datasets$get_varlabels(attr(selector, "dataname"), column_names)

data_used <- datasets[[attr(selector, "dataname")]]
labels <- formatters::var_labels(datasets[[attr(selector, "dataname")]](), fill = FALSE)
column_labels <- labels[intersect(colnames(data_used()), column_names)]

# NULL for no labels at all, character(0) for no labels for a given columns
return(
Expand Down
66 changes: 22 additions & 44 deletions R/merge_datasets.R
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
#' Merge the datasets on the keys
#'
#' @description `r lifecycle::badge("stable")`
#' @description `r lifecycle::badge("experimental")`
#' It combines/merges multiple datasets with specified keys attribute.
#'
#'
#' @details Internally this function uses calls to allow reproducibility.
#'
#' @inheritParams data_merge_srv
#' @inheritParams merge_expression_srv
#' @return merged_dataset (`list`) containing:
#' \itemize{
#' \item data (`data.frame`) after filtering and reshaping containing selected columns.
#' \item `expr` (`character`) code needed to replicate merged dataset.
#' \item `expr` (`list` of `call`) code needed to replicate merged dataset.
#' \item columns_source (`list`) of column selected for particular selector.
#' \item keys (`list`) the keys of the merged dataset.
#' \item filter_info (`list`) The information given by the user. This information
Expand All @@ -34,25 +33,27 @@
#' )
#' merged_data <- merge_datasets(list(regressor(), response()))
#' }
merge_datasets <- function(selector_list, datasets, merge_function = "dplyr::full_join", anl_name = "ANL") {
merge_datasets <- function(selector_list, datasets, join_keys, merge_function = "dplyr::full_join", anl_name = "ANL") {
logger::log_trace(
paste(
"merge_datasets called with:",
"{ paste(datasets$datanames(), collapse = ', ') } datasets;",
"{ paste(names(datasets), collapse = ', ') } datasets;",
"{ paste(names(selector_list), collapse = ', ') } selectors;",
"{ merge_function } merge function."
)
)

checkmate::assert_list(selector_list, min.len = 1)
checkmate::assert_string(anl_name)
checkmate::assert_list(datasets, names = "named")
checkmate::assert_list(join_keys, names = "named")
stopifnot(attr(regexec("[A-Za-z0-9\\_]*", anl_name)[[1]], "match.length") == nchar(anl_name))
lapply(selector_list, check_selector)
merge_selectors_out <- merge_selectors(selector_list)
merged_selector_list <- merge_selectors_out[[1]]
merged_selector_map_id <- merge_selectors_out[[2]]
check_data_merge_selectors(merged_selector_list)

join_keys <- datasets$get_join_keys()
dplyr_call_data <- get_dplyr_call_data(merged_selector_list, join_keys)

validate_keys_sufficient(join_keys, merged_selector_list)
Expand All @@ -77,74 +78,51 @@ merge_datasets <- function(selector_list, datasets, merge_function = "dplyr::ful
SIMPLIFY = FALSE
)

chunks_stack <- teal.code::chunks$new()

new_env <- new.env()

selector_datanames <- unique(vapply(merged_selector_list, `[[`, character(1), "dataname"))
for (i in selector_datanames) {
logger::log_trace("merge_datasets { paste0(i, \"_FILTERED\") } assigned in chunks environment.")
assign(
paste0(i, "_FILTERED"),
datasets$get_data(i, filtered = TRUE),
envir = new_env

filtered_data_call <- lapply(selector_datanames, function(i) {
logger::log_trace("merge_datasets { paste0(i, \"_FILTERED\") } assigned.")
call(
"<-",
as.name(paste0(i, "_FILTERED")),
as.name(i)
)
}
chunks_stack$reset(envir = new_env)
})

for (idx in seq_along(merged_selector_list)) {
dplyr_calls <- lapply(seq_along(merged_selector_list), function(idx) {
dplyr_call <- get_dplyr_call(
selector_list = merged_selector_list,
idx = idx,
dplyr_call_data = dplyr_call_data,
datasets = datasets
)
anl_i_call <- call("<-", as.name(paste0(anl_name, "_", idx)), dplyr_call)
chunks_stack$push(anl_i_call, id = paste0("ANL_dplyr_call_", idx))
}
anl_i_call
})

anl_merge_calls <- get_merge_call(
selector_list = merged_selector_list,
dplyr_call_data = dplyr_call_data,
merge_function = merge_function,
anl_name = anl_name
)
for (idx in seq_along(anl_merge_calls)) {
chunks_stack$push(anl_merge_calls[[idx]], id = paste0("get_merge_call_", idx))
}

anl_relabel_call <- get_anl_relabel_call(
columns_source = get_relabel_cols(columns_source, dplyr_call_data), # don't relabel reshaped cols
datasets = datasets,
anl_name = anl_name
)

if (!is.null(anl_relabel_call)) {
chunks_stack$push(anl_relabel_call, id = "ANL_relabel_call")
}

all_call_string <- paste0(chunks_stack$get_rcode(), collapse = "\n")
all_calls_expression <- c(filtered_data_call, dplyr_calls, anl_merge_calls, anl_relabel_call)

# keys in each merged_selector_list element shoul be identical
# keys in each merged_selector_list element should be identical
# so take first one
keys <- merged_selector_list[[1]]$keys

filter_info <- lapply(merged_selector_list, "[[", "filters")

# Merge the datasets, ignore errors and warnings (kept in chunks)
suppressWarnings(chunks_stack$eval())

res <- list(
# put data into function to delay chunk code validation
# This allows the code from chunks to be accessible in "Show R Code"
# by either "expr" or "chunks" part of the returned list
data = function() {
logger::log_trace("merge_datasets { anl_name } dataset returned from chunks.")
chunks_stack$validate_is_ok()
chunks_stack$get(anl_name)
},
expr = all_call_string,
chunks = chunks_stack,
expr = all_calls_expression,
columns_source = columns_source,
keys = keys,
filter_info = filter_info
Expand Down
Loading

0 comments on commit 2140417

Please sign in to comment.