Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

remove chunks + merge_expression_srv #77

Merged
merged 21 commits into from
Jul 1, 2022
Merged
Show file tree
Hide file tree
Changes from 20 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ export(is.choices_selected)
export(is_single_dataset)
export(list_extract_spec)
export(merge_datasets)
export(merge_expression_module)
export(merge_expression_srv)
export(no_selected_as_NULL)
export(resolve_delayed)
export(select_spec)
Expand Down
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
### New features

* `data_extract_ui`, `data_extract_srv`, `data_extract_multiple_srv` can be initialized by the list of (optionally `reactive`) `data.frame` objects.
* Added new modules `merge_expression_srv` and `merge_expression_module`, updates of `data_merge_srv` and `data_merge_module` respectively, where `datasets` argument takes a list of (optionally `reactive`) `data.frame` objects and a new argument `join_keys`.
mhallal1 marked this conversation as resolved.
Show resolved Hide resolved

# teal.transform 0.1.1

Expand Down
53 changes: 25 additions & 28 deletions R/data_extract_module.R
Original file line number Diff line number Diff line change
Expand Up @@ -257,8 +257,8 @@ check_data_extract_spec_react <- function(datasets, data_extract) {
#' @inheritParams shiny::moduleServer
#' @param datasets (`FilteredData` or `list` of `reactive` or non-`reactive` `data.frame`)\cr
#' object containing data either in the form of [teal.slice::FilteredData] or as a list of `data.frame`.
#' When passing a list of non-reactive `data.frame`s, they are converted to reactive `data.frame`s internally.
#' When passing a list of reactive or non-reactive `data.frame`s, the argument `keys` is required also.
#' When passing a list of non-reactive `data.frame` objects, they are converted to reactive `data.frame`s internally.
#' When passing a list of reactive or non-reactive `data.frame` objects, the argument `join_keys` is required also.
#' @param data_extract_spec (`data_extract_spec` or a list of `data_extract_spec`)\cr
#' A list of data filter and select information constructed by [data_extract_spec].
#' @param ...
Expand Down Expand Up @@ -395,31 +395,30 @@ data_extract_srv.FilteredData <- function(id, datasets, data_extract_spec, ...)
reactive(datasets$get_data(dataname = x, filtered = TRUE))
})

key_list <- sapply(X = datasets$datanames(), simplify = FALSE, FUN = function(x) {
datasets$get_keys(dataname = x)
})
join_keys_list <- datasets$get_join_keys()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you have created filtered_data not from teal.data then the primary keys probably won't be in the join_keys

#won't have join_keys
init_filtered_data(list(ADSL = ..., keys = c("USUBJID", "STUDYID")))
# will do
init_filtered_data(list(ADSL = ..., keys = c("USUBJID", "STUDYID"), join_keys = join_keys(...)))

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Issue created in teal.slice as discussed:
insightsengineering/teal.slice#58


filter_and_select_reactive <- data_extract_srv(
id = NULL,
datasets = data_list,
data_extract_spec = data_extract_spec,
keys = key_list
join_keys = join_keys_list
)
filter_and_select_reactive
}
)
}

#' @rdname data_extract_srv
#' @param keys (`list`) of keys per dataset in `datasets`
#' @param join_keys (named `list`) of keys per dataset in `datasets`
mhallal1 marked this conversation as resolved.
Show resolved Hide resolved
#' @export
data_extract_srv.list <- function(id, datasets, data_extract_spec, keys = NULL, ...) {
data_extract_srv.list <- function(id, datasets, data_extract_spec, join_keys = NULL, ...) {
checkmate::assert_list(datasets, types = c("reactive", "data.frame"), names = "named")
checkmate::assert_list(keys, "character", names = "named", null.ok = TRUE)
if (length(join_keys) == 0) join_keys <- NULL
checkmate::assert_list(join_keys, names = "named", null.ok = TRUE)
checkmate::assert(
.var.name = "keys",
checkmate::check_names(names(keys), subset.of = names(datasets)),
checkmate::check_null(keys)
.var.name = "join_keys",
checkmate::check_names(names(join_keys), subset.of = names(datasets)),
checkmate::check_null(join_keys)
)

moduleServer(
Expand All @@ -429,6 +428,9 @@ data_extract_srv.list <- function(id, datasets, data_extract_spec, keys = NULL,
"data_extract_srv.list initialized with datasets: { paste(names(datasets), collapse = ', ') }."
)

# get keys out of join_keys
keys <- sapply(names(datasets), simplify = FALSE, function(x) join_keys[[x]][[x]])

# convert to list of reactives
datasets <- sapply(X = datasets, simplify = FALSE, FUN = function(x) {
if (is.reactive(x)) x else reactive(x)
Expand Down Expand Up @@ -473,6 +475,7 @@ data_extract_srv.list <- function(id, datasets, data_extract_spec, keys = NULL,
input$dataset
}
})

filter_and_select_reactive <- reactive({
if (is.null(dataname())) {
NULL
Expand Down Expand Up @@ -589,29 +592,23 @@ data_extract_multiple_srv.FilteredData <- function(data_extract, datasets, ...)
reactive(datasets$get_data(dataname = x, filtered = TRUE))
})

key_list <- sapply(X = datasets$datanames(), simplify = FALSE, FUN = function(x) {
datasets$get_keys(dataname = x)
})
data_extract_multiple_srv(data_extract = data_extract, datasets = data_list, keys = key_list)
join_keys_list <- datasets$get_join_keys()
data_extract_multiple_srv(data_extract = data_extract, datasets = data_list, join_keys = join_keys_list)
}

#' @rdname data_extract_multiple_srv
#' @param keys (`list`) of keys per dataset in `datasets`
#' @param join_keys (named `list`) of join keys per dataset in `datasets`.
mhallal1 marked this conversation as resolved.
Show resolved Hide resolved
#' @export
data_extract_multiple_srv.list <- function(data_extract, datasets, keys = NULL, ...) {
data_extract_multiple_srv.list <- function(data_extract, datasets, join_keys = NULL, ...) {
checkmate::assert_list(datasets, types = c("reactive", "data.frame"), names = "named")
checkmate::assert_list(keys, "character", names = "named", null.ok = TRUE)
if (length(join_keys) == 0) join_keys <- NULL
checkmate::assert_list(join_keys, names = "named", null.ok = TRUE)
checkmate::assert(
.var.name = "keys",
checkmate::check_names(names(keys), subset.of = names(datasets)),
checkmate::check_null(keys)
.var.name = "join_keys",
checkmate::check_names(names(join_keys), subset.of = names(datasets)),
checkmate::check_null(join_keys)
)

# convert to list of reactives
datasets <- sapply(X = datasets, simplify = FALSE, FUN = function(x) {
if (is.reactive(x)) x else reactive(x)
})

logger::log_trace(
"data_extract_multiple_srv.list initialized with dataset: { paste(names(datasets), collapse = ', ') }."
)
Expand All @@ -627,7 +624,7 @@ data_extract_multiple_srv.list <- function(data_extract, datasets, keys = NULL,
id = x,
data_extract_spec = data_extract[[x]],
datasets = datasets,
keys = keys
join_keys = join_keys
)
}
)
Expand Down
39 changes: 27 additions & 12 deletions R/data_merge_module.R
Original file line number Diff line number Diff line change
Expand Up @@ -101,15 +101,6 @@ data_merge_module <- function(datasets,
logger::log_trace("data_merge_module called with: { paste(datasets$datanames(), collapse = ', ') } datasets.")

checkmate::assert_list(data_extract)
stopifnot(
all(vapply(
data_extract,
function(x) {
inherits(x, "data_extract_spec") || all(vapply(x, inherits, logical(1), "data_extract_spec"))
},
logical(1)
))
)

selector_list <- data_extract_multiple_srv(data_extract, datasets)

Expand Down Expand Up @@ -243,16 +234,40 @@ data_merge_srv <- function(id = "merge_id",
reactive({
checkmate::assert_list(selector_list(), names = "named", types = "reactive")
merge_fun_name <- if (inherits(merge_function, "reactive")) merge_function() else merge_function

datasets_list <- sapply(
datasets$datanames(),
simplify = FALSE,
function(x) reactive(datasets$get_data(x, filtered = TRUE))
)
join_keys <- datasets$get_join_keys()
check_merge_function(merge_fun_name)

ds <- Filter(Negate(is.null), lapply(selector_list(), function(x) x()))
validate(need(length(ds) > 0, "At least one dataset needs to be selected"))
merge_datasets(
ds,
datasets = datasets,
merged_data <- merge_datasets(
selector_list = ds,
datasets = datasets_list,
join_keys = join_keys,
merge_function = merge_fun_name,
anl_name = anl_name
)
ch <- teal.code::chunks_new()
datasets_list_nr <- sapply(
datasets$datanames(),
simplify = FALSE,
function(x) datasets$get_data(x, filtered = TRUE)
mhallal1 marked this conversation as resolved.
Show resolved Hide resolved
)
teal.code::chunks_reset(envir = list2env(datasets_list_nr), chunks = ch)
for (chunk in merged_data$expr) teal.code::chunks_push(expression = chunk, chunks = ch)
teal.code::chunks_safe_eval(chunks = ch)

merged_data$data <- reactive({
ch$get("ANL")
mhallal1 marked this conversation as resolved.
Show resolved Hide resolved
})
merged_data$chunks <- ch
merged_data$expr <- paste(merged_data$expr, collapse = "\n")
merged_data
})
}
)
Expand Down
11 changes: 6 additions & 5 deletions R/get_dplyr_call.R
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ get_dplyr_call_data <- function(selector_list, join_keys = list()) {
#' @param idx optional (\code{integer}) current selector index in all selectors list
#' @param dplyr_call_data (\code{list}) simplified selectors with aggregated set of filters,
#' selections, reshapes etc. All necessary data for merging
#' @param datasets (\code{NULL} or \code{FilteredData}).
#' @param data (`NULL` or named `list`).
#'
#' @return (\code{call}) filter, select, rename and reshape call
#' @keywords internal
Expand Down Expand Up @@ -206,7 +206,7 @@ get_dplyr_call <- function(selector_list,
logger::log_trace(
paste(
"get_dplyr_call called with:",
"{ paste(datasets$datanames(), collapse = ', ') } datasets;",
"{ paste(names(datasets), collapse = ', ') } datasets;",
"{ paste(names(selector_list), collapse = ', ') } selectors."
)
)
Expand Down Expand Up @@ -266,7 +266,7 @@ get_select_call <- function(select) {
#'
#' @param filter (\code{list}) Either list of lists or list with \code{select} and \code{selected} items.
#' @param dataname (\code{NULL} or \code{character}) name of dataset.
#' @param datasets (\code{NULL} or \code{FilteredData}).
#' @param datasets (\code{NULL} or \code{named `list`}).
#' @return (\code{call}) \code{dplyr} filter call
#' @keywords internal
#'
Expand All @@ -286,21 +286,22 @@ get_filter_call <- function(filter, dataname = NULL, datasets = NULL) {
"{ paste(sapply(filter, function(x) x$columns), collapse = ', ') } filters."
)
)
checkmate::assert_list(datasets, types = "reactive", names = "named", null.ok = TRUE)
if (is.null(filter)) {
return(NULL)
}

stopifnot((!is.null(dataname) && is.null(datasets)) ||
(is.null(dataname) && is.null(datasets)) ||
(!is.null(datasets) && isTRUE(dataname %in% datasets$datanames())))
(!is.null(datasets) && isTRUE(dataname %in% names(datasets))))

get_filter_call_internal <- function(filter, dataname, datasets) {
if (rlang::is_empty(filter$selected)) {
return(FALSE)
}

keys <- filter$columns
datas_vars <- if (!is.null(datasets)) datasets$get_data(dataname, filtered = TRUE) else NULL
datas_vars <- if (!is.null(datasets)) datasets[[dataname]]() else NULL

if (!is.null(datas_vars)) {
u_variables <- unique(apply(datas_vars[, keys, drop = FALSE], 1, function(x) paste(x, collapse = "-")))
Expand Down
5 changes: 4 additions & 1 deletion R/get_merge_call.R
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,10 @@ get_anl_relabel_call <- function(columns_source, datasets, anl_name = "ANL") {
if (rlang::is_empty(column_names)) {
return(NULL)
}
column_labels <- datasets$get_varlabels(attr(selector, "dataname"), column_names)

data_used <- datasets[[attr(selector, "dataname")]]
labels <- formatters::var_labels(datasets[[attr(selector, "dataname")]](), fill = FALSE)
column_labels <- labels[intersect(colnames(data_used()), column_names)]

# NULL for no labels at all, character(0) for no labels for a given columns
return(
Expand Down
66 changes: 22 additions & 44 deletions R/merge_datasets.R
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
#' Merge the datasets on the keys
#'
#' @description `r lifecycle::badge("stable")`
#' @description `r lifecycle::badge("experimental")`
#' It combines/merges multiple datasets with specified keys attribute.
#'
#'
#' @details Internally this function uses calls to allow reproducibility.
#'
#' @inheritParams data_merge_srv
#' @inheritParams merge_expression_srv
#' @return merged_dataset (`list`) containing:
#' \itemize{
#' \item data (`data.frame`) after filtering and reshaping containing selected columns.
#' \item `expr` (`character`) code needed to replicate merged dataset.
#' \item `expr` (`list` of `call`) code needed to replicate merged dataset.
#' \item columns_source (`list`) of column selected for particular selector.
#' \item keys (`list`) the keys of the merged dataset.
#' \item filter_info (`list`) The information given by the user. This information
Expand All @@ -34,25 +33,27 @@
#' )
#' merged_data <- merge_datasets(list(regressor(), response()))
#' }
merge_datasets <- function(selector_list, datasets, merge_function = "dplyr::full_join", anl_name = "ANL") {
merge_datasets <- function(selector_list, datasets, join_keys, merge_function = "dplyr::full_join", anl_name = "ANL") {
logger::log_trace(
paste(
"merge_datasets called with:",
"{ paste(datasets$datanames(), collapse = ', ') } datasets;",
"{ paste(names(datasets), collapse = ', ') } datasets;",
"{ paste(names(selector_list), collapse = ', ') } selectors;",
"{ merge_function } merge function."
)
)

checkmate::assert_list(selector_list, min.len = 1)
checkmate::assert_string(anl_name)
checkmate::assert_list(datasets, names = "named")
checkmate::assert_list(join_keys, names = "named")
stopifnot(attr(regexec("[A-Za-z0-9\\_]*", anl_name)[[1]], "match.length") == nchar(anl_name))
lapply(selector_list, check_selector)
merge_selectors_out <- merge_selectors(selector_list)
merged_selector_list <- merge_selectors_out[[1]]
merged_selector_map_id <- merge_selectors_out[[2]]
check_data_merge_selectors(merged_selector_list)

join_keys <- datasets$get_join_keys()
dplyr_call_data <- get_dplyr_call_data(merged_selector_list, join_keys)

validate_keys_sufficient(join_keys, merged_selector_list)
Expand All @@ -77,74 +78,51 @@ merge_datasets <- function(selector_list, datasets, merge_function = "dplyr::ful
SIMPLIFY = FALSE
)

chunks_stack <- teal.code::chunks$new()

new_env <- new.env()

selector_datanames <- unique(vapply(merged_selector_list, `[[`, character(1), "dataname"))
for (i in selector_datanames) {
logger::log_trace("merge_datasets { paste0(i, \"_FILTERED\") } assigned in chunks environment.")
assign(
paste0(i, "_FILTERED"),
datasets$get_data(i, filtered = TRUE),
envir = new_env

filtered_data_call <- lapply(selector_datanames, function(i) {
logger::log_trace("merge_datasets { paste0(i, \"_FILTERED\") } assigned.")
call(
"<-",
as.name(paste0(i, "_FILTERED")),
as.name(i)
)
}
chunks_stack$reset(envir = new_env)
})

for (idx in seq_along(merged_selector_list)) {
dplyr_calls <- lapply(seq_along(merged_selector_list), function(idx) {
dplyr_call <- get_dplyr_call(
selector_list = merged_selector_list,
idx = idx,
dplyr_call_data = dplyr_call_data,
datasets = datasets
)
anl_i_call <- call("<-", as.name(paste0(anl_name, "_", idx)), dplyr_call)
chunks_stack$push(anl_i_call, id = paste0("ANL_dplyr_call_", idx))
}
anl_i_call
})

anl_merge_calls <- get_merge_call(
selector_list = merged_selector_list,
dplyr_call_data = dplyr_call_data,
merge_function = merge_function,
anl_name = anl_name
)
for (idx in seq_along(anl_merge_calls)) {
chunks_stack$push(anl_merge_calls[[idx]], id = paste0("get_merge_call_", idx))
}

anl_relabel_call <- get_anl_relabel_call(
columns_source = get_relabel_cols(columns_source, dplyr_call_data), # don't relabel reshaped cols
datasets = datasets,
anl_name = anl_name
)

if (!is.null(anl_relabel_call)) {
chunks_stack$push(anl_relabel_call, id = "ANL_relabel_call")
}

all_call_string <- paste0(chunks_stack$get_rcode(), collapse = "\n")
all_calls_expression <- c(filtered_data_call, dplyr_calls, anl_merge_calls, anl_relabel_call)

# keys in each merged_selector_list element shoul be identical
# keys in each merged_selector_list element should be identical
# so take first one
keys <- merged_selector_list[[1]]$keys

filter_info <- lapply(merged_selector_list, "[[", "filters")

# Merge the datasets, ignore errors and warnings (kept in chunks)
suppressWarnings(chunks_stack$eval())

res <- list(
# put data into function to delay chunk code validation
# This allows the code from chunks to be accessible in "Show R Code"
# by either "expr" or "chunks" part of the returned list
data = function() {
logger::log_trace("merge_datasets { anl_name } dataset returned from chunks.")
chunks_stack$validate_is_ok()
chunks_stack$get(anl_name)
},
expr = all_call_string,
chunks = chunks_stack,
expr = all_calls_expression,
columns_source = columns_source,
keys = keys,
filter_info = filter_info
Expand Down
Loading