diff --git a/DESCRIPTION b/DESCRIPTION index 18bd16f4f..423bdda5c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -33,6 +33,7 @@ Imports: shinyjs, stats, teal.logger (>= 0.1.1), + teal.code (>= 0.4.1.9003), utils, yaml (>= 1.1.0) Suggests: @@ -57,7 +58,7 @@ Config/Needs/verdepcheck: rstudio/shiny, mllg/checkmate, yaml=vubiostat/r-yaml, rstudio/bslib, tidyverse/dplyr, yihui/knitr, tidyverse/magrittr, bioc::MultiAssayExperiment, tidyverse/readr, rstudio/reticulate, rstudio/rmarkdown, bioc::SummarizedExperiment, - r-lib/testthat, r-lib/withr + insightsengineering/teal.code, r-lib/testthat, r-lib/withr Config/Needs/website: insightsengineering/nesttemplate Encoding: UTF-8 Language: en-US diff --git a/NAMESPACE b/NAMESPACE index 9ac8192cd..52a6ca514 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,9 @@ # Generated by roxygen2: do not edit by hand +S3method("[",JoinKeys) +S3method("[<-",JoinKeys) +S3method("get_join_keys<-",JoinKeys) +S3method("get_join_keys<-",teal_data) S3method(as_cdisc,TealDataset) S3method(as_cdisc,TealDatasetConnector) S3method(dataset,MultiAssayExperiment) @@ -12,6 +16,7 @@ S3method(get_code,default) S3method(get_dataname,TealDataAbstract) S3method(get_dataname,TealDataset) S3method(get_dataname,TealDatasetConnector) +S3method(get_dataname,teal_data) S3method(get_dataset,TealDataAbstract) S3method(get_dataset,TealDataset) S3method(get_dataset,TealDatasetConnector) @@ -20,6 +25,11 @@ S3method(get_dataset_label,TealDatasetConnector) S3method(get_datasets,TealDataAbstract) S3method(get_datasets,TealDataset) S3method(get_datasets,TealDatasetConnector) +S3method(get_datasets,teal_data) +S3method(get_join_keys,JoinKeys) +S3method(get_join_keys,TealData) +S3method(get_join_keys,default) +S3method(get_join_keys,teal_data) S3method(get_key_duplicates,TealDataset) S3method(get_key_duplicates,data.frame) S3method(get_keys,TealDataAbstract) @@ -50,13 +60,14 @@ S3method(set_keys,TealDataAbstract) S3method(set_keys,TealDataset) S3method(set_keys,TealDatasetConnector) S3method(to_relational_data,MultiAssayExperiment) -S3method(to_relational_data,TealData) S3method(to_relational_data,TealDataset) S3method(to_relational_data,TealDatasetConnector) S3method(to_relational_data,data.frame) S3method(to_relational_data,list) export("col_labels<-") export("data_label<-") +export("datanames<-") +export("get_join_keys<-") export(as_cdisc) export(callable_code) export(callable_function) @@ -67,6 +78,7 @@ export(cdisc_dataset) export(cdisc_dataset_connector) export(cdisc_dataset_connector_file) export(cdisc_dataset_file) +export(cdisc_join_keys) export(code_cdisc_dataset_connector) export(code_dataset_connector) export(col_labels) @@ -75,6 +87,7 @@ export(csv_cdisc_dataset_connector) export(csv_dataset_connector) export(data_connection) export(data_label) +export(datanames) export(dataset) export(dataset_connector) export(dataset_connector_file) @@ -89,6 +102,7 @@ export(get_dataname) export(get_dataset) export(get_dataset_label) export(get_datasets) +export(get_join_keys) export(get_key_duplicates) export(get_keys) export(get_labels) @@ -118,7 +132,8 @@ export(teal_data_file) export(to_relational_data) export(validate_metadata) import(shiny) +import(teal.code) importFrom(digest,digest) importFrom(logger,log_trace) -importFrom(shinyjs,show) +importFrom(shinyjs,useShinyjs) importFrom(stats,setNames) diff --git a/NEWS.md b/NEWS.md index 12849888b..f593bc560 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,13 @@ # teal.data 0.3.0.9007 +### Enhancements +* Simplified `join_key` to better support primary keys. +* Added subset and subset-assignment to `JoinKeySet` class to manipulate relationship pair keys _(`[` and `[<-`)_. + +### Breaking changes + +* Introduced new data class (`teal_data`) which replaces deprecated `TealData`. New data class becomes a standard input for whole `teal` framework. +* Deprecated `teal_data` constructor when `TealDataset`, `TealDatasetConnector` or `TealDataConnector` objects are provided. New delayed data loading functions introduced in `teal` package. ### Miscellaneous * Specified minimal version of package dependencies. diff --git a/R/CallableFunction.R b/R/CallableFunction.R index 4eb87c68f..5eb20a5c7 100644 --- a/R/CallableFunction.R +++ b/R/CallableFunction.R @@ -144,7 +144,6 @@ CallableFunction <- R6::R6Class( # nolint # @return nothing refresh = function() { if (!is.null(private$fun_name) || !identical(private$fun_name, character(0))) { - # replaced str2lang found at: # https://rlang.r-lib.org/reference/call2.html private$call <- as.call( diff --git a/R/JoinKeys.R b/R/JoinKeys.R index 241eaa1e9..0b9255fb2 100644 --- a/R/JoinKeys.R +++ b/R/JoinKeys.R @@ -301,8 +301,10 @@ JoinKeys <- R6::R6Class( # nolint return(TRUE) } - if (xor(length(join_key_1$keys) == 0, length(join_key_2$keys) == 0) || - !identical(sort(join_key_1$keys), sort(setNames(names(join_key_2$keys), join_key_2$keys)))) { + if ( + xor(length(join_key_1$keys) == 0, length(join_key_2$keys) == 0) || + !identical(sort(join_key_1$keys), sort(setNames(names(join_key_2$keys), join_key_2$keys))) + ) { error_message(join_key_1$dataset_1, join_key_1$dataset_2) } } @@ -351,17 +353,18 @@ JoinKeys <- R6::R6Class( # nolint #' @export #' #' @examples -#' join_keys() +#' # setting join keys #' join_keys( #' join_key("dataset_A", "dataset_B", c("col_1" = "col_a")), #' join_key("dataset_A", "dataset_C", c("col_2" = "col_x", "col_3" = "col_y")) #' ) -#' join_keys( -#' join_key("dataset_A", "dataset_B", c("col_1" = "col_a")) -#' ) +#' # or +#' jk <- join_keys() +#' jk["dataset_A", "dataset_B"] <- c("col_1" = "col_a") +#' jk["dataset_A", "dataset_C"] <- c("col_2" = "col_x", "col_3" = "col_y") +#' join_keys <- function(...) { - x <- list(...) - + x <- rlang::list2(...) res <- JoinKeys$new() if (length(x) > 0) { res$set(x) @@ -370,6 +373,75 @@ join_keys <- function(...) { res } +#' @title Getter for JoinKeys that returns the relationship between pairs of datasets +#' @param x JoinKeys object to extract the join keys +#' @param dataset_1 (`character`) name of first dataset. +#' @param dataset_2 (`character`) name of second dataset. +#' @export +#' @keywords internal +`[.JoinKeys` <- function(x, dataset_1, dataset_2 = dataset_1) { + checkmate::assert_string(dataset_1) + checkmate::assert_string(dataset_2) + x$get(dataset_1, dataset_2) +} + +#' @rdname sub-.JoinKeys +#' @param value value to assign +#' @export +#' @keywords internal +`[<-.JoinKeys` <- function(x, dataset_1, dataset_2 = dataset_1, value) { + checkmate::assert_string(dataset_1) + checkmate::assert_string(dataset_2) + x$mutate(dataset_1, dataset_2, value) + x +} + +#' @rdname join_keys +#' @details +#' `cdisc_join_keys` is a wrapper around `join_keys` that sets the default +#' join keys for CDISC datasets. It is used internally by `cdisc_data` to +#' set the default join keys for CDISC datasets. +#' +#' @export +#' @examples +#' +#' # default CDISC join keys +#' cdisc_join_keys(join_key("dataset_A", "dataset_B", c("col_1" = "col_a")), "ADTTE") +#' +cdisc_join_keys <- function(...) { + data_objects <- rlang::list2(...) + + join_keys <- join_keys() + lapply(seq_along(data_objects), function(ix) { + item <- data_objects[[ix]] + name <- names(data_objects)[ix] + + if (checkmate::test_class(item, "JoinKeySet")) { + join_keys$set(item) + return(NULL) + } else if ( + checkmate::test_multi_class(item, c("TealDataConnector", "TealDataset", "TealDatasetConnector")) + ) { + return(NULL) + } else { + if ((is.null(name) || identical(trimws(name), "")) && is.character(item)) { + name <- item + } + if (name %in% names(default_cdisc_keys)) { + # Set default primary keys + keys_list <- default_cdisc_keys[[name]] + join_keys[name] <- keys_list$primary + + if (!is.null(keys_list$parent) && !is.null(keys_list$foreign)) { + join_keys[name, keys_list$parent] <- keys_list$foreign + } + } + } + }) + + join_keys +} + # wrappers ==== #' Mutate `JoinKeys` with a new values #' @@ -425,12 +497,16 @@ mutate_join_keys.TealData <- function(x, dataset_1, dataset_2, val) { # nolint x$mutate_join_keys(dataset_1, dataset_2, val) } - #' Create a relationship between a pair of datasets #' #' @description `r lifecycle::badge("stable")` #' +#' @details `join_key()` will create a relationship for the variables on a pair +#' of datasets. +#' #' @inheritParams mutate_join_keys +#' @param dataset_2 (optional `character`) other dataset name. In case it is omitted, then it +#' will create a primary key for `dataset_1`. #' @param keys (optionally named `character`) where `names(keys)` are columns in `dataset_1` #' with relationship to columns of `dataset_2` given by the elements in `keys`. #' If `names(keys)` is `NULL` then the same column names are used for both `dataset_1` @@ -441,7 +517,7 @@ mutate_join_keys.TealData <- function(x, dataset_1, dataset_2, val) { # nolint #' @seealso [join_keys()] #' #' @export -join_key <- function(dataset_1, dataset_2, keys) { +join_key <- function(dataset_1, dataset_2 = dataset_1, keys) { checkmate::assert_string(dataset_1) checkmate::assert_string(dataset_2) checkmate::assert_character(keys, any.missing = FALSE) diff --git a/R/cdisc_data.R b/R/cdisc_data.R index 63562afea..f44f8721f 100644 --- a/R/cdisc_data.R +++ b/R/cdisc_data.R @@ -1,105 +1,85 @@ #' Data input for teal app #' #' @description `r lifecycle::badge("stable")` -#' Function takes datasets and creates `TealData` object which can be used in `teal` applications. -#' -#' @note This function does not automatically assign keys to `TealDataset` -#' and `TealDatasetConnector` objects passed to it. If the keys are needed -#' they should be assigned before calling `cdisc_data`. See example: -#' ``` -#' test_dataset <- dataset("ADAE", teal.data::example_cdisc_data("ADAE")) # does not have keys -#' test_adsl <- cdisc_dataset("ADSL", teal.data::example_cdisc_data("ADSL")) -#' test_data <- cdisc_data(test_dataset, test_adsl) -#' get_keys(test_data, "ADAE") # returns character(0) -#' -#' test_dataset <- cdisc_dataset("ADAE", teal.data::example_cdisc_data("ADAE")) -#' test_data <- cdisc_data(test_dataset, test_adsl) -#' get_keys(test_data, "ADAE") # returns [1] "STUDYID" "USUBJID" "ASTDTM" "AETERM" "AESEQ" -#' ``` +#' Function is a wrapper around [teal_data()] and guesses `join_keys` +#' for given datasets whose names match ADAM datasets names. +#' #' @inheritParams teal_data -#' @param ... (`TealDataConnector`, `TealDataset` or -#' `TealDatasetConnector`) elements to include. #' @param join_keys (`JoinKeys`) or a single (`JoinKeySet`)\cr #' (optional) object with datasets column names used for joining. -#' If empty then it would be automatically derived basing on intersection of datasets primary keys +#' If empty then it would be automatically derived basing on intersection of datasets primary keys. +#' For ADAM datasets it would be automatically derived. #' -#' @return a `TealData` object +#' @return +#' - a `TealData` object when `TealDataset` `TealDatasetConnector`, `TealDataConnector` is provided, +#' - a `teal_data` object otherwise. #' #' @details This function checks if there were keys added to all data sets #' #' @export #' #' @examples -#' -#' ADSL <- teal.data::example_cdisc_data("ADSL") -#' ADTTE <- teal.data::example_cdisc_data("ADTTE") -#' -#' # basic example -#' cdisc_data( -#' cdisc_dataset("ADSL", ADSL), -#' cdisc_dataset("ADTTE", ADTTE), -#' code = "ADSL <- teal.data::example_cdisc_data(\"ADSL\") -#' ADTTE <- teal.data::example_cdisc_data(\"ADTTE\")", -#' check = TRUE -#' ) -#' -#' # Example with keys -#' cdisc_data( -#' cdisc_dataset("ADSL", ADSL, -#' keys = c("STUDYID", "USUBJID") -#' ), -#' cdisc_dataset("ADTTE", ADTTE, -#' keys = c("STUDYID", "USUBJID", "PARAMCD"), -#' parent = "ADSL" -#' ), +#' data <- cdisc_data( #' join_keys = join_keys( -#' join_key( -#' "ADSL", -#' "ADTTE", -#' c("STUDYID" = "STUDYID", "USUBJID" = "USUBJID") -#' ) -#' ), -#' code = "ADSL <- teal.data::example_cdisc_data(\"ADSL\") -#' ADTTE <- teal.data::example_cdisc_data(\"ADTTE\")", -#' check = TRUE +#' join_key("ADSL", "ADTTE", c("STUDYID" = "STUDYID", "USUBJID" = "USUBJID")) +#' ) #' ) +#' +#' data <- within(data, { +#' ADSL <- teal.data::example_cdisc_data("ADSL") +#' ADTTE <- teal.data::example_cdisc_data("ADTTE") +#' }) +#' cdisc_data <- function(..., - join_keys = teal.data::join_keys(), - code = "", + join_keys = teal.data::cdisc_join_keys(...), + code = character(0), check = FALSE) { - data_objects <- list(...) - checkmate::assert_list( - data_objects, - types = c("TealDataset", "TealDatasetConnector", "TealDataConnector") - ) - if (inherits(join_keys, "JoinKeySet")) { - join_keys <- teal.data::join_keys(join_keys) - } - - update_join_keys_to_primary(data_objects, join_keys) + teal_data(..., join_keys = join_keys, code = code, check = check) +} - retrieve_parents <- function(x) { - tryCatch( - x$get_parent(), - error = function(cond) rep(character(0), length(x$get_datanames())) +#' Extrapolate parents from `TealData` classes +#' +#' `r lifecycle::badge("deprecated")` +#' +#' note: This function will be removed once the following classes are defunct: +#' `TealDataConnector`, `TealDataset`, `TealDatasetConnector` +#' +#' @keywords internal +deprecated_join_keys_extract <- function(data_objects, join_keys) { + if ( + !checkmate::test_list( + data_objects, + types = c("TealDataConnector", "TealDataset", "TealDatasetConnector") ) + ) { + return(join_keys) } + # TODO: check if redundant with same call in teal_data body + update_join_keys_to_primary(data_objects, join_keys) new_parents_fun <- function(data_objects) { - lapply(data_objects, function(x) { - if (inherits(x, "TealDataConnector")) { - unlist(new_parents_fun(x$get_items()), recursive = FALSE) - } else { - list(retrieve_parents(x)) + lapply( + data_objects, + function(x) { + if (inherits(x, "TealDataConnector")) { + unlist(new_parents_fun(x$get_items()), recursive = FALSE) + } else { + list( + tryCatch( + x$get_parent(), + error = function(cond) rep(character(0), length(x$get_datanames())) + ) + ) + } } - }) + ) } new_parents <- unlist(new_parents_fun(data_objects), recursive = FALSE) names(new_parents) <- unlist(lapply(data_objects, function(x) { if (inherits(x, "TealDataConnector")) { - lapply(x$get_items(), function(z) z$get_dataname()) + lapply(x$get_items(), function(y) y$get_dataname()) } else { x$get_datanames() } @@ -111,15 +91,7 @@ cdisc_data <- function(..., join_keys$set_parents(new_parents) join_keys$update_keys_given_parents() - x <- TealData$new(..., check = check, join_keys = join_keys) - - if (length(code) > 0 && !identical(code, "")) { - x$set_pull_code(code = code) - } - - x$check_reproducibility() - x$check_metadata() - return(x) + join_keys } #' Load `TealData` object from a file diff --git a/R/datanames.R b/R/datanames.R new file mode 100644 index 000000000..932c40a3a --- /dev/null +++ b/R/datanames.R @@ -0,0 +1,43 @@ +#' Names of Data Sets in `teal_data` Object +#' +#' Get or set the value of the `datanames` slot. +#' +#' The `@datanames` slot in a `teal_data` object specifies which of the variables stored in its environment +#' (the `@env` slot) are data sets to be taken into consideration. +#' The contents of `@datanames` can be specified upon creation and default to all variables in `@env`. +#' Variables created later, which may well be data sets, are not automatically considered such. +#' Use this function to update the slot. +#' +#' @param x (`teal_data`) object to access or modify +#' @param value (`character`) new value for `@datanames`; all elements must be names of variables existing in `@env` +#' +#' @return The contents of `@datanames` or `teal_data` with updated `@datanames`. +#' +#' @examples +#' td <- teal_data(iris = iris) +#' td <- within(td, mtcars <- mtcars) +#' datanames(td) +#' datanames(td) <- c("iris", "mtcars") +#' +#' @name datanames +#' @aliases datanames,teal_data-method +#' @aliases datanames<-,teal_data,character-method + +#' @rdname datanames +#' @export +setGeneric("datanames", function(x) standardGeneric("datanames")) +setMethod("datanames", "teal_data", definition = function(x) { + x@datanames +}) + +#' @rdname datanames +#' @export +setGeneric("datanames<-", function(x, value) standardGeneric("datanames<-")) +setMethod("datanames<-", c("teal_data", "character"), definition = function(x, value) { + if (!all(is.element(value, ls(x@env, all.names = TRUE)))) { + stop("invalid name") + } + x@datanames <- value + methods::validObject(x) + x +}) diff --git a/R/get_dataname.R b/R/get_dataname.R index 3418c9087..9ac544a37 100644 --- a/R/get_dataname.R +++ b/R/get_dataname.R @@ -32,3 +32,9 @@ get_dataname.TealDatasetConnector <- function(x) { # nolint get_dataname.TealDataset <- function(x) { # nolint return(x$get_dataname()) } + +#' @rdname get_dataname +#' @export +get_dataname.teal_data <- function(x) { # nolint + return(x@datanames) +} diff --git a/R/get_datasets.R b/R/get_datasets.R index e72595725..48414bcef 100644 --- a/R/get_datasets.R +++ b/R/get_datasets.R @@ -133,3 +133,9 @@ get_datasets.TealDatasetConnector <- function(x) { # nolint get_datasets.TealDataset <- function(x) { x } + +#' @rdname get_datasets +#' @export +get_datasets.teal_data <- function(x) { + as.list(x@env)[teal.data::get_dataname(x)] +} diff --git a/R/get_join_keys.R b/R/get_join_keys.R new file mode 100644 index 000000000..a73a3e12f --- /dev/null +++ b/R/get_join_keys.R @@ -0,0 +1,59 @@ +#' Function to get join keys from a `` object +#' @param data `` - object to extract the join keys +#' @return Either `JoinKeys` object or `NULL` if no join keys +#' @export +get_join_keys <- function(data) { + UseMethod("get_join_keys", data) +} + +#' @rdname get_join_keys +#' @export +get_join_keys.default <- function(data) { + stop("get_join_keys function not implemented for object of class ", toString(class(data))) +} + +#' @rdname get_join_keys +#' @export +get_join_keys.teal_data <- function(data) { + data@join_keys +} + +#' @rdname get_join_keys +#' @export +get_join_keys.JoinKeys <- function(data) { + data +} + +#' @rdname get_join_keys +#' @export +get_join_keys.TealData <- function(data) { + data$get_join_keys() +} + +#' @rdname get_join_keys +#' @inheritParams mutate_join_keys +#' @param value value to assign +#' @export +`get_join_keys<-` <- function(data, dataset_1, dataset_2 = NULL, value) { + UseMethod("get_join_keys<-", data) +} + + +#' @rdname get_join_keys +#' @inheritParams mutate_join_keys +#' @export +`get_join_keys<-.JoinKeys` <- function(data, dataset_1, dataset_2 = NULL, value) { + # The reason this passthrough method is defined is to prevent a warning message + # The assignment is performed by `get_join_keys.JoinKeys` and `[<-.JoinKeys` combination + # as well as `JoinKeys` being an R6 class + data +} + +#' @rdname get_join_keys +#' @export +`get_join_keys<-.teal_data` <- function(data, dataset_1, dataset_2 = NULL, value) { + # The reason this passthrough method is defined is to prevent a warning message + # The assignment is performed by `get_join_keys.teal_data` and `[<-.JoinKeys` combination + # as well as `JoinKeys` being an R6 class + data +} diff --git a/R/teal.data.R b/R/teal.data.R index 83037785d..70bd1af1a 100644 --- a/R/teal.data.R +++ b/R/teal.data.R @@ -13,6 +13,6 @@ #' @import shiny #' @importFrom digest digest #' @importFrom stats setNames -#' @importFrom shinyjs show +#' @importFrom shinyjs useShinyjs #' @importFrom logger log_trace NULL diff --git a/R/teal_data-class.R b/R/teal_data-class.R new file mode 100644 index 000000000..1506b1d7c --- /dev/null +++ b/R/teal_data-class.R @@ -0,0 +1,93 @@ +setOldClass("JoinKeys") + +#' Reproducible data. +#' +#' Reproducible data container class. Inherits code tracking behavior from [`teal.code::qenv-class`]. +#' +#' This class provides an isolated environment in which to store and process data with all code being recorded. +#' The environment, code, data set names, and data joining keys are stored in their respective slots. +#' These slots should never be accessed directly, use the provided get/set functions. +#' +#' As code is evaluated in `teal_data`, messages and warnings are stored in their respective slots. +#' If errors are raised, a `qenv.error` object is returned. +#' +#' @name teal_data-class +#' @rdname teal_data-class +#' +#' @slot env (`environment`) environment containing data sets and possibly auxiliary variables +#' Access variables with [get_var()] or [`[[`]. +#' No setter provided. Evaluate code to add variables into `@env`. +#' @slot code (`character`) representing code necessary to reproduce the contents of `@env`. +#' Access with [get_code()]. +#' No setter provided. Evaluate code to append code to the slot. +#' @slot id (`integer`) random identifier assigned to each line of code in `@code`. Used internally. +#' @slot warnings (`character`) the warnings output when evaluating the code. +#' @slot warnings (`character`) warnings raised when evaluating code. +#' Access with [get_warnings()]. +#' @slot messages (`character`) messages raised when evaluating code. +#' @slot join_keys (`JoinKeys`) object specifying joining keys for data sets in `@env`. +#' Access or modify with [get_join_keys()]. +#' @slot datanames (`character`) vector of names of data sets in `@env`. +#' Used internally to distinguish them from auxiliary variables. +#' Access or modify with [datanames()]. +#' +#' @import teal.code +#' @keywords internal +setClass( + Class = "teal_data", + contains = "qenv", + slots = c(join_keys = "JoinKeys", datanames = "character"), + prototype = list( + join_keys = join_keys(), + datanames = character(0) + ) +) + +#' Initialize `teal_data` object +#' +#' Initialize `teal_data` object. +#' @name new_teal_data +#' +#' @param data (`named list`) List of data. +#' @param code (`character` or `language`) code to reproduce the `data`. +#' Accepts and stores comments also. +#' @param join_keys (`JoinKeys`) object +#' @param datanames (`character`) names of datasets passed to `data`. +#' Needed when non-dataset objects are needed in the `env` slot. +#' @rdname new_teal_data +#' @keywords internal +new_teal_data <- function(data, + code = character(0), + join_keys = join_keys(), + datanames = union(names(data), names(join_keys$get()))) { + checkmate::assert_list(data) + checkmate::assert_class(join_keys, "JoinKeys") + if (is.null(datanames)) datanames <- character(0) # todo: allow to specify + checkmate::assert_character(datanames) + if (!any(is.language(code), is.character(code))) { + stop("`code` must be a character or language object.") + } + + if (is.language(code)) { + code <- format_expression(code) + } + if (length(code)) { + code <- paste(code, collapse = "\n") + } + + id <- sample.int(.Machine$integer.max, size = length(code)) + + new_env <- rlang::env_clone(list2env(data), parent = parent.env(.GlobalEnv)) + lockEnvironment(new_env, bindings = TRUE) + + methods::new( + "teal_data", + env = new_env, + code = code, + warnings = rep("", length(code)), + messages = rep("", length(code)), + id = id, + join_keys = join_keys, + datanames = datanames + ) +} diff --git a/R/teal_data.R b/R/teal_data.R index 2aab2abdc..037fb2650 100644 --- a/R/teal_data.R +++ b/R/teal_data.R @@ -1,64 +1,71 @@ -#' Teal data +#' Teal Data #' #' @description `r lifecycle::badge("stable")` -#' Universal function to pass data to teal application +#' Universal function to pass data to teal application. #' -#' @param ... (`TealDataConnector`, `TealDataset`, `TealDatasetConnector`)\cr -#' objects +#' @param ... (`TealDataConnector`, `TealDataset`, `TealDatasetConnector`, `any`)\cr +#' Either 1) an object of a `Teal*` class, which is deprecated and will be removed in next release, +#' or 2) any number of any objects provided as `name = value` pairs, which is available from version `0.4.0`. #' @param join_keys (`JoinKeys`) or a single (`JoinKeySet`)\cr #' (optional) object with dataset column relationships used for joining. #' If empty then no joins between pairs of objects -#' @param code (`character`) code to reproduce the datasets. +#' @param code (`character`, `language`) code to reproduce the datasets. #' @param check (`logical`) reproducibility check - whether to perform a check that the pre-processing #' code included in the object definitions actually produces those objects. #' If `check` is true and preprocessing code is empty an error will be thrown. #' -#' @return (`TealData`) +#' @return +#' If old data classes are provided (`TealDataset` `TealDatasetConnector`, `TealDataConnector`), a `TealData` object. +#' Otherwise a `teal_data` object. #' #' @export #' #' @examples -#' x1 <- dataset( -#' "x1", -#' iris, -#' code = "x1 <- iris" -#' ) -#' -#' x2 <- dataset( -#' "x2", -#' mtcars, -#' code = "x2 <- mtcars" -#' ) +#' teal_data(x1 = iris, x2 = mtcars) #' -#' teal_data(x1, x2) teal_data <- function(..., join_keys = teal.data::join_keys(), - code = "", + code = character(0), check = FALSE) { - data_objects <- list(...) - checkmate::assert_list( - data_objects, - types = c("TealDataset", "TealDatasetConnector", "TealDataConnector") - ) + data_objects <- rlang::list2(...) if (inherits(join_keys, "JoinKeySet")) { join_keys <- teal.data::join_keys(join_keys) } + if ( + checkmate::test_list( + data_objects, + types = c("TealDataConnector", "TealDataset", "TealDatasetConnector"), + min.len = 1 + ) + ) { + lifecycle::deprecate_warn( + when = "0.3.1", + "teal_data( + data_objects = 'should use data directly. Using TealDatasetConnector and TealDataset is deprecated. + Find more information on https://github.com/insightsengineering/teal/discussions/945' + )" + ) + deprecated_join_keys_extract(data_objects, join_keys) - update_join_keys_to_primary(data_objects, join_keys) - - x <- TealData$new(..., check = check, join_keys = join_keys) - - if (length(code) > 0 && !identical(code, "")) { - x$set_pull_code(code = code) + x <- TealData$new(..., check = check, join_keys = join_keys) + if (length(code) > 0 && !identical(code, "")) { + x$set_pull_code(code = code) + } + x$check_reproducibility() + x$check_metadata() + x + } else { + if (length(data_objects) > 0 && !checkmate::test_names(names(data_objects), type = "named")) { + stop("Dot (`...`) arguments on `teal_data()` must be named.") + } + new_teal_data( + data = data_objects, + code = code, + join_keys = join_keys + ) } - - x$check_reproducibility() - x$check_metadata() - - return(x) } - #' Load `TealData` object from a file #' #' @description `r lifecycle::badge("experimental")` diff --git a/R/to_relational_data.R b/R/to_relational_data.R index 3cb132b67..7cf7f5224 100644 --- a/R/to_relational_data.R +++ b/R/to_relational_data.R @@ -30,8 +30,7 @@ to_relational_data <- function(data) { #' @export to_relational_data.data.frame <- function(data) { # nolint dataname <- deparse(substitute(data, parent.frame()), width.cutoff = 500L) - - if (grepl("\\)$", dataname) && inherits(data, "data.frame")) { + if (grepl("\\)$", dataname)) { stop("Single data.frame shouldn't be provided as a result of a function call. Please name the object first or use a named list.") } @@ -118,11 +117,10 @@ to_relational_data.list <- function(data) { #' @keywords internal #' @export to_relational_data.MultiAssayExperiment <- function(data) { # nolint - teal_data(dataset("MAE", data)) -} - -#' @keywords internal -#' @export -to_relational_data.TealData <- function(data) { # nolint - data + dataname <- deparse(substitute(data, parent.frame()), width.cutoff = 500L) + if (grepl("\\)$", dataname)) { + stop("Single data.frame shouldn't be provided as a result of a function call. Please name + the object first or use a named list.") + } + teal_data(dataset(dataname, data)) } diff --git a/R/zzz.R b/R/zzz.R index a21f4485a..aa1402424 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -11,3 +11,6 @@ invisible() } + +# use non-exported function from teal.code +format_expression <- getFromNamespace("format_expression", "teal.code") diff --git a/_pkgdown.yml b/_pkgdown.yml index 7b3796d37..4684fe812 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -80,6 +80,7 @@ reference: - teal_data - teal_data_file - to_relational_data + - new_teal_data - title: Delayed Dataset Functions desc: For loading and modifying delayed data objects. contents: @@ -94,6 +95,7 @@ reference: - get_dataset_label - get_datasets - get_keys + - get_join_keys - get_raw_data - is_pulled - load_dataset @@ -106,6 +108,8 @@ reference: - title: Helpers desc: Other useful functions for users and developers. contents: + - datanames + - datanames<- - col_labels - col_labels<- - col_relabel diff --git a/inst/WORDLIST b/inst/WORDLIST index 9df28bd9e..c4aa9809b 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -1,14 +1,16 @@ CDISC -Forkers -Hoffmann -Pre -Reproducibility -SCDA -UI cloneable +Forkers formatters funder +Getter +Hoffmann iteratively +JoinKeys +Pre pre repo +Reproducibility reproducibility +SCDA +UI diff --git a/man/cdisc_data.Rd b/man/cdisc_data.Rd index 073157225..eea0b46b5 100644 --- a/man/cdisc_data.Rd +++ b/man/cdisc_data.Rd @@ -4,79 +4,53 @@ \alias{cdisc_data} \title{Data input for teal app} \usage{ -cdisc_data(..., join_keys = teal.data::join_keys(), code = "", check = FALSE) +cdisc_data( + ..., + join_keys = teal.data::cdisc_join_keys(...), + code = character(0), + check = FALSE +) } \arguments{ -\item{...}{(\code{TealDataConnector}, \code{TealDataset} or -\code{TealDatasetConnector}) elements to include.} +\item{...}{(\code{TealDataConnector}, \code{TealDataset}, \code{TealDatasetConnector}, \code{any})\cr +Either 1) an object of a \verb{Teal*} class, which is deprecated and will be removed in next release, +or 2) any number of any objects provided as \code{name = value} pairs, which is available from version \verb{0.4.0}.} \item{join_keys}{(\code{JoinKeys}) or a single (\code{JoinKeySet})\cr (optional) object with datasets column names used for joining. -If empty then it would be automatically derived basing on intersection of datasets primary keys} +If empty then it would be automatically derived basing on intersection of datasets primary keys. +For ADAM datasets it would be automatically derived.} -\item{code}{(\code{character}) code to reproduce the datasets.} +\item{code}{(\code{character}, \code{language}) code to reproduce the datasets.} \item{check}{(\code{logical}) reproducibility check - whether to perform a check that the pre-processing code included in the object definitions actually produces those objects. If \code{check} is true and preprocessing code is empty an error will be thrown.} } \value{ -a \code{TealData} object +\itemize{ +\item a \code{TealData} object when \code{TealDataset} \code{TealDatasetConnector}, \code{TealDataConnector} is provided, +\item a \code{teal_data} object otherwise. +} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} -Function takes datasets and creates \code{TealData} object which can be used in \code{teal} applications. +Function is a wrapper around \code{\link[=teal_data]{teal_data()}} and guesses \code{join_keys} +for given datasets whose names match ADAM datasets names. } \details{ This function checks if there were keys added to all data sets } -\note{ -This function does not automatically assign keys to \code{TealDataset} -and \code{TealDatasetConnector} objects passed to it. If the keys are needed -they should be assigned before calling \code{cdisc_data}. See example: - -\if{html}{\out{
}}\preformatted{test_dataset <- dataset("ADAE", teal.data::example_cdisc_data("ADAE")) # does not have keys -test_adsl <- cdisc_dataset("ADSL", teal.data::example_cdisc_data("ADSL")) -test_data <- cdisc_data(test_dataset, test_adsl) -get_keys(test_data, "ADAE") # returns character(0) - -test_dataset <- cdisc_dataset("ADAE", teal.data::example_cdisc_data("ADAE")) -test_data <- cdisc_data(test_dataset, test_adsl) -get_keys(test_data, "ADAE") # returns [1] "STUDYID" "USUBJID" "ASTDTM" "AETERM" "AESEQ" -}\if{html}{\out{
}} -} \examples{ - -ADSL <- teal.data::example_cdisc_data("ADSL") -ADTTE <- teal.data::example_cdisc_data("ADTTE") - -# basic example -cdisc_data( - cdisc_dataset("ADSL", ADSL), - cdisc_dataset("ADTTE", ADTTE), - code = "ADSL <- teal.data::example_cdisc_data(\"ADSL\") - ADTTE <- teal.data::example_cdisc_data(\"ADTTE\")", - check = TRUE -) - -# Example with keys -cdisc_data( - cdisc_dataset("ADSL", ADSL, - keys = c("STUDYID", "USUBJID") - ), - cdisc_dataset("ADTTE", ADTTE, - keys = c("STUDYID", "USUBJID", "PARAMCD"), - parent = "ADSL" - ), +data <- cdisc_data( join_keys = join_keys( - join_key( - "ADSL", - "ADTTE", - c("STUDYID" = "STUDYID", "USUBJID" = "USUBJID") - ) - ), - code = "ADSL <- teal.data::example_cdisc_data(\"ADSL\") - ADTTE <- teal.data::example_cdisc_data(\"ADTTE\")", - check = TRUE + join_key("ADSL", "ADTTE", c("STUDYID" = "STUDYID", "USUBJID" = "USUBJID")) + ) ) + +data <- within(data, { + ADSL <- teal.data::example_cdisc_data("ADSL") + ADTTE <- teal.data::example_cdisc_data("ADTTE") +}) + } diff --git a/man/datanames.Rd b/man/datanames.Rd new file mode 100644 index 000000000..3847538b4 --- /dev/null +++ b/man/datanames.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/datanames.R +\name{datanames} +\alias{datanames} +\alias{datanames,teal_data-method} +\alias{datanames<-,teal_data,character-method} +\alias{datanames<-} +\title{Names of Data Sets in \code{teal_data} Object} +\usage{ +datanames(x) + +datanames(x) <- value +} +\arguments{ +\item{x}{(\code{teal_data}) object to access or modify} + +\item{value}{(\code{character}) new value for \verb{@datanames}; all elements must be names of variables existing in \verb{@env}} +} +\value{ +The contents of \verb{@datanames} or \code{teal_data} with updated \verb{@datanames}. +} +\description{ +Get or set the value of the \code{datanames} slot. +} +\details{ +The \verb{@datanames} slot in a \code{teal_data} object specifies which of the variables stored in its environment +(the \verb{@env} slot) are data sets to be taken into consideration. +The contents of \verb{@datanames} can be specified upon creation and default to all variables in \verb{@env}. +Variables created later, which may well be data sets, are not automatically considered such. +Use this function to update the slot. +} +\examples{ +td <- teal_data(iris = iris) +td <- within(td, mtcars <- mtcars) +datanames(td) +datanames(td) <- c("iris", "mtcars") + +} diff --git a/man/deprecated_join_keys_extract.Rd b/man/deprecated_join_keys_extract.Rd new file mode 100644 index 000000000..beae3c4c4 --- /dev/null +++ b/man/deprecated_join_keys_extract.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cdisc_data.R +\name{deprecated_join_keys_extract} +\alias{deprecated_join_keys_extract} +\title{Extrapolate parents from \code{TealData} classes} +\usage{ +deprecated_join_keys_extract(data_objects, join_keys) +} +\description{ +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} +} +\details{ +note: This function will be removed once the following classes are defunct: +\code{TealDataConnector}, \code{TealDataset}, \code{TealDatasetConnector} +} +\keyword{internal} diff --git a/man/get_dataname.Rd b/man/get_dataname.Rd index 3c7ae2953..5c592b55c 100644 --- a/man/get_dataname.Rd +++ b/man/get_dataname.Rd @@ -5,6 +5,7 @@ \alias{get_dataname.TealDataAbstract} \alias{get_dataname.TealDatasetConnector} \alias{get_dataname.TealDataset} +\alias{get_dataname.teal_data} \title{S3 method for getting a \code{dataname(s)} of (\code{TealDataAbstract}, (\code{TealDatasetConnector} or \code{TealDataset}) R6 object} @@ -16,6 +17,8 @@ get_dataname(x) \method{get_dataname}{TealDatasetConnector}(x) \method{get_dataname}{TealDataset}(x) + +\method{get_dataname}{teal_data}(x) } \arguments{ \item{x}{(\code{TealDataAbstract}, \code{TealDatasetConnector} or diff --git a/man/get_datasets.Rd b/man/get_datasets.Rd index ea34069a8..82d3fe4b0 100644 --- a/man/get_datasets.Rd +++ b/man/get_datasets.Rd @@ -5,6 +5,7 @@ \alias{get_datasets.TealDataAbstract} \alias{get_datasets.TealDatasetConnector} \alias{get_datasets.TealDataset} +\alias{get_datasets.teal_data} \title{Get a \code{\link{TealDataset}} objects.} \usage{ get_datasets(x) @@ -14,6 +15,8 @@ get_datasets(x) \method{get_datasets}{TealDatasetConnector}(x) \method{get_datasets}{TealDataset}(x) + +\method{get_datasets}{teal_data}(x) } \arguments{ \item{x}{(\code{\link{TealData}})\cr diff --git a/man/get_join_keys.Rd b/man/get_join_keys.Rd new file mode 100644 index 000000000..4ec725a21 --- /dev/null +++ b/man/get_join_keys.Rd @@ -0,0 +1,44 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get_join_keys.R +\name{get_join_keys} +\alias{get_join_keys} +\alias{get_join_keys.default} +\alias{get_join_keys.teal_data} +\alias{get_join_keys.JoinKeys} +\alias{get_join_keys.TealData} +\alias{get_join_keys<-} +\alias{get_join_keys<-.JoinKeys} +\alias{get_join_keys<-.teal_data} +\title{Function to get join keys from a `` object} +\usage{ +get_join_keys(data) + +\method{get_join_keys}{default}(data) + +\method{get_join_keys}{teal_data}(data) + +\method{get_join_keys}{JoinKeys}(data) + +\method{get_join_keys}{TealData}(data) + +get_join_keys(data, dataset_1, dataset_2 = NULL) <- value + +\method{get_join_keys}{JoinKeys}(data, dataset_1, dataset_2 = NULL) <- value + +\method{get_join_keys}{teal_data}(data, dataset_1, dataset_2 = NULL) <- value +} +\arguments{ +\item{data}{`` - object to extract the join keys} + +\item{dataset_1}{(\code{character}) one dataset name} + +\item{dataset_2}{(\code{character}) other dataset name} + +\item{value}{value to assign} +} +\value{ +Either \code{JoinKeys} object or \code{NULL} if no join keys +} +\description{ +Function to get join keys from a `` object +} diff --git a/man/join_key.Rd b/man/join_key.Rd index 89d073870..17f020099 100644 --- a/man/join_key.Rd +++ b/man/join_key.Rd @@ -4,12 +4,13 @@ \alias{join_key} \title{Create a relationship between a pair of datasets} \usage{ -join_key(dataset_1, dataset_2, keys) +join_key(dataset_1, dataset_2 = dataset_1, keys) } \arguments{ \item{dataset_1}{(\code{character}) one dataset name} -\item{dataset_2}{(\code{character}) other dataset name} +\item{dataset_2}{(optional \code{character}) other dataset name. In case it is omitted, then it +will create a primary key for \code{dataset_1}.} \item{keys}{(optionally named \code{character}) where \code{names(keys)} are columns in \code{dataset_1} with relationship to columns of \code{dataset_2} given by the elements in \code{keys}. @@ -22,6 +23,10 @@ object of class \code{JoinKeySet} to be passed into \code{join_keys} function. \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} } +\details{ +\code{join_key()} will create a relationship for the variables on a pair +of datasets. +} \seealso{ \code{\link[=join_keys]{join_keys()}} } diff --git a/man/join_keys.Rd b/man/join_keys.Rd index b0d10e1e4..535d8585f 100644 --- a/man/join_keys.Rd +++ b/man/join_keys.Rd @@ -2,9 +2,12 @@ % Please edit documentation in R/JoinKeys.R \name{join_keys} \alias{join_keys} +\alias{cdisc_join_keys} \title{Create a \code{JoinKeys} out of a list of \code{JoinKeySet} objects} \usage{ join_keys(...) + +cdisc_join_keys(...) } \arguments{ \item{...}{optional, a \code{JoinKeySet} objects created using the \code{join_key} function.} @@ -18,14 +21,24 @@ join_keys(...) \details{ Note that join keys are symmetric although the relationship only needs to be specified once. + +\code{cdisc_join_keys} is a wrapper around \code{join_keys} that sets the default +join keys for CDISC datasets. It is used internally by \code{cdisc_data} to +set the default join keys for CDISC datasets. } \examples{ -join_keys() +# setting join keys join_keys( join_key("dataset_A", "dataset_B", c("col_1" = "col_a")), join_key("dataset_A", "dataset_C", c("col_2" = "col_x", "col_3" = "col_y")) ) -join_keys( - join_key("dataset_A", "dataset_B", c("col_1" = "col_a")) -) +# or +jk <- join_keys() +jk["dataset_A", "dataset_B"] <- c("col_1" = "col_a") +jk["dataset_A", "dataset_C"] <- c("col_2" = "col_x", "col_3" = "col_y") + + +# default CDISC join keys +cdisc_join_keys(join_key("dataset_A", "dataset_B", c("col_1" = "col_a")), "ADTTE") + } diff --git a/man/new_teal_data.Rd b/man/new_teal_data.Rd new file mode 100644 index 000000000..1191cebc5 --- /dev/null +++ b/man/new_teal_data.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/teal_data-class.R +\name{new_teal_data} +\alias{new_teal_data} +\title{Initialize \code{teal_data} object} +\usage{ +new_teal_data( + data, + code = character(0), + join_keys = join_keys(), + datanames = union(names(data), names(join_keys$get())) +) +} +\arguments{ +\item{data}{(\verb{named list}) List of data.} + +\item{code}{(\code{character} or \code{language}) code to reproduce the \code{data}. +Accepts and stores comments also.} + +\item{join_keys}{(\code{JoinKeys}) object} + +\item{datanames}{(\code{character}) names of datasets passed to \code{data}. +Needed when non-dataset objects are needed in the \code{env} slot.} +} +\description{ +Initialize \code{teal_data} object. +} +\keyword{internal} diff --git a/man/sub-.JoinKeys.Rd b/man/sub-.JoinKeys.Rd new file mode 100644 index 000000000..1fac6de0b --- /dev/null +++ b/man/sub-.JoinKeys.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/JoinKeys.R +\name{[.JoinKeys} +\alias{[.JoinKeys} +\alias{[<-.JoinKeys} +\title{Getter for JoinKeys that returns the relationship between pairs of datasets} +\usage{ +\method{[}{JoinKeys}(x, dataset_1, dataset_2 = dataset_1) + +\method{[}{JoinKeys}(x, dataset_1, dataset_2 = dataset_1) <- value +} +\arguments{ +\item{x}{JoinKeys object to extract the join keys} + +\item{dataset_1}{(\code{character}) name of first dataset.} + +\item{dataset_2}{(\code{character}) name of second dataset.} + +\item{value}{value to assign} +} +\description{ +Getter for JoinKeys that returns the relationship between pairs of datasets +} +\keyword{internal} diff --git a/man/teal_data-class.Rd b/man/teal_data-class.Rd new file mode 100644 index 000000000..e3fcad4c5 --- /dev/null +++ b/man/teal_data-class.Rd @@ -0,0 +1,46 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/teal_data-class.R +\docType{class} +\name{teal_data-class} +\alias{teal_data-class} +\title{Reproducible data.} +\description{ +Reproducible data container class. Inherits code tracking behavior from \code{\link[teal.code:qenv-class]{teal.code::qenv}}. +} +\details{ +This class provides an isolated environment in which to store and process data with all code being recorded. +The environment, code, data set names, and data joining keys are stored in their respective slots. +These slots should never be accessed directly, use the provided get/set functions. + +As code is evaluated in \code{teal_data}, messages and warnings are stored in their respective slots. +If errors are raised, a \code{qenv.error} object is returned. +} +\section{Slots}{ + +\describe{ +\item{\code{env}}{(\code{environment}) environment containing data sets and possibly auxiliary variables +Access variables with \code{\link[=get_var]{get_var()}} or [\code{[[}]. +No setter provided. Evaluate code to add variables into \verb{@env}.} + +\item{\code{code}}{(\code{character}) representing code necessary to reproduce the contents of \verb{@env}. +Access with \code{\link[=get_code]{get_code()}}. +No setter provided. Evaluate code to append code to the slot.} + +\item{\code{id}}{(\code{integer}) random identifier assigned to each line of code in \verb{@code}. Used internally.} + +\item{\code{warnings}}{(\code{character}) the warnings output when evaluating the code.} + +\item{\code{warnings}}{(\code{character}) warnings raised when evaluating code. +Access with \code{\link[=get_warnings]{get_warnings()}}.} + +\item{\code{messages}}{(\code{character}) messages raised when evaluating code.} + +\item{\code{join_keys}}{(\code{JoinKeys}) object specifying joining keys for data sets in \verb{@env}. +Access or modify with \code{\link[=get_join_keys]{get_join_keys()}}.} + +\item{\code{datanames}}{(\code{character}) vector of names of data sets in \verb{@env}. +Used internally to distinguish them from auxiliary variables. +Access or modify with \code{\link[=datanames]{datanames()}}.} +}} + +\keyword{internal} diff --git a/man/teal_data.Rd b/man/teal_data.Rd index 9aa287038..5b556fe54 100644 --- a/man/teal_data.Rd +++ b/man/teal_data.Rd @@ -2,43 +2,39 @@ % Please edit documentation in R/teal_data.R \name{teal_data} \alias{teal_data} -\title{Teal data} +\title{Teal Data} \usage{ -teal_data(..., join_keys = teal.data::join_keys(), code = "", check = FALSE) +teal_data( + ..., + join_keys = teal.data::join_keys(), + code = character(0), + check = FALSE +) } \arguments{ -\item{...}{(\code{TealDataConnector}, \code{TealDataset}, \code{TealDatasetConnector})\cr -objects} +\item{...}{(\code{TealDataConnector}, \code{TealDataset}, \code{TealDatasetConnector}, \code{any})\cr +Either 1) an object of a \verb{Teal*} class, which is deprecated and will be removed in next release, +or 2) any number of any objects provided as \code{name = value} pairs, which is available from version \verb{0.4.0}.} \item{join_keys}{(\code{JoinKeys}) or a single (\code{JoinKeySet})\cr (optional) object with dataset column relationships used for joining. If empty then no joins between pairs of objects} -\item{code}{(\code{character}) code to reproduce the datasets.} +\item{code}{(\code{character}, \code{language}) code to reproduce the datasets.} \item{check}{(\code{logical}) reproducibility check - whether to perform a check that the pre-processing code included in the object definitions actually produces those objects. If \code{check} is true and preprocessing code is empty an error will be thrown.} } \value{ -(\code{TealData}) +If old data classes are provided (\code{TealDataset} \code{TealDatasetConnector}, \code{TealDataConnector}), a \code{TealData} object. +Otherwise a \code{teal_data} object. } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} -Universal function to pass data to teal application +Universal function to pass data to teal application. } \examples{ -x1 <- dataset( - "x1", - iris, - code = "x1 <- iris" -) - -x2 <- dataset( - "x2", - mtcars, - code = "x2 <- mtcars" -) +teal_data(x1 = iris, x2 = mtcars) -teal_data(x1, x2) } diff --git a/staged_dependencies.yaml b/staged_dependencies.yaml index 4cdc94883..e57163180 100644 --- a/staged_dependencies.yaml +++ b/staged_dependencies.yaml @@ -3,6 +3,9 @@ current_repo: repo: insightsengineering/teal.data host: https://github.com upstream_repos: + insightsengineering/teal.code: + repo: insightsengineering/teal.code + host: https://github.com insightsengineering/teal.logger: repo: insightsengineering/teal.logger host: https://github.com diff --git a/tests/testthat/helper-get_join_keys.R b/tests/testthat/helper-get_join_keys.R new file mode 100644 index 000000000..34bca8a9e --- /dev/null +++ b/tests/testthat/helper-get_join_keys.R @@ -0,0 +1,45 @@ +#' Generate a teal_data dataset with sample data and JoinKeys +helper_generator_teal_data <- function() { + iris2 <- iris + iris2$id <- rnorm(NROW(iris2)) + iris2$id <- apply(iris2, 1, rlang::hash) + new_teal_data( + list( + ds1 = iris2, + ds2 = iris2 + ), + code = "ds1 <- iris2; ds2 <- iris2", + join_keys = helper_generator_JoinKeys("ds1", keys = c("id")) + ) +} + +#' Generate a JoinKeys +helper_generator_JoinKeys <- function(dataset_1 = "ds1", keys = c("id")) { # nolint + join_keys( + join_key(dataset_1, keys = keys) + ) +} + +#' Test suite for default get_join generated by helper +helper_test_get_join_keys <- function(obj, dataset_1 = "ds1") { + jk <- get_join_keys(obj) + + expect_s3_class(jk, c("JoinKey", "R6")) + expect_length(jk$get(), 1) + expect_length(jk$get(dataset_1), 1) + + obj +} + +#' Test suite for JoinKeys after manual adding a primary key +helper_test_get_join_keys_add <- function(obj, dataset_1 = "ds1", new_dataset_1 = "ds2", new_keys = c("id")) { + obj <- helper_test_get_join_keys(obj, dataset_1) + get_join_keys(obj)[new_dataset_1] <- c(new_keys) + + jk <- get_join_keys(obj) + + checkmate::expect_r6(jk, c("JoinKeys")) + expect_length(jk$get(), 2) + expect_length(jk$get(dataset_1), 1) + expect_length(jk$get(new_dataset_1), 1) +} diff --git a/tests/testthat/test-JoinKeys.R b/tests/testthat/test-JoinKeys.R index f3e9c80c4..72b300ae4 100644 --- a/tests/testthat/test-JoinKeys.R +++ b/tests/testthat/test-JoinKeys.R @@ -15,6 +15,7 @@ test_that("join_key throws error with invalid keys arguments", { # names(keys)!= keys if datasets are the same expect_error(join_key("d1", "d1", keys = c("B" = "A", "A" = "B"))) + expect_error(join_key("d1", keys = c("B" = "A", "A" = "B"))) }) test_that("key empty name is changed to the key value", { @@ -42,7 +43,6 @@ test_that("join_key throws error with invalid dataset arguments", { expect_error(join_key("d1", c("d1", "d2"), keys = c("A" = "B", "C" = "D"))) }) - test_that("join_key does not throw error with valid arguments", { # keys of length 0 expect_silent(join_key("d1", "d2", keys = character(0))) @@ -52,8 +52,9 @@ test_that("join_key does not throw error with valid arguments", { expect_silent(join_key("d1", "d2", keys = c("A" = "B", "C" = "D"))) # dataset_1 and dataset_2 can be the same if keys match expect_silent(join_key("d1", "d1", keys = c("A" = "A", "B" = "B"))) -}) + expect_silent(join_key("d1", keys = c("A" = "A", "B" = "B"))) +}) test_that("cannot set join_keys with incompatible keys", { # different keys @@ -257,33 +258,35 @@ testthat::test_that("JoinKeys$split method returns a named list of JoinKeys obje testthat::expect_equal(names(res$Y$get()), c("Y", "Z")) }) -testthat::test_that("JoinKeys$split method returns an updated list after - the state of the object is modified by JoinKeys$mutate()", { - x <- JoinKeys$new() - x$set( - list( - join_key("A", "B", c("a" = "b")), - join_key("A", "C", c("a" = "c", "aa" = "cc")), - join_key("Z", "Y", c("z" = "y")) +testthat::test_that( + "JoinKeys$split method returns an updated list after the state of the object is modified by JoinKeys$mutate()", + { + x <- JoinKeys$new() + x$set( + list( + join_key("A", "B", c("a" = "b")), + join_key("A", "C", c("a" = "c", "aa" = "cc")), + join_key("Z", "Y", c("z" = "y")) + ) ) - ) - res <- x$split() + res <- x$split() - x$mutate("A", "B", c("a" = "b", "aa" = "bb")) - res2 <- x$split() + x$mutate("A", "B", c("a" = "b", "aa" = "bb")) + res2 <- x$split() - testthat::expect_false(identical(res, res2)) - testthat::expect_identical(res2$A$get()$A$B, c("a" = "b", "aa" = "bb")) + testthat::expect_false(identical(res, res2)) + testthat::expect_identical(res2$A$get()$A$B, c("a" = "b", "aa" = "bb")) - # adding new datasets - x$mutate("D", "G", c("d" = "g")) - res3 <- x$split() - testthat::expect_false(identical(res, res3)) - testthat::expect_false(identical(res2, res3)) - testthat::expect_identical(res3$D$get()$D$G, c("d" = "g")) - testthat::expect_identical(res3$D$get()$G$D, c("g" = "d")) - testthat::expect_identical(names(res3$D$get()), c("D", "G")) -}) + # adding new datasets + x$mutate("D", "G", c("d" = "g")) + res3 <- x$split() + testthat::expect_false(identical(res, res3)) + testthat::expect_false(identical(res2, res3)) + testthat::expect_identical(res3$D$get()$D$G, c("d" = "g")) + testthat::expect_identical(res3$D$get()$G$D, c("g" = "d")) + testthat::expect_identical(names(res3$D$get()), c("D", "G")) + } +) testthat::test_that("JoinKeys$split method does not modify self", { x <- JoinKeys$new() @@ -347,35 +350,37 @@ testthat::test_that("JoinKeys$merge can handle edge case: argument is a list of testthat::expect_identical(previous_output, y$get()) }) -testthat::test_that("JoinKeys$merge throws error when improper argument is - passed in without modifying the caller", { - y <- JoinKeys$new() - y$set( - list( - join_key("A", "B", c("a" = "b")), - join_key("A", "C", c("a" = "c", "aa" = "cc")), - join_key("Z", "Y", c("z" = "y")) +testthat::test_that( + "JoinKeys$merge throws error when improper argument is passed in without modifying the caller", + { + y <- JoinKeys$new() + y$set( + list( + join_key("A", "B", c("a" = "b")), + join_key("A", "C", c("a" = "c", "aa" = "cc")), + join_key("Z", "Y", c("z" = "y")) + ) ) - ) - previous_output <- y$get() - testthat::expect_error(y$merge()) - testthat::expect_identical(previous_output, y$get()) + previous_output <- y$get() + testthat::expect_error(y$merge()) + testthat::expect_identical(previous_output, y$get()) - testthat::expect_error(y$merge(1)) - testthat::expect_identical(previous_output, y$get()) + testthat::expect_error(y$merge(1)) + testthat::expect_identical(previous_output, y$get()) - testthat::expect_error(y$merge("A")) - testthat::expect_identical(previous_output, y$get()) + testthat::expect_error(y$merge("A")) + testthat::expect_identical(previous_output, y$get()) - testthat::expect_error(y$merge(list())) - testthat::expect_identical(previous_output, y$get()) + testthat::expect_error(y$merge(list())) + testthat::expect_identical(previous_output, y$get()) - testthat::expect_error(y$merge(list(1))) - testthat::expect_identical(previous_output, y$get()) + testthat::expect_error(y$merge(list(1))) + testthat::expect_identical(previous_output, y$get()) - testthat::expect_error(y$merge(list("A"))) - testthat::expect_identical(previous_output, y$get()) -}) + testthat::expect_error(y$merge(list("A"))) + testthat::expect_identical(previous_output, y$get()) + } +) testthat::test_that("JoinKeys$merge does nothing when argument is a JoinKeys object with identical data", { x <- JoinKeys$new() @@ -645,3 +650,122 @@ testthat::test_that("JoinKeys$check_parent_child throws error if no join_keys ex "No join keys from df2 to its parent \\(df1\\) and vice versa" ) }) + +test_that("cdisc_join_keys will generate JoinKeys for named list with non-named elements", { + new_dataset <- cdisc_join_keys("ADSL", ADTTE = rADTTE) + jk <- get_join_keys(new_dataset) + + expect_identical(unname(jk$get("ADSL", "ADSL")), default_cdisc_keys[["ADSL"]]$primary) + expect_identical(unname(jk$get("ADTTE", "ADTTE")), default_cdisc_keys[["ADTTE"]]$primary) + + expect_identical(unname(jk$get("ADSL", "ADTTE")), default_cdisc_keys[["ADTTE"]]$foreign) + expect_identical(unname(jk$get("ADTTE", "ADSL")), default_cdisc_keys[["ADTTE"]]$foreign) +}) + +test_that("cdisc_join_keys will generate JoinKeys for character list", { + new_dataset <- cdisc_join_keys("ADSL", "ADTTE") + jk <- get_join_keys(new_dataset) + + expect_identical(unname(jk$get("ADSL", "ADSL")), default_cdisc_keys[["ADSL"]]$primary) + expect_identical(unname(jk$get("ADTTE", "ADTTE")), default_cdisc_keys[["ADTTE"]]$primary) + + expect_identical(unname(jk$get("ADSL", "ADTTE")), default_cdisc_keys[["ADTTE"]]$foreign) + expect_identical(unname(jk$get("ADTTE", "ADSL")), default_cdisc_keys[["ADTTE"]]$foreign) +}) + +test_that("cdisc_join_keys will generate JoinKeys for named list", { + new_dataset <- cdisc_join_keys(ADSL = rADSL, ADTTE = rADTTE) + jk <- get_join_keys(new_dataset) + + expect_identical(unname(jk$get("ADSL", "ADSL")), default_cdisc_keys[["ADSL"]]$primary) + expect_identical(unname(jk$get("ADTTE", "ADTTE")), default_cdisc_keys[["ADTTE"]]$primary) + + expect_identical(unname(jk$get("ADSL", "ADTTE")), default_cdisc_keys[["ADTTE"]]$foreign) + expect_identical(unname(jk$get("ADTTE", "ADSL")), default_cdisc_keys[["ADTTE"]]$foreign) +}) + +test_that("cdisc_join_keys will retrieve ADTTE primary and foreign keys", { + datasets <- names(default_cdisc_keys) + + internal_keys <- default_cdisc_keys[["ADTTE"]] + jk <- cdisc_join_keys("ADTTE") + primary_keys <- unname(jk$get("ADTTE", "ADTTE")) + + expect_equal(primary_keys, internal_keys$primary) + + foreign_keys <- unname(jk$get("ADTTE", internal_keys$parent)) + expect_equal(foreign_keys, internal_keys$foreign) +}) + +test_that("cdisc_join_keys will retrieve known primary and foreign keys", { + datasets <- names(default_cdisc_keys) + + vapply( + datasets, + function(.x) { + internal_keys <- default_cdisc_keys[[.x]] + jk <- cdisc_join_keys(.x) + primary_keys <- unname(jk$get(.x, .x)) + expect_equal(primary_keys, internal_keys$primary) + if (!is.null(internal_keys$foreign)) { + foreign_keys <- unname(jk$get(.x, internal_keys$parent)) + expect_equal(foreign_keys, internal_keys$foreign) + } + character(0) + }, + character(0) + ) +}) + +test_that("cdisc_join_keys will retrieve known primary keys", { + datasets <- names(default_cdisc_keys) + + vapply( + datasets, + function(.x) { + jk <- cdisc_join_keys(.x) + expect_equal(unname(jk[.x]), get_cdisc_keys(.x)) + character(0) + }, + character(0) + ) +}) + +test_that("cdisc_join_keys does nothing with TealDataset", { + adae_cf <- callable_function( + function() as.data.frame(as.list(setNames(nm = get_cdisc_keys("ADAE")))) + ) + adae_cdc <- cdisc_dataset_connector("ADAE", adae_cf, keys = get_cdisc_keys("ADAE")) + expect_length(get_join_keys(cdisc_join_keys(adae_cdc))$get(), 0) +}) + +test_that("[.JoinKeys returns the primary key if arguments only have 1 dataset", { + jk <- join_keys(join_key("ds1", keys = c("id"))) + + expect_failure(expect_identical(jk$get("ds1"), jk["ds1"])) + checkmate::expect_character(jk["ds1"]) +}) + +test_that("[.JoinKeys subsets relationship pair successfully", { + jk <- join_keys(join_key("ds1", keys = c("id"))) + + expect_identical(jk$get("ds1", "ds1"), jk["ds1"]) +}) + +test_that("[<-.JoinKeys assigns new relationship pair", { + jk <- join_keys(join_key("ds1", keys = c("id"))) + + expect_length(jk$get("ds1", "ds2"), 0) + + jk["ds1", "ds2"] <- c("id") + expect_identical(jk$get("ds1", "ds2"), c(id = "id")) + expect_identical(jk$get("ds1", "ds2"), jk["ds1", "ds2"]) +}) + +test_that("[<-.JoinKeys modifies existing relationship pair", { + jk <- join_keys(join_key("ds1", keys = c("id"))) + + jk["ds1", "ds1"] <- c("Species") + expect_failure(expect_identical(jk$get("ds1", "ds1"), c(id = "id"))) + expect_identical(jk$get("ds1", "ds1"), c(Species = "Species")) +}) diff --git a/tests/testthat/test-TealDatasetConnector.R b/tests/testthat/test-TealDatasetConnector.R index c26744e02..aa8a18888 100644 --- a/tests/testthat/test-TealDatasetConnector.R +++ b/tests/testthat/test-TealDatasetConnector.R @@ -564,7 +564,7 @@ testthat::test_that("code_dataset_connector - library calls", { label = "ADRS dataset" ) - data <- cdisc_data(adsl, adtte, adrs, check = TRUE) + lifecycle::expect_deprecated(data <- cdisc_data(adsl, adtte, adrs, check = TRUE)) expect_silent( lapply( data$get_items(), @@ -704,8 +704,10 @@ testthat::test_that("TealDatasetConnector mutate method with delayed logic", { testthat::expect_true(t_dc$is_mutate_delayed()) testthat::expect_true( - all(c("test_dc2$neg_integers <- t_dc3$neg_integers", "test_dc$six <- test_dc$five + 1") %in% - pretty_code_string(t_dc$get_code())) + all( + c("test_dc2$neg_integers <- t_dc3$neg_integers", "test_dc$six <- test_dc$five + 1") %in% + pretty_code_string(t_dc$get_code()) + ) ) testthat::expect_true(t_dc$is_mutate_delayed()) @@ -889,34 +891,40 @@ testthat::test_that("TealDatasetConnector returns the correct code when mutated testthat::expect_equal(dc$get_code_class()$get_code(), "mtcars <- (function() head(mtcars))()\n1") }) -testthat::test_that("Pulling an already pulled TealDatasetConnector after mutating it with a delayed object - undoes any eager pre-pull mutations", { - cf <- CallableFunction$new(function() head(mtcars)) - dc <- TealDatasetConnector$new("mtcars", cf) - dc$pull() - dc$mutate(code = "mtcars[1] <- NULL") - dc$mutate( - code = "", - vars = list(delayed = TealDatasetConnector$new("iris", CallableFunction$new(function() head(iris)))) - ) - dc$pull() - testthat::expect_equal(dc$get_raw_data(), head(mtcars)) -}) - -testthat::test_that("Pulling an already pulled TealDatasetConnector after mutating it with a delayed object - does not change the returned code", { - cf <- CallableFunction$new(function() head(mtcars)) - dc <- TealDatasetConnector$new("mtcars", cf) - dc$pull() - dc$mutate(code = "mtcars[1] <- NULL") - dc$mutate( - code = "", - vars = list(delayed = TealDatasetConnector$new("iris", CallableFunction$new(function() head(iris)))) - ) - pre_pull_code <- dc$get_code() - dc$pull() - testthat::expect_equal(dc$get_code(), pre_pull_code) -}) +testthat::test_that( + "Pulling an already pulled TealDatasetConnector after mutating it with a delayed object + undoes any eager pre-pull mutations", + { + cf <- CallableFunction$new(function() head(mtcars)) + dc <- TealDatasetConnector$new("mtcars", cf) + dc$pull() + dc$mutate(code = "mtcars[1] <- NULL") + dc$mutate( + code = "", + vars = list(delayed = TealDatasetConnector$new("iris", CallableFunction$new(function() head(iris)))) + ) + dc$pull() + testthat::expect_equal(dc$get_raw_data(), head(mtcars)) + } +) + +testthat::test_that( + "Pulling an already pulled TealDatasetConnector after mutating it with a delayed object + does not change the returned code", + { + cf <- CallableFunction$new(function() head(mtcars)) + dc <- TealDatasetConnector$new("mtcars", cf) + dc$pull() + dc$mutate(code = "mtcars[1] <- NULL") + dc$mutate( + code = "", + vars = list(delayed = TealDatasetConnector$new("iris", CallableFunction$new(function() head(iris)))) + ) + pre_pull_code <- dc$get_code() + dc$pull() + testthat::expect_equal(dc$get_code(), pre_pull_code) + } +) testthat::test_that("Initializing TealDatasetConnector with code argument works", { test_ds1 <- TealDataset$new("head_mtcars", head(mtcars), code = "head_mtcars <- head(mtcars)") @@ -995,18 +1003,20 @@ testthat::test_that("TealDatasetConnector$print prints dataset when it is pulled ) }) -testthat::test_that("get_var_r6 returns identical objects as these passed to the vars argument in - the constructor", { - test_ds0 <- TealDataset$new("head_mtcars", head(mtcars), code = "head_mtcars <- head(mtcars)") - test_ds1 <- TealDatasetConnector$new( - dataname = "test_dc", - pull_callable = CallableFunction$new(data.frame), - vars = list(test_ds0 = test_ds0) - ) +testthat::test_that( + "get_var_r6 returns identical objects as these passed to the vars argument in the constructor", + { + test_ds0 <- TealDataset$new("head_mtcars", head(mtcars), code = "head_mtcars <- head(mtcars)") + test_ds1 <- TealDatasetConnector$new( + dataname = "test_dc", + pull_callable = CallableFunction$new(data.frame), + vars = list(test_ds0 = test_ds0) + ) - vars <- test_ds1$get_var_r6() - testthat::expect_identical(vars$test_ds0, test_ds0) -}) + vars <- test_ds1$get_var_r6() + testthat::expect_identical(vars$test_ds0, test_ds0) + } +) testthat::test_that("clone(deep = TRUE) deep clones dependencies, which are TealDataset objects", { test_ds0 <- TealDataset$new("head_mtcars", head(mtcars), code = "head_mtcars <- head(mtcars)") @@ -1021,78 +1031,87 @@ testthat::test_that("clone(deep = TRUE) deep clones dependencies, which are Teal ) }) -testthat::test_that("reassign_datasets_vars updates the references of the vars to - addresses of passed objects", { - test_ds0 <- TealDataset$new("head_mtcars", head(mtcars), code = "head_mtcars <- head(mtcars)") - test_ds1 <- TealDatasetConnector$new( - dataname = "test_dc", - pull_callable = CallableFunction$new(data.frame), - vars = list(test_ds0 = test_ds0) - ) - - # after reassignment vars_r6, vars and muatate_vars match new reference - test_ds0_cloned <- test_ds0$clone(deep = TRUE) - test_ds1$reassign_datasets_vars(datasets = list(test_ds0 = test_ds0_cloned)) - - vars <- test_ds1$.__enclos_env__$private$pull_vars - testthat::expect_identical(vars$test_ds0, test_ds0_cloned) -}) - -testthat::test_that("reassign_datasets_vars updates the references of the vars_r6 to - addresses of passed objects", { - test_ds0 <- TealDataset$new("head_mtcars", head(mtcars), code = "head_mtcars <- head(mtcars)") - test_ds1 <- TealDatasetConnector$new( - dataname = "test_dc", - pull_callable = CallableFunction$new(data.frame), - vars = list(test_ds0 = test_ds0) - ) +testthat::test_that( + "reassign_datasets_vars updates the references of the vars to addresses of passed objects", + { + test_ds0 <- TealDataset$new("head_mtcars", head(mtcars), code = "head_mtcars <- head(mtcars)") + test_ds1 <- TealDatasetConnector$new( + dataname = "test_dc", + pull_callable = CallableFunction$new(data.frame), + vars = list(test_ds0 = test_ds0) + ) - # after reassignment vars_r6, vars and muatate_vars match new reference - test_ds0_cloned <- test_ds0$clone(deep = TRUE) - test_ds1$reassign_datasets_vars(datasets = list(test_ds0 = test_ds0_cloned)) + # after reassignment vars_r6, vars and muatate_vars match new reference + test_ds0_cloned <- test_ds0$clone(deep = TRUE) + test_ds1$reassign_datasets_vars(datasets = list(test_ds0 = test_ds0_cloned)) - vars_r6 <- test_ds1$get_var_r6() - testthat::expect_identical(vars_r6$test_ds0, test_ds0_cloned) -}) + vars <- test_ds1$.__enclos_env__$private$pull_vars + testthat::expect_identical(vars$test_ds0, test_ds0_cloned) + } +) + +testthat::test_that( + "reassign_datasets_vars updates the references of the vars_r6 to addresses of passed objects", + { + test_ds0 <- TealDataset$new("head_mtcars", head(mtcars), code = "head_mtcars <- head(mtcars)") + test_ds1 <- TealDatasetConnector$new( + dataname = "test_dc", + pull_callable = CallableFunction$new(data.frame), + vars = list(test_ds0 = test_ds0) + ) -testthat::test_that("reassign_datasets_vars does not change `vars` elements of - class different than TealDataset and TealDatasetConnector", { - test_ds0 <- mtcars - test_ds1 <- TealDataset$new("mtcars", mtcars) - test_ds2 <- TealDatasetConnector$new( - dataname = "iris", - pull_callable = callable_function(data.frame), - vars = list(test_ds0 = test_ds0, test_ds1 = test_ds1) - ) + # after reassignment vars_r6, vars and muatate_vars match new reference + test_ds0_cloned <- test_ds0$clone(deep = TRUE) + test_ds1$reassign_datasets_vars(datasets = list(test_ds0 = test_ds0_cloned)) - test_ds2$reassign_datasets_vars(list(test_ds1 = test_ds1)) - testthat::expect_identical( - test_ds2$.__enclos_env__$private$pull_vars$test_ds0, - test_ds0 - ) -}) + vars_r6 <- test_ds1$get_var_r6() + testthat::expect_identical(vars_r6$test_ds0, test_ds0_cloned) + } +) + +testthat::test_that( + "reassign_datasets_vars does not change `vars` elements of class different than + TealDataset and TealDatasetConnector", + { + test_ds0 <- mtcars + test_ds1 <- TealDataset$new("mtcars", mtcars) + test_ds2 <- TealDatasetConnector$new( + dataname = "iris", + pull_callable = callable_function(data.frame), + vars = list(test_ds0 = test_ds0, test_ds1 = test_ds1) + ) -testthat::test_that("reassign_datasets_vars does not change any `vars` while - empty list is provided", { - test_ds0 <- mtcars - test_ds1 <- TealDataset$new("mtcars", mtcars) - test_ds2 <- TealDataset$new("iris", iris) - test_ds2 <- TealDatasetConnector$new( - dataname = "iris", - pull_callable = callable_function(data.frame), - vars = list(test_ds0 = test_ds0, test_ds1 = test_ds1) - ) + test_ds2$reassign_datasets_vars(list(test_ds1 = test_ds1)) + testthat::expect_identical( + test_ds2$.__enclos_env__$private$pull_vars$test_ds0, + test_ds0 + ) + } +) + +testthat::test_that( + "reassign_datasets_vars does not change any `vars` while empty list is provided", + { + test_ds0 <- mtcars + test_ds1 <- TealDataset$new("mtcars", mtcars) + test_ds2 <- TealDataset$new("iris", iris) + test_ds2 <- TealDatasetConnector$new( + dataname = "iris", + pull_callable = callable_function(data.frame), + vars = list(test_ds0 = test_ds0, test_ds1 = test_ds1) + ) - test_ds2$reassign_datasets_vars(list()) - testthat::expect_identical( - test_ds2$.__enclos_env__$private$pull_vars$test_ds0, - test_ds0 - ) - testthat::expect_identical( - test_ds2$.__enclos_env__$private$pull_vars$test_ds1, - test_ds1 - ) -}) + test_ds2$reassign_datasets_vars(list()) + testthat::expect_identical( + test_ds2$.__enclos_env__$private$pull_vars$test_ds0, + test_ds0 + ) + testthat::expect_identical( + test_ds2$.__enclos_env__$private$pull_vars$test_ds1, + test_ds1 + ) + } +) testthat::test_that("Callable metadata is pulled when data is pulled", { diff --git a/tests/testthat/test-cdisc_data.R b/tests/testthat/test-cdisc_data.R index c7f8c8539..50340eca9 100644 --- a/tests/testthat/test-cdisc_data.R +++ b/tests/testthat/test-cdisc_data.R @@ -33,15 +33,15 @@ cdisc_data_mixed_call <- function(check = TRUE, join_keys1 = join_keys()) { } testthat::test_that("cdisc_data accepts TealDataset, TealDatasetConnector, TealDataConnector objects", { - testthat::expect_silent(data <- cdisc_data_mixed_call()) + lifecycle::expect_deprecated(data <- cdisc_data_mixed_call(), "should use data directly") testthat::expect_identical(data$get_datanames(), c("ADSL", "ADTTE", "ADAE")) }) -testthat::test_that("cdisc_data throws error if it receives undesired objects", { +testthat::test_that("cdisc_data returns teal_data object for objects different than old api", { adsl_raw <- as.data.frame(as.list(setNames(nm = get_cdisc_keys("ADSL")))) - testthat::expect_error( - teal_data(adsl_raw, check = TRUE), - "May only contain the following types: \\{TealDataset,TealDatasetConnector,TealDataConnector\\}" + testthat::expect_s4_class( + teal_data(adsl = adsl_raw, check = TRUE), + "teal_data" ) }) @@ -61,24 +61,26 @@ testthat::test_that("cdisc_data sets the join_keys internally", { }) testthat::test_that( - "cdisc_data sets the join_keys internally based on parents relations when primary keys are altered", { - jks <- join_keys(join_key("ADSL", "ADSL", c("STUDYID"))) - data <- cdisc_data_mixed_call(join_keys1 = jks) - - jks <- join_keys( - join_key("ADSL", "ADSL", c("STUDYID")), - join_key("ADTTE", "ADTTE", c("STUDYID", "USUBJID", "PARAMCD")), - join_key("ADAE", "ADAE", c("STUDYID", "USUBJID", "ASTDTM", "AETERM", "AESEQ")), - join_key("ADSL", "ADTTE", c("STUDYID")), - join_key("ADSL", "ADAE", c("STUDYID")), - join_key("ADTTE", "ADAE", c("STUDYID")) - ) - jks$set_parents(list(ADSL = character(0), ADTTE = "ADSL", ADAE = "ADSL")) - testthat::expect_equal( - data$get_join_keys(), - jks - ) -}) + "cdisc_data sets the join_keys internally based on parents relations when primary keys are altered", + { + jks <- join_keys(join_key("ADSL", "ADSL", c("STUDYID"))) + data <- cdisc_data_mixed_call(join_keys1 = jks) + + jks <- join_keys( + join_key("ADSL", "ADSL", c("STUDYID")), + join_key("ADTTE", "ADTTE", c("STUDYID", "USUBJID", "PARAMCD")), + join_key("ADAE", "ADAE", c("STUDYID", "USUBJID", "ASTDTM", "AETERM", "AESEQ")), + join_key("ADSL", "ADTTE", c("STUDYID")), + join_key("ADSL", "ADAE", c("STUDYID")), + join_key("ADTTE", "ADAE", c("STUDYID")) + ) + jks$set_parents(list(ADSL = character(0), ADTTE = "ADSL", ADAE = "ADSL")) + testthat::expect_equal( + data$get_join_keys(), + jks + ) + } +) testthat::test_that("cdisc_data sets primary keys as join_keys when no join_keys are present", { df1 <- data.frame(id = c("A", "B"), a = c(1L, 2L)) @@ -226,8 +228,9 @@ testthat::test_that("cdisc_data_file loads the TealData object", { rlang::local_options(lifecycle_verbosity = "quiet") tmp_file <- tempfile(fileext = ".R") - writeLines(text = c( - "adsl_raw <- as.data.frame(as.list(setNames(nm = get_cdisc_keys('ADSL')))) + writeLines( + text = c( + "adsl_raw <- as.data.frame(as.list(setNames(nm = get_cdisc_keys('ADSL')))) adsl <- cdisc_dataset( dataname = 'ADSL', x = adsl_raw, @@ -235,8 +238,8 @@ testthat::test_that("cdisc_data_file loads the TealData object", { ) cdisc_data(adsl) " - ), - con = tmp_file + ), + con = tmp_file ) tdf <- cdisc_data_file(tmp_file) file.remove(tmp_file) @@ -257,8 +260,9 @@ testthat::test_that("cdisc_data_file uses the code input to mutate the code of t rlang::local_options(lifecycle_verbosity = "quiet") tmp_file <- tempfile(fileext = ".R") - writeLines(text = c( - "adsl_raw <- as.data.frame(as.list(setNames(nm = get_cdisc_keys('ADSL')))) + writeLines( + text = c( + "adsl_raw <- as.data.frame(as.list(setNames(nm = get_cdisc_keys('ADSL')))) adsl <- cdisc_dataset( dataname = 'ADSL', x = adsl_raw, @@ -266,8 +270,8 @@ testthat::test_that("cdisc_data_file uses the code input to mutate the code of t ) cdisc_data(adsl) " - ), - con = tmp_file + ), + con = tmp_file ) tdf <- cdisc_data_file(tmp_file, "# MUTATE code") file.remove(tmp_file) diff --git a/tests/testthat/test-datanames.R b/tests/testthat/test-datanames.R new file mode 100644 index 000000000..e9b157258 --- /dev/null +++ b/tests/testthat/test-datanames.R @@ -0,0 +1,25 @@ +# get ---- +testthat::test_that("datanames returns contents of @datanames slot", { + td <- teal_data(i = iris, m = mtcars) + testthat::expect_identical(datanames(td), c("i", "m")) +}) + +testthat::test_that("variables not in @datanames are omitted", { + td <- teal_data(i = iris, m = mtcars) + td <- within(td, f <- faithful) + testthat::expect_identical(datanames(td), c("i", "m")) +}) + +# set ---- +testthat::test_that("datanames can set value of @datanames", { + td <- teal_data(i = iris, m = mtcars) + testthat::expect_identical(datanames(td), c("i", "m")) + datanames(td) <- "i" + testthat::expect_identical(datanames(td), "i") +}) + +testthat::test_that("only names of existing variables are accepted", { + td <- teal_data(i = iris, m = mtcars) + testthat::expect_no_error(datanames(td) <- "i") + testthat::expect_error(datanames(td) <- "f", "invalid name") +}) diff --git a/tests/testthat/test-get_join_keys.R b/tests/testthat/test-get_join_keys.R new file mode 100644 index 000000000..7a693a5e9 --- /dev/null +++ b/tests/testthat/test-get_join_keys.R @@ -0,0 +1,23 @@ +test_that("get_join_keys.teal_data will successfully obtain object from teal_data", { + obj <- helper_generator_teal_data() + + expect_identical(obj@join_keys, get_join_keys(obj)) + helper_test_get_join_keys(obj, "ds1") +}) + +test_that("get_join_keys.JoinKeys will return itself", { + obj <- helper_generator_JoinKeys() + + expect_identical(obj, get_join_keys(obj)) + helper_test_get_join_keys(obj, "ds1") +}) + +test_that("get_join_keys<-.teal_data", { + obj <- helper_generator_teal_data() + helper_test_get_join_keys_add(obj, "ds1", "ds2") +}) + +test_that("get_join_keys<-.JoinKeys", { + obj <- helper_generator_JoinKeys() + helper_test_get_join_keys_add(obj, "ds1", "ds2") +}) diff --git a/tests/testthat/test-teal_data.R b/tests/testthat/test-teal_data.R index 2da57165c..747d2378a 100644 --- a/tests/testthat/test-teal_data.R +++ b/tests/testthat/test-teal_data.R @@ -34,20 +34,101 @@ teal_data_mixed_call <- function(check = TRUE, join_keys1 = join_keys()) { teal_data(df1_ds, df2_dc, df3_rdc, check = check, join_keys = join_keys1) } +testthat::test_that("teal_data allows to initialize empty teal_data object", { + testthat::expect_s4_class(teal_data(), "teal_data") +}) + +testthat::test_that("empty teal_data returns empty code, id, wartnings and messages", { + testthat::expect_identical(teal_data()@code, character(0)) + testthat::expect_identical(teal_data()@id, integer(0)) + testthat::expect_identical(teal_data()@messages, character(0)) + testthat::expect_identical(teal_data()@warnings, character(0)) +}) + +testthat::test_that("teal_data allows to initialize empty teal_data with join_keys", { + testthat::expect_equal( + teal_data(join_keys = join_keys(join_key("data1", "data2", "id")))@join_keys, + join_keys(join_key("data1", "data2", "id")) + ) +}) + +testthat::test_that("teal_data initializes teal_data object with @datanames taken from passed objects", { + testthat::expect_identical( + teal_data(iris = iris, mtcars = mtcars)@datanames, + c("iris", "mtcars") + ) +}) + +testthat::test_that("teal_data initializes teal_data object with @datanames taken from passed join_keys", { + testthat::expect_identical( + teal_data(join_keys = join_keys(join_key("parent", "child", "id")))@datanames, + c("parent", "child") + ) +}) + +testthat::test_that("teal_data initializes teal_data object with @datanames taken from join_keys and passed objects", { + testthat::expect_identical( + teal_data(iris = iris, join_keys = join_keys(join_key("parent", "child", "id")))@datanames, + c("iris", "parent", "child") + ) +}) + testthat::test_that("teal_data accepts TealDataset, TealDatasetConnector, TealDataConnector objects", { testthat::expect_silent(data <- teal_data_mixed_call()) testthat::expect_identical(data$get_datanames(), c("df1", "df2", "df3")) }) -testthat::test_that("teal_data throws error if it receives undesired objects", { +testthat::test_that("teal_data returns teal_data when data passed as named list", { df1 <- data.frame(id = c("A", "B"), a = c(1L, 2L)) + testthat::expect_s4_class(teal_data(df1 = df1), "teal_data") +}) + +testthat::test_that("teal_data accepts any data provided as named list", { + df1 <- structure(1L, class = "anyclass") + testthat::expect_no_error(teal_data(df1 = df1)) +}) - testthat::expect_error( - teal_data(df1, check = TRUE), - "May only contain the following types: \\{TealDataset,TealDatasetConnector,TealDataConnector\\}" +testthat::test_that("teal_data accepts code as character", { + testthat::expect_no_error( + teal_data( + iris1 = iris, + code = "iris1 <- iris" + ) ) }) +testthat::test_that("teal_data accepts code as language", { + testthat::expect_no_error( + teal_data( + iris1 = iris, + code = quote(iris1 <- iris) + ) + ) +}) + +testthat::test_that("teal_data code unfolds code-block wrapped in '{'", { + testthat::expect_identical( + teal_data(iris1 = iris, code = quote({ + iris1 <- iris + }))@code, + "iris1 <- iris" + ) +}) + +testthat::test_that("teal_data code is concatenated into single string", { + testthat::expect_identical( + teal_data(iris1 = iris, code = c("iris1 <- iris", "iris2 <- iris1"))@code, + "iris1 <- iris\niris2 <- iris1" + ) +}) + +testthat::test_that("teal_data@env is locked. Not able to modify, add or remove bindings", { + data <- teal_data(iris = iris) + testthat::expect_error(data@env$iris <- iris, "cannot change value of locked binding for 'iris'") + testthat::expect_error(data@env$iris2 <- iris, "cannot add bindings to a locked environment") + testthat::expect_error(rm("iris", envir = data@env), "cannot remove bindings from a locked environment") +}) + testthat::test_that("teal_data sets passed join_keys to datasets correctly", { df1 <- data.frame(id = c("A", "B"), a = c(1L, 2L)) df2 <- data.frame(df2_id = c("A", "B"), id = c("A", "B"), b = c(1L, 2L)) @@ -58,14 +139,14 @@ testthat::test_that("teal_data sets passed join_keys to datasets correctly", { jk <- join_keys(join_key("df1", "df2", "id")) data <- teal_data(df1, df2, join_keys = jk, check = FALSE) - testthat::expect_equal( - data$get_join_keys(), - join_keys( - join_key("df1", "df2", "id"), - join_key("df1", "df1", "id"), - join_key("df2", "df2", "df2_id") - ) + jk_expected <- join_keys( + join_key("df1", "df2", "id"), + join_key("df1", "df1", "id"), + join_key("df2", "df2", "df2_id") ) + jk_expected$set_parents(list(df1 = character(0), df2 = character(0))) + + testthat::expect_equal(data$get_join_keys(), jk_expected) }) testthat::test_that("teal_data sets passed JoinKeys to datasets correctly when key names differ", { @@ -76,15 +157,15 @@ testthat::test_that("teal_data sets passed JoinKeys to datasets correctly when k jk <- join_keys(join_key("df1", "df2", c(id = "fk"))) data <- teal_data(df1, df2, join_keys = jk, check = FALSE) - testthat::expect_equal( - data$get_join_keys(), - join_keys( - join_key("df1", "df2", c(id = "fk")), - join_key("df1", "df1", "id"), - join_key("df2", "df1", c(fk = "id")), - join_key("df2", "df2", "df2_id") - ) + jk_expected <- join_keys( + join_key("df1", "df2", c(id = "fk")), + join_key("df1", "df1", "id"), + join_key("df2", "df1", c(fk = "id")), + join_key("df2", "df2", "df2_id") ) + jk_expected$set_parents(list(df1 = character(0), df2 = character(0))) + + testthat::expect_equal(data$get_join_keys(), jk_expected) }) testthat::test_that("teal_data sets passes JoinKeys to datasets correctly when key names differ (multiple keys)", { @@ -95,14 +176,13 @@ testthat::test_that("teal_data sets passes JoinKeys to datasets correctly when k data <- teal_data(df1, df2, check = FALSE) data$mutate_join_keys("df1", "df2", c(id = "fk", id2 = "fk2")) - testthat::expect_equal( - data$get_join_keys(), - join_keys( - join_key("df1", "df1", "id"), - join_key("df2", "df2", "df2_id"), - join_key("df1", "df2", c(id = "fk", id2 = "fk2")) - ) + jk_expected <- join_keys( + join_key("df1", "df1", "id"), + join_key("df2", "df2", "df2_id"), + join_key("df1", "df2", c(id = "fk", id2 = "fk2")) ) + jk_expected$set_parents(list(df1 = character(0), df2 = character(0))) + testthat::expect_equal(data$get_join_keys(), jk_expected) }) testthat::test_that("teal_data returns TealData object with cdisc_dataset input", { @@ -131,21 +211,18 @@ testthat::test_that("teal_data returns TealData object with cdisc_dataset input" cdisc_only <- teal_data(adsl, adtte) testthat::expect_equal(class(cdisc_only), c("TealData", "TealDataAbstract", "R6")) - - testthat::expect_error( - teal_data() - ) }) testthat::test_that("teal_data_file loads the TealData object", { tmp_file <- tempfile(fileext = ".R") - writeLines(text = c( - "df <- data.frame(A = c(1, 2, 3)) + writeLines( + text = c( + "df <- data.frame(A = c(1, 2, 3)) df1_ds <- dataset('df', df, code = 'df <- data.frame(A = c(1, 2, 3))') teal_data(df1_ds) " - ), - con = tmp_file + ), + con = tmp_file ) tdf <- teal_data_file(tmp_file) file.remove(tmp_file) @@ -162,8 +239,9 @@ testthat::test_that("teal_data_file loads the TealData object", { testthat::test_that("teal_data_file uses the code input to mutate the code of the loaded TealData object", { tmp_file <- tempfile(fileext = ".R") - writeLines(text = c( - "df <- data.frame(A = c(1, 2, 3)) + writeLines( + text = c( + "df <- data.frame(A = c(1, 2, 3)) df1_ds <- dataset('df', df, code = 'df <- data.frame(A = c(1, 2, 3))') teal_data(df1_ds) " diff --git a/tests/testthat/test-to_relational_data.R b/tests/testthat/test-to_relational_data.R index 3e93bd13c..432c4acd3 100644 --- a/tests/testthat/test-to_relational_data.R +++ b/tests/testthat/test-to_relational_data.R @@ -105,9 +105,9 @@ test_that("to_relational_data accepts a function returning a TealDataset as inpu test_that("to_relational_data accepts a MultiAssayExperiment as input", { utils::data(miniACC, package = "MultiAssayExperiment") mae <- miniACC - output_dataset <- to_relational_data(mae) + output_dataset <- to_relational_data_wrapper(mae) testthat::expect_is(output_dataset, "TealData") - testthat::expect_identical(output_dataset$get_datanames(), "MAE") + testthat::expect_identical(output_dataset$get_datanames(), "mae") }) diff --git a/vignettes/join-keys.Rmd b/vignettes/join-keys.Rmd index 8972efb4b..0ee134bc2 100644 --- a/vignettes/join-keys.Rmd +++ b/vignettes/join-keys.Rmd @@ -9,6 +9,7 @@ vignette: > --- ## Overview + `teal.data` gives the possibility to define individual keys per dataset and to define the relations to other datasets. Each table can have a set of keys that inform its structure and relation to other tables by specifying: @@ -16,14 +17,12 @@ Each table can have a set of keys that inform its structure and relation to othe - columns consisting the primary key - merge keys, a concept similar to `SQL`'s foreign key. -Usually, an application developer needs to specify the keys manually, but in case of datasets named according -to the `ADaM` standard, `teal` can assign the keys on its own. +Usually, an application developer needs to specify the keys manually, but in case of datasets named according to the `ADaM` standard, `teal` can assign the keys on its own. Refer to `vignette("including-adam-data-in-teal", package = "teal")` for more information. ### Primary key -Using the `keys` argument to the `dataset` function (or for `DDL` a `dataset_connector`), we can specify the column(s) -of the dataset that (together) uniquely identify rows in the dataset. +Using the `keys` argument to the `dataset` function (or for `DDL` a `dataset_connector`), we can specify the column(s) of the dataset that (together) uniquely identify rows in the dataset. ```{r, message=FALSE} library(teal.data) @@ -41,6 +40,26 @@ ds <- dataset( ds$get_keys() ``` +Alternatively, the primary keys can be defined in the `join_keys` parameter by using the `join_key` functions by omitting the second dataset (or define it with the same value as the first. + +```{r, message=FALSE} +library(teal.data) + +data_1 <- data.frame(X = factor(1:10), Y = 21:30, Z = 1:10) +data_2 <- data.frame(W = factor(10:1), V = factor(5:14), M = rep(1:5, 2)) + +data <- teal_data( + dataset("D1", data_1, code = "D1 <- data.frame(X = factor(1:10), Y = 21:30, Z = 1:10)"), + dataset("D2", data_2, code = "D2 <- data.frame(W = factor(10:1), V = factor(5:14), M = rep(1:5, 2))"), + join_keys = join_keys( + join_key("D1", keys = c("X")), + join_key("D2", keys = c("V", "W")) + # join_key("D2", "D2", keys = c("V", "W")), # equivalent to using primary_key + ) +) +data$get_join_keys() +``` + ### Merge keys When passing multiple datasets to the `cdisc_data` function, dataset relationship are set using diff --git a/vignettes/preprocessing-data.Rmd b/vignettes/preprocessing-data.Rmd index af624df8e..8a4d8ce37 100644 --- a/vignettes/preprocessing-data.Rmd +++ b/vignettes/preprocessing-data.Rmd @@ -24,8 +24,8 @@ saveRDS(example_cdisc_data("ADSL"), "adsl.rds") ## preprocessing ------------------- adsl <- readRDS("adsl.rds") ## ------------------- -data <- cdisc_data(cdisc_dataset("ADSL", adsl)) -data$get_code() +data <- cdisc_data(ADSL = adsl) +teal.code::get_code(data) ``` When executing the example above, the `get_code` function will return an empty string reflecting that _"Pre-processing is empty"_. In order to show the pre-processing code correctly the `code` argument of the `cdisc_data` function needs to be specified. For the example above this would be: @@ -42,10 +42,10 @@ unlink("adsl.rds") data <- cdisc_data( - cdisc_dataset("ADSL", adsl), + ADSL = adsl, code = 'ADSL <- readRDS("adsl.rds")' ) -data$get_code() %>% cat() +teal.code::get_code(data) ``` The code used to get the `ADSL` dataset is returned as expected. This can be used as input to the `data` argument of `teal::init` diff --git a/vignettes/teal-data.Rmd b/vignettes/teal-data.Rmd index 32429a78d..70ec4243f 100644 --- a/vignettes/teal-data.Rmd +++ b/vignettes/teal-data.Rmd @@ -25,8 +25,8 @@ adsl <- teal.data::example_cdisc_data("ADSL") adtte <- teal.data::example_cdisc_data("ADTTE") my_data <- cdisc_data( - cdisc_dataset("ADSL", adsl), - cdisc_dataset("ADTTE", adtte) + ADSL = adsl, + ADTTE = adtte ) ``` @@ -34,8 +34,8 @@ For more general data use the `teal_data` and `dataset` functions: ```{r} my_general_data <- teal_data( - dataset("iris", iris), - dataset("mtcars", mtcars) + iris = iris, + mtcars = mtcars ) ```