diff --git a/NAMESPACE b/NAMESPACE index 52a6ca514..b3c2deb23 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,13 +1,20 @@ # Generated by roxygen2: do not edit by hand -S3method("[",JoinKeys) -S3method("[<-",JoinKeys) -S3method("get_join_keys<-",JoinKeys) -S3method("get_join_keys<-",teal_data) +S3method("[",join_keys) +S3method("[<-",join_keys) +S3method("[[<-",join_keys) +S3method("join_keys<-",join_keys) +S3method("join_keys<-",teal_data) +S3method("names<-",join_keys) +S3method("parents<-",join_keys) +S3method("parents<-",teal_data) S3method(as_cdisc,TealDataset) S3method(as_cdisc,TealDatasetConnector) +S3method(c,join_key_set) +S3method(c,join_keys) S3method(dataset,MultiAssayExperiment) S3method(dataset,data.frame) +S3method(format,join_keys) S3method(get_attrs,TealDataset) S3method(get_code,TealDataAbstract) S3method(get_code,TealDataset) @@ -26,10 +33,6 @@ S3method(get_datasets,TealDataAbstract) S3method(get_datasets,TealDataset) S3method(get_datasets,TealDatasetConnector) S3method(get_datasets,teal_data) -S3method(get_join_keys,JoinKeys) -S3method(get_join_keys,TealData) -S3method(get_join_keys,default) -S3method(get_join_keys,teal_data) S3method(get_key_duplicates,TealDataset) S3method(get_key_duplicates,data.frame) S3method(get_keys,TealDataAbstract) @@ -41,6 +44,10 @@ S3method(get_raw_data,TealDatasetConnector) S3method(is_pulled,TealDataAbstract) S3method(is_pulled,TealDataset) S3method(is_pulled,TealDatasetConnector) +S3method(join_keys,TealData) +S3method(join_keys,default) +S3method(join_keys,join_keys) +S3method(join_keys,teal_data) S3method(load_dataset,TealDataset) S3method(load_dataset,TealDatasetConnector) S3method(load_datasets,TealData) @@ -51,8 +58,9 @@ S3method(mutate_data,TealDataAbstract) S3method(mutate_dataset,TealDataAbstract) S3method(mutate_dataset,TealDataset) S3method(mutate_dataset,TealDatasetConnector) -S3method(mutate_join_keys,JoinKeys) -S3method(mutate_join_keys,TealData) +S3method(parents,join_keys) +S3method(parents,teal_data) +S3method(print,join_keys) S3method(set_args,CallableCode) S3method(set_args,CallableFunction) S3method(set_args,TealDatasetConnector) @@ -68,6 +76,8 @@ export("col_labels<-") export("data_label<-") export("datanames<-") export("get_join_keys<-") +export("join_keys<-") +export("parents<-") export(as_cdisc) export(callable_code) export(callable_function) @@ -78,7 +88,6 @@ export(cdisc_dataset) export(cdisc_dataset_connector) export(cdisc_dataset_connector_file) export(cdisc_dataset_file) -export(cdisc_join_keys) export(code_cdisc_dataset_connector) export(code_dataset_connector) export(col_labels) @@ -92,6 +101,7 @@ export(dataset) export(dataset_connector) export(dataset_connector_file) export(dataset_file) +export(default_cdisc_join_keys) export(example_cdisc_data) export(fun_cdisc_dataset_connector) export(fun_dataset_connector) @@ -115,7 +125,8 @@ export(load_datasets) export(mae_dataset) export(mutate_data) export(mutate_dataset) -export(mutate_join_keys) +export(parent) +export(parents) export(python_cdisc_dataset_connector) export(python_code) export(python_dataset_connector) @@ -135,5 +146,6 @@ import(shiny) import(teal.code) importFrom(digest,digest) importFrom(logger,log_trace) +importFrom(rlang,`%||%`) importFrom(shinyjs,useShinyjs) importFrom(stats,setNames) diff --git a/NEWS.md b/NEWS.md index 313bd4a98..fb6b33d80 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,12 +2,16 @@ ### Enhancements * Simplified `join_key` to better support primary keys. -* Added subset and subset-assignment to `JoinKeySet` class to manipulate relationship pair keys _(`[` and `[<-`)_. +* `JoinKey` `R6` object was removed in favor of a list-like object with class name `join_keys`. Subset operators and assignments are supported (`[`, `[[`, `[<-` and `[[<-`) +* `join_keys` function works as a constructor, getter and setter. ### Breaking changes * Introduced new data class (`teal_data`) which replaces deprecated `TealData`. New data class becomes a standard input for whole `teal` framework. * Deprecated `teal_data` constructor when `TealDataset`, `TealDatasetConnector` or `TealDataConnector` objects are provided. New delayed data loading functions introduced in `teal` package. +* `JoinKeySet` class was renamed to `join_key_set`. +* `JoinKeys` class was renamed to `join_keys` + ### Miscellaneous * Specified minimal version of package dependencies. diff --git a/R/JoinKeys.R b/R/JoinKeys.R deleted file mode 100644 index 0b9255fb2..000000000 --- a/R/JoinKeys.R +++ /dev/null @@ -1,551 +0,0 @@ -## JoinKeys ==== -#' -#' -#' @title R6 Class to store relationships for joining datasets -#' -#' @description `r lifecycle::badge("stable")` -#' This class stores symmetric links between pairs of key-values -#' (e.g. column A of dataset X can be joined with column B of dataset Y). This relationship -#' is more general than the SQL foreign key relationship which also imposes constraints on the values -#' of these columns. -#' @param dataset_1 (`character`) one dataset name -#' @param dataset_2 (`character`) other dataset name -#' -#' @examples -#' x <- teal.data:::JoinKeys$new() -#' x$set( -#' list( -#' join_key("dataset_A", "dataset_B", c("col_1" = "col_a")), -#' join_key("dataset_A", "dataset_C", c("col_2" = "col_x", "col_3" = "col_y")) -#' ) -#' ) -#' x$get() -#' x$mutate("dataset_A", "dataset_B", c("col1" = "col10")) -#' x$get("dataset_A", "dataset_B") -JoinKeys <- R6::R6Class( # nolint - classname = "JoinKeys", - ## __Public Methods ==== - public = list( - #' @description - #' Create a new object of `JoinKeys` - #' @return empty (`JoinKeys`) - initialize = function() { - logger::log_trace("JoinKeys initialized.") - return(invisible(self)) - }, - #' @description - #' Split the current `JoinKeys` object into a named list of join keys objects with an element for each dataset - #' @return (`list`) a list of `JoinKeys` object - split = function() { - list_of_list_of_join_key_set <- lapply( - names(self$get()), - function(dataset_1) { - lapply( - names(self$get()[[dataset_1]]), - function(dataset_2) join_key(dataset_1, dataset_2, self$get()[[dataset_1]][[dataset_2]]) - ) - } - ) - res <- lapply( - list_of_list_of_join_key_set, - function(x) { - y <- JoinKeys$new() - y$set(x) - } - ) - names(res) <- names(self$get()) - - logger::log_trace("JoinKeys$split keys split.") - return(res) - }, - #' @description - #' Merging a list (or one) of `JoinKeys` objects into the current `JoinKeys` object - #' @param x `list` of `JoinKeys` objects or single `JoinKeys` object - #' @return (`self`) invisibly for chaining - merge = function(x) { - if (inherits(x, "JoinKeys")) x <- list(x) - checkmate::assert_list(x, types = "JoinKeys", min.len = 1) - for (jk in x) { - for (dataset_1 in names(jk$get())) { - for (dataset_2 in names(jk$get()[[dataset_1]])) { - self$mutate(dataset_1, dataset_2, jk$get()[[dataset_1]][[dataset_2]]) - } - } - } - logger::log_trace("JoinKeys$merge keys merged.") - return(invisible(self)) - }, - #' @description - #' Get join keys between two datasets. - #' @return (`character`) named character vector x with names(x) the - #' columns of `dataset_1` and the values of `(x)` the corresponding join - #' keys in `dataset_2` or `character(0)` if no relationship - #' @details if one or both of `dataset_1` and `dataset_2` are missing then - #' underlying keys structure is returned for further processing - get = function(dataset_1, dataset_2) { - if (missing(dataset_1) && missing(dataset_2)) { - return(private$.keys) - } - if (missing(dataset_2)) { - return(private$.keys[[dataset_1]]) - } - if (missing(dataset_1)) { - return(private$.keys[[dataset_2]]) - } - if (is.null(private$.keys[[dataset_1]][[dataset_2]])) { - return(character(0)) - } - return(private$.keys[[dataset_1]][[dataset_2]]) - }, - #' @description - #' Change join_keys for a given pair of dataset names (or - #' add join_keys for given pair if it does not exist) - #' @param val (named `character`) column names used to join - #' @return (`self`) invisibly for chaining - mutate = function(dataset_1, dataset_2, val) { - checkmate::assert_string(dataset_1) - checkmate::assert_string(dataset_2) - checkmate::assert_character(val, any.missing = FALSE) - - private$join_pair(join_key(dataset_1, dataset_2, val)) - - logger::log_trace( - sprintf( - "JoinKeys$mutate updated the keys between %s and %s to %s", - dataset_1, - dataset_2, - paste(val, collapse = ", ") - ) - ) - return(invisible(self)) - }, - #' @description - #' Set up join keys basing on list of `JoinKeySet` objects. - #' @param x `list` of `JoinKeySet` objects (which are created using the `join_key` function) - #' or single `JoinKeySet` objects - #' @details Note that join keys are symmetric although the relationship only needs - #' to be specified once - #' @return (`self`) invisibly for chaining - set = function(x) { - if (length(private$.keys) > 0) { - stop("Keys already set, please use JoinKeys$mutate() to change them") - } - if (inherits(x, "JoinKeySet")) { - x <- list(x) - } - - # check if any JoinKeySets share the same datasets but different values - for (idx_1 in seq_along(x)) { - for (idx_2 in seq_len(idx_1)) { - private$check_compatible_keys(x[[idx_1]], x[[idx_2]]) - } - } - - checkmate::assert_list(x, types = "JoinKeySet", min.len = 1) - lapply(x, private$join_pair) - - logger::log_trace("JoinKeys$set keys are set.") - return(invisible(self)) - }, - #' @description - #' Prints this `JoinKeys`. - #' - #' @param ... additional arguments to the printing method - #' @return invisibly self - print = function(...) { - check_ellipsis(...) - keys_list <- self$get() - if (length(keys_list) > 0) { - cat(sprintf( - "A JoinKeys object containing foreign keys between %s datasets:\n", - length(keys_list) - )) - print(keys_list) - } else { - cat("An empty JoinKeys object.") - } - invisible(self) - }, - #' @description - #' Sets the parents of the datasets. - #' - #' @param named_list Named (`list`) of the parents datasets. - #' - #' @return (`self`) invisibly for chaining - set_parents = function(named_list) { - for (dataset in names(named_list)) { - checkmate::assert( - checkmate::check_null(self$get_parent(dataset)), - checkmate::check_true( - length(self$get_parent(dataset)) == 0 && - length(named_list[[dataset]]) == 0 - ), - checkmate::check_true(self$get_parent(dataset) == named_list[[dataset]]), - "Please check the difference between provided datasets parents and provided join_keys parents." - ) - if (is.null(self$get_parent(dataset))) { - private$parents[[dataset]] <- named_list[[dataset]] - } - } - invisible(self) - }, - #' @description - #' Gets the parent of the desired dataset. - #' - #' @param dataname (`character`) name of the dataset. - #' @return (`character`) the parent of the desired dataset - get_parent = function(dataname) { - if (missing(dataname)) { - return(NULL) - } - private$parents[[dataname]] - }, - #' @description - #' Gets the parents of the datasets. - #' - #' @return (`list`) A named list of the parents of all datasets - get_parents = function() { - private$parents - }, - #' @description - #' Updates the keys of the datasets based on the parents. - #' - #' @return (`self`) invisibly for chaining - update_keys_given_parents = function() { - datanames <- names(self$get()) - duplicate_pairs <- list() - for (d1 in datanames) { - d1_pk <- self$get(d1, d1) - d1_parent <- self$get_parent(d1) - for (d2 in datanames) { - if (paste(d2, d1) %in% duplicate_pairs) { - next - } - if (length(self$get(d1, d2)) == 0) { - d2_parent <- self$get_parent(d2) - d2_pk <- self$get(d2, d2) - - fk <- if (identical(d1, d2_parent)) { - # first is parent of second -> parent keys -> first keys - d1_pk - } else if (identical(d1_parent, d2)) { - # second is parent of first -> parent keys -> second keys - d2_pk - } else if (identical(d1_parent, d2_parent) && length(d1_parent) > 0) { - # both has the same parent -> parent keys - self$get(d1_parent, d1_parent) - } else { - # cant find connection - leave empty - next - } - self$mutate(d1, d2, fk) - duplicate_pairs <- append(duplicate_pairs, paste(d1, d2)) - } - } - } - # check parent child relation - private$check_parent_child() - - invisible(self) - } - ), - ## __Private Fields ==== - private = list( - .keys = list(), - parents = list(), - join_pair = function(join_key) { - dataset_1 <- join_key$dataset_1 - dataset_2 <- join_key$dataset_2 - keys <- join_key$keys - - if (is.null(private$.keys[[dataset_1]])) { - private$.keys[[dataset_1]] <- list() - } - private$.keys[[dataset_1]][[dataset_2]] <- keys - - if (dataset_2 != dataset_1) { - if (is.null(private$.keys[[dataset_2]])) { - private$.keys[[dataset_2]] <- list() - } - - if (length(keys) > 0) { - keys <- setNames(names(keys), keys) - } - private$.keys[[dataset_2]][[dataset_1]] <- keys - } - }, - # helper function to deterimine if two key sets contain incompatible keys - # return TRUE if compatible, throw error otherwise - check_compatible_keys = function(join_key_1, join_key_2) { - error_message <- function(dataset_1, dataset_2) { - stop( - paste("cannot specify multiple different join keys between datasets:", dataset_1, "and", dataset_2) - ) - } - - - # if first datasets and the second datasets match and keys - # must contain the same named elements - if (join_key_1$dataset_1 == join_key_2$dataset_1 && join_key_1$dataset_2 == join_key_2$dataset_2) { - if (!identical(sort(join_key_1$keys), sort(join_key_2$keys))) { - error_message(join_key_1$dataset_1, join_key_1$dataset_2) - } - } - - # if first dataset of join_key_1 matches second dataset of join_key_2 - # and the first dataset of join_key_2 must match second dataset of join_key_1 - # and keys must contain the same elements but with names and values swapped - if (join_key_1$dataset_1 == join_key_2$dataset_2 && join_key_1$dataset_2 == join_key_2$dataset_1) { - # have to handle empty case differently as names(character(0)) is NULL - if (length(join_key_1$keys) == 0 && length(join_key_2$keys) == 0) { - return(TRUE) - } - - if ( - xor(length(join_key_1$keys) == 0, length(join_key_2$keys) == 0) || - !identical(sort(join_key_1$keys), sort(setNames(names(join_key_2$keys), join_key_2$keys))) - ) { - error_message(join_key_1$dataset_1, join_key_1$dataset_2) - } - } - - # otherwise they are compatible - return(TRUE) - }, - # checks the parent child relations are valid - check_parent_child = function() { - if (!is.null(self$get_parents())) { - parents <- self$get_parents() - for (idx1 in seq_along(parents)) { - name_from <- names(parents)[[idx1]] - for (idx2 in seq_along(parents[[idx1]])) { - name_to <- parents[[idx1]][[idx2]] - keys_from <- self$get(name_from, name_to) - keys_to <- self$get(name_to, name_from) - if (length(keys_from) == 0 && length(keys_to) == 0) { - stop(sprintf("No join keys from %s to its parent (%s) and vice versa", name_from, name_to)) - } - if (length(keys_from) == 0) { - stop(sprintf("No join keys from %s to its parent (%s)", name_from, name_to)) - } - if (length(keys_to) == 0) { - stop(sprintf("No join keys from %s parent name (%s) to %s", name_from, name_to, name_from)) - } - } - } - } - } - ) -) - -# constructors ==== - -#' Create a `JoinKeys` out of a list of `JoinKeySet` objects -#' -#' @description `r lifecycle::badge("stable")` -#' -#' @param ... optional, a `JoinKeySet` objects created using the `join_key` function. -#' @details Note that join keys are symmetric although the relationship only needs -#' to be specified once. -#' -#' @return `JoinKeys` -#' -#' @export -#' -#' @examples -#' # setting join keys -#' join_keys( -#' join_key("dataset_A", "dataset_B", c("col_1" = "col_a")), -#' join_key("dataset_A", "dataset_C", c("col_2" = "col_x", "col_3" = "col_y")) -#' ) -#' # or -#' jk <- join_keys() -#' jk["dataset_A", "dataset_B"] <- c("col_1" = "col_a") -#' jk["dataset_A", "dataset_C"] <- c("col_2" = "col_x", "col_3" = "col_y") -#' -join_keys <- function(...) { - x <- rlang::list2(...) - res <- JoinKeys$new() - if (length(x) > 0) { - res$set(x) - } - - res -} - -#' @title Getter for JoinKeys that returns the relationship between pairs of datasets -#' @param x JoinKeys object to extract the join keys -#' @param dataset_1 (`character`) name of first dataset. -#' @param dataset_2 (`character`) name of second dataset. -#' @export -#' @keywords internal -`[.JoinKeys` <- function(x, dataset_1, dataset_2 = dataset_1) { - checkmate::assert_string(dataset_1) - checkmate::assert_string(dataset_2) - x$get(dataset_1, dataset_2) -} - -#' @rdname sub-.JoinKeys -#' @param value value to assign -#' @export -#' @keywords internal -`[<-.JoinKeys` <- function(x, dataset_1, dataset_2 = dataset_1, value) { - checkmate::assert_string(dataset_1) - checkmate::assert_string(dataset_2) - x$mutate(dataset_1, dataset_2, value) - x -} - -#' @rdname join_keys -#' @details -#' `cdisc_join_keys` is a wrapper around `join_keys` that sets the default -#' join keys for CDISC datasets. It is used internally by `cdisc_data` to -#' set the default join keys for CDISC datasets. -#' -#' @export -#' @examples -#' -#' # default CDISC join keys -#' cdisc_join_keys(join_key("dataset_A", "dataset_B", c("col_1" = "col_a")), "ADTTE") -#' -cdisc_join_keys <- function(...) { - data_objects <- rlang::list2(...) - - join_keys <- join_keys() - lapply(seq_along(data_objects), function(ix) { - item <- data_objects[[ix]] - name <- names(data_objects)[ix] - - if (checkmate::test_class(item, "JoinKeySet")) { - join_keys$set(item) - return(NULL) - } else if ( - checkmate::test_multi_class(item, c("TealDataConnector", "TealDataset", "TealDatasetConnector")) - ) { - return(NULL) - } else { - if ((is.null(name) || identical(trimws(name), "")) && is.character(item)) { - name <- item - } - if (name %in% names(default_cdisc_keys)) { - # Set default primary keys - keys_list <- default_cdisc_keys[[name]] - join_keys[name] <- keys_list$primary - - if (!is.null(keys_list$parent) && !is.null(keys_list$foreign)) { - join_keys[name, keys_list$parent] <- keys_list$foreign - } - } - } - }) - - join_keys -} - -# wrappers ==== -#' Mutate `JoinKeys` with a new values -#' -#' @description `r lifecycle::badge("experimental")` -#' Mutate `JoinKeys` with a new values -#' -#' @param x (`JoinKeys`) object to be modified -#' @param dataset_1 (`character`) one dataset name -#' @param dataset_2 (`character`) other dataset name -#' @param val (named `character`) column names used to join -#' -#' @return modified `JoinKeys` object -#' -#' @export -mutate_join_keys <- function(x, dataset_1, dataset_2, val) { - UseMethod("mutate_join_keys") -} - -#' @rdname mutate_join_keys -#' @export -#' @examples -#' # JoinKeys ---- -#' -#' x <- join_keys( -#' join_key("dataset_A", "dataset_B", c("col_1" = "col_a")), -#' join_key("dataset_A", "dataset_C", c("col_2" = "col_x", "col_3" = "col_y")) -#' ) -#' x$get("dataset_A", "dataset_B") -#' -#' mutate_join_keys(x, "dataset_A", "dataset_B", c("col_1" = "col_10")) -#' x$get("dataset_A", "dataset_B") -mutate_join_keys.JoinKeys <- function(x, dataset_1, dataset_2, val) { - x$mutate(dataset_1, dataset_2, val) -} - -#' @rdname mutate_join_keys -#' @export -#' @examples -#' # TealData ---- -#' -#' ADSL <- teal.data::example_cdisc_data("ADSL") -#' ADRS <- teal.data::example_cdisc_data("ADRS") -#' -#' x <- cdisc_data( -#' cdisc_dataset("ADSL", ADSL), -#' cdisc_dataset("ADRS", ADRS) -#' ) -#' x$get_join_keys()$get("ADSL", "ADRS") -#' -#' mutate_join_keys(x, "ADSL", "ADRS", c("COLUMN1" = "COLUMN2")) -#' x$get_join_keys()$get("ADSL", "ADRS") -mutate_join_keys.TealData <- function(x, dataset_1, dataset_2, val) { # nolint - x$mutate_join_keys(dataset_1, dataset_2, val) -} - -#' Create a relationship between a pair of datasets -#' -#' @description `r lifecycle::badge("stable")` -#' -#' @details `join_key()` will create a relationship for the variables on a pair -#' of datasets. -#' -#' @inheritParams mutate_join_keys -#' @param dataset_2 (optional `character`) other dataset name. In case it is omitted, then it -#' will create a primary key for `dataset_1`. -#' @param keys (optionally named `character`) where `names(keys)` are columns in `dataset_1` -#' with relationship to columns of `dataset_2` given by the elements in `keys`. -#' If `names(keys)` is `NULL` then the same column names are used for both `dataset_1` -#' and `dataset_2`. -#' -#' @return object of class `JoinKeySet` to be passed into `join_keys` function. -#' -#' @seealso [join_keys()] -#' -#' @export -join_key <- function(dataset_1, dataset_2 = dataset_1, keys) { - checkmate::assert_string(dataset_1) - checkmate::assert_string(dataset_2) - checkmate::assert_character(keys, any.missing = FALSE) - - if (length(keys) > 0) { - if (is.null(names(keys))) { - names(keys) <- keys - } - - if (any(names(keys) == "")) { - names(keys)[names(keys) == "" & keys != ""] <- keys[names(keys) == "" & keys != ""] - } - - stopifnot(!is.null(names(keys))) - stopifnot(!anyDuplicated(keys)) - stopifnot(!anyDuplicated(names(keys))) - } - - if (dataset_1 == dataset_2 && any(names(keys) != keys)) { - stop("Keys within a dataset must match exactly: keys = c('A' = 'B') are not allowed") - } - - structure( - list( - dataset_1 = dataset_1, - dataset_2 = dataset_2, - keys = keys - ), - class = "JoinKeySet" - ) -} diff --git a/R/TealData.R b/R/TealData.R index baea9488b..7beef553d 100644 --- a/R/TealData.R +++ b/R/TealData.R @@ -8,9 +8,9 @@ #' #' @param ... (`TealDataConnector`, `TealDataset`, `TealDatasetConnector`)\cr #' objects -#' @param join_keys (`JoinKeys`) or a single (`JoinKeySet`)\cr +#' @param join_keys (`join_keys`) or a single (`join_key_set`)\cr #' (optional) object with dataset column relationships used for joining. -#' If empty then an empty `JoinKeys` object is passed by default. +#' If empty then an empty `join_keys` object is passed by default. #' @param check (`logical`) reproducibility check - whether evaluated preprocessing code gives the same objects #' as provided in arguments. Check is run only if flag is true and preprocessing code is not empty. #' @@ -61,7 +61,7 @@ TealData <- R6::R6Class( # nolint #' @description #' Create a new object of `TealData` class initialize = function(..., check = FALSE, join_keys = teal.data::join_keys()) { - checkmate::assert_class(join_keys, "JoinKeys") + checkmate::assert_class(join_keys, "join_keys") dot_args <- list(...) is_teal_data <- checkmate::test_list( @@ -176,11 +176,15 @@ TealData <- R6::R6Class( # nolint #' @return (`character`) named character vector x with names(x) the #' columns of `dataset_1` and the values of `(x)` the corresponding join #' keys in `dataset_2` or `character(0)` if no relationship - get_join_keys = function(dataset_1, dataset_2) { - if (missing(dataset_1) && missing(dataset_2)) { + get_join_keys = function(dataset_1 = NULL, dataset_2 = NULL) { + if (is.null(dataset_1) && is.null(dataset_2)) { private$join_keys + } else if (is.null(dataset_1)) { + private$join_keys[[dataset_2]] + } else if (is.null(dataset_2)) { + private$join_keys[[dataset_1]] } else { - private$join_keys$get(dataset_1, dataset_2) + private$join_keys[[dataset_1]][[dataset_2]] } }, @@ -189,7 +193,7 @@ TealData <- R6::R6Class( # nolint #' #' @return named (`list`) of the parents of all datasets. get_parents = function() { - private$join_keys$get_parents() + parents(private$join_keys) }, # ___ shiny ==== @@ -294,7 +298,8 @@ TealData <- R6::R6Class( # nolint #' @param val (named `character`) column names used to join #' @return (`self`) invisibly for chaining mutate_join_keys = function(dataset_1, dataset_2, val) { - private$join_keys$mutate(dataset_1, dataset_2, val) + private$join_keys[[dataset_1]][[dataset_2]] <- val + private$join_keys }, # ___ check ==== @@ -311,7 +316,7 @@ TealData <- R6::R6Class( # nolint dataname <- get_dataname(dataset) dataset_colnames <- dataset$get_colnames() - # expected columns in this dataset from JoinKeys specification + # expected columns in this dataset from join_keys specification join_key_cols <- unique(unlist(lapply(self$get_join_keys(dataname), names))) if (!is.null(join_key_cols) && !all(join_key_cols %in% dataset_colnames)) { stop( diff --git a/R/cdisc_data.R b/R/cdisc_data.R index f44f8721f..cb307c9d9 100644 --- a/R/cdisc_data.R +++ b/R/cdisc_data.R @@ -5,7 +5,7 @@ #' for given datasets whose names match ADAM datasets names. #' #' @inheritParams teal_data -#' @param join_keys (`JoinKeys`) or a single (`JoinKeySet`)\cr +#' @param join_keys (`join_keys`) or a single (`join_key_set`)\cr #' (optional) object with datasets column names used for joining. #' If empty then it would be automatically derived basing on intersection of datasets primary keys. #' For ADAM datasets it would be automatically derived. @@ -31,7 +31,7 @@ #' }) #' cdisc_data <- function(..., - join_keys = teal.data::cdisc_join_keys(...), + join_keys = teal.data::default_cdisc_join_keys[names(rlang::list2(...))], code = character(0), check = FALSE) { teal_data(..., join_keys = join_keys, code = code, check = check) @@ -54,8 +54,9 @@ deprecated_join_keys_extract <- function(data_objects, join_keys) { ) { return(join_keys) } + # TODO: check if redundant with same call in teal_data body - update_join_keys_to_primary(data_objects, join_keys) + join_keys <- update_join_keys_to_primary(data_objects, join_keys) new_parents_fun <- function(data_objects) { lapply( @@ -88,8 +89,10 @@ deprecated_join_keys_extract <- function(data_objects, join_keys) { if (is_dag(new_parents)) { stop("Cycle detected in a parent and child dataset graph.") } - join_keys$set_parents(new_parents) - join_keys$update_keys_given_parents() + + # Keep non-check setting of parents (this will be removed in refactor) + attr(join_keys, "__parents__") <- new_parents # nolint: object_name_linter + join_keys <- update_keys_given_parents(join_keys) join_keys } diff --git a/R/cdisc_join_keys.R b/R/cdisc_join_keys.R new file mode 100644 index 000000000..8ccd11cfe --- /dev/null +++ b/R/cdisc_join_keys.R @@ -0,0 +1,36 @@ +#' List containing the default `CDISC` join keys +#' +#' @details +#' This data object is created at loading time from `cdisc_datasets/cdisc_datasets.yaml`. +#' +#' @name default_cdisc_join_keys +#' @docType data +#' @export +NULL + +#' Helper method to build `default_cdisc_join_keys` +#' @param default_cdisc_keys (`list`) default definition of primary and foreign +#' keys for `CDISC` datasets +#' +#' @keywords internal +build_cdisc_join_keys <- function(default_cdisc_keys) { + checkmate::assert_list(default_cdisc_keys, types = "list") + + jk <- join_keys() + for (name in names(default_cdisc_keys)) { + # Set default primary keys + keys_list <- default_cdisc_keys[[name]] + + if (!is.null(keys_list[["primary"]])) { + jk[[name]][[name]] <- keys_list[["primary"]] + } + + if (!is.null(keys_list[["parent"]])) { + if (!is.null(keys_list[["foreign"]])) { + jk[[name]][[keys_list[["parent"]]]] <- keys_list[["foreign"]] + } + parents(jk)[[name]] <- keys_list[["parent"]] + } + } + jk +} diff --git a/R/get_join_keys.R b/R/get_join_keys.R index a73a3e12f..56edc4378 100644 --- a/R/get_join_keys.R +++ b/R/get_join_keys.R @@ -1,59 +1,23 @@ #' Function to get join keys from a `` object #' @param data `` - object to extract the join keys -#' @return Either `JoinKeys` object or `NULL` if no join keys +#' @return Either `join_keys` object or `NULL` if no join keys #' @export get_join_keys <- function(data) { - UseMethod("get_join_keys", data) + lifecycle::deprecate_stop( + when = " 0.3.1", + what = "get_join_keys(data)", + details = "Use `join_keys(data)` instead." + ) } #' @rdname get_join_keys -#' @export -get_join_keys.default <- function(data) { - stop("get_join_keys function not implemented for object of class ", toString(class(data))) -} - -#' @rdname get_join_keys -#' @export -get_join_keys.teal_data <- function(data) { - data@join_keys -} - -#' @rdname get_join_keys -#' @export -get_join_keys.JoinKeys <- function(data) { - data -} - -#' @rdname get_join_keys -#' @export -get_join_keys.TealData <- function(data) { - data$get_join_keys() -} - -#' @rdname get_join_keys -#' @inheritParams mutate_join_keys +#' @inheritParams join_key #' @param value value to assign #' @export `get_join_keys<-` <- function(data, dataset_1, dataset_2 = NULL, value) { - UseMethod("get_join_keys<-", data) -} - - -#' @rdname get_join_keys -#' @inheritParams mutate_join_keys -#' @export -`get_join_keys<-.JoinKeys` <- function(data, dataset_1, dataset_2 = NULL, value) { - # The reason this passthrough method is defined is to prevent a warning message - # The assignment is performed by `get_join_keys.JoinKeys` and `[<-.JoinKeys` combination - # as well as `JoinKeys` being an R6 class - data -} - -#' @rdname get_join_keys -#' @export -`get_join_keys<-.teal_data` <- function(data, dataset_1, dataset_2 = NULL, value) { - # The reason this passthrough method is defined is to prevent a warning message - # The assignment is performed by `get_join_keys.teal_data` and `[<-.JoinKeys` combination - # as well as `JoinKeys` being an R6 class - data + lifecycle::deprecate_stop( + when = " 0.3.1", + what = "`get_join_keys<-`()", + details = "Use `join_keys(x) <- ...`" + ) } diff --git a/R/get_keys.R b/R/get_keys.R index 5de4a4d09..8b996e006 100644 --- a/R/get_keys.R +++ b/R/get_keys.R @@ -11,6 +11,7 @@ #' #' @export get_keys <- function(x, ...) { + # TODO: rename function `get_keys.join_key_set` to `get_keys` once this generic is removed UseMethod("get_keys") } diff --git a/R/join_key.R b/R/join_key.R new file mode 100644 index 000000000..29d90aaef --- /dev/null +++ b/R/join_key.R @@ -0,0 +1,85 @@ +#' Create a relationship between a pair of datasets +#' +#' @description `r lifecycle::badge("stable")` +#' +#' @details `join_key()` will create a relationship for the variables on a pair +#' of datasets. +#' +#' @param dataset_1,dataset_2 (`character(1)`) dataset names. If `dataset_2` is omitted, +#' a primary key for `dataset_1` is created. +#' @param keys (optionally named `character`) where `names(keys)` are columns in `dataset_1` +#' corresponding to columns of `dataset_2` given by the elements of `keys`. +#' +#' If unnamed, the same column names are used for both datasets. +#' +#' If any element of the `keys` vector is empty with a non-empty name, then the name is +#' used for both datasets. +#' +#' @return object of class `join_key_set` to be passed into `join_keys` function. +#' +#' @seealso [join_keys()] +#' +#' @export +#' +#' @examples +#' join_key("d1", "d2", c("A")) +#' join_key("d1", "d2", c("A" = "B")) +#' join_key("d1", "d2", c("A" = "B", "C")) +join_key <- function(dataset_1, dataset_2 = dataset_1, keys) { + checkmate::assert_string(dataset_1) + checkmate::assert_string(dataset_2) + checkmate::assert_character(keys, any.missing = FALSE) + + if (length(keys) > 0) { + if (is.null(names(keys))) { + names(keys) <- keys + } + + keys <- trimws(keys) + names(keys) <- trimws(names(keys)) + + # Remove keys with empty value and without name + if (any(keys == "" & names(keys) == "")) { + message("Key with an empty value and name are ignored.") + keys <- keys[keys != "" & names(keys) != ""] + } + + # Set name of keys without one: c("A") -> c("A" = "A") + if (any(names(keys) == "")) { + names(keys)[names(keys) == ""] <- keys[names(keys) == ""] + } + + # Set value of keys with empty string, but non-empty name: c("A" = "") -> c("A" = "A") + if (any(keys == "")) { + keys[keys == ""] <- names(keys[keys == ""]) + } + + stopifnot(!is.null(names(keys))) + stopifnot(!anyDuplicated(keys)) + stopifnot(!anyDuplicated(names(keys))) + + if (dataset_1 == dataset_2 && any(names(keys) != keys)) { + stop("Keys within a dataset must match exactly: keys = c('A' = 'B') are not allowed") + } + } else { + keys <- NULL + } + + structure( + list( + structure( + list(keys), + names = dataset_2 + ) + ), + names = dataset_1, + class = "join_key_set" + ) +} + +#' @keywords internal +get_keys.join_key_set <- function(join_key_set_object) { + join_key_set_object[[1]][[1]] +} +# Remove this once generic `get_keys` is removed (and rename non-exported function to `get_keys`) +.S3method("get_keys", "join_key_set", get_keys.join_key_set) diff --git a/R/join_keys-c.R b/R/join_keys-c.R new file mode 100644 index 000000000..2ff93e8e7 --- /dev/null +++ b/R/join_keys-c.R @@ -0,0 +1,75 @@ +#' @rdname join_keys +#' @order 4 +#' @export +#' +#' @examples +#' +#' # Merging multiple `join_keys` --- +#' +#' jk_merged <- c( +#' jk, +#' join_keys( +#' join_key("ds4", keys = c("pk4", "pk4_2")), +#' join_key("ds4", "ds3", c(pk4_2 = "pk3")) +#' ) +#' ) +c.join_keys <- function(...) { + join_keys_obj <- rlang::list2(...)[[1]] + x <- rlang::list2(...)[-1] + checkmate::assert_multi_class(join_keys_obj, classes = c("join_keys", "join_key_set")) + checkmate::assert_list(x, types = c("join_keys", "join_key_set")) + + # Ensure base object has correct class when called from c.join_key_set + join_keys_obj <- join_keys(join_keys_obj) + + x_merged <- Reduce( + init = join_keys(), + x = x, + f = function(.x, .y) { + assert_compatible_keys2(.x, .y) + out <- utils::modifyList(.x, .y, keep.null = FALSE) + attr(out, "__parents__") <- .merge_parents(.x, .y) + out + } + ) + + out <- utils::modifyList(join_keys_obj, x_merged, keep.null = FALSE) + attr(out, "__parents__") <- .merge_parents(join_keys_obj, x_merged) + out +} + +#' @rdname join_keys +#' @order 4 +#' +#' @export +#' +#' @examples +#' +#' # note: merge can be performed with both join_keys and join_key_set +#' +#' jk_merged <- c( +#' jk_merged, +#' join_key("ds5", keys = "pk5"), +#' join_key("ds5", "ds1", c(pk5 = "pk1")) +#' ) +c.join_key_set <- function(...) { + c.join_keys(...) +} + +#' Merge parents for 2 `join_keys` object +#' +#' @param x,y (`join_keys`) objects to merge their parents +#' +#' @return a list with parents merged from 2 `join_keys`. Not the object itself. +#' @keywords internal +.merge_parents <- function(x, y) { + x_parent <- list() + y_parent <- list() + if (length(attr(x, "__parents__"))) { + x_parent <- attr(x, "__parents__") + } + if (length(attr(y, "__parents__"))) { + y_parent <- attr(y, "__parents__") + } + utils::modifyList(x_parent, y_parent, keep.null = FALSE) +} diff --git a/R/join_keys-extract.R b/R/join_keys-extract.R new file mode 100644 index 000000000..3c398e706 --- /dev/null +++ b/R/join_keys-extract.R @@ -0,0 +1,265 @@ +#' @rdname join_keys +#' @order 2 +#' +#' @section Functions: +#' - `x[datanames]`: Returns a subset of the `join_keys` object for +#' given `datanames`, including parent `datanames` and symmetric mirror keys between +#' `datanames` in the result. +#' - `x[i, j]`: Returns join keys between datasets `i` and `j`, +#' including implicit keys inferred from their relationship with a parent. +#' +#' @param i,j indices specifying elements to extract or replace. Index should be a +#' a character vector, but it can also take numeric, logical, `NULL` or missing. +#' +#' @export +#' +#' @examples +#' +#' # Getter for join_keys --- +#' +#' jk["ds1", "ds2"] +#' +#' # Subsetting join_keys ---- +#' +#' jk["ds1"] +#' jk[1:2] +#' jk[c("ds1", "ds2")] +`[.join_keys` <- function(x, i, j) { + if (missing(i) && missing(j)) { + # because: + # - list(a = 1)[] returns list(a = 1) + # - data.frame(a = 1)[] returns data.frame(a = 1) + return(x) + } else if (!missing(i) && is.null(i) || !missing(j) && is.null(j)) { + # because list(a = 1)[NULL] returns NULL + # data.frame(a = 1)[NULL, NULL] returns data.frame( + return(join_keys()) + } else if (!missing(i) && !missing(j)) { + if ( + !any( + checkmate::test_string(i), + checkmate::test_number(i), + checkmate::test_logical(i, len = length(x)) && sum(j) == 1 + ) || + !any( + checkmate::test_string(j), + checkmate::test_number(j), + checkmate::test_logical(j, len = length(x)) && sum(j) == 1 + ) + ) { + stop( + "join_keys[i, j] - Can't extract keys for multiple pairs.", + "When specifying a pair [i, j], both indices must point to a single key pair.\n", + call. = FALSE + ) + } + if (is.numeric(i)) i <- names(x)[i] + if (is.numeric(j)) j <- names(x)[j] + + subset_x <- update_keys_given_parents(x[union(i, j)]) + return(subset_x[[i]][[j]]) + } else if (!missing(j)) { + # ie. select all keys which have j as dataset_2 + # since list is symmetrical it is equivalent to selecting by i + i <- j + } + + checkmate::assert( + combine = "or", + checkmate::check_character(i), + checkmate::check_numeric(i), + checkmate::check_logical(i) + ) + + + # Convert integer/logical index to named index + if (checkmate::test_numeric(i) || checkmate::test_logical(i)) { + i <- names(x)[i] + } + + # When retrieving a relationship pair, it will also return the symmetric key + new_jk <- new_join_keys() + queue <- unique(i) + bin <- character(0) + + # Need to iterate on a mutating queue if subset of a dataset will also + # select its parent as that parent might have relationships with others + # already selected. + while (length(queue) > 0) { + ix <- queue[1] + queue <- queue[-1] + bin <- c(bin, ix) + + ix_parent <- parent(x, ix) + + if (checkmate::test_string(ix_parent, min.chars = 1) && !ix_parent %in% c(queue, bin)) { + queue <- c(queue, ix_parent) + } + + ix_valid_names <- names(x[[ix]]) %in% c(queue, bin) + + new_jk[[ix]] <- x[[ix]][ix_valid_names] + + # Add primary key of parent + if (length(ix_parent) > 0) { + new_jk[[ix_parent]][[ix_parent]] <- x[[ix_parent]][[ix_parent]] + } + } + + common_parents_ix <- names(parents(x)) %in% names(new_jk) & + parents(x) %in% names(new_jk) + + if (any(common_parents_ix)) parents(new_jk) <- parents(x)[common_parents_ix] + + new_jk +} + +#' @rdname join_keys +#' @order 2 +#' +#' @section Functions: +#' - `x[i, j] <- value`: Assignment of a key to pair `(i, j)`. +#' - `x[i] <- value`: This (without `j` parameter) **is not** a supported +#' operation for `join_keys`. +#' - `join_keys(x)[i, j] <- value`: Assignment to `join_keys` object stored in `x`, +#' such as a `teal_data` or `join_keys` itself. +#' +#' @export +#' @examples +#' +#' # Setting a new primary key --- +#' +#' jk["ds4", "ds4"] <- "pk4" +#' jk["ds5", "ds5"] <- "pk5" +#' +#' # Setting a single relationship pair --- +#' +#' jk["ds4", "ds1"] <- c("pk4" = "pk1") +#' +#' # Removing a key --- +#' +#' jk["ds5", "ds5"] <- NULL +`[<-.join_keys` <- function(x, i, j, value) { + if (missing(i) || missing(j)) { + stop("join_keys[i, j] specify both indices to set a key pair.") + } else if (!missing(i) && is.null(i) || !missing(j) && is.null(j)) { + stop("join_keys[i, j] neither i nor j can be NULL.") + } else if ( + !any( + checkmate::test_string(i), + checkmate::test_number(i), + checkmate::test_logical(i, len = length(x)) && sum(j) == 1 + ) || + !any( + checkmate::test_string(j), + checkmate::test_number(j), + checkmate::test_logical(j, len = length(x)) && sum(j) == 1 + ) + ) { + stop( + "join_keys[i, j] <- Can't set keys to specified indices.\n", + "When setting pair [i, j], both indices must point to a single key pair.\n", + call. = FALSE + ) + } + + x[[i]][[j]] <- value + x +} + +#' @noRd +#' @rdname join_keys +#' +#' @order 1000 +#' @usage ## Prefered method is x[i, j] <- value +#' x[[i]][[j]] <- value +#' +#' @section Functions: +#' - `x[[i]][[j]] <- value`: It is equivalent as `x[i, j] <- value`. +#' +#' @export +#' @examples +#' +#' # Setting via x[[i]] <- value --- +#' +#' jk <- join_keys() +#' jk[["ds6"]][["ds6"]] <- "pk6" +#' jk[["ds7"]] <- list(ds7 = "pk7", ds6 = c(pk7 = "pk6")) +#' jk[["ds7"]][["ds7"]] <- NULL # removes key +#' +#' jk +`[[<-.join_keys` <- function(x, i, value) { + checkmate::assert( + combine = "or", + checkmate::check_string(i), + checkmate::check_number(i), + checkmate::check_logical(i, len = length(x)) + ) + checkmate::assert_list(value, names = "named", types = "character", null.ok = TRUE) + if (checkmate::test_numeric(i) || checkmate::test_logical(i)) { + i <- names(x)[[i]] + } + + # Normalize values + norm_value <- lapply(seq_along(value), function(.x) { + join_key(i, names(value)[.x], value[[.x]]) + }) + names(norm_value) <- names(value) + + # Check if multiple modifications don't have a conflict + repeated_value_ix <- names(value) %in% names(value)[duplicated(names(value))] + repeated <- norm_value[repeated_value_ix] + vapply( + seq_along(repeated), + function(.ix, .x_value = repeated[[.ix]], .x_name = names(.x_value[[1]])) { + assert_compatible_keys2( + .x_value, + unlist(unname( + repeated[-.ix][names(repeated[-.ix]) == .x_name] + ), recursive = FALSE) + ) + }, + logical(1) + ) + + norm_value <- lapply(norm_value, function(x) x[[1]][[1]]) + names(norm_value) <- names(value) + + # Safe to do as duplicated are the same + norm_value[duplicated(names(norm_value))] <- NULL + + # Keep only elements with length > 0L + norm_value <- Filter(length, norm_value) + + # Remove classes to use list-based get/assign operations + new_x <- unclass(x) + + # In case a pair is removed, also remove the symmetric pair + removed_names <- setdiff(names(new_x[[i]]), names(norm_value)) + for (.x in removed_names) new_x[[.x]][[i]] <- NULL + + new_x[[i]] <- norm_value + + # Iterate on all new values to create symmetrical pair + for (ds2 in names(norm_value)) { + if (ds2 == i) next + + keep_value <- new_x[[ds2]] %||% list() + # Invert key + new_value <- setNames(names(norm_value[[ds2]]), norm_value[[ds2]]) + keep_value[[i]] <- new_value + + # Assign symmetrical + new_x[[ds2]] <- keep_value + } + + preserve_attr <- attributes(new_x)[!names(attributes(new_x)) %in% "names"] + # Remove NULL or empty keys + new_x <- Filter(function(x) length(x) != 0L, new_x) + attributes(new_x) <- utils::modifyList(attributes(new_x), preserve_attr) + + # + # restore class + class(new_x) <- class(x) + new_x +} diff --git a/R/join_keys-names.R b/R/join_keys-names.R new file mode 100644 index 000000000..d9254e729 --- /dev/null +++ b/R/join_keys-names.R @@ -0,0 +1,31 @@ +#' The Names of an `join_keys` Object +#' @inheritParams base::`names<-` +#' @export +`names<-.join_keys` <- function(x, value) { + new_x <- unclass(x) + parent_list <- parents(x) + # Update inner keys + for (old_name in setdiff(names(new_x), value)) { + old_entry <- new_x[[old_name]] + new_name <- value[names(new_x) == old_name] + + # Change 2nd-tier first + for (sub_name in names(old_entry)) { + names(new_x[[sub_name]])[names(new_x[[sub_name]]) == old_name] <- new_name + } + + # Change in first tier + names(new_x)[names(new_x) == old_name] <- new_name + + # changing name in the parents + if (length(parent_list)) { + names(parent_list)[names(parent_list) == old_name] <- new_name + ind <- vapply(parent_list, identical, logical(1), old_name) + parent_list[ind] <- new_name + attr(new_x, "__parents__") <- parent_list + } + } + + class(new_x) <- c("join_keys", "list") + new_x +} diff --git a/R/join_keys-parents.R b/R/join_keys-parents.R new file mode 100644 index 000000000..8737bb08e --- /dev/null +++ b/R/join_keys-parents.R @@ -0,0 +1,137 @@ +#' Getter and setter functions for parents attribute of `join_keys` +#' +#' @description +#' `parents()` facilitates the creation of dependencies between datasets by +#' assigning a parent-child relationship. +#' +#' Each element is defined by a list element, where `list("child" = "parent")`. +#' +#' @param x (`join_keys` or `teal_data`) object that contains "parents" information +#' to retrieve or manipulate. +#' +#' @return a list of `character` representing the parents. +#' +#' @export +#' @seealso [join_keys()] +parents <- function(x) { + UseMethod("parents", x) +} + +#' @describeIn parents Retrieves parents of `join_keys` object. +#' @export +#' @examples +#' # Get parents of join_keys --- +#' +#' jk <- default_cdisc_join_keys["ADEX"] +#' parents(jk) +parents.join_keys <- function(x) { + attr(x, "__parents__") %||% list() +} + +#' @describeIn parents Retrieves parents of `join_keys` inside `teal_data` object. +#' @export +#' @examples +#' +#' # Get parents of join_keys inside teal_data object --- +#' +#' td <- teal_data( +#' ADSL = teal.data::rADSL, +#' ADTTE = teal.data::rADTTE, +#' ADRS = teal.data::rADRS, +#' join_keys = default_cdisc_join_keys[c("ADSL", "ADTTE", "ADRS")] +#' ) +#' parents(td) +parents.teal_data <- function(x) { + attr(x@join_keys, "__parents__") %||% list() +} + +#' @describeIn parents Assignment of parents in `join_keys` object. +#' +#' @param value (`named list`) of `character` vectors. +#' +#' @export +`parents<-` <- function(x, value) { + UseMethod("parents<-", x) +} + +#' @describeIn parents Assignment of parents of `join_keys` object. +#' @export +#' @examples +#' +#' # Assigment of parents --- +#' +#' jk <- join_keys( +#' join_key("ds1", "ds2", "id"), +#' join_key("ds5", "ds6", "id"), +#' join_key("ds7", "ds6", "id") +#' ) +#' +#' parents(jk) <- list(ds2 = "ds1") +#' +#' # Setting individual parent-child relationship +#' +#' parents(jk)["ds6"] <- "ds5" +#' parents(jk)["ds7"] <- "ds6" +`parents<-.join_keys` <- function(x, value) { + checkmate::assert_list(value, types = "character", names = "named") + + new_parents <- list() + + for (dataset in names(value)) { + parent <- new_parents[[dataset]] + checkmate::assert( + checkmate::check_null(parent), + checkmate::check_true( + length(parent) == 0 && + length(value[[dataset]]) == 0 + ), + checkmate::check_true(parent == value[[dataset]]), + "Please check the difference between provided datasets parents and provided join_keys parents." + ) + if (is.null(parent)) { + new_parents[[dataset]] <- value[[dataset]] + } + } + + if (is_dag(new_parents)) { + stop("Cycle detected in a parent and child dataset graph.") + } + + attr(x, "__parents__") <- new_parents # nolint: object_name_linter + + assert_parent_child(x) + x +} + +#' @describeIn parents Assignment of parents of `join_keys` inside `teal_data` object. +#' @export +#' @examples +#' +#' # Assigment of parents of join_keys inside teal_data object --- +#' +#' parents(td) <- list("ADTTE" = "ADSL") # replace existing +#' parents(td)["ADRS"] <- "ADSL" # add new parent +`parents<-.teal_data` <- function(x, value) { + parents(x@join_keys) <- value + x +} + +#' @describeIn parents Getter for individual parent +#' +#' @param dataset_name (`character(1)`) Name of dataset to query on their parent. +#' +#' @return For `parent(x, dataset_name)` returns `NULL` if parent does not exist. +#' +#' @export +#' +#' @examples +#' +#' # Get individual parent --- +#' +#' parent(jk, "ds2") +#' parent(td, "ADTTE") +parent <- function(x, dataset_name) { + checkmate::assert_string(dataset_name) + # assert x is performed by parents() + parents(x)[[dataset_name]] +} diff --git a/R/join_keys-print.R b/R/join_keys-print.R new file mode 100644 index 000000000..361120bf7 --- /dev/null +++ b/R/join_keys-print.R @@ -0,0 +1,63 @@ +#' @rdname join_keys +#' @order 7 +#' @export +format.join_keys <- function(x, ...) { + check_ellipsis(...) + if (length(x) > 0) { + my_parents <- parents(x) + names_sorted <- topological_sort(my_parents) + names <- union(names_sorted, names(x)) + x_implicit <- update_keys_given_parents(x) + out <- lapply(names, function(i) { + this_parent <- my_parents[[i]] + out_i <- lapply(union(i, names(x[[i]])), function(j) { + direction <- if (identical(my_parents[[j]], i)) { + " <-- " + } else if (identical(my_parents[[i]], j)) { + " --> " + } else if (!identical(i, j)) { + " <-> " + } else { + "" + } + + keys <- x[[i]][[j]] + sprintf( + "%s%s: [%s]", + direction, j, + if (length(keys) == 0) "no primary keys" else toString(keys) + ) + }) + + implicit_datasets <- setdiff(names(x_implicit[[i]]), names(x[[i]])) + if (length(implicit_datasets) > 0) { + out_i <- c( + out_i, + paste0( + " --* (implicit via parent with): ", + paste(implicit_datasets, collapse = ", ") + ) + ) + } + + paste(out_i, collapse = "\n") + }) + paste( + c( + sprintf("A join_keys object containing foreign keys between %s datasets:", length(x)), + out + ), + collapse = "\n" + ) + } else { + "An empty join_keys object." + } +} + +#' @rdname join_keys +#' @order 7 +#' @export +print.join_keys <- function(x, ...) { + cat(format(x, ...), "\n") + invisible(x) +} diff --git a/R/join_keys-utils.R b/R/join_keys-utils.R new file mode 100644 index 000000000..87e64bf77 --- /dev/null +++ b/R/join_keys-utils.R @@ -0,0 +1,161 @@ +#' Helper function to assert if two key sets contain incompatible keys +#' +#' return TRUE if compatible, throw error otherwise +#' @keywords internal +assert_compatible_keys <- function(join_key_1, join_key_2) { + stop_message <- function(dataset_1, dataset_2) { + stop( + paste("cannot specify multiple different join keys between datasets:", dataset_1, "and", dataset_2) + ) + } + + dataset_1_one <- names(join_key_1) + dataset_2_one <- names(join_key_1[[1]]) + keys_one <- join_key_1[[1]][[1]] + + dataset_1_two <- names(join_key_2) + dataset_2_two <- names(join_key_2[[1]]) + keys_two <- join_key_2[[1]][[1]] + + # if first datasets and the second datasets match and keys + # must contain the same named elements + if (dataset_1_one == dataset_1_two && dataset_2_one == dataset_2_two) { + if (!identical(sort(keys_one), sort(keys_two))) { + stop_message(dataset_1_one, dataset_2_one) + } + } + + # if first dataset of join_key_1 matches second dataset of join_key_2 + # and the first dataset of join_key_2 must match second dataset of join_key_1 + # and keys must contain the same elements but with names and values swapped + if (dataset_1_one == dataset_2_two && dataset_2_one == dataset_1_two) { + if ( + xor(length(keys_one) == 0, length(keys_two) == 0) || + !identical(sort(keys_one), sort(setNames(names(keys_two), keys_two))) + ) { + stop_message(dataset_1_one, dataset_2_one) + } + } + + # otherwise they are compatible + return(TRUE) +} + +#' Helper function checks the parent-child relations are valid +#' +#' @param x (`join_keys`) object to assert validity of relations +#' +#' @return `join_keys` invisibly +#' +#' @keywords internal +assert_parent_child <- function(x) { + jk <- join_keys(x) + jk_parents <- parents(jk) + + checkmate::assert_class(jk, c("join_keys", "list")) + + if (!is.null(jk_parents)) { + for (idx1 in seq_along(jk_parents)) { + name_from <- names(jk_parents)[[idx1]] + for (idx2 in seq_along(jk_parents[[idx1]])) { + name_to <- jk_parents[[idx1]][[idx2]] + keys_from <- jk[[name_from]][[name_to]] + keys_to <- jk[[name_to]][[name_from]] + if (length(keys_from) == 0 && length(keys_to) == 0) { + stop(sprintf("No join keys from %s to its parent (%s) and vice versa", name_from, name_to)) + } + } + } + } + invisible(x) +} + +assert_compatible_keys2 <- function(x, y) { + # Helper to flatten join_keys / join_key_set + flatten_join_key_sets <- function(value) { + value <- unclass(value) + Reduce( + init = list(), + f = function(u, v, ...) { + el <- value[v][[1]] + res <- lapply(seq_along(el), function(ix) el[ix]) + names(res) <- rep(v, length(res)) + append(u, res) + }, + x = names(value) + ) + } + + x <- flatten_join_key_sets(x) + y <- flatten_join_key_sets(y) + + for (idx_1 in seq_along(x)) { + for (idx_2 in seq_along(y)) { + assert_compatible_keys(x[idx_1], y[idx_2]) + } + } + TRUE +} + +#' Updates the keys of the datasets based on the parents. +#' +#' @param x (`join_keys`) object to update the keys. +#' +#' @return (`self`) invisibly for chaining +#' +#' @keywords internal +update_keys_given_parents <- function(x) { + jk <- join_keys(x) + + checkmate::assert_class(jk, "join_keys", .var.name = checkmate::vname(x)) + + datanames <- names(jk) + duplicate_pairs <- list() + for (d1 in datanames) { + d1_pk <- jk[[d1]][[d1]] + d1_parent <- parent(jk, d1) + for (d2 in datanames) { + if (paste(d2, d1) %in% duplicate_pairs) { + next + } + if (length(jk[[d1]][[d2]]) == 0) { + d2_parent <- parent(jk, d2) + d2_pk <- jk[[d2]][[d2]] + + fk <- if (identical(d1, d2_parent)) { + # first is parent of second -> parent keys -> first keys + d1_pk + } else if (identical(d1_parent, d2)) { + # second is parent of first -> parent keys -> second keys + d2_pk + } else if (identical(d1_parent, d2_parent) && length(d1_parent) > 0) { + # both has the same parent -> common keys to parent + keys_d1_parent <- sort(jk[[d1]][[d1_parent]]) + keys_d2_parent <- sort(jk[[d2]][[d2_parent]]) + + common_ix_1 <- unname(keys_d1_parent) %in% unname(keys_d2_parent) + common_ix_2 <- unname(keys_d2_parent) %in% unname(keys_d1_parent) + + if (all(!common_ix_1)) { + # No common keys between datasets - leave empty + next + } + + structure( + names(keys_d2_parent)[common_ix_2], + names = names(keys_d1_parent)[common_ix_1] + ) + } else { + # cant find connection - leave empty + next + } + jk[[d1]][[d2]] <- fk # mutate join key + duplicate_pairs <- append(duplicate_pairs, paste(d1, d2)) + } + } + } + # check parent child relation + assert_parent_child(x = jk) + + jk +} diff --git a/R/join_keys.R b/R/join_keys.R new file mode 100644 index 000000000..8b962c07e --- /dev/null +++ b/R/join_keys.R @@ -0,0 +1,162 @@ +#' Manage relationships between datasets using `join_keys` +#' @order 1 +#' @name join_keys +#' +#' @usage ## Constructor, getter and setter +#' join_keys(...) +#' +#' @description +#' `join_keys()` facilitates the creation and retrieval of relationships between datasets. +#' `join_keys` class extends a list and contains keys connecting pairs of datasets. Each element +#' of the list contains keys for specific dataset. Each dataset can have a relationship with +#' itself (primary key) and with other datasets. +#' +#' Note that `join_keys` list is symmetrical, that is, when keys are set between `dat1` and `dat2` it +#' is automatically mirrored between `dat2` and `dat1`. +#' +#' @section Methods (by class): +#' - `join_keys()`: Returns an empty `join_keys` object when called without arguments. +#' - `join_keys(join_keys)`: Returns itself. +#' - `join_keys(teal_data)`: Returns the `join_keys` object contained in `teal_data`. +#' - `join_keys(...)`: Creates a new object with one or more `join_key_set` parameters. +#' +#' @param ... (optional)\cr +#' either `teal_data` or `join_keys` to extract `join_keys`, \cr +#' or any number of `join_key_set` objects to create `join_keys`, \cr +#' or nothing to create an empty `join_keys` +#' +#' @return `join_keys` object. +#' +#' @export +#' +#' @seealso [join_key()] for creating `join_keys_set`, +#' [parents()] for parent operations, +#' [teal_data()] for `teal_data` constructor _and_ +#' [default_cdisc_join_keys] for default `CDISC` keys. +#' +#' @examples +#' # Creating a new join keys ---- +#' +#' jk <- join_keys( +#' join_key("ds1", "ds1", "pk1"), +#' join_key("ds2", "ds2", "pk2"), +#' join_key("ds3", "ds3", "pk3"), +#' join_key("ds2", "ds1", c(pk2 = "pk1")), +#' join_key("ds3", "ds1", c(pk3 = "pk1")) +#' ) +#' +#' jk +join_keys <- function(...) { + if (missing(...)) { + return(new_join_keys()) + } + x <- rlang::list2(...) + if (length(x) == 1L) { + UseMethod("join_keys", x[[1]]) + } else { + join_keys.default(...) + } +} + +#' @rdname join_keys +#' @order 1 +#' @export +join_keys.default <- function(...) { + c(new_join_keys(), ...) +} + +#' @rdname join_keys +#' @order 1 +#' @export +join_keys.join_keys <- function(...) { + x <- rlang::list2(...) + x[[1]] +} + +#' @rdname join_keys +#' @order 1 +#' @export +join_keys.teal_data <- function(...) { + x <- rlang::list2(...) + x[[1]]@join_keys +} + +#' @rdname join_keys +#' @order 1 +#' @export +join_keys.TealData <- function(...) { + x <- rlang::list2(...) + x[[1]]$get_join_keys() +} + +#' @rdname join_keys +#' @order 5 +#' +#' @section Functions: +#' - `join_keys(x) <- value`: Assignment of the `join_keys` in object with `value`. +#' `value` needs to be an object of class `join_keys` or `join_key_set`. +#' +#' @param x (`join_keys`) empty object to set the new relationship pairs. +#' `x` is typically an object of `join_keys` class. When called with the `join_keys(x)` +#' or `join_keys(x) <- value` then it can also take a supported class (`teal_data`, `join_keys`) +#' @param value (`join_key_set` or list of `join_key_set`) relationship pairs to add +#' to `join_keys` list. +#' +#' @export +`join_keys<-` <- function(x, value) { + checkmate::assert_class(value, classes = c("join_keys", "list")) + UseMethod("join_keys<-", x) +} + +#' @rdname join_keys +#' @order 5 +#' @export +#' @examples +#' +#' # Assigning keys via join_keys(x)[i, j] <- value ---- +#' +#' obj <- join_keys() +#' # or +#' obj <- teal_data() +#' +#' join_keys(obj)["ds1", "ds1"] <- "pk1" +#' join_keys(obj)["ds2", "ds2"] <- "pk2" +#' join_keys(obj)["ds3", "ds3"] <- "pk3" +#' join_keys(obj)["ds2", "ds1"] <- c(pk2 = "pk1") +#' join_keys(obj)["ds3", "ds1"] <- c(pk3 = "pk1") +#' +#' identical(jk, join_keys(obj)) +`join_keys<-.join_keys` <- function(x, value) { + value +} + +#' @rdname join_keys +#' @order 5 +#' @export +#' @examples +#' +#' # Setter for join_keys within teal_data ---- +#' +#' td <- teal_data() +#' join_keys(td) <- jk +#' +#' join_keys(td)["ds1", "ds2"] <- "new_key" +#' join_keys(td) <- c(join_keys(td), join_keys(join_key("ds3", "ds2", "key3"))) +#' join_keys(td) +`join_keys<-.teal_data` <- function(x, value) { + join_keys(x@join_keys) <- value + x +} + +#' Internal constructor +#' +#' @return an empty `join_keys` list +#' +#' @keywords internal +new_join_keys <- function() { + structure( + list(), + class = c("join_keys", "list"), + "__parents__" = list() + ) +} diff --git a/R/teal.data.R b/R/teal.data.R index 70bd1af1a..c62eef361 100644 --- a/R/teal.data.R +++ b/R/teal.data.R @@ -11,6 +11,7 @@ # Fix R CMD check notes #' @import shiny +#' @importFrom rlang `%||%` #' @importFrom digest digest #' @importFrom stats setNames #' @importFrom shinyjs useShinyjs diff --git a/R/teal_data-class.R b/R/teal_data-class.R index 1506b1d7c..3ce3b4652 100644 --- a/R/teal_data-class.R +++ b/R/teal_data-class.R @@ -1,4 +1,4 @@ -setOldClass("JoinKeys") +setOldClass("join_keys") #' Reproducible data. #' @@ -25,8 +25,8 @@ setOldClass("JoinKeys") #' @slot warnings (`character`) warnings raised when evaluating code. #' Access with [get_warnings()]. #' @slot messages (`character`) messages raised when evaluating code. -#' @slot join_keys (`JoinKeys`) object specifying joining keys for data sets in `@env`. -#' Access or modify with [get_join_keys()]. +#' @slot join_keys (`join_keys`) object specifying joining keys for data sets in `@env`. +#' Access or modify with [join_keys()]. #' @slot datanames (`character`) vector of names of data sets in `@env`. #' Used internally to distinguish them from auxiliary variables. #' Access or modify with [datanames()]. @@ -36,7 +36,7 @@ setOldClass("JoinKeys") setClass( Class = "teal_data", contains = "qenv", - slots = c(join_keys = "JoinKeys", datanames = "character"), + slots = c(join_keys = "join_keys", datanames = "character"), prototype = list( join_keys = join_keys(), datanames = character(0) @@ -51,7 +51,7 @@ setClass( #' @param data (`named list`) List of data. #' @param code (`character` or `language`) code to reproduce the `data`. #' Accepts and stores comments also. -#' @param join_keys (`JoinKeys`) object +#' @param join_keys (`join_keys`) object #' @param datanames (`character`) names of datasets passed to `data`. #' Needed when non-dataset objects are needed in the `env` slot. #' @rdname new_teal_data @@ -59,9 +59,9 @@ setClass( new_teal_data <- function(data, code = character(0), join_keys = join_keys(), - datanames = union(names(data), names(join_keys$get()))) { + datanames = union(names(data), names(join_keys))) { checkmate::assert_list(data) - checkmate::assert_class(join_keys, "JoinKeys") + checkmate::assert_class(join_keys, "join_keys") if (is.null(datanames)) datanames <- character(0) # todo: allow to specify checkmate::assert_character(datanames) if (!any(is.language(code), is.character(code))) { diff --git a/R/teal_data.R b/R/teal_data.R index 037fb2650..61dee21a2 100644 --- a/R/teal_data.R +++ b/R/teal_data.R @@ -6,7 +6,7 @@ #' @param ... (`TealDataConnector`, `TealDataset`, `TealDatasetConnector`, `any`)\cr #' Either 1) an object of a `Teal*` class, which is deprecated and will be removed in next release, #' or 2) any number of any objects provided as `name = value` pairs, which is available from version `0.4.0`. -#' @param join_keys (`JoinKeys`) or a single (`JoinKeySet`)\cr +#' @param join_keys (`join_keys`) or a single (`join_key_set`)\cr #' (optional) object with dataset column relationships used for joining. #' If empty then no joins between pairs of objects #' @param code (`character`, `language`) code to reproduce the datasets. @@ -28,7 +28,7 @@ teal_data <- function(..., code = character(0), check = FALSE) { data_objects <- rlang::list2(...) - if (inherits(join_keys, "JoinKeySet")) { + if (inherits(join_keys, "join_key_set")) { join_keys <- teal.data::join_keys(join_keys) } if ( @@ -45,7 +45,7 @@ teal_data <- function(..., Find more information on https://github.com/insightsengineering/teal/discussions/945' )" ) - deprecated_join_keys_extract(data_objects, join_keys) + join_keys <- deprecated_join_keys_extract(data_objects, join_keys) x <- TealData$new(..., check = check, join_keys = join_keys) if (length(code) > 0 && !identical(code, "")) { @@ -110,22 +110,19 @@ teal_data_file <- function(path, code = get_code(path)) { #' Add primary keys as join_keys to a dataset self #' #' @param data_objects (`list`) of `TealDataset`, `TealDatasetConnector` or `TealDataConnector` objects -#' @param join_keys (`JoinKeys`) object +#' @param x (`join_keys`) object #' #' @keywords internal -update_join_keys_to_primary <- function(data_objects, join_keys) { - lapply(data_objects, function(obj) { +update_join_keys_to_primary <- function(data_objects, x) { + for (obj in data_objects) { if (inherits(obj, "TealDataConnector")) { - update_join_keys_to_primary(obj$get_items(), join_keys) + x <- update_join_keys_to_primary(obj$get_items(), x) } else { dataname <- obj$get_dataname() - if (length(join_keys$get(dataname, dataname)) == 0) { - join_keys$mutate( - dataname, - dataname, - obj$get_keys() - ) + if (length(x[[dataname]][[dataname]]) == 0) { + x[[dataname]][[dataname]] <- obj$get_keys() } } - }) + } + x } diff --git a/R/testhat-helpers.R b/R/testhat-helpers.R new file mode 100644 index 000000000..efc7746fd --- /dev/null +++ b/R/testhat-helpers.R @@ -0,0 +1,96 @@ +#' (test helper) Create test data for `CDISC` data +#' +#' @inheritParams cdisc_data +#' +#' @return a `CDISC` data set with the following tables: `ADSL`, `ADTTE` and `ADAE` +#' +#' @keywords internal +local_cdisc_data_mixed_call <- function(check = TRUE, join_keys1 = join_keys()) { + adsl_raw <- as.data.frame(as.list(setNames(nm = get_cdisc_keys("ADSL")))) + adtte_raw <- as.data.frame(as.list(setNames(nm = get_cdisc_keys("ADTTE")))) + adae_raw <- as.data.frame(as.list(setNames(nm = get_cdisc_keys("ADAE")))) + + adsl <- cdisc_dataset( + dataname = "ADSL", + x = adsl_raw, + code = "ADSL <- as.data.frame(as.list(setNames(nm = get_cdisc_keys(\"ADSL\"))))" + ) + adtte_cf <- callable_function( + function() { + as.data.frame(as.list(setNames(nm = get_cdisc_keys("ADTTE")))) + } + ) + adtte <- cdisc_dataset_connector("ADTTE", adtte_cf, keys = get_cdisc_keys("ADTTE"), vars = list(x = adsl)) + adae_cf <- callable_function( + function() { + as.data.frame(as.list(setNames(nm = get_cdisc_keys("ADAE")))) + } + ) + adae_cdc <- cdisc_dataset_connector("ADAE", adae_cf, keys = get_cdisc_keys("ADAE")) + adae_rdc <- cdisc_data_connector( + connection = data_connection(open_fun = callable_function(function() "open function")), + connectors = list(adae_cdc) + ) + + load_dataset(adsl) + load_dataset(adtte) + load_dataset(adae_cdc) + + cdisc_data(adsl, adtte, adae_rdc, check = check, join_keys = join_keys1) +} + +#' Test if Two Objects are (Nearly) Equal +#' +#' `all.equal(target, current)` is a utility to compare `join_keys` objects target +#' and current testing `near equality`. +#' +#' If they are different, comparison is still made to some extent, and a report +#' of the differences is returned. +#' Do not use all.equal directly in if expressions—either use `isTRUE(all.equal(....))` +#' or identical if appropriate. +#' +#' @inheritParams base::all.equal +#' @param ... further arguments for different methods. Not used with `join_keys`. +#' +#' @details +#' The parents attribute comparison tolerates `NULL` and empty lists and will find +#' no difference. +#' +#' The list containing all the relationships is treated like a map and ignores +#' entries with `NULL` if they exist. +#' +#' @seealso [base::all.equal()] +#' @keywords internal +all.equal.join_keys <- function(target, current, ...) { + .as_map <- function(.x) { + old_attributes <- attributes(.x) + # Keep only non-list attributes + old_attributes[["names"]] <- NULL + old_attributes[["original_class"]] <- old_attributes[["class"]] + old_attributes[["class"]] <- NULL + old_attributes[["__parents__"]] <- if (!length(old_attributes[["__parents__"]])) { + list() + } else { + old_attributes[["__parents__"]][order(names(old_attributes[["__parents__"]]))] + } + attr(.x, "class") <- "list" + + # Remove nulls + .x <- Filter(Negate(is.null), .x) + + # Sort named components, preserving positions of unnamed + nx <- rlang::names2(.x) + is_named <- nx != "" + if (any(is_named)) { + idx <- seq_along(.x) + idx[is_named] <- idx[is_named][order(nx[is_named])] + .x <- .x[idx] + } + attributes(.x) <- utils::modifyList(old_attributes, attributes(.x) %||% list()) + .x + } + x <- .as_map(target) + y <- .as_map(current) + print("inner") + all.equal(x, y) +} diff --git a/R/zzz.R b/R/zzz.R index aa1402424..45ffa8762 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -3,9 +3,16 @@ # copy from excel file default_cdisc_keys <- yaml::yaml.load_file( get_package_file("teal.data", "cdisc_datasets/cdisc_datasets.yaml") - ) # nolint + ) assign("default_cdisc_keys", default_cdisc_keys, envir = parent.env(environment())) + # update default_cdisc_join_keys + assign( + "default_cdisc_join_keys", + build_cdisc_join_keys(default_cdisc_keys), + envir = parent.env(environment()) + ) + # Set up the teal logger instance teal.logger::register_logger("teal.data") diff --git a/_pkgdown.yml b/_pkgdown.yml index 4684fe812..4f3a40d7e 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -102,7 +102,6 @@ reference: - load_datasets - mutate_data - mutate_dataset - - mutate_join_keys - set_args - set_keys - title: Helpers @@ -110,6 +109,7 @@ reference: contents: - datanames - datanames<- + - default_cdisc_join_keys - col_labels - col_labels<- - col_relabel @@ -122,8 +122,14 @@ reference: - get_labels - join_key - join_keys + - join_keys<- + - names<-.join_keys + - parent + - parents + - parents<- - python_code - read_script + - update_keys_given_parents - validate_metadata - title: For Developers subtitle: R6 Classes @@ -134,7 +140,6 @@ reference: - CDISCTealDataConnector - CDISCTealDataset - CDISCTealDatasetConnector - - JoinKeys - MAETealDataset - PythonCodeClass - TealData diff --git a/inst/WORDLIST b/inst/WORDLIST index bf67939a4..063052f75 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -2,7 +2,6 @@ CDISC Forkers Getter Hoffmann -JoinKeys Pre Reproducibility SCDA @@ -10,6 +9,7 @@ UI cloneable formatters funder +getter iteratively pre repo diff --git a/man/JoinKeys.Rd b/man/JoinKeys.Rd deleted file mode 100644 index 978b19bef..000000000 --- a/man/JoinKeys.Rd +++ /dev/null @@ -1,271 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/JoinKeys.R -\name{JoinKeys} -\alias{JoinKeys} -\title{R6 Class to store relationships for joining datasets} -\description{ -\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} -This class stores symmetric links between pairs of key-values -(e.g. column A of dataset X can be joined with column B of dataset Y). This relationship -is more general than the SQL foreign key relationship which also imposes constraints on the values -of these columns. -} -\examples{ -x <- teal.data:::JoinKeys$new() -x$set( - list( - join_key("dataset_A", "dataset_B", c("col_1" = "col_a")), - join_key("dataset_A", "dataset_C", c("col_2" = "col_x", "col_3" = "col_y")) - ) -) -x$get() -x$mutate("dataset_A", "dataset_B", c("col1" = "col10")) -x$get("dataset_A", "dataset_B") -} -\section{Methods}{ -\subsection{Public methods}{ -\itemize{ -\item \href{#method-JoinKeys-new}{\code{JoinKeys$new()}} -\item \href{#method-JoinKeys-split}{\code{JoinKeys$split()}} -\item \href{#method-JoinKeys-merge}{\code{JoinKeys$merge()}} -\item \href{#method-JoinKeys-get}{\code{JoinKeys$get()}} -\item \href{#method-JoinKeys-mutate}{\code{JoinKeys$mutate()}} -\item \href{#method-JoinKeys-set}{\code{JoinKeys$set()}} -\item \href{#method-JoinKeys-print}{\code{JoinKeys$print()}} -\item \href{#method-JoinKeys-set_parents}{\code{JoinKeys$set_parents()}} -\item \href{#method-JoinKeys-get_parent}{\code{JoinKeys$get_parent()}} -\item \href{#method-JoinKeys-get_parents}{\code{JoinKeys$get_parents()}} -\item \href{#method-JoinKeys-update_keys_given_parents}{\code{JoinKeys$update_keys_given_parents()}} -\item \href{#method-JoinKeys-clone}{\code{JoinKeys$clone()}} -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-JoinKeys-new}{}}} -\subsection{Method \code{new()}}{ -Create a new object of \code{JoinKeys} -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{JoinKeys$new()}\if{html}{\out{
}} -} - -\subsection{Returns}{ -empty (\code{JoinKeys}) -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-JoinKeys-split}{}}} -\subsection{Method \code{split()}}{ -Split the current \code{JoinKeys} object into a named list of join keys objects with an element for each dataset -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{JoinKeys$split()}\if{html}{\out{
}} -} - -\subsection{Returns}{ -(\code{list}) a list of \code{JoinKeys} object -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-JoinKeys-merge}{}}} -\subsection{Method \code{merge()}}{ -Merging a list (or one) of \code{JoinKeys} objects into the current \code{JoinKeys} object -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{JoinKeys$merge(x)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{x}}{\code{list} of \code{JoinKeys} objects or single \code{JoinKeys} object} -} -\if{html}{\out{
}} -} -\subsection{Returns}{ -(\code{self}) invisibly for chaining -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-JoinKeys-get}{}}} -\subsection{Method \code{get()}}{ -Get join keys between two datasets. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{JoinKeys$get(dataset_1, dataset_2)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{dataset_1}}{(\code{character}) one dataset name} - -\item{\code{dataset_2}}{(\code{character}) other dataset name} -} -\if{html}{\out{
}} -} -\subsection{Details}{ -if one or both of \code{dataset_1} and \code{dataset_2} are missing then -underlying keys structure is returned for further processing -} - -\subsection{Returns}{ -(\code{character}) named character vector x with names(x) the -columns of \code{dataset_1} and the values of \code{(x)} the corresponding join -keys in \code{dataset_2} or \code{character(0)} if no relationship -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-JoinKeys-mutate}{}}} -\subsection{Method \code{mutate()}}{ -Change join_keys for a given pair of dataset names (or -add join_keys for given pair if it does not exist) -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{JoinKeys$mutate(dataset_1, dataset_2, val)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{dataset_1}}{(\code{character}) one dataset name} - -\item{\code{dataset_2}}{(\code{character}) other dataset name} - -\item{\code{val}}{(named \code{character}) column names used to join} -} -\if{html}{\out{
}} -} -\subsection{Returns}{ -(\code{self}) invisibly for chaining -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-JoinKeys-set}{}}} -\subsection{Method \code{set()}}{ -Set up join keys basing on list of \code{JoinKeySet} objects. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{JoinKeys$set(x)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{x}}{\code{list} of \code{JoinKeySet} objects (which are created using the \code{join_key} function) -or single \code{JoinKeySet} objects} -} -\if{html}{\out{
}} -} -\subsection{Details}{ -Note that join keys are symmetric although the relationship only needs -to be specified once -} - -\subsection{Returns}{ -(\code{self}) invisibly for chaining -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-JoinKeys-print}{}}} -\subsection{Method \code{print()}}{ -Prints this \code{JoinKeys}. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{JoinKeys$print(...)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{...}}{additional arguments to the printing method} -} -\if{html}{\out{
}} -} -\subsection{Returns}{ -invisibly self -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-JoinKeys-set_parents}{}}} -\subsection{Method \code{set_parents()}}{ -Sets the parents of the datasets. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{JoinKeys$set_parents(named_list)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{named_list}}{Named (\code{list}) of the parents datasets.} -} -\if{html}{\out{
}} -} -\subsection{Returns}{ -(\code{self}) invisibly for chaining -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-JoinKeys-get_parent}{}}} -\subsection{Method \code{get_parent()}}{ -Gets the parent of the desired dataset. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{JoinKeys$get_parent(dataname)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{dataname}}{(\code{character}) name of the dataset.} -} -\if{html}{\out{
}} -} -\subsection{Returns}{ -(\code{character}) the parent of the desired dataset -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-JoinKeys-get_parents}{}}} -\subsection{Method \code{get_parents()}}{ -Gets the parents of the datasets. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{JoinKeys$get_parents()}\if{html}{\out{
}} -} - -\subsection{Returns}{ -(\code{list}) A named list of the parents of all datasets -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-JoinKeys-update_keys_given_parents}{}}} -\subsection{Method \code{update_keys_given_parents()}}{ -Updates the keys of the datasets based on the parents. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{JoinKeys$update_keys_given_parents()}\if{html}{\out{
}} -} - -\subsection{Returns}{ -(\code{self}) invisibly for chaining -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-JoinKeys-clone}{}}} -\subsection{Method \code{clone()}}{ -The objects of this class are cloneable with this method. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{JoinKeys$clone(deep = FALSE)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{deep}}{Whether to make a deep clone.} -} -\if{html}{\out{
}} -} -} -} diff --git a/man/TealData.Rd b/man/TealData.Rd index f3728fa24..57ac7e07c 100644 --- a/man/TealData.Rd +++ b/man/TealData.Rd @@ -110,9 +110,9 @@ objects} \item{\code{check}}{(\code{logical}) reproducibility check - whether evaluated preprocessing code gives the same objects as provided in arguments. Check is run only if flag is true and preprocessing code is not empty.} -\item{\code{join_keys}}{(\code{JoinKeys}) or a single (\code{JoinKeySet})\cr +\item{\code{join_keys}}{(\code{join_keys}) or a single (\code{join_key_set})\cr (optional) object with dataset column relationships used for joining. -If empty then an empty \code{JoinKeys} object is passed by default.} +If empty then an empty \code{join_keys} object is passed by default.} } \if{html}{\out{}} } @@ -199,7 +199,7 @@ name of dataset connector to be returned. If \code{NULL}, all connectors are ret \subsection{Method \code{get_join_keys()}}{ Get join keys between two datasets. \subsection{Usage}{ -\if{html}{\out{
}}\preformatted{TealData$get_join_keys(dataset_1, dataset_2)}\if{html}{\out{
}} +\if{html}{\out{
}}\preformatted{TealData$get_join_keys(dataset_1 = NULL, dataset_2 = NULL)}\if{html}{\out{
}} } \subsection{Arguments}{ diff --git a/man/all.equal.join_keys.Rd b/man/all.equal.join_keys.Rd new file mode 100644 index 000000000..fb68b3941 --- /dev/null +++ b/man/all.equal.join_keys.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/testhat-helpers.R +\name{all.equal.join_keys} +\alias{all.equal.join_keys} +\title{Test if Two Objects are (Nearly) Equal} +\usage{ +\method{all}{equal.join_keys}(target, current, ...) +} +\arguments{ +\item{target}{\R object.} + +\item{current}{other \R object, to be compared with \code{target}.} + +\item{...}{further arguments for different methods. Not used with \code{join_keys}.} +} +\description{ +\code{all.equal(target, current)} is a utility to compare \code{join_keys} objects target +and current testing \verb{near equality}. +} +\details{ +If they are different, comparison is still made to some extent, and a report +of the differences is returned. +Do not use all.equal directly in if expressions—either use \code{isTRUE(all.equal(....))} +or identical if appropriate. + +The parents attribute comparison tolerates \code{NULL} and empty lists and will find +no difference. + +The list containing all the relationships is treated like a map and ignores +entries with \code{NULL} if they exist. +} +\seealso{ +\code{\link[base:all.equal]{base::all.equal()}} +} +\keyword{internal} diff --git a/man/assert_compatible_keys.Rd b/man/assert_compatible_keys.Rd new file mode 100644 index 000000000..1ede2e575 --- /dev/null +++ b/man/assert_compatible_keys.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/join_keys-utils.R +\name{assert_compatible_keys} +\alias{assert_compatible_keys} +\title{Helper function to assert if two key sets contain incompatible keys} +\usage{ +assert_compatible_keys(join_key_1, join_key_2) +} +\description{ +return TRUE if compatible, throw error otherwise +} +\keyword{internal} diff --git a/man/assert_parent_child.Rd b/man/assert_parent_child.Rd new file mode 100644 index 000000000..30f24efe9 --- /dev/null +++ b/man/assert_parent_child.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/join_keys-utils.R +\name{assert_parent_child} +\alias{assert_parent_child} +\title{Helper function checks the parent-child relations are valid} +\usage{ +assert_parent_child(x) +} +\arguments{ +\item{x}{(\code{join_keys}) object to assert validity of relations} +} +\value{ +\code{join_keys} invisibly +} +\description{ +Helper function checks the parent-child relations are valid +} +\keyword{internal} diff --git a/man/build_cdisc_join_keys.Rd b/man/build_cdisc_join_keys.Rd new file mode 100644 index 000000000..46f59cba1 --- /dev/null +++ b/man/build_cdisc_join_keys.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cdisc_join_keys.R +\name{build_cdisc_join_keys} +\alias{build_cdisc_join_keys} +\title{Helper method to build \code{default_cdisc_join_keys}} +\usage{ +build_cdisc_join_keys(default_cdisc_keys) +} +\arguments{ +\item{default_cdisc_keys}{(\code{list}) default definition of primary and foreign +keys for \code{CDISC} datasets} +} +\description{ +Helper method to build \code{default_cdisc_join_keys} +} +\keyword{internal} diff --git a/man/cdisc_data.Rd b/man/cdisc_data.Rd index eea0b46b5..15d5afcf2 100644 --- a/man/cdisc_data.Rd +++ b/man/cdisc_data.Rd @@ -6,7 +6,7 @@ \usage{ cdisc_data( ..., - join_keys = teal.data::cdisc_join_keys(...), + join_keys = teal.data::default_cdisc_join_keys[names(rlang::list2(...))], code = character(0), check = FALSE ) @@ -16,7 +16,7 @@ cdisc_data( Either 1) an object of a \verb{Teal*} class, which is deprecated and will be removed in next release, or 2) any number of any objects provided as \code{name = value} pairs, which is available from version \verb{0.4.0}.} -\item{join_keys}{(\code{JoinKeys}) or a single (\code{JoinKeySet})\cr +\item{join_keys}{(\code{join_keys}) or a single (\code{join_key_set})\cr (optional) object with datasets column names used for joining. If empty then it would be automatically derived basing on intersection of datasets primary keys. For ADAM datasets it would be automatically derived.} diff --git a/man/default_cdisc_join_keys.Rd b/man/default_cdisc_join_keys.Rd new file mode 100644 index 000000000..7934e89b0 --- /dev/null +++ b/man/default_cdisc_join_keys.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cdisc_join_keys.R +\docType{data} +\name{default_cdisc_join_keys} +\alias{default_cdisc_join_keys} +\title{List containing the default \code{CDISC} join keys} +\description{ +List containing the default \code{CDISC} join keys +} +\details{ +This data object is created at loading time from \code{cdisc_datasets/cdisc_datasets.yaml}. +} diff --git a/man/dot-merge_parents.Rd b/man/dot-merge_parents.Rd new file mode 100644 index 000000000..f558f28e2 --- /dev/null +++ b/man/dot-merge_parents.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/join_keys-c.R +\name{.merge_parents} +\alias{.merge_parents} +\title{Merge parents for 2 \code{join_keys} object} +\usage{ +.merge_parents(x, y) +} +\arguments{ +\item{x, y}{(\code{join_keys}) objects to merge their parents} +} +\value{ +a list with parents merged from 2 \code{join_keys}. Not the object itself. +} +\description{ +Merge parents for 2 \code{join_keys} object +} +\keyword{internal} diff --git a/man/get_join_keys.Rd b/man/get_join_keys.Rd index 4ec725a21..b6d1a63c6 100644 --- a/man/get_join_keys.Rd +++ b/man/get_join_keys.Rd @@ -2,42 +2,23 @@ % Please edit documentation in R/get_join_keys.R \name{get_join_keys} \alias{get_join_keys} -\alias{get_join_keys.default} -\alias{get_join_keys.teal_data} -\alias{get_join_keys.JoinKeys} -\alias{get_join_keys.TealData} \alias{get_join_keys<-} -\alias{get_join_keys<-.JoinKeys} -\alias{get_join_keys<-.teal_data} \title{Function to get join keys from a `` object} \usage{ get_join_keys(data) -\method{get_join_keys}{default}(data) - -\method{get_join_keys}{teal_data}(data) - -\method{get_join_keys}{JoinKeys}(data) - -\method{get_join_keys}{TealData}(data) - get_join_keys(data, dataset_1, dataset_2 = NULL) <- value - -\method{get_join_keys}{JoinKeys}(data, dataset_1, dataset_2 = NULL) <- value - -\method{get_join_keys}{teal_data}(data, dataset_1, dataset_2 = NULL) <- value } \arguments{ \item{data}{`` - object to extract the join keys} -\item{dataset_1}{(\code{character}) one dataset name} - -\item{dataset_2}{(\code{character}) other dataset name} +\item{dataset_1, dataset_2}{(\code{character(1)}) dataset names. If \code{dataset_2} is omitted, +a primary key for \code{dataset_1} is created.} \item{value}{value to assign} } \value{ -Either \code{JoinKeys} object or \code{NULL} if no join keys +Either \code{join_keys} object or \code{NULL} if no join keys } \description{ Function to get join keys from a `` object diff --git a/man/join_key.Rd b/man/join_key.Rd index 17f020099..f31fe1f0f 100644 --- a/man/join_key.Rd +++ b/man/join_key.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/JoinKeys.R +% Please edit documentation in R/join_key.R \name{join_key} \alias{join_key} \title{Create a relationship between a pair of datasets} @@ -7,18 +7,19 @@ join_key(dataset_1, dataset_2 = dataset_1, keys) } \arguments{ -\item{dataset_1}{(\code{character}) one dataset name} - -\item{dataset_2}{(optional \code{character}) other dataset name. In case it is omitted, then it -will create a primary key for \code{dataset_1}.} +\item{dataset_1, dataset_2}{(\code{character(1)}) dataset names. If \code{dataset_2} is omitted, +a primary key for \code{dataset_1} is created.} \item{keys}{(optionally named \code{character}) where \code{names(keys)} are columns in \code{dataset_1} -with relationship to columns of \code{dataset_2} given by the elements in \code{keys}. -If \code{names(keys)} is \code{NULL} then the same column names are used for both \code{dataset_1} -and \code{dataset_2}.} +corresponding to columns of \code{dataset_2} given by the elements of \code{keys}. + +If unnamed, the same column names are used for both datasets. + +If any element of the \code{keys} vector is empty with a non-empty name, then the name is +used for both datasets.} } \value{ -object of class \code{JoinKeySet} to be passed into \code{join_keys} function. +object of class \code{join_key_set} to be passed into \code{join_keys} function. } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} @@ -27,6 +28,11 @@ object of class \code{JoinKeySet} to be passed into \code{join_keys} function. \code{join_key()} will create a relationship for the variables on a pair of datasets. } +\examples{ +join_key("d1", "d2", c("A")) +join_key("d1", "d2", c("A" = "B")) +join_key("d1", "d2", c("A" = "B", "C")) +} \seealso{ \code{\link[=join_keys]{join_keys()}} } diff --git a/man/join_keys.Rd b/man/join_keys.Rd index 535d8585f..a2836e86c 100644 --- a/man/join_keys.Rd +++ b/man/join_keys.Rd @@ -1,44 +1,196 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/JoinKeys.R +% Please edit documentation in R/join_keys.R, R/join_keys-extract.R, +% R/join_keys-c.R, R/join_keys-print.R \name{join_keys} \alias{join_keys} -\alias{cdisc_join_keys} -\title{Create a \code{JoinKeys} out of a list of \code{JoinKeySet} objects} +\alias{join_keys.default} +\alias{join_keys.join_keys} +\alias{join_keys.teal_data} +\alias{join_keys.TealData} +\alias{[.join_keys} +\alias{[<-.join_keys} +\alias{c.join_keys} +\alias{c.join_key_set} +\alias{join_keys<-} +\alias{join_keys<-.join_keys} +\alias{join_keys<-.teal_data} +\alias{format.join_keys} +\alias{print.join_keys} +\title{Manage relationships between datasets using \code{join_keys}} \usage{ +## Constructor, getter and setter join_keys(...) -cdisc_join_keys(...) +\method{join_keys}{default}(...) + +\method{join_keys}{join_keys}(...) + +\method{join_keys}{teal_data}(...) + +\method{join_keys}{TealData}(...) + +\method{[}{join_keys}(x, i, j) + +\method{[}{join_keys}(x, i, j) <- value + +\method{c}{join_keys}(...) + +\method{c}{join_key_set}(...) + +join_keys(x) <- value + +\method{join_keys}{join_keys}(x) <- value + +\method{join_keys}{teal_data}(x) <- value + +\method{format}{join_keys}(x, ...) + +\method{print}{join_keys}(x, ...) } \arguments{ -\item{...}{optional, a \code{JoinKeySet} objects created using the \code{join_key} function.} +\item{...}{(optional)\cr +either \code{teal_data} or \code{join_keys} to extract \code{join_keys}, \cr +or any number of \code{join_key_set} objects to create \code{join_keys}, \cr +or nothing to create an empty \code{join_keys}} + +\item{x}{(\code{join_keys}) empty object to set the new relationship pairs. +\code{x} is typically an object of \code{join_keys} class. When called with the \code{join_keys(x)} +or \code{join_keys(x) <- value} then it can also take a supported class (\code{teal_data}, \code{join_keys})} + +\item{i, j}{indices specifying elements to extract or replace. Index should be a +a character vector, but it can also take numeric, logical, \code{NULL} or missing.} + +\item{value}{(\code{join_key_set} or list of \code{join_key_set}) relationship pairs to add +to \code{join_keys} list.} } \value{ -\code{JoinKeys} +\code{join_keys} object. } \description{ -\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} +\code{join_keys()} facilitates the creation and retrieval of relationships between datasets. +\code{join_keys} class extends a list and contains keys connecting pairs of datasets. Each element +of the list contains keys for specific dataset. Each dataset can have a relationship with +itself (primary key) and with other datasets. + +Note that \code{join_keys} list is symmetrical, that is, when keys are set between \code{dat1} and \code{dat2} it +is automatically mirrored between \code{dat2} and \code{dat1}. } -\details{ -Note that join keys are symmetric although the relationship only needs -to be specified once. +\section{Methods (by class)}{ + +\itemize{ +\item \code{join_keys()}: Returns an empty \code{join_keys} object when called without arguments. +\item \code{join_keys(join_keys)}: Returns itself. +\item \code{join_keys(teal_data)}: Returns the \code{join_keys} object contained in \code{teal_data}. +\item \code{join_keys(...)}: Creates a new object with one or more \code{join_key_set} parameters. +} +} + +\section{Functions}{ + +\itemize{ +\item \code{x[datanames]}: Returns a subset of the \code{join_keys} object for +given \code{datanames}, including parent \code{datanames} and symmetric mirror keys between +\code{datanames} in the result. +\item \code{x[i, j]}: Returns join keys between datasets \code{i} and \code{j}, +including implicit keys inferred from their relationship with a parent. +} + + +\itemize{ +\item \code{x[i, j] <- value}: Assignment of a key to pair \verb{(i, j)}. +\item \code{x[i] <- value}: This (without \code{j} parameter) \strong{is not} a supported +operation for \code{join_keys}. +\item \code{join_keys(x)[i, j] <- value}: Assignment to \code{join_keys} object stored in \code{x}, +such as a \code{teal_data} or \code{join_keys} itself. +} + -\code{cdisc_join_keys} is a wrapper around \code{join_keys} that sets the default -join keys for CDISC datasets. It is used internally by \code{cdisc_data} to -set the default join keys for CDISC datasets. +\itemize{ +\item \code{join_keys(x) <- value}: Assignment of the \code{join_keys} in object with \code{value}. +\code{value} needs to be an object of class \code{join_keys} or \code{join_key_set}. } +} + \examples{ -# setting join keys -join_keys( - join_key("dataset_A", "dataset_B", c("col_1" = "col_a")), - join_key("dataset_A", "dataset_C", c("col_2" = "col_x", "col_3" = "col_y")) +# Creating a new join keys ---- + +jk <- join_keys( + join_key("ds1", "ds1", "pk1"), + join_key("ds2", "ds2", "pk2"), + join_key("ds3", "ds3", "pk3"), + join_key("ds2", "ds1", c(pk2 = "pk1")), + join_key("ds3", "ds1", c(pk3 = "pk1")) ) + +jk + +# Getter for join_keys --- + +jk["ds1", "ds2"] + +# Subsetting join_keys ---- + +jk["ds1"] +jk[1:2] +jk[c("ds1", "ds2")] + +# Setting a new primary key --- + +jk["ds4", "ds4"] <- "pk4" +jk["ds5", "ds5"] <- "pk5" + +# Setting a single relationship pair --- + +jk["ds4", "ds1"] <- c("pk4" = "pk1") + +# Removing a key --- + +jk["ds5", "ds5"] <- NULL + +# Merging multiple `join_keys` --- + +jk_merged <- c( + jk, + join_keys( + join_key("ds4", keys = c("pk4", "pk4_2")), + join_key("ds4", "ds3", c(pk4_2 = "pk3")) + ) +) + +# note: merge can be performed with both join_keys and join_key_set + +jk_merged <- c( + jk_merged, + join_key("ds5", keys = "pk5"), + join_key("ds5", "ds1", c(pk5 = "pk1")) +) + +# Assigning keys via join_keys(x)[i, j] <- value ---- + +obj <- join_keys() # or -jk <- join_keys() -jk["dataset_A", "dataset_B"] <- c("col_1" = "col_a") -jk["dataset_A", "dataset_C"] <- c("col_2" = "col_x", "col_3" = "col_y") +obj <- teal_data() +join_keys(obj)["ds1", "ds1"] <- "pk1" +join_keys(obj)["ds2", "ds2"] <- "pk2" +join_keys(obj)["ds3", "ds3"] <- "pk3" +join_keys(obj)["ds2", "ds1"] <- c(pk2 = "pk1") +join_keys(obj)["ds3", "ds1"] <- c(pk3 = "pk1") -# default CDISC join keys -cdisc_join_keys(join_key("dataset_A", "dataset_B", c("col_1" = "col_a")), "ADTTE") +identical(jk, join_keys(obj)) +# Setter for join_keys within teal_data ---- + +td <- teal_data() +join_keys(td) <- jk + +join_keys(td)["ds1", "ds2"] <- "new_key" +join_keys(td) <- c(join_keys(td), join_keys(join_key("ds3", "ds2", "key3"))) +join_keys(td) +} +\seealso{ +\code{\link[=join_key]{join_key()}} for creating \code{join_keys_set}, +\code{\link[=parents]{parents()}} for parent operations, +\code{\link[=teal_data]{teal_data()}} for \code{teal_data} constructor \emph{and} +\link{default_cdisc_join_keys} for default \code{CDISC} keys. } diff --git a/man/local_cdisc_data_mixed_call.Rd b/man/local_cdisc_data_mixed_call.Rd new file mode 100644 index 000000000..e97c8692f --- /dev/null +++ b/man/local_cdisc_data_mixed_call.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/testhat-helpers.R +\name{local_cdisc_data_mixed_call} +\alias{local_cdisc_data_mixed_call} +\title{(test helper) Create test data for \code{CDISC} data} +\usage{ +local_cdisc_data_mixed_call(check = TRUE, join_keys1 = join_keys()) +} +\arguments{ +\item{check}{(\code{logical}) reproducibility check - whether to perform a check that the pre-processing +code included in the object definitions actually produces those objects. +If \code{check} is true and preprocessing code is empty an error will be thrown.} +} +\value{ +a \code{CDISC} data set with the following tables: \code{ADSL}, \code{ADTTE} and \code{ADAE} +} +\description{ +(test helper) Create test data for \code{CDISC} data +} +\keyword{internal} diff --git a/man/mutate_join_keys.Rd b/man/mutate_join_keys.Rd deleted file mode 100644 index 78a4ef57c..000000000 --- a/man/mutate_join_keys.Rd +++ /dev/null @@ -1,55 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/JoinKeys.R -\name{mutate_join_keys} -\alias{mutate_join_keys} -\alias{mutate_join_keys.JoinKeys} -\alias{mutate_join_keys.TealData} -\title{Mutate \code{JoinKeys} with a new values} -\usage{ -mutate_join_keys(x, dataset_1, dataset_2, val) - -\method{mutate_join_keys}{JoinKeys}(x, dataset_1, dataset_2, val) - -\method{mutate_join_keys}{TealData}(x, dataset_1, dataset_2, val) -} -\arguments{ -\item{x}{(\code{JoinKeys}) object to be modified} - -\item{dataset_1}{(\code{character}) one dataset name} - -\item{dataset_2}{(\code{character}) other dataset name} - -\item{val}{(named \code{character}) column names used to join} -} -\value{ -modified \code{JoinKeys} object -} -\description{ -\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} -Mutate \code{JoinKeys} with a new values -} -\examples{ -# JoinKeys ---- - -x <- join_keys( - join_key("dataset_A", "dataset_B", c("col_1" = "col_a")), - join_key("dataset_A", "dataset_C", c("col_2" = "col_x", "col_3" = "col_y")) -) -x$get("dataset_A", "dataset_B") - -mutate_join_keys(x, "dataset_A", "dataset_B", c("col_1" = "col_10")) -x$get("dataset_A", "dataset_B") -# TealData ---- - -ADSL <- teal.data::example_cdisc_data("ADSL") -ADRS <- teal.data::example_cdisc_data("ADRS") - -x <- cdisc_data( - cdisc_dataset("ADSL", ADSL), - cdisc_dataset("ADRS", ADRS) -) -x$get_join_keys()$get("ADSL", "ADRS") - -mutate_join_keys(x, "ADSL", "ADRS", c("COLUMN1" = "COLUMN2")) -x$get_join_keys()$get("ADSL", "ADRS") -} diff --git a/man/names-set-.join_keys.Rd b/man/names-set-.join_keys.Rd new file mode 100644 index 000000000..4d276845f --- /dev/null +++ b/man/names-set-.join_keys.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/join_keys-names.R +\name{names<-.join_keys} +\alias{names<-.join_keys} +\title{The Names of an \code{join_keys} Object} +\usage{ +\method{names}{join_keys}(x) <- value +} +\arguments{ +\item{x}{an \R object.} + +\item{value}{a character vector of up to the same length as \code{x}, or + \code{NULL}.} +} +\description{ +The Names of an \code{join_keys} Object +} diff --git a/man/new_join_keys.Rd b/man/new_join_keys.Rd new file mode 100644 index 000000000..093f9eb34 --- /dev/null +++ b/man/new_join_keys.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/join_keys.R +\name{new_join_keys} +\alias{new_join_keys} +\title{Internal constructor} +\usage{ +new_join_keys() +} +\value{ +an empty \code{join_keys} list +} +\description{ +Internal constructor +} +\keyword{internal} diff --git a/man/new_teal_data.Rd b/man/new_teal_data.Rd index 1191cebc5..5b1557ec2 100644 --- a/man/new_teal_data.Rd +++ b/man/new_teal_data.Rd @@ -8,7 +8,7 @@ new_teal_data( data, code = character(0), join_keys = join_keys(), - datanames = union(names(data), names(join_keys$get())) + datanames = union(names(data), names(join_keys)) ) } \arguments{ @@ -17,7 +17,7 @@ new_teal_data( \item{code}{(\code{character} or \code{language}) code to reproduce the \code{data}. Accepts and stores comments also.} -\item{join_keys}{(\code{JoinKeys}) object} +\item{join_keys}{(\code{join_keys}) object} \item{datanames}{(\code{character}) names of datasets passed to \code{data}. Needed when non-dataset objects are needed in the \code{env} slot.} diff --git a/man/parents.Rd b/man/parents.Rd new file mode 100644 index 000000000..ace27bd46 --- /dev/null +++ b/man/parents.Rd @@ -0,0 +1,107 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/join_keys-parents.R +\name{parents} +\alias{parents} +\alias{parents.join_keys} +\alias{parents.teal_data} +\alias{parents<-} +\alias{parents<-.join_keys} +\alias{parents<-.teal_data} +\alias{parent} +\title{Getter and setter functions for parents attribute of \code{join_keys}} +\usage{ +parents(x) + +\method{parents}{join_keys}(x) + +\method{parents}{teal_data}(x) + +parents(x) <- value + +\method{parents}{join_keys}(x) <- value + +\method{parents}{teal_data}(x) <- value + +parent(x, dataset_name) +} +\arguments{ +\item{x}{(\code{join_keys} or \code{teal_data}) object that contains "parents" information +to retrieve or manipulate.} + +\item{value}{(\verb{named list}) of \code{character} vectors.} + +\item{dataset_name}{(\code{character(1)}) Name of dataset to query on their parent.} +} +\value{ +a list of \code{character} representing the parents. + +For \code{parent(x, dataset_name)} returns \code{NULL} if parent does not exist. +} +\description{ +\code{parents()} facilitates the creation of dependencies between datasets by +assigning a parent-child relationship. + +Each element is defined by a list element, where \code{list("child" = "parent")}. +} +\section{Methods (by class)}{ +\itemize{ +\item \code{parents(join_keys)}: Retrieves parents of \code{join_keys} object. + +\item \code{parents(teal_data)}: Retrieves parents of \code{join_keys} inside \code{teal_data} object. + +}} +\section{Functions}{ +\itemize{ +\item \code{parents(x) <- value}: Assignment of parents in \code{join_keys} object. + +\item \code{parents(join_keys) <- value}: Assignment of parents of \code{join_keys} object. + +\item \code{parents(teal_data) <- value}: Assignment of parents of \code{join_keys} inside \code{teal_data} object. + +\item \code{parent()}: Getter for individual parent + +}} +\examples{ +# Get parents of join_keys --- + +jk <- default_cdisc_join_keys["ADEX"] +parents(jk) + +# Get parents of join_keys inside teal_data object --- + +td <- teal_data( + ADSL = teal.data::rADSL, + ADTTE = teal.data::rADTTE, + ADRS = teal.data::rADRS, + join_keys = default_cdisc_join_keys[c("ADSL", "ADTTE", "ADRS")] +) +parents(td) + +# Assigment of parents --- + +jk <- join_keys( + join_key("ds1", "ds2", "id"), + join_key("ds5", "ds6", "id"), + join_key("ds7", "ds6", "id") +) + +parents(jk) <- list(ds2 = "ds1") + +# Setting individual parent-child relationship + +parents(jk)["ds6"] <- "ds5" +parents(jk)["ds7"] <- "ds6" + +# Assigment of parents of join_keys inside teal_data object --- + +parents(td) <- list("ADTTE" = "ADSL") # replace existing +parents(td)["ADRS"] <- "ADSL" # add new parent + +# Get individual parent --- + +parent(jk, "ds2") +parent(td, "ADTTE") +} +\seealso{ +\code{\link[=join_keys]{join_keys()}} +} diff --git a/man/sub-.JoinKeys.Rd b/man/sub-.JoinKeys.Rd deleted file mode 100644 index 1fac6de0b..000000000 --- a/man/sub-.JoinKeys.Rd +++ /dev/null @@ -1,24 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/JoinKeys.R -\name{[.JoinKeys} -\alias{[.JoinKeys} -\alias{[<-.JoinKeys} -\title{Getter for JoinKeys that returns the relationship between pairs of datasets} -\usage{ -\method{[}{JoinKeys}(x, dataset_1, dataset_2 = dataset_1) - -\method{[}{JoinKeys}(x, dataset_1, dataset_2 = dataset_1) <- value -} -\arguments{ -\item{x}{JoinKeys object to extract the join keys} - -\item{dataset_1}{(\code{character}) name of first dataset.} - -\item{dataset_2}{(\code{character}) name of second dataset.} - -\item{value}{value to assign} -} -\description{ -Getter for JoinKeys that returns the relationship between pairs of datasets -} -\keyword{internal} diff --git a/man/teal_data-class.Rd b/man/teal_data-class.Rd index e3fcad4c5..d6101689e 100644 --- a/man/teal_data-class.Rd +++ b/man/teal_data-class.Rd @@ -35,8 +35,8 @@ Access with \code{\link[=get_warnings]{get_warnings()}}.} \item{\code{messages}}{(\code{character}) messages raised when evaluating code.} -\item{\code{join_keys}}{(\code{JoinKeys}) object specifying joining keys for data sets in \verb{@env}. -Access or modify with \code{\link[=get_join_keys]{get_join_keys()}}.} +\item{\code{join_keys}}{(\code{join_keys}) object specifying joining keys for data sets in \verb{@env}. +Access or modify with \code{\link[=join_keys]{join_keys()}}.} \item{\code{datanames}}{(\code{character}) vector of names of data sets in \verb{@env}. Used internally to distinguish them from auxiliary variables. diff --git a/man/teal_data.Rd b/man/teal_data.Rd index 5b556fe54..dbb065aed 100644 --- a/man/teal_data.Rd +++ b/man/teal_data.Rd @@ -16,7 +16,7 @@ teal_data( Either 1) an object of a \verb{Teal*} class, which is deprecated and will be removed in next release, or 2) any number of any objects provided as \code{name = value} pairs, which is available from version \verb{0.4.0}.} -\item{join_keys}{(\code{JoinKeys}) or a single (\code{JoinKeySet})\cr +\item{join_keys}{(\code{join_keys}) or a single (\code{join_key_set})\cr (optional) object with dataset column relationships used for joining. If empty then no joins between pairs of objects} diff --git a/man/update_join_keys_to_primary.Rd b/man/update_join_keys_to_primary.Rd index b6e977ce9..690cd6c23 100644 --- a/man/update_join_keys_to_primary.Rd +++ b/man/update_join_keys_to_primary.Rd @@ -4,12 +4,12 @@ \alias{update_join_keys_to_primary} \title{Add primary keys as join_keys to a dataset self} \usage{ -update_join_keys_to_primary(data_objects, join_keys) +update_join_keys_to_primary(data_objects, x) } \arguments{ \item{data_objects}{(\code{list}) of \code{TealDataset}, \code{TealDatasetConnector} or \code{TealDataConnector} objects} -\item{join_keys}{(\code{JoinKeys}) object} +\item{x}{(\code{join_keys}) object} } \description{ Add primary keys as join_keys to a dataset self diff --git a/man/update_keys_given_parents.Rd b/man/update_keys_given_parents.Rd new file mode 100644 index 000000000..704a4b548 --- /dev/null +++ b/man/update_keys_given_parents.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/join_keys-utils.R +\name{update_keys_given_parents} +\alias{update_keys_given_parents} +\title{Updates the keys of the datasets based on the parents.} +\usage{ +update_keys_given_parents(x) +} +\arguments{ +\item{x}{(\code{join_keys}) object to update the keys.} +} +\value{ +(\code{self}) invisibly for chaining +} +\description{ +Updates the keys of the datasets based on the parents. +} +\keyword{internal} diff --git a/tests/testthat/helper-all.equal.R b/tests/testthat/helper-all.equal.R new file mode 100644 index 000000000..556f4c29d --- /dev/null +++ b/tests/testthat/helper-all.equal.R @@ -0,0 +1,2 @@ +# covr package doesn't detect the .S3method if it is declared within R/ folder +.S3method("all.equal", "join_keys", all.equal.join_keys) diff --git a/tests/testthat/helper-get_join_keys.R b/tests/testthat/helper-get_join_keys.R deleted file mode 100644 index 34bca8a9e..000000000 --- a/tests/testthat/helper-get_join_keys.R +++ /dev/null @@ -1,45 +0,0 @@ -#' Generate a teal_data dataset with sample data and JoinKeys -helper_generator_teal_data <- function() { - iris2 <- iris - iris2$id <- rnorm(NROW(iris2)) - iris2$id <- apply(iris2, 1, rlang::hash) - new_teal_data( - list( - ds1 = iris2, - ds2 = iris2 - ), - code = "ds1 <- iris2; ds2 <- iris2", - join_keys = helper_generator_JoinKeys("ds1", keys = c("id")) - ) -} - -#' Generate a JoinKeys -helper_generator_JoinKeys <- function(dataset_1 = "ds1", keys = c("id")) { # nolint - join_keys( - join_key(dataset_1, keys = keys) - ) -} - -#' Test suite for default get_join generated by helper -helper_test_get_join_keys <- function(obj, dataset_1 = "ds1") { - jk <- get_join_keys(obj) - - expect_s3_class(jk, c("JoinKey", "R6")) - expect_length(jk$get(), 1) - expect_length(jk$get(dataset_1), 1) - - obj -} - -#' Test suite for JoinKeys after manual adding a primary key -helper_test_get_join_keys_add <- function(obj, dataset_1 = "ds1", new_dataset_1 = "ds2", new_keys = c("id")) { - obj <- helper_test_get_join_keys(obj, dataset_1) - get_join_keys(obj)[new_dataset_1] <- c(new_keys) - - jk <- get_join_keys(obj) - - checkmate::expect_r6(jk, c("JoinKeys")) - expect_length(jk$get(), 2) - expect_length(jk$get(dataset_1), 1) - expect_length(jk$get(new_dataset_1), 1) -} diff --git a/tests/testthat/test-JoinKeys.R b/tests/testthat/test-JoinKeys.R deleted file mode 100644 index 72b300ae4..000000000 --- a/tests/testthat/test-JoinKeys.R +++ /dev/null @@ -1,771 +0,0 @@ -test_that("join_key throws error with invalid keys arguments", { - # invalid types - expect_error(join_key("d1", "d2", keys = NULL)) - expect_error(join_key("d1", "d2", keys = 1:10)) - - # not fully named - expect_error(join_key("d1", "d2", keys = c("X" = "A", "B")), NA) - keys <- c("A", "C" = "B") - names(keys)[1] <- "" - expect_error(join_key("d1", "d2", keys), NA) - - # duplicates in names or values - expect_error(join_key("d1", "d2", keys = c("A" = "A", "A" = "B"))) - expect_error(join_key("d1", "d2", keys = c("C" = "A", "D" = "A"))) - - # names(keys)!= keys if datasets are the same - expect_error(join_key("d1", "d1", keys = c("B" = "A", "A" = "B"))) - expect_error(join_key("d1", keys = c("B" = "A", "A" = "B"))) -}) - -test_that("key empty name is changed to the key value", { - keys <- JoinKeys$new() - - # set empty key name - keys$mutate("d1", "d2", c("A" = "B", "C")) - expect_equal(keys$get()$d1$d2, setNames(c("B", "C"), c("A", "C"))) - - # set key on non-empty variable name equal to "" - keys$mutate("d1", "d2", c("A" = "B", "C" = "")) - expect_equal(keys$get()$d1$d2, setNames(c("B", ""), c("A", "C"))) - - # set key on empty variable name equal to "" - keys$mutate("d1", "d2", c("A" = "B", "")) - expect_equal(keys$get()$d1$d2, setNames(c("B", ""), c("A", ""))) -}) - -test_that("join_key throws error with invalid dataset arguments", { - # missing - expect_error(join_key("d1", as.character(NA), keys = c("A" = "B", "C" = "D"))) - # invalid type - expect_error(join_key("d1", 5, keys = c("A" = "B", "C" = "D"))) - # invalid length - expect_error(join_key("d1", c("d1", "d2"), keys = c("A" = "B", "C" = "D"))) -}) - -test_that("join_key does not throw error with valid arguments", { - # keys of length 0 - expect_silent(join_key("d1", "d2", keys = character(0))) - # keys of length 1 - expect_silent(join_key("d1", "d2", keys = c("A" = "B"))) - # keys of length > 1 - expect_silent(join_key("d1", "d2", keys = c("A" = "B", "C" = "D"))) - # dataset_1 and dataset_2 can be the same if keys match - expect_silent(join_key("d1", "d1", keys = c("A" = "A", "B" = "B"))) - - expect_silent(join_key("d1", keys = c("A" = "A", "B" = "B"))) -}) - -test_that("cannot set join_keys with incompatible keys", { - # different keys - expect_error( - join_keys( - join_key("d1", "d2", keys = c("A" = "B", "C" = "D")), - join_key("d1", "d2", keys = c("A" = "B", "C" = "D", "E" = "F")) - ) - ) - - expect_error( - join_keys( - join_key("d1", "d2", keys = c("A" = "B", "C" = "D")), - join_key("d1", "d2", keys = character(0)) - ) - ) - - expect_error( - join_keys( - join_key("d1", "d2", keys = c("A" = "B", "C" = "D")), - join_key("d2", "d1", keys = character(0)) - ) - ) - - expect_error( - join_keys( - join_key("d1", "d2", keys = character(0)), - join_key("d2", "d1", keys = c("A" = "B", "C" = "D")) - ) - ) - - expect_error( - join_keys( - join_key("d1", "d2", keys = c("a" = "B", "C" = "D")), - join_key("d1", "d2", keys = c("A" = "B", "C" = "D")) - ) - ) -}) - -test_that("can create join_keys with compatible information", { - # different datasets - expect_silent( - join_keys( - join_key("d1", "d2", keys = c("A" = "B", "C" = "D")), - join_key("d1", "d3", keys = c("A" = "B", "C" = "D")) - ) - ) - - # same keys - expect_silent( - join_keys( - join_key("d1", "d2", keys = c("A" = "B", "C" = "D")), - join_key("d1", "d2", keys = c("A" = "B", "C" = "D")) - ) - ) - - # reordering keys still matches - expect_silent( - join_keys( - join_key("d1", "d2", keys = c("A" = "B", "C" = "D")), - join_key("d1", "d2", keys = c("C" = "D", "A" = "B")) - ) - ) - - # can match with empty - expect_silent( - join_keys( - join_key("d1", "d2", keys = character(0)), - join_key("d1", "d2", keys = character(0)) - ) - ) - - expect_silent( - join_keys( - join_key("d2", "d1", keys = character(0)), - join_key("d2", "d1", keys = character(0)) - ) - ) - - # swapping dataset order still matches - expect_silent( - join_keys( - join_key("d2", "d1", keys = c("B" = "A", "D" = "C")), - join_key("d1", "d2", keys = c("C" = "D", "A" = "B")) - ) - ) -}) - - -test_that("cannot create JoinKeys with invalid arguments", { - # not using join_key - expect_error(join_keys("d1", "d2", "A")) - # key sets with the same pair of datasets but different values - expect_error(join_keys(join_key("d1", "d2", "A"), join_key("d2", "d1", "B"))) - expect_error(join_keys(join_key("d1", "d2", c("A" = "X")), join_key("d2", "d1", c("A" = "X")))) -}) - -test_that("can create JoinKeys with valid arguments", { - # no keys - expect_silent(join_keys()) - # list of keys - expect_silent(join_keys(join_key("d1", "d2", "A"), join_key("d2", "d3", "B"))) - # single key out of list - expect_silent(join_keys(join_key("d1", "d2", "A"))) - # key sets with the same pair of datasets and the same values - expect_silent(join_keys(join_key("d1", "d2", c("A" = "C")), join_key("d2", "d1", c("C" = "A")))) - expect_silent(join_keys(join_key("d1", "d2", "X"), join_key("d2", "d1", "X"))) -}) - - -test_that("cannot set keys in JoinKeys if they have already been set", { - my_keys <- join_keys(join_key("d1", "d2", "A")) - expect_error(my_keys$set(join_key("d1", "d3", "A"))) -}) - - -test_that("creating join keys with d1 -> d2 also creates the key d2 - > d1", { - my_keys <- join_keys(join_key("d1", "d2", c("A" = "C"))) - expect_equal(my_keys$get("d2", "d1"), c("C" = "A")) -}) - - -test_that("can get all keys for a given dataset", { - my_keys <- join_keys( - join_key("d1", "d2", c("A" = "C")), - join_key("d1", "d3", c("A" = "B", "S" = "T")), - join_key("d2", "d3", c("C" = "U", "L" = "M")) - ) - expect_equal(my_keys$get(dataset_1 = "d1"), list(d2 = c("A" = "C"), d3 = c("A" = "B", "S" = "T"))) - expect_equal(my_keys$get(dataset_2 = "d1"), list(d2 = c("A" = "C"), d3 = c("A" = "B", "S" = "T"))) - expect_equal(my_keys$get(dataset_1 = "d3"), list(d1 = c("B" = "A", "T" = "S"), d2 = c("U" = "C", "M" = "L"))) -}) - - -test_that("can get all keys from JoinKeys", { - my_keys <- join_keys( - join_key("d1", "d2", c("A" = "C")), - join_key("d1", "d3", c("A" = "B", "S" = "T")), - join_key("d2", "d3", c("C" = "U", "L" = "M")) - ) - - all_keys <- my_keys$get() - expect_equal(names(all_keys), c("d1", "d2", "d3")) - expect_equal(my_keys$get(dataset_1 = "d1"), all_keys[["d1"]]) -}) - -test_that("join_key with unamed keys vector creates a JoinKeys with the same column names for both datasets ", { - test_keys <- join_keys(join_key("d1", "d2", keys = c("A", "B"))) - expect_equal(unname(test_keys$get("d1", "d2")), names(test_keys$get("d1", "d2"))) -}) - - -test_that("if no keys between pair of datasets then getting them returns character(0)", { - my_keys <- join_keys(join_key("d1", "d2", "A")) - expect_equal(my_keys$get("d1", "d3"), character(0)) - expect_equal(my_keys$get("d1", "d4"), character(0)) -}) - -test_that("can mutate existing keys", { - my_keys <- join_keys(join_key("d1", "d2", "A")) - my_keys$mutate("d1", "d2", c("X" = "Y")) - expect_equal(my_keys$get("d1", "d2"), c("X" = "Y")) -}) - -test_that("mutating non-existing keys adds them", { - my_keys <- join_keys(join_key("d1", "d2", "A")) - my_keys$mutate("d2", "d3", c("X" = "Y")) - expect_equal(my_keys$get("d3", "d2"), c("Y" = "X")) -}) - -test_that("can remove keys by setting them to character(0)", { - my_keys <- join_keys(join_key("d1", "d2", "A"), join_key("d3", "d4", c("A" = "B", "C" = "D"))) - my_keys$mutate("d1", "d2", character(0)) - expect_equal(my_keys$get("d1", "d2"), character(0)) -}) - -testthat::test_that("JoinKeys$split method returns empty list when object itself is empty", { - x <- JoinKeys$new() - testthat::expect_identical(x$split(), list()) -}) - -testthat::test_that("JoinKeys$split method returns a named list of JoinKeys objects with an element for each dataset", { - x <- JoinKeys$new() - x$set( - list( - join_key("A", "B", c("a" = "b")), - join_key("A", "C", c("a" = "c", "aa" = "cc")), - join_key("Z", "Y", c("z" = "y")) - ) - ) - res <- x$split() - testthat::expect_true(inherits(res, "list")) - testthat::expect_equal(length(res), 5) - testthat::expect_equal(names(res), c("A", "B", "C", "Z", "Y")) - checkmate::expect_list(res, types = "JoinKeys") - - testthat::expect_equal(names(res$A$get()), c("A", "B", "C")) - testthat::expect_equal(names(res$B$get()), c("B", "A")) - testthat::expect_equal(names(res$C$get()), c("C", "A")) - testthat::expect_equal(names(res$Z$get()), c("Z", "Y")) - testthat::expect_equal(names(res$Y$get()), c("Y", "Z")) -}) - -testthat::test_that( - "JoinKeys$split method returns an updated list after the state of the object is modified by JoinKeys$mutate()", - { - x <- JoinKeys$new() - x$set( - list( - join_key("A", "B", c("a" = "b")), - join_key("A", "C", c("a" = "c", "aa" = "cc")), - join_key("Z", "Y", c("z" = "y")) - ) - ) - res <- x$split() - - x$mutate("A", "B", c("a" = "b", "aa" = "bb")) - res2 <- x$split() - - testthat::expect_false(identical(res, res2)) - testthat::expect_identical(res2$A$get()$A$B, c("a" = "b", "aa" = "bb")) - - # adding new datasets - x$mutate("D", "G", c("d" = "g")) - res3 <- x$split() - testthat::expect_false(identical(res, res3)) - testthat::expect_false(identical(res2, res3)) - testthat::expect_identical(res3$D$get()$D$G, c("d" = "g")) - testthat::expect_identical(res3$D$get()$G$D, c("g" = "d")) - testthat::expect_identical(names(res3$D$get()), c("D", "G")) - } -) - -testthat::test_that("JoinKeys$split method does not modify self", { - x <- JoinKeys$new() - x$set( - list( - join_key("A", "B", c("a" = "b")), - join_key("A", "C", c("a" = "c", "aa" = "cc")), - join_key("Z", "Y", c("z" = "y")) - ) - ) - previous_self <- x$clone() - no_use_output <- x$split() - testthat::expect_equal(previous_self, x) -}) - - -testthat::test_that("JoinKeys$merge can handle edge case: calling object is empty", { - x <- JoinKeys$new() - y <- JoinKeys$new() - y$set( - list( - join_key("A", "B", c("a" = "b")), - join_key("A", "C", c("a" = "c", "aa" = "cc")), - join_key("Z", "Y", c("z" = "y")) - ) - ) - testthat::expect_silent(x$merge(y)) - testthat::expect_identical(x$get(), y$get()) -}) - -testthat::test_that("JoinKeys$merge can handle edge case: argument is an empty object", { - x <- JoinKeys$new() - y <- JoinKeys$new() - y$set( - list( - join_key("A", "B", c("a" = "b")), - join_key("A", "C", c("a" = "c", "aa" = "cc")), - join_key("Z", "Y", c("z" = "y")) - ) - ) - previous_output <- y$get() - testthat::expect_silent(y$merge(x)) - testthat::expect_identical(previous_output, y$get()) -}) - -testthat::test_that("JoinKeys$merge can handle edge case: argument is a list of empty objects", { - x <- JoinKeys$new() - y <- JoinKeys$new() - y$set( - list( - join_key("A", "B", c("a" = "b")), - join_key("A", "C", c("a" = "c", "aa" = "cc")), - join_key("Z", "Y", c("z" = "y")) - ) - ) - previous_output <- y$get() - testthat::expect_silent(y$merge(list(x, x$clone()))) - testthat::expect_identical(previous_output, y$get()) - - testthat::expect_silent(y$merge(list(x, x$clone(), x$clone()))) - testthat::expect_identical(previous_output, y$get()) -}) - -testthat::test_that( - "JoinKeys$merge throws error when improper argument is passed in without modifying the caller", - { - y <- JoinKeys$new() - y$set( - list( - join_key("A", "B", c("a" = "b")), - join_key("A", "C", c("a" = "c", "aa" = "cc")), - join_key("Z", "Y", c("z" = "y")) - ) - ) - previous_output <- y$get() - testthat::expect_error(y$merge()) - testthat::expect_identical(previous_output, y$get()) - - testthat::expect_error(y$merge(1)) - testthat::expect_identical(previous_output, y$get()) - - testthat::expect_error(y$merge("A")) - testthat::expect_identical(previous_output, y$get()) - - testthat::expect_error(y$merge(list())) - testthat::expect_identical(previous_output, y$get()) - - testthat::expect_error(y$merge(list(1))) - testthat::expect_identical(previous_output, y$get()) - - testthat::expect_error(y$merge(list("A"))) - testthat::expect_identical(previous_output, y$get()) - } -) - -testthat::test_that("JoinKeys$merge does nothing when argument is a JoinKeys object with identical data", { - x <- JoinKeys$new() - y <- JoinKeys$new() - x$set( - list( - join_key("A", "B", c("a" = "b")), - join_key("A", "C", c("a" = "c", "aa" = "cc")), - join_key("Z", "Y", c("z" = "y")) - ) - ) - y$set( - list( - join_key("A", "B", c("a" = "b")), - join_key("A", "C", c("a" = "c", "aa" = "cc")), - join_key("Z", "Y", c("z" = "y")) - ) - ) - previous_output <- y$get() - testthat::expect_silent(y$merge(x)) - testthat::expect_identical(previous_output, y$get()) -}) - -testthat::test_that("JoinKeys$merge does nothing when argument is a list of one JoinKeys object with identical data", { - x <- JoinKeys$new() - y <- JoinKeys$new() - x$set( - list( - join_key("A", "B", c("a" = "b")), - join_key("A", "C", c("a" = "c", "aa" = "cc")), - join_key("Z", "Y", c("z" = "y")) - ) - ) - y$set( - list( - join_key("A", "B", c("a" = "b")), - join_key("A", "C", c("a" = "c", "aa" = "cc")), - join_key("Z", "Y", c("z" = "y")) - ) - ) - previous_output <- y$get() - testthat::expect_silent(y$merge(list(x))) - testthat::expect_identical(previous_output, y$get()) - - testthat::expect_silent(y$merge(list(x, x$clone()))) - testthat::expect_identical(previous_output, y$get()) -}) - -testthat::test_that("JoinKeys$merge does nothing when argument is a list of many JoinKeys object with identical data", { - x <- JoinKeys$new() - y <- JoinKeys$new() - x$set( - list( - join_key("A", "B", c("a" = "b")), - join_key("A", "C", c("a" = "c", "aa" = "cc")), - join_key("Z", "Y", c("z" = "y")) - ) - ) - y$set( - list( - join_key("A", "B", c("a" = "b")), - join_key("A", "C", c("a" = "c", "aa" = "cc")), - join_key("Z", "Y", c("z" = "y")) - ) - ) - previous_output <- y$get() - testthat::expect_silent(y$merge(list(x, x, x, x, x, x, x, x))) - testthat::expect_identical(previous_output, y$get()) -}) - -testthat::test_that("JoinKeys$merge clones data when argument is a list of one JoinKeys object that is a superset", { - x <- JoinKeys$new() - y <- JoinKeys$new() - x$set( - list( - join_key("A", "B", c("a" = "b")), - join_key("A", "C", c("a" = "c", "aa" = "cc")), - join_key("Z", "Y", c("z" = "y")), - join_key("ZZ", "YY", c("zz" = "yy")) - ) - ) - y$set( - list( - join_key("A", "B", c("a" = "b")), - join_key("A", "C", c("a" = "c", "aa" = "cc")), - join_key("Z", "Y", c("z" = "y")) - ) - ) - previous_output <- y$get() - testthat::expect_silent(y$merge(list(x))) - testthat::expect_false(identical(previous_output, y$get())) - testthat::expect_identical(x$get(), y$get()) -}) - -testthat::test_that("JoinKeys$merge does nothing when argument is a list of one JoinKeys object that is a subset", { - x <- JoinKeys$new() - y <- JoinKeys$new() - x$set( - list( - join_key("A", "B", c("a" = "b")), - join_key("A", "C", c("a" = "c", "aa" = "cc")), - join_key("Z", "Y", c("z" = "y")), - join_key("ZZ", "YY", c("zz" = "yy")) - ) - ) - y$set( - list( - join_key("A", "B", c("a" = "b")), - join_key("A", "C", c("a" = "c", "aa" = "cc")), - join_key("Z", "Y", c("z" = "y")) - ) - ) - previous_output <- x$get() - testthat::expect_silent(x$merge(list(y))) - testthat::expect_identical(previous_output, x$get()) -}) - -testthat::test_that("JoinKeys$merge merges mutually exclusive data", { - x <- JoinKeys$new() - y <- JoinKeys$new() - x$set( - list( - join_key("A", "B", c("a" = "b")) - ) - ) - y$set( - list( - join_key("Z", "Y", c("z" = "y")) - ) - ) - z <- JoinKeys$new() - z$merge(list(x, y)) - testthat::expect_identical(c(x$get(), y$get()), z$get()) - - x$merge(y) - y$merge(x) - - testthat::expect_identical(x$get(), z$get()) - testthat::expect_true(all(y$get() %in% z$get()) && all(z$get() %in% y$get())) - testthat::expect_true(all(y$get() %in% x$get()) && all(x$get() %in% y$get())) - - testthat::expect_identical(names(z$get()), c("A", "B", "Z", "Y")) - testthat::expect_equal(length(z$get()), 4) - testthat::expect_identical(z$get()$A$B, c("a" = "b")) - testthat::expect_identical(z$get()$B$A, c("b" = "a")) - testthat::expect_identical(z$get()$Z$Y, c("z" = "y")) - testthat::expect_identical(z$get()$Y$Z, c("y" = "z")) -}) - -testthat::test_that("JoinKeys$print for empty set", { - jk <- JoinKeys$new() - testthat::expect_output( - print(jk), - "An empty JoinKeys object." - ) -}) - -testthat::test_that("JoinKeys$print for a non-empty set", { - jk <- JoinKeys$new() - jk$set(list(join_key("DF1", "DF2", c("id" = "fk")))) - testthat::expect_output( - print(jk), - "A JoinKeys object containing foreign keys between 2 datasets:" - ) -}) - -testthat::test_that("JoinKeys$set_parents sets the parents of datasets when they are empty", { - jk <- JoinKeys$new() - jk$set(list(join_key("df1", "df2", c("id" = "fk")))) - testthat::expect_silent(jk$set_parents(list(df1 = character(0), df2 = "df1"))) - testthat::expect_identical( - ss <- jk$get_parents(), - list(df1 = character(0), df2 = "df1") - ) -}) - -testthat::test_that("JoinKeys$set_parents throws error when overwriting the parent value with a different value", { - jk <- JoinKeys$new() - jk$set(list(join_key("df1", "df2", c("id" = "id")))) - testthat::expect_silent(jk$set_parents(list(df1 = character(0), df2 = "df1"))) - testthat::expect_error(jk$set_parents(list(df1 = character(0), df2 = "df5"))) -}) - -testthat::test_that("JoinKeys$set_parents works when overwriting the parent value with the same value", { - jk <- JoinKeys$new() - jk$set(list(join_key("df1", "df2", c("id" = "id")))) - testthat::expect_silent(jk$set_parents(list(df1 = character(0), df2 = "df1"))) - testthat::expect_silent(jk$set_parents(list(df1 = character(0), df2 = "df1"))) -}) - -testthat::test_that("JoinKeys$get_parent returns the parent name of the dataset", { - jk <- JoinKeys$new() - jk$set(list(join_key("df1", "df2", c("id" = "id")))) - testthat::expect_silent(jk$set_parents(list(df1 = character(0), df2 = "df1"))) - testthat::expect_identical(jk$get_parent("df1"), character(0)) - testthat::expect_identical(jk$get_parent("df2"), "df1") -}) - -testthat::test_that("JoinKeys$get_parent returns NULL when dataset is not found or not passed", { - jk <- JoinKeys$new() - jk$set(list(join_key("df1", "df2", c("id" = "id")))) - testthat::expect_silent(jk$set_parents(list(df1 = character(0), df2 = "df1"))) - testthat::expect_null(jk$get_parent()) - testthat::expect_null(jk$get_parent("df3")) -}) - -testthat::test_that("JoinKeys$get_parents returns a list of all parents", { - jk <- JoinKeys$new() - jk$set(list(join_key("df1", "df2", c("id" = "id")))) - testthat::expect_silent(jk$set_parents(list(df1 = character(0), df2 = "df1"))) - testthat::expect_identical(jk$get_parents(), list(df1 = character(0), df2 = "df1")) -}) - -testthat::test_that("JoinKeys$get_parents returns an empty list when no parents are present", { - jk <- JoinKeys$new() - jk$set(list(join_key("df1", "df2", c("id" = "id")))) - testthat::expect_identical(jk$get_parents(), list()) -}) - -testthat::test_that("JoinKeys$get_parents throws error when dataname input is provided", { - jk <- JoinKeys$new() - jk$set(list(join_key("df1", "df2", c("id" = "id")))) - testthat::expect_error(jk$get_parents("df1"), "unused argument \\(\"df1\"\\)") -}) - -testthat::test_that("JoinKeys$update_keys_given_parents does not update the join_keys when no presents are present", { - jk <- JoinKeys$new() - jk$set(list(join_key("df1", "df2", c("id" = "id")))) - jk$update_keys_given_parents() - testthat::expect_equal(jk, join_keys(join_key("df1", "df2", c("id" = "id")))) -}) - -testthat::test_that("JoinKeys$update_keys_given_parents updates the join_keys when presents are present", { - jk <- JoinKeys$new() - jk$set(list( - join_key("df1", "df1", c("id", "id2")), - join_key("df1", "df2", c("id" = "id")), - join_key("df1", "df3", c("id" = "id")) - )) - jk$set_parents(list(df1 = character(0), df2 = "df1", df3 = "df1")) - jk$update_keys_given_parents() - expected_jk <- join_keys( - join_key("df1", "df1", c("id", "id2")), - join_key("df1", "df2", c("id" = "id")), - join_key("df1", "df3", c("id" = "id")), - join_key("df2", "df2", c("id", "id2")), - join_key("df2", "df3", c("id", "id2")), - join_key("df3", "df3", c("id", "id2")) - ) - expected_jk$set_parents(list(df1 = character(0), df2 = "df1", df3 = "df1")) - testthat::expect_equal(jk, expected_jk) -}) - -testthat::test_that("JoinKeys$check_parent_child does nothing if no parents are present", { - jk <- JoinKeys$new() - jk$set(list(join_key("df1", "df1", c("id" = "id")))) - testthat::expect_identical(jk$get_parents(), list()) - testthat::expect_silent(jk$.__enclos_env__$private$check_parent_child()) -}) - -testthat::test_that("JoinKeys$check_parent_child throws error if no join_keys exist for chuld-parent", { - jk <- JoinKeys$new() - jk$set(list(join_key("df1", "df1", c("id" = "id")))) - jk$set_parents(list(df1 = character(0), df2 = "df1", df3 = "df1")) - testthat::expect_error( - jk$.__enclos_env__$private$check_parent_child(), - "No join keys from df2 to its parent \\(df1\\) and vice versa" - ) -}) - -test_that("cdisc_join_keys will generate JoinKeys for named list with non-named elements", { - new_dataset <- cdisc_join_keys("ADSL", ADTTE = rADTTE) - jk <- get_join_keys(new_dataset) - - expect_identical(unname(jk$get("ADSL", "ADSL")), default_cdisc_keys[["ADSL"]]$primary) - expect_identical(unname(jk$get("ADTTE", "ADTTE")), default_cdisc_keys[["ADTTE"]]$primary) - - expect_identical(unname(jk$get("ADSL", "ADTTE")), default_cdisc_keys[["ADTTE"]]$foreign) - expect_identical(unname(jk$get("ADTTE", "ADSL")), default_cdisc_keys[["ADTTE"]]$foreign) -}) - -test_that("cdisc_join_keys will generate JoinKeys for character list", { - new_dataset <- cdisc_join_keys("ADSL", "ADTTE") - jk <- get_join_keys(new_dataset) - - expect_identical(unname(jk$get("ADSL", "ADSL")), default_cdisc_keys[["ADSL"]]$primary) - expect_identical(unname(jk$get("ADTTE", "ADTTE")), default_cdisc_keys[["ADTTE"]]$primary) - - expect_identical(unname(jk$get("ADSL", "ADTTE")), default_cdisc_keys[["ADTTE"]]$foreign) - expect_identical(unname(jk$get("ADTTE", "ADSL")), default_cdisc_keys[["ADTTE"]]$foreign) -}) - -test_that("cdisc_join_keys will generate JoinKeys for named list", { - new_dataset <- cdisc_join_keys(ADSL = rADSL, ADTTE = rADTTE) - jk <- get_join_keys(new_dataset) - - expect_identical(unname(jk$get("ADSL", "ADSL")), default_cdisc_keys[["ADSL"]]$primary) - expect_identical(unname(jk$get("ADTTE", "ADTTE")), default_cdisc_keys[["ADTTE"]]$primary) - - expect_identical(unname(jk$get("ADSL", "ADTTE")), default_cdisc_keys[["ADTTE"]]$foreign) - expect_identical(unname(jk$get("ADTTE", "ADSL")), default_cdisc_keys[["ADTTE"]]$foreign) -}) - -test_that("cdisc_join_keys will retrieve ADTTE primary and foreign keys", { - datasets <- names(default_cdisc_keys) - - internal_keys <- default_cdisc_keys[["ADTTE"]] - jk <- cdisc_join_keys("ADTTE") - primary_keys <- unname(jk$get("ADTTE", "ADTTE")) - - expect_equal(primary_keys, internal_keys$primary) - - foreign_keys <- unname(jk$get("ADTTE", internal_keys$parent)) - expect_equal(foreign_keys, internal_keys$foreign) -}) - -test_that("cdisc_join_keys will retrieve known primary and foreign keys", { - datasets <- names(default_cdisc_keys) - - vapply( - datasets, - function(.x) { - internal_keys <- default_cdisc_keys[[.x]] - jk <- cdisc_join_keys(.x) - primary_keys <- unname(jk$get(.x, .x)) - expect_equal(primary_keys, internal_keys$primary) - if (!is.null(internal_keys$foreign)) { - foreign_keys <- unname(jk$get(.x, internal_keys$parent)) - expect_equal(foreign_keys, internal_keys$foreign) - } - character(0) - }, - character(0) - ) -}) - -test_that("cdisc_join_keys will retrieve known primary keys", { - datasets <- names(default_cdisc_keys) - - vapply( - datasets, - function(.x) { - jk <- cdisc_join_keys(.x) - expect_equal(unname(jk[.x]), get_cdisc_keys(.x)) - character(0) - }, - character(0) - ) -}) - -test_that("cdisc_join_keys does nothing with TealDataset", { - adae_cf <- callable_function( - function() as.data.frame(as.list(setNames(nm = get_cdisc_keys("ADAE")))) - ) - adae_cdc <- cdisc_dataset_connector("ADAE", adae_cf, keys = get_cdisc_keys("ADAE")) - expect_length(get_join_keys(cdisc_join_keys(adae_cdc))$get(), 0) -}) - -test_that("[.JoinKeys returns the primary key if arguments only have 1 dataset", { - jk <- join_keys(join_key("ds1", keys = c("id"))) - - expect_failure(expect_identical(jk$get("ds1"), jk["ds1"])) - checkmate::expect_character(jk["ds1"]) -}) - -test_that("[.JoinKeys subsets relationship pair successfully", { - jk <- join_keys(join_key("ds1", keys = c("id"))) - - expect_identical(jk$get("ds1", "ds1"), jk["ds1"]) -}) - -test_that("[<-.JoinKeys assigns new relationship pair", { - jk <- join_keys(join_key("ds1", keys = c("id"))) - - expect_length(jk$get("ds1", "ds2"), 0) - - jk["ds1", "ds2"] <- c("id") - expect_identical(jk$get("ds1", "ds2"), c(id = "id")) - expect_identical(jk$get("ds1", "ds2"), jk["ds1", "ds2"]) -}) - -test_that("[<-.JoinKeys modifies existing relationship pair", { - jk <- join_keys(join_key("ds1", keys = c("id"))) - - jk["ds1", "ds1"] <- c("Species") - expect_failure(expect_identical(jk$get("ds1", "ds1"), c(id = "id"))) - expect_identical(jk$get("ds1", "ds1"), c(Species = "Species")) -}) diff --git a/tests/testthat/test-MAETealDataset.R b/tests/testthat/test-MAETealDataset.R index 360ef5c04..e5298829b 100644 --- a/tests/testthat/test-MAETealDataset.R +++ b/tests/testthat/test-MAETealDataset.R @@ -159,7 +159,10 @@ testthat::test_that("MAETealDataset$check_keys doesn't throw if constructed with utils::data(miniACC, package = "MultiAssayExperiment") mae <- MAETealDataset$new(dataname = "miniACC", x = miniACC, keys = "patientID") testthat::skip_on_ci() - testthat::expect_silent(mae$check_keys()) + withr::with_envvar( + new = c("_R_CHECK_AS_DATA_FRAME_EXPLICIT_METHOD_" = ""), + testthat::expect_silent(mae$check_keys()) + ) }) testthat::test_that("MAETealDataset$check_keys throws if constructed with keys not present in colData", { diff --git a/tests/testthat/test-TealData.R b/tests/testthat/test-TealData.R index e512dc407..00c728c41 100644 --- a/tests/testthat/test-TealData.R +++ b/tests/testthat/test-TealData.R @@ -256,7 +256,7 @@ testthat::test_that("TealData$get_parents returns an empty list even when parent testthat::expect_equal(data$get_parents(), list()) }) -testthat::test_that("TealData$mutate_join_keys returns a JoinKeys object with the updated join_keys", { +testthat::test_that("TealData$mutate_join_keys returns a join_keys object with the updated join_keys", { df1 <- data.frame(id = c("A", "B"), a = c(1L, 2L)) df2 <- data.frame(df2_id = c("A", "B"), id = c("A", "B"), b = c(1L, 2L)) diff --git a/tests/testthat/test-cdisc_data.R b/tests/testthat/test-cdisc_data.R index 50340eca9..e117a1a32 100644 --- a/tests/testthat/test-cdisc_data.R +++ b/tests/testthat/test-cdisc_data.R @@ -1,39 +1,5 @@ -cdisc_data_mixed_call <- function(check = TRUE, join_keys1 = join_keys()) { - adsl_raw <- as.data.frame(as.list(setNames(nm = get_cdisc_keys("ADSL")))) - adtte_raw <- as.data.frame(as.list(setNames(nm = get_cdisc_keys("ADTTE")))) - adae_raw <- as.data.frame(as.list(setNames(nm = get_cdisc_keys("ADAE")))) - - adsl <- cdisc_dataset( - dataname = "ADSL", - x = adsl_raw, - code = "ADSL <- as.data.frame(as.list(setNames(nm = get_cdisc_keys(\"ADSL\"))))" - ) - adtte_cf <- callable_function( - function() { - as.data.frame(as.list(setNames(nm = get_cdisc_keys("ADTTE")))) - } - ) - adtte <- cdisc_dataset_connector("ADTTE", adtte_cf, keys = get_cdisc_keys("ADTTE"), vars = list(x = adsl)) - adae_cf <- callable_function( - function() { - as.data.frame(as.list(setNames(nm = get_cdisc_keys("ADAE")))) - } - ) - adae_cdc <- cdisc_dataset_connector("ADAE", adae_cf, keys = get_cdisc_keys("ADAE")) - adae_rdc <- cdisc_data_connector( - connection = data_connection(open_fun = callable_function(function() "open function")), - connectors = list(adae_cdc) - ) - - load_dataset(adsl) - load_dataset(adtte) - load_dataset(adae_cdc) - - cdisc_data(adsl, adtte, adae_rdc, check = check, join_keys = join_keys1) -} - testthat::test_that("cdisc_data accepts TealDataset, TealDatasetConnector, TealDataConnector objects", { - lifecycle::expect_deprecated(data <- cdisc_data_mixed_call(), "should use data directly") + lifecycle::expect_deprecated(data <- local_cdisc_data_mixed_call(), "should use data directly") testthat::expect_identical(data$get_datanames(), c("ADSL", "ADTTE", "ADAE")) }) @@ -46,7 +12,7 @@ testthat::test_that("cdisc_data returns teal_data object for objects different t }) testthat::test_that("cdisc_data sets the join_keys internally", { - data <- cdisc_data_mixed_call() + data <- local_cdisc_data_mixed_call() jks <- join_keys( join_key("ADSL", "ADSL", c("STUDYID", "USUBJID")), @@ -56,15 +22,15 @@ testthat::test_that("cdisc_data sets the join_keys internally", { join_key("ADSL", "ADAE", c("STUDYID", "USUBJID")), join_key("ADTTE", "ADAE", c("STUDYID", "USUBJID")) ) - jks$set_parents(list(ADSL = character(0), ADTTE = "ADSL", ADAE = "ADSL")) - testthat::expect_equal(data$get_join_keys(), jks) + parents(jks) <- list(ADSL = character(0), ADTTE = "ADSL", ADAE = "ADSL") + testthat::expect_equal(join_keys(data), jks) }) testthat::test_that( "cdisc_data sets the join_keys internally based on parents relations when primary keys are altered", { jks <- join_keys(join_key("ADSL", "ADSL", c("STUDYID"))) - data <- cdisc_data_mixed_call(join_keys1 = jks) + data <- local_cdisc_data_mixed_call(join_keys1 = jks) jks <- join_keys( join_key("ADSL", "ADSL", c("STUDYID")), @@ -74,9 +40,9 @@ testthat::test_that( join_key("ADSL", "ADAE", c("STUDYID")), join_key("ADTTE", "ADAE", c("STUDYID")) ) - jks$set_parents(list(ADSL = character(0), ADTTE = "ADSL", ADAE = "ADSL")) + parents(jks) <- list(ADSL = character(0), ADTTE = "ADSL", ADAE = "ADSL") testthat::expect_equal( - data$get_join_keys(), + join_keys(data), jks ) } @@ -89,14 +55,15 @@ testthat::test_that("cdisc_data sets primary keys as join_keys when no join_keys df1 <- dataset("df1", df1, keys = "id") df2 <- dataset("df2", df2, keys = "df2_id") - data <- cdisc_data(df1, df2, check = FALSE) + data <- cdisc_data(df1 = df1, df2 = df2, check = FALSE) jks <- join_keys( join_key("df1", "df1", "id"), join_key("df2", "df2", "df2_id") ) - jks$set_parents(list(df1 = character(0), df2 = character(0))) - testthat::expect_equal(data$get_join_keys(), jks) + parents(jks) <- list(df1 = character(0), df2 = character(0)) + testthat::expect_equal(join_keys(data), jks) + testthat::expect_equal(parents(join_keys(data)), parents(jks)) }) testthat::test_that("cdisc_data throws error when a parent/child graph is not correct", { @@ -182,7 +149,7 @@ testthat::test_that("List values", { test_relational_data_equal <- function(data1, data2) { testthat::expect_equal(data1$get_items(), data2$get_items()) - testthat::expect_equal(data1$get_join_keys(), data2$get_join_keys()) + testthat::expect_equal(join_keys(data1), join_keys(data2)) mapply(testthat::expect_equal, data1$get_ui("test"), data2$get_ui("test")) } diff --git a/tests/testthat/test-cdisc_join_keys.R b/tests/testthat/test-cdisc_join_keys.R new file mode 100644 index 000000000..7596381ee --- /dev/null +++ b/tests/testthat/test-cdisc_join_keys.R @@ -0,0 +1,20 @@ +test_that("default_cdisc_join_keys is assigned in package environment", { + expect_true(exists("default_cdisc_join_keys")) + expect_gt(length(default_cdisc_join_keys), 0) +}) + +test_that("default_cdisc_join_keys subsetting of datasets with parent is valid", { + # indirect test to build_cdisc_join_keys + ds <- c("ADTTE", "ADSL") + result <- default_cdisc_join_keys[ds] + expect_length(result, 2) + expect_length(parents(result), 1) +}) + +test_that("default_cdisc_join_keys subsetting of dataset without parent contains parent", { + # indirect test to build_cdisc_join_keys + ds <- c("ADTTE", "ADEX", "ADRS") + result <- default_cdisc_join_keys[ds] + expect_length(result, 4) + expect_length(parents(result), 3) +}) diff --git a/tests/testthat/test-get_join_keys.R b/tests/testthat/test-get_join_keys.R index 7a693a5e9..a9bb4776a 100644 --- a/tests/testthat/test-get_join_keys.R +++ b/tests/testthat/test-get_join_keys.R @@ -1,23 +1,7 @@ -test_that("get_join_keys.teal_data will successfully obtain object from teal_data", { - obj <- helper_generator_teal_data() - - expect_identical(obj@join_keys, get_join_keys(obj)) - helper_test_get_join_keys(obj, "ds1") -}) - -test_that("get_join_keys.JoinKeys will return itself", { - obj <- helper_generator_JoinKeys() - - expect_identical(obj, get_join_keys(obj)) - helper_test_get_join_keys(obj, "ds1") -}) - -test_that("get_join_keys<-.teal_data", { - obj <- helper_generator_teal_data() - helper_test_get_join_keys_add(obj, "ds1", "ds2") +testthat::test_that("get_join_keys is deprecated", { + lifecycle::expect_defunct(get_join_keys(join_keys())) }) -test_that("get_join_keys<-.JoinKeys", { - obj <- helper_generator_JoinKeys() - helper_test_get_join_keys_add(obj, "ds1", "ds2") +testthat::test_that("get_join_keys<- is deprecated", { + lifecycle::expect_defunct(`get_join_keys<-`(NULL, NULL, NULL, NULL)) }) diff --git a/tests/testthat/test-join_key.R b/tests/testthat/test-join_key.R new file mode 100644 index 000000000..3cfcc576b --- /dev/null +++ b/tests/testthat/test-join_key.R @@ -0,0 +1,83 @@ +test_that("join_key throws error with NULL keys", { + expect_error(join_key("d1", "d2", keys = NULL)) +}) + +test_that("join_key throws error with NA keys", { + expect_error(join_key("d1", "d2", keys = NA)) + expect_error(join_key("d1", "d2", keys = c("X" = "A", "B", NA))) +}) + +test_that("join_key throws error with numeric keys", { + expect_error(join_key("d1", "d2", keys = 1:10)) +}) + +test_that("join_key throws error with data keys", { + expect_error(join_key("d1", "d2", keys = Sys.time() + seq(1, 5))) +}) + +test_that("join_key throws error with invalid duplicate names in keys/values", { + expect_error(join_key("d1", "d2", keys = c("A" = "A", "A" = "B"))) + expect_error(join_key("d1", "d2", keys = c("C" = "A", "D" = "A"))) +}) + +test_that("join_key throws error with invalid primary keys (names != keys)", { + expect_error(join_key("d1", "d1", keys = c("B" = "A", "A" = "B"))) + expect_error(join_key("d1", keys = c("B" = "A", "A" = "B"))) +}) + +test_that("join_key throws error with invalid dataset arguments", { + # missing + expect_error(join_key("d1", as.character(NA), keys = c("A" = "B", "C" = "D"))) + # invalid type + expect_error(join_key("d1", 5, keys = c("A" = "B", "C" = "D"))) + # invalid length + expect_error(join_key("d1", c("d1", "d2"), keys = c("A" = "B", "C" = "D"))) +}) + +test_that("join_key does not throw error with valid arguments", { + # keys of length 0 + expect_silent(join_key("d1", "d2", keys = character(0))) + # keys of length 1 + expect_silent(join_key("d1", "d2", keys = c("A" = "B"))) + # keys of length > 1 + expect_silent(join_key("d1", "d2", keys = c("A" = "B", "C" = "D"))) + # dataset_1 and dataset_2 can be the same if keys match + expect_silent(join_key("d1", "d1", keys = c("A" = "A", "B" = "B"))) + + expect_silent(join_key("d1", keys = c("A" = "A", "B" = "B"))) +}) + +test_that("join_key will fill empty names with value", { + # keys of length 0 + jk <- join_key("d1", "d2", keys = c("A" = "B", "C")) + expect_identical(get_keys(jk), setNames(c("B", "C"), c("A", "C"))) + + jk <- join_key("d1", "d2", keys = c("B", "C")) + expect_identical(get_keys(jk), setNames(c("B", "C"), c("B", "C"))) +}) + +test_that("join_key will fill empty values with name", { + # keys of length 0 + jk <- join_key("d1", "d2", keys = c("A" = "B", "C" = "")) + expect_identical(get_keys(jk), setNames(c("B", "C"), c("A", "C"))) + + jk <- join_key("d1", "d2", keys = c("B", "C" = "")) + expect_identical(get_keys(jk), setNames(c("B", "C"), c("B", "C"))) +}) + +test_that("join_key ignores empty name/value on keys if it has other keys", { + expect_message(jk <- join_key("d1", "d2", keys = c("A" = "B", "")), "are ignored") + expect_identical(get_keys(jk), setNames(c("B"), c("A"))) + + expect_message(jk <- join_key("d1", "d2", keys = c("B", "")), "are ignored") + expect_identical(get_keys(jk), setNames(c("B"), c("B"))) +}) + +test_that("join_key sets key as character(0) when keys are all all empty strings", { + # invalid types + jk <- join_key("d1", "d2", keys = c("", "", "", "")) + expect_length(get_keys(jk), 0) + + jk2 <- join_key("d1", "d2", keys = c(" ", " ", " ", " ")) + expect_length(get_keys(jk2), 0) +}) diff --git a/tests/testthat/test-join_keys-c.R b/tests/testthat/test-join_keys-c.R new file mode 100644 index 000000000..234a8fe69 --- /dev/null +++ b/tests/testthat/test-join_keys-c.R @@ -0,0 +1,226 @@ +testthat::test_that("c.join_keys joins join_keys object with join_key objects", { + obj <- join_keys() + obj <- c(obj, join_key("a", "a", "aa"), join_key("b", "b", "bb")) + testthat::expect_identical( + obj, + join_keys( + join_key("a", "a", "aa"), + join_key("b", "b", "bb") + ) + ) +}) + +testthat::test_that("c.join_key_set joins join_key_set object with join_keys objects", { + obj <- join_key("a", "a", "aa") + obj <- c(obj, join_key("b", "b", "bb"), join_key("c", "c", "cc")) + testthat::expect_identical( + obj, + join_keys( + join_key("a", "a", "aa"), + join_key("b", "b", "bb"), + join_key("c", "c", "cc") + ) + ) +}) + +testthat::test_that("c.join_keys duplicated keys are ignored", { + obj <- join_keys() + obj <- c(obj, join_key("a", "a", "aa"), join_key("a", "a", "aa")) + testthat::expect_identical( + obj, + join_keys(join_key("a", "a", "aa")) + ) +}) + +testthat::test_that("c.join_keys joins join_keys object with join_keys objects", { + obj <- join_keys() + obj <- c( + obj, + join_keys(join_key("a", "a", "aa")), + join_keys(join_key("b", "b", "bb")) + ) + testthat::expect_identical( + obj, + join_keys( + join_key("a", "a", "aa"), + join_key("b", "b", "bb") + ) + ) +}) + +testthat::test_that("c.join_keys joins join_keys object with join_keys and join_key objects", { + obj <- join_keys() + obj <- c( + obj, + join_keys(join_key("a", "a", "aa")), + join_key("b", "b", "bb") + ) + testthat::expect_identical( + obj, + join_keys( + join_key("a", "a", "aa"), + join_key("b", "b", "bb") + ) + ) +}) + +testthat::test_that("c.join_keys throws when joining with a list", { + obj <- join_keys() + testthat::expect_error(c( + obj, + list(c = list(c = "cc")) + )) +}) + +testthat::test_that("c.join_keys doesn't throw when second object is empty join_keys", { + x <- join_keys(join_key("a", "a", "aa")) + y <- join_keys() + testthat::expect_no_error(c(x, y)) +}) + +testthat::test_that("c.join_keys throws on conflicting join_keys_set objects", { + obj <- join_keys() + testthat::expect_error( + c( + obj, + join_keys(join_key("a", "b", "aa")), + join_keys(join_key("b", "a", "bb")) + ), + "cannot specify multiple different join keys between datasets" + ) + + testthat::expect_error( + c( + obj, + join_key("a", "b", "aa"), + join_key("b", "a", "bb") + ), + "cannot specify multiple different join keys between datasets" + ) +}) + +testthat::test_that("c.join_key_set throws on conflicting join_keys_set objects", { + testthat::expect_error( + c( + join_key("a", "b", "aa"), + join_key("a", "b", "ca"), + join_key("a", "b", "cc") + ), + "cannot specify multiple different join keys between datasets" + ) +}) + +testthat::test_that("c.join_key_set merges with empty and non-empty parents", { + jk1 <- join_keys( + join_key("d1", "d1", "a") + ) + + jk2 <- join_keys( + join_key("d3", "d3", "c"), + join_key("d4", "d4", "d"), + join_key("d4", "d3", "cd") + ) + parents(jk2) <- list(d3 = "d4") + + expected <- join_keys( + join_key("d1", "d1", "a"), + join_key("d3", "d3", "c"), + join_key("d4", "d4", "d"), + join_key("d3", "d4", "cd") + ) + parents(expected) <- list(d3 = "d4") + + testthat::expect_identical( + c(jk1, jk2), + expected + ) + + testthat::expect_equal( + c(jk2, jk1), + expected + ) +}) + +testthat::test_that("c.join_key_set merges parents also", { + jk1 <- join_keys( + join_key("d1", "d1", "a"), + join_key("d2", "d2", "b"), + join_key("d1", "d2", "ab") + ) + parents(jk1) <- list(d1 = "d2") + jk2 <- join_key("d3", "d3", "c") + + expected <- join_keys( + join_key("d1", "d1", "a"), + join_key("d2", "d2", "b"), + join_key("d1", "d2", "ab"), + join_key("d3", "d3", "c") + ) + parents(expected) <- list(d1 = "d2") + + testthat::expect_equal( + c(jk2, jk1), + expected + ) +}) + +testthat::test_that("c.join_keys merges parents also", { + jk1 <- join_keys( + join_key("d1", "d1", "a"), + join_key("d2", "d2", "b"), + join_key("d1", "d2", "ab") + ) + parents(jk1) <- list(d1 = "d2") + jk2 <- join_keys( + join_key("d3", "d3", "c"), + join_key("d4", "d4", "d"), + join_key("d4", "d3", "cd") + ) + parents(jk2) <- list(d3 = "d4") + + expected <- join_keys( + join_key("d1", "d1", "a"), + join_key("d2", "d2", "b"), + join_key("d1", "d2", "ab"), + join_key("d3", "d3", "c"), + join_key("d4", "d4", "d"), + join_key("d3", "d4", "cd") + ) + parents(expected) <- list(d1 = "d2", d3 = "d4") + + testthat::expect_identical( + c(jk1, jk2), + expected + ) +}) + +testthat::test_that("c.join_keys merges existing parents are overwritten", { + jk1 <- join_keys( + join_key("d1", "d1", "a"), + join_key("d2", "d2", "b"), + join_key("d3", "d3", "c"), + join_key("d4", "d4", "d"), + join_key("d1", "d2", "ab"), + join_key("d4", "d3", "cd") + ) + parents(jk1) <- list(d1 = "d2", d3 = "d4") + + jk2 <- join_keys( + join_key("d2", "d2", "b"), + join_key("d3", "d2", "cb") + ) + parents(jk2) <- list(d3 = "d2") + + expected <- join_keys( + join_key("d1", "d1", "a"), + join_key("d2", "d2", "b"), + join_key("d3", "d3", "c"), + join_key("d4", "d4", "d"), + join_key("d1", "d2", "ab"), + join_key("d4", "d3", "cd"), + join_key("d3", "d2", "cb") + ) + parents(expected) <- list(d1 = "d2", d3 = "d2") + + testthat::expect_identical(c(jk1, jk2), expected) +}) diff --git a/tests/testthat/test-join_keys-extract.R b/tests/testthat/test-join_keys-extract.R new file mode 100644 index 000000000..9f1a3887e --- /dev/null +++ b/tests/testthat/test-join_keys-extract.R @@ -0,0 +1,426 @@ +# join_keys[i] ----------------------------------------------------------------- +testthat::test_that("join_keys[i] returns join_keys object when i and j is missing", { + my_keys <- join_keys( + join_key("d1", "d1", "a"), + join_key("d2", "d2", "b"), + join_key("d3", "d3", "c") + ) + testthat::expect_identical(my_keys[], my_keys) +}) + +testthat::test_that("join_keys[i] returns empty join_keys when i or j are NULL", { + my_keys <- join_keys(join_key("d1", "d1", "a")) + testthat::expect_identical(my_keys[NULL], join_keys()) + testthat::expect_identical(my_keys[, NULL], join_keys()) +}) + +testthat::test_that("join_keys[i] subsets join_keys object to specific datasets", { + my_keys <- join_keys( + join_key("d1", "d1", "a"), + join_key("d2", "d2", "b"), + join_key("d3", "d3", "c") + ) + testthat::expect_equal( + my_keys[c("d1", "d2")], + join_keys(join_key("d1", "d1", "a"), join_key("d2", "d2", "b")) + ) +}) + +testthat::test_that("join_keys[i] returns join_keys object with keys for given index", { + my_keys <- join_keys( + join_key("d1", "d1", "a"), + join_key("d2", "d2", "b"), + join_key("d3", "d3", "c") + ) + testthat::expect_equal( + my_keys[c(1, 2)], + join_keys(join_key("d1", "d1", "a"), join_key("d2", "d2", "b")) + ) +}) + +testthat::test_that("join_keys[-i] drops keys for given index", { + my_keys <- join_keys( + join_key("d1", "d1", "a"), + join_key("d2", "d2", "b"), + join_key("d3", "d3", "c") + ) + testthat::expect_equal( + my_keys[-3], + join_keys(join_key("d1", "d1", "a"), join_key("d2", "d2", "b")) + ) +}) + +testthat::test_that("join_keys[i] returns join_keys object for given dataset including its parent", { + my_keys <- join_keys( + join_key("d1", "d1", "a"), + join_key("d2", "d2", "b"), + join_key("d3", "d3", "c"), + join_key("d2", "d1", "ab"), + join_key("d3", "d1", "ac") + ) + parents(my_keys) <- list("d2" = "d1", "d3" = "d1") + + expected <- join_keys( + join_key("d1", "d1", "a"), + join_key("d2", "d2", "b"), + join_key("d2", "d1", "ab") + ) + parents(expected) <- list("d2" = "d1") + + testthat::expect_equal(my_keys["d2"], expected) +}) + +testthat::test_that("join_keys[i] returns join_keys object for given dataset and doesn't include its children", { + my_keys <- join_keys( + join_key("d1", "d1", "a"), + join_key("d2", "d2", "b"), + join_key("d3", "d3", "c"), + join_key("d2", "d1", "ab"), + join_key("d3", "d1", "ac") + ) + parents(my_keys) <- list("d2" = "d1", "d3" = "d1") + + expected <- join_keys( + join_key("d1", "d1", "a"), + join_key("d2", "d2", "b"), + join_key("d2", "d1", "ab") + ) + parents(expected) <- list("d2" = "d1") + + testthat::expect_equal(my_keys["d2"], expected) +}) + +testthat::test_that("join_keys[i] returns empty join_keys for inexisting dataset", { + my_keys <- join_keys(join_key("d1", "d1", "a")) + testthat::expect_length(my_keys["d2"], 0) +}) + +testthat::test_that("join_keys[i] ignores duplicate indexes - return only first occurrence", { + jk <- join_keys( + join_key("d1", "d1", "a"), + join_key("d2", "d2", "b"), + join_key("d3", "d2", "b") + ) + testthat::expect_equal( + jk[c("d1", "d2", "d1")], + join_keys(join_key("d1", "d1", "a"), join_key("d2", "d2", "b")) + ) +}) + +testthat::test_that("join_keys[,j] returns the same as join_keys[i,]", { + my_keys <- join_keys( + join_key("d1", "d1", "a"), + join_key("d2", "d2", "b"), + join_key("d3", "d3", "c") + ) + testthat::expect_identical( + my_keys[, c("d1", "d2")], + my_keys[c("d1", "d2")] + ) +}) + +# join_keys[i, j] ----------------------------------------------------------------- +testthat::test_that("join_keys[i,j] returns keys for given pair", { + my_keys <- join_keys( + join_key("a", "a", "aa"), + join_key("b", "b", "bb"), + join_key("c", "c", "cc"), + join_key("b", "a", "child-parent"), + join_key("c", "a", "child-parent") + ) + testthat::expect_identical(my_keys["b", "a"], c(`child-parent` = "child-parent")) +}) + +testthat::test_that("join_keys[i,j] returns keys for pair given by numeric indices", { + my_keys <- join_keys( + join_key("a", "a", "aa"), + join_key("b", "b", "bb"), + join_key("c", "c", "cc"), + join_key("b", "a", "child-parent"), + join_key("c", "a", "child-parent") + ) + testthat::expect_identical(my_keys[2, 1], c(`child-parent` = "child-parent")) +}) + +testthat::test_that("join_keys[i,j] return NULL for given pair when no such key and no common parent", { + my_keys <- join_keys( + join_key("a", "a", "aa"), + join_key("b", "b", "bb"), + join_key("c", "c", "cc"), + join_key("b", "a", "child-parent"), + join_key("c", "a", "child-parent") + ) + testthat::expect_null(my_keys["b", "c"]) +}) + +testthat::test_that( + "join_keys[i,j] doesn't infer keys between children if they don't have common key to parent", + { + my_keys <- join_keys( + join_key("a", "a", "aa"), + join_key("b", "b", "bb"), + join_key("c", "c", "cc"), + join_key("b", "a", c(child = "a1")), + join_key("c", "a", c(child = "a2")) + ) + parents(my_keys) <- list("b" = "a", "c" = "a") + testthat::expect_null(my_keys["b", "c"]) + } +) + +testthat::test_that( + "join_keys[i,j] doesn't infer keys between grandchildren", + { + my_keys <- join_keys( + join_key("a", "a", "aa"), + join_key("b", "b", "bb"), + join_key("c", "c", "cc"), + join_key("b", "a", "child-parent"), + join_key("c", "a", "child-parent"), + join_key("d", "b", "grandchild-child"), + join_key("e", "c", "grandchild-child") + ) + parents(my_keys) <- list("b" = "a", "c" = "a", "d" = "b", "e" = "c") + testthat::expect_null(my_keys["d", "e"]) + } +) + +testthat::test_that( + "join_keys[i,j ] infer keys between children through foreign keys to parent. ", + { + my_keys <- join_keys( + join_key("a", "a", "aa"), + join_key("b", "b", "bb"), + join_key("c", "c", "cc"), + join_key("b", "a", c(bb = "aa")), + join_key("c", "a", c(cc = "aa")) + ) + parents(my_keys) <- list("b" = "a", "c" = "a") + # "bb" and "cc" are the names in child datasets, "aa" is the name in parent dataset + testthat::expect_identical(my_keys["b", "c"], c(bb = "cc")) + } +) + +testthat::test_that("join_keys[i,j] returns NULL for inexisting key pair (can't even infer)", { + my_keys <- join_keys( + join_key("a", "a", "aa"), + join_key("b", "b", "bb"), + join_key("c", "c", "cc") + ) + testthat::expect_null(my_keys["inexisting", "inexisting"]) +}) + +testthat::test_that("join_keys[i,j] throws when one of the indices is longer than 1", { + my_keys <- join_keys( + join_key("a", "a", "aa"), + join_key("b", "b", "bb"), + join_key("c", "c", "cc") + ) + testthat::expect_error(my_keys[c("a", "b"), "c"], "Can't extract keys for multiple pairs.") +}) + +# [<-.join_keys ------------------------------------------------ +testthat::test_that("join_keys[i]<- throws when assigning anything", { + my_keys <- join_keys() + testthat::expect_error(my_keys["a"] <- join_key("a", "b", "test"), "specify both indices to set a key pair.") +}) + +testthat::test_that("join_keys[i]<- throws when no index specified", { + my_keys <- join_keys() + testthat::expect_error(my_keys[] <- join_key("a", "b", "test"), "specify both indices to set a key pair.") +}) + +testthat::test_that("join_keys[i,j]<- can set new value for existing pair", { + my_keys <- join_keys(join_key("a", "a", "aa")) + testthat::expect_no_error(my_keys["a", "a"] <- "new key") + testthat::expect_identical(my_keys, join_keys(join_key("a", "a", "new key"))) +}) + +testthat::test_that("join_keys[i,j]<- sets a new keys for inexisting pair", { + my_keys <- join_keys(join_key("a", "a", "aa")) + testthat::expect_no_error(my_keys["b", "c"] <- "new key") + testthat::expect_identical(my_keys, join_keys(join_key("a", "a", "aa"), join_key("b", "c", "new key"))) +}) + +testthat::test_that("join_keys[i,j]<- throws when assigning to inspecific index", { + my_keys <- join_keys() + testthat::expect_error(my_keys[, "b"] <- join_key("a", "b", "test")) +}) + +testthat::test_that("join_keys[i,j]<- throws when assigning to j only", { + my_keys <- join_keys() + testthat::expect_error(my_keys[, "b"] <- join_key("a", "b", "test")) +}) + +testthat::test_that("join_keys[i,j]<- throws when i or j are NULL", { + my_keys <- join_keys() + testthat::expect_error(my_keys[NULL, 1] <- join_key("a", "b", "test"), "NULL") + testthat::expect_error(my_keys[1, NULL] <- join_key("a", "b", "test"), "NULL") +}) + +testthat::test_that("join_keys[i,j]<- throws when i or j are longer than 1", { + my_keys <- join_keys() + testthat::expect_error(my_keys[c("a", "b"), "a"] <- "new key") + testthat::expect_error(my_keys["a", c("a", "b")] <- "new key") +}) + +# [[<-.join_keys ------------------------------------------------ +testthat::test_that("[[<-.join_keys accepts named list where each containing character", { + jk <- join_keys() + testthat::expect_no_error( + jk[["d1"]] <- list(d1 = c("a", "b", "c"), d2 = c(b = "c", "d" = "d")) + ) +}) + +testthat::test_that("[[<-.join_keys accepts integerish as index", { + jk <- join_keys(join_key("a", "a", "aa")) + testthat::expect_no_error( + jk[[1]][[1]] <- "bb" + ) +}) + +testthat::test_that("[[<-.join_keys accepts unnamed vector", { + jk <- join_keys() + testthat::expect_no_error( + jk[["d1"]] <- list(d1 = c("a", "b", "c")) + ) +}) + +testthat::test_that("[[<-.join_keys doesn't accepts other list than named containing character", { + jk <- join_keys() + testthat::expect_error(jk[["d1"]] <- list(d1 = 1:5, d2 = c(b = "c", "d" = "d"))) + testthat::expect_error(jk[["d1"]] <- list(d1 = list(a = "a"))) + testthat::expect_error(jk[["d1"]] <- list(d1 = NULL)) +}) + +testthat::test_that("[[<-.join_keys doesn't accepts other list than named containing character", { + jk <- join_keys() + testthat::expect_error(jk[["d1"]] <- list(d1 = 1:5, d2 = c(b = "c", "d" = "d"))) + testthat::expect_error(jk[["d1"]] <- list(d1 = list(a = "a"))) + testthat::expect_error(jk[["d1"]] <- list(d1 = NULL)) + testthat::expect_error(jk[["d1"]] <- "test") +}) + +testthat::test_that("[[<-.join_keys adds join_keys specified as named list to the list of keys", { + jk <- join_keys() + jk[["d1"]] <- list(d1 = "a") + testthat::expect_equal(jk, join_keys(join_key("d1", "d1", "a"))) +}) + +testthat::test_that("[[<-.join_keys assigning NULL drops a key", { + jk <- join_keys(join_key("d1", "d1", "a")) + jk[["d1"]] <- NULL + testthat::expect_null(jk[["d1"]]) +}) + +testthat::test_that("[[<-.join_keys adds symmetrical change to the foreign dataset", { + jk <- join_keys() + jk[["d1"]][["d2"]] <- c("A" = "B", "C" = "C") + + testthat::expect_equal( + jk, + join_keys( + join_key("d1", "d2", c("A" = "B", "C" = "C")), + join_key("d2", "d1", c("B" = "A", "C" = "C")) + ) + ) +}) + +testthat::test_that("[[<- can mutate existing keys", { + my_keys <- join_keys(join_key("d1", "d2", "A")) + my_keys[["d1"]][["d2"]] <- "B" + testthat::expect_identical(my_keys, join_keys(join_key("d1", "d2", "B"))) +}) + +testthat::test_that("[[<- mutating non-existing keys adds them", { + my_keys <- join_keys(join_key("d1", "d2", "A")) + my_keys[["d2"]][["d3"]] <- "B" + testthat::expect_identical( + my_keys, + join_keys( + join_key("d1", "d2", "A"), + join_key("d2", "d3", "B") + ) + ) +}) + +testthat::test_that("[[<- setting a key to character(0) drops the key", { + my_keys <- join_keys( + join_key("d1", "d2", "A"), + join_key("d2", "d3", "B") + ) + + my_keys[["d1"]][["d2"]] <- character(0) + + testthat::expect_identical( + my_keys, + join_keys(join_key("d2", "d3", "B")) + ) +}) + +testthat::test_that("[[<-.join_keys removes keys with NULL", { + my_keys <- join_keys( + join_key("d1", "d1", "A"), + join_key("d2", "d2", "B") + ) + my_keys[["d2"]][["d2"]] <- NULL + + testthat::expect_identical( + my_keys, + join_keys( + join_key("d1", "d1", "A") + ) + ) +}) + +testthat::test_that("[[<-.join_keys removes keys with NULL and applies summetrical changes", { + my_keys <- join_keys( + join_key("d1", "d2", "A"), + join_key("d2", "d1", "A"), + join_key("d2", "d3", "B"), + join_key("d3", "d2", "B") + ) + my_keys[["d1"]][["d2"]] <- NULL + + testthat::expect_identical( + my_keys, + join_keys( + join_key("d2", "d3", "B"), + join_key("d3", "d2", "B") + ) + ) +}) + +testthat::test_that("[[<-.join_keys with empty name is changed to the key value", { + # set empty key name + jk <- join_keys() + jk[["d1"]][["d2"]] <- c("A" = "B", "C") + expect_equal(jk[["d1"]][["d2"]], setNames(c("B", "C"), c("A", "C"))) +}) + +testthat::test_that("[[<-.join_keys with empty value is set to its name", { + jk <- join_keys() + jk[["d1"]][["d2"]] <- c("A" = "B", "C" = "") + expect_equal(jk[["d1"]][["d2"]], setNames(c("B", "C"), c("A", "C"))) +}) + +testthat::test_that("[[<-.join_keys passing key unnamed 'empty' value is ignored", { + # set key on empty variable name equal to "" + jk <- join_keys() + testthat::expect_message(jk[["d1"]][["d2"]] <- c("A" = "B", ""), "are ignored") + testthat::expect_equal(jk[["d1"]][["d2"]], c(A = "B")) +}) + +testthat::test_that("[[<-.join_keys fails when provided foreign key pairs for same datasets, but different keys", { + jk <- join_keys() + testthat::expect_error( + jk[["ds1"]] <- list(ds2 = "new", ds2 = "new_but_different"), + "cannot specify multiple different join keys between datasets" + ) +}) + +testthat::test_that("[[<-.join_keys allows when provided foreign key pairs for same datasets and same keys", { + jk <- join_keys() + testthat::expect_silent(jk[["ds1"]] <- list(ds2 = "new", ds2 = c("new" = "new"))) + testthat::expect_equal(jk, join_keys(join_key("ds1", "ds2", "new"))) +}) diff --git a/tests/testthat/test-join_keys-names.R b/tests/testthat/test-join_keys-names.R new file mode 100644 index 000000000..53ca3cf07 --- /dev/null +++ b/tests/testthat/test-join_keys-names.R @@ -0,0 +1,41 @@ +testthat::test_that("names<-.join_keys will replace names at all levels of the join_keys list", { + jk <- join_keys( + join_key("a", "a", "a"), + join_key("a", "b", "ab"), + join_key("a", "c", "ac"), + join_key("d", "b", "db") + ) + + names(jk)[1:2] <- c("x", "y") + + testthat::expect_identical( + jk, + join_keys( + join_key("x", "x", "a"), + join_key("x", "y", "ab"), + join_key("x", "c", "ac"), + join_key("d", "y", "db") + ) + ) +}) + +testthat::test_that("names<-.join_keys will replace names at all levels of the join_keys list when parents set", { + jk <- join_keys( + join_key("a", "a", "a"), + join_key("b", "a", "ba"), + join_key("c", "a", "ca"), + join_key("d", "b", "db") + ) + parents(jk) <- list(b = "a", c = "a", d = "b") + + expected <- join_keys( + join_key("a", "a", "a"), + join_key("B", "a", "ba"), + join_key("c", "a", "ca"), + join_key("d", "B", "db") + ) + parents(expected) <- list(B = "a", c = "a", d = "B") + + names(jk)[2] <- "B" + testthat::expect_identical(jk, expected) +}) diff --git a/tests/testthat/test-join_keys-parents.R b/tests/testthat/test-join_keys-parents.R new file mode 100644 index 000000000..8c8a44502 --- /dev/null +++ b/tests/testthat/test-join_keys-parents.R @@ -0,0 +1,103 @@ +# get parents ----------------------------------------------------------------------------- +testthat::test_that("parents will return empty list when empty/not set", { + jk <- join_keys() + testthat::expect_identical(parents(jk), list()) +}) + +testthat::test_that("parents returns the same list as used in parents<-", { + jk <- join_keys(join_key("a", "b", "ab")) + parents <- list(b = "a") + parents(jk) <- parents + testthat::expect_identical(parents(jk), parents) +}) + +# set parents ---------------------------------------------------------------------------- +testthat::test_that("parents<- accepts a named list containing (non-empty, non-missing) character", { + jk <- join_keys(join_key("a", "b", "test")) + testthat::expect_no_error(parents(jk) <- list(b = "a")) +}) + +testthat::test_that("parents<- single parent can be changed utilizing list functionality with [[<-", { + jk <- join_keys( + join_key("a", "b", "ab"), + join_key("c", "d", "cd") + ) + parents(jk)[["a"]] <- "b" + parents(jk)[["c"]] <- "d" + testthat::expect_identical(parents(jk), list(a = "b", c = "d")) +}) + +testthat::test_that("parents<- dataset can't be own parent", { + jk <- join_keys( + join_key("a", "b", "ab"), + join_key("c", "d", "cd") + ) + testthat::expect_error(parents(jk) <- list(a = "a")) +}) + +testthat::test_that("parents<- setting parent-child relationship fails when no foreign keys between datasets", { + jk <- join_keys( + join_key("a", "1", "aa"), + join_key("b", "b", "bb") + ) + testthat::expect_error(parents(jk) <- list(a = "b")) +}) + +testthat::test_that("parents<- ensures it is a directed acyclical graph (DAG)", { + cyclic_jk <- join_keys( + join_key("a", "b", "id"), + join_key("b", "c", "id"), + join_key("c", "a", "id") + ) + testthat::expect_error( + parents(cyclic_jk) <- list(a = "b", b = "c", c = "a"), + "Cycle detected" + ) +}) + +testthat::test_that("parents<- single parent can be changed utilizing list functionality with [[<-", { + jk <- join_keys( + join_key("a", "b", "ab"), + join_key("c", "d", "cd") + ) + parents(jk)[["a"]] <- "b" + parents(jk)[["c"]] <- "d" + + testthat::expect_identical(parents(jk), list(a = "b", c = "d")) +}) + +testthat::test_that("parents<- fails when value isn't a list (non-empty, non-missing) character", { + jk <- join_keys(join_key("a", "b", "test")) + testthat::expect_error(parents(jk) <- list(b = 1)) + testthat::expect_error(parents(jk) <- list(b = NA_character_)) + testthat::expect_error(parents(jk) <- list(b = NULL)) + testthat::expect_error(parents(jk) <- NULL) +}) + +testthat::test_that("parents<- setting parents again overwrites previous state", { + jk <- join_keys(join_key("a", "b", "test"), join_key("c", "d", "test")) + parents(jk) <- list(a = "b") + parents(jk) <- list(b = "a") + testthat::expect_identical(parents(jk), list(b = "a")) +}) + +testthat::test_that("parents<- sets parent datasets to join_keys kept in teal_data", { + td <- teal_data( + a = data.frame(), + b = data.frame(), + join_keys = join_keys(join_key("a", "b", "test")) + ) + parents(td) <- list(b = "a") + testthat::expect_identical(parents(td), list(b = "a")) +}) + +testthat::test_that("parents<- setting parents changes join_keys object", { + jk <- join_keys(join_key("a", "b", "ab")) + jk2 <- jk + parents <- list(b = "a") + parents(jk) <- parents + + testthat::expect_failure(testthat::expect_identical(jk, jk2)) + # Relaxed comparison also fails + testthat::expect_failure(testthat::expect_equal(jk, jk2)) +}) diff --git a/tests/testthat/test-join_keys-print.R b/tests/testthat/test-join_keys-print.R new file mode 100644 index 000000000..dd3c51e95 --- /dev/null +++ b/tests/testthat/test-join_keys-print.R @@ -0,0 +1,75 @@ +testthat::test_that("format.join_keys for empty set", { + jk <- join_keys() + testthat::expect_identical(format(jk), "An empty join_keys object.") +}) + +testthat::test_that("format.join_keys with empty parents", { + my_keys <- join_keys( + join_key("d1", "d1", "a"), + join_key("d2", "d2", "b"), + join_key("d3", "d3", "c"), + join_key("d2", "d1", "ba"), + join_key("d3", "d2", "ca") + ) + testthat::expect_identical( + format(my_keys), + paste( + "A join_keys object containing foreign keys between 3 datasets:", + "d1: [a]", " <-> d2: [ba]", "d2: [b]", " <-> d1: [ba]", " <-> d3: [ca]", + "d3: [c]", " <-> d2: [ca]", + sep = "\n" + ) + ) +}) + +testthat::test_that("format.join_keys for parents", { + my_keys <- join_keys( + join_key("d1", "d1", "a"), + join_key("d2", "d2", "b"), + join_key("d3", "d3", "c"), + join_key("d2", "d1", "ba"), + join_key("d3", "d2", "ca") + ) + parents(my_keys) <- list("d2" = "d1", "d3" = "d2") + testthat::expect_identical( + format(my_keys), + paste( + "A join_keys object containing foreign keys between 3 datasets:", + "d1: [a]", " <-- d2: [ba]", "d2: [b]", " --> d1: [ba]", " <-- d3: [ca]", + "d3: [c]", " --> d2: [ca]", + sep = "\n" + ) + ) +}) + +testthat::test_that("format.join_keys print inferred keys for children sharing parent", { + my_keys <- join_keys( + join_key("d1", "d1", "a"), + join_key("d2", "d2", "b"), + join_key("d3", "d3", "c"), + join_key("d2", "d1", "child-a"), + join_key("d3", "d1", "child-a") + ) + parents(my_keys) <- list("d2" = "d1", "d3" = "d1") + testthat::expect_identical( + format(my_keys), + paste( + "A join_keys object containing foreign keys between 3 datasets:", + "d1: [a]", " <-- d2: [child-a]", " <-- d3: [child-a]", + "d2: [b]", " --> d1: [child-a]", " --* (implicit via parent with): d3", + "d3: [c]", " --> d1: [child-a]", " --* (implicit via parent with): d2", + sep = "\n" + ) + ) +}) + +testthat::test_that("print.join_keys produces output same as format", { + my_keys <- join_keys( + join_key("d1", "d1", "a"), + join_key("d2", "d2", "b"), + join_key("d3", "d3", "c"), + join_key("d2", "d1", "ba"), + join_key("d3", "d2", "ca") + ) + testthat::expect_output(print(my_keys), format(my_keys), fixed = TRUE) +}) diff --git a/tests/testthat/test-join_keys.R b/tests/testthat/test-join_keys.R new file mode 100644 index 000000000..2bf831b51 --- /dev/null +++ b/tests/testthat/test-join_keys.R @@ -0,0 +1,166 @@ +# join_keys -------------------------------------------------------------------- +testthat::test_that("join_keys creates empty join_keys object by default", { + testthat::expect_s3_class(join_keys(), "join_keys") +}) + +testthat::test_that("join_keys.join_key creates join_keys", { + testthat::expect_s3_class( + join_keys( + join_key("d1", keys = "test"), + join_key("d2", keys = "test") + ), + c("join_keys", "list") + ) +}) + +testthat::test_that("join_keys is a collection of join_key, ie named list with named list with named char vector", { + key1 <- join_key("d1", keys = "test") + key2 <- join_key("d2", keys = "test") + jk <- join_keys(key1, key2) + + testthat::expect_identical( + jk, + structure(c(key1, key2), class = c("join_keys", "list")) + ) + + testthat::expect_identical( + jk, + structure( + list( + d1 = list(d1 = c(test = "test")), + d2 = list(d2 = c(test = "test")) + ), + class = c("join_keys", "list"), + "__parents__" = list() + ) + ) + + # Relaxed comparison (not ordered and without need of empty attributes) + testthat::expect_equal( + jk, + structure( + list( + d2 = list(d2 = c(test = "test")), + d1 = list(d1 = c(test = "test")) + ), + class = c("join_keys", "list") + ) + ) +}) + +testthat::test_that("join_keys.teal_data returns join_keys object from teal_data", { + obj <- teal_data(join_keys = join_keys(join_key("d1", "d1", "a"))) + testthat::expect_identical(obj@join_keys, join_keys(obj)) +}) + +testthat::test_that("join_keys.join_keys returns itself", { + obj <- join_keys(join_key("d1", "d1", "a")) + testthat::expect_identical(obj, join_keys(obj)) +}) + +testthat::test_that("join_keys accepts duplicated join_key", { + testthat::expect_no_error( + join_keys(join_key("d1", "d2", "a"), join_key("d2", "d1", "a")) + ) +}) + +testthat::test_that("join_keys doesn't accept other objects than teal_data, TealData and join_key", { + testthat::expect_error(join_keys("a")) # todo: add expected error message +}) + +testthat::test_that("join_keys doesn't accept a list which is identical to output of join_key function", { + key <- join_key("a", "b", "test") + testthat::expect_error(join_keys(unclass(key))) +}) + +testthat::test_that("join_keys fails when provided foreign key pairs have incompatible values", { + testthat::expect_error( + join_keys(join_key("d1", "d2", "a"), join_key("d2", "d1", "b")), + "cannot specify multiple different join keys between datasets" + ) + testthat::expect_error( + join_keys(join_key("d1", "d2", c(a = "b")), join_key("d2", "d1", c(a = "b"))), + "cannot specify multiple different join keys between datasets" + ) + + testthat::expect_error( + join_keys( + join_keys( + join_key("q", "b", "d"), + join_key("a", "b", "c") + ), + join_key("a", "q", "e"), + join_key("a", "b", "f") + ), + "cannot specify multiple different join keys between datasets" + ) +}) + +testthat::test_that("join_keys constructor adds symmetric keys on given (unnamed) foreign key", { + my_keys <- join_keys(join_key("d1", "d2", "a")) + testthat::expect_identical( + my_keys, + join_keys(join_key("d1", "d2", "a"), join_key("d2", "d1", "a")) + ) +}) + +testthat::test_that("join_keys constructor adds symmetric keys on given (named) foreign key", { + testthat::expect_identical( + join_keys( + join_key("d1", "d2", c(a = "b")) + ), + join_keys( + join_key("d1", "d2", c(a = "b")), + join_key("d2", "d1", c(b = "a")) + ) + ) +}) + + + +# join_keys.<- ---------------------------------------------------------------- +testthat::test_that("join_keys<-.join_keys overwrites existing join_keys", { + my_keys <- join_keys(join_key("d1", "d1", "a"), join_key("d2", "d2", "b")) + join_keys(my_keys) <- join_keys(join_key("d1", "d1", "test")) + testthat::expect_identical(my_keys, join_keys(join_key("d1", "d1", "test"))) +}) + +testthat::test_that("join_keys<-.teal_data overwrites existing join_keys", { + td <- teal_data( + d1 = data.frame(), d2 = data.frame(), + join_keys = join_keys(join_key("d1", "d1", "a"), join_key("d2", "d2", "b")) + ) + + jk2 <- join_keys(join_key("d1", "d1", "test")) + join_keys(td) <- jk2 + testthat::expect_identical(join_keys(td), jk2) +}) + +testthat::test_that("join_keys<-.teal_data overwrites existing join_keys", { + td <- teal_data( + d1 = data.frame(), d2 = data.frame(), + join_keys = join_keys(join_key("d1", "d1", "a"), join_key("d2", "d2", "b")) + ) + + jk2 <- join_keys(join_key("d1", "d1", "test")) + join_keys(td) <- jk2 + testthat::expect_identical(join_keys(td), jk2) +}) + +testthat::test_that("join_keys()[]<-.join_keys with empty name is changed to the key value", { + jk <- join_keys() + join_keys(jk)[["d1"]][["d2"]] <- c("A" = "B", "C") + expect_equal(jk[["d1"]][["d2"]], c(A = "B", C = "C")) +}) + +testthat::test_that("join_keys()[]<-.join_keys with named empty valued is changed to its name", { + jk <- join_keys() + join_keys(jk)[["d1"]][["d2"]] <- c(A = "B", C = "") + expect_equal(jk[["d1"]][["d2"]], c(A = "B", C = "C")) +}) + +testthat::test_that("join_keys()[]<-.join_keys with empty value in a named vector are ignored ", { + jk <- join_keys() + testthat::expect_message(join_keys(jk)[["d1"]][["d2"]] <- c("A" = "B", ""), "are ignored") + testthat::expect_equal(jk[["d1"]][["d2"]], c(A = "B")) +}) diff --git a/tests/testthat/test-teal_data.R b/tests/testthat/test-teal_data.R index 747d2378a..7fa114620 100644 --- a/tests/testthat/test-teal_data.R +++ b/tests/testthat/test-teal_data.R @@ -144,12 +144,12 @@ testthat::test_that("teal_data sets passed join_keys to datasets correctly", { join_key("df1", "df1", "id"), join_key("df2", "df2", "df2_id") ) - jk_expected$set_parents(list(df1 = character(0), df2 = character(0))) + parents(jk_expected) <- list(df1 = character(0), df2 = character(0)) - testthat::expect_equal(data$get_join_keys(), jk_expected) + testthat::expect_equal(join_keys(data), jk_expected) }) -testthat::test_that("teal_data sets passed JoinKeys to datasets correctly when key names differ", { +testthat::test_that("teal_data sets passed join_keys to datasets correctly when key names differ", { df1 <- data.frame(id = c("A", "B"), a = c(1L, 2L)) df2 <- data.frame(df2_id = c("A", "B"), fk = c("A", "B"), b = c(1L, 2L)) df1 <- dataset("df1", df1, keys = "id") @@ -163,12 +163,12 @@ testthat::test_that("teal_data sets passed JoinKeys to datasets correctly when k join_key("df2", "df1", c(fk = "id")), join_key("df2", "df2", "df2_id") ) - jk_expected$set_parents(list(df1 = character(0), df2 = character(0))) + parents(jk_expected) <- list(df1 = character(0), df2 = character(0)) - testthat::expect_equal(data$get_join_keys(), jk_expected) + testthat::expect_equal(join_keys(data), jk_expected) }) -testthat::test_that("teal_data sets passes JoinKeys to datasets correctly when key names differ (multiple keys)", { +testthat::test_that("teal_data sets passes join_keys to datasets correctly when key names differ (multiple keys)", { df1 <- data.frame(id = c("A", "B"), id2 = c("A", "B"), a = c(1L, 2L)) df2 <- data.frame(df2_id = c("A", "B"), fk = c("A", "B"), fk2 = c("A", "B"), b = c(1L, 2L)) df1 <- dataset("df1", df1, keys = "id") @@ -181,8 +181,8 @@ testthat::test_that("teal_data sets passes JoinKeys to datasets correctly when k join_key("df2", "df2", "df2_id"), join_key("df1", "df2", c(id = "fk", id2 = "fk2")) ) - jk_expected$set_parents(list(df1 = character(0), df2 = character(0))) - testthat::expect_equal(data$get_join_keys(), jk_expected) + parents(jk_expected) <- list(df1 = character(0), df2 = character(0)) + testthat::expect_equal(join_keys(data), jk_expected) }) testthat::test_that("teal_data returns TealData object with cdisc_dataset input", { @@ -263,7 +263,7 @@ testthat::test_that("update_join_keys_to_primary updates the join_keys", { jks <- join_keys(join_key("df1", "df2", "id")) data_objects <- list(df1, df2) - update_join_keys_to_primary(data_objects, jks) + jks <- update_join_keys_to_primary(data_objects, jks) testthat::expect_equal( jks, join_keys( @@ -284,7 +284,7 @@ testthat::test_that("update_join_keys_to_primary updates the join_keys when prim jks <- join_keys(join_key("df1", "df2", "id")) data_objects <- list(df1, df2) - update_join_keys_to_primary(data_objects, jks) + jks <- update_join_keys_to_primary(data_objects, jks) testthat::expect_equal( jks, join_keys( @@ -305,7 +305,7 @@ testthat::test_that("update_join_keys_to_primary updates join_keys with characte jks <- join_keys(join_key("df1", "df2", "id")) data_objects <- list(df1, df2) - update_join_keys_to_primary(data_objects, jks) + jks <- update_join_keys_to_primary(data_objects, jks) testthat::expect_equal( jks, join_keys( diff --git a/vignettes/join-keys.Rmd b/vignettes/join-keys.Rmd index 0ee134bc2..dac2b06c6 100644 --- a/vignettes/join-keys.Rmd +++ b/vignettes/join-keys.Rmd @@ -57,7 +57,7 @@ data <- teal_data( # join_key("D2", "D2", keys = c("V", "W")), # equivalent to using primary_key ) ) -data$get_join_keys() +join_keys(data) ``` ### Merge keys