diff --git a/R/join_keys.R b/R/join_keys.R index bca055f1b..52563c9b8 100644 --- a/R/join_keys.R +++ b/R/join_keys.R @@ -227,8 +227,6 @@ c.join_key_set <- function(...) { #' @param i index specifying elements to extract or replace. Index should be a #' a character vector, but it can also take numeric, logical, `NULL` or missing. #' -#' @param keep_all_foreign_keys (`logical`) flag that keeps foreign keys and other -#' datasets even if they are not a parent of the selected dataset. #' #' @export #' @@ -244,7 +242,7 @@ c.join_key_set <- function(...) { #' jk["ds1"] #' jk[1:2] #' jk[c("ds1", "ds2")] -`[.join_keys` <- function(x, i, keep_all_foreign_keys = FALSE) { +`[.join_keys` <- function(x, i, j) { if (missing(i)) { return(x) } @@ -253,13 +251,31 @@ c.join_key_set <- function(...) { return(new_join_keys()) # replicate base R } + if (!missing(j)) { + checkmate::assert( + combine = "or", + checkmate::check_string(i), + checkmate::check_integerish(i, len = 1), + checkmate::check_logical(i, len = length(x)) + ) + checkmate::assert( + combine = "or", + checkmate::check_string(j), + checkmate::check_integerish(j, len = 1), + checkmate::check_logical(j, len = length(x)) + ) + + subset_x <- update_keys_given_parents(x[union(i, j)]) + return(subset_x[[i]][[j]]) + } + checkmate::assert( combine = "or", - checkmate::check_integerish(i), - checkmate::check_logical(i), - checkmate::check_character(i) + checkmate::check_character(i, max.len = length(x)), + checkmate::check_integerish(i, max.len = length(x)), + checkmate::check_logical(i, len = length(x)) ) - checkmate::assert_logical(keep_all_foreign_keys, len = 1) + # Convert integer/logical index to named index if (checkmate::test_integerish(i) || checkmate::test_logical(i)) { @@ -286,9 +302,6 @@ c.join_key_set <- function(...) { } ix_valid_names <- names(x[[ix]]) %in% c(queue, bin) - if (keep_all_foreign_keys) { - ix_valid_names <- rep(TRUE, length(names(x[[ix]]))) - } new_jk[[ix]] <- x[[ix]][ix_valid_names] @@ -312,8 +325,16 @@ c.join_key_set <- function(...) { #' - `[<-` is not a supported operation for `join_keys`. #' #' @export -`[<-.join_keys` <- function(x, i, value) { - stop("Can't use `[<-` for object `join_keys`. Use [[<- instead.") +`[<-.join_keys` <- function(x, i, j, value) { + if (missing(j)) { + stop("Can't use `[<-` for object `join_keys` with only i. Use [[<- instead.") + } + + checkmate::assert_string(i) + checkmate::assert_string(j) + + x[[i]][[j]] <- value + x } #' @rdname join_keys @@ -341,7 +362,7 @@ c.join_key_set <- function(...) { combine = "or", checkmate::check_string(i), checkmate::check_integerish(i, len = 1), - checkmate::check_logical(i, len = 1) + checkmate::check_logical(i, len = length(x)) ) checkmate::assert_list(value, names = "named", types = "character", null.ok = TRUE) if (checkmate::test_integerish(i) || checkmate::test_logical(i)) { @@ -436,7 +457,6 @@ length.join_keys <- function(x) { sum(vapply(x, function(.x) length(.x) > 0, logical(1))) } - #' @rdname join_keys #' @export format.join_keys <- function(x, ...) { @@ -445,7 +465,7 @@ format.join_keys <- function(x, ...) { my_parents <- parents(x) names_sorted <- topological_sort(my_parents) names <- union(names_sorted, names(x)) - + x_implicit <- update_keys_given_parents(x) out <- lapply(names, function(i) { this_parent <- my_parents[[i]] out_i <- lapply(union(i, names(x[[i]])), function(j) { @@ -466,6 +486,18 @@ format.join_keys <- function(x, ...) { if (length(keys) == 0) "no primary keys" else toString(keys) ) }) + + implicit_datasets <- setdiff(names(x_implicit[[i]]), names(x[[i]])) + if (length(implicit_datasets) > 0) { + out_i <- c( + out_i, + paste0( + " --* (implicit via parent with): ", + paste(implicit_datasets, collapse = ", ") + ) + ) + } + paste(out_i, collapse = "\n") }) paste( diff --git a/R/parents.R b/R/parents.R index b68b3a02e..d7ad1bafe 100644 --- a/R/parents.R +++ b/R/parents.R @@ -110,6 +110,7 @@ parents.teal_data <- function(x) { #' ADTTE = teal.data::rADTTE, #' ADRS = teal.data::rADRS #' ) +#' #' parents(td) <- list("ADTTE" = "ADSL") # replace existing #' parents(td)["ADRS"] <- "ADSL" # add new parent `parents<-.teal_data` <- function(x, value) { @@ -133,7 +134,7 @@ update_keys_given_parents <- function(x) { duplicate_pairs <- list() for (d1 in datanames) { d1_pk <- jk[[d1]][[d1]] - d1_parent <- parents(jk)[[d1]] + d1_parent <- parent(jk, d1) for (d2 in datanames) { if (paste(d2, d1) %in% duplicate_pairs) { next @@ -149,8 +150,22 @@ update_keys_given_parents <- function(x) { # second is parent of first -> parent keys -> second keys d2_pk } else if (identical(d1_parent, d2_parent) && length(d1_parent) > 0) { - # both has the same parent -> parent keys - jk[[d1_parent]][[d1_parent]] + # both has the same parent -> common keys to parent + keys_d1_parent <- sort(jk[[d1]][[d1_parent]]) + keys_d2_parent <- sort(jk[[d2]][[d2_parent]]) + + common_ix_1 <- unname(keys_d1_parent) %in% unname(keys_d2_parent) + common_ix_2 <- unname(keys_d2_parent) %in% unname(keys_d1_parent) + + if (all(!common_ix_1)) { + # No common keys between datasets - leave empty + next + } + + structure( + names(keys_d2_parent)[common_ix_2], + names = names(keys_d1_parent)[common_ix_1] + ) } else { # cant find connection - leave empty next diff --git a/man/join_keys.Rd b/man/join_keys.Rd index 8e43beddc..85e9cd4e2 100644 --- a/man/join_keys.Rd +++ b/man/join_keys.Rd @@ -40,9 +40,9 @@ join_keys(x) <- value \method{c}{join_key_set}(...) -\method{[}{join_keys}(x, i, keep_all_foreign_keys = FALSE) +\method{[}{join_keys}(x, i, j) -\method{[}{join_keys}(x, i) <- value +\method{[}{join_keys}(x, i, j) <- value \method{format}{join_keys}(x, ...) @@ -62,9 +62,6 @@ a character vector, but it can also take numeric, logical, \code{NULL} or missin \item{value}{(\code{join_key_set} or list of \code{join_key_set}) relationship pairs to add to \code{join_keys} list.} - -\item{keep_all_foreign_keys}{(\code{logical}) flag that keeps foreign keys and other -datasets even if they are not a parent of the selected dataset.} } \value{ \code{join_keys} object. diff --git a/man/parents.Rd b/man/parents.Rd index 54f75cf23..89e4cf7bd 100644 --- a/man/parents.Rd +++ b/man/parents.Rd @@ -57,6 +57,7 @@ td <- cdisc_data( ADTTE = teal.data::rADTTE, ADRS = teal.data::rADRS ) + parents(td) <- list("ADTTE" = "ADSL") # replace existing parents(td)["ADRS"] <- "ADSL" # add new parent } diff --git a/tests/testthat/test-join_keys.R b/tests/testthat/test-join_keys.R index 5a99a1020..6b9c9733c 100644 --- a/tests/testthat/test-join_keys.R +++ b/tests/testthat/test-join_keys.R @@ -110,7 +110,7 @@ testthat::test_that("[[.join_keys returns keys for given pair", { join_key("b", "a", "child-parent"), join_key("c", "a", "child-parent") ) - testthat::expect_identical(my_keys[["b"]][["a"]], c(`child-parent` = "child-parent")) + testthat::expect_identical(my_keys["b", "a"], c(`child-parent` = "child-parent")) }) testthat::test_that("[[.join_keys doesn't return keys for given a pair without explicit join_key", { @@ -124,18 +124,33 @@ testthat::test_that("[[.join_keys doesn't return keys for given a pair without e testthat::expect_null(my_keys[["b"]][["c"]]) }) -testthat::test_that("[[.join_keys infer keys between child by shared foreign keys to parent ", { +testthat::test_that("[[.join_keys infer keys between child by equal (unordered) foreign keys to parent", { my_keys <- join_keys( join_key("a", "a", "aa"), join_key("b", "b", "bb"), join_key("c", "c", "cc"), - join_key("b", "a", "child-parent"), - join_key("c", "a", "child-parent") + join_key("b", "a", sample(letters[1:5])), + join_key("c", "a", sample(letters[1:5])) ) parents(my_keys) <- list("b" = "a", "c" = "a") - testthat::expect_identical(my_keys[["b"]][["c"]], c(`child-parent` = "child-parent")) + testthat::expect_identical(my_keys["b", "c"], setNames(letters[1:5], letters[1:5])) }) +testthat::test_that( + "[[.join_keys infer keys between child by shared foreign keys to parent (key names are unique to datasets)", + { + my_keys <- join_keys( + join_key("a", "a", "aa"), + join_key("b", "b", "bb"), + join_key("c", "c", "cc"), + join_key("b", "a", c(aa = "bb")), + join_key("c", "a", c(aa = "cc")) + ) + parents(my_keys) <- list("b" = "a", "c" = "a") + testthat::expect_identical(my_keys["b", "c"], c(bb = "cc")) + } +) + # [.join_keys ----------------------------------------------------------------- testthat::test_that("[.join_keys returns join_keys object when i is missing", { my_keys <- join_keys( @@ -170,23 +185,6 @@ testthat::test_that("[.join_keys returns join_keys object with keys for given in ) }) -testthat::test_that("[.join_keys returns join_keys for given dataset including those connected with foreign keys", { - my_keys <- join_keys( - join_key("d1", "d1", "a"), - join_key("d2", "d2", "b"), - join_key("d3", "d3", "c"), - join_key("d2", "d1", "ab"), - join_key("d3", "d1", "ac") - ) - testthat::expect_identical( - my_keys["d2", keep_all_foreign_keys = TRUE], - join_keys( - join_key("d2", "d2", "b"), - join_key("d2", "d1", "ab") - ) - ) -}) - testthat::test_that("[.join_keys returns join_keys object for given dataset including its parent", { my_keys <- join_keys( join_key("d1", "d1", "a"),