Skip to content

Commit

Permalink
Merge branch 'WIP_join_keys_single_bracket' into 78_simplify_joinkeys…
Browse files Browse the repository at this point in the history
…@main
  • Loading branch information
gogonzo committed Nov 17, 2023
2 parents 3393eda + 92093a4 commit cd1e29b
Show file tree
Hide file tree
Showing 5 changed files with 88 additions and 45 deletions.
62 changes: 47 additions & 15 deletions R/join_keys.R
Original file line number Diff line number Diff line change
Expand Up @@ -227,8 +227,6 @@ c.join_key_set <- function(...) {
#' @param i index specifying elements to extract or replace. Index should be a
#' a character vector, but it can also take numeric, logical, `NULL` or missing.
#'
#' @param keep_all_foreign_keys (`logical`) flag that keeps foreign keys and other
#' datasets even if they are not a parent of the selected dataset.
#'
#' @export
#'
Expand All @@ -244,7 +242,7 @@ c.join_key_set <- function(...) {
#' jk["ds1"]
#' jk[1:2]
#' jk[c("ds1", "ds2")]
`[.join_keys` <- function(x, i, keep_all_foreign_keys = FALSE) {
`[.join_keys` <- function(x, i, j) {
if (missing(i)) {
return(x)
}
Expand All @@ -253,13 +251,31 @@ c.join_key_set <- function(...) {
return(new_join_keys()) # replicate base R
}

if (!missing(j)) {
checkmate::assert(
combine = "or",
checkmate::check_string(i),
checkmate::check_integerish(i, len = 1),
checkmate::check_logical(i, len = length(x))
)
checkmate::assert(
combine = "or",
checkmate::check_string(j),
checkmate::check_integerish(j, len = 1),
checkmate::check_logical(j, len = length(x))
)

subset_x <- update_keys_given_parents(x[union(i, j)])
return(subset_x[[i]][[j]])
}

checkmate::assert(
combine = "or",
checkmate::check_integerish(i),
checkmate::check_logical(i),
checkmate::check_character(i)
checkmate::check_character(i, max.len = length(x)),
checkmate::check_integerish(i, max.len = length(x)),
checkmate::check_logical(i, len = length(x))
)
checkmate::assert_logical(keep_all_foreign_keys, len = 1)


# Convert integer/logical index to named index
if (checkmate::test_integerish(i) || checkmate::test_logical(i)) {
Expand All @@ -286,9 +302,6 @@ c.join_key_set <- function(...) {
}

ix_valid_names <- names(x[[ix]]) %in% c(queue, bin)
if (keep_all_foreign_keys) {
ix_valid_names <- rep(TRUE, length(names(x[[ix]])))
}

new_jk[[ix]] <- x[[ix]][ix_valid_names]

Expand All @@ -312,8 +325,16 @@ c.join_key_set <- function(...) {
#' - `[<-` is not a supported operation for `join_keys`.
#'
#' @export
`[<-.join_keys` <- function(x, i, value) {
stop("Can't use `[<-` for object `join_keys`. Use [[<- instead.")
`[<-.join_keys` <- function(x, i, j, value) {
if (missing(j)) {
stop("Can't use `[<-` for object `join_keys` with only i. Use [[<- instead.")
}

checkmate::assert_string(i)
checkmate::assert_string(j)

x[[i]][[j]] <- value
x
}

#' @rdname join_keys
Expand Down Expand Up @@ -341,7 +362,7 @@ c.join_key_set <- function(...) {
combine = "or",
checkmate::check_string(i),
checkmate::check_integerish(i, len = 1),
checkmate::check_logical(i, len = 1)
checkmate::check_logical(i, len = length(x))
)
checkmate::assert_list(value, names = "named", types = "character", null.ok = TRUE)
if (checkmate::test_integerish(i) || checkmate::test_logical(i)) {
Expand Down Expand Up @@ -436,7 +457,6 @@ length.join_keys <- function(x) {
sum(vapply(x, function(.x) length(.x) > 0, logical(1)))
}


#' @rdname join_keys
#' @export
format.join_keys <- function(x, ...) {
Expand All @@ -445,7 +465,7 @@ format.join_keys <- function(x, ...) {
my_parents <- parents(x)
names_sorted <- topological_sort(my_parents)
names <- union(names_sorted, names(x))

x_implicit <- update_keys_given_parents(x)
out <- lapply(names, function(i) {
this_parent <- my_parents[[i]]
out_i <- lapply(union(i, names(x[[i]])), function(j) {
Expand All @@ -466,6 +486,18 @@ format.join_keys <- function(x, ...) {
if (length(keys) == 0) "no primary keys" else toString(keys)
)
})

implicit_datasets <- setdiff(names(x_implicit[[i]]), names(x[[i]]))
if (length(implicit_datasets) > 0) {
out_i <- c(
out_i,
paste0(
" --* (implicit via parent with): ",
paste(implicit_datasets, collapse = ", ")
)
)
}

paste(out_i, collapse = "\n")
})
paste(
Expand Down
21 changes: 18 additions & 3 deletions R/parents.R
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ parents.teal_data <- function(x) {
#' ADTTE = teal.data::rADTTE,
#' ADRS = teal.data::rADRS
#' )
#'
#' parents(td) <- list("ADTTE" = "ADSL") # replace existing
#' parents(td)["ADRS"] <- "ADSL" # add new parent
`parents<-.teal_data` <- function(x, value) {
Expand All @@ -133,7 +134,7 @@ update_keys_given_parents <- function(x) {
duplicate_pairs <- list()
for (d1 in datanames) {
d1_pk <- jk[[d1]][[d1]]
d1_parent <- parents(jk)[[d1]]
d1_parent <- parent(jk, d1)
for (d2 in datanames) {
if (paste(d2, d1) %in% duplicate_pairs) {
next
Expand All @@ -149,8 +150,22 @@ update_keys_given_parents <- function(x) {
# second is parent of first -> parent keys -> second keys
d2_pk
} else if (identical(d1_parent, d2_parent) && length(d1_parent) > 0) {
# both has the same parent -> parent keys
jk[[d1_parent]][[d1_parent]]
# both has the same parent -> common keys to parent
keys_d1_parent <- sort(jk[[d1]][[d1_parent]])
keys_d2_parent <- sort(jk[[d2]][[d2_parent]])

common_ix_1 <- unname(keys_d1_parent) %in% unname(keys_d2_parent)
common_ix_2 <- unname(keys_d2_parent) %in% unname(keys_d1_parent)

if (all(!common_ix_1)) {
# No common keys between datasets - leave empty
next
}

structure(
names(keys_d2_parent)[common_ix_2],
names = names(keys_d1_parent)[common_ix_1]
)
} else {
# cant find connection - leave empty
next
Expand Down
7 changes: 2 additions & 5 deletions man/join_keys.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions man/parents.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

42 changes: 20 additions & 22 deletions tests/testthat/test-join_keys.R
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ testthat::test_that("[[.join_keys returns keys for given pair", {
join_key("b", "a", "child-parent"),
join_key("c", "a", "child-parent")
)
testthat::expect_identical(my_keys[["b"]][["a"]], c(`child-parent` = "child-parent"))
testthat::expect_identical(my_keys["b", "a"], c(`child-parent` = "child-parent"))
})

testthat::test_that("[[.join_keys doesn't return keys for given a pair without explicit join_key", {
Expand All @@ -124,18 +124,33 @@ testthat::test_that("[[.join_keys doesn't return keys for given a pair without e
testthat::expect_null(my_keys[["b"]][["c"]])
})

testthat::test_that("[[.join_keys infer keys between child by shared foreign keys to parent ", {
testthat::test_that("[[.join_keys infer keys between child by equal (unordered) foreign keys to parent", {
my_keys <- join_keys(
join_key("a", "a", "aa"),
join_key("b", "b", "bb"),
join_key("c", "c", "cc"),
join_key("b", "a", "child-parent"),
join_key("c", "a", "child-parent")
join_key("b", "a", sample(letters[1:5])),
join_key("c", "a", sample(letters[1:5]))
)
parents(my_keys) <- list("b" = "a", "c" = "a")
testthat::expect_identical(my_keys[["b"]][["c"]], c(`child-parent` = "child-parent"))
testthat::expect_identical(my_keys["b", "c"], setNames(letters[1:5], letters[1:5]))
})

testthat::test_that(
"[[.join_keys infer keys between child by shared foreign keys to parent (key names are unique to datasets)",
{
my_keys <- join_keys(
join_key("a", "a", "aa"),
join_key("b", "b", "bb"),
join_key("c", "c", "cc"),
join_key("b", "a", c(aa = "bb")),
join_key("c", "a", c(aa = "cc"))
)
parents(my_keys) <- list("b" = "a", "c" = "a")
testthat::expect_identical(my_keys["b", "c"], c(bb = "cc"))
}
)

# [.join_keys -----------------------------------------------------------------
testthat::test_that("[.join_keys returns join_keys object when i is missing", {
my_keys <- join_keys(
Expand Down Expand Up @@ -170,23 +185,6 @@ testthat::test_that("[.join_keys returns join_keys object with keys for given in
)
})

testthat::test_that("[.join_keys returns join_keys for given dataset including those connected with foreign keys", {
my_keys <- join_keys(
join_key("d1", "d1", "a"),
join_key("d2", "d2", "b"),
join_key("d3", "d3", "c"),
join_key("d2", "d1", "ab"),
join_key("d3", "d1", "ac")
)
testthat::expect_identical(
my_keys["d2", keep_all_foreign_keys = TRUE],
join_keys(
join_key("d2", "d2", "b"),
join_key("d2", "d1", "ab")
)
)
})

testthat::test_that("[.join_keys returns join_keys object for given dataset including its parent", {
my_keys <- join_keys(
join_key("d1", "d1", "a"),
Expand Down

0 comments on commit cd1e29b

Please sign in to comment.