From d9d72d59a1263d1e1083456b5c7afd9493a557e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ver=C3=ADssimo?= <211358+averissimo@users.noreply.github.com> Date: Thu, 19 Oct 2023 14:33:07 +0200 Subject: [PATCH 01/19] feat: initial support for simplification of join_key function --- NAMESPACE | 1 + NEWS.md | 4 ++++ R/JoinKeys.R | 28 +++++++++++++++++++++++++--- man/join_key.Rd | 16 ++++++++++++++-- tests/testthat/test-JoinKeys.R | 16 +++++++++++++++- vignettes/join-keys.Rmd | 28 ++++++++++++++++++++++++---- 6 files changed, 83 insertions(+), 10 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 4056b8baf..aa817d407 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -106,6 +106,7 @@ export(mae_dataset) export(mutate_data) export(mutate_dataset) export(mutate_join_keys) +export(primary_key) export(python_cdisc_dataset_connector) export(python_code) export(python_dataset_connector) diff --git a/NEWS.md b/NEWS.md index b960cb0ba..4a1b0db1f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,9 +1,13 @@ # teal.data 0.3.0.9007 +### Enhancements +* Simplified `join_key` to better support primary keys. Added `primary_key` as a wrapper to `join_key`. + ### Breaking changes * Introduced new data class (`teal_data`) which replaces deprecated `TealData`. New data class becomes a standard input for whole `teal` framework. * Deprecated `teal_data` constructor when `TealDataset`, `TealDatasetConnector` or `TealDataConnector` objects are provided. New delayed data loading functions introduced in `teal` package. +======= ### Miscellaneous * Specified minimal version of package dependencies. diff --git a/R/JoinKeys.R b/R/JoinKeys.R index 241eaa1e9..2ca411ec2 100644 --- a/R/JoinKeys.R +++ b/R/JoinKeys.R @@ -425,12 +425,16 @@ mutate_join_keys.TealData <- function(x, dataset_1, dataset_2, val) { # nolint x$mutate_join_keys(dataset_1, dataset_2, val) } - #' Create a relationship between a pair of datasets #' #' @description `r lifecycle::badge("stable")` #' +#' @details `join_key()` will create a relationship for the variables on a pair +#' of datasets. +#' #' @inheritParams mutate_join_keys +#' @param dataset_2 (`character`) other dataset name. In case it is omitted, then it +#' will create a primary key for `dataset_1`. #' @param keys (optionally named `character`) where `names(keys)` are columns in `dataset_1` #' with relationship to columns of `dataset_2` given by the elements in `keys`. #' If `names(keys)` is `NULL` then the same column names are used for both `dataset_1` @@ -441,11 +445,13 @@ mutate_join_keys.TealData <- function(x, dataset_1, dataset_2, val) { # nolint #' @seealso [join_keys()] #' #' @export -join_key <- function(dataset_1, dataset_2, keys) { +join_key <- function(dataset_1, dataset_2 = NULL, keys) { checkmate::assert_string(dataset_1) - checkmate::assert_string(dataset_2) + checkmate::assert_string(dataset_2, null.ok = TRUE) checkmate::assert_character(keys, any.missing = FALSE) + dataset_2 <- rlang::`%||%`(dataset_2, dataset_1) + if (length(keys) > 0) { if (is.null(names(keys))) { names(keys) <- keys @@ -473,3 +479,19 @@ join_key <- function(dataset_1, dataset_2, keys) { class = "JoinKeySet" ) } + +#' @rdname join_key +#' @export +#' +#' @details +#' `primary_key()` will create a primary key for a dataset. It is equivalent to +#' `join_key(...)` and omitting `dataset_2` argument or giving it the same name +#' `dataset_1`. +#' +primary_key <- function(dataset_1, keys) { + if (checkmate::test_character(keys) && + !checkmate::test_names(names(keys), type = "unnamed")) { + stop("Primary keys must match exactly: keys = c('A' = 'B') are not allowed") + } + join_key(dataset_1 = dataset_1, dataset_2 = dataset_1, keys = keys) +} diff --git a/man/join_key.Rd b/man/join_key.Rd index 89d073870..b9e23b416 100644 --- a/man/join_key.Rd +++ b/man/join_key.Rd @@ -2,14 +2,18 @@ % Please edit documentation in R/JoinKeys.R \name{join_key} \alias{join_key} +\alias{primary_key} \title{Create a relationship between a pair of datasets} \usage{ -join_key(dataset_1, dataset_2, keys) +join_key(dataset_1, dataset_2 = NULL, keys) + +primary_key(dataset_1, keys) } \arguments{ \item{dataset_1}{(\code{character}) one dataset name} -\item{dataset_2}{(\code{character}) other dataset name} +\item{dataset_2}{(\code{character}) other dataset name. In case it is omitted, then it +will create a primary key for \code{dataset_1}.} \item{keys}{(optionally named \code{character}) where \code{names(keys)} are columns in \code{dataset_1} with relationship to columns of \code{dataset_2} given by the elements in \code{keys}. @@ -22,6 +26,14 @@ object of class \code{JoinKeySet} to be passed into \code{join_keys} function. \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} } +\details{ +\code{join_key()} will create a relationship for the variables on a pair +of datasets. + +\code{primary_key()} will create a primary key for a dataset. It is equivalent to +\code{join_key(...)} and omitting \code{dataset_2} argument or giving it the same name +\code{dataset_1}. +} \seealso{ \code{\link[=join_keys]{join_keys()}} } diff --git a/tests/testthat/test-JoinKeys.R b/tests/testthat/test-JoinKeys.R index f3e9c80c4..cc4bd8ddd 100644 --- a/tests/testthat/test-JoinKeys.R +++ b/tests/testthat/test-JoinKeys.R @@ -15,6 +15,7 @@ test_that("join_key throws error with invalid keys arguments", { # names(keys)!= keys if datasets are the same expect_error(join_key("d1", "d1", keys = c("B" = "A", "A" = "B"))) + expect_error(join_key("d1", keys = c("B" = "A", "A" = "B"))) }) test_that("key empty name is changed to the key value", { @@ -42,7 +43,6 @@ test_that("join_key throws error with invalid dataset arguments", { expect_error(join_key("d1", c("d1", "d2"), keys = c("A" = "B", "C" = "D"))) }) - test_that("join_key does not throw error with valid arguments", { # keys of length 0 expect_silent(join_key("d1", "d2", keys = character(0))) @@ -52,8 +52,22 @@ test_that("join_key does not throw error with valid arguments", { expect_silent(join_key("d1", "d2", keys = c("A" = "B", "C" = "D"))) # dataset_1 and dataset_2 can be the same if keys match expect_silent(join_key("d1", "d1", keys = c("A" = "A", "B" = "B"))) + + expect_silent(join_key("d1", keys = c("A" = "A", "B" = "B"))) +}) + +test_that("primary_key does not allow named keys", { + expect_error(primary_key("d2", keys = c("A" = "B"))) }) +test_that("primary_key does not throw error with valid arguments", { + # keys of length 0 + expect_silent(primary_key("d1", keys = character(0))) + # keys of length 1 + expect_silent(primary_key("d2", keys = c("A"))) + # keys of length > 1 + expect_silent(primary_key("d1", keys = c("A", "B", "C", "D"))) +}) test_that("cannot set join_keys with incompatible keys", { # different keys diff --git a/vignettes/join-keys.Rmd b/vignettes/join-keys.Rmd index 8972efb4b..71aeb6e35 100644 --- a/vignettes/join-keys.Rmd +++ b/vignettes/join-keys.Rmd @@ -9,6 +9,7 @@ vignette: > --- ## Overview + `teal.data` gives the possibility to define individual keys per dataset and to define the relations to other datasets. Each table can have a set of keys that inform its structure and relation to other tables by specifying: @@ -16,14 +17,12 @@ Each table can have a set of keys that inform its structure and relation to othe - columns consisting the primary key - merge keys, a concept similar to `SQL`'s foreign key. -Usually, an application developer needs to specify the keys manually, but in case of datasets named according -to the `ADaM` standard, `teal` can assign the keys on its own. +Usually, an application developer needs to specify the keys manually, but in case of datasets named according to the `ADaM` standard, `teal` can assign the keys on its own. Refer to `vignette("including-adam-data-in-teal", package = "teal")` for more information. ### Primary key -Using the `keys` argument to the `dataset` function (or for `DDL` a `dataset_connector`), we can specify the column(s) -of the dataset that (together) uniquely identify rows in the dataset. +Using the `keys` argument to the `dataset` function (or for `DDL` a `dataset_connector`), we can specify the column(s) of the dataset that (together) uniquely identify rows in the dataset. ```{r, message=FALSE} library(teal.data) @@ -41,6 +40,27 @@ ds <- dataset( ds$get_keys() ``` +Alternatively, the primary keys can be defined in the `join_keys` parameter by using the `primary_key` or `join_key` functions _(note: when using `join_key` to define a primary key the second dataset name needs to be omitted or have the same value as the first)_. + +```{r, message=FALSE} +library(teal.data) + +data_1 <- data.frame(X = factor(1:10), Y = 21:30, Z = 1:10) +data_2 <- data.frame(W = factor(10:1), V = factor(5:14), M = rep(1:5, 2)) + +data <- teal_data( + dataset("D1", data_1, code = "D1 <- data.frame(X = factor(1:10), Y = 21:30, Z = 1:10)"), + dataset("D2", data_2, code = "D2 <- data.frame(W = factor(10:1), V = factor(5:14), M = rep(1:5, 2))"), + join_keys = join_keys( + primary_key("D1", c("X")), + primary_key("D2", c("V", "W")) + # join_key("D1", c("X")) # equivalent to using primary_key + # join_key("D2", "D2", c("V", "W")), # equivalent to using primary_key + ) +) +data$get_join_keys() +``` + ### Merge keys When passing multiple datasets to the `cdisc_data` function, dataset relationship are set using From df1f573b3e1959dffb02c72fd278a19e1bc6e25f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ver=C3=ADssimo?= <211358+averissimo@users.noreply.github.com> Date: Thu, 19 Oct 2023 15:19:40 +0200 Subject: [PATCH 02/19] docs: improve on error message --- R/JoinKeys.R | 6 +-- tests/testthat/test-JoinKeys.R | 96 ++++++++++++++++++---------------- 2 files changed, 53 insertions(+), 49 deletions(-) diff --git a/R/JoinKeys.R b/R/JoinKeys.R index 2ca411ec2..b7c29c66f 100644 --- a/R/JoinKeys.R +++ b/R/JoinKeys.R @@ -302,7 +302,7 @@ JoinKeys <- R6::R6Class( # nolint } if (xor(length(join_key_1$keys) == 0, length(join_key_2$keys) == 0) || - !identical(sort(join_key_1$keys), sort(setNames(names(join_key_2$keys), join_key_2$keys)))) { + !identical(sort(join_key_1$keys), sort(setNames(names(join_key_2$keys), join_key_2$keys)))) { error_message(join_key_1$dataset_1, join_key_1$dataset_2) } } @@ -490,8 +490,8 @@ join_key <- function(dataset_1, dataset_2 = NULL, keys) { #' primary_key <- function(dataset_1, keys) { if (checkmate::test_character(keys) && - !checkmate::test_names(names(keys), type = "unnamed")) { - stop("Primary keys must match exactly: keys = c('A' = 'B') are not allowed") + !checkmate::test_names(names(keys), type = "unnamed")) { + stop("Primary keys parameter must be a unamed character vector: keys = c('A' = 'A') are not allowed") } join_key(dataset_1 = dataset_1, dataset_2 = dataset_1, keys = keys) } diff --git a/tests/testthat/test-JoinKeys.R b/tests/testthat/test-JoinKeys.R index cc4bd8ddd..36bfd93a1 100644 --- a/tests/testthat/test-JoinKeys.R +++ b/tests/testthat/test-JoinKeys.R @@ -271,33 +271,35 @@ testthat::test_that("JoinKeys$split method returns a named list of JoinKeys obje testthat::expect_equal(names(res$Y$get()), c("Y", "Z")) }) -testthat::test_that("JoinKeys$split method returns an updated list after - the state of the object is modified by JoinKeys$mutate()", { - x <- JoinKeys$new() - x$set( - list( - join_key("A", "B", c("a" = "b")), - join_key("A", "C", c("a" = "c", "aa" = "cc")), - join_key("Z", "Y", c("z" = "y")) +testthat::test_that( + "JoinKeys$split method returns an updated list after the state of the object is modified by JoinKeys$mutate()", + { + x <- JoinKeys$new() + x$set( + list( + join_key("A", "B", c("a" = "b")), + join_key("A", "C", c("a" = "c", "aa" = "cc")), + join_key("Z", "Y", c("z" = "y")) + ) ) - ) - res <- x$split() + res <- x$split() - x$mutate("A", "B", c("a" = "b", "aa" = "bb")) - res2 <- x$split() + x$mutate("A", "B", c("a" = "b", "aa" = "bb")) + res2 <- x$split() - testthat::expect_false(identical(res, res2)) - testthat::expect_identical(res2$A$get()$A$B, c("a" = "b", "aa" = "bb")) + testthat::expect_false(identical(res, res2)) + testthat::expect_identical(res2$A$get()$A$B, c("a" = "b", "aa" = "bb")) - # adding new datasets - x$mutate("D", "G", c("d" = "g")) - res3 <- x$split() - testthat::expect_false(identical(res, res3)) - testthat::expect_false(identical(res2, res3)) - testthat::expect_identical(res3$D$get()$D$G, c("d" = "g")) - testthat::expect_identical(res3$D$get()$G$D, c("g" = "d")) - testthat::expect_identical(names(res3$D$get()), c("D", "G")) -}) + # adding new datasets + x$mutate("D", "G", c("d" = "g")) + res3 <- x$split() + testthat::expect_false(identical(res, res3)) + testthat::expect_false(identical(res2, res3)) + testthat::expect_identical(res3$D$get()$D$G, c("d" = "g")) + testthat::expect_identical(res3$D$get()$G$D, c("g" = "d")) + testthat::expect_identical(names(res3$D$get()), c("D", "G")) + } +) testthat::test_that("JoinKeys$split method does not modify self", { x <- JoinKeys$new() @@ -361,35 +363,37 @@ testthat::test_that("JoinKeys$merge can handle edge case: argument is a list of testthat::expect_identical(previous_output, y$get()) }) -testthat::test_that("JoinKeys$merge throws error when improper argument is - passed in without modifying the caller", { - y <- JoinKeys$new() - y$set( - list( - join_key("A", "B", c("a" = "b")), - join_key("A", "C", c("a" = "c", "aa" = "cc")), - join_key("Z", "Y", c("z" = "y")) +testthat::test_that( + "JoinKeys$merge throws error when improper argument is passed in without modifying the caller", + { + y <- JoinKeys$new() + y$set( + list( + join_key("A", "B", c("a" = "b")), + join_key("A", "C", c("a" = "c", "aa" = "cc")), + join_key("Z", "Y", c("z" = "y")) + ) ) - ) - previous_output <- y$get() - testthat::expect_error(y$merge()) - testthat::expect_identical(previous_output, y$get()) + previous_output <- y$get() + testthat::expect_error(y$merge()) + testthat::expect_identical(previous_output, y$get()) - testthat::expect_error(y$merge(1)) - testthat::expect_identical(previous_output, y$get()) + testthat::expect_error(y$merge(1)) + testthat::expect_identical(previous_output, y$get()) - testthat::expect_error(y$merge("A")) - testthat::expect_identical(previous_output, y$get()) + testthat::expect_error(y$merge("A")) + testthat::expect_identical(previous_output, y$get()) - testthat::expect_error(y$merge(list())) - testthat::expect_identical(previous_output, y$get()) + testthat::expect_error(y$merge(list())) + testthat::expect_identical(previous_output, y$get()) - testthat::expect_error(y$merge(list(1))) - testthat::expect_identical(previous_output, y$get()) + testthat::expect_error(y$merge(list(1))) + testthat::expect_identical(previous_output, y$get()) - testthat::expect_error(y$merge(list("A"))) - testthat::expect_identical(previous_output, y$get()) -}) + testthat::expect_error(y$merge(list("A"))) + testthat::expect_identical(previous_output, y$get()) + } +) testthat::test_that("JoinKeys$merge does nothing when argument is a JoinKeys object with identical data", { x <- JoinKeys$new() From 4ed8daaf8e8c786f892a954e9b548b3c1e8b68b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ver=C3=ADssimo?= <211358+averissimo@users.noreply.github.com> Date: Fri, 20 Oct 2023 14:16:32 +0200 Subject: [PATCH 03/19] PoC for join_keys that extrapolates cdisc from names --- NAMESPACE | 1 + R/JoinKeys.R | 49 ++++++++++++++++++++++++++++++++++++++++++++--- R/cdisc_data.R | 2 +- R/teal_data.R | 2 +- man/cdisc_data.Rd | 7 ++++++- man/join_keys.Rd | 9 +++++++++ man/teal_data.Rd | 2 +- 7 files changed, 65 insertions(+), 7 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index aa817d407..e7990db20 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -100,6 +100,7 @@ export(get_raw_data) export(is_pulled) export(join_key) export(join_keys) +export(join_keys_cdisc) export(load_dataset) export(load_datasets) export(mae_dataset) diff --git a/R/JoinKeys.R b/R/JoinKeys.R index b7c29c66f..e8a4d8e26 100644 --- a/R/JoinKeys.R +++ b/R/JoinKeys.R @@ -302,7 +302,7 @@ JoinKeys <- R6::R6Class( # nolint } if (xor(length(join_key_1$keys) == 0, length(join_key_2$keys) == 0) || - !identical(sort(join_key_1$keys), sort(setNames(names(join_key_2$keys), join_key_2$keys)))) { + !identical(sort(join_key_1$keys), sort(setNames(names(join_key_2$keys), join_key_2$keys)))) { error_message(join_key_1$dataset_1, join_key_1$dataset_2) } } @@ -361,7 +361,6 @@ JoinKeys <- R6::R6Class( # nolint #' ) join_keys <- function(...) { x <- list(...) - res <- JoinKeys$new() if (length(x) > 0) { res$set(x) @@ -370,6 +369,50 @@ join_keys <- function(...) { res } +#' @rdname join_keys +#' @details +#' `join_keys_cdisc` treat non-`JoinKeySet` arguments as possible CDISC datasets. +#' The `dataname` is extrapolated from the name (or fallback to the value itself if +#' it's a `character(1)`). +#' +#' @export +#' @examples +#' join_keys_cdisc(join_key("dataset_A", "dataset_B", c("col_1" = "col_a")), "ADTTE") +#' +join_keys_cdisc <- function(...) { + x <- list(...) + res <- JoinKeys$new() + + x_parsed <- lapply(seq_along(x), function(ix) { + item <- x[[ix]] + + name <- rlang::`%||%`(names(x)[ix], item) # fallback to value if names are not set + if ( + checkmate::test_class(item, "JoinKeySet") || + !checkmate::test_string(name, min.chars = 1) || + !name %in% names(default_cdisc_keys)) { + return(list(item)) + } + + # Add primary key + result <- list(primary_key(name, keys = get_cdisc_keys(name))) + keys_list <- default_cdisc_keys[[name]] + + if (is.null(keys_list) || is.null(keys_list$parent) || is.null(keys_list$foreign)) { + return(result) + } + # Add JoinKey with parent dataset (if exists) + append(result, list(join_key(name, keys_list$parent, keys = keys_list$foreign))) + }) + x_parsed <- do.call(c, x_parsed) + + if (length(x_parsed) > 0) { + res$set(x_parsed) + } + + res +} + # wrappers ==== #' Mutate `JoinKeys` with a new values #' @@ -490,7 +533,7 @@ join_key <- function(dataset_1, dataset_2 = NULL, keys) { #' primary_key <- function(dataset_1, keys) { if (checkmate::test_character(keys) && - !checkmate::test_names(names(keys), type = "unnamed")) { + !checkmate::test_names(names(keys), type = "unnamed")) { stop("Primary keys parameter must be a unamed character vector: keys = c('A' = 'A') are not allowed") } join_key(dataset_1 = dataset_1, dataset_2 = dataset_1, keys = keys) diff --git a/R/cdisc_data.R b/R/cdisc_data.R index 24ec1717f..a46518536 100644 --- a/R/cdisc_data.R +++ b/R/cdisc_data.R @@ -33,7 +33,7 @@ #' ) #' ) cdisc_data <- function(..., - join_keys = teal.data::join_keys(), + join_keys = teal.data::join_keys_cdisc(...), code = "", check = FALSE) { data_objects <- list(...) diff --git a/R/teal_data.R b/R/teal_data.R index 9da01458e..ad7dd8eb2 100644 --- a/R/teal_data.R +++ b/R/teal_data.R @@ -32,7 +32,7 @@ #' }) #' ) teal_data <- function(..., - join_keys = teal.data::join_keys(), + join_keys = NULL, code = "", check = FALSE) { data_objects <- list(...) diff --git a/man/cdisc_data.Rd b/man/cdisc_data.Rd index 2c3be5ebe..72557dfbd 100644 --- a/man/cdisc_data.Rd +++ b/man/cdisc_data.Rd @@ -4,7 +4,12 @@ \alias{cdisc_data} \title{Data input for teal app} \usage{ -cdisc_data(..., join_keys = teal.data::join_keys(), code = "", check = FALSE) +cdisc_data( + ..., + join_keys = teal.data::join_keys_cdisc(...), + code = "", + check = FALSE +) } \arguments{ \item{...}{(\code{TealDataConnector}, \code{TealDataset}, \code{TealDatasetConnector}, \code{any})\cr diff --git a/man/join_keys.Rd b/man/join_keys.Rd index b0d10e1e4..d73a28e28 100644 --- a/man/join_keys.Rd +++ b/man/join_keys.Rd @@ -2,9 +2,12 @@ % Please edit documentation in R/JoinKeys.R \name{join_keys} \alias{join_keys} +\alias{join_keys_cdisc} \title{Create a \code{JoinKeys} out of a list of \code{JoinKeySet} objects} \usage{ join_keys(...) + +join_keys_cdisc(...) } \arguments{ \item{...}{optional, a \code{JoinKeySet} objects created using the \code{join_key} function.} @@ -18,6 +21,10 @@ join_keys(...) \details{ Note that join keys are symmetric although the relationship only needs to be specified once. + +\code{join_keys_cdisc} treat non-\code{JoinKeySet} arguments as possible CDISC datasets. +The \code{dataname} is extrapolated from the name (or fallback to the value itself if +it's a \code{character(1)}). } \examples{ join_keys() @@ -28,4 +35,6 @@ join_keys( join_keys( join_key("dataset_A", "dataset_B", c("col_1" = "col_a")) ) +join_keys_cdisc(join_key("dataset_A", "dataset_B", c("col_1" = "col_a")), "ADTTE") + } diff --git a/man/teal_data.Rd b/man/teal_data.Rd index 0880a0c6b..5453e55c1 100644 --- a/man/teal_data.Rd +++ b/man/teal_data.Rd @@ -4,7 +4,7 @@ \alias{teal_data} \title{Teal data} \usage{ -teal_data(..., join_keys = teal.data::join_keys(), code = "", check = FALSE) +teal_data(..., join_keys = NULL, code = "", check = FALSE) } \arguments{ \item{...}{(\code{TealDataConnector}, \code{TealDataset}, \code{TealDatasetConnector}, \code{any})\cr From f3abc93836f4d09787b0466185136b46e59b5c83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ver=C3=ADssimo?= <211358+averissimo@users.noreply.github.com> Date: Fri, 20 Oct 2023 14:21:13 +0200 Subject: [PATCH 04/19] fix: remove repeated code --- R/JoinKeys.R | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/R/JoinKeys.R b/R/JoinKeys.R index e8a4d8e26..e8db22d51 100644 --- a/R/JoinKeys.R +++ b/R/JoinKeys.R @@ -381,7 +381,6 @@ join_keys <- function(...) { #' join_keys_cdisc <- function(...) { x <- list(...) - res <- JoinKeys$new() x_parsed <- lapply(seq_along(x), function(ix) { item <- x[[ix]] @@ -406,11 +405,7 @@ join_keys_cdisc <- function(...) { }) x_parsed <- do.call(c, x_parsed) - if (length(x_parsed) > 0) { - res$set(x_parsed) - } - - res + do.call(join_keys, x_parsed) } # wrappers ==== From 508e19ef447308f65cf11e429369e811a6c6fa08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ver=C3=ADssimo?= <211358+averissimo@users.noreply.github.com> Date: Fri, 20 Oct 2023 15:29:01 +0200 Subject: [PATCH 05/19] support for mutate as join_key_instance[dataset_1, dataset_2] <- value --- NAMESPACE | 3 +++ R/JoinKeys.R | 25 +++++++++++++++++++++++-- R/teal.data.R | 1 + 3 files changed, 27 insertions(+), 2 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index e7990db20..509348769 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,7 @@ # Generated by roxygen2: do not edit by hand +S3method("[",JoinKeys) +S3method("[<-",JoinKeys) S3method(as_cdisc,TealDataset) S3method(as_cdisc,TealDatasetConnector) S3method(dataset,MultiAssayExperiment) @@ -127,5 +129,6 @@ import(shiny) import(teal.code) importFrom(digest,digest) importFrom(logger,log_trace) +importFrom(rlang,"%||%") importFrom(shinyjs,useShinyjs) importFrom(stats,setNames) diff --git a/R/JoinKeys.R b/R/JoinKeys.R index e8db22d51..b578964e9 100644 --- a/R/JoinKeys.R +++ b/R/JoinKeys.R @@ -369,6 +369,27 @@ join_keys <- function(...) { res } +#' @title Getter for JoinKeys that returns the relationship between pairs of datasets +#' @export +`[.JoinKeys` <- function(x, dataset_1, dataset_2 = NULL) { + checkmate::assert_string(dataset_1) + checkmate::assert_string(dataset_2, null.ok = TRUE) + + dataset_2 <- dataset_2 %||% dataset_1 + x$get(dataset_1, dataset_2) +} + +#' @rdname sub-.JoinKeys +#' @export +`[<-.JoinKeys` <- function(x, dataset_1, dataset_2 = NULL, value) { + checkmate::assert_string(dataset_1) + checkmate::assert_string(dataset_2, null.ok = TRUE) + + dataset_2 <- dataset_2 %||% dataset_1 + x$mutate(dataset_1, dataset_2, value) + x +} + #' @rdname join_keys #' @details #' `join_keys_cdisc` treat non-`JoinKeySet` arguments as possible CDISC datasets. @@ -385,7 +406,7 @@ join_keys_cdisc <- function(...) { x_parsed <- lapply(seq_along(x), function(ix) { item <- x[[ix]] - name <- rlang::`%||%`(names(x)[ix], item) # fallback to value if names are not set + name <- names(x)[ix] %||% item # fallback to value if names are not set if ( checkmate::test_class(item, "JoinKeySet") || !checkmate::test_string(name, min.chars = 1) || @@ -488,7 +509,7 @@ join_key <- function(dataset_1, dataset_2 = NULL, keys) { checkmate::assert_string(dataset_2, null.ok = TRUE) checkmate::assert_character(keys, any.missing = FALSE) - dataset_2 <- rlang::`%||%`(dataset_2, dataset_1) + dataset_2 <- dataset_2 %||% dataset_1 if (length(keys) > 0) { if (is.null(names(keys))) { diff --git a/R/teal.data.R b/R/teal.data.R index 70bd1af1a..5181c54cd 100644 --- a/R/teal.data.R +++ b/R/teal.data.R @@ -11,6 +11,7 @@ # Fix R CMD check notes #' @import shiny +#' @importFrom rlang %||% #' @importFrom digest digest #' @importFrom stats setNames #' @importFrom shinyjs useShinyjs From fd2b12f051dc2bd061e87289b6420ed1c066f426 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ver=C3=ADssimo?= <211358+averissimo@users.noreply.github.com> Date: Fri, 20 Oct 2023 17:10:31 +0200 Subject: [PATCH 06/19] supports an assignment via a getter function --- NAMESPACE | 5 +++++ R/get_join_keys.R | 31 ++++++++++++++++++++++++++++++- inst/WORDLIST | 14 ++++++++------ man/get_join_keys.Rd | 15 +++++++++++++++ man/sub-.JoinKeys.Rd | 14 ++++++++++++++ 5 files changed, 72 insertions(+), 7 deletions(-) create mode 100644 man/sub-.JoinKeys.Rd diff --git a/NAMESPACE b/NAMESPACE index 509348769..71efe1dda 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,6 +2,9 @@ S3method("[",JoinKeys) S3method("[<-",JoinKeys) +S3method("get_join_keys<-",JoinKeys) +S3method("get_join_keys<-",TealData) +S3method("get_join_keys<-",teal_data) S3method(as_cdisc,TealDataset) S3method(as_cdisc,TealDatasetConnector) S3method(dataset,MultiAssayExperiment) @@ -24,6 +27,7 @@ S3method(get_datasets,TealDataAbstract) S3method(get_datasets,TealDataset) S3method(get_datasets,TealDatasetConnector) S3method(get_datasets,teal_data) +S3method(get_join_keys,JoinKeys) S3method(get_join_keys,TealData) S3method(get_join_keys,teal_data) S3method(get_key_duplicates,TealDataset) @@ -62,6 +66,7 @@ S3method(to_relational_data,data.frame) S3method(to_relational_data,list) export("col_labels<-") export("data_label<-") +export("get_join_keys<-") export(as_cdisc) export(callable_code) export(callable_function) diff --git a/R/get_join_keys.R b/R/get_join_keys.R index f2f21c4fd..04b6e5311 100644 --- a/R/get_join_keys.R +++ b/R/get_join_keys.R @@ -6,7 +6,6 @@ get_join_keys <- function(data) { UseMethod("get_join_keys", data) } - #' @rdname get_join_keys #' @export get_join_keys.teal_data <- function(data) { @@ -18,3 +17,33 @@ get_join_keys.teal_data <- function(data) { get_join_keys.TealData <- function(data) { data$get_join_keys() } + +#' @rdname get_join_keys +#' @export +get_join_keys.JoinKeys <- function(data) { + data +} + +#' @rdname get_join_keys +#' @export +`get_join_keys<-` <- function(data, dataset_1, dataset_2 = NULL, value) { + UseMethod("get_join_keys<-", data) +} + +#' @rdname get_join_keys +#' @export +`get_join_keys<-.JoinKeys` <- function(data, dataset_1, dataset_2 = NULL, value) { + data +} + +#' @rdname get_join_keys +#' @export +`get_join_keys<-.teal_data` <- function(data, dataset_1, dataset_2 = NULL, value) { + data +} + +#' @rdname get_join_keys +#' @export +`get_join_keys<-.TealData` <- function(data, dataset_1, dataset_2 = NULL, value) { + data +} diff --git a/inst/WORDLIST b/inst/WORDLIST index 9df28bd9e..c4aa9809b 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -1,14 +1,16 @@ CDISC -Forkers -Hoffmann -Pre -Reproducibility -SCDA -UI cloneable +Forkers formatters funder +Getter +Hoffmann iteratively +JoinKeys +Pre pre repo +Reproducibility reproducibility +SCDA +UI diff --git a/man/get_join_keys.Rd b/man/get_join_keys.Rd index e34804dc9..572cc6d55 100644 --- a/man/get_join_keys.Rd +++ b/man/get_join_keys.Rd @@ -4,6 +4,11 @@ \alias{get_join_keys} \alias{get_join_keys.teal_data} \alias{get_join_keys.TealData} +\alias{get_join_keys.JoinKeys} +\alias{get_join_keys<-} +\alias{get_join_keys<-.JoinKeys} +\alias{get_join_keys<-.teal_data} +\alias{get_join_keys<-.TealData} \title{Function to get join keys from a `` object} \usage{ get_join_keys(data) @@ -11,6 +16,16 @@ get_join_keys(data) \method{get_join_keys}{teal_data}(data) \method{get_join_keys}{TealData}(data) + +\method{get_join_keys}{JoinKeys}(data) + +get_join_keys(data, dataset_1, dataset_2 = NULL) <- value + +\method{get_join_keys}{JoinKeys}(data, dataset_1, dataset_2 = NULL) <- value + +\method{get_join_keys}{teal_data}(data, dataset_1, dataset_2 = NULL) <- value + +\method{get_join_keys}{TealData}(data, dataset_1, dataset_2 = NULL) <- value } \arguments{ \item{data}{`` - object to extract the join keys} diff --git a/man/sub-.JoinKeys.Rd b/man/sub-.JoinKeys.Rd new file mode 100644 index 000000000..96e0e4a33 --- /dev/null +++ b/man/sub-.JoinKeys.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/JoinKeys.R +\name{[.JoinKeys} +\alias{[.JoinKeys} +\alias{[<-.JoinKeys} +\title{Getter for JoinKeys that returns the relationship between pairs of datasets} +\usage{ +\method{[}{JoinKeys}(x, dataset_1, dataset_2 = NULL) + +\method{[}{JoinKeys}(x, dataset_1, dataset_2 = NULL) <- value +} +\description{ +Getter for JoinKeys that returns the relationship between pairs of datasets +} From 0c6184d26fa0cd745ff2b782ec8b6d42a206a010 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ver=C3=ADssimo?= <211358+averissimo@users.noreply.github.com> Date: Mon, 23 Oct 2023 12:27:35 +0200 Subject: [PATCH 07/19] fix: remove function --- NAMESPACE | 6 ---- NEWS.md | 2 +- R/JoinKeys.R | 18 +---------- R/get_join_keys.R | 58 +++++++++++++++++----------------- man/get_join_keys.Rd | 15 --------- man/join_key.Rd | 7 ---- tests/testthat/test-JoinKeys.R | 13 -------- vignettes/join-keys.Rmd | 7 ++-- 8 files changed, 34 insertions(+), 92 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 71efe1dda..b5e3ae438 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,9 +2,6 @@ S3method("[",JoinKeys) S3method("[<-",JoinKeys) -S3method("get_join_keys<-",JoinKeys) -S3method("get_join_keys<-",TealData) -S3method("get_join_keys<-",teal_data) S3method(as_cdisc,TealDataset) S3method(as_cdisc,TealDatasetConnector) S3method(dataset,MultiAssayExperiment) @@ -27,7 +24,6 @@ S3method(get_datasets,TealDataAbstract) S3method(get_datasets,TealDataset) S3method(get_datasets,TealDatasetConnector) S3method(get_datasets,teal_data) -S3method(get_join_keys,JoinKeys) S3method(get_join_keys,TealData) S3method(get_join_keys,teal_data) S3method(get_key_duplicates,TealDataset) @@ -66,7 +62,6 @@ S3method(to_relational_data,data.frame) S3method(to_relational_data,list) export("col_labels<-") export("data_label<-") -export("get_join_keys<-") export(as_cdisc) export(callable_code) export(callable_function) @@ -114,7 +109,6 @@ export(mae_dataset) export(mutate_data) export(mutate_dataset) export(mutate_join_keys) -export(primary_key) export(python_cdisc_dataset_connector) export(python_code) export(python_dataset_connector) diff --git a/NEWS.md b/NEWS.md index 4a1b0db1f..a80fdb372 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,7 +1,7 @@ # teal.data 0.3.0.9007 ### Enhancements -* Simplified `join_key` to better support primary keys. Added `primary_key` as a wrapper to `join_key`. +* Simplified `join_key` to better support primary keys. ### Breaking changes diff --git a/R/JoinKeys.R b/R/JoinKeys.R index b578964e9..09ac2433e 100644 --- a/R/JoinKeys.R +++ b/R/JoinKeys.R @@ -415,7 +415,7 @@ join_keys_cdisc <- function(...) { } # Add primary key - result <- list(primary_key(name, keys = get_cdisc_keys(name))) + result <- list(join_key(name, keys = get_cdisc_keys(name))) keys_list <- default_cdisc_keys[[name]] if (is.null(keys_list) || is.null(keys_list$parent) || is.null(keys_list$foreign)) { @@ -538,19 +538,3 @@ join_key <- function(dataset_1, dataset_2 = NULL, keys) { class = "JoinKeySet" ) } - -#' @rdname join_key -#' @export -#' -#' @details -#' `primary_key()` will create a primary key for a dataset. It is equivalent to -#' `join_key(...)` and omitting `dataset_2` argument or giving it the same name -#' `dataset_1`. -#' -primary_key <- function(dataset_1, keys) { - if (checkmate::test_character(keys) && - !checkmate::test_names(names(keys), type = "unnamed")) { - stop("Primary keys parameter must be a unamed character vector: keys = c('A' = 'A') are not allowed") - } - join_key(dataset_1 = dataset_1, dataset_2 = dataset_1, keys = keys) -} diff --git a/R/get_join_keys.R b/R/get_join_keys.R index 04b6e5311..453116f8c 100644 --- a/R/get_join_keys.R +++ b/R/get_join_keys.R @@ -18,32 +18,32 @@ get_join_keys.TealData <- function(data) { data$get_join_keys() } -#' @rdname get_join_keys -#' @export -get_join_keys.JoinKeys <- function(data) { - data -} - -#' @rdname get_join_keys -#' @export -`get_join_keys<-` <- function(data, dataset_1, dataset_2 = NULL, value) { - UseMethod("get_join_keys<-", data) -} - -#' @rdname get_join_keys -#' @export -`get_join_keys<-.JoinKeys` <- function(data, dataset_1, dataset_2 = NULL, value) { - data -} - -#' @rdname get_join_keys -#' @export -`get_join_keys<-.teal_data` <- function(data, dataset_1, dataset_2 = NULL, value) { - data -} - -#' @rdname get_join_keys -#' @export -`get_join_keys<-.TealData` <- function(data, dataset_1, dataset_2 = NULL, value) { - data -} +#' #' @rdname get_join_keys +#' #' @export +#' get_join_keys.JoinKeys <- function(data) { +#' data +#' } +#' +#' #' @rdname get_join_keys +#' #' @export +#' `get_join_keys<-` <- function(data, dataset_1, dataset_2 = NULL, value) { +#' UseMethod("get_join_keys<-", data) +#' } +#' +#' #' @rdname get_join_keys +#' #' @export +#' `get_join_keys<-.JoinKeys` <- function(data, dataset_1, dataset_2 = NULL, value) { +#' data +#' } +#' +#' #' @rdname get_join_keys +#' #' @export +#' `get_join_keys<-.teal_data` <- function(data, dataset_1, dataset_2 = NULL, value) { +#' data +#' } +#' +#' #' @rdname get_join_keys +#' #' @export +#' `get_join_keys<-.TealData` <- function(data, dataset_1, dataset_2 = NULL, value) { +#' data +#' } diff --git a/man/get_join_keys.Rd b/man/get_join_keys.Rd index 572cc6d55..e34804dc9 100644 --- a/man/get_join_keys.Rd +++ b/man/get_join_keys.Rd @@ -4,11 +4,6 @@ \alias{get_join_keys} \alias{get_join_keys.teal_data} \alias{get_join_keys.TealData} -\alias{get_join_keys.JoinKeys} -\alias{get_join_keys<-} -\alias{get_join_keys<-.JoinKeys} -\alias{get_join_keys<-.teal_data} -\alias{get_join_keys<-.TealData} \title{Function to get join keys from a `` object} \usage{ get_join_keys(data) @@ -16,16 +11,6 @@ get_join_keys(data) \method{get_join_keys}{teal_data}(data) \method{get_join_keys}{TealData}(data) - -\method{get_join_keys}{JoinKeys}(data) - -get_join_keys(data, dataset_1, dataset_2 = NULL) <- value - -\method{get_join_keys}{JoinKeys}(data, dataset_1, dataset_2 = NULL) <- value - -\method{get_join_keys}{teal_data}(data, dataset_1, dataset_2 = NULL) <- value - -\method{get_join_keys}{TealData}(data, dataset_1, dataset_2 = NULL) <- value } \arguments{ \item{data}{`` - object to extract the join keys} diff --git a/man/join_key.Rd b/man/join_key.Rd index b9e23b416..ff130c63b 100644 --- a/man/join_key.Rd +++ b/man/join_key.Rd @@ -2,12 +2,9 @@ % Please edit documentation in R/JoinKeys.R \name{join_key} \alias{join_key} -\alias{primary_key} \title{Create a relationship between a pair of datasets} \usage{ join_key(dataset_1, dataset_2 = NULL, keys) - -primary_key(dataset_1, keys) } \arguments{ \item{dataset_1}{(\code{character}) one dataset name} @@ -29,10 +26,6 @@ object of class \code{JoinKeySet} to be passed into \code{join_keys} function. \details{ \code{join_key()} will create a relationship for the variables on a pair of datasets. - -\code{primary_key()} will create a primary key for a dataset. It is equivalent to -\code{join_key(...)} and omitting \code{dataset_2} argument or giving it the same name -\code{dataset_1}. } \seealso{ \code{\link[=join_keys]{join_keys()}} diff --git a/tests/testthat/test-JoinKeys.R b/tests/testthat/test-JoinKeys.R index 36bfd93a1..7ea9c3d0a 100644 --- a/tests/testthat/test-JoinKeys.R +++ b/tests/testthat/test-JoinKeys.R @@ -56,19 +56,6 @@ test_that("join_key does not throw error with valid arguments", { expect_silent(join_key("d1", keys = c("A" = "A", "B" = "B"))) }) -test_that("primary_key does not allow named keys", { - expect_error(primary_key("d2", keys = c("A" = "B"))) -}) - -test_that("primary_key does not throw error with valid arguments", { - # keys of length 0 - expect_silent(primary_key("d1", keys = character(0))) - # keys of length 1 - expect_silent(primary_key("d2", keys = c("A"))) - # keys of length > 1 - expect_silent(primary_key("d1", keys = c("A", "B", "C", "D"))) -}) - test_that("cannot set join_keys with incompatible keys", { # different keys expect_error( diff --git a/vignettes/join-keys.Rmd b/vignettes/join-keys.Rmd index 71aeb6e35..a12a8dacc 100644 --- a/vignettes/join-keys.Rmd +++ b/vignettes/join-keys.Rmd @@ -40,7 +40,7 @@ ds <- dataset( ds$get_keys() ``` -Alternatively, the primary keys can be defined in the `join_keys` parameter by using the `primary_key` or `join_key` functions _(note: when using `join_key` to define a primary key the second dataset name needs to be omitted or have the same value as the first)_. +Alternatively, the primary keys can be defined in the `join_keys` parameter by using the `join_key` functions by omitting the second dataset (or define it with the same value as the first. ```{r, message=FALSE} library(teal.data) @@ -52,9 +52,8 @@ data <- teal_data( dataset("D1", data_1, code = "D1 <- data.frame(X = factor(1:10), Y = 21:30, Z = 1:10)"), dataset("D2", data_2, code = "D2 <- data.frame(W = factor(10:1), V = factor(5:14), M = rep(1:5, 2))"), join_keys = join_keys( - primary_key("D1", c("X")), - primary_key("D2", c("V", "W")) - # join_key("D1", c("X")) # equivalent to using primary_key + join_key("D1", c("X")), + join_key("D2", c("V", "W")) # join_key("D2", "D2", c("V", "W")), # equivalent to using primary_key ) ) From 05165926a933ee30c244a403d3911364d22af7e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ver=C3=ADssimo?= <211358+averissimo@users.noreply.github.com> Date: Mon, 23 Oct 2023 12:30:42 +0200 Subject: [PATCH 08/19] docs: rename function with cdisc suffix instead --- NAMESPACE | 2 +- R/JoinKeys.R | 6 +++--- R/cdisc_data.R | 2 +- man/cdisc_data.Rd | 2 +- man/join_keys.Rd | 8 ++++---- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index b5e3ae438..70583eb35 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -72,6 +72,7 @@ export(cdisc_dataset) export(cdisc_dataset_connector) export(cdisc_dataset_connector_file) export(cdisc_dataset_file) +export(cdisc_join_keys) export(code_cdisc_dataset_connector) export(code_dataset_connector) export(col_labels) @@ -102,7 +103,6 @@ export(get_raw_data) export(is_pulled) export(join_key) export(join_keys) -export(join_keys_cdisc) export(load_dataset) export(load_datasets) export(mae_dataset) diff --git a/R/JoinKeys.R b/R/JoinKeys.R index 09ac2433e..d0d22803a 100644 --- a/R/JoinKeys.R +++ b/R/JoinKeys.R @@ -392,15 +392,15 @@ join_keys <- function(...) { #' @rdname join_keys #' @details -#' `join_keys_cdisc` treat non-`JoinKeySet` arguments as possible CDISC datasets. +#' `cdisc_join_keys` treat non-`JoinKeySet` arguments as possible CDISC datasets. #' The `dataname` is extrapolated from the name (or fallback to the value itself if #' it's a `character(1)`). #' #' @export #' @examples -#' join_keys_cdisc(join_key("dataset_A", "dataset_B", c("col_1" = "col_a")), "ADTTE") +#' cdisc_join_keys(join_key("dataset_A", "dataset_B", c("col_1" = "col_a")), "ADTTE") #' -join_keys_cdisc <- function(...) { +cdisc_join_keys <- function(...) { x <- list(...) x_parsed <- lapply(seq_along(x), function(ix) { diff --git a/R/cdisc_data.R b/R/cdisc_data.R index a46518536..db2620119 100644 --- a/R/cdisc_data.R +++ b/R/cdisc_data.R @@ -33,7 +33,7 @@ #' ) #' ) cdisc_data <- function(..., - join_keys = teal.data::join_keys_cdisc(...), + join_keys = teal.data::cdisc_join_keys(...), code = "", check = FALSE) { data_objects <- list(...) diff --git a/man/cdisc_data.Rd b/man/cdisc_data.Rd index 72557dfbd..66d6b85a3 100644 --- a/man/cdisc_data.Rd +++ b/man/cdisc_data.Rd @@ -6,7 +6,7 @@ \usage{ cdisc_data( ..., - join_keys = teal.data::join_keys_cdisc(...), + join_keys = teal.data::cdisc_join_keys(...), code = "", check = FALSE ) diff --git a/man/join_keys.Rd b/man/join_keys.Rd index d73a28e28..fb79dacba 100644 --- a/man/join_keys.Rd +++ b/man/join_keys.Rd @@ -2,12 +2,12 @@ % Please edit documentation in R/JoinKeys.R \name{join_keys} \alias{join_keys} -\alias{join_keys_cdisc} +\alias{cdisc_join_keys} \title{Create a \code{JoinKeys} out of a list of \code{JoinKeySet} objects} \usage{ join_keys(...) -join_keys_cdisc(...) +cdisc_join_keys(...) } \arguments{ \item{...}{optional, a \code{JoinKeySet} objects created using the \code{join_key} function.} @@ -22,7 +22,7 @@ join_keys_cdisc(...) Note that join keys are symmetric although the relationship only needs to be specified once. -\code{join_keys_cdisc} treat non-\code{JoinKeySet} arguments as possible CDISC datasets. +\code{cdisc_join_keys} treat non-\code{JoinKeySet} arguments as possible CDISC datasets. The \code{dataname} is extrapolated from the name (or fallback to the value itself if it's a \code{character(1)}). } @@ -35,6 +35,6 @@ join_keys( join_keys( join_key("dataset_A", "dataset_B", c("col_1" = "col_a")) ) -join_keys_cdisc(join_key("dataset_A", "dataset_B", c("col_1" = "col_a")), "ADTTE") +cdisc_join_keys(join_key("dataset_A", "dataset_B", c("col_1" = "col_a")), "ADTTE") } From f2a311bf242f741940f55b02688248b3c2288fcc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ver=C3=ADssimo?= <211358+averissimo@users.noreply.github.com> Date: Mon, 23 Oct 2023 12:34:42 +0200 Subject: [PATCH 09/19] docs: adds NEWS and get_join_keys<- assignment --- NEWS.md | 1 + R/get_join_keys.R | 50 ++++++++++++++++++----------------------------- 2 files changed, 20 insertions(+), 31 deletions(-) diff --git a/NEWS.md b/NEWS.md index a80fdb372..a8f6ce7bc 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,6 +2,7 @@ ### Enhancements * Simplified `join_key` to better support primary keys. +* Added subset and assignment to `JoinKeySet` class to manipulate relationship pair keys. ### Breaking changes diff --git a/R/get_join_keys.R b/R/get_join_keys.R index 453116f8c..cc50ead0d 100644 --- a/R/get_join_keys.R +++ b/R/get_join_keys.R @@ -14,36 +14,24 @@ get_join_keys.teal_data <- function(data) { #' @rdname get_join_keys #' @export -get_join_keys.TealData <- function(data) { - data$get_join_keys() +get_join_keys.JoinKeys <- function(data) { + data } -#' #' @rdname get_join_keys -#' #' @export -#' get_join_keys.JoinKeys <- function(data) { -#' data -#' } -#' -#' #' @rdname get_join_keys -#' #' @export -#' `get_join_keys<-` <- function(data, dataset_1, dataset_2 = NULL, value) { -#' UseMethod("get_join_keys<-", data) -#' } -#' -#' #' @rdname get_join_keys -#' #' @export -#' `get_join_keys<-.JoinKeys` <- function(data, dataset_1, dataset_2 = NULL, value) { -#' data -#' } -#' -#' #' @rdname get_join_keys -#' #' @export -#' `get_join_keys<-.teal_data` <- function(data, dataset_1, dataset_2 = NULL, value) { -#' data -#' } -#' -#' #' @rdname get_join_keys -#' #' @export -#' `get_join_keys<-.TealData` <- function(data, dataset_1, dataset_2 = NULL, value) { -#' data -#' } +#' @rdname get_join_keys +#' @export +`get_join_keys<-` <- function(data, dataset_1, dataset_2 = NULL, value) { + UseMethod("get_join_keys<-", data) +} + +#' @rdname get_join_keys +#' @export +`get_join_keys<-.JoinKeys` <- function(data, dataset_1, dataset_2 = NULL, value) { + data +} + +#' @rdname get_join_keys +#' @export +`get_join_keys<-.teal_data` <- function(data, dataset_1, dataset_2 = NULL, value) { + data +} From 0e4edcfab742800c14757ecd1826ea197605d33b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ver=C3=ADssimo?= <211358+averissimo@users.noreply.github.com> Date: Mon, 23 Oct 2023 12:35:39 +0200 Subject: [PATCH 10/19] docs: regenerate docs --- NAMESPACE | 5 ++++- man/get_join_keys.Rd | 13 +++++++++++-- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 70583eb35..4755b69c9 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,6 +2,8 @@ S3method("[",JoinKeys) S3method("[<-",JoinKeys) +S3method("get_join_keys<-",JoinKeys) +S3method("get_join_keys<-",teal_data) S3method(as_cdisc,TealDataset) S3method(as_cdisc,TealDatasetConnector) S3method(dataset,MultiAssayExperiment) @@ -24,7 +26,7 @@ S3method(get_datasets,TealDataAbstract) S3method(get_datasets,TealDataset) S3method(get_datasets,TealDatasetConnector) S3method(get_datasets,teal_data) -S3method(get_join_keys,TealData) +S3method(get_join_keys,JoinKeys) S3method(get_join_keys,teal_data) S3method(get_key_duplicates,TealDataset) S3method(get_key_duplicates,data.frame) @@ -62,6 +64,7 @@ S3method(to_relational_data,data.frame) S3method(to_relational_data,list) export("col_labels<-") export("data_label<-") +export("get_join_keys<-") export(as_cdisc) export(callable_code) export(callable_function) diff --git a/man/get_join_keys.Rd b/man/get_join_keys.Rd index e34804dc9..169589bde 100644 --- a/man/get_join_keys.Rd +++ b/man/get_join_keys.Rd @@ -3,14 +3,23 @@ \name{get_join_keys} \alias{get_join_keys} \alias{get_join_keys.teal_data} -\alias{get_join_keys.TealData} +\alias{get_join_keys.JoinKeys} +\alias{get_join_keys<-} +\alias{get_join_keys<-.JoinKeys} +\alias{get_join_keys<-.teal_data} \title{Function to get join keys from a `` object} \usage{ get_join_keys(data) \method{get_join_keys}{teal_data}(data) -\method{get_join_keys}{TealData}(data) +\method{get_join_keys}{JoinKeys}(data) + +get_join_keys(data, dataset_1, dataset_2 = NULL) <- value + +\method{get_join_keys}{JoinKeys}(data, dataset_1, dataset_2 = NULL) <- value + +\method{get_join_keys}{teal_data}(data, dataset_1, dataset_2 = NULL) <- value } \arguments{ \item{data}{`` - object to extract the join keys} From 66f233d5195700d8566257355e4bde54f40e68b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ver=C3=ADssimo?= <211358+averissimo@users.noreply.github.com> Date: Mon, 23 Oct 2023 12:38:41 +0200 Subject: [PATCH 11/19] docs: minor addition --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index a8f6ce7bc..6c5bfea49 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,7 +2,7 @@ ### Enhancements * Simplified `join_key` to better support primary keys. -* Added subset and assignment to `JoinKeySet` class to manipulate relationship pair keys. +* Added subset and subset-assignment to `JoinKeySet` class to manipulate relationship pair keys _(`[` and `[<-`)_. ### Breaking changes From e1b72c7434eff8a4925372d488f26073363bcd72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ver=C3=ADssimo?= <211358+averissimo@users.noreply.github.com> Date: Mon, 23 Oct 2023 16:56:40 +0200 Subject: [PATCH 12/19] fix: revert change --- R/teal_data.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/teal_data.R b/R/teal_data.R index ad7dd8eb2..9da01458e 100644 --- a/R/teal_data.R +++ b/R/teal_data.R @@ -32,7 +32,7 @@ #' }) #' ) teal_data <- function(..., - join_keys = NULL, + join_keys = teal.data::join_keys(), code = "", check = FALSE) { data_objects <- list(...) From 87b733462a3d08eed045d594c980bcdafbbccb39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ver=C3=ADssimo?= <211358+averissimo@users.noreply.github.com> Date: Mon, 23 Oct 2023 17:33:46 +0200 Subject: [PATCH 13/19] feat: adds tests and simplifies cdisc_data --- R/CallableFunction.R | 1 - R/JoinKeys.R | 21 +++++--- R/cdisc_data.R | 76 +++++++++++---------------- man/deprecated_join_keys_extract.Rd | 13 +++++ man/teal_data.Rd | 2 +- tests/testthat/helper-get_join_keys.R | 45 ++++++++++++++++ tests/testthat/test-JoinKeys.R | 31 +++++++++++ tests/testthat/test-cdisc_data.R | 2 +- tests/testthat/test-get_join_keys.R | 23 ++++++++ 9 files changed, 160 insertions(+), 54 deletions(-) create mode 100644 man/deprecated_join_keys_extract.Rd create mode 100644 tests/testthat/helper-get_join_keys.R create mode 100644 tests/testthat/test-get_join_keys.R diff --git a/R/CallableFunction.R b/R/CallableFunction.R index 4eb87c68f..5eb20a5c7 100644 --- a/R/CallableFunction.R +++ b/R/CallableFunction.R @@ -144,7 +144,6 @@ CallableFunction <- R6::R6Class( # nolint # @return nothing refresh = function() { if (!is.null(private$fun_name) || !identical(private$fun_name, character(0))) { - # replaced str2lang found at: # https://rlang.r-lib.org/reference/call2.html private$call <- as.call( diff --git a/R/JoinKeys.R b/R/JoinKeys.R index d0d22803a..55dcc7393 100644 --- a/R/JoinKeys.R +++ b/R/JoinKeys.R @@ -401,13 +401,21 @@ join_keys <- function(...) { #' cdisc_join_keys(join_key("dataset_A", "dataset_B", c("col_1" = "col_a")), "ADTTE") #' cdisc_join_keys <- function(...) { - x <- list(...) + data_objects <- list(...) - x_parsed <- lapply(seq_along(x), function(ix) { - item <- x[[ix]] + data_objects_parsed <- lapply(seq_along(data_objects), function(ix) { + item <- data_objects[[ix]] + name <- names(data_objects)[ix] %||% item # fallback to value if names are not set - name <- names(x)[ix] %||% item # fallback to value if names are not set if ( + checkmate::test_r6(item) && + checkmate::test_multi_class( + item, + classes = c("TealDataConnector", "TealDataset", "TealDatasetConnector") + ) + ) { + return(NULL) + } else if ( checkmate::test_class(item, "JoinKeySet") || !checkmate::test_string(name, min.chars = 1) || !name %in% names(default_cdisc_keys)) { @@ -424,9 +432,10 @@ cdisc_join_keys <- function(...) { # Add JoinKey with parent dataset (if exists) append(result, list(join_key(name, keys_list$parent, keys = keys_list$foreign))) }) - x_parsed <- do.call(c, x_parsed) - do.call(join_keys, x_parsed) + data_objects_parsed <- do.call(c, data_objects_parsed) + + do.call(join_keys, as.list(data_objects_parsed[!is.null(data_objects_parsed)])) } # wrappers ==== diff --git a/R/cdisc_data.R b/R/cdisc_data.R index db2620119..19f23fb99 100644 --- a/R/cdisc_data.R +++ b/R/cdisc_data.R @@ -37,25 +37,24 @@ cdisc_data <- function(..., code = "", check = FALSE) { data_objects <- list(...) + deprecated_join_keys_extract(data_objects, join_keys) + teal_data(..., join_keys = join_keys, code = code, check = check) +} - # todo: is it really important? - to remove - if (inherits(join_keys, "JoinKeySet")) { - join_keys <- teal.data::join_keys(join_keys) - } - - if ( - checkmate::test_list(data_objects, types = c("TealDataConnector", "TealDataset", "TealDatasetConnector")) - ) { - lifecycle::deprecate_warn( - when = "0.3.1", - "cdisc_data( - data_objects = 'should use data directly. Using TealDatasetConnector and TealDataset is deprecated.' - )" - ) - update_join_keys_to_primary(data_objects, join_keys) +#' Extrapolate parents from `TealData` classes +#' +#' note: This function will be removed once the following classes are defunct: +#' `TealDataConnector`, `TealDataset`, `TealDatasetConnector` +#' +#' @keywords internal +deprecated_join_keys_extract <- function(data_objects, join_keys) { + # TODO: check if redundant with same call in teal_data body + update_join_keys_to_primary(data_objects, join_keys) - new_parents_fun <- function(data_objects) { - lapply(data_objects, function(x) { + new_parents_fun <- function(data_objects) { + lapply( + data_objects, + function(x) { if (inherits(x, "TealDataConnector")) { unlist(new_parents_fun(x$get_items()), recursive = FALSE) } else { @@ -66,40 +65,27 @@ cdisc_data <- function(..., ) ) } - }) - } - - new_parents <- unlist(new_parents_fun(data_objects), recursive = FALSE) - - names(new_parents) <- unlist(lapply(data_objects, function(x) { - if (inherits(x, "TealDataConnector")) { - lapply(x$get_items(), function(y) y$get_dataname()) - } else { - x$get_datanames() } - })) - - if (is_dag(new_parents)) { - stop("Cycle detected in a parent and child dataset graph.") - } - join_keys$set_parents(new_parents) - join_keys$update_keys_given_parents() + ) + } - x <- TealData$new(..., check = check, join_keys = join_keys) + new_parents <- unlist(new_parents_fun(data_objects), recursive = FALSE) - if (length(code) > 0 && !identical(code, "")) { - x$set_pull_code(code = code) + names(new_parents) <- unlist(lapply(data_objects, function(x) { + if (inherits(x, "TealDataConnector")) { + lapply(x$get_items(), function(y) y$get_dataname()) + } else { + x$get_datanames() } + })) - x$check_reproducibility() - x$check_metadata() - x - } else { - if (!checkmate::test_names(names(data_objects), type = "named")) { - stop("Dot (`...`) arguments on `teal_data()` must be named.") - } - new_teal_data(data = data_objects, code = code, keys = join_keys) + if (is_dag(new_parents)) { + stop("Cycle detected in a parent and child dataset graph.") } + join_keys$set_parents(new_parents) + join_keys$update_keys_given_parents() + + join_keys } #' Load `TealData` object from a file diff --git a/man/deprecated_join_keys_extract.Rd b/man/deprecated_join_keys_extract.Rd new file mode 100644 index 000000000..714fd370c --- /dev/null +++ b/man/deprecated_join_keys_extract.Rd @@ -0,0 +1,13 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cdisc_data.R +\name{deprecated_join_keys_extract} +\alias{deprecated_join_keys_extract} +\title{Extrapolate parents from \code{TealData} classes} +\usage{ +deprecated_join_keys_extract(data_objects, join_keys) +} +\description{ +note: This function will be removed once the following classes are defunct: +\code{TealDataConnector}, \code{TealDataset}, \code{TealDatasetConnector} +} +\keyword{internal} diff --git a/man/teal_data.Rd b/man/teal_data.Rd index 5453e55c1..0880a0c6b 100644 --- a/man/teal_data.Rd +++ b/man/teal_data.Rd @@ -4,7 +4,7 @@ \alias{teal_data} \title{Teal data} \usage{ -teal_data(..., join_keys = NULL, code = "", check = FALSE) +teal_data(..., join_keys = teal.data::join_keys(), code = "", check = FALSE) } \arguments{ \item{...}{(\code{TealDataConnector}, \code{TealDataset}, \code{TealDatasetConnector}, \code{any})\cr diff --git a/tests/testthat/helper-get_join_keys.R b/tests/testthat/helper-get_join_keys.R new file mode 100644 index 000000000..7f1cf82a8 --- /dev/null +++ b/tests/testthat/helper-get_join_keys.R @@ -0,0 +1,45 @@ +#' Generate a teal_data dataset with sample data and JoinKeys +helper_generator_teal_data <- function() { + iris2 <- iris + iris2$id <- rnorm(NROW(iris2)) + iris2$id <- apply(iris2, 1, rlang::hash) + new_teal_data( + list( + ds1 = iris2, + ds2 = iris2 + ), + code = "ds1 <- iris2; ds2 <- iris2", + keys = helper_generator_JoinKeys("ds1", keys = c("id")) + ) +} + +#' Generate a JoinKeys +helper_generator_JoinKeys <- function(dataset_1 = "ds1", keys = c("id")) { + join_keys( + join_key(dataset_1, keys = keys) + ) +} + +#' Test suite for default get_join generated by helper +helper_test_get_join_keys <- function(obj, dataset_1 = "ds1") { + jk <- get_join_keys(obj) + + expect_s3_class(jk, c("JoinKey", "R6")) + expect_length(jk$get(), 1) + expect_length(jk$get(dataset_1), 1) + + obj +} + +#' Test suite for JoinKeys after manual adding a primary key +helper_test_get_join_keys_add <- function(obj, dataset_1 = "ds1", new_dataset_1 = "ds2", new_keys = c("id")) { + obj <- helper_test_get_join_keys(obj, dataset_1) + get_join_keys(obj)[new_dataset_1] <- c(new_keys) + + jk <- get_join_keys(obj) + + checkmate::expect_r6(jk, c("JoinKeys")) + expect_length(jk$get(), 2) + expect_length(jk$get(dataset_1), 1) + expect_length(jk$get(new_dataset_1), 1) +} diff --git a/tests/testthat/test-JoinKeys.R b/tests/testthat/test-JoinKeys.R index 7ea9c3d0a..97191dcf3 100644 --- a/tests/testthat/test-JoinKeys.R +++ b/tests/testthat/test-JoinKeys.R @@ -1,3 +1,34 @@ +test_that("[.JoinKeys returns the primary key if arguments only have 1 dataset", { + jk <- join_keys(join_key("ds1", keys = c("id"))) + + expect_failure(expect_identical(jk$get("ds1"), jk["ds1"])) + checkmate::expect_character(jk["ds1"]) +}) + +test_that("[.JoinKeys subsets relationship pair successfully", { + jk <- join_keys(join_key("ds1", keys = c("id"))) + + expect_identical(jk$get("ds1", "ds1"), jk["ds1"]) +}) + +test_that("[<-.JoinKeys assigns new relationship pair", { + jk <- join_keys(join_key("ds1", keys = c("id"))) + + expect_length(jk$get("ds1", "ds2"), 0) + + jk["ds1", "ds2"] <- c("id") + expect_identical(jk$get("ds1", "ds2"), c(id = "id")) + expect_identical(jk$get("ds1", "ds2"), jk["ds1", "ds2"]) +}) + +test_that("[<-.JoinKeys modifies existing relationship pair", { + jk <- join_keys(join_key("ds1", keys = c("id"))) + + jk["ds1", "ds1"] <- c("Species") + expect_failure(expect_identical(jk$get("ds1", "ds1"), c(id = "id"))) + expect_identical(jk$get("ds1", "ds1"), c(Species = "Species")) +}) + test_that("join_key throws error with invalid keys arguments", { # invalid types expect_error(join_key("d1", "d2", keys = NULL)) diff --git a/tests/testthat/test-cdisc_data.R b/tests/testthat/test-cdisc_data.R index 78bb7162f..50340eca9 100644 --- a/tests/testthat/test-cdisc_data.R +++ b/tests/testthat/test-cdisc_data.R @@ -33,7 +33,7 @@ cdisc_data_mixed_call <- function(check = TRUE, join_keys1 = join_keys()) { } testthat::test_that("cdisc_data accepts TealDataset, TealDatasetConnector, TealDataConnector objects", { - testthat::expect_silent(data <- cdisc_data_mixed_call()) + lifecycle::expect_deprecated(data <- cdisc_data_mixed_call(), "should use data directly") testthat::expect_identical(data$get_datanames(), c("ADSL", "ADTTE", "ADAE")) }) diff --git a/tests/testthat/test-get_join_keys.R b/tests/testthat/test-get_join_keys.R new file mode 100644 index 000000000..7a693a5e9 --- /dev/null +++ b/tests/testthat/test-get_join_keys.R @@ -0,0 +1,23 @@ +test_that("get_join_keys.teal_data will successfully obtain object from teal_data", { + obj <- helper_generator_teal_data() + + expect_identical(obj@join_keys, get_join_keys(obj)) + helper_test_get_join_keys(obj, "ds1") +}) + +test_that("get_join_keys.JoinKeys will return itself", { + obj <- helper_generator_JoinKeys() + + expect_identical(obj, get_join_keys(obj)) + helper_test_get_join_keys(obj, "ds1") +}) + +test_that("get_join_keys<-.teal_data", { + obj <- helper_generator_teal_data() + helper_test_get_join_keys_add(obj, "ds1", "ds2") +}) + +test_that("get_join_keys<-.JoinKeys", { + obj <- helper_generator_JoinKeys() + helper_test_get_join_keys_add(obj, "ds1", "ds2") +}) From ceb7db690efe89d92acf921ce1e33b03851d8737 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ver=C3=ADssimo?= <211358+averissimo@users.noreply.github.com> Date: Tue, 24 Oct 2023 12:39:37 +0200 Subject: [PATCH 14/19] adds tests and improves on getting clauses --- R/JoinKeys.R | 6 ++- R/cdisc_data.R | 10 ++++ tests/testthat/test-JoinKeys.R | 88 ++++++++++++++++++++++++++++++++++ 3 files changed, 103 insertions(+), 1 deletion(-) diff --git a/R/JoinKeys.R b/R/JoinKeys.R index 55dcc7393..52d7da6d2 100644 --- a/R/JoinKeys.R +++ b/R/JoinKeys.R @@ -405,7 +405,9 @@ cdisc_join_keys <- function(...) { data_objects_parsed <- lapply(seq_along(data_objects), function(ix) { item <- data_objects[[ix]] - name <- names(data_objects)[ix] %||% item # fallback to value if names are not set + + name <- names(data_objects)[ix] + if (is.null(name) || identical(trimws(name), "")) name <- item # fallback to value if names are not set if ( checkmate::test_r6(item) && @@ -414,6 +416,8 @@ cdisc_join_keys <- function(...) { classes = c("TealDataConnector", "TealDataset", "TealDatasetConnector") ) ) { + # Code not refactored for these data types as they'll be deprecated soon + # see logic in function `deprecated_join_keys_extract` called under `cdisc_data` return(NULL) } else if ( checkmate::test_class(item, "JoinKeySet") || diff --git a/R/cdisc_data.R b/R/cdisc_data.R index 19f23fb99..3ced30d9e 100644 --- a/R/cdisc_data.R +++ b/R/cdisc_data.R @@ -43,11 +43,21 @@ cdisc_data <- function(..., #' Extrapolate parents from `TealData` classes #' +#' `r lifecycle::badge("deprecated")` +#' #' note: This function will be removed once the following classes are defunct: #' `TealDataConnector`, `TealDataset`, `TealDatasetConnector` #' #' @keywords internal deprecated_join_keys_extract <- function(data_objects, join_keys) { + if ( + !checkmate::test_list( + data_objects, + types = c("TealDataConnector", "TealDataset", "TealDatasetConnector") + ) + ) { + return(join_keys) + } # TODO: check if redundant with same call in teal_data body update_join_keys_to_primary(data_objects, join_keys) diff --git a/tests/testthat/test-JoinKeys.R b/tests/testthat/test-JoinKeys.R index 97191dcf3..cde08bd67 100644 --- a/tests/testthat/test-JoinKeys.R +++ b/tests/testthat/test-JoinKeys.R @@ -1,3 +1,91 @@ +test_that("cdisc_join_keys will generate JoinKeys for named list with non-named elements", { + new_dataset <- cdisc_join_keys("ADSL", ADTTE = rADTTE) + jk <- get_join_keys(new_dataset) + + expect_identical(unname(jk$get("ADSL", "ADSL")), default_cdisc_keys[["ADSL"]]$primary) + expect_identical(unname(jk$get("ADTTE", "ADTTE")), default_cdisc_keys[["ADTTE"]]$primary) + + expect_identical(unname(jk$get("ADSL", "ADTTE")), default_cdisc_keys[["ADTTE"]]$foreign) + expect_identical(unname(jk$get("ADTTE", "ADSL")), default_cdisc_keys[["ADTTE"]]$foreign) +}) + +test_that("cdisc_join_keys will generate JoinKeys for character list", { + new_dataset <- cdisc_join_keys("ADSL", "ADTTE") + jk <- get_join_keys(new_dataset) + + expect_identical(unname(jk$get("ADSL", "ADSL")), default_cdisc_keys[["ADSL"]]$primary) + expect_identical(unname(jk$get("ADTTE", "ADTTE")), default_cdisc_keys[["ADTTE"]]$primary) + + expect_identical(unname(jk$get("ADSL", "ADTTE")), default_cdisc_keys[["ADTTE"]]$foreign) + expect_identical(unname(jk$get("ADTTE", "ADSL")), default_cdisc_keys[["ADTTE"]]$foreign) +}) + +test_that("cdisc_join_keys will generate JoinKeys for named list", { + new_dataset <- cdisc_join_keys(ADSL = rADSL, ADTTE = rADTTE) + jk <- get_join_keys(new_dataset) + + expect_identical(unname(jk$get("ADSL", "ADSL")), default_cdisc_keys[["ADSL"]]$primary) + expect_identical(unname(jk$get("ADTTE", "ADTTE")), default_cdisc_keys[["ADTTE"]]$primary) + + expect_identical(unname(jk$get("ADSL", "ADTTE")), default_cdisc_keys[["ADTTE"]]$foreign) + expect_identical(unname(jk$get("ADTTE", "ADSL")), default_cdisc_keys[["ADTTE"]]$foreign) +}) + +test_that("cdisc_join_keys will retrieve ADTTE primary and foreign keys", { + datasets <- names(default_cdisc_keys) + + internal_keys <- default_cdisc_keys[["ADTTE"]] + jk <- cdisc_join_keys("ADTTE") + primary_keys <- unname(jk$get("ADTTE", "ADTTE")) + + expect_equal(primary_keys, internal_keys$primary) + + foreign_keys <- unname(jk$get("ADTTE", internal_keys$parent)) + expect_equal(foreign_keys, internal_keys$foreign) +}) + +test_that("cdisc_join_keys will retrieve known primary and foreign keys", { + datasets <- names(default_cdisc_keys) + + vapply( + datasets, + function(.x) { + internal_keys <- default_cdisc_keys[[.x]] + jk <- cdisc_join_keys(.x) + primary_keys <- unname(jk$get(.x, .x)) + expect_equal(primary_keys, internal_keys$primary) + if (!is.null(internal_keys$foreign)) { + foreign_keys <- unname(jk$get(.x, internal_keys$parent)) + expect_equal(foreign_keys, internal_keys$foreign) + } + character(0) + }, + character(0) + ) +}) + +test_that("cdisc_join_keys will retrieve known primary keys", { + datasets <- names(default_cdisc_keys) + + vapply( + datasets, + function(.x) { + jk <- cdisc_join_keys(.x) + expect_equal(unname(jk[.x]), get_cdisc_keys(.x)) + character(0) + }, + character(0) + ) +}) + +test_that("cdisc_join_keys does nothing with TealDataset", { + adae_cf <- callable_function( + function() as.data.frame(as.list(setNames(nm = get_cdisc_keys("ADAE")))) + ) + adae_cdc <- cdisc_dataset_connector("ADAE", adae_cf, keys = get_cdisc_keys("ADAE")) + expect_length(get_join_keys(cdisc_join_keys(adae_cdc))$get(), 0) +}) + test_that("[.JoinKeys returns the primary key if arguments only have 1 dataset", { jk <- join_keys(join_key("ds1", keys = c("id"))) From 7cfc05d265ad730e9712a99981fc5462e4962695 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ver=C3=ADssimo?= <211358+averissimo@users.noreply.github.com> Date: Tue, 24 Oct 2023 12:43:53 +0200 Subject: [PATCH 15/19] docs reorder new tests to the bottom of test file --- tests/testthat/test-JoinKeys.R | 238 ++++++++++++++++----------------- 1 file changed, 119 insertions(+), 119 deletions(-) diff --git a/tests/testthat/test-JoinKeys.R b/tests/testthat/test-JoinKeys.R index cde08bd67..72b300ae4 100644 --- a/tests/testthat/test-JoinKeys.R +++ b/tests/testthat/test-JoinKeys.R @@ -1,122 +1,3 @@ -test_that("cdisc_join_keys will generate JoinKeys for named list with non-named elements", { - new_dataset <- cdisc_join_keys("ADSL", ADTTE = rADTTE) - jk <- get_join_keys(new_dataset) - - expect_identical(unname(jk$get("ADSL", "ADSL")), default_cdisc_keys[["ADSL"]]$primary) - expect_identical(unname(jk$get("ADTTE", "ADTTE")), default_cdisc_keys[["ADTTE"]]$primary) - - expect_identical(unname(jk$get("ADSL", "ADTTE")), default_cdisc_keys[["ADTTE"]]$foreign) - expect_identical(unname(jk$get("ADTTE", "ADSL")), default_cdisc_keys[["ADTTE"]]$foreign) -}) - -test_that("cdisc_join_keys will generate JoinKeys for character list", { - new_dataset <- cdisc_join_keys("ADSL", "ADTTE") - jk <- get_join_keys(new_dataset) - - expect_identical(unname(jk$get("ADSL", "ADSL")), default_cdisc_keys[["ADSL"]]$primary) - expect_identical(unname(jk$get("ADTTE", "ADTTE")), default_cdisc_keys[["ADTTE"]]$primary) - - expect_identical(unname(jk$get("ADSL", "ADTTE")), default_cdisc_keys[["ADTTE"]]$foreign) - expect_identical(unname(jk$get("ADTTE", "ADSL")), default_cdisc_keys[["ADTTE"]]$foreign) -}) - -test_that("cdisc_join_keys will generate JoinKeys for named list", { - new_dataset <- cdisc_join_keys(ADSL = rADSL, ADTTE = rADTTE) - jk <- get_join_keys(new_dataset) - - expect_identical(unname(jk$get("ADSL", "ADSL")), default_cdisc_keys[["ADSL"]]$primary) - expect_identical(unname(jk$get("ADTTE", "ADTTE")), default_cdisc_keys[["ADTTE"]]$primary) - - expect_identical(unname(jk$get("ADSL", "ADTTE")), default_cdisc_keys[["ADTTE"]]$foreign) - expect_identical(unname(jk$get("ADTTE", "ADSL")), default_cdisc_keys[["ADTTE"]]$foreign) -}) - -test_that("cdisc_join_keys will retrieve ADTTE primary and foreign keys", { - datasets <- names(default_cdisc_keys) - - internal_keys <- default_cdisc_keys[["ADTTE"]] - jk <- cdisc_join_keys("ADTTE") - primary_keys <- unname(jk$get("ADTTE", "ADTTE")) - - expect_equal(primary_keys, internal_keys$primary) - - foreign_keys <- unname(jk$get("ADTTE", internal_keys$parent)) - expect_equal(foreign_keys, internal_keys$foreign) -}) - -test_that("cdisc_join_keys will retrieve known primary and foreign keys", { - datasets <- names(default_cdisc_keys) - - vapply( - datasets, - function(.x) { - internal_keys <- default_cdisc_keys[[.x]] - jk <- cdisc_join_keys(.x) - primary_keys <- unname(jk$get(.x, .x)) - expect_equal(primary_keys, internal_keys$primary) - if (!is.null(internal_keys$foreign)) { - foreign_keys <- unname(jk$get(.x, internal_keys$parent)) - expect_equal(foreign_keys, internal_keys$foreign) - } - character(0) - }, - character(0) - ) -}) - -test_that("cdisc_join_keys will retrieve known primary keys", { - datasets <- names(default_cdisc_keys) - - vapply( - datasets, - function(.x) { - jk <- cdisc_join_keys(.x) - expect_equal(unname(jk[.x]), get_cdisc_keys(.x)) - character(0) - }, - character(0) - ) -}) - -test_that("cdisc_join_keys does nothing with TealDataset", { - adae_cf <- callable_function( - function() as.data.frame(as.list(setNames(nm = get_cdisc_keys("ADAE")))) - ) - adae_cdc <- cdisc_dataset_connector("ADAE", adae_cf, keys = get_cdisc_keys("ADAE")) - expect_length(get_join_keys(cdisc_join_keys(adae_cdc))$get(), 0) -}) - -test_that("[.JoinKeys returns the primary key if arguments only have 1 dataset", { - jk <- join_keys(join_key("ds1", keys = c("id"))) - - expect_failure(expect_identical(jk$get("ds1"), jk["ds1"])) - checkmate::expect_character(jk["ds1"]) -}) - -test_that("[.JoinKeys subsets relationship pair successfully", { - jk <- join_keys(join_key("ds1", keys = c("id"))) - - expect_identical(jk$get("ds1", "ds1"), jk["ds1"]) -}) - -test_that("[<-.JoinKeys assigns new relationship pair", { - jk <- join_keys(join_key("ds1", keys = c("id"))) - - expect_length(jk$get("ds1", "ds2"), 0) - - jk["ds1", "ds2"] <- c("id") - expect_identical(jk$get("ds1", "ds2"), c(id = "id")) - expect_identical(jk$get("ds1", "ds2"), jk["ds1", "ds2"]) -}) - -test_that("[<-.JoinKeys modifies existing relationship pair", { - jk <- join_keys(join_key("ds1", keys = c("id"))) - - jk["ds1", "ds1"] <- c("Species") - expect_failure(expect_identical(jk$get("ds1", "ds1"), c(id = "id"))) - expect_identical(jk$get("ds1", "ds1"), c(Species = "Species")) -}) - test_that("join_key throws error with invalid keys arguments", { # invalid types expect_error(join_key("d1", "d2", keys = NULL)) @@ -769,3 +650,122 @@ testthat::test_that("JoinKeys$check_parent_child throws error if no join_keys ex "No join keys from df2 to its parent \\(df1\\) and vice versa" ) }) + +test_that("cdisc_join_keys will generate JoinKeys for named list with non-named elements", { + new_dataset <- cdisc_join_keys("ADSL", ADTTE = rADTTE) + jk <- get_join_keys(new_dataset) + + expect_identical(unname(jk$get("ADSL", "ADSL")), default_cdisc_keys[["ADSL"]]$primary) + expect_identical(unname(jk$get("ADTTE", "ADTTE")), default_cdisc_keys[["ADTTE"]]$primary) + + expect_identical(unname(jk$get("ADSL", "ADTTE")), default_cdisc_keys[["ADTTE"]]$foreign) + expect_identical(unname(jk$get("ADTTE", "ADSL")), default_cdisc_keys[["ADTTE"]]$foreign) +}) + +test_that("cdisc_join_keys will generate JoinKeys for character list", { + new_dataset <- cdisc_join_keys("ADSL", "ADTTE") + jk <- get_join_keys(new_dataset) + + expect_identical(unname(jk$get("ADSL", "ADSL")), default_cdisc_keys[["ADSL"]]$primary) + expect_identical(unname(jk$get("ADTTE", "ADTTE")), default_cdisc_keys[["ADTTE"]]$primary) + + expect_identical(unname(jk$get("ADSL", "ADTTE")), default_cdisc_keys[["ADTTE"]]$foreign) + expect_identical(unname(jk$get("ADTTE", "ADSL")), default_cdisc_keys[["ADTTE"]]$foreign) +}) + +test_that("cdisc_join_keys will generate JoinKeys for named list", { + new_dataset <- cdisc_join_keys(ADSL = rADSL, ADTTE = rADTTE) + jk <- get_join_keys(new_dataset) + + expect_identical(unname(jk$get("ADSL", "ADSL")), default_cdisc_keys[["ADSL"]]$primary) + expect_identical(unname(jk$get("ADTTE", "ADTTE")), default_cdisc_keys[["ADTTE"]]$primary) + + expect_identical(unname(jk$get("ADSL", "ADTTE")), default_cdisc_keys[["ADTTE"]]$foreign) + expect_identical(unname(jk$get("ADTTE", "ADSL")), default_cdisc_keys[["ADTTE"]]$foreign) +}) + +test_that("cdisc_join_keys will retrieve ADTTE primary and foreign keys", { + datasets <- names(default_cdisc_keys) + + internal_keys <- default_cdisc_keys[["ADTTE"]] + jk <- cdisc_join_keys("ADTTE") + primary_keys <- unname(jk$get("ADTTE", "ADTTE")) + + expect_equal(primary_keys, internal_keys$primary) + + foreign_keys <- unname(jk$get("ADTTE", internal_keys$parent)) + expect_equal(foreign_keys, internal_keys$foreign) +}) + +test_that("cdisc_join_keys will retrieve known primary and foreign keys", { + datasets <- names(default_cdisc_keys) + + vapply( + datasets, + function(.x) { + internal_keys <- default_cdisc_keys[[.x]] + jk <- cdisc_join_keys(.x) + primary_keys <- unname(jk$get(.x, .x)) + expect_equal(primary_keys, internal_keys$primary) + if (!is.null(internal_keys$foreign)) { + foreign_keys <- unname(jk$get(.x, internal_keys$parent)) + expect_equal(foreign_keys, internal_keys$foreign) + } + character(0) + }, + character(0) + ) +}) + +test_that("cdisc_join_keys will retrieve known primary keys", { + datasets <- names(default_cdisc_keys) + + vapply( + datasets, + function(.x) { + jk <- cdisc_join_keys(.x) + expect_equal(unname(jk[.x]), get_cdisc_keys(.x)) + character(0) + }, + character(0) + ) +}) + +test_that("cdisc_join_keys does nothing with TealDataset", { + adae_cf <- callable_function( + function() as.data.frame(as.list(setNames(nm = get_cdisc_keys("ADAE")))) + ) + adae_cdc <- cdisc_dataset_connector("ADAE", adae_cf, keys = get_cdisc_keys("ADAE")) + expect_length(get_join_keys(cdisc_join_keys(adae_cdc))$get(), 0) +}) + +test_that("[.JoinKeys returns the primary key if arguments only have 1 dataset", { + jk <- join_keys(join_key("ds1", keys = c("id"))) + + expect_failure(expect_identical(jk$get("ds1"), jk["ds1"])) + checkmate::expect_character(jk["ds1"]) +}) + +test_that("[.JoinKeys subsets relationship pair successfully", { + jk <- join_keys(join_key("ds1", keys = c("id"))) + + expect_identical(jk$get("ds1", "ds1"), jk["ds1"]) +}) + +test_that("[<-.JoinKeys assigns new relationship pair", { + jk <- join_keys(join_key("ds1", keys = c("id"))) + + expect_length(jk$get("ds1", "ds2"), 0) + + jk["ds1", "ds2"] <- c("id") + expect_identical(jk$get("ds1", "ds2"), c(id = "id")) + expect_identical(jk$get("ds1", "ds2"), jk["ds1", "ds2"]) +}) + +test_that("[<-.JoinKeys modifies existing relationship pair", { + jk <- join_keys(join_key("ds1", keys = c("id"))) + + jk["ds1", "ds1"] <- c("Species") + expect_failure(expect_identical(jk$get("ds1", "ds1"), c(id = "id"))) + expect_identical(jk$get("ds1", "ds1"), c(Species = "Species")) +}) From 6f70c199397ebf3063b26cf78426b903097edb87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ver=C3=ADssimo?= <211358+averissimo@users.noreply.github.com> Date: Tue, 24 Oct 2023 12:50:48 +0200 Subject: [PATCH 16/19] docs: update to deprecated_join_keys_extract --- man/deprecated_join_keys_extract.Rd | 3 +++ 1 file changed, 3 insertions(+) diff --git a/man/deprecated_join_keys_extract.Rd b/man/deprecated_join_keys_extract.Rd index 714fd370c..beae3c4c4 100644 --- a/man/deprecated_join_keys_extract.Rd +++ b/man/deprecated_join_keys_extract.Rd @@ -7,6 +7,9 @@ deprecated_join_keys_extract(data_objects, join_keys) } \description{ +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} +} +\details{ note: This function will be removed once the following classes are defunct: \code{TealDataConnector}, \code{TealDataset}, \code{TealDatasetConnector} } From 1e5531a9ac9d0baa29243f16c6cdf1e9b9871d42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ver=C3=ADssimo?= <211358+averissimo@users.noreply.github.com> Date: Tue, 24 Oct 2023 13:32:23 +0200 Subject: [PATCH 17/19] minor: fix linter errors and problem with Rmd --- R/JoinKeys.R | 9 ++++++--- vignettes/join-keys.Rmd | 6 +++--- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/R/JoinKeys.R b/R/JoinKeys.R index 52d7da6d2..14757393f 100644 --- a/R/JoinKeys.R +++ b/R/JoinKeys.R @@ -301,8 +301,10 @@ JoinKeys <- R6::R6Class( # nolint return(TRUE) } - if (xor(length(join_key_1$keys) == 0, length(join_key_2$keys) == 0) || - !identical(sort(join_key_1$keys), sort(setNames(names(join_key_2$keys), join_key_2$keys)))) { + if ( + xor(length(join_key_1$keys) == 0, length(join_key_2$keys) == 0) || + !identical(sort(join_key_1$keys), sort(setNames(names(join_key_2$keys), join_key_2$keys))) + ) { error_message(join_key_1$dataset_1, join_key_1$dataset_2) } } @@ -422,7 +424,8 @@ cdisc_join_keys <- function(...) { } else if ( checkmate::test_class(item, "JoinKeySet") || !checkmate::test_string(name, min.chars = 1) || - !name %in% names(default_cdisc_keys)) { + !name %in% names(default_cdisc_keys) + ) { return(list(item)) } diff --git a/vignettes/join-keys.Rmd b/vignettes/join-keys.Rmd index a12a8dacc..0ee134bc2 100644 --- a/vignettes/join-keys.Rmd +++ b/vignettes/join-keys.Rmd @@ -52,9 +52,9 @@ data <- teal_data( dataset("D1", data_1, code = "D1 <- data.frame(X = factor(1:10), Y = 21:30, Z = 1:10)"), dataset("D2", data_2, code = "D2 <- data.frame(W = factor(10:1), V = factor(5:14), M = rep(1:5, 2))"), join_keys = join_keys( - join_key("D1", c("X")), - join_key("D2", c("V", "W")) - # join_key("D2", "D2", c("V", "W")), # equivalent to using primary_key + join_key("D1", keys = c("X")), + join_key("D2", keys = c("V", "W")) + # join_key("D2", "D2", keys = c("V", "W")), # equivalent to using primary_key ) ) data$get_join_keys() From 98eedbd4c4a1c9a2b497ec228400b547de9dc5cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ver=C3=ADssimo?= <211358+averissimo@users.noreply.github.com> Date: Tue, 24 Oct 2023 15:25:09 +0200 Subject: [PATCH 18/19] docs: adds some extra roxygen tags --- R/JoinKeys.R | 4 ++++ R/get_join_keys.R | 3 +++ man/get_join_keys.Rd | 6 ++++++ man/sub-.JoinKeys.Rd | 9 +++++++++ 4 files changed, 22 insertions(+) diff --git a/R/JoinKeys.R b/R/JoinKeys.R index 14757393f..e0df2ab8c 100644 --- a/R/JoinKeys.R +++ b/R/JoinKeys.R @@ -372,6 +372,9 @@ join_keys <- function(...) { } #' @title Getter for JoinKeys that returns the relationship between pairs of datasets +#' @param x JoinKeys object to extract the join keys +#' @param dataset_1 (`character`) name of first dataset. +#' @param dataset_2 (`character`) name of second dataset. #' @export `[.JoinKeys` <- function(x, dataset_1, dataset_2 = NULL) { checkmate::assert_string(dataset_1) @@ -382,6 +385,7 @@ join_keys <- function(...) { } #' @rdname sub-.JoinKeys +#' @param value value to assign #' @export `[<-.JoinKeys` <- function(x, dataset_1, dataset_2 = NULL, value) { checkmate::assert_string(dataset_1) diff --git a/R/get_join_keys.R b/R/get_join_keys.R index cc50ead0d..09389e05a 100644 --- a/R/get_join_keys.R +++ b/R/get_join_keys.R @@ -19,12 +19,15 @@ get_join_keys.JoinKeys <- function(data) { } #' @rdname get_join_keys +#' @inheritParams mutate_join_keys +#' @param value value to assign #' @export `get_join_keys<-` <- function(data, dataset_1, dataset_2 = NULL, value) { UseMethod("get_join_keys<-", data) } #' @rdname get_join_keys +#' @inheritParams mutate_join_keys #' @export `get_join_keys<-.JoinKeys` <- function(data, dataset_1, dataset_2 = NULL, value) { data diff --git a/man/get_join_keys.Rd b/man/get_join_keys.Rd index 169589bde..046ce0281 100644 --- a/man/get_join_keys.Rd +++ b/man/get_join_keys.Rd @@ -23,6 +23,12 @@ get_join_keys(data, dataset_1, dataset_2 = NULL) <- value } \arguments{ \item{data}{`` - object to extract the join keys} + +\item{dataset_1}{(\code{character}) one dataset name} + +\item{dataset_2}{(\code{character}) other dataset name} + +\item{value}{value to assign} } \value{ Either \code{JoinKeys} object or \code{NULL} if no join keys diff --git a/man/sub-.JoinKeys.Rd b/man/sub-.JoinKeys.Rd index 96e0e4a33..b6649c8c0 100644 --- a/man/sub-.JoinKeys.Rd +++ b/man/sub-.JoinKeys.Rd @@ -9,6 +9,15 @@ \method{[}{JoinKeys}(x, dataset_1, dataset_2 = NULL) <- value } +\arguments{ +\item{x}{JoinKeys object to extract the join keys} + +\item{dataset_1}{(\code{character}) name of first dataset.} + +\item{dataset_2}{(\code{character}) name of second dataset.} + +\item{value}{value to assign} +} \description{ Getter for JoinKeys that returns the relationship between pairs of datasets } From 45e57e8ce5d3c62b91c4572a024c3bbe3138ca8a Mon Sep 17 00:00:00 2001 From: go_gonzo Date: Wed, 25 Oct 2023 04:42:36 +0200 Subject: [PATCH 19/19] reduce code in cdisc_join_keys --- R/JoinKeys.R | 43 +++++++++++++++++-------------------------- 1 file changed, 17 insertions(+), 26 deletions(-) diff --git a/R/JoinKeys.R b/R/JoinKeys.R index e0df2ab8c..388471d2d 100644 --- a/R/JoinKeys.R +++ b/R/JoinKeys.R @@ -409,44 +409,35 @@ join_keys <- function(...) { cdisc_join_keys <- function(...) { data_objects <- list(...) - data_objects_parsed <- lapply(seq_along(data_objects), function(ix) { + join_keys <- join_keys() + lapply(seq_along(data_objects), function(ix) { item <- data_objects[[ix]] - name <- names(data_objects)[ix] - if (is.null(name) || identical(trimws(name), "")) name <- item # fallback to value if names are not set - if ( - checkmate::test_r6(item) && - checkmate::test_multi_class( - item, - classes = c("TealDataConnector", "TealDataset", "TealDatasetConnector") - ) - ) { - # Code not refactored for these data types as they'll be deprecated soon - # see logic in function `deprecated_join_keys_extract` called under `cdisc_data` + if ((is.null(name) || identical(trimws(name), "")) && is.character(item)) { + name <- item + } else if (checkmate::test_class(item, "JoinKeySet")) { + join_keys$set(item) return(NULL) } else if ( - checkmate::test_class(item, "JoinKeySet") || - !checkmate::test_string(name, min.chars = 1) || - !name %in% names(default_cdisc_keys) + checkmate::test_multi_class(item, c("TealDataConnector", "TealDataset", "TealDatasetConnector")) ) { - return(list(item)) + return(NULL) } - # Add primary key - result <- list(join_key(name, keys = get_cdisc_keys(name))) - keys_list <- default_cdisc_keys[[name]] + if (name %in% names(default_cdisc_keys)) { + # Set default primary keys + keys_list <- default_cdisc_keys[[name]] + join_keys[name] <- keys_list$primary - if (is.null(keys_list) || is.null(keys_list$parent) || is.null(keys_list$foreign)) { - return(result) + if (!is.null(keys_list$parent) && !is.null(keys_list$foreign)) { + join_keys[name, keys_list$parent] <- keys_list$foreign + } } - # Add JoinKey with parent dataset (if exists) - append(result, list(join_key(name, keys_list$parent, keys = keys_list$foreign))) - }) - data_objects_parsed <- do.call(c, data_objects_parsed) + }) - do.call(join_keys, as.list(data_objects_parsed[!is.null(data_objects_parsed)])) + join_keys } # wrappers ====