Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Simplifies data constructor for primary keys in join_keys #179

Merged
merged 19 commits into from
Oct 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Generated by roxygen2: do not edit by hand

S3method("[",JoinKeys)
S3method("[<-",JoinKeys)
S3method("get_join_keys<-",JoinKeys)
S3method("get_join_keys<-",teal_data)
S3method(as_cdisc,TealDataset)
S3method(as_cdisc,TealDatasetConnector)
S3method(dataset,MultiAssayExperiment)
Expand All @@ -22,7 +26,7 @@ S3method(get_datasets,TealDataAbstract)
S3method(get_datasets,TealDataset)
S3method(get_datasets,TealDatasetConnector)
S3method(get_datasets,teal_data)
S3method(get_join_keys,TealData)
S3method(get_join_keys,JoinKeys)
S3method(get_join_keys,teal_data)
S3method(get_key_duplicates,TealDataset)
S3method(get_key_duplicates,data.frame)
Expand Down Expand Up @@ -60,6 +64,7 @@ S3method(to_relational_data,data.frame)
S3method(to_relational_data,list)
export("col_labels<-")
export("data_label<-")
export("get_join_keys<-")
export(as_cdisc)
export(callable_code)
export(callable_function)
Expand All @@ -70,6 +75,7 @@ export(cdisc_dataset)
export(cdisc_dataset_connector)
export(cdisc_dataset_connector_file)
export(cdisc_dataset_file)
export(cdisc_join_keys)
export(code_cdisc_dataset_connector)
export(code_dataset_connector)
export(col_labels)
Expand Down Expand Up @@ -125,5 +131,6 @@ import(shiny)
import(teal.code)
importFrom(digest,digest)
importFrom(logger,log_trace)
importFrom(rlang,"%||%")
importFrom(shinyjs,useShinyjs)
importFrom(stats,setNames)
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
# teal.data 0.3.0.9007

### Enhancements
* Simplified `join_key` to better support primary keys.
* Added subset and subset-assignment to `JoinKeySet` class to manipulate relationship pair keys _(`[` and `[<-`)_.

### Breaking changes

* Introduced new data class (`teal_data`) which replaces deprecated `TealData`. New data class becomes a standard input for whole `teal` framework.
* Deprecated `teal_data` constructor when `TealDataset`, `TealDatasetConnector` or `TealDataConnector` objects are provided. New delayed data loading functions introduced in `teal` package.
=======

### Miscellaneous
* Specified minimal version of package dependencies.
Expand Down
1 change: 0 additions & 1 deletion R/CallableFunction.R
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,6 @@ CallableFunction <- R6::R6Class( # nolint
# @return nothing
refresh = function() {
if (!is.null(private$fun_name) || !identical(private$fun_name, character(0))) {

# replaced str2lang found at:
# https://rlang.r-lib.org/reference/call2.html
private$call <- as.call(
Expand Down
88 changes: 82 additions & 6 deletions R/JoinKeys.R
Original file line number Diff line number Diff line change
Expand Up @@ -301,8 +301,10 @@ JoinKeys <- R6::R6Class( # nolint
return(TRUE)
}

if (xor(length(join_key_1$keys) == 0, length(join_key_2$keys) == 0) ||
!identical(sort(join_key_1$keys), sort(setNames(names(join_key_2$keys), join_key_2$keys)))) {
if (
xor(length(join_key_1$keys) == 0, length(join_key_2$keys) == 0) ||
!identical(sort(join_key_1$keys), sort(setNames(names(join_key_2$keys), join_key_2$keys)))
) {
error_message(join_key_1$dataset_1, join_key_1$dataset_2)
}
}
Expand Down Expand Up @@ -361,7 +363,6 @@ JoinKeys <- R6::R6Class( # nolint
#' )
join_keys <- function(...) {
x <- list(...)

res <- JoinKeys$new()
if (length(x) > 0) {
res$set(x)
Expand All @@ -370,6 +371,75 @@ join_keys <- function(...) {
res
}

#' @title Getter for JoinKeys that returns the relationship between pairs of datasets
#' @param x JoinKeys object to extract the join keys
#' @param dataset_1 (`character`) name of first dataset.
#' @param dataset_2 (`character`) name of second dataset.
#' @export
`[.JoinKeys` <- function(x, dataset_1, dataset_2 = NULL) {
checkmate::assert_string(dataset_1)
checkmate::assert_string(dataset_2, null.ok = TRUE)

dataset_2 <- dataset_2 %||% dataset_1
x$get(dataset_1, dataset_2)
}

#' @rdname sub-.JoinKeys
#' @param value value to assign
#' @export
`[<-.JoinKeys` <- function(x, dataset_1, dataset_2 = NULL, value) {
checkmate::assert_string(dataset_1)
checkmate::assert_string(dataset_2, null.ok = TRUE)

dataset_2 <- dataset_2 %||% dataset_1
x$mutate(dataset_1, dataset_2, value)
x
}

#' @rdname join_keys
#' @details
#' `cdisc_join_keys` treat non-`JoinKeySet` arguments as possible CDISC datasets.
#' The `dataname` is extrapolated from the name (or fallback to the value itself if
#' it's a `character(1)`).
#'
#' @export
#' @examples
#' cdisc_join_keys(join_key("dataset_A", "dataset_B", c("col_1" = "col_a")), "ADTTE")
#'
cdisc_join_keys <- function(...) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just realized that we need cdisc_join_keys also in ddl constructor. It means that we won't have a named list of object - we can only have names of datasets.

ddl <- function(code, ui, server, input_mask, datanames, join_keys = cdisc_join_keys(datanames))

It means we need a cdisc_join_keys function which can just accept names of datasets. When I look at the function body, name (datanames) is exactly what this function needs. So I think that maybe we should have datanames argument instead of ...? And do following

cdisc_data(..., join_keys = cdisc_join_keys(names(...))

In above, in case of old-classes list will be unnamed so function will return NULL.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So the function actually takes strings in ... and parses them as if they are named lists.. the following calls are valid (as of now)

# Identical output for all 3 calls
cdisc_join_keys(ADSL = rADSL, ADTTE = rADTTE)
cdisc_join_keys("ADSL", ADTTE = rADTTE)
cdisc_join_keys("ADSL", "ADTTE")

But we can also use your suggestion: cdisc_data(..., join_keys = cdisc_join_keys(names(...))

WDYT?

data_objects <- list(...)

join_keys <- join_keys()
lapply(seq_along(data_objects), function(ix) {
item <- data_objects[[ix]]
name <- names(data_objects)[ix]

if ((is.null(name) || identical(trimws(name), "")) && is.character(item)) {
name <- item
} else if (checkmate::test_class(item, "JoinKeySet")) {
join_keys$set(item)
return(NULL)
} else if (
checkmate::test_multi_class(item, c("TealDataConnector", "TealDataset", "TealDatasetConnector"))
) {
return(NULL)
}

if (name %in% names(default_cdisc_keys)) {
# Set default primary keys
keys_list <- default_cdisc_keys[[name]]
join_keys[name] <- keys_list$primary

if (!is.null(keys_list$parent) && !is.null(keys_list$foreign)) {
join_keys[name, keys_list$parent] <- keys_list$foreign
}
}

})

join_keys
}

# wrappers ====
#' Mutate `JoinKeys` with a new values
#'
Expand Down Expand Up @@ -425,12 +495,16 @@ mutate_join_keys.TealData <- function(x, dataset_1, dataset_2, val) { # nolint
x$mutate_join_keys(dataset_1, dataset_2, val)
}


#' Create a relationship between a pair of datasets
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @details `join_key()` will create a relationship for the variables on a pair
#' of datasets.
#'
#' @inheritParams mutate_join_keys
#' @param dataset_2 (`character`) other dataset name. In case it is omitted, then it
#' will create a primary key for `dataset_1`.
#' @param keys (optionally named `character`) where `names(keys)` are columns in `dataset_1`
#' with relationship to columns of `dataset_2` given by the elements in `keys`.
#' If `names(keys)` is `NULL` then the same column names are used for both `dataset_1`
Expand All @@ -441,11 +515,13 @@ mutate_join_keys.TealData <- function(x, dataset_1, dataset_2, val) { # nolint
#' @seealso [join_keys()]
#'
#' @export
join_key <- function(dataset_1, dataset_2, keys) {
join_key <- function(dataset_1, dataset_2 = NULL, keys) {
checkmate::assert_string(dataset_1)
checkmate::assert_string(dataset_2)
checkmate::assert_string(dataset_2, null.ok = TRUE)
checkmate::assert_character(keys, any.missing = FALSE)

dataset_2 <- dataset_2 %||% dataset_1

if (length(keys) > 0) {
if (is.null(names(keys))) {
names(keys) <- keys
Expand Down
84 changes: 40 additions & 44 deletions R/cdisc_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,29 +33,38 @@
#' )
#' )
cdisc_data <- function(...,
join_keys = teal.data::join_keys(),
join_keys = teal.data::cdisc_join_keys(...),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At this moment cdisc_join_keys() doesn't work when ... are old class types. When object in ... is of class TealDataset or TealDatasetConnector or TealDataConnector function fails.

Previously join_keys default was just empty JoinKeys object which was refilled inside cdisc_data function body.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for this, as we discussed in the call I've added this and refactored the code to remove repetitive code in cdisc_data

code = "",
check = FALSE) {
data_objects <- list(...)
deprecated_join_keys_extract(data_objects, join_keys)
teal_data(..., join_keys = join_keys, code = code, check = check)
}

# todo: is it really important? - to remove
if (inherits(join_keys, "JoinKeySet")) {
join_keys <- teal.data::join_keys(join_keys)
}

#' Extrapolate parents from `TealData` classes
#'
#' `r lifecycle::badge("deprecated")`
#'
#' note: This function will be removed once the following classes are defunct:
#' `TealDataConnector`, `TealDataset`, `TealDatasetConnector`
#'
#' @keywords internal
deprecated_join_keys_extract <- function(data_objects, join_keys) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You need to add this condition to skip deprecated_join_keys_extract in case you provide named list of objects into ...

Suggested change
deprecated_join_keys_extract <- function(data_objects, join_keys) {
deprecated_join_keys_extract <- function(data_objects, join_keys) {
if (checkmate::test_list(data_objects, types = c("TealDataConnector", "TealDataset", "TealDatasetConnector"))) {
return(NULL)
}

Copy link
Contributor Author

@averissimo averissimo Oct 24, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍 Thanks! I noticed this too when testing. Added on 9630699 (negated though)

  if (
    !checkmate::test_list(
      data_objects,
      types = c("TealDataConnector", "TealDataset", "TealDatasetConnector")
    )
  ) {
    return(join_keys)
  }

if (
checkmate::test_list(data_objects, types = c("TealDataConnector", "TealDataset", "TealDatasetConnector"))
) {
lifecycle::deprecate_warn(
when = "0.3.1",
"cdisc_data(
data_objects = 'should use data directly. Using TealDatasetConnector and TealDataset is deprecated.'
)"
!checkmate::test_list(
data_objects,
types = c("TealDataConnector", "TealDataset", "TealDatasetConnector")
)
update_join_keys_to_primary(data_objects, join_keys)
) {
return(join_keys)
}
# TODO: check if redundant with same call in teal_data body
update_join_keys_to_primary(data_objects, join_keys)

new_parents_fun <- function(data_objects) {
lapply(data_objects, function(x) {
new_parents_fun <- function(data_objects) {
lapply(
data_objects,
function(x) {
if (inherits(x, "TealDataConnector")) {
unlist(new_parents_fun(x$get_items()), recursive = FALSE)
} else {
Expand All @@ -66,40 +75,27 @@ cdisc_data <- function(...,
)
)
}
})
}

new_parents <- unlist(new_parents_fun(data_objects), recursive = FALSE)

names(new_parents) <- unlist(lapply(data_objects, function(x) {
if (inherits(x, "TealDataConnector")) {
lapply(x$get_items(), function(y) y$get_dataname())
} else {
x$get_datanames()
}
}))

if (is_dag(new_parents)) {
stop("Cycle detected in a parent and child dataset graph.")
}
join_keys$set_parents(new_parents)
join_keys$update_keys_given_parents()
)
}

x <- TealData$new(..., check = check, join_keys = join_keys)
new_parents <- unlist(new_parents_fun(data_objects), recursive = FALSE)

if (length(code) > 0 && !identical(code, "")) {
x$set_pull_code(code = code)
names(new_parents) <- unlist(lapply(data_objects, function(x) {
if (inherits(x, "TealDataConnector")) {
lapply(x$get_items(), function(y) y$get_dataname())
} else {
x$get_datanames()
}
}))

x$check_reproducibility()
x$check_metadata()
x
} else {
if (!checkmate::test_names(names(data_objects), type = "named")) {
stop("Dot (`...`) arguments on `teal_data()` must be named.")
}
new_teal_data(data = data_objects, code = code, keys = join_keys)
if (is_dag(new_parents)) {
stop("Cycle detected in a parent and child dataset graph.")
}
join_keys$set_parents(new_parents)
join_keys$update_keys_given_parents()

join_keys
}

#' Load `TealData` object from a file
Expand Down
26 changes: 23 additions & 3 deletions R/get_join_keys.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ get_join_keys <- function(data) {
UseMethod("get_join_keys", data)
}


#' @rdname get_join_keys
#' @export
get_join_keys.teal_data <- function(data) {
Expand All @@ -15,6 +14,27 @@ get_join_keys.teal_data <- function(data) {

#' @rdname get_join_keys
#' @export
get_join_keys.TealData <- function(data) {
data$get_join_keys()
get_join_keys.JoinKeys <- function(data) {
data
}

#' @rdname get_join_keys
#' @inheritParams mutate_join_keys
#' @param value value to assign
#' @export
`get_join_keys<-` <- function(data, dataset_1, dataset_2 = NULL, value) {
UseMethod("get_join_keys<-", data)
}

#' @rdname get_join_keys
#' @inheritParams mutate_join_keys
#' @export
`get_join_keys<-.JoinKeys` <- function(data, dataset_1, dataset_2 = NULL, value) {
data
}

#' @rdname get_join_keys
#' @export
`get_join_keys<-.teal_data` <- function(data, dataset_1, dataset_2 = NULL, value) {
data
}
1 change: 1 addition & 0 deletions R/teal.data.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

# Fix R CMD check notes
#' @import shiny
#' @importFrom rlang %||%
#' @importFrom digest digest
#' @importFrom stats setNames
#' @importFrom shinyjs useShinyjs
Expand Down
14 changes: 8 additions & 6 deletions inst/WORDLIST
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
CDISC
Forkers
Hoffmann
Pre
Reproducibility
SCDA
UI
cloneable
Forkers
formatters
funder
Getter
Hoffmann
iteratively
JoinKeys
Pre
pre
repo
Reproducibility
reproducibility
SCDA
UI
7 changes: 6 additions & 1 deletion man/cdisc_data.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading