diff --git a/DESCRIPTION b/DESCRIPTION index 9a97061c6..14a406fe3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: teal.data Title: Data Model for 'teal' Applications -Version: 0.6.0.9016 +Version: 0.6.0.9017 Date: 2024-11-08 Authors@R: c( person("Dawid", "Kaledkowski", , "dawid.kaledkowski@roche.com", role = c("aut", "cre"), @@ -72,10 +72,11 @@ Collate: 'join_keys.R' 'teal.data.R' 'teal_data-class.R' + 'teal_data-constructor.R' + 'teal_data-extract.R' 'teal_data-get_code.R' 'teal_data-names.R' 'teal_data-show.R' - 'teal_data.R' 'testhat-helpers.R' 'topological_sort.R' 'verify.R' diff --git a/NAMESPACE b/NAMESPACE index a126ccc47..df8555991 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,6 +1,7 @@ # Generated by roxygen2: do not edit by hand S3method("[",join_keys) +S3method("[",teal_data) S3method("[<-",join_keys) S3method("[[<-",join_keys) S3method("join_keys<-",join_keys) diff --git a/NEWS.md b/NEWS.md index 0ab6238ad..765092fb9 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# teal.data 0.6.0.9016 +# teal.data 0.6.0.9017 ### Breaking changes diff --git a/R/join_keys-extract.R b/R/join_keys-extract.R index 46e2c73b1..a533c5045 100644 --- a/R/join_keys-extract.R +++ b/R/join_keys-extract.R @@ -2,9 +2,9 @@ #' @order 2 #' #' @section Functions: -#' - `x[datanames]`: Returns a subset of the `join_keys` object for -#' given `datanames`, including parent `datanames` and symmetric mirror keys between -#' `datanames` in the result. +#' - `x[names]`: Returns a subset of the `join_keys` object for +#' given `names`, including parent `names` and symmetric mirror keys between +#' `names` in the result. #' - `x[i, j]`: Returns join keys between datasets `i` and `j`, #' including implicit keys inferred from their relationship with a parent. #' diff --git a/R/teal_data-class.R b/R/teal_data-class.R index aea827319..85ce9e3bc 100644 --- a/R/teal_data-class.R +++ b/R/teal_data-class.R @@ -18,15 +18,9 @@ setOldClass("join_keys") #' auxiliary variables. #' Access variables with [get()], [`$`], [get_var()] or [`[[`]. #' No setter provided. Evaluate code to add variables into `@.xData`. -#' @slot code (`character`) vector representing code necessary to reproduce the -#' contents of `@.xData`. +#' @slot code (`list` of `character`) representing code necessary to reproduce the contents of `qenv`. #' Access with [get_code()]. #' No setter provided. Evaluate code to append code to the slot. -#' @slot id (`integer`) random identifier assigned to each element of `@code`. -#' Used internally. -#' @slot warnings (`character`) vector of warnings raised when evaluating code. -#' Access with [get_warnings()]. -#' @slot messages (`character`) vector of messages raised when evaluating code. #' @slot join_keys (`join_keys`) object specifying joining keys for data sets in #' `@.xData`. #' Access or modify with [join_keys()]. @@ -34,6 +28,15 @@ setOldClass("join_keys") #' proven to yield contents of `@.xData`. #' Used internally. See [`verify()`] for more details. #' +#' @section Code: +#' +#' Each code element is a character representing one call. Each element has possible attributes: +#' - `warnings` (`character`) the warnings output when evaluating the code element +#' - `messages` (`character`) the messages output when evaluating the code element +#' - `id (`integer`) random identifier of the code element to make sure uniqueness when joining +#' - `dependency` (`character`) names of objects that appear in this call and gets affected by this call, +#' separated by `<-` (objects on LHS of `<-` are affected by this line, and objects on RHS are affecting this line) +#' #' @import teal.code #' @keywords internal setClass( @@ -54,7 +57,7 @@ setClass( setMethod( "initialize", "teal_data", - function(.Object, .xData = list(), join_keys = join_keys(), ...) { # nolint: object_name. + function(.Object, .xData = list(), join_keys = join_keys(), code = list(), ...) { # nolint: object_name. # Allow .xData to be a list and convert it to an environment if (!missing(.xData) && inherits(.xData, "list")) { .xData <- rlang::env_clone(list2env(.xData), parent = parent.env(.GlobalEnv)) # nolint: object_name. @@ -64,12 +67,57 @@ setMethod( checkmate::assert_environment(.xData) checkmate::assert_class(join_keys, "join_keys") checkmate::assert_list(args, names = "named") + if (!any(is.language(code), is.character(code))) { + stop("`code` must be a character or language object.") + } + + if (is.language(code)) { + code <- paste(lang2calls(code), collapse = "\n") + } + if (length(code)) { + code <- paste(code, collapse = "\n") + } + methods::callNextMethod( .Object, .xData, join_keys = join_keys, verified = (length(args$code) == 0L && length(.xData) == 0L), + code = code2list(code), ... ) } ) + +#' Reshape code to the list +#' +#' List will be divided by the calls. Each element of the list contains `id` and `dependency` attributes. +#' +#' @param code `character` with the code. +#' +#' @return list of `character`s of the length equal to the number of calls in `code`. +#' +#' @keywords internal +#' @noRd +code2list <- function(code) { + checkmate::assert_character(code, null.ok = TRUE) + if (length(code) == 0) { + return(list()) + } + + parsed_code <- parse(text = code, keep.source = TRUE) + + if (length(parsed_code)) { + lapply(split_code(code), function(current_code) { + attr(current_code, "id") <- sample.int(.Machine$integer.max, 1) + parsed_code <- parse(text = current_code, keep.source = TRUE) + attr(current_code, "dependency") <- extract_dependency(parsed_code) + current_code + }) + } else { + # empty code like "", or just comments + attr(code, "id") <- sample.int(.Machine$integer.max, size = 1) + attr(code, "dependency") <- extract_dependency(parsed_code) # in case comment contains @linksto tag + list(code) + } +} diff --git a/R/teal_data.R b/R/teal_data-constructor.R similarity index 100% rename from R/teal_data.R rename to R/teal_data-constructor.R diff --git a/R/teal_data-extract.R b/R/teal_data-extract.R new file mode 100644 index 000000000..dfa719a5c --- /dev/null +++ b/R/teal_data-extract.R @@ -0,0 +1,30 @@ +#' +#' @section Subsetting: +#' `x[names]` subsets objects in `teal_data` environment and limit the code to the necessary needed to build limited +#' objects. +#' +#' @param names (`character`) names of objects included in `teal_subset` to subset +#' @param x (`teal_data`) +#' +#' @examples +#' +#' # Subsetting +#' data <- teal_data() +#' data <- eval_code(data, "a <- 1;b<-2") +#' data["a"] +#' data[c("a", "b")] +#' +#' join_keys(data) <- join_keys(join_key("a", "b", "x")) +#' join_keys(data["a"]) # should show empty keys +#' join_keys(data["b"]) +#' join_keys(data)["a"] # should show empty keys +#' join_keys(data)["b"] +#' +#' @rdname teal_data +#' +#' @export +`[.teal_data` <- function(x, names) { + x <- NextMethod("`[`", x, check_code_names = x@verified) # unverified doesn't need warning for code inconsistency + x@join_keys <- x@join_keys[names] + x +} diff --git a/R/teal_data-get_code.R b/R/teal_data-get_code.R index 23002590d..52c237a71 100644 --- a/R/teal_data-get_code.R +++ b/R/teal_data-get_code.R @@ -121,7 +121,7 @@ setMethod("get_code", } if (!is.null(names) && lifecycle::is_present(datanames)) { - stop("Please use either 'names' (recommended) or 'datanames' parameter.") + stop("'names' shouldn't be specified with deprecated 'datanames' parameter.") } checkmate::assert_character(names, min.len = 1L, null.ok = TRUE) diff --git a/R/teal_data-names.R b/R/teal_data-names.R index 32e9d71de..1c28ccd56 100644 --- a/R/teal_data-names.R +++ b/R/teal_data-names.R @@ -24,18 +24,18 @@ names.teal_data <- function(x) { # Sorting from `ls` can be safely done as environments don't have any order # nor support numeric-index subsetting envir <- as.environment(x) - .get_sorted_names(ls(envir = envir), join_keys(x), envir) + .get_sorted_names(names = ls(envir = envir), join_keys = join_keys(x), env = envir) } #' @export length.teal.data <- function(x) length(ls(x)) #' @keywords internal -.get_sorted_names <- function(datanames, join_keys, env) { - child_parent <- sapply(datanames, parent, x = join_keys, USE.NAMES = TRUE, simplify = FALSE) +.get_sorted_names <- function(names, join_keys, env) { + child_parent <- sapply(names, parent, x = join_keys, USE.NAMES = TRUE, simplify = FALSE) union( intersect(unlist(topological_sort(child_parent)), ls(env, all.names = TRUE)), - datanames + names ) } diff --git a/R/zzz.R b/R/zzz.R index b1dfb9432..7f78c9dff 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -1,2 +1,4 @@ # use non-exported function from teal.code lang2calls <- getFromNamespace("lang2calls", "teal.code") +extract_dependency <- getFromNamespace("extract_dependency", "teal.code") +split_code <- getFromNamespace("split_code", "teal.code") diff --git a/man/join_keys.Rd b/man/join_keys.Rd index bc1e9a15f..df13905bf 100644 --- a/man/join_keys.Rd +++ b/man/join_keys.Rd @@ -101,9 +101,9 @@ in a parent-child relationship and the mapping is automatically mirrored between \section{Functions}{ \itemize{ -\item \code{x[datanames]}: Returns a subset of the \code{join_keys} object for -given \code{datanames}, including parent \code{datanames} and symmetric mirror keys between -\code{datanames} in the result. +\item \code{x[names]}: Returns a subset of the \code{join_keys} object for +given \code{names}, including parent \code{names} and symmetric mirror keys between +\code{names} in the result. \item \code{x[i, j]}: Returns join keys between datasets \code{i} and \code{j}, including implicit keys inferred from their relationship with a parent. } diff --git a/man/teal_data-class.Rd b/man/teal_data-class.Rd index 707029e66..86da5a14c 100644 --- a/man/teal_data-class.Rd +++ b/man/teal_data-class.Rd @@ -23,19 +23,10 @@ auxiliary variables. Access variables with \code{\link[=get]{get()}}, \code{\link{$}}, \code{\link[=get_var]{get_var()}} or [\code{[[}]. No setter provided. Evaluate code to add variables into \verb{@.xData}.} -\item{\code{code}}{(\code{character}) vector representing code necessary to reproduce the -contents of \verb{@.xData}. +\item{\code{code}}{(\code{list} of \code{character}) representing code necessary to reproduce the contents of \code{qenv}. Access with \code{\link[=get_code]{get_code()}}. No setter provided. Evaluate code to append code to the slot.} -\item{\code{id}}{(\code{integer}) random identifier assigned to each element of \verb{@code}. -Used internally.} - -\item{\code{warnings}}{(\code{character}) vector of warnings raised when evaluating code. -Access with \code{\link[=get_warnings]{get_warnings()}}.} - -\item{\code{messages}}{(\code{character}) vector of messages raised when evaluating code.} - \item{\code{join_keys}}{(\code{join_keys}) object specifying joining keys for data sets in \verb{@.xData}. Access or modify with \code{\link[=join_keys]{join_keys()}}.} @@ -45,4 +36,17 @@ proven to yield contents of \verb{@.xData}. Used internally. See \code{\link[=verify]{verify()}} for more details.} }} +\section{Code}{ + + +Each code element is a character representing one call. Each element has possible attributes: +\itemize{ +\item \code{warnings} (\code{character}) the warnings output when evaluating the code element +\item \code{messages} (\code{character}) the messages output when evaluating the code element +\item \verb{id (}integer`) random identifier of the code element to make sure uniqueness when joining +\item \code{dependency} (\code{character}) names of objects that appear in this call and gets affected by this call, +separated by \verb{<-} (objects on LHS of \verb{<-} are affected by this line, and objects on RHS are affecting this line) +} +} + \keyword{internal} diff --git a/man/teal_data.Rd b/man/teal_data.Rd index b249aece4..c55e2a0d6 100644 --- a/man/teal_data.Rd +++ b/man/teal_data.Rd @@ -1,10 +1,13 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/teal_data.R +% Please edit documentation in R/teal_data-constructor.R, R/teal_data-extract.R \name{teal_data} \alias{teal_data} +\alias{[.teal_data} \title{Comprehensive data integration function for \code{teal} applications} \usage{ teal_data(..., join_keys = teal.data::join_keys(), code = character(0), check) + +\method{[}{teal_data}(x, names) } \arguments{ \item{...}{any number of objects (presumably data objects) provided as \code{name = value} pairs.} @@ -18,6 +21,10 @@ Note this code is not executed and the \code{teal_data} may not be reproducible} \item{check}{(\code{logical}) \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} Use \code{\link[=verify]{verify()}} to verify code reproducibility .} + +\item{x}{(\code{teal_data})} + +\item{names}{(\code{character}) names of objects included in \code{teal_subset} to subset} } \value{ A \code{teal_data} object. @@ -27,7 +34,26 @@ A \code{teal_data} object. Universal function to pass data to teal application. } +\section{Subsetting}{ + +\code{x[names]} subsets objects in \code{teal_data} environment and limit the code to the necessary needed to build limited +objects. +} + \examples{ teal_data(x1 = iris, x2 = mtcars) + +# Subsetting +data <- teal_data() +data <- eval_code(data, "a <- 1;b<-2") +data["a"] +data[c("a", "b")] + +join_keys(data) <- join_keys(join_key("a", "b", "x")) +join_keys(data["a"]) # should show empty keys +join_keys(data["b"]) +join_keys(data)["a"] # should show empty keys +join_keys(data)["b"] + } diff --git a/tests/testthat/test-extract.R b/tests/testthat/test-extract.R new file mode 100644 index 000000000..0541b9685 --- /dev/null +++ b/tests/testthat/test-extract.R @@ -0,0 +1,20 @@ +testthat::test_that("`[.` subsets join_keys also", { + data <- teal_data(a = 1, b = 2) + join_keys(data) <- join_keys(join_key("a", "b", "x")) + testthat::expect_length(join_keys(data["a"]), 0) +}) + +testthat::test_that("`[.` preserves @verified field", { + testthat::expect_false(teal_data(a = 1, b = 2)["a"]@verified) + testthat::expect_true(within(teal_data(), a <- 1)["a"]@verified) +}) + +testthat::test_that("`[.` warns and subsets if names are present in code", { + data <- teal_data(a = 1, b = 2, code = "a <- 1; b <- 2; c <- 3; d <- 4") + testthat::expect_warning( + subset <- data[c("a", "c", "d")], + "Some 'names' do not exist in the environment of the 'teal_data'. Skipping those: c, d." + ) + testthat::expect_identical(subset@code, data@code[c(1, 3, 4)]) + testthat::expect_identical(as.list(subset), as.list(data)[1]) +}) diff --git a/tests/testthat/test-teal_data.R b/tests/testthat/test-teal_data.R index a7d715d3e..268bf11c3 100644 --- a/tests/testthat/test-teal_data.R +++ b/tests/testthat/test-teal_data.R @@ -2,11 +2,8 @@ testthat::test_that("teal_data allows to initialize empty teal_data object", { testthat::expect_s4_class(teal_data(), "teal_data") }) -testthat::test_that("empty teal_data returns empty code, id, wartnings and messages and verified=TRUE", { - testthat::expect_identical(teal_data()@code, character(0)) - testthat::expect_identical(teal_data()@id, integer(0)) - testthat::expect_identical(teal_data()@messages, character(0)) - testthat::expect_identical(teal_data()@warnings, character(0)) +testthat::test_that("empty teal_data returns empty code and verified=TRUE", { + testthat::expect_identical(teal_data()@code, list()) testthat::expect_identical(teal_data()@verified, TRUE) }) @@ -71,16 +68,16 @@ testthat::test_that("teal_data accepts code as language", { testthat::test_that("teal_data code unfolds code-block wrapped in '{'", { testthat::expect_identical( - teal_data(iris1 = iris, code = quote({ + get_code(teal_data(iris1 = iris, code = quote({ iris1 <- iris - }))@code, + }))), "iris1 <- iris" ) }) testthat::test_that("teal_data code is concatenated into single string", { testthat::expect_identical( - teal_data(iris1 = iris, code = c("iris1 <- iris", "iris2 <- iris1"))@code, + get_code(teal_data(iris1 = iris, code = c("iris1 <- iris", "iris2 <- iris1"))), "iris1 <- iris\niris2 <- iris1" ) }) diff --git a/tests/testthat/test-verify.R b/tests/testthat/test-verify.R index 4e15fb696..f80537895 100644 --- a/tests/testthat/test-verify.R +++ b/tests/testthat/test-verify.R @@ -20,7 +20,7 @@ testthat::test_that("verify returns the same object with changed @verified field testthat::expect_identical(teal.code::get_env(tdata2_ver), teal.code::get_env(tdata2)) }) -testthat::test_that("verify raises error if @code does not restore objects in @env", { +testthat::test_that("verify raises error if @code does not restore objects in @.xData", { tdata3 <- teal_data(x1 = iris, code = "x1 = mtcars") testthat::expect_error(verify(tdata3), "Code verification failed.") diff --git a/vignettes/teal-data.Rmd b/vignettes/teal-data.Rmd index 2288a7f5b..1301814b3 100644 --- a/vignettes/teal-data.Rmd +++ b/vignettes/teal-data.Rmd @@ -30,16 +30,22 @@ my_data <- within( { data1 <- data.frame(id = 1:10, x = 11:20) data2 <- data.frame(id = 1:10, x = 21:30) + data3 <- data.frame(id = 1:10, x = 31:40) } ) # get objects stored in teal_data my_data[["data1"]] -my_data[["data1"]] +my_data[["data2"]] + +# limit objects stored in teal_data +my_data[c("data1", "data3")] # get reproducible code get_code(my_data) +# get code just for specific object +get_code(my_data, names = "data2") # get datanames names(my_data) @@ -91,6 +97,9 @@ join_keys(my_data) <- join_keys( ) join_keys(my_data) + +# join_keys for limited object +join_keys(my_data["child"]) ``` ### Hidden objects