From 3d899ee0fbfe429ad02fcdfab24d27f3faf4b605 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ver=C3=ADssimo?= <211358+averissimo@users.noreply.github.com> Date: Fri, 8 Nov 2024 09:58:21 +0000 Subject: [PATCH 1/2] Adds `names()` function and deprecates `datanames()` (#347) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Pull Request - Fixes #333 - Companion of https://github.com/insightsengineering/teal.code/pull/218 #### Blockers - `{teal.code}` - [ ] Merge https://github.com/insightsengineering/teal.code/pull/218 #### Changes description - [x] Adds `names()` function - [x] Setter is implemented but throws warning (does nothing) - [x] Deprecates `datanames()` getter and setter - [x] Adapt tests to new API - [x] Update NEWS - [ ] Review documentation - [ ] Review vignettes --------- Signed-off-by: André Veríssimo <211358+averissimo@users.noreply.github.com> Co-authored-by: Dawid Kałędkowski --- DESCRIPTION | 4 +- NAMESPACE | 3 + NEWS.md | 13 ++- R/deprecated.R | 48 +++++++++ R/join_keys.R | 1 - R/teal_data-class.R | 99 +++++++---------- R/teal_data-datanames.R | 68 ------------ R/teal_data-get_code.R | 6 +- R/teal_data-names.R | 41 +++++++ R/teal_data-show.R | 2 +- R/teal_data.R | 5 +- R/verify.R | 16 +-- _pkgdown.yml | 1 + man/datanames.Rd | 34 +++--- man/get_code.Rd | 6 +- man/names.teal_data.Rd | 32 ++++++ man/new_teal_data.Rd | 28 ----- man/teal_data-class.Rd | 23 ++-- man/verify.Rd | 6 +- tests/testthat/test-datanames.R | 137 ------------------------ tests/testthat/test-teal_data.R | 24 ++--- tests/testthat/test-test_data-names.R | 113 +++++++++++++++++++ tests/testthat/test-verify.R | 2 +- vignettes/join-keys.Rmd | 4 - vignettes/teal-data-reproducibility.Rmd | 2 +- vignettes/teal-data.Rmd | 23 +++- 26 files changed, 363 insertions(+), 378 deletions(-) delete mode 100644 R/teal_data-datanames.R create mode 100644 R/teal_data-names.R create mode 100644 man/names.teal_data.Rd delete mode 100644 man/new_teal_data.Rd delete mode 100644 tests/testthat/test-datanames.R create mode 100644 tests/testthat/test-test_data-names.R diff --git a/DESCRIPTION b/DESCRIPTION index 25d9bb0c3..45ffa7c97 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -29,7 +29,7 @@ URL: https://insightsengineering.github.io/teal.data/, BugReports: https://github.com/insightsengineering/teal.data/issues Depends: R (>= 4.0), - teal.code (>= 0.5.0.9011) + teal.code (>= 0.5.0.9012) Imports: checkmate (>= 2.1.0), lifecycle (>= 0.2.0), @@ -72,8 +72,8 @@ Collate: 'join_keys.R' 'teal.data.R' 'teal_data-class.R' - 'teal_data-datanames.R' 'teal_data-get_code.R' + 'teal_data-names.R' 'teal_data-show.R' 'teal_data.R' 'testhat-helpers.R' diff --git a/NAMESPACE b/NAMESPACE index 71db8957b..a126ccc47 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -6,6 +6,7 @@ S3method("[[<-",join_keys) S3method("join_keys<-",join_keys) S3method("join_keys<-",teal_data) S3method("names<-",join_keys) +S3method("names<-",teal_data) S3method("parents<-",join_keys) S3method("parents<-",teal_data) S3method(c,join_key_set) @@ -14,6 +15,8 @@ S3method(format,join_keys) S3method(join_keys,default) S3method(join_keys,join_keys) S3method(join_keys,teal_data) +S3method(length,teal.data) +S3method(names,teal_data) S3method(parents,join_keys) S3method(parents,teal_data) S3method(print,join_keys) diff --git a/NEWS.md b/NEWS.md index c37b3a076..bcf1da4d2 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,17 +2,20 @@ ### Breaking changes -- soft deprecate `datanames` argument of `get_code()`. Use `names` instead. +- Soft deprecate `datanames` argument of `get_code()`. Use `names` instead. +- Soft deprecate of `datanames()`. Use `names()` instead. +- Deprecate of `datanames(x) <- value`. Does nothing, replace with renaming the objects inside the environment. + ### Enhancements -- `datanames()` - - if `join_keys` are provided, the `datanames()` are now sorted in topological way (`Kahn` algorithm), +- `names()` function is introduced replacing `datanames`. + - if `join_keys` are provided, the `names()` are now sorted in topological way (`Kahn` algorithm), which means the parent dataset always precedes the child dataset. - - are extended by the parent dataset name, if one of the child dataset exist in `datanames()` and + - are extended by the parent dataset name, if one of the child dataset exist in `names()` and the connection between child-parent is set through `join_keys` and `parent` exist in `teal_data` environment. - do not allow to set a dataset name that do not exist in `teal_data` environment. - - `teal_data` no longer set default `datanames()` based on `join_keys` names - it uses only data names. + - `teal_data` no longer set default `names()` based on `join_keys` names - it uses only data names. ### Miscellaneous diff --git a/R/deprecated.R b/R/deprecated.R index d4b3e9e93..97035fcd8 100644 --- a/R/deprecated.R +++ b/R/deprecated.R @@ -343,3 +343,51 @@ get_join_keys <- function(...) { get_labels <- function(...) { .deprecate_function("get_labels()", "Use col_labels(data)") } + +#' Names of data sets in `teal_data` object +#' +#' @description +#' `r lifecycle::badge("deprecated")` +#' +#' Use `names()` instead of `datanames()`. +#' +#' `datanames()` is deprecated. If object should be hidden, then use a `.` (dot) +#' prefix for the object's name. +#' +#' @param x (`teal_data` or `qenv_error`) object to access or modify +#' @param value (`character`) new value for `@datanames`; all elements must be names of variables existing in `@.xData` +#' +#' @return The contents of `@datanames` or `teal_data` object with updated `@datanames`. +#' @aliases `datanames<-.teal_data` +#' +#' @name datanames + +#' @rdname datanames +#' @export +datanames <- function(x) { + lifecycle::deprecate_soft("0.6.1", "datanames()", details = "names()") + names(x) +} + +#' @rdname datanames +#' @export +`datanames<-` <- function(x, value) { + lifecycle::deprecate_soft( + "0.6.1", + "`datanames<-`()", + details = "invalid to use `datanames()<-` or `names()<-` on an object of class `teal_data`. See ?names.teal_data" + ) + names(x) +} + +#' @rdname datanames +#' @export +#' @keywords internal +`names<-.teal_data` <- function(x, value) { + lifecycle::deprecate_warn( + "0.6.1", + "`names<-.teal_data`()", + details = "invalid to use `datanames()<-` or `names()<-` on an object of class `teal_data`. See ?names.teal_data" + ) + x +} diff --git a/R/join_keys.R b/R/join_keys.R index 1798a46af..47fb97836 100644 --- a/R/join_keys.R +++ b/R/join_keys.R @@ -142,7 +142,6 @@ join_keys.teal_data <- function(...) { #' join_keys(td) `join_keys<-.teal_data` <- function(x, value) { join_keys(x@join_keys) <- value - datanames(x) <- x@datanames # datanames fun manages some exceptions x } diff --git a/R/teal_data-class.R b/R/teal_data-class.R index 3015797b9..aea827319 100644 --- a/R/teal_data-class.R +++ b/R/teal_data-class.R @@ -14,22 +14,24 @@ setOldClass("join_keys") #' @name teal_data-class #' @rdname teal_data-class #' -#' @slot env (`environment`) environment containing data sets and possibly auxiliary variables. -#' Access variables with [get_var()] or [`[[`]. -#' No setter provided. Evaluate code to add variables into `@env`. -#' @slot code (`character`) vector representing code necessary to reproduce the contents of `@env`. +#' @slot .xData (`environment`) environment containing data sets and possibly +#' auxiliary variables. +#' Access variables with [get()], [`$`], [get_var()] or [`[[`]. +#' No setter provided. Evaluate code to add variables into `@.xData`. +#' @slot code (`character`) vector representing code necessary to reproduce the +#' contents of `@.xData`. #' Access with [get_code()]. #' No setter provided. Evaluate code to append code to the slot. -#' @slot id (`integer`) random identifier assigned to each element of `@code`. Used internally. +#' @slot id (`integer`) random identifier assigned to each element of `@code`. +#' Used internally. #' @slot warnings (`character`) vector of warnings raised when evaluating code. #' Access with [get_warnings()]. #' @slot messages (`character`) vector of messages raised when evaluating code. -#' @slot join_keys (`join_keys`) object specifying joining keys for data sets in `@env`. +#' @slot join_keys (`join_keys`) object specifying joining keys for data sets in +#' `@.xData`. #' Access or modify with [join_keys()]. -#' @slot datanames (`character`) vector of names of data sets in `@env`. -#' Used internally to distinguish them from auxiliary variables. -#' Access or modify with [datanames()]. -#' @slot verified (`logical(1)`) flag signifying that code in `@code` has been proven to yield contents of `@env`. +#' @slot verified (`logical(1)`) flag signifying that code in `@code` has been +#' proven to yield contents of `@.xData`. #' Used internally. See [`verify()`] for more details. #' #' @import teal.code @@ -37,62 +39,37 @@ setOldClass("join_keys") setClass( Class = "teal_data", contains = "qenv", - slots = c(join_keys = "join_keys", datanames = "character", verified = "logical"), + slots = c(join_keys = "join_keys", verified = "logical"), prototype = list( join_keys = join_keys(), - datanames = character(0), verified = logical(0) ) ) -#' Initialize `teal_data` object +#' It initializes the `teal_data` class #' -#' @name new_teal_data -#' -#' @param data (`named list`) of data objects. -#' @param code (`character` or `language`) code to reproduce the `data`. -#' Accepts and stores comments also. -#' @param join_keys (`join_keys`) object -#' @param datanames (`character`) names of datasets passed to `data`. -#' Needed when non-dataset objects are needed in the `env` slot. -#' @rdname new_teal_data -#' @keywords internal -new_teal_data <- function(data, - code = character(0), - join_keys = join_keys(), - datanames = names(data)) { - checkmate::assert_list(data) - checkmate::assert_class(join_keys, "join_keys") - if (is.null(datanames)) datanames <- character(0) # todo: allow to specify - checkmate::assert_character(datanames) - if (!any(is.language(code), is.character(code))) { - stop("`code` must be a character or language object.") - } - - if (is.language(code)) { - code <- paste(lang2calls(code), collapse = "\n") - } - if (length(code)) { - code <- paste(code, collapse = "\n") +#' Accepts .xData as a list and converts it to an environment before initializing +#' parent constructor (`qenv`). +#' @noRd +setMethod( + "initialize", + "teal_data", + function(.Object, .xData = list(), join_keys = join_keys(), ...) { # nolint: object_name. + # Allow .xData to be a list and convert it to an environment + if (!missing(.xData) && inherits(.xData, "list")) { + .xData <- rlang::env_clone(list2env(.xData), parent = parent.env(.GlobalEnv)) # nolint: object_name. + lockEnvironment(.xData, bindings = TRUE) + } + args <- list(...) + checkmate::assert_environment(.xData) + checkmate::assert_class(join_keys, "join_keys") + checkmate::assert_list(args, names = "named") + methods::callNextMethod( + .Object, + .xData, + join_keys = join_keys, + verified = (length(args$code) == 0L && length(.xData) == 0L), + ... + ) } - verified <- (length(code) == 0L && length(data) == 0L) - - id <- sample.int(.Machine$integer.max, size = length(code)) - - new_env <- rlang::env_clone(list2env(data), parent = parent.env(.GlobalEnv)) - lockEnvironment(new_env, bindings = TRUE) - - datanames <- .get_sorted_datanames(datanames = datanames, join_keys = join_keys, env = new_env) - - methods::new( - "teal_data", - env = new_env, - code = code, - warnings = rep("", length(code)), - messages = rep("", length(code)), - id = id, - join_keys = join_keys, - datanames = datanames, - verified = verified - ) -} +) diff --git a/R/teal_data-datanames.R b/R/teal_data-datanames.R deleted file mode 100644 index d32727859..000000000 --- a/R/teal_data-datanames.R +++ /dev/null @@ -1,68 +0,0 @@ -#' Names of data sets in `teal_data` object -#' -#' Get or set the value of the `datanames` slot. -#' -#' The `@datanames` slot in a `teal_data` object specifies which of the variables stored in its environment -#' (the `@env` slot) are data sets to be taken into consideration. -#' The contents of `@datanames` can be specified upon creation and default to all variables in `@env`. -#' Variables created later, which may well be data sets, are not automatically considered such. -#' Use this function to update the slot. -#' -#' @param x (`teal_data`) object to access or modify -#' @param value (`character`) new value for `@datanames`; all elements must be names of variables existing in `@env` -#' -#' @return The contents of `@datanames` or `teal_data` object with updated `@datanames`. -#' -#' @examples -#' td <- teal_data(iris = iris) -#' td <- within(td, mtcars <- mtcars) -#' datanames(td) -#' -#' datanames(td) <- c("iris", "mtcars") -#' datanames(td) -#' -#' @name datanames -#' @aliases datanames,teal_data-method -#' @aliases datanames<-,teal_data,character-method -#' @aliases datanames,qenv.error-method -#' @aliases datanames<-,qenv.error,character-method - -#' @rdname datanames -#' @export -setGeneric("datanames", function(x) standardGeneric("datanames")) -setMethod("datanames", signature = "teal_data", definition = function(x) { - x@datanames -}) -setMethod("datanames", signature = "qenv.error", definition = function(x) { - NULL -}) - -#' @rdname datanames -#' @export -setGeneric("datanames<-", function(x, value) standardGeneric("datanames<-")) -setMethod("datanames<-", signature = c("teal_data", "character"), definition = function(x, value) { - checkmate::assert_subset(value, names(x@env)) - x@datanames <- .get_sorted_datanames(datanames = value, join_keys = x@join_keys, env = x@env) - methods::validObject(x) - x -}) -setMethod("datanames<-", signature = c("qenv.error", "character"), definition = function(x, value) { - methods::validObject(x) - x -}) - - -#' @keywords internal -.get_sorted_datanames <- function(datanames, join_keys, env) { - child_parent <- sapply( - datanames, - function(name) parent(join_keys, name), - USE.NAMES = TRUE, - simplify = FALSE - ) - - union( - intersect(unlist(topological_sort(child_parent)), ls(env)), - datanames - ) -} diff --git a/R/teal_data-get_code.R b/R/teal_data-get_code.R index 1fd668193..23002590d 100644 --- a/R/teal_data-get_code.R +++ b/R/teal_data-get_code.R @@ -2,8 +2,10 @@ #' #' Retrieve code from `teal_data` object. #' -#' Retrieve code stored in `@code`, which (in principle) can be used to recreate all objects found in `@env`. -#' Use `names` to limit the code to one or more of the datasets enumerated in `@datanames`. +#' Retrieve code stored in `@code`, which (in principle) can be used to recreate +#' all objects found in the environment (`@.xData`). +#' Use `names` to limit the code to one or more of the datasets enumerated in +#' the environment. #' #' @section Extracting dataset-specific code: #' When `names` is specified, the code returned will be limited to the lines needed to _create_ diff --git a/R/teal_data-names.R b/R/teal_data-names.R new file mode 100644 index 000000000..32e9d71de --- /dev/null +++ b/R/teal_data-names.R @@ -0,0 +1,41 @@ +#' Names of data sets in `teal_data` object +#' +#' Functions to get the names of a `teal_data` object. +#' The names are obtained from the objects listed in the `qenv` environment. +#' +#' Objects named with a `.` (dot) prefix will be ignored and not returned. +#' To get the names of all objects, use `ls(x, all.names = TRUE)`, however, it +#' will not group the names by the join_keys topological structure. +#' +#' @param x A (`teal_data`) object to access or modify. +#' +#' @return A character vector of names. +#' +#' @examples +#' td <- teal_data(iris = iris) +#' td <- within(td, mtcars <- mtcars) +#' names(td) +#' +#' td <- within(td, .CO2 <- CO2) +#' names(td) # '.CO2' will not be returned +#' +#' @export +names.teal_data <- function(x) { + # Sorting from `ls` can be safely done as environments don't have any order + # nor support numeric-index subsetting + envir <- as.environment(x) + .get_sorted_names(ls(envir = envir), join_keys(x), envir) +} + +#' @export +length.teal.data <- function(x) length(ls(x)) + +#' @keywords internal +.get_sorted_names <- function(datanames, join_keys, env) { + child_parent <- sapply(datanames, parent, x = join_keys, USE.NAMES = TRUE, simplify = FALSE) + + union( + intersect(unlist(topological_sort(child_parent)), ls(env, all.names = TRUE)), + datanames + ) +} diff --git a/R/teal_data-show.R b/R/teal_data-show.R index b47f29c10..fca8cf66e 100644 --- a/R/teal_data-show.R +++ b/R/teal_data-show.R @@ -16,5 +16,5 @@ setMethod("show", signature = "teal_data", function(object) { } else { cat("\u2716", "unverified teal_data object\n") } - rlang::env_print(object@env) + rlang::env_print(teal.code::get_env(object)) }) diff --git a/R/teal_data.R b/R/teal_data.R index ea32e624a..1721c94fe 100644 --- a/R/teal_data.R +++ b/R/teal_data.R @@ -60,8 +60,9 @@ teal_data <- function(..., if (length(data_objects) > 0 && !checkmate::test_names(names(data_objects), type = "named")) { stop("Dot (`...`) arguments on `teal_data()` must be named.") } - new_teal_data( - data = data_objects, + methods::new( + "teal_data", + .xData = data_objects, code = code, join_keys = join_keys ) diff --git a/R/verify.R b/R/verify.R index 7caa648f2..a3c1f24fe 100644 --- a/R/verify.R +++ b/R/verify.R @@ -2,10 +2,12 @@ #' #' Checks whether code in `teal_data` object reproduces the stored objects. #' -#' If objects created by code in the `@code` slot of `x` are `all_equal` to the contents of the `@env` slot, +#' If objects created by code in the `@code` slot of `x` are `all_equal` to the +#' contents of the environment (`@.xData` slot), #' the function updates the `@verified` slot to `TRUE` in the returned `teal_data` object. #' Once verified, the slot will always be set to `TRUE`. -#' If the `@code` fails to recreate objects in `teal_data@env`, an error is raised. +#' If the `@code` fails to recreate objects in `teal_data`'s environment, an +#' error is raised. #' #' @return Input `teal_data` object or error. #' @@ -65,7 +67,7 @@ setMethod("verify", signature = "teal_data", definition = function(x) { stop(conditionMessage(y), call. = FALSE) } - reproduced <- isTRUE(all.equal(x@env, y@env)) + reproduced <- isTRUE(all.equal(teal.code::get_env(x), teal.code::get_env(y))) if (reproduced) { x@verified <- TRUE methods::validObject(x) @@ -74,15 +76,15 @@ setMethod("verify", signature = "teal_data", definition = function(x) { error <- "Code verification failed." objects_diff <- vapply( - intersect(names(x@env), names(y@env)), + intersect(names(x), names(y)), function(element) { - isTRUE(all.equal(x@env[[element]], y@env[[element]])) + isTRUE(all.equal(x[[element]], y[[element]])) }, logical(1) ) - names_diff_other <- setdiff(names(y@env), names(x@env)) - names_diff_inenv <- setdiff(names(x@env), names(y@env)) + names_diff_other <- setdiff(names(y), names(x)) + names_diff_inenv <- setdiff(names(x), names(y)) if (length(objects_diff)) { error <- c( diff --git a/_pkgdown.yml b/_pkgdown.yml index 2ecf65392..cc6bbb0c3 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -41,6 +41,7 @@ reference: - join_key - join_keys - join_keys<- + - names.teal_data - names<-.join_keys - parents - parents<- diff --git a/man/datanames.Rd b/man/datanames.Rd index 4908979a2..19cbb2a0e 100644 --- a/man/datanames.Rd +++ b/man/datanames.Rd @@ -1,42 +1,32 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/teal_data-datanames.R +% Please edit documentation in R/deprecated.R \name{datanames} \alias{datanames} -\alias{datanames,teal_data-method} -\alias{datanames<-,teal_data,character-method} -\alias{datanames,qenv.error-method} -\alias{datanames<-,qenv.error,character-method} +\alias{`datanames<-.teal_data`} \alias{datanames<-} +\alias{names<-.teal_data} \title{Names of data sets in \code{teal_data} object} \usage{ datanames(x) datanames(x) <- value + +\method{names}{teal_data}(x) <- value } \arguments{ -\item{x}{(\code{teal_data}) object to access or modify} +\item{x}{(\code{teal_data} or \code{qenv_error}) object to access or modify} -\item{value}{(\code{character}) new value for \verb{@datanames}; all elements must be names of variables existing in \verb{@env}} +\item{value}{(\code{character}) new value for \verb{@datanames}; all elements must be names of variables existing in \verb{@.xData}} } \value{ The contents of \verb{@datanames} or \code{teal_data} object with updated \verb{@datanames}. } \description{ -Get or set the value of the \code{datanames} slot. -} -\details{ -The \verb{@datanames} slot in a \code{teal_data} object specifies which of the variables stored in its environment -(the \verb{@env} slot) are data sets to be taken into consideration. -The contents of \verb{@datanames} can be specified upon creation and default to all variables in \verb{@env}. -Variables created later, which may well be data sets, are not automatically considered such. -Use this function to update the slot. -} -\examples{ -td <- teal_data(iris = iris) -td <- within(td, mtcars <- mtcars) -datanames(td) +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} -datanames(td) <- c("iris", "mtcars") -datanames(td) +Use \code{names()} instead of \code{datanames()}. +\code{datanames()} is deprecated. If object should be hidden, then use a \code{.} (dot) +prefix for the object's name. } +\keyword{internal} diff --git a/man/get_code.Rd b/man/get_code.Rd index 8a21424e7..3d3b8cf25 100644 --- a/man/get_code.Rd +++ b/man/get_code.Rd @@ -36,8 +36,10 @@ only code that \emph{creates} that dataset (not code that uses it) is returned. Retrieve code from \code{teal_data} object. } \details{ -Retrieve code stored in \verb{@code}, which (in principle) can be used to recreate all objects found in \verb{@env}. -Use \code{names} to limit the code to one or more of the datasets enumerated in \verb{@datanames}. +Retrieve code stored in \verb{@code}, which (in principle) can be used to recreate +all objects found in the environment (\verb{@.xData}). +Use \code{names} to limit the code to one or more of the datasets enumerated in +the environment. } \section{Extracting dataset-specific code}{ diff --git a/man/names.teal_data.Rd b/man/names.teal_data.Rd new file mode 100644 index 000000000..4f03fce70 --- /dev/null +++ b/man/names.teal_data.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/teal_data-names.R +\name{names.teal_data} +\alias{names.teal_data} +\title{Names of data sets in \code{teal_data} object} +\usage{ +\method{names}{teal_data}(x) +} +\arguments{ +\item{x}{A (\code{teal_data}) object to access or modify.} +} +\value{ +A character vector of names. +} +\description{ +Functions to get the names of a \code{teal_data} object. +The names are obtained from the objects listed in the \code{qenv} environment. +} +\details{ +Objects named with a \code{.} (dot) prefix will be ignored and not returned. +To get the names of all objects, use \code{ls(x, all.names = TRUE)}, however, it +will not group the names by the join_keys topological structure. +} +\examples{ +td <- teal_data(iris = iris) +td <- within(td, mtcars <- mtcars) +names(td) + +td <- within(td, .CO2 <- CO2) +names(td) # '.CO2' will not be returned + +} diff --git a/man/new_teal_data.Rd b/man/new_teal_data.Rd deleted file mode 100644 index 4ddc43c95..000000000 --- a/man/new_teal_data.Rd +++ /dev/null @@ -1,28 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/teal_data-class.R -\name{new_teal_data} -\alias{new_teal_data} -\title{Initialize \code{teal_data} object} -\usage{ -new_teal_data( - data, - code = character(0), - join_keys = join_keys(), - datanames = names(data) -) -} -\arguments{ -\item{data}{(\verb{named list}) of data objects.} - -\item{code}{(\code{character} or \code{language}) code to reproduce the \code{data}. -Accepts and stores comments also.} - -\item{join_keys}{(\code{join_keys}) object} - -\item{datanames}{(\code{character}) names of datasets passed to \code{data}. -Needed when non-dataset objects are needed in the \code{env} slot.} -} -\description{ -Initialize \code{teal_data} object -} -\keyword{internal} diff --git a/man/teal_data-class.Rd b/man/teal_data-class.Rd index ef1c1decb..707029e66 100644 --- a/man/teal_data-class.Rd +++ b/man/teal_data-class.Rd @@ -18,29 +18,30 @@ If errors are raised, a \code{qenv.error} object is returned. \section{Slots}{ \describe{ -\item{\code{env}}{(\code{environment}) environment containing data sets and possibly auxiliary variables. -Access variables with \code{\link[=get_var]{get_var()}} or [\code{[[}]. -No setter provided. Evaluate code to add variables into \verb{@env}.} +\item{\code{.xData}}{(\code{environment}) environment containing data sets and possibly +auxiliary variables. +Access variables with \code{\link[=get]{get()}}, \code{\link{$}}, \code{\link[=get_var]{get_var()}} or [\code{[[}]. +No setter provided. Evaluate code to add variables into \verb{@.xData}.} -\item{\code{code}}{(\code{character}) vector representing code necessary to reproduce the contents of \verb{@env}. +\item{\code{code}}{(\code{character}) vector representing code necessary to reproduce the +contents of \verb{@.xData}. Access with \code{\link[=get_code]{get_code()}}. No setter provided. Evaluate code to append code to the slot.} -\item{\code{id}}{(\code{integer}) random identifier assigned to each element of \verb{@code}. Used internally.} +\item{\code{id}}{(\code{integer}) random identifier assigned to each element of \verb{@code}. +Used internally.} \item{\code{warnings}}{(\code{character}) vector of warnings raised when evaluating code. Access with \code{\link[=get_warnings]{get_warnings()}}.} \item{\code{messages}}{(\code{character}) vector of messages raised when evaluating code.} -\item{\code{join_keys}}{(\code{join_keys}) object specifying joining keys for data sets in \verb{@env}. +\item{\code{join_keys}}{(\code{join_keys}) object specifying joining keys for data sets in +\verb{@.xData}. Access or modify with \code{\link[=join_keys]{join_keys()}}.} -\item{\code{datanames}}{(\code{character}) vector of names of data sets in \verb{@env}. -Used internally to distinguish them from auxiliary variables. -Access or modify with \code{\link[=datanames]{datanames()}}.} - -\item{\code{verified}}{(\code{logical(1)}) flag signifying that code in \verb{@code} has been proven to yield contents of \verb{@env}. +\item{\code{verified}}{(\code{logical(1)}) flag signifying that code in \verb{@code} has been +proven to yield contents of \verb{@.xData}. Used internally. See \code{\link[=verify]{verify()}} for more details.} }} diff --git a/man/verify.Rd b/man/verify.Rd index 630a08788..7b71ce489 100644 --- a/man/verify.Rd +++ b/man/verify.Rd @@ -18,10 +18,12 @@ Input \code{teal_data} object or error. Checks whether code in \code{teal_data} object reproduces the stored objects. } \details{ -If objects created by code in the \verb{@code} slot of \code{x} are \code{all_equal} to the contents of the \verb{@env} slot, +If objects created by code in the \verb{@code} slot of \code{x} are \code{all_equal} to the +contents of the environment (\verb{@.xData} slot), the function updates the \verb{@verified} slot to \code{TRUE} in the returned \code{teal_data} object. Once verified, the slot will always be set to \code{TRUE}. -If the \verb{@code} fails to recreate objects in \code{teal_data@env}, an error is raised. +If the \verb{@code} fails to recreate objects in \code{teal_data}'s environment, an +error is raised. } \examples{ tdata1 <- teal_data() diff --git a/tests/testthat/test-datanames.R b/tests/testthat/test-datanames.R deleted file mode 100644 index e0c99bb63..000000000 --- a/tests/testthat/test-datanames.R +++ /dev/null @@ -1,137 +0,0 @@ -# get ---- -testthat::test_that("datanames returns contents of @datanames slot", { - td <- teal_data(i = iris, m = mtcars) - testthat::expect_identical(datanames(td), c("i", "m")) -}) - -testthat::test_that("variables not in @datanames are omitted", { - td <- teal_data(i = iris, m = mtcars) - td <- within(td, f <- faithful) - testthat::expect_identical(datanames(td), c("i", "m")) -}) - -# set ---- -testthat::test_that("datanames can set value of @datanames", { - td <- teal_data(i = iris, m = mtcars) - testthat::expect_identical(datanames(td), c("i", "m")) - datanames(td) <- "i" - testthat::expect_identical(datanames(td), "i") -}) - -testthat::test_that("only names of existing variables are accepted", { - td <- teal_data(i = iris, m = mtcars) - testthat::expect_no_error(datanames(td) <- "i") - testthat::expect_error(datanames(td) <- "f", "Assertion .* failed: Must be a subset") -}) - -# qenv.error support ---- -testthat::test_that("datanames supports qenv.error class", { - qe <- within(teal_data(), stop()) - testthat::expect_no_error(datanames(qe)) - testthat::expect_no_error(datanames(qe) <- "name") -}) - -testthat::test_that("datanames called on qenv.error returns NULL", { - qe <- within(teal_data(), stop()) - testthat::expect_null(datanames(qe)) -}) - -testthat::test_that("datanames<- called on qenv.error does not change qenv.error", { - qe <- within(teal_data(), stop()) - qec <- qe - testthat::expect_identical(qe, qec) -}) - -# topological_order ---- - -testthat::test_that("datanames are set in topological order in constructor if join_keys are specified", { - data <- - teal_data(b = data.frame(), a = data.frame(), join_keys = join_keys(join_key("a", "b", "id"))) - testthat::expect_identical( - datanames(data), - c("a", "b") - ) -}) - -testthat::test_that("datanames return parent if in constructor it was provided in join_keys and exists in env", { - data <- - teal_data(b = data.frame(), a = data.frame(), join_keys = join_keys(join_key("a", "b", "id"))) - datanames(data) <- "b" - testthat::expect_identical( - datanames(data), - c("a", "b") - ) -}) - -testthat::test_that( - "datanames do not return parent if in constructor it was provided in join_keys but do not exists in env", - { - data <- - teal_data(b = data.frame(), join_keys = join_keys(join_key("a", "b", "id"))) - testthat::expect_identical( - datanames(data), - "b" - ) - } -) - -testthat::test_that("datanames return topological order of datasets once join_keys are specified", { - data <- within(teal_data(), { - ADTTE <- teal.data::rADTTE # nolint: object_name. - iris <- iris - ADSL <- teal.data::rADSL # nolint: object_name. - }) - datanames(data) <- c("ADTTE", "iris", "ADSL") - join_keys(data) <- default_cdisc_join_keys[c("ADSL", "ADTTE")] - testthat::expect_identical( - datanames(data), - c("ADSL", "ADTTE", "iris") - ) -}) - -testthat::test_that("datanames return topological order of datasets after datanames are called after join_keys", { - data <- within(teal_data(), { - ADTTE <- teal.data::rADTTE # nolint: object_name. - iris <- iris - ADSL <- teal.data::rADSL # nolint: object_name. - }) - - join_keys(data) <- default_cdisc_join_keys[c("ADSL", "ADTTE")] - datanames(data) <- c("ADTTE", "iris", "ADSL") - - testthat::expect_identical( - datanames(data), - c("ADSL", "ADTTE", "iris") - ) -}) - - -testthat::test_that("datanames return parent if join_keys were provided and parent exists in env", { - data <- within(teal_data(), { - ADTTE <- teal.data::rADTTE # nolint: object_name. - iris <- iris - ADSL <- teal.data::rADSL # nolint: object_name. - }) - - join_keys(data) <- default_cdisc_join_keys[c("ADSL", "ADTTE")] - datanames(data) <- c("ADTTE", "iris") - - testthat::expect_identical( - datanames(data), - c("ADSL", "ADTTE", "iris") - ) -}) - -testthat::test_that("datanames do not return parent if join_keys were provided and parent did not exists in env", { - data <- teal_data( - ADTTE = teal.data::rADTTE, # nolint: object_name. - iris = iris - ) - - join_keys(data) <- default_cdisc_join_keys[c("ADSL", "ADTTE")] - - testthat::expect_identical( - datanames(data), - c("ADTTE", "iris") - ) -}) diff --git a/tests/testthat/test-teal_data.R b/tests/testthat/test-teal_data.R index 8391f72e2..a7d715d3e 100644 --- a/tests/testthat/test-teal_data.R +++ b/tests/testthat/test-teal_data.R @@ -21,34 +21,26 @@ testthat::test_that("teal_data allows to initialize empty teal_data with join_ke ) }) -testthat::test_that("teal_data initializes teal_data object with @datanames taken from passed objects", { - testthat::expect_identical( - teal_data(iris = iris, mtcars = mtcars)@datanames, - c("iris", "mtcars") - ) -}) - testthat::test_that( - "teal_data initializes teal_data object without @datanames taken from join_keys if objects did not exist in env", + "teal_data initializes teal_data object without names taken from join_keys if objects did not exist in env", { testthat::expect_identical( - datanames(teal_data(join_keys = join_keys(join_key("parent", "child", "id")))), + names(teal_data(join_keys = join_keys(join_key("parent", "child", "id")))), character(0) ) } ) testthat::test_that( - "teal_data initializes teal_data object with @datanames taken only from passed objects and not join_keys", + "teal_data initializes teal_data object with names taken only from passed objects and not join_keys", { testthat::expect_identical( - datanames(teal_data(iris = iris, join_keys = join_keys(join_key("parent", "child", "id")))), + names(teal_data(iris = iris, join_keys = join_keys(join_key("parent", "child", "id")))), "iris" ) } ) - testthat::test_that("teal_data returns teal_data when data passed as named list", { df1 <- data.frame(id = c("A", "B"), a = c(1L, 2L)) testthat::expect_s4_class(teal_data(df1 = df1), "teal_data") @@ -93,9 +85,9 @@ testthat::test_that("teal_data code is concatenated into single string", { ) }) -testthat::test_that("teal_data@env is locked. Not able to modify, add or remove bindings", { +testthat::test_that("teal_data@.xData is locked. Not able to modify, add or remove bindings", { data <- teal_data(iris = iris) - testthat::expect_error(data@env$iris <- iris, "cannot change value of locked binding for 'iris'") - testthat::expect_error(data@env$iris2 <- iris, "cannot add bindings to a locked environment") - testthat::expect_error(rm("iris", envir = data@env), "cannot remove bindings from a locked environment") + testthat::expect_error(data@.xData$iris <- iris, "cannot change value of locked binding for 'iris'") + testthat::expect_error(data@.xData$iris2 <- iris, "cannot add bindings to a locked environment") + testthat::expect_error(rm("iris", envir = data@.xData), "cannot remove bindings from a locked environment") }) diff --git a/tests/testthat/test-test_data-names.R b/tests/testthat/test-test_data-names.R new file mode 100644 index 000000000..5db8d9d14 --- /dev/null +++ b/tests/testthat/test-test_data-names.R @@ -0,0 +1,113 @@ +# get ---- +testthat::test_that("names returns list of objects in teal_data", { + td <- teal_data(i = iris, m = mtcars) + testthat::expect_identical(names(td), c("i", "m")) +}) + +testthat::test_that("variables with dot prefix are omitted", { + td <- teal_data(i = iris, m = mtcars) + td <- within(td, .f <- faithful) + testthat::expect_identical(names(td), c("i", "m")) +}) + +# set --- +testthat::test_that("names<- called on teal_data does not change it", { + td <- teal_data(i = iris, m = mtcars) + tdc <- td <- teal_data(i = iris, m = mtcars) + suppressWarnings(names(td) <- c("a", "b", "c")) + testthat::expect_identical(td, tdc) +}) + +# qenv.error support ---- +testthat::test_that("names supports qenv.error class", { + qe <- within(teal_data(), stop()) + testthat::expect_no_error(names(qe)) +}) + + +# topological_order ---- + +testthat::test_that("names are set in topological order in constructor if join_keys are specified", { + data <- + teal_data(b = data.frame(), a = data.frame(), join_keys = join_keys(join_key("a", "b", "id"))) + testthat::expect_identical( + names(data), + c("a", "b") + ) +}) + +testthat::test_that("names return parent if in constructor it was provided in join_keys and exists in env", { + data <- teal_data( + b = data.frame(), + .a = data.frame(), + join_keys = join_keys(join_key(".a", "b", "id")) + ) + testthat::expect_setequal(names(data), c(".a", "b")) +}) + +testthat::test_that( + "names do not return parent if in constructor it was provided in join_keys but do not exists in env", + { + data <- teal_data(b = data.frame(), join_keys = join_keys(join_key("a", "b", "id"))) + testthat::expect_identical( + names(data), + "b" + ) + } +) + +testthat::test_that("names return topological order of datasets once join_keys are specified", { + data <- within(teal_data(), { + ADTTE <- teal.data::rADTTE # nolint: object_name. + ADTR <- teal.data::rADTR # nolint: object_name. + ADSL <- teal.data::rADSL # nolint: object_name. + }) + join_keys(data) <- default_cdisc_join_keys[c("ADSL", "ADTTE")] + testthat::expect_identical( + names(data), + c("ADTR", "ADSL", "ADTTE") + ) +}) + +testthat::test_that("names return topological order of datasets after new objects are added after join_keys", { + data <- within(teal_data(), { + ADTTE <- teal.data::rADTTE # nolint: object_name. + ADSL <- teal.data::rADSL # nolint: object_name. + }) + + testthat::expect_identical( + names(data), + c("ADSL", "ADTTE") + ) + + join_keys(data) <- default_cdisc_join_keys[c("ADSL", "ADTTE")] + data <- within(data, ADTR <- teal.data::rADTR) # nolint: object_name. + + testthat::expect_identical( + names(data), + c("ADTR", "ADSL", "ADTTE") + ) +}) + + +testthat::test_that("names return parent if join_keys were provided and parent exists in env", { + data <- within(teal_data(), { + ADTTE <- teal.data::rADTTE # nolint: object_name. + iris <- iris + .ADSL <- teal.data::rADSL # nolint: object_name. + }) + + join_keys(data) <- default_cdisc_join_keys[c("ADSL", "ADTTE")] + names(join_keys(data)) <- c(".ADSL", "ADTTE") + + testthat::expect_setequal(names(data), c(".ADSL", "ADTTE", "iris")) +}) + +testthat::test_that("names do not return parent if join_keys were provided and parent did not exists in env", { + data <- teal_data( + ADTTE = teal.data::rADTTE, # nolint: object_name. + iris = iris + ) + join_keys(data) <- default_cdisc_join_keys[c("ADSL", "ADTTE")] + testthat::expect_setequal(names(data), c("ADTTE", "iris")) +}) diff --git a/tests/testthat/test-verify.R b/tests/testthat/test-verify.R index a2b45a7bb..4e15fb696 100644 --- a/tests/testthat/test-verify.R +++ b/tests/testthat/test-verify.R @@ -17,7 +17,7 @@ testthat::test_that("verify returns the same object with changed @verified field testthat::expect_identical(tdata2@verified, FALSE) testthat::expect_identical(tdata2_ver@verified, TRUE) testthat::expect_identical(tdata2_ver@code, tdata2@code) - testthat::expect_identical(tdata2_ver@env, tdata2@env) + testthat::expect_identical(teal.code::get_env(tdata2_ver), teal.code::get_env(tdata2)) }) testthat::test_that("verify raises error if @code does not restore objects in @env", { diff --git a/vignettes/join-keys.Rmd b/vignettes/join-keys.Rmd index 9d06fed22..9ead04820 100644 --- a/vignettes/join-keys.Rmd +++ b/vignettes/join-keys.Rmd @@ -192,7 +192,6 @@ td_pk <- within( teal_data(), ds1 <- transform(iris, id = seq_len(nrow(iris))) ) -datanames(td_pk) <- "ds1" join_keys(td_pk) <- join_keys(join_key("ds1", keys = "id")) @@ -209,7 +208,6 @@ td_pk <- within( ds3 <- data.frame(V = 5:14, N = 4) } ) -datanames(td_pk) <- c(datanames(td_pk), "ds2", "ds3") join_keys(td_pk)["ds2", "ds2"] <- c("V", "W") join_keys(td_pk)["ds3", "ds3"] <- c("V", "W") @@ -239,7 +237,6 @@ td_fk <- within( ds3 <- data.frame(V = 5:14, N = 4) } ) -datanames(td_fk) <- c("ds1", "ds2", "ds3") join_keys(td_fk) <- join_keys( # Primary keys @@ -275,7 +272,6 @@ td <- within( ds4 <- data.frame(V = 5:14, R = rnorm(10)) } ) -datanames(td) <- c("ds1", "ds2", "ds3", "ds4") join_keys(td) <- join_keys( # Primary keys diff --git a/vignettes/teal-data-reproducibility.Rmd b/vignettes/teal-data-reproducibility.Rmd index f39d0a81b..b0b379cc9 100644 --- a/vignettes/teal-data-reproducibility.Rmd +++ b/vignettes/teal-data-reproducibility.Rmd @@ -105,7 +105,7 @@ Note that in when retrieving code for a specific dataset, the result is only the ## Tracking object dependencies -Calling `get_code` with `datanames` specified initiates an analysis of the stored code, in which object dependencies are automatically discovered. +Calling `get_code` with `names` specified initiates an analysis of the stored code, in which object dependencies are automatically discovered. If object `x` is created with an expression that uses object `y`, the lines that create object `y` must also be returned. This is quite effective when objects are created by simple assignments like `x <- foo(y)`. However, in rare cases discovering dependencies is impossible, _e.g._ when opening connections to databases or when objects are created by side effects (functions acting on their calling environment implicitly rather than returning a value that is then assigned). diff --git a/vignettes/teal-data.Rmd b/vignettes/teal-data.Rmd index 687b136d9..2288a7f5b 100644 --- a/vignettes/teal-data.Rmd +++ b/vignettes/teal-data.Rmd @@ -40,9 +40,8 @@ my_data[["data1"]] # get reproducible code get_code(my_data) -# get or set datanames -datanames(my_data) <- c("data1", "data2") -datanames(my_data) +# get datanames +names(my_data) # print print(my_data) @@ -55,7 +54,7 @@ A `teal_data` object keeps the following information: - `env` - an environment containing data. - `code` - a string containing code to reproduce `env` (details in [reproducibility](teal-data-reproducibility.html)). -- `datanames` - a character vector listing objects of interest to `teal` modules (details in [this `teal` vignette](https://insightsengineering.github.io/teal/latest-tag/articles/including-data-in-teal-applications.html)). +- `names` - a character vector listing objects of interest to `teal` modules (details in [this `teal` vignette](https://insightsengineering.github.io/teal/latest-tag/articles/including-data-in-teal-applications.html)). - `join_keys` - a `join_keys` object defining relationships between datasets (details in [Join Keys](join-keys.html)). ### Reproducibility @@ -71,7 +70,6 @@ my_data <- within(my_data, data$id <- seq_len(nrow(data))) my_data # is verified ``` - ### Relational data models The `teal_data` class supports relational data. @@ -94,3 +92,18 @@ join_keys(my_data) <- join_keys( join_keys(my_data) ``` + +### Hidden objects + +An object is hidden in `teal_data` if its name starts with a dot (`.`). +This can be used to pass auxiliary objects / function in the `teal_data` instance, without being visible in the `teal` summary and filter panel. + +```{r} +my_data <- teal_data() +my_data <- within(my_data, { + data <- data.frame(id = 1:10, x = 11:20) + .data2 <- data.frame(id = 1:20, data_id = c(1:10, 1:10), y = 21:30) +}) + +names(my_data) +``` From 06ebcd741dbcbef23b2f7ec94330171709f3b4c0 Mon Sep 17 00:00:00 2001 From: averissimo Date: Fri, 8 Nov 2024 09:59:22 +0000 Subject: [PATCH 2/2] [skip actions] Bump version to 0.6.0.9016 --- .pre-commit-config.yaml | 2 +- DESCRIPTION | 4 ++-- NEWS.md | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ccfcc1c64..232e2defd 100755 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,7 +6,7 @@ default_language_version: python: python3 repos: - repo: https://github.com/lorenzwalthert/precommit - rev: v0.4.3.9001 + rev: v0.4.3.9003 hooks: - id: style-files name: Style code with `styler` diff --git a/DESCRIPTION b/DESCRIPTION index 45ffa7c97..9a97061c6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Type: Package Package: teal.data Title: Data Model for 'teal' Applications -Version: 0.6.0.9015 -Date: 2024-10-23 +Version: 0.6.0.9016 +Date: 2024-11-08 Authors@R: c( person("Dawid", "Kaledkowski", , "dawid.kaledkowski@roche.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0001-9533-457X")), diff --git a/NEWS.md b/NEWS.md index bcf1da4d2..0ab6238ad 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# teal.data 0.6.0.9015 +# teal.data 0.6.0.9016 ### Breaking changes