From d278046732534a4d82f99a0c5a508bf7d161357b Mon Sep 17 00:00:00 2001 From: m7pr Date: Wed, 16 Oct 2024 15:56:24 +0200 Subject: [PATCH 01/11] bring get_code_dependency --- NEWS.md | 5 + R/qenv-get_code.R | 94 +++- R/utils-get_code_dependency.R | 436 +++++++++++++++++ tests/testthat/test-qenv_eval_code.R | 2 +- tests/testthat/test-qenv_get_code.R | 701 +++++++++++++++++++++++++++ tests/testthat/test-qenv_within.R | 2 +- 6 files changed, 1232 insertions(+), 8 deletions(-) create mode 100644 R/utils-get_code_dependency.R diff --git a/NEWS.md b/NEWS.md index 22bce150..e33438f8 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,10 @@ # teal.code 0.5.0.9010 +### Enhancements + +* `get_code()` was extended with `names` parameter and allows the code extraction to be limited to objects stored in +`qenv` but limited to `names`. + # teal.code 0.5.0 ### Breaking Change diff --git a/R/qenv-get_code.R b/R/qenv-get_code.R index 70584c11..34b60334 100644 --- a/R/qenv-get_code.R +++ b/R/qenv-get_code.R @@ -5,15 +5,89 @@ #' #' @param object (`qenv`) #' @param deparse (`logical(1)`) flag specifying whether to return code as `character` or `expression`. +#' @param names `r lifecycle::badge("experimental")` (`character`) vector of object names to return the code for. +#' For more details see the "Extracting dataset-specific code" section. #' @param ... see `Details` #' +#' +#' @section Extracting dataset-specific code: +#' When `names` is specified, the code returned will be limited to the lines needed to _create_ +#' the requested objects. The code stored in the `@code` slot is analyzed statically to determine +#' which lines the objects of interest depend upon. The analysis works well when objects are created +#' with standard infix assignment operators (see `?assignOps`) but it can fail in some situations. +#' +#' Consider the following examples: +#' +#' _Case 1: Usual assignments._ +#' ```r +#' q1 <- qenv() |> +#' within({ +#' foo <- function(x) { +#' x + 1 +#' } +#' x <- 0 +#' y <- foo(x) +#' }) +#' get_code(q1, names = "y") +#' ``` +#' `x` has no dependencies, so `get_code(data, names = "x")` will return only the second call.\cr +#' `y` depends on `x` and `foo`, so `get_code(data, names = "y")` will contain all three calls. +#' +#' _Case 2: Some objects are created by a function's side effects._ +#' ```r +#' q2 <- qenv() |> +#' within({ +#' foo <- function() { +#' x <<- x + 1 +#' } +#' x <- 0 +#' foo() +#' y <- x +#' }) +#' get_code(q2, names = "y") +#' ``` +#' Here, `y` depends on `x` but `x` is modified by `foo` as a side effect (not by reassignment) +#' and so `get_code(data, names = "y")` will not return the `foo()` call.\cr +#' To overcome this limitation, code dependencies can be specified manually. +#' Lines where side effects occur can be flagged by adding "`# @linksto `" at the end.\cr +#' Note that `within` evaluates code passed to `expr` as is and comments are ignored. +#' In order to include comments in code one must use the `eval_code` function instead. +#' +#' ```r +#' q3 <- qenv() |> +#' eval_code(" +#' foo <- function() { +#' x <<- x + 1 +#' } +#' x <- 0 +#' foo() # @linksto x +#' y <- x +#' ") +#' get_code(q3, names = "y") +#' ``` +#' Now the `foo()` call will be properly included in the code required to recreate `y`. +#' +#' Note that two functions that create objects as side effects, `assign` and `data`, are handled automatically. +#' +#' Here are known cases where manual tagging is necessary: +#' - non-standard assignment operators, _e.g._ `%<>%` +#' - objects used as conditions in `if` statements: `if ()` +#' - objects used to iterate over in `for` loops: `for(i in )` +#' - creating and evaluating language objects, _e.g._ `eval()` +#' #' @return #' `get_code` returns the traced code (from `@code` slot) in the form specified by `deparse`. #' #' @examples #' # retrieve code +#' q <- within(qenv(), {a <- 1; b <- 2}) #' get_code(q) #' get_code(q, deparse = FALSE) +#' get_code(q, names = "a") +#' +#' q <- qenv() +#' q <- eval_code(q, code = c("a <- 1", "b <- 2")) +#' get_code(q, names = "a") #' #' @name get_code #' @rdname qenv @@ -21,7 +95,7 @@ #' @aliases get_code,qenv.error-method #' #' @export -setGeneric("get_code", function(object, deparse = TRUE, ...) { +setGeneric("get_code", function(object, deparse = TRUE, names = NULL, ...) { # this line forces evaluation of object before passing to the generic # needed for error handling to work properly grDevices::pdf(nullfile()) @@ -31,16 +105,24 @@ setGeneric("get_code", function(object, deparse = TRUE, ...) { standardGeneric("get_code") }) -setMethod("get_code", signature = "qenv", function(object, deparse = TRUE) { +setMethod("get_code", signature = "qenv", function(object, deparse = TRUE, names = NULL, ...) { checkmate::assert_flag(deparse) + checkmate::assert_character(names, min.len = 1L, null.ok = TRUE) + + code <- if (!is.null(names)) { + get_code_dependency(object@code, names, ...) + } else { + object@code + } + if (deparse) { - if (length(object@code) == 0) { - object@code + if (length(code) == 0) { + code } else { - paste(object@code, collapse = "\n") + paste(code, collapse = "\n") } } else { - parse(text = paste(c("{", object@code, "}"), collapse = "\n"), keep.source = TRUE) + parse(text = paste(c("{", code, "}"), collapse = "\n"), keep.source = TRUE) } }) diff --git a/R/utils-get_code_dependency.R b/R/utils-get_code_dependency.R new file mode 100644 index 00000000..48fe6b12 --- /dev/null +++ b/R/utils-get_code_dependency.R @@ -0,0 +1,436 @@ +# get_code_dependency ---- + +#' Get code dependency of an object +#' +#' Extract subset of code required to reproduce specific object(s), including code producing side-effects. +#' +#' Given a character vector with code, this function will extract the part of the code responsible for creating +#' the variables specified by `names`. +#' This includes the final call that creates the variable(s) in question as well as all _parent calls_, +#' _i.e._ calls that create variables used in the final call and their parents, etc. +#' Also included are calls that create side-effects like establishing connections. +#' +#' It is assumed that object dependency is established by using three assignment operators: `<-`, `=`, and `->` . +#' Other assignment methods (`assign`, `<<-`) or non-standard-evaluation methods are not supported. +#' +#' Side-effects are not detected automatically and must be marked in the code. +#' Add `# @linksto object` at the end of a line where a side-effect occurs to specify that this line is required +#' to reproduce a variable called `object`. +#' +#' @param code `character` with the code. +#' @param names `character` vector of object names. +#' @param check_names `logical(1)` flag specifying if a warning for non-existing names should be displayed. +#' +#' @return Character vector, a subset of `code`. +#' Note that subsetting is actually done on the calls `code`, not necessarily on the elements of the vector. +#' +#' @keywords internal +get_code_dependency <- function(code, names, check_names = TRUE) { + checkmate::assert_character(code) + checkmate::assert_character(names, any.missing = FALSE) + + if (identical(code, character(0)) || identical(trimws(code), "")) { + return(code) + } + + # If code is bound in curly brackets, remove them. + tcode <- trimws(code) + if (any(grepl("^\\{.*\\}$", tcode))) { + code <- sub("^\\{(.*)\\}$", "\\1", tcode) + } + + + code <- parse(text = code, keep.source = TRUE) + pd <- utils::getParseData(code) + calls_pd <- extract_calls(pd) + + if (check_names) { + # Detect if names are actually in code. + symbols <- unlist(lapply(calls_pd, function(call) call[call$token == "SYMBOL", "text"])) + if (any(pd$text == "assign")) { + assign_calls <- Filter(function(call) find_call(call, "assign"), calls_pd) + ass_str <- unlist(lapply(assign_calls, function(call) call[call$token == "STR_CONST", "text"])) + ass_str <- gsub("^['\"]|['\"]$", "", ass_str) + symbols <- c(ass_str, symbols) + } + if (!all(names %in% unique(symbols))) { + warning("Object(s) not found in code: ", toString(setdiff(names, symbols))) + } + } + + graph <- code_graph(calls_pd) + ind <- unlist(lapply(names, function(x) graph_parser(x, graph))) + + lib_ind <- detect_libraries(calls_pd) + + as.character(code[sort(unique(c(lib_ind, ind)))]) +} + +#' Locate function call token +#' +#' Determine which row of parsed data is specific `SYMBOL_FUNCTION_CALL` token. +#' +#' Useful for determining occurrence of `assign` or `data` functions in an input call. +#' +#' @param call_pd `data.frame` as returned by `extract_calls()` +#' @param text `character(1)` to look for in `text` column of `call_pd` +#' +#' @return +#' Single integer specifying row in `call_pd` where `token` is `SYMBOL_FUNCTION_CALL` and `text` is `text`. +#' 0 if not found. +#' +#' @keywords internal +#' @noRd +find_call <- function(call_pd, text) { + checkmate::check_data_frame(call_pd) + checkmate::check_names(call_pd, must.include = c("token", "text")) + checkmate::check_string(text) + + ans <- which(call_pd$token == "SYMBOL_FUNCTION_CALL" & call_pd$text == text) + if (length(ans)) { + ans + } else { + 0L + } +} + +#' Split the result of `utils::getParseData()` into separate calls +#' +#' @param pd (`data.frame`) A result of `utils::getParseData()`. +#' +#' @return +#' A `list` of `data.frame`s. +#' Each element is a subset of `pd` corresponding to one call in the original code from which `pd` was obtained. +#' Only four columns (`"token"`, `"text"`, `"id"`, `"parent"`) are kept, the rest is discarded. +#' +#' @keywords internal +#' @noRd +extract_calls <- function(pd) { + calls <- lapply( + pd[pd$parent == 0, "id"], + function(parent) { + rbind( + pd[pd$id == parent, c("token", "text", "id", "parent")], + get_children(pd = pd, parent = parent) + ) + } + ) + calls <- Filter(function(call) !(nrow(call) == 1 && call$token == "';'"), calls) + calls <- Filter(Negate(is.null), calls) + calls <- fix_shifted_comments(calls) + fix_arrows(calls) +} + +#' @keywords internal +#' @noRd +get_children <- function(pd, parent) { + idx_children <- abs(pd$parent) == parent + children <- pd[idx_children, c("token", "text", "id", "parent")] + if (nrow(children) == 0) { + return(NULL) + } + + if (parent > 0) { + do.call(rbind, c(list(children), lapply(children$id, get_children, pd = pd))) + } +} + +#' Fixes edge case of comments being shifted to the next call. +#' @keywords internal +#' @noRd +fix_shifted_comments <- function(calls) { + # If the first or the second token is a @linksto COMMENT, + # then it belongs to the previous call. + if (length(calls) >= 2) { + for (i in 2:length(calls)) { + comment_idx <- grep("@linksto", calls[[i]][, "text"]) + if (isTRUE(comment_idx[1] <= 2)) { + calls[[i - 1]] <- rbind( + calls[[i - 1]], + calls[[i]][seq_len(comment_idx[1]), ] + ) + calls[[i]] <- calls[[i]][-seq_len(comment_idx[1]), ] + } + } + } + Filter(nrow, calls) +} + +#' Fixes edge case of `<-` assignment operator being called as function, +#' which is \code{`<-`(y,x)} instead of traditional `y <- x`. +#' @keywords internal +#' @noRd +fix_arrows <- function(calls) { + checkmate::assert_list(calls) + lapply(calls, function(call) { + sym_fun <- call$token == "SYMBOL_FUNCTION_CALL" + call[sym_fun, ] <- sub_arrows(call[sym_fun, ]) + call + }) +} + +#' Execution of assignment operator substitutions for a call. +#' @keywords internal +#' @noRd +sub_arrows <- function(call) { + checkmate::assert_data_frame(call) + map <- data.frame( + row.names = c("`<-`", "`<<-`", "`=`"), + token = rep("LEFT_ASSIGN", 3), + text = rep("<-", 3) + ) + sub_ids <- call$text %in% rownames(map) + call[sub_ids, c("token", "text")] <- map[call$text[sub_ids], ] + call +} + +# code_graph ---- + +#' Create object dependencies graph within parsed code +#' +#' Builds dependency graph that identifies dependencies between objects in parsed code. +#' Helps understand which objects depend on which. +#' +#' @param calls_pd `list` of `data.frame`s; +#' result of `utils::getParseData()` split into subsets representing individual calls; +#' created by `extract_calls()` function +#' +#' @return +#' A list (of length of input `calls_pd`) where each element represents one call. +#' Each element is a character vector listing names of objects that depend on this call +#' and names of objects that this call depends on. +#' Dependencies are listed after the `"<-"` string, e.g. `c("a", "<-", "b", "c")` means that in this call object `a` +#' depends on objects `b` and `c`. +#' If a call is tagged with `@linksto a`, then object `a` is understood to depend on that call. +#' +#' @keywords internal +#' @noRd +code_graph <- function(calls_pd) { + cooccurrence <- extract_occurrence(calls_pd) + + side_effects <- extract_side_effects(calls_pd) + + mapply(c, side_effects, cooccurrence, SIMPLIFY = FALSE) +} + +#' Extract object occurrence +#' +#' Extracts objects occurrence within calls passed by `calls_pd`. +#' Also detects which objects depend on which within a call. +#' +#' @param calls_pd `list` of `data.frame`s; +#' result of `utils::getParseData()` split into subsets representing individual calls; +#' created by `extract_calls()` function +#' +#' @return +#' A list (of length of input `calls_pd`) where each element represents one call. +#' Each element is a character vector listing names of objects that depend on this call +#' and names of objects that this call depends on. +#' Dependencies are listed after the `"<-"` string, e.g. `c("a", "<-", "b", "c")` means that in this call object `a` +#' depends on objects `b` and `c`. +#' If a call is tagged with `@linksto a`, then object `a` is understood to depend on that call. +#' +#' @keywords internal +#' @noRd +extract_occurrence <- function(calls_pd) { + is_in_function <- function(x) { + # If an object is a function parameter, + # then in calls_pd there is a `SYMBOL_FORMALS` entry for that object. + function_id <- x[x$token == "FUNCTION", "parent"] + if (length(function_id)) { + x$id %in% get_children(x, function_id[1])$id + } else { + rep(FALSE, nrow(x)) + } + } + in_parenthesis <- function(x) { + if (any(x$token %in% c("LBB", "'['"))) { + id_start <- min(x$id[x$token %in% c("LBB", "'['")]) + id_end <- min(x$id[x$token == "']'"]) + x$text[x$token == "SYMBOL" & x$id > id_start & x$id < id_end] + } + } + lapply( + calls_pd, + function(call_pd) { + # Handle data(object)/data("object")/data(object, envir = ) independently. + data_call <- find_call(call_pd, "data") + if (data_call) { + sym <- call_pd[data_call + 1, "text"] + return(c(gsub("^['\"]|['\"]$", "", sym), "<-")) + } + # Handle assign(x = ). + assign_call <- find_call(call_pd, "assign") + if (assign_call) { + # Check if parameters were named. + # "','" is for unnamed parameters, where "SYMBOL_SUB" is for named. + # "EQ_SUB" is for `=` appearing after the name of the named parameter. + if (any(call_pd$token == "SYMBOL_SUB")) { + params <- call_pd[call_pd$token %in% c("SYMBOL_SUB", "','", "EQ_SUB"), "text"] + # Remove sequence of "=", ",". + if (length(params > 1)) { + remove <- integer(0) + for (i in 2:length(params)) { + if (params[i - 1] == "=" & params[i] == ",") { + remove <- c(remove, i - 1, i) + } + } + if (length(remove)) params <- params[-remove] + } + pos <- match("x", setdiff(params, ","), nomatch = match(",", params, nomatch = 0)) + if (!pos) { + return(character(0L)) + } + # pos is indicator of the place of 'x' + # 1. All parameters are named, but none is 'x' - return(character(0L)) + # 2. Some parameters are named, 'x' is in named parameters: match("x", setdiff(params, ",")) + # - check "x" in params being just a vector of named parameters. + # 3. Some parameters are named, 'x' is not in named parameters + # - check first appearance of "," (unnamed parameter) in vector parameters. + } else { + # Object is the first entry after 'assign'. + pos <- 1 + } + sym <- call_pd[assign_call + pos, "text"] + return(c(gsub("^['\"]|['\"]$", "", sym), "<-")) + } + + # What occurs in a function body is not tracked. + x <- call_pd[!is_in_function(call_pd), ] + sym_cond <- which(x$token %in% c("SYMBOL", "SYMBOL_FUNCTION_CALL")) + + if (length(sym_cond) == 0) { + return(character(0L)) + } + # Watch out for SYMBOLS after $ and @. For x$a x@a: x is object, a is not. + # For x$a, a's ID is $'s ID-2 so we need to remove all IDs that have ID = $ID - 2. + dollar_ids <- x[x$token %in% c("'$'", "'@'"), "id"] + if (length(dollar_ids)) { + object_ids <- x[sym_cond, "id"] + after_dollar <- object_ids[(object_ids - 2) %in% dollar_ids] + sym_cond <- setdiff(sym_cond, which(x$id %in% after_dollar)) + } + + ass_cond <- grep("ASSIGN", x$token) + if (!length(ass_cond)) { + return(c("<-", unique(x[sym_cond, "text"]))) + } + + sym_cond <- sym_cond[sym_cond > ass_cond] # NOTE 1 + # If there was an assignment operation detect direction of it. + if (unique(x$text[ass_cond]) == "->") { # NOTE 2 + sym_cond <- rev(sym_cond) + } + + after <- match(min(x$id[ass_cond]), sort(x$id[c(min(ass_cond), sym_cond)])) - 1 + ans <- append(x[sym_cond, "text"], "<-", after = max(1, after)) + roll <- in_parenthesis(call_pd) + if (length(roll)) { + c(setdiff(ans, roll), roll) + } else { + ans + } + + ### NOTE 2: What if there are 2 assignments: e.g. a <- b -> c. + ### NOTE 1: For cases like 'eval(expression(b <- b + 2))' removes 'eval(expression('. + } + ) +} + +#' Extract side effects +#' +#' Extracts all object names from the code that are marked with `@linksto` tag. +#' +#' The code may contain functions calls that create side effects, e.g. modify the environment. +#' Static code analysis may be insufficient to determine which objects are created or modified by such a function call. +#' The `@linksto` comment tag is introduced to mark a call as having a (side) effect on one or more objects. +#' With this tag a complete object dependency structure can be established. +#' Read more about side effects and the usage of `@linksto` tag in [`get_code_dependencies()`] function. +#' +#' @param calls_pd `list` of `data.frame`s; +#' result of `utils::getParseData()` split into subsets representing individual calls; +#' created by `extract_calls()` function +#' +#' @return +#' A list of length equal to that of `calls_pd`, where each element is a character vector of names of objects +#' depending a call tagged with `@linksto` in a corresponding element of `calls_pd`. +#' +#' @keywords internal +#' @noRd +extract_side_effects <- function(calls_pd) { + lapply( + calls_pd, + function(x) { + linksto <- grep("@linksto", x[x$token == "COMMENT", "text"], value = TRUE) + unlist(strsplit(sub("\\s*#\\s*@linksto\\s+", "", linksto), "\\s+")) + } + ) +} + +# graph_parser ---- + +#' Return the indices of calls needed to reproduce an object +#' +#' @param x The name of the object to return code for. +#' @param graph A result of `code_graph()`. +#' +#' @return +#' Integer vector of indices that can be applied to `graph` to obtain all calls required to reproduce object `x`. +#' +#' @keywords internal +#' @noRd +graph_parser <- function(x, graph) { + occurrence <- vapply( + graph, function(call) { + ind <- match("<-", call, nomatch = length(call) + 1L) + x %in% call[seq_len(ind - 1L)] + }, + logical(1) + ) + + dependencies <- lapply(graph[occurrence], function(call) { + ind <- match("<-", call, nomatch = 0L) + call[(ind + 1L):length(call)] + }) + dependencies <- setdiff(unlist(dependencies), x) + + if (length(dependencies) && any(occurrence)) { + dependency_ids <- lapply(dependencies, function(dependency) { + graph_parser(dependency, graph[1:max(which(occurrence))]) + }) + sort(unique(c(which(occurrence), unlist(dependency_ids)))) + } else { + which(occurrence) + } +} + + +# default_side_effects -------------------------------------------------------------------------------------------- + +#' Detect library calls +#' +#' Detects `library()` and `require()` function calls. +#' +#' @param calls_pd `list` of `data.frame`s; +#' result of `utils::getParseData()` split into subsets representing individual calls; +#' created by `extract_calls()` function +#' +#' @return +#' Integer vector of indices that can be applied to `graph` (result of `code_graph()`) to obtain all calls containing +#' `library()` or `require()` calls that are always returned for reproducibility. +#' +#' @keywords internal +#' @noRd +detect_libraries <- function(calls_pd) { + defaults <- c("library", "require") + + which( + vapply( + calls_pd, + function(call) { + any(call$token == "SYMBOL_FUNCTION_CALL" & call$text %in% defaults) + }, + logical(1) + ) + ) +} diff --git a/tests/testthat/test-qenv_eval_code.R b/tests/testthat/test-qenv_eval_code.R index 12a2a7a0..1254b7bc 100644 --- a/tests/testthat/test-qenv_eval_code.R +++ b/tests/testthat/test-qenv_eval_code.R @@ -76,7 +76,7 @@ testthat::test_that("eval_code works with quoted code block", { }) testthat::test_that("eval_code fails with unquoted expression", { - testthat::expect_error(eval_code(qenv(), a <- b), "object 'b' not found") + testthat::expect_error(eval_code(qenv(), a <- b)) }) testthat::test_that("an error when calling eval_code returns a qenv.error object which has message and trace", { diff --git a/tests/testthat/test-qenv_get_code.R b/tests/testthat/test-qenv_get_code.R index 099f0a36..356c97b4 100644 --- a/tests/testthat/test-qenv_get_code.R +++ b/tests/testthat/test-qenv_get_code.R @@ -44,3 +44,704 @@ testthat::test_that("get_code called with qenv.error returns error with trace in "object 'v' not found \n when evaluating qenv code:\nw <- v\n\ntrace: \n x <- 1\n y <- x\n w <- v\n" ) }) + + +# names parameter ------------------------------------------------------------------------------------------------- + +testthat::test_that("handles empty @code slot", { + testthat::expect_identical( + get_code(qenv(), names = "a"), + character(0) + ) + testthat::expect_identical( + get_code(eval_code(qenv(), code = ""), names = "a"), + "" + ) +}) + +testthat::test_that("handles the code without symbols on rhs", { + code <- c( + "1 + 1", + "a <- 5", + "501" + ) + + testthat::expect_identical( + get_code(eval_code(qenv(), code), names = "a"), + "a <- 5" + ) +}) + +testthat::test_that("handles the code included in curly brackets", { + code <- "{1 + 1;a <- 5}" + + testthat::expect_identical( + get_code(eval_code(qenv(), code), names = "a"), + "a <- 5" + ) +}) + +testthat::test_that("handles the code of length > 1 when at least one is enclosed in curly brackets", { + code <- c("{a<-5}", "1+1") + q <- eval_code(eval_code(qenv(), code[1]), code[2]) + + testthat::expect_identical( + get_code(q, names = "a"), + "a <- 5" + ) +}) + + +testthat::test_that("extracts the code of a binding from character vector containing simple code", { + code <- c( + "a <- 1", + "b <- 2" + ) + q <- eval_code(qenv(), code) + testthat::expect_identical( + get_code(q, names = "a"), + "a <- 1" + ) + testthat::expect_identical( + get_code(q, names = "b"), + "b <- 2" + ) +}) + +testthat::test_that("extracts the code without downstream usage", { + code <- c( + "a <- 1", + "head(a)" + ) + q <- eval_code(qenv(), code) + testthat::expect_identical( + get_code(q, names = "a"), + "a <- 1" + ) +}) + +testthat::test_that("works for datanames of length > 1", { + code <- c( + "a <- 1", + "b <- 2" + ) + q <- eval_code(qenv(), code) + testthat::expect_identical( + get_code(q, names = c("a", "b")), + paste(code, collapse = "\n") + ) +}) + +testthat::test_that("warns if binding doesn't exist in code", { + code <- c("a <- 1") + q <- eval_code(qenv(), code) + testthat::expect_warning( + get_code(q, names = "c"), + "Object\\(s\\) not found in code: c" + ) +}) + +testthat::test_that("does not fall into a loop", { + code <- c( + "a <- 1", + "b <- a", + "c <- b", + "a <- c" + ) + q <- eval_code(qenv(), code) + testthat::expect_identical( + get_code(q, names = "a"), + paste(code, collapse = "\n") + ) + testthat::expect_identical( + get_code(q, names = "b"), + paste(code[1:2], collapse = "\n") + ) + testthat::expect_identical( + get_code(q, names = "c"), + paste(code[1:3], collapse = "\n") + ) +}) + + +testthat::test_that("extracts code of a parent binding but only those evaluated before coocurence", { + code <- c( + "a <- 1", + "b <- a", + "a <- 2" + ) + q <- eval_code(qenv(), code) + testthat::expect_identical( + get_code(q, names = "b"), + paste("a <- 1", "b <- a", sep = "\n") + ) +}) + +testthat::test_that("extracts the code of a parent binding if used as an arg in a function call", { + code <- c( + "a <- 1", + "b <- identity(x = a)", + "a <- 2" + ) + q <- eval_code(qenv(), code) + testthat::expect_identical( + get_code(q, names = "b"), + paste("a <- 1", "b <- identity(x = a)", sep = "\n") + ) +}) + +testthat::test_that("extracts the code when using <<-", { + code <- c( + "a <- 1", + "b <- a", + "b <<- b + 2" + ) + q <- eval_code(qenv(), code) + testthat::expect_identical( + get_code(q, names = "b"), + paste("a <- 1", "b <- a", "b <<- b + 2", sep = "\n") + ) +}) + +testthat::test_that("detects every assign calls even if not evaluated, if there is only one assignment in this line", { + code <- c( + "a <- 1", + "b <- 2", + "eval(expression({b <- b + 2}))" + ) + q <- eval_code(qenv(), code) + testthat::expect_identical( + get_code(q, names = "b"), + paste("b <- 2", "eval(expression({\n b <- b + 2\n}))", sep = "\n") + ) +}) + +testthat::test_that("returns result of length 1 for non-empty input", { + q1 <- qenv() + q1 <- within(q1, { + a <- 1 + b <- a^5 + c <- list(x = 2) + }) + + testthat::expect_length(get_code(q1, deparse = FALSE), 1) + testthat::expect_length(get_code(q1, deparse = TRUE), 1) +}) + +testthat::test_that("does not break if code is separated by ;", { + code <- c( + "a <- 1;a <- a + 1" + ) + q <- eval_code(qenv(), code) + testthat::expect_identical( + get_code(q, names = "a"), + gsub(";", "\n", code, fixed = TRUE) + ) +}) + +testthat::test_that("does not break if code uses quote()", { + code <- c( + "expr <- quote(x <- x + 1)", + "x <- 0", + "eval(expr)" + ) + q <- eval_code(qenv(), code) + testthat::expect_identical( + get_code(q, names = "x"), + code[2] + ) +}) + +testthat::test_that("does not break if object is used in a function on lhs", { + code <- c( + "data(iris)", + "iris2 <- iris", + "names(iris) <- letters[1:5]" + ) + q <- eval_code(qenv(), code = code) + testthat::expect_identical( + get_code(q, names = "iris"), + paste(code[c(1, 3)], collapse = "\n") + ) +}) + +testthat::test_that( + "does not break if object is used in a function on lhs and influencers are both on lhs and rhs", + { + code <- c( + "x <- 5", + "y <- length(x)", + "names(x)[y] <- y" + ) + q <- eval_code(qenv(), code = code) + testthat::expect_identical( + get_code(q, names = "x"), + paste(code, collapse = "\n") + ) + } +) + +# assign ---------------------------------------------------------------------------------------------------------- + +testthat::test_that("extracts the code for assign() where \"x\" is a literal string", { + code <- c( + "a <- 1", + "assign('b', 5)", + "assign(value = 7, x = 'c')", + "assign(value = 15, x = \"d\")", + "b <- b + 2", + "c <- b", + "d <- d * 2" + ) + q <- eval_code(qenv(), code) + testthat::expect_identical( + get_code(q, names = "b"), + paste("assign(\"b\", 5)", "b <- b + 2", sep = "\n") + ) + testthat::expect_identical( + get_code(q, names = "c"), + paste( + "assign(\"b\", 5)", + "assign(value = 7, x = \"c\")", + "b <- b + 2", + "c <- b", + sep = "\n" + ) + ) + testthat::expect_identical( + get_code(q, names = "d"), + paste("assign(value = 15, x = \"d\")", "d <- d * 2", sep = "\n") + ) +}) + +testthat::test_that("extracts the code for assign() where \"x\" is variable", { + testthat::skip("We will not resolve this, as this requires code evaluation.") + code <- c( + "x <- \"a\"", + "assign(x, 5)", + "b <- a" + ) + q <- eval_code(qenv(), code) + testthat::expect_identical( + get_code(q, names = "b"), + paste(code, collapse = "\n") + ) +}) + +testthat::test_that("works for assign() detection no matter how many parametrers were provided in assignq()", { + code <- c( + "x <- 1", + "assign(\"x\", 0, envir = environment())", + "assign(inherits = FALSE, immediate = TRUE, \"z\", 5, envir = environment())", + "y <- x + z", + "y <- x" + ) + + q <- eval_code(qenv(), code) + + testthat::expect_identical( + get_code(q, names = "y"), + paste(code, collapse = "\n") + ) +}) + +testthat::test_that("detects function usage of the assignment operator", { + code <- c( + "x <- 1", + "`<-`(y,x)" + ) + code2 <- "`<-`(y, `<-`(x, 2))" + + q <- eval_code(qenv(), code) + q2 <- eval_code(qenv(), code2) + + testthat::expect_identical( + get_code(q, names = "y"), + paste(c(code[1], "y <- x"), collapse = "\n") + ) + testthat::expect_identical( + get_code(q2, names = "y"), + "y <- x <- 2" + ) +}) + + +# @linksto --------------------------------------------------------------------------------------------------------- + +testthat::test_that("get_code does not break if @linksto is put in the last line", { + # In some cases R parses comment as a separate expression so the comment is not + # directly associated with this line of code. This situation occurs when `eval` is in the last + # line of the code. Other cases are not known but are highly probable. + code <- c( + "expr <- quote(x <- x + 1)", + "x <- 0", + "eval(expr) #@linksto x" + ) + q <- eval_code(qenv(), code) + testthat::expect_identical( + get_code(q, names = "x"), + paste(gsub(" #@linksto x", "", code, fixed = TRUE), collapse = "\n") + ) +}) + +testthat::test_that("@linksto makes a line being returned for an affected binding", { + code <- " + a <- 1 # @linksto b + b <- 2 + " + q <- eval_code(qenv(), code) + testthat::expect_identical( + get_code(q, names = "b"), + paste("a <- 1", "b <- 2", sep = "\n") + ) +}) + +testthat::test_that( + "@linksto returns the line for an affected binding + even if the object did not exist in the same iteration of eval_code", + { + code <- c( + "a <- 1 # @linksto b", + "b <- 2" + ) + q <- eval_code(qenv(), code) + testthat::expect_identical( + get_code(q, names = "b"), + paste("a <- 1", "b <- 2", sep = "\n") + ) + } +) + +testthat::test_that( + "lines affecting parent evaluated after co-occurrence are not included in output when using @linksto", + { + code <- c( + "a <- 1 ", + "b <- 2 # @linksto a", + "a <- a + 1", + "b <- b + 1" + ) + q <- eval_code(qenv(), code) + testthat::expect_identical( + get_code(q, names = "a"), + paste("a <- 1", "b <- 2", "a <- a + 1", sep = "\n") + ) + testthat::expect_identical( + get_code(q, names = "b"), + paste("b <- 2", "b <- b + 1", sep = "\n") + ) + } +) + +testthat::test_that( + "@linksto gets extracted if it's a side-effect on a dependent object (even of a dependent object)", + { + code <- " + iris[1:5, ] -> iris2 + iris_head <- head(iris) # @linksto iris3 + iris3 <- iris_head[1, ] # @linksto iris2 + classes <- lapply(iris2, class) + " + q <- eval_code(qenv(), code) + testthat::expect_identical( + get_code(q, names = "classes"), + paste("iris2 <- iris[1:5, ]", + "iris_head <- head(iris)", + "iris3 <- iris_head[1, ]", + "classes <- lapply(iris2, class)", + sep = "\n" + ) + ) + } +) + +# functions ------------------------------------------------------------------------------------------------------- + +testthat::test_that("ignores occurrence in a function definition", { + code <- c( + "b <- 2", + "foo <- function(b) { b <- b + 2 }" + ) + q <- eval_code(qenv(), code) + testthat::expect_identical( + get_code(q, names = "b"), + "b <- 2" + ) + testthat::expect_identical( + get_code(q, names = "foo"), + "foo <- function(b) {\n b <- b + 2\n}" + ) +}) + +testthat::test_that("ignores occurrence in a function definition that has function in it", { + code <- c( + "b <- 2", + "foo <- function(b) { function(c) {b <- c + 2 }}" + ) + q <- eval_code(qenv(), code) + testthat::expect_identical( + get_code(q, names = "b"), + "b <- 2" + ) + testthat::expect_identical( + get_code(q, names = "foo"), + "foo <- function(b) {\n function(c) {\n b <- c + 2\n }\n}" + ) +}) + +testthat::test_that("ignores occurrence in a function definition if there is multiple function definitions", { + code <- c( + "b <- 2", + "foo <- function(b) { function(c) {b <- c + 2 }}", + "b <- b + 1", + "bar <- function(b) print(b)" + ) + q <- eval_code(qenv(), code) + testthat::expect_identical( + get_code(q, names = "b"), + "b <- 2\nb <- b + 1" + ) + testthat::expect_identical( + get_code(q, names = "foo"), + "foo <- function(b) {\n function(c) {\n b <- c + 2\n }\n}" + ) +}) + +testthat::test_that("ignores occurrence in a function definition in lapply", { + code <- c( + "a <- list(a = 1, b = 2, c = 3)", + "b <- lapply(a, FUN = function(x) { x <- x + 1 })", + "b <- Filter(function(x) x > 2, b)", + "x <- 1", + "identity(x)" + ) + q <- eval_code(qenv(), code) + testthat::expect_identical( + get_code(q, names = "x"), + "x <- 1" + ) +}) + +testthat::test_that("does not ignore occurrence in function body if object exsits in env", { + skip("This is not urgent and can be ommitted with @linksto tag.") + code <- c( + "a <- list(a = 1, b = 2, c = 3)", + "p <- 5", # This is not extracted, even though is used in the next line. + "b <- lapply(a, FUN = function(x) { x <- x + p })", + "b <- Filter(function(x) x > 2, b)" + ) + q <- eval_code(qenv(), code) + testthat::expect_identical( + get_code(q, names = "b"), + paste(code, sep = "\n") + ) +}) + +testthat::test_that("ignores occurrence in function definition without { curly brackets", { + code <- c( + "b <- 2", + "foo <- function(b) b <- b + 2 " + ) + q <- eval_code(qenv(), code) + testthat::expect_identical( + get_code(q, names = "foo"), + "foo <- function(b) b <- b + 2" + ) + testthat::expect_identical( + get_code(q, names = "b"), + "b <- 2" + ) +}) + +testthat::test_that("detects occurrence of the function object", { + code <- c( + "a <- 1", + "b <- 2", + "foo <- function(b) { b <- b + 2 }", + "b <- foo(a)" + ) + q <- eval_code(qenv(), code) + testthat::expect_identical( + get_code(q, names = "b"), + paste("a <- 1", "b <- 2", "foo <- function(b) {\n b <- b + 2\n}", "b <- foo(a)", sep = "\n") + ) +}) + +testthat::test_that("detects occurrence of a function definition when a formal is named the same as a function", { + code <- c( + "x <- 1", + "foo <- function(foo = 1) 'text'", + "a <- foo(x)" + ) + q <- eval_code(qenv(), code) + testthat::expect_identical( + get_code(q, names = "a"), + paste("x <- 1", "foo <- function(foo = 1) \"text\"", "a <- foo(x)", sep = "\n") + ) +}) + +testthat::test_that("detects occurrence of a function definition with a @linksto usage", { + code <- c( + " + foo <- function() { + env <- parent.frame() + env$x <- 0 + }", + "foo() # @linksto x", + "y <- x" + ) + q <- eval_code(qenv(), code) + testthat::expect_identical( + get_code(q, names = "x"), + "foo <- function() {\n env <- parent.frame()\n env$x <- 0\n}\nfoo()" + ) +}) +# $ --------------------------------------------------------------------------------------------------------------- + +testthat::test_that("understands $ usage and do not treat rhs of $ as objects (only lhs)", { + code <- c( + "x <- data.frame(a = 1:3)", + "a <- data.frame(y = 1:3)", + "a$x <- a$y", + "a$x <- a$x + 2", + "a$x <- x$a" + ) + q <- eval_code(qenv(), code) + testthat::expect_identical( + get_code(q, names = "x"), + "x <- data.frame(a = 1:3)" + ) + testthat::expect_identical( + get_code(q, names = "a"), + paste("x <- data.frame(a = 1:3)", + "a <- data.frame(y = 1:3)", + "a$x <- a$y", + "a$x <- a$x + 2", + "a$x <- x$a", + sep = "\n" + ) + ) +}) + +testthat::test_that("detects cooccurrence properly even if all objects are on lhs", { + code <- c( + "a <- 1", + "b <- list(c = 2)", + "b[[a]] <- 3" + ) + q <- eval_code(qenv(), code) + testthat::expect_identical( + get_code(q, names = "b"), + paste(code, collapse = "\n") + ) +}) + + +# @ --------------------------------------------------------------------------------------------------------------- + +testthat::test_that("understands @ usage and do not treat rhs of @ as objects (only lhs)", { + code <- c( + "setClass('aclass', slots = c(a = 'numeric', x = 'numeric', y = 'numeric')) # @linksto a x", + "x <- new('aclass', a = 1:3, x = 1:3, y = 1:3)", + "a <- new('aclass', a = 1:3, x = 1:3, y = 1:3)", + "a@x <- a@y", + "a@x <- a@x + 2", + "a@x <- x@a" + ) + q <- eval_code(qenv(), code) + testthat::expect_identical( + get_code(q, names = "x"), + paste( + 'setClass("aclass", slots = c(a = "numeric", x = "numeric", y = "numeric"))', + 'x <- new("aclass", a = 1:3, x = 1:3, y = 1:3)', + sep = "\n" + ) + ) + testthat::expect_identical( + get_code(q, names = "a"), + paste( + 'setClass("aclass", slots = c(a = "numeric", x = "numeric", y = "numeric"))', + 'x <- new("aclass", a = 1:3, x = 1:3, y = 1:3)', + 'a <- new("aclass", a = 1:3, x = 1:3, y = 1:3)', + "a@x <- a@y", + "a@x <- a@x + 2", + "a@x <- x@a", + sep = "\n" + ) + ) +}) + + + +# libraries ------------------------------------------------------------------------------------------------------- + +testthat::test_that("library() and require() are always returned", { + code <- c( + "set.seed(1)", + "library(random.cdisc.data)", + "require(dplyr)", + "library(MultiAssayExperiment)", + "x <- 5", + "y <- 6" + ) + q <- eval_code(qenv(), code) + testthat::expect_identical( + get_code(q, names = "x"), + paste( + "library(random.cdisc.data)", + "require(dplyr)", + "library(MultiAssayExperiment)", + "x <- 5", + sep = "\n" + ) + ) +}) + + +# data() ---------------------------------------------------------------------------------------------------------- + +testthat::test_that("data() call is returned when data name is provided as is", { + code <- c( + "set.seed(1)", + "library(random.cdisc.data)", + "require(dplyr)", + "library(MultiAssayExperiment)", + "data(miniACC, envir = environment())", + "x <- miniACC" + ) + q <- eval_code(qenv(), code) + testthat::expect_identical( + get_code(q, names = "x"), + paste( + "library(random.cdisc.data)", + "require(dplyr)", + "library(MultiAssayExperiment)", + "data(miniACC, envir = environment())", + "x <- miniACC", + sep = "\n" + ) + ) +}) + +testthat::test_that("data() call is returned when data name is provided as a character", { + code <- c( + "set.seed(1)", + "library(random.cdisc.data)", + "require(dplyr)", + "library(MultiAssayExperiment)", + "data('mtcars')", + "z <- mtcars" + ) + q <- eval_code(qenv(), code) + testthat::expect_identical( + get_code(q, names = "z"), + paste( + "library(random.cdisc.data)", + "require(dplyr)", + "library(MultiAssayExperiment)", + "data(\"mtcars\")", + "z <- mtcars", + sep = "\n" + ) + ) +}) diff --git a/tests/testthat/test-qenv_within.R b/tests/testthat/test-qenv_within.R index 6073d80b..18eece32 100644 --- a/tests/testthat/test-qenv_within.R +++ b/tests/testthat/test-qenv_within.R @@ -93,7 +93,7 @@ testthat::test_that("external values are not taken from calling frame", { i <- subset(iris, Species == species) }) testthat::expect_s3_class(qq, "qenv.error") - testthat::expect_error(get_code(qq), "object 'species' not found") + testthat::expect_error(get_code(qq)) qq <- within(q, { i <- subset(iris, Species == species) From 249e6fe14bd5971461463f8439af8e237ad9a371 Mon Sep 17 00:00:00 2001 From: github-actions <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 17 Oct 2024 12:30:06 +0000 Subject: [PATCH 02/11] [skip style] [skip vbump] Restyle files --- R/qenv-get_code.R | 5 ++++- tests/testthat/test-qenv_get_code.R | 18 +++++++++--------- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/R/qenv-get_code.R b/R/qenv-get_code.R index 34b60334..c417850d 100644 --- a/R/qenv-get_code.R +++ b/R/qenv-get_code.R @@ -80,7 +80,10 @@ #' #' @examples #' # retrieve code -#' q <- within(qenv(), {a <- 1; b <- 2}) +#' q <- within(qenv(), { +#' a <- 1 +#' b <- 2 +#' }) #' get_code(q) #' get_code(q, deparse = FALSE) #' get_code(q, names = "a") diff --git a/tests/testthat/test-qenv_get_code.R b/tests/testthat/test-qenv_get_code.R index 356c97b4..cf567dae 100644 --- a/tests/testthat/test-qenv_get_code.R +++ b/tests/testthat/test-qenv_get_code.R @@ -446,10 +446,10 @@ testthat::test_that( testthat::expect_identical( get_code(q, names = "classes"), paste("iris2 <- iris[1:5, ]", - "iris_head <- head(iris)", - "iris3 <- iris_head[1, ]", - "classes <- lapply(iris2, class)", - sep = "\n" + "iris_head <- head(iris)", + "iris3 <- iris_head[1, ]", + "classes <- lapply(iris2, class)", + sep = "\n" ) ) } @@ -614,11 +614,11 @@ testthat::test_that("understands $ usage and do not treat rhs of $ as objects (o testthat::expect_identical( get_code(q, names = "a"), paste("x <- data.frame(a = 1:3)", - "a <- data.frame(y = 1:3)", - "a$x <- a$y", - "a$x <- a$x + 2", - "a$x <- x$a", - sep = "\n" + "a <- data.frame(y = 1:3)", + "a$x <- a$y", + "a$x <- a$x + 2", + "a$x <- x$a", + sep = "\n" ) ) }) From 227d7664e284f343dd5b1af0c563ce859ab1edc9 Mon Sep 17 00:00:00 2001 From: "27856297+dependabot-preview[bot]@users.noreply.github.com" <27856297+dependabot-preview[bot]@users.noreply.github.com> Date: Thu, 17 Oct 2024 12:30:09 +0000 Subject: [PATCH 03/11] [skip roxygen] [skip vbump] Roxygen Man Pages Auto Update --- DESCRIPTION | 1 + man/get_code_dependency.Rd | 37 +++++++++++++++++ man/qenv.Rd | 82 +++++++++++++++++++++++++++++++++++++- 3 files changed, 119 insertions(+), 1 deletion(-) create mode 100644 man/get_code_dependency.Rd diff --git a/DESCRIPTION b/DESCRIPTION index ca32a094..7977eabc 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -66,4 +66,5 @@ Collate: 'qenv-show.R' 'qenv-within.R' 'teal.code-package.R' + 'utils-get_code_dependency.R' 'utils.R' diff --git a/man/get_code_dependency.Rd b/man/get_code_dependency.Rd new file mode 100644 index 00000000..8db40903 --- /dev/null +++ b/man/get_code_dependency.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils-get_code_dependency.R +\name{get_code_dependency} +\alias{get_code_dependency} +\title{Get code dependency of an object} +\usage{ +get_code_dependency(code, names, check_names = TRUE) +} +\arguments{ +\item{code}{\code{character} with the code.} + +\item{names}{\code{character} vector of object names.} + +\item{check_names}{\code{logical(1)} flag specifying if a warning for non-existing names should be displayed.} +} +\value{ +Character vector, a subset of \code{code}. +Note that subsetting is actually done on the calls \code{code}, not necessarily on the elements of the vector. +} +\description{ +Extract subset of code required to reproduce specific object(s), including code producing side-effects. +} +\details{ +Given a character vector with code, this function will extract the part of the code responsible for creating +the variables specified by \code{names}. +This includes the final call that creates the variable(s) in question as well as all \emph{parent calls}, +\emph{i.e.} calls that create variables used in the final call and their parents, etc. +Also included are calls that create side-effects like establishing connections. + +It is assumed that object dependency is established by using three assignment operators: \verb{<-}, \code{=}, and \verb{->} . +Other assignment methods (\code{assign}, \verb{<<-}) or non-standard-evaluation methods are not supported. + +Side-effects are not detected automatically and must be marked in the code. +Add \verb{# @linksto object} at the end of a line where a side-effect occurs to specify that this line is required +to reproduce a variable called \code{object}. +} +\keyword{internal} diff --git a/man/qenv.Rd b/man/qenv.Rd index 75bd486e..4e002f15 100644 --- a/man/qenv.Rd +++ b/man/qenv.Rd @@ -26,7 +26,7 @@ new_qenv(env = new.env(parent = parent.env(.GlobalEnv)), code = character()) eval_code(object, code) -get_code(object, deparse = TRUE, ...) +get_code(object, deparse = TRUE, names = NULL, ...) \method{within}{qenv}(data, expr, ...) } @@ -40,6 +40,9 @@ Environment being a result of the \code{code} evaluation.} \item{deparse}{(\code{logical(1)}) flag specifying whether to return code as \code{character} or \code{expression}.} +\item{names}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} (\code{character}) vector of object names to return the code for. +For more details see the "Extracting dataset-specific code" section.} + \item{...}{see \code{Details}} \item{data}{(\code{qenv})} @@ -78,6 +81,77 @@ It is a method for the \code{base} generic that wraps \code{eval_code} to provid through the \code{...} argument: as \code{name:value} pairs are passed to \code{...}, \code{name} in \code{expr} will be replaced with \code{value}. } +\section{Extracting dataset-specific code}{ + +When \code{names} is specified, the code returned will be limited to the lines needed to \emph{create} +the requested objects. The code stored in the \verb{@code} slot is analyzed statically to determine +which lines the objects of interest depend upon. The analysis works well when objects are created +with standard infix assignment operators (see \code{?assignOps}) but it can fail in some situations. + +Consider the following examples: + +\emph{Case 1: Usual assignments.} + +\if{html}{\out{
}}\preformatted{q1 <- qenv() |> + within(\{ + foo <- function(x) \{ + x + 1 + \} + x <- 0 + y <- foo(x) + \}) +get_code(q1, names = "y") +}\if{html}{\out{
}} + +\code{x} has no dependencies, so \code{get_code(data, names = "x")} will return only the second call.\cr +\code{y} depends on \code{x} and \code{foo}, so \code{get_code(data, names = "y")} will contain all three calls. + +\emph{Case 2: Some objects are created by a function's side effects.} + +\if{html}{\out{
}}\preformatted{q2 <- qenv() |> + within(\{ + foo <- function() \{ + x <<- x + 1 + \} + x <- 0 + foo() + y <- x + \}) +get_code(q2, names = "y") +}\if{html}{\out{
}} + +Here, \code{y} depends on \code{x} but \code{x} is modified by \code{foo} as a side effect (not by reassignment) +and so \code{get_code(data, names = "y")} will not return the \code{foo()} call.\cr +To overcome this limitation, code dependencies can be specified manually. +Lines where side effects occur can be flagged by adding "\verb{# @linksto }" at the end.\cr +Note that \code{within} evaluates code passed to \code{expr} as is and comments are ignored. +In order to include comments in code one must use the \code{eval_code} function instead. + +\if{html}{\out{
}}\preformatted{q3 <- qenv() |> + eval_code(" + foo <- function() \{ + x <<- x + 1 + \} + x <- 0 + foo() # @linksto x + y <- x + ") +get_code(q3, names = "y") +}\if{html}{\out{
}} + +Now the \code{foo()} call will be properly included in the code required to recreate \code{y}. + +Note that two functions that create objects as side effects, \code{assign} and \code{data}, are handled automatically. + +Here are known cases where manual tagging is necessary: +\itemize{ +\item non-standard assignment operators, \emph{e.g.} \verb{\%<>\%} +\item objects used as conditions in \code{if} statements: \verb{if ()} +\item objects used to iterate over in \code{for} loops: \verb{for(i in )} +\item creating and evaluating language objects, \emph{e.g.} \verb{eval()} +} +} + \section{Using language objects with \code{within}}{ Passing language objects to \code{expr} is generally not intended but can be achieved with \code{do.call}. @@ -100,8 +174,14 @@ q <- eval_code(q, quote(library(checkmate))) q <- eval_code(q, expression(assert_number(a))) # retrieve code +q <- within(qenv(), {a <- 1; b <- 2}) get_code(q) get_code(q, deparse = FALSE) +get_code(q, names = "a") + +q <- qenv() +q <- eval_code(q, code = c("a <- 1", "b <- 2")) +get_code(q, names = "a") # evaluate code using within q <- qenv() From 2b2d16c61f90869127007cf7ceef81238fda0fb5 Mon Sep 17 00:00:00 2001 From: m7pr Date: Thu, 17 Oct 2024 15:19:19 +0200 Subject: [PATCH 04/11] remove library(random.cdisc.data) from tests --- tests/testthat/test-qenv_get_code.R | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tests/testthat/test-qenv_get_code.R b/tests/testthat/test-qenv_get_code.R index 356c97b4..6ff3ad68 100644 --- a/tests/testthat/test-qenv_get_code.R +++ b/tests/testthat/test-qenv_get_code.R @@ -678,7 +678,6 @@ testthat::test_that("understands @ usage and do not treat rhs of @ as objects (o testthat::test_that("library() and require() are always returned", { code <- c( "set.seed(1)", - "library(random.cdisc.data)", "require(dplyr)", "library(MultiAssayExperiment)", "x <- 5", @@ -688,7 +687,6 @@ testthat::test_that("library() and require() are always returned", { testthat::expect_identical( get_code(q, names = "x"), paste( - "library(random.cdisc.data)", "require(dplyr)", "library(MultiAssayExperiment)", "x <- 5", @@ -703,7 +701,6 @@ testthat::test_that("library() and require() are always returned", { testthat::test_that("data() call is returned when data name is provided as is", { code <- c( "set.seed(1)", - "library(random.cdisc.data)", "require(dplyr)", "library(MultiAssayExperiment)", "data(miniACC, envir = environment())", @@ -713,7 +710,6 @@ testthat::test_that("data() call is returned when data name is provided as is", testthat::expect_identical( get_code(q, names = "x"), paste( - "library(random.cdisc.data)", "require(dplyr)", "library(MultiAssayExperiment)", "data(miniACC, envir = environment())", @@ -726,7 +722,6 @@ testthat::test_that("data() call is returned when data name is provided as is", testthat::test_that("data() call is returned when data name is provided as a character", { code <- c( "set.seed(1)", - "library(random.cdisc.data)", "require(dplyr)", "library(MultiAssayExperiment)", "data('mtcars')", @@ -736,7 +731,6 @@ testthat::test_that("data() call is returned when data name is provided as a cha testthat::expect_identical( get_code(q, names = "z"), paste( - "library(random.cdisc.data)", "require(dplyr)", "library(MultiAssayExperiment)", "data(\"mtcars\")", From 8340d0c4c827f509c3bd600f8051ec0a21b6d739 Mon Sep 17 00:00:00 2001 From: m7pr Date: Thu, 17 Oct 2024 15:21:34 +0200 Subject: [PATCH 05/11] bring error messages to tests --- tests/testthat/test-qenv_eval_code.R | 2 +- tests/testthat/test-qenv_within.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-qenv_eval_code.R b/tests/testthat/test-qenv_eval_code.R index 1254b7bc..12a2a7a0 100644 --- a/tests/testthat/test-qenv_eval_code.R +++ b/tests/testthat/test-qenv_eval_code.R @@ -76,7 +76,7 @@ testthat::test_that("eval_code works with quoted code block", { }) testthat::test_that("eval_code fails with unquoted expression", { - testthat::expect_error(eval_code(qenv(), a <- b)) + testthat::expect_error(eval_code(qenv(), a <- b), "object 'b' not found") }) testthat::test_that("an error when calling eval_code returns a qenv.error object which has message and trace", { diff --git a/tests/testthat/test-qenv_within.R b/tests/testthat/test-qenv_within.R index 18eece32..6073d80b 100644 --- a/tests/testthat/test-qenv_within.R +++ b/tests/testthat/test-qenv_within.R @@ -93,7 +93,7 @@ testthat::test_that("external values are not taken from calling frame", { i <- subset(iris, Species == species) }) testthat::expect_s3_class(qq, "qenv.error") - testthat::expect_error(get_code(qq)) + testthat::expect_error(get_code(qq), "object 'species' not found") qq <- within(q, { i <- subset(iris, Species == species) From a6b26276a14f3af5ac87685ec428b79fe7b386d4 Mon Sep 17 00:00:00 2001 From: "27856297+dependabot-preview[bot]@users.noreply.github.com" <27856297+dependabot-preview[bot]@users.noreply.github.com> Date: Thu, 17 Oct 2024 13:23:51 +0000 Subject: [PATCH 06/11] [skip roxygen] [skip vbump] Roxygen Man Pages Auto Update --- man/qenv.Rd | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/man/qenv.Rd b/man/qenv.Rd index 4e002f15..4d382246 100644 --- a/man/qenv.Rd +++ b/man/qenv.Rd @@ -174,7 +174,10 @@ q <- eval_code(q, quote(library(checkmate))) q <- eval_code(q, expression(assert_number(a))) # retrieve code -q <- within(qenv(), {a <- 1; b <- 2}) +q <- within(qenv(), { + a <- 1 + b <- 2 +}) get_code(q) get_code(q, deparse = FALSE) get_code(q, names = "a") From 015737d7c19041f469f9a85bbb25cb6219a5641d Mon Sep 17 00:00:00 2001 From: m7pr Date: Thu, 17 Oct 2024 16:09:29 +0200 Subject: [PATCH 07/11] change MultiAssayExperiment to lifecycle package in tests --- tests/testthat/test-qenv_get_code.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/testthat/test-qenv_get_code.R b/tests/testthat/test-qenv_get_code.R index b662ce78..7f678959 100644 --- a/tests/testthat/test-qenv_get_code.R +++ b/tests/testthat/test-qenv_get_code.R @@ -679,7 +679,7 @@ testthat::test_that("library() and require() are always returned", { code <- c( "set.seed(1)", "require(dplyr)", - "library(MultiAssayExperiment)", + "library(lifecycle)", "x <- 5", "y <- 6" ) @@ -688,7 +688,7 @@ testthat::test_that("library() and require() are always returned", { get_code(q, names = "x"), paste( "require(dplyr)", - "library(MultiAssayExperiment)", + "library(lifecycle)", "x <- 5", sep = "\n" ) @@ -702,7 +702,7 @@ testthat::test_that("data() call is returned when data name is provided as is", code <- c( "set.seed(1)", "require(dplyr)", - "library(MultiAssayExperiment)", + "library(lifecycle)", "data(miniACC, envir = environment())", "x <- miniACC" ) @@ -711,7 +711,7 @@ testthat::test_that("data() call is returned when data name is provided as is", get_code(q, names = "x"), paste( "require(dplyr)", - "library(MultiAssayExperiment)", + "library(lifecycle)", "data(miniACC, envir = environment())", "x <- miniACC", sep = "\n" @@ -723,7 +723,7 @@ testthat::test_that("data() call is returned when data name is provided as a cha code <- c( "set.seed(1)", "require(dplyr)", - "library(MultiAssayExperiment)", + "library(lifecycle)", "data('mtcars')", "z <- mtcars" ) @@ -732,7 +732,7 @@ testthat::test_that("data() call is returned when data name is provided as a cha get_code(q, names = "z"), paste( "require(dplyr)", - "library(MultiAssayExperiment)", + "library(lifecycle)", "data(\"mtcars\")", "z <- mtcars", sep = "\n" From 41f6ee60d5bb8f0357d85a95be11f4ef7d134efa Mon Sep 17 00:00:00 2001 From: m7pr Date: Fri, 18 Oct 2024 09:27:52 +0200 Subject: [PATCH 08/11] remove miniACC as there is not MultiAssayExperiment --- tests/testthat/test-qenv_get_code.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/testthat/test-qenv_get_code.R b/tests/testthat/test-qenv_get_code.R index 7f678959..3bffb21b 100644 --- a/tests/testthat/test-qenv_get_code.R +++ b/tests/testthat/test-qenv_get_code.R @@ -703,8 +703,8 @@ testthat::test_that("data() call is returned when data name is provided as is", "set.seed(1)", "require(dplyr)", "library(lifecycle)", - "data(miniACC, envir = environment())", - "x <- miniACC" + "data(iris, envir = environment())", + "x <- iris" ) q <- eval_code(qenv(), code) testthat::expect_identical( @@ -712,8 +712,8 @@ testthat::test_that("data() call is returned when data name is provided as is", paste( "require(dplyr)", "library(lifecycle)", - "data(miniACC, envir = environment())", - "x <- miniACC", + "data(iris, envir = environment())", + "x <- iris", sep = "\n" ) ) From 31566a02a81bd0638a27516e367ed3e9ddc3cbaa Mon Sep 17 00:00:00 2001 From: m7pr Date: Fri, 18 Oct 2024 09:40:26 +0200 Subject: [PATCH 09/11] fix one more test --- tests/testthat/test-qenv_get_code.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/testthat/test-qenv_get_code.R b/tests/testthat/test-qenv_get_code.R index 3bffb21b..d248dac3 100644 --- a/tests/testthat/test-qenv_get_code.R +++ b/tests/testthat/test-qenv_get_code.R @@ -648,7 +648,8 @@ testthat::test_that("understands @ usage and do not treat rhs of @ as objects (o "a@x <- a@x + 2", "a@x <- x@a" ) - q <- eval_code(qenv(), code) + q <- qenv() + q@code <- code # we don't use eval_code so the code is not run testthat::expect_identical( get_code(q, names = "x"), paste( From 4fa61243399425ed42ec75ea7db16efab5949877 Mon Sep 17 00:00:00 2001 From: m7pr Date: Tue, 22 Oct 2024 16:27:10 +0200 Subject: [PATCH 10/11] commit changes from https://github.com/insightsengineering/teal.data/pull/340/files --- R/qenv-get_code.R | 5 ++ R/utils-get_code_dependency.R | 21 ++++- tests/testthat/test-qenv_get_code.R | 132 ++++++++++++++++++++++++++++ 3 files changed, 156 insertions(+), 2 deletions(-) diff --git a/R/qenv-get_code.R b/R/qenv-get_code.R index c417850d..cc88d633 100644 --- a/R/qenv-get_code.R +++ b/R/qenv-get_code.R @@ -112,6 +112,11 @@ setMethod("get_code", signature = "qenv", function(object, deparse = TRUE, names checkmate::assert_flag(deparse) checkmate::assert_character(names, min.len = 1L, null.ok = TRUE) + # Normalize in case special it is backticked + if (!is.null(names)) { + names <- gsub("^`(.*)`$", "\\1", names) + } + code <- if (!is.null(names)) { get_code_dependency(object@code, names, ...) } else { diff --git a/R/utils-get_code_dependency.R b/R/utils-get_code_dependency.R index 48fe6b12..a38b19bc 100644 --- a/R/utils-get_code_dependency.R +++ b/R/utils-get_code_dependency.R @@ -42,6 +42,7 @@ get_code_dependency <- function(code, names, check_names = TRUE) { code <- parse(text = code, keep.source = TRUE) pd <- utils::getParseData(code) + pd <- normalize_pd(pd) calls_pd <- extract_calls(pd) if (check_names) { @@ -175,7 +176,7 @@ fix_arrows <- function(calls) { sub_arrows <- function(call) { checkmate::assert_data_frame(call) map <- data.frame( - row.names = c("`<-`", "`<<-`", "`=`"), + row.names = c("<-", "<<-", "="), token = rep("LEFT_ASSIGN", 3), text = rep("<-", 3) ) @@ -297,7 +298,7 @@ extract_occurrence <- function(calls_pd) { # What occurs in a function body is not tracked. x <- call_pd[!is_in_function(call_pd), ] - sym_cond <- which(x$token %in% c("SYMBOL", "SYMBOL_FUNCTION_CALL")) + sym_cond <- which(x$token %in% c("SPECIAL", "SYMBOL", "SYMBOL_FUNCTION_CALL")) if (length(sym_cond) == 0) { return(character(0L)) @@ -434,3 +435,19 @@ detect_libraries <- function(calls_pd) { ) ) } + +#' Normalize parsed data removing backticks from symbols +#' +#' @param pd `data.frame` resulting from `utils::getParseData()` call. +#' +#' @return `data.frame` with backticks removed from `text` column for `SYMBOL` tokens. +#' +#' @keywords internal +#' @noRd +normalize_pd <- function(pd) { + # Remove backticks from SYMBOL tokens + symbol_index <- grepl("^SYMBOL.*$", pd$token) + pd[symbol_index, "text"] <- gsub("^`(.*)`$", "\\1", pd[symbol_index, "text"]) + + pd +} diff --git a/tests/testthat/test-qenv_get_code.R b/tests/testthat/test-qenv_get_code.R index d248dac3..902592f2 100644 --- a/tests/testthat/test-qenv_get_code.R +++ b/tests/testthat/test-qenv_get_code.R @@ -740,3 +740,135 @@ testthat::test_that("data() call is returned when data name is provided as a cha ) ) }) + + +testthat::describe("Backticked symbol", { + testthat::it("code can be retrieved with get_code", { + td <- within( + teal_data(), + { + `%cbind%` <- function(lhs, rhs) cbind(lhs, rhs) # nolint: object_name. + iris_ds <- iris %cbind% data.frame(new_col = "new column") + } + ) + + testthat::expect_identical( + get_code(td, datanames = "%cbind%"), + "`%cbind%` <- function(lhs, rhs) cbind(lhs, rhs)" + ) + }) + + testthat::it("code can be retrieved with get_code", { + td <- within( + teal_data(), + { + `%cbind%` <- function(lhs, rhs) cbind(lhs, rhs) # nolint: object_name. + iris_ds <- iris %cbind% data.frame(new_col = "new column") + } + ) + + testthat::expect_identical( + get_code(td, datanames = "`%cbind%`"), + "`%cbind%` <- function(lhs, rhs) cbind(lhs, rhs)" + ) + }) + + testthat::it("starting with underscore is detected in code dependency", { + td <- within( + teal_data(), + { + `_add_column_` <- function(lhs, rhs) cbind(lhs, rhs) # nolint: object_name. + iris_ds <- `_add_column_`(iris, data.frame(new_col = "new column")) + } + ) + + testthat::expect_identical( + get_code(td, datanames = "iris_ds"), + paste( + sep = "\n", + "`_add_column_` <- function(lhs, rhs) cbind(lhs, rhs)", + "iris_ds <- `_add_column_`(iris, data.frame(new_col = \"new column\"))" + ) + ) + }) + + testthat::it("with space character is detected in code dependency", { + td <- within( + teal_data(), + { + `add column` <- function(lhs, rhs) cbind(lhs, rhs) # nolint: object_name. + iris_ds <- `add column`(iris, data.frame(new_col = "new column")) + } + ) + + testthat::expect_identical( + get_code(td, datanames = "iris_ds"), + paste( + sep = "\n", + "`add column` <- function(lhs, rhs) cbind(lhs, rhs)", + "iris_ds <- `add column`(iris, data.frame(new_col = \"new column\"))" + ) + ) + }) + + testthat::it("without special characters is cleaned and detected in code dependency", { + td <- within( + teal_data(), + { + `add_column` <- function(lhs, rhs) cbind(lhs, rhs) + iris_ds <- `add_column`(iris, data.frame(new_col = "new column")) + } + ) + + testthat::expect_identical( + get_code(td, datanames = "iris_ds"), + paste( + sep = "\n", + "add_column <- function(lhs, rhs) cbind(lhs, rhs)", + "iris_ds <- add_column(iris, data.frame(new_col = \"new column\"))" + ) + ) + }) + + testthat::it("with non-native pipe used as function is detected code dependency", { + td <- within( + teal_data(), + { + `%add_column%` <- function(lhs, rhs) cbind(lhs, rhs) + iris_ds <- `%add_column%`(iris, data.frame(new_col = "new column")) + } + ) + + # Note that the original code is changed to use the non-native pipe operator + # correctly. + testthat::expect_identical( + get_code(td, datanames = "iris_ds"), + paste( + sep = "\n", + "`%add_column%` <- function(lhs, rhs) cbind(lhs, rhs)", + "iris_ds <- iris %add_column% data.frame(new_col = \"new column\")" + ) + ) + }) + + testthat::it("with non-native pipe is detected code dependency", { + td <- within( + teal_data(), + { + `%add_column%` <- function(lhs, rhs) cbind(lhs, rhs) + iris_ds <- iris %add_column% data.frame(new_col = "new column") + } + ) + + # Note that the original code is changed to use the non-native pipe operator + # correctly. + testthat::expect_identical( + get_code(td, datanames = "iris_ds"), + paste( + sep = "\n", + "`%add_column%` <- function(lhs, rhs) cbind(lhs, rhs)", + "iris_ds <- iris %add_column% data.frame(new_col = \"new column\")" + ) + ) + }) +}) From ea337f18241494064fa303c214b78e23b2fcb133 Mon Sep 17 00:00:00 2001 From: m7pr Date: Tue, 22 Oct 2024 16:34:16 +0200 Subject: [PATCH 11/11] change teal_data+datanames to qenv+names in TESTS --- tests/testthat/test-qenv_get_code.R | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/tests/testthat/test-qenv_get_code.R b/tests/testthat/test-qenv_get_code.R index 902592f2..0f9aedf9 100644 --- a/tests/testthat/test-qenv_get_code.R +++ b/tests/testthat/test-qenv_get_code.R @@ -745,7 +745,7 @@ testthat::test_that("data() call is returned when data name is provided as a cha testthat::describe("Backticked symbol", { testthat::it("code can be retrieved with get_code", { td <- within( - teal_data(), + qenv(), { `%cbind%` <- function(lhs, rhs) cbind(lhs, rhs) # nolint: object_name. iris_ds <- iris %cbind% data.frame(new_col = "new column") @@ -753,14 +753,14 @@ testthat::describe("Backticked symbol", { ) testthat::expect_identical( - get_code(td, datanames = "%cbind%"), + get_code(td, names = "%cbind%"), "`%cbind%` <- function(lhs, rhs) cbind(lhs, rhs)" ) }) testthat::it("code can be retrieved with get_code", { td <- within( - teal_data(), + qenv(), { `%cbind%` <- function(lhs, rhs) cbind(lhs, rhs) # nolint: object_name. iris_ds <- iris %cbind% data.frame(new_col = "new column") @@ -768,14 +768,14 @@ testthat::describe("Backticked symbol", { ) testthat::expect_identical( - get_code(td, datanames = "`%cbind%`"), + get_code(td, names = "`%cbind%`"), "`%cbind%` <- function(lhs, rhs) cbind(lhs, rhs)" ) }) testthat::it("starting with underscore is detected in code dependency", { td <- within( - teal_data(), + qenv(), { `_add_column_` <- function(lhs, rhs) cbind(lhs, rhs) # nolint: object_name. iris_ds <- `_add_column_`(iris, data.frame(new_col = "new column")) @@ -783,7 +783,7 @@ testthat::describe("Backticked symbol", { ) testthat::expect_identical( - get_code(td, datanames = "iris_ds"), + get_code(td, names = "iris_ds"), paste( sep = "\n", "`_add_column_` <- function(lhs, rhs) cbind(lhs, rhs)", @@ -794,7 +794,7 @@ testthat::describe("Backticked symbol", { testthat::it("with space character is detected in code dependency", { td <- within( - teal_data(), + qenv(), { `add column` <- function(lhs, rhs) cbind(lhs, rhs) # nolint: object_name. iris_ds <- `add column`(iris, data.frame(new_col = "new column")) @@ -802,7 +802,7 @@ testthat::describe("Backticked symbol", { ) testthat::expect_identical( - get_code(td, datanames = "iris_ds"), + get_code(td, names = "iris_ds"), paste( sep = "\n", "`add column` <- function(lhs, rhs) cbind(lhs, rhs)", @@ -813,7 +813,7 @@ testthat::describe("Backticked symbol", { testthat::it("without special characters is cleaned and detected in code dependency", { td <- within( - teal_data(), + qenv(), { `add_column` <- function(lhs, rhs) cbind(lhs, rhs) iris_ds <- `add_column`(iris, data.frame(new_col = "new column")) @@ -821,7 +821,7 @@ testthat::describe("Backticked symbol", { ) testthat::expect_identical( - get_code(td, datanames = "iris_ds"), + get_code(td, names = "iris_ds"), paste( sep = "\n", "add_column <- function(lhs, rhs) cbind(lhs, rhs)", @@ -832,7 +832,7 @@ testthat::describe("Backticked symbol", { testthat::it("with non-native pipe used as function is detected code dependency", { td <- within( - teal_data(), + qenv(), { `%add_column%` <- function(lhs, rhs) cbind(lhs, rhs) iris_ds <- `%add_column%`(iris, data.frame(new_col = "new column")) @@ -842,7 +842,7 @@ testthat::describe("Backticked symbol", { # Note that the original code is changed to use the non-native pipe operator # correctly. testthat::expect_identical( - get_code(td, datanames = "iris_ds"), + get_code(td, names = "iris_ds"), paste( sep = "\n", "`%add_column%` <- function(lhs, rhs) cbind(lhs, rhs)", @@ -853,7 +853,7 @@ testthat::describe("Backticked symbol", { testthat::it("with non-native pipe is detected code dependency", { td <- within( - teal_data(), + qenv(), { `%add_column%` <- function(lhs, rhs) cbind(lhs, rhs) iris_ds <- iris %add_column% data.frame(new_col = "new column") @@ -863,7 +863,7 @@ testthat::describe("Backticked symbol", { # Note that the original code is changed to use the non-native pipe operator # correctly. testthat::expect_identical( - get_code(td, datanames = "iris_ds"), + get_code(td, names = "iris_ds"), paste( sep = "\n", "`%add_column%` <- function(lhs, rhs) cbind(lhs, rhs)",