From 03d952d2bfeb6cb225998ac9e8e6a4f32c753feb Mon Sep 17 00:00:00 2001 From: Marcin <133694481+m7pr@users.noreply.github.com> Date: Tue, 22 Oct 2024 17:03:46 +0200 Subject: [PATCH] 210 bring `get_code_dependency` (#214) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BLOCKED by - https://github.com/insightsengineering/teal.data/pull/340 Closes - #210 Companion to - https://github.com/insightsengineering/teal.data/pull/343 - https://github.com/insightsengineering/teal/pull/1388 # Description Brings `names` parameter to `get_code` so that you can limit returned code to specific objects (and the lines that create those objects). `get_code_dependency` was moved from `teal.data` # Tested with ``` r library(teal.code) # EVAL CODE q <- qenv() q <- eval_code(q, code = c("a <- 1", "b <- 2")) q@code #> [1] "a <- 1\nb <- 2" get_code(q, names = "a") #> [1] "a <- 1" # WITHIN q <- qenv() q <- within(q, {a <- 1; b <- 2}) q@code #> [1] "a <- 1\nb <- 2" get_code(q, names = "a") #> [1] "a <- 1" # OLD TEAL.DATA t <- teal.data::teal_data(a = 5, code = c("a <- 1", "b <- 2")) t@code #> [1] "a <- 1\nb <- 2" teal.data::get_code(t, datanames = 'a') #> Warning in .local(object, deparse, ...): get_code(datanames) was deprecated in #> teal.data 0.6.1, use get_code(names) instead. #> [1] "a <- 1" ``` Created on 2024-10-16 with [reprex v2.1.1](https://reprex.tidyverse.org) # Local tests ```r > devtools::test() ℹ Testing teal.code ✔ | F W S OK | Context ✔ | 12 | qenv_concat ✔ | 8 | qenv_constructor ✔ | 26 | qenv_eval_code ✔ | 2 60 | qenv_get_code [1.1s] ✔ | 10 | qenv_get_var ✔ | 7 | qenv_get_warnings ✔ | 40 | qenv_join ✔ | 14 | qenv_within ✔ | 12 | utils ══ Results ══════════════════════════════════════════════════════════════════ Duration: 2.2 s ── Skipped tests (2) ──────────────────────────────────────────────────────── • This is not urgent and can be ommitted with @linksto tag. (1): test-qenv_get_code.R:526:3 • We will not resolve this, as this requires code evaluation. (1): test-qenv_get_code.R:318:3 [ FAIL 0 | WARN 0 | SKIP 2 | PASS 189 ] ```` # Local R CMD CHECK ```r ── R CMD check results ──────────────────────────────────── teal.code 0.5.0.9010 ──── Duration: 33.5s ❯ checking for future file timestamps ... NOTE unable to verify current time 0 errors ✔ | 0 warnings ✔ | 1 note ✖ R CMD check succeeded ``` --------- Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: 27856297+dependabot-preview[bot]@users.noreply.github.com <27856297+dependabot-preview[bot]@users.noreply.github.com> --- DESCRIPTION | 1 + NEWS.md | 5 + R/qenv-get_code.R | 102 +++- R/utils-get_code_dependency.R | 453 +++++++++++++++ man/get_code_dependency.Rd | 37 ++ man/qenv.Rd | 85 ++- tests/testthat/test-qenv_get_code.R | 828 ++++++++++++++++++++++++++++ 7 files changed, 1504 insertions(+), 7 deletions(-) create mode 100644 R/utils-get_code_dependency.R create mode 100644 man/get_code_dependency.Rd diff --git a/DESCRIPTION b/DESCRIPTION index ca32a094..7977eabc 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -66,4 +66,5 @@ Collate: 'qenv-show.R' 'qenv-within.R' 'teal.code-package.R' + 'utils-get_code_dependency.R' 'utils.R' diff --git a/NEWS.md b/NEWS.md index 22bce150..e33438f8 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,10 @@ # teal.code 0.5.0.9010 +### Enhancements + +* `get_code()` was extended with `names` parameter and allows the code extraction to be limited to objects stored in +`qenv` but limited to `names`. + # teal.code 0.5.0 ### Breaking Change diff --git a/R/qenv-get_code.R b/R/qenv-get_code.R index 70584c11..cc88d633 100644 --- a/R/qenv-get_code.R +++ b/R/qenv-get_code.R @@ -5,15 +5,92 @@ #' #' @param object (`qenv`) #' @param deparse (`logical(1)`) flag specifying whether to return code as `character` or `expression`. +#' @param names `r lifecycle::badge("experimental")` (`character`) vector of object names to return the code for. +#' For more details see the "Extracting dataset-specific code" section. #' @param ... see `Details` #' +#' +#' @section Extracting dataset-specific code: +#' When `names` is specified, the code returned will be limited to the lines needed to _create_ +#' the requested objects. The code stored in the `@code` slot is analyzed statically to determine +#' which lines the objects of interest depend upon. The analysis works well when objects are created +#' with standard infix assignment operators (see `?assignOps`) but it can fail in some situations. +#' +#' Consider the following examples: +#' +#' _Case 1: Usual assignments._ +#' ```r +#' q1 <- qenv() |> +#' within({ +#' foo <- function(x) { +#' x + 1 +#' } +#' x <- 0 +#' y <- foo(x) +#' }) +#' get_code(q1, names = "y") +#' ``` +#' `x` has no dependencies, so `get_code(data, names = "x")` will return only the second call.\cr +#' `y` depends on `x` and `foo`, so `get_code(data, names = "y")` will contain all three calls. +#' +#' _Case 2: Some objects are created by a function's side effects._ +#' ```r +#' q2 <- qenv() |> +#' within({ +#' foo <- function() { +#' x <<- x + 1 +#' } +#' x <- 0 +#' foo() +#' y <- x +#' }) +#' get_code(q2, names = "y") +#' ``` +#' Here, `y` depends on `x` but `x` is modified by `foo` as a side effect (not by reassignment) +#' and so `get_code(data, names = "y")` will not return the `foo()` call.\cr +#' To overcome this limitation, code dependencies can be specified manually. +#' Lines where side effects occur can be flagged by adding "`# @linksto `" at the end.\cr +#' Note that `within` evaluates code passed to `expr` as is and comments are ignored. +#' In order to include comments in code one must use the `eval_code` function instead. +#' +#' ```r +#' q3 <- qenv() |> +#' eval_code(" +#' foo <- function() { +#' x <<- x + 1 +#' } +#' x <- 0 +#' foo() # @linksto x +#' y <- x +#' ") +#' get_code(q3, names = "y") +#' ``` +#' Now the `foo()` call will be properly included in the code required to recreate `y`. +#' +#' Note that two functions that create objects as side effects, `assign` and `data`, are handled automatically. +#' +#' Here are known cases where manual tagging is necessary: +#' - non-standard assignment operators, _e.g._ `%<>%` +#' - objects used as conditions in `if` statements: `if ()` +#' - objects used to iterate over in `for` loops: `for(i in )` +#' - creating and evaluating language objects, _e.g._ `eval()` +#' #' @return #' `get_code` returns the traced code (from `@code` slot) in the form specified by `deparse`. #' #' @examples #' # retrieve code +#' q <- within(qenv(), { +#' a <- 1 +#' b <- 2 +#' }) #' get_code(q) #' get_code(q, deparse = FALSE) +#' get_code(q, names = "a") +#' +#' q <- qenv() +#' q <- eval_code(q, code = c("a <- 1", "b <- 2")) +#' get_code(q, names = "a") #' #' @name get_code #' @rdname qenv @@ -21,7 +98,7 @@ #' @aliases get_code,qenv.error-method #' #' @export -setGeneric("get_code", function(object, deparse = TRUE, ...) { +setGeneric("get_code", function(object, deparse = TRUE, names = NULL, ...) { # this line forces evaluation of object before passing to the generic # needed for error handling to work properly grDevices::pdf(nullfile()) @@ -31,16 +108,29 @@ setGeneric("get_code", function(object, deparse = TRUE, ...) { standardGeneric("get_code") }) -setMethod("get_code", signature = "qenv", function(object, deparse = TRUE) { +setMethod("get_code", signature = "qenv", function(object, deparse = TRUE, names = NULL, ...) { checkmate::assert_flag(deparse) + checkmate::assert_character(names, min.len = 1L, null.ok = TRUE) + + # Normalize in case special it is backticked + if (!is.null(names)) { + names <- gsub("^`(.*)`$", "\\1", names) + } + + code <- if (!is.null(names)) { + get_code_dependency(object@code, names, ...) + } else { + object@code + } + if (deparse) { - if (length(object@code) == 0) { - object@code + if (length(code) == 0) { + code } else { - paste(object@code, collapse = "\n") + paste(code, collapse = "\n") } } else { - parse(text = paste(c("{", object@code, "}"), collapse = "\n"), keep.source = TRUE) + parse(text = paste(c("{", code, "}"), collapse = "\n"), keep.source = TRUE) } }) diff --git a/R/utils-get_code_dependency.R b/R/utils-get_code_dependency.R new file mode 100644 index 00000000..a38b19bc --- /dev/null +++ b/R/utils-get_code_dependency.R @@ -0,0 +1,453 @@ +# get_code_dependency ---- + +#' Get code dependency of an object +#' +#' Extract subset of code required to reproduce specific object(s), including code producing side-effects. +#' +#' Given a character vector with code, this function will extract the part of the code responsible for creating +#' the variables specified by `names`. +#' This includes the final call that creates the variable(s) in question as well as all _parent calls_, +#' _i.e._ calls that create variables used in the final call and their parents, etc. +#' Also included are calls that create side-effects like establishing connections. +#' +#' It is assumed that object dependency is established by using three assignment operators: `<-`, `=`, and `->` . +#' Other assignment methods (`assign`, `<<-`) or non-standard-evaluation methods are not supported. +#' +#' Side-effects are not detected automatically and must be marked in the code. +#' Add `# @linksto object` at the end of a line where a side-effect occurs to specify that this line is required +#' to reproduce a variable called `object`. +#' +#' @param code `character` with the code. +#' @param names `character` vector of object names. +#' @param check_names `logical(1)` flag specifying if a warning for non-existing names should be displayed. +#' +#' @return Character vector, a subset of `code`. +#' Note that subsetting is actually done on the calls `code`, not necessarily on the elements of the vector. +#' +#' @keywords internal +get_code_dependency <- function(code, names, check_names = TRUE) { + checkmate::assert_character(code) + checkmate::assert_character(names, any.missing = FALSE) + + if (identical(code, character(0)) || identical(trimws(code), "")) { + return(code) + } + + # If code is bound in curly brackets, remove them. + tcode <- trimws(code) + if (any(grepl("^\\{.*\\}$", tcode))) { + code <- sub("^\\{(.*)\\}$", "\\1", tcode) + } + + + code <- parse(text = code, keep.source = TRUE) + pd <- utils::getParseData(code) + pd <- normalize_pd(pd) + calls_pd <- extract_calls(pd) + + if (check_names) { + # Detect if names are actually in code. + symbols <- unlist(lapply(calls_pd, function(call) call[call$token == "SYMBOL", "text"])) + if (any(pd$text == "assign")) { + assign_calls <- Filter(function(call) find_call(call, "assign"), calls_pd) + ass_str <- unlist(lapply(assign_calls, function(call) call[call$token == "STR_CONST", "text"])) + ass_str <- gsub("^['\"]|['\"]$", "", ass_str) + symbols <- c(ass_str, symbols) + } + if (!all(names %in% unique(symbols))) { + warning("Object(s) not found in code: ", toString(setdiff(names, symbols))) + } + } + + graph <- code_graph(calls_pd) + ind <- unlist(lapply(names, function(x) graph_parser(x, graph))) + + lib_ind <- detect_libraries(calls_pd) + + as.character(code[sort(unique(c(lib_ind, ind)))]) +} + +#' Locate function call token +#' +#' Determine which row of parsed data is specific `SYMBOL_FUNCTION_CALL` token. +#' +#' Useful for determining occurrence of `assign` or `data` functions in an input call. +#' +#' @param call_pd `data.frame` as returned by `extract_calls()` +#' @param text `character(1)` to look for in `text` column of `call_pd` +#' +#' @return +#' Single integer specifying row in `call_pd` where `token` is `SYMBOL_FUNCTION_CALL` and `text` is `text`. +#' 0 if not found. +#' +#' @keywords internal +#' @noRd +find_call <- function(call_pd, text) { + checkmate::check_data_frame(call_pd) + checkmate::check_names(call_pd, must.include = c("token", "text")) + checkmate::check_string(text) + + ans <- which(call_pd$token == "SYMBOL_FUNCTION_CALL" & call_pd$text == text) + if (length(ans)) { + ans + } else { + 0L + } +} + +#' Split the result of `utils::getParseData()` into separate calls +#' +#' @param pd (`data.frame`) A result of `utils::getParseData()`. +#' +#' @return +#' A `list` of `data.frame`s. +#' Each element is a subset of `pd` corresponding to one call in the original code from which `pd` was obtained. +#' Only four columns (`"token"`, `"text"`, `"id"`, `"parent"`) are kept, the rest is discarded. +#' +#' @keywords internal +#' @noRd +extract_calls <- function(pd) { + calls <- lapply( + pd[pd$parent == 0, "id"], + function(parent) { + rbind( + pd[pd$id == parent, c("token", "text", "id", "parent")], + get_children(pd = pd, parent = parent) + ) + } + ) + calls <- Filter(function(call) !(nrow(call) == 1 && call$token == "';'"), calls) + calls <- Filter(Negate(is.null), calls) + calls <- fix_shifted_comments(calls) + fix_arrows(calls) +} + +#' @keywords internal +#' @noRd +get_children <- function(pd, parent) { + idx_children <- abs(pd$parent) == parent + children <- pd[idx_children, c("token", "text", "id", "parent")] + if (nrow(children) == 0) { + return(NULL) + } + + if (parent > 0) { + do.call(rbind, c(list(children), lapply(children$id, get_children, pd = pd))) + } +} + +#' Fixes edge case of comments being shifted to the next call. +#' @keywords internal +#' @noRd +fix_shifted_comments <- function(calls) { + # If the first or the second token is a @linksto COMMENT, + # then it belongs to the previous call. + if (length(calls) >= 2) { + for (i in 2:length(calls)) { + comment_idx <- grep("@linksto", calls[[i]][, "text"]) + if (isTRUE(comment_idx[1] <= 2)) { + calls[[i - 1]] <- rbind( + calls[[i - 1]], + calls[[i]][seq_len(comment_idx[1]), ] + ) + calls[[i]] <- calls[[i]][-seq_len(comment_idx[1]), ] + } + } + } + Filter(nrow, calls) +} + +#' Fixes edge case of `<-` assignment operator being called as function, +#' which is \code{`<-`(y,x)} instead of traditional `y <- x`. +#' @keywords internal +#' @noRd +fix_arrows <- function(calls) { + checkmate::assert_list(calls) + lapply(calls, function(call) { + sym_fun <- call$token == "SYMBOL_FUNCTION_CALL" + call[sym_fun, ] <- sub_arrows(call[sym_fun, ]) + call + }) +} + +#' Execution of assignment operator substitutions for a call. +#' @keywords internal +#' @noRd +sub_arrows <- function(call) { + checkmate::assert_data_frame(call) + map <- data.frame( + row.names = c("<-", "<<-", "="), + token = rep("LEFT_ASSIGN", 3), + text = rep("<-", 3) + ) + sub_ids <- call$text %in% rownames(map) + call[sub_ids, c("token", "text")] <- map[call$text[sub_ids], ] + call +} + +# code_graph ---- + +#' Create object dependencies graph within parsed code +#' +#' Builds dependency graph that identifies dependencies between objects in parsed code. +#' Helps understand which objects depend on which. +#' +#' @param calls_pd `list` of `data.frame`s; +#' result of `utils::getParseData()` split into subsets representing individual calls; +#' created by `extract_calls()` function +#' +#' @return +#' A list (of length of input `calls_pd`) where each element represents one call. +#' Each element is a character vector listing names of objects that depend on this call +#' and names of objects that this call depends on. +#' Dependencies are listed after the `"<-"` string, e.g. `c("a", "<-", "b", "c")` means that in this call object `a` +#' depends on objects `b` and `c`. +#' If a call is tagged with `@linksto a`, then object `a` is understood to depend on that call. +#' +#' @keywords internal +#' @noRd +code_graph <- function(calls_pd) { + cooccurrence <- extract_occurrence(calls_pd) + + side_effects <- extract_side_effects(calls_pd) + + mapply(c, side_effects, cooccurrence, SIMPLIFY = FALSE) +} + +#' Extract object occurrence +#' +#' Extracts objects occurrence within calls passed by `calls_pd`. +#' Also detects which objects depend on which within a call. +#' +#' @param calls_pd `list` of `data.frame`s; +#' result of `utils::getParseData()` split into subsets representing individual calls; +#' created by `extract_calls()` function +#' +#' @return +#' A list (of length of input `calls_pd`) where each element represents one call. +#' Each element is a character vector listing names of objects that depend on this call +#' and names of objects that this call depends on. +#' Dependencies are listed after the `"<-"` string, e.g. `c("a", "<-", "b", "c")` means that in this call object `a` +#' depends on objects `b` and `c`. +#' If a call is tagged with `@linksto a`, then object `a` is understood to depend on that call. +#' +#' @keywords internal +#' @noRd +extract_occurrence <- function(calls_pd) { + is_in_function <- function(x) { + # If an object is a function parameter, + # then in calls_pd there is a `SYMBOL_FORMALS` entry for that object. + function_id <- x[x$token == "FUNCTION", "parent"] + if (length(function_id)) { + x$id %in% get_children(x, function_id[1])$id + } else { + rep(FALSE, nrow(x)) + } + } + in_parenthesis <- function(x) { + if (any(x$token %in% c("LBB", "'['"))) { + id_start <- min(x$id[x$token %in% c("LBB", "'['")]) + id_end <- min(x$id[x$token == "']'"]) + x$text[x$token == "SYMBOL" & x$id > id_start & x$id < id_end] + } + } + lapply( + calls_pd, + function(call_pd) { + # Handle data(object)/data("object")/data(object, envir = ) independently. + data_call <- find_call(call_pd, "data") + if (data_call) { + sym <- call_pd[data_call + 1, "text"] + return(c(gsub("^['\"]|['\"]$", "", sym), "<-")) + } + # Handle assign(x = ). + assign_call <- find_call(call_pd, "assign") + if (assign_call) { + # Check if parameters were named. + # "','" is for unnamed parameters, where "SYMBOL_SUB" is for named. + # "EQ_SUB" is for `=` appearing after the name of the named parameter. + if (any(call_pd$token == "SYMBOL_SUB")) { + params <- call_pd[call_pd$token %in% c("SYMBOL_SUB", "','", "EQ_SUB"), "text"] + # Remove sequence of "=", ",". + if (length(params > 1)) { + remove <- integer(0) + for (i in 2:length(params)) { + if (params[i - 1] == "=" & params[i] == ",") { + remove <- c(remove, i - 1, i) + } + } + if (length(remove)) params <- params[-remove] + } + pos <- match("x", setdiff(params, ","), nomatch = match(",", params, nomatch = 0)) + if (!pos) { + return(character(0L)) + } + # pos is indicator of the place of 'x' + # 1. All parameters are named, but none is 'x' - return(character(0L)) + # 2. Some parameters are named, 'x' is in named parameters: match("x", setdiff(params, ",")) + # - check "x" in params being just a vector of named parameters. + # 3. Some parameters are named, 'x' is not in named parameters + # - check first appearance of "," (unnamed parameter) in vector parameters. + } else { + # Object is the first entry after 'assign'. + pos <- 1 + } + sym <- call_pd[assign_call + pos, "text"] + return(c(gsub("^['\"]|['\"]$", "", sym), "<-")) + } + + # What occurs in a function body is not tracked. + x <- call_pd[!is_in_function(call_pd), ] + sym_cond <- which(x$token %in% c("SPECIAL", "SYMBOL", "SYMBOL_FUNCTION_CALL")) + + if (length(sym_cond) == 0) { + return(character(0L)) + } + # Watch out for SYMBOLS after $ and @. For x$a x@a: x is object, a is not. + # For x$a, a's ID is $'s ID-2 so we need to remove all IDs that have ID = $ID - 2. + dollar_ids <- x[x$token %in% c("'$'", "'@'"), "id"] + if (length(dollar_ids)) { + object_ids <- x[sym_cond, "id"] + after_dollar <- object_ids[(object_ids - 2) %in% dollar_ids] + sym_cond <- setdiff(sym_cond, which(x$id %in% after_dollar)) + } + + ass_cond <- grep("ASSIGN", x$token) + if (!length(ass_cond)) { + return(c("<-", unique(x[sym_cond, "text"]))) + } + + sym_cond <- sym_cond[sym_cond > ass_cond] # NOTE 1 + # If there was an assignment operation detect direction of it. + if (unique(x$text[ass_cond]) == "->") { # NOTE 2 + sym_cond <- rev(sym_cond) + } + + after <- match(min(x$id[ass_cond]), sort(x$id[c(min(ass_cond), sym_cond)])) - 1 + ans <- append(x[sym_cond, "text"], "<-", after = max(1, after)) + roll <- in_parenthesis(call_pd) + if (length(roll)) { + c(setdiff(ans, roll), roll) + } else { + ans + } + + ### NOTE 2: What if there are 2 assignments: e.g. a <- b -> c. + ### NOTE 1: For cases like 'eval(expression(b <- b + 2))' removes 'eval(expression('. + } + ) +} + +#' Extract side effects +#' +#' Extracts all object names from the code that are marked with `@linksto` tag. +#' +#' The code may contain functions calls that create side effects, e.g. modify the environment. +#' Static code analysis may be insufficient to determine which objects are created or modified by such a function call. +#' The `@linksto` comment tag is introduced to mark a call as having a (side) effect on one or more objects. +#' With this tag a complete object dependency structure can be established. +#' Read more about side effects and the usage of `@linksto` tag in [`get_code_dependencies()`] function. +#' +#' @param calls_pd `list` of `data.frame`s; +#' result of `utils::getParseData()` split into subsets representing individual calls; +#' created by `extract_calls()` function +#' +#' @return +#' A list of length equal to that of `calls_pd`, where each element is a character vector of names of objects +#' depending a call tagged with `@linksto` in a corresponding element of `calls_pd`. +#' +#' @keywords internal +#' @noRd +extract_side_effects <- function(calls_pd) { + lapply( + calls_pd, + function(x) { + linksto <- grep("@linksto", x[x$token == "COMMENT", "text"], value = TRUE) + unlist(strsplit(sub("\\s*#\\s*@linksto\\s+", "", linksto), "\\s+")) + } + ) +} + +# graph_parser ---- + +#' Return the indices of calls needed to reproduce an object +#' +#' @param x The name of the object to return code for. +#' @param graph A result of `code_graph()`. +#' +#' @return +#' Integer vector of indices that can be applied to `graph` to obtain all calls required to reproduce object `x`. +#' +#' @keywords internal +#' @noRd +graph_parser <- function(x, graph) { + occurrence <- vapply( + graph, function(call) { + ind <- match("<-", call, nomatch = length(call) + 1L) + x %in% call[seq_len(ind - 1L)] + }, + logical(1) + ) + + dependencies <- lapply(graph[occurrence], function(call) { + ind <- match("<-", call, nomatch = 0L) + call[(ind + 1L):length(call)] + }) + dependencies <- setdiff(unlist(dependencies), x) + + if (length(dependencies) && any(occurrence)) { + dependency_ids <- lapply(dependencies, function(dependency) { + graph_parser(dependency, graph[1:max(which(occurrence))]) + }) + sort(unique(c(which(occurrence), unlist(dependency_ids)))) + } else { + which(occurrence) + } +} + + +# default_side_effects -------------------------------------------------------------------------------------------- + +#' Detect library calls +#' +#' Detects `library()` and `require()` function calls. +#' +#' @param calls_pd `list` of `data.frame`s; +#' result of `utils::getParseData()` split into subsets representing individual calls; +#' created by `extract_calls()` function +#' +#' @return +#' Integer vector of indices that can be applied to `graph` (result of `code_graph()`) to obtain all calls containing +#' `library()` or `require()` calls that are always returned for reproducibility. +#' +#' @keywords internal +#' @noRd +detect_libraries <- function(calls_pd) { + defaults <- c("library", "require") + + which( + vapply( + calls_pd, + function(call) { + any(call$token == "SYMBOL_FUNCTION_CALL" & call$text %in% defaults) + }, + logical(1) + ) + ) +} + +#' Normalize parsed data removing backticks from symbols +#' +#' @param pd `data.frame` resulting from `utils::getParseData()` call. +#' +#' @return `data.frame` with backticks removed from `text` column for `SYMBOL` tokens. +#' +#' @keywords internal +#' @noRd +normalize_pd <- function(pd) { + # Remove backticks from SYMBOL tokens + symbol_index <- grepl("^SYMBOL.*$", pd$token) + pd[symbol_index, "text"] <- gsub("^`(.*)`$", "\\1", pd[symbol_index, "text"]) + + pd +} diff --git a/man/get_code_dependency.Rd b/man/get_code_dependency.Rd new file mode 100644 index 00000000..8db40903 --- /dev/null +++ b/man/get_code_dependency.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils-get_code_dependency.R +\name{get_code_dependency} +\alias{get_code_dependency} +\title{Get code dependency of an object} +\usage{ +get_code_dependency(code, names, check_names = TRUE) +} +\arguments{ +\item{code}{\code{character} with the code.} + +\item{names}{\code{character} vector of object names.} + +\item{check_names}{\code{logical(1)} flag specifying if a warning for non-existing names should be displayed.} +} +\value{ +Character vector, a subset of \code{code}. +Note that subsetting is actually done on the calls \code{code}, not necessarily on the elements of the vector. +} +\description{ +Extract subset of code required to reproduce specific object(s), including code producing side-effects. +} +\details{ +Given a character vector with code, this function will extract the part of the code responsible for creating +the variables specified by \code{names}. +This includes the final call that creates the variable(s) in question as well as all \emph{parent calls}, +\emph{i.e.} calls that create variables used in the final call and their parents, etc. +Also included are calls that create side-effects like establishing connections. + +It is assumed that object dependency is established by using three assignment operators: \verb{<-}, \code{=}, and \verb{->} . +Other assignment methods (\code{assign}, \verb{<<-}) or non-standard-evaluation methods are not supported. + +Side-effects are not detected automatically and must be marked in the code. +Add \verb{# @linksto object} at the end of a line where a side-effect occurs to specify that this line is required +to reproduce a variable called \code{object}. +} +\keyword{internal} diff --git a/man/qenv.Rd b/man/qenv.Rd index 75bd486e..4d382246 100644 --- a/man/qenv.Rd +++ b/man/qenv.Rd @@ -26,7 +26,7 @@ new_qenv(env = new.env(parent = parent.env(.GlobalEnv)), code = character()) eval_code(object, code) -get_code(object, deparse = TRUE, ...) +get_code(object, deparse = TRUE, names = NULL, ...) \method{within}{qenv}(data, expr, ...) } @@ -40,6 +40,9 @@ Environment being a result of the \code{code} evaluation.} \item{deparse}{(\code{logical(1)}) flag specifying whether to return code as \code{character} or \code{expression}.} +\item{names}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} (\code{character}) vector of object names to return the code for. +For more details see the "Extracting dataset-specific code" section.} + \item{...}{see \code{Details}} \item{data}{(\code{qenv})} @@ -78,6 +81,77 @@ It is a method for the \code{base} generic that wraps \code{eval_code} to provid through the \code{...} argument: as \code{name:value} pairs are passed to \code{...}, \code{name} in \code{expr} will be replaced with \code{value}. } +\section{Extracting dataset-specific code}{ + +When \code{names} is specified, the code returned will be limited to the lines needed to \emph{create} +the requested objects. The code stored in the \verb{@code} slot is analyzed statically to determine +which lines the objects of interest depend upon. The analysis works well when objects are created +with standard infix assignment operators (see \code{?assignOps}) but it can fail in some situations. + +Consider the following examples: + +\emph{Case 1: Usual assignments.} + +\if{html}{\out{
}}\preformatted{q1 <- qenv() |> + within(\{ + foo <- function(x) \{ + x + 1 + \} + x <- 0 + y <- foo(x) + \}) +get_code(q1, names = "y") +}\if{html}{\out{
}} + +\code{x} has no dependencies, so \code{get_code(data, names = "x")} will return only the second call.\cr +\code{y} depends on \code{x} and \code{foo}, so \code{get_code(data, names = "y")} will contain all three calls. + +\emph{Case 2: Some objects are created by a function's side effects.} + +\if{html}{\out{
}}\preformatted{q2 <- qenv() |> + within(\{ + foo <- function() \{ + x <<- x + 1 + \} + x <- 0 + foo() + y <- x + \}) +get_code(q2, names = "y") +}\if{html}{\out{
}} + +Here, \code{y} depends on \code{x} but \code{x} is modified by \code{foo} as a side effect (not by reassignment) +and so \code{get_code(data, names = "y")} will not return the \code{foo()} call.\cr +To overcome this limitation, code dependencies can be specified manually. +Lines where side effects occur can be flagged by adding "\verb{# @linksto }" at the end.\cr +Note that \code{within} evaluates code passed to \code{expr} as is and comments are ignored. +In order to include comments in code one must use the \code{eval_code} function instead. + +\if{html}{\out{
}}\preformatted{q3 <- qenv() |> + eval_code(" + foo <- function() \{ + x <<- x + 1 + \} + x <- 0 + foo() # @linksto x + y <- x + ") +get_code(q3, names = "y") +}\if{html}{\out{
}} + +Now the \code{foo()} call will be properly included in the code required to recreate \code{y}. + +Note that two functions that create objects as side effects, \code{assign} and \code{data}, are handled automatically. + +Here are known cases where manual tagging is necessary: +\itemize{ +\item non-standard assignment operators, \emph{e.g.} \verb{\%<>\%} +\item objects used as conditions in \code{if} statements: \verb{if ()} +\item objects used to iterate over in \code{for} loops: \verb{for(i in )} +\item creating and evaluating language objects, \emph{e.g.} \verb{eval()} +} +} + \section{Using language objects with \code{within}}{ Passing language objects to \code{expr} is generally not intended but can be achieved with \code{do.call}. @@ -100,8 +174,17 @@ q <- eval_code(q, quote(library(checkmate))) q <- eval_code(q, expression(assert_number(a))) # retrieve code +q <- within(qenv(), { + a <- 1 + b <- 2 +}) get_code(q) get_code(q, deparse = FALSE) +get_code(q, names = "a") + +q <- qenv() +q <- eval_code(q, code = c("a <- 1", "b <- 2")) +get_code(q, names = "a") # evaluate code using within q <- qenv() diff --git a/tests/testthat/test-qenv_get_code.R b/tests/testthat/test-qenv_get_code.R index 099f0a36..0f9aedf9 100644 --- a/tests/testthat/test-qenv_get_code.R +++ b/tests/testthat/test-qenv_get_code.R @@ -44,3 +44,831 @@ testthat::test_that("get_code called with qenv.error returns error with trace in "object 'v' not found \n when evaluating qenv code:\nw <- v\n\ntrace: \n x <- 1\n y <- x\n w <- v\n" ) }) + + +# names parameter ------------------------------------------------------------------------------------------------- + +testthat::test_that("handles empty @code slot", { + testthat::expect_identical( + get_code(qenv(), names = "a"), + character(0) + ) + testthat::expect_identical( + get_code(eval_code(qenv(), code = ""), names = "a"), + "" + ) +}) + +testthat::test_that("handles the code without symbols on rhs", { + code <- c( + "1 + 1", + "a <- 5", + "501" + ) + + testthat::expect_identical( + get_code(eval_code(qenv(), code), names = "a"), + "a <- 5" + ) +}) + +testthat::test_that("handles the code included in curly brackets", { + code <- "{1 + 1;a <- 5}" + + testthat::expect_identical( + get_code(eval_code(qenv(), code), names = "a"), + "a <- 5" + ) +}) + +testthat::test_that("handles the code of length > 1 when at least one is enclosed in curly brackets", { + code <- c("{a<-5}", "1+1") + q <- eval_code(eval_code(qenv(), code[1]), code[2]) + + testthat::expect_identical( + get_code(q, names = "a"), + "a <- 5" + ) +}) + + +testthat::test_that("extracts the code of a binding from character vector containing simple code", { + code <- c( + "a <- 1", + "b <- 2" + ) + q <- eval_code(qenv(), code) + testthat::expect_identical( + get_code(q, names = "a"), + "a <- 1" + ) + testthat::expect_identical( + get_code(q, names = "b"), + "b <- 2" + ) +}) + +testthat::test_that("extracts the code without downstream usage", { + code <- c( + "a <- 1", + "head(a)" + ) + q <- eval_code(qenv(), code) + testthat::expect_identical( + get_code(q, names = "a"), + "a <- 1" + ) +}) + +testthat::test_that("works for datanames of length > 1", { + code <- c( + "a <- 1", + "b <- 2" + ) + q <- eval_code(qenv(), code) + testthat::expect_identical( + get_code(q, names = c("a", "b")), + paste(code, collapse = "\n") + ) +}) + +testthat::test_that("warns if binding doesn't exist in code", { + code <- c("a <- 1") + q <- eval_code(qenv(), code) + testthat::expect_warning( + get_code(q, names = "c"), + "Object\\(s\\) not found in code: c" + ) +}) + +testthat::test_that("does not fall into a loop", { + code <- c( + "a <- 1", + "b <- a", + "c <- b", + "a <- c" + ) + q <- eval_code(qenv(), code) + testthat::expect_identical( + get_code(q, names = "a"), + paste(code, collapse = "\n") + ) + testthat::expect_identical( + get_code(q, names = "b"), + paste(code[1:2], collapse = "\n") + ) + testthat::expect_identical( + get_code(q, names = "c"), + paste(code[1:3], collapse = "\n") + ) +}) + + +testthat::test_that("extracts code of a parent binding but only those evaluated before coocurence", { + code <- c( + "a <- 1", + "b <- a", + "a <- 2" + ) + q <- eval_code(qenv(), code) + testthat::expect_identical( + get_code(q, names = "b"), + paste("a <- 1", "b <- a", sep = "\n") + ) +}) + +testthat::test_that("extracts the code of a parent binding if used as an arg in a function call", { + code <- c( + "a <- 1", + "b <- identity(x = a)", + "a <- 2" + ) + q <- eval_code(qenv(), code) + testthat::expect_identical( + get_code(q, names = "b"), + paste("a <- 1", "b <- identity(x = a)", sep = "\n") + ) +}) + +testthat::test_that("extracts the code when using <<-", { + code <- c( + "a <- 1", + "b <- a", + "b <<- b + 2" + ) + q <- eval_code(qenv(), code) + testthat::expect_identical( + get_code(q, names = "b"), + paste("a <- 1", "b <- a", "b <<- b + 2", sep = "\n") + ) +}) + +testthat::test_that("detects every assign calls even if not evaluated, if there is only one assignment in this line", { + code <- c( + "a <- 1", + "b <- 2", + "eval(expression({b <- b + 2}))" + ) + q <- eval_code(qenv(), code) + testthat::expect_identical( + get_code(q, names = "b"), + paste("b <- 2", "eval(expression({\n b <- b + 2\n}))", sep = "\n") + ) +}) + +testthat::test_that("returns result of length 1 for non-empty input", { + q1 <- qenv() + q1 <- within(q1, { + a <- 1 + b <- a^5 + c <- list(x = 2) + }) + + testthat::expect_length(get_code(q1, deparse = FALSE), 1) + testthat::expect_length(get_code(q1, deparse = TRUE), 1) +}) + +testthat::test_that("does not break if code is separated by ;", { + code <- c( + "a <- 1;a <- a + 1" + ) + q <- eval_code(qenv(), code) + testthat::expect_identical( + get_code(q, names = "a"), + gsub(";", "\n", code, fixed = TRUE) + ) +}) + +testthat::test_that("does not break if code uses quote()", { + code <- c( + "expr <- quote(x <- x + 1)", + "x <- 0", + "eval(expr)" + ) + q <- eval_code(qenv(), code) + testthat::expect_identical( + get_code(q, names = "x"), + code[2] + ) +}) + +testthat::test_that("does not break if object is used in a function on lhs", { + code <- c( + "data(iris)", + "iris2 <- iris", + "names(iris) <- letters[1:5]" + ) + q <- eval_code(qenv(), code = code) + testthat::expect_identical( + get_code(q, names = "iris"), + paste(code[c(1, 3)], collapse = "\n") + ) +}) + +testthat::test_that( + "does not break if object is used in a function on lhs and influencers are both on lhs and rhs", + { + code <- c( + "x <- 5", + "y <- length(x)", + "names(x)[y] <- y" + ) + q <- eval_code(qenv(), code = code) + testthat::expect_identical( + get_code(q, names = "x"), + paste(code, collapse = "\n") + ) + } +) + +# assign ---------------------------------------------------------------------------------------------------------- + +testthat::test_that("extracts the code for assign() where \"x\" is a literal string", { + code <- c( + "a <- 1", + "assign('b', 5)", + "assign(value = 7, x = 'c')", + "assign(value = 15, x = \"d\")", + "b <- b + 2", + "c <- b", + "d <- d * 2" + ) + q <- eval_code(qenv(), code) + testthat::expect_identical( + get_code(q, names = "b"), + paste("assign(\"b\", 5)", "b <- b + 2", sep = "\n") + ) + testthat::expect_identical( + get_code(q, names = "c"), + paste( + "assign(\"b\", 5)", + "assign(value = 7, x = \"c\")", + "b <- b + 2", + "c <- b", + sep = "\n" + ) + ) + testthat::expect_identical( + get_code(q, names = "d"), + paste("assign(value = 15, x = \"d\")", "d <- d * 2", sep = "\n") + ) +}) + +testthat::test_that("extracts the code for assign() where \"x\" is variable", { + testthat::skip("We will not resolve this, as this requires code evaluation.") + code <- c( + "x <- \"a\"", + "assign(x, 5)", + "b <- a" + ) + q <- eval_code(qenv(), code) + testthat::expect_identical( + get_code(q, names = "b"), + paste(code, collapse = "\n") + ) +}) + +testthat::test_that("works for assign() detection no matter how many parametrers were provided in assignq()", { + code <- c( + "x <- 1", + "assign(\"x\", 0, envir = environment())", + "assign(inherits = FALSE, immediate = TRUE, \"z\", 5, envir = environment())", + "y <- x + z", + "y <- x" + ) + + q <- eval_code(qenv(), code) + + testthat::expect_identical( + get_code(q, names = "y"), + paste(code, collapse = "\n") + ) +}) + +testthat::test_that("detects function usage of the assignment operator", { + code <- c( + "x <- 1", + "`<-`(y,x)" + ) + code2 <- "`<-`(y, `<-`(x, 2))" + + q <- eval_code(qenv(), code) + q2 <- eval_code(qenv(), code2) + + testthat::expect_identical( + get_code(q, names = "y"), + paste(c(code[1], "y <- x"), collapse = "\n") + ) + testthat::expect_identical( + get_code(q2, names = "y"), + "y <- x <- 2" + ) +}) + + +# @linksto --------------------------------------------------------------------------------------------------------- + +testthat::test_that("get_code does not break if @linksto is put in the last line", { + # In some cases R parses comment as a separate expression so the comment is not + # directly associated with this line of code. This situation occurs when `eval` is in the last + # line of the code. Other cases are not known but are highly probable. + code <- c( + "expr <- quote(x <- x + 1)", + "x <- 0", + "eval(expr) #@linksto x" + ) + q <- eval_code(qenv(), code) + testthat::expect_identical( + get_code(q, names = "x"), + paste(gsub(" #@linksto x", "", code, fixed = TRUE), collapse = "\n") + ) +}) + +testthat::test_that("@linksto makes a line being returned for an affected binding", { + code <- " + a <- 1 # @linksto b + b <- 2 + " + q <- eval_code(qenv(), code) + testthat::expect_identical( + get_code(q, names = "b"), + paste("a <- 1", "b <- 2", sep = "\n") + ) +}) + +testthat::test_that( + "@linksto returns the line for an affected binding + even if the object did not exist in the same iteration of eval_code", + { + code <- c( + "a <- 1 # @linksto b", + "b <- 2" + ) + q <- eval_code(qenv(), code) + testthat::expect_identical( + get_code(q, names = "b"), + paste("a <- 1", "b <- 2", sep = "\n") + ) + } +) + +testthat::test_that( + "lines affecting parent evaluated after co-occurrence are not included in output when using @linksto", + { + code <- c( + "a <- 1 ", + "b <- 2 # @linksto a", + "a <- a + 1", + "b <- b + 1" + ) + q <- eval_code(qenv(), code) + testthat::expect_identical( + get_code(q, names = "a"), + paste("a <- 1", "b <- 2", "a <- a + 1", sep = "\n") + ) + testthat::expect_identical( + get_code(q, names = "b"), + paste("b <- 2", "b <- b + 1", sep = "\n") + ) + } +) + +testthat::test_that( + "@linksto gets extracted if it's a side-effect on a dependent object (even of a dependent object)", + { + code <- " + iris[1:5, ] -> iris2 + iris_head <- head(iris) # @linksto iris3 + iris3 <- iris_head[1, ] # @linksto iris2 + classes <- lapply(iris2, class) + " + q <- eval_code(qenv(), code) + testthat::expect_identical( + get_code(q, names = "classes"), + paste("iris2 <- iris[1:5, ]", + "iris_head <- head(iris)", + "iris3 <- iris_head[1, ]", + "classes <- lapply(iris2, class)", + sep = "\n" + ) + ) + } +) + +# functions ------------------------------------------------------------------------------------------------------- + +testthat::test_that("ignores occurrence in a function definition", { + code <- c( + "b <- 2", + "foo <- function(b) { b <- b + 2 }" + ) + q <- eval_code(qenv(), code) + testthat::expect_identical( + get_code(q, names = "b"), + "b <- 2" + ) + testthat::expect_identical( + get_code(q, names = "foo"), + "foo <- function(b) {\n b <- b + 2\n}" + ) +}) + +testthat::test_that("ignores occurrence in a function definition that has function in it", { + code <- c( + "b <- 2", + "foo <- function(b) { function(c) {b <- c + 2 }}" + ) + q <- eval_code(qenv(), code) + testthat::expect_identical( + get_code(q, names = "b"), + "b <- 2" + ) + testthat::expect_identical( + get_code(q, names = "foo"), + "foo <- function(b) {\n function(c) {\n b <- c + 2\n }\n}" + ) +}) + +testthat::test_that("ignores occurrence in a function definition if there is multiple function definitions", { + code <- c( + "b <- 2", + "foo <- function(b) { function(c) {b <- c + 2 }}", + "b <- b + 1", + "bar <- function(b) print(b)" + ) + q <- eval_code(qenv(), code) + testthat::expect_identical( + get_code(q, names = "b"), + "b <- 2\nb <- b + 1" + ) + testthat::expect_identical( + get_code(q, names = "foo"), + "foo <- function(b) {\n function(c) {\n b <- c + 2\n }\n}" + ) +}) + +testthat::test_that("ignores occurrence in a function definition in lapply", { + code <- c( + "a <- list(a = 1, b = 2, c = 3)", + "b <- lapply(a, FUN = function(x) { x <- x + 1 })", + "b <- Filter(function(x) x > 2, b)", + "x <- 1", + "identity(x)" + ) + q <- eval_code(qenv(), code) + testthat::expect_identical( + get_code(q, names = "x"), + "x <- 1" + ) +}) + +testthat::test_that("does not ignore occurrence in function body if object exsits in env", { + skip("This is not urgent and can be ommitted with @linksto tag.") + code <- c( + "a <- list(a = 1, b = 2, c = 3)", + "p <- 5", # This is not extracted, even though is used in the next line. + "b <- lapply(a, FUN = function(x) { x <- x + p })", + "b <- Filter(function(x) x > 2, b)" + ) + q <- eval_code(qenv(), code) + testthat::expect_identical( + get_code(q, names = "b"), + paste(code, sep = "\n") + ) +}) + +testthat::test_that("ignores occurrence in function definition without { curly brackets", { + code <- c( + "b <- 2", + "foo <- function(b) b <- b + 2 " + ) + q <- eval_code(qenv(), code) + testthat::expect_identical( + get_code(q, names = "foo"), + "foo <- function(b) b <- b + 2" + ) + testthat::expect_identical( + get_code(q, names = "b"), + "b <- 2" + ) +}) + +testthat::test_that("detects occurrence of the function object", { + code <- c( + "a <- 1", + "b <- 2", + "foo <- function(b) { b <- b + 2 }", + "b <- foo(a)" + ) + q <- eval_code(qenv(), code) + testthat::expect_identical( + get_code(q, names = "b"), + paste("a <- 1", "b <- 2", "foo <- function(b) {\n b <- b + 2\n}", "b <- foo(a)", sep = "\n") + ) +}) + +testthat::test_that("detects occurrence of a function definition when a formal is named the same as a function", { + code <- c( + "x <- 1", + "foo <- function(foo = 1) 'text'", + "a <- foo(x)" + ) + q <- eval_code(qenv(), code) + testthat::expect_identical( + get_code(q, names = "a"), + paste("x <- 1", "foo <- function(foo = 1) \"text\"", "a <- foo(x)", sep = "\n") + ) +}) + +testthat::test_that("detects occurrence of a function definition with a @linksto usage", { + code <- c( + " + foo <- function() { + env <- parent.frame() + env$x <- 0 + }", + "foo() # @linksto x", + "y <- x" + ) + q <- eval_code(qenv(), code) + testthat::expect_identical( + get_code(q, names = "x"), + "foo <- function() {\n env <- parent.frame()\n env$x <- 0\n}\nfoo()" + ) +}) +# $ --------------------------------------------------------------------------------------------------------------- + +testthat::test_that("understands $ usage and do not treat rhs of $ as objects (only lhs)", { + code <- c( + "x <- data.frame(a = 1:3)", + "a <- data.frame(y = 1:3)", + "a$x <- a$y", + "a$x <- a$x + 2", + "a$x <- x$a" + ) + q <- eval_code(qenv(), code) + testthat::expect_identical( + get_code(q, names = "x"), + "x <- data.frame(a = 1:3)" + ) + testthat::expect_identical( + get_code(q, names = "a"), + paste("x <- data.frame(a = 1:3)", + "a <- data.frame(y = 1:3)", + "a$x <- a$y", + "a$x <- a$x + 2", + "a$x <- x$a", + sep = "\n" + ) + ) +}) + +testthat::test_that("detects cooccurrence properly even if all objects are on lhs", { + code <- c( + "a <- 1", + "b <- list(c = 2)", + "b[[a]] <- 3" + ) + q <- eval_code(qenv(), code) + testthat::expect_identical( + get_code(q, names = "b"), + paste(code, collapse = "\n") + ) +}) + + +# @ --------------------------------------------------------------------------------------------------------------- + +testthat::test_that("understands @ usage and do not treat rhs of @ as objects (only lhs)", { + code <- c( + "setClass('aclass', slots = c(a = 'numeric', x = 'numeric', y = 'numeric')) # @linksto a x", + "x <- new('aclass', a = 1:3, x = 1:3, y = 1:3)", + "a <- new('aclass', a = 1:3, x = 1:3, y = 1:3)", + "a@x <- a@y", + "a@x <- a@x + 2", + "a@x <- x@a" + ) + q <- qenv() + q@code <- code # we don't use eval_code so the code is not run + testthat::expect_identical( + get_code(q, names = "x"), + paste( + 'setClass("aclass", slots = c(a = "numeric", x = "numeric", y = "numeric"))', + 'x <- new("aclass", a = 1:3, x = 1:3, y = 1:3)', + sep = "\n" + ) + ) + testthat::expect_identical( + get_code(q, names = "a"), + paste( + 'setClass("aclass", slots = c(a = "numeric", x = "numeric", y = "numeric"))', + 'x <- new("aclass", a = 1:3, x = 1:3, y = 1:3)', + 'a <- new("aclass", a = 1:3, x = 1:3, y = 1:3)', + "a@x <- a@y", + "a@x <- a@x + 2", + "a@x <- x@a", + sep = "\n" + ) + ) +}) + + + +# libraries ------------------------------------------------------------------------------------------------------- + +testthat::test_that("library() and require() are always returned", { + code <- c( + "set.seed(1)", + "require(dplyr)", + "library(lifecycle)", + "x <- 5", + "y <- 6" + ) + q <- eval_code(qenv(), code) + testthat::expect_identical( + get_code(q, names = "x"), + paste( + "require(dplyr)", + "library(lifecycle)", + "x <- 5", + sep = "\n" + ) + ) +}) + + +# data() ---------------------------------------------------------------------------------------------------------- + +testthat::test_that("data() call is returned when data name is provided as is", { + code <- c( + "set.seed(1)", + "require(dplyr)", + "library(lifecycle)", + "data(iris, envir = environment())", + "x <- iris" + ) + q <- eval_code(qenv(), code) + testthat::expect_identical( + get_code(q, names = "x"), + paste( + "require(dplyr)", + "library(lifecycle)", + "data(iris, envir = environment())", + "x <- iris", + sep = "\n" + ) + ) +}) + +testthat::test_that("data() call is returned when data name is provided as a character", { + code <- c( + "set.seed(1)", + "require(dplyr)", + "library(lifecycle)", + "data('mtcars')", + "z <- mtcars" + ) + q <- eval_code(qenv(), code) + testthat::expect_identical( + get_code(q, names = "z"), + paste( + "require(dplyr)", + "library(lifecycle)", + "data(\"mtcars\")", + "z <- mtcars", + sep = "\n" + ) + ) +}) + + +testthat::describe("Backticked symbol", { + testthat::it("code can be retrieved with get_code", { + td <- within( + qenv(), + { + `%cbind%` <- function(lhs, rhs) cbind(lhs, rhs) # nolint: object_name. + iris_ds <- iris %cbind% data.frame(new_col = "new column") + } + ) + + testthat::expect_identical( + get_code(td, names = "%cbind%"), + "`%cbind%` <- function(lhs, rhs) cbind(lhs, rhs)" + ) + }) + + testthat::it("code can be retrieved with get_code", { + td <- within( + qenv(), + { + `%cbind%` <- function(lhs, rhs) cbind(lhs, rhs) # nolint: object_name. + iris_ds <- iris %cbind% data.frame(new_col = "new column") + } + ) + + testthat::expect_identical( + get_code(td, names = "`%cbind%`"), + "`%cbind%` <- function(lhs, rhs) cbind(lhs, rhs)" + ) + }) + + testthat::it("starting with underscore is detected in code dependency", { + td <- within( + qenv(), + { + `_add_column_` <- function(lhs, rhs) cbind(lhs, rhs) # nolint: object_name. + iris_ds <- `_add_column_`(iris, data.frame(new_col = "new column")) + } + ) + + testthat::expect_identical( + get_code(td, names = "iris_ds"), + paste( + sep = "\n", + "`_add_column_` <- function(lhs, rhs) cbind(lhs, rhs)", + "iris_ds <- `_add_column_`(iris, data.frame(new_col = \"new column\"))" + ) + ) + }) + + testthat::it("with space character is detected in code dependency", { + td <- within( + qenv(), + { + `add column` <- function(lhs, rhs) cbind(lhs, rhs) # nolint: object_name. + iris_ds <- `add column`(iris, data.frame(new_col = "new column")) + } + ) + + testthat::expect_identical( + get_code(td, names = "iris_ds"), + paste( + sep = "\n", + "`add column` <- function(lhs, rhs) cbind(lhs, rhs)", + "iris_ds <- `add column`(iris, data.frame(new_col = \"new column\"))" + ) + ) + }) + + testthat::it("without special characters is cleaned and detected in code dependency", { + td <- within( + qenv(), + { + `add_column` <- function(lhs, rhs) cbind(lhs, rhs) + iris_ds <- `add_column`(iris, data.frame(new_col = "new column")) + } + ) + + testthat::expect_identical( + get_code(td, names = "iris_ds"), + paste( + sep = "\n", + "add_column <- function(lhs, rhs) cbind(lhs, rhs)", + "iris_ds <- add_column(iris, data.frame(new_col = \"new column\"))" + ) + ) + }) + + testthat::it("with non-native pipe used as function is detected code dependency", { + td <- within( + qenv(), + { + `%add_column%` <- function(lhs, rhs) cbind(lhs, rhs) + iris_ds <- `%add_column%`(iris, data.frame(new_col = "new column")) + } + ) + + # Note that the original code is changed to use the non-native pipe operator + # correctly. + testthat::expect_identical( + get_code(td, names = "iris_ds"), + paste( + sep = "\n", + "`%add_column%` <- function(lhs, rhs) cbind(lhs, rhs)", + "iris_ds <- iris %add_column% data.frame(new_col = \"new column\")" + ) + ) + }) + + testthat::it("with non-native pipe is detected code dependency", { + td <- within( + qenv(), + { + `%add_column%` <- function(lhs, rhs) cbind(lhs, rhs) + iris_ds <- iris %add_column% data.frame(new_col = "new column") + } + ) + + # Note that the original code is changed to use the non-native pipe operator + # correctly. + testthat::expect_identical( + get_code(td, names = "iris_ds"), + paste( + sep = "\n", + "`%add_column%` <- function(lhs, rhs) cbind(lhs, rhs)", + "iris_ds <- iris %add_column% data.frame(new_col = \"new column\")" + ) + ) + }) +})