From 03d952d2bfeb6cb225998ac9e8e6a4f32c753feb Mon Sep 17 00:00:00 2001
From: Marcin <133694481+m7pr@users.noreply.github.com>
Date: Tue, 22 Oct 2024 17:03:46 +0200
Subject: [PATCH] 210 bring `get_code_dependency` (#214)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

BLOCKED by
- https://github.com/insightsengineering/teal.data/pull/340

Closes
- #210

Companion to
- https://github.com/insightsengineering/teal.data/pull/343
- https://github.com/insightsengineering/teal/pull/1388

# Description

Brings `names` parameter to `get_code` so that you can limit returned
code to specific objects (and the lines that create those objects).
`get_code_dependency` was moved from `teal.data`

# Tested with

``` r
library(teal.code)

# EVAL CODE
q <- qenv()
q <- eval_code(q, code = c("a <- 1", "b <- 2"))
q@code
#> [1] "a <- 1\nb <- 2"
get_code(q, names = "a")
#> [1] "a <- 1"

# WITHIN
q <- qenv()
q <- within(q, {a <- 1; b <- 2})
q@code
#> [1] "a <- 1\nb <- 2"
get_code(q, names = "a")
#> [1] "a <- 1"

# OLD TEAL.DATA
t <- teal.data::teal_data(a = 5, code = c("a <- 1", "b <- 2"))
t@code
#> [1] "a <- 1\nb <- 2"
teal.data::get_code(t, datanames = 'a')
#> Warning in .local(object, deparse, ...): get_code(datanames) was deprecated in
#> teal.data 0.6.1, use get_code(names) instead.
#> [1] "a <- 1"
```

<sup>Created on 2024-10-16 with [reprex
v2.1.1](https://reprex.tidyverse.org)</sup>

# Local tests

```r
> devtools::test()
ℹ Testing teal.code
✔ | F W  S  OK | Context
✔ |         12 | qenv_concat
✔ |          8 | qenv_constructor
✔ |         26 | qenv_eval_code
✔ |      2  60 | qenv_get_code [1.1s]
✔ |         10 | qenv_get_var
✔ |          7 | qenv_get_warnings
✔ |         40 | qenv_join
✔ |         14 | qenv_within
✔ |         12 | utils

══ Results ══════════════════════════════════════════════════════════════════
Duration: 2.2 s

── Skipped tests (2) ────────────────────────────────────────────────────────
• This is not urgent and can be ommitted with @linksto tag. (1):
  test-qenv_get_code.R:526:3
• We will not resolve this, as this requires code evaluation. (1):
  test-qenv_get_code.R:318:3

[ FAIL 0 | WARN 0 | SKIP 2 | PASS 189 ]
````

# Local R CMD CHECK

```r
── R CMD check results ──────────────────────────────────── teal.code 0.5.0.9010 ────
Duration: 33.5s

❯ checking for future file timestamps ... NOTE
  unable to verify current time

0 errors ✔ | 0 warnings ✔ | 1 note ✖

R CMD check succeeded
```

---------

Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: 27856297+dependabot-preview[bot]@users.noreply.github.com <27856297+dependabot-preview[bot]@users.noreply.github.com>
---
 DESCRIPTION                         |   1 +
 NEWS.md                             |   5 +
 R/qenv-get_code.R                   | 102 +++-
 R/utils-get_code_dependency.R       | 453 +++++++++++++++
 man/get_code_dependency.Rd          |  37 ++
 man/qenv.Rd                         |  85 ++-
 tests/testthat/test-qenv_get_code.R | 828 ++++++++++++++++++++++++++++
 7 files changed, 1504 insertions(+), 7 deletions(-)
 create mode 100644 R/utils-get_code_dependency.R
 create mode 100644 man/get_code_dependency.Rd
diff --git a/DESCRIPTION b/DESCRIPTION
index ca32a094..7977eabc 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -66,4 +66,5 @@ Collate:
     'qenv-show.R'
     'qenv-within.R'
     'teal.code-package.R'
+    'utils-get_code_dependency.R'
     'utils.R'
diff --git a/NEWS.md b/NEWS.md
index 22bce150..e33438f8 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,5 +1,10 @@
 # teal.code 0.5.0.9010
 
+### Enhancements
+
+* `get_code()` was extended with `names` parameter and allows the code extraction to be limited to objects stored in
+`qenv` but limited to `names`.
+
 # teal.code 0.5.0
 
 ### Breaking Change
diff --git a/R/qenv-get_code.R b/R/qenv-get_code.R
index 70584c11..cc88d633 100644
--- a/R/qenv-get_code.R
+++ b/R/qenv-get_code.R
@@ -5,15 +5,92 @@
 #'
 #' @param object (`qenv`)
 #' @param deparse (`logical(1)`) flag specifying whether to return code as `character` or `expression`.
+#' @param names `r lifecycle::badge("experimental")` (`character`) vector of object names to return the code for.
+#' For more details see the "Extracting dataset-specific code" section.
 #' @param ... see `Details`
 #'
+#'
+#' @section Extracting dataset-specific code:
+#' When `names` is specified, the code returned will be limited  to the lines needed to _create_
+#' the requested objects. The code stored in the `@code` slot is analyzed statically to determine
+#' which lines the objects of interest depend upon. The analysis works well when objects are created
+#' with standard infix assignment operators (see `?assignOps`) but it can fail in some situations.
+#'
+#' Consider the following examples:
+#'
+#' _Case 1: Usual assignments._
+#' ```r
+#' q1 <- qenv() |>
+#'   within({
+#'     foo <- function(x) {
+#'       x + 1
+#'     }
+#'     x <- 0
+#'     y <- foo(x)
+#'   })
+#' get_code(q1, names = "y")
+#' ```
+#' `x` has no dependencies, so `get_code(data, names = "x")` will return only the second call.\cr
+#' `y` depends on `x` and `foo`, so `get_code(data, names = "y")` will contain all three calls.
+#'
+#' _Case 2: Some objects are created by a function's side effects._
+#' ```r
+#' q2 <- qenv() |>
+#'   within({
+#'     foo <- function() {
+#'       x <<- x + 1
+#'     }
+#'     x <- 0
+#'     foo()
+#'     y <- x
+#'   })
+#' get_code(q2, names = "y")
+#' ```
+#' Here, `y` depends on `x` but `x` is modified by `foo` as a side effect (not by reassignment)
+#' and so `get_code(data, names = "y")` will not return the `foo()` call.\cr
+#' To overcome this limitation, code dependencies can be specified manually.
+#' Lines where side effects occur can be flagged by adding "`# @linksto <object name>`" at the end.\cr
+#' Note that `within` evaluates code passed to `expr` as is and comments are ignored.
+#' In order to include comments in code one must use the `eval_code` function instead.
+#'
+#' ```r
+#' q3 <- qenv() |>
+#'   eval_code("
+#'     foo <- function() {
+#'       x <<- x + 1
+#'     }
+#'     x <- 0
+#'     foo() # @linksto x
+#'     y <- x
+#'   ")
+#' get_code(q3, names = "y")
+#' ```
+#' Now the `foo()` call will be properly included in the code required to recreate `y`.
+#'
+#' Note that two functions that create objects as side effects, `assign` and `data`, are handled automatically.
+#'
+#' Here are known cases where manual tagging is necessary:
+#' - non-standard assignment operators, _e.g._ `%<>%`
+#' - objects used as conditions in `if` statements: `if (<condition>)`
+#' - objects used to iterate over in `for` loops: `for(i in <sequence>)`
+#' - creating and evaluating language objects, _e.g._ `eval(<call>)`
+#'
 #' @return
 #' `get_code` returns the traced code (from `@code` slot) in the form specified by `deparse`.
 #'
 #' @examples
 #' # retrieve code
+#' q <- within(qenv(), {
+#'   a <- 1
+#'   b <- 2
+#' })
 #' get_code(q)
 #' get_code(q, deparse = FALSE)
+#' get_code(q, names = "a")
+#'
+#' q <- qenv()
+#' q <- eval_code(q, code = c("a <- 1", "b <- 2"))
+#' get_code(q, names = "a")
 #'
 #' @name get_code
 #' @rdname qenv
@@ -21,7 +98,7 @@
 #' @aliases get_code,qenv.error-method
 #'
 #' @export
-setGeneric("get_code", function(object, deparse = TRUE, ...) {
+setGeneric("get_code", function(object, deparse = TRUE, names = NULL, ...) {
   # this line forces evaluation of object before passing to the generic
   # needed for error handling to work properly
   grDevices::pdf(nullfile())
@@ -31,16 +108,29 @@ setGeneric("get_code", function(object, deparse = TRUE, ...) {
   standardGeneric("get_code")
 })
 
-setMethod("get_code", signature = "qenv", function(object, deparse = TRUE) {
+setMethod("get_code", signature = "qenv", function(object, deparse = TRUE, names = NULL, ...) {
   checkmate::assert_flag(deparse)
+  checkmate::assert_character(names, min.len = 1L, null.ok = TRUE)
+
+  # Normalize in case special it is backticked
+  if (!is.null(names)) {
+    names <- gsub("^`(.*)`$", "\\1", names)
+  }
+
+  code <- if (!is.null(names)) {
+    get_code_dependency(object@code, names, ...)
+  } else {
+    object@code
+  }
+
   if (deparse) {
-    if (length(object@code) == 0) {
-      object@code
+    if (length(code) == 0) {
+      code
     } else {
-      paste(object@code, collapse = "\n")
+      paste(code, collapse = "\n")
     }
   } else {
-    parse(text = paste(c("{", object@code, "}"), collapse = "\n"), keep.source = TRUE)
+    parse(text = paste(c("{", code, "}"), collapse = "\n"), keep.source = TRUE)
   }
 })
 
diff --git a/R/utils-get_code_dependency.R b/R/utils-get_code_dependency.R
new file mode 100644
index 00000000..a38b19bc
--- /dev/null
+++ b/R/utils-get_code_dependency.R
@@ -0,0 +1,453 @@
+# get_code_dependency ----
+
+#' Get code dependency of an object
+#'
+#' Extract subset of code required to reproduce specific object(s), including code producing side-effects.
+#'
+#' Given a character vector with code, this function will extract the part of the code responsible for creating
+#' the variables specified by `names`.
+#' This includes the final call that creates the variable(s) in question as well as all _parent calls_,
+#' _i.e._ calls that create variables used in the final call and their parents, etc.
+#' Also included are calls that create side-effects like establishing connections.
+#'
+#' It is assumed that object dependency is established by using three assignment operators: `<-`, `=`, and `->` .
+#' Other assignment methods (`assign`, `<<-`) or non-standard-evaluation methods are not supported.
+#'
+#' Side-effects are not detected automatically and must be marked in the code.
+#' Add `# @linksto object` at the end of a line where a side-effect occurs to specify that this line is required
+#' to reproduce a variable called `object`.
+#'
+#' @param code `character` with the code.
+#' @param names `character` vector of object names.
+#' @param check_names `logical(1)` flag specifying if a warning for non-existing names should be displayed.
+#'
+#' @return Character vector, a subset of `code`.
+#' Note that subsetting is actually done on the calls `code`, not necessarily on the elements of the vector.
+#'
+#' @keywords internal
+get_code_dependency <- function(code, names, check_names = TRUE) {
+  checkmate::assert_character(code)
+  checkmate::assert_character(names, any.missing = FALSE)
+
+  if (identical(code, character(0)) || identical(trimws(code), "")) {
+    return(code)
+  }
+
+  # If code is bound in curly brackets, remove them.
+  tcode <- trimws(code)
+  if (any(grepl("^\\{.*\\}$", tcode))) {
+    code <- sub("^\\{(.*)\\}$", "\\1", tcode)
+  }
+
+
+  code <- parse(text = code, keep.source = TRUE)
+  pd <- utils::getParseData(code)
+  pd <- normalize_pd(pd)
+  calls_pd <- extract_calls(pd)
+
+  if (check_names) {
+    # Detect if names are actually in code.
+    symbols <- unlist(lapply(calls_pd, function(call) call[call$token == "SYMBOL", "text"]))
+    if (any(pd$text == "assign")) {
+      assign_calls <- Filter(function(call) find_call(call, "assign"), calls_pd)
+      ass_str <- unlist(lapply(assign_calls, function(call) call[call$token == "STR_CONST", "text"]))
+      ass_str <- gsub("^['\"]|['\"]$", "", ass_str)
+      symbols <- c(ass_str, symbols)
+    }
+    if (!all(names %in% unique(symbols))) {
+      warning("Object(s) not found in code: ", toString(setdiff(names, symbols)))
+    }
+  }
+
+  graph <- code_graph(calls_pd)
+  ind <- unlist(lapply(names, function(x) graph_parser(x, graph)))
+
+  lib_ind <- detect_libraries(calls_pd)
+
+  as.character(code[sort(unique(c(lib_ind, ind)))])
+}
+
+#' Locate function call token
+#'
+#' Determine which row of parsed data is specific `SYMBOL_FUNCTION_CALL` token.
+#'
+#' Useful for determining occurrence of `assign` or `data` functions in an input call.
+#'
+#' @param call_pd `data.frame` as returned by `extract_calls()`
+#' @param text `character(1)` to look for in `text` column of `call_pd`
+#'
+#' @return
+#' Single integer specifying row in `call_pd` where `token` is `SYMBOL_FUNCTION_CALL` and `text` is `text`.
+#' 0 if not found.
+#'
+#' @keywords internal
+#' @noRd
+find_call <- function(call_pd, text) {
+  checkmate::check_data_frame(call_pd)
+  checkmate::check_names(call_pd, must.include = c("token", "text"))
+  checkmate::check_string(text)
+
+  ans <- which(call_pd$token == "SYMBOL_FUNCTION_CALL" & call_pd$text == text)
+  if (length(ans)) {
+    ans
+  } else {
+    0L
+  }
+}
+
+#' Split the result of `utils::getParseData()` into separate calls
+#'
+#' @param pd (`data.frame`) A result of `utils::getParseData()`.
+#'
+#' @return
+#' A `list` of `data.frame`s.
+#' Each element is a subset of `pd` corresponding to one call in the original code from which `pd` was obtained.
+#' Only four columns (`"token"`, `"text"`, `"id"`, `"parent"`) are kept, the rest is discarded.
+#'
+#' @keywords internal
+#' @noRd
+extract_calls <- function(pd) {
+  calls <- lapply(
+    pd[pd$parent == 0, "id"],
+    function(parent) {
+      rbind(
+        pd[pd$id == parent, c("token", "text", "id", "parent")],
+        get_children(pd = pd, parent = parent)
+      )
+    }
+  )
+  calls <- Filter(function(call) !(nrow(call) == 1 && call$token == "';'"), calls)
+  calls <- Filter(Negate(is.null), calls)
+  calls <- fix_shifted_comments(calls)
+  fix_arrows(calls)
+}
+
+#' @keywords internal
+#' @noRd
+get_children <- function(pd, parent) {
+  idx_children <- abs(pd$parent) == parent
+  children <- pd[idx_children, c("token", "text", "id", "parent")]
+  if (nrow(children) == 0) {
+    return(NULL)
+  }
+
+  if (parent > 0) {
+    do.call(rbind, c(list(children), lapply(children$id, get_children, pd = pd)))
+  }
+}
+
+#' Fixes edge case of comments being shifted to the next call.
+#' @keywords internal
+#' @noRd
+fix_shifted_comments <- function(calls) {
+  # If the first or the second token is a @linksto COMMENT,
+  #  then it belongs to the previous call.
+  if (length(calls) >= 2) {
+    for (i in 2:length(calls)) {
+      comment_idx <- grep("@linksto", calls[[i]][, "text"])
+      if (isTRUE(comment_idx[1] <= 2)) {
+        calls[[i - 1]] <- rbind(
+          calls[[i - 1]],
+          calls[[i]][seq_len(comment_idx[1]), ]
+        )
+        calls[[i]] <- calls[[i]][-seq_len(comment_idx[1]), ]
+      }
+    }
+  }
+  Filter(nrow, calls)
+}
+
+#' Fixes edge case of `<-` assignment operator being called as function,
+#' which is \code{`<-`(y,x)} instead of traditional `y <- x`.
+#' @keywords internal
+#' @noRd
+fix_arrows <- function(calls) {
+  checkmate::assert_list(calls)
+  lapply(calls, function(call) {
+    sym_fun <- call$token == "SYMBOL_FUNCTION_CALL"
+    call[sym_fun, ] <- sub_arrows(call[sym_fun, ])
+    call
+  })
+}
+
+#' Execution of assignment operator substitutions for a call.
+#' @keywords internal
+#' @noRd
+sub_arrows <- function(call) {
+  checkmate::assert_data_frame(call)
+  map <- data.frame(
+    row.names = c("<-", "<<-", "="),
+    token = rep("LEFT_ASSIGN", 3),
+    text = rep("<-", 3)
+  )
+  sub_ids <- call$text %in% rownames(map)
+  call[sub_ids, c("token", "text")] <- map[call$text[sub_ids], ]
+  call
+}
+
+# code_graph ----
+
+#' Create object dependencies graph within parsed code
+#'
+#' Builds dependency graph that identifies dependencies between objects in parsed code.
+#' Helps understand which objects depend on which.
+#'
+#' @param calls_pd `list` of `data.frame`s;
+#'  result of `utils::getParseData()` split into subsets representing individual calls;
+#'  created by `extract_calls()` function
+#'
+#' @return
+#' A list (of length of input `calls_pd`) where each element represents one call.
+#' Each element is a character vector listing names of objects that depend on this call
+#' and names of objects that this call depends on.
+#' Dependencies are listed after the `"<-"` string, e.g. `c("a", "<-", "b", "c")` means that in this call object `a`
+#' depends on objects `b` and `c`.
+#' If a call is tagged with `@linksto a`, then object `a` is understood to depend on that call.
+#'
+#' @keywords internal
+#' @noRd
+code_graph <- function(calls_pd) {
+  cooccurrence <- extract_occurrence(calls_pd)
+
+  side_effects <- extract_side_effects(calls_pd)
+
+  mapply(c, side_effects, cooccurrence, SIMPLIFY = FALSE)
+}
+
+#' Extract object occurrence
+#'
+#' Extracts objects occurrence within calls passed by `calls_pd`.
+#' Also detects which objects depend on which within a call.
+#'
+#' @param calls_pd `list` of `data.frame`s;
+#'  result of `utils::getParseData()` split into subsets representing individual calls;
+#'  created by `extract_calls()` function
+#'
+#' @return
+#' A list (of length of input `calls_pd`) where each element represents one call.
+#' Each element is a character vector listing names of objects that depend on this call
+#' and names of objects that this call depends on.
+#' Dependencies are listed after the `"<-"` string, e.g. `c("a", "<-", "b", "c")` means that in this call object `a`
+#' depends on objects `b` and `c`.
+#' If a call is tagged with `@linksto a`, then object `a` is understood to depend on that call.
+#'
+#' @keywords internal
+#' @noRd
+extract_occurrence <- function(calls_pd) {
+  is_in_function <- function(x) {
+    # If an object is a function parameter,
+    # then in calls_pd there is a `SYMBOL_FORMALS` entry for that object.
+    function_id <- x[x$token == "FUNCTION", "parent"]
+    if (length(function_id)) {
+      x$id %in% get_children(x, function_id[1])$id
+    } else {
+      rep(FALSE, nrow(x))
+    }
+  }
+  in_parenthesis <- function(x) {
+    if (any(x$token %in% c("LBB", "'['"))) {
+      id_start <- min(x$id[x$token %in% c("LBB", "'['")])
+      id_end <- min(x$id[x$token == "']'"])
+      x$text[x$token == "SYMBOL" & x$id > id_start & x$id < id_end]
+    }
+  }
+  lapply(
+    calls_pd,
+    function(call_pd) {
+      # Handle data(object)/data("object")/data(object, envir = ) independently.
+      data_call <- find_call(call_pd, "data")
+      if (data_call) {
+        sym <- call_pd[data_call + 1, "text"]
+        return(c(gsub("^['\"]|['\"]$", "", sym), "<-"))
+      }
+      # Handle assign(x = ).
+      assign_call <- find_call(call_pd, "assign")
+      if (assign_call) {
+        # Check if parameters were named.
+        # "','" is for unnamed parameters, where "SYMBOL_SUB" is for named.
+        # "EQ_SUB" is for `=` appearing after the name of the named parameter.
+        if (any(call_pd$token == "SYMBOL_SUB")) {
+          params <- call_pd[call_pd$token %in% c("SYMBOL_SUB", "','", "EQ_SUB"), "text"]
+          # Remove sequence of "=", ",".
+          if (length(params > 1)) {
+            remove <- integer(0)
+            for (i in 2:length(params)) {
+              if (params[i - 1] == "=" & params[i] == ",") {
+                remove <- c(remove, i - 1, i)
+              }
+            }
+            if (length(remove)) params <- params[-remove]
+          }
+          pos <- match("x", setdiff(params, ","), nomatch = match(",", params, nomatch = 0))
+          if (!pos) {
+            return(character(0L))
+          }
+          # pos is indicator of the place of 'x'
+          # 1. All parameters are named, but none is 'x' - return(character(0L))
+          # 2. Some parameters are named, 'x' is in named parameters: match("x", setdiff(params, ","))
+          # - check "x" in params being just a vector of named parameters.
+          # 3. Some parameters are named, 'x' is not in named parameters
+          # - check first appearance of "," (unnamed parameter) in vector parameters.
+        } else {
+          # Object is the first entry after 'assign'.
+          pos <- 1
+        }
+        sym <- call_pd[assign_call + pos, "text"]
+        return(c(gsub("^['\"]|['\"]$", "", sym), "<-"))
+      }
+
+      # What occurs in a function body is not tracked.
+      x <- call_pd[!is_in_function(call_pd), ]
+      sym_cond <- which(x$token %in% c("SPECIAL", "SYMBOL", "SYMBOL_FUNCTION_CALL"))
+
+      if (length(sym_cond) == 0) {
+        return(character(0L))
+      }
+      # Watch out for SYMBOLS after $ and @. For x$a x@a: x is object, a is not.
+      # For x$a, a's ID is $'s ID-2 so we need to remove all IDs that have ID = $ID - 2.
+      dollar_ids <- x[x$token %in% c("'$'", "'@'"), "id"]
+      if (length(dollar_ids)) {
+        object_ids <- x[sym_cond, "id"]
+        after_dollar <- object_ids[(object_ids - 2) %in% dollar_ids]
+        sym_cond <- setdiff(sym_cond, which(x$id %in% after_dollar))
+      }
+
+      ass_cond <- grep("ASSIGN", x$token)
+      if (!length(ass_cond)) {
+        return(c("<-", unique(x[sym_cond, "text"])))
+      }
+
+      sym_cond <- sym_cond[sym_cond > ass_cond] # NOTE 1
+      # If there was an assignment operation detect direction of it.
+      if (unique(x$text[ass_cond]) == "->") { # NOTE 2
+        sym_cond <- rev(sym_cond)
+      }
+
+      after <- match(min(x$id[ass_cond]), sort(x$id[c(min(ass_cond), sym_cond)])) - 1
+      ans <- append(x[sym_cond, "text"], "<-", after = max(1, after))
+      roll <- in_parenthesis(call_pd)
+      if (length(roll)) {
+        c(setdiff(ans, roll), roll)
+      } else {
+        ans
+      }
+
+      ### NOTE 2: What if there are 2 assignments: e.g. a <- b -> c.
+      ### NOTE 1: For cases like 'eval(expression(b <- b + 2))' removes 'eval(expression('.
+    }
+  )
+}
+
+#' Extract side effects
+#'
+#' Extracts all object names from the code that are marked with `@linksto` tag.
+#'
+#' The code may contain functions calls that create side effects, e.g. modify the environment.
+#' Static code analysis may be insufficient to determine which objects are created or modified by such a function call.
+#' The `@linksto` comment tag is introduced to mark a call as having a (side) effect on one or more objects.
+#' With this tag a complete object dependency structure can be established.
+#' Read more about side effects and the usage of `@linksto` tag in [`get_code_dependencies()`] function.
+#'
+#' @param calls_pd `list` of `data.frame`s;
+#'  result of `utils::getParseData()` split into subsets representing individual calls;
+#'  created by `extract_calls()` function
+#'
+#' @return
+#' A list of length equal to that of `calls_pd`, where each element is a character vector of names of objects
+#' depending a call tagged with `@linksto` in a corresponding element of `calls_pd`.
+#'
+#' @keywords internal
+#' @noRd
+extract_side_effects <- function(calls_pd) {
+  lapply(
+    calls_pd,
+    function(x) {
+      linksto <- grep("@linksto", x[x$token == "COMMENT", "text"], value = TRUE)
+      unlist(strsplit(sub("\\s*#\\s*@linksto\\s+", "", linksto), "\\s+"))
+    }
+  )
+}
+
+# graph_parser ----
+
+#' Return the indices of calls needed to reproduce an object
+#'
+#' @param x The name of the object to return code for.
+#' @param graph A result of `code_graph()`.
+#'
+#' @return
+#' Integer vector of indices that can be applied to `graph` to obtain all calls required to reproduce object `x`.
+#'
+#' @keywords internal
+#' @noRd
+graph_parser <- function(x, graph) {
+  occurrence <- vapply(
+    graph, function(call) {
+      ind <- match("<-", call, nomatch = length(call) + 1L)
+      x %in% call[seq_len(ind - 1L)]
+    },
+    logical(1)
+  )
+
+  dependencies <- lapply(graph[occurrence], function(call) {
+    ind <- match("<-", call, nomatch = 0L)
+    call[(ind + 1L):length(call)]
+  })
+  dependencies <- setdiff(unlist(dependencies), x)
+
+  if (length(dependencies) && any(occurrence)) {
+    dependency_ids <- lapply(dependencies, function(dependency) {
+      graph_parser(dependency, graph[1:max(which(occurrence))])
+    })
+    sort(unique(c(which(occurrence), unlist(dependency_ids))))
+  } else {
+    which(occurrence)
+  }
+}
+
+
+# default_side_effects --------------------------------------------------------------------------------------------
+
+#' Detect library calls
+#'
+#' Detects `library()` and `require()` function calls.
+#'
+#' @param calls_pd `list` of `data.frame`s;
+#'  result of `utils::getParseData()` split into subsets representing individual calls;
+#'  created by `extract_calls()` function
+#'
+#' @return
+#' Integer vector of indices that can be applied to `graph` (result of `code_graph()`) to obtain all calls containing
+#' `library()` or `require()` calls that are always returned for reproducibility.
+#'
+#' @keywords internal
+#' @noRd
+detect_libraries <- function(calls_pd) {
+  defaults <- c("library", "require")
+
+  which(
+    vapply(
+      calls_pd,
+      function(call) {
+        any(call$token == "SYMBOL_FUNCTION_CALL" & call$text %in% defaults)
+      },
+      logical(1)
+    )
+  )
+}
+
+#' Normalize parsed data removing backticks from symbols
+#'
+#' @param pd `data.frame` resulting from `utils::getParseData()` call.
+#'
+#' @return `data.frame` with backticks removed from `text` column for `SYMBOL` tokens.
+#'
+#' @keywords internal
+#' @noRd
+normalize_pd <- function(pd) {
+  # Remove backticks from SYMBOL tokens
+  symbol_index <- grepl("^SYMBOL.*$", pd$token)
+  pd[symbol_index, "text"] <- gsub("^`(.*)`$", "\\1", pd[symbol_index, "text"])
+
+  pd
+}
diff --git a/man/get_code_dependency.Rd b/man/get_code_dependency.Rd
new file mode 100644
index 00000000..8db40903
--- /dev/null
+++ b/man/get_code_dependency.Rd
@@ -0,0 +1,37 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utils-get_code_dependency.R
+\name{get_code_dependency}
+\alias{get_code_dependency}
+\title{Get code dependency of an object}
+\usage{
+get_code_dependency(code, names, check_names = TRUE)
+}
+\arguments{
+\item{code}{\code{character} with the code.}
+
+\item{names}{\code{character} vector of object names.}
+
+\item{check_names}{\code{logical(1)} flag specifying if a warning for non-existing names should be displayed.}
+}
+\value{
+Character vector, a subset of \code{code}.
+Note that subsetting is actually done on the calls \code{code}, not necessarily on the elements of the vector.
+}
+\description{
+Extract subset of code required to reproduce specific object(s), including code producing side-effects.
+}
+\details{
+Given a character vector with code, this function will extract the part of the code responsible for creating
+the variables specified by \code{names}.
+This includes the final call that creates the variable(s) in question as well as all \emph{parent calls},
+\emph{i.e.} calls that create variables used in the final call and their parents, etc.
+Also included are calls that create side-effects like establishing connections.
+
+It is assumed that object dependency is established by using three assignment operators: \verb{<-}, \code{=}, and \verb{->} .
+Other assignment methods (\code{assign}, \verb{<<-}) or non-standard-evaluation methods are not supported.
+
+Side-effects are not detected automatically and must be marked in the code.
+Add \verb{# @linksto object} at the end of a line where a side-effect occurs to specify that this line is required
+to reproduce a variable called \code{object}.
+}
+\keyword{internal}
diff --git a/man/qenv.Rd b/man/qenv.Rd
index 75bd486e..4d382246 100644
--- a/man/qenv.Rd
+++ b/man/qenv.Rd
@@ -26,7 +26,7 @@ new_qenv(env = new.env(parent = parent.env(.GlobalEnv)), code = character())
 
 eval_code(object, code)
 
-get_code(object, deparse = TRUE, ...)
+get_code(object, deparse = TRUE, names = NULL, ...)
 
 \method{within}{qenv}(data, expr, ...)
 }
@@ -40,6 +40,9 @@ Environment being a result of the \code{code} evaluation.}
 
 \item{deparse}{(\code{logical(1)}) flag specifying whether to return code as \code{character} or \code{expression}.}
 
+\item{names}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} (\code{character}) vector of object names to return the code for.
+For more details see the "Extracting dataset-specific code" section.}
+
 \item{...}{see \code{Details}}
 
 \item{data}{(\code{qenv})}
@@ -78,6 +81,77 @@ It is a method for the \code{base} generic that wraps \code{eval_code} to provid
 through the \code{...} argument:
 as \code{name:value} pairs are passed to \code{...}, \code{name} in \code{expr} will be replaced with \code{value}.
 }
+\section{Extracting dataset-specific code}{
+
+When \code{names} is specified, the code returned will be limited  to the lines needed to \emph{create}
+the requested objects. The code stored in the \verb{@code} slot is analyzed statically to determine
+which lines the objects of interest depend upon. The analysis works well when objects are created
+with standard infix assignment operators (see \code{?assignOps}) but it can fail in some situations.
+
+Consider the following examples:
+
+\emph{Case 1: Usual assignments.}
+
+\if{html}{\out{<div class="sourceCode r">}}\preformatted{q1 <- qenv() |>
+  within(\{
+    foo <- function(x) \{
+      x + 1
+    \}
+    x <- 0
+    y <- foo(x)
+  \})
+get_code(q1, names = "y")
+}\if{html}{\out{</div>}}
+
+\code{x} has no dependencies, so \code{get_code(data, names = "x")} will return only the second call.\cr
+\code{y} depends on \code{x} and \code{foo}, so \code{get_code(data, names = "y")} will contain all three calls.
+
+\emph{Case 2: Some objects are created by a function's side effects.}
+
+\if{html}{\out{<div class="sourceCode r">}}\preformatted{q2 <- qenv() |>
+  within(\{
+    foo <- function() \{
+      x <<- x + 1
+    \}
+    x <- 0
+    foo()
+    y <- x
+  \})
+get_code(q2, names = "y")
+}\if{html}{\out{</div>}}
+
+Here, \code{y} depends on \code{x} but \code{x} is modified by \code{foo} as a side effect (not by reassignment)
+and so \code{get_code(data, names = "y")} will not return the \code{foo()} call.\cr
+To overcome this limitation, code dependencies can be specified manually.
+Lines where side effects occur can be flagged by adding "\verb{# @linksto <object name>}" at the end.\cr
+Note that \code{within} evaluates code passed to \code{expr} as is and comments are ignored.
+In order to include comments in code one must use the \code{eval_code} function instead.
+
+\if{html}{\out{<div class="sourceCode r">}}\preformatted{q3 <- qenv() |>
+  eval_code("
+    foo <- function() \{
+      x <<- x + 1
+    \}
+    x <- 0
+    foo() # @linksto x
+    y <- x
+  ")
+get_code(q3, names = "y")
+}\if{html}{\out{</div>}}
+
+Now the \code{foo()} call will be properly included in the code required to recreate \code{y}.
+
+Note that two functions that create objects as side effects, \code{assign} and \code{data}, are handled automatically.
+
+Here are known cases where manual tagging is necessary:
+\itemize{
+\item non-standard assignment operators, \emph{e.g.} \verb{\%<>\%}
+\item objects used as conditions in \code{if} statements: \verb{if (<condition>)}
+\item objects used to iterate over in \code{for} loops: \verb{for(i in <sequence>)}
+\item creating and evaluating language objects, \emph{e.g.} \verb{eval(<call>)}
+}
+}
+
 \section{Using language objects with \code{within}}{
 
 Passing language objects to \code{expr} is generally not intended but can be achieved with \code{do.call}.
@@ -100,8 +174,17 @@ q <- eval_code(q, quote(library(checkmate)))
 q <- eval_code(q, expression(assert_number(a)))
 
 # retrieve code
+q <- within(qenv(), {
+  a <- 1
+  b <- 2
+})
 get_code(q)
 get_code(q, deparse = FALSE)
+get_code(q, names = "a")
+
+q <- qenv()
+q <- eval_code(q, code = c("a <- 1", "b <- 2"))
+get_code(q, names = "a")
 
 # evaluate code using within
 q <- qenv()
diff --git a/tests/testthat/test-qenv_get_code.R b/tests/testthat/test-qenv_get_code.R
index 099f0a36..0f9aedf9 100644
--- a/tests/testthat/test-qenv_get_code.R
+++ b/tests/testthat/test-qenv_get_code.R
@@ -44,3 +44,831 @@ testthat::test_that("get_code called with qenv.error returns error with trace in
     "object 'v' not found \n when evaluating qenv code:\nw <- v\n\ntrace: \n x <- 1\n y <- x\n w <- v\n"
   )
 })
+
+
+# names parameter -------------------------------------------------------------------------------------------------
+
+testthat::test_that("handles empty @code slot", {
+  testthat::expect_identical(
+    get_code(qenv(), names = "a"),
+    character(0)
+  )
+  testthat::expect_identical(
+    get_code(eval_code(qenv(), code = ""), names = "a"),
+    ""
+  )
+})
+
+testthat::test_that("handles the code without symbols on rhs", {
+  code <- c(
+    "1 + 1",
+    "a <- 5",
+    "501"
+  )
+
+  testthat::expect_identical(
+    get_code(eval_code(qenv(), code), names = "a"),
+    "a <- 5"
+  )
+})
+
+testthat::test_that("handles the code included in curly brackets", {
+  code <- "{1 + 1;a <- 5}"
+
+  testthat::expect_identical(
+    get_code(eval_code(qenv(), code), names = "a"),
+    "a <- 5"
+  )
+})
+
+testthat::test_that("handles the code of length > 1 when at least one is enclosed in curly brackets", {
+  code <- c("{a<-5}", "1+1")
+  q <- eval_code(eval_code(qenv(), code[1]), code[2])
+
+  testthat::expect_identical(
+    get_code(q, names = "a"),
+    "a <- 5"
+  )
+})
+
+
+testthat::test_that("extracts the code of a binding from character vector containing simple code", {
+  code <- c(
+    "a <- 1",
+    "b <- 2"
+  )
+  q <- eval_code(qenv(), code)
+  testthat::expect_identical(
+    get_code(q, names = "a"),
+    "a <- 1"
+  )
+  testthat::expect_identical(
+    get_code(q, names = "b"),
+    "b <- 2"
+  )
+})
+
+testthat::test_that("extracts the code without downstream usage", {
+  code <- c(
+    "a <- 1",
+    "head(a)"
+  )
+  q <- eval_code(qenv(), code)
+  testthat::expect_identical(
+    get_code(q, names = "a"),
+    "a <- 1"
+  )
+})
+
+testthat::test_that("works for datanames of length > 1", {
+  code <- c(
+    "a <- 1",
+    "b <- 2"
+  )
+  q <- eval_code(qenv(), code)
+  testthat::expect_identical(
+    get_code(q, names = c("a", "b")),
+    paste(code, collapse = "\n")
+  )
+})
+
+testthat::test_that("warns if binding doesn't exist in code", {
+  code <- c("a <- 1")
+  q <- eval_code(qenv(), code)
+  testthat::expect_warning(
+    get_code(q, names = "c"),
+    "Object\\(s\\) not found in code: c"
+  )
+})
+
+testthat::test_that("does not fall into a loop", {
+  code <- c(
+    "a <- 1",
+    "b <- a",
+    "c <- b",
+    "a <- c"
+  )
+  q <- eval_code(qenv(), code)
+  testthat::expect_identical(
+    get_code(q, names = "a"),
+    paste(code, collapse = "\n")
+  )
+  testthat::expect_identical(
+    get_code(q, names = "b"),
+    paste(code[1:2], collapse = "\n")
+  )
+  testthat::expect_identical(
+    get_code(q, names = "c"),
+    paste(code[1:3], collapse = "\n")
+  )
+})
+
+
+testthat::test_that("extracts code of a parent binding but only those evaluated before coocurence", {
+  code <- c(
+    "a <- 1",
+    "b <- a",
+    "a <- 2"
+  )
+  q <- eval_code(qenv(), code)
+  testthat::expect_identical(
+    get_code(q, names = "b"),
+    paste("a <- 1", "b <- a", sep = "\n")
+  )
+})
+
+testthat::test_that("extracts the code of a parent binding if used as an arg in a function call", {
+  code <- c(
+    "a <- 1",
+    "b <- identity(x = a)",
+    "a <- 2"
+  )
+  q <- eval_code(qenv(), code)
+  testthat::expect_identical(
+    get_code(q, names = "b"),
+    paste("a <- 1", "b <- identity(x = a)", sep = "\n")
+  )
+})
+
+testthat::test_that("extracts the code when using <<-", {
+  code <- c(
+    "a <- 1",
+    "b <- a",
+    "b <<- b + 2"
+  )
+  q <- eval_code(qenv(), code)
+  testthat::expect_identical(
+    get_code(q, names = "b"),
+    paste("a <- 1", "b <- a", "b <<- b + 2", sep = "\n")
+  )
+})
+
+testthat::test_that("detects every assign calls even if not evaluated, if there is only one assignment in this line", {
+  code <- c(
+    "a <- 1",
+    "b <- 2",
+    "eval(expression({b <- b + 2}))"
+  )
+  q <- eval_code(qenv(), code)
+  testthat::expect_identical(
+    get_code(q, names = "b"),
+    paste("b <- 2", "eval(expression({\n    b <- b + 2\n}))", sep = "\n")
+  )
+})
+
+testthat::test_that("returns result of length 1 for non-empty input", {
+  q1 <- qenv()
+  q1 <- within(q1, {
+    a <- 1
+    b <- a^5
+    c <- list(x = 2)
+  })
+
+  testthat::expect_length(get_code(q1, deparse = FALSE), 1)
+  testthat::expect_length(get_code(q1, deparse = TRUE), 1)
+})
+
+testthat::test_that("does not break if code is separated by ;", {
+  code <- c(
+    "a <- 1;a <- a + 1"
+  )
+  q <- eval_code(qenv(), code)
+  testthat::expect_identical(
+    get_code(q, names = "a"),
+    gsub(";", "\n", code, fixed = TRUE)
+  )
+})
+
+testthat::test_that("does not break if code uses quote()", {
+  code <- c(
+    "expr <- quote(x <- x + 1)",
+    "x <- 0",
+    "eval(expr)"
+  )
+  q <- eval_code(qenv(), code)
+  testthat::expect_identical(
+    get_code(q, names = "x"),
+    code[2]
+  )
+})
+
+testthat::test_that("does not break if object is used in a function on lhs", {
+  code <- c(
+    "data(iris)",
+    "iris2 <- iris",
+    "names(iris) <- letters[1:5]"
+  )
+  q <- eval_code(qenv(), code = code)
+  testthat::expect_identical(
+    get_code(q, names = "iris"),
+    paste(code[c(1, 3)], collapse = "\n")
+  )
+})
+
+testthat::test_that(
+  "does not break if object is used in a function on lhs and influencers are both on lhs and rhs",
+  {
+    code <- c(
+      "x <- 5",
+      "y <- length(x)",
+      "names(x)[y] <- y"
+    )
+    q <- eval_code(qenv(), code = code)
+    testthat::expect_identical(
+      get_code(q, names = "x"),
+      paste(code, collapse = "\n")
+    )
+  }
+)
+
+# assign ----------------------------------------------------------------------------------------------------------
+
+testthat::test_that("extracts the code for assign() where \"x\" is a literal string", {
+  code <- c(
+    "a <- 1",
+    "assign('b', 5)",
+    "assign(value = 7, x = 'c')",
+    "assign(value = 15, x = \"d\")",
+    "b <- b + 2",
+    "c <- b",
+    "d <- d * 2"
+  )
+  q <- eval_code(qenv(), code)
+  testthat::expect_identical(
+    get_code(q, names = "b"),
+    paste("assign(\"b\", 5)", "b <- b + 2", sep = "\n")
+  )
+  testthat::expect_identical(
+    get_code(q, names = "c"),
+    paste(
+      "assign(\"b\", 5)",
+      "assign(value = 7, x = \"c\")",
+      "b <- b + 2",
+      "c <- b",
+      sep = "\n"
+    )
+  )
+  testthat::expect_identical(
+    get_code(q, names = "d"),
+    paste("assign(value = 15, x = \"d\")", "d <- d * 2", sep = "\n")
+  )
+})
+
+testthat::test_that("extracts the code for assign() where \"x\" is variable", {
+  testthat::skip("We will not resolve this, as this requires code evaluation.")
+  code <- c(
+    "x <- \"a\"",
+    "assign(x, 5)",
+    "b <- a"
+  )
+  q <- eval_code(qenv(), code)
+  testthat::expect_identical(
+    get_code(q, names = "b"),
+    paste(code, collapse = "\n")
+  )
+})
+
+testthat::test_that("works for assign() detection no matter how many parametrers were provided in assignq()", {
+  code <- c(
+    "x <- 1",
+    "assign(\"x\", 0, envir = environment())",
+    "assign(inherits = FALSE, immediate = TRUE, \"z\", 5, envir = environment())",
+    "y <- x + z",
+    "y <- x"
+  )
+
+  q <- eval_code(qenv(), code)
+
+  testthat::expect_identical(
+    get_code(q, names = "y"),
+    paste(code, collapse = "\n")
+  )
+})
+
+testthat::test_that("detects function usage of the assignment operator", {
+  code <- c(
+    "x <- 1",
+    "`<-`(y,x)"
+  )
+  code2 <- "`<-`(y, `<-`(x, 2))"
+
+  q <- eval_code(qenv(), code)
+  q2 <- eval_code(qenv(), code2)
+
+  testthat::expect_identical(
+    get_code(q, names = "y"),
+    paste(c(code[1], "y <- x"), collapse = "\n")
+  )
+  testthat::expect_identical(
+    get_code(q2, names = "y"),
+    "y <- x <- 2"
+  )
+})
+
+
+# @linksto ---------------------------------------------------------------------------------------------------------
+
+testthat::test_that("get_code does not break if @linksto is put in the last line", {
+  # In some cases R parses comment as a separate expression so the comment is not
+  # directly associated with this line of code. This situation occurs when `eval` is in the last
+  # line of the code. Other cases are not known but are highly probable.
+  code <- c(
+    "expr <- quote(x <- x + 1)",
+    "x <- 0",
+    "eval(expr) #@linksto x"
+  )
+  q <- eval_code(qenv(), code)
+  testthat::expect_identical(
+    get_code(q, names = "x"),
+    paste(gsub(" #@linksto x", "", code, fixed = TRUE), collapse = "\n")
+  )
+})
+
+testthat::test_that("@linksto makes a line being returned for an affected binding", {
+  code <- "
+  a <- 1 # @linksto b
+  b <- 2
+  "
+  q <- eval_code(qenv(), code)
+  testthat::expect_identical(
+    get_code(q, names = "b"),
+    paste("a <- 1", "b <- 2", sep = "\n")
+  )
+})
+
+testthat::test_that(
+  "@linksto returns the line for an affected binding
+  even if the object did not exist in the same iteration of eval_code",
+  {
+    code <- c(
+      "a <- 1 # @linksto b",
+      "b <- 2"
+    )
+    q <- eval_code(qenv(), code)
+    testthat::expect_identical(
+      get_code(q, names = "b"),
+      paste("a <- 1", "b <- 2", sep = "\n")
+    )
+  }
+)
+
+testthat::test_that(
+  "lines affecting parent evaluated after co-occurrence are not included in output when using @linksto",
+  {
+    code <- c(
+      "a <- 1 ",
+      "b <- 2 # @linksto a",
+      "a <- a + 1",
+      "b <- b + 1"
+    )
+    q <- eval_code(qenv(), code)
+    testthat::expect_identical(
+      get_code(q, names = "a"),
+      paste("a <- 1", "b <- 2", "a <- a + 1", sep = "\n")
+    )
+    testthat::expect_identical(
+      get_code(q, names = "b"),
+      paste("b <- 2", "b <- b + 1", sep = "\n")
+    )
+  }
+)
+
+testthat::test_that(
+  "@linksto gets extracted if it's a side-effect on a dependent object (even of a dependent object)",
+  {
+    code <- "
+      iris[1:5, ] -> iris2
+      iris_head <- head(iris) # @linksto iris3
+      iris3 <- iris_head[1, ] # @linksto iris2
+      classes <- lapply(iris2, class)
+    "
+    q <- eval_code(qenv(), code)
+    testthat::expect_identical(
+      get_code(q, names = "classes"),
+      paste("iris2 <- iris[1:5, ]",
+        "iris_head <- head(iris)",
+        "iris3 <- iris_head[1, ]",
+        "classes <- lapply(iris2, class)",
+        sep = "\n"
+      )
+    )
+  }
+)
+
+# functions -------------------------------------------------------------------------------------------------------
+
+testthat::test_that("ignores occurrence in a function definition", {
+  code <- c(
+    "b <- 2",
+    "foo <- function(b) { b <- b + 2 }"
+  )
+  q <- eval_code(qenv(), code)
+  testthat::expect_identical(
+    get_code(q, names = "b"),
+    "b <- 2"
+  )
+  testthat::expect_identical(
+    get_code(q, names = "foo"),
+    "foo <- function(b) {\n    b <- b + 2\n}"
+  )
+})
+
+testthat::test_that("ignores occurrence in a function definition that has function in it", {
+  code <- c(
+    "b <- 2",
+    "foo <- function(b) { function(c) {b <- c + 2 }}"
+  )
+  q <- eval_code(qenv(), code)
+  testthat::expect_identical(
+    get_code(q, names = "b"),
+    "b <- 2"
+  )
+  testthat::expect_identical(
+    get_code(q, names = "foo"),
+    "foo <- function(b) {\n    function(c) {\n        b <- c + 2\n    }\n}"
+  )
+})
+
+testthat::test_that("ignores occurrence in a function definition if there is multiple function definitions", {
+  code <- c(
+    "b <- 2",
+    "foo <- function(b) { function(c) {b <- c + 2 }}",
+    "b <- b + 1",
+    "bar <- function(b) print(b)"
+  )
+  q <- eval_code(qenv(), code)
+  testthat::expect_identical(
+    get_code(q, names = "b"),
+    "b <- 2\nb <- b + 1"
+  )
+  testthat::expect_identical(
+    get_code(q, names = "foo"),
+    "foo <- function(b) {\n    function(c) {\n        b <- c + 2\n    }\n}"
+  )
+})
+
+testthat::test_that("ignores occurrence in a function definition in lapply", {
+  code <- c(
+    "a <- list(a = 1, b = 2, c = 3)",
+    "b <- lapply(a, FUN = function(x) { x <- x + 1 })",
+    "b <- Filter(function(x) x > 2, b)",
+    "x <- 1",
+    "identity(x)"
+  )
+  q <- eval_code(qenv(), code)
+  testthat::expect_identical(
+    get_code(q, names = "x"),
+    "x <- 1"
+  )
+})
+
+testthat::test_that("does not ignore occurrence in function body if object exsits in env", {
+  skip("This is not urgent and can be ommitted with @linksto tag.")
+  code <- c(
+    "a <- list(a = 1, b = 2, c = 3)",
+    "p <- 5", # This is not extracted, even though is used in the next line.
+    "b <- lapply(a, FUN = function(x) { x <- x + p })",
+    "b <- Filter(function(x) x > 2, b)"
+  )
+  q <- eval_code(qenv(), code)
+  testthat::expect_identical(
+    get_code(q, names = "b"),
+    paste(code, sep = "\n")
+  )
+})
+
+testthat::test_that("ignores occurrence in function definition without { curly brackets", {
+  code <- c(
+    "b <- 2",
+    "foo <- function(b) b <- b + 2 "
+  )
+  q <- eval_code(qenv(), code)
+  testthat::expect_identical(
+    get_code(q, names = "foo"),
+    "foo <- function(b) b <- b + 2"
+  )
+  testthat::expect_identical(
+    get_code(q, names = "b"),
+    "b <- 2"
+  )
+})
+
+testthat::test_that("detects occurrence of the function object", {
+  code <- c(
+    "a <- 1",
+    "b <- 2",
+    "foo <- function(b) { b <- b + 2 }",
+    "b <- foo(a)"
+  )
+  q <- eval_code(qenv(), code)
+  testthat::expect_identical(
+    get_code(q, names = "b"),
+    paste("a <- 1", "b <- 2", "foo <- function(b) {\n    b <- b + 2\n}", "b <- foo(a)", sep = "\n")
+  )
+})
+
+testthat::test_that("detects occurrence of a function definition when a formal is named the same as a function", {
+  code <- c(
+    "x <- 1",
+    "foo <- function(foo = 1) 'text'",
+    "a <- foo(x)"
+  )
+  q <- eval_code(qenv(), code)
+  testthat::expect_identical(
+    get_code(q, names = "a"),
+    paste("x <- 1", "foo <- function(foo = 1) \"text\"", "a <- foo(x)", sep = "\n")
+  )
+})
+
+testthat::test_that("detects occurrence of a function definition with a @linksto usage", {
+  code <- c(
+    "
+        foo <- function() {
+          env <- parent.frame()
+          env$x <- 0
+        }",
+    "foo() # @linksto x",
+    "y <- x"
+  )
+  q <- eval_code(qenv(), code)
+  testthat::expect_identical(
+    get_code(q, names = "x"),
+    "foo <- function() {\n    env <- parent.frame()\n    env$x <- 0\n}\nfoo()"
+  )
+})
+# $ ---------------------------------------------------------------------------------------------------------------
+
+testthat::test_that("understands $ usage and do not treat rhs of $ as objects (only lhs)", {
+  code <- c(
+    "x <- data.frame(a = 1:3)",
+    "a <- data.frame(y = 1:3)",
+    "a$x <- a$y",
+    "a$x <- a$x + 2",
+    "a$x <- x$a"
+  )
+  q <- eval_code(qenv(), code)
+  testthat::expect_identical(
+    get_code(q, names = "x"),
+    "x <- data.frame(a = 1:3)"
+  )
+  testthat::expect_identical(
+    get_code(q, names = "a"),
+    paste("x <- data.frame(a = 1:3)",
+      "a <- data.frame(y = 1:3)",
+      "a$x <- a$y",
+      "a$x <- a$x + 2",
+      "a$x <- x$a",
+      sep = "\n"
+    )
+  )
+})
+
+testthat::test_that("detects cooccurrence properly even if all objects are on lhs", {
+  code <- c(
+    "a <- 1",
+    "b <- list(c = 2)",
+    "b[[a]] <- 3"
+  )
+  q <- eval_code(qenv(), code)
+  testthat::expect_identical(
+    get_code(q, names = "b"),
+    paste(code, collapse = "\n")
+  )
+})
+
+
+# @ ---------------------------------------------------------------------------------------------------------------
+
+testthat::test_that("understands @ usage and do not treat rhs of @ as objects (only lhs)", {
+  code <- c(
+    "setClass('aclass', slots = c(a = 'numeric', x = 'numeric', y = 'numeric')) # @linksto a x",
+    "x <- new('aclass', a = 1:3, x = 1:3, y = 1:3)",
+    "a <- new('aclass', a = 1:3, x = 1:3, y = 1:3)",
+    "a@x <- a@y",
+    "a@x <- a@x + 2",
+    "a@x <- x@a"
+  )
+  q <- qenv()
+  q@code <- code # we don't use eval_code so the code is not run
+  testthat::expect_identical(
+    get_code(q, names = "x"),
+    paste(
+      'setClass("aclass", slots = c(a = "numeric", x = "numeric", y = "numeric"))',
+      'x <- new("aclass", a = 1:3, x = 1:3, y = 1:3)',
+      sep = "\n"
+    )
+  )
+  testthat::expect_identical(
+    get_code(q, names = "a"),
+    paste(
+      'setClass("aclass", slots = c(a = "numeric", x = "numeric", y = "numeric"))',
+      'x <- new("aclass", a = 1:3, x = 1:3, y = 1:3)',
+      'a <- new("aclass", a = 1:3, x = 1:3, y = 1:3)',
+      "a@x <- a@y",
+      "a@x <- a@x + 2",
+      "a@x <- x@a",
+      sep = "\n"
+    )
+  )
+})
+
+
+
+# libraries -------------------------------------------------------------------------------------------------------
+
+testthat::test_that("library() and require() are always returned", {
+  code <- c(
+    "set.seed(1)",
+    "require(dplyr)",
+    "library(lifecycle)",
+    "x <- 5",
+    "y <- 6"
+  )
+  q <- eval_code(qenv(), code)
+  testthat::expect_identical(
+    get_code(q, names = "x"),
+    paste(
+      "require(dplyr)",
+      "library(lifecycle)",
+      "x <- 5",
+      sep = "\n"
+    )
+  )
+})
+
+
+# data() ----------------------------------------------------------------------------------------------------------
+
+testthat::test_that("data() call is returned when data name is provided as is", {
+  code <- c(
+    "set.seed(1)",
+    "require(dplyr)",
+    "library(lifecycle)",
+    "data(iris, envir = environment())",
+    "x <- iris"
+  )
+  q <- eval_code(qenv(), code)
+  testthat::expect_identical(
+    get_code(q, names = "x"),
+    paste(
+      "require(dplyr)",
+      "library(lifecycle)",
+      "data(iris, envir = environment())",
+      "x <- iris",
+      sep = "\n"
+    )
+  )
+})
+
+testthat::test_that("data() call is returned when data name is provided as a character", {
+  code <- c(
+    "set.seed(1)",
+    "require(dplyr)",
+    "library(lifecycle)",
+    "data('mtcars')",
+    "z <- mtcars"
+  )
+  q <- eval_code(qenv(), code)
+  testthat::expect_identical(
+    get_code(q, names = "z"),
+    paste(
+      "require(dplyr)",
+      "library(lifecycle)",
+      "data(\"mtcars\")",
+      "z <- mtcars",
+      sep = "\n"
+    )
+  )
+})
+
+
+testthat::describe("Backticked symbol", {
+  testthat::it("code can be retrieved with get_code", {
+    td <- within(
+      qenv(),
+      {
+        `%cbind%` <- function(lhs, rhs) cbind(lhs, rhs) # nolint: object_name.
+        iris_ds <- iris %cbind% data.frame(new_col = "new column")
+      }
+    )
+
+    testthat::expect_identical(
+      get_code(td, names = "%cbind%"),
+      "`%cbind%` <- function(lhs, rhs) cbind(lhs, rhs)"
+    )
+  })
+
+  testthat::it("code can be retrieved with get_code", {
+    td <- within(
+      qenv(),
+      {
+        `%cbind%` <- function(lhs, rhs) cbind(lhs, rhs) # nolint: object_name.
+        iris_ds <- iris %cbind% data.frame(new_col = "new column")
+      }
+    )
+
+    testthat::expect_identical(
+      get_code(td, names = "`%cbind%`"),
+      "`%cbind%` <- function(lhs, rhs) cbind(lhs, rhs)"
+    )
+  })
+
+  testthat::it("starting with underscore is detected in code dependency", {
+    td <- within(
+      qenv(),
+      {
+        `_add_column_` <- function(lhs, rhs) cbind(lhs, rhs) # nolint: object_name.
+        iris_ds <- `_add_column_`(iris, data.frame(new_col = "new column"))
+      }
+    )
+
+    testthat::expect_identical(
+      get_code(td, names = "iris_ds"),
+      paste(
+        sep = "\n",
+        "`_add_column_` <- function(lhs, rhs) cbind(lhs, rhs)",
+        "iris_ds <- `_add_column_`(iris, data.frame(new_col = \"new column\"))"
+      )
+    )
+  })
+
+  testthat::it("with space character is detected in code dependency", {
+    td <- within(
+      qenv(),
+      {
+        `add column` <- function(lhs, rhs) cbind(lhs, rhs) # nolint: object_name.
+        iris_ds <- `add column`(iris, data.frame(new_col = "new column"))
+      }
+    )
+
+    testthat::expect_identical(
+      get_code(td, names = "iris_ds"),
+      paste(
+        sep = "\n",
+        "`add column` <- function(lhs, rhs) cbind(lhs, rhs)",
+        "iris_ds <- `add column`(iris, data.frame(new_col = \"new column\"))"
+      )
+    )
+  })
+
+  testthat::it("without special characters is cleaned and detected in code dependency", {
+    td <- within(
+      qenv(),
+      {
+        `add_column` <- function(lhs, rhs) cbind(lhs, rhs)
+        iris_ds <- `add_column`(iris, data.frame(new_col = "new column"))
+      }
+    )
+
+    testthat::expect_identical(
+      get_code(td, names = "iris_ds"),
+      paste(
+        sep = "\n",
+        "add_column <- function(lhs, rhs) cbind(lhs, rhs)",
+        "iris_ds <- add_column(iris, data.frame(new_col = \"new column\"))"
+      )
+    )
+  })
+
+  testthat::it("with non-native pipe used as function is detected code dependency", {
+    td <- within(
+      qenv(),
+      {
+        `%add_column%` <- function(lhs, rhs) cbind(lhs, rhs)
+        iris_ds <- `%add_column%`(iris, data.frame(new_col = "new column"))
+      }
+    )
+
+    # Note that the original code is changed to use the non-native pipe operator
+    # correctly.
+    testthat::expect_identical(
+      get_code(td, names = "iris_ds"),
+      paste(
+        sep = "\n",
+        "`%add_column%` <- function(lhs, rhs) cbind(lhs, rhs)",
+        "iris_ds <- iris %add_column% data.frame(new_col = \"new column\")"
+      )
+    )
+  })
+
+  testthat::it("with non-native pipe is detected code dependency", {
+    td <- within(
+      qenv(),
+      {
+        `%add_column%` <- function(lhs, rhs) cbind(lhs, rhs)
+        iris_ds <- iris %add_column% data.frame(new_col = "new column")
+      }
+    )
+
+    # Note that the original code is changed to use the non-native pipe operator
+    # correctly.
+    testthat::expect_identical(
+      get_code(td, names = "iris_ds"),
+      paste(
+        sep = "\n",
+        "`%add_column%` <- function(lhs, rhs) cbind(lhs, rhs)",
+        "iris_ds <- iris %add_column% data.frame(new_col = \"new column\")"
+      )
+    )
+  })
+})