From c8a448ca80925772769b02569c2a7809d5953c9e Mon Sep 17 00:00:00 2001 From: m7pr Date: Thu, 18 Jan 2024 11:54:34 +0100 Subject: [PATCH 01/36] test for ; case --- tests/testthat/test-get_code.R | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/testthat/test-get_code.R b/tests/testthat/test-get_code.R index 32054ba58..09360c3b8 100644 --- a/tests/testthat/test-get_code.R +++ b/tests/testthat/test-get_code.R @@ -166,6 +166,17 @@ testthat::test_that("get_code returns result of length 1 for non-empty input", { testthat::expect_length(get_code(tdata1, deparse = TRUE), 1) }) +testthat::test_that("get_code does not break if code is separated by ;", { + code <- c( + "a <- 1;a <- a + 1" + ) + tdata <- eval_code(teal_data(), code) + testthat::expect_identical( + get_code(tdata, datanames = "a"), + gsub(";", "\n", code, fixed = TRUE) + ) +}) + # assign ---------------------------------------------------------------------------------------------------------- From e6c163f40bf0585195d6db42a4e80a992aa30754 Mon Sep 17 00:00:00 2001 From: m7pr Date: Mon, 22 Jan 2024 09:47:09 +0100 Subject: [PATCH 02/36] fix if statement for single function call detection --- R/utils-get_code_dependency.R | 4 +++- tests/testthat/test-get_code.R | 21 +++++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/R/utils-get_code_dependency.R b/R/utils-get_code_dependency.R index 49859e77c..42cd87f63 100644 --- a/R/utils-get_code_dependency.R +++ b/R/utils-get_code_dependency.R @@ -240,7 +240,9 @@ extract_occurrence <- function(calls_pd) { if ((length(ass_cond) && x$text[ass_cond] == "->") || !length(ass_cond)) { # NOTE 3 sym_cond <- rev(sym_cond) } - append(unique(x[sym_cond, "text"]), "<-", after = 1) + ans <- unique(x[sym_cond, "text"]) + after <- if (length(x[sym_cond, "text"]) == 1 && x[sym_cond, "token"] == "SYMBOL_FUNCTION_CALL") 0 else 1 + append(ans, "<-", after = after) ### NOTE 3: What if there are 2+ assignments, e.g. a <- b -> c or e.g. a <- b <- c. ### NOTE 2: For cases like 'eval(expression(b <- b + 2))' removes 'eval(expression('. diff --git a/tests/testthat/test-get_code.R b/tests/testthat/test-get_code.R index 3a871f14a..144049621 100644 --- a/tests/testthat/test-get_code.R +++ b/tests/testthat/test-get_code.R @@ -470,6 +470,27 @@ testthat::test_that( } ) +testthat::test_that( + "get_code detects occurrence of function definition and @linksto usage", + { + code <- c(" + foo <- function() { + env <- parent.frame() + env$x <- 0 + }", + "foo() # @linksto x", + "y <- x" + ) + tdata <- teal_data(code = code) + testthat::expect_identical( + get_code(tdata, datanames = 'x'), + paste( + warning_message, + "foo <- function() {\n env <- parent.frame()\n env$x <- 0\n}\nfoo()", + sep = "\n" + ) + ) +}) # $ --------------------------------------------------------------------------------------------------------------- testthat::test_that("get_code with datanames understands $ usage and do not treat rhs of $ as objects (only lhs)", { From a902355329184d4de750fe607c208842e49678e8 Mon Sep 17 00:00:00 2001 From: github-actions <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 22 Jan 2024 08:50:18 +0000 Subject: [PATCH 03/36] [skip actions] Restyle files --- tests/testthat/test-get_code.R | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/tests/testthat/test-get_code.R b/tests/testthat/test-get_code.R index 144049621..6b0d2ee59 100644 --- a/tests/testthat/test-get_code.R +++ b/tests/testthat/test-get_code.R @@ -473,24 +473,26 @@ testthat::test_that( testthat::test_that( "get_code detects occurrence of function definition and @linksto usage", { - code <- c(" + code <- c( + " foo <- function() { env <- parent.frame() env$x <- 0 }", - "foo() # @linksto x", - "y <- x" + "foo() # @linksto x", + "y <- x" ) tdata <- teal_data(code = code) testthat::expect_identical( - get_code(tdata, datanames = 'x'), + get_code(tdata, datanames = "x"), paste( warning_message, "foo <- function() {\n env <- parent.frame()\n env$x <- 0\n}\nfoo()", sep = "\n" ) ) -}) + } +) # $ --------------------------------------------------------------------------------------------------------------- testthat::test_that("get_code with datanames understands $ usage and do not treat rhs of $ as objects (only lhs)", { From 8ec9e57beab582aab3ff3fcddeaa2b6f2305ccee Mon Sep 17 00:00:00 2001 From: m7pr Date: Mon, 22 Jan 2024 09:50:37 +0100 Subject: [PATCH 04/36] Empty-Commit From 790722f3bc590c7f8cf0106533641719004b2dce Mon Sep 17 00:00:00 2001 From: "27856297+dependabot-preview[bot]@users.noreply.github.com" <27856297+dependabot-preview[bot]@users.noreply.github.com> Date: Mon, 22 Jan 2024 08:52:38 +0000 Subject: [PATCH 05/36] [skip actions] Roxygen Man Pages Auto Update --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 9408ecf75..18d4d6561 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -51,7 +51,7 @@ Encoding: UTF-8 Language: en-US LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.0 Collate: 'cdisc_data.R' 'data.R' From 040d88b692c5496c8067ba9e180e22479c17e63d Mon Sep 17 00:00:00 2001 From: m7pr Date: Mon, 22 Jan 2024 09:55:25 +0100 Subject: [PATCH 06/36] update roxygen --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 9408ecf75..18d4d6561 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -51,7 +51,7 @@ Encoding: UTF-8 Language: en-US LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.0 Collate: 'cdisc_data.R' 'data.R' From 3950d07cdfe84e3b88ebc670db690cd2349fef29 Mon Sep 17 00:00:00 2001 From: m7pr Date: Mon, 22 Jan 2024 09:56:04 +0100 Subject: [PATCH 07/36] Empty-Commit From 02013110a95a84be27d9ad548acc26e894b9b974 Mon Sep 17 00:00:00 2001 From: go_gonzo Date: Mon, 22 Jan 2024 10:47:29 +0100 Subject: [PATCH 08/36] docs update --- R/teal_data-get_code.R | 54 ++++++++++++++++++++++++++++++-- man/get_code-teal_data-method.Rd | 52 ++++++++++++++++++++++++++++-- 2 files changed, 100 insertions(+), 6 deletions(-) diff --git a/R/teal_data-get_code.R b/R/teal_data-get_code.R index c3d05da33..17dcb27e5 100644 --- a/R/teal_data-get_code.R +++ b/R/teal_data-get_code.R @@ -6,9 +6,57 @@ #' Use `datanames` to limit the code to one or more of the data sets enumerated in `@datanames`. #' If the code has not passed verification (with [`verify()`]), a warning will be prepended. #' -#' @section Notes for Developers: -#' To learn more about how a subset of code needed to reproduce a specific data set is extracted from all code, -#' see [`get_code_dependency()`]. +#' @section Extracting dataset-specific code: +#' When `datanames` is specified, the code returned will be limited to the code needed to create the +#' requested data sets. `code` stored in the `teal_data` object is statically analyzed to determine +#' dependency tree between each line of the code. Analysis is performed automatically basing on the +#' used symbols and it is working in a typical case when new dataset is created by assignment operator. +#' For example: +#' ```r +#' data <- teal_data() |> +#' eval_code(" +#' foo <- function(x) x + 1 +#' x <- 0 +#' y <- foo(x) +#' ") +#' get_code(data, datanames = "y") +#' ``` +#' +#' In above case `y` depends on `z` and `foo` so the code returned by `get_code` will contain all three lines of code. +#' `get_code(data, datanames = "x")` will return only the second line of code etc. +#' \cr +#' There could be cases when the dependency tree is not obvious. For example: +#' ```r +#' data <- teal_data() |> +#' eval_code(" +#' foo <- function() { +#' env <- parent.frame() +#' env$x <- 0 +#' } +#' foo() +#' y <- x +#' ") +#' get_code(data, datanames = "y") +#' ``` +#' +#' In above case `y` depends on `x` but `x` is not created by assignment operator. In such cases +#' `get_code(data, y)` will only return the second line of the code. To overcome this limitation +#' add `# @linksto x` at the end of a line where a side-effect occurs to specify that this line +#' is required to reproduce a variable called `x`. So the code should look like: +#' +#' ```r +#' data <- teal_data() |> +#' eval_code(" +#' foo <- function() { +#' env <- parent.frame() +#' env$x <- 0 +#' } +#' foo() # @linksto x +#' y <- x +#' ") +#' get_code(data, datanames = "y") +#' ``` +#' #' #' @param object (`teal_data`) #' @param datanames `r lifecycle::badge("experimental")` (`character`) vector of data set names to return the code for. diff --git a/man/get_code-teal_data-method.Rd b/man/get_code-teal_data-method.Rd index bf4b92e20..0039d448b 100644 --- a/man/get_code-teal_data-method.Rd +++ b/man/get_code-teal_data-method.Rd @@ -26,10 +26,56 @@ Retrieve code stored in \verb{@code}, which (in principle) can be used to recrea Use \code{datanames} to limit the code to one or more of the data sets enumerated in \verb{@datanames}. If the code has not passed verification (with \code{\link[=verify]{verify()}}), a warning will be prepended. } -\section{Notes for Developers}{ +\section{Extracting dataset-specific code}{ -To learn more about how a subset of code needed to reproduce a specific data set is extracted from all code, -see \code{\link[=get_code_dependency]{get_code_dependency()}}. +When \code{datanames} is specified, the code returned will be limited to the code needed to create the +requested data sets. \code{code} stored in the \code{teal_data} object is statically analyzed to determine +dependency tree between each line of the code. Analysis is performed automatically basing on the +used symbols and it is working in a typical case when new dataset is created by assignment operator. +For example: + +\if{html}{\out{
}}\preformatted{data <- teal_data() |> + eval_code(" + foo <- function(x) x + 1 + x <- 0 + y <- foo(x) + ") +get_code(data, datanames = "y") +}\if{html}{\out{
}} + +In above case \code{y} depends on \code{z} and \code{foo} so the code returned by \code{get_code} will contain all three lines of code. +\code{get_code(data, datanames = "x")} will return only the second line of code etc. +\cr +There could be cases when the dependency tree is not obvious. For example: + +\if{html}{\out{
}}\preformatted{data <- teal_data() |> + eval_code(" + foo <- function() \{ + env <- parent.frame() + env$x <- 0 + \} + foo() + y <- x + ") +get_code(data, datanames = "y") +}\if{html}{\out{
}} + +In above case \code{y} depends on \code{x} but \code{x} is not created by assignment operator. In such cases +\code{get_code(data, y)} will only return the second line of the code. To overcome this limitation +add \verb{# @linksto x} at the end of a line where a side-effect occurs to specify that this line +is required to reproduce a variable called \code{x}. So the code should look like: + +\if{html}{\out{
}}\preformatted{data <- teal_data() |> + eval_code(" + foo <- function() \{ + env <- parent.frame() + env$x <- 0 + \} + foo() # @linksto x + y <- x + ") +get_code(data, datanames = "y") +}\if{html}{\out{
}} } \examples{ From 75a23639f65bb9eebd8c16a8048e7c7f150183a2 Mon Sep 17 00:00:00 2001 From: m7pr Date: Mon, 22 Jan 2024 11:24:17 +0100 Subject: [PATCH 09/36] do not return downstream effects for objects --- R/utils-get_code_dependency.R | 27 +++++++++++++-------------- tests/testthat/test-get_code.R | 22 ++++++++++++++++------ 2 files changed, 29 insertions(+), 20 deletions(-) diff --git a/R/utils-get_code_dependency.R b/R/utils-get_code_dependency.R index 42cd87f63..90d2c260b 100644 --- a/R/utils-get_code_dependency.R +++ b/R/utils-get_code_dependency.R @@ -199,7 +199,7 @@ extract_occurrence <- function(calls_pd) { data_call <- find_call(call_pd, "data") if (data_call) { sym <- call_pd[data_call + 1, "text"] - return(c(gsub("^['\"]|['\"]$", "", sym), "<-", "data")) + return(c(gsub("^['\"]|['\"]$", "", sym), "<-")) } # Handle assign(). assign_call <- find_call(call_pd, "assign") @@ -213,7 +213,7 @@ extract_occurrence <- function(calls_pd) { pos <- 1 } sym <- call_pd[assign_call + pos, "text"] - return(c(gsub("^['\"]|['\"]$", "", sym), "<-", "assign")) + return(c(gsub("^['\"]|['\"]$", "", sym), "<-")) } # What occurs in a function body is not tracked. @@ -232,22 +232,21 @@ extract_occurrence <- function(calls_pd) { sym_cond <- setdiff(sym_cond, which(x$id %in% after_dollar)) } - # If there was an assignment operation detect direction of it. ass_cond <- grep("ASSIGN", x$token) - if (length(ass_cond)) { # NOTE 1 - sym_cond <- sym_cond[sym_cond > ass_cond] # NOTE 2 + if (!length(ass_cond)) { + return(c("<-", unique(x[sym_cond, "text"]))) } - if ((length(ass_cond) && x$text[ass_cond] == "->") || !length(ass_cond)) { # NOTE 3 + + sym_cond <- sym_cond[sym_cond > ass_cond] # NOTE 1 + # If there was an assignment operation detect direction of it. + if (x$text[ass_cond] == "->") { # NOTE 2 sym_cond <- rev(sym_cond) } - ans <- unique(x[sym_cond, "text"]) - after <- if (length(x[sym_cond, "text"]) == 1 && x[sym_cond, "token"] == "SYMBOL_FUNCTION_CALL") 0 else 1 - append(ans, "<-", after = after) - - ### NOTE 3: What if there are 2+ assignments, e.g. a <- b -> c or e.g. a <- b <- c. - ### NOTE 2: For cases like 'eval(expression(b <- b + 2))' removes 'eval(expression('. - ### NOTE 1: Cases like 'data(iris)' that do not have an assignment operator. - ### NOTE 1: Then they are parsed as c("iris", "<-", "data") + + append(unique(x[sym_cond, "text"]), "<-", after = 1) + + ### NOTE 2: What if there are 2+ assignments, e.g. a <- b -> c or e.g. a <- b <- c. + ### NOTE 1: For cases like 'eval(expression(b <- b + 2))' removes 'eval(expression('. } ) } diff --git a/tests/testthat/test-get_code.R b/tests/testthat/test-get_code.R index 6b0d2ee59..00f1de9e5 100644 --- a/tests/testthat/test-get_code.R +++ b/tests/testthat/test-get_code.R @@ -40,6 +40,18 @@ testthat::test_that("get_code with datanames extracts code of a binding from cha ) }) +testthat::test_that("get_code with datanames extracts code of a binding from character vector containing simple code", { + code <- c( + "a <- 1", + "head(a)" + ) + tdata <- eval_code(teal_data(), code) + testthat::expect_identical( + get_code(tdata, datanames = "a"), + "a <- 1" + ) +}) + testthat::test_that("get_code works for datanames of length > 1", { code <- c( "a <- 1", @@ -133,7 +145,6 @@ testthat::test_that("get_code with datanames can extract the code when using <<- "b <<- b + 2" ) tdata <- eval_code(teal_data(), code) - datanames(tdata) <- c("a", "b") testthat::expect_identical( get_code(tdata, datanames = "b"), paste("a <- 1", "b <- a", "b <<- b + 2", sep = "\n") @@ -147,7 +158,6 @@ testthat::test_that("get_code with datanames detects every assign calls even if "eval(expression({b <- b + 2}))" ) tdata <- eval_code(teal_data(), code) - datanames(tdata) <- c("a", "b") testthat::expect_identical( get_code(tdata, datanames = "b"), paste("b <- 2", "eval(expression({\n b <- b + 2\n}))", sep = "\n") @@ -213,7 +223,7 @@ testthat::test_that("get_code with datanames can extract the code for assign fun testthat::test_that( "get_code with datanames can extract the code for assign function where \"x\" is variable", { - testthat::skip("We will tackle this some day!") + testthat::skip("We will not tackle this some day, as this require code evaluation.") code <- c( "x <- \"a\"", "assign(x, 5)", @@ -392,7 +402,7 @@ testthat::test_that("get_code with datanames ignores occurrence in function defi tdata <- eval_code(teal_data(), code) testthat::expect_identical( get_code(tdata, datanames = "x"), - paste("x <- 1", "print(x)", sep = "\n") + "x <- 1" ) }) @@ -436,7 +446,7 @@ testthat::test_that("get_code with datanames returns custom function calls on ob tdata <- eval_code(teal_data(), code) testthat::expect_identical( get_code(tdata, datanames = "b"), - paste("b <- 2", "foo <- function(b) {\n b <- b + 2\n}", "foo(b)", sep = "\n") + code[1] ) }) @@ -656,10 +666,10 @@ testthat::test_that( "iris2 <- head(iris)" ) tdata <- eval_code(teal_data(), code) - datanames(tdata) <- c("iris2") testthat::expect_identical( get_code(tdata, datanames = "iris2"), paste("data(iris)", "iris2 <- head(iris)", sep = "\n") ) } ) + From ea3f6f14e35b9730fe7320914be17f87687f3754 Mon Sep 17 00:00:00 2001 From: github-actions <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 22 Jan 2024 10:26:23 +0000 Subject: [PATCH 10/36] [skip actions] Restyle files --- tests/testthat/test-get_code.R | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/testthat/test-get_code.R b/tests/testthat/test-get_code.R index 00f1de9e5..7498ec020 100644 --- a/tests/testthat/test-get_code.R +++ b/tests/testthat/test-get_code.R @@ -672,4 +672,3 @@ testthat::test_that( ) } ) - From 3bb617b0aacf2982b0f2e621a7447f0b58b5cdd9 Mon Sep 17 00:00:00 2001 From: m7pr Date: Mon, 22 Jan 2024 11:28:06 +0100 Subject: [PATCH 11/36] change name of the test --- tests/testthat/test-get_code.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-get_code.R b/tests/testthat/test-get_code.R index 00f1de9e5..a68f64d85 100644 --- a/tests/testthat/test-get_code.R +++ b/tests/testthat/test-get_code.R @@ -40,7 +40,7 @@ testthat::test_that("get_code with datanames extracts code of a binding from cha ) }) -testthat::test_that("get_code with datanames extracts code of a binding from character vector containing simple code", { +testthat::test_that("get_code with datanames extracts code without downstream usage", { code <- c( "a <- 1", "head(a)" From 614280da743bc409ebba59ad2839d41956942136 Mon Sep 17 00:00:00 2001 From: m7pr Date: Mon, 22 Jan 2024 11:28:48 +0100 Subject: [PATCH 12/36] typo --- tests/testthat/test-get_code.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-get_code.R b/tests/testthat/test-get_code.R index 98ea791f0..21557fff9 100644 --- a/tests/testthat/test-get_code.R +++ b/tests/testthat/test-get_code.R @@ -223,7 +223,7 @@ testthat::test_that("get_code with datanames can extract the code for assign fun testthat::test_that( "get_code with datanames can extract the code for assign function where \"x\" is variable", { - testthat::skip("We will not tackle this some day, as this require code evaluation.") + testthat::skip("We will not tackle this some day, as this requires code evaluation.") code <- c( "x <- \"a\"", "assign(x, 5)", From ad3a9af0e619f1ea941c717187ec1b00fe6e734e Mon Sep 17 00:00:00 2001 From: go_gonzo Date: Mon, 22 Jan 2024 11:59:29 +0100 Subject: [PATCH 13/36] fix docs --- R/teal_data-get_code.R | 35 ++++++++++++++++++-------------- man/get_code-teal_data-method.Rd | 35 ++++++++++++++++++-------------- 2 files changed, 40 insertions(+), 30 deletions(-) diff --git a/R/teal_data-get_code.R b/R/teal_data-get_code.R index 17dcb27e5..deeb5e60e 100644 --- a/R/teal_data-get_code.R +++ b/R/teal_data-get_code.R @@ -7,42 +7,47 @@ #' If the code has not passed verification (with [`verify()`]), a warning will be prepended. #' #' @section Extracting dataset-specific code: -#' When `datanames` is specified, the code returned will be limited to the code needed to create the -#' requested data sets. `code` stored in the `teal_data` object is statically analyzed to determine -#' dependency tree between each line of the code. Analysis is performed automatically basing on the -#' used symbols and it is working in a typical case when new dataset is created by assignment operator. +#' When `datanames` is specified `get_code` will limits the output only to the lines of code needed +#' to recreate the requested data sets. `code` stored in the `teal_data` object is analyzed statically +#' to determine dependency tree between each line of the code. Analysis is performed automatically +#' based on the used symbols and it is working in a standard case when a new dataset is created by +#' the assignment operator. +#' #' For example: #' ```r #' data <- teal_data() |> -#' eval_code(" +#' within( #' foo <- function(x) x + 1 #' x <- 0 #' y <- foo(x) -#' ") +#' ) #' get_code(data, datanames = "y") #' ``` #' -#' In above case `y` depends on `z` and `foo` so the code returned by `get_code` will contain all three lines of code. -#' `get_code(data, datanames = "x")` will return only the second line of code etc. +#' In above case `y` depends on `x` and `foo` so the code returned by `get_code` will contain all +#' three lines of code. `get_code(data, datanames = "x")` will return only the second line of code etc. #' \cr -#' There could be cases when the dependency tree is not obvious. For example: +#' When a code uses non-standard evaluation `get_code` won't be able to determine relationships +#' between each calls. Consider the case where `y` depends on `x` but `x` is not created by +#' assignment operator. In such cases `get_code(data, y)` will only return the second line of the code: #' ```r #' data <- teal_data() |> -#' eval_code(" +#' within( #' foo <- function() { #' env <- parent.frame() #' env$x <- 0 #' } #' foo() #' y <- x -#' ") +#' ) #' get_code(data, datanames = "y") #' ``` #' -#' In above case `y` depends on `x` but `x` is not created by assignment operator. In such cases -#' `get_code(data, y)` will only return the second line of the code. To overcome this limitation -#' add `# @linksto x` at the end of a line where a side-effect occurs to specify that this line -#' is required to reproduce a variable called `x`. So the code should look like: +#' To overcome limitation from above example `get_code` allows to specify dependencies manually. +#' Adding `# @linksto x` at the end of a line where a non-standard evaluation occurs will "flag" +#' this line as dependent on `x`. +#' NOTE: `expr` passed to `within` function discards comments. To add a code with comments to +#' `teal_data` object use `eval_code` function. #' #' ```r #' data <- teal_data() |> diff --git a/man/get_code-teal_data-method.Rd b/man/get_code-teal_data-method.Rd index 0039d448b..e85acc2c5 100644 --- a/man/get_code-teal_data-method.Rd +++ b/man/get_code-teal_data-method.Rd @@ -28,42 +28,47 @@ If the code has not passed verification (with \code{\link[=verify]{verify()}}), } \section{Extracting dataset-specific code}{ -When \code{datanames} is specified, the code returned will be limited to the code needed to create the -requested data sets. \code{code} stored in the \code{teal_data} object is statically analyzed to determine -dependency tree between each line of the code. Analysis is performed automatically basing on the -used symbols and it is working in a typical case when new dataset is created by assignment operator. +When \code{datanames} is specified \code{get_code} will limits the output only to the lines of code needed +to recreate the requested data sets. \code{code} stored in the \code{teal_data} object is analyzed statically +to determine dependency tree between each line of the code. Analysis is performed automatically +based on the used symbols and it is working in a standard case when a new dataset is created by +the assignment operator. + For example: \if{html}{\out{
}}\preformatted{data <- teal_data() |> - eval_code(" + within( foo <- function(x) x + 1 x <- 0 y <- foo(x) - ") + ) get_code(data, datanames = "y") }\if{html}{\out{
}} -In above case \code{y} depends on \code{z} and \code{foo} so the code returned by \code{get_code} will contain all three lines of code. -\code{get_code(data, datanames = "x")} will return only the second line of code etc. +In above case \code{y} depends on \code{x} and \code{foo} so the code returned by \code{get_code} will contain all +three lines of code. \code{get_code(data, datanames = "x")} will return only the second line of code etc. \cr -There could be cases when the dependency tree is not obvious. For example: +When a code uses non-standard evaluation \code{get_code} won't be able to determine relationships +between each calls. Consider the case where \code{y} depends on \code{x} but \code{x} is not created by +assignment operator. In such cases \code{get_code(data, y)} will only return the second line of the code: \if{html}{\out{
}}\preformatted{data <- teal_data() |> - eval_code(" + within( foo <- function() \{ env <- parent.frame() env$x <- 0 \} foo() y <- x - ") + ) get_code(data, datanames = "y") }\if{html}{\out{
}} -In above case \code{y} depends on \code{x} but \code{x} is not created by assignment operator. In such cases -\code{get_code(data, y)} will only return the second line of the code. To overcome this limitation -add \verb{# @linksto x} at the end of a line where a side-effect occurs to specify that this line -is required to reproduce a variable called \code{x}. So the code should look like: +To overcome limitation from above example \code{get_code} allows to specify dependencies manually. +Adding \verb{# @linksto x} at the end of a line where a untracked effect occurs will flag this line as +dependent on \code{x}. +NOTE: \code{within} accepts \code{expression} object only which discards comments. To add a code to +\code{teal_data} object along with comments use \code{eval_code} function. \if{html}{\out{
}}\preformatted{data <- teal_data() |> eval_code(" From f7766e11a9fc52817d26df71502a8010c616f15d Mon Sep 17 00:00:00 2001 From: go_gonzo Date: Mon, 22 Jan 2024 12:01:41 +0100 Subject: [PATCH 14/36] fix docs --- R/teal_data-get_code.R | 2 +- man/get_code-teal_data-method.Rd | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/R/teal_data-get_code.R b/R/teal_data-get_code.R index deeb5e60e..6b47736ab 100644 --- a/R/teal_data-get_code.R +++ b/R/teal_data-get_code.R @@ -13,7 +13,7 @@ #' based on the used symbols and it is working in a standard case when a new dataset is created by #' the assignment operator. #' -#' For example: +#' Consider the following example: #' ```r #' data <- teal_data() |> #' within( diff --git a/man/get_code-teal_data-method.Rd b/man/get_code-teal_data-method.Rd index e85acc2c5..1745c3236 100644 --- a/man/get_code-teal_data-method.Rd +++ b/man/get_code-teal_data-method.Rd @@ -34,7 +34,7 @@ to determine dependency tree between each line of the code. Analysis is performe based on the used symbols and it is working in a standard case when a new dataset is created by the assignment operator. -For example: +Consider the following example: \if{html}{\out{
}}\preformatted{data <- teal_data() |> within( @@ -65,10 +65,10 @@ get_code(data, datanames = "y") }\if{html}{\out{
}} To overcome limitation from above example \code{get_code} allows to specify dependencies manually. -Adding \verb{# @linksto x} at the end of a line where a untracked effect occurs will flag this line as -dependent on \code{x}. -NOTE: \code{within} accepts \code{expression} object only which discards comments. To add a code to -\code{teal_data} object along with comments use \code{eval_code} function. +Adding \verb{# @linksto x} at the end of a line where a non-standard evaluation occurs will "flag" +this line as dependent on \code{x}. +NOTE: \code{expr} passed to \code{within} function discards comments. To add a code with comments to +\code{teal_data} object use \code{eval_code} function. \if{html}{\out{
}}\preformatted{data <- teal_data() |> eval_code(" From 605dbd36983b0ddc4c469727aa676d321e79fc70 Mon Sep 17 00:00:00 2001 From: go_gonzo Date: Mon, 22 Jan 2024 13:06:26 +0100 Subject: [PATCH 15/36] simplify examples docs --- R/teal_data-get_code.R | 12 +++++++----- man/get_code-teal_data-method.Rd | 12 +++++++----- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/R/teal_data-get_code.R b/R/teal_data-get_code.R index 6b47736ab..74aa0824a 100644 --- a/R/teal_data-get_code.R +++ b/R/teal_data-get_code.R @@ -17,7 +17,9 @@ #' ```r #' data <- teal_data() |> #' within( -#' foo <- function(x) x + 1 +#' foo <- function(x) { +#' x + 1 +#' } #' x <- 0 #' y <- foo(x) #' ) @@ -34,9 +36,9 @@ #' data <- teal_data() |> #' within( #' foo <- function() { -#' env <- parent.frame() -#' env$x <- 0 +#' x <<- x + 1 #' } +#' x <- 0 #' foo() #' y <- x #' ) @@ -53,9 +55,9 @@ #' data <- teal_data() |> #' eval_code(" #' foo <- function() { -#' env <- parent.frame() -#' env$x <- 0 +#' x <<- x + 1 #' } +#' x <- 0 #' foo() # @linksto x #' y <- x #' ") diff --git a/man/get_code-teal_data-method.Rd b/man/get_code-teal_data-method.Rd index 1745c3236..9546d4ed5 100644 --- a/man/get_code-teal_data-method.Rd +++ b/man/get_code-teal_data-method.Rd @@ -38,7 +38,9 @@ Consider the following example: \if{html}{\out{
}}\preformatted{data <- teal_data() |> within( - foo <- function(x) x + 1 + foo <- function(x) \{ + x + 1 + \} x <- 0 y <- foo(x) ) @@ -55,9 +57,9 @@ assignment operator. In such cases \code{get_code(data, y)} will only return the \if{html}{\out{
}}\preformatted{data <- teal_data() |> within( foo <- function() \{ - env <- parent.frame() - env$x <- 0 + x <<- x + 1 \} + x <- 0 foo() y <- x ) @@ -73,9 +75,9 @@ NOTE: \code{expr} passed to \code{within} function discards comments. To add a c \if{html}{\out{
}}\preformatted{data <- teal_data() |> eval_code(" foo <- function() \{ - env <- parent.frame() - env$x <- 0 + x <<- x + 1 \} + x <- 0 foo() # @linksto x y <- x ") From a74707d208c31a92741a0dbaa2ffccfd81f34708 Mon Sep 17 00:00:00 2001 From: go_gonzo Date: Mon, 22 Jan 2024 13:42:33 +0100 Subject: [PATCH 16/36] another proposition --- R/teal_data-get_code.R | 26 ++++++++++++++------------ man/get_code-teal_data-method.Rd | 24 ++++++++++++------------ 2 files changed, 26 insertions(+), 24 deletions(-) diff --git a/R/teal_data-get_code.R b/R/teal_data-get_code.R index 74aa0824a..a19642975 100644 --- a/R/teal_data-get_code.R +++ b/R/teal_data-get_code.R @@ -14,40 +14,42 @@ #' the assignment operator. #' #' Consider the following example: +#' #' ```r #' data <- teal_data() |> -#' within( +#' within({ #' foo <- function(x) { #' x + 1 #' } #' x <- 0 #' y <- foo(x) -#' ) +#' }) #' get_code(data, datanames = "y") #' ``` #' -#' In above case `y` depends on `x` and `foo` so the code returned by `get_code` will contain all -#' three lines of code. `get_code(data, datanames = "x")` will return only the second line of code etc. +#' In above case `y` depends on `x` and `foo` so the code returned by `get_code(data, datanames = "y")` +#' will contain all three calls. `get_code(data, datanames = "x")` will return only the second call etc. #' \cr -#' When a code uses non-standard evaluation `get_code` won't be able to determine relationships -#' between each calls. Consider the case where `y` depends on `x` but `x` is not created by -#' assignment operator. In such cases `get_code(data, y)` will only return the second line of the code: +#' `get_code` isn't always able to properly assess dependencies between each calls and symbols in a +#' provided code. Consider the case where `y` depends on `x` but `x` is not created by assignment +#' operator. In such cases `get_code(data, datanames = "y")` will only return the last call: +#' #' ```r #' data <- teal_data() |> -#' within( +#' within({ #' foo <- function() { #' x <<- x + 1 #' } #' x <- 0 #' foo() #' y <- x -#' ) +#' }) #' get_code(data, datanames = "y") #' ``` #' -#' To overcome limitation from above example `get_code` allows to specify dependencies manually. -#' Adding `# @linksto x` at the end of a line where a non-standard evaluation occurs will "flag" -#' this line as dependent on `x`. +#' To overcome limitation from above example, `get_code` allows to specify dependencies manually. +#' Adding `# @linksto x` at the end of a line where unusual evaluation takes place will "flag" +#' this call as dependent on `x`. #' NOTE: `expr` passed to `within` function discards comments. To add a code with comments to #' `teal_data` object use `eval_code` function. #' diff --git a/man/get_code-teal_data-method.Rd b/man/get_code-teal_data-method.Rd index 9546d4ed5..f8f746f87 100644 --- a/man/get_code-teal_data-method.Rd +++ b/man/get_code-teal_data-method.Rd @@ -37,38 +37,38 @@ the assignment operator. Consider the following example: \if{html}{\out{
}}\preformatted{data <- teal_data() |> - within( + within(\{ foo <- function(x) \{ x + 1 \} x <- 0 y <- foo(x) - ) + \}) get_code(data, datanames = "y") }\if{html}{\out{
}} -In above case \code{y} depends on \code{x} and \code{foo} so the code returned by \code{get_code} will contain all -three lines of code. \code{get_code(data, datanames = "x")} will return only the second line of code etc. +In above case \code{y} depends on \code{x} and \code{foo} so the code returned by \code{get_code(data, datanames = "y")} +will contain all three calls. \code{get_code(data, datanames = "x")} will return only the second call etc. \cr -When a code uses non-standard evaluation \code{get_code} won't be able to determine relationships -between each calls. Consider the case where \code{y} depends on \code{x} but \code{x} is not created by -assignment operator. In such cases \code{get_code(data, y)} will only return the second line of the code: +\code{get_code} isn't always able to properly assess dependencies between each calls and symbols in a +provided code. Consider the case where \code{y} depends on \code{x} but \code{x} is not created by assignment +operator. In such cases \code{get_code(data, datanames = "y")} will only return the last call: \if{html}{\out{
}}\preformatted{data <- teal_data() |> - within( + within(\{ foo <- function() \{ x <<- x + 1 \} x <- 0 foo() y <- x - ) + \}) get_code(data, datanames = "y") }\if{html}{\out{
}} -To overcome limitation from above example \code{get_code} allows to specify dependencies manually. -Adding \verb{# @linksto x} at the end of a line where a non-standard evaluation occurs will "flag" -this line as dependent on \code{x}. +To overcome limitation from above example, \code{get_code} allows to specify dependencies manually. +Adding \verb{# @linksto x} at the end of a line where unusual evaluation takes place will "flag" +this call as dependent on \code{x}. NOTE: \code{expr} passed to \code{within} function discards comments. To add a code with comments to \code{teal_data} object use \code{eval_code} function. From 5dcf34fd071a7eb7212932006eef33c503e2f05b Mon Sep 17 00:00:00 2001 From: Aleksander Chlebowski Date: Tue, 23 Jan 2024 13:29:36 +0100 Subject: [PATCH 17/36] update documentation --- DESCRIPTION | 2 +- R/teal_data-get_code.R | 43 +++++++++---------- ...t_code-teal_data-method.Rd => get_code.Rd} | 42 +++++++++--------- 3 files changed, 44 insertions(+), 43 deletions(-) rename man/{get_code-teal_data-method.Rd => get_code.Rd} (57%) diff --git a/DESCRIPTION b/DESCRIPTION index 18d4d6561..42b667c16 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -51,7 +51,7 @@ Encoding: UTF-8 Language: en-US LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.0 +RoxygenNote: 7.3.1 Collate: 'cdisc_data.R' 'data.R' diff --git a/R/teal_data-get_code.R b/R/teal_data-get_code.R index a19642975..9bbc6d3bc 100644 --- a/R/teal_data-get_code.R +++ b/R/teal_data-get_code.R @@ -7,14 +7,14 @@ #' If the code has not passed verification (with [`verify()`]), a warning will be prepended. #' #' @section Extracting dataset-specific code: -#' When `datanames` is specified `get_code` will limits the output only to the lines of code needed -#' to recreate the requested data sets. `code` stored in the `teal_data` object is analyzed statically -#' to determine dependency tree between each line of the code. Analysis is performed automatically -#' based on the used symbols and it is working in a standard case when a new dataset is created by -#' the assignment operator. +#' When `datanames` is specified, the code returned will be limited to the lines needed to _create_ +#' the requested data sets. The code stored in the `@code` slot is analyzed statically to determine +#' which lines the datasets of interest depend upon. The analysis works best when objects are created +#' by using the standard assignment operator ,`<-`, and it can fail in some situations. #' -#' Consider the following example: +#' Consider the following examples: #' +#' _Case 1: Usual assignments._ #' ```r #' data <- teal_data() |> #' within({ @@ -26,14 +26,10 @@ #' }) #' get_code(data, datanames = "y") #' ``` +#' `x` has no dependencies, so `get_code(data, datanames = "x")` will return only the second call.\cr +#' `y` depends on `x` and `foo`, so `get_code(data, datanames = "y")` will contain all three calls. #' -#' In above case `y` depends on `x` and `foo` so the code returned by `get_code(data, datanames = "y")` -#' will contain all three calls. `get_code(data, datanames = "x")` will return only the second call etc. -#' \cr -#' `get_code` isn't always able to properly assess dependencies between each calls and symbols in a -#' provided code. Consider the case where `y` depends on `x` but `x` is not created by assignment -#' operator. In such cases `get_code(data, datanames = "y")` will only return the last call: -#' +#' _Case 2: Some objects are created by a function's side effects._ #' ```r #' data <- teal_data() |> #' within({ @@ -46,12 +42,12 @@ #' }) #' get_code(data, datanames = "y") #' ``` -#' -#' To overcome limitation from above example, `get_code` allows to specify dependencies manually. -#' Adding `# @linksto x` at the end of a line where unusual evaluation takes place will "flag" -#' this call as dependent on `x`. -#' NOTE: `expr` passed to `within` function discards comments. To add a code with comments to -#' `teal_data` object use `eval_code` function. +#' Here, `y` depends on `x` but `x` is modified by `foo` as a side effect (not by reassignment) +#' and so `get_code(data, datanames = "y")` will not return the `foo()` call.\cr +#' To overcome this limitation, code dependencies can be specified manually. +#' Lines where side effects occur can be flagged by adding "`# @linksto `" at the end.\cr +#' Note that `within` evaluates code passed to `expr` as is and comments are ignored. +#' In order to include comments in code one must use the `eval_code` function instead. #' #' ```r #' data <- teal_data() |> @@ -65,6 +61,7 @@ #' ") #' get_code(data, datanames = "y") #' ``` +#' Now, the `foo()` call will be properly included in the code required to recreate `y`. #' #' #' @param object (`teal_data`) @@ -73,10 +70,10 @@ #' `expression` (`deparse = FALSE`). #' #' @return -#' Either string or an expression representing code used to create the requested data sets. +#' Either a character string or an expression. If `datanames` is used to request a specific dataset, +#' only code that _creates_ that dataset (not code that uses it) is returned. Otherwise, all contents of `@code`. #' #' @examples -#' #' tdata1 <- teal_data() #' tdata1 <- within(tdata1, { #' a <- 1 @@ -90,8 +87,10 @@ #' tdata2 <- teal_data(x1 = iris, code = "x1 <- iris") #' get_code(tdata2) #' get_code(verify(tdata2)) +#' +#' @rdname get_code #' @aliases get_code,teal_data-method -#' @aliases get_code +#' #' @export setMethod("get_code", signature = "teal_data", definition = function(object, deparse = TRUE, datanames = NULL) { checkmate::assert_character(datanames, min.len = 1L, null.ok = TRUE) diff --git a/man/get_code-teal_data-method.Rd b/man/get_code.Rd similarity index 57% rename from man/get_code-teal_data-method.Rd rename to man/get_code.Rd index f8f746f87..fe04b57ea 100644 --- a/man/get_code-teal_data-method.Rd +++ b/man/get_code.Rd @@ -2,7 +2,6 @@ % Please edit documentation in R/teal_data-get_code.R \name{get_code,teal_data-method} \alias{get_code,teal_data-method} -\alias{get_code} \title{Get code from \code{teal_data} object} \usage{ \S4method{get_code}{teal_data}(object, deparse = TRUE, datanames = NULL) @@ -16,7 +15,8 @@ \item{datanames}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} (\code{character}) vector of data set names to return the code for.} } \value{ -Either string or an expression representing code used to create the requested data sets. +Either a character string or an expression. If \code{datanames} is used to request a specific dataset, +only code that \emph{creates} that dataset (not code that uses it) is returned. Otherwise, all contents of \verb{@code}. } \description{ Retrieve code from \code{teal_data} object. @@ -28,13 +28,14 @@ If the code has not passed verification (with \code{\link[=verify]{verify()}}), } \section{Extracting dataset-specific code}{ -When \code{datanames} is specified \code{get_code} will limits the output only to the lines of code needed -to recreate the requested data sets. \code{code} stored in the \code{teal_data} object is analyzed statically -to determine dependency tree between each line of the code. Analysis is performed automatically -based on the used symbols and it is working in a standard case when a new dataset is created by -the assignment operator. +When \code{datanames} is specified, the code returned will be limited to the lines needed to \emph{create} +the requested data sets. The code stored in the \verb{@code} slot is analyzed statically to determine +which lines the datasets of interest depend upon. The analysis works best when objects are created +by using the standard assignment operator ,\verb{<-}, and it can fail in some situations. -Consider the following example: +Consider the following examples: + +\emph{Case 1: Usual assignments.} \if{html}{\out{
}}\preformatted{data <- teal_data() |> within(\{ @@ -47,12 +48,10 @@ Consider the following example: get_code(data, datanames = "y") }\if{html}{\out{
}} -In above case \code{y} depends on \code{x} and \code{foo} so the code returned by \code{get_code(data, datanames = "y")} -will contain all three calls. \code{get_code(data, datanames = "x")} will return only the second call etc. -\cr -\code{get_code} isn't always able to properly assess dependencies between each calls and symbols in a -provided code. Consider the case where \code{y} depends on \code{x} but \code{x} is not created by assignment -operator. In such cases \code{get_code(data, datanames = "y")} will only return the last call: +\code{x} has no dependencies, so \code{get_code(data, datanames = "x")} will return only the second call.\cr +\code{y} depends on \code{x} and \code{foo}, so \code{get_code(data, datanames = "y")} will contain all three calls. + +\emph{Case 2: Some objects are created by a function's side effects.} \if{html}{\out{
}}\preformatted{data <- teal_data() |> within(\{ @@ -66,11 +65,12 @@ operator. In such cases \code{get_code(data, datanames = "y")} will only return get_code(data, datanames = "y") }\if{html}{\out{
}} -To overcome limitation from above example, \code{get_code} allows to specify dependencies manually. -Adding \verb{# @linksto x} at the end of a line where unusual evaluation takes place will "flag" -this call as dependent on \code{x}. -NOTE: \code{expr} passed to \code{within} function discards comments. To add a code with comments to -\code{teal_data} object use \code{eval_code} function. +Here, \code{y} depends on \code{x} but \code{x} is modified by \code{foo} as a side effect (not by reassignment) +and so \code{get_code(data, datanames = "y")} will not return the \code{foo()} call.\cr +To overcome this limitation, code dependencies can be specified manually. +Lines where side effects occur can be flagged by adding "\verb{# @linksto }" at the end.\cr +Note that \code{within} evaluates code passed to \code{expr} as is and comments are ignored. +In order to include comments in code one must use the \code{eval_code} function instead. \if{html}{\out{
}}\preformatted{data <- teal_data() |> eval_code(" @@ -83,10 +83,11 @@ NOTE: \code{expr} passed to \code{within} function discards comments. To add a c ") get_code(data, datanames = "y") }\if{html}{\out{
}} + +Now, the \code{foo()} call will be properly included in the code required to recreate \code{y}. } \examples{ - tdata1 <- teal_data() tdata1 <- within(tdata1, { a <- 1 @@ -100,4 +101,5 @@ get_code(tdata1, datanames = "b") tdata2 <- teal_data(x1 = iris, code = "x1 <- iris") get_code(tdata2) get_code(verify(tdata2)) + } From cf78609c7e9a7bbfa19c8b14dd433fc57bdc6b83 Mon Sep 17 00:00:00 2001 From: Marcin <133694481+m7pr@users.noreply.github.com> Date: Tue, 23 Jan 2024 14:00:47 +0100 Subject: [PATCH 18/36] 265 fix issue for using code in `quote` function with `get_code` (#266) Close #265 --- R/utils-get_code_dependency.R | 6 +++++- tests/testthat/test-get_code.R | 13 +++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/R/utils-get_code_dependency.R b/R/utils-get_code_dependency.R index 90d2c260b..f40fbced2 100644 --- a/R/utils-get_code_dependency.R +++ b/R/utils-get_code_dependency.R @@ -215,6 +215,10 @@ extract_occurrence <- function(calls_pd) { sym <- call_pd[assign_call + pos, "text"] return(c(gsub("^['\"]|['\"]$", "", sym), "<-")) } + quote_call <- find_call(call_pd, "quote") + if (quote_call) { + call_pd <- call_pd[-c(1:quote_call), ] + } # What occurs in a function body is not tracked. x <- call_pd[!is_in_function(call_pd), ] @@ -239,7 +243,7 @@ extract_occurrence <- function(calls_pd) { sym_cond <- sym_cond[sym_cond > ass_cond] # NOTE 1 # If there was an assignment operation detect direction of it. - if (x$text[ass_cond] == "->") { # NOTE 2 + if (unique(x$text[ass_cond]) == "->") { # NOTE 2 sym_cond <- rev(sym_cond) } diff --git a/tests/testthat/test-get_code.R b/tests/testthat/test-get_code.R index 21557fff9..632411fd7 100644 --- a/tests/testthat/test-get_code.R +++ b/tests/testthat/test-get_code.R @@ -187,6 +187,19 @@ testthat::test_that("get_code does not break if code is separated by ;", { ) }) +testthat::test_that("get_code does not break if code uses quote", { + code <- c( + "expr <- quote(x <- x + 1)", + "x <- 0", + "eval(expr)" + ) + tdata <- eval_code(teal_data(), code) + testthat::expect_identical( + get_code(tdata, datanames = "x"), + paste(code[1:2], collapse = "\n") + ) +}) + # assign ---------------------------------------------------------------------------------------------------------- testthat::test_that("get_code with datanames can extract the code for assign function", { From c33421741dfa5069190637ef6ff79c95da652ea6 Mon Sep 17 00:00:00 2001 From: Aleksander Chlebowski Date: Tue, 23 Jan 2024 14:33:08 +0100 Subject: [PATCH 19/36] update documentation --- DESCRIPTION | 2 +- R/teal_data-get_code.R | 14 +++++++++++--- man/get_code.Rd | 16 +++++++++++++--- 3 files changed, 25 insertions(+), 7 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 2b62c25e4..8940157f8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -51,7 +51,7 @@ Encoding: UTF-8 Language: en-US LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.0 +RoxygenNote: 7.3.1 Collate: 'cdisc_data.R' 'data.R' diff --git a/R/teal_data-get_code.R b/R/teal_data-get_code.R index 9bbc6d3bc..6e2875fd6 100644 --- a/R/teal_data-get_code.R +++ b/R/teal_data-get_code.R @@ -9,8 +9,8 @@ #' @section Extracting dataset-specific code: #' When `datanames` is specified, the code returned will be limited to the lines needed to _create_ #' the requested data sets. The code stored in the `@code` slot is analyzed statically to determine -#' which lines the datasets of interest depend upon. The analysis works best when objects are created -#' by using the standard assignment operator ,`<-`, and it can fail in some situations. +#' which lines the datasets of interest depend upon. The analysis works well when objects are created +#' with standard infix assignment operators (see `?assignOps`) but it can fail in some situations. #' #' Consider the following examples: #' @@ -61,7 +61,15 @@ #' ") #' get_code(data, datanames = "y") #' ``` -#' Now, the `foo()` call will be properly included in the code required to recreate `y`. +#' Now the `foo()` call will be properly included in the code required to recreate `y`. +#' +#' Note that two functions that create objects as side effects, `assign` and `data`, are handled automatically. +#' +#' Here are known cases where manual tagging is necessary: +#' - non-standard assignment operators, _e.g._ `%<>%` +#' - objects used as conditions in `if` statements: `if ()` +#' - objects used to iterate over in `for` loops: `for(i in )` +#' - evaluating expressions, _e.g._ `eval()` #' #' #' @param object (`teal_data`) diff --git a/man/get_code.Rd b/man/get_code.Rd index fe04b57ea..1dc28a92b 100644 --- a/man/get_code.Rd +++ b/man/get_code.Rd @@ -30,8 +30,8 @@ If the code has not passed verification (with \code{\link[=verify]{verify()}}), When \code{datanames} is specified, the code returned will be limited to the lines needed to \emph{create} the requested data sets. The code stored in the \verb{@code} slot is analyzed statically to determine -which lines the datasets of interest depend upon. The analysis works best when objects are created -by using the standard assignment operator ,\verb{<-}, and it can fail in some situations. +which lines the datasets of interest depend upon. The analysis works well when objects are created +with standard infix assignment operators (see \code{?assignOps}) but it can fail in some situations. Consider the following examples: @@ -84,7 +84,17 @@ In order to include comments in code one must use the \code{eval_code} function get_code(data, datanames = "y") }\if{html}{\out{}} -Now, the \code{foo()} call will be properly included in the code required to recreate \code{y}. +Now the \code{foo()} call will be properly included in the code required to recreate \code{y}. + +Note that two functions that create objects as side effects, \code{assign} and \code{data}, are handled automatically. + +Here are known cases where manual tagging is necessary: +\itemize{ +\item non-standard assignment operators, \emph{e.g.} \verb{\%<>\%} +\item objects used as conditions in \code{if} statements: \verb{if ()} +\item objects used to iterate over in \code{for} loops: \verb{for(i in )} +\item evaluating expressions, \emph{e.g.} \verb{eval()} +} } \examples{ From 43128b3cd76f0ce545e8dd094f171e11bf5783ab Mon Sep 17 00:00:00 2001 From: Marcin <133694481+m7pr@users.noreply.github.com> Date: Tue, 23 Jan 2024 15:24:38 +0100 Subject: [PATCH 20/36] Revert quote detection from 265 (#269) We went too far with the assumption of automated detection in here https://github.com/insightsengineering/teal.data/pull/266#issuecomment-1906057027 Reverting this one --- R/utils-get_code_dependency.R | 4 ---- tests/testthat/test-get_code.R | 2 +- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/R/utils-get_code_dependency.R b/R/utils-get_code_dependency.R index f40fbced2..172e449ad 100644 --- a/R/utils-get_code_dependency.R +++ b/R/utils-get_code_dependency.R @@ -215,10 +215,6 @@ extract_occurrence <- function(calls_pd) { sym <- call_pd[assign_call + pos, "text"] return(c(gsub("^['\"]|['\"]$", "", sym), "<-")) } - quote_call <- find_call(call_pd, "quote") - if (quote_call) { - call_pd <- call_pd[-c(1:quote_call), ] - } # What occurs in a function body is not tracked. x <- call_pd[!is_in_function(call_pd), ] diff --git a/tests/testthat/test-get_code.R b/tests/testthat/test-get_code.R index 632411fd7..b262c93e7 100644 --- a/tests/testthat/test-get_code.R +++ b/tests/testthat/test-get_code.R @@ -196,7 +196,7 @@ testthat::test_that("get_code does not break if code uses quote", { tdata <- eval_code(teal_data(), code) testthat::expect_identical( get_code(tdata, datanames = "x"), - paste(code[1:2], collapse = "\n") + code[2] ) }) From 03fc5a4926684877a4071a34aa2d753dedda28b4 Mon Sep 17 00:00:00 2001 From: Aleksander Chlebowski Date: Tue, 23 Jan 2024 15:25:57 +0100 Subject: [PATCH 21/36] update documentation --- R/teal_data-get_code.R | 2 +- man/get_code.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/teal_data-get_code.R b/R/teal_data-get_code.R index 6e2875fd6..4663396ec 100644 --- a/R/teal_data-get_code.R +++ b/R/teal_data-get_code.R @@ -69,7 +69,7 @@ #' - non-standard assignment operators, _e.g._ `%<>%` #' - objects used as conditions in `if` statements: `if ()` #' - objects used to iterate over in `for` loops: `for(i in )` -#' - evaluating expressions, _e.g._ `eval()` +#' - creating and evaluating language objects, _e.g._ `eval()` #' #' #' @param object (`teal_data`) diff --git a/man/get_code.Rd b/man/get_code.Rd index 1dc28a92b..a0a5dfd93 100644 --- a/man/get_code.Rd +++ b/man/get_code.Rd @@ -93,7 +93,7 @@ Here are known cases where manual tagging is necessary: \item non-standard assignment operators, \emph{e.g.} \verb{\%<>\%} \item objects used as conditions in \code{if} statements: \verb{if ()} \item objects used to iterate over in \code{for} loops: \verb{for(i in )} -\item evaluating expressions, \emph{e.g.} \verb{eval()} +\item creating and evaluating language objects, \emph{e.g.} \verb{eval()} } } From 3f4b60c52f20af9a0faddcaa93f910852afd5672 Mon Sep 17 00:00:00 2001 From: Aleksander Chlebowski Date: Tue, 23 Jan 2024 15:34:51 +0100 Subject: [PATCH 22/36] unify dataset use --- R/teal_data-get_code.R | 6 +++--- man/get_code.Rd | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/R/teal_data-get_code.R b/R/teal_data-get_code.R index 4663396ec..a81a86105 100644 --- a/R/teal_data-get_code.R +++ b/R/teal_data-get_code.R @@ -3,12 +3,12 @@ #' Retrieve code from `teal_data` object. #' #' Retrieve code stored in `@code`, which (in principle) can be used to recreate all objects found in `@env`. -#' Use `datanames` to limit the code to one or more of the data sets enumerated in `@datanames`. +#' Use `datanames` to limit the code to one or more of the datasets enumerated in `@datanames`. #' If the code has not passed verification (with [`verify()`]), a warning will be prepended. #' #' @section Extracting dataset-specific code: #' When `datanames` is specified, the code returned will be limited to the lines needed to _create_ -#' the requested data sets. The code stored in the `@code` slot is analyzed statically to determine +#' the requested datasets. The code stored in the `@code` slot is analyzed statically to determine #' which lines the datasets of interest depend upon. The analysis works well when objects are created #' with standard infix assignment operators (see `?assignOps`) but it can fail in some situations. #' @@ -73,7 +73,7 @@ #' #' #' @param object (`teal_data`) -#' @param datanames `r lifecycle::badge("experimental")` (`character`) vector of data set names to return the code for. +#' @param datanames `r lifecycle::badge("experimental")` (`character`) vector of dataset names to return the code for. #' @param deparse (`logical`) flag specifying whether to return code as `character` (`deparse = TRUE`) or as #' `expression` (`deparse = FALSE`). #' diff --git a/man/get_code.Rd b/man/get_code.Rd index a0a5dfd93..53edf8d31 100644 --- a/man/get_code.Rd +++ b/man/get_code.Rd @@ -12,7 +12,7 @@ \item{deparse}{(\code{logical}) flag specifying whether to return code as \code{character} (\code{deparse = TRUE}) or as \code{expression} (\code{deparse = FALSE}).} -\item{datanames}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} (\code{character}) vector of data set names to return the code for.} +\item{datanames}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} (\code{character}) vector of dataset names to return the code for.} } \value{ Either a character string or an expression. If \code{datanames} is used to request a specific dataset, @@ -23,13 +23,13 @@ Retrieve code from \code{teal_data} object. } \details{ Retrieve code stored in \verb{@code}, which (in principle) can be used to recreate all objects found in \verb{@env}. -Use \code{datanames} to limit the code to one or more of the data sets enumerated in \verb{@datanames}. +Use \code{datanames} to limit the code to one or more of the datasets enumerated in \verb{@datanames}. If the code has not passed verification (with \code{\link[=verify]{verify()}}), a warning will be prepended. } \section{Extracting dataset-specific code}{ When \code{datanames} is specified, the code returned will be limited to the lines needed to \emph{create} -the requested data sets. The code stored in the \verb{@code} slot is analyzed statically to determine +the requested datasets. The code stored in the \verb{@code} slot is analyzed statically to determine which lines the datasets of interest depend upon. The analysis works well when objects are created with standard infix assignment operators (see \code{?assignOps}) but it can fail in some situations. From 12ac15761666889d4495dcdc8164016ea12d7bea Mon Sep 17 00:00:00 2001 From: Marcin <133694481+m7pr@users.noreply.github.com> Date: Wed, 24 Jan 2024 11:08:51 +0100 Subject: [PATCH 23/36] 271 fix `assign` function detection for cases with more than 2 arguments (#272) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Close #271 --------- Signed-off-by: Marcin <133694481+m7pr@users.noreply.github.com> Co-authored-by: André Veríssimo <211358+averissimo@users.noreply.github.com> Co-authored-by: Aleksander Chlebowski <114988527+chlebowa@users.noreply.github.com> --- R/utils-get_code_dependency.R | 49 +++++++++++++++++++++++++++++----- tests/testthat/test-get_code.R | 47 +++++++++++++++++++++++--------- 2 files changed, 77 insertions(+), 19 deletions(-) diff --git a/R/utils-get_code_dependency.R b/R/utils-get_code_dependency.R index 172e449ad..3c5ac060e 100644 --- a/R/utils-get_code_dependency.R +++ b/R/utils-get_code_dependency.R @@ -101,7 +101,8 @@ find_call <- function(call_pd, text) { extract_calls <- function(pd) { calls <- lapply(pd[pd$parent == 0, "id"], get_children, pd = pd) calls <- Filter(Negate(is.null), calls) - fix_comments(calls) + calls <- fix_comments(calls) + fix_arrows(calls) } #' @keywords internal @@ -133,6 +134,23 @@ fix_comments <- function(calls) { calls } +#' Fixes edge case of `<-` assignment operator being called as function, +#' which is \code{`<-`(y,x)} instead of traditional `y <- x`. +#' @keywords internal +#' @noRd +fix_arrows <- function(calls) { + lapply( + calls, + function(call) { + call[call$token == 'SYMBOL_FUNCTION_CALL' & call$text == '`<-`', c("token", "text")] <- c("LEFT_ASSIGN", "<-") + call[call$token == 'SYMBOL_FUNCTION_CALL' & call$text == '`->`', c("token", "text")] <- c("RIGHT_ASSIGN", "->") + call[call$token == 'SYMBOL_FUNCTION_CALL' & call$text == '`<<-`', c("token", "text")] <- c("LEFT_ASSIGN", "<-") + call[call$token == 'SYMBOL_FUNCTION_CALL' & call$text == '`->>`', c("token", "text")] <- c("RIGHT_ASSIGN", "->") + call[call$token == 'SYMBOL_FUNCTION_CALL' & call$text == '`=`', c("token", "text")] <- c("LEFT_ASSIGN", "<-") + call + }) +} + # code_graph ---- #' Create object dependencies graph within parsed code @@ -201,13 +219,32 @@ extract_occurrence <- function(calls_pd) { sym <- call_pd[data_call + 1, "text"] return(c(gsub("^['\"]|['\"]$", "", sym), "<-")) } - # Handle assign(). + # Handle assign(x = ). assign_call <- find_call(call_pd, "assign") if (assign_call) { # Check if parameters were named. + # "','" is for unnamed parameters, where "SYMBOL_SUB" is for named. + # "EQ_SUB" is for `=` appearing after the name of the named parameter. if (any(call_pd$token == "SYMBOL_SUB")) { - params <- call_pd[call_pd$token == "SYMBOL_SUB", "text"] - pos <- match("x", params, nomatch = length(params) + 1L) + params <- call_pd[call_pd$token %in% c("SYMBOL_SUB", "','", "EQ_SUB"), "text"] + # Remove sequence of "=", ",". + if (length(params >1)) { + remove <- integer(0) + for (i in 2:length(params)) { + if (params[i-1] == "=" & params[i] == ",") { + remove <- c(remove, i-1, i) + } + } + if (length(remove)) params <- params[-remove] + } + pos <- match("x", setdiff(params, ","), nomatch = match(",", params, nomatch = 0)) + if (!pos) return(character(0L)) + # pos is indicator of the place of 'x' + # 1. All parameters are named, but none is 'x' - return(character(0L)) + # 2. Some parameters are named, 'x' is in named parameters: match("x", setdiff(params, ",")) + # - check "x" in params being just a vector of named parameters. + # 3. Some parameters are named, 'x' is not in named parameters + # - check first appearance of "," (unnamed parameter) in vector parameters. } else { # Object is the first entry after 'assign'. pos <- 1 @@ -221,7 +258,7 @@ extract_occurrence <- function(calls_pd) { sym_cond <- which(x$token %in% c("SYMBOL", "SYMBOL_FUNCTION_CALL")) if (length(sym_cond) == 0) { - return(character(0)) + return(character(0L)) } # Watch out for SYMBOLS after $ and @. For x$a x@a: x is object, a is not. # For x$a, a's ID is $'s ID-2 so we need to remove all IDs that have ID = $ID - 2. @@ -245,7 +282,7 @@ extract_occurrence <- function(calls_pd) { append(unique(x[sym_cond, "text"]), "<-", after = 1) - ### NOTE 2: What if there are 2+ assignments, e.g. a <- b -> c or e.g. a <- b <- c. + ### NOTE 2: What if there are 2 assignments: e.g. a <- b -> c. ### NOTE 1: For cases like 'eval(expression(b <- b + 2))' removes 'eval(expression('. } ) diff --git a/tests/testthat/test-get_code.R b/tests/testthat/test-get_code.R index b262c93e7..ed1669149 100644 --- a/tests/testthat/test-get_code.R +++ b/tests/testthat/test-get_code.R @@ -125,19 +125,6 @@ testthat::test_that("get_code with datanames extracts code of a parent binding i ) }) -testthat::test_that("get_code with datanames is possible to output the code for multiple objects", { - code <- c( - "a <- 1", - "b <- 2", - "c <- 3" - ) - tdata <- eval_code(teal_data(), code) - testthat::expect_identical( - get_code(tdata, datanames = c("a", "b")), - paste(code[1:2], collapse = "\n") - ) -}) - testthat::test_that("get_code with datanames can extract the code when using <<-", { code <- c( "a <- 1", @@ -264,6 +251,40 @@ testthat::test_that("@linksto tag indicate affected object if object is assigned ) }) +testthat::test_that("get_code works for assign detection no matter how many parametrers were provided in assign", { + code <- c( + "x <- 1", + "assign(\"x\", 0, envir = environment())", + "assign(inherits = FALSE, immediate = TRUE, \"z\", 5, envir = environment())", + "y <- x + z", + "y <- x" + ) + + tdata <- eval_code(teal_data(), code) + + testthat::expect_identical( + get_code(tdata, datanames = "y"), + paste(code, collapse = "\n") + ) + +}) + +testthat::test_that("get_code detects function usage of assignment operator", { + code <- c( + "x <- 1", + "`<-`(y,x)" + ) + + tdata <- eval_code(teal_data(), code) + + testthat::expect_identical( + get_code(tdata, datanames = "y"), + paste(c(code[1], "y <- x"), collapse = "\n") + ) + +}) + + # @linksto --------------------------------------------------------------------------------------------------------- testthat::test_that("@linksto cause to return this line for affected binding", { From 6458b6efd684207fb87fe9b2e88249d8d36f097e Mon Sep 17 00:00:00 2001 From: github-actions <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 24 Jan 2024 10:10:53 +0000 Subject: [PATCH 24/36] [skip actions] Restyle files --- R/utils-get_code_dependency.R | 23 +++++++++++++---------- tests/testthat/test-get_code.R | 2 -- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/R/utils-get_code_dependency.R b/R/utils-get_code_dependency.R index 3c5ac060e..b18f3b6f0 100644 --- a/R/utils-get_code_dependency.R +++ b/R/utils-get_code_dependency.R @@ -142,13 +142,14 @@ fix_arrows <- function(calls) { lapply( calls, function(call) { - call[call$token == 'SYMBOL_FUNCTION_CALL' & call$text == '`<-`', c("token", "text")] <- c("LEFT_ASSIGN", "<-") - call[call$token == 'SYMBOL_FUNCTION_CALL' & call$text == '`->`', c("token", "text")] <- c("RIGHT_ASSIGN", "->") - call[call$token == 'SYMBOL_FUNCTION_CALL' & call$text == '`<<-`', c("token", "text")] <- c("LEFT_ASSIGN", "<-") - call[call$token == 'SYMBOL_FUNCTION_CALL' & call$text == '`->>`', c("token", "text")] <- c("RIGHT_ASSIGN", "->") - call[call$token == 'SYMBOL_FUNCTION_CALL' & call$text == '`=`', c("token", "text")] <- c("LEFT_ASSIGN", "<-") + call[call$token == "SYMBOL_FUNCTION_CALL" & call$text == "`<-`", c("token", "text")] <- c("LEFT_ASSIGN", "<-") + call[call$token == "SYMBOL_FUNCTION_CALL" & call$text == "`->`", c("token", "text")] <- c("RIGHT_ASSIGN", "->") + call[call$token == "SYMBOL_FUNCTION_CALL" & call$text == "`<<-`", c("token", "text")] <- c("LEFT_ASSIGN", "<-") + call[call$token == "SYMBOL_FUNCTION_CALL" & call$text == "`->>`", c("token", "text")] <- c("RIGHT_ASSIGN", "->") + call[call$token == "SYMBOL_FUNCTION_CALL" & call$text == "`=`", c("token", "text")] <- c("LEFT_ASSIGN", "<-") call - }) + } + ) } # code_graph ---- @@ -228,17 +229,19 @@ extract_occurrence <- function(calls_pd) { if (any(call_pd$token == "SYMBOL_SUB")) { params <- call_pd[call_pd$token %in% c("SYMBOL_SUB", "','", "EQ_SUB"), "text"] # Remove sequence of "=", ",". - if (length(params >1)) { + if (length(params > 1)) { remove <- integer(0) for (i in 2:length(params)) { - if (params[i-1] == "=" & params[i] == ",") { - remove <- c(remove, i-1, i) + if (params[i - 1] == "=" & params[i] == ",") { + remove <- c(remove, i - 1, i) } } if (length(remove)) params <- params[-remove] } pos <- match("x", setdiff(params, ","), nomatch = match(",", params, nomatch = 0)) - if (!pos) return(character(0L)) + if (!pos) { + return(character(0L)) + } # pos is indicator of the place of 'x' # 1. All parameters are named, but none is 'x' - return(character(0L)) # 2. Some parameters are named, 'x' is in named parameters: match("x", setdiff(params, ",")) diff --git a/tests/testthat/test-get_code.R b/tests/testthat/test-get_code.R index ed1669149..bbc1d6772 100644 --- a/tests/testthat/test-get_code.R +++ b/tests/testthat/test-get_code.R @@ -266,7 +266,6 @@ testthat::test_that("get_code works for assign detection no matter how many para get_code(tdata, datanames = "y"), paste(code, collapse = "\n") ) - }) testthat::test_that("get_code detects function usage of assignment operator", { @@ -281,7 +280,6 @@ testthat::test_that("get_code detects function usage of assignment operator", { get_code(tdata, datanames = "y"), paste(c(code[1], "y <- x"), collapse = "\n") ) - }) From 127000e2761ca9d8d3849fb07c6e01289be39777 Mon Sep 17 00:00:00 2001 From: m7pr Date: Wed, 24 Jan 2024 11:28:21 +0100 Subject: [PATCH 25/36] Empty-Commit From 1ce4ad20ed957d2c7890a82dd8f25f8a61c4f49d Mon Sep 17 00:00:00 2001 From: m7pr Date: Wed, 24 Jan 2024 11:52:27 +0100 Subject: [PATCH 26/36] cleanup tests --- tests/testthat/test-get_code.R | 170 +++++++-------------------------- 1 file changed, 36 insertions(+), 134 deletions(-) diff --git a/tests/testthat/test-get_code.R b/tests/testthat/test-get_code.R index bbc1d6772..a79e0ed5b 100644 --- a/tests/testthat/test-get_code.R +++ b/tests/testthat/test-get_code.R @@ -1,6 +1,6 @@ warning_message <- "warning('Code was not verified for reproducibility.')" -testthat::test_that("get_code with datanames handles empty @code slot", { +testthat::test_that("handles empty @code slot", { testthat::expect_identical( get_code(teal_data(a = 1, code = character(0)), datanames = "a"), warning_message @@ -11,7 +11,7 @@ testthat::test_that("get_code with datanames handles empty @code slot", { ) }) -testthat::test_that("get_code with datanames handles code without symbols in RHS", { +testthat::test_that("handles the code without symbols on rhs", { code <- c( "1 + 1", "a <- 5", @@ -24,7 +24,7 @@ testthat::test_that("get_code with datanames handles code without symbols in RHS ) }) -testthat::test_that("get_code with datanames extracts code of a binding from character vector containing simple code", { +testthat::test_that("extracts the code of a binding from character vector containing simple code", { code <- c( "a <- 1", "b <- 2" @@ -40,7 +40,7 @@ testthat::test_that("get_code with datanames extracts code of a binding from cha ) }) -testthat::test_that("get_code with datanames extracts code without downstream usage", { +testthat::test_that("extracts the code without downstream usage", { code <- c( "a <- 1", "head(a)" @@ -52,7 +52,7 @@ testthat::test_that("get_code with datanames extracts code without downstream us ) }) -testthat::test_that("get_code works for datanames of length > 1", { +testthat::test_that("works for datanames of length > 1", { code <- c( "a <- 1", "b <- 2" @@ -64,7 +64,7 @@ testthat::test_that("get_code works for datanames of length > 1", { ) }) -testthat::test_that("get_code with datanames warns if binding doesn't exist in code", { +testthat::test_that("warns if binding doesn't exist in code", { code <- c("a <- 1") tdata <- eval_code(teal_data(), code) testthat::expect_warning( @@ -73,7 +73,7 @@ testthat::test_that("get_code with datanames warns if binding doesn't exist in c ) }) -testthat::test_that("get_code with datanames does not fall into a loop", { +testthat::test_that("does not fall into a loop", { code <- c( "a <- 1", "b <- a", @@ -96,9 +96,7 @@ testthat::test_that("get_code with datanames does not fall into a loop", { }) -testthat::test_that( - "get_code with datanames extracts code of a parent binding but only those evaluated before coocurence", - { +testthat::test_that("extracts code of a parent binding but only those evaluated before coocurence", { code <- c( "a <- 1", "b <- a", @@ -112,7 +110,7 @@ testthat::test_that( } ) -testthat::test_that("get_code with datanames extracts code of a parent binding if used as an arg in fun call", { +testthat::test_that("extracts the code of a parent binding if used as an arg in a function call", { code <- c( "a <- 1", "b <- identity(x = a)", @@ -125,7 +123,7 @@ testthat::test_that("get_code with datanames extracts code of a parent binding i ) }) -testthat::test_that("get_code with datanames can extract the code when using <<-", { +testthat::test_that("extracts the code when using <<-", { code <- c( "a <- 1", "b <- a", @@ -138,7 +136,7 @@ testthat::test_that("get_code with datanames can extract the code when using <<- ) }) -testthat::test_that("get_code with datanames detects every assign calls even if not evaluated", { +testthat::test_that("detects every assign calls even if not evaluated, if there is only one assignment in this line", { code <- c( "a <- 1", "b <- 2", @@ -151,7 +149,7 @@ testthat::test_that("get_code with datanames detects every assign calls even if ) }) -testthat::test_that("get_code returns result of length 1 for non-empty input", { +testthat::test_that("returns result of length 1 for non-empty input", { tdata1 <- teal_data() tdata1 <- within(tdata1, { a <- 1 @@ -163,7 +161,7 @@ testthat::test_that("get_code returns result of length 1 for non-empty input", { testthat::expect_length(get_code(tdata1, deparse = TRUE), 1) }) -testthat::test_that("get_code does not break if code is separated by ;", { +testthat::test_that("does not break if code is separated by ;", { code <- c( "a <- 1;a <- a + 1" ) @@ -174,7 +172,7 @@ testthat::test_that("get_code does not break if code is separated by ;", { ) }) -testthat::test_that("get_code does not break if code uses quote", { +testthat::test_that("does not break if code uses quote()", { code <- c( "expr <- quote(x <- x + 1)", "x <- 0", @@ -189,7 +187,7 @@ testthat::test_that("get_code does not break if code uses quote", { # assign ---------------------------------------------------------------------------------------------------------- -testthat::test_that("get_code with datanames can extract the code for assign function", { +testthat::test_that("extracts the code for assign() where \"x\" is a literal string", { code <- c( "a <- 1", "assign('b', 5)", @@ -220,10 +218,8 @@ testthat::test_that("get_code with datanames can extract the code for assign fun ) }) -testthat::test_that( - "get_code with datanames can extract the code for assign function where \"x\" is variable", - { - testthat::skip("We will not tackle this some day, as this requires code evaluation.") +testthat::test_that("extracts the code for assign() where \"x\" is variable", { + testthat::skip("We will not resolve this, as this requires code evaluation.") code <- c( "x <- \"a\"", "assign(x, 5)", @@ -237,21 +233,7 @@ testthat::test_that( } ) - -testthat::test_that("@linksto tag indicate affected object if object is assigned anywhere in a code", { - code <- c( - "a <- 1", - "assign('b', 5) # @linksto b", - "b <- b + 2" - ) - tdata <- eval_code(teal_data(), code) - testthat::expect_identical( - get_code(tdata, datanames = "b"), - paste("assign(\"b\", 5)", "b <- b + 2", sep = "\n") - ) -}) - -testthat::test_that("get_code works for assign detection no matter how many parametrers were provided in assign", { +testthat::test_that("works for assign() detection no matter how many parametrers were provided in assignq()", { code <- c( "x <- 1", "assign(\"x\", 0, envir = environment())", @@ -268,7 +250,7 @@ testthat::test_that("get_code works for assign detection no matter how many para ) }) -testthat::test_that("get_code detects function usage of assignment operator", { +testthat::test_that("detects function usage of the assignment operator", { code <- c( "x <- 1", "`<-`(y,x)" @@ -285,7 +267,7 @@ testthat::test_that("get_code detects function usage of assignment operator", { # @linksto --------------------------------------------------------------------------------------------------------- -testthat::test_that("@linksto cause to return this line for affected binding", { +testthat::test_that("@linksto makes a line being returned for an affected binding", { code <- " a <- 1 # @linksto b b <- 2 @@ -298,8 +280,8 @@ testthat::test_that("@linksto cause to return this line for affected binding", { }) testthat::test_that( - "@linksto returns this line for affected binding - even if object is not specificed/created in the same eval_code", + "@linksto returns the line for an affected binding + even if the object did not exist in the same iteration of eval_code", { code <- c( "a <- 1 # @linksto b", @@ -314,40 +296,7 @@ testthat::test_that( ) testthat::test_that( - "@linksto returns this line for affected binding - if object is not specificed in the same element of code", - { - code <- c( - "a <- 1 ", - "b <- 2 # @linksto a" - ) - tdata <- eval_code(teal_data(), code) - testthat::expect_identical( - get_code(tdata, datanames = "a"), - paste("a <- 1", "b <- 2", sep = "\n") - ) - } -) - -testthat::test_that( - "lines affecting parent evaluated after co-occurrence are not included in get_code with datanamesoutput", - { - code <- c( - "a <- 1", - "b <- a", - "a <- 3" - ) - tdata <- eval_code(teal_data(), code) - testthat::expect_identical( - get_code(tdata, datanames = "b"), - paste("a <- 1", "b <- a", sep = "\n") - ) - } -) - -testthat::test_that( - "lines affecting parent evaluated after co-occurrence are not included in get_code with datanamesoutput - when using @linksto", + "lines affecting parent evaluated after co-occurrence are not included in output when using @linksto", { code <- c( "a <- 1 ", @@ -368,23 +317,7 @@ testthat::test_that( ) testthat::test_that( - "@linksto gets extracted if it's a side-effect on a dependent object", - { - code <- " - iris[1:5, ] -> iris2 - iris_head <- head(iris) # @linksto iris2 - classes <- lapply(iris2, class) - " - tdata <- eval_code(teal_data(), code) - testthat::expect_identical( - get_code(tdata, datanames = "classes"), - paste("iris2 <- iris[1:5, ]", "iris_head <- head(iris)", "classes <- lapply(iris2, class)", sep = "\n") - ) - } -) - -testthat::test_that( - "@linksto gets extracted if it's a side-effect on a dependent object of a dependent object", + "@linksto gets extracted if it's a side-effect on a dependent object (even of a dependent object)", { code <- " iris[1:5, ] -> iris2 @@ -407,7 +340,7 @@ testthat::test_that( # functions ------------------------------------------------------------------------------------------------------- -testthat::test_that("get_code with datanames ignores occurrence in function definition", { +testthat::test_that("ignores occurrence in a function definition", { code <- c( "b <- 2", "foo <- function(b) { b <- b + 2 }" @@ -423,7 +356,7 @@ testthat::test_that("get_code with datanames ignores occurrence in function defi ) }) -testthat::test_that("get_code with datanames ignores occurrence in function definition in lapply", { +testthat::test_that("ignores occurrence in a function definition in lapply", { code <- c( "a <- list(a = 1, b = 2, c = 3)", "b <- lapply(a, FUN = function(x) { x <- x + 1 })", @@ -438,7 +371,7 @@ testthat::test_that("get_code with datanames ignores occurrence in function defi ) }) -testthat::test_that("get_code with datanames does not ignore occurrence in function body if object exsits in env", { +testthat::test_that("does not ignore occurrence in function body if object exsits in env", { skip("This is not urgent and can be ommitted with @linksto tag.") code <- c( "a <- list(a = 1, b = 2, c = 3)", @@ -453,7 +386,7 @@ testthat::test_that("get_code with datanames does not ignore occurrence in funct ) }) -testthat::test_that("get_code with datanames ignores occurrence in function definition without { curly brackets", { +testthat::test_that("ignores occurrence in function definition without { curly brackets", { code <- c( "b <- 2", "foo <- function(b) b <- b + 2 " @@ -469,20 +402,7 @@ testthat::test_that("get_code with datanames ignores occurrence in function defi ) }) -testthat::test_that("get_code with datanames returns custom function calls on object", { - code <- c( - "b <- 2", - "foo <- function(b) { b <- b + 2 }", - "foo(b)" - ) - tdata <- eval_code(teal_data(), code) - testthat::expect_identical( - get_code(tdata, datanames = "b"), - code[1] - ) -}) - -testthat::test_that("get_code with datanames detects occurrence of the function object", { +testthat::test_that("detects occurrence of the function object", { code <- c( "a <- 1", "b <- 2", @@ -496,9 +416,7 @@ testthat::test_that("get_code with datanames detects occurrence of the function ) }) -testthat::test_that( - "Can't detect occurrence of function definition when a formal is named the same as a function", - { +testthat::test_that( "detects occurrence of a function definition when a formal is named the same as a function", { code <- c( "x <- 1", "foo <- function(foo = 1) 'text'", @@ -512,9 +430,7 @@ testthat::test_that( } ) -testthat::test_that( - "get_code detects occurrence of function definition and @linksto usage", - { +testthat::test_that("detects occurrence of a function definition with a @linksto usage", { code <- c( " foo <- function() { @@ -537,7 +453,7 @@ testthat::test_that( ) # $ --------------------------------------------------------------------------------------------------------------- -testthat::test_that("get_code with datanames understands $ usage and do not treat rhs of $ as objects (only lhs)", { +testthat::test_that("understands $ usage and do not treat rhs of $ as objects (only lhs)", { code <- c( "x <- data.frame(a = 1:3)", "a <- data.frame(y = 1:3)", @@ -562,7 +478,7 @@ testthat::test_that("get_code with datanames understands $ usage and do not trea ) }) -testthat::test_that("get_code with datanames detects cooccurrence properly even if all objects are on rhs", { +testthat::test_that("detects cooccurrence properly even if all objects are on lhs", { code <- c( "a <- 1", "b <- list(c = 2)", @@ -578,7 +494,7 @@ testthat::test_that("get_code with datanames detects cooccurrence properly even # @ --------------------------------------------------------------------------------------------------------------- -testthat::test_that("get_code with datanames understands @ usage and do not treat rhs of @ as objects (only lhs)", { +testthat::test_that("understands @ usage and do not treat rhs of @ as objects (only lhs)", { code <- c( "setClass('aclass', slots = c(a = 'numeric', x = 'numeric', y = 'numeric')) # @linksto a x", "x <- new('aclass', a = 1:3, x = 1:3, y = 1:3)", @@ -642,7 +558,7 @@ testthat::test_that("library() and require() are always returned", { # data() ---------------------------------------------------------------------------------------------------------- -testthat::test_that("get_call returns data call for a datanames specified asis", { +testthat::test_that("data() call is returned when data name is provided as is", { code <- c( "set.seed(1)", "library(scda)", @@ -666,7 +582,7 @@ testthat::test_that("get_call returns data call for a datanames specified asis", ) }) -testthat::test_that("get_call data call is returned when data name is provided as character", { +testthat::test_that("data() call is returned when data name is provided as a character", { code <- c( "set.seed(1)", "library(scda)", @@ -690,17 +606,3 @@ testthat::test_that("get_call data call is returned when data name is provided a ) }) -testthat::test_that( - "get_code with datanames can extract the code for objects assigned with data function", - { - code <- c( - "data(iris)", - "iris2 <- head(iris)" - ) - tdata <- eval_code(teal_data(), code) - testthat::expect_identical( - get_code(tdata, datanames = "iris2"), - paste("data(iris)", "iris2 <- head(iris)", sep = "\n") - ) - } -) From a464e02d7dfb2845af927148d17abddcc15631d6 Mon Sep 17 00:00:00 2001 From: github-actions <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 24 Jan 2024 10:54:38 +0000 Subject: [PATCH 27/36] [skip actions] Restyle files --- tests/testthat/test-get_code.R | 103 ++++++++++++++++----------------- 1 file changed, 49 insertions(+), 54 deletions(-) diff --git a/tests/testthat/test-get_code.R b/tests/testthat/test-get_code.R index a79e0ed5b..35fb33fae 100644 --- a/tests/testthat/test-get_code.R +++ b/tests/testthat/test-get_code.R @@ -97,18 +97,17 @@ testthat::test_that("does not fall into a loop", { testthat::test_that("extracts code of a parent binding but only those evaluated before coocurence", { - code <- c( - "a <- 1", - "b <- a", - "a <- 2" - ) - tdata <- eval_code(teal_data(), code) - testthat::expect_identical( - get_code(tdata, datanames = "b"), - paste("a <- 1", "b <- a", sep = "\n") - ) - } -) + code <- c( + "a <- 1", + "b <- a", + "a <- 2" + ) + tdata <- eval_code(teal_data(), code) + testthat::expect_identical( + get_code(tdata, datanames = "b"), + paste("a <- 1", "b <- a", sep = "\n") + ) +}) testthat::test_that("extracts the code of a parent binding if used as an arg in a function call", { code <- c( @@ -219,19 +218,18 @@ testthat::test_that("extracts the code for assign() where \"x\" is a literal str }) testthat::test_that("extracts the code for assign() where \"x\" is variable", { - testthat::skip("We will not resolve this, as this requires code evaluation.") - code <- c( - "x <- \"a\"", - "assign(x, 5)", - "b <- a" - ) - tdata <- eval_code(teal_data(), code) - testthat::expect_identical( - get_code(tdata, datanames = "b"), - paste(code, collapse = "\n") - ) - } -) + testthat::skip("We will not resolve this, as this requires code evaluation.") + code <- c( + "x <- \"a\"", + "assign(x, 5)", + "b <- a" + ) + tdata <- eval_code(teal_data(), code) + testthat::expect_identical( + get_code(tdata, datanames = "b"), + paste(code, collapse = "\n") + ) +}) testthat::test_that("works for assign() detection no matter how many parametrers were provided in assignq()", { code <- c( @@ -416,41 +414,39 @@ testthat::test_that("detects occurrence of the function object", { ) }) -testthat::test_that( "detects occurrence of a function definition when a formal is named the same as a function", { - code <- c( - "x <- 1", - "foo <- function(foo = 1) 'text'", - "a <- foo(x)" - ) - tdata <- eval_code(teal_data(), code) - testthat::expect_identical( - get_code(tdata, datanames = "a"), - paste("x <- 1", "foo <- function(foo = 1) \"text\"", "a <- foo(x)", sep = "\n") - ) - } -) +testthat::test_that("detects occurrence of a function definition when a formal is named the same as a function", { + code <- c( + "x <- 1", + "foo <- function(foo = 1) 'text'", + "a <- foo(x)" + ) + tdata <- eval_code(teal_data(), code) + testthat::expect_identical( + get_code(tdata, datanames = "a"), + paste("x <- 1", "foo <- function(foo = 1) \"text\"", "a <- foo(x)", sep = "\n") + ) +}) testthat::test_that("detects occurrence of a function definition with a @linksto usage", { - code <- c( - " + code <- c( + " foo <- function() { env <- parent.frame() env$x <- 0 }", - "foo() # @linksto x", - "y <- x" - ) - tdata <- teal_data(code = code) - testthat::expect_identical( - get_code(tdata, datanames = "x"), - paste( - warning_message, - "foo <- function() {\n env <- parent.frame()\n env$x <- 0\n}\nfoo()", - sep = "\n" - ) + "foo() # @linksto x", + "y <- x" + ) + tdata <- teal_data(code = code) + testthat::expect_identical( + get_code(tdata, datanames = "x"), + paste( + warning_message, + "foo <- function() {\n env <- parent.frame()\n env$x <- 0\n}\nfoo()", + sep = "\n" ) - } -) + ) +}) # $ --------------------------------------------------------------------------------------------------------------- testthat::test_that("understands $ usage and do not treat rhs of $ as objects (only lhs)", { @@ -605,4 +601,3 @@ testthat::test_that("data() call is returned when data name is provided as a cha ) ) }) - From 8e5b211dd4da2dd03c2c74e7b8715e4bd516da25 Mon Sep 17 00:00:00 2001 From: m7pr Date: Wed, 24 Jan 2024 11:55:51 +0100 Subject: [PATCH 28/36] Empty-Commit From 5917a92c1d1f790a163de4faa1c062c05e6146e6 Mon Sep 17 00:00:00 2001 From: Marcin <133694481+m7pr@users.noreply.github.com> Date: Wed, 24 Jan 2024 13:04:17 +0100 Subject: [PATCH 29/36] 267 fix `@linksto` tag with `eval()` in last line of the evaluated code for `get_code` (#268) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Close #267 Used @gogonzo solution from this PR to fix this https://github.com/insightsengineering/teal.data/pull/262/files#diff-66a61facc4f5ca86215d63af40e2645b364950439051784916316ef78d5073c8R102-R110 --------- Signed-off-by: Marcin <133694481+m7pr@users.noreply.github.com> Co-authored-by: Dawid Kałędkowski <6959016+gogonzo@users.noreply.github.com> --- R/utils-get_code_dependency.R | 12 ++++++++++-- tests/testthat/test-get_code.R | 16 ++++++++++++++++ 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/R/utils-get_code_dependency.R b/R/utils-get_code_dependency.R index b18f3b6f0..318f348d5 100644 --- a/R/utils-get_code_dependency.R +++ b/R/utils-get_code_dependency.R @@ -99,7 +99,15 @@ find_call <- function(call_pd, text) { #' @keywords internal #' @noRd extract_calls <- function(pd) { - calls <- lapply(pd[pd$parent == 0, "id"], get_children, pd = pd) + calls <- lapply( + pd[pd$parent == 0, "id"], + function(parent) { + rbind( + pd[pd$id == parent, c("token", "text", "id", "parent")], + get_children(pd = pd, parent = parent) + ) + } + ) calls <- Filter(Negate(is.null), calls) calls <- fix_comments(calls) fix_arrows(calls) @@ -131,7 +139,7 @@ fix_comments <- function(calls) { } } } - calls + Filter(nrow, calls) } #' Fixes edge case of `<-` assignment operator being called as function, diff --git a/tests/testthat/test-get_code.R b/tests/testthat/test-get_code.R index 35fb33fae..b4e92939c 100644 --- a/tests/testthat/test-get_code.R +++ b/tests/testthat/test-get_code.R @@ -265,6 +265,22 @@ testthat::test_that("detects function usage of the assignment operator", { # @linksto --------------------------------------------------------------------------------------------------------- +testthat::test_that("get_code does not break if @linksto is put in the last line", { + # In some cases R parses comment as a separate expression so the comment is not + # directly associated with this line of code. This situation occurs when `eval` is in the last + # line of the code. Other cases are not known but are highly probable. + code <- c( + "expr <- quote(x <- x + 1)", + "x <- 0", + "eval(expr) #@linksto x" + ) + tdata <- eval_code(teal_data(), code) + testthat::expect_identical( + get_code(tdata, datanames = "x"), + paste(gsub(" #@linksto x", "", code, fixed = TRUE), collapse = "\n") + ) +}) + testthat::test_that("@linksto makes a line being returned for an affected binding", { code <- " a <- 1 # @linksto b From 4e5785e54bfc57303f0c5f4175880c00784de4c6 Mon Sep 17 00:00:00 2001 From: github-actions <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 24 Jan 2024 12:06:28 +0000 Subject: [PATCH 30/36] [skip actions] Restyle files --- tests/testthat/test-get_code.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-get_code.R b/tests/testthat/test-get_code.R index b4e92939c..85788da87 100644 --- a/tests/testthat/test-get_code.R +++ b/tests/testthat/test-get_code.R @@ -266,7 +266,7 @@ testthat::test_that("detects function usage of the assignment operator", { # @linksto --------------------------------------------------------------------------------------------------------- testthat::test_that("get_code does not break if @linksto is put in the last line", { - # In some cases R parses comment as a separate expression so the comment is not + # In some cases R parses comment as a separate expression so the comment is not # directly associated with this line of code. This situation occurs when `eval` is in the last # line of the code. Other cases are not known but are highly probable. code <- c( From aabd2b9c34304d293c18561e9e3abe13d5a10da3 Mon Sep 17 00:00:00 2001 From: go_gonzo Date: Wed, 24 Jan 2024 13:49:28 +0100 Subject: [PATCH 31/36] restart cicd From de59c1326283dc9ffdb308d1ac4fb9705697a0a7 Mon Sep 17 00:00:00 2001 From: Marcin <133694481+m7pr@users.noreply.github.com> Date: Wed, 24 Jan 2024 13:56:40 +0100 Subject: [PATCH 32/36] Update R/teal_data-get_code.R MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Dawid Kałędkowski <6959016+gogonzo@users.noreply.github.com> Signed-off-by: Marcin <133694481+m7pr@users.noreply.github.com> --- R/teal_data-get_code.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/teal_data-get_code.R b/R/teal_data-get_code.R index a81a86105..4d83b9889 100644 --- a/R/teal_data-get_code.R +++ b/R/teal_data-get_code.R @@ -74,6 +74,7 @@ #' #' @param object (`teal_data`) #' @param datanames `r lifecycle::badge("experimental")` (`character`) vector of dataset names to return the code for. +For more details see the "Extracting dataset-specific code" section. #' @param deparse (`logical`) flag specifying whether to return code as `character` (`deparse = TRUE`) or as #' `expression` (`deparse = FALSE`). #' From 34ebcaf7a41335d86a46afacd32fd10a022acf90 Mon Sep 17 00:00:00 2001 From: m7pr Date: Wed, 24 Jan 2024 14:22:23 +0100 Subject: [PATCH 33/36] missing comment --- R/teal_data-get_code.R | 2 +- man/get_code.Rd | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/R/teal_data-get_code.R b/R/teal_data-get_code.R index 4d83b9889..a1ccd274d 100644 --- a/R/teal_data-get_code.R +++ b/R/teal_data-get_code.R @@ -74,7 +74,7 @@ #' #' @param object (`teal_data`) #' @param datanames `r lifecycle::badge("experimental")` (`character`) vector of dataset names to return the code for. -For more details see the "Extracting dataset-specific code" section. +#' For more details see the "Extracting dataset-specific code" section. #' @param deparse (`logical`) flag specifying whether to return code as `character` (`deparse = TRUE`) or as #' `expression` (`deparse = FALSE`). #' diff --git a/man/get_code.Rd b/man/get_code.Rd index 53edf8d31..1c36c1d5a 100644 --- a/man/get_code.Rd +++ b/man/get_code.Rd @@ -12,7 +12,8 @@ \item{deparse}{(\code{logical}) flag specifying whether to return code as \code{character} (\code{deparse = TRUE}) or as \code{expression} (\code{deparse = FALSE}).} -\item{datanames}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} (\code{character}) vector of dataset names to return the code for.} +\item{datanames}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} (\code{character}) vector of dataset names to return the code for. +For more details see the "Extracting dataset-specific code" section.} } \value{ Either a character string or an expression. If \code{datanames} is used to request a specific dataset, From 14f1099e68fbcd646b11321eb36f66372afbed9f Mon Sep 17 00:00:00 2001 From: m7pr Date: Wed, 24 Jan 2024 14:51:03 +0100 Subject: [PATCH 34/36] fix issue with ; usage in code --- R/utils-get_code_dependency.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/utils-get_code_dependency.R b/R/utils-get_code_dependency.R index 318f348d5..aeb9d9c35 100644 --- a/R/utils-get_code_dependency.R +++ b/R/utils-get_code_dependency.R @@ -108,6 +108,7 @@ extract_calls <- function(pd) { ) } ) + calls <- Filter(function(call) !(nrow(call) == 1 && call$token == "';'"), calls) calls <- Filter(Negate(is.null), calls) calls <- fix_comments(calls) fix_arrows(calls) From 3ef0707f85b627e74fd4c0624f63fbf1bcdc8ea8 Mon Sep 17 00:00:00 2001 From: m7pr Date: Wed, 24 Jan 2024 15:00:06 +0100 Subject: [PATCH 35/36] latest change in get_children appends a new row in calls_pd with the call. fix_comments needs to be changed since it assumed first row contains comments that were suprisingly pushed from other call to another one. now fix_comments needs to verify two rows --- R/utils-get_code_dependency.R | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/R/utils-get_code_dependency.R b/R/utils-get_code_dependency.R index aeb9d9c35..83bf415b3 100644 --- a/R/utils-get_code_dependency.R +++ b/R/utils-get_code_dependency.R @@ -134,9 +134,14 @@ fix_comments <- function(calls) { # If the first token is a COMMENT, then it belongs to the previous call. if (length(calls) >= 2) { for (i in 2:length(calls)) { - if (grepl("@linksto", calls[[i]][1, "text"])) { + comments_first <- grepl("@linksto", calls[[i]][1, "text"]) + comments_second <- grepl("@linksto", calls[[i]][2, "text"]) && calls[[i]][1, "text"] == "" + if (comments_first) { calls[[i - 1]] <- rbind(calls[[i - 1]], calls[[i]][1, ]) calls[[i]] <- calls[[i]][-1, ] + } else if (comments_second) { + calls[[i - 1]] <- rbind(calls[[i - 1]], calls[[i]][1:2, ]) + calls[[i]] <- calls[[i]][-c(1:2), ] } } } From 1cb250caf5703edcc99ebdcdf89b6dc446649a30 Mon Sep 17 00:00:00 2001 From: go_gonzo Date: Wed, 24 Jan 2024 15:51:36 +0100 Subject: [PATCH 36/36] simplify code --- R/utils-get_code_dependency.R | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/R/utils-get_code_dependency.R b/R/utils-get_code_dependency.R index 83bf415b3..98fd8926c 100644 --- a/R/utils-get_code_dependency.R +++ b/R/utils-get_code_dependency.R @@ -110,7 +110,7 @@ extract_calls <- function(pd) { ) calls <- Filter(function(call) !(nrow(call) == 1 && call$token == "';'"), calls) calls <- Filter(Negate(is.null), calls) - calls <- fix_comments(calls) + calls <- fix_shifted_comments(calls) fix_arrows(calls) } @@ -128,20 +128,21 @@ get_children <- function(pd, parent) { } } +#' Fixes edge case of comments being shifted to the next call. #' @keywords internal #' @noRd -fix_comments <- function(calls) { - # If the first token is a COMMENT, then it belongs to the previous call. +fix_shifted_comments <- function(calls) { + # If the first or the second token is a @linksto COMMENT, + # then it belongs to the previous call. if (length(calls) >= 2) { for (i in 2:length(calls)) { - comments_first <- grepl("@linksto", calls[[i]][1, "text"]) - comments_second <- grepl("@linksto", calls[[i]][2, "text"]) && calls[[i]][1, "text"] == "" - if (comments_first) { - calls[[i - 1]] <- rbind(calls[[i - 1]], calls[[i]][1, ]) - calls[[i]] <- calls[[i]][-1, ] - } else if (comments_second) { - calls[[i - 1]] <- rbind(calls[[i - 1]], calls[[i]][1:2, ]) - calls[[i]] <- calls[[i]][-c(1:2), ] + comment_idx <- grep("@linksto", calls[[i]][, "text"]) + if (isTRUE(comment_idx[1] <= 2)) { + calls[[i - 1]] <- rbind( + calls[[i - 1]], + calls[[i]][seq_len(comment_idx[1]), ] + ) + calls[[i]] <- calls[[i]][-seq_len(comment_idx[1]), ] } } }