diff --git a/main/coverage-report/index.html b/main/coverage-report/index.html index 0738474da..0c4139d8d 100644 --- a/main/coverage-report/index.html +++ b/main/coverage-report/index.html @@ -94,7 +94,7 @@ font-size: 11px; }
1 |
- #' Deprecated `TealData` class and related functions+ #' Verify code reproducibility |
||
3 |
- #' @description+ #' Checks whether code in `teal_data` object reproduces the stored objects. |
||
4 |
- #' `r lifecycle::badge("deprecated")`+ #' |
||
5 |
- #'+ #' If objects created by code in the `@code` slot of `x` are `all_equal` to the contents of the `@env` slot, |
||
6 |
- #' The `TealData` class and associated functions have been deprecated. Use [teal_data()] instead.+ #' the function updates the `@verified` slot to `TRUE` in the returned `teal_data` object. |
||
7 |
- #' See the [Migration guide](https://github.com/insightsengineering/teal/discussions/945) for details.+ #' Once verified, the slot will always be set to `TRUE`. |
||
8 |
- #'+ #' If the `@code` fails to recreate objects in `teal_data@env`, an error is raised. |
||
9 |
- #' @name TealData+ #' |
||
10 |
- #'+ #' @return Input `teal_data` object or error. |
||
11 |
- #' @param ... any argument supported in `TealData` related functions.+ #' |
||
12 |
- #'+ #' @param x `teal_data` object |
||
13 |
- #' @return nothing+ #' @examples |
||
14 |
- #' @seealso [cdisc_data()] , [join_keys()]+ #' tdata1 <- teal_data() |
||
15 |
- #'+ #' tdata1 <- within(tdata1, { |
||
16 |
- NULL+ #' a <- 1 |
||
17 |
-
+ #' b <- a^5 |
||
18 |
- .deprecate_function <- function(what, details) {+ #' c <- list(x = 2) |
||
19 | -! | +
- lifecycle::deprecate_stop(+ #' }) |
|
20 | -! | +
- when = "0.4.0",+ #' verify(tdata1) |
|
21 | -! | +
- what = what,+ #' |
|
22 | -! | +
- details = details+ #' tdata2 <- teal_data(x1 = iris, code = "x1 <- iris") |
|
23 |
- )+ #' verify(tdata2) |
||
24 |
- }+ #' verify(tdata2)@verified |
||
25 |
-
+ #' tdata2@verified |
||
26 |
- deprecation_detail <- "Find more information on https://github.com/insightsengineering/teal/discussions/945"+ #' |
||
27 |
-
+ #' tdata3 <- teal_data() |
||
28 |
- #' @rdname TealData+ #' tdata3 <- within(tdata3, { |
||
29 |
- #' @export+ #' stop("error") |
||
30 |
- as_cdisc <- function(...) {+ #' }) |
||
31 | -! | +
- .deprecate_function("as_cdisc()", deprecation_detail)+ #' try(verify(tdata3)) # fails |
|
32 |
- }+ #' |
||
33 |
-
+ #' |
||
34 |
- #' @rdname TealData+ #' a <- 1 |
||
35 |
- #' @export+ #' b <- a + 2 |
||
36 |
- callable_code <- function(...) {+ #' c <- list(x = 2) |
||
37 | -! | +
- .deprecate_function("callable_code()", deprecation_detail)+ #' d <- 5 |
|
38 |
- }+ #' tdata4 <- teal_data( |
||
39 |
-
+ #' a = a, b = b, c = c, d = d, |
||
40 |
- #' @rdname TealData+ #' code = "a <- 1 |
||
41 |
- #' @export+ #' b <- a |
||
42 |
- callable_function <- function(...) {+ #' c <- list(x = 2) |
||
43 | -! | +
- .deprecate_function("callable_function()", deprecation_detail)+ #' e <- 1" |
|
44 |
- }+ #' ) |
||
45 |
-
+ #' tdata4 |
||
46 |
- #' @rdname TealData+ #' try(verify(tdata4)) # fails |
||
47 |
- #' @export+ #' |
||
48 |
- code_dataset_connector <- function(...) {+ #' @name verify |
||
49 | -! | +
- .deprecate_function("code_dataset_connector()", deprecation_detail)+ #' @rdname verify |
|
50 |
- }+ #' @aliases verify,teal_data-method |
||
51 |
-
+ #' @aliases verify,qenv.error-method |
||
52 |
- #' @rdname TealData+ #' |
||
54 | -+ | 5x |
- code_cdisc_dataset_connector <- function(...) {+ setGeneric("verify", function(x) standardGeneric("verify")) |
55 | -! | +
- .deprecate_function("code_cdisc_dataset_connector()", deprecation_detail)+ setMethod("verify", signature = "teal_data", definition = function(x) { |
|
56 | -+ | 4x |
- }+ if (x@verified) { |
57 | -+ | 2x |
-
+ return(x) |
58 |
- #' @rdname TealData+ } |
||
59 | -+ | 2x |
- #' @export+ x_name <- deparse(substitute(x)) |
60 | -+ | 2x |
- csv_dataset_connector <- function(...) {+ y <- eval_code(teal_data(), get_code(x)) |
61 | -! | +
- .deprecate_function("csv_dataset_connector()", deprecation_detail)+ |
|
62 | -+ | 2x |
- }+ if (inherits(y, "qenv.error")) { |
63 | -+ | ! |
-
+ stop(conditionMessage(y), call. = FALSE) |
64 |
- #' @rdname TealData+ } |
||
65 |
- #' @export+ |
||
66 | -+ | 2x |
- csv_cdisc_dataset_connector <- function(...) {+ reproduced <- isTRUE(all.equal(x@env, y@env)) |
67 | -! | +2x |
- .deprecate_function("csv_cdisc_dataset_connector()", deprecation_detail)+ if (reproduced) { |
68 | -+ | 1x |
- }+ x@verified <- TRUE |
69 | -+ | 1x |
-
+ methods::validObject(x) |
70 | -+ | 1x |
- #' @rdname TealData+ x |
71 |
- #' @export+ } else { |
||
72 | -+ | 1x |
- python_code <- function(...) {+ error <- "Code verification failed." |
73 | -! | +
- .deprecate_function("python_code()", deprecation_detail)+ |
|
74 | -+ | 1x |
- }+ objects_diff <- vapply( |
75 | -+ | 1x |
-
+ intersect(names(x@env), names(y@env)), |
76 | -+ | 1x |
- #' @rdname TealData+ function(element) { |
77 | -+ | 1x |
- #' @export+ isTRUE(all.equal(x@env[[element]], y@env[[element]])) |
78 |
- python_dataset_connector <- function(...) {+ }, |
||
79 | -! | +1x |
- .deprecate_function("python_dataset_connector()", deprecation_detail)+ logical(1) |
80 |
- }+ ) |
||
82 | -+ | 1x |
- #' @rdname TealData+ names_diff_other <- setdiff(names(y@env), names(x@env)) |
83 | -+ | 1x |
- #' @export+ names_diff_inenv <- setdiff(names(x@env), names(y@env)) |
84 |
- python_cdisc_dataset_connector <- function(...) {+ |
||
85 | -! | +1x |
- .deprecate_function("python_cdisc_dataset_connector()", deprecation_detail)+ if (length(objects_diff)) { |
86 | -+ | 1x |
- }+ error <- c( |
87 | -+ | 1x |
-
+ error, |
88 | -+ | 1x |
- #' @rdname TealData+ paste0("Object(s) recreated with code that have different structure in ", x_name, ":"), |
89 | -+ | 1x |
- #' @export+ paste0(" \u2022 ", names(which(!objects_diff))) |
90 |
- cdisc_data_connector <- function(...) {+ ) |
||
91 | -! | +
- .deprecate_function("cdisc_data_connector()", deprecation_detail)+ } |
|
92 | -+ | 1x |
- }+ if (length(names_diff_inenv)) { |
93 | -+ | ! |
-
+ error <- c( |
94 | -+ | ! |
- #' @rdname TealData+ error, |
95 | -+ | ! |
- #' @export+ paste0("Object(s) not created with code that exist in ", x_name, ":"), |
96 | -+ | ! |
- cdisc_dataset <- function(...) {+ paste0(" \u2022 ", names_diff_inenv) |
97 | -! | +
- .deprecate_function("cdisc_dataset()", deprecation_detail)+ ) |
|
98 |
- }+ } |
||
99 | -+ | 1x |
-
+ if (length(names_diff_other)) { |
100 | -+ | ! |
- #' @rdname TealData+ error <- c( |
101 | -+ | ! |
- #' @export+ error, |
102 | -+ | ! |
- cdisc_dataset_connector <- function(...) {+ paste0("Object(s) created with code that do not exist in ", x_name, ":"), |
103 | ! |
- .deprecate_function("cdisc_dataset_connector()", deprecation_detail)+ paste0(" \u2022 ", names_diff_other) |
|
104 |
- }+ ) |
||
105 |
-
+ } |
||
106 |
- #' @rdname TealData+ |
||
107 | -+ | 1x |
- #' @export+ stop(paste(error, collapse = "\n"), call. = FALSE) |
108 |
- cdisc_dataset_connector_file <- function(...) {+ } |
||
109 | -! | +
- .deprecate_function("cdisc_dataset_connector_file()", deprecation_detail)+ }) |
|
110 |
- }+ setMethod("verify", signature = "qenv.error", definition = function(x) { |
||
111 | -+ | 1x |
-
+ stop(conditionMessage(x), call. = FALSE) |
112 |
- #' @rdname TealData+ }) |
113 | +1 |
- #' @export+ #' Deprecated `TealData` class and related functions |
|
114 | +2 |
- cdisc_dataset_file <- function(...) {+ #' |
|
115 | -! | +||
3 | +
- .deprecate_function("cdisc_dataset_file()", deprecation_detail)+ #' @description |
||
116 | +4 |
- }+ #' `r lifecycle::badge("deprecated")` |
|
117 | +5 |
-
+ #' |
|
118 | +6 |
- #' @rdname TealData+ #' The `TealData` class and associated functions have been deprecated. Use [teal_data()] instead. |
|
119 | +7 |
- #' @export+ #' See the [Migration guide](https://github.com/insightsengineering/teal/discussions/945) for details. |
|
120 | +8 |
- dataset <- function(...) {+ #' |
|
121 | -! | +||
9 | +
- .deprecate_function("dataset()", deprecation_detail)+ #' @name TealData |
||
122 | +10 |
- }+ #' |
|
123 | +11 |
-
+ #' @param ... any argument supported in `TealData` related functions. |
|
124 | +12 |
- #' @rdname TealData+ #' |
|
125 | +13 |
- #' @export+ #' @return nothing |
|
126 | +14 |
- dataset_connector <- function(...) {+ #' @seealso [cdisc_data()] , [join_keys()] |
|
127 | -! | +||
15 | +
- .deprecate_function("dataset_connector()", deprecation_detail)+ #' |
||
128 | +16 |
- }+ NULL |
|
129 | +17 | ||
130 | +18 |
- #' @rdname TealData+ .deprecate_function <- function(what, details) { |
|
131 | -- |
- #' @export- |
- |
132 | -+ | ||
19 | +! |
- dataset_connector_file <- function(...) {+ lifecycle::deprecate_stop( |
|
133 | +20 | ! |
- .deprecate_function("dataset_connector_file()", deprecation_detail)+ when = "0.4.0", |
134 | -+ | ||
21 | +! |
- }+ what = what, |
|
135 | -+ | ||
22 | +! |
-
+ details = details |
|
136 | +23 |
- #' @rdname TealData+ ) |
|
137 | +24 |
- #' @export+ } |
|
138 | +25 |
- dataset_file <- function(...) {- |
- |
139 | -! | -
- .deprecate_function("dataset_file()", deprecation_detail)+ |
|
140 | +26 |
- }+ deprecation_detail <- "Find more information on https://github.com/insightsengineering/teal/discussions/945" |
|
141 | +27 | ||
142 | +28 |
#' @rdname TealData |
|
143 | +29 |
#' @export |
|
144 | +30 |
- data_connection <- function(...) {+ as_cdisc <- function(...) { |
|
145 | +31 | ! |
- .deprecate_function("data_connection()", deprecation_detail)+ .deprecate_function("as_cdisc()", deprecation_detail) |
146 | +32 |
} |
|
147 | +33 | ||
148 | +34 |
#' @rdname TealData |
|
149 | +35 |
#' @export |
|
150 | +36 |
- fun_dataset_connector <- function(...) {+ callable_code <- function(...) { |
|
151 | +37 | ! |
- .deprecate_function("fun_dataset_connector()", deprecation_detail)+ .deprecate_function("callable_code()", deprecation_detail) |
152 | +38 |
} |
|
153 | +39 | ||
154 | +40 |
#' @rdname TealData |
|
155 | +41 |
#' @export |
|
156 | +42 |
- fun_cdisc_dataset_connector <- function(...) {+ callable_function <- function(...) { |
|
157 | +43 | ! |
- .deprecate_function("fun_cdisc_dataset_connector()", deprecation_detail)+ .deprecate_function("callable_function()", deprecation_detail) |
158 | +44 |
} |
|
159 | +45 | ||
160 | +46 |
#' @rdname TealData |
|
161 | +47 |
#' @export |
|
162 | +48 |
- relational_data_connector <- function(...) {+ code_dataset_connector <- function(...) { |
|
163 | +49 | ! |
- .deprecate_function("relational_data_connector()", deprecation_detail)+ .deprecate_function("code_dataset_connector()", deprecation_detail) |
164 | +50 |
} |
|
165 | +51 | ||
166 | +52 |
#' @rdname TealData |
|
167 | +53 |
#' @export |
|
168 | +54 |
- mae_dataset <- function(...) {+ code_cdisc_dataset_connector <- function(...) { |
|
169 | +55 | ! |
- .deprecate_function("mae_dataset()", deprecation_detail)+ .deprecate_function("code_cdisc_dataset_connector()", deprecation_detail) |
170 | +56 |
} |
|
171 | +57 | ||
172 | +58 |
#' @rdname TealData |
|
173 | +59 |
#' @export |
|
174 | +60 |
- get_attrs <- function(...) {+ csv_dataset_connector <- function(...) { |
|
175 | +61 | ! |
- .deprecate_function("get_attrs()", deprecation_detail)+ .deprecate_function("csv_dataset_connector()", deprecation_detail) |
176 | +62 |
} |
|
177 | +63 | ||
178 | +64 |
#' @rdname TealData |
|
179 | +65 |
#' @export |
|
180 | +66 |
- get_dataset_label <- function(...) {+ csv_cdisc_dataset_connector <- function(...) { |
|
181 | +67 | ! |
- .deprecate_function("get_dataset_label()", deprecation_detail)+ .deprecate_function("csv_cdisc_dataset_connector()", deprecation_detail) |
182 | +68 |
} |
|
183 | +69 | ||
184 | +70 |
#' @rdname TealData |
|
185 | +71 |
#' @export |
|
186 | +72 |
- get_dataset <- function(...) {+ python_code <- function(...) { |
|
187 | +73 | ! |
- .deprecate_function("get_dataset()", deprecation_detail)+ .deprecate_function("python_code()", deprecation_detail) |
188 | +74 |
} |
|
189 | +75 | ||
190 | +76 |
#' @rdname TealData |
|
191 | +77 |
#' @export |
|
192 | +78 |
- get_datasets <- function(...) {+ python_dataset_connector <- function(...) { |
|
193 | +79 | ! |
- .deprecate_function("get_datasets()", deprecation_detail)+ .deprecate_function("python_dataset_connector()", deprecation_detail) |
194 | +80 |
} |
|
195 | +81 | ||
196 | +82 |
#' @rdname TealData |
|
197 | +83 |
#' @export |
|
198 | +84 |
- get_dataname <- function(...) {+ python_cdisc_dataset_connector <- function(...) { |
|
199 | +85 | ! |
- .deprecate_function("get_dataname()", deprecation_detail)+ .deprecate_function("python_cdisc_dataset_connector()", deprecation_detail) |
200 | +86 |
} |
|
201 | +87 | ||
202 | +88 |
#' @rdname TealData |
|
203 | +89 |
#' @export |
|
204 | +90 |
- get_key_duplicates <- function(...) {+ cdisc_data_connector <- function(...) { |
|
205 | +91 | ! |
- .deprecate_function("get_key_duplicates()", deprecation_detail)+ .deprecate_function("cdisc_data_connector()", deprecation_detail) |
206 | +92 |
} |
|
207 | +93 | ||
208 | +94 |
#' @rdname TealData |
|
209 | +95 |
#' @export |
|
210 | +96 |
- get_keys <- function(...) {+ cdisc_dataset <- function(...) { |
|
211 | +97 | ! |
- .deprecate_function("get_keys()", deprecation_detail)+ .deprecate_function("cdisc_dataset()", deprecation_detail) |
212 | +98 |
} |
|
213 | +99 | ||
214 | +100 |
#' @rdname TealData |
|
215 | +101 |
#' @export |
|
216 | +102 |
- get_raw_data <- function(...) {+ cdisc_dataset_connector <- function(...) { |
|
217 | +103 | ! |
- .deprecate_function("get_raw_data()", deprecation_detail)+ .deprecate_function("cdisc_dataset_connector()", deprecation_detail) |
218 | +104 |
} |
|
219 | +105 | ||
220 | +106 |
#' @rdname TealData |
|
221 | +107 |
#' @export |
|
222 | +108 |
- is_pulled <- function(...) {+ cdisc_dataset_connector_file <- function(...) { |
|
223 | +109 | ! |
- .deprecate_function("is_pulled()", deprecation_detail)+ .deprecate_function("cdisc_dataset_connector_file()", deprecation_detail) |
224 | +110 |
} |
|
225 | +111 | ||
226 | +112 |
#' @rdname TealData |
|
227 | +113 |
#' @export |
|
228 | +114 |
- load_dataset <- function(...) {+ cdisc_dataset_file <- function(...) { |
|
229 | +115 | ! |
- .deprecate_function("load_dataset()", deprecation_detail)+ .deprecate_function("cdisc_dataset_file()", deprecation_detail) |
230 | +116 |
} |
|
231 | +117 | ||
232 | +118 |
#' @rdname TealData |
|
233 | +119 |
#' @export |
|
234 | +120 |
- load_datasets <- function(...) {+ dataset <- function(...) { |
|
235 | +121 | ! |
- .deprecate_function("load_datasets()", deprecation_detail)+ .deprecate_function("dataset()", deprecation_detail) |
236 | +122 |
} |
|
237 | +123 | ||
238 | +124 |
#' @rdname TealData |
|
239 | +125 |
#' @export |
|
240 | +126 |
- mutate_data <- function(...) {+ dataset_connector <- function(...) { |
|
241 | +127 | ! |
- .deprecate_function("mutate_data()", deprecation_detail)+ .deprecate_function("dataset_connector()", deprecation_detail) |
242 | +128 |
} |
|
243 | +129 | ||
244 | +130 |
#' @rdname TealData |
|
245 | +131 |
#' @export |
|
246 | +132 |
- mutate_dataset <- function(...) {+ dataset_connector_file <- function(...) { |
|
247 | +133 | ! |
- .deprecate_function("mutate_dataset()", deprecation_detail)+ .deprecate_function("dataset_connector_file()", deprecation_detail) |
248 | +134 |
} |
|
249 | +135 | ||
250 | +136 |
#' @rdname TealData |
|
251 | +137 |
#' @export |
|
252 | +138 |
- set_args <- function(...) {+ dataset_file <- function(...) { |
|
253 | +139 | ! |
- .deprecate_function("set_args()", deprecation_detail)+ .deprecate_function("dataset_file()", deprecation_detail) |
254 | +140 |
} |
|
255 | +141 | ||
256 | +142 |
#' @rdname TealData |
|
257 | +143 |
#' @export |
|
258 | +144 |
- rds_dataset_connector <- function(...) {+ data_connection <- function(...) { |
|
259 | +145 | ! |
- .deprecate_function("rds_dataset_connector()", deprecation_detail)+ .deprecate_function("data_connection()", deprecation_detail) |
260 | +146 |
} |
|
261 | +147 | ||
262 | +148 |
#' @rdname TealData |
|
263 | +149 |
#' @export |
|
264 | +150 |
- rds_cdisc_dataset_connector <- function(...) {+ fun_dataset_connector <- function(...) { |
|
265 | +151 | ! |
- .deprecate_function("rds_cdisc_dataset_connector()", deprecation_detail)+ .deprecate_function("fun_dataset_connector()", deprecation_detail) |
266 | +152 |
} |
|
267 | +153 | ||
268 | +154 |
#' @rdname TealData |
|
269 | +155 |
#' @export |
|
270 | +156 |
- script_dataset_connector <- function(...) {+ fun_cdisc_dataset_connector <- function(...) { |
|
271 | +157 | ! |
- .deprecate_function("script_dataset_connector()", deprecation_detail)+ .deprecate_function("fun_cdisc_dataset_connector()", deprecation_detail) |
272 | +158 |
} |
|
273 | +159 | ||
274 | +160 |
#' @rdname TealData |
|
275 | +161 |
#' @export |
|
276 | +162 |
- script_cdisc_dataset_connector <- function(...) {+ relational_data_connector <- function(...) { |
|
277 | +163 | ! |
- .deprecate_function("script_cdisc_dataset_connector()", deprecation_detail)+ .deprecate_function("relational_data_connector()", deprecation_detail) |
278 | +164 |
} |
|
279 | +165 | ||
280 | +166 |
#' @rdname TealData |
|
281 | +167 |
#' @export |
|
282 | +168 |
- set_keys <- function(...) {+ mae_dataset <- function(...) { |
|
283 | +169 | ! |
- .deprecate_function("set_keys()", deprecation_detail)+ .deprecate_function("mae_dataset()", deprecation_detail) |
284 | +170 |
} |
|
285 | +171 | ||
286 | +172 |
#' @rdname TealData |
|
287 | +173 |
#' @export |
|
288 | +174 |
- read_script <- function(...) {+ get_attrs <- function(...) { |
|
289 | +175 | ! |
- .deprecate_function("read_script()", deprecation_detail)+ .deprecate_function("get_attrs()", deprecation_detail) |
290 | +176 |
} |
|
291 | +177 | ||
292 | +178 |
#' @rdname TealData |
|
293 | +179 |
#' @export |
|
294 | +180 |
- to_relational_data <- function(...) {+ get_dataset_label <- function(...) { |
|
295 | +181 | ! |
- .deprecate_function("to_relational_data()", deprecation_detail)+ .deprecate_function("get_dataset_label()", deprecation_detail) |
296 | +182 |
} |
|
297 | +183 | ||
298 | +184 |
#' @rdname TealData |
|
299 | +185 |
#' @export |
|
300 | +186 |
- validate_metadata <- function(...) {+ get_dataset <- function(...) { |
|
301 | +187 | ! |
- .deprecate_function("validate_metadata()", deprecation_detail)+ .deprecate_function("get_dataset()", deprecation_detail) |
302 | +188 |
} |
|
303 | +189 | ||
304 | +190 |
#' @rdname TealData |
|
305 | +191 |
#' @export |
|
306 | +192 |
- get_cdisc_keys <- function(...) {+ get_datasets <- function(...) { |
|
307 | +193 | ! |
- .deprecate_function("get_cdisc_keys()", deprecation_detail)+ .deprecate_function("get_datasets()", deprecation_detail) |
308 | +194 |
} |
|
309 | +195 | ||
310 | +196 |
#' @rdname TealData |
|
311 | +197 |
#' @export |
|
312 | +198 |
- cdisc_data_file <- function(...) {+ get_dataname <- function(...) { |
|
313 | +199 | ! |
- .deprecate_function("cdisc_data_file()", deprecation_detail)+ .deprecate_function("get_dataname()", deprecation_detail) |
314 | +200 |
} |
|
315 | +201 | ||
316 | +202 |
#' @rdname TealData |
|
317 | +203 |
#' @export |
|
318 | +204 |
- teal_data_file <- function(...) {+ get_key_duplicates <- function(...) { |
|
319 | +205 | ! |
- .deprecate_function("teal_data_file()", deprecation_detail)+ .deprecate_function("get_key_duplicates()", deprecation_detail) |
320 | +206 |
} |
|
321 | +207 | ||
322 | +208 |
#' @rdname TealData |
|
323 | +209 |
#' @export |
|
324 | +210 |
- get_join_keys <- function(...) {+ get_keys <- function(...) { |
|
325 | +211 | ! |
- .deprecate_function("get_join_keys()", "Use `join_keys(data)` instead.")+ .deprecate_function("get_keys()", deprecation_detail) |
326 | +212 |
} |
|
327 | +213 | ||
328 | +214 |
#' @rdname TealData |
|
329 | -- |
- #' @param value value to assign- |
- |
330 | +215 |
#' @export |
|
331 | +216 |
- `get_join_keys<-` <- function(..., value) {+ get_raw_data <- function(...) { |
|
332 | +217 | ! |
- .deprecate_function("`get_join_keys<-`()", "Use `join_keys(x) <- ...`")+ .deprecate_function("get_raw_data()", deprecation_detail) |
333 | +218 |
} |
|
334 | +219 | ||
335 | +220 |
- #' @rdname col_labels+ #' @rdname TealData |
|
336 | +221 |
- #' @include formatters_var_labels.R+ #' @export |
|
337 | +222 |
- #' @details+ is_pulled <- function(...) { |
|
338 | -+ | ||
223 | +! |
- #' `r lifecycle::badge("deprecated")`+ .deprecate_function("is_pulled()", deprecation_detail) |
|
339 | +224 |
- #'+ } |
|
340 | +225 |
- #' In previous versions of `teal.data` labels were managed with `get_labels()`.+ |
|
341 | +226 |
- #' This function is deprecated as of `0.4.0`, use `col_labels` instead.+ #' @rdname TealData |
|
342 | +227 |
#' @export |
|
343 | +228 |
- get_labels <- function(...) {+ load_dataset <- function(...) { |
|
344 | +229 | ! |
- .deprecate_function("get_labels()", "Use col_labels(data)")+ .deprecate_function("load_dataset()", deprecation_detail) |
345 | +230 |
} |
1 | +231 |
- #' Create a relationship between a pair of datasets+ |
|
2 | +232 |
- #'+ #' @rdname TealData |
|
3 | +233 |
- #' @description+ #' @export |
|
4 | +234 |
- #' `r lifecycle::badge("stable")`+ load_datasets <- function(...) { |
|
5 | -+ | ||
235 | +! |
- #'+ .deprecate_function("load_datasets()", deprecation_detail) |
|
6 | +236 |
- #' Create a relationship between two datasets, `dataset_1` and `dataset_2`.+ } |
|
7 | +237 |
- #' By default, this function establishes a directed relationship with `dataset_1` as the parent.+ |
|
8 | +238 |
- #' If `dataset_2` is not specified, the function creates a primary key for `dataset_1`.+ #' @rdname TealData |
|
9 | +239 |
- #'+ #' @export |
|
10 | +240 |
- #' @param dataset_1,dataset_2 (`character(1)`) Dataset names. When `dataset_2` is omitted,+ mutate_data <- function(...) { |
|
11 | -+ | ||
241 | +! |
- #' a primary key for `dataset_1` is created.+ .deprecate_function("mutate_data()", deprecation_detail) |
|
12 | +242 |
- #' @param keys (optionally named `character`) Column mapping between the datasets,+ } |
|
13 | +243 |
- #' where `names(keys)` maps columns in `dataset_1` corresponding to columns of+ |
|
14 | +244 |
- #' `dataset_2` given by the elements of `keys`.+ #' @rdname TealData |
|
15 | +245 |
- #' - If unnamed, the same column names are used for both datasets.+ #' @export |
|
16 | +246 |
- #' - If any element of the `keys` vector is empty with a non-empty name, then the name is+ mutate_dataset <- function(...) { |
|
17 | -+ | ||
247 | +! |
- #' used for both datasets.+ .deprecate_function("mutate_dataset()", deprecation_detail) |
|
18 | +248 |
- #' @param directed (`logical(1)`) Flag that indicates whether it should create+ } |
|
19 | +249 |
- #' a parent-child relationship between the datasets.+ |
|
20 | +250 |
- #' - `TRUE` (default) `dataset_1` is the parent of `dataset_2`;+ #' @rdname TealData |
|
21 | +251 |
- #' - `FALSE` when the relationship is undirected.+ #' @export |
|
22 | +252 |
- #'+ set_args <- function(...) { |
|
23 | -+ | ||
253 | +! |
- #' @return object of class `join_key_set` to be passed into `join_keys` function.+ .deprecate_function("set_args()", deprecation_detail) |
|
24 | +254 |
- #'+ } |
|
25 | +255 |
- #' @examples+ |
|
26 | +256 |
- #' join_key("d1", "d2", c("A"))+ #' @rdname TealData |
|
27 | +257 |
- #' join_key("d1", "d2", c("A" = "B"))+ #' @export |
|
28 | +258 |
- #' join_key("d1", "d2", c("A" = "B", "C"))+ rds_dataset_connector <- function(...) { |
|
29 | -+ | ||
259 | +! |
- #'+ .deprecate_function("rds_dataset_connector()", deprecation_detail) |
|
30 | +260 |
- #' @export+ } |
|
31 | +261 |
- #' @seealso [join_keys()], [parents()]+ |
|
32 | +262 |
- #'+ #' @rdname TealData |
|
33 | +263 |
- join_key <- function(dataset_1, dataset_2 = dataset_1, keys, directed = TRUE) {+ #' @export |
|
34 | -1070x | +||
264 | +
- checkmate::assert_string(dataset_1)+ rds_cdisc_dataset_connector <- function(...) { |
||
35 | -1070x | +||
265 | +! |
- checkmate::assert_string(dataset_2)+ .deprecate_function("rds_cdisc_dataset_connector()", deprecation_detail) |
|
36 | -1067x | +||
266 | +
- checkmate::assert_character(keys, any.missing = FALSE)+ } |
||
37 | -1062x | +||
267 | +
- checkmate::assert_flag(directed)+ |
||
38 | +268 |
-
+ #' @rdname TealData |
|
39 | -1062x | +||
269 | +
- if (length(keys) > 0) {+ #' @export |
||
40 | -1060x | +||
270 | +
- if (is.null(names(keys))) {+ script_dataset_connector <- function(...) { |
||
41 | -520x | +||
271 | +! |
- names(keys) <- keys+ .deprecate_function("script_dataset_connector()", deprecation_detail) |
|
42 | +272 |
- }+ } |
|
43 | +273 | ||
44 | -1060x | -
- keys <- trimws(keys)- |
- |
45 | -1060x | +||
274 | +
- names(keys) <- trimws(names(keys))+ #' @rdname TealData |
||
46 | +275 |
-
+ #' @export |
|
47 | +276 |
- # Remove keys with empty value and without name+ script_cdisc_dataset_connector <- function(...) { |
|
48 | -1060x | +||
277 | +! |
- if (any(keys == "" & names(keys) == "")) {+ .deprecate_function("script_cdisc_dataset_connector()", deprecation_detail) |
|
49 | -6x | +||
278 | +
- message("Key with an empty value and name are ignored.")+ } |
||
50 | -6x | +||
279 | +
- keys <- keys[keys != "" & names(keys) != ""]+ |
||
51 | +280 |
- }+ #' @rdname TealData |
|
52 | +281 |
-
+ #' @export |
|
53 | +282 |
- # Set name of keys without one: c("A") -> c("A" = "A")+ set_keys <- function(...) { |
|
54 | -1060x | +||
283 | +! |
- if (any(names(keys) == "")) {+ .deprecate_function("set_keys()", deprecation_detail) |
|
55 | -4x | +||
284 | +
- names(keys)[names(keys) == ""] <- keys[names(keys) == ""]+ } |
||
56 | +285 |
- }+ |
|
57 | +286 |
-
+ #' @rdname TealData |
|
58 | +287 |
- # Set value of keys with empty string, but non-empty name: c("A" = "") -> c("A" = "A")+ #' @export |
|
59 | -1060x | +||
288 | +
- if (any(keys == "")) {+ read_script <- function(...) { |
||
60 | -4x | +||
289 | +! |
- keys[keys == ""] <- names(keys[keys == ""])+ .deprecate_function("read_script()", deprecation_detail) |
|
61 | +290 |
- }+ } |
|
62 | +291 | ||
63 | -1060x | +||
292 | +
- stopifnot(!is.null(names(keys)))+ #' @rdname TealData |
||
64 | -1060x | +||
293 | +
- stopifnot(!anyDuplicated(keys))+ #' @export |
||
65 | -1059x | +||
294 | +
- stopifnot(!anyDuplicated(names(keys)))+ to_relational_data <- function(...) {+ |
+ ||
295 | +! | +
+ .deprecate_function("to_relational_data()", deprecation_detail) |
|
66 | +296 |
-
+ } |
|
67 | -1058x | +||
297 | +
- if (dataset_1 == dataset_2 && any(names(keys) != keys)) {+ |
||
68 | -2x | +||
298 | +
- stop("Keys within a dataset must match exactly: keys = c('A' = 'B') are not allowed")+ #' @rdname TealData |
||
69 | +299 |
- }+ #' @export |
|
70 | +300 |
- } else {+ validate_metadata <- function(...) { |
|
71 | -2x | +||
301 | +! |
- keys <- NULL+ .deprecate_function("validate_metadata()", deprecation_detail) |
|
72 | +302 |
- }+ } |
|
73 | +303 | ||
74 | -1058x | +||
304 | +
- parents <- if (directed && dataset_1 != dataset_2) {+ #' @rdname TealData |
||
75 | -392x | +||
305 | +
- stats::setNames(list(dataset_1), dataset_2)+ #' @export |
||
76 | +306 |
- } else {+ get_cdisc_keys <- function(...) { |
|
77 | -666x | +||
307 | +! |
- list()+ .deprecate_function("get_cdisc_keys()", deprecation_detail) |
|
78 | +308 |
- }+ } |
|
79 | +309 | ||
80 | -1058x | +||
310 | +
- structure(+ #' @rdname TealData |
||
81 | -1058x | +||
311 | +
- list(+ #' @export |
||
82 | -1058x | +||
312 | +
- structure(+ cdisc_data_file <- function(...) { |
||
83 | -1058x | +||
313 | +! |
- list(keys),+ .deprecate_function("cdisc_data_file()", deprecation_detail) |
|
84 | -1058x | +||
314 | +
- names = dataset_2+ } |
||
85 | +315 |
- )+ |
|
86 | +316 |
- ),+ #' @rdname TealData |
|
87 | -1058x | +||
317 | +
- names = dataset_1,+ #' @export |
||
88 | -1058x | +||
318 | +
- class = "join_key_set",+ teal_data_file <- function(...) { |
||
89 | -1058x | +||
319 | +! |
- parents = parents+ .deprecate_function("teal_data_file()", deprecation_detail) |
|
90 | +320 |
- )+ } |
|
91 | +321 |
- }+ |
1 | +322 |
- #' Check Compatibility of keys+ #' @rdname TealData |
|
2 | +323 |
- #'+ #' @export |
|
3 | +324 |
- #' Helper function to assert if two key sets contain incompatible keys.+ get_join_keys <- function(...) { |
|
4 | -+ | ||
325 | +! |
- #'+ .deprecate_function("get_join_keys()", "Use `join_keys(data)` instead.") |
|
5 | +326 |
- #' @return Returns `TRUE` if successful, otherwise raises error.+ } |
|
6 | +327 |
- #' @keywords internal+ |
|
7 | +328 |
- assert_compatible_keys <- function(join_key_1, join_key_2) {+ #' @rdname TealData |
|
8 | -3x | +||
329 | +
- stop_message <- function(dataset_1, dataset_2) {+ #' @param value value to assign |
||
9 | -1x | +||
330 | +
- stop(+ #' @export |
||
10 | -1x | +||
331 | +
- paste("cannot specify multiple different join keys between datasets:", dataset_1, "and", dataset_2)+ `get_join_keys<-` <- function(..., value) { |
||
11 | -+ | ||
332 | +! |
- )+ .deprecate_function("`get_join_keys<-`()", "Use `join_keys(x) <- ...`") |
|
12 | +333 |
- }+ } |
|
13 | +334 | ||
14 | -3x | -
- dataset_1_one <- names(join_key_1)- |
- |
15 | -3x | +||
335 | +
- dataset_2_one <- names(join_key_1[[1]])+ #' @rdname col_labels |
||
16 | -3x | +||
336 | +
- keys_one <- join_key_1[[1]][[1]]+ #' @include formatters_var_labels.R |
||
17 | +337 |
-
+ #' @details |
|
18 | -3x | +||
338 | +
- dataset_1_two <- names(join_key_2)+ #' `r lifecycle::badge("deprecated")` |
||
19 | -3x | +||
339 | +
- dataset_2_two <- names(join_key_2[[1]])+ #' |
||
20 | -3x | +||
340 | +
- keys_two <- join_key_2[[1]][[1]]+ #' In previous versions of `teal.data` labels were managed with `get_labels()`. |
||
21 | +341 |
-
+ #' This function is deprecated as of `0.4.0`, use `col_labels` instead. |
|
22 | +342 |
- # if first datasets and the second datasets match and keys+ #' @export |
|
23 | +343 |
- # must contain the same named elements+ get_labels <- function(...) { |
|
24 | -3x | -
- if (dataset_1_one == dataset_1_two && dataset_2_one == dataset_2_two) {- |
- |
25 | -3x | -
- if (!identical(sort(keys_one), sort(keys_two))) {- |
- |
26 | -1x | +||
344 | +! |
- stop_message(dataset_1_one, dataset_2_one)+ .deprecate_function("get_labels()", "Use col_labels(data)") |
|
27 | +345 |
- }+ } |
28 | +1 |
- }+ #' Create a relationship between a pair of datasets |
||
29 | +2 |
-
+ #' |
||
30 | +3 |
- # if first dataset of join_key_1 matches second dataset of join_key_2+ #' @description |
||
31 | +4 |
- # and the first dataset of join_key_2 must match second dataset of join_key_1+ #' `r lifecycle::badge("stable")` |
||
32 | +5 |
- # and keys must contain the same elements but with names and values swapped+ #' |
||
33 | -2x | +|||
6 | +
- if (dataset_1_one == dataset_2_two && dataset_2_one == dataset_1_two) {+ #' Create a relationship between two datasets, `dataset_1` and `dataset_2`. |
|||
34 | +7 |
- if (+ #' By default, this function establishes a directed relationship with `dataset_1` as the parent. |
||
35 | -! | +|||
8 | +
- xor(length(keys_one) == 0, length(keys_two) == 0) ||+ #' If `dataset_2` is not specified, the function creates a primary key for `dataset_1`. |
|||
36 | -! | +|||
9 | +
- !identical(sort(keys_one), sort(stats::setNames(names(keys_two), keys_two)))+ #' |
|||
37 | +10 |
- ) {+ #' @param dataset_1,dataset_2 (`character(1)`) Dataset names. When `dataset_2` is omitted, |
||
38 | -! | +|||
11 | +
- stop_message(dataset_1_one, dataset_2_one)+ #' a primary key for `dataset_1` is created. |
|||
39 | +12 |
- }+ #' @param keys (optionally named `character`) Column mapping between the datasets, |
||
40 | +13 |
- }+ #' where `names(keys)` maps columns in `dataset_1` corresponding to columns of |
||
41 | +14 |
-
+ #' `dataset_2` given by the elements of `keys`. |
||
42 | +15 |
- # otherwise they are compatible+ #' - If unnamed, the same column names are used for both datasets. |
||
43 | -2x | +|||
16 | +
- return(TRUE)+ #' - If any element of the `keys` vector is empty with a non-empty name, then the name is |
|||
44 | +17 |
- }+ #' used for both datasets. |
||
45 | +18 |
-
+ #' @param directed (`logical(1)`) Flag that indicates whether it should create |
||
46 | +19 |
- #' Validate parent-child key+ #' a parent-child relationship between the datasets. |
||
47 | +20 |
- #'+ #' - `TRUE` (default) `dataset_1` is the parent of `dataset_2`; |
||
48 | +21 |
- #' Helper function checks the parent-child relations are valid.+ #' - `FALSE` when the relationship is undirected. |
||
49 | +22 |
#' |
||
50 | +23 |
- #' @param x (`join_keys`) object to assert validity of relations+ #' @return object of class `join_key_set` to be passed into `join_keys` function. |
||
51 | +24 |
#' |
||
52 | +25 |
- #' @return `join_keys` invisibly+ #' @examples |
||
53 | +26 |
- #'+ #' join_key("d1", "d2", c("A")) |
||
54 | +27 |
- #' @keywords internal+ #' join_key("d1", "d2", c("A" = "B")) |
||
55 | +28 |
- assert_parent_child <- function(x) {- |
- ||
56 | -431x | -
- jk <- join_keys(x)+ #' join_key("d1", "d2", c("A" = "B", "C")) |
||
57 | -431x | +|||
29 | +
- jk_parents <- parents(jk)+ #' |
|||
58 | +30 |
-
+ #' @export |
||
59 | -431x | +|||
31 | +
- checkmate::assert_class(jk, c("join_keys", "list"))+ #' @seealso [join_keys()], [parents()] |
|||
60 | +32 |
-
+ #' |
||
61 | -431x | +|||
33 | +
- if (!is.null(jk_parents)) {+ join_key <- function(dataset_1, dataset_2 = dataset_1, keys, directed = TRUE) { |
|||
62 | -431x | +34 | +1070x |
- for (idx1 in seq_along(jk_parents)) {+ checkmate::assert_string(dataset_1) |
63 | -170x | +35 | +1070x |
- name_from <- names(jk_parents)[[idx1]]+ checkmate::assert_string(dataset_2) |
64 | -170x | +36 | +1067x |
- for (idx2 in seq_along(jk_parents[[idx1]])) {+ checkmate::assert_character(keys, any.missing = FALSE) |
65 | -170x | +37 | +1062x |
- name_to <- jk_parents[[idx1]][[idx2]]+ checkmate::assert_flag(directed) |
66 | -170x | +|||
38 | +
- keys_from <- jk[[name_from]][[name_to]]+ |
|||
67 | -170x | +39 | +1062x |
- keys_to <- jk[[name_to]][[name_from]]+ if (length(keys) > 0) { |
68 | -170x | +40 | +1060x |
- if (length(keys_from) == 0 && length(keys_to) == 0) {+ if (is.null(names(keys))) { |
69 | -1x | +41 | +520x |
- stop(sprintf("No join keys from %s to its parent (%s) and vice versa", name_from, name_to))+ names(keys) <- keys |
70 | +42 |
- }+ } |
||
71 | +43 |
- }+ |
||
72 | +||||
44 | +1060x | +
+ keys <- trimws(keys)+ |
+ ||
45 | +1060x | +
+ names(keys) <- trimws(names(keys))+ |
+ ||
46 |
- }+ |
|||
73 | +47 |
- }+ # Remove keys with empty value and without name |
||
74 | -430x | +48 | +1060x |
- invisible(x)+ if (any(keys == "" & names(keys) == "")) { |
75 | -+ | |||
49 | +6x |
- }+ message("Key with an empty value and name are ignored.") |
||
76 | -+ | |||
50 | +6x |
-
+ keys <- keys[keys != "" & names(keys) != ""] |
||
77 | +51 |
- #' Verify key set compatibility+ } |
||
78 | +52 |
- #'+ |
||
79 | +53 |
- #' Helper function to ensuring compatibility between two sets of keys+ # Set name of keys without one: c("A") -> c("A" = "A") |
||
80 | -+ | |||
54 | +1060x |
- #'+ if (any(names(keys) == "")) { |
||
81 | -+ | |||
55 | +4x |
- #' @return Returns `TRUE` if successful, otherwise raises error.+ names(keys)[names(keys) == ""] <- keys[names(keys) == ""] |
||
82 | +56 |
- #' @keywords internal+ } |
||
83 | +57 |
- assert_compatible_keys2 <- function(x, y) {+ |
||
84 | +58 |
- # Helper to flatten join_keys / join_key_set+ # Set value of keys with empty string, but non-empty name: c("A" = "") -> c("A" = "A") |
||
85 | -3x | +59 | +1060x |
- flatten_join_key_sets <- function(value) {+ if (any(keys == "")) { |
86 | -6x | +60 | +4x |
- value <- unclass(value)+ keys[keys == ""] <- names(keys[keys == ""]) |
87 | -6x | +|||
61 | +
- Reduce(+ } |
|||
88 | -6x | +|||
62 | +
- init = list(),+ |
|||
89 | -6x | +63 | +1060x |
- f = function(u, v, ...) {+ stopifnot(!is.null(names(keys))) |
90 | -6x | +64 | +1060x |
- el <- value[v][[1]]+ stopifnot(!anyDuplicated(keys)) |
91 | -6x | +65 | +1059x |
- res <- lapply(seq_along(el), function(ix) el[ix])+ stopifnot(!anyDuplicated(names(keys)))+ |
+
66 | ++ | + | ||
92 | -6x | +67 | +1058x |
- names(res) <- rep(v, length(res))+ if (dataset_1 == dataset_2 && any(names(keys) != keys)) { |
93 | -6x | +68 | +2x |
- append(u, res)+ stop("Keys within a dataset must match exactly: keys = c('A' = 'B') are not allowed") |
94 | +69 |
- },+ } |
||
95 | -6x | +|||
70 | +
- x = names(value)+ } else { |
|||
96 | -+ | |||
71 | +2x |
- )+ keys <- NULL |
||
97 | +72 |
} |
||
98 | +73 | |||
99 | -3x | +74 | +1058x |
- x <- flatten_join_key_sets(x)+ parents <- if (directed && dataset_1 != dataset_2) { |
100 | -3x | +75 | +392x |
- y <- flatten_join_key_sets(y)+ stats::setNames(list(dataset_1), dataset_2) |
101 | +76 |
-
+ } else { |
||
102 | -3x | +77 | +666x |
- for (idx_1 in seq_along(x)) {+ list() |
103 | -3x | +|||
78 | +
- for (idx_2 in seq_along(y)) {+ } |
|||
104 | -3x | +|||
79 | +
- assert_compatible_keys(x[idx_1], y[idx_2])+ |
|||
105 | -+ | |||
80 | +1058x |
- }+ structure( |
||
106 | -+ | |||
81 | +1058x |
- }+ list( |
||
107 | -2x | +82 | +1058x |
- TRUE+ structure( |
108 | -+ | |||
83 | +1058x |
- }+ list(keys), |
||
109 | -+ | |||
84 | +1058x |
-
+ names = dataset_2 |
||
110 | +85 |
- #' Updates the keys of the datasets based on the parents+ ) |
||
111 | +86 |
- #'+ ), |
||
112 | -+ | |||
87 | +1058x |
- #' @param x (`join_keys`) object to update the keys.+ names = dataset_1,+ |
+ ||
88 | +1058x | +
+ class = "join_key_set",+ |
+ ||
89 | +1058x | +
+ parents = parents |
||
113 | +90 |
- #'+ ) |
||
114 | +91 |
- #' @return (`self`) invisibly for chaining+ } |
115 | +1 |
- #'+ # get_code_dependency ---- |
||
116 | +2 |
- #' @keywords internal+ |
||
117 | +3 |
- update_keys_given_parents <- function(x) {+ #' Get code dependency of an object |
||
118 | -12x | +|||
4 | +
- jk <- join_keys(x)+ #' |
|||
119 | +5 |
-
+ #' Extract subset of code required to reproduce specific object(s), including code producing side-effects. |
||
120 | -12x | +|||
6 | +
- checkmate::assert_class(jk, "join_keys", .var.name = checkmate::vname(x))+ #' |
|||
121 | +7 |
-
+ #' Given a character vector with code, this function will extract the part of the code responsible for creating |
||
122 | -12x | +|||
8 | +
- datanames <- names(jk)+ #' the variables specified by `names`. |
|||
123 | -12x | +|||
9 | +
- for (d1_ix in seq_along(datanames)) {+ #' This includes the final call that creates the variable(s) in question as well as all _parent calls_, |
|||
124 | -34x | +|||
10 | +
- d1 <- datanames[[d1_ix]]+ #' _i.e._ calls that create variables used in the final call and their parents, etc. |
|||
125 | -34x | +|||
11 | +
- d1_parent <- parent(jk, d1)+ #' Also included are calls that create side-effects like establishing connections. |
|||
126 | -34x | +|||
12 | +
- for (d2 in datanames[-1 * seq.int(d1_ix)]) {+ #' |
|||
127 | -38x | +|||
13 | +
- if (length(jk[[d1]][[d2]]) == 0) {+ #' It is assumed that object dependency is established by using three assignment operators: `<-`, `=`, and `->` . |
|||
128 | -16x | +|||
14 | +
- d2_parent <- parent(jk, d2)+ #' Other assignment methods (`assign`, `<<-`) or non-standard-evaluation methods are not supported. |
|||
129 | +15 |
-
+ #' |
||
130 | -12x | +|||
16 | +
- if (!identical(d1_parent, d2_parent) || length(d1_parent) == 0) next+ #' Side-effects are not detected automatically and must be marked in the code. |
|||
131 | +17 |
-
+ #' Add `# @linksto object` at the end of a line where a side-effect occurs to specify that this line is required |
||
132 | +18 |
- # both has the same parent -> common keys to parent+ #' to reproduce a variable called `object`. |
||
133 | -4x | +|||
19 | +
- keys_d1_parent <- sort(jk[[d1]][[d1_parent]])+ #' |
|||
134 | -4x | +|||
20 | +
- keys_d2_parent <- sort(jk[[d2]][[d2_parent]])+ #' @param code `character` with the code. |
|||
135 | +21 |
-
+ #' @param names `character` vector of object names. |
||
136 | -4x | +|||
22 | +
- common_ix_1 <- unname(keys_d1_parent) %in% unname(keys_d2_parent)+ #' @param check_names `logical(1)` flag specifying if a warning for non-existing names should be displayed. |
|||
137 | -4x | +|||
23 | +
- common_ix_2 <- unname(keys_d2_parent) %in% unname(keys_d1_parent)+ #' |
|||
138 | +24 |
-
+ #' @return Character vector, a subset of `code`. |
||
139 | +25 |
- # No common keys between datasets - leave empty+ #' Note that subsetting is actually done on the calls `code`, not necessarily on the elements of the vector. |
||
140 | -1x | +|||
26 | +
- if (all(!common_ix_1)) next+ #' |
|||
141 | +27 |
-
+ #' @keywords internal |
||
142 | -3x | +|||
28 | +
- fk <- structure(+ get_code_dependency <- function(code, names, check_names = TRUE) { |
|||
143 | -3x | +29 | +47x |
- names(keys_d2_parent)[common_ix_2],+ checkmate::assert_character(code) |
144 | -3x | +30 | +47x |
- names = names(keys_d1_parent)[common_ix_1]+ checkmate::assert_character(names, any.missing = FALSE) |
145 | +31 |
- )+ |
||
146 | -3x | +32 | +47x |
- jk[[d1]][[d2]] <- fk # mutate join key+ if (identical(code, character(0)) || identical(trimws(code), "")) { |
147 | -+ | |||
33 | +2x |
- }+ return(code) |
||
148 | +34 |
- }+ } |
||
149 | +35 |
- }+ |
||
150 | -- |
- # check parent child relation+ | ||
36 | +45x | +
+ code <- parse(text = code, keep.source = TRUE) |
||
151 | -12x | +37 | +45x |
- assert_parent_child(x = jk)+ pd <- utils::getParseData(code)+ |
+
38 | +45x | +
+ calls_pd <- extract_calls(pd) |
||
152 | +39 | |||
153 | -12x | +40 | +45x |
- jk+ if (check_names) { |
154 | +41 |
- }+ # Detect if names are actually in code. |
1 | -+ | |||
42 | +45x |
- #' @rdname join_keys+ symbols <- unlist(lapply(calls_pd, function(call) call[call$token == "SYMBOL", "text"])) |
||
2 | -+ | |||
43 | +45x |
- #' @order 2+ if (any(pd$text == "assign")) { |
||
3 | -+ | |||
44 | +4x |
- #'+ assign_calls <- Filter(function(call) find_call(call, "assign"), calls_pd) |
||
4 | -+ | |||
45 | +4x |
- #' @section Functions:+ ass_str <- unlist(lapply(assign_calls, function(call) call[call$token == "STR_CONST", "text"])) |
||
5 | -+ | |||
46 | +4x |
- #' - `x[datanames]`: Returns a subset of the `join_keys` object for+ ass_str <- gsub("^['\"]|['\"]$", "", ass_str) |
||
6 | -+ | |||
47 | +4x |
- #' given `datanames`, including parent `datanames` and symmetric mirror keys between+ symbols <- c(ass_str, symbols) |
||
7 | +48 |
- #' `datanames` in the result.+ } |
||
8 | -+ | |||
49 | +45x |
- #' - `x[i, j]`: Returns join keys between datasets `i` and `j`,+ if (!all(names %in% unique(symbols))) {+ |
+ ||
50 | +1x | +
+ warning("Object(s) not found in code: ", toString(setdiff(names, symbols))) |
||
9 | +51 |
- #' including implicit keys inferred from their relationship with a parent.+ } |
||
10 | +52 |
- #'+ } |
||
11 | +53 |
- #' @param i,j indices specifying elements to extract or replace. Index should be a+ + |
+ ||
54 | +45x | +
+ graph <- code_graph(calls_pd)+ |
+ ||
55 | +45x | +
+ ind <- unlist(lapply(names, function(x) graph_parser(x, graph))) |
||
12 | +56 |
- #' a character vector, but it can also take numeric, logical, `NULL` or missing.+ + |
+ ||
57 | +45x | +
+ lib_ind <- detect_libraries(calls_pd) |
||
13 | +58 |
- #'+ + |
+ ||
59 | +45x | +
+ as.character(code[unique(c(lib_ind, ind))]) |
||
14 | +60 |
- #' @export+ } |
||
15 | +61 |
- #'+ |
||
16 | +62 |
- #' @examples+ #' Locate function call token |
||
17 | +63 |
#' |
||
18 | +64 |
- #' # Getter for join_keys ---+ #' Determine which row of parsed data is specific `SYMBOL_FUNCTION_CALL` token. |
||
19 | +65 |
#' |
||
20 | +66 |
- #' jk["ds1", "ds2"]+ #' Useful for determining occurrence of `assign` or `data` functions in an input call. |
||
21 | +67 |
#' |
||
22 | +68 |
- #' # Subsetting join_keys ----+ #' @param call_pd `data.frame` as returned by `extract_calls()` |
||
23 | +69 |
- #'+ #' @param text `character(1)` to look for in `text` column of `call_pd` |
||
24 | +70 |
- #' jk["ds1"]+ #' |
||
25 | +71 |
- #' jk[1:2]+ #' @return |
||
26 | +72 |
- #' jk[c("ds1", "ds2")]+ #' Single integer specifying row in `call_pd` where `token` is `SYMBOL_FUNCTION_CALL` and `text` is `text`. |
||
27 | +73 |
- #'+ #' 0 if not found. |
||
28 | +74 |
- `[.join_keys` <- function(x, i, j) {- |
- ||
29 | -31x | -
- if (missing(i) && missing(j)) {+ #' |
||
30 | +75 |
- # because:+ #' @keywords internal |
||
31 | +76 |
- # - list(a = 1)[] returns list(a = 1)+ #' @noRd |
||
32 | +77 |
- # - data.frame(a = 1)[] returns data.frame(a = 1)+ find_call <- function(call_pd, text) { |
||
33 | -1x | +78 | +345x |
- return(x)+ checkmate::check_data_frame(call_pd) |
34 | -30x | +79 | +345x |
- } else if (!missing(i) && is.null(i) || !missing(j) && is.null(j)) {+ checkmate::check_names(call_pd, must.include = c("token", "text")) |
35 | -+ | |||
80 | +345x |
- # because list(a = 1)[NULL] returns NULL+ checkmate::check_string(text) |
||
36 | +81 |
- # data.frame(a = 1)[NULL, NULL] returns data.frame(+ |
||
37 | -2x | +82 | +345x |
- return(join_keys())+ ans <- which(call_pd$token == "SYMBOL_FUNCTION_CALL" & call_pd$text == text) |
38 | -28x | +83 | +345x |
- } else if (!missing(i) && !missing(j)) {- |
-
39 | -- |
- if (+ if (length(ans)) { |
||
40 | -8x | +84 | +25x |
- !any(+ ans |
41 | -8x | +|||
85 | +
- checkmate::test_string(i),+ } else { |
|||
42 | -8x | +86 | +320x |
- checkmate::test_number(i),+ 0L |
43 | -8x | +|||
87 | +
- checkmate::test_logical(i, len = length(x)) && sum(j) == 1+ } |
|||
44 | +88 |
- ) ||+ } |
||
45 | -8x | +|||
89 | +
- !any(+ |
|||
46 | -8x | +|||
90 | +
- checkmate::test_string(j),+ #' Split the result of `utils::getParseData()` into separate calls |
|||
47 | -8x | +|||
91 | +
- checkmate::test_number(j),+ #' |
|||
48 | -8x | +|||
92 | +
- checkmate::test_logical(j, len = length(x)) && sum(j) == 1+ #' @param pd (`data.frame`) A result of `utils::getParseData()`. |
|||
49 | +93 |
- )+ #' |
||
50 | +94 |
- ) {+ #' @return |
||
51 | -1x | +|||
95 | +
- stop(+ #' A `list` of `data.frame`s. |
|||
52 | -1x | +|||
96 | +
- "join_keys[i, j] - Can't extract keys for multiple pairs.",+ #' Each element is a subset of `pd` corresponding to one call in the original code from which `pd` was obtained. |
|||
53 | -1x | +|||
97 | +
- "When specifying a pair [i, j], both indices must point to a single key pair.\n",+ #' Only four columns (`"token"`, `"text"`, `"id"`, `"parent"`) are kept, the rest is discarded. |
|||
54 | -1x | +|||
98 | +
- call. = FALSE+ #' |
|||
55 | +99 |
- )+ #' @keywords internal |
||
56 | +100 |
- }+ #' @noRd |
||
57 | -1x | +|||
101 | +
- if (is.numeric(i)) i <- names(x)[i]+ extract_calls <- function(pd) { |
|||
58 | -1x | +102 | +45x |
- if (is.numeric(j)) j <- names(x)[j]+ calls <- lapply( |
59 | -+ | |||
103 | +45x |
-
+ pd[pd$parent == 0, "id"], |
||
60 | -7x | +104 | +45x |
- subset_x <- update_keys_given_parents(x[union(i, j)])+ function(parent) { |
61 | -7x | +105 | +163x |
- return(subset_x[[i]][[j]])+ rbind( |
62 | -20x | +106 | +163x |
- } else if (!missing(j)) {+ pd[pd$id == parent, c("token", "text", "id", "parent")], |
63 | -+ | |||
107 | +163x |
- # ie. select all keys which have j as dataset_2+ get_children(pd = pd, parent = parent) |
||
64 | +108 |
- # since list is symmetrical it is equivalent to selecting by i- |
- ||
65 | -1x | -
- i <- j+ ) |
||
66 | +109 |
- }+ } |
||
67 | +110 |
-
+ ) |
||
68 | -20x | +111 | +45x |
- checkmate::assert(+ calls <- Filter(function(call) !(nrow(call) == 1 && call$token == "';'"), calls) |
69 | -20x | +112 | +45x |
- combine = "or",+ calls <- Filter(Negate(is.null), calls) |
70 | -20x | +113 | +45x |
- checkmate::check_character(i),+ calls <- fix_shifted_comments(calls) |
71 | -20x | +114 | +45x |
- checkmate::check_numeric(i),+ fix_arrows(calls) |
72 | -20x | +|||
115 | +
- checkmate::check_logical(i)+ } |
|||
73 | +116 |
- )+ |
||
74 | +117 |
-
+ #' @keywords internal |
||
75 | +118 |
-
+ #' @noRd |
||
76 | +119 |
- # Convert integer/logical index to named index+ get_children <- function(pd, parent) { |
||
77 | -20x | +120 | +1911x |
- if (checkmate::test_numeric(i) || checkmate::test_logical(i)) {+ idx_children <- abs(pd$parent) == parent |
78 | -2x | +121 | +1911x |
- i <- names(x)[i]+ children <- pd[idx_children, c("token", "text", "id", "parent")]+ |
+
122 | +1911x | +
+ if (nrow(children) == 0) {+ |
+ ||
123 | +1112x | +
+ return(NULL) |
||
79 | +124 |
} |
||
80 | +125 | |||
81 | -+ | |||
126 | +799x |
- # When retrieving a relationship pair, it will also return the symmetric key+ if (parent > 0) { |
||
82 | -20x | +127 | +799x |
- new_jk <- new_join_keys()+ do.call(rbind, c(list(children), lapply(children$id, get_children, pd = pd))) |
83 | -20x | +|||
128 | +
- queue <- unique(i)+ } |
|||
84 | -20x | +|||
129 | +
- bin <- character(0)+ } |
|||
85 | +130 | |||
86 | +131 |
- # Need to iterate on a mutating queue if subset of a dataset will also+ #' Fixes edge case of comments being shifted to the next call. |
||
87 | +132 |
- # select its parent as that parent might have relationships with others+ #' @keywords internal |
||
88 | +133 |
- # already selected.+ #' @noRd |
||
89 | -20x | +|||
134 | +
- while (length(queue) > 0) {+ fix_shifted_comments <- function(calls) { |
|||
90 | -45x | +|||
135 | +
- ix <- queue[1]+ # If the first or the second token is a @linksto COMMENT, |
|||
91 | -45x | +|||
136 | +
- queue <- queue[-1]+ # then it belongs to the previous call. |
|||
92 | +137 | 45x |
- bin <- c(bin, ix)+ if (length(calls) >= 2) { |
|
93 | -+ | |||
138 | +43x |
-
+ for (i in 2:length(calls)) { |
||
94 | -45x | +139 | +117x |
- ix_parent <- parent(x, ix)+ comment_idx <- grep("@linksto", calls[[i]][, "text"]) |
95 | -+ | |||
140 | +117x |
-
+ if (isTRUE(comment_idx[1] <= 2)) { |
||
96 | -45x | +141 | +4x |
- if (checkmate::test_string(ix_parent, min.chars = 1) && !ix_parent %in% c(queue, bin)) {+ calls[[i - 1]] <- rbind( |
97 | -10x | +142 | +4x |
- queue <- c(queue, ix_parent)+ calls[[i - 1]], |
98 | -+ | |||
143 | +4x |
- }+ calls[[i]][seq_len(comment_idx[1]), ] |
||
99 | +144 |
-
+ ) |
||
100 | -45x | +145 | +4x |
- ix_valid_names <- names(x[[ix]]) %in% c(queue, bin)+ calls[[i]] <- calls[[i]][-seq_len(comment_idx[1]), ] |
101 | +146 | - - | -||
102 | -45x | -
- new_jk[[ix]] <- x[[ix]][ix_valid_names]+ } |
||
103 | +147 |
-
+ } |
||
104 | +148 |
- # Add primary key of parent+ } |
||
105 | +149 | 45x |
- if (length(ix_parent) > 0) {+ Filter(nrow, calls) |
|
106 | -16x | +|||
150 | +
- new_jk[[ix_parent]][[ix_parent]] <- x[[ix_parent]][[ix_parent]]+ } |
|||
107 | +151 |
- }+ |
||
108 | +152 |
- }+ #' Fixes edge case of `<-` assignment operator being called as function, |
||
109 | +153 |
-
+ #' which is \code{`<-`(y,x)} instead of traditional `y <- x`. |
||
110 | -20x | +|||
154 | +
- common_parents_ix <- names(parents(x)) %in% names(new_jk) &+ #' @keywords internal |
|||
111 | -20x | +|||
155 | +
- parents(x) %in% names(new_jk)+ #' @noRd |
|||
112 | +156 |
-
+ fix_arrows <- function(calls) { |
||
113 | -9x | +157 | +45x |
- if (any(common_parents_ix)) parents(new_jk) <- parents(x)[common_parents_ix]+ checkmate::assert_list(calls) |
114 | -+ | |||
158 | +45x |
-
+ lapply(calls, function(call) { |
||
115 | -20x | +159 | +161x |
- new_jk+ sym_fun <- call$token == "SYMBOL_FUNCTION_CALL" |
116 | -+ | |||
160 | +161x |
- }+ call[sym_fun, ] <- sub_arrows(call[sym_fun, ]) |
||
117 | -+ | |||
161 | +161x |
-
+ call |
||
118 | +162 |
- #' @rdname join_keys+ }) |
||
119 | +163 |
- #' @order 2+ } |
||
120 | +164 |
- #'+ |
||
121 | +165 |
- #' @param directed (`logical(1)`) Flag that indicates whether it should create+ #' Execution of assignment operator substitutions for a call. |
||
122 | +166 |
- #' a parent-child relationship between the datasets.+ #' @keywords internal |
||
123 | +167 |
- #' - `TRUE` (default) `dataset_1` is the parent of `dataset_2`;+ #' @noRd |
||
124 | +168 |
- #' - `FALSE` when the relationship is undirected.+ sub_arrows <- function(call) { |
||
125 | -+ | |||
169 | +161x |
- #' @section Functions:+ checkmate::assert_data_frame(call) |
||
126 | -+ | |||
170 | +161x |
- #' - `x[i, j] <- value`: Assignment of a key to pair `(i, j)`.+ map <- data.frame( |
||
127 | -+ | |||
171 | +161x |
- #' - `x[i] <- value`: This (without `j` parameter) **is not** a supported+ row.names = c("`<-`", "`<<-`", "`=`"), |
||
128 | -+ | |||
172 | +161x |
- #' operation for `join_keys`.+ token = rep("LEFT_ASSIGN", 3), |
||
129 | -+ | |||
173 | +161x |
- #' - `join_keys(x)[i, j] <- value`: Assignment to `join_keys` object stored in `x`,+ text = rep("<-", 3) |
||
130 | +174 |
- #' such as a `teal_data` object or `join_keys` object itself.+ )+ |
+ ||
175 | +161x | +
+ sub_ids <- call$text %in% rownames(map)+ |
+ ||
176 | +161x | +
+ call[sub_ids, c("token", "text")] <- map[call$text[sub_ids], ]+ |
+ ||
177 | +161x | +
+ call |
||
131 | +178 |
- #'+ } |
||
132 | +179 |
- #' @export+ |
||
133 | +180 |
- #' @examples+ # code_graph ---- |
||
134 | +181 |
- #'+ |
||
135 | +182 |
- #' # Setting a new primary key ---+ #' Create object dependencies graph within parsed code |
||
136 | +183 |
#' |
||
137 | +184 |
- #' jk["ds4", "ds4"] <- "pk4"+ #' Builds dependency graph that identifies dependencies between objects in parsed code. |
||
138 | +185 |
- #' jk["ds5", "ds5"] <- "pk5"+ #' Helps understand which objects depend on which. |
||
139 | +186 |
#' |
||
140 | +187 |
- #' # Setting a single relationship pair ---+ #' @param calls_pd `list` of `data.frame`s; |
||
141 | +188 |
- #'+ #' result of `utils::getParseData()` split into subsets representing individual calls; |
||
142 | +189 |
- #' jk["ds1", "ds4"] <- c("pk1" = "pk4")+ #' created by `extract_calls()` function |
||
143 | +190 |
#' |
||
144 | +191 |
- #' # Removing a key ---+ #' @return |
||
145 | +192 |
- #'+ #' A list (of length of input `calls_pd`) where each element represents one call. |
||
146 | +193 |
- #' jk["ds5", "ds5"] <- NULL+ #' Each element is a character vector listing names of objects that depend on this call |
||
147 | +194 |
- `[<-.join_keys` <- function(x, i, j, directed = TRUE, value) {+ #' and names of objects that this call depends on. |
||
148 | -11x | +|||
195 | +
- checkmate::assert_flag(directed)+ #' Dependencies are listed after the `"<-"` string, e.g. `c("a", "<-", "b", "c")` means that in this call object `a` |
|||
149 | -11x | +|||
196 | +
- if (missing(i) || missing(j)) {+ #' depends on objects `b` and `c`. |
|||
150 | -4x | +|||
197 | +
- stop("join_keys[i, j] specify both indices to set a key pair.")+ #' If a call is tagged with `@linksto a`, then object `a` is understood to depend on that call. |
|||
151 | -7x | +|||
198 | +
- } else if (!missing(i) && is.null(i) || !missing(j) && is.null(j)) {+ #' |
|||
152 | -2x | +|||
199 | +
- stop("join_keys[i, j] neither i nor j can be NULL.")+ #' @keywords internal |
|||
153 | +200 |
- } else if (+ #' @noRd |
||
154 | -5x | +|||
201 | +
- !any(+ code_graph <- function(calls_pd) { |
|||
155 | -5x | +202 | +45x |
- checkmate::test_string(i),+ cooccurrence <- extract_occurrence(calls_pd) |
156 | -5x | +|||
203 | +
- checkmate::test_number(i),+ |
|||
157 | -5x | +204 | +45x |
- checkmate::test_logical(i, len = length(x)) && sum(j) == 1+ side_effects <- extract_side_effects(calls_pd) |
158 | +205 |
- ) ||+ |
||
159 | -5x | +206 | +45x |
- !any(+ mapply(c, side_effects, cooccurrence, SIMPLIFY = FALSE) |
160 | -5x | +|||
207 | +
- checkmate::test_string(j),+ } |
|||
161 | -5x | +|||
208 | +
- checkmate::test_number(j),+ |
|||
162 | -5x | +|||
209 | +
- checkmate::test_logical(j, len = length(x)) && sum(j) == 1+ #' Extract object occurrence |
|||
163 | +210 |
- )+ #' |
||
164 | +211 |
- ) {+ #' Extracts objects occurrence within calls passed by `calls_pd`. |
||
165 | -2x | +|||
212 | +
- stop(+ #' Also detects which objects depend on which within a call. |
|||
166 | -2x | +|||
213 | +
- "join_keys[i, j] <- Can't set keys to specified indices.\n",+ #' |
|||
167 | -2x | +|||
214 | +
- "When setting pair [i, j], both indices must point to a single key pair.\n",- |
- |||
168 | -2x | -
- call. = FALSE+ #' @param calls_pd `list` of `data.frame`s; |
||
169 | +215 |
- )+ #' result of `utils::getParseData()` split into subsets representing individual calls; |
||
170 | +216 |
- }+ #' created by `extract_calls()` function |
||
171 | +217 |
-
+ #' |
||
172 | +218 |
- # Handle join key removal separately- |
- ||
173 | -3x | -
- if (is.null(value)) {- |
- ||
174 | -1x | -
- x[[i]][[j]] <- NULL- |
- ||
175 | -1x | -
- return(x)+ #' @return |
||
176 | +219 |
- }+ #' A list (of length of input `calls_pd`) where each element represents one call. |
||
177 | +220 | - - | -||
178 | -2x | -
- c(x, join_key(i, j, value, directed))+ #' Each element is a character vector listing names of objects that depend on this call |
||
179 | +221 |
- }+ #' and names of objects that this call depends on. |
||
180 | +222 |
-
+ #' Dependencies are listed after the `"<-"` string, e.g. `c("a", "<-", "b", "c")` means that in this call object `a` |
||
181 | +223 |
- #' @rdname join_keys+ #' depends on objects `b` and `c`. |
||
182 | +224 |
- #'+ #' If a call is tagged with `@linksto a`, then object `a` is understood to depend on that call. |
||
183 | +225 |
- #' @order 1000+ #' |
||
184 | +226 |
- #' @usage ## Preferred method is x[i, j] <- value+ #' @keywords internal |
||
185 | +227 |
- #' x[[i]][[j]] <- value+ #' @noRd |
||
186 | +228 |
- #'+ extract_occurrence <- function(calls_pd) { |
||
187 | -+ | |||
229 | +45x |
- #' @section Functions:+ is_in_function <- function(x) { |
||
188 | +230 |
- #' - `x[[i]][[j]] <- value`: It is equivalent as `x[i, j] <- value`.+ # If an object is a function parameter, |
||
189 | +231 |
- #'+ # then in calls_pd there is a `SYMBOL_FORMALS` entry for that object. |
||
190 | -+ | |||
232 | +147x |
- #' @export+ function_id <- x[x$token == "FUNCTION", "parent"] |
||
191 | -+ | |||
233 | +147x |
- #' @examples+ if (length(function_id)) { |
||
192 | -+ | |||
234 | +9x |
- #'+ x$id %in% get_children(x, function_id)$id |
||
193 | +235 |
- #' # Setting via x[[i]] <- value ---+ } else { |
||
194 | -+ | |||
236 | +138x |
- #'+ rep(FALSE, nrow(x)) |
||
195 | +237 |
- #' jk <- join_keys()+ } |
||
196 | +238 |
- #' jk[["ds6"]][["ds6"]] <- "pk6"+ } |
||
197 | -+ | |||
239 | +45x |
- #' jk[["ds7"]] <- list(ds7 = "pk7", ds6 = c(pk7 = "pk6"))+ in_parenthesis <- function(x) { |
||
198 | -+ | |||
240 | +126x |
- #' jk[["ds7"]][["ds7"]] <- NULL # removes key+ if (any(x$token %in% c("LBB", "'['"))) { |
||
199 | -+ | |||
241 | +5x |
- #'+ id_start <- min(x$id[x$token %in% c("LBB", "'['")]) |
||
200 | -+ | |||
242 | +5x |
- #' jk+ id_end <- min(x$id[x$token == "']'"]) |
||
201 | -+ | |||
243 | +5x |
- #'+ x$text[x$token == "SYMBOL" & x$id > id_start & x$id < id_end] |
||
202 | +244 |
- #' @noRd+ } |
||
203 | +245 |
- `[[<-.join_keys` <- function(x, i, value) {+ } |
||
204 | -382x | +246 | +45x |
- checkmate::assert(+ lapply( |
205 | -382x | +247 | +45x |
- combine = "or",+ calls_pd, |
206 | -382x | +248 | +45x |
- checkmate::check_string(i),+ function(call_pd) { |
207 | -382x | +|||
249 | +
- checkmate::check_number(i),+ # Handle data(object)/data("object")/data(object, envir = ) independently. |
|||
208 | -382x | -
- checkmate::check_logical(i, len = length(x))- |
- ||
209 | -+ | 250 | +161x |
- )+ data_call <- find_call(call_pd, "data") |
210 | -382x | +251 | +161x |
- checkmate::assert_list(value, names = "named", types = "character", null.ok = TRUE)+ if (data_call) { |
211 | -375x | +252 | +3x |
- if (checkmate::test_numeric(i) || checkmate::test_logical(i)) {+ sym <- call_pd[data_call + 1, "text"] |
212 | -1x | +253 | +3x |
- i <- names(x)[[i]]+ return(c(gsub("^['\"]|['\"]$", "", sym), "<-")) |
213 | +254 |
- }+ } |
||
214 | +255 | - - | -||
215 | -- |
- # Normalize values+ # Handle assign(x = ). |
||
216 | -375x | +256 | +158x |
- norm_value <- lapply(seq_along(value), function(.x) {+ assign_call <- find_call(call_pd, "assign") |
217 | -524x | +257 | +158x |
- join_key(i, names(value)[.x], value[[.x]])+ if (assign_call) { |
218 | +258 |
- })- |
- ||
219 | -375x | -
- names(norm_value) <- names(value)+ # Check if parameters were named. |
||
220 | +259 |
-
+ # "','" is for unnamed parameters, where "SYMBOL_SUB" is for named. |
||
221 | +260 |
- # Check if multiple modifications don't have a conflict- |
- ||
222 | -375x | -
- repeated_value_ix <- names(value) %in% names(value)[duplicated(names(value))]- |
- ||
223 | -375x | -
- repeated <- norm_value[repeated_value_ix]+ # "EQ_SUB" is for `=` appearing after the name of the named parameter. |
||
224 | -375x | +261 | +11x |
- vapply(+ if (any(call_pd$token == "SYMBOL_SUB")) { |
225 | -375x | +262 | +8x |
- seq_along(repeated),+ params <- call_pd[call_pd$token %in% c("SYMBOL_SUB", "','", "EQ_SUB"), "text"] |
226 | -375x | +|||
263 | +
- function(.ix, .x_value = repeated[[.ix]], .x_name = names(.x_value[[1]])) {+ # Remove sequence of "=", ",". |
|||
227 | -3x | +264 | +8x |
- assert_compatible_keys2(+ if (length(params > 1)) { |
228 | -3x | +265 | +8x |
- .x_value,+ remove <- integer(0) |
229 | -3x | +266 | +8x |
- unlist(unname(+ for (i in 2:length(params)) { |
230 | -3x | +267 | +36x |
- repeated[-.ix][names(repeated[-.ix]) == .x_name]+ if (params[i - 1] == "=" & params[i] == ",") { |
231 | -3x | +268 | +8x |
- ), recursive = FALSE)+ remove <- c(remove, i - 1, i) |
232 | +269 |
- )+ } |
||
233 | +270 |
- },+ } |
||
234 | -375x | +271 | +7x |
- logical(1)+ if (length(remove)) params <- params[-remove] |
235 | +272 |
- )+ } |
||
236 | -+ | |||
273 | +8x |
-
+ pos <- match("x", setdiff(params, ","), nomatch = match(",", params, nomatch = 0)) |
||
237 | -374x | +274 | +8x |
- norm_value <- lapply(norm_value, function(x) x[[1]][[1]])+ if (!pos) { |
238 | -374x | +|||
275 | +! |
- names(norm_value) <- names(value)+ return(character(0L)) |
||
239 | +276 |
-
+ } |
||
240 | +277 |
- # Safe to do as duplicated are the same- |
- ||
241 | -374x | -
- norm_value[duplicated(names(norm_value))] <- NULL+ # pos is indicator of the place of 'x' |
||
242 | +278 |
-
+ # 1. All parameters are named, but none is 'x' - return(character(0L)) |
||
243 | +279 |
- # Keep only elements with length > 0L- |
- ||
244 | -374x | -
- norm_value <- Filter(length, norm_value)+ # 2. Some parameters are named, 'x' is in named parameters: match("x", setdiff(params, ",")) |
||
245 | +280 |
-
+ # - check "x" in params being just a vector of named parameters. |
||
246 | +281 |
- # Remove classes to use list-based get/assign operations+ # 3. Some parameters are named, 'x' is not in named parameters |
||
247 | -374x | +|||
282 | +
- new_x <- unclass(x)+ # - check first appearance of "," (unnamed parameter) in vector parameters. |
|||
248 | +283 |
-
+ } else { |
||
249 | +284 |
- # In case a pair is removed, also remove the symmetric pair and update parents+ # Object is the first entry after 'assign'. |
||
250 | -374x | +285 | +3x |
- removed_names <- setdiff(names(new_x[[i]]), names(norm_value))+ pos <- 1 |
251 | -374x | +|||
286 | +
- for (.x in removed_names) {+ } |
|||
252 | -2x | +287 | +11x |
- if (identical(parent(x, .x), i)) attr(new_x, "parents")[[.x]] <- NULL+ sym <- call_pd[assign_call + pos, "text"] |
253 | -1x | +288 | +11x |
- if (identical(parent(x, i), .x)) attr(new_x, "parents")[[i]] <- NULL+ return(c(gsub("^['\"]|['\"]$", "", sym), "<-")) |
254 | +289 |
-
+ } |
||
255 | -5x | +|||
290 | +
- new_x[[.x]][[i]] <- NULL+ |
|||
256 | +291 |
- }+ # What occurs in a function body is not tracked. |
||
257 | -+ | |||
292 | +147x |
-
+ x <- call_pd[!is_in_function(call_pd), ] |
||
258 | -374x | +293 | +147x |
- new_x[[i]] <- norm_value+ sym_cond <- which(x$token %in% c("SYMBOL", "SYMBOL_FUNCTION_CALL")) |
259 | +294 | |||
260 | -+ | |||
295 | +147x |
- # Iterate on all new values to create symmetrical pair+ if (length(sym_cond) == 0) { |
||
261 | -374x | +296 | +2x |
- for (ds2 in names(norm_value)) {+ return(character(0L)) |
262 | -298x | +|||
297 | +
- if (ds2 == i) next+ } |
|||
263 | +298 |
-
+ # Watch out for SYMBOLS after $ and @. For x$a x@a: x is object, a is not.+ |
+ ||
299 | ++ |
+ # For x$a, a's ID is $'s ID-2 so we need to remove all IDs that have ID = $ID - 2. |
||
264 | -222x | +300 | +145x |
- keep_value <- if (is.null(x)) list() else new_x[[ds2]]+ dollar_ids <- x[x$token %in% c("'$'", "'@'"), "id"] |
265 | -+ | |||
301 | +145x |
-
+ if (length(dollar_ids)) { |
||
266 | -+ | |||
302 | +12x |
- # Invert key+ object_ids <- x[sym_cond, "id"] |
||
267 | -222x | +303 | +12x |
- new_value <- stats::setNames(names(norm_value[[ds2]]), norm_value[[ds2]])+ after_dollar <- object_ids[(object_ids - 2) %in% dollar_ids] |
268 | -222x | +304 | +12x |
- keep_value[[i]] <- new_value+ sym_cond <- setdiff(sym_cond, which(x$id %in% after_dollar)) |
269 | +305 |
-
+ } |
||
270 | +306 |
- # Assign symmetrical+ |
||
271 | -222x | +307 | +145x |
- new_x[[ds2]] <- keep_value+ ass_cond <- grep("ASSIGN", x$token)+ |
+
308 | +145x | +
+ if (!length(ass_cond)) {+ |
+ ||
309 | +19x | +
+ return(c("<-", unique(x[sym_cond, "text"]))) |
||
272 | +310 |
- }+ } |
||
273 | +311 | |||
274 | -374x | +312 | +126x |
- preserve_attr <- attributes(new_x)[!names(attributes(new_x)) %in% "names"]+ sym_cond <- sym_cond[sym_cond > ass_cond] # NOTE 1 |
275 | +313 |
- # Remove NULL or empty keys+ # If there was an assignment operation detect direction of it. |
||
276 | -374x | +314 | +126x |
- new_x <- Filter(function(x) length(x) != 0L, new_x)+ if (unique(x$text[ass_cond]) == "->") { # NOTE 2 |
277 | -374x | +315 | +1x |
- attributes(new_x) <- utils::modifyList(attributes(new_x), preserve_attr)+ sym_cond <- rev(sym_cond) |
278 | +316 |
-
+ } |
||
279 | +317 |
- #+ |
||
280 | -+ | |||
318 | +126x |
- # restore class+ after <- match(min(x$id[ass_cond]), sort(x$id[c(min(ass_cond), sym_cond)])) - 1 |
||
281 | -374x | +319 | +126x |
- class(new_x) <- class(x)+ ans <- append(x[sym_cond, "text"], "<-", after = max(1, after)) |
282 | -374x | +320 | +126x |
- new_x+ roll <- in_parenthesis(call_pd)+ |
+
321 | +126x | +
+ if (length(roll)) {+ |
+ ||
322 | +2x | +
+ c(setdiff(ans, roll), roll) |
||
283 | +323 |
- }+ } else {+ |
+ ||
324 | +124x | +
+ ans |
1 | +325 |
- # get_code_dependency ----+ } |
||
2 | +326 | |||
3 | +327 |
- #' Get code dependency of an object+ ### NOTE 2: What if there are 2 assignments: e.g. a <- b -> c. |
||
4 | +328 |
- #'+ ### NOTE 1: For cases like 'eval(expression(b <- b + 2))' removes 'eval(expression('. |
||
5 | +329 |
- #' Extract subset of code required to reproduce specific object(s), including code producing side-effects.+ } |
||
6 | +330 |
- #'+ ) |
||
7 | +331 |
- #' Given a character vector with code, this function will extract the part of the code responsible for creating+ } |
||
8 | +332 |
- #' the variables specified by `names`.+ |
||
9 | +333 |
- #' This includes the final call that creates the variable(s) in question as well as all _parent calls_,+ #' Extract side effects |
||
10 | +334 |
- #' _i.e._ calls that create variables used in the final call and their parents, etc.+ #' |
||
11 | +335 |
- #' Also included are calls that create side-effects like establishing connections.+ #' Extracts all object names from the code that are marked with `@linksto` tag. |
||
12 | +336 |
#' |
||
13 | +337 |
- #' It is assumed that object dependency is established by using three assignment operators: `<-`, `=`, and `->` .+ #' The code may contain functions calls that create side effects, e.g. modify the environment. |
||
14 | +338 |
- #' Other assignment methods (`assign`, `<<-`) or non-standard-evaluation methods are not supported.+ #' Static code analysis may be insufficient to determine which objects are created or modified by such a function call. |
||
15 | +339 |
- #'+ #' The `@linksto` comment tag is introduced to mark a call as having a (side) effect on one or more objects. |
||
16 | +340 |
- #' Side-effects are not detected automatically and must be marked in the code.+ #' With this tag a complete object dependency structure can be established. |
||
17 | +341 |
- #' Add `# @linksto object` at the end of a line where a side-effect occurs to specify that this line is required+ #' Read more about side effects and the usage of `@linksto` tag in [`get_code_dependencies()`] function. |
||
18 | +342 |
- #' to reproduce a variable called `object`.+ #' |
||
19 | +343 |
- #'+ #' @param calls_pd `list` of `data.frame`s; |
||
20 | +344 |
- #' @param code `character` with the code.+ #' result of `utils::getParseData()` split into subsets representing individual calls; |
||
21 | +345 |
- #' @param names `character` vector of object names.+ #' created by `extract_calls()` function |
||
22 | +346 |
- #' @param check_names `logical(1)` flag specifying if a warning for non-existing names should be displayed.+ #' |
||
23 | +347 |
- #'+ #' @return |
||
24 | +348 |
- #' @return Character vector, a subset of `code`.+ #' A list of length equal to that of `calls_pd`, where each element is a character vector of names of objects |
||
25 | +349 |
- #' Note that subsetting is actually done on the calls `code`, not necessarily on the elements of the vector.+ #' depending a call tagged with `@linksto` in a corresponding element of `calls_pd`. |
||
26 | +350 |
#' |
||
27 | +351 |
#' @keywords internal |
||
28 | +352 |
- get_code_dependency <- function(code, names, check_names = TRUE) {+ #' @noRd |
||
29 | -45x | +|||
353 | +
- checkmate::assert_character(code)+ extract_side_effects <- function(calls_pd) { |
|||
30 | +354 | 45x |
- checkmate::assert_character(names, any.missing = FALSE)+ lapply( |
|
31 | -+ | |||
355 | +45x |
-
+ calls_pd, |
||
32 | +356 | 45x |
- if (identical(code, character(0)) || identical(trimws(code), "")) {+ function(x) { |
|
33 | -2x | +357 | +161x |
- return(code)+ linksto <- grep("@linksto", x[x$token == "COMMENT", "text"], value = TRUE) |
34 | -+ | |||
358 | +161x |
- }+ unlist(strsplit(sub("\\s*#\\s*@linksto\\s+", "", linksto), "\\s+")) |
||
35 | +359 | - - | -||
36 | -43x | -
- code <- parse(text = code, keep.source = TRUE)+ } |
||
37 | -43x | +|||
360 | +
- pd <- utils::getParseData(code)+ ) |
|||
38 | -43x | +|||
361 | +
- calls_pd <- extract_calls(pd)+ } |
|||
39 | +362 | |||
40 | -43x | -
- if (check_names) {- |
- ||
41 | +363 |
- # Detect if names are actually in code.+ # graph_parser ---- |
||
42 | -43x | +|||
364 | +
- symbols <- unlist(lapply(calls_pd, function(call) call[call$token == "SYMBOL", "text"]))+ |
|||
43 | -43x | +|||
365 | +
- if (any(pd$text == "assign")) {+ #' Return the indices of calls needed to reproduce an object |
|||
44 | -4x | +|||
366 | +
- assign_calls <- Filter(function(call) find_call(call, "assign"), calls_pd)+ #' |
|||
45 | -4x | +|||
367 | +
- ass_str <- unlist(lapply(assign_calls, function(call) call[call$token == "STR_CONST", "text"]))+ #' @param x The name of the object to return code for. |
|||
46 | -4x | +|||
368 | +
- ass_str <- gsub("^['\"]|['\"]$", "", ass_str)+ #' @param graph A result of `code_graph()`. |
|||
47 | -4x | +|||
369 | +
- symbols <- c(ass_str, symbols)+ #' |
|||
48 | +370 |
- }+ #' @return |
||
49 | -43x | +|||
371 | +
- if (!all(names %in% unique(symbols))) {+ #' Integer vector of indices that can be applied to `graph` to obtain all calls required to reproduce object `x`. |
|||
50 | -1x | +|||
372 | +
- warning("Object(s) not found in code: ", toString(setdiff(names, symbols)))+ #' |
|||
51 | +373 |
- }+ #' @keywords internal |
||
52 | +374 |
- }+ #' @noRd |
||
53 | +375 |
-
+ graph_parser <- function(x, graph) { |
||
54 | -43x | +376 | +190x |
- graph <- code_graph(calls_pd)+ occurrence <- vapply( |
55 | -43x | +377 | +190x |
- ind <- unlist(lapply(names, function(x) graph_parser(x, graph)))+ graph, function(call) { |
56 | -+ | |||
378 | +556x |
-
+ ind <- match("<-", call, nomatch = length(call) + 1L) |
||
57 | -43x | +379 | +556x |
- lib_ind <- detect_libraries(calls_pd)+ x %in% call[seq_len(ind - 1L)] |
58 | +380 |
-
+ }, |
||
59 | -43x | +381 | +190x |
- as.character(code[unique(c(lib_ind, ind))])+ logical(1) |
60 | +382 |
- }+ ) |
||
61 | +383 | |||
62 | -+ | |||
384 | +190x |
- #' Locate function call token+ dependencies <- lapply(graph[occurrence], function(call) { |
||
63 | -+ | |||
385 | +103x |
- #'+ ind <- match("<-", call, nomatch = 0L) |
||
64 | -+ | |||
386 | +103x |
- #' Determine which row of parsed data is specific `SYMBOL_FUNCTION_CALL` token.+ call[(ind + 1L):length(call)] |
||
65 | +387 |
- #'+ }) |
||
66 | -+ | |||
388 | +190x |
- #' Useful for determining occurrence of `assign` or `data` functions in an input call.+ dependencies <- setdiff(unlist(dependencies), x) |
||
67 | +389 |
- #'+ |
||
68 | -+ | |||
390 | +190x |
- #' @param call_pd `data.frame` as returned by `extract_calls()`+ if (length(dependencies) && any(occurrence)) { |
||
69 | -+ | |||
391 | +73x |
- #' @param text `character(1)` to look for in `text` column of `call_pd`+ dependency_ids <- lapply(dependencies, function(dependency) { |
||
70 | -+ | |||
392 | +144x |
- #'+ graph_parser(dependency, graph[1:max(which(occurrence))]) |
||
71 | +393 |
- #' @return+ }) |
||
72 | -+ | |||
394 | +73x |
- #' Single integer specifying row in `call_pd` where `token` is `SYMBOL_FUNCTION_CALL` and `text` is `text`.+ sort(unique(c(which(occurrence), unlist(dependency_ids)))) |
||
73 | +395 |
- #' 0 if not found.+ } else { |
||
74 | -+ | |||
396 | +117x |
- #'+ which(occurrence) |
||
75 | +397 |
- #' @keywords internal+ } |
||
76 | +398 |
- #' @noRd+ } |
||
77 | +399 |
- find_call <- function(call_pd, text) {- |
- ||
78 | -334x | -
- checkmate::check_data_frame(call_pd)- |
- ||
79 | -334x | -
- checkmate::check_names(call_pd, must.include = c("token", "text"))- |
- ||
80 | -334x | -
- checkmate::check_string(text)+ |
||
81 | +400 | |||
82 | -334x | -
- ans <- which(call_pd$token == "SYMBOL_FUNCTION_CALL" & call_pd$text == text)- |
- ||
83 | -334x | -
- if (length(ans)) {- |
- ||
84 | -24x | -
- ans- |
- ||
85 | +401 |
- } else {+ # default_side_effects -------------------------------------------------------------------------------------------- |
||
86 | -310x | +|||
402 | +
- 0L+ |
|||
87 | +403 |
- }+ #' Detect library calls |
||
88 | +404 |
- }+ #' |
||
89 | +405 |
-
+ #' Detects `library()` and `require()` function calls. |
||
90 | +406 |
- #' Split the result of `utils::getParseData()` into separate calls+ #' |
||
91 | +407 |
- #'+ #' @param calls_pd `list` of `data.frame`s; |
||
92 | +408 |
- #' @param pd (`data.frame`) A result of `utils::getParseData()`.+ #' result of `utils::getParseData()` split into subsets representing individual calls; |
||
93 | +409 |
- #'+ #' created by `extract_calls()` function |
||
94 | +410 |
- #' @return+ #' |
||
95 | +411 |
- #' A `list` of `data.frame`s.+ #' @return |
||
96 | +412 |
- #' Each element is a subset of `pd` corresponding to one call in the original code from which `pd` was obtained.+ #' Integer vector of indices that can be applied to `graph` (result of `code_graph()`) to obtain all calls containing |
||
97 | +413 |
- #' Only four columns (`"token"`, `"text"`, `"id"`, `"parent"`) are kept, the rest is discarded.+ #' `library()` or `require()` calls that are always returned for reproducibility. |
||
98 | +414 |
#' |
||
99 | +415 |
#' @keywords internal |
||
100 | +416 |
#' @noRd |
||
101 | +417 |
- extract_calls <- function(pd) {+ detect_libraries <- function(calls_pd) { |
||
102 | -43x | +418 | +45x |
- calls <- lapply(+ defaults <- c("library", "require")+ |
+
419 | ++ | + | ||
103 | -43x | +420 | +45x |
- pd[pd$parent == 0, "id"],+ which( |
104 | -43x | +421 | +45x |
- function(parent) {+ vapply( |
105 | -157x | +422 | +45x |
- rbind(+ calls_pd, |
106 | -157x | +423 | +45x |
- pd[pd$id == parent, c("token", "text", "id", "parent")],+ function(call) { |
107 | -157x | +424 | +161x |
- get_children(pd = pd, parent = parent)+ any(call$token == "SYMBOL_FUNCTION_CALL" & call$text %in% defaults) |
108 | +425 |
- )+ }, |
||
109 | -+ | |||
426 | +45x |
- }+ logical(1) |
||
110 | +427 |
- )+ ) |
||
111 | -43x | +|||
428 | +
- calls <- Filter(function(call) !(nrow(call) == 1 && call$token == "';'"), calls)+ ) |
|||
112 | -43x | +|||
429 | +
- calls <- Filter(Negate(is.null), calls)+ } |
|||
113 | -43x | +
1 | +
- calls <- fix_shifted_comments(calls)+ #' Check Compatibility of keys |
|||
114 | -43x | +|||
2 | +
- fix_arrows(calls)+ #' |
|||
115 | +3 |
- }+ #' Helper function to assert if two key sets contain incompatible keys. |
||
116 | +4 |
-
+ #' |
||
117 | +5 |
- #' @keywords internal+ #' @return Returns `TRUE` if successful, otherwise raises error. |
||
118 | +6 |
- #' @noRd+ #' @keywords internal |
||
119 | +7 |
- get_children <- function(pd, parent) {+ assert_compatible_keys <- function(join_key_1, join_key_2) { |
||
120 | -1845x | +8 | +3x |
- idx_children <- abs(pd$parent) == parent+ stop_message <- function(dataset_1, dataset_2) { |
121 | -1845x | +9 | +1x |
- children <- pd[idx_children, c("token", "text", "id", "parent")]+ stop( |
122 | -1845x | +10 | +1x |
- if (nrow(children) == 0) {+ paste("cannot specify multiple different join keys between datasets:", dataset_1, "and", dataset_2) |
123 | -1076x | -
- return(NULL)+ | ||
11 | ++ |
+ ) |
||
124 | +12 |
} |
||
125 | +13 | |||
126 | -769x | +14 | +3x |
- if (parent > 0) {+ dataset_1_one <- names(join_key_1) |
127 | -769x | -
- do.call(rbind, c(list(children), lapply(children$id, get_children, pd = pd)))- |
- ||
128 | -+ | 15 | +3x |
- }+ dataset_2_one <- names(join_key_1[[1]]) |
129 | -+ | |||
16 | +3x |
- }+ keys_one <- join_key_1[[1]][[1]] |
||
130 | +17 | |||
131 | -+ | |||
18 | +3x |
- #' Fixes edge case of comments being shifted to the next call.+ dataset_1_two <- names(join_key_2) |
||
132 | -+ | |||
19 | +3x |
- #' @keywords internal+ dataset_2_two <- names(join_key_2[[1]]) |
||
133 | -+ | |||
20 | +3x |
- #' @noRd+ keys_two <- join_key_2[[1]][[1]] |
||
134 | +21 |
- fix_shifted_comments <- function(calls) {+ |
||
135 | +22 |
- # If the first or the second token is a @linksto COMMENT,+ # if first datasets and the second datasets match and keys |
||
136 | +23 |
- # then it belongs to the previous call.- |
- ||
137 | -43x | -
- if (length(calls) >= 2) {+ # must contain the same named elements |
||
138 | -41x | +24 | +3x |
- for (i in 2:length(calls)) {+ if (dataset_1_one == dataset_1_two && dataset_2_one == dataset_2_two) { |
139 | -113x | +25 | +3x |
- comment_idx <- grep("@linksto", calls[[i]][, "text"])+ if (!identical(sort(keys_one), sort(keys_two))) { |
140 | -113x | +26 | +1x |
- if (isTRUE(comment_idx[1] <= 2)) {+ stop_message(dataset_1_one, dataset_2_one) |
141 | -4x | +|||
27 | +
- calls[[i - 1]] <- rbind(+ } |
|||
142 | -4x | +|||
28 | +
- calls[[i - 1]],+ } |
|||
143 | -4x | +|||
29 | +
- calls[[i]][seq_len(comment_idx[1]), ]+ |
|||
144 | +30 |
- )+ # if first dataset of join_key_1 matches second dataset of join_key_2 |
||
145 | -4x | +|||
31 | +
- calls[[i]] <- calls[[i]][-seq_len(comment_idx[1]), ]+ # and the first dataset of join_key_2 must match second dataset of join_key_1 |
|||
146 | +32 |
- }+ # and keys must contain the same elements but with names and values swapped |
||
147 | -+ | |||
33 | +2x |
- }+ if (dataset_1_one == dataset_2_two && dataset_2_one == dataset_1_two) { |
||
148 | +34 |
- }+ if ( |
||
149 | -43x | +|||
35 | +! |
- Filter(nrow, calls)+ xor(length(keys_one) == 0, length(keys_two) == 0) || |
||
150 | -+ | |||
36 | +! |
- }+ !identical(sort(keys_one), sort(stats::setNames(names(keys_two), keys_two))) |
||
151 | +37 |
-
+ ) { |
||
152 | -+ | |||
38 | +! |
- #' Fixes edge case of `<-` assignment operator being called as function,+ stop_message(dataset_1_one, dataset_2_one) |
||
153 | +39 |
- #' which is \code{`<-`(y,x)} instead of traditional `y <- x`.+ } |
||
154 | +40 |
- #' @keywords internal+ } |
||
155 | +41 |
- #' @noRd+ |
||
156 | +42 |
- fix_arrows <- function(calls) {+ # otherwise they are compatible |
||
157 | -43x | +43 | +2x |
- checkmate::assert_list(calls)+ return(TRUE) |
158 | -43x | +|||
44 | +
- lapply(calls, function(call) {+ } |
|||
159 | -155x | +|||
45 | +
- sym_fun <- call$token == "SYMBOL_FUNCTION_CALL"+ |
|||
160 | -155x | +|||
46 | +
- call[sym_fun, ] <- sub_arrows(call[sym_fun, ])+ #' Validate parent-child key |
|||
161 | -155x | +|||
47 | +
- call+ #' |
|||
162 | +48 |
- })+ #' Helper function checks the parent-child relations are valid. |
||
163 | +49 |
- }+ #' |
||
164 | +50 |
-
+ #' @param x (`join_keys`) object to assert validity of relations |
||
165 | +51 |
- #' Execution of assignment operator substitutions for a call.+ #' |
||
166 | +52 |
- #' @keywords internal+ #' @return `join_keys` invisibly |
||
167 | +53 |
- #' @noRd+ #' |
||
168 | +54 |
- sub_arrows <- function(call) {+ #' @keywords internal |
||
169 | -155x | -
- checkmate::assert_data_frame(call)+ | ||
55 | ++ |
+ assert_parent_child <- function(x) { |
||
170 | -155x | +56 | +431x |
- map <- data.frame(+ jk <- join_keys(x) |
171 | -155x | +57 | +431x |
- row.names = c("`<-`", "`<<-`", "`=`"),+ jk_parents <- parents(jk) |
172 | -155x | +|||
58 | +
- token = rep("LEFT_ASSIGN", 3),+ |
|||
173 | -155x | +59 | +431x |
- text = rep("<-", 3)+ checkmate::assert_class(jk, c("join_keys", "list")) |
174 | +60 |
- )+ |
||
175 | -155x | +61 | +431x |
- sub_ids <- call$text %in% rownames(map)+ if (!is.null(jk_parents)) { |
176 | -155x | +62 | +431x |
- call[sub_ids, c("token", "text")] <- map[call$text[sub_ids], ]+ for (idx1 in seq_along(jk_parents)) { |
177 | -155x | +63 | +170x |
- call+ name_from <- names(jk_parents)[[idx1]] |
178 | -+ | |||
64 | +170x |
- }+ for (idx2 in seq_along(jk_parents[[idx1]])) { |
||
179 | -+ | |||
65 | +170x |
-
+ name_to <- jk_parents[[idx1]][[idx2]] |
||
180 | -+ | |||
66 | +170x |
- # code_graph ----+ keys_from <- jk[[name_from]][[name_to]] |
||
181 | -+ | |||
67 | +170x |
-
+ keys_to <- jk[[name_to]][[name_from]] |
||
182 | -+ | |||
68 | +170x |
- #' Create object dependencies graph within parsed code+ if (length(keys_from) == 0 && length(keys_to) == 0) { |
||
183 | -+ | |||
69 | +1x |
- #'+ stop(sprintf("No join keys from %s to its parent (%s) and vice versa", name_from, name_to)) |
||
184 | +70 |
- #' Builds dependency graph that identifies dependencies between objects in parsed code.+ } |
||
185 | +71 |
- #' Helps understand which objects depend on which.+ } |
||
186 | +72 |
- #'+ } |
||
187 | +73 |
- #' @param calls_pd `list` of `data.frame`s;+ } |
||
188 | -+ | |||
74 | +430x |
- #' result of `utils::getParseData()` split into subsets representing individual calls;+ invisible(x) |
||
189 | +75 |
- #' created by `extract_calls()` function+ } |
||
190 | +76 |
- #'+ |
||
191 | +77 |
- #' @return+ #' Verify key set compatibility |
||
192 | +78 |
- #' A list (of length of input `calls_pd`) where each element represents one call.+ #' |
||
193 | +79 |
- #' Each element is a character vector listing names of objects that depend on this call+ #' Helper function to ensuring compatibility between two sets of keys |
||
194 | +80 |
- #' and names of objects that this call depends on.+ #' |
||
195 | +81 |
- #' Dependencies are listed after the `"<-"` string, e.g. `c("a", "<-", "b", "c")` means that in this call object `a`+ #' @return Returns `TRUE` if successful, otherwise raises error. |
||
196 | +82 |
- #' depends on objects `b` and `c`.+ #' @keywords internal |
||
197 | +83 |
- #' If a call is tagged with `@linksto a`, then object `a` is understood to depend on that call.+ assert_compatible_keys2 <- function(x, y) { |
||
198 | +84 |
- #'+ # Helper to flatten join_keys / join_key_set |
||
199 | -+ | |||
85 | +3x |
- #' @keywords internal+ flatten_join_key_sets <- function(value) { |
||
200 | -+ | |||
86 | +6x |
- #' @noRd+ value <- unclass(value) |
||
201 | -+ | |||
87 | +6x |
- code_graph <- function(calls_pd) {+ Reduce( |
||
202 | -43x | +88 | +6x |
- cooccurrence <- extract_occurrence(calls_pd)+ init = list(), |
203 | -+ | |||
89 | +6x |
-
+ f = function(u, v, ...) { |
||
204 | -43x | +90 | +6x |
- side_effects <- extract_side_effects(calls_pd)+ el <- value[v][[1]] |
205 | -+ | |||
91 | +6x |
-
+ res <- lapply(seq_along(el), function(ix) el[ix]) |
||
206 | -43x | +92 | +6x |
- mapply(function(x, y) unique(c(x, y)), side_effects, cooccurrence, SIMPLIFY = FALSE)+ names(res) <- rep(v, length(res)) |
207 | -+ | |||
93 | +6x |
- }+ append(u, res) |
||
208 | +94 |
-
+ }, |
||
209 | -+ | |||
95 | +6x |
- #' Extract object occurrence+ x = names(value) |
||
210 | +96 |
- #'+ ) |
||
211 | +97 |
- #' Extracts objects occurrence within calls passed by `calls_pd`.+ } |
||
212 | +98 |
- #' Also detects which objects depend on which within a call.+ |
||
213 | -+ | |||
99 | +3x |
- #'+ x <- flatten_join_key_sets(x) |
||
214 | -+ | |||
100 | +3x |
- #' @param calls_pd `list` of `data.frame`s;+ y <- flatten_join_key_sets(y) |
||
215 | +101 |
- #' result of `utils::getParseData()` split into subsets representing individual calls;+ |
||
216 | -+ | |||
102 | +3x |
- #' created by `extract_calls()` function+ for (idx_1 in seq_along(x)) { |
||
217 | -+ | |||
103 | +3x |
- #'+ for (idx_2 in seq_along(y)) { |
||
218 | -+ | |||
104 | +3x |
- #' @return+ assert_compatible_keys(x[idx_1], y[idx_2]) |
||
219 | +105 |
- #' A list (of length of input `calls_pd`) where each element represents one call.+ } |
||
220 | +106 |
- #' Each element is a character vector listing names of objects that depend on this call+ } |
||
221 | -+ | |||
107 | +2x |
- #' and names of objects that this call depends on.+ TRUE |
||
222 | +108 |
- #' Dependencies are listed after the `"<-"` string, e.g. `c("a", "<-", "b", "c")` means that in this call object `a`+ } |
||
223 | +109 |
- #' depends on objects `b` and `c`.+ |
||
224 | +110 |
- #' If a call is tagged with `@linksto a`, then object `a` is understood to depend on that call.+ #' Updates the keys of the datasets based on the parents |
||
225 | +111 |
#' |
||
226 | +112 |
- #' @keywords internal+ #' @param x (`join_keys`) object to update the keys. |
||
227 | +113 |
- #' @noRd+ #' |
||
228 | +114 |
- extract_occurrence <- function(calls_pd) {- |
- ||
229 | -43x | -
- is_in_function <- function(x) {+ #' @return (`self`) invisibly for chaining |
||
230 | +115 |
- # If an object is a function parameter,+ #' |
||
231 | +116 |
- # then in calls_pd there is a `SYMBOL_FORMALS` entry for that object.- |
- ||
232 | -142x | -
- function_id <- x[x$token == "FUNCTION", "parent"]+ #' @keywords internal |
||
233 | -142x | +|||
117 | +
- if (length(function_id)) {+ update_keys_given_parents <- function(x) { |
|||
234 | -9x | +118 | +12x |
- x$id %in% get_children(x, function_id)$id+ jk <- join_keys(x) |
235 | +119 |
- } else {+ |
||
236 | -133x | +120 | +12x |
- rep(FALSE, nrow(x))+ checkmate::assert_class(jk, "join_keys", .var.name = checkmate::vname(x)) |
237 | +121 |
- }+ |
||
238 | -+ | |||
122 | +12x |
- }+ datanames <- names(jk) |
||
239 | -43x | +123 | +12x |
- lapply(+ for (d1_ix in seq_along(datanames)) { |
240 | -43x | +124 | +34x |
- calls_pd,+ d1 <- datanames[[d1_ix]] |
241 | -43x | +125 | +34x |
- function(call_pd) {+ d1_parent <- parent(jk, d1) |
242 | -+ | |||
126 | +34x |
- # Handle data(object)/data("object")/data(object, envir = ) independently.+ for (d2 in datanames[-1 * seq.int(d1_ix)]) { |
||
243 | -155x | +127 | +38x |
- data_call <- find_call(call_pd, "data")+ if (length(jk[[d1]][[d2]]) == 0) { |
244 | -155x | +128 | +16x |
- if (data_call) {+ d2_parent <- parent(jk, d2) |
245 | -2x | +|||
129 | +
- sym <- call_pd[data_call + 1, "text"]+ |
|||
246 | -2x | +130 | +12x |
- return(c(gsub("^['\"]|['\"]$", "", sym), "<-"))+ if (!identical(d1_parent, d2_parent) || length(d1_parent) == 0) next |
247 | +131 |
- }+ |
||
248 | +132 |
- # Handle assign(x = ).+ # both has the same parent -> common keys to parent |
||
249 | -153x | +133 | +4x |
- assign_call <- find_call(call_pd, "assign")+ keys_d1_parent <- sort(jk[[d1]][[d1_parent]]) |
250 | -153x | +134 | +4x |
- if (assign_call) {+ keys_d2_parent <- sort(jk[[d2]][[d2_parent]]) |
251 | +135 |
- # Check if parameters were named.+ |
||
252 | -+ | |||
136 | +4x |
- # "','" is for unnamed parameters, where "SYMBOL_SUB" is for named.+ common_ix_1 <- unname(keys_d1_parent) %in% unname(keys_d2_parent)+ |
+ ||
137 | +4x | +
+ common_ix_2 <- unname(keys_d2_parent) %in% unname(keys_d1_parent) |
||
253 | +138 |
- # "EQ_SUB" is for `=` appearing after the name of the named parameter.+ |
||
254 | -11x | +|||
139 | +
- if (any(call_pd$token == "SYMBOL_SUB")) {+ # No common keys between datasets - leave empty |
|||
255 | -8x | +140 | +1x |
- params <- call_pd[call_pd$token %in% c("SYMBOL_SUB", "','", "EQ_SUB"), "text"]+ if (all(!common_ix_1)) next |
256 | +141 |
- # Remove sequence of "=", ",".+ |
||
257 | -8x | +142 | +3x |
- if (length(params > 1)) {+ fk <- structure( |
258 | -8x | +143 | +3x |
- remove <- integer(0)+ names(keys_d2_parent)[common_ix_2], |
259 | -8x | +144 | +3x |
- for (i in 2:length(params)) {+ names = names(keys_d1_parent)[common_ix_1] |
260 | -36x | +|||
145 | +
- if (params[i - 1] == "=" & params[i] == ",") {+ ) |
|||
261 | -8x | +146 | +3x |
- remove <- c(remove, i - 1, i)+ jk[[d1]][[d2]] <- fk # mutate join key |
262 | +147 |
- }+ } |
||
263 | +148 |
- }+ } |
||
264 | -7x | +|||
149 | +
- if (length(remove)) params <- params[-remove]+ } |
|||
265 | +150 |
- }+ # check parent child relation |
||
266 | -8x | +151 | +12x |
- pos <- match("x", setdiff(params, ","), nomatch = match(",", params, nomatch = 0))+ assert_parent_child(x = jk)+ |
+
152 | ++ | + | ||
267 | -8x | +153 | +12x |
- if (!pos) {+ jk |
268 | -! | +|||
154 | +
- return(character(0L))+ } |
269 | +1 |
- }+ #' @rdname join_keys |
|||
270 | +2 |
- # pos is indicator of the place of 'x'+ #' @order 2 |
|||
271 | +3 |
- # 1. All parameters are named, but none is 'x' - return(character(0L))+ #' |
|||
272 | +4 |
- # 2. Some parameters are named, 'x' is in named parameters: match("x", setdiff(params, ","))+ #' @section Functions: |
|||
273 | +5 |
- # - check "x" in params being just a vector of named parameters.+ #' - `x[datanames]`: Returns a subset of the `join_keys` object for |
|||
274 | +6 |
- # 3. Some parameters are named, 'x' is not in named parameters+ #' given `datanames`, including parent `datanames` and symmetric mirror keys between |
|||
275 | +7 |
- # - check first appearance of "," (unnamed parameter) in vector parameters.+ #' `datanames` in the result. |
|||
276 | +8 |
- } else {+ #' - `x[i, j]`: Returns join keys between datasets `i` and `j`, |
|||
277 | +9 |
- # Object is the first entry after 'assign'.+ #' including implicit keys inferred from their relationship with a parent. |
|||
278 | -3x | +||||
10 | +
- pos <- 1+ #' |
||||
279 | +11 |
- }+ #' @param i,j indices specifying elements to extract or replace. Index should be a |
|||
280 | -11x | +||||
12 | +
- sym <- call_pd[assign_call + pos, "text"]+ #' a character vector, but it can also take numeric, logical, `NULL` or missing. |
||||
281 | -11x | +||||
13 | +
- return(c(gsub("^['\"]|['\"]$", "", sym), "<-"))+ #' |
||||
282 | +14 |
- }+ #' @export |
|||
283 | +15 |
-
+ #' |
|||
284 | +16 |
- # What occurs in a function body is not tracked.+ #' @examples |
|||
285 | -142x | +||||
17 | +
- x <- call_pd[!is_in_function(call_pd), ]+ #' |
||||
286 | -142x | +||||
18 | +
- sym_cond <- which(x$token %in% c("SYMBOL", "SYMBOL_FUNCTION_CALL"))+ #' # Getter for join_keys --- |
||||
287 | +19 |
-
+ #' |
|||
288 | -142x | +||||
20 | +
- if (length(sym_cond) == 0) {+ #' jk["ds1", "ds2"] |
||||
289 | -2x | +||||
21 | +
- return(character(0L))+ #' |
||||
290 | +22 |
- }+ #' # Subsetting join_keys ---- |
|||
291 | +23 |
- # Watch out for SYMBOLS after $ and @. For x$a x@a: x is object, a is not.+ #' |
|||
292 | +24 |
- # For x$a, a's ID is $'s ID-2 so we need to remove all IDs that have ID = $ID - 2.+ #' jk["ds1"] |
|||
293 | -140x | +||||
25 | +
- dollar_ids <- x[x$token %in% c("'$'", "'@'"), "id"]+ #' jk[1:2] |
||||
294 | -140x | +||||
26 | +
- if (length(dollar_ids)) {+ #' jk[c("ds1", "ds2")] |
||||
295 | -12x | +||||
27 | +
- object_ids <- x[sym_cond, "id"]+ #' |
||||
296 | -12x | +||||
28 | +
- after_dollar <- object_ids[(object_ids - 2) %in% dollar_ids]+ `[.join_keys` <- function(x, i, j) { |
||||
297 | -12x | +29 | +31x |
- sym_cond <- setdiff(sym_cond, which(x$id %in% after_dollar))+ if (missing(i) && missing(j)) { |
|
298 | +30 |
- }+ # because: |
|||
299 | +31 |
-
+ # - list(a = 1)[] returns list(a = 1) |
|||
300 | -140x | +||||
32 | +
- ass_cond <- grep("ASSIGN", x$token)+ # - data.frame(a = 1)[] returns data.frame(a = 1) |
||||
301 | -140x | +33 | +1x |
- if (!length(ass_cond)) {+ return(x) |
|
302 | -19x | -
- return(c("<-", unique(x[sym_cond, "text"])))+ | 34 | +30x | +
+ } else if (!missing(i) && is.null(i) || !missing(j) && is.null(j)) { |
303 | +35 |
- }+ # because list(a = 1)[NULL] returns NULL |
|||
304 | +36 |
-
+ # data.frame(a = 1)[NULL, NULL] returns data.frame( |
|||
305 | -121x | +37 | +2x |
- sym_cond <- sym_cond[sym_cond > ass_cond] # NOTE 1+ return(join_keys())+ |
+ |
38 | +28x | +
+ } else if (!missing(i) && !missing(j)) { |
|||
306 | +39 |
- # If there was an assignment operation detect direction of it.+ if ( |
|||
307 | -121x | +40 | +8x |
- if (unique(x$text[ass_cond]) == "->") { # NOTE 2+ !any( |
|
308 | -1x | +41 | +8x |
- sym_cond <- rev(sym_cond)+ checkmate::test_string(i), |
|
309 | -+ | ||||
42 | +8x |
- }+ checkmate::test_number(i),+ |
+ |||
43 | +8x | +
+ checkmate::test_logical(i, len = length(x)) && sum(j) == 1 |
|||
310 | +44 |
-
+ ) || |
|||
311 | -121x | +45 | +8x |
- append(unique(x[sym_cond, "text"]), "<-", after = 1)+ !any( |
|
312 | -+ | ||||
46 | +8x |
-
+ checkmate::test_string(j), |
|||
313 | -+ | ||||
47 | +8x |
- ### NOTE 2: What if there are 2 assignments: e.g. a <- b -> c.+ checkmate::test_number(j), |
|||
314 | -+ | ||||
48 | +8x |
- ### NOTE 1: For cases like 'eval(expression(b <- b + 2))' removes 'eval(expression('.+ checkmate::test_logical(j, len = length(x)) && sum(j) == 1 |
|||
315 | +49 |
- }+ ) |
|||
316 | +50 |
- )+ ) { |
|||
317 | -+ | ||||
51 | +1x |
- }+ stop( |
|||
318 | -+ | ||||
52 | +1x |
-
+ "join_keys[i, j] - Can't extract keys for multiple pairs.", |
|||
319 | -+ | ||||
53 | +1x |
- #' Extract side effects+ "When specifying a pair [i, j], both indices must point to a single key pair.\n", |
|||
320 | -+ | ||||
54 | +1x |
- #'+ call. = FALSE |
|||
321 | +55 |
- #' Extracts all object names from the code that are marked with `@linksto` tag.+ ) |
|||
322 | +56 |
- #'+ } |
|||
323 | -+ | ||||
57 | +1x |
- #' The code may contain functions calls that create side effects, e.g. modify the environment.+ if (is.numeric(i)) i <- names(x)[i] |
|||
324 | -+ | ||||
58 | +1x |
- #' Static code analysis may be insufficient to determine which objects are created or modified by such a function call.+ if (is.numeric(j)) j <- names(x)[j] |
|||
325 | +59 |
- #' The `@linksto` comment tag is introduced to mark a call as having a (side) effect on one or more objects.+ |
|||
326 | -+ | ||||
60 | +7x |
- #' With this tag a complete object dependency structure can be established.+ subset_x <- update_keys_given_parents(x[union(i, j)]) |
|||
327 | -+ | ||||
61 | +7x |
- #' Read more about side effects and the usage of `@linksto` tag in [`get_code_dependencies()`] function.+ return(subset_x[[i]][[j]]) |
|||
328 | -+ | ||||
62 | +20x |
- #'+ } else if (!missing(j)) { |
|||
329 | +63 |
- #' @param calls_pd `list` of `data.frame`s;+ # ie. select all keys which have j as dataset_2 |
|||
330 | +64 |
- #' result of `utils::getParseData()` split into subsets representing individual calls;+ # since list is symmetrical it is equivalent to selecting by i |
|||
331 | -+ | ||||
65 | +1x |
- #' created by `extract_calls()` function+ i <- j |
|||
332 | +66 |
- #'+ } |
|||
333 | +67 |
- #' @return+ |
|||
334 | -+ | ||||
68 | +20x |
- #' A list of length equal to that of `calls_pd`, where each element is a character vector of names of objects+ checkmate::assert( |
|||
335 | -+ | ||||
69 | +20x |
- #' depending a call tagged with `@linksto` in a corresponding element of `calls_pd`.+ combine = "or",+ |
+ |||
70 | +20x | +
+ checkmate::check_character(i),+ |
+ |||
71 | +20x | +
+ checkmate::check_numeric(i),+ |
+ |||
72 | +20x | +
+ checkmate::check_logical(i) |
|||
336 | +73 |
- #'+ ) |
|||
337 | +74 |
- #' @keywords internal+ |
|||
338 | +75 |
- #' @noRd+ |
|||
339 | +76 |
- extract_side_effects <- function(calls_pd) {+ # Convert integer/logical index to named index |
|||
340 | -43x | -
- lapply(- |
- |||
341 | -43x | -
- calls_pd,- |
- |||
342 | -43x | +77 | +20x |
- function(x) {+ if (checkmate::test_numeric(i) || checkmate::test_logical(i)) { |
|
343 | -155x | +78 | +2x |
- linksto <- grep("@linksto", x[x$token == "COMMENT", "text"], value = TRUE)+ i <- names(x)[i] |
|
344 | -155x | +||||
79 | +
- unlist(strsplit(sub("\\s*#\\s*@linksto\\s+", "", linksto), "\\s+"))+ } |
||||
345 | +80 |
- }+ |
|||
346 | +81 |
- )+ # When retrieving a relationship pair, it will also return the symmetric key |
|||
347 | -+ | ||||
82 | +20x |
- }+ new_jk <- new_join_keys() |
|||
348 | -+ | ||||
83 | +20x |
-
+ queue <- unique(i) |
|||
349 | -+ | ||||
84 | +20x |
- # graph_parser ----+ bin <- character(0) |
|||
350 | +85 | ||||
351 | +86 |
- #' Return the indices of calls needed to reproduce an object+ # Need to iterate on a mutating queue if subset of a dataset will also |
|||
352 | +87 |
- #'+ # select its parent as that parent might have relationships with others |
|||
353 | +88 |
- #' @param x The name of the object to return code for.+ # already selected. |
|||
354 | -+ | ||||
89 | +20x |
- #' @param graph A result of `code_graph()`.+ while (length(queue) > 0) { |
|||
355 | -+ | ||||
90 | +45x |
- #'+ ix <- queue[1] |
|||
356 | -+ | ||||
91 | +45x |
- #' @return+ queue <- queue[-1] |
|||
357 | -+ | ||||
92 | +45x |
- #' Integer vector of indices that can be applied to `graph` to obtain all calls required to reproduce object `x`.+ bin <- c(bin, ix) |
|||
358 | +93 |
- #'+ |
|||
359 | -+ | ||||
94 | +45x |
- #' @keywords internal+ ix_parent <- parent(x, ix) |
|||
360 | +95 |
- #' @noRd+ |
|||
361 | -+ | ||||
96 | +45x |
- graph_parser <- function(x, graph) {+ if (checkmate::test_string(ix_parent, min.chars = 1) && !ix_parent %in% c(queue, bin)) { |
|||
362 | -184x | +97 | +10x |
- occurrence <- vapply(+ queue <- c(queue, ix_parent) |
|
363 | -184x | +||||
98 | +
- graph, function(call) {+ } |
||||
364 | -554x | +||||
99 | +
- ind <- match("<-", call, nomatch = length(call) + 1L)+ |
||||
365 | -554x | +100 | +45x |
- x %in% call[seq_len(ind - 1L)]+ ix_valid_names <- names(x[[ix]]) %in% c(queue, bin) |
|
366 | +101 |
- },+ |
|||
367 | -184x | +102 | +45x |
- logical(1)+ new_jk[[ix]] <- x[[ix]][ix_valid_names] |
|
368 | +103 |
- )+ |
|||
369 | +104 | - - | -|||
370 | -184x | -
- dependencies <- lapply(graph[occurrence], function(call) {+ # Add primary key of parent |
|||
371 | -97x | +105 | +45x |
- ind <- match("<-", call, nomatch = 0L)+ if (length(ix_parent) > 0) { |
|
372 | -97x | +106 | +16x |
- call[(ind + 1L):length(call)]+ new_jk[[ix_parent]][[ix_parent]] <- x[[ix_parent]][[ix_parent]] |
|
373 | +107 |
- })+ } |
|||
374 | -184x | +||||
108 | +
- dependencies <- setdiff(unlist(dependencies), x)+ } |
||||
375 | +109 | ||||
376 | -184x | -
- if (length(dependencies) && any(occurrence)) {- |
- |||
377 | -69x | +110 | +20x |
- dependency_ids <- lapply(dependencies, function(dependency) {+ common_parents_ix <- names(parents(x)) %in% names(new_jk) & |
|
378 | -140x | +111 | +20x |
- graph_parser(dependency, graph[1:max(which(occurrence))])+ parents(x) %in% names(new_jk) |
|
379 | +112 |
- })+ |
|||
380 | -69x | +113 | +9x |
- sort(unique(c(which(occurrence), unlist(dependency_ids))))+ if (any(common_parents_ix)) parents(new_jk) <- parents(x)[common_parents_ix] |
|
381 | +114 |
- } else {+ |
|||
382 | -115x | +115 | +20x |
- which(occurrence)+ new_jk |
|
383 | +116 |
- }+ } |
|||
384 | +117 |
- }+ |
|||
385 | +118 |
-
+ #' @rdname join_keys |
|||
386 | +119 |
-
+ #' @order 2 |
|||
387 | +120 |
- # default_side_effects --------------------------------------------------------------------------------------------+ #' |
|||
388 | +121 |
-
+ #' @param directed (`logical(1)`) Flag that indicates whether it should create |
|||
389 | +122 |
- #' Detect library calls+ #' a parent-child relationship between the datasets. |
|||
390 | +123 |
- #'+ #' - `TRUE` (default) `dataset_1` is the parent of `dataset_2`; |
|||
391 | +124 |
- #' Detects `library()` and `require()` function calls.+ #' - `FALSE` when the relationship is undirected. |
|||
392 | +125 |
- #'+ #' @section Functions: |
|||
393 | +126 |
- #' @param calls_pd `list` of `data.frame`s;+ #' - `x[i, j] <- value`: Assignment of a key to pair `(i, j)`. |
|||
394 | +127 |
- #' result of `utils::getParseData()` split into subsets representing individual calls;+ #' - `x[i] <- value`: This (without `j` parameter) **is not** a supported |
|||
395 | +128 |
- #' created by `extract_calls()` function+ #' operation for `join_keys`. |
|||
396 | +129 |
- #'+ #' - `join_keys(x)[i, j] <- value`: Assignment to `join_keys` object stored in `x`, |
|||
397 | +130 |
- #' @return+ #' such as a `teal_data` object or `join_keys` object itself. |
|||
398 | +131 |
- #' Integer vector of indices that can be applied to `graph` (result of `code_graph()`) to obtain all calls containing+ #' |
|||
399 | +132 |
- #' `library()` or `require()` calls that are always returned for reproducibility.+ #' @export |
|||
400 | +133 |
- #'+ #' @examples |
|||
401 | +134 |
- #' @keywords internal+ #' |
|||
402 | +135 |
- #' @noRd+ #' # Setting a new primary key --- |
|||
403 | +136 |
- detect_libraries <- function(calls_pd) {- |
- |||
404 | -43x | -
- defaults <- c("library", "require")+ #' |
|||
405 | +137 | - - | -|||
406 | -43x | -
- which(- |
- |||
407 | -43x | -
- vapply(- |
- |||
408 | -43x | -
- calls_pd,- |
- |||
409 | -43x | -
- function(call) {- |
- |||
410 | -155x | -
- any(call$token == "SYMBOL_FUNCTION_CALL" & call$text %in% defaults)+ #' jk["ds4", "ds4"] <- "pk4" |
|||
411 | +138 |
- },- |
- |||
412 | -43x | -
- logical(1)+ #' jk["ds5", "ds5"] <- "pk5" |
|||
413 | +139 |
- )+ #' |
|||
414 | +140 |
- )+ #' # Setting a single relationship pair --- |
|||
415 | +141 |
- }+ #' |
1 | +142 |
- #' Verify code reproducibility+ #' jk["ds1", "ds4"] <- c("pk1" = "pk4") |
||
2 | +143 |
#' |
||
3 | +144 |
- #' Checks whether code in `teal_data` object reproduces the stored objects.+ #' # Removing a key --- |
||
4 | +145 |
#' |
||
5 | +146 |
- #' If objects created by code in the `@code` slot of `x` are `all_equal` to the contents of the `@env` slot,+ #' jk["ds5", "ds5"] <- NULL |
||
6 | +147 |
- #' the function updates the `@verified` slot to `TRUE` in the returned `teal_data` object.+ `[<-.join_keys` <- function(x, i, j, directed = TRUE, value) { |
||
7 | -+ | |||
148 | +11x |
- #' Once verified, the slot will always be set to `TRUE`.+ checkmate::assert_flag(directed) |
||
8 | -+ | |||
149 | +11x |
- #' If the `@code` fails to recreate objects in `teal_data@env`, an error is raised.+ if (missing(i) || missing(j)) { |
||
9 | -+ | |||
150 | +4x |
- #'+ stop("join_keys[i, j] specify both indices to set a key pair.") |
||
10 | -+ | |||
151 | +7x |
- #' @return Input `teal_data` object or error.+ } else if (!missing(i) && is.null(i) || !missing(j) && is.null(j)) { |
||
11 | -+ | |||
152 | +2x |
- #'+ stop("join_keys[i, j] neither i nor j can be NULL.") |
||
12 | +153 |
- #' @param x `teal_data` object+ } else if ( |
||
13 | -+ | |||
154 | +5x |
- #' @examples+ !any( |
||
14 | -+ | |||
155 | +5x |
- #' tdata1 <- teal_data()+ checkmate::test_string(i), |
||
15 | -+ | |||
156 | +5x |
- #' tdata1 <- within(tdata1, {+ checkmate::test_number(i), |
||
16 | -+ | |||
157 | +5x |
- #' a <- 1+ checkmate::test_logical(i, len = length(x)) && sum(j) == 1 |
||
17 | +158 |
- #' b <- a^5+ ) || |
||
18 | -+ | |||
159 | +5x |
- #' c <- list(x = 2)+ !any( |
||
19 | -+ | |||
160 | +5x |
- #' })+ checkmate::test_string(j), |
||
20 | -+ | |||
161 | +5x |
- #' verify(tdata1)+ checkmate::test_number(j),+ |
+ ||
162 | +5x | +
+ checkmate::test_logical(j, len = length(x)) && sum(j) == 1 |
||
21 | +163 |
- #'+ ) |
||
22 | +164 |
- #' tdata2 <- teal_data(x1 = iris, code = "x1 <- iris")+ ) {+ |
+ ||
165 | +2x | +
+ stop(+ |
+ ||
166 | +2x | +
+ "join_keys[i, j] <- Can't set keys to specified indices.\n",+ |
+ ||
167 | +2x | +
+ "When setting pair [i, j], both indices must point to a single key pair.\n",+ |
+ ||
168 | +2x | +
+ call. = FALSE |
||
23 | +169 |
- #' verify(tdata2)+ ) |
||
24 | +170 |
- #' verify(tdata2)@verified+ } |
||
25 | +171 |
- #' tdata2@verified+ |
||
26 | +172 |
- #'+ # Handle join key removal separately+ |
+ ||
173 | +3x | +
+ if (is.null(value)) {+ |
+ ||
174 | +1x | +
+ x[[i]][[j]] <- NULL+ |
+ ||
175 | +1x | +
+ return(x) |
||
27 | +176 |
- #' tdata3 <- teal_data()+ } |
||
28 | +177 |
- #' tdata3 <- within(tdata3, {+ + |
+ ||
178 | +2x | +
+ c(x, join_key(i, j, value, directed)) |
||
29 | +179 |
- #' stop("error")+ } |
||
30 | +180 |
- #' })+ |
||
31 | +181 |
- #' try(verify(tdata3)) # fails+ #' @rdname join_keys |
||
32 | +182 |
#' |
||
33 | +183 |
- #'+ #' @order 1000 |
||
34 | +184 |
- #' a <- 1+ #' @usage ## Preferred method is x[i, j] <- value |
||
35 | +185 |
- #' b <- a + 2+ #' x[[i]][[j]] <- value |
||
36 | +186 |
- #' c <- list(x = 2)+ #' |
||
37 | +187 |
- #' d <- 5+ #' @section Functions: |
||
38 | +188 |
- #' tdata4 <- teal_data(+ #' - `x[[i]][[j]] <- value`: It is equivalent as `x[i, j] <- value`. |
||
39 | +189 |
- #' a = a, b = b, c = c, d = d,+ #' |
||
40 | +190 |
- #' code = "a <- 1+ #' @export |
||
41 | +191 |
- #' b <- a+ #' @examples |
||
42 | +192 |
- #' c <- list(x = 2)+ #' |
||
43 | +193 |
- #' e <- 1"+ #' # Setting via x[[i]] <- value --- |
||
44 | +194 |
- #' )+ #' |
||
45 | +195 |
- #' tdata4+ #' jk <- join_keys() |
||
46 | +196 |
- #' try(verify(tdata4)) # fails+ #' jk[["ds6"]][["ds6"]] <- "pk6" |
||
47 | +197 |
- #'+ #' jk[["ds7"]] <- list(ds7 = "pk7", ds6 = c(pk7 = "pk6")) |
||
48 | +198 |
- #' @name verify+ #' jk[["ds7"]][["ds7"]] <- NULL # removes key |
||
49 | +199 |
- #' @rdname verify+ #' |
||
50 | +200 |
- #' @aliases verify,teal_data-method+ #' jk |
||
51 | +201 |
- #' @aliases verify,qenv.error-method+ #' |
||
52 | +202 |
- #'+ #' @noRd |
||
53 | +203 |
- #' @export+ `[[<-.join_keys` <- function(x, i, value) { |
||
54 | -5x | +204 | +382x |
- setGeneric("verify", function(x) standardGeneric("verify"))+ checkmate::assert(+ |
+
205 | +382x | +
+ combine = "or",+ |
+ ||
206 | +382x | +
+ checkmate::check_string(i),+ |
+ ||
207 | +382x | +
+ checkmate::check_number(i),+ |
+ ||
208 | +382x | +
+ checkmate::check_logical(i, len = length(x)) |
||
55 | +209 |
- setMethod("verify", signature = "teal_data", definition = function(x) {+ ) |
||
56 | -4x | +210 | +382x |
- if (x@verified) {+ checkmate::assert_list(value, names = "named", types = "character", null.ok = TRUE) |
57 | -2x | +211 | +375x |
- return(x)+ if (checkmate::test_numeric(i) || checkmate::test_logical(i)) {+ |
+
212 | +1x | +
+ i <- names(x)[[i]] |
||
58 | +213 |
} |
||
214 | ++ | + + | +||
215 | ++ |
+ # Normalize values+ |
+ ||
59 | -2x | +216 | +375x |
- x_name <- deparse(substitute(x))+ norm_value <- lapply(seq_along(value), function(.x) { |
60 | -2x | +217 | +524x |
- y <- eval_code(teal_data(), get_code(x))+ join_key(i, names(value)[.x], value[[.x]]) |
61 | +218 | ++ |
+ })+ |
+ |
219 | +375x | +
+ names(norm_value) <- names(value)+ |
+ ||
220 | ||||
221 | ++ |
+ # Check if multiple modifications don't have a conflict+ |
+ ||
62 | -2x | +222 | +375x |
- if (inherits(y, "qenv.error")) {+ repeated_value_ix <- names(value) %in% names(value)[duplicated(names(value))] |
63 | -! | +|||
223 | +375x |
- stop(conditionMessage(y), call. = FALSE)+ repeated <- norm_value[repeated_value_ix]+ |
+ ||
224 | +375x | +
+ vapply(+ |
+ ||
225 | +375x | +
+ seq_along(repeated),+ |
+ ||
226 | +375x | +
+ function(.ix, .x_value = repeated[[.ix]], .x_name = names(.x_value[[1]])) {+ |
+ ||
227 | +3x | +
+ assert_compatible_keys2(+ |
+ ||
228 | +3x | +
+ .x_value,+ |
+ ||
229 | +3x | +
+ unlist(unname(+ |
+ ||
230 | +3x | +
+ repeated[-.ix][names(repeated[-.ix]) == .x_name]+ |
+ ||
231 | +3x | +
+ ), recursive = FALSE) |
||
64 | +232 |
- }+ ) |
||
65 | +233 |
-
+ }, |
||
66 | -2x | +234 | +375x |
- reproduced <- isTRUE(all.equal(x@env, y@env))+ logical(1)+ |
+
235 | ++ |
+ )+ |
+ ||
236 | ++ | + | ||
67 | -2x | +237 | +374x |
- if (reproduced) {+ norm_value <- lapply(norm_value, function(x) x[[1]][[1]]) |
68 | -1x | +238 | +374x |
- x@verified <- TRUE+ names(norm_value) <- names(value)+ |
+
239 | ++ | + + | +||
240 | ++ |
+ # Safe to do as duplicated are the same |
||
69 | -1x | +241 | +374x |
- methods::validObject(x)+ norm_value[duplicated(names(norm_value))] <- NULL+ |
+
242 | ++ | + + | +||
243 | ++ |
+ # Keep only elements with length > 0L |
||
70 | -1x | +244 | +374x |
- x+ norm_value <- Filter(length, norm_value) |
71 | +245 | ++ | + + | +|
246 |
- } else {+ # Remove classes to use list-based get/assign operations |
|||
72 | -1x | +247 | +374x |
- error <- "Code verification failed."+ new_x <- unclass(x) |
73 | +248 | |||
249 | ++ |
+ # In case a pair is removed, also remove the symmetric pair and update parents+ |
+ ||
74 | -1x | +250 | +374x |
- objects_diff <- vapply(+ removed_names <- setdiff(names(new_x[[i]]), names(norm_value)) |
75 | -1x | +251 | +374x |
- intersect(names(x@env), names(y@env)),+ for (.x in removed_names) { |
76 | -1x | +252 | +2x |
- function(element) {+ if (identical(parent(x, .x), i)) attr(new_x, "parents")[[.x]] <- NULL |
77 | +253 | 1x |
- isTRUE(all.equal(x@env[[element]], y@env[[element]]))+ if (identical(parent(x, i), .x)) attr(new_x, "parents")[[i]] <- NULL |
|
78 | +254 |
- },+ |
||
79 | -1x | +255 | +5x |
- logical(1)+ new_x[[.x]][[i]] <- NULL |
80 | +256 |
- )+ } |
||
81 | +257 | |||
82 | -1x | -
- names_diff_other <- setdiff(names(y@env), names(x@env))- |
- ||
83 | -1x | +258 | +374x |
- names_diff_inenv <- setdiff(names(x@env), names(y@env))+ new_x[[i]] <- norm_value |
84 | +259 | |||
85 | -1x | +|||
260 | +
- if (length(objects_diff)) {+ # Iterate on all new values to create symmetrical pair |
|||
86 | -1x | +261 | +374x |
- error <- c(+ for (ds2 in names(norm_value)) { |
87 | -1x | +262 | +298x |
- error,+ if (ds2 == i) next |
88 | -1x | +|||
263 | +
- paste0("Object(s) recreated with code that have different structure in ", x_name, ":"),+ |
|||
89 | -1x | +264 | +222x |
- paste0(" \u2022 ", names(which(!objects_diff)))+ keep_value <- if (is.null(x)) list() else new_x[[ds2]] |
90 | +265 |
- )+ |
||
91 | +266 |
- }+ # Invert key |
||
92 | -1x | -
- if (length(names_diff_inenv)) {- |
- ||
93 | -! | -
- error <- c(- |
- ||
94 | -! | -
- error,- |
- ||
95 | -! | +267 | +222x |
- paste0("Object(s) not created with code that exist in ", x_name, ":"),+ new_value <- stats::setNames(names(norm_value[[ds2]]), norm_value[[ds2]]) |
96 | -! | +|||
268 | +222x |
- paste0(" \u2022 ", names_diff_inenv)+ keep_value[[i]] <- new_value |
||
97 | +269 |
- )+ |
||
98 | +270 |
- }+ # Assign symmetrical |
||
99 | -1x | +271 | +222x |
- if (length(names_diff_other)) {+ new_x[[ds2]] <- keep_value |
100 | -! | +|||
272 | +
- error <- c(+ } |
|||
101 | -! | +|||
273 | +
- error,+ |
|||
102 | -! | +|||
274 | +374x |
- paste0("Object(s) created with code that do not exist in ", x_name, ":"),+ preserve_attr <- attributes(new_x)[!names(attributes(new_x)) %in% "names"] |
||
103 | -! | +|||
275 | +
- paste0(" \u2022 ", names_diff_other)+ # Remove NULL or empty keys |
|||
104 | -+ | |||
276 | +374x |
- )+ new_x <- Filter(function(x) length(x) != 0L, new_x) |
||
105 | -+ | |||
277 | +374x |
- }+ attributes(new_x) <- utils::modifyList(attributes(new_x), preserve_attr) |
||
106 | +278 | |||
107 | -1x | -
- stop(paste(error, collapse = "\n"), call. = FALSE)- |
- ||
108 | +279 |
- }+ # |
||
109 | +280 |
- })+ # restore class |
||
110 | -+ | |||
281 | +374x |
- setMethod("verify", signature = "qenv.error", definition = function(x) {+ class(new_x) <- class(x) |
||
111 | -1x | +282 | +374x |
- stop(conditionMessage(x), call. = FALSE)+ new_x |
112 | +283 |
- })+ } |
||
59 | -787x | +789x |
if (missing(...)) { |
|
60 | -225x | +227x |
return(new_join_keys()) |
@@ -13132,28 +13230,28 @@ |
156 | -355x | +357x |
structure( |
|
157 | -355x | +357x |
list(), |
|
158 | -355x | +357x |
class = c("join_keys", "list"), |
|
159 | -355x | +357x |
"parents" = list() |
@@ -14613,14 +14711,14 @@ |
32 | -77x | +79x |
data_objects <- rlang::list2(...) |
|
33 | -77x | +79x |
if (inherits(join_keys, "join_key_set")) { |
@@ -14641,7 +14739,7 @@ |
36 | -77x | +79x |
if (!missing(check)) { |
@@ -14718,28 +14816,28 @@ |
47 | -77x | +79x |
checkmate::test_list( |
|
48 | -77x | +79x |
data_objects, |
|
49 | -77x | +79x |
types = c("TealDataConnector", "TealDataset", "TealDatasetConnector"), |
|
50 | -77x | +79x |
min.len = 1 |
@@ -14816,7 +14914,7 @@ |
61 | -77x | +79x |
if (length(data_objects) > 0 && !checkmate::test_names(names(data_objects), type = "named")) { |
@@ -14837,28 +14935,28 @@ |
64 | -77x | +79x |
new_teal_data( |
|
65 | -77x | +79x |
data = data_objects, |
|
66 | -77x | +79x |
code = code, |
|
67 | -77x | +79x |
join_keys = join_keys |
@@ -16129,14 +16227,14 @@ |
105 | -49x | +51x |
checkmate::assert_character(datanames, min.len = 1L, null.ok = TRUE) |
|
106 | -49x | +51x |
checkmate::assert_flag(deparse) |
@@ -16150,14 +16248,14 @@ |
108 | -49x | +51x |
code <- if (!is.null(datanames)) { |
|
109 | -45x | +47x |
get_code_dependency(object@code, datanames) |
@@ -16192,7 +16290,7 @@ |
114 | -49x | +51x |
if (!object@verified) { |
@@ -16220,14 +16318,14 @@ |
118 | -49x | +51x |
if (deparse) { |
|
119 | -48x | +50x |
if (length(code) == 0) { |
@@ -16248,7 +16346,7 @@ |
122 | -47x | +49x |
paste(code, collapse = "\n") |
@@ -16737,35 +16835,35 @@ |
64 | -77x | +79x |
checkmate::assert_list(data) |
|
65 | -77x | +79x |
checkmate::assert_class(join_keys, "join_keys") |
|
66 | -46x | +48x |
if (is.null(datanames)) datanames <- character(0) # todo: allow to specify |
|
67 | -77x | +79x |
checkmate::assert_character(datanames) |
|
68 | -77x | +79x |
if (!any(is.language(code), is.character(code))) { |
@@ -16793,7 +16891,7 @@ |
72 | -77x | +79x |
if (is.language(code)) { |
@@ -16814,7 +16912,7 @@ |
75 | -77x | +79x |
if (length(code)) { |
@@ -16835,7 +16933,7 @@ |
78 | -77x | +79x |
verified <- (length(code) == 0L && length(data) == 0L) |
@@ -16849,7 +16947,7 @@ |
80 | -77x | +79x |
id <- sample.int(.Machine$integer.max, size = length(code)) |
@@ -16863,14 +16961,14 @@ |
82 | -77x | +79x |
new_env <- rlang::env_clone(list2env(data), parent = parent.env(.GlobalEnv)) |
|
83 | -77x | +79x |
lockEnvironment(new_env, bindings = TRUE) |
@@ -16884,70 +16982,70 @@ |
85 | -77x | +79x |
methods::new( |
|
86 | -77x | +79x |
"teal_data", |
|
87 | -77x | +79x |
env = new_env, |
|
88 | -77x | +79x |
code = code, |
|
89 | -77x | +79x |
warnings = rep("", length(code)), |
|
90 | -77x | +79x |
messages = rep("", length(code)), |
|
91 | -77x | +79x |
id = id, |
|
92 | -77x | +79x |
join_keys = join_keys, |
|
93 | -77x | +79x |
datanames = datanames, |
|
94 | -77x | +79x |
verified = verified |