diff --git a/release-candidate/coverage-report/index.html b/release-candidate/coverage-report/index.html new file mode 100644 index 000000000..3a80df44b --- /dev/null +++ b/release-candidate/coverage-report/index.html @@ -0,0 +1,17821 @@ + + +
+ + + + + + + + + + + + + + + + + + + + + + + +1 | ++ |
+ # get_code_dependency ----+ |
+
2 | ++ | + + | +
3 | ++ |
+ #' Get code dependency of an object+ |
+
4 | ++ |
+ #'+ |
+
5 | ++ |
+ #' Extract subset of code required to reproduce specific object(s), including code producing side-effects.+ |
+
6 | ++ |
+ #'+ |
+
7 | ++ |
+ #' Given a character vector with code, this function will extract the part of the code responsible for creating+ |
+
8 | ++ |
+ #' the variables specified by `names`.+ |
+
9 | ++ |
+ #' This includes the final call that creates the variable(s) in question as well as all _parent calls_,+ |
+
10 | ++ |
+ #' _i.e._ calls that create variables used in the final call and their parents, etc.+ |
+
11 | ++ |
+ #' Also included are calls that create side-effects like establishing connections.+ |
+
12 | ++ |
+ #'+ |
+
13 | ++ |
+ #' It is assumed that object dependency is established by using three assignment operators: `<-`, `=`, and `->` .+ |
+
14 | ++ |
+ #' Other assignment methods (`assign`, `<<-`) or non-standard-evaluation methods are not supported.+ |
+
15 | ++ |
+ #'+ |
+
16 | ++ |
+ #' Side-effects are not detected automatically and must be marked in the code.+ |
+
17 | ++ |
+ #' Add `# @linksto object` at the end of a line where a side-effect occurs to specify that this line is required+ |
+
18 | ++ |
+ #' to reproduce a variable called `object`.+ |
+
19 | ++ |
+ #'+ |
+
20 | ++ |
+ #' @param code `character` with the code.+ |
+
21 | ++ |
+ #' @param names `character` vector of object names.+ |
+
22 | ++ |
+ #' @param check_names `logical(1)` flag specifying if a warning for non-existing names should be displayed.+ |
+
23 | ++ |
+ #'+ |
+
24 | ++ |
+ #' @return Character vector, a subset of `code`.+ |
+
25 | ++ |
+ #' Note that subsetting is actually done on the calls `code`, not necessarily on the elements of the vector.+ |
+
26 | ++ |
+ #'+ |
+
27 | ++ |
+ #' @keywords internal+ |
+
28 | ++ |
+ get_code_dependency <- function(code, names, check_names = TRUE) {+ |
+
29 | +49x | +
+ checkmate::assert_character(code)+ |
+
30 | +49x | +
+ checkmate::assert_character(names, any.missing = FALSE)+ |
+
31 | ++ | + + | +
32 | +49x | +
+ if (identical(code, character(0)) || identical(trimws(code), "")) {+ |
+
33 | +2x | +
+ return(code)+ |
+
34 | ++ |
+ }+ |
+
35 | ++ | + + | +
36 | +47x | +
+ code <- parse(text = code, keep.source = TRUE)+ |
+
37 | +47x | +
+ pd <- utils::getParseData(code)+ |
+
38 | +47x | +
+ calls_pd <- extract_calls(pd)+ |
+
39 | ++ | + + | +
40 | +47x | +
+ if (check_names) {+ |
+
41 | ++ |
+ # Detect if names are actually in code.+ |
+
42 | +47x | +
+ symbols <- unlist(lapply(calls_pd, function(call) call[call$token == "SYMBOL", "text"]))+ |
+
43 | +47x | +
+ if (any(pd$text == "assign")) {+ |
+
44 | +4x | +
+ assign_calls <- Filter(function(call) find_call(call, "assign"), calls_pd)+ |
+
45 | +4x | +
+ ass_str <- unlist(lapply(assign_calls, function(call) call[call$token == "STR_CONST", "text"]))+ |
+
46 | +4x | +
+ ass_str <- gsub("^['\"]|['\"]$", "", ass_str)+ |
+
47 | +4x | +
+ symbols <- c(ass_str, symbols)+ |
+
48 | ++ |
+ }+ |
+
49 | +47x | +
+ if (!all(names %in% unique(symbols))) {+ |
+
50 | +1x | +
+ warning("Object(s) not found in code: ", toString(setdiff(names, symbols)))+ |
+
51 | ++ |
+ }+ |
+
52 | ++ |
+ }+ |
+
53 | ++ | + + | +
54 | +47x | +
+ graph <- code_graph(calls_pd)+ |
+
55 | +47x | +
+ ind <- unlist(lapply(names, function(x) graph_parser(x, graph)))+ |
+
56 | ++ | + + | +
57 | +47x | +
+ lib_ind <- detect_libraries(calls_pd)+ |
+
58 | ++ | + + | +
59 | +47x | +
+ as.character(code[sort(unique(c(lib_ind, ind)))])+ |
+
60 | ++ |
+ }+ |
+
61 | ++ | + + | +
62 | ++ |
+ #' Locate function call token+ |
+
63 | ++ |
+ #'+ |
+
64 | ++ |
+ #' Determine which row of parsed data is specific `SYMBOL_FUNCTION_CALL` token.+ |
+
65 | ++ |
+ #'+ |
+
66 | ++ |
+ #' Useful for determining occurrence of `assign` or `data` functions in an input call.+ |
+
67 | ++ |
+ #'+ |
+
68 | ++ |
+ #' @param call_pd `data.frame` as returned by `extract_calls()`+ |
+
69 | ++ |
+ #' @param text `character(1)` to look for in `text` column of `call_pd`+ |
+
70 | ++ |
+ #'+ |
+
71 | ++ |
+ #' @return+ |
+
72 | ++ |
+ #' Single integer specifying row in `call_pd` where `token` is `SYMBOL_FUNCTION_CALL` and `text` is `text`.+ |
+
73 | ++ |
+ #' 0 if not found.+ |
+
74 | ++ |
+ #'+ |
+
75 | ++ |
+ #' @keywords internal+ |
+
76 | ++ |
+ #' @noRd+ |
+
77 | ++ |
+ find_call <- function(call_pd, text) {+ |
+
78 | +353x | +
+ checkmate::check_data_frame(call_pd)+ |
+
79 | +353x | +
+ checkmate::check_names(call_pd, must.include = c("token", "text"))+ |
+
80 | +353x | +
+ checkmate::check_string(text)+ |
+
81 | ++ | + + | +
82 | +353x | +
+ ans <- which(call_pd$token == "SYMBOL_FUNCTION_CALL" & call_pd$text == text)+ |
+
83 | +353x | +
+ if (length(ans)) {+ |
+
84 | +25x | +
+ ans+ |
+
85 | ++ |
+ } else {+ |
+
86 | +328x | +
+ 0L+ |
+
87 | ++ |
+ }+ |
+
88 | ++ |
+ }+ |
+
89 | ++ | + + | +
90 | ++ |
+ #' Split the result of `utils::getParseData()` into separate calls+ |
+
91 | ++ |
+ #'+ |
+
92 | ++ |
+ #' @param pd (`data.frame`) A result of `utils::getParseData()`.+ |
+
93 | ++ |
+ #'+ |
+
94 | ++ |
+ #' @return+ |
+
95 | ++ |
+ #' A `list` of `data.frame`s.+ |
+
96 | ++ |
+ #' Each element is a subset of `pd` corresponding to one call in the original code from which `pd` was obtained.+ |
+
97 | ++ |
+ #' Only four columns (`"token"`, `"text"`, `"id"`, `"parent"`) are kept, the rest is discarded.+ |
+
98 | ++ |
+ #'+ |
+
99 | ++ |
+ #' @keywords internal+ |
+
100 | ++ |
+ #' @noRd+ |
+
101 | ++ |
+ extract_calls <- function(pd) {+ |
+
102 | +47x | +
+ calls <- lapply(+ |
+
103 | +47x | +
+ pd[pd$parent == 0, "id"],+ |
+
104 | +47x | +
+ function(parent) {+ |
+
105 | +167x | +
+ rbind(+ |
+
106 | +167x | +
+ pd[pd$id == parent, c("token", "text", "id", "parent")],+ |
+
107 | +167x | +
+ get_children(pd = pd, parent = parent)+ |
+
108 | ++ |
+ )+ |
+
109 | ++ |
+ }+ |
+
110 | ++ |
+ )+ |
+
111 | +47x | +
+ calls <- Filter(function(call) !(nrow(call) == 1 && call$token == "';'"), calls)+ |
+
112 | +47x | +
+ calls <- Filter(Negate(is.null), calls)+ |
+
113 | +47x | +
+ calls <- fix_shifted_comments(calls)+ |
+
114 | +47x | +
+ fix_arrows(calls)+ |
+
115 | ++ |
+ }+ |
+
116 | ++ | + + | +
117 | ++ |
+ #' @keywords internal+ |
+
118 | ++ |
+ #' @noRd+ |
+
119 | ++ |
+ get_children <- function(pd, parent) {+ |
+
120 | +2035x | +
+ idx_children <- abs(pd$parent) == parent+ |
+
121 | +2035x | +
+ children <- pd[idx_children, c("token", "text", "id", "parent")]+ |
+
122 | +2035x | +
+ if (nrow(children) == 0) {+ |
+
123 | +1190x | +
+ return(NULL)+ |
+
124 | ++ |
+ }+ |
+
125 | ++ | + + | +
126 | +845x | +
+ if (parent > 0) {+ |
+
127 | +845x | +
+ do.call(rbind, c(list(children), lapply(children$id, get_children, pd = pd)))+ |
+
128 | ++ |
+ }+ |
+
129 | ++ |
+ }+ |
+
130 | ++ | + + | +
131 | ++ |
+ #' Fixes edge case of comments being shifted to the next call.+ |
+
132 | ++ |
+ #' @keywords internal+ |
+
133 | ++ |
+ #' @noRd+ |
+
134 | ++ |
+ fix_shifted_comments <- function(calls) {+ |
+
135 | ++ |
+ # If the first or the second token is a @linksto COMMENT,+ |
+
136 | ++ |
+ # then it belongs to the previous call.+ |
+
137 | +47x | +
+ if (length(calls) >= 2) {+ |
+
138 | +45x | +
+ for (i in 2:length(calls)) {+ |
+
139 | +119x | +
+ comment_idx <- grep("@linksto", calls[[i]][, "text"])+ |
+
140 | +119x | +
+ if (isTRUE(comment_idx[1] <= 2)) {+ |
+
141 | +4x | +
+ calls[[i - 1]] <- rbind(+ |
+
142 | +4x | +
+ calls[[i - 1]],+ |
+
143 | +4x | +
+ calls[[i]][seq_len(comment_idx[1]), ]+ |
+
144 | ++ |
+ )+ |
+
145 | +4x | +
+ calls[[i]] <- calls[[i]][-seq_len(comment_idx[1]), ]+ |
+
146 | ++ |
+ }+ |
+
147 | ++ |
+ }+ |
+
148 | ++ |
+ }+ |
+
149 | +47x | +
+ Filter(nrow, calls)+ |
+
150 | ++ |
+ }+ |
+
151 | ++ | + + | +
152 | ++ |
+ #' Fixes edge case of `<-` assignment operator being called as function,+ |
+
153 | ++ |
+ #' which is \code{`<-`(y,x)} instead of traditional `y <- x`.+ |
+
154 | ++ |
+ #' @keywords internal+ |
+
155 | ++ |
+ #' @noRd+ |
+
156 | ++ |
+ fix_arrows <- function(calls) {+ |
+
157 | +47x | +
+ checkmate::assert_list(calls)+ |
+
158 | +47x | +
+ lapply(calls, function(call) {+ |
+
159 | +165x | +
+ sym_fun <- call$token == "SYMBOL_FUNCTION_CALL"+ |
+
160 | +165x | +
+ call[sym_fun, ] <- sub_arrows(call[sym_fun, ])+ |
+
161 | +165x | +
+ call+ |
+
162 | ++ |
+ })+ |
+
163 | ++ |
+ }+ |
+
164 | ++ | + + | +
165 | ++ |
+ #' Execution of assignment operator substitutions for a call.+ |
+
166 | ++ |
+ #' @keywords internal+ |
+
167 | ++ |
+ #' @noRd+ |
+
168 | ++ |
+ sub_arrows <- function(call) {+ |
+
169 | +165x | +
+ checkmate::assert_data_frame(call)+ |
+
170 | +165x | +
+ map <- data.frame(+ |
+
171 | +165x | +
+ row.names = c("`<-`", "`<<-`", "`=`"),+ |
+
172 | +165x | +
+ token = rep("LEFT_ASSIGN", 3),+ |
+
173 | +165x | +
+ text = rep("<-", 3)+ |
+
174 | ++ |
+ )+ |
+
175 | +165x | +
+ sub_ids <- call$text %in% rownames(map)+ |
+
176 | +165x | +
+ call[sub_ids, c("token", "text")] <- map[call$text[sub_ids], ]+ |
+
177 | +165x | +
+ call+ |
+
178 | ++ |
+ }+ |
+
179 | ++ | + + | +
180 | ++ |
+ # code_graph ----+ |
+
181 | ++ | + + | +
182 | ++ |
+ #' Create object dependencies graph within parsed code+ |
+
183 | ++ |
+ #'+ |
+
184 | ++ |
+ #' Builds dependency graph that identifies dependencies between objects in parsed code.+ |
+
185 | ++ |
+ #' Helps understand which objects depend on which.+ |
+
186 | ++ |
+ #'+ |
+
187 | ++ |
+ #' @param calls_pd `list` of `data.frame`s;+ |
+
188 | ++ |
+ #' result of `utils::getParseData()` split into subsets representing individual calls;+ |
+
189 | ++ |
+ #' created by `extract_calls()` function+ |
+
190 | ++ |
+ #'+ |
+
191 | ++ |
+ #' @return+ |
+
192 | ++ |
+ #' A list (of length of input `calls_pd`) where each element represents one call.+ |
+
193 | ++ |
+ #' Each element is a character vector listing names of objects that depend on this call+ |
+
194 | ++ |
+ #' and names of objects that this call depends on.+ |
+
195 | ++ |
+ #' Dependencies are listed after the `"<-"` string, e.g. `c("a", "<-", "b", "c")` means that in this call object `a`+ |
+
196 | ++ |
+ #' depends on objects `b` and `c`.+ |
+
197 | ++ |
+ #' If a call is tagged with `@linksto a`, then object `a` is understood to depend on that call.+ |
+
198 | ++ |
+ #'+ |
+
199 | ++ |
+ #' @keywords internal+ |
+
200 | ++ |
+ #' @noRd+ |
+
201 | ++ |
+ code_graph <- function(calls_pd) {+ |
+
202 | +47x | +
+ cooccurrence <- extract_occurrence(calls_pd)+ |
+
203 | ++ | + + | +
204 | +47x | +
+ side_effects <- extract_side_effects(calls_pd)+ |
+
205 | ++ | + + | +
206 | +47x | +
+ mapply(c, side_effects, cooccurrence, SIMPLIFY = FALSE)+ |
+
207 | ++ |
+ }+ |
+
208 | ++ | + + | +
209 | ++ |
+ #' Extract object occurrence+ |
+
210 | ++ |
+ #'+ |
+
211 | ++ |
+ #' Extracts objects occurrence within calls passed by `calls_pd`.+ |
+
212 | ++ |
+ #' Also detects which objects depend on which within a call.+ |
+
213 | ++ |
+ #'+ |
+
214 | ++ |
+ #' @param calls_pd `list` of `data.frame`s;+ |
+
215 | ++ |
+ #' result of `utils::getParseData()` split into subsets representing individual calls;+ |
+
216 | ++ |
+ #' created by `extract_calls()` function+ |
+
217 | ++ |
+ #'+ |
+
218 | ++ |
+ #' @return+ |
+
219 | ++ |
+ #' A list (of length of input `calls_pd`) where each element represents one call.+ |
+
220 | ++ |
+ #' Each element is a character vector listing names of objects that depend on this call+ |
+
221 | ++ |
+ #' and names of objects that this call depends on.+ |
+
222 | ++ |
+ #' Dependencies are listed after the `"<-"` string, e.g. `c("a", "<-", "b", "c")` means that in this call object `a`+ |
+
223 | ++ |
+ #' depends on objects `b` and `c`.+ |
+
224 | ++ |
+ #' If a call is tagged with `@linksto a`, then object `a` is understood to depend on that call.+ |
+
225 | ++ |
+ #'+ |
+
226 | ++ |
+ #' @keywords internal+ |
+
227 | ++ |
+ #' @noRd+ |
+
228 | ++ |
+ extract_occurrence <- function(calls_pd) {+ |
+
229 | +47x | +
+ is_in_function <- function(x) {+ |
+
230 | ++ |
+ # If an object is a function parameter,+ |
+
231 | ++ |
+ # then in calls_pd there is a `SYMBOL_FORMALS` entry for that object.+ |
+
232 | +151x | +
+ function_id <- x[x$token == "FUNCTION", "parent"]+ |
+
233 | +151x | +
+ if (length(function_id)) {+ |
+
234 | +11x | +
+ x$id %in% get_children(x, function_id[1])$id+ |
+
235 | ++ |
+ } else {+ |
+
236 | +140x | +
+ rep(FALSE, nrow(x))+ |
+
237 | ++ |
+ }+ |
+
238 | ++ |
+ }+ |
+
239 | +47x | +
+ in_parenthesis <- function(x) {+ |
+
240 | +130x | +
+ if (any(x$token %in% c("LBB", "'['"))) {+ |
+
241 | +5x | +
+ id_start <- min(x$id[x$token %in% c("LBB", "'['")])+ |
+
242 | +5x | +
+ id_end <- min(x$id[x$token == "']'"])+ |
+
243 | +5x | +
+ x$text[x$token == "SYMBOL" & x$id > id_start & x$id < id_end]+ |
+
244 | ++ |
+ }+ |
+
245 | ++ |
+ }+ |
+
246 | +47x | +
+ lapply(+ |
+
247 | +47x | +
+ calls_pd,+ |
+
248 | +47x | +
+ function(call_pd) {+ |
+
249 | ++ |
+ # Handle data(object)/data("object")/data(object, envir = ) independently.+ |
+
250 | +165x | +
+ data_call <- find_call(call_pd, "data")+ |
+
251 | +165x | +
+ if (data_call) {+ |
+
252 | +3x | +
+ sym <- call_pd[data_call + 1, "text"]+ |
+
253 | +3x | +
+ return(c(gsub("^['\"]|['\"]$", "", sym), "<-"))+ |
+
254 | ++ |
+ }+ |
+
255 | ++ |
+ # Handle assign(x = ).+ |
+
256 | +162x | +
+ assign_call <- find_call(call_pd, "assign")+ |
+
257 | +162x | +
+ if (assign_call) {+ |
+
258 | ++ |
+ # Check if parameters were named.+ |
+
259 | ++ |
+ # "','" is for unnamed parameters, where "SYMBOL_SUB" is for named.+ |
+
260 | ++ |
+ # "EQ_SUB" is for `=` appearing after the name of the named parameter.+ |
+
261 | +11x | +
+ if (any(call_pd$token == "SYMBOL_SUB")) {+ |
+
262 | +8x | +
+ params <- call_pd[call_pd$token %in% c("SYMBOL_SUB", "','", "EQ_SUB"), "text"]+ |
+
263 | ++ |
+ # Remove sequence of "=", ",".+ |
+
264 | +8x | +
+ if (length(params > 1)) {+ |
+
265 | +8x | +
+ remove <- integer(0)+ |
+
266 | +8x | +
+ for (i in 2:length(params)) {+ |
+
267 | +36x | +
+ if (params[i - 1] == "=" & params[i] == ",") {+ |
+
268 | +8x | +
+ remove <- c(remove, i - 1, i)+ |
+
269 | ++ |
+ }+ |
+
270 | ++ |
+ }+ |
+
271 | +7x | +
+ if (length(remove)) params <- params[-remove]+ |
+
272 | ++ |
+ }+ |
+
273 | +8x | +
+ pos <- match("x", setdiff(params, ","), nomatch = match(",", params, nomatch = 0))+ |
+
274 | +8x | +
+ if (!pos) {+ |
+
275 | +! | +
+ return(character(0L))+ |
+
276 | ++ |
+ }+ |
+
277 | ++ |
+ # pos is indicator of the place of 'x'+ |
+
278 | ++ |
+ # 1. All parameters are named, but none is 'x' - return(character(0L))+ |
+
279 | ++ |
+ # 2. Some parameters are named, 'x' is in named parameters: match("x", setdiff(params, ","))+ |
+
280 | ++ |
+ # - check "x" in params being just a vector of named parameters.+ |
+
281 | ++ |
+ # 3. Some parameters are named, 'x' is not in named parameters+ |
+
282 | ++ |
+ # - check first appearance of "," (unnamed parameter) in vector parameters.+ |
+
283 | ++ |
+ } else {+ |
+
284 | ++ |
+ # Object is the first entry after 'assign'.+ |
+
285 | +3x | +
+ pos <- 1+ |
+
286 | ++ |
+ }+ |
+
287 | +11x | +
+ sym <- call_pd[assign_call + pos, "text"]+ |
+
288 | +11x | +
+ return(c(gsub("^['\"]|['\"]$", "", sym), "<-"))+ |
+
289 | ++ |
+ }+ |
+
290 | ++ | + + | +
291 | ++ |
+ # What occurs in a function body is not tracked.+ |
+
292 | +151x | +
+ x <- call_pd[!is_in_function(call_pd), ]+ |
+
293 | +151x | +
+ sym_cond <- which(x$token %in% c("SYMBOL", "SYMBOL_FUNCTION_CALL"))+ |
+
294 | ++ | + + | +
295 | +151x | +
+ if (length(sym_cond) == 0) {+ |
+
296 | +2x | +
+ return(character(0L))+ |
+
297 | ++ |
+ }+ |
+
298 | ++ |
+ # Watch out for SYMBOLS after $ and @. For x$a x@a: x is object, a is not.+ |
+
299 | ++ |
+ # For x$a, a's ID is $'s ID-2 so we need to remove all IDs that have ID = $ID - 2.+ |
+
300 | +149x | +
+ dollar_ids <- x[x$token %in% c("'$'", "'@'"), "id"]+ |
+
301 | +149x | +
+ if (length(dollar_ids)) {+ |
+
302 | +12x | +
+ object_ids <- x[sym_cond, "id"]+ |
+
303 | +12x | +
+ after_dollar <- object_ids[(object_ids - 2) %in% dollar_ids]+ |
+
304 | +12x | +
+ sym_cond <- setdiff(sym_cond, which(x$id %in% after_dollar))+ |
+
305 | ++ |
+ }+ |
+
306 | ++ | + + | +
307 | +149x | +
+ ass_cond <- grep("ASSIGN", x$token)+ |
+
308 | +149x | +
+ if (!length(ass_cond)) {+ |
+
309 | +19x | +
+ return(c("<-", unique(x[sym_cond, "text"])))+ |
+
310 | ++ |
+ }+ |
+
311 | ++ | + + | +
312 | +130x | +
+ sym_cond <- sym_cond[sym_cond > ass_cond] # NOTE 1+ |
+
313 | ++ |
+ # If there was an assignment operation detect direction of it.+ |
+
314 | +130x | +
+ if (unique(x$text[ass_cond]) == "->") { # NOTE 2+ |
+
315 | +1x | +
+ sym_cond <- rev(sym_cond)+ |
+
316 | ++ |
+ }+ |
+
317 | ++ | + + | +
318 | +130x | +
+ after <- match(min(x$id[ass_cond]), sort(x$id[c(min(ass_cond), sym_cond)])) - 1+ |
+
319 | +130x | +
+ ans <- append(x[sym_cond, "text"], "<-", after = max(1, after))+ |
+
320 | +130x | +
+ roll <- in_parenthesis(call_pd)+ |
+
321 | +130x | +
+ if (length(roll)) {+ |
+
322 | +2x | +
+ c(setdiff(ans, roll), roll)+ |
+
323 | ++ |
+ } else {+ |
+
324 | +128x | +
+ ans+ |
+
325 | ++ |
+ }+ |
+
326 | ++ | + + | +
327 | ++ |
+ ### NOTE 2: What if there are 2 assignments: e.g. a <- b -> c.+ |
+
328 | ++ |
+ ### NOTE 1: For cases like 'eval(expression(b <- b + 2))' removes 'eval(expression('.+ |
+
329 | ++ |
+ }+ |
+
330 | ++ |
+ )+ |
+
331 | ++ |
+ }+ |
+
332 | ++ | + + | +
333 | ++ |
+ #' Extract side effects+ |
+
334 | ++ |
+ #'+ |
+
335 | ++ |
+ #' Extracts all object names from the code that are marked with `@linksto` tag.+ |
+
336 | ++ |
+ #'+ |
+
337 | ++ |
+ #' The code may contain functions calls that create side effects, e.g. modify the environment.+ |
+
338 | ++ |
+ #' Static code analysis may be insufficient to determine which objects are created or modified by such a function call.+ |
+
339 | ++ |
+ #' The `@linksto` comment tag is introduced to mark a call as having a (side) effect on one or more objects.+ |
+
340 | ++ |
+ #' With this tag a complete object dependency structure can be established.+ |
+
341 | ++ |
+ #' Read more about side effects and the usage of `@linksto` tag in [`get_code_dependencies()`] function.+ |
+
342 | ++ |
+ #'+ |
+
343 | ++ |
+ #' @param calls_pd `list` of `data.frame`s;+ |
+
344 | ++ |
+ #' result of `utils::getParseData()` split into subsets representing individual calls;+ |
+
345 | ++ |
+ #' created by `extract_calls()` function+ |
+
346 | ++ |
+ #'+ |
+
347 | ++ |
+ #' @return+ |
+
348 | ++ |
+ #' A list of length equal to that of `calls_pd`, where each element is a character vector of names of objects+ |
+
349 | ++ |
+ #' depending a call tagged with `@linksto` in a corresponding element of `calls_pd`.+ |
+
350 | ++ |
+ #'+ |
+
351 | ++ |
+ #' @keywords internal+ |
+
352 | ++ |
+ #' @noRd+ |
+
353 | ++ |
+ extract_side_effects <- function(calls_pd) {+ |
+
354 | +47x | +
+ lapply(+ |
+
355 | +47x | +
+ calls_pd,+ |
+
356 | +47x | +
+ function(x) {+ |
+
357 | +165x | +
+ linksto <- grep("@linksto", x[x$token == "COMMENT", "text"], value = TRUE)+ |
+
358 | +165x | +
+ unlist(strsplit(sub("\\s*#\\s*@linksto\\s+", "", linksto), "\\s+"))+ |
+
359 | ++ |
+ }+ |
+
360 | ++ |
+ )+ |
+
361 | ++ |
+ }+ |
+
362 | ++ | + + | +
363 | ++ |
+ # graph_parser ----+ |
+
364 | ++ | + + | +
365 | ++ |
+ #' Return the indices of calls needed to reproduce an object+ |
+
366 | ++ |
+ #'+ |
+
367 | ++ |
+ #' @param x The name of the object to return code for.+ |
+
368 | ++ |
+ #' @param graph A result of `code_graph()`.+ |
+
369 | ++ |
+ #'+ |
+
370 | ++ |
+ #' @return+ |
+
371 | ++ |
+ #' Integer vector of indices that can be applied to `graph` to obtain all calls required to reproduce object `x`.+ |
+
372 | ++ |
+ #'+ |
+
373 | ++ |
+ #' @keywords internal+ |
+
374 | ++ |
+ #' @noRd+ |
+
375 | ++ |
+ graph_parser <- function(x, graph) {+ |
+
376 | +196x | +
+ occurrence <- vapply(+ |
+
377 | +196x | +
+ graph, function(call) {+ |
+
378 | +566x | +
+ ind <- match("<-", call, nomatch = length(call) + 1L)+ |
+
379 | +566x | +
+ x %in% call[seq_len(ind - 1L)]+ |
+
380 | ++ |
+ },+ |
+
381 | +196x | +
+ logical(1)+ |
+
382 | ++ |
+ )+ |
+
383 | ++ | + + | +
384 | +196x | +
+ dependencies <- lapply(graph[occurrence], function(call) {+ |
+
385 | +105x | +
+ ind <- match("<-", call, nomatch = 0L)+ |
+
386 | +105x | +
+ call[(ind + 1L):length(call)]+ |
+
387 | ++ |
+ })+ |
+
388 | +196x | +
+ dependencies <- setdiff(unlist(dependencies), x)+ |
+
389 | ++ | + + | +
390 | +196x | +
+ if (length(dependencies) && any(occurrence)) {+ |
+
391 | +75x | +
+ dependency_ids <- lapply(dependencies, function(dependency) {+ |
+
392 | +148x | +
+ graph_parser(dependency, graph[1:max(which(occurrence))])+ |
+
393 | ++ |
+ })+ |
+
394 | +75x | +
+ sort(unique(c(which(occurrence), unlist(dependency_ids))))+ |
+
395 | ++ |
+ } else {+ |
+
396 | +121x | +
+ which(occurrence)+ |
+
397 | ++ |
+ }+ |
+
398 | ++ |
+ }+ |
+
399 | ++ | + + | +
400 | ++ | + + | +
401 | ++ |
+ # default_side_effects --------------------------------------------------------------------------------------------+ |
+
402 | ++ | + + | +
403 | ++ |
+ #' Detect library calls+ |
+
404 | ++ |
+ #'+ |
+
405 | ++ |
+ #' Detects `library()` and `require()` function calls.+ |
+
406 | ++ |
+ #'+ |
+
407 | ++ |
+ #' @param calls_pd `list` of `data.frame`s;+ |
+
408 | ++ |
+ #' result of `utils::getParseData()` split into subsets representing individual calls;+ |
+
409 | ++ |
+ #' created by `extract_calls()` function+ |
+
410 | ++ |
+ #'+ |
+
411 | ++ |
+ #' @return+ |
+
412 | ++ |
+ #' Integer vector of indices that can be applied to `graph` (result of `code_graph()`) to obtain all calls containing+ |
+
413 | ++ |
+ #' `library()` or `require()` calls that are always returned for reproducibility.+ |
+
414 | ++ |
+ #'+ |
+
415 | ++ |
+ #' @keywords internal+ |
+
416 | ++ |
+ #' @noRd+ |
+
417 | ++ |
+ detect_libraries <- function(calls_pd) {+ |
+
418 | +47x | +
+ defaults <- c("library", "require")+ |
+
419 | ++ | + + | +
420 | +47x | +
+ which(+ |
+
421 | +47x | +
+ vapply(+ |
+
422 | +47x | +
+ calls_pd,+ |
+
423 | +47x | +
+ function(call) {+ |
+
424 | +165x | +
+ any(call$token == "SYMBOL_FUNCTION_CALL" & call$text %in% defaults)+ |
+
425 | ++ |
+ },+ |
+
426 | +47x | +
+ logical(1)+ |
+
427 | ++ |
+ )+ |
+
428 | ++ |
+ )+ |
+
429 | ++ |
+ }+ |
+
1 | ++ |
+ #' Create a relationship between a pair of datasets+ |
+
2 | ++ |
+ #'+ |
+
3 | ++ |
+ #' @description+ |
+
4 | ++ |
+ #' `r lifecycle::badge("stable")`+ |
+
5 | ++ |
+ #'+ |
+
6 | ++ |
+ #' Create a relationship between two datasets, `dataset_1` and `dataset_2`.+ |
+
7 | ++ |
+ #' By default, this function establishes a directed relationship with `dataset_1` as the parent.+ |
+
8 | ++ |
+ #' If `dataset_2` is not specified, the function creates a primary key for `dataset_1`.+ |
+
9 | ++ |
+ #'+ |
+
10 | ++ |
+ #' @param dataset_1,dataset_2 (`character(1)`) Dataset names. When `dataset_2` is omitted,+ |
+
11 | ++ |
+ #' a primary key for `dataset_1` is created.+ |
+
12 | ++ |
+ #' @param keys (optionally named `character`) Column mapping between the datasets,+ |
+
13 | ++ |
+ #' where `names(keys)` maps columns in `dataset_1` corresponding to columns of+ |
+
14 | ++ |
+ #' `dataset_2` given by the elements of `keys`.+ |
+
15 | ++ |
+ #' - If unnamed, the same column names are used for both datasets.+ |
+
16 | ++ |
+ #' - If any element of the `keys` vector is empty with a non-empty name, then the name is+ |
+
17 | ++ |
+ #' used for both datasets.+ |
+
18 | ++ |
+ #' @param directed (`logical(1)`) Flag that indicates whether it should create+ |
+
19 | ++ |
+ #' a parent-child relationship between the datasets.+ |
+
20 | ++ |
+ #' - `TRUE` (default) `dataset_1` is the parent of `dataset_2`;+ |
+
21 | ++ |
+ #' - `FALSE` when the relationship is undirected.+ |
+
22 | ++ |
+ #'+ |
+
23 | ++ |
+ #' @return object of class `join_key_set` to be passed into `join_keys` function.+ |
+
24 | ++ |
+ #'+ |
+
25 | ++ |
+ #' @examples+ |
+
26 | ++ |
+ #' join_key("d1", "d2", c("A"))+ |
+
27 | ++ |
+ #' join_key("d1", "d2", c("A" = "B"))+ |
+
28 | ++ |
+ #' join_key("d1", "d2", c("A" = "B", "C"))+ |
+
29 | ++ |
+ #'+ |
+
30 | ++ |
+ #' @export+ |
+
31 | ++ |
+ #' @seealso [join_keys()], [parents()]+ |
+
32 | ++ |
+ #'+ |
+
33 | ++ |
+ join_key <- function(dataset_1, dataset_2 = dataset_1, keys, directed = TRUE) {+ |
+
34 | +1070x | +
+ checkmate::assert_string(dataset_1)+ |
+
35 | +1070x | +
+ checkmate::assert_string(dataset_2)+ |
+
36 | +1067x | +
+ checkmate::assert_character(keys, any.missing = FALSE)+ |
+
37 | +1062x | +
+ checkmate::assert_flag(directed)+ |
+
38 | ++ | + + | +
39 | +1062x | +
+ if (length(keys) > 0) {+ |
+
40 | +1060x | +
+ if (is.null(names(keys))) {+ |
+
41 | +520x | +
+ names(keys) <- keys+ |
+
42 | ++ |
+ }+ |
+
43 | ++ | + + | +
44 | +1060x | +
+ keys <- trimws(keys)+ |
+
45 | +1060x | +
+ names(keys) <- trimws(names(keys))+ |
+
46 | ++ | + + | +
47 | ++ |
+ # Remove keys with empty value and without name+ |
+
48 | +1060x | +
+ if (any(keys == "" & names(keys) == "")) {+ |
+
49 | +6x | +
+ message("Key with an empty value and name are ignored.")+ |
+
50 | +6x | +
+ keys <- keys[keys != "" & names(keys) != ""]+ |
+
51 | ++ |
+ }+ |
+
52 | ++ | + + | +
53 | ++ |
+ # Set name of keys without one: c("A") -> c("A" = "A")+ |
+
54 | +1060x | +
+ if (any(names(keys) == "")) {+ |
+
55 | +4x | +
+ names(keys)[names(keys) == ""] <- keys[names(keys) == ""]+ |
+
56 | ++ |
+ }+ |
+
57 | ++ | + + | +
58 | ++ |
+ # Set value of keys with empty string, but non-empty name: c("A" = "") -> c("A" = "A")+ |
+
59 | +1060x | +
+ if (any(keys == "")) {+ |
+
60 | +4x | +
+ keys[keys == ""] <- names(keys[keys == ""])+ |
+
61 | ++ |
+ }+ |
+
62 | ++ | + + | +
63 | +1060x | +
+ stopifnot(!is.null(names(keys)))+ |
+
64 | +1060x | +
+ stopifnot(!anyDuplicated(keys))+ |
+
65 | +1059x | +
+ stopifnot(!anyDuplicated(names(keys)))+ |
+
66 | ++ | + + | +
67 | +1058x | +
+ if (dataset_1 == dataset_2 && any(names(keys) != keys)) {+ |
+
68 | +2x | +
+ stop("Keys within a dataset must match exactly: keys = c('A' = 'B') are not allowed")+ |
+
69 | ++ |
+ }+ |
+
70 | ++ |
+ } else {+ |
+
71 | +2x | +
+ keys <- NULL+ |
+
72 | ++ |
+ }+ |
+
73 | ++ | + + | +
74 | +1058x | +
+ parents <- if (directed && dataset_1 != dataset_2) {+ |
+
75 | +392x | +
+ stats::setNames(list(dataset_1), dataset_2)+ |
+
76 | ++ |
+ } else {+ |
+
77 | +666x | +
+ list()+ |
+
78 | ++ |
+ }+ |
+
79 | ++ | + + | +
80 | +1058x | +
+ structure(+ |
+
81 | +1058x | +
+ list(+ |
+
82 | +1058x | +
+ structure(+ |
+
83 | +1058x | +
+ list(keys),+ |
+
84 | +1058x | +
+ names = dataset_2+ |
+
85 | ++ |
+ )+ |
+
86 | ++ |
+ ),+ |
+
87 | +1058x | +
+ names = dataset_1,+ |
+
88 | +1058x | +
+ class = "join_key_set",+ |
+
89 | +1058x | +
+ parents = parents+ |
+
90 | ++ |
+ )+ |
+
91 | ++ |
+ }+ |
+
1 | ++ |
+ #' Deprecated `TealData` class and related functions+ |
+
2 | ++ |
+ #'+ |
+
3 | ++ |
+ #' @description+ |
+
4 | ++ |
+ #' `r lifecycle::badge("deprecated")`+ |
+
5 | ++ |
+ #'+ |
+
6 | ++ |
+ #' The `TealData` class and associated functions have been deprecated. Use [teal_data()] instead.+ |
+
7 | ++ |
+ #' See the [Migration guide](https://github.com/insightsengineering/teal/discussions/945) for details.+ |
+
8 | ++ |
+ #'+ |
+
9 | ++ |
+ #' @name TealData+ |
+
10 | ++ |
+ #'+ |
+
11 | ++ |
+ #' @param ... any argument supported in `TealData` related functions.+ |
+
12 | ++ |
+ #'+ |
+
13 | ++ |
+ #' @return nothing+ |
+
14 | ++ |
+ #' @seealso [cdisc_data()] , [join_keys()]+ |
+
15 | ++ |
+ #'+ |
+
16 | ++ |
+ NULL+ |
+
17 | ++ | + + | +
18 | ++ |
+ .deprecate_function <- function(what, details) {+ |
+
19 | +! | +
+ lifecycle::deprecate_stop(+ |
+
20 | +! | +
+ when = "0.4.0",+ |
+
21 | +! | +
+ what = what,+ |
+
22 | +! | +
+ details = details+ |
+
23 | ++ |
+ )+ |
+
24 | ++ |
+ }+ |
+
25 | ++ | + + | +
26 | ++ |
+ deprecation_detail <- "Find more information on https://github.com/insightsengineering/teal/discussions/945"+ |
+
27 | ++ | + + | +
28 | ++ |
+ #' @rdname TealData+ |
+
29 | ++ |
+ #' @export+ |
+
30 | ++ |
+ as_cdisc <- function(...) {+ |
+
31 | +! | +
+ .deprecate_function("as_cdisc()", deprecation_detail)+ |
+
32 | ++ |
+ }+ |
+
33 | ++ | + + | +
34 | ++ |
+ #' @rdname TealData+ |
+
35 | ++ |
+ #' @export+ |
+
36 | ++ |
+ callable_code <- function(...) {+ |
+
37 | +! | +
+ .deprecate_function("callable_code()", deprecation_detail)+ |
+
38 | ++ |
+ }+ |
+
39 | ++ | + + | +
40 | ++ |
+ #' @rdname TealData+ |
+
41 | ++ |
+ #' @export+ |
+
42 | ++ |
+ callable_function <- function(...) {+ |
+
43 | +! | +
+ .deprecate_function("callable_function()", deprecation_detail)+ |
+
44 | ++ |
+ }+ |
+
45 | ++ | + + | +
46 | ++ |
+ #' @rdname TealData+ |
+
47 | ++ |
+ #' @export+ |
+
48 | ++ |
+ code_dataset_connector <- function(...) {+ |
+
49 | +! | +
+ .deprecate_function("code_dataset_connector()", deprecation_detail)+ |
+
50 | ++ |
+ }+ |
+
51 | ++ | + + | +
52 | ++ |
+ #' @rdname TealData+ |
+
53 | ++ |
+ #' @export+ |
+
54 | ++ |
+ code_cdisc_dataset_connector <- function(...) {+ |
+
55 | +! | +
+ .deprecate_function("code_cdisc_dataset_connector()", deprecation_detail)+ |
+
56 | ++ |
+ }+ |
+
57 | ++ | + + | +
58 | ++ |
+ #' @rdname TealData+ |
+
59 | ++ |
+ #' @export+ |
+
60 | ++ |
+ csv_dataset_connector <- function(...) {+ |
+
61 | +! | +
+ .deprecate_function("csv_dataset_connector()", deprecation_detail)+ |
+
62 | ++ |
+ }+ |
+
63 | ++ | + + | +
64 | ++ |
+ #' @rdname TealData+ |
+
65 | ++ |
+ #' @export+ |
+
66 | ++ |
+ csv_cdisc_dataset_connector <- function(...) {+ |
+
67 | +! | +
+ .deprecate_function("csv_cdisc_dataset_connector()", deprecation_detail)+ |
+
68 | ++ |
+ }+ |
+
69 | ++ | + + | +
70 | ++ |
+ #' @rdname TealData+ |
+
71 | ++ |
+ #' @export+ |
+
72 | ++ |
+ python_code <- function(...) {+ |
+
73 | +! | +
+ .deprecate_function("python_code()", deprecation_detail)+ |
+
74 | ++ |
+ }+ |
+
75 | ++ | + + | +
76 | ++ |
+ #' @rdname TealData+ |
+
77 | ++ |
+ #' @export+ |
+
78 | ++ |
+ python_dataset_connector <- function(...) {+ |
+
79 | +! | +
+ .deprecate_function("python_dataset_connector()", deprecation_detail)+ |
+
80 | ++ |
+ }+ |
+
81 | ++ | + + | +
82 | ++ |
+ #' @rdname TealData+ |
+
83 | ++ |
+ #' @export+ |
+
84 | ++ |
+ python_cdisc_dataset_connector <- function(...) {+ |
+
85 | +! | +
+ .deprecate_function("python_cdisc_dataset_connector()", deprecation_detail)+ |
+
86 | ++ |
+ }+ |
+
87 | ++ | + + | +
88 | ++ |
+ #' @rdname TealData+ |
+
89 | ++ |
+ #' @export+ |
+
90 | ++ |
+ cdisc_data_connector <- function(...) {+ |
+
91 | +! | +
+ .deprecate_function("cdisc_data_connector()", deprecation_detail)+ |
+
92 | ++ |
+ }+ |
+
93 | ++ | + + | +
94 | ++ |
+ #' @rdname TealData+ |
+
95 | ++ |
+ #' @export+ |
+
96 | ++ |
+ cdisc_dataset <- function(...) {+ |
+
97 | +! | +
+ .deprecate_function("cdisc_dataset()", deprecation_detail)+ |
+
98 | ++ |
+ }+ |
+
99 | ++ | + + | +
100 | ++ |
+ #' @rdname TealData+ |
+
101 | ++ |
+ #' @export+ |
+
102 | ++ |
+ cdisc_dataset_connector <- function(...) {+ |
+
103 | +! | +
+ .deprecate_function("cdisc_dataset_connector()", deprecation_detail)+ |
+
104 | ++ |
+ }+ |
+
105 | ++ | + + | +
106 | ++ |
+ #' @rdname TealData+ |
+
107 | ++ |
+ #' @export+ |
+
108 | ++ |
+ cdisc_dataset_connector_file <- function(...) {+ |
+
109 | +! | +
+ .deprecate_function("cdisc_dataset_connector_file()", deprecation_detail)+ |
+
110 | ++ |
+ }+ |
+
111 | ++ | + + | +
112 | ++ |
+ #' @rdname TealData+ |
+
113 | ++ |
+ #' @export+ |
+
114 | ++ |
+ cdisc_dataset_file <- function(...) {+ |
+
115 | +! | +
+ .deprecate_function("cdisc_dataset_file()", deprecation_detail)+ |
+
116 | ++ |
+ }+ |
+
117 | ++ | + + | +
118 | ++ |
+ #' @rdname TealData+ |
+
119 | ++ |
+ #' @export+ |
+
120 | ++ |
+ dataset <- function(...) {+ |
+
121 | +! | +
+ .deprecate_function("dataset()", deprecation_detail)+ |
+
122 | ++ |
+ }+ |
+
123 | ++ | + + | +
124 | ++ |
+ #' @rdname TealData+ |
+
125 | ++ |
+ #' @export+ |
+
126 | ++ |
+ dataset_connector <- function(...) {+ |
+
127 | +! | +
+ .deprecate_function("dataset_connector()", deprecation_detail)+ |
+
128 | ++ |
+ }+ |
+
129 | ++ | + + | +
130 | ++ |
+ #' @rdname TealData+ |
+
131 | ++ |
+ #' @export+ |
+
132 | ++ |
+ dataset_connector_file <- function(...) {+ |
+
133 | +! | +
+ .deprecate_function("dataset_connector_file()", deprecation_detail)+ |
+
134 | ++ |
+ }+ |
+
135 | ++ | + + | +
136 | ++ |
+ #' @rdname TealData+ |
+
137 | ++ |
+ #' @export+ |
+
138 | ++ |
+ dataset_file <- function(...) {+ |
+
139 | +! | +
+ .deprecate_function("dataset_file()", deprecation_detail)+ |
+
140 | ++ |
+ }+ |
+
141 | ++ | + + | +
142 | ++ |
+ #' @rdname TealData+ |
+
143 | ++ |
+ #' @export+ |
+
144 | ++ |
+ data_connection <- function(...) {+ |
+
145 | +! | +
+ .deprecate_function("data_connection()", deprecation_detail)+ |
+
146 | ++ |
+ }+ |
+
147 | ++ | + + | +
148 | ++ |
+ #' @rdname TealData+ |
+
149 | ++ |
+ #' @export+ |
+
150 | ++ |
+ fun_dataset_connector <- function(...) {+ |
+
151 | +! | +
+ .deprecate_function("fun_dataset_connector()", deprecation_detail)+ |
+
152 | ++ |
+ }+ |
+
153 | ++ | + + | +
154 | ++ |
+ #' @rdname TealData+ |
+
155 | ++ |
+ #' @export+ |
+
156 | ++ |
+ fun_cdisc_dataset_connector <- function(...) {+ |
+
157 | +! | +
+ .deprecate_function("fun_cdisc_dataset_connector()", deprecation_detail)+ |
+
158 | ++ |
+ }+ |
+
159 | ++ | + + | +
160 | ++ |
+ #' @rdname TealData+ |
+
161 | ++ |
+ #' @export+ |
+
162 | ++ |
+ relational_data_connector <- function(...) {+ |
+
163 | +! | +
+ .deprecate_function("relational_data_connector()", deprecation_detail)+ |
+
164 | ++ |
+ }+ |
+
165 | ++ | + + | +
166 | ++ |
+ #' @rdname TealData+ |
+
167 | ++ |
+ #' @export+ |
+
168 | ++ |
+ mae_dataset <- function(...) {+ |
+
169 | +! | +
+ .deprecate_function("mae_dataset()", deprecation_detail)+ |
+
170 | ++ |
+ }+ |
+
171 | ++ | + + | +
172 | ++ |
+ #' @rdname TealData+ |
+
173 | ++ |
+ #' @export+ |
+
174 | ++ |
+ get_attrs <- function(...) {+ |
+
175 | +! | +
+ .deprecate_function("get_attrs()", deprecation_detail)+ |
+
176 | ++ |
+ }+ |
+
177 | ++ | + + | +
178 | ++ |
+ #' @rdname TealData+ |
+
179 | ++ |
+ #' @export+ |
+
180 | ++ |
+ get_dataset_label <- function(...) {+ |
+
181 | +! | +
+ .deprecate_function("get_dataset_label()", deprecation_detail)+ |
+
182 | ++ |
+ }+ |
+
183 | ++ | + + | +
184 | ++ |
+ #' @rdname TealData+ |
+
185 | ++ |
+ #' @export+ |
+
186 | ++ |
+ get_dataset <- function(...) {+ |
+
187 | +! | +
+ .deprecate_function("get_dataset()", deprecation_detail)+ |
+
188 | ++ |
+ }+ |
+
189 | ++ | + + | +
190 | ++ |
+ #' @rdname TealData+ |
+
191 | ++ |
+ #' @export+ |
+
192 | ++ |
+ get_datasets <- function(...) {+ |
+
193 | +! | +
+ .deprecate_function("get_datasets()", deprecation_detail)+ |
+
194 | ++ |
+ }+ |
+
195 | ++ | + + | +
196 | ++ |
+ #' @rdname TealData+ |
+
197 | ++ |
+ #' @export+ |
+
198 | ++ |
+ get_dataname <- function(...) {+ |
+
199 | +! | +
+ .deprecate_function("get_dataname()", deprecation_detail)+ |
+
200 | ++ |
+ }+ |
+
201 | ++ | + + | +
202 | ++ |
+ #' @rdname TealData+ |
+
203 | ++ |
+ #' @export+ |
+
204 | ++ |
+ get_key_duplicates <- function(...) {+ |
+
205 | +! | +
+ .deprecate_function("get_key_duplicates()", deprecation_detail)+ |
+
206 | ++ |
+ }+ |
+
207 | ++ | + + | +
208 | ++ |
+ #' @rdname TealData+ |
+
209 | ++ |
+ #' @export+ |
+
210 | ++ |
+ get_keys <- function(...) {+ |
+
211 | +! | +
+ .deprecate_function("get_keys()", deprecation_detail)+ |
+
212 | ++ |
+ }+ |
+
213 | ++ | + + | +
214 | ++ |
+ #' @rdname TealData+ |
+
215 | ++ |
+ #' @export+ |
+
216 | ++ |
+ get_raw_data <- function(...) {+ |
+
217 | +! | +
+ .deprecate_function("get_raw_data()", deprecation_detail)+ |
+
218 | ++ |
+ }+ |
+
219 | ++ | + + | +
220 | ++ |
+ #' @rdname TealData+ |
+
221 | ++ |
+ #' @export+ |
+
222 | ++ |
+ is_pulled <- function(...) {+ |
+
223 | +! | +
+ .deprecate_function("is_pulled()", deprecation_detail)+ |
+
224 | ++ |
+ }+ |
+
225 | ++ | + + | +
226 | ++ |
+ #' @rdname TealData+ |
+
227 | ++ |
+ #' @export+ |
+
228 | ++ |
+ load_dataset <- function(...) {+ |
+
229 | +! | +
+ .deprecate_function("load_dataset()", deprecation_detail)+ |
+
230 | ++ |
+ }+ |
+
231 | ++ | + + | +
232 | ++ |
+ #' @rdname TealData+ |
+
233 | ++ |
+ #' @export+ |
+
234 | ++ |
+ load_datasets <- function(...) {+ |
+
235 | +! | +
+ .deprecate_function("load_datasets()", deprecation_detail)+ |
+
236 | ++ |
+ }+ |
+
237 | ++ | + + | +
238 | ++ |
+ #' @rdname TealData+ |
+
239 | ++ |
+ #' @export+ |
+
240 | ++ |
+ mutate_data <- function(...) {+ |
+
241 | +! | +
+ .deprecate_function("mutate_data()", deprecation_detail)+ |
+
242 | ++ |
+ }+ |
+
243 | ++ | + + | +
244 | ++ |
+ #' @rdname TealData+ |
+
245 | ++ |
+ #' @export+ |
+
246 | ++ |
+ mutate_dataset <- function(...) {+ |
+
247 | +! | +
+ .deprecate_function("mutate_dataset()", deprecation_detail)+ |
+
248 | ++ |
+ }+ |
+
249 | ++ | + + | +
250 | ++ |
+ #' @rdname TealData+ |
+
251 | ++ |
+ #' @export+ |
+
252 | ++ |
+ set_args <- function(...) {+ |
+
253 | +! | +
+ .deprecate_function("set_args()", deprecation_detail)+ |
+
254 | ++ |
+ }+ |
+
255 | ++ | + + | +
256 | ++ |
+ #' @rdname TealData+ |
+
257 | ++ |
+ #' @export+ |
+
258 | ++ |
+ rds_dataset_connector <- function(...) {+ |
+
259 | +! | +
+ .deprecate_function("rds_dataset_connector()", deprecation_detail)+ |
+
260 | ++ |
+ }+ |
+
261 | ++ | + + | +
262 | ++ |
+ #' @rdname TealData+ |
+
263 | ++ |
+ #' @export+ |
+
264 | ++ |
+ rds_cdisc_dataset_connector <- function(...) {+ |
+
265 | +! | +
+ .deprecate_function("rds_cdisc_dataset_connector()", deprecation_detail)+ |
+
266 | ++ |
+ }+ |
+
267 | ++ | + + | +
268 | ++ |
+ #' @rdname TealData+ |
+
269 | ++ |
+ #' @export+ |
+
270 | ++ |
+ script_dataset_connector <- function(...) {+ |
+
271 | +! | +
+ .deprecate_function("script_dataset_connector()", deprecation_detail)+ |
+
272 | ++ |
+ }+ |
+
273 | ++ | + + | +
274 | ++ |
+ #' @rdname TealData+ |
+
275 | ++ |
+ #' @export+ |
+
276 | ++ |
+ script_cdisc_dataset_connector <- function(...) {+ |
+
277 | +! | +
+ .deprecate_function("script_cdisc_dataset_connector()", deprecation_detail)+ |
+
278 | ++ |
+ }+ |
+
279 | ++ | + + | +
280 | ++ |
+ #' @rdname TealData+ |
+
281 | ++ |
+ #' @export+ |
+
282 | ++ |
+ set_keys <- function(...) {+ |
+
283 | +! | +
+ .deprecate_function("set_keys()", deprecation_detail)+ |
+
284 | ++ |
+ }+ |
+
285 | ++ | + + | +
286 | ++ |
+ #' @rdname TealData+ |
+
287 | ++ |
+ #' @export+ |
+
288 | ++ |
+ read_script <- function(...) {+ |
+
289 | +! | +
+ .deprecate_function("read_script()", deprecation_detail)+ |
+
290 | ++ |
+ }+ |
+
291 | ++ | + + | +
292 | ++ |
+ #' @rdname TealData+ |
+
293 | ++ |
+ #' @export+ |
+
294 | ++ |
+ to_relational_data <- function(...) {+ |
+
295 | +! | +
+ .deprecate_function("to_relational_data()", deprecation_detail)+ |
+
296 | ++ |
+ }+ |
+
297 | ++ | + + | +
298 | ++ |
+ #' @rdname TealData+ |
+
299 | ++ |
+ #' @export+ |
+
300 | ++ |
+ validate_metadata <- function(...) {+ |
+
301 | +! | +
+ .deprecate_function("validate_metadata()", deprecation_detail)+ |
+
302 | ++ |
+ }+ |
+
303 | ++ | + + | +
304 | ++ |
+ #' @rdname TealData+ |
+
305 | ++ |
+ #' @export+ |
+
306 | ++ |
+ get_cdisc_keys <- function(...) {+ |
+
307 | +! | +
+ .deprecate_function("get_cdisc_keys()", deprecation_detail)+ |
+
308 | ++ |
+ }+ |
+
309 | ++ | + + | +
310 | ++ |
+ #' @rdname TealData+ |
+
311 | ++ |
+ #' @export+ |
+
312 | ++ |
+ cdisc_data_file <- function(...) {+ |
+
313 | +! | +
+ .deprecate_function("cdisc_data_file()", deprecation_detail)+ |
+
314 | ++ |
+ }+ |
+
315 | ++ | + + | +
316 | ++ |
+ #' @rdname TealData+ |
+
317 | ++ |
+ #' @export+ |
+
318 | ++ |
+ teal_data_file <- function(...) {+ |
+
319 | +! | +
+ .deprecate_function("teal_data_file()", deprecation_detail)+ |
+
320 | ++ |
+ }+ |
+
321 | ++ | + + | +
322 | ++ |
+ #' @rdname TealData+ |
+
323 | ++ |
+ #' @export+ |
+
324 | ++ |
+ get_join_keys <- function(...) {+ |
+
325 | +! | +
+ .deprecate_function("get_join_keys()", "Use `join_keys(data)` instead.")+ |
+
326 | ++ |
+ }+ |
+
327 | ++ | + + | +
328 | ++ |
+ #' @rdname TealData+ |
+
329 | ++ |
+ #' @param value value to assign+ |
+
330 | ++ |
+ #' @export+ |
+
331 | ++ |
+ `get_join_keys<-` <- function(..., value) {+ |
+
332 | +! | +
+ .deprecate_function("`get_join_keys<-`()", "Use `join_keys(x) <- ...`")+ |
+
333 | ++ |
+ }+ |
+
334 | ++ | + + | +
335 | ++ |
+ #' @rdname col_labels+ |
+
336 | ++ |
+ #' @include formatters_var_labels.R+ |
+
337 | ++ |
+ #' @details+ |
+
338 | ++ |
+ #' `r lifecycle::badge("deprecated")`+ |
+
339 | ++ |
+ #'+ |
+
340 | ++ |
+ #' In previous versions of `teal.data` labels were managed with `get_labels()`.+ |
+
341 | ++ |
+ #' This function is deprecated as of `0.4.0`, use `col_labels` instead.+ |
+
342 | ++ |
+ #' @export+ |
+
343 | ++ |
+ get_labels <- function(...) {+ |
+
344 | +! | +
+ .deprecate_function("get_labels()", "Use col_labels(data)")+ |
+
345 | ++ |
+ }+ |
+
1 | ++ |
+ #' Topological graph sort+ |
+
2 | ++ |
+ #'+ |
+
3 | ++ |
+ #' Graph is a `list` which for each node contains a vector of child nodes+ |
+
4 | ++ |
+ #' in the returned list, parents appear before their children.+ |
+
5 | ++ |
+ #'+ |
+
6 | ++ |
+ #' Implementation of `Kahn` algorithm with a modification to maintain the order of input elements.+ |
+
7 | ++ |
+ #'+ |
+
8 | ++ |
+ #' @param graph (`named list`) with node vector elements+ |
+
9 | ++ |
+ #' @keywords internal+ |
+
10 | ++ |
+ topological_sort <- function(graph) {+ |
+
11 | ++ |
+ # compute in-degrees+ |
+
12 | +428x | +
+ in_degrees <- list()+ |
+
13 | +428x | +
+ for (node in names(graph)) {+ |
+
14 | +168x | +
+ in_degrees[[node]] <- 0+ |
+
15 | +168x | +
+ for (to_edge in graph[[node]]) {+ |
+
16 | +168x | +
+ in_degrees[[to_edge]] <- 0+ |
+
17 | ++ |
+ }+ |
+
18 | ++ |
+ }+ |
+
19 | ++ | + + | +
20 | +428x | +
+ for (node in graph) {+ |
+
21 | +168x | +
+ for (to_edge in node) {+ |
+
22 | +168x | +
+ in_degrees[[to_edge]] <- in_degrees[[to_edge]] + 1+ |
+
23 | ++ |
+ }+ |
+
24 | ++ |
+ }+ |
+
25 | ++ | + + | +
26 | ++ |
+ # sort+ |
+
27 | +428x | +
+ visited <- 0+ |
+
28 | +428x | +
+ sorted <- list()+ |
+
29 | +428x | +
+ zero_in <- list()+ |
+
30 | +428x | +
+ for (node in names(in_degrees)) {+ |
+
31 | +142x | +
+ if (in_degrees[[node]] == 0) zero_in <- append(zero_in, node)+ |
+
32 | ++ |
+ }+ |
+
33 | +428x | +
+ zero_in <- rev(zero_in)+ |
+
34 | ++ | + + | +
35 | +428x | +
+ while (length(zero_in) != 0) {+ |
+
36 | +283x | +
+ visited <- visited + 1+ |
+
37 | +283x | +
+ sorted <- c(zero_in[[1]], sorted)+ |
+
38 | +283x | +
+ for (edge_to in graph[[zero_in[[1]]]]) {+ |
+
39 | +160x | +
+ in_degrees[[edge_to]] <- in_degrees[[edge_to]] - 1+ |
+
40 | +160x | +
+ if (in_degrees[[edge_to]] == 0) {+ |
+
41 | +141x | +
+ zero_in <- append(zero_in, edge_to, 1)+ |
+
42 | ++ |
+ }+ |
+
43 | ++ |
+ }+ |
+
44 | +283x | +
+ zero_in[[1]] <- NULL+ |
+
45 | ++ |
+ }+ |
+
46 | ++ | + + | +
47 | +428x | +
+ if (visited != length(in_degrees)) {+ |
+
48 | +4x | +
+ stop(+ |
+
49 | +4x | +
+ "Graph is not a directed acyclic graph. Cycles involving nodes: ",+ |
+
50 | +4x | +
+ paste0(setdiff(names(in_degrees), sorted), collapse = " ")+ |
+
51 | ++ |
+ )+ |
+
52 | ++ |
+ } else {+ |
+
53 | +424x | +
+ return(sorted)+ |
+
54 | ++ |
+ }+ |
+
55 | ++ |
+ }+ |
+
56 | ++ | + + | +
57 | ++ |
+ #' Checks whether a graph is a `Directed Acyclic Graph (DAG)`+ |
+
58 | ++ |
+ #'+ |
+
59 | ++ |
+ #' @inheritParams topological_sort+ |
+
60 | ++ |
+ #' @return `logical(1)` `TRUE` if the graph is a `DAG`; `FALSE` otherwise+ |
+
61 | ++ |
+ #' @keywords internal+ |
+
62 | ++ |
+ is_dag <- function(graph) {+ |
+
63 | +423x | +
+ inherits(try(topological_sort(graph), silent = TRUE), "try-error")+ |
+
64 | ++ |
+ }+ |
+
1 | ++ |
+ #' The names of a `join_keys` object+ |
+
2 | ++ |
+ #'+ |
+
3 | ++ |
+ #' @inheritParams base::`names<-`+ |
+
4 | ++ |
+ #' @export+ |
+
5 | ++ |
+ `names<-.join_keys` <- function(x, value) {+ |
+
6 | +2x | +
+ new_x <- unclass(x)+ |
+
7 | +2x | +
+ parent_list <- parents(x)+ |
+
8 | ++ |
+ # Update inner keys+ |
+
9 | +2x | +
+ for (old_name in setdiff(names(new_x), value)) {+ |
+
10 | +3x | +
+ old_entry <- new_x[[old_name]]+ |
+
11 | +3x | +
+ new_name <- value[names(new_x) == old_name]+ |
+
12 | ++ | + + | +
13 | ++ |
+ # Change 2nd-tier first+ |
+
14 | +3x | +
+ for (sub_name in names(old_entry)) {+ |
+
15 | +7x | +
+ names(new_x[[sub_name]])[names(new_x[[sub_name]]) == old_name] <- new_name+ |
+
16 | ++ |
+ }+ |
+
17 | ++ | + + | +
18 | ++ |
+ # Change in first tier+ |
+
19 | +3x | +
+ names(new_x)[names(new_x) == old_name] <- new_name+ |
+
20 | ++ | + + | +
21 | ++ |
+ # changing name in the parents+ |
+
22 | +3x | +
+ if (length(parent_list)) {+ |
+
23 | +3x | +
+ names(parent_list)[names(parent_list) == old_name] <- new_name+ |
+
24 | +3x | +
+ ind <- vapply(parent_list, identical, logical(1), old_name)+ |
+
25 | +3x | +
+ parent_list[ind] <- new_name+ |
+
26 | +3x | +
+ attr(new_x, "parents") <- parent_list+ |
+
27 | ++ |
+ }+ |
+
28 | ++ |
+ }+ |
+
29 | ++ | + + | +
30 | +2x | +
+ class(new_x) <- c("join_keys", "list")+ |
+
31 | +2x | +
+ new_x+ |
+
32 | ++ |
+ }+ |
+
1 | ++ |
+ #' Variable labels+ |
+
2 | ++ |
+ #'+ |
+
3 | ++ |
+ #' Get or set variable labels in a `data.frame`.+ |
+
4 | ++ |
+ #'+ |
+
5 | ++ |
+ #' @details Variable labels can be stored as a `label` attribute set on individual variables.+ |
+
6 | ++ |
+ #' These functions get or set this attribute, either on all (`col_labels`) or some variables (`col_relabel`).+ |
+
7 | ++ |
+ #'+ |
+
8 | ++ |
+ #' @param x (`data.frame` or `DataFrame`) data object+ |
+
9 | ++ |
+ #' @param fill (`logical(1)`) specifying what to return if variable has no label+ |
+
10 | ++ |
+ #' @param value (`character`) vector of variable labels of length equal to number of columns in `x`;+ |
+
11 | ++ |
+ #' if named, names must match variable names in `x` and will be used as key to set labels;+ |
+
12 | ++ |
+ #' use `NA` to remove label from variable+ |
+
13 | ++ |
+ #' @param ... name-value pairs, where name corresponds to a variable name in `x`+ |
+
14 | ++ |
+ #' and value is the new variable label; use `NA` to remove label from variable+ |
+
15 | ++ |
+ #'+ |
+
16 | ++ |
+ #' @return+ |
+
17 | ++ |
+ #' For `col_labels`, named character vector of variable labels, the names being the corresponding variable names.+ |
+
18 | ++ |
+ #' If the `label` attribute is missing, the vector elements will be+ |
+
19 | ++ |
+ #' the variable names themselves if `fill = TRUE` and `NA` if `fill = FALSE`.+ |
+
20 | ++ |
+ #'+ |
+
21 | ++ |
+ #' For `col_labels<-` and `col_relabel`, copy of `x` with variable labels modified.+ |
+
22 | ++ |
+ #'+ |
+
23 | ++ |
+ #' @examples+ |
+
24 | ++ |
+ #' x <- iris+ |
+
25 | ++ |
+ #' col_labels(x)+ |
+
26 | ++ |
+ #' col_labels(x) <- paste("label for", names(iris))+ |
+
27 | ++ |
+ #' col_labels(x)+ |
+
28 | ++ |
+ #' y <- col_relabel(x, Sepal.Length = "Sepal Length of iris flower")+ |
+
29 | ++ |
+ #' col_labels(y)+ |
+
30 | ++ |
+ #'+ |
+
31 | ++ |
+ #' @source These functions were taken from+ |
+
32 | ++ |
+ #' [formatters](https://cran.r-project.org/package=formatters) package, to reduce the complexity of+ |
+
33 | ++ |
+ #' the dependency tree and rewritten.+ |
+
34 | ++ |
+ #'+ |
+
35 | ++ |
+ #' @rdname col_labels+ |
+
36 | ++ |
+ #' @export+ |
+
37 | ++ |
+ #'+ |
+
38 | ++ |
+ col_labels <- function(x, fill = FALSE) {+ |
+
39 | +16x | +
+ checkmate::test_multi_class(x, c("data.frame", "DataFrame"))+ |
+
40 | +16x | +
+ checkmate::assert_flag(fill)+ |
+
41 | ++ | + + | +
42 | +16x | +
+ if (ncol(x) == 0L) {+ |
+
43 | +2x | +
+ return(character(0L))+ |
+
44 | ++ |
+ }+ |
+
45 | ++ | + + | +
46 | +14x | +
+ labels <- sapply(x, function(i) as.vector(attr(i, "label", exact = TRUE)), simplify = FALSE, USE.NAMES = TRUE)+ |
+
47 | +14x | +
+ mapply(+ |
+
48 | +14x | +
+ function(name, label) {+ |
+
49 | +62x | +
+ checkmate::assert_string(+ |
+
50 | +62x | +
+ label,+ |
+
51 | +62x | +
+ .var.name = sprintf("\"label\" attribute of column \"%s\"", name),+ |
+
52 | +62x | +
+ null.ok = TRUE+ |
+
53 | ++ |
+ )+ |
+
54 | ++ |
+ },+ |
+
55 | +14x | +
+ name = names(x),+ |
+
56 | +14x | +
+ label = labels+ |
+
57 | ++ |
+ )+ |
+
58 | ++ | + + | +
59 | +12x | +
+ nulls <- vapply(labels, is.null, logical(1L))+ |
+
60 | +12x | +
+ if (any(nulls)) {+ |
+
61 | +7x | +
+ labels[nulls] <-+ |
+
62 | +7x | +
+ if (fill) {+ |
+
63 | +1x | +
+ colnames(x)[nulls]+ |
+
64 | ++ |
+ } else {+ |
+
65 | +7x | +
+ NA_character_+ |
+
66 | ++ |
+ }+ |
+
67 | ++ |
+ }+ |
+
68 | ++ | + + | +
69 | +12x | +
+ unlist(labels)+ |
+
70 | ++ |
+ }+ |
+
71 | ++ | + + | +
72 | ++ |
+ #' @rdname col_labels+ |
+
73 | ++ |
+ #' @export+ |
+
74 | ++ |
+ `col_labels<-` <- function(x, value) {+ |
+
75 | +13x | +
+ checkmate::test_multi_class(x, c("data.frame", "DataFrame"))+ |
+
76 | +13x | +
+ checkmate::assert_character(value)+ |
+
77 | +12x | +
+ checkmate::assert_true(+ |
+
78 | +12x | +
+ ncol(x) == length(value),+ |
+
79 | +12x | +
+ .var.name = "Length of value is equal to the number of columns"+ |
+
80 | ++ |
+ )+ |
+
81 | ++ | + + | +
82 | +11x | +
+ varnames <-+ |
+
83 | +11x | +
+ if (is.null(names(value))) {+ |
+
84 | +4x | +
+ names(x)+ |
+
85 | +11x | +
+ } else if (any(names(value) == "")) {+ |
+
86 | +3x | +
+ specified_cols <- names(value)[names(value) != ""]+ |
+
87 | +3x | +
+ checkmate::assert_subset(specified_cols, names(x), .var.name = "names of value")+ |
+
88 | +3x | +
+ res <- names(value)+ |
+
89 | +3x | +
+ res[res == ""] <- setdiff(names(x), specified_cols)+ |
+
90 | +3x | +
+ res+ |
+
91 | ++ |
+ } else {+ |
+
92 | +4x | +
+ checkmate::assert_set_equal(names(value), names(x), .var.name = "names of value")+ |
+
93 | +3x | +
+ names(value)+ |
+
94 | ++ |
+ }+ |
+
95 | ++ | + + | +
96 | +10x | +
+ for (i in seq_along(value)) {+ |
+
97 | +40x | +
+ if (is.na(value[i])) {+ |
+
98 | +2x | +
+ attr(x[[varnames[i]]], "label") <- NULL+ |
+
99 | ++ |
+ } else {+ |
+
100 | +38x | +
+ attr(x[[varnames[i]]], "label") <- value[[i]]+ |
+
101 | ++ |
+ }+ |
+
102 | ++ |
+ }+ |
+
103 | +10x | +
+ x+ |
+
104 | ++ |
+ }+ |
+
105 | ++ | + + | +
106 | ++ |
+ #' @rdname col_labels+ |
+
107 | ++ |
+ #' @export+ |
+
108 | ++ |
+ col_relabel <- function(x, ...) {+ |
+
109 | +4x | +
+ checkmate::test_multi_class(x, c("data.frame", "DataFrame"))+ |
+
110 | +4x | +
+ if (missing(...)) {+ |
+
111 | +1x | +
+ return(x)+ |
+
112 | ++ |
+ }+ |
+
113 | +3x | +
+ value <- list(...)+ |
+
114 | +3x | +
+ varnames <- names(value)+ |
+
115 | ++ | + + | +
116 | +3x | +
+ checkmate::assert_subset(varnames, names(x), .var.name = "names of ...")+ |
+
117 | +2x | +
+ lapply(value, checkmate::assert_string, .var.name = "element of ...", na.ok = TRUE)+ |
+
118 | ++ | + + | +
119 | +2x | +
+ for (i in seq_along(value)) {+ |
+
120 | +2x | +
+ if (is.na(value[i])) {+ |
+
121 | +1x | +
+ attr(x[[varnames[i]]], "label") <- NULL+ |
+
122 | ++ |
+ } else {+ |
+
123 | +1x | +
+ attr(x[[varnames[i]]], "label") <- value[[i]]+ |
+
124 | ++ |
+ }+ |
+
125 | ++ |
+ }+ |
+
126 | +2x | +
+ x+ |
+
127 | ++ |
+ }+ |
+
1 | ++ |
+ #' @rdname join_keys+ |
+
2 | ++ |
+ #' @order 2+ |
+
3 | ++ |
+ #'+ |
+
4 | ++ |
+ #' @section Functions:+ |
+
5 | ++ |
+ #' - `x[datanames]`: Returns a subset of the `join_keys` object for+ |
+
6 | ++ |
+ #' given `datanames`, including parent `datanames` and symmetric mirror keys between+ |
+
7 | ++ |
+ #' `datanames` in the result.+ |
+
8 | ++ |
+ #' - `x[i, j]`: Returns join keys between datasets `i` and `j`,+ |
+
9 | ++ |
+ #' including implicit keys inferred from their relationship with a parent.+ |
+
10 | ++ |
+ #'+ |
+
11 | ++ |
+ #' @param i,j indices specifying elements to extract or replace. Index should be a+ |
+
12 | ++ |
+ #' a character vector, but it can also take numeric, logical, `NULL` or missing.+ |
+
13 | ++ |
+ #'+ |
+
14 | ++ |
+ #' @export+ |
+
15 | ++ |
+ #'+ |
+
16 | ++ |
+ #' @examples+ |
+
17 | ++ |
+ #' # Getter for join_keys ---+ |
+
18 | ++ |
+ #'+ |
+
19 | ++ |
+ #' jk["ds1", "ds2"]+ |
+
20 | ++ |
+ #'+ |
+
21 | ++ |
+ #' # Subsetting join_keys ----+ |
+
22 | ++ |
+ #'+ |
+
23 | ++ |
+ #' jk["ds1"]+ |
+
24 | ++ |
+ #' jk[1:2]+ |
+
25 | ++ |
+ #' jk[c("ds1", "ds2")]+ |
+
26 | ++ |
+ #'+ |
+
27 | ++ |
+ `[.join_keys` <- function(x, i, j) {+ |
+
28 | +31x | +
+ if (missing(i) && missing(j)) {+ |
+
29 | ++ |
+ # because:+ |
+
30 | ++ |
+ # - list(a = 1)[] returns list(a = 1)+ |
+
31 | ++ |
+ # - data.frame(a = 1)[] returns data.frame(a = 1)+ |
+
32 | +1x | +
+ return(x)+ |
+
33 | +30x | +
+ } else if (!missing(i) && is.null(i) || !missing(j) && is.null(j)) {+ |
+
34 | ++ |
+ # because list(a = 1)[NULL] returns NULL+ |
+
35 | ++ |
+ # data.frame(a = 1)[NULL, NULL] returns data.frame(+ |
+
36 | +2x | +
+ return(join_keys())+ |
+
37 | +28x | +
+ } else if (!missing(i) && !missing(j)) {+ |
+
38 | ++ |
+ if (+ |
+
39 | +8x | +
+ !any(+ |
+
40 | +8x | +
+ checkmate::test_string(i),+ |
+
41 | +8x | +
+ checkmate::test_number(i),+ |
+
42 | +8x | +
+ checkmate::test_logical(i, len = length(x)) && sum(j) == 1+ |
+
43 | ++ |
+ ) ||+ |
+
44 | +8x | +
+ !any(+ |
+
45 | +8x | +
+ checkmate::test_string(j),+ |
+
46 | +8x | +
+ checkmate::test_number(j),+ |
+
47 | +8x | +
+ checkmate::test_logical(j, len = length(x)) && sum(j) == 1+ |
+
48 | ++ |
+ )+ |
+
49 | ++ |
+ ) {+ |
+
50 | +1x | +
+ stop(+ |
+
51 | +1x | +
+ "join_keys[i, j] - Can't extract keys for multiple pairs.",+ |
+
52 | +1x | +
+ "When specifying a pair [i, j], both indices must point to a single key pair.\n",+ |
+
53 | +1x | +
+ call. = FALSE+ |
+
54 | ++ |
+ )+ |
+
55 | ++ |
+ }+ |
+
56 | +1x | +
+ if (is.numeric(i)) i <- names(x)[i]+ |
+
57 | +1x | +
+ if (is.numeric(j)) j <- names(x)[j]+ |
+
58 | ++ | + + | +
59 | +7x | +
+ subset_x <- update_keys_given_parents(x[union(i, j)])+ |
+
60 | +7x | +
+ return(subset_x[[i]][[j]])+ |
+
61 | +20x | +
+ } else if (!missing(j)) {+ |
+
62 | ++ |
+ # ie. select all keys which have j as dataset_2+ |
+
63 | ++ |
+ # since list is symmetrical it is equivalent to selecting by i+ |
+
64 | +1x | +
+ i <- j+ |
+
65 | ++ |
+ }+ |
+
66 | ++ | + + | +
67 | +20x | +
+ checkmate::assert(+ |
+
68 | +20x | +
+ combine = "or",+ |
+
69 | +20x | +
+ checkmate::check_character(i),+ |
+
70 | +20x | +
+ checkmate::check_numeric(i),+ |
+
71 | +20x | +
+ checkmate::check_logical(i)+ |
+
72 | ++ |
+ )+ |
+
73 | ++ | + + | +
74 | ++ | + + | +
75 | ++ |
+ # Convert integer/logical index to named index+ |
+
76 | +20x | +
+ if (checkmate::test_numeric(i) || checkmate::test_logical(i)) {+ |
+
77 | +2x | +
+ i <- names(x)[i]+ |
+
78 | ++ |
+ }+ |
+
79 | ++ | + + | +
80 | ++ |
+ # When retrieving a relationship pair, it will also return the symmetric key+ |
+
81 | +20x | +
+ new_jk <- new_join_keys()+ |
+
82 | +20x | +
+ queue <- unique(i)+ |
+
83 | +20x | +
+ bin <- character(0)+ |
+
84 | ++ | + + | +
85 | ++ |
+ # Need to iterate on a mutating queue if subset of a dataset will also+ |
+
86 | ++ |
+ # select its parent as that parent might have relationships with others+ |
+
87 | ++ |
+ # already selected.+ |
+
88 | +20x | +
+ while (length(queue) > 0) {+ |
+
89 | +45x | +
+ ix <- queue[1]+ |
+
90 | +45x | +
+ queue <- queue[-1]+ |
+
91 | +45x | +
+ bin <- c(bin, ix)+ |
+
92 | ++ | + + | +
93 | +45x | +
+ ix_parent <- parent(x, ix)+ |
+
94 | ++ | + + | +
95 | +45x | +
+ if (checkmate::test_string(ix_parent, min.chars = 1) && !ix_parent %in% c(queue, bin)) {+ |
+
96 | +10x | +
+ queue <- c(queue, ix_parent)+ |
+
97 | ++ |
+ }+ |
+
98 | ++ | + + | +
99 | +45x | +
+ ix_valid_names <- names(x[[ix]]) %in% c(queue, bin)+ |
+
100 | ++ | + + | +
101 | +45x | +
+ new_jk[[ix]] <- x[[ix]][ix_valid_names]+ |
+
102 | ++ | + + | +
103 | ++ |
+ # Add primary key of parent+ |
+
104 | +45x | +
+ if (length(ix_parent) > 0) {+ |
+
105 | +16x | +
+ new_jk[[ix_parent]][[ix_parent]] <- x[[ix_parent]][[ix_parent]]+ |
+
106 | ++ |
+ }+ |
+
107 | ++ |
+ }+ |
+
108 | ++ | + + | +
109 | +20x | +
+ common_parents_ix <- names(parents(x)) %in% names(new_jk) &+ |
+
110 | +20x | +
+ parents(x) %in% names(new_jk)+ |
+
111 | ++ | + + | +
112 | +9x | +
+ if (any(common_parents_ix)) parents(new_jk) <- parents(x)[common_parents_ix]+ |
+
113 | ++ | + + | +
114 | +20x | +
+ new_jk+ |
+
115 | ++ |
+ }+ |
+
116 | ++ | + + | +
117 | ++ |
+ #' @rdname join_keys+ |
+
118 | ++ |
+ #' @order 2+ |
+
119 | ++ |
+ #'+ |
+
120 | ++ |
+ #' @param directed (`logical(1)`) Flag that indicates whether it should create+ |
+
121 | ++ |
+ #' a parent-child relationship between the datasets.+ |
+
122 | ++ |
+ #' - `TRUE` (default) `dataset_1` is the parent of `dataset_2`;+ |
+
123 | ++ |
+ #' - `FALSE` when the relationship is undirected.+ |
+
124 | ++ |
+ #' @section Functions:+ |
+
125 | ++ |
+ #' - `x[i, j] <- value`: Assignment of a key to pair `(i, j)`.+ |
+
126 | ++ |
+ #' - `x[i] <- value`: This (without `j` parameter) **is not** a supported+ |
+
127 | ++ |
+ #' operation for `join_keys`.+ |
+
128 | ++ |
+ #' - `join_keys(x)[i, j] <- value`: Assignment to `join_keys` object stored in `x`,+ |
+
129 | ++ |
+ #' such as a `teal_data` object or `join_keys` object itself.+ |
+
130 | ++ |
+ #'+ |
+
131 | ++ |
+ #' @export+ |
+
132 | ++ |
+ #' @examples+ |
+
133 | ++ |
+ #' # Setting a new primary key ---+ |
+
134 | ++ |
+ #'+ |
+
135 | ++ |
+ #' jk["ds4", "ds4"] <- "pk4"+ |
+
136 | ++ |
+ #' jk["ds5", "ds5"] <- "pk5"+ |
+
137 | ++ |
+ #'+ |
+
138 | ++ |
+ #' # Setting a single relationship pair ---+ |
+
139 | ++ |
+ #'+ |
+
140 | ++ |
+ #' jk["ds1", "ds4"] <- c("pk1" = "pk4")+ |
+
141 | ++ |
+ #'+ |
+
142 | ++ |
+ #' # Removing a key ---+ |
+
143 | ++ |
+ #'+ |
+
144 | ++ |
+ #' jk["ds5", "ds5"] <- NULL+ |
+
145 | ++ |
+ `[<-.join_keys` <- function(x, i, j, directed = TRUE, value) {+ |
+
146 | +11x | +
+ checkmate::assert_flag(directed)+ |
+
147 | +11x | +
+ if (missing(i) || missing(j)) {+ |
+
148 | +4x | +
+ stop("join_keys[i, j] specify both indices to set a key pair.")+ |
+
149 | +7x | +
+ } else if (!missing(i) && is.null(i) || !missing(j) && is.null(j)) {+ |
+
150 | +2x | +
+ stop("join_keys[i, j] neither i nor j can be NULL.")+ |
+
151 | ++ |
+ } else if (+ |
+
152 | +5x | +
+ !any(+ |
+
153 | +5x | +
+ checkmate::test_string(i),+ |
+
154 | +5x | +
+ checkmate::test_number(i),+ |
+
155 | +5x | +
+ checkmate::test_logical(i, len = length(x)) && sum(j) == 1+ |
+
156 | ++ |
+ ) ||+ |
+
157 | +5x | +
+ !any(+ |
+
158 | +5x | +
+ checkmate::test_string(j),+ |
+
159 | +5x | +
+ checkmate::test_number(j),+ |
+
160 | +5x | +
+ checkmate::test_logical(j, len = length(x)) && sum(j) == 1+ |
+
161 | ++ |
+ )+ |
+
162 | ++ |
+ ) {+ |
+
163 | +2x | +
+ stop(+ |
+
164 | +2x | +
+ "join_keys[i, j] <- Can't set keys to specified indices.\n",+ |
+
165 | +2x | +
+ "When setting pair [i, j], both indices must point to a single key pair.\n",+ |
+
166 | +2x | +
+ call. = FALSE+ |
+
167 | ++ |
+ )+ |
+
168 | ++ |
+ }+ |
+
169 | ++ | + + | +
170 | ++ |
+ # Handle join key removal separately+ |
+
171 | +3x | +
+ if (is.null(value)) {+ |
+
172 | +1x | +
+ x[[i]][[j]] <- NULL+ |
+
173 | +1x | +
+ return(x)+ |
+
174 | ++ |
+ }+ |
+
175 | ++ | + + | +
176 | +2x | +
+ c(x, join_key(i, j, value, directed))+ |
+
177 | ++ |
+ }+ |
+
178 | ++ | + + | +
179 | ++ |
+ #' @rdname join_keys+ |
+
180 | ++ |
+ #'+ |
+
181 | ++ |
+ #' @order 1000+ |
+
182 | ++ |
+ #' @usage ## Preferred method is x[i, j] <- value+ |
+
183 | ++ |
+ #' x[[i]][[j]] <- value+ |
+
184 | ++ |
+ #'+ |
+
185 | ++ |
+ #' @section Functions:+ |
+
186 | ++ |
+ #' - `x[[i]][[j]] <- value`: It is equivalent as `x[i, j] <- value`.+ |
+
187 | ++ |
+ #'+ |
+
188 | ++ |
+ #' @export+ |
+
189 | ++ |
+ #' @examples+ |
+
190 | ++ |
+ #' # Setting via x[[i]] <- value ---+ |
+
191 | ++ |
+ #'+ |
+
192 | ++ |
+ #' jk <- join_keys()+ |
+
193 | ++ |
+ #' jk[["ds6"]][["ds6"]] <- "pk6"+ |
+
194 | ++ |
+ #' jk[["ds7"]] <- list(ds7 = "pk7", ds6 = c(pk7 = "pk6"))+ |
+
195 | ++ |
+ #' jk[["ds7"]][["ds7"]] <- NULL # removes key+ |
+
196 | ++ |
+ #'+ |
+
197 | ++ |
+ #' jk+ |
+
198 | ++ |
+ #'+ |
+
199 | ++ |
+ #' @noRd+ |
+
200 | ++ |
+ `[[<-.join_keys` <- function(x, i, value) {+ |
+
201 | +382x | +
+ checkmate::assert(+ |
+
202 | +382x | +
+ combine = "or",+ |
+
203 | +382x | +
+ checkmate::check_string(i),+ |
+
204 | +382x | +
+ checkmate::check_number(i),+ |
+
205 | +382x | +
+ checkmate::check_logical(i, len = length(x))+ |
+
206 | ++ |
+ )+ |
+
207 | +382x | +
+ checkmate::assert_list(value, names = "named", types = "character", null.ok = TRUE)+ |
+
208 | +375x | +
+ if (checkmate::test_numeric(i) || checkmate::test_logical(i)) {+ |
+
209 | +1x | +
+ i <- names(x)[[i]]+ |
+
210 | ++ |
+ }+ |
+
211 | ++ | + + | +
212 | ++ |
+ # Normalize values+ |
+
213 | +375x | +
+ norm_value <- lapply(seq_along(value), function(.x) {+ |
+
214 | +524x | +
+ join_key(i, names(value)[.x], value[[.x]])+ |
+
215 | ++ |
+ })+ |
+
216 | +375x | +
+ names(norm_value) <- names(value)+ |
+
217 | ++ | + + | +
218 | ++ |
+ # Check if multiple modifications don't have a conflict+ |
+
219 | +375x | +
+ repeated_value_ix <- names(value) %in% names(value)[duplicated(names(value))]+ |
+
220 | +375x | +
+ repeated <- norm_value[repeated_value_ix]+ |
+
221 | +375x | +
+ vapply(+ |
+
222 | +375x | +
+ seq_along(repeated),+ |
+
223 | +375x | +
+ function(.ix, .x_value = repeated[[.ix]], .x_name = names(.x_value[[1]])) {+ |
+
224 | +3x | +
+ assert_compatible_keys2(+ |
+
225 | +3x | +
+ .x_value,+ |
+
226 | +3x | +
+ unlist(unname(+ |
+
227 | +3x | +
+ repeated[-.ix][names(repeated[-.ix]) == .x_name]+ |
+
228 | +3x | +
+ ), recursive = FALSE)+ |
+
229 | ++ |
+ )+ |
+
230 | ++ |
+ },+ |
+
231 | +375x | +
+ logical(1)+ |
+
232 | ++ |
+ )+ |
+
233 | ++ | + + | +
234 | +374x | +
+ norm_value <- lapply(norm_value, function(x) x[[1]][[1]])+ |
+
235 | +374x | +
+ names(norm_value) <- names(value)+ |
+
236 | ++ | + + | +
237 | ++ |
+ # Safe to do as duplicated are the same+ |
+
238 | +374x | +
+ norm_value[duplicated(names(norm_value))] <- NULL+ |
+
239 | ++ | + + | +
240 | ++ |
+ # Keep only elements with length > 0L+ |
+
241 | +374x | +
+ norm_value <- Filter(length, norm_value)+ |
+
242 | ++ | + + | +
243 | ++ |
+ # Remove classes to use list-based get/assign operations+ |
+
244 | +374x | +
+ new_x <- unclass(x)+ |
+
245 | ++ | + + | +
246 | ++ |
+ # In case a pair is removed, also remove the symmetric pair and update parents+ |
+
247 | +374x | +
+ removed_names <- setdiff(names(new_x[[i]]), names(norm_value))+ |
+
248 | +374x | +
+ for (.x in removed_names) {+ |
+
249 | +2x | +
+ if (identical(parent(x, .x), i)) attr(new_x, "parents")[[.x]] <- NULL+ |
+
250 | +1x | +
+ if (identical(parent(x, i), .x)) attr(new_x, "parents")[[i]] <- NULL+ |
+
251 | ++ | + + | +
252 | +5x | +
+ new_x[[.x]][[i]] <- NULL+ |
+
253 | ++ |
+ }+ |
+
254 | ++ | + + | +
255 | +374x | +
+ new_x[[i]] <- norm_value+ |
+
256 | ++ | + + | +
257 | ++ |
+ # Iterate on all new values to create symmetrical pair+ |
+
258 | +374x | +
+ for (ds2 in names(norm_value)) {+ |
+
259 | +298x | +
+ if (ds2 == i) next+ |
+
260 | ++ | + + | +
261 | +222x | +
+ keep_value <- if (is.null(x)) list() else new_x[[ds2]]+ |
+
262 | ++ | + + | +
263 | ++ |
+ # Invert key+ |
+
264 | +222x | +
+ new_value <- stats::setNames(names(norm_value[[ds2]]), norm_value[[ds2]])+ |
+
265 | +222x | +
+ keep_value[[i]] <- new_value+ |
+
266 | ++ | + + | +
267 | ++ |
+ # Assign symmetrical+ |
+
268 | +222x | +
+ new_x[[ds2]] <- keep_value+ |
+
269 | ++ |
+ }+ |
+
270 | ++ | + + | +
271 | +374x | +
+ preserve_attr <- attributes(new_x)[!names(attributes(new_x)) %in% "names"]+ |
+
272 | ++ |
+ # Remove NULL or empty keys+ |
+
273 | +374x | +
+ new_x <- Filter(function(x) length(x) != 0L, new_x)+ |
+
274 | +374x | +
+ attributes(new_x) <- utils::modifyList(attributes(new_x), preserve_attr)+ |
+
275 | ++ | + + | +
276 | ++ |
+ #+ |
+
277 | ++ |
+ # restore class+ |
+
278 | +374x | +
+ class(new_x) <- class(x)+ |
+
279 | +374x | +
+ new_x+ |
+
280 | ++ |
+ }+ |
+
1 | ++ |
+ #' Test if two objects are (nearly) equal+ |
+
2 | ++ |
+ #'+ |
+
3 | ++ |
+ #' `all.equal(target, current)` is a utility to compare `join_keys` objects target+ |
+
4 | ++ |
+ #' and current testing `near equality`.+ |
+
5 | ++ |
+ #'+ |
+
6 | ++ |
+ #' If they are different, comparison is still made to some extent, and a report+ |
+
7 | ++ |
+ #' of the differences is returned.+ |
+
8 | ++ |
+ #' Do not use `all.equal` directly in if expressions—either use `isTRUE(all.equal(....))`+ |
+
9 | ++ |
+ #' or identical if appropriate.+ |
+
10 | ++ |
+ #'+ |
+
11 | ++ |
+ #' The parents attribute comparison tolerates `NULL` and empty lists and will find+ |
+
12 | ++ |
+ #' no difference.+ |
+
13 | ++ |
+ #'+ |
+
14 | ++ |
+ #' The list containing all the relationships is treated like a map and ignores+ |
+
15 | ++ |
+ #' entries with `NULL` if they exist.+ |
+
16 | ++ |
+ #'+ |
+
17 | ++ |
+ #' @inheritParams base::all.equal+ |
+
18 | ++ |
+ #' @param ... further arguments for different methods. Not used with `join_keys`.+ |
+
19 | ++ |
+ #'+ |
+
20 | ++ |
+ #' @seealso [base::all.equal()]+ |
+
21 | ++ |
+ #' @keywords internal+ |
+
22 | ++ |
+ #'+ |
+
23 | ++ |
+ all.equal.join_keys <- function(target, current, ...) {+ |
+
24 | +21x | +
+ .as_map <- function(.x) {+ |
+
25 | +42x | +
+ old_attributes <- attributes(.x)+ |
+
26 | ++ |
+ # Keep only non-list attributes+ |
+
27 | +42x | +
+ old_attributes[["names"]] <- NULL+ |
+
28 | +42x | +
+ old_attributes[["original_class"]] <- old_attributes[["class"]]+ |
+
29 | +42x | +
+ old_attributes[["class"]] <- NULL+ |
+
30 | +42x | +
+ old_attributes[["parents"]] <- if (!length(old_attributes[["parents"]])) {+ |
+
31 | +18x | +
+ list()+ |
+
32 | ++ |
+ } else {+ |
+
33 | +24x | +
+ old_attributes[["parents"]][order(names(old_attributes[["parents"]]))]+ |
+
34 | ++ |
+ }+ |
+
35 | +42x | +
+ attr(.x, "class") <- "list"+ |
+
36 | ++ | + + | +
37 | ++ |
+ # Remove nulls+ |
+
38 | +42x | +
+ .x <- Filter(Negate(is.null), .x)+ |
+
39 | ++ | + + | +
40 | ++ |
+ # Sort named components, preserving positions of unnamed+ |
+
41 | +42x | +
+ nx <- rlang::names2(.x)+ |
+
42 | +42x | +
+ is_named <- nx != ""+ |
+
43 | +42x | +
+ if (any(is_named)) {+ |
+
44 | +42x | +
+ idx <- seq_along(.x)+ |
+
45 | +42x | +
+ idx[is_named] <- idx[is_named][order(nx[is_named])]+ |
+
46 | +42x | +
+ .x <- .x[idx]+ |
+
47 | ++ |
+ }+ |
+
48 | +42x | +
+ new_attributes <- if (is.null(attributes(.x))) list() else attributes(.x)+ |
+
49 | +42x | +
+ attributes(.x) <- utils::modifyList(old_attributes, new_attributes)+ |
+
50 | +42x | +
+ .x+ |
+
51 | ++ |
+ }+ |
+
52 | +21x | +
+ x <- .as_map(target)+ |
+
53 | +21x | +
+ y <- .as_map(current)+ |
+
54 | +21x | +
+ all.equal(x, y)+ |
+
55 | ++ |
+ }+ |
+
1 | ++ |
+ #' Get and set parents in `join_keys` object+ |
+
2 | ++ |
+ #'+ |
+
3 | ++ |
+ #' `parents()` facilitates the creation of dependencies between datasets by+ |
+
4 | ++ |
+ #' assigning a parent-child relationship.+ |
+
5 | ++ |
+ #'+ |
+
6 | ++ |
+ #' Each element is defined by a `list` element, where `list("child" = "parent")`.+ |
+
7 | ++ |
+ #'+ |
+
8 | ++ |
+ #' @param x (`join_keys` or `teal_data`) object that contains "parents" information+ |
+
9 | ++ |
+ #' to retrieve or manipulate.+ |
+
10 | ++ |
+ #'+ |
+
11 | ++ |
+ #' @return a `list` of `character` representing the parents.+ |
+
12 | ++ |
+ #'+ |
+
13 | ++ |
+ #' @export+ |
+
14 | ++ |
+ #' @seealso [join_keys()]+ |
+
15 | ++ |
+ parents <- function(x) {+ |
+
16 | +606x | +
+ UseMethod("parents", x)+ |
+
17 | ++ |
+ }+ |
+
18 | ++ | + + | +
19 | ++ |
+ #' @describeIn parents Retrieves parents of `join_keys` object.+ |
+
20 | ++ |
+ #' @export+ |
+
21 | ++ |
+ #' @examples+ |
+
22 | ++ |
+ #' # Get parents of join_keys ---+ |
+
23 | ++ |
+ #'+ |
+
24 | ++ |
+ #' jk <- default_cdisc_join_keys["ADEX"]+ |
+
25 | ++ |
+ #' parents(jk)+ |
+
26 | ++ |
+ parents.join_keys <- function(x) {+ |
+
27 | +1x | +
+ if (is.null(attr(x, "parents"))) list() else attr(x, "parents")+ |
+
28 | ++ |
+ }+ |
+
29 | ++ | + + | +
30 | ++ |
+ #' @describeIn parents Retrieves parents of `join_keys` inside `teal_data` object.+ |
+
31 | ++ |
+ #' @export+ |
+
32 | ++ |
+ #' @examples+ |
+
33 | ++ |
+ #' # Get parents of join_keys inside teal_data object ---+ |
+
34 | ++ |
+ #'+ |
+
35 | ++ |
+ #' td <- teal_data(+ |
+
36 | ++ |
+ #' ADSL = rADSL,+ |
+
37 | ++ |
+ #' ADTTE = rADTTE,+ |
+
38 | ++ |
+ #' ADRS = rADRS,+ |
+
39 | ++ |
+ #' join_keys = default_cdisc_join_keys[c("ADSL", "ADTTE", "ADRS")]+ |
+
40 | ++ |
+ #' )+ |
+
41 | ++ |
+ #' parents(td)+ |
+
42 | ++ |
+ parents.teal_data <- function(x) {+ |
+
43 | +1x | +
+ parents(x@join_keys)+ |
+
44 | ++ |
+ }+ |
+
45 | ++ | + + | +
46 | ++ |
+ #' @describeIn parents Assignment of parents in `join_keys` object.+ |
+
47 | ++ |
+ #'+ |
+
48 | ++ |
+ #' @param value (`named list`) of `character` vectors.+ |
+
49 | ++ |
+ #'+ |
+
50 | ++ |
+ #' @export+ |
+
51 | ++ |
+ `parents<-` <- function(x, value) {+ |
+
52 | +428x | +
+ UseMethod("parents<-", x)+ |
+
53 | ++ |
+ }+ |
+
54 | ++ | + + | +
55 | ++ |
+ #' @describeIn parents Assignment of parents of `join_keys` object.+ |
+
56 | ++ |
+ #' @export+ |
+
57 | ++ |
+ #' @examples+ |
+
58 | ++ |
+ #' # Assignment of parents ---+ |
+
59 | ++ |
+ #'+ |
+
60 | ++ |
+ #' jk <- join_keys(+ |
+
61 | ++ |
+ #' join_key("ds1", "ds2", "id"),+ |
+
62 | ++ |
+ #' join_key("ds5", "ds6", "id"),+ |
+
63 | ++ |
+ #' join_key("ds7", "ds6", "id")+ |
+
64 | ++ |
+ #' )+ |
+
65 | ++ |
+ #'+ |
+
66 | ++ |
+ #' parents(jk) <- list(ds2 = "ds1")+ |
+
67 | ++ |
+ #'+ |
+
68 | ++ |
+ #' # Setting individual parent-child relationship+ |
+
69 | ++ |
+ #'+ |
+
70 | ++ |
+ #' parents(jk)["ds6"] <- "ds5"+ |
+
71 | ++ |
+ #' parents(jk)["ds7"] <- "ds6"+ |
+
72 | ++ |
+ `parents<-.join_keys` <- function(x, value) {+ |
+
73 | +427x | +
+ checkmate::assert_list(value, types = "character", names = "named")+ |
+
74 | ++ | + + | +
75 | +424x | +
+ new_parents <- list()+ |
+
76 | ++ | + + | +
77 | +424x | +
+ for (dataset in names(value)) {+ |
+
78 | ++ |
+ # Custom .var.name so it is verbose and helpful for users+ |
+
79 | +161x | +
+ checkmate::assert_string(value[[dataset]], .var.name = sprintf("value[[\"%s\"]]", dataset))+ |
+
80 | ++ | + + | +
81 | +160x | +
+ parent <- new_parents[[dataset]]+ |
+
82 | +160x | +
+ checkmate::assert(+ |
+
83 | +160x | +
+ checkmate::check_null(parent),+ |
+
84 | +160x | +
+ checkmate::check_true(+ |
+
85 | +160x | +
+ length(parent) == 0 &&+ |
+
86 | +160x | +
+ length(value[[dataset]]) == 0+ |
+
87 | ++ |
+ ),+ |
+
88 | +160x | +
+ checkmate::check_true(parent == value[[dataset]]),+ |
+
89 | +160x | +
+ "Please check the difference between provided datasets parents and provided join_keys parents.",+ |
+
90 | +160x | +
+ .var.name = "value"+ |
+
91 | ++ |
+ )+ |
+
92 | +160x | +
+ if (is.null(parent)) {+ |
+
93 | +160x | +
+ new_parents[[dataset]] <- value[[dataset]]+ |
+
94 | ++ |
+ }+ |
+
95 | ++ |
+ }+ |
+
96 | ++ | + + | +
97 | +423x | +
+ if (is_dag(new_parents)) {+ |
+
98 | +4x | +
+ stop("Cycle detected in a parent and child dataset graph.")+ |
+
99 | ++ |
+ }+ |
+
100 | ++ | + + | +
101 | +419x | +
+ attr(x, "parents") <- new_parents+ |
+
102 | ++ | + + | +
103 | +419x | +
+ assert_parent_child(x)+ |
+
104 | +418x | +
+ x+ |
+
105 | ++ |
+ }+ |
+
106 | ++ | + + | +
107 | ++ |
+ #' @describeIn parents Assignment of parents of `join_keys` inside `teal_data` object.+ |
+
108 | ++ |
+ #' @export+ |
+
109 | ++ |
+ #' @examples+ |
+
110 | ++ |
+ #' # Assignment of parents of join_keys inside teal_data object ---+ |
+
111 | ++ |
+ #'+ |
+
112 | ++ |
+ #' parents(td) <- list("ADTTE" = "ADSL") # replace existing+ |
+
113 | ++ |
+ #' parents(td)["ADRS"] <- "ADSL" # add new parent+ |
+
114 | ++ |
+ `parents<-.teal_data` <- function(x, value) {+ |
+
115 | +1x | +
+ parents(x@join_keys) <- value+ |
+
116 | +1x | +
+ x+ |
+
117 | ++ |
+ }+ |
+
118 | ++ | + + | +
119 | ++ |
+ #' @describeIn parents Getter for individual parent.+ |
+
120 | ++ |
+ #'+ |
+
121 | ++ |
+ #' @param dataset_name (`character(1)`) Name of dataset to query on their parent.+ |
+
122 | ++ |
+ #'+ |
+
123 | ++ |
+ #' @return For `parent(x, dataset_name)` returns `NULL` if parent does not exist.+ |
+
124 | ++ |
+ #'+ |
+
125 | ++ |
+ #' @export+ |
+
126 | ++ |
+ #'+ |
+
127 | ++ |
+ #' @examples+ |
+
128 | ++ |
+ #' # Get individual parent ---+ |
+
129 | ++ |
+ #'+ |
+
130 | ++ |
+ #' parent(jk, "ds2")+ |
+
131 | ++ |
+ #' parent(td, "ADTTE")+ |
+
132 | ++ |
+ parent <- function(x, dataset_name) {+ |
+
133 | +105x | +
+ checkmate::assert_string(dataset_name)+ |
+
134 | ++ |
+ # assert x is performed by parents()+ |
+
135 | +105x | +
+ parents(x)[[dataset_name]]+ |
+
136 | ++ |
+ }+ |
+
1 | ++ |
+ #' @rdname join_keys+ |
+
2 | ++ |
+ #' @order 7+ |
+
3 | ++ |
+ #' @export+ |
+
4 | ++ |
+ format.join_keys <- function(x, ...) {+ |
+
5 | +6x | +
+ if (length(x) > 0) {+ |
+
6 | +5x | +
+ my_parents <- parents(x)+ |
+
7 | +5x | +
+ names_sorted <- topological_sort(my_parents)+ |
+
8 | +5x | +
+ names <- union(names_sorted, names(x))+ |
+
9 | +5x | +
+ x_implicit <- update_keys_given_parents(x)+ |
+
10 | +5x | +
+ out <- lapply(names, function(i) {+ |
+
11 | +15x | +
+ out_i <- lapply(union(i, names(x[[i]])), function(j) {+ |
+
12 | +35x | +
+ direction <- if (identical(my_parents[[j]], i)) {+ |
+
13 | ++ |
+ " <-- "+ |
+
14 | +35x | +
+ } else if (identical(my_parents[[i]], j)) {+ |
+
15 | ++ |
+ " --> "+ |
+
16 | +35x | +
+ } else if (!identical(i, j)) {+ |
+
17 | ++ |
+ " <-> "+ |
+
18 | ++ |
+ } else {+ |
+
19 | ++ |
+ ""+ |
+
20 | ++ |
+ }+ |
+
21 | ++ | + + | +
22 | +35x | +
+ keys <- x[[i]][[j]]+ |
+
23 | +35x | +
+ sprintf(+ |
+
24 | +35x | +
+ "%s%s: [%s]",+ |
+
25 | +35x | +
+ direction, j,+ |
+
26 | +35x | +
+ if (length(keys) == 0) "no primary keys" else toString(keys)+ |
+
27 | ++ |
+ )+ |
+
28 | ++ |
+ })+ |
+
29 | ++ | + + | +
30 | +15x | +
+ implicit_datasets <- setdiff(names(x_implicit[[i]]), names(x[[i]]))+ |
+
31 | +15x | +
+ if (length(implicit_datasets) > 0) {+ |
+
32 | +2x | +
+ out_i <- c(+ |
+
33 | +2x | +
+ out_i,+ |
+
34 | +2x | +
+ paste0(+ |
+
35 | +2x | +
+ " --* (implicit via parent with): ",+ |
+
36 | +2x | +
+ paste(implicit_datasets, collapse = ", ")+ |
+
37 | ++ |
+ )+ |
+
38 | ++ |
+ )+ |
+
39 | ++ |
+ }+ |
+
40 | ++ | + + | +
41 | +15x | +
+ paste(out_i, collapse = "\n")+ |
+
42 | ++ |
+ })+ |
+
43 | +5x | +
+ paste(+ |
+
44 | +5x | +
+ c(+ |
+
45 | +5x | +
+ sprintf("A join_keys object containing foreign keys between %s datasets:", length(x)),+ |
+
46 | +5x | +
+ out+ |
+
47 | ++ |
+ ),+ |
+
48 | +5x | +
+ collapse = "\n"+ |
+
49 | ++ |
+ )+ |
+
50 | ++ |
+ } else {+ |
+
51 | +1x | +
+ "An empty join_keys object."+ |
+
52 | ++ |
+ }+ |
+
53 | ++ |
+ }+ |
+
54 | ++ | + + | +
55 | ++ |
+ #' @rdname join_keys+ |
+
56 | ++ |
+ #' @order 7+ |
+
57 | ++ |
+ #' @export+ |
+
58 | ++ |
+ print.join_keys <- function(x, ...) {+ |
+
59 | +1x | +
+ cat(format(x, ...), "\n")+ |
+
60 | +1x | +
+ invisible(x)+ |
+
61 | ++ |
+ }+ |
+
1 | ++ |
+ #' Names of data sets in `teal_data` object+ |
+
2 | ++ |
+ #'+ |
+
3 | ++ |
+ #' Get or set the value of the `datanames` slot.+ |
+
4 | ++ |
+ #'+ |
+
5 | ++ |
+ #' The `@datanames` slot in a `teal_data` object specifies which of the variables stored in its environment+ |
+
6 | ++ |
+ #' (the `@env` slot) are data sets to be taken into consideration.+ |
+
7 | ++ |
+ #' The contents of `@datanames` can be specified upon creation and default to all variables in `@env`.+ |
+
8 | ++ |
+ #' Variables created later, which may well be data sets, are not automatically considered such.+ |
+
9 | ++ |
+ #' Use this function to update the slot.+ |
+
10 | ++ |
+ #'+ |
+
11 | ++ |
+ #' @param x (`teal_data`) object to access or modify+ |
+
12 | ++ |
+ #' @param value (`character`) new value for `@datanames`; all elements must be names of variables existing in `@env`+ |
+
13 | ++ |
+ #'+ |
+
14 | ++ |
+ #' @return The contents of `@datanames` or `teal_data` object with updated `@datanames`.+ |
+
15 | ++ |
+ #'+ |
+
16 | ++ |
+ #' @examples+ |
+
17 | ++ |
+ #' td <- teal_data(iris = iris)+ |
+
18 | ++ |
+ #' td <- within(td, mtcars <- mtcars)+ |
+
19 | ++ |
+ #' datanames(td)+ |
+
20 | ++ |
+ #'+ |
+
21 | ++ |
+ #' datanames(td) <- c("iris", "mtcars")+ |
+
22 | ++ |
+ #' datanames(td)+ |
+
23 | ++ |
+ #'+ |
+
24 | ++ |
+ #' @name datanames+ |
+
25 | ++ |
+ #' @aliases datanames,teal_data-method+ |
+
26 | ++ |
+ #' @aliases datanames<-,teal_data,character-method+ |
+
27 | ++ |
+ #' @aliases datanames,qenv.error-method+ |
+
28 | ++ |
+ #' @aliases datanames<-,qenv.error,character-method+ |
+
29 | ++ | + + | +
30 | ++ |
+ #' @rdname datanames+ |
+
31 | ++ |
+ #' @export+ |
+
32 | +6x | +
+ setGeneric("datanames", function(x) standardGeneric("datanames"))+ |
+
33 | ++ |
+ setMethod("datanames", signature = "teal_data", definition = function(x) {+ |
+
34 | +4x | +
+ x@datanames+ |
+
35 | ++ |
+ })+ |
+
36 | ++ |
+ setMethod("datanames", signature = "qenv.error", definition = function(x) {+ |
+
37 | +2x | +
+ NULL+ |
+
38 | ++ |
+ })+ |
+
39 | ++ | + + | +
40 | ++ |
+ #' @rdname datanames+ |
+
41 | ++ |
+ #' @export+ |
+
42 | +4x | +
+ setGeneric("datanames<-", function(x, value) standardGeneric("datanames<-"))+ |
+
43 | ++ |
+ setMethod("datanames<-", signature = c("teal_data", "character"), definition = function(x, value) {+ |
+
44 | +3x | +
+ checkmate::assert_subset(value, names(x@env))+ |
+
45 | +2x | +
+ x@datanames <- value+ |
+
46 | +2x | +
+ methods::validObject(x)+ |
+
47 | +2x | +
+ x+ |
+
48 | ++ |
+ })+ |
+
49 | ++ |
+ setMethod("datanames<-", signature = c("qenv.error", "character"), definition = function(x, value) {+ |
+
50 | +1x | +
+ methods::validObject(x)+ |
+
51 | +1x | +
+ x+ |
+
52 | ++ |
+ })+ |
+
1 | ++ |
+ #' Get code from `teal_data` object+ |
+
2 | ++ |
+ #'+ |
+
3 | ++ |
+ #' Retrieve code from `teal_data` object.+ |
+
4 | ++ |
+ #'+ |
+
5 | ++ |
+ #' Retrieve code stored in `@code`, which (in principle) can be used to recreate all objects found in `@env`.+ |
+
6 | ++ |
+ #' Use `datanames` to limit the code to one or more of the datasets enumerated in `@datanames`.+ |
+
7 | ++ |
+ #' If the code has not passed verification (with [`verify()`]), a warning will be prepended.+ |
+
8 | ++ |
+ #'+ |
+
9 | ++ |
+ #' @section Extracting dataset-specific code:+ |
+
10 | ++ |
+ #' When `datanames` is specified, the code returned will be limited to the lines needed to _create_+ |
+
11 | ++ |
+ #' the requested datasets. The code stored in the `@code` slot is analyzed statically to determine+ |
+
12 | ++ |
+ #' which lines the datasets of interest depend upon. The analysis works well when objects are created+ |
+
13 | ++ |
+ #' with standard infix assignment operators (see `?assignOps`) but it can fail in some situations.+ |
+
14 | ++ |
+ #'+ |
+
15 | ++ |
+ #' Consider the following examples:+ |
+
16 | ++ |
+ #'+ |
+
17 | ++ |
+ #' _Case 1: Usual assignments._+ |
+
18 | ++ |
+ #' ```r+ |
+
19 | ++ |
+ #' data <- teal_data() |>+ |
+
20 | ++ |
+ #' within({+ |
+
21 | ++ |
+ #' foo <- function(x) {+ |
+
22 | ++ |
+ #' x + 1+ |
+
23 | ++ |
+ #' }+ |
+
24 | ++ |
+ #' x <- 0+ |
+
25 | ++ |
+ #' y <- foo(x)+ |
+
26 | ++ |
+ #' })+ |
+
27 | ++ |
+ #' get_code(data, datanames = "y")+ |
+
28 | ++ |
+ #' ```+ |
+
29 | ++ |
+ #' `x` has no dependencies, so `get_code(data, datanames = "x")` will return only the second call.\cr+ |
+
30 | ++ |
+ #' `y` depends on `x` and `foo`, so `get_code(data, datanames = "y")` will contain all three calls.+ |
+
31 | ++ |
+ #'+ |
+
32 | ++ |
+ #' _Case 2: Some objects are created by a function's side effects._+ |
+
33 | ++ |
+ #' ```r+ |
+
34 | ++ |
+ #' data <- teal_data() |>+ |
+
35 | ++ |
+ #' within({+ |
+
36 | ++ |
+ #' foo <- function() {+ |
+
37 | ++ |
+ #' x <<- x + 1+ |
+
38 | ++ |
+ #' }+ |
+
39 | ++ |
+ #' x <- 0+ |
+
40 | ++ |
+ #' foo()+ |
+
41 | ++ |
+ #' y <- x+ |
+
42 | ++ |
+ #' })+ |
+
43 | ++ |
+ #' get_code(data, datanames = "y")+ |
+
44 | ++ |
+ #' ```+ |
+
45 | ++ |
+ #' Here, `y` depends on `x` but `x` is modified by `foo` as a side effect (not by reassignment)+ |
+
46 | ++ |
+ #' and so `get_code(data, datanames = "y")` will not return the `foo()` call.\cr+ |
+
47 | ++ |
+ #' To overcome this limitation, code dependencies can be specified manually.+ |
+
48 | ++ |
+ #' Lines where side effects occur can be flagged by adding "`# @linksto <object name>`" at the end.\cr+ |
+
49 | ++ |
+ #' Note that `within` evaluates code passed to `expr` as is and comments are ignored.+ |
+
50 | ++ |
+ #' In order to include comments in code one must use the `eval_code` function instead.+ |
+
51 | ++ |
+ #'+ |
+
52 | ++ |
+ #' ```r+ |
+
53 | ++ |
+ #' data <- teal_data() |>+ |
+
54 | ++ |
+ #' eval_code("+ |
+
55 | ++ |
+ #' foo <- function() {+ |
+
56 | ++ |
+ #' x <<- x + 1+ |
+
57 | ++ |
+ #' }+ |
+
58 | ++ |
+ #' x <- 0+ |
+
59 | ++ |
+ #' foo() # @linksto x+ |
+
60 | ++ |
+ #' y <- x+ |
+
61 | ++ |
+ #' ")+ |
+
62 | ++ |
+ #' get_code(data, datanames = "y")+ |
+
63 | ++ |
+ #' ```+ |
+
64 | ++ |
+ #' Now the `foo()` call will be properly included in the code required to recreate `y`.+ |
+
65 | ++ |
+ #'+ |
+
66 | ++ |
+ #' Note that two functions that create objects as side effects, `assign` and `data`, are handled automatically.+ |
+
67 | ++ |
+ #'+ |
+
68 | ++ |
+ #' Here are known cases where manual tagging is necessary:+ |
+
69 | ++ |
+ #' - non-standard assignment operators, _e.g._ `%<>%`+ |
+
70 | ++ |
+ #' - objects used as conditions in `if` statements: `if (<condition>)`+ |
+
71 | ++ |
+ #' - objects used to iterate over in `for` loops: `for(i in <sequence>)`+ |
+
72 | ++ |
+ #' - creating and evaluating language objects, _e.g._ `eval(<call>)`+ |
+
73 | ++ |
+ #'+ |
+
74 | ++ |
+ #'+ |
+
75 | ++ |
+ #' @param object (`teal_data`)+ |
+
76 | ++ |
+ #' @param datanames `r lifecycle::badge("experimental")` (`character`) vector of dataset names to return the code for.+ |
+
77 | ++ |
+ #' For more details see the "Extracting dataset-specific code" section.+ |
+
78 | ++ |
+ #' @param deparse (`logical`) flag specifying whether to return code as `character` (`deparse = TRUE`) or as+ |
+
79 | ++ |
+ #' `expression` (`deparse = FALSE`).+ |
+
80 | ++ |
+ #' @param ... Parameters passed to internal methods. Currently, the only supported parameter is `check_names`+ |
+
81 | ++ |
+ #' (`logical(1)`) flag, which is `TRUE` by default. Function warns about missing objects, if they do not exist in+ |
+
82 | ++ |
+ #' `code` but are passed in `datanames`. To remove the warning, set `check_names = FALSE`.+ |
+
83 | ++ |
+ #'+ |
+
84 | ++ |
+ #' @return+ |
+
85 | ++ |
+ #' Either a character string or an expression. If `datanames` is used to request a specific dataset,+ |
+
86 | ++ |
+ #' only code that _creates_ that dataset (not code that uses it) is returned. Otherwise, all contents of `@code`.+ |
+
87 | ++ |
+ #'+ |
+
88 | ++ |
+ #' @examples+ |
+
89 | ++ |
+ #' tdata1 <- teal_data()+ |
+
90 | ++ |
+ #' tdata1 <- within(tdata1, {+ |
+
91 | ++ |
+ #' a <- 1+ |
+
92 | ++ |
+ #' b <- a^5+ |
+
93 | ++ |
+ #' c <- list(x = 2)+ |
+
94 | ++ |
+ #' })+ |
+
95 | ++ |
+ #' get_code(tdata1)+ |
+
96 | ++ |
+ #' get_code(tdata1, datanames = "a")+ |
+
97 | ++ |
+ #' get_code(tdata1, datanames = "b")+ |
+
98 | ++ |
+ #'+ |
+
99 | ++ |
+ #' tdata2 <- teal_data(x1 = iris, code = "x1 <- iris")+ |
+
100 | ++ |
+ #' get_code(tdata2)+ |
+
101 | ++ |
+ #' get_code(verify(tdata2))+ |
+
102 | ++ |
+ #'+ |
+
103 | ++ |
+ #' @rdname get_code+ |
+
104 | ++ |
+ #' @aliases get_code,teal_data-method+ |
+
105 | ++ |
+ #'+ |
+
106 | ++ |
+ #' @export+ |
+
107 | ++ |
+ setMethod("get_code", signature = "teal_data", definition = function(object, deparse = TRUE, datanames = NULL, ...) {+ |
+
108 | +53x | +
+ checkmate::assert_character(datanames, min.len = 1L, null.ok = TRUE)+ |
+
109 | +53x | +
+ checkmate::assert_flag(deparse)+ |
+
110 | ++ | + + | +
111 | +53x | +
+ code <- if (!is.null(datanames)) {+ |
+
112 | +49x | +
+ get_code_dependency(object@code, datanames, ...)+ |
+
113 | ++ |
+ } else {+ |
+
114 | +4x | +
+ object@code+ |
+
115 | ++ |
+ }+ |
+
116 | ++ | + + | +
117 | +53x | +
+ if (!object@verified) {+ |
+
118 | +11x | +
+ code <- c("warning('Code was not verified for reproducibility.')", code)+ |
+
119 | ++ |
+ }+ |
+
120 | ++ | + + | +
121 | +53x | +
+ if (deparse) {+ |
+
122 | +52x | +
+ if (length(code) == 0) {+ |
+
123 | +1x | +
+ code+ |
+
124 | ++ |
+ } else {+ |
+
125 | +51x | +
+ paste(code, collapse = "\n")+ |
+
126 | ++ |
+ }+ |
+
127 | ++ |
+ } else {+ |
+
128 | +1x | +
+ parse(text = paste(c("{", code, "}"), collapse = "\n"), keep.source = TRUE)+ |
+
129 | ++ |
+ }+ |
+
130 | ++ |
+ })+ |
+
1 | ++ |
+ #' Check Compatibility of keys+ |
+
2 | ++ |
+ #'+ |
+
3 | ++ |
+ #' Helper function to assert if two key sets contain incompatible keys.+ |
+
4 | ++ |
+ #'+ |
+
5 | ++ |
+ #' @return Returns `TRUE` if successful, otherwise raises error.+ |
+
6 | ++ |
+ #' @keywords internal+ |
+
7 | ++ |
+ assert_compatible_keys <- function(join_key_1, join_key_2) {+ |
+
8 | +3x | +
+ stop_message <- function(dataset_1, dataset_2) {+ |
+
9 | +1x | +
+ stop(+ |
+
10 | +1x | +
+ paste("cannot specify multiple different join keys between datasets:", dataset_1, "and", dataset_2)+ |
+
11 | ++ |
+ )+ |
+
12 | ++ |
+ }+ |
+
13 | ++ | + + | +
14 | +3x | +
+ dataset_1_one <- names(join_key_1)+ |
+
15 | +3x | +
+ dataset_2_one <- names(join_key_1[[1]])+ |
+
16 | +3x | +
+ keys_one <- join_key_1[[1]][[1]]+ |
+
17 | ++ | + + | +
18 | +3x | +
+ dataset_1_two <- names(join_key_2)+ |
+
19 | +3x | +
+ dataset_2_two <- names(join_key_2[[1]])+ |
+
20 | +3x | +
+ keys_two <- join_key_2[[1]][[1]]+ |
+
21 | ++ | + + | +
22 | ++ |
+ # if first datasets and the second datasets match and keys+ |
+
23 | ++ |
+ # must contain the same named elements+ |
+
24 | +3x | +
+ if (dataset_1_one == dataset_1_two && dataset_2_one == dataset_2_two) {+ |
+
25 | +3x | +
+ if (!identical(sort(keys_one), sort(keys_two))) {+ |
+
26 | +1x | +
+ stop_message(dataset_1_one, dataset_2_one)+ |
+
27 | ++ |
+ }+ |
+
28 | ++ |
+ }+ |
+
29 | ++ | + + | +
30 | ++ |
+ # if first dataset of join_key_1 matches second dataset of join_key_2+ |
+
31 | ++ |
+ # and the first dataset of join_key_2 must match second dataset of join_key_1+ |
+
32 | ++ |
+ # and keys must contain the same elements but with names and values swapped+ |
+
33 | +2x | +
+ if (dataset_1_one == dataset_2_two && dataset_2_one == dataset_1_two) {+ |
+
34 | ++ |
+ if (+ |
+
35 | +! | +
+ xor(length(keys_one) == 0, length(keys_two) == 0) ||+ |
+
36 | +! | +
+ !identical(sort(keys_one), sort(stats::setNames(names(keys_two), keys_two)))+ |
+
37 | ++ |
+ ) {+ |
+
38 | +! | +
+ stop_message(dataset_1_one, dataset_2_one)+ |
+
39 | ++ |
+ }+ |
+
40 | ++ |
+ }+ |
+
41 | ++ | + + | +
42 | ++ |
+ # otherwise they are compatible+ |
+
43 | +2x | +
+ return(TRUE)+ |
+
44 | ++ |
+ }+ |
+
45 | ++ | + + | +
46 | ++ |
+ #' Validate parent-child key+ |
+
47 | ++ |
+ #'+ |
+
48 | ++ |
+ #' Helper function checks the parent-child relations are valid.+ |
+
49 | ++ |
+ #'+ |
+
50 | ++ |
+ #' @param x (`join_keys`) object to assert validity of relations+ |
+
51 | ++ |
+ #'+ |
+
52 | ++ |
+ #' @return `join_keys` invisibly+ |
+
53 | ++ |
+ #'+ |
+
54 | ++ |
+ #' @keywords internal+ |
+
55 | ++ |
+ assert_parent_child <- function(x) {+ |
+
56 | +431x | +
+ jk <- join_keys(x)+ |
+
57 | +431x | +
+ jk_parents <- parents(jk)+ |
+
58 | ++ | + + | +
59 | +431x | +
+ checkmate::assert_class(jk, c("join_keys", "list"))+ |
+
60 | ++ | + + | +
61 | +431x | +
+ if (!is.null(jk_parents)) {+ |
+
62 | +431x | +
+ for (idx1 in seq_along(jk_parents)) {+ |
+
63 | +170x | +
+ name_from <- names(jk_parents)[[idx1]]+ |
+
64 | +170x | +
+ for (idx2 in seq_along(jk_parents[[idx1]])) {+ |
+
65 | +170x | +
+ name_to <- jk_parents[[idx1]][[idx2]]+ |
+
66 | +170x | +
+ keys_from <- jk[[name_from]][[name_to]]+ |
+
67 | +170x | +
+ keys_to <- jk[[name_to]][[name_from]]+ |
+
68 | +170x | +
+ if (length(keys_from) == 0 && length(keys_to) == 0) {+ |
+
69 | +1x | +
+ stop(sprintf("No join keys from %s to its parent (%s) and vice versa", name_from, name_to))+ |
+
70 | ++ |
+ }+ |
+
71 | ++ |
+ }+ |
+
72 | ++ |
+ }+ |
+
73 | ++ |
+ }+ |
+
74 | +430x | +
+ invisible(x)+ |
+
75 | ++ |
+ }+ |
+
76 | ++ | + + | +
77 | ++ |
+ #' Verify key set compatibility+ |
+
78 | ++ |
+ #'+ |
+
79 | ++ |
+ #' Helper function to ensuring compatibility between two sets of keys+ |
+
80 | ++ |
+ #'+ |
+
81 | ++ |
+ #' @return Returns `TRUE` if successful, otherwise raises error.+ |
+
82 | ++ |
+ #' @keywords internal+ |
+
83 | ++ |
+ assert_compatible_keys2 <- function(x, y) {+ |
+
84 | ++ |
+ # Helper to flatten join_keys / join_key_set+ |
+
85 | +3x | +
+ flatten_join_key_sets <- function(value) {+ |
+
86 | +6x | +
+ value <- unclass(value)+ |
+
87 | +6x | +
+ Reduce(+ |
+
88 | +6x | +
+ init = list(),+ |
+
89 | +6x | +
+ f = function(u, v, ...) {+ |
+
90 | +6x | +
+ el <- value[v][[1]]+ |
+
91 | +6x | +
+ res <- lapply(seq_along(el), function(ix) el[ix])+ |
+
92 | +6x | +
+ names(res) <- rep(v, length(res))+ |
+
93 | +6x | +
+ append(u, res)+ |
+
94 | ++ |
+ },+ |
+
95 | +6x | +
+ x = names(value)+ |
+
96 | ++ |
+ )+ |
+
97 | ++ |
+ }+ |
+
98 | ++ | + + | +
99 | +3x | +
+ x <- flatten_join_key_sets(x)+ |
+
100 | +3x | +
+ y <- flatten_join_key_sets(y)+ |
+
101 | ++ | + + | +
102 | +3x | +
+ for (idx_1 in seq_along(x)) {+ |
+
103 | +3x | +
+ for (idx_2 in seq_along(y)) {+ |
+
104 | +3x | +
+ assert_compatible_keys(x[idx_1], y[idx_2])+ |
+
105 | ++ |
+ }+ |
+
106 | ++ |
+ }+ |
+
107 | +2x | +
+ TRUE+ |
+
108 | ++ |
+ }+ |
+
109 | ++ | + + | +
110 | ++ |
+ #' Updates the keys of the datasets based on the parents+ |
+
111 | ++ |
+ #'+ |
+
112 | ++ |
+ #' @param x (`join_keys`) object to update the keys.+ |
+
113 | ++ |
+ #'+ |
+
114 | ++ |
+ #' @return (`self`) invisibly for chaining+ |
+
115 | ++ |
+ #'+ |
+
116 | ++ |
+ #' @keywords internal+ |
+
117 | ++ |
+ update_keys_given_parents <- function(x) {+ |
+
118 | +12x | +
+ jk <- join_keys(x)+ |
+
119 | ++ | + + | +
120 | +12x | +
+ checkmate::assert_class(jk, "join_keys", .var.name = checkmate::vname(x))+ |
+
121 | ++ | + + | +
122 | +12x | +
+ datanames <- names(jk)+ |
+
123 | +12x | +
+ for (d1_ix in seq_along(datanames)) {+ |
+
124 | +34x | +
+ d1 <- datanames[[d1_ix]]+ |
+
125 | +34x | +
+ d1_parent <- parent(jk, d1)+ |
+
126 | +34x | +
+ for (d2 in datanames[-1 * seq.int(d1_ix)]) {+ |
+
127 | +38x | +
+ if (length(jk[[d1]][[d2]]) == 0) {+ |
+
128 | +16x | +
+ d2_parent <- parent(jk, d2)+ |
+
129 | ++ | + + | +
130 | +12x | +
+ if (!identical(d1_parent, d2_parent) || length(d1_parent) == 0) next+ |
+
131 | ++ | + + | +
132 | ++ |
+ # both has the same parent -> common keys to parent+ |
+
133 | +4x | +
+ keys_d1_parent <- sort(jk[[d1]][[d1_parent]])+ |
+
134 | +4x | +
+ keys_d2_parent <- sort(jk[[d2]][[d2_parent]])+ |
+
135 | ++ | + + | +
136 | +4x | +
+ common_ix_1 <- unname(keys_d1_parent) %in% unname(keys_d2_parent)+ |
+
137 | +4x | +
+ common_ix_2 <- unname(keys_d2_parent) %in% unname(keys_d1_parent)+ |
+
138 | ++ | + + | +
139 | ++ |
+ # No common keys between datasets - leave empty+ |
+
140 | +1x | +
+ if (all(!common_ix_1)) next+ |
+
141 | ++ | + + | +
142 | +3x | +
+ fk <- structure(+ |
+
143 | +3x | +
+ names(keys_d2_parent)[common_ix_2],+ |
+
144 | +3x | +
+ names = names(keys_d1_parent)[common_ix_1]+ |
+
145 | ++ |
+ )+ |
+
146 | +3x | +
+ jk[[d1]][[d2]] <- fk # mutate join key+ |
+
147 | ++ |
+ }+ |
+
148 | ++ |
+ }+ |
+
149 | ++ |
+ }+ |
+
150 | ++ |
+ # check parent child relation+ |
+
151 | +12x | +
+ assert_parent_child(x = jk)+ |
+
152 | ++ | + + | +
153 | +12x | +
+ jk+ |
+
154 | ++ |
+ }+ |
+
1 | ++ |
+ setOldClass("join_keys")+ |
+
2 | ++ | + + | +
3 | ++ |
+ #' Reproducible data+ |
+
4 | ++ |
+ #'+ |
+
5 | ++ |
+ #' Reproducible data container class. Inherits code tracking behavior from [`teal.code::qenv-class`].+ |
+
6 | ++ |
+ #'+ |
+
7 | ++ |
+ #' This class provides an isolated environment in which to store and process data with all code being recorded.+ |
+
8 | ++ |
+ #' The environment, code, data set names, and data joining keys are stored in their respective slots.+ |
+
9 | ++ |
+ #' These slots should never be accessed directly, use the provided get/set functions.+ |
+
10 | ++ |
+ #'+ |
+
11 | ++ |
+ #' As code is evaluated in `teal_data`, messages and warnings are stored in their respective slots.+ |
+
12 | ++ |
+ #' If errors are raised, a `qenv.error` object is returned.+ |
+
13 | ++ |
+ #'+ |
+
14 | ++ |
+ #' @name teal_data-class+ |
+
15 | ++ |
+ #' @rdname teal_data-class+ |
+
16 | ++ |
+ #'+ |
+
17 | ++ |
+ #' @slot env (`environment`) environment containing data sets and possibly auxiliary variables.+ |
+
18 | ++ |
+ #' Access variables with [get_var()] or [`[[`].+ |
+
19 | ++ |
+ #' No setter provided. Evaluate code to add variables into `@env`.+ |
+
20 | ++ |
+ #' @slot code (`character`) vector representing code necessary to reproduce the contents of `@env`.+ |
+
21 | ++ |
+ #' Access with [get_code()].+ |
+
22 | ++ |
+ #' No setter provided. Evaluate code to append code to the slot.+ |
+
23 | ++ |
+ #' @slot id (`integer`) random identifier assigned to each element of `@code`. Used internally.+ |
+
24 | ++ |
+ #' @slot warnings (`character`) vector of warnings raised when evaluating code.+ |
+
25 | ++ |
+ #' Access with [get_warnings()].+ |
+
26 | ++ |
+ #' @slot messages (`character`) vector of messages raised when evaluating code.+ |
+
27 | ++ |
+ #' @slot join_keys (`join_keys`) object specifying joining keys for data sets in `@env`.+ |
+
28 | ++ |
+ #' Access or modify with [join_keys()].+ |
+
29 | ++ |
+ #' @slot datanames (`character`) vector of names of data sets in `@env`.+ |
+
30 | ++ |
+ #' Used internally to distinguish them from auxiliary variables.+ |
+
31 | ++ |
+ #' Access or modify with [datanames()].+ |
+
32 | ++ |
+ #' @slot verified (`logical(1)`) flag signifying that code in `@code` has been proven to yield contents of `@env`.+ |
+
33 | ++ |
+ #' Used internally. See [`verify()`] for more details.+ |
+
34 | ++ |
+ #'+ |
+
35 | ++ |
+ #' @import teal.code+ |
+
36 | ++ |
+ #' @keywords internal+ |
+
37 | ++ |
+ setClass(+ |
+
38 | ++ |
+ Class = "teal_data",+ |
+
39 | ++ |
+ contains = "qenv",+ |
+
40 | ++ |
+ slots = c(join_keys = "join_keys", datanames = "character", verified = "logical"),+ |
+
41 | ++ |
+ prototype = list(+ |
+
42 | ++ |
+ join_keys = join_keys(),+ |
+
43 | ++ |
+ datanames = character(0),+ |
+
44 | ++ |
+ verified = logical(0)+ |
+
45 | ++ |
+ )+ |
+
46 | ++ |
+ )+ |
+
47 | ++ | + + | +
48 | ++ |
+ #' Initialize `teal_data` object+ |
+
49 | ++ |
+ #'+ |
+
50 | ++ |
+ #' @name new_teal_data+ |
+
51 | ++ |
+ #'+ |
+
52 | ++ |
+ #' @param data (`named list`) of data objects.+ |
+
53 | ++ |
+ #' @param code (`character` or `language`) code to reproduce the `data`.+ |
+
54 | ++ |
+ #' Accepts and stores comments also.+ |
+
55 | ++ |
+ #' @param join_keys (`join_keys`) object+ |
+
56 | ++ |
+ #' @param datanames (`character`) names of datasets passed to `data`.+ |
+
57 | ++ |
+ #' Needed when non-dataset objects are needed in the `env` slot.+ |
+
58 | ++ |
+ #' @rdname new_teal_data+ |
+
59 | ++ |
+ #' @keywords internal+ |
+
60 | ++ |
+ new_teal_data <- function(data,+ |
+
61 | ++ |
+ code = character(0),+ |
+
62 | ++ |
+ join_keys = join_keys(),+ |
+
63 | ++ |
+ datanames = union(names(data), names(join_keys))) {+ |
+
64 | +80x | +
+ checkmate::assert_list(data)+ |
+
65 | +80x | +
+ checkmate::assert_class(join_keys, "join_keys")+ |
+
66 | +49x | +
+ if (is.null(datanames)) datanames <- character(0) # todo: allow to specify+ |
+
67 | +80x | +
+ checkmate::assert_character(datanames)+ |
+
68 | +80x | +
+ if (!any(is.language(code), is.character(code))) {+ |
+
69 | +! | +
+ stop("`code` must be a character or language object.")+ |
+
70 | ++ |
+ }+ |
+
71 | ++ | + + | +
72 | +80x | +
+ if (is.language(code)) {+ |
+
73 | +2x | +
+ code <- paste(lang2calls(code), collapse = "\n")+ |
+
74 | ++ |
+ }+ |
+
75 | +80x | +
+ if (length(code)) {+ |
+
76 | +13x | +
+ code <- paste(code, collapse = "\n")+ |
+
77 | ++ |
+ }+ |
+
78 | +80x | +
+ verified <- (length(code) == 0L && length(data) == 0L)+ |
+
79 | ++ | + + | +
80 | +80x | +
+ id <- sample.int(.Machine$integer.max, size = length(code))+ |
+
81 | ++ | + + | +
82 | +80x | +
+ new_env <- rlang::env_clone(list2env(data), parent = parent.env(.GlobalEnv))+ |
+
83 | +80x | +
+ lockEnvironment(new_env, bindings = TRUE)+ |
+
84 | ++ | + + | +
85 | +80x | +
+ methods::new(+ |
+
86 | +80x | +
+ "teal_data",+ |
+
87 | +80x | +
+ env = new_env,+ |
+
88 | +80x | +
+ code = code,+ |
+
89 | +80x | +
+ warnings = rep("", length(code)),+ |
+
90 | +80x | +
+ messages = rep("", length(code)),+ |
+
91 | +80x | +
+ id = id,+ |
+
92 | +80x | +
+ join_keys = join_keys,+ |
+
93 | +80x | +
+ datanames = datanames,+ |
+
94 | +80x | +
+ verified = verified+ |
+
95 | ++ |
+ )+ |
+
96 | ++ |
+ }+ |
+
1 | ++ |
+ #' Verify code reproducibility+ |
+
2 | ++ |
+ #'+ |
+
3 | ++ |
+ #' Checks whether code in `teal_data` object reproduces the stored objects.+ |
+
4 | ++ |
+ #'+ |
+
5 | ++ |
+ #' If objects created by code in the `@code` slot of `x` are `all_equal` to the contents of the `@env` slot,+ |
+
6 | ++ |
+ #' the function updates the `@verified` slot to `TRUE` in the returned `teal_data` object.+ |
+
7 | ++ |
+ #' Once verified, the slot will always be set to `TRUE`.+ |
+
8 | ++ |
+ #' If the `@code` fails to recreate objects in `teal_data@env`, an error is raised.+ |
+
9 | ++ |
+ #'+ |
+
10 | ++ |
+ #' @return Input `teal_data` object or error.+ |
+
11 | ++ |
+ #'+ |
+
12 | ++ |
+ #' @param x `teal_data` object+ |
+
13 | ++ |
+ #' @examples+ |
+
14 | ++ |
+ #' tdata1 <- teal_data()+ |
+
15 | ++ |
+ #' tdata1 <- within(tdata1, {+ |
+
16 | ++ |
+ #' a <- 1+ |
+
17 | ++ |
+ #' b <- a^5+ |
+
18 | ++ |
+ #' c <- list(x = 2)+ |
+
19 | ++ |
+ #' })+ |
+
20 | ++ |
+ #' verify(tdata1)+ |
+
21 | ++ |
+ #'+ |
+
22 | ++ |
+ #' tdata2 <- teal_data(x1 = iris, code = "x1 <- iris")+ |
+
23 | ++ |
+ #' verify(tdata2)+ |
+
24 | ++ |
+ #' verify(tdata2)@verified+ |
+
25 | ++ |
+ #' tdata2@verified+ |
+
26 | ++ |
+ #'+ |
+
27 | ++ |
+ #' tdata3 <- teal_data()+ |
+
28 | ++ |
+ #' tdata3 <- within(tdata3, {+ |
+
29 | ++ |
+ #' stop("error")+ |
+
30 | ++ |
+ #' })+ |
+
31 | ++ |
+ #' try(verify(tdata3)) # fails+ |
+
32 | ++ |
+ #'+ |
+
33 | ++ |
+ #'+ |
+
34 | ++ |
+ #' a <- 1+ |
+
35 | ++ |
+ #' b <- a + 2+ |
+
36 | ++ |
+ #' c <- list(x = 2)+ |
+
37 | ++ |
+ #' d <- 5+ |
+
38 | ++ |
+ #' tdata4 <- teal_data(+ |
+
39 | ++ |
+ #' a = a, b = b, c = c, d = d,+ |
+
40 | ++ |
+ #' code = "a <- 1+ |
+
41 | ++ |
+ #' b <- a+ |
+
42 | ++ |
+ #' c <- list(x = 2)+ |
+
43 | ++ |
+ #' e <- 1"+ |
+
44 | ++ |
+ #' )+ |
+
45 | ++ |
+ #' tdata4+ |
+
46 | ++ |
+ #' \dontrun{+ |
+
47 | ++ |
+ #' verify(tdata4) # fails+ |
+
48 | ++ |
+ #' }+ |
+
49 | ++ |
+ #'+ |
+
50 | ++ |
+ #' @name verify+ |
+
51 | ++ |
+ #' @rdname verify+ |
+
52 | ++ |
+ #' @aliases verify,teal_data-method+ |
+
53 | ++ |
+ #' @aliases verify,qenv.error-method+ |
+
54 | ++ |
+ #'+ |
+
55 | ++ |
+ #' @export+ |
+
56 | +5x | +
+ setGeneric("verify", function(x) standardGeneric("verify"))+ |
+
57 | ++ |
+ setMethod("verify", signature = "teal_data", definition = function(x) {+ |
+
58 | +4x | +
+ if (x@verified) {+ |
+
59 | +2x | +
+ return(x)+ |
+
60 | ++ |
+ }+ |
+
61 | +2x | +
+ x_name <- deparse(substitute(x))+ |
+
62 | +2x | +
+ y <- eval_code(teal_data(), get_code(x))+ |
+
63 | ++ | + + | +
64 | +2x | +
+ if (inherits(y, "qenv.error")) {+ |
+
65 | +! | +
+ stop(conditionMessage(y), call. = FALSE)+ |
+
66 | ++ |
+ }+ |
+
67 | ++ | + + | +
68 | +2x | +
+ reproduced <- isTRUE(all.equal(x@env, y@env))+ |
+
69 | +2x | +
+ if (reproduced) {+ |
+
70 | +1x | +
+ x@verified <- TRUE+ |
+
71 | +1x | +
+ methods::validObject(x)+ |
+
72 | +1x | +
+ x+ |
+
73 | ++ |
+ } else {+ |
+
74 | +1x | +
+ error <- "Code verification failed."+ |
+
75 | ++ | + + | +
76 | +1x | +
+ objects_diff <- vapply(+ |
+
77 | +1x | +
+ intersect(names(x@env), names(y@env)),+ |
+
78 | +1x | +
+ function(element) {+ |
+
79 | +1x | +
+ isTRUE(all.equal(x@env[[element]], y@env[[element]]))+ |
+
80 | ++ |
+ },+ |
+
81 | +1x | +
+ logical(1)+ |
+
82 | ++ |
+ )+ |
+
83 | ++ | + + | +
84 | +1x | +
+ names_diff_other <- setdiff(names(y@env), names(x@env))+ |
+
85 | +1x | +
+ names_diff_inenv <- setdiff(names(x@env), names(y@env))+ |
+
86 | ++ | + + | +
87 | +1x | +
+ if (length(objects_diff)) {+ |
+
88 | +1x | +
+ error <- c(+ |
+
89 | +1x | +
+ error,+ |
+
90 | +1x | +
+ paste0("Object(s) recreated with code that have different structure in ", x_name, ":"),+ |
+
91 | +1x | +
+ paste0(" \u2022 ", names(which(!objects_diff)))+ |
+
92 | ++ |
+ )+ |
+
93 | ++ |
+ }+ |
+
94 | +1x | +
+ if (length(names_diff_inenv)) {+ |
+
95 | +! | +
+ error <- c(+ |
+
96 | +! | +
+ error,+ |
+
97 | +! | +
+ paste0("Object(s) not created with code that exist in ", x_name, ":"),+ |
+
98 | +! | +
+ paste0(" \u2022 ", names_diff_inenv)+ |
+
99 | ++ |
+ )+ |
+
100 | ++ |
+ }+ |
+
101 | +1x | +
+ if (length(names_diff_other)) {+ |
+
102 | +! | +
+ error <- c(+ |
+
103 | +! | +
+ error,+ |
+
104 | +! | +
+ paste0("Object(s) created with code that do not exist in ", x_name, ":"),+ |
+
105 | +! | +
+ paste0(" \u2022 ", names_diff_other)+ |
+
106 | ++ |
+ )+ |
+
107 | ++ |
+ }+ |
+
108 | ++ | + + | +
109 | +1x | +
+ stop(paste(error, collapse = "\n"), call. = FALSE)+ |
+
110 | ++ |
+ }+ |
+
111 | ++ |
+ })+ |
+
112 | ++ |
+ setMethod("verify", signature = "qenv.error", definition = function(x) {+ |
+
113 | +1x | +
+ stop(conditionMessage(x), call. = FALSE)+ |
+
114 | ++ |
+ })+ |
+
1 | ++ |
+ #' @rdname join_keys+ |
+
2 | ++ |
+ #' @order 4+ |
+
3 | ++ |
+ #' @export+ |
+
4 | ++ |
+ #' @examples+ |
+
5 | ++ |
+ #' # Merging multiple `join_keys` objects ---+ |
+
6 | ++ |
+ #'+ |
+
7 | ++ |
+ #' jk_merged <- c(+ |
+
8 | ++ |
+ #' jk,+ |
+
9 | ++ |
+ #' join_keys(+ |
+
10 | ++ |
+ #' join_key("ds4", keys = c("pk4", "pk4_2")),+ |
+
11 | ++ |
+ #' join_key("ds3", "ds4", c(pk3 = "pk4_2"))+ |
+
12 | ++ |
+ #' )+ |
+
13 | ++ |
+ #' )+ |
+
14 | ++ |
+ c.join_keys <- function(...) {+ |
+
15 | +125x | +
+ x <- rlang::list2(...)+ |
+
16 | +125x | +
+ checkmate::assert_list(x, types = c("join_keys", "join_key_set"))+ |
+
17 | ++ | + + | +
18 | +122x | +
+ Reduce(+ |
+
19 | +122x | +
+ init = join_keys(),+ |
+
20 | +122x | +
+ x = x,+ |
+
21 | +122x | +
+ f = function(.x, .y) {+ |
+
22 | +394x | +
+ out <- utils::modifyList(.x, .y, keep.null = FALSE)+ |
+
23 | +394x | +
+ parents(out) <- utils::modifyList(attr(.x, "parents"), attr(.y, "parents"), keep.null = FALSE)+ |
+
24 | +392x | +
+ out+ |
+
25 | ++ |
+ }+ |
+
26 | ++ |
+ )+ |
+
27 | ++ |
+ }+ |
+
28 | ++ | + + | +
29 | ++ |
+ #' @rdname join_keys+ |
+
30 | ++ |
+ #' @order 4+ |
+
31 | ++ |
+ #' @export+ |
+
32 | ++ |
+ #' @examples+ |
+
33 | ++ |
+ #' # note: merge can be performed with both join_keys and join_key_set+ |
+
34 | ++ |
+ #'+ |
+
35 | ++ |
+ #' jk_merged <- c(+ |
+
36 | ++ |
+ #' jk_merged,+ |
+
37 | ++ |
+ #' join_key("ds5", keys = "pk5"),+ |
+
38 | ++ |
+ #' join_key("ds1", "ds5", c(pk1 = "pk5"))+ |
+
39 | ++ |
+ #' )+ |
+
40 | ++ |
+ c.join_key_set <- function(...) {+ |
+
41 | +2x | +
+ c.join_keys(...)+ |
+
42 | ++ |
+ }+ |
+
1 | ++ |
+ #' Manage relationships between datasets using `join_keys`+ |
+
2 | ++ |
+ #' @order 1+ |
+
3 | ++ |
+ #' @name join_keys+ |
+
4 | ++ |
+ #'+ |
+
5 | ++ |
+ #' @usage ## Constructor, getter and setter+ |
+
6 | ++ |
+ #' join_keys(...)+ |
+
7 | ++ |
+ #'+ |
+
8 | ++ |
+ #' @description+ |
+
9 | ++ |
+ #' Facilitates the creation and retrieval of relationships between datasets.+ |
+
10 | ++ |
+ #' `join_keys` class extends `list` and contains keys connecting pairs of datasets.+ |
+
11 | ++ |
+ #' Each element of the list contains keys for specific dataset.+ |
+
12 | ++ |
+ #' Each dataset can have a relationship with itself (primary key) and with other datasets.+ |
+
13 | ++ |
+ #'+ |
+
14 | ++ |
+ #' Note that `join_keys` list is symmetrical and assumes a default direction, that is:+ |
+
15 | ++ |
+ #' when keys are set between `ds1` and `ds2`, it defines `ds1` as the parent+ |
+
16 | ++ |
+ #' in a parent-child relationship and the mapping is automatically mirrored between+ |
+
17 | ++ |
+ #' `ds2` and `ds1`.+ |
+
18 | ++ |
+ #'+ |
+
19 | ++ |
+ #' @section Methods (by class):+ |
+
20 | ++ |
+ #' - `join_keys()`: Returns an empty `join_keys` object when called without arguments.+ |
+
21 | ++ |
+ #' - `join_keys(join_keys)`: Returns itself.+ |
+
22 | ++ |
+ #' - `join_keys(teal_data)`: Returns the `join_keys` object contained in `teal_data` object.+ |
+
23 | ++ |
+ #' - `join_keys(...)`: Creates a new object with one or more `join_key_set` parameters.+ |
+
24 | ++ |
+ #'+ |
+
25 | ++ |
+ #' @param ... optional,+ |
+
26 | ++ |
+ #' - either `teal_data` or `join_keys` object to extract `join_keys`+ |
+
27 | ++ |
+ #' - or any number of `join_key_set` objects to create `join_keys`+ |
+
28 | ++ |
+ #' - or nothing to create an empty `join_keys`+ |
+
29 | ++ |
+ #' @param value For `x[i, j, directed = TRUE)] <- value` (named/unnamed `character`)+ |
+
30 | ++ |
+ #' Column mapping between datasets.+ |
+
31 | ++ |
+ #'+ |
+
32 | ++ |
+ #' For `join_keys(x) <- value`: (`join_key_set` or list of `join_key_set`) relationship+ |
+
33 | ++ |
+ #' pairs to add to `join_keys` list.+ |
+
34 | ++ |
+ #'+ |
+
35 | ++ |
+ #'+ |
+
36 | ++ |
+ #' @return `join_keys` object.+ |
+
37 | ++ |
+ #'+ |
+
38 | ++ |
+ #' @examples+ |
+
39 | ++ |
+ #' # Creating a new join keys ----+ |
+
40 | ++ |
+ #'+ |
+
41 | ++ |
+ #' jk <- join_keys(+ |
+
42 | ++ |
+ #' join_key("ds1", "ds1", "pk1"),+ |
+
43 | ++ |
+ #' join_key("ds2", "ds2", "pk2"),+ |
+
44 | ++ |
+ #' join_key("ds3", "ds3", "pk3"),+ |
+
45 | ++ |
+ #' join_key("ds1", "ds2", c(pk1 = "pk2")),+ |
+
46 | ++ |
+ #' join_key("ds1", "ds3", c(pk1 = "pk3"))+ |
+
47 | ++ |
+ #' )+ |
+
48 | ++ |
+ #'+ |
+
49 | ++ |
+ #' jk+ |
+
50 | ++ |
+ #'+ |
+
51 | ++ |
+ #' @export+ |
+
52 | ++ |
+ #'+ |
+
53 | ++ |
+ #' @seealso [join_key()] for creating `join_keys_set`,+ |
+
54 | ++ |
+ #' [parents()] for parent operations,+ |
+
55 | ++ |
+ #' [teal_data()] for `teal_data` constructor _and_+ |
+
56 | ++ |
+ #' [default_cdisc_join_keys] for default CDISC keys.+ |
+
57 | ++ |
+ #'+ |
+
58 | ++ |
+ join_keys <- function(...) {+ |
+
59 | +790x | +
+ if (missing(...)) {+ |
+
60 | +228x | +
+ return(new_join_keys())+ |
+
61 | ++ |
+ }+ |
+
62 | +562x | +
+ x <- rlang::list2(...)+ |
+
63 | +562x | +
+ if (length(x) == 1L) {+ |
+
64 | +497x | +
+ UseMethod("join_keys", x[[1]])+ |
+
65 | ++ |
+ } else {+ |
+
66 | +65x | +
+ join_keys.default(...)+ |
+
67 | ++ |
+ }+ |
+
68 | ++ |
+ }+ |
+
69 | ++ | + + | +
70 | ++ |
+ #' @rdname join_keys+ |
+
71 | ++ |
+ #' @order 1+ |
+
72 | ++ |
+ #' @export+ |
+
73 | ++ |
+ join_keys.default <- function(...) {+ |
+
74 | +110x | +
+ c(new_join_keys(), ...)+ |
+
75 | ++ |
+ }+ |
+
76 | ++ | + + | +
77 | ++ |
+ #' @rdname join_keys+ |
+
78 | ++ |
+ #' @order 1+ |
+
79 | ++ |
+ #' @export+ |
+
80 | ++ |
+ join_keys.join_keys <- function(...) {+ |
+
81 | +447x | +
+ x <- rlang::list2(...)+ |
+
82 | +447x | +
+ x[[1]]+ |
+
83 | ++ |
+ }+ |
+
84 | ++ | + + | +
85 | ++ |
+ #' @rdname join_keys+ |
+
86 | ++ |
+ #' @order 1+ |
+
87 | ++ |
+ #' @export+ |
+
88 | ++ |
+ join_keys.teal_data <- function(...) {+ |
+
89 | +5x | +
+ x <- rlang::list2(...)+ |
+
90 | +5x | +
+ x[[1]]@join_keys+ |
+
91 | ++ |
+ }+ |
+
92 | ++ | + + | +
93 | ++ |
+ #' @rdname join_keys+ |
+
94 | ++ |
+ #' @order 5+ |
+
95 | ++ |
+ #'+ |
+
96 | ++ |
+ #' @section Functions:+ |
+
97 | ++ |
+ #' - `join_keys(x) <- value`: Assignment of the `join_keys` in object with `value`.+ |
+
98 | ++ |
+ #' `value` needs to be an object of class `join_keys` or `join_key_set`.+ |
+
99 | ++ |
+ #'+ |
+
100 | ++ |
+ #' @param x (`join_keys`) empty object to set the new relationship pairs.+ |
+
101 | ++ |
+ #' `x` is typically an object of `join_keys` class. When called with the `join_keys(x)`+ |
+
102 | ++ |
+ #' or `join_keys(x) <- value` then it can also take a supported class (`teal_data`, `join_keys`)+ |
+
103 | ++ |
+ #'+ |
+
104 | ++ |
+ #' @export+ |
+
105 | ++ |
+ `join_keys<-` <- function(x, value) {+ |
+
106 | +8x | +
+ checkmate::assert_class(value, classes = c("join_keys", "list"))+ |
+
107 | +8x | +
+ UseMethod("join_keys<-", x)+ |
+
108 | ++ |
+ }+ |
+
109 | ++ | + + | +
110 | ++ |
+ #' @rdname join_keys+ |
+
111 | ++ |
+ #' @order 5+ |
+
112 | ++ |
+ #' @export+ |
+
113 | ++ |
+ #' @examples+ |
+
114 | ++ |
+ #' # Assigning keys via join_keys(x)[i, j] <- value ----+ |
+
115 | ++ |
+ #'+ |
+
116 | ++ |
+ #' obj <- join_keys()+ |
+
117 | ++ |
+ #' # or+ |
+
118 | ++ |
+ #' obj <- teal_data()+ |
+
119 | ++ |
+ #'+ |
+
120 | ++ |
+ #' join_keys(obj)["ds1", "ds1"] <- "pk1"+ |
+
121 | ++ |
+ #' join_keys(obj)["ds2", "ds2"] <- "pk2"+ |
+
122 | ++ |
+ #' join_keys(obj)["ds3", "ds3"] <- "pk3"+ |
+
123 | ++ |
+ #' join_keys(obj)["ds1", "ds2"] <- c(pk1 = "pk2")+ |
+
124 | ++ |
+ #' join_keys(obj)["ds1", "ds3"] <- c(pk1 = "pk3")+ |
+
125 | ++ |
+ #'+ |
+
126 | ++ |
+ #' identical(jk, join_keys(obj))+ |
+
127 | ++ |
+ `join_keys<-.join_keys` <- function(x, value) {+ |
+
128 | +6x | +
+ value+ |
+
129 | ++ |
+ }+ |
+
130 | ++ | + + | +
131 | ++ |
+ #' @rdname join_keys+ |
+
132 | ++ |
+ #' @order 5+ |
+
133 | ++ |
+ #' @export+ |
+
134 | ++ |
+ #' @examples+ |
+
135 | ++ |
+ #' # Setter for join_keys within teal_data ----+ |
+
136 | ++ |
+ #'+ |
+
137 | ++ |
+ #' td <- teal_data()+ |
+
138 | ++ |
+ #' join_keys(td) <- jk+ |
+
139 | ++ |
+ #'+ |
+
140 | ++ |
+ #' join_keys(td)["ds1", "ds2"] <- "new_key"+ |
+
141 | ++ |
+ #' join_keys(td) <- c(join_keys(td), join_keys(join_key("ds3", "ds2", "key3")))+ |
+
142 | ++ |
+ #' join_keys(td)+ |
+
143 | ++ |
+ `join_keys<-.teal_data` <- function(x, value) {+ |
+
144 | +2x | +
+ join_keys(x@join_keys) <- value+ |
+
145 | +2x | +
+ x+ |
+
146 | ++ |
+ }+ |
+
147 | ++ | + + | +
148 | ++ |
+ #' Internal constructor+ |
+
149 | ++ |
+ #'+ |
+
150 | ++ |
+ #' @return an empty `join_keys` list+ |
+
151 | ++ |
+ #'+ |
+
152 | ++ |
+ #' @keywords internal+ |
+
153 | ++ |
+ new_join_keys <- function() {+ |
+
154 | +358x | +
+ structure(+ |
+
155 | +358x | +
+ list(),+ |
+
156 | +358x | +
+ class = c("join_keys", "list"),+ |
+
157 | +358x | +
+ "parents" = list()+ |
+
158 | ++ |
+ )+ |
+
159 | ++ |
+ }+ |
+
1 | ++ |
+ #' Comprehensive data integration function for `teal` applications+ |
+
2 | ++ |
+ #'+ |
+
3 | ++ |
+ #' @description+ |
+
4 | ++ |
+ #' `r lifecycle::badge("stable")`+ |
+
5 | ++ |
+ #'+ |
+
6 | ++ |
+ #' Universal function to pass data to teal application.+ |
+
7 | ++ |
+ #'+ |
+
8 | ++ |
+ #' @param ... any number of objects (presumably data objects) provided as `name = value` pairs.+ |
+
9 | ++ |
+ #'+ |
+
10 | ++ |
+ #' @param join_keys (`join_keys` or single `join_key_set`)+ |
+
11 | ++ |
+ #' optional object with datasets column names used for joining.+ |
+
12 | ++ |
+ #' If empty then no joins between pairs of objects.+ |
+
13 | ++ |
+ #'+ |
+
14 | ++ |
+ #' @param code (`character`, `language`) optional code to reproduce the datasets provided in `...`.+ |
+
15 | ++ |
+ #' Note this code is not executed and the `teal_data` may not be reproducible+ |
+
16 | ++ |
+ #'+ |
+
17 | ++ |
+ #' @param check (`logical`) `r lifecycle::badge("deprecated")`+ |
+
18 | ++ |
+ #' Use [verify()] to verify code reproducibility .+ |
+
19 | ++ |
+ #'+ |
+
20 | ++ |
+ #' @return A `teal_data` object.+ |
+
21 | ++ |
+ #'+ |
+
22 | ++ |
+ #' @export+ |
+
23 | ++ |
+ #'+ |
+
24 | ++ |
+ #' @examples+ |
+
25 | ++ |
+ #' teal_data(x1 = iris, x2 = mtcars)+ |
+
26 | ++ |
+ #'+ |
+
27 | ++ |
+ teal_data <- function(...,+ |
+
28 | ++ |
+ join_keys = teal.data::join_keys(),+ |
+
29 | ++ |
+ code = character(0),+ |
+
30 | ++ |
+ check) {+ |
+
31 | +80x | +
+ data_objects <- rlang::list2(...)+ |
+
32 | +80x | +
+ if (inherits(join_keys, "join_key_set")) {+ |
+
33 | +! | +
+ join_keys <- teal.data::join_keys(join_keys)+ |
+
34 | ++ |
+ }+ |
+
35 | +80x | +
+ if (!missing(check)) {+ |
+
36 | +! | +
+ lifecycle::deprecate_stop(+ |
+
37 | +! | +
+ when = "0.4.0",+ |
+
38 | +! | +
+ "teal_data(+ |
+
39 | +! | +
+ check = 'check argument is deprecated. Use `verify()` to verify code reproducibility.+ |
+
40 | +! | +
+ Find more information on https://github.com/insightsengineering/teal/discussions/945'+ |
+
41 | ++ |
+ )"+ |
+
42 | ++ |
+ )+ |
+
43 | ++ |
+ }+ |
+
44 | ++ | + + | +
45 | ++ |
+ if (+ |
+
46 | +80x | +
+ checkmate::test_list(+ |
+
47 | +80x | +
+ data_objects,+ |
+
48 | +80x | +
+ types = c("TealDataConnector", "TealDataset", "TealDatasetConnector"),+ |
+
49 | +80x | +
+ min.len = 1+ |
+
50 | ++ |
+ )+ |
+
51 | ++ |
+ ) {+ |
+
52 | +! | +
+ lifecycle::deprecate_stop(+ |
+
53 | +! | +
+ when = "0.4.0",+ |
+
54 | +! | +
+ "teal_data(+ |
+
55 | +! | +
+ data_objects = 'should use data directly. Using TealDatasetConnector and TealDataset is deprecated.+ |
+
56 | +! | +
+ Find more information on https://github.com/insightsengineering/teal/discussions/945'+ |
+
57 | ++ |
+ )"+ |
+
58 | ++ |
+ )+ |
+
59 | ++ |
+ } else {+ |
+
60 | +80x | +
+ if (length(data_objects) > 0 && !checkmate::test_names(names(data_objects), type = "named")) {+ |
+
61 | +! | +
+ stop("Dot (`...`) arguments on `teal_data()` must be named.")+ |
+
62 | ++ |
+ }+ |
+
63 | +80x | +
+ new_teal_data(+ |
+
64 | +80x | +
+ data = data_objects,+ |
+
65 | +80x | +
+ code = code,+ |
+
66 | +80x | +
+ join_keys = join_keys+ |
+
67 | ++ |
+ )+ |
+
68 | ++ |
+ }+ |
+
69 | ++ |
+ }+ |
+
1 | ++ |
+ #' Show `teal_data` object+ |
+
2 | ++ |
+ #'+ |
+
3 | ++ |
+ #' Prints `teal_data` object.+ |
+
4 | ++ |
+ #'+ |
+
5 | ++ |
+ #' @param object (`teal_data`)+ |
+
6 | ++ |
+ #' @return Input `teal_data` object.+ |
+
7 | ++ |
+ #' @importFrom methods show+ |
+
8 | ++ |
+ #' @examples+ |
+
9 | ++ |
+ #' teal_data()+ |
+
10 | ++ |
+ #' teal_data(x = iris, code = "x = iris")+ |
+
11 | ++ |
+ #' verify(teal_data(x = iris, code = "x = iris"))+ |
+
12 | ++ |
+ #' @export+ |
+
13 | ++ |
+ setMethod("show", signature = "teal_data", function(object) {+ |
+
14 | +! | +
+ if (object@verified) {+ |
+
15 | +! | +
+ cat("\u2705\ufe0e", "verified teal_data object\n")+ |
+
16 | ++ |
+ } else {+ |
+
17 | +! | +
+ cat("\u2716", "unverified teal_data object\n")+ |
+
18 | ++ |
+ }+ |
+
19 | +! | +
+ rlang::env_print(object@env)+ |
+
20 | ++ |
+ })+ |
+
1 | ++ |
+ #' Generate sample CDISC datasets+ |
+
2 | ++ |
+ #'+ |
+
3 | ++ |
+ #' Retrieves example CDISC datasets for use in examples and testing.+ |
+
4 | ++ |
+ #'+ |
+
5 | ++ |
+ #' This function returns a dummy dataset and should only be used within `teal.data`.+ |
+
6 | ++ |
+ #' Note that the datasets are not created and maintained in `teal.data`, they are retrieved its dependencies.+ |
+
7 | ++ |
+ #'+ |
+
8 | ++ |
+ #' @param dataname (`character(1)`) name of a CDISC dataset+ |
+
9 | ++ |
+ #'+ |
+
10 | ++ |
+ #' @return A CDISC dataset as a `data.frame`.+ |
+
11 | ++ |
+ #'+ |
+
12 | ++ |
+ #' @export+ |
+
13 | ++ |
+ example_cdisc_data <- function(dataname = c("ADSL", "ADAE", "ADLB", "ADCM", "ADEX", "ADRS", "ADTR", "ADTTE", "ADVS")) {+ |
+
14 | +! | +
+ dataname <- sprintf("r%s", match.arg(dataname))+ |
+
15 | +! | +
+ dynGet(dataname, ifnotfound = stop(dataname, " not found"), inherits = TRUE)+ |
+
16 | ++ |
+ }+ |
+
1 | ++ |
+ #' Data input for `teal` app+ |
+
2 | ++ |
+ #'+ |
+
3 | ++ |
+ #' @description+ |
+
4 | ++ |
+ #' `r lifecycle::badge("stable")`+ |
+
5 | ++ |
+ #'+ |
+
6 | ++ |
+ #' Function is a wrapper around [teal_data()] and guesses `join_keys`+ |
+
7 | ++ |
+ #' for given datasets whose names match ADAM datasets names.+ |
+
8 | ++ |
+ #'+ |
+
9 | ++ |
+ #' @inheritParams teal_data+ |
+
10 | ++ |
+ #' @param join_keys (`join_keys` or single `join_key_set`)+ |
+
11 | ++ |
+ #' optional object with datasets column names used for joining.+ |
+
12 | ++ |
+ #' If empty then it would be automatically derived basing on intersection of datasets primary keys.+ |
+
13 | ++ |
+ #' For ADAM datasets it would be automatically derived.+ |
+
14 | ++ |
+ #'+ |
+
15 | ++ |
+ #' @return A `teal_data` object.+ |
+
16 | ++ |
+ #'+ |
+
17 | ++ |
+ #' @details This function checks if there were keys added to all data sets.+ |
+
18 | ++ |
+ #'+ |
+
19 | ++ |
+ #' @examples+ |
+
20 | ++ |
+ #' data <- cdisc_data(+ |
+
21 | ++ |
+ #' join_keys = join_keys(+ |
+
22 | ++ |
+ #' join_key("ADSL", "ADTTE", c("STUDYID" = "STUDYID", "USUBJID" = "USUBJID"))+ |
+
23 | ++ |
+ #' )+ |
+
24 | ++ |
+ #' )+ |
+
25 | ++ |
+ #'+ |
+
26 | ++ |
+ #' data <- within(data, {+ |
+
27 | ++ |
+ #' ADSL <- example_cdisc_data("ADSL")+ |
+
28 | ++ |
+ #' ADTTE <- example_cdisc_data("ADTTE")+ |
+
29 | ++ |
+ #' })+ |
+
30 | ++ |
+ #'+ |
+
31 | ++ |
+ #' @export+ |
+
32 | ++ |
+ #'+ |
+
33 | ++ |
+ cdisc_data <- function(...,+ |
+
34 | ++ |
+ join_keys = teal.data::default_cdisc_join_keys[names(rlang::list2(...))],+ |
+
35 | ++ |
+ code = character(0),+ |
+
36 | ++ |
+ check) {+ |
+
37 | +1x | +
+ teal_data(..., join_keys = join_keys, code = code, check = check)+ |
+
38 | ++ |
+ }+ |
+