Skip to content

Commit

Permalink
Closes #2563 no_list_columns: add check to avoid list columns (#2592)
Browse files Browse the repository at this point in the history
* #2563 no_list_columns: add check to avoid list columns

* #2563 no_list_columns: update documentation

* #2563 no_list_columns: fix example and add see also

* #2563 no_list_columns: avoid package name prefix

* #2563 no_list_columns: update required admiraldev version

* #2563 no_list_columns: split files (and clean up tests)

* #2563 no_list_columns: update man

---------

Co-authored-by: Daniel Sjoberg <[email protected]>
Co-authored-by: Ben Straub <[email protected]>
  • Loading branch information
3 people authored Dec 13, 2024
1 parent 29c2804 commit e2415a0
Show file tree
Hide file tree
Showing 13 changed files with 297 additions and 289 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ BugReports: https://github.com/pharmaverse/admiral/issues
Depends:
R (>= 4.0)
Imports:
admiraldev (>= 1.1.0),
admiraldev (>= 1.1.0.9007),
cli (>= 3.6.2),
dplyr (>= 1.0.5),
hms (>= 0.5.3),
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,7 @@ importFrom(rlang,call2)
importFrom(rlang,call_name)
importFrom(rlang,caller_env)
importFrom(rlang,cnd_muffle)
importFrom(rlang,cnd_signal)
importFrom(rlang,current_env)
importFrom(rlang,enexpr)
importFrom(rlang,enexprs)
Expand Down
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@ or that the queries dataset contains duplicates. (#2543)

- In `get_summary_records()`, previously deprecated formal arguments `analysis_var` and `summary_fun` now removed from function, documentation, tests etc. (#2521)

- A check was added to `derive_vars_transposed()` and `derive_vars_atc()` which
stops execution if the records in `dataset_merge` or `dataset_facm` respectively
are not unique. (#2563)

- The functions `derive_vars_joined()`, `derive_var_joined_exist_flag()`,
`derive_extreme_event()`, and `filter_joined()` were updated to reduce their
memory consumption. (#2590)
Expand Down
10 changes: 5 additions & 5 deletions R/admiral-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,11 @@
#' map_if map_lgl map2 modify_at modify_if pmap reduce transpose
#' walk
#' @importFrom rlang := abort arg_match as_data_mask as_function as_label
#' as_name as_string call2 call_name caller_env cnd_muffle current_env .data
#' enexpr enexprs eval_bare eval_tidy expr expr_interp exec expr_label exprs
#' f_lhs f_rhs inform is_call is_expression is_missing is_named list2
#' new_environment new_formula parse_expr parse_exprs set_names sym syms
#' type_of warn
#' as_name as_string call2 call_name caller_env cnd_muffle cnd_signal
#' current_env .data enexpr enexprs eval_bare eval_tidy expr expr_interp exec
#' expr_label exprs f_lhs f_rhs inform is_call is_expression is_missing
#' is_named list2 new_environment new_formula parse_expr parse_exprs set_names
#' sym syms type_of warn
#' @importFrom stats setNames
#' @importFrom stringr str_c str_count str_detect str_extract str_glue
#' str_length str_locate str_locate_all str_match str_remove
Expand Down
108 changes: 108 additions & 0 deletions R/derive_vars_atc.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
#' Derive ATC Class Variables
#'
#' @description Add Anatomical Therapeutic Chemical class variables from `FACM` to `ADCM`.
#'
#' **Note:** This is a wrapper function for the more generic `derive_vars_transposed()`.
#'
#' @param dataset
#' `r roxygen_param_dataset(expected_vars = c("by_vars"))`
#'
#' @param dataset_facm FACM dataset
#'
#' The variables specified by the `by_vars`, `id_vars`, and `value_var`
#' arguments and `FATESTCD` are required. The variables `by_vars`, `id_vars`,
#' and `FATESTCD` must be a unique key.
#'
#' @param by_vars Grouping variables
#'
#' Keys used to merge `dataset_facm` with `dataset`.
#'
#' @param id_vars ID variables
#'
#' Variables (excluding by_vars) that uniquely identify each observation in `dataset_merge`.
#'
#' `r roxygen_param_by_vars()`
#'
#' @param value_var The variable of `dataset_facm` containing the values of the
#' transposed variables
#'
#' @return The input dataset with ATC variables added
#'
#' @seealso [derive_vars_transposed()]
#'
#' @family der_occds
#' @keywords der_occds
#'
#' @export
#'
#' @examples
#' library(tibble)
#'
#' cm <- tribble(
#' ~STUDYID, ~USUBJID, ~CMGRPID, ~CMREFID, ~CMDECOD,
#' "STUDY01", "BP40257-1001", "14", "1192056", "PARACETAMOL",
#' "STUDY01", "BP40257-1001", "18", "2007001", "SOLUMEDROL",
#' "STUDY01", "BP40257-1002", "19", "2791596", "SPIRONOLACTONE"
#' )
#' facm <- tribble(
#' ~STUDYID, ~USUBJID, ~FAGRPID, ~FAREFID, ~FATESTCD, ~FASTRESC,
#' "STUDY01", "BP40257-1001", "1", "1192056", "CMATC1CD", "N",
#' "STUDY01", "BP40257-1001", "1", "1192056", "CMATC2CD", "N02",
#' "STUDY01", "BP40257-1001", "1", "1192056", "CMATC3CD", "N02B",
#' "STUDY01", "BP40257-1001", "1", "1192056", "CMATC4CD", "N02BE",
#' "STUDY01", "BP40257-1001", "1", "2007001", "CMATC1CD", "D",
#' "STUDY01", "BP40257-1001", "1", "2007001", "CMATC2CD", "D10",
#' "STUDY01", "BP40257-1001", "1", "2007001", "CMATC3CD", "D10A",
#' "STUDY01", "BP40257-1001", "1", "2007001", "CMATC4CD", "D10AA",
#' "STUDY01", "BP40257-1001", "2", "2007001", "CMATC1CD", "D",
#' "STUDY01", "BP40257-1001", "2", "2007001", "CMATC2CD", "D07",
#' "STUDY01", "BP40257-1001", "2", "2007001", "CMATC3CD", "D07A",
#' "STUDY01", "BP40257-1001", "2", "2007001", "CMATC4CD", "D07AA",
#' "STUDY01", "BP40257-1001", "3", "2007001", "CMATC1CD", "H",
#' "STUDY01", "BP40257-1001", "3", "2007001", "CMATC2CD", "H02",
#' "STUDY01", "BP40257-1001", "3", "2007001", "CMATC3CD", "H02A",
#' "STUDY01", "BP40257-1001", "3", "2007001", "CMATC4CD", "H02AB",
#' "STUDY01", "BP40257-1002", "1", "2791596", "CMATC1CD", "C",
#' "STUDY01", "BP40257-1002", "1", "2791596", "CMATC2CD", "C03",
#' "STUDY01", "BP40257-1002", "1", "2791596", "CMATC3CD", "C03D",
#' "STUDY01", "BP40257-1002", "1", "2791596", "CMATC4CD", "C03DA"
#' )
#'
#' derive_vars_atc(cm, facm, id_vars = exprs(FAGRPID))
derive_vars_atc <- function(dataset,
dataset_facm,
by_vars = exprs(
!!!get_admiral_option("subject_keys"),
CMREFID = FAREFID
),
id_vars = NULL,
value_var = FASTRESC) {
value_var <- assert_symbol(enexpr(value_var))
assert_vars(by_vars)
assert_vars(id_vars, optional = TRUE)
assert_data_frame(dataset, required_vars = replace_values_by_names(by_vars))
assert_data_frame(
dataset_facm,
required_vars = exprs(!!!by_vars, !!value_var, !!!id_vars, FATESTCD)
)

tryCatch(
data_transposed <- derive_vars_transposed(
dataset,
select(dataset_facm, !!!unname(by_vars), !!value_var, !!!id_vars, FATESTCD),
by_vars = by_vars,
id_vars = id_vars,
key_var = FATESTCD,
value_var = !!value_var,
filter = str_detect(FATESTCD, "^CMATC[1-4](CD)?$")
),
merge_duplicates = function(cnd) {
cnd$message <- str_replace(cnd$message, "dataset_merge", "dataset_facm")
cnd$body[[1]] <- "Please check data and `by_vars` and `id_vars` arguments."
cnd_signal(cnd)
}
)
data_transposed %>%
select(-starts_with("FA")) %>%
rename_with(.fn = ~ str_remove(.x, "^CM"), .cols = starts_with("CMATC"))
}
142 changes: 22 additions & 120 deletions R/derive_vars_transposed.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@
#'
#' @param dataset_merge Dataset to transpose and merge
#'
#' The variables specified by the `by_vars`, `key_var` and `value_var` parameters
#' are expected
#' The variables specified by the `by_vars`, `id_vars`, `key_var` and
#' `value_var` arguments are expected. The variables `by_vars`, `id_vars`,
#' `key_var` have to be a unique key.
#'
#' @param by_vars Grouping variables
#'
Expand All @@ -35,17 +36,18 @@
#' <https://dplyr.tidyverse.org/reference/mutate-joins.html#arguments> for
#' more details.
#'
#' Permitted Values for `relationship`: `"one-to-one"`, `"one-to-many"`,
#' `"many-to-one"`, `"many-to-many"`, `NULL`.
#' *Permitted Values*: `"one-to-one"`, `"one-to-many"`, `"many-to-one"`,
#' `"many-to-many"`, `NULL`
#'
#' @details
#' After filtering `dataset_merge` based upon the condition provided in `filter`, this
#' dataset is transposed and subsequently merged onto `dataset` using `by_vars` as
#' keys.
#'
#'
#' @return The input dataset with transposed variables from `dataset_merge` added
#'
#' @seealso [derive_vars_atc()]
#'
#' @family der_gen
#' @keywords der_gen
#'
Expand Down Expand Up @@ -116,6 +118,21 @@ derive_vars_transposed <- function(dataset,
optional = TRUE
)

# check for duplicates in dataset_merge as these will create list columns,
# which is not acceptable for ADaM datasets
signal_duplicate_records(
dataset_merge,
by_vars = c(by_vars, id_vars, exprs(!!key_var)),
msg = c(
paste(
"Dataset {.arg dataset_merge} contains duplicate records with respect to",
"{.var {by_vars}}"
),
"Please check data and {.arg by_vars}, {.arg id_vars}, and {.arg key_var} arguments."
),
class = "merge_duplicates"
)

dataset_transposed <- dataset_merge %>%
filter_if(filter) %>%
pivot_wider(
Expand Down Expand Up @@ -164,121 +181,6 @@ derive_vars_transposed <- function(dataset,
),
call = parent.frame(n = 4)
)
},
"dplyr_error_join_relationship_one_to_many" = function(cnd) {
cli_abort(
message = c(
str_replace(
str_replace(
cnd$message, "`x`", "`dataset`"
), "`y`", "the transposed `dataset_merge`"
),
i = str_replace(
str_replace(
cnd$body, "`x`", "`dataset`"
), "`y`", "the transposed `dataset_merge`"
)
),
call = parent.frame(n = 4)
)
}
)
}

#' Derive ATC Class Variables
#'
#' @description Add Anatomical Therapeutic Chemical class variables from `FACM` to `ADCM`.
#'
#' **Note:** This is a wrapper function for the more generic `derive_vars_transposed()`.
#'
#' @param dataset
#' `r roxygen_param_dataset(expected_vars = c("by_vars"))`
#'
#' @param dataset_facm FACM dataset
#'
#' The variables specified by the `by_vars` and `value_var` parameters,
#' `FAGRPID` and `FATESTCD` are required
#'
#' @param by_vars Grouping variables
#'
#' Keys used to merge `dataset_facm` with `dataset`.
#'
#' @param id_vars ID variables
#'
#' Variables (excluding by_vars) that uniquely identify each observation in `dataset_merge`.
#'
#' `r roxygen_param_by_vars()`
#'
#' @param value_var The variable of `dataset_facm` containing the values of the
#' transposed variables
#'
#' Default: `FASTRESC`
#'
#'
#' @return The input dataset with ATC variables added
#'
#' @family der_occds
#' @keywords der_occds
#'
#' @export
#'
#' @examples
#' library(tibble)
#'
#' cm <- tribble(
#' ~STUDYID, ~USUBJID, ~CMGRPID, ~CMREFID, ~CMDECOD,
#' "STUDY01", "BP40257-1001", "14", "1192056", "PARACETAMOL",
#' "STUDY01", "BP40257-1001", "18", "2007001", "SOLUMEDROL",
#' "STUDY01", "BP40257-1002", "19", "2791596", "SPIRONOLACTONE"
#' )
#' facm <- tribble(
#' ~STUDYID, ~USUBJID, ~FAGRPID, ~FAREFID, ~FATESTCD, ~FASTRESC,
#' "STUDY01", "BP40257-1001", "1", "1192056", "CMATC1CD", "N",
#' "STUDY01", "BP40257-1001", "1", "1192056", "CMATC2CD", "N02",
#' "STUDY01", "BP40257-1001", "1", "1192056", "CMATC3CD", "N02B",
#' "STUDY01", "BP40257-1001", "1", "1192056", "CMATC4CD", "N02BE",
#' "STUDY01", "BP40257-1001", "1", "2007001", "CMATC1CD", "D",
#' "STUDY01", "BP40257-1001", "1", "2007001", "CMATC2CD", "D10",
#' "STUDY01", "BP40257-1001", "1", "2007001", "CMATC3CD", "D10A",
#' "STUDY01", "BP40257-1001", "1", "2007001", "CMATC4CD", "D10AA",
#' "STUDY01", "BP40257-1001", "2", "2007001", "CMATC1CD", "D",
#' "STUDY01", "BP40257-1001", "2", "2007001", "CMATC2CD", "D07",
#' "STUDY01", "BP40257-1001", "2", "2007001", "CMATC3CD", "D07A",
#' "STUDY01", "BP40257-1001", "2", "2007001", "CMATC4CD", "D07AA",
#' "STUDY01", "BP40257-1001", "3", "2007001", "CMATC1CD", "H",
#' "STUDY01", "BP40257-1001", "3", "2007001", "CMATC2CD", "H02",
#' "STUDY01", "BP40257-1001", "3", "2007001", "CMATC3CD", "H02A",
#' "STUDY01", "BP40257-1001", "3", "2007001", "CMATC4CD", "H02AB",
#' "STUDY01", "BP40257-1002", "1", "2791596", "CMATC1CD", "C",
#' "STUDY01", "BP40257-1002", "1", "2791596", "CMATC2CD", "C03",
#' "STUDY01", "BP40257-1002", "1", "2791596", "CMATC3CD", "C03D",
#' "STUDY01", "BP40257-1002", "1", "2791596", "CMATC4CD", "C03DA"
#' )
#'
#' derive_vars_atc(cm, facm)
derive_vars_atc <- function(dataset,
dataset_facm,
by_vars = exprs(
!!!get_admiral_option("subject_keys"),
CMREFID = FAREFID
),
id_vars = NULL,
value_var = FASTRESC) {
value_var <- assert_symbol(enexpr(value_var))
assert_vars(by_vars)
assert_vars(id_vars, optional = TRUE)
assert_data_frame(dataset, required_vars = replace_values_by_names(by_vars))
assert_data_frame(dataset_facm, required_vars = exprs(!!!by_vars, !!value_var, FAGRPID, FATESTCD))

dataset %>%
derive_vars_transposed(
select(dataset_facm, !!!unname(by_vars), !!value_var, FAGRPID, FATESTCD),
by_vars = by_vars,
id_vars = id_vars,
key_var = FATESTCD,
value_var = !!value_var,
filter = str_detect(FATESTCD, "^CMATC[1-4](CD)?$")
) %>%
select(-starts_with("FA")) %>%
rename_with(.fn = ~ str_remove(.x, "^CM"), .cols = starts_with("CMATC"))
}
14 changes: 12 additions & 2 deletions R/duplicates.R
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,10 @@ extract_duplicate_records <- function(dataset, by_vars) {
#' @param msg The condition message
#' @param cnd_type Type of condition to signal when detecting duplicate records.
#' One of `"message"`, `"warning"` or `"error"`. Default is `"error"`.
#' @param class Class of the condition
#'
#' The specified classes are added to the classes of the condition.
#' `c("duplicate_records", "assert-admiral")` is always added.
#'
#' @return No return value, called for side effects
#'
Expand All @@ -113,11 +117,13 @@ signal_duplicate_records <- function(dataset,
"with respect to",
"{.var {replace_values_by_names(by_vars)}}"
),
cnd_type = "error") {
cnd_type = "error",
class = NULL) {
assert_expr_list(by_vars)
assert_data_frame(dataset, required_vars = extract_vars(by_vars), check_is_grouped = FALSE)
assert_character_vector(msg)
assert_character_scalar(cnd_type, values = c("message", "warning", "error"))
assert_character_vector(class, optional = TRUE)

cnd_funs <- list(message = cli_inform, warning = cli_warn, error = cli_abort)

Expand All @@ -134,7 +140,11 @@ signal_duplicate_records <- function(dataset,
msg,
i = "Run {.run admiral::get_duplicates_dataset()} to access the duplicate records"
)
cnd_funs[[cnd_type]](full_msg)
cnd_funs[[cnd_type]](
full_msg,
class = c(class, "duplicate_records", "assert-admiral"),
by_vars = by_vars
)
}
}

Expand Down
Loading

0 comments on commit e2415a0

Please sign in to comment.