From e2415a0a69017f24238f0eb22c5caea1e866db93 Mon Sep 17 00:00:00 2001 From: Stefan Bundfuss <80953585+bundfussr@users.noreply.github.com> Date: Fri, 13 Dec 2024 21:17:41 +0100 Subject: [PATCH] Closes #2563 no_list_columns: add check to avoid list columns (#2592) * #2563 no_list_columns: add check to avoid list columns * #2563 no_list_columns: update documentation * #2563 no_list_columns: fix example and add see also * #2563 no_list_columns: avoid package name prefix * #2563 no_list_columns: update required admiraldev version * #2563 no_list_columns: split files (and clean up tests) * #2563 no_list_columns: update man --------- Co-authored-by: Daniel Sjoberg Co-authored-by: Ben Straub --- DESCRIPTION | 2 +- NAMESPACE | 1 + NEWS.md | 4 + R/admiral-package.R | 10 +- R/derive_vars_atc.R | 108 ++++++++++++ R/derive_vars_transposed.R | 142 +++------------- R/duplicates.R | 14 +- man/derive_vars_atc.Rd | 15 +- man/derive_vars_transposed.Rd | 11 +- man/signal_duplicate_records.Rd | 8 +- .../testthat/_snaps/derive_vars_transposed.md | 22 +++ tests/testthat/test-derive_vars_atc.R | 90 ++++++++++ tests/testthat/test-derive_vars_transposed.R | 159 ++---------------- 13 files changed, 297 insertions(+), 289 deletions(-) create mode 100644 R/derive_vars_atc.R create mode 100644 tests/testthat/test-derive_vars_atc.R diff --git a/DESCRIPTION b/DESCRIPTION index bf67d387ac..874cf89922 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -46,7 +46,7 @@ BugReports: https://github.com/pharmaverse/admiral/issues Depends: R (>= 4.0) Imports: - admiraldev (>= 1.1.0), + admiraldev (>= 1.1.0.9007), cli (>= 3.6.2), dplyr (>= 1.0.5), hms (>= 0.5.3), diff --git a/NAMESPACE b/NAMESPACE index baed6524ae..5da8c3fad5 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -258,6 +258,7 @@ importFrom(rlang,call2) importFrom(rlang,call_name) importFrom(rlang,caller_env) importFrom(rlang,cnd_muffle) +importFrom(rlang,cnd_signal) importFrom(rlang,current_env) importFrom(rlang,enexpr) importFrom(rlang,enexprs) diff --git a/NEWS.md b/NEWS.md index 682d9d586c..68b75c7310 100644 --- a/NEWS.md +++ b/NEWS.md @@ -45,6 +45,10 @@ or that the queries dataset contains duplicates. (#2543) - In `get_summary_records()`, previously deprecated formal arguments `analysis_var` and `summary_fun` now removed from function, documentation, tests etc. (#2521) +- A check was added to `derive_vars_transposed()` and `derive_vars_atc()` which +stops execution if the records in `dataset_merge` or `dataset_facm` respectively +are not unique. (#2563) + - The functions `derive_vars_joined()`, `derive_var_joined_exist_flag()`, `derive_extreme_event()`, and `filter_joined()` were updated to reduce their memory consumption. (#2590) diff --git a/R/admiral-package.R b/R/admiral-package.R index 9b91b084bc..bb7811d5fd 100644 --- a/R/admiral-package.R +++ b/R/admiral-package.R @@ -17,11 +17,11 @@ #' map_if map_lgl map2 modify_at modify_if pmap reduce transpose #' walk #' @importFrom rlang := abort arg_match as_data_mask as_function as_label -#' as_name as_string call2 call_name caller_env cnd_muffle current_env .data -#' enexpr enexprs eval_bare eval_tidy expr expr_interp exec expr_label exprs -#' f_lhs f_rhs inform is_call is_expression is_missing is_named list2 -#' new_environment new_formula parse_expr parse_exprs set_names sym syms -#' type_of warn +#' as_name as_string call2 call_name caller_env cnd_muffle cnd_signal +#' current_env .data enexpr enexprs eval_bare eval_tidy expr expr_interp exec +#' expr_label exprs f_lhs f_rhs inform is_call is_expression is_missing +#' is_named list2 new_environment new_formula parse_expr parse_exprs set_names +#' sym syms type_of warn #' @importFrom stats setNames #' @importFrom stringr str_c str_count str_detect str_extract str_glue #' str_length str_locate str_locate_all str_match str_remove diff --git a/R/derive_vars_atc.R b/R/derive_vars_atc.R new file mode 100644 index 0000000000..5705d296b0 --- /dev/null +++ b/R/derive_vars_atc.R @@ -0,0 +1,108 @@ +#' Derive ATC Class Variables +#' +#' @description Add Anatomical Therapeutic Chemical class variables from `FACM` to `ADCM`. +#' +#' **Note:** This is a wrapper function for the more generic `derive_vars_transposed()`. +#' +#' @param dataset +#' `r roxygen_param_dataset(expected_vars = c("by_vars"))` +#' +#' @param dataset_facm FACM dataset +#' +#' The variables specified by the `by_vars`, `id_vars`, and `value_var` +#' arguments and `FATESTCD` are required. The variables `by_vars`, `id_vars`, +#' and `FATESTCD` must be a unique key. +#' +#' @param by_vars Grouping variables +#' +#' Keys used to merge `dataset_facm` with `dataset`. +#' +#' @param id_vars ID variables +#' +#' Variables (excluding by_vars) that uniquely identify each observation in `dataset_merge`. +#' +#' `r roxygen_param_by_vars()` +#' +#' @param value_var The variable of `dataset_facm` containing the values of the +#' transposed variables +#' +#' @return The input dataset with ATC variables added +#' +#' @seealso [derive_vars_transposed()] +#' +#' @family der_occds +#' @keywords der_occds +#' +#' @export +#' +#' @examples +#' library(tibble) +#' +#' cm <- tribble( +#' ~STUDYID, ~USUBJID, ~CMGRPID, ~CMREFID, ~CMDECOD, +#' "STUDY01", "BP40257-1001", "14", "1192056", "PARACETAMOL", +#' "STUDY01", "BP40257-1001", "18", "2007001", "SOLUMEDROL", +#' "STUDY01", "BP40257-1002", "19", "2791596", "SPIRONOLACTONE" +#' ) +#' facm <- tribble( +#' ~STUDYID, ~USUBJID, ~FAGRPID, ~FAREFID, ~FATESTCD, ~FASTRESC, +#' "STUDY01", "BP40257-1001", "1", "1192056", "CMATC1CD", "N", +#' "STUDY01", "BP40257-1001", "1", "1192056", "CMATC2CD", "N02", +#' "STUDY01", "BP40257-1001", "1", "1192056", "CMATC3CD", "N02B", +#' "STUDY01", "BP40257-1001", "1", "1192056", "CMATC4CD", "N02BE", +#' "STUDY01", "BP40257-1001", "1", "2007001", "CMATC1CD", "D", +#' "STUDY01", "BP40257-1001", "1", "2007001", "CMATC2CD", "D10", +#' "STUDY01", "BP40257-1001", "1", "2007001", "CMATC3CD", "D10A", +#' "STUDY01", "BP40257-1001", "1", "2007001", "CMATC4CD", "D10AA", +#' "STUDY01", "BP40257-1001", "2", "2007001", "CMATC1CD", "D", +#' "STUDY01", "BP40257-1001", "2", "2007001", "CMATC2CD", "D07", +#' "STUDY01", "BP40257-1001", "2", "2007001", "CMATC3CD", "D07A", +#' "STUDY01", "BP40257-1001", "2", "2007001", "CMATC4CD", "D07AA", +#' "STUDY01", "BP40257-1001", "3", "2007001", "CMATC1CD", "H", +#' "STUDY01", "BP40257-1001", "3", "2007001", "CMATC2CD", "H02", +#' "STUDY01", "BP40257-1001", "3", "2007001", "CMATC3CD", "H02A", +#' "STUDY01", "BP40257-1001", "3", "2007001", "CMATC4CD", "H02AB", +#' "STUDY01", "BP40257-1002", "1", "2791596", "CMATC1CD", "C", +#' "STUDY01", "BP40257-1002", "1", "2791596", "CMATC2CD", "C03", +#' "STUDY01", "BP40257-1002", "1", "2791596", "CMATC3CD", "C03D", +#' "STUDY01", "BP40257-1002", "1", "2791596", "CMATC4CD", "C03DA" +#' ) +#' +#' derive_vars_atc(cm, facm, id_vars = exprs(FAGRPID)) +derive_vars_atc <- function(dataset, + dataset_facm, + by_vars = exprs( + !!!get_admiral_option("subject_keys"), + CMREFID = FAREFID + ), + id_vars = NULL, + value_var = FASTRESC) { + value_var <- assert_symbol(enexpr(value_var)) + assert_vars(by_vars) + assert_vars(id_vars, optional = TRUE) + assert_data_frame(dataset, required_vars = replace_values_by_names(by_vars)) + assert_data_frame( + dataset_facm, + required_vars = exprs(!!!by_vars, !!value_var, !!!id_vars, FATESTCD) + ) + + tryCatch( + data_transposed <- derive_vars_transposed( + dataset, + select(dataset_facm, !!!unname(by_vars), !!value_var, !!!id_vars, FATESTCD), + by_vars = by_vars, + id_vars = id_vars, + key_var = FATESTCD, + value_var = !!value_var, + filter = str_detect(FATESTCD, "^CMATC[1-4](CD)?$") + ), + merge_duplicates = function(cnd) { + cnd$message <- str_replace(cnd$message, "dataset_merge", "dataset_facm") + cnd$body[[1]] <- "Please check data and `by_vars` and `id_vars` arguments." + cnd_signal(cnd) + } + ) + data_transposed %>% + select(-starts_with("FA")) %>% + rename_with(.fn = ~ str_remove(.x, "^CM"), .cols = starts_with("CMATC")) +} diff --git a/R/derive_vars_transposed.R b/R/derive_vars_transposed.R index b0946ed39e..d21322e73f 100644 --- a/R/derive_vars_transposed.R +++ b/R/derive_vars_transposed.R @@ -7,8 +7,9 @@ #' #' @param dataset_merge Dataset to transpose and merge #' -#' The variables specified by the `by_vars`, `key_var` and `value_var` parameters -#' are expected +#' The variables specified by the `by_vars`, `id_vars`, `key_var` and +#' `value_var` arguments are expected. The variables `by_vars`, `id_vars`, +#' `key_var` have to be a unique key. #' #' @param by_vars Grouping variables #' @@ -35,17 +36,18 @@ #' for #' more details. #' -#' Permitted Values for `relationship`: `"one-to-one"`, `"one-to-many"`, -#' `"many-to-one"`, `"many-to-many"`, `NULL`. +#' *Permitted Values*: `"one-to-one"`, `"one-to-many"`, `"many-to-one"`, +#' `"many-to-many"`, `NULL` #' #' @details #' After filtering `dataset_merge` based upon the condition provided in `filter`, this #' dataset is transposed and subsequently merged onto `dataset` using `by_vars` as #' keys. #' -#' #' @return The input dataset with transposed variables from `dataset_merge` added #' +#' @seealso [derive_vars_atc()] +#' #' @family der_gen #' @keywords der_gen #' @@ -116,6 +118,21 @@ derive_vars_transposed <- function(dataset, optional = TRUE ) + # check for duplicates in dataset_merge as these will create list columns, + # which is not acceptable for ADaM datasets + signal_duplicate_records( + dataset_merge, + by_vars = c(by_vars, id_vars, exprs(!!key_var)), + msg = c( + paste( + "Dataset {.arg dataset_merge} contains duplicate records with respect to", + "{.var {by_vars}}" + ), + "Please check data and {.arg by_vars}, {.arg id_vars}, and {.arg key_var} arguments." + ), + class = "merge_duplicates" + ) + dataset_transposed <- dataset_merge %>% filter_if(filter) %>% pivot_wider( @@ -164,121 +181,6 @@ derive_vars_transposed <- function(dataset, ), call = parent.frame(n = 4) ) - }, - "dplyr_error_join_relationship_one_to_many" = function(cnd) { - cli_abort( - message = c( - str_replace( - str_replace( - cnd$message, "`x`", "`dataset`" - ), "`y`", "the transposed `dataset_merge`" - ), - i = str_replace( - str_replace( - cnd$body, "`x`", "`dataset`" - ), "`y`", "the transposed `dataset_merge`" - ) - ), - call = parent.frame(n = 4) - ) } ) } - -#' Derive ATC Class Variables -#' -#' @description Add Anatomical Therapeutic Chemical class variables from `FACM` to `ADCM`. -#' -#' **Note:** This is a wrapper function for the more generic `derive_vars_transposed()`. -#' -#' @param dataset -#' `r roxygen_param_dataset(expected_vars = c("by_vars"))` -#' -#' @param dataset_facm FACM dataset -#' -#' The variables specified by the `by_vars` and `value_var` parameters, -#' `FAGRPID` and `FATESTCD` are required -#' -#' @param by_vars Grouping variables -#' -#' Keys used to merge `dataset_facm` with `dataset`. -#' -#' @param id_vars ID variables -#' -#' Variables (excluding by_vars) that uniquely identify each observation in `dataset_merge`. -#' -#' `r roxygen_param_by_vars()` -#' -#' @param value_var The variable of `dataset_facm` containing the values of the -#' transposed variables -#' -#' Default: `FASTRESC` -#' -#' -#' @return The input dataset with ATC variables added -#' -#' @family der_occds -#' @keywords der_occds -#' -#' @export -#' -#' @examples -#' library(tibble) -#' -#' cm <- tribble( -#' ~STUDYID, ~USUBJID, ~CMGRPID, ~CMREFID, ~CMDECOD, -#' "STUDY01", "BP40257-1001", "14", "1192056", "PARACETAMOL", -#' "STUDY01", "BP40257-1001", "18", "2007001", "SOLUMEDROL", -#' "STUDY01", "BP40257-1002", "19", "2791596", "SPIRONOLACTONE" -#' ) -#' facm <- tribble( -#' ~STUDYID, ~USUBJID, ~FAGRPID, ~FAREFID, ~FATESTCD, ~FASTRESC, -#' "STUDY01", "BP40257-1001", "1", "1192056", "CMATC1CD", "N", -#' "STUDY01", "BP40257-1001", "1", "1192056", "CMATC2CD", "N02", -#' "STUDY01", "BP40257-1001", "1", "1192056", "CMATC3CD", "N02B", -#' "STUDY01", "BP40257-1001", "1", "1192056", "CMATC4CD", "N02BE", -#' "STUDY01", "BP40257-1001", "1", "2007001", "CMATC1CD", "D", -#' "STUDY01", "BP40257-1001", "1", "2007001", "CMATC2CD", "D10", -#' "STUDY01", "BP40257-1001", "1", "2007001", "CMATC3CD", "D10A", -#' "STUDY01", "BP40257-1001", "1", "2007001", "CMATC4CD", "D10AA", -#' "STUDY01", "BP40257-1001", "2", "2007001", "CMATC1CD", "D", -#' "STUDY01", "BP40257-1001", "2", "2007001", "CMATC2CD", "D07", -#' "STUDY01", "BP40257-1001", "2", "2007001", "CMATC3CD", "D07A", -#' "STUDY01", "BP40257-1001", "2", "2007001", "CMATC4CD", "D07AA", -#' "STUDY01", "BP40257-1001", "3", "2007001", "CMATC1CD", "H", -#' "STUDY01", "BP40257-1001", "3", "2007001", "CMATC2CD", "H02", -#' "STUDY01", "BP40257-1001", "3", "2007001", "CMATC3CD", "H02A", -#' "STUDY01", "BP40257-1001", "3", "2007001", "CMATC4CD", "H02AB", -#' "STUDY01", "BP40257-1002", "1", "2791596", "CMATC1CD", "C", -#' "STUDY01", "BP40257-1002", "1", "2791596", "CMATC2CD", "C03", -#' "STUDY01", "BP40257-1002", "1", "2791596", "CMATC3CD", "C03D", -#' "STUDY01", "BP40257-1002", "1", "2791596", "CMATC4CD", "C03DA" -#' ) -#' -#' derive_vars_atc(cm, facm) -derive_vars_atc <- function(dataset, - dataset_facm, - by_vars = exprs( - !!!get_admiral_option("subject_keys"), - CMREFID = FAREFID - ), - id_vars = NULL, - value_var = FASTRESC) { - value_var <- assert_symbol(enexpr(value_var)) - assert_vars(by_vars) - assert_vars(id_vars, optional = TRUE) - assert_data_frame(dataset, required_vars = replace_values_by_names(by_vars)) - assert_data_frame(dataset_facm, required_vars = exprs(!!!by_vars, !!value_var, FAGRPID, FATESTCD)) - - dataset %>% - derive_vars_transposed( - select(dataset_facm, !!!unname(by_vars), !!value_var, FAGRPID, FATESTCD), - by_vars = by_vars, - id_vars = id_vars, - key_var = FATESTCD, - value_var = !!value_var, - filter = str_detect(FATESTCD, "^CMATC[1-4](CD)?$") - ) %>% - select(-starts_with("FA")) %>% - rename_with(.fn = ~ str_remove(.x, "^CM"), .cols = starts_with("CMATC")) -} diff --git a/R/duplicates.R b/R/duplicates.R index f427329158..d6c42a221f 100644 --- a/R/duplicates.R +++ b/R/duplicates.R @@ -92,6 +92,10 @@ extract_duplicate_records <- function(dataset, by_vars) { #' @param msg The condition message #' @param cnd_type Type of condition to signal when detecting duplicate records. #' One of `"message"`, `"warning"` or `"error"`. Default is `"error"`. +#' @param class Class of the condition +#' +#' The specified classes are added to the classes of the condition. +#' `c("duplicate_records", "assert-admiral")` is always added. #' #' @return No return value, called for side effects #' @@ -113,11 +117,13 @@ signal_duplicate_records <- function(dataset, "with respect to", "{.var {replace_values_by_names(by_vars)}}" ), - cnd_type = "error") { + cnd_type = "error", + class = NULL) { assert_expr_list(by_vars) assert_data_frame(dataset, required_vars = extract_vars(by_vars), check_is_grouped = FALSE) assert_character_vector(msg) assert_character_scalar(cnd_type, values = c("message", "warning", "error")) + assert_character_vector(class, optional = TRUE) cnd_funs <- list(message = cli_inform, warning = cli_warn, error = cli_abort) @@ -134,7 +140,11 @@ signal_duplicate_records <- function(dataset, msg, i = "Run {.run admiral::get_duplicates_dataset()} to access the duplicate records" ) - cnd_funs[[cnd_type]](full_msg) + cnd_funs[[cnd_type]]( + full_msg, + class = c(class, "duplicate_records", "assert-admiral"), + by_vars = by_vars + ) } } diff --git a/man/derive_vars_atc.Rd b/man/derive_vars_atc.Rd index d34cb6e48a..ae4512c2bc 100644 --- a/man/derive_vars_atc.Rd +++ b/man/derive_vars_atc.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/derive_vars_transposed.R +% Please edit documentation in R/derive_vars_atc.R \name{derive_vars_atc} \alias{derive_vars_atc} \title{Derive ATC Class Variables} @@ -19,8 +19,9 @@ The variables specified by the \code{by_vars} argument are expected to be in the \item{dataset_facm}{FACM dataset -The variables specified by the \code{by_vars} and \code{value_var} parameters, -\code{FAGRPID} and \code{FATESTCD} are required} +The variables specified by the \code{by_vars}, \code{id_vars}, and \code{value_var} +arguments and \code{FATESTCD} are required. The variables \code{by_vars}, \code{id_vars}, +and \code{FATESTCD} must be a unique key.} \item{by_vars}{Grouping variables @@ -34,9 +35,7 @@ Variables (excluding by_vars) that uniquely identify each observation in \code{d e.g. \code{exprs(USUBJID, VISIT)}} \item{value_var}{The variable of \code{dataset_facm} containing the values of the -transposed variables - -Default: \code{FASTRESC}} +transposed variables} } \value{ The input dataset with ATC variables added @@ -79,9 +78,11 @@ facm <- tribble( "STUDY01", "BP40257-1002", "1", "2791596", "CMATC4CD", "C03DA" ) -derive_vars_atc(cm, facm) +derive_vars_atc(cm, facm, id_vars = exprs(FAGRPID)) } \seealso{ +\code{\link[=derive_vars_transposed]{derive_vars_transposed()}} + OCCDS Functions: \code{\link{derive_var_trtemfl}()}, \code{\link{derive_vars_query}()}, diff --git a/man/derive_vars_transposed.Rd b/man/derive_vars_transposed.Rd index 9e5676f991..e81e0fcb27 100644 --- a/man/derive_vars_transposed.Rd +++ b/man/derive_vars_transposed.Rd @@ -22,8 +22,9 @@ The variables specified by the \code{by_vars} argument are expected to be in the \item{dataset_merge}{Dataset to transpose and merge -The variables specified by the \code{by_vars}, \code{key_var} and \code{value_var} parameters -are expected} +The variables specified by the \code{by_vars}, \code{id_vars}, \code{key_var} and +\code{value_var} arguments are expected. The variables \code{by_vars}, \code{id_vars}, +\code{key_var} have to be a unique key.} \item{by_vars}{Grouping variables @@ -51,8 +52,8 @@ This argument is passed to the \code{dplyr::left_join()} function. See \url{https://dplyr.tidyverse.org/reference/mutate-joins.html#arguments} for more details. -Permitted Values for \code{relationship}: \code{"one-to-one"}, \code{"one-to-many"}, -\code{"many-to-one"}, \code{"many-to-many"}, \code{NULL}.} +\emph{Permitted Values}: \code{"one-to-one"}, \code{"one-to-many"}, \code{"many-to-one"}, +\code{"many-to-many"}, \code{NULL}} } \value{ The input dataset with transposed variables from \code{dataset_merge} added @@ -110,6 +111,8 @@ cm \%>\% select(USUBJID, CMDECOD, starts_with("CMATC")) } \seealso{ +\code{\link[=derive_vars_atc]{derive_vars_atc()}} + General Derivation Functions for all ADaMs that returns variable appended to dataset: \code{\link{derive_var_extreme_flag}()}, \code{\link{derive_var_joined_exist_flag}()}, diff --git a/man/signal_duplicate_records.Rd b/man/signal_duplicate_records.Rd index 050565900f..a7f265cea6 100644 --- a/man/signal_duplicate_records.Rd +++ b/man/signal_duplicate_records.Rd @@ -9,7 +9,8 @@ signal_duplicate_records( by_vars, msg = paste("Dataset contains duplicate records", "with respect to", "{.var {replace_values_by_names(by_vars)}}"), - cnd_type = "error" + cnd_type = "error", + class = NULL ) } \arguments{ @@ -28,6 +29,11 @@ e.g. \code{exprs(USUBJID, VISIT)}} \item{cnd_type}{Type of condition to signal when detecting duplicate records. One of \code{"message"}, \code{"warning"} or \code{"error"}. Default is \code{"error"}.} + +\item{class}{Class of the condition + +The specified classes are added to the classes of the condition. +\code{c("duplicate_records", "assert-admiral")} is always added.} } \value{ No return value, called for side effects diff --git a/tests/testthat/_snaps/derive_vars_transposed.md b/tests/testthat/_snaps/derive_vars_transposed.md index d26b5ad584..2d76b742c9 100644 --- a/tests/testthat/_snaps/derive_vars_transposed.md +++ b/tests/testthat/_snaps/derive_vars_transposed.md @@ -12,3 +12,25 @@ 2 STUDY01 P02 31 3 3 STUDY01 P03 42 NA +# derive_vars_transposed Test 4: error if `relationship` is unexpected + + Code + cm %>% derive_vars_transposed(facm, by_vars = exprs(USUBJID, CMREFID = FAREFID), + id_vars = exprs(FAGRPID), key_var = FATESTCD, value_var = FASTRESC, + relationship = "one-to-one") + Condition + Error in `tryCatch()`: + ! Each row in `dataset` must match at most 1 row in the transposed `dataset_merge`. + i Row 2 of `dataset` matches multiple rows in the transposed `dataset_merge`. + +--- + + Code + cm %>% derive_vars_transposed(facm, by_vars = exprs(USUBJID, CMREFID = FAREFID), + id_vars = exprs(FAGRPID), key_var = FATESTCD, value_var = FASTRESC, + relationship = "many-to-one") + Condition + Error in `derive_vars_transposed()`: + ! Each row in `dataset` must match at most 1 row in the transposed `dataset_merge`. + i Row 2 of `dataset` matches multiple rows in the transposed `dataset_merge`. + diff --git a/tests/testthat/test-derive_vars_atc.R b/tests/testthat/test-derive_vars_atc.R new file mode 100644 index 0000000000..ebc18af43b --- /dev/null +++ b/tests/testthat/test-derive_vars_atc.R @@ -0,0 +1,90 @@ +## Test 1: ATC variables are merged properly ---- +test_that("derive_vars_atc Test 1: ATC variables are merged properly", { + cm <- tibble::tribble( + ~STUDYID, ~USUBJID, ~CMGRPID, ~CMREFID, ~CMDECOD, + "STUDY01", "BP40257-1001", "14", "1192056", "PARACETAMOL", + "STUDY01", "BP40257-1001", "18", "2007001", "SOLUMEDROL", + "STUDY01", "BP40257-1002", "19", "2791596", "SPIRONOLACTONE" + ) + facm <- tibble::tribble( + ~STUDYID, ~USUBJID, ~FAGRPID, ~FAREFID, ~FATESTCD, ~FASTRESC, + "STUDY01", "BP40257-1001", "1", "1192056", "CMATC1CD", "N", + "STUDY01", "BP40257-1001", "1", "1192056", "CMATC2CD", "N02", + "STUDY01", "BP40257-1001", "1", "1192056", "CMATC3CD", "N02B", + "STUDY01", "BP40257-1001", "1", "1192056", "CMATC4CD", "N02BE", + "STUDY01", "BP40257-1001", "1", "2007001", "CMATC1CD", "D", + "STUDY01", "BP40257-1001", "1", "2007001", "CMATC2CD", "D10", + "STUDY01", "BP40257-1001", "1", "2007001", "CMATC3CD", "D10A", + "STUDY01", "BP40257-1001", "1", "2007001", "CMATC4CD", "D10AA", + "STUDY01", "BP40257-1001", "2", "2007001", "CMATC1CD", "D", + "STUDY01", "BP40257-1001", "2", "2007001", "CMATC2CD", "D07", + "STUDY01", "BP40257-1001", "2", "2007001", "CMATC3CD", "D07A", + "STUDY01", "BP40257-1001", "2", "2007001", "CMATC4CD", "D07AA", + "STUDY01", "BP40257-1001", "3", "2007001", "CMATC1CD", "H", + "STUDY01", "BP40257-1001", "3", "2007001", "CMATC2CD", "H02", + "STUDY01", "BP40257-1001", "3", "2007001", "CMATC3CD", "H02A", + "STUDY01", "BP40257-1001", "3", "2007001", "CMATC4CD", "H02AB", + "STUDY01", "BP40257-1002", "1", "2791596", "CMATC1CD", "C", + "STUDY01", "BP40257-1002", "1", "2791596", "CMATC2CD", "C03", + "STUDY01", "BP40257-1002", "1", "2791596", "CMATC3CD", "C03D", + "STUDY01", "BP40257-1002", "1", "2791596", "CMATC4CD", "C03DA" + ) + # nolint start + expected_output <- tibble::tribble( + ~STUDYID, ~USUBJID, ~CMGRPID, ~CMREFID, ~CMDECOD, ~ATC1CD, ~ATC2CD, ~ATC3CD, ~ATC4CD, + "STUDY01", "BP40257-1001", "14", "1192056", "PARACETAMOL", "N", "N02", "N02B", "N02BE", + "STUDY01", "BP40257-1001", "18", "2007001", "SOLUMEDROL", "D", "D07", "D07A", "D07AA", + "STUDY01", "BP40257-1001", "18", "2007001", "SOLUMEDROL", "D", "D10", "D10A", "D10AA", + "STUDY01", "BP40257-1001", "18", "2007001", "SOLUMEDROL", "H", "H02", "H02A", "H02AB", + "STUDY01", "BP40257-1002", "19", "2791596", "SPIRONOLACTONE", "C", "C03", "C03D", "C03DA" + ) + # nolint end + actual_output <- derive_vars_atc( + dataset = cm, + dataset_facm = facm, + id_vars = exprs(FAGRPID) + ) + + expect_dfs_equal(expected_output, actual_output, keys = c("USUBJID", "CMDECOD", "ATC4CD")) +}) + +## Test 2: error if facm not unique ---- +test_that("derive_vars_atc Test 2: error if facm not unique", { + cm <- tibble::tribble( + ~STUDYID, ~USUBJID, ~CMGRPID, ~CMREFID, ~CMDECOD, + "STUDY01", "BP40257-1001", "14", "1192056", "PARACETAMOL", + "STUDY01", "BP40257-1001", "18", "2007001", "SOLUMEDROL", + "STUDY01", "BP40257-1002", "19", "2791596", "SPIRONOLACTONE" + ) + facm <- tibble::tribble( + ~STUDYID, ~USUBJID, ~FAGRPID, ~FAREFID, ~FATESTCD, ~FASTRESC, + "STUDY01", "BP40257-1001", "1", "1192056", "CMATC1CD", "N", + "STUDY01", "BP40257-1001", "1", "1192056", "CMATC2CD", "N02", + "STUDY01", "BP40257-1001", "1", "1192056", "CMATC3CD", "N02B", + "STUDY01", "BP40257-1001", "1", "1192056", "CMATC4CD", "N02BE", + "STUDY01", "BP40257-1001", "1", "2007001", "CMATC1CD", "D", + "STUDY01", "BP40257-1001", "1", "2007001", "CMATC2CD", "D10", + "STUDY01", "BP40257-1001", "1", "2007001", "CMATC3CD", "D10A", + "STUDY01", "BP40257-1001", "1", "2007001", "CMATC4CD", "D10AA", + "STUDY01", "BP40257-1001", "2", "2007001", "CMATC1CD", "D", + "STUDY01", "BP40257-1001", "2", "2007001", "CMATC2CD", "D07", + "STUDY01", "BP40257-1001", "2", "2007001", "CMATC3CD", "D07A", + "STUDY01", "BP40257-1001", "2", "2007001", "CMATC4CD", "D07AA", + "STUDY01", "BP40257-1001", "3", "2007001", "CMATC1CD", "H", + "STUDY01", "BP40257-1001", "3", "2007001", "CMATC2CD", "H02", + "STUDY01", "BP40257-1001", "3", "2007001", "CMATC3CD", "H02A", + "STUDY01", "BP40257-1001", "3", "2007001", "CMATC4CD", "H02AB", + "STUDY01", "BP40257-1002", "1", "2791596", "CMATC1CD", "C", + "STUDY01", "BP40257-1002", "1", "2791596", "CMATC2CD", "C03", + "STUDY01", "BP40257-1002", "1", "2791596", "CMATC3CD", "C03D", + "STUDY01", "BP40257-1002", "1", "2791596", "CMATC4CD", "C03DA" + ) + + expect_snapshot( + derive_vars_atc( + dataset = cm, + dataset_facm = facm + ), + error = TRUE + ) +}) diff --git a/tests/testthat/test-derive_vars_transposed.R b/tests/testthat/test-derive_vars_transposed.R index c7ddcb2b73..d2e1db5a8d 100644 --- a/tests/testthat/test-derive_vars_transposed.R +++ b/tests/testthat/test-derive_vars_transposed.R @@ -52,7 +52,7 @@ test_that("derive_vars_transposed Test 2: filtering the merge dataset works", { expect_dfs_equal(expected_output, actual_output, keys = "USUBJID") }) -## Test 3: filtering the merge dataset works with relationship 'many-to-one' ---- +## Test 3: filter merge dataset 'many-to-one' ---- test_that("derive_vars_transposed Test 3: filter merge dataset 'many-to-one'", { expect_snapshot( derive_vars_transposed( @@ -67,8 +67,8 @@ test_that("derive_vars_transposed Test 3: filter merge dataset 'many-to-one'", { ) }) -## Test 4: ATC variables are merged properly ---- -test_that("derive_vars_transposed Test 4: ATC variables are merged properly", { +## Test 4: error if `relationship` is unexpected ---- +test_that("derive_vars_transposed Test 4: error if `relationship` is unexpected", { cm <- tibble::tribble( ~STUDYID, ~USUBJID, ~CMGRPID, ~CMREFID, ~CMDECOD, "STUDY01", "BP40257-1001", "14", "1192056", "PARACETAMOL", @@ -98,135 +98,8 @@ test_that("derive_vars_transposed Test 4: ATC variables are merged properly", { "STUDY01", "BP40257-1002", "1", "2791596", "CMATC3CD", "C03D", "STUDY01", "BP40257-1002", "1", "2791596", "CMATC4CD", "C03DA" ) - # nolint start - expected_output <- tibble::tribble( - ~STUDYID, ~USUBJID, ~CMGRPID, ~CMREFID, ~CMDECOD, ~ATC1CD, ~ATC2CD, ~ATC3CD, ~ATC4CD, - "STUDY01", "BP40257-1001", "14", "1192056", "PARACETAMOL", "N", "N02", "N02B", "N02BE", - "STUDY01", "BP40257-1001", "18", "2007001", "SOLUMEDROL", "D", "D07", "D07A", "D07AA", - "STUDY01", "BP40257-1001", "18", "2007001", "SOLUMEDROL", "D", "D10", "D10A", "D10AA", - "STUDY01", "BP40257-1001", "18", "2007001", "SOLUMEDROL", "H", "H02", "H02A", "H02AB", - "STUDY01", "BP40257-1002", "19", "2791596", "SPIRONOLACTONE", "C", "C03", "C03D", "C03DA" - ) - # nolint end - actual_output <- derive_vars_atc( - dataset = cm, - dataset_facm = facm, - id_vars = exprs(FAGRPID) - ) - - expect_dfs_equal(expected_output, actual_output, keys = c("USUBJID", "CMDECOD", "ATC4CD")) -}) - -## Test 5: ATC variables are merged properly ---- -test_that("derive_vars_transposed Test 5: ATC variables are merged properly", { - cm <- tibble::tribble( - ~STUDYID, ~USUBJID, ~CMGRPID, ~CMREFID, ~CMDECOD, - "STUDY01", "BP40257-1001", "14", "1192056", "PARACETAMOL", - "STUDY01", "BP40257-1001", "18", "2007001", "SOLUMEDROL", - "STUDY01", "BP40257-1002", "19", "2791596", "SPIRONOLACTONE" - ) - facm <- tibble::tribble( - ~STUDYID, ~USUBJID, ~FAGRPID, ~FAREFID, ~FATESTCD, ~FASTRESC, - "STUDY01", "BP40257-1001", "1", "1192056", "CMATC1CD", "N", - "STUDY01", "BP40257-1001", "1", "1192056", "CMATC2CD", "N02", - "STUDY01", "BP40257-1001", "1", "1192056", "CMATC3CD", "N02B", - "STUDY01", "BP40257-1001", "1", "1192056", "CMATC4CD", "N02BE", - "STUDY01", "BP40257-1001", "1", "2007001", "CMATC1CD", "D", - "STUDY01", "BP40257-1001", "1", "2007001", "CMATC2CD", "D10", - "STUDY01", "BP40257-1001", "1", "2007001", "CMATC3CD", "D10A", - "STUDY01", "BP40257-1001", "1", "2007001", "CMATC4CD", "D10AA", - "STUDY01", "BP40257-1001", "2", "2007001", "CMATC1CD", "D", - "STUDY01", "BP40257-1001", "2", "2007001", "CMATC2CD", "D07", - "STUDY01", "BP40257-1001", "2", "2007001", "CMATC3CD", "D07A", - "STUDY01", "BP40257-1001", "2", "2007001", "CMATC4CD", "D07AA", - "STUDY01", "BP40257-1001", "3", "2007001", "CMATC1CD", "H", - "STUDY01", "BP40257-1001", "3", "2007001", "CMATC2CD", "H02", - "STUDY01", "BP40257-1001", "3", "2007001", "CMATC3CD", "H02A", - "STUDY01", "BP40257-1001", "3", "2007001", "CMATC4CD", "H02AB", - "STUDY01", "BP40257-1002", "1", "2791596", "CMATC1CD", "C", - "STUDY01", "BP40257-1002", "1", "2791596", "CMATC2CD", "C03", - "STUDY01", "BP40257-1002", "1", "2791596", "CMATC3CD", "C03D", - "STUDY01", "BP40257-1002", "1", "2791596", "CMATC4CD", "C03DA" - ) - # nolint start - expected_output <- tibble::tribble( - ~STUDYID, ~USUBJID, ~CMGRPID, ~CMREFID, ~CMDECOD, ~ATC1CD, ~ATC2CD, ~ATC3CD, ~ATC4CD, - "STUDY01", "BP40257-1001", "14", "1192056", "PARACETAMOL", "N", "N02", "N02B", "N02BE", - "STUDY01", "BP40257-1001", "18", "2007001", "SOLUMEDROL", "D", "D07", "D07A", "D07AA", - "STUDY01", "BP40257-1001", "18", "2007001", "SOLUMEDROL", "D", "D10", "D10A", "D10AA", - "STUDY01", "BP40257-1001", "18", "2007001", "SOLUMEDROL", "H", "H02", "H02A", "H02AB", - "STUDY01", "BP40257-1002", "19", "2791596", "SPIRONOLACTONE", "C", "C03", "C03D", "C03DA" - ) - # nolint end - actual_output <- derive_vars_atc( - dataset = cm, - dataset_facm = facm, - id_vars = exprs(FAGRPID) - ) - - expect_dfs_equal( - expected_output, - actual_output, - keys = c("STUDYID", "USUBJID", "CMDECOD", "ATC4CD") - ) -}) -## Test 6: `Relationship` argument handled by left_join ---- -test_that("derive_vars_transposed Test 6: left_join throws error when argument specified by - `relationship` is incorrect", { - cm <- tibble::tribble( - ~STUDYID, ~USUBJID, ~CMGRPID, ~CMREFID, ~CMDECOD, - "STUDY01", "BP40257-1001", "14", "1192056", "PARACETAMOL", - "STUDY01", "BP40257-1001", "18", "2007001", "SOLUMEDROL", - "STUDY01", "BP40257-1002", "19", "2791596", "SPIRONOLACTONE" - ) - facm <- tibble::tribble( - ~STUDYID, ~USUBJID, ~FAGRPID, ~FAREFID, ~FATESTCD, ~FASTRESC, - "STUDY01", "BP40257-1001", "1", "1192056", "CMATC1CD", "N", - "STUDY01", "BP40257-1001", "1", "1192056", "CMATC2CD", "N02", - "STUDY01", "BP40257-1001", "1", "1192056", "CMATC3CD", "N02B", - "STUDY01", "BP40257-1001", "1", "1192056", "CMATC4CD", "N02BE", - "STUDY01", "BP40257-1001", "1", "2007001", "CMATC1CD", "D", - "STUDY01", "BP40257-1001", "1", "2007001", "CMATC2CD", "D10", - "STUDY01", "BP40257-1001", "1", "2007001", "CMATC3CD", "D10A", - "STUDY01", "BP40257-1001", "1", "2007001", "CMATC4CD", "D10AA", - "STUDY01", "BP40257-1001", "2", "2007001", "CMATC1CD", "D", - "STUDY01", "BP40257-1001", "2", "2007001", "CMATC2CD", "D07", - "STUDY01", "BP40257-1001", "2", "2007001", "CMATC3CD", "D07A", - "STUDY01", "BP40257-1001", "2", "2007001", "CMATC4CD", "D07AA", - "STUDY01", "BP40257-1001", "3", "2007001", "CMATC1CD", "H", - "STUDY01", "BP40257-1001", "3", "2007001", "CMATC2CD", "H02", - "STUDY01", "BP40257-1001", "3", "2007001", "CMATC3CD", "H02A", - "STUDY01", "BP40257-1001", "3", "2007001", "CMATC4CD", "H02AB", - "STUDY01", "BP40257-1002", "1", "2791596", "CMATC1CD", "C", - "STUDY01", "BP40257-1002", "1", "2791596", "CMATC2CD", "C03", - "STUDY01", "BP40257-1002", "1", "2791596", "CMATC3CD", "C03D", - "STUDY01", "BP40257-1002", "1", "2791596", "CMATC4CD", "C03DA" - ) - - dup <- tibble( - STUDYID = "STUDYID01", - USUBJID = "BP40257-1001", - FAGRPID = "1", - FAREFID = "1192056" - ) - - facm1 <- bind_rows(facm, dup) - - expect_no_error( - cm %>% - derive_vars_transposed( - facm, - by_vars = exprs(USUBJID, CMREFID = FAREFID), - id_vars = exprs(FAGRPID), - key_var = FATESTCD, - value_var = FASTRESC, - relationship = "one-to-many" - ) %>% - select(USUBJID, CMDECOD, starts_with("CMATC")) - ) - - expect_error( + expect_snapshot( cm %>% derive_vars_transposed( facm, @@ -235,10 +108,11 @@ test_that("derive_vars_transposed Test 6: left_join throws error when argument s key_var = FATESTCD, value_var = FASTRESC, relationship = "one-to-one" - ) %>% - select(USUBJID, CMDECOD, starts_with("CMATC")) + ), + error = TRUE ) - expect_error( + + expect_snapshot( cm %>% derive_vars_transposed( facm, @@ -247,20 +121,7 @@ test_that("derive_vars_transposed Test 6: left_join throws error when argument s key_var = FATESTCD, value_var = FASTRESC, relationship = "many-to-one" - ) %>% - select(USUBJID, CMDECOD, starts_with("CMATC")) - ) - - expect_error( - cm %>% - derive_vars_transposed( - facm1, - by_vars = exprs(USUBJID, CMREFID = FAREFID), - id_vars = exprs(FAGRPID), - key_var = FATESTCD, - value_var = FASTRESC, - relationship = "one-to-one" - ) %>% - select(USUBJID, CMDECOD, starts_with("CMATC")) + ), + error = TRUE ) })