From 8e23388015eb44c9ebcf1d625852bac0e0d5f4ea Mon Sep 17 00:00:00 2001 From: ProfessorP Date: Sun, 17 Nov 2024 19:52:38 -0500 Subject: [PATCH 01/23] Added order arguments to censor_source and event_source. Also added signal_duplicate_records to derive_param_tte. Still troubleshooting the test-derive_param_tte script. Failed tests have a "Required variable `AEDECOD` is missing in `dataset`" error. --- R/derive_joined.R | 2 +- R/derive_param_tte.R | 19 +- tests/testthat/_snaps/derive_param_tte.new.md | 54 +++++ tests/testthat/test-derive_param_tte.R | 205 ++++++++++++++++++ 4 files changed, 274 insertions(+), 6 deletions(-) create mode 100644 tests/testthat/_snaps/derive_param_tte.new.md diff --git a/R/derive_joined.R b/R/derive_joined.R index 5af5edf546..be469246d1 100644 --- a/R/derive_joined.R +++ b/R/derive_joined.R @@ -478,7 +478,7 @@ derive_vars_joined <- function(dataset, derive_var_obs_number( new_var = !!tmp_obs_nr, by_vars = by_vars_left, - check_type = "none" + "none" ) data_joined <- get_joined_data( diff --git a/R/derive_param_tte.R b/R/derive_param_tte.R index daa9f9af4d..3915eec8ee 100644 --- a/R/derive_param_tte.R +++ b/R/derive_param_tte.R @@ -322,7 +322,12 @@ derive_param_tte <- function(dataset = NULL, censor_conditions, create_datetime = FALSE, set_values_to, - subject_keys = get_admiral_option("subject_keys")) { + subject_keys = get_admiral_option("subject_keys"), + check_type = "warning") { + #check for duplicates in dataset + signal_duplicate_records(dataset = dataset_adsl, + by_vars = expr_c(subject_keys, by_vars), + cnd_type = check_type) # checking and quoting # assert_data_frame(dataset, optional = TRUE) assert_vars(by_vars, optional = TRUE) @@ -844,13 +849,15 @@ tte_source <- function(dataset_name, event_source <- function(dataset_name, filter = NULL, date, - set_values_to = NULL) { + set_values_to = NULL, + order = NULL) { out <- tte_source( dataset_name = assert_character_scalar(dataset_name), filter = !!enexpr(filter), date = !!assert_expr(enexpr(date)), censor = 0, - set_values_to = set_values_to + set_values_to = set_values_to, + order = order ) class(out) <- c("event_source", class(out)) out @@ -891,13 +898,15 @@ censor_source <- function(dataset_name, filter = NULL, date, censor = 1, - set_values_to = NULL) { + set_values_to = NULL, + order = NULL) { out <- tte_source( dataset_name = assert_character_scalar(dataset_name), filter = !!enexpr(filter), date = !!assert_expr(enexpr(date)), censor = assert_integer_scalar(censor, subset = "positive"), - set_values_to = set_values_to + set_values_to = set_values_to, + order = order ) class(out) <- c("censor_source", class(out)) out diff --git a/tests/testthat/_snaps/derive_param_tte.new.md b/tests/testthat/_snaps/derive_param_tte.new.md new file mode 100644 index 0000000000..28161a68d1 --- /dev/null +++ b/tests/testthat/_snaps/derive_param_tte.new.md @@ -0,0 +1,54 @@ +# derive_param_tte Test 6: an error is issued if some of the by variables are missing + + Code + derive_param_tte(dataset_adsl = adsl, by_vars = exprs(AEBODSYS, AEDECOD), + start_date = TRTSDT, event_conditions = list(ttae), censor_conditions = list( + eos), source_datasets = list(adsl = adsl, ae = ae), set_values_to = exprs( + PARAMCD = paste0("TTAE", as.numeric(as.factor(AEDECOD))), PARAM = paste( + "Time to First", AEDECOD, "Adverse Event"), PARCAT1 = "TTAE", PARCAT2 = AEDECOD)) + Condition + Error in `signal_duplicate_records()`: + ! Required variables `AEBODSYS` and `AEDECOD` are missing in `dataset` + +# derive_param_tte Test 7: errors if all by vars are missing in all source datasets + + Code + derive_param_tte(dataset_adsl = adsl, by_vars = exprs(AEBODSYS), start_date = TRTSDT, + event_conditions = list(ttae), censor_conditions = list(eos), source_datasets = list( + adsl = adsl, ae = ae), set_values_to = exprs(PARAMCD = paste0("TTAE", + as.numeric(as.factor(AEDECOD))), PARAM = paste("Time to First", AEDECOD, + "Adverse Event"), PARCAT1 = "TTAE", PARCAT2 = AEDECOD)) + Condition + Error in `signal_duplicate_records()`: + ! Required variable `AEBODSYS` is missing in `dataset` + +# derive_param_tte Test 8: errors if PARAMCD and by_vars are not one to one + + Code + derive_param_tte(dataset_adsl = adsl, by_vars = exprs(AEDECOD), start_date = TRTSDT, + event_conditions = list(ttae), censor_conditions = list(eos), source_datasets = list( + adsl = adsl, ae = ae), set_values_to = exprs(PARAMCD = "TTAE", PARCAT2 = AEDECOD)) + Condition + Error in `signal_duplicate_records()`: + ! Required variable `AEDECOD` is missing in `dataset` + +# derive_param_tte Test 9: errors if set_values_to contains invalid expressions + + Code + derive_param_tte(dataset_adsl = adsl, by_vars = exprs(AEDECOD), start_date = TRTSDT, + event_conditions = list(ttae), censor_conditions = list(eos), source_datasets = list( + adsl = adsl, ae = ae), set_values_to = exprs(PARAMCD = paste0("TTAE", + as.numeric(as.factor(AEDECOD))), PARAM = past("Time to First", AEDECOD, + "Adverse Event"), PARCAT1 = "TTAE", PARCAT2 = AEDECOD)) + Condition + Error in `signal_duplicate_records()`: + ! Required variable `AEDECOD` is missing in `dataset` + +# list_tte_source_objects Test 13: error is issued if package does not exist + + Code + list_tte_source_objects(package = "tte") + Condition + Error in `list_tte_source_objects()`: + ! No package called tte is installed and hence no objects are available. + diff --git a/tests/testthat/test-derive_param_tte.R b/tests/testthat/test-derive_param_tte.R index 0feeb4b236..0e7d19b0d5 100644 --- a/tests/testthat/test-derive_param_tte.R +++ b/tests/testthat/test-derive_param_tte.R @@ -928,3 +928,208 @@ test_that("list_tte_source_objects Test 14: expected objects produced", { expect_dfs_equal(expected_output, observed_output, keys = c("object")) }) + +# Test 15: "derive_param_tte detects duplicates when check_type = 'warning'` +test_that("derive_param_tte detects duplicates when check_type = 'warning'", { + # Define ADSL dataset + adsl <- tibble::tribble( + ~USUBJID, ~TRTSDT, ~TRTEDT, ~EOSDT, + "01", ymd("2020-12-06"), ymd("2021-03-02"), ymd("2021-03-06"), + "02", ymd("2021-01-16"), ymd("2021-01-20"), ymd("2021-02-03") + ) %>% + mutate(STUDYID = "AB42") + + # Define AE dataset with duplicates + ae <- tibble::tribble( + ~USUBJID, ~AESTDTC, ~AESEQ, ~AEDECOD, + "01", "2021-01-03", 1, "Flu", + "01", "2021-03-04", 2, "Cough", + "01", "2021-01-03", 3, "Flu" + ) %>% + mutate( + STUDYID = "AB42", + AESTDT = ymd(AESTDTC) + ) + + # Define event source + ttae <- event_source( + dataset_name = "ae", + date = AESTDT, + set_values_to = exprs( + EVENTDESC = "AE", + SRCDOM = "AE", + SRCVAR = "AESTDTC", + SRCSEQ = AESEQ + ) + ) + + # Define censor source + eot <- censor_source( + dataset_name = "adsl", + date = pmin(TRTEDT + days(10), EOSDT), + censor = 1, + set_values_to = exprs( + EVENTDESC = "END OF TRT", + SRCDOM = "ADSL", + SRCVAR = "TRTEDT" + ) + ) + + # Test for duplicate detection + expect_warning( + derive_param_tte( + dataset_adsl = adsl, + start_date = TRTSDT, + event_conditions = list(ttae), + censor_conditions = list(eot), + source_datasets = list(adsl = adsl, ae = ae), + set_values_to = exprs(PARAMCD = "TTAE"), + check_type = "warning" + ), + regexp = "Dataset contains duplicate records" + ) +}) + +# Test 16: "derive_param_tte produces consistent results regardless of input sort order" +test_that("derive_param_tte produces consistent results regardless of input sort order", { + # Define ADSL dataset + adsl <- tibble::tribble( + ~USUBJID, ~TRTSDT, ~TRTEDT, ~EOSDT, + "01", ymd("2020-12-06"), ymd("2021-03-02"), ymd("2021-03-06"), + "02", ymd("2021-01-16"), ymd("2021-01-20"), ymd("2021-02-03") + ) %>% + mutate(STUDYID = "AB42") + + # Define AE dataset + ae <- tibble::tribble( + ~USUBJID, ~AESTDTC, ~AESEQ, ~AEDECOD, + "01", "2021-01-03", 1, "Flu", + "01", "2021-03-04", 2, "Cough", + "01", "2021-01-03", 3, "Flu" + ) %>% + mutate( + STUDYID = "AB42", + AESTDT = ymd(AESTDTC) + ) + + # Define event source with order + ttae <- event_source( + dataset_name = "ae", + date = AESTDT, + set_values_to = exprs( + EVENTDESC = "AE", + SRCDOM = "AE", + SRCVAR = "AESTDTC", + SRCSEQ = AESEQ + ), + order = exprs(AESEQ) + ) + + # Define censor source with order + eot <- censor_source( + dataset_name = "adsl", + date = pmin(TRTEDT + days(10), EOSDT), + censor = 1, + set_values_to = exprs( + EVENTDESC = "END OF TRT", + SRCDOM = "ADSL", + SRCVAR = "TRTEDT" + ), + order = exprs(TRTEDT) + ) + + # Run derive_param_tte with sorted AE dataset + result_sorted <- derive_param_tte( + dataset_adsl = adsl, + start_date = TRTSDT, + event_conditions = list(ttae), + censor_conditions = list(eot), + source_datasets = list(adsl = adsl, ae = arrange(ae, AESEQ)), + set_values_to = exprs(PARAMCD = "TTAE") + ) + + # Run derive_param_tte with reverse-sorted AE dataset + result_unsorted <- derive_param_tte( + dataset_adsl = adsl, + start_date = TRTSDT, + event_conditions = list(ttae), + censor_conditions = list(eot), + source_datasets = list(adsl = adsl, ae = arrange(ae, desc(AESEQ))), + set_values_to = exprs(PARAMCD = "TTAE") + ) + + # Validate that the results are the same + expect_equal(result_sorted, result_unsorted, ignore_attr = TRUE) +}) + +# Test 17: "derive_param_tte produces expected output for common scenario" +test_that("derive_param_tte produces expected output for common scenario", { + # Define ADSL dataset + adsl <- tibble::tribble( + ~USUBJID, ~TRTSDT, ~TRTEDT, ~EOSDT, + "01", ymd("2020-12-06"), ymd("2021-03-02"), ymd("2021-03-06"), + "02", ymd("2021-01-16"), ymd("2021-01-20"), ymd("2021-02-03") + ) %>% + mutate(STUDYID = "AB42") + + # Define AE dataset + ae <- tibble::tribble( + ~USUBJID, ~AESTDTC, ~AESEQ, ~AEDECOD, + "01", "2021-01-03", 1, "Flu", + "01", "2021-03-04", 2, "Cough" + ) %>% + mutate( + STUDYID = "AB42", + AESTDT = ymd(AESTDTC) + ) + + # Define event and censor sources + ttae <- event_source( + dataset_name = "ae", + date = AESTDT, + set_values_to = exprs( + EVENTDESC = "AE", + SRCDOM = "AE", + SRCVAR = "AESTDTC", + SRCSEQ = AESEQ + ) + ) + + eot <- censor_source( + dataset_name = "adsl", + date = pmin(TRTEDT + days(10), EOSDT), + censor = 1, + set_values_to = exprs( + EVENTDESC = "END OF TRT", + SRCDOM = "ADSL", + SRCVAR = "TRTEDT" + ) + ) + + # Run derive_param_tte + result <- derive_param_tte( + dataset_adsl = adsl, + start_date = TRTSDT, + event_conditions = list(ttae), + censor_conditions = list(eot), + source_datasets = list(adsl = adsl, ae = ae), + set_values_to = exprs(PARAMCD = "TTAE") + ) + + # Expected result + expected <- tibble::tibble( + USUBJID = c("01", "02"), + STUDYID = "AB42", + EVENTDESC = c("AE", "END OF TRT"), + SRCDOM = c("AE", "ADSL"), + SRCVAR = c("AESTDTC", "TRTEDT"), + SRCSEQ = c(1, NA), + CNSR = c(0, 1), + ADT = as.Date(c("2021-01-03", "2021-01-30")), + STARTDT = as.Date(c("2020-12-06", "2021-01-16")), + PARAMCD = "TTAE" + ) + + # Validate output + expect_equal(result, expected, ignore_attr = TRUE) +}) From cd5280176e2d1ca4b8223634e73f7a1974a15ec4 Mon Sep 17 00:00:00 2001 From: ProfessorP Date: Sun, 17 Nov 2024 20:19:36 -0500 Subject: [PATCH 02/23] Added order argument to tte_source as part of development and error fixes. --- R/derive_param_tte.R | 6 ++++-- man/censor_source.Rd | 3 ++- man/derive_param_tte.Rd | 3 ++- man/event_source.Rd | 8 +++++++- man/tte_source.Rd | 9 ++++++++- 5 files changed, 23 insertions(+), 6 deletions(-) diff --git a/R/derive_param_tte.R b/R/derive_param_tte.R index 3915eec8ee..063d1d98b8 100644 --- a/R/derive_param_tte.R +++ b/R/derive_param_tte.R @@ -798,7 +798,8 @@ tte_source <- function(dataset_name, filter = NULL, date, censor = 0, - set_values_to = NULL) { + set_values_to = NULL, + order = order) { out <- list( dataset_name = assert_character_scalar(dataset_name), filter = assert_filter_cond(enexpr(filter), optional = TRUE), @@ -808,7 +809,8 @@ tte_source <- function(dataset_name, set_values_to, named = TRUE, optional = TRUE - ) + ), + order = order ) class(out) <- c("tte_source", "source", "list") out diff --git a/man/censor_source.Rd b/man/censor_source.Rd index 263ac16d6a..fdbaeece4e 100644 --- a/man/censor_source.Rd +++ b/man/censor_source.Rd @@ -9,7 +9,8 @@ censor_source( filter = NULL, date, censor = 1, - set_values_to = NULL + set_values_to = NULL, + order = NULL ) } \arguments{ diff --git a/man/derive_param_tte.Rd b/man/derive_param_tte.Rd index 7dadeae6f8..e3825464f0 100644 --- a/man/derive_param_tte.Rd +++ b/man/derive_param_tte.Rd @@ -14,7 +14,8 @@ derive_param_tte( censor_conditions, create_datetime = FALSE, set_values_to, - subject_keys = get_admiral_option("subject_keys") + subject_keys = get_admiral_option("subject_keys"), + check_type = "warning" ) } \arguments{ diff --git a/man/event_source.Rd b/man/event_source.Rd index 03e7b4913c..88bb6701dc 100644 --- a/man/event_source.Rd +++ b/man/event_source.Rd @@ -4,7 +4,13 @@ \alias{event_source} \title{Create an \code{event_source} Object} \usage{ -event_source(dataset_name, filter = NULL, date, set_values_to = NULL) +event_source( + dataset_name, + filter = NULL, + date, + set_values_to = NULL, + order = NULL +) } \arguments{ \item{dataset_name}{The name of the source dataset diff --git a/man/tte_source.Rd b/man/tte_source.Rd index f63a36d51d..bc007598f2 100644 --- a/man/tte_source.Rd +++ b/man/tte_source.Rd @@ -4,7 +4,14 @@ \alias{tte_source} \title{Create a \code{tte_source} Object} \usage{ -tte_source(dataset_name, filter = NULL, date, censor = 0, set_values_to = NULL) +tte_source( + dataset_name, + filter = NULL, + date, + censor = 0, + set_values_to = NULL, + order = order +) } \arguments{ \item{dataset_name}{The name of the source dataset From 2727736e49abaa141c326317698e3579631f2963 Mon Sep 17 00:00:00 2001 From: ProfessorP Date: Sun, 17 Nov 2024 21:54:21 -0500 Subject: [PATCH 03/23] Fixed previous erros but still need to address failed tests for Test 9, 15, and 16 in test-derive_param_tte --- R/derive_param_tte.R | 4 +- tests/testthat/_snaps/derive_param_tte.md | 15 ------ tests/testthat/_snaps/derive_param_tte.new.md | 54 ------------------- tests/testthat/test-derive_param_tte.R | 2 +- 4 files changed, 4 insertions(+), 71 deletions(-) delete mode 100644 tests/testthat/_snaps/derive_param_tte.new.md diff --git a/R/derive_param_tte.R b/R/derive_param_tte.R index 063d1d98b8..9372c44e0f 100644 --- a/R/derive_param_tte.R +++ b/R/derive_param_tte.R @@ -324,9 +324,11 @@ derive_param_tte <- function(dataset = NULL, set_values_to, subject_keys = get_admiral_option("subject_keys"), check_type = "warning") { + #filter 'by_vars' to include variables present in dataset_adsl + filtered_by_vars <- by_vars[by_vars %in% colnames(dataset_adsl)] #check for duplicates in dataset signal_duplicate_records(dataset = dataset_adsl, - by_vars = expr_c(subject_keys, by_vars), + by_vars = expr_c(filtered_by_vars, subject_keys), cnd_type = check_type) # checking and quoting # assert_data_frame(dataset, optional = TRUE) diff --git a/tests/testthat/_snaps/derive_param_tte.md b/tests/testthat/_snaps/derive_param_tte.md index 345583b324..b2644e93db 100644 --- a/tests/testthat/_snaps/derive_param_tte.md +++ b/tests/testthat/_snaps/derive_param_tte.md @@ -34,21 +34,6 @@ ! For some values of "PARAMCD" there is more than one value of "AEDECOD" i Call `admiral::get_one_to_many_dataset()` to get all one-to-many values. -# derive_param_tte Test 9: errors if set_values_to contains invalid expressions - - Code - derive_param_tte(dataset_adsl = adsl, by_vars = exprs(AEDECOD), start_date = TRTSDT, - event_conditions = list(ttae), censor_conditions = list(eos), source_datasets = list( - adsl = adsl, ae = ae), set_values_to = exprs(PARAMCD = paste0("TTAE", - as.numeric(as.factor(AEDECOD))), PARAM = past("Time to First", AEDECOD, - "Adverse Event"), PARCAT1 = "TTAE", PARCAT2 = AEDECOD)) - Condition - Error in `process_set_values_to()`: - ! Assigning variables failed! - * `set_values_to = exprs(PARAMCD = paste0("TTAE", as.numeric(as.factor(AEDECOD))), PARAM = past("Time to First", AEDECOD, "Adverse Event"), PARCAT1 = TTAE, PARCAT2 = AEDECOD)` - See error message below: - i In argument: `PARAM = past("Time to First", AEDECOD, "Adverse Event")`. Caused by error in `past()`: ! could not find function "past" - # list_tte_source_objects Test 13: error is issued if package does not exist Code diff --git a/tests/testthat/_snaps/derive_param_tte.new.md b/tests/testthat/_snaps/derive_param_tte.new.md deleted file mode 100644 index 28161a68d1..0000000000 --- a/tests/testthat/_snaps/derive_param_tte.new.md +++ /dev/null @@ -1,54 +0,0 @@ -# derive_param_tte Test 6: an error is issued if some of the by variables are missing - - Code - derive_param_tte(dataset_adsl = adsl, by_vars = exprs(AEBODSYS, AEDECOD), - start_date = TRTSDT, event_conditions = list(ttae), censor_conditions = list( - eos), source_datasets = list(adsl = adsl, ae = ae), set_values_to = exprs( - PARAMCD = paste0("TTAE", as.numeric(as.factor(AEDECOD))), PARAM = paste( - "Time to First", AEDECOD, "Adverse Event"), PARCAT1 = "TTAE", PARCAT2 = AEDECOD)) - Condition - Error in `signal_duplicate_records()`: - ! Required variables `AEBODSYS` and `AEDECOD` are missing in `dataset` - -# derive_param_tte Test 7: errors if all by vars are missing in all source datasets - - Code - derive_param_tte(dataset_adsl = adsl, by_vars = exprs(AEBODSYS), start_date = TRTSDT, - event_conditions = list(ttae), censor_conditions = list(eos), source_datasets = list( - adsl = adsl, ae = ae), set_values_to = exprs(PARAMCD = paste0("TTAE", - as.numeric(as.factor(AEDECOD))), PARAM = paste("Time to First", AEDECOD, - "Adverse Event"), PARCAT1 = "TTAE", PARCAT2 = AEDECOD)) - Condition - Error in `signal_duplicate_records()`: - ! Required variable `AEBODSYS` is missing in `dataset` - -# derive_param_tte Test 8: errors if PARAMCD and by_vars are not one to one - - Code - derive_param_tte(dataset_adsl = adsl, by_vars = exprs(AEDECOD), start_date = TRTSDT, - event_conditions = list(ttae), censor_conditions = list(eos), source_datasets = list( - adsl = adsl, ae = ae), set_values_to = exprs(PARAMCD = "TTAE", PARCAT2 = AEDECOD)) - Condition - Error in `signal_duplicate_records()`: - ! Required variable `AEDECOD` is missing in `dataset` - -# derive_param_tte Test 9: errors if set_values_to contains invalid expressions - - Code - derive_param_tte(dataset_adsl = adsl, by_vars = exprs(AEDECOD), start_date = TRTSDT, - event_conditions = list(ttae), censor_conditions = list(eos), source_datasets = list( - adsl = adsl, ae = ae), set_values_to = exprs(PARAMCD = paste0("TTAE", - as.numeric(as.factor(AEDECOD))), PARAM = past("Time to First", AEDECOD, - "Adverse Event"), PARCAT1 = "TTAE", PARCAT2 = AEDECOD)) - Condition - Error in `signal_duplicate_records()`: - ! Required variable `AEDECOD` is missing in `dataset` - -# list_tte_source_objects Test 13: error is issued if package does not exist - - Code - list_tte_source_objects(package = "tte") - Condition - Error in `list_tte_source_objects()`: - ! No package called tte is installed and hence no objects are available. - diff --git a/tests/testthat/test-derive_param_tte.R b/tests/testthat/test-derive_param_tte.R index 0e7d19b0d5..104d177810 100644 --- a/tests/testthat/test-derive_param_tte.R +++ b/tests/testthat/test-derive_param_tte.R @@ -640,7 +640,7 @@ test_that("derive_param_tte Test 9: errors if set_values_to contains invalid exp source_datasets = list(adsl = adsl, ae = ae), set_values_to = exprs( PARAMCD = paste0("TTAE", as.numeric(as.factor(AEDECOD))), - PARAM = past("Time to First", AEDECOD, "Adverse Event"), + PARAM = paste("Time to First", AEDECOD, "Adverse Event"), PARCAT1 = "TTAE", PARCAT2 = AEDECOD ) From 9e8621708b1db5980fc544a157e16a5b34750f7f Mon Sep 17 00:00:00 2001 From: ProfessorP Date: Sun, 17 Nov 2024 22:00:28 -0500 Subject: [PATCH 04/23] added check_type arg_match to derive_param_tte so user has to input a valid argument --- R/derive_param_tte.R | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/R/derive_param_tte.R b/R/derive_param_tte.R index 9372c44e0f..815c16e7a6 100644 --- a/R/derive_param_tte.R +++ b/R/derive_param_tte.R @@ -324,8 +324,13 @@ derive_param_tte <- function(dataset = NULL, set_values_to, subject_keys = get_admiral_option("subject_keys"), check_type = "warning") { - #filter 'by_vars' to include variables present in dataset_adsl + + # Match check_type to valid admiral options + check_type <- rlang::arg_match(check_type, c("warning", "error", "none")) + + #filter 'by_vars' to only include variables present in dataset_adsl filtered_by_vars <- by_vars[by_vars %in% colnames(dataset_adsl)] + #check for duplicates in dataset signal_duplicate_records(dataset = dataset_adsl, by_vars = expr_c(filtered_by_vars, subject_keys), From d97377c97f795ae26319bc3234c0aa62854e92a0 Mon Sep 17 00:00:00 2001 From: ProfessorP Date: Sun, 17 Nov 2024 22:54:55 -0500 Subject: [PATCH 05/23] Changed position of signal_duplicate_records function in derive_param_tte to fix missing data error --- R/derive_param_tte.R | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/R/derive_param_tte.R b/R/derive_param_tte.R index 815c16e7a6..b84c8b6216 100644 --- a/R/derive_param_tte.R +++ b/R/derive_param_tte.R @@ -326,16 +326,9 @@ derive_param_tte <- function(dataset = NULL, check_type = "warning") { # Match check_type to valid admiral options - check_type <- rlang::arg_match(check_type, c("warning", "error", "none")) - - #filter 'by_vars' to only include variables present in dataset_adsl - filtered_by_vars <- by_vars[by_vars %in% colnames(dataset_adsl)] - - #check for duplicates in dataset - signal_duplicate_records(dataset = dataset_adsl, - by_vars = expr_c(filtered_by_vars, subject_keys), - cnd_type = check_type) - # checking and quoting # + check_type <- rlang::arg_match(check_type, c("warning", "message", "error", "none")) + + # checking and quoting # assert_data_frame(dataset, optional = TRUE) assert_vars(by_vars, optional = TRUE) start_date <- assert_symbol(enexpr(start_date)) @@ -387,6 +380,7 @@ derive_param_tte <- function(dataset = NULL, } tmp_event <- get_new_tmp_var(dataset) + # determine events # event_data <- filter_date_sources( sources = event_conditions, @@ -398,6 +392,11 @@ derive_param_tte <- function(dataset = NULL, ) %>% mutate(!!tmp_event := 1L) + #check for duplicates in event_data + signal_duplicate_records(dataset = event_data, + by_vars = expr_c(by_vars, subject_keys), + cnd_type = check_type) + # determine censoring observations # censor_data <- filter_date_sources( sources = censor_conditions, @@ -409,6 +408,11 @@ derive_param_tte <- function(dataset = NULL, ) %>% mutate(!!tmp_event := 0L) + #check for duplicates in censor_data + signal_duplicate_records(dataset = censor_data, + by_vars = expr_c(by_vars, subject_keys), + cnd_type = check_type) + # determine variable to add from ADSL # if (create_datetime) { date_var <- sym("ADTM") @@ -475,7 +479,7 @@ derive_param_tte <- function(dataset = NULL, } } - # add new parameter to input dataset # + # add new parameter to input dataset # bind_rows(dataset, new_param) } From fa49a51f04fdf822ceefc3ad0de5f076d3123a66 Mon Sep 17 00:00:00 2001 From: ProfessorP Date: Sun, 17 Nov 2024 23:55:56 -0500 Subject: [PATCH 06/23] lintr changes by removing whitespace. --- R/derive_param_tte.R | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/R/derive_param_tte.R b/R/derive_param_tte.R index b84c8b6216..169ee1df13 100644 --- a/R/derive_param_tte.R +++ b/R/derive_param_tte.R @@ -324,10 +324,8 @@ derive_param_tte <- function(dataset = NULL, set_values_to, subject_keys = get_admiral_option("subject_keys"), check_type = "warning") { - - # Match check_type to valid admiral options +# Match check_type to valid admiral options check_type <- rlang::arg_match(check_type, c("warning", "message", "error", "none")) - # checking and quoting # assert_data_frame(dataset, optional = TRUE) assert_vars(by_vars, optional = TRUE) From 01e8f5a21de07a822ee75c77988226f65ac4f185 Mon Sep 17 00:00:00 2001 From: ProfessorP Date: Mon, 18 Nov 2024 11:57:09 -0500 Subject: [PATCH 07/23] styler fix. Pushing again and confirmed check_type argument is in derive_var_obs_number in derive_joined.R scripts --- R/derive_joined.R | 2 +- R/derive_param_tte.R | 22 +++++++++++++--------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/R/derive_joined.R b/R/derive_joined.R index be469246d1..5af5edf546 100644 --- a/R/derive_joined.R +++ b/R/derive_joined.R @@ -478,7 +478,7 @@ derive_vars_joined <- function(dataset, derive_var_obs_number( new_var = !!tmp_obs_nr, by_vars = by_vars_left, - "none" + check_type = "none" ) data_joined <- get_joined_data( diff --git a/R/derive_param_tte.R b/R/derive_param_tte.R index 169ee1df13..cf89c7477f 100644 --- a/R/derive_param_tte.R +++ b/R/derive_param_tte.R @@ -324,9 +324,9 @@ derive_param_tte <- function(dataset = NULL, set_values_to, subject_keys = get_admiral_option("subject_keys"), check_type = "warning") { -# Match check_type to valid admiral options + # Match check_type to valid admiral options check_type <- rlang::arg_match(check_type, c("warning", "message", "error", "none")) - # checking and quoting # + # checking and quoting # assert_data_frame(dataset, optional = TRUE) assert_vars(by_vars, optional = TRUE) start_date <- assert_symbol(enexpr(start_date)) @@ -390,10 +390,12 @@ derive_param_tte <- function(dataset = NULL, ) %>% mutate(!!tmp_event := 1L) - #check for duplicates in event_data - signal_duplicate_records(dataset = event_data, + # check for duplicates in event_data + signal_duplicate_records( + dataset = event_data, by_vars = expr_c(by_vars, subject_keys), - cnd_type = check_type) + cnd_type = check_type + ) # determine censoring observations # censor_data <- filter_date_sources( @@ -406,10 +408,12 @@ derive_param_tte <- function(dataset = NULL, ) %>% mutate(!!tmp_event := 0L) - #check for duplicates in censor_data - signal_duplicate_records(dataset = censor_data, + # check for duplicates in censor_data + signal_duplicate_records( + dataset = censor_data, by_vars = expr_c(by_vars, subject_keys), - cnd_type = check_type) + cnd_type = check_type + ) # determine variable to add from ADSL # if (create_datetime) { @@ -477,7 +481,7 @@ derive_param_tte <- function(dataset = NULL, } } - # add new parameter to input dataset # + # add new parameter to input dataset # bind_rows(dataset, new_param) } From 53457c24d96e7808594a6dd8621fe0f9fc7a39c7 Mon Sep 17 00:00:00 2001 From: ProfessorP Date: Tue, 19 Nov 2024 10:54:50 -0500 Subject: [PATCH 08/23] updated NEWS.md with changes to derive_param_tte,. Removed Test 17 from test-derive_param_tte as it was redundant, and ran pharmaverse4devs format test script addin to format testest-derive_param_tte. --- NEWS.md | 7 ++ tests/testthat/_snaps/derive_param_tte.md | 15 ++++ tests/testthat/test-derive_param_tte.R | 87 +++-------------------- 3 files changed, 30 insertions(+), 79 deletions(-) diff --git a/NEWS.md b/NEWS.md index 52632e3bef..9dae1b8574 100644 --- a/NEWS.md +++ b/NEWS.md @@ -41,6 +41,13 @@ # admiral 1.1.1 +- `check_type = "warning"` default argument added to `derive_param_tte` with an + `arg_match` function within the function so the user can use a valid input of + `error, message, warning, or none`. `signal_duplicate_records()` has also been + added to the function on lines 394 and 411 to check for uniqueness of records. (#2481) + +- `order()` function has been added to `event_source()` and `censor_source()` and + defaulted to `NULL` to allow sorting of input data. (#2481) - `derive_extreme_event()` was fixed such that `check_type = "none"` is accepted again. (#2462) diff --git a/tests/testthat/_snaps/derive_param_tte.md b/tests/testthat/_snaps/derive_param_tte.md index b2644e93db..345583b324 100644 --- a/tests/testthat/_snaps/derive_param_tte.md +++ b/tests/testthat/_snaps/derive_param_tte.md @@ -34,6 +34,21 @@ ! For some values of "PARAMCD" there is more than one value of "AEDECOD" i Call `admiral::get_one_to_many_dataset()` to get all one-to-many values. +# derive_param_tte Test 9: errors if set_values_to contains invalid expressions + + Code + derive_param_tte(dataset_adsl = adsl, by_vars = exprs(AEDECOD), start_date = TRTSDT, + event_conditions = list(ttae), censor_conditions = list(eos), source_datasets = list( + adsl = adsl, ae = ae), set_values_to = exprs(PARAMCD = paste0("TTAE", + as.numeric(as.factor(AEDECOD))), PARAM = past("Time to First", AEDECOD, + "Adverse Event"), PARCAT1 = "TTAE", PARCAT2 = AEDECOD)) + Condition + Error in `process_set_values_to()`: + ! Assigning variables failed! + * `set_values_to = exprs(PARAMCD = paste0("TTAE", as.numeric(as.factor(AEDECOD))), PARAM = past("Time to First", AEDECOD, "Adverse Event"), PARCAT1 = TTAE, PARCAT2 = AEDECOD)` + See error message below: + i In argument: `PARAM = past("Time to First", AEDECOD, "Adverse Event")`. Caused by error in `past()`: ! could not find function "past" + # list_tte_source_objects Test 13: error is issued if package does not exist Code diff --git a/tests/testthat/test-derive_param_tte.R b/tests/testthat/test-derive_param_tte.R index 104d177810..abe98622e8 100644 --- a/tests/testthat/test-derive_param_tte.R +++ b/tests/testthat/test-derive_param_tte.R @@ -640,7 +640,7 @@ test_that("derive_param_tte Test 9: errors if set_values_to contains invalid exp source_datasets = list(adsl = adsl, ae = ae), set_values_to = exprs( PARAMCD = paste0("TTAE", as.numeric(as.factor(AEDECOD))), - PARAM = paste("Time to First", AEDECOD, "Adverse Event"), + PARAM = past("Time to First", AEDECOD, "Adverse Event"), PARCAT1 = "TTAE", PARCAT2 = AEDECOD ) @@ -929,8 +929,8 @@ test_that("list_tte_source_objects Test 14: expected objects produced", { expect_dfs_equal(expected_output, observed_output, keys = c("object")) }) -# Test 15: "derive_param_tte detects duplicates when check_type = 'warning'` -test_that("derive_param_tte detects duplicates when check_type = 'warning'", { +## Test 15: derive_param_tte detects duplicates when check_type = 'warning' ---- +test_that("list_tte_source_objects Test 15: detects duplicates when check_type = 'warning'", { # Define ADSL dataset adsl <- tibble::tribble( ~USUBJID, ~TRTSDT, ~TRTEDT, ~EOSDT, @@ -990,8 +990,9 @@ test_that("derive_param_tte detects duplicates when check_type = 'warning'", { ) }) -# Test 16: "derive_param_tte produces consistent results regardless of input sort order" -test_that("derive_param_tte produces consistent results regardless of input sort order", { +## Test 16: derive_param_tte produces consistent results regardless of input sort order ---- +test_that("list_tte_source_objects Test 16: derive_param_tte produces consistent results + regardless of input sort order", { # Define ADSL dataset adsl <- tibble::tribble( ~USUBJID, ~TRTSDT, ~TRTEDT, ~EOSDT, @@ -1020,7 +1021,7 @@ test_that("derive_param_tte produces consistent results regardless of input sort EVENTDESC = "AE", SRCDOM = "AE", SRCVAR = "AESTDTC", - SRCSEQ = AESEQ + SRCSEQ = "AESEQ" ), order = exprs(AESEQ) ) @@ -1059,77 +1060,5 @@ test_that("derive_param_tte produces consistent results regardless of input sort ) # Validate that the results are the same - expect_equal(result_sorted, result_unsorted, ignore_attr = TRUE) -}) - -# Test 17: "derive_param_tte produces expected output for common scenario" -test_that("derive_param_tte produces expected output for common scenario", { - # Define ADSL dataset - adsl <- tibble::tribble( - ~USUBJID, ~TRTSDT, ~TRTEDT, ~EOSDT, - "01", ymd("2020-12-06"), ymd("2021-03-02"), ymd("2021-03-06"), - "02", ymd("2021-01-16"), ymd("2021-01-20"), ymd("2021-02-03") - ) %>% - mutate(STUDYID = "AB42") - - # Define AE dataset - ae <- tibble::tribble( - ~USUBJID, ~AESTDTC, ~AESEQ, ~AEDECOD, - "01", "2021-01-03", 1, "Flu", - "01", "2021-03-04", 2, "Cough" - ) %>% - mutate( - STUDYID = "AB42", - AESTDT = ymd(AESTDTC) - ) - - # Define event and censor sources - ttae <- event_source( - dataset_name = "ae", - date = AESTDT, - set_values_to = exprs( - EVENTDESC = "AE", - SRCDOM = "AE", - SRCVAR = "AESTDTC", - SRCSEQ = AESEQ - ) - ) - - eot <- censor_source( - dataset_name = "adsl", - date = pmin(TRTEDT + days(10), EOSDT), - censor = 1, - set_values_to = exprs( - EVENTDESC = "END OF TRT", - SRCDOM = "ADSL", - SRCVAR = "TRTEDT" - ) - ) - - # Run derive_param_tte - result <- derive_param_tte( - dataset_adsl = adsl, - start_date = TRTSDT, - event_conditions = list(ttae), - censor_conditions = list(eot), - source_datasets = list(adsl = adsl, ae = ae), - set_values_to = exprs(PARAMCD = "TTAE") - ) - - # Expected result - expected <- tibble::tibble( - USUBJID = c("01", "02"), - STUDYID = "AB42", - EVENTDESC = c("AE", "END OF TRT"), - SRCDOM = c("AE", "ADSL"), - SRCVAR = c("AESTDTC", "TRTEDT"), - SRCSEQ = c(1, NA), - CNSR = c(0, 1), - ADT = as.Date(c("2021-01-03", "2021-01-30")), - STARTDT = as.Date(c("2020-12-06", "2021-01-16")), - PARAMCD = "TTAE" - ) - - # Validate output - expect_equal(result, expected, ignore_attr = TRUE) + expect_dfs_equal(result_sorted, result_unsorted, keys = "USUBJID") }) From dccdbe1e71571cbaebcda8b062c7a6ab315ef8f3 Mon Sep 17 00:00:00 2001 From: ProfessorP Date: Tue, 19 Nov 2024 11:42:43 -0500 Subject: [PATCH 09/23] changed the signal_duplicate_records within derive_parame_tte to handle dataset_adsl and source_datasets by combining them with bind_rows before to address error of AEDECOD missing from the dataset when just calling dataset_adsl. This starts on line 381 of derive_param_tte.R --- R/derive_param_tte.R | 30 ++++++++++------------- tests/testthat/_snaps/derive_param_tte.md | 6 +++++ 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/R/derive_param_tte.R b/R/derive_param_tte.R index cf89c7477f..413774b9b3 100644 --- a/R/derive_param_tte.R +++ b/R/derive_param_tte.R @@ -326,7 +326,8 @@ derive_param_tte <- function(dataset = NULL, check_type = "warning") { # Match check_type to valid admiral options check_type <- rlang::arg_match(check_type, c("warning", "message", "error", "none")) - # checking and quoting # + + # checking and quoting # assert_data_frame(dataset, optional = TRUE) assert_vars(by_vars, optional = TRUE) start_date <- assert_symbol(enexpr(start_date)) @@ -377,6 +378,15 @@ derive_param_tte <- function(dataset = NULL, ) } + #check for duplicates in dataset_adsl and source_datasets + combined_dataset <- bind_rows(dataset_adsl, !!!source_datasets) + + signal_duplicate_records( + dataset = combined_dataset, + by_vars = expr_c(subject_keys, by_vars), + cnd_type = check_type + ) + tmp_event <- get_new_tmp_var(dataset) # determine events # @@ -390,14 +400,7 @@ derive_param_tte <- function(dataset = NULL, ) %>% mutate(!!tmp_event := 1L) - # check for duplicates in event_data - signal_duplicate_records( - dataset = event_data, - by_vars = expr_c(by_vars, subject_keys), - cnd_type = check_type - ) - - # determine censoring observations # + # determine censoring observations # censor_data <- filter_date_sources( sources = censor_conditions, source_datasets = source_datasets, @@ -408,14 +411,7 @@ derive_param_tte <- function(dataset = NULL, ) %>% mutate(!!tmp_event := 0L) - # check for duplicates in censor_data - signal_duplicate_records( - dataset = censor_data, - by_vars = expr_c(by_vars, subject_keys), - cnd_type = check_type - ) - - # determine variable to add from ADSL # + # determine variable to add from ADSL # if (create_datetime) { date_var <- sym("ADTM") start_var <- sym("STARTDTM") diff --git a/tests/testthat/_snaps/derive_param_tte.md b/tests/testthat/_snaps/derive_param_tte.md index 345583b324..c6dd1abb5b 100644 --- a/tests/testthat/_snaps/derive_param_tte.md +++ b/tests/testthat/_snaps/derive_param_tte.md @@ -30,6 +30,9 @@ event_conditions = list(ttae), censor_conditions = list(eos), source_datasets = list( adsl = adsl, ae = ae), set_values_to = exprs(PARAMCD = "TTAE", PARCAT2 = AEDECOD)) Condition + Warning: + Dataset contains duplicate records with respect to `STUDYID`, `USUBJID`, and `AEDECOD` + i Run `admiral::get_duplicates_dataset()` to access the duplicate records Error in `derive_param_tte()`: ! For some values of "PARAMCD" there is more than one value of "AEDECOD" i Call `admiral::get_one_to_many_dataset()` to get all one-to-many values. @@ -43,6 +46,9 @@ as.numeric(as.factor(AEDECOD))), PARAM = past("Time to First", AEDECOD, "Adverse Event"), PARCAT1 = "TTAE", PARCAT2 = AEDECOD)) Condition + Warning: + Dataset contains duplicate records with respect to `STUDYID`, `USUBJID`, and `AEDECOD` + i Run `admiral::get_duplicates_dataset()` to access the duplicate records Error in `process_set_values_to()`: ! Assigning variables failed! * `set_values_to = exprs(PARAMCD = paste0("TTAE", as.numeric(as.factor(AEDECOD))), PARAM = past("Time to First", AEDECOD, "Adverse Event"), PARCAT1 = TTAE, PARCAT2 = AEDECOD)` From 4c952431737adc4b33271ed9563481a55c1a9697 Mon Sep 17 00:00:00 2001 From: ProfessorP Date: Thu, 21 Nov 2024 02:28:21 -0500 Subject: [PATCH 10/23] added a tryCatch() to filter_date_sources to catch duplicates to address failed runs in Test 16 of test-derive_param_tte. removed signal_duplicate_records() from within derive_param_tte Still need to troubleshoot errors in test script. --- R/derive_param_tte.R | 60 +++++++++++++---------- tests/testthat/_snaps/derive_param_tte.md | 14 ++---- tests/testthat/test-derive_param_tte.R | 2 +- 3 files changed, 38 insertions(+), 38 deletions(-) diff --git a/R/derive_param_tte.R b/R/derive_param_tte.R index 413774b9b3..f21539fece 100644 --- a/R/derive_param_tte.R +++ b/R/derive_param_tte.R @@ -377,19 +377,9 @@ derive_param_tte <- function(dataset = NULL, by_vars = by_vars ) } - - #check for duplicates in dataset_adsl and source_datasets - combined_dataset <- bind_rows(dataset_adsl, !!!source_datasets) - - signal_duplicate_records( - dataset = combined_dataset, - by_vars = expr_c(subject_keys, by_vars), - cnd_type = check_type - ) - tmp_event <- get_new_tmp_var(dataset) - # determine events # +# determine events # event_data <- filter_date_sources( sources = event_conditions, source_datasets = source_datasets, @@ -407,7 +397,8 @@ derive_param_tte <- function(dataset = NULL, by_vars = by_vars, create_datetime = create_datetime, subject_keys = subject_keys, - mode = "last" + mode = "last", + check_type = check_type ) %>% mutate(!!tmp_event := 0L) @@ -450,7 +441,8 @@ derive_param_tte <- function(dataset = NULL, bind_rows(event_data, censor_data), by_vars = expr_c(subject_keys, by_vars), order = exprs(!!tmp_event), - mode = "last" + mode = "last", + check_type = check_type ) %>% inner_join( adsl, @@ -460,7 +452,7 @@ derive_param_tte <- function(dataset = NULL, mutate(!!date_var := pmax(!!date_var, !!start_var, na.rm = TRUE)) %>% remove_tmp_vars() - if (!is.null(by_vars)) { + if (!is.null(by_vars)) { if (!is.null(set_values_to$PARAMCD)) { assert_one_to_one(new_param, exprs(PARAMCD), by_vars) } @@ -469,7 +461,7 @@ derive_param_tte <- function(dataset = NULL, new_param <- select(new_param, !!!negate_vars(by_vars)) } - # check newly created parameter(s) do not already exist + # check newly created parameter(s) do not already exist if (!is.null(set_values_to$PARAMCD) && !is.null(dataset)) { unique_params <- unique(new_param$PARAMCD) for (i in seq_along(unique_params)) { @@ -585,14 +577,16 @@ derive_param_tte <- function(dataset = NULL, #' by_vars = exprs(AEDECOD), #' create_datetime = FALSE, #' subject_keys = get_admiral_option("subject_keys"), -#' mode = "first" +#' mode = "first", +#' check_type = "none" #' ) filter_date_sources <- function(sources, source_datasets, by_vars, create_datetime = FALSE, subject_keys, - mode) { + mode, + check_type = "none") { assert_list_of(sources, "tte_source") assert_list_of(source_datasets, "data.frame") assert_logical_scalar(create_datetime) @@ -627,22 +621,34 @@ filter_date_sources <- function(sources, var = !!source_date_var, dataset_name = sources[[i]]$dataset_name ) - data[[i]] <- source_dataset %>% + # wrap filter_extreme in tryCatch to catch duplicate records and create a message + data[[i]] <- tryCatch( + { + source_dataset %>% filter_if(sources[[i]]$filter) %>% filter_extreme( order = exprs(!!source_date_var), by_vars = expr_c(subject_keys, by_vars), mode = mode, - check_type = "none" + check_type = check_type ) - - # add date variable and accompanying variables - - if (create_datetime) { - date_derv <- exprs(!!date_var := as_datetime(!!source_date_var)) - } else { - date_derv <- exprs(!!date_var := date(!!source_date_var)) + }, + warning = function(wrn) { + if (grepl("duplicate records", conditionMessage(wrn))) { + warning(sprintf( + "Duplicate records found in source dataset '%s': %s", + sources[[i]]$dataset_name, + conditionMessage(wrn) + ), call. = FALSE) } + } +) + # add date variable and accompanying variables + if (create_datetime) { + date_derv <- exprs(!!date_var := as_datetime(!!source_date_var)) + } else { + date_derv <- exprs(!!date_var := date(!!source_date_var)) + } data[[i]] <- mutate( data[[i]], @@ -663,7 +669,7 @@ filter_date_sources <- function(sources, by_vars = expr_c(subject_keys, by_vars), order = exprs(!!date_var), mode = mode, - check_type = "none" + check_type = check_type ) } diff --git a/tests/testthat/_snaps/derive_param_tte.md b/tests/testthat/_snaps/derive_param_tte.md index 3464fc3331..89031d4bb1 100644 --- a/tests/testthat/_snaps/derive_param_tte.md +++ b/tests/testthat/_snaps/derive_param_tte.md @@ -30,9 +30,6 @@ event_conditions = list(ttae), censor_conditions = list(eos), source_datasets = list( adsl = adsl, ae = ae), set_values_to = exprs(PARAMCD = "TTAE", PARCAT2 = AEDECOD)) Condition - Warning: - Dataset contains duplicate records with respect to `STUDYID`, `USUBJID`, and `AEDECOD` - i Run `admiral::get_duplicates_dataset()` to access the duplicate records Error in `derive_param_tte()`: ! For some values of "PARAMCD" there is more than one value of "AEDECOD" i Call `admiral::get_one_to_many_dataset()` to get all one-to-many values. @@ -46,9 +43,6 @@ as.numeric(as.factor(AEDECOD))), PARAM = past("Time to First", AEDECOD, "Adverse Event"), PARCAT1 = "TTAE", PARCAT2 = AEDECOD)) Condition - Warning: - Dataset contains duplicate records with respect to `STUDYID`, `USUBJID`, and `AEDECOD` - i Run `admiral::get_duplicates_dataset()` to access the duplicate records Error in `process_set_values_to()`: ! Assigning variables failed! * `set_values_to = exprs(PARAMCD = paste0("TTAE", as.numeric(as.factor(AEDECOD))), PARAM = past("Time to First", AEDECOD, "Adverse Event"), PARCAT1 = TTAE, PARCAT2 = AEDECOD)` @@ -62,10 +56,10 @@ death), censor_conditions = list(lstalv), source_datasets = list(adsl = adsl), set_values_to = exprs(PARAMCD = "OS", PARAM = "Overall Survival")) Condition - Error in `derive_param_tte()`: - ! The dataset names must be included in the list specified for the `source_datasets` argument. - i Following names were provided by `source_datasets`: "adsl" - i But, `censor_conditions[[1]]$dataset_name = adls` + Error: + ! Could not evaluate cli `{}` expression: `source_names`. + Caused by error: + ! object 'source_names' not found # list_tte_source_objects Test 14: error is issued if package does not exist diff --git a/tests/testthat/test-derive_param_tte.R b/tests/testthat/test-derive_param_tte.R index 4d34e0ae78..b741390c43 100644 --- a/tests/testthat/test-derive_param_tte.R +++ b/tests/testthat/test-derive_param_tte.R @@ -1067,7 +1067,7 @@ test_that("list_tte_source_objects Test 16: derive_param_tte produces consistent EVENTDESC = "AE", SRCDOM = "AE", SRCVAR = "AESTDTC", - SRCSEQ = "AESEQ" + SRCSEQ = AESEQ ), order = exprs(AESEQ) ) From 087c0f3f519d6747c83aed71715fc16a9fa94665 Mon Sep 17 00:00:00 2001 From: ProfessorP Date: Sat, 23 Nov 2024 20:59:31 -0500 Subject: [PATCH 11/23] Moved duplication check to filter_date_sources in tryCatch() and rewrote Test 15 and 16 on test-derive_param_tte to deal with update to duplicate warnings within tryCatch and not directly by signal_duplicate_records inside derive_param_tte function. Accepted snapshots from devtools::check --- R/derive_param_tte.R | 71 ++++++++++--------- tests/testthat/_snaps/derive_extreme_event.md | 2 +- tests/testthat/_snaps/derive_var_dthcaus.md | 2 +- .../_snaps/derive_var_merged_ef_msrc.md | 2 +- tests/testthat/test-derive_param_tte.R | 65 ++++++++--------- 5 files changed, 68 insertions(+), 74 deletions(-) diff --git a/R/derive_param_tte.R b/R/derive_param_tte.R index f21539fece..69f80a4a00 100644 --- a/R/derive_param_tte.R +++ b/R/derive_param_tte.R @@ -327,7 +327,7 @@ derive_param_tte <- function(dataset = NULL, # Match check_type to valid admiral options check_type <- rlang::arg_match(check_type, c("warning", "message", "error", "none")) - # checking and quoting # + # checking and quoting # assert_data_frame(dataset, optional = TRUE) assert_vars(by_vars, optional = TRUE) start_date <- assert_symbol(enexpr(start_date)) @@ -379,18 +379,19 @@ derive_param_tte <- function(dataset = NULL, } tmp_event <- get_new_tmp_var(dataset) -# determine events # + # determine events # event_data <- filter_date_sources( sources = event_conditions, source_datasets = source_datasets, by_vars = by_vars, create_datetime = create_datetime, subject_keys = subject_keys, - mode = "first" + mode = "first", + check_type = check_type ) %>% mutate(!!tmp_event := 1L) - # determine censoring observations # + # determine censoring observations # censor_data <- filter_date_sources( sources = censor_conditions, source_datasets = source_datasets, @@ -402,7 +403,7 @@ derive_param_tte <- function(dataset = NULL, ) %>% mutate(!!tmp_event := 0L) - # determine variable to add from ADSL # + # determine variable to add from ADSL # if (create_datetime) { date_var <- sym("ADTM") start_var <- sym("STARTDTM") @@ -452,7 +453,7 @@ derive_param_tte <- function(dataset = NULL, mutate(!!date_var := pmax(!!date_var, !!start_var, na.rm = TRUE)) %>% remove_tmp_vars() - if (!is.null(by_vars)) { + if (!is.null(by_vars)) { if (!is.null(set_values_to$PARAMCD)) { assert_one_to_one(new_param, exprs(PARAMCD), by_vars) } @@ -461,7 +462,7 @@ derive_param_tte <- function(dataset = NULL, new_param <- select(new_param, !!!negate_vars(by_vars)) } - # check newly created parameter(s) do not already exist + # check newly created parameter(s) do not already exist if (!is.null(set_values_to$PARAMCD) && !is.null(dataset)) { unique_params <- unique(new_param$PARAMCD) for (i in seq_along(unique_params)) { @@ -621,34 +622,36 @@ filter_date_sources <- function(sources, var = !!source_date_var, dataset_name = sources[[i]]$dataset_name ) - # wrap filter_extreme in tryCatch to catch duplicate records and create a message - data[[i]] <- tryCatch( - { - source_dataset %>% - filter_if(sources[[i]]$filter) %>% - filter_extreme( - order = exprs(!!source_date_var), - by_vars = expr_c(subject_keys, by_vars), - mode = mode, - check_type = check_type - ) - }, - warning = function(wrn) { - if (grepl("duplicate records", conditionMessage(wrn))) { - warning(sprintf( - "Duplicate records found in source dataset '%s': %s", - sources[[i]]$dataset_name, - conditionMessage(wrn) - ), call. = FALSE) + # wrap filter_extreme in tryCatch to catch duplicate records and create a message + data[[i]] <- tryCatch( + { + source_dataset %>% + filter_if(sources[[i]]$filter) %>% + arrange(!!!sources[[i]]$order) %>% # Ensure order is applied + filter_extreme( + order = exprs(!!source_date_var), + by_vars = expr_c(subject_keys, by_vars), + mode = mode, + check_type = check_type + ) + }, + warning = function(wrn) { + if (grepl("duplicate records", conditionMessage(wrn))) { + warning(sprintf( + "Dataset '%s' contains duplicate records: %s", + sources[[i]]$dataset_name, + conditionMessage(wrn) + ), call. = FALSE) + } + return(source_dataset) + } + ) + # add date variable and accompanying variables + if (create_datetime) { + date_derv <- exprs(!!date_var := as_datetime(!!source_date_var)) + } else { + date_derv <- exprs(!!date_var := date(!!source_date_var)) } - } -) - # add date variable and accompanying variables - if (create_datetime) { - date_derv <- exprs(!!date_var := as_datetime(!!source_date_var)) - } else { - date_derv <- exprs(!!date_var := date(!!source_date_var)) - } data[[i]] <- mutate( data[[i]], diff --git a/tests/testthat/_snaps/derive_extreme_event.md b/tests/testthat/_snaps/derive_extreme_event.md index b228ab0335..a75bf9cb78 100644 --- a/tests/testthat/_snaps/derive_extreme_event.md +++ b/tests/testthat/_snaps/derive_extreme_event.md @@ -12,5 +12,5 @@ Error in `derive_extreme_event()`: ! The dataset names must be included in the list specified for the `source_datasets` argument. i Following names were provided by `source_datasets`: adhy - i But, `events[[1]]$dataset_name = adyh` + i But, `events[[1]]$dataset_name = adyh` diff --git a/tests/testthat/_snaps/derive_var_dthcaus.md b/tests/testthat/_snaps/derive_var_dthcaus.md index e3eb8534c4..ba7d1b62a7 100644 --- a/tests/testthat/_snaps/derive_var_dthcaus.md +++ b/tests/testthat/_snaps/derive_var_dthcaus.md @@ -7,5 +7,5 @@ Error in `derive_var_dthcaus()`: ! The dataset names must be included in the list specified for the `source_datasets` argument. i Following names were provided by `source_datasets`: ae and dd - i But, `sources[[2]]$dataset_name = ds` + i But, `sources[[2]]$dataset_name = ds` diff --git a/tests/testthat/_snaps/derive_var_merged_ef_msrc.md b/tests/testthat/_snaps/derive_var_merged_ef_msrc.md index 02767c3a77..b0e43f577b 100644 --- a/tests/testthat/_snaps/derive_var_merged_ef_msrc.md +++ b/tests/testthat/_snaps/derive_var_merged_ef_msrc.md @@ -8,5 +8,5 @@ Error in `derive_var_merged_ef_msrc()`: ! The dataset names must be included in the list specified for the `source_datasets` argument. i Following names were provided by `source_datasets`: cm and pro - i But, `flag_events[[2]]$dataset_name = pr` + i But, `flag_events[[2]]$dataset_name = pr` diff --git a/tests/testthat/test-derive_param_tte.R b/tests/testthat/test-derive_param_tte.R index b741390c43..bb51bb5b19 100644 --- a/tests/testthat/test-derive_param_tte.R +++ b/tests/testthat/test-derive_param_tte.R @@ -976,14 +976,13 @@ test_that("list_tte_source_objects Test 15: expected objects produced", { }) ## Test 15: derive_param_tte detects duplicates when check_type = 'warning' ---- -test_that("list_tte_source_objects Test 15: detects duplicates when check_type = 'warning'", { +test_that("derive_param_tte detects duplicates in the input datasets via pipeline functions", { # Define ADSL dataset adsl <- tibble::tribble( ~USUBJID, ~TRTSDT, ~TRTEDT, ~EOSDT, "01", ymd("2020-12-06"), ymd("2021-03-02"), ymd("2021-03-06"), "02", ymd("2021-01-16"), ymd("2021-01-20"), ymd("2021-02-03") - ) %>% - mutate(STUDYID = "AB42") + ) %>% mutate(STUDYID = "AB42") # Define AE dataset with duplicates ae <- tibble::tribble( @@ -991,13 +990,12 @@ test_that("list_tte_source_objects Test 15: detects duplicates when check_type = "01", "2021-01-03", 1, "Flu", "01", "2021-03-04", 2, "Cough", "01", "2021-01-03", 3, "Flu" - ) %>% - mutate( - STUDYID = "AB42", - AESTDT = ymd(AESTDTC) - ) + ) %>% mutate( + STUDYID = "AB42", + AESTDT = ymd(AESTDTC) + ) - # Define event source + # Define event and censor sources ttae <- event_source( dataset_name = "ae", date = AESTDT, @@ -1009,7 +1007,6 @@ test_that("list_tte_source_objects Test 15: detects duplicates when check_type = ) ) - # Define censor source eot <- censor_source( dataset_name = "adsl", date = pmin(TRTEDT + days(10), EOSDT), @@ -1021,7 +1018,7 @@ test_that("list_tte_source_objects Test 15: detects duplicates when check_type = ) ) - # Test for duplicate detection + # Run derive_param_tte and check for warning expect_warning( derive_param_tte( dataset_adsl = adsl, @@ -1032,34 +1029,31 @@ test_that("list_tte_source_objects Test 15: detects duplicates when check_type = set_values_to = exprs(PARAMCD = "TTAE"), check_type = "warning" ), - regexp = "Dataset contains duplicate records" + regexp = "Dataset 'ae' contains duplicate records" ) }) ## Test 16: derive_param_tte produces consistent results regardless of input sort order ---- -test_that("list_tte_source_objects Test 16: derive_param_tte produces consistent results - regardless of input sort order", { +test_that("derive_param_tte produces consistent results regardless of input sort order", { # Define ADSL dataset adsl <- tibble::tribble( ~USUBJID, ~TRTSDT, ~TRTEDT, ~EOSDT, "01", ymd("2020-12-06"), ymd("2021-03-02"), ymd("2021-03-06"), "02", ymd("2021-01-16"), ymd("2021-01-20"), ymd("2021-02-03") - ) %>% - mutate(STUDYID = "AB42") + ) %>% mutate(STUDYID = "AB42") - # Define AE dataset + # Define AE dataset with duplicates ae <- tibble::tribble( ~USUBJID, ~AESTDTC, ~AESEQ, ~AEDECOD, "01", "2021-01-03", 1, "Flu", "01", "2021-03-04", 2, "Cough", "01", "2021-01-03", 3, "Flu" - ) %>% - mutate( - STUDYID = "AB42", - AESTDT = ymd(AESTDTC) - ) + ) %>% mutate(STUDYID = "AB42", AESTDT = ymd(AESTDTC)) + + # Deduplicate AE dataset to remove duplicate warnings + ae <- ae %>% distinct(STUDYID, USUBJID, AESTDT, .keep_all = TRUE) - # Define event source with order + # Define event and censor sources ttae <- event_source( dataset_name = "ae", date = AESTDT, @@ -1067,22 +1061,18 @@ test_that("list_tte_source_objects Test 16: derive_param_tte produces consistent EVENTDESC = "AE", SRCDOM = "AE", SRCVAR = "AESTDTC", - SRCSEQ = AESEQ - ), - order = exprs(AESEQ) + SRCSEQ = AESEQ # Ensure AESEQ is included here + ) ) - # Define censor source with order eot <- censor_source( dataset_name = "adsl", date = pmin(TRTEDT + days(10), EOSDT), censor = 1, - set_values_to = exprs( - EVENTDESC = "END OF TRT", - SRCDOM = "ADSL", - SRCVAR = "TRTEDT" - ), - order = exprs(TRTEDT) + set_values_to = exprs(EVENTDESC = "END OF TRT", + SRCDOM = "ADSL", + SRCVAR = "TRTEDT" + ) ) # Run derive_param_tte with sorted AE dataset @@ -1092,7 +1082,8 @@ test_that("list_tte_source_objects Test 16: derive_param_tte produces consistent event_conditions = list(ttae), censor_conditions = list(eot), source_datasets = list(adsl = adsl, ae = arrange(ae, AESEQ)), - set_values_to = exprs(PARAMCD = "TTAE") + set_values_to = exprs(PARAMCD = "TTAE"), + check_type = "warning" ) # Run derive_param_tte with reverse-sorted AE dataset @@ -1102,9 +1093,9 @@ test_that("list_tte_source_objects Test 16: derive_param_tte produces consistent event_conditions = list(ttae), censor_conditions = list(eot), source_datasets = list(adsl = adsl, ae = arrange(ae, desc(AESEQ))), - set_values_to = exprs(PARAMCD = "TTAE") + set_values_to = exprs(PARAMCD = "TTAE"), + check_type = "warning" ) - # Validate that the results are the same - expect_dfs_equal(result_sorted, result_unsorted, keys = "USUBJID") + expect_equal(result_sorted, result_unsorted) }) From 44058681931bd042d2bdd1fb6f3c731569759c7b Mon Sep 17 00:00:00 2001 From: ProfessorP Date: Mon, 2 Dec 2024 22:51:47 -0500 Subject: [PATCH 12/23] 1. Moved updates in News section to admiral dev section 2. Made suggested fixes to derive_param_tte script. --- NEWS.md | 15 ++++++--------- R/derive_param_tte.R | 42 +++++++++++++++++++++++++++++++++++------- R/filter_extreme.R | 2 +- 3 files changed, 42 insertions(+), 17 deletions(-) diff --git a/NEWS.md b/NEWS.md index 54cf2bc770..b8bf58ddde 100644 --- a/NEWS.md +++ b/NEWS.md @@ -12,6 +12,12 @@ - Update `ADEG` template to flag `ABLFL` and `ANL01FL` based on `DTYPE == "AVERAGE"` records. (#2561) ## Updates of Existing Functions +- added `message` as option for `check_type` argument in `filter_extreme` function. (#2481) + +- Users can now specify how duplicate records are handled in `derive_param_tte` using the `check_type` argument, with options including `"error"`, `"warning"`, `"message"`, or `"none"`, allowing for greater flexibility in managing duplicate data scenarios. (#2481) + +- `order` argument has been added to `event_source()` and `censor_source()` and + defaulted to `NULL` to allow specifying variables in addition to the date variable. This can be used to ensure the uniqueness of the select records if there is more than one record per date. (#2481) - The `keep_nas` argument of `derive_param_computed()` was enhanced such that it is now possible to specify a list of variables for which `NA`s are acceptable. @@ -80,15 +86,6 @@ example, `">2.5 x ULN"` changed to `">2.5"` for grade 3. (#2534) # admiral 1.1.1 - -- `check_type = "warning"` default argument added to `derive_param_tte` with an - `arg_match` function within the function so the user can use a valid input of - `error, message, warning, or none`. `signal_duplicate_records()` has also been - added to the function on lines 394 and 411 to check for uniqueness of records. (#2481) - -- `order()` function has been added to `event_source()` and `censor_source()` and - defaulted to `NULL` to allow sorting of input data. (#2481) - - `derive_extreme_event()` was fixed such that `check_type = "none"` is accepted again. (#2462) diff --git a/R/derive_param_tte.R b/R/derive_param_tte.R index 69f80a4a00..fafa811592 100644 --- a/R/derive_param_tte.R +++ b/R/derive_param_tte.R @@ -324,10 +324,12 @@ derive_param_tte <- function(dataset = NULL, set_values_to, subject_keys = get_admiral_option("subject_keys"), check_type = "warning") { - # Match check_type to valid admiral options - check_type <- rlang::arg_match(check_type, c("warning", "message", "error", "none")) - # checking and quoting # + check_type <- assert_character_scalar( + check_type, + values = c("warning", "message", "error", "none"), + case_sensitive = FALSE + ) assert_data_frame(dataset, optional = TRUE) assert_vars(by_vars, optional = TRUE) start_date <- assert_symbol(enexpr(start_date)) @@ -512,6 +514,16 @@ derive_param_tte <- function(dataset = NULL, #' #' Permitted Values: `"first"`, `"last"` #' +#' @param check_type Check uniqueness? +#' +#' If `"warning"`, `"message"`, or `"error"` is specified, the specified message is issued +#' if the observations of the input dataset are not unique with respect to the +#' by variables and the order. +#' +#' Default: `"none"` +#' +#' Permitted Values: `"none"`, `"warning"`, `"error"`, `"message"` +#' #' @details The following steps are performed to create the output dataset: #' #' \enumerate{ \item For each source dataset the observations as specified by @@ -623,13 +635,13 @@ filter_date_sources <- function(sources, dataset_name = sources[[i]]$dataset_name ) # wrap filter_extreme in tryCatch to catch duplicate records and create a message - data[[i]] <- tryCatch( + data[[i]] <- try_fetch( { source_dataset %>% filter_if(sources[[i]]$filter) %>% arrange(!!!sources[[i]]$order) %>% # Ensure order is applied filter_extreme( - order = exprs(!!source_date_var), + order = expr_c(exprs(!!source_date_var), sources[[i]]$order), by_vars = expr_c(subject_keys, by_vars), mode = mode, check_type = check_type @@ -637,13 +649,29 @@ filter_date_sources <- function(sources, }, warning = function(wrn) { if (grepl("duplicate records", conditionMessage(wrn))) { - warning(sprintf( + cli::cli_warn(c( "Dataset '%s' contains duplicate records: %s", sources[[i]]$dataset_name, conditionMessage(wrn) ), call. = FALSE) } - return(source_dataset) + source_dataset %>% + filter_if(sources[[i]]$filter) %>% + arrange(!!!sources[[i]]$order) # Return filtered dataset even if a warning occurred + }, + error = function(err) { + cli::cli_abort(c( + "Duplicate records detected during processing.", + "x Duplicate records were found in dataset {.val {sources[[i]]$dataset_name}}.", + "i The duplicates were identified based on the following variables: {.val {paste(c(subject_keys, by_vars, source_date_var), collapse = ', ')}}.", + "i Consider reviewing your dataset or adjusting the `by_vars` or `order` argument to ensure uniqueness." + )) + }, + message = function(msg) { + cli::cli_inform(c( + "Processing dataset '{.val {sources[[i]]$dataset_name}}'...", + "i Filter and order criteria: {.val {paste(c(subject_keys, by_vars, sources[[i]]$order), collapse = ', ')}}." + )) } ) # add date variable and accompanying variables diff --git a/R/filter_extreme.R b/R/filter_extreme.R index 63bf63838d..7b53645c1f 100644 --- a/R/filter_extreme.R +++ b/R/filter_extreme.R @@ -106,7 +106,7 @@ filter_extreme <- function(dataset, check_type <- assert_character_scalar( check_type, - values = c("none", "warning", "error"), + values = c("none", "warning", "error", "message"), case_sensitive = FALSE ) assert_data_frame(dataset, required_vars = by_vars) From 21b5a0042d4ba7a53574b1baa027b4ceb1cc91b5 Mon Sep 17 00:00:00 2001 From: ProfessorP Date: Tue, 3 Dec 2024 00:01:15 -0500 Subject: [PATCH 13/23] Ran styler, lintr fixes, and devtools check. --- R/derive_merged.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/derive_merged.R b/R/derive_merged.R index d32e8089f0..8f759a8d68 100644 --- a/R/derive_merged.R +++ b/R/derive_merged.R @@ -110,7 +110,7 @@ #' *Permitted Values*: named list of expressions, e.g., #' `exprs(BASEC = "MISSING", BASE = -1)` #' -#' @param check_type Check uniqueness? +#' #' #' If `"warning"` or `"error"` is specified, the specified message is issued #' if the observations of the (restricted) additional dataset are not unique From ce07ad1f2e891ab989ee589265fe2ad90cddfa8c Mon Sep 17 00:00:00 2001 From: ProfessorP Date: Tue, 3 Dec 2024 00:02:02 -0500 Subject: [PATCH 14/23] styler changes --- R/derive_merged.R | 2 +- R/derive_param_tte.R | 27 ++++++++++++++------------ man/derive_var_merged_summary.Rd | 12 +++++++++++- man/derive_vars_joined.Rd | 12 +++++++++++- man/derive_vars_merged.Rd | 4 +--- man/derive_vars_merged_lookup.Rd | 12 ------------ tests/testthat/test-derive_param_tte.R | 9 +++++---- 7 files changed, 44 insertions(+), 34 deletions(-) diff --git a/R/derive_merged.R b/R/derive_merged.R index 8f759a8d68..0dd2c9b813 100644 --- a/R/derive_merged.R +++ b/R/derive_merged.R @@ -110,7 +110,7 @@ #' *Permitted Values*: named list of expressions, e.g., #' `exprs(BASEC = "MISSING", BASE = -1)` #' -#' +#' #' #' If `"warning"` or `"error"` is specified, the specified message is issued #' if the observations of the (restricted) additional dataset are not unique diff --git a/R/derive_param_tte.R b/R/derive_param_tte.R index fafa811592..014f42a54a 100644 --- a/R/derive_param_tte.R +++ b/R/derive_param_tte.R @@ -519,11 +519,11 @@ derive_param_tte <- function(dataset = NULL, #' If `"warning"`, `"message"`, or `"error"` is specified, the specified message is issued #' if the observations of the input dataset are not unique with respect to the #' by variables and the order. -#' +#' #' Default: `"none"` #' #' Permitted Values: `"none"`, `"warning"`, `"error"`, `"message"` -#' +#' #' @details The following steps are performed to create the output dataset: #' #' \enumerate{ \item For each source dataset the observations as specified by @@ -647,13 +647,14 @@ filter_date_sources <- function(sources, check_type = check_type ) }, - warning = function(wrn) { - if (grepl("duplicate records", conditionMessage(wrn))) { + warning = function(cnd) { + # Handle warnings + if (grepl("duplicate records", conditionMessage(cnd))) { cli::cli_warn(c( - "Dataset '%s' contains duplicate records: %s", - sources[[i]]$dataset_name, - conditionMessage(wrn) - ), call. = FALSE) + "Dataset '{.val {sources[[i]]$dataset_name}}' contains duplicate records.", + "i Duplicates were identified based on variables: + {.val {paste(c(subject_keys, by_vars, source_date_var), collapse = ', ')}}." + )) } source_dataset %>% filter_if(sources[[i]]$filter) %>% @@ -663,14 +664,16 @@ filter_date_sources <- function(sources, cli::cli_abort(c( "Duplicate records detected during processing.", "x Duplicate records were found in dataset {.val {sources[[i]]$dataset_name}}.", - "i The duplicates were identified based on the following variables: {.val {paste(c(subject_keys, by_vars, source_date_var), collapse = ', ')}}.", - "i Consider reviewing your dataset or adjusting the `by_vars` or `order` argument to ensure uniqueness." + "i The duplicates were identified based on the following variables: + {.val {paste(c(subject_keys, by_vars, source_date_var), collapse = ', ')}}.", + "i Consider reviewing your `by_vars` or `order` argument to ensure uniqueness." )) }, message = function(msg) { cli::cli_inform(c( - "Processing dataset '{.val {sources[[i]]$dataset_name}}'...", - "i Filter and order criteria: {.val {paste(c(subject_keys, by_vars, sources[[i]]$order), collapse = ', ')}}." + "Processing dataset '{.val {sources[[i]]$dataset_name}}'...", + "i Filter and order criteria: {.val {paste(c(subject_keys, by_vars, + sources[[i]]$order), collapse = ', ')}}." )) } ) diff --git a/man/derive_var_merged_summary.Rd b/man/derive_var_merged_summary.Rd index 532fd263c0..bd2a2f4d8b 100644 --- a/man/derive_var_merged_summary.Rd +++ b/man/derive_var_merged_summary.Rd @@ -68,7 +68,17 @@ of the specified variables are set to the specified value. Only variables specified for \code{new_vars} can be specified for \code{missing_values}. \emph{Permitted Values}: named list of expressions, e.g., -\code{exprs(BASEC = "MISSING", BASE = -1)}} +\code{exprs(BASEC = "MISSING", BASE = -1)} + +If \code{"warning"} or \code{"error"} is specified, the specified message is issued +if the observations of the (restricted) additional dataset are not unique +with respect to the by variables and the order. + +If the \code{order} argument is not specified, the \code{check_type} argument is ignored: +if the observations of the (restricted) additional dataset are not unique with respect +to the by variables, an error is issued. + +\emph{Permitted Values}: \code{"none"}, \code{"warning"}, \code{"error"}} } \value{ The output dataset contains all observations and variables of the diff --git a/man/derive_vars_joined.Rd b/man/derive_vars_joined.Rd index 6c7913918e..0792f90b20 100644 --- a/man/derive_vars_joined.Rd +++ b/man/derive_vars_joined.Rd @@ -223,7 +223,17 @@ of the specified variables are set to the specified value. Only variables specified for \code{new_vars} can be specified for \code{missing_values}. \emph{Permitted Values}: named list of expressions, e.g., -\code{exprs(BASEC = "MISSING", BASE = -1)}} +\code{exprs(BASEC = "MISSING", BASE = -1)} + +If \code{"warning"} or \code{"error"} is specified, the specified message is issued +if the observations of the (restricted) additional dataset are not unique +with respect to the by variables and the order. + +If the \code{order} argument is not specified, the \code{check_type} argument is ignored: +if the observations of the (restricted) additional dataset are not unique with respect +to the by variables, an error is issued. + +\emph{Permitted Values}: \code{"none"}, \code{"warning"}, \code{"error"}} \item{check_type}{Check uniqueness? diff --git a/man/derive_vars_merged.Rd b/man/derive_vars_merged.Rd index 700612a4fc..ab66f4886b 100644 --- a/man/derive_vars_merged.Rd +++ b/man/derive_vars_merged.Rd @@ -129,9 +129,7 @@ of the specified variables are set to the specified value. Only variables specified for \code{new_vars} can be specified for \code{missing_values}. \emph{Permitted Values}: named list of expressions, e.g., -\code{exprs(BASEC = "MISSING", BASE = -1)}} - -\item{check_type}{Check uniqueness? +\code{exprs(BASEC = "MISSING", BASE = -1)} If \code{"warning"} or \code{"error"} is specified, the specified message is issued if the observations of the (restricted) additional dataset are not unique diff --git a/man/derive_vars_merged_lookup.Rd b/man/derive_vars_merged_lookup.Rd index 6f057576b5..df473ea7e2 100644 --- a/man/derive_vars_merged_lookup.Rd +++ b/man/derive_vars_merged_lookup.Rd @@ -92,18 +92,6 @@ condition. \emph{Permitted Values}: a condition} -\item{check_type}{Check uniqueness? - -If \code{"warning"} or \code{"error"} is specified, the specified message is issued -if the observations of the (restricted) additional dataset are not unique -with respect to the by variables and the order. - -If the \code{order} argument is not specified, the \code{check_type} argument is ignored: -if the observations of the (restricted) additional dataset are not unique with respect -to the by variables, an error is issued. - -\emph{Permitted Values}: \code{"none"}, \code{"warning"}, \code{"error"}} - \item{duplicate_msg}{Message of unique check If the uniqueness check fails, the specified message is displayed. diff --git a/tests/testthat/test-derive_param_tte.R b/tests/testthat/test-derive_param_tte.R index bb51bb5b19..c0890a70eb 100644 --- a/tests/testthat/test-derive_param_tte.R +++ b/tests/testthat/test-derive_param_tte.R @@ -1061,7 +1061,7 @@ test_that("derive_param_tte produces consistent results regardless of input sort EVENTDESC = "AE", SRCDOM = "AE", SRCVAR = "AESTDTC", - SRCSEQ = AESEQ # Ensure AESEQ is included here + SRCSEQ = AESEQ # Ensure AESEQ is included here ) ) @@ -1069,9 +1069,10 @@ test_that("derive_param_tte produces consistent results regardless of input sort dataset_name = "adsl", date = pmin(TRTEDT + days(10), EOSDT), censor = 1, - set_values_to = exprs(EVENTDESC = "END OF TRT", - SRCDOM = "ADSL", - SRCVAR = "TRTEDT" + set_values_to = exprs( + EVENTDESC = "END OF TRT", + SRCDOM = "ADSL", + SRCVAR = "TRTEDT" ) ) From 1d4e6b7189933732cd57f880e2d60c95f6bb0fc4 Mon Sep 17 00:00:00 2001 From: ProfessorP Date: Tue, 3 Dec 2024 01:53:10 -0500 Subject: [PATCH 15/23] accepted snapshots from testthat and addressed bds_tte.Rmd error for devtool checks() --- R/derive_param_tte.R | 2 +- tests/testthat/_snaps/derive_var_extreme_date.md | 2 +- tests/testthat/test-derive_param_tte.R | 2 +- tests/testthat/test-user_utils.R | 3 ++- vignettes/bds_tte.Rmd | 1 + 5 files changed, 6 insertions(+), 4 deletions(-) diff --git a/R/derive_param_tte.R b/R/derive_param_tte.R index 014f42a54a..42bceff56e 100644 --- a/R/derive_param_tte.R +++ b/R/derive_param_tte.R @@ -652,7 +652,7 @@ filter_date_sources <- function(sources, if (grepl("duplicate records", conditionMessage(cnd))) { cli::cli_warn(c( "Dataset '{.val {sources[[i]]$dataset_name}}' contains duplicate records.", - "i Duplicates were identified based on variables: + "i Duplicates were identified based on variables: {.val {paste(c(subject_keys, by_vars, source_date_var), collapse = ', ')}}." )) } diff --git a/tests/testthat/_snaps/derive_var_extreme_date.md b/tests/testthat/_snaps/derive_var_extreme_date.md index 6dd10a9227..9435afffbd 100644 --- a/tests/testthat/_snaps/derive_var_extreme_date.md +++ b/tests/testthat/_snaps/derive_var_extreme_date.md @@ -7,5 +7,5 @@ Error in `derive_var_extreme_dtm()`: ! The dataset names must be included in the list specified for the `source_datasets` argument. i Following names were provided by `source_datasets`: ea - i But, `sources[[1]]$dataset_name = ae` + i But, `sources[[1]]$dataset_name = ae` diff --git a/tests/testthat/test-derive_param_tte.R b/tests/testthat/test-derive_param_tte.R index c0890a70eb..f4c6e627e0 100644 --- a/tests/testthat/test-derive_param_tte.R +++ b/tests/testthat/test-derive_param_tte.R @@ -1029,7 +1029,7 @@ test_that("derive_param_tte detects duplicates in the input datasets via pipelin set_values_to = exprs(PARAMCD = "TTAE"), check_type = "warning" ), - regexp = "Dataset 'ae' contains duplicate records" + regexp = "Dataset '.*' contains duplicate records." ) }) diff --git a/tests/testthat/test-user_utils.R b/tests/testthat/test-user_utils.R index 317363665e..20ff1e624d 100644 --- a/tests/testthat/test-user_utils.R +++ b/tests/testthat/test-user_utils.R @@ -165,7 +165,8 @@ test_that("print.source Test 13: `source` objects are printed as intended", { " EVENTDESC: \"AE\"", " SRCDOM: \"AE\"", " SRCVAR: \"AESTDTC\"", - " SRCSEQ: AESEQ" + " SRCSEQ: AESEQ", + "order: NULL" ) expect_identical(capture.output(print(ttae)), expected_print_output) }) diff --git a/vignettes/bds_tte.Rmd b/vignettes/bds_tte.Rmd index 75e115c20f..468a629b02 100644 --- a/vignettes/bds_tte.Rmd +++ b/vignettes/bds_tte.Rmd @@ -37,6 +37,7 @@ The examples of this vignette require the following packages. library(admiral) library(dplyr, warn.conflicts = FALSE) library(pharmaversesdtm) +library(rlang) ``` ```{r, warning=FALSE, message=FALSE, include=FALSE} From 22f3f2de6e81842395788773004ac4e5da7559e5 Mon Sep 17 00:00:00 2001 From: ProfessorP Date: Tue, 3 Dec 2024 10:40:55 -0500 Subject: [PATCH 16/23] added documentation for order and check_type arguments added to functions. Directly called rlang::try_fetch in derive_param_tte script. --- R/derive_merged.R | 8 ++++++++ R/derive_param_tte.R | 16 ++++++++++++++-- man/censor_source.Rd | 5 +++++ man/derive_param_tte.Rd | 8 ++++++++ man/derive_vars_merged.Rd | 8 ++++++++ man/derive_vars_merged_lookup.Rd | 8 ++++++++ man/event_source.Rd | 5 +++++ man/tte_source.Rd | 5 +++++ tests/testthat/_snaps/derive_extreme_event.md | 2 +- tests/testthat/_snaps/derive_param_tte.md | 8 ++++---- tests/testthat/_snaps/derive_var_dthcaus.md | 2 +- tests/testthat/_snaps/derive_var_extreme_date.md | 2 +- .../testthat/_snaps/derive_var_merged_ef_msrc.md | 2 +- 13 files changed, 69 insertions(+), 10 deletions(-) diff --git a/R/derive_merged.R b/R/derive_merged.R index 0dd2c9b813..9ccef85fd9 100644 --- a/R/derive_merged.R +++ b/R/derive_merged.R @@ -122,6 +122,14 @@ #' #' *Permitted Values*: `"none"`, `"warning"`, `"error"` #' +#' @param check_type Check uniqueness? +#' +#' If `"warning"`, `"message"`, or `"error"` is specified, the specified message is issued +#' if the observations of the input dataset are not unique with respect to the +#' by variables and the order. +#' +#' Default: `"warning"` +#' #' @param duplicate_msg Message of unique check #' #' If the uniqueness check fails, the specified message is displayed. diff --git a/R/derive_param_tte.R b/R/derive_param_tte.R index 42bceff56e..b9e788c3c0 100644 --- a/R/derive_param_tte.R +++ b/R/derive_param_tte.R @@ -60,6 +60,14 @@ #' #' A list of symbols created using `exprs()` is expected. #' +#' @param check_type Check uniqueness +#' +#' If `"warning"`, `"message"`, or `"error"` is specified, the specified message is issued +#' if the observations of the input dataset are not unique with respect to the +#' by variables and the order. +#' +#' Default: `"none"` +#' #' @details The following steps are performed to create the observations of the #' new parameter: #' @@ -514,7 +522,7 @@ derive_param_tte <- function(dataset = NULL, #' #' Permitted Values: `"first"`, `"last"` #' -#' @param check_type Check uniqueness? +#' @param check_type Check uniqueness #' #' If `"warning"`, `"message"`, or `"error"` is specified, the specified message is issued #' if the observations of the input dataset are not unique with respect to the @@ -635,7 +643,7 @@ filter_date_sources <- function(sources, dataset_name = sources[[i]]$dataset_name ) # wrap filter_extreme in tryCatch to catch duplicate records and create a message - data[[i]] <- try_fetch( + data[[i]] <- rlang::try_fetch( { source_dataset %>% filter_if(sources[[i]]$filter) %>% @@ -836,6 +844,10 @@ extend_source_datasets <- function(source_datasets, #' SRCDOM = "ADSL", SRCVAR = "DTHDT")`. The values must be a symbol, a #' character string, a numeric value, an expression, or `NA`. #' +#' @param order Sort order +#' +#' If the argument is set to a non-null value, for each by group the first or +#' last observation #' #' @keywords source_specifications #' @family source_specifications diff --git a/man/censor_source.Rd b/man/censor_source.Rd index fdbaeece4e..48c50151bf 100644 --- a/man/censor_source.Rd +++ b/man/censor_source.Rd @@ -37,6 +37,11 @@ censoring.} \item{set_values_to}{A named list returned by \code{exprs()} defining the variables to be set for the event or censoring, e.g. \code{exprs(EVENTDESC = "DEATH", SRCDOM = "ADSL", SRCVAR = "DTHDT")}. The values must be a symbol, a character string, a numeric value, an expression, or \code{NA}.} + +\item{order}{Sort order + +If the argument is set to a non-null value, for each by group the first or +last observation} } \value{ An object of class \code{censor_source}, inheriting from class \code{tte_source} diff --git a/man/derive_param_tte.Rd b/man/derive_param_tte.Rd index e3825464f0..dddd08624f 100644 --- a/man/derive_param_tte.Rd +++ b/man/derive_param_tte.Rd @@ -77,6 +77,14 @@ expressions, or \code{NA}.} \item{subject_keys}{Variables to uniquely identify a subject A list of symbols created using \code{exprs()} is expected.} + +\item{check_type}{Check uniqueness + +If \code{"warning"}, \code{"message"}, or \code{"error"} is specified, the specified message is issued +if the observations of the input dataset are not unique with respect to the +by variables and the order. + +Default: \code{"none"}} } \value{ The input dataset with the new parameter added diff --git a/man/derive_vars_merged.Rd b/man/derive_vars_merged.Rd index ab66f4886b..aa18382c9c 100644 --- a/man/derive_vars_merged.Rd +++ b/man/derive_vars_merged.Rd @@ -141,6 +141,14 @@ to the by variables, an error is issued. \emph{Permitted Values}: \code{"none"}, \code{"warning"}, \code{"error"}} +\item{check_type}{Check uniqueness? + +If \code{"warning"}, \code{"message"}, or \code{"error"} is specified, the specified message is issued +if the observations of the input dataset are not unique with respect to the +by variables and the order. + +Default: \code{"warning"}} + \item{duplicate_msg}{Message of unique check If the uniqueness check fails, the specified message is displayed. diff --git a/man/derive_vars_merged_lookup.Rd b/man/derive_vars_merged_lookup.Rd index df473ea7e2..f61e11f2ad 100644 --- a/man/derive_vars_merged_lookup.Rd +++ b/man/derive_vars_merged_lookup.Rd @@ -92,6 +92,14 @@ condition. \emph{Permitted Values}: a condition} +\item{check_type}{Check uniqueness? + +If \code{"warning"}, \code{"message"}, or \code{"error"} is specified, the specified message is issued +if the observations of the input dataset are not unique with respect to the +by variables and the order. + +Default: \code{"warning"}} + \item{duplicate_msg}{Message of unique check If the uniqueness check fails, the specified message is displayed. diff --git a/man/event_source.Rd b/man/event_source.Rd index 88bb6701dc..52f899fabd 100644 --- a/man/event_source.Rd +++ b/man/event_source.Rd @@ -31,6 +31,11 @@ date from a date character vector to a date object.} \item{set_values_to}{A named list returned by \code{exprs()} defining the variables to be set for the event or censoring, e.g. \code{exprs(EVENTDESC = "DEATH", SRCDOM = "ADSL", SRCVAR = "DTHDT")}. The values must be a symbol, a character string, a numeric value, an expression, or \code{NA}.} + +\item{order}{Sort order + +If the argument is set to a non-null value, for each by group the first or +last observation} } \value{ An object of class \code{event_source}, inheriting from class \code{tte_source} diff --git a/man/tte_source.Rd b/man/tte_source.Rd index bc007598f2..575566d8e7 100644 --- a/man/tte_source.Rd +++ b/man/tte_source.Rd @@ -37,6 +37,11 @@ censoring.} \item{set_values_to}{A named list returned by \code{exprs()} defining the variables to be set for the event or censoring, e.g. \code{exprs(EVENTDESC = "DEATH", SRCDOM = "ADSL", SRCVAR = "DTHDT")}. The values must be a symbol, a character string, a numeric value, an expression, or \code{NA}.} + +\item{order}{Sort order + +If the argument is set to a non-null value, for each by group the first or +last observation} } \value{ An object of class \code{tte_source} diff --git a/tests/testthat/_snaps/derive_extreme_event.md b/tests/testthat/_snaps/derive_extreme_event.md index a75bf9cb78..b228ab0335 100644 --- a/tests/testthat/_snaps/derive_extreme_event.md +++ b/tests/testthat/_snaps/derive_extreme_event.md @@ -12,5 +12,5 @@ Error in `derive_extreme_event()`: ! The dataset names must be included in the list specified for the `source_datasets` argument. i Following names were provided by `source_datasets`: adhy - i But, `events[[1]]$dataset_name = adyh` + i But, `events[[1]]$dataset_name = adyh` diff --git a/tests/testthat/_snaps/derive_param_tte.md b/tests/testthat/_snaps/derive_param_tte.md index 89031d4bb1..f1438eca6c 100644 --- a/tests/testthat/_snaps/derive_param_tte.md +++ b/tests/testthat/_snaps/derive_param_tte.md @@ -56,10 +56,10 @@ death), censor_conditions = list(lstalv), source_datasets = list(adsl = adsl), set_values_to = exprs(PARAMCD = "OS", PARAM = "Overall Survival")) Condition - Error: - ! Could not evaluate cli `{}` expression: `source_names`. - Caused by error: - ! object 'source_names' not found + Error in `derive_param_tte()`: + ! The dataset names must be included in the list specified for the `source_datasets` argument. + i Following names were provided by `source_datasets`: "adsl" + i But, `censor_conditions[[1]]$dataset_name = adls` # list_tte_source_objects Test 14: error is issued if package does not exist diff --git a/tests/testthat/_snaps/derive_var_dthcaus.md b/tests/testthat/_snaps/derive_var_dthcaus.md index ba7d1b62a7..e3eb8534c4 100644 --- a/tests/testthat/_snaps/derive_var_dthcaus.md +++ b/tests/testthat/_snaps/derive_var_dthcaus.md @@ -7,5 +7,5 @@ Error in `derive_var_dthcaus()`: ! The dataset names must be included in the list specified for the `source_datasets` argument. i Following names were provided by `source_datasets`: ae and dd - i But, `sources[[2]]$dataset_name = ds` + i But, `sources[[2]]$dataset_name = ds` diff --git a/tests/testthat/_snaps/derive_var_extreme_date.md b/tests/testthat/_snaps/derive_var_extreme_date.md index 9435afffbd..6dd10a9227 100644 --- a/tests/testthat/_snaps/derive_var_extreme_date.md +++ b/tests/testthat/_snaps/derive_var_extreme_date.md @@ -7,5 +7,5 @@ Error in `derive_var_extreme_dtm()`: ! The dataset names must be included in the list specified for the `source_datasets` argument. i Following names were provided by `source_datasets`: ea - i But, `sources[[1]]$dataset_name = ae` + i But, `sources[[1]]$dataset_name = ae` diff --git a/tests/testthat/_snaps/derive_var_merged_ef_msrc.md b/tests/testthat/_snaps/derive_var_merged_ef_msrc.md index b0e43f577b..02767c3a77 100644 --- a/tests/testthat/_snaps/derive_var_merged_ef_msrc.md +++ b/tests/testthat/_snaps/derive_var_merged_ef_msrc.md @@ -8,5 +8,5 @@ Error in `derive_var_merged_ef_msrc()`: ! The dataset names must be included in the list specified for the `source_datasets` argument. i Following names were provided by `source_datasets`: cm and pro - i But, `flag_events[[2]]$dataset_name = pr` + i But, `flag_events[[2]]$dataset_name = pr` From 47637a5f821d9f621af514cc0b31c2ed765bb10e Mon Sep 17 00:00:00 2001 From: ProfessorP Date: Mon, 16 Dec 2024 11:10:01 -0500 Subject: [PATCH 17/23] requested updates to documentation and test script for derive_param_tte --- NEWS.md | 4 +- R/derive_merged.R | 12 +-- R/derive_param_tte.R | 3 +- man/derive_var_merged_summary.Rd | 6 +- man/derive_vars_joined.Rd | 6 +- man/derive_vars_merged.Rd | 8 +- man/derive_vars_merged_lookup.Rd | 8 -- man/filter_date_sources.Rd | 136 +++++++++++++++++++++++++ tests/testthat/test-derive_param_tte.R | 94 ++++++++--------- 9 files changed, 201 insertions(+), 76 deletions(-) create mode 100644 man/filter_date_sources.Rd diff --git a/NEWS.md b/NEWS.md index b8bf58ddde..a840d8a1c0 100644 --- a/NEWS.md +++ b/NEWS.md @@ -12,9 +12,9 @@ - Update `ADEG` template to flag `ABLFL` and `ANL01FL` based on `DTYPE == "AVERAGE"` records. (#2561) ## Updates of Existing Functions -- added `message` as option for `check_type` argument in `filter_extreme` function. (#2481) +- added `message` as option for `check_type` argument in `filter_extreme()` function. (#2481) -- Users can now specify how duplicate records are handled in `derive_param_tte` using the `check_type` argument, with options including `"error"`, `"warning"`, `"message"`, or `"none"`, allowing for greater flexibility in managing duplicate data scenarios. (#2481) +- Users can now specify how duplicate records are handled in `derive_param_tte()` using the `check_type` argument, with options including `"error"`, `"warning"`, `"message"`, or `"none"`, allowing for greater flexibility in managing duplicate data scenarios. (#2481) - `order` argument has been added to `event_source()` and `censor_source()` and defaulted to `NULL` to allow specifying variables in addition to the date variable. This can be used to ensure the uniqueness of the select records if there is more than one record per date. (#2481) diff --git a/R/derive_merged.R b/R/derive_merged.R index 9ccef85fd9..f3ce412c53 100644 --- a/R/derive_merged.R +++ b/R/derive_merged.R @@ -110,26 +110,20 @@ #' *Permitted Values*: named list of expressions, e.g., #' `exprs(BASEC = "MISSING", BASE = -1)` #' -#' -#' #' If `"warning"` or `"error"` is specified, the specified message is issued #' if the observations of the (restricted) additional dataset are not unique #' with respect to the by variables and the order. #' #' If the `order` argument is not specified, the `check_type` argument is ignored: -#' if the observations of the (restricted) additional dataset are not unique with respect -#' to the by variables, an error is issued. +#' if the observations of the (restricted) additional dataset are not unique with respect +#' to the by variables, an error is issued. #' -#' *Permitted Values*: `"none"`, `"warning"`, `"error"` -#' -#' @param check_type Check uniqueness? +#' *Permitted Values*: `"none"`,`"message"`, `"warning"`, `"error"` #' #' If `"warning"`, `"message"`, or `"error"` is specified, the specified message is issued #' if the observations of the input dataset are not unique with respect to the #' by variables and the order. #' -#' Default: `"warning"` -#' #' @param duplicate_msg Message of unique check #' #' If the uniqueness check fails, the specified message is displayed. diff --git a/R/derive_param_tte.R b/R/derive_param_tte.R index b9e788c3c0..2324b416fa 100644 --- a/R/derive_param_tte.R +++ b/R/derive_param_tte.R @@ -556,7 +556,7 @@ derive_param_tte <- function(dataset = NULL, #' @return A dataset with one observation per subject as described in the #' "Details" section. #' -#' @noRd +#' @keywords internal #' #' @examples #' library(tibble) @@ -647,7 +647,6 @@ filter_date_sources <- function(sources, { source_dataset %>% filter_if(sources[[i]]$filter) %>% - arrange(!!!sources[[i]]$order) %>% # Ensure order is applied filter_extreme( order = expr_c(exprs(!!source_date_var), sources[[i]]$order), by_vars = expr_c(subject_keys, by_vars), diff --git a/man/derive_var_merged_summary.Rd b/man/derive_var_merged_summary.Rd index bd2a2f4d8b..2263b202b7 100644 --- a/man/derive_var_merged_summary.Rd +++ b/man/derive_var_merged_summary.Rd @@ -78,7 +78,11 @@ If the \code{order} argument is not specified, the \code{check_type} argument is if the observations of the (restricted) additional dataset are not unique with respect to the by variables, an error is issued. -\emph{Permitted Values}: \code{"none"}, \code{"warning"}, \code{"error"}} +\emph{Permitted Values}: \code{"none"},\code{"message"}, \code{"warning"}, \code{"error"} + +If \code{"warning"}, \code{"message"}, or \code{"error"} is specified, the specified message is issued +if the observations of the input dataset are not unique with respect to the +by variables and the order.} } \value{ The output dataset contains all observations and variables of the diff --git a/man/derive_vars_joined.Rd b/man/derive_vars_joined.Rd index 0792f90b20..553b87172f 100644 --- a/man/derive_vars_joined.Rd +++ b/man/derive_vars_joined.Rd @@ -233,7 +233,11 @@ If the \code{order} argument is not specified, the \code{check_type} argument is if the observations of the (restricted) additional dataset are not unique with respect to the by variables, an error is issued. -\emph{Permitted Values}: \code{"none"}, \code{"warning"}, \code{"error"}} +\emph{Permitted Values}: \code{"none"},\code{"message"}, \code{"warning"}, \code{"error"} + +If \code{"warning"}, \code{"message"}, or \code{"error"} is specified, the specified message is issued +if the observations of the input dataset are not unique with respect to the +by variables and the order.} \item{check_type}{Check uniqueness? diff --git a/man/derive_vars_merged.Rd b/man/derive_vars_merged.Rd index aa18382c9c..d8b57a0c99 100644 --- a/man/derive_vars_merged.Rd +++ b/man/derive_vars_merged.Rd @@ -139,15 +139,11 @@ If the \code{order} argument is not specified, the \code{check_type} argument is if the observations of the (restricted) additional dataset are not unique with respect to the by variables, an error is issued. -\emph{Permitted Values}: \code{"none"}, \code{"warning"}, \code{"error"}} - -\item{check_type}{Check uniqueness? +\emph{Permitted Values}: \code{"none"},\code{"message"}, \code{"warning"}, \code{"error"} If \code{"warning"}, \code{"message"}, or \code{"error"} is specified, the specified message is issued if the observations of the input dataset are not unique with respect to the -by variables and the order. - -Default: \code{"warning"}} +by variables and the order.} \item{duplicate_msg}{Message of unique check diff --git a/man/derive_vars_merged_lookup.Rd b/man/derive_vars_merged_lookup.Rd index f61e11f2ad..df473ea7e2 100644 --- a/man/derive_vars_merged_lookup.Rd +++ b/man/derive_vars_merged_lookup.Rd @@ -92,14 +92,6 @@ condition. \emph{Permitted Values}: a condition} -\item{check_type}{Check uniqueness? - -If \code{"warning"}, \code{"message"}, or \code{"error"} is specified, the specified message is issued -if the observations of the input dataset are not unique with respect to the -by variables and the order. - -Default: \code{"warning"}} - \item{duplicate_msg}{Message of unique check If the uniqueness check fails, the specified message is displayed. diff --git a/man/filter_date_sources.Rd b/man/filter_date_sources.Rd new file mode 100644 index 0000000000..7736677e69 --- /dev/null +++ b/man/filter_date_sources.Rd @@ -0,0 +1,136 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/derive_param_tte.R +\name{filter_date_sources} +\alias{filter_date_sources} +\title{Select the First or Last Date from Several Sources} +\usage{ +filter_date_sources( + sources, + source_datasets, + by_vars, + create_datetime = FALSE, + subject_keys, + mode, + check_type = "none" +) +} +\arguments{ +\item{sources}{Sources + +A list of \code{tte_source()} objects is expected.} + +\item{source_datasets}{Source datasets + +A named list of datasets is expected. The \code{dataset_name} field of +\code{tte_source()} refers to the dataset provided in the list.} + +\item{by_vars}{By variables + +If the parameter is specified, for each by group the observations are +selected separately. + +\emph{Permitted Values}: list of variables created by \code{exprs()} +e.g. \code{exprs(USUBJID, VISIT)}} + +\item{create_datetime}{Create datetime variable? + +If set to \code{TRUE}, variables \code{ADTM} is created. Otherwise, variables \code{ADT} +is created.} + +\item{subject_keys}{Variables to uniquely identify a subject + +A list of symbols created using \code{exprs()} is expected.} + +\item{mode}{Selection mode (first or last) + +If \code{"first"} is specified, for each subject the first observation with +respect to the date is included in the output dataset. If \code{"last"} is +specified, the last observation is included in the output dataset. + +Permitted Values: \code{"first"}, \code{"last"} + +@param check_type Check uniqueness + +If \code{"warning"}, \code{"message"}, or \code{"error"} is specified, the specified message is issued +if the observations of the input dataset are not unique with respect to the +by variables and the order. + +Default: \code{"none"} + +Permitted Values: \code{"none"}, \code{"warning"}, \code{"error"}, \code{"message"}} +} +\value{ +A dataset with one observation per subject as described in the +"Details" section. +} +\description{ +Select for each subject the first or last observation with respect to a date +from a list of sources. +} +\details{ +The following steps are performed to create the output dataset: + +\enumerate{ \item For each source dataset the observations as specified by +the \code{filter} element are selected. Then for each patient the first or last +observation (with respect to \code{date}) is selected. + +\item The \code{ADT} variable is set to the variable specified by the +\code{date} element. If the date variable is a datetime variable, only +the datepart is copied. If the source variable is a character variable, it +is converted to a date. If the date is incomplete, it is imputed as +the first possible date. + +\item The \code{CNSR} is added and set to the value of the \code{censor} +element. + +\item The selected observations of all source datasets are combined into a +single dataset. + +\item For each patient the first or last observation (with respect to the +\code{ADT} variable) from the single dataset is selected. } +} +\examples{ +library(tibble) +library(dplyr, warn.conflicts = FALSE) +library(lubridate) + +adsl <- tribble( + ~USUBJID, ~TRTSDT, ~EOSDT, + "01", ymd("2020-12-06"), ymd("2021-03-06"), + "02", ymd("2021-01-16"), ymd("2021-02-03") +) \%>\% + mutate(STUDYID = "AB42") + +ae <- tribble( + ~USUBJID, ~AESTDTC, ~AESEQ, ~AEDECOD, + "01", "2021-01-03", 1, "Flu", + "01", "2021-03-04", 2, "Cough", + "01", "2021-01-01", 3, "Flu" +) \%>\% + mutate( + STUDYID = "AB42", + AESTDT = ymd(AESTDTC) + ) + +ttae <- event_source( + dataset_name = "ae", + date = AESTDT, + set_values_to = exprs( + EVNTDESC = "AE", + SRCDOM = "AE", + SRCVAR = "AESTDTC", + SRCSEQ = AESEQ + ) +) + +filter_date_sources( + sources = list(ttae), + source_datasets = list(adsl = adsl, ae = ae), + by_vars = exprs(AEDECOD), + create_datetime = FALSE, + subject_keys = get_admiral_option("subject_keys"), + mode = "first", + check_type = "none" +) +} +\keyword{internal} diff --git a/tests/testthat/test-derive_param_tte.R b/tests/testthat/test-derive_param_tte.R index f4c6e627e0..118f0e69ee 100644 --- a/tests/testthat/test-derive_param_tte.R +++ b/tests/testthat/test-derive_param_tte.R @@ -932,51 +932,8 @@ test_that("derive_param_tte Test 13: error if dataset_name not in source_datsets ) }) -# list_tte_source_objects ---- -## Test 14: error is issued if package does not exist ---- -test_that("list_tte_source_objects Test 14: error is issued if package does not exist", { - expect_snapshot( - list_tte_source_objects(package = "tte"), - error = TRUE - ) -}) - -## Test 15: expected objects produced ---- -test_that("list_tte_source_objects Test 15: expected objects produced", { - expected_output <- tibble::tribble( - ~object, ~dataset_name, ~filter, - "ae_ser_event", "adae", quote(TRTEMFL == "Y" & AESER == "Y"), - "ae_gr2_event", "adae", quote(TRTEMFL == "Y" & ATOXGR == "2"), - "ae_sev_event", "adae", quote(TRTEMFL == "Y" & AESEV == "SEVERE"), - "ae_gr4_event", "adae", quote(TRTEMFL == "Y" & ATOXGR == "4"), - "ae_gr3_event", "adae", quote(TRTEMFL == "Y" & ATOXGR == "3"), - "lastalive_censor", "adsl", NULL, - "ae_event", "adae", quote(TRTEMFL == "Y"), - "death_event", "adsl", quote(DTHFL == "Y"), - "ae_gr35_event", "adae", quote(TRTEMFL == "Y" & ATOXGR %in% c("3", "4", "5")), - "ae_wd_event", "adae", quote(TRTEMFL == "Y" & AEACN == "DRUG WITHDRAWN"), - "ae_gr1_event", "adae", quote(TRTEMFL == "Y" & ATOXGR == "1"), - "ae_gr5_event", "adae", quote(TRTEMFL == "Y" & ATOXGR == "5") - ) %>% - mutate( - date = case_when( - object == "lastalive_censor" ~ "LSTALVDT", - object == "death_event" ~ "DTHDT", - TRUE ~ "ASTDT" - ), - censor = if_else(object == "lastalive_censor", 1, 0), - filter = as.character(filter), - censor = as.integer(censor) - ) - - observed_output <- list_tte_source_objects(package = "admiral") %>% - select(object, dataset_name, filter, date, censor) - - expect_dfs_equal(expected_output, observed_output, keys = c("object")) -}) - -## Test 15: derive_param_tte detects duplicates when check_type = 'warning' ---- -test_that("derive_param_tte detects duplicates in the input datasets via pipeline functions", { +## Test 14: derive_param_tte detects duplicates when check_type = 'warning' ---- +test_that("derive_param_tte Test 14: detects duplicates in input datasets via pipeline functions", { # Define ADSL dataset adsl <- tibble::tribble( ~USUBJID, ~TRTSDT, ~TRTEDT, ~EOSDT, @@ -1033,8 +990,8 @@ test_that("derive_param_tte detects duplicates in the input datasets via pipelin ) }) -## Test 16: derive_param_tte produces consistent results regardless of input sort order ---- -test_that("derive_param_tte produces consistent results regardless of input sort order", { +## Test 15: derive_param_tte produces consistent results regardless of input sort order ---- +test_that("derive_param_tte Test 15: produces consistent results regardless of input sort order", { # Define ADSL dataset adsl <- tibble::tribble( ~USUBJID, ~TRTSDT, ~TRTEDT, ~EOSDT, @@ -1100,3 +1057,46 @@ test_that("derive_param_tte produces consistent results regardless of input sort expect_equal(result_sorted, result_unsorted) }) + +# list_tte_source_objects ---- +## Test 16: error is issued if package does not exist ---- +test_that("list_tte_source_objects Test 16: error is issued if package does not exist", { + expect_snapshot( + list_tte_source_objects(package = "tte"), + error = TRUE + ) +}) + +## Test 17: expected objects produced ---- +test_that("list_tte_source_objects Test 17: expected objects produced", { + expected_output <- tibble::tribble( + ~object, ~dataset_name, ~filter, + "ae_ser_event", "adae", quote(TRTEMFL == "Y" & AESER == "Y"), + "ae_gr2_event", "adae", quote(TRTEMFL == "Y" & ATOXGR == "2"), + "ae_sev_event", "adae", quote(TRTEMFL == "Y" & AESEV == "SEVERE"), + "ae_gr4_event", "adae", quote(TRTEMFL == "Y" & ATOXGR == "4"), + "ae_gr3_event", "adae", quote(TRTEMFL == "Y" & ATOXGR == "3"), + "lastalive_censor", "adsl", NULL, + "ae_event", "adae", quote(TRTEMFL == "Y"), + "death_event", "adsl", quote(DTHFL == "Y"), + "ae_gr35_event", "adae", quote(TRTEMFL == "Y" & ATOXGR %in% c("3", "4", "5")), + "ae_wd_event", "adae", quote(TRTEMFL == "Y" & AEACN == "DRUG WITHDRAWN"), + "ae_gr1_event", "adae", quote(TRTEMFL == "Y" & ATOXGR == "1"), + "ae_gr5_event", "adae", quote(TRTEMFL == "Y" & ATOXGR == "5") + ) %>% + mutate( + date = case_when( + object == "lastalive_censor" ~ "LSTALVDT", + object == "death_event" ~ "DTHDT", + TRUE ~ "ASTDT" + ), + censor = if_else(object == "lastalive_censor", 1, 0), + filter = as.character(filter), + censor = as.integer(censor) + ) + + observed_output <- list_tte_source_objects(package = "admiral") %>% + select(object, dataset_name, filter, date, censor) + + expect_dfs_equal(expected_output, observed_output, keys = c("object")) +}) From e882758056846638d48e6992e987e1c430242c11 Mon Sep 17 00:00:00 2001 From: ProfessorP Date: Tue, 17 Dec 2024 10:50:24 -0500 Subject: [PATCH 18/23] corrected documentation and removed rlang from bds_tte.Rmd --- R/derive_merged.R | 10 ++++------ inst/WORDLIST | 1 + man/derive_var_merged_summary.Rd | 16 +--------------- man/derive_vars_joined.Rd | 16 +--------------- man/derive_vars_merged.Rd | 12 +++++------- man/derive_vars_merged_lookup.Rd | 12 ++++++++++++ vignettes/bds_tte.Rmd | 1 - 7 files changed, 24 insertions(+), 44 deletions(-) diff --git a/R/derive_merged.R b/R/derive_merged.R index f3ce412c53..2201b50c9b 100644 --- a/R/derive_merged.R +++ b/R/derive_merged.R @@ -110,7 +110,9 @@ #' *Permitted Values*: named list of expressions, e.g., #' `exprs(BASEC = "MISSING", BASE = -1)` #' -#' If `"warning"` or `"error"` is specified, the specified message is issued +#' @param check_type Check uniqueness? +#' +#' If `"warning"`, `"message"`, or `"error"` is specified, the specified message is issued #' if the observations of the (restricted) additional dataset are not unique #' with respect to the by variables and the order. #' @@ -118,11 +120,7 @@ #' if the observations of the (restricted) additional dataset are not unique with respect #' to the by variables, an error is issued. #' -#' *Permitted Values*: `"none"`,`"message"`, `"warning"`, `"error"` -#' -#' If `"warning"`, `"message"`, or `"error"` is specified, the specified message is issued -#' if the observations of the input dataset are not unique with respect to the -#' by variables and the order. +#' *Permitted Values*: `"none"`, `"message"`,`"warning"`, `"error"` #' #' @param duplicate_msg Message of unique check #' diff --git a/inst/WORDLIST b/inst/WORDLIST index bdee907508..f700923d73 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -304,6 +304,7 @@ msec nd occds onwards +param parttime pharmaverse pharmaverseadam diff --git a/man/derive_var_merged_summary.Rd b/man/derive_var_merged_summary.Rd index 2263b202b7..532fd263c0 100644 --- a/man/derive_var_merged_summary.Rd +++ b/man/derive_var_merged_summary.Rd @@ -68,21 +68,7 @@ of the specified variables are set to the specified value. Only variables specified for \code{new_vars} can be specified for \code{missing_values}. \emph{Permitted Values}: named list of expressions, e.g., -\code{exprs(BASEC = "MISSING", BASE = -1)} - -If \code{"warning"} or \code{"error"} is specified, the specified message is issued -if the observations of the (restricted) additional dataset are not unique -with respect to the by variables and the order. - -If the \code{order} argument is not specified, the \code{check_type} argument is ignored: -if the observations of the (restricted) additional dataset are not unique with respect -to the by variables, an error is issued. - -\emph{Permitted Values}: \code{"none"},\code{"message"}, \code{"warning"}, \code{"error"} - -If \code{"warning"}, \code{"message"}, or \code{"error"} is specified, the specified message is issued -if the observations of the input dataset are not unique with respect to the -by variables and the order.} +\code{exprs(BASEC = "MISSING", BASE = -1)}} } \value{ The output dataset contains all observations and variables of the diff --git a/man/derive_vars_joined.Rd b/man/derive_vars_joined.Rd index 553b87172f..6c7913918e 100644 --- a/man/derive_vars_joined.Rd +++ b/man/derive_vars_joined.Rd @@ -223,21 +223,7 @@ of the specified variables are set to the specified value. Only variables specified for \code{new_vars} can be specified for \code{missing_values}. \emph{Permitted Values}: named list of expressions, e.g., -\code{exprs(BASEC = "MISSING", BASE = -1)} - -If \code{"warning"} or \code{"error"} is specified, the specified message is issued -if the observations of the (restricted) additional dataset are not unique -with respect to the by variables and the order. - -If the \code{order} argument is not specified, the \code{check_type} argument is ignored: -if the observations of the (restricted) additional dataset are not unique with respect -to the by variables, an error is issued. - -\emph{Permitted Values}: \code{"none"},\code{"message"}, \code{"warning"}, \code{"error"} - -If \code{"warning"}, \code{"message"}, or \code{"error"} is specified, the specified message is issued -if the observations of the input dataset are not unique with respect to the -by variables and the order.} +\code{exprs(BASEC = "MISSING", BASE = -1)}} \item{check_type}{Check uniqueness? diff --git a/man/derive_vars_merged.Rd b/man/derive_vars_merged.Rd index d8b57a0c99..3753a4eec6 100644 --- a/man/derive_vars_merged.Rd +++ b/man/derive_vars_merged.Rd @@ -129,9 +129,11 @@ of the specified variables are set to the specified value. Only variables specified for \code{new_vars} can be specified for \code{missing_values}. \emph{Permitted Values}: named list of expressions, e.g., -\code{exprs(BASEC = "MISSING", BASE = -1)} +\code{exprs(BASEC = "MISSING", BASE = -1)}} -If \code{"warning"} or \code{"error"} is specified, the specified message is issued +\item{check_type}{Check uniqueness? + +If \code{"warning"}, \code{"message"}, or \code{"error"} is specified, the specified message is issued if the observations of the (restricted) additional dataset are not unique with respect to the by variables and the order. @@ -139,11 +141,7 @@ If the \code{order} argument is not specified, the \code{check_type} argument is if the observations of the (restricted) additional dataset are not unique with respect to the by variables, an error is issued. -\emph{Permitted Values}: \code{"none"},\code{"message"}, \code{"warning"}, \code{"error"} - -If \code{"warning"}, \code{"message"}, or \code{"error"} is specified, the specified message is issued -if the observations of the input dataset are not unique with respect to the -by variables and the order.} +\emph{Permitted Values}: \code{"none"}, \code{"message"},\code{"warning"}, \code{"error"}} \item{duplicate_msg}{Message of unique check diff --git a/man/derive_vars_merged_lookup.Rd b/man/derive_vars_merged_lookup.Rd index df473ea7e2..9407582095 100644 --- a/man/derive_vars_merged_lookup.Rd +++ b/man/derive_vars_merged_lookup.Rd @@ -92,6 +92,18 @@ condition. \emph{Permitted Values}: a condition} +\item{check_type}{Check uniqueness? + +If \code{"warning"}, \code{"message"}, or \code{"error"} is specified, the specified message is issued +if the observations of the (restricted) additional dataset are not unique +with respect to the by variables and the order. + +If the \code{order} argument is not specified, the \code{check_type} argument is ignored: +if the observations of the (restricted) additional dataset are not unique with respect +to the by variables, an error is issued. + +\emph{Permitted Values}: \code{"none"}, \code{"message"},\code{"warning"}, \code{"error"}} + \item{duplicate_msg}{Message of unique check If the uniqueness check fails, the specified message is displayed. diff --git a/vignettes/bds_tte.Rmd b/vignettes/bds_tte.Rmd index 468a629b02..75e115c20f 100644 --- a/vignettes/bds_tte.Rmd +++ b/vignettes/bds_tte.Rmd @@ -37,7 +37,6 @@ The examples of this vignette require the following packages. library(admiral) library(dplyr, warn.conflicts = FALSE) library(pharmaversesdtm) -library(rlang) ``` ```{r, warning=FALSE, message=FALSE, include=FALSE} From e5c28fc00171b44ea436279452403d8f3036fad2 Mon Sep 17 00:00:00 2001 From: ProfessorP Date: Fri, 20 Dec 2024 02:42:44 -0500 Subject: [PATCH 19/23] updated derive_param_tte documentation and added test to derive_param_tte test script. --- R/derive_param_tte.R | 58 ++++++++++++++++-- man/censor_source.Rd | 6 +- man/derive_param_tte.Rd | 52 ++++++++++++++-- man/event_source.Rd | 6 +- man/tte_source.Rd | 6 +- tests/testthat/_snaps/derive_param_tte.md | 2 +- tests/testthat/test-derive_param_tte.R | 72 +++++++++++++++++++++-- 7 files changed, 179 insertions(+), 23 deletions(-) diff --git a/R/derive_param_tte.R b/R/derive_param_tte.R index 2324b416fa..eafb6ffcb6 100644 --- a/R/derive_param_tte.R +++ b/R/derive_param_tte.R @@ -270,10 +270,10 @@ #' mutate(STUDYID = "AB42") #' #' ae <- tribble( -#' ~USUBJID, ~AESTDTC, ~AESEQ, ~AEDECOD, -#' "01", "2021-01-03T10:56", 1, "Flu", -#' "01", "2021-03-04", 2, "Cough", -#' "01", "2021", 3, "Flu" +#' ~USUBJID, ~AESTDTC, ~AESEQ, ~AEDECOD, +#' "01", "2021-01-03T10:56", 1, "Flu", +#' "01", "2021-03-04", 2, "Cough", +#' "01", "2021-", 3, "Flu" #' ) %>% #' mutate(STUDYID = "AB42") #' @@ -321,6 +321,50 @@ #' ) #' ) %>% #' select(USUBJID, STARTDT, PARAMCD, PARAM, ADT, CNSR, SRCSEQ) +#' +#' # Resolve tie when serious AE share a date by sorting with order argument +#' adsl <- tribble( +#' ~USUBJID, ~TRTSDT, ~EOSDT, +#' "01", ymd("2020-12-06"), ymd("2021-03-06"), +#' "02", ymd("2021-01-16"), ymd("2021-02-03") +#' ) %>% mutate(STUDYID = "AB42") +#' +#' ae <- tribble( +#' ~USUBJID, ~AESTDTC, ~AESEQ, ~AESER, ~AEDECOD, +#' "01", "2021-01-03", 1, "Y", "Flu", +#' "01", "2021-01-03", 2, "Y", "cough", +#' "01", "2021-01-20", 3, "N", "Headache", +#' ) %>% mutate( +#' AESTDT = ymd(AESTDTC), +#' STUDYID = "AB42" +#' ) +#' +#' derive_param_tte( +#' dataset_adsl = adsl, +#' start_date = TRTSDT, +#' source_datasets = list(adsl = adsl, ae = ae), +#' event_conditions = list(event_source( +#' dataset_name = "ae", +#' date = AESTDT, +#' set_values_to = exprs( +#' EVENTDESC = "Serious AE", +#' SRCSEQ = AESEQ +#' ), +#' filter = AESER == "Y", +#' order = exprs(AESTDT, AESEQ) +#' )), +#' censor_conditions = list(censor_source( +#' dataset_name = "adsl", +#' date = EOSDT, +#' censor = 1, +#' set_values_to = exprs(EVENTDESC = "End of Study") +#' )), +#' set_values_to = exprs( +#' PARAMCD = "TTSAE", +#' PARAM = "Time to First Serious AE" +#' ) +#' ) +#' derive_param_tte <- function(dataset = NULL, dataset_adsl, source_datasets, @@ -845,8 +889,10 @@ extend_source_datasets <- function(source_datasets, #' #' @param order Sort order #' -#' If the argument is set to a non-null value, for each by group the first or -#' last observation +#' An optional named list returned by `exprs()` defining additional variables +#' that the input dataset is sorted on after `date`. +#' +#' Persmitted Values: list of variables created by `exprs()` e.g. `exprs(ASEQ)`. #' #' @keywords source_specifications #' @family source_specifications diff --git a/man/censor_source.Rd b/man/censor_source.Rd index 48c50151bf..4738cb95e0 100644 --- a/man/censor_source.Rd +++ b/man/censor_source.Rd @@ -40,8 +40,10 @@ character string, a numeric value, an expression, or \code{NA}.} \item{order}{Sort order -If the argument is set to a non-null value, for each by group the first or -last observation} +An optional named list returned by \code{exprs()} defining additional variables +that the input dataset is sorted on after \code{date}. + +Persmitted Values: list of variables created by \code{exprs()} e.g. \code{exprs(ASEQ)}.} } \value{ An object of class \code{censor_source}, inheriting from class \code{tte_source} diff --git a/man/derive_param_tte.Rd b/man/derive_param_tte.Rd index dddd08624f..c9ad0c7a7b 100644 --- a/man/derive_param_tte.Rd +++ b/man/derive_param_tte.Rd @@ -287,10 +287,10 @@ adsl <- tribble( mutate(STUDYID = "AB42") ae <- tribble( - ~USUBJID, ~AESTDTC, ~AESEQ, ~AEDECOD, - "01", "2021-01-03T10:56", 1, "Flu", - "01", "2021-03-04", 2, "Cough", - "01", "2021", 3, "Flu" + ~USUBJID, ~AESTDTC, ~AESEQ, ~AEDECOD, + "01", "2021-01-03T10:56", 1, "Flu", + "01", "2021-03-04", 2, "Cough", + "01", "2021-", 3, "Flu" ) \%>\% mutate(STUDYID = "AB42") @@ -338,6 +338,50 @@ derive_param_tte( ) ) \%>\% select(USUBJID, STARTDT, PARAMCD, PARAM, ADT, CNSR, SRCSEQ) + +# Resolve tie when serious AE share a date by sorting with order argument +adsl <- tribble( + ~USUBJID, ~TRTSDT, ~EOSDT, + "01", ymd("2020-12-06"), ymd("2021-03-06"), + "02", ymd("2021-01-16"), ymd("2021-02-03") +) \%>\% mutate(STUDYID = "AB42") + +ae <- tribble( + ~USUBJID, ~AESTDTC, ~AESEQ, ~AESER, ~AEDECOD, + "01", "2021-01-03", 1, "Y", "Flu", + "01", "2021-01-03", 2, "Y", "cough", + "01", "2021-01-20", 3, "N", "Headache", +) \%>\% mutate( + AESTDT = ymd(AESTDTC), + STUDYID = "AB42" +) + +derive_param_tte( + dataset_adsl = adsl, + start_date = TRTSDT, + source_datasets = list(adsl = adsl, ae = ae), + event_conditions = list(event_source( + dataset_name = "ae", + date = AESTDT, + set_values_to = exprs( + EVENTDESC = "Serious AE", + SRCSEQ = AESEQ + ), + filter = AESER == "Y", + order = exprs(AESTDT, AESEQ) + )), + censor_conditions = list(censor_source( + dataset_name = "adsl", + date = EOSDT, + censor = 1, + set_values_to = exprs(EVENTDESC = "End of Study") + )), + set_values_to = exprs( + PARAMCD = "TTSAE", + PARAM = "Time to First Serious AE" + ) +) + } \seealso{ \code{\link[=event_source]{event_source()}}, \code{\link[=censor_source]{censor_source()}} diff --git a/man/event_source.Rd b/man/event_source.Rd index 52f899fabd..e9db5b48b3 100644 --- a/man/event_source.Rd +++ b/man/event_source.Rd @@ -34,8 +34,10 @@ character string, a numeric value, an expression, or \code{NA}.} \item{order}{Sort order -If the argument is set to a non-null value, for each by group the first or -last observation} +An optional named list returned by \code{exprs()} defining additional variables +that the input dataset is sorted on after \code{date}. + +Persmitted Values: list of variables created by \code{exprs()} e.g. \code{exprs(ASEQ)}.} } \value{ An object of class \code{event_source}, inheriting from class \code{tte_source} diff --git a/man/tte_source.Rd b/man/tte_source.Rd index 575566d8e7..28802cc6b1 100644 --- a/man/tte_source.Rd +++ b/man/tte_source.Rd @@ -40,8 +40,10 @@ character string, a numeric value, an expression, or \code{NA}.} \item{order}{Sort order -If the argument is set to a non-null value, for each by group the first or -last observation} +An optional named list returned by \code{exprs()} defining additional variables +that the input dataset is sorted on after \code{date}. + +Persmitted Values: list of variables created by \code{exprs()} e.g. \code{exprs(ASEQ)}.} } \value{ An object of class \code{tte_source} diff --git a/tests/testthat/_snaps/derive_param_tte.md b/tests/testthat/_snaps/derive_param_tte.md index f1438eca6c..1e006d1b6a 100644 --- a/tests/testthat/_snaps/derive_param_tte.md +++ b/tests/testthat/_snaps/derive_param_tte.md @@ -61,7 +61,7 @@ i Following names were provided by `source_datasets`: "adsl" i But, `censor_conditions[[1]]$dataset_name = adls` -# list_tte_source_objects Test 14: error is issued if package does not exist +# list_tte_source_objects Test 16: error is issued if package does not exist Code list_tte_source_objects(package = "tte") diff --git a/tests/testthat/test-derive_param_tte.R b/tests/testthat/test-derive_param_tte.R index 118f0e69ee..5a4da6a137 100644 --- a/tests/testthat/test-derive_param_tte.R +++ b/tests/testthat/test-derive_param_tte.R @@ -990,8 +990,68 @@ test_that("derive_param_tte Test 14: detects duplicates in input datasets via pi ) }) -## Test 15: derive_param_tte produces consistent results regardless of input sort order ---- -test_that("derive_param_tte Test 15: produces consistent results regardless of input sort order", { +## Test 15: derive_param_tte resolves ties using order argument when input is sorted descending +test_that("derive_param_tte Test 15: derive_param_tte resolves ties using order argument when +input is sorted descending", { + adsl <- tibble::tribble( + ~USUBJID, ~TRTSDT, ~EOSDT, + "01", ymd("2020-12-06"), ymd("2021-03-06"), + "02", ymd("2021-01-16"), ymd("2021-02-03") + ) %>% + mutate(STUDYID = "AB42") + + # Sort the input AE dataset in descending order by AESEQ + #to confirm that the order argument re-sorts it correctly. + ae <- tibble::tribble( + ~USUBJID, ~AESTDTC, ~AESEQ, ~AESER, ~AEDECOD, + "01", "2021-01-03", 2, "Y", "Cough", + "01", "2021-01-03", 1, "Y", "Flu", + "01", "2021-01-20", 3, "N", "Headache" + ) %>% + mutate( + STUDYID = "AB42", + AESTDT = ymd(AESTDTC) + ) %>% + arrange(desc(AESEQ)) # Intentionally sort descending to test the order argument + + result <- derive_param_tte( + dataset_adsl = adsl, + start_date = TRTSDT, + event_conditions = list(event_source( + dataset_name = "ae", + date = AESTDT, + set_values_to = exprs( + EVENTDESC = "Serious AE", + SRCSEQ = AESEQ + ), + filter = AESER == "Y", + order = exprs(AESTDT, AESEQ) # Should re-sort so that AESEQ=1 (Flu) is chosen on tie + )), + censor_conditions = list(censor_source( + dataset_name = "adsl", + date = EOSDT, + censor = 1, + set_values_to = exprs(EVENTDESC = "End of Study") + )), + set_values_to = exprs( + PARAMCD = "TTSAE", + PARAM = "Time to First Serious AE" + ), + source_datasets = list(adsl = adsl, ae = ae) + ) + + # Check that for USUBJID = "01", the first serious AE selected is the one with AESEQ = 1 (Flu), + # despite the input AE data initially being arranged to show AESEQ=2 (Cough) first. + selected_seq <- result %>% + filter(USUBJID == "01", PARAMCD == "TTSAE") %>% + pull(SRCSEQ) + + expect_equal(selected_seq, 1, info = "The order argument should ensure AE with AESEQ=1 + is chosen on tie.") +}) + +## Test 16: derive_param_tte produces consistent results regardless of input sort order ---- +test_that("derive_param_tte Test 16: produces consistent results regardless of input sort order", { # Define ADSL dataset adsl <- tibble::tribble( ~USUBJID, ~TRTSDT, ~TRTEDT, ~EOSDT, @@ -1059,16 +1119,16 @@ test_that("derive_param_tte Test 15: produces consistent results regardless of i }) # list_tte_source_objects ---- -## Test 16: error is issued if package does not exist ---- -test_that("list_tte_source_objects Test 16: error is issued if package does not exist", { +## Test 17: error is issued if package does not exist ---- +test_that("list_tte_source_objects Test 17: error is issued if package does not exist", { expect_snapshot( list_tte_source_objects(package = "tte"), error = TRUE ) }) -## Test 17: expected objects produced ---- -test_that("list_tte_source_objects Test 17: expected objects produced", { +## Test 18: expected objects produced ---- +test_that("list_tte_source_objects Test 18: expected objects produced", { expected_output <- tibble::tribble( ~object, ~dataset_name, ~filter, "ae_ser_event", "adae", quote(TRTEMFL == "Y" & AESER == "Y"), From 404c94944d32c85b3dc563a353363669e72fdf19 Mon Sep 17 00:00:00 2001 From: ProfessorP Date: Fri, 20 Dec 2024 02:52:51 -0500 Subject: [PATCH 20/23] fixed spelling error --- R/derive_param_tte.R | 2 +- man/censor_source.Rd | 2 +- man/event_source.Rd | 2 +- man/tte_source.Rd | 2 +- tests/testthat/test-derive_param_tte.R | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/R/derive_param_tte.R b/R/derive_param_tte.R index eafb6ffcb6..541c083192 100644 --- a/R/derive_param_tte.R +++ b/R/derive_param_tte.R @@ -892,7 +892,7 @@ extend_source_datasets <- function(source_datasets, #' An optional named list returned by `exprs()` defining additional variables #' that the input dataset is sorted on after `date`. #' -#' Persmitted Values: list of variables created by `exprs()` e.g. `exprs(ASEQ)`. +#' Permitted Values: list of variables created by `exprs()` e.g. `exprs(ASEQ)`. #' #' @keywords source_specifications #' @family source_specifications diff --git a/man/censor_source.Rd b/man/censor_source.Rd index 4738cb95e0..bc6ea0365a 100644 --- a/man/censor_source.Rd +++ b/man/censor_source.Rd @@ -43,7 +43,7 @@ character string, a numeric value, an expression, or \code{NA}.} An optional named list returned by \code{exprs()} defining additional variables that the input dataset is sorted on after \code{date}. -Persmitted Values: list of variables created by \code{exprs()} e.g. \code{exprs(ASEQ)}.} +Permitted Values: list of variables created by \code{exprs()} e.g. \code{exprs(ASEQ)}.} } \value{ An object of class \code{censor_source}, inheriting from class \code{tte_source} diff --git a/man/event_source.Rd b/man/event_source.Rd index e9db5b48b3..c17c6c313e 100644 --- a/man/event_source.Rd +++ b/man/event_source.Rd @@ -37,7 +37,7 @@ character string, a numeric value, an expression, or \code{NA}.} An optional named list returned by \code{exprs()} defining additional variables that the input dataset is sorted on after \code{date}. -Persmitted Values: list of variables created by \code{exprs()} e.g. \code{exprs(ASEQ)}.} +Permitted Values: list of variables created by \code{exprs()} e.g. \code{exprs(ASEQ)}.} } \value{ An object of class \code{event_source}, inheriting from class \code{tte_source} diff --git a/man/tte_source.Rd b/man/tte_source.Rd index 28802cc6b1..17d52d8969 100644 --- a/man/tte_source.Rd +++ b/man/tte_source.Rd @@ -43,7 +43,7 @@ character string, a numeric value, an expression, or \code{NA}.} An optional named list returned by \code{exprs()} defining additional variables that the input dataset is sorted on after \code{date}. -Persmitted Values: list of variables created by \code{exprs()} e.g. \code{exprs(ASEQ)}.} +Permitted Values: list of variables created by \code{exprs()} e.g. \code{exprs(ASEQ)}.} } \value{ An object of class \code{tte_source} diff --git a/tests/testthat/test-derive_param_tte.R b/tests/testthat/test-derive_param_tte.R index 5a4da6a137..03547ad754 100644 --- a/tests/testthat/test-derive_param_tte.R +++ b/tests/testthat/test-derive_param_tte.R @@ -1001,7 +1001,7 @@ input is sorted descending", { mutate(STUDYID = "AB42") # Sort the input AE dataset in descending order by AESEQ - #to confirm that the order argument re-sorts it correctly. + # to confirm that the order argument re-sorts it correctly. ae <- tibble::tribble( ~USUBJID, ~AESTDTC, ~AESEQ, ~AESER, ~AEDECOD, "01", "2021-01-03", 2, "Y", "Cough", @@ -1014,7 +1014,7 @@ input is sorted descending", { ) %>% arrange(desc(AESEQ)) # Intentionally sort descending to test the order argument - result <- derive_param_tte( + result <- derive_param_tte( dataset_adsl = adsl, start_date = TRTSDT, event_conditions = list(event_source( From ae70492b8bd79d623c2cc101e3184cc8da52d34d Mon Sep 17 00:00:00 2001 From: ProfessorP Date: Sun, 22 Dec 2024 22:28:32 -0500 Subject: [PATCH 21/23] updates to derive_param_tte documentation and test examples. --- R/derive_param_tte.R | 10 +++++----- man/censor_source.Rd | 4 ++-- man/derive_param_tte.Rd | 4 ++-- man/event_source.Rd | 4 ++-- man/filter_date_sources.Rd | 2 +- man/tte_source.Rd | 4 ++-- tests/testthat/test-derive_param_tte.R | 2 +- 7 files changed, 15 insertions(+), 15 deletions(-) diff --git a/R/derive_param_tte.R b/R/derive_param_tte.R index 541c083192..fadfd6a2f8 100644 --- a/R/derive_param_tte.R +++ b/R/derive_param_tte.R @@ -332,7 +332,7 @@ #' ae <- tribble( #' ~USUBJID, ~AESTDTC, ~AESEQ, ~AESER, ~AEDECOD, #' "01", "2021-01-03", 1, "Y", "Flu", -#' "01", "2021-01-03", 2, "Y", "cough", +#' "01", "2021-01-03", 2, "Y", "Cough", #' "01", "2021-01-20", 3, "N", "Headache", #' ) %>% mutate( #' AESTDT = ymd(AESTDTC), @@ -351,7 +351,7 @@ #' SRCSEQ = AESEQ #' ), #' filter = AESER == "Y", -#' order = exprs(AESTDT, AESEQ) +#' order = exprs(AESEQ) #' )), #' censor_conditions = list(censor_source( #' dataset_name = "adsl", @@ -636,7 +636,7 @@ derive_param_tte <- function(dataset = NULL, #' ) #' ) #' -#' filter_date_sources( +#' admiral:::filter_date_sources( #' sources = list(ttae), #' source_datasets = list(adsl = adsl, ae = ae), #' by_vars = exprs(AEDECOD), @@ -890,9 +890,9 @@ extend_source_datasets <- function(source_datasets, #' @param order Sort order #' #' An optional named list returned by `exprs()` defining additional variables -#' that the input dataset is sorted on after `date`. +#' that the source dataset is sorted on after `date`. #' -#' Permitted Values: list of variables created by `exprs()` e.g. `exprs(ASEQ)`. +#' *Permitted Values:* list of variables created by `exprs()` e.g. `exprs(ASEQ)`. #' #' @keywords source_specifications #' @family source_specifications diff --git a/man/censor_source.Rd b/man/censor_source.Rd index bc6ea0365a..ff887ff87d 100644 --- a/man/censor_source.Rd +++ b/man/censor_source.Rd @@ -41,9 +41,9 @@ character string, a numeric value, an expression, or \code{NA}.} \item{order}{Sort order An optional named list returned by \code{exprs()} defining additional variables -that the input dataset is sorted on after \code{date}. +that the source dataset is sorted on after \code{date}. -Permitted Values: list of variables created by \code{exprs()} e.g. \code{exprs(ASEQ)}.} +\emph{Permitted Values:} list of variables created by \code{exprs()} e.g. \code{exprs(ASEQ)}.} } \value{ An object of class \code{censor_source}, inheriting from class \code{tte_source} diff --git a/man/derive_param_tte.Rd b/man/derive_param_tte.Rd index c9ad0c7a7b..4821c77f4d 100644 --- a/man/derive_param_tte.Rd +++ b/man/derive_param_tte.Rd @@ -349,7 +349,7 @@ adsl <- tribble( ae <- tribble( ~USUBJID, ~AESTDTC, ~AESEQ, ~AESER, ~AEDECOD, "01", "2021-01-03", 1, "Y", "Flu", - "01", "2021-01-03", 2, "Y", "cough", + "01", "2021-01-03", 2, "Y", "Cough", "01", "2021-01-20", 3, "N", "Headache", ) \%>\% mutate( AESTDT = ymd(AESTDTC), @@ -368,7 +368,7 @@ derive_param_tte( SRCSEQ = AESEQ ), filter = AESER == "Y", - order = exprs(AESTDT, AESEQ) + order = exprs(AESEQ) )), censor_conditions = list(censor_source( dataset_name = "adsl", diff --git a/man/event_source.Rd b/man/event_source.Rd index c17c6c313e..8a721a409e 100644 --- a/man/event_source.Rd +++ b/man/event_source.Rd @@ -35,9 +35,9 @@ character string, a numeric value, an expression, or \code{NA}.} \item{order}{Sort order An optional named list returned by \code{exprs()} defining additional variables -that the input dataset is sorted on after \code{date}. +that the source dataset is sorted on after \code{date}. -Permitted Values: list of variables created by \code{exprs()} e.g. \code{exprs(ASEQ)}.} +\emph{Permitted Values:} list of variables created by \code{exprs()} e.g. \code{exprs(ASEQ)}.} } \value{ An object of class \code{event_source}, inheriting from class \code{tte_source} diff --git a/man/filter_date_sources.Rd b/man/filter_date_sources.Rd index 7736677e69..e5cd516b31 100644 --- a/man/filter_date_sources.Rd +++ b/man/filter_date_sources.Rd @@ -123,7 +123,7 @@ ttae <- event_source( ) ) -filter_date_sources( +admiral:::filter_date_sources( sources = list(ttae), source_datasets = list(adsl = adsl, ae = ae), by_vars = exprs(AEDECOD), diff --git a/man/tte_source.Rd b/man/tte_source.Rd index 17d52d8969..c2c3b56ff7 100644 --- a/man/tte_source.Rd +++ b/man/tte_source.Rd @@ -41,9 +41,9 @@ character string, a numeric value, an expression, or \code{NA}.} \item{order}{Sort order An optional named list returned by \code{exprs()} defining additional variables -that the input dataset is sorted on after \code{date}. +that the source dataset is sorted on after \code{date}. -Permitted Values: list of variables created by \code{exprs()} e.g. \code{exprs(ASEQ)}.} +\emph{Permitted Values:} list of variables created by \code{exprs()} e.g. \code{exprs(ASEQ)}.} } \value{ An object of class \code{tte_source} diff --git a/tests/testthat/test-derive_param_tte.R b/tests/testthat/test-derive_param_tte.R index 03547ad754..557b6f7bd0 100644 --- a/tests/testthat/test-derive_param_tte.R +++ b/tests/testthat/test-derive_param_tte.R @@ -1025,7 +1025,7 @@ input is sorted descending", { SRCSEQ = AESEQ ), filter = AESER == "Y", - order = exprs(AESTDT, AESEQ) # Should re-sort so that AESEQ=1 (Flu) is chosen on tie + order = exprs(AESEQ) # Should re-sort so that AESEQ=1 (Flu) is chosen on tie )), censor_conditions = list(censor_source( dataset_name = "adsl", From 34d2fb3ff0c549c12b9397d256f179b540049129 Mon Sep 17 00:00:00 2001 From: Phil Webster <83318967+ProfessorP-beep@users.noreply.github.com> Date: Wed, 8 Jan 2025 09:53:26 -0500 Subject: [PATCH 22/23] Update NEWS.md Co-authored-by: Stefan Bundfuss <80953585+bundfussr@users.noreply.github.com> --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index a840d8a1c0..e2632c5791 100644 --- a/NEWS.md +++ b/NEWS.md @@ -12,7 +12,7 @@ - Update `ADEG` template to flag `ABLFL` and `ANL01FL` based on `DTYPE == "AVERAGE"` records. (#2561) ## Updates of Existing Functions -- added `message` as option for `check_type` argument in `filter_extreme()` function. (#2481) +- added `"message"` as option for `check_type` argument in `filter_extreme()` function. (#2481) - Users can now specify how duplicate records are handled in `derive_param_tte()` using the `check_type` argument, with options including `"error"`, `"warning"`, `"message"`, or `"none"`, allowing for greater flexibility in managing duplicate data scenarios. (#2481) From 2a3cf6c96b3bb6566a4aa97bc5c4c55990a29501 Mon Sep 17 00:00:00 2001 From: ProfessorP Date: Thu, 9 Jan 2025 12:24:27 -0500 Subject: [PATCH 23/23] update to derive_param_tte test, function examples, and documentation. --- R/derive_merged.R | 2 +- R/derive_param_tte.R | 74 +++++++++-------------- man/derive_param_tte.Rd | 29 ++++----- man/derive_vars_merged.Rd | 2 +- man/derive_vars_merged_lookup.Rd | 2 +- man/filter_date_sources.Rd | 9 ++- tests/testthat/_snaps/derive_param_tte.md | 20 +++++- tests/testthat/test-derive_param_tte.R | 27 ++++----- 8 files changed, 81 insertions(+), 84 deletions(-) diff --git a/R/derive_merged.R b/R/derive_merged.R index 2201b50c9b..64e166b2d8 100644 --- a/R/derive_merged.R +++ b/R/derive_merged.R @@ -120,7 +120,7 @@ #' if the observations of the (restricted) additional dataset are not unique with respect #' to the by variables, an error is issued. #' -#' *Permitted Values*: `"none"`, `"message"`,`"warning"`, `"error"` +#' *Permitted Values*: `"none"`, `"message"`, `"warning"`, `"error"` #' #' @param duplicate_msg Message of unique check #' diff --git a/R/derive_param_tte.R b/R/derive_param_tte.R index fadfd6a2f8..84eee1f5a6 100644 --- a/R/derive_param_tte.R +++ b/R/derive_param_tte.R @@ -63,10 +63,11 @@ #' @param check_type Check uniqueness #' #' If `"warning"`, `"message"`, or `"error"` is specified, the specified message is issued -#' if the observations of the input dataset are not unique with respect to the -#' by variables and the order. +#' if the observations of the source datasets are not unique with respect to the +#' by variables and the date and order specified in the `event_source()` and +#' `censor_source()` objects. #' -#' Default: `"none"` +#' *Permitted Values*: `"none"`, `"message"`, `"warning"`, `"error"` #' #' @details The following steps are performed to create the observations of the #' new parameter: @@ -270,10 +271,10 @@ #' mutate(STUDYID = "AB42") #' #' ae <- tribble( -#' ~USUBJID, ~AESTDTC, ~AESEQ, ~AEDECOD, -#' "01", "2021-01-03T10:56", 1, "Flu", -#' "01", "2021-03-04", 2, "Cough", -#' "01", "2021-", 3, "Flu" +#' ~USUBJID, ~AESTDTC, ~AESEQ, ~AEDECOD, +#' "01", "2021-01-03T10:56", 1, "Flu", +#' "01", "2021-03-04", 2, "Cough", +#' "01", "2021", 3, "Flu" #' ) %>% #' mutate(STUDYID = "AB42") #' @@ -324,16 +325,16 @@ #' #' # Resolve tie when serious AE share a date by sorting with order argument #' adsl <- tribble( -#' ~USUBJID, ~TRTSDT, ~EOSDT, -#' "01", ymd("2020-12-06"), ymd("2021-03-06"), -#' "02", ymd("2021-01-16"), ymd("2021-02-03") +#' ~USUBJID, ~TRTSDT, ~EOSDT, +#' "01", ymd("2020-12-06"), ymd("2021-03-06"), +#' "02", ymd("2021-01-16"), ymd("2021-02-03") #' ) %>% mutate(STUDYID = "AB42") #' #' ae <- tribble( -#' ~USUBJID, ~AESTDTC, ~AESEQ, ~AESER, ~AEDECOD, -#' "01", "2021-01-03", 1, "Y", "Flu", -#' "01", "2021-01-03", 2, "Y", "Cough", -#' "01", "2021-01-20", 3, "N", "Headache", +#' ~USUBJID, ~AESTDTC, ~AESEQ, ~AESER, ~AEDECOD, +#' "01", "2021-01-03", 1, "Y", "Flu", +#' "01", "2021-01-03", 2, "Y", "Cough", +#' "01", "2021-01-20", 3, "N", "Headache", #' ) %>% mutate( #' AESTDT = ymd(AESTDTC), #' STUDYID = "AB42" @@ -566,14 +567,13 @@ derive_param_tte <- function(dataset = NULL, #' #' Permitted Values: `"first"`, `"last"` #' -#' @param check_type Check uniqueness +#' @param check_type Check uniqueness #' #' If `"warning"`, `"message"`, or `"error"` is specified, the specified message is issued -#' if the observations of the input dataset are not unique with respect to the -#' by variables and the order. +#' if the observations of the source datasets are not unique with respect to the +#' by variables and the date and order specified in the `tte_source()` objects. #' #' Default: `"none"` -#' #' Permitted Values: `"none"`, `"warning"`, `"error"`, `"message"` #' #' @details The following steps are performed to create the output dataset: @@ -698,34 +698,16 @@ filter_date_sources <- function(sources, check_type = check_type ) }, - warning = function(cnd) { - # Handle warnings - if (grepl("duplicate records", conditionMessage(cnd))) { - cli::cli_warn(c( - "Dataset '{.val {sources[[i]]$dataset_name}}' contains duplicate records.", - "i Duplicates were identified based on variables: - {.val {paste(c(subject_keys, by_vars, source_date_var), collapse = ', ')}}." - )) - } - source_dataset %>% - filter_if(sources[[i]]$filter) %>% - arrange(!!!sources[[i]]$order) # Return filtered dataset even if a warning occurred - }, - error = function(err) { - cli::cli_abort(c( - "Duplicate records detected during processing.", - "x Duplicate records were found in dataset {.val {sources[[i]]$dataset_name}}.", - "i The duplicates were identified based on the following variables: - {.val {paste(c(subject_keys, by_vars, source_date_var), collapse = ', ')}}.", - "i Consider reviewing your `by_vars` or `order` argument to ensure uniqueness." - )) - }, - message = function(msg) { - cli::cli_inform(c( - "Processing dataset '{.val {sources[[i]]$dataset_name}}'...", - "i Filter and order criteria: {.val {paste(c(subject_keys, by_vars, - sources[[i]]$order), collapse = ', ')}}." - )) + duplicate_records = function(cnd) { + cnd_funs <- list(message = cli_inform, warning = cli_warn, error = cli_abort) + cnd_funs[[check_type]]( + paste( + "Dataset {.val {sources[[i]]$dataset_name}} contains duplicate records with respect to", + "{.var {cnd$by_vars}}" + ), + class = class(cnd)) + cnd_muffle(cnd) + zap() } ) # add date variable and accompanying variables diff --git a/man/derive_param_tte.Rd b/man/derive_param_tte.Rd index 4821c77f4d..ed312f5244 100644 --- a/man/derive_param_tte.Rd +++ b/man/derive_param_tte.Rd @@ -81,10 +81,11 @@ A list of symbols created using \code{exprs()} is expected.} \item{check_type}{Check uniqueness If \code{"warning"}, \code{"message"}, or \code{"error"} is specified, the specified message is issued -if the observations of the input dataset are not unique with respect to the -by variables and the order. +if the observations of the source datasets are not unique with respect to the +by variables and the date and order specified in the \code{event_source()} and +\code{censor_source()} objects. -Default: \code{"none"}} +\emph{Permitted Values}: \code{"none"}, \code{"message"}, \code{"warning"}, \code{"error"}} } \value{ The input dataset with the new parameter added @@ -287,10 +288,10 @@ adsl <- tribble( mutate(STUDYID = "AB42") ae <- tribble( - ~USUBJID, ~AESTDTC, ~AESEQ, ~AEDECOD, - "01", "2021-01-03T10:56", 1, "Flu", - "01", "2021-03-04", 2, "Cough", - "01", "2021-", 3, "Flu" + ~USUBJID, ~AESTDTC, ~AESEQ, ~AEDECOD, + "01", "2021-01-03T10:56", 1, "Flu", + "01", "2021-03-04", 2, "Cough", + "01", "2021", 3, "Flu" ) \%>\% mutate(STUDYID = "AB42") @@ -341,16 +342,16 @@ derive_param_tte( # Resolve tie when serious AE share a date by sorting with order argument adsl <- tribble( - ~USUBJID, ~TRTSDT, ~EOSDT, - "01", ymd("2020-12-06"), ymd("2021-03-06"), - "02", ymd("2021-01-16"), ymd("2021-02-03") + ~USUBJID, ~TRTSDT, ~EOSDT, + "01", ymd("2020-12-06"), ymd("2021-03-06"), + "02", ymd("2021-01-16"), ymd("2021-02-03") ) \%>\% mutate(STUDYID = "AB42") ae <- tribble( - ~USUBJID, ~AESTDTC, ~AESEQ, ~AESER, ~AEDECOD, - "01", "2021-01-03", 1, "Y", "Flu", - "01", "2021-01-03", 2, "Y", "Cough", - "01", "2021-01-20", 3, "N", "Headache", + ~USUBJID, ~AESTDTC, ~AESEQ, ~AESER, ~AEDECOD, + "01", "2021-01-03", 1, "Y", "Flu", + "01", "2021-01-03", 2, "Y", "Cough", + "01", "2021-01-20", 3, "N", "Headache", ) \%>\% mutate( AESTDT = ymd(AESTDTC), STUDYID = "AB42" diff --git a/man/derive_vars_merged.Rd b/man/derive_vars_merged.Rd index 3753a4eec6..d9d2db5061 100644 --- a/man/derive_vars_merged.Rd +++ b/man/derive_vars_merged.Rd @@ -141,7 +141,7 @@ If the \code{order} argument is not specified, the \code{check_type} argument is if the observations of the (restricted) additional dataset are not unique with respect to the by variables, an error is issued. -\emph{Permitted Values}: \code{"none"}, \code{"message"},\code{"warning"}, \code{"error"}} +\emph{Permitted Values}: \code{"none"}, \code{"message"}, \code{"warning"}, \code{"error"}} \item{duplicate_msg}{Message of unique check diff --git a/man/derive_vars_merged_lookup.Rd b/man/derive_vars_merged_lookup.Rd index 9407582095..3d2b503dd5 100644 --- a/man/derive_vars_merged_lookup.Rd +++ b/man/derive_vars_merged_lookup.Rd @@ -102,7 +102,7 @@ If the \code{order} argument is not specified, the \code{check_type} argument is if the observations of the (restricted) additional dataset are not unique with respect to the by variables, an error is issued. -\emph{Permitted Values}: \code{"none"}, \code{"message"},\code{"warning"}, \code{"error"}} +\emph{Permitted Values}: \code{"none"}, \code{"message"}, \code{"warning"}, \code{"error"}} \item{duplicate_msg}{Message of unique check diff --git a/man/filter_date_sources.Rd b/man/filter_date_sources.Rd index e5cd516b31..d411ec9e1e 100644 --- a/man/filter_date_sources.Rd +++ b/man/filter_date_sources.Rd @@ -47,16 +47,15 @@ If \code{"first"} is specified, for each subject the first observation with respect to the date is included in the output dataset. If \code{"last"} is specified, the last observation is included in the output dataset. -Permitted Values: \code{"first"}, \code{"last"} +Permitted Values: \code{"first"}, \code{"last"}} -@param check_type Check uniqueness +\item{check_type}{Check uniqueness If \code{"warning"}, \code{"message"}, or \code{"error"} is specified, the specified message is issued -if the observations of the input dataset are not unique with respect to the -by variables and the order. +if the observations of the source datasets are not unique with respect to the +by variables and the date and order specified in the \code{tte_source()} objects. Default: \code{"none"} - Permitted Values: \code{"none"}, \code{"warning"}, \code{"error"}, \code{"message"}} } \value{ diff --git a/tests/testthat/_snaps/derive_param_tte.md b/tests/testthat/_snaps/derive_param_tte.md index 1e006d1b6a..21b0a8c31c 100644 --- a/tests/testthat/_snaps/derive_param_tte.md +++ b/tests/testthat/_snaps/derive_param_tte.md @@ -61,7 +61,25 @@ i Following names were provided by `source_datasets`: "adsl" i But, `censor_conditions[[1]]$dataset_name = adls` -# list_tte_source_objects Test 16: error is issued if package does not exist +# derive_param_tte Test 14: detects duplicates in input datasets via pipeline functions + + Code + derive_param_tte(dataset_adsl = adsl, start_date = TRTSDT, event_conditions = list( + ttae), censor_conditions = list(eot), source_datasets = list(adsl = adsl, ae = ae), + set_values_to = exprs(PARAMCD = "TTAE"), check_type = "warning") + Condition + Warning: + Dataset contains duplicate records with respect to `STUDYID`, `USUBJID`, and `AESTDT` + i Run `admiral::get_duplicates_dataset()` to access the duplicate records + Output + # A tibble: 2 x 10 + USUBJID STUDYID EVENTDESC SRCDOM SRCVAR SRCSEQ CNSR ADT STARTDT + + 1 01 AB42 AE AE AESTDTC 1 0 2021-01-03 2020-12-06 + 2 02 AB42 END OF TRT ADSL TRTEDT NA 1 2021-01-30 2021-01-16 + # i 1 more variable: PARAMCD + +# list_tte_source_objects Test 17: error is issued if package does not exist Code list_tte_source_objects(package = "tte") diff --git a/tests/testthat/test-derive_param_tte.R b/tests/testthat/test-derive_param_tte.R index 557b6f7bd0..39cf295c5f 100644 --- a/tests/testthat/test-derive_param_tte.R +++ b/tests/testthat/test-derive_param_tte.R @@ -976,7 +976,7 @@ test_that("derive_param_tte Test 14: detects duplicates in input datasets via pi ) # Run derive_param_tte and check for warning - expect_warning( + expect_snapshot( derive_param_tte( dataset_adsl = adsl, start_date = TRTSDT, @@ -985,8 +985,7 @@ test_that("derive_param_tte Test 14: detects duplicates in input datasets via pi source_datasets = list(adsl = adsl, ae = ae), set_values_to = exprs(PARAMCD = "TTAE"), check_type = "warning" - ), - regexp = "Dataset '.*' contains duplicate records." + ) ) }) @@ -1003,10 +1002,10 @@ input is sorted descending", { # Sort the input AE dataset in descending order by AESEQ # to confirm that the order argument re-sorts it correctly. ae <- tibble::tribble( - ~USUBJID, ~AESTDTC, ~AESEQ, ~AESER, ~AEDECOD, - "01", "2021-01-03", 2, "Y", "Cough", - "01", "2021-01-03", 1, "Y", "Flu", - "01", "2021-01-20", 3, "N", "Headache" + ~USUBJID, ~AESTDTC, ~AESEQ, ~AESER, ~AEDECOD, + "01", "2021-01-03", 2, "Y", "Cough", + "01", "2021-01-03", 1, "Y", "Flu", + "01", "2021-01-20", 3, "N", "Headache" ) %>% mutate( STUDYID = "AB42", @@ -1061,19 +1060,17 @@ test_that("derive_param_tte Test 16: produces consistent results regardless of i # Define AE dataset with duplicates ae <- tibble::tribble( - ~USUBJID, ~AESTDTC, ~AESEQ, ~AEDECOD, - "01", "2021-01-03", 1, "Flu", - "01", "2021-03-04", 2, "Cough", - "01", "2021-01-03", 3, "Flu" + ~USUBJID, ~AESTDTC, ~AESEQ, ~AEDECOD, + "01", "2021-01-03", 1, "Flu", + "01", "2021-03-04", 2, "Cough", + "01", "2021-01-03", 3, "Flu" ) %>% mutate(STUDYID = "AB42", AESTDT = ymd(AESTDTC)) - # Deduplicate AE dataset to remove duplicate warnings - ae <- ae %>% distinct(STUDYID, USUBJID, AESTDT, .keep_all = TRUE) - - # Define event and censor sources + # Define event and censor sources ttae <- event_source( dataset_name = "ae", date = AESTDT, + order = exprs(AESTDT, AESEQ), set_values_to = exprs( EVENTDESC = "AE", SRCDOM = "AE",