diff --git a/NEWS.md b/NEWS.md index ee60e7806f..5a7785f643 100644 --- a/NEWS.md +++ b/NEWS.md @@ -14,6 +14,12 @@ target range. (#2571) - Update `ADEG` template to flag `ABLFL` and `ANL01FL` based on `DTYPE == "AVERAGE"` records. (#2561) ## Updates of Existing Functions +- added `"message"` as option for `check_type` argument in `filter_extreme()` function. (#2481) + +- Users can now specify how duplicate records are handled in `derive_param_tte()` using the `check_type` argument, with options including `"error"`, `"warning"`, `"message"`, or `"none"`, allowing for greater flexibility in managing duplicate data scenarios. (#2481) + +- `order` argument has been added to `event_source()` and `censor_source()` and + defaulted to `NULL` to allow specifying variables in addition to the date variable. This can be used to ensure the uniqueness of the select records if there is more than one record per date. (#2481) - NCICTCAEv5 grading criteria (`atoxgr_criteria_ctcv5`): diff --git a/R/derive_merged.R b/R/derive_merged.R index d32e8089f0..64e166b2d8 100644 --- a/R/derive_merged.R +++ b/R/derive_merged.R @@ -112,15 +112,15 @@ #' #' @param check_type Check uniqueness? #' -#' If `"warning"` or `"error"` is specified, the specified message is issued +#' If `"warning"`, `"message"`, or `"error"` is specified, the specified message is issued #' if the observations of the (restricted) additional dataset are not unique #' with respect to the by variables and the order. #' #' If the `order` argument is not specified, the `check_type` argument is ignored: -#' if the observations of the (restricted) additional dataset are not unique with respect -#' to the by variables, an error is issued. +#' if the observations of the (restricted) additional dataset are not unique with respect +#' to the by variables, an error is issued. #' -#' *Permitted Values*: `"none"`, `"warning"`, `"error"` +#' *Permitted Values*: `"none"`, `"message"`, `"warning"`, `"error"` #' #' @param duplicate_msg Message of unique check #' diff --git a/R/derive_param_tte.R b/R/derive_param_tte.R index daa9f9af4d..84eee1f5a6 100644 --- a/R/derive_param_tte.R +++ b/R/derive_param_tte.R @@ -60,6 +60,15 @@ #' #' A list of symbols created using `exprs()` is expected. #' +#' @param check_type Check uniqueness +#' +#' If `"warning"`, `"message"`, or `"error"` is specified, the specified message is issued +#' if the observations of the source datasets are not unique with respect to the +#' by variables and the date and order specified in the `event_source()` and +#' `censor_source()` objects. +#' +#' *Permitted Values*: `"none"`, `"message"`, `"warning"`, `"error"` +#' #' @details The following steps are performed to create the observations of the #' new parameter: #' @@ -262,10 +271,10 @@ #' mutate(STUDYID = "AB42") #' #' ae <- tribble( -#' ~USUBJID, ~AESTDTC, ~AESEQ, ~AEDECOD, -#' "01", "2021-01-03T10:56", 1, "Flu", -#' "01", "2021-03-04", 2, "Cough", -#' "01", "2021", 3, "Flu" +#' ~USUBJID, ~AESTDTC, ~AESEQ, ~AEDECOD, +#' "01", "2021-01-03T10:56", 1, "Flu", +#' "01", "2021-03-04", 2, "Cough", +#' "01", "2021", 3, "Flu" #' ) %>% #' mutate(STUDYID = "AB42") #' @@ -313,6 +322,50 @@ #' ) #' ) %>% #' select(USUBJID, STARTDT, PARAMCD, PARAM, ADT, CNSR, SRCSEQ) +#' +#' # Resolve tie when serious AE share a date by sorting with order argument +#' adsl <- tribble( +#' ~USUBJID, ~TRTSDT, ~EOSDT, +#' "01", ymd("2020-12-06"), ymd("2021-03-06"), +#' "02", ymd("2021-01-16"), ymd("2021-02-03") +#' ) %>% mutate(STUDYID = "AB42") +#' +#' ae <- tribble( +#' ~USUBJID, ~AESTDTC, ~AESEQ, ~AESER, ~AEDECOD, +#' "01", "2021-01-03", 1, "Y", "Flu", +#' "01", "2021-01-03", 2, "Y", "Cough", +#' "01", "2021-01-20", 3, "N", "Headache", +#' ) %>% mutate( +#' AESTDT = ymd(AESTDTC), +#' STUDYID = "AB42" +#' ) +#' +#' derive_param_tte( +#' dataset_adsl = adsl, +#' start_date = TRTSDT, +#' source_datasets = list(adsl = adsl, ae = ae), +#' event_conditions = list(event_source( +#' dataset_name = "ae", +#' date = AESTDT, +#' set_values_to = exprs( +#' EVENTDESC = "Serious AE", +#' SRCSEQ = AESEQ +#' ), +#' filter = AESER == "Y", +#' order = exprs(AESEQ) +#' )), +#' censor_conditions = list(censor_source( +#' dataset_name = "adsl", +#' date = EOSDT, +#' censor = 1, +#' set_values_to = exprs(EVENTDESC = "End of Study") +#' )), +#' set_values_to = exprs( +#' PARAMCD = "TTSAE", +#' PARAM = "Time to First Serious AE" +#' ) +#' ) +#' derive_param_tte <- function(dataset = NULL, dataset_adsl, source_datasets, @@ -322,8 +375,14 @@ derive_param_tte <- function(dataset = NULL, censor_conditions, create_datetime = FALSE, set_values_to, - subject_keys = get_admiral_option("subject_keys")) { + subject_keys = get_admiral_option("subject_keys"), + check_type = "warning") { # checking and quoting # + check_type <- assert_character_scalar( + check_type, + values = c("warning", "message", "error", "none"), + case_sensitive = FALSE + ) assert_data_frame(dataset, optional = TRUE) assert_vars(by_vars, optional = TRUE) start_date <- assert_symbol(enexpr(start_date)) @@ -373,8 +432,8 @@ derive_param_tte <- function(dataset = NULL, by_vars = by_vars ) } - tmp_event <- get_new_tmp_var(dataset) + # determine events # event_data <- filter_date_sources( sources = event_conditions, @@ -382,7 +441,8 @@ derive_param_tte <- function(dataset = NULL, by_vars = by_vars, create_datetime = create_datetime, subject_keys = subject_keys, - mode = "first" + mode = "first", + check_type = check_type ) %>% mutate(!!tmp_event := 1L) @@ -393,7 +453,8 @@ derive_param_tte <- function(dataset = NULL, by_vars = by_vars, create_datetime = create_datetime, subject_keys = subject_keys, - mode = "last" + mode = "last", + check_type = check_type ) %>% mutate(!!tmp_event := 0L) @@ -436,7 +497,8 @@ derive_param_tte <- function(dataset = NULL, bind_rows(event_data, censor_data), by_vars = expr_c(subject_keys, by_vars), order = exprs(!!tmp_event), - mode = "last" + mode = "last", + check_type = check_type ) %>% inner_join( adsl, @@ -505,6 +567,15 @@ derive_param_tte <- function(dataset = NULL, #' #' Permitted Values: `"first"`, `"last"` #' +#' @param check_type Check uniqueness +#' +#' If `"warning"`, `"message"`, or `"error"` is specified, the specified message is issued +#' if the observations of the source datasets are not unique with respect to the +#' by variables and the date and order specified in the `tte_source()` objects. +#' +#' Default: `"none"` +#' Permitted Values: `"none"`, `"warning"`, `"error"`, `"message"` +#' #' @details The following steps are performed to create the output dataset: #' #' \enumerate{ \item For each source dataset the observations as specified by @@ -529,7 +600,7 @@ derive_param_tte <- function(dataset = NULL, #' @return A dataset with one observation per subject as described in the #' "Details" section. #' -#' @noRd +#' @keywords internal #' #' @examples #' library(tibble) @@ -565,20 +636,22 @@ derive_param_tte <- function(dataset = NULL, #' ) #' ) #' -#' filter_date_sources( +#' admiral:::filter_date_sources( #' sources = list(ttae), #' source_datasets = list(adsl = adsl, ae = ae), #' by_vars = exprs(AEDECOD), #' create_datetime = FALSE, #' subject_keys = get_admiral_option("subject_keys"), -#' mode = "first" +#' mode = "first", +#' check_type = "none" #' ) filter_date_sources <- function(sources, source_datasets, by_vars, create_datetime = FALSE, subject_keys, - mode) { + mode, + check_type = "none") { assert_list_of(sources, "tte_source") assert_list_of(source_datasets, "data.frame") assert_logical_scalar(create_datetime) @@ -613,17 +686,31 @@ filter_date_sources <- function(sources, var = !!source_date_var, dataset_name = sources[[i]]$dataset_name ) - data[[i]] <- source_dataset %>% - filter_if(sources[[i]]$filter) %>% - filter_extreme( - order = exprs(!!source_date_var), - by_vars = expr_c(subject_keys, by_vars), - mode = mode, - check_type = "none" - ) - + # wrap filter_extreme in tryCatch to catch duplicate records and create a message + data[[i]] <- rlang::try_fetch( + { + source_dataset %>% + filter_if(sources[[i]]$filter) %>% + filter_extreme( + order = expr_c(exprs(!!source_date_var), sources[[i]]$order), + by_vars = expr_c(subject_keys, by_vars), + mode = mode, + check_type = check_type + ) + }, + duplicate_records = function(cnd) { + cnd_funs <- list(message = cli_inform, warning = cli_warn, error = cli_abort) + cnd_funs[[check_type]]( + paste( + "Dataset {.val {sources[[i]]$dataset_name}} contains duplicate records with respect to", + "{.var {cnd$by_vars}}" + ), + class = class(cnd)) + cnd_muffle(cnd) + zap() + } + ) # add date variable and accompanying variables - if (create_datetime) { date_derv <- exprs(!!date_var := as_datetime(!!source_date_var)) } else { @@ -649,7 +736,7 @@ filter_date_sources <- function(sources, by_vars = expr_c(subject_keys, by_vars), order = exprs(!!date_var), mode = mode, - check_type = "none" + check_type = check_type ) } @@ -782,6 +869,12 @@ extend_source_datasets <- function(source_datasets, #' SRCDOM = "ADSL", SRCVAR = "DTHDT")`. The values must be a symbol, a #' character string, a numeric value, an expression, or `NA`. #' +#' @param order Sort order +#' +#' An optional named list returned by `exprs()` defining additional variables +#' that the source dataset is sorted on after `date`. +#' +#' *Permitted Values:* list of variables created by `exprs()` e.g. `exprs(ASEQ)`. #' #' @keywords source_specifications #' @family source_specifications @@ -793,7 +886,8 @@ tte_source <- function(dataset_name, filter = NULL, date, censor = 0, - set_values_to = NULL) { + set_values_to = NULL, + order = order) { out <- list( dataset_name = assert_character_scalar(dataset_name), filter = assert_filter_cond(enexpr(filter), optional = TRUE), @@ -803,7 +897,8 @@ tte_source <- function(dataset_name, set_values_to, named = TRUE, optional = TRUE - ) + ), + order = order ) class(out) <- c("tte_source", "source", "list") out @@ -844,13 +939,15 @@ tte_source <- function(dataset_name, event_source <- function(dataset_name, filter = NULL, date, - set_values_to = NULL) { + set_values_to = NULL, + order = NULL) { out <- tte_source( dataset_name = assert_character_scalar(dataset_name), filter = !!enexpr(filter), date = !!assert_expr(enexpr(date)), censor = 0, - set_values_to = set_values_to + set_values_to = set_values_to, + order = order ) class(out) <- c("event_source", class(out)) out @@ -891,13 +988,15 @@ censor_source <- function(dataset_name, filter = NULL, date, censor = 1, - set_values_to = NULL) { + set_values_to = NULL, + order = NULL) { out <- tte_source( dataset_name = assert_character_scalar(dataset_name), filter = !!enexpr(filter), date = !!assert_expr(enexpr(date)), censor = assert_integer_scalar(censor, subset = "positive"), - set_values_to = set_values_to + set_values_to = set_values_to, + order = order ) class(out) <- c("censor_source", class(out)) out diff --git a/R/filter_extreme.R b/R/filter_extreme.R index 63bf63838d..7b53645c1f 100644 --- a/R/filter_extreme.R +++ b/R/filter_extreme.R @@ -106,7 +106,7 @@ filter_extreme <- function(dataset, check_type <- assert_character_scalar( check_type, - values = c("none", "warning", "error"), + values = c("none", "warning", "error", "message"), case_sensitive = FALSE ) assert_data_frame(dataset, required_vars = by_vars) diff --git a/inst/WORDLIST b/inst/WORDLIST index c91f003639..5b8d21b936 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -312,6 +312,7 @@ msec nd occds onwards +param parttime pharmaverse pharmaverseadam diff --git a/man/censor_source.Rd b/man/censor_source.Rd index 263ac16d6a..ff887ff87d 100644 --- a/man/censor_source.Rd +++ b/man/censor_source.Rd @@ -9,7 +9,8 @@ censor_source( filter = NULL, date, censor = 1, - set_values_to = NULL + set_values_to = NULL, + order = NULL ) } \arguments{ @@ -36,6 +37,13 @@ censoring.} \item{set_values_to}{A named list returned by \code{exprs()} defining the variables to be set for the event or censoring, e.g. \code{exprs(EVENTDESC = "DEATH", SRCDOM = "ADSL", SRCVAR = "DTHDT")}. The values must be a symbol, a character string, a numeric value, an expression, or \code{NA}.} + +\item{order}{Sort order + +An optional named list returned by \code{exprs()} defining additional variables +that the source dataset is sorted on after \code{date}. + +\emph{Permitted Values:} list of variables created by \code{exprs()} e.g. \code{exprs(ASEQ)}.} } \value{ An object of class \code{censor_source}, inheriting from class \code{tte_source} diff --git a/man/derive_param_tte.Rd b/man/derive_param_tte.Rd index 7dadeae6f8..ed312f5244 100644 --- a/man/derive_param_tte.Rd +++ b/man/derive_param_tte.Rd @@ -14,7 +14,8 @@ derive_param_tte( censor_conditions, create_datetime = FALSE, set_values_to, - subject_keys = get_admiral_option("subject_keys") + subject_keys = get_admiral_option("subject_keys"), + check_type = "warning" ) } \arguments{ @@ -76,6 +77,15 @@ expressions, or \code{NA}.} \item{subject_keys}{Variables to uniquely identify a subject A list of symbols created using \code{exprs()} is expected.} + +\item{check_type}{Check uniqueness + +If \code{"warning"}, \code{"message"}, or \code{"error"} is specified, the specified message is issued +if the observations of the source datasets are not unique with respect to the +by variables and the date and order specified in the \code{event_source()} and +\code{censor_source()} objects. + +\emph{Permitted Values}: \code{"none"}, \code{"message"}, \code{"warning"}, \code{"error"}} } \value{ The input dataset with the new parameter added @@ -278,10 +288,10 @@ adsl <- tribble( mutate(STUDYID = "AB42") ae <- tribble( - ~USUBJID, ~AESTDTC, ~AESEQ, ~AEDECOD, - "01", "2021-01-03T10:56", 1, "Flu", - "01", "2021-03-04", 2, "Cough", - "01", "2021", 3, "Flu" + ~USUBJID, ~AESTDTC, ~AESEQ, ~AEDECOD, + "01", "2021-01-03T10:56", 1, "Flu", + "01", "2021-03-04", 2, "Cough", + "01", "2021", 3, "Flu" ) \%>\% mutate(STUDYID = "AB42") @@ -329,6 +339,50 @@ derive_param_tte( ) ) \%>\% select(USUBJID, STARTDT, PARAMCD, PARAM, ADT, CNSR, SRCSEQ) + +# Resolve tie when serious AE share a date by sorting with order argument +adsl <- tribble( + ~USUBJID, ~TRTSDT, ~EOSDT, + "01", ymd("2020-12-06"), ymd("2021-03-06"), + "02", ymd("2021-01-16"), ymd("2021-02-03") +) \%>\% mutate(STUDYID = "AB42") + +ae <- tribble( + ~USUBJID, ~AESTDTC, ~AESEQ, ~AESER, ~AEDECOD, + "01", "2021-01-03", 1, "Y", "Flu", + "01", "2021-01-03", 2, "Y", "Cough", + "01", "2021-01-20", 3, "N", "Headache", +) \%>\% mutate( + AESTDT = ymd(AESTDTC), + STUDYID = "AB42" +) + +derive_param_tte( + dataset_adsl = adsl, + start_date = TRTSDT, + source_datasets = list(adsl = adsl, ae = ae), + event_conditions = list(event_source( + dataset_name = "ae", + date = AESTDT, + set_values_to = exprs( + EVENTDESC = "Serious AE", + SRCSEQ = AESEQ + ), + filter = AESER == "Y", + order = exprs(AESEQ) + )), + censor_conditions = list(censor_source( + dataset_name = "adsl", + date = EOSDT, + censor = 1, + set_values_to = exprs(EVENTDESC = "End of Study") + )), + set_values_to = exprs( + PARAMCD = "TTSAE", + PARAM = "Time to First Serious AE" + ) +) + } \seealso{ \code{\link[=event_source]{event_source()}}, \code{\link[=censor_source]{censor_source()}} diff --git a/man/derive_vars_merged.Rd b/man/derive_vars_merged.Rd index 700612a4fc..d9d2db5061 100644 --- a/man/derive_vars_merged.Rd +++ b/man/derive_vars_merged.Rd @@ -133,7 +133,7 @@ specified for \code{new_vars} can be specified for \code{missing_values}. \item{check_type}{Check uniqueness? -If \code{"warning"} or \code{"error"} is specified, the specified message is issued +If \code{"warning"}, \code{"message"}, or \code{"error"} is specified, the specified message is issued if the observations of the (restricted) additional dataset are not unique with respect to the by variables and the order. @@ -141,7 +141,7 @@ If the \code{order} argument is not specified, the \code{check_type} argument is if the observations of the (restricted) additional dataset are not unique with respect to the by variables, an error is issued. -\emph{Permitted Values}: \code{"none"}, \code{"warning"}, \code{"error"}} +\emph{Permitted Values}: \code{"none"}, \code{"message"}, \code{"warning"}, \code{"error"}} \item{duplicate_msg}{Message of unique check diff --git a/man/derive_vars_merged_lookup.Rd b/man/derive_vars_merged_lookup.Rd index 6f057576b5..3d2b503dd5 100644 --- a/man/derive_vars_merged_lookup.Rd +++ b/man/derive_vars_merged_lookup.Rd @@ -94,7 +94,7 @@ condition. \item{check_type}{Check uniqueness? -If \code{"warning"} or \code{"error"} is specified, the specified message is issued +If \code{"warning"}, \code{"message"}, or \code{"error"} is specified, the specified message is issued if the observations of the (restricted) additional dataset are not unique with respect to the by variables and the order. @@ -102,7 +102,7 @@ If the \code{order} argument is not specified, the \code{check_type} argument is if the observations of the (restricted) additional dataset are not unique with respect to the by variables, an error is issued. -\emph{Permitted Values}: \code{"none"}, \code{"warning"}, \code{"error"}} +\emph{Permitted Values}: \code{"none"}, \code{"message"}, \code{"warning"}, \code{"error"}} \item{duplicate_msg}{Message of unique check diff --git a/man/event_source.Rd b/man/event_source.Rd index 03e7b4913c..8a721a409e 100644 --- a/man/event_source.Rd +++ b/man/event_source.Rd @@ -4,7 +4,13 @@ \alias{event_source} \title{Create an \code{event_source} Object} \usage{ -event_source(dataset_name, filter = NULL, date, set_values_to = NULL) +event_source( + dataset_name, + filter = NULL, + date, + set_values_to = NULL, + order = NULL +) } \arguments{ \item{dataset_name}{The name of the source dataset @@ -25,6 +31,13 @@ date from a date character vector to a date object.} \item{set_values_to}{A named list returned by \code{exprs()} defining the variables to be set for the event or censoring, e.g. \code{exprs(EVENTDESC = "DEATH", SRCDOM = "ADSL", SRCVAR = "DTHDT")}. The values must be a symbol, a character string, a numeric value, an expression, or \code{NA}.} + +\item{order}{Sort order + +An optional named list returned by \code{exprs()} defining additional variables +that the source dataset is sorted on after \code{date}. + +\emph{Permitted Values:} list of variables created by \code{exprs()} e.g. \code{exprs(ASEQ)}.} } \value{ An object of class \code{event_source}, inheriting from class \code{tte_source} diff --git a/man/filter_date_sources.Rd b/man/filter_date_sources.Rd new file mode 100644 index 0000000000..d411ec9e1e --- /dev/null +++ b/man/filter_date_sources.Rd @@ -0,0 +1,135 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/derive_param_tte.R +\name{filter_date_sources} +\alias{filter_date_sources} +\title{Select the First or Last Date from Several Sources} +\usage{ +filter_date_sources( + sources, + source_datasets, + by_vars, + create_datetime = FALSE, + subject_keys, + mode, + check_type = "none" +) +} +\arguments{ +\item{sources}{Sources + +A list of \code{tte_source()} objects is expected.} + +\item{source_datasets}{Source datasets + +A named list of datasets is expected. The \code{dataset_name} field of +\code{tte_source()} refers to the dataset provided in the list.} + +\item{by_vars}{By variables + +If the parameter is specified, for each by group the observations are +selected separately. + +\emph{Permitted Values}: list of variables created by \code{exprs()} +e.g. \code{exprs(USUBJID, VISIT)}} + +\item{create_datetime}{Create datetime variable? + +If set to \code{TRUE}, variables \code{ADTM} is created. Otherwise, variables \code{ADT} +is created.} + +\item{subject_keys}{Variables to uniquely identify a subject + +A list of symbols created using \code{exprs()} is expected.} + +\item{mode}{Selection mode (first or last) + +If \code{"first"} is specified, for each subject the first observation with +respect to the date is included in the output dataset. If \code{"last"} is +specified, the last observation is included in the output dataset. + +Permitted Values: \code{"first"}, \code{"last"}} + +\item{check_type}{Check uniqueness + +If \code{"warning"}, \code{"message"}, or \code{"error"} is specified, the specified message is issued +if the observations of the source datasets are not unique with respect to the +by variables and the date and order specified in the \code{tte_source()} objects. + +Default: \code{"none"} +Permitted Values: \code{"none"}, \code{"warning"}, \code{"error"}, \code{"message"}} +} +\value{ +A dataset with one observation per subject as described in the +"Details" section. +} +\description{ +Select for each subject the first or last observation with respect to a date +from a list of sources. +} +\details{ +The following steps are performed to create the output dataset: + +\enumerate{ \item For each source dataset the observations as specified by +the \code{filter} element are selected. Then for each patient the first or last +observation (with respect to \code{date}) is selected. + +\item The \code{ADT} variable is set to the variable specified by the +\code{date} element. If the date variable is a datetime variable, only +the datepart is copied. If the source variable is a character variable, it +is converted to a date. If the date is incomplete, it is imputed as +the first possible date. + +\item The \code{CNSR} is added and set to the value of the \code{censor} +element. + +\item The selected observations of all source datasets are combined into a +single dataset. + +\item For each patient the first or last observation (with respect to the +\code{ADT} variable) from the single dataset is selected. } +} +\examples{ +library(tibble) +library(dplyr, warn.conflicts = FALSE) +library(lubridate) + +adsl <- tribble( + ~USUBJID, ~TRTSDT, ~EOSDT, + "01", ymd("2020-12-06"), ymd("2021-03-06"), + "02", ymd("2021-01-16"), ymd("2021-02-03") +) \%>\% + mutate(STUDYID = "AB42") + +ae <- tribble( + ~USUBJID, ~AESTDTC, ~AESEQ, ~AEDECOD, + "01", "2021-01-03", 1, "Flu", + "01", "2021-03-04", 2, "Cough", + "01", "2021-01-01", 3, "Flu" +) \%>\% + mutate( + STUDYID = "AB42", + AESTDT = ymd(AESTDTC) + ) + +ttae <- event_source( + dataset_name = "ae", + date = AESTDT, + set_values_to = exprs( + EVNTDESC = "AE", + SRCDOM = "AE", + SRCVAR = "AESTDTC", + SRCSEQ = AESEQ + ) +) + +admiral:::filter_date_sources( + sources = list(ttae), + source_datasets = list(adsl = adsl, ae = ae), + by_vars = exprs(AEDECOD), + create_datetime = FALSE, + subject_keys = get_admiral_option("subject_keys"), + mode = "first", + check_type = "none" +) +} +\keyword{internal} diff --git a/man/tte_source.Rd b/man/tte_source.Rd index f63a36d51d..c2c3b56ff7 100644 --- a/man/tte_source.Rd +++ b/man/tte_source.Rd @@ -4,7 +4,14 @@ \alias{tte_source} \title{Create a \code{tte_source} Object} \usage{ -tte_source(dataset_name, filter = NULL, date, censor = 0, set_values_to = NULL) +tte_source( + dataset_name, + filter = NULL, + date, + censor = 0, + set_values_to = NULL, + order = order +) } \arguments{ \item{dataset_name}{The name of the source dataset @@ -30,6 +37,13 @@ censoring.} \item{set_values_to}{A named list returned by \code{exprs()} defining the variables to be set for the event or censoring, e.g. \code{exprs(EVENTDESC = "DEATH", SRCDOM = "ADSL", SRCVAR = "DTHDT")}. The values must be a symbol, a character string, a numeric value, an expression, or \code{NA}.} + +\item{order}{Sort order + +An optional named list returned by \code{exprs()} defining additional variables +that the source dataset is sorted on after \code{date}. + +\emph{Permitted Values:} list of variables created by \code{exprs()} e.g. \code{exprs(ASEQ)}.} } \value{ An object of class \code{tte_source} diff --git a/tests/testthat/_snaps/derive_param_tte.md b/tests/testthat/_snaps/derive_param_tte.md index f1438eca6c..21b0a8c31c 100644 --- a/tests/testthat/_snaps/derive_param_tte.md +++ b/tests/testthat/_snaps/derive_param_tte.md @@ -61,7 +61,25 @@ i Following names were provided by `source_datasets`: "adsl" i But, `censor_conditions[[1]]$dataset_name = adls` -# list_tte_source_objects Test 14: error is issued if package does not exist +# derive_param_tte Test 14: detects duplicates in input datasets via pipeline functions + + Code + derive_param_tte(dataset_adsl = adsl, start_date = TRTSDT, event_conditions = list( + ttae), censor_conditions = list(eot), source_datasets = list(adsl = adsl, ae = ae), + set_values_to = exprs(PARAMCD = "TTAE"), check_type = "warning") + Condition + Warning: + Dataset contains duplicate records with respect to `STUDYID`, `USUBJID`, and `AESTDT` + i Run `admiral::get_duplicates_dataset()` to access the duplicate records + Output + # A tibble: 2 x 10 + USUBJID STUDYID EVENTDESC SRCDOM SRCVAR SRCSEQ CNSR ADT STARTDT + + 1 01 AB42 AE AE AESTDTC 1 0 2021-01-03 2020-12-06 + 2 02 AB42 END OF TRT ADSL TRTEDT NA 1 2021-01-30 2021-01-16 + # i 1 more variable: PARAMCD + +# list_tte_source_objects Test 17: error is issued if package does not exist Code list_tte_source_objects(package = "tte") diff --git a/tests/testthat/test-derive_param_tte.R b/tests/testthat/test-derive_param_tte.R index 34067442da..39cf295c5f 100644 --- a/tests/testthat/test-derive_param_tte.R +++ b/tests/testthat/test-derive_param_tte.R @@ -932,17 +932,200 @@ test_that("derive_param_tte Test 13: error if dataset_name not in source_datsets ) }) +## Test 14: derive_param_tte detects duplicates when check_type = 'warning' ---- +test_that("derive_param_tte Test 14: detects duplicates in input datasets via pipeline functions", { + # Define ADSL dataset + adsl <- tibble::tribble( + ~USUBJID, ~TRTSDT, ~TRTEDT, ~EOSDT, + "01", ymd("2020-12-06"), ymd("2021-03-02"), ymd("2021-03-06"), + "02", ymd("2021-01-16"), ymd("2021-01-20"), ymd("2021-02-03") + ) %>% mutate(STUDYID = "AB42") + + # Define AE dataset with duplicates + ae <- tibble::tribble( + ~USUBJID, ~AESTDTC, ~AESEQ, ~AEDECOD, + "01", "2021-01-03", 1, "Flu", + "01", "2021-03-04", 2, "Cough", + "01", "2021-01-03", 3, "Flu" + ) %>% mutate( + STUDYID = "AB42", + AESTDT = ymd(AESTDTC) + ) + + # Define event and censor sources + ttae <- event_source( + dataset_name = "ae", + date = AESTDT, + set_values_to = exprs( + EVENTDESC = "AE", + SRCDOM = "AE", + SRCVAR = "AESTDTC", + SRCSEQ = AESEQ + ) + ) + + eot <- censor_source( + dataset_name = "adsl", + date = pmin(TRTEDT + days(10), EOSDT), + censor = 1, + set_values_to = exprs( + EVENTDESC = "END OF TRT", + SRCDOM = "ADSL", + SRCVAR = "TRTEDT" + ) + ) + + # Run derive_param_tte and check for warning + expect_snapshot( + derive_param_tte( + dataset_adsl = adsl, + start_date = TRTSDT, + event_conditions = list(ttae), + censor_conditions = list(eot), + source_datasets = list(adsl = adsl, ae = ae), + set_values_to = exprs(PARAMCD = "TTAE"), + check_type = "warning" + ) + ) +}) + +## Test 15: derive_param_tte resolves ties using order argument when input is sorted descending +test_that("derive_param_tte Test 15: derive_param_tte resolves ties using order argument when +input is sorted descending", { + adsl <- tibble::tribble( + ~USUBJID, ~TRTSDT, ~EOSDT, + "01", ymd("2020-12-06"), ymd("2021-03-06"), + "02", ymd("2021-01-16"), ymd("2021-02-03") + ) %>% + mutate(STUDYID = "AB42") + + # Sort the input AE dataset in descending order by AESEQ + # to confirm that the order argument re-sorts it correctly. + ae <- tibble::tribble( + ~USUBJID, ~AESTDTC, ~AESEQ, ~AESER, ~AEDECOD, + "01", "2021-01-03", 2, "Y", "Cough", + "01", "2021-01-03", 1, "Y", "Flu", + "01", "2021-01-20", 3, "N", "Headache" + ) %>% + mutate( + STUDYID = "AB42", + AESTDT = ymd(AESTDTC) + ) %>% + arrange(desc(AESEQ)) # Intentionally sort descending to test the order argument + + result <- derive_param_tte( + dataset_adsl = adsl, + start_date = TRTSDT, + event_conditions = list(event_source( + dataset_name = "ae", + date = AESTDT, + set_values_to = exprs( + EVENTDESC = "Serious AE", + SRCSEQ = AESEQ + ), + filter = AESER == "Y", + order = exprs(AESEQ) # Should re-sort so that AESEQ=1 (Flu) is chosen on tie + )), + censor_conditions = list(censor_source( + dataset_name = "adsl", + date = EOSDT, + censor = 1, + set_values_to = exprs(EVENTDESC = "End of Study") + )), + set_values_to = exprs( + PARAMCD = "TTSAE", + PARAM = "Time to First Serious AE" + ), + source_datasets = list(adsl = adsl, ae = ae) + ) + + # Check that for USUBJID = "01", the first serious AE selected is the one with AESEQ = 1 (Flu), + # despite the input AE data initially being arranged to show AESEQ=2 (Cough) first. + selected_seq <- result %>% + filter(USUBJID == "01", PARAMCD == "TTSAE") %>% + pull(SRCSEQ) + + expect_equal(selected_seq, 1, info = "The order argument should ensure AE with AESEQ=1 + is chosen on tie.") +}) + +## Test 16: derive_param_tte produces consistent results regardless of input sort order ---- +test_that("derive_param_tte Test 16: produces consistent results regardless of input sort order", { + # Define ADSL dataset + adsl <- tibble::tribble( + ~USUBJID, ~TRTSDT, ~TRTEDT, ~EOSDT, + "01", ymd("2020-12-06"), ymd("2021-03-02"), ymd("2021-03-06"), + "02", ymd("2021-01-16"), ymd("2021-01-20"), ymd("2021-02-03") + ) %>% mutate(STUDYID = "AB42") + + # Define AE dataset with duplicates + ae <- tibble::tribble( + ~USUBJID, ~AESTDTC, ~AESEQ, ~AEDECOD, + "01", "2021-01-03", 1, "Flu", + "01", "2021-03-04", 2, "Cough", + "01", "2021-01-03", 3, "Flu" + ) %>% mutate(STUDYID = "AB42", AESTDT = ymd(AESTDTC)) + + # Define event and censor sources + ttae <- event_source( + dataset_name = "ae", + date = AESTDT, + order = exprs(AESTDT, AESEQ), + set_values_to = exprs( + EVENTDESC = "AE", + SRCDOM = "AE", + SRCVAR = "AESTDTC", + SRCSEQ = AESEQ # Ensure AESEQ is included here + ) + ) + + eot <- censor_source( + dataset_name = "adsl", + date = pmin(TRTEDT + days(10), EOSDT), + censor = 1, + set_values_to = exprs( + EVENTDESC = "END OF TRT", + SRCDOM = "ADSL", + SRCVAR = "TRTEDT" + ) + ) + + # Run derive_param_tte with sorted AE dataset + result_sorted <- derive_param_tte( + dataset_adsl = adsl, + start_date = TRTSDT, + event_conditions = list(ttae), + censor_conditions = list(eot), + source_datasets = list(adsl = adsl, ae = arrange(ae, AESEQ)), + set_values_to = exprs(PARAMCD = "TTAE"), + check_type = "warning" + ) + + # Run derive_param_tte with reverse-sorted AE dataset + result_unsorted <- derive_param_tte( + dataset_adsl = adsl, + start_date = TRTSDT, + event_conditions = list(ttae), + censor_conditions = list(eot), + source_datasets = list(adsl = adsl, ae = arrange(ae, desc(AESEQ))), + set_values_to = exprs(PARAMCD = "TTAE"), + check_type = "warning" + ) + + expect_equal(result_sorted, result_unsorted) +}) + # list_tte_source_objects ---- -## Test 14: error is issued if package does not exist ---- -test_that("list_tte_source_objects Test 14: error is issued if package does not exist", { +## Test 17: error is issued if package does not exist ---- +test_that("list_tte_source_objects Test 17: error is issued if package does not exist", { expect_snapshot( list_tte_source_objects(package = "tte"), error = TRUE ) }) -## Test 15: expected objects produced ---- -test_that("list_tte_source_objects Test 15: expected objects produced", { +## Test 18: expected objects produced ---- +test_that("list_tte_source_objects Test 18: expected objects produced", { expected_output <- tibble::tribble( ~object, ~dataset_name, ~filter, "ae_ser_event", "adae", quote(TRTEMFL == "Y" & AESER == "Y"), diff --git a/tests/testthat/test-user_utils.R b/tests/testthat/test-user_utils.R index 317363665e..20ff1e624d 100644 --- a/tests/testthat/test-user_utils.R +++ b/tests/testthat/test-user_utils.R @@ -165,7 +165,8 @@ test_that("print.source Test 13: `source` objects are printed as intended", { " EVENTDESC: \"AE\"", " SRCDOM: \"AE\"", " SRCVAR: \"AESTDTC\"", - " SRCSEQ: AESEQ" + " SRCSEQ: AESEQ", + "order: NULL" ) expect_identical(capture.output(print(ttae)), expected_print_output) })