diff --git a/NEWS.md b/NEWS.md index 48a221192d..46306889d1 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,12 +6,16 @@ - `derive_extreme_records()`, `derive_var_extreme_flag()`,`derive_vars_joined()` and `derive_vars_merged()` were enhanced with the arguments `true_value` and `false_value` to align with preexisting functions that had similar functionality (#2125) -- `restrict_derivation()` now allows `{dplyr}` functions like `mutate` in the `derivation argument (#2143) +- `restrict_derivation()` now allows `{dplyr}` functions like `mutate` in the +`derivation` argument (#2143) - `derive_summary_records()`, `derive_var_merged_summary()`, and `get_summary_records()` were enhanced such that more than one summary variable can be derived, e.g., `AVAL` as the sum and `ADT` as the maximum of the contributing records. (#1792) +- The `tmp_event_nr_var` argument was added to `derive_extreme_records()` to +allow more control of the selection of records. It creates a temporary variable +for the event number, which can be used in `order`. (#2140) ## Breaking Changes @@ -28,6 +32,31 @@ were enhanced such that more than one summary variable can be derived, e.g., - The default value for the `false_value` argument in `derive_extreme_records()` was changed to `NA_character_` (#2125) +- The `ignore_event_order` argument in `derive_extreme_event()` was deprecated +and the selection of the records was changed to allow more control. Before, the +records were selected first by event and then by `order`. Now they are selected +by `order` only but the event number can be added to it. + + To achieve the old behavior update +``` +order = exprs(my_order_var), +ignore_event_order = FALSE, +``` +to +``` +tmp_event_nr_var = event_nr, +order = exprs(event_nr, my_order_var), +``` +and +``` +order = exprs(my_order_var), +ignore_event_order = TRUE, +``` +to +``` +order = exprs(my_order_var), +``` + - The following functions, which were deprecated in previous `{admiral}` versions, have been removed: (#2098) - `derive_param_extreme_event()` - `derive_vars_last_dose()` diff --git a/R/derive_extreme_event.R b/R/derive_extreme_event.R index aa4d01038c..5537176244 100644 --- a/R/derive_extreme_event.R +++ b/R/derive_extreme_event.R @@ -18,6 +18,16 @@ #' For `event_joined()` events the observations are selected by calling #' `filter_joined`. The `condition` field is passed to the `filter` argument. #' +#' @param tmp_event_nr_var Temporary event number variable +#' +#' The specified variable is added to all source datasets and is set to the +#' number of the event before selecting the records of the event. +#' +#' It can be used in `order` to determine which record should be used if +#' records from more than one event are selected. +#' +#' The variable is not included in the output dataset. +#' #' @param order Sort order #' #' If a particular event from `events` has more than one observation, within @@ -41,6 +51,13 @@ #' #' @param ignore_event_order Ignore event order #' +#' `r lifecycle::badge("deprecated")` +#' +#' This argument is *deprecated*. If event order should be ignored, please +#' specify neither `ignore_event_order` nor `tmp_event_nr_var`. If the event +#' order should be considered, don't specify `ignore_event_order` but specify +#' `tmp_event_nr_var` and and the specified variable to `order`. +#' #' If the argument is set to `TRUE`, all events defined by `events` are #' considered equivalent. If there is more than one observation per by group #' the first or last (with respect to `mode` and `order`) is select without @@ -73,12 +90,14 @@ #' #' 1. The variables specified by the `set_values_to` field of the event #' are added to the selected observations. +#' 1. The variable specified for `tmp_event_nr_var` is added and set to +#' the number of the event. #' 1. Only the variables specified for the `keep_source_vars` field of the #' event, and the by variables (`by_vars`) and the variables created by #' `set_values_to` are kept. +#' 1. All selected observations are bound together. #' 1. For each group (with respect to the variables specified for the -#' `by_vars` parameter) the first event is selected. If there is more than one -#' observation per event the first or last observation (with respect to the +#' `by_vars` parameter) the first or last observation (with respect to the #' order specified for the `order` parameter and the mode specified for the #' `mode` parameter) is selected. #' 1. The variables specified by the `set_values_to` parameter are added to @@ -141,8 +160,9 @@ #' set_values_to = exprs(AVALC = "Missing", AVAL = 99) #' ) #' ), -#' order = exprs(ADY), -#' mode = "last", +#' tmp_event_nr_var = event_nr, +#' order = exprs(event_nr, desc(ADY)), +#' mode = "first", #' set_values_to = exprs( #' PARAMCD = "WSP", #' PARAM = "Worst Sleeping Problems" @@ -178,7 +198,8 @@ #' set_values_to = exprs(AVALC = "Y") #' ) #' ), -#' order = exprs(AVISITN), +#' tmp_event_nr_var = event_nr, +#' order = exprs(event_nr, AVISITN), #' mode = "first", #' keep_source_vars = exprs(AVISITN), #' set_values_to = exprs( @@ -243,7 +264,8 @@ #' derive_extreme_event( #' adrs, #' by_vars = exprs(USUBJID), -#' order = exprs(ADT), +#' tmp_event_nr_var = event_nr, +#' order = exprs(event_nr, ADT), #' mode = "first", #' source_datasets = list(adsl = adsl), #' events = list( @@ -321,6 +343,7 @@ derive_extreme_event <- function(dataset, by_vars = NULL, events, + tmp_event_nr_var = NULL, order, mode, source_datasets = NULL, @@ -355,7 +378,29 @@ derive_extreme_event <- function(dataset, ) } - assert_logical_scalar(ignore_event_order) + if (!missing(ignore_event_order)) { + assert_logical_scalar(ignore_event_order) + if (ignore_event_order) { + deprecate_details <- paste( + "The event order is ignored by default.", + "Please remove `ignore_event_order = TRUE` from the call.", + sep = "\n" + ) + } else { + deprecate_details <- c( + "Please remove `ignore_event_order = FALSE` from the call.", + "Specify `tmp_event_nr_var`.", + "Add the specified variable to `order`." + ) + } + deprecate_warn( + "1.0.0", + "derive_extreme_event(ignore_event_order=)", + "derive_extreme_event(tmp_event_nr_var=)", + details = deprecate_details + ) + } + tmp_event_nr_var <- assert_symbol(enexpr(tmp_event_nr_var), optional = TRUE) check_type <- assert_character_scalar( check_type, @@ -368,11 +413,6 @@ derive_extreme_event <- function(dataset, # Create new observations ## Create a dataset (selected_records) from `events` event_index <- as.list(seq_along(events)) - if (ignore_event_order) { - tmp_event_no <- NULL - } else { - tmp_event_no <- get_new_tmp_var(dataset, prefix = "tmp_event_no") - } selected_records_ls <- map2( events, @@ -383,6 +423,9 @@ derive_extreme_event <- function(dataset, } else { data_source <- source_datasets[[event$dataset_name]] } + if (!is.null(tmp_event_nr_var)) { + data_source <- mutate(data_source, !!tmp_event_nr_var := index) + } if (is.null(event$order)) { event_order <- order } else { @@ -417,41 +460,26 @@ derive_extreme_event <- function(dataset, } else { event_keep_source_vars <- event$keep_source_vars } - if (!ignore_event_order) { - data_events <- mutate(data_events, !!tmp_event_no := index) - } data_events %>% process_set_values_to(set_values_to = event$set_values_to) %>% - select(!!!event_keep_source_vars, !!tmp_event_no, !!!by_vars, names(event$set_values_to)) + select( + !!!event_keep_source_vars, !!tmp_event_nr_var, !!!by_vars, + names(event$set_values_to) + ) } ) selected_records <- bind_rows(selected_records_ls) - ## tmp obs number within by_vars and a type of event - tmp_obs <- get_new_tmp_var(selected_records) - selected_records <- selected_records %>% - derive_var_obs_number( - new_var = !!tmp_obs, - order = order, - by_vars = expr_c(by_vars, tmp_event_no), - check_type = check_type - ) - ## filter_extreme - if (mode == "first") { - tmp_obs_expr <- expr(!!tmp_obs) - } else { - tmp_obs_expr <- expr(desc(!!tmp_obs)) - } new_obs <- selected_records %>% filter_extreme( by_vars = by_vars, - order = expr_c(tmp_event_no, tmp_obs_expr), - mode = "first", + order = order, + mode = mode, check_type = check_type ) %>% mutate(!!!set_values_to) %>% - select(-!!tmp_event_no, -!!tmp_obs) + select(-!!tmp_event_nr_var) # Create output dataset bind_rows(dataset, new_obs) diff --git a/man/derive_extreme_event.Rd b/man/derive_extreme_event.Rd index c21de5a24c..13311b72b9 100644 --- a/man/derive_extreme_event.Rd +++ b/man/derive_extreme_event.Rd @@ -8,6 +8,7 @@ derive_extreme_event( dataset, by_vars = NULL, events, + tmp_event_nr_var = NULL, order, mode, source_datasets = NULL, @@ -39,6 +40,16 @@ functions like \code{all()} or \code{any()} can be used in \code{condition}. For \code{event_joined()} events the observations are selected by calling \code{filter_joined}. The \code{condition} field is passed to the \code{filter} argument.} +\item{tmp_event_nr_var}{Temporary event number variable + +The specified variable is added to all source datasets and is set to the +number of the event before selecting the records of the event. + +It can be used in \code{order} to determine which record should be used if +records from more than one event are selected. + +The variable is not included in the output dataset.} + \item{order}{Sort order If a particular event from \code{events} has more than one observation, within @@ -62,6 +73,13 @@ and \code{event_joined()} refers to the dataset provided in the list.} \item{ignore_event_order}{Ignore event order +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} + +This argument is \emph{deprecated}. If event order should be ignored, please +specify neither \code{ignore_event_order} nor \code{tmp_event_nr_var}. If the event +order should be considered, don't specify \code{ignore_event_order} but specify +\code{tmp_event_nr_var} and and the specified variable to \code{order}. + If the argument is set to \code{TRUE}, all events defined by \code{events} are considered equivalent. If there is more than one observation per by group the first or last (with respect to \code{mode} and \code{order}) is select without @@ -134,13 +152,15 @@ If the event is of class \code{event_joined}, \code{filter_joined()} is called t select the observations. \item The variables specified by the \code{set_values_to} field of the event are added to the selected observations. +\item The variable specified for \code{tmp_event_nr_var} is added and set to +the number of the event. \item Only the variables specified for the \code{keep_source_vars} field of the event, and the by variables (\code{by_vars}) and the variables created by \code{set_values_to} are kept. } +\item All selected observations are bound together. \item For each group (with respect to the variables specified for the -\code{by_vars} parameter) the first event is selected. If there is more than one -observation per event the first or last observation (with respect to the +\code{by_vars} parameter) the first or last observation (with respect to the order specified for the \code{order} parameter and the mode specified for the \code{mode} parameter) is selected. \item The variables specified by the \code{set_values_to} parameter are added to @@ -193,8 +213,9 @@ derive_extreme_event( set_values_to = exprs(AVALC = "Missing", AVAL = 99) ) ), - order = exprs(ADY), - mode = "last", + tmp_event_nr_var = event_nr, + order = exprs(event_nr, desc(ADY)), + mode = "first", set_values_to = exprs( PARAMCD = "WSP", PARAM = "Worst Sleeping Problems" @@ -230,7 +251,8 @@ derive_extreme_event( set_values_to = exprs(AVALC = "Y") ) ), - order = exprs(AVISITN), + tmp_event_nr_var = event_nr, + order = exprs(event_nr, AVISITN), mode = "first", keep_source_vars = exprs(AVISITN), set_values_to = exprs( @@ -295,7 +317,8 @@ adrs <- tribble( derive_extreme_event( adrs, by_vars = exprs(USUBJID), - order = exprs(ADT), + tmp_event_nr_var = event_nr, + order = exprs(event_nr, ADT), mode = "first", source_datasets = list(adsl = adsl), events = list( diff --git a/tests/testthat/test-derive_extreme_event.R b/tests/testthat/test-derive_extreme_event.R index 426f1f8382..23711ceb8f 100644 --- a/tests/testthat/test-derive_extreme_event.R +++ b/tests/testthat/test-derive_extreme_event.R @@ -50,7 +50,8 @@ test_that("derive_extreme_records Test 1: `mode` = first", { set_values_to = exprs(AVALC = "Missing", AVAL = 99) ) ), - order = exprs(ADY), + tmp_event_nr_var = event_nr, + order = exprs(event_nr, ADY), mode = "first", set_values_to = exprs( PARAMCD = "WSP" @@ -116,7 +117,8 @@ test_that("derive_extreme_records Test 2: `mode` = last", { set_values_to = exprs(AVALC = "Missing", AVAL = 99) ) ), - order = exprs(ADY), + tmp_event_nr_var = event_nr, + order = exprs(desc(event_nr), ADY), mode = "last", set_values_to = exprs( PARAMCD = "WSP" @@ -216,7 +218,8 @@ test_that("derive_extreme_records Test 3: `source_datasets` works", { actual <- derive_extreme_event( dataset = adrs, by_vars = exprs(STUDYID, USUBJID), - order = exprs(ADT), + tmp_event_nr_var = event_nr, + order = exprs(event_nr, ADT), mode = "first", source_datasets = list(adsl = adsl), events = list( @@ -308,7 +311,8 @@ test_that("derive_extreme_records Test 4: event-specific mode", { set_values_to = exprs(AVALC = "Y") ) ), - order = exprs(AVISITN), + tmp_event_nr_var = event_nr, + order = exprs(event_nr, AVISITN), mode = "first", keep_source_vars = exprs(AVISITN), set_values_to = exprs( @@ -402,7 +406,8 @@ test_that("derive_extreme_records Test 5: event_joined() is handled correctly", derive_extreme_event( adrs, by_vars = exprs(STUDYID, USUBJID), - order = exprs(ADT), + tmp_event_nr_var = event_nr, + order = exprs(event_nr, ADT), mode = "first", source_datasets = list(adsl = adsl), events = list( @@ -511,8 +516,8 @@ test_that("derive_extreme_records Test 5: event_joined() is handled correctly", ) }) -## Test 6: ignore_event_order ---- -test_that("derive_extreme_records Test 6: ignore_event_order", { +## Test 6: no tmp_event_nr_var ---- +test_that("derive_extreme_records Test 6: no tmp_event_nr_var", { adrs <- tibble::tribble( ~USUBJID, ~AVISITN, ~AVALC, "1", 1, "PR", @@ -542,7 +547,6 @@ test_that("derive_extreme_records Test 6: ignore_event_order", { set_values_to = exprs(AVALC = "Y") ) ), - ignore_event_order = TRUE, set_values_to = exprs( PARAMCD = "CRSP" ) @@ -562,3 +566,57 @@ test_that("derive_extreme_records Test 6: ignore_event_order", { keys = c("USUBJID", "PARAMCD", "AVISITN") ) }) +## Test 7: deprecation of ignore_event_order ---- +test_that("derive_extreme_records Test 7: deprecation of ignore_event_order", { + adrs <- tibble::tribble( + ~USUBJID, ~AVISITN, ~AVALC, + "1", 1, "PR", + "1", 2, "CR", + "1", 3, "CR" + ) %>% + mutate(PARAMCD = "OVR") + + expect_warning( + actual <- derive_extreme_event( + adrs, + by_vars = exprs(USUBJID), + order = exprs(AVISITN), + mode = "first", + events = list( + event_joined( + join_vars = exprs(AVALC), + join_type = "after", + first_cond = AVALC.join == "CR", + condition = AVALC == "CR", + set_values_to = exprs(AVALC = "Y") + ), + event_joined( + join_vars = exprs(AVALC), + join_type = "after", + first_cond = AVALC.join %in% c("CR", "PR"), + condition = AVALC == "PR", + set_values_to = exprs(AVALC = "Y") + ) + ), + ignore_event_order = TRUE, + set_values_to = exprs( + PARAMCD = "CRSP" + ) + ), + class = "lifecycle_warning_deprecated" + ) + + expected <- bind_rows( + adrs, + tibble::tribble( + ~USUBJID, ~AVISITN, ~AVALC, ~PARAMCD, + "1", 1, "Y", "CRSP" + ) + ) + + expect_dfs_equal( + base = expected, + compare = actual, + keys = c("USUBJID", "PARAMCD", "AVISITN") + ) +}) diff --git a/vignettes/questionnaires.Rmd b/vignettes/questionnaires.Rmd index 12415d00ae..5a1b353e70 100644 --- a/vignettes/questionnaires.Rmd +++ b/vignettes/questionnaires.Rmd @@ -457,7 +457,8 @@ adsp <- adqs %>% filter(PARCAT1 == "SLEEPING PROBLEMS") %>% derive_extreme_event( by_vars = exprs(USUBJID, AVISIT), - order = exprs(ADY, QSSEQ), + tmp_event_nr_var = event_nr, + order = exprs(event_nr, ADY, QSSEQ), mode = "first", events = list( event(