diff --git a/.lintr b/.lintr index feb3a253..05e507a1 100644 --- a/.lintr +++ b/.lintr @@ -23,6 +23,8 @@ linters: linters_with_defaults( redundant_ifelse_linter(), sprintf_linter(), strings_as_factors_linter(), - undesirable_function_linter(c(Sys.setenv = NA_character_, mapply = NA_character_)) + undesirable_function_linter(c(Sys.setenv = NA_character_, mapply = NA_character_)), + object_name_linter = NULL, + object_usage_linter = NULL ) encoding: "UTF-8" diff --git a/NAMESPACE b/NAMESPACE index 00ec575c..ae0b7a9d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -13,9 +13,11 @@ export(create_iso8601) export(ct_map) export(ct_spec_example) export(ct_spec_vars) +export(derive_blfl) export(derive_seq) export(derive_study_day) export(domain_example) +export(dtc_timepart) export(fmt_cmp) export(generate_oak_id_vars) export(hardcode_ct) diff --git a/R/derive_blfl.R b/R/derive_blfl.R new file mode 100644 index 00000000..0c3e2508 --- /dev/null +++ b/R/derive_blfl.R @@ -0,0 +1,519 @@ +#' Extract date part from ISO8601 date/time variable +#' +#' The date part is extracted from an ISO8601 date/time variable. +#' By default, partial or missing dates are set to NA. +#' +#' @param dtc Character vector containing ISO8601 date/times. +#' @param partial_as_na Logical `TRUE` or `FALSE` indicating whether +#' partial dates should be set to NA (default is `TRUE`). +#' +#' @return Character vector containing ISO8601 dates. +#' +#' @examples +#' ## Partial or missing dates set to NA by default +#' sdtm.oak:::dtc_datepart( +#' c(NA, "", "2021", "2021-12", "2021-12-25", "2021-12-25T12:00:00") +#' ) +#' # |--> c(NA, NA, NA, NA, "2021-12-25", "2021-12-25") +#' +#' ## Prevent partial or missing dates from being set to NA +#' sdtm.oak:::dtc_datepart( +#' c(NA, "", "2021", "2021-12", "2021-12-25", "2021-12-25T12:00:00"), +#' partial_as_na = FALSE +#' ) +#' # |--> c(NA, "", "2021", "2021-12", "2021-12-25", "2021-12-25") +#' @keywords internal +dtc_datepart <- function(dtc, partial_as_na = TRUE) { + # Assert that dtc is a character vector + admiraldev::assert_character_vector(dtc) + + # Extract date part from ISO 8601 date/time variable + dt <- sub("^([^T]+).*", "\\1", dtc) + + # Set partial or missing dates to NA, depending on partial_as_na parameter + if (partial_as_na) { + dt <- ifelse(nchar(dt) < 10L, NA, dt) + } + + return(dt) +} + +#' Extract time part from ISO 8601 date/time variable +#' +#' The time part is extracted from an ISO 8601 date/time variable. +#' By default, partial or missing times are set to NA, and seconds are ignored +#' and not extracted. +#' +#' @param dtc Character vector containing ISO 8601 date/times. +#' @param partial_as_na Logical `TRUE` or `FALSE` indicating whether +#' partial times should be set to NA (default is `TRUE`). +#' @param ignore_seconds Logical `TRUE` or `FALSE` indicating whether +#' seconds should be ignored (default is `TRUE`). +#' +#' @return Character vector containing ISO 8601 times. +#' @export +#' +#' @keywords internal +#' ## Partial or missing times set to NA and seconds ignored by default +#' sdtm.oak:::dtc_timepart( +#' c(NA, "", "2021-12-25", "2021-12-25T12", "2021-12-25T12:30", "2021-12-25T12:30:59") +#' ) +#' # |--> c(NA, NA, NA, NA, "12:30", "12:30") +#' +#' ## Prevent partial or missing times from being set to NA +#' sdtm.oak:::dtc_timepart( +#' c(NA, "", "2021-12-25", "2021-12-25T12", "2021-12-25T12:30", "2021-12-25T12:30:59"), +#' partial_as_na = FALSE +#' ) +#' # |--> c(NA, "", "", "12", "12:30", "12:30") +#' +#' ## Do not ignore seconds, partial or missing times set to NA +#' sdtm.oak:::dtc_timepart( +#' c(NA, "", "2021-12-25", "2021-12-25T12", "2021-12-25T12:30", "2021-12-25T12:30:59"), +#' ignore_seconds = FALSE +#' ) +#' # |--> c(NA, NA, NA, NA, NA, "12:30:59") +#' +#' ## Do not ignore seconds and prevent partial or missing times from being set to NA +#' sdtm.oak:::dtc_timepart( +#' c(NA, "", "2021-12-25", "2021-12-25T12", "2021-12-25T12:30", "2021-12-25T12:30:59"), +#' partial_as_na = FALSE, +#' ignore_seconds = FALSE +#' ) +#' # |--> c(NA, "", "", "12", "12:30", "12:30:59") +dtc_timepart <- function(dtc, partial_as_na = TRUE, ignore_seconds = TRUE) { + # Assert that dtc is a character vector + admiraldev::assert_character_vector(dtc) + + # Determine length of time part depending on ignore_seconds parameter + tm_length <- ifelse(ignore_seconds, 5L, 8L) + + # Extract time part from ISO 8601 date/time variable + tm <- substr(sub("^([^T]+)T?", "", dtc), 1L, tm_length) + + # Set partial or missing times to NA, depending on partial_as_na parameter + if (partial_as_na) { + tm <- ifelse(nchar(tm) < tm_length, NA, tm) + } + + return(tm) +} + +#' Derive Baseline Flag or Last Observation Before Exposure Flag +#' +#' Derive the baseline flag variable (`--BLFL`) or the last observation before +#' exposure flag (`--LOBXFL`), from the observation date/time (`--DTC`), and a +#' DM domain reference date/time. +#' +#' +#' The derivation is as follows: +#' +#' - Remove records where the result (`--ORRES`) is missing. Also, exclude records +#' with results labeled as "ND" (No Data) or "NOT DONE" in the `--ORRES` column, +#' which indicate that the measurement or observation was not completed. +#' - Remove records where the status (`--STAT`) indicates the observation or test +#' was not performed, marked as "NOT DONE". +#' - Divide the date and time column (`--DTC`) and the reference date/time +#' variable (`ref_var`) into separate date and time components. Ignore +#' any seconds recorded in the time component, focusing only on hours and +#' minutes for further calculations. +#' - Set partial or missing dates to `NA`. +#' - Set partial or missing times to `NA`. +#' - Filter on rows that have domain and reference dates not equal to +#' `NA`. (Ref to as **X**) +#' - Filter **X** on rows with domain date (--DTC) prior to (less than) +#' reference date. (Ref to as **A**) +#' - Filter **X** on rows with domain date (--DTC) equal to reference date but +#' domain and reference times not equal to `NA` and domain time prior to (less +#' than) reference time. (Ref to as **B**) +#' - Filter **X** on rows with domain date (--DTC) equal to reference date but +#' domain and/or reference time equal to NA and: +#' - VISIT is in baseline visits list (if it exists) and +#' - xxTPT is in baseline timepoints list (if it exists). +#' (Ref to as **C**) +#' - Combine the rows from **A**, **B**, and **C** to get a +#' data frame of pre-reference date observations. Sort the rows by `USUBJID`, +#' `--STAT`, and `--ORRES`. +#' - Group by `USUBJID` and `--TESTCD` and filter on the rows +#' that have maximum value from `--DTC`. Keep only the oak id variables and +#' `--TESTCD` (because these are the unique values). Remove any duplicate rows. +#' Assign the baseline flag variable, `--BLFL`, the last observation before +#' exposure flag (`--LOBXFL`) variable to these rows. +#' - Join the baseline flag onto the input dataset based on oak id vars +#' +#' @param sdtm_in Input SDTM domain. +#' @param dm_domain DM domain with the reference variable `ref_var` +#' @param tgt_var Name of variable to be derived (`--BLFL` or +#' `--LOBXFL` where `--` is domain). +#' @param ref_var vector of a date/time from the +#' Demographics (DM) dataset, which serves as a point of comparison for other +#' observations in the study. Common choices for this reference variable +#' include "RFSTDTC" (the date/time of the first study treatment) or +#' "RFXSTDTC" (the date/time of the first exposure to the study drug). +#' @param baseline_visits A character vector specifying the baseline visits within the study. +#' These visits are identified as critical points for data collection at the start of the study, +#' before any intervention is applied. This allows the function to assign the baseline +#' flag if the --DTC matches to the reference date. +#' @param baseline_timepoints A character vector of timepoints values in --TPT that specifies +#' the specific timepoints during the baseline visits when key assessments or measurements were taken. +#' This allows the function to assign the baseline flag if the --DTC matches to the reference date. +#' +#' @return Modified input data frame with baseline flag variable `--BLFL` or +#' last observation before exposure flag `--LOBXFL` added. +#' +#' @export +#' +#' @examples +#' dm <- tibble::tribble( +#' ~USUBJID, ~RFSTDTC, ~RFXSTDTC, +#' "test_study-375", "2020-09-28T10:10", "2020-09-28T10:10", +#' "test_study-376", "2020-09-21T11:00", "2020-09-21T11:00", +#' "test_study-377", NA, NA, +#' "test_study-378", "2020-01-20T10:00", "2020-01-20T10:00", +#' "test_study-379", NA, NA, +#' ) +#' +#' dm +#' +#' sdtm_in <- +#' tibble::tribble( +#' ~DOMAIN, +#' ~oak_id, +#' ~raw_source, +#' ~patient_number, +#' ~USUBJID, +#' ~VSDTC, +#' ~VSTESTCD, +#' ~VSORRES, +#' ~VSSTAT, +#' ~VISIT, +#' "VS", +#' 1L, +#' "VTLS1", +#' 375L, +#' "test_study-375", +#' "2020-09-01T13:31", +#' "DIABP", +#' "90", +#' NA, +#' "SCREENING", +#' "VS", +#' 2L, +#' "VTLS1", +#' 375L, +#' "test_study-375", +#' "2020-10-01T11:20", +#' "DIABP", +#' "90", +#' NA, +#' "SCREENING", +#' "VS", +#' 1L, +#' "VTLS1", +#' 375L, +#' "test_study-375", +#' "2020-09-28T10:10", +#' "PULSE", +#' "ND", +#' NA, +#' "SCREENING", +#' "VS", +#' 2L, +#' "VTLS1", +#' 375L, +#' "test_study-375", +#' "2020-10-01T13:31", +#' "PULSE", +#' "85", +#' NA, +#' "SCREENING", +#' "VS", +#' 1L, +#' "VTLS2", +#' 375L, +#' "test_study-375", +#' "2020-09-28T10:10", +#' "SYSBP", +#' "120", +#' NA, +#' "SCREENING", +#' "VS", +#' 2L, +#' "VTLS2", +#' 375L, +#' "test_study-375", +#' "2020-09-28T10:05", +#' "SYSBP", +#' "120", +#' NA, +#' "SCREENING", +#' "VS", +#' 1L, +#' "VTLS1", +#' 376L, +#' "test_study-376", +#' "2020-09-20", +#' "DIABP", +#' "75", +#' NA, +#' "SCREENING", +#' "VS", +#' 1L, +#' "VTLS1", +#' 376L, +#' "test_study-376", +#' "2020-09-20", +#' "PULSE", +#' NA, +#' "NOT DONE", +#' "SCREENING", +#' "VS", +#' 2L, +#' "VTLS1", +#' 376L, +#' "test_study-376", +#' "2020-09-20", +#' "PULSE", +#' "110", +#' NA, +#' "SCREENING", +#' "VS", +#' 2L, +#' "VTLS1", +#' 378L, +#' "test_study-378", +#' "2020-01-20T10:00", +#' "PULSE", +#' "110", +#' NA, +#' "SCREENING", +#' "VS", +#' 3L, +#' "VTLS1", +#' 378L, +#' "test_study-378", +#' "2020-01-21T11:00", +#' "PULSE", +#' "105", +#' NA, +#' "SCREENING" +#' ) +#' +#' sdtm_in +#' +#' observed_output <- derive_blfl( +#' sdtm_in = sdtm_in, +#' dm_domain = dm, +#' tgt_var = "VSLOBXFL", +#' ref_var = "RFXSTDTC", +#' baseline_visits = c("SCREENING") +#' ) +#' observed_output +#' +derive_blfl <- function(sdtm_in, + dm_domain, + tgt_var, + ref_var, + baseline_visits = character(), + baseline_timepoints = character()) { + # Check assertions -------------------------------------------------------- + # Check variables are character scalars + admiraldev::assert_character_scalar(tgt_var) + admiraldev::assert_character_scalar(ref_var) + + # Assert that sdtm_in is a data frame, contains DOMAIN and oak id vars + admiraldev::assert_data_frame( + sdtm_in, + required_vars = rlang::syms(c("DOMAIN", oak_id_vars())) + ) + + # Assert dm_domain is data.frame + admiraldev::assert_data_frame( + dm_domain, + required_vars = rlang::syms(c("USUBJID", ref_var)) + ) + + # Get domain from input dataset + domain <- unique(sdtm_in$DOMAIN) + + admiraldev::assert_character_scalar(domain) + + # Assert that tgt_var is a concatenation of domain and "BLFL" or "LOBXFL" + admiraldev::assert_character_scalar(tgt_var, + values = c( + paste0(domain, "BLFL"), + paste0(domain, "LOBXFL") + ) + ) + + # Determine domain prefixed columns + suffixes <- + c( + "ORRES", "STAT", "TESTCD", "TPT", "DTC", "CAT", "SCAT", "LOC", "LAT", + "DIR", "METHOD", "SPEC" + ) + domain_prefixed_names <- + paste0(domain, suffixes) |> + stats::setNames(tolower(suffixes)) + + # Assert that the input dataset has a "DTC" column + admiraldev::assert_data_frame( + sdtm_in, + required_vars = rlang::syms(c(domain_prefixed_names[c( + "orres", + "stat", + "testcd", + "dtc" + )])) + ) + + # End of assertions, work begins ------------------------------------------ + # Create copy of input dataset for modification and processing + ds_mod <- sdtm_in + + # Filter out rows where --ORRES is missing. Filter out --ORRES in + # ("ND", "NOT DONE") as well. + bad_orres_rows <- is.na(ds_mod[[domain_prefixed_names["orres"]]]) | + trimws(ds_mod[[domain_prefixed_names["orres"]]]) %in% c("ND", "NOT DONE", "") + ds_mod <- ds_mod[!bad_orres_rows, ] + + # Filter out rows where --STAT is not equal to "NOT DONE" + ds_mod <- + ds_mod |> + dplyr::filter(dplyr::if_any( + dplyr::any_of(domain_prefixed_names["stat"]), + ~ !.x %in% "NOT DONE" + )) + + if (nrow(ds_mod) == 0L) { + stop(paste0( + "No rows for which both --ORRES is not missing\n and --STAT not equals to NOT DONE.\n", + " Not able to derive Baseline Flag or Last Observation Before Exposure Flag" + )) + } + + # Checking for columns of interest + con_col <- c(domain_prefixed_names[c("testcd", "dtc", "var_tpt")], "VISIT") + + # Drop those columns from the list which are not present in ds_in + con_col <- con_col[con_col %in% names(sdtm_in)] + + # Check for any column which is all NA and removing it from con_col list + h <- which(sapply(ds_mod, function(x) all(is.na(x)))) + if (any(names(h) %in% con_col)) { + h <- names(h[names(h) %in% con_col]) + # remove all NA columns from con_col + con_col <- con_col[!con_col %in% h] + } + + # Keep only USUBJID and ref_var + dm_domain <- dplyr::select(dm_domain, dplyr::all_of(c("USUBJID", ref_var))) + + # Left join dataset with dm_domain domain based on USUBJID + ds_mod <- dplyr::left_join(ds_mod, dm_domain, by = "USUBJID") + + # Split --DTC and ref_var into date and time parts + # (partial or missing dates and times set to NA) + ds_mod$dom_dt <- dtc_datepart(ds_mod[[domain_prefixed_names["dtc"]]]) + ds_mod$dom_tm <- dtc_timepart(ds_mod[[domain_prefixed_names["dtc"]]]) + ds_mod$ref_dt <- dtc_datepart(ds_mod[[ref_var]]) + ds_mod$ref_tm <- dtc_timepart(ds_mod[[ref_var]]) + + + # If VISIT not in data frame then assign it as "" for processing + if (!"VISIT" %in% names(ds_mod)) { + ds_mod[["VISIT"]] <- "" + } + + # If --TPT not in data frame then assign it as "" for processing + if (!domain_prefixed_names["tpt"] %in% names(ds_mod)) { + ds_mod[[domain_prefixed_names["tpt"]]] <- "" + } + + # Filter on rows that have domain and reference dates not equal to NA + ds_subset <- dplyr::filter(ds_mod, !is.na(dom_dt) & !is.na(ref_dt)) + + # Filter on rows with domain date prior to reference date + # (*A) + ds_subset_lt <- dplyr::filter(ds_subset, dom_dt < ref_dt) + + # Filter on rows with domain date equal to reference date but + # - domain and reference times not equal to NA and + # - domain time prior to reference time + # (*B) + ds_subset_eq_1 <- dplyr::filter( + ds_subset, dom_dt == ref_dt, + !is.na(dom_tm) & !is.na(ref_tm), + dom_tm < ref_tm + ) + + # Filter on rows with domain date equal to reference date but + # - domain and/or reference time equal to NA and + # - VISIT is in baseline visits list and + # - xxTPT is in baseline timepoints list + # (*C) + ds_subset_eq_2 <- + ds_subset |> + dplyr::filter( + dom_dt == ref_dt, + is.na(dom_tm) | is.na(ref_tm) | dom_tm == ref_tm, + (VISIT %in% baseline_visits & get(domain_prefixed_names["tpt"]) %in% baseline_timepoints) | + (VISIT %in% baseline_visits & length(baseline_timepoints) == 0L) | + (get(domain_prefixed_names["tpt"]) %in% baseline_timepoints & length(baseline_visits) == 0L) + ) + + # Combine (*A) and (*B) and (*C) + ds_base <- rbind(ds_subset_lt, ds_subset_eq_1, ds_subset_eq_2) + + # Sort the rows in ascending order with respect to columns from con_col + ds_base <- dplyr::arrange_at(ds_base, c("USUBJID", con_col)) + + if (nrow(ds_base) == 0L) { + message(paste0("There are no baseline records.")) + } + + # Group by USUBJID and --TESTCD and filter on the rows that have max value + # from --DTC. Keep only the oak id variables, --TESTCD and any key variables + # from the findings domains (because these are the unique values). + # Remove any duplicate rows. + ds_blfl <- ds_base |> + dplyr::group_by(USUBJID, .data[[domain_prefixed_names["testcd"]]]) |> + dplyr::slice_max(!!rlang::sym(domain_prefixed_names["dtc"]), na_rm = TRUE) |> + dplyr::ungroup() |> + dplyr::select( + dplyr::all_of(c(sdtm.oak::oak_id_vars(), domain_prefixed_names[["testcd"]])), + dplyr::any_of( + c(domain_prefixed_names[c( + "cat", + "scat", + "spec", + "loc", + "lat", + "dir", + "method" + )]) + ) + ) |> + dplyr::distinct() + + # Assign the baseline flag variable + ds_blfl[[tgt_var]] <- "Y" + + # Join baseline flag onto input dataset + ds_out <- dplyr::left_join(sdtm_in, ds_blfl, by = c( + domain_prefixed_names[["testcd"]], + sdtm.oak::oak_id_vars() + )) + + # Assert that merged data frame has same number of rows as input data frame + if (nrow(ds_out) != nrow(sdtm_in)) { + stop(sprintf( + "Internal error: The processed dataset was expected to have the same + number of rows (%d) as the input dataset (sdtm_in), but it actually has %d rows.", + nrow(sdtm_in), + nrow(ds_out) + )) + } + + return(ds_out) +} diff --git a/R/globals.R b/R/globals.R new file mode 100644 index 00000000..9a2998a0 --- /dev/null +++ b/R/globals.R @@ -0,0 +1,4 @@ +utils::globalVariables(c( + "USUBJID", "VISIT", "dom_dt", "dom_tm", "ref_dt", + "ref_tm" +)) diff --git a/inst/WORDLIST b/inst/WORDLIST index 8cbf012d..3f90a5e4 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -60,3 +60,7 @@ SYS SYSBP VSALL yyyy +RFSTDTC +RFXSTDTC +TPT +xxTPT diff --git a/man/derive_blfl.Rd b/man/derive_blfl.Rd new file mode 100644 index 00000000..c1fd92b1 --- /dev/null +++ b/man/derive_blfl.Rd @@ -0,0 +1,234 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/derive_blfl.R +\name{derive_blfl} +\alias{derive_blfl} +\title{Derive Baseline Flag or Last Observation Before Exposure Flag} +\usage{ +derive_blfl( + sdtm_in, + dm_domain, + tgt_var, + ref_var, + baseline_visits = character(), + baseline_timepoints = character() +) +} +\arguments{ +\item{sdtm_in}{Input SDTM domain.} + +\item{dm_domain}{DM domain with the reference variable \code{ref_var}} + +\item{tgt_var}{Name of variable to be derived (\code{--BLFL} or +\code{--LOBXFL} where \verb{--} is domain).} + +\item{ref_var}{vector of a date/time from the +Demographics (DM) dataset, which serves as a point of comparison for other +observations in the study. Common choices for this reference variable +include "RFSTDTC" (the date/time of the first study treatment) or +"RFXSTDTC" (the date/time of the first exposure to the study drug).} + +\item{baseline_visits}{A character vector specifying the baseline visits within the study. +These visits are identified as critical points for data collection at the start of the study, +before any intervention is applied. This allows the function to assign the baseline +flag if the --DTC matches to the reference date.} + +\item{baseline_timepoints}{A character vector of timepoints values in --TPT that specifies +the specific timepoints during the baseline visits when key assessments or measurements were taken. +This allows the function to assign the baseline flag if the --DTC matches to the reference date.} +} +\value{ +Modified input data frame with baseline flag variable \code{--BLFL} or +last observation before exposure flag \code{--LOBXFL} added. +} +\description{ +Derive the baseline flag variable (\code{--BLFL}) or the last observation before +exposure flag (\code{--LOBXFL}), from the observation date/time (\code{--DTC}), and a +DM domain reference date/time. +} +\details{ +The derivation is as follows: +\itemize{ +\item Remove records where the result (\code{--ORRES}) is missing. Also, exclude records +with results labeled as "ND" (No Data) or "NOT DONE" in the \code{--ORRES} column, +which indicate that the measurement or observation was not completed. +\item Remove records where the status (\code{--STAT}) indicates the observation or test +was not performed, marked as "NOT DONE". +\item Divide the date and time column (\code{--DTC}) and the reference date/time +variable (\code{ref_var}) into separate date and time components. Ignore +any seconds recorded in the time component, focusing only on hours and +minutes for further calculations. +\item Set partial or missing dates to \code{NA}. +\item Set partial or missing times to \code{NA}. +\item Filter on rows that have domain and reference dates not equal to +\code{NA}. (Ref to as \strong{X}) +\item Filter \strong{X} on rows with domain date (--DTC) prior to (less than) +reference date. (Ref to as \strong{A}) +\item Filter \strong{X} on rows with domain date (--DTC) equal to reference date but +domain and reference times not equal to \code{NA} and domain time prior to (less +than) reference time. (Ref to as \strong{B}) +\item Filter \strong{X} on rows with domain date (--DTC) equal to reference date but +domain and/or reference time equal to NA and: +\itemize{ +\item VISIT is in baseline visits list (if it exists) and +\item xxTPT is in baseline timepoints list (if it exists). +(Ref to as \strong{C}) +} +\item Combine the rows from \strong{A}, \strong{B}, and \strong{C} to get a +data frame of pre-reference date observations. Sort the rows by \code{USUBJID}, +\code{--STAT}, and \code{--ORRES}. +\item Group by \code{USUBJID} and \code{--TESTCD} and filter on the rows +that have maximum value from \code{--DTC}. Keep only the oak id variables and +\code{--TESTCD} (because these are the unique values). Remove any duplicate rows. +Assign the baseline flag variable, \code{--BLFL}, the last observation before +exposure flag (\code{--LOBXFL}) variable to these rows. +\item Join the baseline flag onto the input dataset based on oak id vars +} +} +\examples{ +dm <- tibble::tribble( + ~USUBJID, ~RFSTDTC, ~RFXSTDTC, + "test_study-375", "2020-09-28T10:10", "2020-09-28T10:10", + "test_study-376", "2020-09-21T11:00", "2020-09-21T11:00", + "test_study-377", NA, NA, + "test_study-378", "2020-01-20T10:00", "2020-01-20T10:00", + "test_study-379", NA, NA, +) + +dm + +sdtm_in <- + tibble::tribble( + ~DOMAIN, + ~oak_id, + ~raw_source, + ~patient_number, + ~USUBJID, + ~VSDTC, + ~VSTESTCD, + ~VSORRES, + ~VSSTAT, + ~VISIT, + "VS", + 1L, + "VTLS1", + 375L, + "test_study-375", + "2020-09-01T13:31", + "DIABP", + "90", + NA, + "SCREENING", + "VS", + 2L, + "VTLS1", + 375L, + "test_study-375", + "2020-10-01T11:20", + "DIABP", + "90", + NA, + "SCREENING", + "VS", + 1L, + "VTLS1", + 375L, + "test_study-375", + "2020-09-28T10:10", + "PULSE", + "ND", + NA, + "SCREENING", + "VS", + 2L, + "VTLS1", + 375L, + "test_study-375", + "2020-10-01T13:31", + "PULSE", + "85", + NA, + "SCREENING", + "VS", + 1L, + "VTLS2", + 375L, + "test_study-375", + "2020-09-28T10:10", + "SYSBP", + "120", + NA, + "SCREENING", + "VS", + 2L, + "VTLS2", + 375L, + "test_study-375", + "2020-09-28T10:05", + "SYSBP", + "120", + NA, + "SCREENING", + "VS", + 1L, + "VTLS1", + 376L, + "test_study-376", + "2020-09-20", + "DIABP", + "75", + NA, + "SCREENING", + "VS", + 1L, + "VTLS1", + 376L, + "test_study-376", + "2020-09-20", + "PULSE", + NA, + "NOT DONE", + "SCREENING", + "VS", + 2L, + "VTLS1", + 376L, + "test_study-376", + "2020-09-20", + "PULSE", + "110", + NA, + "SCREENING", + "VS", + 2L, + "VTLS1", + 378L, + "test_study-378", + "2020-01-20T10:00", + "PULSE", + "110", + NA, + "SCREENING", + "VS", + 3L, + "VTLS1", + 378L, + "test_study-378", + "2020-01-21T11:00", + "PULSE", + "105", + NA, + "SCREENING" + ) + +sdtm_in + +observed_output <- derive_blfl( + sdtm_in = sdtm_in, + dm_domain = dm, + tgt_var = "VSLOBXFL", + ref_var = "RFXSTDTC", + baseline_visits = c("SCREENING") +) +observed_output + +} diff --git a/man/dtc_datepart.Rd b/man/dtc_datepart.Rd new file mode 100644 index 00000000..7d2f05e6 --- /dev/null +++ b/man/dtc_datepart.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/derive_blfl.R +\name{dtc_datepart} +\alias{dtc_datepart} +\title{Extract date part from ISO8601 date/time variable} +\usage{ +dtc_datepart(dtc, partial_as_na = TRUE) +} +\arguments{ +\item{dtc}{Character vector containing ISO8601 date/times.} + +\item{partial_as_na}{Logical \code{TRUE} or \code{FALSE} indicating whether +partial dates should be set to NA (default is \code{TRUE}).} +} +\value{ +Character vector containing ISO8601 dates. +} +\description{ +The date part is extracted from an ISO8601 date/time variable. +By default, partial or missing dates are set to NA. +} +\examples{ +## Partial or missing dates set to NA by default +sdtm.oak:::dtc_datepart( + c(NA, "", "2021", "2021-12", "2021-12-25", "2021-12-25T12:00:00") +) +# |--> c(NA, NA, NA, NA, "2021-12-25", "2021-12-25") + +## Prevent partial or missing dates from being set to NA +sdtm.oak:::dtc_datepart( + c(NA, "", "2021", "2021-12", "2021-12-25", "2021-12-25T12:00:00"), + partial_as_na = FALSE +) +# |--> c(NA, "", "2021", "2021-12", "2021-12-25", "2021-12-25") +} +\keyword{internal} diff --git a/man/dtc_timepart.Rd b/man/dtc_timepart.Rd new file mode 100644 index 00000000..84d648a7 --- /dev/null +++ b/man/dtc_timepart.Rd @@ -0,0 +1,69 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/derive_blfl.R +\name{dtc_timepart} +\alias{dtc_timepart} +\title{Extract time part from ISO 8601 date/time variable} +\usage{ +dtc_timepart(dtc, partial_as_na = TRUE, ignore_seconds = TRUE) +} +\arguments{ +\item{dtc}{Character vector containing ISO 8601 date/times.} + +\item{partial_as_na}{Logical \code{TRUE} or \code{FALSE} indicating whether +partial times should be set to NA (default is \code{TRUE}).} + +\item{ignore_seconds}{Logical \code{TRUE} or \code{FALSE} indicating whether +seconds should be ignored (default is \code{TRUE}).} +} +\value{ +Character vector containing ISO 8601 times. +} +\description{ +The time part is extracted from an ISO 8601 date/time variable. +By default, partial or missing times are set to NA, and seconds are ignored +and not extracted. +} +\keyword{"",} +\keyword{"12",} +\keyword{"12:30")} +\keyword{"12:30",} +\keyword{"12:30:59")} +\keyword{"2021-12-25",} +\keyword{"2021-12-25T12",} +\keyword{"2021-12-25T12:30",} +\keyword{"2021-12-25T12:30:59")} +\keyword{"2021-12-25T12:30:59"),} +\keyword{#} +\keyword{##} +\keyword{)} +\keyword{=} +\keyword{Do} +\keyword{FALSE} +\keyword{FALSE,} +\keyword{NA} +\keyword{NA,} +\keyword{Partial} +\keyword{Prevent} +\keyword{and} +\keyword{being} +\keyword{by} +\keyword{c(NA,} +\keyword{default} +\keyword{from} +\keyword{ignore} +\keyword{ignore_seconds} +\keyword{ignored} +\keyword{internal} +\keyword{missing} +\keyword{not} +\keyword{or} +\keyword{partial} +\keyword{partial_as_na} +\keyword{prevent} +\keyword{sdtm.oak:::dtc_timepart(} +\keyword{seconds} +\keyword{seconds,} +\keyword{set} +\keyword{times} +\keyword{to} +\keyword{|-->} diff --git a/tests/testthat/_snaps/derive_blfl.md b/tests/testthat/_snaps/derive_blfl.md new file mode 100644 index 00000000..606a22b3 --- /dev/null +++ b/tests/testthat/_snaps/derive_blfl.md @@ -0,0 +1,112 @@ +# derive_blfl example works + + { + "type": "list", + "attributes": { + "class": { + "type": "character", + "attributes": {}, + "value": ["tbl_df", "tbl", "data.frame"] + }, + "row.names": { + "type": "integer", + "attributes": {}, + "value": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + }, + "names": { + "type": "character", + "attributes": {}, + "value": ["DOMAIN", "oak_id", "raw_source", "patient_number", "USUBJID", "VSDTC", "VSTESTCD", "VSORRES", "VSSTAT", "VISIT", "VSLOBXFL"] + } + }, + "value": [ + { + "type": "character", + "attributes": {}, + "value": ["VS", "VS", "VS", "VS", "VS", "VS", "VS", "VS", "VS", "VS", "VS"] + }, + { + "type": "integer", + "attributes": {}, + "value": [1, 2, 1, 2, 1, 2, 1, 1, 2, 2, 3] + }, + { + "type": "character", + "attributes": {}, + "value": ["VTLS1", "VTLS1", "VTLS1", "VTLS1", "VTLS2", "VTLS2", "VTLS1", "VTLS1", "VTLS1", "VTLS1", "VTLS1"] + }, + { + "type": "integer", + "attributes": {}, + "value": [375, 375, 375, 375, 375, 375, 376, 376, 376, 378, 378] + }, + { + "type": "character", + "attributes": {}, + "value": ["test_study-375", "test_study-375", "test_study-375", "test_study-375", "test_study-375", "test_study-375", "test_study-376", "test_study-376", "test_study-376", "test_study-378", "test_study-378"] + }, + { + "type": "character", + "attributes": {}, + "value": ["2020-09-01T13:31", "2020-10-01T11:20", "2020-09-28T10:10", "2020-10-01T13:31", "2020-09-28T10:10", "2020-09-28T10:05", "2020-09-20", "2020-09-20", "2020-09-20", "2020-01-20T10:00", "2020-01-21T11:00"] + }, + { + "type": "character", + "attributes": {}, + "value": ["DIABP", "DIABP", "PULSE", "PULSE", "SYSBP", "SYSBP", "DIABP", "PULSE", "PULSE", "PULSE", "PULSE"] + }, + { + "type": "character", + "attributes": {}, + "value": ["90", "90", "ND", "85", "120", "120", "75", null, "110", "110", "105"] + }, + { + "type": "character", + "attributes": {}, + "value": [null, null, null, null, null, null, null, "NOT DONE", null, null, null] + }, + { + "type": "character", + "attributes": {}, + "value": ["SCREENING", "SCREENING", "SCREENING", "SCREENING", "SCREENING", "SCREENING", "SCREENING", "SCREENING", "SCREENING", "SCREENING", "SCREENING"] + }, + { + "type": "character", + "attributes": {}, + "value": ["Y", null, null, null, "Y", null, "Y", null, "Y", "Y", null] + } + ] + } + +# derive_blfl sdmt_in validations work + + Required variable `DOMAIN` is missing + +--- + + Required variables `oak_id`, `raw_source` and `patient_number` are missing + +--- + + Required variables `VSORRES`, `VSSTAT`, `VSTESTCD` and `VSDTC` are missing + +# derive_blfl dm_domain validations work + + Required variables `USUBJID` and `RFXSTDTC` are missing + +# derive_blfl tgt_var and ref_var validations work + + `tgt_var` must be a character scalar but is a list + +--- + + `ref_var` must be a character scalar but is a data frame + +--- + + `tgt_var` must be one of 'VSBLFL' or 'VSLOBXFL' but is 'DMLOBXFL' + +# derive_blfl DOMAIN validation works + + `domain` must be a character scalar but is `4` + diff --git a/tests/testthat/test-derive_blfl.R b/tests/testthat/test-derive_blfl.R new file mode 100644 index 00000000..7b6b963c --- /dev/null +++ b/tests/testthat/test-derive_blfl.R @@ -0,0 +1,129 @@ +dm <- tibble::tribble( + ~USUBJID, ~RFSTDTC, ~RFXSTDTC, + "test_study-375", "2020-09-28T10:10", "2020-09-28T10:10", + "test_study-376", "2020-09-21T11:00", "2020-09-21T11:00", + "test_study-377", NA, NA, + "test_study-378", "2020-01-20T10:00", "2020-01-20T10:00", + "test_study-379", NA, NA +) + +sdtm_in <- + tibble::tribble( + ~DOMAIN, ~oak_id, ~raw_source, ~patient_number, ~USUBJID, ~VSDTC, ~VSTESTCD, ~VSORRES, ~VSSTAT, ~VISIT, + "VS", 1L, "VTLS1", 375L, "test_study-375", "2020-09-01T13:31", "DIABP", "90", NA, "SCREENING", + "VS", 2L, "VTLS1", 375L, "test_study-375", "2020-10-01T11:20", "DIABP", "90", NA, "SCREENING", + "VS", 1L, "VTLS1", 375L, "test_study-375", "2020-09-28T10:10", "PULSE", "ND", NA, "SCREENING", + "VS", 2L, "VTLS1", 375L, "test_study-375", "2020-10-01T13:31", "PULSE", "85", NA, "SCREENING", + "VS", 1L, "VTLS2", 375L, "test_study-375", "2020-09-28T10:10", "SYSBP", "120", NA, "SCREENING", + "VS", 2L, "VTLS2", 375L, "test_study-375", "2020-09-28T10:05", "SYSBP", "120", NA, "SCREENING", + "VS", 1L, "VTLS1", 376L, "test_study-376", "2020-09-20", "DIABP", "75", NA, "SCREENING", + "VS", 1L, "VTLS1", 376L, "test_study-376", "2020-09-20", "PULSE", NA, "NOT DONE", "SCREENING", # nolint + "VS", 2L, "VTLS1", 376L, "test_study-376", "2020-09-20", "PULSE", "110", NA, "SCREENING", + "VS", 2L, "VTLS1", 378L, "test_study-378", "2020-01-20T10:00", "PULSE", "110", NA, "SCREENING", + "VS", 3L, "VTLS1", 378L, "test_study-378", "2020-01-21T11:00", "PULSE", "105", NA, "SCREENING" + ) + +d <- list(sdtm_in = sdtm_in, dm = dm) + + +test_that("derive_blfl example works", { + observed_output <- derive_blfl( + sdtm_in = d$sdtm_in, + dm_domain = d$dm, + tgt_var = "VSLOBXFL", + ref_var = "RFXSTDTC", + baseline_visits = c("SCREENING") + ) + observed_output + + expect_snapshot_value(observed_output, style = "json2") +}) + +test_that("derive_blfl sdmt_in validations work", { + sdmt_in_no_domain <- + d$sdtm_in |> + dplyr::select(-DOMAIN) + + expect_snapshot_error(derive_blfl( + sdtm_in = sdmt_in_no_domain, + dm_domain = d$dm, + tgt_var = "VSLOBXFL", + ref_var = "RFXSTDTC" + )) + + sdmt_in_no_id_vars <- + d$sdtm_in |> + dplyr::select(-sdtm.oak::oak_id_vars()) + + expect_snapshot_error(derive_blfl( + sdtm_in = sdmt_in_no_id_vars, + dm_domain = d$dm, + tgt_var = "VSLOBXFL", + ref_var = "RFXSTDTC" + )) + + sdmt_in_no_vs_vars <- + d$sdtm_in |> + dplyr::select(-c( + "VSORRES", + "VSSTAT", + "VSTESTCD", + "VSDTC" + )) + + expect_snapshot_error(derive_blfl( + sdtm_in = sdmt_in_no_vs_vars, + dm_domain = d$dm, + tgt_var = "VSLOBXFL", + ref_var = "RFXSTDTC" + )) +}) + +test_that("derive_blfl dm_domain validations work", { + dm_no_vars <- + d$dm |> + dplyr::select(-c(RFXSTDTC, USUBJID)) + + expect_snapshot_error(derive_blfl( + sdtm_in = d$sdtm_in, + dm_domain = dm_no_vars, + tgt_var = "VSLOBXFL", + ref_var = "RFXSTDTC" + )) +}) + +test_that("derive_blfl tgt_var and ref_var validations work", { + expect_snapshot_error(derive_blfl( + sdtm_in = d$sdtm_in, + dm_domain = d$dm, + tgt_var = list("bad"), + ref_var = "RFXSTDTC" + )) + + expect_snapshot_error(derive_blfl( + sdtm_in = d$sdtm_in, + dm_domain = d$dm, + tgt_var = "VSLOBXFL", + ref_var = d$dm + )) + + expect_snapshot_error(derive_blfl( + sdtm_in = d$sdtm_in, + dm_domain = d$dm, + tgt_var = "DMLOBXFL", + ref_var = "RFXSTDTC" + )) +}) + +test_that("derive_blfl DOMAIN validation works", { + sdtm_in_bad_domain <- + d$sdtm_in |> + dplyr::mutate(DOMAIN = 4L) + + expect_snapshot_error(derive_blfl( + sdtm_in = sdtm_in_bad_domain, + dm_domain = d$dm, + tgt_var = "VSLOBXFL", + ref_var = "RFXSTDTC" + )) +})