diff --git a/DESCRIPTION b/DESCRIPTION
index 78172387..636f5151 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -21,6 +21,8 @@ Authors@R: c(
     person("Pattern Institute", role = c("cph", "fnd")),
     person("F. Hoffmann-La Roche AG", role = c("cph", "fnd")),
     person("Pfizer Inc", role = c("cph", "fnd")),
+    person("Mohsin", "Uzzama", email = "mohsin.uzzama2@gmail.com",
+           role = "aut"),
     person("Transition Technologies Science", role = c("cph", "fnd"))
     )
 Maintainer: Rammprasad Ganapathy
diff --git a/NAMESPACE b/NAMESPACE
index 2dc0c0f3..13951835 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -8,6 +8,7 @@ export("%.>%")
 export(assign_ct)
 export(assign_datetime)
 export(assign_no_ct)
+export(cal_min_max_date)
 export(condition_add)
 export(create_iso8601)
 export(ct_map)
@@ -20,6 +21,7 @@ export(fmt_cmp)
 export(generate_oak_id_vars)
 export(hardcode_ct)
 export(hardcode_no_ct)
+export(oak_cal_ref_dates)
 export(oak_id_vars)
 export(problems)
 export(read_ct_spec)
diff --git a/R/cal_min_max_date.R b/R/cal_min_max_date.R
new file mode 100644
index 00000000..89d3e31b
--- /dev/null
+++ b/R/cal_min_max_date.R
@@ -0,0 +1,149 @@
+#' Calculate minimum and maximum date and time in the data frame
+#'
+#' @description Derives, for each \code{patient_number}, the earliest or the
+#' latest ISO8601 datetime found in a raw dataset. The date and, optionally,
+#' the time variable are converted with \code{\link{create_iso8601}}; the
+#' candidates are then ordered by year, month, day, hour and minute.
+#'
+#' @param raw_dataset Raw source data frame. Must contain a
+#' \code{patient_number} column.
+#' @param date_variable Single character string. Name of the date variable.
+#' @param time_variable Single character string. Name of the time variable.
+#' Use \code{NA} when only the date variable is to be used.
+#' @param val_type Single character string determining whether to look
+#' for the earliest or the latest datetime combination. Permitted values:
+#' "min", "max". Defaults to "min".
+#' @param date_format Format of source date variable.
+#' @param time_format Format of source time variable.
+#'
+#' @return Data frame with 2 columns: unique patient_number and datetime, a
+#' character column storing the earliest/latest datetime. Patients whose
+#' datetime is \code{NA} are dropped. If \code{date_variable} is \code{NA}, or
+#' the date or time variable is not a column of \code{raw_dataset}, a warning
+#' is raised and an empty data frame with these two columns is returned.
+#'
+#' @export
+#' @examples
+#' EX <- tibble::tribble(
+#'   ~patient_number, ~EX_ST_DT, ~EX_ST_TM,
+#'   "001", "25-04-2022", "10:20",
+#'   "001", "25-04-2022", "10:15",
+#'   "001", "25-04-2022", "10:19",
+#'   "002", "26-05-2022", "UNK:UNK",
+#'   "002", "26-05-2022", "05:59"
+#' )
+#'
+#' min_dates <- cal_min_max_date(EX,
+#'   date_variable = "EX_ST_DT",
+#'   time_variable = "EX_ST_TM",
+#'   val_type = "min",
+#'   date_format = "dd-mm-yyyy",
+#'   time_format = "H:M"
+#' )
+#'
+#' max_dates <- cal_min_max_date(EX,
+#'   date_variable = "EX_ST_DT",
+#'   time_variable = "EX_ST_TM",
+#'   val_type = "max",
+#'   date_format = "dd-mm-yyyy",
+#'   time_format = "H:M"
+#' )
+#'
+cal_min_max_date <- function(raw_dataset,
+                             date_variable,
+                             time_variable,
+                             val_type = "min",
+                             date_format,
+                             time_format) {
+  # Reject anything other than "min"/"max": previously every value different
+  # from "min" was silently treated as "max".
+  val_type <- match.arg(val_type, c("min", "max"))
+
+  # NULL is handled like NA ("not supplied") so that the scalar `||`/`&&`
+  # conditions below never receive a zero-length operand.
+  date_missing <- is.null(date_variable) || is.na(date_variable)
+  time_unused <- is.null(time_variable) || is.na(time_variable)
+
+  # Date variable is mandatory and must be a column of the raw data frame
+  date_not_in_data <- date_missing ||
+    !utils::hasName(raw_dataset, date_variable)
+
+  # Time variable is optional, but when given it must be a column as well
+  time_not_in_data <- !time_unused &&
+    !utils::hasName(raw_dataset, time_variable)
+
+  # If date/time variables are not present return an empty data frame
+  if (date_not_in_data || time_not_in_data) {
+    empty_df <- stats::setNames(
+      data.frame(matrix(ncol = 2L, nrow = 0L)),
+      c("patient_number", "datetime")
+    )
+    cli::cli_warn(paste(
+      "Date variable", date_variable, "or Time variable", time_variable,
+      "not present in source data"
+    ))
+    return(empty_df)
+  }
+
+  fin_df <- raw_dataset
+
+  if (time_unused) {
+    # Date only. as.character() keeps the column type identical to the
+    # date + time branch below.
+    fin_df$datetime <- as.character(create_iso8601(
+      raw_dataset[[date_variable]],
+      .format = date_format
+    ))
+  } else {
+    # Date and time are pasted together and parsed with the pasted format
+    date_time <- paste0(
+      raw_dataset[[date_variable]],
+      raw_dataset[[time_variable]]
+    )
+    fin_df$datetime <- as.character(create_iso8601(
+      date_time,
+      .format = paste0(date_format, time_format),
+      .na = c("UNK", "NA", "U", "unk", "u", "un", "UN")
+    ))
+  }
+
+  fin_df <- fin_df |>
+    dplyr::select(c("patient_number", "datetime")) |>
+    unique()
+
+  # Split the ISO8601 string into its components to sort on them
+  fin_df <- fin_df |>
+    dplyr::mutate(date_time = datetime) |>
+    tidyr::separate(
+      date_time,
+      sep = "-|T|:",
+      into = c("year", "month", "day", "hour", "minute"),
+      fill = "right",
+      extra = "drop"
+    )
+
+  # "UNK" and empty cells become NA, in every column as before; `%in%` never
+  # yields NA, so cells that are already NA are left untouched.
+  fin_df[] <- lapply(fin_df, function(column) {
+    column[column %in% c("UNK", "")] <- NA
+    column
+  })
+
+  fin_df <- if (val_type == "min") {
+    dplyr::arrange(fin_df, year, month, day, hour, minute)
+  } else {
+    dplyr::arrange(
+      fin_df,
+      dplyr::desc(year), dplyr::desc(month), dplyr::desc(day),
+      dplyr::desc(hour), dplyr::desc(minute)
+    )
+  }
+
+  # Rows are sorted, so the first row of each patient is its min/max
+  final_df <- fin_df[
+    !duplicated(fin_df$patient_number),
+    c("patient_number", "datetime")
+  ]
+
+  dplyr::filter(final_df, !is.na(datetime))
+}
diff --git a/R/globals.R b/R/globals.R
index 9a2998a0..3b1f54ba 100644
--- a/R/globals.R
+++ b/R/globals.R
@@ -1,4 +1,6 @@
 utils::globalVariables(c(
   "USUBJID", "VISIT", "dom_dt", "dom_tm", "ref_dt",
-  "ref_tm"
+  "ref_tm", "datetime", "date_time", "year", "month",
+  "day", "hour", "minute", "dataset_name", "date_var",
+  "dformat", "tformat", "sdtm_var_name", "patient_number"
 ))
diff --git a/R/oak_cal_ref_dates.R b/R/oak_cal_ref_dates.R
new file mode 100644
index 00000000..d73dcbd2
--- /dev/null
+++ b/R/oak_cal_ref_dates.R
@@ -0,0 +1,154 @@
+#' Calculate Reference dates in ISO8601 character format.
+#'
+#' @description Derive a reference date of the DM domain, e.g. RFSTDTC,
+#' RFENDTC, RFXSTDTC or RFXENDTC, as the earliest or latest ISO8601
+#' date/datetime found in the raw datasets configured for that variable.
+#'
+#' @param ds_in Data frame. DM domain. Must contain a \code{patient_number}
+#' column, which is used to merge the derived reference date.
+#' @param der_var Character string. The reference date to be derived.
+#' @param min_max Minimum or Maximum date to be calculated based on the input.
+#' Default set to Minimum. Values should be min or max.
+#' @param ref_date_config_df Data frame which has the details of the variables to
+#' be used for the calculation of reference dates.
+#' Should have columns listed below:
+#' \itemize{
+#'   \item dataset_name : Name of the raw dataset.
+#'   \item date_var : Date variable name from the raw dataset.
+#'   \item time_var : Time variable name from the raw dataset.
+#'   \item dformat : Format of the date collected in raw data.
+#'   \item tformat : Format of the time collected in raw data.
+#'   \item sdtm_var_name : Reference variable name.
+#' }
+#' @param raw_source Named list of raw datasets (data frames); the names are
+#' matched against \code{dataset_name}.
+#' @return DM data frame with the reference date \code{der_var} added. The
+#' value is \code{NA} for patients without any usable date.
+#' @export
+#' @examples
+#' dm <- tibble::tribble(
+#'   ~patient_number, ~USUBJID, ~SUBJID, ~SEX,
+#'   "001", "XXXX-001", "001", "F",
+#'   "002", "XXXX-002", "002", "M",
+#'   "003", "XXXX-003", "003", "M"
+#' )
+#'
+#' ref_date_config_df <- tibble::tribble(
+#'   ~dataset_name, ~date_var, ~time_var, ~dformat, ~tformat, ~sdtm_var_name,
+#'   "EX1", "EX_ST_DT1", "EX_ST_TM1", "dd-mm-yyyy", "H:M", "RFSTDTC",
+#'   "EX2", "EX_ST_DT2", NA, "dd-mmm-yyyy", NA, "RFSTDTC",
+#'   "EX1", "EX_EN_DT1", "EX_EN_TM1", "dd-mm-yyyy", "H:M", "RFENDTC",
+#'   "EX2", "EX_ST_DT2", NA, "dd-mmm-yyyy", NA, "RFENDTC"
+#' )
+#'
+#' EX1 <- tibble::tribble(
+#'   ~patient_number, ~EX_ST_DT1, ~EX_EN_DT1, ~EX_ST_TM1, ~EX_EN_TM1,
+#'   "001", "15-05-2023", "15-05-2023", "10:20", "11:00",
+#'   "001", "15-05-2023", "15-05-2023", "9:15", "10:00",
+#'   "001", "15-05-2023", "15-05-2023", "8:19", "09:00",
+#'   "002", "02-10-2023", "02-10-2023", "UNK:UNK", NA,
+#'   "002", "03-11-2023", "03-11-2023", "11:19", NA
+#' )
+#'
+#' EX2 <- tibble::tribble(
+#'   ~patient_number, ~EX_ST_DT2,
+#'   "001", "11-JUN-2023",
+#'   "002", "24-OCT-2023",
+#'   "002", "25-JUL-2023",
+#'   "002", "30-OCT-2023",
+#'   "002", "UNK-OCT-2023"
+#' )
+#'
+#' raw_source <- list(EX1 = EX1, EX2 = EX2)
+#'
+#' dm_df <- oak_cal_ref_dates(dm,
+#'   der_var = "RFSTDTC",
+#'   min_max = "min",
+#'   ref_date_config_df = ref_date_config_df,
+#'   raw_source = raw_source
+#' )
+#'
+oak_cal_ref_dates <- function(ds_in = dm,
+                              der_var,
+                              min_max = "min",
+                              ref_date_config_df,
+                              raw_source) {
+  # NOTE(review): the default `dm` is only evaluated when `ds_in` is not
+  # supplied and `dm` is not defined in this function; pass `ds_in`
+  # explicitly.
+  # Reject anything other than "min"/"max": previously every value different
+  # from "min" was silently treated as "max".
+  min_max <- match.arg(min_max, c("min", "max"))
+
+  # Check if ref_date_config_df is a data frame and has all required variables
+  admiraldev::assert_data_frame(
+    ref_date_config_df,
+    required_vars = rlang::exprs(
+      dataset_name, date_var,
+      time_var, dformat,
+      tformat, sdtm_var_name
+    )
+  )
+
+  admiraldev::assert_list_of(raw_source, "data.frame")
+
+  # Collect one result per configuration row of `der_var` and bind them once
+  # after the loop instead of growing a data frame on every iteration.
+  results <- list()
+  for (i in seq_len(nrow(ref_date_config_df))) {
+    # isTRUE() also skips rows whose sdtm_var_name is NA
+    if (!isTRUE(ref_date_config_df$sdtm_var_name[i] == der_var)) {
+      next
+    }
+
+    raw_dataset_name <- ref_date_config_df$dataset_name[i]
+    raw_dataset <- raw_source[[raw_dataset_name]]
+
+    if (is.null(raw_dataset)) {
+      warning(paste0(
+        raw_dataset_name,
+        " is not present in the source data list but referenced in ",
+        "ref_date_config_df"
+      ))
+      next
+    }
+
+    results[[length(results) + 1L]] <- cal_min_max_date(
+      raw_dataset = raw_dataset,
+      date_variable = ref_date_config_df$date_var[i],
+      time_variable = ref_date_config_df$time_var[i],
+      date_format = ref_date_config_df$dformat[i],
+      time_format = ref_date_config_df$tformat[i],
+      val_type = min_max
+    )
+  }
+
+  ds_out <- if (length(results) > 0L) {
+    do.call(rbind, results)
+  } else {
+    data.frame()
+  }
+
+  # No date could be derived (no configuration row for `der_var`, or no
+  # usable raw data): add an all-NA reference date rather than failing in
+  # the sort and merge below.
+  if (nrow(ds_out) == 0L) {
+    ds_in[[der_var]] <- rep(NA_character_, nrow(ds_in))
+    return(ds_in)
+  }
+
+  df_final <- if (min_max == "min") {
+    dplyr::arrange(ds_out, patient_number, datetime)
+  } else {
+    dplyr::arrange(ds_out, dplyr::desc(datetime))
+  }
+
+  # Rows are sorted, so the first row of each patient is its min/max
+  df_final <- df_final[
+    !duplicated(df_final$patient_number),
+    c("patient_number", "datetime")
+  ]
+  colnames(df_final)[colnames(df_final) == "datetime"] <- der_var
+
+  dplyr::left_join(ds_in, y = df_final, by = "patient_number")
+}
diff --git a/_pkgdown.yml b/_pkgdown.yml
index 87f2cb80..83020855 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -49,11 +49,19 @@ reference:
   - dtc_formats
   - problems
 
+- title: Calculation of reference dates in DM
+  contents:
+  - oak_cal_ref_dates
+
 - title: Explicit dot pipe operator
   desc: A simple alternative to `%>% {...}`
   contents:
   - "%.>%"
 
+- title: Calculation of minimum/maximum ISO8601 dates
+  contents:
+  - cal_min_max_date
+
 authors:
   Ramiro Magno:
     href: https://www.pattern.institute/team/rmagno/
@@ -63,3 +71,5 @@ authors:
     href: https://www.linkedin.com/in/edgar-manukyan-20987927
   Shiyu Chen:
     href: https://www.linkedin.com/in/shiyu-chen-55a55410a/
+  Mohsin Uzzama:
+    href: https://www.linkedin.com/in/mohsin-uzzama-34066741/
diff --git a/man/cal_min_max_date.Rd b/man/cal_min_max_date.Rd
new file mode 100644
index 00000000..bca873a2
--- /dev/null
+++ b/man/cal_min_max_date.Rd
@@ -0,0 +1,64 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/cal_min_max_date.R
+\name{cal_min_max_date}
+\alias{cal_min_max_date}
+\title{Calculate minimum and maximum date and time in the data frame}
+\usage{
+cal_min_max_date(
+  raw_dataset,
+  date_variable,
+  time_variable,
+  val_type = "min",
+  date_format,
+  time_format
+)
+}
+\arguments{
+\item{raw_dataset}{Raw source data frame}
+
+\item{date_variable}{Single character string. Name of the date variable}
+
+\item{time_variable}{Single character string. Name of the time variable}
+
+\item{val_type}{Single character string determining whether to look
+for the earliest or the latest datetime combination. Permitted values:
+"min", "max". Default to "min".}
+
+\item{date_format}{Format of source date variable}
+
+\item{time_format}{Format of source time variable}
+}
+\value{
+Data frame with 2 columns: unique patient_number and datetime variable
+column storing the earliest/latest datetime.
+}
+\description{
+This function derives the earliest/latest ISO8601 datetime
+}
+\examples{
+EX <- tibble::tribble(
+  ~patient_number, ~EX_ST_DT, ~EX_ST_TM,
+  "001", "25-04-2022", "10:20",
+  "001", "25-04-2022", "10:15",
+  "001", "25-04-2022", "10:19",
+  "002", "26-05-2022", "UNK:UNK",
+  "002", "26-05-2022", "05:59"
+)
+
+min <- cal_min_max_date(EX,
+  date_variable = "EX_ST_DT",
+  time_variable = "EX_ST_TM",
+  val_type = "min",
+  date_format = "dd-mmm-yyyy",
+  time_format = "H:M"
+)
+
+max <- cal_min_max_date(EX,
+  date_variable = "EX_ST_DT",
+  time_variable = "EX_ST_TM",
+  val_type = "max",
+  date_format = "dd-mmm-yyyy",
+  time_format = "H:M"
+)
+
+}
diff --git a/man/oak_cal_ref_dates.Rd b/man/oak_cal_ref_dates.Rd
new file mode 100644
index 00000000..95f491a1
--- /dev/null
+++ b/man/oak_cal_ref_dates.Rd
@@ -0,0 +1,87 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/oak_cal_ref_dates.R
+\name{oak_cal_ref_dates}
+\alias{oak_cal_ref_dates}
+\title{Calculate Reference dates in ISO8601 character format.}
+\usage{
+oak_cal_ref_dates(
+  ds_in = dm,
+  der_var,
+  min_max = "min",
+  ref_date_config_df,
+  raw_source
+)
+}
+\arguments{
+\item{ds_in}{Data frame. DM domain.}
+
+\item{der_var}{Character string. The reference date to be derived.}
+
+\item{min_max}{Minimum or Maximum date to be calculated based on the input.
+Default set to Minimum. Values should be min or max.}
+
+\item{ref_date_config_df}{Data frame which has the details of the variables to
+be used for the calculation of reference dates.
+Should have columns listed below:
+dataset_name : Name of the raw dataset.
+date_var : Date variable name from the raw dataset.
+time_var : Time variable name from the raw dataset.
+dformat : Format of the date collected in raw data.
+tformat: Format of the time collected in raw data.
+sdtm_var_name : Reference variable name.}
+
+\item{raw_source}{List contains all the raw datasets.}
+}
+\value{
+DM data frame with the reference dates populated.
+}
+\description{
+Derive RFSTDTC, RFENDTC, RFXENDTC, RFXSTDTC based on the input dates and time.
+}
+\details{
+Populate RFSTDTC variable in demographic domain in ISO8601 character format.
+}
+\examples{
+dm <- tibble::tribble(
+  ~patient_number, ~USUBJID, ~SUBJID, ~SEX,
+  "001", "XXXX-001", "001", "F",
+  "002", "XXXX-002", "002", "M",
+  "003", "XXXX-003", "003", "M"
+)
+
+ref_date_config_df <- tibble::tribble(
+  ~dataset_name, ~date_var, ~time_var, ~dformat, ~tformat, ~sdtm_var_name,
+  "EX1", "EX_ST_DT1", "EX_ST_TM1", "dd-mm-yyyy", "H:M", "RFSTDTC",
+  "EX2", "EX_ST_DT2", NA, "dd-mmm-yyyy", NA, "RFSTDTC",
+  "EX1", "EX_EN_DT1", "EX_EN_TM1", "dd-mm-yyyy", "H:M", "RFENDTC",
+  "EX2", "EX_ST_DT2", NA, "dd-mmm-yyyy", NA, "RFENDTC"
+)
+
+EX1 <- tibble::tribble(
+  ~patient_number, ~EX_ST_DT1, ~EX_EN_DT1, ~EX_ST_TM1, ~EX_EN_TM1,
+  "001", "15-05-2023", "15-05-2023", "10:20", "11:00",
+  "001", "15-05-2023", "15-05-2023", "9:15", "10:00",
+  "001", "15-05-2023", "15-05-2023", "8:19", "09:00",
+  "002", "02-10-2023", "02-10-2023", "UNK:UNK", NA,
+  "002", "03-11-2023", "03-11-2023", "11:19", NA
+)
+
+EX2 <- tibble::tribble(
+  ~patient_number, ~EX_ST_DT2,
+  "001", "11-JUN-2023",
+  "002", "24-OCT-2023",
+  "002", "25-JUL-2023",
+  "002", "30-OCT-2023",
+  "002", "UNK-OCT-2023"
+)
+
+raw_source <- list(EX1 = EX1, EX2 = EX2)
+
+dm_df <- oak_cal_ref_dates(dm,
+  der_var = "RFSTDTC",
+  min_max = "min",
+  ref_date_config_df = ref_date_config_df,
+  raw_source
+)
+
+}
diff --git a/man/sdtm.oak-package.Rd b/man/sdtm.oak-package.Rd
index 2675455d..37e986c0 100644
--- a/man/sdtm.oak-package.Rd
+++ b/man/sdtm.oak-package.Rd
@@ -31,6 +31,7 @@ Authors:
   \item Ramiro Magno \email{rmagno@pattern.institute} (\href{https://orcid.org/0000-0001-5226-3441}{ORCID})
   \item Kamil Sijko \email{kamil.sijko@ttsi.com.pl} (\href{https://orcid.org/0000-0002-2203-1065}{ORCID})
   \item Shiyu Chen \email{Shiyu.Chen@atorusresearch.com}
+  \item Mohsin Uzzama \email{mohsin.uzzama2@gmail.com}
 }
 
 Other contributors:
diff --git a/renv.lock b/renv.lock
index 81d0f3ce..cd6da0f0 100644
--- a/renv.lock
+++ b/renv.lock
@@ -57,7 +57,7 @@
     },
     "R.oo": {
       "Package": "R.oo",
-      "Version": "1.26.0",
+      "Version": "1.27.0",
       "Source": "Repository",
       "Repository": "CRAN",
       "Requirements": [
@@ -66,7 +66,7 @@
         "methods",
         "utils"
       ],
-      "Hash": "4fed809e53ddb5407b3da3d0f572e591"
+      "Hash": "6ac79ff194202248cf946fe3a5d6d498"
     },
     "R.utils": {
       "Package": "R.utils",
diff --git a/renv/profiles/4.2/renv.lock b/renv/profiles/4.2/renv.lock
index 0c9569a4..210a2de0 100644
--- a/renv/profiles/4.2/renv.lock
+++ b/renv/profiles/4.2/renv.lock
@@ -57,7 +57,7 @@
     },
     "R.oo": {
       "Package": "R.oo",
-      "Version": "1.26.0",
+      "Version": "1.27.0",
       "Source": "Repository",
       "Repository": "CRAN",
       "Requirements": [
@@ -66,7 +66,7 @@
         "methods",
         "utils"
       ],
-      "Hash": "4fed809e53ddb5407b3da3d0f572e591"
+      "Hash": "6ac79ff194202248cf946fe3a5d6d498"
     },
     "R.utils": {
       "Package": "R.utils",
diff --git a/renv/profiles/4.3/renv.lock b/renv/profiles/4.3/renv.lock
index 81d0f3ce..cd6da0f0 100644
--- a/renv/profiles/4.3/renv.lock
+++ b/renv/profiles/4.3/renv.lock
@@ -57,7 +57,7 @@
     },
     "R.oo": {
       "Package": "R.oo",
-      "Version": "1.26.0",
+      "Version": "1.27.0",
       "Source": "Repository",
       "Repository": "CRAN",
       "Requirements": [
@@ -66,7 +66,7 @@
         "methods",
         "utils"
       ],
-      "Hash": "4fed809e53ddb5407b3da3d0f572e591"
+      "Hash": "6ac79ff194202248cf946fe3a5d6d498"
     },
     "R.utils": {
       "Package": "R.utils",
diff --git a/tests/testthat/test-cal_min_max_date.R b/tests/testthat/test-cal_min_max_date.R
new file mode 100644
index 00000000..0d351cb6
--- /dev/null
+++ b/tests/testthat/test-cal_min_max_date.R
@@ -0,0 +1,95 @@
+test_that("Warn if date variable parameter is NULL", { # expects a warning plus an empty 2-column data frame
+  EX <- tibble::tribble(
+    ~patient_number, ~EX_ST_DT,
+    "001", "26-04-2022"
+  )
+
+  warning_msg <- "Date variable NA or Time variable NA not present in source data"
+  expect_warning(
+    observed <- cal_min_max_date(EX,
+      date_variable = NA, # NOTE(review): the test title says NULL, but NA is what is passed here
+      time_variable = NA,
+      val_type = "max",
+      date_format = "dd-mmm-yyyy",
+      time_format = "H:M"
+    ),
+    regexp = warning_msg
+  )
+
+  expected <- stats::setNames(
+    data.frame(matrix(ncol = 2L, nrow = 0L)),
+    c("patient_number", "datetime")
+  )
+
+  expect_identical(observed, expected)
+})
+
+test_that("cal_min_max_date works as expected", { # same EX input checked once with "min" and once with "max"
+  EX <- tibble::tribble(
+    ~patient_number, ~EX_ST_DT, ~EX_ST_TM,
+    "001", "26-04-2022", "10:20",
+    "001", "25-04-2022", "10:15",
+    "001", "25-04-2022", "10:19",
+    "002", "26-05-2022", "06:23",
+    "002", "26-05-2022", "04:59",
+    "002", "26-05-2022", "05:59"
+  )
+
+  expected_min <- tibble::tribble(
+    ~patient_number, ~datetime,
+    "001", "2022-04-25T10:15",
+    "002", "2022-05-26T04:59"
+  )
+
+  expected_max <- tibble::tribble( # rows listed latest datetime first ("002" before "001")
+    ~patient_number, ~datetime,
+    "002", "2022-05-26T06:23",
+    "001", "2022-04-26T10:20"
+  )
+
+  observed_min <- cal_min_max_date(EX,
+    date_variable = "EX_ST_DT",
+    time_variable = "EX_ST_TM",
+    val_type = "min",
+    date_format = "dd-mmm-yyyy", # NOTE(review): "mmm" is used although EX_ST_DT holds numeric months - confirm intended
+    time_format = "H:M"
+  )
+
+  expect_identical(observed_min, expected_min)
+
+  observed_max <- cal_min_max_date(EX,
+    date_variable = "EX_ST_DT",
+    time_variable = "EX_ST_TM",
+    val_type = "max",
+    date_format = "dd-mmm-yyyy",
+    time_format = "H:M"
+  )
+
+  expect_identical(observed_max, expected_max)
+})
+
+test_that("Warning is displayed if date or time variables parameters passed are not present", {
+  EX <- tibble::tribble( # EX has no EX_ST_TM column, which the call below requests
+    ~patient_number, ~EX_ST_DT,
+    "001", "26-04-2022"
+  )
+
+  warning_msg <- "Date variable EX_ST_DT or Time variable EX_ST_TM not present in source data"
+  expect_warning(
+    observed <- cal_min_max_date(EX,
+      date_variable = "EX_ST_DT",
+      time_variable = "EX_ST_TM",
+      val_type = "max",
+      date_format = "dd-mmm-yyyy",
+      time_format = "H:M"
+    ),
+    regexp = warning_msg
+  )
+
+  expected <- stats::setNames(
+    data.frame(matrix(ncol = 2L, nrow = 0L)),
+    c("patient_number", "datetime")
+  )
+
+  expect_identical(observed, expected)
+})
diff --git a/tests/testthat/test-oak_cal_ref_dates.R b/tests/testthat/test-oak_cal_ref_dates.R
new file mode 100644
index 00000000..8333a770
--- /dev/null
+++ b/tests/testthat/test-oak_cal_ref_dates.R
@@ -0,0 +1,65 @@
+dm <- tibble::tribble( # DM input shared by both tests below
+  ~patient_number, ~USUBJID, ~SUBJID, ~SEX,
+  "001", "XXXX-001", "001", "F",
+  "002", "XXXX-002", "002", "M",
+  "003", "XXXX-003", "003", "M"
+)
+
+expected <- tibble::tribble( # holds both reference dates; each test drops the column it does not derive
+  ~patient_number, ~USUBJID, ~SUBJID, ~SEX, ~RFSTDTC, ~RFENDTC,
+  "001", "XXXX-001", "001", "F", "2023-05-15T08:19", "2023-06-11",
+  "002", "XXXX-002", "002", "M", "2023-07-25", "2023-11-03T20:30",
+  "003", "XXXX-003", "003", "M", NA_character_, NA_character_ # "003" has no rows in EX1 or EX2
+)
+
+ref_date_conf_df <- tibble::tribble(
+  ~dataset_name, ~date_var, ~time_var, ~dformat, ~tformat, ~sdtm_var_name,
+  "EX1", "EX_ST_DT1", "EX_ST_TM1", "dd-mm-yyyy", "H:M", "RFSTDTC",
+  "EX2", "EX_ST_DT2", NA_character_, "dd-mmm-yyyy", NA_character_, "RFSTDTC",
+  "EX1", "EX_EN_DT1", "EX_EN_TM1", "dd-mm-yyyy", "H:M", "RFENDTC",
+  "EX2", "EX_ST_DT2", NA_character_, "dd-mmm-yyyy", NA_character_, "RFENDTC"
+)
+
+EX1 <- tibble::tribble(
+  ~patient_number, ~EX_ST_DT1, ~EX_EN_DT1, ~EX_ST_TM1, ~EX_EN_TM1,
+  "001", "15-05-2023", "15-05-2023", "10:20", "11:00",
+  "001", "15-05-2023", "15-05-2023", "9:15", "10:00",
+  "001", "15-05-2023", "15-05-2023", "8:19", "09:00",
+  "002", "02-10-2023", "02-10-2023", "UNK:UNK", NA_character_,
+  "002", "0l-11-2023", "03-11-2023", "11:19", "20:30" # NOTE(review): "0l" contains the letter l, not the digit 1 - presumably a deliberately invalid date; confirm
+)
+
+EX2 <- tibble::tribble(
+  ~patient_number, ~EX_ST_DT2,
+  "001", "11-JUN-2023",
+  "002", "24-OCT-2023",
+  "002", "25-JUL-2023",
+  "002", "30-OCT-2023",
+  "002", "UNK-OCT-2023"
+)
+
+raw_source <- list(EX1 = EX1, EX2 = EX2) # list names match dataset_name in ref_date_conf_df
+
+test_that("Calculate the Reference dates :RFSTDTC", {
+  observed_rfstdtc <- oak_cal_ref_dates(dm,
+    der_var = "RFSTDTC",
+    min_max = "min",
+    ref_date_config_df = ref_date_conf_df,
+    raw_source
+  )
+  expected_rfstdtc <- expected |> dplyr::select(-"RFENDTC")
+
+  expect_identical(observed_rfstdtc, expected_rfstdtc)
+})
+
+test_that("Calculate the Reference dates :RFENDTC", {
+  observed_rfendtc <- oak_cal_ref_dates(dm,
+    der_var = "RFENDTC",
+    min_max = "max",
+    ref_date_config_df = ref_date_conf_df,
+    raw_source
+  )
+
+  expected_rfendtc <- expected |> dplyr::select(-"RFSTDTC")
+  expect_identical(observed_rfendtc, expected_rfendtc)
+})