From a9dbb3846ce248c47b1db1c763c5055c4410f4d7 Mon Sep 17 00:00:00 2001 From: Shaistha Khanum Date: Sun, 8 Sep 2024 22:35:13 +0100 Subject: [PATCH 01/11] Function to calculate reference dates --- R/cal_min_max_date.R | 64 ++++++++++++++++++++++++++++++++++++ R/oak_cal_ref_dates.R | 75 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 139 insertions(+) create mode 100644 R/cal_min_max_date.R create mode 100644 R/oak_cal_ref_dates.R diff --git a/R/cal_min_max_date.R b/R/cal_min_max_date.R new file mode 100644 index 00000000..71c3969a --- /dev/null +++ b/R/cal_min_max_date.R @@ -0,0 +1,64 @@ +#' Populate RFSTDTC variable in demographic domain in ISO8601 character format. +#' +#' Derive RFSTDTC based on the configuration file. +#' +#' @details +#' +#' Calculate minimum or maximum dates + +cal_min_max_date <- function(raw_dataset, + sdtm_var_name, + date_variable, + val_type = "min" +) { + browser() + # Check if date and time variable are present in the raw dataset + date_not_in_data <- !(date_variable %in% colnames(raw_dataset)) + + if (date_not_in_data) { + # Return Empty Dataset with SUBJID and sdtm_var_name + final_df <- setNames(data.frame(matrix(ncol = 2, nrow = 0)), c("patient_number", "datetime")) + return(final_df) + } + + final_df <- raw_dataset + + final_df$datetime <- create_iso8601(raw_dataset[[date_variable]], .format = "dd-mm-yyyy") + + final_df <- final_df |> + dplyr::select(c("SUBJID", "datetime"))|> unique() + + final_df <- final_df |> + dplyr::mutate(date_time = datetime) |> + tidyr::separate( + date_time, + sep = "-|T|:", + into = c("year", "month", "day"), #, "hour", "minute" + fill = "right", + extra = "drop" + )|> + list() |> + setNames("x") |> + with(replace(x, x == "UNK", NA)) |> + list() |> + setNames("x") |> + with(replace(x, x == "", NA)) + + + if (val_type == "min") { + final_df1 <- final_df |> + dplyr::arrange(year, month, day)#, hour, minute) + } else { + final_df1 <- final_df |> + dplyr::arrange(dplyr::desc(year), dplyr::desc(month), dplyr::desc(day))#, dplyr::desc(hour), dplyr::desc(minute)) + } + + # Keep first appearance in the data frame since it is already sorted + final_df2 <- final_df1[!duplicated(final_df1$SUBJID), c("SUBJID", "datetime")] + + final_df2 <- final_df2 |> dplyr::filter(!is.na(datetime)) + + # colnames(final_df2)[colnames(final_df2) == "datetime"] <- sdtm_var_name + + return(final_df2) +} diff --git a/R/oak_cal_ref_dates.R b/R/oak_cal_ref_dates.R new file mode 100644 index 00000000..045b4fbc --- /dev/null +++ b/R/oak_cal_ref_dates.R @@ -0,0 +1,75 @@ +#' Calculate Reference dates RFSTDTC, RFENDTC, RFXSTDTC, RFXENDTC +#' + + +oak_cal_ref_dates <- function(ds_in = dm, + der_var = "RFSTDTC", + min_max = "min", + raw_dataset_list, + raw_date_list) { + # current_function() + + # validate the input + # oak_assert_that(is.data.frame(ds_in)) + # # oak_assert_that( + # # der_var == "RFSTDTC", + # # msg = "der_var must be equal to 'RFSTDTC'" + # # ) + # oak_assert_that(unique(ds_in$DOMAIN) == "DM", + # msg = "ds_in must contain only 'DM' domain" + # ) + # oak_assert_that( + # !(der_var %in% colnames(ds_in)), + # msg = paste( + # "ds_in data frame must not contain", + # der_var, + # "column" + # ) + # ) + # oak_assert_that(is.list(raw_dataset_list)) + # oak_assert_that(length(raw_dataset_list) > 0) + # + # oak_assert_that(is.list(raw_var_list)) + # oak_assert_that(length(raw_var_list) > 0) + # + # # Check if the raw_var_list has equal count of raw_dataset_list + # oak_assert_that(length(raw_dataset_list) == length(raw_var_list)) + + # check if ref_date_conf file is present in the oak_pkg_env + # oak_assert_that( + # "ref_date_conf" %in% ls(oak_pkg_env), + # msg = paste( + # "'ref_date_conf' not present in the 'oak_pkg_env'.", + # "Provide reference_date_configuration.csv." + # ) + # ) + + # ref_date_conf_loaded <- oak_pkg_env$ref_date_conf + ds_out <- data.frame() + for(i in 1:length(raw_dataset_list)) { +browser() + raw_dataset <- raw_source[[raw_dataset_list[i]]] + date_variable <- raw_date_list[[i]] + + ds_out1 <- cal_min_max_date( + raw_dataset = raw_dataset, + sdtm_var_name = der_var, + date_variable = date_variable, + val_type = min_max + ) + ds_out <- rbind(ds_out, ds_out1) + } +browser() + +# ref_dates <- purrr::pmap_df(ds_out, .f = get_df) +if (min_max == "min") { + df_final <- ds_out %>% dplyr::arrange(SUBJID,datetime) +} else { + df_final <- ds_out %>% dplyr::arrange(dplyr::desc(SUBJID),dplyr::desc(datetime)) +} +df_final <- df_final[!duplicated(df_final$SUBJID), c("SUBJID", "datetime")] +colnames(df_final)[colnames(df_final) == "datetime"] <- der_var + +return(df_final) + +} From 9c90a71175f770d1a02de4fd4b9390c79f547d79 Mon Sep 17 00:00:00 2001 From: muzzama-1990 Date: Wed, 11 Sep 2024 17:23:21 +0100 Subject: [PATCH 02/11] Remove comments --- R/oak_cal_ref_dates.R | 37 ------------------------------------- 1 file changed, 37 deletions(-) diff --git a/R/oak_cal_ref_dates.R b/R/oak_cal_ref_dates.R index 045b4fbc..2194c825 100644 --- a/R/oak_cal_ref_dates.R +++ b/R/oak_cal_ref_dates.R @@ -7,44 +7,7 @@ oak_cal_ref_dates <- function(ds_in = dm, min_max = "min", raw_dataset_list, raw_date_list) { - # current_function() - # validate the input - # oak_assert_that(is.data.frame(ds_in)) - # # oak_assert_that( - # # der_var == "RFSTDTC", - # # msg = "der_var must be equal to 'RFSTDTC'" - # # ) - # oak_assert_that(unique(ds_in$DOMAIN) == "DM", - # msg = "ds_in must contain only 'DM' domain" - # ) - # oak_assert_that( - # !(der_var %in% colnames(ds_in)), - # msg = paste( - # "ds_in data frame must not contain", - # der_var, - # "column" - # ) - # ) - # oak_assert_that(is.list(raw_dataset_list)) - # oak_assert_that(length(raw_dataset_list) > 0) - # - # oak_assert_that(is.list(raw_var_list)) - # oak_assert_that(length(raw_var_list) > 0) - # - # # Check if the raw_var_list has equal count of raw_dataset_list - # oak_assert_that(length(raw_dataset_list) == length(raw_var_list)) - - # check if ref_date_conf file is present in the oak_pkg_env - # oak_assert_that( - # "ref_date_conf" %in% ls(oak_pkg_env), - # msg = paste( - # "'ref_date_conf' not present in the 'oak_pkg_env'.", - # "Provide reference_date_configuration.csv." - # ) - # ) - - # ref_date_conf_loaded <- oak_pkg_env$ref_date_conf ds_out <- data.frame() for(i in 1:length(raw_dataset_list)) { browser() From 92161076b5e439f159da703611996b365e988ae8 Mon Sep 17 00:00:00 2001 From: muzzama-1990 Date: Wed, 18 Sep 2024 16:40:28 +0100 Subject: [PATCH 03/11] updates --- R/cal_min_max_date.R | 92 +++++++++++++++++++++++++++++++------------ R/oak_cal_ref_dates.R | 45 +++++++++++++-------- 2 files changed, 94 insertions(+), 43 deletions(-) diff --git a/R/cal_min_max_date.R b/R/cal_min_max_date.R index 71c3969a..901c854d 100644 --- a/R/cal_min_max_date.R +++ b/R/cal_min_max_date.R @@ -1,42 +1,85 @@ -#' Populate RFSTDTC variable in demographic domain in ISO8601 character format. +#' Calculate minimum and maximum date and time in the dataframe #' -#' Derive RFSTDTC based on the configuration file. +#' @description This function derives the earliest/latest ISO8601 datetime #' -#' @details +#' @param raw_dataset Raw source data frame +#' @param date_variable Single character string. Name of the date variable +#' @param time_variable Single character string. Name of the time variable +#' @param val_type Single character string determining whether to look +#' for the earliest or the latest datetime combination. Permitted values: +#' "min", "max". Default to "min". +#' @param date_format Format of source date variable +#' @param time_format Format of source time variable +#' +#' @return Data frame with 2 columns: unique subject and datetime variable +#' column storing the earliest/latest datetime. +#' +#' @export +#' @examples +#' EX <- tibble::tribble( +#' ~patient_number, ~EX_ST_DT, ~EX_EN_DT, ~EX_ST_TM, +#' "001", "26-10-1990", "10-01-1985", "10:20", +#' "001", "26-10-1990", "10-01-1985", "10:15", +#' "001", "26-10-1990", "10-01-1985", "10:19", +#' "002", "26-10-1991", NA, "UNK:UNK" +#' ) +#' +#'cal_min_max_date(EX, "EX_ST_DT", +#' "EX_ST_TM", date_format = "dd-mmm-yyyy", +#' time_format = "H:M") #' -#' Calculate minimum or maximum dates - cal_min_max_date <- function(raw_dataset, - sdtm_var_name, - date_variable, - val_type = "min" + date_variable, + time_variable, + val_type = "min", + date_format, + time_format ) { - browser() - # Check if date and time variable are present in the raw dataset + + # Check if date is present in the raw data frame date_not_in_data <- !(date_variable %in% colnames(raw_dataset)) - if (date_not_in_data) { - # Return Empty Dataset with SUBJID and sdtm_var_name - final_df <- setNames(data.frame(matrix(ncol = 2, nrow = 0)), c("patient_number", "datetime")) - return(final_df) + # Check if time variable is used and if present in the raw data frame + time_not_in_data <- !(time_variable %in% colnames(raw_dataset)) && !is.na(time_variable) + + # If both date and time variables are not present return the empty data frame + if (date_not_in_data || time_not_in_data) { + # Return Empty data frame with patient_number and datetime columns + empty_df <- setNames(data.frame(matrix(ncol = 2, nrow = 0)), c("patient_number", "datetime")) + cli::cli_warn(paste("Date variable",date_variable, "or Time variable", time_variable, + "not present in source data")) + return(empty_df) } final_df <- raw_dataset + # Time variable is not used then use only date + if (is.na(time_variable)) { + final_df$datetime <- create_iso8601(raw_dataset[[date_variable]], + .format = date_format) + } else { + # If both date and time variables are presen use both date and time + raw_dataset$date_time <- paste0(raw_dataset[[date_variable]], + raw_dataset[[time_variable]]) + format = paste0(date_format,time_format) - final_df$datetime <- create_iso8601(raw_dataset[[date_variable]], .format = "dd-mm-yyyy") + final_df$datetime <- create_iso8601(raw_dataset$date_time, + .format = format, + .na = c("UNK", "NA", "U","unk", "u", "un", "UNK")) + } final_df <- final_df |> - dplyr::select(c("SUBJID", "datetime"))|> unique() + dplyr::select(c("patient_number", "datetime"))|> + unique() final_df <- final_df |> dplyr::mutate(date_time = datetime) |> tidyr::separate( date_time, sep = "-|T|:", - into = c("year", "month", "day"), #, "hour", "minute" + into = c("year", "month", "day", "hour", "minute"), fill = "right", extra = "drop" - )|> + )|> list() |> setNames("x") |> with(replace(x, x == "UNK", NA)) |> @@ -44,21 +87,18 @@ cal_min_max_date <- function(raw_dataset, setNames("x") |> with(replace(x, x == "", NA)) - if (val_type == "min") { final_df1 <- final_df |> - dplyr::arrange(year, month, day)#, hour, minute) + dplyr::arrange(year, month, day, hour, minute) } else { final_df1 <- final_df |> - dplyr::arrange(dplyr::desc(year), dplyr::desc(month), dplyr::desc(day))#, dplyr::desc(hour), dplyr::desc(minute)) + dplyr::arrange(dplyr::desc(year), dplyr::desc(month), dplyr::desc(day), dplyr::desc(hour), dplyr::desc(minute)) } # Keep first appearance in the data frame since it is already sorted - final_df2 <- final_df1[!duplicated(final_df1$SUBJID), c("SUBJID", "datetime")] - - final_df2 <- final_df2 |> dplyr::filter(!is.na(datetime)) + final <- final_df1[!duplicated(final_df1$patient_number), c("patient_number", "datetime")] - # colnames(final_df2)[colnames(final_df2) == "datetime"] <- sdtm_var_name + final <- final |> dplyr::filter(!is.na(datetime)) - return(final_df2) + return(final) } diff --git a/R/oak_cal_ref_dates.R b/R/oak_cal_ref_dates.R index 2194c825..2add98c8 100644 --- a/R/oak_cal_ref_dates.R +++ b/R/oak_cal_ref_dates.R @@ -5,34 +5,45 @@ oak_cal_ref_dates <- function(ds_in = dm, der_var = "RFSTDTC", min_max = "min", - raw_dataset_list, - raw_date_list) { + ref_date_conf, + raw_source) { ds_out <- data.frame() - for(i in 1:length(raw_dataset_list)) { + for(i in 1:length(ref_date_conf$dataset_name)) { browser() - raw_dataset <- raw_source[[raw_dataset_list[i]]] - date_variable <- raw_date_list[[i]] + raw_dataset_name <- ref_date_conf$dataset_name[i] + date_variable <- ref_date_conf$date_var[i] + date_format <- ref_date_conf$dformat[i] + time_var <- ref_date_conf$time_var[i] + time_format <- ref_date_conf$tformat[i] + sdtm_var <- ref_date_conf$sdtm_var_name[i] + raw_dataset <- raw_source[[raw_dataset_name]] - ds_out1 <- cal_min_max_date( - raw_dataset = raw_dataset, - sdtm_var_name = der_var, - date_variable = date_variable, - val_type = min_max - ) - ds_out <- rbind(ds_out, ds_out1) + if (der_var == sdtm_var) { + ds_out1 <- cal_min_max_date( + raw_dataset = raw_dataset, + sdtm_var_name = der_var, + date_variable = date_variable, + time_variable = time_var, + date_format = date_format, + time_format = time_format, + val_type = min_max + ) + ds_out <- rbind(ds_out, ds_out1) + } } browser() -# ref_dates <- purrr::pmap_df(ds_out, .f = get_df) +#ref_dates <- purrr::pmap_df(ds_out, .f = get_df) if (min_max == "min") { - df_final <- ds_out %>% dplyr::arrange(SUBJID,datetime) + df_final <- ds_out %>% dplyr::arrange(patient_number,datetime) } else { - df_final <- ds_out %>% dplyr::arrange(dplyr::desc(SUBJID),dplyr::desc(datetime)) + df_final <- ds_out %>% dplyr::arrange(dplyr::desc(patient_number),dplyr::desc(datetime)) } -df_final <- df_final[!duplicated(df_final$SUBJID), c("SUBJID", "datetime")] +df_final <- df_final[!duplicated(df_final$patient_number), c("patient_number", "datetime")] colnames(df_final)[colnames(df_final) == "datetime"] <- der_var -return(df_final) +dm <- dplyr::left_join(ds_in, y = df_final, by = "patient_number") +return(dm) } From cdf00cea34c5e79be302a8fafbcfe1e1e19fc4aa Mon Sep 17 00:00:00 2001 From: muzzama-1990 Date: Fri, 20 Sep 2024 00:41:58 +0100 Subject: [PATCH 04/11] Adding test cases and updated documentation --- NAMESPACE | 2 + R/cal_min_max_date.R | 54 +++++++---- R/oak_cal_ref_dates.R | 118 ++++++++++++++++++------ man/cal_min_max_date.Rd | 64 +++++++++++++ man/oak_cal_ref_dates.Rd | 87 +++++++++++++++++ tests/testthat/test-cal_min_max_date.R | 47 ++++++++++ tests/testthat/test-oak_cal_ref_dates.R | 69 ++++++++++++++ 7 files changed, 395 insertions(+), 46 deletions(-) create mode 100644 man/cal_min_max_date.Rd create mode 100644 man/oak_cal_ref_dates.Rd create mode 100644 tests/testthat/test-cal_min_max_date.R create mode 100644 tests/testthat/test-oak_cal_ref_dates.R diff --git a/NAMESPACE b/NAMESPACE index 2dc0c0f3..13951835 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -8,6 +8,7 @@ export("%.>%") export(assign_ct) export(assign_datetime) export(assign_no_ct) +export(cal_min_max_date) export(condition_add) export(create_iso8601) export(ct_map) @@ -20,6 +21,7 @@ export(fmt_cmp) export(generate_oak_id_vars) export(hardcode_ct) export(hardcode_no_ct) +export(oak_cal_ref_dates) export(oak_id_vars) export(problems) export(read_ct_spec) diff --git a/R/cal_min_max_date.R b/R/cal_min_max_date.R index 901c854d..830cbce2 100644 --- a/R/cal_min_max_date.R +++ b/R/cal_min_max_date.R @@ -11,22 +11,35 @@ #' @param date_format Format of source date variable #' @param time_format Format of source time variable #' -#' @return Data frame with 2 columns: unique subject and datetime variable +#' @return Data frame with 2 columns: unique patient_number and datetime variable #' column storing the earliest/latest datetime. #' #' @export #' @examples #' EX <- tibble::tribble( -#' ~patient_number, ~EX_ST_DT, ~EX_EN_DT, ~EX_ST_TM, -#' "001", "26-10-1990", "10-01-1985", "10:20", -#' "001", "26-10-1990", "10-01-1985", "10:15", -#' "001", "26-10-1990", "10-01-1985", "10:19", -#' "002", "26-10-1991", NA, "UNK:UNK" +#' ~patient_number, ~EX_ST_DT, ~EX_ST_TM, +#' "001", "26-04-2022", "10:20", +#' "001", "25-04-2022", "10:15", +#' "001", "25-04-2022", "10:19", +#' "002", "26-05-2022", "UNK:UNK", +#' "002", "26-05-2022", "05:59" #' ) #' -#'cal_min_max_date(EX, "EX_ST_DT", -#' "EX_ST_TM", date_format = "dd-mmm-yyyy", -#' time_format = "H:M") +#'min <- cal_min_max_date(EX, +#' "EX_ST_DT", +#' "EX_ST_TM", +#' val_type = "min", +#' date_format = "dd-mmm-yyyy", +#' time_format = "H:M" +#' ) +#' +#'max <- cal_min_max_date(EX, +#' "EX_ST_DT", +#' "EX_ST_TM", +#' val_type = "max", +#' date_format = "dd-mmm-yyyy", +#' time_format = "H:M" +#' ) #' cal_min_max_date <- function(raw_dataset, date_variable, @@ -51,10 +64,10 @@ cal_min_max_date <- function(raw_dataset, return(empty_df) } - final_df <- raw_dataset + fin_df <- raw_dataset # Time variable is not used then use only date if (is.na(time_variable)) { - final_df$datetime <- create_iso8601(raw_dataset[[date_variable]], + fin_df$datetime <- create_iso8601(raw_dataset[[date_variable]], .format = date_format) } else { # If both date and time variables are presen use both date and time @@ -62,16 +75,17 @@ cal_min_max_date <- function(raw_dataset, raw_dataset[[time_variable]]) format = paste0(date_format,time_format) - final_df$datetime <- create_iso8601(raw_dataset$date_time, + fin_df$datetime <- as.character(create_iso8601(raw_dataset$date_time, .format = format, - .na = c("UNK", "NA", "U","unk", "u", "un", "UNK")) + .na = c("UNK", "NA", "U","unk", + "u", "un","UN"))) } - final_df <- final_df |> + fin_df <- fin_df |> dplyr::select(c("patient_number", "datetime"))|> unique() - final_df <- final_df |> + fin_df <- fin_df |> dplyr::mutate(date_time = datetime) |> tidyr::separate( date_time, @@ -88,17 +102,17 @@ cal_min_max_date <- function(raw_dataset, with(replace(x, x == "", NA)) if (val_type == "min") { - final_df1 <- final_df |> + final_df <- fin_df |> dplyr::arrange(year, month, day, hour, minute) } else { - final_df1 <- final_df |> + final_df <- fin_df |> dplyr::arrange(dplyr::desc(year), dplyr::desc(month), dplyr::desc(day), dplyr::desc(hour), dplyr::desc(minute)) } # Keep first appearance in the data frame since it is already sorted - final <- final_df1[!duplicated(final_df1$patient_number), c("patient_number", "datetime")] + final_df <- final_df[!duplicated(final_df$patient_number), c("patient_number", "datetime")] - final <- final |> dplyr::filter(!is.na(datetime)) + final_df <- final_df |> dplyr::filter(!is.na(datetime)) - return(final) + return(final_df) } diff --git a/R/oak_cal_ref_dates.R b/R/oak_cal_ref_dates.R index 2add98c8..9fc45665 100644 --- a/R/oak_cal_ref_dates.R +++ b/R/oak_cal_ref_dates.R @@ -1,28 +1,95 @@ -#' Calculate Reference dates RFSTDTC, RFENDTC, RFXSTDTC, RFXENDTC +#' Calculate Reference dates in ISO8601 character format. +#' +#' Populate RFSTDTC variable in demographic domain in ISO8601 character format. +#' +#' @description Derive RFSTDTC, RFENDTC, RFXENDTC, RFXSTDTC based on the input dates and time. +#' +#' +#' @param ds_in Data frame. DM domain. +#' @param der_var Character string. The SDTMv reference date to be derived. +#' @param min_max Minimum or Maximum date to be calculated based on the input. +#' Default set to Minimum. Values should be min or max. +#' @param ref_date_config_df Data frame which has the details of the variables to +#' be used for the calculation of reference dates. +#' Should has columns listed below: +#' dataset_name : Name of the raw dataset. +#' date_var : Date variable name from the raw dataset. +#' time_var : Time variable name from the raw dataset. +#' dformat : Format of the date collected in raw data. +#' tformat: Format of the time collected in raw data. +#' sdtm_var_name : Reference variable name. +#' @param raw_source List contains all the raw datasets. +#' @return DM data frame with the reference dates populated. +#' @export +#' @examples +#' dm <- tibble::tribble( +#' ~patient_number, ~USUBJID, ~SUBJID, ~SEX, +#' "001", "XXXX-001", "001", "F", +#' "002", "XXXX-002", "002", "M", +#' "003", "XXXX-003", "003", "M" +#' ) +#' +#' ref_date_config_df <- tibble::tribble( +#' ~dataset_name, ~date_var, ~time_var, ~dformat, ~tformat, ~sdtm_var_name, +#' "EX1", "EX_ST_DT1", "EX_ST_TM1", "dd-mm-yyyy", "H:M", "RFSTDTC", +#' "EX2", "EX_ST_DT2", NA, "dd-mmm-yyyy", NA, "RFSTDTC", +#' "EX1", "EX_EN_DT1", "EX_EN_TM1", "dd-mm-yyyy", "H:M", "RFENDTC", +#' "EX2", "EX_ST_DT2", NA, "dd-mmm-yyyy", NA, "RFENDTC" +#' ) +#' +#' EX1 <- tibble::tribble( +#' ~patient_number, ~EX_ST_DT1, ~EX_EN_DT1, ~EX_ST_TM1, ~EX_EN_TM1, +#' "001", "15-05-2023", "15-05-2023", "10:20", "11:00", +#' "001", "15-05-2023", "15-05-2023", "9:15", "10:00", +#' "001", "15-05-2023", "15-05-2023", "8:19", "09:00", +#' "002", "02-10-2023", "02-10-2023", "UNK:UNK", NA, +#' "002", "03-11-2023", "03-11-2023", "11:19", NA +#' ) +#' +#' EX2 <- tibble::tribble( +#' ~patient_number, ~EX_ST_DT2, +#' "001", "11-JUN-2023", +#' "002", "24-OCT-2023", +#' "002", "25-JUL-2023", +#' "002", "30-OCT-2023", +#' "002", "UNK-OCT-2023" +#' ) +#' +#' raw_source <- list(EX1 = EX1, EX2 = EX2) +#' +#' dm_df <- oak_cal_ref_dates(dm, +#' der_var = "RFSTDTC", +#' min_max = "max", +#' ref_date_config_df = ref_date_config_df, +#' raw_source +#' ) #' - oak_cal_ref_dates <- function(ds_in = dm, - der_var = "RFSTDTC", - min_max = "min", - ref_date_conf, - raw_source) { + der_var, + min_max = "min", + ref_date_config_df, + raw_source) { + + # Check if ref_date_config_df is a data frame and has all required variables + admiraldev::assert_data_frame(ref_date_config_df, required_vars = exprs(dataset_name, date_var, + time_var, dformat, + tformat, sdtm_var_name)) ds_out <- data.frame() - for(i in 1:length(ref_date_conf$dataset_name)) { -browser() - raw_dataset_name <- ref_date_conf$dataset_name[i] - date_variable <- ref_date_conf$date_var[i] - date_format <- ref_date_conf$dformat[i] - time_var <- ref_date_conf$time_var[i] - time_format <- ref_date_conf$tformat[i] - sdtm_var <- ref_date_conf$sdtm_var_name[i] + for(i in 1:length(ref_date_config_df$dataset_name)) { + + raw_dataset_name <- ref_date_config_df$dataset_name[i] + date_variable <- ref_date_config_df$date_var[i] + date_format <- ref_date_config_df$dformat[i] + time_var <- ref_date_config_df$time_var[i] + time_format <- ref_date_config_df$tformat[i] + sdtm_var <- ref_date_config_df$sdtm_var_name[i] raw_dataset <- raw_source[[raw_dataset_name]] if (der_var == sdtm_var) { ds_out1 <- cal_min_max_date( raw_dataset = raw_dataset, - sdtm_var_name = der_var, date_variable = date_variable, time_variable = time_var, date_format = date_format, @@ -32,18 +99,17 @@ browser() ds_out <- rbind(ds_out, ds_out1) } } -browser() + #ref_dates <- purrr::pmap_df(ds_out, .f = get_df) + if (min_max == "min") { + df_final <- ds_out %>% dplyr::arrange(patient_number,datetime) + } else { + df_final <- ds_out %>% dplyr::arrange(dplyr::desc(datetime)) + } -#ref_dates <- purrr::pmap_df(ds_out, .f = get_df) -if (min_max == "min") { - df_final <- ds_out %>% dplyr::arrange(patient_number,datetime) -} else { - df_final <- ds_out %>% dplyr::arrange(dplyr::desc(patient_number),dplyr::desc(datetime)) -} -df_final <- df_final[!duplicated(df_final$patient_number), c("patient_number", "datetime")] -colnames(df_final)[colnames(df_final) == "datetime"] <- der_var + df_final <- df_final[!duplicated(df_final$patient_number), c("patient_number", "datetime")] + colnames(df_final)[colnames(df_final) == "datetime"] <- der_var -dm <- dplyr::left_join(ds_in, y = df_final, by = "patient_number") -return(dm) + dm <- dplyr::left_join(ds_in, y = df_final, by = "patient_number") + return(dm) } diff --git a/man/cal_min_max_date.Rd b/man/cal_min_max_date.Rd new file mode 100644 index 00000000..601a00fa --- /dev/null +++ b/man/cal_min_max_date.Rd @@ -0,0 +1,64 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cal_min_max_date.R +\name{cal_min_max_date} +\alias{cal_min_max_date} +\title{Calculate minimum and maximum date and time in the dataframe} +\usage{ +cal_min_max_date( + raw_dataset, + date_variable, + time_variable, + val_type = "min", + date_format, + time_format +) +} +\arguments{ +\item{raw_dataset}{Raw source data frame} + +\item{date_variable}{Single character string. Name of the date variable} + +\item{time_variable}{Single character string. Name of the time variable} + +\item{val_type}{Single character string determining whether to look +for the earliest or the latest datetime combination. Permitted values: +"min", "max". Default to "min".} + +\item{date_format}{Format of source date variable} + +\item{time_format}{Format of source time variable} +} +\value{ +Data frame with 2 columns: unique patient_number and datetime variable +column storing the earliest/latest datetime. +} +\description{ +This function derives the earliest/latest ISO8601 datetime +} +\examples{ +EX <- tibble::tribble( + ~patient_number, ~EX_ST_DT, ~EX_ST_TM, + "001", "26-04-2022", "10:20", + "001", "25-04-2022", "10:15", + "001", "25-04-2022", "10:19", + "002", "26-05-2022", "UNK:UNK", + "002", "26-05-2022", "05:59" + ) + +min <- cal_min_max_date(EX, + "EX_ST_DT", + "EX_ST_TM", + val_type = "min", + date_format = "dd-mmm-yyyy", + time_format = "H:M" + ) + +max <- cal_min_max_date(EX, + "EX_ST_DT", + "EX_ST_TM", + val_type = "max", + date_format = "dd-mmm-yyyy", + time_format = "H:M" + ) + +} diff --git a/man/oak_cal_ref_dates.Rd b/man/oak_cal_ref_dates.Rd new file mode 100644 index 00000000..4c8ff8d7 --- /dev/null +++ b/man/oak_cal_ref_dates.Rd @@ -0,0 +1,87 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/oak_cal_ref_dates.R +\name{oak_cal_ref_dates} +\alias{oak_cal_ref_dates} +\title{Calculate Reference dates in ISO8601 character format.} +\usage{ +oak_cal_ref_dates( + ds_in = dm, + der_var, + min_max = "min", + ref_date_config_df, + raw_source +) +} +\arguments{ +\item{ds_in}{Data frame. DM domain.} + +\item{der_var}{Character string. The SDTMv reference date to be derived.} + +\item{min_max}{Minimum or Maximum date to be calculated based on the input. +Default set to Minimum. Values should be min or max.} + +\item{ref_date_config_df}{Data frame which has the details of the variables to +be used for the calculation of reference dates. +Should has columns listed below: +dataset_name : Name of the raw dataset. +date_var : Date variable name from the raw dataset. +time_var : Time variable name from the raw dataset. +dformat : Format of the date collected in raw data. +tformat: Format of the time collected in raw data. +sdtm_var_name : Reference variable name.} + +\item{raw_source}{List contains all the raw datasets.} +} +\value{ +DM data frame with the reference dates populated. +} +\description{ +Derive RFSTDTC, RFENDTC, RFXENDTC, RFXSTDTC based on the input dates and time. +} +\details{ +Populate RFSTDTC variable in demographic domain in ISO8601 character format. +} +\examples{ +dm <- tibble::tribble( + ~patient_number, ~USUBJID, ~SUBJID, ~SEX, + "001", "XXXX-001", "001", "F", + "002", "XXXX-002", "002", "M", + "003", "XXXX-003", "003", "M" + ) + + ref_date_config_df <- tibble::tribble( + ~dataset_name, ~date_var, ~time_var, ~dformat, ~tformat, ~sdtm_var_name, + "EX1", "EX_ST_DT1", "EX_ST_TM1", "dd-mm-yyyy", "H:M", "RFSTDTC", + "EX2", "EX_ST_DT2", NA, "dd-mmm-yyyy", NA, "RFSTDTC", + "EX1", "EX_EN_DT1", "EX_EN_TM1", "dd-mm-yyyy", "H:M", "RFENDTC", + "EX2", "EX_ST_DT2", NA, "dd-mmm-yyyy", NA, "RFENDTC" + ) + + EX1 <- tibble::tribble( + ~patient_number, ~EX_ST_DT1, ~EX_EN_DT1, ~EX_ST_TM1, ~EX_EN_TM1, + "001", "15-05-2023", "15-05-2023", "10:20", "11:00", + "001", "15-05-2023", "15-05-2023", "9:15", "10:00", + "001", "15-05-2023", "15-05-2023", "8:19", "09:00", + "002", "02-10-2023", "02-10-2023", "UNK:UNK", NA, + "002", "03-11-2023", "03-11-2023", "11:19", NA + ) + + EX2 <- tibble::tribble( + ~patient_number, ~EX_ST_DT2, + "001", "11-JUN-2023", + "002", "24-OCT-2023", + "002", "25-JUL-2023", + "002", "30-OCT-2023", + "002", "UNK-OCT-2023" + ) + + raw_source <- list(EX1 = EX1, EX2 = EX2) + + dm_df <- oak_cal_ref_dates(dm, + der_var = "RFSTDTC", + min_max = "max", + ref_date_config_df = ref_date_config_df, + raw_source + ) + +} diff --git a/tests/testthat/test-cal_min_max_date.R b/tests/testthat/test-cal_min_max_date.R new file mode 100644 index 00000000..259ffd99 --- /dev/null +++ b/tests/testthat/test-cal_min_max_date.R @@ -0,0 +1,47 @@ +test_that("cal_min_max_date works as expected", { + + EX <- tibble::tribble( + ~patient_number, ~EX_ST_DT, ~EX_ST_TM, + "001", "26-04-2022", "10:20", + "001", "25-04-2022", "10:15", + "001", "25-04-2022", "10:19", + "002", "26-05-2022", "06:23", + "002", "26-05-2022", "04:59", + "002", "26-05-2022", "05:59" + ) + + expected_min <- tibble::tribble( + ~patient_number, ~datetime, + "001", "2022-04-25T10:15", + "002", "2022-05-26T04:59" + ) + + expected_max <- tibble::tribble( + ~patient_number, ~datetime, + "002", "2022-05-26T06:23", + "001", "2022-04-26T10:20" + ) + + observed_min <- cal_min_max_date(EX, + "EX_ST_DT", + "EX_ST_TM", + val_type = "min", + date_format = "dd-mmm-yyyy", + time_format = "H:M" + ) + + expect_identical(observed_min, expected_min) + + observed_max <- cal_min_max_date(EX, + "EX_ST_DT", + "EX_ST_TM", + val_type = "max", + date_format = "dd-mmm-yyyy", + time_format = "H:M" + ) + + expect_identical(observed_max, expected_max) + +}) + + diff --git a/tests/testthat/test-oak_cal_ref_dates.R b/tests/testthat/test-oak_cal_ref_dates.R new file mode 100644 index 00000000..a99f69aa --- /dev/null +++ b/tests/testthat/test-oak_cal_ref_dates.R @@ -0,0 +1,69 @@ +dm <- tibble::tribble( + ~patient_number, ~USUBJID, ~SUBJID, ~SEX, + "001", "XXXX-001", "001", "F", + "002", "XXXX-002", "002", "M", + "003", "XXXX-003", "003", "M" + ) + +expected <- tibble::tribble( + ~patient_number, ~USUBJID, ~SUBJID, ~SEX, ~RFSTDTC, ~RFENDTC, + "001", "XXXX-001", "001", "F", "2023-05-15T08:19", "2023-06-11", + "002", "XXXX-002", "002", "M", "2023-07-25", "2023-11-03T20:30", + "003", "XXXX-003", "003", "M", NA_character_, NA_character_ + ) + +ref_date_conf_df <- tibble::tribble( + ~dataset_name, ~date_var, ~time_var, ~dformat, ~tformat, ~sdtm_var_name, + "EX1", "EX_ST_DT1", "EX_ST_TM1", "dd-mm-yyyy", "H:M", "RFSTDTC", + "EX2", "EX_ST_DT2", NA_character_, "dd-mmm-yyyy", NA_character_, "RFSTDTC", + "EX1", "EX_EN_DT1", "EX_EN_TM1", "dd-mm-yyyy", "H:M", "RFENDTC", + "EX2", "EX_ST_DT2", NA_character_, "dd-mmm-yyyy", NA_character_, "RFENDTC" + ) + +EX1 <- tibble::tribble( + ~patient_number, ~EX_ST_DT1, ~EX_EN_DT1, ~EX_ST_TM1, ~EX_EN_TM1, + "001", "15-05-2023", "15-05-2023", "10:20", "11:00", + "001", "15-05-2023", "15-05-2023", "9:15", "10:00", + "001", "15-05-2023", "15-05-2023", "8:19", "09:00", + "002", "02-10-2023", "02-10-2023", "UNK:UNK", NA_character_, + "002", "0l-11-2023", "03-11-2023", "11:19", "20:30" + ) + +EX2 <- tibble::tribble( + ~patient_number, ~EX_ST_DT2, + "001", "11-JUN-2023", + "002", "24-OCT-2023", + "002", "25-JUL-2023", + "002", "30-OCT-2023", + "002", "UNK-OCT-2023" + ) + +raw_source <- list(EX1 = EX1, EX2 = EX2) + +test_that("Calculate the Reference dates :RFSTDTC", { + observed_rfstdtc <- oak_cal_ref_dates(dm, + der_var = "RFSTDTC", + min_max = "min", + ref_date_config_df = ref_date_conf_df, + raw_source + ) + expected_rfstdtc <- expected |> dplyr::select(-"RFENDTC") + + expect_identical(observed_rfstdtc, expected_rfstdtc) + +}) + +test_that("Calculate the Reference dates :RFENDTC", { + + observed_rfendtc <- oak_cal_ref_dates(dm, + der_var = "RFENDTC", + min_max = "max", + ref_date_config_df = ref_date_conf_df, + raw_source + ) + + expected_rfendtc <- expected |> dplyr::select(-"RFSTDTC") + expect_identical(observed_rfendtc, expected_rfendtc) + +}) + From 1b90edbb7c8e4ad131472cb9f722c64660237690 Mon Sep 17 00:00:00 2001 From: muzzama-1990 Date: Fri, 20 Sep 2024 02:42:13 +0100 Subject: [PATCH 05/11] updates --- R/cal_min_max_date.R | 87 +++++++++++++----------- R/globals.R | 4 +- R/oak_cal_ref_dates.R | 90 ++++++++++++------------- _pkgdown.yml | 10 +++ man/cal_min_max_date.Rd | 38 +++++------ man/oak_cal_ref_dates.Rd | 70 +++++++++---------- tests/testthat/test-cal_min_max_date.R | 76 +++++++++++++-------- tests/testthat/test-oak_cal_ref_dates.R | 82 +++++++++++----------- 8 files changed, 244 insertions(+), 213 deletions(-) diff --git a/R/cal_min_max_date.R b/R/cal_min_max_date.R index 830cbce2..16cf625b 100644 --- a/R/cal_min_max_date.R +++ b/R/cal_min_max_date.R @@ -17,50 +17,51 @@ #' @export #' @examples #' EX <- tibble::tribble( -#' ~patient_number, ~EX_ST_DT, ~EX_ST_TM, -#' "001", "26-04-2022", "10:20", -#' "001", "25-04-2022", "10:15", -#' "001", "25-04-2022", "10:19", -#' "002", "26-05-2022", "UNK:UNK", -#' "002", "26-05-2022", "05:59" -#' ) +#' ~patient_number, ~EX_ST_DT, ~EX_ST_TM, +#' "001", "26-04-2022", "10:20", +#' "001", "25-04-2022", "10:15", +#' "001", "25-04-2022", "10:19", +#' "002", "26-05-2022", "UNK:UNK", +#' "002", "26-05-2022", "05:59" +#' ) #' -#'min <- cal_min_max_date(EX, -#' "EX_ST_DT", -#' "EX_ST_TM", -#' val_type = "min", -#' date_format = "dd-mmm-yyyy", -#' time_format = "H:M" -#' ) +#' min <- cal_min_max_date(EX, +#' "EX_ST_DT", +#' "EX_ST_TM", +#' val_type = "min", +#' date_format = "dd-mmm-yyyy", +#' time_format = "H:M" +#' ) #' -#'max <- cal_min_max_date(EX, -#' "EX_ST_DT", -#' "EX_ST_TM", -#' val_type = "max", -#' date_format = "dd-mmm-yyyy", -#' time_format = "H:M" -#' ) +#' max <- cal_min_max_date(EX, +#' "EX_ST_DT", +#' "EX_ST_TM", +#' val_type = "max", +#' date_format = "dd-mmm-yyyy", +#' time_format = "H:M" +#' ) #' cal_min_max_date <- function(raw_dataset, date_variable, time_variable, val_type = "min", date_format, - time_format -) { - + time_format) { # Check if date is present in the raw data frame date_not_in_data <- !(date_variable %in% colnames(raw_dataset)) - # Check if time variable is used and if present in the raw data frame + # Check if time variable is used and present in the raw data frame time_not_in_data <- !(time_variable %in% colnames(raw_dataset)) && !is.na(time_variable) - # If both date and time variables are not present return the empty data frame + # If date/time variables not present return the empty data frame if (date_not_in_data || time_not_in_data) { # Return Empty data frame with patient_number and datetime columns - empty_df <- setNames(data.frame(matrix(ncol = 2, nrow = 0)), c("patient_number", "datetime")) - cli::cli_warn(paste("Date variable",date_variable, "or Time variable", time_variable, - "not present in source data")) + empty_df <- stats::setNames(data.frame(matrix(ncol = 2L, nrow = 0L)), + c("patient_number", "datetime")) + cli::cli_warn(paste( + "Date variable", date_variable, "or Time variable", time_variable, + "not present in source data" + )) return(empty_df) } @@ -68,21 +69,27 @@ cal_min_max_date <- function(raw_dataset, # Time variable is not used then use only date if (is.na(time_variable)) { fin_df$datetime <- create_iso8601(raw_dataset[[date_variable]], - .format = date_format) + .format = date_format + ) } else { # If both date and time variables are presen use both date and time - raw_dataset$date_time <- paste0(raw_dataset[[date_variable]], - raw_dataset[[time_variable]]) - format = paste0(date_format,time_format) + raw_dataset$date_time <- paste0( + raw_dataset[[date_variable]], + raw_dataset[[time_variable]] + ) + format <- paste0(date_format, time_format) fin_df$datetime <- as.character(create_iso8601(raw_dataset$date_time, - .format = format, - .na = c("UNK", "NA", "U","unk", - "u", "un","UN"))) + .format = format, + .na = c( + "UNK", "NA", "U", "unk", + "u", "un", "UN" + ) + )) } fin_df <- fin_df |> - dplyr::select(c("patient_number", "datetime"))|> + dplyr::select(c("patient_number", "datetime")) |> unique() fin_df <- fin_df |> @@ -93,12 +100,12 @@ cal_min_max_date <- function(raw_dataset, into = c("year", "month", "day", "hour", "minute"), fill = "right", extra = "drop" - )|> + ) |> list() |> - setNames("x") |> + stats::setNames("x") |> with(replace(x, x == "UNK", NA)) |> list() |> - setNames("x") |> + stats::setNames("x") |> with(replace(x, x == "", NA)) if (val_type == "min") { diff --git a/R/globals.R b/R/globals.R index 9a2998a0..3b1f54ba 100644 --- a/R/globals.R +++ b/R/globals.R @@ -1,4 +1,6 @@ utils::globalVariables(c( "USUBJID", "VISIT", "dom_dt", "dom_tm", "ref_dt", - "ref_tm" + "ref_tm", "datetime", "date_time", "year", "month", + "day", "hour", "minute", "dataset_name", "date_var", + "dformat", "tformat", "sdtm_var_name", "patient_number" )) diff --git a/R/oak_cal_ref_dates.R b/R/oak_cal_ref_dates.R index 9fc45665..5ab86728 100644 --- a/R/oak_cal_ref_dates.R +++ b/R/oak_cal_ref_dates.R @@ -6,7 +6,7 @@ #' #' #' @param ds_in Data frame. DM domain. -#' @param der_var Character string. The SDTMv reference date to be derived. +#' @param der_var Character string. The reference date to be derived. #' @param min_max Minimum or Maximum date to be calculated based on the input. #' Default set to Minimum. Values should be min or max. #' @param ref_date_config_df Data frame which has the details of the variables to @@ -24,61 +24,60 @@ #' @examples #' dm <- tibble::tribble( #' ~patient_number, ~USUBJID, ~SUBJID, ~SEX, -#' "001", "XXXX-001", "001", "F", -#' "002", "XXXX-002", "002", "M", -#' "003", "XXXX-003", "003", "M" -#' ) +#' "001", "XXXX-001", "001", "F", +#' "002", "XXXX-002", "002", "M", +#' "003", "XXXX-003", "003", "M" +#' ) #' -#' ref_date_config_df <- tibble::tribble( -#' ~dataset_name, ~date_var, ~time_var, ~dformat, ~tformat, ~sdtm_var_name, -#' "EX1", "EX_ST_DT1", "EX_ST_TM1", "dd-mm-yyyy", "H:M", "RFSTDTC", -#' "EX2", "EX_ST_DT2", NA, "dd-mmm-yyyy", NA, "RFSTDTC", -#' "EX1", "EX_EN_DT1", "EX_EN_TM1", "dd-mm-yyyy", "H:M", "RFENDTC", -#' "EX2", "EX_ST_DT2", NA, "dd-mmm-yyyy", NA, "RFENDTC" -#' ) +#' ref_date_config_df <- tibble::tribble( +#' ~dataset_name, ~date_var, ~time_var, ~dformat, ~tformat, ~sdtm_var_name, +#' "EX1", "EX_ST_DT1", "EX_ST_TM1", "dd-mm-yyyy", "H:M", "RFSTDTC", +#' "EX2", "EX_ST_DT2", NA, "dd-mmm-yyyy", NA, "RFSTDTC", +#' "EX1", "EX_EN_DT1", "EX_EN_TM1", "dd-mm-yyyy", "H:M", "RFENDTC", +#' "EX2", "EX_ST_DT2", NA, "dd-mmm-yyyy", NA, "RFENDTC" +#' ) #' -#' EX1 <- tibble::tribble( -#' ~patient_number, ~EX_ST_DT1, ~EX_EN_DT1, ~EX_ST_TM1, ~EX_EN_TM1, -#' "001", "15-05-2023", "15-05-2023", "10:20", "11:00", -#' "001", "15-05-2023", "15-05-2023", "9:15", "10:00", -#' "001", "15-05-2023", "15-05-2023", "8:19", "09:00", -#' "002", "02-10-2023", "02-10-2023", "UNK:UNK", NA, -#' "002", "03-11-2023", "03-11-2023", "11:19", NA -#' ) +#' EX1 <- tibble::tribble( +#' ~patient_number, ~EX_ST_DT1, ~EX_EN_DT1, ~EX_ST_TM1, ~EX_EN_TM1, +#' "001", "15-05-2023", "15-05-2023", "10:20", "11:00", +#' "001", "15-05-2023", "15-05-2023", "9:15", "10:00", +#' "001", "15-05-2023", "15-05-2023", "8:19", "09:00", +#' "002", "02-10-2023", "02-10-2023", "UNK:UNK", NA, +#' "002", "03-11-2023", "03-11-2023", "11:19", NA +#' ) #' -#' EX2 <- tibble::tribble( -#' ~patient_number, ~EX_ST_DT2, -#' "001", "11-JUN-2023", -#' "002", "24-OCT-2023", -#' "002", "25-JUL-2023", -#' "002", "30-OCT-2023", -#' "002", "UNK-OCT-2023" -#' ) +#' EX2 <- tibble::tribble( +#' ~patient_number, ~EX_ST_DT2, +#' "001", "11-JUN-2023", +#' "002", "24-OCT-2023", +#' "002", "25-JUL-2023", +#' "002", "30-OCT-2023", +#' "002", "UNK-OCT-2023" +#' ) #' -#' raw_source <- list(EX1 = EX1, EX2 = EX2) +#' raw_source <- list(EX1 = EX1, EX2 = EX2) #' -#' dm_df <- oak_cal_ref_dates(dm, -#' der_var = "RFSTDTC", -#' min_max = "max", -#' ref_date_config_df = ref_date_config_df, -#' raw_source -#' ) +#' dm_df <- oak_cal_ref_dates(dm, +#' der_var = "RFSTDTC", +#' min_max = "max", +#' ref_date_config_df = ref_date_config_df, +#' raw_source +#' ) #' - oak_cal_ref_dates <- function(ds_in = dm, der_var, min_max = "min", ref_date_config_df, raw_source) { - # Check if ref_date_config_df is a data frame and has all required variables - admiraldev::assert_data_frame(ref_date_config_df, required_vars = exprs(dataset_name, date_var, - time_var, dformat, - tformat, sdtm_var_name)) + admiraldev::assert_data_frame(ref_date_config_df, required_vars = exprs( + dataset_name, date_var, + time_var, dformat, + tformat, sdtm_var_name + )) ds_out <- data.frame() - for(i in 1:length(ref_date_config_df$dataset_name)) { - + for (i in seq_along(ref_date_config_df$dataset_name)) { raw_dataset_name <- ref_date_config_df$dataset_name[i] date_variable <- ref_date_config_df$date_var[i] date_format <- ref_date_config_df$dformat[i] @@ -99,11 +98,11 @@ oak_cal_ref_dates <- function(ds_in = dm, ds_out <- rbind(ds_out, ds_out1) } } - #ref_dates <- purrr::pmap_df(ds_out, .f = get_df) + if (min_max == "min") { - df_final <- ds_out %>% dplyr::arrange(patient_number,datetime) + df_final <- ds_out |> dplyr::arrange(patient_number, datetime) } else { - df_final <- ds_out %>% dplyr::arrange(dplyr::desc(datetime)) + df_final <- ds_out |> dplyr::arrange(dplyr::desc(datetime)) } df_final <- df_final[!duplicated(df_final$patient_number), c("patient_number", "datetime")] @@ -111,5 +110,4 @@ oak_cal_ref_dates <- function(ds_in = dm, dm <- dplyr::left_join(ds_in, y = df_final, by = "patient_number") return(dm) - } diff --git a/_pkgdown.yml b/_pkgdown.yml index 87f2cb80..83020855 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -49,11 +49,19 @@ reference: - dtc_formats - problems +- title: Calculation of reference dates in DM + contents: + - oak_cal_ref_dates + - title: Explicit dot pipe operator desc: A simple alternative to `%>% {...}` contents: - "%.>%" +- title: Calculation of minimum/maximum ISO8601 dates + contents: + - cal_min_max_date + authors: Ramiro Magno: href: https://www.pattern.institute/team/rmagno/ @@ -63,3 +71,5 @@ authors: href: https://www.linkedin.com/in/edgar-manukyan-20987927 Shiyu Chen: href: https://www.linkedin.com/in/shiyu-chen-55a55410a/ + Mohsin Uzzama: + href: https://www.linkedin.com/in/mohsin-uzzama-34066741/ diff --git a/man/cal_min_max_date.Rd b/man/cal_min_max_date.Rd index 601a00fa..5d977d2f 100644 --- a/man/cal_min_max_date.Rd +++ b/man/cal_min_max_date.Rd @@ -37,28 +37,28 @@ This function derives the earliest/latest ISO8601 datetime } \examples{ EX <- tibble::tribble( - ~patient_number, ~EX_ST_DT, ~EX_ST_TM, - "001", "26-04-2022", "10:20", - "001", "25-04-2022", "10:15", - "001", "25-04-2022", "10:19", - "002", "26-05-2022", "UNK:UNK", - "002", "26-05-2022", "05:59" - ) + ~patient_number, ~EX_ST_DT, ~EX_ST_TM, + "001", "26-04-2022", "10:20", + "001", "25-04-2022", "10:15", + "001", "25-04-2022", "10:19", + "002", "26-05-2022", "UNK:UNK", + "002", "26-05-2022", "05:59" +) min <- cal_min_max_date(EX, - "EX_ST_DT", - "EX_ST_TM", - val_type = "min", - date_format = "dd-mmm-yyyy", - time_format = "H:M" - ) + "EX_ST_DT", + "EX_ST_TM", + val_type = "min", + date_format = "dd-mmm-yyyy", + time_format = "H:M" +) max <- cal_min_max_date(EX, - "EX_ST_DT", - "EX_ST_TM", - val_type = "max", - date_format = "dd-mmm-yyyy", - time_format = "H:M" - ) + "EX_ST_DT", + "EX_ST_TM", + val_type = "max", + date_format = "dd-mmm-yyyy", + time_format = "H:M" +) } diff --git a/man/oak_cal_ref_dates.Rd b/man/oak_cal_ref_dates.Rd index 4c8ff8d7..11abd29b 100644 --- a/man/oak_cal_ref_dates.Rd +++ b/man/oak_cal_ref_dates.Rd @@ -15,7 +15,7 @@ oak_cal_ref_dates( \arguments{ \item{ds_in}{Data frame. DM domain.} -\item{der_var}{Character string. The SDTMv reference date to be derived.} +\item{der_var}{Character string. The reference date to be derived.} \item{min_max}{Minimum or Maximum date to be calculated based on the input. Default set to Minimum. Values should be min or max.} @@ -44,44 +44,44 @@ Populate RFSTDTC variable in demographic domain in ISO8601 character format. \examples{ dm <- tibble::tribble( ~patient_number, ~USUBJID, ~SUBJID, ~SEX, - "001", "XXXX-001", "001", "F", - "002", "XXXX-002", "002", "M", - "003", "XXXX-003", "003", "M" - ) + "001", "XXXX-001", "001", "F", + "002", "XXXX-002", "002", "M", + "003", "XXXX-003", "003", "M" +) - ref_date_config_df <- tibble::tribble( - ~dataset_name, ~date_var, ~time_var, ~dformat, ~tformat, ~sdtm_var_name, - "EX1", "EX_ST_DT1", "EX_ST_TM1", "dd-mm-yyyy", "H:M", "RFSTDTC", - "EX2", "EX_ST_DT2", NA, "dd-mmm-yyyy", NA, "RFSTDTC", - "EX1", "EX_EN_DT1", "EX_EN_TM1", "dd-mm-yyyy", "H:M", "RFENDTC", - "EX2", "EX_ST_DT2", NA, "dd-mmm-yyyy", NA, "RFENDTC" - ) +ref_date_config_df <- tibble::tribble( + ~dataset_name, ~date_var, ~time_var, ~dformat, ~tformat, ~sdtm_var_name, + "EX1", "EX_ST_DT1", "EX_ST_TM1", "dd-mm-yyyy", "H:M", "RFSTDTC", + "EX2", "EX_ST_DT2", NA, "dd-mmm-yyyy", NA, "RFSTDTC", + "EX1", "EX_EN_DT1", "EX_EN_TM1", "dd-mm-yyyy", "H:M", "RFENDTC", + "EX2", "EX_ST_DT2", NA, "dd-mmm-yyyy", NA, "RFENDTC" +) - EX1 <- tibble::tribble( - ~patient_number, ~EX_ST_DT1, ~EX_EN_DT1, ~EX_ST_TM1, ~EX_EN_TM1, - "001", "15-05-2023", "15-05-2023", "10:20", "11:00", - "001", "15-05-2023", "15-05-2023", "9:15", "10:00", - "001", "15-05-2023", "15-05-2023", "8:19", "09:00", - "002", "02-10-2023", "02-10-2023", "UNK:UNK", NA, - "002", "03-11-2023", "03-11-2023", "11:19", NA - ) +EX1 <- tibble::tribble( + ~patient_number, ~EX_ST_DT1, ~EX_EN_DT1, ~EX_ST_TM1, ~EX_EN_TM1, + "001", "15-05-2023", "15-05-2023", "10:20", "11:00", + "001", "15-05-2023", "15-05-2023", "9:15", "10:00", + "001", "15-05-2023", "15-05-2023", "8:19", "09:00", + "002", "02-10-2023", "02-10-2023", "UNK:UNK", NA, + "002", "03-11-2023", "03-11-2023", "11:19", NA +) - EX2 <- tibble::tribble( - ~patient_number, ~EX_ST_DT2, - "001", "11-JUN-2023", - "002", "24-OCT-2023", - "002", "25-JUL-2023", - "002", "30-OCT-2023", - "002", "UNK-OCT-2023" - ) +EX2 <- tibble::tribble( + ~patient_number, ~EX_ST_DT2, + "001", "11-JUN-2023", + "002", "24-OCT-2023", + "002", "25-JUL-2023", + "002", "30-OCT-2023", + "002", "UNK-OCT-2023" +) - raw_source <- list(EX1 = EX1, EX2 = EX2) +raw_source <- list(EX1 = EX1, EX2 = EX2) - dm_df <- oak_cal_ref_dates(dm, - der_var = "RFSTDTC", - min_max = "max", - ref_date_config_df = ref_date_config_df, - raw_source - ) +dm_df <- oak_cal_ref_dates(dm, + der_var = "RFSTDTC", + min_max = "max", + ref_date_config_df = ref_date_config_df, + raw_source +) } diff --git a/tests/testthat/test-cal_min_max_date.R b/tests/testthat/test-cal_min_max_date.R index 259ffd99..43a8befb 100644 --- a/tests/testthat/test-cal_min_max_date.R +++ b/tests/testthat/test-cal_min_max_date.R @@ -1,47 +1,65 @@ test_that("cal_min_max_date works as expected", { - EX <- tibble::tribble( - ~patient_number, ~EX_ST_DT, ~EX_ST_TM, - "001", "26-04-2022", "10:20", - "001", "25-04-2022", "10:15", - "001", "25-04-2022", "10:19", - "002", "26-05-2022", "06:23", - "002", "26-05-2022", "04:59", - "002", "26-05-2022", "05:59" - ) + ~patient_number, ~EX_ST_DT, ~EX_ST_TM, + "001", "26-04-2022", "10:20", + "001", "25-04-2022", "10:15", + "001", "25-04-2022", "10:19", + "002", "26-05-2022", "06:23", + "002", "26-05-2022", "04:59", + "002", "26-05-2022", "05:59" + ) expected_min <- tibble::tribble( ~patient_number, ~datetime, - "001", "2022-04-25T10:15", - "002", "2022-05-26T04:59" - ) + "001", "2022-04-25T10:15", + "002", "2022-05-26T04:59" + ) expected_max <- tibble::tribble( - ~patient_number, ~datetime, - "002", "2022-05-26T06:23", - "001", "2022-04-26T10:20" - ) + ~patient_number, ~datetime, + "002", "2022-05-26T06:23", + "001", "2022-04-26T10:20" + ) observed_min <- cal_min_max_date(EX, - "EX_ST_DT", - "EX_ST_TM", - val_type = "min", - date_format = "dd-mmm-yyyy", - time_format = "H:M" - ) + "EX_ST_DT", + "EX_ST_TM", + val_type = "min", + date_format = "dd-mmm-yyyy", + time_format = "H:M" + ) expect_identical(observed_min, expected_min) observed_max <- cal_min_max_date(EX, - "EX_ST_DT", - "EX_ST_TM", - val_type = "max", - date_format = "dd-mmm-yyyy", - time_format = "H:M" - ) + "EX_ST_DT", + "EX_ST_TM", + val_type = "max", + date_format = "dd-mmm-yyyy", + time_format = "H:M" + ) expect_identical(observed_max, expected_max) - }) +test_that("Warning is displayed if date or time variables parameters passed are not present", { + + EX <- tibble::tribble( + ~patient_number, ~EX_ST_DT, + "001", "26-04-2022" + ) + + warning_msg <- "Date variable EX_ST_DT or Time variable EX_ST_TM not present in source data" + expect_warning(observed <- cal_min_max_date(EX, + "EX_ST_DT", + "EX_ST_TM", + val_type = "max", + date_format = "dd-mmm-yyyy", + time_format = "H:M"), + regexp = warning_msg) + expected <- stats::setNames(data.frame(matrix(ncol = 2L, nrow = 0L)), + c("patient_number", "datetime")) + + expect_identical(observed, expected) +}) diff --git a/tests/testthat/test-oak_cal_ref_dates.R b/tests/testthat/test-oak_cal_ref_dates.R index a99f69aa..8333a770 100644 --- a/tests/testthat/test-oak_cal_ref_dates.R +++ b/tests/testthat/test-oak_cal_ref_dates.R @@ -1,69 +1,65 @@ dm <- tibble::tribble( ~patient_number, ~USUBJID, ~SUBJID, ~SEX, - "001", "XXXX-001", "001", "F", - "002", "XXXX-002", "002", "M", - "003", "XXXX-003", "003", "M" - ) + "001", "XXXX-001", "001", "F", + "002", "XXXX-002", "002", "M", + "003", "XXXX-003", "003", "M" +) expected <- tibble::tribble( - ~patient_number, ~USUBJID, ~SUBJID, ~SEX, ~RFSTDTC, ~RFENDTC, - "001", "XXXX-001", "001", "F", "2023-05-15T08:19", "2023-06-11", - "002", "XXXX-002", "002", "M", "2023-07-25", "2023-11-03T20:30", - "003", "XXXX-003", "003", "M", NA_character_, NA_character_ - ) + ~patient_number, ~USUBJID, ~SUBJID, ~SEX, ~RFSTDTC, ~RFENDTC, + "001", "XXXX-001", "001", "F", "2023-05-15T08:19", "2023-06-11", + "002", "XXXX-002", "002", "M", "2023-07-25", "2023-11-03T20:30", + "003", "XXXX-003", "003", "M", NA_character_, NA_character_ +) ref_date_conf_df <- tibble::tribble( - ~dataset_name, ~date_var, ~time_var, ~dformat, ~tformat, ~sdtm_var_name, - "EX1", "EX_ST_DT1", "EX_ST_TM1", "dd-mm-yyyy", "H:M", "RFSTDTC", - "EX2", "EX_ST_DT2", NA_character_, "dd-mmm-yyyy", NA_character_, "RFSTDTC", - "EX1", "EX_EN_DT1", "EX_EN_TM1", "dd-mm-yyyy", "H:M", "RFENDTC", - "EX2", "EX_ST_DT2", NA_character_, "dd-mmm-yyyy", NA_character_, "RFENDTC" - ) + ~dataset_name, ~date_var, ~time_var, ~dformat, ~tformat, ~sdtm_var_name, + "EX1", "EX_ST_DT1", "EX_ST_TM1", "dd-mm-yyyy", "H:M", "RFSTDTC", + "EX2", "EX_ST_DT2", NA_character_, "dd-mmm-yyyy", NA_character_, "RFSTDTC", + "EX1", "EX_EN_DT1", "EX_EN_TM1", "dd-mm-yyyy", "H:M", "RFENDTC", + "EX2", "EX_ST_DT2", NA_character_, "dd-mmm-yyyy", NA_character_, "RFENDTC" +) EX1 <- tibble::tribble( - ~patient_number, ~EX_ST_DT1, ~EX_EN_DT1, ~EX_ST_TM1, ~EX_EN_TM1, - "001", "15-05-2023", "15-05-2023", "10:20", "11:00", - "001", "15-05-2023", "15-05-2023", "9:15", "10:00", - "001", "15-05-2023", "15-05-2023", "8:19", "09:00", - "002", "02-10-2023", "02-10-2023", "UNK:UNK", NA_character_, - "002", "0l-11-2023", "03-11-2023", "11:19", "20:30" - ) + ~patient_number, ~EX_ST_DT1, ~EX_EN_DT1, ~EX_ST_TM1, ~EX_EN_TM1, + "001", "15-05-2023", "15-05-2023", "10:20", "11:00", + "001", "15-05-2023", "15-05-2023", "9:15", "10:00", + "001", "15-05-2023", "15-05-2023", "8:19", "09:00", + "002", "02-10-2023", "02-10-2023", "UNK:UNK", NA_character_, + "002", "0l-11-2023", "03-11-2023", "11:19", "20:30" +) -EX2 <- tibble::tribble( - ~patient_number, ~EX_ST_DT2, - "001", "11-JUN-2023", - "002", "24-OCT-2023", - "002", "25-JUL-2023", - "002", "30-OCT-2023", - "002", "UNK-OCT-2023" - ) +EX2 <- tibble::tribble( + ~patient_number, ~EX_ST_DT2, + "001", "11-JUN-2023", + "002", "24-OCT-2023", + "002", "25-JUL-2023", + "002", "30-OCT-2023", + "002", "UNK-OCT-2023" +) raw_source <- list(EX1 = EX1, EX2 = EX2) test_that("Calculate the Reference dates :RFSTDTC", { observed_rfstdtc <- oak_cal_ref_dates(dm, - der_var = "RFSTDTC", - min_max = "min", - ref_date_config_df = ref_date_conf_df, - raw_source - ) + der_var = "RFSTDTC", + min_max = "min", + ref_date_config_df = ref_date_conf_df, + raw_source + ) expected_rfstdtc <- expected |> dplyr::select(-"RFENDTC") expect_identical(observed_rfstdtc, expected_rfstdtc) - }) test_that("Calculate the Reference dates :RFENDTC", { - observed_rfendtc <- oak_cal_ref_dates(dm, - der_var = "RFENDTC", - min_max = "max", - ref_date_config_df = ref_date_conf_df, - raw_source + der_var = "RFENDTC", + min_max = "max", + ref_date_config_df = ref_date_conf_df, + raw_source ) expected_rfendtc <- expected |> dplyr::select(-"RFSTDTC") expect_identical(observed_rfendtc, expected_rfendtc) - }) - From badc49333309003d0a38eb92a6ace9664492d2c9 Mon Sep 17 00:00:00 2001 From: muzzama-1990 Date: Fri, 20 Sep 2024 02:52:00 +0100 Subject: [PATCH 06/11] styler updates --- R/cal_min_max_date.R | 6 ++++-- tests/testthat/test-cal_min_max_date.R | 24 ++++++++++++++---------- 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/R/cal_min_max_date.R b/R/cal_min_max_date.R index 16cf625b..0246e59a 100644 --- a/R/cal_min_max_date.R +++ b/R/cal_min_max_date.R @@ -56,8 +56,10 @@ cal_min_max_date <- function(raw_dataset, # If date/time variables not present return the empty data frame if (date_not_in_data || time_not_in_data) { # Return Empty data frame with patient_number and datetime columns - empty_df <- stats::setNames(data.frame(matrix(ncol = 2L, nrow = 0L)), - c("patient_number", "datetime")) + empty_df <- stats::setNames( + data.frame(matrix(ncol = 2L, nrow = 0L)), + c("patient_number", "datetime") + ) cli::cli_warn(paste( "Date variable", date_variable, "or Time variable", time_variable, "not present in source data" diff --git a/tests/testthat/test-cal_min_max_date.R b/tests/testthat/test-cal_min_max_date.R index 43a8befb..4f00c6cb 100644 --- a/tests/testthat/test-cal_min_max_date.R +++ b/tests/testthat/test-cal_min_max_date.R @@ -43,23 +43,27 @@ test_that("cal_min_max_date works as expected", { }) test_that("Warning is displayed if date or time variables parameters passed are not present", { - EX <- tibble::tribble( ~patient_number, ~EX_ST_DT, "001", "26-04-2022" ) warning_msg <- "Date variable EX_ST_DT or Time variable EX_ST_TM not present in source data" - expect_warning(observed <- cal_min_max_date(EX, - "EX_ST_DT", - "EX_ST_TM", - val_type = "max", - date_format = "dd-mmm-yyyy", - time_format = "H:M"), - regexp = warning_msg) + expect_warning( + observed <- cal_min_max_date(EX, + "EX_ST_DT", + "EX_ST_TM", + val_type = "max", + date_format = "dd-mmm-yyyy", + time_format = "H:M" + ), + regexp = warning_msg + ) - expected <- stats::setNames(data.frame(matrix(ncol = 2L, nrow = 0L)), - c("patient_number", "datetime")) + expected <- stats::setNames( + data.frame(matrix(ncol = 2L, nrow = 0L)), + c("patient_number", "datetime") + ) expect_identical(observed, expected) }) From 93adfa2f48345af2267d7fd1b58a2546923ad66b Mon Sep 17 00:00:00 2001 From: muzzama-1990 Date: Fri, 20 Sep 2024 03:04:15 +0100 Subject: [PATCH 07/11] Update to description file --- DESCRIPTION | 2 ++ 1 file changed, 2 insertions(+) diff --git a/DESCRIPTION b/DESCRIPTION index 78172387..636f5151 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -21,6 +21,8 @@ Authors@R: c( person("Pattern Institute", role = c("cph", "fnd")), person("F. Hoffmann-La Roche AG", role = c("cph", "fnd")), person("Pfizer Inc", role = c("cph", "fnd")), + person("Mohsin", "Uzzama", email = "mohsin.uzzama2@gmail.com", + role = "aut"), person("Transition Technologies Science", role = c("cph", "fnd")) ) Maintainer: Rammprasad Ganapathy From 40dc13231dcf9e0dd8252905967f6390a77934d2 Mon Sep 17 00:00:00 2001 From: muzzama-1990 Date: Fri, 20 Sep 2024 03:10:33 +0100 Subject: [PATCH 08/11] updates --- man/sdtm.oak-package.Rd | 1 + 1 file changed, 1 insertion(+) diff --git a/man/sdtm.oak-package.Rd b/man/sdtm.oak-package.Rd index 2675455d..37e986c0 100644 --- a/man/sdtm.oak-package.Rd +++ b/man/sdtm.oak-package.Rd @@ -31,6 +31,7 @@ Authors: \item Ramiro Magno \email{rmagno@pattern.institute} (\href{https://orcid.org/0000-0001-5226-3441}{ORCID}) \item Kamil Sijko \email{kamil.sijko@ttsi.com.pl} (\href{https://orcid.org/0000-0002-2203-1065}{ORCID}) \item Shiyu Chen \email{Shiyu.Chen@atorusresearch.com} + \item Mohsin Uzzama \email{mohsin.uzzama2@gmail.com} } Other contributors: From 1d5bcb1ec25095f54fb798b5e44e42e1124052ab Mon Sep 17 00:00:00 2001 From: muzzama-1990 Date: Tue, 5 Nov 2024 22:45:55 +0000 Subject: [PATCH 09/11] Updates to address the review comments --- R/cal_min_max_date.R | 27 ++++++++++-------- R/oak_cal_ref_dates.R | 13 +++++++-- man/cal_min_max_date.Rd | 12 ++++---- man/oak_cal_ref_dates.Rd | 4 +-- tests/testthat/test-cal_min_max_date.R | 38 ++++++++++++++++++++++---- 5 files changed, 65 insertions(+), 29 deletions(-) diff --git a/R/cal_min_max_date.R b/R/cal_min_max_date.R index 0246e59a..89d3e31b 100644 --- a/R/cal_min_max_date.R +++ b/R/cal_min_max_date.R @@ -1,4 +1,4 @@ -#' Calculate minimum and maximum date and time in the dataframe +#' Calculate minimum and maximum date and time in the data frame #' #' @description This function derives the earliest/latest ISO8601 datetime #' @@ -18,7 +18,7 @@ #' @examples #' EX <- tibble::tribble( #' ~patient_number, ~EX_ST_DT, ~EX_ST_TM, -#' "001", "26-04-2022", "10:20", +#' "001", "25-04-2022", "10:20", #' "001", "25-04-2022", "10:15", #' "001", "25-04-2022", "10:19", #' "002", "26-05-2022", "UNK:UNK", @@ -26,16 +26,16 @@ #' ) #' #' min <- cal_min_max_date(EX, -#' "EX_ST_DT", -#' "EX_ST_TM", +#' date_variable = "EX_ST_DT", +#' time_variable = "EX_ST_TM", #' val_type = "min", #' date_format = "dd-mmm-yyyy", #' time_format = "H:M" #' ) #' #' max <- cal_min_max_date(EX, -#' "EX_ST_DT", -#' "EX_ST_TM", +#' date_variable = "EX_ST_DT", +#' time_variable = "EX_ST_TM", #' val_type = "max", #' date_format = "dd-mmm-yyyy", #' time_format = "H:M" @@ -47,13 +47,15 @@ cal_min_max_date <- function(raw_dataset, val_type = "min", date_format, time_format) { - # Check if date is present in the raw data frame - date_not_in_data <- !(date_variable %in% colnames(raw_dataset)) + # Check if date parameter is missing or date variable is present in the raw data frame + date_not_in_data <- is.na(date_variable) || + !utils::hasName(raw_dataset, date_variable) # Check if time variable is used and present in the raw data frame - time_not_in_data <- !(time_variable %in% colnames(raw_dataset)) && !is.na(time_variable) + time_not_in_data <- !is.na(time_variable) && + !utils::hasName(raw_dataset, time_variable) - # If date/time variables not present return the empty data frame + # If date/time variables not present return empty data frame if (date_not_in_data || time_not_in_data) { # Return Empty data frame with patient_number and datetime columns empty_df <- stats::setNames( @@ -68,13 +70,14 @@ cal_min_max_date <- function(raw_dataset, } fin_df <- raw_dataset - # Time variable is not used then use only date + + # Time is not used in reference date then use only date if (is.na(time_variable)) { fin_df$datetime <- create_iso8601(raw_dataset[[date_variable]], .format = date_format ) } else { - # If both date and time variables are presen use both date and time + # If both date and time variables are present use both date and time raw_dataset$date_time <- paste0( raw_dataset[[date_variable]], raw_dataset[[time_variable]] diff --git a/R/oak_cal_ref_dates.R b/R/oak_cal_ref_dates.R index 5ab86728..d73dcbd2 100644 --- a/R/oak_cal_ref_dates.R +++ b/R/oak_cal_ref_dates.R @@ -11,7 +11,7 @@ #' Default set to Minimum. Values should be min or max. #' @param ref_date_config_df Data frame which has the details of the variables to #' be used for the calculation of reference dates. -#' Should has columns listed below: +#' Should have columns listed below: #' dataset_name : Name of the raw dataset. #' date_var : Date variable name from the raw dataset. #' time_var : Time variable name from the raw dataset. @@ -59,7 +59,7 @@ #' #' dm_df <- oak_cal_ref_dates(dm, #' der_var = "RFSTDTC", -#' min_max = "max", +#' min_max = "min", #' ref_date_config_df = ref_date_config_df, #' raw_source #' ) @@ -76,6 +76,8 @@ oak_cal_ref_dates <- function(ds_in = dm, tformat, sdtm_var_name )) + admiraldev::assert_list_of(raw_source, "data.frame") + ds_out <- data.frame() for (i in seq_along(ref_date_config_df$dataset_name)) { raw_dataset_name <- ref_date_config_df$dataset_name[i] @@ -86,7 +88,7 @@ oak_cal_ref_dates <- function(ds_in = dm, sdtm_var <- ref_date_config_df$sdtm_var_name[i] raw_dataset <- raw_source[[raw_dataset_name]] - if (der_var == sdtm_var) { + if (der_var == sdtm_var && !is.null(raw_dataset)) { ds_out1 <- cal_min_max_date( raw_dataset = raw_dataset, date_variable = date_variable, @@ -96,6 +98,11 @@ oak_cal_ref_dates <- function(ds_in = dm, val_type = min_max ) ds_out <- rbind(ds_out, ds_out1) + } else if (der_var == sdtm_var && is.null(raw_dataset)) { + warning(paste0( + raw_dataset_name, + " is not present in the source data list but referenced in ref_date_config_df" + )) } } diff --git a/man/cal_min_max_date.Rd b/man/cal_min_max_date.Rd index 5d977d2f..bca873a2 100644 --- a/man/cal_min_max_date.Rd +++ b/man/cal_min_max_date.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/cal_min_max_date.R \name{cal_min_max_date} \alias{cal_min_max_date} -\title{Calculate minimum and maximum date and time in the dataframe} +\title{Calculate minimum and maximum date and time in the data frame} \usage{ cal_min_max_date( raw_dataset, @@ -38,7 +38,7 @@ This function derives the earliest/latest ISO8601 datetime \examples{ EX <- tibble::tribble( ~patient_number, ~EX_ST_DT, ~EX_ST_TM, - "001", "26-04-2022", "10:20", + "001", "25-04-2022", "10:20", "001", "25-04-2022", "10:15", "001", "25-04-2022", "10:19", "002", "26-05-2022", "UNK:UNK", @@ -46,16 +46,16 @@ EX <- tibble::tribble( ) min <- cal_min_max_date(EX, - "EX_ST_DT", - "EX_ST_TM", + date_variable = "EX_ST_DT", + time_variable = "EX_ST_TM", val_type = "min", date_format = "dd-mmm-yyyy", time_format = "H:M" ) max <- cal_min_max_date(EX, - "EX_ST_DT", - "EX_ST_TM", + date_variable = "EX_ST_DT", + time_variable = "EX_ST_TM", val_type = "max", date_format = "dd-mmm-yyyy", time_format = "H:M" diff --git a/man/oak_cal_ref_dates.Rd b/man/oak_cal_ref_dates.Rd index 11abd29b..95f491a1 100644 --- a/man/oak_cal_ref_dates.Rd +++ b/man/oak_cal_ref_dates.Rd @@ -22,7 +22,7 @@ Default set to Minimum. Values should be min or max.} \item{ref_date_config_df}{Data frame which has the details of the variables to be used for the calculation of reference dates. -Should has columns listed below: +Should have columns listed below: dataset_name : Name of the raw dataset. date_var : Date variable name from the raw dataset. time_var : Time variable name from the raw dataset. @@ -79,7 +79,7 @@ raw_source <- list(EX1 = EX1, EX2 = EX2) dm_df <- oak_cal_ref_dates(dm, der_var = "RFSTDTC", - min_max = "max", + min_max = "min", ref_date_config_df = ref_date_config_df, raw_source ) diff --git a/tests/testthat/test-cal_min_max_date.R b/tests/testthat/test-cal_min_max_date.R index 4f00c6cb..0d351cb6 100644 --- a/tests/testthat/test-cal_min_max_date.R +++ b/tests/testthat/test-cal_min_max_date.R @@ -1,3 +1,29 @@ +test_that("Warn if date variable parameter is NULL", { + EX <- tibble::tribble( + ~patient_number, ~EX_ST_DT, + "001", "26-04-2022" + ) + + warning_msg <- "Date variable NA or Time variable NA not present in source data" + expect_warning( + observed <- cal_min_max_date(EX, + date_variable = NA, + time_variable = NA, + val_type = "max", + date_format = "dd-mmm-yyyy", + time_format = "H:M" + ), + regexp = warning_msg + ) + + expected <- stats::setNames( + data.frame(matrix(ncol = 2L, nrow = 0L)), + c("patient_number", "datetime") + ) + + expect_identical(observed, expected) +}) + test_that("cal_min_max_date works as expected", { EX <- tibble::tribble( ~patient_number, ~EX_ST_DT, ~EX_ST_TM, @@ -22,8 +48,8 @@ test_that("cal_min_max_date works as expected", { ) observed_min <- cal_min_max_date(EX, - "EX_ST_DT", - "EX_ST_TM", + date_variable = "EX_ST_DT", + time_variable = "EX_ST_TM", val_type = "min", date_format = "dd-mmm-yyyy", time_format = "H:M" @@ -32,8 +58,8 @@ test_that("cal_min_max_date works as expected", { expect_identical(observed_min, expected_min) observed_max <- cal_min_max_date(EX, - "EX_ST_DT", - "EX_ST_TM", + date_variable = "EX_ST_DT", + time_variable = "EX_ST_TM", val_type = "max", date_format = "dd-mmm-yyyy", time_format = "H:M" @@ -51,8 +77,8 @@ test_that("Warning is displayed if date or time variables parameters passed are warning_msg <- "Date variable EX_ST_DT or Time variable EX_ST_TM not present in source data" expect_warning( observed <- cal_min_max_date(EX, - "EX_ST_DT", - "EX_ST_TM", + date_variable = "EX_ST_DT", + time_variable = "EX_ST_TM", val_type = "max", date_format = "dd-mmm-yyyy", time_format = "H:M" From d798ae7b70caa7119d47d0afe668b4f10ede29bc Mon Sep 17 00:00:00 2001 From: muzzama-1990 Date: Tue, 5 Nov 2024 22:51:18 +0000 Subject: [PATCH 10/11] Automatic renv profile update. --- renv/profiles/4.2/renv.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/renv/profiles/4.2/renv.lock b/renv/profiles/4.2/renv.lock index 0c9569a4..210a2de0 100644 --- a/renv/profiles/4.2/renv.lock +++ b/renv/profiles/4.2/renv.lock @@ -57,7 +57,7 @@ }, "R.oo": { "Package": "R.oo", - "Version": "1.26.0", + "Version": "1.27.0", "Source": "Repository", "Repository": "CRAN", "Requirements": [ @@ -66,7 +66,7 @@ "methods", "utils" ], - "Hash": "4fed809e53ddb5407b3da3d0f572e591" + "Hash": "6ac79ff194202248cf946fe3a5d6d498" }, "R.utils": { "Package": "R.utils", From a1f04448c23623ff18c44f28f92409ea4e6abdf3 Mon Sep 17 00:00:00 2001 From: muzzama-1990 Date: Tue, 5 Nov 2024 22:54:49 +0000 Subject: [PATCH 11/11] Automatic renv profile update. --- renv.lock | 4 ++-- renv/profiles/4.3/renv.lock | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/renv.lock b/renv.lock index 81d0f3ce..cd6da0f0 100644 --- a/renv.lock +++ b/renv.lock @@ -57,7 +57,7 @@ }, "R.oo": { "Package": "R.oo", - "Version": "1.26.0", + "Version": "1.27.0", "Source": "Repository", "Repository": "CRAN", "Requirements": [ @@ -66,7 +66,7 @@ "methods", "utils" ], - "Hash": "4fed809e53ddb5407b3da3d0f572e591" + "Hash": "6ac79ff194202248cf946fe3a5d6d498" }, "R.utils": { "Package": "R.utils", diff --git a/renv/profiles/4.3/renv.lock b/renv/profiles/4.3/renv.lock index 81d0f3ce..cd6da0f0 100644 --- a/renv/profiles/4.3/renv.lock +++ b/renv/profiles/4.3/renv.lock @@ -57,7 +57,7 @@ }, "R.oo": { "Package": "R.oo", - "Version": "1.26.0", + "Version": "1.27.0", "Source": "Repository", "Repository": "CRAN", "Requirements": [ @@ -66,7 +66,7 @@ "methods", "utils" ], - "Hash": "4fed809e53ddb5407b3da3d0f572e591" + "Hash": "6ac79ff194202248cf946fe3a5d6d498" }, "R.utils": { "Package": "R.utils",