From a70fc30ff846e804837f1fcea8c312ec2fd8cfd3 Mon Sep 17 00:00:00 2001 From: Kamil Sijko Date: Thu, 11 Apr 2024 10:32:44 +0200 Subject: [PATCH 01/20] WIP: Baseline flag code adaptation, not functional yet --- R/derive_blfl.R | 429 ++++++++++++++++++ inst/derive_blfl/DM.csv | 9 + inst/derive_blfl/raw_dataset.csv | 7 + .../derive_blfl/study_visit_configuration.csv | 138 ++++++ 4 files changed, 583 insertions(+) create mode 100644 R/derive_blfl.R create mode 100644 inst/derive_blfl/DM.csv create mode 100644 inst/derive_blfl/raw_dataset.csv create mode 100644 inst/derive_blfl/study_visit_configuration.csv diff --git a/R/derive_blfl.R b/R/derive_blfl.R new file mode 100644 index 00000000..1d14693a --- /dev/null +++ b/R/derive_blfl.R @@ -0,0 +1,429 @@ +#' Extract date part from ISO8601 date/time variable +#' +#' The date part is extracted from an ISO8601 date/time variable. +#' By default, partial or missing dates are set to NA. +#' +#' @param dtc Character vector containing ISO8601 date/times. +#' @param partial_as_na Logical `TRUE` or `FALSE` indicating whether +#' partial dates should be set to NA (default is `TRUE`). +#' +#' @return Character vector containing ISO8601 dates. +#' @export +#' +#' @examples +#' ## Partial or missing dates set to NA by default +#' dtc_datepart( +#' c(NA, "", "2021", "2021-12", "2021-12-25", "2021-12-25T12:00:00") +#' ) +#' # |--> c(NA, NA, NA, NA, "2021-12-25", "2021-12-25") +#' +#' ## Prevent partial or missing dates from being set to NA +#' dtc_datepart( +#' c(NA, "", "2021", "2021-12", "2021-12-25", "2021-12-25T12:00:00"), +#' partial_as_na = FALSE +#' ) +#' # |--> c(NA, "", "2021", "2021-12", "2021-12-25", "2021-12-25") +#' +dtc_datepart <- function(dtc, partial_as_na = TRUE) { + + # Assert that dtc is a character vector + checkmate::assert_character(dtc) + + # Extract date part from ISO 8601 date/time variable + dt <- sub("^([^T]+).*", "\\1", dtc) + + # Set partial or missing dates to NA, depending on partial_as_na parameter + if (partial_as_na) { + dt <- ifelse(nchar(dt) < 10, NA, dt) + } + + return(dt) +} + +#' Extract time part from ISO 8601 date/time variable +#' +#' The time part is extracted from an ISO 8601 date/time variable. +#' By default, partial or missing times are set to NA, and seconds are ignored +#' and not extracted. +#' +#' @param dtc Character vector containing ISO 8601 date/times. +#' @param partial_as_na Logical `TRUE` or `FALSE` indicating whether +#' partial times should be set to NA (default is `TRUE`). +#' @param ignore_seconds Logical `TRUE` or `FALSE` indicating whether +#' seconds should be ignored (default is `TRUE`). +#' +#' @return Character vector containing ISO 8601 times. +#' @export +#' +#' @examples +#' ## Partial or missing times set to NA and seconds ignored by default +#' dtc_timepart( +#' c(NA, "", "2021-12-25", "2021-12-25T12", "2021-12-25T12:30", "2021-12-25T12:30:59") +#' ) +#' # |--> c(NA, NA, NA, NA, "12:30", "12:30") +#' +#' ## Prevent partial or missing times from being set to NA +#' dtc_timepart( +#' c(NA, "", "2021-12-25", "2021-12-25T12", "2021-12-25T12:30", "2021-12-25T12:30:59"), +#' partial_as_na = FALSE +#' ) +#' # |--> c(NA, "", "", "12", "12:30", "12:30") +#' +#' ## Do not ignore seconds, partial or missing times set to NA +#' dtc_timepart( +#' c(NA, "", "2021-12-25", "2021-12-25T12", "2021-12-25T12:30", "2021-12-25T12:30:59"), +#' ignore_seconds = FALSE +#' ) +#' # |--> c(NA, NA, NA, NA, NA, "12:30:59") +#' +#' ## Do not ignore seconds and prevent partial or missing times from being set to NA +#' dtc_timepart( +#' c(NA, "", "2021-12-25", "2021-12-25T12", "2021-12-25T12:30", "2021-12-25T12:30:59"), +#' partial_as_na = FALSE, +#' ignore_seconds = FALSE +#' ) +#' # |--> c(NA, "", "", "12", "12:30", "12:30:59") +#' +dtc_timepart <- function(dtc, partial_as_na = TRUE, ignore_seconds = TRUE) { + + # Assert that dtc is a character vector + checkmate::assert_character(dtc) + + # Determine length of time part depending on ignore_seconds parameter + tm_length <- ifelse(ignore_seconds, 5, 8) + + # Extract time part from ISO 8601 date/time variable + tm <- substr(sub("^([^T]+)T?", "", dtc), 1, tm_length) + + # Set partial or missing times to NA, depending on partial_as_na parameter + if (partial_as_na) { + tm <- ifelse(nchar(tm) < tm_length, NA, tm) + } + + return(tm) +} + +#' Derive Baseline Flag or Last Observation Before Exposure Flag +#' +#' Derive the baseline flag variable (`--BLFL`) or the last observation before +#' exposure flag (`--LOBXFL`), from the observation date/time (`--DTC`), and a +#' DM domain reference date/time. +#' +#' The methodology and approach implemented in this function are based on +#' concepts and examples found in the Roche version of the {roak} package. +#' +#' The derivation is as follows: +#' +#' - Remove records where the result (`--ORRES`) is missing. Also, exclude records +#' with results labeled as "ND" (No Data) or "NOT DONE" in the `--ORRES` column, +#' which indicate that the measurement or observation was not completed. This +#' step is important even if a previous cleaning step (like the +#' 'oak_clean_not_done' function) might not have been applied to the data yet. +#' - Remove records where the status (`--STAT`) indicates the observation or test +#' was not performed, marked as "NOT DONE". +#' - Divide the date and time column (`--DTC`) and the reference date/time +#' variable (`reference_date_variable`) into separate date and time components. Ignore +#' any seconds recorded in the time component, focusing only on hours and +#' minutes for further calculations. +#' - Set partial or missing dates to `NA`. +#' - Set partial or missing times to `NA`. +#' - Get a list of baseline visits from `Baseline column` +#' (if it exists) in `oak_pkg_env$study_visit_configuration`. +#' - Get a list of baseline timepoints from `Baseline` column +#' (if it exists) in `oak_pkg_env$timepoint_conf`. +#' - Filter on rows that have domain and reference dates not equal to +#' `NA`. (Ref: **X**) +#' - Filter **X** on rows with domain date prior to (less than) +#' reference date. (Ref: **A**) +#' - Filter **X** on rows with domain date equal to reference date but +#' domain and reference times not equal to `NA` and domain time prior to (less +#' than) reference time. (Ref: **B**) +#' - Filter **X** on rows with domain date equal to reference date but +#' domain and/or reference time equal to NA and: +#' - VISIT is in baseline visits list (if it exists) and +#' - xxTPT is in baseline timepoints list (if it exists). +#' +#' (Ref: **C**) +#' - Combine the rows from **A**, **B**, and **C** to get a +#' data frame of pre-reference date observations. Sort the rows by `USUBJID`, +#' `--STAT`, and `--ORRES`. +#' - Group by `USUBJID` and `--TESTCD` and filter on the rows +#' that have maximum value from `--DTC`. Keep only the oak id variables and +#' `--TESTCD` (because these are the unique values). Remove any duplicate rows. +#' Assign the baseline flag variable, `--BLFL`, the last observation before +#' exposure flag (`--LOBXFL`) variable to these rows. +#' - Join the baseline flag onto the input dataset. +#' +#' @param raw_dataset Input data frame. +#' @param target_sdtm_variable Name of variable to be derived (`--BLFL` or +#' `--LOBXFL` where `--` is domain). +#' @param reference_date_variable vector of a date/time from the +#' Demographics (DM) dataset, which serves as a point of comparison for other +#' observations in the study. Common choices for this reference variable +#' include "RFSTDTC" (the date/time of the first study treatment) or +#' "RFXSTDTC" (the date/time of the first exposure to the study drug). +#' +#' @return Modified input data frame with baseline flag variable `--BLFL` or +#' last observation before exposure flag `--LOBXFL` added. +#' +#' @export +#' +#' @examples +#' DM <- read.csv(system.file("inst/derive_blfl/DM.csv", package = "sdtmoak")) +#' DM +#' raw_dataset <- read.csv(system.file("inst/derive_blfl/raw_dataset.csv", package = "sdtmoak")) +#' raw_dataset +#' observed_output <- derive_blfl(raw_dataset = raw_dataset, +#' DM_dataset = DM, +#' target_sdtm_variable = "VSBLFL", +#' reference_date_variable = "RFSTDTC") +#' observed_output +derive_blfl <- function(raw_dataset, + DM_dataset, + target_sdtm_variable, + reference_date_variable, + baseline_visits = character(), + baseline_timepoints = character()) { + # Check assertions -------------------------------------------------------- + assertion_collection = checkmate::makeAssertCollection() + # Assert that raw_dataset is a data frame, + checkmate::assert_data_frame(raw_dataset, + col.names = "strict", + min.rows = 1, + add = assertion_collection) + + # Assert that the input dataset has a "DOMAIN" column + checkmate::assert_names(names(raw_dataset), + must.include = "DOMAIN", + .var.name = "Columns of 'raw_dataset'", + add = assertion_collection) + + # Assert DM_dataset is data.frame + checkmate::assert_data_frame(DM_dataset, + col.names = "strict", + min.rows = 1, + add = assertion_collection) + + # Check if USUBJID and reference_date is present in the DM + checkmate::assert_names(names(DM), + must.include = c("USUBJID", reference_date_variable), + .var.name = "Columns of 'DM_dataset'", + add = assertion_collection) + + checkmate::assert_character(target_sdtm_variable, + min.chars = 1, + len = 1, + add = assertion_collection) + + checkmate::assert_names(target_sdtm_variable, + type = "strict", + add = assertion_collection) + + checkmate::assert_character(reference_date_variable, + min.chars = 1, + len = 1, + add = assertion_collection) + + checkmate::assert_names(reference_date_variable, + type = "strict", + add = assertion_collection) + + checkmate::reportAssertions(assertion_collection) + + # Get domain from input dataset + domain <- unique(raw_dataset$DOMAIN) + checkmate::assert_character(domain, + min.chars = 1, + len = 1, + add = assertion_collection) + + # Assert that target_sdtm_variable is a concatenation of domain and "BLFL" or "LOBXFL" + checkmate::assert_choice( + target_sdtm_variable, + choices = c(paste0(domain, "BLFL"), + paste0(domain, "LOBXFL")), + add = assertion_collection + ) + + # Determine domain prefixed columns + suffixes <- + c("ORRES", "STAT", "TESTCD", "TPT", "DTC", "CAT", "SCAT", "LOC", "LAT", + "DIR", "METHOD", "SPEC") + domain_prefixed_names <- + paste0(domain, suffixes) |> + setNames(tolower(suffixes)) + + # Assert that the input dataset has a "DTC" column + checkmate::assert_names(names(raw_dataset), + must.include = c(domain_prefixed_names[c("orres", + "stat", + "testcd", + "dtc")]), + .var.name = "Columns of 'raw_dataset'", + add = assertion_collection) + checkmate::reportAssertions(assertion_collection) + + # End of assertions, work begins ------------------------------------------ + # Create copy of input dataset for modification and processing + ds_mod <- raw_dataset + + # Filter out rows where --ORRES is missing. Filter out --ORRES in + # ("ND", "NOT DONE") as well. + bad_orres_rows <- is.na(ds_mod[[domain_prefixed_names["orres"]]]) | + trimws(ds_mod[[domain_prefixed_names["orres"]]]) %in% c("ND", "NOT DONE", "") + ds_mod <- ds_mod[!bad_orres_rows, ] + + # Filter out rows where --STAT is not equal to "NOT DONE" + ds_mod <- ds_mod |> + dplyr::filter( + dplyr::if_any( + dplyr::any_of(domain_prefixed_names["stat"]), ~ !.x %in% "NOT DONE")) + + if (nrow(ds_mod) == 0) { + stop(paste0( + "No rows for which both --ORRES is not missing\n and --STAT not equals to NOT DONE.\n", + " Not able to derive Baseline Flag or Last Observation Before Exposure Flag" + )) + } + + # Checking for columns of interest + con_col <- c(domain_prefixed_names[c("testcd", "dtc", "var_tpt")], + "VISITNUM") + + # Drop those columns from the list which are not present in ds_in + con_col <- con_col[con_col %in% names(raw_dataset)] + + # Check for any column which is all NA and removing it from con_col list + h <- which(sapply(ds_mod, function(x) all(is.na(x)))) + if (any(names(h) %in% con_col)) { + h <- names(h[names(h) %in% con_col]) + # remove all NA columns from con_col + con_col <- con_col[!con_col %in% h] + } + + # Keep only USUBJID and reference_date_variable + DM_dataset <- dplyr::select(DM_dataset, + dplyr::all_of(c("USUBJID", + reference_date_variable))) + + # Left join dataset with DM_dataset domain based on USUBJID + ds_mod <- dplyr::left_join(ds_mod, DM_dataset, by = "USUBJID") + + # Split --DTC and reference_date_variable into date and time parts + # (partial or missing dates and times set to NA) + ds_mod$dom_dt <- dtc_datepart(ds_mod[[domain_prefixed_names["dtc"]]]) + ds_mod$dom_tm <- dtc_timepart(ds_mod[[domain_prefixed_names["dtc"]]]) + ds_mod$ref_dt <- dtc_datepart(ds_mod[[reference_date_variable]]) + ds_mod$ref_tm <- dtc_timepart(ds_mod[[reference_date_variable]]) + + + # If VISIT not in data frame then assign it as "" for processing + if (!"VISIT" %in% names(ds_mod)) { + ds_mod[["VISIT"]] <- "" + } + + # Get a vector of baseline visits from Baseline column (if it exists) + # in oak_pkg_env$study_visit_configuration + # TODO: How this should work? Attached file doesn't contain this info + # if ("Baseline" %in% names(oak_pkg_env$study_visit_configuration)) { + # baseline_visits <- oak_pkg_env$study_visit_configuration |> + # dplyr::filter(toupper(Baseline) == "Y") |> + # dplyr::select(TV_Visit) |> + # dplyr::distinct() |> + # unlist() + # } + + # If --TPT not in data frame then assign it as "" for processing + if (!domain_prefixed_names["tpt"] %in% names(ds_mod)) { + ds_mod[[domain_prefixed_names["tpt"]]] <- "" + } + + # Get a vector of baseline timepoints from Baseline column (if it exists) + # TODO: How this part should work? I need example data + # if ("Baseline" %in% names(oak_pkg_env$study_visit_configuration)) { + # baseline_timepoints <- oak_pkg_env$timepoint_conf |> + # dplyr::filter(Domain == domain & toupper(Baseline) == "Y") |> + # dplyr::select(TPT) |> + # dplyr::distinct() |> + # unlist() + # } + + # Filter on rows that have domain and reference dates not equal to NA + ds_subset <- dplyr::filter(ds_mod, !is.na(dom_dt) & !is.na(ref_dt)) + + # Filter on rows with domain date prior to reference date + # (*A) + ds_subset_lt <- dplyr::filter(ds_subset, dom_dt < ref_dt) + + # Filter on rows with domain date equal to reference date but + # - domain and reference times not equal to NA and + # - domain time prior to reference time + # (*B) + ds_subset_eq_1 <- dplyr::filter(ds_subset, dom_dt == ref_dt, + !is.na(dom_tm) & !is.na(ref_tm), + dom_tm < ref_tm) + + # Filter on rows with domain date equal to reference date but + # - domain and/or reference time equal to NA and + # - VISIT is in baseline visits list and + # - xxTPT is in baseline timepoints list + # (*C) + ds_subset_eq_2 <- dplyr::filter(ds_subset, dom_dt == ref_dt, + is.na(dom_tm) | is.na(ref_tm), + (VISIT %in% baseline_visits & get(domain_prefixed_names["tpt"]) %in% baseline_timepoints) | + (VISIT %in% baseline_visits & length(baseline_timepoints) == 0) | + (get(domain_prefixed_names["tpt"]) %in% baseline_timepoints & length(baseline_visits) == 0)) + + # Combine (*A) and (*B) and (*C) + ds_base <- rbind(ds_subset_lt, ds_subset_eq_1, ds_subset_eq_2) + + # Sort the rows in ascending order with respect to columns from con_col + ds_base <- dplyr::arrange_at(ds_base, c("USUBJID", con_col)) + + if (nrow(ds_base) == 0) { + message(paste0("There are no baseline records.")) + } + + # Group by USUBJID and --TESTCD and filter on the rows that have max value + # from --DTC. Keep only the oak id variables, --TESTCD and any key variables + # from the findings domains (because these are the unique values). + # Remove any duplicate rows. + ds_blfl <- ds_base |> + dplyr::group_by(USUBJID, .data[[domain_prefixed_names["testcd"]]]) |> + dplyr::slice_max(!!rlang::sym(domain_prefixed_names["dtc"]), na_rm = TRUE) |> + dplyr::ungroup() |> + # OAK.id.vars already there + # TODO: this is not true + dplyr::select(dplyr::all_of(c(oak_id_vars(), domain_prefixed_names["testcd"])), + dplyr::any_of( + c(domain_prefixed_names[c("cat", + "scat", + "spec", + "loc", + "lat", + "dir", + "method")], + # For MI domain + "MIMRKSTI", + "MIGRPID" + ) + )) |> + dplyr::distinct() + + # Assign the baseline flag variable + ds_blfl[[target_sdtm_variable]] <- "Y" + + # Join baseline flag onto input dataset + ds_out <- dplyr::left_join(raw_dataset, ds_blfl) + + # Assert that merged data frame has same number of rows as input data frame + if (nrow(ds_out) != nrow(raw_dataset)) { + stop(sprintf( + "Internal error: The processed dataset was expected to have the same number of rows (%d) as the input dataset (raw_dataset), but it actually has %d rows.", + nrow(raw_dataset), + nrow(ds_out) + )) + } + + return(ds_out) +} diff --git a/inst/derive_blfl/DM.csv b/inst/derive_blfl/DM.csv new file mode 100644 index 00000000..17a9aa21 --- /dev/null +++ b/inst/derive_blfl/DM.csv @@ -0,0 +1,9 @@ +"STUDYID","DOMAIN","USUBJID","SUBJID","RFSTDTC","RFENDTC","RFXSTDTC","RFXENDTC","RFICDTC","RFPENDTC","DTHDTC","DTHFL","SITEID","INVID","INVNAM","BRTHDTC","AGE","AGEU","SEX","RACE","ETHNIC","ARMCD","ARM","COUNTRY","DMDTC","DMDY","RACE1","RACE2","RACESP1","RACESP2" +"GA41070","DM","GA41070-375","GA41070-375","2020-09-28T10:10","2020-10-22","2020-09-28T10:10","2020-10-22T11:00","2002-09-01","2020-10-22",NA,NA,"111111","90001","Dr doctor1","1965-01-21T05:56",NA,"YEARS","F","MULTIPLE","NOT HISPANIC OR LATINO","50","optional: insert name of study drug50 UNIT","US","2002-09-01",-6602,"AMERICAN INDIAN OR ALASKA NATIVE","NATIVE HAWAIIAN OR OTHER PACIFIC ISLANDER","ASIAN INDIAN","SAMOAN" +"GA41070","DM","GA41070-376","GA41070-376","2020-09-21T11:00","2020-10-19","2020-09-21T11:00","2020-10-21T10:00","2020-09-20","2020-10-19",NA,NA,"111111","90001","Dr doctor1","1998-08-10T19:05",NA,"YEARS","M","WHITE","NOT REPORTED","10","optional: insert name of study drug10 MG","US","2020-09-20",-1,NA,NA,NA,NA +"GA41070","DM","GA41070-377","GA41070-377",NA,"2020-10-22",NA,NA,"2020-08-25","2020-10-22",NA,NA,"111111","90001","Dr doctor1","1996-05-02T08:12",NA,"YEARS","F","MULTIPLE","UNKNOWN","10","optional: insert name of study drug10 MG","US","2020-08-31",NA,"BLACK OR AFRICAN AMERICAN","AMERICAN INDIAN OR ALASKA NATIVE",NA,NA +"GA41070","DM","GA41070-378","GA41070-378","2020-01-20T10:00","2020-02-02","2020-01-20T10:00","2020-01-20T11:00","2020-01-20","2020-02-02","2020-02-01","Y","111111","90001","Dr doctor1","2020-01-20T10:00","33","YEARS","M","AMERICAN INDIAN OR ALASKA NATIVE","HISPANIC OR LATINO","2","optional: insert name of study drug2 MG","US","2020-01-20",1,NA,NA,NA,NA +"GA41070","DM","GA41070-379","GA41070-379",NA,"2020-10-20",NA,NA,"2020-01-20","2020-10-20",NA,NA,"111111","90001","Dr doctor1","1980-01-20T10:00","30","YEARS","F","AMERICAN INDIAN OR ALASKA NATIVE","HISPANIC OR LATINO","2","optional: insert name of study drug2 MG","US","2020-01-20",NA,NA,NA,NA,NA +"GA41070","DM","GA41070-380","GA41070-380",NA,NA,NA,NA,NA,NA,NA,NA,"111111","90001","Dr doctor1","1980-01-20T10:00","40","YEARS","M","ASIAN","NOT HISPANIC OR LATINO",NA,NA,"US","2020-01-20",NA,NA,NA,"OTHER ASIAN","OTHER PACIFIC ISLANDER" +"GA41070","DM","GA41070-381","GA41070-381",NA,NA,NA,NA,NA,NA,NA,NA,"111111","90001","Dr doctor1",NA,NA,NA,NA,NA,NA,NA,NA,"US",NA,NA,NA,NA,NA,NA +"GA41070","DM","GA41070-382","GA41070-382",NA,NA,NA,NA,NA,NA,NA,NA,"111111","90001","Dr doctor1","1990-01-20T11:00","30","YEARS","F","UNKNOWN","NOT HISPANIC OR LATINO",NA,NA,"US","2020-03-20",NA,NA,NA,NA,NA diff --git a/inst/derive_blfl/raw_dataset.csv b/inst/derive_blfl/raw_dataset.csv new file mode 100644 index 00000000..d7c07465 --- /dev/null +++ b/inst/derive_blfl/raw_dataset.csv @@ -0,0 +1,7 @@ +"DOMAIN","OAK_ID","RAVE_SOURCE","PATIENT_NUM","USUBJID","VSDTC","VSTESTCD","VSORRES","VSSTAT" +"VS",1,"VTLS1","375","GA41070-375","2020-09-01T13:31","DIABP","ND",NA +"VS",1,"VTLS1","375","GA41070-375","2020-09-01T13:31","PULSE","ND",NA +"VS",1,"VTLS2","375","GA41070-375","2020-09-28T11:00","DIABP","ND",NA +"VS",1,"VTLS2","375","GA41070-375","2020-09-28T11:00","PULSE","ND",NA +"VS",2,"VTLS1","376","GA41070-376","2020-09-20","DIABP","75",NA +"VS",2,"VTLS1","376","GA41070-376","2020-09-20","PULSE","ND",NA diff --git a/inst/derive_blfl/study_visit_configuration.csv b/inst/derive_blfl/study_visit_configuration.csv new file mode 100644 index 00000000..5c992ed0 --- /dev/null +++ b/inst/derive_blfl/study_visit_configuration.csv @@ -0,0 +1,138 @@ +TV_Visit,Study_Visit_name,Source_dataset_var_names +CYCLE 1 DAY 1,Cycle 1 Day 1,FOLDERL +CYCLE 1 DAY 1,C1D1,VISIT +CYCLE 1 DAY 2,Cycle 1 Day 2,FOLDERL +CYCLE 1 DAY 3,Cycle 1 Day 3,FOLDERL +CYCLE 10 DAY 1,Cycle 10 Day 1,FOLDERL +CYCLE 11 DAY 1,Cycle 11 Day 1,FOLDERL +CYCLE 12 DAY 1,Cycle 12 Day 1,FOLDERL +CYCLE 12 DAY 1,C12D1,VISIT +CYCLE 13 DAY 1,Cycle 13 Day 1,FOLDERL +CYCLE 14 DAY 1,Cycle 14 Day 1,FOLDERL +CYCLE 15 DAY 1,Cycle 15 Day 1,FOLDERL +CYCLE 16 DAY 1,Cycle 16 Day 1,FOLDERL +CYCLE 16 DAY 1,C16D1,VISIT +CYCLE 17 DAY 1,Cycle 17 Day 1,FOLDERL +CYCLE 18 DAY 1,Cycle 18 Day 1,FOLDERL +CYCLE 19 DAY 1,Cycle 19 Day 1,FOLDERL +CYCLE 2 DAY 1,Cycle 2 Day 1,FOLDERL +CYCLE 2 DAY 1,C2D1,VISIT +CYCLE 2 DAY 2,Cycle 2 Day 2,FOLDERL +CYCLE 2 DAY 3,Cycle 2 Day 3,FOLDERL +CYCLE 20 DAY 1,Cycle 20 Day 1,FOLDERL +CYCLE 21 DAY 1,Cycle 21 Day 1,FOLDERL +CYCLE 22 DAY 1,Cycle 22 Day 1,FOLDERL +CYCLE 23 DAY 1,Cycle 23 Day 1,FOLDERL +CYCLE 24 DAY 1,Cycle 24 Day 1,FOLDERL +CYCLE 24 DAY 1,C24D1,VISIT +CYCLE 25 DAY 1,Cycle 25 Day 1,FOLDERL +CYCLE 26 DAY 1,Cycle 26 Day 1,FOLDERL +CYCLE 27 DAY 1,Cycle 27 Day 1,FOLDERL +CYCLE 28 DAY 1,Cycle 28 Day 1,FOLDERL +CYCLE 29 DAY 1,Cycle 29 Day 1,FOLDERL +CYCLE 3 DAY 1,Cycle 3 Day 1,FOLDERL +CYCLE 3 DAY 1,C3D1,VISIT +CYCLE 3 DAY 2,Cycle 3 Day 2,FOLDERL +CYCLE 3 DAY 3,Cycle 3 Day 3,FOLDERL +CYCLE 30 DAY 1,Cycle 30 Day 1,FOLDERL +CYCLE 31 DAY 1,Cycle 31 Day 1,FOLDERL +CYCLE 32 DAY 1,Cycle 32 Day 1,FOLDERL +CYCLE 32 DAY 1,C32D1,VISIT +CYCLE 33 DAY 1,Cycle 33 Day 1,FOLDERL +CYCLE 34 DAY 1,Cycle 34 Day 1,FOLDERL +CYCLE 35 DAY 1,Cycle 35 Day 1,FOLDERL +CYCLE 36 DAY 1,Cycle 36 Day 1,FOLDERL +CYCLE 37 DAY 1,Cycle 37 Day 1,FOLDERL +CYCLE 38 DAY 1,Cycle 38 Day 1,FOLDERL +CYCLE 39 DAY 1,Cycle 39 Day 1,FOLDERL +CYCLE 4 DAY 1,Cycle 4 Day 1,FOLDERL +CYCLE 4 DAY 1,C4D1,VISIT +CYCLE 4 DAY 2,Cycle 4 Day 2,FOLDERL +CYCLE 4 DAY 3,Cycle 4 Day 3,FOLDERL +CYCLE 40 DAY 1,Cycle 40 Day 1,FOLDERL +CYCLE 40 DAY 1,C40D1,VISIT +CYCLE 41 DAY 1,Cycle 41 Day 1,FOLDERL +CYCLE 42 DAY 1,Cycle 42 Day 1,FOLDERL +CYCLE 43 DAY 1,Cycle 43 Day 1,FOLDERL +CYCLE 44 DAY 1,Cycle 44 Day 1,FOLDERL +CYCLE 45 DAY 1,Cycle 45 Day 1,FOLDERL +CYCLE 46 DAY 1,Cycle 46 Day 1,FOLDERL +CYCLE 47 DAY 1,Cycle 47 Day 1,FOLDERL +CYCLE 48 DAY 1,Cycle 48 Day 1,FOLDERL +CYCLE 48 DAY 1,C48D1,VISIT +CYCLE 49 DAY 1,Cycle 49 Day 1,FOLDERL +CYCLE 5 DAY 1,Cycle 5 Day 1,FOLDERL +CYCLE 50 DAY 1,Cycle 50 Day 1,FOLDERL +CYCLE 51 DAY 1,Cycle 51 Day 1,FOLDERL +CYCLE 52 DAY 1,Cycle 52 Day 1,FOLDERL +CYCLE 53 DAY 1,Cycle 53 Day 1,FOLDERL +CYCLE 54 DAY 1,Cycle 54 Day 1,FOLDERL +CYCLE 55 DAY 1,Cycle 55 Day 1,FOLDERL +CYCLE 56 DAY 1,Cycle 56 Day 1,FOLDERL +CYCLE 56 DAY 1,C56D1,VISIT +CYCLE 57 DAY 1,Cycle 57 Day 1,FOLDERL +CYCLE 58 DAY 1,Cycle 58 Day 1,FOLDERL +CYCLE 59 DAY 1,Cycle 59 Day 1,FOLDERL +CYCLE 6 DAY 1,Cycle 6 Day 1,FOLDERL +CYCLE 60 DAY 1,Cycle 60 Day 1,FOLDERL +CYCLE 61 DAY 1,Cycle 61 Day 1,FOLDERL +CYCLE 62 DAY 1,Cycle 62 Day 1,FOLDERL +CYCLE 63 DAY 1,Cycle 63 Day 1,FOLDERL +CYCLE 64 DAY 1,Cycle 64 Day 1,FOLDERL +CYCLE 64 DAY 1,C64D1,VISIT +CYCLE 65 DAY 1,Cycle 65 Day 1,FOLDERL +CYCLE 66 DAY 1,Cycle 66 Day 1,FOLDERL +CYCLE 67 DAY 1,Cycle 67 Day 1,FOLDERL +CYCLE 68 DAY 1,Cycle 68 Day 1,FOLDERL +CYCLE 69 DAY 1,Cycle 69 Day 1,FOLDERL +CYCLE 7 DAY 1,Cycle 7 Day 1,FOLDERL +CYCLE 8 DAY 1,Cycle 8 Day 1,FOLDERL +CYCLE 9 DAY 1,Cycle 9 Day 1,FOLDERL +POST PROGRESSIVE DISEASE/TREATMENT DISCONTINUATION PRO 3 MONTHS,Post Progressive Disease/Treatment Discontinuation PRO 3 Months,FOLDERL +POST PROGRESSIVE DISEASE/TREATMENT DISCONTINUATION PRO 6 MONTHS,Post Progressive Disease/Treatment Discontinuation PRO 6 Months,FOLDERL +SCREENING,Screening,FOLDERL +SUBJECT DISPOSITION-PERIOD COMPLETION/EARLY DISCONTINUATION,Subject Disposition - Period Comp. / Early Disc.,FOLDERL +SURVIVAL FOLLOW UP 12 MONTHS,Survival Follow-Up Month 12,INSTANCE +SURVIVAL FOLLOW UP 15 MONTHS,Survival Follow-Up Month 15,INSTANCE +SURVIVAL FOLLOW UP 18 MONTHS,Survival Follow-Up Month 18,INSTANCE +SURVIVAL FOLLOW UP 21 MONTHS,Survival Follow-Up Month 21,INSTANCE +SURVIVAL FOLLOW UP 24 MONTHS,Survival Follow-Up Month 24,INSTANCE +SURVIVAL FOLLOW UP 27 MONTHS,Survival Follow-Up Month 27,INSTANCE +SURVIVAL FOLLOW UP 3 MONTHS,Survival Follow-Up Month 3,INSTANCE +SURVIVAL FOLLOW UP 30 MONTHS,Survival Follow-Up Month 30,INSTANCE +SURVIVAL FOLLOW UP 33 MONTHS,Survival Follow-Up Month 33,INSTANCE +SURVIVAL FOLLOW UP 36 MONTHS,Survival Follow-Up Month 36,INSTANCE +SURVIVAL FOLLOW UP 6 MONTHS,Survival Follow-Up Month 6,INSTANCE +SURVIVAL FOLLOW UP 9 MONTHS,Survival Follow-Up Month 9,INSTANCE +SYMPTOMATIC DETERIORATION ASSESSMENT,Symptomatic Deterioration Assessment,FOLDERL +TREATMENT DISCONTINUATION,Study Drug Completion / Early Discontinuation,FOLDERL +TREATMENT DISCONTINUATION,DISCTX,VISIT +TUMOR ASSESSMENT WEEK 102,Tumor Assessment Week 102,INSTANCE +TUMOR ASSESSMENT WEEK 111,Tumor Assessment Week 111,INSTANCE +TUMOR ASSESSMENT WEEK 12,Tumor Assessment Week 12,INSTANCE +TUMOR ASSESSMENT WEEK 12,Tumor Assessment Week 12,INSTANCE +TUMOR ASSESSMENT WEEK 120,Tumor Assessment Week 120,INSTANCE +TUMOR ASSESSMENT WEEK 129,Tumor Assessment Week 129,INSTANCE +TUMOR ASSESSMENT WEEK 138,Tumor Assessment Week 138,INSTANCE +TUMOR ASSESSMENT WEEK 147,Tumor Assessment Week 147,INSTANCE +TUMOR ASSESSMENT WEEK 156,Tumor Assessment Week 156,INSTANCE +TUMOR ASSESSMENT WEEK 165,Tumor Assessment Week 165,INSTANCE +TUMOR ASSESSMENT WEEK 174,Tumor Assessment Week 174,INSTANCE +TUMOR ASSESSMENT WEEK 18,Tumor Assessment Week 18,INSTANCE +TUMOR ASSESSMENT WEEK 183,Tumor Assessment Week 183,INSTANCE +TUMOR ASSESSMENT WEEK 192,Tumor Assessment Week 192,INSTANCE +TUMOR ASSESSMENT WEEK 201,Tumor Assessment Week 201,INSTANCE +TUMOR ASSESSMENT WEEK 24,Tumor Assessment Week 24,INSTANCE +TUMOR ASSESSMENT WEEK 30,Tumor Assessment Week 30,INSTANCE +TUMOR ASSESSMENT WEEK 36,Tumor Assessment Week 36,INSTANCE +TUMOR ASSESSMENT WEEK 42,Tumor Assessment Week 42,INSTANCE +TUMOR ASSESSMENT WEEK 48,Tumor Assessment Week 48,INSTANCE +TUMOR ASSESSMENT WEEK 57,Tumor Assessment Week 57,INSTANCE +TUMOR ASSESSMENT WEEK 6,Tumor Assessment Week 6,INSTANCE +TUMOR ASSESSMENT WEEK 6,Tumor Assessment Week 6,INSTANCE +TUMOR ASSESSMENT WEEK 66,Tumor Assessment Week 66,INSTANCE +TUMOR ASSESSMENT WEEK 75,Tumor Assessment Week 75,INSTANCE +TUMOR ASSESSMENT WEEK 84,Tumor Assessment Week 84,INSTANCE +TUMOR ASSESSMENT WEEK 93,Tumor Assessment Week 93,INSTANCE +UNSCHEDULED,Unscheduled,FOLDERL +UNSCHEDULED,UNSCH,VISIT From dc2c732608f4c7715498e8cf19e1c154d7c490eb Mon Sep 17 00:00:00 2001 From: Kamil Sijko Date: Wed, 8 May 2024 17:26:11 +0200 Subject: [PATCH 02/20] WIP: working code, pending tests for baseline logic due to lack of examples --- NAMESPACE | 1 + R/derive_blfl.R | 67 ++++++++---------- inst/derive_blfl/raw_dataset.csv | 2 +- man/derive_blfl.Rd | 112 +++++++++++++++++++++++++++++++ man/dtc_datepart.Rd | 36 ++++++++++ man/dtc_timepart.Rd | 55 +++++++++++++++ 6 files changed, 233 insertions(+), 40 deletions(-) create mode 100644 man/derive_blfl.Rd create mode 100644 man/dtc_datepart.Rd create mode 100644 man/dtc_timepart.Rd diff --git a/NAMESPACE b/NAMESPACE index 6170cee4..28a465fd 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -8,6 +8,7 @@ export(create_iso8601) export(ct_map) export(ct_spec_example) export(ct_spec_vars) +export(derive_blfl) export(derive_study_day) export(fmt_cmp) export(hardcode_ct) diff --git a/R/derive_blfl.R b/R/derive_blfl.R index 1d14693a..c185e5fd 100644 --- a/R/derive_blfl.R +++ b/R/derive_blfl.R @@ -8,7 +8,6 @@ #' partial dates should be set to NA (default is `TRUE`). #' #' @return Character vector containing ISO8601 dates. -#' @export #' #' @examples #' ## Partial or missing dates set to NA by default @@ -53,7 +52,6 @@ dtc_datepart <- function(dtc, partial_as_na = TRUE) { #' seconds should be ignored (default is `TRUE`). #' #' @return Character vector containing ISO 8601 times. -#' @export #' #' @examples #' ## Partial or missing times set to NA and seconds ignored by default @@ -162,6 +160,18 @@ dtc_timepart <- function(dtc, partial_as_na = TRUE, ignore_seconds = TRUE) { #' observations in the study. Common choices for this reference variable #' include "RFSTDTC" (the date/time of the first study treatment) or #' "RFXSTDTC" (the date/time of the first exposure to the study drug). +#' @param baseline_visits A character vector specifying the baseline visits within the study. +#' These visits are identified as critical points for data collection at the start of the study, +#' before any intervention is applied. This parameter allows the function to filter and analyze +#' data specifically from these initial assessment points. For example, baseline visits might +#' include "Cycle 1 Day 1" if this is the first visit where subjects are assessed prior to receiving treatment. +#' @param baseline_timepoints A character vector of dates in "YYYY-MM-DD" format that specifies +#' the specific days during the baseline visits when key assessments or measurements were taken. +#' These timepoints are used to refine the selection of data points to include only those +#' collected on these specific dates, ensuring that only relevant baseline data is analyzed. +#' This is particularly important in studies where the timing of measurements can significantly +#' impact the interpretation of results. An example might be "2020-09-20", indicating a specific +#' day when baseline data was collected. #' #' @return Modified input data frame with baseline flag variable `--BLFL` or #' last observation before exposure flag `--LOBXFL` added. @@ -169,9 +179,9 @@ dtc_timepart <- function(dtc, partial_as_na = TRUE, ignore_seconds = TRUE) { #' @export #' #' @examples -#' DM <- read.csv(system.file("inst/derive_blfl/DM.csv", package = "sdtmoak")) +#' DM <- read.csv(system.file("derive_blfl/DM.csv", package = "sdtm.oak")) #' DM -#' raw_dataset <- read.csv(system.file("inst/derive_blfl/raw_dataset.csv", package = "sdtmoak")) +#' raw_dataset <- read.csv(system.file("derive_blfl/raw_dataset.csv", package = "sdtm.oak")) #' raw_dataset #' observed_output <- derive_blfl(raw_dataset = raw_dataset, #' DM_dataset = DM, @@ -194,7 +204,7 @@ derive_blfl <- function(raw_dataset, # Assert that the input dataset has a "DOMAIN" column checkmate::assert_names(names(raw_dataset), - must.include = "DOMAIN", + must.include = c("DOMAIN", sdtm.oak:::oak_id_vars()), .var.name = "Columns of 'raw_dataset'", add = assertion_collection) @@ -311,10 +321,10 @@ derive_blfl <- function(raw_dataset, # Split --DTC and reference_date_variable into date and time parts # (partial or missing dates and times set to NA) - ds_mod$dom_dt <- dtc_datepart(ds_mod[[domain_prefixed_names["dtc"]]]) - ds_mod$dom_tm <- dtc_timepart(ds_mod[[domain_prefixed_names["dtc"]]]) - ds_mod$ref_dt <- dtc_datepart(ds_mod[[reference_date_variable]]) - ds_mod$ref_tm <- dtc_timepart(ds_mod[[reference_date_variable]]) + ds_mod$dom_dt <- sdtm.oak:::dtc_datepart(ds_mod[[domain_prefixed_names["dtc"]]]) + ds_mod$dom_tm <- sdtm.oak:::dtc_timepart(ds_mod[[domain_prefixed_names["dtc"]]]) + ds_mod$ref_dt <- sdtm.oak:::dtc_datepart(ds_mod[[reference_date_variable]]) + ds_mod$ref_tm <- sdtm.oak:::dtc_timepart(ds_mod[[reference_date_variable]]) # If VISIT not in data frame then assign it as "" for processing @@ -322,32 +332,11 @@ derive_blfl <- function(raw_dataset, ds_mod[["VISIT"]] <- "" } - # Get a vector of baseline visits from Baseline column (if it exists) - # in oak_pkg_env$study_visit_configuration - # TODO: How this should work? Attached file doesn't contain this info - # if ("Baseline" %in% names(oak_pkg_env$study_visit_configuration)) { - # baseline_visits <- oak_pkg_env$study_visit_configuration |> - # dplyr::filter(toupper(Baseline) == "Y") |> - # dplyr::select(TV_Visit) |> - # dplyr::distinct() |> - # unlist() - # } - # If --TPT not in data frame then assign it as "" for processing if (!domain_prefixed_names["tpt"] %in% names(ds_mod)) { ds_mod[[domain_prefixed_names["tpt"]]] <- "" } - # Get a vector of baseline timepoints from Baseline column (if it exists) - # TODO: How this part should work? I need example data - # if ("Baseline" %in% names(oak_pkg_env$study_visit_configuration)) { - # baseline_timepoints <- oak_pkg_env$timepoint_conf |> - # dplyr::filter(Domain == domain & toupper(Baseline) == "Y") |> - # dplyr::select(TPT) |> - # dplyr::distinct() |> - # unlist() - # } - # Filter on rows that have domain and reference dates not equal to NA ds_subset <- dplyr::filter(ds_mod, !is.na(dom_dt) & !is.na(ref_dt)) @@ -368,11 +357,13 @@ derive_blfl <- function(raw_dataset, # - VISIT is in baseline visits list and # - xxTPT is in baseline timepoints list # (*C) - ds_subset_eq_2 <- dplyr::filter(ds_subset, dom_dt == ref_dt, - is.na(dom_tm) | is.na(ref_tm), - (VISIT %in% baseline_visits & get(domain_prefixed_names["tpt"]) %in% baseline_timepoints) | - (VISIT %in% baseline_visits & length(baseline_timepoints) == 0) | - (get(domain_prefixed_names["tpt"]) %in% baseline_timepoints & length(baseline_visits) == 0)) + ds_subset_eq_2 <- + ds_subset |> + dplyr::filter(dom_dt == ref_dt, + is.na(dom_tm) | is.na(ref_tm), + (VISIT %in% baseline_visits & get(domain_prefixed_names["tpt"]) %in% baseline_timepoints) | + (VISIT %in% baseline_visits & length(baseline_timepoints) == 0) | + (get(domain_prefixed_names["tpt"]) %in% baseline_timepoints & length(baseline_visits) == 0)) # Combine (*A) and (*B) and (*C) ds_base <- rbind(ds_subset_lt, ds_subset_eq_1, ds_subset_eq_2) @@ -392,9 +383,7 @@ derive_blfl <- function(raw_dataset, dplyr::group_by(USUBJID, .data[[domain_prefixed_names["testcd"]]]) |> dplyr::slice_max(!!rlang::sym(domain_prefixed_names["dtc"]), na_rm = TRUE) |> dplyr::ungroup() |> - # OAK.id.vars already there - # TODO: this is not true - dplyr::select(dplyr::all_of(c(oak_id_vars(), domain_prefixed_names["testcd"])), + dplyr::select(dplyr::all_of(c(sdtm.oak:::oak_id_vars(), domain_prefixed_names["testcd"])), dplyr::any_of( c(domain_prefixed_names[c("cat", "scat", @@ -414,7 +403,7 @@ derive_blfl <- function(raw_dataset, ds_blfl[[target_sdtm_variable]] <- "Y" # Join baseline flag onto input dataset - ds_out <- dplyr::left_join(raw_dataset, ds_blfl) + ds_out <- dplyr::left_join(raw_dataset, ds_blfl, by = sdtm.oak:::oak_id_vars()) # Assert that merged data frame has same number of rows as input data frame if (nrow(ds_out) != nrow(raw_dataset)) { diff --git a/inst/derive_blfl/raw_dataset.csv b/inst/derive_blfl/raw_dataset.csv index d7c07465..c03e81e7 100644 --- a/inst/derive_blfl/raw_dataset.csv +++ b/inst/derive_blfl/raw_dataset.csv @@ -1,4 +1,4 @@ -"DOMAIN","OAK_ID","RAVE_SOURCE","PATIENT_NUM","USUBJID","VSDTC","VSTESTCD","VSORRES","VSSTAT" +"DOMAIN","oak_id","raw_source","patient_number","USUBJID","VSDTC","VSTESTCD","VSORRES","VSSTAT" "VS",1,"VTLS1","375","GA41070-375","2020-09-01T13:31","DIABP","ND",NA "VS",1,"VTLS1","375","GA41070-375","2020-09-01T13:31","PULSE","ND",NA "VS",1,"VTLS2","375","GA41070-375","2020-09-28T11:00","DIABP","ND",NA diff --git a/man/derive_blfl.Rd b/man/derive_blfl.Rd new file mode 100644 index 00000000..5e5fa3cf --- /dev/null +++ b/man/derive_blfl.Rd @@ -0,0 +1,112 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/derive_blfl.R +\name{derive_blfl} +\alias{derive_blfl} +\title{Derive Baseline Flag or Last Observation Before Exposure Flag} +\usage{ +derive_blfl( + raw_dataset, + DM_dataset, + target_sdtm_variable, + reference_date_variable, + baseline_visits = character(), + baseline_timepoints = character() +) +} +\arguments{ +\item{raw_dataset}{Input data frame.} + +\item{target_sdtm_variable}{Name of variable to be derived (\code{--BLFL} or +\code{--LOBXFL} where \verb{--} is domain).} + +\item{reference_date_variable}{vector of a date/time from the +Demographics (DM) dataset, which serves as a point of comparison for other +observations in the study. Common choices for this reference variable +include "RFSTDTC" (the date/time of the first study treatment) or +"RFXSTDTC" (the date/time of the first exposure to the study drug).} + +\item{baseline_visits}{A character vector specifying the baseline visits within the study. +These visits are identified as critical points for data collection at the start of the study, +before any intervention is applied. This parameter allows the function to filter and analyze +data specifically from these initial assessment points. For example, baseline visits might +include "Cycle 1 Day 1" if this is the first visit where subjects are assessed prior to receiving treatment.} + +\item{baseline_timepoints}{A character vector of dates in "YYYY-MM-DD" format that specifies +the specific days during the baseline visits when key assessments or measurements were taken. +These timepoints are used to refine the selection of data points to include only those +collected on these specific dates, ensuring that only relevant baseline data is analyzed. +This is particularly important in studies where the timing of measurements can significantly +impact the interpretation of results. An example might be "2020-09-20", indicating a specific +day when baseline data was collected.} +} +\value{ +Modified input data frame with baseline flag variable \code{--BLFL} or +last observation before exposure flag \code{--LOBXFL} added. +} +\description{ +Derive the baseline flag variable (\code{--BLFL}) or the last observation before +exposure flag (\code{--LOBXFL}), from the observation date/time (\code{--DTC}), and a +DM domain reference date/time. +} +\details{ +The methodology and approach implemented in this function are based on +concepts and examples found in the Roche version of the {roak} package. + +The derivation is as follows: +\itemize{ +\item Remove records where the result (\code{--ORRES}) is missing. Also, exclude records +with results labeled as "ND" (No Data) or "NOT DONE" in the \code{--ORRES} column, +which indicate that the measurement or observation was not completed. This +step is important even if a previous cleaning step (like the +'oak_clean_not_done' function) might not have been applied to the data yet. +\item Remove records where the status (\code{--STAT}) indicates the observation or test +was not performed, marked as "NOT DONE". +\item Divide the date and time column (\code{--DTC}) and the reference date/time +variable (\code{reference_date_variable}) into separate date and time components. Ignore +any seconds recorded in the time component, focusing only on hours and +minutes for further calculations. +\item Set partial or missing dates to \code{NA}. +\item Set partial or missing times to \code{NA}. +\item Get a list of baseline visits from \verb{Baseline column} +(if it exists) in \code{oak_pkg_env$study_visit_configuration}. +\item Get a list of baseline timepoints from \code{Baseline} column +(if it exists) in \code{oak_pkg_env$timepoint_conf}. +\item Filter on rows that have domain and reference dates not equal to +\code{NA}. (Ref: \strong{X}) +\item Filter \strong{X} on rows with domain date prior to (less than) +reference date. (Ref: \strong{A}) +\item Filter \strong{X} on rows with domain date equal to reference date but +domain and reference times not equal to \code{NA} and domain time prior to (less +than) reference time. (Ref: \strong{B}) +\item Filter \strong{X} on rows with domain date equal to reference date but +domain and/or reference time equal to NA and: +\itemize{ +\item VISIT is in baseline visits list (if it exists) and +\item xxTPT is in baseline timepoints list (if it exists). +} +} + +(Ref: \strong{C}) +\itemize{ +\item Combine the rows from \strong{A}, \strong{B}, and \strong{C} to get a +data frame of pre-reference date observations. Sort the rows by \code{USUBJID}, +\code{--STAT}, and \code{--ORRES}. +\item Group by \code{USUBJID} and \code{--TESTCD} and filter on the rows +that have maximum value from \code{--DTC}. Keep only the oak id variables and +\code{--TESTCD} (because these are the unique values). Remove any duplicate rows. +Assign the baseline flag variable, \code{--BLFL}, the last observation before +exposure flag (\code{--LOBXFL}) variable to these rows. +\item Join the baseline flag onto the input dataset. +} +} +\examples{ +DM <- read.csv(system.file("derive_blfl/DM.csv", package = "sdtm.oak")) +DM +raw_dataset <- read.csv(system.file("derive_blfl/raw_dataset.csv", package = "sdtm.oak")) +raw_dataset +observed_output <- derive_blfl(raw_dataset = raw_dataset, + DM_dataset = DM, + target_sdtm_variable = "VSBLFL", + reference_date_variable = "RFSTDTC") +observed_output +} diff --git a/man/dtc_datepart.Rd b/man/dtc_datepart.Rd new file mode 100644 index 00000000..543d6ff1 --- /dev/null +++ b/man/dtc_datepart.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/derive_blfl.R +\name{dtc_datepart} +\alias{dtc_datepart} +\title{Extract date part from ISO8601 date/time variable} +\usage{ +dtc_datepart(dtc, partial_as_na = TRUE) +} +\arguments{ +\item{dtc}{Character vector containing ISO8601 date/times.} + +\item{partial_as_na}{Logical \code{TRUE} or \code{FALSE} indicating whether +partial dates should be set to NA (default is \code{TRUE}).} +} +\value{ +Character vector containing ISO8601 dates. +} +\description{ +The date part is extracted from an ISO8601 date/time variable. +By default, partial or missing dates are set to NA. +} +\examples{ +## Partial or missing dates set to NA by default +dtc_datepart( + c(NA, "", "2021", "2021-12", "2021-12-25", "2021-12-25T12:00:00") +) + # |--> c(NA, NA, NA, NA, "2021-12-25", "2021-12-25") + +## Prevent partial or missing dates from being set to NA +dtc_datepart( + c(NA, "", "2021", "2021-12", "2021-12-25", "2021-12-25T12:00:00"), + partial_as_na = FALSE +) + # |--> c(NA, "", "2021", "2021-12", "2021-12-25", "2021-12-25") + +} diff --git a/man/dtc_timepart.Rd b/man/dtc_timepart.Rd new file mode 100644 index 00000000..cb821ed2 --- /dev/null +++ b/man/dtc_timepart.Rd @@ -0,0 +1,55 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/derive_blfl.R +\name{dtc_timepart} +\alias{dtc_timepart} +\title{Extract time part from ISO 8601 date/time variable} +\usage{ +dtc_timepart(dtc, partial_as_na = TRUE, ignore_seconds = TRUE) +} +\arguments{ +\item{dtc}{Character vector containing ISO 8601 date/times.} + +\item{partial_as_na}{Logical \code{TRUE} or \code{FALSE} indicating whether +partial times should be set to NA (default is \code{TRUE}).} + +\item{ignore_seconds}{Logical \code{TRUE} or \code{FALSE} indicating whether +seconds should be ignored (default is \code{TRUE}).} +} +\value{ +Character vector containing ISO 8601 times. +} +\description{ +The time part is extracted from an ISO 8601 date/time variable. +By default, partial or missing times are set to NA, and seconds are ignored +and not extracted. +} +\examples{ +## Partial or missing times set to NA and seconds ignored by default +dtc_timepart( + c(NA, "", "2021-12-25", "2021-12-25T12", "2021-12-25T12:30", "2021-12-25T12:30:59") +) + # |--> c(NA, NA, NA, NA, "12:30", "12:30") + +## Prevent partial or missing times from being set to NA +dtc_timepart( + c(NA, "", "2021-12-25", "2021-12-25T12", "2021-12-25T12:30", "2021-12-25T12:30:59"), + partial_as_na = FALSE +) + # |--> c(NA, "", "", "12", "12:30", "12:30") + +## Do not ignore seconds, partial or missing times set to NA +dtc_timepart( + c(NA, "", "2021-12-25", "2021-12-25T12", "2021-12-25T12:30", "2021-12-25T12:30:59"), + ignore_seconds = FALSE +) + # |--> c(NA, NA, NA, NA, NA, "12:30:59") + +## Do not ignore seconds and prevent partial or missing times from being set to NA +dtc_timepart( + c(NA, "", "2021-12-25", "2021-12-25T12", "2021-12-25T12:30", "2021-12-25T12:30:59"), + partial_as_na = FALSE, + ignore_seconds = FALSE +) + # |--> c(NA, "", "", "12", "12:30", "12:30:59") + +} From 491252ef8f114314a6bf3476d244ade66ec9c392 Mon Sep 17 00:00:00 2001 From: Rammprasad Ganapathy Date: Tue, 14 May 2024 22:11:06 +0000 Subject: [PATCH 03/20] remove files as added in examples --- inst/derive_blfl/DM.csv | 9 -- inst/derive_blfl/raw_dataset.csv | 7 - .../derive_blfl/study_visit_configuration.csv | 138 ------------------ 3 files changed, 154 deletions(-) delete mode 100644 inst/derive_blfl/DM.csv delete mode 100644 inst/derive_blfl/raw_dataset.csv delete mode 100644 inst/derive_blfl/study_visit_configuration.csv diff --git a/inst/derive_blfl/DM.csv b/inst/derive_blfl/DM.csv deleted file mode 100644 index 17a9aa21..00000000 --- a/inst/derive_blfl/DM.csv +++ /dev/null @@ -1,9 +0,0 @@ -"STUDYID","DOMAIN","USUBJID","SUBJID","RFSTDTC","RFENDTC","RFXSTDTC","RFXENDTC","RFICDTC","RFPENDTC","DTHDTC","DTHFL","SITEID","INVID","INVNAM","BRTHDTC","AGE","AGEU","SEX","RACE","ETHNIC","ARMCD","ARM","COUNTRY","DMDTC","DMDY","RACE1","RACE2","RACESP1","RACESP2" -"GA41070","DM","GA41070-375","GA41070-375","2020-09-28T10:10","2020-10-22","2020-09-28T10:10","2020-10-22T11:00","2002-09-01","2020-10-22",NA,NA,"111111","90001","Dr doctor1","1965-01-21T05:56",NA,"YEARS","F","MULTIPLE","NOT HISPANIC OR LATINO","50","optional: insert name of study drug50 UNIT","US","2002-09-01",-6602,"AMERICAN INDIAN OR ALASKA NATIVE","NATIVE HAWAIIAN OR OTHER PACIFIC ISLANDER","ASIAN INDIAN","SAMOAN" -"GA41070","DM","GA41070-376","GA41070-376","2020-09-21T11:00","2020-10-19","2020-09-21T11:00","2020-10-21T10:00","2020-09-20","2020-10-19",NA,NA,"111111","90001","Dr doctor1","1998-08-10T19:05",NA,"YEARS","M","WHITE","NOT REPORTED","10","optional: insert name of study drug10 MG","US","2020-09-20",-1,NA,NA,NA,NA -"GA41070","DM","GA41070-377","GA41070-377",NA,"2020-10-22",NA,NA,"2020-08-25","2020-10-22",NA,NA,"111111","90001","Dr doctor1","1996-05-02T08:12",NA,"YEARS","F","MULTIPLE","UNKNOWN","10","optional: insert name of study drug10 MG","US","2020-08-31",NA,"BLACK OR AFRICAN AMERICAN","AMERICAN INDIAN OR ALASKA NATIVE",NA,NA -"GA41070","DM","GA41070-378","GA41070-378","2020-01-20T10:00","2020-02-02","2020-01-20T10:00","2020-01-20T11:00","2020-01-20","2020-02-02","2020-02-01","Y","111111","90001","Dr doctor1","2020-01-20T10:00","33","YEARS","M","AMERICAN INDIAN OR ALASKA NATIVE","HISPANIC OR LATINO","2","optional: insert name of study drug2 MG","US","2020-01-20",1,NA,NA,NA,NA -"GA41070","DM","GA41070-379","GA41070-379",NA,"2020-10-20",NA,NA,"2020-01-20","2020-10-20",NA,NA,"111111","90001","Dr doctor1","1980-01-20T10:00","30","YEARS","F","AMERICAN INDIAN OR ALASKA NATIVE","HISPANIC OR LATINO","2","optional: insert name of study drug2 MG","US","2020-01-20",NA,NA,NA,NA,NA -"GA41070","DM","GA41070-380","GA41070-380",NA,NA,NA,NA,NA,NA,NA,NA,"111111","90001","Dr doctor1","1980-01-20T10:00","40","YEARS","M","ASIAN","NOT HISPANIC OR LATINO",NA,NA,"US","2020-01-20",NA,NA,NA,"OTHER ASIAN","OTHER PACIFIC ISLANDER" -"GA41070","DM","GA41070-381","GA41070-381",NA,NA,NA,NA,NA,NA,NA,NA,"111111","90001","Dr doctor1",NA,NA,NA,NA,NA,NA,NA,NA,"US",NA,NA,NA,NA,NA,NA -"GA41070","DM","GA41070-382","GA41070-382",NA,NA,NA,NA,NA,NA,NA,NA,"111111","90001","Dr doctor1","1990-01-20T11:00","30","YEARS","F","UNKNOWN","NOT HISPANIC OR LATINO",NA,NA,"US","2020-03-20",NA,NA,NA,NA,NA diff --git a/inst/derive_blfl/raw_dataset.csv b/inst/derive_blfl/raw_dataset.csv deleted file mode 100644 index c03e81e7..00000000 --- a/inst/derive_blfl/raw_dataset.csv +++ /dev/null @@ -1,7 +0,0 @@ -"DOMAIN","oak_id","raw_source","patient_number","USUBJID","VSDTC","VSTESTCD","VSORRES","VSSTAT" -"VS",1,"VTLS1","375","GA41070-375","2020-09-01T13:31","DIABP","ND",NA -"VS",1,"VTLS1","375","GA41070-375","2020-09-01T13:31","PULSE","ND",NA -"VS",1,"VTLS2","375","GA41070-375","2020-09-28T11:00","DIABP","ND",NA -"VS",1,"VTLS2","375","GA41070-375","2020-09-28T11:00","PULSE","ND",NA -"VS",2,"VTLS1","376","GA41070-376","2020-09-20","DIABP","75",NA -"VS",2,"VTLS1","376","GA41070-376","2020-09-20","PULSE","ND",NA diff --git a/inst/derive_blfl/study_visit_configuration.csv b/inst/derive_blfl/study_visit_configuration.csv deleted file mode 100644 index 5c992ed0..00000000 --- a/inst/derive_blfl/study_visit_configuration.csv +++ /dev/null @@ -1,138 +0,0 @@ -TV_Visit,Study_Visit_name,Source_dataset_var_names -CYCLE 1 DAY 1,Cycle 1 Day 1,FOLDERL -CYCLE 1 DAY 1,C1D1,VISIT -CYCLE 1 DAY 2,Cycle 1 Day 2,FOLDERL -CYCLE 1 DAY 3,Cycle 1 Day 3,FOLDERL -CYCLE 10 DAY 1,Cycle 10 Day 1,FOLDERL -CYCLE 11 DAY 1,Cycle 11 Day 1,FOLDERL -CYCLE 12 DAY 1,Cycle 12 Day 1,FOLDERL -CYCLE 12 DAY 1,C12D1,VISIT -CYCLE 13 DAY 1,Cycle 13 Day 1,FOLDERL -CYCLE 14 DAY 1,Cycle 14 Day 1,FOLDERL -CYCLE 15 DAY 1,Cycle 15 Day 1,FOLDERL -CYCLE 16 DAY 1,Cycle 16 Day 1,FOLDERL -CYCLE 16 DAY 1,C16D1,VISIT -CYCLE 17 DAY 1,Cycle 17 Day 1,FOLDERL -CYCLE 18 DAY 1,Cycle 18 Day 1,FOLDERL -CYCLE 19 DAY 1,Cycle 19 Day 1,FOLDERL -CYCLE 2 DAY 1,Cycle 2 Day 1,FOLDERL -CYCLE 2 DAY 1,C2D1,VISIT -CYCLE 2 DAY 2,Cycle 2 Day 2,FOLDERL -CYCLE 2 DAY 3,Cycle 2 Day 3,FOLDERL -CYCLE 20 DAY 1,Cycle 20 Day 1,FOLDERL -CYCLE 21 DAY 1,Cycle 21 Day 1,FOLDERL -CYCLE 22 DAY 1,Cycle 22 Day 1,FOLDERL -CYCLE 23 DAY 1,Cycle 23 Day 1,FOLDERL -CYCLE 24 DAY 1,Cycle 24 Day 1,FOLDERL -CYCLE 24 DAY 1,C24D1,VISIT -CYCLE 25 DAY 1,Cycle 25 Day 1,FOLDERL -CYCLE 26 DAY 1,Cycle 26 Day 1,FOLDERL -CYCLE 27 DAY 1,Cycle 27 Day 1,FOLDERL -CYCLE 28 DAY 1,Cycle 28 Day 1,FOLDERL -CYCLE 29 DAY 1,Cycle 29 Day 1,FOLDERL -CYCLE 3 DAY 1,Cycle 3 Day 1,FOLDERL -CYCLE 3 DAY 1,C3D1,VISIT -CYCLE 3 DAY 2,Cycle 3 Day 2,FOLDERL -CYCLE 3 DAY 3,Cycle 3 Day 3,FOLDERL -CYCLE 30 DAY 1,Cycle 30 Day 1,FOLDERL -CYCLE 31 DAY 1,Cycle 31 Day 1,FOLDERL -CYCLE 32 DAY 1,Cycle 32 Day 1,FOLDERL -CYCLE 32 DAY 1,C32D1,VISIT -CYCLE 33 DAY 1,Cycle 33 Day 1,FOLDERL -CYCLE 34 DAY 1,Cycle 34 Day 1,FOLDERL -CYCLE 35 DAY 1,Cycle 35 Day 1,FOLDERL -CYCLE 36 DAY 1,Cycle 36 Day 1,FOLDERL -CYCLE 37 DAY 1,Cycle 37 Day 1,FOLDERL -CYCLE 38 DAY 1,Cycle 38 Day 1,FOLDERL -CYCLE 39 DAY 1,Cycle 39 Day 1,FOLDERL -CYCLE 4 DAY 1,Cycle 4 Day 1,FOLDERL -CYCLE 4 DAY 1,C4D1,VISIT -CYCLE 4 DAY 2,Cycle 4 Day 2,FOLDERL -CYCLE 4 DAY 3,Cycle 4 Day 3,FOLDERL -CYCLE 40 DAY 1,Cycle 40 Day 1,FOLDERL -CYCLE 40 DAY 1,C40D1,VISIT -CYCLE 41 DAY 1,Cycle 41 Day 1,FOLDERL -CYCLE 42 DAY 1,Cycle 42 Day 1,FOLDERL -CYCLE 43 DAY 1,Cycle 43 Day 1,FOLDERL -CYCLE 44 DAY 1,Cycle 44 Day 1,FOLDERL -CYCLE 45 DAY 1,Cycle 45 Day 1,FOLDERL -CYCLE 46 DAY 1,Cycle 46 Day 1,FOLDERL -CYCLE 47 DAY 1,Cycle 47 Day 1,FOLDERL -CYCLE 48 DAY 1,Cycle 48 Day 1,FOLDERL -CYCLE 48 DAY 1,C48D1,VISIT -CYCLE 49 DAY 1,Cycle 49 Day 1,FOLDERL -CYCLE 5 DAY 1,Cycle 5 Day 1,FOLDERL -CYCLE 50 DAY 1,Cycle 50 Day 1,FOLDERL -CYCLE 51 DAY 1,Cycle 51 Day 1,FOLDERL -CYCLE 52 DAY 1,Cycle 52 Day 1,FOLDERL -CYCLE 53 DAY 1,Cycle 53 Day 1,FOLDERL -CYCLE 54 DAY 1,Cycle 54 Day 1,FOLDERL -CYCLE 55 DAY 1,Cycle 55 Day 1,FOLDERL -CYCLE 56 DAY 1,Cycle 56 Day 1,FOLDERL -CYCLE 56 DAY 1,C56D1,VISIT -CYCLE 57 DAY 1,Cycle 57 Day 1,FOLDERL -CYCLE 58 DAY 1,Cycle 58 Day 1,FOLDERL -CYCLE 59 DAY 1,Cycle 59 Day 1,FOLDERL -CYCLE 6 DAY 1,Cycle 6 Day 1,FOLDERL -CYCLE 60 DAY 1,Cycle 60 Day 1,FOLDERL -CYCLE 61 DAY 1,Cycle 61 Day 1,FOLDERL -CYCLE 62 DAY 1,Cycle 62 Day 1,FOLDERL -CYCLE 63 DAY 1,Cycle 63 Day 1,FOLDERL -CYCLE 64 DAY 1,Cycle 64 Day 1,FOLDERL -CYCLE 64 DAY 1,C64D1,VISIT -CYCLE 65 DAY 1,Cycle 65 Day 1,FOLDERL -CYCLE 66 DAY 1,Cycle 66 Day 1,FOLDERL -CYCLE 67 DAY 1,Cycle 67 Day 1,FOLDERL -CYCLE 68 DAY 1,Cycle 68 Day 1,FOLDERL -CYCLE 69 DAY 1,Cycle 69 Day 1,FOLDERL -CYCLE 7 DAY 1,Cycle 7 Day 1,FOLDERL -CYCLE 8 DAY 1,Cycle 8 Day 1,FOLDERL -CYCLE 9 DAY 1,Cycle 9 Day 1,FOLDERL -POST PROGRESSIVE DISEASE/TREATMENT DISCONTINUATION PRO 3 MONTHS,Post Progressive Disease/Treatment Discontinuation PRO 3 Months,FOLDERL -POST PROGRESSIVE DISEASE/TREATMENT DISCONTINUATION PRO 6 MONTHS,Post Progressive Disease/Treatment Discontinuation PRO 6 Months,FOLDERL -SCREENING,Screening,FOLDERL -SUBJECT DISPOSITION-PERIOD COMPLETION/EARLY DISCONTINUATION,Subject Disposition - Period Comp. / Early Disc.,FOLDERL -SURVIVAL FOLLOW UP 12 MONTHS,Survival Follow-Up Month 12,INSTANCE -SURVIVAL FOLLOW UP 15 MONTHS,Survival Follow-Up Month 15,INSTANCE -SURVIVAL FOLLOW UP 18 MONTHS,Survival Follow-Up Month 18,INSTANCE -SURVIVAL FOLLOW UP 21 MONTHS,Survival Follow-Up Month 21,INSTANCE -SURVIVAL FOLLOW UP 24 MONTHS,Survival Follow-Up Month 24,INSTANCE -SURVIVAL FOLLOW UP 27 MONTHS,Survival Follow-Up Month 27,INSTANCE -SURVIVAL FOLLOW UP 3 MONTHS,Survival Follow-Up Month 3,INSTANCE -SURVIVAL FOLLOW UP 30 MONTHS,Survival Follow-Up Month 30,INSTANCE -SURVIVAL FOLLOW UP 33 MONTHS,Survival Follow-Up Month 33,INSTANCE -SURVIVAL FOLLOW UP 36 MONTHS,Survival Follow-Up Month 36,INSTANCE -SURVIVAL FOLLOW UP 6 MONTHS,Survival Follow-Up Month 6,INSTANCE -SURVIVAL FOLLOW UP 9 MONTHS,Survival Follow-Up Month 9,INSTANCE -SYMPTOMATIC DETERIORATION ASSESSMENT,Symptomatic Deterioration Assessment,FOLDERL -TREATMENT DISCONTINUATION,Study Drug Completion / Early Discontinuation,FOLDERL -TREATMENT DISCONTINUATION,DISCTX,VISIT -TUMOR ASSESSMENT WEEK 102,Tumor Assessment Week 102,INSTANCE -TUMOR ASSESSMENT WEEK 111,Tumor Assessment Week 111,INSTANCE -TUMOR ASSESSMENT WEEK 12,Tumor Assessment Week 12,INSTANCE -TUMOR ASSESSMENT WEEK 12,Tumor Assessment Week 12,INSTANCE -TUMOR ASSESSMENT WEEK 120,Tumor Assessment Week 120,INSTANCE -TUMOR ASSESSMENT WEEK 129,Tumor Assessment Week 129,INSTANCE -TUMOR ASSESSMENT WEEK 138,Tumor Assessment Week 138,INSTANCE -TUMOR ASSESSMENT WEEK 147,Tumor Assessment Week 147,INSTANCE -TUMOR ASSESSMENT WEEK 156,Tumor Assessment Week 156,INSTANCE -TUMOR ASSESSMENT WEEK 165,Tumor Assessment Week 165,INSTANCE -TUMOR ASSESSMENT WEEK 174,Tumor Assessment Week 174,INSTANCE -TUMOR ASSESSMENT WEEK 18,Tumor Assessment Week 18,INSTANCE -TUMOR ASSESSMENT WEEK 183,Tumor Assessment Week 183,INSTANCE -TUMOR ASSESSMENT WEEK 192,Tumor Assessment Week 192,INSTANCE -TUMOR ASSESSMENT WEEK 201,Tumor Assessment Week 201,INSTANCE -TUMOR ASSESSMENT WEEK 24,Tumor Assessment Week 24,INSTANCE -TUMOR ASSESSMENT WEEK 30,Tumor Assessment Week 30,INSTANCE -TUMOR ASSESSMENT WEEK 36,Tumor Assessment Week 36,INSTANCE -TUMOR ASSESSMENT WEEK 42,Tumor Assessment Week 42,INSTANCE -TUMOR ASSESSMENT WEEK 48,Tumor Assessment Week 48,INSTANCE -TUMOR ASSESSMENT WEEK 57,Tumor Assessment Week 57,INSTANCE -TUMOR ASSESSMENT WEEK 6,Tumor Assessment Week 6,INSTANCE -TUMOR ASSESSMENT WEEK 6,Tumor Assessment Week 6,INSTANCE -TUMOR ASSESSMENT WEEK 66,Tumor Assessment Week 66,INSTANCE -TUMOR ASSESSMENT WEEK 75,Tumor Assessment Week 75,INSTANCE -TUMOR ASSESSMENT WEEK 84,Tumor Assessment Week 84,INSTANCE -TUMOR ASSESSMENT WEEK 93,Tumor Assessment Week 93,INSTANCE -UNSCHEDULED,Unscheduled,FOLDERL -UNSCHEDULED,UNSCH,VISIT From 675fcef76ea8f95d24ec53062d5fdfe2d8bc9b8d Mon Sep 17 00:00:00 2001 From: Rammprasad Ganapathy Date: Tue, 14 May 2024 22:11:46 +0000 Subject: [PATCH 04/20] modified function --- R/derive_blfl.R | 172 +++++++++++++++++++++++++----------------------- 1 file changed, 90 insertions(+), 82 deletions(-) diff --git a/R/derive_blfl.R b/R/derive_blfl.R index c185e5fd..38835e35 100644 --- a/R/derive_blfl.R +++ b/R/derive_blfl.R @@ -107,41 +107,32 @@ dtc_timepart <- function(dtc, partial_as_na = TRUE, ignore_seconds = TRUE) { #' exposure flag (`--LOBXFL`), from the observation date/time (`--DTC`), and a #' DM domain reference date/time. #' -#' The methodology and approach implemented in this function are based on -#' concepts and examples found in the Roche version of the {roak} package. #' #' The derivation is as follows: #' #' - Remove records where the result (`--ORRES`) is missing. Also, exclude records #' with results labeled as "ND" (No Data) or "NOT DONE" in the `--ORRES` column, -#' which indicate that the measurement or observation was not completed. This -#' step is important even if a previous cleaning step (like the -#' 'oak_clean_not_done' function) might not have been applied to the data yet. +#' which indicate that the measurement or observation was not completed. #' - Remove records where the status (`--STAT`) indicates the observation or test #' was not performed, marked as "NOT DONE". #' - Divide the date and time column (`--DTC`) and the reference date/time -#' variable (`reference_date_variable`) into separate date and time components. Ignore +#' variable (`ref_var`) into separate date and time components. Ignore #' any seconds recorded in the time component, focusing only on hours and #' minutes for further calculations. #' - Set partial or missing dates to `NA`. #' - Set partial or missing times to `NA`. -#' - Get a list of baseline visits from `Baseline column` -#' (if it exists) in `oak_pkg_env$study_visit_configuration`. -#' - Get a list of baseline timepoints from `Baseline` column -#' (if it exists) in `oak_pkg_env$timepoint_conf`. #' - Filter on rows that have domain and reference dates not equal to -#' `NA`. (Ref: **X**) -#' - Filter **X** on rows with domain date prior to (less than) -#' reference date. (Ref: **A**) -#' - Filter **X** on rows with domain date equal to reference date but +#' `NA`. (Ref to as **X**) +#' - Filter **X** on rows with domain date (--DTC) prior to (less than) +#' reference date. (Ref to as **A**) +#' - Filter **X** on rows with domain date (--DTC) equal to reference date but #' domain and reference times not equal to `NA` and domain time prior to (less -#' than) reference time. (Ref: **B**) -#' - Filter **X** on rows with domain date equal to reference date but +#' than) reference time. (Ref to as **B**) +#' - Filter **X** on rows with domain date (--DTC) equal to reference date but #' domain and/or reference time equal to NA and: #' - VISIT is in baseline visits list (if it exists) and #' - xxTPT is in baseline timepoints list (if it exists). -#' -#' (Ref: **C**) +#' (Ref to as **C**) #' - Combine the rows from **A**, **B**, and **C** to get a #' data frame of pre-reference date observations. Sort the rows by `USUBJID`, #' `--STAT`, and `--ORRES`. @@ -150,28 +141,23 @@ dtc_timepart <- function(dtc, partial_as_na = TRUE, ignore_seconds = TRUE) { #' `--TESTCD` (because these are the unique values). Remove any duplicate rows. #' Assign the baseline flag variable, `--BLFL`, the last observation before #' exposure flag (`--LOBXFL`) variable to these rows. -#' - Join the baseline flag onto the input dataset. +#' - Join the baseline flag onto the input dataset based on oak id vars #' -#' @param raw_dataset Input data frame. -#' @param target_sdtm_variable Name of variable to be derived (`--BLFL` or +#' @param tgt_dat Input SDTM domain. +#' @param tgt_var Name of variable to be derived (`--BLFL` or #' `--LOBXFL` where `--` is domain). -#' @param reference_date_variable vector of a date/time from the +#' @param ref_var vector of a date/time from the #' Demographics (DM) dataset, which serves as a point of comparison for other #' observations in the study. Common choices for this reference variable #' include "RFSTDTC" (the date/time of the first study treatment) or #' "RFXSTDTC" (the date/time of the first exposure to the study drug). #' @param baseline_visits A character vector specifying the baseline visits within the study. #' These visits are identified as critical points for data collection at the start of the study, -#' before any intervention is applied. This parameter allows the function to filter and analyze -#' data specifically from these initial assessment points. For example, baseline visits might -#' include "Cycle 1 Day 1" if this is the first visit where subjects are assessed prior to receiving treatment. -#' @param baseline_timepoints A character vector of dates in "YYYY-MM-DD" format that specifies -#' the specific days during the baseline visits when key assessments or measurements were taken. -#' These timepoints are used to refine the selection of data points to include only those -#' collected on these specific dates, ensuring that only relevant baseline data is analyzed. -#' This is particularly important in studies where the timing of measurements can significantly -#' impact the interpretation of results. An example might be "2020-09-20", indicating a specific -#' day when baseline data was collected. +#' before any intervention is applied. This allows the function to assign the baseline +#' flag if thre --DTC matches to the reference date. +#' @param baseline_timepoints A character vector of timpoints values in --TPT that specifies +#' the specific timepoints during the baseline visits when key assessments or measurements were taken. +#' This allows the function to assign the baseline flag if the --DTC matches to the reference date. #' #' @return Modified input data frame with baseline flag variable `--BLFL` or #' last observation before exposure flag `--LOBXFL` added. @@ -179,77 +165,101 @@ dtc_timepart <- function(dtc, partial_as_na = TRUE, ignore_seconds = TRUE) { #' @export #' #' @examples -#' DM <- read.csv(system.file("derive_blfl/DM.csv", package = "sdtm.oak")) -#' DM -#' raw_dataset <- read.csv(system.file("derive_blfl/raw_dataset.csv", package = "sdtm.oak")) -#' raw_dataset -#' observed_output <- derive_blfl(raw_dataset = raw_dataset, -#' DM_dataset = DM, -#' target_sdtm_variable = "VSBLFL", -#' reference_date_variable = "RFSTDTC") +#' dm <- tibble::tribble( +#' ~USUBJID, ~RFSTDTC, ~RFXSTDTC, +#' "test_study-375", "2020-09-28T10:10", "2020-09-28T10:10", +#' "test_study-376", "2020-09-21T11:00", "2020-09-21T11:00", +#' "test_study-377", NA, NA, +#' "test_study-378", "2020-01-20T10:00", "2020-01-20T10:00", +#' "test_study-379", NA, NA, +#' ) +#' +#' dm +#' +#' tgt_dat <- +#' tibble::tribble( +#' ~DOMAIN, ~oak_id, ~raw_source, ~patient_number, ~USUBJID, ~VSDTC, ~VSTESTCD, ~VSORRES, ~VSSTAT, +#' "VS", 1L, "VTLS1", 375L, "test_study-375", "2020-09-01T13:31", "DIABP", "90", NA, +#' "VS", 2L, "VTLS1", 375L, "test_study-375", "2020-10-01T11:20", "DIABP", "90", NA, +#' "VS", 1L, "VTLS1", 375L, "test_study-375", "2020-09-28T10:10", "PULSE", "ND", NA, +#' "VS", 2L, "VTLS1", 375L, "test_study-375", "2020-10-01T13:31", "PULSE", "85", NA, +#' "VS", 1L, "VTLS2", 375L, "test_study-375", "2020-09-28T10:10", "SYSBP", "120", NA, +#' "VS", 2L, "VTLS2", 375L, "test_study-375", "2020-09-28T10:05", "SYSBP", "120", NA, +#' "VS", 1L, "VTLS1", 376L, "test_study-376", "2020-09-20", "DIABP", "75", NA, +#' "VS", 1L, "VTLS1", 376L, "test_study-376", "2020-09-20", "PULSE", NA, "NOT DONE", +#' "VS", 2L, "VTLS1", 376L, "test_study-376", "2020-09-20", "PULSE", "110", NA +#' ) +#' +#' tgt_dat +#' +#' observed_output <- derive_blfl(tgt_dat = tgt_dat, +#' dm_dat = dm, +#' tgt_var = "VSLOBXFL", +#' ref_var = "RFXSTDTC") #' observed_output -derive_blfl <- function(raw_dataset, - DM_dataset, - target_sdtm_variable, - reference_date_variable, +#' +derive_blfl <- function(tgt_dat, + dm_dat, + tgt_var, + ref_var, baseline_visits = character(), baseline_timepoints = character()) { # Check assertions -------------------------------------------------------- assertion_collection = checkmate::makeAssertCollection() - # Assert that raw_dataset is a data frame, - checkmate::assert_data_frame(raw_dataset, + # Assert that tgt_dat is a data frame, + checkmate::assert_data_frame(tgt_dat, col.names = "strict", min.rows = 1, add = assertion_collection) # Assert that the input dataset has a "DOMAIN" column - checkmate::assert_names(names(raw_dataset), + checkmate::assert_names(names(tgt_dat), must.include = c("DOMAIN", sdtm.oak:::oak_id_vars()), - .var.name = "Columns of 'raw_dataset'", + .var.name = "Columns of 'tgt_dataset'", add = assertion_collection) - # Assert DM_dataset is data.frame - checkmate::assert_data_frame(DM_dataset, + # Assert dm_dat is data.frame + checkmate::assert_data_frame(dm_dat, col.names = "strict", min.rows = 1, add = assertion_collection) # Check if USUBJID and reference_date is present in the DM - checkmate::assert_names(names(DM), - must.include = c("USUBJID", reference_date_variable), - .var.name = "Columns of 'DM_dataset'", + checkmate::assert_names(names(dm_dat), + must.include = c("USUBJID", ref_var), + .var.name = "Columns of 'dm_dat'", add = assertion_collection) - checkmate::assert_character(target_sdtm_variable, + checkmate::assert_character(tgt_var, min.chars = 1, len = 1, add = assertion_collection) - checkmate::assert_names(target_sdtm_variable, + checkmate::assert_names(tgt_var, type = "strict", add = assertion_collection) - checkmate::assert_character(reference_date_variable, + checkmate::assert_character(ref_var, min.chars = 1, len = 1, add = assertion_collection) - checkmate::assert_names(reference_date_variable, + checkmate::assert_names(ref_var, type = "strict", add = assertion_collection) checkmate::reportAssertions(assertion_collection) # Get domain from input dataset - domain <- unique(raw_dataset$DOMAIN) + domain <- unique(tgt_dat$DOMAIN) checkmate::assert_character(domain, min.chars = 1, len = 1, add = assertion_collection) - # Assert that target_sdtm_variable is a concatenation of domain and "BLFL" or "LOBXFL" + # Assert that tgt_var is a concatenation of domain and "BLFL" or "LOBXFL" checkmate::assert_choice( - target_sdtm_variable, + tgt_var, choices = c(paste0(domain, "BLFL"), paste0(domain, "LOBXFL")), add = assertion_collection @@ -264,18 +274,18 @@ derive_blfl <- function(raw_dataset, setNames(tolower(suffixes)) # Assert that the input dataset has a "DTC" column - checkmate::assert_names(names(raw_dataset), + checkmate::assert_names(names(tgt_dat), must.include = c(domain_prefixed_names[c("orres", "stat", "testcd", "dtc")]), - .var.name = "Columns of 'raw_dataset'", + .var.name = "Columns of 'tgt_dat'", add = assertion_collection) checkmate::reportAssertions(assertion_collection) # End of assertions, work begins ------------------------------------------ # Create copy of input dataset for modification and processing - ds_mod <- raw_dataset + ds_mod <- tgt_dat # Filter out rows where --ORRES is missing. Filter out --ORRES in # ("ND", "NOT DONE") as well. @@ -301,7 +311,7 @@ derive_blfl <- function(raw_dataset, "VISITNUM") # Drop those columns from the list which are not present in ds_in - con_col <- con_col[con_col %in% names(raw_dataset)] + con_col <- con_col[con_col %in% names(tgt_dat)] # Check for any column which is all NA and removing it from con_col list h <- which(sapply(ds_mod, function(x) all(is.na(x)))) @@ -311,20 +321,20 @@ derive_blfl <- function(raw_dataset, con_col <- con_col[!con_col %in% h] } - # Keep only USUBJID and reference_date_variable - DM_dataset <- dplyr::select(DM_dataset, + # Keep only USUBJID and ref_var + dm_dat <- dplyr::select(dm_dat, dplyr::all_of(c("USUBJID", - reference_date_variable))) + ref_var))) - # Left join dataset with DM_dataset domain based on USUBJID - ds_mod <- dplyr::left_join(ds_mod, DM_dataset, by = "USUBJID") + # Left join dataset with dm_dat domain based on USUBJID + ds_mod <- dplyr::left_join(ds_mod, dm_dat, by = "USUBJID") - # Split --DTC and reference_date_variable into date and time parts + # Split --DTC and ref_var into date and time parts # (partial or missing dates and times set to NA) ds_mod$dom_dt <- sdtm.oak:::dtc_datepart(ds_mod[[domain_prefixed_names["dtc"]]]) ds_mod$dom_tm <- sdtm.oak:::dtc_timepart(ds_mod[[domain_prefixed_names["dtc"]]]) - ds_mod$ref_dt <- sdtm.oak:::dtc_datepart(ds_mod[[reference_date_variable]]) - ds_mod$ref_tm <- sdtm.oak:::dtc_timepart(ds_mod[[reference_date_variable]]) + ds_mod$ref_dt <- sdtm.oak:::dtc_datepart(ds_mod[[ref_var]]) + ds_mod$ref_tm <- sdtm.oak:::dtc_timepart(ds_mod[[ref_var]]) # If VISIT not in data frame then assign it as "" for processing @@ -383,7 +393,7 @@ derive_blfl <- function(raw_dataset, dplyr::group_by(USUBJID, .data[[domain_prefixed_names["testcd"]]]) |> dplyr::slice_max(!!rlang::sym(domain_prefixed_names["dtc"]), na_rm = TRUE) |> dplyr::ungroup() |> - dplyr::select(dplyr::all_of(c(sdtm.oak:::oak_id_vars(), domain_prefixed_names["testcd"])), + dplyr::select(dplyr::all_of(c(sdtm.oak:::oak_id_vars(), domain_prefixed_names[["testcd"]])), dplyr::any_of( c(domain_prefixed_names[c("cat", "scat", @@ -391,25 +401,23 @@ derive_blfl <- function(raw_dataset, "loc", "lat", "dir", - "method")], - # For MI domain - "MIMRKSTI", - "MIGRPID" + "method")] ) )) |> dplyr::distinct() # Assign the baseline flag variable - ds_blfl[[target_sdtm_variable]] <- "Y" + ds_blfl[[tgt_var]] <- "Y" # Join baseline flag onto input dataset - ds_out <- dplyr::left_join(raw_dataset, ds_blfl, by = sdtm.oak:::oak_id_vars()) + ds_out <- dplyr::left_join(tgt_dat, ds_blfl, by = c(domain_prefixed_names[["testcd"]], + sdtm.oak:::oak_id_vars())) # Assert that merged data frame has same number of rows as input data frame - if (nrow(ds_out) != nrow(raw_dataset)) { + if (nrow(ds_out) != nrow(tgt_dat)) { stop(sprintf( - "Internal error: The processed dataset was expected to have the same number of rows (%d) as the input dataset (raw_dataset), but it actually has %d rows.", - nrow(raw_dataset), + "Internal error: The processed dataset was expected to have the same number of rows (%d) as the input dataset (tgt_dat), but it actually has %d rows.", + nrow(tgt_dat), nrow(ds_out) )) } From b4fa080a1da7560909f622999054fa87d8a5a650 Mon Sep 17 00:00:00 2001 From: Rammprasad Ganapathy Date: Tue, 14 May 2024 22:54:14 +0000 Subject: [PATCH 05/20] Harmonize input parameters with other derive_studyday function --- R/derive_blfl.R | 110 ++++++++++++++++++++++++------------------------ 1 file changed, 54 insertions(+), 56 deletions(-) diff --git a/R/derive_blfl.R b/R/derive_blfl.R index 38835e35..c59d2fce 100644 --- a/R/derive_blfl.R +++ b/R/derive_blfl.R @@ -33,7 +33,7 @@ dtc_datepart <- function(dtc, partial_as_na = TRUE) { # Set partial or missing dates to NA, depending on partial_as_na parameter if (partial_as_na) { - dt <- ifelse(nchar(dt) < 10, NA, dt) + dt <- ifelse(nchar(dt) < 10L, NA, dt) } return(dt) @@ -88,10 +88,10 @@ dtc_timepart <- function(dtc, partial_as_na = TRUE, ignore_seconds = TRUE) { checkmate::assert_character(dtc) # Determine length of time part depending on ignore_seconds parameter - tm_length <- ifelse(ignore_seconds, 5, 8) + tm_length <- ifelse(ignore_seconds, 5L, 8L) # Extract time part from ISO 8601 date/time variable - tm <- substr(sub("^([^T]+)T?", "", dtc), 1, tm_length) + tm <- substr(sub("^([^T]+)T?", "", dtc), 1L, tm_length) # Set partial or missing times to NA, depending on partial_as_na parameter if (partial_as_na) { @@ -143,7 +143,7 @@ dtc_timepart <- function(dtc, partial_as_na = TRUE, ignore_seconds = TRUE) { #' exposure flag (`--LOBXFL`) variable to these rows. #' - Join the baseline flag onto the input dataset based on oak id vars #' -#' @param tgt_dat Input SDTM domain. +#' @param sdtm_in Input SDTM domain. #' @param tgt_var Name of variable to be derived (`--BLFL` or #' `--LOBXFL` where `--` is domain). #' @param ref_var vector of a date/time from the @@ -176,7 +176,7 @@ dtc_timepart <- function(dtc, partial_as_na = TRUE, ignore_seconds = TRUE) { #' #' dm #' -#' tgt_dat <- +#' sdtm_in <- #' tibble::tribble( #' ~DOMAIN, ~oak_id, ~raw_source, ~patient_number, ~USUBJID, ~VSDTC, ~VSTESTCD, ~VSORRES, ~VSSTAT, #' "VS", 1L, "VTLS1", 375L, "test_study-375", "2020-09-01T13:31", "DIABP", "90", NA, @@ -186,62 +186,62 @@ dtc_timepart <- function(dtc, partial_as_na = TRUE, ignore_seconds = TRUE) { #' "VS", 1L, "VTLS2", 375L, "test_study-375", "2020-09-28T10:10", "SYSBP", "120", NA, #' "VS", 2L, "VTLS2", 375L, "test_study-375", "2020-09-28T10:05", "SYSBP", "120", NA, #' "VS", 1L, "VTLS1", 376L, "test_study-376", "2020-09-20", "DIABP", "75", NA, -#' "VS", 1L, "VTLS1", 376L, "test_study-376", "2020-09-20", "PULSE", NA, "NOT DONE", +#' "VS", 1L, "VTLS1", 376L, "test_study-376", "2020-09-20", "PULSE", NA, "NOT DONE", #nolint #' "VS", 2L, "VTLS1", 376L, "test_study-376", "2020-09-20", "PULSE", "110", NA #' ) #' -#' tgt_dat +#' sdtm_in #' -#' observed_output <- derive_blfl(tgt_dat = tgt_dat, -#' dm_dat = dm, +#' observed_output <- derive_blfl(sdtm_in = sdtm_in, +#' dm_domain = dm, #' tgt_var = "VSLOBXFL", #' ref_var = "RFXSTDTC") #' observed_output #' -derive_blfl <- function(tgt_dat, - dm_dat, +derive_blfl <- function(sdtm_in, + dm_domain, tgt_var, ref_var, baseline_visits = character(), baseline_timepoints = character()) { # Check assertions -------------------------------------------------------- - assertion_collection = checkmate::makeAssertCollection() - # Assert that tgt_dat is a data frame, - checkmate::assert_data_frame(tgt_dat, + assertion_collection <- checkmate::makeAssertCollection() + # Assert that sdtm_in is a data frame, + checkmate::assert_data_frame(sdtm_in, col.names = "strict", - min.rows = 1, + min.rows = 1L, add = assertion_collection) # Assert that the input dataset has a "DOMAIN" column - checkmate::assert_names(names(tgt_dat), + checkmate::assert_names(names(sdtm_in), must.include = c("DOMAIN", sdtm.oak:::oak_id_vars()), - .var.name = "Columns of 'tgt_dataset'", + .var.name = "Columns of 'sdtm_inaset'", add = assertion_collection) - # Assert dm_dat is data.frame - checkmate::assert_data_frame(dm_dat, + # Assert dm_domain is data.frame + checkmate::assert_data_frame(dm_domain, col.names = "strict", - min.rows = 1, + min.rows = 1L, add = assertion_collection) # Check if USUBJID and reference_date is present in the DM - checkmate::assert_names(names(dm_dat), + checkmate::assert_names(names(dm_domain), must.include = c("USUBJID", ref_var), - .var.name = "Columns of 'dm_dat'", + .var.name = "Columns of 'dm_domain'", add = assertion_collection) checkmate::assert_character(tgt_var, - min.chars = 1, - len = 1, - add = assertion_collection) + min.chars = 1L, + len = 1L, + add = assertion_collection) checkmate::assert_names(tgt_var, - type = "strict", - add = assertion_collection) + type = "strict", + add = assertion_collection) checkmate::assert_character(ref_var, - min.chars = 1, - len = 1, + min.chars = 1L, + len = 1L, add = assertion_collection) checkmate::assert_names(ref_var, @@ -251,11 +251,11 @@ derive_blfl <- function(tgt_dat, checkmate::reportAssertions(assertion_collection) # Get domain from input dataset - domain <- unique(tgt_dat$DOMAIN) + domain <- unique(sdtm_in$DOMAIN) checkmate::assert_character(domain, - min.chars = 1, - len = 1, - add = assertion_collection) + min.chars = 1L, + len = 1L, + add = assertion_collection) # Assert that tgt_var is a concatenation of domain and "BLFL" or "LOBXFL" checkmate::assert_choice( @@ -274,18 +274,18 @@ derive_blfl <- function(tgt_dat, setNames(tolower(suffixes)) # Assert that the input dataset has a "DTC" column - checkmate::assert_names(names(tgt_dat), + checkmate::assert_names(names(sdtm_in), must.include = c(domain_prefixed_names[c("orres", "stat", "testcd", "dtc")]), - .var.name = "Columns of 'tgt_dat'", + .var.name = "Columns of 'sdtm_in'", add = assertion_collection) checkmate::reportAssertions(assertion_collection) # End of assertions, work begins ------------------------------------------ # Create copy of input dataset for modification and processing - ds_mod <- tgt_dat + ds_mod <- sdtm_in # Filter out rows where --ORRES is missing. Filter out --ORRES in # ("ND", "NOT DONE") as well. @@ -294,12 +294,12 @@ derive_blfl <- function(tgt_dat, ds_mod <- ds_mod[!bad_orres_rows, ] # Filter out rows where --STAT is not equal to "NOT DONE" - ds_mod <- ds_mod |> - dplyr::filter( - dplyr::if_any( - dplyr::any_of(domain_prefixed_names["stat"]), ~ !.x %in% "NOT DONE")) + ds_mod <- + ds_mod |> + dplyr::filter(dplyr::if_any(dplyr::any_of(domain_prefixed_names["stat"]), + ~ !.x %in% "NOT DONE")) - if (nrow(ds_mod) == 0) { + if (nrow(ds_mod) == 0L) { stop(paste0( "No rows for which both --ORRES is not missing\n and --STAT not equals to NOT DONE.\n", " Not able to derive Baseline Flag or Last Observation Before Exposure Flag" @@ -307,11 +307,10 @@ derive_blfl <- function(tgt_dat, } # Checking for columns of interest - con_col <- c(domain_prefixed_names[c("testcd", "dtc", "var_tpt")], - "VISITNUM") + con_col <- c(domain_prefixed_names[c("testcd", "dtc", "var_tpt")], "VISITNUM") # Drop those columns from the list which are not present in ds_in - con_col <- con_col[con_col %in% names(tgt_dat)] + con_col <- con_col[con_col %in% names(sdtm_in)] # Check for any column which is all NA and removing it from con_col list h <- which(sapply(ds_mod, function(x) all(is.na(x)))) @@ -322,12 +321,10 @@ derive_blfl <- function(tgt_dat, } # Keep only USUBJID and ref_var - dm_dat <- dplyr::select(dm_dat, - dplyr::all_of(c("USUBJID", - ref_var))) + dm_domain <- dplyr::select(dm_domain, dplyr::all_of(c("USUBJID", ref_var))) - # Left join dataset with dm_dat domain based on USUBJID - ds_mod <- dplyr::left_join(ds_mod, dm_dat, by = "USUBJID") + # Left join dataset with dm_domain domain based on USUBJID + ds_mod <- dplyr::left_join(ds_mod, dm_domain, by = "USUBJID") # Split --DTC and ref_var into date and time parts # (partial or missing dates and times set to NA) @@ -372,8 +369,8 @@ derive_blfl <- function(tgt_dat, dplyr::filter(dom_dt == ref_dt, is.na(dom_tm) | is.na(ref_tm), (VISIT %in% baseline_visits & get(domain_prefixed_names["tpt"]) %in% baseline_timepoints) | - (VISIT %in% baseline_visits & length(baseline_timepoints) == 0) | - (get(domain_prefixed_names["tpt"]) %in% baseline_timepoints & length(baseline_visits) == 0)) + (VISIT %in% baseline_visits & length(baseline_timepoints) == 0L) | + (get(domain_prefixed_names["tpt"]) %in% baseline_timepoints & length(baseline_visits) == 0L)) # Combine (*A) and (*B) and (*C) ds_base <- rbind(ds_subset_lt, ds_subset_eq_1, ds_subset_eq_2) @@ -381,7 +378,7 @@ derive_blfl <- function(tgt_dat, # Sort the rows in ascending order with respect to columns from con_col ds_base <- dplyr::arrange_at(ds_base, c("USUBJID", con_col)) - if (nrow(ds_base) == 0) { + if (nrow(ds_base) == 0L) { message(paste0("There are no baseline records.")) } @@ -410,14 +407,15 @@ derive_blfl <- function(tgt_dat, ds_blfl[[tgt_var]] <- "Y" # Join baseline flag onto input dataset - ds_out <- dplyr::left_join(tgt_dat, ds_blfl, by = c(domain_prefixed_names[["testcd"]], + ds_out <- dplyr::left_join(sdtm_in, ds_blfl, by = c(domain_prefixed_names[["testcd"]], sdtm.oak:::oak_id_vars())) # Assert that merged data frame has same number of rows as input data frame - if (nrow(ds_out) != nrow(tgt_dat)) { + if (nrow(ds_out) != nrow(sdtm_in)) { stop(sprintf( - "Internal error: The processed dataset was expected to have the same number of rows (%d) as the input dataset (tgt_dat), but it actually has %d rows.", - nrow(tgt_dat), + "Internal error: The processed dataset was expected to have the same + number of rows (%d) as the input dataset (sdtm_in), but it actually has %d rows.", + nrow(sdtm_in), nrow(ds_out) )) } From c0f90b9b24beb562e8643131a777db5c6f4c3512 Mon Sep 17 00:00:00 2001 From: Kamil Sijko Date: Mon, 20 May 2024 20:54:43 +0200 Subject: [PATCH 06/20] Switched from checkmate to admiraldev assertions; added tests for example + validations --- R/derive_blfl.R | 84 ++++++------------- man/derive_blfl.Rd | 101 +++++++++++----------- tests/testthat/_snaps/derive_blfl.md | 107 ++++++++++++++++++++++++ tests/testthat/test-derive_blfl.R | 120 +++++++++++++++++++++++++++ 4 files changed, 305 insertions(+), 107 deletions(-) create mode 100644 tests/testthat/_snaps/derive_blfl.md create mode 100644 tests/testthat/test-derive_blfl.R diff --git a/R/derive_blfl.R b/R/derive_blfl.R index c59d2fce..25be5fd2 100644 --- a/R/derive_blfl.R +++ b/R/derive_blfl.R @@ -205,65 +205,31 @@ derive_blfl <- function(sdtm_in, baseline_visits = character(), baseline_timepoints = character()) { # Check assertions -------------------------------------------------------- - assertion_collection <- checkmate::makeAssertCollection() - # Assert that sdtm_in is a data frame, - checkmate::assert_data_frame(sdtm_in, - col.names = "strict", - min.rows = 1L, - add = assertion_collection) - - # Assert that the input dataset has a "DOMAIN" column - checkmate::assert_names(names(sdtm_in), - must.include = c("DOMAIN", sdtm.oak:::oak_id_vars()), - .var.name = "Columns of 'sdtm_inaset'", - add = assertion_collection) + # Check variables are character scalars + admiraldev::assert_character_scalar(tgt_var) + admiraldev::assert_character_scalar(ref_var) - # Assert dm_domain is data.frame - checkmate::assert_data_frame(dm_domain, - col.names = "strict", - min.rows = 1L, - add = assertion_collection) - - # Check if USUBJID and reference_date is present in the DM - checkmate::assert_names(names(dm_domain), - must.include = c("USUBJID", ref_var), - .var.name = "Columns of 'dm_domain'", - add = assertion_collection) - - checkmate::assert_character(tgt_var, - min.chars = 1L, - len = 1L, - add = assertion_collection) - - checkmate::assert_names(tgt_var, - type = "strict", - add = assertion_collection) - - checkmate::assert_character(ref_var, - min.chars = 1L, - len = 1L, - add = assertion_collection) + # Assert that sdtm_in is a data frame, contains DOMAIN and oak id vars + admiraldev::assert_data_frame( + sdtm_in, + required_vars = rlang::syms(c("DOMAIN", sdtm.oak:::oak_id_vars())) + ) - checkmate::assert_names(ref_var, - type = "strict", - add = assertion_collection) - - checkmate::reportAssertions(assertion_collection) + # Assert dm_domain is data.frame + admiraldev::assert_data_frame( + dm_domain, + required_vars = rlang::syms(c("USUBJID", ref_var)) + ) # Get domain from input dataset domain <- unique(sdtm_in$DOMAIN) - checkmate::assert_character(domain, - min.chars = 1L, - len = 1L, - add = assertion_collection) + + admiraldev::assert_character_scalar(domain) # Assert that tgt_var is a concatenation of domain and "BLFL" or "LOBXFL" - checkmate::assert_choice( - tgt_var, - choices = c(paste0(domain, "BLFL"), - paste0(domain, "LOBXFL")), - add = assertion_collection - ) + admiraldev::assert_character_scalar(tgt_var, + values = c(paste0(domain, "BLFL"), + paste0(domain, "LOBXFL"))) # Determine domain prefixed columns suffixes <- @@ -274,14 +240,12 @@ derive_blfl <- function(sdtm_in, setNames(tolower(suffixes)) # Assert that the input dataset has a "DTC" column - checkmate::assert_names(names(sdtm_in), - must.include = c(domain_prefixed_names[c("orres", - "stat", - "testcd", - "dtc")]), - .var.name = "Columns of 'sdtm_in'", - add = assertion_collection) - checkmate::reportAssertions(assertion_collection) + admiraldev::assert_data_frame( + sdtm_in, + required_vars = rlang::syms(c(domain_prefixed_names[c("orres", + "stat", + "testcd", + "dtc")]))) # End of assertions, work begins ------------------------------------------ # Create copy of input dataset for modification and processing diff --git a/man/derive_blfl.Rd b/man/derive_blfl.Rd index 5e5fa3cf..fc9353ae 100644 --- a/man/derive_blfl.Rd +++ b/man/derive_blfl.Rd @@ -5,21 +5,21 @@ \title{Derive Baseline Flag or Last Observation Before Exposure Flag} \usage{ derive_blfl( - raw_dataset, - DM_dataset, - target_sdtm_variable, - reference_date_variable, + sdtm_in, + dm_domain, + tgt_var, + ref_var, baseline_visits = character(), baseline_timepoints = character() ) } \arguments{ -\item{raw_dataset}{Input data frame.} +\item{sdtm_in}{Input SDTM domain.} -\item{target_sdtm_variable}{Name of variable to be derived (\code{--BLFL} or +\item{tgt_var}{Name of variable to be derived (\code{--BLFL} or \code{--LOBXFL} where \verb{--} is domain).} -\item{reference_date_variable}{vector of a date/time from the +\item{ref_var}{vector of a date/time from the Demographics (DM) dataset, which serves as a point of comparison for other observations in the study. Common choices for this reference variable include "RFSTDTC" (the date/time of the first study treatment) or @@ -27,17 +27,12 @@ include "RFSTDTC" (the date/time of the first study treatment) or \item{baseline_visits}{A character vector specifying the baseline visits within the study. These visits are identified as critical points for data collection at the start of the study, -before any intervention is applied. This parameter allows the function to filter and analyze -data specifically from these initial assessment points. For example, baseline visits might -include "Cycle 1 Day 1" if this is the first visit where subjects are assessed prior to receiving treatment.} +before any intervention is applied. This allows the function to assign the baseline +flag if thre --DTC matches to the reference date.} -\item{baseline_timepoints}{A character vector of dates in "YYYY-MM-DD" format that specifies -the specific days during the baseline visits when key assessments or measurements were taken. -These timepoints are used to refine the selection of data points to include only those -collected on these specific dates, ensuring that only relevant baseline data is analyzed. -This is particularly important in studies where the timing of measurements can significantly -impact the interpretation of results. An example might be "2020-09-20", indicating a specific -day when baseline data was collected.} +\item{baseline_timepoints}{A character vector of timpoints values in --TPT that specifies +the specific timepoints during the baseline visits when key assessments or measurements were taken. +This allows the function to assign the baseline flag if the --DTC matches to the reference date.} } \value{ Modified input data frame with baseline flag variable \code{--BLFL} or @@ -49,45 +44,33 @@ exposure flag (\code{--LOBXFL}), from the observation date/time (\code{--DTC}), DM domain reference date/time. } \details{ -The methodology and approach implemented in this function are based on -concepts and examples found in the Roche version of the {roak} package. - The derivation is as follows: \itemize{ \item Remove records where the result (\code{--ORRES}) is missing. Also, exclude records with results labeled as "ND" (No Data) or "NOT DONE" in the \code{--ORRES} column, -which indicate that the measurement or observation was not completed. This -step is important even if a previous cleaning step (like the -'oak_clean_not_done' function) might not have been applied to the data yet. +which indicate that the measurement or observation was not completed. \item Remove records where the status (\code{--STAT}) indicates the observation or test was not performed, marked as "NOT DONE". \item Divide the date and time column (\code{--DTC}) and the reference date/time -variable (\code{reference_date_variable}) into separate date and time components. Ignore +variable (\code{ref_var}) into separate date and time components. Ignore any seconds recorded in the time component, focusing only on hours and minutes for further calculations. \item Set partial or missing dates to \code{NA}. \item Set partial or missing times to \code{NA}. -\item Get a list of baseline visits from \verb{Baseline column} -(if it exists) in \code{oak_pkg_env$study_visit_configuration}. -\item Get a list of baseline timepoints from \code{Baseline} column -(if it exists) in \code{oak_pkg_env$timepoint_conf}. \item Filter on rows that have domain and reference dates not equal to -\code{NA}. (Ref: \strong{X}) -\item Filter \strong{X} on rows with domain date prior to (less than) -reference date. (Ref: \strong{A}) -\item Filter \strong{X} on rows with domain date equal to reference date but +\code{NA}. (Ref to as \strong{X}) +\item Filter \strong{X} on rows with domain date (--DTC) prior to (less than) +reference date. (Ref to as \strong{A}) +\item Filter \strong{X} on rows with domain date (--DTC) equal to reference date but domain and reference times not equal to \code{NA} and domain time prior to (less -than) reference time. (Ref: \strong{B}) -\item Filter \strong{X} on rows with domain date equal to reference date but +than) reference time. (Ref to as \strong{B}) +\item Filter \strong{X} on rows with domain date (--DTC) equal to reference date but domain and/or reference time equal to NA and: \itemize{ \item VISIT is in baseline visits list (if it exists) and \item xxTPT is in baseline timepoints list (if it exists). +(Ref to as \strong{C}) } -} - -(Ref: \strong{C}) -\itemize{ \item Combine the rows from \strong{A}, \strong{B}, and \strong{C} to get a data frame of pre-reference date observations. Sort the rows by \code{USUBJID}, \code{--STAT}, and \code{--ORRES}. @@ -96,17 +79,41 @@ that have maximum value from \code{--DTC}. Keep only the oak id variables and \code{--TESTCD} (because these are the unique values). Remove any duplicate rows. Assign the baseline flag variable, \code{--BLFL}, the last observation before exposure flag (\code{--LOBXFL}) variable to these rows. -\item Join the baseline flag onto the input dataset. +\item Join the baseline flag onto the input dataset based on oak id vars } } \examples{ -DM <- read.csv(system.file("derive_blfl/DM.csv", package = "sdtm.oak")) -DM -raw_dataset <- read.csv(system.file("derive_blfl/raw_dataset.csv", package = "sdtm.oak")) -raw_dataset -observed_output <- derive_blfl(raw_dataset = raw_dataset, - DM_dataset = DM, - target_sdtm_variable = "VSBLFL", - reference_date_variable = "RFSTDTC") +dm <- tibble::tribble( +~USUBJID, ~RFSTDTC, ~RFXSTDTC, +"test_study-375", "2020-09-28T10:10", "2020-09-28T10:10", +"test_study-376", "2020-09-21T11:00", "2020-09-21T11:00", +"test_study-377", NA, NA, +"test_study-378", "2020-01-20T10:00", "2020-01-20T10:00", +"test_study-379", NA, NA, +) + +dm + +sdtm_in <- +tibble::tribble( +~DOMAIN, ~oak_id, ~raw_source, ~patient_number, ~USUBJID, ~VSDTC, ~VSTESTCD, ~VSORRES, ~VSSTAT, +"VS", 1L, "VTLS1", 375L, "test_study-375", "2020-09-01T13:31", "DIABP", "90", NA, +"VS", 2L, "VTLS1", 375L, "test_study-375", "2020-10-01T11:20", "DIABP", "90", NA, +"VS", 1L, "VTLS1", 375L, "test_study-375", "2020-09-28T10:10", "PULSE", "ND", NA, +"VS", 2L, "VTLS1", 375L, "test_study-375", "2020-10-01T13:31", "PULSE", "85", NA, +"VS", 1L, "VTLS2", 375L, "test_study-375", "2020-09-28T10:10", "SYSBP", "120", NA, +"VS", 2L, "VTLS2", 375L, "test_study-375", "2020-09-28T10:05", "SYSBP", "120", NA, +"VS", 1L, "VTLS1", 376L, "test_study-376", "2020-09-20", "DIABP", "75", NA, +"VS", 1L, "VTLS1", 376L, "test_study-376", "2020-09-20", "PULSE", NA, "NOT DONE", #nolint +"VS", 2L, "VTLS1", 376L, "test_study-376", "2020-09-20", "PULSE", "110", NA +) + +sdtm_in + +observed_output <- derive_blfl(sdtm_in = sdtm_in, + dm_domain = dm, + tgt_var = "VSLOBXFL", + ref_var = "RFXSTDTC") observed_output + } diff --git a/tests/testthat/_snaps/derive_blfl.md b/tests/testthat/_snaps/derive_blfl.md new file mode 100644 index 00000000..aa275ed4 --- /dev/null +++ b/tests/testthat/_snaps/derive_blfl.md @@ -0,0 +1,107 @@ +# derive_blfl example works + + { + "type": "list", + "attributes": { + "class": { + "type": "character", + "attributes": {}, + "value": ["tbl_df", "tbl", "data.frame"] + }, + "row.names": { + "type": "integer", + "attributes": {}, + "value": [1, 2, 3, 4, 5, 6, 7, 8, 9] + }, + "names": { + "type": "character", + "attributes": {}, + "value": ["DOMAIN", "oak_id", "raw_source", "patient_number", "USUBJID", "VSDTC", "VSTESTCD", "VSORRES", "VSSTAT", "VSLOBXFL"] + } + }, + "value": [ + { + "type": "character", + "attributes": {}, + "value": ["VS", "VS", "VS", "VS", "VS", "VS", "VS", "VS", "VS"] + }, + { + "type": "integer", + "attributes": {}, + "value": [1, 2, 1, 2, 1, 2, 1, 1, 2] + }, + { + "type": "character", + "attributes": {}, + "value": ["VTLS1", "VTLS1", "VTLS1", "VTLS1", "VTLS2", "VTLS2", "VTLS1", "VTLS1", "VTLS1"] + }, + { + "type": "integer", + "attributes": {}, + "value": [375, 375, 375, 375, 375, 375, 376, 376, 376] + }, + { + "type": "character", + "attributes": {}, + "value": ["test_study-375", "test_study-375", "test_study-375", "test_study-375", "test_study-375", "test_study-375", "test_study-376", "test_study-376", "test_study-376"] + }, + { + "type": "character", + "attributes": {}, + "value": ["2020-09-01T13:31", "2020-10-01T11:20", "2020-09-28T10:10", "2020-10-01T13:31", "2020-09-28T10:10", "2020-09-28T10:05", "2020-09-20", "2020-09-20", "2020-09-20"] + }, + { + "type": "character", + "attributes": {}, + "value": ["DIABP", "DIABP", "PULSE", "PULSE", "SYSBP", "SYSBP", "DIABP", "PULSE", "PULSE"] + }, + { + "type": "character", + "attributes": {}, + "value": ["90", "90", "ND", "85", "120", "120", "75", null, "110"] + }, + { + "type": "character", + "attributes": {}, + "value": [null, null, null, null, null, null, null, "NOT DONE", null] + }, + { + "type": "character", + "attributes": {}, + "value": ["Y", null, null, null, null, "Y", "Y", null, "Y"] + } + ] + } + +# derive_blfl sdmt_in validations work + + Required variable `DOMAIN` is missing + +--- + + Required variables `oak_id`, `raw_source` and `patient_number` are missing + +--- + + Required variables `VSORRES`, `VSSTAT`, `VSTESTCD` and `VSDTC` are missing + +# derive_blfl dm_domain validations work + + Required variables `USUBJID` and `RFXSTDTC` are missing + +# derive_blfl tgt_var and ref_var validations work + + `tgt_var` must be a character scalar but is a list + +--- + + `ref_var` must be a character scalar but is a data frame + +--- + + `tgt_var` must be one of 'VSBLFL' or 'VSLOBXFL' but is 'DMLOBXFL' + +# derive_blfl DOMAIN validation works + + `domain` must be a character scalar but is `4` + diff --git a/tests/testthat/test-derive_blfl.R b/tests/testthat/test-derive_blfl.R new file mode 100644 index 00000000..013e89e1 --- /dev/null +++ b/tests/testthat/test-derive_blfl.R @@ -0,0 +1,120 @@ +dta <- function(env = parent.frame()) { + dm <- tibble::tribble( + ~USUBJID, ~RFSTDTC, ~RFXSTDTC, + "test_study-375", "2020-09-28T10:10", "2020-09-28T10:10", + "test_study-376", "2020-09-21T11:00", "2020-09-21T11:00", + "test_study-377", NA, NA, + "test_study-378", "2020-01-20T10:00", "2020-01-20T10:00", + "test_study-379", NA, NA, + ) + + sdtm_in <- + tibble::tribble( + ~DOMAIN, ~oak_id, ~raw_source, ~patient_number, ~USUBJID, ~VSDTC, ~VSTESTCD, ~VSORRES, ~VSSTAT, + "VS", 1L, "VTLS1", 375L, "test_study-375", "2020-09-01T13:31", "DIABP", "90", NA, + "VS", 2L, "VTLS1", 375L, "test_study-375", "2020-10-01T11:20", "DIABP", "90", NA, + "VS", 1L, "VTLS1", 375L, "test_study-375", "2020-09-28T10:10", "PULSE", "ND", NA, + "VS", 2L, "VTLS1", 375L, "test_study-375", "2020-10-01T13:31", "PULSE", "85", NA, + "VS", 1L, "VTLS2", 375L, "test_study-375", "2020-09-28T10:10", "SYSBP", "120", NA, + "VS", 2L, "VTLS2", 375L, "test_study-375", "2020-09-28T10:05", "SYSBP", "120", NA, + "VS", 1L, "VTLS1", 376L, "test_study-376", "2020-09-20", "DIABP", "75", NA, + "VS", 1L, "VTLS1", 376L, "test_study-376", "2020-09-20", "PULSE", NA, "NOT DONE", #nolint + "VS", 2L, "VTLS1", 376L, "test_study-376", "2020-09-20", "PULSE", "110", NA + ) + + withr::defer({ + rm(d, envir = env) + }, envir = env) + + list(sdtm_in = sdtm_in, dm = dm) +} + +test_that("derive_blfl example works", { + d <- dta() + + observed_output <- derive_blfl(sdtm_in = d$sdtm_in, + dm_domain = d$dm, + tgt_var = "VSLOBXFL", + ref_var = "RFXSTDTC") + observed_output + + expect_snapshot_value(observed_output, style = "json2") +}) + +test_that("derive_blfl sdmt_in validations work", { + d <- dta() + sdmt_in_noDOMAIN <- + d$sdtm_in |> + dplyr::select(-DOMAIN) + + expect_snapshot_error(derive_blfl(sdtm_in = sdmt_in_noDOMAIN, + dm_domain = d$dm, + tgt_var = "VSLOBXFL", + ref_var = "RFXSTDTC")) + + sdmt_in_noIDvars <- + d$sdtm_in |> + dplyr::select(-sdtm.oak:::oak_id_vars()) + + expect_snapshot_error(derive_blfl(sdtm_in = sdmt_in_noIDvars, + dm_domain = d$dm, + tgt_var = "VSLOBXFL", + ref_var = "RFXSTDTC")) + + sdmt_in_noVSvars <- + d$sdtm_in |> + dplyr::select(-c("VSORRES", + "VSSTAT", + "VSTESTCD", + "VSDTC")) + + expect_snapshot_error(derive_blfl(sdtm_in = sdmt_in_noVSvars, + dm_domain = d$dm, + tgt_var = "VSLOBXFL", + ref_var = "RFXSTDTC")) +}) + +test_that("derive_blfl dm_domain validations work", { + d <- dta() + + dm_noVars <- + d$dm |> + dplyr::select(-c(RFXSTDTC, USUBJID)) + + expect_snapshot_error(derive_blfl(sdtm_in = d$sdtm_in, + dm_domain = dm_noVars, + tgt_var = "VSLOBXFL", + ref_var = "RFXSTDTC")) +}) + +test_that("derive_blfl tgt_var and ref_var validations work", { + d <- dta() + + expect_snapshot_error(derive_blfl(sdtm_in = d$sdtm_in, + dm_domain = d$dm, + tgt_var = list("bad"), + ref_var = "RFXSTDTC")) + + expect_snapshot_error(derive_blfl(sdtm_in = d$sdtm_in, + dm_domain = d$dm, + tgt_var = "VSLOBXFL", + ref_var = d$dm)) + + expect_snapshot_error(derive_blfl(sdtm_in = d$sdtm_in, + dm_domain = d$dm, + tgt_var = "DMLOBXFL", + ref_var = "RFXSTDTC")) +}) + +test_that("derive_blfl DOMAIN validation works", { + d <- dta() + + sdtm_in_badDOMAIN <- + d$sdtm_in |> + dplyr::mutate(DOMAIN = 4) + + expect_snapshot_error(derive_blfl(sdtm_in = sdtm_in_badDOMAIN, + dm_domain = d$dm, + tgt_var = "VSLOBXFL", + ref_var = "RFXSTDTC")) +}) From bcc8a37b9eb7ebdd55925bb8de120bb490533ba9 Mon Sep 17 00:00:00 2001 From: Kamil Sijko Date: Mon, 20 May 2024 21:04:56 +0200 Subject: [PATCH 07/20] styler --- R/derive_blfl.R | 143 ++++++++++++++++------------- tests/testthat/test-derive_blfl.R | 147 +++++++++++++++++------------- 2 files changed, 166 insertions(+), 124 deletions(-) diff --git a/R/derive_blfl.R b/R/derive_blfl.R index 25be5fd2..360c0215 100644 --- a/R/derive_blfl.R +++ b/R/derive_blfl.R @@ -14,17 +14,16 @@ #' dtc_datepart( #' c(NA, "", "2021", "2021-12", "2021-12-25", "2021-12-25T12:00:00") #' ) -#' # |--> c(NA, NA, NA, NA, "2021-12-25", "2021-12-25") +#' # |--> c(NA, NA, NA, NA, "2021-12-25", "2021-12-25") #' #' ## Prevent partial or missing dates from being set to NA #' dtc_datepart( #' c(NA, "", "2021", "2021-12", "2021-12-25", "2021-12-25T12:00:00"), #' partial_as_na = FALSE #' ) -#' # |--> c(NA, "", "2021", "2021-12", "2021-12-25", "2021-12-25") +#' # |--> c(NA, "", "2021", "2021-12", "2021-12-25", "2021-12-25") #' dtc_datepart <- function(dtc, partial_as_na = TRUE) { - # Assert that dtc is a character vector checkmate::assert_character(dtc) @@ -58,21 +57,21 @@ dtc_datepart <- function(dtc, partial_as_na = TRUE) { #' dtc_timepart( #' c(NA, "", "2021-12-25", "2021-12-25T12", "2021-12-25T12:30", "2021-12-25T12:30:59") #' ) -#' # |--> c(NA, NA, NA, NA, "12:30", "12:30") +#' # |--> c(NA, NA, NA, NA, "12:30", "12:30") #' #' ## Prevent partial or missing times from being set to NA #' dtc_timepart( #' c(NA, "", "2021-12-25", "2021-12-25T12", "2021-12-25T12:30", "2021-12-25T12:30:59"), #' partial_as_na = FALSE #' ) -#' # |--> c(NA, "", "", "12", "12:30", "12:30") +#' # |--> c(NA, "", "", "12", "12:30", "12:30") #' #' ## Do not ignore seconds, partial or missing times set to NA #' dtc_timepart( #' c(NA, "", "2021-12-25", "2021-12-25T12", "2021-12-25T12:30", "2021-12-25T12:30:59"), #' ignore_seconds = FALSE #' ) -#' # |--> c(NA, NA, NA, NA, NA, "12:30:59") +#' # |--> c(NA, NA, NA, NA, NA, "12:30:59") #' #' ## Do not ignore seconds and prevent partial or missing times from being set to NA #' dtc_timepart( @@ -80,10 +79,9 @@ dtc_datepart <- function(dtc, partial_as_na = TRUE) { #' partial_as_na = FALSE, #' ignore_seconds = FALSE #' ) -#' # |--> c(NA, "", "", "12", "12:30", "12:30:59") +#' # |--> c(NA, "", "", "12", "12:30", "12:30:59") #' dtc_timepart <- function(dtc, partial_as_na = TRUE, ignore_seconds = TRUE) { - # Assert that dtc is a character vector checkmate::assert_character(dtc) @@ -166,36 +164,38 @@ dtc_timepart <- function(dtc, partial_as_na = TRUE, ignore_seconds = TRUE) { #' #' @examples #' dm <- tibble::tribble( -#' ~USUBJID, ~RFSTDTC, ~RFXSTDTC, -#' "test_study-375", "2020-09-28T10:10", "2020-09-28T10:10", -#' "test_study-376", "2020-09-21T11:00", "2020-09-21T11:00", -#' "test_study-377", NA, NA, -#' "test_study-378", "2020-01-20T10:00", "2020-01-20T10:00", -#' "test_study-379", NA, NA, +#' ~USUBJID, ~RFSTDTC, ~RFXSTDTC, +#' "test_study-375", "2020-09-28T10:10", "2020-09-28T10:10", +#' "test_study-376", "2020-09-21T11:00", "2020-09-21T11:00", +#' "test_study-377", NA, NA, +#' "test_study-378", "2020-01-20T10:00", "2020-01-20T10:00", +#' "test_study-379", NA, NA, #' ) #' #' dm #' #' sdtm_in <- -#' tibble::tribble( -#' ~DOMAIN, ~oak_id, ~raw_source, ~patient_number, ~USUBJID, ~VSDTC, ~VSTESTCD, ~VSORRES, ~VSSTAT, -#' "VS", 1L, "VTLS1", 375L, "test_study-375", "2020-09-01T13:31", "DIABP", "90", NA, -#' "VS", 2L, "VTLS1", 375L, "test_study-375", "2020-10-01T11:20", "DIABP", "90", NA, -#' "VS", 1L, "VTLS1", 375L, "test_study-375", "2020-09-28T10:10", "PULSE", "ND", NA, -#' "VS", 2L, "VTLS1", 375L, "test_study-375", "2020-10-01T13:31", "PULSE", "85", NA, -#' "VS", 1L, "VTLS2", 375L, "test_study-375", "2020-09-28T10:10", "SYSBP", "120", NA, -#' "VS", 2L, "VTLS2", 375L, "test_study-375", "2020-09-28T10:05", "SYSBP", "120", NA, -#' "VS", 1L, "VTLS1", 376L, "test_study-376", "2020-09-20", "DIABP", "75", NA, -#' "VS", 1L, "VTLS1", 376L, "test_study-376", "2020-09-20", "PULSE", NA, "NOT DONE", #nolint -#' "VS", 2L, "VTLS1", 376L, "test_study-376", "2020-09-20", "PULSE", "110", NA -#' ) +#' tibble::tribble( +#' ~DOMAIN, ~oak_id, ~raw_source, ~patient_number, ~USUBJID, ~VSDTC, ~VSTESTCD, ~VSORRES, ~VSSTAT, +#' "VS", 1L, "VTLS1", 375L, "test_study-375", "2020-09-01T13:31", "DIABP", "90", NA, +#' "VS", 2L, "VTLS1", 375L, "test_study-375", "2020-10-01T11:20", "DIABP", "90", NA, +#' "VS", 1L, "VTLS1", 375L, "test_study-375", "2020-09-28T10:10", "PULSE", "ND", NA, +#' "VS", 2L, "VTLS1", 375L, "test_study-375", "2020-10-01T13:31", "PULSE", "85", NA, +#' "VS", 1L, "VTLS2", 375L, "test_study-375", "2020-09-28T10:10", "SYSBP", "120", NA, +#' "VS", 2L, "VTLS2", 375L, "test_study-375", "2020-09-28T10:05", "SYSBP", "120", NA, +#' "VS", 1L, "VTLS1", 376L, "test_study-376", "2020-09-20", "DIABP", "75", NA, +#' "VS", 1L, "VTLS1", 376L, "test_study-376", "2020-09-20", "PULSE", NA, "NOT DONE", # nolint +#' "VS", 2L, "VTLS1", 376L, "test_study-376", "2020-09-20", "PULSE", "110", NA +#' ) #' #' sdtm_in #' -#' observed_output <- derive_blfl(sdtm_in = sdtm_in, -#' dm_domain = dm, -#' tgt_var = "VSLOBXFL", -#' ref_var = "RFXSTDTC") +#' observed_output <- derive_blfl( +#' sdtm_in = sdtm_in, +#' dm_domain = dm, +#' tgt_var = "VSLOBXFL", +#' ref_var = "RFXSTDTC" +#' ) #' observed_output #' derive_blfl <- function(sdtm_in, @@ -213,7 +213,7 @@ derive_blfl <- function(sdtm_in, admiraldev::assert_data_frame( sdtm_in, required_vars = rlang::syms(c("DOMAIN", sdtm.oak:::oak_id_vars())) - ) + ) # Assert dm_domain is data.frame admiraldev::assert_data_frame( @@ -228,13 +228,18 @@ derive_blfl <- function(sdtm_in, # Assert that tgt_var is a concatenation of domain and "BLFL" or "LOBXFL" admiraldev::assert_character_scalar(tgt_var, - values = c(paste0(domain, "BLFL"), - paste0(domain, "LOBXFL"))) + values = c( + paste0(domain, "BLFL"), + paste0(domain, "LOBXFL") + ) + ) # Determine domain prefixed columns suffixes <- - c("ORRES", "STAT", "TESTCD", "TPT", "DTC", "CAT", "SCAT", "LOC", "LAT", - "DIR", "METHOD", "SPEC") + c( + "ORRES", "STAT", "TESTCD", "TPT", "DTC", "CAT", "SCAT", "LOC", "LAT", + "DIR", "METHOD", "SPEC" + ) domain_prefixed_names <- paste0(domain, suffixes) |> setNames(tolower(suffixes)) @@ -242,10 +247,13 @@ derive_blfl <- function(sdtm_in, # Assert that the input dataset has a "DTC" column admiraldev::assert_data_frame( sdtm_in, - required_vars = rlang::syms(c(domain_prefixed_names[c("orres", - "stat", - "testcd", - "dtc")]))) + required_vars = rlang::syms(c(domain_prefixed_names[c( + "orres", + "stat", + "testcd", + "dtc" + )])) + ) # End of assertions, work begins ------------------------------------------ # Create copy of input dataset for modification and processing @@ -260,8 +268,10 @@ derive_blfl <- function(sdtm_in, # Filter out rows where --STAT is not equal to "NOT DONE" ds_mod <- ds_mod |> - dplyr::filter(dplyr::if_any(dplyr::any_of(domain_prefixed_names["stat"]), - ~ !.x %in% "NOT DONE")) + dplyr::filter(dplyr::if_any( + dplyr::any_of(domain_prefixed_names["stat"]), + ~ !.x %in% "NOT DONE" + )) if (nrow(ds_mod) == 0L) { stop(paste0( @@ -319,9 +329,11 @@ derive_blfl <- function(sdtm_in, # - domain and reference times not equal to NA and # - domain time prior to reference time # (*B) - ds_subset_eq_1 <- dplyr::filter(ds_subset, dom_dt == ref_dt, - !is.na(dom_tm) & !is.na(ref_tm), - dom_tm < ref_tm) + ds_subset_eq_1 <- dplyr::filter( + ds_subset, dom_dt == ref_dt, + !is.na(dom_tm) & !is.na(ref_tm), + dom_tm < ref_tm + ) # Filter on rows with domain date equal to reference date but # - domain and/or reference time equal to NA and @@ -330,11 +342,13 @@ derive_blfl <- function(sdtm_in, # (*C) ds_subset_eq_2 <- ds_subset |> - dplyr::filter(dom_dt == ref_dt, - is.na(dom_tm) | is.na(ref_tm), - (VISIT %in% baseline_visits & get(domain_prefixed_names["tpt"]) %in% baseline_timepoints) | - (VISIT %in% baseline_visits & length(baseline_timepoints) == 0L) | - (get(domain_prefixed_names["tpt"]) %in% baseline_timepoints & length(baseline_visits) == 0L)) + dplyr::filter( + dom_dt == ref_dt, + is.na(dom_tm) | is.na(ref_tm), + (VISIT %in% baseline_visits & get(domain_prefixed_names["tpt"]) %in% baseline_timepoints) | + (VISIT %in% baseline_visits & length(baseline_timepoints) == 0L) | + (get(domain_prefixed_names["tpt"]) %in% baseline_timepoints & length(baseline_visits) == 0L) + ) # Combine (*A) and (*B) and (*C) ds_base <- rbind(ds_subset_lt, ds_subset_eq_1, ds_subset_eq_2) @@ -354,25 +368,30 @@ derive_blfl <- function(sdtm_in, dplyr::group_by(USUBJID, .data[[domain_prefixed_names["testcd"]]]) |> dplyr::slice_max(!!rlang::sym(domain_prefixed_names["dtc"]), na_rm = TRUE) |> dplyr::ungroup() |> - dplyr::select(dplyr::all_of(c(sdtm.oak:::oak_id_vars(), domain_prefixed_names[["testcd"]])), - dplyr::any_of( - c(domain_prefixed_names[c("cat", - "scat", - "spec", - "loc", - "lat", - "dir", - "method")] - ) - )) |> + dplyr::select( + dplyr::all_of(c(sdtm.oak:::oak_id_vars(), domain_prefixed_names[["testcd"]])), + dplyr::any_of( + c(domain_prefixed_names[c( + "cat", + "scat", + "spec", + "loc", + "lat", + "dir", + "method" + )]) + ) + ) |> dplyr::distinct() # Assign the baseline flag variable ds_blfl[[tgt_var]] <- "Y" # Join baseline flag onto input dataset - ds_out <- dplyr::left_join(sdtm_in, ds_blfl, by = c(domain_prefixed_names[["testcd"]], - sdtm.oak:::oak_id_vars())) + ds_out <- dplyr::left_join(sdtm_in, ds_blfl, by = c( + domain_prefixed_names[["testcd"]], + sdtm.oak:::oak_id_vars() + )) # Assert that merged data frame has same number of rows as input data frame if (nrow(ds_out) != nrow(sdtm_in)) { diff --git a/tests/testthat/test-derive_blfl.R b/tests/testthat/test-derive_blfl.R index 013e89e1..3a388cce 100644 --- a/tests/testthat/test-derive_blfl.R +++ b/tests/testthat/test-derive_blfl.R @@ -1,44 +1,49 @@ dta <- function(env = parent.frame()) { dm <- tibble::tribble( - ~USUBJID, ~RFSTDTC, ~RFXSTDTC, + ~USUBJID, ~RFSTDTC, ~RFXSTDTC, "test_study-375", "2020-09-28T10:10", "2020-09-28T10:10", "test_study-376", "2020-09-21T11:00", "2020-09-21T11:00", - "test_study-377", NA, NA, + "test_study-377", NA, NA, "test_study-378", "2020-01-20T10:00", "2020-01-20T10:00", - "test_study-379", NA, NA, + "test_study-379", NA, NA, ) sdtm_in <- tibble::tribble( - ~DOMAIN, ~oak_id, ~raw_source, ~patient_number, ~USUBJID, ~VSDTC, ~VSTESTCD, ~VSORRES, ~VSSTAT, - "VS", 1L, "VTLS1", 375L, "test_study-375", "2020-09-01T13:31", "DIABP", "90", NA, - "VS", 2L, "VTLS1", 375L, "test_study-375", "2020-10-01T11:20", "DIABP", "90", NA, - "VS", 1L, "VTLS1", 375L, "test_study-375", "2020-09-28T10:10", "PULSE", "ND", NA, - "VS", 2L, "VTLS1", 375L, "test_study-375", "2020-10-01T13:31", "PULSE", "85", NA, - "VS", 1L, "VTLS2", 375L, "test_study-375", "2020-09-28T10:10", "SYSBP", "120", NA, - "VS", 2L, "VTLS2", 375L, "test_study-375", "2020-09-28T10:05", "SYSBP", "120", NA, - "VS", 1L, "VTLS1", 376L, "test_study-376", "2020-09-20", "DIABP", "75", NA, - "VS", 1L, "VTLS1", 376L, "test_study-376", "2020-09-20", "PULSE", NA, "NOT DONE", #nolint - "VS", 2L, "VTLS1", 376L, "test_study-376", "2020-09-20", "PULSE", "110", NA + ~DOMAIN, ~oak_id, ~raw_source, ~patient_number, ~USUBJID, ~VSDTC, ~VSTESTCD, ~VSORRES, ~VSSTAT, + "VS", 1L, "VTLS1", 375L, "test_study-375", "2020-09-01T13:31", "DIABP", "90", NA, + "VS", 2L, "VTLS1", 375L, "test_study-375", "2020-10-01T11:20", "DIABP", "90", NA, + "VS", 1L, "VTLS1", 375L, "test_study-375", "2020-09-28T10:10", "PULSE", "ND", NA, + "VS", 2L, "VTLS1", 375L, "test_study-375", "2020-10-01T13:31", "PULSE", "85", NA, + "VS", 1L, "VTLS2", 375L, "test_study-375", "2020-09-28T10:10", "SYSBP", "120", NA, + "VS", 2L, "VTLS2", 375L, "test_study-375", "2020-09-28T10:05", "SYSBP", "120", NA, + "VS", 1L, "VTLS1", 376L, "test_study-376", "2020-09-20", "DIABP", "75", NA, + "VS", 1L, "VTLS1", 376L, "test_study-376", "2020-09-20", "PULSE", NA, "NOT DONE", # nolint + "VS", 2L, "VTLS1", 376L, "test_study-376", "2020-09-20", "PULSE", "110", NA ) - withr::defer({ - rm(d, envir = env) - }, envir = env) + withr::defer( + { + rm(d, envir = env) + }, + envir = env + ) list(sdtm_in = sdtm_in, dm = dm) } test_that("derive_blfl example works", { - d <- dta() + d <- dta() - observed_output <- derive_blfl(sdtm_in = d$sdtm_in, - dm_domain = d$dm, - tgt_var = "VSLOBXFL", - ref_var = "RFXSTDTC") - observed_output + observed_output <- derive_blfl( + sdtm_in = d$sdtm_in, + dm_domain = d$dm, + tgt_var = "VSLOBXFL", + ref_var = "RFXSTDTC" + ) + observed_output - expect_snapshot_value(observed_output, style = "json2") + expect_snapshot_value(observed_output, style = "json2") }) test_that("derive_blfl sdmt_in validations work", { @@ -47,31 +52,39 @@ test_that("derive_blfl sdmt_in validations work", { d$sdtm_in |> dplyr::select(-DOMAIN) - expect_snapshot_error(derive_blfl(sdtm_in = sdmt_in_noDOMAIN, - dm_domain = d$dm, - tgt_var = "VSLOBXFL", - ref_var = "RFXSTDTC")) + expect_snapshot_error(derive_blfl( + sdtm_in = sdmt_in_noDOMAIN, + dm_domain = d$dm, + tgt_var = "VSLOBXFL", + ref_var = "RFXSTDTC" + )) sdmt_in_noIDvars <- d$sdtm_in |> dplyr::select(-sdtm.oak:::oak_id_vars()) - expect_snapshot_error(derive_blfl(sdtm_in = sdmt_in_noIDvars, - dm_domain = d$dm, - tgt_var = "VSLOBXFL", - ref_var = "RFXSTDTC")) + expect_snapshot_error(derive_blfl( + sdtm_in = sdmt_in_noIDvars, + dm_domain = d$dm, + tgt_var = "VSLOBXFL", + ref_var = "RFXSTDTC" + )) sdmt_in_noVSvars <- d$sdtm_in |> - dplyr::select(-c("VSORRES", - "VSSTAT", - "VSTESTCD", - "VSDTC")) - - expect_snapshot_error(derive_blfl(sdtm_in = sdmt_in_noVSvars, - dm_domain = d$dm, - tgt_var = "VSLOBXFL", - ref_var = "RFXSTDTC")) + dplyr::select(-c( + "VSORRES", + "VSSTAT", + "VSTESTCD", + "VSDTC" + )) + + expect_snapshot_error(derive_blfl( + sdtm_in = sdmt_in_noVSvars, + dm_domain = d$dm, + tgt_var = "VSLOBXFL", + ref_var = "RFXSTDTC" + )) }) test_that("derive_blfl dm_domain validations work", { @@ -81,29 +94,37 @@ test_that("derive_blfl dm_domain validations work", { d$dm |> dplyr::select(-c(RFXSTDTC, USUBJID)) - expect_snapshot_error(derive_blfl(sdtm_in = d$sdtm_in, - dm_domain = dm_noVars, - tgt_var = "VSLOBXFL", - ref_var = "RFXSTDTC")) + expect_snapshot_error(derive_blfl( + sdtm_in = d$sdtm_in, + dm_domain = dm_noVars, + tgt_var = "VSLOBXFL", + ref_var = "RFXSTDTC" + )) }) test_that("derive_blfl tgt_var and ref_var validations work", { d <- dta() - expect_snapshot_error(derive_blfl(sdtm_in = d$sdtm_in, - dm_domain = d$dm, - tgt_var = list("bad"), - ref_var = "RFXSTDTC")) - - expect_snapshot_error(derive_blfl(sdtm_in = d$sdtm_in, - dm_domain = d$dm, - tgt_var = "VSLOBXFL", - ref_var = d$dm)) - - expect_snapshot_error(derive_blfl(sdtm_in = d$sdtm_in, - dm_domain = d$dm, - tgt_var = "DMLOBXFL", - ref_var = "RFXSTDTC")) + expect_snapshot_error(derive_blfl( + sdtm_in = d$sdtm_in, + dm_domain = d$dm, + tgt_var = list("bad"), + ref_var = "RFXSTDTC" + )) + + expect_snapshot_error(derive_blfl( + sdtm_in = d$sdtm_in, + dm_domain = d$dm, + tgt_var = "VSLOBXFL", + ref_var = d$dm + )) + + expect_snapshot_error(derive_blfl( + sdtm_in = d$sdtm_in, + dm_domain = d$dm, + tgt_var = "DMLOBXFL", + ref_var = "RFXSTDTC" + )) }) test_that("derive_blfl DOMAIN validation works", { @@ -113,8 +134,10 @@ test_that("derive_blfl DOMAIN validation works", { d$sdtm_in |> dplyr::mutate(DOMAIN = 4) - expect_snapshot_error(derive_blfl(sdtm_in = sdtm_in_badDOMAIN, - dm_domain = d$dm, - tgt_var = "VSLOBXFL", - ref_var = "RFXSTDTC")) + expect_snapshot_error(derive_blfl( + sdtm_in = sdtm_in_badDOMAIN, + dm_domain = d$dm, + tgt_var = "VSLOBXFL", + ref_var = "RFXSTDTC" + )) }) From 3456c52f46d5070d2242de1ced80c19b3f6e5120 Mon Sep 17 00:00:00 2001 From: Kamil Sijko Date: Mon, 20 May 2024 21:16:45 +0200 Subject: [PATCH 08/20] lintr fixes --- tests/testthat/test-derive_blfl.R | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/testthat/test-derive_blfl.R b/tests/testthat/test-derive_blfl.R index 3a388cce..46f7745d 100644 --- a/tests/testthat/test-derive_blfl.R +++ b/tests/testthat/test-derive_blfl.R @@ -5,7 +5,7 @@ dta <- function(env = parent.frame()) { "test_study-376", "2020-09-21T11:00", "2020-09-21T11:00", "test_study-377", NA, NA, "test_study-378", "2020-01-20T10:00", "2020-01-20T10:00", - "test_study-379", NA, NA, + "test_study-379", NA, NA ) sdtm_in <- @@ -48,29 +48,29 @@ test_that("derive_blfl example works", { test_that("derive_blfl sdmt_in validations work", { d <- dta() - sdmt_in_noDOMAIN <- + sdmt_in_no_domain <- d$sdtm_in |> dplyr::select(-DOMAIN) expect_snapshot_error(derive_blfl( - sdtm_in = sdmt_in_noDOMAIN, + sdtm_in = sdmt_in_no_domain, dm_domain = d$dm, tgt_var = "VSLOBXFL", ref_var = "RFXSTDTC" )) - sdmt_in_noIDvars <- + sdmt_in_no_id_vars <- d$sdtm_in |> dplyr::select(-sdtm.oak:::oak_id_vars()) expect_snapshot_error(derive_blfl( - sdtm_in = sdmt_in_noIDvars, + sdtm_in = sdmt_in_no_id_vars, dm_domain = d$dm, tgt_var = "VSLOBXFL", ref_var = "RFXSTDTC" )) - sdmt_in_noVSvars <- + sdmt_in_no_vs_vars <- d$sdtm_in |> dplyr::select(-c( "VSORRES", @@ -80,7 +80,7 @@ test_that("derive_blfl sdmt_in validations work", { )) expect_snapshot_error(derive_blfl( - sdtm_in = sdmt_in_noVSvars, + sdtm_in = sdmt_in_no_vs_vars, dm_domain = d$dm, tgt_var = "VSLOBXFL", ref_var = "RFXSTDTC" @@ -90,13 +90,13 @@ test_that("derive_blfl sdmt_in validations work", { test_that("derive_blfl dm_domain validations work", { d <- dta() - dm_noVars <- + dm_no_vars <- d$dm |> dplyr::select(-c(RFXSTDTC, USUBJID)) expect_snapshot_error(derive_blfl( sdtm_in = d$sdtm_in, - dm_domain = dm_noVars, + dm_domain = dm_no_vars, tgt_var = "VSLOBXFL", ref_var = "RFXSTDTC" )) @@ -130,12 +130,12 @@ test_that("derive_blfl tgt_var and ref_var validations work", { test_that("derive_blfl DOMAIN validation works", { d <- dta() - sdtm_in_badDOMAIN <- + sdtm_in_bad_domain <- d$sdtm_in |> - dplyr::mutate(DOMAIN = 4) + dplyr::mutate(DOMAIN = 4L) expect_snapshot_error(derive_blfl( - sdtm_in = sdtm_in_badDOMAIN, + sdtm_in = sdtm_in_bad_domain, dm_domain = d$dm, tgt_var = "VSLOBXFL", ref_var = "RFXSTDTC" From c92ae502d931c8c73180291f17ba035bff19e32f Mon Sep 17 00:00:00 2001 From: Rammprasad Ganapathy Date: Tue, 11 Jun 2024 19:51:26 +0000 Subject: [PATCH 09/20] Fixed baseline visit feature and added a test case --- R/derive_blfl.R | 31 +++++++++++++++------------- tests/testthat/_snaps/derive_blfl.md | 29 +++++++++++++++----------- tests/testthat/test-derive_blfl.R | 25 ++++++++++++---------- 3 files changed, 48 insertions(+), 37 deletions(-) diff --git a/R/derive_blfl.R b/R/derive_blfl.R index 360c0215..da3aa505 100644 --- a/R/derive_blfl.R +++ b/R/derive_blfl.R @@ -152,7 +152,7 @@ dtc_timepart <- function(dtc, partial_as_na = TRUE, ignore_seconds = TRUE) { #' @param baseline_visits A character vector specifying the baseline visits within the study. #' These visits are identified as critical points for data collection at the start of the study, #' before any intervention is applied. This allows the function to assign the baseline -#' flag if thre --DTC matches to the reference date. +#' flag if the --DTC matches to the reference date. #' @param baseline_timepoints A character vector of timpoints values in --TPT that specifies #' the specific timepoints during the baseline visits when key assessments or measurements were taken. #' This allows the function to assign the baseline flag if the --DTC matches to the reference date. @@ -176,16 +176,18 @@ dtc_timepart <- function(dtc, partial_as_na = TRUE, ignore_seconds = TRUE) { #' #' sdtm_in <- #' tibble::tribble( -#' ~DOMAIN, ~oak_id, ~raw_source, ~patient_number, ~USUBJID, ~VSDTC, ~VSTESTCD, ~VSORRES, ~VSSTAT, -#' "VS", 1L, "VTLS1", 375L, "test_study-375", "2020-09-01T13:31", "DIABP", "90", NA, -#' "VS", 2L, "VTLS1", 375L, "test_study-375", "2020-10-01T11:20", "DIABP", "90", NA, -#' "VS", 1L, "VTLS1", 375L, "test_study-375", "2020-09-28T10:10", "PULSE", "ND", NA, -#' "VS", 2L, "VTLS1", 375L, "test_study-375", "2020-10-01T13:31", "PULSE", "85", NA, -#' "VS", 1L, "VTLS2", 375L, "test_study-375", "2020-09-28T10:10", "SYSBP", "120", NA, -#' "VS", 2L, "VTLS2", 375L, "test_study-375", "2020-09-28T10:05", "SYSBP", "120", NA, -#' "VS", 1L, "VTLS1", 376L, "test_study-376", "2020-09-20", "DIABP", "75", NA, -#' "VS", 1L, "VTLS1", 376L, "test_study-376", "2020-09-20", "PULSE", NA, "NOT DONE", # nolint -#' "VS", 2L, "VTLS1", 376L, "test_study-376", "2020-09-20", "PULSE", "110", NA +#' ~DOMAIN, ~oak_id, ~raw_source, ~patient_number, ~USUBJID, ~VSDTC, ~VSTESTCD, ~VSORRES, ~VSSTAT, ~VISIT, +#' "VS", 1L, "VTLS1", 375L, "test_study-375", "2020-09-01T13:31", "DIABP", "90", NA, "SCREENING", +#' "VS", 2L, "VTLS1", 375L, "test_study-375", "2020-10-01T11:20", "DIABP", "90", NA, "SCREENING", +#' "VS", 1L, "VTLS1", 375L, "test_study-375", "2020-09-28T10:10", "PULSE", "ND", NA, "SCREENING", +#' "VS", 2L, "VTLS1", 375L, "test_study-375", "2020-10-01T13:31", "PULSE", "85", NA, "SCREENING", +#' "VS", 1L, "VTLS2", 375L, "test_study-375", "2020-09-28T10:10", "SYSBP", "120", NA, "SCREENING", +#' "VS", 2L, "VTLS2", 375L, "test_study-375", "2020-09-28T10:05", "SYSBP", "120", NA, "SCREENING", +#' "VS", 1L, "VTLS1", 376L, "test_study-376", "2020-09-20", "DIABP", "75", NA, "SCREENING", +#' "VS", 1L, "VTLS1", 376L, "test_study-376", "2020-09-20", "PULSE", NA, "NOT DONE", "SCREENING", # nolint +#' "VS", 2L, "VTLS1", 376L, "test_study-376", "2020-09-20", "PULSE", "110", NA, "SCREENING", +#' "VS", 2L, "VTLS1", 378L, "test_study-378", "2020-01-20T10:00", "PULSE", "110", NA, "SCREENING", +#' "VS", 3L, "VTLS1", 378L, "test_study-378", "2020-01-21T11:00", "PULSE", "105", NA, "SCREENING" #' ) #' #' sdtm_in @@ -194,7 +196,8 @@ dtc_timepart <- function(dtc, partial_as_na = TRUE, ignore_seconds = TRUE) { #' sdtm_in = sdtm_in, #' dm_domain = dm, #' tgt_var = "VSLOBXFL", -#' ref_var = "RFXSTDTC" +#' ref_var = "RFXSTDTC", +#' baseline_visits = c("SCREENING") #' ) #' observed_output #' @@ -281,7 +284,7 @@ derive_blfl <- function(sdtm_in, } # Checking for columns of interest - con_col <- c(domain_prefixed_names[c("testcd", "dtc", "var_tpt")], "VISITNUM") + con_col <- c(domain_prefixed_names[c("testcd", "dtc", "var_tpt")], "VISIT") # Drop those columns from the list which are not present in ds_in con_col <- con_col[con_col %in% names(sdtm_in)] @@ -344,7 +347,7 @@ derive_blfl <- function(sdtm_in, ds_subset |> dplyr::filter( dom_dt == ref_dt, - is.na(dom_tm) | is.na(ref_tm), + is.na(dom_tm) | is.na(ref_tm) | dom_tm == ref_tm, (VISIT %in% baseline_visits & get(domain_prefixed_names["tpt"]) %in% baseline_timepoints) | (VISIT %in% baseline_visits & length(baseline_timepoints) == 0L) | (get(domain_prefixed_names["tpt"]) %in% baseline_timepoints & length(baseline_visits) == 0L) diff --git a/tests/testthat/_snaps/derive_blfl.md b/tests/testthat/_snaps/derive_blfl.md index aa275ed4..606a22b3 100644 --- a/tests/testthat/_snaps/derive_blfl.md +++ b/tests/testthat/_snaps/derive_blfl.md @@ -11,64 +11,69 @@ "row.names": { "type": "integer", "attributes": {}, - "value": [1, 2, 3, 4, 5, 6, 7, 8, 9] + "value": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] }, "names": { "type": "character", "attributes": {}, - "value": ["DOMAIN", "oak_id", "raw_source", "patient_number", "USUBJID", "VSDTC", "VSTESTCD", "VSORRES", "VSSTAT", "VSLOBXFL"] + "value": ["DOMAIN", "oak_id", "raw_source", "patient_number", "USUBJID", "VSDTC", "VSTESTCD", "VSORRES", "VSSTAT", "VISIT", "VSLOBXFL"] } }, "value": [ { "type": "character", "attributes": {}, - "value": ["VS", "VS", "VS", "VS", "VS", "VS", "VS", "VS", "VS"] + "value": ["VS", "VS", "VS", "VS", "VS", "VS", "VS", "VS", "VS", "VS", "VS"] }, { "type": "integer", "attributes": {}, - "value": [1, 2, 1, 2, 1, 2, 1, 1, 2] + "value": [1, 2, 1, 2, 1, 2, 1, 1, 2, 2, 3] }, { "type": "character", "attributes": {}, - "value": ["VTLS1", "VTLS1", "VTLS1", "VTLS1", "VTLS2", "VTLS2", "VTLS1", "VTLS1", "VTLS1"] + "value": ["VTLS1", "VTLS1", "VTLS1", "VTLS1", "VTLS2", "VTLS2", "VTLS1", "VTLS1", "VTLS1", "VTLS1", "VTLS1"] }, { "type": "integer", "attributes": {}, - "value": [375, 375, 375, 375, 375, 375, 376, 376, 376] + "value": [375, 375, 375, 375, 375, 375, 376, 376, 376, 378, 378] }, { "type": "character", "attributes": {}, - "value": ["test_study-375", "test_study-375", "test_study-375", "test_study-375", "test_study-375", "test_study-375", "test_study-376", "test_study-376", "test_study-376"] + "value": ["test_study-375", "test_study-375", "test_study-375", "test_study-375", "test_study-375", "test_study-375", "test_study-376", "test_study-376", "test_study-376", "test_study-378", "test_study-378"] }, { "type": "character", "attributes": {}, - "value": ["2020-09-01T13:31", "2020-10-01T11:20", "2020-09-28T10:10", "2020-10-01T13:31", "2020-09-28T10:10", "2020-09-28T10:05", "2020-09-20", "2020-09-20", "2020-09-20"] + "value": ["2020-09-01T13:31", "2020-10-01T11:20", "2020-09-28T10:10", "2020-10-01T13:31", "2020-09-28T10:10", "2020-09-28T10:05", "2020-09-20", "2020-09-20", "2020-09-20", "2020-01-20T10:00", "2020-01-21T11:00"] }, { "type": "character", "attributes": {}, - "value": ["DIABP", "DIABP", "PULSE", "PULSE", "SYSBP", "SYSBP", "DIABP", "PULSE", "PULSE"] + "value": ["DIABP", "DIABP", "PULSE", "PULSE", "SYSBP", "SYSBP", "DIABP", "PULSE", "PULSE", "PULSE", "PULSE"] }, { "type": "character", "attributes": {}, - "value": ["90", "90", "ND", "85", "120", "120", "75", null, "110"] + "value": ["90", "90", "ND", "85", "120", "120", "75", null, "110", "110", "105"] }, { "type": "character", "attributes": {}, - "value": [null, null, null, null, null, null, null, "NOT DONE", null] + "value": [null, null, null, null, null, null, null, "NOT DONE", null, null, null] }, { "type": "character", "attributes": {}, - "value": ["Y", null, null, null, null, "Y", "Y", null, "Y"] + "value": ["SCREENING", "SCREENING", "SCREENING", "SCREENING", "SCREENING", "SCREENING", "SCREENING", "SCREENING", "SCREENING", "SCREENING", "SCREENING"] + }, + { + "type": "character", + "attributes": {}, + "value": ["Y", null, null, null, "Y", null, "Y", null, "Y", "Y", null] } ] } diff --git a/tests/testthat/test-derive_blfl.R b/tests/testthat/test-derive_blfl.R index 46f7745d..4aa3ea14 100644 --- a/tests/testthat/test-derive_blfl.R +++ b/tests/testthat/test-derive_blfl.R @@ -10,16 +10,18 @@ dta <- function(env = parent.frame()) { sdtm_in <- tibble::tribble( - ~DOMAIN, ~oak_id, ~raw_source, ~patient_number, ~USUBJID, ~VSDTC, ~VSTESTCD, ~VSORRES, ~VSSTAT, - "VS", 1L, "VTLS1", 375L, "test_study-375", "2020-09-01T13:31", "DIABP", "90", NA, - "VS", 2L, "VTLS1", 375L, "test_study-375", "2020-10-01T11:20", "DIABP", "90", NA, - "VS", 1L, "VTLS1", 375L, "test_study-375", "2020-09-28T10:10", "PULSE", "ND", NA, - "VS", 2L, "VTLS1", 375L, "test_study-375", "2020-10-01T13:31", "PULSE", "85", NA, - "VS", 1L, "VTLS2", 375L, "test_study-375", "2020-09-28T10:10", "SYSBP", "120", NA, - "VS", 2L, "VTLS2", 375L, "test_study-375", "2020-09-28T10:05", "SYSBP", "120", NA, - "VS", 1L, "VTLS1", 376L, "test_study-376", "2020-09-20", "DIABP", "75", NA, - "VS", 1L, "VTLS1", 376L, "test_study-376", "2020-09-20", "PULSE", NA, "NOT DONE", # nolint - "VS", 2L, "VTLS1", 376L, "test_study-376", "2020-09-20", "PULSE", "110", NA + ~DOMAIN, ~oak_id, ~raw_source, ~patient_number, ~USUBJID, ~VSDTC, ~VSTESTCD, ~VSORRES, ~VSSTAT, ~VISIT, + "VS", 1L, "VTLS1", 375L, "test_study-375", "2020-09-01T13:31", "DIABP", "90", NA, "SCREENING", + "VS", 2L, "VTLS1", 375L, "test_study-375", "2020-10-01T11:20", "DIABP", "90", NA, "SCREENING", + "VS", 1L, "VTLS1", 375L, "test_study-375", "2020-09-28T10:10", "PULSE", "ND", NA, "SCREENING", + "VS", 2L, "VTLS1", 375L, "test_study-375", "2020-10-01T13:31", "PULSE", "85", NA, "SCREENING", + "VS", 1L, "VTLS2", 375L, "test_study-375", "2020-09-28T10:10", "SYSBP", "120", NA, "SCREENING", + "VS", 2L, "VTLS2", 375L, "test_study-375", "2020-09-28T10:05", "SYSBP", "120", NA, "SCREENING", + "VS", 1L, "VTLS1", 376L, "test_study-376", "2020-09-20", "DIABP", "75", NA, "SCREENING", + "VS", 1L, "VTLS1", 376L, "test_study-376", "2020-09-20", "PULSE", NA, "NOT DONE", "SCREENING", # nolint + "VS", 2L, "VTLS1", 376L, "test_study-376", "2020-09-20", "PULSE", "110", NA, "SCREENING", + "VS", 2L, "VTLS1", 378L, "test_study-378", "2020-01-20T10:00", "PULSE", "110", NA, "SCREENING", + "VS", 3L, "VTLS1", 378L, "test_study-378", "2020-01-21T11:00", "PULSE", "105", NA, "SCREENING" ) withr::defer( @@ -39,7 +41,8 @@ test_that("derive_blfl example works", { sdtm_in = d$sdtm_in, dm_domain = d$dm, tgt_var = "VSLOBXFL", - ref_var = "RFXSTDTC" + ref_var = "RFXSTDTC", + baseline_visits = c("SCREENING") ) observed_output From a4fa80b0d44e0a2b81c56bb5a308108933a454d0 Mon Sep 17 00:00:00 2001 From: Rammprasad Ganapathy Date: Tue, 18 Jun 2024 05:58:00 +0000 Subject: [PATCH 10/20] Run Styler --- R/derive_blfl.R | 6 +++--- tests/testthat/test-derive_blfl.R | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/R/derive_blfl.R b/R/derive_blfl.R index da3aa505..8e3fa521 100644 --- a/R/derive_blfl.R +++ b/R/derive_blfl.R @@ -176,7 +176,7 @@ dtc_timepart <- function(dtc, partial_as_na = TRUE, ignore_seconds = TRUE) { #' #' sdtm_in <- #' tibble::tribble( -#' ~DOMAIN, ~oak_id, ~raw_source, ~patient_number, ~USUBJID, ~VSDTC, ~VSTESTCD, ~VSORRES, ~VSSTAT, ~VISIT, +#' ~DOMAIN, ~oak_id, ~raw_source, ~patient_number, ~USUBJID, ~VSDTC, ~VSTESTCD, ~VSORRES, ~VSSTAT, ~VISIT, #' "VS", 1L, "VTLS1", 375L, "test_study-375", "2020-09-01T13:31", "DIABP", "90", NA, "SCREENING", #' "VS", 2L, "VTLS1", 375L, "test_study-375", "2020-10-01T11:20", "DIABP", "90", NA, "SCREENING", #' "VS", 1L, "VTLS1", 375L, "test_study-375", "2020-09-28T10:10", "PULSE", "ND", NA, "SCREENING", @@ -184,8 +184,8 @@ dtc_timepart <- function(dtc, partial_as_na = TRUE, ignore_seconds = TRUE) { #' "VS", 1L, "VTLS2", 375L, "test_study-375", "2020-09-28T10:10", "SYSBP", "120", NA, "SCREENING", #' "VS", 2L, "VTLS2", 375L, "test_study-375", "2020-09-28T10:05", "SYSBP", "120", NA, "SCREENING", #' "VS", 1L, "VTLS1", 376L, "test_study-376", "2020-09-20", "DIABP", "75", NA, "SCREENING", -#' "VS", 1L, "VTLS1", 376L, "test_study-376", "2020-09-20", "PULSE", NA, "NOT DONE", "SCREENING", # nolint -#' "VS", 2L, "VTLS1", 376L, "test_study-376", "2020-09-20", "PULSE", "110", NA, "SCREENING", +#' "VS", 1L, "VTLS1", 376L, "test_study-376", "2020-09-20", "PULSE", NA, "NOT DONE", "SCREENING", # nolint +#' "VS", 2L, "VTLS1", 376L, "test_study-376", "2020-09-20", "PULSE", "110", NA, "SCREENING", #' "VS", 2L, "VTLS1", 378L, "test_study-378", "2020-01-20T10:00", "PULSE", "110", NA, "SCREENING", #' "VS", 3L, "VTLS1", 378L, "test_study-378", "2020-01-21T11:00", "PULSE", "105", NA, "SCREENING" #' ) diff --git a/tests/testthat/test-derive_blfl.R b/tests/testthat/test-derive_blfl.R index 4aa3ea14..468424b7 100644 --- a/tests/testthat/test-derive_blfl.R +++ b/tests/testthat/test-derive_blfl.R @@ -18,8 +18,8 @@ dta <- function(env = parent.frame()) { "VS", 1L, "VTLS2", 375L, "test_study-375", "2020-09-28T10:10", "SYSBP", "120", NA, "SCREENING", "VS", 2L, "VTLS2", 375L, "test_study-375", "2020-09-28T10:05", "SYSBP", "120", NA, "SCREENING", "VS", 1L, "VTLS1", 376L, "test_study-376", "2020-09-20", "DIABP", "75", NA, "SCREENING", - "VS", 1L, "VTLS1", 376L, "test_study-376", "2020-09-20", "PULSE", NA, "NOT DONE", "SCREENING", # nolint - "VS", 2L, "VTLS1", 376L, "test_study-376", "2020-09-20", "PULSE", "110", NA, "SCREENING", + "VS", 1L, "VTLS1", 376L, "test_study-376", "2020-09-20", "PULSE", NA, "NOT DONE", "SCREENING", # nolint + "VS", 2L, "VTLS1", 376L, "test_study-376", "2020-09-20", "PULSE", "110", NA, "SCREENING", "VS", 2L, "VTLS1", 378L, "test_study-378", "2020-01-20T10:00", "PULSE", "110", NA, "SCREENING", "VS", 3L, "VTLS1", 378L, "test_study-378", "2020-01-21T11:00", "PULSE", "105", NA, "SCREENING" ) From a86e4a92b3a6c7ab6ae3e0528c49d1c6032a2871 Mon Sep 17 00:00:00 2001 From: Rammprasad Ganapathy Date: Tue, 18 Jun 2024 18:20:05 +0000 Subject: [PATCH 11/20] Fix pipeline failures --- .lintr | 4 +++- R/derive_blfl.R | 4 ++-- man/derive_blfl.Rd | 51 +++++++++++++++++++++++++-------------------- man/dtc_datepart.Rd | 4 ++-- man/dtc_timepart.Rd | 8 +++---- 5 files changed, 39 insertions(+), 32 deletions(-) diff --git a/.lintr b/.lintr index feb3a253..05e507a1 100644 --- a/.lintr +++ b/.lintr @@ -23,6 +23,8 @@ linters: linters_with_defaults( redundant_ifelse_linter(), sprintf_linter(), strings_as_factors_linter(), - undesirable_function_linter(c(Sys.setenv = NA_character_, mapply = NA_character_)) + undesirable_function_linter(c(Sys.setenv = NA_character_, mapply = NA_character_)), + object_name_linter = NULL, + object_usage_linter = NULL ) encoding: "UTF-8" diff --git a/R/derive_blfl.R b/R/derive_blfl.R index 8e3fa521..94003c25 100644 --- a/R/derive_blfl.R +++ b/R/derive_blfl.R @@ -25,7 +25,7 @@ #' dtc_datepart <- function(dtc, partial_as_na = TRUE) { # Assert that dtc is a character vector - checkmate::assert_character(dtc) + admiraldev::assert_character_vector(dtc) # Extract date part from ISO 8601 date/time variable dt <- sub("^([^T]+).*", "\\1", dtc) @@ -83,7 +83,7 @@ dtc_datepart <- function(dtc, partial_as_na = TRUE) { #' dtc_timepart <- function(dtc, partial_as_na = TRUE, ignore_seconds = TRUE) { # Assert that dtc is a character vector - checkmate::assert_character(dtc) + admiraldev::assert_character_vector(dtc) # Determine length of time part depending on ignore_seconds parameter tm_length <- ifelse(ignore_seconds, 5L, 8L) diff --git a/man/derive_blfl.Rd b/man/derive_blfl.Rd index fc9353ae..ec3be0f0 100644 --- a/man/derive_blfl.Rd +++ b/man/derive_blfl.Rd @@ -28,7 +28,7 @@ include "RFSTDTC" (the date/time of the first study treatment) or \item{baseline_visits}{A character vector specifying the baseline visits within the study. These visits are identified as critical points for data collection at the start of the study, before any intervention is applied. This allows the function to assign the baseline -flag if thre --DTC matches to the reference date.} +flag if the --DTC matches to the reference date.} \item{baseline_timepoints}{A character vector of timpoints values in --TPT that specifies the specific timepoints during the baseline visits when key assessments or measurements were taken. @@ -84,36 +84,41 @@ exposure flag (\code{--LOBXFL}) variable to these rows. } \examples{ dm <- tibble::tribble( -~USUBJID, ~RFSTDTC, ~RFXSTDTC, -"test_study-375", "2020-09-28T10:10", "2020-09-28T10:10", -"test_study-376", "2020-09-21T11:00", "2020-09-21T11:00", -"test_study-377", NA, NA, -"test_study-378", "2020-01-20T10:00", "2020-01-20T10:00", -"test_study-379", NA, NA, + ~USUBJID, ~RFSTDTC, ~RFXSTDTC, + "test_study-375", "2020-09-28T10:10", "2020-09-28T10:10", + "test_study-376", "2020-09-21T11:00", "2020-09-21T11:00", + "test_study-377", NA, NA, + "test_study-378", "2020-01-20T10:00", "2020-01-20T10:00", + "test_study-379", NA, NA, ) dm sdtm_in <- -tibble::tribble( -~DOMAIN, ~oak_id, ~raw_source, ~patient_number, ~USUBJID, ~VSDTC, ~VSTESTCD, ~VSORRES, ~VSSTAT, -"VS", 1L, "VTLS1", 375L, "test_study-375", "2020-09-01T13:31", "DIABP", "90", NA, -"VS", 2L, "VTLS1", 375L, "test_study-375", "2020-10-01T11:20", "DIABP", "90", NA, -"VS", 1L, "VTLS1", 375L, "test_study-375", "2020-09-28T10:10", "PULSE", "ND", NA, -"VS", 2L, "VTLS1", 375L, "test_study-375", "2020-10-01T13:31", "PULSE", "85", NA, -"VS", 1L, "VTLS2", 375L, "test_study-375", "2020-09-28T10:10", "SYSBP", "120", NA, -"VS", 2L, "VTLS2", 375L, "test_study-375", "2020-09-28T10:05", "SYSBP", "120", NA, -"VS", 1L, "VTLS1", 376L, "test_study-376", "2020-09-20", "DIABP", "75", NA, -"VS", 1L, "VTLS1", 376L, "test_study-376", "2020-09-20", "PULSE", NA, "NOT DONE", #nolint -"VS", 2L, "VTLS1", 376L, "test_study-376", "2020-09-20", "PULSE", "110", NA -) + tibble::tribble( + ~DOMAIN, ~oak_id, ~raw_source, ~patient_number, ~USUBJID, ~VSDTC, ~VSTESTCD, ~VSORRES, ~VSSTAT, ~VISIT, + "VS", 1L, "VTLS1", 375L, "test_study-375", "2020-09-01T13:31", "DIABP", "90", NA, "SCREENING", + "VS", 2L, "VTLS1", 375L, "test_study-375", "2020-10-01T11:20", "DIABP", "90", NA, "SCREENING", + "VS", 1L, "VTLS1", 375L, "test_study-375", "2020-09-28T10:10", "PULSE", "ND", NA, "SCREENING", + "VS", 2L, "VTLS1", 375L, "test_study-375", "2020-10-01T13:31", "PULSE", "85", NA, "SCREENING", + "VS", 1L, "VTLS2", 375L, "test_study-375", "2020-09-28T10:10", "SYSBP", "120", NA, "SCREENING", + "VS", 2L, "VTLS2", 375L, "test_study-375", "2020-09-28T10:05", "SYSBP", "120", NA, "SCREENING", + "VS", 1L, "VTLS1", 376L, "test_study-376", "2020-09-20", "DIABP", "75", NA, "SCREENING", + "VS", 1L, "VTLS1", 376L, "test_study-376", "2020-09-20", "PULSE", NA, "NOT DONE", "SCREENING", # nolint + "VS", 2L, "VTLS1", 376L, "test_study-376", "2020-09-20", "PULSE", "110", NA, "SCREENING", + "VS", 2L, "VTLS1", 378L, "test_study-378", "2020-01-20T10:00", "PULSE", "110", NA, "SCREENING", + "VS", 3L, "VTLS1", 378L, "test_study-378", "2020-01-21T11:00", "PULSE", "105", NA, "SCREENING" + ) sdtm_in -observed_output <- derive_blfl(sdtm_in = sdtm_in, - dm_domain = dm, - tgt_var = "VSLOBXFL", - ref_var = "RFXSTDTC") +observed_output <- derive_blfl( + sdtm_in = sdtm_in, + dm_domain = dm, + tgt_var = "VSLOBXFL", + ref_var = "RFXSTDTC", + baseline_visits = c("SCREENING") +) observed_output } diff --git a/man/dtc_datepart.Rd b/man/dtc_datepart.Rd index 543d6ff1..e17ba977 100644 --- a/man/dtc_datepart.Rd +++ b/man/dtc_datepart.Rd @@ -24,13 +24,13 @@ By default, partial or missing dates are set to NA. dtc_datepart( c(NA, "", "2021", "2021-12", "2021-12-25", "2021-12-25T12:00:00") ) - # |--> c(NA, NA, NA, NA, "2021-12-25", "2021-12-25") +# |--> c(NA, NA, NA, NA, "2021-12-25", "2021-12-25") ## Prevent partial or missing dates from being set to NA dtc_datepart( c(NA, "", "2021", "2021-12", "2021-12-25", "2021-12-25T12:00:00"), partial_as_na = FALSE ) - # |--> c(NA, "", "2021", "2021-12", "2021-12-25", "2021-12-25") +# |--> c(NA, "", "2021", "2021-12", "2021-12-25", "2021-12-25") } diff --git a/man/dtc_timepart.Rd b/man/dtc_timepart.Rd index cb821ed2..46c1a88f 100644 --- a/man/dtc_timepart.Rd +++ b/man/dtc_timepart.Rd @@ -28,21 +28,21 @@ and not extracted. dtc_timepart( c(NA, "", "2021-12-25", "2021-12-25T12", "2021-12-25T12:30", "2021-12-25T12:30:59") ) - # |--> c(NA, NA, NA, NA, "12:30", "12:30") +# |--> c(NA, NA, NA, NA, "12:30", "12:30") ## Prevent partial or missing times from being set to NA dtc_timepart( c(NA, "", "2021-12-25", "2021-12-25T12", "2021-12-25T12:30", "2021-12-25T12:30:59"), partial_as_na = FALSE ) - # |--> c(NA, "", "", "12", "12:30", "12:30") +# |--> c(NA, "", "", "12", "12:30", "12:30") ## Do not ignore seconds, partial or missing times set to NA dtc_timepart( c(NA, "", "2021-12-25", "2021-12-25T12", "2021-12-25T12:30", "2021-12-25T12:30:59"), ignore_seconds = FALSE ) - # |--> c(NA, NA, NA, NA, NA, "12:30:59") +# |--> c(NA, NA, NA, NA, NA, "12:30:59") ## Do not ignore seconds and prevent partial or missing times from being set to NA dtc_timepart( @@ -50,6 +50,6 @@ dtc_timepart( partial_as_na = FALSE, ignore_seconds = FALSE ) - # |--> c(NA, "", "", "12", "12:30", "12:30:59") +# |--> c(NA, "", "", "12", "12:30", "12:30:59") } From 3e9782ec9ec1b162bbb9d0a431e1cbcda5354af8 Mon Sep 17 00:00:00 2001 From: Rammprasad Ganapathy Date: Tue, 18 Jun 2024 21:27:29 +0000 Subject: [PATCH 12/20] Fix pipeline failure --- R/derive_blfl.R | 138 ++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 123 insertions(+), 15 deletions(-) diff --git a/R/derive_blfl.R b/R/derive_blfl.R index 94003c25..3d413859 100644 --- a/R/derive_blfl.R +++ b/R/derive_blfl.R @@ -174,21 +174,129 @@ dtc_timepart <- function(dtc, partial_as_na = TRUE, ignore_seconds = TRUE) { #' #' dm #' -#' sdtm_in <- -#' tibble::tribble( -#' ~DOMAIN, ~oak_id, ~raw_source, ~patient_number, ~USUBJID, ~VSDTC, ~VSTESTCD, ~VSORRES, ~VSSTAT, ~VISIT, -#' "VS", 1L, "VTLS1", 375L, "test_study-375", "2020-09-01T13:31", "DIABP", "90", NA, "SCREENING", -#' "VS", 2L, "VTLS1", 375L, "test_study-375", "2020-10-01T11:20", "DIABP", "90", NA, "SCREENING", -#' "VS", 1L, "VTLS1", 375L, "test_study-375", "2020-09-28T10:10", "PULSE", "ND", NA, "SCREENING", -#' "VS", 2L, "VTLS1", 375L, "test_study-375", "2020-10-01T13:31", "PULSE", "85", NA, "SCREENING", -#' "VS", 1L, "VTLS2", 375L, "test_study-375", "2020-09-28T10:10", "SYSBP", "120", NA, "SCREENING", -#' "VS", 2L, "VTLS2", 375L, "test_study-375", "2020-09-28T10:05", "SYSBP", "120", NA, "SCREENING", -#' "VS", 1L, "VTLS1", 376L, "test_study-376", "2020-09-20", "DIABP", "75", NA, "SCREENING", -#' "VS", 1L, "VTLS1", 376L, "test_study-376", "2020-09-20", "PULSE", NA, "NOT DONE", "SCREENING", # nolint -#' "VS", 2L, "VTLS1", 376L, "test_study-376", "2020-09-20", "PULSE", "110", NA, "SCREENING", -#' "VS", 2L, "VTLS1", 378L, "test_study-378", "2020-01-20T10:00", "PULSE", "110", NA, "SCREENING", -#' "VS", 3L, "VTLS1", 378L, "test_study-378", "2020-01-21T11:00", "PULSE", "105", NA, "SCREENING" -#' ) +#' sdtm_in <- +#' tibble::tribble( +#' ~ DOMAIN, +#' ~ oak_id, +#' ~ raw_source, +#' ~ patient_number, +#' ~ USUBJID, +#' ~ VSDTC, +#' ~ VSTESTCD, +#' ~ VSORRES, +#' ~ VSSTAT, +#' ~ VISIT, +#' "VS", +#' 1L, +#' "VTLS1", +#' 375L, +#' "test_study-375", +#' "2020-09-01T13:31", +#' "DIABP", +#' "90", +#' NA, +#' "SCREENING", +#' "VS", +#' 2L, +#' "VTLS1", +#' 375L, +#' "test_study-375", +#' "2020-10-01T11:20", +#' "DIABP", +#' "90", +#' NA, +#' "SCREENING", +#' "VS", +#' 1L, +#' "VTLS1", +#' 375L, +#' "test_study-375", +#' "2020-09-28T10:10", +#' "PULSE", +#' "ND", +#' NA, +#' "SCREENING", +#' "VS", +#' 2L, +#' "VTLS1", +#' 375L, +#' "test_study-375", +#' "2020-10-01T13:31", +#' "PULSE", +#' "85", +#' NA, +#' "SCREENING", +#' "VS", +#' 1L, +#' "VTLS2", +#' 375L, +#' "test_study-375", +#' "2020-09-28T10:10", +#' "SYSBP", +#' "120", +#' NA, +#' "SCREENING", +#' "VS", +#' 2L, +#' "VTLS2", +#' 375L, +#' "test_study-375", +#' "2020-09-28T10:05", +#' "SYSBP", +#' "120", +#' NA, +#' "SCREENING", +#' "VS", +#' 1L, +#' "VTLS1", +#' 376L, +#' "test_study-376", +#' "2020-09-20", +#' "DIABP", +#' "75", +#' NA, +#' "SCREENING", +#' "VS", +#' 1L, +#' "VTLS1", +#' 376L, +#' "test_study-376", +#' "2020-09-20", +#' "PULSE", +#' NA, +#' "NOT DONE", +#' "SCREENING", +#' "VS", +#' 2L, +#' "VTLS1", +#' 376L, +#' "test_study-376", +#' "2020-09-20", +#' "PULSE", +#' "110", +#' NA, +#' "SCREENING", +#' "VS", +#' 2L, +#' "VTLS1", +#' 378L, +#' "test_study-378", +#' "2020-01-20T10:00", +#' "PULSE", +#' "110", +#' NA, +#' "SCREENING", +#' "VS", +#' 3L, +#' "VTLS1", +#' 378L, +#' "test_study-378", +#' "2020-01-21T11:00", +#' "PULSE", +#' "105", +#' NA, +#' "SCREENING" +#' ) #' #' sdtm_in #' From 6054a59de2a845518adf6ddf874e3d4020e3e2d2 Mon Sep 17 00:00:00 2001 From: Rammprasad Ganapathy Date: Tue, 18 Jun 2024 21:33:56 +0000 Subject: [PATCH 13/20] Fix style --- R/derive_blfl.R | 246 ++++++++++++++++++++++++------------------------ 1 file changed, 123 insertions(+), 123 deletions(-) diff --git a/R/derive_blfl.R b/R/derive_blfl.R index 3d413859..e01ff2e1 100644 --- a/R/derive_blfl.R +++ b/R/derive_blfl.R @@ -174,129 +174,129 @@ dtc_timepart <- function(dtc, partial_as_na = TRUE, ignore_seconds = TRUE) { #' #' dm #' -#' sdtm_in <- -#' tibble::tribble( -#' ~ DOMAIN, -#' ~ oak_id, -#' ~ raw_source, -#' ~ patient_number, -#' ~ USUBJID, -#' ~ VSDTC, -#' ~ VSTESTCD, -#' ~ VSORRES, -#' ~ VSSTAT, -#' ~ VISIT, -#' "VS", -#' 1L, -#' "VTLS1", -#' 375L, -#' "test_study-375", -#' "2020-09-01T13:31", -#' "DIABP", -#' "90", -#' NA, -#' "SCREENING", -#' "VS", -#' 2L, -#' "VTLS1", -#' 375L, -#' "test_study-375", -#' "2020-10-01T11:20", -#' "DIABP", -#' "90", -#' NA, -#' "SCREENING", -#' "VS", -#' 1L, -#' "VTLS1", -#' 375L, -#' "test_study-375", -#' "2020-09-28T10:10", -#' "PULSE", -#' "ND", -#' NA, -#' "SCREENING", -#' "VS", -#' 2L, -#' "VTLS1", -#' 375L, -#' "test_study-375", -#' "2020-10-01T13:31", -#' "PULSE", -#' "85", -#' NA, -#' "SCREENING", -#' "VS", -#' 1L, -#' "VTLS2", -#' 375L, -#' "test_study-375", -#' "2020-09-28T10:10", -#' "SYSBP", -#' "120", -#' NA, -#' "SCREENING", -#' "VS", -#' 2L, -#' "VTLS2", -#' 375L, -#' "test_study-375", -#' "2020-09-28T10:05", -#' "SYSBP", -#' "120", -#' NA, -#' "SCREENING", -#' "VS", -#' 1L, -#' "VTLS1", -#' 376L, -#' "test_study-376", -#' "2020-09-20", -#' "DIABP", -#' "75", -#' NA, -#' "SCREENING", -#' "VS", -#' 1L, -#' "VTLS1", -#' 376L, -#' "test_study-376", -#' "2020-09-20", -#' "PULSE", -#' NA, -#' "NOT DONE", -#' "SCREENING", -#' "VS", -#' 2L, -#' "VTLS1", -#' 376L, -#' "test_study-376", -#' "2020-09-20", -#' "PULSE", -#' "110", -#' NA, -#' "SCREENING", -#' "VS", -#' 2L, -#' "VTLS1", -#' 378L, -#' "test_study-378", -#' "2020-01-20T10:00", -#' "PULSE", -#' "110", -#' NA, -#' "SCREENING", -#' "VS", -#' 3L, -#' "VTLS1", -#' 378L, -#' "test_study-378", -#' "2020-01-21T11:00", -#' "PULSE", -#' "105", -#' NA, -#' "SCREENING" -#' ) +#' sdtm_in <- +#' tibble::tribble( +#' ~DOMAIN, +#' ~oak_id, +#' ~raw_source, +#' ~patient_number, +#' ~USUBJID, +#' ~VSDTC, +#' ~VSTESTCD, +#' ~VSORRES, +#' ~VSSTAT, +#' ~VISIT, +#' "VS", +#' 1L, +#' "VTLS1", +#' 375L, +#' "test_study-375", +#' "2020-09-01T13:31", +#' "DIABP", +#' "90", +#' NA, +#' "SCREENING", +#' "VS", +#' 2L, +#' "VTLS1", +#' 375L, +#' "test_study-375", +#' "2020-10-01T11:20", +#' "DIABP", +#' "90", +#' NA, +#' "SCREENING", +#' "VS", +#' 1L, +#' "VTLS1", +#' 375L, +#' "test_study-375", +#' "2020-09-28T10:10", +#' "PULSE", +#' "ND", +#' NA, +#' "SCREENING", +#' "VS", +#' 2L, +#' "VTLS1", +#' 375L, +#' "test_study-375", +#' "2020-10-01T13:31", +#' "PULSE", +#' "85", +#' NA, +#' "SCREENING", +#' "VS", +#' 1L, +#' "VTLS2", +#' 375L, +#' "test_study-375", +#' "2020-09-28T10:10", +#' "SYSBP", +#' "120", +#' NA, +#' "SCREENING", +#' "VS", +#' 2L, +#' "VTLS2", +#' 375L, +#' "test_study-375", +#' "2020-09-28T10:05", +#' "SYSBP", +#' "120", +#' NA, +#' "SCREENING", +#' "VS", +#' 1L, +#' "VTLS1", +#' 376L, +#' "test_study-376", +#' "2020-09-20", +#' "DIABP", +#' "75", +#' NA, +#' "SCREENING", +#' "VS", +#' 1L, +#' "VTLS1", +#' 376L, +#' "test_study-376", +#' "2020-09-20", +#' "PULSE", +#' NA, +#' "NOT DONE", +#' "SCREENING", +#' "VS", +#' 2L, +#' "VTLS1", +#' 376L, +#' "test_study-376", +#' "2020-09-20", +#' "PULSE", +#' "110", +#' NA, +#' "SCREENING", +#' "VS", +#' 2L, +#' "VTLS1", +#' 378L, +#' "test_study-378", +#' "2020-01-20T10:00", +#' "PULSE", +#' "110", +#' NA, +#' "SCREENING", +#' "VS", +#' 3L, +#' "VTLS1", +#' 378L, +#' "test_study-378", +#' "2020-01-21T11:00", +#' "PULSE", +#' "105", +#' NA, +#' "SCREENING" +#' ) #' #' sdtm_in #' From d9d2e5eab15eac8c17b5d6cfe27ad38de732e8d6 Mon Sep 17 00:00:00 2001 From: Rammprasad Ganapathy Date: Tue, 18 Jun 2024 21:45:46 +0000 Subject: [PATCH 14/20] Fix pipeline failure --- man/derive_blfl.Rd | 132 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 120 insertions(+), 12 deletions(-) diff --git a/man/derive_blfl.Rd b/man/derive_blfl.Rd index ec3be0f0..50af8410 100644 --- a/man/derive_blfl.Rd +++ b/man/derive_blfl.Rd @@ -96,18 +96,126 @@ dm sdtm_in <- tibble::tribble( - ~DOMAIN, ~oak_id, ~raw_source, ~patient_number, ~USUBJID, ~VSDTC, ~VSTESTCD, ~VSORRES, ~VSSTAT, ~VISIT, - "VS", 1L, "VTLS1", 375L, "test_study-375", "2020-09-01T13:31", "DIABP", "90", NA, "SCREENING", - "VS", 2L, "VTLS1", 375L, "test_study-375", "2020-10-01T11:20", "DIABP", "90", NA, "SCREENING", - "VS", 1L, "VTLS1", 375L, "test_study-375", "2020-09-28T10:10", "PULSE", "ND", NA, "SCREENING", - "VS", 2L, "VTLS1", 375L, "test_study-375", "2020-10-01T13:31", "PULSE", "85", NA, "SCREENING", - "VS", 1L, "VTLS2", 375L, "test_study-375", "2020-09-28T10:10", "SYSBP", "120", NA, "SCREENING", - "VS", 2L, "VTLS2", 375L, "test_study-375", "2020-09-28T10:05", "SYSBP", "120", NA, "SCREENING", - "VS", 1L, "VTLS1", 376L, "test_study-376", "2020-09-20", "DIABP", "75", NA, "SCREENING", - "VS", 1L, "VTLS1", 376L, "test_study-376", "2020-09-20", "PULSE", NA, "NOT DONE", "SCREENING", # nolint - "VS", 2L, "VTLS1", 376L, "test_study-376", "2020-09-20", "PULSE", "110", NA, "SCREENING", - "VS", 2L, "VTLS1", 378L, "test_study-378", "2020-01-20T10:00", "PULSE", "110", NA, "SCREENING", - "VS", 3L, "VTLS1", 378L, "test_study-378", "2020-01-21T11:00", "PULSE", "105", NA, "SCREENING" + ~DOMAIN, + ~oak_id, + ~raw_source, + ~patient_number, + ~USUBJID, + ~VSDTC, + ~VSTESTCD, + ~VSORRES, + ~VSSTAT, + ~VISIT, + "VS", + 1L, + "VTLS1", + 375L, + "test_study-375", + "2020-09-01T13:31", + "DIABP", + "90", + NA, + "SCREENING", + "VS", + 2L, + "VTLS1", + 375L, + "test_study-375", + "2020-10-01T11:20", + "DIABP", + "90", + NA, + "SCREENING", + "VS", + 1L, + "VTLS1", + 375L, + "test_study-375", + "2020-09-28T10:10", + "PULSE", + "ND", + NA, + "SCREENING", + "VS", + 2L, + "VTLS1", + 375L, + "test_study-375", + "2020-10-01T13:31", + "PULSE", + "85", + NA, + "SCREENING", + "VS", + 1L, + "VTLS2", + 375L, + "test_study-375", + "2020-09-28T10:10", + "SYSBP", + "120", + NA, + "SCREENING", + "VS", + 2L, + "VTLS2", + 375L, + "test_study-375", + "2020-09-28T10:05", + "SYSBP", + "120", + NA, + "SCREENING", + "VS", + 1L, + "VTLS1", + 376L, + "test_study-376", + "2020-09-20", + "DIABP", + "75", + NA, + "SCREENING", + "VS", + 1L, + "VTLS1", + 376L, + "test_study-376", + "2020-09-20", + "PULSE", + NA, + "NOT DONE", + "SCREENING", + "VS", + 2L, + "VTLS1", + 376L, + "test_study-376", + "2020-09-20", + "PULSE", + "110", + NA, + "SCREENING", + "VS", + 2L, + "VTLS1", + 378L, + "test_study-378", + "2020-01-20T10:00", + "PULSE", + "110", + NA, + "SCREENING", + "VS", + 3L, + "VTLS1", + 378L, + "test_study-378", + "2020-01-21T11:00", + "PULSE", + "105", + NA, + "SCREENING" ) sdtm_in From 2b1a9c0bb02babefc27e01649997dfac3a0d959d Mon Sep 17 00:00:00 2001 From: Rammprasad Ganapathy Date: Tue, 18 Jun 2024 22:29:49 +0000 Subject: [PATCH 15/20] fix pipeline failures --- NAMESPACE | 2 ++ R/derive_blfl.R | 20 ++++++++++---------- man/dtc_datepart.Rd | 1 - man/dtc_timepart.Rd | 1 - 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index af491170..6cc59b6e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -17,6 +17,8 @@ export(derive_blfl) export(derive_seq) export(derive_study_day) export(domain_example) +export(dtc_datepart) +export(dtc_timepart) export(fmt_cmp) export(hardcode_ct) export(hardcode_no_ct) diff --git a/R/derive_blfl.R b/R/derive_blfl.R index e01ff2e1..94d2ca6d 100644 --- a/R/derive_blfl.R +++ b/R/derive_blfl.R @@ -22,7 +22,7 @@ #' partial_as_na = FALSE #' ) #' # |--> c(NA, "", "2021", "2021-12", "2021-12-25", "2021-12-25") -#' +#' @export dtc_datepart <- function(dtc, partial_as_na = TRUE) { # Assert that dtc is a character vector admiraldev::assert_character_vector(dtc) @@ -51,6 +51,7 @@ dtc_datepart <- function(dtc, partial_as_na = TRUE) { #' seconds should be ignored (default is `TRUE`). #' #' @return Character vector containing ISO 8601 times. +#' @export #' #' @examples #' ## Partial or missing times set to NA and seconds ignored by default @@ -80,7 +81,6 @@ dtc_datepart <- function(dtc, partial_as_na = TRUE) { #' ignore_seconds = FALSE #' ) #' # |--> c(NA, "", "", "12", "12:30", "12:30:59") -#' dtc_timepart <- function(dtc, partial_as_na = TRUE, ignore_seconds = TRUE) { # Assert that dtc is a character vector admiraldev::assert_character_vector(dtc) @@ -323,7 +323,7 @@ derive_blfl <- function(sdtm_in, # Assert that sdtm_in is a data frame, contains DOMAIN and oak id vars admiraldev::assert_data_frame( sdtm_in, - required_vars = rlang::syms(c("DOMAIN", sdtm.oak:::oak_id_vars())) + required_vars = rlang::syms(c("DOMAIN", oak_id_vars())) ) # Assert dm_domain is data.frame @@ -353,7 +353,7 @@ derive_blfl <- function(sdtm_in, ) domain_prefixed_names <- paste0(domain, suffixes) |> - setNames(tolower(suffixes)) + stats::setNames(tolower(suffixes)) # Assert that the input dataset has a "DTC" column admiraldev::assert_data_frame( @@ -413,15 +413,15 @@ derive_blfl <- function(sdtm_in, # Split --DTC and ref_var into date and time parts # (partial or missing dates and times set to NA) - ds_mod$dom_dt <- sdtm.oak:::dtc_datepart(ds_mod[[domain_prefixed_names["dtc"]]]) - ds_mod$dom_tm <- sdtm.oak:::dtc_timepart(ds_mod[[domain_prefixed_names["dtc"]]]) - ds_mod$ref_dt <- sdtm.oak:::dtc_datepart(ds_mod[[ref_var]]) - ds_mod$ref_tm <- sdtm.oak:::dtc_timepart(ds_mod[[ref_var]]) + ds_mod$dom_dt <- dtc_datepart(ds_mod[[domain_prefixed_names["dtc"]]]) # nolint object_name_linter() + ds_mod$dom_tm <- dtc_timepart(ds_mod[[domain_prefixed_names["dtc"]]]) # nolint object_name_linter() + ds_mod$ref_dt <- dtc_datepart(ds_mod[[ref_var]]) # nolint object_name_linter() + ds_mod$ref_tm <- dtc_timepart(ds_mod[[ref_var]]) # nolint object_name_linter() # If VISIT not in data frame then assign it as "" for processing if (!"VISIT" %in% names(ds_mod)) { - ds_mod[["VISIT"]] <- "" + ds_mod[["VISIT"]] <- "" # nolint object_name_linter() } # If --TPT not in data frame then assign it as "" for processing @@ -465,7 +465,7 @@ derive_blfl <- function(sdtm_in, ds_base <- rbind(ds_subset_lt, ds_subset_eq_1, ds_subset_eq_2) # Sort the rows in ascending order with respect to columns from con_col - ds_base <- dplyr::arrange_at(ds_base, c("USUBJID", con_col)) + ds_base <- dplyr::arrange_at(ds_base, c("USUBJID", con_col)) # nolint object_name_linter() if (nrow(ds_base) == 0L) { message(paste0("There are no baseline records.")) diff --git a/man/dtc_datepart.Rd b/man/dtc_datepart.Rd index e17ba977..662625b1 100644 --- a/man/dtc_datepart.Rd +++ b/man/dtc_datepart.Rd @@ -32,5 +32,4 @@ dtc_datepart( partial_as_na = FALSE ) # |--> c(NA, "", "2021", "2021-12", "2021-12-25", "2021-12-25") - } diff --git a/man/dtc_timepart.Rd b/man/dtc_timepart.Rd index 46c1a88f..bba11182 100644 --- a/man/dtc_timepart.Rd +++ b/man/dtc_timepart.Rd @@ -51,5 +51,4 @@ dtc_timepart( ignore_seconds = FALSE ) # |--> c(NA, "", "", "12", "12:30", "12:30:59") - } From 13321977cde93789f2ed7b309b4cad579933024d Mon Sep 17 00:00:00 2001 From: Rammprasad Ganapathy Date: Tue, 18 Jun 2024 22:58:05 +0000 Subject: [PATCH 16/20] Fix pipeline failures --- R/derive_blfl.R | 17 +++++++++-------- R/globals.R | 2 ++ man/derive_blfl.Rd | 2 ++ tests/testthat/test-derive_blfl.R | 17 ++--------------- 4 files changed, 15 insertions(+), 23 deletions(-) create mode 100644 R/globals.R diff --git a/R/derive_blfl.R b/R/derive_blfl.R index 94d2ca6d..c801a6d5 100644 --- a/R/derive_blfl.R +++ b/R/derive_blfl.R @@ -142,6 +142,7 @@ dtc_timepart <- function(dtc, partial_as_na = TRUE, ignore_seconds = TRUE) { #' - Join the baseline flag onto the input dataset based on oak id vars #' #' @param sdtm_in Input SDTM domain. +#' @param dm_domain DM domain with the reference varaible `ref_var` #' @param tgt_var Name of variable to be derived (`--BLFL` or #' `--LOBXFL` where `--` is domain). #' @param ref_var vector of a date/time from the @@ -413,15 +414,15 @@ derive_blfl <- function(sdtm_in, # Split --DTC and ref_var into date and time parts # (partial or missing dates and times set to NA) - ds_mod$dom_dt <- dtc_datepart(ds_mod[[domain_prefixed_names["dtc"]]]) # nolint object_name_linter() - ds_mod$dom_tm <- dtc_timepart(ds_mod[[domain_prefixed_names["dtc"]]]) # nolint object_name_linter() - ds_mod$ref_dt <- dtc_datepart(ds_mod[[ref_var]]) # nolint object_name_linter() - ds_mod$ref_tm <- dtc_timepart(ds_mod[[ref_var]]) # nolint object_name_linter() + ds_mod$dom_dt <- dtc_datepart(ds_mod[[domain_prefixed_names["dtc"]]]) + ds_mod$dom_tm <- dtc_timepart(ds_mod[[domain_prefixed_names["dtc"]]]) + ds_mod$ref_dt <- dtc_datepart(ds_mod[[ref_var]]) + ds_mod$ref_tm <- dtc_timepart(ds_mod[[ref_var]]) # If VISIT not in data frame then assign it as "" for processing if (!"VISIT" %in% names(ds_mod)) { - ds_mod[["VISIT"]] <- "" # nolint object_name_linter() + ds_mod[["VISIT"]] <- "" } # If --TPT not in data frame then assign it as "" for processing @@ -465,7 +466,7 @@ derive_blfl <- function(sdtm_in, ds_base <- rbind(ds_subset_lt, ds_subset_eq_1, ds_subset_eq_2) # Sort the rows in ascending order with respect to columns from con_col - ds_base <- dplyr::arrange_at(ds_base, c("USUBJID", con_col)) # nolint object_name_linter() + ds_base <- dplyr::arrange_at(ds_base, c("USUBJID", con_col)) if (nrow(ds_base) == 0L) { message(paste0("There are no baseline records.")) @@ -480,7 +481,7 @@ derive_blfl <- function(sdtm_in, dplyr::slice_max(!!rlang::sym(domain_prefixed_names["dtc"]), na_rm = TRUE) |> dplyr::ungroup() |> dplyr::select( - dplyr::all_of(c(sdtm.oak:::oak_id_vars(), domain_prefixed_names[["testcd"]])), + dplyr::all_of(c(sdtm.oak::oak_id_vars(), domain_prefixed_names[["testcd"]])), dplyr::any_of( c(domain_prefixed_names[c( "cat", @@ -501,7 +502,7 @@ derive_blfl <- function(sdtm_in, # Join baseline flag onto input dataset ds_out <- dplyr::left_join(sdtm_in, ds_blfl, by = c( domain_prefixed_names[["testcd"]], - sdtm.oak:::oak_id_vars() + sdtm.oak::oak_id_vars() )) # Assert that merged data frame has same number of rows as input data frame diff --git a/R/globals.R b/R/globals.R new file mode 100644 index 00000000..30e021b3 --- /dev/null +++ b/R/globals.R @@ -0,0 +1,2 @@ +utils::globalVariables(c("USUBJID", "VISIT", "dom_dt", "dom_tm", "ref_dt", + "ref_tm")) diff --git a/man/derive_blfl.Rd b/man/derive_blfl.Rd index 50af8410..b58dc5c7 100644 --- a/man/derive_blfl.Rd +++ b/man/derive_blfl.Rd @@ -16,6 +16,8 @@ derive_blfl( \arguments{ \item{sdtm_in}{Input SDTM domain.} +\item{dm_domain}{DM domain with the reference varaible \code{ref_var}} + \item{tgt_var}{Name of variable to be derived (\code{--BLFL} or \code{--LOBXFL} where \verb{--} is domain).} diff --git a/tests/testthat/test-derive_blfl.R b/tests/testthat/test-derive_blfl.R index 468424b7..f58da69d 100644 --- a/tests/testthat/test-derive_blfl.R +++ b/tests/testthat/test-derive_blfl.R @@ -1,4 +1,3 @@ -dta <- function(env = parent.frame()) { dm <- tibble::tribble( ~USUBJID, ~RFSTDTC, ~RFXSTDTC, "test_study-375", "2020-09-28T10:10", "2020-09-28T10:10", @@ -24,18 +23,10 @@ dta <- function(env = parent.frame()) { "VS", 3L, "VTLS1", 378L, "test_study-378", "2020-01-21T11:00", "PULSE", "105", NA, "SCREENING" ) - withr::defer( - { - rm(d, envir = env) - }, - envir = env - ) + d <- list(sdtm_in = sdtm_in, dm = dm) - list(sdtm_in = sdtm_in, dm = dm) -} test_that("derive_blfl example works", { - d <- dta() observed_output <- derive_blfl( sdtm_in = d$sdtm_in, @@ -50,7 +41,6 @@ test_that("derive_blfl example works", { }) test_that("derive_blfl sdmt_in validations work", { - d <- dta() sdmt_in_no_domain <- d$sdtm_in |> dplyr::select(-DOMAIN) @@ -64,7 +54,7 @@ test_that("derive_blfl sdmt_in validations work", { sdmt_in_no_id_vars <- d$sdtm_in |> - dplyr::select(-sdtm.oak:::oak_id_vars()) + dplyr::select(-sdtm.oak::oak_id_vars()) expect_snapshot_error(derive_blfl( sdtm_in = sdmt_in_no_id_vars, @@ -91,7 +81,6 @@ test_that("derive_blfl sdmt_in validations work", { }) test_that("derive_blfl dm_domain validations work", { - d <- dta() dm_no_vars <- d$dm |> @@ -106,7 +95,6 @@ test_that("derive_blfl dm_domain validations work", { }) test_that("derive_blfl tgt_var and ref_var validations work", { - d <- dta() expect_snapshot_error(derive_blfl( sdtm_in = d$sdtm_in, @@ -131,7 +119,6 @@ test_that("derive_blfl tgt_var and ref_var validations work", { }) test_that("derive_blfl DOMAIN validation works", { - d <- dta() sdtm_in_bad_domain <- d$sdtm_in |> From 2de0eb79196356a513c56e9c7708c602891a92b6 Mon Sep 17 00:00:00 2001 From: Rammprasad Ganapathy Date: Tue, 18 Jun 2024 23:07:49 +0000 Subject: [PATCH 17/20] Fix styler issues --- R/globals.R | 6 ++-- tests/testthat/test-derive_blfl.R | 52 ++++++++++++++----------------- 2 files changed, 28 insertions(+), 30 deletions(-) diff --git a/R/globals.R b/R/globals.R index 30e021b3..9a2998a0 100644 --- a/R/globals.R +++ b/R/globals.R @@ -1,2 +1,4 @@ -utils::globalVariables(c("USUBJID", "VISIT", "dom_dt", "dom_tm", "ref_dt", - "ref_tm")) +utils::globalVariables(c( + "USUBJID", "VISIT", "dom_dt", "dom_tm", "ref_dt", + "ref_tm" +)) diff --git a/tests/testthat/test-derive_blfl.R b/tests/testthat/test-derive_blfl.R index f58da69d..7b6b963c 100644 --- a/tests/testthat/test-derive_blfl.R +++ b/tests/testthat/test-derive_blfl.R @@ -1,33 +1,32 @@ - dm <- tibble::tribble( - ~USUBJID, ~RFSTDTC, ~RFXSTDTC, - "test_study-375", "2020-09-28T10:10", "2020-09-28T10:10", - "test_study-376", "2020-09-21T11:00", "2020-09-21T11:00", - "test_study-377", NA, NA, - "test_study-378", "2020-01-20T10:00", "2020-01-20T10:00", - "test_study-379", NA, NA +dm <- tibble::tribble( + ~USUBJID, ~RFSTDTC, ~RFXSTDTC, + "test_study-375", "2020-09-28T10:10", "2020-09-28T10:10", + "test_study-376", "2020-09-21T11:00", "2020-09-21T11:00", + "test_study-377", NA, NA, + "test_study-378", "2020-01-20T10:00", "2020-01-20T10:00", + "test_study-379", NA, NA +) + +sdtm_in <- + tibble::tribble( + ~DOMAIN, ~oak_id, ~raw_source, ~patient_number, ~USUBJID, ~VSDTC, ~VSTESTCD, ~VSORRES, ~VSSTAT, ~VISIT, + "VS", 1L, "VTLS1", 375L, "test_study-375", "2020-09-01T13:31", "DIABP", "90", NA, "SCREENING", + "VS", 2L, "VTLS1", 375L, "test_study-375", "2020-10-01T11:20", "DIABP", "90", NA, "SCREENING", + "VS", 1L, "VTLS1", 375L, "test_study-375", "2020-09-28T10:10", "PULSE", "ND", NA, "SCREENING", + "VS", 2L, "VTLS1", 375L, "test_study-375", "2020-10-01T13:31", "PULSE", "85", NA, "SCREENING", + "VS", 1L, "VTLS2", 375L, "test_study-375", "2020-09-28T10:10", "SYSBP", "120", NA, "SCREENING", + "VS", 2L, "VTLS2", 375L, "test_study-375", "2020-09-28T10:05", "SYSBP", "120", NA, "SCREENING", + "VS", 1L, "VTLS1", 376L, "test_study-376", "2020-09-20", "DIABP", "75", NA, "SCREENING", + "VS", 1L, "VTLS1", 376L, "test_study-376", "2020-09-20", "PULSE", NA, "NOT DONE", "SCREENING", # nolint + "VS", 2L, "VTLS1", 376L, "test_study-376", "2020-09-20", "PULSE", "110", NA, "SCREENING", + "VS", 2L, "VTLS1", 378L, "test_study-378", "2020-01-20T10:00", "PULSE", "110", NA, "SCREENING", + "VS", 3L, "VTLS1", 378L, "test_study-378", "2020-01-21T11:00", "PULSE", "105", NA, "SCREENING" ) - sdtm_in <- - tibble::tribble( - ~DOMAIN, ~oak_id, ~raw_source, ~patient_number, ~USUBJID, ~VSDTC, ~VSTESTCD, ~VSORRES, ~VSSTAT, ~VISIT, - "VS", 1L, "VTLS1", 375L, "test_study-375", "2020-09-01T13:31", "DIABP", "90", NA, "SCREENING", - "VS", 2L, "VTLS1", 375L, "test_study-375", "2020-10-01T11:20", "DIABP", "90", NA, "SCREENING", - "VS", 1L, "VTLS1", 375L, "test_study-375", "2020-09-28T10:10", "PULSE", "ND", NA, "SCREENING", - "VS", 2L, "VTLS1", 375L, "test_study-375", "2020-10-01T13:31", "PULSE", "85", NA, "SCREENING", - "VS", 1L, "VTLS2", 375L, "test_study-375", "2020-09-28T10:10", "SYSBP", "120", NA, "SCREENING", - "VS", 2L, "VTLS2", 375L, "test_study-375", "2020-09-28T10:05", "SYSBP", "120", NA, "SCREENING", - "VS", 1L, "VTLS1", 376L, "test_study-376", "2020-09-20", "DIABP", "75", NA, "SCREENING", - "VS", 1L, "VTLS1", 376L, "test_study-376", "2020-09-20", "PULSE", NA, "NOT DONE", "SCREENING", # nolint - "VS", 2L, "VTLS1", 376L, "test_study-376", "2020-09-20", "PULSE", "110", NA, "SCREENING", - "VS", 2L, "VTLS1", 378L, "test_study-378", "2020-01-20T10:00", "PULSE", "110", NA, "SCREENING", - "VS", 3L, "VTLS1", 378L, "test_study-378", "2020-01-21T11:00", "PULSE", "105", NA, "SCREENING" - ) - - d <- list(sdtm_in = sdtm_in, dm = dm) +d <- list(sdtm_in = sdtm_in, dm = dm) test_that("derive_blfl example works", { - observed_output <- derive_blfl( sdtm_in = d$sdtm_in, dm_domain = d$dm, @@ -81,7 +80,6 @@ test_that("derive_blfl sdmt_in validations work", { }) test_that("derive_blfl dm_domain validations work", { - dm_no_vars <- d$dm |> dplyr::select(-c(RFXSTDTC, USUBJID)) @@ -95,7 +93,6 @@ test_that("derive_blfl dm_domain validations work", { }) test_that("derive_blfl tgt_var and ref_var validations work", { - expect_snapshot_error(derive_blfl( sdtm_in = d$sdtm_in, dm_domain = d$dm, @@ -119,7 +116,6 @@ test_that("derive_blfl tgt_var and ref_var validations work", { }) test_that("derive_blfl DOMAIN validation works", { - sdtm_in_bad_domain <- d$sdtm_in |> dplyr::mutate(DOMAIN = 4L) From d88093bac77962fd750796bc850cd19f25868b16 Mon Sep 17 00:00:00 2001 From: Rammprasad Ganapathy Date: Wed, 19 Jun 2024 18:59:56 +0000 Subject: [PATCH 18/20] Review comments --- NAMESPACE | 1 - R/derive_blfl.R | 4 +-- man/dtc_datepart.Rd | 1 + man/dtc_timepart.Rd | 73 +++++++++++++++++++++++++++------------------ 4 files changed, 47 insertions(+), 32 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 6cc59b6e..cd5de562 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -17,7 +17,6 @@ export(derive_blfl) export(derive_seq) export(derive_study_day) export(domain_example) -export(dtc_datepart) export(dtc_timepart) export(fmt_cmp) export(hardcode_ct) diff --git a/R/derive_blfl.R b/R/derive_blfl.R index c801a6d5..d60f2afe 100644 --- a/R/derive_blfl.R +++ b/R/derive_blfl.R @@ -22,7 +22,7 @@ #' partial_as_na = FALSE #' ) #' # |--> c(NA, "", "2021", "2021-12", "2021-12-25", "2021-12-25") -#' @export +#' @keywords internal dtc_datepart <- function(dtc, partial_as_na = TRUE) { # Assert that dtc is a character vector admiraldev::assert_character_vector(dtc) @@ -53,7 +53,7 @@ dtc_datepart <- function(dtc, partial_as_na = TRUE) { #' @return Character vector containing ISO 8601 times. #' @export #' -#' @examples +#' @keywords internal #' ## Partial or missing times set to NA and seconds ignored by default #' dtc_timepart( #' c(NA, "", "2021-12-25", "2021-12-25T12", "2021-12-25T12:30", "2021-12-25T12:30:59") diff --git a/man/dtc_datepart.Rd b/man/dtc_datepart.Rd index 662625b1..2c5134ba 100644 --- a/man/dtc_datepart.Rd +++ b/man/dtc_datepart.Rd @@ -33,3 +33,4 @@ dtc_datepart( ) # |--> c(NA, "", "2021", "2021-12", "2021-12-25", "2021-12-25") } +\keyword{internal} diff --git a/man/dtc_timepart.Rd b/man/dtc_timepart.Rd index bba11182..74c5bb61 100644 --- a/man/dtc_timepart.Rd +++ b/man/dtc_timepart.Rd @@ -23,32 +23,47 @@ The time part is extracted from an ISO 8601 date/time variable. By default, partial or missing times are set to NA, and seconds are ignored and not extracted. } -\examples{ -## Partial or missing times set to NA and seconds ignored by default -dtc_timepart( - c(NA, "", "2021-12-25", "2021-12-25T12", "2021-12-25T12:30", "2021-12-25T12:30:59") -) -# |--> c(NA, NA, NA, NA, "12:30", "12:30") - -## Prevent partial or missing times from being set to NA -dtc_timepart( - c(NA, "", "2021-12-25", "2021-12-25T12", "2021-12-25T12:30", "2021-12-25T12:30:59"), - partial_as_na = FALSE -) -# |--> c(NA, "", "", "12", "12:30", "12:30") - -## Do not ignore seconds, partial or missing times set to NA -dtc_timepart( - c(NA, "", "2021-12-25", "2021-12-25T12", "2021-12-25T12:30", "2021-12-25T12:30:59"), - ignore_seconds = FALSE -) -# |--> c(NA, NA, NA, NA, NA, "12:30:59") - -## Do not ignore seconds and prevent partial or missing times from being set to NA -dtc_timepart( - c(NA, "", "2021-12-25", "2021-12-25T12", "2021-12-25T12:30", "2021-12-25T12:30:59"), - partial_as_na = FALSE, - ignore_seconds = FALSE -) -# |--> c(NA, "", "", "12", "12:30", "12:30:59") -} +\keyword{"",} +\keyword{"12",} +\keyword{"12:30")} +\keyword{"12:30",} +\keyword{"12:30:59")} +\keyword{"2021-12-25",} +\keyword{"2021-12-25T12",} +\keyword{"2021-12-25T12:30",} +\keyword{"2021-12-25T12:30:59")} +\keyword{"2021-12-25T12:30:59"),} +\keyword{#} +\keyword{##} +\keyword{)} +\keyword{=} +\keyword{Do} +\keyword{FALSE} +\keyword{FALSE,} +\keyword{NA} +\keyword{NA,} +\keyword{Partial} +\keyword{Prevent} +\keyword{and} +\keyword{being} +\keyword{by} +\keyword{c(NA,} +\keyword{default} +\keyword{dtc_timepart(} +\keyword{from} +\keyword{ignore} +\keyword{ignore_seconds} +\keyword{ignored} +\keyword{internal} +\keyword{missing} +\keyword{not} +\keyword{or} +\keyword{partial} +\keyword{partial_as_na} +\keyword{prevent} +\keyword{seconds} +\keyword{seconds,} +\keyword{set} +\keyword{times} +\keyword{to} +\keyword{|-->} From cb3b9cb33e7742a05feb86dc83fc6d71b1784522 Mon Sep 17 00:00:00 2001 From: Rammprasad Ganapathy Date: Wed, 19 Jun 2024 19:13:48 +0000 Subject: [PATCH 19/20] Fix errors --- R/derive_blfl.R | 12 ++++++------ man/dtc_datepart.Rd | 4 ++-- man/dtc_timepart.Rd | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/R/derive_blfl.R b/R/derive_blfl.R index d60f2afe..7a97bcf9 100644 --- a/R/derive_blfl.R +++ b/R/derive_blfl.R @@ -11,13 +11,13 @@ #' #' @examples #' ## Partial or missing dates set to NA by default -#' dtc_datepart( +#' sdtm.oak:::dtc_datepart( #' c(NA, "", "2021", "2021-12", "2021-12-25", "2021-12-25T12:00:00") #' ) #' # |--> c(NA, NA, NA, NA, "2021-12-25", "2021-12-25") #' #' ## Prevent partial or missing dates from being set to NA -#' dtc_datepart( +#' sdtm.oak:::dtc_datepart( #' c(NA, "", "2021", "2021-12", "2021-12-25", "2021-12-25T12:00:00"), #' partial_as_na = FALSE #' ) @@ -55,27 +55,27 @@ dtc_datepart <- function(dtc, partial_as_na = TRUE) { #' #' @keywords internal #' ## Partial or missing times set to NA and seconds ignored by default -#' dtc_timepart( +#' sdtm.oak:::dtc_timepart( #' c(NA, "", "2021-12-25", "2021-12-25T12", "2021-12-25T12:30", "2021-12-25T12:30:59") #' ) #' # |--> c(NA, NA, NA, NA, "12:30", "12:30") #' #' ## Prevent partial or missing times from being set to NA -#' dtc_timepart( +#' sdtm.oak:::dtc_timepart( #' c(NA, "", "2021-12-25", "2021-12-25T12", "2021-12-25T12:30", "2021-12-25T12:30:59"), #' partial_as_na = FALSE #' ) #' # |--> c(NA, "", "", "12", "12:30", "12:30") #' #' ## Do not ignore seconds, partial or missing times set to NA -#' dtc_timepart( +#' sdtm.oak:::dtc_timepart( #' c(NA, "", "2021-12-25", "2021-12-25T12", "2021-12-25T12:30", "2021-12-25T12:30:59"), #' ignore_seconds = FALSE #' ) #' # |--> c(NA, NA, NA, NA, NA, "12:30:59") #' #' ## Do not ignore seconds and prevent partial or missing times from being set to NA -#' dtc_timepart( +#' sdtm.oak:::dtc_timepart( #' c(NA, "", "2021-12-25", "2021-12-25T12", "2021-12-25T12:30", "2021-12-25T12:30:59"), #' partial_as_na = FALSE, #' ignore_seconds = FALSE diff --git a/man/dtc_datepart.Rd b/man/dtc_datepart.Rd index 2c5134ba..7d2f05e6 100644 --- a/man/dtc_datepart.Rd +++ b/man/dtc_datepart.Rd @@ -21,13 +21,13 @@ By default, partial or missing dates are set to NA. } \examples{ ## Partial or missing dates set to NA by default -dtc_datepart( +sdtm.oak:::dtc_datepart( c(NA, "", "2021", "2021-12", "2021-12-25", "2021-12-25T12:00:00") ) # |--> c(NA, NA, NA, NA, "2021-12-25", "2021-12-25") ## Prevent partial or missing dates from being set to NA -dtc_datepart( +sdtm.oak:::dtc_datepart( c(NA, "", "2021", "2021-12", "2021-12-25", "2021-12-25T12:00:00"), partial_as_na = FALSE ) diff --git a/man/dtc_timepart.Rd b/man/dtc_timepart.Rd index 74c5bb61..84d648a7 100644 --- a/man/dtc_timepart.Rd +++ b/man/dtc_timepart.Rd @@ -49,7 +49,6 @@ and not extracted. \keyword{by} \keyword{c(NA,} \keyword{default} -\keyword{dtc_timepart(} \keyword{from} \keyword{ignore} \keyword{ignore_seconds} @@ -61,6 +60,7 @@ and not extracted. \keyword{partial} \keyword{partial_as_na} \keyword{prevent} +\keyword{sdtm.oak:::dtc_timepart(} \keyword{seconds} \keyword{seconds,} \keyword{set} From ff2d7779b9b1087e4a4227506b176135b73ef4b1 Mon Sep 17 00:00:00 2001 From: Edgar Manukyan Date: Thu, 20 Jun 2024 12:30:54 -0400 Subject: [PATCH 20/20] fix spelling --- R/derive_blfl.R | 4 ++-- inst/WORDLIST | 32 ++++++++++++++++++++++++++++++++ man/derive_blfl.Rd | 4 ++-- 3 files changed, 36 insertions(+), 4 deletions(-) diff --git a/R/derive_blfl.R b/R/derive_blfl.R index 7a97bcf9..0c3e2508 100644 --- a/R/derive_blfl.R +++ b/R/derive_blfl.R @@ -142,7 +142,7 @@ dtc_timepart <- function(dtc, partial_as_na = TRUE, ignore_seconds = TRUE) { #' - Join the baseline flag onto the input dataset based on oak id vars #' #' @param sdtm_in Input SDTM domain. -#' @param dm_domain DM domain with the reference varaible `ref_var` +#' @param dm_domain DM domain with the reference variable `ref_var` #' @param tgt_var Name of variable to be derived (`--BLFL` or #' `--LOBXFL` where `--` is domain). #' @param ref_var vector of a date/time from the @@ -154,7 +154,7 @@ dtc_timepart <- function(dtc, partial_as_na = TRUE, ignore_seconds = TRUE) { #' These visits are identified as critical points for data collection at the start of the study, #' before any intervention is applied. This allows the function to assign the baseline #' flag if the --DTC matches to the reference date. -#' @param baseline_timepoints A character vector of timpoints values in --TPT that specifies +#' @param baseline_timepoints A character vector of timepoints values in --TPT that specifies #' the specific timepoints during the baseline visits when key assessments or measurements were taken. #' This allows the function to assign the baseline flag if the --DTC matches to the reference date. #' diff --git a/inst/WORDLIST b/inst/WORDLIST index 70830c44..3f90a5e4 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -32,3 +32,35 @@ Immunogenicity Pharmacokinetics iRECIST pharmaversesdtm +pre +CMGRPID +CMMODIFY +CMSTRTPT +CMSTTPT +CMTRT +CRF +datetime +DIABP +eCRF +eDT +hardcode +hardcoding +MDBDR +MDBTM +MDEDR +MDETM +MDR +MDRAW +mmm +NonCRF +OID +OXYSAT +px +SYS +SYSBP +VSALL +yyyy +RFSTDTC +RFXSTDTC +TPT +xxTPT diff --git a/man/derive_blfl.Rd b/man/derive_blfl.Rd index b58dc5c7..c1fd92b1 100644 --- a/man/derive_blfl.Rd +++ b/man/derive_blfl.Rd @@ -16,7 +16,7 @@ derive_blfl( \arguments{ \item{sdtm_in}{Input SDTM domain.} -\item{dm_domain}{DM domain with the reference varaible \code{ref_var}} +\item{dm_domain}{DM domain with the reference variable \code{ref_var}} \item{tgt_var}{Name of variable to be derived (\code{--BLFL} or \code{--LOBXFL} where \verb{--} is domain).} @@ -32,7 +32,7 @@ These visits are identified as critical points for data collection at the start before any intervention is applied. This allows the function to assign the baseline flag if the --DTC matches to the reference date.} -\item{baseline_timepoints}{A character vector of timpoints values in --TPT that specifies +\item{baseline_timepoints}{A character vector of timepoints values in --TPT that specifies the specific timepoints during the baseline visits when key assessments or measurements were taken. This allows the function to assign the baseline flag if the --DTC matches to the reference date.} }