diff --git a/R/assign.R b/R/assign.R index c9c1d831..88dbf8c4 100644 --- a/R/assign.R +++ b/R/assign.R @@ -21,7 +21,7 @@ #' the variables indicated in `id_vars`. This parameter is optional, see #' section Value for how the output changes depending on this argument value. #' @param id_vars Key variables to be used in the join between the raw dataset -#' (`raw_dat`) and the target data set (`raw_dat`). +#' (`raw_dat`) and the target data set (`tgt_dat`). #' #' @returns The returned data set depends on the value of `tgt_dat`: #' - If no target dataset is supplied, meaning that `tgt_dat` defaults to @@ -53,33 +53,19 @@ sdtm_assign <- function(raw_dat, assert_ct_spec(ct_spec, optional = TRUE) assert_ct_clst(ct_spec = ct_spec, ct_clst = ct_clst, optional = TRUE) - # Recode the raw variable following terminology. - tgt_val <- ct_map(raw_dat[[raw_var]], ct_spec = ct_spec, ct_clst = ct_clst) - - # Apply derivation by assigning `raw_var` to `tgt_var`. - # `der_dat`: derived dataset. - der_dat <- + join_dat <- raw_dat |> - dplyr::select(c(id_vars, raw_var)) |> - mutate("{tgt_var}" := tgt_val) |> # nolint object_name_linter() - dplyr::select(-rlang::sym(raw_var)) + dplyr::select(dplyr::all_of(c(id_vars, raw_var))) |> + sdtm_join(tgt_dat = tgt_dat, id_vars = id_vars) - # If a target dataset is supplied, then join the so far derived dataset with - # the target dataset (`tgt_dat`), otherwise leave it be. - der_dat <- - if (!is.null(tgt_dat)) { - # If variable `tgt_var` exists in `tgt_dat` remove it as we want to - # keep the derived variable in `der_dat`. - tgt_dat <- dplyr::select(tgt_dat, -dplyr::any_of(tgt_var)) + # Recode the raw variable following terminology. + tgt_val <- ct_map(join_dat[[raw_var]], ct_spec = ct_spec, ct_clst = ct_clst) - der_dat |> - dplyr::right_join(y = tgt_dat, by = id_vars) |> - dplyr::relocate(tgt_var, .after = dplyr::last_col()) - } else { - der_dat - } + join_dat |> + mutate("{tgt_var}" := tgt_val) |> # nolint object_name_linter() + dplyr::select(-dplyr::any_of(setdiff(raw_var, tgt_var))) |> + dplyr::relocate(dplyr::all_of(tgt_var), .after = dplyr::last_col()) - der_dat } #' Derive an SDTM variable diff --git a/R/assign_datetime.R b/R/assign_datetime.R index d8d4ec8b..e6881e8b 100644 --- a/R/assign_datetime.R +++ b/R/assign_datetime.R @@ -170,30 +170,20 @@ assign_datetime <- admiraldev::assert_character_vector(raw_unk) admiraldev::assert_logical_scalar(.warn) - tgt_val <- - create_iso8601(!!!raw_dat[raw_var], - .format = raw_fmt, - .na = raw_unk, - .warn = .warn - ) - - der_dat <- + join_dat <- raw_dat |> dplyr::select(dplyr::all_of(c(id_vars, raw_var))) |> - dplyr::mutate("{tgt_var}" := tgt_val) |> # nolint object_name_linter() - dplyr::select(-dplyr::any_of(raw_var)) + sdtm_join(tgt_dat = tgt_dat, id_vars = id_vars) - der_dat <- - if (!is.null(tgt_dat)) { - # If variable `tgt_var` exists in `tgt_dat` remove it as we want to - # keep the derived variable in `der_dat`. - tgt_dat <- dplyr::select(tgt_dat, -dplyr::any_of(tgt_var)) - der_dat |> - dplyr::right_join(y = tgt_dat, by = id_vars) |> - dplyr::relocate(tgt_var, .after = dplyr::last_col()) - } else { - der_dat - } + tgt_val <- + create_iso8601(!!!join_dat[raw_var], + .format = raw_fmt, + .na = raw_unk, + .warn = .warn + ) - der_dat + join_dat |> + mutate("{tgt_var}" := tgt_val) |> # nolint object_name_linter() + dplyr::select(-dplyr::any_of(setdiff(raw_var, tgt_var))) |> + dplyr::relocate(dplyr::all_of(tgt_var), .after = dplyr::last_col()) } diff --git a/R/cnd_df.R b/R/cnd_df.R index 9ef23a83..54dfc1d0 100644 --- a/R/cnd_df.R +++ b/R/cnd_df.R @@ -122,9 +122,7 @@ rm_cnd_df <- function(dat) { return(dat) } -#' Print -#' -#' Blah +#' Conditioned tibble header print method #' #' @param x A conditioned tibble of class `cnd_df`. #' @param ... Additional arguments passed to the default print method. @@ -143,6 +141,9 @@ lgl_to_chr <- function(x) { ifelse(is.na(x), "-", ifelse(x, "T", "F")) } +#' Conditioned tibble pillar print method +#' +#' @inheritParams pillar::ctl_new_rowid_pillar #' @importFrom pillar ctl_new_rowid_pillar #' @export ctl_new_rowid_pillar.cnd_df <- function(controller, x, width, ...) { @@ -261,7 +262,7 @@ eval_conditions <- function(dat, #' Condition a data set based on specified conditions #' #' This function tags records in a data set, indicating which rows match the -#' specified conditions. +#' specified conditions, resulting in a conditioned data frame. #' #' @param dat A tibble. #' @param ... Conditions to filter the tibble. @@ -272,6 +273,8 @@ eval_conditions <- function(dat, #' expression passed in `...`. A data frame or a list can also be passed that #' will be coerced to an environment internally. #' +#' @returns A conditioned data frame. +#' #' @export condition_by <- function(dat, ..., .na = NA, .env = rlang::env()) { @@ -286,6 +289,16 @@ condition_by <- function(dat, ..., .na = NA, .env = rlang::env()) { new_cnd_df(dat, cnd = cnd, .warn = FALSE) } +#' Mutate method for conditioned data frames +#' +#' [mutate.cnd_df()] is an S3 method to be dispatched by [mutate][dplyr::mutate] +#' generic on conditioned data frames. This function implements a conditional +#' mutate by only changing rows for which the condition stored in the +#' conditioned data frame is `TRUE`. +#' +#' @param .data A conditioned data frame. +#' +#' @inheritParams dplyr::mutate #' @importFrom dplyr mutate #' @export mutate.cnd_df <- function(.data, @@ -304,5 +317,5 @@ mutate.cnd_df <- function(.data, lst <- purrr::map(derivations, ~ rlang::expr(dplyr::if_else({{cnd}}, !!.x, NA))) lst <- rlang::set_names(lst, derived_vars) - dplyr::mutate(dat, !!!lst) + dplyr::mutate(dat, !!!lst, .by = .by, .keep = .keep, .after = .after) } diff --git a/R/hardcode.R b/R/hardcode.R index ffce1bd7..e893f6b7 100644 --- a/R/hardcode.R +++ b/R/hardcode.R @@ -23,7 +23,7 @@ #' the variables indicated in `id_vars`. This parameter is optional, see #' section Value for how the output changes depending on this argument value. #' @param id_vars Key variables to be used in the join between the raw dataset -#' (`raw_dat`) and the target data set (`raw_dat`). +#' (`raw_dat`) and the target data set (`tgt_dat`). #' #' @returns The returned data set depends on the value of `tgt_dat`: #' - If no target dataset is supplied, meaning that `tgt_dat` defaults to @@ -56,33 +56,18 @@ sdtm_hardcode <- function(raw_dat, assert_ct_spec(ct_spec, optional = TRUE) assert_ct_clst(ct_spec = ct_spec, ct_clst = ct_clst, optional = TRUE) - # Recode the hardcoded value following terminology. - tgt_val <- ct_map(tgt_val, ct_spec = ct_spec, ct_clst = ct_clst) - - # Apply derivation of the hardcoded value. - # `der_dat`: derived dataset. - der_dat <- + join_dat <- raw_dat |> dplyr::select(dplyr::all_of(c(id_vars, raw_var))) |> - mutate("{tgt_var}" := recode(x = !!rlang::sym(raw_var), to = tgt_val)) |> # nolint object_name_linter() - dplyr::select(-dplyr::any_of(raw_var)) + sdtm_join(tgt_dat = tgt_dat, id_vars = id_vars) - # If a target dataset is supplied, then join the so far derived dataset with - # the target dataset (`tgt_dat`), otherwise leave it be. - der_dat <- - if (!is.null(tgt_dat)) { - # If variable `tgt_var` exists in `tgt_dat` remove it as we want to - # keep the derived variable in `der_dat`. - tgt_dat <- dplyr::select(tgt_dat, -dplyr::any_of(tgt_var)) - - der_dat |> - dplyr::right_join(y = tgt_dat, by = id_vars) |> - dplyr::relocate(tgt_var, .after = dplyr::last_col()) - } else { - der_dat - } + # Recode the hardcoded value following terminology. + tgt_val <- ct_map(tgt_val, ct_spec = ct_spec, ct_clst = ct_clst) - der_dat + join_dat |> + mutate("{tgt_var}" := recode(x = !!rlang::sym(raw_var), to = tgt_val)) |> # nolint object_name_linter() + dplyr::select(-dplyr::any_of(setdiff(raw_var, tgt_var))) |> + dplyr::relocate(dplyr::all_of(tgt_var), .after = dplyr::last_col()) } #' Derive an SDTM variable with a hardcoded value diff --git a/R/join.R b/R/join.R new file mode 100644 index 00000000..1e0e64b9 --- /dev/null +++ b/R/join.R @@ -0,0 +1,39 @@ +#' SDTM join +#' +#' [sdtm_join()] is a special join between a raw data set and a target data +#' set. This function supports conditioned data frames. +#' +#' @param raw_dat The raw dataset: a dataframe or a conditioned data frame. Must +#' include the variables passed in `id_vars`. +#' @param tgt_dat Target dataset: a data frame or a conditioned data frame to be +#' merged against `raw_dat` by the variables indicated in `id_vars`. +#' @param id_vars Key variables to be used in the join between the raw dataset +#' (`raw_dat`) and the target data set (`raw_dat`). +#' +#' @returns A data frame, or a conditioned data frame if at least one of the +#' input data sets is a conditioned data frame. +#' +#' @keywords internal +#' @importFrom rlang %||% +sdtm_join <- function(raw_dat, + tgt_dat = NULL, + id_vars = oak_id_vars()) { + raw_dat_cnd <- get_cnd_df_cnd(raw_dat) %||% rep(TRUE, nrow(raw_dat)) + tgt_dat <- tgt_dat %||% raw_dat[id_vars] + tgt_dat_cnd <- get_cnd_df_cnd(tgt_dat) %||% rep(TRUE, nrow(tgt_dat)) + + # `rm_cnd_df()` prevents `mutate` from dispatching. + raw_dat <- dplyr::mutate(rm_cnd_df(raw_dat), `__raw_dat_cond__` = raw_dat_cnd) + tgt_dat <- dplyr::mutate(rm_cnd_df(tgt_dat), `__tgt_dat_cond__` = tgt_dat_cnd) + + res <- dplyr::right_join(raw_dat, y = tgt_dat, by = id_vars) + + cnd <- res$`__raw_dat_cond__` & res$`__tgt_dat_cond__` + res |> + dplyr::select(-dplyr::all_of(c( + "__raw_dat_cond__", "__tgt_dat_cond__" + ))) |> + new_cnd_df(cnd = cnd, .warn = FALSE) + +} + diff --git a/R/sdtm_join.R b/R/sdtm_join.R new file mode 100644 index 00000000..fc973ead --- /dev/null +++ b/R/sdtm_join.R @@ -0,0 +1,39 @@ +#' SDTM join +#' +#' [sdtm_join()] is a special join between a raw data set and a target data +#' set. This function supports conditioned data frames. +#' +#' @param raw_dat The raw dataset: a dataframe or a conditioned data frame. Must +#' include the variables passed in `id_vars`. +#' @param tgt_dat Target dataset: a data frame or a conditioned data frame to be +#' merged against `raw_dat` by the variables indicated in `id_vars`. +#' @param id_vars Key variables to be used in the join between the raw dataset +#' (`raw_dat`) and the target data set (`tgt_dat`). +#' +#' @returns A data frame, or a conditioned data frame if, at least, one of the +#' input data sets is a conditioned data frame. +#' +#' @keywords internal +#' @importFrom rlang %||% +sdtm_join <- function(raw_dat, + tgt_dat = NULL, + id_vars = oak_id_vars()) { + raw_dat_cnd <- get_cnd_df_cnd(raw_dat) %||% rep(TRUE, nrow(raw_dat)) + tgt_dat <- tgt_dat %||% raw_dat[id_vars] + tgt_dat_cnd <- get_cnd_df_cnd(tgt_dat) %||% rep(TRUE, nrow(tgt_dat)) + + # `rm_cnd_df()` prevents `mutate` from dispatching. + raw_dat <- dplyr::mutate(rm_cnd_df(raw_dat), `__raw_dat_cond__` = raw_dat_cnd) + tgt_dat <- dplyr::mutate(rm_cnd_df(tgt_dat), `__tgt_dat_cond__` = tgt_dat_cnd) + + res <- dplyr::right_join(raw_dat, y = tgt_dat, by = id_vars) + + cnd <- res$`__raw_dat_cond__` & res$`__tgt_dat_cond__` + res |> + dplyr::select(-dplyr::all_of(c( + "__raw_dat_cond__", "__tgt_dat_cond__" + ))) |> + new_cnd_df(cnd = cnd, .warn = FALSE) + +} + diff --git a/inst/WORDLIST b/inst/WORDLIST index 538f7a2e..78f8f566 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -25,3 +25,4 @@ AE AESTDY CMSTDY DM +ungrouped diff --git a/man/condition_by.Rd b/man/condition_by.Rd index e1b7cb2a..7a5ff82c 100644 --- a/man/condition_by.Rd +++ b/man/condition_by.Rd @@ -20,8 +20,10 @@ will be coerced to an environment internally.} \value{ A tibble with an additional class \code{cnd_df} and a logical vector attribute indicating matching rows. + +A conditioned data frame. } \description{ This function tags records in a data set, indicating which rows match the -specified conditions. +specified conditions, resulting in a conditioned data frame. } diff --git a/man/ctl_new_rowid_pillar.cnd_df.Rd b/man/ctl_new_rowid_pillar.cnd_df.Rd new file mode 100644 index 00000000..dc70aa27 --- /dev/null +++ b/man/ctl_new_rowid_pillar.cnd_df.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cnd_df.R +\name{ctl_new_rowid_pillar.cnd_df} +\alias{ctl_new_rowid_pillar.cnd_df} +\title{Conditioned tibble pillar print method} +\usage{ +\method{ctl_new_rowid_pillar}{cnd_df}(controller, x, width, ...) +} +\arguments{ +\item{controller}{The object of class \code{"tbl"} currently printed.} + +\item{x}{A simple (one-dimensional) vector.} + +\item{width}{The available width, can be a vector for multiple tiers.} + +\item{...}{These dots are for future extensions and must be empty.} +} +\description{ +Conditioned tibble pillar print method +} diff --git a/man/mutate.cnd_df.Rd b/man/mutate.cnd_df.Rd new file mode 100644 index 00000000..d55b2134 --- /dev/null +++ b/man/mutate.cnd_df.Rd @@ -0,0 +1,60 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cnd_df.R +\name{mutate.cnd_df} +\alias{mutate.cnd_df} +\title{Mutate method for conditioned data frames} +\usage{ +\method{mutate}{cnd_df}( + .data, + ..., + .by = NULL, + .keep = c("all", "used", "unused", "none"), + .before = NULL, + .after = NULL +) +} +\arguments{ +\item{.data}{A conditioned data frame.} + +\item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Name-value pairs. +The name gives the name of the column in the output. + +The value can be: +\itemize{ +\item A vector of length 1, which will be recycled to the correct length. +\item A vector the same length as the current group (or the whole data frame +if ungrouped). +\item \code{NULL}, to remove the column. +\item A data frame or tibble, to create multiple columns in the output. +}} + +\item{.by}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} + +<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Optionally, a selection of columns to +group by for just this operation, functioning as an alternative to \code{\link[dplyr:group_by]{group_by()}}. For +details and examples, see \link[dplyr:dplyr_by]{?dplyr_by}.} + +\item{.keep}{Control which columns from \code{.data} are retained in the output. Grouping +columns and columns created by \code{...} are always kept. +\itemize{ +\item \code{"all"} retains all columns from \code{.data}. This is the default. +\item \code{"used"} retains only the columns used in \code{...} to create new +columns. This is useful for checking your work, as it displays inputs +and outputs side-by-side. +\item \code{"unused"} retains only the columns \emph{not} used in \code{...} to create new +columns. This is useful if you generate new columns, but no longer need +the columns used to generate them. +\item \code{"none"} doesn't retain any extra columns from \code{.data}. Only the grouping +variables and columns created by \code{...} are kept. +}} + +\item{.before, .after}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Optionally, control where new columns +should appear (the default is to add to the right hand side). See +\code{\link[dplyr:relocate]{relocate()}} for more details.} +} +\description{ +\code{\link[=mutate.cnd_df]{mutate.cnd_df()}} is an S3 method to be dispatched by \link[dplyr:mutate]{mutate} +generic on conditioned data frames. This function implements a conditional +mutate by only changing rows for which the condition stored in the +conditioned data frame is \code{TRUE}. +} diff --git a/man/sdtm_assign.Rd b/man/sdtm_assign.Rd index 676979dc..85601e3c 100644 --- a/man/sdtm_assign.Rd +++ b/man/sdtm_assign.Rd @@ -37,7 +37,7 @@ the variables indicated in \code{id_vars}. This parameter is optional, see section Value for how the output changes depending on this argument value.} \item{id_vars}{Key variables to be used in the join between the raw dataset -(\code{raw_dat}) and the target data set (\code{raw_dat}).} +(\code{raw_dat}) and the target data set (\code{tgt_dat}).} } \value{ The returned data set depends on the value of \code{tgt_dat}: diff --git a/man/sdtm_hardcode.Rd b/man/sdtm_hardcode.Rd index 5c3435b5..d8cb59a6 100644 --- a/man/sdtm_hardcode.Rd +++ b/man/sdtm_hardcode.Rd @@ -41,7 +41,7 @@ the variables indicated in \code{id_vars}. This parameter is optional, see section Value for how the output changes depending on this argument value.} \item{id_vars}{Key variables to be used in the join between the raw dataset -(\code{raw_dat}) and the target data set (\code{raw_dat}).} +(\code{raw_dat}) and the target data set (\code{tgt_dat}).} } \value{ The returned data set depends on the value of \code{tgt_dat}: diff --git a/man/sdtm_join.Rd b/man/sdtm_join.Rd new file mode 100644 index 00000000..1c7c02e7 --- /dev/null +++ b/man/sdtm_join.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/join.R, R/sdtm_join.R +\name{sdtm_join} +\alias{sdtm_join} +\title{SDTM join} +\usage{ +sdtm_join(raw_dat, tgt_dat = NULL, id_vars = oak_id_vars()) + +sdtm_join(raw_dat, tgt_dat = NULL, id_vars = oak_id_vars()) +} +\arguments{ +\item{raw_dat}{The raw dataset: a dataframe or a conditioned data frame. Must +include the variables passed in \code{id_vars}.} + +\item{tgt_dat}{Target dataset: a data frame or a conditioned data frame to be +merged against \code{raw_dat} by the variables indicated in \code{id_vars}.} + +\item{id_vars}{Key variables to be used in the join between the raw dataset +(\code{raw_dat}) and the target data set (\code{tgt_dat}).} +} +\value{ +A data frame, or a conditioned data frame if at least one of the +input data sets is a conditioned data frame. + +A data frame, or a conditioned data frame if, at least, one of the +input data sets is a conditioned data frame. +} +\description{ +\code{\link[=sdtm_join]{sdtm_join()}} is a special join between a raw data set and a target data +set. This function supports conditioned data frames. + +\code{\link[=sdtm_join]{sdtm_join()}} is a special join between a raw data set and a target data +set. This function supports conditioned data frames. +} +\keyword{internal} diff --git a/man/tbl_sum.cnd_df.Rd b/man/tbl_sum.cnd_df.Rd index f8a50544..760e5507 100644 --- a/man/tbl_sum.cnd_df.Rd +++ b/man/tbl_sum.cnd_df.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/cnd_df.R \name{tbl_sum.cnd_df} \alias{tbl_sum.cnd_df} -\title{Print} +\title{Conditioned tibble header print method} \usage{ \method{tbl_sum}{cnd_df}(x, ...) } @@ -12,5 +12,5 @@ \item{...}{Additional arguments passed to the default print method.} } \description{ -Blah +Conditioned tibble header print method } diff --git a/tests/testthat/test-assign.R b/tests/testthat/test-assign.R new file mode 100644 index 00000000..46d86a87 --- /dev/null +++ b/tests/testthat/test-assign.R @@ -0,0 +1,167 @@ +test_that("assign_ct works as expected with a conditioned `tgt_dat`", { + + vs_raw_dat <- tibble::tibble( + oak_id = 1:5, + raw_source = c("VS1", "VS2", "VS3", "VS4", "VS5"), + patient_number = c(101L, 102L, 103L, 104L, 105L), + TEMPLOC = c("Oral", "Axillary", "Rectal", "Tympanic", "Temporal") + ) + + vs_tgt_dat <- tibble::tibble( + oak_id = as.integer(rep(1:5, each = 4)), + raw_source = rep(c("VS1", "VS2", "VS3", "VS4", "VS5"), each = 4), + patient_number = as.integer(rep(c(101L, 102L, 103L, 104L, 105L), each = 4)), + VSTESTCD = c("TEMP", "BPSYS", "BPDIAS", "HR", + "TEMP", "BPSYS", "BPDIAS", "HR", + "TEMP", "BPSYS", "BPDIAS", "HR", + "TEMP", "BPSYS", "BPDIAS", "HR", + "TEMP", "BPSYS", "BPDIAS", "HR") + ) + + # vital signs' locations + vs_loc_raw <- c("Mouth", "Arm", "Arm", "Arm", "Armpit", "Arm", "Arm", "Arm", + "Rectum", "Arm", "Arm", "Arm", "auris", "Arm", "Arm", "Arm", "brow", "Arm", + "Arm", "Arm") + + vs_loc_tgt <- c( + "ORAL", + rep(NA, 3L), + "AXILLA", + rep(NA, 3L), + "ANUS", + rep(NA, 3L), + "EAR", + rep(NA, 3L), + "FOREHEAD", + rep(NA, 3L) + ) + + ct_spec <- tibble::tibble( + codelist_code = "C74456", + term_code = c("C32141", "C12674", "C12394", "C89803", "C43362"), + CodedData = c("ARM", "AXILLA", "EAR", "FOREHEAD", "ANUS"), + term_value = c("ARM", "AXILLA", "EAR", "FOREHEAD", "ANUS"), + collected_value = c("Arm", "Armpit", "auris", "brow", "anus"), + term_synonyms = c("Arm", "Axillary", "Tympanic", "Temporal", "Rectal") + ) + + result <- + assign_ct( + raw_dat = vs_raw_dat, + raw_var = "TEMPLOC", + tgt_var = "VSLOC", + ct_spec = ct_spec, + ct_clst = "C74456", + tgt_dat = condition_by(vs_tgt_dat, VSTESTCD == "TEMP") + ) + + expected_result <- + tibble::add_column( + vs_tgt_dat, + VSLOC = vs_loc_tgt + ) + + expect_equal(result, expected_result) +}) + + +test_that("assign_ct works as expected with both `raw_dat` and `tgt_dat` as conditioned data frames", { + + ct_spec <- tibble::tibble( + codelist_code = "C78734", + term_code = c("C150895", "C12434", "C13275", "C89803", "C12801"), + CodedData = c("SWABBED MATERIAL", "BLOOD", "SALIVA", "URINE", "TISSUE"), + term_value = c("SWABBED MATERIAL", "BLOOD", "SALIVA", "URINE", "TISSUE"), + collected_value = c("Nasopharyngeal Swab", "blood", "drool", "urine sample", "tissue"), + term_synonyms = c("Swab", "Blood", "Spit", "urinary excretion", "tissue sample") + ) + + fa_raw_dat <- tibble::tibble( + oak_id = as.integer(1:5), + raw_source = c("FA1", "FA2", "FA3", "FA4", "FA5"), + patient_number = 101:105, + SPCNM = c("Nasopharyngeal Swab", "Blood", "Saliva", "Urine", "Tissue"), + SPECTYP = c(NA, NA, "Swab", NA, NA) + ) + + fa_tgt_dat <- tibble::tibble( + oak_id = 1:5, + raw_source = c("FA1", "FA2", "FA3", "FA4", "FA5"), + patient_number = 101:105, + FATESTCD = c("STATUS", "OTHER", "STATUS", "STATUS", "OTHER"), + FAOBJ = c( + "Severe Acute Resp Syndrome Coronavirus 2", + "Other Condition", + "Severe Acute Resp Syndrome Coronavirus 2", + "Severe Acute Resp Syndrome Coronavirus 2", + "Other Condition" + ) + ) + + result <- + assign_ct( + raw_dat = condition_by(fa_raw_dat, is.na(SPECTYP)), + raw_var = "SPCNM", + tgt_var = "FASPEC", + ct_spec = ct_spec, + ct_clst = "C78734", + tgt_dat = condition_by( + fa_tgt_dat, + FATESTCD == "STATUS" & + FAOBJ == "Severe Acute Resp Syndrome Coronavirus 2" + ) + ) + + expected_result <- + fa_tgt_dat |> + tibble::add_column(FASPEC = c("SWABBED MATERIAL", NA, NA, "URINE", NA)) + + expect_equal(result, expected_result) +}) + +test_that("assign_ct works as expected with conditions across both data sets", { + + cm_raw_dat <- tibble::tibble( + oak_id = 1:5, + raw_source = paste0("MD", 1:5), + patient_number = 101:105, + CMMODIFY = c("ASPIRIN EC", "IBUPROFEN LYSINE" , "PARACETAMOL", "DICLOFENAC", "NAPROXEN") + ) + + cm_tgt_dat <- tibble::tibble( + oak_id = 1:5, + raw_source = paste0("MD", 1:5), + patient_number = 101:105, + CMTRT = c("ASPIRIN", "IBUPROFEN", "PARACETAMOL", "DICLOFENAC", "NAPROXEN") + ) + + # This only works if the raw data set and the target data set have the same + # number of records, otherwise the comparison CMMODIFY != CMTRT is not + # meaningful. + result1 <- + assign_no_ct( + raw_dat = cm_raw_dat, + raw_var = "CMMODIFY", + tgt_var = "CMMODIFY", + tgt_dat = condition_by(cm_tgt_dat, CMMODIFY != CMTRT, .env = cm_raw_dat) + ) + + # Because both data sets have to have the same number of records for the + # comparison to be meaningful, then we can just as well condition the + # raw data set itself. + result2 <- + assign_no_ct( + raw_dat = condition_by(cm_raw_dat, CMMODIFY != CMTRT, .env = cm_tgt_dat), + raw_var = "CMMODIFY", + tgt_var = "CMMODIFY", + tgt_dat = cm_tgt_dat + ) + + expected_result <- + cm_tgt_dat |> + tibble::add_column(CMMODIFY = c("ASPIRIN EC", "IBUPROFEN LYSINE", NA, NA, NA)) + + expect_equal(result1, expected_result) + expect_equal(result2, expected_result) + +}) diff --git a/tests/testthat/test-hardcode.R b/tests/testthat/test-hardcode.R index 29af4b32..9f7ec73e 100644 --- a/tests/testthat/test-hardcode.R +++ b/tests/testthat/test-hardcode.R @@ -79,3 +79,6 @@ test_that("hardcode_ct works as expected", { expect_equal(result, expected_result) }) + + +