diff --git a/NAMESPACE b/NAMESPACE index 472cb825..5ec1b0bd 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,7 @@ # Generated by roxygen2: do not edit by hand +S3method(ctl_new_rowid_pillar,cnd_df) +S3method(mutate,cnd_df) S3method(print,iso8601) S3method(tbl_sum,cnd_df) export(assign_ct) @@ -19,6 +21,8 @@ export(problems) export(read_ct_spec) export(read_ct_spec_example) export(rm_cnd_df) +importFrom(dplyr,mutate) +importFrom(pillar,ctl_new_rowid_pillar) importFrom(pillar,tbl_sum) importFrom(rlang,"%||%") importFrom(rlang,":=") diff --git a/R/assign.R b/R/assign.R index a91eacaf..c9c1d831 100644 --- a/R/assign.R +++ b/R/assign.R @@ -61,13 +61,17 @@ sdtm_assign <- function(raw_dat, der_dat <- raw_dat |> dplyr::select(c(id_vars, raw_var)) |> - dplyr::mutate("{tgt_var}" := tgt_val) |> # nolint object_name_linter() + mutate("{tgt_var}" := tgt_val) |> # nolint object_name_linter() dplyr::select(-rlang::sym(raw_var)) # If a target dataset is supplied, then join the so far derived dataset with # the target dataset (`tgt_dat`), otherwise leave it be. der_dat <- if (!is.null(tgt_dat)) { + # If variable `tgt_var` exists in `tgt_dat` remove it as we want to + # keep the derived variable in `der_dat`. + tgt_dat <- dplyr::select(tgt_dat, -dplyr::any_of(tgt_var)) + der_dat |> dplyr::right_join(y = tgt_dat, by = id_vars) |> dplyr::relocate(tgt_var, .after = dplyr::last_col()) diff --git a/R/assign_datetime.R b/R/assign_datetime.R index 4622f579..d8d4ec8b 100644 --- a/R/assign_datetime.R +++ b/R/assign_datetime.R @@ -179,12 +179,15 @@ assign_datetime <- der_dat <- raw_dat |> - dplyr::select(c(id_vars, raw_var)) |> + dplyr::select(dplyr::all_of(c(id_vars, raw_var))) |> dplyr::mutate("{tgt_var}" := tgt_val) |> # nolint object_name_linter() - dplyr::select(-raw_var) + dplyr::select(-dplyr::any_of(raw_var)) der_dat <- if (!is.null(tgt_dat)) { + # If variable `tgt_var` exists in `tgt_dat` remove it as we want to + # keep the derived variable in `der_dat`. + tgt_dat <- dplyr::select(tgt_dat, -dplyr::any_of(tgt_var)) der_dat |> dplyr::right_join(y = tgt_dat, by = id_vars) |> dplyr::relocate(tgt_var, .after = dplyr::last_col()) diff --git a/R/cnd_df.R b/R/cnd_df.R index f9640294..9ef23a83 100644 --- a/R/cnd_df.R +++ b/R/cnd_df.R @@ -69,6 +69,7 @@ new_cnd_df <- function(dat, cnd, .warn = TRUE) { } if (!is_cnd_df) { + dat <- tibble::as_tibble(dat) class(dat) <- c("cnd_df", class(dat)) } @@ -103,13 +104,13 @@ get_cnd_df_cnd_sum <- function(dat) { } } -#' Remove the cnd_df class from a data frame +#' Remove the `cnd_df` class from a data frame #' -#' This function removes the 'cnd_df' class, along with its attributes, if +#' This function removes the `cnd_df` class, along with its attributes, if #' applicable. #' #' @param dat A data frame. -#' @return The input `dat` without the 'cnd_df' class. +#' @return The input `dat` without the `cnd_df` class. #' #' @export rm_cnd_df <- function(dat) { @@ -125,7 +126,7 @@ rm_cnd_df <- function(dat) { #' #' Blah #' -#' @param x A conditioned tibble of class 'cnd_df'. +#' @param x A conditioned tibble of class `cnd_df`. #' @param ... Additional arguments passed to the default print method. #' #' @importFrom pillar tbl_sum @@ -142,6 +143,8 @@ lgl_to_chr <- function(x) { ifelse(is.na(x), "-", ifelse(x, "T", "F")) } +#' @importFrom pillar ctl_new_rowid_pillar +#' @export ctl_new_rowid_pillar.cnd_df <- function(controller, x, width, ...) { out <- NextMethod() @@ -188,10 +191,10 @@ ctl_new_rowid_pillar.cnd_df <- function(controller, x, width, ...) { #' #' @param dat A data frame #' @param ... A set of logical conditions, e.g. `y & z, x | z` (`x`, `y`, `z` -#' would have to exist either as columns in `dat` or in the enviroment +#' would have to exist either as columns in `dat` or in the environment #' `.env`). If multiple expressions are included, they are combined with the #' `&` operator. -#' @param .na Return value to be used when the conditions evalute to `NA`. +#' @param .na Return value to be used when the conditions evaluate to `NA`. #' @param .env An optional environment to look for variables involved in logical #' expression passed in `...`. A data frame or a list can also be passed that #' will be coerced to an environment internally. @@ -262,9 +265,9 @@ eval_conditions <- function(dat, #' #' @param dat A tibble. #' @param ... Conditions to filter the tibble. -#' @return A tibble with an additional class 'cnd_df' and a logical vector +#' @return A tibble with an additional class `cnd_df` and a logical vector #' attribute indicating matching rows. -#' @param .na Return value to be used when the conditions evalute to `NA`. +#' @param .na Return value to be used when the conditions evaluate to `NA`. #' @param .env An optional environment to look for variables involved in logical #' expression passed in `...`. A data frame or a list can also be passed that #' will be coerced to an environment internally. @@ -283,17 +286,23 @@ condition_by <- function(dat, ..., .na = NA, .env = rlang::env()) { new_cnd_df(dat, cnd = cnd, .warn = FALSE) } -#' @keywords internal -derive_by_condition <- function(dat, ...) { +#' @importFrom dplyr mutate +#' @export +mutate.cnd_df <- function(.data, + ..., + .by = NULL, + .keep = c("all", "used", "unused", "none"), + .before = NULL, + .after = NULL) { + + cnd <- get_cnd_df_cnd(.data) + dat <- rm_cnd_df(.data) # avoids recursive S3 method dispatch. - cnd <- get_cnd_df_cnd(dat) derivations <- rlang::enquos(...) derived_vars <- names(derivations) lst <- purrr::map(derivations, ~ rlang::expr(dplyr::if_else({{cnd}}, !!.x, NA))) lst <- rlang::set_names(lst, derived_vars) - dat2 <- dplyr::mutate({{dat}}, !!!lst) - rm_cnd_df(dat2) -} - + dplyr::mutate(dat, !!!lst) +} diff --git a/R/hardcode.R b/R/hardcode.R index 31938689..ffce1bd7 100644 --- a/R/hardcode.R +++ b/R/hardcode.R @@ -63,14 +63,18 @@ sdtm_hardcode <- function(raw_dat, # `der_dat`: derived dataset. der_dat <- raw_dat |> - dplyr::select(c(id_vars, raw_var)) |> - dplyr::mutate("{tgt_var}" := recode(x = !!rlang::sym(raw_var), to = tgt_val)) |> # nolint object_name_linter() - dplyr::select(-rlang::sym(raw_var)) + dplyr::select(dplyr::all_of(c(id_vars, raw_var))) |> + mutate("{tgt_var}" := recode(x = !!rlang::sym(raw_var), to = tgt_val)) |> # nolint object_name_linter() + dplyr::select(-dplyr::any_of(raw_var)) # If a target dataset is supplied, then join the so far derived dataset with # the target dataset (`tgt_dat`), otherwise leave it be. der_dat <- if (!is.null(tgt_dat)) { + # If variable `tgt_var` exists in `tgt_dat` remove it as we want to + # keep the derived variable in `der_dat`. + tgt_dat <- dplyr::select(tgt_dat, -dplyr::any_of(tgt_var)) + der_dat |> dplyr::right_join(y = tgt_dat, by = id_vars) |> dplyr::relocate(tgt_var, .after = dplyr::last_col()) diff --git a/man/condition_by.Rd b/man/condition_by.Rd index f3d0cf7e..e1b7cb2a 100644 --- a/man/condition_by.Rd +++ b/man/condition_by.Rd @@ -11,14 +11,14 @@ condition_by(dat, ..., .na = NA, .env = rlang::env()) \item{...}{Conditions to filter the tibble.} -\item{.na}{Return value to be used when the conditions evalute to \code{NA}.} +\item{.na}{Return value to be used when the conditions evaluate to \code{NA}.} \item{.env}{An optional environment to look for variables involved in logical expression passed in \code{...}. A data frame or a list can also be passed that will be coerced to an environment internally.} } \value{ -A tibble with an additional class 'cnd_df' and a logical vector +A tibble with an additional class \code{cnd_df} and a logical vector attribute indicating matching rows. } \description{ diff --git a/man/eval_conditions.Rd b/man/eval_conditions.Rd index 3de50b96..ce5286d4 100644 --- a/man/eval_conditions.Rd +++ b/man/eval_conditions.Rd @@ -10,11 +10,11 @@ eval_conditions(dat, ..., .na = NA, .env = rlang::env()) \item{dat}{A data frame} \item{...}{A set of logical conditions, e.g. \verb{y & z, x | z} (\code{x}, \code{y}, \code{z} -would have to exist either as columns in \code{dat} or in the enviroment +would have to exist either as columns in \code{dat} or in the environment \code{.env}). If multiple expressions are included, they are combined with the \code{&} operator.} -\item{.na}{Return value to be used when the conditions evalute to \code{NA}.} +\item{.na}{Return value to be used when the conditions evaluate to \code{NA}.} \item{.env}{An optional environment to look for variables involved in logical expression passed in \code{...}. A data frame or a list can also be passed that diff --git a/man/rm_cnd_df.Rd b/man/rm_cnd_df.Rd index 21f096b6..531c972d 100644 --- a/man/rm_cnd_df.Rd +++ b/man/rm_cnd_df.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/cnd_df.R \name{rm_cnd_df} \alias{rm_cnd_df} -\title{Remove the cnd_df class from a data frame} +\title{Remove the \code{cnd_df} class from a data frame} \usage{ rm_cnd_df(dat) } @@ -10,9 +10,9 @@ rm_cnd_df(dat) \item{dat}{A data frame.} } \value{ -The input \code{dat} without the 'cnd_df' class. +The input \code{dat} without the \code{cnd_df} class. } \description{ -This function removes the 'cnd_df' class, along with its attributes, if +This function removes the \code{cnd_df} class, along with its attributes, if applicable. } diff --git a/man/tbl_sum.cnd_df.Rd b/man/tbl_sum.cnd_df.Rd index 5a0f301f..f8a50544 100644 --- a/man/tbl_sum.cnd_df.Rd +++ b/man/tbl_sum.cnd_df.Rd @@ -7,7 +7,7 @@ \method{tbl_sum}{cnd_df}(x, ...) } \arguments{ -\item{x}{A conditioned tibble of class 'cnd_df'.} +\item{x}{A conditioned tibble of class \code{cnd_df}.} \item{...}{Additional arguments passed to the default print method.} } diff --git a/tests/testthat/test-hardcode.R b/tests/testthat/test-hardcode.R new file mode 100644 index 00000000..29af4b32 --- /dev/null +++ b/tests/testthat/test-hardcode.R @@ -0,0 +1,81 @@ +# `aesos`: example raw data set. +aesos <- tibble::tribble( + ~oak_id, ~raw_source, ~patient_number, ~AESO, ~AESOSP, ~AESEV, ~AESER, ~AETERM, + 1L, "RS1", 101L, 0L, "Pain", "Mild", "No", "Headache", + 2L, "RS1", 102L, 0L, NA, "Severe", "Yes", "Dizziness", + 3L, "RS2", 103L, 1L, NA, "Moderate", "No", NA, + 4L, "RS2", 104L, 1L, NA, "Mild", "No", "Eye issues", + 5L, "RS3", 105L, 1L, "Nausea", "Severe", "Yes", "Food Poisoning" +) + +# `oe_inter`: example target data set. +oe_inter <- tibble::tribble( + ~oak_id, ~raw_source, ~patient_number, + 1L, "RS1", 101L, + 3L, "RS2", 103L, + 4L, "RS2", 104L, + 5L, "RS3", 105L, +) + +test_that("hardcode_no_ct works as expected", { + aesos_cnd <- condition_by(aesos, AESO == 1L & !is.na(AESOSP)) + + result <- hardcode_no_ct( + raw_dat = aesos_cnd, + raw_var = "AESO", + tgt_var = "OEORRES", + tgt_val = "Y", + tgt_dat = oe_inter + ) + + expected_result <- tibble::tribble( + ~oak_id, ~raw_source, ~patient_number, ~OEORRES, + # NA because `aesos_cnd` is conditioned to be FALSE on this record. + 1L, "RS1", 101L, NA_character_, + # NA because `aesos_cnd` is conditioned to be FALSE on this record. + 3L, "RS2", 103L, NA_character_, + # NA because `aesos_cnd` is conditioned to be FALSE on this record. + 4L, "RS2", 104L, NA_character_, + # Successful derivation + 5L, "RS3", 105L, "Y" + ) + + expect_equal(result, expected_result) +}) + +test_that("hardcode_ct works as expected", { + aesos_cnd <- condition_by(aesos, AESO == 1L & is.na(AESOSP)) + ct_spec <- tibble::tibble( + codelist_code = "C117743", + term_code = "C178048", + CodedData = "HYPERMIA", + term_value = "HYPERMIA", + collected_value = "IOISYMPO", + term_synonyms = "IOISYMPO" + ) + + result <- + hardcode_ct( + raw_dat = aesos_cnd, + raw_var = "AETERM", + tgt_var = "OETESTCD", + tgt_val = "IOISYMPO", + ct_spec = ct_spec, + ct_clst = "C117743", + tgt_dat = oe_inter + ) + + expected_result <- tibble::tribble( + ~oak_id, ~raw_source, ~patient_number, ~OETESTCD, + # `NA` because `aesos_cnd` is conditioned to be FALSE for this record. + 1L, "RS1", 101L, NA_character_, + # `NA` because AETERM == NA for this record in `aesos_cnd`. + 3L, "RS2", 103L, NA_character_, + # Successful derivation: IOISYMPO -> HYPERMIA. + 4L, "RS2", 104L, "HYPERMIA", + # `NA` because `aesos_cnd` is conditioned to be FALSE for this record. + 5L, "RS3", 105L, NA_character_ + ) + + expect_equal(result, expected_result) +})