Skip to content

Commit

Permalink
Basic support for conditioned data sets
Browse files Browse the repository at this point in the history
  • Loading branch information
ramiromagno committed May 26, 2024
1 parent 8dfa57f commit d08794b
Show file tree
Hide file tree
Showing 10 changed files with 134 additions and 29 deletions.
4 changes: 4 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# Generated by roxygen2: do not edit by hand

S3method(ctl_new_rowid_pillar,cnd_df)
S3method(mutate,cnd_df)
S3method(print,iso8601)
S3method(tbl_sum,cnd_df)
export(assign_ct)
Expand All @@ -19,6 +21,8 @@ export(problems)
export(read_ct_spec)
export(read_ct_spec_example)
export(rm_cnd_df)
importFrom(dplyr,mutate)
importFrom(pillar,ctl_new_rowid_pillar)
importFrom(pillar,tbl_sum)
importFrom(rlang,"%||%")
importFrom(rlang,":=")
Expand Down
6 changes: 5 additions & 1 deletion R/assign.R
Original file line number Diff line number Diff line change
Expand Up @@ -61,13 +61,17 @@ sdtm_assign <- function(raw_dat,
der_dat <-
raw_dat |>
dplyr::select(c(id_vars, raw_var)) |>
dplyr::mutate("{tgt_var}" := tgt_val) |> # nolint object_name_linter()
mutate("{tgt_var}" := tgt_val) |> # nolint object_name_linter()
dplyr::select(-rlang::sym(raw_var))

# If a target dataset is supplied, then join the so far derived dataset with
# the target dataset (`tgt_dat`), otherwise leave it be.
der_dat <-
if (!is.null(tgt_dat)) {
# If variable `tgt_var` exists in `tgt_dat` remove it as we want to
# keep the derived variable in `der_dat`.
tgt_dat <- dplyr::select(tgt_dat, -dplyr::any_of(tgt_var))

der_dat |>
dplyr::right_join(y = tgt_dat, by = id_vars) |>
dplyr::relocate(tgt_var, .after = dplyr::last_col())
Expand Down
7 changes: 5 additions & 2 deletions R/assign_datetime.R
Original file line number Diff line number Diff line change
Expand Up @@ -179,12 +179,15 @@ assign_datetime <-

der_dat <-
raw_dat |>
dplyr::select(c(id_vars, raw_var)) |>
dplyr::select(dplyr::all_of(c(id_vars, raw_var))) |>
dplyr::mutate("{tgt_var}" := tgt_val) |> # nolint object_name_linter()
dplyr::select(-raw_var)
dplyr::select(-dplyr::any_of(raw_var))

der_dat <-
if (!is.null(tgt_dat)) {
# If variable `tgt_var` exists in `tgt_dat` remove it as we want to
# keep the derived variable in `der_dat`.
tgt_dat <- dplyr::select(tgt_dat, -dplyr::any_of(tgt_var))
der_dat |>
dplyr::right_join(y = tgt_dat, by = id_vars) |>
dplyr::relocate(tgt_var, .after = dplyr::last_col())
Expand Down
39 changes: 24 additions & 15 deletions R/cnd_df.R
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ new_cnd_df <- function(dat, cnd, .warn = TRUE) {
}

if (!is_cnd_df) {
dat <- tibble::as_tibble(dat)
class(dat) <- c("cnd_df", class(dat))
}

Expand Down Expand Up @@ -103,13 +104,13 @@ get_cnd_df_cnd_sum <- function(dat) {
}
}

#' Remove the cnd_df class from a data frame
#' Remove the `cnd_df` class from a data frame
#'
#' This function removes the 'cnd_df' class, along with its attributes, if
#' This function removes the `cnd_df` class, along with its attributes, if
#' applicable.
#'
#' @param dat A data frame.
#' @return The input `dat` without the 'cnd_df' class.
#' @return The input `dat` without the `cnd_df` class.
#'
#' @export
rm_cnd_df <- function(dat) {
Expand All @@ -125,7 +126,7 @@ rm_cnd_df <- function(dat) {
#'
#' Blah
#'
#' @param x A conditioned tibble of class 'cnd_df'.
#' @param x A conditioned tibble of class `cnd_df`.
#' @param ... Additional arguments passed to the default print method.
#'
#' @importFrom pillar tbl_sum
Expand All @@ -142,6 +143,8 @@ lgl_to_chr <- function(x) {
ifelse(is.na(x), "-", ifelse(x, "T", "F"))
}

#' @importFrom pillar ctl_new_rowid_pillar
#' @export
ctl_new_rowid_pillar.cnd_df <- function(controller, x, width, ...) {

out <- NextMethod()
Expand Down Expand Up @@ -188,10 +191,10 @@ ctl_new_rowid_pillar.cnd_df <- function(controller, x, width, ...) {
#'
#' @param dat A data frame
#' @param ... A set of logical conditions, e.g. `y & z, x | z` (`x`, `y`, `z`
#' would have to exist either as columns in `dat` or in the enviroment
#' would have to exist either as columns in `dat` or in the environment
#' `.env`). If multiple expressions are included, they are combined with the
#' `&` operator.
#' @param .na Return value to be used when the conditions evalute to `NA`.
#' @param .na Return value to be used when the conditions evaluate to `NA`.
#' @param .env An optional environment to look for variables involved in logical
#' expression passed in `...`. A data frame or a list can also be passed that
#' will be coerced to an environment internally.
Expand Down Expand Up @@ -262,9 +265,9 @@ eval_conditions <- function(dat,
#'
#' @param dat A tibble.
#' @param ... Conditions to filter the tibble.
#' @return A tibble with an additional class 'cnd_df' and a logical vector
#' @return A tibble with an additional class `cnd_df` and a logical vector
#' attribute indicating matching rows.
#' @param .na Return value to be used when the conditions evalute to `NA`.
#' @param .na Return value to be used when the conditions evaluate to `NA`.
#' @param .env An optional environment to look for variables involved in logical
#' expression passed in `...`. A data frame or a list can also be passed that
#' will be coerced to an environment internally.
Expand All @@ -283,17 +286,23 @@ condition_by <- function(dat, ..., .na = NA, .env = rlang::env()) {
new_cnd_df(dat, cnd = cnd, .warn = FALSE)
}

#' @keywords internal
derive_by_condition <- function(dat, ...) {
#' @importFrom dplyr mutate
#' @export
mutate.cnd_df <- function(.data,
...,
.by = NULL,
.keep = c("all", "used", "unused", "none"),
.before = NULL,
.after = NULL) {

cnd <- get_cnd_df_cnd(.data)
dat <- rm_cnd_df(.data) # avoids recursive S3 method dispatch.

cnd <- get_cnd_df_cnd(dat)
derivations <- rlang::enquos(...)
derived_vars <- names(derivations)

lst <- purrr::map(derivations, ~ rlang::expr(dplyr::if_else({{cnd}}, !!.x, NA)))
lst <- rlang::set_names(lst, derived_vars)
dat2 <- dplyr::mutate({{dat}}, !!!lst)
rm_cnd_df(dat2)
}


dplyr::mutate(dat, !!!lst)
}
10 changes: 7 additions & 3 deletions R/hardcode.R
Original file line number Diff line number Diff line change
Expand Up @@ -63,14 +63,18 @@ sdtm_hardcode <- function(raw_dat,
# `der_dat`: derived dataset.
der_dat <-
raw_dat |>
dplyr::select(c(id_vars, raw_var)) |>
dplyr::mutate("{tgt_var}" := recode(x = !!rlang::sym(raw_var), to = tgt_val)) |> # nolint object_name_linter()
dplyr::select(-rlang::sym(raw_var))
dplyr::select(dplyr::all_of(c(id_vars, raw_var))) |>
mutate("{tgt_var}" := recode(x = !!rlang::sym(raw_var), to = tgt_val)) |> # nolint object_name_linter()
dplyr::select(-dplyr::any_of(raw_var))

# If a target dataset is supplied, then join the so far derived dataset with
# the target dataset (`tgt_dat`), otherwise leave it be.
der_dat <-
if (!is.null(tgt_dat)) {
# If variable `tgt_var` exists in `tgt_dat` remove it as we want to
# keep the derived variable in `der_dat`.
tgt_dat <- dplyr::select(tgt_dat, -dplyr::any_of(tgt_var))

der_dat |>
dplyr::right_join(y = tgt_dat, by = id_vars) |>
dplyr::relocate(tgt_var, .after = dplyr::last_col())
Expand Down
4 changes: 2 additions & 2 deletions man/condition_by.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/eval_conditions.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions man/rm_cnd_df.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/tbl_sum.cnd_df.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

81 changes: 81 additions & 0 deletions tests/testthat/test-hardcode.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# `aesos`: example raw data set.
aesos <- tibble::tribble(
~oak_id, ~raw_source, ~patient_number, ~AESO, ~AESOSP, ~AESEV, ~AESER, ~AETERM,
1L, "RS1", 101L, 0L, "Pain", "Mild", "No", "Headache",
2L, "RS1", 102L, 0L, NA, "Severe", "Yes", "Dizziness",
3L, "RS2", 103L, 1L, NA, "Moderate", "No", NA,
4L, "RS2", 104L, 1L, NA, "Mild", "No", "Eye issues",
5L, "RS3", 105L, 1L, "Nausea", "Severe", "Yes", "Food Poisoning"
)

# `oe_inter`: example target data set.
oe_inter <- tibble::tribble(
~oak_id, ~raw_source, ~patient_number,
1L, "RS1", 101L,
3L, "RS2", 103L,
4L, "RS2", 104L,
5L, "RS3", 105L,
)

test_that("hardcode_no_ct works as expected", {
aesos_cnd <- condition_by(aesos, AESO == 1L & !is.na(AESOSP))

result <- hardcode_no_ct(
raw_dat = aesos_cnd,
raw_var = "AESO",
tgt_var = "OEORRES",
tgt_val = "Y",
tgt_dat = oe_inter
)

expected_result <- tibble::tribble(
~oak_id, ~raw_source, ~patient_number, ~OEORRES,
# NA because `aesos_cnd` is conditioned to be FALSE on this record.
1L, "RS1", 101L, NA_character_,
# NA because `aesos_cnd` is conditioned to be FALSE on this record.
3L, "RS2", 103L, NA_character_,
# NA because `aesos_cnd` is conditioned to be FALSE on this record.
4L, "RS2", 104L, NA_character_,
# Successful derivation
5L, "RS3", 105L, "Y"
)

expect_equal(result, expected_result)
})

test_that("hardcode_ct works as expected", {
aesos_cnd <- condition_by(aesos, AESO == 1L & is.na(AESOSP))
ct_spec <- tibble::tibble(
codelist_code = "C117743",
term_code = "C178048",
CodedData = "HYPERMIA",
term_value = "HYPERMIA",
collected_value = "IOISYMPO",
term_synonyms = "IOISYMPO"
)

result <-
hardcode_ct(
raw_dat = aesos_cnd,
raw_var = "AETERM",
tgt_var = "OETESTCD",
tgt_val = "IOISYMPO",
ct_spec = ct_spec,
ct_clst = "C117743",
tgt_dat = oe_inter
)

expected_result <- tibble::tribble(
~oak_id, ~raw_source, ~patient_number, ~OETESTCD,
# `NA` because `aesos_cnd` is conditioned to be FALSE for this record.
1L, "RS1", 101L, NA_character_,
# `NA` because AETERM == NA for this record in `aesos_cnd`.
3L, "RS2", 103L, NA_character_,
# Successful derivation: IOISYMPO -> HYPERMIA.
4L, "RS2", 104L, "HYPERMIA",
# `NA` because `aesos_cnd` is conditioned to be FALSE for this record.
5L, "RS3", 105L, NA_character_
)

expect_equal(result, expected_result)
})

0 comments on commit d08794b

Please sign in to comment.