Skip to content

Commit

Permalink
if_then_else support via "conditioned" data frames (#55)
Browse files Browse the repository at this point in the history
* Basic support for "conditioned" data frames

- Adds a new S3 class (cnd_df) for represented conditioned data frames, i.e. data frames that carry metadata about what records should be used for derivations

- Adds support for basic pretty printing of cnd_df objects

- Adds a user-facing function for creating such cnd_df objects: `condition_by`

- Adds experimental "mutate"-version function for these conditioned data frames: `derive_by_condition()`

* Basic support for conditioned data sets

* Extensive support for conditioned tibbles

- Joins by raw and target data sets are now aware of conditioned tibbles
- Transformation functions, namely `assign_datetime()`, `hardcode*()` and `assign*` are also conditioned-tibble aware
- Unit test coverage for most cases indicated at #54

I believe the essential components are here to support the if_then_else algorithm via conditioned tibbles. Now, further testing, assertions and documentation is needed.

* Ramm's feedback integration

- Move `tgt_dat` to the first position in the argument list for cleaner command pipes.

- Rename `condition_by()` to `condition_add()`.

- Export `oak_id_vars()` for direct user access.

- Update tidyselections to align with the latest practices.

* Update on conditioned data frames

- Documentation
- Examples
- New article about cnd_df (WIP)

* Styling fixes

* Update linting and styling

* Tidying up

- No need for S3 methods to be exported
- `condition_add()` now links to the appropriate article about conditioned data frames
- Documentation tweaks
- Version bump, NEWS update and pkgdown reference list update

* Last tweaks

- Add example for `condition_add()`
- Re-export S3 methods for `cnd_df`
- Update pkgdown reference list

* Remove blank line

* Tweaks to `%.>%` docs

* Automatic renv profile update.

---------

Authored-by: ramiromagno <[email protected]>
  • Loading branch information
ramiromagno authored Jun 18, 2024
1 parent e1aa479 commit 13644bd
Show file tree
Hide file tree
Showing 55 changed files with 2,314 additions and 760 deletions.
7 changes: 5 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: sdtm.oak
Type: Package
Title: SDTM Data Transformation Engine
Version: 0.0.0.9004
Version: 0.0.0.9005
Authors@R: c(
person("Rammprasad", "Ganapathy", role = c("aut", "cre"),
email = "[email protected]"),
Expand Down Expand Up @@ -46,9 +46,12 @@ Imports:
tibble,
vctrs,
readr,
glue
glue,
pillar
Suggests:
knitr,
lifecycle,
magrittr,
rmarkdown,
spelling,
testthat (>= 3.1.7)
Expand Down
8 changes: 8 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
# Generated by roxygen2: do not edit by hand

S3method(ctl_new_rowid_pillar,cnd_df)
S3method(print,iso8601)
S3method(tbl_sum,cnd_df)
export("%.>%")
export(assign_ct)
export(assign_datetime)
export(assign_no_ct)
export(clear_cache)
export(condition_add)
export(create_iso8601)
export(ct_map)
export(ct_spec_example)
Expand All @@ -15,11 +19,15 @@ export(domain_example)
export(fmt_cmp)
export(hardcode_ct)
export(hardcode_no_ct)
export(oak_id_vars)
export(problems)
export(read_ct_spec)
export(read_ct_spec_example)
export(read_domain_example)
export(sbj_vars)
importFrom(dplyr,mutate)
importFrom(pillar,ctl_new_rowid_pillar)
importFrom(pillar,tbl_sum)
importFrom(rlang,"%||%")
importFrom(rlang,":=")
importFrom(rlang,.data)
Expand Down
8 changes: 6 additions & 2 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
# sdtm.oak 0.0.0.9004 (development version)
# sdtm.oak 0.0.0.9005 (development version)

## New Features
* New function for creating conditioned data frames: `condition_add()`.
* New pipe operator: `%.>%` for explicit dot placeholder placement.
* `oak_id_vars()` is now an exported function.

# sdtm.oak 0.0.0.9004 (development version)

* New function: `derive_seq()` for deriving a sequence number variable.

Expand Down
16 changes: 16 additions & 0 deletions R/assertions.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Surprisingly, admiraldev doesn't provide `assert_logical_vector`.
assert_logical_vector <- function(arg, optional = FALSE) {
if (optional && is.null(arg)) {
return(invisible(arg))
}

if (!is.logical(arg)) {
err_msg <- sprintf(
"`arg` must be a logical vector but is %s.",
admiraldev::what_is_it(arg)
)
rlang::abort(err_msg)
}

invisible(arg)
}
67 changes: 28 additions & 39 deletions R/assign.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
#' the variables indicated in `id_vars`. This parameter is optional, see
#' section Value for how the output changes depending on this argument value.
#' @param id_vars Key variables to be used in the join between the raw dataset
#' (`raw_dat`) and the target data set (`raw_dat`).
#' (`raw_dat`) and the target data set (`tgt_dat`).
#'
#' @returns The returned data set depends on the value of `tgt_dat`:
#' - If no target dataset is supplied, meaning that `tgt_dat` defaults to
Expand All @@ -35,12 +35,12 @@
#'
#' @importFrom rlang :=
#' @keywords internal
sdtm_assign <- function(raw_dat,
raw_var,
sdtm_assign <- function(tgt_dat = NULL,
tgt_var,
raw_dat,
raw_var,
ct_spec = NULL,
ct_clst = NULL,
tgt_dat = NULL,
id_vars = oak_id_vars()) {
admiraldev::assert_character_scalar(raw_var)
admiraldev::assert_character_scalar(tgt_var)
Expand All @@ -53,29 +53,18 @@ sdtm_assign <- function(raw_dat,
assert_ct_spec(ct_spec, optional = TRUE)
assert_ct_clst(ct_spec = ct_spec, ct_clst = ct_clst, optional = TRUE)

# Recode the raw variable following terminology.
tgt_val <- ct_map(raw_dat[[raw_var]], ct_spec = ct_spec, ct_clst = ct_clst)

# Apply derivation by assigning `raw_var` to `tgt_var`.
# `der_dat`: derived dataset.
der_dat <-
join_dat <-
raw_dat |>
dplyr::select(c(id_vars, raw_var)) |>
dplyr::mutate("{tgt_var}" := tgt_val) |> # nolint object_name_linter()
dplyr::select(-rlang::sym(raw_var))
dplyr::select(dplyr::all_of(c(id_vars, raw_var))) |>
sdtm_join(tgt_dat = tgt_dat, id_vars = id_vars)

# If a target dataset is supplied, then join the so far derived dataset with
# the target dataset (`tgt_dat`), otherwise leave it be.
der_dat <-
if (!is.null(tgt_dat)) {
der_dat |>
dplyr::right_join(y = tgt_dat, by = id_vars) |>
dplyr::relocate(tgt_var, .after = dplyr::last_col())
} else {
der_dat
}
# Recode the raw variable following terminology.
tgt_val <- ct_map(join_dat[[raw_var]], ct_spec = ct_spec, ct_clst = ct_clst)

der_dat
join_dat |>
mutate("{tgt_var}" := tgt_val) |> # nolint object_name_linter()
dplyr::select(-dplyr::any_of(setdiff(raw_var, tgt_var))) |>
dplyr::relocate(dplyr::all_of(tgt_var), .after = dplyr::last_col())
}

#' Derive an SDTM variable
Expand Down Expand Up @@ -127,9 +116,9 @@ sdtm_assign <- function(raw_dat,
#' )
#'
#' assign_no_ct(
#' raw_dat = md1,
#' raw_var = "MDIND",
#' tgt_var = "CMINDC",
#' raw_dat = md1,
#' raw_var = "MDIND"
#' )
#'
#' cm_inter <-
Expand Down Expand Up @@ -175,12 +164,12 @@ sdtm_assign <- function(raw_dat,
#' (ct_spec <- read_ct_spec_example("ct-01-cm"))
#'
#' assign_ct(
#' tgt_dat = cm_inter,
#' tgt_var = "CMINDC",
#' raw_dat = md1,
#' raw_var = "MDIND",
#' tgt_var = "CMINDC",
#' ct_spec = ct_spec,
#' ct_clst = "C66729",
#' tgt_dat = cm_inter
#' ct_clst = "C66729"
#' )
#'
#' @name assign
Expand All @@ -189,10 +178,10 @@ NULL
#' @order 1
#' @export
#' @rdname assign
assign_no_ct <- function(raw_dat,
raw_var,
assign_no_ct <- function(tgt_dat = NULL,
tgt_var,
tgt_dat = NULL,
raw_dat,
raw_var,
id_vars = oak_id_vars()) {
admiraldev::assert_character_scalar(raw_var)
admiraldev::assert_character_scalar(tgt_var)
Expand All @@ -204,23 +193,23 @@ assign_no_ct <- function(raw_dat,
admiraldev::assert_data_frame(tgt_dat, required_vars = rlang::syms(id_vars), optional = TRUE)

sdtm_assign(
tgt_dat = tgt_dat,
tgt_var = tgt_var,
raw_dat = raw_dat,
raw_var = raw_var,
tgt_var = tgt_var,
tgt_dat = tgt_dat,
id_vars = id_vars
)
}

#' @order 2
#' @export
#' @rdname assign
assign_ct <- function(raw_dat,
raw_var,
assign_ct <- function(tgt_dat = NULL,
tgt_var,
raw_dat,
raw_var,
ct_spec,
ct_clst,
tgt_dat = NULL,
id_vars = oak_id_vars()) {
admiraldev::assert_character_scalar(raw_var)
admiraldev::assert_character_scalar(tgt_var)
Expand All @@ -232,10 +221,10 @@ assign_ct <- function(raw_dat,
admiraldev::assert_data_frame(tgt_dat, required_vars = rlang::syms(id_vars), optional = TRUE)

sdtm_assign(
tgt_dat = tgt_dat,
tgt_var = tgt_var,
raw_dat = raw_dat,
raw_var = raw_var,
tgt_var = tgt_var,
tgt_dat = tgt_dat,
id_vars = id_vars,
ct_spec = ct_spec,
ct_clst = ct_clst
Expand Down
47 changes: 20 additions & 27 deletions R/assign_datetime.R
Original file line number Diff line number Diff line change
Expand Up @@ -61,11 +61,11 @@
#' # indicating that these values are missing/unknown (unk).
#' cm1 <-
#' assign_datetime(
#' tgt_var = "CMSTDTC",
#' raw_dat = md1,
#' raw_var = "MDBDR",
#' raw_fmt = "d-m-y",
#' raw_unk = c("UN", "UNK"),
#' tgt_var = "CMSTDTC"
#' raw_unk = c("UN", "UNK")
#' )
#'
#' cm1
Expand Down Expand Up @@ -120,11 +120,11 @@
#' # data set `cm_inter`.
#' cm2 <-
#' assign_datetime(
#' tgt_dat = cm_inter,
#' tgt_var = "CMSTDTC",
#' raw_dat = md1,
#' raw_var = "MDBDR",
#' raw_fmt = "d-m-y",
#' tgt_var = "CMSTDTC",
#' tgt_dat = cm_inter
#' raw_fmt = "d-m-y"
#' )
#'
#' cm2
Expand All @@ -137,11 +137,11 @@
#' # MDETM (correspondence is by positional matching).
#' cm3 <-
#' assign_datetime(
#' tgt_var = "CMSTDTC",
#' raw_dat = md1,
#' raw_var = c("MDEDR", "MDETM"),
#' raw_fmt = c("d-m-y", "H:M:S"),
#' raw_unk = c("UN", "UNK"),
#' tgt_var = "CMSTDTC"
#' raw_unk = c("UN", "UNK")
#' )
#'
#' cm3
Expand All @@ -151,12 +151,12 @@
#'
#' @export
assign_datetime <-
function(raw_dat,
function(tgt_dat = NULL,
tgt_var,
raw_dat,
raw_var,
raw_fmt,
tgt_var,
raw_unk = c("UN", "UNK"),
tgt_dat = NULL,
id_vars = oak_id_vars(),
.warn = TRUE) {
admiraldev::assert_character_vector(raw_var)
Expand All @@ -170,27 +170,20 @@ assign_datetime <-
admiraldev::assert_character_vector(raw_unk)
admiraldev::assert_logical_scalar(.warn)

join_dat <-
raw_dat |>
dplyr::select(dplyr::all_of(c(id_vars, raw_var))) |>
sdtm_join(tgt_dat = tgt_dat, id_vars = id_vars)

tgt_val <-
create_iso8601(!!!raw_dat[raw_var],
create_iso8601(!!!join_dat[raw_var],
.format = raw_fmt,
.na = raw_unk,
.warn = .warn
)

der_dat <-
raw_dat |>
dplyr::select(c(id_vars, raw_var)) |>
dplyr::mutate("{tgt_var}" := tgt_val) |> # nolint object_name_linter()
dplyr::select(-raw_var)

der_dat <-
if (!is.null(tgt_dat)) {
der_dat |>
dplyr::right_join(y = tgt_dat, by = id_vars) |>
dplyr::relocate(tgt_var, .after = dplyr::last_col())
} else {
der_dat
}

der_dat
join_dat |>
mutate("{tgt_var}" := tgt_val) |> # nolint object_name_linter()
dplyr::select(-dplyr::any_of(setdiff(raw_var, tgt_var))) |>
dplyr::relocate(dplyr::all_of(tgt_var), .after = dplyr::last_col())
}
Loading

0 comments on commit 13644bd

Please sign in to comment.