Skip to content

Commit

Permalink
Initial mockup of assign_datetime()
Browse files Browse the repository at this point in the history
  • Loading branch information
ramiromagno committed Apr 2, 2024
1 parent a8f1bf5 commit 92e490c
Show file tree
Hide file tree
Showing 3 changed files with 197 additions and 0 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

S3method(print,iso8601)
export(assign_ct)
export(assign_datetime)
export(assign_no_ct)
export(clear_cache)
export(create_iso8601)
Expand Down
100 changes: 100 additions & 0 deletions R/assign_datetime.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
#' Derive an ISO8601 date-time variable
#'
#' [assign_datetime()] maps one or more variables with date/time components in a
#' raw dataset to a target SDTM variable following the ISO8601 format.
#'
#' @param raw_dat The raw dataset (dataframe); must include the
#' variables passed in `id_vars` and `raw_var`.
#' @param raw_var The raw variable(s): a character vector indicating the name(s)
#' of the raw variable(s) in `raw_dat` with date or time components to be
#' parsed into an ISO8601 format variable in `tgt_var`.
#' @param raw_fmt A date/time parsing format. Either a character vector or a
#' list of character vectors. If a character vector is passed then each
#' element is taken as parsing format for each variable indicated in
#' `raw_var`. If a list is provided, then each element must be a character
#' vector of formats. The first vector of formats is used for parsing the
#' first variable in `raw_var`, and so on.
#' @param tgt_var The target SDTM variable: a single string indicating the name
#' of the variable to be derived.
#' @param raw_unk A character vector of string literals to be regarded as
#' missing values during parsing.
#' @param tgt_dat Target dataset: a data frame to be merged against `raw_dat` by
#' the variables indicated in `id_vars`. This parameter is optional, see
#' section Value for how the output changes depending on this argument value.
#' @param id_vars Key variables to be used in the join between the raw dataset
#' (`raw_dat`) and the target data set (`tgt_dat`).
#' @param .warn Whether to warn about parsing failures.
#'
#' @returns The returned data set depends on the value of `tgt_dat`:
#' - If no target dataset is supplied, meaning that `tgt_dat` defaults to
#' `NULL`, then the returned data set is `raw_dat`, selected for the variables
#' indicated in `id_vars`, and a new extra column: the derived variable, as
#' indicated in `tgt_var`.
#' - If the target dataset is provided, then it is merged with the raw data set
#' `raw_dat` by the variables indicated in `id_vars`, with a new column: the
#' derived variable, as indicated in `tgt_var`.
#'
#' @examples
#' md1 <-
#' tibble::tribble(
#' ~oak_id, ~raw_source, ~patient_number, ~MDBDR, ~MDEDR, ~MDETM,
#' 1L, "MD1", 375, NA, NA, NA,
#' 2L, "MD1", 375, "15-Sep-20", NA, NA,
#' 3L, "MD1", 376, "17-Feb-21", "17-Feb-21", NA,
#' 4L, "MD1", 377, "4-Oct-20", NA, NA,
#' 5L, "MD1", 377, "20-Jan-20", "20-Jan-20", "10:00:00",
#' 6L, "MD1", 377, "UN-UNK-2019", "UN-UNK-2019", NA,
#' 7L, "MD1", 377, "20-UNK-2019", "20-UNK-2019", NA,
#' 8L, "MD1", 378, "UN-UNK-2020", "UN-UNK-2020", NA,
#' 9L, "MD1", 378, "26-Jan-20", "26-Jan-20", "07:00:00",
#' 10L, "MD1", 378, "28-Jan-20", "1-Feb-20", NA,
#' 11L, "MD1", 378, "12-Feb-20", "18-Feb-20", NA,
#' 12L, "MD1", 379, "10-UNK-2020", "20-UNK-2020", NA,
#' 13L, "MD1", 379, NA, NA, NA,
#' 14L, "MD1", 379, NA, "17-Feb-20", NA
#' )
#'
#' cm <-
#' assign_datetime(
#' raw_dat = md1,
#' raw_var = "MDBDR",
#' raw_fmt = "d-m-y",
#' raw_unk = c("UN", "UNK"),
#' tgt_var = "CMSTDTC"
#' )
#'
#' cm
#' problems(cm$CMSTDTC)
#'
#' @export
assign_datetime <-
  function(raw_dat,
           raw_var,
           raw_fmt,
           tgt_var,
           raw_unk = c("UN", "UNK"),
           tgt_dat = NULL,
           id_vars = oak_id_vars(),
           .warn = TRUE) {
    # Parse the raw date/time column(s) into one ISO 8601 vector. Each column
    # of `raw_dat[raw_var]` is spliced in as a separate argument, and `.warn`
    # is forwarded so that callers can actually control parsing warnings.
    tgt_val <-
      create_iso8601(
        !!!raw_dat[raw_var],
        .format = raw_fmt,
        .na = raw_unk,
        .warn = .warn
      )

    # Raw columns to discard once the target variable has been derived. Key
    # variables and the target itself are kept even if they share a name with
    # a raw variable, otherwise the join key or the derived column would be
    # dropped.
    drop_vars <- setdiff(raw_var, c(id_vars, tgt_var))

    # `all_of()` makes it explicit that these are character vectors of column
    # names (not columns of the data) and supports several raw variables.
    der_dat <-
      raw_dat |>
      dplyr::select(dplyr::all_of(c(id_vars, raw_var))) |>
      dplyr::mutate("{tgt_var}" := tgt_val) |>
      dplyr::select(-dplyr::all_of(drop_vars))

    # Without a target dataset the result is just the keys plus the derived
    # variable.
    if (is.null(tgt_dat)) {
      return(der_dat)
    }

    # Otherwise keep every record of `tgt_dat`, attach the derived variable by
    # the key variables and place it as the last column.
    der_dat |>
      dplyr::right_join(y = tgt_dat, by = id_vars) |>
      dplyr::relocate(dplyr::all_of(tgt_var), .after = dplyr::last_col())
  }
96 changes: 96 additions & 0 deletions man/assign_datetime.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 92e490c

Please sign in to comment.