From 1c4501f0a4ea5ba22b6355a60cc05c22c5ac2c20 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Thu, 8 Feb 2024 02:22:57 +0000 Subject: [PATCH 01/80] First mockup of `hardcode_no_ct()` --- NAMESPACE | 2 ++ R/hardcode_no_ct.R | 71 ++++++++++++++++++++++++++++++++++++++++ inst/WORDLIST | 3 ++ man/hardcode_no_ct.Rd | 76 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 152 insertions(+) create mode 100644 R/hardcode_no_ct.R create mode 100644 man/hardcode_no_ct.Rd diff --git a/NAMESPACE b/NAMESPACE index 7fc88eef..8ccc1503 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,6 +3,8 @@ S3method(print,iso8601) export(create_iso8601) export(fmt_cmp) +export(hardcode_no_ct) export(problems) +importFrom(rlang,":=") importFrom(rlang,.data) importFrom(tibble,tibble) diff --git a/R/hardcode_no_ct.R b/R/hardcode_no_ct.R new file mode 100644 index 00000000..71e62260 --- /dev/null +++ b/R/hardcode_no_ct.R @@ -0,0 +1,71 @@ +#' Derive an SDTM variable with a hardcoded value +#' +#' [hardcode_no_ct()] maps a hardcoded value to a target SDTM variable that has +#' no terminology restrictions. +#' +#' @param raw_dataset The raw dataset. +#' @param raw_variable The raw variable. +#' @param target_sdtm_variable The target SDTM variable. +#' @param target_hardcoded_value Hardcoded value. +#' @param target_dataset Target dataset. By default the same as `raw_dataset`. +#' @param merge_to_topic_by If `target_dataset` is different than `raw_dataset`, +#' then this parameter defines keys to use in the join between `raw_dataset` +#' and `target_dataset`. +#' +#' @examples +#' MD1 <- +#' tibble::tribble( +#' ~oak_id, ~raw_source, ~patient_number, ~MDRAW, +#' 1L, "MD1", "PATNUM", "BABY ASPIRIN", +#' 2L, "MD1", "PATNUM", "CORTISPORIN", +#' 3L, "MD1", "PATNUM", "ASPIRIN", +#' 4L, "MD1", "PATNUM", "DIPHENHYDRAMINE HCL" +#' ) +#' +#' # Derive a new variable `CMCAT` by overwriting `MDRAW` with the +#' # hardcoded value "GENERAL CONCOMITANT MEDICATIONS". +#' hardcode_no_ct( +#' raw_dataset = MD1, +#' raw_variable = "MDRAW", +#' target_sdtm_variable = "CMCAT", +#' target_hardcoded_value = "GENERAL CONCOMITANT MEDICATIONS" +#' ) +#' +#' CM_INTER <- +#' tibble::tribble( +#' ~oak_id, ~raw_source, ~patient_number, ~CMTRT, ~CMINDC, +#' 1L, "MD1", "PATNUM", "BABY ASPIRIN", NA, +#' 2L, "MD1", "PATNUM", "CORTISPORIN", "NAUSEA", +#' 3L, "MD1", "PATNUM", "ASPIRIN", "ANEMIA", +#' 4L, "MD1", "PATNUM", "DIPHENHYDRAMINE HCL", "NAUSEA" +#' ) +#' +#' # Derive a new variable `CMCAT` by overwriting `MDRAW` with the +#' # hardcoded value "GENERAL CONCOMITANT MEDICATIONS" with a prior join to +#' # `target_dataset`. +#' +#' hardcode_no_ct( +#' raw_dataset = MD1, +#' raw_variable = "MDRAW", +#' target_sdtm_variable = "CMCAT", +#' target_hardcoded_value = "GENERAL CONCOMITANT MEDICATIONS", +#' target_dataset = CM_INTER, +#' merge_to_topic_by = c("oak_id","raw_source","patient_number") +#' ) +#' +#' @importFrom rlang := +#' @export +hardcode_no_ct <- function(raw_dataset, + raw_variable, + target_sdtm_variable, + target_hardcoded_value, + target_dataset = raw_dataset, + merge_to_topic_by = NULL + +) { + + dplyr::left_join(x = raw_dataset, y = target_dataset, by = merge_to_topic_by) |> + dplyr::mutate("{raw_variable}" := target_hardcoded_value) |> + dplyr::rename("{target_sdtm_variable}" := raw_variable) + +} diff --git a/inst/WORDLIST b/inst/WORDLIST index 65b6b4f9..36a406da 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -8,3 +8,6 @@ funder vectorized ORCID iso +hardcoded +CDISC +PMDA diff --git a/man/hardcode_no_ct.Rd b/man/hardcode_no_ct.Rd new file mode 100644 index 00000000..a823244a --- /dev/null +++ b/man/hardcode_no_ct.Rd @@ -0,0 +1,76 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/hardcode_no_ct.R +\name{hardcode_no_ct} +\alias{hardcode_no_ct} +\title{Derive an SDTM variable with a hardcoded value} +\usage{ +hardcode_no_ct( + raw_dataset, + raw_variable, + target_sdtm_variable, + target_hardcoded_value, + target_dataset = raw_dataset, + merge_to_topic_by = NULL +) +} +\arguments{ +\item{raw_dataset}{The raw dataset.} + +\item{raw_variable}{The raw variable.} + +\item{target_sdtm_variable}{The target SDTM variable.} + +\item{target_hardcoded_value}{Hardcoded value.} + +\item{target_dataset}{Target dataset. By default the same as \code{raw_dataset}.} + +\item{merge_to_topic_by}{If \code{target_dataset} is different than \code{raw_dataset}, +then this parameter defines keys to use in the join between \code{raw_dataset} +and \code{target_dataset}.} +} +\description{ +\code{\link[=hardcode_no_ct]{hardcode_no_ct()}} maps a hardcoded value to a target SDTM variable that has +no terminology restrictions. +} +\examples{ +MD1 <- + tibble::tribble( + ~oak_id, ~raw_source, ~patient_number, ~MDRAW, + 1L, "MD1", "PATNUM", "BABY ASPIRIN", + 2L, "MD1", "PATNUM", "CORTISPORIN", + 3L, "MD1", "PATNUM", "ASPIRIN", + 4L, "MD1", "PATNUM", "DIPHENHYDRAMINE HCL" + ) + +# Derive a new variable `CMCAT` by overwriting `MDRAW` with the +# hardcoded value "GENERAL CONCOMITANT MEDICATIONS". +hardcode_no_ct( + raw_dataset = MD1, + raw_variable = "MDRAW", + target_sdtm_variable = "CMCAT", + target_hardcoded_value = "GENERAL CONCOMITANT MEDICATIONS" +) + +CM_INTER <- + tibble::tribble( + ~oak_id, ~raw_source, ~patient_number, ~CMTRT, ~CMINDC, + 1L, "MD1", "PATNUM", "BABY ASPIRIN", NA, + 2L, "MD1", "PATNUM", "CORTISPORIN", "NAUSEA", + 3L, "MD1", "PATNUM", "ASPIRIN", "ANEMIA", + 4L, "MD1", "PATNUM", "DIPHENHYDRAMINE HCL", "NAUSEA" + ) + +# Derive a new variable `CMCAT` by overwriting `MDRAW` with the +# hardcoded value "GENERAL CONCOMITANT MEDICATIONS" with a prior join to +# `target_dataset`. + +hardcode_no_ct( + raw_dataset = MD1, + raw_variable = "MDRAW", + target_sdtm_variable = "CMCAT", + target_hardcoded_value = "GENERAL CONCOMITANT MEDICATIONS", + target_dataset = CM_INTER, + merge_to_topic_by = c("oak_id","raw_source","patient_number") + ) + +} From faef0b1a9ac6a3b28d3e32057a9f1bb8e0f177dc Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Sat, 17 Feb 2024 01:03:13 +0000 Subject: [PATCH 02/80] Update `hardcode_no_ct()` Update `hardcode_no_ct()` by allowing the rewriting of the `target_sdtm_variable` variable to preserve `NA` --- R/hardcode_no_ct.R | 30 ++++---- R/recode.R | 148 +++++++++++++++++++++++++++++++++++++++ man/are_to_rewrite.Rd | 32 +++++++++ man/hardcode_no_ct.Rd | 17 ++--- man/index_for_rewrite.Rd | 30 ++++++++ man/overwrite.Rd | 58 +++++++++++++++ man/rewrite.Rd | 48 +++++++++++++ 7 files changed, 339 insertions(+), 24 deletions(-) create mode 100644 R/recode.R create mode 100644 man/are_to_rewrite.Rd create mode 100644 man/index_for_rewrite.Rd create mode 100644 man/overwrite.Rd create mode 100644 man/rewrite.Rd diff --git a/R/hardcode_no_ct.R b/R/hardcode_no_ct.R index 71e62260..47a8f785 100644 --- a/R/hardcode_no_ct.R +++ b/R/hardcode_no_ct.R @@ -16,9 +16,9 @@ #' MD1 <- #' tibble::tribble( #' ~oak_id, ~raw_source, ~patient_number, ~MDRAW, -#' 1L, "MD1", "PATNUM", "BABY ASPIRIN", -#' 2L, "MD1", "PATNUM", "CORTISPORIN", -#' 3L, "MD1", "PATNUM", "ASPIRIN", +#' 1L, "MD1", "PATNUM", "BABY ASPIRIN", +#' 2L, "MD1", "PATNUM", "CORTISPORIN", +#' 3L, "MD1", "PATNUM", NA_character_, #' 4L, "MD1", "PATNUM", "DIPHENHYDRAMINE HCL" #' ) #' @@ -33,12 +33,13 @@ #' #' CM_INTER <- #' tibble::tribble( -#' ~oak_id, ~raw_source, ~patient_number, ~CMTRT, ~CMINDC, +#' ~oak_id, ~raw_source, ~patient_number, ~CMTRT, ~CMINDC, #' 1L, "MD1", "PATNUM", "BABY ASPIRIN", NA, #' 2L, "MD1", "PATNUM", "CORTISPORIN", "NAUSEA", #' 3L, "MD1", "PATNUM", "ASPIRIN", "ANEMIA", -#' 4L, "MD1", "PATNUM", "DIPHENHYDRAMINE HCL", "NAUSEA" -#' ) +#' 4L, "MD1", "PATNUM", "DIPHENHYDRAMINE HCL", "NAUSEA", +#' 5L, "MD1", "PATNUM", "PARACETAMOL", "PYREXIA" +#' ) #' #' # Derive a new variable `CMCAT` by overwriting `MDRAW` with the #' # hardcoded value "GENERAL CONCOMITANT MEDICATIONS" with a prior join to @@ -50,8 +51,8 @@ #' target_sdtm_variable = "CMCAT", #' target_hardcoded_value = "GENERAL CONCOMITANT MEDICATIONS", #' target_dataset = CM_INTER, -#' merge_to_topic_by = c("oak_id","raw_source","patient_number") -#' ) +#' merge_to_topic_by = c("oak_id", "raw_source", "patient_number") +#' ) #' #' @importFrom rlang := #' @export @@ -60,12 +61,9 @@ hardcode_no_ct <- function(raw_dataset, target_sdtm_variable, target_hardcoded_value, target_dataset = raw_dataset, - merge_to_topic_by = NULL - -) { - - dplyr::left_join(x = raw_dataset, y = target_dataset, by = merge_to_topic_by) |> - dplyr::mutate("{raw_variable}" := target_hardcoded_value) |> - dplyr::rename("{target_sdtm_variable}" := raw_variable) - + merge_to_topic_by = NULL) { + dplyr::right_join(x = raw_dataset, y = target_dataset, by = merge_to_topic_by) |> + dplyr::mutate("{raw_variable}" := overwrite(!!rlang::sym(raw_variable), target_hardcoded_value)) |> + dplyr::rename("{target_sdtm_variable}" := raw_variable) |> + dplyr::relocate(target_sdtm_variable, .after = dplyr::last_col()) } diff --git a/R/recode.R b/R/recode.R new file mode 100644 index 00000000..07fe9a09 --- /dev/null +++ b/R/recode.R @@ -0,0 +1,148 @@ +#' Overwrite values +#' +#' @description +#' [overwrite()] recodes values in `x` to a new set of values provided in `to`; +#' the values in `to` are recycled to match the length of `x`. By default, +#' missing values remain `NA`. +#' +#' @param x An atomic vector. +#' @param .na New value for missing values in `x`. Defaults to `NA`. +#' +#' @returns A vector of the same length of `x` with new values matching those +#' in `to`. +#' +#' @examples +#' x <- c(letters[1:4], NA, NA) +#' # Recode all values to `"x"` but keep `NA`. +#' sdtm.oak:::overwrite(x, to = "x") +#' +#' # Recode all values to `"x"` but recode `NA` to a new value. +#' sdtm.oak:::overwrite(x, to = "x", .na = "x") +#' sdtm.oak:::overwrite(x, to = "x", .na = "Absent") +#' +#' # If `to` is not a scalar, it is recycled and matched by position for +#' # replacement. +#' sdtm.oak:::overwrite(x, to = c("x", "y")) +#' +#' # `x` can be of other types besides `character`, e.g. replace integers to a +#' # hard-coded new integer value. +#' sdtm.oak:::overwrite(x = 1:5, to = 0) +#' +#' # Example involving `logical` vectors +#' sdtm.oak:::overwrite(x = c(TRUE, FALSE), to = FALSE) +#' +#' # Returned type will be a type compatible with both the types of `to` and +#' # `.na`. +#' sdtm.oak:::overwrite(x = c("sdtm", "adam"), to = 0) +#' sdtm.oak:::overwrite( +#' x = c("sdtm", "adam"), +#' to = 0, +#' .na = NA_character_ +#' ) +#' sdtm.oak:::overwrite( +#' x = c("sdtm", "adam"), +#' to = TRUE, +#' .na = NA_real_ +#' ) +#' +#' @keywords internal +overwrite <- function(x, to, .na = NA) { + # y <- rep_len(to, length(x)) + y <- rlang::rep_along(x, to) + y[is.na(x)] <- .na + + y +} + +#' Determine Indices for Rewriting +#' +#' [index_for_rewrite()] identifies the positions of elements in `x` that match +#' any of the values specified in the `from` vector. This function is primarily +#' used to facilitate the rewriting of values by pinpointing which elements in +#' `x` correspond to the `from` values and thus need to be replaced or updated. +#' +#' @param x A vector of values in which to search for matches. +#' @param from A vector of values to match against the elements in `x`. +#' @return An integer vector of the same length as `x`, containing the indices +#' of the matched values from the `from` vector. If an element in `x` does not +#' match any value in `from`, the corresponding position in the output will be +#' `NA`. This index information is critical for subsequent rewrite operations. +#' @examples +#' sdtm.oak:::index_for_rewrite(x = 1:5, from = c(2, 4)) +#' +#' @keywords internal +index_for_rewrite <- function(x, from) { + match(x, from) +} + +#' Are values to be rewritten? +#' +#' `are_to_rewrite` is a helper function designed to determine if any values +#' in a vector `x` match the specified `from` values, indicating they are +#' candidates for recoding or rewriting. +#' +#' @param x A vector of values that will be checked against the `from` vector. +#' @param from A vector of values that `x` will be checked for matches against. +#' @return A logical vector of the same length as `x`, where `TRUE` indicates +#' that the corresponding value in `x` matches a value in `from` and +#' should be rewritten, and `FALSE` otherwise. If `x` is empty, returns +#' an empty logical vector. This function is intended for internal use +#' and optimization in data transformation processes. +#' @keywords internal +#' @examples +#' sdtm.oak:::are_to_rewrite(x = 1:5, from = c(2, 4)) +#' +#' sdtm.oak:::are_to_rewrite(letters[1:3], from = c("a", "c")) +#' +#' @keywords internal +are_to_rewrite <- function(x, from) { + # match(x, from, nomatch = 0) != 0 + !is.na(index_for_rewrite(x, from)) +} + +#' Rewrite values +#' +#' [rewrite()] recodes values in `x` by matching elements in `from` onto values +#' in `to`. +#' +#' @param x An atomic vector of values are to be recoded. +#' @param from A vector of values to be matched in `x` for rewriting. +#' @param to A vector of values to be used as replacement for values in `from`. +#' @param .no_match Value to be used as replacement when cases in `from` are not +#' matched. +#' @param .na Value to be used to recode missing values. +#' +#' @returns A vector of recoded values. +#' +#' @examples +#' x <- c("male", "female", "x", NA) +#' sdtm.oak:::rewrite(x, +#' from = c("male", "female"), +#' to = c("M", "F") +#' ) +#' sdtm.oak:::rewrite( +#' x, +#' from = c("male", "female"), +#' to = c("M", "F"), +#' .no_match = "?" +#' ) +#' sdtm.oak:::rewrite( +#' x, +#' from = c("male", "female"), +#' to = c("M", "F"), +#' .na = "missing" +#' ) +#' +#' @keywords internal +rewrite <- function(x, + from, + to, + .no_match = x, + .na = NA) { + to <- rlang::rep_along(x, to) + index <- index_for_rewrite(x, from) + y <- ifelse(!is.na(index), to[index], .no_match) + y[is.na(x)] <- .na + + y +} diff --git a/man/are_to_rewrite.Rd b/man/are_to_rewrite.Rd new file mode 100644 index 00000000..9fc7e753 --- /dev/null +++ b/man/are_to_rewrite.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/recode.R +\name{are_to_rewrite} +\alias{are_to_rewrite} +\title{Are values to be rewritten?} +\usage{ +are_to_rewrite(x, from) +} +\arguments{ +\item{x}{A vector of values that will be checked against the \code{from} vector.} + +\item{from}{A vector of values that \code{x} will be checked for matches against.} +} +\value{ +A logical vector of the same length as \code{x}, where \code{TRUE} indicates +that the corresponding value in \code{x} matches a value in \code{from} and +should be rewritten, and \code{FALSE} otherwise. If \code{x} is empty, returns +an empty logical vector. This function is intended for internal use +and optimization in data transformation processes. +} +\description{ +\code{are_to_rewrite} is a helper function designed to determine if any values +in a vector \code{x} match the specified \code{from} values, indicating they are +candidates for recoding or rewriting. +} +\examples{ +sdtm.oak:::are_to_rewrite(x = 1:5, from = c(2, 4)) + +sdtm.oak:::are_to_rewrite(letters[1:3], from = c("a", "c")) + +} +\keyword{internal} diff --git a/man/hardcode_no_ct.Rd b/man/hardcode_no_ct.Rd index a823244a..6b452d5a 100644 --- a/man/hardcode_no_ct.Rd +++ b/man/hardcode_no_ct.Rd @@ -36,9 +36,9 @@ no terminology restrictions. MD1 <- tibble::tribble( ~oak_id, ~raw_source, ~patient_number, ~MDRAW, - 1L, "MD1", "PATNUM", "BABY ASPIRIN", - 2L, "MD1", "PATNUM", "CORTISPORIN", - 3L, "MD1", "PATNUM", "ASPIRIN", + 1L, "MD1", "PATNUM", "BABY ASPIRIN", + 2L, "MD1", "PATNUM", "CORTISPORIN", + 3L, "MD1", "PATNUM", NA_character_, 4L, "MD1", "PATNUM", "DIPHENHYDRAMINE HCL" ) @@ -53,12 +53,13 @@ hardcode_no_ct( CM_INTER <- tibble::tribble( - ~oak_id, ~raw_source, ~patient_number, ~CMTRT, ~CMINDC, + ~oak_id, ~raw_source, ~patient_number, ~CMTRT, ~CMINDC, 1L, "MD1", "PATNUM", "BABY ASPIRIN", NA, 2L, "MD1", "PATNUM", "CORTISPORIN", "NAUSEA", 3L, "MD1", "PATNUM", "ASPIRIN", "ANEMIA", - 4L, "MD1", "PATNUM", "DIPHENHYDRAMINE HCL", "NAUSEA" - ) + 4L, "MD1", "PATNUM", "DIPHENHYDRAMINE HCL", "NAUSEA", + 5L, "MD1", "PATNUM", "PARACETAMOL", "PYREXIA" + ) # Derive a new variable `CMCAT` by overwriting `MDRAW` with the # hardcoded value "GENERAL CONCOMITANT MEDICATIONS" with a prior join to @@ -70,7 +71,7 @@ hardcode_no_ct( target_sdtm_variable = "CMCAT", target_hardcoded_value = "GENERAL CONCOMITANT MEDICATIONS", target_dataset = CM_INTER, - merge_to_topic_by = c("oak_id","raw_source","patient_number") - ) + merge_to_topic_by = c("oak_id", "raw_source", "patient_number") +) } diff --git a/man/index_for_rewrite.Rd b/man/index_for_rewrite.Rd new file mode 100644 index 00000000..6a729a56 --- /dev/null +++ b/man/index_for_rewrite.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/recode.R +\name{index_for_rewrite} +\alias{index_for_rewrite} +\title{Determine Indices for Rewriting} +\usage{ +index_for_rewrite(x, from) +} +\arguments{ +\item{x}{A vector of values in which to search for matches.} + +\item{from}{A vector of values to match against the elements in \code{x}.} +} +\value{ +An integer vector of the same length as \code{x}, containing the indices +of the matched values from the \code{from} vector. If an element in \code{x} does not +match any value in \code{from}, the corresponding position in the output will be +\code{NA}. This index information is critical for subsequent rewrite operations. +} +\description{ +\code{\link[=index_for_rewrite]{index_for_rewrite()}} identifies the positions of elements in \code{x} that match +any of the values specified in the \code{from} vector. This function is primarily +used to facilitate the rewriting of values by pinpointing which elements in +\code{x} correspond to the \code{from} values and thus need to be replaced or updated. +} +\examples{ +sdtm.oak:::index_for_rewrite(x = 1:5, from = c(2, 4)) + +} +\keyword{internal} diff --git a/man/overwrite.Rd b/man/overwrite.Rd new file mode 100644 index 00000000..6d608bdf --- /dev/null +++ b/man/overwrite.Rd @@ -0,0 +1,58 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/recode.R +\name{overwrite} +\alias{overwrite} +\title{Overwrite values} +\usage{ +overwrite(x, to, .na = NA) +} +\arguments{ +\item{x}{An atomic vector.} + +\item{.na}{New value for missing values in \code{x}. Defaults to \code{NA}.} +} +\value{ +A vector of the same length of \code{x} with new values matching those +in \code{to}. +} +\description{ +\code{\link[=overwrite]{overwrite()}} recodes values in \code{x} to a new set of values provided in \code{to}; +the values in \code{to} are recycled to match the length of \code{x}. By default, +missing values remain \code{NA}. +} +\examples{ +x <- c(letters[1:4], NA, NA) +# Recode all values to `"x"` but keep `NA`. +sdtm.oak:::overwrite(x, to = "x") + +# Recode all values to `"x"` but recode `NA` to a new value. +sdtm.oak:::overwrite(x, to = "x", .na = "x") +sdtm.oak:::overwrite(x, to = "x", .na = "Absent") + +# If `to` is not a scalar, it is recycled and matched by position for +# replacement. +sdtm.oak:::overwrite(x, to = c("x", "y")) + +# `x` can be of other types besides `character`, e.g. replace integers to a +# hard-coded new integer value. +sdtm.oak:::overwrite(x = 1:5, to = 0) + +# Example involving `logical` vectors +sdtm.oak:::overwrite(x = c(TRUE, FALSE), to = FALSE) + +# Returned type will be a type compatible with both the types of `to` and +# `.na`. +sdtm.oak:::overwrite(x = c("sdtm", "adam"), to = 0) +sdtm.oak:::overwrite( + x = c("sdtm", "adam"), + to = 0, + .na = NA_character_ +) +sdtm.oak:::overwrite( + x = c("sdtm", "adam"), + to = TRUE, + .na = NA_real_ +) + +} +\keyword{internal} diff --git a/man/rewrite.Rd b/man/rewrite.Rd new file mode 100644 index 00000000..6f25762f --- /dev/null +++ b/man/rewrite.Rd @@ -0,0 +1,48 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/recode.R +\name{rewrite} +\alias{rewrite} +\title{Rewrite values} +\usage{ +rewrite(x, from, to, .no_match = x, .na = NA) +} +\arguments{ +\item{x}{An atomic vector of values are to be recoded.} + +\item{from}{A vector of values to be matched in \code{x} for rewriting.} + +\item{to}{A vector of values to be used as replacement for values in \code{from}.} + +\item{.no_match}{Value to be used as replacement when cases in \code{from} are not +matched.} + +\item{.na}{Value to be used to recode missing values.} +} +\value{ +A vector of recoded values. +} +\description{ +\code{\link[=rewrite]{rewrite()}} recodes values in \code{x} by matching elements in \code{from} onto values +in \code{to}. +} +\examples{ +x <- c("male", "female", "x", NA) +sdtm.oak:::rewrite(x, + from = c("male", "female"), + to = c("M", "F") +) +sdtm.oak:::rewrite( + x, + from = c("male", "female"), + to = c("M", "F"), + .no_match = "?" +) +sdtm.oak:::rewrite( + x, + from = c("male", "female"), + to = c("M", "F"), + .na = "missing" +) + +} +\keyword{internal} From fd63b37ad17c15d0232297d452e9d73ef27e14af Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Wed, 21 Feb 2024 15:01:45 +0000 Subject: [PATCH 03/80] Align `hardcode_no_ct()` code style with Ramm's expectations --- R/hardcode_no_ct.R | 25 +++++++++++++------------ man/hardcode_no_ct.Rd | 18 +++++++++--------- 2 files changed, 22 insertions(+), 21 deletions(-) diff --git a/R/hardcode_no_ct.R b/R/hardcode_no_ct.R index 47a8f785..c7249da3 100644 --- a/R/hardcode_no_ct.R +++ b/R/hardcode_no_ct.R @@ -16,10 +16,10 @@ #' MD1 <- #' tibble::tribble( #' ~oak_id, ~raw_source, ~patient_number, ~MDRAW, -#' 1L, "MD1", "PATNUM", "BABY ASPIRIN", -#' 2L, "MD1", "PATNUM", "CORTISPORIN", -#' 3L, "MD1", "PATNUM", NA_character_, -#' 4L, "MD1", "PATNUM", "DIPHENHYDRAMINE HCL" +#' 1L, "MD1", 101L, "BABY ASPIRIN", +#' 2L, "MD1", 102L, "CORTISPORIN", +#' 3L, "MD1", 103L, NA_character_, +#' 4L, "MD1", 104L, "DIPHENHYDRAMINE HCL" #' ) #' #' # Derive a new variable `CMCAT` by overwriting `MDRAW` with the @@ -34,11 +34,11 @@ #' CM_INTER <- #' tibble::tribble( #' ~oak_id, ~raw_source, ~patient_number, ~CMTRT, ~CMINDC, -#' 1L, "MD1", "PATNUM", "BABY ASPIRIN", NA, -#' 2L, "MD1", "PATNUM", "CORTISPORIN", "NAUSEA", -#' 3L, "MD1", "PATNUM", "ASPIRIN", "ANEMIA", -#' 4L, "MD1", "PATNUM", "DIPHENHYDRAMINE HCL", "NAUSEA", -#' 5L, "MD1", "PATNUM", "PARACETAMOL", "PYREXIA" +#' 1L, "MD1", 101L, "BABY ASPIRIN", NA, +#' 2L, "MD1", 102L, "CORTISPORIN", "NAUSEA", +#' 3L, "MD1", 103L, "ASPIRIN", "ANEMIA", +#' 4L, "MD1", 104L, "DIPHENHYDRAMINE HCL", "NAUSEA", +#' 5L, "MD1", 105L, "PARACETAMOL", "PYREXIA" #' ) #' #' # Derive a new variable `CMCAT` by overwriting `MDRAW` with the @@ -62,8 +62,9 @@ hardcode_no_ct <- function(raw_dataset, target_hardcoded_value, target_dataset = raw_dataset, merge_to_topic_by = NULL) { - dplyr::right_join(x = raw_dataset, y = target_dataset, by = merge_to_topic_by) |> - dplyr::mutate("{raw_variable}" := overwrite(!!rlang::sym(raw_variable), target_hardcoded_value)) |> - dplyr::rename("{target_sdtm_variable}" := raw_variable) |> + raw_dataset |> + dplyr::mutate("{target_sdtm_variable}" := overwrite(!!rlang::sym(raw_variable), target_hardcoded_value)) |> + dplyr::right_join(y = target_dataset, by = merge_to_topic_by) |> + dplyr::select(-rlang::sym(raw_variable)) |> dplyr::relocate(target_sdtm_variable, .after = dplyr::last_col()) } diff --git a/man/hardcode_no_ct.Rd b/man/hardcode_no_ct.Rd index 6b452d5a..cc37c81e 100644 --- a/man/hardcode_no_ct.Rd +++ b/man/hardcode_no_ct.Rd @@ -36,10 +36,10 @@ no terminology restrictions. MD1 <- tibble::tribble( ~oak_id, ~raw_source, ~patient_number, ~MDRAW, - 1L, "MD1", "PATNUM", "BABY ASPIRIN", - 2L, "MD1", "PATNUM", "CORTISPORIN", - 3L, "MD1", "PATNUM", NA_character_, - 4L, "MD1", "PATNUM", "DIPHENHYDRAMINE HCL" + 1L, "MD1", 101L, "BABY ASPIRIN", + 2L, "MD1", 102L, "CORTISPORIN", + 3L, "MD1", 103L, NA_character_, + 4L, "MD1", 104L, "DIPHENHYDRAMINE HCL" ) # Derive a new variable `CMCAT` by overwriting `MDRAW` with the @@ -54,11 +54,11 @@ hardcode_no_ct( CM_INTER <- tibble::tribble( ~oak_id, ~raw_source, ~patient_number, ~CMTRT, ~CMINDC, - 1L, "MD1", "PATNUM", "BABY ASPIRIN", NA, - 2L, "MD1", "PATNUM", "CORTISPORIN", "NAUSEA", - 3L, "MD1", "PATNUM", "ASPIRIN", "ANEMIA", - 4L, "MD1", "PATNUM", "DIPHENHYDRAMINE HCL", "NAUSEA", - 5L, "MD1", "PATNUM", "PARACETAMOL", "PYREXIA" + 1L, "MD1", 101L, "BABY ASPIRIN", NA, + 2L, "MD1", 102L, "CORTISPORIN", "NAUSEA", + 3L, "MD1", 103L, "ASPIRIN", "ANEMIA", + 4L, "MD1", 104L, "DIPHENHYDRAMINE HCL", "NAUSEA", + 5L, "MD1", 105L, "PARACETAMOL", "PYREXIA" ) # Derive a new variable `CMCAT` by overwriting `MDRAW` with the From 80d39432366b2c1b9196c2b7a3e81152f97c4ff9 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Thu, 22 Feb 2024 13:40:58 +0000 Subject: [PATCH 04/80] Add `hardcode_*()` and `assign_*()` functions --- DESCRIPTION | 4 +- NAMESPACE | 6 + R/assign.R | 388 ++++++++++++++++++ R/ct.R | 104 +++++ R/{hardcode_no_ct.R => hardcode.R} | 77 +++- R/recode.R | 107 ++--- R/sdtm.oak-package.R | 1 + R/str_split.R | 13 + inst/WORDLIST | 6 + man/{are_to_rewrite.Rd => are_to_recode.Rd} | 18 +- man/assign.Rd | 351 ++++++++++++++++ man/ct_map.Rd | 40 ++ man/ct_mappings.Rd | 48 +++ man/{hardcode_no_ct.Rd => harcode.Rd} | 32 +- ...dex_for_rewrite.Rd => index_for_recode.Rd} | 16 +- man/overwrite.Rd | 58 --- man/{rewrite.Rd => recode.Rd} | 18 +- 17 files changed, 1109 insertions(+), 178 deletions(-) create mode 100644 R/assign.R create mode 100644 R/ct.R rename R/{hardcode_no_ct.R => hardcode.R} (51%) create mode 100644 R/str_split.R rename man/{are_to_rewrite.Rd => are_to_recode.Rd} (62%) create mode 100644 man/assign.Rd create mode 100644 man/ct_map.Rd create mode 100644 man/ct_mappings.Rd rename man/{hardcode_no_ct.Rd => harcode.Rd} (70%) rename man/{index_for_rewrite.Rd => index_for_recode.Rd} (61%) delete mode 100644 man/overwrite.Rd rename man/{rewrite.Rd => recode.Rd} (67%) diff --git a/DESCRIPTION b/DESCRIPTION index acb15ebf..e748ceb0 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -41,7 +41,9 @@ Imports: purrr (>= 0.3.3), rlang (>= 0.4.4), stringr (>= 1.4.0), - tibble + tibble, + tidyr, + vctrs Suggests: knitr, rmarkdown, diff --git a/NAMESPACE b/NAMESPACE index 8ccc1503..708540e0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,10 +1,16 @@ # Generated by roxygen2: do not edit by hand S3method(print,iso8601) +export(assign_ct) +export(assign_no_ct) export(create_iso8601) +export(ct_map) export(fmt_cmp) +export(hardcode_ct) export(hardcode_no_ct) export(problems) +importFrom(rlang,"%||%") importFrom(rlang,":=") importFrom(rlang,.data) +importFrom(stats,na.omit) importFrom(tibble,tibble) diff --git a/R/assign.R b/R/assign.R new file mode 100644 index 00000000..bdaf46df --- /dev/null +++ b/R/assign.R @@ -0,0 +1,388 @@ +#' @importFrom rlang := +sdtm_assign <- function(raw_dat, + raw_var, + tgt_var, + tgt_dat = raw_dat, + by = NULL, + ct = NULL, + cl = NULL) { + + # TODO: Assertions. + + raw_dat |> + dplyr::right_join(y = tgt_dat, by = by) |> + dplyr::mutate("{tgt_var}" := ct_map(!!rlang::sym(raw_var), ct = ct, cl = cl)) |> + dplyr::select(-rlang::sym(raw_var)) |> + dplyr::relocate(tgt_var, .after = dplyr::last_col()) + +} + +#' Derive an SDTM variable +#' +#' @description +#' - [assign_no_ct()] maps a variable in a source dataset to a target SDTM +#' variable that has no terminology restrictions. +#' +#' - [assign_ct()] maps a variable in a source dataset to a target SDTM variable +#' following controlled terminology recoding. +#' +#' @param raw_dataset The raw dataset. +#' @param raw_variable The raw variable. +#' @param target_sdtm_variable The target SDTM variable. +#' @param target_dataset Target dataset. By default the same as `raw_dataset`. +#' @param merge_to_topic_by If `target_dataset` is different than `raw_dataset`, +#' then this parameter defines keys to use in the join between `raw_dataset` +#' and `target_dataset`. +#' @param study_ct Study controlled terminology specification. +#' @param target_sdtm_variable_codelist_code A codelist code indicating which +#' subset of the controlled terminology to apply in the derivation. +#' +#' @returns The target dataset with the derived variable `target_sdtm_variable`. +#' +#' @examples +#' study_ct <- +#' tibble::tibble( +#' codelist_code = rep("C66729", 8L), +#' term_code = c( +#' "C28161", +#' "C38210", +#' "C38222", +#' "C38223", +#' "C38287", +#' "C38288", +#' "C38305", +#' "C38311" +#' ), +#' CodedData = c( +#' "INTRAMUSCULAR", +#' "EPIDURAL", +#' "INTRA-ARTERIAL", +#' "INTRA-ARTICULAR", +#' "OPHTHALMIC", +#' "ORAL", +#' "TRANSDERMAL", +#' "UNKNOWN" +#' ), +#' term_value = CodedData, +#' collected_value = c( +#' "IM (Intramuscular)", +#' "EP (Epidural)", +#' "IA (Intra-arterial)", +#' "IJ (Intra-articular)", +#' "OP (Ophthalmic)", +#' "PO (Oral)", +#' "DE (Transdermal)", +#' "Unknown" +#' ), +#' term_preferred_term = c( +#' "Intramuscular Route of Administration", +#' "Epidural Route of Administration", +#' "Intraarterial Route of Administration", +#' "Intraarticular Route of Administration", +#' "Ophthalmic Route of Administration", +#' "Oral Route of Administration", +#' "Transdermal Route of Administration", +#' "Unknown Route of Administration" +#' ), +#' term_synonyms = c(rep(NA, 5L), "Intraoral Route of Administration; PO", NA, NA), +#' raw_codelist = rep("ROUTE_CV1", 8L) +#' ) +#' +#' md1 <- +#' tibble::tibble( +#' oak_id = 1:14, +#' raw_source = "MD1", +#' PATIENT_NUM = 101:114, +#' MDRTE = c( +#' "PO (Oral)", +#' "PO (Oral)", +#' NA_character_, +#' "PO", +#' "Intraoral Route of Administration", +#' "PO (Oral)", +#' "IM (Intramuscular)", +#' "IA (Intra-arterial)", +#' "", +#' "Non-standard", +#' "random_value", +#' "IJ (Intra-articular)", +#' "TRANSDERMAL", +#' "OPHTHALMIC" +#' ) +#' ) +#' +#' assign_ct( +#' raw_dataset = md1, +#' raw_variable = "MDRTE", +#' study_ct = study_ct, +#' target_sdtm_variable = "CMROUTE", +#' target_sdtm_variable_codelist_code = "C66729" +#' ) +#' +#' cm_inter <- +#' tibble::tibble( +#' oak_id = 1:14, +#' raw_source = "MD1", +#' PATIENT_NUM = 101:114, +#' CMTRT = c( +#' "BABY ASPIRIN", +#' "CORTISPORIN", +#' "ASPIRIN", +#' "DIPHENHYDRAMINE HCL", +#' "PARCETEMOL", +#' "VOMIKIND", +#' "ZENFLOX OZ", +#' "AMITRYPTYLINE", +#' "BENADRYL", +#' "DIPHENHYDRAMINE HYDROCHLORIDE", +#' "TETRACYCLINE", +#' "BENADRYL", +#' "SOMINEX", +#' "ZQUILL" +#' ), +#' CMINDC = c( +#' NA, +#' "NAUSEA", +#' "ANEMIA", +#' "NAUSEA", +#' "PYREXIA", +#' "VOMITINGS", +#' "DIARHHEA", +#' "COLD", +#' "FEVER", +#' "LEG PAIN", +#' "FEVER", +#' "COLD", +#' "COLD", +#' "PAIN" +#' ), +#' CMROUTE = c( +#' "ORAL", +#' "ORAL", +#' NA, +#' "ORAL", +#' "ORAL", +#' "ORAL", +#' "INTRAMUSCULAR", +#' "INTRA-ARTERIAL", +#' NA, +#' "NON-STANDARD", +#' "RANDOM_VALUE", +#' "INTRA-ARTICULAR", +#' "TRANSDERMAL", +#' "OPHTHALMIC" +#' ) +#' ) +#' +#' assign_ct( +#' raw_dataset = md1, +#' raw_variable = "MDRTE", +#' study_ct = study_ct, +#' target_sdtm_variable = "CMROUTE", +#' target_sdtm_variable_codelist_code = "C66729", +#' target_dataset = cm_inter, +#' merge_to_topic_by = c("oak_id","raw_source","PATIENT_NUM") +#' ) +#' +#' @name assign +NULL + +#' @export +#' @rdname assign +assign_no_ct <- function(raw_dataset, + raw_variable, + target_sdtm_variable, + target_dataset = raw_dataset, + merge_to_topic_by = NULL) { + sdtm_assign( + raw_dat = raw_dataset, + raw_var = raw_variable, + tgt_var = target_sdtm_variable, + tgt_dat = target_dataset, + by = merge_to_topic_by + ) +} + +#' Derive an SDTM variable with controlled terminology +#' +#' [assign_ct()] maps a variable in a source dataset to a target SDTM variable +#' following controlled terminology recoding. +#' +#' @param raw_dataset The raw dataset. +#' @param raw_variable The raw variable. +#' @param target_sdtm_variable The target SDTM variable. +#' @param target_dataset Target dataset. By default the same as `raw_dataset`. +#' @param merge_to_topic_by If `target_dataset` is different than `raw_dataset`, +#' then this parameter defines keys to use in the join between `raw_dataset` +#' and `target_dataset`. +#' @param study_ct Study controlled terminology specification. +#' @param target_sdtm_variable_codelist_code A codelist code indicating which +#' subset of the controlled terminology to apply in the derivation. +#' +#' @returns The target dataset with the derived variable `target_sdtm_variable`. +#' +#' @examples +#' study_ct <- +#' tibble::tibble( +#' codelist_code = rep("C66729", 8L), +#' term_code = c( +#' "C28161", +#' "C38210", +#' "C38222", +#' "C38223", +#' "C38287", +#' "C38288", +#' "C38305", +#' "C38311" +#' ), +#' CodedData = c( +#' "INTRAMUSCULAR", +#' "EPIDURAL", +#' "INTRA-ARTERIAL", +#' "INTRA-ARTICULAR", +#' "OPHTHALMIC", +#' "ORAL", +#' "TRANSDERMAL", +#' "UNKNOWN" +#' ), +#' term_value = CodedData, +#' collected_value = c( +#' "IM (Intramuscular)", +#' "EP (Epidural)", +#' "IA (Intra-arterial)", +#' "IJ (Intra-articular)", +#' "OP (Ophthalmic)", +#' "PO (Oral)", +#' "DE (Transdermal)", +#' "Unknown" +#' ), +#' term_preferred_term = c( +#' "Intramuscular Route of Administration", +#' "Epidural Route of Administration", +#' "Intraarterial Route of Administration", +#' "Intraarticular Route of Administration", +#' "Ophthalmic Route of Administration", +#' "Oral Route of Administration", +#' "Transdermal Route of Administration", +#' "Unknown Route of Administration" +#' ), +#' term_synonyms = c(rep(NA, 5L), "Intraoral Route of Administration; PO", NA, NA), +#' raw_codelist = rep("ROUTE_CV1", 8L) +#' ) +#' +#' md1 <- +#' tibble::tibble( +#' oak_id = 1:14, +#' raw_source = "MD1", +#' PATIENT_NUM = 101:114, +#' MDRTE = c( +#' "PO (Oral)", +#' "PO (Oral)", +#' NA_character_, +#' "PO", +#' "Intraoral Route of Administration", +#' "PO (Oral)", +#' "IM (Intramuscular)", +#' "IA (Intra-arterial)", +#' "", +#' "Non-standard", +#' "random_value", +#' "IJ (Intra-articular)", +#' "TRANSDERMAL", +#' "OPHTHALMIC" +#' ) +#' ) +#' +#' assign_ct( +#' raw_dataset = md1, +#' raw_variable = "MDRTE", +#' study_ct = study_ct, +#' target_sdtm_variable = "CMROUTE", +#' target_sdtm_variable_codelist_code = "C66729" +#' ) +#' +#' cm_inter <- +#' tibble::tibble( +#' oak_id = 1:14, +#' raw_source = "MD1", +#' PATIENT_NUM = 101:114, +#' CMTRT = c( +#' "BABY ASPIRIN", +#' "CORTISPORIN", +#' "ASPIRIN", +#' "DIPHENHYDRAMINE HCL", +#' "PARCETEMOL", +#' "VOMIKIND", +#' "ZENFLOX OZ", +#' "AMITRYPTYLINE", +#' "BENADRYL", +#' "DIPHENHYDRAMINE HYDROCHLORIDE", +#' "TETRACYCLINE", +#' "BENADRYL", +#' "SOMINEX", +#' "ZQUILL" +#' ), +#' CMINDC = c( +#' NA, +#' "NAUSEA", +#' "ANEMIA", +#' "NAUSEA", +#' "PYREXIA", +#' "VOMITINGS", +#' "DIARHHEA", +#' "COLD", +#' "FEVER", +#' "LEG PAIN", +#' "FEVER", +#' "COLD", +#' "COLD", +#' "PAIN" +#' ), +#' CMROUTE = c( +#' "ORAL", +#' "ORAL", +#' NA, +#' "ORAL", +#' "ORAL", +#' "ORAL", +#' "INTRAMUSCULAR", +#' "INTRA-ARTERIAL", +#' NA, +#' "NON-STANDARD", +#' "RANDOM_VALUE", +#' "INTRA-ARTICULAR", +#' "TRANSDERMAL", +#' "OPHTHALMIC" +#' ) +#' ) +#' +#' assign_ct( +#' raw_dataset = md1, +#' raw_variable = "MDRTE", +#' study_ct = study_ct, +#' target_sdtm_variable = "CMROUTE", +#' target_sdtm_variable_codelist_code = "C66729", +#' target_dataset = cm_inter, +#' merge_to_topic_by = c("oak_id","raw_source","PATIENT_NUM") +#' ) +#' +#' +#' @export +#' @rdname assign +assign_ct <- function(raw_dataset, + raw_variable, + target_sdtm_variable, + target_dataset = raw_dataset, + merge_to_topic_by = NULL, + study_ct = NULL, + target_sdtm_variable_codelist_code = NULL) { + sdtm_assign( + raw_dat = raw_dataset, + raw_var = raw_variable, + tgt_var = target_sdtm_variable, + tgt_dat = target_dataset, + by = merge_to_topic_by, + ct = study_ct, + cl = target_sdtm_variable_codelist_code + ) +} diff --git a/R/ct.R b/R/ct.R new file mode 100644 index 00000000..714460b7 --- /dev/null +++ b/R/ct.R @@ -0,0 +1,104 @@ +#' Controlled terminology mappings +#' +#' @description +#' [ct_mappings()] takes a controlled terminology specification and returns the +#' mappings in the form of a [tibble][tibble::tibble-package] in long format, +#' i.e. the recoding of values in the `from` column to the `to` column values, +#' one mapping per row. +#' +#' The resulting mappings are unique, i.e. if `from` values are duplicated in +#' two `from` columns, the first column indicated in `from` takes precedence, +#' and only that mapping is retained in the controlled terminology map. +#' +#' @param ct Controlled terminology specification as a +#' [tibble][tibble::tibble-package]. Each row is for a mapped controlled term. +#' Controlled terms are expected in the column indicated by `to_col`. +#' @param from A character vector of column names indicating the variables +#' containing values to be recoded. +#' @param to A single string indicating the column whose values are to be +#' recoded into. +#' +#' @returns A [tibble][tibble::tibble-package] with two columns, `from` and +#' `to`, indicating the mapping of values, one per row. +#' +#' @examples +#' # example code +#' +#' +#' +#' +#' +#' +#' +#' @importFrom rlang .data +#' @keywords internal +ct_mappings <- function(ct, from = c("collected_value", "term_synonyms"), to = "term_value") { + + # TODO: Assertions and memoisation. + + cols <- c(to, from) + + ct_mappings <- + ct |> + dplyr::mutate(to = !!rlang::sym(to)) |> + tidyr::pivot_longer(cols = dplyr::all_of(cols), + values_to = "from", + names_to = "type") |> + dplyr::select(c("type", "from", "to")) |> + dplyr::mutate(type = factor(.data$type, levels = cols)) |> + dplyr::arrange(.data$type) |> + dplyr::select(-"type") |> + tidyr::drop_na(.data$from) |> + dplyr::mutate(from = str_split(.data$from)) |> + tidyr::unnest(from) |> + dplyr::filter(from != "") |> # In case the split resulted in empty strings. + dplyr::mutate(from = trimws(.data$from), to = trimws(.data$to)) |> + dplyr::distinct(.data$from, .keep_all = TRUE) + + ct_mappings +} + +#' Recode according to controlled terminology +#' +#' [ct_map()] recodes a vector following a controlled terminology. +#' +#' @param x A character vector of terms to be recoded following a controlled +#' terminology. +#' @param ct A [tibble][tibble::tibble-package] providing a controlled +#' terminology specification. +#' @param cl A character vector indicating a set of possible controlled +#' terminology code-lists codes to be used for recoding. By default (`NULL`) +#' all code-lists available in `ct` are used. +#' @param from A character vector of column names indicating the variables +#' containing values to be matched against for terminology recoding. +#' @param to A single string indicating the column whose values are to be +#' recoded into. +#' +#' @returns A character vector of terminology recoded values from `x`. If no +#' match is found in the controlled terminology spec provided in `ct`, then +#' `x` values are returned in uppercase. If `ct` is not provided `x` is +#' returned unchanged. +#' +#' @importFrom rlang %||% .data +#' @export +ct_map <- + function(x, + ct = NULL, + cl = NULL, + from = c("collected_value", "term_synonyms"), + to = "term_value") { + + ct %||% return(x) + + cl <- cl %||% unique(ct$codelist_code) + ct <- dplyr::filter(ct, .data$codelist_code %in% cl) + + mappings <- ct_mappings(ct, from = from, to = to) + recode( + x, + from = mappings$from, + to = mappings$to, + .no_match = toupper(x) + ) + + } diff --git a/R/hardcode_no_ct.R b/R/hardcode.R similarity index 51% rename from R/hardcode_no_ct.R rename to R/hardcode.R index c7249da3..f5720a6e 100644 --- a/R/hardcode_no_ct.R +++ b/R/hardcode.R @@ -1,8 +1,36 @@ +#' @importFrom rlang := +#' @keywords internal +sdtm_hardcode <- function(raw_dat, + raw_var, + tgt_var, + tgt_val, + tgt_dat = raw_dat, + by = NULL, + ct = NULL, + cl = NULL) { + + # TODO: Assertions. + + tgt_val <- ct_map(tgt_val, ct = ct, cl = cl) + + raw_dat |> + dplyr::right_join(y = tgt_dat, by = by) |> + dplyr::mutate("{tgt_var}" := recode(x = !!rlang::sym(raw_var), to = tgt_val)) |> + dplyr::select(-rlang::sym(raw_var)) |> + dplyr::relocate(tgt_var, .after = dplyr::last_col()) + +} + #' Derive an SDTM variable with a hardcoded value #' -#' [hardcode_no_ct()] maps a hardcoded value to a target SDTM variable that has +#' +#' @description +#' - [hardcode_no_ct()] maps a hardcoded value to a target SDTM variable that has #' no terminology restrictions. #' +#' - [hardcode_ct()] maps a hardcoded value to a target SDTM variable with +#' controlled terminology recoding. +#' #' @param raw_dataset The raw dataset. #' @param raw_variable The raw variable. #' @param target_sdtm_variable The target SDTM variable. @@ -11,6 +39,11 @@ #' @param merge_to_topic_by If `target_dataset` is different than `raw_dataset`, #' then this parameter defines keys to use in the join between `raw_dataset` #' and `target_dataset`. +#' @param study_ct Study controlled terminology specification. +#' @param target_sdtm_variable_codelist_code A codelist code indicating which +#' subset of the controlled terminology to apply in the derivation. +#' +#' @returns The target dataset with the derived variable `target_sdtm_variable`. #' #' @examples #' MD1 <- @@ -44,7 +77,6 @@ #' # Derive a new variable `CMCAT` by overwriting `MDRAW` with the #' # hardcoded value "GENERAL CONCOMITANT MEDICATIONS" with a prior join to #' # `target_dataset`. -#' #' hardcode_no_ct( #' raw_dataset = MD1, #' raw_variable = "MDRAW", @@ -54,17 +86,46 @@ #' merge_to_topic_by = c("oak_id", "raw_source", "patient_number") #' ) #' -#' @importFrom rlang := +#' @name harcode +NULL + #' @export +#' @rdname harcode hardcode_no_ct <- function(raw_dataset, raw_variable, target_sdtm_variable, target_hardcoded_value, target_dataset = raw_dataset, merge_to_topic_by = NULL) { - raw_dataset |> - dplyr::mutate("{target_sdtm_variable}" := overwrite(!!rlang::sym(raw_variable), target_hardcoded_value)) |> - dplyr::right_join(y = target_dataset, by = merge_to_topic_by) |> - dplyr::select(-rlang::sym(raw_variable)) |> - dplyr::relocate(target_sdtm_variable, .after = dplyr::last_col()) + sdtm_hardcode( + raw_dat = raw_dataset, + raw_var = raw_variable, + tgt_var = target_sdtm_variable, + tgt_val = target_hardcoded_value, + tgt_dat = target_dataset, + by = merge_to_topic_by + ) +} + +#' @export +#' @rdname harcode +hardcode_ct <- function(raw_dataset, + raw_variable, + target_sdtm_variable, + target_hardcoded_value, + target_dataset = raw_dataset, + merge_to_topic_by = NULL, + study_ct = NULL, + target_sdtm_variable_codelist_code = NULL) { + sdtm_hardcode( + raw_dat = raw_dataset, + raw_var = raw_variable, + tgt_var = target_sdtm_variable, + tgt_val = target_hardcoded_value, + tgt_dat = target_dataset, + by = merge_to_topic_by, + ct = study_ct, + cl = target_sdtm_variable_codelist_code + ) } + diff --git a/R/recode.R b/R/recode.R index 07fe9a09..a645bcb2 100644 --- a/R/recode.R +++ b/R/recode.R @@ -1,64 +1,8 @@ -#' Overwrite values +#' Determine Indices for Recoding #' -#' @description -#' [overwrite()] recodes values in `x` to a new set of values provided in `to`; -#' the values in `to` are recycled to match the length of `x`. By default, -#' missing values remain `NA`. -#' -#' @param x An atomic vector. -#' @param .na New value for missing values in `x`. Defaults to `NA`. -#' -#' @returns A vector of the same length of `x` with new values matching those -#' in `to`. -#' -#' @examples -#' x <- c(letters[1:4], NA, NA) -#' # Recode all values to `"x"` but keep `NA`. -#' sdtm.oak:::overwrite(x, to = "x") -#' -#' # Recode all values to `"x"` but recode `NA` to a new value. -#' sdtm.oak:::overwrite(x, to = "x", .na = "x") -#' sdtm.oak:::overwrite(x, to = "x", .na = "Absent") -#' -#' # If `to` is not a scalar, it is recycled and matched by position for -#' # replacement. -#' sdtm.oak:::overwrite(x, to = c("x", "y")) -#' -#' # `x` can be of other types besides `character`, e.g. replace integers to a -#' # hard-coded new integer value. -#' sdtm.oak:::overwrite(x = 1:5, to = 0) -#' -#' # Example involving `logical` vectors -#' sdtm.oak:::overwrite(x = c(TRUE, FALSE), to = FALSE) -#' -#' # Returned type will be a type compatible with both the types of `to` and -#' # `.na`. -#' sdtm.oak:::overwrite(x = c("sdtm", "adam"), to = 0) -#' sdtm.oak:::overwrite( -#' x = c("sdtm", "adam"), -#' to = 0, -#' .na = NA_character_ -#' ) -#' sdtm.oak:::overwrite( -#' x = c("sdtm", "adam"), -#' to = TRUE, -#' .na = NA_real_ -#' ) -#' -#' @keywords internal -overwrite <- function(x, to, .na = NA) { - # y <- rep_len(to, length(x)) - y <- rlang::rep_along(x, to) - y[is.na(x)] <- .na - - y -} - -#' Determine Indices for Rewriting -#' -#' [index_for_rewrite()] identifies the positions of elements in `x` that match +#' [index_for_recode()] identifies the positions of elements in `x` that match #' any of the values specified in the `from` vector. This function is primarily -#' used to facilitate the rewriting of values by pinpointing which elements in +#' used to facilitate the recoding of values by pinpointing which elements in #' `x` correspond to the `from` values and thus need to be replaced or updated. #' #' @param x A vector of values in which to search for matches. @@ -66,47 +10,47 @@ overwrite <- function(x, to, .na = NA) { #' @return An integer vector of the same length as `x`, containing the indices #' of the matched values from the `from` vector. If an element in `x` does not #' match any value in `from`, the corresponding position in the output will be -#' `NA`. This index information is critical for subsequent rewrite operations. +#' `NA`. This index information is critical for subsequent recoding operations. #' @examples -#' sdtm.oak:::index_for_rewrite(x = 1:5, from = c(2, 4)) +#' sdtm.oak:::index_for_recode(x = 1:5, from = c(2, 4)) #' #' @keywords internal -index_for_rewrite <- function(x, from) { +index_for_recode <- function(x, from) { match(x, from) } -#' Are values to be rewritten? +#' Are values to be recoded? #' -#' `are_to_rewrite` is a helper function designed to determine if any values +#' `are_to_recode` is a helper function designed to determine if any values #' in a vector `x` match the specified `from` values, indicating they are -#' candidates for recoding or rewriting. +#' candidates for recoding. #' #' @param x A vector of values that will be checked against the `from` vector. #' @param from A vector of values that `x` will be checked for matches against. #' @return A logical vector of the same length as `x`, where `TRUE` indicates #' that the corresponding value in `x` matches a value in `from` and -#' should be rewritten, and `FALSE` otherwise. If `x` is empty, returns +#' should be recoded, and `FALSE` otherwise. If `x` is empty, returns #' an empty logical vector. This function is intended for internal use #' and optimization in data transformation processes. #' @keywords internal #' @examples -#' sdtm.oak:::are_to_rewrite(x = 1:5, from = c(2, 4)) +#' sdtm.oak:::are_to_recode(x = 1:5, from = c(2, 4)) #' -#' sdtm.oak:::are_to_rewrite(letters[1:3], from = c("a", "c")) +#' sdtm.oak:::are_to_recode(letters[1:3], from = c("a", "c")) #' #' @keywords internal -are_to_rewrite <- function(x, from) { +are_to_recode <- function(x, from) { # match(x, from, nomatch = 0) != 0 - !is.na(index_for_rewrite(x, from)) + !is.na(index_for_recode(x, from)) } -#' Rewrite values +#' Recode values #' -#' [rewrite()] recodes values in `x` by matching elements in `from` onto values +#' [recode()] recodes values in `x` by matching elements in `from` onto values #' in `to`. #' #' @param x An atomic vector of values are to be recoded. -#' @param from A vector of values to be matched in `x` for rewriting. +#' @param from A vector of values to be matched in `x` for recoded. #' @param to A vector of values to be used as replacement for values in `from`. #' @param .no_match Value to be used as replacement when cases in `from` are not #' matched. @@ -116,17 +60,17 @@ are_to_rewrite <- function(x, from) { #' #' @examples #' x <- c("male", "female", "x", NA) -#' sdtm.oak:::rewrite(x, +#' sdtm.oak:::recode(x, #' from = c("male", "female"), #' to = c("M", "F") #' ) -#' sdtm.oak:::rewrite( +#' sdtm.oak:::recode( #' x, #' from = c("male", "female"), #' to = c("M", "F"), #' .no_match = "?" #' ) -#' sdtm.oak:::rewrite( +#' sdtm.oak:::recode( #' x, #' from = c("male", "female"), #' to = c("M", "F"), @@ -134,13 +78,14 @@ are_to_rewrite <- function(x, from) { #' ) #' #' @keywords internal -rewrite <- function(x, - from, - to, +recode <- function(x, + from = unique(na.omit(x)), + to = from, .no_match = x, .na = NA) { - to <- rlang::rep_along(x, to) - index <- index_for_rewrite(x, from) + # to <- rlang::rep_along(x, to) + to <- vctrs::vec_recycle(to, length(from)) + index <- index_for_recode(x, from) y <- ifelse(!is.na(index), to[index], .no_match) y[is.na(x)] <- .na diff --git a/R/sdtm.oak-package.R b/R/sdtm.oak-package.R index b1a48e6f..0ba23dc1 100644 --- a/R/sdtm.oak-package.R +++ b/R/sdtm.oak-package.R @@ -4,5 +4,6 @@ ## usethis namespace: start #' @importFrom tibble tibble #' @importFrom rlang .data +#' @importFrom stats na.omit ## usethis namespace: end NULL diff --git a/R/str_split.R b/R/str_split.R new file mode 100644 index 00000000..7b5b1125 --- /dev/null +++ b/R/str_split.R @@ -0,0 +1,13 @@ +str_split_ <- function(x, split = ";", quote = '"') { + scan( + text = x, + what = "character", + sep = split, + quote = quote, + quiet = TRUE + ) +} + +str_split <- function(x, split = ";", quote = '"') { + lapply(x, str_split_, split = split, quote = quote) +} diff --git a/inst/WORDLIST b/inst/WORDLIST index 36a406da..0a2a530f 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -11,3 +11,9 @@ iso hardcoded CDISC PMDA +recode +recodes +recoded +recoding +tibble +codelist diff --git a/man/are_to_rewrite.Rd b/man/are_to_recode.Rd similarity index 62% rename from man/are_to_rewrite.Rd rename to man/are_to_recode.Rd index 9fc7e753..bbc5750e 100644 --- a/man/are_to_rewrite.Rd +++ b/man/are_to_recode.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/recode.R -\name{are_to_rewrite} -\alias{are_to_rewrite} -\title{Are values to be rewritten?} +\name{are_to_recode} +\alias{are_to_recode} +\title{Are values to be recoded?} \usage{ -are_to_rewrite(x, from) +are_to_recode(x, from) } \arguments{ \item{x}{A vector of values that will be checked against the \code{from} vector.} @@ -14,19 +14,19 @@ are_to_rewrite(x, from) \value{ A logical vector of the same length as \code{x}, where \code{TRUE} indicates that the corresponding value in \code{x} matches a value in \code{from} and -should be rewritten, and \code{FALSE} otherwise. If \code{x} is empty, returns +should be recoded, and \code{FALSE} otherwise. If \code{x} is empty, returns an empty logical vector. This function is intended for internal use and optimization in data transformation processes. } \description{ -\code{are_to_rewrite} is a helper function designed to determine if any values +\code{are_to_recode} is a helper function designed to determine if any values in a vector \code{x} match the specified \code{from} values, indicating they are -candidates for recoding or rewriting. +candidates for recoding. } \examples{ -sdtm.oak:::are_to_rewrite(x = 1:5, from = c(2, 4)) +sdtm.oak:::are_to_recode(x = 1:5, from = c(2, 4)) -sdtm.oak:::are_to_rewrite(letters[1:3], from = c("a", "c")) +sdtm.oak:::are_to_recode(letters[1:3], from = c("a", "c")) } \keyword{internal} diff --git a/man/assign.Rd b/man/assign.Rd new file mode 100644 index 00000000..0b00a8dd --- /dev/null +++ b/man/assign.Rd @@ -0,0 +1,351 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/assign.R +\name{assign} +\alias{assign} +\alias{assign_no_ct} +\alias{assign_ct} +\title{Derive an SDTM variable} +\usage{ +assign_no_ct( + raw_dataset, + raw_variable, + target_sdtm_variable, + target_dataset = raw_dataset, + merge_to_topic_by = NULL +) + +assign_ct( + raw_dataset, + raw_variable, + target_sdtm_variable, + target_dataset = raw_dataset, + merge_to_topic_by = NULL, + study_ct = NULL, + target_sdtm_variable_codelist_code = NULL +) +} +\arguments{ +\item{raw_dataset}{The raw dataset.} + +\item{raw_variable}{The raw variable.} + +\item{target_sdtm_variable}{The target SDTM variable.} + +\item{target_dataset}{Target dataset. By default the same as \code{raw_dataset}.} + +\item{merge_to_topic_by}{If \code{target_dataset} is different than \code{raw_dataset}, +then this parameter defines keys to use in the join between \code{raw_dataset} +and \code{target_dataset}.} + +\item{study_ct}{Study controlled terminology specification.} + +\item{target_sdtm_variable_codelist_code}{A codelist code indicating which +subset of the controlled terminology to apply in the derivation.} +} +\value{ +The target dataset with the derived variable \code{target_sdtm_variable}. + +The target dataset with the derived variable \code{target_sdtm_variable}. +} +\description{ +\itemize{ +\item \code{\link[=assign_no_ct]{assign_no_ct()}} maps a variable in a source dataset to a target SDTM +variable that has no terminology restrictions. +\item \code{\link[=assign_ct]{assign_ct()}} maps a variable in a source dataset to a target SDTM variable +following controlled terminology recoding. +} + +\code{\link[=assign_ct]{assign_ct()}} maps a variable in a source dataset to a target SDTM variable +following controlled terminology recoding. +} +\examples{ +study_ct <- + tibble::tibble( + codelist_code = rep("C66729", 8L), + term_code = c( + "C28161", + "C38210", + "C38222", + "C38223", + "C38287", + "C38288", + "C38305", + "C38311" + ), + CodedData = c( + "INTRAMUSCULAR", + "EPIDURAL", + "INTRA-ARTERIAL", + "INTRA-ARTICULAR", + "OPHTHALMIC", + "ORAL", + "TRANSDERMAL", + "UNKNOWN" + ), + term_value = CodedData, + collected_value = c( + "IM (Intramuscular)", + "EP (Epidural)", + "IA (Intra-arterial)", + "IJ (Intra-articular)", + "OP (Ophthalmic)", + "PO (Oral)", + "DE (Transdermal)", + "Unknown" + ), + term_preferred_term = c( + "Intramuscular Route of Administration", + "Epidural Route of Administration", + "Intraarterial Route of Administration", + "Intraarticular Route of Administration", + "Ophthalmic Route of Administration", + "Oral Route of Administration", + "Transdermal Route of Administration", + "Unknown Route of Administration" + ), + term_synonyms = c(rep(NA, 5L), "Intraoral Route of Administration; PO", NA, NA), + raw_codelist = rep("ROUTE_CV1", 8L) + ) + +md1 <- + tibble::tibble( + oak_id = 1:14, + raw_source = "MD1", + PATIENT_NUM = 101:114, + MDRTE = c( + "PO (Oral)", + "PO (Oral)", + NA_character_, + "PO", + "Intraoral Route of Administration", + "PO (Oral)", + "IM (Intramuscular)", + "IA (Intra-arterial)", + "", + "Non-standard", + "random_value", + "IJ (Intra-articular)", + "TRANSDERMAL", + "OPHTHALMIC" + ) + ) + +assign_ct( + raw_dataset = md1, + raw_variable = "MDRTE", + study_ct = study_ct, + target_sdtm_variable = "CMROUTE", + target_sdtm_variable_codelist_code = "C66729" + ) + +cm_inter <- + tibble::tibble( + oak_id = 1:14, + raw_source = "MD1", + PATIENT_NUM = 101:114, + CMTRT = c( + "BABY ASPIRIN", + "CORTISPORIN", + "ASPIRIN", + "DIPHENHYDRAMINE HCL", + "PARCETEMOL", + "VOMIKIND", + "ZENFLOX OZ", + "AMITRYPTYLINE", + "BENADRYL", + "DIPHENHYDRAMINE HYDROCHLORIDE", + "TETRACYCLINE", + "BENADRYL", + "SOMINEX", + "ZQUILL" + ), + CMINDC = c( + NA, + "NAUSEA", + "ANEMIA", + "NAUSEA", + "PYREXIA", + "VOMITINGS", + "DIARHHEA", + "COLD", + "FEVER", + "LEG PAIN", + "FEVER", + "COLD", + "COLD", + "PAIN" + ), + CMROUTE = c( + "ORAL", + "ORAL", + NA, + "ORAL", + "ORAL", + "ORAL", + "INTRAMUSCULAR", + "INTRA-ARTERIAL", + NA, + "NON-STANDARD", + "RANDOM_VALUE", + "INTRA-ARTICULAR", + "TRANSDERMAL", + "OPHTHALMIC" + ) + ) + +assign_ct( + raw_dataset = md1, + raw_variable = "MDRTE", + study_ct = study_ct, + target_sdtm_variable = "CMROUTE", + target_sdtm_variable_codelist_code = "C66729", + target_dataset = cm_inter, + merge_to_topic_by = c("oak_id","raw_source","PATIENT_NUM") + ) + +study_ct <- + tibble::tibble( + codelist_code = rep("C66729", 8L), + term_code = c( + "C28161", + "C38210", + "C38222", + "C38223", + "C38287", + "C38288", + "C38305", + "C38311" + ), + CodedData = c( + "INTRAMUSCULAR", + "EPIDURAL", + "INTRA-ARTERIAL", + "INTRA-ARTICULAR", + "OPHTHALMIC", + "ORAL", + "TRANSDERMAL", + "UNKNOWN" + ), + term_value = CodedData, + collected_value = c( + "IM (Intramuscular)", + "EP (Epidural)", + "IA (Intra-arterial)", + "IJ (Intra-articular)", + "OP (Ophthalmic)", + "PO (Oral)", + "DE (Transdermal)", + "Unknown" + ), + term_preferred_term = c( + "Intramuscular Route of Administration", + "Epidural Route of Administration", + "Intraarterial Route of Administration", + "Intraarticular Route of Administration", + "Ophthalmic Route of Administration", + "Oral Route of Administration", + "Transdermal Route of Administration", + "Unknown Route of Administration" + ), + term_synonyms = c(rep(NA, 5L), "Intraoral Route of Administration; PO", NA, NA), + raw_codelist = rep("ROUTE_CV1", 8L) + ) + +md1 <- + tibble::tibble( + oak_id = 1:14, + raw_source = "MD1", + PATIENT_NUM = 101:114, + MDRTE = c( + "PO (Oral)", + "PO (Oral)", + NA_character_, + "PO", + "Intraoral Route of Administration", + "PO (Oral)", + "IM (Intramuscular)", + "IA (Intra-arterial)", + "", + "Non-standard", + "random_value", + "IJ (Intra-articular)", + "TRANSDERMAL", + "OPHTHALMIC" + ) + ) + +assign_ct( + raw_dataset = md1, + raw_variable = "MDRTE", + study_ct = study_ct, + target_sdtm_variable = "CMROUTE", + target_sdtm_variable_codelist_code = "C66729" + ) + +cm_inter <- + tibble::tibble( + oak_id = 1:14, + raw_source = "MD1", + PATIENT_NUM = 101:114, + CMTRT = c( + "BABY ASPIRIN", + "CORTISPORIN", + "ASPIRIN", + "DIPHENHYDRAMINE HCL", + "PARCETEMOL", + "VOMIKIND", + "ZENFLOX OZ", + "AMITRYPTYLINE", + "BENADRYL", + "DIPHENHYDRAMINE HYDROCHLORIDE", + "TETRACYCLINE", + "BENADRYL", + "SOMINEX", + "ZQUILL" + ), + CMINDC = c( + NA, + "NAUSEA", + "ANEMIA", + "NAUSEA", + "PYREXIA", + "VOMITINGS", + "DIARHHEA", + "COLD", + "FEVER", + "LEG PAIN", + "FEVER", + "COLD", + "COLD", + "PAIN" + ), + CMROUTE = c( + "ORAL", + "ORAL", + NA, + "ORAL", + "ORAL", + "ORAL", + "INTRAMUSCULAR", + "INTRA-ARTERIAL", + NA, + "NON-STANDARD", + "RANDOM_VALUE", + "INTRA-ARTICULAR", + "TRANSDERMAL", + "OPHTHALMIC" + ) + ) + +assign_ct( + raw_dataset = md1, + raw_variable = "MDRTE", + study_ct = study_ct, + target_sdtm_variable = "CMROUTE", + target_sdtm_variable_codelist_code = "C66729", + target_dataset = cm_inter, + merge_to_topic_by = c("oak_id","raw_source","PATIENT_NUM") + ) + + +} diff --git a/man/ct_map.Rd b/man/ct_map.Rd new file mode 100644 index 00000000..847d8a77 --- /dev/null +++ b/man/ct_map.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ct.R +\name{ct_map} +\alias{ct_map} +\title{Recode according to controlled terminology} +\usage{ +ct_map( + x, + ct = NULL, + cl = NULL, + from = c("collected_value", "term_synonyms"), + to = "term_value" +) +} +\arguments{ +\item{x}{A character vector of terms to be recoded following a controlled +terminology.} + +\item{ct}{A \link[tibble:tibble-package]{tibble} providing a controlled +terminology specification.} + +\item{cl}{A character vector indicating a set of possible controlled +terminology code-lists codes to be used for recoding. By default (\code{NULL}) +all code-lists available in \code{ct} are used.} + +\item{from}{A character vector of column names indicating the variables +containing values to be matched against for terminology recoding.} + +\item{to}{A single string indicating the column whose values are to be +recoded into.} +} +\value{ +A character vector of terminology recoded values from \code{x}. If no +match is found in the controlled terminology spec provided in \code{ct}, then +\code{x} values are returned in uppercase. If \code{ct} is not provided \code{x} is +returned unchanged. +} +\description{ +\code{\link[=ct_map]{ct_map()}} recodes a vector following a controlled terminology. +} diff --git a/man/ct_mappings.Rd b/man/ct_mappings.Rd new file mode 100644 index 00000000..2d7db0f5 --- /dev/null +++ b/man/ct_mappings.Rd @@ -0,0 +1,48 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ct.R +\name{ct_mappings} +\alias{ct_mappings} +\title{Controlled terminology mappings} +\usage{ +ct_mappings( + ct, + from = c("collected_value", "term_synonyms"), + to = "term_value" +) +} +\arguments{ +\item{ct}{Controlled terminology specification as a +\link[tibble:tibble-package]{tibble}. Each row is for a mapped controlled term. +Controlled terms are expected in the column indicated by \code{to_col}.} + +\item{from}{A character vector of column names indicating the variables +containing values to be recoded.} + +\item{to}{A single string indicating the column whose values are to be +recoded into.} +} +\value{ +A \link[tibble:tibble-package]{tibble} with two columns, \code{from} and +\code{to}, indicating the mapping of values, one per row. +} +\description{ +\code{\link[=ct_mappings]{ct_mappings()}} takes a controlled terminology specification and returns the +mappings in the form of a \link[tibble:tibble-package]{tibble} in long format, +i.e. the recoding of values in the \code{from} column to the \code{to} column values, +one mapping per row. + +The resulting mappings are unique, i.e. if \code{from} values are duplicated in +two \code{from} columns, the first column indicated in \code{from} takes precedence, +and only that mapping is retained in the controlled terminology map. +} +\examples{ +# example code + + + + + + + +} +\keyword{internal} diff --git a/man/hardcode_no_ct.Rd b/man/harcode.Rd similarity index 70% rename from man/hardcode_no_ct.Rd rename to man/harcode.Rd index cc37c81e..185263d2 100644 --- a/man/hardcode_no_ct.Rd +++ b/man/harcode.Rd @@ -1,7 +1,9 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/hardcode_no_ct.R -\name{hardcode_no_ct} +% Please edit documentation in R/hardcode.R +\name{harcode} +\alias{harcode} \alias{hardcode_no_ct} +\alias{hardcode_ct} \title{Derive an SDTM variable with a hardcoded value} \usage{ hardcode_no_ct( @@ -12,6 +14,17 @@ hardcode_no_ct( target_dataset = raw_dataset, merge_to_topic_by = NULL ) + +hardcode_ct( + raw_dataset, + raw_variable, + target_sdtm_variable, + target_hardcoded_value, + target_dataset = raw_dataset, + merge_to_topic_by = NULL, + study_ct = NULL, + target_sdtm_variable_codelist_code = NULL +) } \arguments{ \item{raw_dataset}{The raw dataset.} @@ -27,10 +40,22 @@ hardcode_no_ct( \item{merge_to_topic_by}{If \code{target_dataset} is different than \code{raw_dataset}, then this parameter defines keys to use in the join between \code{raw_dataset} and \code{target_dataset}.} + +\item{study_ct}{Study controlled terminology specification.} + +\item{target_sdtm_variable_codelist_code}{A codelist code indicating which +subset of the controlled terminology to apply in the derivation.} +} +\value{ +The target dataset with the derived variable \code{target_sdtm_variable}. } \description{ -\code{\link[=hardcode_no_ct]{hardcode_no_ct()}} maps a hardcoded value to a target SDTM variable that has +\itemize{ +\item \code{\link[=hardcode_no_ct]{hardcode_no_ct()}} maps a hardcoded value to a target SDTM variable that has no terminology restrictions. +\item \code{\link[=hardcode_ct]{hardcode_ct()}} maps a hardcoded value to a target SDTM variable with +controlled terminology recoding. +} } \examples{ MD1 <- @@ -64,7 +89,6 @@ CM_INTER <- # Derive a new variable `CMCAT` by overwriting `MDRAW` with the # hardcoded value "GENERAL CONCOMITANT MEDICATIONS" with a prior join to # `target_dataset`. - hardcode_no_ct( raw_dataset = MD1, raw_variable = "MDRAW", diff --git a/man/index_for_rewrite.Rd b/man/index_for_recode.Rd similarity index 61% rename from man/index_for_rewrite.Rd rename to man/index_for_recode.Rd index 6a729a56..2362517f 100644 --- a/man/index_for_rewrite.Rd +++ b/man/index_for_recode.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/recode.R -\name{index_for_rewrite} -\alias{index_for_rewrite} -\title{Determine Indices for Rewriting} +\name{index_for_recode} +\alias{index_for_recode} +\title{Determine Indices for Recoding} \usage{ -index_for_rewrite(x, from) +index_for_recode(x, from) } \arguments{ \item{x}{A vector of values in which to search for matches.} @@ -15,16 +15,16 @@ index_for_rewrite(x, from) An integer vector of the same length as \code{x}, containing the indices of the matched values from the \code{from} vector. If an element in \code{x} does not match any value in \code{from}, the corresponding position in the output will be -\code{NA}. This index information is critical for subsequent rewrite operations. +\code{NA}. This index information is critical for subsequent recoding operations. } \description{ -\code{\link[=index_for_rewrite]{index_for_rewrite()}} identifies the positions of elements in \code{x} that match +\code{\link[=index_for_recode]{index_for_recode()}} identifies the positions of elements in \code{x} that match any of the values specified in the \code{from} vector. This function is primarily -used to facilitate the rewriting of values by pinpointing which elements in +used to facilitate the recoding of values by pinpointing which elements in \code{x} correspond to the \code{from} values and thus need to be replaced or updated. } \examples{ -sdtm.oak:::index_for_rewrite(x = 1:5, from = c(2, 4)) +sdtm.oak:::index_for_recode(x = 1:5, from = c(2, 4)) } \keyword{internal} diff --git a/man/overwrite.Rd b/man/overwrite.Rd deleted file mode 100644 index 6d608bdf..00000000 --- a/man/overwrite.Rd +++ /dev/null @@ -1,58 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/recode.R -\name{overwrite} -\alias{overwrite} -\title{Overwrite values} -\usage{ -overwrite(x, to, .na = NA) -} -\arguments{ -\item{x}{An atomic vector.} - -\item{.na}{New value for missing values in \code{x}. Defaults to \code{NA}.} -} -\value{ -A vector of the same length of \code{x} with new values matching those -in \code{to}. -} -\description{ -\code{\link[=overwrite]{overwrite()}} recodes values in \code{x} to a new set of values provided in \code{to}; -the values in \code{to} are recycled to match the length of \code{x}. By default, -missing values remain \code{NA}. -} -\examples{ -x <- c(letters[1:4], NA, NA) -# Recode all values to `"x"` but keep `NA`. -sdtm.oak:::overwrite(x, to = "x") - -# Recode all values to `"x"` but recode `NA` to a new value. -sdtm.oak:::overwrite(x, to = "x", .na = "x") -sdtm.oak:::overwrite(x, to = "x", .na = "Absent") - -# If `to` is not a scalar, it is recycled and matched by position for -# replacement. -sdtm.oak:::overwrite(x, to = c("x", "y")) - -# `x` can be of other types besides `character`, e.g. replace integers to a -# hard-coded new integer value. -sdtm.oak:::overwrite(x = 1:5, to = 0) - -# Example involving `logical` vectors -sdtm.oak:::overwrite(x = c(TRUE, FALSE), to = FALSE) - -# Returned type will be a type compatible with both the types of `to` and -# `.na`. -sdtm.oak:::overwrite(x = c("sdtm", "adam"), to = 0) -sdtm.oak:::overwrite( - x = c("sdtm", "adam"), - to = 0, - .na = NA_character_ -) -sdtm.oak:::overwrite( - x = c("sdtm", "adam"), - to = TRUE, - .na = NA_real_ -) - -} -\keyword{internal} diff --git a/man/rewrite.Rd b/man/recode.Rd similarity index 67% rename from man/rewrite.Rd rename to man/recode.Rd index 6f25762f..b7eb79e8 100644 --- a/man/rewrite.Rd +++ b/man/recode.Rd @@ -1,15 +1,15 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/recode.R -\name{rewrite} -\alias{rewrite} -\title{Rewrite values} +\name{recode} +\alias{recode} +\title{Recode values} \usage{ -rewrite(x, from, to, .no_match = x, .na = NA) +recode(x, from = unique(na.omit(x)), to = from, .no_match = x, .na = NA) } \arguments{ \item{x}{An atomic vector of values are to be recoded.} -\item{from}{A vector of values to be matched in \code{x} for rewriting.} +\item{from}{A vector of values to be matched in \code{x} for recoded.} \item{to}{A vector of values to be used as replacement for values in \code{from}.} @@ -22,22 +22,22 @@ matched.} A vector of recoded values. } \description{ -\code{\link[=rewrite]{rewrite()}} recodes values in \code{x} by matching elements in \code{from} onto values +\code{\link[=recode]{recode()}} recodes values in \code{x} by matching elements in \code{from} onto values in \code{to}. } \examples{ x <- c("male", "female", "x", NA) -sdtm.oak:::rewrite(x, +sdtm.oak:::recode(x, from = c("male", "female"), to = c("M", "F") ) -sdtm.oak:::rewrite( +sdtm.oak:::recode( x, from = c("male", "female"), to = c("M", "F"), .no_match = "?" ) -sdtm.oak:::rewrite( +sdtm.oak:::recode( x, from = c("male", "female"), to = c("M", "F"), From ec5a9e4d656bd06467d60ed7cf9c30a79e03e69f Mon Sep 17 00:00:00 2001 From: Ram Ganapathy Date: Wed, 13 Mar 2024 10:51:00 -0700 Subject: [PATCH 05/80] hardcode_no_ct algorithm code changes (#45) * hardcode_no_ct algorithm code changes * harcode_ct working as expected * assign_ct and assign_no_ct works great. * address review comments --- R/assign.R | 176 +++++++++++++++++---------------------------------- R/hardcode.R | 39 ++++++++---- 2 files changed, 84 insertions(+), 131 deletions(-) diff --git a/R/assign.R b/R/assign.R index bdaf46df..deb4c6f5 100644 --- a/R/assign.R +++ b/R/assign.R @@ -2,18 +2,51 @@ sdtm_assign <- function(raw_dat, raw_var, tgt_var, - tgt_dat = raw_dat, - by = NULL, + tgt_dat, + by_var = NULL, ct = NULL, cl = NULL) { # TODO: Assertions. - raw_dat |> - dplyr::right_join(y = tgt_dat, by = by) |> - dplyr::mutate("{tgt_var}" := ct_map(!!rlang::sym(raw_var), ct = ct, cl = cl)) |> - dplyr::select(-rlang::sym(raw_var)) |> - dplyr::relocate(tgt_var, .after = dplyr::last_col()) + # When target dataset and by_var variables are provided, + # the tar_var is added to the input target dataset. + # This if block deals when ct is not provided. + if((!is.null(by_var) && is.null(ct))){ + tgt_dat_out <- raw_dat |> + dplyr::right_join(y = tgt_dat, by = by_var) |> + dplyr::mutate("{tgt_var}" := !!rlang::sym(raw_var)) |> + dplyr::select(-rlang::sym(raw_var)) |> + dplyr::relocate(tgt_var, .after = dplyr::last_col()) + } else if (is.null(by_var) && is.null(ct)) { + # When target dataset and by_var variables are NOT provided, + # the tgt_dat_out is created with tar_var & oak_id_vars. + # This if block deals when ct is not provided. + tgt_dat_out <- raw_dat |> + dplyr::select(oak_id, raw_source, patient_number,rlang::sym(raw_var)) |> + dplyr::mutate("{tgt_var}" := !!rlang::sym(raw_var)) |> + dplyr::select(-rlang::sym(raw_var)) + } else if (is.null(by_var) && !is.null(ct)) { + # When target dataset and by_var variables are NOT provided, + # the tgt_dat_out is created with tar_var & oak_id_vars. + # This if block deals when ct is provided. + tgt_dat_out <- raw_dat |> + dplyr::select(oak_id, raw_source, patient_number,rlang::sym(raw_var)) |> + dplyr::mutate("{tgt_var}" := ct_map(!!rlang::sym(raw_var), ct = ct, cl = cl)) |> + dplyr::select(-rlang::sym(raw_var)) |> + dplyr::relocate(tgt_var, .after = dplyr::last_col()) + } else if (!is.null(by_var) && !is.null(ct)) { + # When target dataset and by_var variables are provided, + # the tar_var is added to the input target dataset. + # This if block deals when ct is provided. + tgt_dat_out <- raw_dat |> + dplyr::right_join(y = tgt_dat, by = by_var) |> + dplyr::mutate("{tgt_var}" := ct_map(!!rlang::sym(raw_var), ct = ct, cl = cl)) |> + dplyr::select(-rlang::sym(raw_var)) |> + dplyr::relocate(tgt_var, .after = dplyr::last_col()) + } + + return(tgt_dat_out) } @@ -40,90 +73,29 @@ sdtm_assign <- function(raw_dat, #' @returns The target dataset with the derived variable `target_sdtm_variable`. #' #' @examples -#' study_ct <- -#' tibble::tibble( -#' codelist_code = rep("C66729", 8L), -#' term_code = c( -#' "C28161", -#' "C38210", -#' "C38222", -#' "C38223", -#' "C38287", -#' "C38288", -#' "C38305", -#' "C38311" -#' ), -#' CodedData = c( -#' "INTRAMUSCULAR", -#' "EPIDURAL", -#' "INTRA-ARTERIAL", -#' "INTRA-ARTICULAR", -#' "OPHTHALMIC", -#' "ORAL", -#' "TRANSDERMAL", -#' "UNKNOWN" -#' ), -#' term_value = CodedData, -#' collected_value = c( -#' "IM (Intramuscular)", -#' "EP (Epidural)", -#' "IA (Intra-arterial)", -#' "IJ (Intra-articular)", -#' "OP (Ophthalmic)", -#' "PO (Oral)", -#' "DE (Transdermal)", -#' "Unknown" -#' ), -#' term_preferred_term = c( -#' "Intramuscular Route of Administration", -#' "Epidural Route of Administration", -#' "Intraarterial Route of Administration", -#' "Intraarticular Route of Administration", -#' "Ophthalmic Route of Administration", -#' "Oral Route of Administration", -#' "Transdermal Route of Administration", -#' "Unknown Route of Administration" -#' ), -#' term_synonyms = c(rep(NA, 5L), "Intraoral Route of Administration; PO", NA, NA), -#' raw_codelist = rep("ROUTE_CV1", 8L) -#' ) #' #' md1 <- #' tibble::tibble( #' oak_id = 1:14, #' raw_source = "MD1", -#' PATIENT_NUM = 101:114, -#' MDRTE = c( -#' "PO (Oral)", -#' "PO (Oral)", -#' NA_character_, -#' "PO", -#' "Intraoral Route of Administration", -#' "PO (Oral)", -#' "IM (Intramuscular)", -#' "IA (Intra-arterial)", -#' "", -#' "Non-standard", -#' "random_value", -#' "IJ (Intra-articular)", -#' "TRANSDERMAL", -#' "OPHTHALMIC" +#' patient_number = 101:114, +#' MDIND = c( "NAUSEA", "NAUSEA", "ANEMIA", "NAUSEA", "PYREXIA", +#' "VOMITINGS", "DIARHHEA", "COLD", +#' "FEVER", "LEG PAIN", "FEVER", "COLD", "COLD", "PAIN" #' ) #' ) #' -#' assign_ct( +#' assign_no_ct( #' raw_dataset = md1, -#' raw_variable = "MDRTE", -#' study_ct = study_ct, -#' target_sdtm_variable = "CMROUTE", -#' target_sdtm_variable_codelist_code = "C66729" +#' raw_variable = "MDIND", +#' target_sdtm_variable = "CMINDC", #' ) #' #' cm_inter <- #' tibble::tibble( #' oak_id = 1:14, #' raw_source = "MD1", -#' PATIENT_NUM = 101:114, +#' patient_number = 101:114, #' CMTRT = c( #' "BABY ASPIRIN", #' "CORTISPORIN", @@ -140,22 +112,6 @@ sdtm_assign <- function(raw_dat, #' "SOMINEX", #' "ZQUILL" #' ), -#' CMINDC = c( -#' NA, -#' "NAUSEA", -#' "ANEMIA", -#' "NAUSEA", -#' "PYREXIA", -#' "VOMITINGS", -#' "DIARHHEA", -#' "COLD", -#' "FEVER", -#' "LEG PAIN", -#' "FEVER", -#' "COLD", -#' "COLD", -#' "PAIN" -#' ), #' CMROUTE = c( #' "ORAL", #' "ORAL", @@ -176,12 +132,10 @@ sdtm_assign <- function(raw_dat, #' #' assign_ct( #' raw_dataset = md1, -#' raw_variable = "MDRTE", -#' study_ct = study_ct, -#' target_sdtm_variable = "CMROUTE", -#' target_sdtm_variable_codelist_code = "C66729", +#' raw_variable = "MDIND", +#' target_sdtm_variable = "CMINDC", #' target_dataset = cm_inter, -#' merge_to_topic_by = c("oak_id","raw_source","PATIENT_NUM") +#' merge_to_topic_by = c("oak_id","raw_source","patient_number") #' ) #' #' @name assign @@ -192,14 +146,14 @@ NULL assign_no_ct <- function(raw_dataset, raw_variable, target_sdtm_variable, - target_dataset = raw_dataset, + target_dataset = NULL, merge_to_topic_by = NULL) { sdtm_assign( raw_dat = raw_dataset, raw_var = raw_variable, tgt_var = target_sdtm_variable, tgt_dat = target_dataset, - by = merge_to_topic_by + by_var = merge_to_topic_by ) } @@ -274,7 +228,7 @@ assign_no_ct <- function(raw_dataset, #' tibble::tibble( #' oak_id = 1:14, #' raw_source = "MD1", -#' PATIENT_NUM = 101:114, +#' patient_number = 101:114, #' MDRTE = c( #' "PO (Oral)", #' "PO (Oral)", @@ -305,7 +259,7 @@ assign_no_ct <- function(raw_dataset, #' tibble::tibble( #' oak_id = 1:14, #' raw_source = "MD1", -#' PATIENT_NUM = 101:114, +#' patient_number = 101:114, #' CMTRT = c( #' "BABY ASPIRIN", #' "CORTISPORIN", @@ -337,22 +291,6 @@ assign_no_ct <- function(raw_dataset, #' "COLD", #' "COLD", #' "PAIN" -#' ), -#' CMROUTE = c( -#' "ORAL", -#' "ORAL", -#' NA, -#' "ORAL", -#' "ORAL", -#' "ORAL", -#' "INTRAMUSCULAR", -#' "INTRA-ARTERIAL", -#' NA, -#' "NON-STANDARD", -#' "RANDOM_VALUE", -#' "INTRA-ARTICULAR", -#' "TRANSDERMAL", -#' "OPHTHALMIC" #' ) #' ) #' @@ -363,7 +301,7 @@ assign_no_ct <- function(raw_dataset, #' target_sdtm_variable = "CMROUTE", #' target_sdtm_variable_codelist_code = "C66729", #' target_dataset = cm_inter, -#' merge_to_topic_by = c("oak_id","raw_source","PATIENT_NUM") +#' merge_to_topic_by = c("oak_id","raw_source","patient_number") #' ) #' #' @@ -381,7 +319,7 @@ assign_ct <- function(raw_dataset, raw_var = raw_variable, tgt_var = target_sdtm_variable, tgt_dat = target_dataset, - by = merge_to_topic_by, + by_var = merge_to_topic_by, ct = study_ct, cl = target_sdtm_variable_codelist_code ) diff --git a/R/hardcode.R b/R/hardcode.R index f5720a6e..ed4d71e0 100644 --- a/R/hardcode.R +++ b/R/hardcode.R @@ -5,7 +5,7 @@ sdtm_hardcode <- function(raw_dat, tgt_var, tgt_val, tgt_dat = raw_dat, - by = NULL, + by_var = NULL, ct = NULL, cl = NULL) { @@ -13,11 +13,26 @@ sdtm_hardcode <- function(raw_dat, tgt_val <- ct_map(tgt_val, ct = ct, cl = cl) - raw_dat |> - dplyr::right_join(y = tgt_dat, by = by) |> - dplyr::mutate("{tgt_var}" := recode(x = !!rlang::sym(raw_var), to = tgt_val)) |> - dplyr::select(-rlang::sym(raw_var)) |> - dplyr::relocate(tgt_var, .after = dplyr::last_col()) + # When target dataset and by_var variables are provided, + # the tar_var is added to the input target dataset. + if((!is.null(by_var))){ + tgt_dat_out <- raw_dat |> + #we need to keep only the required variables in the input raw dataset + dplyr::select(dplyr::all_of(by_var), rlang::sym(raw_var)) |> + dplyr::right_join(y = tgt_dat, by = by_var) |> + dplyr::mutate("{tgt_var}" := recode(x = !!rlang::sym(raw_var), to = tgt_val)) |> + dplyr::select(-rlang::sym(raw_var)) |> + dplyr::relocate(tgt_var, .after = dplyr::last_col()) + } else { + # When target dataset and by_var variables are NOT provided, + # the tgt_dat_out is created with tar_var & oak_id_vars. + tgt_dat_out <- raw_dat |> + dplyr::select(oak_id, raw_source, patient_number,rlang::sym(raw_var)) |> + dplyr::mutate("{tgt_var}" := recode(x = !!rlang::sym(raw_var), to = tgt_val)) |> + dplyr::select(-rlang::sym(raw_var)) + } + + return(tgt_dat_out) } @@ -95,7 +110,7 @@ hardcode_no_ct <- function(raw_dataset, raw_variable, target_sdtm_variable, target_hardcoded_value, - target_dataset = raw_dataset, + target_dataset = NULL, merge_to_topic_by = NULL) { sdtm_hardcode( raw_dat = raw_dataset, @@ -103,7 +118,7 @@ hardcode_no_ct <- function(raw_dataset, tgt_var = target_sdtm_variable, tgt_val = target_hardcoded_value, tgt_dat = target_dataset, - by = merge_to_topic_by + by_var = merge_to_topic_by ) } @@ -113,17 +128,17 @@ hardcode_ct <- function(raw_dataset, raw_variable, target_sdtm_variable, target_hardcoded_value, - target_dataset = raw_dataset, + target_dataset = NULL, merge_to_topic_by = NULL, - study_ct = NULL, - target_sdtm_variable_codelist_code = NULL) { + study_ct, + target_sdtm_variable_codelist_code) { sdtm_hardcode( raw_dat = raw_dataset, raw_var = raw_variable, tgt_var = target_sdtm_variable, tgt_val = target_hardcoded_value, tgt_dat = target_dataset, - by = merge_to_topic_by, + by_var = merge_to_topic_by, ct = study_ct, cl = target_sdtm_variable_codelist_code ) From 0333d95292377f2a4ddf7b762aba04a711318a18 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Thu, 14 Mar 2024 00:02:08 +0000 Subject: [PATCH 06/80] Add `oak_id_vars()` --- R/oak_id_vars.R | 26 ++++++++++++++++++++++++++ man/oak_id_vars.Rd | 30 ++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) create mode 100644 R/oak_id_vars.R create mode 100644 man/oak_id_vars.Rd diff --git a/R/oak_id_vars.R b/R/oak_id_vars.R new file mode 100644 index 00000000..1eace0e1 --- /dev/null +++ b/R/oak_id_vars.R @@ -0,0 +1,26 @@ +#' Raw dataset keys +#' +#' [oak_id_vars()] is a helper function providing the variable (column) names to +#' be regarded as keys in [tibbles][tibble::tibble-package] representing raw +#' datasets. By default, the set of names is +#' `r knitr::combine_words(oak_id_vars())`. Extra variable names may be +#' indicated and passed in `extra_vars` which are appended to the default names. +#' +#' @param extra_vars A character vector of extra column names to be appended to +#' the default names: `r knitr::combine_words(oak_id_vars())`. +#' +#' @returns A character vector of column names to be regarded +#' as keys in [tibbles][tibble::tibble-package] representing raw datasets. +#' +#' @examples +#' sdtm.oak:::oak_id_vars() +#' +#' sdtm.oak:::oak_id_vars(extra_vars = "sample_id") +#' +#' @keywords internal +oak_id_vars <- function(extra_vars = NULL) { + + admiraldev::assert_character_vector(extra_vars, optional = TRUE) + unique(c("oak_id", "raw_source", "patient_number", extra_vars)) + +} diff --git a/man/oak_id_vars.Rd b/man/oak_id_vars.Rd new file mode 100644 index 00000000..34823227 --- /dev/null +++ b/man/oak_id_vars.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/oak_id_vars.R +\name{oak_id_vars} +\alias{oak_id_vars} +\title{Raw dataset keys} +\usage{ +oak_id_vars(extra_vars = NULL) +} +\arguments{ +\item{extra_vars}{A character vector of extra column names to be appended to +the default names: oak_id, raw_source, and patient_number.} +} +\value{ +A character vector of column names to be regarded +as keys in \link[tibble:tibble-package]{tibbles} representing raw datasets. +} +\description{ +\code{\link[=oak_id_vars]{oak_id_vars()}} is a helper function providing the variable (column) names to +be regarded as keys in \link[tibble:tibble-package]{tibbles} representing raw +datasets. By default, the set of names is +oak_id, raw_source, and patient_number. Extra variable names may be +indicated and passed in \code{extra_vars} which are appended to the default names. +} +\examples{ +sdtm.oak:::oak_id_vars() + +sdtm.oak:::oak_id_vars(extra_vars = "sample_id") + +} +\keyword{internal} From 7fd771677f7333b3213e200694ef0f74db818a86 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Thu, 14 Mar 2024 00:02:51 +0000 Subject: [PATCH 07/80] Fix typo in `recode()` --- R/recode.R | 2 +- man/recode.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/recode.R b/R/recode.R index a645bcb2..88ba8275 100644 --- a/R/recode.R +++ b/R/recode.R @@ -50,7 +50,7 @@ are_to_recode <- function(x, from) { #' in `to`. #' #' @param x An atomic vector of values are to be recoded. -#' @param from A vector of values to be matched in `x` for recoded. +#' @param from A vector of values to be matched in `x` for recoding. #' @param to A vector of values to be used as replacement for values in `from`. #' @param .no_match Value to be used as replacement when cases in `from` are not #' matched. diff --git a/man/recode.Rd b/man/recode.Rd index b7eb79e8..aca082ac 100644 --- a/man/recode.Rd +++ b/man/recode.Rd @@ -9,7 +9,7 @@ recode(x, from = unique(na.omit(x)), to = from, .no_match = x, .na = NA) \arguments{ \item{x}{An atomic vector of values are to be recoded.} -\item{from}{A vector of values to be matched in \code{x} for recoded.} +\item{from}{A vector of values to be matched in \code{x} for recoding.} \item{to}{A vector of values to be used as replacement for values in \code{from}.} From 802aacc1ca75144ad0e449d27188f27cf8ce645b Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Thu, 14 Mar 2024 00:30:24 +0000 Subject: [PATCH 08/80] Simplify `oak_id_vars()` docs --- R/oak_id_vars.R | 2 +- man/oak_id_vars.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/oak_id_vars.R b/R/oak_id_vars.R index 1eace0e1..cf6853be 100644 --- a/R/oak_id_vars.R +++ b/R/oak_id_vars.R @@ -10,7 +10,7 @@ #' the default names: `r knitr::combine_words(oak_id_vars())`. #' #' @returns A character vector of column names to be regarded -#' as keys in [tibbles][tibble::tibble-package] representing raw datasets. +#' as keys in raw datasets. #' #' @examples #' sdtm.oak:::oak_id_vars() diff --git a/man/oak_id_vars.Rd b/man/oak_id_vars.Rd index 34823227..af5550a3 100644 --- a/man/oak_id_vars.Rd +++ b/man/oak_id_vars.Rd @@ -12,7 +12,7 @@ the default names: oak_id, raw_source, and patient_number.} } \value{ A character vector of column names to be regarded -as keys in \link[tibble:tibble-package]{tibbles} representing raw datasets. +as keys in raw datasets. } \description{ \code{\link[=oak_id_vars]{oak_id_vars()}} is a helper function providing the variable (column) names to From 7fd07b42772020a9c8b7b73f55a5a38ff57f052f Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Thu, 14 Mar 2024 00:32:04 +0000 Subject: [PATCH 09/80] Update `assign_*` and `hardcode_*` implementations --- R/assign.R | 4 +- R/hardcode.R | 2 +- man/assign.Rd | 125 ++++++------------------------------------------- man/harcode.Rd | 8 ++-- 4 files changed, 22 insertions(+), 117 deletions(-) diff --git a/R/assign.R b/R/assign.R index deb4c6f5..c4864579 100644 --- a/R/assign.R +++ b/R/assign.R @@ -23,7 +23,7 @@ sdtm_assign <- function(raw_dat, # the tgt_dat_out is created with tar_var & oak_id_vars. # This if block deals when ct is not provided. tgt_dat_out <- raw_dat |> - dplyr::select(oak_id, raw_source, patient_number,rlang::sym(raw_var)) |> + dplyr::select(c(oak_id_vars(), raw_var)) |> dplyr::mutate("{tgt_var}" := !!rlang::sym(raw_var)) |> dplyr::select(-rlang::sym(raw_var)) } else if (is.null(by_var) && !is.null(ct)) { @@ -31,7 +31,7 @@ sdtm_assign <- function(raw_dat, # the tgt_dat_out is created with tar_var & oak_id_vars. # This if block deals when ct is provided. tgt_dat_out <- raw_dat |> - dplyr::select(oak_id, raw_source, patient_number,rlang::sym(raw_var)) |> + dplyr::select(c(oak_id_vars(), raw_var)) |> dplyr::mutate("{tgt_var}" := ct_map(!!rlang::sym(raw_var), ct = ct, cl = cl)) |> dplyr::select(-rlang::sym(raw_var)) |> dplyr::relocate(tgt_var, .after = dplyr::last_col()) diff --git a/R/hardcode.R b/R/hardcode.R index ed4d71e0..3c836f25 100644 --- a/R/hardcode.R +++ b/R/hardcode.R @@ -27,7 +27,7 @@ sdtm_hardcode <- function(raw_dat, # When target dataset and by_var variables are NOT provided, # the tgt_dat_out is created with tar_var & oak_id_vars. tgt_dat_out <- raw_dat |> - dplyr::select(oak_id, raw_source, patient_number,rlang::sym(raw_var)) |> + dplyr::select(c(oak_id_vars(), raw_var)) |> dplyr::mutate("{tgt_var}" := recode(x = !!rlang::sym(raw_var), to = tgt_val)) |> dplyr::select(-rlang::sym(raw_var)) } diff --git a/man/assign.Rd b/man/assign.Rd index 0b00a8dd..8bb536cd 100644 --- a/man/assign.Rd +++ b/man/assign.Rd @@ -10,7 +10,7 @@ assign_no_ct( raw_dataset, raw_variable, target_sdtm_variable, - target_dataset = raw_dataset, + target_dataset = NULL, merge_to_topic_by = NULL ) @@ -59,90 +59,29 @@ following controlled terminology recoding. following controlled terminology recoding. } \examples{ -study_ct <- - tibble::tibble( - codelist_code = rep("C66729", 8L), - term_code = c( - "C28161", - "C38210", - "C38222", - "C38223", - "C38287", - "C38288", - "C38305", - "C38311" - ), - CodedData = c( - "INTRAMUSCULAR", - "EPIDURAL", - "INTRA-ARTERIAL", - "INTRA-ARTICULAR", - "OPHTHALMIC", - "ORAL", - "TRANSDERMAL", - "UNKNOWN" - ), - term_value = CodedData, - collected_value = c( - "IM (Intramuscular)", - "EP (Epidural)", - "IA (Intra-arterial)", - "IJ (Intra-articular)", - "OP (Ophthalmic)", - "PO (Oral)", - "DE (Transdermal)", - "Unknown" - ), - term_preferred_term = c( - "Intramuscular Route of Administration", - "Epidural Route of Administration", - "Intraarterial Route of Administration", - "Intraarticular Route of Administration", - "Ophthalmic Route of Administration", - "Oral Route of Administration", - "Transdermal Route of Administration", - "Unknown Route of Administration" - ), - term_synonyms = c(rep(NA, 5L), "Intraoral Route of Administration; PO", NA, NA), - raw_codelist = rep("ROUTE_CV1", 8L) - ) md1 <- tibble::tibble( oak_id = 1:14, raw_source = "MD1", - PATIENT_NUM = 101:114, - MDRTE = c( - "PO (Oral)", - "PO (Oral)", - NA_character_, - "PO", - "Intraoral Route of Administration", - "PO (Oral)", - "IM (Intramuscular)", - "IA (Intra-arterial)", - "", - "Non-standard", - "random_value", - "IJ (Intra-articular)", - "TRANSDERMAL", - "OPHTHALMIC" + patient_number = 101:114, + MDIND = c( "NAUSEA", "NAUSEA", "ANEMIA", "NAUSEA", "PYREXIA", + "VOMITINGS", "DIARHHEA", "COLD", + "FEVER", "LEG PAIN", "FEVER", "COLD", "COLD", "PAIN" ) ) -assign_ct( +assign_no_ct( raw_dataset = md1, - raw_variable = "MDRTE", - study_ct = study_ct, - target_sdtm_variable = "CMROUTE", - target_sdtm_variable_codelist_code = "C66729" + raw_variable = "MDIND", + target_sdtm_variable = "CMINDC", ) cm_inter <- tibble::tibble( oak_id = 1:14, raw_source = "MD1", - PATIENT_NUM = 101:114, + patient_number = 101:114, CMTRT = c( "BABY ASPIRIN", "CORTISPORIN", @@ -159,22 +98,6 @@ cm_inter <- "SOMINEX", "ZQUILL" ), - CMINDC = c( - NA, - "NAUSEA", - "ANEMIA", - "NAUSEA", - "PYREXIA", - "VOMITINGS", - "DIARHHEA", - "COLD", - "FEVER", - "LEG PAIN", - "FEVER", - "COLD", - "COLD", - "PAIN" - ), CMROUTE = c( "ORAL", "ORAL", @@ -195,12 +118,10 @@ cm_inter <- assign_ct( raw_dataset = md1, - raw_variable = "MDRTE", - study_ct = study_ct, - target_sdtm_variable = "CMROUTE", - target_sdtm_variable_codelist_code = "C66729", + raw_variable = "MDIND", + target_sdtm_variable = "CMINDC", target_dataset = cm_inter, - merge_to_topic_by = c("oak_id","raw_source","PATIENT_NUM") + merge_to_topic_by = c("oak_id","raw_source","patient_number") ) study_ct <- @@ -255,7 +176,7 @@ md1 <- tibble::tibble( oak_id = 1:14, raw_source = "MD1", - PATIENT_NUM = 101:114, + patient_number = 101:114, MDRTE = c( "PO (Oral)", "PO (Oral)", @@ -286,7 +207,7 @@ cm_inter <- tibble::tibble( oak_id = 1:14, raw_source = "MD1", - PATIENT_NUM = 101:114, + patient_number = 101:114, CMTRT = c( "BABY ASPIRIN", "CORTISPORIN", @@ -318,22 +239,6 @@ cm_inter <- "COLD", "COLD", "PAIN" - ), - CMROUTE = c( - "ORAL", - "ORAL", - NA, - "ORAL", - "ORAL", - "ORAL", - "INTRAMUSCULAR", - "INTRA-ARTERIAL", - NA, - "NON-STANDARD", - "RANDOM_VALUE", - "INTRA-ARTICULAR", - "TRANSDERMAL", - "OPHTHALMIC" ) ) @@ -344,7 +249,7 @@ assign_ct( target_sdtm_variable = "CMROUTE", target_sdtm_variable_codelist_code = "C66729", target_dataset = cm_inter, - merge_to_topic_by = c("oak_id","raw_source","PATIENT_NUM") + merge_to_topic_by = c("oak_id","raw_source","patient_number") ) diff --git a/man/harcode.Rd b/man/harcode.Rd index 185263d2..6b4dde42 100644 --- a/man/harcode.Rd +++ b/man/harcode.Rd @@ -11,7 +11,7 @@ hardcode_no_ct( raw_variable, target_sdtm_variable, target_hardcoded_value, - target_dataset = raw_dataset, + target_dataset = NULL, merge_to_topic_by = NULL ) @@ -20,10 +20,10 @@ hardcode_ct( raw_variable, target_sdtm_variable, target_hardcoded_value, - target_dataset = raw_dataset, + target_dataset = NULL, merge_to_topic_by = NULL, - study_ct = NULL, - target_sdtm_variable_codelist_code = NULL + study_ct, + target_sdtm_variable_codelist_code ) } \arguments{ From 9cc26d1a504f5c5beea0c3a7ebe85f5860d5cdb9 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Thu, 14 Mar 2024 00:33:02 +0000 Subject: [PATCH 10/80] Introduce memoisation of `ct_mappings()` --- DESCRIPTION | 2 +- NAMESPACE | 1 + R/clear_cache.R | 19 +++++++++++++++++++ R/zzz.R | 3 +++ inst/WORDLIST | 2 ++ man/clear_cache.Rd | 25 +++++++++++++++++++++++++ 6 files changed, 51 insertions(+), 1 deletion(-) create mode 100644 R/clear_cache.R create mode 100644 R/zzz.R create mode 100644 man/clear_cache.Rd diff --git a/DESCRIPTION b/DESCRIPTION index e748ceb0..d4cd7f93 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -38,11 +38,11 @@ Depends: R (>= 4.2) Imports: admiraldev, dplyr (>= 1.0.0), + memoise, purrr (>= 0.3.3), rlang (>= 0.4.4), stringr (>= 1.4.0), tibble, - tidyr, vctrs Suggests: knitr, diff --git a/NAMESPACE b/NAMESPACE index 708540e0..697b0166 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,6 +3,7 @@ S3method(print,iso8601) export(assign_ct) export(assign_no_ct) +export(clear_cache) export(create_iso8601) export(ct_map) export(fmt_cmp) diff --git a/R/clear_cache.R b/R/clear_cache.R new file mode 100644 index 00000000..047bd23e --- /dev/null +++ b/R/clear_cache.R @@ -0,0 +1,19 @@ +#' Clear `{sdtm.oak}` cache of memoised functions +#' +#' @description +#' Some of `{sdtm.oak}` functions have their results cached for runtime +#' efficiency. Use this function to reset the cache. +#' +#' Memoised functions: +#' - [ct_mappings()] +#' +#' @return Returns a logical value, indicating whether the resetting of the +#' cache was successful (`TRUE`) or not (`FALSE`). +#' +#' @examples +#' clear_cache() +#' +#' @export +clear_cache <- function() { + memoise::forget(ct_mappings) +} diff --git a/R/zzz.R b/R/zzz.R new file mode 100644 index 00000000..e05837ee --- /dev/null +++ b/R/zzz.R @@ -0,0 +1,3 @@ +.onLoad <- function(libname, pkgname) { + ct_mappings <<- memoise::memoise(ct_mappings) +} diff --git a/inst/WORDLIST b/inst/WORDLIST index 0a2a530f..8e198249 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -17,3 +17,5 @@ recoded recoding tibble codelist +Memoised +memoised diff --git a/man/clear_cache.Rd b/man/clear_cache.Rd new file mode 100644 index 00000000..212f245e --- /dev/null +++ b/man/clear_cache.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/clear_cache.R +\name{clear_cache} +\alias{clear_cache} +\title{Clear \code{{sdtm.oak}} cache of memoised functions} +\usage{ +clear_cache() +} +\value{ +Returns a logical value, indicating whether the resetting of the +cache was successful (\code{TRUE}) or not (\code{FALSE}). +} +\description{ +Some of \code{{sdtm.oak}} functions have their results cached for runtime +efficiency. Use this function to reset the cache. + +Memoised functions: +\itemize{ +\item \code{\link[=ct_mappings]{ct_mappings()}} +} +} +\examples{ +clear_cache() + +} From 329feaa3bdd9dcd69aed68eb0ecb81f73ac2b64a Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Thu, 14 Mar 2024 09:44:02 +0000 Subject: [PATCH 11/80] Update of README introductory paragraph --- README.Rmd | 5 ++++- README.md | 8 +++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/README.Rmd b/README.Rmd index eea23ea5..36b48af7 100644 --- a/README.Rmd +++ b/README.Rmd @@ -19,7 +19,10 @@ knitr::opts_chunk$set( [![CRAN status](https://www.r-pkg.org/badges/version/sdtm.oak)](https://CRAN.R-project.org/package=sdtm.oak) -An EDC and Data Standard agnostic SDTM data transformation engine that automates the transformation of raw clinical data in ODM format to SDTM based on standard mapping algorithms +An EDC and Data Standard agnostic solution that enables the pharmaceutical +programming community to develop SDTM datasets in R. The reusable algorithms +concept in `{sdtm.oak}` provides a framework for modular programming and also +can automate SDTM creation based on the standard SDTM spec. ## Installation diff --git a/README.md b/README.md index 04f2fa5b..7806c4a5 100644 --- a/README.md +++ b/README.md @@ -9,9 +9,11 @@ status](https://www.r-pkg.org/badges/version/sdtm.oak)](https://CRAN.R-project.org/package=sdtm.oak) -An EDC and Data Standard agnostic SDTM data transformation engine that -automates the transformation of raw clinical data in ODM format to SDTM -based on standard mapping algorithms +An EDC and Data Standard agnostic solution that enables the +pharmaceutical programming community to develop SDTM datasets in R. The +reusable algorithms concept in `{sdtm.oak}` provides a framework for +modular programming and also can automate SDTM creation based on the +standard SDTM spec. ## Installation From 7720c054cb21524d18efe3722369fcc476ec7c0e Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Sun, 24 Mar 2024 16:36:11 +0000 Subject: [PATCH 12/80] Update hardcode_* functions' interface --- R/hardcode.R | 56 ++++++++++++++++++++++++++------------------------ man/harcode.Rd | 8 ++++---- 2 files changed, 33 insertions(+), 31 deletions(-) diff --git a/R/hardcode.R b/R/hardcode.R index 3c836f25..e6285061 100644 --- a/R/hardcode.R +++ b/R/hardcode.R @@ -4,33 +4,35 @@ sdtm_hardcode <- function(raw_dat, raw_var, tgt_var, tgt_val, - tgt_dat = raw_dat, - by_var = NULL, + tgt_dat = NULL, + id_vars = oak_id_vars(), ct = NULL, cl = NULL) { - # TODO: Assertions. + # TODO: Assertions. assert that id_vars always contains "oak_id", + # "raw_source", "patient_number" tgt_val <- ct_map(tgt_val, ct = ct, cl = cl) - # When target dataset and by_var variables are provided, - # the tar_var is added to the input target dataset. - if((!is.null(by_var))){ - tgt_dat_out <- raw_dat |> - #we need to keep only the required variables in the input raw dataset - dplyr::select(dplyr::all_of(by_var), rlang::sym(raw_var)) |> - dplyr::right_join(y = tgt_dat, by = by_var) |> - dplyr::mutate("{tgt_var}" := recode(x = !!rlang::sym(raw_var), to = tgt_val)) |> - dplyr::select(-rlang::sym(raw_var)) |> - dplyr::relocate(tgt_var, .after = dplyr::last_col()) - } else { - # When target dataset and by_var variables are NOT provided, - # the tgt_dat_out is created with tar_var & oak_id_vars. - tgt_dat_out <- raw_dat |> - dplyr::select(c(oak_id_vars(), raw_var)) |> - dplyr::mutate("{tgt_var}" := recode(x = !!rlang::sym(raw_var), to = tgt_val)) |> - dplyr::select(-rlang::sym(raw_var)) - } + # When target dataset and id_vars variables are provided, the tar_var is added + # to the input target dataset. + tgt_dat_out <- + if (!is.null(tgt_dat)) { + raw_dat |> + #we need to keep only the required variables in the input raw dataset + dplyr::select(c(id_vars, raw_var)) |> + dplyr::right_join(y = tgt_dat, by = id_vars) |> + dplyr::mutate("{tgt_var}" := recode(x = !!rlang::sym(raw_var), to = tgt_val)) |> + dplyr::select(-rlang::sym(raw_var)) |> + dplyr::relocate(tgt_var, .after = dplyr::last_col()) + } else { + # When target dataset and id_vars variables are NOT provided, the + # tgt_dat_out is created with tar_var & oak_id_vars. + raw_dat |> + dplyr::select(c(id_vars, raw_var)) |> + dplyr::mutate("{tgt_var}" := recode(x = !!rlang::sym(raw_var), to = tgt_val)) |> + dplyr::select(-rlang::sym(raw_var)) + } return(tgt_dat_out) @@ -51,7 +53,7 @@ sdtm_hardcode <- function(raw_dat, #' @param target_sdtm_variable The target SDTM variable. #' @param target_hardcoded_value Hardcoded value. #' @param target_dataset Target dataset. By default the same as `raw_dataset`. -#' @param merge_to_topic_by If `target_dataset` is different than `raw_dataset`, +#' @param id_vars If `target_dataset` is different than `raw_dataset`, #' then this parameter defines keys to use in the join between `raw_dataset` #' and `target_dataset`. #' @param study_ct Study controlled terminology specification. @@ -98,7 +100,7 @@ sdtm_hardcode <- function(raw_dat, #' target_sdtm_variable = "CMCAT", #' target_hardcoded_value = "GENERAL CONCOMITANT MEDICATIONS", #' target_dataset = CM_INTER, -#' merge_to_topic_by = c("oak_id", "raw_source", "patient_number") +#' id_vars = c("oak_id", "raw_source", "patient_number") #' ) #' #' @name harcode @@ -111,14 +113,14 @@ hardcode_no_ct <- function(raw_dataset, target_sdtm_variable, target_hardcoded_value, target_dataset = NULL, - merge_to_topic_by = NULL) { + id_vars = NULL) { sdtm_hardcode( raw_dat = raw_dataset, raw_var = raw_variable, tgt_var = target_sdtm_variable, tgt_val = target_hardcoded_value, tgt_dat = target_dataset, - by_var = merge_to_topic_by + id_vars = id_vars ) } @@ -129,7 +131,7 @@ hardcode_ct <- function(raw_dataset, target_sdtm_variable, target_hardcoded_value, target_dataset = NULL, - merge_to_topic_by = NULL, + id_vars = NULL, study_ct, target_sdtm_variable_codelist_code) { sdtm_hardcode( @@ -138,7 +140,7 @@ hardcode_ct <- function(raw_dataset, tgt_var = target_sdtm_variable, tgt_val = target_hardcoded_value, tgt_dat = target_dataset, - by_var = merge_to_topic_by, + id_vars = id_vars, ct = study_ct, cl = target_sdtm_variable_codelist_code ) diff --git a/man/harcode.Rd b/man/harcode.Rd index 6b4dde42..916f31e9 100644 --- a/man/harcode.Rd +++ b/man/harcode.Rd @@ -12,7 +12,7 @@ hardcode_no_ct( target_sdtm_variable, target_hardcoded_value, target_dataset = NULL, - merge_to_topic_by = NULL + id_vars = NULL ) hardcode_ct( @@ -21,7 +21,7 @@ hardcode_ct( target_sdtm_variable, target_hardcoded_value, target_dataset = NULL, - merge_to_topic_by = NULL, + id_vars = NULL, study_ct, target_sdtm_variable_codelist_code ) @@ -37,7 +37,7 @@ hardcode_ct( \item{target_dataset}{Target dataset. By default the same as \code{raw_dataset}.} -\item{merge_to_topic_by}{If \code{target_dataset} is different than \code{raw_dataset}, +\item{id_vars}{If \code{target_dataset} is different than \code{raw_dataset}, then this parameter defines keys to use in the join between \code{raw_dataset} and \code{target_dataset}.} @@ -95,7 +95,7 @@ hardcode_no_ct( target_sdtm_variable = "CMCAT", target_hardcoded_value = "GENERAL CONCOMITANT MEDICATIONS", target_dataset = CM_INTER, - merge_to_topic_by = c("oak_id", "raw_source", "patient_number") + id_vars = c("oak_id", "raw_source", "patient_number") ) } From e87ca66c72ef6fa2b30cbd4e85be09d7aa4b9852 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Sun, 24 Mar 2024 17:17:25 +0000 Subject: [PATCH 13/80] Add `contains_oak_id_vars()` function --- R/oak_id_vars.R | 29 +++++++++++++++++++++++++++++ man/contains_oak_id_vars.Rd | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+) create mode 100644 man/contains_oak_id_vars.Rd diff --git a/R/oak_id_vars.R b/R/oak_id_vars.R index cf6853be..801132bf 100644 --- a/R/oak_id_vars.R +++ b/R/oak_id_vars.R @@ -24,3 +24,32 @@ oak_id_vars <- function(extra_vars = NULL) { unique(c("oak_id", "raw_source", "patient_number", extra_vars)) } + +#' Does a vector contain the raw dataset key variables? +#' +#' [contains_oak_id_vars()] evaluates whether a character vector `x` contains +#' the raw dataset key variable names, i.e. the so called Oak identifier +#' variables --- these are defined by the return value of [oak_id_vars()]. +#' +#' @param x A character vector. +#' +#' @returns A logical scalar value. +#' +#' @examples +#' # `oak_id_vars()` is the function that defines what are the minimal set of +#' # oak keys. Hence, by definition, the following code should always return +#' # `TRUE`. +#' sdtm.oak:::contains_oak_id_vars(oak_id_vars()) +#' +#' # Returns `FALSE`. +#' sdtm.oak:::contains_oak_id_vars(character()) +#' +#' # Another example that returns `FALSE` because it is missing +#' # `"patient_number"`. +#' sdtm.oak:::contains_oak_id_vars(c("oak_id", "raw_source")) +#' +#' @keywords internal +contains_oak_id_vars <- function(x) { + admiraldev::assert_character_vector(x) + all(oak_id_vars() %in% x) +} diff --git a/man/contains_oak_id_vars.Rd b/man/contains_oak_id_vars.Rd new file mode 100644 index 00000000..21bc97fb --- /dev/null +++ b/man/contains_oak_id_vars.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/oak_id_vars.R +\name{contains_oak_id_vars} +\alias{contains_oak_id_vars} +\title{Does a vector contain the raw dataset key variables?} +\usage{ +contains_oak_id_vars(x) +} +\arguments{ +\item{x}{A character vector.} +} +\value{ +A logical scalar value. +} +\description{ +\code{\link[=contains_oak_id_vars]{contains_oak_id_vars()}} evaluates whether a character vector \code{x} contains +the raw dataset key variable names, i.e. the so called Oak identifier +variables --- these are defined by the return value of \code{\link[=oak_id_vars]{oak_id_vars()}}. +} +\examples{ +# `oak_id_vars()` is the function that defines what are the minimal set of +# oak keys. Hence, by definition, the following code should always return +# `TRUE`. +sdtm.oak:::contains_oak_id_vars(oak_id_vars()) + +# Returns `FALSE`. +sdtm.oak:::contains_oak_id_vars(character()) + +# Another example that returns `FALSE` because it is missing +# `"patient_number"`. +sdtm.oak:::contains_oak_id_vars(c("oak_id", "raw_source")) + +} +\keyword{internal} From a5e61f00a0196fb89e7fba0be615d79740e086d8 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Sun, 24 Mar 2024 20:58:10 +0000 Subject: [PATCH 14/80] Update `contains_oak_id_vars()` doc examples --- R/oak_id_vars.R | 2 +- man/contains_oak_id_vars.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/oak_id_vars.R b/R/oak_id_vars.R index 801132bf..ed3c419a 100644 --- a/R/oak_id_vars.R +++ b/R/oak_id_vars.R @@ -39,7 +39,7 @@ oak_id_vars <- function(extra_vars = NULL) { #' # `oak_id_vars()` is the function that defines what are the minimal set of #' # oak keys. Hence, by definition, the following code should always return #' # `TRUE`. -#' sdtm.oak:::contains_oak_id_vars(oak_id_vars()) +#' sdtm.oak:::contains_oak_id_vars(sdtm.oak:::oak_id_vars()) #' #' # Returns `FALSE`. #' sdtm.oak:::contains_oak_id_vars(character()) diff --git a/man/contains_oak_id_vars.Rd b/man/contains_oak_id_vars.Rd index 21bc97fb..c872bbbe 100644 --- a/man/contains_oak_id_vars.Rd +++ b/man/contains_oak_id_vars.Rd @@ -21,7 +21,7 @@ variables --- these are defined by the return value of \code{\link[=oak_id_vars] # `oak_id_vars()` is the function that defines what are the minimal set of # oak keys. Hence, by definition, the following code should always return # `TRUE`. -sdtm.oak:::contains_oak_id_vars(oak_id_vars()) +sdtm.oak:::contains_oak_id_vars(sdtm.oak:::oak_id_vars()) # Returns `FALSE`. sdtm.oak:::contains_oak_id_vars(character()) From a60ccd6133fe13853aca40bbbea0083a23d9e200 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Sun, 24 Mar 2024 20:59:38 +0000 Subject: [PATCH 15/80] Update `sdtm_harcode()` and dependant functions --- R/hardcode.R | 46 +++++++++++++++++++++++++--------------------- man/harcode.Rd | 4 ++-- 2 files changed, 27 insertions(+), 23 deletions(-) diff --git a/R/hardcode.R b/R/hardcode.R index e6285061..2b610ce4 100644 --- a/R/hardcode.R +++ b/R/hardcode.R @@ -9,33 +9,37 @@ sdtm_hardcode <- function(raw_dat, ct = NULL, cl = NULL) { - # TODO: Assertions. assert that id_vars always contains "oak_id", - # "raw_source", "patient_number" + admiraldev::assert_character_scalar(raw_var) + admiraldev::assert_character_scalar(tgt_var) + assertthat::assert_that(assertthat::is.scalar(tgt_val), + msg = "`tgt_val` must be a scalar value.") + admiraldev::assert_character_vector(id_vars) + assertthat::assert_that(contains_oak_id_vars(id_vars), + msg = "`id_vars` must include the oak id vars.") + admiraldev::assert_data_frame(raw_dat, required_vars = rlang::syms(c(id_vars, raw_var))) + admiraldev::assert_data_frame(tgt_dat, required_vars = rlang::syms(id_vars), optional = TRUE) + # Recode the hardcoded value following terminology. tgt_val <- ct_map(tgt_val, ct = ct, cl = cl) - # When target dataset and id_vars variables are provided, the tar_var is added - # to the input target dataset. - tgt_dat_out <- + # Apply derivation of the hardcoded value. + # `der_dat`: derived dataset. + der_dat <- + raw_dat |> + dplyr::select(c(id_vars, raw_var)) |> + dplyr::mutate("{tgt_var}" := recode(x = !!rlang::sym(raw_var), to = tgt_val)) |> + dplyr::select(-rlang::sym(raw_var)) + + # If a target dataset is supplied, then join the so far derived dataset with + # the target dataset (`tgt_dat`). + der_dat <- if (!is.null(tgt_dat)) { - raw_dat |> - #we need to keep only the required variables in the input raw dataset - dplyr::select(c(id_vars, raw_var)) |> + der_dat |> dplyr::right_join(y = tgt_dat, by = id_vars) |> - dplyr::mutate("{tgt_var}" := recode(x = !!rlang::sym(raw_var), to = tgt_val)) |> - dplyr::select(-rlang::sym(raw_var)) |> dplyr::relocate(tgt_var, .after = dplyr::last_col()) - } else { - # When target dataset and id_vars variables are NOT provided, the - # tgt_dat_out is created with tar_var & oak_id_vars. - raw_dat |> - dplyr::select(c(id_vars, raw_var)) |> - dplyr::mutate("{tgt_var}" := recode(x = !!rlang::sym(raw_var), to = tgt_val)) |> - dplyr::select(-rlang::sym(raw_var)) } - return(tgt_dat_out) - + der_dat } #' Derive an SDTM variable with a hardcoded value @@ -113,7 +117,7 @@ hardcode_no_ct <- function(raw_dataset, target_sdtm_variable, target_hardcoded_value, target_dataset = NULL, - id_vars = NULL) { + id_vars = oak_id_vars()) { sdtm_hardcode( raw_dat = raw_dataset, raw_var = raw_variable, @@ -131,7 +135,7 @@ hardcode_ct <- function(raw_dataset, target_sdtm_variable, target_hardcoded_value, target_dataset = NULL, - id_vars = NULL, + id_vars = oak_id_vars(), study_ct, target_sdtm_variable_codelist_code) { sdtm_hardcode( diff --git a/man/harcode.Rd b/man/harcode.Rd index 916f31e9..ff257f0c 100644 --- a/man/harcode.Rd +++ b/man/harcode.Rd @@ -12,7 +12,7 @@ hardcode_no_ct( target_sdtm_variable, target_hardcoded_value, target_dataset = NULL, - id_vars = NULL + id_vars = oak_id_vars() ) hardcode_ct( @@ -21,7 +21,7 @@ hardcode_ct( target_sdtm_variable, target_hardcoded_value, target_dataset = NULL, - id_vars = NULL, + id_vars = oak_id_vars(), study_ct, target_sdtm_variable_codelist_code ) From cd8980451a8ce4a2bb8f37e864ed8120a7d60f36 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Mon, 25 Mar 2024 11:04:36 +0000 Subject: [PATCH 16/80] Update `assign_*` and `hardcore_*` related functions --- R/assign.R | 186 +++++++++++++++++++++++-------------------------- R/hardcode.R | 84 +++++++++++----------- man/assign.Rd | 77 ++++++++++---------- man/harcode.Rd | 56 +++++++-------- 4 files changed, 197 insertions(+), 206 deletions(-) diff --git a/R/assign.R b/R/assign.R index c4864579..4d86a07b 100644 --- a/R/assign.R +++ b/R/assign.R @@ -2,51 +2,42 @@ sdtm_assign <- function(raw_dat, raw_var, tgt_var, - tgt_dat, - by_var = NULL, + tgt_dat = NULL, + id_vars = oak_id_vars(), ct = NULL, cl = NULL) { - # TODO: Assertions. + admiraldev::assert_character_scalar(raw_var) + admiraldev::assert_character_scalar(tgt_var) + admiraldev::assert_character_vector(id_vars) + assertthat::assert_that(contains_oak_id_vars(id_vars), + msg = "`id_vars` must include the oak id vars.") + admiraldev::assert_data_frame(raw_dat, required_vars = rlang::syms(c(id_vars, raw_var))) + admiraldev::assert_data_frame(tgt_dat, required_vars = rlang::syms(id_vars), optional = TRUE) - # When target dataset and by_var variables are provided, - # the tar_var is added to the input target dataset. - # This if block deals when ct is not provided. - if((!is.null(by_var) && is.null(ct))){ - tgt_dat_out <- raw_dat |> - dplyr::right_join(y = tgt_dat, by = by_var) |> - dplyr::mutate("{tgt_var}" := !!rlang::sym(raw_var)) |> - dplyr::select(-rlang::sym(raw_var)) |> - dplyr::relocate(tgt_var, .after = dplyr::last_col()) - } else if (is.null(by_var) && is.null(ct)) { - # When target dataset and by_var variables are NOT provided, - # the tgt_dat_out is created with tar_var & oak_id_vars. - # This if block deals when ct is not provided. - tgt_dat_out <- raw_dat |> - dplyr::select(c(oak_id_vars(), raw_var)) |> - dplyr::mutate("{tgt_var}" := !!rlang::sym(raw_var)) |> - dplyr::select(-rlang::sym(raw_var)) - } else if (is.null(by_var) && !is.null(ct)) { - # When target dataset and by_var variables are NOT provided, - # the tgt_dat_out is created with tar_var & oak_id_vars. - # This if block deals when ct is provided. - tgt_dat_out <- raw_dat |> - dplyr::select(c(oak_id_vars(), raw_var)) |> - dplyr::mutate("{tgt_var}" := ct_map(!!rlang::sym(raw_var), ct = ct, cl = cl)) |> - dplyr::select(-rlang::sym(raw_var)) |> - dplyr::relocate(tgt_var, .after = dplyr::last_col()) - } else if (!is.null(by_var) && !is.null(ct)) { - # When target dataset and by_var variables are provided, - # the tar_var is added to the input target dataset. - # This if block deals when ct is provided. - tgt_dat_out <- raw_dat |> - dplyr::right_join(y = tgt_dat, by = by_var) |> - dplyr::mutate("{tgt_var}" := ct_map(!!rlang::sym(raw_var), ct = ct, cl = cl)) |> - dplyr::select(-rlang::sym(raw_var)) |> - dplyr::relocate(tgt_var, .after = dplyr::last_col()) - } + # Recode the raw variable following terminology. + tgt_val <- ct_map(raw_dat[[raw_var]], ct = ct, cl = cl) - return(tgt_dat_out) + # Apply derivation by assigning `raw_var` to `tgt_var`. + # `der_dat`: derived dataset. + der_dat <- + raw_dat |> + dplyr::select(c(id_vars, raw_var)) |> + dplyr::mutate("{tgt_var}" := tgt_val) |> + dplyr::select(-rlang::sym(raw_var)) + + # If a target dataset is supplied, then join the so far derived dataset with + # the target dataset (`tgt_dat`), otherwise leave it be. + der_dat <- + if (!is.null(tgt_dat)) { + der_dat |> + dplyr::right_join(y = tgt_dat, by = id_vars) |> + dplyr::relocate(tgt_var, .after = dplyr::last_col()) + } else { + der_dat + } + + der_dat } @@ -59,15 +50,15 @@ sdtm_assign <- function(raw_dat, #' - [assign_ct()] maps a variable in a source dataset to a target SDTM variable #' following controlled terminology recoding. #' -#' @param raw_dataset The raw dataset. -#' @param raw_variable The raw variable. -#' @param target_sdtm_variable The target SDTM variable. -#' @param target_dataset Target dataset. By default the same as `raw_dataset`. -#' @param merge_to_topic_by If `target_dataset` is different than `raw_dataset`, +#' @param raw_dat The raw dataset. +#' @param raw_var The raw variable. +#' @param tgt_var The target SDTM variable. +#' @param tgt_dat Target dataset. +#' @param id_vars If `target_dataset` is different than `raw_dataset`, #' then this parameter defines keys to use in the join between `raw_dataset` #' and `target_dataset`. -#' @param study_ct Study controlled terminology specification. -#' @param target_sdtm_variable_codelist_code A codelist code indicating which +#' @param ct Study controlled terminology specification. +#' @param cl A codelist code indicating which #' subset of the controlled terminology to apply in the derivation. #' #' @returns The target dataset with the derived variable `target_sdtm_variable`. @@ -86,9 +77,9 @@ sdtm_assign <- function(raw_dat, #' ) #' #' assign_no_ct( -#' raw_dataset = md1, -#' raw_variable = "MDIND", -#' target_sdtm_variable = "CMINDC", +#' raw_dat = md1, +#' raw_var = "MDIND", +#' tgt_var = "CMINDC", #' ) #' #' cm_inter <- @@ -131,11 +122,11 @@ sdtm_assign <- function(raw_dat, #' ) #' #' assign_ct( -#' raw_dataset = md1, -#' raw_variable = "MDIND", -#' target_sdtm_variable = "CMINDC", -#' target_dataset = cm_inter, -#' merge_to_topic_by = c("oak_id","raw_source","patient_number") +#' raw_dat = md1, +#' raw_var = "MDIND", +#' tgt_var = "CMINDC", +#' tgt_dat = cm_inter, +#' id_vars = c("oak_id","raw_source","patient_number") #' ) #' #' @name assign @@ -143,17 +134,17 @@ NULL #' @export #' @rdname assign -assign_no_ct <- function(raw_dataset, - raw_variable, - target_sdtm_variable, - target_dataset = NULL, - merge_to_topic_by = NULL) { +assign_no_ct <- function(raw_dat, + raw_var, + tgt_var, + tgt_dat = NULL, + id_vars = oak_id_vars()) { sdtm_assign( - raw_dat = raw_dataset, - raw_var = raw_variable, - tgt_var = target_sdtm_variable, - tgt_dat = target_dataset, - by_var = merge_to_topic_by + raw_dat = raw_dat, + raw_var = raw_var, + tgt_var = tgt_var, + tgt_dat = tgt_dat, + id_vars = id_vars ) } @@ -162,15 +153,15 @@ assign_no_ct <- function(raw_dataset, #' [assign_ct()] maps a variable in a source dataset to a target SDTM variable #' following controlled terminology recoding. #' -#' @param raw_dataset The raw dataset. -#' @param raw_variable The raw variable. -#' @param target_sdtm_variable The target SDTM variable. -#' @param target_dataset Target dataset. By default the same as `raw_dataset`. -#' @param merge_to_topic_by If `target_dataset` is different than `raw_dataset`, +#' @param raw_dat The raw dataset. +#' @param raw_var The raw variable. +#' @param tgt_var The target SDTM variable. +#' @param tgt_dat Target dataset. +#' @param id_vars If `target_dataset` is different than `raw_dataset`, #' then this parameter defines keys to use in the join between `raw_dataset` #' and `target_dataset`. -#' @param study_ct Study controlled terminology specification. -#' @param target_sdtm_variable_codelist_code A codelist code indicating which +#' @param ct Study controlled terminology specification. +#' @param cl A codelist code indicating which #' subset of the controlled terminology to apply in the derivation. #' #' @returns The target dataset with the derived variable `target_sdtm_variable`. @@ -248,11 +239,11 @@ assign_no_ct <- function(raw_dataset, #' ) #' #' assign_ct( -#' raw_dataset = md1, -#' raw_variable = "MDRTE", -#' study_ct = study_ct, -#' target_sdtm_variable = "CMROUTE", -#' target_sdtm_variable_codelist_code = "C66729" +#' raw_dat = md1, +#' raw_var = "MDRTE", +#' tgt_var = "CMROUTE", +#' ct = study_ct, +#' cl = "C66729" #' ) #' #' cm_inter <- @@ -295,32 +286,31 @@ assign_no_ct <- function(raw_dataset, #' ) #' #' assign_ct( -#' raw_dataset = md1, -#' raw_variable = "MDRTE", -#' study_ct = study_ct, -#' target_sdtm_variable = "CMROUTE", -#' target_sdtm_variable_codelist_code = "C66729", -#' target_dataset = cm_inter, -#' merge_to_topic_by = c("oak_id","raw_source","patient_number") +#' raw_dat = md1, +#' raw_var = "MDRTE", +#' tgt_var = "CMROUTE", +#' tgt_dat = cm_inter, +#' ct = study_ct, +#' cl = "C66729" #' ) #' #' #' @export #' @rdname assign -assign_ct <- function(raw_dataset, - raw_variable, - target_sdtm_variable, - target_dataset = raw_dataset, - merge_to_topic_by = NULL, - study_ct = NULL, - target_sdtm_variable_codelist_code = NULL) { +assign_ct <- function(raw_dat, + raw_var, + tgt_var, + tgt_dat = NULL, + id_vars = oak_id_vars(), + ct = NULL, + cl = NULL) { sdtm_assign( - raw_dat = raw_dataset, - raw_var = raw_variable, - tgt_var = target_sdtm_variable, - tgt_dat = target_dataset, - by_var = merge_to_topic_by, - ct = study_ct, - cl = target_sdtm_variable_codelist_code + raw_dat = raw_dat, + raw_var = raw_var, + tgt_var = tgt_var, + tgt_dat = tgt_dat, + id_vars = id_vars, + ct = ct, + cl = cl ) } diff --git a/R/hardcode.R b/R/hardcode.R index 2b610ce4..de50482b 100644 --- a/R/hardcode.R +++ b/R/hardcode.R @@ -31,12 +31,14 @@ sdtm_hardcode <- function(raw_dat, dplyr::select(-rlang::sym(raw_var)) # If a target dataset is supplied, then join the so far derived dataset with - # the target dataset (`tgt_dat`). + # the target dataset (`tgt_dat`), otherwise leave it be. der_dat <- if (!is.null(tgt_dat)) { der_dat |> dplyr::right_join(y = tgt_dat, by = id_vars) |> dplyr::relocate(tgt_var, .after = dplyr::last_col()) + } else { + der_dat } der_dat @@ -52,16 +54,16 @@ sdtm_hardcode <- function(raw_dat, #' - [hardcode_ct()] maps a hardcoded value to a target SDTM variable with #' controlled terminology recoding. #' -#' @param raw_dataset The raw dataset. -#' @param raw_variable The raw variable. -#' @param target_sdtm_variable The target SDTM variable. -#' @param target_hardcoded_value Hardcoded value. -#' @param target_dataset Target dataset. By default the same as `raw_dataset`. +#' @param raw_dat The raw dataset. +#' @param raw_var The raw variable. +#' @param tgt_var The target SDTM variable. +#' @param tgt_val Hardcoded value. +#' @param tgt_dat Target dataset. By default the same as `raw_dataset`. #' @param id_vars If `target_dataset` is different than `raw_dataset`, #' then this parameter defines keys to use in the join between `raw_dataset` #' and `target_dataset`. -#' @param study_ct Study controlled terminology specification. -#' @param target_sdtm_variable_codelist_code A codelist code indicating which +#' @param ct Study controlled terminology specification. +#' @param cl A codelist code indicating which #' subset of the controlled terminology to apply in the derivation. #' #' @returns The target dataset with the derived variable `target_sdtm_variable`. @@ -79,10 +81,10 @@ sdtm_hardcode <- function(raw_dat, #' # Derive a new variable `CMCAT` by overwriting `MDRAW` with the #' # hardcoded value "GENERAL CONCOMITANT MEDICATIONS". #' hardcode_no_ct( -#' raw_dataset = MD1, -#' raw_variable = "MDRAW", -#' target_sdtm_variable = "CMCAT", -#' target_hardcoded_value = "GENERAL CONCOMITANT MEDICATIONS" +#' raw_dat = MD1, +#' raw_var = "MDRAW", +#' tgt_var = "CMCAT", +#' tgt_val = "GENERAL CONCOMITANT MEDICATIONS" #' ) #' #' CM_INTER <- @@ -99,11 +101,11 @@ sdtm_hardcode <- function(raw_dat, #' # hardcoded value "GENERAL CONCOMITANT MEDICATIONS" with a prior join to #' # `target_dataset`. #' hardcode_no_ct( -#' raw_dataset = MD1, -#' raw_variable = "MDRAW", -#' target_sdtm_variable = "CMCAT", -#' target_hardcoded_value = "GENERAL CONCOMITANT MEDICATIONS", -#' target_dataset = CM_INTER, +#' raw_dat = MD1, +#' raw_var = "MDRAW", +#' tgt_var = "CMCAT", +#' tgt_val = "GENERAL CONCOMITANT MEDICATIONS", +#' tgt_dat = CM_INTER, #' id_vars = c("oak_id", "raw_source", "patient_number") #' ) #' @@ -112,41 +114,41 @@ NULL #' @export #' @rdname harcode -hardcode_no_ct <- function(raw_dataset, - raw_variable, - target_sdtm_variable, - target_hardcoded_value, - target_dataset = NULL, +hardcode_no_ct <- function(raw_dat, + raw_var, + tgt_var, + tgt_val, + tgt_dat = NULL, id_vars = oak_id_vars()) { sdtm_hardcode( - raw_dat = raw_dataset, - raw_var = raw_variable, - tgt_var = target_sdtm_variable, - tgt_val = target_hardcoded_value, - tgt_dat = target_dataset, + raw_dat = raw_dat, + raw_var = raw_var, + tgt_var = tgt_var, + tgt_val = tgt_val, + tgt_dat = tgt_dat, id_vars = id_vars ) } #' @export #' @rdname harcode -hardcode_ct <- function(raw_dataset, - raw_variable, - target_sdtm_variable, - target_hardcoded_value, - target_dataset = NULL, +hardcode_ct <- function(raw_dat, + raw_var, + tgt_var, + tgt_val, + tgt_dat = NULL, id_vars = oak_id_vars(), - study_ct, - target_sdtm_variable_codelist_code) { + ct, + cl) { sdtm_hardcode( - raw_dat = raw_dataset, - raw_var = raw_variable, - tgt_var = target_sdtm_variable, - tgt_val = target_hardcoded_value, - tgt_dat = target_dataset, + raw_dat = raw_dat, + raw_var = raw_var, + tgt_var = tgt_var, + tgt_val = tgt_val, + tgt_dat = tgt_dat, id_vars = id_vars, - ct = study_ct, - cl = target_sdtm_variable_codelist_code + ct = ct, + cl = cl ) } diff --git a/man/assign.Rd b/man/assign.Rd index 8bb536cd..49b48063 100644 --- a/man/assign.Rd +++ b/man/assign.Rd @@ -7,39 +7,39 @@ \title{Derive an SDTM variable} \usage{ assign_no_ct( - raw_dataset, - raw_variable, - target_sdtm_variable, - target_dataset = NULL, - merge_to_topic_by = NULL + raw_dat, + raw_var, + tgt_var, + tgt_dat = NULL, + id_vars = oak_id_vars() ) assign_ct( - raw_dataset, - raw_variable, - target_sdtm_variable, - target_dataset = raw_dataset, - merge_to_topic_by = NULL, - study_ct = NULL, - target_sdtm_variable_codelist_code = NULL + raw_dat, + raw_var, + tgt_var, + tgt_dat = NULL, + id_vars = oak_id_vars(), + ct = NULL, + cl = NULL ) } \arguments{ -\item{raw_dataset}{The raw dataset.} +\item{raw_dat}{The raw dataset.} -\item{raw_variable}{The raw variable.} +\item{raw_var}{The raw variable.} -\item{target_sdtm_variable}{The target SDTM variable.} +\item{tgt_var}{The target SDTM variable.} -\item{target_dataset}{Target dataset. By default the same as \code{raw_dataset}.} +\item{tgt_dat}{Target dataset.} -\item{merge_to_topic_by}{If \code{target_dataset} is different than \code{raw_dataset}, +\item{id_vars}{If \code{target_dataset} is different than \code{raw_dataset}, then this parameter defines keys to use in the join between \code{raw_dataset} and \code{target_dataset}.} -\item{study_ct}{Study controlled terminology specification.} +\item{ct}{Study controlled terminology specification.} -\item{target_sdtm_variable_codelist_code}{A codelist code indicating which +\item{cl}{A codelist code indicating which subset of the controlled terminology to apply in the derivation.} } \value{ @@ -72,9 +72,9 @@ md1 <- ) assign_no_ct( - raw_dataset = md1, - raw_variable = "MDIND", - target_sdtm_variable = "CMINDC", + raw_dat = md1, + raw_var = "MDIND", + tgt_var = "CMINDC", ) cm_inter <- @@ -117,11 +117,11 @@ cm_inter <- ) assign_ct( - raw_dataset = md1, - raw_variable = "MDIND", - target_sdtm_variable = "CMINDC", - target_dataset = cm_inter, - merge_to_topic_by = c("oak_id","raw_source","patient_number") + raw_dat = md1, + raw_var = "MDIND", + tgt_var = "CMINDC", + tgt_dat = cm_inter, + id_vars = c("oak_id","raw_source","patient_number") ) study_ct <- @@ -196,11 +196,11 @@ md1 <- ) assign_ct( - raw_dataset = md1, - raw_variable = "MDRTE", - study_ct = study_ct, - target_sdtm_variable = "CMROUTE", - target_sdtm_variable_codelist_code = "C66729" + raw_dat = md1, + raw_var = "MDRTE", + tgt_var = "CMROUTE", + ct = study_ct, + cl = "C66729" ) cm_inter <- @@ -243,13 +243,12 @@ cm_inter <- ) assign_ct( - raw_dataset = md1, - raw_variable = "MDRTE", - study_ct = study_ct, - target_sdtm_variable = "CMROUTE", - target_sdtm_variable_codelist_code = "C66729", - target_dataset = cm_inter, - merge_to_topic_by = c("oak_id","raw_source","patient_number") + raw_dat = md1, + raw_var = "MDRTE", + tgt_var = "CMROUTE", + tgt_dat = cm_inter, + ct = study_ct, + cl = "C66729" ) diff --git a/man/harcode.Rd b/man/harcode.Rd index ff257f0c..991979f5 100644 --- a/man/harcode.Rd +++ b/man/harcode.Rd @@ -7,43 +7,43 @@ \title{Derive an SDTM variable with a hardcoded value} \usage{ hardcode_no_ct( - raw_dataset, - raw_variable, - target_sdtm_variable, - target_hardcoded_value, - target_dataset = NULL, + raw_dat, + raw_var, + tgt_var, + tgt_val, + tgt_dat = NULL, id_vars = oak_id_vars() ) hardcode_ct( - raw_dataset, - raw_variable, - target_sdtm_variable, - target_hardcoded_value, - target_dataset = NULL, + raw_dat, + raw_var, + tgt_var, + tgt_val, + tgt_dat = NULL, id_vars = oak_id_vars(), - study_ct, - target_sdtm_variable_codelist_code + ct, + cl ) } \arguments{ -\item{raw_dataset}{The raw dataset.} +\item{raw_dat}{The raw dataset.} -\item{raw_variable}{The raw variable.} +\item{raw_var}{The raw variable.} -\item{target_sdtm_variable}{The target SDTM variable.} +\item{tgt_var}{The target SDTM variable.} -\item{target_hardcoded_value}{Hardcoded value.} +\item{tgt_val}{Hardcoded value.} -\item{target_dataset}{Target dataset. By default the same as \code{raw_dataset}.} +\item{tgt_dat}{Target dataset. By default the same as \code{raw_dataset}.} \item{id_vars}{If \code{target_dataset} is different than \code{raw_dataset}, then this parameter defines keys to use in the join between \code{raw_dataset} and \code{target_dataset}.} -\item{study_ct}{Study controlled terminology specification.} +\item{ct}{Study controlled terminology specification.} -\item{target_sdtm_variable_codelist_code}{A codelist code indicating which +\item{cl}{A codelist code indicating which subset of the controlled terminology to apply in the derivation.} } \value{ @@ -70,10 +70,10 @@ MD1 <- # Derive a new variable `CMCAT` by overwriting `MDRAW` with the # hardcoded value "GENERAL CONCOMITANT MEDICATIONS". hardcode_no_ct( - raw_dataset = MD1, - raw_variable = "MDRAW", - target_sdtm_variable = "CMCAT", - target_hardcoded_value = "GENERAL CONCOMITANT MEDICATIONS" + raw_dat = MD1, + raw_var = "MDRAW", + tgt_var = "CMCAT", + tgt_val = "GENERAL CONCOMITANT MEDICATIONS" ) CM_INTER <- @@ -90,11 +90,11 @@ CM_INTER <- # hardcoded value "GENERAL CONCOMITANT MEDICATIONS" with a prior join to # `target_dataset`. hardcode_no_ct( - raw_dataset = MD1, - raw_variable = "MDRAW", - target_sdtm_variable = "CMCAT", - target_hardcoded_value = "GENERAL CONCOMITANT MEDICATIONS", - target_dataset = CM_INTER, + raw_dat = MD1, + raw_var = "MDRAW", + tgt_var = "CMCAT", + tgt_val = "GENERAL CONCOMITANT MEDICATIONS", + tgt_dat = CM_INTER, id_vars = c("oak_id", "raw_source", "patient_number") ) From ae2da80ca856a97e9dbd662c92679d5758aab069 Mon Sep 17 00:00:00 2001 From: ramiromagno Date: Mon, 25 Mar 2024 22:50:04 +0000 Subject: [PATCH 17/80] Automatic renv profile update. --- renv/profiles/4.2/renv.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/renv/profiles/4.2/renv.lock b/renv/profiles/4.2/renv.lock index 494312ef..52d8d0f9 100644 --- a/renv/profiles/4.2/renv.lock +++ b/renv/profiles/4.2/renv.lock @@ -1008,7 +1008,7 @@ }, "remotes": { "Package": "remotes", - "Version": "2.4.2.1", + "Version": "2.5.0", "Source": "Repository", "Repository": "RSPM", "Requirements": [ @@ -1018,7 +1018,7 @@ "tools", "utils" ], - "Hash": "63d15047eb239f95160112bcadc4fcb9" + "Hash": "3ee025083e66f18db6cf27b56e23e141" }, "renv": { "Package": "renv", From 30857e31bb263b24a5867a6db53e5f6a58277152 Mon Sep 17 00:00:00 2001 From: ramiromagno Date: Mon, 25 Mar 2024 22:54:17 +0000 Subject: [PATCH 18/80] Automatic renv profile update. --- renv.lock | 4 ++-- renv/profiles/4.3/renv.lock | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/renv.lock b/renv.lock index 329ec463..4a72ab8a 100644 --- a/renv.lock +++ b/renv.lock @@ -1007,7 +1007,7 @@ }, "remotes": { "Package": "remotes", - "Version": "2.4.2.1", + "Version": "2.5.0", "Source": "Repository", "Repository": "RSPM", "Requirements": [ @@ -1017,7 +1017,7 @@ "tools", "utils" ], - "Hash": "63d15047eb239f95160112bcadc4fcb9" + "Hash": "3ee025083e66f18db6cf27b56e23e141" }, "renv": { "Package": "renv", diff --git a/renv/profiles/4.3/renv.lock b/renv/profiles/4.3/renv.lock index 329ec463..4a72ab8a 100644 --- a/renv/profiles/4.3/renv.lock +++ b/renv/profiles/4.3/renv.lock @@ -1007,7 +1007,7 @@ }, "remotes": { "Package": "remotes", - "Version": "2.4.2.1", + "Version": "2.5.0", "Source": "Repository", "Repository": "RSPM", "Requirements": [ @@ -1017,7 +1017,7 @@ "tools", "utils" ], - "Hash": "63d15047eb239f95160112bcadc4fcb9" + "Hash": "3ee025083e66f18db6cf27b56e23e141" }, "renv": { "Package": "renv", From 73ebe2db4f7a10c72386dbf1bf00f27361383c62 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Wed, 27 Mar 2024 14:10:10 +0000 Subject: [PATCH 19/80] Make `ct` and `cl` parameters mandatory for `assign_ct()` --- R/assign.R | 154 ++------------------------------------------------ man/assign.Rd | 139 +-------------------------------------------- 2 files changed, 7 insertions(+), 286 deletions(-) diff --git a/R/assign.R b/R/assign.R index 4d86a07b..9e82f254 100644 --- a/R/assign.R +++ b/R/assign.R @@ -148,162 +148,16 @@ assign_no_ct <- function(raw_dat, ) } -#' Derive an SDTM variable with controlled terminology -#' -#' [assign_ct()] maps a variable in a source dataset to a target SDTM variable -#' following controlled terminology recoding. -#' -#' @param raw_dat The raw dataset. -#' @param raw_var The raw variable. -#' @param tgt_var The target SDTM variable. -#' @param tgt_dat Target dataset. -#' @param id_vars If `target_dataset` is different than `raw_dataset`, -#' then this parameter defines keys to use in the join between `raw_dataset` -#' and `target_dataset`. -#' @param ct Study controlled terminology specification. -#' @param cl A codelist code indicating which -#' subset of the controlled terminology to apply in the derivation. -#' -#' @returns The target dataset with the derived variable `target_sdtm_variable`. -#' -#' @examples -#' study_ct <- -#' tibble::tibble( -#' codelist_code = rep("C66729", 8L), -#' term_code = c( -#' "C28161", -#' "C38210", -#' "C38222", -#' "C38223", -#' "C38287", -#' "C38288", -#' "C38305", -#' "C38311" -#' ), -#' CodedData = c( -#' "INTRAMUSCULAR", -#' "EPIDURAL", -#' "INTRA-ARTERIAL", -#' "INTRA-ARTICULAR", -#' "OPHTHALMIC", -#' "ORAL", -#' "TRANSDERMAL", -#' "UNKNOWN" -#' ), -#' term_value = CodedData, -#' collected_value = c( -#' "IM (Intramuscular)", -#' "EP (Epidural)", -#' "IA (Intra-arterial)", -#' "IJ (Intra-articular)", -#' "OP (Ophthalmic)", -#' "PO (Oral)", -#' "DE (Transdermal)", -#' "Unknown" -#' ), -#' term_preferred_term = c( -#' "Intramuscular Route of Administration", -#' "Epidural Route of Administration", -#' "Intraarterial Route of Administration", -#' "Intraarticular Route of Administration", -#' "Ophthalmic Route of Administration", -#' "Oral Route of Administration", -#' "Transdermal Route of Administration", -#' "Unknown Route of Administration" -#' ), -#' term_synonyms = c(rep(NA, 5L), "Intraoral Route of Administration; PO", NA, NA), -#' raw_codelist = rep("ROUTE_CV1", 8L) -#' ) -#' -#' md1 <- -#' tibble::tibble( -#' oak_id = 1:14, -#' raw_source = "MD1", -#' patient_number = 101:114, -#' MDRTE = c( -#' "PO (Oral)", -#' "PO (Oral)", -#' NA_character_, -#' "PO", -#' "Intraoral Route of Administration", -#' "PO (Oral)", -#' "IM (Intramuscular)", -#' "IA (Intra-arterial)", -#' "", -#' "Non-standard", -#' "random_value", -#' "IJ (Intra-articular)", -#' "TRANSDERMAL", -#' "OPHTHALMIC" -#' ) -#' ) -#' -#' assign_ct( -#' raw_dat = md1, -#' raw_var = "MDRTE", -#' tgt_var = "CMROUTE", -#' ct = study_ct, -#' cl = "C66729" -#' ) -#' -#' cm_inter <- -#' tibble::tibble( -#' oak_id = 1:14, -#' raw_source = "MD1", -#' patient_number = 101:114, -#' CMTRT = c( -#' "BABY ASPIRIN", -#' "CORTISPORIN", -#' "ASPIRIN", -#' "DIPHENHYDRAMINE HCL", -#' "PARCETEMOL", -#' "VOMIKIND", -#' "ZENFLOX OZ", -#' "AMITRYPTYLINE", -#' "BENADRYL", -#' "DIPHENHYDRAMINE HYDROCHLORIDE", -#' "TETRACYCLINE", -#' "BENADRYL", -#' "SOMINEX", -#' "ZQUILL" -#' ), -#' CMINDC = c( -#' NA, -#' "NAUSEA", -#' "ANEMIA", -#' "NAUSEA", -#' "PYREXIA", -#' "VOMITINGS", -#' "DIARHHEA", -#' "COLD", -#' "FEVER", -#' "LEG PAIN", -#' "FEVER", -#' "COLD", -#' "COLD", -#' "PAIN" -#' ) -#' ) -#' -#' assign_ct( -#' raw_dat = md1, -#' raw_var = "MDRTE", -#' tgt_var = "CMROUTE", -#' tgt_dat = cm_inter, -#' ct = study_ct, -#' cl = "C66729" -#' ) -#' -#' + #' @export #' @rdname assign assign_ct <- function(raw_dat, raw_var, tgt_var, + ct, + cl, tgt_dat = NULL, - id_vars = oak_id_vars(), - ct = NULL, - cl = NULL) { + id_vars = oak_id_vars()) { sdtm_assign( raw_dat = raw_dat, raw_var = raw_var, diff --git a/man/assign.Rd b/man/assign.Rd index 49b48063..7b3910f1 100644 --- a/man/assign.Rd +++ b/man/assign.Rd @@ -18,10 +18,10 @@ assign_ct( raw_dat, raw_var, tgt_var, + ct, + cl, tgt_dat = NULL, - id_vars = oak_id_vars(), - ct = NULL, - cl = NULL + id_vars = oak_id_vars() ) } \arguments{ @@ -43,8 +43,6 @@ and \code{target_dataset}.} subset of the controlled terminology to apply in the derivation.} } \value{ -The target dataset with the derived variable \code{target_sdtm_variable}. - The target dataset with the derived variable \code{target_sdtm_variable}. } \description{ @@ -54,9 +52,6 @@ variable that has no terminology restrictions. \item \code{\link[=assign_ct]{assign_ct()}} maps a variable in a source dataset to a target SDTM variable following controlled terminology recoding. } - -\code{\link[=assign_ct]{assign_ct()}} maps a variable in a source dataset to a target SDTM variable -following controlled terminology recoding. } \examples{ @@ -124,132 +119,4 @@ assign_ct( id_vars = c("oak_id","raw_source","patient_number") ) -study_ct <- - tibble::tibble( - codelist_code = rep("C66729", 8L), - term_code = c( - "C28161", - "C38210", - "C38222", - "C38223", - "C38287", - "C38288", - "C38305", - "C38311" - ), - CodedData = c( - "INTRAMUSCULAR", - "EPIDURAL", - "INTRA-ARTERIAL", - "INTRA-ARTICULAR", - "OPHTHALMIC", - "ORAL", - "TRANSDERMAL", - "UNKNOWN" - ), - term_value = CodedData, - collected_value = c( - "IM (Intramuscular)", - "EP (Epidural)", - "IA (Intra-arterial)", - "IJ (Intra-articular)", - "OP (Ophthalmic)", - "PO (Oral)", - "DE (Transdermal)", - "Unknown" - ), - term_preferred_term = c( - "Intramuscular Route of Administration", - "Epidural Route of Administration", - "Intraarterial Route of Administration", - "Intraarticular Route of Administration", - "Ophthalmic Route of Administration", - "Oral Route of Administration", - "Transdermal Route of Administration", - "Unknown Route of Administration" - ), - term_synonyms = c(rep(NA, 5L), "Intraoral Route of Administration; PO", NA, NA), - raw_codelist = rep("ROUTE_CV1", 8L) - ) - -md1 <- - tibble::tibble( - oak_id = 1:14, - raw_source = "MD1", - patient_number = 101:114, - MDRTE = c( - "PO (Oral)", - "PO (Oral)", - NA_character_, - "PO", - "Intraoral Route of Administration", - "PO (Oral)", - "IM (Intramuscular)", - "IA (Intra-arterial)", - "", - "Non-standard", - "random_value", - "IJ (Intra-articular)", - "TRANSDERMAL", - "OPHTHALMIC" - ) - ) - -assign_ct( - raw_dat = md1, - raw_var = "MDRTE", - tgt_var = "CMROUTE", - ct = study_ct, - cl = "C66729" - ) - -cm_inter <- - tibble::tibble( - oak_id = 1:14, - raw_source = "MD1", - patient_number = 101:114, - CMTRT = c( - "BABY ASPIRIN", - "CORTISPORIN", - "ASPIRIN", - "DIPHENHYDRAMINE HCL", - "PARCETEMOL", - "VOMIKIND", - "ZENFLOX OZ", - "AMITRYPTYLINE", - "BENADRYL", - "DIPHENHYDRAMINE HYDROCHLORIDE", - "TETRACYCLINE", - "BENADRYL", - "SOMINEX", - "ZQUILL" - ), - CMINDC = c( - NA, - "NAUSEA", - "ANEMIA", - "NAUSEA", - "PYREXIA", - "VOMITINGS", - "DIARHHEA", - "COLD", - "FEVER", - "LEG PAIN", - "FEVER", - "COLD", - "COLD", - "PAIN" - ) - ) - -assign_ct( - raw_dat = md1, - raw_var = "MDRTE", - tgt_var = "CMROUTE", - tgt_dat = cm_inter, - ct = study_ct, - cl = "C66729" - ) - - } From 0eb467797a14621082b8c5e4a2f8731d11454966 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Wed, 27 Mar 2024 21:50:48 +0000 Subject: [PATCH 20/80] Add functions ct importing - Adds three new user facing ct-related functions: `read_ct_example()`, `ct_example()` and `read_ct()` - Provides a ct example file in inst/ct/ --- .Rbuildignore | 1 + NAMESPACE | 3 + R/ct.R | 180 +++++++++++++++++++++++++++++++++++++++-- inst/ct/README.md | 22 +++++ inst/ct/ct-01-cm.csv | 35 ++++++++ man/ct_map.Rd | 8 +- man/ct_mappings.Rd | 6 +- man/ct_vars.Rd | 39 +++++++++ man/read_ct.Rd | 25 ++++++ man/read_ct_example.Rd | 31 +++++++ 10 files changed, 333 insertions(+), 17 deletions(-) create mode 100644 inst/ct/README.md create mode 100644 inst/ct/ct-01-cm.csv create mode 100644 man/ct_vars.Rd create mode 100644 man/read_ct.Rd create mode 100644 man/read_ct_example.Rd diff --git a/.Rbuildignore b/.Rbuildignore index c8038ef3..80fe0c2f 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -14,3 +14,4 @@ ^data-raw$ ^staged_dependencies.yaml$ ^vignettes/articles$ +^inst/ct/README.md$ diff --git a/NAMESPACE b/NAMESPACE index 56960c00..4afae539 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,12 +5,15 @@ export(assign_ct) export(assign_no_ct) export(clear_cache) export(create_iso8601) +export(ct_example) export(ct_map) export(derive_study_day) export(fmt_cmp) export(hardcode_ct) export(hardcode_no_ct) export(problems) +export(read_ct) +export(read_ct_example) importFrom(rlang,"%||%") importFrom(rlang,":=") importFrom(rlang,.data) diff --git a/R/ct.R b/R/ct.R index 714460b7..31abdc43 100644 --- a/R/ct.R +++ b/R/ct.R @@ -1,3 +1,57 @@ +#' Controlled terminology variables +#' +#' @description +#' [ct_vars()] returns the mandatory variables to be present in a data set +#' representing a controlled terminology. By default, it returns all required +#' variables. +#' +#' If only the subset of variables used for matching terms are needed, then +#' request this subset of variables by passing the argument value `"from"`. If +#' only the mapping-to variable is to be requested, then simply pass `"to"`. If +#' only the code-list code variable name is needed then pass `"cl"`. +#' +#' @param set A scalar character (string), one of: `"all"` (default), `"cl"`, +#' `"from"` or `"to"`. +#' +#' @examples +#' # These two calls are equivalent and return all required variables in a +#' # controlled terminology data set. +#' sdtm.oak:::ct_vars() +#' sdtm.oak:::ct_vars("all") +#' +#' # "Codelist code" variable name. +#' sdtm.oak:::ct_vars("cl") +#' +#' # "From" variables +#' sdtm.oak:::ct_vars("from") +#' +#' # The "to" variable. +#' sdtm.oak:::ct_vars("to") +#' +#' @keywords internal +ct_vars <- function(set = c("all", "cl", "from", "to")) { + + admiraldev::assert_character_vector(set) + + set <- match.arg(set) + cl_var <- "codelist_code" + from_vars <- c("collected_value", "term_synonyms") + to_var <- "term_value" + + if (identical(set, "all")) + return(c(cl_var, from_vars, to_var)) + + if (identical(set, "cl")) + return(cl_var) + + if (identical(set, "from")) + return(from_vars) + + if (identical(set, "to")) + return(to_var) + +} + #' Controlled terminology mappings #' #' @description @@ -32,7 +86,7 @@ #' #' @importFrom rlang .data #' @keywords internal -ct_mappings <- function(ct, from = c("collected_value", "term_synonyms"), to = "term_value") { +ct_mappings <- function(ct, from = ct_vars("from"), to = ct_vars("to")) { # TODO: Assertions and memoisation. @@ -85,13 +139,13 @@ ct_map <- function(x, ct = NULL, cl = NULL, - from = c("collected_value", "term_synonyms"), - to = "term_value") { + from = ct_vars("from"), + to = ct_vars("to")) { ct %||% return(x) - cl <- cl %||% unique(ct$codelist_code) - ct <- dplyr::filter(ct, .data$codelist_code %in% cl) + cl <- cl %||% unique(ct[[ct_vars("cl")]]) + ct <- dplyr::filter(ct, .data[[ct_vars("cl")]] %in% cl) mappings <- ct_mappings(ct, from = from, to = to) recode( @@ -102,3 +156,119 @@ ct_map <- ) } + +#' Read in a controlled terminology +#' +#' [read_ct()] imports a controlled terminology specification data set as a +#' [tibble][tibble::tibble-package]. +#' +#' @param file A path to a file containing a controlled terminology +#' specification data set. The following are expected of this file: +#' +#' - The file is expected to be a CSV file; +#' - The file is expected to contain a first row of column names; +#' - This minimal set of variables is expected: `r knitr::combine_words(ct_vars())`. +#' +#' @returns A [tibble][tibble::tibble-package] with a controlled terminology +#' specification. +#' +#' @export +read_ct <- function(file = stop("`file` must be specified")) { + + # TODO: Until we have a more precise specification of the controlled + # terminology, we just read all columns as character. We assert nonetheless + # for the existence of the ct variables: `collected_value`, `term_synonyms` and + # `term_value` (provided by the helper `ct_vars()`). + ct <- readr::read_csv(file = file, col_types = "c") + admiraldev::assert_data_frame(arg = ct, required_vars = rlang::syms(ct_vars())) + + ct +} + +#' Find the path to an example controlled terminology file +#' +#' [ct_example()] resolves the local path to an example controlled +#' terminology file. +#' +#' @param example A string with either the basename, file name, or relative path +#' to a controlled terminology file bundled with `{stdm.oak}`, see examples. +#' +#' +#' @examples +#' # Get the local path to controlled terminology example file 01 +#' # Using the basename only: +#' ct_example("ct-01-cm") +#' +#' # Using the file name: +#' ct_example("ct-01-cm.csv") +#' +#' # Using the relative path: +#' ct_example("ct/ct-01-cm.csv") +#' +#' # If no example is provided it returns a vector of possible choices. +#' ct_example() +#' +#' @export +ct_example <- function(example) { + # If no example is requested, then return all available files. + if (missing(example)) { + ct_path <- system.file("ct", package = "sdtm.oak", mustWork = TRUE) + ct_files <- list.files(ct_path, pattern = "*.csv") + return(ct_files) + } + + # Otherwise, resolve the local path to the example requested. + admiraldev::assert_character_scalar(example, optional = TRUE) + base_name <- tools::file_path_sans_ext(basename(example)) + path <- file.path("ct", paste0(base_name, ".csv")) + local_path <- system.file(path, package = "sdtm.oak") + + if (identical(local_path, "")) { + stop( + glue::glue( + "'{example}' does not match any ct files. Run `ct_example()` for options." + ), + call. = FALSE + ) + + } else { + local_path <- + system.file(path, package = "sdtm.oak", mustWork = TRUE) + return(local_path) + } +} + +#' Read an example controlled terminology specification +#' +#' [read_ct_example()] imports one of the bundled controlled terminology +#' specification data sets as a [tibble][tibble::tibble-package] into R. +#' +#' @param example The file name of a controlled terminology data set bundled +#' with `{stdm.oak}`, run `read_ct_example()` for available example files. +#' +#' @returns A [tibble][tibble::tibble-package] with a controlled terminology +#' specification data set. +#' +#' @examples +#' # Leave the `example` parameter as missing for available example files. +#' read_ct_example() +#' +#' # Read an example ct file. +#' read_ct_example("ct-01-cm.csv") +#' +#' # You may omit the file extension. +#' read_ct_example("ct-01-cm") +#' +#' @export +read_ct_example <- function(example) { + + if (missing(example)) { + return(ct_example()) + } else { + admiraldev::assert_character_scalar(example) + } + + path <- ct_example(example) + read_ct(file = path) + +} diff --git a/inst/ct/README.md b/inst/ct/README.md new file mode 100644 index 00000000..58433341 --- /dev/null +++ b/inst/ct/README.md @@ -0,0 +1,22 @@ +# Controlled terminology examples + +## Introduction + +The folder inst/ct is meant to hold examples of controlled terminology +specifications. These example files should be plain CSV files. + +If you're adding new example files please follow the naming convention: +`ct-
-.csv`, where `
` is a simple numerical identifier +for the example, i.e. 01, 02, etc.. The `` is a short keyword +providing a simple contextual annotation for the controlled terminology. + +For example, `ct-01-cm.csv` is an example of a controlled terminology +specification data set: + +- `01`: indicates that it is the first example +- `cm`: suggests that this data set is related to the Concomitant Medication +domain. + +## Examples + +- `ct-01-cm.csv`: TBD. diff --git a/inst/ct/ct-01-cm.csv b/inst/ct/ct-01-cm.csv new file mode 100644 index 00000000..bf231e7e --- /dev/null +++ b/inst/ct/ct-01-cm.csv @@ -0,0 +1,35 @@ +codelist_code,term_code,CodedData,term_value,collected_value,term_preferred_term,term_synonyms,raw_codelist +C71113,C25473,QD,QD,QD (Every Day),Daily,/day; Daily; Per Day,FREQ_CV1 +C71113,C64496,BID,BID,BID (Twice a Day),Twice Daily,BD; Twice per day,FREQ_CV1 +C71113,C64499,PRN,PRN,PRN (As Needed),As Needed,As needed,FREQ_CV1 +C71113,C64516,Q2H,Q2H,Q2H (Every 2 Hours),Every Two Hours,Every 2 hours,FREQ_CV1 +C71113,C64530,QID,QID,QID (4 Times a Day),Four Times Daily,4 times per day,FREQ_CV1 +C66726,C25158,CAPSULE,CAPSULE,Capsule,Capsule Dosage Form,cap,FRM_CV1 +C66726,C25394,PILL,PILL,Pill,Pill Dosage Form,NA,FRM_CV1 +C66726,C29167,LOTION,LOTION,Lotion,Lotion Dosage Form,NA,FRM_CV1 +C66726,C42887,AEROSOL,AEROSOL,Aerosol,Aerosol Dosage Form,aer,FRM_CV1 +C66726,C42944,INHALANT,INHALANT,Inhalant,Inhalant Dosage Form,NA,FRM_CV1 +C66726,C42946,INJECTION,INJECTION,Injection,Injectable Dosage Form,NA,FRM_CV1 +C66726,C42953,LIQUID,LIQUID,Liquid,Liquid Dosage Form,NA,FRM_CV1 +C66726,C42998,TABLET,TABLET,Tablet,Tablet Dosage Form,tab,FRM_CV1 +C66742,C49488,Y,Y,Yes,Yes,Yes,NY_CV1 +C66729,C28161,INTRAMUSCULAR,INTRAMUSCULAR,IM (Intramuscular),Intramuscular Route of Administration,NA,ROUTE_CV1 +C66729,C38210,EPIDURAL,EPIDURAL,EP (Epidural),Epidural Route of Administration,NA,ROUTE_CV1 +C66729,C38222,INTRA-ARTERIAL,INTRA-ARTERIAL,IA (Intra-arterial),Intraarterial Route of Administration,NA,ROUTE_CV1 +C66729,C38223,INTRA-ARTICULAR,INTRA-ARTICULAR,IJ (Intra-articular),Intraarticular Route of Administration,NA,ROUTE_CV1 +C66729,C38287,OPHTHALMIC,OPHTHALMIC,OP (Ophthalmic),Ophthalmic Route of Administration,NA,ROUTE_CV1 +C66729,C38288,ORAL,ORAL,PO (Oral),Oral Route of Administration,Intraoral Route of Administration; PO,ROUTE_CV1 +C66729,C38305,TRANSDERMAL,TRANSDERMAL,DE (Transdermal),Transdermal Route of Administration,NA,ROUTE_CV1 +C66729,C38311,UNKNOWN,UNKNOWN,Unknown,Unknown Route of Administration,NA,ROUTE_CV1 +C71620,C25613,%,%,%,Percentage,Percentage,UNIT_CV1 +C71620,C28253,MG,mg,mg,Milligram,Milligram,UNIT_CV1 +C71620,C28254,ML,mL,mL,Milliliter,cm3; Milliliter,UNIT_CV1 +C71620,C48155,G,g,g,Gram,Gram,UNIT_CV1 +C71620,C48480,CAPSULE,CAPSULE,Capsule,Capsule Dosing Unit,cap; Capsule Dosing Unit,UNIT_CV1 +C71620,C48542,TABLET,TABLET,Tablet,Tablet Dosing Unit,tab; Tablet Dosing Unit,UNIT_CV1 +C71620,C48579,IU,IU,IU,International Unit,IE; International Unit,UNIT_CV1 +C71620,C28254,ML,mL,mL,Milliliter,cm3; Milliliter,UNIT_CV5 +C66728,C25629,BEFORE,BEFORE,Prior,Prior,,NA +C66728,C53279,ONGOING,ONGOING,Continue,Continue,Continuous,NA +C66734,C49568,CM,CM,Concomitant Medication Domain,Concomitant Medication Domain,Concomitant/Prior Medications,NA +,,,,,,, \ No newline at end of file diff --git a/man/ct_map.Rd b/man/ct_map.Rd index 847d8a77..07581ee8 100644 --- a/man/ct_map.Rd +++ b/man/ct_map.Rd @@ -4,13 +4,7 @@ \alias{ct_map} \title{Recode according to controlled terminology} \usage{ -ct_map( - x, - ct = NULL, - cl = NULL, - from = c("collected_value", "term_synonyms"), - to = "term_value" -) +ct_map(x, ct = NULL, cl = NULL, from = ct_vars("from"), to = ct_vars("to")) } \arguments{ \item{x}{A character vector of terms to be recoded following a controlled diff --git a/man/ct_mappings.Rd b/man/ct_mappings.Rd index 2d7db0f5..3190b340 100644 --- a/man/ct_mappings.Rd +++ b/man/ct_mappings.Rd @@ -4,11 +4,7 @@ \alias{ct_mappings} \title{Controlled terminology mappings} \usage{ -ct_mappings( - ct, - from = c("collected_value", "term_synonyms"), - to = "term_value" -) +ct_mappings(ct, from = ct_vars("from"), to = ct_vars("to")) } \arguments{ \item{ct}{Controlled terminology specification as a diff --git a/man/ct_vars.Rd b/man/ct_vars.Rd new file mode 100644 index 00000000..ee786241 --- /dev/null +++ b/man/ct_vars.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ct.R +\name{ct_vars} +\alias{ct_vars} +\title{Controlled terminology variables} +\usage{ +ct_vars(set = c("all", "cl", "from", "to")) +} +\arguments{ +\item{set}{A scalar character (string), one of: \code{"all"} (default), \code{"cl"}, +\code{"from"} or \code{"to"}.} +} +\description{ +\code{\link[=ct_vars]{ct_vars()}} returns the mandatory variables to be present in a data set +representing a controlled terminology. By default, it returns all required +variables. + +If only the subset of variables used for matching terms are needed, then +request this subset of variables by passing the argument value \code{"from"}. If +only the mapping-to variable is to be requested, then simply pass \code{"to"}. If +only the code-list code variable name is needed then pass \code{"cl"}. +} +\examples{ +# These two calls are equivalent and return all required variables in a +# controlled terminology data set. +sdtm.oak:::ct_vars() +sdtm.oak:::ct_vars("all") + +# "Codelist code" variable name. +sdtm.oak:::ct_vars("cl") + +# "From" variables +sdtm.oak:::ct_vars("from") + +# The "to" variable. +sdtm.oak:::ct_vars("to") + +} +\keyword{internal} diff --git a/man/read_ct.Rd b/man/read_ct.Rd new file mode 100644 index 00000000..35b4d6c7 --- /dev/null +++ b/man/read_ct.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ct.R +\name{read_ct} +\alias{read_ct} +\title{Read in a controlled terminology} +\usage{ +read_ct(file = stop("`file` must be specified")) +} +\arguments{ +\item{file}{A path to a file containing a controlled terminology +specification data set. The following are expected of this file: +\itemize{ +\item The file is expected to be a CSV file; +\item The file is expected to contain a first row of column names; +\item This minimal set of variables is expected: codelist_code, collected_value, term_synonyms, and term_value. +}} +} +\value{ +A \link[tibble:tibble-package]{tibble} with a controlled terminology +specification. +} +\description{ +\code{\link[=read_ct]{read_ct()}} imports a controlled terminology specification data set as a +\link[tibble:tibble-package]{tibble}. +} diff --git a/man/read_ct_example.Rd b/man/read_ct_example.Rd new file mode 100644 index 00000000..f365182b --- /dev/null +++ b/man/read_ct_example.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ct.R +\name{read_ct_example} +\alias{read_ct_example} +\title{Read an example controlled terminology specification} +\usage{ +read_ct_example(example) +} +\arguments{ +\item{example}{The file name of a controlled terminology data set bundled +with \code{{stdm.oak}}, run \code{read_ct_example()} for available example files.} +} +\value{ +A \link[tibble:tibble-package]{tibble} with a controlled terminology +specification data set. +} +\description{ +\code{\link[=read_ct_example]{read_ct_example()}} imports one of the bundled controlled terminology +specification data sets as a \link[tibble:tibble-package]{tibble} into R. +} +\examples{ +# Leave the `example` parameter as missing for available example files. +read_ct_example() + +# Read an example ct file. +read_ct_example("ct-01-cm.csv") + +# You may omit the file extension. +read_ct_example("ct-01-cm") + +} From dfd77101938c871b024ed0085d7ae24d808ac439 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Wed, 27 Mar 2024 21:56:14 +0000 Subject: [PATCH 21/80] Bring `hardcode*()` and `assign*()` related assertions closer to user calling functions --- R/assign.R | 26 ++++++++++++++++++-------- R/hardcode.R | 39 ++++++++++++++++++++++++++------------- man/harcode.Rd | 6 +++--- 3 files changed, 47 insertions(+), 24 deletions(-) diff --git a/R/assign.R b/R/assign.R index 9e82f254..c6dee742 100644 --- a/R/assign.R +++ b/R/assign.R @@ -7,14 +7,6 @@ sdtm_assign <- function(raw_dat, ct = NULL, cl = NULL) { - admiraldev::assert_character_scalar(raw_var) - admiraldev::assert_character_scalar(tgt_var) - admiraldev::assert_character_vector(id_vars) - assertthat::assert_that(contains_oak_id_vars(id_vars), - msg = "`id_vars` must include the oak id vars.") - admiraldev::assert_data_frame(raw_dat, required_vars = rlang::syms(c(id_vars, raw_var))) - admiraldev::assert_data_frame(tgt_dat, required_vars = rlang::syms(id_vars), optional = TRUE) - # Recode the raw variable following terminology. tgt_val <- ct_map(raw_dat[[raw_var]], ct = ct, cl = cl) @@ -139,6 +131,15 @@ assign_no_ct <- function(raw_dat, tgt_var, tgt_dat = NULL, id_vars = oak_id_vars()) { + + admiraldev::assert_character_scalar(raw_var) + admiraldev::assert_character_scalar(tgt_var) + admiraldev::assert_character_vector(id_vars) + assertthat::assert_that(contains_oak_id_vars(id_vars), + msg = "`id_vars` must include the oak id vars.") + admiraldev::assert_data_frame(raw_dat, required_vars = rlang::syms(c(id_vars, raw_var))) + admiraldev::assert_data_frame(tgt_dat, required_vars = rlang::syms(id_vars), optional = TRUE) + sdtm_assign( raw_dat = raw_dat, raw_var = raw_var, @@ -158,6 +159,15 @@ assign_ct <- function(raw_dat, cl, tgt_dat = NULL, id_vars = oak_id_vars()) { + + admiraldev::assert_character_scalar(raw_var) + admiraldev::assert_character_scalar(tgt_var) + admiraldev::assert_character_vector(id_vars) + assertthat::assert_that(contains_oak_id_vars(id_vars), + msg = "`id_vars` must include the oak id vars.") + admiraldev::assert_data_frame(raw_dat, required_vars = rlang::syms(c(id_vars, raw_var))) + admiraldev::assert_data_frame(tgt_dat, required_vars = rlang::syms(id_vars), optional = TRUE) + sdtm_assign( raw_dat = raw_dat, raw_var = raw_var, diff --git a/R/hardcode.R b/R/hardcode.R index de50482b..a051e7af 100644 --- a/R/hardcode.R +++ b/R/hardcode.R @@ -9,16 +9,6 @@ sdtm_hardcode <- function(raw_dat, ct = NULL, cl = NULL) { - admiraldev::assert_character_scalar(raw_var) - admiraldev::assert_character_scalar(tgt_var) - assertthat::assert_that(assertthat::is.scalar(tgt_val), - msg = "`tgt_val` must be a scalar value.") - admiraldev::assert_character_vector(id_vars) - assertthat::assert_that(contains_oak_id_vars(id_vars), - msg = "`id_vars` must include the oak id vars.") - admiraldev::assert_data_frame(raw_dat, required_vars = rlang::syms(c(id_vars, raw_var))) - admiraldev::assert_data_frame(tgt_dat, required_vars = rlang::syms(id_vars), optional = TRUE) - # Recode the hardcoded value following terminology. tgt_val <- ct_map(tgt_val, ct = ct, cl = cl) @@ -120,6 +110,17 @@ hardcode_no_ct <- function(raw_dat, tgt_val, tgt_dat = NULL, id_vars = oak_id_vars()) { + + admiraldev::assert_character_scalar(raw_var) + admiraldev::assert_character_scalar(tgt_var) + assertthat::assert_that(assertthat::is.scalar(tgt_val), + msg = "`tgt_val` must be a scalar value.") + admiraldev::assert_character_vector(id_vars) + assertthat::assert_that(contains_oak_id_vars(id_vars), + msg = "`id_vars` must include the oak id vars.") + admiraldev::assert_data_frame(raw_dat, required_vars = rlang::syms(c(id_vars, raw_var))) + admiraldev::assert_data_frame(tgt_dat, required_vars = rlang::syms(id_vars), optional = TRUE) + sdtm_hardcode( raw_dat = raw_dat, raw_var = raw_var, @@ -134,12 +135,24 @@ hardcode_no_ct <- function(raw_dat, #' @rdname harcode hardcode_ct <- function(raw_dat, raw_var, + ct, + cl, tgt_var, tgt_val, tgt_dat = NULL, - id_vars = oak_id_vars(), - ct, - cl) { + id_vars = oak_id_vars() + ) { + + admiraldev::assert_character_scalar(raw_var) + admiraldev::assert_character_scalar(tgt_var) + assertthat::assert_that(assertthat::is.scalar(tgt_val), + msg = "`tgt_val` must be a scalar value.") + admiraldev::assert_character_vector(id_vars) + assertthat::assert_that(contains_oak_id_vars(id_vars), + msg = "`id_vars` must include the oak id vars.") + admiraldev::assert_data_frame(raw_dat, required_vars = rlang::syms(c(id_vars, raw_var))) + admiraldev::assert_data_frame(tgt_dat, required_vars = rlang::syms(id_vars), optional = TRUE) + sdtm_hardcode( raw_dat = raw_dat, raw_var = raw_var, diff --git a/man/harcode.Rd b/man/harcode.Rd index 991979f5..85faa83d 100644 --- a/man/harcode.Rd +++ b/man/harcode.Rd @@ -18,12 +18,12 @@ hardcode_no_ct( hardcode_ct( raw_dat, raw_var, + ct, + cl, tgt_var, tgt_val, tgt_dat = NULL, - id_vars = oak_id_vars(), - ct, - cl + id_vars = oak_id_vars() ) } \arguments{ From 6652aae7b60a93f3f10920270e1d1ad00c67f087 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Wed, 27 Mar 2024 21:56:53 +0000 Subject: [PATCH 22/80] Add lagging behind Rd for `ct_example()` --- man/ct_example.Rd | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 man/ct_example.Rd diff --git a/man/ct_example.Rd b/man/ct_example.Rd new file mode 100644 index 00000000..f7e018a9 --- /dev/null +++ b/man/ct_example.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ct.R +\name{ct_example} +\alias{ct_example} +\title{Find the path to an example controlled terminology file} +\usage{ +ct_example(example) +} +\arguments{ +\item{example}{A string with either the basename, file name, or relative path +to a controlled terminology file bundled with \code{{stdm.oak}}, see examples.} +} +\description{ +\code{\link[=ct_example]{ct_example()}} resolves the local path to an example controlled +terminology file. +} +\examples{ +# Get the local path to controlled terminology example file 01 +# Using the basename only: +ct_example("ct-01-cm") + +# Using the file name: +ct_example("ct-01-cm.csv") + +# Using the relative path: +ct_example("ct/ct-01-cm.csv") + +# If no example is provided it returns a vector of possible choices. +ct_example() + +} From 59bcc71d0a9343287998370b47384153d2d47c66 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Wed, 27 Mar 2024 22:39:12 +0000 Subject: [PATCH 23/80] Add `assert_ct()` --- R/ct.R | 55 +++++++++++++++++++++++++++++++++++++--------- man/assert_ct.Rd | 40 +++++++++++++++++++++++++++++++++ man/ct_mappings.Rd | 13 ++++++----- 3 files changed, 93 insertions(+), 15 deletions(-) create mode 100644 man/assert_ct.Rd diff --git a/R/ct.R b/R/ct.R index 31abdc43..a27d3fe3 100644 --- a/R/ct.R +++ b/R/ct.R @@ -52,6 +52,42 @@ ct_vars <- function(set = c("all", "cl", "from", "to")) { } +#' Assert a controlled terminology specification +#' +#' [assert_ct()] will check whether `ct` is a data frame and +#' if it contains the variables: `r knitr::combine_words(ct_vars())`. +#' +#' @param ct A data frame to be asserted as a controlled terminology data set. +#' +#' @returns The function throws an error if `ct` is not a valid controlled +#' terminology data set; otherwise, `ct` is returned invisibly. +#' +#' @examples +#' # If `ct` is a valid controlled terminology then it is returned invisibly. +#' ct_01 <- read_ct_example("ct-01-cm") +#' all.equal(ct_01, sdtm.oak:::assert_ct(ct_01)) +#' +#' # A minimal set of variables needs to be present in `ct` for it to pass the +#' # assertion; `sdtm.oak:::ct_vars()` defines their names. +#' (req_vars <- sdtm.oak:::ct_vars()) +#' +#' # Other (facultative) variables also present in the controlled terminology +#' # example. +#' (opt_vars <- setdiff(colnames(ct_01), req_vars)) +#' +#' # With only the mandatory variables, the assertion still passes. +#' assert_ct(ct_01[req_vars]) +#' +#' # Not having the required variables results in an error. +#' try(assert_ct(ct_01[opt_vars])) +#' +#' @keywords internal +assert_ct <- function(ct) { + + admiraldev::assert_data_frame(arg = ct, required_vars = rlang::syms(ct_vars())) + invisible(ct) +} + #' Controlled terminology mappings #' #' @description @@ -76,13 +112,16 @@ ct_vars <- function(set = c("all", "cl", "from", "to")) { #' `to`, indicating the mapping of values, one per row. #' #' @examples -#' # example code -#' -#' -#' -#' +#' # Read in a bundled controlled terminology spec example (ex. 01). +#' (ct_01 <- read_ct_example("ct-01-cm")) #' +#' # Generate mappings from the terminology specification. +#' sdtm.oak:::ct_mappings(ct = ct_01) #' +#' # Take a glimpse at those mappings where an actual recoding happens. +#' sdtm.oak:::ct_mappings(ct = ct_01) |> +#' dplyr::filter(from != to) |> +#' print(n = 20) #' #' @importFrom rlang .data #' @keywords internal @@ -175,12 +214,8 @@ ct_map <- #' @export read_ct <- function(file = stop("`file` must be specified")) { - # TODO: Until we have a more precise specification of the controlled - # terminology, we just read all columns as character. We assert nonetheless - # for the existence of the ct variables: `collected_value`, `term_synonyms` and - # `term_value` (provided by the helper `ct_vars()`). ct <- readr::read_csv(file = file, col_types = "c") - admiraldev::assert_data_frame(arg = ct, required_vars = rlang::syms(ct_vars())) + assert_ct(ct) ct } diff --git a/man/assert_ct.Rd b/man/assert_ct.Rd new file mode 100644 index 00000000..9525f816 --- /dev/null +++ b/man/assert_ct.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ct.R +\name{assert_ct} +\alias{assert_ct} +\title{Assert a controlled terminology specification} +\usage{ +assert_ct(ct) +} +\arguments{ +\item{ct}{A data frame to be asserted as a controlled terminology data set.} +} +\value{ +The function throws an error if \code{ct} is not a valid controlled +terminology data set; otherwise, \code{ct} is returned invisibly. +} +\description{ +\code{\link[=assert_ct]{assert_ct()}} will check whether \code{ct} is a data frame and +if it contains the variables: codelist_code, collected_value, term_synonyms, and term_value. +} +\examples{ +# If `ct` is a valid controlled terminology then it is returned invisibly. +ct_01 <- read_ct_example("ct-01-cm") +all.equal(ct_01, sdtm.oak:::assert_ct(ct_01)) + +# A minimal set of variables needs to be present in `ct` for it to pass the +# assertion; `sdtm.oak:::ct_vars()` defines their names. +(req_vars <- sdtm.oak:::ct_vars()) + +# Other (facultative) variables also present in the controlled terminology +# example. +(opt_vars <- setdiff(colnames(ct_01), req_vars)) + +# With only the mandatory variables, the assertion still passes. +assert_ct(ct_01[req_vars]) + +# Not having the required variables results in an error. +try(assert_ct(ct_01[opt_vars])) + +} +\keyword{internal} diff --git a/man/ct_mappings.Rd b/man/ct_mappings.Rd index 3190b340..35787340 100644 --- a/man/ct_mappings.Rd +++ b/man/ct_mappings.Rd @@ -32,13 +32,16 @@ two \code{from} columns, the first column indicated in \code{from} takes precede and only that mapping is retained in the controlled terminology map. } \examples{ -# example code - - - - +# Read in a bundled controlled terminology spec example (ex. 01). +(ct_01 <- read_ct_example("ct-01-cm")) +# Generate mappings from the terminology specification. +sdtm.oak:::ct_mappings(ct = ct_01) +# Take a glimpse at those mappings where an actual recoding happens. +sdtm.oak:::ct_mappings(ct = ct_01) |> + dplyr::filter(from != to) |> + print(n = 20) } \keyword{internal} From 7f9f388d73217e93500811329dce8cbb25c5f96e Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Wed, 27 Mar 2024 22:41:32 +0000 Subject: [PATCH 24/80] Add ct assertions --- R/ct.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/ct.R b/R/ct.R index a27d3fe3..2e5d5802 100644 --- a/R/ct.R +++ b/R/ct.R @@ -127,7 +127,7 @@ assert_ct <- function(ct) { #' @keywords internal ct_mappings <- function(ct, from = ct_vars("from"), to = ct_vars("to")) { - # TODO: Assertions and memoisation. + assert_ct(ct) cols <- c(to, from) @@ -182,6 +182,7 @@ ct_map <- to = ct_vars("to")) { ct %||% return(x) + assert_ct(ct) cl <- cl %||% unique(ct[[ct_vars("cl")]]) ct <- dplyr::filter(ct, .data[[ct_vars("cl")]] %in% cl) From 4ed5c413100f1ea78bc075bc134725b1bd5d0a83 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Mon, 1 Apr 2024 15:31:10 +0100 Subject: [PATCH 25/80] Remove R/.gitkeep As it is no longer needed. --- R/.gitkeep | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 R/.gitkeep diff --git a/R/.gitkeep b/R/.gitkeep deleted file mode 100644 index e69de29b..00000000 From ca26d22ca8320b2e69889cf5e7204ce52f68b072 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Mon, 1 Apr 2024 15:45:56 +0100 Subject: [PATCH 26/80] Add unit tests for `ct_vars()` --- tests/testthat/test-ct.R | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 tests/testthat/test-ct.R diff --git a/tests/testthat/test-ct.R b/tests/testthat/test-ct.R new file mode 100644 index 00000000..19d66835 --- /dev/null +++ b/tests/testthat/test-ct.R @@ -0,0 +1,37 @@ +test_that("ct_vars works as expected", { + + expect_equal(ct_vars(), + c( + "codelist_code", + "collected_value", + "term_synonyms", + "term_value" + )) + + expect_equal( + ct_vars(set = "all"), + c( + "codelist_code", + "collected_value", + "term_synonyms", + "term_value" + ) + ) + + expect_equal(ct_vars(set = "cl"), + "codelist_code") + + expect_equal(ct_vars(set = "from"), + c("collected_value", + "term_synonyms")) + + expect_equal(ct_vars(set = "to"), "term_value") +}) + +test_that("ct_vars fails with invalid input choice", { + + expect_error(ct_vars("foo")) + expect_error(ct_vars(1L)) + expect_error(ct_vars(FALSE)) + expect_error(ct_vars(NULL)) +}) From 0456d553634d937135bf689aad053322cad1ff3b Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Mon, 1 Apr 2024 16:26:06 +0100 Subject: [PATCH 27/80] Update dependencies --- DESCRIPTION | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 6c4a0d52..d9717bc4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -44,7 +44,9 @@ Imports: rlang (>= 0.4.4), stringr (>= 1.4.0), tibble, - vctrs + vctrs, + readr, + glue Suggests: knitr, rmarkdown, From 0e1eab4b6f198a98ad732e8f691c91aedeaebb91 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Mon, 1 Apr 2024 16:26:57 +0100 Subject: [PATCH 28/80] Export `ct_vars()` Export `ct_vars()` such that we can cross-reference it from other functions' documentation. --- NAMESPACE | 1 + R/ct.R | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 4afae539..d7e8e203 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -7,6 +7,7 @@ export(clear_cache) export(create_iso8601) export(ct_example) export(ct_map) +export(ct_vars) export(derive_study_day) export(fmt_cmp) export(hardcode_ct) diff --git a/R/ct.R b/R/ct.R index 2e5d5802..b4cc7333 100644 --- a/R/ct.R +++ b/R/ct.R @@ -29,6 +29,7 @@ #' sdtm.oak:::ct_vars("to") #' #' @keywords internal +#' @export ct_vars <- function(set = c("all", "cl", "from", "to")) { admiraldev::assert_character_vector(set) @@ -76,10 +77,10 @@ ct_vars <- function(set = c("all", "cl", "from", "to")) { #' (opt_vars <- setdiff(colnames(ct_01), req_vars)) #' #' # With only the mandatory variables, the assertion still passes. -#' assert_ct(ct_01[req_vars]) +#' sdtm.oak:::assert_ct(ct_01[req_vars]) #' #' # Not having the required variables results in an error. -#' try(assert_ct(ct_01[opt_vars])) +#' try(sdtm.oak:::assert_ct(ct_01[opt_vars])) #' #' @keywords internal assert_ct <- function(ct) { From 84a4f7dde10d05eb180eac45035a3676f81cdf94 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Mon, 1 Apr 2024 16:27:30 +0100 Subject: [PATCH 29/80] Update `assert_ct()` docs --- man/assert_ct.Rd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/man/assert_ct.Rd b/man/assert_ct.Rd index 9525f816..8e79fdb8 100644 --- a/man/assert_ct.Rd +++ b/man/assert_ct.Rd @@ -31,10 +31,10 @@ all.equal(ct_01, sdtm.oak:::assert_ct(ct_01)) (opt_vars <- setdiff(colnames(ct_01), req_vars)) # With only the mandatory variables, the assertion still passes. -assert_ct(ct_01[req_vars]) +sdtm.oak:::assert_ct(ct_01[req_vars]) # Not having the required variables results in an error. -try(assert_ct(ct_01[opt_vars])) +try(sdtm.oak:::assert_ct(ct_01[opt_vars])) } \keyword{internal} From 7cf1072b529eb9964cfa43c341667d21f6b3e2fc Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Mon, 1 Apr 2024 16:27:59 +0100 Subject: [PATCH 30/80] Clarify `assign_ct()`/`assign_no_ct()` doc --- R/assign.R | 46 +++++++++++++++++++++++++++++++--------------- man/assign.Rd | 48 +++++++++++++++++++++++++++++++++--------------- 2 files changed, 64 insertions(+), 30 deletions(-) diff --git a/R/assign.R b/R/assign.R index c6dee742..806a4b0b 100644 --- a/R/assign.R +++ b/R/assign.R @@ -36,24 +36,36 @@ sdtm_assign <- function(raw_dat, #' Derive an SDTM variable #' #' @description -#' - [assign_no_ct()] maps a variable in a source dataset to a target SDTM +#' - [assign_no_ct()] maps a variable in a raw dataset to a target SDTM #' variable that has no terminology restrictions. #' -#' - [assign_ct()] maps a variable in a source dataset to a target SDTM variable +#' - [assign_ct()] maps a variable in a raw dataset to a target SDTM variable #' following controlled terminology recoding. #' -#' @param raw_dat The raw dataset. -#' @param raw_var The raw variable. -#' @param tgt_var The target SDTM variable. -#' @param tgt_dat Target dataset. -#' @param id_vars If `target_dataset` is different than `raw_dataset`, -#' then this parameter defines keys to use in the join between `raw_dataset` -#' and `target_dataset`. -#' @param ct Study controlled terminology specification. -#' @param cl A codelist code indicating which -#' subset of the controlled terminology to apply in the derivation. +#' @param raw_dat The raw dataset (dataframe); must include the +#' variables passed in `id_vars` and `raw_var`. +#' @param raw_var The raw variable: a single string indicating the name of the +#' raw variable in `raw_dat`. +#' @param tgt_var The target SDTM variable: a single string indicating the name +#' of variable to be derived. +#' @param ct Study controlled terminology specification: a dataframe with a +#' minimal set of columns, see [ct_vars()] for details. +#' @param cl A code-list code indicating which subset of the controlled +#' terminology to apply in the derivation. +#' @param tgt_dat Target dataset: a data frame to be merged against `raw_dat` by +#' the variables indicated in `id_vars`. This parameter is optional, see +#' section Value for how the output changes depending on this argument value. +#' @param id_vars Key variables to be used in the join between the raw dataset +#' (`raw_dat`) and the target data set (`raw_dat`). #' -#' @returns The target dataset with the derived variable `target_sdtm_variable`. +#' @returns The returned data set depends on the value of `tgt_dat`: +#' - If no target dataset is supplied, meaning that `tgt_dat` defaults to +#' `NULL`, then the returned data set is `raw_dat` selected for the variables +#' indicated in `id_vars` and a new extra column: the derived variable, as +#' indicated in `tgt_var`. +#' - If the target dataset is provided, then it is merged with the raw data set +#' `raw_dat` by the variables indicated in `id_vars`, with a new column: the +#' derived variable, as indicated in `tgt_var`. #' #' @examples #' @@ -113,12 +125,16 @@ sdtm_assign <- function(raw_dat, #' ) #' ) #' +#' # Controlled terminology specification +#' (ct <- read_ct_example("ct-01-cm")) +#' #' assign_ct( #' raw_dat = md1, #' raw_var = "MDIND", #' tgt_var = "CMINDC", -#' tgt_dat = cm_inter, -#' id_vars = c("oak_id","raw_source","patient_number") +#' ct = ct, +#' cl = "C66729", +#' tgt_dat = cm_inter #' ) #' #' @name assign diff --git a/man/assign.Rd b/man/assign.Rd index 7b3910f1..0840a924 100644 --- a/man/assign.Rd +++ b/man/assign.Rd @@ -25,31 +25,45 @@ assign_ct( ) } \arguments{ -\item{raw_dat}{The raw dataset.} +\item{raw_dat}{The raw dataset (dataframe); must include the +variables passed in \code{id_vars} and \code{raw_var}.} -\item{raw_var}{The raw variable.} +\item{raw_var}{The raw variable: a single string indicating the name of the +raw variable in \code{raw_dat}.} -\item{tgt_var}{The target SDTM variable.} +\item{tgt_var}{The target SDTM variable: a single string indicating the name +of variable to be derived.} -\item{tgt_dat}{Target dataset.} +\item{tgt_dat}{Target dataset: a data frame to be merged against \code{raw_dat} by +the variables indicated in \code{id_vars}. This parameter is optional, see +section Value for how the output changes depending on this argument value.} -\item{id_vars}{If \code{target_dataset} is different than \code{raw_dataset}, -then this parameter defines keys to use in the join between \code{raw_dataset} -and \code{target_dataset}.} +\item{id_vars}{Key variables to be used in the join between the raw dataset +(\code{raw_dat}) and the target data set (\code{raw_dat}).} -\item{ct}{Study controlled terminology specification.} +\item{ct}{Study controlled terminology specification: a dataframe with a +minimal set of columns, see \code{\link[=ct_vars]{ct_vars()}} for details.} -\item{cl}{A codelist code indicating which -subset of the controlled terminology to apply in the derivation.} +\item{cl}{A code-list code indicating which subset of the controlled +terminology to apply in the derivation.} } \value{ -The target dataset with the derived variable \code{target_sdtm_variable}. +The returned data set depends on the value of \code{tgt_dat}: +\itemize{ +\item If no target dataset is supplied, meaning that \code{tgt_dat} defaults to +\code{NULL}, then the returned data set is \code{raw_dat} selected for the variables +indicated in \code{id_vars} and a new extra column: the derived variable, as +indicated in \code{tgt_var}. +\item If the target dataset is provided, then it is merged with the raw data set +\code{raw_dat} by the variables indicated in \code{id_vars}, with a new column: the +derived variable, as indicated in \code{tgt_var}. +} } \description{ \itemize{ -\item \code{\link[=assign_no_ct]{assign_no_ct()}} maps a variable in a source dataset to a target SDTM +\item \code{\link[=assign_no_ct]{assign_no_ct()}} maps a variable in a raw dataset to a target SDTM variable that has no terminology restrictions. -\item \code{\link[=assign_ct]{assign_ct()}} maps a variable in a source dataset to a target SDTM variable +\item \code{\link[=assign_ct]{assign_ct()}} maps a variable in a raw dataset to a target SDTM variable following controlled terminology recoding. } } @@ -111,12 +125,16 @@ cm_inter <- ) ) +# Controlled terminology specification +(ct <- read_ct_example("ct-01-cm")) + assign_ct( raw_dat = md1, raw_var = "MDIND", tgt_var = "CMINDC", - tgt_dat = cm_inter, - id_vars = c("oak_id","raw_source","patient_number") + ct = ct, + cl = "C66729", + tgt_dat = cm_inter ) } From 7dff0aaf2e5fa6efa99d6bce1d0c980c2aa73863 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Mon, 1 Apr 2024 16:30:27 +0100 Subject: [PATCH 31/80] Improve grammar in doc --- R/assign.R | 4 ++-- man/assign.Rd | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/assign.R b/R/assign.R index 806a4b0b..be98cf34 100644 --- a/R/assign.R +++ b/R/assign.R @@ -60,8 +60,8 @@ sdtm_assign <- function(raw_dat, #' #' @returns The returned data set depends on the value of `tgt_dat`: #' - If no target dataset is supplied, meaning that `tgt_dat` defaults to -#' `NULL`, then the returned data set is `raw_dat` selected for the variables -#' indicated in `id_vars` and a new extra column: the derived variable, as +#' `NULL`, then the returned data set is `raw_dat`, selected for the variables +#' indicated in `id_vars`, and a new extra column: the derived variable, as #' indicated in `tgt_var`. #' - If the target dataset is provided, then it is merged with the raw data set #' `raw_dat` by the variables indicated in `id_vars`, with a new column: the diff --git a/man/assign.Rd b/man/assign.Rd index 0840a924..0ff8fc0c 100644 --- a/man/assign.Rd +++ b/man/assign.Rd @@ -51,8 +51,8 @@ terminology to apply in the derivation.} The returned data set depends on the value of \code{tgt_dat}: \itemize{ \item If no target dataset is supplied, meaning that \code{tgt_dat} defaults to -\code{NULL}, then the returned data set is \code{raw_dat} selected for the variables -indicated in \code{id_vars} and a new extra column: the derived variable, as +\code{NULL}, then the returned data set is \code{raw_dat}, selected for the variables +indicated in \code{id_vars}, and a new extra column: the derived variable, as indicated in \code{tgt_var}. \item If the target dataset is provided, then it is merged with the raw data set \code{raw_dat} by the variables indicated in \code{id_vars}, with a new column: the From cb2f2e8d0e39661b828a90c3370d5fe7ac6d33df Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Mon, 1 Apr 2024 19:00:01 +0100 Subject: [PATCH 32/80] Remove last empty line from ct example file --- inst/ct/ct-01-cm.csv | 69 ++++++++++++++++++++++---------------------- 1 file changed, 34 insertions(+), 35 deletions(-) diff --git a/inst/ct/ct-01-cm.csv b/inst/ct/ct-01-cm.csv index bf231e7e..ffbe0903 100644 --- a/inst/ct/ct-01-cm.csv +++ b/inst/ct/ct-01-cm.csv @@ -1,35 +1,34 @@ -codelist_code,term_code,CodedData,term_value,collected_value,term_preferred_term,term_synonyms,raw_codelist -C71113,C25473,QD,QD,QD (Every Day),Daily,/day; Daily; Per Day,FREQ_CV1 -C71113,C64496,BID,BID,BID (Twice a Day),Twice Daily,BD; Twice per day,FREQ_CV1 -C71113,C64499,PRN,PRN,PRN (As Needed),As Needed,As needed,FREQ_CV1 -C71113,C64516,Q2H,Q2H,Q2H (Every 2 Hours),Every Two Hours,Every 2 hours,FREQ_CV1 -C71113,C64530,QID,QID,QID (4 Times a Day),Four Times Daily,4 times per day,FREQ_CV1 -C66726,C25158,CAPSULE,CAPSULE,Capsule,Capsule Dosage Form,cap,FRM_CV1 -C66726,C25394,PILL,PILL,Pill,Pill Dosage Form,NA,FRM_CV1 -C66726,C29167,LOTION,LOTION,Lotion,Lotion Dosage Form,NA,FRM_CV1 -C66726,C42887,AEROSOL,AEROSOL,Aerosol,Aerosol Dosage Form,aer,FRM_CV1 -C66726,C42944,INHALANT,INHALANT,Inhalant,Inhalant Dosage Form,NA,FRM_CV1 -C66726,C42946,INJECTION,INJECTION,Injection,Injectable Dosage Form,NA,FRM_CV1 -C66726,C42953,LIQUID,LIQUID,Liquid,Liquid Dosage Form,NA,FRM_CV1 -C66726,C42998,TABLET,TABLET,Tablet,Tablet Dosage Form,tab,FRM_CV1 -C66742,C49488,Y,Y,Yes,Yes,Yes,NY_CV1 -C66729,C28161,INTRAMUSCULAR,INTRAMUSCULAR,IM (Intramuscular),Intramuscular Route of Administration,NA,ROUTE_CV1 -C66729,C38210,EPIDURAL,EPIDURAL,EP (Epidural),Epidural Route of Administration,NA,ROUTE_CV1 -C66729,C38222,INTRA-ARTERIAL,INTRA-ARTERIAL,IA (Intra-arterial),Intraarterial Route of Administration,NA,ROUTE_CV1 -C66729,C38223,INTRA-ARTICULAR,INTRA-ARTICULAR,IJ (Intra-articular),Intraarticular Route of Administration,NA,ROUTE_CV1 -C66729,C38287,OPHTHALMIC,OPHTHALMIC,OP (Ophthalmic),Ophthalmic Route of Administration,NA,ROUTE_CV1 -C66729,C38288,ORAL,ORAL,PO (Oral),Oral Route of Administration,Intraoral Route of Administration; PO,ROUTE_CV1 -C66729,C38305,TRANSDERMAL,TRANSDERMAL,DE (Transdermal),Transdermal Route of Administration,NA,ROUTE_CV1 -C66729,C38311,UNKNOWN,UNKNOWN,Unknown,Unknown Route of Administration,NA,ROUTE_CV1 -C71620,C25613,%,%,%,Percentage,Percentage,UNIT_CV1 -C71620,C28253,MG,mg,mg,Milligram,Milligram,UNIT_CV1 -C71620,C28254,ML,mL,mL,Milliliter,cm3; Milliliter,UNIT_CV1 -C71620,C48155,G,g,g,Gram,Gram,UNIT_CV1 -C71620,C48480,CAPSULE,CAPSULE,Capsule,Capsule Dosing Unit,cap; Capsule Dosing Unit,UNIT_CV1 -C71620,C48542,TABLET,TABLET,Tablet,Tablet Dosing Unit,tab; Tablet Dosing Unit,UNIT_CV1 -C71620,C48579,IU,IU,IU,International Unit,IE; International Unit,UNIT_CV1 -C71620,C28254,ML,mL,mL,Milliliter,cm3; Milliliter,UNIT_CV5 -C66728,C25629,BEFORE,BEFORE,Prior,Prior,,NA -C66728,C53279,ONGOING,ONGOING,Continue,Continue,Continuous,NA -C66734,C49568,CM,CM,Concomitant Medication Domain,Concomitant Medication Domain,Concomitant/Prior Medications,NA -,,,,,,, \ No newline at end of file +codelist_code,term_code,CodedData,term_value,collected_value,term_preferred_term,term_synonyms,raw_codelist +C71113,C25473,QD,QD,QD (Every Day),Daily,/day; Daily; Per Day,FREQ_CV1 +C71113,C64496,BID,BID,BID (Twice a Day),Twice Daily,BD; Twice per day,FREQ_CV1 +C71113,C64499,PRN,PRN,PRN (As Needed),As Needed,As needed,FREQ_CV1 +C71113,C64516,Q2H,Q2H,Q2H (Every 2 Hours),Every Two Hours,Every 2 hours,FREQ_CV1 +C71113,C64530,QID,QID,QID (4 Times a Day),Four Times Daily,4 times per day,FREQ_CV1 +C66726,C25158,CAPSULE,CAPSULE,Capsule,Capsule Dosage Form,cap,FRM_CV1 +C66726,C25394,PILL,PILL,Pill,Pill Dosage Form,NA,FRM_CV1 +C66726,C29167,LOTION,LOTION,Lotion,Lotion Dosage Form,NA,FRM_CV1 +C66726,C42887,AEROSOL,AEROSOL,Aerosol,Aerosol Dosage Form,aer,FRM_CV1 +C66726,C42944,INHALANT,INHALANT,Inhalant,Inhalant Dosage Form,NA,FRM_CV1 +C66726,C42946,INJECTION,INJECTION,Injection,Injectable Dosage Form,NA,FRM_CV1 +C66726,C42953,LIQUID,LIQUID,Liquid,Liquid Dosage Form,NA,FRM_CV1 +C66726,C42998,TABLET,TABLET,Tablet,Tablet Dosage Form,tab,FRM_CV1 +C66742,C49488,Y,Y,Yes,Yes,Yes,NY_CV1 +C66729,C28161,INTRAMUSCULAR,INTRAMUSCULAR,IM (Intramuscular),Intramuscular Route of Administration,NA,ROUTE_CV1 +C66729,C38210,EPIDURAL,EPIDURAL,EP (Epidural),Epidural Route of Administration,NA,ROUTE_CV1 +C66729,C38222,INTRA-ARTERIAL,INTRA-ARTERIAL,IA (Intra-arterial),Intraarterial Route of Administration,NA,ROUTE_CV1 +C66729,C38223,INTRA-ARTICULAR,INTRA-ARTICULAR,IJ (Intra-articular),Intraarticular Route of Administration,NA,ROUTE_CV1 +C66729,C38287,OPHTHALMIC,OPHTHALMIC,OP (Ophthalmic),Ophthalmic Route of Administration,NA,ROUTE_CV1 +C66729,C38288,ORAL,ORAL,PO (Oral),Oral Route of Administration,Intraoral Route of Administration; PO,ROUTE_CV1 +C66729,C38305,TRANSDERMAL,TRANSDERMAL,DE (Transdermal),Transdermal Route of Administration,NA,ROUTE_CV1 +C66729,C38311,UNKNOWN,UNKNOWN,Unknown,Unknown Route of Administration,NA,ROUTE_CV1 +C71620,C25613,%,%,%,Percentage,Percentage,UNIT_CV1 +C71620,C28253,MG,mg,mg,Milligram,Milligram,UNIT_CV1 +C71620,C28254,ML,mL,mL,Milliliter,cm3; Milliliter,UNIT_CV1 +C71620,C48155,G,g,g,Gram,Gram,UNIT_CV1 +C71620,C48480,CAPSULE,CAPSULE,Capsule,Capsule Dosing Unit,cap; Capsule Dosing Unit,UNIT_CV1 +C71620,C48542,TABLET,TABLET,Tablet,Tablet Dosing Unit,tab; Tablet Dosing Unit,UNIT_CV1 +C71620,C48579,IU,IU,IU,International Unit,IE; International Unit,UNIT_CV1 +C71620,C28254,ML,mL,mL,Milliliter,cm3; Milliliter,UNIT_CV5 +C66728,C25629,BEFORE,BEFORE,Prior,Prior,,NA +C66728,C53279,ONGOING,ONGOING,Continue,Continue,Continuous,NA +C66734,C49568,CM,CM,Concomitant Medication Domain,Concomitant Medication Domain,Concomitant/Prior Medications,NA From 454b7d86c9e7ccfced2e2969b32d0e816ba87d27 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Mon, 1 Apr 2024 19:03:33 +0100 Subject: [PATCH 33/80] Add documentation to `sdtm_assign()` and ct-related unit tests Although we had discussed to keep assertions only at the user facing functions, I am getting the feeling we would miss assertions also at the internal function... because of several reasons: firstly, the internal function is more flexible having more optional parameters, which requires extra assertion logic, and also because eventually we will be checking code coverage and we will regret not having done this now. --- R/assign.R | 59 ++++++++++++++++++++++++++-- R/ct.R | 84 +++++++++++++++++++++++++++++++++++++--- man/assert_cl.Rd | 32 +++++++++++++++ man/assert_ct.Rd | 13 +++++-- man/assign.Rd | 7 +++- man/sdtm_assign.Rd | 59 ++++++++++++++++++++++++++++ tests/testthat/test-ct.R | 73 +++++++++++++++++++++++++++++++++- 7 files changed, 310 insertions(+), 17 deletions(-) create mode 100644 man/assert_cl.Rd create mode 100644 man/sdtm_assign.Rd diff --git a/R/assign.R b/R/assign.R index be98cf34..3a22702d 100644 --- a/R/assign.R +++ b/R/assign.R @@ -1,11 +1,57 @@ +#' Derive an SDTM variable +#' +#' @description +#' [sdtm_assign()] is an internal function packing the same functionality as +#' [assign_no_ct()] and [assign_ct()] together but aimed at developers only. +#' As a user please use either [assign_no_ct()] or [assign_ct()]. +#' +#' @param raw_dat The raw dataset (dataframe); must include the +#' variables passed in `id_vars` and `raw_var`. +#' @param raw_var The raw variable: a single string indicating the name of the +#' raw variable in `raw_dat`. +#' @param tgt_var The target SDTM variable: a single string indicating the name +#' of variable to be derived. +#' @param ct Study controlled terminology specification: a dataframe with a +#' minimal set of columns, see [ct_vars()] for details. This parameter is +#' optional, if left as `NULL` no controlled terminology recoding is applied. +#' @param cl A code-list code indicating which subset of the controlled +#' terminology to apply in the derivation. This parameter is optional, if left +#' as `NULL`, all possible recodings in `ct` are attempted. +#' @param tgt_dat Target dataset: a data frame to be merged against `raw_dat` by +#' the variables indicated in `id_vars`. This parameter is optional, see +#' section Value for how the output changes depending on this argument value. +#' @param id_vars Key variables to be used in the join between the raw dataset +#' (`raw_dat`) and the target data set (`raw_dat`). +#' +#' @returns The returned data set depends on the value of `tgt_dat`: +#' - If no target dataset is supplied, meaning that `tgt_dat` defaults to +#' `NULL`, then the returned data set is `raw_dat`, selected for the variables +#' indicated in `id_vars`, and a new extra column: the derived variable, as +#' indicated in `tgt_var`. +#' - If the target dataset is provided, then it is merged with the raw data set +#' `raw_dat` by the variables indicated in `id_vars`, with a new column: the +#' derived variable, as indicated in `tgt_var`. +#' +#' #' @importFrom rlang := +#' @keywords internal sdtm_assign <- function(raw_dat, raw_var, tgt_var, - tgt_dat = NULL, - id_vars = oak_id_vars(), ct = NULL, - cl = NULL) { + cl = NULL, + tgt_dat = NULL, + id_vars = oak_id_vars()) { + + admiraldev::assert_character_scalar(raw_var) + admiraldev::assert_character_scalar(tgt_var) + admiraldev::assert_character_vector(id_vars) + assertthat::assert_that(contains_oak_id_vars(id_vars), + msg = "`id_vars` must include the oak id vars.") + admiraldev::assert_data_frame(raw_dat, required_vars = rlang::syms(c(id_vars, raw_var))) + admiraldev::assert_data_frame(tgt_dat, required_vars = rlang::syms(id_vars), optional = TRUE) + assert_ct(ct, optional = TRUE) + assert_cl(ct = ct, cl = cl, optional = TRUE) # Recode the raw variable following terminology. tgt_val <- ct_map(raw_dat[[raw_var]], ct = ct, cl = cl) @@ -42,6 +88,10 @@ sdtm_assign <- function(raw_dat, #' - [assign_ct()] maps a variable in a raw dataset to a target SDTM variable #' following controlled terminology recoding. #' +#' - [sdtm_assign()] is an internal function packing the same functionality as +#' [assign_no_ct()] and [assign_ct()] together but aimed at developers only. +#' As a user please use either [assign_no_ct()] or [assign_ct()]. +#' #' @param raw_dat The raw dataset (dataframe); must include the #' variables passed in `id_vars` and `raw_var`. #' @param raw_var The raw variable: a single string indicating the name of the @@ -140,6 +190,7 @@ sdtm_assign <- function(raw_dat, #' @name assign NULL +#' @order 1 #' @export #' @rdname assign assign_no_ct <- function(raw_dat, @@ -165,7 +216,7 @@ assign_no_ct <- function(raw_dat, ) } - +#' @order 2 #' @export #' @rdname assign assign_ct <- function(raw_dat, diff --git a/R/ct.R b/R/ct.R index b4cc7333..4f3d3634 100644 --- a/R/ct.R +++ b/R/ct.R @@ -55,10 +55,16 @@ ct_vars <- function(set = c("all", "cl", "from", "to")) { #' Assert a controlled terminology specification #' -#' [assert_ct()] will check whether `ct` is a data frame and -#' if it contains the variables: `r knitr::combine_words(ct_vars())`. +#' @description +#' [assert_ct()] will check whether `ct` is a data frame and if it contains the +#' variables: `r knitr::combine_words(ct_vars())`. +#' +#' In addition, it will also check if the data frame is not empty (no rows), and +#' whether the columns \code{`r ct_vars('cl')`} and \code{`r ct_vars('to')`} do +#' not contain any `NA` values. #' -#' @param ct A data frame to be asserted as a controlled terminology data set. +#' @param ct A data frame to be asserted as a valid controlled terminology data +#' set. #' #' @returns The function throws an error if `ct` is not a valid controlled #' terminology data set; otherwise, `ct` is returned invisibly. @@ -83,12 +89,80 @@ ct_vars <- function(set = c("all", "cl", "from", "to")) { #' try(sdtm.oak:::assert_ct(ct_01[opt_vars])) #' #' @keywords internal -assert_ct <- function(ct) { +assert_ct <- function(ct, optional = FALSE) { + + admiraldev::assert_data_frame( + arg = ct, + required_vars = rlang::syms(ct_vars()), + optional = optional + ) + + if (!is.null(ct) && nrow(ct) == 0L) { + rlang::abort("`ct` can't be empty.") + } + + if (!is.null(ct) && any(is.na(ct[[ct_vars("cl")]]))) { + rlang::abort(glue::glue("`{ct_vars('cl')}` can't have any NA values.")) + } + + if (!is.null(ct) && any(is.na(ct[[ct_vars("to")]]))) { + rlang::abort(glue::glue("`{ct_vars('to')}` can't have any NA values.")) + } - admiraldev::assert_data_frame(arg = ct, required_vars = rlang::syms(ct_vars())) invisible(ct) } +#' Assert a code-list code +#' +#' [assert_cl()] asserts the validity of a code-list code in the context of +#' a controlled terminology specification. +#' +#' @param ct Either a data frame encoding a controlled terminology data set, or +#' `NULL`. +#' @param cl A string with a to-be asserted code-list code, or `NULL`. +#' @param optional A scalar logical, indicating whether `cl` can be `NULL` or +#' not. +#' +#' @returns The function throws an error if `cl` is not a valid code-list code +#' given the controlled terminology data set; otherwise, `cl` is returned +#' invisibly. +#' +#' @examples +#' # example code +#' +#' +#' @keywords internal +assert_cl <- function(ct, cl, optional = FALSE) { + + if (!is.null(cl)) { + admiraldev::assert_character_scalar(cl) + } + + if (is.null(cl) && !optional) { + rlang::abort("`cl` is a required parameter.") + } + + if (is.null(ct) && !is.null(cl)) { + rlang::abort("`ct` must be a valid controlled terminology if `cl` is supplied.") + } + + if (is.null(cl)) { + return(invisible(NULL)) + } + + if (!is.null(ct) && is.na(cl)) { + rlang::abort("`cl` can't be NA. Did you mean `NULL`?") + } + + if (!is.null(ct) && !is.null(cl)) { + assert_ct(ct, optional = FALSE) + cl_possibilities <- unique(ct[[ct_vars("cl")]]) + admiraldev::assert_character_scalar(cl, values = cl_possibilities) + } + + return(cl) +} + #' Controlled terminology mappings #' #' @description diff --git a/man/assert_cl.Rd b/man/assert_cl.Rd new file mode 100644 index 00000000..785c0415 --- /dev/null +++ b/man/assert_cl.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ct.R +\name{assert_cl} +\alias{assert_cl} +\title{Assert a code-list code} +\usage{ +assert_cl(ct, cl, optional = FALSE) +} +\arguments{ +\item{ct}{Either a data frame encoding a controlled terminology data set, or +\code{NULL}.} + +\item{cl}{A string with a to-be asserted code-list code, or \code{NULL}.} + +\item{optional}{A scalar logical, indicating whether \code{cl} can be \code{NULL} or +not.} +} +\value{ +The function throws an error if \code{cl} is not a valid code-list code +given the controlled terminology data set; otherwise, \code{cl} is returned +invisibly. +} +\description{ +\code{\link[=assert_cl]{assert_cl()}} asserts the validity of a code-list code in the context of +a controlled terminology specification. +} +\examples{ +# example code + + +} +\keyword{internal} diff --git a/man/assert_ct.Rd b/man/assert_ct.Rd index 8e79fdb8..ace3a345 100644 --- a/man/assert_ct.Rd +++ b/man/assert_ct.Rd @@ -4,18 +4,23 @@ \alias{assert_ct} \title{Assert a controlled terminology specification} \usage{ -assert_ct(ct) +assert_ct(ct, optional = FALSE) } \arguments{ -\item{ct}{A data frame to be asserted as a controlled terminology data set.} +\item{ct}{A data frame to be asserted as a valid controlled terminology data +set.} } \value{ The function throws an error if \code{ct} is not a valid controlled terminology data set; otherwise, \code{ct} is returned invisibly. } \description{ -\code{\link[=assert_ct]{assert_ct()}} will check whether \code{ct} is a data frame and -if it contains the variables: codelist_code, collected_value, term_synonyms, and term_value. +\code{\link[=assert_ct]{assert_ct()}} will check whether \code{ct} is a data frame and if it contains the +variables: codelist_code, collected_value, term_synonyms, and term_value. + +In addition, it will also check if the data frame is not empty (no rows), and +whether the columns \code{codelist_code} and \code{term_value} do +not contain any \code{NA} values. } \examples{ # If `ct` is a valid controlled terminology then it is returned invisibly. diff --git a/man/assign.Rd b/man/assign.Rd index 0ff8fc0c..baaee93b 100644 --- a/man/assign.Rd +++ b/man/assign.Rd @@ -1,9 +1,9 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/assign.R -\name{assign} -\alias{assign} +\name{assign_no_ct} \alias{assign_no_ct} \alias{assign_ct} +\alias{assign} \title{Derive an SDTM variable} \usage{ assign_no_ct( @@ -65,6 +65,9 @@ derived variable, as indicated in \code{tgt_var}. variable that has no terminology restrictions. \item \code{\link[=assign_ct]{assign_ct()}} maps a variable in a raw dataset to a target SDTM variable following controlled terminology recoding. +\item \code{\link[=sdtm_assign]{sdtm_assign()}} is an internal function packing the same functionality as +\code{\link[=assign_no_ct]{assign_no_ct()}} and \code{\link[=assign_ct]{assign_ct()}} together but aimed at developers only. +As a user please use either \code{\link[=assign_no_ct]{assign_no_ct()}} or \code{\link[=assign_ct]{assign_ct()}}. } } \examples{ diff --git a/man/sdtm_assign.Rd b/man/sdtm_assign.Rd new file mode 100644 index 00000000..56e2e8c2 --- /dev/null +++ b/man/sdtm_assign.Rd @@ -0,0 +1,59 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/assign.R +\name{sdtm_assign} +\alias{sdtm_assign} +\title{Derive an SDTM variable} +\usage{ +sdtm_assign( + raw_dat, + raw_var, + tgt_var, + ct = NULL, + cl = NULL, + tgt_dat = NULL, + id_vars = oak_id_vars() +) +} +\arguments{ +\item{raw_dat}{The raw dataset (dataframe); must include the +variables passed in \code{id_vars} and \code{raw_var}.} + +\item{raw_var}{The raw variable: a single string indicating the name of the +raw variable in \code{raw_dat}.} + +\item{tgt_var}{The target SDTM variable: a single string indicating the name +of variable to be derived.} + +\item{ct}{Study controlled terminology specification: a dataframe with a +minimal set of columns, see \code{\link[=ct_vars]{ct_vars()}} for details. This parameter is +optional, if left as \code{NULL} no controlled terminology recoding is applied.} + +\item{cl}{A code-list code indicating which subset of the controlled +terminology to apply in the derivation. This parameter is optional, if left +as \code{NULL}, all possible recodings in \code{ct} are attempted.} + +\item{tgt_dat}{Target dataset: a data frame to be merged against \code{raw_dat} by +the variables indicated in \code{id_vars}. This parameter is optional, see +section Value for how the output changes depending on this argument value.} + +\item{id_vars}{Key variables to be used in the join between the raw dataset +(\code{raw_dat}) and the target data set (\code{raw_dat}).} +} +\value{ +The returned data set depends on the value of \code{tgt_dat}: +\itemize{ +\item If no target dataset is supplied, meaning that \code{tgt_dat} defaults to +\code{NULL}, then the returned data set is \code{raw_dat}, selected for the variables +indicated in \code{id_vars}, and a new extra column: the derived variable, as +indicated in \code{tgt_var}. +\item If the target dataset is provided, then it is merged with the raw data set +\code{raw_dat} by the variables indicated in \code{id_vars}, with a new column: the +derived variable, as indicated in \code{tgt_var}. +} +} +\description{ +\code{\link[=sdtm_assign]{sdtm_assign()}} is an internal function packing the same functionality as +\code{\link[=assign_no_ct]{assign_no_ct()}} and \code{\link[=assign_ct]{assign_ct()}} together but aimed at developers only. +As a user please use either \code{\link[=assign_no_ct]{assign_no_ct()}} or \code{\link[=assign_ct]{assign_ct()}}. +} +\keyword{internal} diff --git a/tests/testthat/test-ct.R b/tests/testthat/test-ct.R index 19d66835..6bbe6273 100644 --- a/tests/testthat/test-ct.R +++ b/tests/testthat/test-ct.R @@ -1,4 +1,4 @@ -test_that("ct_vars works as expected", { +test_that("ct_vars() works as expected", { expect_equal(ct_vars(), c( @@ -28,10 +28,79 @@ test_that("ct_vars works as expected", { expect_equal(ct_vars(set = "to"), "term_value") }) -test_that("ct_vars fails with invalid input choice", { +test_that("ct_vars() fails with invalid input choice", { expect_error(ct_vars("foo")) expect_error(ct_vars(1L)) expect_error(ct_vars(FALSE)) expect_error(ct_vars(NULL)) }) + +test_that("assert_cl() works as expected", { + + # Read in a controlled terminology example. + ct <- read_ct_example("ct-01-cm") + + # If `cl` is not supplied and is not optional, then it should err. + expect_error(assert_cl(ct = NULL, cl = NULL, optional = FALSE)) + + # If `cl` is not supplied but it is optional, then all fine. + expect_no_error(assert_cl(ct = NULL, cl = NULL, optional = TRUE)) + # Moreover, in case of no error, `cl` should be returned invisibly, in this + # case `NULL`. + expect_null(assert_cl(ct = NULL, cl = NULL, optional = TRUE)) + + # If `cl` is supplied but `ct` is not, then err. + expect_error(assert_cl(ct = NULL, cl = "C71113", optional = FALSE)) + expect_error(assert_cl(ct = NULL, cl = "C71113", optional = TRUE)) + + # If `ct` is supplied but `cl` is NULL, then err if `cl` is not optional, or + # return `cl` invisibly. + expect_error(assert_cl(ct = ct, cl = NULL, optional = FALSE)) + expect_no_error(assert_cl(ct = ct, cl = NULL, optional = TRUE)) + expect_null(assert_cl(ct = ct, cl = NULL, optional = TRUE)) + + # If both `ct` and `cl` are supplied, then `ct` must be a valid controlled + # terminology data set and `cl` must contain a code-list code available among + # the possibilities in column `codelist_code` (as returned by `ct_vars("cl")`). + expect_error(assert_cl(ct = ct, cl = "foo", optional = FALSE)) + expect_error(assert_cl(ct = ct, cl = "", optional = FALSE)) + + expect_error(assert_cl(ct = ct, cl = NA_character_, optional = FALSE)) + expect_error(assert_cl(ct = ct, cl = NA_character_, optional = TRUE)) + + expect_identical(assert_cl(ct = ct, cl = "C71113", optional = FALSE), "C71113") + expect_identical(assert_cl(ct = ct, cl = "C66726", optional = FALSE), "C66726") + expect_identical(assert_cl(ct = ct, cl = "C71113", optional = TRUE), "C71113") + expect_identical(assert_cl(ct = ct, cl = "C66726", optional = TRUE), "C66726") + +}) + +test_that("assert_cl(): when ct is empty", { + ct <- + data.frame( + codelist_code = character(), + collected_value = character(), + term_synonyms = character(), + term_value = character() + ) + + # If `ct` is supplied but `cl` is NULL, then err if `cl` is not optional, or + # return `cl` invisibly. + expect_error(assert_cl(ct = ct, cl = NULL, optional = FALSE)) + expect_no_error(assert_cl(ct = ct, cl = NULL, optional = TRUE)) + expect_null(assert_cl(ct = ct, cl = NULL, optional = TRUE)) + + # If both `ct` and `cl` are supplied, then `ct` must be a valid controlled + # terminology data set and `cl` must contain a code-list code available among + # the possibilities in column `codelist_code` (as returned by `ct_vars("cl")`). + expect_error(assert_cl(ct = ct, cl = "foo", optional = FALSE)) + expect_error(assert_cl(ct = ct, cl = "", optional = FALSE)) + + expect_error(assert_cl(ct = ct, cl = NA_character_, optional = FALSE)) + expect_error(assert_cl(ct = ct, cl = NA_character_, optional = TRUE)) + + expect_error(assert_cl(ct = ct, cl = "C71113", optional = FALSE)) + expect_error(assert_cl(ct = ct, cl = "C71113", optional = TRUE)) + +}) From fafe01b4751e4fbfcc1685f681cb20c5a1a3a369 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Mon, 1 Apr 2024 19:46:16 +0100 Subject: [PATCH 34/80] Update hardcode-related fns --- R/hardcode.R | 132 ++++++++++++++++++++++++++++++++++--------- man/harcode.Rd | 72 ++++++++++++++++------- man/sdtm_hardcode.Rd | 63 +++++++++++++++++++++ 3 files changed, 221 insertions(+), 46 deletions(-) create mode 100644 man/sdtm_hardcode.Rd diff --git a/R/hardcode.R b/R/hardcode.R index a051e7af..2fe49b56 100644 --- a/R/hardcode.R +++ b/R/hardcode.R @@ -1,13 +1,60 @@ +#' Derive an SDTM variable with a hardcoded value +#' +#' @description +#' [sdtm_hardcode()] is an internal function packing the same functionality as +#' [hardcode_no_ct()] and [hardcode_ct()] together but aimed at developers only. +#' As a user please use either [hardcode_no_ct()] or [hardcode_ct()]. +#' +#' @param raw_dat The raw dataset (dataframe); must include the +#' variables passed in `id_vars` and `raw_var`. +#' @param raw_var The raw variable: a single string indicating the name of the +#' raw variable in `raw_dat`. +#' @param tgt_var The target SDTM variable: a single string indicating the name +#' of variable to be derived. +#' @param tgt_val The target SDTM value to be hardcoded into the variable +#' indicated in `tgt_var`. +#' @param ct Study controlled terminology specification: a dataframe with a +#' minimal set of columns, see [ct_vars()] for details. This parameter is +#' optional, if left as `NULL` no controlled terminology recoding is applied. +#' @param cl A code-list code indicating which subset of the controlled +#' terminology to apply in the derivation. This parameter is optional, if left +#' as `NULL`, all possible recodings in `ct` are attempted. +#' @param tgt_dat Target dataset: a data frame to be merged against `raw_dat` by +#' the variables indicated in `id_vars`. This parameter is optional, see +#' section Value for how the output changes depending on this argument value. +#' @param id_vars Key variables to be used in the join between the raw dataset +#' (`raw_dat`) and the target data set (`raw_dat`). +#' +#' @returns The returned data set depends on the value of `tgt_dat`: +#' - If no target dataset is supplied, meaning that `tgt_dat` defaults to +#' `NULL`, then the returned data set is `raw_dat`, selected for the variables +#' indicated in `id_vars`, and a new extra column: the derived variable, as +#' indicated in `tgt_var`. +#' - If the target dataset is provided, then it is merged with the raw data set +#' `raw_dat` by the variables indicated in `id_vars`, with a new column: the +#' derived variable, as indicated in `tgt_var`. +#' #' @importFrom rlang := #' @keywords internal sdtm_hardcode <- function(raw_dat, raw_var, tgt_var, tgt_val, - tgt_dat = NULL, - id_vars = oak_id_vars(), ct = NULL, - cl = NULL) { + cl = NULL, + tgt_dat = NULL, + id_vars = oak_id_vars()) { + + admiraldev::assert_character_scalar(raw_var) + admiraldev::assert_character_scalar(tgt_var) + admiraldev::assert_character_scalar(tgt_val) + admiraldev::assert_character_vector(id_vars) + assertthat::assert_that(contains_oak_id_vars(id_vars), + msg = "`id_vars` must include the oak id vars.") + admiraldev::assert_data_frame(raw_dat, required_vars = rlang::syms(c(id_vars, raw_var))) + admiraldev::assert_data_frame(tgt_dat, required_vars = rlang::syms(id_vars), optional = TRUE) + assert_ct(ct, optional = TRUE) + assert_cl(ct = ct, cl = cl, optional = TRUE) # Recode the hardcoded value following terminology. tgt_val <- ct_map(tgt_val, ct = ct, cl = cl) @@ -44,22 +91,37 @@ sdtm_hardcode <- function(raw_dat, #' - [hardcode_ct()] maps a hardcoded value to a target SDTM variable with #' controlled terminology recoding. #' -#' @param raw_dat The raw dataset. -#' @param raw_var The raw variable. -#' @param tgt_var The target SDTM variable. -#' @param tgt_val Hardcoded value. -#' @param tgt_dat Target dataset. By default the same as `raw_dataset`. -#' @param id_vars If `target_dataset` is different than `raw_dataset`, -#' then this parameter defines keys to use in the join between `raw_dataset` -#' and `target_dataset`. -#' @param ct Study controlled terminology specification. -#' @param cl A codelist code indicating which -#' subset of the controlled terminology to apply in the derivation. +#' @param raw_dat The raw dataset (dataframe); must include the +#' variables passed in `id_vars` and `raw_var`. +#' @param raw_var The raw variable: a single string indicating the name of the +#' raw variable in `raw_dat`. +#' @param tgt_var The target SDTM variable: a single string indicating the name +#' of variable to be derived. +#' @param tgt_val The target SDTM value to be hardcoded into the variable +#' indicated in `tgt_var`. +#' @param ct Study controlled terminology specification: a dataframe with a +#' minimal set of columns, see [ct_vars()] for details. This parameter is +#' optional, if left as `NULL` no controlled terminology recoding is applied. +#' @param cl A code-list code indicating which subset of the controlled +#' terminology to apply in the derivation. This parameter is optional, if left +#' as `NULL`, all possible recodings in `ct` are attempted. +#' @param tgt_dat Target dataset: a data frame to be merged against `raw_dat` by +#' the variables indicated in `id_vars`. This parameter is optional, see +#' section Value for how the output changes depending on this argument value. +#' @param id_vars Key variables to be used in the join between the raw dataset +#' (`raw_dat`) and the target data set (`raw_dat`). #' -#' @returns The target dataset with the derived variable `target_sdtm_variable`. +#' @returns The returned data set depends on the value of `tgt_dat`: +#' - If no target dataset is supplied, meaning that `tgt_dat` defaults to +#' `NULL`, then the returned data set is `raw_dat`, selected for the variables +#' indicated in `id_vars`, and a new extra column: the derived variable, as +#' indicated in `tgt_var`. +#' - If the target dataset is provided, then it is merged with the raw data set +#' `raw_dat` by the variables indicated in `id_vars`, with a new column: the +#' derived variable, as indicated in `tgt_var`. #' #' @examples -#' MD1 <- +#' md1 <- #' tibble::tribble( #' ~oak_id, ~raw_source, ~patient_number, ~MDRAW, #' 1L, "MD1", 101L, "BABY ASPIRIN", @@ -71,13 +133,13 @@ sdtm_hardcode <- function(raw_dat, #' # Derive a new variable `CMCAT` by overwriting `MDRAW` with the #' # hardcoded value "GENERAL CONCOMITANT MEDICATIONS". #' hardcode_no_ct( -#' raw_dat = MD1, +#' raw_dat = md1, #' raw_var = "MDRAW", #' tgt_var = "CMCAT", #' tgt_val = "GENERAL CONCOMITANT MEDICATIONS" #' ) #' -#' CM_INTER <- +#' cm_inter <- #' tibble::tribble( #' ~oak_id, ~raw_source, ~patient_number, ~CMTRT, ~CMINDC, #' 1L, "MD1", 101L, "BABY ASPIRIN", NA, @@ -91,14 +153,29 @@ sdtm_hardcode <- function(raw_dat, #' # hardcoded value "GENERAL CONCOMITANT MEDICATIONS" with a prior join to #' # `target_dataset`. #' hardcode_no_ct( -#' raw_dat = MD1, +#' raw_dat = md1, #' raw_var = "MDRAW", #' tgt_var = "CMCAT", #' tgt_val = "GENERAL CONCOMITANT MEDICATIONS", -#' tgt_dat = CM_INTER, -#' id_vars = c("oak_id", "raw_source", "patient_number") +#' tgt_dat = cm_inter #' ) #' +#' # Controlled terminology specification +#' (ct <- read_ct_example("ct-01-cm")) +#' +#' # Hardcoding of `CMCAT` with the value `"GENERAL CONCOMITANT MEDICATIONS"` +#' # involving terminology recoding. `NA` values in `MDRAW` are preserved in +#' # `CMCAT`. +#' hardcode_ct( +#' raw_dat = md1, +#' raw_var = "MDRAW", +#' tgt_var = "CMCAT", +#' tgt_val = "GENERAL CONCOMITANT MEDICATIONS", +#' ct = ct, +#' cl = "C66729", +#' tgt_dat = cm_inter +#' ) +#' #' @name harcode NULL @@ -135,10 +212,10 @@ hardcode_no_ct <- function(raw_dat, #' @rdname harcode hardcode_ct <- function(raw_dat, raw_var, - ct, - cl, tgt_var, tgt_val, + ct, + cl, tgt_dat = NULL, id_vars = oak_id_vars() ) { @@ -153,15 +230,18 @@ hardcode_ct <- function(raw_dat, admiraldev::assert_data_frame(raw_dat, required_vars = rlang::syms(c(id_vars, raw_var))) admiraldev::assert_data_frame(tgt_dat, required_vars = rlang::syms(id_vars), optional = TRUE) + assert_ct(ct, optional = FALSE) + assert_cl(ct = ct, cl = cl, optional = FALSE) + sdtm_hardcode( raw_dat = raw_dat, raw_var = raw_var, tgt_var = tgt_var, tgt_val = tgt_val, - tgt_dat = tgt_dat, - id_vars = id_vars, ct = ct, - cl = cl + cl = cl, + tgt_dat = tgt_dat, + id_vars = id_vars ) } diff --git a/man/harcode.Rd b/man/harcode.Rd index 85faa83d..e593964c 100644 --- a/man/harcode.Rd +++ b/man/harcode.Rd @@ -18,36 +18,53 @@ hardcode_no_ct( hardcode_ct( raw_dat, raw_var, - ct, - cl, tgt_var, tgt_val, + ct, + cl, tgt_dat = NULL, id_vars = oak_id_vars() ) } \arguments{ -\item{raw_dat}{The raw dataset.} +\item{raw_dat}{The raw dataset (dataframe); must include the +variables passed in \code{id_vars} and \code{raw_var}.} -\item{raw_var}{The raw variable.} +\item{raw_var}{The raw variable: a single string indicating the name of the +raw variable in \code{raw_dat}.} -\item{tgt_var}{The target SDTM variable.} +\item{tgt_var}{The target SDTM variable: a single string indicating the name +of variable to be derived.} -\item{tgt_val}{Hardcoded value.} +\item{tgt_val}{The target SDTM value to be hardcoded into the variable +indicated in \code{tgt_var}.} -\item{tgt_dat}{Target dataset. By default the same as \code{raw_dataset}.} +\item{tgt_dat}{Target dataset: a data frame to be merged against \code{raw_dat} by +the variables indicated in \code{id_vars}. This parameter is optional, see +section Value for how the output changes depending on this argument value.} -\item{id_vars}{If \code{target_dataset} is different than \code{raw_dataset}, -then this parameter defines keys to use in the join between \code{raw_dataset} -and \code{target_dataset}.} +\item{id_vars}{Key variables to be used in the join between the raw dataset +(\code{raw_dat}) and the target data set (\code{raw_dat}).} -\item{ct}{Study controlled terminology specification.} +\item{ct}{Study controlled terminology specification: a dataframe with a +minimal set of columns, see \code{\link[=ct_vars]{ct_vars()}} for details. This parameter is +optional, if left as \code{NULL} no controlled terminology recoding is applied.} -\item{cl}{A codelist code indicating which -subset of the controlled terminology to apply in the derivation.} +\item{cl}{A code-list code indicating which subset of the controlled +terminology to apply in the derivation. This parameter is optional, if left +as \code{NULL}, all possible recodings in \code{ct} are attempted.} } \value{ -The target dataset with the derived variable \code{target_sdtm_variable}. +The returned data set depends on the value of \code{tgt_dat}: +\itemize{ +\item If no target dataset is supplied, meaning that \code{tgt_dat} defaults to +\code{NULL}, then the returned data set is \code{raw_dat}, selected for the variables +indicated in \code{id_vars}, and a new extra column: the derived variable, as +indicated in \code{tgt_var}. +\item If the target dataset is provided, then it is merged with the raw data set +\code{raw_dat} by the variables indicated in \code{id_vars}, with a new column: the +derived variable, as indicated in \code{tgt_var}. +} } \description{ \itemize{ @@ -58,7 +75,7 @@ controlled terminology recoding. } } \examples{ -MD1 <- +md1 <- tibble::tribble( ~oak_id, ~raw_source, ~patient_number, ~MDRAW, 1L, "MD1", 101L, "BABY ASPIRIN", @@ -70,13 +87,13 @@ MD1 <- # Derive a new variable `CMCAT` by overwriting `MDRAW` with the # hardcoded value "GENERAL CONCOMITANT MEDICATIONS". hardcode_no_ct( - raw_dat = MD1, + raw_dat = md1, raw_var = "MDRAW", tgt_var = "CMCAT", tgt_val = "GENERAL CONCOMITANT MEDICATIONS" ) -CM_INTER <- +cm_inter <- tibble::tribble( ~oak_id, ~raw_source, ~patient_number, ~CMTRT, ~CMINDC, 1L, "MD1", 101L, "BABY ASPIRIN", NA, @@ -90,12 +107,27 @@ CM_INTER <- # hardcoded value "GENERAL CONCOMITANT MEDICATIONS" with a prior join to # `target_dataset`. hardcode_no_ct( - raw_dat = MD1, + raw_dat = md1, raw_var = "MDRAW", tgt_var = "CMCAT", tgt_val = "GENERAL CONCOMITANT MEDICATIONS", - tgt_dat = CM_INTER, - id_vars = c("oak_id", "raw_source", "patient_number") + tgt_dat = cm_inter ) +# Controlled terminology specification +(ct <- read_ct_example("ct-01-cm")) + +# Hardcoding of `CMCAT` with the value `"GENERAL CONCOMITANT MEDICATIONS"` +# involving terminology recoding. `NA` values in `MDRAW` are preserved in +# `CMCAT`. +hardcode_ct( + raw_dat = md1, + raw_var = "MDRAW", + tgt_var = "CMCAT", + tgt_val = "GENERAL CONCOMITANT MEDICATIONS", + ct = ct, + cl = "C66729", + tgt_dat = cm_inter + ) + } diff --git a/man/sdtm_hardcode.Rd b/man/sdtm_hardcode.Rd new file mode 100644 index 00000000..0f0759bd --- /dev/null +++ b/man/sdtm_hardcode.Rd @@ -0,0 +1,63 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/hardcode.R +\name{sdtm_hardcode} +\alias{sdtm_hardcode} +\title{Derive an SDTM variable with a hardcoded value} +\usage{ +sdtm_hardcode( + raw_dat, + raw_var, + tgt_var, + tgt_val, + ct = NULL, + cl = NULL, + tgt_dat = NULL, + id_vars = oak_id_vars() +) +} +\arguments{ +\item{raw_dat}{The raw dataset (dataframe); must include the +variables passed in \code{id_vars} and \code{raw_var}.} + +\item{raw_var}{The raw variable: a single string indicating the name of the +raw variable in \code{raw_dat}.} + +\item{tgt_var}{The target SDTM variable: a single string indicating the name +of variable to be derived.} + +\item{tgt_val}{The target SDTM value to be hardcoded into the variable +indicated in \code{tgt_var}.} + +\item{ct}{Study controlled terminology specification: a dataframe with a +minimal set of columns, see \code{\link[=ct_vars]{ct_vars()}} for details. This parameter is +optional, if left as \code{NULL} no controlled terminology recoding is applied.} + +\item{cl}{A code-list code indicating which subset of the controlled +terminology to apply in the derivation. This parameter is optional, if left +as \code{NULL}, all possible recodings in \code{ct} are attempted.} + +\item{tgt_dat}{Target dataset: a data frame to be merged against \code{raw_dat} by +the variables indicated in \code{id_vars}. This parameter is optional, see +section Value for how the output changes depending on this argument value.} + +\item{id_vars}{Key variables to be used in the join between the raw dataset +(\code{raw_dat}) and the target data set (\code{raw_dat}).} +} +\value{ +The returned data set depends on the value of \code{tgt_dat}: +\itemize{ +\item If no target dataset is supplied, meaning that \code{tgt_dat} defaults to +\code{NULL}, then the returned data set is \code{raw_dat}, selected for the variables +indicated in \code{id_vars}, and a new extra column: the derived variable, as +indicated in \code{tgt_var}. +\item If the target dataset is provided, then it is merged with the raw data set +\code{raw_dat} by the variables indicated in \code{id_vars}, with a new column: the +derived variable, as indicated in \code{tgt_var}. +} +} +\description{ +\code{\link[=sdtm_hardcode]{sdtm_hardcode()}} is an internal function packing the same functionality as +\code{\link[=hardcode_no_ct]{hardcode_no_ct()}} and \code{\link[=hardcode_ct]{hardcode_ct()}} together but aimed at developers only. +As a user please use either \code{\link[=hardcode_no_ct]{hardcode_no_ct()}} or \code{\link[=hardcode_ct]{hardcode_ct()}}. +} +\keyword{internal} From 3a4b3552f2dfd7ecb93e2c0d669cf429fc1ea37a Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Mon, 1 Apr 2024 19:54:11 +0100 Subject: [PATCH 35/80] Changes to meet linter issues --- R/ct.R | 4 ++-- tests/testthat/test-ct.R | 13 +++++++------ 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/R/ct.R b/R/ct.R index 4f3d3634..ef3b0b0e 100644 --- a/R/ct.R +++ b/R/ct.R @@ -101,11 +101,11 @@ assert_ct <- function(ct, optional = FALSE) { rlang::abort("`ct` can't be empty.") } - if (!is.null(ct) && any(is.na(ct[[ct_vars("cl")]]))) { + if (!is.null(ct) && anyNA(ct[[ct_vars("cl")]])) { rlang::abort(glue::glue("`{ct_vars('cl')}` can't have any NA values.")) } - if (!is.null(ct) && any(is.na(ct[[ct_vars("to")]]))) { + if (!is.null(ct) && anyNA(ct[[ct_vars("to")]])) { rlang::abort(glue::glue("`{ct_vars('to')}` can't have any NA values.")) } diff --git a/tests/testthat/test-ct.R b/tests/testthat/test-ct.R index 6bbe6273..bc166f85 100644 --- a/tests/testthat/test-ct.R +++ b/tests/testthat/test-ct.R @@ -1,6 +1,6 @@ test_that("ct_vars() works as expected", { - expect_equal(ct_vars(), + expect_identical(ct_vars(), c( "codelist_code", "collected_value", @@ -8,7 +8,7 @@ test_that("ct_vars() works as expected", { "term_value" )) - expect_equal( + expect_identical( ct_vars(set = "all"), c( "codelist_code", @@ -18,14 +18,14 @@ test_that("ct_vars() works as expected", { ) ) - expect_equal(ct_vars(set = "cl"), + expect_identical(ct_vars(set = "cl"), "codelist_code") - expect_equal(ct_vars(set = "from"), + expect_identical(ct_vars(set = "from"), c("collected_value", "term_synonyms")) - expect_equal(ct_vars(set = "to"), "term_value") + expect_identical(ct_vars(set = "to"), "term_value") }) test_that("ct_vars() fails with invalid input choice", { @@ -82,7 +82,8 @@ test_that("assert_cl(): when ct is empty", { codelist_code = character(), collected_value = character(), term_synonyms = character(), - term_value = character() + term_value = character(), + stringsAsFactors = FALSE ) # If `ct` is supplied but `cl` is NULL, then err if `cl` is not optional, or From 37575b26eb09c52d08b76f3f22ea0ebac6f127cf Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Mon, 1 Apr 2024 20:12:43 +0100 Subject: [PATCH 36/80] Code reformatting --- R/assign.R | 24 ++--- R/ct.R | 34 +++---- R/hardcode.R | 82 ++++++++-------- R/oak_id_vars.R | 2 - R/recode.R | 13 ++- tests/testthat/test-ct.R | 195 ++++++++++++++++++++++++++++++--------- 6 files changed, 224 insertions(+), 126 deletions(-) diff --git a/R/assign.R b/R/assign.R index 3a22702d..396dd46d 100644 --- a/R/assign.R +++ b/R/assign.R @@ -42,12 +42,12 @@ sdtm_assign <- function(raw_dat, cl = NULL, tgt_dat = NULL, id_vars = oak_id_vars()) { - admiraldev::assert_character_scalar(raw_var) admiraldev::assert_character_scalar(tgt_var) admiraldev::assert_character_vector(id_vars) assertthat::assert_that(contains_oak_id_vars(id_vars), - msg = "`id_vars` must include the oak id vars.") + msg = "`id_vars` must include the oak id vars." + ) admiraldev::assert_data_frame(raw_dat, required_vars = rlang::syms(c(id_vars, raw_var))) admiraldev::assert_data_frame(tgt_dat, required_vars = rlang::syms(id_vars), optional = TRUE) assert_ct(ct, optional = TRUE) @@ -76,7 +76,6 @@ sdtm_assign <- function(raw_dat, } der_dat - } #' Derive an SDTM variable @@ -124,9 +123,10 @@ sdtm_assign <- function(raw_dat, #' oak_id = 1:14, #' raw_source = "MD1", #' patient_number = 101:114, -#' MDIND = c( "NAUSEA", "NAUSEA", "ANEMIA", "NAUSEA", "PYREXIA", -#' "VOMITINGS", "DIARHHEA", "COLD", -#' "FEVER", "LEG PAIN", "FEVER", "COLD", "COLD", "PAIN" +#' MDIND = c( +#' "NAUSEA", "NAUSEA", "ANEMIA", "NAUSEA", "PYREXIA", +#' "VOMITINGS", "DIARHHEA", "COLD", +#' "FEVER", "LEG PAIN", "FEVER", "COLD", "COLD", "PAIN" #' ) #' ) #' @@ -134,7 +134,7 @@ sdtm_assign <- function(raw_dat, #' raw_dat = md1, #' raw_var = "MDIND", #' tgt_var = "CMINDC", -#' ) +#' ) #' #' cm_inter <- #' tibble::tibble( @@ -185,7 +185,7 @@ sdtm_assign <- function(raw_dat, #' ct = ct, #' cl = "C66729", #' tgt_dat = cm_inter -#' ) +#' ) #' #' @name assign NULL @@ -198,12 +198,12 @@ assign_no_ct <- function(raw_dat, tgt_var, tgt_dat = NULL, id_vars = oak_id_vars()) { - admiraldev::assert_character_scalar(raw_var) admiraldev::assert_character_scalar(tgt_var) admiraldev::assert_character_vector(id_vars) assertthat::assert_that(contains_oak_id_vars(id_vars), - msg = "`id_vars` must include the oak id vars.") + msg = "`id_vars` must include the oak id vars." + ) admiraldev::assert_data_frame(raw_dat, required_vars = rlang::syms(c(id_vars, raw_var))) admiraldev::assert_data_frame(tgt_dat, required_vars = rlang::syms(id_vars), optional = TRUE) @@ -226,12 +226,12 @@ assign_ct <- function(raw_dat, cl, tgt_dat = NULL, id_vars = oak_id_vars()) { - admiraldev::assert_character_scalar(raw_var) admiraldev::assert_character_scalar(tgt_var) admiraldev::assert_character_vector(id_vars) assertthat::assert_that(contains_oak_id_vars(id_vars), - msg = "`id_vars` must include the oak id vars.") + msg = "`id_vars` must include the oak id vars." + ) admiraldev::assert_data_frame(raw_dat, required_vars = rlang::syms(c(id_vars, raw_var))) admiraldev::assert_data_frame(tgt_dat, required_vars = rlang::syms(id_vars), optional = TRUE) diff --git a/R/ct.R b/R/ct.R index ef3b0b0e..097fdcbb 100644 --- a/R/ct.R +++ b/R/ct.R @@ -31,7 +31,6 @@ #' @keywords internal #' @export ct_vars <- function(set = c("all", "cl", "from", "to")) { - admiraldev::assert_character_vector(set) set <- match.arg(set) @@ -39,18 +38,21 @@ ct_vars <- function(set = c("all", "cl", "from", "to")) { from_vars <- c("collected_value", "term_synonyms") to_var <- "term_value" - if (identical(set, "all")) + if (identical(set, "all")) { return(c(cl_var, from_vars, to_var)) + } - if (identical(set, "cl")) + if (identical(set, "cl")) { return(cl_var) + } - if (identical(set, "from")) + if (identical(set, "from")) { return(from_vars) + } - if (identical(set, "to")) + if (identical(set, "to")) { return(to_var) - + } } #' Assert a controlled terminology specification @@ -90,7 +92,6 @@ ct_vars <- function(set = c("all", "cl", "from", "to")) { #' #' @keywords internal assert_ct <- function(ct, optional = FALSE) { - admiraldev::assert_data_frame( arg = ct, required_vars = rlang::syms(ct_vars()), @@ -130,10 +131,8 @@ assert_ct <- function(ct, optional = FALSE) { #' @examples #' # example code #' -#' #' @keywords internal assert_cl <- function(ct, cl, optional = FALSE) { - if (!is.null(cl)) { admiraldev::assert_character_scalar(cl) } @@ -201,7 +200,6 @@ assert_cl <- function(ct, cl, optional = FALSE) { #' @importFrom rlang .data #' @keywords internal ct_mappings <- function(ct, from = ct_vars("from"), to = ct_vars("to")) { - assert_ct(ct) cols <- c(to, from) @@ -209,9 +207,11 @@ ct_mappings <- function(ct, from = ct_vars("from"), to = ct_vars("to")) { ct_mappings <- ct |> dplyr::mutate(to = !!rlang::sym(to)) |> - tidyr::pivot_longer(cols = dplyr::all_of(cols), - values_to = "from", - names_to = "type") |> + tidyr::pivot_longer( + cols = dplyr::all_of(cols), + values_to = "from", + names_to = "type" + ) |> dplyr::select(c("type", "from", "to")) |> dplyr::mutate(type = factor(.data$type, levels = cols)) |> dplyr::arrange(.data$type) |> @@ -253,9 +253,8 @@ ct_map <- function(x, ct = NULL, cl = NULL, - from = ct_vars("from"), + from = ct_vars("from"), to = ct_vars("to")) { - ct %||% return(x) assert_ct(ct) @@ -269,7 +268,6 @@ ct_map <- to = mappings$to, .no_match = toupper(x) ) - } #' Read in a controlled terminology @@ -289,7 +287,6 @@ ct_map <- #' #' @export read_ct <- function(file = stop("`file` must be specified")) { - ct <- readr::read_csv(file = file, col_types = "c") assert_ct(ct) @@ -341,7 +338,6 @@ ct_example <- function(example) { ), call. = FALSE ) - } else { local_path <- system.file(path, package = "sdtm.oak", mustWork = TRUE) @@ -372,7 +368,6 @@ ct_example <- function(example) { #' #' @export read_ct_example <- function(example) { - if (missing(example)) { return(ct_example()) } else { @@ -381,5 +376,4 @@ read_ct_example <- function(example) { path <- ct_example(example) read_ct(file = path) - } diff --git a/R/hardcode.R b/R/hardcode.R index 2fe49b56..7f454421 100644 --- a/R/hardcode.R +++ b/R/hardcode.R @@ -44,13 +44,13 @@ sdtm_hardcode <- function(raw_dat, cl = NULL, tgt_dat = NULL, id_vars = oak_id_vars()) { - admiraldev::assert_character_scalar(raw_var) admiraldev::assert_character_scalar(tgt_var) admiraldev::assert_character_scalar(tgt_val) admiraldev::assert_character_vector(id_vars) assertthat::assert_that(contains_oak_id_vars(id_vars), - msg = "`id_vars` must include the oak id vars.") + msg = "`id_vars` must include the oak id vars." + ) admiraldev::assert_data_frame(raw_dat, required_vars = rlang::syms(c(id_vars, raw_var))) admiraldev::assert_data_frame(tgt_dat, required_vars = rlang::syms(id_vars), optional = TRUE) assert_ct(ct, optional = TRUE) @@ -174,7 +174,7 @@ sdtm_hardcode <- function(raw_dat, #' ct = ct, #' cl = "C66729", #' tgt_dat = cm_inter -#' ) +#' ) #' #' @name harcode NULL @@ -187,14 +187,15 @@ hardcode_no_ct <- function(raw_dat, tgt_val, tgt_dat = NULL, id_vars = oak_id_vars()) { - admiraldev::assert_character_scalar(raw_var) admiraldev::assert_character_scalar(tgt_var) assertthat::assert_that(assertthat::is.scalar(tgt_val), - msg = "`tgt_val` must be a scalar value.") + msg = "`tgt_val` must be a scalar value." + ) admiraldev::assert_character_vector(id_vars) assertthat::assert_that(contains_oak_id_vars(id_vars), - msg = "`id_vars` must include the oak id vars.") + msg = "`id_vars` must include the oak id vars." + ) admiraldev::assert_data_frame(raw_dat, required_vars = rlang::syms(c(id_vars, raw_var))) admiraldev::assert_data_frame(tgt_dat, required_vars = rlang::syms(id_vars), optional = TRUE) @@ -210,38 +211,41 @@ hardcode_no_ct <- function(raw_dat, #' @export #' @rdname harcode -hardcode_ct <- function(raw_dat, - raw_var, - tgt_var, - tgt_val, - ct, - cl, - tgt_dat = NULL, - id_vars = oak_id_vars() - ) { +hardcode_ct <- + function(raw_dat, + raw_var, + tgt_var, + tgt_val, + ct, + cl, + tgt_dat = NULL, + id_vars = oak_id_vars()) { + admiraldev::assert_character_scalar(raw_var) + admiraldev::assert_character_scalar(tgt_var) + assertthat::assert_that(assertthat::is.scalar(tgt_val), + msg = "`tgt_val` must be a scalar value." + ) + admiraldev::assert_character_vector(id_vars) + assertthat::assert_that(contains_oak_id_vars(id_vars), + msg = "`id_vars` must include the oak id vars." + ) + admiraldev::assert_data_frame(raw_dat, required_vars = rlang::syms(c(id_vars, raw_var))) + admiraldev::assert_data_frame(tgt_dat, + required_vars = rlang::syms(id_vars), + optional = TRUE + ) - admiraldev::assert_character_scalar(raw_var) - admiraldev::assert_character_scalar(tgt_var) - assertthat::assert_that(assertthat::is.scalar(tgt_val), - msg = "`tgt_val` must be a scalar value.") - admiraldev::assert_character_vector(id_vars) - assertthat::assert_that(contains_oak_id_vars(id_vars), - msg = "`id_vars` must include the oak id vars.") - admiraldev::assert_data_frame(raw_dat, required_vars = rlang::syms(c(id_vars, raw_var))) - admiraldev::assert_data_frame(tgt_dat, required_vars = rlang::syms(id_vars), optional = TRUE) - - assert_ct(ct, optional = FALSE) - assert_cl(ct = ct, cl = cl, optional = FALSE) - - sdtm_hardcode( - raw_dat = raw_dat, - raw_var = raw_var, - tgt_var = tgt_var, - tgt_val = tgt_val, - ct = ct, - cl = cl, - tgt_dat = tgt_dat, - id_vars = id_vars - ) -} + assert_ct(ct, optional = FALSE) + assert_cl(ct = ct, cl = cl, optional = FALSE) + sdtm_hardcode( + raw_dat = raw_dat, + raw_var = raw_var, + tgt_var = tgt_var, + tgt_val = tgt_val, + ct = ct, + cl = cl, + tgt_dat = tgt_dat, + id_vars = id_vars + ) + } diff --git a/R/oak_id_vars.R b/R/oak_id_vars.R index ed3c419a..718d11da 100644 --- a/R/oak_id_vars.R +++ b/R/oak_id_vars.R @@ -19,10 +19,8 @@ #' #' @keywords internal oak_id_vars <- function(extra_vars = NULL) { - admiraldev::assert_character_vector(extra_vars, optional = TRUE) unique(c("oak_id", "raw_source", "patient_number", extra_vars)) - } #' Does a vector contain the raw dataset key variables? diff --git a/R/recode.R b/R/recode.R index 88ba8275..2226bd11 100644 --- a/R/recode.R +++ b/R/recode.R @@ -40,7 +40,6 @@ index_for_recode <- function(x, from) { #' #' @keywords internal are_to_recode <- function(x, from) { - # match(x, from, nomatch = 0) != 0 !is.na(index_for_recode(x, from)) } @@ -78,12 +77,12 @@ are_to_recode <- function(x, from) { #' ) #' #' @keywords internal -recode <- function(x, - from = unique(na.omit(x)), - to = from, - .no_match = x, - .na = NA) { - # to <- rlang::rep_along(x, to) +recode <- function( + x, + from = unique(na.omit(x)), + to = from, + .no_match = x, + .na = NA) { to <- vctrs::vec_recycle(to, length(from)) index <- index_for_recode(x, from) y <- ifelse(!is.na(index), to[index], .no_match) diff --git a/tests/testthat/test-ct.R b/tests/testthat/test-ct.R index bc166f85..cdf98d80 100644 --- a/tests/testthat/test-ct.R +++ b/tests/testthat/test-ct.R @@ -1,12 +1,13 @@ test_that("ct_vars() works as expected", { - - expect_identical(ct_vars(), - c( - "codelist_code", - "collected_value", - "term_synonyms", - "term_value" - )) + expect_identical( + ct_vars(), + c( + "codelist_code", + "collected_value", + "term_synonyms", + "term_value" + ) + ) expect_identical( ct_vars(set = "all"), @@ -18,18 +19,23 @@ test_that("ct_vars() works as expected", { ) ) - expect_identical(ct_vars(set = "cl"), - "codelist_code") + expect_identical( + ct_vars(set = "cl"), + "codelist_code" + ) - expect_identical(ct_vars(set = "from"), - c("collected_value", - "term_synonyms")) + expect_identical( + ct_vars(set = "from"), + c( + "collected_value", + "term_synonyms" + ) + ) expect_identical(ct_vars(set = "to"), "term_value") }) test_that("ct_vars() fails with invalid input choice", { - expect_error(ct_vars("foo")) expect_error(ct_vars(1L)) expect_error(ct_vars(FALSE)) @@ -37,43 +43,105 @@ test_that("ct_vars() fails with invalid input choice", { }) test_that("assert_cl() works as expected", { - # Read in a controlled terminology example. ct <- read_ct_example("ct-01-cm") # If `cl` is not supplied and is not optional, then it should err. - expect_error(assert_cl(ct = NULL, cl = NULL, optional = FALSE)) + expect_error(assert_cl( + ct = NULL, + cl = NULL, + optional = FALSE + )) # If `cl` is not supplied but it is optional, then all fine. - expect_no_error(assert_cl(ct = NULL, cl = NULL, optional = TRUE)) + expect_no_error(assert_cl( + ct = NULL, + cl = NULL, + optional = TRUE + )) # Moreover, in case of no error, `cl` should be returned invisibly, in this # case `NULL`. - expect_null(assert_cl(ct = NULL, cl = NULL, optional = TRUE)) + expect_null(assert_cl( + ct = NULL, + cl = NULL, + optional = TRUE + )) # If `cl` is supplied but `ct` is not, then err. - expect_error(assert_cl(ct = NULL, cl = "C71113", optional = FALSE)) - expect_error(assert_cl(ct = NULL, cl = "C71113", optional = TRUE)) + expect_error(assert_cl( + ct = NULL, + cl = "C71113", + optional = FALSE + )) + expect_error(assert_cl( + ct = NULL, + cl = "C71113", + optional = TRUE + )) # If `ct` is supplied but `cl` is NULL, then err if `cl` is not optional, or # return `cl` invisibly. - expect_error(assert_cl(ct = ct, cl = NULL, optional = FALSE)) - expect_no_error(assert_cl(ct = ct, cl = NULL, optional = TRUE)) - expect_null(assert_cl(ct = ct, cl = NULL, optional = TRUE)) + expect_error(assert_cl( + ct = ct, + cl = NULL, + optional = FALSE + )) + expect_no_error(assert_cl( + ct = ct, + cl = NULL, + optional = TRUE + )) + expect_null(assert_cl( + ct = ct, + cl = NULL, + optional = TRUE + )) # If both `ct` and `cl` are supplied, then `ct` must be a valid controlled # terminology data set and `cl` must contain a code-list code available among # the possibilities in column `codelist_code` (as returned by `ct_vars("cl")`). - expect_error(assert_cl(ct = ct, cl = "foo", optional = FALSE)) - expect_error(assert_cl(ct = ct, cl = "", optional = FALSE)) - - expect_error(assert_cl(ct = ct, cl = NA_character_, optional = FALSE)) - expect_error(assert_cl(ct = ct, cl = NA_character_, optional = TRUE)) - - expect_identical(assert_cl(ct = ct, cl = "C71113", optional = FALSE), "C71113") - expect_identical(assert_cl(ct = ct, cl = "C66726", optional = FALSE), "C66726") - expect_identical(assert_cl(ct = ct, cl = "C71113", optional = TRUE), "C71113") - expect_identical(assert_cl(ct = ct, cl = "C66726", optional = TRUE), "C66726") - + expect_error(assert_cl( + ct = ct, + cl = "foo", + optional = FALSE + )) + expect_error(assert_cl( + ct = ct, + cl = "", + optional = FALSE + )) + + expect_error(assert_cl( + ct = ct, + cl = NA_character_, + optional = FALSE + )) + expect_error(assert_cl( + ct = ct, + cl = NA_character_, + optional = TRUE + )) + + expect_identical(assert_cl( + ct = ct, + cl = "C71113", + optional = FALSE + ), "C71113") + expect_identical(assert_cl( + ct = ct, + cl = "C66726", + optional = FALSE + ), "C66726") + expect_identical(assert_cl( + ct = ct, + cl = "C71113", + optional = TRUE + ), "C71113") + expect_identical(assert_cl( + ct = ct, + cl = "C66726", + optional = TRUE + ), "C66726") }) test_that("assert_cl(): when ct is empty", { @@ -88,20 +156,55 @@ test_that("assert_cl(): when ct is empty", { # If `ct` is supplied but `cl` is NULL, then err if `cl` is not optional, or # return `cl` invisibly. - expect_error(assert_cl(ct = ct, cl = NULL, optional = FALSE)) - expect_no_error(assert_cl(ct = ct, cl = NULL, optional = TRUE)) - expect_null(assert_cl(ct = ct, cl = NULL, optional = TRUE)) + expect_error(assert_cl( + ct = ct, + cl = NULL, + optional = FALSE + )) + expect_no_error(assert_cl( + ct = ct, + cl = NULL, + optional = TRUE + )) + expect_null(assert_cl( + ct = ct, + cl = NULL, + optional = TRUE + )) # If both `ct` and `cl` are supplied, then `ct` must be a valid controlled # terminology data set and `cl` must contain a code-list code available among # the possibilities in column `codelist_code` (as returned by `ct_vars("cl")`). - expect_error(assert_cl(ct = ct, cl = "foo", optional = FALSE)) - expect_error(assert_cl(ct = ct, cl = "", optional = FALSE)) - - expect_error(assert_cl(ct = ct, cl = NA_character_, optional = FALSE)) - expect_error(assert_cl(ct = ct, cl = NA_character_, optional = TRUE)) - - expect_error(assert_cl(ct = ct, cl = "C71113", optional = FALSE)) - expect_error(assert_cl(ct = ct, cl = "C71113", optional = TRUE)) - + expect_error(assert_cl( + ct = ct, + cl = "foo", + optional = FALSE + )) + expect_error(assert_cl( + ct = ct, + cl = "", + optional = FALSE + )) + + expect_error(assert_cl( + ct = ct, + cl = NA_character_, + optional = FALSE + )) + expect_error(assert_cl( + ct = ct, + cl = NA_character_, + optional = TRUE + )) + + expect_error(assert_cl( + ct = ct, + cl = "C71113", + optional = FALSE + )) + expect_error(assert_cl( + ct = ct, + cl = "C71113", + optional = TRUE + )) }) From c176654ae8d975edd18385a2ac7d851a77e40843 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Mon, 1 Apr 2024 20:20:33 +0100 Subject: [PATCH 37/80] Code reflow --- man/assign.Rd | 11 ++++++----- man/harcode.Rd | 2 +- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/man/assign.Rd b/man/assign.Rd index baaee93b..02a8ffcb 100644 --- a/man/assign.Rd +++ b/man/assign.Rd @@ -77,9 +77,10 @@ md1 <- oak_id = 1:14, raw_source = "MD1", patient_number = 101:114, - MDIND = c( "NAUSEA", "NAUSEA", "ANEMIA", "NAUSEA", "PYREXIA", - "VOMITINGS", "DIARHHEA", "COLD", - "FEVER", "LEG PAIN", "FEVER", "COLD", "COLD", "PAIN" + MDIND = c( + "NAUSEA", "NAUSEA", "ANEMIA", "NAUSEA", "PYREXIA", + "VOMITINGS", "DIARHHEA", "COLD", + "FEVER", "LEG PAIN", "FEVER", "COLD", "COLD", "PAIN" ) ) @@ -87,7 +88,7 @@ assign_no_ct( raw_dat = md1, raw_var = "MDIND", tgt_var = "CMINDC", - ) +) cm_inter <- tibble::tibble( @@ -138,6 +139,6 @@ assign_ct( ct = ct, cl = "C66729", tgt_dat = cm_inter - ) +) } diff --git a/man/harcode.Rd b/man/harcode.Rd index e593964c..e097cc5d 100644 --- a/man/harcode.Rd +++ b/man/harcode.Rd @@ -128,6 +128,6 @@ hardcode_ct( ct = ct, cl = "C66729", tgt_dat = cm_inter - ) +) } From dafcfef876145268b88157426ccd9ff8562ef5ee Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Mon, 1 Apr 2024 20:20:50 +0100 Subject: [PATCH 38/80] Improve `assert_cl()` docs --- R/ct.R | 14 +++++++++++++- man/assert_cl.Rd | 13 ++++++++++++- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/R/ct.R b/R/ct.R index 097fdcbb..d86b4c91 100644 --- a/R/ct.R +++ b/R/ct.R @@ -129,7 +129,19 @@ assert_ct <- function(ct, optional = FALSE) { #' invisibly. #' #' @examples -#' # example code +#' # Load a controlled terminology example. +#' (ct <- read_ct_example("ct-01-cm")) +#' +#' # Should work fine. +#' sdtm.oak:::assert_cl(ct = ct, cl = "C71113") +#' +#' # In certain cases, you might allow `cl` to be `NULL` as to indicate absence, +#' # in that case, set `optional` to `TRUE` to make `assert_cl()` more +#' # forgiving. +#' sdtm.oak:::assert_cl(ct = ct, cl = NULL, optional = TRUE) +#' +#' # Otherwise it would err. +#' try(sdtm.oak:::assert_cl(ct = ct, cl = NULL, optional = FALSE)) #' #' @keywords internal assert_cl <- function(ct, cl, optional = FALSE) { diff --git a/man/assert_cl.Rd b/man/assert_cl.Rd index 785c0415..ea035803 100644 --- a/man/assert_cl.Rd +++ b/man/assert_cl.Rd @@ -25,8 +25,19 @@ invisibly. a controlled terminology specification. } \examples{ -# example code +# Load a controlled terminology example. +(ct <- read_ct_example("ct-01-cm")) +# Should work fine. +sdtm.oak:::assert_cl(ct = ct, cl = "C71113") + +# In certain cases, you might allow `cl` to be `NULL` as to indicate absence, +# in that case, set `optional` to `TRUE` to make `assert_cl()` more +# forgiving. +sdtm.oak:::assert_cl(ct = ct, cl = NULL, optional = TRUE) + +# Otherwise it would err. +try(sdtm.oak:::assert_cl(ct = ct, cl = NULL, optional = FALSE)) } \keyword{internal} From e1287794577bba055812b8ef516b08e974950a2b Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Mon, 1 Apr 2024 20:23:24 +0100 Subject: [PATCH 39/80] Update `read_ct()` docs --- R/ct.R | 7 +++++++ man/read_ct.Rd | 8 ++++++++ 2 files changed, 15 insertions(+) diff --git a/R/ct.R b/R/ct.R index d86b4c91..9ff14bbb 100644 --- a/R/ct.R +++ b/R/ct.R @@ -297,6 +297,13 @@ ct_map <- #' @returns A [tibble][tibble::tibble-package] with a controlled terminology #' specification. #' +#' @examples +#' # Get the local path to one of the controlled terminology example files. +#' path <- ct_example("ct-01-cm") +#' +#' # Import it to R. +#' read_ct(file = path) +#' #' @export read_ct <- function(file = stop("`file` must be specified")) { ct <- readr::read_csv(file = file, col_types = "c") diff --git a/man/read_ct.Rd b/man/read_ct.Rd index 35b4d6c7..ac790442 100644 --- a/man/read_ct.Rd +++ b/man/read_ct.Rd @@ -23,3 +23,11 @@ specification. \code{\link[=read_ct]{read_ct()}} imports a controlled terminology specification data set as a \link[tibble:tibble-package]{tibble}. } +\examples{ +# Get the local path to one of the controlled terminology example files. +path <- ct_example("ct-01-cm") + +# Import it to R. +read_ct(file = path) + +} From 08957642daeb12d61a6cc6c94df65102742947a3 Mon Sep 17 00:00:00 2001 From: ramiromagno Date: Mon, 1 Apr 2024 19:30:19 +0000 Subject: [PATCH 40/80] Automatic renv profile update. --- renv/profiles/4.2/renv.lock | 97 +++++++++++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) diff --git a/renv/profiles/4.2/renv.lock b/renv/profiles/4.2/renv.lock index 52d8d0f9..885895ef 100644 --- a/renv/profiles/4.2/renv.lock +++ b/renv/profiles/4.2/renv.lock @@ -128,6 +128,30 @@ ], "Hash": "543776ae6848fde2f48ff3816d0628bc" }, + "bit": { + "Package": "bit", + "Version": "4.0.5", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R" + ], + "Hash": "d242abec29412ce988848d0294b208fd" + }, + "bit64": { + "Package": "bit64", + "Version": "4.0.5", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "bit", + "methods", + "stats", + "utils" + ], + "Hash": "9fe98599ca456d6552421db0d6772d8f" + }, "brew": { "Package": "brew", "Version": "1.0-8", @@ -912,6 +936,19 @@ ], "Hash": "e9d21e79848e02e524bea6f5bd53e7e4" }, + "progress": { + "Package": "progress", + "Version": "1.2.2", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R6", + "crayon", + "hms", + "prettyunits" + ], + "Hash": "14dc9f7a3c91ebb14ec5bb9208a07061" + }, "promises": { "Package": "promises", "Version": "1.2.0.1", @@ -996,6 +1033,29 @@ ], "Hash": "8f25ebe2ec38b1f2aef3b0d2ef76f6c4" }, + "readr": { + "Package": "readr", + "Version": "2.1.4", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "R6", + "cli", + "clipr", + "cpp11", + "crayon", + "hms", + "lifecycle", + "methods", + "rlang", + "tibble", + "tzdb", + "utils", + "vroom" + ], + "Hash": "b5047343b3825f37ad9d3b5d89aa1078" + }, "rematch2": { "Package": "rematch2", "Version": "2.1.2", @@ -1424,6 +1484,17 @@ ], "Hash": "c0f007e2eeed7722ce13d42b84a22e07" }, + "tzdb": { + "Package": "tzdb", + "Version": "0.3.0", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "cpp11" + ], + "Hash": "b2e1cbce7c903eaf23ec05c58e59fb5e" + }, "urlchecker": { "Package": "urlchecker", "Version": "1.0.1", @@ -1493,6 +1564,32 @@ ], "Hash": "e4ffa94ceed5f124d429a5a5f0f5b378" }, + "vroom": { + "Package": "vroom", + "Version": "1.6.1", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "bit64", + "cli", + "cpp11", + "crayon", + "glue", + "hms", + "lifecycle", + "methods", + "progress", + "rlang", + "stats", + "tibble", + "tidyselect", + "tzdb", + "vctrs", + "withr" + ], + "Hash": "7015a74373b83ffaef64023f4a0f5033" + }, "waldo": { "Package": "waldo", "Version": "0.4.0", From 339039ea998724b85cfc452fe2d1d1deab54a729 Mon Sep 17 00:00:00 2001 From: ramiromagno Date: Mon, 1 Apr 2024 19:34:32 +0000 Subject: [PATCH 41/80] Automatic renv profile update. --- renv.lock | 97 +++++++++++++++++++++++++++++++++++++ renv/profiles/4.3/renv.lock | 97 +++++++++++++++++++++++++++++++++++++ 2 files changed, 194 insertions(+) diff --git a/renv.lock b/renv.lock index 4a72ab8a..ebc34e12 100644 --- a/renv.lock +++ b/renv.lock @@ -128,6 +128,30 @@ ], "Hash": "543776ae6848fde2f48ff3816d0628bc" }, + "bit": { + "Package": "bit", + "Version": "4.0.5", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R" + ], + "Hash": "d242abec29412ce988848d0294b208fd" + }, + "bit64": { + "Package": "bit64", + "Version": "4.0.5", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "bit", + "methods", + "stats", + "utils" + ], + "Hash": "9fe98599ca456d6552421db0d6772d8f" + }, "brew": { "Package": "brew", "Version": "1.0-8", @@ -911,6 +935,19 @@ ], "Hash": "e9d21e79848e02e524bea6f5bd53e7e4" }, + "progress": { + "Package": "progress", + "Version": "1.2.2", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R6", + "crayon", + "hms", + "prettyunits" + ], + "Hash": "14dc9f7a3c91ebb14ec5bb9208a07061" + }, "promises": { "Package": "promises", "Version": "1.2.0.1", @@ -995,6 +1032,29 @@ ], "Hash": "8f25ebe2ec38b1f2aef3b0d2ef76f6c4" }, + "readr": { + "Package": "readr", + "Version": "2.1.4", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "R6", + "cli", + "clipr", + "cpp11", + "crayon", + "hms", + "lifecycle", + "methods", + "rlang", + "tibble", + "tzdb", + "utils", + "vroom" + ], + "Hash": "b5047343b3825f37ad9d3b5d89aa1078" + }, "rematch2": { "Package": "rematch2", "Version": "2.1.2", @@ -1424,6 +1484,17 @@ ], "Hash": "e4e357f28c2edff493936b6cb30c3d65" }, + "tzdb": { + "Package": "tzdb", + "Version": "0.3.0", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "cpp11" + ], + "Hash": "b2e1cbce7c903eaf23ec05c58e59fb5e" + }, "urlchecker": { "Package": "urlchecker", "Version": "1.0.1", @@ -1493,6 +1564,32 @@ ], "Hash": "a745bda7aff4734c17294bb41d4e4607" }, + "vroom": { + "Package": "vroom", + "Version": "1.6.1", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "bit64", + "cli", + "cpp11", + "crayon", + "glue", + "hms", + "lifecycle", + "methods", + "progress", + "rlang", + "stats", + "tibble", + "tidyselect", + "tzdb", + "vctrs", + "withr" + ], + "Hash": "7015a74373b83ffaef64023f4a0f5033" + }, "waldo": { "Package": "waldo", "Version": "0.4.0", diff --git a/renv/profiles/4.3/renv.lock b/renv/profiles/4.3/renv.lock index 4a72ab8a..ebc34e12 100644 --- a/renv/profiles/4.3/renv.lock +++ b/renv/profiles/4.3/renv.lock @@ -128,6 +128,30 @@ ], "Hash": "543776ae6848fde2f48ff3816d0628bc" }, + "bit": { + "Package": "bit", + "Version": "4.0.5", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R" + ], + "Hash": "d242abec29412ce988848d0294b208fd" + }, + "bit64": { + "Package": "bit64", + "Version": "4.0.5", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "bit", + "methods", + "stats", + "utils" + ], + "Hash": "9fe98599ca456d6552421db0d6772d8f" + }, "brew": { "Package": "brew", "Version": "1.0-8", @@ -911,6 +935,19 @@ ], "Hash": "e9d21e79848e02e524bea6f5bd53e7e4" }, + "progress": { + "Package": "progress", + "Version": "1.2.2", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R6", + "crayon", + "hms", + "prettyunits" + ], + "Hash": "14dc9f7a3c91ebb14ec5bb9208a07061" + }, "promises": { "Package": "promises", "Version": "1.2.0.1", @@ -995,6 +1032,29 @@ ], "Hash": "8f25ebe2ec38b1f2aef3b0d2ef76f6c4" }, + "readr": { + "Package": "readr", + "Version": "2.1.4", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "R6", + "cli", + "clipr", + "cpp11", + "crayon", + "hms", + "lifecycle", + "methods", + "rlang", + "tibble", + "tzdb", + "utils", + "vroom" + ], + "Hash": "b5047343b3825f37ad9d3b5d89aa1078" + }, "rematch2": { "Package": "rematch2", "Version": "2.1.2", @@ -1424,6 +1484,17 @@ ], "Hash": "e4e357f28c2edff493936b6cb30c3d65" }, + "tzdb": { + "Package": "tzdb", + "Version": "0.3.0", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "cpp11" + ], + "Hash": "b2e1cbce7c903eaf23ec05c58e59fb5e" + }, "urlchecker": { "Package": "urlchecker", "Version": "1.0.1", @@ -1493,6 +1564,32 @@ ], "Hash": "a745bda7aff4734c17294bb41d4e4607" }, + "vroom": { + "Package": "vroom", + "Version": "1.6.1", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "bit64", + "cli", + "cpp11", + "crayon", + "glue", + "hms", + "lifecycle", + "methods", + "progress", + "rlang", + "stats", + "tibble", + "tidyselect", + "tzdb", + "vctrs", + "withr" + ], + "Hash": "7015a74373b83ffaef64023f4a0f5033" + }, "waldo": { "Package": "waldo", "Version": "0.4.0", From ab9db1439ff6e03ecf29ccec11853a128f615472 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Tue, 2 Apr 2024 00:28:17 +0100 Subject: [PATCH 42/80] Add units tests for `recode()` --- tests/testthat/test-recode.R | 117 +++++++++++++++++++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 tests/testthat/test-recode.R diff --git a/tests/testthat/test-recode.R b/tests/testthat/test-recode.R new file mode 100644 index 00000000..b469af3d --- /dev/null +++ b/tests/testthat/test-recode.R @@ -0,0 +1,117 @@ +test_that("recode() works as intended on typical input", { + + x <- c("a", NA_character_, "α") + # Map letters from lowercase to uppercase. NA is left as NA. Unmatched + # values are returned as inputted. + expect_identical(recode(x = x, from = letters, to = LETTERS), + c("A", NA_character_, "α")) + + # The same as before but map now to integer values. Note though that the + # type of the returned vector is nonetheless character because "α" is not + # matched and will be preserved in the output, forcing coercion to character. + expect_identical(recode( + x = x, + from = letters, + to = seq_along(LETTERS) + ), + c("1", NA_character_, "α")) + + # Now that `.no_match` is of integer type, like the vector supplied in `to`, + # the returned vector is also integer + expect_identical(recode( + x = x, + from = letters, + to = seq_along(LETTERS), + .no_match = 0L + ), + c(1L, NA_integer_, 0L)) + +}) + +test_that("recode() handling of NAs in input", { + + x <- c("aye", "nay", "maybe", NA_character_) + from <- c("aye", "nay") + to <- c("yes", "no") + + expect_identical(recode(x = x, from = from, to = to), + c("yes", "no", "maybe", NA_character_)) + expect_identical(recode( + x = x, + from = from, + to = to, + .na = "uh?" + ), + c("yes", "no", "maybe", "uh?")) + + # The type of the vector in the output is always the most compatible across + # the types of `to`, `.no_match` and `.na`. + expect_identical(recode( + x = x, + from = from, + to = to, + .na = NA + ), + c("yes", "no", "maybe", NA_character_)) + expect_identical( + recode( + x = x, + from = from, + to = to, + .na = NA_integer_ + ), + c("yes", "no", "maybe", NA_character_) + ) + expect_identical( + recode( + x = x, + from = from, + to = to, + .na = NA_character_ + ), + c("yes", "no", "maybe", NA_character_) + ) +}) + +test_that("recode(): recycling between `from` and `to` parameters", { + + x <- c("aye", "nay", "maybe", NA_character_) + from <- c("aye", "nay") + to <- "?" + + # Mapping one to many values + expect_identical(recode(x = x, from = from, to = to), + c("?", "?", "maybe", NA_character_)) + + # Enforce every value to become the hardcoded value specified in `to`. + expect_identical( + recode( + x = x, + from = from, + to = to, + .no_match = to, + .na = to + ), + c("?", "?", "?", "?") + ) + +}) + +test_that("recode(): notable cases", { + + x <- c(letters[1:3], NA_character_) + + # Identity: no recoding. + expect_identical(recode(x = x), x) + + # Hardcode all values, leave NA at peace + expect_identical(recode(x = x, to = "X"), c(rep("X", 3L), NA_character_)) + + # Or, really hardcode every single value + expect_identical(recode( + x = x, + to = "X", + .no_match = "X", + .na = "X" + ), rep("X", 4L)) +}) From 52c52fad804441855891258c458655f34b681805 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Tue, 2 Apr 2024 00:30:34 +0100 Subject: [PATCH 43/80] Remove `are_to_recode()` function Ended up not using this function. --- R/recode.R | 24 ------------------------ man/are_to_recode.Rd | 32 -------------------------------- 2 files changed, 56 deletions(-) delete mode 100644 man/are_to_recode.Rd diff --git a/R/recode.R b/R/recode.R index 2226bd11..424096b2 100644 --- a/R/recode.R +++ b/R/recode.R @@ -19,30 +19,6 @@ index_for_recode <- function(x, from) { match(x, from) } -#' Are values to be recoded? -#' -#' `are_to_recode` is a helper function designed to determine if any values -#' in a vector `x` match the specified `from` values, indicating they are -#' candidates for recoding. -#' -#' @param x A vector of values that will be checked against the `from` vector. -#' @param from A vector of values that `x` will be checked for matches against. -#' @return A logical vector of the same length as `x`, where `TRUE` indicates -#' that the corresponding value in `x` matches a value in `from` and -#' should be recoded, and `FALSE` otherwise. If `x` is empty, returns -#' an empty logical vector. This function is intended for internal use -#' and optimization in data transformation processes. -#' @keywords internal -#' @examples -#' sdtm.oak:::are_to_recode(x = 1:5, from = c(2, 4)) -#' -#' sdtm.oak:::are_to_recode(letters[1:3], from = c("a", "c")) -#' -#' @keywords internal -are_to_recode <- function(x, from) { - !is.na(index_for_recode(x, from)) -} - #' Recode values #' #' [recode()] recodes values in `x` by matching elements in `from` onto values diff --git a/man/are_to_recode.Rd b/man/are_to_recode.Rd deleted file mode 100644 index bbc5750e..00000000 --- a/man/are_to_recode.Rd +++ /dev/null @@ -1,32 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/recode.R -\name{are_to_recode} -\alias{are_to_recode} -\title{Are values to be recoded?} -\usage{ -are_to_recode(x, from) -} -\arguments{ -\item{x}{A vector of values that will be checked against the \code{from} vector.} - -\item{from}{A vector of values that \code{x} will be checked for matches against.} -} -\value{ -A logical vector of the same length as \code{x}, where \code{TRUE} indicates -that the corresponding value in \code{x} matches a value in \code{from} and -should be recoded, and \code{FALSE} otherwise. If \code{x} is empty, returns -an empty logical vector. This function is intended for internal use -and optimization in data transformation processes. -} -\description{ -\code{are_to_recode} is a helper function designed to determine if any values -in a vector \code{x} match the specified \code{from} values, indicating they are -candidates for recoding. -} -\examples{ -sdtm.oak:::are_to_recode(x = 1:5, from = c(2, 4)) - -sdtm.oak:::are_to_recode(letters[1:3], from = c("a", "c")) - -} -\keyword{internal} From 229c0bd58cac34795659abb392c15bc59835d1ff Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Tue, 2 Apr 2024 00:41:43 +0100 Subject: [PATCH 44/80] Add units tests for `assert_ct()` --- tests/testthat/test-ct.R | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/tests/testthat/test-ct.R b/tests/testthat/test-ct.R index cdf98d80..8bab3de9 100644 --- a/tests/testthat/test-ct.R +++ b/tests/testthat/test-ct.R @@ -42,6 +42,40 @@ test_that("ct_vars() fails with invalid input choice", { expect_error(ct_vars(NULL)) }) +test_that("assert_ct() works as expected", { + + # Load an example controlled terminology spec. + ct <- read_ct_example("ct-01-cm") + cols <- colnames(ct) + cl_col <- ct_vars("cl") + to_col <- ct_vars("to") + + expect_no_error(assert_ct(ct, optional = FALSE)) + expect_no_error(assert_ct(ct, optional = TRUE)) + expect_identical(assert_ct(ct, optional = FALSE), ct) + expect_identical(assert_ct(ct, optional = TRUE), ct) + expect_identical(assert_ct(NULL, optional = TRUE), NULL) + + # Code-list code column is one of the key variables that must be present + # in `ct`, so being missing should trigger an error. + expect_error(assert_ct(ct[setdiff(cols, cl_col)], optional = FALSE)) + expect_error(assert_ct(ct[setdiff(cols, cl_col)], optional = TRUE)) + + # The code-list code and the "to" columns of a controlled terminology should + # not contain NAs, as otherwise the mapping is undefined. If that happens + # an error is triggered. + ct01 <- ct + ct01[[cl_col]] <- NA_character_ + expect_error(assert_ct(ct01, optional = FALSE)) + expect_error(assert_ct(ct01, optional = TRUE)) + + ct02 <- ct + ct02[[to_col]] <- NA_character_ + expect_error(assert_ct(ct01, optional = FALSE)) + expect_error(assert_ct(ct01, optional = TRUE)) + +}) + test_that("assert_cl() works as expected", { # Read in a controlled terminology example. ct <- read_ct_example("ct-01-cm") From c83bfdf0ce675b6c0dbc9ca5e8db50b60f448cbd Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Tue, 2 Apr 2024 00:45:01 +0100 Subject: [PATCH 45/80] Add one more test for `assert_ct()` --- tests/testthat/test-ct.R | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/testthat/test-ct.R b/tests/testthat/test-ct.R index 8bab3de9..b1ab89e1 100644 --- a/tests/testthat/test-ct.R +++ b/tests/testthat/test-ct.R @@ -74,6 +74,19 @@ test_that("assert_ct() works as expected", { expect_error(assert_ct(ct01, optional = FALSE)) expect_error(assert_ct(ct01, optional = TRUE)) + ct_empty <- + data.frame( + codelist_code = character(), + collected_value = character(), + term_synonyms = character(), + term_value = character(), + stringsAsFactors = FALSE + ) + + # `ct` cannot be empty as that means that there are no mappings. + expect_error(assert_ct(ct_empty, optional = TRUE)) + expect_error(assert_ct(ct_empty, optional = FALSE)) + }) test_that("assert_cl() works as expected", { From a362578dcac3956a5c775f48d91dffef03a7ca32 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Tue, 2 Apr 2024 00:57:14 +0100 Subject: [PATCH 46/80] Add a basic unit test for `ct_mappings()` --- R/ct.R | 2 +- tests/testthat/test-ct.R | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/R/ct.R b/R/ct.R index 9ff14bbb..b7d4c66a 100644 --- a/R/ct.R +++ b/R/ct.R @@ -212,8 +212,8 @@ assert_cl <- function(ct, cl, optional = FALSE) { #' @importFrom rlang .data #' @keywords internal ct_mappings <- function(ct, from = ct_vars("from"), to = ct_vars("to")) { - assert_ct(ct) + assert_ct(ct) cols <- c(to, from) ct_mappings <- diff --git a/tests/testthat/test-ct.R b/tests/testthat/test-ct.R index b1ab89e1..27fdccad 100644 --- a/tests/testthat/test-ct.R +++ b/tests/testthat/test-ct.R @@ -255,3 +255,16 @@ test_that("assert_cl(): when ct is empty", { optional = TRUE )) }) + +test_that("ct_mappings(): works as expected", { + + ct <- read_ct_example("ct-01-cm") + ct_qd <- dplyr::filter(ct, term_code == "C25473") + + expect_identical( + ct_mappings(ct = ct_qd), + tibble::tibble( + from = c("QD", "QD (Every Day)", "/day", "Daily", "Per Day"), + to = rep("QD", 5L) + )) +}) From 934a15c139feb0470a5f2eb82b5c33a41d19f542 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Tue, 2 Apr 2024 01:02:00 +0100 Subject: [PATCH 47/80] Fill in some doc details of ct-related functions --- R/ct.R | 4 +++- man/ct_example.Rd | 4 ++++ man/read_ct_example.Rd | 2 +- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/R/ct.R b/R/ct.R index b7d4c66a..d7930471 100644 --- a/R/ct.R +++ b/R/ct.R @@ -320,6 +320,8 @@ read_ct <- function(file = stop("`file` must be specified")) { #' @param example A string with either the basename, file name, or relative path #' to a controlled terminology file bundled with `{stdm.oak}`, see examples. #' +#' @returns The local path to an example file if `example` is supplied, or a +#' character vector of example file names. #' #' @examples #' # Get the local path to controlled terminology example file 01 @@ -373,7 +375,7 @@ ct_example <- function(example) { #' with `{stdm.oak}`, run `read_ct_example()` for available example files. #' #' @returns A [tibble][tibble::tibble-package] with a controlled terminology -#' specification data set. +#' specification data set, or a character vector of example file names. #' #' @examples #' # Leave the `example` parameter as missing for available example files. diff --git a/man/ct_example.Rd b/man/ct_example.Rd index f7e018a9..95a6c560 100644 --- a/man/ct_example.Rd +++ b/man/ct_example.Rd @@ -10,6 +10,10 @@ ct_example(example) \item{example}{A string with either the basename, file name, or relative path to a controlled terminology file bundled with \code{{stdm.oak}}, see examples.} } +\value{ +The local path to an example file if \code{example} is supplied, or a +character vector of example file names. +} \description{ \code{\link[=ct_example]{ct_example()}} resolves the local path to an example controlled terminology file. diff --git a/man/read_ct_example.Rd b/man/read_ct_example.Rd index f365182b..8db2911e 100644 --- a/man/read_ct_example.Rd +++ b/man/read_ct_example.Rd @@ -12,7 +12,7 @@ with \code{{stdm.oak}}, run \code{read_ct_example()} for available example files } \value{ A \link[tibble:tibble-package]{tibble} with a controlled terminology -specification data set. +specification data set, or a character vector of example file names. } \description{ \code{\link[=read_ct_example]{read_ct_example()}} imports one of the bundled controlled terminology From 0dcf0fc7fc5227db22a47f1c6d9bf2164a317e47 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Tue, 2 Apr 2024 01:15:31 +0100 Subject: [PATCH 48/80] Remove leftover doc text in `assign` --- R/assign.R | 4 ---- man/assign.Rd | 3 --- 2 files changed, 7 deletions(-) diff --git a/R/assign.R b/R/assign.R index 396dd46d..aa30a696 100644 --- a/R/assign.R +++ b/R/assign.R @@ -87,10 +87,6 @@ sdtm_assign <- function(raw_dat, #' - [assign_ct()] maps a variable in a raw dataset to a target SDTM variable #' following controlled terminology recoding. #' -#' - [sdtm_assign()] is an internal function packing the same functionality as -#' [assign_no_ct()] and [assign_ct()] together but aimed at developers only. -#' As a user please use either [assign_no_ct()] or [assign_ct()]. -#' #' @param raw_dat The raw dataset (dataframe); must include the #' variables passed in `id_vars` and `raw_var`. #' @param raw_var The raw variable: a single string indicating the name of the diff --git a/man/assign.Rd b/man/assign.Rd index 02a8ffcb..f11234f1 100644 --- a/man/assign.Rd +++ b/man/assign.Rd @@ -65,9 +65,6 @@ derived variable, as indicated in \code{tgt_var}. variable that has no terminology restrictions. \item \code{\link[=assign_ct]{assign_ct()}} maps a variable in a raw dataset to a target SDTM variable following controlled terminology recoding. -\item \code{\link[=sdtm_assign]{sdtm_assign()}} is an internal function packing the same functionality as -\code{\link[=assign_no_ct]{assign_no_ct()}} and \code{\link[=assign_ct]{assign_ct()}} together but aimed at developers only. -As a user please use either \code{\link[=assign_no_ct]{assign_no_ct()}} or \code{\link[=assign_ct]{assign_ct()}}. } } \examples{ From a44c8651aadb1eea3c7033d187eb3ddc34ddbf10 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Tue, 2 Apr 2024 01:24:48 +0100 Subject: [PATCH 49/80] Update website's reference --- _pkgdown.yml | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/_pkgdown.yml b/_pkgdown.yml index d0716d79..e4ae3b89 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -6,3 +6,29 @@ template: navbar: title: "sdtm.oak" + +reference: +- title: SDTM derivation + desc: Set of functions to perform SDTM derivations + contents: + - assign + - harcode + - derive_study_day + +- title: Controlled terminology + contents: + - read_ct + - read_ct_example + - ct_example + - ct_map + +- title: Date and time conversion + contents: + - create_iso8601 + - fmt_cmp + - dtc_formats + - problems + +- title: Package global state + contents: + - clear_cache From efb423f802941378c3ad61108b7bc989875f2b85 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Tue, 2 Apr 2024 01:27:43 +0100 Subject: [PATCH 50/80] Styling update --- R/ct.R | 1 - tests/testthat/test-ct.R | 6 +-- tests/testthat/test-recode.R | 86 ++++++++++++++++++++---------------- 3 files changed, 49 insertions(+), 44 deletions(-) diff --git a/R/ct.R b/R/ct.R index d7930471..1bb66a02 100644 --- a/R/ct.R +++ b/R/ct.R @@ -212,7 +212,6 @@ assert_cl <- function(ct, cl, optional = FALSE) { #' @importFrom rlang .data #' @keywords internal ct_mappings <- function(ct, from = ct_vars("from"), to = ct_vars("to")) { - assert_ct(ct) cols <- c(to, from) diff --git a/tests/testthat/test-ct.R b/tests/testthat/test-ct.R index 27fdccad..8b3990f7 100644 --- a/tests/testthat/test-ct.R +++ b/tests/testthat/test-ct.R @@ -43,7 +43,6 @@ test_that("ct_vars() fails with invalid input choice", { }) test_that("assert_ct() works as expected", { - # Load an example controlled terminology spec. ct <- read_ct_example("ct-01-cm") cols <- colnames(ct) @@ -86,7 +85,6 @@ test_that("assert_ct() works as expected", { # `ct` cannot be empty as that means that there are no mappings. expect_error(assert_ct(ct_empty, optional = TRUE)) expect_error(assert_ct(ct_empty, optional = FALSE)) - }) test_that("assert_cl() works as expected", { @@ -257,7 +255,6 @@ test_that("assert_cl(): when ct is empty", { }) test_that("ct_mappings(): works as expected", { - ct <- read_ct_example("ct-01-cm") ct_qd <- dplyr::filter(ct, term_code == "C25473") @@ -266,5 +263,6 @@ test_that("ct_mappings(): works as expected", { tibble::tibble( from = c("QD", "QD (Every Day)", "/day", "Daily", "Per Day"), to = rep("QD", 5L) - )) + ) + ) }) diff --git a/tests/testthat/test-recode.R b/tests/testthat/test-recode.R index b469af3d..d6f11b80 100644 --- a/tests/testthat/test-recode.R +++ b/tests/testthat/test-recode.R @@ -1,58 +1,67 @@ test_that("recode() works as intended on typical input", { - x <- c("a", NA_character_, "α") # Map letters from lowercase to uppercase. NA is left as NA. Unmatched # values are returned as inputted. - expect_identical(recode(x = x, from = letters, to = LETTERS), - c("A", NA_character_, "α")) + expect_identical( + recode(x = x, from = letters, to = LETTERS), + c("A", NA_character_, "α") + ) # The same as before but map now to integer values. Note though that the # type of the returned vector is nonetheless character because "α" is not # matched and will be preserved in the output, forcing coercion to character. - expect_identical(recode( - x = x, - from = letters, - to = seq_along(LETTERS) - ), - c("1", NA_character_, "α")) + expect_identical( + recode( + x = x, + from = letters, + to = seq_along(LETTERS) + ), + c("1", NA_character_, "α") + ) # Now that `.no_match` is of integer type, like the vector supplied in `to`, # the returned vector is also integer - expect_identical(recode( - x = x, - from = letters, - to = seq_along(LETTERS), - .no_match = 0L - ), - c(1L, NA_integer_, 0L)) - + expect_identical( + recode( + x = x, + from = letters, + to = seq_along(LETTERS), + .no_match = 0L + ), + c(1L, NA_integer_, 0L) + ) }) test_that("recode() handling of NAs in input", { - x <- c("aye", "nay", "maybe", NA_character_) from <- c("aye", "nay") to <- c("yes", "no") - expect_identical(recode(x = x, from = from, to = to), - c("yes", "no", "maybe", NA_character_)) - expect_identical(recode( - x = x, - from = from, - to = to, - .na = "uh?" - ), - c("yes", "no", "maybe", "uh?")) + expect_identical( + recode(x = x, from = from, to = to), + c("yes", "no", "maybe", NA_character_) + ) + expect_identical( + recode( + x = x, + from = from, + to = to, + .na = "uh?" + ), + c("yes", "no", "maybe", "uh?") + ) # The type of the vector in the output is always the most compatible across # the types of `to`, `.no_match` and `.na`. - expect_identical(recode( - x = x, - from = from, - to = to, - .na = NA - ), - c("yes", "no", "maybe", NA_character_)) + expect_identical( + recode( + x = x, + from = from, + to = to, + .na = NA + ), + c("yes", "no", "maybe", NA_character_) + ) expect_identical( recode( x = x, @@ -74,14 +83,15 @@ test_that("recode() handling of NAs in input", { }) test_that("recode(): recycling between `from` and `to` parameters", { - x <- c("aye", "nay", "maybe", NA_character_) from <- c("aye", "nay") to <- "?" # Mapping one to many values - expect_identical(recode(x = x, from = from, to = to), - c("?", "?", "maybe", NA_character_)) + expect_identical( + recode(x = x, from = from, to = to), + c("?", "?", "maybe", NA_character_) + ) # Enforce every value to become the hardcoded value specified in `to`. expect_identical( @@ -94,11 +104,9 @@ test_that("recode(): recycling between `from` and `to` parameters", { ), c("?", "?", "?", "?") ) - }) test_that("recode(): notable cases", { - x <- c(letters[1:3], NA_character_) # Identity: no recoding. From 365fa099a79442a4c7529a7490bf129bc36a6ae2 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Tue, 2 Apr 2024 01:32:54 +0100 Subject: [PATCH 51/80] Bump version and update NEWS --- DESCRIPTION | 2 +- NEWS.md | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index d9717bc4..8abff617 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: sdtm.oak Type: Package Title: SDTM Data Transformation Engine -Version: 0.0.0.9001 +Version: 0.0.0.9002 Authors@R: c( person("Rammprasad", "Ganapathy", role = c("aut", "cre"), email = "ganapathy.rammprasad@gene.com"), diff --git a/NEWS.md b/NEWS.md index 77776156..b5d3c2f5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,15 @@ +# sdtm.oak 0.0.0.9002 (development version) + +## New Features + +* New function: `derive_study_day()` for study day calculation. + +* New functions for basic SDTM derivations: ` assign_no_ct()`, `assign_ct()`, +`hardcode_no_ct()` and `hardcode_ct()`. + +* New functions for handling controlled terminologies: `read_ct()`, +`read_ct_example()`, `ct_example()` and `ct_map()`. + # sdtm.oak 0.0.0.9001 (development version) ## New Features From b2676109f39c1f3758208fc5deb7f53c86b78452 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Tue, 2 Apr 2024 01:35:20 +0100 Subject: [PATCH 52/80] Fix a few lintr issues --- tests/testthat/test-ct.R | 2 +- tests/testthat/test-recode.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-ct.R b/tests/testthat/test-ct.R index 8b3990f7..8d1ddb85 100644 --- a/tests/testthat/test-ct.R +++ b/tests/testthat/test-ct.R @@ -53,7 +53,7 @@ test_that("assert_ct() works as expected", { expect_no_error(assert_ct(ct, optional = TRUE)) expect_identical(assert_ct(ct, optional = FALSE), ct) expect_identical(assert_ct(ct, optional = TRUE), ct) - expect_identical(assert_ct(NULL, optional = TRUE), NULL) + expect_null(assert_ct(NULL, optional = TRUE)) # Code-list code column is one of the key variables that must be present # in `ct`, so being missing should trigger an error. diff --git a/tests/testthat/test-recode.R b/tests/testthat/test-recode.R index d6f11b80..87562015 100644 --- a/tests/testthat/test-recode.R +++ b/tests/testthat/test-recode.R @@ -107,7 +107,7 @@ test_that("recode(): recycling between `from` and `to` parameters", { }) test_that("recode(): notable cases", { - x <- c(letters[1:3], NA_character_) + x <- c(letters[1L:3L], NA_character_) # Identity: no recoding. expect_identical(recode(x = x), x) From 9cb23f53b4d6ed332b7978e1d4c846c9740f58e0 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Tue, 2 Apr 2024 02:07:39 +0100 Subject: [PATCH 53/80] Add examples to `ct_map()` doc --- R/ct.R | 23 +++++++++++++++++++++++ man/ct_map.Rd | 24 ++++++++++++++++++++++++ 2 files changed, 47 insertions(+) diff --git a/R/ct.R b/R/ct.R index 1bb66a02..35896660 100644 --- a/R/ct.R +++ b/R/ct.R @@ -258,6 +258,29 @@ ct_mappings <- function(ct, from = ct_vars("from"), to = ct_vars("to")) { #' `x` values are returned in uppercase. If `ct` is not provided `x` is #' returned unchanged. #' +#' @examples +#' # A few example terms. +#' terms <- +#' c("/day", +#' "Yes", +#' "Unknown", +#' "Prior", +#' "Every 2 hours", +#' "Percentage", +#' "International Unit") +#' +#' # Load a controlled terminology example +#' (ct <- read_ct_example("ct-01-cm")) +#' +#' # Use all possible matching terms in the controlled terminology. +#' ct_map(x = terms, ct = ct) +#' +#' # Note that if the controlled terminology mapping is restricted to a code-list +#' # code, e.g. C71113, then only `"/day"` gets mapped to `"QD"`; remaining terms +#' # won't match given the code-list code restriction, and will be mapped to an +#' # uppercase version of the original terms. +#' ct_map(x = terms, ct = ct, cl = "C71113") +#' #' @importFrom rlang %||% .data #' @export ct_map <- diff --git a/man/ct_map.Rd b/man/ct_map.Rd index 07581ee8..4d21cec9 100644 --- a/man/ct_map.Rd +++ b/man/ct_map.Rd @@ -32,3 +32,27 @@ returned unchanged. \description{ \code{\link[=ct_map]{ct_map()}} recodes a vector following a controlled terminology. } +\examples{ +# A few example terms. +terms <- + c("/day", + "Yes", + "Unknown", + "Prior", + "Every 2 hours", + "Percentage", + "International Unit") + +# Load a controlled terminology example +(ct <- read_ct_example("ct-01-cm")) + +# Use all possible matching terms in the controlled terminology. +ct_map(x = terms, ct = ct) + +# Note that if the controlled terminology mapping is restricted to a code-list +# code, e.g. C71113, then only `"/day"` gets mapped to `"QD"`; remaining terms +# won't match given the code-list code restriction, and will be mapped to an +# uppercase version of the original terms. +ct_map(x = terms, ct = ct, cl = "C71113") + +} From 1bebdd84cf4ca6542ca5132e5656d7bca489e295 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Tue, 2 Apr 2024 02:10:59 +0100 Subject: [PATCH 54/80] Fix typo in `problems()` doc --- R/dtc_problems.R | 2 +- man/problems.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/dtc_problems.R b/R/dtc_problems.R index c6ab3494..f115211d 100644 --- a/R/dtc_problems.R +++ b/R/dtc_problems.R @@ -159,7 +159,7 @@ any_problems <- function(cap_matrices, .cutoff_2000 = 68L) { #' "20231225" #' ) #' -#' #' # By inspecting the problematic dates it can be understood that +#' # By inspecting the problematic dates it can be understood that #' # the `.format` parameter needs to updated to include other variations. #' iso8601_dttm <- create_iso8601(dates, .format = "y-m-d") #' problems(iso8601_dttm) diff --git a/man/problems.Rd b/man/problems.Rd index ce68ad46..5d3833f6 100644 --- a/man/problems.Rd +++ b/man/problems.Rd @@ -42,7 +42,7 @@ dates <- "20231225" ) -#' # By inspecting the problematic dates it can be understood that +# By inspecting the problematic dates it can be understood that # the `.format` parameter needs to updated to include other variations. iso8601_dttm <- create_iso8601(dates, .format = "y-m-d") problems(iso8601_dttm) From a8f1bf55fb461048eff5cbcf3b2da13068374fe8 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Tue, 2 Apr 2024 02:16:08 +0100 Subject: [PATCH 55/80] Fix typo --- R/dtc_problems.R | 2 +- man/problems.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/dtc_problems.R b/R/dtc_problems.R index f115211d..9c23544e 100644 --- a/R/dtc_problems.R +++ b/R/dtc_problems.R @@ -133,7 +133,7 @@ any_problems <- function(cap_matrices, .cutoff_2000 = 68L) { #' provides easy access to these parsing problems. #' #' @param x An object of class iso8601, as typically obtained from a call to -#' [create_iso8601()]. The argument can also be left empty, in that case it +#' [create_iso8601()]. The argument can also be left empty, in that case #' `problems()` will use the last returned value, making it convenient to use #' immediately after [create_iso8601()]. #' diff --git a/man/problems.Rd b/man/problems.Rd index 5d3833f6..8a5c23fe 100644 --- a/man/problems.Rd +++ b/man/problems.Rd @@ -8,7 +8,7 @@ problems(x = .Last.value) } \arguments{ \item{x}{An object of class iso8601, as typically obtained from a call to -\code{\link[=create_iso8601]{create_iso8601()}}. The argument can also be left empty, in that case it +\code{\link[=create_iso8601]{create_iso8601()}}. The argument can also be left empty, in that case \code{problems()} will use the last returned value, making it convenient to use immediately after \code{\link[=create_iso8601]{create_iso8601()}}.} } From 92e490cd4365382ef410a7a29f688147fd7f378c Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Tue, 2 Apr 2024 15:17:11 +0100 Subject: [PATCH 56/80] Initial mockup of `assign_datetime()` --- NAMESPACE | 1 + R/assign_datetime.R | 100 +++++++++++++++++++++++++++++++++++++++++ man/assign_datetime.Rd | 96 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 197 insertions(+) create mode 100644 R/assign_datetime.R create mode 100644 man/assign_datetime.Rd diff --git a/NAMESPACE b/NAMESPACE index d7e8e203..458597cc 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,6 +2,7 @@ S3method(print,iso8601) export(assign_ct) +export(assign_datetime) export(assign_no_ct) export(clear_cache) export(create_iso8601) diff --git a/R/assign_datetime.R b/R/assign_datetime.R new file mode 100644 index 00000000..2a9f172f --- /dev/null +++ b/R/assign_datetime.R @@ -0,0 +1,100 @@ +#' Derive an ISO8601 date-time variable +#' +#' [assign_datetime()] maps one or more variables with date/time components in a +#' raw dataset to a target SDTM variable following the ISO8601 format. +#' +#' @param raw_dat The raw dataset (dataframe); must include the +#' variables passed in `id_vars` and `raw_var`. +#' @param raw_var The raw variable(s): a character vector indicating the name(s) +#' of the raw variable(s) in `raw_dat` with date or time components to be +#' parsed into a ISO8601 format variable in `tgt_var`. +#' @param raw_fmt A date/time parsing format. Either a character vector or a +#' list of character vectors. If a character vector is passed then each +#' element is taken as parsing format for each variable indicated in +#' `raw_var`. If a list is provided, then each element must be a character +#' vector of formats. The first vector of formats is used for parsing the +#' first variable in `raw_var`, and so on. +#' @param tgt_var The target SDTM variable: a single string indicating the name +#' of variable to be derived. +#' @param raw_unk A character vector of string literals to be regarded as +#' missing values during parsing. +#' @param tgt_dat Target dataset: a data frame to be merged against `raw_dat` by +#' the variables indicated in `id_vars`. This parameter is optional, see +#' section Value for how the output changes depending on this argument value. +#' @param id_vars Key variables to be used in the join between the raw dataset +#' (`raw_dat`) and the target data set (`raw_dat`). +#' @param .warn Whether to warn about parsing failures. +#' +#' @returns The returned data set depends on the value of `tgt_dat`: +#' - If no target dataset is supplied, meaning that `tgt_dat` defaults to +#' `NULL`, then the returned data set is `raw_dat`, selected for the variables +#' indicated in `id_vars`, and a new extra column: the derived variable, as +#' indicated in `tgt_var`. +#' - If the target dataset is provided, then it is merged with the raw data set +#' `raw_dat` by the variables indicated in `id_vars`, with a new column: the +#' derived variable, as indicated in `tgt_var`. +#' +#' @examples +#' md1 <- +#' tibble::tribble( +#' ~oak_id, ~raw_source, ~patient_number, ~MDBDR, ~MDEDR, ~MDETM, +#' 1L, "MD1", 375, NA, NA, NA, +#' 2L, "MD1", 375, "15-Sep-20", NA, NA, +#' 3L, "MD1", 376, "17-Feb-21", "17-Feb-21", NA, +#' 4L, "MD1", 377, "4-Oct-20", NA, NA, +#' 5L, "MD1", 377, "20-Jan-20", "20-Jan-20", "10:00:00", +#' 6L, "MD1", 377, "UN-UNK-2019", "UN-UNK-2019", NA, +#' 7L, "MD1", 377, "20-UNK-2019", "20-UNK-2019", NA, +#' 8L, "MD1", 378, "UN-UNK-2020", "UN-UNK-2020", NA, +#' 9L, "MD1", 378, "26-Jan-20", "26-Jan-20", "07:00:00", +#' 10L, "MD1", 378, "28-Jan-20", "1-Feb-20", NA, +#' 11L, "MD1", 378, "12-Feb-20", "18-Feb-20", NA, +#' 12L, "MD1", 379, "10-UNK-2020", "20-UNK-2020", NA, +#' 13L, "MD1", 379, NA, NA, NA, +#' 14L, "MD1", 379, NA, "17-Feb-20", NA +#' ) +#' +#' cm <- +#' assign_datetime( +#' raw_dat = md1, +#' raw_var = "MDBDR", +#' raw_fmt = "d-m-y", +#' raw_unk = c("UN", "UNK"), +#' tgt_var = "CMSTDTC" +#' ) +#' +#' cm +#' problems(cm$CMSTDTC) +#' +#' @export +assign_datetime <- + function(raw_dat, + raw_var, + raw_fmt, + tgt_var, + raw_unk = c("UN", "UNK"), + tgt_dat = NULL, + id_vars = oak_id_vars(), + .warn = TRUE) { + + tgt_val <- + create_iso8601(!!!raw_dat[raw_var], .format = raw_fmt, .na = raw_unk) + + der_dat <- + raw_dat |> + dplyr::select(c(id_vars, raw_var)) |> + dplyr::mutate("{tgt_var}" := tgt_val) |> + dplyr::select(-rlang::sym(raw_var)) + + der_dat <- + if (!is.null(tgt_dat)) { + der_dat |> + dplyr::right_join(y = tgt_dat, by = id_vars) |> + dplyr::relocate(tgt_var, .after = dplyr::last_col()) + } else { + der_dat + } + + der_dat + + } diff --git a/man/assign_datetime.Rd b/man/assign_datetime.Rd new file mode 100644 index 00000000..b26cf7fd --- /dev/null +++ b/man/assign_datetime.Rd @@ -0,0 +1,96 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/assign_datetime.R +\name{assign_datetime} +\alias{assign_datetime} +\title{Derive an ISO8601 date-time variable} +\usage{ +assign_datetime( + raw_dat, + raw_var, + raw_fmt, + tgt_var, + raw_unk = c("UN", "UNK"), + tgt_dat = NULL, + id_vars = oak_id_vars(), + .warn = TRUE +) +} +\arguments{ +\item{raw_dat}{The raw dataset (dataframe); must include the +variables passed in \code{id_vars} and \code{raw_var}.} + +\item{raw_var}{The raw variable(s): a character vector indicating the name(s) +of the raw variable(s) in \code{raw_dat} with date or time components to be +parsed into a ISO8601 format variable in \code{tgt_var}.} + +\item{raw_fmt}{A date/time parsing format. Either a character vector or a +list of character vectors. If a character vector is passed then each +element is taken as parsing format for each variable indicated in +\code{raw_var}. If a list is provided, then each element must be a character +vector of formats. The first vector of formats is used for parsing the +first variable in \code{raw_var}, and so on.} + +\item{tgt_var}{The target SDTM variable: a single string indicating the name +of variable to be derived.} + +\item{raw_unk}{A character vector of string literals to be regarded as +missing values during parsing.} + +\item{tgt_dat}{Target dataset: a data frame to be merged against \code{raw_dat} by +the variables indicated in \code{id_vars}. This parameter is optional, see +section Value for how the output changes depending on this argument value.} + +\item{id_vars}{Key variables to be used in the join between the raw dataset +(\code{raw_dat}) and the target data set (\code{raw_dat}).} + +\item{.warn}{Whether to warn about parsing failures.} +} +\value{ +The returned data set depends on the value of \code{tgt_dat}: +\itemize{ +\item If no target dataset is supplied, meaning that \code{tgt_dat} defaults to +\code{NULL}, then the returned data set is \code{raw_dat}, selected for the variables +indicated in \code{id_vars}, and a new extra column: the derived variable, as +indicated in \code{tgt_var}. +\item If the target dataset is provided, then it is merged with the raw data set +\code{raw_dat} by the variables indicated in \code{id_vars}, with a new column: the +derived variable, as indicated in \code{tgt_var}. +} +} +\description{ +\code{\link[=assign_datetime]{assign_datetime()}} maps one or more variables with date/time components in a +raw dataset to a target SDTM variable following the ISO8601 format. +} +\examples{ +md1 <- + tibble::tribble( + ~oak_id, ~raw_source, ~patient_number, ~MDBDR, ~MDEDR, ~MDETM, + 1L, "MD1", 375, NA, NA, NA, + 2L, "MD1", 375, "15-Sep-20", NA, NA, + 3L, "MD1", 376, "17-Feb-21", "17-Feb-21", NA, + 4L, "MD1", 377, "4-Oct-20", NA, NA, + 5L, "MD1", 377, "20-Jan-20", "20-Jan-20", "10:00:00", + 6L, "MD1", 377, "UN-UNK-2019", "UN-UNK-2019", NA, + 7L, "MD1", 377, "20-UNK-2019", "20-UNK-2019", NA, + 8L, "MD1", 378, "UN-UNK-2020", "UN-UNK-2020", NA, + 9L, "MD1", 378, "26-Jan-20", "26-Jan-20", "07:00:00", + 10L, "MD1", 378, "28-Jan-20", "1-Feb-20", NA, + 11L, "MD1", 378, "12-Feb-20", "18-Feb-20", NA, + 12L, "MD1", 379, "10-UNK-2020", "20-UNK-2020", NA, + 13L, "MD1", 379, NA, NA, NA, + 14L, "MD1", 379, NA, "17-Feb-20", NA + ) + +cm <- + assign_datetime( + raw_dat = md1, + raw_var = "MDBDR", + raw_fmt = "d-m-y", + raw_unk = c("UN", "UNK"), + tgt_var = "CMSTDTC" + ) + +cm +problems(cm$CMSTDTC) + +} From d9031fdf90c1d45ba37033b0b17c8477bde3f95d Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Wed, 3 Apr 2024 16:53:50 +0100 Subject: [PATCH 57/80] Add `.warn` parameter to `create_iso8601()` internals --- R/assign_datetime.R | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/R/assign_datetime.R b/R/assign_datetime.R index 2a9f172f..2abc82d0 100644 --- a/R/assign_datetime.R +++ b/R/assign_datetime.R @@ -78,7 +78,10 @@ assign_datetime <- .warn = TRUE) { tgt_val <- - create_iso8601(!!!raw_dat[raw_var], .format = raw_fmt, .na = raw_unk) + create_iso8601(!!!raw_dat[raw_var], + .format = raw_fmt, + .na = raw_unk, + .warn = .warn) der_dat <- raw_dat |> From 5987684c3a68c87ba0c6fc8ca341dd728217a816 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Thu, 4 Apr 2024 00:15:24 +0100 Subject: [PATCH 58/80] Remove lint issues --- R/assign.R | 2 +- R/ct.R | 24 +++++++++++++++++------- R/hardcode.R | 2 +- 3 files changed, 19 insertions(+), 9 deletions(-) diff --git a/R/assign.R b/R/assign.R index aa30a696..325eb7c5 100644 --- a/R/assign.R +++ b/R/assign.R @@ -61,7 +61,7 @@ sdtm_assign <- function(raw_dat, der_dat <- raw_dat |> dplyr::select(c(id_vars, raw_var)) |> - dplyr::mutate("{tgt_var}" := tgt_val) |> + dplyr::mutate("{tgt_var}" := tgt_val) |> # nolint object_name_linter() dplyr::select(-rlang::sym(raw_var)) # If a target dataset is supplied, then join the so far derived dataset with diff --git a/R/ct.R b/R/ct.R index 35896660..8bb5e21d 100644 --- a/R/ct.R +++ b/R/ct.R @@ -145,29 +145,39 @@ assert_ct <- function(ct, optional = FALSE) { #' #' @keywords internal assert_cl <- function(ct, cl, optional = FALSE) { - if (!is.null(cl)) { + + is_ct_missing <- is.null(ct) + is_cl_missing <- is.null(cl) + is_required_cl_missing <- is_cl_missing && !optional + is_cl_without_ct <- is_ct_missing && !is_cl_missing + are_ct_cl_available <- !is_ct_missing && !is_cl_missing + + if (!is_cl_missing) { admiraldev::assert_character_scalar(cl) } - if (is.null(cl) && !optional) { + if (is_required_cl_missing) { rlang::abort("`cl` is a required parameter.") } - if (is.null(ct) && !is.null(cl)) { + if (is_cl_without_ct) { rlang::abort("`ct` must be a valid controlled terminology if `cl` is supplied.") } - if (is.null(cl)) { + if (is_cl_missing) { return(invisible(NULL)) } - if (!is.null(ct) && is.na(cl)) { + if (!is_ct_missing && is.na(cl)) { rlang::abort("`cl` can't be NA. Did you mean `NULL`?") } - if (!is.null(ct) && !is.null(cl)) { + if (are_ct_cl_available) { assert_ct(ct, optional = FALSE) - cl_possibilities <- unique(ct[[ct_vars("cl")]]) + cl_possibilities <- + ct |> + dplyr::pull(ct_vars("cl")) |> + unique() admiraldev::assert_character_scalar(cl, values = cl_possibilities) } diff --git a/R/hardcode.R b/R/hardcode.R index 7f454421..a9a0dfd9 100644 --- a/R/hardcode.R +++ b/R/hardcode.R @@ -64,7 +64,7 @@ sdtm_hardcode <- function(raw_dat, der_dat <- raw_dat |> dplyr::select(c(id_vars, raw_var)) |> - dplyr::mutate("{tgt_var}" := recode(x = !!rlang::sym(raw_var), to = tgt_val)) |> + dplyr::mutate("{tgt_var}" := recode(x = !!rlang::sym(raw_var), to = tgt_val)) |> # nolint object_name_linter() dplyr::select(-rlang::sym(raw_var)) # If a target dataset is supplied, then join the so far derived dataset with From 2791ef045c63642f17f3752376b087e6b6134b20 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Thu, 4 Apr 2024 00:22:27 +0100 Subject: [PATCH 59/80] Replace `.data` usage in tidyselect expressions See https://github.com/tidyverse/tidyverse.org/pull/600 for more details. --- R/ct.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/ct.R b/R/ct.R index 8bb5e21d..ce216a07 100644 --- a/R/ct.R +++ b/R/ct.R @@ -237,7 +237,7 @@ ct_mappings <- function(ct, from = ct_vars("from"), to = ct_vars("to")) { dplyr::mutate(type = factor(.data$type, levels = cols)) |> dplyr::arrange(.data$type) |> dplyr::select(-"type") |> - tidyr::drop_na(.data$from) |> + tidyr::drop_na("from") |> dplyr::mutate(from = str_split(.data$from)) |> tidyr::unnest(from) |> dplyr::filter(from != "") |> # In case the split resulted in empty strings. From 2a8dbf59ab816ac038f445bf58f4696eab4529a8 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Thu, 4 Apr 2024 01:24:28 +0100 Subject: [PATCH 60/80] Variable renaming - `ct` to `ct_spec` (ct specification) - `cl` to `ct_cltc` (codelist code) --- NAMESPACE | 8 +- R/assign.R | 38 +-- R/ct.R | 248 ++++++++-------- R/hardcode.R | 44 +-- man/assert_cl.Rd | 43 --- man/assert_ct.Rd | 45 --- man/assert_ct_cltc.Rd | 43 +++ man/assert_ct_spec.Rd | 45 +++ man/assign.Rd | 16 +- man/ct_map.Rd | 24 +- man/ct_mappings.Rd | 10 +- man/{ct_example.Rd => ct_spec_example.Rd} | 16 +- man/{ct_vars.Rd => ct_spec_vars.Rd} | 22 +- man/harcode.Rd | 18 +- man/{read_ct.Rd => read_ct_spec.Rd} | 12 +- ..._ct_example.Rd => read_ct_spec_example.Rd} | 18 +- man/sdtm_assign.Rd | 12 +- man/sdtm_hardcode.Rd | 12 +- tests/testthat/test-ct.R | 264 +++++++++--------- 19 files changed, 472 insertions(+), 466 deletions(-) delete mode 100644 man/assert_cl.Rd delete mode 100644 man/assert_ct.Rd create mode 100644 man/assert_ct_cltc.Rd create mode 100644 man/assert_ct_spec.Rd rename man/{ct_example.Rd => ct_spec_example.Rd} (71%) rename man/{ct_vars.Rd => ct_spec_vars.Rd} (68%) rename man/{read_ct.Rd => read_ct_spec.Rd} (74%) rename man/{read_ct_example.Rd => read_ct_spec_example.Rd} (58%) diff --git a/NAMESPACE b/NAMESPACE index d7e8e203..6170cee4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,16 +5,16 @@ export(assign_ct) export(assign_no_ct) export(clear_cache) export(create_iso8601) -export(ct_example) export(ct_map) -export(ct_vars) +export(ct_spec_example) +export(ct_spec_vars) export(derive_study_day) export(fmt_cmp) export(hardcode_ct) export(hardcode_no_ct) export(problems) -export(read_ct) -export(read_ct_example) +export(read_ct_spec) +export(read_ct_spec_example) importFrom(rlang,"%||%") importFrom(rlang,":=") importFrom(rlang,.data) diff --git a/R/assign.R b/R/assign.R index 325eb7c5..6ff07397 100644 --- a/R/assign.R +++ b/R/assign.R @@ -11,12 +11,12 @@ #' raw variable in `raw_dat`. #' @param tgt_var The target SDTM variable: a single string indicating the name #' of variable to be derived. -#' @param ct Study controlled terminology specification: a dataframe with a -#' minimal set of columns, see [ct_vars()] for details. This parameter is +#' @param ct_spec Study controlled terminology specification: a dataframe with a +#' minimal set of columns, see [ct_spec_vars()] for details. This parameter is #' optional, if left as `NULL` no controlled terminology recoding is applied. -#' @param cl A code-list code indicating which subset of the controlled +#' @param ct_cltc A code-list code indicating which subset of the controlled #' terminology to apply in the derivation. This parameter is optional, if left -#' as `NULL`, all possible recodings in `ct` are attempted. +#' as `NULL`, all possible recodings in `ct_spec` are attempted. #' @param tgt_dat Target dataset: a data frame to be merged against `raw_dat` by #' the variables indicated in `id_vars`. This parameter is optional, see #' section Value for how the output changes depending on this argument value. @@ -38,8 +38,8 @@ sdtm_assign <- function(raw_dat, raw_var, tgt_var, - ct = NULL, - cl = NULL, + ct_spec = NULL, + ct_cltc = NULL, tgt_dat = NULL, id_vars = oak_id_vars()) { admiraldev::assert_character_scalar(raw_var) @@ -50,11 +50,11 @@ sdtm_assign <- function(raw_dat, ) admiraldev::assert_data_frame(raw_dat, required_vars = rlang::syms(c(id_vars, raw_var))) admiraldev::assert_data_frame(tgt_dat, required_vars = rlang::syms(id_vars), optional = TRUE) - assert_ct(ct, optional = TRUE) - assert_cl(ct = ct, cl = cl, optional = TRUE) + assert_ct_spec(ct_spec, optional = TRUE) + assert_ct_cltc(ct_spec = ct_spec, ct_cltc = ct_cltc, optional = TRUE) # Recode the raw variable following terminology. - tgt_val <- ct_map(raw_dat[[raw_var]], ct = ct, cl = cl) + tgt_val <- ct_map(raw_dat[[raw_var]], ct_spec = ct_spec, ct_cltc = ct_cltc) # Apply derivation by assigning `raw_var` to `tgt_var`. # `der_dat`: derived dataset. @@ -93,9 +93,9 @@ sdtm_assign <- function(raw_dat, #' raw variable in `raw_dat`. #' @param tgt_var The target SDTM variable: a single string indicating the name #' of variable to be derived. -#' @param ct Study controlled terminology specification: a dataframe with a -#' minimal set of columns, see [ct_vars()] for details. -#' @param cl A code-list code indicating which subset of the controlled +#' @param ct_spec Study controlled terminology specification: a dataframe with a +#' minimal set of columns, see [ct_spec_vars()] for details. +#' @param ct_cltc A code-list code indicating which subset of the controlled #' terminology to apply in the derivation. #' @param tgt_dat Target dataset: a data frame to be merged against `raw_dat` by #' the variables indicated in `id_vars`. This parameter is optional, see @@ -172,14 +172,14 @@ sdtm_assign <- function(raw_dat, #' ) #' #' # Controlled terminology specification -#' (ct <- read_ct_example("ct-01-cm")) +#' (ct_spec <- read_ct_spec_example("ct-01-cm")) #' #' assign_ct( #' raw_dat = md1, #' raw_var = "MDIND", #' tgt_var = "CMINDC", -#' ct = ct, -#' cl = "C66729", +#' ct_spec = ct_spec, +#' ct_cltc = "C66729", #' tgt_dat = cm_inter #' ) #' @@ -218,8 +218,8 @@ assign_no_ct <- function(raw_dat, assign_ct <- function(raw_dat, raw_var, tgt_var, - ct, - cl, + ct_spec, + ct_cltc, tgt_dat = NULL, id_vars = oak_id_vars()) { admiraldev::assert_character_scalar(raw_var) @@ -237,7 +237,7 @@ assign_ct <- function(raw_dat, tgt_var = tgt_var, tgt_dat = tgt_dat, id_vars = id_vars, - ct = ct, - cl = cl + ct_spec = ct_spec, + ct_cltc = ct_cltc ) } diff --git a/R/ct.R b/R/ct.R index ce216a07..544e93cc 100644 --- a/R/ct.R +++ b/R/ct.R @@ -1,49 +1,49 @@ #' Controlled terminology variables #' #' @description -#' [ct_vars()] returns the mandatory variables to be present in a data set +#' [ct_spec_vars()] returns the mandatory variables to be present in a data set #' representing a controlled terminology. By default, it returns all required #' variables. #' #' If only the subset of variables used for matching terms are needed, then #' request this subset of variables by passing the argument value `"from"`. If #' only the mapping-to variable is to be requested, then simply pass `"to"`. If -#' only the code-list code variable name is needed then pass `"cl"`. +#' only the code-list code variable name is needed then pass `"ct_cltc"`. #' -#' @param set A scalar character (string), one of: `"all"` (default), `"cl"`, +#' @param set A scalar character (string), one of: `"all"` (default), `"ct_cltc"`, #' `"from"` or `"to"`. #' #' @examples #' # These two calls are equivalent and return all required variables in a #' # controlled terminology data set. -#' sdtm.oak:::ct_vars() -#' sdtm.oak:::ct_vars("all") +#' sdtm.oak:::ct_spec_vars() +#' sdtm.oak:::ct_spec_vars("all") #' #' # "Codelist code" variable name. -#' sdtm.oak:::ct_vars("cl") +#' sdtm.oak:::ct_spec_vars("ct_cltc") #' #' # "From" variables -#' sdtm.oak:::ct_vars("from") +#' sdtm.oak:::ct_spec_vars("from") #' #' # The "to" variable. -#' sdtm.oak:::ct_vars("to") +#' sdtm.oak:::ct_spec_vars("to") #' #' @keywords internal #' @export -ct_vars <- function(set = c("all", "cl", "from", "to")) { +ct_spec_vars <- function(set = c("all", "ct_cltc", "from", "to")) { admiraldev::assert_character_vector(set) set <- match.arg(set) - cl_var <- "codelist_code" + ct_cltc_var <- "codelist_code" from_vars <- c("collected_value", "term_synonyms") to_var <- "term_value" if (identical(set, "all")) { - return(c(cl_var, from_vars, to_var)) + return(c(ct_cltc_var, from_vars, to_var)) } - if (identical(set, "cl")) { - return(cl_var) + if (identical(set, "ct_cltc")) { + return(ct_cltc_var) } if (identical(set, "from")) { @@ -58,130 +58,130 @@ ct_vars <- function(set = c("all", "cl", "from", "to")) { #' Assert a controlled terminology specification #' #' @description -#' [assert_ct()] will check whether `ct` is a data frame and if it contains the -#' variables: `r knitr::combine_words(ct_vars())`. +#' [assert_ct_spec()] will check whether `ct_spec` is a data frame and if it contains the +#' variables: `r knitr::combine_words(ct_spec_vars())`. #' #' In addition, it will also check if the data frame is not empty (no rows), and -#' whether the columns \code{`r ct_vars('cl')`} and \code{`r ct_vars('to')`} do +#' whether the columns \code{`r ct_spec_vars('ct_cltc')`} and \code{`r ct_spec_vars('to')`} do #' not contain any `NA` values. #' -#' @param ct A data frame to be asserted as a valid controlled terminology data +#' @param ct_spec A data frame to be asserted as a valid controlled terminology data #' set. #' -#' @returns The function throws an error if `ct` is not a valid controlled -#' terminology data set; otherwise, `ct` is returned invisibly. +#' @returns The function throws an error if `ct_spec` is not a valid controlled +#' terminology data set; otherwise, `ct_spec` is returned invisibly. #' #' @examples -#' # If `ct` is a valid controlled terminology then it is returned invisibly. -#' ct_01 <- read_ct_example("ct-01-cm") -#' all.equal(ct_01, sdtm.oak:::assert_ct(ct_01)) +#' # If `ct_spec` is a valid controlled terminology then it is returned invisibly. +#' ct_spec_01 <- read_ct_spec_example("ct-01-cm") +#' all.equal(ct_spec_01, sdtm.oak:::assert_ct_spec(ct_spec_01)) #' -#' # A minimal set of variables needs to be present in `ct` for it to pass the -#' # assertion; `sdtm.oak:::ct_vars()` defines their names. -#' (req_vars <- sdtm.oak:::ct_vars()) +#' # A minimal set of variables needs to be present in `ct_spec` for it to pass the +#' # assertion; `sdtm.oak:::ct_spec_vars()` defines their names. +#' (req_vars <- sdtm.oak:::ct_spec_vars()) #' #' # Other (facultative) variables also present in the controlled terminology #' # example. -#' (opt_vars <- setdiff(colnames(ct_01), req_vars)) +#' (opt_vars <- setdiff(colnames(ct_spec_01), req_vars)) #' #' # With only the mandatory variables, the assertion still passes. -#' sdtm.oak:::assert_ct(ct_01[req_vars]) +#' sdtm.oak:::assert_ct_spec(ct_spec_01[req_vars]) #' #' # Not having the required variables results in an error. -#' try(sdtm.oak:::assert_ct(ct_01[opt_vars])) +#' try(sdtm.oak:::assert_ct_spec(ct_spec_01[opt_vars])) #' #' @keywords internal -assert_ct <- function(ct, optional = FALSE) { +assert_ct_spec <- function(ct_spec, optional = FALSE) { admiraldev::assert_data_frame( - arg = ct, - required_vars = rlang::syms(ct_vars()), + arg = ct_spec, + required_vars = rlang::syms(ct_spec_vars()), optional = optional ) - if (!is.null(ct) && nrow(ct) == 0L) { - rlang::abort("`ct` can't be empty.") + if (!is.null(ct_spec) && nrow(ct_spec) == 0L) { + rlang::abort("`ct_spec` can't be empty.") } - if (!is.null(ct) && anyNA(ct[[ct_vars("cl")]])) { - rlang::abort(glue::glue("`{ct_vars('cl')}` can't have any NA values.")) + if (!is.null(ct_spec) && anyNA(ct_spec[[ct_spec_vars("ct_cltc")]])) { + rlang::abort(glue::glue("`{ct_spec_vars('ct_cltc')}` can't have any NA values.")) } - if (!is.null(ct) && anyNA(ct[[ct_vars("to")]])) { - rlang::abort(glue::glue("`{ct_vars('to')}` can't have any NA values.")) + if (!is.null(ct_spec) && anyNA(ct_spec[[ct_spec_vars("to")]])) { + rlang::abort(glue::glue("`{ct_spec_vars('to')}` can't have any NA values.")) } - invisible(ct) + invisible(ct_spec) } #' Assert a code-list code #' -#' [assert_cl()] asserts the validity of a code-list code in the context of +#' [assert_ct_cltc()] asserts the validity of a code-list code in the context of #' a controlled terminology specification. #' -#' @param ct Either a data frame encoding a controlled terminology data set, or +#' @param ct_spec Either a data frame encoding a controlled terminology data set, or #' `NULL`. -#' @param cl A string with a to-be asserted code-list code, or `NULL`. -#' @param optional A scalar logical, indicating whether `cl` can be `NULL` or +#' @param ct_cltc A string with a to-be asserted code-list code, or `NULL`. +#' @param optional A scalar logical, indicating whether `ct_cltc` can be `NULL` or #' not. #' -#' @returns The function throws an error if `cl` is not a valid code-list code -#' given the controlled terminology data set; otherwise, `cl` is returned +#' @returns The function throws an error if `ct_cltc` is not a valid code-list code +#' given the controlled terminology data set; otherwise, `ct_cltc` is returned #' invisibly. #' #' @examples #' # Load a controlled terminology example. -#' (ct <- read_ct_example("ct-01-cm")) +#' (ct_spec <- read_ct_spec_example("ct-01-cm")) #' #' # Should work fine. -#' sdtm.oak:::assert_cl(ct = ct, cl = "C71113") +#' sdtm.oak:::assert_ct_cltc(ct_spec = ct_spec, ct_cltc = "C71113") #' -#' # In certain cases, you might allow `cl` to be `NULL` as to indicate absence, -#' # in that case, set `optional` to `TRUE` to make `assert_cl()` more +#' # In certain cases, you might allow `ct_cltc` to be `NULL` as to indicate absence, +#' # in that case, set `optional` to `TRUE` to make `assert_ct_cltc()` more #' # forgiving. -#' sdtm.oak:::assert_cl(ct = ct, cl = NULL, optional = TRUE) +#' sdtm.oak:::assert_ct_cltc(ct_spec = ct_spec, ct_cltc = NULL, optional = TRUE) #' #' # Otherwise it would err. -#' try(sdtm.oak:::assert_cl(ct = ct, cl = NULL, optional = FALSE)) +#' try(sdtm.oak:::assert_ct_cltc(ct_spec = ct_spec, ct_cltc = NULL, optional = FALSE)) #' #' @keywords internal -assert_cl <- function(ct, cl, optional = FALSE) { +assert_ct_cltc <- function(ct_spec, ct_cltc, optional = FALSE) { - is_ct_missing <- is.null(ct) - is_cl_missing <- is.null(cl) - is_required_cl_missing <- is_cl_missing && !optional - is_cl_without_ct <- is_ct_missing && !is_cl_missing - are_ct_cl_available <- !is_ct_missing && !is_cl_missing + is_ct_spec_missing <- is.null(ct_spec) + is_ct_cltc_missing <- is.null(ct_cltc) + is_required_ct_cltc_missing <- is_ct_cltc_missing && !optional + is_ct_cltc_without_ct_spec <- is_ct_spec_missing && !is_ct_cltc_missing + are_ct_spec_ct_cltc_available <- !is_ct_spec_missing && !is_ct_cltc_missing - if (!is_cl_missing) { - admiraldev::assert_character_scalar(cl) + if (!is_ct_cltc_missing) { + admiraldev::assert_character_scalar(ct_cltc) } - if (is_required_cl_missing) { - rlang::abort("`cl` is a required parameter.") + if (is_required_ct_cltc_missing) { + rlang::abort("`ct_cltc` is a required parameter.") } - if (is_cl_without_ct) { - rlang::abort("`ct` must be a valid controlled terminology if `cl` is supplied.") + if (is_ct_cltc_without_ct_spec) { + rlang::abort("`ct_spec` must be a valid controlled terminology if `ct_cltc` is supplied.") } - if (is_cl_missing) { + if (is_ct_cltc_missing) { return(invisible(NULL)) } - if (!is_ct_missing && is.na(cl)) { - rlang::abort("`cl` can't be NA. Did you mean `NULL`?") + if (!is_ct_spec_missing && is.na(ct_cltc)) { + rlang::abort("`ct_cltc` can't be NA. Did you mean `NULL`?") } - if (are_ct_cl_available) { - assert_ct(ct, optional = FALSE) - cl_possibilities <- - ct |> - dplyr::pull(ct_vars("cl")) |> + if (are_ct_spec_ct_cltc_available) { + assert_ct_spec(ct_spec, optional = FALSE) + ct_cltc_possibilities <- + ct_spec |> + dplyr::pull(ct_spec_vars("ct_cltc")) |> unique() - admiraldev::assert_character_scalar(cl, values = cl_possibilities) + admiraldev::assert_character_scalar(ct_cltc, values = ct_cltc_possibilities) } - return(cl) + return(ct_cltc) } #' Controlled terminology mappings @@ -196,7 +196,7 @@ assert_cl <- function(ct, cl, optional = FALSE) { #' two `from` columns, the first column indicated in `from` takes precedence, #' and only that mapping is retained in the controlled terminology map. #' -#' @param ct Controlled terminology specification as a +#' @param ct_spec Controlled terminology specification as a #' [tibble][tibble::tibble-package]. Each row is for a mapped controlled term. #' Controlled terms are expected in the column indicated by `to_col`. #' @param from A character vector of column names indicating the variables @@ -209,24 +209,24 @@ assert_cl <- function(ct, cl, optional = FALSE) { #' #' @examples #' # Read in a bundled controlled terminology spec example (ex. 01). -#' (ct_01 <- read_ct_example("ct-01-cm")) +#' (ct_spec_01 <- read_ct_spec_example("ct-01-cm")) #' #' # Generate mappings from the terminology specification. -#' sdtm.oak:::ct_mappings(ct = ct_01) +#' sdtm.oak:::ct_mappings(ct_spec = ct_spec_01) #' #' # Take a glimpse at those mappings where an actual recoding happens. -#' sdtm.oak:::ct_mappings(ct = ct_01) |> +#' sdtm.oak:::ct_mappings(ct_spec = ct_spec_01) |> #' dplyr::filter(from != to) |> #' print(n = 20) #' #' @importFrom rlang .data #' @keywords internal -ct_mappings <- function(ct, from = ct_vars("from"), to = ct_vars("to")) { - assert_ct(ct) +ct_mappings <- function(ct_spec, from = ct_spec_vars("from"), to = ct_spec_vars("to")) { + assert_ct_spec(ct_spec) cols <- c(to, from) ct_mappings <- - ct |> + ct_spec |> dplyr::mutate(to = !!rlang::sym(to)) |> tidyr::pivot_longer( cols = dplyr::all_of(cols), @@ -253,19 +253,19 @@ ct_mappings <- function(ct, from = ct_vars("from"), to = ct_vars("to")) { #' #' @param x A character vector of terms to be recoded following a controlled #' terminology. -#' @param ct A [tibble][tibble::tibble-package] providing a controlled +#' @param ct_spec A [tibble][tibble::tibble-package] providing a controlled #' terminology specification. -#' @param cl A character vector indicating a set of possible controlled +#' @param ct_cltc A character vector indicating a set of possible controlled #' terminology code-lists codes to be used for recoding. By default (`NULL`) -#' all code-lists available in `ct` are used. +#' all code-lists available in `ct_spec` are used. #' @param from A character vector of column names indicating the variables #' containing values to be matched against for terminology recoding. #' @param to A single string indicating the column whose values are to be #' recoded into. #' #' @returns A character vector of terminology recoded values from `x`. If no -#' match is found in the controlled terminology spec provided in `ct`, then -#' `x` values are returned in uppercase. If `ct` is not provided `x` is +#' match is found in the controlled terminology spec provided in `ct_spec`, then +#' `x` values are returned in uppercase. If `ct_spec` is not provided `x` is #' returned unchanged. #' #' @examples @@ -280,32 +280,32 @@ ct_mappings <- function(ct, from = ct_vars("from"), to = ct_vars("to")) { #' "International Unit") #' #' # Load a controlled terminology example -#' (ct <- read_ct_example("ct-01-cm")) +#' (ct_spec <- read_ct_spec_example("ct-01-cm")) #' #' # Use all possible matching terms in the controlled terminology. -#' ct_map(x = terms, ct = ct) +#' ct_map(x = terms, ct_spec = ct_spec) #' #' # Note that if the controlled terminology mapping is restricted to a code-list #' # code, e.g. C71113, then only `"/day"` gets mapped to `"QD"`; remaining terms #' # won't match given the code-list code restriction, and will be mapped to an #' # uppercase version of the original terms. -#' ct_map(x = terms, ct = ct, cl = "C71113") +#' ct_map(x = terms, ct_spec = ct_spec, ct_cltc = "C71113") #' #' @importFrom rlang %||% .data #' @export ct_map <- function(x, - ct = NULL, - cl = NULL, - from = ct_vars("from"), - to = ct_vars("to")) { - ct %||% return(x) - assert_ct(ct) + ct_spec = NULL, + ct_cltc = NULL, + from = ct_spec_vars("from"), + to = ct_spec_vars("to")) { + ct_spec %||% return(x) + assert_ct_spec(ct_spec) - cl <- cl %||% unique(ct[[ct_vars("cl")]]) - ct <- dplyr::filter(ct, .data[[ct_vars("cl")]] %in% cl) + ct_cltc <- ct_cltc %||% unique(ct_spec[[ct_spec_vars("ct_cltc")]]) + ct_spec <- dplyr::filter(ct_spec, .data[[ct_spec_vars("ct_cltc")]] %in% ct_cltc) - mappings <- ct_mappings(ct, from = from, to = to) + mappings <- ct_mappings(ct_spec, from = from, to = to) recode( x, from = mappings$from, @@ -316,7 +316,7 @@ ct_map <- #' Read in a controlled terminology #' -#' [read_ct()] imports a controlled terminology specification data set as a +#' [read_ct_spec()] imports a controlled terminology specification data set as a #' [tibble][tibble::tibble-package]. #' #' @param file A path to a file containing a controlled terminology @@ -324,29 +324,29 @@ ct_map <- #' #' - The file is expected to be a CSV file; #' - The file is expected to contain a first row of column names; -#' - This minimal set of variables is expected: `r knitr::combine_words(ct_vars())`. +#' - This minimal set of variables is expected: `r knitr::combine_words(ct_spec_vars())`. #' #' @returns A [tibble][tibble::tibble-package] with a controlled terminology #' specification. #' #' @examples #' # Get the local path to one of the controlled terminology example files. -#' path <- ct_example("ct-01-cm") +#' path <- ct_spec_example("ct-01-cm") #' #' # Import it to R. -#' read_ct(file = path) +#' read_ct_spec(file = path) #' #' @export -read_ct <- function(file = stop("`file` must be specified")) { - ct <- readr::read_csv(file = file, col_types = "c") - assert_ct(ct) +read_ct_spec <- function(file = stop("`file` must be specified")) { + ct_spec <- readr::read_csv(file = file, col_types = "c") + assert_ct_spec(ct_spec) - ct + ct_spec } #' Find the path to an example controlled terminology file #' -#' [ct_example()] resolves the local path to an example controlled +#' [ct_spec_example()] resolves the local path to an example controlled #' terminology file. #' #' @param example A string with either the basename, file name, or relative path @@ -358,24 +358,24 @@ read_ct <- function(file = stop("`file` must be specified")) { #' @examples #' # Get the local path to controlled terminology example file 01 #' # Using the basename only: -#' ct_example("ct-01-cm") +#' ct_spec_example("ct-01-cm") #' #' # Using the file name: -#' ct_example("ct-01-cm.csv") +#' ct_spec_example("ct-01-cm.csv") #' #' # Using the relative path: -#' ct_example("ct/ct-01-cm.csv") +#' ct_spec_example("ct/ct-01-cm.csv") #' #' # If no example is provided it returns a vector of possible choices. -#' ct_example() +#' ct_spec_example() #' #' @export -ct_example <- function(example) { +ct_spec_example <- function(example) { # If no example is requested, then return all available files. if (missing(example)) { - ct_path <- system.file("ct", package = "sdtm.oak", mustWork = TRUE) - ct_files <- list.files(ct_path, pattern = "*.csv") - return(ct_files) + ct_spec_path <- system.file("ct", package = "sdtm.oak", mustWork = TRUE) + ct_spec_files <- list.files(ct_spec_path, pattern = "*.csv") + return(ct_spec_files) } # Otherwise, resolve the local path to the example requested. @@ -387,7 +387,7 @@ ct_example <- function(example) { if (identical(local_path, "")) { stop( glue::glue( - "'{example}' does not match any ct files. Run `ct_example()` for options." + "'{example}' does not match any ct spec files. Run `ct_spec_example()` for options." ), call. = FALSE ) @@ -400,33 +400,33 @@ ct_example <- function(example) { #' Read an example controlled terminology specification #' -#' [read_ct_example()] imports one of the bundled controlled terminology +#' [read_ct_spec_example()] imports one of the bundled controlled terminology #' specification data sets as a [tibble][tibble::tibble-package] into R. #' #' @param example The file name of a controlled terminology data set bundled -#' with `{stdm.oak}`, run `read_ct_example()` for available example files. +#' with `{stdm.oak}`, run `read_ct_spec_example()` for available example files. #' #' @returns A [tibble][tibble::tibble-package] with a controlled terminology #' specification data set, or a character vector of example file names. #' #' @examples #' # Leave the `example` parameter as missing for available example files. -#' read_ct_example() +#' read_ct_spec_example() #' -#' # Read an example ct file. -#' read_ct_example("ct-01-cm.csv") +#' # Read an example controlled terminology spec file. +#' read_ct_spec_example("ct-01-cm.csv") #' #' # You may omit the file extension. -#' read_ct_example("ct-01-cm") +#' read_ct_spec_example("ct-01-cm") #' #' @export -read_ct_example <- function(example) { +read_ct_spec_example <- function(example) { if (missing(example)) { - return(ct_example()) + return(ct_spec_example()) } else { admiraldev::assert_character_scalar(example) } - path <- ct_example(example) - read_ct(file = path) + path <- ct_spec_example(example) + read_ct_spec(file = path) } diff --git a/R/hardcode.R b/R/hardcode.R index a9a0dfd9..a86e5df7 100644 --- a/R/hardcode.R +++ b/R/hardcode.R @@ -13,12 +13,12 @@ #' of variable to be derived. #' @param tgt_val The target SDTM value to be hardcoded into the variable #' indicated in `tgt_var`. -#' @param ct Study controlled terminology specification: a dataframe with a -#' minimal set of columns, see [ct_vars()] for details. This parameter is +#' @param ct_spec Study controlled terminology specification: a dataframe with a +#' minimal set of columns, see [ct_spec_vars()] for details. This parameter is #' optional, if left as `NULL` no controlled terminology recoding is applied. -#' @param cl A code-list code indicating which subset of the controlled +#' @param ct_cltc A code-list code indicating which subset of the controlled #' terminology to apply in the derivation. This parameter is optional, if left -#' as `NULL`, all possible recodings in `ct` are attempted. +#' as `NULL`, all possible recodings in `ct_spec` are attempted. #' @param tgt_dat Target dataset: a data frame to be merged against `raw_dat` by #' the variables indicated in `id_vars`. This parameter is optional, see #' section Value for how the output changes depending on this argument value. @@ -40,8 +40,8 @@ sdtm_hardcode <- function(raw_dat, raw_var, tgt_var, tgt_val, - ct = NULL, - cl = NULL, + ct_spec = NULL, + ct_cltc = NULL, tgt_dat = NULL, id_vars = oak_id_vars()) { admiraldev::assert_character_scalar(raw_var) @@ -53,11 +53,11 @@ sdtm_hardcode <- function(raw_dat, ) admiraldev::assert_data_frame(raw_dat, required_vars = rlang::syms(c(id_vars, raw_var))) admiraldev::assert_data_frame(tgt_dat, required_vars = rlang::syms(id_vars), optional = TRUE) - assert_ct(ct, optional = TRUE) - assert_cl(ct = ct, cl = cl, optional = TRUE) + assert_ct_spec(ct_spec, optional = TRUE) + assert_ct_cltc(ct_spec = ct_spec, ct_cltc = ct_cltc, optional = TRUE) # Recode the hardcoded value following terminology. - tgt_val <- ct_map(tgt_val, ct = ct, cl = cl) + tgt_val <- ct_map(tgt_val, ct_spec = ct_spec, ct_cltc = ct_cltc) # Apply derivation of the hardcoded value. # `der_dat`: derived dataset. @@ -99,12 +99,12 @@ sdtm_hardcode <- function(raw_dat, #' of variable to be derived. #' @param tgt_val The target SDTM value to be hardcoded into the variable #' indicated in `tgt_var`. -#' @param ct Study controlled terminology specification: a dataframe with a -#' minimal set of columns, see [ct_vars()] for details. This parameter is +#' @param ct_spec Study controlled terminology specification: a dataframe with a +#' minimal set of columns, see [ct_spec_vars()] for details. This parameter is #' optional, if left as `NULL` no controlled terminology recoding is applied. -#' @param cl A code-list code indicating which subset of the controlled +#' @param ct_cltc A code-list code indicating which subset of the controlled #' terminology to apply in the derivation. This parameter is optional, if left -#' as `NULL`, all possible recodings in `ct` are attempted. +#' as `NULL`, all possible recodings in `ct_spec` are attempted. #' @param tgt_dat Target dataset: a data frame to be merged against `raw_dat` by #' the variables indicated in `id_vars`. This parameter is optional, see #' section Value for how the output changes depending on this argument value. @@ -161,7 +161,7 @@ sdtm_hardcode <- function(raw_dat, #' ) #' #' # Controlled terminology specification -#' (ct <- read_ct_example("ct-01-cm")) +#' (ct_spec <- read_ct_spec_example("ct-01-cm")) #' #' # Hardcoding of `CMCAT` with the value `"GENERAL CONCOMITANT MEDICATIONS"` #' # involving terminology recoding. `NA` values in `MDRAW` are preserved in @@ -171,8 +171,8 @@ sdtm_hardcode <- function(raw_dat, #' raw_var = "MDRAW", #' tgt_var = "CMCAT", #' tgt_val = "GENERAL CONCOMITANT MEDICATIONS", -#' ct = ct, -#' cl = "C66729", +#' ct_spec = ct_spec, +#' ct_cltc = "C66729", #' tgt_dat = cm_inter #' ) #' @@ -216,8 +216,8 @@ hardcode_ct <- raw_var, tgt_var, tgt_val, - ct, - cl, + ct_spec, + ct_cltc, tgt_dat = NULL, id_vars = oak_id_vars()) { admiraldev::assert_character_scalar(raw_var) @@ -235,16 +235,16 @@ hardcode_ct <- optional = TRUE ) - assert_ct(ct, optional = FALSE) - assert_cl(ct = ct, cl = cl, optional = FALSE) + assert_ct_spec(ct_spec, optional = FALSE) + assert_ct_cltc(ct_spec = ct_spec, ct_cltc = ct_cltc, optional = FALSE) sdtm_hardcode( raw_dat = raw_dat, raw_var = raw_var, tgt_var = tgt_var, tgt_val = tgt_val, - ct = ct, - cl = cl, + ct_spec = ct_spec, + ct_cltc = ct_cltc, tgt_dat = tgt_dat, id_vars = id_vars ) diff --git a/man/assert_cl.Rd b/man/assert_cl.Rd deleted file mode 100644 index ea035803..00000000 --- a/man/assert_cl.Rd +++ /dev/null @@ -1,43 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/ct.R -\name{assert_cl} -\alias{assert_cl} -\title{Assert a code-list code} -\usage{ -assert_cl(ct, cl, optional = FALSE) -} -\arguments{ -\item{ct}{Either a data frame encoding a controlled terminology data set, or -\code{NULL}.} - -\item{cl}{A string with a to-be asserted code-list code, or \code{NULL}.} - -\item{optional}{A scalar logical, indicating whether \code{cl} can be \code{NULL} or -not.} -} -\value{ -The function throws an error if \code{cl} is not a valid code-list code -given the controlled terminology data set; otherwise, \code{cl} is returned -invisibly. -} -\description{ -\code{\link[=assert_cl]{assert_cl()}} asserts the validity of a code-list code in the context of -a controlled terminology specification. -} -\examples{ -# Load a controlled terminology example. -(ct <- read_ct_example("ct-01-cm")) - -# Should work fine. -sdtm.oak:::assert_cl(ct = ct, cl = "C71113") - -# In certain cases, you might allow `cl` to be `NULL` as to indicate absence, -# in that case, set `optional` to `TRUE` to make `assert_cl()` more -# forgiving. -sdtm.oak:::assert_cl(ct = ct, cl = NULL, optional = TRUE) - -# Otherwise it would err. -try(sdtm.oak:::assert_cl(ct = ct, cl = NULL, optional = FALSE)) - -} -\keyword{internal} diff --git a/man/assert_ct.Rd b/man/assert_ct.Rd deleted file mode 100644 index ace3a345..00000000 --- a/man/assert_ct.Rd +++ /dev/null @@ -1,45 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/ct.R -\name{assert_ct} -\alias{assert_ct} -\title{Assert a controlled terminology specification} -\usage{ -assert_ct(ct, optional = FALSE) -} -\arguments{ -\item{ct}{A data frame to be asserted as a valid controlled terminology data -set.} -} -\value{ -The function throws an error if \code{ct} is not a valid controlled -terminology data set; otherwise, \code{ct} is returned invisibly. -} -\description{ -\code{\link[=assert_ct]{assert_ct()}} will check whether \code{ct} is a data frame and if it contains the -variables: codelist_code, collected_value, term_synonyms, and term_value. - -In addition, it will also check if the data frame is not empty (no rows), and -whether the columns \code{codelist_code} and \code{term_value} do -not contain any \code{NA} values. -} -\examples{ -# If `ct` is a valid controlled terminology then it is returned invisibly. -ct_01 <- read_ct_example("ct-01-cm") -all.equal(ct_01, sdtm.oak:::assert_ct(ct_01)) - -# A minimal set of variables needs to be present in `ct` for it to pass the -# assertion; `sdtm.oak:::ct_vars()` defines their names. -(req_vars <- sdtm.oak:::ct_vars()) - -# Other (facultative) variables also present in the controlled terminology -# example. -(opt_vars <- setdiff(colnames(ct_01), req_vars)) - -# With only the mandatory variables, the assertion still passes. -sdtm.oak:::assert_ct(ct_01[req_vars]) - -# Not having the required variables results in an error. -try(sdtm.oak:::assert_ct(ct_01[opt_vars])) - -} -\keyword{internal} diff --git a/man/assert_ct_cltc.Rd b/man/assert_ct_cltc.Rd new file mode 100644 index 00000000..774b7e86 --- /dev/null +++ b/man/assert_ct_cltc.Rd @@ -0,0 +1,43 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ct.R +\name{assert_ct_cltc} +\alias{assert_ct_cltc} +\title{Assert a code-list code} +\usage{ +assert_ct_cltc(ct_spec, ct_cltc, optional = FALSE) +} +\arguments{ +\item{ct_spec}{Either a data frame encoding a controlled terminology data set, or +\code{NULL}.} + +\item{ct_cltc}{A string with a to-be asserted code-list code, or \code{NULL}.} + +\item{optional}{A scalar logical, indicating whether \code{ct_cltc} can be \code{NULL} or +not.} +} +\value{ +The function throws an error if \code{ct_cltc} is not a valid code-list code +given the controlled terminology data set; otherwise, \code{ct_cltc} is returned +invisibly. +} +\description{ +\code{\link[=assert_ct_cltc]{assert_ct_cltc()}} asserts the validity of a code-list code in the context of +a controlled terminology specification. +} +\examples{ +# Load a controlled terminology example. +(ct_spec <- read_ct_spec_example("ct-01-cm")) + +# Should work fine. +sdtm.oak:::assert_ct_cltc(ct_spec = ct_spec, ct_cltc = "C71113") + +# In certain cases, you might allow `ct_cltc` to be `NULL` as to indicate absence, +# in that case, set `optional` to `TRUE` to make `assert_ct_cltc()` more +# forgiving. +sdtm.oak:::assert_ct_cltc(ct_spec = ct_spec, ct_cltc = NULL, optional = TRUE) + +# Otherwise it would err. +try(sdtm.oak:::assert_ct_cltc(ct_spec = ct_spec, ct_cltc = NULL, optional = FALSE)) + +} +\keyword{internal} diff --git a/man/assert_ct_spec.Rd b/man/assert_ct_spec.Rd new file mode 100644 index 00000000..5ce72ce4 --- /dev/null +++ b/man/assert_ct_spec.Rd @@ -0,0 +1,45 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ct.R +\name{assert_ct_spec} +\alias{assert_ct_spec} +\title{Assert a controlled terminology specification} +\usage{ +assert_ct_spec(ct_spec, optional = FALSE) +} +\arguments{ +\item{ct_spec}{A data frame to be asserted as a valid controlled terminology data +set.} +} +\value{ +The function throws an error if \code{ct_spec} is not a valid controlled +terminology data set; otherwise, \code{ct_spec} is returned invisibly. +} +\description{ +\code{\link[=assert_ct_spec]{assert_ct_spec()}} will check whether \code{ct_spec} is a data frame and if it contains the +variables: codelist_code, collected_value, term_synonyms, and term_value. + +In addition, it will also check if the data frame is not empty (no rows), and +whether the columns \code{codelist_code} and \code{term_value} do +not contain any \code{NA} values. +} +\examples{ +# If `ct_spec` is a valid controlled terminology then it is returned invisibly. +ct_spec_01 <- read_ct_spec_example("ct-01-cm") +all.equal(ct_spec_01, sdtm.oak:::assert_ct_spec(ct_spec_01)) + +# A minimal set of variables needs to be present in `ct_spec` for it to pass the +# assertion; `sdtm.oak:::ct_spec_vars()` defines their names. +(req_vars <- sdtm.oak:::ct_spec_vars()) + +# Other (facultative) variables also present in the controlled terminology +# example. +(opt_vars <- setdiff(colnames(ct_spec_01), req_vars)) + +# With only the mandatory variables, the assertion still passes. +sdtm.oak:::assert_ct_spec(ct_spec_01[req_vars]) + +# Not having the required variables results in an error. +try(sdtm.oak:::assert_ct_spec(ct_spec_01[opt_vars])) + +} +\keyword{internal} diff --git a/man/assign.Rd b/man/assign.Rd index f11234f1..34828a1c 100644 --- a/man/assign.Rd +++ b/man/assign.Rd @@ -18,8 +18,8 @@ assign_ct( raw_dat, raw_var, tgt_var, - ct, - cl, + ct_spec, + ct_cltc, tgt_dat = NULL, id_vars = oak_id_vars() ) @@ -41,10 +41,10 @@ section Value for how the output changes depending on this argument value.} \item{id_vars}{Key variables to be used in the join between the raw dataset (\code{raw_dat}) and the target data set (\code{raw_dat}).} -\item{ct}{Study controlled terminology specification: a dataframe with a -minimal set of columns, see \code{\link[=ct_vars]{ct_vars()}} for details.} +\item{ct_spec}{Study controlled terminology specification: a dataframe with a +minimal set of columns, see \code{\link[=ct_spec_vars]{ct_spec_vars()}} for details.} -\item{cl}{A code-list code indicating which subset of the controlled +\item{ct_cltc}{A code-list code indicating which subset of the controlled terminology to apply in the derivation.} } \value{ @@ -127,14 +127,14 @@ cm_inter <- ) # Controlled terminology specification -(ct <- read_ct_example("ct-01-cm")) +(ct_spec <- read_ct_spec_example("ct-01-cm")) assign_ct( raw_dat = md1, raw_var = "MDIND", tgt_var = "CMINDC", - ct = ct, - cl = "C66729", + ct_spec = ct_spec, + ct_cltc = "C66729", tgt_dat = cm_inter ) diff --git a/man/ct_map.Rd b/man/ct_map.Rd index 4d21cec9..a2a74553 100644 --- a/man/ct_map.Rd +++ b/man/ct_map.Rd @@ -4,18 +4,24 @@ \alias{ct_map} \title{Recode according to controlled terminology} \usage{ -ct_map(x, ct = NULL, cl = NULL, from = ct_vars("from"), to = ct_vars("to")) +ct_map( + x, + ct_spec = NULL, + ct_cltc = NULL, + from = ct_spec_vars("from"), + to = ct_spec_vars("to") +) } \arguments{ \item{x}{A character vector of terms to be recoded following a controlled terminology.} -\item{ct}{A \link[tibble:tibble-package]{tibble} providing a controlled +\item{ct_spec}{A \link[tibble:tibble-package]{tibble} providing a controlled terminology specification.} -\item{cl}{A character vector indicating a set of possible controlled +\item{ct_cltc}{A character vector indicating a set of possible controlled terminology code-lists codes to be used for recoding. By default (\code{NULL}) -all code-lists available in \code{ct} are used.} +all code-lists available in \code{ct_spec} are used.} \item{from}{A character vector of column names indicating the variables containing values to be matched against for terminology recoding.} @@ -25,8 +31,8 @@ recoded into.} } \value{ A character vector of terminology recoded values from \code{x}. If no -match is found in the controlled terminology spec provided in \code{ct}, then -\code{x} values are returned in uppercase. If \code{ct} is not provided \code{x} is +match is found in the controlled terminology spec provided in \code{ct_spec}, then +\code{x} values are returned in uppercase. If \code{ct_spec} is not provided \code{x} is returned unchanged. } \description{ @@ -44,15 +50,15 @@ terms <- "International Unit") # Load a controlled terminology example -(ct <- read_ct_example("ct-01-cm")) +(ct_spec <- read_ct_spec_example("ct-01-cm")) # Use all possible matching terms in the controlled terminology. -ct_map(x = terms, ct = ct) +ct_map(x = terms, ct_spec = ct_spec) # Note that if the controlled terminology mapping is restricted to a code-list # code, e.g. C71113, then only `"/day"` gets mapped to `"QD"`; remaining terms # won't match given the code-list code restriction, and will be mapped to an # uppercase version of the original terms. -ct_map(x = terms, ct = ct, cl = "C71113") +ct_map(x = terms, ct_spec = ct_spec, ct_cltc = "C71113") } diff --git a/man/ct_mappings.Rd b/man/ct_mappings.Rd index 35787340..83ca4898 100644 --- a/man/ct_mappings.Rd +++ b/man/ct_mappings.Rd @@ -4,10 +4,10 @@ \alias{ct_mappings} \title{Controlled terminology mappings} \usage{ -ct_mappings(ct, from = ct_vars("from"), to = ct_vars("to")) +ct_mappings(ct_spec, from = ct_spec_vars("from"), to = ct_spec_vars("to")) } \arguments{ -\item{ct}{Controlled terminology specification as a +\item{ct_spec}{Controlled terminology specification as a \link[tibble:tibble-package]{tibble}. Each row is for a mapped controlled term. Controlled terms are expected in the column indicated by \code{to_col}.} @@ -33,13 +33,13 @@ and only that mapping is retained in the controlled terminology map. } \examples{ # Read in a bundled controlled terminology spec example (ex. 01). -(ct_01 <- read_ct_example("ct-01-cm")) +(ct_spec_01 <- read_ct_spec_example("ct-01-cm")) # Generate mappings from the terminology specification. -sdtm.oak:::ct_mappings(ct = ct_01) +sdtm.oak:::ct_mappings(ct_spec = ct_spec_01) # Take a glimpse at those mappings where an actual recoding happens. -sdtm.oak:::ct_mappings(ct = ct_01) |> +sdtm.oak:::ct_mappings(ct_spec = ct_spec_01) |> dplyr::filter(from != to) |> print(n = 20) diff --git a/man/ct_example.Rd b/man/ct_spec_example.Rd similarity index 71% rename from man/ct_example.Rd rename to man/ct_spec_example.Rd index 95a6c560..2a7f2e3f 100644 --- a/man/ct_example.Rd +++ b/man/ct_spec_example.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/ct.R -\name{ct_example} -\alias{ct_example} +\name{ct_spec_example} +\alias{ct_spec_example} \title{Find the path to an example controlled terminology file} \usage{ -ct_example(example) +ct_spec_example(example) } \arguments{ \item{example}{A string with either the basename, file name, or relative path @@ -15,21 +15,21 @@ The local path to an example file if \code{example} is supplied, or a character vector of example file names. } \description{ -\code{\link[=ct_example]{ct_example()}} resolves the local path to an example controlled +\code{\link[=ct_spec_example]{ct_spec_example()}} resolves the local path to an example controlled terminology file. } \examples{ # Get the local path to controlled terminology example file 01 # Using the basename only: -ct_example("ct-01-cm") +ct_spec_example("ct-01-cm") # Using the file name: -ct_example("ct-01-cm.csv") +ct_spec_example("ct-01-cm.csv") # Using the relative path: -ct_example("ct/ct-01-cm.csv") +ct_spec_example("ct/ct-01-cm.csv") # If no example is provided it returns a vector of possible choices. -ct_example() +ct_spec_example() } diff --git a/man/ct_vars.Rd b/man/ct_spec_vars.Rd similarity index 68% rename from man/ct_vars.Rd rename to man/ct_spec_vars.Rd index ee786241..f4af9dbe 100644 --- a/man/ct_vars.Rd +++ b/man/ct_spec_vars.Rd @@ -1,39 +1,39 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/ct.R -\name{ct_vars} -\alias{ct_vars} +\name{ct_spec_vars} +\alias{ct_spec_vars} \title{Controlled terminology variables} \usage{ -ct_vars(set = c("all", "cl", "from", "to")) +ct_spec_vars(set = c("all", "ct_cltc", "from", "to")) } \arguments{ -\item{set}{A scalar character (string), one of: \code{"all"} (default), \code{"cl"}, +\item{set}{A scalar character (string), one of: \code{"all"} (default), \code{"ct_cltc"}, \code{"from"} or \code{"to"}.} } \description{ -\code{\link[=ct_vars]{ct_vars()}} returns the mandatory variables to be present in a data set +\code{\link[=ct_spec_vars]{ct_spec_vars()}} returns the mandatory variables to be present in a data set representing a controlled terminology. By default, it returns all required variables. If only the subset of variables used for matching terms are needed, then request this subset of variables by passing the argument value \code{"from"}. If only the mapping-to variable is to be requested, then simply pass \code{"to"}. If -only the code-list code variable name is needed then pass \code{"cl"}. +only the code-list code variable name is needed then pass \code{"ct_cltc"}. } \examples{ # These two calls are equivalent and return all required variables in a # controlled terminology data set. -sdtm.oak:::ct_vars() -sdtm.oak:::ct_vars("all") +sdtm.oak:::ct_spec_vars() +sdtm.oak:::ct_spec_vars("all") # "Codelist code" variable name. -sdtm.oak:::ct_vars("cl") +sdtm.oak:::ct_spec_vars("ct_cltc") # "From" variables -sdtm.oak:::ct_vars("from") +sdtm.oak:::ct_spec_vars("from") # The "to" variable. -sdtm.oak:::ct_vars("to") +sdtm.oak:::ct_spec_vars("to") } \keyword{internal} diff --git a/man/harcode.Rd b/man/harcode.Rd index e097cc5d..6201b950 100644 --- a/man/harcode.Rd +++ b/man/harcode.Rd @@ -20,8 +20,8 @@ hardcode_ct( raw_var, tgt_var, tgt_val, - ct, - cl, + ct_spec, + ct_cltc, tgt_dat = NULL, id_vars = oak_id_vars() ) @@ -46,13 +46,13 @@ section Value for how the output changes depending on this argument value.} \item{id_vars}{Key variables to be used in the join between the raw dataset (\code{raw_dat}) and the target data set (\code{raw_dat}).} -\item{ct}{Study controlled terminology specification: a dataframe with a -minimal set of columns, see \code{\link[=ct_vars]{ct_vars()}} for details. This parameter is +\item{ct_spec}{Study controlled terminology specification: a dataframe with a +minimal set of columns, see \code{\link[=ct_spec_vars]{ct_spec_vars()}} for details. This parameter is optional, if left as \code{NULL} no controlled terminology recoding is applied.} -\item{cl}{A code-list code indicating which subset of the controlled +\item{ct_cltc}{A code-list code indicating which subset of the controlled terminology to apply in the derivation. This parameter is optional, if left -as \code{NULL}, all possible recodings in \code{ct} are attempted.} +as \code{NULL}, all possible recodings in \code{ct_spec} are attempted.} } \value{ The returned data set depends on the value of \code{tgt_dat}: @@ -115,7 +115,7 @@ hardcode_no_ct( ) # Controlled terminology specification -(ct <- read_ct_example("ct-01-cm")) +(ct_spec <- read_ct_spec_example("ct-01-cm")) # Hardcoding of `CMCAT` with the value `"GENERAL CONCOMITANT MEDICATIONS"` # involving terminology recoding. `NA` values in `MDRAW` are preserved in @@ -125,8 +125,8 @@ hardcode_ct( raw_var = "MDRAW", tgt_var = "CMCAT", tgt_val = "GENERAL CONCOMITANT MEDICATIONS", - ct = ct, - cl = "C66729", + ct_spec = ct_spec, + ct_cltc = "C66729", tgt_dat = cm_inter ) diff --git a/man/read_ct.Rd b/man/read_ct_spec.Rd similarity index 74% rename from man/read_ct.Rd rename to man/read_ct_spec.Rd index ac790442..21d6103d 100644 --- a/man/read_ct.Rd +++ b/man/read_ct_spec.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/ct.R -\name{read_ct} -\alias{read_ct} +\name{read_ct_spec} +\alias{read_ct_spec} \title{Read in a controlled terminology} \usage{ -read_ct(file = stop("`file` must be specified")) +read_ct_spec(file = stop("`file` must be specified")) } \arguments{ \item{file}{A path to a file containing a controlled terminology @@ -20,14 +20,14 @@ A \link[tibble:tibble-package]{tibble} with a controlled terminology specification. } \description{ -\code{\link[=read_ct]{read_ct()}} imports a controlled terminology specification data set as a +\code{\link[=read_ct_spec]{read_ct_spec()}} imports a controlled terminology specification data set as a \link[tibble:tibble-package]{tibble}. } \examples{ # Get the local path to one of the controlled terminology example files. -path <- ct_example("ct-01-cm") +path <- ct_spec_example("ct-01-cm") # Import it to R. -read_ct(file = path) +read_ct_spec(file = path) } diff --git a/man/read_ct_example.Rd b/man/read_ct_spec_example.Rd similarity index 58% rename from man/read_ct_example.Rd rename to man/read_ct_spec_example.Rd index 8db2911e..b3b26c08 100644 --- a/man/read_ct_example.Rd +++ b/man/read_ct_spec_example.Rd @@ -1,31 +1,31 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/ct.R -\name{read_ct_example} -\alias{read_ct_example} +\name{read_ct_spec_example} +\alias{read_ct_spec_example} \title{Read an example controlled terminology specification} \usage{ -read_ct_example(example) +read_ct_spec_example(example) } \arguments{ \item{example}{The file name of a controlled terminology data set bundled -with \code{{stdm.oak}}, run \code{read_ct_example()} for available example files.} +with \code{{stdm.oak}}, run \code{read_ct_spec_example()} for available example files.} } \value{ A \link[tibble:tibble-package]{tibble} with a controlled terminology specification data set, or a character vector of example file names. } \description{ -\code{\link[=read_ct_example]{read_ct_example()}} imports one of the bundled controlled terminology +\code{\link[=read_ct_spec_example]{read_ct_spec_example()}} imports one of the bundled controlled terminology specification data sets as a \link[tibble:tibble-package]{tibble} into R. } \examples{ # Leave the `example` parameter as missing for available example files. -read_ct_example() +read_ct_spec_example() -# Read an example ct file. -read_ct_example("ct-01-cm.csv") +# Read an example controlled terminology spec file. +read_ct_spec_example("ct-01-cm.csv") # You may omit the file extension. -read_ct_example("ct-01-cm") +read_ct_spec_example("ct-01-cm") } diff --git a/man/sdtm_assign.Rd b/man/sdtm_assign.Rd index 56e2e8c2..d49364d0 100644 --- a/man/sdtm_assign.Rd +++ b/man/sdtm_assign.Rd @@ -8,8 +8,8 @@ sdtm_assign( raw_dat, raw_var, tgt_var, - ct = NULL, - cl = NULL, + ct_spec = NULL, + ct_cltc = NULL, tgt_dat = NULL, id_vars = oak_id_vars() ) @@ -24,13 +24,13 @@ raw variable in \code{raw_dat}.} \item{tgt_var}{The target SDTM variable: a single string indicating the name of variable to be derived.} -\item{ct}{Study controlled terminology specification: a dataframe with a -minimal set of columns, see \code{\link[=ct_vars]{ct_vars()}} for details. This parameter is +\item{ct_spec}{Study controlled terminology specification: a dataframe with a +minimal set of columns, see \code{\link[=ct_spec_vars]{ct_spec_vars()}} for details. This parameter is optional, if left as \code{NULL} no controlled terminology recoding is applied.} -\item{cl}{A code-list code indicating which subset of the controlled +\item{ct_cltc}{A code-list code indicating which subset of the controlled terminology to apply in the derivation. This parameter is optional, if left -as \code{NULL}, all possible recodings in \code{ct} are attempted.} +as \code{NULL}, all possible recodings in \code{ct_spec} are attempted.} \item{tgt_dat}{Target dataset: a data frame to be merged against \code{raw_dat} by the variables indicated in \code{id_vars}. This parameter is optional, see diff --git a/man/sdtm_hardcode.Rd b/man/sdtm_hardcode.Rd index 0f0759bd..89722bf1 100644 --- a/man/sdtm_hardcode.Rd +++ b/man/sdtm_hardcode.Rd @@ -9,8 +9,8 @@ sdtm_hardcode( raw_var, tgt_var, tgt_val, - ct = NULL, - cl = NULL, + ct_spec = NULL, + ct_cltc = NULL, tgt_dat = NULL, id_vars = oak_id_vars() ) @@ -28,13 +28,13 @@ of variable to be derived.} \item{tgt_val}{The target SDTM value to be hardcoded into the variable indicated in \code{tgt_var}.} -\item{ct}{Study controlled terminology specification: a dataframe with a -minimal set of columns, see \code{\link[=ct_vars]{ct_vars()}} for details. This parameter is +\item{ct_spec}{Study controlled terminology specification: a dataframe with a +minimal set of columns, see \code{\link[=ct_spec_vars]{ct_spec_vars()}} for details. This parameter is optional, if left as \code{NULL} no controlled terminology recoding is applied.} -\item{cl}{A code-list code indicating which subset of the controlled +\item{ct_cltc}{A code-list code indicating which subset of the controlled terminology to apply in the derivation. This parameter is optional, if left -as \code{NULL}, all possible recodings in \code{ct} are attempted.} +as \code{NULL}, all possible recodings in \code{ct_spec} are attempted.} \item{tgt_dat}{Target dataset: a data frame to be merged against \code{raw_dat} by the variables indicated in \code{id_vars}. This parameter is optional, see diff --git a/tests/testthat/test-ct.R b/tests/testthat/test-ct.R index 8d1ddb85..f2a4ce8d 100644 --- a/tests/testthat/test-ct.R +++ b/tests/testthat/test-ct.R @@ -1,6 +1,6 @@ -test_that("ct_vars() works as expected", { +test_that("ct_spec_vars() works as expected", { expect_identical( - ct_vars(), + ct_spec_vars(), c( "codelist_code", "collected_value", @@ -10,7 +10,7 @@ test_that("ct_vars() works as expected", { ) expect_identical( - ct_vars(set = "all"), + ct_spec_vars(set = "all"), c( "codelist_code", "collected_value", @@ -20,60 +20,60 @@ test_that("ct_vars() works as expected", { ) expect_identical( - ct_vars(set = "cl"), + ct_spec_vars(set = "ct_cltc"), "codelist_code" ) expect_identical( - ct_vars(set = "from"), + ct_spec_vars(set = "from"), c( "collected_value", "term_synonyms" ) ) - expect_identical(ct_vars(set = "to"), "term_value") + expect_identical(ct_spec_vars(set = "to"), "term_value") }) -test_that("ct_vars() fails with invalid input choice", { - expect_error(ct_vars("foo")) - expect_error(ct_vars(1L)) - expect_error(ct_vars(FALSE)) - expect_error(ct_vars(NULL)) +test_that("ct_spec_vars() fails with invalid input choice", { + expect_error(ct_spec_vars("foo")) + expect_error(ct_spec_vars(1L)) + expect_error(ct_spec_vars(FALSE)) + expect_error(ct_spec_vars(NULL)) }) -test_that("assert_ct() works as expected", { +test_that("assert_ct_spec() works as expected", { # Load an example controlled terminology spec. - ct <- read_ct_example("ct-01-cm") - cols <- colnames(ct) - cl_col <- ct_vars("cl") - to_col <- ct_vars("to") + ct_spec <- read_ct_spec_example("ct-01-cm") + cols <- colnames(ct_spec) + ct_cltc_col <- ct_spec_vars("ct_cltc") + to_col <- ct_spec_vars("to") - expect_no_error(assert_ct(ct, optional = FALSE)) - expect_no_error(assert_ct(ct, optional = TRUE)) - expect_identical(assert_ct(ct, optional = FALSE), ct) - expect_identical(assert_ct(ct, optional = TRUE), ct) - expect_null(assert_ct(NULL, optional = TRUE)) + expect_no_error(assert_ct_spec(ct_spec, optional = FALSE)) + expect_no_error(assert_ct_spec(ct_spec, optional = TRUE)) + expect_identical(assert_ct_spec(ct_spec, optional = FALSE), ct_spec) + expect_identical(assert_ct_spec(ct_spec, optional = TRUE), ct_spec) + expect_null(assert_ct_spec(NULL, optional = TRUE)) # Code-list code column is one of the key variables that must be present - # in `ct`, so being missing should trigger an error. - expect_error(assert_ct(ct[setdiff(cols, cl_col)], optional = FALSE)) - expect_error(assert_ct(ct[setdiff(cols, cl_col)], optional = TRUE)) + # in `ct_spec`, so being missing should trigger an error. + expect_error(assert_ct_spec(ct_spec[setdiff(cols, ct_cltc_col)], optional = FALSE)) + expect_error(assert_ct_spec(ct_spec[setdiff(cols, ct_cltc_col)], optional = TRUE)) # The code-list code and the "to" columns of a controlled terminology should # not contain NAs, as otherwise the mapping is undefined. If that happens # an error is triggered. - ct01 <- ct - ct01[[cl_col]] <- NA_character_ - expect_error(assert_ct(ct01, optional = FALSE)) - expect_error(assert_ct(ct01, optional = TRUE)) + ct_spec01 <- ct_spec + ct_spec01[[ct_cltc_col]] <- NA_character_ + expect_error(assert_ct_spec(ct_spec01, optional = FALSE)) + expect_error(assert_ct_spec(ct_spec01, optional = TRUE)) - ct02 <- ct - ct02[[to_col]] <- NA_character_ - expect_error(assert_ct(ct01, optional = FALSE)) - expect_error(assert_ct(ct01, optional = TRUE)) + ct_spec02 <- ct_spec + ct_spec02[[to_col]] <- NA_character_ + expect_error(assert_ct_spec(ct_spec01, optional = FALSE)) + expect_error(assert_ct_spec(ct_spec01, optional = TRUE)) - ct_empty <- + ct_spec_empty <- data.frame( codelist_code = character(), collected_value = character(), @@ -82,115 +82,115 @@ test_that("assert_ct() works as expected", { stringsAsFactors = FALSE ) - # `ct` cannot be empty as that means that there are no mappings. - expect_error(assert_ct(ct_empty, optional = TRUE)) - expect_error(assert_ct(ct_empty, optional = FALSE)) + # `ct_spec` cannot be empty as that means that there are no mappings. + expect_error(assert_ct_spec(ct_spec_empty, optional = TRUE)) + expect_error(assert_ct_spec(ct_spec_empty, optional = FALSE)) }) -test_that("assert_cl() works as expected", { +test_that("assert_ct_cltc() works as expected", { # Read in a controlled terminology example. - ct <- read_ct_example("ct-01-cm") + ct_spec <- read_ct_spec_example("ct-01-cm") - # If `cl` is not supplied and is not optional, then it should err. - expect_error(assert_cl( - ct = NULL, - cl = NULL, + # If `ct_cltc` is not supplied and is not optional, then it should err. + expect_error(assert_ct_cltc( + ct_spec = NULL, + ct_cltc = NULL, optional = FALSE )) - # If `cl` is not supplied but it is optional, then all fine. - expect_no_error(assert_cl( - ct = NULL, - cl = NULL, + # If `ct_cltc` is not supplied but it is optional, then all fine. + expect_no_error(assert_ct_cltc( + ct_spec = NULL, + ct_cltc = NULL, optional = TRUE )) - # Moreover, in case of no error, `cl` should be returned invisibly, in this + # Moreover, in case of no error, `ct_cltc` should be returned invisibly, in this # case `NULL`. - expect_null(assert_cl( - ct = NULL, - cl = NULL, + expect_null(assert_ct_cltc( + ct_spec = NULL, + ct_cltc = NULL, optional = TRUE )) - # If `cl` is supplied but `ct` is not, then err. - expect_error(assert_cl( - ct = NULL, - cl = "C71113", + # If `ct_cltc` is supplied but `ct_spec` is not, then err. + expect_error(assert_ct_cltc( + ct_spec = NULL, + ct_cltc = "C71113", optional = FALSE )) - expect_error(assert_cl( - ct = NULL, - cl = "C71113", + expect_error(assert_ct_cltc( + ct_spec = NULL, + ct_cltc = "C71113", optional = TRUE )) - # If `ct` is supplied but `cl` is NULL, then err if `cl` is not optional, or - # return `cl` invisibly. - expect_error(assert_cl( - ct = ct, - cl = NULL, + # If `ct_spec` is supplied but `ct_cltc` is NULL, then err if `ct_cltc` is not optional, or + # return `ct_cltc` invisibly. + expect_error(assert_ct_cltc( + ct_spec = ct_spec, + ct_cltc = NULL, optional = FALSE )) - expect_no_error(assert_cl( - ct = ct, - cl = NULL, + expect_no_error(assert_ct_cltc( + ct_spec = ct_spec, + ct_cltc = NULL, optional = TRUE )) - expect_null(assert_cl( - ct = ct, - cl = NULL, + expect_null(assert_ct_cltc( + ct_spec = ct_spec, + ct_cltc = NULL, optional = TRUE )) - # If both `ct` and `cl` are supplied, then `ct` must be a valid controlled - # terminology data set and `cl` must contain a code-list code available among - # the possibilities in column `codelist_code` (as returned by `ct_vars("cl")`). - expect_error(assert_cl( - ct = ct, - cl = "foo", + # If both `ct_spec` and `ct_cltc` are supplied, then `ct_spec` must be a valid controlled + # terminology data set and `ct_cltc` must contain a code-list code available among + # the possibilities in column `codelist_code` (as returned by `ct_spec_vars("ct_cltc")`). + expect_error(assert_ct_cltc( + ct_spec = ct_spec, + ct_cltc = "foo", optional = FALSE )) - expect_error(assert_cl( - ct = ct, - cl = "", + expect_error(assert_ct_cltc( + ct_spec = ct_spec, + ct_cltc = "", optional = FALSE )) - expect_error(assert_cl( - ct = ct, - cl = NA_character_, + expect_error(assert_ct_cltc( + ct_spec = ct_spec, + ct_cltc = NA_character_, optional = FALSE )) - expect_error(assert_cl( - ct = ct, - cl = NA_character_, + expect_error(assert_ct_cltc( + ct_spec = ct_spec, + ct_cltc = NA_character_, optional = TRUE )) - expect_identical(assert_cl( - ct = ct, - cl = "C71113", + expect_identical(assert_ct_cltc( + ct_spec = ct_spec, + ct_cltc = "C71113", optional = FALSE ), "C71113") - expect_identical(assert_cl( - ct = ct, - cl = "C66726", + expect_identical(assert_ct_cltc( + ct_spec = ct_spec, + ct_cltc = "C66726", optional = FALSE ), "C66726") - expect_identical(assert_cl( - ct = ct, - cl = "C71113", + expect_identical(assert_ct_cltc( + ct_spec = ct_spec, + ct_cltc = "C71113", optional = TRUE ), "C71113") - expect_identical(assert_cl( - ct = ct, - cl = "C66726", + expect_identical(assert_ct_cltc( + ct_spec = ct_spec, + ct_cltc = "C66726", optional = TRUE ), "C66726") }) -test_that("assert_cl(): when ct is empty", { - ct <- +test_that("assert_ct_cltc(): when ct_spec is empty", { + ct_spec <- data.frame( codelist_code = character(), collected_value = character(), @@ -199,67 +199,67 @@ test_that("assert_cl(): when ct is empty", { stringsAsFactors = FALSE ) - # If `ct` is supplied but `cl` is NULL, then err if `cl` is not optional, or - # return `cl` invisibly. - expect_error(assert_cl( - ct = ct, - cl = NULL, + # If `ct_spec` is supplied but `ct_cltc` is NULL, then err if `ct_cltc` is not optional, or + # return `ct_cltc` invisibly. + expect_error(assert_ct_cltc( + ct_spec = ct_spec, + ct_cltc = NULL, optional = FALSE )) - expect_no_error(assert_cl( - ct = ct, - cl = NULL, + expect_no_error(assert_ct_cltc( + ct_spec = ct_spec, + ct_cltc = NULL, optional = TRUE )) - expect_null(assert_cl( - ct = ct, - cl = NULL, + expect_null(assert_ct_cltc( + ct_spec = ct_spec, + ct_cltc = NULL, optional = TRUE )) - # If both `ct` and `cl` are supplied, then `ct` must be a valid controlled - # terminology data set and `cl` must contain a code-list code available among - # the possibilities in column `codelist_code` (as returned by `ct_vars("cl")`). - expect_error(assert_cl( - ct = ct, - cl = "foo", + # If both `ct_spec` and `ct_cltc` are supplied, then `ct_spec` must be a valid controlled + # terminology data set and `ct_cltc` must contain a code-list code available among + # the possibilities in column `codelist_code` (as returned by `ct_spec_vars("ct_cltc")`). + expect_error(assert_ct_cltc( + ct_spec = ct_spec, + ct_cltc = "foo", optional = FALSE )) - expect_error(assert_cl( - ct = ct, - cl = "", + expect_error(assert_ct_cltc( + ct_spec = ct_spec, + ct_cltc = "", optional = FALSE )) - expect_error(assert_cl( - ct = ct, - cl = NA_character_, + expect_error(assert_ct_cltc( + ct_spec = ct_spec, + ct_cltc = NA_character_, optional = FALSE )) - expect_error(assert_cl( - ct = ct, - cl = NA_character_, + expect_error(assert_ct_cltc( + ct_spec = ct_spec, + ct_cltc = NA_character_, optional = TRUE )) - expect_error(assert_cl( - ct = ct, - cl = "C71113", + expect_error(assert_ct_cltc( + ct_spec = ct_spec, + ct_cltc = "C71113", optional = FALSE )) - expect_error(assert_cl( - ct = ct, - cl = "C71113", + expect_error(assert_ct_cltc( + ct_spec = ct_spec, + ct_cltc = "C71113", optional = TRUE )) }) test_that("ct_mappings(): works as expected", { - ct <- read_ct_example("ct-01-cm") - ct_qd <- dplyr::filter(ct, term_code == "C25473") + ct_spec <- read_ct_spec_example("ct-01-cm") + ct_spec_qd <- dplyr::filter(ct_spec, term_code == "C25473") expect_identical( - ct_mappings(ct = ct_qd), + ct_mappings(ct_spec = ct_spec_qd), tibble::tibble( from = c("QD", "QD (Every Day)", "/day", "Daily", "Per Day"), to = rep("QD", 5L) From a7182070809f966831f36ba2838add10982b6e71 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Thu, 4 Apr 2024 01:34:33 +0100 Subject: [PATCH 61/80] Finish pending renaming of variables --- NEWS.md | 4 ++-- _pkgdown.yml | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/NEWS.md b/NEWS.md index b5d3c2f5..297b7d53 100644 --- a/NEWS.md +++ b/NEWS.md @@ -7,8 +7,8 @@ * New functions for basic SDTM derivations: ` assign_no_ct()`, `assign_ct()`, `hardcode_no_ct()` and `hardcode_ct()`. -* New functions for handling controlled terminologies: `read_ct()`, -`read_ct_example()`, `ct_example()` and `ct_map()`. +* New functions for handling controlled terminologies: `read_ct_spec()`, +`read_ct_spec_example()`, `ct_spec_example()` and `ct_map()`. # sdtm.oak 0.0.0.9001 (development version) diff --git a/_pkgdown.yml b/_pkgdown.yml index e4ae3b89..2f306131 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -17,9 +17,9 @@ reference: - title: Controlled terminology contents: - - read_ct - - read_ct_example - - ct_example + - read_ct_spec + - read_ct_spec_example + - ct_spec_example - ct_map - title: Date and time conversion From 8cc8dcb9815a41d85b154974be2c193eb511157c Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Thu, 4 Apr 2024 02:12:52 +0100 Subject: [PATCH 62/80] Rename code-list to codelist --- R/assign.R | 4 ++-- R/ct.R | 18 +++++++++--------- R/hardcode.R | 4 ++-- man/assert_ct_cltc.Rd | 8 ++++---- man/assign.Rd | 2 +- man/ct_map.Rd | 8 ++++---- man/ct_spec_vars.Rd | 2 +- man/harcode.Rd | 2 +- man/sdtm_assign.Rd | 2 +- man/sdtm_hardcode.Rd | 2 +- tests/testthat/test-ct.R | 8 ++++---- 11 files changed, 30 insertions(+), 30 deletions(-) diff --git a/R/assign.R b/R/assign.R index 6ff07397..25745359 100644 --- a/R/assign.R +++ b/R/assign.R @@ -14,7 +14,7 @@ #' @param ct_spec Study controlled terminology specification: a dataframe with a #' minimal set of columns, see [ct_spec_vars()] for details. This parameter is #' optional, if left as `NULL` no controlled terminology recoding is applied. -#' @param ct_cltc A code-list code indicating which subset of the controlled +#' @param ct_cltc A codelist code indicating which subset of the controlled #' terminology to apply in the derivation. This parameter is optional, if left #' as `NULL`, all possible recodings in `ct_spec` are attempted. #' @param tgt_dat Target dataset: a data frame to be merged against `raw_dat` by @@ -95,7 +95,7 @@ sdtm_assign <- function(raw_dat, #' of variable to be derived. #' @param ct_spec Study controlled terminology specification: a dataframe with a #' minimal set of columns, see [ct_spec_vars()] for details. -#' @param ct_cltc A code-list code indicating which subset of the controlled +#' @param ct_cltc A codelist code indicating which subset of the controlled #' terminology to apply in the derivation. #' @param tgt_dat Target dataset: a data frame to be merged against `raw_dat` by #' the variables indicated in `id_vars`. This parameter is optional, see diff --git a/R/ct.R b/R/ct.R index 544e93cc..246043a2 100644 --- a/R/ct.R +++ b/R/ct.R @@ -8,7 +8,7 @@ #' If only the subset of variables used for matching terms are needed, then #' request this subset of variables by passing the argument value `"from"`. If #' only the mapping-to variable is to be requested, then simply pass `"to"`. If -#' only the code-list code variable name is needed then pass `"ct_cltc"`. +#' only the codelist code variable name is needed then pass `"ct_cltc"`. #' #' @param set A scalar character (string), one of: `"all"` (default), `"ct_cltc"`, #' `"from"` or `"to"`. @@ -113,18 +113,18 @@ assert_ct_spec <- function(ct_spec, optional = FALSE) { invisible(ct_spec) } -#' Assert a code-list code +#' Assert a codelist code #' -#' [assert_ct_cltc()] asserts the validity of a code-list code in the context of +#' [assert_ct_cltc()] asserts the validity of a codelist code in the context of #' a controlled terminology specification. #' #' @param ct_spec Either a data frame encoding a controlled terminology data set, or #' `NULL`. -#' @param ct_cltc A string with a to-be asserted code-list code, or `NULL`. +#' @param ct_cltc A string with a to-be asserted codelist code, or `NULL`. #' @param optional A scalar logical, indicating whether `ct_cltc` can be `NULL` or #' not. #' -#' @returns The function throws an error if `ct_cltc` is not a valid code-list code +#' @returns The function throws an error if `ct_cltc` is not a valid codelist code #' given the controlled terminology data set; otherwise, `ct_cltc` is returned #' invisibly. #' @@ -256,8 +256,8 @@ ct_mappings <- function(ct_spec, from = ct_spec_vars("from"), to = ct_spec_vars( #' @param ct_spec A [tibble][tibble::tibble-package] providing a controlled #' terminology specification. #' @param ct_cltc A character vector indicating a set of possible controlled -#' terminology code-lists codes to be used for recoding. By default (`NULL`) -#' all code-lists available in `ct_spec` are used. +#' terminology codelists codes to be used for recoding. By default (`NULL`) +#' all codelists available in `ct_spec` are used. #' @param from A character vector of column names indicating the variables #' containing values to be matched against for terminology recoding. #' @param to A single string indicating the column whose values are to be @@ -285,9 +285,9 @@ ct_mappings <- function(ct_spec, from = ct_spec_vars("from"), to = ct_spec_vars( #' # Use all possible matching terms in the controlled terminology. #' ct_map(x = terms, ct_spec = ct_spec) #' -#' # Note that if the controlled terminology mapping is restricted to a code-list +#' # Note that if the controlled terminology mapping is restricted to a codelist #' # code, e.g. C71113, then only `"/day"` gets mapped to `"QD"`; remaining terms -#' # won't match given the code-list code restriction, and will be mapped to an +#' # won't match given the codelist code restriction, and will be mapped to an #' # uppercase version of the original terms. #' ct_map(x = terms, ct_spec = ct_spec, ct_cltc = "C71113") #' diff --git a/R/hardcode.R b/R/hardcode.R index a86e5df7..f05d2973 100644 --- a/R/hardcode.R +++ b/R/hardcode.R @@ -16,7 +16,7 @@ #' @param ct_spec Study controlled terminology specification: a dataframe with a #' minimal set of columns, see [ct_spec_vars()] for details. This parameter is #' optional, if left as `NULL` no controlled terminology recoding is applied. -#' @param ct_cltc A code-list code indicating which subset of the controlled +#' @param ct_cltc A codelist code indicating which subset of the controlled #' terminology to apply in the derivation. This parameter is optional, if left #' as `NULL`, all possible recodings in `ct_spec` are attempted. #' @param tgt_dat Target dataset: a data frame to be merged against `raw_dat` by @@ -102,7 +102,7 @@ sdtm_hardcode <- function(raw_dat, #' @param ct_spec Study controlled terminology specification: a dataframe with a #' minimal set of columns, see [ct_spec_vars()] for details. This parameter is #' optional, if left as `NULL` no controlled terminology recoding is applied. -#' @param ct_cltc A code-list code indicating which subset of the controlled +#' @param ct_cltc A codelist code indicating which subset of the controlled #' terminology to apply in the derivation. This parameter is optional, if left #' as `NULL`, all possible recodings in `ct_spec` are attempted. #' @param tgt_dat Target dataset: a data frame to be merged against `raw_dat` by diff --git a/man/assert_ct_cltc.Rd b/man/assert_ct_cltc.Rd index 774b7e86..1a612ea6 100644 --- a/man/assert_ct_cltc.Rd +++ b/man/assert_ct_cltc.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/ct.R \name{assert_ct_cltc} \alias{assert_ct_cltc} -\title{Assert a code-list code} +\title{Assert a codelist code} \usage{ assert_ct_cltc(ct_spec, ct_cltc, optional = FALSE) } @@ -10,18 +10,18 @@ assert_ct_cltc(ct_spec, ct_cltc, optional = FALSE) \item{ct_spec}{Either a data frame encoding a controlled terminology data set, or \code{NULL}.} -\item{ct_cltc}{A string with a to-be asserted code-list code, or \code{NULL}.} +\item{ct_cltc}{A string with a to-be asserted codelist code, or \code{NULL}.} \item{optional}{A scalar logical, indicating whether \code{ct_cltc} can be \code{NULL} or not.} } \value{ -The function throws an error if \code{ct_cltc} is not a valid code-list code +The function throws an error if \code{ct_cltc} is not a valid codelist code given the controlled terminology data set; otherwise, \code{ct_cltc} is returned invisibly. } \description{ -\code{\link[=assert_ct_cltc]{assert_ct_cltc()}} asserts the validity of a code-list code in the context of +\code{\link[=assert_ct_cltc]{assert_ct_cltc()}} asserts the validity of a codelist code in the context of a controlled terminology specification. } \examples{ diff --git a/man/assign.Rd b/man/assign.Rd index 34828a1c..f74a9b6d 100644 --- a/man/assign.Rd +++ b/man/assign.Rd @@ -44,7 +44,7 @@ section Value for how the output changes depending on this argument value.} \item{ct_spec}{Study controlled terminology specification: a dataframe with a minimal set of columns, see \code{\link[=ct_spec_vars]{ct_spec_vars()}} for details.} -\item{ct_cltc}{A code-list code indicating which subset of the controlled +\item{ct_cltc}{A codelist code indicating which subset of the controlled terminology to apply in the derivation.} } \value{ diff --git a/man/ct_map.Rd b/man/ct_map.Rd index a2a74553..6da53618 100644 --- a/man/ct_map.Rd +++ b/man/ct_map.Rd @@ -20,8 +20,8 @@ terminology.} terminology specification.} \item{ct_cltc}{A character vector indicating a set of possible controlled -terminology code-lists codes to be used for recoding. By default (\code{NULL}) -all code-lists available in \code{ct_spec} are used.} +terminology codelists codes to be used for recoding. By default (\code{NULL}) +all codelists available in \code{ct_spec} are used.} \item{from}{A character vector of column names indicating the variables containing values to be matched against for terminology recoding.} @@ -55,9 +55,9 @@ terms <- # Use all possible matching terms in the controlled terminology. ct_map(x = terms, ct_spec = ct_spec) -# Note that if the controlled terminology mapping is restricted to a code-list +# Note that if the controlled terminology mapping is restricted to a codelist # code, e.g. C71113, then only `"/day"` gets mapped to `"QD"`; remaining terms -# won't match given the code-list code restriction, and will be mapped to an +# won't match given the codelist code restriction, and will be mapped to an # uppercase version of the original terms. ct_map(x = terms, ct_spec = ct_spec, ct_cltc = "C71113") diff --git a/man/ct_spec_vars.Rd b/man/ct_spec_vars.Rd index f4af9dbe..929030bb 100644 --- a/man/ct_spec_vars.Rd +++ b/man/ct_spec_vars.Rd @@ -18,7 +18,7 @@ variables. If only the subset of variables used for matching terms are needed, then request this subset of variables by passing the argument value \code{"from"}. If only the mapping-to variable is to be requested, then simply pass \code{"to"}. If -only the code-list code variable name is needed then pass \code{"ct_cltc"}. +only the codelist code variable name is needed then pass \code{"ct_cltc"}. } \examples{ # These two calls are equivalent and return all required variables in a diff --git a/man/harcode.Rd b/man/harcode.Rd index 6201b950..327a4e67 100644 --- a/man/harcode.Rd +++ b/man/harcode.Rd @@ -50,7 +50,7 @@ section Value for how the output changes depending on this argument value.} minimal set of columns, see \code{\link[=ct_spec_vars]{ct_spec_vars()}} for details. This parameter is optional, if left as \code{NULL} no controlled terminology recoding is applied.} -\item{ct_cltc}{A code-list code indicating which subset of the controlled +\item{ct_cltc}{A codelist code indicating which subset of the controlled terminology to apply in the derivation. This parameter is optional, if left as \code{NULL}, all possible recodings in \code{ct_spec} are attempted.} } diff --git a/man/sdtm_assign.Rd b/man/sdtm_assign.Rd index d49364d0..47b6db1d 100644 --- a/man/sdtm_assign.Rd +++ b/man/sdtm_assign.Rd @@ -28,7 +28,7 @@ of variable to be derived.} minimal set of columns, see \code{\link[=ct_spec_vars]{ct_spec_vars()}} for details. This parameter is optional, if left as \code{NULL} no controlled terminology recoding is applied.} -\item{ct_cltc}{A code-list code indicating which subset of the controlled +\item{ct_cltc}{A codelist code indicating which subset of the controlled terminology to apply in the derivation. This parameter is optional, if left as \code{NULL}, all possible recodings in \code{ct_spec} are attempted.} diff --git a/man/sdtm_hardcode.Rd b/man/sdtm_hardcode.Rd index 89722bf1..59029e01 100644 --- a/man/sdtm_hardcode.Rd +++ b/man/sdtm_hardcode.Rd @@ -32,7 +32,7 @@ indicated in \code{tgt_var}.} minimal set of columns, see \code{\link[=ct_spec_vars]{ct_spec_vars()}} for details. This parameter is optional, if left as \code{NULL} no controlled terminology recoding is applied.} -\item{ct_cltc}{A code-list code indicating which subset of the controlled +\item{ct_cltc}{A codelist code indicating which subset of the controlled terminology to apply in the derivation. This parameter is optional, if left as \code{NULL}, all possible recodings in \code{ct_spec} are attempted.} diff --git a/tests/testthat/test-ct.R b/tests/testthat/test-ct.R index f2a4ce8d..07f28ea4 100644 --- a/tests/testthat/test-ct.R +++ b/tests/testthat/test-ct.R @@ -55,12 +55,12 @@ test_that("assert_ct_spec() works as expected", { expect_identical(assert_ct_spec(ct_spec, optional = TRUE), ct_spec) expect_null(assert_ct_spec(NULL, optional = TRUE)) - # Code-list code column is one of the key variables that must be present + # Codelist code column is one of the key variables that must be present # in `ct_spec`, so being missing should trigger an error. expect_error(assert_ct_spec(ct_spec[setdiff(cols, ct_cltc_col)], optional = FALSE)) expect_error(assert_ct_spec(ct_spec[setdiff(cols, ct_cltc_col)], optional = TRUE)) - # The code-list code and the "to" columns of a controlled terminology should + # The codelist code and the "to" columns of a controlled terminology should # not contain NAs, as otherwise the mapping is undefined. If that happens # an error is triggered. ct_spec01 <- ct_spec @@ -143,7 +143,7 @@ test_that("assert_ct_cltc() works as expected", { )) # If both `ct_spec` and `ct_cltc` are supplied, then `ct_spec` must be a valid controlled - # terminology data set and `ct_cltc` must contain a code-list code available among + # terminology data set and `ct_cltc` must contain a codelist code available among # the possibilities in column `codelist_code` (as returned by `ct_spec_vars("ct_cltc")`). expect_error(assert_ct_cltc( ct_spec = ct_spec, @@ -218,7 +218,7 @@ test_that("assert_ct_cltc(): when ct_spec is empty", { )) # If both `ct_spec` and `ct_cltc` are supplied, then `ct_spec` must be a valid controlled - # terminology data set and `ct_cltc` must contain a code-list code available among + # terminology data set and `ct_cltc` must contain a codelist code available among # the possibilities in column `codelist_code` (as returned by `ct_spec_vars("ct_cltc")`). expect_error(assert_ct_cltc( ct_spec = ct_spec, From 609b60e28e41597e04a2412b1c73ddd2d4fcfdd6 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Thu, 4 Apr 2024 02:16:55 +0100 Subject: [PATCH 63/80] Fix style --- R/ct.R | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/R/ct.R b/R/ct.R index 246043a2..310b7c54 100644 --- a/R/ct.R +++ b/R/ct.R @@ -145,7 +145,6 @@ assert_ct_spec <- function(ct_spec, optional = FALSE) { #' #' @keywords internal assert_ct_cltc <- function(ct_spec, ct_cltc, optional = FALSE) { - is_ct_spec_missing <- is.null(ct_spec) is_ct_cltc_missing <- is.null(ct_cltc) is_required_ct_cltc_missing <- is_ct_cltc_missing && !optional @@ -271,13 +270,15 @@ ct_mappings <- function(ct_spec, from = ct_spec_vars("from"), to = ct_spec_vars( #' @examples #' # A few example terms. #' terms <- -#' c("/day", +#' c( +#' "/day", #' "Yes", #' "Unknown", #' "Prior", #' "Every 2 hours", #' "Percentage", -#' "International Unit") +#' "International Unit" +#' ) #' #' # Load a controlled terminology example #' (ct_spec <- read_ct_spec_example("ct-01-cm")) From e8beefc739287a245a0fd3984b7aa8093a5c7a63 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Thu, 4 Apr 2024 02:17:48 +0100 Subject: [PATCH 64/80] Fix style --- man/ct_map.Rd | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/man/ct_map.Rd b/man/ct_map.Rd index 6da53618..7763c7e3 100644 --- a/man/ct_map.Rd +++ b/man/ct_map.Rd @@ -41,13 +41,15 @@ returned unchanged. \examples{ # A few example terms. terms <- - c("/day", + c( + "/day", "Yes", "Unknown", "Prior", "Every 2 hours", "Percentage", - "International Unit") + "International Unit" + ) # Load a controlled terminology example (ct_spec <- read_ct_spec_example("ct-01-cm")) From b854a10feacf267cb73b4d2736c74c8924d8408e Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Thu, 4 Apr 2024 17:43:37 +0100 Subject: [PATCH 65/80] Add assertions to `assign_datetime()` --- R/assign_datetime.R | 30 ++++++++++++++++++++---------- _pkgdown.yml | 1 + 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/R/assign_datetime.R b/R/assign_datetime.R index 2abc82d0..7c2b66f6 100644 --- a/R/assign_datetime.R +++ b/R/assign_datetime.R @@ -47,11 +47,11 @@ #' 7L, "MD1", 377, "20-UNK-2019", "20-UNK-2019", NA, #' 8L, "MD1", 378, "UN-UNK-2020", "UN-UNK-2020", NA, #' 9L, "MD1", 378, "26-Jan-20", "26-Jan-20", "07:00:00", -#' 10L, "MD1", 378, "28-Jan-20", "1-Feb-20", NA, -#' 11L, "MD1", 378, "12-Feb-20", "18-Feb-20", NA, -#' 12L, "MD1", 379, "10-UNK-2020", "20-UNK-2020", NA, -#' 13L, "MD1", 379, NA, NA, NA, -#' 14L, "MD1", 379, NA, "17-Feb-20", NA +#' 10L, "MD1", 378, "28-Jan-20", "1-Feb-20", NA, +#' 11L, "MD1", 378, "12-Feb-20", "18-Feb-20", NA, +#' 12L, "MD1", 379, "10-UNK-2020", "20-UNK-2020", NA, +#' 13L, "MD1", 379, NA, NA, NA, +#' 14L, "MD1", 379, NA, "17-Feb-20", NA #' ) #' #' cm <- @@ -76,17 +76,28 @@ assign_datetime <- tgt_dat = NULL, id_vars = oak_id_vars(), .warn = TRUE) { + admiraldev::assert_character_vector(raw_var) + admiraldev::assert_character_scalar(tgt_var) + admiraldev::assert_character_vector(id_vars) + assertthat::assert_that(contains_oak_id_vars(id_vars), + msg = "`id_vars` must include the oak id vars." + ) + admiraldev::assert_data_frame(raw_dat, required_vars = rlang::syms(c(id_vars, raw_var))) + admiraldev::assert_data_frame(tgt_dat, required_vars = rlang::syms(id_vars), optional = TRUE) + admiraldev::assert_character_vector(raw_unk) + admiraldev::assert_logical_scalar(.warn) tgt_val <- create_iso8601(!!!raw_dat[raw_var], - .format = raw_fmt, - .na = raw_unk, - .warn = .warn) + .format = raw_fmt, + .na = raw_unk, + .warn = .warn + ) der_dat <- raw_dat |> dplyr::select(c(id_vars, raw_var)) |> - dplyr::mutate("{tgt_var}" := tgt_val) |> + dplyr::mutate("{tgt_var}" := tgt_val) |> # nolint object_name_linter() dplyr::select(-rlang::sym(raw_var)) der_dat <- @@ -99,5 +110,4 @@ assign_datetime <- } der_dat - } diff --git a/_pkgdown.yml b/_pkgdown.yml index 2f306131..13e86fbf 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -14,6 +14,7 @@ reference: - assign - harcode - derive_study_day + - assign_datetime - title: Controlled terminology contents: From c737310d542acf77370090941ba9dd44bbe5749e Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Thu, 4 Apr 2024 18:09:19 +0100 Subject: [PATCH 66/80] Add merge example to `assign_datetime()` doc --- R/assign_datetime.R | 58 +++++++++++++++++++++++++++++++++-- man/assign_datetime.Rd | 68 +++++++++++++++++++++++++++++++++++++----- 2 files changed, 115 insertions(+), 11 deletions(-) diff --git a/R/assign_datetime.R b/R/assign_datetime.R index 7c2b66f6..6cfbcfeb 100644 --- a/R/assign_datetime.R +++ b/R/assign_datetime.R @@ -54,7 +54,7 @@ #' 14L, "MD1", 379, NA, "17-Feb-20", NA #' ) #' -#' cm <- +#' cm1 <- #' assign_datetime( #' raw_dat = md1, #' raw_var = "MDBDR", @@ -63,8 +63,60 @@ #' tgt_var = "CMSTDTC" #' ) #' -#' cm -#' problems(cm$CMSTDTC) +#' cm1 +#' problems(cm1$CMSTDTC) +#' +#' cm_inter <- +#' tibble::tibble( +#' oak_id = 1L:14L, +#' raw_source = "MD1", +#' patient_number = c(375, 375, 376, 377, 377, 377, 377, 378, +#' 378, 378, 378, 379, 379, 379), +#' CMTRT = c( +#' "BABY ASPIRIN", +#' "CORTISPORIN", +#' "ASPIRIN", +#' "DIPHENHYDRAMINE HCL", +#' "PARCETEMOL", +#' "VOMIKIND", +#' "ZENFLOX OZ", +#' "AMITRYPTYLINE", +#' "BENADRYL", +#' "DIPHENHYDRAMINE HYDROCHLORIDE", +#' "TETRACYCLINE", +#' "BENADRYL", +#' "SOMINEX", +#' "ZQUILL" +#' ), +#' CMINDC = c( +#' "NA", +#' "NAUSEA", +#' "ANEMIA", +#' "NAUSEA", +#' "PYREXIA", +#' "VOMITINGS", +#' "DIARHHEA", +#' "COLD", +#' "FEVER", +#' "LEG PAIN", +#' "FEVER", +#' "COLD", +#' "COLD", +#' "PAIN" +#' ) +#' ) +#' +#' cm2 <- +#' assign_datetime( +#' raw_dat = md1, +#' raw_var = "MDBDR", +#' raw_fmt = "d-m-y", +#' tgt_var = "CMSTDTC", +#' tgt_dat = cm_inter +#' ) +#' +#' cm2 +#' problems(cm2$CMSTDTC) #' #' @export assign_datetime <- diff --git a/man/assign_datetime.Rd b/man/assign_datetime.Rd index b26cf7fd..1a82e5fd 100644 --- a/man/assign_datetime.Rd +++ b/man/assign_datetime.Rd @@ -74,14 +74,14 @@ md1 <- 7L, "MD1", 377, "20-UNK-2019", "20-UNK-2019", NA, 8L, "MD1", 378, "UN-UNK-2020", "UN-UNK-2020", NA, 9L, "MD1", 378, "26-Jan-20", "26-Jan-20", "07:00:00", - 10L, "MD1", 378, "28-Jan-20", "1-Feb-20", NA, - 11L, "MD1", 378, "12-Feb-20", "18-Feb-20", NA, - 12L, "MD1", 379, "10-UNK-2020", "20-UNK-2020", NA, - 13L, "MD1", 379, NA, NA, NA, - 14L, "MD1", 379, NA, "17-Feb-20", NA + 10L, "MD1", 378, "28-Jan-20", "1-Feb-20", NA, + 11L, "MD1", 378, "12-Feb-20", "18-Feb-20", NA, + 12L, "MD1", 379, "10-UNK-2020", "20-UNK-2020", NA, + 13L, "MD1", 379, NA, NA, NA, + 14L, "MD1", 379, NA, "17-Feb-20", NA ) -cm <- +cm1 <- assign_datetime( raw_dat = md1, raw_var = "MDBDR", @@ -90,7 +90,59 @@ cm <- tgt_var = "CMSTDTC" ) -cm -problems(cm$CMSTDTC) +cm1 +problems(cm1$CMSTDTC) + +cm_inter <- + tibble::tibble( + oak_id = 1L:14L, + raw_source = "MD1", + patient_number = c(375, 375, 376, 377, 377, 377, 377, 378, + 378, 378, 378, 379, 379, 379), + CMTRT = c( + "BABY ASPIRIN", + "CORTISPORIN", + "ASPIRIN", + "DIPHENHYDRAMINE HCL", + "PARCETEMOL", + "VOMIKIND", + "ZENFLOX OZ", + "AMITRYPTYLINE", + "BENADRYL", + "DIPHENHYDRAMINE HYDROCHLORIDE", + "TETRACYCLINE", + "BENADRYL", + "SOMINEX", + "ZQUILL" + ), + CMINDC = c( + "NA", + "NAUSEA", + "ANEMIA", + "NAUSEA", + "PYREXIA", + "VOMITINGS", + "DIARHHEA", + "COLD", + "FEVER", + "LEG PAIN", + "FEVER", + "COLD", + "COLD", + "PAIN" + ) + ) + +cm2 <- + assign_datetime( + raw_dat = md1, + raw_var = "MDBDR", + raw_fmt = "d-m-y", + tgt_var = "CMSTDTC", + tgt_dat = cm_inter + ) + +cm2 +problems(cm2$CMSTDTC) } From bbbadd3adace35f933c544712635988b19f59893 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Thu, 4 Apr 2024 18:10:39 +0100 Subject: [PATCH 67/80] Style changes --- R/assign_datetime.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/R/assign_datetime.R b/R/assign_datetime.R index 6cfbcfeb..5d5e6eca 100644 --- a/R/assign_datetime.R +++ b/R/assign_datetime.R @@ -70,8 +70,10 @@ #' tibble::tibble( #' oak_id = 1L:14L, #' raw_source = "MD1", -#' patient_number = c(375, 375, 376, 377, 377, 377, 377, 378, -#' 378, 378, 378, 379, 379, 379), +#' patient_number = c( +#' 375, 375, 376, 377, 377, 377, 377, 378, +#' 378, 378, 378, 379, 379, 379 +#' ), #' CMTRT = c( #' "BABY ASPIRIN", #' "CORTISPORIN", From 54a64603cb586ec6c3a87536f0aa7cf16c0039c9 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Thu, 4 Apr 2024 18:11:32 +0100 Subject: [PATCH 68/80] Style changes (.Rd) --- man/assign_datetime.Rd | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/man/assign_datetime.Rd b/man/assign_datetime.Rd index 1a82e5fd..4b2c2f8d 100644 --- a/man/assign_datetime.Rd +++ b/man/assign_datetime.Rd @@ -97,8 +97,10 @@ cm_inter <- tibble::tibble( oak_id = 1L:14L, raw_source = "MD1", - patient_number = c(375, 375, 376, 377, 377, 377, 377, 378, - 378, 378, 378, 379, 379, 379), + patient_number = c( + 375, 375, 376, 377, 377, 377, 377, 378, + 378, 378, 378, 379, 379, 379 + ), CMTRT = c( "BABY ASPIRIN", "CORTISPORIN", From 79e79da870f8bcc2638d28916137cb0e420dc94d Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Thu, 4 Apr 2024 18:15:07 +0100 Subject: [PATCH 69/80] Bump version and update news --- DESCRIPTION | 2 +- NEWS.md | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 8abff617..721a589a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: sdtm.oak Type: Package Title: SDTM Data Transformation Engine -Version: 0.0.0.9002 +Version: 0.0.0.9003 Authors@R: c( person("Rammprasad", "Ganapathy", role = c("aut", "cre"), email = "ganapathy.rammprasad@gene.com"), diff --git a/NEWS.md b/NEWS.md index 297b7d53..8db329a1 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,9 @@ +# sdtm.oak 0.0.0.9003 (development version) + +## New Features + +* New function: `assign_datetime()` for deriving an ISO8601 date-time variable. + # sdtm.oak 0.0.0.9002 (development version) ## New Features From 42d4d5ab1042cf501589a3da63b765cf9e72aa1f Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Wed, 10 Apr 2024 15:39:39 +0100 Subject: [PATCH 70/80] Update `ct_map()` doc example --- R/ct.R | 7 ++++--- man/ct_map.Rd | 7 ++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/R/ct.R b/R/ct.R index 310b7c54..41cb4354 100644 --- a/R/ct.R +++ b/R/ct.R @@ -287,9 +287,10 @@ ct_mappings <- function(ct_spec, from = ct_spec_vars("from"), to = ct_spec_vars( #' ct_map(x = terms, ct_spec = ct_spec) #' #' # Note that if the controlled terminology mapping is restricted to a codelist -#' # code, e.g. C71113, then only `"/day"` gets mapped to `"QD"`; remaining terms -#' # won't match given the codelist code restriction, and will be mapped to an -#' # uppercase version of the original terms. +#' # code, e.g. C71113, then only `"/day"` and `"Every 2 hours"` get mapped to +#' # `"QD"` and `"Q2H"`, respectively; remaining terms won't match given the +#' # codelist code # restriction, and will be mapped to an uppercase version of +#' # the original terms. #' ct_map(x = terms, ct_spec = ct_spec, ct_cltc = "C71113") #' #' @importFrom rlang %||% .data diff --git a/man/ct_map.Rd b/man/ct_map.Rd index 7763c7e3..a5f3318a 100644 --- a/man/ct_map.Rd +++ b/man/ct_map.Rd @@ -58,9 +58,10 @@ terms <- ct_map(x = terms, ct_spec = ct_spec) # Note that if the controlled terminology mapping is restricted to a codelist -# code, e.g. C71113, then only `"/day"` gets mapped to `"QD"`; remaining terms -# won't match given the codelist code restriction, and will be mapped to an -# uppercase version of the original terms. +# code, e.g. C71113, then only `"/day"` and `"Every 2 hours"` get mapped to +# `"QD"` and `"Q2H"`, respectively; remaining terms won't match given the +# codelist code # restriction, and will be mapped to an uppercase version of +# the original terms. ct_map(x = terms, ct_spec = ct_spec, ct_cltc = "C71113") } From 66644eb453d02b4fd759b31b075e6ed214716a8f Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Wed, 10 Apr 2024 15:47:48 +0100 Subject: [PATCH 71/80] Make tibbles more readable in doc examples --- R/hardcode.R | 20 ++++++++++---------- man/harcode.Rd | 20 ++++++++++---------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/R/hardcode.R b/R/hardcode.R index f05d2973..938f1394 100644 --- a/R/hardcode.R +++ b/R/hardcode.R @@ -124,10 +124,10 @@ sdtm_hardcode <- function(raw_dat, #' md1 <- #' tibble::tribble( #' ~oak_id, ~raw_source, ~patient_number, ~MDRAW, -#' 1L, "MD1", 101L, "BABY ASPIRIN", -#' 2L, "MD1", 102L, "CORTISPORIN", -#' 3L, "MD1", 103L, NA_character_, -#' 4L, "MD1", 104L, "DIPHENHYDRAMINE HCL" +#' 1L, "MD1", 101L, "BABY ASPIRIN", +#' 2L, "MD1", 102L, "CORTISPORIN", +#' 3L, "MD1", 103L, NA_character_, +#' 4L, "MD1", 104L, "DIPHENHYDRAMINE HCL" #' ) #' #' # Derive a new variable `CMCAT` by overwriting `MDRAW` with the @@ -141,12 +141,12 @@ sdtm_hardcode <- function(raw_dat, #' #' cm_inter <- #' tibble::tribble( -#' ~oak_id, ~raw_source, ~patient_number, ~CMTRT, ~CMINDC, -#' 1L, "MD1", 101L, "BABY ASPIRIN", NA, -#' 2L, "MD1", 102L, "CORTISPORIN", "NAUSEA", -#' 3L, "MD1", 103L, "ASPIRIN", "ANEMIA", -#' 4L, "MD1", 104L, "DIPHENHYDRAMINE HCL", "NAUSEA", -#' 5L, "MD1", 105L, "PARACETAMOL", "PYREXIA" +#' ~oak_id, ~raw_source, ~patient_number, ~CMTRT, ~CMINDC, +#' 1L, "MD1", 101L, "BABY ASPIRIN", NA, +#' 2L, "MD1", 102L, "CORTISPORIN", "NAUSEA", +#' 3L, "MD1", 103L, "ASPIRIN", "ANEMIA", +#' 4L, "MD1", 104L, "DIPHENHYDRAMINE HCL", "NAUSEA", +#' 5L, "MD1", 105L, "PARACETAMOL", "PYREXIA" #' ) #' #' # Derive a new variable `CMCAT` by overwriting `MDRAW` with the diff --git a/man/harcode.Rd b/man/harcode.Rd index 327a4e67..cdaefb11 100644 --- a/man/harcode.Rd +++ b/man/harcode.Rd @@ -78,10 +78,10 @@ controlled terminology recoding. md1 <- tibble::tribble( ~oak_id, ~raw_source, ~patient_number, ~MDRAW, - 1L, "MD1", 101L, "BABY ASPIRIN", - 2L, "MD1", 102L, "CORTISPORIN", - 3L, "MD1", 103L, NA_character_, - 4L, "MD1", 104L, "DIPHENHYDRAMINE HCL" + 1L, "MD1", 101L, "BABY ASPIRIN", + 2L, "MD1", 102L, "CORTISPORIN", + 3L, "MD1", 103L, NA_character_, + 4L, "MD1", 104L, "DIPHENHYDRAMINE HCL" ) # Derive a new variable `CMCAT` by overwriting `MDRAW` with the @@ -95,12 +95,12 @@ hardcode_no_ct( cm_inter <- tibble::tribble( - ~oak_id, ~raw_source, ~patient_number, ~CMTRT, ~CMINDC, - 1L, "MD1", 101L, "BABY ASPIRIN", NA, - 2L, "MD1", 102L, "CORTISPORIN", "NAUSEA", - 3L, "MD1", 103L, "ASPIRIN", "ANEMIA", - 4L, "MD1", 104L, "DIPHENHYDRAMINE HCL", "NAUSEA", - 5L, "MD1", 105L, "PARACETAMOL", "PYREXIA" + ~oak_id, ~raw_source, ~patient_number, ~CMTRT, ~CMINDC, + 1L, "MD1", 101L, "BABY ASPIRIN", NA, + 2L, "MD1", 102L, "CORTISPORIN", "NAUSEA", + 3L, "MD1", 103L, "ASPIRIN", "ANEMIA", + 4L, "MD1", 104L, "DIPHENHYDRAMINE HCL", "NAUSEA", + 5L, "MD1", 105L, "PARACETAMOL", "PYREXIA" ) # Derive a new variable `CMCAT` by overwriting `MDRAW` with the From bb2e0d2b68fcc81d356e27134082e28b39cc5068 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Wed, 10 Apr 2024 16:00:19 +0100 Subject: [PATCH 72/80] Rename `ct_cltc` to `ct_clst` As per @rammprasad's suggestion. --- R/assign.R | 16 +-- R/ct.R | 90 ++++++------ R/hardcode.R | 18 +-- man/{assert_ct_cltc.Rd => assert_ct_clst.Rd} | 26 ++-- man/assign.Rd | 6 +- man/ct_map.Rd | 6 +- man/ct_spec_vars.Rd | 8 +- man/harcode.Rd | 6 +- man/sdtm_assign.Rd | 4 +- man/sdtm_hardcode.Rd | 4 +- tests/testthat/test-ct.R | 142 +++++++++---------- 11 files changed, 163 insertions(+), 163 deletions(-) rename man/{assert_ct_cltc.Rd => assert_ct_clst.Rd} (53%) diff --git a/R/assign.R b/R/assign.R index 25745359..a91eacaf 100644 --- a/R/assign.R +++ b/R/assign.R @@ -14,7 +14,7 @@ #' @param ct_spec Study controlled terminology specification: a dataframe with a #' minimal set of columns, see [ct_spec_vars()] for details. This parameter is #' optional, if left as `NULL` no controlled terminology recoding is applied. -#' @param ct_cltc A codelist code indicating which subset of the controlled +#' @param ct_clst A codelist code indicating which subset of the controlled #' terminology to apply in the derivation. This parameter is optional, if left #' as `NULL`, all possible recodings in `ct_spec` are attempted. #' @param tgt_dat Target dataset: a data frame to be merged against `raw_dat` by @@ -39,7 +39,7 @@ sdtm_assign <- function(raw_dat, raw_var, tgt_var, ct_spec = NULL, - ct_cltc = NULL, + ct_clst = NULL, tgt_dat = NULL, id_vars = oak_id_vars()) { admiraldev::assert_character_scalar(raw_var) @@ -51,10 +51,10 @@ sdtm_assign <- function(raw_dat, admiraldev::assert_data_frame(raw_dat, required_vars = rlang::syms(c(id_vars, raw_var))) admiraldev::assert_data_frame(tgt_dat, required_vars = rlang::syms(id_vars), optional = TRUE) assert_ct_spec(ct_spec, optional = TRUE) - assert_ct_cltc(ct_spec = ct_spec, ct_cltc = ct_cltc, optional = TRUE) + assert_ct_clst(ct_spec = ct_spec, ct_clst = ct_clst, optional = TRUE) # Recode the raw variable following terminology. - tgt_val <- ct_map(raw_dat[[raw_var]], ct_spec = ct_spec, ct_cltc = ct_cltc) + tgt_val <- ct_map(raw_dat[[raw_var]], ct_spec = ct_spec, ct_clst = ct_clst) # Apply derivation by assigning `raw_var` to `tgt_var`. # `der_dat`: derived dataset. @@ -95,7 +95,7 @@ sdtm_assign <- function(raw_dat, #' of variable to be derived. #' @param ct_spec Study controlled terminology specification: a dataframe with a #' minimal set of columns, see [ct_spec_vars()] for details. -#' @param ct_cltc A codelist code indicating which subset of the controlled +#' @param ct_clst A codelist code indicating which subset of the controlled #' terminology to apply in the derivation. #' @param tgt_dat Target dataset: a data frame to be merged against `raw_dat` by #' the variables indicated in `id_vars`. This parameter is optional, see @@ -179,7 +179,7 @@ sdtm_assign <- function(raw_dat, #' raw_var = "MDIND", #' tgt_var = "CMINDC", #' ct_spec = ct_spec, -#' ct_cltc = "C66729", +#' ct_clst = "C66729", #' tgt_dat = cm_inter #' ) #' @@ -219,7 +219,7 @@ assign_ct <- function(raw_dat, raw_var, tgt_var, ct_spec, - ct_cltc, + ct_clst, tgt_dat = NULL, id_vars = oak_id_vars()) { admiraldev::assert_character_scalar(raw_var) @@ -238,6 +238,6 @@ assign_ct <- function(raw_dat, tgt_dat = tgt_dat, id_vars = id_vars, ct_spec = ct_spec, - ct_cltc = ct_cltc + ct_clst = ct_clst ) } diff --git a/R/ct.R b/R/ct.R index 41cb4354..32b0596a 100644 --- a/R/ct.R +++ b/R/ct.R @@ -8,9 +8,9 @@ #' If only the subset of variables used for matching terms are needed, then #' request this subset of variables by passing the argument value `"from"`. If #' only the mapping-to variable is to be requested, then simply pass `"to"`. If -#' only the codelist code variable name is needed then pass `"ct_cltc"`. +#' only the codelist code variable name is needed then pass `"ct_clst"`. #' -#' @param set A scalar character (string), one of: `"all"` (default), `"ct_cltc"`, +#' @param set A scalar character (string), one of: `"all"` (default), `"ct_clst"`, #' `"from"` or `"to"`. #' #' @examples @@ -20,7 +20,7 @@ #' sdtm.oak:::ct_spec_vars("all") #' #' # "Codelist code" variable name. -#' sdtm.oak:::ct_spec_vars("ct_cltc") +#' sdtm.oak:::ct_spec_vars("ct_clst") #' #' # "From" variables #' sdtm.oak:::ct_spec_vars("from") @@ -30,20 +30,20 @@ #' #' @keywords internal #' @export -ct_spec_vars <- function(set = c("all", "ct_cltc", "from", "to")) { +ct_spec_vars <- function(set = c("all", "ct_clst", "from", "to")) { admiraldev::assert_character_vector(set) set <- match.arg(set) - ct_cltc_var <- "codelist_code" + ct_clst_var <- "codelist_code" from_vars <- c("collected_value", "term_synonyms") to_var <- "term_value" if (identical(set, "all")) { - return(c(ct_cltc_var, from_vars, to_var)) + return(c(ct_clst_var, from_vars, to_var)) } - if (identical(set, "ct_cltc")) { - return(ct_cltc_var) + if (identical(set, "ct_clst")) { + return(ct_clst_var) } if (identical(set, "from")) { @@ -62,7 +62,7 @@ ct_spec_vars <- function(set = c("all", "ct_cltc", "from", "to")) { #' variables: `r knitr::combine_words(ct_spec_vars())`. #' #' In addition, it will also check if the data frame is not empty (no rows), and -#' whether the columns \code{`r ct_spec_vars('ct_cltc')`} and \code{`r ct_spec_vars('to')`} do +#' whether the columns \code{`r ct_spec_vars('ct_clst')`} and \code{`r ct_spec_vars('to')`} do #' not contain any `NA` values. #' #' @param ct_spec A data frame to be asserted as a valid controlled terminology data @@ -102,8 +102,8 @@ assert_ct_spec <- function(ct_spec, optional = FALSE) { rlang::abort("`ct_spec` can't be empty.") } - if (!is.null(ct_spec) && anyNA(ct_spec[[ct_spec_vars("ct_cltc")]])) { - rlang::abort(glue::glue("`{ct_spec_vars('ct_cltc')}` can't have any NA values.")) + if (!is.null(ct_spec) && anyNA(ct_spec[[ct_spec_vars("ct_clst")]])) { + rlang::abort(glue::glue("`{ct_spec_vars('ct_clst')}` can't have any NA values.")) } if (!is.null(ct_spec) && anyNA(ct_spec[[ct_spec_vars("to")]])) { @@ -115,17 +115,17 @@ assert_ct_spec <- function(ct_spec, optional = FALSE) { #' Assert a codelist code #' -#' [assert_ct_cltc()] asserts the validity of a codelist code in the context of +#' [assert_ct_clst()] asserts the validity of a codelist code in the context of #' a controlled terminology specification. #' #' @param ct_spec Either a data frame encoding a controlled terminology data set, or #' `NULL`. -#' @param ct_cltc A string with a to-be asserted codelist code, or `NULL`. -#' @param optional A scalar logical, indicating whether `ct_cltc` can be `NULL` or +#' @param ct_clst A string with a to-be asserted codelist code, or `NULL`. +#' @param optional A scalar logical, indicating whether `ct_clst` can be `NULL` or #' not. #' -#' @returns The function throws an error if `ct_cltc` is not a valid codelist code -#' given the controlled terminology data set; otherwise, `ct_cltc` is returned +#' @returns The function throws an error if `ct_clst` is not a valid codelist code +#' given the controlled terminology data set; otherwise, `ct_clst` is returned #' invisibly. #' #' @examples @@ -133,54 +133,54 @@ assert_ct_spec <- function(ct_spec, optional = FALSE) { #' (ct_spec <- read_ct_spec_example("ct-01-cm")) #' #' # Should work fine. -#' sdtm.oak:::assert_ct_cltc(ct_spec = ct_spec, ct_cltc = "C71113") +#' sdtm.oak:::assert_ct_clst(ct_spec = ct_spec, ct_clst = "C71113") #' -#' # In certain cases, you might allow `ct_cltc` to be `NULL` as to indicate absence, -#' # in that case, set `optional` to `TRUE` to make `assert_ct_cltc()` more +#' # In certain cases, you might allow `ct_clst` to be `NULL` as to indicate absence, +#' # in that case, set `optional` to `TRUE` to make `assert_ct_clst()` more #' # forgiving. -#' sdtm.oak:::assert_ct_cltc(ct_spec = ct_spec, ct_cltc = NULL, optional = TRUE) +#' sdtm.oak:::assert_ct_clst(ct_spec = ct_spec, ct_clst = NULL, optional = TRUE) #' #' # Otherwise it would err. -#' try(sdtm.oak:::assert_ct_cltc(ct_spec = ct_spec, ct_cltc = NULL, optional = FALSE)) +#' try(sdtm.oak:::assert_ct_clst(ct_spec = ct_spec, ct_clst = NULL, optional = FALSE)) #' #' @keywords internal -assert_ct_cltc <- function(ct_spec, ct_cltc, optional = FALSE) { +assert_ct_clst <- function(ct_spec, ct_clst, optional = FALSE) { is_ct_spec_missing <- is.null(ct_spec) - is_ct_cltc_missing <- is.null(ct_cltc) - is_required_ct_cltc_missing <- is_ct_cltc_missing && !optional - is_ct_cltc_without_ct_spec <- is_ct_spec_missing && !is_ct_cltc_missing - are_ct_spec_ct_cltc_available <- !is_ct_spec_missing && !is_ct_cltc_missing + is_ct_clst_missing <- is.null(ct_clst) + is_required_ct_clst_missing <- is_ct_clst_missing && !optional + is_ct_clst_without_ct_spec <- is_ct_spec_missing && !is_ct_clst_missing + are_ct_spec_ct_clst_available <- !is_ct_spec_missing && !is_ct_clst_missing - if (!is_ct_cltc_missing) { - admiraldev::assert_character_scalar(ct_cltc) + if (!is_ct_clst_missing) { + admiraldev::assert_character_scalar(ct_clst) } - if (is_required_ct_cltc_missing) { - rlang::abort("`ct_cltc` is a required parameter.") + if (is_required_ct_clst_missing) { + rlang::abort("`ct_clst` is a required parameter.") } - if (is_ct_cltc_without_ct_spec) { - rlang::abort("`ct_spec` must be a valid controlled terminology if `ct_cltc` is supplied.") + if (is_ct_clst_without_ct_spec) { + rlang::abort("`ct_spec` must be a valid controlled terminology if `ct_clst` is supplied.") } - if (is_ct_cltc_missing) { + if (is_ct_clst_missing) { return(invisible(NULL)) } - if (!is_ct_spec_missing && is.na(ct_cltc)) { - rlang::abort("`ct_cltc` can't be NA. Did you mean `NULL`?") + if (!is_ct_spec_missing && is.na(ct_clst)) { + rlang::abort("`ct_clst` can't be NA. Did you mean `NULL`?") } - if (are_ct_spec_ct_cltc_available) { + if (are_ct_spec_ct_clst_available) { assert_ct_spec(ct_spec, optional = FALSE) - ct_cltc_possibilities <- + ct_clst_possibilities <- ct_spec |> - dplyr::pull(ct_spec_vars("ct_cltc")) |> + dplyr::pull(ct_spec_vars("ct_clst")) |> unique() - admiraldev::assert_character_scalar(ct_cltc, values = ct_cltc_possibilities) + admiraldev::assert_character_scalar(ct_clst, values = ct_clst_possibilities) } - return(ct_cltc) + return(ct_clst) } #' Controlled terminology mappings @@ -254,7 +254,7 @@ ct_mappings <- function(ct_spec, from = ct_spec_vars("from"), to = ct_spec_vars( #' terminology. #' @param ct_spec A [tibble][tibble::tibble-package] providing a controlled #' terminology specification. -#' @param ct_cltc A character vector indicating a set of possible controlled +#' @param ct_clst A character vector indicating a set of possible controlled #' terminology codelists codes to be used for recoding. By default (`NULL`) #' all codelists available in `ct_spec` are used. #' @param from A character vector of column names indicating the variables @@ -291,21 +291,21 @@ ct_mappings <- function(ct_spec, from = ct_spec_vars("from"), to = ct_spec_vars( #' # `"QD"` and `"Q2H"`, respectively; remaining terms won't match given the #' # codelist code # restriction, and will be mapped to an uppercase version of #' # the original terms. -#' ct_map(x = terms, ct_spec = ct_spec, ct_cltc = "C71113") +#' ct_map(x = terms, ct_spec = ct_spec, ct_clst = "C71113") #' #' @importFrom rlang %||% .data #' @export ct_map <- function(x, ct_spec = NULL, - ct_cltc = NULL, + ct_clst = NULL, from = ct_spec_vars("from"), to = ct_spec_vars("to")) { ct_spec %||% return(x) assert_ct_spec(ct_spec) - ct_cltc <- ct_cltc %||% unique(ct_spec[[ct_spec_vars("ct_cltc")]]) - ct_spec <- dplyr::filter(ct_spec, .data[[ct_spec_vars("ct_cltc")]] %in% ct_cltc) + ct_clst <- ct_clst %||% unique(ct_spec[[ct_spec_vars("ct_clst")]]) + ct_spec <- dplyr::filter(ct_spec, .data[[ct_spec_vars("ct_clst")]] %in% ct_clst) mappings <- ct_mappings(ct_spec, from = from, to = to) recode( diff --git a/R/hardcode.R b/R/hardcode.R index 938f1394..31938689 100644 --- a/R/hardcode.R +++ b/R/hardcode.R @@ -16,7 +16,7 @@ #' @param ct_spec Study controlled terminology specification: a dataframe with a #' minimal set of columns, see [ct_spec_vars()] for details. This parameter is #' optional, if left as `NULL` no controlled terminology recoding is applied. -#' @param ct_cltc A codelist code indicating which subset of the controlled +#' @param ct_clst A codelist code indicating which subset of the controlled #' terminology to apply in the derivation. This parameter is optional, if left #' as `NULL`, all possible recodings in `ct_spec` are attempted. #' @param tgt_dat Target dataset: a data frame to be merged against `raw_dat` by @@ -41,7 +41,7 @@ sdtm_hardcode <- function(raw_dat, tgt_var, tgt_val, ct_spec = NULL, - ct_cltc = NULL, + ct_clst = NULL, tgt_dat = NULL, id_vars = oak_id_vars()) { admiraldev::assert_character_scalar(raw_var) @@ -54,10 +54,10 @@ sdtm_hardcode <- function(raw_dat, admiraldev::assert_data_frame(raw_dat, required_vars = rlang::syms(c(id_vars, raw_var))) admiraldev::assert_data_frame(tgt_dat, required_vars = rlang::syms(id_vars), optional = TRUE) assert_ct_spec(ct_spec, optional = TRUE) - assert_ct_cltc(ct_spec = ct_spec, ct_cltc = ct_cltc, optional = TRUE) + assert_ct_clst(ct_spec = ct_spec, ct_clst = ct_clst, optional = TRUE) # Recode the hardcoded value following terminology. - tgt_val <- ct_map(tgt_val, ct_spec = ct_spec, ct_cltc = ct_cltc) + tgt_val <- ct_map(tgt_val, ct_spec = ct_spec, ct_clst = ct_clst) # Apply derivation of the hardcoded value. # `der_dat`: derived dataset. @@ -102,7 +102,7 @@ sdtm_hardcode <- function(raw_dat, #' @param ct_spec Study controlled terminology specification: a dataframe with a #' minimal set of columns, see [ct_spec_vars()] for details. This parameter is #' optional, if left as `NULL` no controlled terminology recoding is applied. -#' @param ct_cltc A codelist code indicating which subset of the controlled +#' @param ct_clst A codelist code indicating which subset of the controlled #' terminology to apply in the derivation. This parameter is optional, if left #' as `NULL`, all possible recodings in `ct_spec` are attempted. #' @param tgt_dat Target dataset: a data frame to be merged against `raw_dat` by @@ -172,7 +172,7 @@ sdtm_hardcode <- function(raw_dat, #' tgt_var = "CMCAT", #' tgt_val = "GENERAL CONCOMITANT MEDICATIONS", #' ct_spec = ct_spec, -#' ct_cltc = "C66729", +#' ct_clst = "C66729", #' tgt_dat = cm_inter #' ) #' @@ -217,7 +217,7 @@ hardcode_ct <- tgt_var, tgt_val, ct_spec, - ct_cltc, + ct_clst, tgt_dat = NULL, id_vars = oak_id_vars()) { admiraldev::assert_character_scalar(raw_var) @@ -236,7 +236,7 @@ hardcode_ct <- ) assert_ct_spec(ct_spec, optional = FALSE) - assert_ct_cltc(ct_spec = ct_spec, ct_cltc = ct_cltc, optional = FALSE) + assert_ct_clst(ct_spec = ct_spec, ct_clst = ct_clst, optional = FALSE) sdtm_hardcode( raw_dat = raw_dat, @@ -244,7 +244,7 @@ hardcode_ct <- tgt_var = tgt_var, tgt_val = tgt_val, ct_spec = ct_spec, - ct_cltc = ct_cltc, + ct_clst = ct_clst, tgt_dat = tgt_dat, id_vars = id_vars ) diff --git a/man/assert_ct_cltc.Rd b/man/assert_ct_clst.Rd similarity index 53% rename from man/assert_ct_cltc.Rd rename to man/assert_ct_clst.Rd index 1a612ea6..c0239c97 100644 --- a/man/assert_ct_cltc.Rd +++ b/man/assert_ct_clst.Rd @@ -1,27 +1,27 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/ct.R -\name{assert_ct_cltc} -\alias{assert_ct_cltc} +\name{assert_ct_clst} +\alias{assert_ct_clst} \title{Assert a codelist code} \usage{ -assert_ct_cltc(ct_spec, ct_cltc, optional = FALSE) +assert_ct_clst(ct_spec, ct_clst, optional = FALSE) } \arguments{ \item{ct_spec}{Either a data frame encoding a controlled terminology data set, or \code{NULL}.} -\item{ct_cltc}{A string with a to-be asserted codelist code, or \code{NULL}.} +\item{ct_clst}{A string with a to-be asserted codelist code, or \code{NULL}.} -\item{optional}{A scalar logical, indicating whether \code{ct_cltc} can be \code{NULL} or +\item{optional}{A scalar logical, indicating whether \code{ct_clst} can be \code{NULL} or not.} } \value{ -The function throws an error if \code{ct_cltc} is not a valid codelist code -given the controlled terminology data set; otherwise, \code{ct_cltc} is returned +The function throws an error if \code{ct_clst} is not a valid codelist code +given the controlled terminology data set; otherwise, \code{ct_clst} is returned invisibly. } \description{ -\code{\link[=assert_ct_cltc]{assert_ct_cltc()}} asserts the validity of a codelist code in the context of +\code{\link[=assert_ct_clst]{assert_ct_clst()}} asserts the validity of a codelist code in the context of a controlled terminology specification. } \examples{ @@ -29,15 +29,15 @@ a controlled terminology specification. (ct_spec <- read_ct_spec_example("ct-01-cm")) # Should work fine. -sdtm.oak:::assert_ct_cltc(ct_spec = ct_spec, ct_cltc = "C71113") +sdtm.oak:::assert_ct_clst(ct_spec = ct_spec, ct_clst = "C71113") -# In certain cases, you might allow `ct_cltc` to be `NULL` as to indicate absence, -# in that case, set `optional` to `TRUE` to make `assert_ct_cltc()` more +# In certain cases, you might allow `ct_clst` to be `NULL` as to indicate absence, +# in that case, set `optional` to `TRUE` to make `assert_ct_clst()` more # forgiving. -sdtm.oak:::assert_ct_cltc(ct_spec = ct_spec, ct_cltc = NULL, optional = TRUE) +sdtm.oak:::assert_ct_clst(ct_spec = ct_spec, ct_clst = NULL, optional = TRUE) # Otherwise it would err. -try(sdtm.oak:::assert_ct_cltc(ct_spec = ct_spec, ct_cltc = NULL, optional = FALSE)) +try(sdtm.oak:::assert_ct_clst(ct_spec = ct_spec, ct_clst = NULL, optional = FALSE)) } \keyword{internal} diff --git a/man/assign.Rd b/man/assign.Rd index f74a9b6d..ff7df056 100644 --- a/man/assign.Rd +++ b/man/assign.Rd @@ -19,7 +19,7 @@ assign_ct( raw_var, tgt_var, ct_spec, - ct_cltc, + ct_clst, tgt_dat = NULL, id_vars = oak_id_vars() ) @@ -44,7 +44,7 @@ section Value for how the output changes depending on this argument value.} \item{ct_spec}{Study controlled terminology specification: a dataframe with a minimal set of columns, see \code{\link[=ct_spec_vars]{ct_spec_vars()}} for details.} -\item{ct_cltc}{A codelist code indicating which subset of the controlled +\item{ct_clst}{A codelist code indicating which subset of the controlled terminology to apply in the derivation.} } \value{ @@ -134,7 +134,7 @@ assign_ct( raw_var = "MDIND", tgt_var = "CMINDC", ct_spec = ct_spec, - ct_cltc = "C66729", + ct_clst = "C66729", tgt_dat = cm_inter ) diff --git a/man/ct_map.Rd b/man/ct_map.Rd index a5f3318a..0f00c10a 100644 --- a/man/ct_map.Rd +++ b/man/ct_map.Rd @@ -7,7 +7,7 @@ ct_map( x, ct_spec = NULL, - ct_cltc = NULL, + ct_clst = NULL, from = ct_spec_vars("from"), to = ct_spec_vars("to") ) @@ -19,7 +19,7 @@ terminology.} \item{ct_spec}{A \link[tibble:tibble-package]{tibble} providing a controlled terminology specification.} -\item{ct_cltc}{A character vector indicating a set of possible controlled +\item{ct_clst}{A character vector indicating a set of possible controlled terminology codelists codes to be used for recoding. By default (\code{NULL}) all codelists available in \code{ct_spec} are used.} @@ -62,6 +62,6 @@ ct_map(x = terms, ct_spec = ct_spec) # `"QD"` and `"Q2H"`, respectively; remaining terms won't match given the # codelist code # restriction, and will be mapped to an uppercase version of # the original terms. -ct_map(x = terms, ct_spec = ct_spec, ct_cltc = "C71113") +ct_map(x = terms, ct_spec = ct_spec, ct_clst = "C71113") } diff --git a/man/ct_spec_vars.Rd b/man/ct_spec_vars.Rd index 929030bb..fff7e108 100644 --- a/man/ct_spec_vars.Rd +++ b/man/ct_spec_vars.Rd @@ -4,10 +4,10 @@ \alias{ct_spec_vars} \title{Controlled terminology variables} \usage{ -ct_spec_vars(set = c("all", "ct_cltc", "from", "to")) +ct_spec_vars(set = c("all", "ct_clst", "from", "to")) } \arguments{ -\item{set}{A scalar character (string), one of: \code{"all"} (default), \code{"ct_cltc"}, +\item{set}{A scalar character (string), one of: \code{"all"} (default), \code{"ct_clst"}, \code{"from"} or \code{"to"}.} } \description{ @@ -18,7 +18,7 @@ variables. If only the subset of variables used for matching terms are needed, then request this subset of variables by passing the argument value \code{"from"}. If only the mapping-to variable is to be requested, then simply pass \code{"to"}. If -only the codelist code variable name is needed then pass \code{"ct_cltc"}. +only the codelist code variable name is needed then pass \code{"ct_clst"}. } \examples{ # These two calls are equivalent and return all required variables in a @@ -27,7 +27,7 @@ sdtm.oak:::ct_spec_vars() sdtm.oak:::ct_spec_vars("all") # "Codelist code" variable name. -sdtm.oak:::ct_spec_vars("ct_cltc") +sdtm.oak:::ct_spec_vars("ct_clst") # "From" variables sdtm.oak:::ct_spec_vars("from") diff --git a/man/harcode.Rd b/man/harcode.Rd index cdaefb11..e38424a5 100644 --- a/man/harcode.Rd +++ b/man/harcode.Rd @@ -21,7 +21,7 @@ hardcode_ct( tgt_var, tgt_val, ct_spec, - ct_cltc, + ct_clst, tgt_dat = NULL, id_vars = oak_id_vars() ) @@ -50,7 +50,7 @@ section Value for how the output changes depending on this argument value.} minimal set of columns, see \code{\link[=ct_spec_vars]{ct_spec_vars()}} for details. This parameter is optional, if left as \code{NULL} no controlled terminology recoding is applied.} -\item{ct_cltc}{A codelist code indicating which subset of the controlled +\item{ct_clst}{A codelist code indicating which subset of the controlled terminology to apply in the derivation. This parameter is optional, if left as \code{NULL}, all possible recodings in \code{ct_spec} are attempted.} } @@ -126,7 +126,7 @@ hardcode_ct( tgt_var = "CMCAT", tgt_val = "GENERAL CONCOMITANT MEDICATIONS", ct_spec = ct_spec, - ct_cltc = "C66729", + ct_clst = "C66729", tgt_dat = cm_inter ) diff --git a/man/sdtm_assign.Rd b/man/sdtm_assign.Rd index 47b6db1d..676979dc 100644 --- a/man/sdtm_assign.Rd +++ b/man/sdtm_assign.Rd @@ -9,7 +9,7 @@ sdtm_assign( raw_var, tgt_var, ct_spec = NULL, - ct_cltc = NULL, + ct_clst = NULL, tgt_dat = NULL, id_vars = oak_id_vars() ) @@ -28,7 +28,7 @@ of variable to be derived.} minimal set of columns, see \code{\link[=ct_spec_vars]{ct_spec_vars()}} for details. This parameter is optional, if left as \code{NULL} no controlled terminology recoding is applied.} -\item{ct_cltc}{A codelist code indicating which subset of the controlled +\item{ct_clst}{A codelist code indicating which subset of the controlled terminology to apply in the derivation. This parameter is optional, if left as \code{NULL}, all possible recodings in \code{ct_spec} are attempted.} diff --git a/man/sdtm_hardcode.Rd b/man/sdtm_hardcode.Rd index 59029e01..5c3435b5 100644 --- a/man/sdtm_hardcode.Rd +++ b/man/sdtm_hardcode.Rd @@ -10,7 +10,7 @@ sdtm_hardcode( tgt_var, tgt_val, ct_spec = NULL, - ct_cltc = NULL, + ct_clst = NULL, tgt_dat = NULL, id_vars = oak_id_vars() ) @@ -32,7 +32,7 @@ indicated in \code{tgt_var}.} minimal set of columns, see \code{\link[=ct_spec_vars]{ct_spec_vars()}} for details. This parameter is optional, if left as \code{NULL} no controlled terminology recoding is applied.} -\item{ct_cltc}{A codelist code indicating which subset of the controlled +\item{ct_clst}{A codelist code indicating which subset of the controlled terminology to apply in the derivation. This parameter is optional, if left as \code{NULL}, all possible recodings in \code{ct_spec} are attempted.} diff --git a/tests/testthat/test-ct.R b/tests/testthat/test-ct.R index 07f28ea4..e1fb5ca7 100644 --- a/tests/testthat/test-ct.R +++ b/tests/testthat/test-ct.R @@ -20,7 +20,7 @@ test_that("ct_spec_vars() works as expected", { ) expect_identical( - ct_spec_vars(set = "ct_cltc"), + ct_spec_vars(set = "ct_clst"), "codelist_code" ) @@ -46,7 +46,7 @@ test_that("assert_ct_spec() works as expected", { # Load an example controlled terminology spec. ct_spec <- read_ct_spec_example("ct-01-cm") cols <- colnames(ct_spec) - ct_cltc_col <- ct_spec_vars("ct_cltc") + ct_clst_col <- ct_spec_vars("ct_clst") to_col <- ct_spec_vars("to") expect_no_error(assert_ct_spec(ct_spec, optional = FALSE)) @@ -57,14 +57,14 @@ test_that("assert_ct_spec() works as expected", { # Codelist code column is one of the key variables that must be present # in `ct_spec`, so being missing should trigger an error. - expect_error(assert_ct_spec(ct_spec[setdiff(cols, ct_cltc_col)], optional = FALSE)) - expect_error(assert_ct_spec(ct_spec[setdiff(cols, ct_cltc_col)], optional = TRUE)) + expect_error(assert_ct_spec(ct_spec[setdiff(cols, ct_clst_col)], optional = FALSE)) + expect_error(assert_ct_spec(ct_spec[setdiff(cols, ct_clst_col)], optional = TRUE)) # The codelist code and the "to" columns of a controlled terminology should # not contain NAs, as otherwise the mapping is undefined. If that happens # an error is triggered. ct_spec01 <- ct_spec - ct_spec01[[ct_cltc_col]] <- NA_character_ + ct_spec01[[ct_clst_col]] <- NA_character_ expect_error(assert_ct_spec(ct_spec01, optional = FALSE)) expect_error(assert_ct_spec(ct_spec01, optional = TRUE)) @@ -87,109 +87,109 @@ test_that("assert_ct_spec() works as expected", { expect_error(assert_ct_spec(ct_spec_empty, optional = FALSE)) }) -test_that("assert_ct_cltc() works as expected", { +test_that("assert_ct_clst() works as expected", { # Read in a controlled terminology example. ct_spec <- read_ct_spec_example("ct-01-cm") - # If `ct_cltc` is not supplied and is not optional, then it should err. - expect_error(assert_ct_cltc( + # If `ct_clst` is not supplied and is not optional, then it should err. + expect_error(assert_ct_clst( ct_spec = NULL, - ct_cltc = NULL, + ct_clst = NULL, optional = FALSE )) - # If `ct_cltc` is not supplied but it is optional, then all fine. - expect_no_error(assert_ct_cltc( + # If `ct_clst` is not supplied but it is optional, then all fine. + expect_no_error(assert_ct_clst( ct_spec = NULL, - ct_cltc = NULL, + ct_clst = NULL, optional = TRUE )) - # Moreover, in case of no error, `ct_cltc` should be returned invisibly, in this + # Moreover, in case of no error, `ct_clst` should be returned invisibly, in this # case `NULL`. - expect_null(assert_ct_cltc( + expect_null(assert_ct_clst( ct_spec = NULL, - ct_cltc = NULL, + ct_clst = NULL, optional = TRUE )) - # If `ct_cltc` is supplied but `ct_spec` is not, then err. - expect_error(assert_ct_cltc( + # If `ct_clst` is supplied but `ct_spec` is not, then err. + expect_error(assert_ct_clst( ct_spec = NULL, - ct_cltc = "C71113", + ct_clst = "C71113", optional = FALSE )) - expect_error(assert_ct_cltc( + expect_error(assert_ct_clst( ct_spec = NULL, - ct_cltc = "C71113", + ct_clst = "C71113", optional = TRUE )) - # If `ct_spec` is supplied but `ct_cltc` is NULL, then err if `ct_cltc` is not optional, or - # return `ct_cltc` invisibly. - expect_error(assert_ct_cltc( + # If `ct_spec` is supplied but `ct_clst` is NULL, then err if `ct_clst` is not optional, or + # return `ct_clst` invisibly. + expect_error(assert_ct_clst( ct_spec = ct_spec, - ct_cltc = NULL, + ct_clst = NULL, optional = FALSE )) - expect_no_error(assert_ct_cltc( + expect_no_error(assert_ct_clst( ct_spec = ct_spec, - ct_cltc = NULL, + ct_clst = NULL, optional = TRUE )) - expect_null(assert_ct_cltc( + expect_null(assert_ct_clst( ct_spec = ct_spec, - ct_cltc = NULL, + ct_clst = NULL, optional = TRUE )) - # If both `ct_spec` and `ct_cltc` are supplied, then `ct_spec` must be a valid controlled - # terminology data set and `ct_cltc` must contain a codelist code available among - # the possibilities in column `codelist_code` (as returned by `ct_spec_vars("ct_cltc")`). - expect_error(assert_ct_cltc( + # If both `ct_spec` and `ct_clst` are supplied, then `ct_spec` must be a valid controlled + # terminology data set and `ct_clst` must contain a codelist code available among + # the possibilities in column `codelist_code` (as returned by `ct_spec_vars("ct_clst")`). + expect_error(assert_ct_clst( ct_spec = ct_spec, - ct_cltc = "foo", + ct_clst = "foo", optional = FALSE )) - expect_error(assert_ct_cltc( + expect_error(assert_ct_clst( ct_spec = ct_spec, - ct_cltc = "", + ct_clst = "", optional = FALSE )) - expect_error(assert_ct_cltc( + expect_error(assert_ct_clst( ct_spec = ct_spec, - ct_cltc = NA_character_, + ct_clst = NA_character_, optional = FALSE )) - expect_error(assert_ct_cltc( + expect_error(assert_ct_clst( ct_spec = ct_spec, - ct_cltc = NA_character_, + ct_clst = NA_character_, optional = TRUE )) - expect_identical(assert_ct_cltc( + expect_identical(assert_ct_clst( ct_spec = ct_spec, - ct_cltc = "C71113", + ct_clst = "C71113", optional = FALSE ), "C71113") - expect_identical(assert_ct_cltc( + expect_identical(assert_ct_clst( ct_spec = ct_spec, - ct_cltc = "C66726", + ct_clst = "C66726", optional = FALSE ), "C66726") - expect_identical(assert_ct_cltc( + expect_identical(assert_ct_clst( ct_spec = ct_spec, - ct_cltc = "C71113", + ct_clst = "C71113", optional = TRUE ), "C71113") - expect_identical(assert_ct_cltc( + expect_identical(assert_ct_clst( ct_spec = ct_spec, - ct_cltc = "C66726", + ct_clst = "C66726", optional = TRUE ), "C66726") }) -test_that("assert_ct_cltc(): when ct_spec is empty", { +test_that("assert_ct_clst(): when ct_spec is empty", { ct_spec <- data.frame( codelist_code = character(), @@ -199,57 +199,57 @@ test_that("assert_ct_cltc(): when ct_spec is empty", { stringsAsFactors = FALSE ) - # If `ct_spec` is supplied but `ct_cltc` is NULL, then err if `ct_cltc` is not optional, or - # return `ct_cltc` invisibly. - expect_error(assert_ct_cltc( + # If `ct_spec` is supplied but `ct_clst` is NULL, then err if `ct_clst` is not optional, or + # return `ct_clst` invisibly. + expect_error(assert_ct_clst( ct_spec = ct_spec, - ct_cltc = NULL, + ct_clst = NULL, optional = FALSE )) - expect_no_error(assert_ct_cltc( + expect_no_error(assert_ct_clst( ct_spec = ct_spec, - ct_cltc = NULL, + ct_clst = NULL, optional = TRUE )) - expect_null(assert_ct_cltc( + expect_null(assert_ct_clst( ct_spec = ct_spec, - ct_cltc = NULL, + ct_clst = NULL, optional = TRUE )) - # If both `ct_spec` and `ct_cltc` are supplied, then `ct_spec` must be a valid controlled - # terminology data set and `ct_cltc` must contain a codelist code available among - # the possibilities in column `codelist_code` (as returned by `ct_spec_vars("ct_cltc")`). - expect_error(assert_ct_cltc( + # If both `ct_spec` and `ct_clst` are supplied, then `ct_spec` must be a valid controlled + # terminology data set and `ct_clst` must contain a codelist code available among + # the possibilities in column `codelist_code` (as returned by `ct_spec_vars("ct_clst")`). + expect_error(assert_ct_clst( ct_spec = ct_spec, - ct_cltc = "foo", + ct_clst = "foo", optional = FALSE )) - expect_error(assert_ct_cltc( + expect_error(assert_ct_clst( ct_spec = ct_spec, - ct_cltc = "", + ct_clst = "", optional = FALSE )) - expect_error(assert_ct_cltc( + expect_error(assert_ct_clst( ct_spec = ct_spec, - ct_cltc = NA_character_, + ct_clst = NA_character_, optional = FALSE )) - expect_error(assert_ct_cltc( + expect_error(assert_ct_clst( ct_spec = ct_spec, - ct_cltc = NA_character_, + ct_clst = NA_character_, optional = TRUE )) - expect_error(assert_ct_cltc( + expect_error(assert_ct_clst( ct_spec = ct_spec, - ct_cltc = "C71113", + ct_clst = "C71113", optional = FALSE )) - expect_error(assert_ct_cltc( + expect_error(assert_ct_clst( ct_spec = ct_spec, - ct_cltc = "C71113", + ct_clst = "C71113", optional = TRUE )) }) From b17161c0a101e96634501968cbfa6e76a45a44a7 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Sat, 4 May 2024 01:20:19 +0100 Subject: [PATCH 73/80] Fix bug in `assign_datetime` - This bug is related to the support of input is in two different variables (date and time). - A unit test was also added --- R/assign_datetime.R | 2 +- tests/testthat/test-assign_datetime.R | 78 +++++++++++++++++++++++++++ 2 files changed, 79 insertions(+), 1 deletion(-) create mode 100644 tests/testthat/test-assign_datetime.R diff --git a/R/assign_datetime.R b/R/assign_datetime.R index 6b4bd4e4..f1415b36 100644 --- a/R/assign_datetime.R +++ b/R/assign_datetime.R @@ -152,7 +152,7 @@ assign_datetime <- raw_dat |> dplyr::select(c(id_vars, raw_var)) |> dplyr::mutate("{tgt_var}" := tgt_val) |> # nolint object_name_linter() - dplyr::select(-rlang::sym(raw_var)) + dplyr::select(-raw_var) der_dat <- if (!is.null(tgt_dat)) { diff --git a/tests/testthat/test-assign_datetime.R b/tests/testthat/test-assign_datetime.R new file mode 100644 index 00000000..c9bf65c0 --- /dev/null +++ b/tests/testthat/test-assign_datetime.R @@ -0,0 +1,78 @@ +test_that("assign_datetime: date and time conversion", { + md1 <- + tibble::tribble( + ~oak_id, ~raw_source, ~patient_number, ~MDBDR, ~MDEDR, ~MDETM, + 1L, "MD1", 375, NA, NA, NA, + 2L, "MD1", 375, "15-Sep-20", NA, NA, + 3L, "MD1", 376, "17-Feb-21", "17-Feb-21", NA, + 4L, "MD1", 377, "4-Oct-20", NA, NA, + 5L, "MD1", 377, "20-Jan-20", "20-Jan-20", "10:00:00", + 6L, "MD1", 377, "UN-UNK-2019", "UN-UNK-2019", NA, + 7L, "MD1", 377, "20-UNK-2019", "20-UNK-2019", NA, + 8L, "MD1", 378, "UN-UNK-2020", "UN-UNK-2020", NA, + 9L, "MD1", 378, "26-Jan-20", "26-Jan-20", "07:00:00", + 10L, "MD1", 378, "28-Jan-20", "1-Feb-20", NA, + 11L, "MD1", 378, "12-Feb-20", "18-Feb-20", NA, + 12L, "MD1", 379, "10-UNK-2020", "20-UNK-2020", NA, + 13L, "MD1", 379, NA, NA, NA, + 14L, "MD1", 379, NA, "17-Feb-20", NA + ) + + warning_msg <- "There were 12 parsing problems\\. Run `problems\\(\\)` on parsed results for details\\." + expect_warning( + rlang::with_interactive( + assign_datetime( + raw_dat = md1, + raw_var = c("MDEDR", "MDETM"), + raw_fmt = c("d-m-y", "H:M:S"), + raw_unk = c("UN", "UNK"), + tgt_var = "CMSTDTC" + ) + ), regexp = warning_msg) + + # If not run interactively then warnings should not be raised. + expect_silent( + cm1 <- assign_datetime( + raw_dat = md1, + raw_var = c("MDEDR", "MDETM"), + raw_fmt = c("d-m-y", "H:M:S"), + raw_unk = c("UN", "UNK"), + tgt_var = "CMSTDTC" + ) + ) + + problems_index <- seq(1L, 14L)[-c(5, 9)] + problems <- tibble::tibble(..i = problems_index, + MDEDR = md1$MDEDR[problems_index], + MDETM = md1$MDETM[problems_index]) + + CMSTDTC <- + structure( + c( + NA, + NA, + "2021-02-17", + NA, + "2020-01-20T10:00:00", + "2019", + "2019---20", + "2020", + "2020-01-26T07:00:00", + "2020-02-01", + "2020-02-18", + "2020---20", + NA, + "2020-02-17" + ), + class = "iso8601", + problems = problems + ) + + expected <- + cm1 |> + dplyr::select("oak_id", "raw_source", "patient_number") |> + dplyr::bind_cols(tibble::tibble(CMSTDTC = CMSTDTC)) + + expect_equal(object = cm1, expected = expected) + +}) From 3dba6d922aa47d88daaf328f9beb0e3912477567 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Sat, 4 May 2024 01:23:27 +0100 Subject: [PATCH 74/80] Linting --- tests/testthat/test-assign_datetime.R | 56 +++++++++++++-------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/tests/testthat/test-assign_datetime.R b/tests/testthat/test-assign_datetime.R index c9bf65c0..3eb117c0 100644 --- a/tests/testthat/test-assign_datetime.R +++ b/tests/testthat/test-assign_datetime.R @@ -2,33 +2,33 @@ test_that("assign_datetime: date and time conversion", { md1 <- tibble::tribble( ~oak_id, ~raw_source, ~patient_number, ~MDBDR, ~MDEDR, ~MDETM, - 1L, "MD1", 375, NA, NA, NA, - 2L, "MD1", 375, "15-Sep-20", NA, NA, - 3L, "MD1", 376, "17-Feb-21", "17-Feb-21", NA, - 4L, "MD1", 377, "4-Oct-20", NA, NA, - 5L, "MD1", 377, "20-Jan-20", "20-Jan-20", "10:00:00", - 6L, "MD1", 377, "UN-UNK-2019", "UN-UNK-2019", NA, - 7L, "MD1", 377, "20-UNK-2019", "20-UNK-2019", NA, - 8L, "MD1", 378, "UN-UNK-2020", "UN-UNK-2020", NA, - 9L, "MD1", 378, "26-Jan-20", "26-Jan-20", "07:00:00", - 10L, "MD1", 378, "28-Jan-20", "1-Feb-20", NA, - 11L, "MD1", 378, "12-Feb-20", "18-Feb-20", NA, - 12L, "MD1", 379, "10-UNK-2020", "20-UNK-2020", NA, - 13L, "MD1", 379, NA, NA, NA, - 14L, "MD1", 379, NA, "17-Feb-20", NA + 1L, "MD1", 375L, NA, NA, NA, + 2L, "MD1", 375L, "15-Sep-20", NA, NA, + 3L, "MD1", 376L, "17-Feb-21", "17-Feb-21", NA, + 4L, "MD1", 377L, "4-Oct-20", NA, NA, + 5L, "MD1", 377L, "20-Jan-20", "20-Jan-20", "10:00:00", + 6L, "MD1", 377L, "UN-UNK-2019", "UN-UNK-2019", NA, + 7L, "MD1", 377L, "20-UNK-2019", "20-UNK-2019", NA, + 8L, "MD1", 378L, "UN-UNK-2020", "UN-UNK-2020", NA, + 9L, "MD1", 378L, "26-Jan-20", "26-Jan-20", "07:00:00", + 10L, "MD1", 378L, "28-Jan-20", "1-Feb-20", NA, + 11L, "MD1", 378L, "12-Feb-20", "18-Feb-20", NA, + 12L, "MD1", 379L, "10-UNK-2020", "20-UNK-2020", NA, + 13L, "MD1", 379L, NA, NA, NA, + 14L, "MD1", 379L, NA, "17-Feb-20", NA ) - warning_msg <- "There were 12 parsing problems\\. Run `problems\\(\\)` on parsed results for details\\." - expect_warning( - rlang::with_interactive( - assign_datetime( - raw_dat = md1, - raw_var = c("MDEDR", "MDETM"), - raw_fmt = c("d-m-y", "H:M:S"), - raw_unk = c("UN", "UNK"), - tgt_var = "CMSTDTC" - ) - ), regexp = warning_msg) + warning_msg <- + "There were 12 parsing problems\\. Run `problems\\(\\)` on parsed results for details\\." + expect_warning(rlang::with_interactive( + assign_datetime( + raw_dat = md1, + raw_var = c("MDEDR", "MDETM"), + raw_fmt = c("d-m-y", "H:M:S"), + raw_unk = c("UN", "UNK"), + tgt_var = "CMSTDTC" + ) + ), regexp = warning_msg) # If not run interactively then warnings should not be raised. expect_silent( @@ -41,12 +41,12 @@ test_that("assign_datetime: date and time conversion", { ) ) - problems_index <- seq(1L, 14L)[-c(5, 9)] + problems_index <- seq(1L, 14L)[-c(5L, 9L)] problems <- tibble::tibble(..i = problems_index, MDEDR = md1$MDEDR[problems_index], MDETM = md1$MDETM[problems_index]) - CMSTDTC <- + cmstdtc <- structure( c( NA, @@ -71,7 +71,7 @@ test_that("assign_datetime: date and time conversion", { expected <- cm1 |> dplyr::select("oak_id", "raw_source", "patient_number") |> - dplyr::bind_cols(tibble::tibble(CMSTDTC = CMSTDTC)) + dplyr::bind_cols(tibble::tibble(CMSTDTC = cmstdtc)) expect_equal(object = cm1, expected = expected) From 04254ac25866aad4c353f3a21a83d6d5a453e9af Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Sat, 4 May 2024 01:42:46 +0100 Subject: [PATCH 75/80] Update styling --- tests/testthat/test-assign_datetime.R | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/testthat/test-assign_datetime.R b/tests/testthat/test-assign_datetime.R index 3eb117c0..69980b95 100644 --- a/tests/testthat/test-assign_datetime.R +++ b/tests/testthat/test-assign_datetime.R @@ -42,9 +42,11 @@ test_that("assign_datetime: date and time conversion", { ) problems_index <- seq(1L, 14L)[-c(5L, 9L)] - problems <- tibble::tibble(..i = problems_index, - MDEDR = md1$MDEDR[problems_index], - MDETM = md1$MDETM[problems_index]) + problems <- tibble::tibble( + ..i = problems_index, + MDEDR = md1$MDEDR[problems_index], + MDETM = md1$MDETM[problems_index] + ) cmstdtc <- structure( @@ -74,5 +76,4 @@ test_that("assign_datetime: date and time conversion", { dplyr::bind_cols(tibble::tibble(CMSTDTC = cmstdtc)) expect_equal(object = cm1, expected = expected) - }) From 391df0aaff7694d402656b7368e7e708ca9996b3 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Tue, 14 May 2024 22:17:17 +0100 Subject: [PATCH 76/80] Add example with date and time to `assign_datetime()` docs --- R/assign_datetime.R | 29 +++++++++++++++++++++++++++++ man/assign_datetime.Rd | 29 +++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+) diff --git a/R/assign_datetime.R b/R/assign_datetime.R index f1415b36..a39ce91d 100644 --- a/R/assign_datetime.R +++ b/R/assign_datetime.R @@ -35,6 +35,7 @@ #' derived variable, as indicated in `tgt_var`. #' #' @examples +#' # `md1`: an example raw data set. #' md1 <- #' tibble::tribble( #' ~oak_id, ~raw_source, ~patient_number, ~MDBDR, ~MDEDR, ~MDETM, @@ -54,6 +55,10 @@ #' 14L, "MD1", 379, NA, "17-Feb-20", NA #' ) #' +#' # Using the raw data set `md1`, derive the variable CMSTDTC from MDBDR using +#' # the parsing format (`raw_fmt`) `"d-m-y"` (day-month-year), while allowing +#' # for the presence of special date component values (e.g. `"UN"` or `"UNK"`), +#' # indicating that these values are missing/unknown (unk). #' cm1 <- #' assign_datetime( #' raw_dat = md1, @@ -64,8 +69,11 @@ #' ) #' #' cm1 +#' +#' # Inspect parsing failures associated with derivation of CMSTDTC. #' problems(cm1$CMSTDTC) #' +#' # `cm_inter`: an example target data set. #' cm_inter <- #' tibble::tibble( #' oak_id = 1L:14L, @@ -108,6 +116,8 @@ #' ) #' ) #' +#' # Same derivation as above but now involving the merging with the target +#' # data set `cm_inter`. #' cm2 <- #' assign_datetime( #' raw_dat = md1, @@ -118,8 +128,27 @@ #' ) #' #' cm2 +#' +#' # Inspect parsing failures associated with derivation of CMSTDTC. #' problems(cm2$CMSTDTC) #' +#' # Derive CMSTDTC using both MDEDR and MDETM variables. +#' # Note that the format `"d-m-y"` is used for parsing MDEDR and `"H:M:S"` for +#' # MDETM (correspondence is by positional matching). +#' cm3 <- +#' assign_datetime( +#' raw_dat = md1, +#' raw_var = c("MDEDR", "MDETM"), +#' raw_fmt = c("d-m-y", "H:M:S"), +#' raw_unk = c("UN", "UNK"), +#' tgt_var = "CMSTDTC" +#' ) +#' +#' cm3 +#' +#' # Inspect parsing failures associated with derivation of CMSTDTC. +#' problems(cm3$CMSTDTC) +#' #' @export assign_datetime <- function(raw_dat, diff --git a/man/assign_datetime.Rd b/man/assign_datetime.Rd index fcd20169..f85ad265 100644 --- a/man/assign_datetime.Rd +++ b/man/assign_datetime.Rd @@ -62,6 +62,7 @@ derived variable, as indicated in \code{tgt_var}. raw dataset to a target SDTM variable following the ISO8601 format. } \examples{ +# `md1`: an example raw data set. md1 <- tibble::tribble( ~oak_id, ~raw_source, ~patient_number, ~MDBDR, ~MDEDR, ~MDETM, @@ -81,6 +82,10 @@ md1 <- 14L, "MD1", 379, NA, "17-Feb-20", NA ) +# Using the raw data set `md1`, derive the variable CMSTDTC from MDBDR using +# the parsing format (`raw_fmt`) `"d-m-y"` (day-month-year), while allowing +# for the presence of special date component values (e.g. `"UN"` or `"UNK"`), +# indicating that these values are missing/unknown (unk). cm1 <- assign_datetime( raw_dat = md1, @@ -91,8 +96,11 @@ cm1 <- ) cm1 + +# Inspect parsing failures associated with derivation of CMSTDTC. problems(cm1$CMSTDTC) +# `cm_inter`: an example target data set. cm_inter <- tibble::tibble( oak_id = 1L:14L, @@ -135,6 +143,8 @@ cm_inter <- ) ) +# Same derivation as above but now involving the merging with the target +# data set `cm_inter`. cm2 <- assign_datetime( raw_dat = md1, @@ -145,6 +155,25 @@ cm2 <- ) cm2 + +# Inspect parsing failures associated with derivation of CMSTDTC. problems(cm2$CMSTDTC) +# Derive CMSTDTC using both MDEDR and MDETM variables. +# Note that the format `"d-m-y"` is used for parsing MDEDR and `"H:M:S"` for +# MDETM (correspondence is by positional matching). +cm3 <- + assign_datetime( + raw_dat = md1, + raw_var = c("MDEDR", "MDETM"), + raw_fmt = c("d-m-y", "H:M:S"), + raw_unk = c("UN", "UNK"), + tgt_var = "CMSTDTC" + ) + +cm3 + +# Inspect parsing failures associated with derivation of CMSTDTC. +problems(cm3$CMSTDTC) + } From 372f1479306cc7814238605d8f57b3a1a4ccaf9e Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Tue, 14 May 2024 22:22:23 +0100 Subject: [PATCH 77/80] =?UTF-8?q?Avoid=20backslash=20hell=20(=D5=B4=D5=A5?= =?UTF-8?q?=D6=80=D5=BD=D5=AB)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Credit goes to @edgar-manukya for the expression --- tests/testthat/test-assign_datetime.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-assign_datetime.R b/tests/testthat/test-assign_datetime.R index 69980b95..5cb6e42c 100644 --- a/tests/testthat/test-assign_datetime.R +++ b/tests/testthat/test-assign_datetime.R @@ -19,7 +19,7 @@ test_that("assign_datetime: date and time conversion", { ) warning_msg <- - "There were 12 parsing problems\\. Run `problems\\(\\)` on parsed results for details\\." + r"{There were 12 parsing problems\. Run `problems\(\)` on parsed results for details\.}" expect_warning(rlang::with_interactive( assign_datetime( raw_dat = md1, From 6801222a519e7f000ae29b5d59e9758f8f38c95d Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Tue, 14 May 2024 22:40:32 +0100 Subject: [PATCH 78/80] Update `ct_spec_vars()` docs' examples `ct_spec_vars()` used to be an internal function but not anymore: so no need for `:::`. --- R/ct.R | 10 +++++----- man/ct_spec_vars.Rd | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/R/ct.R b/R/ct.R index 32b0596a..68be7e90 100644 --- a/R/ct.R +++ b/R/ct.R @@ -16,17 +16,17 @@ #' @examples #' # These two calls are equivalent and return all required variables in a #' # controlled terminology data set. -#' sdtm.oak:::ct_spec_vars() -#' sdtm.oak:::ct_spec_vars("all") +#' ct_spec_vars() +#' ct_spec_vars("all") #' #' # "Codelist code" variable name. -#' sdtm.oak:::ct_spec_vars("ct_clst") +#' ct_spec_vars("ct_clst") #' #' # "From" variables -#' sdtm.oak:::ct_spec_vars("from") +#' ct_spec_vars("from") #' #' # The "to" variable. -#' sdtm.oak:::ct_spec_vars("to") +#' ct_spec_vars("to") #' #' @keywords internal #' @export diff --git a/man/ct_spec_vars.Rd b/man/ct_spec_vars.Rd index fff7e108..f21f006c 100644 --- a/man/ct_spec_vars.Rd +++ b/man/ct_spec_vars.Rd @@ -23,17 +23,17 @@ only the codelist code variable name is needed then pass \code{"ct_clst"}. \examples{ # These two calls are equivalent and return all required variables in a # controlled terminology data set. -sdtm.oak:::ct_spec_vars() -sdtm.oak:::ct_spec_vars("all") +ct_spec_vars() +ct_spec_vars("all") # "Codelist code" variable name. -sdtm.oak:::ct_spec_vars("ct_clst") +ct_spec_vars("ct_clst") # "From" variables -sdtm.oak:::ct_spec_vars("from") +ct_spec_vars("from") # The "to" variable. -sdtm.oak:::ct_spec_vars("to") +ct_spec_vars("to") } \keyword{internal} From 994012bb67ecbac322479b350b66376dec259912 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Tue, 14 May 2024 23:53:54 +0100 Subject: [PATCH 79/80] Fix typo in `assign_datetime()` documentation --- R/assign_datetime.R | 2 +- man/assign_datetime.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/assign_datetime.R b/R/assign_datetime.R index a39ce91d..4622f579 100644 --- a/R/assign_datetime.R +++ b/R/assign_datetime.R @@ -22,7 +22,7 @@ #' the variables indicated in `id_vars`. This parameter is optional, see #' section Value for how the output changes depending on this argument value. #' @param id_vars Key variables to be used in the join between the raw dataset -#' (`raw_dat`) and the target data set (`raw_dat`). +#' (`raw_dat`) and the target data set (`tgt_dat`). #' @param .warn Whether to warn about parsing failures. #' #' @returns The returned data set depends on the value of `tgt_dat`: diff --git a/man/assign_datetime.Rd b/man/assign_datetime.Rd index f85ad265..9130e775 100644 --- a/man/assign_datetime.Rd +++ b/man/assign_datetime.Rd @@ -41,7 +41,7 @@ the variables indicated in \code{id_vars}. This parameter is optional, see section Value for how the output changes depending on this argument value.} \item{id_vars}{Key variables to be used in the join between the raw dataset -(\code{raw_dat}) and the target data set (\code{raw_dat}).} +(\code{raw_dat}) and the target data set (\code{tgt_dat}).} \item{.warn}{Whether to warn about parsing failures.} } From 1230e7c8e5caa72c7700f28fba8141dc260a05e6 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Wed, 15 May 2024 00:02:18 +0100 Subject: [PATCH 80/80] Fix typo in `ct_map()` docs --- R/ct.R | 2 +- man/ct_map.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/ct.R b/R/ct.R index 68be7e90..e66c2d6d 100644 --- a/R/ct.R +++ b/R/ct.R @@ -289,7 +289,7 @@ ct_mappings <- function(ct_spec, from = ct_spec_vars("from"), to = ct_spec_vars( #' # Note that if the controlled terminology mapping is restricted to a codelist #' # code, e.g. C71113, then only `"/day"` and `"Every 2 hours"` get mapped to #' # `"QD"` and `"Q2H"`, respectively; remaining terms won't match given the -#' # codelist code # restriction, and will be mapped to an uppercase version of +#' # codelist code restriction, and will be mapped to an uppercase version of #' # the original terms. #' ct_map(x = terms, ct_spec = ct_spec, ct_clst = "C71113") #' diff --git a/man/ct_map.Rd b/man/ct_map.Rd index 0f00c10a..d0aca194 100644 --- a/man/ct_map.Rd +++ b/man/ct_map.Rd @@ -60,7 +60,7 @@ ct_map(x = terms, ct_spec = ct_spec) # Note that if the controlled terminology mapping is restricted to a codelist # code, e.g. C71113, then only `"/day"` and `"Every 2 hours"` get mapped to # `"QD"` and `"Q2H"`, respectively; remaining terms won't match given the -# codelist code # restriction, and will be mapped to an uppercase version of +# codelist code restriction, and will be mapped to an uppercase version of # the original terms. ct_map(x = terms, ct_spec = ct_spec, ct_clst = "C71113")