From 6b24f18d45e7d95769d94a466d98b728166534be Mon Sep 17 00:00:00 2001 From: Shiyu Chen Date: Fri, 19 Jul 2024 18:53:18 +0000 Subject: [PATCH] rest of the internal funs --- NAMESPACE | 1 - R/derive_blfl.R | 42 +----- R/dtc_create_iso8601.R | 98 ------------- R/dtc_parse_dttm.R | 26 ---- R/dtc_problems.R | 34 ----- R/dtc_utils.R | 62 -------- R/oak_id_vars.R | 18 --- R/parse_dttm_fmt.R | 55 ------- R/recode.R | 21 --- man/add_problems.Rd | 11 -- man/any_problems.Rd | 25 ---- man/assert_capture_matrix.Rd | 13 -- man/assert_ct_clst.Rd | 16 --- man/assert_ct_spec.Rd | 20 --- man/assert_dtc_fmt.Rd | 9 -- man/assert_dtc_format.Rd | 12 -- man/coalesce_capture_matrices.Rd | 18 --- man/complete_capture_matrix.Rd | 15 -- man/contains_oak_id_vars.Rd | 14 -- man/ct_mappings.Rd | 13 -- man/ct_spec_vars.Rd | 16 --- man/dtc_datepart.Rd | 14 -- man/dtc_timepart.Rd | 43 ------ man/dttm_fmt_to_regex.Rd | 10 -- man/eval_conditions.Rd | 36 ----- man/fmt_rg.Rd | 17 --- man/format_iso8601.Rd | 18 --- man/get_cnd_df_cnd.Rd | 8 -- man/get_cnd_df_cnd_sum.Rd | 8 -- man/index_for_recode.Rd | 4 - man/is_cnd_df.Rd | 8 -- man/is_seq_name.Rd | 12 -- man/iso8601_mon.Rd | 12 -- man/iso8601_na.Rd | 4 - man/iso8601_sec.Rd | 4 - man/iso8601_truncate.Rd | 30 ---- man/iso8601_two_digits.Rd | 5 - man/iso8601_year.Rd | 12 -- man/mutate.cnd_df.Rd | 10 -- man/new_cnd_df.Rd | 5 - man/oak_id_vars.Rd | 6 - man/parse_dttm.Rd | 27 ---- man/parse_dttm_fmt.Rd | 23 --- man/recode.Rd | 20 --- man/regex_or.Rd | 9 -- man/rm_cnd_df.Rd | 8 -- man/yy_to_yyyy.Rd | 13 -- man/zero_pad_whole_number.Rd | 8 -- tests/testthat/test-cnd_df.R | 34 ++--- tests/testthat/test-derive_blfl.R | 73 ++++++++++ tests/testthat/test-derve_seq.R | 4 +- tests/testthat/test-dtc_parse_dttm.R | 142 ++++++++++++++++++ tests/testthat/test-dtc_problems.R | 29 ++++ tests/testthat/test-dtc_utils.R | 159 ++++++++++++++++++++ tests/testthat/test-eval_conditions.R | 14 +- tests/testthat/test-iso8601.R | 100 +++++++++++++ tests/testthat/test-oak_id_vars.R | 15 ++ tests/testthat/test-parse_dttm_fmt.R | 199 ++++++++++++++++++++++++++ tests/testthat/test-recode.R | 6 + 59 files changed, 750 insertions(+), 938 deletions(-) create mode 100644 tests/testthat/test-dtc_parse_dttm.R create mode 100644 tests/testthat/test-dtc_problems.R create mode 100644 tests/testthat/test-dtc_utils.R create mode 100644 tests/testthat/test-oak_id_vars.R diff --git a/NAMESPACE b/NAMESPACE index ae0b7a9d..ab10edaf 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -17,7 +17,6 @@ export(derive_blfl) export(derive_seq) export(derive_study_day) export(domain_example) -export(dtc_timepart) export(fmt_cmp) export(generate_oak_id_vars) export(hardcode_ct) diff --git a/R/derive_blfl.R b/R/derive_blfl.R index 0c3e2508..5acb50ef 100644 --- a/R/derive_blfl.R +++ b/R/derive_blfl.R @@ -9,19 +9,6 @@ #' #' @return Character vector containing ISO8601 dates. #' -#' @examples -#' ## Partial or missing dates set to NA by default -#' sdtm.oak:::dtc_datepart( -#' c(NA, "", "2021", "2021-12", "2021-12-25", "2021-12-25T12:00:00") -#' ) -#' # |--> c(NA, NA, NA, NA, "2021-12-25", "2021-12-25") -#' -#' ## Prevent partial or missing dates from being set to NA -#' sdtm.oak:::dtc_datepart( -#' c(NA, "", "2021", "2021-12", "2021-12-25", "2021-12-25T12:00:00"), -#' partial_as_na = FALSE -#' ) -#' # |--> c(NA, "", "2021", "2021-12", "2021-12-25", "2021-12-25") #' @keywords internal dtc_datepart <- function(dtc, partial_as_na = TRUE) { # Assert that dtc is a character vector @@ -51,36 +38,9 @@ dtc_datepart <- function(dtc, partial_as_na = TRUE) { #' seconds should be ignored (default is `TRUE`). #' #' @return Character vector containing ISO 8601 times. -#' @export #' #' @keywords internal -#' ## Partial or missing times set to NA and seconds ignored by default -#' sdtm.oak:::dtc_timepart( -#' c(NA, "", "2021-12-25", "2021-12-25T12", "2021-12-25T12:30", "2021-12-25T12:30:59") -#' ) -#' # |--> c(NA, NA, NA, NA, "12:30", "12:30") -#' -#' ## Prevent partial or missing times from being set to NA -#' sdtm.oak:::dtc_timepart( -#' c(NA, "", "2021-12-25", "2021-12-25T12", "2021-12-25T12:30", "2021-12-25T12:30:59"), -#' partial_as_na = FALSE -#' ) -#' # |--> c(NA, "", "", "12", "12:30", "12:30") -#' -#' ## Do not ignore seconds, partial or missing times set to NA -#' sdtm.oak:::dtc_timepart( -#' c(NA, "", "2021-12-25", "2021-12-25T12", "2021-12-25T12:30", "2021-12-25T12:30:59"), -#' ignore_seconds = FALSE -#' ) -#' # |--> c(NA, NA, NA, NA, NA, "12:30:59") -#' -#' ## Do not ignore seconds and prevent partial or missing times from being set to NA -#' sdtm.oak:::dtc_timepart( -#' c(NA, "", "2021-12-25", "2021-12-25T12", "2021-12-25T12:30", "2021-12-25T12:30:59"), -#' partial_as_na = FALSE, -#' ignore_seconds = FALSE -#' ) -#' # |--> c(NA, "", "", "12", "12:30", "12:30:59") + dtc_timepart <- function(dtc, partial_as_na = TRUE, ignore_seconds = TRUE) { # Assert that dtc is a character vector admiraldev::assert_character_vector(dtc) diff --git a/R/dtc_create_iso8601.R b/R/dtc_create_iso8601.R index 24c9e3e1..9861ffe6 100644 --- a/R/dtc_create_iso8601.R +++ b/R/dtc_create_iso8601.R @@ -9,9 +9,6 @@ mon_abb_to_mon_num <- stats::setNames(sprintf("%02d", seq_along(month.abb)), tol #' #' @returns A character vector. #' -#' @examples -#' sdtm.oak:::iso8601_na(c("10", NA_character_)) -#' #' @keywords internal iso8601_na <- function(x) { admiraldev::assert_character_vector(x) @@ -30,13 +27,6 @@ iso8601_na <- function(x) { #' #' @returns A character vector. #' -#' @examples -#' sdtm.oak:::zero_pad_whole_number(c(-1, 0, 1)) -#' -#' sdtm.oak:::zero_pad_whole_number(c(-1, 0, 1, 10, 99, 100), n = 2) -#' -#' sdtm.oak:::zero_pad_whole_number(c(-1, 0, 1, 10, 99, 100), n = 3) -#' #' @keywords internal zero_pad_whole_number <- function(x, n = 2L) { # Check `x` @@ -69,18 +59,6 @@ zero_pad_whole_number <- function(x, n = 2L) { #' #' @returns An integer vector. #' -#' @examples -#' sdtm.oak:::yy_to_yyyy(0:5) -#' sdtm.oak:::yy_to_yyyy(2000:2005) -#' -#' sdtm.oak:::yy_to_yyyy(90:99) -#' sdtm.oak:::yy_to_yyyy(1990:1999) -#' -#' # NB: change in behavior after 68 -#' sdtm.oak:::yy_to_yyyy(65:72) -#' -#' sdtm.oak:::yy_to_yyyy(1965:1972) -#' #' @keywords internal yy_to_yyyy <- function(x, cutoff_2000 = 68L) { # Check `x` @@ -105,10 +83,6 @@ yy_to_yyyy <- function(x, cutoff_2000 = 68L) { #' #' @returns A character vector of the same size as `x`. #' -#' @examples -#' x <- c("0", "00", "1", "01", "42", "100", NA_character_, "1.") -#' sdtm.oak:::iso8601_two_digits(x) -#' #' @keywords internal iso8601_two_digits <- function(x) { admiraldev::assert_character_vector(x) @@ -132,17 +106,6 @@ iso8601_min <- iso8601_two_digits #' #' @returns A character vector. #' -#' @examples -#' sdtm.oak:::iso8601_year(c("0", "1", "2", "50", "68", "69", "90", "99", "00")) -#' -#' # Be default, `cutoff_2000` is at 68. -#' sdtm.oak:::iso8601_year(c("67", "68", "69", "70")) -#' sdtm.oak:::iso8601_year(c("1967", "1968", "1969", "1970")) -#' -#' # Change it to something else, e.g. `cutoff_2000 = 25`. -#' sdtm.oak:::iso8601_year(as.character(0:50), cutoff_2000 = 25) -#' sdtm.oak:::iso8601_year(as.character(1900:1950), cutoff_2000 = 25) -#' #' @keywords internal iso8601_year <- function(x, cutoff_2000 = 68L) { admiraldev::assert_character_vector(x) @@ -161,17 +124,6 @@ iso8601_year <- function(x, cutoff_2000 = 68L) { #' #' @returns A character vector. #' -#' @examples -#' sdtm.oak:::iso8601_mon(c(NA, "0", "1", "2", "10", "11", "12")) -#' -#' # No semantic validation is performed on the numeric months, so `"13"` stays -#' # `"13"` but representations that can't be represented as two-digit numbers -#' # become `NA`. -#' sdtm.oak:::iso8601_mon(c("13", "99", "100", "-1")) -#' -#' (mon <- month.abb) -#' sdtm.oak:::iso8601_mon(mon) -#' #' @keywords internal iso8601_mon <- function(x) { x <- tolower(x) @@ -190,9 +142,6 @@ iso8601_mon <- function(x) { #' #' @returns A character vector. #' -#' @examples -#' sdtm.oak:::iso8601_sec(c(NA, "0", "1", "10", "59", "99", "100")) -#' #' @keywords internal iso8601_sec <- function(x) { x_iso8601 <- stringr::str_extract(x, "^\\d?\\d(\\.\\d*)?$") @@ -212,36 +161,6 @@ iso8601_sec <- function(x) { #' @param x A character vector. #' #' @returns A character vector. -#' -#' @examples -#' x <- -#' c( -#' "1999-01-01T15:20:01", -#' "1999-01-01T15:20:-", -#' "1999-01-01T15:-:-", -#' "1999-01-01T-:-:-", -#' "1999-01--T-:-:-", -#' "1999----T-:-:-", -#' "-----T-:-:-" -#' ) -#' -#' sdtm.oak:::iso8601_truncate(x) -#' -#' # With `empty_as_na = FALSE` empty strings are not replaced with `NA` -#' sdtm.oak:::iso8601_truncate("-----T-:-:-", empty_as_na = TRUE) -#' sdtm.oak:::iso8601_truncate("-----T-:-:-", empty_as_na = FALSE) -#' -#' # Truncation only happens if missing components are the right most end, -#' # otherwise they remain unaltered. -#' sdtm.oak:::iso8601_truncate( -#' c( -#' "1999----T15:20:01", -#' "1999-01-01T-:20:01", -#' "1999-01-01T-:-:01", -#' "1999-01-01T-:-:-" -#' ) -#' ) -#' #' @keywords internal iso8601_truncate <- function(x, empty_as_na = TRUE) { x <- stringr::str_remove(x, "[^\\d]*$") @@ -265,23 +184,6 @@ iso8601_truncate <- function(x, empty_as_na = TRUE) { #' #' @returns A character vector with date-times following the ISO8601 format. #' -#' @examples -#' cols <- c("year", "mon", "mday", "hour", "min", "sec") -#' m <- matrix( -#' c( -#' "99", "00", "01", -#' "Jan", "feb", "03", -#' "1", "01", "31", -#' "00", "12", "23", -#' "00", "59", "10", -#' "42", "5.15", NA -#' ), -#' ncol = 6, -#' dimnames = list(c(), cols) -#' ) -#' -#' sdtm.oak:::format_iso8601(m) -#' #' @keywords internal format_iso8601 <- function(m, .cutoff_2000 = 68L) { admiraldev::assert_integer_scalar(.cutoff_2000) diff --git a/R/dtc_parse_dttm.R b/R/dtc_parse_dttm.R index 0211eb09..fa0a4bb0 100644 --- a/R/dtc_parse_dttm.R +++ b/R/dtc_parse_dttm.R @@ -64,32 +64,6 @@ parse_dttm_ <- function(dttm, #' `"hour"`, `"min"` and `"sec"`. Each row corresponds to an element in #' `dttm`. Each element of the matrix is the parsed date/time component. #' -#' @examples -#' sdtm.oak:::parse_dttm("2020", "y") -#' sdtm.oak:::parse_dttm("2020-05", "y") -#' -#' sdtm.oak:::parse_dttm("2020-05", "y-m") -#' sdtm.oak:::parse_dttm("2020-05-11", "y-m-d") -#' -#' sdtm.oak:::parse_dttm("2020 05 11", "y m d") -#' sdtm.oak:::parse_dttm("2020 05 11", "y m d") -#' sdtm.oak:::parse_dttm("2020 05 11", "y\\s+m\\s+d") -#' sdtm.oak:::parse_dttm("2020 05 11", "y\\s+m\\s+d") -#' -#' sdtm.oak:::parse_dttm("2020-05-11 11:45", "y-m-d H:M") -#' sdtm.oak:::parse_dttm("2020-05-11 11:45:15.6", "y-m-d H:M:S") -#' -#' sdtm.oak:::parse_dttm(c("2002-05-11 11:45", "-05-11 11:45"), "y-m-d H:M") -#' sdtm.oak:::parse_dttm(c("2002-05-11 11:45", "-05-11 11:45"), "-m-d H:M") -#' sdtm.oak:::parse_dttm(c("2002-05-11 11:45", "-05-11 11:45"), c("y-m-d H:M", "-m-d H:M")) -#' -#' sdtm.oak:::parse_dttm("05 feb 1985 12 55 02", "d m y H M S") -#' sdtm.oak:::parse_dttm("12 55 02 05 feb 1985", "H M S d m y") -#' -#' sdtm.oak:::parse_dttm(c("2020-05-18", "2020-UN-18", "2020-UNK-UN"), "y-m-d") -#' sdtm.oak:::parse_dttm(c("2020-05-18", "2020-UN-18", "2020-UNK-UN"), "y-m-d", na = "UN") -#' sdtm.oak:::parse_dttm(c("2020-05-18", "2020-UN-18", "2020-UNK-UN"), "y-m-d", na = c("UN", "UNK")) -#' #' @keywords internal parse_dttm <- function(dttm, fmt, diff --git a/R/dtc_problems.R b/R/dtc_problems.R index 9c23544e..81c74471 100644 --- a/R/dtc_problems.R +++ b/R/dtc_problems.R @@ -31,16 +31,6 @@ #' or an annotated `x`, meaning having a `problems` attribute that holds #' parsing issues (see the Details section). #' -#' @examples -#' date <- c("2000-01-05", "", "1980-06-18", "1979-09-07") -#' time <- c("001221", "22:35:05", "03:00:15", "07:09:00") -#' dtc <- list(date, time) -#' dttm <- c("2000-01-05", "T22:35:05", "1980-06-18T03:00:15", "1979-09-07T07:09:00") -#' is_problem <- c(TRUE, TRUE, FALSE, FALSE) -#' -#' dttm2 <- sdtm.oak:::add_problems(dttm, is_problem, dtc) -#' sdtm.oak:::problems(dttm2) -#' #' @keywords internal add_problems <- function(x, is_problem, dtc) { is_x_na <- is_problem @@ -89,30 +79,6 @@ add_problems <- function(x, is_problem, dtc) { #' passed as inputs to [create_iso8601()], i.e. whose length matches the #' number of rows of the capture matrices in `cap_matrices`. #' -#' @examples -#' # No problem (return value is `FALSE`). -#' sdtm.oak:::any_problems(list(sdtm.oak:::parse_dttm("1980-06-18", "y-m-d"))) -#' -#' # Now the parsing fails (return value is `TRUE`). -#' sdtm.oak:::any_problems(list(sdtm.oak:::parse_dttm("1980-06-18", "ymd"))) -#' -#' # Find if there has been a problem in either in the `date` or `time` inputs. -#' # The following problems are expected with: -#' # - `"2001/12/25"` as it won't be parsed with the format `"y-m-d"` -#' # - `"00h12m21"` as it won't be parsed with the format `"H:M:S"`. -#' # -#' date <- c("2000-01-05", "2001/12/25", "1980-06-18", "1979-09-07") -#' time <- c("00h12m21", "22:35:05", "03:00:15", "07:09:00") -#' -#' cap_matrix_date <- sdtm.oak:::parse_dttm(date, "y-m-d") -#' cap_matrix_time <- sdtm.oak:::parse_dttm(time, "H:M:S") -#' -#' (cap_matrices <- list(cap_matrix_date, cap_matrix_time)) -#' -#' # `any_problems()` returns `TRUE` for the first two elements because of the -#' # failure to parse `"2001/12/25"` and `"00h12m21"`, respectively. -#' sdtm.oak:::any_problems(cap_matrices) -#' #' @keywords internal any_problems <- function(cap_matrices, .cutoff_2000 = 68L) { cap_matrices |> diff --git a/R/dtc_utils.R b/R/dtc_utils.R index 00068168..4ee2d3b7 100644 --- a/R/dtc_utils.R +++ b/R/dtc_utils.R @@ -6,14 +6,6 @@ #' #' @param fmt A character vector. #' -#' @examples -#' sdtm.oak:::assert_dtc_fmt(c("ymd", "y m d", "dmy", "HM", "H:M:S", "y-m-d H:M:S")) -#' -#' # This example is guarded to avoid throwing errors -#' if (FALSE) { -#' sdtm.oak:::assert_dtc_fmt("y years m months d days") -#' } -#' #' @keywords internal assert_dtc_fmt <- function(fmt) { rlang::arg_match(fmt, @@ -35,17 +27,6 @@ assert_dtc_fmt <- function(fmt) { #' #' Otherwise, it returns `.format` invisibly. #' -#' @examples -#' sdtm.oak:::assert_dtc_format("ymd") -#' sdtm.oak:::assert_dtc_format(c("ymd", "y-m-d")) -#' sdtm.oak:::assert_dtc_format(list(c("ymd", "y-m-d"), "H:M:S")) -#' -#' # These commands should throw an error -#' if (FALSE) { -#' # Note that `"year, month, day"` is not a supported format. -#' sdtm.oak:::assert_dtc_format("year, month, day") -#' } -#' #' @keywords internal assert_dtc_format <- function(.format) { abort_msg <- "`.format` must be either a character vector of formats of a list thereof." @@ -79,18 +60,6 @@ assert_dtc_format <- function(.format) { #' #' Otherwise, it returns `m` invisibly. #' -#' @examples -#' cols <- c("year", "mon", "mday", "hour", "min", "sec") -#' m <- matrix(NA_character_, nrow = 1L, ncol = 6L, dimnames = list(NULL, cols)) -#' sdtm.oak:::assert_capture_matrix(m) -#' -#' # These commands should throw an error -#' if (FALSE) { -#' sdtm.oak:::assert_capture_matrix(character()) -#' sdtm.oak:::assert_capture_matrix(matrix(data = NA_character_, nrow = 0, ncol = 0)) -#' sdtm.oak:::assert_capture_matrix(matrix(data = NA_character_, nrow = 1)) -#' } -#' #' @keywords internal assert_capture_matrix <- function(m) { # `m` must be of character type. @@ -120,20 +89,6 @@ assert_capture_matrix <- function(m) { #' @returns A character matrix that contains the columns `year`, `mon`, `mday`, #' `hour`, `min` and `sec`. Any other existing columns are dropped. #' -#' @examples -#' sdtm.oak:::complete_capture_matrix(matrix(data = NA_character_, nrow = 0, ncol = 0)) -#' sdtm.oak:::complete_capture_matrix(matrix(data = NA_character_, nrow = 1)) -#' -#' # m <- matrix(NA_character_, nrow = 1, ncol = 2, dimnames = list(NULL, c("year", "sec"))) -#' # sdtm.oak:::complete_capture_matrix(m) -#' -#' # m <- matrix(c("2020", "10"), nrow = 1, ncol = 2, dimnames = list(NULL, c("year", "sec"))) -#' # sdtm.oak:::complete_capture_matrix(m) -#' -#' # Any other existing columns are dropped. -#' # m <- matrix(c("2020", "10"), nrow = 1, ncol = 2, dimnames = list(NULL, c("semester", "quarter"))) -#' # sdtm.oak:::complete_capture_matrix(m) -#' #' @keywords internal complete_capture_matrix <- function(m) { @@ -172,23 +127,6 @@ complete_capture_matrix <- #' @returns A single capture matrix whose values have been coalesced in the #' sense of [coalesce()][dplyr::coalesce]. #' -#' @examples -#' cols <- c("year", "mon", "mday", "hour", "min", "sec") -#' dates <- c("2020", "01", "01", "20", NA, NA) -#' times <- c(NA, NA, NA, "10", "00", "05") -#' m_dates <- matrix(dates, nrow = 1L, ncol = 6L, dimnames = list(NULL, cols)) -#' m_times <- matrix(times, nrow = 1L, ncol = 6L, dimnames = list(NULL, cols)) -#' -#' # Note how the hour "20" takes precedence over "10" -#' sdtm.oak:::coalesce_capture_matrices(m_dates, m_times) -#' -#' # Reverse the order of the inputs and now hour "10" takes precedence -#' sdtm.oak:::coalesce_capture_matrices(m_times, m_dates) -#' -#' # Single inputs should result in the same output as the input -#' sdtm.oak:::coalesce_capture_matrices(m_dates) -#' sdtm.oak:::coalesce_capture_matrices(m_times) -#' #' @keywords internal coalesce_capture_matrices <- function(...) { dots <- rlang::list2(...) diff --git a/R/oak_id_vars.R b/R/oak_id_vars.R index 19ef3323..3a323b2b 100644 --- a/R/oak_id_vars.R +++ b/R/oak_id_vars.R @@ -12,11 +12,6 @@ #' @returns A character vector of column names to be regarded #' as keys in raw datasets. #' -#' @examples -#' oak_id_vars() -#' -#' oak_id_vars(extra_vars = "sample_id") -#' #' @export oak_id_vars <- function(extra_vars = NULL) { admiraldev::assert_character_vector(extra_vars, optional = TRUE) @@ -33,19 +28,6 @@ oak_id_vars <- function(extra_vars = NULL) { #' #' @returns A logical scalar value. #' -#' @examples -#' # `oak_id_vars()` is the function that defines what are the minimal set of -#' # oak keys. Hence, by definition, the following code should always return -#' # `TRUE`. -#' sdtm.oak:::contains_oak_id_vars(oak_id_vars()) -#' -#' # Returns `FALSE`. -#' sdtm.oak:::contains_oak_id_vars(character()) -#' -#' # Another example that returns `FALSE` because it is missing -#' # `"patient_number"`. -#' sdtm.oak:::contains_oak_id_vars(c("oak_id", "raw_source")) -#' #' @keywords internal contains_oak_id_vars <- function(x) { admiraldev::assert_character_vector(x) diff --git a/R/parse_dttm_fmt.R b/R/parse_dttm_fmt.R index eeb1e371..534ff7b9 100644 --- a/R/parse_dttm_fmt.R +++ b/R/parse_dttm_fmt.R @@ -183,14 +183,6 @@ assert_fmt_c <- function(x) { #' #' @returns A character scalar of the resulting regex. #' -#' @examples -#' # A regex for matching either "jan" or "feb" -#' sdtm.oak:::regex_or(c("jan", "feb")) -#' -#' # Setting `.open` and/or `.close` to `TRUE` can be handy if this regex -#' # is to be combined into a larger regex. -#' paste0(sdtm.oak:::regex_or(c("jan", "feb"), .close = TRUE), r"{\d{2}}") -#' #' @keywords internal regex_or <- function(x, .open = FALSE, .close = FALSE) { admiraldev::assert_character_vector(x) @@ -226,22 +218,6 @@ regex_or <- function(x, .open = FALSE, .close = FALSE) { #' @returns A named character vector of named patterns (regexps) for matching #' each date/time component. #' -#' @examples -#' # Default regexps -#' sdtm.oak:::fmt_rg() -#' -#' # You may change the way months are matched, e.g. you might not want to match -#' # month abbreviations, i.e. only numerical months. So pass an explicit regex -#' # for numerical months: -#' sdtm.oak:::fmt_rg(mon = r"[\b\d|\d{2}]") -#' -#' # Make date/time components accept `"UNK"` as a possible pattern (useful -#' # to match funny codes for `NA`). -#' sdtm.oak:::fmt_rg(na = "UNK") -#' -#' # Or be more specific and use `"UNK"` for the year component only. -#' sdtm.oak:::fmt_rg(year_na = "UNK") -#' #' @keywords internal fmt_rg <- function( sec = r"[(\b\d|\d{2})(\.\d*)?]", @@ -346,28 +322,6 @@ parse_dttm_fmt_ <- function(fmt, pattern) { #' Each row is for either a date/time format component or a "delimiter" string #' or pattern in-between format components. #' -#' @examples -#' sdtm.oak:::parse_dttm_fmt("ymd") -#' sdtm.oak:::parse_dttm_fmt("H:M:S") -#' -#' sdtm.oak:::parse_dttm_fmt("ymd HMS") -#' -#' # Repeating the same special patterns, e.g. "yy" still counts as one pattern -#' # only. -#' sdtm.oak:::parse_dttm_fmt("yymmdd HHMMSS") -#' -#' # Note that `"y"`, `"m"`, `"d"`, `"H"`, `"M"` or `"S"` are reserved patterns -#' # that are matched first and interpreted as format components. # Example: the -#' # first "y" in "year" is parsed as meaning year followed by "ear y". The -#' # second "y" is not longer matched because a first match already # succeded. -#' sdtm.oak:::parse_dttm_fmt("year y") -#' -#' # Specify custom patterns -#' sdtm.oak:::parse_dttm_fmt( -#' "year month day", -#' fmt_cmp(year = "year", mon = "month", mday = "day") -#' ) -#' #' @keywords internal parse_dttm_fmt <- function(fmt, patterns = fmt_cmp()) { admiraldev::assert_character_scalar(fmt) @@ -428,15 +382,6 @@ parse_dttm_fmt <- function(fmt, patterns = fmt_cmp()) { #' @returns A string containing a regular expression for matching date/time #' components according to a format. #' -#' @examples -#' sdtm.oak:::dttm_fmt_to_regex("y") -#' sdtm.oak:::dttm_fmt_to_regex("y", anchored = FALSE) -#' -#' sdtm.oak:::dttm_fmt_to_regex("m") -#' sdtm.oak:::dttm_fmt_to_regex("ymd") -#' -#' sdtm.oak:::dttm_fmt_to_regex("ymd HH:MM:SS") -#' #' @keywords internal dttm_fmt_to_regex <- function(fmt, fmt_regex = fmt_rg(), fmt_c = fmt_cmp(), anchored = TRUE) { tbl_fmt_c <- parse_dttm_fmt(fmt, patterns = fmt_c) diff --git a/R/recode.R b/R/recode.R index 424096b2..f26ee431 100644 --- a/R/recode.R +++ b/R/recode.R @@ -11,8 +11,6 @@ #' of the matched values from the `from` vector. If an element in `x` does not #' match any value in `from`, the corresponding position in the output will be #' `NA`. This index information is critical for subsequent recoding operations. -#' @examples -#' sdtm.oak:::index_for_recode(x = 1:5, from = c(2, 4)) #' #' @keywords internal index_for_recode <- function(x, from) { @@ -33,25 +31,6 @@ index_for_recode <- function(x, from) { #' #' @returns A vector of recoded values. #' -#' @examples -#' x <- c("male", "female", "x", NA) -#' sdtm.oak:::recode(x, -#' from = c("male", "female"), -#' to = c("M", "F") -#' ) -#' sdtm.oak:::recode( -#' x, -#' from = c("male", "female"), -#' to = c("M", "F"), -#' .no_match = "?" -#' ) -#' sdtm.oak:::recode( -#' x, -#' from = c("male", "female"), -#' to = c("M", "F"), -#' .na = "missing" -#' ) -#' #' @keywords internal recode <- function( x, diff --git a/man/add_problems.Rd b/man/add_problems.Rd index 23005a07..1e1ea7db 100644 --- a/man/add_problems.Rd +++ b/man/add_problems.Rd @@ -40,16 +40,5 @@ named \code{..i} that indicates the date/time index of the problematic date/time in \code{x}, and as many extra columns as there were inputs (passed in \code{dtc}). If \code{dtc} is named, then those names are used to name the extra columns, otherwise they get named sequentially like so \code{..var1}, \code{..var2}, etc.. -} -\examples{ -date <- c("2000-01-05", "", "1980-06-18", "1979-09-07") -time <- c("001221", "22:35:05", "03:00:15", "07:09:00") -dtc <- list(date, time) -dttm <- c("2000-01-05", "T22:35:05", "1980-06-18T03:00:15", "1979-09-07T07:09:00") -is_problem <- c(TRUE, TRUE, FALSE, FALSE) - -dttm2 <- sdtm.oak:::add_problems(dttm, is_problem, dtc) -sdtm.oak:::problems(dttm2) - } \keyword{internal} diff --git a/man/any_problems.Rd b/man/any_problems.Rd index 36f01c96..f9f325f9 100644 --- a/man/any_problems.Rd +++ b/man/any_problems.Rd @@ -27,30 +27,5 @@ failure in at least one of the inputs to \code{\link[=create_iso8601]{create_iso is an internal function to be used in the context of \code{\link[=create_iso8601]{create_iso8601()}} source code and hence each capture matrix corresponds to one input to \code{\link[=create_iso8601]{create_iso8601()}}. -} -\examples{ -# No problem (return value is `FALSE`). -sdtm.oak:::any_problems(list(sdtm.oak:::parse_dttm("1980-06-18", "y-m-d"))) - -# Now the parsing fails (return value is `TRUE`). -sdtm.oak:::any_problems(list(sdtm.oak:::parse_dttm("1980-06-18", "ymd"))) - -# Find if there has been a problem in either in the `date` or `time` inputs. -# The following problems are expected with: -# - `"2001/12/25"` as it won't be parsed with the format `"y-m-d"` -# - `"00h12m21"` as it won't be parsed with the format `"H:M:S"`. -# -date <- c("2000-01-05", "2001/12/25", "1980-06-18", "1979-09-07") -time <- c("00h12m21", "22:35:05", "03:00:15", "07:09:00") - -cap_matrix_date <- sdtm.oak:::parse_dttm(date, "y-m-d") -cap_matrix_time <- sdtm.oak:::parse_dttm(time, "H:M:S") - -(cap_matrices <- list(cap_matrix_date, cap_matrix_time)) - -# `any_problems()` returns `TRUE` for the first two elements because of the -# failure to parse `"2001/12/25"` and `"00h12m21"`, respectively. -sdtm.oak:::any_problems(cap_matrices) - } \keyword{internal} diff --git a/man/assert_capture_matrix.Rd b/man/assert_capture_matrix.Rd index 8d5db779..bd5594b2 100644 --- a/man/assert_capture_matrix.Rd +++ b/man/assert_capture_matrix.Rd @@ -26,18 +26,5 @@ returned by \code{\link[=parse_dttm]{parse_dttm()}}: capture matrix. This function checks that the capture matrix is a matrix and that it contains six columns: \code{year}, \code{mon}, \code{mday}, \code{hour}, \code{min} and \code{sec}. -} -\examples{ -cols <- c("year", "mon", "mday", "hour", "min", "sec") -m <- matrix(NA_character_, nrow = 1L, ncol = 6L, dimnames = list(NULL, cols)) -sdtm.oak:::assert_capture_matrix(m) - -# These commands should throw an error -if (FALSE) { - sdtm.oak:::assert_capture_matrix(character()) - sdtm.oak:::assert_capture_matrix(matrix(data = NA_character_, nrow = 0, ncol = 0)) - sdtm.oak:::assert_capture_matrix(matrix(data = NA_character_, nrow = 1)) -} - } \keyword{internal} diff --git a/man/assert_ct_clst.Rd b/man/assert_ct_clst.Rd index c0239c97..aeb5e85a 100644 --- a/man/assert_ct_clst.Rd +++ b/man/assert_ct_clst.Rd @@ -23,21 +23,5 @@ invisibly. \description{ \code{\link[=assert_ct_clst]{assert_ct_clst()}} asserts the validity of a codelist code in the context of a controlled terminology specification. -} -\examples{ -# Load a controlled terminology example. -(ct_spec <- read_ct_spec_example("ct-01-cm")) - -# Should work fine. -sdtm.oak:::assert_ct_clst(ct_spec = ct_spec, ct_clst = "C71113") - -# In certain cases, you might allow `ct_clst` to be `NULL` as to indicate absence, -# in that case, set `optional` to `TRUE` to make `assert_ct_clst()` more -# forgiving. -sdtm.oak:::assert_ct_clst(ct_spec = ct_spec, ct_clst = NULL, optional = TRUE) - -# Otherwise it would err. -try(sdtm.oak:::assert_ct_clst(ct_spec = ct_spec, ct_clst = NULL, optional = FALSE)) - } \keyword{internal} diff --git a/man/assert_ct_spec.Rd b/man/assert_ct_spec.Rd index 5ce72ce4..589faf80 100644 --- a/man/assert_ct_spec.Rd +++ b/man/assert_ct_spec.Rd @@ -21,25 +21,5 @@ variables: codelist_code, collected_value, term_synonyms, and term_value. In addition, it will also check if the data frame is not empty (no rows), and whether the columns \code{codelist_code} and \code{term_value} do not contain any \code{NA} values. -} -\examples{ -# If `ct_spec` is a valid controlled terminology then it is returned invisibly. -ct_spec_01 <- read_ct_spec_example("ct-01-cm") -all.equal(ct_spec_01, sdtm.oak:::assert_ct_spec(ct_spec_01)) - -# A minimal set of variables needs to be present in `ct_spec` for it to pass the -# assertion; `sdtm.oak:::ct_spec_vars()` defines their names. -(req_vars <- sdtm.oak:::ct_spec_vars()) - -# Other (facultative) variables also present in the controlled terminology -# example. -(opt_vars <- setdiff(colnames(ct_spec_01), req_vars)) - -# With only the mandatory variables, the assertion still passes. -sdtm.oak:::assert_ct_spec(ct_spec_01[req_vars]) - -# Not having the required variables results in an error. -try(sdtm.oak:::assert_ct_spec(ct_spec_01[opt_vars])) - } \keyword{internal} diff --git a/man/assert_dtc_fmt.Rd b/man/assert_dtc_fmt.Rd index c7868e5c..cd6794ac 100644 --- a/man/assert_dtc_fmt.Rd +++ b/man/assert_dtc_fmt.Rd @@ -13,14 +13,5 @@ assert_dtc_fmt(fmt) \code{\link[=assert_dtc_fmt]{assert_dtc_fmt()}} takes a character vector of date/time formats and checks if the formats are supported, meaning it checks if they are one of the formats listed in column \code{fmt} of \link{dtc_formats}, failing with an error otherwise. -} -\examples{ -sdtm.oak:::assert_dtc_fmt(c("ymd", "y m d", "dmy", "HM", "H:M:S", "y-m-d H:M:S")) - -# This example is guarded to avoid throwing errors -if (FALSE) { - sdtm.oak:::assert_dtc_fmt("y years m months d days") -} - } \keyword{internal} diff --git a/man/assert_dtc_format.Rd b/man/assert_dtc_format.Rd index fe19d9e6..74c6b47c 100644 --- a/man/assert_dtc_format.Rd +++ b/man/assert_dtc_format.Rd @@ -21,17 +21,5 @@ Otherwise, it returns \code{.format} invisibly. \description{ \code{\link[=assert_dtc_format]{assert_dtc_format()}} is an internal helper function aiding with the checking of the \code{.format} parameter of \code{\link[=create_iso8601]{create_iso8601()}}. -} -\examples{ -sdtm.oak:::assert_dtc_format("ymd") -sdtm.oak:::assert_dtc_format(c("ymd", "y-m-d")) -sdtm.oak:::assert_dtc_format(list(c("ymd", "y-m-d"), "H:M:S")) - -# These commands should throw an error -if (FALSE) { - # Note that `"year, month, day"` is not a supported format. - sdtm.oak:::assert_dtc_format("year, month, day") -} - } \keyword{internal} diff --git a/man/coalesce_capture_matrices.Rd b/man/coalesce_capture_matrices.Rd index 5b32bbfc..6de1fc06 100644 --- a/man/coalesce_capture_matrices.Rd +++ b/man/coalesce_capture_matrices.Rd @@ -18,23 +18,5 @@ sense of \link[dplyr:coalesce]{coalesce()}. Each argument of \code{...} should be a capture matrix in the sense of the output by \code{\link[=complete_capture_matrix]{complete_capture_matrix()}}, meaning a character matrix of six columns whose names are: \code{year}, \code{mon}, \code{mday}, \code{hour}, \code{min} or \code{sec}. -} -\examples{ -cols <- c("year", "mon", "mday", "hour", "min", "sec") -dates <- c("2020", "01", "01", "20", NA, NA) -times <- c(NA, NA, NA, "10", "00", "05") -m_dates <- matrix(dates, nrow = 1L, ncol = 6L, dimnames = list(NULL, cols)) -m_times <- matrix(times, nrow = 1L, ncol = 6L, dimnames = list(NULL, cols)) - -# Note how the hour "20" takes precedence over "10" -sdtm.oak:::coalesce_capture_matrices(m_dates, m_times) - -# Reverse the order of the inputs and now hour "10" takes precedence -sdtm.oak:::coalesce_capture_matrices(m_times, m_dates) - -# Single inputs should result in the same output as the input -sdtm.oak:::coalesce_capture_matrices(m_dates) -sdtm.oak:::coalesce_capture_matrices(m_times) - } \keyword{internal} diff --git a/man/complete_capture_matrix.Rd b/man/complete_capture_matrix.Rd index fd3f67ac..10b7ca64 100644 --- a/man/complete_capture_matrix.Rd +++ b/man/complete_capture_matrix.Rd @@ -17,20 +17,5 @@ A character matrix that contains the columns \code{year}, \code{mon}, \code{mday \description{ \code{\link[=complete_capture_matrix]{complete_capture_matrix()}} completes the missing, if any, columns of the capture matrix. -} -\examples{ -sdtm.oak:::complete_capture_matrix(matrix(data = NA_character_, nrow = 0, ncol = 0)) -sdtm.oak:::complete_capture_matrix(matrix(data = NA_character_, nrow = 1)) - -# m <- matrix(NA_character_, nrow = 1, ncol = 2, dimnames = list(NULL, c("year", "sec"))) -# sdtm.oak:::complete_capture_matrix(m) - -# m <- matrix(c("2020", "10"), nrow = 1, ncol = 2, dimnames = list(NULL, c("year", "sec"))) -# sdtm.oak:::complete_capture_matrix(m) - -# Any other existing columns are dropped. -# m <- matrix(c("2020", "10"), nrow = 1, ncol = 2, dimnames = list(NULL, c("semester", "quarter"))) -# sdtm.oak:::complete_capture_matrix(m) - } \keyword{internal} diff --git a/man/contains_oak_id_vars.Rd b/man/contains_oak_id_vars.Rd index 21bc97fb..f71b4479 100644 --- a/man/contains_oak_id_vars.Rd +++ b/man/contains_oak_id_vars.Rd @@ -16,19 +16,5 @@ A logical scalar value. \code{\link[=contains_oak_id_vars]{contains_oak_id_vars()}} evaluates whether a character vector \code{x} contains the raw dataset key variable names, i.e. the so called Oak identifier variables --- these are defined by the return value of \code{\link[=oak_id_vars]{oak_id_vars()}}. -} -\examples{ -# `oak_id_vars()` is the function that defines what are the minimal set of -# oak keys. Hence, by definition, the following code should always return -# `TRUE`. -sdtm.oak:::contains_oak_id_vars(oak_id_vars()) - -# Returns `FALSE`. -sdtm.oak:::contains_oak_id_vars(character()) - -# Another example that returns `FALSE` because it is missing -# `"patient_number"`. -sdtm.oak:::contains_oak_id_vars(c("oak_id", "raw_source")) - } \keyword{internal} diff --git a/man/ct_mappings.Rd b/man/ct_mappings.Rd index 83ca4898..77c814fc 100644 --- a/man/ct_mappings.Rd +++ b/man/ct_mappings.Rd @@ -30,18 +30,5 @@ one mapping per row. The resulting mappings are unique, i.e. if \code{from} values are duplicated in two \code{from} columns, the first column indicated in \code{from} takes precedence, and only that mapping is retained in the controlled terminology map. -} -\examples{ -# Read in a bundled controlled terminology spec example (ex. 01). -(ct_spec_01 <- read_ct_spec_example("ct-01-cm")) - -# Generate mappings from the terminology specification. -sdtm.oak:::ct_mappings(ct_spec = ct_spec_01) - -# Take a glimpse at those mappings where an actual recoding happens. -sdtm.oak:::ct_mappings(ct_spec = ct_spec_01) |> - dplyr::filter(from != to) |> - print(n = 20) - } \keyword{internal} diff --git a/man/ct_spec_vars.Rd b/man/ct_spec_vars.Rd index f21f006c..70fa1ace 100644 --- a/man/ct_spec_vars.Rd +++ b/man/ct_spec_vars.Rd @@ -19,21 +19,5 @@ If only the subset of variables used for matching terms are needed, then request this subset of variables by passing the argument value \code{"from"}. If only the mapping-to variable is to be requested, then simply pass \code{"to"}. If only the codelist code variable name is needed then pass \code{"ct_clst"}. -} -\examples{ -# These two calls are equivalent and return all required variables in a -# controlled terminology data set. -ct_spec_vars() -ct_spec_vars("all") - -# "Codelist code" variable name. -ct_spec_vars("ct_clst") - -# "From" variables -ct_spec_vars("from") - -# The "to" variable. -ct_spec_vars("to") - } \keyword{internal} diff --git a/man/dtc_datepart.Rd b/man/dtc_datepart.Rd index 7d2f05e6..0e168b81 100644 --- a/man/dtc_datepart.Rd +++ b/man/dtc_datepart.Rd @@ -19,18 +19,4 @@ Character vector containing ISO8601 dates. The date part is extracted from an ISO8601 date/time variable. By default, partial or missing dates are set to NA. } -\examples{ -## Partial or missing dates set to NA by default -sdtm.oak:::dtc_datepart( - c(NA, "", "2021", "2021-12", "2021-12-25", "2021-12-25T12:00:00") -) -# |--> c(NA, NA, NA, NA, "2021-12-25", "2021-12-25") - -## Prevent partial or missing dates from being set to NA -sdtm.oak:::dtc_datepart( - c(NA, "", "2021", "2021-12", "2021-12-25", "2021-12-25T12:00:00"), - partial_as_na = FALSE -) -# |--> c(NA, "", "2021", "2021-12", "2021-12-25", "2021-12-25") -} \keyword{internal} diff --git a/man/dtc_timepart.Rd b/man/dtc_timepart.Rd index 84d648a7..19b14a6f 100644 --- a/man/dtc_timepart.Rd +++ b/man/dtc_timepart.Rd @@ -23,47 +23,4 @@ The time part is extracted from an ISO 8601 date/time variable. By default, partial or missing times are set to NA, and seconds are ignored and not extracted. } -\keyword{"",} -\keyword{"12",} -\keyword{"12:30")} -\keyword{"12:30",} -\keyword{"12:30:59")} -\keyword{"2021-12-25",} -\keyword{"2021-12-25T12",} -\keyword{"2021-12-25T12:30",} -\keyword{"2021-12-25T12:30:59")} -\keyword{"2021-12-25T12:30:59"),} -\keyword{#} -\keyword{##} -\keyword{)} -\keyword{=} -\keyword{Do} -\keyword{FALSE} -\keyword{FALSE,} -\keyword{NA} -\keyword{NA,} -\keyword{Partial} -\keyword{Prevent} -\keyword{and} -\keyword{being} -\keyword{by} -\keyword{c(NA,} -\keyword{default} -\keyword{from} -\keyword{ignore} -\keyword{ignore_seconds} -\keyword{ignored} \keyword{internal} -\keyword{missing} -\keyword{not} -\keyword{or} -\keyword{partial} -\keyword{partial_as_na} -\keyword{prevent} -\keyword{sdtm.oak:::dtc_timepart(} -\keyword{seconds} -\keyword{seconds,} -\keyword{set} -\keyword{times} -\keyword{to} -\keyword{|-->} diff --git a/man/dttm_fmt_to_regex.Rd b/man/dttm_fmt_to_regex.Rd index 12510e9c..e8b3fa7c 100644 --- a/man/dttm_fmt_to_regex.Rd +++ b/man/dttm_fmt_to_regex.Rd @@ -29,15 +29,5 @@ components according to a format. date/time format components (as returned by \code{\link[=parse_dttm_fmt]{parse_dttm_fmt()}}), and a mapping of date/time component formats to regexps and generates a single regular expression with groups for matching each of the date/time components. -} -\examples{ -sdtm.oak:::dttm_fmt_to_regex("y") -sdtm.oak:::dttm_fmt_to_regex("y", anchored = FALSE) - -sdtm.oak:::dttm_fmt_to_regex("m") -sdtm.oak:::dttm_fmt_to_regex("ymd") - -sdtm.oak:::dttm_fmt_to_regex("ymd HH:MM:SS") - } \keyword{internal} diff --git a/man/eval_conditions.Rd b/man/eval_conditions.Rd index cafa6f5d..d6521e4d 100644 --- a/man/eval_conditions.Rd +++ b/man/eval_conditions.Rd @@ -35,41 +35,5 @@ expressions to be evaluated in the context of \code{dat} and \code{.env}. Variables are looked up in \code{dat}, then in \code{.env}, then in the calling function's environment, followed by its parent environments. -} -\examples{ -# Create a sample data frame -df <- data.frame( - x = c(1, 2, NA_integer_, 4, 5), - y = c(TRUE, FALSE, TRUE, FALSE, TRUE), - z = c("a", "b", "a", "b", "a") -) - -# Simple condition on one column -sdtm.oak:::eval_conditions(df, x > 2) - -# Combined conditions on multiple columns -sdtm.oak:::eval_conditions(df, x > 2 & y) -sdtm.oak:::eval_conditions(df, x > 2, y) - -# Using conditions with NA handling -df_with_na <- data.frame( - x = c(1, 2, NA, 4, 5), - y = c(TRUE, FALSE, TRUE, FALSE, TRUE) -) -sdtm.oak:::eval_conditions(df_with_na, x > 2, .na = FALSE) - -# The environment where `eval_conditions()` is called is also inspected -# when evaluating conditions in `...`. -w <- 1 -sdtm.oak:::eval_conditions(df, x > w) - -# Using an environment -env <- rlang::env(w = 2) -sdtm.oak:::eval_conditions(df, x > w, .env = env) - -# In place of an environment, you may alternatively pass a list or data frame. -sdtm.oak:::eval_conditions(df, x > w, .env = list(w = 3)) -sdtm.oak:::eval_conditions(df, x > w, .env = tibble::tibble(w = 4)) - } \keyword{internal} diff --git a/man/fmt_rg.Rd b/man/fmt_rg.Rd index 1c59944a..e5767e54 100644 --- a/man/fmt_rg.Rd +++ b/man/fmt_rg.Rd @@ -55,22 +55,5 @@ each date/time component. \description{ \code{\link[=fmt_rg]{fmt_rg()}} creates a character vector of named patterns to match individual date/time components. -} -\examples{ -# Default regexps -sdtm.oak:::fmt_rg() - -# You may change the way months are matched, e.g. you might not want to match -# month abbreviations, i.e. only numerical months. So pass an explicit regex -# for numerical months: -sdtm.oak:::fmt_rg(mon = r"[\b\d|\d{2}]") - -# Make date/time components accept `"UNK"` as a possible pattern (useful -# to match funny codes for `NA`). -sdtm.oak:::fmt_rg(na = "UNK") - -# Or be more specific and use `"UNK"` for the year component only. -sdtm.oak:::fmt_rg(year_na = "UNK") - } \keyword{internal} diff --git a/man/format_iso8601.Rd b/man/format_iso8601.Rd index ec101f24..b3a4bed2 100644 --- a/man/format_iso8601.Rd +++ b/man/format_iso8601.Rd @@ -23,23 +23,5 @@ converts each component to ISO8601 format. In practice this entails converting years to a four digit number, and month, day, hours, minutes and seconds to two-digit numbers. Not available (\code{NA}) components are converted to \code{"-"}. -} -\examples{ -cols <- c("year", "mon", "mday", "hour", "min", "sec") -m <- matrix( - c( - "99", "00", "01", - "Jan", "feb", "03", - "1", "01", "31", - "00", "12", "23", - "00", "59", "10", - "42", "5.15", NA - ), - ncol = 6, - dimnames = list(c(), cols) -) - -sdtm.oak:::format_iso8601(m) - } \keyword{internal} diff --git a/man/get_cnd_df_cnd.Rd b/man/get_cnd_df_cnd.Rd index ae3b3d7c..837ed932 100644 --- a/man/get_cnd_df_cnd.Rd +++ b/man/get_cnd_df_cnd.Rd @@ -17,14 +17,6 @@ frame (\code{cnd_df}). \description{ \code{\link[=get_cnd_df_cnd]{get_cnd_df_cnd()}} extracts the conditioning vector from a conditioned data frame, i.e. from an object of class \code{cnd_df}. -} -\examples{ -df <- data.frame(x = 1:3, y = letters[1:3]) -sdtm.oak:::get_cnd_df_cnd(df) - -cnd_df <- sdtm.oak:::new_cnd_df(dat = df, cnd = c(FALSE, NA, TRUE)) -sdtm.oak:::get_cnd_df_cnd(cnd_df) - } \seealso{ \code{\link[=new_cnd_df]{new_cnd_df()}}, \code{\link[=is_cnd_df]{is_cnd_df()}}, \code{\link[=get_cnd_df_cnd_sum]{get_cnd_df_cnd_sum()}}, diff --git a/man/get_cnd_df_cnd_sum.Rd b/man/get_cnd_df_cnd_sum.Rd index b7182b18..8bf60cac 100644 --- a/man/get_cnd_df_cnd_sum.Rd +++ b/man/get_cnd_df_cnd_sum.Rd @@ -17,14 +17,6 @@ a conditioned data frame (\code{cnd_df}). \description{ \code{\link[=get_cnd_df_cnd_sum]{get_cnd_df_cnd_sum()}} extracts the tally of the conditioning vector from a conditioned data frame. -} -\examples{ -df <- data.frame(x = 1:3, y = letters[1:3]) -sdtm.oak:::get_cnd_df_cnd_sum(df) - -cnd_df <- sdtm.oak:::new_cnd_df(dat = df, cnd = c(FALSE, NA, TRUE)) -sdtm.oak:::get_cnd_df_cnd_sum(cnd_df) - } \seealso{ \code{\link[=new_cnd_df]{new_cnd_df()}}, \code{\link[=is_cnd_df]{is_cnd_df()}}, \code{\link[=get_cnd_df_cnd]{get_cnd_df_cnd()}}, \code{\link[=rm_cnd_df]{rm_cnd_df()}}. diff --git a/man/index_for_recode.Rd b/man/index_for_recode.Rd index 2362517f..6e2360af 100644 --- a/man/index_for_recode.Rd +++ b/man/index_for_recode.Rd @@ -22,9 +22,5 @@ match any value in \code{from}, the corresponding position in the output will be any of the values specified in the \code{from} vector. This function is primarily used to facilitate the recoding of values by pinpointing which elements in \code{x} correspond to the \code{from} values and thus need to be replaced or updated. -} -\examples{ -sdtm.oak:::index_for_recode(x = 1:5, from = c(2, 4)) - } \keyword{internal} diff --git a/man/is_cnd_df.Rd b/man/is_cnd_df.Rd index 3cc10af8..041aa918 100644 --- a/man/is_cnd_df.Rd +++ b/man/is_cnd_df.Rd @@ -16,14 +16,6 @@ otherwise \code{FALSE}. \description{ \code{\link[=is_cnd_df]{is_cnd_df()}} checks whether a data frame is a conditioned data frame, i.e. of class \code{cnd_df}. -} -\examples{ -df <- data.frame(x = 1:3, y = letters[1:3]) -sdtm.oak:::is_cnd_df(df) - -cnd_df <- sdtm.oak:::new_cnd_df(dat = df, cnd = c(FALSE, NA, TRUE)) -sdtm.oak:::is_cnd_df(cnd_df) - } \seealso{ \code{\link[=new_cnd_df]{new_cnd_df()}}, \code{\link[=get_cnd_df_cnd]{get_cnd_df_cnd()}}, \code{\link[=get_cnd_df_cnd_sum]{get_cnd_df_cnd_sum()}}, diff --git a/man/is_seq_name.Rd b/man/is_seq_name.Rd index ad39c17b..78db84a2 100644 --- a/man/is_seq_name.Rd +++ b/man/is_seq_name.Rd @@ -14,17 +14,5 @@ A logical vector. } \description{ \code{\link[=is_seq_name]{is_seq_name()}} returns which variable names end in \code{"SEQ"}. -} -\examples{ -# A valid SEQ name. -sdtm.oak:::is_seq_name("AESEQ") - -# Not valid sequence number (`--SEQ`) variable names. -# Case matters. -sdtm.oak:::is_seq_name("AEseq") - -# A valid name has to end in "SEQ". -sdtm.oak:::is_seq_name("AESEQUENCE") - } \keyword{internal} diff --git a/man/iso8601_mon.Rd b/man/iso8601_mon.Rd index e6c9b69f..592244f3 100644 --- a/man/iso8601_mon.Rd +++ b/man/iso8601_mon.Rd @@ -15,17 +15,5 @@ A character vector. \description{ \code{\link[=iso8601_mon]{iso8601_mon()}} converts a character vector whose values represent numeric or abbreviated month names to zero-padded numeric months. -} -\examples{ -sdtm.oak:::iso8601_mon(c(NA, "0", "1", "2", "10", "11", "12")) - -# No semantic validation is performed on the numeric months, so `"13"` stays -# `"13"` but representations that can't be represented as two-digit numbers -# become `NA`. -sdtm.oak:::iso8601_mon(c("13", "99", "100", "-1")) - -(mon <- month.abb) -sdtm.oak:::iso8601_mon(mon) - } \keyword{internal} diff --git a/man/iso8601_na.Rd b/man/iso8601_na.Rd index 03f5a707..058a9e65 100644 --- a/man/iso8601_na.Rd +++ b/man/iso8601_na.Rd @@ -14,9 +14,5 @@ A character vector. } \description{ \code{\link[=iso8601_na]{iso8601_na()}} takes a character vector and converts \code{NA} values to \code{"-"}. -} -\examples{ -sdtm.oak:::iso8601_na(c("10", NA_character_)) - } \keyword{internal} diff --git a/man/iso8601_sec.Rd b/man/iso8601_sec.Rd index b788de71..bd30f796 100644 --- a/man/iso8601_sec.Rd +++ b/man/iso8601_sec.Rd @@ -14,9 +14,5 @@ A character vector. } \description{ \code{\link[=iso8601_sec]{iso8601_sec()}} converts a character vector whose values represent seconds. -} -\examples{ -sdtm.oak:::iso8601_sec(c(NA, "0", "1", "10", "59", "99", "100")) - } \keyword{internal} diff --git a/man/iso8601_truncate.Rd b/man/iso8601_truncate.Rd index 4c4a4eb6..02ee3fd7 100644 --- a/man/iso8601_truncate.Rd +++ b/man/iso8601_truncate.Rd @@ -16,35 +16,5 @@ A character vector. \code{\link[=iso8601_truncate]{iso8601_truncate()}} converts a character vector of ISO8601 dates, times or date-times that might be partial and truncates the format by removing those missing components. -} -\examples{ -x <- - c( - "1999-01-01T15:20:01", - "1999-01-01T15:20:-", - "1999-01-01T15:-:-", - "1999-01-01T-:-:-", - "1999-01--T-:-:-", - "1999----T-:-:-", - "-----T-:-:-" - ) - -sdtm.oak:::iso8601_truncate(x) - -# With `empty_as_na = FALSE` empty strings are not replaced with `NA` -sdtm.oak:::iso8601_truncate("-----T-:-:-", empty_as_na = TRUE) -sdtm.oak:::iso8601_truncate("-----T-:-:-", empty_as_na = FALSE) - -# Truncation only happens if missing components are the right most end, -# otherwise they remain unaltered. -sdtm.oak:::iso8601_truncate( - c( - "1999----T15:20:01", - "1999-01-01T-:20:01", - "1999-01-01T-:-:01", - "1999-01-01T-:-:-" - ) -) - } \keyword{internal} diff --git a/man/iso8601_two_digits.Rd b/man/iso8601_two_digits.Rd index 337da57f..04fbf2b8 100644 --- a/man/iso8601_two_digits.Rd +++ b/man/iso8601_two_digits.Rd @@ -16,10 +16,5 @@ A character vector of the same size as \code{x}. \code{\link[=iso8601_two_digits]{iso8601_two_digits()}} converts a single digit or two digit number into a two digit, 0-padded, number. Failing to parse the input as a two digit number results in \code{NA}. -} -\examples{ -x <- c("0", "00", "1", "01", "42", "100", NA_character_, "1.") -sdtm.oak:::iso8601_two_digits(x) - } \keyword{internal} diff --git a/man/iso8601_year.Rd b/man/iso8601_year.Rd index 190db0f9..692b8220 100644 --- a/man/iso8601_year.Rd +++ b/man/iso8601_year.Rd @@ -19,17 +19,5 @@ A character vector. \description{ \code{\link[=iso8601_year]{iso8601_year()}} converts a character vector whose values represent years to four-digit years. -} -\examples{ -sdtm.oak:::iso8601_year(c("0", "1", "2", "50", "68", "69", "90", "99", "00")) - -# Be default, `cutoff_2000` is at 68. -sdtm.oak:::iso8601_year(c("67", "68", "69", "70")) -sdtm.oak:::iso8601_year(c("1967", "1968", "1969", "1970")) - -# Change it to something else, e.g. `cutoff_2000 = 25`. -sdtm.oak:::iso8601_year(as.character(0:50), cutoff_2000 = 25) -sdtm.oak:::iso8601_year(as.character(1900:1950), cutoff_2000 = 25) - } \keyword{internal} diff --git a/man/mutate.cnd_df.Rd b/man/mutate.cnd_df.Rd index a11b38fd..105db788 100644 --- a/man/mutate.cnd_df.Rd +++ b/man/mutate.cnd_df.Rd @@ -54,15 +54,5 @@ columns.} generic on conditioned data frames. This function implements a conditional mutate by only changing rows for which the condition stored in the conditioned data frame is \code{TRUE}. -} -\examples{ -df <- tibble::tibble(x = 1L:3L, y = letters[x]) -cnd_df <- condition_add(df, x > 1L, y \%in\% c("a", "b")) - -# Because `cnd_df` is a conditioned data frame, dplyr::mutate() generic -# dispatches this S3 method and mutates only the second row, as that is the -# only record that fulfills simultaneously `x > 1L` and `y \%in\% c("a", "b")`. -dplyr::mutate(cnd_df, z = "match") - } \keyword{internal} diff --git a/man/new_cnd_df.Rd b/man/new_cnd_df.Rd index 0de6282e..dedb4e63 100644 --- a/man/new_cnd_df.Rd +++ b/man/new_cnd_df.Rd @@ -31,11 +31,6 @@ resulting in \code{NA}. that this function extends the data frame passed as argument by storing a logical vector \code{cnd} (as attribute) that marks rows for posterior conditional transformation by methods that support \emph{conditioned} data frames. -} -\examples{ -df <- data.frame(x = 1:3, y = letters[1:3]) -sdtm.oak:::new_cnd_df(dat = df, cnd = c(FALSE, NA, TRUE)) - } \seealso{ \code{\link[=is_cnd_df]{is_cnd_df()}}, \code{\link[=get_cnd_df_cnd]{get_cnd_df_cnd()}}, \code{\link[=get_cnd_df_cnd_sum]{get_cnd_df_cnd_sum()}}, diff --git a/man/oak_id_vars.Rd b/man/oak_id_vars.Rd index 86b9113b..7c7859a8 100644 --- a/man/oak_id_vars.Rd +++ b/man/oak_id_vars.Rd @@ -21,9 +21,3 @@ datasets. By default, the set of names is oak_id, raw_source, and patient_number. Extra variable names may be indicated and passed in \code{extra_vars} which are appended to the default names. } -\examples{ -oak_id_vars() - -oak_id_vars(extra_vars = "sample_id") - -} diff --git a/man/parse_dttm.Rd b/man/parse_dttm.Rd index b0de5132..f969603a 100644 --- a/man/parse_dttm.Rd +++ b/man/parse_dttm.Rd @@ -63,32 +63,5 @@ A character matrix of six columns: \code{"year"}, \code{"mon"}, \code{"mday"}, \description{ \code{\link[=parse_dttm]{parse_dttm()}} extracts date and time components. \code{\link[=parse_dttm]{parse_dttm()}} wraps around \code{\link[=parse_dttm_]{parse_dttm_()}}, which is not vectorized over \code{fmt}. -} -\examples{ -sdtm.oak:::parse_dttm("2020", "y") -sdtm.oak:::parse_dttm("2020-05", "y") - -sdtm.oak:::parse_dttm("2020-05", "y-m") -sdtm.oak:::parse_dttm("2020-05-11", "y-m-d") - -sdtm.oak:::parse_dttm("2020 05 11", "y m d") -sdtm.oak:::parse_dttm("2020 05 11", "y m d") -sdtm.oak:::parse_dttm("2020 05 11", "y\\\\s+m\\\\s+d") -sdtm.oak:::parse_dttm("2020 05 11", "y\\\\s+m\\\\s+d") - -sdtm.oak:::parse_dttm("2020-05-11 11:45", "y-m-d H:M") -sdtm.oak:::parse_dttm("2020-05-11 11:45:15.6", "y-m-d H:M:S") - -sdtm.oak:::parse_dttm(c("2002-05-11 11:45", "-05-11 11:45"), "y-m-d H:M") -sdtm.oak:::parse_dttm(c("2002-05-11 11:45", "-05-11 11:45"), "-m-d H:M") -sdtm.oak:::parse_dttm(c("2002-05-11 11:45", "-05-11 11:45"), c("y-m-d H:M", "-m-d H:M")) - -sdtm.oak:::parse_dttm("05 feb 1985 12 55 02", "d m y H M S") -sdtm.oak:::parse_dttm("12 55 02 05 feb 1985", "H M S d m y") - -sdtm.oak:::parse_dttm(c("2020-05-18", "2020-UN-18", "2020-UNK-UN"), "y-m-d") -sdtm.oak:::parse_dttm(c("2020-05-18", "2020-UN-18", "2020-UNK-UN"), "y-m-d", na = "UN") -sdtm.oak:::parse_dttm(c("2020-05-18", "2020-UN-18", "2020-UNK-UN"), "y-m-d", na = c("UN", "UNK")) - } \keyword{internal} diff --git a/man/parse_dttm_fmt.Rd b/man/parse_dttm_fmt.Rd index 6a74b183..636423ef 100644 --- a/man/parse_dttm_fmt.Rd +++ b/man/parse_dttm_fmt.Rd @@ -38,28 +38,5 @@ or pattern in-between format components. the components of the format \code{fmt} that refer to date/time components. \code{\link[=parse_dttm_fmt_]{parse_dttm_fmt_()}} is similar to \code{\link[=parse_dttm_fmt]{parse_dttm_fmt()}} but is not vectorized over \code{fmt}. -} -\examples{ -sdtm.oak:::parse_dttm_fmt("ymd") -sdtm.oak:::parse_dttm_fmt("H:M:S") - -sdtm.oak:::parse_dttm_fmt("ymd HMS") - -# Repeating the same special patterns, e.g. "yy" still counts as one pattern -# only. -sdtm.oak:::parse_dttm_fmt("yymmdd HHMMSS") - -# Note that `"y"`, `"m"`, `"d"`, `"H"`, `"M"` or `"S"` are reserved patterns -# that are matched first and interpreted as format components. # Example: the -# first "y" in "year" is parsed as meaning year followed by "ear y". The -# second "y" is not longer matched because a first match already # succeded. -sdtm.oak:::parse_dttm_fmt("year y") - -# Specify custom patterns -sdtm.oak:::parse_dttm_fmt( - "year month day", - fmt_cmp(year = "year", mon = "month", mday = "day") -) - } \keyword{internal} diff --git a/man/recode.Rd b/man/recode.Rd index aca082ac..2d6394fc 100644 --- a/man/recode.Rd +++ b/man/recode.Rd @@ -24,25 +24,5 @@ A vector of recoded values. \description{ \code{\link[=recode]{recode()}} recodes values in \code{x} by matching elements in \code{from} onto values in \code{to}. -} -\examples{ -x <- c("male", "female", "x", NA) -sdtm.oak:::recode(x, - from = c("male", "female"), - to = c("M", "F") -) -sdtm.oak:::recode( - x, - from = c("male", "female"), - to = c("M", "F"), - .no_match = "?" -) -sdtm.oak:::recode( - x, - from = c("male", "female"), - to = c("M", "F"), - .na = "missing" -) - } \keyword{internal} diff --git a/man/regex_or.Rd b/man/regex_or.Rd index efb2ba49..ea241c84 100644 --- a/man/regex_or.Rd +++ b/man/regex_or.Rd @@ -19,14 +19,5 @@ A character scalar of the resulting regex. \description{ \code{\link[=regex_or]{regex_or()}} takes a set of patterns and binds them with the Or (\code{"|"}) pattern for an easy regex of alternative patterns. -} -\examples{ -# A regex for matching either "jan" or "feb" -sdtm.oak:::regex_or(c("jan", "feb")) - -# Setting `.open` and/or `.close` to `TRUE` can be handy if this regex -# is to be combined into a larger regex. -paste0(sdtm.oak:::regex_or(c("jan", "feb"), .close = TRUE), r"{\d{2}}") - } \keyword{internal} diff --git a/man/rm_cnd_df.Rd b/man/rm_cnd_df.Rd index bd740d2c..b00ca2b4 100644 --- a/man/rm_cnd_df.Rd +++ b/man/rm_cnd_df.Rd @@ -15,14 +15,6 @@ The input \code{dat} without the \code{cnd_df} class and associated attributes. \description{ This function removes the \code{cnd_df} class, along with its attributes, if applicable. -} -\examples{ -df <- data.frame(x = 1:3, y = letters[1:3]) -cnd_df <- sdtm.oak:::new_cnd_df(dat = df, cnd = c(FALSE, NA, TRUE)) - -sdtm.oak:::is_cnd_df(cnd_df) -sdtm.oak:::is_cnd_df(sdtm.oak:::rm_cnd_df(cnd_df)) - } \seealso{ \code{\link[=new_cnd_df]{new_cnd_df()}}, \code{\link[=is_cnd_df]{is_cnd_df()}}, \code{\link[=get_cnd_df_cnd]{get_cnd_df_cnd()}}, diff --git a/man/yy_to_yyyy.Rd b/man/yy_to_yyyy.Rd index c4895aa8..a20686c0 100644 --- a/man/yy_to_yyyy.Rd +++ b/man/yy_to_yyyy.Rd @@ -18,18 +18,5 @@ An integer vector. } \description{ \code{\link[=yy_to_yyyy]{yy_to_yyyy()}} converts two-digit years to four-digit years. -} -\examples{ -sdtm.oak:::yy_to_yyyy(0:5) -sdtm.oak:::yy_to_yyyy(2000:2005) - -sdtm.oak:::yy_to_yyyy(90:99) -sdtm.oak:::yy_to_yyyy(1990:1999) - -# NB: change in behavior after 68 -sdtm.oak:::yy_to_yyyy(65:72) - -sdtm.oak:::yy_to_yyyy(1965:1972) - } \keyword{internal} diff --git a/man/zero_pad_whole_number.Rd b/man/zero_pad_whole_number.Rd index d4b972f8..fffe22f6 100644 --- a/man/zero_pad_whole_number.Rd +++ b/man/zero_pad_whole_number.Rd @@ -18,13 +18,5 @@ A character vector. \code{\link[=zero_pad_whole_number]{zero_pad_whole_number()}} takes non-negative integer values and converts them to character with zero padding. Negative numbers and numbers greater than the width specified by the number of digits \code{n} are converted to \code{NA}. -} -\examples{ -sdtm.oak:::zero_pad_whole_number(c(-1, 0, 1)) - -sdtm.oak:::zero_pad_whole_number(c(-1, 0, 1, 10, 99, 100), n = 2) - -sdtm.oak:::zero_pad_whole_number(c(-1, 0, 1, 10, 99, 100), n = 3) - } \keyword{internal} diff --git a/tests/testthat/test-cnd_df.R b/tests/testthat/test-cnd_df.R index 1574c4e7..ae2e406b 100644 --- a/tests/testthat/test-cnd_df.R +++ b/tests/testthat/test-cnd_df.R @@ -1,7 +1,7 @@ test_that("new_cnd_df creates conditioned data frame correctly", { df <- tibble(x = 1L:3L, y = letters[1L:3L]) cnd <- c(FALSE, NA, TRUE) - cnd_df <- sdtm.oak:::new_cnd_df(dat = df, cnd = cnd) + cnd_df <- new_cnd_df(dat = df, cnd = cnd) expect_s3_class(cnd_df, "cnd_df") expect_identical(attr(cnd_df, "cnd"), cnd) @@ -11,49 +11,49 @@ test_that("new_cnd_df creates conditioned data frame correctly", { test_that("new_cnd_df gives warning if dat is already cnd_df", { df <- tibble(x = 1L:3L, y = letters[1L:3L]) cnd <- c(FALSE, NA, TRUE) - cnd_df <- sdtm.oak:::new_cnd_df(dat = df, cnd = cnd) + cnd_df <- new_cnd_df(dat = df, cnd = cnd) - expect_warning(sdtm.oak:::new_cnd_df(dat = cnd_df, cnd = cnd, .warn = TRUE)) + expect_warning(new_cnd_df(dat = cnd_df, cnd = cnd, .warn = TRUE)) }) test_that("new_cnd_df errors when cnd length doesn't match dat rows", { df <- tibble(x = 1L:3L, y = letters[1L:3L]) cnd <- c(FALSE, TRUE) - expect_error(sdtm.oak:::new_cnd_df(dat = df, cnd = cnd)) + expect_error(new_cnd_df(dat = df, cnd = cnd)) }) test_that("is_cnd_df correctly identifies cnd_df class", { df <- tibble(x = 1L:3L, y = letters[1L:3L]) - cnd_df <- sdtm.oak:::new_cnd_df(dat = df, cnd = c(FALSE, NA, TRUE)) + cnd_df <- new_cnd_df(dat = df, cnd = c(FALSE, NA, TRUE)) - expect_true(sdtm.oak:::is_cnd_df(cnd_df)) - expect_false(sdtm.oak:::is_cnd_df(df)) + expect_true(is_cnd_df(cnd_df)) + expect_false(is_cnd_df(df)) }) test_that("get_cnd_df_cnd correctly extracts cnd attribute", { df <- tibble(x = 1L:3L, y = letters[1L:3L]) cnd <- c(FALSE, NA, TRUE) - cnd_df <- sdtm.oak:::new_cnd_df(dat = df, cnd = cnd) + cnd_df <- new_cnd_df(dat = df, cnd = cnd) - expect_identical(sdtm.oak:::get_cnd_df_cnd(cnd_df), cnd) - expect_null(sdtm.oak:::get_cnd_df_cnd(df)) + expect_identical(get_cnd_df_cnd(cnd_df), cnd) + expect_null(get_cnd_df_cnd(df)) }) test_that("get_cnd_df_cnd_sum correctly extracts cnd_sum attribute", { df <- tibble(x = 1L:3L, y = letters[1L:3L]) cnd <- c(FALSE, NA, TRUE) - cnd_df <- sdtm.oak:::new_cnd_df(dat = df, cnd = cnd) + cnd_df <- new_cnd_df(dat = df, cnd = cnd) - expect_identical(sdtm.oak:::get_cnd_df_cnd_sum(cnd_df), c(n_true = 1L, n_false = 1L, n_na = 1L)) - expect_null(sdtm.oak:::get_cnd_df_cnd_sum(df)) + expect_identical(get_cnd_df_cnd_sum(cnd_df), c(n_true = 1L, n_false = 1L, n_na = 1L)) + expect_null(get_cnd_df_cnd_sum(df)) }) test_that("rm_cnd_df correctly removes cnd_df class and attributes", { df <- tibble(x = 1L:3L, y = letters[1L:3L]) cnd <- c(FALSE, NA, TRUE) - cnd_df <- sdtm.oak:::new_cnd_df(dat = df, cnd = cnd) - un_cnd_df <- sdtm.oak:::rm_cnd_df(cnd_df) + cnd_df <- new_cnd_df(dat = df, cnd = cnd) + un_cnd_df <- rm_cnd_df(cnd_df) expect_false(inherits(un_cnd_df, "cnd_df")) expect_null(attr(un_cnd_df, "cnd")) @@ -63,7 +63,7 @@ test_that("rm_cnd_df correctly removes cnd_df class and attributes", { test_that("tbl_sum.cnd_df adds conditioning summary to tibble header", { df <- tibble(x = 1L:3L, y = letters[1L:3L]) cnd <- c(FALSE, NA, TRUE) - cnd_df <- sdtm.oak:::new_cnd_df(dat = df, cnd = cnd) + cnd_df <- new_cnd_df(dat = df, cnd = cnd) sum_output <- tbl_sum(cnd_df) expect_identical(sum_output["Cond. tbl"], c("Cond. tbl" = "1/1/1")) @@ -72,7 +72,7 @@ test_that("tbl_sum.cnd_df adds conditioning summary to tibble header", { test_that("ctl_new_rowid_pillar.cnd_df customizes row IDs with condition", { df <- tibble(x = 1L:3L, y = letters[1L:3L]) cnd <- c(FALSE, NA, TRUE) - cnd_df <- sdtm.oak:::new_cnd_df(dat = df, cnd = cnd) + cnd_df <- new_cnd_df(dat = df, cnd = cnd) rowid_pillar <- ctl_new_rowid_pillar(controller = cnd_df, x = cnd_df, width = 10L) diff --git a/tests/testthat/test-derive_blfl.R b/tests/testthat/test-derive_blfl.R index 7b6b963c..42f64f2b 100644 --- a/tests/testthat/test-derive_blfl.R +++ b/tests/testthat/test-derive_blfl.R @@ -127,3 +127,76 @@ test_that("derive_blfl DOMAIN validation works", { ref_var = "RFXSTDTC" )) }) + +test_that("`dtc_datepart`: basic usage", { + expect_identical(dtc_datepart( + c(NA, "", "2021", "2021-12", "2021-12-25", "2021-12-25T12:00:00") + ), + c(NA, NA, NA, NA, "2021-12-25", "2021-12-25")) + + ## Prevent partial or missing dates from being set to NA + expect_identical(dtc_datepart( + c(NA, "", "2021", "2021-12", "2021-12-25", "2021-12-25T12:00:00"), + partial_as_na = FALSE + ), + c(NA, "", "2021", "2021-12", "2021-12-25", "2021-12-25")) +}) + +test_that("`dtc_timepart`: basic usage", { + # Partial or missing times set to NA and seconds ignored by default + expect_identical(dtc_timepart( + c( + NA, + "", + "2021-12-25", + "2021-12-25T12", + "2021-12-25T12:30", + "2021-12-25T12:30:59" + ) + ), c(NA, NA, NA, NA, "12:30", "12:30")) + + # Prevent partial or missing times from being set to NA + expect_identical(dtc_timepart( + c( + NA, + "", + "2021-12-25", + "2021-12-25T12", + "2021-12-25T12:30", + "2021-12-25T12:30:59" + ), + partial_as_na = FALSE + ), + c(NA, "", "", "12", "12:30", "12:30")) + + # Do not ignore seconds, partial or missing times set to NA + expect_identical(dtc_timepart( + c( + NA, + "", + "2021-12-25", + "2021-12-25T12", + "2021-12-25T12:30", + "2021-12-25T12:30:59" + ), + ignore_seconds = FALSE + ), + c(NA, NA, NA, NA, NA, "12:30:59")) + + # Do not ignore seconds and prevent partial or missing times from being set to NA + expect_identical( + dtc_timepart( + c( + NA, + "", + "2021-12-25", + "2021-12-25T12", + "2021-12-25T12:30", + "2021-12-25T12:30:59" + ), + partial_as_na = FALSE, + ignore_seconds = FALSE + ), + c(NA, "", "", "12", "12:30", "12:30:59") + ) +}) diff --git a/tests/testthat/test-derve_seq.R b/tests/testthat/test-derve_seq.R index f756b24b..58120b55 100644 --- a/tests/testthat/test-derve_seq.R +++ b/tests/testthat/test-derve_seq.R @@ -1,4 +1,4 @@ -test_that("derive_seq works as expected", { +test_that("`derive_seq()` works as expected", { # Test for VSSEQ derivation vs <- read_domain_example("vs") @@ -40,7 +40,7 @@ test_that("derive_seq works as expected", { expect_identical(observed_apseq, expected_apseq) }) -test_that("is_seq_name works as expected", { +test_that("`is_seq_name()`: basic usage", { expect_true(is_seq_name("AESEQ")) expect_false(is_seq_name("AEseq")) diff --git a/tests/testthat/test-dtc_parse_dttm.R b/tests/testthat/test-dtc_parse_dttm.R new file mode 100644 index 00000000..6a0f386b --- /dev/null +++ b/tests/testthat/test-dtc_parse_dttm.R @@ -0,0 +1,142 @@ +test_that("`parse_dttm()` works as expected", { + + # Year + expected_y <- tibble::tribble( + ~year, ~mon, ~mday, ~hour, ~min, ~sec, + "2020", NA, NA, NA, NA, NA + ) |> + as.matrix() + + expect_identical(parse_dttm("2020", "y"), + expected_y) + + # Year, Month + expected_ym <- tibble::tribble( + ~year, ~mon, ~mday, ~hour, ~min, ~sec, + "2020", "05", NA, NA, NA, NA + ) |> + as.matrix() + + expect_identical(parse_dttm("2020-05", "y-m"), + expected_ym) + + # Year, Month, Day + expected_ymd <- tibble::tribble( + ~year, ~mon, ~mday, ~hour, ~min, ~sec, + "2020", "05", "11", NA, NA, NA + ) |> + as.matrix() + + expect_identical(parse_dttm("2020-05-11", "y-m-d"), + expected_ymd) + + # Year, Month, Day in other formats + parse_dttm("2020 05 11", "y m d") |> expect_identical(expected_ymd) + parse_dttm("2020 05 11", "y\\s+m\\s+d") |> expect_identical(expected_ymd) + parse_dttm("2020 05 11", "y\\s+m\\s+d") |> expect_identical(expected_ymd) + + # Year, Month, Day, Hour, Minute + expected_ymdhm <- tibble::tribble( + ~year, ~mon, ~mday, ~hour, ~min, ~sec, + "2020", "05", "11", "11", "45", NA + ) |> + as.matrix() + + expect_identical(parse_dttm("2020-05-11 11:45", "y-m-d H:M"), + expected_ymdhm) + + # Year, Month, Day, Hour, Minute, Second + expected_ymdhms <- tibble::tribble( + ~year, ~mon, ~mday, ~hour, ~min, ~sec, + "2020", "05", "11", "11", "45", "15.6" + ) |> + as.matrix() + + expect_identical(parse_dttm("2020-05-11 11:45:15.6", "y-m-d H:M:S"), + expected_ymdhms) + + # Multiple records + expected_ymdhm_1 <- tibble::tribble( + ~year, ~mon, ~mday, ~hour, ~min, ~sec, + "2002", "05", "11", "11", "45", NA, + NA, NA, NA, NA, NA, NA + ) |> + as.matrix() + + expect_identical(parse_dttm(c("2002-05-11 11:45", "-05-11 11:45"), "y-m-d H:M"), + expected_ymdhm_1) + + expected_ymdhm_2 <- tibble::tribble( + ~year, ~mon, ~mday, ~hour, ~min, ~sec, + NA, NA, NA, NA, NA, NA, + NA, "05", "11", "11", "45", NA, + ) |> + as.matrix() + + expect_identical(parse_dttm(c("2002-05-11 11:45", "-05-11 11:45"), "-m-d H:M"), + expected_ymdhm_2) + + + + expected_ymdhm_3 <- tibble::tribble( + ~year, ~mon, ~mday, ~hour, ~min, ~sec, + "2002", "05", "11", "11", "45", NA, + NA, "05", "11", "11", "45", NA + ) |> + as.matrix() + + expect_identical( + parse_dttm(c("2002-05-11 11:45", "-05-11 11:45"), c("y-m-d H:M", "-m-d H:M")), + expected_ymdhm_3 + ) + + # Different date formats + expected_ymdhm_4 <- tibble::tribble( + ~year, ~mon, ~mday, ~hour, ~min, ~sec, + "1985", "feb", "05", "12", "55", "02", + ) |> + as.matrix() + + expect_identical(parse_dttm("05 feb 1985 12 55 02", "d m y H M S"), + expected_ymdhm_4) + + expect_identical(parse_dttm("12 55 02 05 feb 1985", "H M S d m y"), + expected_ymdhm_4) + + # UNK included + expected_unk <- tibble::tribble( + ~year, ~mon, ~mday, ~hour, ~min, ~sec, + "2020", "05", "18", NA, NA, NA, + NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, + ) |> + as.matrix() + + expect_identical(parse_dttm(c("2020-05-18", "2020-UN-18", "2020-UNK-UN"), "y-m-d"), + expected_unk) + expected_unk_1 <- tibble::tribble( + ~year, ~mon, ~mday, ~hour, ~min, ~sec, + "2020", "05", "18", NA, NA, NA, + "2020", "UN", "18", NA, NA, NA, + NA, NA, NA, NA, NA, NA, + ) |> + as.matrix() + + expect_identical(parse_dttm(c("2020-05-18", "2020-UN-18", "2020-UNK-UN"), + "y-m-d", + na = "UN"), + expected_unk_1) + + expected_unk_2 <- tibble::tribble( + ~year, ~mon, ~mday, ~hour, ~min, ~sec, + "2020", "05", "18", NA, NA, NA, + "2020", "UN", "18", NA, NA, NA, + "2020", "UNK", "UN", NA, NA, NA, + ) |> + as.matrix() + + expect_identical(parse_dttm(c("2020-05-18", "2020-UN-18", "2020-UNK-UN"), + "y-m-d", + na = c("UN", "UNK")), + expected_unk_2) +}) diff --git a/tests/testthat/test-dtc_problems.R b/tests/testthat/test-dtc_problems.R new file mode 100644 index 00000000..4529629a --- /dev/null +++ b/tests/testthat/test-dtc_problems.R @@ -0,0 +1,29 @@ +test_that("`add_problems()`: basic usage", { + date <- c("2000-01-05", "", "1980-06-18", "1979-09-07") + time <- c("001221", "22:35:05", "03:00:15", "07:09:00") + dtc <- list(date, time) + dttm <- c("2000-01-05", "T22:35:05", "1980-06-18T03:00:15", "1979-09-07T07:09:00") + is_problem <- c(TRUE, TRUE, FALSE, FALSE) + + dttm2 <- add_problems(dttm, is_problem, dtc) + dttm2_expected <- c("2000-01-05", "T22:35:05", "1980-06-18T03:00:15", "1979-09-07T07:09:00") + + expect_identical(as.vector(dttm2), dttm2_expected) +}) + +test_that("`any_problems()`: basic usage", { + expect_false(any_problems(list(parse_dttm("1980-06-18", "y-m-d")))) + + expect_true(any_problems(list(parse_dttm("1980-06-18", "ymd")))) + + # Multiple records + date <- c("2000-01-05", "2001/12/25", "1980-06-18", "1979-09-07") + time <- c("00h12m21", "22:35:05", "03:00:15", "07:09:00") + + cap_matrix_date <- parse_dttm(date, "y-m-d") + cap_matrix_time <- parse_dttm(time, "H:M:S") + + cap_matrices <- list(cap_matrix_date, cap_matrix_time) + expect_identical(any_problems(cap_matrices), + c(TRUE, TRUE, FALSE, FALSE)) +}) diff --git a/tests/testthat/test-dtc_utils.R b/tests/testthat/test-dtc_utils.R new file mode 100644 index 00000000..7130c568 --- /dev/null +++ b/tests/testthat/test-dtc_utils.R @@ -0,0 +1,159 @@ +test_that("`assert_dtc_fmt()`: basic usage", { + + fmt <- c("ymd", "y m d", "dmy", "HM", "H:M:S", "y-m-d H:M:S") + expect_identical(assert_dtc_fmt(fmt), + fmt) + + expect_error(assert_dtc_fmt("y years m months d days")) +}) + +test_that("`assert_dtc_format()`: basic usage", { + + expect_identical(assert_dtc_format("ymd"), "ymd") + expect_identical(assert_dtc_format(c("ymd", "y-m-d")), c("ymd", "y-m-d")) + expect_identical(assert_dtc_format(list(c("ymd", "y-m-d"), "H:M:S")), + list(c("ymd", "y-m-d"), "H:M:S")) + + expect_error(assert_dtc_format("year, month, day")) + +}) + +test_that("`assert_capture_matrix()`: basic usage", { + + cols <- c("year", "mon", "mday", "hour", "min", "sec") + m <- matrix(NA_character_, nrow = 1L, ncol = 6L, dimnames = list(NULL, cols)) + expect_identical(assert_capture_matrix(m), m) + + expect_error(assert_capture_matrix(character())) + expect_error(assert_capture_matrix(matrix(data = NA_character_, nrow = 0, ncol = 0))) + expect_error(assert_capture_matrix(matrix(data = NA_character_, nrow = 1))) +}) + +test_that("`complete_capture_matrix()`: basic usage", { + + # Input with no cols and rows + input <- matrix(data = NA_character_, nrow = 0, ncol = 0) + expected_output <- matrix( + data = NA_character_, + nrow = 0, + ncol = 6, + dimnames = list(NULL, c("year", "mon", "mday", "hour", "min", "sec")) + ) + + expect_identical(complete_capture_matrix(input), + expected_output) + + # Input with no cols and 1 row + input <- matrix(data = NA_character_, nrow = 1) + expected_output <- matrix( + data = NA_character_, + nrow = 1, + ncol = 6, + dimnames = list(NULL, c("year", "mon", "mday", "hour", "min", "sec")) + ) + + expect_identical(complete_capture_matrix(input), + expected_output) + + # Input with incomplete cols + input <- + matrix( + NA_character_, + nrow = 1, + ncol = 2, + dimnames = list(NULL, c("year", "sec")) + ) + + expected_output <- matrix( + data = NA_character_, + nrow = 1, + ncol = 6, + dimnames = list(NULL, c("year", "mon", "mday", "hour", "min", "sec")) + ) + + expect_identical(complete_capture_matrix(input), + expected_output) + + # Input with year and second specified + input <- + matrix( + c("2020", "10"), + nrow = 1, + ncol = 2, + dimnames = list(NULL, c("year", "sec")) + ) + + expected_output <- + matrix( + data = c("2020", rep(NA, 4), "10"), + nrow = 1, + ncol = 6, + dimnames = list(NULL, c("year", "mon", "mday", "hour", "min", "sec")) + ) + + expect_identical(complete_capture_matrix(input), + expected_output) + + # Any other existing columns are dropped. + input <- + matrix( + c("2020", "10"), + nrow = 1, + ncol = 2, + dimnames = list(NULL, c("semester", "quarter")) + ) + + expected_output <- matrix( + data = NA_character_, + nrow = 1, + ncol = 6, + dimnames = list(NULL, c("year", "mon", "mday", "hour", "min", "sec")) + ) + + expect_identical(complete_capture_matrix(input), + expected_output) +}) + +test_that("`coalesce_capture_matrices()`: basic usage", { + + cols <- c("year", "mon", "mday", "hour", "min", "sec") + dates <- c("2020", "01", "01", "20", NA, NA) + times <- c(NA, NA, NA, "10", "00", "05") + m_dates <- matrix(dates, nrow = 1L, ncol = 6L, dimnames = list(NULL, cols)) + m_times <- matrix(times, nrow = 1L, ncol = 6L, dimnames = list(NULL, cols)) + + # Note how the hour "20" takes precedence over "10" + expected_output <- tibble::tribble( + ~year, ~mon, ~mday, ~hour, ~min, ~sec, + "2020", "01", "01", "20", "00", "05" + ) |> + as.matrix() + + expect_identical(coalesce_capture_matrices(m_dates, m_times), expected_output) + + # Reverse the order of the inputs and now hour "10" takes precedence + expected_output <- tibble::tribble( + ~year, ~mon, ~mday, ~hour, ~min, ~sec, + "2020", "01", "01", "10", "00", "05" + ) |> + as.matrix() + + expect_identical(coalesce_capture_matrices(m_times, m_dates), expected_output) + + # Single inputs should result in the same output as the input + expected_output <- tibble::tribble( + ~year, ~mon, ~mday, ~hour, ~min, ~sec, + "2020", "01", "01", "20", NA, NA + ) |> + as.matrix() + + expect_identical(coalesce_capture_matrices(m_dates), expected_output) + + expected_output <- tibble::tribble( + ~year, ~mon, ~mday, ~hour, ~min, ~sec, + NA, NA, NA, "10", "00", "05", + ) |> + as.matrix() + + expect_identical(coalesce_capture_matrices(m_times), expected_output) +}) diff --git a/tests/testthat/test-eval_conditions.R b/tests/testthat/test-eval_conditions.R index 8cf0e4b2..d3bc095b 100644 --- a/tests/testthat/test-eval_conditions.R +++ b/tests/testthat/test-eval_conditions.R @@ -7,26 +7,26 @@ test_that("`eval_conditions()` evaluates conditions correctly", { # Tag records for which `x` is greater than 2. expect_identical( - sdtm.oak:::eval_conditions(df, x > 2L), + eval_conditions(df, x > 2L), c(FALSE, FALSE, NA, TRUE, TRUE) ) # Tag records for which `x` is greater than 2 and `y` is TRUE. expect_identical( - sdtm.oak:::eval_conditions(df, x > 2L, y), + eval_conditions(df, x > 2L, y), c(FALSE, FALSE, NA, FALSE, TRUE) ) # Tag records for which `x` is greater than 2 and convert resulting NAs into FALSE. expect_identical( - sdtm.oak:::eval_conditions(df, x > 2L, .na = FALSE), + eval_conditions(df, x > 2L, .na = FALSE), c(FALSE, FALSE, FALSE, TRUE, TRUE) ) # Conditions may involve variables defined in the caller environment. w <- 1L expect_identical( - sdtm.oak:::eval_conditions(df, x > w), + eval_conditions(df, x > w), c(FALSE, TRUE, NA, TRUE, TRUE) ) @@ -34,18 +34,18 @@ test_that("`eval_conditions()` evaluates conditions correctly", { # environments). env <- rlang::env(w = 1L) expect_identical( - sdtm.oak:::eval_conditions(df, x > w, .env = env), + eval_conditions(df, x > w, .env = env), c(FALSE, TRUE, NA, TRUE, TRUE) ) # Other scopes are not restricted to environments but lists and tibbles also # work as namespaces for look-up. expect_identical( - sdtm.oak:::eval_conditions(df, x > w, .env = list(w = 3L)), + eval_conditions(df, x > w, .env = list(w = 3L)), c(FALSE, FALSE, NA, TRUE, TRUE) ) expect_identical( - sdtm.oak:::eval_conditions(df, x > w, .env = tibble::tibble(w = 4L)), + eval_conditions(df, x > w, .env = tibble::tibble(w = 4L)), c(FALSE, FALSE, NA, FALSE, TRUE) ) }) diff --git a/tests/testthat/test-iso8601.R b/tests/testthat/test-iso8601.R index b21737ff..454eb295 100644 --- a/tests/testthat/test-iso8601.R +++ b/tests/testthat/test-iso8601.R @@ -42,3 +42,103 @@ test_that("`iso8601_two_digits()`: basic usage", { y <- c("00", "00", "01", "01", "42", NA, NA, NA) expect_identical(iso8601_two_digits(x), y) }) + +test_that("`iso8601_year()`: basic usage", { + + expect_identical( + iso8601_year(c("0", "1", "2", "50", "68", "69", "90", "99", "00")), + c("2000", "2001", "2002", "2050", "2068", "1969", "1990", "1999", "2000") + ) + + # By default, `cutoff_2000` is at 68. + expect_identical( + iso8601_year(c("67", "68", "69", "70")), + c("2067", "2068", "1969", "1970") + ) + + expect_identical( + iso8601_year(c("1967", "1968", "1969", "1970")), + c("1967", "1968", "1969", "1970") + ) + + # Set cutoff_2000 to something else + expect_identical(iso8601_year(as.character(0:50), cutoff_2000 = 25), + as.character(c(2000:2025, 1926:1950))) + + expect_identical(iso8601_year(as.character(1900:1950), cutoff_2000 = 25), + as.character(c(1900:1950))) + +}) + +test_that("`iso8601_mon()`: basic usage", { + + expect_identical( + iso8601_mon(c(NA, "0", "1", "2", "10", "11", "12")), + c(NA, "00", "01", "02", "10", "11", "12") + ) + + # No semantic validation is performed on the numeric months, so `"13"` stays + # `"13"` but representations that can't be represented as two-digit numbers + # become `NA`. + expect_identical( + iso8601_mon(c("13", "99", "100", "-1")), + c("13", "99", NA, NA) + ) + + mon <- month.abb + expect_identical(iso8601_mon(mon), + "01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12") + +}) + +test_that("`iso8601_sec()`: basic usage", { + + expect_identical( + iso8601_sec(c(NA, "0", "1", "10", "59", "99", "100")), + c( NA, "00", "01", "10", "59", "99", NA) + ) +}) + +test_that("`iso8601_truncate()`: basic usage", { + + x <- + c( + "1999-01-01T15:20:01", + "1999-01-01T15:20:-", + "1999-01-01T15:-:-", + "1999-01-01T-:-:-", + "1999-01--T-:-:-", + "1999----T-:-:-", + "-----T-:-:-" + ) + + expect_identical( + iso8601_truncate(x), + c("1999-01-01T15:20:01", "1999-01-01T15:20", "1999-01-01T15", "1999-01-01", + "1999-01", "1999", NA) + ) + + # With `empty_as_na = FALSE` empty strings are not replaced with `NA` + expect_true(is.na(iso8601_truncate("-----T-:-:-", empty_as_na = TRUE))) + expect_identical(iso8601_truncate("-----T-:-:-", empty_as_na = FALSE), "") + + # Truncation only happens if missing components are the right most end, + # otherwise they remain unaltered. + expect_identical( + iso8601_truncate( + c( + "1999----T15:20:01", + "1999-01-01T-:20:01", + "1999-01-01T-:-:01", + "1999-01-01T-:-:-" + ) + ), + c( + "1999----T15:20:01", + "1999-01-01T-:20:01", + "1999-01-01T-:-:01", + "1999-01-01" + ) + ) + +}) diff --git a/tests/testthat/test-oak_id_vars.R b/tests/testthat/test-oak_id_vars.R new file mode 100644 index 00000000..7c64f257 --- /dev/null +++ b/tests/testthat/test-oak_id_vars.R @@ -0,0 +1,15 @@ +test_that("`oak_id_vars`: basic usage", { + expect_identical(oak_id_vars(), + c("oak_id", "raw_source", "patient_number")) + + expect_identical(oak_id_vars(extra_vars = "sample_id"), + c("oak_id", "raw_source", "patient_number", "sample_id")) +}) + +test_that("`contains_oak_id_vars()`: basic usage", { + expect_true(contains_oak_id_vars(oak_id_vars())) + + expect_false(contains_oak_id_vars(character())) + + expect_false(contains_oak_id_vars(c("oak_id", "raw_source"))) +}) diff --git a/tests/testthat/test-parse_dttm_fmt.R b/tests/testthat/test-parse_dttm_fmt.R index 1108c569..942a11d8 100644 --- a/tests/testthat/test-parse_dttm_fmt.R +++ b/tests/testthat/test-parse_dttm_fmt.R @@ -125,7 +125,206 @@ test_that("`parse_dttm_fmt_`: only the first match is returned", { expect_identical(x3, parse_dttm_fmt_(fmt, pattern = "H+")) }) +test_that("`fmt_rg`: basic usage", { + # Default + observed <- fmt_rg() + + sec <- r"[(\b\d|\d{2})(\.\d*)?]" + min <- r"[(\b\d|\d{2})]" + hour <- r"[\d?\d]" + mday <- r"[\b\d|\d{2}]" + mon <- stringr::str_glue(r"[\d\d|{months_abb_regex()}]") + year <- r"[(\d{2})?\d{2}]" + sec_na <- "" + min_na <- "" + hour_na <- "" + mday_na <- "" + mon_na <- "" + year_na <- "" + + expected <- c( + sec = stringr::str_glue("(?{sec}{sec_na})"), + min = stringr::str_glue("(?{min}{min_na})"), + hour = stringr::str_glue("(?{hour}{hour_na})"), + mday = stringr::str_glue("(?{mday}{mday_na})"), + mon = stringr::str_glue("(?{mon}{mon_na})"), + year = stringr::str_glue("(?{year}{year_na})") + ) + + expect_identical(observed, expected) + + # Pass an explicit regex for numerical months + observed_m <- fmt_rg(mon = r"[\b\d|\d{2}]") + + mon <- r"[\b\d|\d{2}]" + expected_m <- c( + sec = stringr::str_glue("(?{sec}{sec_na})"), + min = stringr::str_glue("(?{min}{min_na})"), + hour = stringr::str_glue("(?{hour}{hour_na})"), + mday = stringr::str_glue("(?{mday}{mday_na})"), + mon = stringr::str_glue("(?{mon}{mon_na})"), + year = stringr::str_glue("(?{year}{year_na})") + ) + + expect_identical(observed_m, expected_m) + + # Use `"UNK"` for the year component only + observed_yr_unk <- fmt_rg(year_na = "UNK") + + mon <- stringr::str_glue(r"[\d\d|{months_abb_regex()}]") + year_na <- regex_or("UNK", .open = TRUE) + + expected_yr_unk <- c( + sec = stringr::str_glue("(?{sec}{sec_na})"), + min = stringr::str_glue("(?{min}{min_na})"), + hour = stringr::str_glue("(?{hour}{hour_na})"), + mday = stringr::str_glue("(?{mday}{mday_na})"), + mon = stringr::str_glue("(?{mon}{mon_na})"), + year = stringr::str_glue("(?{year}{year_na})") + ) + + expect_identical(observed_yr_unk, expected_yr_unk) + # Test if date/time components accept `"UNK"` as a possible pattern (useful + # to match funny codes for `NA`). + observed_unk <- fmt_rg(na = "UNK") + sec_na <- regex_or("UNK", .open = TRUE) + min_na <- regex_or("UNK", .open = TRUE) + hour_na <- regex_or("UNK", .open = TRUE) + mday_na <- regex_or("UNK", .open = TRUE) + mon_na <- regex_or("UNK", .open = TRUE) + year_na <- regex_or("UNK", .open = TRUE) + + expected_unk <- c( + sec = stringr::str_glue("(?{sec}{sec_na})"), + min = stringr::str_glue("(?{min}{min_na})"), + hour = stringr::str_glue("(?{hour}{hour_na})"), + mday = stringr::str_glue("(?{mday}{mday_na})"), + mon = stringr::str_glue("(?{mon}{mon_na})"), + year = stringr::str_glue("(?{year}{year_na})") + ) + + expect_identical(observed_unk, expected_unk) +}) + +test_that("`dttm_fmt_to_regex`: basic usage", { + + expect_identical(dttm_fmt_to_regex("y"), + "^(?(\\d{2})?\\d{2})$") + + expect_identical(dttm_fmt_to_regex("y", anchored = FALSE), + "(?(\\d{2})?\\d{2})") + # nolint start + expect_identical(dttm_fmt_to_regex("m"), + "^(?\\d\\d|[Jj][Aa][Nn]|[Ff][Ee][Bb]|[Mm][Aa][Rr]|[Aa][Pp][Rr]|[Mm][Aa][Yy]|[Jj][Uu][Nn]|[Jj][Uu][Ll]|[Aa][Uu][Gg]|[Ss][Ee][Pp]|[Oo][Cc][Tt]|[Nn][Oo][Vv]|[Dd][Ee][Cc])$") + + expect_identical(dttm_fmt_to_regex("ymd"), + "^(?(\\d{2})?\\d{2})(?\\d\\d|[Jj][Aa][Nn]|[Ff][Ee][Bb]|[Mm][Aa][Rr]|[Aa][Pp][Rr]|[Mm][Aa][Yy]|[Jj][Uu][Nn]|[Jj][Uu][Ll]|[Aa][Uu][Gg]|[Ss][Ee][Pp]|[Oo][Cc][Tt]|[Nn][Oo][Vv]|[Dd][Ee][Cc])(?\\b\\d|\\d{2})$") + + expect_identical(dttm_fmt_to_regex("ymd HH:MM:SS"), + "^(?(\\d{2})?\\d{2})(?\\d\\d|[Jj][Aa][Nn]|[Ff][Ee][Bb]|[Mm][Aa][Rr]|[Aa][Pp][Rr]|[Mm][Aa][Yy]|[Jj][Uu][Nn]|[Jj][Uu][Ll]|[Aa][Uu][Gg]|[Ss][Ee][Pp]|[Oo][Cc][Tt]|[Nn][Oo][Vv]|[Dd][Ee][Cc])(?\\b\\d|\\d{2}) (?\\d?\\d):(?(\\b\\d|\\d{2})):(?(\\b\\d|\\d{2})(\\.\\d*)?)$") + # nolint end +}) + +test_that("`regrex_or`: basic usage", { + expect_identical(regex_or(c("jan", "feb")), "jan|feb") + + # Setting `.open` and/or `.close` to `TRUE` can be handy if this regex + # is to be combined into a larger regex. + expect_identical(paste0(regex_or(c("jan", "feb"), .close = TRUE), r"{\d{2}}"), + "jan|feb|\\d{2}") +}) + test_that("`parse_dttm_fmt`: empty fmt", { expect_identical(fmt_dttmc(), parse_dttm_fmt("", pattern = "y")) expect_error(parse_dttm_fmt_(character(), pattern = "y")) }) + +test_that("`parse_dttm_fmt` works as expected", { + # ymd + observed_ymd <- parse_dttm_fmt("ymd") + expected_ymd <- tibble::tribble( + ~fmt_c, ~pat, ~cap, ~start, ~end, ~len, ~ord, + "year", "y+", "y", 1L, 1L, 1L, 1L, + "mon", "m+", "m", 2L, 2L, 1L, 2L, + "mday", "d+", "d", 3L, 3L, 1L, 3L + ) + + expect_identical(observed_ymd, expected_ymd) + + # hms + observed_hms <- parse_dttm_fmt("H:M:S") + expected_hms <- tibble::tribble( + ~fmt_c, ~pat, ~cap, ~start, ~end, ~len, ~ord, + "hour", "H+", "H", 1L, 1L, 1L, 1L, + NA, NA, ":", 2L, 2L, 1L, NA, + "min", "M+", "M", 3L, 3L, 1L, 2L, + NA, NA, ":", 4L, 4L, 1L, NA, + "sec", "S+", "S", 5L, 5L, 1L, 3L + ) + + expect_identical(observed_hms, expected_hms) + + # ymdhms + observed_ymdhms <- parse_dttm_fmt("ymd HMS") + expected_ymdhms <- tibble::tribble( + ~fmt_c, ~pat, ~cap, ~start, ~end, ~len, ~ord, + "year", "y+", "y", 1L, 1L, 1L, 1L, + "mon", "m+", "m", 2L, 2L, 1L, 2L, + "mday", "d+", "d", 3L, 3L, 1L, 3L, + NA, NA, " ", 4L, 4L, 1L, NA, + "hour", "H+", "H", 5L, 5L, 1L, 4L, + "min", "M+", "M", 6L, 6L, 1L, 5L, + "sec", "S+", "S", 7L, 7L, 1L, 6L + ) + + expect_identical(observed_ymdhms, expected_ymdhms) + + # Repeating the same special patterns, e.g. "yy" still counts as one pattern + # only. + observed_ymdhms_1 <- parse_dttm_fmt("yymmdd HHMMSS") + expected_ymdhms_1 <- tibble::tribble( + ~fmt_c, ~pat, ~cap, ~start, ~end, ~len, ~ord, + "year", "y+", "yy", 1L, 2L, 2L, 1L, + "mon", "m+", "mm", 3L, 4L, 2L, 2L, + "mday", "d+", "dd", 5L, 6L, 2L, 3L, + NA, NA, " ", 7L, 7L, 1L, NA, + "hour", "H+", "HH", 8L, 9L, 2L, 4L, + "min", "M+", "MM", 10L, 11L, 2L, 5L, + "sec", "S+", "SS", 12L, 13L, 2L, 6L + ) + + expect_identical(observed_ymdhms_1, expected_ymdhms_1) + + # `"y"`, `"m"`, `"d"`, `"H"`, `"M"` or `"S"` are reserved patterns + # that are matched first and interpreted as format components. Example: the + # first "y" in "year" is parsed as meaning year followed by "ear y". The + # second "y" is not longer matched because a first match already succeed. + observed_y <- parse_dttm_fmt("year y") + expected_y <- tibble::tribble( + ~fmt_c, ~pat, ~cap, ~start, ~end, ~len, ~ord, + "year", "y+", "y", 1L, 1L, 1L, 1L, + NA, NA, "ear y", 2L, 6L, 5L, NA + ) + + expect_identical(observed_y, expected_y) + + # Specify custom patterns + observed_cus <- parse_dttm_fmt("year month day", + fmt_cmp(year = "year", + mon = "month", + mday = "day") + ) + + expected_cus <- tibble::tribble( + ~fmt_c, ~pat, ~cap, ~start, ~end, ~len, ~ord, + "year", "year", "year", 1L, 4L, 4L, 1L, + NA, NA, " ", 5L, 5L, 1L, NA, + "mon", "month", "month", 6L, 10L, 5L, 2L, + NA, NA, " ", 11L, 11L, 1L, NA, + "mday", "day", "day", 12L, 14L, 3L, 3L + ) + + expect_identical(observed_cus, expected_cus) +}) + + diff --git a/tests/testthat/test-recode.R b/tests/testthat/test-recode.R index 87562015..d74e70f8 100644 --- a/tests/testthat/test-recode.R +++ b/tests/testthat/test-recode.R @@ -123,3 +123,9 @@ test_that("recode(): notable cases", { .na = "X" ), rep("X", 4L)) }) + +test_that("index_for_recode(): basic usage", { + expect_identical(index_for_recode(x = 1:5, from = c(2, 4)), + as.integer(c(NA, 1, NA, 2, NA))) + +})