From c9554f53f1f407deb3276a676a8207bd6e7284c2 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Wed, 15 Nov 2023 11:46:58 +0000 Subject: [PATCH 01/52] clean up dummy test --- tests/testthat/test-onload.R | 3 --- 1 file changed, 3 deletions(-) delete mode 100644 tests/testthat/test-onload.R diff --git a/tests/testthat/test-onload.R b/tests/testthat/test-onload.R deleted file mode 100644 index 0cc12457..00000000 --- a/tests/testthat/test-onload.R +++ /dev/null @@ -1,3 +0,0 @@ -test_that("multiplication works", { - expect_identical(2L * 2L, 4L) -}) From 0b2532c78d1fe27ce3beebc2ad6081b153229e16 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Wed, 15 Nov 2023 11:47:51 +0000 Subject: [PATCH 02/52] add `dtc_formats` data set --- data-raw/dtc_formats.R | 32 ++++++++++++++++++++++++++++++++ data/dtc_formats.rda | Bin 0 -> 440 bytes man/dtc_formats.Rd | 26 ++++++++++++++++++++++++++ 3 files changed, 58 insertions(+) create mode 100644 data-raw/dtc_formats.R create mode 100644 data/dtc_formats.rda create mode 100644 man/dtc_formats.Rd diff --git a/data-raw/dtc_formats.R b/data-raw/dtc_formats.R new file mode 100644 index 00000000..8a8acdb6 --- /dev/null +++ b/data-raw/dtc_formats.R @@ -0,0 +1,32 @@ +## code to prepare `dtc_formats` dataset goes here + +dtc_formats <- tibble::tribble( + ~ fmt, ~type, ~ description, + "ymd", "date", "Parses a date: year, month, and month day.", + "y m d", "date", "Parses a date: year, month, and month day.", + "y-m-d", "date", "Parses a date: year, month, and month day.", + + "dmy", "date", "Parses a date: month day, month and year.", + "d m y", "date", "Parses a date: month day, month and year.", + "d-m-y", "date", "Parses a date: month day, month and year.", + + "ym", "date", "Parses a date: year and month.", + "y m", "date", "Parses a date: year and month.", + "y-m", "date", "Parses a date: year and month.", + + "my", "date", "Parses a date: month and year.", + "m y", "date", "Parses a date: month and year.", + "m-y", "date", "Parses a date: month and year.", + + "HM", "time", "Parses a time: hour and minutes.", + "HMS", "time", "Parses a time: hour, minutes, and seconds.", + "H:M", "time", "Parses a time: hour and minutes.", + "H:M:S", "time", "Parses a time: hour, minutes and seconds.", + + "ymdH:M:S", "datetime", "Parses a date-time: year, month, month day, hour, minutes, and seconds.", + "ymd H:M:S", "datetime", "Parses a date-time: year, month, month day, hour, minutes, and seconds.", + "y-m-d H:M:S", "datetime", "Parses a date-time: year, month, month day, hour, minutes, and seconds.", + "y m d H:M:S", "datetime", "Parses a date-time: year, month, month day, hour, minutes, and seconds." +) + +usethis::use_data(dtc_formats, overwrite = TRUE) diff --git a/data/dtc_formats.rda b/data/dtc_formats.rda new file mode 100644 index 0000000000000000000000000000000000000000..1b12c598623c77adbc73d278e41d65cd9338314d GIT binary patch literal 440 zcmV;p0Z0BqT4*^jL0KkKS?W>6NdN)c|H1!yOaVYQ5D)~ySU|sL-k?AL00aO5zyZe! z1SFL7lQkPrq{uT<0R|_jk)~-3XlQ5v$Y=ln00E5+4FDMp000003X(|lB{mdoPg74* z)CQVqp{64dFh(l`q&SLM2sx>>d`_nXTX$&!iwLslxMz zqDJsvgMPcYg(j+vXGJ8Q)PkgjbXcUqrKple=c>{))H$U&$nkYbl@{<#?%=XJCYq_f zXJn=eRV7PHNl?;Vp%^A(8I)vZl`t(PEG7BO7roT0RK2NDpPtN!?N2mwkXOUrxsMoA z;3ni8SJGu_qBC& z%`nNVl_!;p6iPZnVp&9BH&Iz<*P#>8C@~ek`r*&bjMZ2zSJ6Cv)g-0RcJ%G?+tG;_ z11M`v$eH#Hdhh1WQdCRjpYuT+oIQ3#+@%%jO=MqfZ}Z0jO@fXnnevq@QS3P1M&UpW iIaLhRd07;l-P8axPm|XqJMkEA@pmLsg$WL&98{23yu~8` literal 0 HcmV?d00001 diff --git a/man/dtc_formats.Rd b/man/dtc_formats.Rd new file mode 100644 index 00000000..08adcdc2 --- /dev/null +++ b/man/dtc_formats.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dtc_formats.R +\docType{data} +\name{dtc_formats} +\alias{dtc_formats} +\title{Date/time collection formats} +\format{ +A \link[tibble:tibble-package]{tibble} of 20 formats +with three variables: +\describe{ +\item{\code{fmt}}{Format string.} +\item{\code{type}}{Whether a date, time or date-time.} +\item{\code{description}}{Description of which date-time components are parsed.} +} +} +\usage{ +dtc_formats +} +\description{ +Date/time collection formats +} +\examples{ +dtc_formats + +} +\keyword{datasets} From 5368f4ddf690caedf129b0582e5102393f5b3025 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Wed, 15 Nov 2023 12:01:51 +0000 Subject: [PATCH 03/52] update .Rbuildignore --- .Rbuildignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.Rbuildignore b/.Rbuildignore index 24e5e90a..4bca79b1 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -11,3 +11,4 @@ ^pkgdown$ ^LICENSE\.md$ ^\.lintr$ +^data-raw$ From b3f29d2df6c0344b3433002c298452d5db966a08 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Wed, 15 Nov 2023 12:04:41 +0000 Subject: [PATCH 04/52] add tibble support for automatic pretty printing of tibbles --- R/package.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/package.R b/R/package.R index d34d384c..3a56202e 100644 --- a/R/package.R +++ b/R/package.R @@ -5,6 +5,7 @@ #' @name sdtm.oak #' #' @import rlang +#' @importFrom tibble tibble NULL #' onLoad function From 475d8df7edb423886a9b8762f70bbd691163bb79 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Wed, 15 Nov 2023 12:05:56 +0000 Subject: [PATCH 05/52] add `create_iso8601()` (closes #10) --- DESCRIPTION | 10 +- NAMESPACE | 3 + NEWS.md | 4 +- R/dtc_create_iso8601.R | 405 +++++++++++++++++++++++++++ R/dtc_formats.R | 14 + R/dtc_parse_dttm.R | 118 ++++++++ R/dtc_utils.R | 163 +++++++++++ R/parse_dttm.R | 141 ++++++++++ R/parse_dttm_fields.R | 125 +++++++++ R/parse_dttm_fmt.R | 257 +++++++++++++++++ man/assert_capture_matrix.Rd | 42 +++ man/assert_dtc_fmt.Rd | 26 ++ man/assert_dtc_format.Rd | 37 +++ man/create_iso8601.Rd | 81 ++++++ man/find_int_gap.Rd | 31 ++ man/format_iso8601.Rd | 45 +++ man/is_dtc_fmt.Rd | 29 ++ man/iso8601_mon.Rd | 31 ++ man/iso8601_na.Rd | 22 ++ man/iso8601_sec.Rd | 22 ++ man/iso8601_truncate.Rd | 50 ++++ man/iso8601_two_digits.Rd | 25 ++ man/iso8601_year.Rd | 35 +++ man/months_abb_regex.Rd | 24 ++ man/parse_dttm.Rd | 89 ++++++ man/pseq.Rd | 22 ++ man/reg_matches.Rd | 25 ++ man/str_to_anycase.Rd | 19 ++ man/yy_to_yyyy.Rd | 35 +++ man/zero_pad_whole_number.Rd | 30 ++ tests/testthat/test-create_iso8601.R | 84 ++++++ tests/testthat/test-find_int_gap.R | 55 ++++ tests/testthat/test-format_iso8601.R | 26 ++ tests/testthat/test-iso8601.R | 40 +++ tests/testthat/test-parse_dttm.R | 53 ++++ tests/testthat/test-pseq.R | 7 + tests/testthat/test-reg_matches.R | 10 + tests/testthat/test-str_to_anycase.R | 5 + tests/testthat/test-yy_to_yyyy.R | 27 ++ 39 files changed, 2263 insertions(+), 4 deletions(-) create mode 100644 R/dtc_create_iso8601.R create mode 100644 R/dtc_formats.R create mode 100644 R/dtc_parse_dttm.R create mode 100644 R/dtc_utils.R create mode 100644 R/parse_dttm.R create mode 100644 R/parse_dttm_fields.R create mode 100644 R/parse_dttm_fmt.R create mode 100644 man/assert_capture_matrix.Rd create mode 100644 man/assert_dtc_fmt.Rd create mode 100644 man/assert_dtc_format.Rd create mode 100644 man/create_iso8601.Rd create mode 100644 man/find_int_gap.Rd create mode 100644 man/format_iso8601.Rd create mode 100644 man/is_dtc_fmt.Rd create mode 100644 man/iso8601_mon.Rd create mode 100644 man/iso8601_na.Rd create mode 100644 man/iso8601_sec.Rd create mode 100644 man/iso8601_truncate.Rd create mode 100644 man/iso8601_two_digits.Rd create mode 100644 man/iso8601_year.Rd create mode 100644 man/months_abb_regex.Rd create mode 100644 man/parse_dttm.Rd create mode 100644 man/pseq.Rd create mode 100644 man/reg_matches.Rd create mode 100644 man/str_to_anycase.Rd create mode 100644 man/yy_to_yyyy.Rd create mode 100644 man/zero_pad_whole_number.Rd create mode 100644 tests/testthat/test-create_iso8601.R create mode 100644 tests/testthat/test-find_int_gap.R create mode 100644 tests/testthat/test-format_iso8601.R create mode 100644 tests/testthat/test-iso8601.R create mode 100644 tests/testthat/test-parse_dttm.R create mode 100644 tests/testthat/test-pseq.R create mode 100644 tests/testthat/test-reg_matches.R create mode 100644 tests/testthat/test-str_to_anycase.R create mode 100644 tests/testthat/test-yy_to_yyyy.R diff --git a/DESCRIPTION b/DESCRIPTION index d253c411..d30e4833 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -21,13 +21,17 @@ Roxygen: list(markdown = TRUE) RoxygenNote: 7.2.3 Depends: R (>= 4.1) Imports: - rlang (>= 1.0.0) + admiraldev, + dplyr, + purrr, + rlang (>= 1.0.0), + stringr, + tibble Suggests: knitr, rmarkdown, spelling, - testthat (>= 3.1.7), - tibble + testthat (>= 3.1.7) VignetteBuilder: knitr Config/testthat/edition: 3 Config/testthat/parallel: true diff --git a/NAMESPACE b/NAMESPACE index b7307769..392f93da 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,3 +1,6 @@ # Generated by roxygen2: do not edit by hand +export(create_iso8601) import(rlang) +importFrom(rlang,.data) +importFrom(tibble,tibble) diff --git a/NEWS.md b/NEWS.md index 5b727d40..df003685 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,5 @@ # sdtm.oak (development version) -* Initial CRAN submission. +## New Features + +* New function `create_iso8601()` for conversion of vectors of dates, times or date-times to ISO8601 format. diff --git a/R/dtc_create_iso8601.R b/R/dtc_create_iso8601.R new file mode 100644 index 00000000..56518bba --- /dev/null +++ b/R/dtc_create_iso8601.R @@ -0,0 +1,405 @@ +# Month abbreviation (en) to numeric month mapping +mon_abb_to_mon_num <- stats::setNames(sprintf("%02d", seq_along(month.abb)), tolower(month.abb)) + +#' Convert NA to `"-"` +#' +#' [iso8601_na()] takes a character vector and converts `NA` values to `"-"`. +#' +#' @param x A character vector. +#' +#' @returns A character vector. +#' +#' @examples +#' sdtm.oak:::iso8601_na(c("10", NA_character_)) +#' +#' @keywords internal +iso8601_na <- function(x) { + admiraldev::assert_character_vector(x) + x[is.na(x)] <- "-" + x +} + +#' Convert an integer to a zero-padded character vector +#' +#' [zero_pad_whole_number()] takes non-negative integer values and converts +#' them to character with zero padding. Negative numbers and numbers greater +#' than the width specified by the number of digits `n` are converted to `NA`. +#' +#' @param x An integer vector. +#' @param n Number of digits in the output, including zero padding. +#' +#' @returns A character vector. +#' +#' @examples +#' sdtm.oak:::zero_pad_whole_number(c(-1, 0, 1)) +#' +#' sdtm.oak:::zero_pad_whole_number(c(-1, 0, 1, 10, 99, 100), n = 2) +#' +#' sdtm.oak:::zero_pad_whole_number(c(-1, 0, 1, 10, 99, 100), n = 3) +#' +#' @keywords internal +zero_pad_whole_number <- function(x, n = 2L) { + + # Check `x` + if (!rlang::is_integerish(x)) rlang::abort("`x` must be integerish.") + + # Check `n` + admiraldev::assert_integer_scalar(n) + if (n < 1) rlang::abort("`n` must be positive.") + + # Negative numbers are not allowed, and hence get converted to NA. + x[x < 0] <- NA_integer_ + + # Numbers that do not fit within the padding width are converted to NA + x[floor(log10(x)) >= n] <- NA_integer_ + + fmt <- paste0("%0", n, "d") + y <- sprintf(fmt, x) + y[is.na(x)] <- NA_character_ + y +} + +#' Convert two-digit to four-digit years +#' +#' [yy_to_yyyy()] converts two-digit years to four-digit years. +#' +#' @param x An integer vector of years. +#' @param cutoff_2000 An integer value. Two-digit years smaller or equal to +#' `cutoff_2000` are parsed as though starting with `20`, otherwise parsed as +#' though starting with `19`. +#' +#' @returns An integer vector. +#' +#' @examples +#' sdtm.oak:::yy_to_yyyy(0:5) +#' sdtm.oak:::yy_to_yyyy(2000:2005) +#' +#' sdtm.oak:::yy_to_yyyy(90:99) +#' sdtm.oak:::yy_to_yyyy(1990:1999) +#' +#' # NB: change in behavior after 68 +#' sdtm.oak:::yy_to_yyyy(65:72) +#' +#' sdtm.oak:::yy_to_yyyy(1965:1972) +#' +#' @keywords internal +yy_to_yyyy <- function(x, cutoff_2000 = 68L) { + + # Check `x` + if (!rlang::is_integerish(x)) rlang::abort("`x` must be integerish.") + + if (any(x < 0, na.rm = TRUE)) + rlang::abort("`x` cannot have negative years.") + + x <- dplyr::if_else(x <= cutoff_2000, x + 2000L, x) + x <- dplyr::if_else(x <= 99, x + 1900L, x) + x +} + +#' Format as a ISO8601 two-digit number +#' +#' [iso8601_two_digits()] converts a single digit or two digit number into a +#' two digit, 0-padded, number. Failing to parse the input as a two digit number +#' results in `NA`. +#' +#' @param x A character vector. +#' +#' @returns A character vector of the same size as `x`. +#' +#' @examples +#' x <- c("0", "00", "1", "01", "42", "100", NA_character_, "1.") +#' sdtm.oak:::iso8601_two_digits(x) +#' +#' @keywords internal +iso8601_two_digits <- function(x) { + admiraldev::assert_character_vector(x) + x_int <- as.integer(stringr::str_match(x, "^\\d?\\d$")) + zero_pad_whole_number(x_int, n = 2) +} + +iso8601_mday <- iso8601_two_digits +iso8601_hour <- iso8601_two_digits +iso8601_min <- iso8601_two_digits + +#' Format as a ISO8601 four-digit year +#' +#' [iso8601_year()] converts a character vector whose values represent years to +#' four-digit years. +#' +#' @param x A character vector. +#' @param cutoff_2000 A non-negative integer value. Two-digit years smaller or +#' equal to `cutoff_2000` are parsed as though starting with `20`, otherwise +#' parsed as though starting with `19`. +#' +#' @returns A character vector. +#' +#' @examples +#' sdtm.oak:::iso8601_year(c("0", "1", "2", "50", "68", "69", "90", "99", "00")) +#' +#' # Be default, `cutoff_2000` is at 68. +#' sdtm.oak:::iso8601_year(c("67", "68", "69", "70")) +#' sdtm.oak:::iso8601_year(c("1967", "1968", "1969", "1970")) +#' +#' # Change it to something else, e.g. `cutoff_2000 = 25`. +#' sdtm.oak:::iso8601_year(as.character(0:50), cutoff_2000 = 25) +#' sdtm.oak:::iso8601_year(as.character(1900:1950), cutoff_2000 = 25) +#' +#' @keywords internal +iso8601_year <- function(x, cutoff_2000 = 68L) { + admiraldev::assert_character_vector(x) + admiraldev::assert_integer_scalar(cutoff_2000, subset = "non-negative") + x_int <- as.integer(stringr::str_match(x, "^\\d{1,4}$")) + x_int <- yy_to_yyyy(x_int, cutoff_2000 = cutoff_2000) + zero_pad_whole_number(x_int, n = 4) +} + +#' Format as a ISO8601 month +#' +#' [iso8601_mon()] converts a character vector whose values represent numeric +#' or abbreviated month names to zero-padded numeric months. +#' +#' @param x A character vector. +#' +#' @returns A character vector. +#' +#' @examples +#' sdtm.oak:::iso8601_mon(c(NA, "0", "1", "2", "10", "11", "12")) +#' +#' # No semantic validation is performed on the numeric months, so `"13"` stays +#' # `"13"` but representations that can't be represented as two-digit numbers +#' # become `NA`. +#' sdtm.oak:::iso8601_mon(c("13", "99", "100", "-1")) +#' +#' (mon <- month.abb) +#' sdtm.oak:::iso8601_mon(mon) +#' +#' @keywords internal +iso8601_mon <- function(x) { + x <- tolower(x) + num_mon <- mon_abb_to_mon_num[x] + num_mon_chr <- num_mon + num_mon_chr[is.na(num_mon)] <- iso8601_two_digits(x[is.na(num_mon)]) + mon_int <- as.integer(num_mon_chr) + zero_pad_whole_number(mon_int, n = 2) +} + +#' Format as ISO8601 seconds +#' +#' [iso8601_sec()] converts a character vector whose values represent seconds. +#' +#' @param x A character vector. +#' +#' @returns A character vector. +#' +#' @examples +#' sdtm.oak:::iso8601_sec(c(NA, "0", "1", "10", "59", "99", "100")) +#' +#' @keywords internal +iso8601_sec <- function(x) { + x_iso8601 <- stringr::str_extract(x, "^\\d?\\d(\\.\\d*)?$") + x_iso8601 <- stringr::str_replace(x_iso8601, "^\\d(\\.\\d*)?$", "0\\0") + x_iso8601 <- stringr::str_replace(x_iso8601, "(\\.[^0]*)(0*)$", "\\1") + x_iso8601 <- stringr::str_remove(x_iso8601, "\\.$") + x_iso8601[is.na(x_iso8601)] <- NA_character_ + x_iso8601 +} + +#' Truncate a partial ISO8601 date-time +#' +#' [iso8601_truncate()] converts a character vector of ISO8601 dates, times or +#' date-times that might be partial and truncates the format by removing those +#' missing components. +#' +#' @param x A character vector. +#' +#' @returns A character vector. +#' +#' @examples +#' x <- +#' c( +#' "1999-01-01T15:20:01", +#' "1999-01-01T15:20:-", +#' "1999-01-01T15:-:-", +#' "1999-01-01T-:-:-", +#' "1999-01--T-:-:-", +#' "1999----T-:-:-", +#' "-----T-:-:-" +#' ) +#' +#' sdtm.oak:::iso8601_truncate(x) +#' +#' # With `empty_as_na = FALSE` empty strings are not replaced with `NA` +#' sdtm.oak:::iso8601_truncate("-----T-:-:-", empty_as_na = TRUE) +#' sdtm.oak:::iso8601_truncate("-----T-:-:-", empty_as_na = FALSE) +#' +#' # Truncation only happens if missing components are the right most end, +#' # otherwise they remain unaltered. +#' sdtm.oak:::iso8601_truncate( +#' c( +#' "1999----T15:20:01", +#' "1999-01-01T-:20:01", +#' "1999-01-01T-:-:01", +#' "1999-01-01T-:-:-" +#' ) +#' ) +#' +#' @keywords internal +iso8601_truncate <- function(x, empty_as_na = TRUE) { + x <- stringr::str_remove(x, "[^\\d]*$") + if (empty_as_na) x[x == ""] <- NA_character_ + x +} + +#' Convert date/time components into ISO8601 format +#' +#' [format_iso8601()] takes a character matrix of date/time components and +#' converts each component to ISO8601 format. In practice this entails +#' converting years to a four digit number, and month, day, hours, minutes and +#' seconds to two-digit numbers. Not available (`NA`) components are converted +#' to `"-"`. +#' +#' @param m A character matrix of date/time components. It must have six +#' named columns: `year`, `mon`, `mday`, `hour`, `min` and `sec`. +#' @param .cutoff_2000 An integer value. Two-digit years smaller or equal to +#' `.cutoff_2000` are parsed as though starting with `20`, otherwise parsed as +#' though starting with `19`. +#' +#' @returns A character vector with date-times following the ISO8601 format. +#' +#' @examples +#' cols <- c("year", "mon", "mday", "hour", "min", "sec") +#' m <- matrix( +#' c( +#' "99", "00", "01", +#' "Jan", "feb", "03", +#' "1", "01", "31", +#' "00", "12", "23", +#' "00", "59", "10", +#' "42", "5.15", NA +#' ), +#' ncol = 6, +#' dimnames = list(c(), cols) +#' ) +#' +#' sdtm.oak:::format_iso8601(m) +#' +#' @keywords internal +format_iso8601 <- function(m, .cutoff_2000 = 68) { + + admiraldev::assert_integer_scalar(.cutoff_2000) + + m[, 'year'] <- iso8601_year(m[, 'year'], cutoff_2000 = .cutoff_2000) + m[, 'mon'] <- iso8601_mon(m[, 'mon']) + m[, 'mday'] <- iso8601_mday(m[, 'mday']) + m[, 'hour'] <- iso8601_hour(m[, 'hour']) + m[, 'min'] <- iso8601_min(m[, 'min']) + m[, 'sec'] <- iso8601_sec(m[, 'sec']) + + m <- iso8601_na(m) + + x <- + paste0(m[, 'year'], + "-", + m[, 'mon'], + "-", + m[, 'mday'], + "T", + m[, 'hour'], + ":", + m[, 'min'], + ":", + m[, 'sec']) + + iso8601_truncate(x) +} + +#' Convert date or time collected values to ISO 8601 +#' +#' [create_iso8601()] converts vectors of dates, times or date-times to +#' [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) format. +#' +#' @param ... Character vectors of dates, times or date-times' components. +#' @param .format Parsing format(s). Either a character vector or a list of +#' character vectors. If a character vector is passed then each element is +#' taken as parsing format for each vector passed in `...`. If a list is +#' provided, then each element must be a character vector of formats. The +#' first vector of formats is used for parsing the first vector passed in +#' `...`, and so on. +#' @param .na A character vector of string literals to be regarded as missing +#' values during parsing. +#' @param .cutoff_2000 An integer value. Two-digit years smaller or equal to +#' `.cutoff_2000` are parsed as though starting with `20`, otherwise parsed as +#' though starting with `19`. +#' @param .check_format Whether to check the formats passed in `.format`, +#' meaning to check against a selection of validated formats in +#' [dtc_formats][sdtm.oak::dtc_formats]; or to have a more permissible +#' interpretation of the formats. +#' +#' @examples +#' # Converting dates +#' create_iso8601(c("2020-01-01", "20200102"), .format = "y-m-d") +#' create_iso8601(c("2020-01-01", "20200102"), .format = "ymd") +#' create_iso8601(c("2020-01-01", "20200102"), .format = list(c("y-m-d", "ymd"))) +#' +#' # Two-digit years are supported +#' create_iso8601(c("20-01-01", "200101"), .format = list(c("y-m-d", "ymd"))) +#' +#' # `.cutoff_2000` sets the cutoff for two-digit to four-digit year conversion +#' # Default is at 68. +#' create_iso8601(c("67-01-01", "68-01-01", "69-01-01"), .format = "y-m-d") +#' +#' # Change it to 80. +#' create_iso8601(c("79-01-01", "80-01-01", "81-01-01"), .format = "y-m-d", .cutoff_2000 = 80) +#' +#' # Converting times +#' create_iso8601("15:10", .format = "HH:MM", .check_format = FALSE) +#' create_iso8601("2:10", .format = "HH:MM", .check_format = FALSE) +#' create_iso8601("2:1", .format = "HH:MM", .check_format = FALSE) +#' create_iso8601("02:01:56", .format = "HH:MM:SS", .check_format = FALSE) +#' create_iso8601("020156.5", .format = "HHMMSS", .check_format = FALSE) +#' +#' # Converting date-times +#' create_iso8601("12 NOV 202015:15", .format = "dd mmm yyyyHH:MM", .check_format = FALSE) +#' +#' # Indicate allowed missing values to make the parsing pass +#' create_iso8601("U DEC 201914:00", .format = "dd mmm yyyyHH:MM", .check_format = FALSE) +#' create_iso8601("U DEC 201914:00", .format = "dd mmm yyyyHH:MM", .check_format = FALSE, .na = "U") +#' +#' create_iso8601("NOV 2020", .format = "m y", .check_format = FALSE) +#' create_iso8601(c("MAR 2019", "MaR 2020", "mar 2021"), .format = "m y", .check_format = FALSE) +#' +#' create_iso8601("2019-04-041045-", .format = "yyyy-mm-ddHHMM-", .check_format = FALSE) +#' +#' create_iso8601("20200507null", .format = "ymd(HH:MM:SS)", .check_format = FALSE) +#' create_iso8601("20200507null", .format = "ymd((HH:MM:SS)|null)", .check_format = FALSE) +#' +#' # Fractional seconds +#' create_iso8601("2019-120602:20:13.1230001", .format = "y-mdH:M:S", .check_format = FALSE) +#' @export +create_iso8601 <- function(..., .format, .na = NULL, .cutoff_2000 = 68, .check_format = TRUE) { + + dots <- rlang::dots_list(...) + + if (rlang::is_empty(dots)) return(character()) + + # Check if all vectors in `dots` are of character type. + if (!identical(unique(sapply(dots, typeof)), "character")) + rlang::abort("All vectors in `...` must be of type character.") + + # Check if all vectors in `dots` are of the same length. + n <- unique(lengths(dots)) + if (!identical(length(n), 1L)) + rlang::abort("All vectors in `...` must be of the same length.") + + if (!identical(length(dots), length(.format))) + rlang::abort("Number of vectors in `...` should match length of `.format`.") + + # Check that the `.format` is either a character vector or a list of + # character vectors, and that each string is one of the possible formats. + if (.check_format) assert_dtc_format(.format) + + cap_matrices <- purrr::map2(dots, .format, ~ parse_dttm(dttm = .x, fmt = .y, na = .na)) + cap_matrix <- coalesce_capture_matrices(!!!cap_matrices) + + format_iso8601(cap_matrix, .cutoff_2000 = .cutoff_2000) +} diff --git a/R/dtc_formats.R b/R/dtc_formats.R new file mode 100644 index 00000000..af3b0a6a --- /dev/null +++ b/R/dtc_formats.R @@ -0,0 +1,14 @@ +#' Date/time collection formats +#' +#' @format A [tibble][tibble::tibble-package] of `r nrow(dtc_formats)` formats +#' with three variables: +#' \describe{ +#' \item{`fmt`}{Format string.} +#' \item{`type`}{Whether a date, time or date-time.} +#' \item{`description`}{Description of which date-time components are parsed.} +#' } +#' +#' @examples +#' dtc_formats +#' +"dtc_formats" diff --git a/R/dtc_parse_dttm.R b/R/dtc_parse_dttm.R new file mode 100644 index 00000000..24d95fa4 --- /dev/null +++ b/R/dtc_parse_dttm.R @@ -0,0 +1,118 @@ +#' @rdname parse_dttm +#' @order 2 +parse_dttm_ <- function(dttm, + fmt, + na = NULL, + sec_na = na, + min_na = na, + hour_na = na, + mday_na = na, + mon_na = na, + year_na = na) { + + admiraldev::assert_character_scalar(fmt) + + tbl_fmt_c <- parse_dttm_fmt(fmt) + regex <- + dttm_fmt_to_regex( + tbl_fmt_c, + fmt_regex = fmt_rg( + na = na, + sec_na = sec_na, + min_na = min_na, + hour_na = hour_na, + mday_na = mday_na, + mon_na = mon_na, + year_na = year_na + ) + ) + + m <- stringr::str_match(dttm, regex) + + # Drop matching subgroups (those are unnamed) + m <- m[, colnames(m) != "", drop = FALSE] + + complete_capture_matrix(m) +} + +#' Parse a date, time, or date-time +#' +#' [parse_dttm()] extracts date and time components. [parse_dttm()] wraps around +#' [parse_dttm_()], which is not vectorized over `fmt`. +#' +#' @param dttm A character vector of dates, times or date-times. +#' @param fmt In the case of `parse_dttm()`, a character vector of parsing +#' formats, or a single string format in the case of `parse_dttm_()`. When a +#' character vector of formats is passed, each format is attempted in turn +#' with the first parsing result to be successful taking precedence in the +#' final result. The formats in `fmt` can be any strings, however the +#' following characters (or successive repetitions thereof) are reserved in +#' the sense that they are treated in a special way: +#' - `"y"`: parsed as year; +#' - `"m"`: parsed as month; +#' - `"d"`: parsed as day; +#' - `"H"`: parsed as hour; +#' - `"M"`: parsed as minute; +#' - `"S"`: parsed as second. +#' +#' @param na,sec_na,min_na,hour_na,mday_na,mon_na,year_na A character vector of +#' alternative values to allow during matching. This can be used to indicate +#' different forms of missing values to be found during the parsing date-time +#' strings. +#' +#' @returns A character matrix of six columns: `"year"`, `"mon"`, `"mday"`, +#' `"hour"`, `"min"` and `"sec"`. Each row corresponds to an element in +#' `dttm`. Each element of the matrix is the parsed date/time component. +#' +#' @examples +#' sdtm.oak:::parse_dttm("2020", "y") +#' sdtm.oak:::parse_dttm("2020-05", "y") +#' +#' sdtm.oak:::parse_dttm("2020-05", "y-m") +#' sdtm.oak:::parse_dttm("2020-05-11", "y-m-d") +#' +#' sdtm.oak:::parse_dttm("2020 05 11", "y m d") +#' sdtm.oak:::parse_dttm("2020 05 11", "y m d") +#' sdtm.oak:::parse_dttm("2020 05 11", "y\\s+m\\s+d") +#' sdtm.oak:::parse_dttm("2020 05 11", "y\\s+m\\s+d") +#' +#' sdtm.oak:::parse_dttm("2020-05-11 11:45", "y-m-d H:M") +#' sdtm.oak:::parse_dttm("2020-05-11 11:45:15.6", "y-m-d H:M:S") +#' +#' sdtm.oak:::parse_dttm(c("2002-05-11 11:45", "-05-11 11:45"), "y-m-d H:M") +#' sdtm.oak:::parse_dttm(c("2002-05-11 11:45", "-05-11 11:45"), "-m-d H:M") +#' sdtm.oak:::parse_dttm(c("2002-05-11 11:45", "-05-11 11:45"), c("y-m-d H:M", "-m-d H:M")) +#' +#' sdtm.oak:::parse_dttm(c("2020-05-18", "2020-UN-18", "2020-UNK-UN"), "y-m-d") +#' sdtm.oak:::parse_dttm(c("2020-05-18", "2020-UN-18", "2020-UNK-UN"), "y-m-d", na = "UN") +#' sdtm.oak:::parse_dttm(c("2020-05-18", "2020-UN-18", "2020-UNK-UN"), "y-m-d", na = c("UN", "UNK")) +#' +#' @keywords internal +parse_dttm <- function(dttm, + fmt, + na = NULL, + sec_na = na, + min_na = na, + hour_na = na, + mday_na = na, + mon_na = na, + year_na = na) { + lst <- + purrr::map( + fmt, + ~ parse_dttm_( + dttm = dttm, + fmt = .x, + na = na, + sec_na = sec_na, + min_na = min_na, + hour_na = hour_na, + mday_na = mday_na, + mon_na = mon_na, + year_na = year_na + ) + ) + + coalesce_capture_matrices(!!!lst) + +} diff --git a/R/dtc_utils.R b/R/dtc_utils.R new file mode 100644 index 00000000..d654c39f --- /dev/null +++ b/R/dtc_utils.R @@ -0,0 +1,163 @@ +#' Is it one of the supported formats? +#' +#' [is_dtc_fmt()] takes a character vector of date/time formats and returns +#' a logical indicating which are supported. +#' +#' @details +#' This function works by checking if the values of `x` are one of the formats +#' listed in column `fmt` of [dtc_formats]. +#' +#' @param fmt A character vector. +#' +#' @returns A [logical][base::logical] vector of the same size of `fmt`. +#' +#' @examples +#' sdtm.oak:::is_dtc_fmt(c("ymd", "y m d", "dmy", "HM", "H:M:S", "y-m-d H:M:S")) +#' +#' sdtm.oak:::is_dtc_fmt("y years m months d days") +#' +#' @keywords internal +is_dtc_fmt <- function(fmt) { + admiraldev::assert_character_vector(fmt) + fmt %in% sdtm.oak::dtc_formats$fmt +} + +#' Assert date time character formats +#' +#' [assert_dtc_fmt()] takes a character vector of date/time formats and check if +#' the formats are supported, meaning it check if they are one of the formats +#' listed in column `fmt` of [dtc_formats], failing with an error otherwise. +#' +#' @param fmt A character vector. +#' +#' @examples +#' sdtm.oak:::assert_dtc_fmt(c("ymd", "y m d", "dmy", "HM", "H:M:S", "y-m-d H:M:S")) +#' +#' # This example is guarded to avoid throwing errors +#' if (FALSE) { +#' sdtm.oak:::assert_dtc_fmt("y years m months d days") +#' } +#' +#' @keywords internal +assert_dtc_fmt <- function(fmt) { + admiraldev::assert_character_vector(fmt) + rlang::arg_match(fmt, + values = sdtm.oak::dtc_formats$fmt, + multiple = TRUE) +} + +#' Assert dtc format +#' +#' [assert_dtc_format()] is an internal helper function aiding with the checking +#' of the `.format` parameter of [create_iso8601()]. +#' +#' @param .format The argument of [create_iso8601()]'s `.format` parameter. +#' +#' @returns This function throws an error if `.format` is not either: +#' - A character vector of formats permitted by [assert_dtc_fmt()]; +#' - A list of character vectors of formats permitted by [assert_dtc_fmt()]. +#' +#' Otherwise, it returns `.format` invisibly. +#' +#' @examples +#' sdtm.oak:::assert_dtc_format("ymd") +#' sdtm.oak:::assert_dtc_format(c("ymd", "y-m-d")) +#' sdtm.oak:::assert_dtc_format(list(c("ymd", "y-m-d"), "H:M:S")) +#' +#' # These commands should throw an error +#' if (FALSE) { +#' # Note that `"year, month, day"` is not a supported format. +#' sdtm.oak:::assert_dtc_format("year, month, day") +#' } +#' +#' @keywords internal +assert_dtc_format <- function(.format) { + + abort_msg <- "`.format` must be either a character vector of formats of a list thereof." + + switch ( + typeof(.format), + character = assert_dtc_fmt(.format) , + list = purrr::map(.format, assert_dtc_format), + rlang::abort(abort_msg) + ) + + invisible(.format) +} + +#' Assert capture matrix +#' +#' @description +#' +#' [assert_capture_matrix()] is an internal helper function aiding with the +#' checking of an internal R object that contains the parsing results as +#' returned by [parse_dttm()]: capture matrix. +#' +#' This function checks that the capture matrix is a matrix and that it contains +#' six columns: `year`, `mon`, `mday`, `hour`, `min` and `sec`. +#' +#' @param .format The argument of [create_iso8601()]'s `.format` parameter. +#' +#' @returns This function throws an error if `m` is not either: +#' - A character matrix; +#' - A matrix whose columns are (at least): `year`, `mon`, `mday`, `hour`, +#' `min` and `sec`. +#' +#' Otherwise, it returns `.format` invisibly. +#' +#' @examples +#' sdtm.oak:::assert_dtc_format("ymd") +#' sdtm.oak:::assert_dtc_format(c("ymd", "y-m-d")) +#' sdtm.oak:::assert_dtc_format(list(c("ymd", "y-m-d"), "H:M:S")) +#' +#' # These commands should throw an error +#' if (FALSE) { +#' # Note that `"year, month, day"` is not a supported format. +#' sdtm.oak:::assert_dtc_format("year, month, day") +#' } +#' +#' @keywords internal +assert_capture_matrix <- function(m) { + + # `m` must be of character type. + admiraldev::assert_character_vector(m) + + if (!is.matrix(m)) + rlang::abort("`m` must be a matrix.") + + col_names <- c("year", "mon", "mday", "hour", "min", "sec") + if (!all(colnames(m) %in% col_names)) + rlang::abort("`m` must have the following colnames: `year`, `mon`, `mday`, `hour`, `min` and `sec`.") + + invisible(m) +} + + +complete_capture_matrix <- + function(m) { + col_names <- c("year", "mon", "mday", "hour", "min", "sec") + + if (setequal(col_names, colnames(m))) + return(m) + + miss_cols <- setdiff(col_names, colnames(m)) + miss_n_cols <- length(miss_cols) + + m2 <- matrix(nrow = nrow(m), ncol = miss_n_cols) + colnames(m2) <- miss_cols + + cbind(m, m2)[, col_names] + + } + +coalesce_capture_matrices <- function(...) { + + dots <- rlang::list2(...) + # `as.vector` needed because of: https://github.com/tidyverse/dplyr/issues/6954 + vecs <- purrr::map(dots, as.vector) + vec <- dplyr::coalesce(!!!vecs) + m <- matrix(vec, ncol = 6L) + colnames(m) <- c("year", "mon", "mday", "hour", "min", "sec") + + m +} diff --git a/R/parse_dttm.R b/R/parse_dttm.R new file mode 100644 index 00000000..a4f5e8ca --- /dev/null +++ b/R/parse_dttm.R @@ -0,0 +1,141 @@ +# new_dttm_tbl <- function(sec = character(), +# min = character(), +# hour = character(), +# mday = character(), +# mon = character(), +# year = character()) { +# +# tibble::tibble( +# year = year, +# mon = mon, +# mday = mday, +# hour = hour, +# min = min, +# sec = sec +# ) +# } + +# parse_dttm <- function(dttm, +# fmt, +# na = NULL, +# sec_na = na, +# min_na = na, +# hour_na = na, +# mday_na = na, +# mon_na = na, +# year_na = na) { +# +# +# tbl_fmt_c <- parse_dttm_fmt(fmt) +# regex <- +# dttm_fmt_to_regex( +# tbl_fmt_c, +# fmt_regex = fmt_rg( +# na = na, +# sec_na = sec_na, +# min_na = min_na, +# hour_na = hour_na, +# mday_na = mday_na, +# mon_na = mon_na, +# year_na = year_na +# ) +# ) +# +# m <- stringr::str_match(dttm, regex) +# colnames(m)[1] <- "dttm" +# # Remove unnamed capture groups (these are subgroups within the dttm components) +# m <- m[, colnames(m) != "", drop = FALSE] +# dplyr::bind_rows(new_dttm_tbl(), tibble::as_tibble(m)) +# } + +# convert NA in character x to "-" +# iso8601_na <- function(x) { +# x[is.na(x)] <- "-" +# x +# } + + + +# iso8601_mday <- function(x) { +# iso8601_two_digits(x) |> iso8601_na() +# } +# +# iso8601_hour <- function(x) { +# iso8601_two_digits(x) |> iso8601_na() +# } +# +# iso8601_min <- function(x) { +# iso8601_two_digits(x) |> iso8601_na() +# } + +# iso8601_sec <- function(x) { +# x_iso8601 <- stringr::str_extract(x, "^\\d?\\d(\\.\\d*)?$") +# #x_iso8601 <- sprintf("%02f", x_dbl) +# x_iso8601 <- stringr::str_replace(x_iso8601, "^\\d(\\.\\d*)?$", "0\\0") +# x_iso8601 <- stringr::str_replace(x_iso8601, "(\\.[^0]*)(0*)$", "\\1") +# x_iso8601 <- stringr::str_remove(x_iso8601, "\\.$") +# x_iso8601[is.na(x_iso8601)] <- NA_character_ +# x_iso8601 |> iso8601_na() +# } + +# Month abbreviation (en) to numeric month mapping +# mon_abb_to_mon_num <- setNames(sprintf("%02d", seq_along(month.abb)), tolower(month.abb)) + +# iso8601_mon <- function(x) { +# +# x <- tolower(x) +# # Translate month abbreviations to numeric months +# num_mon <- mon_abb_to_mon_num[x] +# num_mon_chr <- num_mon +# num_mon_chr[is.na(num_mon)] <- iso8601_two_digits(x[is.na(num_mon)]) +# +# mon_int <- as.integer(num_mon_chr) +# x_iso8601 <- sprintf("%02d", mon_int) +# x_iso8601[is.na(mon_int)] <- NA_character_ +# iso8601_na(x_iso8601) +# } + +# iso8601_year <- function(x, cutoff_2000 = 68L) { +# x_int <- as.integer(stringr::str_match(x, "^\\d{1,4}$")) +# x_int <- ifelse (x_int <= cutoff_2000, x_int + 2000L, x_int) +# x_int <- ifelse (x_int <= 99, x_int + 1900L, x_int) +# +# +# x_iso8601 <- sprintf("%04d", x_int) +# x_iso8601[is.na(x_int)] <- NA_character_ +# iso8601_na(x_iso8601) +# } + +# iso8601_truncate <- function(x, empty_as_na = TRUE) { +# +# x <- stringr::str_remove(x, "[^\\d]*$") +# +# if (empty_as_na) x[x == ""] <- NA_character_ +# +# x +# } + +#' #' @importFrom rlang .data +#' #' @export +#' format_iso8601 <- function(dttm, fmt, na = NULL) { +#' +#' tbl <- parse_dttm(dttm, fmt, na = na) +#' tbl |> +#' dplyr::mutate( +#' year = iso8601_year(.data$year), +#' mon = iso8601_mon(.data$mon), +#' mday = iso8601_mday(.data$mday), +#' hour = iso8601_hour(.data$hour), +#' min = iso8601_min(.data$min), +#' sec = iso8601_sec(.data$sec), +#' iso8601 = dplyr::if_else( +#' !is.na(dttm), +#' iso8601_truncate( +#' stringr::str_glue("{.data$year}-{.data$mon}-{.data$mday}T{.data$hour}:{.data$min}:{.data$sec}") +#' ), +#' NA_character_ +#' ) +#' ) |> +#' dplyr::relocate(.data$iso8601, .data$dttm, .before = 1L) +#' +#' } diff --git a/R/parse_dttm_fields.R b/R/parse_dttm_fields.R new file mode 100644 index 00000000..082e6fb0 --- /dev/null +++ b/R/parse_dttm_fields.R @@ -0,0 +1,125 @@ +# # pseq <- +# # function(from = 1, +# # to = 1) { +# # mapply( +# # `:`, +# # from = from, +# # to = to, +# # SIMPLIFY = FALSE +# # ) |> +# # unlist() +# # } +# +# find_gaps <- function(x, x_min, x_max) { +# y <- setdiff(seq(x_min, x_max), x) +# streaks <- split(y, cumsum(c(TRUE, diff(y) != 1))) +# +# lapply(X = streaks, \(x) c(start = min(x), end = max(x))) +# } +# +# fmt_tokens <- +# list( +# sec = "S+", +# min = "M+", +# hour = "H+", +# mday = "d+", +# mon = "m+", +# year = "y+" +# ) +# +# fmt_regex <- +# list( +# sec = "(\\d{2})", +# min = "(\\d{2})", +# hour = "(\\d{2})", +# mday = "(\\d{2})", +# mon = "(\\d{2}|Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)", +# year = "(\\d{4})" +# ) +# +# +# as_order <- function(fmt) { +# gsub("[^[:alpha:]]+", "", fmt) +# } +# +# fmt_token_tally <- function(fmt, tokens = fmt_tokens) { +# sapply(fmt_tokens, \(x) stringr::str_count(fmt, x)) +# } +# +# is_dt_fmt <- function(fmt) { +# +# tally <- fmt_token_tally(fmt) +# all(tally < 2) && sum(tally) > 0 +# } +# +# reg_matches <- function(x, m, invert = FALSE) { +# match <- regmatches(x, m, invert = invert) +# ifelse(length(match), match, NA_character_) +# } +# +# # parse_dt_fmt <- function(fmt, tokens = fmt_tokens) { +# # if (!is_dt_fmt(fmt)) { +# # stop("Not a valid format in `fmt`.", call. = TRUE) +# # } +# # sapply(fmt_tokens, \(x) stringr::str_match(fmt, x)) +# # } +# # +# # parse_dt_fmt2 <- function(fmt, tokens = fmt_tokens) { +# # if (!is_dt_fmt(fmt)) { +# # stop("Not a valid format in `fmt`.", call. = TRUE) +# # } +# # sapply(fmt_tokens, \(x) grep(x, fmt)) +# # } +# +# extract_tidy_match_ <- function(x, pattern) { +# +# match_data <- regexpr(pattern, x) +# match <- reg_matches(x, match_data) +# +# is_match <- !is.na(match) +# +# start <- ifelse(is_match, match_data, NA_integer_) +# len <- ifelse(is_match, attr(match_data, "match.length"), NA_integer_) +# end <- start + len - 1L +# data.frame(pat = pattern, cap = match, start = start, end = end, len = len) +# } +# +# extract_tidy_match <- function(x, patterns, sort = TRUE) { +# +# df <- +# lapply(patterns, \(str) extract_tidy_match_(x, str)) |> +# do.call(what = "rbind") +# +# df$ord <- rank(df$start) +# df$ord[is.na(df$start)] <- NA_integer_ +# +# df <- cbind(dttm_el = rownames(df), df) +# rownames(df) <- NULL +# +# if (sort) { +# df[order(df$ord),] +# } else { +# df +# } +# } +# +# +# +# +# parse_dttm_fields <- function(x, +# fmt, +# na = +# list( +# year = "UNKN", +# month = "UNK", +# day = "UN", +# hour = "", +# min = "", +# sec = "" +# )) { +# +# +# +# +# +# } diff --git a/R/parse_dttm_fmt.R b/R/parse_dttm_fmt.R new file mode 100644 index 00000000..a1bf821b --- /dev/null +++ b/R/parse_dttm_fmt.R @@ -0,0 +1,257 @@ +#' Find gap intervals in integer sequences +#' +#' [find_int_gap()] determines the `start` and `end` positions for gap intervals +#' in a sequence of integers. By default, the interval range to look for gaps is +#' defined by the minimum and maximum values of `x`; specify `xmin` and `xmax` +#' to change the range explicitly. +#' +#' @param x An integer vector. +#' @param xmin Left endpoint integer value. +#' @param xmax Right endpoint integer value. +#' +#' @returns A [tibble][tibble::tibble-package] of gap intervals of two columns: +#' - `start`: left endpoint +#' - `end`: right endpoint +#' If no gap intervals are found then an empty [tibble][tibble::tibble-package] +#' is returned. +#' +#' @keywords internal +find_int_gap <- function(x, xmin = min(x), xmax = max(x)) { + + if (!rlang::is_integerish(x)) + rlang::abort("`x` must be integer-ish") + + if (rlang::is_empty(x)) + return(tibble::tibble(start = integer(), end = integer())) + + admiraldev::assert_integer_scalar(xmin) + admiraldev::assert_integer_scalar(xmax) + + x <- sort(unique(x)) + x <- c(xmin - 1, x, xmax + 1) + gaps <- which(diff(x) > 1) + start <- x[gaps] + 1 + end <- x[gaps + 1] - 1 + tibble::tibble(start = x[gaps] + 1, end = x[gaps + 1] - 1) +} + +#' `regmatches()` with `NA` +#' +#' [reg_matches()] is a thin wrapper around [regmatches()] that returns +#' `NA` instead of `character(0)` when matching fails. +#' +#' @param x A character vector. +#' @param m An object with match data. +#' @param invert A logical scalar. If `TRUE`, extract or replace the non-matched +#' substrings. +#' +#' @returns A list of character vectors with the matched substrings, or `NA` if +#' matching failed. +#' +#' @keywords internal +reg_matches <- function(x, m, invert = FALSE) { + match <- regmatches(x, m, invert = invert) + match[!lengths(match)] <- NA_character_ + match +} + +#' Parallel sequence generation +#' +#' [pseq()] is similar to [seq()] but conveniently accepts integer vectors as +#' inputs to `from` and `to`, allowing for parallel generation of sequences. +#' The result is the union of the generated sequences. +#' +#' @param from An integer vector. The starting value(s) of the sequence(s). +#' @param to An integer vector. The ending value(s) of the sequence(s). +#' +#' @returns An integer vector. +#' +#' @keywords internal +pseq <- + function(from, + to) { + mapply( + `:`, + from = from, + to = to, + SIMPLIFY = FALSE + ) |> + unlist() + } + +#' Generate case insensitive regexps +#' +#' [str_to_anycase()] takes a character vector of word strings as input, and +#' generates regular expressions that express that match in any case. +#' +#' @param x A character vector of strings consisting of word characters. +#' +#' @returns A character vector. +#' +#' @keywords internal +str_to_anycase <- function(x) { + + lst <- stringr::str_split(x, "") + purrr::map(lst, ~ stringr::str_c(stringr::str_to_upper(.x), stringr::str_to_lower(.x))) |> + purrr::map(~ sprintf("[%s]", .x)) |> + purrr::map(~ stringr::str_flatten(.x)) |> + unlist() +} + +#' Regex for months' abbreviations +#' +#' [months_abb_regex()] generates a regex that matches month abbreviations. For +#' finer control, the case can be specified with parameter `case`. +#' +#' @param x A character vector of three-letter month abbreviations. Default is +#' `month.abb`. +#' @param case A string scalar: `"any"`, if month abbreviations are to be +#' matched in any case; `"upper"`, to match uppercase abbreviations; +#' `"lower"`, to match lowercase; and, `"title"` to match title case. +#' +#' @returns A regex as a string. +#' +#' @keywords internal +months_abb_regex <- function(x = month.abb, case = c("any", "upper", "lower", "title")) { + + admiraldev::assert_character_vector(x) + case <- match.arg(case) + + if (identical(case, "any")) x <- str_to_anycase(x) + if (identical(case, "upper")) x <- stringr::str_to_upper(x) + if (identical(case, "lower")) x <- stringr::str_to_lower(x) + if (identical(case, "title")) x <- stringr::str_to_title(x) + + stringr::str_flatten(x, collapse = "|") +} + + +# Date time components. This is a nice +# utility function that allows you to easily +# change the regexp for one specific dttm component +# while keeping the other defaults. +fmt_c <- function(sec = "S+", + min = "M+", + hour = "H+", + mday = "d+", + mon = "m+", + year = "y+") { + c( + sec = sec, + min = min, + hour = hour, + mday = mday, + mon = mon, + year = year + ) + +} + +regex_or <- function(x, .open = FALSE, .close = FALSE) { + + admiraldev::assert_character_vector(x) + admiraldev::assert_logical_scalar(.open) + admiraldev::assert_logical_scalar(.close) + + if (.open) x <- c("", x) + if (.close) x <- c(x, "") + + stringr::str_flatten(x, collapse = "|") +} + +fmt_rg <- + function(sec = "(\\b\\d|\\d{2})(\\.\\d*)?", + min = "(\\b\\d|\\d{2})", + hour = "\\d?\\d", + mday = "\\b\\d|\\d{2}", + mon = stringr::str_glue("\\d\\d|{months_abb_regex()}"), + year = "(\\d{2})?\\d{2}", + na = NULL, + sec_na = na, + min_na = na, + hour_na = na, + mday_na = na, + mon_na = na, + year_na = na + ) { + + sec_na <- ifelse(!is.null(sec_na), regex_or(sec_na, .open = TRUE), "") + min_na <- ifelse(!is.null(min_na), regex_or(min_na, .open = TRUE), "") + hour_na <- ifelse(!is.null(hour_na), regex_or(hour_na, .open = TRUE), "") + mday_na <- ifelse(!is.null(mday_na), regex_or(mday_na, .open = TRUE), "") + mon_na <- ifelse(!is.null(mon_na), regex_or(mon_na, .open = TRUE), "") + year_na <- ifelse(!is.null(year_na), regex_or(year_na, .open = TRUE), "") + + c( + sec = stringr::str_glue("(?{sec}{sec_na})"), + min = stringr::str_glue("(?{min}{min_na})"), + hour = stringr::str_glue("(?{hour}{hour_na})"), + mday = stringr::str_glue("(?{mday}{mday_na})"), + mon = stringr::str_glue("(?{mon}{mon_na})"), + year = stringr::str_glue("(?{year}{year_na})") + ) + } + +# Scalar version of `parse_dttm_fmt()`. +parse_dttm_fmt_ <- function(x, pattern) { + + match_data <- regexpr(pattern, x) + match <- reg_matches(x, match_data) + + is_match <- !is.na(match) + + start <- ifelse(is_match, match_data, NA_integer_) + len <- ifelse(is_match, attr(match_data, "match.length"), NA_integer_) + end <- start + len - 1L + tibble::tibble(pat = pattern, cap = match, start = start, end = end, len = len) +} + +#' @importFrom rlang .data +parse_dttm_fmt <- function(fmt, patterns = fmt_c()) { + + fmt_dttmc <- + purrr::map(patterns, ~ parse_dttm_fmt_(fmt, .x)) |> + purrr::list_rbind(names_to = "fmt_c") + + # Get captures' ranks while leaving NA as NA (`rank()` won't do this.) + fmt_dttmc$ord <- dplyr::row_number(fmt_dttmc$start) + + fmt_len <- nchar(fmt) + fmt_pos <- seq_len(fmt_len) + + start <- end <- NULL # To avoid a "no visible binding for global variable" NOTE. + dttmc_pos <- with(fmt_dttmc, pseq(from = start[!is.na(start)], to = end[!is.na(end)])) + # `delim_pos`: delimiter positions, i.e. positions in `fmt` in-between dttm components. + delim_pos <- find_int_gap(dttmc_pos, xmin = 1, xmax = fmt_len) + + delim <- with(delim_pos, stringr::str_sub(fmt, start = start, end = end)) + fmt_delim <- + tibble::tibble( + fmt_c = NA_character_, + pat = NA_character_, + cap = delim, + start = delim_pos$start, + end = delim_pos$end, + len = end - start + 1L + ) + + dplyr::bind_rows(fmt_dttmc, fmt_delim) |> + dplyr::arrange(.data$start) + +} + +#' @importFrom rlang .data +dttm_fmt_to_regex <- function(tbl_fmt_c, fmt_regex = fmt_rg(), anchored = TRUE) { + fmt_regex <- + tbl_fmt_c |> + dplyr::mutate(regex = dplyr::if_else(is.na(.data$fmt_c), .data$cap, fmt_regex[.data$fmt_c])) |> + dplyr::mutate(regex = dplyr::if_else(is.na(.data$cap), NA_character_, .data$regex)) |> + dplyr::pull(.data$regex) + + fmt_regex <- stringr::str_flatten(fmt_regex, na.rm = TRUE) + if (anchored) fmt_regex <- stringr::str_glue("^{fmt_regex}$") + + fmt_regex +} + + diff --git a/man/assert_capture_matrix.Rd b/man/assert_capture_matrix.Rd new file mode 100644 index 00000000..08ec0061 --- /dev/null +++ b/man/assert_capture_matrix.Rd @@ -0,0 +1,42 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dtc_utils.R +\name{assert_capture_matrix} +\alias{assert_capture_matrix} +\title{Assert capture matrix} +\usage{ +assert_capture_matrix(m) +} +\arguments{ +\item{.format}{The argument of \code{\link[=create_iso8601]{create_iso8601()}}'s \code{.format} parameter.} +} +\value{ +This function throws an error if \code{m} is not either: +\itemize{ +\item A character matrix; +\item A matrix whose columns are (at least): \code{year}, \code{mon}, \code{mday}, \code{hour}, +\code{min} and \code{sec}. +} + +Otherwise, it returns \code{.format} invisibly. +} +\description{ +\code{\link[=assert_capture_matrix]{assert_capture_matrix()}} is an internal helper function aiding with the +checking of an internal R object that contains the parsing results as +returned by \code{\link[=parse_dttm]{parse_dttm()}}: capture matrix. + +This function checks that the capture matrix is a matrix and that it contains +six columns: \code{year}, \code{mon}, \code{mday}, \code{hour}, \code{min} and \code{sec}. +} +\examples{ +sdtm.oak:::assert_dtc_format("ymd") +sdtm.oak:::assert_dtc_format(c("ymd", "y-m-d")) +sdtm.oak:::assert_dtc_format(list(c("ymd", "y-m-d"), "H:M:S")) + +# These commands should throw an error +if (FALSE) { +# Note that `"year, month, day"` is not a supported format. + sdtm.oak:::assert_dtc_format("year, month, day") +} + +} +\keyword{internal} diff --git a/man/assert_dtc_fmt.Rd b/man/assert_dtc_fmt.Rd new file mode 100644 index 00000000..21e9f255 --- /dev/null +++ b/man/assert_dtc_fmt.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dtc_utils.R +\name{assert_dtc_fmt} +\alias{assert_dtc_fmt} +\title{Assert date time character formats} +\usage{ +assert_dtc_fmt(fmt) +} +\arguments{ +\item{fmt}{A character vector.} +} +\description{ +\code{\link[=assert_dtc_fmt]{assert_dtc_fmt()}} takes a character vector of date/time formats and check if +the formats are supported, meaning it check if they are one of the formats +listed in column \code{fmt} of \link{dtc_formats}, failing with an error otherwise. +} +\examples{ +sdtm.oak:::assert_dtc_fmt(c("ymd", "y m d", "dmy", "HM", "H:M:S", "y-m-d H:M:S")) + +# This example is guarded to avoid throwing errors +if (FALSE) { + sdtm.oak:::assert_dtc_fmt("y years m months d days") +} + +} +\keyword{internal} diff --git a/man/assert_dtc_format.Rd b/man/assert_dtc_format.Rd new file mode 100644 index 00000000..4ba28bdb --- /dev/null +++ b/man/assert_dtc_format.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dtc_utils.R +\name{assert_dtc_format} +\alias{assert_dtc_format} +\title{Assert dtc format} +\usage{ +assert_dtc_format(.format) +} +\arguments{ +\item{.format}{The argument of \code{\link[=create_iso8601]{create_iso8601()}}'s \code{.format} parameter.} +} +\value{ +This function throws an error if \code{.format} is not either: +\itemize{ +\item A character vector of formats permitted by \code{\link[=assert_dtc_fmt]{assert_dtc_fmt()}}; +\item A list of character vectors of formats permitted by \code{\link[=assert_dtc_fmt]{assert_dtc_fmt()}}. +} + +Otherwise, it returns \code{.format} invisibly. +} +\description{ +\code{\link[=assert_dtc_format]{assert_dtc_format()}} is an internal helper function aiding with the checking +of the \code{.format} parameter of \code{\link[=create_iso8601]{create_iso8601()}}. +} +\examples{ +sdtm.oak:::assert_dtc_format("ymd") +sdtm.oak:::assert_dtc_format(c("ymd", "y-m-d")) +sdtm.oak:::assert_dtc_format(list(c("ymd", "y-m-d"), "H:M:S")) + +# These commands should throw an error +if (FALSE) { +# Note that `"year, month, day"` is not a supported format. + sdtm.oak:::assert_dtc_format("year, month, day") +} + +} +\keyword{internal} diff --git a/man/create_iso8601.Rd b/man/create_iso8601.Rd new file mode 100644 index 00000000..15b170ec --- /dev/null +++ b/man/create_iso8601.Rd @@ -0,0 +1,81 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dtc_create_iso8601.R +\name{create_iso8601} +\alias{create_iso8601} +\title{Convert date or time collected values to ISO 8601} +\usage{ +create_iso8601( + ..., + .format, + .na = NULL, + .cutoff_2000 = 68, + .check_format = TRUE +) +} +\arguments{ +\item{...}{Character vectors of dates, times or date-times' components.} + +\item{.format}{Parsing format(s). Either a character vector or a list of +character vectors. If a character vector is passed then each element is +taken as parsing format for each vector passed in \code{...}. If a list is +provided, then each element must be a character vector of formats. The +first vector of formats is used for parsing the first vector passed in +\code{...}, and so on.} + +\item{.na}{A character vector of string literals to be regarded as missing +values during parsing.} + +\item{.cutoff_2000}{An integer value. Two-digit years smaller or equal to +\code{.cutoff_2000} are parsed as though starting with \code{20}, otherwise parsed as +though starting with \code{19}.} + +\item{.check_format}{Whether to check the formats passed in \code{.format}, +meaning to check against a selection of validated formats in +\link[=dtc_formats]{dtc_formats}; or to have a more permissible +interpretation of the formats.} +} +\description{ +\code{\link[=create_iso8601]{create_iso8601()}} converts vectors of dates, times or date-times to +\href{https://en.wikipedia.org/wiki/ISO_8601}{ISO 8601} format. +} +\examples{ +# Converting dates +create_iso8601(c("2020-01-01", "20200102"), .format = "y-m-d") +create_iso8601(c("2020-01-01", "20200102"), .format = "ymd") +create_iso8601(c("2020-01-01", "20200102"), .format = list(c("y-m-d", "ymd"))) + +# Two-digit years are supported +create_iso8601(c("20-01-01", "200101"), .format = list(c("y-m-d", "ymd"))) + +# `.cutoff_2000` sets the cutoff for two-digit to four-digit year conversion +# Default is at 68. +create_iso8601(c("67-01-01", "68-01-01", "69-01-01"), .format = "y-m-d") + +# Change it to 80. +create_iso8601(c("79-01-01", "80-01-01", "81-01-01"), .format = "y-m-d", .cutoff_2000 = 80) + +# Converting times +create_iso8601("15:10", .format = "HH:MM", .check_format = FALSE) +create_iso8601("2:10", .format = "HH:MM", .check_format = FALSE) +create_iso8601("2:1", .format = "HH:MM", .check_format = FALSE) +create_iso8601("02:01:56", .format = "HH:MM:SS", .check_format = FALSE) +create_iso8601("020156.5", .format = "HHMMSS", .check_format = FALSE) + +# Converting date-times +create_iso8601("12 NOV 202015:15", .format = "dd mmm yyyyHH:MM", .check_format = FALSE) + +# Indicate allowed missing values to make the parsing pass +create_iso8601("U DEC 201914:00", .format = "dd mmm yyyyHH:MM", .check_format = FALSE) +create_iso8601("U DEC 201914:00", .format = "dd mmm yyyyHH:MM", .check_format = FALSE, .na = "U") + +create_iso8601("NOV 2020", .format = "m y", .check_format = FALSE) +create_iso8601(c("MAR 2019", "MaR 2020", "mar 2021"), .format = "m y", .check_format = FALSE) + +create_iso8601("2019-04-041045-", .format = "yyyy-mm-ddHHMM-", .check_format = FALSE) + +create_iso8601("20200507null", .format = "ymd(HH:MM:SS)", .check_format = FALSE) +create_iso8601("20200507null", .format = "ymd((HH:MM:SS)|null)", .check_format = FALSE) + +# Fractional seconds +create_iso8601("2019-120602:20:13.1230001", .format = "y-mdH:M:S", .check_format = FALSE) +} diff --git a/man/find_int_gap.Rd b/man/find_int_gap.Rd new file mode 100644 index 00000000..5fa5654d --- /dev/null +++ b/man/find_int_gap.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/parse_dttm_fmt.R +\name{find_int_gap} +\alias{find_int_gap} +\title{Find gap intervals in integer sequences} +\usage{ +find_int_gap(x, xmin = min(x), xmax = max(x)) +} +\arguments{ +\item{x}{An integer vector.} + +\item{xmin}{Left endpoint integer value.} + +\item{xmax}{Right endpoint integer value.} +} +\value{ +A \link[tibble:tibble-package]{tibble} of gap intervals of two columns: +\itemize{ +\item \code{start}: left endpoint +\item \code{end}: right endpoint +If no gap intervals are found then an empty \link[tibble:tibble-package]{tibble} +is returned. +} +} +\description{ +\code{\link[=find_int_gap]{find_int_gap()}} determines the \code{start} and \code{end} positions for gap intervals +in a sequence of integers. By default, the interval range to look for gaps is +defined by the minimum and maximum values of \code{x}; specify \code{xmin} and \code{xmax} +to change the range explicitly. +} +\keyword{internal} diff --git a/man/format_iso8601.Rd b/man/format_iso8601.Rd new file mode 100644 index 00000000..f16e6f82 --- /dev/null +++ b/man/format_iso8601.Rd @@ -0,0 +1,45 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dtc_create_iso8601.R +\name{format_iso8601} +\alias{format_iso8601} +\title{Convert date/time components into ISO8601 format} +\usage{ +format_iso8601(m, .cutoff_2000 = 68) +} +\arguments{ +\item{m}{A character matrix of date/time components. It must have six +named columns: \code{year}, \code{mon}, \code{mday}, \code{hour}, \code{min} and \code{sec}.} + +\item{.cutoff_2000}{An integer value. Two-digit years smaller or equal to +\code{.cutoff_2000} are parsed as though starting with \code{20}, otherwise parsed as +though starting with \code{19}.} +} +\value{ +A character vector with date-times following the ISO8601 format. +} +\description{ +\code{\link[=format_iso8601]{format_iso8601()}} takes a character matrix of date/time components and +converts each component to ISO8601 format. In practice this entails +converting years to a four digit number, and month, day, hours, minutes and +seconds to two-digit numbers. Not available (\code{NA}) components are converted +to \code{"-"}. +} +\examples{ +cols <- c("year", "mon", "mday", "hour", "min", "sec") +m <- matrix( + c( + "99", "00", "01", + "Jan", "feb", "03", + "1", "01", "31", + "00", "12", "23", + "00", "59", "10", + "42", "5.15", NA + ), + ncol = 6, + dimnames = list(c(), cols) +) + +sdtm.oak:::format_iso8601(m) + +} +\keyword{internal} diff --git a/man/is_dtc_fmt.Rd b/man/is_dtc_fmt.Rd new file mode 100644 index 00000000..5dcd41f3 --- /dev/null +++ b/man/is_dtc_fmt.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dtc_utils.R +\name{is_dtc_fmt} +\alias{is_dtc_fmt} +\title{Is it one of the supported formats?} +\usage{ +is_dtc_fmt(fmt) +} +\arguments{ +\item{fmt}{A character vector.} +} +\value{ +A \link[base:logical]{logical} vector of the same size of \code{fmt}. +} +\description{ +\code{\link[=is_dtc_fmt]{is_dtc_fmt()}} takes a character vector of date/time formats and returns +a logical indicating which are supported. +} +\details{ +This function works by checking if the values of \code{x} are one of the formats +listed in column \code{fmt} of \link{dtc_formats}. +} +\examples{ +sdtm.oak:::is_dtc_fmt(c("ymd", "y m d", "dmy", "HM", "H:M:S", "y-m-d H:M:S")) + +sdtm.oak:::is_dtc_fmt("y years m months d days") + +} +\keyword{internal} diff --git a/man/iso8601_mon.Rd b/man/iso8601_mon.Rd new file mode 100644 index 00000000..e6c9b69f --- /dev/null +++ b/man/iso8601_mon.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dtc_create_iso8601.R +\name{iso8601_mon} +\alias{iso8601_mon} +\title{Format as a ISO8601 month} +\usage{ +iso8601_mon(x) +} +\arguments{ +\item{x}{A character vector.} +} +\value{ +A character vector. +} +\description{ +\code{\link[=iso8601_mon]{iso8601_mon()}} converts a character vector whose values represent numeric +or abbreviated month names to zero-padded numeric months. +} +\examples{ +sdtm.oak:::iso8601_mon(c(NA, "0", "1", "2", "10", "11", "12")) + +# No semantic validation is performed on the numeric months, so `"13"` stays +# `"13"` but representations that can't be represented as two-digit numbers +# become `NA`. +sdtm.oak:::iso8601_mon(c("13", "99", "100", "-1")) + +(mon <- month.abb) +sdtm.oak:::iso8601_mon(mon) + +} +\keyword{internal} diff --git a/man/iso8601_na.Rd b/man/iso8601_na.Rd new file mode 100644 index 00000000..03f5a707 --- /dev/null +++ b/man/iso8601_na.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dtc_create_iso8601.R +\name{iso8601_na} +\alias{iso8601_na} +\title{Convert NA to \code{"-"}} +\usage{ +iso8601_na(x) +} +\arguments{ +\item{x}{A character vector.} +} +\value{ +A character vector. +} +\description{ +\code{\link[=iso8601_na]{iso8601_na()}} takes a character vector and converts \code{NA} values to \code{"-"}. +} +\examples{ +sdtm.oak:::iso8601_na(c("10", NA_character_)) + +} +\keyword{internal} diff --git a/man/iso8601_sec.Rd b/man/iso8601_sec.Rd new file mode 100644 index 00000000..b788de71 --- /dev/null +++ b/man/iso8601_sec.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dtc_create_iso8601.R +\name{iso8601_sec} +\alias{iso8601_sec} +\title{Format as ISO8601 seconds} +\usage{ +iso8601_sec(x) +} +\arguments{ +\item{x}{A character vector.} +} +\value{ +A character vector. +} +\description{ +\code{\link[=iso8601_sec]{iso8601_sec()}} converts a character vector whose values represent seconds. +} +\examples{ +sdtm.oak:::iso8601_sec(c(NA, "0", "1", "10", "59", "99", "100")) + +} +\keyword{internal} diff --git a/man/iso8601_truncate.Rd b/man/iso8601_truncate.Rd new file mode 100644 index 00000000..4c4a4eb6 --- /dev/null +++ b/man/iso8601_truncate.Rd @@ -0,0 +1,50 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dtc_create_iso8601.R +\name{iso8601_truncate} +\alias{iso8601_truncate} +\title{Truncate a partial ISO8601 date-time} +\usage{ +iso8601_truncate(x, empty_as_na = TRUE) +} +\arguments{ +\item{x}{A character vector.} +} +\value{ +A character vector. +} +\description{ +\code{\link[=iso8601_truncate]{iso8601_truncate()}} converts a character vector of ISO8601 dates, times or +date-times that might be partial and truncates the format by removing those +missing components. +} +\examples{ +x <- + c( + "1999-01-01T15:20:01", + "1999-01-01T15:20:-", + "1999-01-01T15:-:-", + "1999-01-01T-:-:-", + "1999-01--T-:-:-", + "1999----T-:-:-", + "-----T-:-:-" + ) + +sdtm.oak:::iso8601_truncate(x) + +# With `empty_as_na = FALSE` empty strings are not replaced with `NA` +sdtm.oak:::iso8601_truncate("-----T-:-:-", empty_as_na = TRUE) +sdtm.oak:::iso8601_truncate("-----T-:-:-", empty_as_na = FALSE) + +# Truncation only happens if missing components are the right most end, +# otherwise they remain unaltered. +sdtm.oak:::iso8601_truncate( + c( + "1999----T15:20:01", + "1999-01-01T-:20:01", + "1999-01-01T-:-:01", + "1999-01-01T-:-:-" + ) +) + +} +\keyword{internal} diff --git a/man/iso8601_two_digits.Rd b/man/iso8601_two_digits.Rd new file mode 100644 index 00000000..337da57f --- /dev/null +++ b/man/iso8601_two_digits.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dtc_create_iso8601.R +\name{iso8601_two_digits} +\alias{iso8601_two_digits} +\title{Format as a ISO8601 two-digit number} +\usage{ +iso8601_two_digits(x) +} +\arguments{ +\item{x}{A character vector.} +} +\value{ +A character vector of the same size as \code{x}. +} +\description{ +\code{\link[=iso8601_two_digits]{iso8601_two_digits()}} converts a single digit or two digit number into a +two digit, 0-padded, number. Failing to parse the input as a two digit number +results in \code{NA}. +} +\examples{ +x <- c("0", "00", "1", "01", "42", "100", NA_character_, "1.") +sdtm.oak:::iso8601_two_digits(x) + +} +\keyword{internal} diff --git a/man/iso8601_year.Rd b/man/iso8601_year.Rd new file mode 100644 index 00000000..190db0f9 --- /dev/null +++ b/man/iso8601_year.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dtc_create_iso8601.R +\name{iso8601_year} +\alias{iso8601_year} +\title{Format as a ISO8601 four-digit year} +\usage{ +iso8601_year(x, cutoff_2000 = 68L) +} +\arguments{ +\item{x}{A character vector.} + +\item{cutoff_2000}{A non-negative integer value. Two-digit years smaller or +equal to \code{cutoff_2000} are parsed as though starting with \code{20}, otherwise +parsed as though starting with \code{19}.} +} +\value{ +A character vector. +} +\description{ +\code{\link[=iso8601_year]{iso8601_year()}} converts a character vector whose values represent years to +four-digit years. +} +\examples{ +sdtm.oak:::iso8601_year(c("0", "1", "2", "50", "68", "69", "90", "99", "00")) + +# Be default, `cutoff_2000` is at 68. +sdtm.oak:::iso8601_year(c("67", "68", "69", "70")) +sdtm.oak:::iso8601_year(c("1967", "1968", "1969", "1970")) + +# Change it to something else, e.g. `cutoff_2000 = 25`. +sdtm.oak:::iso8601_year(as.character(0:50), cutoff_2000 = 25) +sdtm.oak:::iso8601_year(as.character(1900:1950), cutoff_2000 = 25) + +} +\keyword{internal} diff --git a/man/months_abb_regex.Rd b/man/months_abb_regex.Rd new file mode 100644 index 00000000..adcd3347 --- /dev/null +++ b/man/months_abb_regex.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/parse_dttm_fmt.R +\name{months_abb_regex} +\alias{months_abb_regex} +\title{Regex for months' abbreviations} +\usage{ +months_abb_regex(x = month.abb, case = c("any", "upper", "lower", "title")) +} +\arguments{ +\item{x}{A character vector of three-letter month abbreviations. Default is +\code{month.abb}.} + +\item{case}{A string scalar: \code{"any"}, if month abbreviations are to be +matched in any case; \code{"upper"}, to match uppercase abbreviations; +\code{"lower"}, to match lowercase; and, \code{"title"} to match title case.} +} +\value{ +A regex as a string. +} +\description{ +\code{\link[=months_abb_regex]{months_abb_regex()}} generates a regex that matches month abbreviations. For +finer control, the case can be specified with parameter \code{case}. +} +\keyword{internal} diff --git a/man/parse_dttm.Rd b/man/parse_dttm.Rd new file mode 100644 index 00000000..81be074d --- /dev/null +++ b/man/parse_dttm.Rd @@ -0,0 +1,89 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dtc_parse_dttm.R +\name{parse_dttm_} +\alias{parse_dttm_} +\alias{parse_dttm} +\title{Parse a date, time, or date-time} +\usage{ +parse_dttm_( + dttm, + fmt, + na = NULL, + sec_na = na, + min_na = na, + hour_na = na, + mday_na = na, + mon_na = na, + year_na = na +) + +parse_dttm( + dttm, + fmt, + na = NULL, + sec_na = na, + min_na = na, + hour_na = na, + mday_na = na, + mon_na = na, + year_na = na +) +} +\arguments{ +\item{dttm}{A character vector of dates, times or date-times.} + +\item{fmt}{In the case of \code{parse_dttm()}, a character vector of parsing +formats, or a single string format in the case of \code{parse_dttm_()}. When a +character vector of formats is passed, each format is attempted in turn +with the first parsing result to be successful taking precedence in the +final result. The formats in \code{fmt} can be any strings, however the +following characters (or successive repetitions thereof) are reserved in +the sense that they are treated in a special way: +\itemize{ +\item \code{"y"}: parsed as year; +\item \code{"m"}: parsed as month; +\item \code{"d"}: parsed as day; +\item \code{"H"}: parsed as hour; +\item \code{"M"}: parsed as minute; +\item \code{"S"}: parsed as second. +}} + +\item{na, sec_na, min_na, hour_na, mday_na, mon_na, year_na}{A character vector of +alternative values to allow during matching. This can be used to indicate +different forms of missing values to be found during the parsing date-time +strings.} +} +\value{ +A character matrix of six columns: \code{"year"}, \code{"mon"}, \code{"mday"}, +\code{"hour"}, \code{"min"} and \code{"sec"}. Each row corresponds to an element in +\code{dttm}. Each element of the matrix is the parsed date/time component. +} +\description{ +\code{\link[=parse_dttm]{parse_dttm()}} extracts date and time components. \code{\link[=parse_dttm]{parse_dttm()}} wraps around +\code{\link[=parse_dttm_]{parse_dttm_()}}, which is not vectorized over \code{fmt}. +} +\examples{ +sdtm.oak:::parse_dttm("2020", "y") +sdtm.oak:::parse_dttm("2020-05", "y") + +sdtm.oak:::parse_dttm("2020-05", "y-m") +sdtm.oak:::parse_dttm("2020-05-11", "y-m-d") + +sdtm.oak:::parse_dttm("2020 05 11", "y m d") +sdtm.oak:::parse_dttm("2020 05 11", "y m d") +sdtm.oak:::parse_dttm("2020 05 11", "y\\\\s+m\\\\s+d") +sdtm.oak:::parse_dttm("2020 05 11", "y\\\\s+m\\\\s+d") + +sdtm.oak:::parse_dttm("2020-05-11 11:45", "y-m-d H:M") +sdtm.oak:::parse_dttm("2020-05-11 11:45:15.6", "y-m-d H:M:S") + +sdtm.oak:::parse_dttm(c("2002-05-11 11:45", "-05-11 11:45"), "y-m-d H:M") +sdtm.oak:::parse_dttm(c("2002-05-11 11:45", "-05-11 11:45"), "-m-d H:M") +sdtm.oak:::parse_dttm(c("2002-05-11 11:45", "-05-11 11:45"), c("y-m-d H:M", "-m-d H:M")) + +sdtm.oak:::parse_dttm(c("2020-05-18", "2020-UN-18", "2020-UNK-UN"), "y-m-d") +sdtm.oak:::parse_dttm(c("2020-05-18", "2020-UN-18", "2020-UNK-UN"), "y-m-d", na = "UN") +sdtm.oak:::parse_dttm(c("2020-05-18", "2020-UN-18", "2020-UNK-UN"), "y-m-d", na = c("UN", "UNK")) + +} +\keyword{internal} diff --git a/man/pseq.Rd b/man/pseq.Rd new file mode 100644 index 00000000..c1bb8770 --- /dev/null +++ b/man/pseq.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/parse_dttm_fmt.R +\name{pseq} +\alias{pseq} +\title{Parallel sequence generation} +\usage{ +pseq(from, to) +} +\arguments{ +\item{from}{An integer vector. The starting value(s) of the sequence(s).} + +\item{to}{An integer vector. The ending value(s) of the sequence(s).} +} +\value{ +An integer vector. +} +\description{ +\code{\link[=pseq]{pseq()}} is similar to \code{\link[=seq]{seq()}} but conveniently accepts integer vectors as +inputs to \code{from} and \code{to}, allowing for parallel generation of sequences. +The result is the union of the generated sequences. +} +\keyword{internal} diff --git a/man/reg_matches.Rd b/man/reg_matches.Rd new file mode 100644 index 00000000..72e531c6 --- /dev/null +++ b/man/reg_matches.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/parse_dttm_fmt.R +\name{reg_matches} +\alias{reg_matches} +\title{\code{regmatches()} with \code{NA}} +\usage{ +reg_matches(x, m, invert = FALSE) +} +\arguments{ +\item{x}{A character vector.} + +\item{m}{An object with match data.} + +\item{invert}{A logical scalar. If \code{TRUE}, extract or replace the non-matched +substrings.} +} +\value{ +A list of character vectors with the matched substrings, or \code{NA} if +matching failed. +} +\description{ +\code{\link[=reg_matches]{reg_matches()}} is a thin wrapper around \code{\link[=regmatches]{regmatches()}} that returns +\code{NA} instead of \code{character(0)} when matching fails. +} +\keyword{internal} diff --git a/man/str_to_anycase.Rd b/man/str_to_anycase.Rd new file mode 100644 index 00000000..6d5007fd --- /dev/null +++ b/man/str_to_anycase.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/parse_dttm_fmt.R +\name{str_to_anycase} +\alias{str_to_anycase} +\title{Generate case insensitive regexps} +\usage{ +str_to_anycase(x) +} +\arguments{ +\item{x}{A character vector of strings consisting of word characters.} +} +\value{ +A character vector. +} +\description{ +\code{\link[=str_to_anycase]{str_to_anycase()}} takes a character vector of word strings as input, and +generates regular expressions that express that match in any case. +} +\keyword{internal} diff --git a/man/yy_to_yyyy.Rd b/man/yy_to_yyyy.Rd new file mode 100644 index 00000000..c4895aa8 --- /dev/null +++ b/man/yy_to_yyyy.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dtc_create_iso8601.R +\name{yy_to_yyyy} +\alias{yy_to_yyyy} +\title{Convert two-digit to four-digit years} +\usage{ +yy_to_yyyy(x, cutoff_2000 = 68L) +} +\arguments{ +\item{x}{An integer vector of years.} + +\item{cutoff_2000}{An integer value. Two-digit years smaller or equal to +\code{cutoff_2000} are parsed as though starting with \code{20}, otherwise parsed as +though starting with \code{19}.} +} +\value{ +An integer vector. +} +\description{ +\code{\link[=yy_to_yyyy]{yy_to_yyyy()}} converts two-digit years to four-digit years. +} +\examples{ +sdtm.oak:::yy_to_yyyy(0:5) +sdtm.oak:::yy_to_yyyy(2000:2005) + +sdtm.oak:::yy_to_yyyy(90:99) +sdtm.oak:::yy_to_yyyy(1990:1999) + +# NB: change in behavior after 68 +sdtm.oak:::yy_to_yyyy(65:72) + +sdtm.oak:::yy_to_yyyy(1965:1972) + +} +\keyword{internal} diff --git a/man/zero_pad_whole_number.Rd b/man/zero_pad_whole_number.Rd new file mode 100644 index 00000000..d4b972f8 --- /dev/null +++ b/man/zero_pad_whole_number.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dtc_create_iso8601.R +\name{zero_pad_whole_number} +\alias{zero_pad_whole_number} +\title{Convert an integer to a zero-padded character vector} +\usage{ +zero_pad_whole_number(x, n = 2L) +} +\arguments{ +\item{x}{An integer vector.} + +\item{n}{Number of digits in the output, including zero padding.} +} +\value{ +A character vector. +} +\description{ +\code{\link[=zero_pad_whole_number]{zero_pad_whole_number()}} takes non-negative integer values and converts +them to character with zero padding. Negative numbers and numbers greater +than the width specified by the number of digits \code{n} are converted to \code{NA}. +} +\examples{ +sdtm.oak:::zero_pad_whole_number(c(-1, 0, 1)) + +sdtm.oak:::zero_pad_whole_number(c(-1, 0, 1, 10, 99, 100), n = 2) + +sdtm.oak:::zero_pad_whole_number(c(-1, 0, 1, 10, 99, 100), n = 3) + +} +\keyword{internal} diff --git a/tests/testthat/test-create_iso8601.R b/tests/testthat/test-create_iso8601.R new file mode 100644 index 00000000..7b29c1c7 --- /dev/null +++ b/tests/testthat/test-create_iso8601.R @@ -0,0 +1,84 @@ +test_that("`create_iso8601()`: individual date components", { + + x <- c("0", "50", "1950", "80", "1980", "2000") + y0 <- create_iso8601(x, .format = "y", .check_format = FALSE) + y1 <- c(NA, "2050", "1950", "1980", "1980", "2000") + expect_equal(y0, y1) + + x <- c("0", "jan", "JAN", "JaN", "1", "01") + y0 <- create_iso8601(x, .format = "m", .check_format = FALSE) + y1 <- c(NA, "--01", "--01", "--01", NA, "--01") + expect_equal(y0, y1) + + x <- c("0", "00", "1", "01", "10", "31") + y0 <- create_iso8601(x, .format = "d", .check_format = FALSE) + y1 <- c("----00", "----00", "----01", "----01", "----10", "----31") + expect_equal(y0, y1) + +}) + +test_that("`create_iso8601()`: dates", { + + y1 <- c("1999-01-01", "2000-01-01", "1999-01-01", "1999-12-31") + + x <- c("19990101", "20000101", "990101", "991231") + y0 <- create_iso8601(x, .format = "ymd", .check_format = FALSE) + expect_equal(y0, y1) + + x <- c("1999-01-01", "2000-01-01", "99-01-01", "99-12-31") + y0 <- create_iso8601(x, .format = "y-m-d", .check_format = FALSE) + expect_equal(y0, y1) + + x <- c("1999 01 01", "2000 01 01", "99 01 01", "99 12 31") + y0 <- create_iso8601(x, .format = "y m d", .check_format = FALSE) + expect_equal(y0, y1) + +}) + +test_that("`create_iso8601()`: times: hours and minutes", { + + y1 <- c("-----T15:20", "-----T00:10", "-----T23:01", "-----T00:00") + + x <- c("1520", "0010", "2301", "0000") + y0 <- create_iso8601(x, .format = "HM", .check_format = FALSE) + expect_equal(y0, y1) + + x <- c("15:20", "00:10", "23:01", "00:00") + y0 <- create_iso8601(x, .format = "H:M", .check_format = FALSE) + expect_equal(y0, y1) + + x <- c("15h20", "00h10", "23h01", "00h00") + y0 <- create_iso8601(x, .format = "HhM", .check_format = FALSE) + expect_equal(y0, y1) + +}) + +test_that("`create_iso8601()`: times: hours, minutes and seconds", { + + x <- c("152000", "001059", "230112.123", "00002.") + y0 <- create_iso8601(x, .format = "HMS", .check_format = FALSE) + y1 <- c("-----T15:20:00", "-----T00:10:59", "-----T23:01:12.123", "-----T00:00:02") + expect_equal(y0, y1) + + x <- c("15:20:00", "00:10:59", "23:01:12.123", "00:00:2.", "5:1:4") + y0 <- create_iso8601(x, .format = "H:M:S", .check_format = FALSE) + y1 <- c(y1, "-----T05:01:04") + expect_equal(y0, y1) + +}) + + +test_that("`create_iso8601()`: dates and times", { + + dates <- c("1999-01-01", "2000-01-01", "99-01-01", "99-12-31") + times <- c("1520", "0010", "2301", "0000") + iso8601_dttm <- create_iso8601(dates, times, .format = c("y-m-d", "HM"), .check_format = FALSE) + expectation <- + c("1999-01-01T15:20", + "2000-01-01T00:10", + "1999-01-01T23:01", + "1999-12-31T00:00") + expect_equal(iso8601_dttm, expectation) + +}) + diff --git a/tests/testthat/test-find_int_gap.R b/tests/testthat/test-find_int_gap.R new file mode 100644 index 00000000..18cc54a0 --- /dev/null +++ b/tests/testthat/test-find_int_gap.R @@ -0,0 +1,55 @@ +test_that("`find_int_gap()`: one interval", { + + tbl <- find_int_gap(c(1:3, 7:10)) + + expect_equal(tbl$start, 4) + expect_equal(tbl$end, 6) + +}) + +test_that("`find_int_gap()`: two intervals", { + + tbl <- find_int_gap(c(1:3, 7:10, 15:20)) + + expect_equal(tbl$start, c(4, 11)) + expect_equal(tbl$end, c(6, 14)) + +}) + +test_that("`find_int_gap()`: explicit endpoints", { + + tbl <- find_int_gap(c(3:5, 8), xmin = 0, xmax = 10) + + expect_equal(tbl$start, c(0, 6, 9)) + expect_equal(tbl$end, c(2, 7, 10)) + +}) + +test_that("`find_int_gap()`: no intervals", { + + tbl <- find_int_gap(0:5) + expect_equal(tbl, tibble::tibble(start = integer(), end = integer())) + +}) + +test_that("`find_int_gap()`: ensure `x` is integerish", { + + expect_error(find_int_gap(c(1.5, pi))) + +}) + +test_that("`find_int_gap()`: ensure `xmin` and `xmax` are integer scalars", { + + # Error because `xmin` and `xmax` are vectors + expect_error(find_int_gap(c(1:3, 7:10), xmin = 1:2)) + expect_error(find_int_gap(c(1:3, 7:10), xmax = 3:4)) + + # Error because `xmin` and `xmax` are double + expect_error(find_int_gap(c(1:3, 7:10), xmin = 1.5)) + expect_error(find_int_gap(c(1:3, 7:10), xmax = 1.5)) + + # Error because `xmin` and `xmax` are character + expect_error(find_int_gap(c(1:3, 7:10), xmin = "1")) + expect_error(find_int_gap(c(1:3, 7:10), xmax = "2")) + +}) diff --git a/tests/testthat/test-format_iso8601.R b/tests/testthat/test-format_iso8601.R new file mode 100644 index 00000000..cc10bde9 --- /dev/null +++ b/tests/testthat/test-format_iso8601.R @@ -0,0 +1,26 @@ +test_that("`format_iso8601()`: basic usage", { + + cols <- c("year", "mon", "mday", "hour", "min", "sec") + m <- matrix( + c( + "99", "00", "01", + "Jan", "feb", "03", + "1", "01", "31", + "00", "12", "23", + "00", "59", "10", + "42", "5.15", NA + ), + ncol = 6, + dimnames = list(c(), cols) + ) + + expect_equal( + format_iso8601(m), + c( + "1999-01-01T00:00:42", + "2000-02-01T12:59:05.15", + "2001-03-31T23:10" + ) + ) + +}) diff --git a/tests/testthat/test-iso8601.R b/tests/testthat/test-iso8601.R new file mode 100644 index 00000000..57a3d923 --- /dev/null +++ b/tests/testthat/test-iso8601.R @@ -0,0 +1,40 @@ +test_that("`iso8601_na()`: basic usage", { + expect_equal(iso8601_na(c("10", "15")), c("10", "15")) + expect_equal(iso8601_na(c("10", NA_character_)), c("10", "-")) + expect_equal(iso8601_na(character()), character(0)) +}) + +test_that("`iso8601_na()`: input can't be `NULL`", { + expect_error(iso8601_na(NULL)) + expect_error(iso8601_na(c())) +}) + +test_that("`zero_pad_whole_number()`: ensure `x` is integerish", { + expect_error(zero_pad_whole_number(pi)) + expect_error(zero_pad_whole_number("42")) + expect_error(zero_pad_whole_number(sqrt(2))) + expect_error(zero_pad_whole_number(TRUE)) + + expect_no_error(zero_pad_whole_number(1)) + expect_no_error(zero_pad_whole_number(1.00)) + expect_no_error(zero_pad_whole_number(c(1:3))) +}) + +test_that("`zero_pad_whole_number()`: basic usage", { + expect_equal(zero_pad_whole_number(c(-1, 0, 1)), c(NA, "00", "01")) + expect_equal(zero_pad_whole_number(c(-1, 0, 1, 10, 99, 100), n = 2), + c(NA, "00", "01", "10", "99", NA)) + expect_equal(zero_pad_whole_number(c(-1, 0, 1, 10, 99, 100), n = 3), + c(NA, "000", "001", "010", "099", "100")) +}) + +test_that("`zero_pad_whole_number()`: ensure `n` is scalar integer", { + expect_no_error(zero_pad_whole_number(1, n = 1)) + expect_error(zero_pad_whole_number(1, n = 1:2)) +}) + +test_that("`iso8601_two_digits()`: basic usage", { + x <- c("0", "00", "1", "01", "42", "100", NA_character_, "1.") + y <- c("00", "00", "01", "01", "42", NA, NA, NA) + expect_equal(iso8601_two_digits(x), y) +}) diff --git a/tests/testthat/test-parse_dttm.R b/tests/testthat/test-parse_dttm.R new file mode 100644 index 00000000..f12d9d23 --- /dev/null +++ b/tests/testthat/test-parse_dttm.R @@ -0,0 +1,53 @@ +test_that("`months_abb_regex()`: default behavior (case insensitive)", { + x <- paste0( + "[Jj][Aa][Nn]|", + "[Ff][Ee][Bb]|", + "[Mm][Aa][Rr]|", + "[Aa][Pp][Rr]|", + "[Mm][Aa][Yy]|", + "[Jj][Uu][Nn]|", + "[Jj][Uu][Ll]|", + "[Aa][Uu][Gg]|", + "[Ss][Ee][Pp]|", + "[Oo][Cc][Tt]|", + "[Nn][Oo][Vv]|", + "[Dd][Ee][Cc]" + ) + expect_equal(months_abb_regex(), x) +}) + +test_that("`months_abb_regex()`: uppercase", { + x <- paste0( + "JAN|", + "FEB|", + "MAR|", + "APR|", + "MAY|", + "JUN|", + "JUL|", + "AUG|", + "SEP|", + "OCT|", + "NOV|", + "DEC" + ) + expect_equal(months_abb_regex(case = "upper"), x) +}) + +test_that("`months_abb_regex()`: lowercase", { + x <- paste0( + "jan|", + "feb|", + "mar|", + "apr|", + "may|", + "jun|", + "jul|", + "aug|", + "sep|", + "oct|", + "nov|", + "dec" + ) + expect_equal(months_abb_regex(case = "lower"), x) +}) diff --git a/tests/testthat/test-pseq.R b/tests/testthat/test-pseq.R new file mode 100644 index 00000000..f300b561 --- /dev/null +++ b/tests/testthat/test-pseq.R @@ -0,0 +1,7 @@ +test_that("`pseq()`: scalar inputs", { + expect_equal(pseq(from = 0, to = 5), 0:5) +}) + +test_that("`pseq()`: vector inputs", { + expect_equal(pseq(from = c(0, 10), to = c(5, 15)), c(0:5, 10:15)) +}) diff --git a/tests/testthat/test-reg_matches.R b/tests/testthat/test-reg_matches.R new file mode 100644 index 00000000..979e277b --- /dev/null +++ b/tests/testthat/test-reg_matches.R @@ -0,0 +1,10 @@ +test_that("`reg_matches()`: basic usage", { + + x <- c("sdtm.oak", "sdtm.cdisc", "adam") + m <- gregexpr("sdtm", x) + + # `regmatches()` returns `character(0)` for `"adam"` + # But `reg_matches()` returns `NA` for `"adam"` + expect_equal(reg_matches(x, m), list("sdtm", "sdtm", NA_character_)) + +}) diff --git a/tests/testthat/test-str_to_anycase.R b/tests/testthat/test-str_to_anycase.R new file mode 100644 index 00000000..ea24c9f3 --- /dev/null +++ b/tests/testthat/test-str_to_anycase.R @@ -0,0 +1,5 @@ +test_that("`str_to_anycase()`: basic usage", { + x <- c("JAN", "feb", "mAr") + y <- c("[Jj][Aa][Nn]", "[Ff][Ee][Bb]", "[Mm][Aa][Rr]") + expect_equal(str_to_anycase(x), y) +}) diff --git a/tests/testthat/test-yy_to_yyyy.R b/tests/testthat/test-yy_to_yyyy.R new file mode 100644 index 00000000..38836222 --- /dev/null +++ b/tests/testthat/test-yy_to_yyyy.R @@ -0,0 +1,27 @@ +test_that("`yy_to_yyyy()`: basic usage", { + + # Default cutoff is at `68`. + x1 <- c(0, 1, 50, 68, 69, 70) + y1 <- c(2000, 2001, 2050, 2068, 1969, 1970) + expect_equal(yy_to_yyyy(x1), y1) + + # Different cutoff, e.g. `79`. + x2 <- 75:85 + y2 <- + c(2075L, + 2076L, + 2077L, + 2078L, + 2079L, + 1980L, + 1981L, + 1982L, + 1983L, + 1984L, + 1985L) + expect_equal(yy_to_yyyy(x2, cutoff_2000 = 79L), y2) + + # Four-digit years remain altered. + x3 <- 1965:1975 + expect_equal(yy_to_yyyy(x3), x3) +}) From 81ef1e548339ecf7d151c0df52d3713e54aa02c9 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Wed, 15 Nov 2023 13:01:46 +0000 Subject: [PATCH 06/52] clean up `lintr::lint_package()` issues --- R/dtc_create_iso8601.R | 44 ++++----- R/dtc_parse_dttm.R | 17 ++-- R/dtc_utils.R | 6 +- R/parse_dttm.R | 141 --------------------------- R/parse_dttm_fields.R | 125 ------------------------ R/parse_dttm_fmt.R | 112 ++++++++++----------- man/create_iso8601.Rd | 2 +- man/format_iso8601.Rd | 2 +- tests/testthat/test-create_iso8601.R | 25 +++-- tests/testthat/test-find_int_gap.R | 34 +++---- tests/testthat/test-format_iso8601.R | 4 +- tests/testthat/test-iso8601.R | 28 +++--- tests/testthat/test-parse_dttm.R | 6 +- tests/testthat/test-pseq.R | 4 +- tests/testthat/test-reg_matches.R | 4 +- tests/testthat/test-str_to_anycase.R | 2 +- tests/testthat/test-yy_to_yyyy.R | 14 +-- 17 files changed, 150 insertions(+), 420 deletions(-) delete mode 100644 R/parse_dttm.R delete mode 100644 R/parse_dttm_fields.R diff --git a/R/dtc_create_iso8601.R b/R/dtc_create_iso8601.R index 56518bba..633923d4 100644 --- a/R/dtc_create_iso8601.R +++ b/R/dtc_create_iso8601.R @@ -45,10 +45,10 @@ zero_pad_whole_number <- function(x, n = 2L) { # Check `n` admiraldev::assert_integer_scalar(n) - if (n < 1) rlang::abort("`n` must be positive.") + if (n < 1L) rlang::abort("`n` must be positive.") # Negative numbers are not allowed, and hence get converted to NA. - x[x < 0] <- NA_integer_ + x[x < 0L] <- NA_integer_ # Numbers that do not fit within the padding width are converted to NA x[floor(log10(x)) >= n] <- NA_integer_ @@ -88,11 +88,11 @@ yy_to_yyyy <- function(x, cutoff_2000 = 68L) { # Check `x` if (!rlang::is_integerish(x)) rlang::abort("`x` must be integerish.") - if (any(x < 0, na.rm = TRUE)) + if (any(x < 0L, na.rm = TRUE)) rlang::abort("`x` cannot have negative years.") x <- dplyr::if_else(x <= cutoff_2000, x + 2000L, x) - x <- dplyr::if_else(x <= 99, x + 1900L, x) + x <- dplyr::if_else(x <= 99L, x + 1900L, x) x } @@ -114,7 +114,7 @@ yy_to_yyyy <- function(x, cutoff_2000 = 68L) { iso8601_two_digits <- function(x) { admiraldev::assert_character_vector(x) x_int <- as.integer(stringr::str_match(x, "^\\d?\\d$")) - zero_pad_whole_number(x_int, n = 2) + zero_pad_whole_number(x_int, n = 2L) } iso8601_mday <- iso8601_two_digits @@ -150,7 +150,7 @@ iso8601_year <- function(x, cutoff_2000 = 68L) { admiraldev::assert_integer_scalar(cutoff_2000, subset = "non-negative") x_int <- as.integer(stringr::str_match(x, "^\\d{1,4}$")) x_int <- yy_to_yyyy(x_int, cutoff_2000 = cutoff_2000) - zero_pad_whole_number(x_int, n = 4) + zero_pad_whole_number(x_int, n = 4L) } #' Format as a ISO8601 month @@ -180,7 +180,7 @@ iso8601_mon <- function(x) { num_mon_chr <- num_mon num_mon_chr[is.na(num_mon)] <- iso8601_two_digits(x[is.na(num_mon)]) mon_int <- as.integer(num_mon_chr) - zero_pad_whole_number(mon_int, n = 2) + zero_pad_whole_number(mon_int, n = 2L) } #' Format as ISO8601 seconds @@ -284,31 +284,31 @@ iso8601_truncate <- function(x, empty_as_na = TRUE) { #' sdtm.oak:::format_iso8601(m) #' #' @keywords internal -format_iso8601 <- function(m, .cutoff_2000 = 68) { +format_iso8601 <- function(m, .cutoff_2000 = 68L) { admiraldev::assert_integer_scalar(.cutoff_2000) - m[, 'year'] <- iso8601_year(m[, 'year'], cutoff_2000 = .cutoff_2000) - m[, 'mon'] <- iso8601_mon(m[, 'mon']) - m[, 'mday'] <- iso8601_mday(m[, 'mday']) - m[, 'hour'] <- iso8601_hour(m[, 'hour']) - m[, 'min'] <- iso8601_min(m[, 'min']) - m[, 'sec'] <- iso8601_sec(m[, 'sec']) + m[, "year"] <- iso8601_year(m[, "year"], cutoff_2000 = .cutoff_2000) + m[, "mon"] <- iso8601_mon(m[, "mon"]) + m[, "mday"] <- iso8601_mday(m[, "mday"]) + m[, "hour"] <- iso8601_hour(m[, "hour"]) + m[, "min"] <- iso8601_min(m[, "min"]) + m[, "sec"] <- iso8601_sec(m[, "sec"]) m <- iso8601_na(m) x <- - paste0(m[, 'year'], + paste0(m[, "year"], "-", - m[, 'mon'], + m[, "mon"], "-", - m[, 'mday'], + m[, "mday"], "T", - m[, 'hour'], + m[, "hour"], ":", - m[, 'min'], + m[, "min"], ":", - m[, 'sec']) + m[, "sec"]) iso8601_truncate(x) } @@ -376,7 +376,7 @@ format_iso8601 <- function(m, .cutoff_2000 = 68) { #' # Fractional seconds #' create_iso8601("2019-120602:20:13.1230001", .format = "y-mdH:M:S", .check_format = FALSE) #' @export -create_iso8601 <- function(..., .format, .na = NULL, .cutoff_2000 = 68, .check_format = TRUE) { +create_iso8601 <- function(..., .format, .na = NULL, .cutoff_2000 = 68L, .check_format = TRUE) { dots <- rlang::dots_list(...) @@ -389,7 +389,7 @@ create_iso8601 <- function(..., .format, .na = NULL, .cutoff_2000 = 68, .check_f # Check if all vectors in `dots` are of the same length. n <- unique(lengths(dots)) if (!identical(length(n), 1L)) - rlang::abort("All vectors in `...` must be of the same length.") + rlang::abort("All vectors in `...` must be of the same length.") if (!identical(length(dots), length(.format))) rlang::abort("Number of vectors in `...` should match length of `.format`.") diff --git a/R/dtc_parse_dttm.R b/R/dtc_parse_dttm.R index 24d95fa4..cbeb8eec 100644 --- a/R/dtc_parse_dttm.R +++ b/R/dtc_parse_dttm.R @@ -1,14 +1,15 @@ #' @rdname parse_dttm #' @order 2 parse_dttm_ <- function(dttm, - fmt, - na = NULL, - sec_na = na, - min_na = na, - hour_na = na, - mday_na = na, - mon_na = na, - year_na = na) { + fmt, + na = NULL, + sec_na = na, + min_na = na, + hour_na = na, + mday_na = na, + mon_na = na, + year_na = na) { + admiraldev::assert_character_scalar(fmt) diff --git a/R/dtc_utils.R b/R/dtc_utils.R index d654c39f..46e8414a 100644 --- a/R/dtc_utils.R +++ b/R/dtc_utils.R @@ -75,9 +75,9 @@ assert_dtc_format <- function(.format) { abort_msg <- "`.format` must be either a character vector of formats of a list thereof." - switch ( + switch( typeof(.format), - character = assert_dtc_fmt(.format) , + character = assert_dtc_fmt(.format), list = purrr::map(.format, assert_dtc_format), rlang::abort(abort_msg) ) @@ -127,7 +127,7 @@ assert_capture_matrix <- function(m) { col_names <- c("year", "mon", "mday", "hour", "min", "sec") if (!all(colnames(m) %in% col_names)) - rlang::abort("`m` must have the following colnames: `year`, `mon`, `mday`, `hour`, `min` and `sec`.") + rlang::abort("`m` must have the following colnames: `year`, `mon`, `mday`, `hour`, `min` and `sec`.") invisible(m) } diff --git a/R/parse_dttm.R b/R/parse_dttm.R deleted file mode 100644 index a4f5e8ca..00000000 --- a/R/parse_dttm.R +++ /dev/null @@ -1,141 +0,0 @@ -# new_dttm_tbl <- function(sec = character(), -# min = character(), -# hour = character(), -# mday = character(), -# mon = character(), -# year = character()) { -# -# tibble::tibble( -# year = year, -# mon = mon, -# mday = mday, -# hour = hour, -# min = min, -# sec = sec -# ) -# } - -# parse_dttm <- function(dttm, -# fmt, -# na = NULL, -# sec_na = na, -# min_na = na, -# hour_na = na, -# mday_na = na, -# mon_na = na, -# year_na = na) { -# -# -# tbl_fmt_c <- parse_dttm_fmt(fmt) -# regex <- -# dttm_fmt_to_regex( -# tbl_fmt_c, -# fmt_regex = fmt_rg( -# na = na, -# sec_na = sec_na, -# min_na = min_na, -# hour_na = hour_na, -# mday_na = mday_na, -# mon_na = mon_na, -# year_na = year_na -# ) -# ) -# -# m <- stringr::str_match(dttm, regex) -# colnames(m)[1] <- "dttm" -# # Remove unnamed capture groups (these are subgroups within the dttm components) -# m <- m[, colnames(m) != "", drop = FALSE] -# dplyr::bind_rows(new_dttm_tbl(), tibble::as_tibble(m)) -# } - -# convert NA in character x to "-" -# iso8601_na <- function(x) { -# x[is.na(x)] <- "-" -# x -# } - - - -# iso8601_mday <- function(x) { -# iso8601_two_digits(x) |> iso8601_na() -# } -# -# iso8601_hour <- function(x) { -# iso8601_two_digits(x) |> iso8601_na() -# } -# -# iso8601_min <- function(x) { -# iso8601_two_digits(x) |> iso8601_na() -# } - -# iso8601_sec <- function(x) { -# x_iso8601 <- stringr::str_extract(x, "^\\d?\\d(\\.\\d*)?$") -# #x_iso8601 <- sprintf("%02f", x_dbl) -# x_iso8601 <- stringr::str_replace(x_iso8601, "^\\d(\\.\\d*)?$", "0\\0") -# x_iso8601 <- stringr::str_replace(x_iso8601, "(\\.[^0]*)(0*)$", "\\1") -# x_iso8601 <- stringr::str_remove(x_iso8601, "\\.$") -# x_iso8601[is.na(x_iso8601)] <- NA_character_ -# x_iso8601 |> iso8601_na() -# } - -# Month abbreviation (en) to numeric month mapping -# mon_abb_to_mon_num <- setNames(sprintf("%02d", seq_along(month.abb)), tolower(month.abb)) - -# iso8601_mon <- function(x) { -# -# x <- tolower(x) -# # Translate month abbreviations to numeric months -# num_mon <- mon_abb_to_mon_num[x] -# num_mon_chr <- num_mon -# num_mon_chr[is.na(num_mon)] <- iso8601_two_digits(x[is.na(num_mon)]) -# -# mon_int <- as.integer(num_mon_chr) -# x_iso8601 <- sprintf("%02d", mon_int) -# x_iso8601[is.na(mon_int)] <- NA_character_ -# iso8601_na(x_iso8601) -# } - -# iso8601_year <- function(x, cutoff_2000 = 68L) { -# x_int <- as.integer(stringr::str_match(x, "^\\d{1,4}$")) -# x_int <- ifelse (x_int <= cutoff_2000, x_int + 2000L, x_int) -# x_int <- ifelse (x_int <= 99, x_int + 1900L, x_int) -# -# -# x_iso8601 <- sprintf("%04d", x_int) -# x_iso8601[is.na(x_int)] <- NA_character_ -# iso8601_na(x_iso8601) -# } - -# iso8601_truncate <- function(x, empty_as_na = TRUE) { -# -# x <- stringr::str_remove(x, "[^\\d]*$") -# -# if (empty_as_na) x[x == ""] <- NA_character_ -# -# x -# } - -#' #' @importFrom rlang .data -#' #' @export -#' format_iso8601 <- function(dttm, fmt, na = NULL) { -#' -#' tbl <- parse_dttm(dttm, fmt, na = na) -#' tbl |> -#' dplyr::mutate( -#' year = iso8601_year(.data$year), -#' mon = iso8601_mon(.data$mon), -#' mday = iso8601_mday(.data$mday), -#' hour = iso8601_hour(.data$hour), -#' min = iso8601_min(.data$min), -#' sec = iso8601_sec(.data$sec), -#' iso8601 = dplyr::if_else( -#' !is.na(dttm), -#' iso8601_truncate( -#' stringr::str_glue("{.data$year}-{.data$mon}-{.data$mday}T{.data$hour}:{.data$min}:{.data$sec}") -#' ), -#' NA_character_ -#' ) -#' ) |> -#' dplyr::relocate(.data$iso8601, .data$dttm, .before = 1L) -#' -#' } diff --git a/R/parse_dttm_fields.R b/R/parse_dttm_fields.R deleted file mode 100644 index 082e6fb0..00000000 --- a/R/parse_dttm_fields.R +++ /dev/null @@ -1,125 +0,0 @@ -# # pseq <- -# # function(from = 1, -# # to = 1) { -# # mapply( -# # `:`, -# # from = from, -# # to = to, -# # SIMPLIFY = FALSE -# # ) |> -# # unlist() -# # } -# -# find_gaps <- function(x, x_min, x_max) { -# y <- setdiff(seq(x_min, x_max), x) -# streaks <- split(y, cumsum(c(TRUE, diff(y) != 1))) -# -# lapply(X = streaks, \(x) c(start = min(x), end = max(x))) -# } -# -# fmt_tokens <- -# list( -# sec = "S+", -# min = "M+", -# hour = "H+", -# mday = "d+", -# mon = "m+", -# year = "y+" -# ) -# -# fmt_regex <- -# list( -# sec = "(\\d{2})", -# min = "(\\d{2})", -# hour = "(\\d{2})", -# mday = "(\\d{2})", -# mon = "(\\d{2}|Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)", -# year = "(\\d{4})" -# ) -# -# -# as_order <- function(fmt) { -# gsub("[^[:alpha:]]+", "", fmt) -# } -# -# fmt_token_tally <- function(fmt, tokens = fmt_tokens) { -# sapply(fmt_tokens, \(x) stringr::str_count(fmt, x)) -# } -# -# is_dt_fmt <- function(fmt) { -# -# tally <- fmt_token_tally(fmt) -# all(tally < 2) && sum(tally) > 0 -# } -# -# reg_matches <- function(x, m, invert = FALSE) { -# match <- regmatches(x, m, invert = invert) -# ifelse(length(match), match, NA_character_) -# } -# -# # parse_dt_fmt <- function(fmt, tokens = fmt_tokens) { -# # if (!is_dt_fmt(fmt)) { -# # stop("Not a valid format in `fmt`.", call. = TRUE) -# # } -# # sapply(fmt_tokens, \(x) stringr::str_match(fmt, x)) -# # } -# # -# # parse_dt_fmt2 <- function(fmt, tokens = fmt_tokens) { -# # if (!is_dt_fmt(fmt)) { -# # stop("Not a valid format in `fmt`.", call. = TRUE) -# # } -# # sapply(fmt_tokens, \(x) grep(x, fmt)) -# # } -# -# extract_tidy_match_ <- function(x, pattern) { -# -# match_data <- regexpr(pattern, x) -# match <- reg_matches(x, match_data) -# -# is_match <- !is.na(match) -# -# start <- ifelse(is_match, match_data, NA_integer_) -# len <- ifelse(is_match, attr(match_data, "match.length"), NA_integer_) -# end <- start + len - 1L -# data.frame(pat = pattern, cap = match, start = start, end = end, len = len) -# } -# -# extract_tidy_match <- function(x, patterns, sort = TRUE) { -# -# df <- -# lapply(patterns, \(str) extract_tidy_match_(x, str)) |> -# do.call(what = "rbind") -# -# df$ord <- rank(df$start) -# df$ord[is.na(df$start)] <- NA_integer_ -# -# df <- cbind(dttm_el = rownames(df), df) -# rownames(df) <- NULL -# -# if (sort) { -# df[order(df$ord),] -# } else { -# df -# } -# } -# -# -# -# -# parse_dttm_fields <- function(x, -# fmt, -# na = -# list( -# year = "UNKN", -# month = "UNK", -# day = "UN", -# hour = "", -# min = "", -# sec = "" -# )) { -# -# -# -# -# -# } diff --git a/R/parse_dttm_fmt.R b/R/parse_dttm_fmt.R index a1bf821b..a8cbb8f7 100644 --- a/R/parse_dttm_fmt.R +++ b/R/parse_dttm_fmt.R @@ -28,11 +28,11 @@ find_int_gap <- function(x, xmin = min(x), xmax = max(x)) { admiraldev::assert_integer_scalar(xmax) x <- sort(unique(x)) - x <- c(xmin - 1, x, xmax + 1) - gaps <- which(diff(x) > 1) - start <- x[gaps] + 1 - end <- x[gaps + 1] - 1 - tibble::tibble(start = x[gaps] + 1, end = x[gaps + 1] - 1) + x <- c(xmin - 1L, x, xmax + 1L) + gaps <- which(diff(x) > 1L) + start <- x[gaps] + 1L + end <- x[gaps + 1L] - 1L + tibble::tibble(start = start, end = end) } #' `regmatches()` with `NA` @@ -67,17 +67,9 @@ reg_matches <- function(x, m, invert = FALSE) { #' @returns An integer vector. #' #' @keywords internal -pseq <- - function(from, - to) { - mapply( - `:`, - from = from, - to = to, - SIMPLIFY = FALSE - ) |> - unlist() - } +pseq <- function(from, to) { + unlist(purrr::map2(.x = from, .y = to, .f = `:`)) +} #' Generate case insensitive regexps #' @@ -91,7 +83,7 @@ pseq <- #' @keywords internal str_to_anycase <- function(x) { - lst <- stringr::str_split(x, "") + lst <- stringr::str_split(x, stringr::boundary("character")) purrr::map(lst, ~ stringr::str_c(stringr::str_to_upper(.x), stringr::str_to_lower(.x))) |> purrr::map(~ sprintf("[%s]", .x)) |> purrr::map(~ stringr::str_flatten(.x)) |> @@ -131,11 +123,12 @@ months_abb_regex <- function(x = month.abb, case = c("any", "upper", "lower", "t # change the regexp for one specific dttm component # while keeping the other defaults. fmt_c <- function(sec = "S+", - min = "M+", - hour = "H+", - mday = "d+", - mon = "m+", - year = "y+") { + min = "M+", + hour = "H+", + mday = "d+", + mon = "m+", + year = "y+") { + c( sec = sec, min = min, @@ -159,38 +152,44 @@ regex_or <- function(x, .open = FALSE, .close = FALSE) { stringr::str_flatten(x, collapse = "|") } -fmt_rg <- - function(sec = "(\\b\\d|\\d{2})(\\.\\d*)?", - min = "(\\b\\d|\\d{2})", - hour = "\\d?\\d", - mday = "\\b\\d|\\d{2}", - mon = stringr::str_glue("\\d\\d|{months_abb_regex()}"), - year = "(\\d{2})?\\d{2}", - na = NULL, - sec_na = na, - min_na = na, - hour_na = na, - mday_na = na, - mon_na = na, - year_na = na - ) { - - sec_na <- ifelse(!is.null(sec_na), regex_or(sec_na, .open = TRUE), "") - min_na <- ifelse(!is.null(min_na), regex_or(min_na, .open = TRUE), "") - hour_na <- ifelse(!is.null(hour_na), regex_or(hour_na, .open = TRUE), "") - mday_na <- ifelse(!is.null(mday_na), regex_or(mday_na, .open = TRUE), "") - mon_na <- ifelse(!is.null(mon_na), regex_or(mon_na, .open = TRUE), "") - year_na <- ifelse(!is.null(year_na), regex_or(year_na, .open = TRUE), "") - - c( - sec = stringr::str_glue("(?{sec}{sec_na})"), - min = stringr::str_glue("(?{min}{min_na})"), - hour = stringr::str_glue("(?{hour}{hour_na})"), - mday = stringr::str_glue("(?{mday}{mday_na})"), - mon = stringr::str_glue("(?{mon}{mon_na})"), - year = stringr::str_glue("(?{year}{year_na})") - ) - } +fmt_rg <- function( + sec = "(\\b\\d|\\d{2})(\\.\\d*)?", + min = "(\\b\\d|\\d{2})", + hour = "\\d?\\d", + mday = "\\b\\d|\\d{2}", + mon = stringr::str_glue("\\d\\d|{months_abb_regex()}"), + year = "(\\d{2})?\\d{2}", + na = NULL, + sec_na = na, + min_na = na, + hour_na = na, + mday_na = na, + mon_na = na, + year_na = na) { + + sec_na <- + ifelse(!is.null(sec_na), regex_or(sec_na, .open = TRUE), "") + min_na <- + ifelse(!is.null(min_na), regex_or(min_na, .open = TRUE), "") + hour_na <- + ifelse(!is.null(hour_na), regex_or(hour_na, .open = TRUE), "") + mday_na <- + ifelse(!is.null(mday_na), regex_or(mday_na, .open = TRUE), "") + mon_na <- + ifelse(!is.null(mon_na), regex_or(mon_na, .open = TRUE), "") + year_na <- + ifelse(!is.null(year_na), regex_or(year_na, .open = TRUE), "") + + + c( + sec = stringr::str_glue("(?{sec}{sec_na})"), + min = stringr::str_glue("(?{min}{min_na})"), + hour = stringr::str_glue("(?{hour}{hour_na})"), + mday = stringr::str_glue("(?{mday}{mday_na})"), + mon = stringr::str_glue("(?{mon}{mon_na})"), + year = stringr::str_glue("(?{year}{year_na})") + ) +} # Scalar version of `parse_dttm_fmt()`. parse_dttm_fmt_ <- function(x, pattern) { @@ -217,12 +216,11 @@ parse_dttm_fmt <- function(fmt, patterns = fmt_c()) { fmt_dttmc$ord <- dplyr::row_number(fmt_dttmc$start) fmt_len <- nchar(fmt) - fmt_pos <- seq_len(fmt_len) start <- end <- NULL # To avoid a "no visible binding for global variable" NOTE. dttmc_pos <- with(fmt_dttmc, pseq(from = start[!is.na(start)], to = end[!is.na(end)])) # `delim_pos`: delimiter positions, i.e. positions in `fmt` in-between dttm components. - delim_pos <- find_int_gap(dttmc_pos, xmin = 1, xmax = fmt_len) + delim_pos <- find_int_gap(dttmc_pos, xmin = 1L, xmax = fmt_len) delim <- with(delim_pos, stringr::str_sub(fmt, start = start, end = end)) fmt_delim <- @@ -253,5 +251,3 @@ dttm_fmt_to_regex <- function(tbl_fmt_c, fmt_regex = fmt_rg(), anchored = TRUE) fmt_regex } - - diff --git a/man/create_iso8601.Rd b/man/create_iso8601.Rd index 15b170ec..bca57597 100644 --- a/man/create_iso8601.Rd +++ b/man/create_iso8601.Rd @@ -8,7 +8,7 @@ create_iso8601( ..., .format, .na = NULL, - .cutoff_2000 = 68, + .cutoff_2000 = 68L, .check_format = TRUE ) } diff --git a/man/format_iso8601.Rd b/man/format_iso8601.Rd index f16e6f82..ec101f24 100644 --- a/man/format_iso8601.Rd +++ b/man/format_iso8601.Rd @@ -4,7 +4,7 @@ \alias{format_iso8601} \title{Convert date/time components into ISO8601 format} \usage{ -format_iso8601(m, .cutoff_2000 = 68) +format_iso8601(m, .cutoff_2000 = 68L) } \arguments{ \item{m}{A character matrix of date/time components. It must have six diff --git a/tests/testthat/test-create_iso8601.R b/tests/testthat/test-create_iso8601.R index 7b29c1c7..42b9d4b4 100644 --- a/tests/testthat/test-create_iso8601.R +++ b/tests/testthat/test-create_iso8601.R @@ -3,17 +3,17 @@ test_that("`create_iso8601()`: individual date components", { x <- c("0", "50", "1950", "80", "1980", "2000") y0 <- create_iso8601(x, .format = "y", .check_format = FALSE) y1 <- c(NA, "2050", "1950", "1980", "1980", "2000") - expect_equal(y0, y1) + expect_identical(y0, y1) x <- c("0", "jan", "JAN", "JaN", "1", "01") y0 <- create_iso8601(x, .format = "m", .check_format = FALSE) y1 <- c(NA, "--01", "--01", "--01", NA, "--01") - expect_equal(y0, y1) + expect_identical(y0, y1) x <- c("0", "00", "1", "01", "10", "31") y0 <- create_iso8601(x, .format = "d", .check_format = FALSE) y1 <- c("----00", "----00", "----01", "----01", "----10", "----31") - expect_equal(y0, y1) + expect_identical(y0, y1) }) @@ -23,15 +23,15 @@ test_that("`create_iso8601()`: dates", { x <- c("19990101", "20000101", "990101", "991231") y0 <- create_iso8601(x, .format = "ymd", .check_format = FALSE) - expect_equal(y0, y1) + expect_identical(y0, y1) x <- c("1999-01-01", "2000-01-01", "99-01-01", "99-12-31") y0 <- create_iso8601(x, .format = "y-m-d", .check_format = FALSE) - expect_equal(y0, y1) + expect_identical(y0, y1) x <- c("1999 01 01", "2000 01 01", "99 01 01", "99 12 31") y0 <- create_iso8601(x, .format = "y m d", .check_format = FALSE) - expect_equal(y0, y1) + expect_identical(y0, y1) }) @@ -41,15 +41,15 @@ test_that("`create_iso8601()`: times: hours and minutes", { x <- c("1520", "0010", "2301", "0000") y0 <- create_iso8601(x, .format = "HM", .check_format = FALSE) - expect_equal(y0, y1) + expect_identical(y0, y1) x <- c("15:20", "00:10", "23:01", "00:00") y0 <- create_iso8601(x, .format = "H:M", .check_format = FALSE) - expect_equal(y0, y1) + expect_identical(y0, y1) x <- c("15h20", "00h10", "23h01", "00h00") y0 <- create_iso8601(x, .format = "HhM", .check_format = FALSE) - expect_equal(y0, y1) + expect_identical(y0, y1) }) @@ -58,12 +58,12 @@ test_that("`create_iso8601()`: times: hours, minutes and seconds", { x <- c("152000", "001059", "230112.123", "00002.") y0 <- create_iso8601(x, .format = "HMS", .check_format = FALSE) y1 <- c("-----T15:20:00", "-----T00:10:59", "-----T23:01:12.123", "-----T00:00:02") - expect_equal(y0, y1) + expect_identical(y0, y1) x <- c("15:20:00", "00:10:59", "23:01:12.123", "00:00:2.", "5:1:4") y0 <- create_iso8601(x, .format = "H:M:S", .check_format = FALSE) y1 <- c(y1, "-----T05:01:04") - expect_equal(y0, y1) + expect_identical(y0, y1) }) @@ -78,7 +78,6 @@ test_that("`create_iso8601()`: dates and times", { "2000-01-01T00:10", "1999-01-01T23:01", "1999-12-31T00:00") - expect_equal(iso8601_dttm, expectation) + expect_identical(iso8601_dttm, expectation) }) - diff --git a/tests/testthat/test-find_int_gap.R b/tests/testthat/test-find_int_gap.R index 18cc54a0..5fcfd453 100644 --- a/tests/testthat/test-find_int_gap.R +++ b/tests/testthat/test-find_int_gap.R @@ -1,34 +1,34 @@ test_that("`find_int_gap()`: one interval", { - tbl <- find_int_gap(c(1:3, 7:10)) + tbl <- find_int_gap(c(1L:3L, 7L:10L)) - expect_equal(tbl$start, 4) - expect_equal(tbl$end, 6) + expect_identical(tbl$start, 4L) + expect_identical(tbl$end, 6L) }) test_that("`find_int_gap()`: two intervals", { - tbl <- find_int_gap(c(1:3, 7:10, 15:20)) + tbl <- find_int_gap(c(1L:3L, 7L:10L, 15L:20L)) - expect_equal(tbl$start, c(4, 11)) - expect_equal(tbl$end, c(6, 14)) + expect_identical(tbl$start, c(4L, 11L)) + expect_identical(tbl$end, c(6L, 14L)) }) test_that("`find_int_gap()`: explicit endpoints", { - tbl <- find_int_gap(c(3:5, 8), xmin = 0, xmax = 10) + tbl <- find_int_gap(c(3L:5L, 8L), xmin = 0L, xmax = 10L) - expect_equal(tbl$start, c(0, 6, 9)) - expect_equal(tbl$end, c(2, 7, 10)) + expect_identical(tbl$start, c(0L, 6L, 9L)) + expect_identical(tbl$end, c(2L, 7L, 10L)) }) test_that("`find_int_gap()`: no intervals", { - tbl <- find_int_gap(0:5) - expect_equal(tbl, tibble::tibble(start = integer(), end = integer())) + tbl <- find_int_gap(0L:5L) + expect_identical(tbl, tibble::tibble(start = integer(), end = integer())) }) @@ -41,15 +41,15 @@ test_that("`find_int_gap()`: ensure `x` is integerish", { test_that("`find_int_gap()`: ensure `xmin` and `xmax` are integer scalars", { # Error because `xmin` and `xmax` are vectors - expect_error(find_int_gap(c(1:3, 7:10), xmin = 1:2)) - expect_error(find_int_gap(c(1:3, 7:10), xmax = 3:4)) + expect_error(find_int_gap(c(1L:3L, 7L:10L), xmin = 1L:2L)) + expect_error(find_int_gap(c(1L:3L, 7L:10L), xmax = 3L:4L)) # Error because `xmin` and `xmax` are double - expect_error(find_int_gap(c(1:3, 7:10), xmin = 1.5)) - expect_error(find_int_gap(c(1:3, 7:10), xmax = 1.5)) + expect_error(find_int_gap(c(1L:3L, 7L:10L), xmin = 1.5)) + expect_error(find_int_gap(c(1L:3L, 7L:10L), xmax = 1.5)) # Error because `xmin` and `xmax` are character - expect_error(find_int_gap(c(1:3, 7:10), xmin = "1")) - expect_error(find_int_gap(c(1:3, 7:10), xmax = "2")) + expect_error(find_int_gap(c(1L:3L, 7L:10L), xmin = "1")) + expect_error(find_int_gap(c(1L:3L, 7L:10L), xmax = "2")) }) diff --git a/tests/testthat/test-format_iso8601.R b/tests/testthat/test-format_iso8601.R index cc10bde9..460e9ecd 100644 --- a/tests/testthat/test-format_iso8601.R +++ b/tests/testthat/test-format_iso8601.R @@ -10,11 +10,11 @@ test_that("`format_iso8601()`: basic usage", { "00", "59", "10", "42", "5.15", NA ), - ncol = 6, + ncol = 6L, dimnames = list(c(), cols) ) - expect_equal( + expect_identical( format_iso8601(m), c( "1999-01-01T00:00:42", diff --git a/tests/testthat/test-iso8601.R b/tests/testthat/test-iso8601.R index 57a3d923..881b514f 100644 --- a/tests/testthat/test-iso8601.R +++ b/tests/testthat/test-iso8601.R @@ -1,7 +1,7 @@ test_that("`iso8601_na()`: basic usage", { - expect_equal(iso8601_na(c("10", "15")), c("10", "15")) - expect_equal(iso8601_na(c("10", NA_character_)), c("10", "-")) - expect_equal(iso8601_na(character()), character(0)) + expect_identical(iso8601_na(c("10", "15")), c("10", "15")) + expect_identical(iso8601_na(c("10", NA_character_)), c("10", "-")) + expect_identical(iso8601_na(character()), character(0L)) }) test_that("`iso8601_na()`: input can't be `NULL`", { @@ -12,29 +12,29 @@ test_that("`iso8601_na()`: input can't be `NULL`", { test_that("`zero_pad_whole_number()`: ensure `x` is integerish", { expect_error(zero_pad_whole_number(pi)) expect_error(zero_pad_whole_number("42")) - expect_error(zero_pad_whole_number(sqrt(2))) + expect_error(zero_pad_whole_number(sqrt(2.0))) expect_error(zero_pad_whole_number(TRUE)) - expect_no_error(zero_pad_whole_number(1)) + expect_no_error(zero_pad_whole_number(1L)) expect_no_error(zero_pad_whole_number(1.00)) - expect_no_error(zero_pad_whole_number(c(1:3))) + expect_no_error(zero_pad_whole_number(c(1L:3L))) }) test_that("`zero_pad_whole_number()`: basic usage", { - expect_equal(zero_pad_whole_number(c(-1, 0, 1)), c(NA, "00", "01")) - expect_equal(zero_pad_whole_number(c(-1, 0, 1, 10, 99, 100), n = 2), - c(NA, "00", "01", "10", "99", NA)) - expect_equal(zero_pad_whole_number(c(-1, 0, 1, 10, 99, 100), n = 3), - c(NA, "000", "001", "010", "099", "100")) + expect_identical(zero_pad_whole_number(c(-1L, 0L, 1L)), c(NA, "00", "01")) + expect_identical(zero_pad_whole_number(c(-1L, 0L, 1L, 10L, 99L, 100L), n = 2L), + c(NA, "00", "01", "10", "99", NA)) + expect_identical(zero_pad_whole_number(c(-1L, 0L, 1L, 10L, 99L, 100L), n = 3L), + c(NA, "000", "001", "010", "099", "100")) }) test_that("`zero_pad_whole_number()`: ensure `n` is scalar integer", { - expect_no_error(zero_pad_whole_number(1, n = 1)) - expect_error(zero_pad_whole_number(1, n = 1:2)) + expect_no_error(zero_pad_whole_number(1L, n = 1L)) + expect_error(zero_pad_whole_number(1L, n = 1L:2L)) }) test_that("`iso8601_two_digits()`: basic usage", { x <- c("0", "00", "1", "01", "42", "100", NA_character_, "1.") y <- c("00", "00", "01", "01", "42", NA, NA, NA) - expect_equal(iso8601_two_digits(x), y) + expect_identical(iso8601_two_digits(x), y) }) diff --git a/tests/testthat/test-parse_dttm.R b/tests/testthat/test-parse_dttm.R index f12d9d23..8da7ca9a 100644 --- a/tests/testthat/test-parse_dttm.R +++ b/tests/testthat/test-parse_dttm.R @@ -13,7 +13,7 @@ test_that("`months_abb_regex()`: default behavior (case insensitive)", { "[Nn][Oo][Vv]|", "[Dd][Ee][Cc]" ) - expect_equal(months_abb_regex(), x) + expect_identical(months_abb_regex(), x) }) test_that("`months_abb_regex()`: uppercase", { @@ -31,7 +31,7 @@ test_that("`months_abb_regex()`: uppercase", { "NOV|", "DEC" ) - expect_equal(months_abb_regex(case = "upper"), x) + expect_identical(months_abb_regex(case = "upper"), x) }) test_that("`months_abb_regex()`: lowercase", { @@ -49,5 +49,5 @@ test_that("`months_abb_regex()`: lowercase", { "nov|", "dec" ) - expect_equal(months_abb_regex(case = "lower"), x) + expect_identical(months_abb_regex(case = "lower"), x) }) diff --git a/tests/testthat/test-pseq.R b/tests/testthat/test-pseq.R index f300b561..abdbde3b 100644 --- a/tests/testthat/test-pseq.R +++ b/tests/testthat/test-pseq.R @@ -1,7 +1,7 @@ test_that("`pseq()`: scalar inputs", { - expect_equal(pseq(from = 0, to = 5), 0:5) + expect_identical(pseq(from = 0L, to = 5L), 0L:5L) }) test_that("`pseq()`: vector inputs", { - expect_equal(pseq(from = c(0, 10), to = c(5, 15)), c(0:5, 10:15)) + expect_identical(pseq(from = c(0L, 10L), to = c(5L, 15L)), c(0L:5L, 10L:15L)) }) diff --git a/tests/testthat/test-reg_matches.R b/tests/testthat/test-reg_matches.R index 979e277b..a5ecb573 100644 --- a/tests/testthat/test-reg_matches.R +++ b/tests/testthat/test-reg_matches.R @@ -1,10 +1,10 @@ test_that("`reg_matches()`: basic usage", { x <- c("sdtm.oak", "sdtm.cdisc", "adam") - m <- gregexpr("sdtm", x) + m <- gregexpr("sdtm", x, fixed = TRUE) # `regmatches()` returns `character(0)` for `"adam"` # But `reg_matches()` returns `NA` for `"adam"` - expect_equal(reg_matches(x, m), list("sdtm", "sdtm", NA_character_)) + expect_identical(reg_matches(x, m), list("sdtm", "sdtm", NA_character_)) }) diff --git a/tests/testthat/test-str_to_anycase.R b/tests/testthat/test-str_to_anycase.R index ea24c9f3..b6e372e7 100644 --- a/tests/testthat/test-str_to_anycase.R +++ b/tests/testthat/test-str_to_anycase.R @@ -1,5 +1,5 @@ test_that("`str_to_anycase()`: basic usage", { x <- c("JAN", "feb", "mAr") y <- c("[Jj][Aa][Nn]", "[Ff][Ee][Bb]", "[Mm][Aa][Rr]") - expect_equal(str_to_anycase(x), y) + expect_identical(str_to_anycase(x), y) }) diff --git a/tests/testthat/test-yy_to_yyyy.R b/tests/testthat/test-yy_to_yyyy.R index 38836222..76f4002b 100644 --- a/tests/testthat/test-yy_to_yyyy.R +++ b/tests/testthat/test-yy_to_yyyy.R @@ -1,12 +1,12 @@ test_that("`yy_to_yyyy()`: basic usage", { # Default cutoff is at `68`. - x1 <- c(0, 1, 50, 68, 69, 70) - y1 <- c(2000, 2001, 2050, 2068, 1969, 1970) - expect_equal(yy_to_yyyy(x1), y1) + x1 <- c(0L, 1L, 50L, 68L, 69L, 70L) + y1 <- c(2000L, 2001L, 2050L, 2068L, 1969L, 1970L) + expect_identical(yy_to_yyyy(x1), y1) # Different cutoff, e.g. `79`. - x2 <- 75:85 + x2 <- 75L:85L y2 <- c(2075L, 2076L, @@ -19,9 +19,9 @@ test_that("`yy_to_yyyy()`: basic usage", { 1983L, 1984L, 1985L) - expect_equal(yy_to_yyyy(x2, cutoff_2000 = 79L), y2) + expect_identical(yy_to_yyyy(x2, cutoff_2000 = 79L), y2) # Four-digit years remain altered. - x3 <- 1965:1975 - expect_equal(yy_to_yyyy(x3), x3) + x3 <- 1965L:1975L + expect_identical(yy_to_yyyy(x3), x3) }) From b0bbe79769fa7b3d711bff0585b7011a66770b6b Mon Sep 17 00:00:00 2001 From: ramiromagno Date: Wed, 15 Nov 2023 13:12:40 +0000 Subject: [PATCH 07/52] Automatic renv profile update. --- renv/profiles/4.2/renv.lock | 39 +++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/renv/profiles/4.2/renv.lock b/renv/profiles/4.2/renv.lock index c7dd8ca1..14ac16c0 100644 --- a/renv/profiles/4.2/renv.lock +++ b/renv/profiles/4.2/renv.lock @@ -526,6 +526,21 @@ ], "Hash": "06230136b2d2b9ba5805e1963fa6e890" }, + "hms": { + "Package": "hms", + "Version": "1.1.2", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "ellipsis", + "lifecycle", + "methods", + "pkgconfig", + "rlang", + "vctrs" + ], + "Hash": "41100392191e1244b887878b533eea91" + }, "htmltools": { "Package": "htmltools", "Version": "0.5.4", @@ -686,6 +701,19 @@ ], "Hash": "001cecbeac1cff9301bdc3775ee46a86" }, + "lubridate": { + "Package": "lubridate", + "Version": "1.9.2", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "generics", + "methods", + "timechange" + ], + "Hash": "e25f18436e3efd42c7c590a1c4c15390" + }, "magrittr": { "Package": "magrittr", "Version": "2.0.3", @@ -1365,6 +1393,17 @@ ], "Hash": "79540e5fcd9e0435af547d885f184fd5" }, + "timechange": { + "Package": "timechange", + "Version": "0.2.0", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "cpp11" + ], + "Hash": "8548b44f79a35ba1791308b61e6012d7" + }, "tinytex": { "Package": "tinytex", "Version": "0.44", From d493b4c9b1e36fcb195509c686c43030ff1cb2be Mon Sep 17 00:00:00 2001 From: ramiromagno Date: Wed, 15 Nov 2023 13:16:35 +0000 Subject: [PATCH 08/52] Automatic renv profile update. --- renv.lock | 40 ++++++++++++++++++++++++++++++++++++- renv/profiles/4.3/renv.lock | 40 ++++++++++++++++++++++++++++++++++++- 2 files changed, 78 insertions(+), 2 deletions(-) diff --git a/renv.lock b/renv.lock index 7426a192..181fd2eb 100644 --- a/renv.lock +++ b/renv.lock @@ -1,6 +1,6 @@ { "R": { - "Version": "4.3.1", + "Version": "4.3.2", "Repositories": [ { "Name": "CRAN", @@ -526,6 +526,20 @@ ], "Hash": "06230136b2d2b9ba5805e1963fa6e890" }, + "hms": { + "Package": "hms", + "Version": "1.1.3", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "lifecycle", + "methods", + "pkgconfig", + "rlang", + "vctrs" + ], + "Hash": "b59377caa7ed00fa41808342002138f9" + }, "htmltools": { "Package": "htmltools", "Version": "0.5.5", @@ -686,6 +700,19 @@ ], "Hash": "001cecbeac1cff9301bdc3775ee46a86" }, + "lubridate": { + "Package": "lubridate", + "Version": "1.9.2", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "generics", + "methods", + "timechange" + ], + "Hash": "e25f18436e3efd42c7c590a1c4c15390" + }, "magrittr": { "Package": "magrittr", "Version": "2.0.3", @@ -1366,6 +1393,17 @@ ], "Hash": "79540e5fcd9e0435af547d885f184fd5" }, + "timechange": { + "Package": "timechange", + "Version": "0.2.0", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "cpp11" + ], + "Hash": "8548b44f79a35ba1791308b61e6012d7" + }, "tinytex": { "Package": "tinytex", "Version": "0.45", diff --git a/renv/profiles/4.3/renv.lock b/renv/profiles/4.3/renv.lock index 7426a192..181fd2eb 100644 --- a/renv/profiles/4.3/renv.lock +++ b/renv/profiles/4.3/renv.lock @@ -1,6 +1,6 @@ { "R": { - "Version": "4.3.1", + "Version": "4.3.2", "Repositories": [ { "Name": "CRAN", @@ -526,6 +526,20 @@ ], "Hash": "06230136b2d2b9ba5805e1963fa6e890" }, + "hms": { + "Package": "hms", + "Version": "1.1.3", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "lifecycle", + "methods", + "pkgconfig", + "rlang", + "vctrs" + ], + "Hash": "b59377caa7ed00fa41808342002138f9" + }, "htmltools": { "Package": "htmltools", "Version": "0.5.5", @@ -686,6 +700,19 @@ ], "Hash": "001cecbeac1cff9301bdc3775ee46a86" }, + "lubridate": { + "Package": "lubridate", + "Version": "1.9.2", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "generics", + "methods", + "timechange" + ], + "Hash": "e25f18436e3efd42c7c590a1c4c15390" + }, "magrittr": { "Package": "magrittr", "Version": "2.0.3", @@ -1366,6 +1393,17 @@ ], "Hash": "79540e5fcd9e0435af547d885f184fd5" }, + "timechange": { + "Package": "timechange", + "Version": "0.2.0", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "cpp11" + ], + "Hash": "8548b44f79a35ba1791308b61e6012d7" + }, "tinytex": { "Package": "tinytex", "Version": "0.45", From 83db47d15c31c8228b8dfd69e7ae492bcad256ae Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Thu, 16 Nov 2023 15:42:19 +0000 Subject: [PATCH 09/52] Fix typos in R/dtc_utils.R Co-authored-by: edgar-manukyan --- R/dtc_utils.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/dtc_utils.R b/R/dtc_utils.R index 46e8414a..d0c64380 100644 --- a/R/dtc_utils.R +++ b/R/dtc_utils.R @@ -24,8 +24,8 @@ is_dtc_fmt <- function(fmt) { #' Assert date time character formats #' -#' [assert_dtc_fmt()] takes a character vector of date/time formats and check if -#' the formats are supported, meaning it check if they are one of the formats +#' [assert_dtc_fmt()] takes a character vector of date/time formats and checks if +#' the formats are supported, meaning it checks if they are one of the formats #' listed in column `fmt` of [dtc_formats], failing with an error otherwise. #' #' @param fmt A character vector. From 00d1e8feda72aecddffc2057a682a906e949b98a Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Thu, 16 Nov 2023 15:49:50 +0000 Subject: [PATCH 10/52] remove `dummy()` function --- R/package.R | 6 ------ 1 file changed, 6 deletions(-) diff --git a/R/package.R b/R/package.R index 3a56202e..76c90e0d 100644 --- a/R/package.R +++ b/R/package.R @@ -17,9 +17,3 @@ NULL #' @noRd .onLoad <- function(libname, pkgname) { # nolint } - -#' Temporary dummy function -#' @noRd -dummy <- function() { - rlang::as_list -} From 5347631c11c82d2ff6894ed2ad3c7bbffe6aaf99 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Thu, 16 Nov 2023 15:55:46 +0000 Subject: [PATCH 11/52] remove `.onLoad()` function This function was likely added as part of an automatic setup of the R package as a whole but I guess we should add the `.onLoad()` if really needed. --- R/package.R | 9 --------- 1 file changed, 9 deletions(-) diff --git a/R/package.R b/R/package.R index 76c90e0d..e61d25fd 100644 --- a/R/package.R +++ b/R/package.R @@ -8,12 +8,3 @@ #' @importFrom tibble tibble NULL -#' onLoad function -#' -#' This function is called automatically during package loading. -#' -#' @param libname lib name -#' @param pkgname package name -#' @noRd -.onLoad <- function(libname, pkgname) { # nolint -} From 8cc595a81bb9c14625641bd54e769eccdcffbbcb Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Thu, 16 Nov 2023 15:59:03 +0000 Subject: [PATCH 12/52] Remove the `is_dtc_fmt()` function Initially I thought of calling this function from within `assert_dtc_fmt()` but I think now that the current usage of `rlang::arg_match()` leads to more concise code, so this is preferred. --- R/dtc_utils.R | 24 ------------------------ man/is_dtc_fmt.Rd | 29 ----------------------------- 2 files changed, 53 deletions(-) delete mode 100644 man/is_dtc_fmt.Rd diff --git a/R/dtc_utils.R b/R/dtc_utils.R index 46e8414a..bba22f4f 100644 --- a/R/dtc_utils.R +++ b/R/dtc_utils.R @@ -1,27 +1,3 @@ -#' Is it one of the supported formats? -#' -#' [is_dtc_fmt()] takes a character vector of date/time formats and returns -#' a logical indicating which are supported. -#' -#' @details -#' This function works by checking if the values of `x` are one of the formats -#' listed in column `fmt` of [dtc_formats]. -#' -#' @param fmt A character vector. -#' -#' @returns A [logical][base::logical] vector of the same size of `fmt`. -#' -#' @examples -#' sdtm.oak:::is_dtc_fmt(c("ymd", "y m d", "dmy", "HM", "H:M:S", "y-m-d H:M:S")) -#' -#' sdtm.oak:::is_dtc_fmt("y years m months d days") -#' -#' @keywords internal -is_dtc_fmt <- function(fmt) { - admiraldev::assert_character_vector(fmt) - fmt %in% sdtm.oak::dtc_formats$fmt -} - #' Assert date time character formats #' #' [assert_dtc_fmt()] takes a character vector of date/time formats and check if diff --git a/man/is_dtc_fmt.Rd b/man/is_dtc_fmt.Rd deleted file mode 100644 index 5dcd41f3..00000000 --- a/man/is_dtc_fmt.Rd +++ /dev/null @@ -1,29 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/dtc_utils.R -\name{is_dtc_fmt} -\alias{is_dtc_fmt} -\title{Is it one of the supported formats?} -\usage{ -is_dtc_fmt(fmt) -} -\arguments{ -\item{fmt}{A character vector.} -} -\value{ -A \link[base:logical]{logical} vector of the same size of \code{fmt}. -} -\description{ -\code{\link[=is_dtc_fmt]{is_dtc_fmt()}} takes a character vector of date/time formats and returns -a logical indicating which are supported. -} -\details{ -This function works by checking if the values of \code{x} are one of the formats -listed in column \code{fmt} of \link{dtc_formats}. -} -\examples{ -sdtm.oak:::is_dtc_fmt(c("ymd", "y m d", "dmy", "HM", "H:M:S", "y-m-d H:M:S")) - -sdtm.oak:::is_dtc_fmt("y years m months d days") - -} -\keyword{internal} From ae8966c3800d154c4b4c5c1f9ace7ae47a3a4713 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Thu, 16 Nov 2023 16:15:52 +0000 Subject: [PATCH 13/52] Import `.data` from rlang globally Import `.data` from rlang globally by using the R package level documentation (https://roxygen2.r-lib.org/articles/rd-other.html?q=_PACKAGE#packages). --- NAMESPACE | 1 - R/package.R | 10 ---------- R/parse_dttm_fmt.R | 2 -- R/sdtm.oak-package.R | 8 ++++++++ man/sdtm.oak-package.Rd | 35 +++++++++++++++++++++++++++++++++++ man/sdtm.oak.Rd | 12 ------------ 6 files changed, 43 insertions(+), 25 deletions(-) delete mode 100644 R/package.R create mode 100644 R/sdtm.oak-package.R create mode 100644 man/sdtm.oak-package.Rd delete mode 100644 man/sdtm.oak.Rd diff --git a/NAMESPACE b/NAMESPACE index 392f93da..bbac999a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,6 +1,5 @@ # Generated by roxygen2: do not edit by hand export(create_iso8601) -import(rlang) importFrom(rlang,.data) importFrom(tibble,tibble) diff --git a/R/package.R b/R/package.R deleted file mode 100644 index e61d25fd..00000000 --- a/R/package.R +++ /dev/null @@ -1,10 +0,0 @@ -#' An EDC and Data Standard agnostic SDTM data transformation engine that automates -#' the transformation of raw clinical data in ODM format to SDTM based on standard -#' mapping algorithms -#' -#' @name sdtm.oak -#' -#' @import rlang -#' @importFrom tibble tibble -NULL - diff --git a/R/parse_dttm_fmt.R b/R/parse_dttm_fmt.R index a8cbb8f7..cd776806 100644 --- a/R/parse_dttm_fmt.R +++ b/R/parse_dttm_fmt.R @@ -205,7 +205,6 @@ parse_dttm_fmt_ <- function(x, pattern) { tibble::tibble(pat = pattern, cap = match, start = start, end = end, len = len) } -#' @importFrom rlang .data parse_dttm_fmt <- function(fmt, patterns = fmt_c()) { fmt_dttmc <- @@ -238,7 +237,6 @@ parse_dttm_fmt <- function(fmt, patterns = fmt_c()) { } -#' @importFrom rlang .data dttm_fmt_to_regex <- function(tbl_fmt_c, fmt_regex = fmt_rg(), anchored = TRUE) { fmt_regex <- tbl_fmt_c |> diff --git a/R/sdtm.oak-package.R b/R/sdtm.oak-package.R new file mode 100644 index 00000000..b1a48e6f --- /dev/null +++ b/R/sdtm.oak-package.R @@ -0,0 +1,8 @@ +#' @keywords internal +"_PACKAGE" + +## usethis namespace: start +#' @importFrom tibble tibble +#' @importFrom rlang .data +## usethis namespace: end +NULL diff --git a/man/sdtm.oak-package.Rd b/man/sdtm.oak-package.Rd new file mode 100644 index 00000000..042f69dd --- /dev/null +++ b/man/sdtm.oak-package.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sdtm.oak-package.R +\docType{package} +\name{sdtm.oak-package} +\alias{sdtm.oak} +\alias{sdtm.oak-package} +\title{sdtm.oak: SDTM Data Transformation Engine} +\description{ +An EDC and Data Standard agnostic SDTM data transformation engine that automates the transformation of raw clinical data in ODM format to SDTM based on standard mapping algorithms. +} +\seealso{ +Useful links: +\itemize{ + \item \url{https://pharmaverse.github.io/oak/} + \item \url{https://github.com/pharmaverse/oak} + \item Report bugs at \url{https://github.com/pharmaverse/oak/issues} +} + +} +\author{ +\strong{Maintainer}: Omar Garcia \email{ogcalderon@cdisc.org} + +Authors: +\itemize{ + \item Rammprasad Ganapathy +} + +Other contributors: +\itemize{ + \item F. Hoffmann-La Roche AG [copyright holder, funder] + \item Pfizer Inc [copyright holder, funder] +} + +} +\keyword{internal} diff --git a/man/sdtm.oak.Rd b/man/sdtm.oak.Rd deleted file mode 100644 index 3a8c4604..00000000 --- a/man/sdtm.oak.Rd +++ /dev/null @@ -1,12 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/package.R -\name{sdtm.oak} -\alias{sdtm.oak} -\title{An EDC and Data Standard agnostic SDTM data transformation engine that automates -the transformation of raw clinical data in ODM format to SDTM based on standard -mapping algorithms} -\description{ -An EDC and Data Standard agnostic SDTM data transformation engine that automates -the transformation of raw clinical data in ODM format to SDTM based on standard -mapping algorithms -} From 96505f0d1f37c312139836c5dd257e6a968f4eff Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Thu, 16 Nov 2023 16:45:22 +0000 Subject: [PATCH 14/52] Update WORDLIST --- inst/WORDLIST | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/inst/WORDLIST b/inst/WORDLIST index 84f80d49..07ffb10f 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -2,3 +2,7 @@ EDC ODM SDTM sdtm +Hoffmann +dtc +funder +vectorized From 431c14a31f20ee707b5e5d4b2a3cbe4b3eb166c1 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Thu, 16 Nov 2023 17:40:29 +0000 Subject: [PATCH 15/52] Update `assert_capture_matrix()` and `complete_capture_matrix()` docs --- R/dtc_utils.R | 52 +++++++++++++++++++++++++++------- man/assert_capture_matrix.Rd | 15 +++++----- man/complete_capture_matrix.Rd | 36 +++++++++++++++++++++++ 3 files changed, 86 insertions(+), 17 deletions(-) create mode 100644 man/complete_capture_matrix.Rd diff --git a/R/dtc_utils.R b/R/dtc_utils.R index bba22f4f..541a9d19 100644 --- a/R/dtc_utils.R +++ b/R/dtc_utils.R @@ -72,24 +72,25 @@ assert_dtc_format <- function(.format) { #' This function checks that the capture matrix is a matrix and that it contains #' six columns: `year`, `mon`, `mday`, `hour`, `min` and `sec`. #' -#' @param .format The argument of [create_iso8601()]'s `.format` parameter. +#' @param m A character matrix. #' #' @returns This function throws an error if `m` is not either: #' - A character matrix; #' - A matrix whose columns are (at least): `year`, `mon`, `mday`, `hour`, #' `min` and `sec`. #' -#' Otherwise, it returns `.format` invisibly. +#' Otherwise, it returns `m` invisibly. #' #' @examples -#' sdtm.oak:::assert_dtc_format("ymd") -#' sdtm.oak:::assert_dtc_format(c("ymd", "y-m-d")) -#' sdtm.oak:::assert_dtc_format(list(c("ymd", "y-m-d"), "H:M:S")) +#' cols <- c("year", "mon", "mday", "hour", "min", "sec") +#' m <- matrix(NA_character_, nrow = 1L, ncol = 6L, dimnames = list(NULL, cols)) +#' sdtm.oak:::assert_capture_matrix(m) #' #' # These commands should throw an error #' if (FALSE) { -#' # Note that `"year, month, day"` is not a supported format. -#' sdtm.oak:::assert_dtc_format("year, month, day") +#' sdtm.oak:::assert_capture_matrix(character()) +#' sdtm.oak:::assert_capture_matrix(matrix(data = NA_character_, nrow = 0, ncol = 0)) +#' sdtm.oak:::assert_capture_matrix(matrix(data = NA_character_, nrow = 1)) #' } #' #' @keywords internal @@ -102,13 +103,39 @@ assert_capture_matrix <- function(m) { rlang::abort("`m` must be a matrix.") col_names <- c("year", "mon", "mday", "hour", "min", "sec") - if (!all(colnames(m) %in% col_names)) + m_col_names <- colnames(m) + if (is.null(m_col_names) || !all(m_col_names %in% col_names)) rlang::abort("`m` must have the following colnames: `year`, `mon`, `mday`, `hour`, `min` and `sec`.") invisible(m) } - +#' Complete a capture matrix +#' +#' [complete_capture_matrix()] completes the missing, if any, columns of the +#' capture matrix. +#' +#' @param m A character matrix that might be missing one or more of the +#' following columns: `year`, `mon`, `mday`, `hour`, `min` or `sec`. +#' +#' @returns A character matrix that contains the columns `year`, `mon`, `mday`, +#' `hour`, `min` and `sec`. Any other existing columns are dropped. +#' +#' @examples +#' sdtm.oak:::complete_capture_matrix(matrix(data = NA_character_, nrow = 0, ncol = 0)) +#' sdtm.oak:::complete_capture_matrix(matrix(data = NA_character_, nrow = 1)) +#' +#' # m <- matrix(NA_character_, nrow = 1, ncol = 2, dimnames = list(NULL, c("year", "sec"))) +#' # sdtm.oak:::complete_capture_matrix(m) +#' +#' # m <- matrix(c("2020", "10"), nrow = 1, ncol = 2, dimnames = list(NULL, c("year", "sec"))) +#' # sdtm.oak:::complete_capture_matrix(m) +#' +#' # Any other existing columns are dropped. +#' # m <- matrix(c("2020", "10"), nrow = 1, ncol = 2, dimnames = list(NULL, c("semester", "quarter"))) +#' # sdtm.oak:::complete_capture_matrix(m) +#' +#' @keywords internal complete_capture_matrix <- function(m) { col_names <- c("year", "mon", "mday", "hour", "min", "sec") @@ -122,13 +149,18 @@ complete_capture_matrix <- m2 <- matrix(nrow = nrow(m), ncol = miss_n_cols) colnames(m2) <- miss_cols - cbind(m, m2)[, col_names] + m3 <- cbind(m, m2)[, col_names, drop = FALSE] + assert_capture_matrix(m3) } coalesce_capture_matrices <- function(...) { dots <- rlang::list2(...) + + # Assert that every argument in `...` is a capture matrix + purrr::walk(dots, assert_capture_matrix) + # `as.vector` needed because of: https://github.com/tidyverse/dplyr/issues/6954 vecs <- purrr::map(dots, as.vector) vec <- dplyr::coalesce(!!!vecs) diff --git a/man/assert_capture_matrix.Rd b/man/assert_capture_matrix.Rd index 08ec0061..8d5db779 100644 --- a/man/assert_capture_matrix.Rd +++ b/man/assert_capture_matrix.Rd @@ -7,7 +7,7 @@ assert_capture_matrix(m) } \arguments{ -\item{.format}{The argument of \code{\link[=create_iso8601]{create_iso8601()}}'s \code{.format} parameter.} +\item{m}{A character matrix.} } \value{ This function throws an error if \code{m} is not either: @@ -17,7 +17,7 @@ This function throws an error if \code{m} is not either: \code{min} and \code{sec}. } -Otherwise, it returns \code{.format} invisibly. +Otherwise, it returns \code{m} invisibly. } \description{ \code{\link[=assert_capture_matrix]{assert_capture_matrix()}} is an internal helper function aiding with the @@ -28,14 +28,15 @@ This function checks that the capture matrix is a matrix and that it contains six columns: \code{year}, \code{mon}, \code{mday}, \code{hour}, \code{min} and \code{sec}. } \examples{ -sdtm.oak:::assert_dtc_format("ymd") -sdtm.oak:::assert_dtc_format(c("ymd", "y-m-d")) -sdtm.oak:::assert_dtc_format(list(c("ymd", "y-m-d"), "H:M:S")) +cols <- c("year", "mon", "mday", "hour", "min", "sec") +m <- matrix(NA_character_, nrow = 1L, ncol = 6L, dimnames = list(NULL, cols)) +sdtm.oak:::assert_capture_matrix(m) # These commands should throw an error if (FALSE) { -# Note that `"year, month, day"` is not a supported format. - sdtm.oak:::assert_dtc_format("year, month, day") + sdtm.oak:::assert_capture_matrix(character()) + sdtm.oak:::assert_capture_matrix(matrix(data = NA_character_, nrow = 0, ncol = 0)) + sdtm.oak:::assert_capture_matrix(matrix(data = NA_character_, nrow = 1)) } } diff --git a/man/complete_capture_matrix.Rd b/man/complete_capture_matrix.Rd new file mode 100644 index 00000000..fd3f67ac --- /dev/null +++ b/man/complete_capture_matrix.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dtc_utils.R +\name{complete_capture_matrix} +\alias{complete_capture_matrix} +\title{Complete a capture matrix} +\usage{ +complete_capture_matrix(m) +} +\arguments{ +\item{m}{A character matrix that might be missing one or more of the +following columns: \code{year}, \code{mon}, \code{mday}, \code{hour}, \code{min} or \code{sec}.} +} +\value{ +A character matrix that contains the columns \code{year}, \code{mon}, \code{mday}, +\code{hour}, \code{min} and \code{sec}. Any other existing columns are dropped. +} +\description{ +\code{\link[=complete_capture_matrix]{complete_capture_matrix()}} completes the missing, if any, columns of the +capture matrix. +} +\examples{ +sdtm.oak:::complete_capture_matrix(matrix(data = NA_character_, nrow = 0, ncol = 0)) +sdtm.oak:::complete_capture_matrix(matrix(data = NA_character_, nrow = 1)) + +# m <- matrix(NA_character_, nrow = 1, ncol = 2, dimnames = list(NULL, c("year", "sec"))) +# sdtm.oak:::complete_capture_matrix(m) + +# m <- matrix(c("2020", "10"), nrow = 1, ncol = 2, dimnames = list(NULL, c("year", "sec"))) +# sdtm.oak:::complete_capture_matrix(m) + +# Any other existing columns are dropped. +# m <- matrix(c("2020", "10"), nrow = 1, ncol = 2, dimnames = list(NULL, c("semester", "quarter"))) +# sdtm.oak:::complete_capture_matrix(m) + +} +\keyword{internal} From 673adccbd64b0b715697b4d77c9382cb0d57ba9b Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Thu, 16 Nov 2023 18:00:45 +0000 Subject: [PATCH 16/52] Add `coalesce_capture_matrices()` doc --- R/dtc_utils.R | 33 ++++++++++++++++++++++++++ man/coalesce_capture_matrices.Rd | 40 ++++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+) create mode 100644 man/coalesce_capture_matrices.Rd diff --git a/R/dtc_utils.R b/R/dtc_utils.R index 541a9d19..b370c6b6 100644 --- a/R/dtc_utils.R +++ b/R/dtc_utils.R @@ -154,10 +154,43 @@ complete_capture_matrix <- } +#' Coalesce capture matrices +#' +#' [coalesce_capture_matrices()] combines several capture matrices into one. +#' Each argument of `...` should be a capture matrix in the sense of the output +#' by [complete_capture_matrix()], meaning a character matrix of six columns +#' whose names are: `year`, `mon`, `mday`, `hour`, `min` or `sec`. +#' +#' @param ... A sequence of capture matrices. +#' +#' @returns A single capture matrix whose values have been coalesced in the +#' sense of [coalesce()][dplyr::coalesce]. +#' +#' @examples +#' cols <- c("year", "mon", "mday", "hour", "min", "sec") +#' dates <- c("2020", "01", "01", "20", NA, NA) +#' times <- c(NA, NA, NA, "10", "00", "05") +#' m_dates <- matrix(dates, nrow = 1L, ncol = 6L, dimnames = list(NULL, cols)) +#' m_times <- matrix(times, nrow = 1L, ncol = 6L, dimnames = list(NULL, cols)) +#' +#' # Note how the hour "20" takes precedence over "10" +#' sdtm.oak:::coalesce_capture_matrices(m_dates, m_times) +#' +#' # Reverse the order of the inputs and now hour "10" takes precedence +#' sdtm.oak:::coalesce_capture_matrices(m_times, m_dates) +#' +#' # Single inputs should result in the same output as the input +#' sdtm.oak:::coalesce_capture_matrices(m_dates) +#' sdtm.oak:::coalesce_capture_matrices(m_times) +#' +#' @keywords internal coalesce_capture_matrices <- function(...) { dots <- rlang::list2(...) + if (rlang::is_empty(dots)) + rlang::abort("At least one input must be passed.") + # Assert that every argument in `...` is a capture matrix purrr::walk(dots, assert_capture_matrix) diff --git a/man/coalesce_capture_matrices.Rd b/man/coalesce_capture_matrices.Rd new file mode 100644 index 00000000..5b32bbfc --- /dev/null +++ b/man/coalesce_capture_matrices.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dtc_utils.R +\name{coalesce_capture_matrices} +\alias{coalesce_capture_matrices} +\title{Coalesce capture matrices} +\usage{ +coalesce_capture_matrices(...) +} +\arguments{ +\item{...}{A sequence of capture matrices.} +} +\value{ +A single capture matrix whose values have been coalesced in the +sense of \link[dplyr:coalesce]{coalesce()}. +} +\description{ +\code{\link[=coalesce_capture_matrices]{coalesce_capture_matrices()}} combines several capture matrices into one. +Each argument of \code{...} should be a capture matrix in the sense of the output +by \code{\link[=complete_capture_matrix]{complete_capture_matrix()}}, meaning a character matrix of six columns +whose names are: \code{year}, \code{mon}, \code{mday}, \code{hour}, \code{min} or \code{sec}. +} +\examples{ +cols <- c("year", "mon", "mday", "hour", "min", "sec") +dates <- c("2020", "01", "01", "20", NA, NA) +times <- c(NA, NA, NA, "10", "00", "05") +m_dates <- matrix(dates, nrow = 1L, ncol = 6L, dimnames = list(NULL, cols)) +m_times <- matrix(times, nrow = 1L, ncol = 6L, dimnames = list(NULL, cols)) + +# Note how the hour "20" takes precedence over "10" +sdtm.oak:::coalesce_capture_matrices(m_dates, m_times) + +# Reverse the order of the inputs and now hour "10" takes precedence +sdtm.oak:::coalesce_capture_matrices(m_times, m_dates) + +# Single inputs should result in the same output as the input +sdtm.oak:::coalesce_capture_matrices(m_dates) +sdtm.oak:::coalesce_capture_matrices(m_times) + +} +\keyword{internal} From 0b7537428a8419aba96d0fc1a5f1337875f5a089 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Thu, 16 Nov 2023 18:27:29 +0000 Subject: [PATCH 17/52] Fix typo in `assert_dtc_fmt()` doc --- man/assert_dtc_fmt.Rd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/man/assert_dtc_fmt.Rd b/man/assert_dtc_fmt.Rd index 21e9f255..c7868e5c 100644 --- a/man/assert_dtc_fmt.Rd +++ b/man/assert_dtc_fmt.Rd @@ -10,8 +10,8 @@ assert_dtc_fmt(fmt) \item{fmt}{A character vector.} } \description{ -\code{\link[=assert_dtc_fmt]{assert_dtc_fmt()}} takes a character vector of date/time formats and check if -the formats are supported, meaning it check if they are one of the formats +\code{\link[=assert_dtc_fmt]{assert_dtc_fmt()}} takes a character vector of date/time formats and checks if +the formats are supported, meaning it checks if they are one of the formats listed in column \code{fmt} of \link{dtc_formats}, failing with an error otherwise. } \examples{ From 0810be056ce38e16cd3a1e9a9a581f91e218c4b0 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Thu, 16 Nov 2023 18:27:49 +0000 Subject: [PATCH 18/52] Add `regex_or()` doc --- R/parse_dttm_fmt.R | 20 ++++++++++++++++++++ man/regex_or.Rd | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) create mode 100644 man/regex_or.Rd diff --git a/R/parse_dttm_fmt.R b/R/parse_dttm_fmt.R index cd776806..dd4815f8 100644 --- a/R/parse_dttm_fmt.R +++ b/R/parse_dttm_fmt.R @@ -140,6 +140,26 @@ fmt_c <- function(sec = "S+", } +#' Utility function to assemble a regex of alternative patterns +#' +#' [regex_or()] takes a set of patterns and binds them with the Or (`"|"`) +#' pattern for an easy regex of alternative patterns. +#' +#' @param x A character vector of alternative patterns. +#' @param .open Whether the resulting regex should start with `"|"`. +#' @param .close Whether the resulting regex should end with `"|"`. +#' +#' @returns A character scalar of the resulting regex. +#' +#' @examples +#' # A regex for matching either "jan" or "feb" +#' sdtm.oak:::regex_or(c("jan", "feb")) +#' +#' # Setting `.open` and/or `.close` to `TRUE` can be handy if this regex +#' # is to be combined into a larger regex. +#' paste0(sdtm.oak:::regex_or(c("jan", "feb"), .close = TRUE), r"{\d{2}}") +#' +#' @keywords internal regex_or <- function(x, .open = FALSE, .close = FALSE) { admiraldev::assert_character_vector(x) diff --git a/man/regex_or.Rd b/man/regex_or.Rd new file mode 100644 index 00000000..efb2ba49 --- /dev/null +++ b/man/regex_or.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/parse_dttm_fmt.R +\name{regex_or} +\alias{regex_or} +\title{Utility function to assemble a regex of alternative patterns} +\usage{ +regex_or(x, .open = FALSE, .close = FALSE) +} +\arguments{ +\item{x}{A character vector of alternative patterns.} + +\item{.open}{Whether the resulting regex should start with \code{"|"}.} + +\item{.close}{Whether the resulting regex should end with \code{"|"}.} +} +\value{ +A character scalar of the resulting regex. +} +\description{ +\code{\link[=regex_or]{regex_or()}} takes a set of patterns and binds them with the Or (\code{"|"}) +pattern for an easy regex of alternative patterns. +} +\examples{ +# A regex for matching either "jan" or "feb" +sdtm.oak:::regex_or(c("jan", "feb")) + +# Setting `.open` and/or `.close` to `TRUE` can be handy if this regex +# is to be combined into a larger regex. +paste0(sdtm.oak:::regex_or(c("jan", "feb"), .close = TRUE), r"{\d{2}}") + +} +\keyword{internal} From 0a848c3f0a837fe26f5245ce5cd9f3bf06f03458 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Thu, 16 Nov 2023 20:39:17 +0000 Subject: [PATCH 19/52] Add `fmt_rg()` doc --- R/parse_dttm_fmt.R | 52 +++++++++++++++++++++++++++---- man/fmt_rg.Rd | 76 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 122 insertions(+), 6 deletions(-) create mode 100644 man/fmt_rg.Rd diff --git a/R/parse_dttm_fmt.R b/R/parse_dttm_fmt.R index dd4815f8..37e42e22 100644 --- a/R/parse_dttm_fmt.R +++ b/R/parse_dttm_fmt.R @@ -172,13 +172,53 @@ regex_or <- function(x, .open = FALSE, .close = FALSE) { stringr::str_flatten(x, collapse = "|") } +#' Regexps for date/time components +#' +#' [fmt_rg()] creates a character vector of named patterns to match individual +#' date/time components. +#' +#' @param sec Regexp for the second component. +#' @param min Regexp for the minute component. +#' @param hour Regexp for the hour component. +#' @param mday Regexp for the month day component. +#' @param mon Regexp for the month component. +#' @param year Regexp for the year component. +#' @param na Regexp of alternatives, useful to match special values coding for +#' missingness. +#' @param sec_na Same as `na` but specifically for the second component. +#' @param min_na Same as `na` but specifically for the minute component. +#' @param hour_na Same as `na` but specifically for the hour component. +#' @param mday_na Same as `na` but specifically for the month day component. +#' @param mon_na Same as `na` but specifically for the month component. +#' @param year_na Same as `na` but specifically for the year component. +#' +#' @returns A named character vector of named patterns (regexps) for matching +#' each date/time component. +#' +#' @examples +#' # Default regexps +#' sdtm.oak:::fmt_rg() +#' +#' # You may change the way months are matched, e.g. you might not want to match +#' # month abbreviations, i.e. only numerical months. So pass an explicity regex +#' # for numerical months: +#' sdtm.oak:::fmt_rg(mon = r"[\b\d|\d{2}]") +#' +#' # Make date/time components accept `"UNK"` as a possible pattern (useful +#' # to match funny codes for `NA`). +#' sdtm.oak:::fmt_rg(na = "UNK") +#' +#' # Or be more specific and use `"UNK"` for the year component only. +#' sdtm.oak:::fmt_rg(year_na = "UNK") +#' +#' @keywords internal fmt_rg <- function( - sec = "(\\b\\d|\\d{2})(\\.\\d*)?", - min = "(\\b\\d|\\d{2})", - hour = "\\d?\\d", - mday = "\\b\\d|\\d{2}", - mon = stringr::str_glue("\\d\\d|{months_abb_regex()}"), - year = "(\\d{2})?\\d{2}", + sec = r"[(\b\d|\d{2})(\.\d*)?]", + min = r"[(\b\d|\d{2})]", + hour = r"[\d?\d]", + mday = r"[\b\d|\d{2}]", + mon = stringr::str_glue(r"[\d\d|{months_abb_regex()}]"), + year = r"[(\d{2})?\d{2}]", na = NULL, sec_na = na, min_na = na, diff --git a/man/fmt_rg.Rd b/man/fmt_rg.Rd new file mode 100644 index 00000000..1816238a --- /dev/null +++ b/man/fmt_rg.Rd @@ -0,0 +1,76 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/parse_dttm_fmt.R +\name{fmt_rg} +\alias{fmt_rg} +\title{Regexps for date/time components} +\usage{ +fmt_rg( + sec = "(\\\\b\\\\d|\\\\d{2})(\\\\.\\\\d*)?", + min = "(\\\\b\\\\d|\\\\d{2})", + hour = "\\\\d?\\\\d", + mday = "\\\\b\\\\d|\\\\d{2}", + mon = stringr::str_glue("\\\\d\\\\d|{months_abb_regex()}"), + year = "(\\\\d{2})?\\\\d{2}", + na = NULL, + sec_na = na, + min_na = na, + hour_na = na, + mday_na = na, + mon_na = na, + year_na = na +) +} +\arguments{ +\item{sec}{Regexp for the second component.} + +\item{min}{Regexp for the minute component.} + +\item{hour}{Regexp for the hour component.} + +\item{mday}{Regexp for the month day component.} + +\item{mon}{Regexp for the month component.} + +\item{year}{Regexp for the year component.} + +\item{na}{Regexp of alternatives, useful to match special values coding for +missingness.} + +\item{sec_na}{Same as \code{na} but specifically for the second component.} + +\item{min_na}{Same as \code{na} but specifically for the minute component.} + +\item{hour_na}{Same as \code{na} but specifically for the hour component.} + +\item{mday_na}{Same as \code{na} but specifically for the month day component.} + +\item{mon_na}{Same as \code{na} but specifically for the month component.} + +\item{year_na}{Same as \code{na} but specifically for the year component.} +} +\value{ +A named character vector of named patterns (regexps) for matching +each date/time component. +} +\description{ +\code{\link[=fmt_rg]{fmt_rg()}} creates a character vector of named patterns to match individual +date/time components. +} +\examples{ +# Default regexps +sdtm.oak:::fmt_rg() + +# You may change the way months are matched, e.g. you might not want to match +# month abbreviations, i.e. only numerical months. So pass an explicity regex +# for numerical months: +sdtm.oak:::fmt_rg(mon = r"[\b\d|\d{2}]") + +# Make date/time components accept `"UNK"` as a possible pattern (useful +# to match funny codes for `NA`). +sdtm.oak:::fmt_rg(na = "UNK") + +# Or be more specific and use `"UNK"` for the year component only. +sdtm.oak:::fmt_rg(year_na = "UNK") + +} +\keyword{internal} From 113c67c0bfd4df519a00d55ec455ce1b1f3f037d Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Thu, 16 Nov 2023 22:32:38 +0000 Subject: [PATCH 20/52] Add `fmt_c()` doc --- R/parse_dttm_fmt.R | 23 ++++++++++++++++++++++- man/fmt_c.Rd | 42 ++++++++++++++++++++++++++++++++++++++++++ man/fmt_rg.Rd | 2 +- 3 files changed, 65 insertions(+), 2 deletions(-) create mode 100644 man/fmt_c.Rd diff --git a/R/parse_dttm_fmt.R b/R/parse_dttm_fmt.R index 37e42e22..a8819b97 100644 --- a/R/parse_dttm_fmt.R +++ b/R/parse_dttm_fmt.R @@ -122,6 +122,27 @@ months_abb_regex <- function(x = month.abb, case = c("any", "upper", "lower", "t # utility function that allows you to easily # change the regexp for one specific dttm component # while keeping the other defaults. + +#' Regexps for date/time format components +#' +#' [fmt_c()] creates a character vector of patterns to match individual +#' format date/time components. +#' +#' @param sec A string pattern for matching the second format component. +#' @param min A string pattern for matching the minute format component. +#' @param hour A string pattern for matching the hour format component. +#' @param mday A string pattern for matching the month day format component. +#' @param mon A string pattern for matching the month format component. +#' @param year A string pattern for matching the year format component. +#' +#' @returns A named character vector of date/time format patterns. This a vector +#' of six elements, one for each date/time component. +#' +#' @examples +#' # Default patterns +#' sdtm.oak:::fmt_c() +#' +#' @keywords internal fmt_c <- function(sec = "S+", min = "M+", hour = "H+", @@ -200,7 +221,7 @@ regex_or <- function(x, .open = FALSE, .close = FALSE) { #' sdtm.oak:::fmt_rg() #' #' # You may change the way months are matched, e.g. you might not want to match -#' # month abbreviations, i.e. only numerical months. So pass an explicity regex +#' # month abbreviations, i.e. only numerical months. So pass an explicit regex #' # for numerical months: #' sdtm.oak:::fmt_rg(mon = r"[\b\d|\d{2}]") #' diff --git a/man/fmt_c.Rd b/man/fmt_c.Rd new file mode 100644 index 00000000..0304c017 --- /dev/null +++ b/man/fmt_c.Rd @@ -0,0 +1,42 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/parse_dttm_fmt.R +\name{fmt_c} +\alias{fmt_c} +\title{Regexps for date/time format components} +\usage{ +fmt_c( + sec = "S+", + min = "M+", + hour = "H+", + mday = "d+", + mon = "m+", + year = "y+" +) +} +\arguments{ +\item{sec}{A string pattern for matching the second format component.} + +\item{min}{A string pattern for matching the minute format component.} + +\item{hour}{A string pattern for matching the hour format component.} + +\item{mday}{A string pattern for matching the month day format component.} + +\item{mon}{A string pattern for matching the month format component.} + +\item{year}{A string pattern for matching the year format component.} +} +\value{ +A named character vector of date/time format patterns. This a vector +of six elements, one for each date/time component. +} +\description{ +\code{\link[=fmt_c]{fmt_c()}} creates a character vector of patterns to match individual +format date/time components. +} +\examples{ +# Default patterns +sdtm.oak:::fmt_c() + +} +\keyword{internal} diff --git a/man/fmt_rg.Rd b/man/fmt_rg.Rd index 1816238a..1c59944a 100644 --- a/man/fmt_rg.Rd +++ b/man/fmt_rg.Rd @@ -61,7 +61,7 @@ date/time components. sdtm.oak:::fmt_rg() # You may change the way months are matched, e.g. you might not want to match -# month abbreviations, i.e. only numerical months. So pass an explicity regex +# month abbreviations, i.e. only numerical months. So pass an explicit regex # for numerical months: sdtm.oak:::fmt_rg(mon = r"[\b\d|\d{2}]") From d558103459fc0988afe21d629fc6bffdb64fdeeb Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Thu, 16 Nov 2023 23:21:29 +0000 Subject: [PATCH 21/52] Add `parse_dttm_fmt()` doc --- R/parse_dttm_fmt.R | 56 +++++++++++++++++++++++++++++++++++--- man/parse_dttm_fmt.Rd | 62 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 114 insertions(+), 4 deletions(-) create mode 100644 man/parse_dttm_fmt.Rd diff --git a/R/parse_dttm_fmt.R b/R/parse_dttm_fmt.R index a8819b97..785e790e 100644 --- a/R/parse_dttm_fmt.R +++ b/R/parse_dttm_fmt.R @@ -272,11 +272,11 @@ fmt_rg <- function( ) } -# Scalar version of `parse_dttm_fmt()`. -parse_dttm_fmt_ <- function(x, pattern) { +#' @rdname parse_dttm_fmt +parse_dttm_fmt_ <- function(fmt, pattern) { - match_data <- regexpr(pattern, x) - match <- reg_matches(x, match_data) + match_data <- regexpr(pattern, fmt) + match <- reg_matches(fmt, match_data) is_match <- !is.na(match) @@ -286,8 +286,56 @@ parse_dttm_fmt_ <- function(x, pattern) { tibble::tibble(pat = pattern, cap = match, start = start, end = end, len = len) } +#' Parse a date/time format +#' +#' [parse_dttm_fmt()] parses a date/time formats, meaning it will try to parse +#' the components of the format `fmt` that refer to date/time components. +#' [parse_dttm_fmt_()] is similar to [parse_dttm_fmt()] but is not vectorized +#' over `fmt`. +#' +#' @param fmt A format string (scalar) to be parsed by `patterns`. +#' @param pattern,patterns A string (in the case of `pattern`), or a character +#' vector (in the case of `patterns`) of regexps for each of the individual +#' date/time components. Default value is that of [fmt_c()]. Use this function +#' if you plan on passing a different set of patterns. +#' +#' @returns A matrix of seven columns: +#' - `fmt_c`: date/time format component. Values are either `"year"`, `"mon"`, `"mday"`, +#' `"hour"`, `"min"`, `"sec"`, or `NA`. +#' - `pat`: Regexp used to parse the date/time component. +#' - `cap`: The captured substring from the format. +#' - `start`: Start position in the format string for this capture. +#' - `end`: End position in the format string for this capture. +#' - `len`: Length of the capture (number of chars). +#' - `ord`: Ordinal of this date/time component in the format string. +#' +#' Each row is for either a date/time format component or a "delimiter" string +#' or pattern in-between format components. +#' +#' @examples +#' sdtm.oak:::parse_dttm_fmt("ymd") +#' sdtm.oak:::parse_dttm_fmt("H:M:S") +#' +#' sdtm.oak:::parse_dttm_fmt("ymd HMS") +#' +#' # Repeating the same special patterns, e.g. "yy" still counts as one pattern +#' # only. +#' sdtm.oak:::parse_dttm_fmt("yymmdd HHMMSS") +#' +#' # Note that `"y"`, `"m"`, `"d"`, `"H"`, `"M"` or `"S"` are reserved patterns +#' # that are matched first and interpreted as format components. # Example: the +#' # first "y" in "year" is parsed as meaning year followed by # "ear y". The +#' # second "y" is not longer matched because a first match already # succeded. +#' sdtm.oak:::parse_dttm_fmt("year y") +#' +#' # Specify custom patterns +#' sdtm.oak:::parse_dttm_fmt("year month day", fmt_c(year = "year", mon = "month", mday = "day")) +#' +#' @keywords internal parse_dttm_fmt <- function(fmt, patterns = fmt_c()) { + admiraldev::assert_character_scalar(fmt) + fmt_dttmc <- purrr::map(patterns, ~ parse_dttm_fmt_(fmt, .x)) |> purrr::list_rbind(names_to = "fmt_c") diff --git a/man/parse_dttm_fmt.Rd b/man/parse_dttm_fmt.Rd new file mode 100644 index 00000000..fca0bd83 --- /dev/null +++ b/man/parse_dttm_fmt.Rd @@ -0,0 +1,62 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/parse_dttm_fmt.R +\name{parse_dttm_fmt_} +\alias{parse_dttm_fmt_} +\alias{parse_dttm_fmt} +\title{Parse a date/time format} +\usage{ +parse_dttm_fmt_(fmt, pattern) + +parse_dttm_fmt(fmt, patterns = fmt_c()) +} +\arguments{ +\item{fmt}{A format string (scalar) to be parsed by \code{patterns}.} + +\item{pattern, patterns}{A string (in the case of \code{pattern}), or a character +vector (in the case of \code{patterns}) of regexps for each of the individual +date/time components. Default value is that of \code{\link[=fmt_c]{fmt_c()}}. Use this function +if you plan on passing a different set of patterns.} +} +\value{ +A matrix of seven columns: +\itemize{ +\item \code{fmt_c}: date/time format component. Values are either \code{"year"}, \code{"mon"}, \code{"mday"}, +\code{"hour"}, \code{"min"}, \code{"sec"}, or \code{NA}. +\item \code{pat}: Regexp used to parse the date/time component. +\item \code{cap}: The captured substring from the format. +\item \code{start}: Start position in the format string for this capture. +\item \code{end}: End position in the format string for this capture. +\item \code{len}: Length of the capture (number of chars). +\item \code{ord}: Ordinal of this date/time component in the format string. +} + +Each row is for either a date/time format component or a "delimiter" string +or pattern in-between format components. +} +\description{ +\code{\link[=parse_dttm_fmt]{parse_dttm_fmt()}} parses a date/time formats, meaning it will try to parse +the components of the format \code{fmt} that refer to date/time components. +\code{\link[=parse_dttm_fmt_]{parse_dttm_fmt_()}} is similar to \code{\link[=parse_dttm_fmt]{parse_dttm_fmt()}} but is not vectorized +over \code{fmt}. +} +\examples{ +sdtm.oak:::parse_dttm_fmt("ymd") +sdtm.oak:::parse_dttm_fmt("H:M:S") + +sdtm.oak:::parse_dttm_fmt("ymd HMS") + +# Repeating the same special patterns, e.g. "yy" still counts as one pattern +# only. +sdtm.oak:::parse_dttm_fmt("yymmdd HHMMSS") + +# Note that `"y"`, `"m"`, `"d"`, `"H"`, `"M"` or `"S"` are reserved patterns +# that are matched first and interpreted as format components. # Example: the +# first "y" in "year" is parsed as meaning year followed by # "ear y". The +# second "y" is not longer matched because a first match already # succeded. +sdtm.oak:::parse_dttm_fmt("year y") + +# Specify custom patterns +sdtm.oak:::parse_dttm_fmt("year month day", fmt_c(year = "year", mon = "month", mday = "day")) + +} +\keyword{internal} From 95e76ed749449b3f88a1fa9a748ee4a084f14217 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Thu, 16 Nov 2023 23:23:34 +0000 Subject: [PATCH 22/52] Fix doc of `parse_dttm_fmt()` --- R/parse_dttm_fmt.R | 3 ++- man/parse_dttm_fmt.Rd | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/R/parse_dttm_fmt.R b/R/parse_dttm_fmt.R index 785e790e..9161af82 100644 --- a/R/parse_dttm_fmt.R +++ b/R/parse_dttm_fmt.R @@ -299,7 +299,7 @@ parse_dttm_fmt_ <- function(fmt, pattern) { #' date/time components. Default value is that of [fmt_c()]. Use this function #' if you plan on passing a different set of patterns. #' -#' @returns A matrix of seven columns: +#' @returns A [tibble][tibble::tibble-package] of seven columns: #' - `fmt_c`: date/time format component. Values are either `"year"`, `"mon"`, `"mday"`, #' `"hour"`, `"min"`, `"sec"`, or `NA`. #' - `pat`: Regexp used to parse the date/time component. @@ -366,6 +366,7 @@ parse_dttm_fmt <- function(fmt, patterns = fmt_c()) { } + dttm_fmt_to_regex <- function(tbl_fmt_c, fmt_regex = fmt_rg(), anchored = TRUE) { fmt_regex <- tbl_fmt_c |> diff --git a/man/parse_dttm_fmt.Rd b/man/parse_dttm_fmt.Rd index fca0bd83..55502699 100644 --- a/man/parse_dttm_fmt.Rd +++ b/man/parse_dttm_fmt.Rd @@ -18,7 +18,7 @@ date/time components. Default value is that of \code{\link[=fmt_c]{fmt_c()}}. Us if you plan on passing a different set of patterns.} } \value{ -A matrix of seven columns: +A \link[tibble:tibble-package]{tibble} of seven columns: \itemize{ \item \code{fmt_c}: date/time format component. Values are either \code{"year"}, \code{"mon"}, \code{"mday"}, \code{"hour"}, \code{"min"}, \code{"sec"}, or \code{NA}. From 0f298c9feaea5e050fd3221eaf33e70c0ffbf15b Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Thu, 16 Nov 2023 23:38:48 +0000 Subject: [PATCH 23/52] Add `dttm_fmt_to_regex()` doc --- R/parse_dttm_fmt.R | 36 ++++++++++++++++++++++++++++++-- man/dttm_fmt_to_regex.Rd | 44 ++++++++++++++++++++++++++++++++++++++++ man/parse_dttm_fmt.Rd | 3 ++- 3 files changed, 80 insertions(+), 3 deletions(-) create mode 100644 man/dttm_fmt_to_regex.Rd diff --git a/R/parse_dttm_fmt.R b/R/parse_dttm_fmt.R index 9161af82..520187c5 100644 --- a/R/parse_dttm_fmt.R +++ b/R/parse_dttm_fmt.R @@ -329,7 +329,8 @@ parse_dttm_fmt_ <- function(fmt, pattern) { #' sdtm.oak:::parse_dttm_fmt("year y") #' #' # Specify custom patterns -#' sdtm.oak:::parse_dttm_fmt("year month day", fmt_c(year = "year", mon = "month", mday = "day")) +#' sdtm.oak:::parse_dttm_fmt("year month day", +#' sdtm.oak:::fmt_c(year = "year", mon = "month", mday = "day")) #' #' @keywords internal parse_dttm_fmt <- function(fmt, patterns = fmt_c()) { @@ -366,7 +367,38 @@ parse_dttm_fmt <- function(fmt, patterns = fmt_c()) { } - +#' Convert a parsed date/time format to regex +#' +#' [dttm_fmt_to_regex()] takes a [tibble][tibble::tibble-package] of parsed +#' date/time format components (as returned by [parse_dttm_fmt()]), and a +#' mapping of date/time component formats to regexps and generates a single +#' regular expression with groups for matching each of the date/time components. +#' +#' @param tbl_fmt_c A [tibble][tibble::tibble-package] of parsed date/time +#' format components as returned by [parse_dttm_fmt()]. +#' @param fmt_regex A named character vector of regexps, one for each date/time +#' component. +#' @param anchored Whether the final regex should be anchored, i.e. bounded by +#' `"^"` and `"$"` for a whole match. +#' +#' @returns A string containing a regular expression for matching date/time +#' components according to a format. +#' +#' @examples +#' tbl_fmt_c <- sdtm.oak:::parse_dttm_fmt("y") +#' sdtm.oak:::dttm_fmt_to_regex(tbl_fmt_c) +#' sdtm.oak:::dttm_fmt_to_regex(tbl_fmt_c, anchored = FALSE) +#' +#' tbl_fmt_c <- sdtm.oak:::parse_dttm_fmt("m") +#' sdtm.oak:::dttm_fmt_to_regex(tbl_fmt_c) +#' +#' tbl_fmt_c <- sdtm.oak:::parse_dttm_fmt("ymd") +#' sdtm.oak:::dttm_fmt_to_regex(tbl_fmt_c) +#' +#' tbl_fmt_c <- sdtm.oak:::parse_dttm_fmt("ymd HH:MM:SS") +#' sdtm.oak:::dttm_fmt_to_regex(tbl_fmt_c) +#' +#' @keywords internal dttm_fmt_to_regex <- function(tbl_fmt_c, fmt_regex = fmt_rg(), anchored = TRUE) { fmt_regex <- tbl_fmt_c |> diff --git a/man/dttm_fmt_to_regex.Rd b/man/dttm_fmt_to_regex.Rd new file mode 100644 index 00000000..5ba3fc33 --- /dev/null +++ b/man/dttm_fmt_to_regex.Rd @@ -0,0 +1,44 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/parse_dttm_fmt.R +\name{dttm_fmt_to_regex} +\alias{dttm_fmt_to_regex} +\title{Convert a parsed date/time format to regex} +\usage{ +dttm_fmt_to_regex(tbl_fmt_c, fmt_regex = fmt_rg(), anchored = TRUE) +} +\arguments{ +\item{tbl_fmt_c}{A \link[tibble:tibble-package]{tibble} of parsed date/time +format components as returned by \code{\link[=parse_dttm_fmt]{parse_dttm_fmt()}}.} + +\item{fmt_regex}{A named character vector of regexps, one for each date/time +component.} + +\item{anchored}{Whether the final regex should be anchored, i.e. bounded by +\code{"^"} and \code{"$"} for a whole match.} +} +\value{ +A string containing a regular expression for matching date/time +components according to a format. +} +\description{ +\code{\link[=dttm_fmt_to_regex]{dttm_fmt_to_regex()}} takes a \link[tibble:tibble-package]{tibble} of parsed +date/time format components (as returned by \code{\link[=parse_dttm_fmt]{parse_dttm_fmt()}}), and a +mapping of date/time component formats to regexps and generates a single +regular expression with groups for matching each of the date/time components. +} +\examples{ +tbl_fmt_c <- sdtm.oak:::parse_dttm_fmt("y") +sdtm.oak:::dttm_fmt_to_regex(tbl_fmt_c) +sdtm.oak:::dttm_fmt_to_regex(tbl_fmt_c, anchored = FALSE) + +tbl_fmt_c <- sdtm.oak:::parse_dttm_fmt("m") +sdtm.oak:::dttm_fmt_to_regex(tbl_fmt_c) + +tbl_fmt_c <- sdtm.oak:::parse_dttm_fmt("ymd") +sdtm.oak:::dttm_fmt_to_regex(tbl_fmt_c) + +tbl_fmt_c <- sdtm.oak:::parse_dttm_fmt("ymd HH:MM:SS") +sdtm.oak:::dttm_fmt_to_regex(tbl_fmt_c) + +} +\keyword{internal} diff --git a/man/parse_dttm_fmt.Rd b/man/parse_dttm_fmt.Rd index 55502699..9090f77a 100644 --- a/man/parse_dttm_fmt.Rd +++ b/man/parse_dttm_fmt.Rd @@ -56,7 +56,8 @@ sdtm.oak:::parse_dttm_fmt("yymmdd HHMMSS") sdtm.oak:::parse_dttm_fmt("year y") # Specify custom patterns -sdtm.oak:::parse_dttm_fmt("year month day", fmt_c(year = "year", mon = "month", mday = "day")) +sdtm.oak:::parse_dttm_fmt("year month day", + sdtm.oak:::fmt_c(year = "year", mon = "month", mday = "day")) } \keyword{internal} From 07cb48939e7369f3512f3104838d69058a2a8a98 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Thu, 16 Nov 2023 23:49:08 +0000 Subject: [PATCH 24/52] Bump development version to 0.0.0.9001 --- DESCRIPTION | 2 +- NEWS.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index d30e4833..4509f099 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: sdtm.oak Type: Package Title: SDTM Data Transformation Engine -Version: 0.0.0.9000 +Version: 0.0.0.9001 Authors@R: c( person("Omar", "Garcia", email = "ogcalderon@cdisc.org", role = c("aut", "cre")), person("Rammprasad", "Ganapathy", role = "aut"), diff --git a/NEWS.md b/NEWS.md index df003685..77776156 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# sdtm.oak (development version) +# sdtm.oak 0.0.0.9001 (development version) ## New Features From 57dd58401e82f5169ec0c0311e020e9b5eb8de2e Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Fri, 17 Nov 2023 01:09:00 +0000 Subject: [PATCH 25/52] Style updates Style updates on R/dtc_create_iso8601.R, R/dtc_parse_dttm.R, R/dtc_utils.R. Mostly indentation corrections, wrapping single line body if-conditions in braces, white space removal. --- R/dtc_create_iso8601.R | 44 +++++++++++++++++++++++------------------- R/dtc_parse_dttm.R | 3 --- R/dtc_utils.R | 26 ++++++++++++------------- 3 files changed, 37 insertions(+), 36 deletions(-) diff --git a/R/dtc_create_iso8601.R b/R/dtc_create_iso8601.R index 633923d4..2aa51b40 100644 --- a/R/dtc_create_iso8601.R +++ b/R/dtc_create_iso8601.R @@ -39,7 +39,6 @@ iso8601_na <- function(x) { #' #' @keywords internal zero_pad_whole_number <- function(x, n = 2L) { - # Check `x` if (!rlang::is_integerish(x)) rlang::abort("`x` must be integerish.") @@ -84,12 +83,12 @@ zero_pad_whole_number <- function(x, n = 2L) { #' #' @keywords internal yy_to_yyyy <- function(x, cutoff_2000 = 68L) { - # Check `x` if (!rlang::is_integerish(x)) rlang::abort("`x` must be integerish.") - if (any(x < 0L, na.rm = TRUE)) + if (any(x < 0L, na.rm = TRUE)) { rlang::abort("`x` cannot have negative years.") + } x <- dplyr::if_else(x <= cutoff_2000, x + 2000L, x) x <- dplyr::if_else(x <= 99L, x + 1900L, x) @@ -285,7 +284,6 @@ iso8601_truncate <- function(x, empty_as_na = TRUE) { #' #' @keywords internal format_iso8601 <- function(m, .cutoff_2000 = 68L) { - admiraldev::assert_integer_scalar(.cutoff_2000) m[, "year"] <- iso8601_year(m[, "year"], cutoff_2000 = .cutoff_2000) @@ -298,17 +296,19 @@ format_iso8601 <- function(m, .cutoff_2000 = 68L) { m <- iso8601_na(m) x <- - paste0(m[, "year"], - "-", - m[, "mon"], - "-", - m[, "mday"], - "T", - m[, "hour"], - ":", - m[, "min"], - ":", - m[, "sec"]) + paste0( + m[, "year"], + "-", + m[, "mon"], + "-", + m[, "mday"], + "T", + m[, "hour"], + ":", + m[, "min"], + ":", + m[, "sec"] + ) iso8601_truncate(x) } @@ -377,22 +377,26 @@ format_iso8601 <- function(m, .cutoff_2000 = 68L) { #' create_iso8601("2019-120602:20:13.1230001", .format = "y-mdH:M:S", .check_format = FALSE) #' @export create_iso8601 <- function(..., .format, .na = NULL, .cutoff_2000 = 68L, .check_format = TRUE) { - dots <- rlang::dots_list(...) - if (rlang::is_empty(dots)) return(character()) + if (rlang::is_empty(dots)) { + return(character()) + } # Check if all vectors in `dots` are of character type. - if (!identical(unique(sapply(dots, typeof)), "character")) + if (!identical(unique(sapply(dots, typeof)), "character")) { rlang::abort("All vectors in `...` must be of type character.") + } # Check if all vectors in `dots` are of the same length. n <- unique(lengths(dots)) - if (!identical(length(n), 1L)) + if (!identical(length(n), 1L)) { rlang::abort("All vectors in `...` must be of the same length.") + } - if (!identical(length(dots), length(.format))) + if (!identical(length(dots), length(.format))) { rlang::abort("Number of vectors in `...` should match length of `.format`.") + } # Check that the `.format` is either a character vector or a list of # character vectors, and that each string is one of the possible formats. diff --git a/R/dtc_parse_dttm.R b/R/dtc_parse_dttm.R index cbeb8eec..73f66c4d 100644 --- a/R/dtc_parse_dttm.R +++ b/R/dtc_parse_dttm.R @@ -9,8 +9,6 @@ parse_dttm_ <- function(dttm, mday_na = na, mon_na = na, year_na = na) { - - admiraldev::assert_character_scalar(fmt) tbl_fmt_c <- parse_dttm_fmt(fmt) @@ -115,5 +113,4 @@ parse_dttm <- function(dttm, ) coalesce_capture_matrices(!!!lst) - } diff --git a/R/dtc_utils.R b/R/dtc_utils.R index 2797d175..d97e4350 100644 --- a/R/dtc_utils.R +++ b/R/dtc_utils.R @@ -18,8 +18,9 @@ assert_dtc_fmt <- function(fmt) { admiraldev::assert_character_vector(fmt) rlang::arg_match(fmt, - values = sdtm.oak::dtc_formats$fmt, - multiple = TRUE) + values = sdtm.oak::dtc_formats$fmt, + multiple = TRUE + ) } #' Assert dtc format @@ -42,17 +43,15 @@ assert_dtc_fmt <- function(fmt) { #' #' # These commands should throw an error #' if (FALSE) { -#' # Note that `"year, month, day"` is not a supported format. +#' # Note that `"year, month, day"` is not a supported format. #' sdtm.oak:::assert_dtc_format("year, month, day") #' } #' #' @keywords internal assert_dtc_format <- function(.format) { - abort_msg <- "`.format` must be either a character vector of formats of a list thereof." - switch( - typeof(.format), + switch(typeof(.format), character = assert_dtc_fmt(.format), list = purrr::map(.format, assert_dtc_format), rlang::abort(abort_msg) @@ -95,17 +94,18 @@ assert_dtc_format <- function(.format) { #' #' @keywords internal assert_capture_matrix <- function(m) { - # `m` must be of character type. admiraldev::assert_character_vector(m) - if (!is.matrix(m)) + if (!is.matrix(m)) { rlang::abort("`m` must be a matrix.") + } col_names <- c("year", "mon", "mday", "hour", "min", "sec") m_col_names <- colnames(m) - if (is.null(m_col_names) || !all(m_col_names %in% col_names)) + if (is.null(m_col_names) || !all(m_col_names %in% col_names)) { rlang::abort("`m` must have the following colnames: `year`, `mon`, `mday`, `hour`, `min` and `sec`.") + } invisible(m) } @@ -140,8 +140,9 @@ complete_capture_matrix <- function(m) { col_names <- c("year", "mon", "mday", "hour", "min", "sec") - if (setequal(col_names, colnames(m))) + if (setequal(col_names, colnames(m))) { return(m) + } miss_cols <- setdiff(col_names, colnames(m)) miss_n_cols <- length(miss_cols) @@ -151,7 +152,6 @@ complete_capture_matrix <- m3 <- cbind(m, m2)[, col_names, drop = FALSE] assert_capture_matrix(m3) - } #' Coalesce capture matrices @@ -185,11 +185,11 @@ complete_capture_matrix <- #' #' @keywords internal coalesce_capture_matrices <- function(...) { - dots <- rlang::list2(...) - if (rlang::is_empty(dots)) + if (rlang::is_empty(dots)) { rlang::abort("At least one input must be passed.") + } # Assert that every argument in `...` is a capture matrix purrr::walk(dots, assert_capture_matrix) From 52c752caa003b640671d9dc4a6b05351942fe5df Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Fri, 17 Nov 2023 01:10:32 +0000 Subject: [PATCH 26/52] Style update to tests/testthat/test-yy_to_yyyy.R --- tests/testthat/test-yy_to_yyyy.R | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/testthat/test-yy_to_yyyy.R b/tests/testthat/test-yy_to_yyyy.R index 76f4002b..a909b984 100644 --- a/tests/testthat/test-yy_to_yyyy.R +++ b/tests/testthat/test-yy_to_yyyy.R @@ -1,5 +1,4 @@ test_that("`yy_to_yyyy()`: basic usage", { - # Default cutoff is at `68`. x1 <- c(0L, 1L, 50L, 68L, 69L, 70L) y1 <- c(2000L, 2001L, 2050L, 2068L, 1969L, 1970L) @@ -8,7 +7,8 @@ test_that("`yy_to_yyyy()`: basic usage", { # Different cutoff, e.g. `79`. x2 <- 75L:85L y2 <- - c(2075L, + c( + 2075L, 2076L, 2077L, 2078L, @@ -18,7 +18,8 @@ test_that("`yy_to_yyyy()`: basic usage", { 1982L, 1983L, 1984L, - 1985L) + 1985L + ) expect_identical(yy_to_yyyy(x2, cutoff_2000 = 79L), y2) # Four-digit years remain altered. From 55b0e5e307342fad0459343a9a2dcb514e47edba Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Fri, 17 Nov 2023 01:11:38 +0000 Subject: [PATCH 27/52] Style update Style updates on tests/testthat/test-iso8601.R and tests/testthat/test-reg_matches.R --- tests/testthat/test-iso8601.R | 12 ++++++++---- tests/testthat/test-reg_matches.R | 2 -- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/tests/testthat/test-iso8601.R b/tests/testthat/test-iso8601.R index 881b514f..b21737ff 100644 --- a/tests/testthat/test-iso8601.R +++ b/tests/testthat/test-iso8601.R @@ -22,10 +22,14 @@ test_that("`zero_pad_whole_number()`: ensure `x` is integerish", { test_that("`zero_pad_whole_number()`: basic usage", { expect_identical(zero_pad_whole_number(c(-1L, 0L, 1L)), c(NA, "00", "01")) - expect_identical(zero_pad_whole_number(c(-1L, 0L, 1L, 10L, 99L, 100L), n = 2L), - c(NA, "00", "01", "10", "99", NA)) - expect_identical(zero_pad_whole_number(c(-1L, 0L, 1L, 10L, 99L, 100L), n = 3L), - c(NA, "000", "001", "010", "099", "100")) + expect_identical( + zero_pad_whole_number(c(-1L, 0L, 1L, 10L, 99L, 100L), n = 2L), + c(NA, "00", "01", "10", "99", NA) + ) + expect_identical( + zero_pad_whole_number(c(-1L, 0L, 1L, 10L, 99L, 100L), n = 3L), + c(NA, "000", "001", "010", "099", "100") + ) }) test_that("`zero_pad_whole_number()`: ensure `n` is scalar integer", { diff --git a/tests/testthat/test-reg_matches.R b/tests/testthat/test-reg_matches.R index a5ecb573..8f3d5cb1 100644 --- a/tests/testthat/test-reg_matches.R +++ b/tests/testthat/test-reg_matches.R @@ -1,10 +1,8 @@ test_that("`reg_matches()`: basic usage", { - x <- c("sdtm.oak", "sdtm.cdisc", "adam") m <- gregexpr("sdtm", x, fixed = TRUE) # `regmatches()` returns `character(0)` for `"adam"` # But `reg_matches()` returns `NA` for `"adam"` expect_identical(reg_matches(x, m), list("sdtm", "sdtm", NA_character_)) - }) From 20f40ac1de60bd2c55259251424b008af9015f2b Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Fri, 17 Nov 2023 01:12:19 +0000 Subject: [PATCH 28/52] Blank lines removal --- tests/testthat/test-find_int_gap.R | 12 ------------ tests/testthat/test-format_iso8601.R | 2 -- 2 files changed, 14 deletions(-) diff --git a/tests/testthat/test-find_int_gap.R b/tests/testthat/test-find_int_gap.R index 5fcfd453..27bb2a80 100644 --- a/tests/testthat/test-find_int_gap.R +++ b/tests/testthat/test-find_int_gap.R @@ -1,45 +1,34 @@ test_that("`find_int_gap()`: one interval", { - tbl <- find_int_gap(c(1L:3L, 7L:10L)) expect_identical(tbl$start, 4L) expect_identical(tbl$end, 6L) - }) test_that("`find_int_gap()`: two intervals", { - tbl <- find_int_gap(c(1L:3L, 7L:10L, 15L:20L)) expect_identical(tbl$start, c(4L, 11L)) expect_identical(tbl$end, c(6L, 14L)) - }) test_that("`find_int_gap()`: explicit endpoints", { - tbl <- find_int_gap(c(3L:5L, 8L), xmin = 0L, xmax = 10L) expect_identical(tbl$start, c(0L, 6L, 9L)) expect_identical(tbl$end, c(2L, 7L, 10L)) - }) test_that("`find_int_gap()`: no intervals", { - tbl <- find_int_gap(0L:5L) expect_identical(tbl, tibble::tibble(start = integer(), end = integer())) - }) test_that("`find_int_gap()`: ensure `x` is integerish", { - expect_error(find_int_gap(c(1.5, pi))) - }) test_that("`find_int_gap()`: ensure `xmin` and `xmax` are integer scalars", { - # Error because `xmin` and `xmax` are vectors expect_error(find_int_gap(c(1L:3L, 7L:10L), xmin = 1L:2L)) expect_error(find_int_gap(c(1L:3L, 7L:10L), xmax = 3L:4L)) @@ -51,5 +40,4 @@ test_that("`find_int_gap()`: ensure `xmin` and `xmax` are integer scalars", { # Error because `xmin` and `xmax` are character expect_error(find_int_gap(c(1L:3L, 7L:10L), xmin = "1")) expect_error(find_int_gap(c(1L:3L, 7L:10L), xmax = "2")) - }) diff --git a/tests/testthat/test-format_iso8601.R b/tests/testthat/test-format_iso8601.R index 460e9ecd..7753ea6f 100644 --- a/tests/testthat/test-format_iso8601.R +++ b/tests/testthat/test-format_iso8601.R @@ -1,5 +1,4 @@ test_that("`format_iso8601()`: basic usage", { - cols <- c("year", "mon", "mday", "hour", "min", "sec") m <- matrix( c( @@ -22,5 +21,4 @@ test_that("`format_iso8601()`: basic usage", { "2001-03-31T23:10" ) ) - }) From c43343eff41bea9bb515fc0233e43c9edfc59ece Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Fri, 17 Nov 2023 01:13:22 +0000 Subject: [PATCH 29/52] Style update --- R/parse_dttm_fmt.R | 24 +++++++++--------------- data-raw/dtc_formats.R | 7 +------ tests/testthat/test-create_iso8601.R | 16 ++++------------ 3 files changed, 14 insertions(+), 33 deletions(-) diff --git a/R/parse_dttm_fmt.R b/R/parse_dttm_fmt.R index 520187c5..ad6b1299 100644 --- a/R/parse_dttm_fmt.R +++ b/R/parse_dttm_fmt.R @@ -17,12 +17,13 @@ #' #' @keywords internal find_int_gap <- function(x, xmin = min(x), xmax = max(x)) { - - if (!rlang::is_integerish(x)) + if (!rlang::is_integerish(x)) { rlang::abort("`x` must be integer-ish") + } - if (rlang::is_empty(x)) + if (rlang::is_empty(x)) { return(tibble::tibble(start = integer(), end = integer())) + } admiraldev::assert_integer_scalar(xmin) admiraldev::assert_integer_scalar(xmax) @@ -82,7 +83,6 @@ pseq <- function(from, to) { #' #' @keywords internal str_to_anycase <- function(x) { - lst <- stringr::str_split(x, stringr::boundary("character")) purrr::map(lst, ~ stringr::str_c(stringr::str_to_upper(.x), stringr::str_to_lower(.x))) |> purrr::map(~ sprintf("[%s]", .x)) |> @@ -105,13 +105,12 @@ str_to_anycase <- function(x) { #' #' @keywords internal months_abb_regex <- function(x = month.abb, case = c("any", "upper", "lower", "title")) { - admiraldev::assert_character_vector(x) case <- match.arg(case) if (identical(case, "any")) x <- str_to_anycase(x) if (identical(case, "upper")) x <- stringr::str_to_upper(x) - if (identical(case, "lower")) x <- stringr::str_to_lower(x) + if (identical(case, "lower")) x <- stringr::str_to_lower(x) if (identical(case, "title")) x <- stringr::str_to_title(x) stringr::str_flatten(x, collapse = "|") @@ -149,7 +148,6 @@ fmt_c <- function(sec = "S+", mday = "d+", mon = "m+", year = "y+") { - c( sec = sec, min = min, @@ -158,7 +156,6 @@ fmt_c <- function(sec = "S+", mon = mon, year = year ) - } #' Utility function to assemble a regex of alternative patterns @@ -182,7 +179,6 @@ fmt_c <- function(sec = "S+", #' #' @keywords internal regex_or <- function(x, .open = FALSE, .close = FALSE) { - admiraldev::assert_character_vector(x) admiraldev::assert_logical_scalar(.open) admiraldev::assert_logical_scalar(.close) @@ -247,7 +243,6 @@ fmt_rg <- function( mday_na = na, mon_na = na, year_na = na) { - sec_na <- ifelse(!is.null(sec_na), regex_or(sec_na, .open = TRUE), "") min_na <- @@ -274,7 +269,6 @@ fmt_rg <- function( #' @rdname parse_dttm_fmt parse_dttm_fmt_ <- function(fmt, pattern) { - match_data <- regexpr(pattern, fmt) match <- reg_matches(fmt, match_data) @@ -329,12 +323,13 @@ parse_dttm_fmt_ <- function(fmt, pattern) { #' sdtm.oak:::parse_dttm_fmt("year y") #' #' # Specify custom patterns -#' sdtm.oak:::parse_dttm_fmt("year month day", -#' sdtm.oak:::fmt_c(year = "year", mon = "month", mday = "day")) +#' sdtm.oak:::parse_dttm_fmt( +#' "year month day", +#' sdtm.oak:::fmt_c(year = "year", mon = "month", mday = "day") +#' ) #' #' @keywords internal parse_dttm_fmt <- function(fmt, patterns = fmt_c()) { - admiraldev::assert_character_scalar(fmt) fmt_dttmc <- @@ -364,7 +359,6 @@ parse_dttm_fmt <- function(fmt, patterns = fmt_c()) { dplyr::bind_rows(fmt_dttmc, fmt_delim) |> dplyr::arrange(.data$start) - } #' Convert a parsed date/time format to regex diff --git a/data-raw/dtc_formats.R b/data-raw/dtc_formats.R index 8a8acdb6..365f928c 100644 --- a/data-raw/dtc_formats.R +++ b/data-raw/dtc_formats.R @@ -1,28 +1,23 @@ ## code to prepare `dtc_formats` dataset goes here dtc_formats <- tibble::tribble( - ~ fmt, ~type, ~ description, + ~fmt, ~type, ~description, "ymd", "date", "Parses a date: year, month, and month day.", "y m d", "date", "Parses a date: year, month, and month day.", "y-m-d", "date", "Parses a date: year, month, and month day.", - "dmy", "date", "Parses a date: month day, month and year.", "d m y", "date", "Parses a date: month day, month and year.", "d-m-y", "date", "Parses a date: month day, month and year.", - "ym", "date", "Parses a date: year and month.", "y m", "date", "Parses a date: year and month.", "y-m", "date", "Parses a date: year and month.", - "my", "date", "Parses a date: month and year.", "m y", "date", "Parses a date: month and year.", "m-y", "date", "Parses a date: month and year.", - "HM", "time", "Parses a time: hour and minutes.", "HMS", "time", "Parses a time: hour, minutes, and seconds.", "H:M", "time", "Parses a time: hour and minutes.", "H:M:S", "time", "Parses a time: hour, minutes and seconds.", - "ymdH:M:S", "datetime", "Parses a date-time: year, month, month day, hour, minutes, and seconds.", "ymd H:M:S", "datetime", "Parses a date-time: year, month, month day, hour, minutes, and seconds.", "y-m-d H:M:S", "datetime", "Parses a date-time: year, month, month day, hour, minutes, and seconds.", diff --git a/tests/testthat/test-create_iso8601.R b/tests/testthat/test-create_iso8601.R index 42b9d4b4..f1325cfc 100644 --- a/tests/testthat/test-create_iso8601.R +++ b/tests/testthat/test-create_iso8601.R @@ -1,5 +1,4 @@ test_that("`create_iso8601()`: individual date components", { - x <- c("0", "50", "1950", "80", "1980", "2000") y0 <- create_iso8601(x, .format = "y", .check_format = FALSE) y1 <- c(NA, "2050", "1950", "1980", "1980", "2000") @@ -14,11 +13,9 @@ test_that("`create_iso8601()`: individual date components", { y0 <- create_iso8601(x, .format = "d", .check_format = FALSE) y1 <- c("----00", "----00", "----01", "----01", "----10", "----31") expect_identical(y0, y1) - }) test_that("`create_iso8601()`: dates", { - y1 <- c("1999-01-01", "2000-01-01", "1999-01-01", "1999-12-31") x <- c("19990101", "20000101", "990101", "991231") @@ -32,11 +29,9 @@ test_that("`create_iso8601()`: dates", { x <- c("1999 01 01", "2000 01 01", "99 01 01", "99 12 31") y0 <- create_iso8601(x, .format = "y m d", .check_format = FALSE) expect_identical(y0, y1) - }) test_that("`create_iso8601()`: times: hours and minutes", { - y1 <- c("-----T15:20", "-----T00:10", "-----T23:01", "-----T00:00") x <- c("1520", "0010", "2301", "0000") @@ -50,11 +45,9 @@ test_that("`create_iso8601()`: times: hours and minutes", { x <- c("15h20", "00h10", "23h01", "00h00") y0 <- create_iso8601(x, .format = "HhM", .check_format = FALSE) expect_identical(y0, y1) - }) test_that("`create_iso8601()`: times: hours, minutes and seconds", { - x <- c("152000", "001059", "230112.123", "00002.") y0 <- create_iso8601(x, .format = "HMS", .check_format = FALSE) y1 <- c("-----T15:20:00", "-----T00:10:59", "-----T23:01:12.123", "-----T00:00:02") @@ -64,20 +57,19 @@ test_that("`create_iso8601()`: times: hours, minutes and seconds", { y0 <- create_iso8601(x, .format = "H:M:S", .check_format = FALSE) y1 <- c(y1, "-----T05:01:04") expect_identical(y0, y1) - }) test_that("`create_iso8601()`: dates and times", { - dates <- c("1999-01-01", "2000-01-01", "99-01-01", "99-12-31") times <- c("1520", "0010", "2301", "0000") iso8601_dttm <- create_iso8601(dates, times, .format = c("y-m-d", "HM"), .check_format = FALSE) expectation <- - c("1999-01-01T15:20", + c( + "1999-01-01T15:20", "2000-01-01T00:10", "1999-01-01T23:01", - "1999-12-31T00:00") + "1999-12-31T00:00" + ) expect_identical(iso8601_dttm, expectation) - }) From 8bac47b4f9ede702eea7b7ed2521001df2cecc58 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Fri, 17 Nov 2023 01:14:43 +0000 Subject: [PATCH 30/52] Update docs after style update --- man/assert_dtc_format.Rd | 2 +- man/parse_dttm_fmt.Rd | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/man/assert_dtc_format.Rd b/man/assert_dtc_format.Rd index 4ba28bdb..fe19d9e6 100644 --- a/man/assert_dtc_format.Rd +++ b/man/assert_dtc_format.Rd @@ -29,7 +29,7 @@ sdtm.oak:::assert_dtc_format(list(c("ymd", "y-m-d"), "H:M:S")) # These commands should throw an error if (FALSE) { -# Note that `"year, month, day"` is not a supported format. + # Note that `"year, month, day"` is not a supported format. sdtm.oak:::assert_dtc_format("year, month, day") } diff --git a/man/parse_dttm_fmt.Rd b/man/parse_dttm_fmt.Rd index 9090f77a..9a45f582 100644 --- a/man/parse_dttm_fmt.Rd +++ b/man/parse_dttm_fmt.Rd @@ -56,8 +56,10 @@ sdtm.oak:::parse_dttm_fmt("yymmdd HHMMSS") sdtm.oak:::parse_dttm_fmt("year y") # Specify custom patterns -sdtm.oak:::parse_dttm_fmt("year month day", - sdtm.oak:::fmt_c(year = "year", mon = "month", mday = "day")) +sdtm.oak:::parse_dttm_fmt( + "year month day", + sdtm.oak:::fmt_c(year = "year", mon = "month", mday = "day") +) } \keyword{internal} From 0bad5edc49ddc7ed2d11995a79133648acb59d36 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Sun, 19 Nov 2023 19:49:31 +0000 Subject: [PATCH 31/52] Refactor code about parsing dttm formats Made `dttm_fmt_to_regex()` interface more intitutive by accepting directly the argument `fmt` instead of `tbl_fmt_c` which was an intermediate R object returned by `parse_dttm_fmt()`. Also, introduced unit tests for `parse_dttm_fmt_()`. --- R/dtc_parse_dttm.R | 3 +- R/parse_dttm_fmt.R | 36 ++++---- man/dttm_fmt_to_regex.Rd | 20 ++--- man/parse_dttm_fmt.Rd | 4 +- tests/testthat/test-parse_dttm_fmt.R | 130 +++++++++++++++++++++++++++ 5 files changed, 160 insertions(+), 33 deletions(-) create mode 100644 tests/testthat/test-parse_dttm_fmt.R diff --git a/R/dtc_parse_dttm.R b/R/dtc_parse_dttm.R index 73f66c4d..8f976fa2 100644 --- a/R/dtc_parse_dttm.R +++ b/R/dtc_parse_dttm.R @@ -11,10 +11,9 @@ parse_dttm_ <- function(dttm, year_na = na) { admiraldev::assert_character_scalar(fmt) - tbl_fmt_c <- parse_dttm_fmt(fmt) regex <- dttm_fmt_to_regex( - tbl_fmt_c, + fmt, fmt_regex = fmt_rg( na = na, sec_na = sec_na, diff --git a/R/parse_dttm_fmt.R b/R/parse_dttm_fmt.R index ad6b1299..7ba9b6d4 100644 --- a/R/parse_dttm_fmt.R +++ b/R/parse_dttm_fmt.R @@ -269,10 +269,17 @@ fmt_rg <- function( #' @rdname parse_dttm_fmt parse_dttm_fmt_ <- function(fmt, pattern) { + + admiraldev::assert_character_scalar(fmt) + admiraldev::assert_character_scalar(pattern) + + if(identical(nchar(pattern), 0L)) + rlang::abort("`pattern` must be a literal string of at least one char.") + match_data <- regexpr(pattern, fmt) match <- reg_matches(fmt, match_data) - is_match <- !is.na(match) + is_match <- (!length(match)) || (!is.na(match)) start <- ifelse(is_match, match_data, NA_integer_) len <- ifelse(is_match, attr(match_data, "match.length"), NA_integer_) @@ -294,8 +301,8 @@ parse_dttm_fmt_ <- function(fmt, pattern) { #' if you plan on passing a different set of patterns. #' #' @returns A [tibble][tibble::tibble-package] of seven columns: -#' - `fmt_c`: date/time format component. Values are either `"year"`, `"mon"`, `"mday"`, -#' `"hour"`, `"min"`, `"sec"`, or `NA`. +#' - `fmt_c`: date/time format component. Values are either `"year"`, `"mon"`, +#' `"mday"`, `"hour"`, `"min"`, `"sec"`, or `NA`. #' - `pat`: Regexp used to parse the date/time component. #' - `cap`: The captured substring from the format. #' - `start`: Start position in the format string for this capture. @@ -368,8 +375,7 @@ parse_dttm_fmt <- function(fmt, patterns = fmt_c()) { #' mapping of date/time component formats to regexps and generates a single #' regular expression with groups for matching each of the date/time components. #' -#' @param tbl_fmt_c A [tibble][tibble::tibble-package] of parsed date/time -#' format components as returned by [parse_dttm_fmt()]. +#' @param fmt A format string (scalar) to be parsed by `patterns`. #' @param fmt_regex A named character vector of regexps, one for each date/time #' component. #' @param anchored Whether the final regex should be anchored, i.e. bounded by @@ -379,21 +385,19 @@ parse_dttm_fmt <- function(fmt, patterns = fmt_c()) { #' components according to a format. #' #' @examples -#' tbl_fmt_c <- sdtm.oak:::parse_dttm_fmt("y") -#' sdtm.oak:::dttm_fmt_to_regex(tbl_fmt_c) -#' sdtm.oak:::dttm_fmt_to_regex(tbl_fmt_c, anchored = FALSE) -#' -#' tbl_fmt_c <- sdtm.oak:::parse_dttm_fmt("m") -#' sdtm.oak:::dttm_fmt_to_regex(tbl_fmt_c) +#' sdtm.oak:::dttm_fmt_to_regex("y") +#' sdtm.oak:::dttm_fmt_to_regex("y", anchored = FALSE) #' -#' tbl_fmt_c <- sdtm.oak:::parse_dttm_fmt("ymd") -#' sdtm.oak:::dttm_fmt_to_regex(tbl_fmt_c) +#' sdtm.oak:::dttm_fmt_to_regex("m") +#' sdtm.oak:::dttm_fmt_to_regex("ymd") #' -#' tbl_fmt_c <- sdtm.oak:::parse_dttm_fmt("ymd HH:MM:SS") -#' sdtm.oak:::dttm_fmt_to_regex(tbl_fmt_c) +#' sdtm.oak:::dttm_fmt_to_regex("ymd HH:MM:SS") #' #' @keywords internal -dttm_fmt_to_regex <- function(tbl_fmt_c, fmt_regex = fmt_rg(), anchored = TRUE) { +dttm_fmt_to_regex <- function(fmt, fmt_regex = fmt_rg(), anchored = TRUE) { + + tbl_fmt_c <- parse_dttm_fmt(fmt) + fmt_regex <- tbl_fmt_c |> dplyr::mutate(regex = dplyr::if_else(is.na(.data$fmt_c), .data$cap, fmt_regex[.data$fmt_c])) |> diff --git a/man/dttm_fmt_to_regex.Rd b/man/dttm_fmt_to_regex.Rd index 5ba3fc33..4701c213 100644 --- a/man/dttm_fmt_to_regex.Rd +++ b/man/dttm_fmt_to_regex.Rd @@ -4,11 +4,10 @@ \alias{dttm_fmt_to_regex} \title{Convert a parsed date/time format to regex} \usage{ -dttm_fmt_to_regex(tbl_fmt_c, fmt_regex = fmt_rg(), anchored = TRUE) +dttm_fmt_to_regex(fmt, fmt_regex = fmt_rg(), anchored = TRUE) } \arguments{ -\item{tbl_fmt_c}{A \link[tibble:tibble-package]{tibble} of parsed date/time -format components as returned by \code{\link[=parse_dttm_fmt]{parse_dttm_fmt()}}.} +\item{fmt}{A format string (scalar) to be parsed by \code{patterns}.} \item{fmt_regex}{A named character vector of regexps, one for each date/time component.} @@ -27,18 +26,13 @@ mapping of date/time component formats to regexps and generates a single regular expression with groups for matching each of the date/time components. } \examples{ -tbl_fmt_c <- sdtm.oak:::parse_dttm_fmt("y") -sdtm.oak:::dttm_fmt_to_regex(tbl_fmt_c) -sdtm.oak:::dttm_fmt_to_regex(tbl_fmt_c, anchored = FALSE) +sdtm.oak:::dttm_fmt_to_regex("y") +sdtm.oak:::dttm_fmt_to_regex("y", anchored = FALSE) -tbl_fmt_c <- sdtm.oak:::parse_dttm_fmt("m") -sdtm.oak:::dttm_fmt_to_regex(tbl_fmt_c) +sdtm.oak:::dttm_fmt_to_regex("m") +sdtm.oak:::dttm_fmt_to_regex("ymd") -tbl_fmt_c <- sdtm.oak:::parse_dttm_fmt("ymd") -sdtm.oak:::dttm_fmt_to_regex(tbl_fmt_c) - -tbl_fmt_c <- sdtm.oak:::parse_dttm_fmt("ymd HH:MM:SS") -sdtm.oak:::dttm_fmt_to_regex(tbl_fmt_c) +sdtm.oak:::dttm_fmt_to_regex("ymd HH:MM:SS") } \keyword{internal} diff --git a/man/parse_dttm_fmt.Rd b/man/parse_dttm_fmt.Rd index 9a45f582..9b9ecaab 100644 --- a/man/parse_dttm_fmt.Rd +++ b/man/parse_dttm_fmt.Rd @@ -20,8 +20,8 @@ if you plan on passing a different set of patterns.} \value{ A \link[tibble:tibble-package]{tibble} of seven columns: \itemize{ -\item \code{fmt_c}: date/time format component. Values are either \code{"year"}, \code{"mon"}, \code{"mday"}, -\code{"hour"}, \code{"min"}, \code{"sec"}, or \code{NA}. +\item \code{fmt_c}: date/time format component. Values are either \code{"year"}, \code{"mon"}, +\code{"mday"}, \code{"hour"}, \code{"min"}, \code{"sec"}, or \code{NA}. \item \code{pat}: Regexp used to parse the date/time component. \item \code{cap}: The captured substring from the format. \item \code{start}: Start position in the format string for this capture. diff --git a/tests/testthat/test-parse_dttm_fmt.R b/tests/testthat/test-parse_dttm_fmt.R new file mode 100644 index 00000000..d618ceac --- /dev/null +++ b/tests/testthat/test-parse_dttm_fmt.R @@ -0,0 +1,130 @@ +test_that("`parse_dttm_fmt_`: empty fmt", { + + x <- + tibble::tibble( + pat = character(), + cap = character(), + start = integer(), + end = integer(), + len = integer() + ) + expect_identical(x, parse_dttm_fmt_("", pattern = "y")) + expect_error(parse_dttm_fmt_(character(), pattern = "y")) +}) + +test_that("`parse_dttm_fmt_`: empty pattern", { + expect_error(parse_dttm_fmt_("ymd", pattern = "")) + expect_error(parse_dttm_fmt_("ymd", pattern = character())) +}) + +test_that("`parse_dttm_fmt_`: basic usage", { + + fmt1 <- "y m d" + fmt2 <- "y-m-d" + + x1 <- + tibble::tibble( + pat = "y", + cap = "y", + start = 1L, + end = 1L, + len = 1L + ) + expect_identical(x1, parse_dttm_fmt_(fmt1, pattern = "y")) + expect_identical(x1, parse_dttm_fmt_(fmt2, pattern = "y")) + + x2 <- + tibble::tibble( + pat = "m", + cap = "m", + start = 3L, + end = 3L, + len = 1L + ) + expect_identical(x2, parse_dttm_fmt_(fmt1, pattern = "m")) + expect_identical(x2, parse_dttm_fmt_(fmt2, pattern = "m")) + + x3 <- + tibble::tibble( + pat = "d", + cap = "d", + start = 5L, + end = 5L, + len = 1L + ) + + expect_identical(x3, parse_dttm_fmt_(fmt1, pattern = "d")) + expect_identical(x3, parse_dttm_fmt_(fmt2, pattern = "d")) +}) + +test_that("`parse_dttm_fmt_`: patterns", { + + fmt <- "HH:MM:SS" + + x1 <- + tibble::tibble( + pat = "H", + cap = "H", + start = 1L, + end = 1L, + len = 1L + ) + + x2 <- + tibble::tibble( + pat = "HH", + cap = "HH", + start = 1L, + end = 2L, + len = 2L + ) + + x3 <- + tibble::tibble( + pat = "H+", + cap = "HH", + start = 1L, + end = 2L, + len = 2L + ) + + expect_identical(x1, parse_dttm_fmt_(fmt, pattern = "H")) + expect_identical(x2, parse_dttm_fmt_(fmt, pattern = "HH")) + expect_identical(x3, parse_dttm_fmt_(fmt, pattern = "H+")) +}) + +test_that("`parse_dttm_fmt_`: only the first match is returned", { + + fmt <- "H M S H" + + x1 <- + tibble::tibble( + pat = "H", + cap = "H", + start = 1L, + end = 1L, + len = 1L + ) + + x2 <- + tibble::tibble( + pat = character(), + cap = character(), + start = integer(), + end = integer(), + len = integer() + ) + + x3 <- + tibble::tibble( + pat = "H+", + cap = "H", + start = 1L, + end = 1L, + len = 1L + ) + + expect_identical(x1, parse_dttm_fmt_(fmt, pattern = "H")) + expect_identical(x2, parse_dttm_fmt_(fmt, pattern = "HH")) + expect_identical(x3, parse_dttm_fmt_(fmt, pattern = "H+")) +}) From dae8861ad7897eb963c13d8f994fa2d74a1b85d2 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Sun, 19 Nov 2023 20:29:27 +0000 Subject: [PATCH 32/52] Make `parse_dttm_fmt()` handle the case of no matching format components --- R/parse_dttm_fmt.R | 28 ++++++++++++++++++++++++++-- tests/testthat/test-parse_dttm_fmt.R | 16 ++++++++++++++++ 2 files changed, 42 insertions(+), 2 deletions(-) diff --git a/R/parse_dttm_fmt.R b/R/parse_dttm_fmt.R index 7ba9b6d4..d6db5d41 100644 --- a/R/parse_dttm_fmt.R +++ b/R/parse_dttm_fmt.R @@ -267,6 +267,26 @@ fmt_rg <- function( ) } +fmt_dttmc <- + function(fmt_c = character(), + pat = character(), + cap = character(), + start = integer(), + end = integer(), + len = integer(), + ord = integer()) { + tibble::tibble( + fmt_c = fmt_c, + pat = pat, + cap = cap, + start = start, + end = end, + len = len, + ord = ord + ) + + } + #' @rdname parse_dttm_fmt parse_dttm_fmt_ <- function(fmt, pattern) { @@ -346,6 +366,9 @@ parse_dttm_fmt <- function(fmt, patterns = fmt_c()) { # Get captures' ranks while leaving NA as NA (`rank()` won't do this.) fmt_dttmc$ord <- dplyr::row_number(fmt_dttmc$start) + if(identical(nrow(fmt_dttmc), 0L)) + return(fmt_dttmc()) + fmt_len <- nchar(fmt) start <- end <- NULL # To avoid a "no visible binding for global variable" NOTE. @@ -355,13 +378,14 @@ parse_dttm_fmt <- function(fmt, patterns = fmt_c()) { delim <- with(delim_pos, stringr::str_sub(fmt, start = start, end = end)) fmt_delim <- - tibble::tibble( + fmt_dttmc( fmt_c = NA_character_, pat = NA_character_, cap = delim, start = delim_pos$start, end = delim_pos$end, - len = end - start + 1L + len = delim_pos$end - delim_pos$start + 1L, + ord = NA_integer_ ) dplyr::bind_rows(fmt_dttmc, fmt_delim) |> diff --git a/tests/testthat/test-parse_dttm_fmt.R b/tests/testthat/test-parse_dttm_fmt.R index d618ceac..8a9ea276 100644 --- a/tests/testthat/test-parse_dttm_fmt.R +++ b/tests/testthat/test-parse_dttm_fmt.R @@ -128,3 +128,19 @@ test_that("`parse_dttm_fmt_`: only the first match is returned", { expect_identical(x2, parse_dttm_fmt_(fmt, pattern = "HH")) expect_identical(x3, parse_dttm_fmt_(fmt, pattern = "H+")) }) + +test_that("`parse_dttm_fmt`: empty fmt", { + + x <- + tibble::tibble( + fmt_c = character(), + pat = character(), + cap = character(), + start = integer(), + end = integer(), + len = integer(), + ord = integer() + ) + expect_identical(x, parse_dttm_fmt("", pattern = "y")) + expect_error(parse_dttm_fmt_(character(), pattern = "y")) +}) From 80e727896e9443943991737022ca721bff6c0241 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Sun, 19 Nov 2023 20:31:22 +0000 Subject: [PATCH 33/52] Use `fmt_dttmc()` in unit tests --- tests/testthat/test-parse_dttm_fmt.R | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/tests/testthat/test-parse_dttm_fmt.R b/tests/testthat/test-parse_dttm_fmt.R index 8a9ea276..68a5e486 100644 --- a/tests/testthat/test-parse_dttm_fmt.R +++ b/tests/testthat/test-parse_dttm_fmt.R @@ -131,16 +131,6 @@ test_that("`parse_dttm_fmt_`: only the first match is returned", { test_that("`parse_dttm_fmt`: empty fmt", { - x <- - tibble::tibble( - fmt_c = character(), - pat = character(), - cap = character(), - start = integer(), - end = integer(), - len = integer(), - ord = integer() - ) - expect_identical(x, parse_dttm_fmt("", pattern = "y")) + expect_identical(fmt_dttmc(), parse_dttm_fmt("", pattern = "y")) expect_error(parse_dttm_fmt_(character(), pattern = "y")) }) From 507b911e465bc9359acc86c1ff53daadf89d22f8 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Mon, 20 Nov 2023 11:52:09 +0000 Subject: [PATCH 34/52] Small clarification on unit test description --- tests/testthat/test-parse_dttm_fmt.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-parse_dttm_fmt.R b/tests/testthat/test-parse_dttm_fmt.R index 68a5e486..ed79dae7 100644 --- a/tests/testthat/test-parse_dttm_fmt.R +++ b/tests/testthat/test-parse_dttm_fmt.R @@ -57,7 +57,7 @@ test_that("`parse_dttm_fmt_`: basic usage", { expect_identical(x3, parse_dttm_fmt_(fmt2, pattern = "d")) }) -test_that("`parse_dttm_fmt_`: patterns", { +test_that("`parse_dttm_fmt_`: pattern variations", { fmt <- "HH:MM:SS" From d6cd751dfd06d9747cfa528974c7ae449b381d01 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Mon, 20 Nov 2023 17:10:47 +0000 Subject: [PATCH 35/52] Remove futile assertion from `assert_dtc_fmt()` --- R/dtc_utils.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/dtc_utils.R b/R/dtc_utils.R index d97e4350..93361403 100644 --- a/R/dtc_utils.R +++ b/R/dtc_utils.R @@ -16,7 +16,6 @@ #' #' @keywords internal assert_dtc_fmt <- function(fmt) { - admiraldev::assert_character_vector(fmt) rlang::arg_match(fmt, values = sdtm.oak::dtc_formats$fmt, multiple = TRUE From 2079aa7d60077f503ab369dcc2627981525658b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Fory=C5=9B?= Date: Mon, 20 Nov 2023 18:48:07 +0100 Subject: [PATCH 36/52] Add staged_dependencies for admiraldev (#26) * Add staged_dependencies for admiraldev * Add new line * Fix admiraldev links. * Fix admiraldev articles links. * Remove R 4.1 a it causing dependencies issues. We want to use purrr >= 1.0.0 * Test latest lintr * Test lintr with install package locally * Add install pacakge variable for lintr * Skip multi version pkgdown workflow. * R build ignore staged_dependencies.yaml --- .Rbuildignore | 1 + .Rprofile | 2 +- .devcontainer/4.1/devcontainer.json | 95 -- .github/CONTRIBUTING.md | 4 +- .github/pull_request_template.md | 8 +- .github/workflows/common.yml | 10 +- .github/workflows/r-renv-lock.yml | 1 - DESCRIPTION | 6 +- renv/profiles/4.1/renv.lock | 1254 -------------------------- renv/profiles/4.1/renv/.gitignore | 7 - renv/profiles/4.1/renv/settings.json | 21 - staged_dependencies.yaml | 11 + 12 files changed, 26 insertions(+), 1394 deletions(-) delete mode 100644 .devcontainer/4.1/devcontainer.json delete mode 100644 renv/profiles/4.1/renv.lock delete mode 100644 renv/profiles/4.1/renv/.gitignore delete mode 100644 renv/profiles/4.1/renv/settings.json create mode 100644 staged_dependencies.yaml diff --git a/.Rbuildignore b/.Rbuildignore index 4bca79b1..41c60f38 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -12,3 +12,4 @@ ^LICENSE\.md$ ^\.lintr$ ^data-raw$ +^staged_dependencies.yaml$ diff --git a/.Rprofile b/.Rprofile index a94c4d6f..062e3271 100644 --- a/.Rprofile +++ b/.Rprofile @@ -27,7 +27,7 @@ Sys.setenv("RENV_CONFIG_AUTO_SNAPSHOT" = FALSE) if (!Sys.getenv("RENV_AUTOLOADER_ENABLED") %in% c("false", "FALSE")) { .renv_profile <- paste(R.version$major, substr(R.version$minor, 1, 1), sep = ".") if (!file.exists("./renv/profile")) { - if (.renv_profile %in% c("4.1", "4.2", "4.3")) { + if (.renv_profile %in% c("4.2", "4.3")) { message("Set renv profile to `", .renv_profile, "`") Sys.setenv("RENV_PROFILE" = .renv_profile) } else { diff --git a/.devcontainer/4.1/devcontainer.json b/.devcontainer/4.1/devcontainer.json deleted file mode 100644 index e60b4fd9..00000000 --- a/.devcontainer/4.1/devcontainer.json +++ /dev/null @@ -1,95 +0,0 @@ -{ - // https://containers.dev/implementors/json_reference/ - "name": "sdtm.oak (RStudio) container", - "image": "ghcr.io/pharmaverse/sdtm.oak-4.1:latest", - // Install Dev Container Features. More info: https://containers.dev/features - "containerEnv": { - "ROOT": "true", - "PASSWORD": "rstudio", - "DISABLE_AUTH": "true", - "RENV_AUTOLOADER_ENABLED": "false" - }, - "features": { - "./ca-cert": {}, - "ghcr.io/rocker-org/devcontainer-features/r-rig:1": { - "version": "none", - "vscodeRSupport": "full", - "installRadian": true, - "installVscDebugger": true - }, - "ghcr.io/rocker-org/devcontainer-features/renv-cache:latest": {}, - "ghcr.io/devcontainers/features/common-utils:2": { - "installZsh": true, - "configureZshAsDefaultShell": false, - "installOhMyZsh": true, - "username": "rstudio", - "upgradePackages": false - }, - "ghcr.io/mikaello/devcontainer-features/modern-shell-utils:1": {} - }, - "overrideFeatureInstallOrder": [ - "./ca-cert", - "./arm64-repos", - "ghcr.io/devcontainers/features/common-utils", - "ghcr.io/rocker-org/devcontainer-features/renv-cache", - "ghcr.io/rocker-org/devcontainer-features/r-rig", - "ghcr.io/mikaello/devcontainer-features/modern-shell-utils" - ], - "init": true, - "overrideCommand": false, - - "postCreateCommand": "bash ./.devcontainer/postCreateCommand.sh", - - "postAttachCommand": "rstudio || true", - - "customizations": { - "codespaces": { - "repositories": { - "pharmaverse/mint": { - "permissions": "write-all" - }, - "pharmaverse/raw.synthetic.data": { - "permissions": "write-all" - } - } - }, - "vscode": { - "settings": { - "r.rterm.linux": "/usr/local/bin/radian", - "r.bracketedPaste": true, - "editor.bracketPairColorization.enabled": true, - "editor.guides.bracketPairs": "active" - }, - "extensions": [ - "vsls-contrib.codetour", - "GitHub.copilot", - "GitHub.copilot-chat", - // R extensions - "ikuyadeu.r", - "REditorSupport.r-lsp", - // Extra extension - "streetsidesoftware.code-spell-checker", - "eamodio.gitlens", - "cweijan.vscode-office", - "donjayamanne.githistory", - "GitHub.vscode-github-actions", - "GitHub.vscode-pull-request-github", - "GitHub.remotehub", - "alefragnani.Bookmarks", - "vscode-icons-team.vscode-icons" - ] - } - }, - - // RStudio ports - "forwardPorts": [8787], - "portsAttributes": { - "8787": { - "label": "Rstudio", - "requireLocalPort": true, - "onAutoForward": "openBrowser" - } - }, - // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root - "remoteUser": "rstudio" -} diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index df42b284..8a1ba358 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -3,7 +3,7 @@ This outlines how to propose a change to the aok package. For more detailed info about contributing to {oak}, and other [pharmaverse packages](https://pharmaverse.org/), please see the [development process -guide](https://pharmaverse.github.io/admiraldev/main/articles/development_process.html) +guide](https://pharmaverse.github.io/admiraldev/articles/development_process.html) as well as other Developer Guides in the Articles section of the [{admiral} website](https://pharmaverse.github.io/admiral/cran-release/index.html) @@ -23,7 +23,7 @@ feedback. Since we are not a 100% fully resourced software development team it might be that some issues will take longer to respond to depending on the amount of overall issues. - * Familiarize yourself with our [programming strategy](https://pharmaverse.github.io/admiraldev/main/articles/programming_strategy.html), guidance for [GitHub usage](https://pharmaverse.github.io/admiraldev/main/articles/git_usage.html) and [unit testing](https://pharmaverse.github.io/admiraldev/main/articles/unit_test_guidance.html). + * Familiarize yourself with our [programming strategy](https://pharmaverse.github.io/admiraldev/articles/programming_strategy.html), guidance for [GitHub usage](https://pharmaverse.github.io/admiraldev/articles/git_usage.html) and [unit testing](https://pharmaverse.github.io/admiraldev/articles/unit_test_guidance.html). * All newly [created issues](https://github.com/pharmaverse/oak/issues) will be reviewed within the next backlog meeting and the creator will receive an diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index f49a5340..e32853fc 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -1,6 +1,6 @@ Thank you for your Pull Request! We have developed this task checklist from the [Development Process -Guide](https://pharmaverse.github.io/admiraldev/devel/articles/development_process.html) +Guide](https://pharmaverse.github.io/admiraldev/articles/development_process.html) to help with the final steps of the process. Completing the below tasks helps to ensure our reviewers can maximize their time on your code as well as making sure the oak codebase remains robust and consistent. @@ -18,13 +18,13 @@ Request Title (Use Edit button in top-right if you need to update) - [ ] Updated relevant unit tests or have written new unit tests, which should consider realistic data scenarios and edge cases, e.g. empty datasets, errors, boundary cases etc. - See -[Unit Test Guide](https://pharmaverse.github.io/admiraldev/devel/articles/unit_test_guidance.html#tests-should-be-robust-to-cover-realistic-data-scenarios) +[Unit Test Guide](https://pharmaverse.github.io/admiraldev/articles/unit_test_guidance.html#tests-should-be-robust-to-cover-realistic-data-scenarios) - [ ] If you removed/replaced any function and/or function parameters, did you fully follow the -[deprecation guidance](https://pharmaverse.github.io/admiraldev/devel/articles/programming_strategy.html#deprecation)? +[deprecation guidance](https://pharmaverse.github.io/admiraldev/articles/programming_strategy.html#deprecation)? - [ ] Update to all relevant roxygen headers and examples, including keywords and families. Refer to the -[categorization of functions](https://pharmaverse.github.io/admiraldev/devel/articles/programming_strategy.html#categorization-of-functions) to tag appropriate keyword/family. +[categorization of functions](https://pharmaverse.github.io/admiraldev/articles/programming_strategy.html#categorization-of-functions) to tag appropriate keyword/family. - [ ] Run `devtools::document()` so all `.Rd` files in the `man` folder and the `NAMESPACE` file in the project root are updated appropriately - [ ] Address any updates needed for vignettes and/or templates diff --git a/.github/workflows/common.yml b/.github/workflows/common.yml index a8c9a5d3..f4aacbb0 100644 --- a/.github/workflows/common.yml +++ b/.github/workflows/common.yml @@ -71,17 +71,15 @@ jobs: # Whether to skip multiversion docs # Note that if you have multiple versions of docs, # your URL links are likely to break due to path changes - skip-multiversion-docs: false - latest-tag-alt-name: cran-release - multiversion-docs-landing-page: cran-release - branches-or-tags-to-list: >- - ^cran-release$|^main$|^v([0-9]+\\.)?([0-9]+\\.)?([0-9]+)$ + skip-multiversion-docs: true linter: name: Lint - uses: pharmaverse/admiralci/.github/workflows/lintr.yml@main + uses: pharmaverse/admiralci/.github/workflows/lintr.yml@lintr-latest-cran if: github.event_name == 'pull_request' with: r-version: "4.3" + latest-lintr: "true" + install-package: "true" links: name: Links uses: pharmaverse/admiralci/.github/workflows/links.yml@main diff --git a/.github/workflows/r-renv-lock.yml b/.github/workflows/r-renv-lock.yml index fd5015d4..d07747e0 100644 --- a/.github/workflows/r-renv-lock.yml +++ b/.github/workflows/r-renv-lock.yml @@ -22,7 +22,6 @@ jobs: fail-fast: false matrix: config: - - {os: ubuntu-20.04, r: '4.1', repos: 'https://packagemanager.posit.co/cran/2022-03-10/'} - {os: ubuntu-20.04, r: '4.2', repos: 'https://packagemanager.posit.co/cran/2023-03-15/'} - {os: ubuntu-20.04, r: '4.3', repos: 'https://packagemanager.posit.co/cran/2023-04-20/'} diff --git a/DESCRIPTION b/DESCRIPTION index 4509f099..d115078b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -19,11 +19,11 @@ Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) RoxygenNote: 7.2.3 -Depends: R (>= 4.1) +Depends: R (>= 4.2) Imports: admiraldev, - dplyr, - purrr, + dplyr (>= 1.0.0), + purrr (>= 1.0.0), rlang (>= 1.0.0), stringr, tibble diff --git a/renv/profiles/4.1/renv.lock b/renv/profiles/4.1/renv.lock deleted file mode 100644 index 5b858691..00000000 --- a/renv/profiles/4.1/renv.lock +++ /dev/null @@ -1,1254 +0,0 @@ -{ - "R": { - "Version": "4.1.3", - "Repositories": [ - { - "Name": "CRAN", - "URL": "https://packagemanager.posit.co/cran/latest" - }, - { - "Name": "RSPM", - "URL": "https://packagemanager.posit.co/cran/2022-03-10" - } - ] - }, - "Packages": { - "R.cache": { - "Package": "R.cache", - "Version": "0.16.0", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "R", - "R.methodsS3", - "R.oo", - "R.utils", - "digest", - "utils" - ], - "Hash": "fe539ca3f8efb7410c3ae2cf5fe6c0f8" - }, - "R.methodsS3": { - "Package": "R.methodsS3", - "Version": "1.8.2", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "R", - "utils" - ], - "Hash": "278c286fd6e9e75d0c2e8f731ea445c8" - }, - "R.oo": { - "Package": "R.oo", - "Version": "1.25.0", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "R", - "R.methodsS3", - "methods", - "utils" - ], - "Hash": "a0900a114f4f0194cf4aa8cd4a700681" - }, - "R.utils": { - "Package": "R.utils", - "Version": "2.12.2", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "R", - "R.methodsS3", - "R.oo", - "methods", - "tools", - "utils" - ], - "Hash": "325f01db13da12c04d8f6e7be36ff514" - }, - "R6": { - "Package": "R6", - "Version": "2.5.1", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "R" - ], - "Hash": "470851b6d5d0ac559e9d01bb352b4021" - }, - "Rcpp": { - "Package": "Rcpp", - "Version": "1.0.8", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "methods", - "utils" - ], - "Hash": "22b546dd7e337f6c0c58a39983a496bc" - }, - "askpass": { - "Package": "askpass", - "Version": "1.1", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "sys" - ], - "Hash": "e8a22846fff485f0be3770c2da758713" - }, - "backports": { - "Package": "backports", - "Version": "1.4.1", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "R" - ], - "Hash": "c39fbec8a30d23e721980b8afb31984c" - }, - "base64enc": { - "Package": "base64enc", - "Version": "0.1-3", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "R" - ], - "Hash": "543776ae6848fde2f48ff3816d0628bc" - }, - "brew": { - "Package": "brew", - "Version": "1.0-7", - "Source": "Repository", - "Repository": "RSPM", - "Hash": "38875ea52350ff4b4c03849fc69736c8" - }, - "brio": { - "Package": "brio", - "Version": "1.1.3", - "Source": "Repository", - "Repository": "RSPM", - "Hash": "976cf154dfb043c012d87cddd8bca363" - }, - "bslib": { - "Package": "bslib", - "Version": "0.3.1", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "R", - "grDevices", - "htmltools", - "jquerylib", - "jsonlite", - "rlang", - "sass" - ], - "Hash": "56ae7e1987b340186a8a5a157c2ec358" - }, - "cachem": { - "Package": "cachem", - "Version": "1.0.6", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "fastmap", - "rlang" - ], - "Hash": "648c5b3d71e6a37e3043617489a0a0e9" - }, - "callr": { - "Package": "callr", - "Version": "3.7.0", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "R6", - "processx", - "utils" - ], - "Hash": "461aa75a11ce2400245190ef5d3995df" - }, - "checkmate": { - "Package": "checkmate", - "Version": "2.0.0", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "R", - "backports", - "utils" - ], - "Hash": "a667800d5f0350371bedeb8b8b950289" - }, - "cli": { - "Package": "cli", - "Version": "3.2.0", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "R", - "glue", - "utils" - ], - "Hash": "1bdb126893e9ce6aae50ad1d6fc32faf" - }, - "clipr": { - "Package": "clipr", - "Version": "0.8.0", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "utils" - ], - "Hash": "3f038e5ac7f41d4ac41ce658c85e3042" - }, - "commonmark": { - "Package": "commonmark", - "Version": "1.8.0", - "Source": "Repository", - "Repository": "RSPM", - "Hash": "2ba81b120c1655ab696c935ef33ea716" - }, - "cpp11": { - "Package": "cpp11", - "Version": "0.4.2", - "Source": "Repository", - "Repository": "RSPM", - "Hash": "fa53ce256cd280f468c080a58ea5ba8c" - }, - "crayon": { - "Package": "crayon", - "Version": "1.5.0", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "grDevices", - "methods", - "utils" - ], - "Hash": "741c2e098e98afe3dc26a7b0e5489f4e" - }, - "credentials": { - "Package": "credentials", - "Version": "1.3.2", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "askpass", - "curl", - "jsonlite", - "openssl", - "sys" - ], - "Hash": "93762d0a34d78e6a025efdbfb5c6bb41" - }, - "curl": { - "Package": "curl", - "Version": "4.3.2", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "R" - ], - "Hash": "022c42d49c28e95d69ca60446dbabf88" - }, - "desc": { - "Package": "desc", - "Version": "1.4.1", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "R", - "R6", - "cli", - "rprojroot", - "utils" - ], - "Hash": "eebd27ee58fcc58714eedb7aa07d8ad1" - }, - "devtools": { - "Package": "devtools", - "Version": "2.4.3", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "R", - "callr", - "cli", - "desc", - "ellipsis", - "fs", - "httr", - "lifecycle", - "memoise", - "pkgbuild", - "pkgload", - "rcmdcheck", - "remotes", - "rlang", - "roxygen2", - "rstudioapi", - "rversions", - "sessioninfo", - "stats", - "testthat", - "tools", - "usethis", - "utils", - "withr" - ], - "Hash": "fc35e13bb582e5fe6f63f3d647a4cbe5" - }, - "diffobj": { - "Package": "diffobj", - "Version": "0.3.5", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "R", - "crayon", - "methods", - "stats", - "tools", - "utils" - ], - "Hash": "bcaa8b95f8d7d01a5dedfd959ce88ab8" - }, - "digest": { - "Package": "digest", - "Version": "0.6.29", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "R", - "utils" - ], - "Hash": "cf6b206a045a684728c3267ef7596190" - }, - "dplyr": { - "Package": "dplyr", - "Version": "1.0.8", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "R", - "R6", - "generics", - "glue", - "lifecycle", - "magrittr", - "methods", - "pillar", - "rlang", - "tibble", - "tidyselect", - "utils", - "vctrs" - ], - "Hash": "ef47665e64228a17609d6df877bf86f2" - }, - "ellipsis": { - "Package": "ellipsis", - "Version": "0.3.2", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "R", - "rlang" - ], - "Hash": "bb0eec2fe32e88d9e2836c2f73ea2077" - }, - "evaluate": { - "Package": "evaluate", - "Version": "0.15", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "R", - "methods" - ], - "Hash": "699a7a93d08c962d9f8950b2d7a227f1" - }, - "fansi": { - "Package": "fansi", - "Version": "1.0.2", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "R", - "grDevices", - "utils" - ], - "Hash": "f28149c2d7a1342a834b314e95e67260" - }, - "fastmap": { - "Package": "fastmap", - "Version": "1.1.0", - "Source": "Repository", - "Repository": "RSPM", - "Hash": "77bd60a6157420d4ffa93b27cf6a58b8" - }, - "fs": { - "Package": "fs", - "Version": "1.5.2", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "R", - "methods" - ], - "Hash": "7c89603d81793f0d5486d91ab1fc6f1d" - }, - "generics": { - "Package": "generics", - "Version": "0.1.2", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "R", - "methods" - ], - "Hash": "177475892cf4a55865868527654a7741" - }, - "gert": { - "Package": "gert", - "Version": "1.5.0", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "askpass", - "credentials", - "openssl", - "rstudioapi", - "sys", - "zip" - ], - "Hash": "8fddce7cbd59467106266a6e93e253b4" - }, - "gh": { - "Package": "gh", - "Version": "1.3.0", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "cli", - "gitcreds", - "httr", - "ini", - "jsonlite" - ], - "Hash": "38c2580abbda249bd6afeec00d14f531" - }, - "git2r": { - "Package": "git2r", - "Version": "0.29.0", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "R", - "graphics", - "utils" - ], - "Hash": "b114135c4749076bd5ef74a5827b6f62" - }, - "gitcreds": { - "Package": "gitcreds", - "Version": "0.1.1", - "Source": "Repository", - "Repository": "RSPM", - "Hash": "f3aefccc1cc50de6338146b62f115de8" - }, - "glue": { - "Package": "glue", - "Version": "1.6.2", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "R", - "methods" - ], - "Hash": "4f2596dfb05dac67b9dc558e5c6fba2e" - }, - "highr": { - "Package": "highr", - "Version": "0.9", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "R", - "xfun" - ], - "Hash": "8eb36c8125038e648e5d111c0d7b2ed4" - }, - "htmltools": { - "Package": "htmltools", - "Version": "0.5.2", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "R", - "base64enc", - "digest", - "fastmap", - "grDevices", - "rlang", - "utils" - ], - "Hash": "526c484233f42522278ab06fb185cb26" - }, - "httr": { - "Package": "httr", - "Version": "1.4.2", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "R", - "R6", - "curl", - "jsonlite", - "mime", - "openssl" - ], - "Hash": "a525aba14184fec243f9eaec62fbed43" - }, - "hunspell": { - "Package": "hunspell", - "Version": "3.0.1", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "R", - "Rcpp", - "digest" - ], - "Hash": "3987784c19192ad0f2261c456d936df1" - }, - "ini": { - "Package": "ini", - "Version": "0.3.1", - "Source": "Repository", - "Repository": "RSPM", - "Hash": "6154ec2223172bce8162d4153cda21f7" - }, - "jquerylib": { - "Package": "jquerylib", - "Version": "0.1.4", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "htmltools" - ], - "Hash": "5aab57a3bd297eee1c1d862735972182" - }, - "jsonlite": { - "Package": "jsonlite", - "Version": "1.8.0", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "methods" - ], - "Hash": "d07e729b27b372429d42d24d503613a0" - }, - "knitr": { - "Package": "knitr", - "Version": "1.37", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "R", - "evaluate", - "highr", - "methods", - "stringr", - "tools", - "xfun", - "yaml" - ], - "Hash": "a4ec675eb332a33fe7b7fe26f70e1f98" - }, - "lifecycle": { - "Package": "lifecycle", - "Version": "1.0.1", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "R", - "glue", - "rlang" - ], - "Hash": "a6b6d352e3ed897373ab19d8395c98d0" - }, - "magrittr": { - "Package": "magrittr", - "Version": "2.0.2", - "Source": "Repository", - "Repository": "RSPM", - "Hash": "cdc87ecd81934679d1557633d8e1fe51" - }, - "memoise": { - "Package": "memoise", - "Version": "2.0.1", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "cachem", - "rlang" - ], - "Hash": "e2817ccf4a065c5d9d7f2cfbe7c1d78c" - }, - "mime": { - "Package": "mime", - "Version": "0.12", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "tools" - ], - "Hash": "18e9c28c1d3ca1560ce30658b22ce104" - }, - "openssl": { - "Package": "openssl", - "Version": "2.0.0", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "askpass" - ], - "Hash": "cf4329aac12c2c44089974559c18e446" - }, - "pillar": { - "Package": "pillar", - "Version": "1.7.0", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "cli", - "crayon", - "ellipsis", - "fansi", - "glue", - "lifecycle", - "rlang", - "utf8", - "utils", - "vctrs" - ], - "Hash": "51dfc97e1b7069e9f7e6f83f3589c22e" - }, - "pkgbuild": { - "Package": "pkgbuild", - "Version": "1.3.1", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "R", - "R6", - "callr", - "cli", - "crayon", - "desc", - "prettyunits", - "rprojroot", - "withr" - ], - "Hash": "66d2adfed274daf81ccfe77d974c3b9b" - }, - "pkgconfig": { - "Package": "pkgconfig", - "Version": "2.0.3", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "utils" - ], - "Hash": "01f28d4278f15c76cddbea05899c5d6f" - }, - "pkgload": { - "Package": "pkgload", - "Version": "1.2.4", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "cli", - "crayon", - "desc", - "methods", - "rlang", - "rprojroot", - "rstudioapi", - "utils", - "withr" - ], - "Hash": "7533cd805940821bf23eaf3c8d4c1735" - }, - "praise": { - "Package": "praise", - "Version": "1.0.0", - "Source": "Repository", - "Repository": "RSPM", - "Hash": "a555924add98c99d2f411e37e7d25e9f" - }, - "prettyunits": { - "Package": "prettyunits", - "Version": "1.1.1", - "Source": "Repository", - "Repository": "RSPM", - "Hash": "95ef9167b75dde9d2ccc3c7528393e7e" - }, - "processx": { - "Package": "processx", - "Version": "3.5.2", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "R6", - "ps", - "utils" - ], - "Hash": "0cbca2bc4d16525d009c4dbba156b37c" - }, - "ps": { - "Package": "ps", - "Version": "1.6.0", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "R", - "utils" - ], - "Hash": "32620e2001c1dce1af49c49dccbb9420" - }, - "purrr": { - "Package": "purrr", - "Version": "0.3.4", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "R", - "magrittr", - "rlang" - ], - "Hash": "97def703420c8ab10d8f0e6c72101e02" - }, - "rappdirs": { - "Package": "rappdirs", - "Version": "0.3.3", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "R" - ], - "Hash": "5e3c5dc0b071b21fa128676560dbe94d" - }, - "rcmdcheck": { - "Package": "rcmdcheck", - "Version": "1.4.0", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "R6", - "callr", - "cli", - "curl", - "desc", - "digest", - "pkgbuild", - "prettyunits", - "rprojroot", - "sessioninfo", - "utils", - "withr", - "xopen" - ], - "Hash": "8f25ebe2ec38b1f2aef3b0d2ef76f6c4" - }, - "rematch2": { - "Package": "rematch2", - "Version": "2.1.2", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "tibble" - ], - "Hash": "76c9e04c712a05848ae7a23d2f170a40" - }, - "remotes": { - "Package": "remotes", - "Version": "2.4.2.1", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "R", - "methods", - "stats", - "tools", - "utils" - ], - "Hash": "63d15047eb239f95160112bcadc4fcb9" - }, - "renv": { - "Package": "renv", - "Version": "1.0.3", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "utils" - ], - "Hash": "41b847654f567341725473431dd0d5ab" - }, - "rlang": { - "Package": "rlang", - "Version": "1.0.6", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "R", - "utils" - ], - "Hash": "4ed1f8336c8d52c3e750adcdc57228a7" - }, - "rmarkdown": { - "Package": "rmarkdown", - "Version": "2.12", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "R", - "bslib", - "evaluate", - "htmltools", - "jquerylib", - "jsonlite", - "knitr", - "methods", - "stringr", - "tinytex", - "tools", - "utils", - "xfun", - "yaml" - ], - "Hash": "354da5088ddfdffb73c11cc952885d88" - }, - "roxygen2": { - "Package": "roxygen2", - "Version": "7.2.3", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "R", - "R6", - "brew", - "cli", - "commonmark", - "cpp11", - "desc", - "knitr", - "methods", - "pkgload", - "purrr", - "rlang", - "stringi", - "stringr", - "utils", - "withr", - "xml2" - ], - "Hash": "7b153c746193b143c14baa072bae4e27" - }, - "rprojroot": { - "Package": "rprojroot", - "Version": "2.0.2", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "R" - ], - "Hash": "249d8cd1e74a8f6a26194a91b47f21d1" - }, - "rstudioapi": { - "Package": "rstudioapi", - "Version": "0.13", - "Source": "Repository", - "Repository": "RSPM", - "Hash": "06c85365a03fdaf699966cc1d3cf53ea" - }, - "rversions": { - "Package": "rversions", - "Version": "2.1.1", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "curl", - "utils", - "xml2" - ], - "Hash": "f88fab00907b312f8b23ec13e2d437cb" - }, - "sass": { - "Package": "sass", - "Version": "0.4.0", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "R6", - "fs", - "htmltools", - "rappdirs", - "rlang" - ], - "Hash": "50cf822feb64bb3977bda0b7091be623" - }, - "sessioninfo": { - "Package": "sessioninfo", - "Version": "1.2.2", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "R", - "cli", - "tools", - "utils" - ], - "Hash": "3f9796a8d0a0e8c6eb49a4b029359d1f" - }, - "spelling": { - "Package": "spelling", - "Version": "2.2", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "commonmark", - "hunspell", - "knitr", - "xml2" - ], - "Hash": "b8c899a5c83f0d897286550481c91798" - }, - "staged.dependencies": { - "Package": "staged.dependencies", - "Version": "0.3.1.9001", - "Source": "GitHub", - "RemoteType": "github", - "RemoteHost": "api.github.com", - "RemoteUsername": "openpharma", - "RemoteRepo": "staged.dependencies", - "RemoteRef": "main", - "RemoteSha": "fb124997306b35d44a0225bb4b400bf7258c4c75", - "Requirements": [ - "checkmate", - "desc", - "devtools", - "digest", - "dplyr", - "fs", - "git2r", - "glue", - "httr", - "jsonlite", - "methods", - "rcmdcheck", - "remotes", - "rlang", - "stats", - "tidyr", - "utils", - "withr", - "yaml" - ], - "Hash": "145e45afff215d85f808dda07557fcad" - }, - "stringi": { - "Package": "stringi", - "Version": "1.7.6", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "R", - "stats", - "tools", - "utils" - ], - "Hash": "bba431031d30789535745a9627ac9271" - }, - "stringr": { - "Package": "stringr", - "Version": "1.4.0", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "R", - "glue", - "magrittr", - "stringi" - ], - "Hash": "0759e6b6c0957edb1311028a49a35e76" - }, - "styler": { - "Package": "styler", - "Version": "1.10.2", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "R", - "R.cache", - "cli", - "magrittr", - "purrr", - "rlang", - "rprojroot", - "tools", - "vctrs", - "withr" - ], - "Hash": "d61238fd44fc63c8adf4565efe8eb682" - }, - "sys": { - "Package": "sys", - "Version": "3.4", - "Source": "Repository", - "Repository": "RSPM", - "Hash": "b227d13e29222b4574486cfcbde077fa" - }, - "testthat": { - "Package": "testthat", - "Version": "3.1.2", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "R", - "R6", - "brio", - "callr", - "cli", - "crayon", - "desc", - "digest", - "ellipsis", - "evaluate", - "jsonlite", - "lifecycle", - "magrittr", - "methods", - "pkgload", - "praise", - "processx", - "ps", - "rlang", - "utils", - "waldo", - "withr" - ], - "Hash": "32454e5780e8dbe31e4b61b13d8918fe" - }, - "tibble": { - "Package": "tibble", - "Version": "3.1.6", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "R", - "ellipsis", - "fansi", - "lifecycle", - "magrittr", - "methods", - "pillar", - "pkgconfig", - "rlang", - "utils", - "vctrs" - ], - "Hash": "8a8f02d1934dfd6431c671361510dd0b" - }, - "tidyr": { - "Package": "tidyr", - "Version": "1.2.0", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "R", - "cpp11", - "dplyr", - "ellipsis", - "glue", - "lifecycle", - "magrittr", - "purrr", - "rlang", - "tibble", - "tidyselect", - "utils", - "vctrs" - ], - "Hash": "d8b95b7fee945d7da6888cf7eb71a49c" - }, - "tidyselect": { - "Package": "tidyselect", - "Version": "1.1.2", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "R", - "ellipsis", - "glue", - "purrr", - "rlang", - "vctrs" - ], - "Hash": "17f6da8cfd7002760a859915ce7eef8f" - }, - "tinytex": { - "Package": "tinytex", - "Version": "0.37", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "xfun" - ], - "Hash": "a80abeb527a977e4bef21873d29222dd" - }, - "usethis": { - "Package": "usethis", - "Version": "2.1.5", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "R", - "cli", - "clipr", - "crayon", - "curl", - "desc", - "fs", - "gert", - "gh", - "glue", - "jsonlite", - "lifecycle", - "purrr", - "rappdirs", - "rlang", - "rprojroot", - "rstudioapi", - "stats", - "utils", - "whisker", - "withr", - "yaml" - ], - "Hash": "c499f488e6dd7718accffaee5bc5a79b" - }, - "utf8": { - "Package": "utf8", - "Version": "1.2.2", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "R" - ], - "Hash": "c9c462b759a5cc844ae25b5942654d13" - }, - "vctrs": { - "Package": "vctrs", - "Version": "0.4.1", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "R", - "cli", - "glue", - "rlang" - ], - "Hash": "8b54f22e2a58c4f275479c92ce041a57" - }, - "waldo": { - "Package": "waldo", - "Version": "0.3.1", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "cli", - "diffobj", - "fansi", - "glue", - "methods", - "rematch2", - "rlang", - "tibble" - ], - "Hash": "ad8cfff5694ac5b3c354f8f2044bd976" - }, - "whisker": { - "Package": "whisker", - "Version": "0.4", - "Source": "Repository", - "Repository": "RSPM", - "Hash": "ca970b96d894e90397ed20637a0c1bbe" - }, - "withr": { - "Package": "withr", - "Version": "2.5.0", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "R", - "grDevices", - "graphics", - "stats" - ], - "Hash": "c0e49a9760983e81e55cdd9be92e7182" - }, - "xfun": { - "Package": "xfun", - "Version": "0.30", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "stats", - "tools" - ], - "Hash": "e83f48136b041845e50a6658feffb197" - }, - "xml2": { - "Package": "xml2", - "Version": "1.3.3", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "R", - "methods" - ], - "Hash": "40682ed6a969ea5abfd351eb67833adc" - }, - "xopen": { - "Package": "xopen", - "Version": "1.0.0", - "Source": "Repository", - "Repository": "RSPM", - "Requirements": [ - "R", - "processx" - ], - "Hash": "6c85f015dee9cc7710ddd20f86881f58" - }, - "yaml": { - "Package": "yaml", - "Version": "2.3.5", - "Source": "Repository", - "Repository": "RSPM", - "Hash": "458bb38374d73bf83b1bb85e353da200" - }, - "zip": { - "Package": "zip", - "Version": "2.2.0", - "Source": "Repository", - "Repository": "RSPM", - "Hash": "c7eef2996ac270a18c2715c997a727c5" - } - } -} diff --git a/renv/profiles/4.1/renv/.gitignore b/renv/profiles/4.1/renv/.gitignore deleted file mode 100644 index 0ec0cbba..00000000 --- a/renv/profiles/4.1/renv/.gitignore +++ /dev/null @@ -1,7 +0,0 @@ -library/ -local/ -cellar/ -lock/ -python/ -sandbox/ -staging/ diff --git a/renv/profiles/4.1/renv/settings.json b/renv/profiles/4.1/renv/settings.json deleted file mode 100644 index 3830d97c..00000000 --- a/renv/profiles/4.1/renv/settings.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "bioconductor.version": null, - "external.libraries": [], - "ignored.packages": [ - "admiraldev" - ], - "package.dependency.fields": [ - "Imports", - "Depends", - "LinkingTo" - ], - "ppm.enabled": null, - "ppm.ignored.urls": [], - "r.version": null, - "snapshot.type": "custom", - "use.cache": true, - "vcs.ignore.cellar": true, - "vcs.ignore.library": true, - "vcs.ignore.local": true, - "vcs.manage.ignores": true -} diff --git a/staged_dependencies.yaml b/staged_dependencies.yaml new file mode 100644 index 00000000..72a49b08 --- /dev/null +++ b/staged_dependencies.yaml @@ -0,0 +1,11 @@ +--- +upstream_repos: +- repo: pharmaverse/admiraldev + host: https://github.com + +downstream_repos: + +current_repo: + repo: pharmaverse/sdtm.oak + host: https://github.com + \ No newline at end of file From 3becca5fb9d4f6f53ab2de79694500e169d19a99 Mon Sep 17 00:00:00 2001 From: ramiromagno Date: Mon, 20 Nov 2023 17:54:04 +0000 Subject: [PATCH 37/52] Automatic renv profile update. --- renv/profiles/4.2/renv.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/renv/profiles/4.2/renv.lock b/renv/profiles/4.2/renv.lock index 14ac16c0..4120eeca 100644 --- a/renv/profiles/4.2/renv.lock +++ b/renv/profiles/4.2/renv.lock @@ -54,7 +54,7 @@ }, "R.utils": { "Package": "R.utils", - "Version": "2.12.2", + "Version": "2.12.3", "Source": "Repository", "Repository": "RSPM", "Requirements": [ @@ -65,7 +65,7 @@ "tools", "utils" ], - "Hash": "325f01db13da12c04d8f6e7be36ff514" + "Hash": "3dc2829b790254bfba21e60965787651" }, "R6": { "Package": "R6", From 926026bc3696d6223c23eae366f704c4bff7c919 Mon Sep 17 00:00:00 2001 From: ramiromagno Date: Mon, 20 Nov 2023 17:58:25 +0000 Subject: [PATCH 38/52] Automatic renv profile update. --- renv.lock | 4 ++-- renv/profiles/4.3/renv.lock | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/renv.lock b/renv.lock index 181fd2eb..5141d10f 100644 --- a/renv.lock +++ b/renv.lock @@ -54,7 +54,7 @@ }, "R.utils": { "Package": "R.utils", - "Version": "2.12.2", + "Version": "2.12.3", "Source": "Repository", "Repository": "RSPM", "Requirements": [ @@ -65,7 +65,7 @@ "tools", "utils" ], - "Hash": "325f01db13da12c04d8f6e7be36ff514" + "Hash": "3dc2829b790254bfba21e60965787651" }, "R6": { "Package": "R6", diff --git a/renv/profiles/4.3/renv.lock b/renv/profiles/4.3/renv.lock index 181fd2eb..5141d10f 100644 --- a/renv/profiles/4.3/renv.lock +++ b/renv/profiles/4.3/renv.lock @@ -54,7 +54,7 @@ }, "R.utils": { "Package": "R.utils", - "Version": "2.12.2", + "Version": "2.12.3", "Source": "Repository", "Repository": "RSPM", "Requirements": [ @@ -65,7 +65,7 @@ "tools", "utils" ], - "Hash": "325f01db13da12c04d8f6e7be36ff514" + "Hash": "3dc2829b790254bfba21e60965787651" }, "R6": { "Package": "R6", From c06971325fe532c6a8ca33a1b9b4402b0d398658 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Mon, 20 Nov 2023 18:01:46 +0000 Subject: [PATCH 39/52] Cleaned up lintr issues --- R/parse_dttm_fmt.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/parse_dttm_fmt.R b/R/parse_dttm_fmt.R index d6db5d41..366cbdf0 100644 --- a/R/parse_dttm_fmt.R +++ b/R/parse_dttm_fmt.R @@ -293,7 +293,7 @@ parse_dttm_fmt_ <- function(fmt, pattern) { admiraldev::assert_character_scalar(fmt) admiraldev::assert_character_scalar(pattern) - if(identical(nchar(pattern), 0L)) + if (identical(nchar(pattern), 0L)) rlang::abort("`pattern` must be a literal string of at least one char.") match_data <- regexpr(pattern, fmt) @@ -366,13 +366,13 @@ parse_dttm_fmt <- function(fmt, patterns = fmt_c()) { # Get captures' ranks while leaving NA as NA (`rank()` won't do this.) fmt_dttmc$ord <- dplyr::row_number(fmt_dttmc$start) - if(identical(nrow(fmt_dttmc), 0L)) + if (identical(nrow(fmt_dttmc), 0L)) return(fmt_dttmc()) fmt_len <- nchar(fmt) - start <- end <- NULL # To avoid a "no visible binding for global variable" NOTE. - dttmc_pos <- with(fmt_dttmc, pseq(from = start[!is.na(start)], to = end[!is.na(end)])) + dttmc_pos <- + pseq(from = fmt_dttmc$start[!is.na(fmt_dttmc$start)], to = fmt_dttmc$end[!is.na(fmt_dttmc$end)]) # `delim_pos`: delimiter positions, i.e. positions in `fmt` in-between dttm components. delim_pos <- find_int_gap(dttmc_pos, xmin = 1L, xmax = fmt_len) From 160b7f09713a5fc6cef534026b184d6829dfff7d Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Tue, 21 Nov 2023 02:37:13 +0000 Subject: [PATCH 40/52] Export `fmt_cmp()` and add early draft of `create_iso8601()` article --- .Rbuildignore | 1 + NAMESPACE | 1 + R/dtc_create_iso8601.R | 9 +- R/dtc_parse_dttm.R | 6 +- R/parse_dttm_fmt.R | 57 +++++++---- man/create_iso8601.Rd | 6 +- man/dttm_fmt_to_regex.Rd | 7 +- man/{fmt_c.Rd => fmt_cmp.Rd} | 15 +-- man/parse_dttm.Rd | 2 + man/parse_dttm_fmt.Rd | 8 +- vignettes/.gitignore | 2 + vignettes/articles/iso_8601.Rmd | 162 ++++++++++++++++++++++++++++++++ 12 files changed, 242 insertions(+), 34 deletions(-) rename man/{fmt_c.Rd => fmt_cmp.Rd} (81%) create mode 100644 vignettes/.gitignore create mode 100644 vignettes/articles/iso_8601.Rmd diff --git a/.Rbuildignore b/.Rbuildignore index 41c60f38..c8038ef3 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -13,3 +13,4 @@ ^\.lintr$ ^data-raw$ ^staged_dependencies.yaml$ +^vignettes/articles$ diff --git a/NAMESPACE b/NAMESPACE index bbac999a..455b5386 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,6 @@ # Generated by roxygen2: do not edit by hand export(create_iso8601) +export(fmt_cmp) importFrom(rlang,.data) importFrom(tibble,tibble) diff --git a/R/dtc_create_iso8601.R b/R/dtc_create_iso8601.R index 2aa51b40..e44b6378 100644 --- a/R/dtc_create_iso8601.R +++ b/R/dtc_create_iso8601.R @@ -325,6 +325,8 @@ format_iso8601 <- function(m, .cutoff_2000 = 68L) { #' provided, then each element must be a character vector of formats. The #' first vector of formats is used for parsing the first vector passed in #' `...`, and so on. +#' @param .fmt_c A list of regexps to use when parsing `.format`. Use [fmt_cmp()] +#' to create such an object to pass as argument to this parameter. #' @param .na A character vector of string literals to be regarded as missing #' values during parsing. #' @param .cutoff_2000 An integer value. Two-digit years smaller or equal to @@ -376,7 +378,10 @@ format_iso8601 <- function(m, .cutoff_2000 = 68L) { #' # Fractional seconds #' create_iso8601("2019-120602:20:13.1230001", .format = "y-mdH:M:S", .check_format = FALSE) #' @export -create_iso8601 <- function(..., .format, .na = NULL, .cutoff_2000 = 68L, .check_format = TRUE) { +create_iso8601 <- function(..., .format, .fmt_c = fmt_cmp(), .na = NULL, .cutoff_2000 = 68L, .check_format = FALSE) { + + assert_fmt_c(.fmt_c) + dots <- rlang::dots_list(...) if (rlang::is_empty(dots)) { @@ -402,7 +407,7 @@ create_iso8601 <- function(..., .format, .na = NULL, .cutoff_2000 = 68L, .check_ # character vectors, and that each string is one of the possible formats. if (.check_format) assert_dtc_format(.format) - cap_matrices <- purrr::map2(dots, .format, ~ parse_dttm(dttm = .x, fmt = .y, na = .na)) + cap_matrices <- purrr::map2(dots, .format, ~ parse_dttm(dttm = .x, fmt = .y, na = .na, fmt_c = .fmt_c)) cap_matrix <- coalesce_capture_matrices(!!!cap_matrices) format_iso8601(cap_matrix, .cutoff_2000 = .cutoff_2000) diff --git a/R/dtc_parse_dttm.R b/R/dtc_parse_dttm.R index 8f976fa2..2feed78a 100644 --- a/R/dtc_parse_dttm.R +++ b/R/dtc_parse_dttm.R @@ -2,6 +2,7 @@ #' @order 2 parse_dttm_ <- function(dttm, fmt, + fmt_c = fmt_cmp(), na = NULL, sec_na = na, min_na = na, @@ -22,7 +23,8 @@ parse_dttm_ <- function(dttm, mday_na = mday_na, mon_na = mon_na, year_na = year_na - ) + ), + fmt_c = fmt_c ) m <- stringr::str_match(dttm, regex) @@ -88,6 +90,7 @@ parse_dttm_ <- function(dttm, #' @keywords internal parse_dttm <- function(dttm, fmt, + fmt_c = fmt_cmp(), na = NULL, sec_na = na, min_na = na, @@ -101,6 +104,7 @@ parse_dttm <- function(dttm, ~ parse_dttm_( dttm = dttm, fmt = .x, + fmt_c = fmt_c, na = na, sec_na = sec_na, min_na = min_na, diff --git a/R/parse_dttm_fmt.R b/R/parse_dttm_fmt.R index 366cbdf0..ea3766fa 100644 --- a/R/parse_dttm_fmt.R +++ b/R/parse_dttm_fmt.R @@ -124,7 +124,7 @@ months_abb_regex <- function(x = month.abb, case = c("any", "upper", "lower", "t #' Regexps for date/time format components #' -#' [fmt_c()] creates a character vector of patterns to match individual +#' [fmt_cmp()] creates a character vector of patterns to match individual #' format date/time components. #' #' @param sec A string pattern for matching the second format component. @@ -138,24 +138,40 @@ months_abb_regex <- function(x = month.abb, case = c("any", "upper", "lower", "t #' of six elements, one for each date/time component. #' #' @examples -#' # Default patterns -#' sdtm.oak:::fmt_c() +#' # Regexps to parse format components +#' fmt_cmp() #' -#' @keywords internal -fmt_c <- function(sec = "S+", +# # Supply a different pattern for the year component +#' fmt_cmp(year = "yyyy") +#' +#' @export +fmt_cmp <- function(sec = "S+", min = "M+", hour = "H+", mday = "d+", mon = "m+", year = "y+") { - c( - sec = sec, - min = min, - hour = hour, - mday = mday, - mon = mon, - year = year + + structure( + list( + sec = sec, + min = min, + hour = hour, + mday = mday, + mon = mon, + year = year + ), + class = "fmt_c" ) + +} + +assert_fmt_c <- function(x) { + if (!inherits(x, "fmt_c")) { + rlang::abort("`x` must be an object created with `fmt_cmp()`.") + } + + invisible(x) } #' Utility function to assemble a regex of alternative patterns @@ -317,7 +333,7 @@ parse_dttm_fmt_ <- function(fmt, pattern) { #' @param fmt A format string (scalar) to be parsed by `patterns`. #' @param pattern,patterns A string (in the case of `pattern`), or a character #' vector (in the case of `patterns`) of regexps for each of the individual -#' date/time components. Default value is that of [fmt_c()]. Use this function +#' date/time components. Default value is that of [fmt_cmp()]. Use this function #' if you plan on passing a different set of patterns. #' #' @returns A [tibble][tibble::tibble-package] of seven columns: @@ -345,24 +361,29 @@ parse_dttm_fmt_ <- function(fmt, pattern) { #' #' # Note that `"y"`, `"m"`, `"d"`, `"H"`, `"M"` or `"S"` are reserved patterns #' # that are matched first and interpreted as format components. # Example: the -#' # first "y" in "year" is parsed as meaning year followed by # "ear y". The +#' # first "y" in "year" is parsed as meaning year followed by "ear y". The #' # second "y" is not longer matched because a first match already # succeded. #' sdtm.oak:::parse_dttm_fmt("year y") #' #' # Specify custom patterns #' sdtm.oak:::parse_dttm_fmt( #' "year month day", -#' sdtm.oak:::fmt_c(year = "year", mon = "month", mday = "day") +#' fmt_cmp(year = "year", mon = "month", mday = "day") #' ) #' #' @keywords internal -parse_dttm_fmt <- function(fmt, patterns = fmt_c()) { +parse_dttm_fmt <- function(fmt, patterns = fmt_cmp()) { admiraldev::assert_character_scalar(fmt) fmt_dttmc <- purrr::map(patterns, ~ parse_dttm_fmt_(fmt, .x)) |> purrr::list_rbind(names_to = "fmt_c") + # Check if patterns have matching overlap, i.e. whether they are not + # mutually exclusive (as they should). + if (anyDuplicated(pseq(fmt_dttmc$start, fmt_dttmc$end))) + rlang::abort("Patterns in `fmt_c` have overlapping matches.") + # Get captures' ranks while leaving NA as NA (`rank()` won't do this.) fmt_dttmc$ord <- dplyr::row_number(fmt_dttmc$start) @@ -418,9 +439,9 @@ parse_dttm_fmt <- function(fmt, patterns = fmt_c()) { #' sdtm.oak:::dttm_fmt_to_regex("ymd HH:MM:SS") #' #' @keywords internal -dttm_fmt_to_regex <- function(fmt, fmt_regex = fmt_rg(), anchored = TRUE) { +dttm_fmt_to_regex <- function(fmt, fmt_regex = fmt_rg(), fmt_c = fmt_cmp(), anchored = TRUE) { - tbl_fmt_c <- parse_dttm_fmt(fmt) + tbl_fmt_c <- parse_dttm_fmt(fmt, patterns = fmt_c) fmt_regex <- tbl_fmt_c |> diff --git a/man/create_iso8601.Rd b/man/create_iso8601.Rd index bca57597..334da067 100644 --- a/man/create_iso8601.Rd +++ b/man/create_iso8601.Rd @@ -7,9 +7,10 @@ create_iso8601( ..., .format, + .fmt_c = fmt_cmp(), .na = NULL, .cutoff_2000 = 68L, - .check_format = TRUE + .check_format = FALSE ) } \arguments{ @@ -22,6 +23,9 @@ provided, then each element must be a character vector of formats. The first vector of formats is used for parsing the first vector passed in \code{...}, and so on.} +\item{.fmt_c}{A list of regexps to use when parsing \code{.format}. Use \code{\link[=fmt_cmp]{fmt_cmp()}} +to create such an object to pass as argument to this parameter.} + \item{.na}{A character vector of string literals to be regarded as missing values during parsing.} diff --git a/man/dttm_fmt_to_regex.Rd b/man/dttm_fmt_to_regex.Rd index 4701c213..12510e9c 100644 --- a/man/dttm_fmt_to_regex.Rd +++ b/man/dttm_fmt_to_regex.Rd @@ -4,7 +4,12 @@ \alias{dttm_fmt_to_regex} \title{Convert a parsed date/time format to regex} \usage{ -dttm_fmt_to_regex(fmt, fmt_regex = fmt_rg(), anchored = TRUE) +dttm_fmt_to_regex( + fmt, + fmt_regex = fmt_rg(), + fmt_c = fmt_cmp(), + anchored = TRUE +) } \arguments{ \item{fmt}{A format string (scalar) to be parsed by \code{patterns}.} diff --git a/man/fmt_c.Rd b/man/fmt_cmp.Rd similarity index 81% rename from man/fmt_c.Rd rename to man/fmt_cmp.Rd index 0304c017..e278927c 100644 --- a/man/fmt_c.Rd +++ b/man/fmt_cmp.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/parse_dttm_fmt.R -\name{fmt_c} -\alias{fmt_c} +\name{fmt_cmp} +\alias{fmt_cmp} \title{Regexps for date/time format components} \usage{ -fmt_c( +fmt_cmp( sec = "S+", min = "M+", hour = "H+", @@ -31,12 +31,13 @@ A named character vector of date/time format patterns. This a vector of six elements, one for each date/time component. } \description{ -\code{\link[=fmt_c]{fmt_c()}} creates a character vector of patterns to match individual +\code{\link[=fmt_cmp]{fmt_cmp()}} creates a character vector of patterns to match individual format date/time components. } \examples{ -# Default patterns -sdtm.oak:::fmt_c() +# Regexps to parse format components +fmt_cmp() + +fmt_cmp(year = "yyyy") } -\keyword{internal} diff --git a/man/parse_dttm.Rd b/man/parse_dttm.Rd index 81be074d..016afca0 100644 --- a/man/parse_dttm.Rd +++ b/man/parse_dttm.Rd @@ -8,6 +8,7 @@ parse_dttm_( dttm, fmt, + fmt_c = fmt_cmp(), na = NULL, sec_na = na, min_na = na, @@ -20,6 +21,7 @@ parse_dttm_( parse_dttm( dttm, fmt, + fmt_c = fmt_cmp(), na = NULL, sec_na = na, min_na = na, diff --git a/man/parse_dttm_fmt.Rd b/man/parse_dttm_fmt.Rd index 9b9ecaab..6a74b183 100644 --- a/man/parse_dttm_fmt.Rd +++ b/man/parse_dttm_fmt.Rd @@ -7,14 +7,14 @@ \usage{ parse_dttm_fmt_(fmt, pattern) -parse_dttm_fmt(fmt, patterns = fmt_c()) +parse_dttm_fmt(fmt, patterns = fmt_cmp()) } \arguments{ \item{fmt}{A format string (scalar) to be parsed by \code{patterns}.} \item{pattern, patterns}{A string (in the case of \code{pattern}), or a character vector (in the case of \code{patterns}) of regexps for each of the individual -date/time components. Default value is that of \code{\link[=fmt_c]{fmt_c()}}. Use this function +date/time components. Default value is that of \code{\link[=fmt_cmp]{fmt_cmp()}}. Use this function if you plan on passing a different set of patterns.} } \value{ @@ -51,14 +51,14 @@ sdtm.oak:::parse_dttm_fmt("yymmdd HHMMSS") # Note that `"y"`, `"m"`, `"d"`, `"H"`, `"M"` or `"S"` are reserved patterns # that are matched first and interpreted as format components. # Example: the -# first "y" in "year" is parsed as meaning year followed by # "ear y". The +# first "y" in "year" is parsed as meaning year followed by "ear y". The # second "y" is not longer matched because a first match already # succeded. sdtm.oak:::parse_dttm_fmt("year y") # Specify custom patterns sdtm.oak:::parse_dttm_fmt( "year month day", - sdtm.oak:::fmt_c(year = "year", mon = "month", mday = "day") + fmt_cmp(year = "year", mon = "month", mday = "day") ) } diff --git a/vignettes/.gitignore b/vignettes/.gitignore new file mode 100644 index 00000000..097b2416 --- /dev/null +++ b/vignettes/.gitignore @@ -0,0 +1,2 @@ +*.html +*.R diff --git a/vignettes/articles/iso_8601.Rmd b/vignettes/articles/iso_8601.Rmd new file mode 100644 index 00000000..ed3992bb --- /dev/null +++ b/vignettes/articles/iso_8601.Rmd @@ -0,0 +1,162 @@ +--- +title: "Converting dates/times to ISO 8601" +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +library(sdtm.oak) +``` + +An SDTM DTC variable may include data that is represented in [ISO +8601](https://en.wikipedia.org/wiki/ISO_8601) format as a complete date/time, a +partial date/time, or an incomplete date/time. `{sdtm.oak}` provides the +`create_iso8601()` function that allows flexible mapping of date and time +values in various formats to a single date-time ISO 8601 format. + +## Introduction + +To perform conversion to the ISO 8601 format you need to pass two key arguments: + +- At least one vector of dates, times, or date-times of `character` type; +- A date/time format via the `.format` parameter that instructs `create_iso8601()` on which date/time components to expect. + +```{r} +create_iso8601("2000 01 05", .format = "y m d") +create_iso8601("22:35:05", .format = "H:M:S") +``` + +By default the `.format` parameter understands a few reserved characters: + +- `"y"` for year +- `"m"` for month +- `"d"` for day +- `"H"` for hours +- `"M"` for minutes +- `"S"` for seconds + +Besides character vectors of dates and times, you may also pass a single vector +of date-times, provided you adjust the format: + +```{r} +create_iso8601("2000-01-05 22:35:05", .format = "y-m-d H:M:S") +``` + +If you have dates and times in separate vectors then you will need to pass +a format for each vector: + +```{r} +create_iso8601("2000-01-05", "22:35:05", .format = c("y-m-d", "H:M:S")) +``` + + + +## `create_iso8601()` is vectorized over inputs + +```{r} +date <- c("2000-01-05", "2001-12-25", "1980-06-18", "1979-09-07") +time <- c("00:12:21", "22:35:05", "03:00:15", "07:09:00") +create_iso8601(date, time, .format = c("y-m-d", "H:M:S")) +``` + +But the number of elements in each of the inputs has to match or you will get an +error: + +```{r} +date <- c("2000-01-05", "2001-12-25", "1980-06-18", "1979-09-07") +time <- "00:12:21" +try(create_iso8601(date, time, .format = c("y-m-d", "H:M:S"))) +``` + +## The `.format` parameter + +The `.format` parameter can easily accommodate variations in the format of the +inputs: + +```{r} +create_iso8601("2000-01-05", .format = "y-m-d") +create_iso8601("2000 01 05", .format = "y m d") +create_iso8601("2000/01/05", .format = "y/m/d") +``` + +Individual components may come in a different order, so adjust the format +accordingly: + +```{r} +create_iso8601("2000 01 05", .format = "y m d") +create_iso8601("05 01 2000", .format = "d m y") +create_iso8601("01 05, 2000", .format = "m d, y") +``` + +All other individual characters given in the format are taken strictly, so +one space is different from two: + +```{r} +date <- c("2000 01 05", "2000 01 05", "2000 01 05", "2000 01 05") +create_iso8601(date, .format = "y m d") +create_iso8601(date, .format = "y m d") +create_iso8601(date, .format = "y m d") +create_iso8601(date, .format = "y m d") +``` + +The format can include regular expressions though: + +```{r} +create_iso8601(date, .format = "y\\s+m\\s+d") +``` + +### Multiple alternative formats + +When an input vector contains values with varying formats, a single format may +not be adequate to encompass all variations. In such situations, it's advisable +to list multiple alternative formats. This approach ensures that each format is +tried sequentially until one matches the data in the vector. + +```{r} +date <- c("2000/01/01", "2000-01-02", "2000 01 03", "2000/01/04") +create_iso8601(date, .format = "y-m-d") +create_iso8601(date, .format = "y m d") +create_iso8601(date, .format = "y/m/d") +create_iso8601(date, .format = list(c("y-m-d", "y m d", "y/m/d"))) +``` + +Consider the order in which you supply the formats, as it can be significant. If +multiple formats could potentially match, the sequence determines which format +is applied first. + +```{r} +create_iso8601("07 04 2000", .format = list(c("d m y", "m d y"))) +create_iso8601("07 04 2000", .format = list(c("m d y", "d m y"))) +``` + +## Individual date/time components + +Each individual date or time component is parsed as per the indication in the +format according to certain constraints: + +```{r} +# Years: two-digit or four-digit numbers. +years <- c("0", "1", "00", "01", "15", "30", "50", "68", "69", "80", "99") +create_iso8601(years, .format = "y") + +# Adjust the point where two-digits years are mapped to 2000's or 1900's. +create_iso8601(years, .format = "y", .cutoff_2000 = 20L) + +# Both numeric months (two-digit only) and abbreviated months work out of the box +months <- c("0", "00", "1", "01", "Jan", "jan") +create_iso8601(months, .format = "m") + +# Month days: single or two-digit numbers, anything else results in NA. +create_iso8601(c("1", "01", "001", "10", "20", "31"), .format = "d") + +# Hours +create_iso8601(c("1", "01", "001", "10", "20", "31"), .format = "H") + +# Minutes +create_iso8601(c("1", "01", "001", "10", "20", "60"), .format = "M") + +# Seconds +create_iso8601(c("1", "01", "001", "10", "20", "60"), .format = "S") +``` From b87efd0b7fb6a72a3f0ed1692f328573cbc1502d Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Wed, 22 Nov 2023 00:33:16 +0000 Subject: [PATCH 41/52] Update `create_iso8601()` article --- vignettes/articles/iso_8601.Rmd | 59 +++++++++++++++++++++++++++------ 1 file changed, 48 insertions(+), 11 deletions(-) diff --git a/vignettes/articles/iso_8601.Rmd b/vignettes/articles/iso_8601.Rmd index ed3992bb..e069a328 100644 --- a/vignettes/articles/iso_8601.Rmd +++ b/vignettes/articles/iso_8601.Rmd @@ -1,5 +1,5 @@ --- -title: "Converting dates/times to ISO 8601" +title: "Converting dates, times or date-times to ISO 8601" --- ```{r, include = FALSE} @@ -44,6 +44,8 @@ of date-times, provided you adjust the format: create_iso8601("2000-01-05 22:35:05", .format = "y-m-d H:M:S") ``` +## Multiple inputs + If you have dates and times in separate vectors then you will need to pass a format for each vector: @@ -51,9 +53,8 @@ a format for each vector: create_iso8601("2000-01-05", "22:35:05", .format = c("y-m-d", "H:M:S")) ``` - - -## `create_iso8601()` is vectorized over inputs +In addition, like most R functions that take vectors as input, +`create_iso8601()` is vectorized: ```{r} date <- c("2000-01-05", "2001-12-25", "1980-06-18", "1979-09-07") @@ -70,7 +71,27 @@ time <- "00:12:21" try(create_iso8601(date, time, .format = c("y-m-d", "H:M:S"))) ``` -## The `.format` parameter +You can combine individual date and time components coming +in as separate inputs; here is a contrived example of year, month and day +together, hour, and minute: + +```{r} +year <- c("99", "84", "00", "80", "79", "1944", "1953") +month_and_day <- c("jan 1", "apr 04", "mar 06", "jun 18", "sep 07", "sep 13", "sep 14") +hour <- c("12", "13", "05", "23", "16", "16", "19") +min <- c("0", "60", "59", "42", "44", "10", "13") +create_iso8601(year, month_and_day, hour, min, .format = c("y", "m d", "H", "M")) +``` + +The `.format` argument must be always named; otherwise, it will be treated as if +it were one of the inputs and interpreted as missing. + +```{r} +try(create_iso8601("2000-01-05", "y-m-d")) +``` + + +## Format variations The `.format` parameter can easily accommodate variations in the format of the inputs: @@ -90,8 +111,8 @@ create_iso8601("05 01 2000", .format = "d m y") create_iso8601("01 05, 2000", .format = "m d, y") ``` -All other individual characters given in the format are taken strictly, so -one space is different from two: +All other individual characters given in the format are taken strictly, e.g. +the number of spaces matters: ```{r} date <- c("2000 01 05", "2000 01 05", "2000 01 05", "2000 01 05") @@ -107,6 +128,17 @@ The format can include regular expressions though: create_iso8601(date, .format = "y\\s+m\\s+d") ``` +By default, a streak of the reserved characters is treated as if only one was +provided, so these formats are equivalent: + +```{r} +date <- c("2000-01-05", "2001-12-25", "1980-06-18", "1979-09-07") +time <- c("00:12:21", "22:35:05", "03:00:15", "07:09:00") +create_iso8601(date, time, .format = c("y-m-d", "H:M:S")) +create_iso8601(date, time, .format = c("yyyy-mm-dd", "HH:MM:SS")) +create_iso8601(date, time, .format = c("yyyyyyyy-m-dddddd", "H:MMMMM:SSSS")) +``` + ### Multiple alternative formats When an input vector contains values with varying formats, a single format may @@ -131,10 +163,15 @@ create_iso8601("07 04 2000", .format = list(c("d m y", "m d y"))) create_iso8601("07 04 2000", .format = list(c("m d y", "d m y"))) ``` -## Individual date/time components +## Parsing of date or time components + +By default, date or time components are parsed as follows: -Each individual date or time component is parsed as per the indication in the -format according to certain constraints: +- year: either parsed from a two- or four-digit year; +- month: either as a numeric month (single or two-digit number) or as an English abbreviated month name (e.g. Jan, Jun or Dec) regardless of case; +- month day: are parsed from two-digit numbers; +- hour and minute: are parsed from single or two-digit numbers; +- second: is parsed from single or two-digit numbers with an optional fractional part. ```{r} # Years: two-digit or four-digit numbers. @@ -158,5 +195,5 @@ create_iso8601(c("1", "01", "001", "10", "20", "31"), .format = "H") create_iso8601(c("1", "01", "001", "10", "20", "60"), .format = "M") # Seconds -create_iso8601(c("1", "01", "001", "10", "20", "60"), .format = "S") +create_iso8601(c("1", "01", "23.04", "001", "10", "20", "60"), .format = "S") ``` From 62eb91538d13c59ea25e592c2ad686d054ffd2c7 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Wed, 22 Nov 2023 01:18:57 +0000 Subject: [PATCH 42/52] Link `create_iso8601()` doc to article "iso_8601" --- R/dtc_create_iso8601.R | 5 +-- man/create_iso8601.Rd | 4 +-- vignettes/articles/iso_8601.Rmd | 57 ++++++++++++++++++++++++++++++++- 3 files changed, 61 insertions(+), 5 deletions(-) diff --git a/R/dtc_create_iso8601.R b/R/dtc_create_iso8601.R index e44b6378..0d50f1d9 100644 --- a/R/dtc_create_iso8601.R +++ b/R/dtc_create_iso8601.R @@ -315,8 +315,9 @@ format_iso8601 <- function(m, .cutoff_2000 = 68L) { #' Convert date or time collected values to ISO 8601 #' -#' [create_iso8601()] converts vectors of dates, times or date-times to -#' [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) format. +#' [create_iso8601()] converts vectors of dates, times or date-times to [ISO +#' 8601](https://en.wikipedia.org/wiki/ISO_8601) format. Learn more in +#' `vignette("iso_8601")`. #' #' @param ... Character vectors of dates, times or date-times' components. #' @param .format Parsing format(s). Either a character vector or a list of diff --git a/man/create_iso8601.Rd b/man/create_iso8601.Rd index 334da067..3e53b2fe 100644 --- a/man/create_iso8601.Rd +++ b/man/create_iso8601.Rd @@ -39,8 +39,8 @@ meaning to check against a selection of validated formats in interpretation of the formats.} } \description{ -\code{\link[=create_iso8601]{create_iso8601()}} converts vectors of dates, times or date-times to -\href{https://en.wikipedia.org/wiki/ISO_8601}{ISO 8601} format. +\code{\link[=create_iso8601]{create_iso8601()}} converts vectors of dates, times or date-times to \href{https://en.wikipedia.org/wiki/ISO_8601}{ISO 8601} format. Learn more in +\code{vignette("iso_8601")}. } \examples{ # Converting dates diff --git a/vignettes/articles/iso_8601.Rmd b/vignettes/articles/iso_8601.Rmd index e069a328..222e5d01 100644 --- a/vignettes/articles/iso_8601.Rmd +++ b/vignettes/articles/iso_8601.Rmd @@ -139,7 +139,7 @@ create_iso8601(date, time, .format = c("yyyy-mm-dd", "HH:MM:SS")) create_iso8601(date, time, .format = c("yyyyyyyy-m-dddddd", "H:MMMMM:SSSS")) ``` -### Multiple alternative formats +## Multiple alternative formats When an input vector contains values with varying formats, a single format may not be adequate to encompass all variations. In such situations, it's advisable @@ -163,6 +163,9 @@ create_iso8601("07 04 2000", .format = list(c("d m y", "m d y"))) create_iso8601("07 04 2000", .format = list(c("m d y", "d m y"))) ``` +Note that if you are passing alternative formats, then the `.format` argument +must be a list whose length matches the number of inputs. + ## Parsing of date or time components By default, date or time components are parsed as follows: @@ -197,3 +200,55 @@ create_iso8601(c("1", "01", "001", "10", "20", "60"), .format = "M") # Seconds create_iso8601(c("1", "01", "23.04", "001", "10", "20", "60"), .format = "S") ``` + +## Allowing alternative date or time values + +If date or time component values include special values, e.g. values +encoding missing values, then you can indicate those values as possible +alternatives such that the parsing will tolerate them; use the `.na` argument: + +```{r} +create_iso8601("U DEC 2019 14:00", .format = "d m y H:M") +create_iso8601("U DEC 2019 14:00", .format = "d m y H:M", .na = "U") + +create_iso8601("U UNK 2019 14:00", .format = "d m y H:M") +create_iso8601("U UNK 2019 14:00", .format = "d m y H:M", .na = c("U", "UNK")) +``` + +In this case you could achieve the same result using regexps: + +```{r} +create_iso8601("U UNK 2019 14:00", .format = "(d|U) (m|UNK) y H:M") +``` + + +## Changing reserved format characters + +There might be cases when the reserved characters --- `"y"`, `"m"`, `"d"`, +`"H"`, `"M"`, `"S"` --- might get in the way of specifying an adequate format. +For example, you might be tempted to use format `"HHMM"` to try to parse a time +such as `"14H00M"`. You could assume that the first "H" codes for parsing the +hour, and the second "H" to be a literal "H" but, actually, `"HH"` will be taken +to mean parsing hours, and `"MM"` to parse minutes. You can use the function +`fmt_cmp()` to specify alternative format regexps for the format, replacing the +default characters. + +In the next example, we reassign new format strings for the hour and minute +components, thus freeing the `"H"` and `"M"` patterns from being interpreted as +hours and minutes, and to be taken literally: + +```{r} +create_iso8601("14H00M", .format = "HHMM") +create_iso8601("14H00M", .format = "xHwM", .fmt_c = fmt_cmp(hour = "x", min = "w")) +``` +Note that you need to make sure that the format component regexps are mutually +exclusive, i.e. they don't have overlapping matches; otherwise +`create_iso8601()` will fail with an error. In the next example both months and +minutes could be represented by an `"m"` in the format resulting in an ambiguous +format specification. + +```{r} +fmt_cmp(hour = "h", min = "m") +try(create_iso8601("14H00M", .format = "hHmM", .fmt_c = fmt_cmp(hour = "h", min = "m"))) +``` + From 396d042c33d96fa5ff6be879f2ea43c6b2530208 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Wed, 22 Nov 2023 01:21:43 +0000 Subject: [PATCH 43/52] Add RM as author to DESCRIPTION --- DESCRIPTION | 3 +++ man/sdtm.oak-package.Rd | 1 + 2 files changed, 4 insertions(+) diff --git a/DESCRIPTION b/DESCRIPTION index d115078b..5ed2fb67 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -5,6 +5,9 @@ Version: 0.0.0.9001 Authors@R: c( person("Omar", "Garcia", email = "ogcalderon@cdisc.org", role = c("aut", "cre")), person("Rammprasad", "Ganapathy", role = "aut"), + person("Ramiro", "Magno", email = "rmagno@pattern.institute", + role = c("aut", "cre"), comment = c(ORCID = "0000-0001-5226-3441")), + person("Pattern Institute", role = c("cph", "fnd")), person("F. Hoffmann-La Roche AG", role = c("cph", "fnd")), person("Pfizer Inc", role = c("cph", "fnd")) ) diff --git a/man/sdtm.oak-package.Rd b/man/sdtm.oak-package.Rd index 042f69dd..6e3315e1 100644 --- a/man/sdtm.oak-package.Rd +++ b/man/sdtm.oak-package.Rd @@ -27,6 +27,7 @@ Authors: Other contributors: \itemize{ + \item Pattern Institute [copyright holder, funder] \item F. Hoffmann-La Roche AG [copyright holder, funder] \item Pfizer Inc [copyright holder, funder] } From 530924f6a44631c208a4834f629f6e9fc2c8019b Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Wed, 22 Nov 2023 01:23:57 +0000 Subject: [PATCH 44/52] Fix author role of RM --- DESCRIPTION | 2 +- inst/WORDLIST | 1 + man/sdtm.oak-package.Rd | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 5ed2fb67..e35c0e71 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -6,7 +6,7 @@ Authors@R: c( person("Omar", "Garcia", email = "ogcalderon@cdisc.org", role = c("aut", "cre")), person("Rammprasad", "Ganapathy", role = "aut"), person("Ramiro", "Magno", email = "rmagno@pattern.institute", - role = c("aut", "cre"), comment = c(ORCID = "0000-0001-5226-3441")), + role = "aut", comment = c(ORCID = "0000-0001-5226-3441")), person("Pattern Institute", role = c("cph", "fnd")), person("F. Hoffmann-La Roche AG", role = c("cph", "fnd")), person("Pfizer Inc", role = c("cph", "fnd")) diff --git a/inst/WORDLIST b/inst/WORDLIST index 07ffb10f..e695dab8 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -6,3 +6,4 @@ Hoffmann dtc funder vectorized +ORCID diff --git a/man/sdtm.oak-package.Rd b/man/sdtm.oak-package.Rd index 6e3315e1..eb1ff65d 100644 --- a/man/sdtm.oak-package.Rd +++ b/man/sdtm.oak-package.Rd @@ -23,6 +23,7 @@ Useful links: Authors: \itemize{ \item Rammprasad Ganapathy + \item Ramiro Magno \email{rmagno@pattern.institute} (\href{https://orcid.org/0000-0001-5226-3441}{ORCID}) } Other contributors: From b19b8a92a5c9895d747bdf471b4b1df9ebc46dc5 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Wed, 22 Nov 2023 01:25:20 +0000 Subject: [PATCH 45/52] Fix indentation at `fmt_cmp()` source --- R/parse_dttm_fmt.R | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/R/parse_dttm_fmt.R b/R/parse_dttm_fmt.R index ea3766fa..e3e6e26d 100644 --- a/R/parse_dttm_fmt.R +++ b/R/parse_dttm_fmt.R @@ -146,23 +146,20 @@ months_abb_regex <- function(x = month.abb, case = c("any", "upper", "lower", "t #' #' @export fmt_cmp <- function(sec = "S+", - min = "M+", - hour = "H+", - mday = "d+", - mon = "m+", - year = "y+") { - - structure( - list( - sec = sec, - min = min, - hour = hour, - mday = mday, - mon = mon, - year = year - ), - class = "fmt_c" - ) + min = "M+", + hour = "H+", + mday = "d+", + mon = "m+", + year = "y+") { + structure(list( + sec = sec, + min = min, + hour = hour, + mday = mday, + mon = mon, + year = year + ), + class = "fmt_c") } From aa2113556c29395161fddae73eea72e3dbe5ba9b Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Wed, 22 Nov 2023 12:02:54 +0000 Subject: [PATCH 46/52] Remove `.check_format` from examples and add an example with `fmt_cmp()` --- R/dtc_create_iso8601.R | 34 ++++++++++++++++++++-------------- man/create_iso8601.Rd | 34 ++++++++++++++++++++-------------- 2 files changed, 40 insertions(+), 28 deletions(-) diff --git a/R/dtc_create_iso8601.R b/R/dtc_create_iso8601.R index 0d50f1d9..0a0a8e64 100644 --- a/R/dtc_create_iso8601.R +++ b/R/dtc_create_iso8601.R @@ -355,29 +355,35 @@ format_iso8601 <- function(m, .cutoff_2000 = 68L) { #' create_iso8601(c("79-01-01", "80-01-01", "81-01-01"), .format = "y-m-d", .cutoff_2000 = 80) #' #' # Converting times -#' create_iso8601("15:10", .format = "HH:MM", .check_format = FALSE) -#' create_iso8601("2:10", .format = "HH:MM", .check_format = FALSE) -#' create_iso8601("2:1", .format = "HH:MM", .check_format = FALSE) -#' create_iso8601("02:01:56", .format = "HH:MM:SS", .check_format = FALSE) -#' create_iso8601("020156.5", .format = "HHMMSS", .check_format = FALSE) +#' create_iso8601("15:10", .format = "HH:MM") +#' create_iso8601("2:10", .format = "HH:MM") +#' create_iso8601("2:1", .format = "HH:MM") +#' create_iso8601("02:01:56", .format = "HH:MM:SS") +#' create_iso8601("020156.5", .format = "HHMMSS") #' #' # Converting date-times -#' create_iso8601("12 NOV 202015:15", .format = "dd mmm yyyyHH:MM", .check_format = FALSE) +#' create_iso8601("12 NOV 202015:15", .format = "dd mmm yyyyHH:MM") #' #' # Indicate allowed missing values to make the parsing pass -#' create_iso8601("U DEC 201914:00", .format = "dd mmm yyyyHH:MM", .check_format = FALSE) -#' create_iso8601("U DEC 201914:00", .format = "dd mmm yyyyHH:MM", .check_format = FALSE, .na = "U") +#' create_iso8601("U DEC 201914:00", .format = "dd mmm yyyyHH:MM") +#' create_iso8601("U DEC 201914:00", .format = "dd mmm yyyyHH:MM", .na = "U") #' -#' create_iso8601("NOV 2020", .format = "m y", .check_format = FALSE) -#' create_iso8601(c("MAR 2019", "MaR 2020", "mar 2021"), .format = "m y", .check_format = FALSE) +#' create_iso8601("NOV 2020", .format = "m y") +#' create_iso8601(c("MAR 2019", "MaR 2020", "mar 2021"), .format = "m y") #' -#' create_iso8601("2019-04-041045-", .format = "yyyy-mm-ddHHMM-", .check_format = FALSE) +#' create_iso8601("2019-04-041045-", .format = "yyyy-mm-ddHHMM-") #' -#' create_iso8601("20200507null", .format = "ymd(HH:MM:SS)", .check_format = FALSE) -#' create_iso8601("20200507null", .format = "ymd((HH:MM:SS)|null)", .check_format = FALSE) +#' create_iso8601("20200507null", .format = "ymd(HH:MM:SS)") +#' create_iso8601("20200507null", .format = "ymd((HH:MM:SS)|null)") #' #' # Fractional seconds -#' create_iso8601("2019-120602:20:13.1230001", .format = "y-mdH:M:S", .check_format = FALSE) +#' create_iso8601("2019-120602:20:13.1230001", .format = "y-mdH:M:S") +#' +#' # Use different reserved characters in the format specification +#' # Here we change "H" to "x" and "M" to "w", for hour and minute, respectively. +#' create_iso8601("14H00M", .format = "HHMM") +#' create_iso8601("14H00M", .format = "xHwM", .fmt_c = fmt_cmp(hour = "x", min = "w")) +#' #' @export create_iso8601 <- function(..., .format, .fmt_c = fmt_cmp(), .na = NULL, .cutoff_2000 = 68L, .check_format = FALSE) { diff --git a/man/create_iso8601.Rd b/man/create_iso8601.Rd index 3e53b2fe..93c58507 100644 --- a/man/create_iso8601.Rd +++ b/man/create_iso8601.Rd @@ -59,27 +59,33 @@ create_iso8601(c("67-01-01", "68-01-01", "69-01-01"), .format = "y-m-d") create_iso8601(c("79-01-01", "80-01-01", "81-01-01"), .format = "y-m-d", .cutoff_2000 = 80) # Converting times -create_iso8601("15:10", .format = "HH:MM", .check_format = FALSE) -create_iso8601("2:10", .format = "HH:MM", .check_format = FALSE) -create_iso8601("2:1", .format = "HH:MM", .check_format = FALSE) -create_iso8601("02:01:56", .format = "HH:MM:SS", .check_format = FALSE) -create_iso8601("020156.5", .format = "HHMMSS", .check_format = FALSE) +create_iso8601("15:10", .format = "HH:MM") +create_iso8601("2:10", .format = "HH:MM") +create_iso8601("2:1", .format = "HH:MM") +create_iso8601("02:01:56", .format = "HH:MM:SS") +create_iso8601("020156.5", .format = "HHMMSS") # Converting date-times -create_iso8601("12 NOV 202015:15", .format = "dd mmm yyyyHH:MM", .check_format = FALSE) +create_iso8601("12 NOV 202015:15", .format = "dd mmm yyyyHH:MM") # Indicate allowed missing values to make the parsing pass -create_iso8601("U DEC 201914:00", .format = "dd mmm yyyyHH:MM", .check_format = FALSE) -create_iso8601("U DEC 201914:00", .format = "dd mmm yyyyHH:MM", .check_format = FALSE, .na = "U") +create_iso8601("U DEC 201914:00", .format = "dd mmm yyyyHH:MM") +create_iso8601("U DEC 201914:00", .format = "dd mmm yyyyHH:MM", .na = "U") -create_iso8601("NOV 2020", .format = "m y", .check_format = FALSE) -create_iso8601(c("MAR 2019", "MaR 2020", "mar 2021"), .format = "m y", .check_format = FALSE) +create_iso8601("NOV 2020", .format = "m y") +create_iso8601(c("MAR 2019", "MaR 2020", "mar 2021"), .format = "m y") -create_iso8601("2019-04-041045-", .format = "yyyy-mm-ddHHMM-", .check_format = FALSE) +create_iso8601("2019-04-041045-", .format = "yyyy-mm-ddHHMM-") -create_iso8601("20200507null", .format = "ymd(HH:MM:SS)", .check_format = FALSE) -create_iso8601("20200507null", .format = "ymd((HH:MM:SS)|null)", .check_format = FALSE) +create_iso8601("20200507null", .format = "ymd(HH:MM:SS)") +create_iso8601("20200507null", .format = "ymd((HH:MM:SS)|null)") # Fractional seconds -create_iso8601("2019-120602:20:13.1230001", .format = "y-mdH:M:S", .check_format = FALSE) +create_iso8601("2019-120602:20:13.1230001", .format = "y-mdH:M:S") + +# Use different reserved characters in the format specification +# Here we change "H" to "x" and "M" to "w", for hour and minute, respectively. +create_iso8601("14H00M", .format = "HHMM") +create_iso8601("14H00M", .format = "xHwM", .fmt_c = fmt_cmp(hour = "x", min = "w")) + } From 81336b18f5fdd41acc93c1a657bd0ea95ad765d8 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Wed, 22 Nov 2023 12:16:31 +0000 Subject: [PATCH 47/52] Add an example to `create_iso8601()` with involving alternative formats and unk values --- R/dtc_create_iso8601.R | 5 +++++ man/create_iso8601.Rd | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/R/dtc_create_iso8601.R b/R/dtc_create_iso8601.R index 0a0a8e64..ededaa4d 100644 --- a/R/dtc_create_iso8601.R +++ b/R/dtc_create_iso8601.R @@ -384,6 +384,11 @@ format_iso8601 <- function(m, .cutoff_2000 = 68L) { #' create_iso8601("14H00M", .format = "HHMM") #' create_iso8601("14H00M", .format = "xHwM", .fmt_c = fmt_cmp(hour = "x", min = "w")) #' +#' # Alternative formats with unknown values +#' datetimes <- c("UN UNK 201914:00", "UN JAN 2021") +#' format <- list(c("dd mmm yyyy", "dd mmm yyyyHH:MM")) +#' create_iso8601(datetimes, .format = format, .na = c("UN", "UNK")) +#' #' @export create_iso8601 <- function(..., .format, .fmt_c = fmt_cmp(), .na = NULL, .cutoff_2000 = 68L, .check_format = FALSE) { diff --git a/man/create_iso8601.Rd b/man/create_iso8601.Rd index 93c58507..e55adf17 100644 --- a/man/create_iso8601.Rd +++ b/man/create_iso8601.Rd @@ -88,4 +88,9 @@ create_iso8601("2019-120602:20:13.1230001", .format = "y-mdH:M:S") create_iso8601("14H00M", .format = "HHMM") create_iso8601("14H00M", .format = "xHwM", .fmt_c = fmt_cmp(hour = "x", min = "w")) +# Alternative formats with unknown values +datetimes <- c("UN UNK 201914:00", "UN JAN 2021") +format <- list(c("dd mmm yyyy", "dd mmm yyyyHH:MM")) +create_iso8601(datetimes, .format = format, .na = c("UN", "UNK")) + } From e62fc5ff9c40c373cc215f3c0638bd8920b3b72a Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Wed, 22 Nov 2023 12:30:00 +0000 Subject: [PATCH 48/52] Add example to `create_iso8601()` about the interplay of `.format` and `.fmt_c` --- R/dtc_create_iso8601.R | 5 +++++ man/create_iso8601.Rd | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/R/dtc_create_iso8601.R b/R/dtc_create_iso8601.R index ededaa4d..34981d44 100644 --- a/R/dtc_create_iso8601.R +++ b/R/dtc_create_iso8601.R @@ -389,6 +389,11 @@ format_iso8601 <- function(m, .cutoff_2000 = 68L) { #' format <- list(c("dd mmm yyyy", "dd mmm yyyyHH:MM")) #' create_iso8601(datetimes, .format = format, .na = c("UN", "UNK")) #' +#' # Dates and times may come in many format variations +#' fmt <- "dd MMM yyyy HH nn ss" +#' fmt_cmp <- fmt_cmp(mon = "MMM", min = "nn", sec = "ss") +#' create_iso8601("05 feb 1985 12 55 02", .format = fmt, .fmt_c = fmt_cmp) +#' #' @export create_iso8601 <- function(..., .format, .fmt_c = fmt_cmp(), .na = NULL, .cutoff_2000 = 68L, .check_format = FALSE) { diff --git a/man/create_iso8601.Rd b/man/create_iso8601.Rd index e55adf17..81481975 100644 --- a/man/create_iso8601.Rd +++ b/man/create_iso8601.Rd @@ -93,4 +93,9 @@ datetimes <- c("UN UNK 201914:00", "UN JAN 2021") format <- list(c("dd mmm yyyy", "dd mmm yyyyHH:MM")) create_iso8601(datetimes, .format = format, .na = c("UN", "UNK")) +# Dates and times may come in many format variations +fmt <- "dd MMM yyyy HH nn ss" +fmt_cmp <- fmt_cmp(mon = "MMM", min = "nn", sec = "ss") +create_iso8601("05 feb 1985 12 55 02", .format = fmt, .fmt_c = fmt_cmp) + } From 53d8b5a9bc21c9ff869bda0e34f552702c2f76bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Fory=C5=9B?= Date: Wed, 22 Nov 2023 16:37:14 +0100 Subject: [PATCH 49/52] Update common.yml --- .github/workflows/common.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/common.yml b/.github/workflows/common.yml index f4aacbb0..de646ffb 100644 --- a/.github/workflows/common.yml +++ b/.github/workflows/common.yml @@ -74,7 +74,7 @@ jobs: skip-multiversion-docs: true linter: name: Lint - uses: pharmaverse/admiralci/.github/workflows/lintr.yml@lintr-latest-cran + uses: pharmaverse/admiralci/.github/workflows/lintr.yml@main if: github.event_name == 'pull_request' with: r-version: "4.3" From 763f6da095450d456ea26d86ca21bc22ca1af8a1 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Wed, 22 Nov 2023 15:57:18 +0000 Subject: [PATCH 50/52] Update style --- R/dtc_create_iso8601.R | 1 - R/parse_dttm_fmt.R | 33 ++++++++++++++-------------- tests/testthat/test-parse_dttm_fmt.R | 5 ----- 3 files changed, 17 insertions(+), 22 deletions(-) diff --git a/R/dtc_create_iso8601.R b/R/dtc_create_iso8601.R index 34981d44..9d2908de 100644 --- a/R/dtc_create_iso8601.R +++ b/R/dtc_create_iso8601.R @@ -396,7 +396,6 @@ format_iso8601 <- function(m, .cutoff_2000 = 68L) { #' #' @export create_iso8601 <- function(..., .format, .fmt_c = fmt_cmp(), .na = NULL, .cutoff_2000 = 68L, .check_format = FALSE) { - assert_fmt_c(.fmt_c) dots <- rlang::dots_list(...) diff --git a/R/parse_dttm_fmt.R b/R/parse_dttm_fmt.R index e3e6e26d..eeb1e371 100644 --- a/R/parse_dttm_fmt.R +++ b/R/parse_dttm_fmt.R @@ -151,16 +151,17 @@ fmt_cmp <- function(sec = "S+", mday = "d+", mon = "m+", year = "y+") { - structure(list( - sec = sec, - min = min, - hour = hour, - mday = mday, - mon = mon, - year = year - ), - class = "fmt_c") - + structure( + list( + sec = sec, + min = min, + hour = hour, + mday = mday, + mon = mon, + year = year + ), + class = "fmt_c" + ) } assert_fmt_c <- function(x) { @@ -297,17 +298,16 @@ fmt_dttmc <- len = len, ord = ord ) - } #' @rdname parse_dttm_fmt parse_dttm_fmt_ <- function(fmt, pattern) { - admiraldev::assert_character_scalar(fmt) admiraldev::assert_character_scalar(pattern) - if (identical(nchar(pattern), 0L)) + if (identical(nchar(pattern), 0L)) { rlang::abort("`pattern` must be a literal string of at least one char.") + } match_data <- regexpr(pattern, fmt) match <- reg_matches(fmt, match_data) @@ -378,14 +378,16 @@ parse_dttm_fmt <- function(fmt, patterns = fmt_cmp()) { # Check if patterns have matching overlap, i.e. whether they are not # mutually exclusive (as they should). - if (anyDuplicated(pseq(fmt_dttmc$start, fmt_dttmc$end))) + if (anyDuplicated(pseq(fmt_dttmc$start, fmt_dttmc$end))) { rlang::abort("Patterns in `fmt_c` have overlapping matches.") + } # Get captures' ranks while leaving NA as NA (`rank()` won't do this.) fmt_dttmc$ord <- dplyr::row_number(fmt_dttmc$start) - if (identical(nrow(fmt_dttmc), 0L)) + if (identical(nrow(fmt_dttmc), 0L)) { return(fmt_dttmc()) + } fmt_len <- nchar(fmt) @@ -437,7 +439,6 @@ parse_dttm_fmt <- function(fmt, patterns = fmt_cmp()) { #' #' @keywords internal dttm_fmt_to_regex <- function(fmt, fmt_regex = fmt_rg(), fmt_c = fmt_cmp(), anchored = TRUE) { - tbl_fmt_c <- parse_dttm_fmt(fmt, patterns = fmt_c) fmt_regex <- diff --git a/tests/testthat/test-parse_dttm_fmt.R b/tests/testthat/test-parse_dttm_fmt.R index ed79dae7..1108c569 100644 --- a/tests/testthat/test-parse_dttm_fmt.R +++ b/tests/testthat/test-parse_dttm_fmt.R @@ -1,5 +1,4 @@ test_that("`parse_dttm_fmt_`: empty fmt", { - x <- tibble::tibble( pat = character(), @@ -18,7 +17,6 @@ test_that("`parse_dttm_fmt_`: empty pattern", { }) test_that("`parse_dttm_fmt_`: basic usage", { - fmt1 <- "y m d" fmt2 <- "y-m-d" @@ -58,7 +56,6 @@ test_that("`parse_dttm_fmt_`: basic usage", { }) test_that("`parse_dttm_fmt_`: pattern variations", { - fmt <- "HH:MM:SS" x1 <- @@ -94,7 +91,6 @@ test_that("`parse_dttm_fmt_`: pattern variations", { }) test_that("`parse_dttm_fmt_`: only the first match is returned", { - fmt <- "H M S H" x1 <- @@ -130,7 +126,6 @@ test_that("`parse_dttm_fmt_`: only the first match is returned", { }) test_that("`parse_dttm_fmt`: empty fmt", { - expect_identical(fmt_dttmc(), parse_dttm_fmt("", pattern = "y")) expect_error(parse_dttm_fmt_(character(), pattern = "y")) }) From 8cd08491f114017488eadc50a52cdbcdc96fe9e8 Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Wed, 22 Nov 2023 16:08:52 +0000 Subject: [PATCH 51/52] Change "oak" to "sdtm.oak" in DESCRIPTION --- DESCRIPTION | 4 ++-- man/sdtm.oak-package.Rd | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index e35c0e71..7d93b1f5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -16,8 +16,8 @@ Description: An EDC and Data Standard agnostic SDTM data transformation engine based on standard mapping algorithms. Language: en-US License: Apache License (>= 2) -BugReports: https://github.com/pharmaverse/oak/issues -URL: https://pharmaverse.github.io/oak/, https://github.com/pharmaverse/oak +BugReports: https://github.com/pharmaverse/sdtm.oak/issues +URL: https://pharmaverse.github.io/sdtm.oak/, https://github.com/pharmaverse/sdtm.oak Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) diff --git a/man/sdtm.oak-package.Rd b/man/sdtm.oak-package.Rd index eb1ff65d..fc04b354 100644 --- a/man/sdtm.oak-package.Rd +++ b/man/sdtm.oak-package.Rd @@ -11,9 +11,9 @@ An EDC and Data Standard agnostic SDTM data transformation engine that automates \seealso{ Useful links: \itemize{ - \item \url{https://pharmaverse.github.io/oak/} - \item \url{https://github.com/pharmaverse/oak} - \item Report bugs at \url{https://github.com/pharmaverse/oak/issues} + \item \url{https://pharmaverse.github.io/sdtm.oak/} + \item \url{https://github.com/pharmaverse/sdtm.oak} + \item Report bugs at \url{https://github.com/pharmaverse/sdtm.oak/issues} } } From 30376c3da82d73f3eca0cd387d6de2ee1caa9f0f Mon Sep 17 00:00:00 2001 From: Ramiro Magno Date: Wed, 22 Nov 2023 16:45:53 +0000 Subject: [PATCH 52/52] Change "oak" to "sdtm.oak" in README --- README.Rmd | 4 ++-- README.md | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/README.Rmd b/README.Rmd index d221cffc..eea23ea5 100644 --- a/README.Rmd +++ b/README.Rmd @@ -23,9 +23,9 @@ An EDC and Data Standard agnostic SDTM data transformation engine that automates ## Installation -You can install the development version of `{sdtm.oak}` from [GitHub](https://github.com/pharmaverse/oak/) with: +You can install the development version of `{sdtm.oak}` from [GitHub](https://github.com/pharmaverse/sdtm.oak/) with: ``` r # install.packages("remotes") -remotes::install_github("pharmaverse/oak") +remotes::install_github("pharmaverse/sdtm.oak") ``` diff --git a/README.md b/README.md index ce9a234b..04f2fa5b 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,4 @@ + # sdtm.oak @@ -6,7 +7,6 @@ [![CRAN status](https://www.r-pkg.org/badges/version/sdtm.oak)](https://CRAN.R-project.org/package=sdtm.oak) - An EDC and Data Standard agnostic SDTM data transformation engine that @@ -16,9 +16,9 @@ based on standard mapping algorithms ## Installation You can install the development version of `{sdtm.oak}` from -[GitHub](https://github.com/pharmaverse/oak/) with: +[GitHub](https://github.com/pharmaverse/sdtm.oak/) with: -``` +``` r # install.packages("remotes") -remotes::install_github("pharmaverse/oak") +remotes::install_github("pharmaverse/sdtm.oak") ```