diff --git a/NAMESPACE b/NAMESPACE index 38607f72..7fc88eef 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,5 +3,6 @@ S3method(print,iso8601) export(create_iso8601) export(fmt_cmp) +export(problems) importFrom(rlang,.data) importFrom(tibble,tibble) diff --git a/R/dtc_create_iso8601.R b/R/dtc_create_iso8601.R index 8bf15dd6..0bd3f90b 100644 --- a/R/dtc_create_iso8601.R +++ b/R/dtc_create_iso8601.R @@ -337,6 +337,7 @@ format_iso8601 <- function(m, .cutoff_2000 = 68L) { #' meaning to check against a selection of validated formats in #' [dtc_formats][sdtm.oak::dtc_formats]; or to have a more permissible #' interpretation of the formats. +#' @param .warn Whether to warn about parsing failures. #' #' @examples #' # Converting dates @@ -403,50 +404,58 @@ create_iso8601 <- .cutoff_2000 = 68L, .check_format = FALSE, .warn = TRUE) { - - assert_fmt_c(.fmt_c) - admiraldev::assert_logical_scalar(.check_format) - admiraldev::assert_logical_scalar(.warn) - - dots <- rlang::dots_list(...) - - if (rlang::is_empty(dots)) { - return(character()) - } - - # Check if all vectors in `dots` are of character type. - if (!identical(unique(sapply(dots, typeof)), "character")) { - rlang::abort("All vectors in `...` must be of type character.") - } - - # Check if all vectors in `dots` are of the same length. - n <- unique(lengths(dots)) - if (!identical(length(n), 1L)) { - rlang::abort("All vectors in `...` must be of the same length.") - } - - if (!identical(length(dots), length(.format))) { - rlang::abort("Number of vectors in `...` should match length of `.format`.") - } - - # Check that the `.format` is either a character vector or a list of - # character vectors, and that each string is one of the possible formats. - if (.check_format) assert_dtc_format(.format) - - cap_matrices <- purrr::map2(dots, .format, ~ parse_dttm(dttm = .x, fmt = .y, na = .na, fmt_c = .fmt_c)) - cap_matrix <- coalesce_capture_matrices(!!!cap_matrices) - - iso8601 <- format_iso8601(cap_matrix, .cutoff_2000 = .cutoff_2000) - iso8601 <- add_problems(iso8601, dots) - class(iso8601) <- "iso8601" - - if (.warn) { - warn_problems(iso8601) + assert_fmt_c(.fmt_c) + admiraldev::assert_logical_scalar(.check_format) + admiraldev::assert_logical_scalar(.warn) + + dots <- rlang::dots_list(...) + + if (rlang::is_empty(dots)) { + return(character()) + } + + # Check if all vectors in `dots` are of character type. + if (!identical(unique(sapply(dots, typeof)), "character")) { + rlang::abort("All vectors in `...` must be of type character.") + } + + # Check if all vectors in `dots` are of the same length. + n <- unique(lengths(dots)) + if (!identical(length(n), 1L)) { + rlang::abort("All vectors in `...` must be of the same length.") + } + + if (!identical(length(dots), length(.format))) { + rlang::abort("Number of vectors in `...` should match length of `.format`.") + } + + # Check that the `.format` is either a character vector or a list of + # character vectors, and that each string is one of the possible formats. + if (.check_format) + assert_dtc_format(.format) + + cap_matrices <- + purrr::map2(dots, + .format, + ~ parse_dttm( + dttm = .x, + fmt = .y, + na = .na, + fmt_c = .fmt_c + )) + cap_matrix <- coalesce_capture_matrices(!!!cap_matrices) + + iso8601 <- format_iso8601(cap_matrix, .cutoff_2000 = .cutoff_2000) + iso8601 <- add_problems(iso8601, dots) + class(iso8601) <- "iso8601" + + if (.warn && rlang::is_interactive()) { + warn_problems(iso8601) + } + + iso8601 } - iso8601 -} - #' @export print.iso8601 <- function(x, ...) { # Here we take advantage of the subset operator `[` dropping diff --git a/R/dtc_problems.R b/R/dtc_problems.R index 6b5b144c..d50f9c37 100644 --- a/R/dtc_problems.R +++ b/R/dtc_problems.R @@ -23,6 +23,54 @@ add_problems <- function(x, dtc) { x } +#' Retrieve date/time parsing problems +#' +#' [problems()] is a companion helper function to [create_iso8601()]. It +#' retrieves ISO 8601 parsing problems from an object of class iso8601, which is +#' [create_iso8601()]'s return value and that might contain a `problems` +#' attribute in case of parsing failures. [problems()] is a helper function that +#' provides easy access to these parsing problems. +#' +#' @param x An object of class iso8601, as typically obtained from a call to +#' [create_iso8601()]. The argument can also be left empty, in that case it +#' `problems()` will use the last returned value, making it convenient to use +#' immediately after [create_iso8601()]. +#' +#' @returns If there are no parsing problems in `x`, then the returned value is +#' `NULL`; otherwise, a [tibble][tibble::tibble-package] of parsing failures +#' is returned. Each row corresponds to a parsing problem. There will be a +#' first column named `..i` indicating the position(s) in the inputs to the +#' [create_iso8601()] call that resulted in failures; remaining columns +#' correspond to the original input values passed on to [create_iso8601()], +#' with columns being automatically named `..var1`, `..var2`, and so on, if +#' the inputs to [create_iso8601()] were unnamed, otherwise, the original +#' variable names are used instead. +#' +#' @examples +#' dates <- +#' c( +#' "2020-01-01", +#' "2020-02-11", +#' "2020-01-06", +#' "2020-0921", +#' "2020/10/30", +#' "2020-12-05", +#' "20231225" +#' ) +#' +#' #' # By inspect the problematic dates it can be understood that +#' # the `.format` parameter needs to update to include other variations. +#' iso8601_dttm <- create_iso8601(dates, .format = "y-m-d") +#' problems(iso8601_dttm) +#' +#' # Including more parsing formats addresses the previous problems +#' formats <- c("y-m-d", "y-md", "y/m/d", "ymd") +#' iso8601_dttm2 <- create_iso8601(dates, .format = list(formats)) +#' +#' # So now `problems()` returns `NULL` because there are no more parsing issues. +#' problems(iso8601_dttm2) +#' +#' @export problems <- function(x = .Last.value) { probs <- attr(x, "problems") if (!is.null(probs)) { @@ -43,14 +91,13 @@ n_problems <- function(x) { warn_problems <- function(x) { n_probs <- n_problems(x) - if (n_probs > 0) { + if (n_probs > 0L) { msg <- paste( sprintf("There were parsing %d problems.", n_probs), "Run `problems()` on parsed results for details." ) rlang::warn(msg) - invisible(NULL) - } else { - invisible(NULL) } + + invisible(NULL) } diff --git a/man/create_iso8601.Rd b/man/create_iso8601.Rd index 7aea3f09..b7a69c60 100644 --- a/man/create_iso8601.Rd +++ b/man/create_iso8601.Rd @@ -38,6 +38,8 @@ though starting with \code{19}.} meaning to check against a selection of validated formats in \link[=dtc_formats]{dtc_formats}; or to have a more permissible interpretation of the formats.} + +\item{.warn}{Whether to warn about parsing failures.} } \description{ \code{\link[=create_iso8601]{create_iso8601()}} converts vectors of dates, times or date-times to \href{https://en.wikipedia.org/wiki/ISO_8601}{ISO 8601} format. Learn more in diff --git a/man/problems.Rd b/man/problems.Rd new file mode 100644 index 00000000..c820713b --- /dev/null +++ b/man/problems.Rd @@ -0,0 +1,57 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dtc_problems.R +\name{problems} +\alias{problems} +\title{Retrieve date/time parsing problems} +\usage{ +problems(x = .Last.value) +} +\arguments{ +\item{x}{An object of class iso8601, as typically obtained from a call to +\code{\link[=create_iso8601]{create_iso8601()}}. The argument can also be left empty, in that case it +\code{problems()} will use the last returned value, making it convenient to use +immediately after \code{\link[=create_iso8601]{create_iso8601()}}.} +} +\value{ +If there are no parsing problems in \code{x}, then the returned value is +\code{NULL}; otherwise, a \link[tibble:tibble-package]{tibble} of parsing failures +is returned. Each row corresponds to a parsing problem. There will be a +first column named \code{..i} indicating the position(s) in the inputs to the +\code{\link[=create_iso8601]{create_iso8601()}} call that resulted in failures; remaining columns +correspond to the original input values passed on to \code{\link[=create_iso8601]{create_iso8601()}}, +with columns being automatically named \code{..var1}, \code{..var2}, and so on, if +the inputs to \code{\link[=create_iso8601]{create_iso8601()}} were unnamed, otherwise, the original +variable names are used instead. +} +\description{ +\code{\link[=problems]{problems()}} is a companion helper function to \code{\link[=create_iso8601]{create_iso8601()}}. It +retrieves ISO 8601 parsing problems from an object of class iso8601, which is +\code{\link[=create_iso8601]{create_iso8601()}}'s return value and that might contain a \code{problems} +attribute in case of parsing failures. \code{\link[=problems]{problems()}} is a helper function that +provides easy access to these parsing problems. +} +\examples{ +dates <- +c( + "2020-01-01", + "2020-02-11", + "2020-01-06", + "2020-0921", + "2020/10/30", + "2020-12-05", + "20231225" +) + +#' # By inspect the problematic dates it can be understood that +# the `.format` parameter needs to update to include other variations. +iso8601_dttm <- create_iso8601(dates, .format = "y-m-d") +problems(iso8601_dttm) + +# Including more parsing formats addresses the previous problems +formats <- c("y-m-d", "y-md", "y/m/d", "ymd") +iso8601_dttm2 <- create_iso8601(dates, .format = list(formats)) + +# So now `problems()` returns `NULL` because there are no more parsing issues. +problems(iso8601_dttm2) + +} diff --git a/tests/testthat/test-create_iso8601.R b/tests/testthat/test-create_iso8601.R index f1325cfc..b21e9305 100644 --- a/tests/testthat/test-create_iso8601.R +++ b/tests/testthat/test-create_iso8601.R @@ -2,17 +2,17 @@ test_that("`create_iso8601()`: individual date components", { x <- c("0", "50", "1950", "80", "1980", "2000") y0 <- create_iso8601(x, .format = "y", .check_format = FALSE) y1 <- c(NA, "2050", "1950", "1980", "1980", "2000") - expect_identical(y0, y1) + expect_identical(as.character(y0), y1) x <- c("0", "jan", "JAN", "JaN", "1", "01") y0 <- create_iso8601(x, .format = "m", .check_format = FALSE) y1 <- c(NA, "--01", "--01", "--01", NA, "--01") - expect_identical(y0, y1) + expect_identical(as.character(y0), y1) x <- c("0", "00", "1", "01", "10", "31") y0 <- create_iso8601(x, .format = "d", .check_format = FALSE) y1 <- c("----00", "----00", "----01", "----01", "----10", "----31") - expect_identical(y0, y1) + expect_identical(as.character(y0), y1) }) test_that("`create_iso8601()`: dates", { @@ -20,15 +20,15 @@ test_that("`create_iso8601()`: dates", { x <- c("19990101", "20000101", "990101", "991231") y0 <- create_iso8601(x, .format = "ymd", .check_format = FALSE) - expect_identical(y0, y1) + expect_identical(as.character(y0), y1) x <- c("1999-01-01", "2000-01-01", "99-01-01", "99-12-31") y0 <- create_iso8601(x, .format = "y-m-d", .check_format = FALSE) - expect_identical(y0, y1) + expect_identical(as.character(y0), y1) x <- c("1999 01 01", "2000 01 01", "99 01 01", "99 12 31") y0 <- create_iso8601(x, .format = "y m d", .check_format = FALSE) - expect_identical(y0, y1) + expect_identical(as.character(y0), y1) }) test_that("`create_iso8601()`: times: hours and minutes", { @@ -36,27 +36,27 @@ test_that("`create_iso8601()`: times: hours and minutes", { x <- c("1520", "0010", "2301", "0000") y0 <- create_iso8601(x, .format = "HM", .check_format = FALSE) - expect_identical(y0, y1) + expect_identical(as.character(y0), y1) x <- c("15:20", "00:10", "23:01", "00:00") y0 <- create_iso8601(x, .format = "H:M", .check_format = FALSE) - expect_identical(y0, y1) + expect_identical(as.character(y0), y1) x <- c("15h20", "00h10", "23h01", "00h00") y0 <- create_iso8601(x, .format = "HhM", .check_format = FALSE) - expect_identical(y0, y1) + expect_identical(as.character(y0), y1) }) test_that("`create_iso8601()`: times: hours, minutes and seconds", { x <- c("152000", "001059", "230112.123", "00002.") y0 <- create_iso8601(x, .format = "HMS", .check_format = FALSE) y1 <- c("-----T15:20:00", "-----T00:10:59", "-----T23:01:12.123", "-----T00:00:02") - expect_identical(y0, y1) + expect_identical(as.character(y0), y1) x <- c("15:20:00", "00:10:59", "23:01:12.123", "00:00:2.", "5:1:4") y0 <- create_iso8601(x, .format = "H:M:S", .check_format = FALSE) y1 <- c(y1, "-----T05:01:04") - expect_identical(y0, y1) + expect_identical(as.character(y0), y1) }) @@ -71,5 +71,5 @@ test_that("`create_iso8601()`: dates and times", { "1999-01-01T23:01", "1999-12-31T00:00" ) - expect_identical(iso8601_dttm, expectation) + expect_identical(as.character(iso8601_dttm), expectation) }) diff --git a/tests/testthat/test-parse_dttm.R b/tests/testthat/test-parse_dttm.R index 7ebbcf7e..8da7ca9a 100644 --- a/tests/testthat/test-parse_dttm.R +++ b/tests/testthat/test-parse_dttm.R @@ -51,4 +51,3 @@ test_that("`months_abb_regex()`: lowercase", { ) expect_identical(months_abb_regex(case = "lower"), x) }) -