Skip to content

Commit

Permalink
Closes #29
Browse files Browse the repository at this point in the history
The `problems()` is introduced that allows easy retrieval of what went wrong with the parsing by `create_iso8601()`
  • Loading branch information
ramiromagno committed Dec 13, 2023
1 parent e756942 commit 22d69ae
Show file tree
Hide file tree
Showing 7 changed files with 174 additions and 59 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,6 @@
S3method(print,iso8601)
export(create_iso8601)
export(fmt_cmp)
export(problems)
importFrom(rlang,.data)
importFrom(tibble,tibble)
93 changes: 51 additions & 42 deletions R/dtc_create_iso8601.R
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,7 @@ format_iso8601 <- function(m, .cutoff_2000 = 68L) {
#' meaning to check against a selection of validated formats in
#' [dtc_formats][sdtm.oak::dtc_formats]; or to have a more permissible
#' interpretation of the formats.
#' @param .warn Whether to warn about parsing failures.
#'
#' @examples
#' # Converting dates
Expand Down Expand Up @@ -403,50 +404,58 @@ create_iso8601 <-
.cutoff_2000 = 68L,
.check_format = FALSE,
.warn = TRUE) {

assert_fmt_c(.fmt_c)
admiraldev::assert_logical_scalar(.check_format)
admiraldev::assert_logical_scalar(.warn)

dots <- rlang::dots_list(...)

if (rlang::is_empty(dots)) {
return(character())
}

# Check if all vectors in `dots` are of character type.
if (!identical(unique(sapply(dots, typeof)), "character")) {
rlang::abort("All vectors in `...` must be of type character.")
}

# Check if all vectors in `dots` are of the same length.
n <- unique(lengths(dots))
if (!identical(length(n), 1L)) {
rlang::abort("All vectors in `...` must be of the same length.")
}

if (!identical(length(dots), length(.format))) {
rlang::abort("Number of vectors in `...` should match length of `.format`.")
}

# Check that the `.format` is either a character vector or a list of
# character vectors, and that each string is one of the possible formats.
if (.check_format) assert_dtc_format(.format)

cap_matrices <- purrr::map2(dots, .format, ~ parse_dttm(dttm = .x, fmt = .y, na = .na, fmt_c = .fmt_c))
cap_matrix <- coalesce_capture_matrices(!!!cap_matrices)

iso8601 <- format_iso8601(cap_matrix, .cutoff_2000 = .cutoff_2000)
iso8601 <- add_problems(iso8601, dots)
class(iso8601) <- "iso8601"

if (.warn) {
warn_problems(iso8601)
assert_fmt_c(.fmt_c)
admiraldev::assert_logical_scalar(.check_format)
admiraldev::assert_logical_scalar(.warn)

dots <- rlang::dots_list(...)

if (rlang::is_empty(dots)) {
return(character())
}

# Check if all vectors in `dots` are of character type.
if (!identical(unique(sapply(dots, typeof)), "character")) {
rlang::abort("All vectors in `...` must be of type character.")
}

# Check if all vectors in `dots` are of the same length.
n <- unique(lengths(dots))
if (!identical(length(n), 1L)) {
rlang::abort("All vectors in `...` must be of the same length.")
}

if (!identical(length(dots), length(.format))) {
rlang::abort("Number of vectors in `...` should match length of `.format`.")
}

# Check that the `.format` is either a character vector or a list of
# character vectors, and that each string is one of the possible formats.
if (.check_format)
assert_dtc_format(.format)

cap_matrices <-
purrr::map2(dots,
.format,
~ parse_dttm(
dttm = .x,
fmt = .y,
na = .na,
fmt_c = .fmt_c
))
cap_matrix <- coalesce_capture_matrices(!!!cap_matrices)

iso8601 <- format_iso8601(cap_matrix, .cutoff_2000 = .cutoff_2000)
iso8601 <- add_problems(iso8601, dots)
class(iso8601) <- "iso8601"

if (.warn && rlang::is_interactive()) {
warn_problems(iso8601)
}

iso8601
}

iso8601
}

#' @export
print.iso8601 <- function(x, ...) {
# Here we take advantage of the subset operator `[` dropping
Expand Down
55 changes: 51 additions & 4 deletions R/dtc_problems.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,54 @@ add_problems <- function(x, dtc) {
x
}

#' Retrieve date/time parsing problems
#'
#' [problems()] is a companion helper function to [create_iso8601()]. It
#' retrieves ISO 8601 parsing problems from an object of class iso8601, which is
#' [create_iso8601()]'s return value and that might contain a `problems`
#' attribute in case of parsing failures. [problems()] is a helper function that
#' provides easy access to these parsing problems.
#'
#' @param x An object of class iso8601, as typically obtained from a call to
#' [create_iso8601()]. The argument can also be left empty, in that case it
#' `problems()` will use the last returned value, making it convenient to use
#' immediately after [create_iso8601()].
#'
#' @returns If there are no parsing problems in `x`, then the returned value is
#' `NULL`; otherwise, a [tibble][tibble::tibble-package] of parsing failures
#' is returned. Each row corresponds to a parsing problem. There will be a
#' first column named `..i` indicating the position(s) in the inputs to the
#' [create_iso8601()] call that resulted in failures; remaining columns
#' correspond to the original input values passed on to [create_iso8601()],
#' with columns being automatically named `..var1`, `..var2`, and so on, if
#' the inputs to [create_iso8601()] were unnamed, otherwise, the original
#' variable names are used instead.
#'
#' @examples
#' dates <-
#' c(
#' "2020-01-01",
#' "2020-02-11",
#' "2020-01-06",
#' "2020-0921",
#' "2020/10/30",
#' "2020-12-05",
#' "20231225"
#' )
#'
#' #' # By inspect the problematic dates it can be understood that
#' # the `.format` parameter needs to update to include other variations.
#' iso8601_dttm <- create_iso8601(dates, .format = "y-m-d")
#' problems(iso8601_dttm)
#'
#' # Including more parsing formats addresses the previous problems
#' formats <- c("y-m-d", "y-md", "y/m/d", "ymd")
#' iso8601_dttm2 <- create_iso8601(dates, .format = list(formats))
#'
#' # So now `problems()` returns `NULL` because there are no more parsing issues.
#' problems(iso8601_dttm2)
#'
#' @export
problems <- function(x = .Last.value) {
probs <- attr(x, "problems")
if (!is.null(probs)) {
Expand All @@ -43,14 +91,13 @@ n_problems <- function(x) {

warn_problems <- function(x) {
n_probs <- n_problems(x)
if (n_probs > 0) {
if (n_probs > 0L) {
msg <- paste(
sprintf("There were parsing %d problems.", n_probs),
"Run `problems()` on parsed results for details."
)
rlang::warn(msg)
invisible(NULL)
} else {
invisible(NULL)
}

invisible(NULL)
}
2 changes: 2 additions & 0 deletions man/create_iso8601.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

57 changes: 57 additions & 0 deletions man/problems.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

24 changes: 12 additions & 12 deletions tests/testthat/test-create_iso8601.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,61 +2,61 @@ test_that("`create_iso8601()`: individual date components", {
x <- c("0", "50", "1950", "80", "1980", "2000")
y0 <- create_iso8601(x, .format = "y", .check_format = FALSE)
y1 <- c(NA, "2050", "1950", "1980", "1980", "2000")
expect_identical(y0, y1)
expect_identical(as.character(y0), y1)

x <- c("0", "jan", "JAN", "JaN", "1", "01")
y0 <- create_iso8601(x, .format = "m", .check_format = FALSE)
y1 <- c(NA, "--01", "--01", "--01", NA, "--01")
expect_identical(y0, y1)
expect_identical(as.character(y0), y1)

x <- c("0", "00", "1", "01", "10", "31")
y0 <- create_iso8601(x, .format = "d", .check_format = FALSE)
y1 <- c("----00", "----00", "----01", "----01", "----10", "----31")
expect_identical(y0, y1)
expect_identical(as.character(y0), y1)
})

test_that("`create_iso8601()`: dates", {
y1 <- c("1999-01-01", "2000-01-01", "1999-01-01", "1999-12-31")

x <- c("19990101", "20000101", "990101", "991231")
y0 <- create_iso8601(x, .format = "ymd", .check_format = FALSE)
expect_identical(y0, y1)
expect_identical(as.character(y0), y1)

x <- c("1999-01-01", "2000-01-01", "99-01-01", "99-12-31")
y0 <- create_iso8601(x, .format = "y-m-d", .check_format = FALSE)
expect_identical(y0, y1)
expect_identical(as.character(y0), y1)

x <- c("1999 01 01", "2000 01 01", "99 01 01", "99 12 31")
y0 <- create_iso8601(x, .format = "y m d", .check_format = FALSE)
expect_identical(y0, y1)
expect_identical(as.character(y0), y1)
})

test_that("`create_iso8601()`: times: hours and minutes", {
y1 <- c("-----T15:20", "-----T00:10", "-----T23:01", "-----T00:00")

x <- c("1520", "0010", "2301", "0000")
y0 <- create_iso8601(x, .format = "HM", .check_format = FALSE)
expect_identical(y0, y1)
expect_identical(as.character(y0), y1)

x <- c("15:20", "00:10", "23:01", "00:00")
y0 <- create_iso8601(x, .format = "H:M", .check_format = FALSE)
expect_identical(y0, y1)
expect_identical(as.character(y0), y1)

x <- c("15h20", "00h10", "23h01", "00h00")
y0 <- create_iso8601(x, .format = "HhM", .check_format = FALSE)
expect_identical(y0, y1)
expect_identical(as.character(y0), y1)
})

test_that("`create_iso8601()`: times: hours, minutes and seconds", {
x <- c("152000", "001059", "230112.123", "00002.")
y0 <- create_iso8601(x, .format = "HMS", .check_format = FALSE)
y1 <- c("-----T15:20:00", "-----T00:10:59", "-----T23:01:12.123", "-----T00:00:02")
expect_identical(y0, y1)
expect_identical(as.character(y0), y1)

x <- c("15:20:00", "00:10:59", "23:01:12.123", "00:00:2.", "5:1:4")
y0 <- create_iso8601(x, .format = "H:M:S", .check_format = FALSE)
y1 <- c(y1, "-----T05:01:04")
expect_identical(y0, y1)
expect_identical(as.character(y0), y1)
})


Expand All @@ -71,5 +71,5 @@ test_that("`create_iso8601()`: dates and times", {
"1999-01-01T23:01",
"1999-12-31T00:00"
)
expect_identical(iso8601_dttm, expectation)
expect_identical(as.character(iso8601_dttm), expectation)
})
1 change: 0 additions & 1 deletion tests/testthat/test-parse_dttm.R
Original file line number Diff line number Diff line change
Expand Up @@ -51,4 +51,3 @@ test_that("`months_abb_regex()`: lowercase", {
)
expect_identical(months_abb_regex(case = "lower"), x)
})

0 comments on commit 22d69ae

Please sign in to comment.