Skip to content

Commit

Permalink
Handle Missing Data Codes (#182)
Browse files Browse the repository at this point in the history
* convert logical fields

* warn for missing categorical vals

* Update microbenchmark_results.csv

* consolidate warnings

* update docs
  • Loading branch information
ezraporter authored Mar 25, 2024
1 parent 49991f3 commit 85244b4
Show file tree
Hide file tree
Showing 23 changed files with 386 additions and 58 deletions.
1 change: 1 addition & 0 deletions .github/workflows/R-CMD-check.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ jobs:
REDCAPTIDIER_DAG_API: ${{ secrets.REDCAPTIDIER_DAG_API }}
REDCAPTIDIER_LONGITUDINAL_DAG_API: ${{ secrets.REDCAPTIDIER_LONGITUDINAL_DAG_API }}
REDCAPTIDIER_MIXED_STRUCTURE_API: ${{ secrets.REDCAPTIDIER_MIXED_STRUCTURE_API }}
REDCAPTIDIER_MDC_API: ${{ secrets.REDCAPTIDIER_MDC_API }}
steps:
- name: Update Ubuntu, Install cURL Headers, add Libraries
run: |
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/test-coverage.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ jobs:
REDCAPTIDIER_LARGE_SPARSE_API: ${{ secrets.REDCAPTIDIER_LARGE_SPARSE_API }}
REDCAPTIDIER_DAG_API: ${{ secrets.REDCAPTIDIER_DAG_API }}
REDCAPTIDIER_LONGITUDINAL_DAG_API: ${{ secrets.REDCAPTIDIER_LONGITUDINAL_DAG_API }}
REDCAPTIDIER_MDC_API: ${{ secrets.REDCAPTIDIER_MDC_API }}
steps:
- name: Update Ubuntu, Install cURL Headers, add Libraries
run: |
Expand Down
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@ Imports:
formattable,
pillar,
vctrs,
readr
readr,
stats
Suggests:
covr,
knitr,
Expand Down
4 changes: 4 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -62,11 +62,14 @@ importFrom(lubridate,is.difftime)
importFrom(lubridate,is.period)
importFrom(pillar,tbl_sum)
importFrom(purrr,compose)
importFrom(purrr,discard)
importFrom(purrr,flatten_chr)
importFrom(purrr,map)
importFrom(purrr,map2)
importFrom(purrr,map_int)
importFrom(purrr,map_lgl)
importFrom(purrr,pluck)
importFrom(purrr,pmap)
importFrom(purrr,pmap_chr)
importFrom(purrr,some)
importFrom(readr,parse_character)
Expand Down Expand Up @@ -102,6 +105,7 @@ importFrom(rlang,new_environment)
importFrom(rlang,quo_get_expr)
importFrom(rlang,try_fetch)
importFrom(rlang,zap)
importFrom(stats,na.omit)
importFrom(stringi,stri_split_fixed)
importFrom(stringr,str_detect)
importFrom(stringr,str_ends)
Expand Down
3 changes: 2 additions & 1 deletion R/REDCapTidieR-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#' @importFrom formattable percent
#' @importFrom lobstr obj_size
#' @importFrom lubridate is.difftime is.period is.POSIXt is.Date
#' @importFrom purrr compose map map2 map_int map_lgl pluck pmap_chr some
#' @importFrom purrr compose map map2 map_int map_lgl pluck pmap_chr some pmap discard flatten_chr
#' @importFrom REDCapR redcap_arm_export redcap_event_instruments redcap_instruments
#' redcap_metadata_read redcap_read_oneshot sanitize_token
#' @importFrom rlang .data !!! abort as_closure caller_arg caller_env catch_cnd
Expand All @@ -27,6 +27,7 @@
#' @importFrom pillar tbl_sum
#' @importFrom readr parse_logical parse_integer parse_double parse_date parse_time
#' parse_datetime parse_character
#' @importFrom stats na.omit
"_PACKAGE"

## usethis namespace: start
Expand Down
82 changes: 82 additions & 0 deletions R/checks.R
Original file line number Diff line number Diff line change
Expand Up @@ -577,3 +577,85 @@ check_file_exists <- function(file, overwrite, call = caller_env()) {
)
}
}

#' @title
#' Parse logical field and compile data for warning if parsing errors occurred
#'
#' @description
#' Logical input is returned untouched. Any other input is converted to
#' character and passed to `parse_logical()`. Values reported in the parser's
#' `"problems"` attribute are handed back to the caller rather than warned
#' about here, so that the caller can emit one consolidated warning.
#'
#' @param x vector to parse
#'
#' @return A list with two elements: `parsed`, the logical vector with any
#' `"problems"` attribute removed, and `problems`, the unique unparseable
#' values. `problems` is `NULL` when `x` was already logical, when the parser
#' reported no problems, or when `options(redcaptidier.allow.mdc = TRUE)` is
#' set.
#'
#' @keywords internal
check_field_is_logical <- function(x) {
  out <- list(parsed = NULL, problems = NULL)

  # If already logical just return it
  if (is.logical(x)) {
    out$parsed <- x
    return(out)
  }

  # Parse with warnings muffled. The most recent warning is remembered so it
  # can be re-signalled below if it turns out not to be a parsing failure.
  cnd <- NULL
  out$parsed <- withCallingHandlers(
    parse_logical(as.character(x)),
    warning = function(w) {
      cnd <<- w
      cnd_muffle(w)
    }
  )

  # Check for parsing failures and collect the offending values if found
  probs <- attr(out$parsed, "problems")
  if (!is.null(probs)) {
    if (!getOption("redcaptidier.allow.mdc", FALSE)) {
      out$problems <- unique(probs$actual)
    }
    # Drop the attribute so the returned vector is a plain logical
    attr(out$parsed, "problems") <- NULL
  } else if (!is.null(cnd)) {
    # Some other warning was muffled by accident. Re-signal the original
    # condition object unchanged; `warning()` accepts a condition directly,
    # whereas `cli_warn()` expects a message character vector.
    warning(cnd)
  }

  out
}

#' @title
#' Check data field for field values not in metadata
#'
#' @description
#' Compares the distinct, non-missing values of a data field (as character)
#' against the set of expected values.
#'
#' @param x data field
#' @param values expected field values
#'
#' @return A character vector of the distinct values of `x` that do not appear
#' in `values`, or `NULL` when there are none. `NA`s are never reported.
#'
#' @keywords internal
check_extra_field_values <- function(x, values) {
  observed <- unique(as.character(x))
  # Keep only real (non-NA) values that have no counterpart in `values`
  unexpected <- observed[!is.na(observed) & !(observed %in% values)]

  if (length(unexpected) > 0) {
    unexpected
  } else {
    NULL
  }
}

#' @title
#' Warn about fields containing values that are missing from the metadata
#'
#' @description
#' Drops `NULL` entries from `extra_field_values` and, if anything is left,
#' raises a single warning of class `extra_field_values` naming the affected
#' fields and the distinct unexpected values.
#'
#' @param extra_field_values named list with one element per field; each
#' element is either `NULL` or a character vector of unexpected values
#' @param call call for conditions
#'
#' @return `NULL` when every element is `NULL`; otherwise called for the
#' warning it raises.
#'
#' @keywords internal
check_extra_field_values_message <- function(extra_field_values, call = caller_env()) {
  flagged <- discard(extra_field_values, is.null)

  # Nothing to report
  if (length(flagged) == 0) {
    return(NULL)
  }

  # `fields` and `values` are interpolated by name in the cli templates below
  fields <- names(flagged)
  values <- unique(flatten_chr(flagged))

  silence_hint <- paste(
    "Silence this warning with {.code options(redcaptidier.allow.mdc = TRUE)} or",
    "set {.code raw_or_label = 'raw'} to access missing data codes"
  )

  cli_warn(
    c(
      "!" = "{.code {fields}} contain{?s/} values with no labels: {values}",
      "i" = "These were converted to {.code NA} resulting in possible data loss",
      "i" = "Does your REDCap project utilize missing data codes?",
      "i" = silence_hint
    ),
    class = c("extra_field_values", "REDCapTidieR_cond"),
    call = call,
    fields = fields,
    values = values
  )
}
2 changes: 1 addition & 1 deletion R/read_redcap.R
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@
#' @param allow_mixed_structure A logical to allow for support of mixed repeating/non-repeating
#' instruments. Setting to `TRUE` will treat the mixed instrument's non-repeating versions
#' as repeating instruments with a single instance. Applies to longitudinal projects
#' only. Default `FALSE`. Can be set globally with `options(redcaptidier.allow.mixed.structure = FALSE)`.
#' only. Default `FALSE`. Can be set globally with `options(redcaptidier.allow.mixed.structure = TRUE)`.
#'
#' @examples
#' \dontrun{
Expand Down
73 changes: 67 additions & 6 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -397,11 +397,12 @@ update_data_col_names <- function(db_data, db_metadata) {
#'
#' @param db_data A REDCap database object
#' @param db_metadata A REDCap metadata object
#' @param call call for conditions
#' @inheritParams read_redcap
#'
#' @keywords internal

multi_choice_to_labels <- function(db_data, db_metadata, raw_or_label = "label") {
multi_choice_to_labels <- function(db_data, db_metadata, raw_or_label = "label", call = caller_env()) {
if (raw_or_label == "label") {
label_handler <- apply_labs_factor
} else if (raw_or_label == "haven") {
Expand All @@ -426,12 +427,11 @@ multi_choice_to_labels <- function(db_data, db_metadata, raw_or_label = "label")

# Logical Column Handling ----
# Handle columns where we change 0/1 to FALSE/TRUE (logical)
logical_cols <- db_metadata %>%
filter(.data$field_type %in% c("yesno", "truefalse", "checkbox")) %>%
pull(.data$field_name_updated)
db_data <- parse_logical_cols(db_data, db_metadata, call = call)

db_data <- db_data %>%
mutate(across(.cols = all_of(logical_cols), as.logical))
# Buffer for fields with extra field values to be populated by check_extra_field_values
extra_field_values <- vector("list", length = nrow(db_metadata))
names(extra_field_values) <- db_metadata$field_name_updated

for (i in seq_len(nrow(db_metadata))) {
# Extract metadata field name and database corresponding column name
Expand Down Expand Up @@ -470,6 +470,13 @@ multi_choice_to_labels <- function(db_data, db_metadata, raw_or_label = "label")
warn_stripped_text = stripped_text_flag
)

if (!getOption("redcaptidier.allow.mdc", FALSE)) {
extra_field_values[i] <- check_extra_field_values(
db_data[[field_name]],
names(parse_labels_output)
)
}

# Replace values from db_data$(field_name) with label values from
# parse_labels key

Expand All @@ -480,9 +487,63 @@ multi_choice_to_labels <- function(db_data, db_metadata, raw_or_label = "label")
)
}
}

check_extra_field_values_message(extra_field_values, call = call)

db_data
}

#' @title
#' Convert yesno, truefalse, and checkbox fields to logical
#'
#' @description
#' Looks up the fields whose `field_type` is `yesno`, `truefalse`, or
#' `checkbox` in `db_metadata`, parses the matching `db_data` columns with
#' `check_field_is_logical()`, and raises one consolidated warning of class
#' `field_is_logical` listing any fields with unparseable values. The warning
#' is skipped when `options(redcaptidier.allow.mdc = TRUE)` is set.
#'
#' @inheritParams multi_choice_to_labels
#'
#' @return `db_data` with the logical-type columns replaced by their parsed
#' versions; returned unchanged when the metadata has no such fields.
#'
#' @keywords internal
parse_logical_cols <- function(db_data, db_metadata, call = caller_env()) {
  logical_types <- c("yesno", "truefalse", "checkbox")
  logical_meta <- filter(db_metadata, .data$field_type %in% logical_types)

  # No logical-type fields in the metadata: nothing to convert
  if (nrow(logical_meta) == 0) {
    return(db_data)
  }

  col_names <- logical_meta$field_name_updated
  results <- map(db_data[col_names], check_field_is_logical)

  # Swap in the parsed columns on the local copy of the data
  db_data[col_names] <- map(results, "parsed")

  # Missing data codes explicitly allowed: convert silently
  if (getOption("redcaptidier.allow.mdc", FALSE)) {
    return(db_data)
  }

  problems <- discard(map(results, "problems"), is.null)
  if (length(problems) == 0) {
    return(db_data)
  }

  # `fields` and `values` are interpolated by name in the cli templates below
  fields <- names(problems)
  values <- unique(flatten_chr(problems))

  silence_hint <- paste(
    "Silence this warning with {.code options(redcaptidier.allow.mdc = TRUE)} or",
    "set {.code raw_or_label = 'raw'} to access missing data codes"
  )

  cli_warn(
    c(
      "!" = "{.code {fields}} {?is/are} logical but contain{?s/} non-logical values: {values}",
      "i" = "These were converted to {.code NA} resulting in possible data loss",
      "i" = "Does your REDCap project utilize missing data codes?",
      "i" = silence_hint
    ),
    class = c("field_is_logical", "REDCapTidieR_cond"),
    call = call,
    fields = fields,
    problems = values
  )

  db_data
}

#' @title
#' Apply factor labels to a vector
#'
Expand Down
5 changes: 4 additions & 1 deletion man/apply_labs_factor.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/apply_labs_haven.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 17 additions & 0 deletions man/check_extra_field_values.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 15 additions & 0 deletions man/check_field_is_logical.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 8 additions & 1 deletion man/multi_choice_to_labels.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 19 additions & 0 deletions man/parse_logical_cols.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/read_redcap.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 85244b4

Please sign in to comment.