diff --git a/DESCRIPTION b/DESCRIPTION index 150c6859..b808e7cf 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,86 +1,62 @@ Package: xportr Title: Utilities to Output CDISC SDTM/ADaM XPT Files -Version: 0.3.1.9000 -Authors@R: - c( - person(given = "Eli", - family = "Miller", - role = c("aut", "cre"), - email = "Eli.Miller@AtorusResearch.com", +Version: 0.3.1.9007 +Authors@R: c( + person("Eli", "Miller", , "Eli.Miller@AtorusResearch.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-2127-9456")), - person(given = "Vignesh ", - family = "Thanikachalam", - role = c("aut")), - person(given = "Ben", - family = "Straub", - role = ("aut")), - person(given = "Ross", - family = "Didenko", - role = ("aut")), - person(given = "Zelos", - family = "Zhu", - role = ("aut")), - person(given = "Ethan", - family = "Brockmann", - role = ("aut")), - person(given = "Vedha", - family = "Viyash", - role = ("aut")), - person(given = "Andre", - family = "Verissimo", - role = ("aut")), - person(given = "Sophie", - family = "Shapcott", - role = ("aut")), - person(given = "Celine", - family = "Piraux", - role = ("aut")), - person(given = "Adrian", - family = "Chan", - role = ("aut")), - person(given = "Sadchla", - family = "Mascary", - role = ("aut")), - person(given = "Atorus/GSK JPT", - role = "cph")) -Description: Tools to build CDISC compliant data sets and check for CDISC compliance. + person("Vignesh ", "Thanikachalam", role = "aut"), + person("Ben", "Straub", role = "aut"), + person("Ross", "Didenko", role = "aut"), + person("Zelos", "Zhu", role = "aut"), + person("Ethan", "Brockmann", role = "aut"), + person("Vedha", "Viyash", role = "aut"), + person("Andre", "Verissimo", role = "aut"), + person("Sophie", "Shapcott", role = "aut"), + person("Celine", "Piraux", role = "aut"), + person("Adrian", "Chan", role = "aut"), + person("Sadchla", "Mascary", role = "aut"), + person("Atorus/GSK JPT", role = "cph") + ) +Description: Tools to build CDISC compliant data sets and check for CDISC + compliance. +License: MIT + file LICENSE URL: https://github.com/atorus-research/xportr BugReports: https://github.com/atorus-research/xportr/issues +Depends: + R (>= 3.5) Imports: checkmate, + cli, dplyr (>= 1.0.2), - purrr (>= 0.3.4), - stringr (>= 1.4.0), - magrittr, glue (>= 1.4.2), + haven (>= 2.5.0), + janitor, + lifecycle, + magrittr, + purrr (>= 0.3.4), + readr, rlang (>= 0.4.10), - cli, + stringr (>= 1.4.0), tidyselect, - readr, - janitor, - tm, - haven (>= 2.5.0), - lifecycle -License: MIT + file LICENSE -Encoding: UTF-8 -LazyData: true -Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.3 + tm Suggests: - testthat (>= 3.0.0), - withr, - knitr, - rmarkdown, - readxl, - DT, - labelled, admiral, devtools, + DT, + knitr, + labelled, + lintr, + metacore, + readxl, + rmarkdown, spelling, + testthat (>= 3.0.0), usethis, - lintr, - metacore + withr +VignetteBuilder: + knitr Config/testthat/edition: 3 -VignetteBuilder: knitr -Depends: - R (>= 3.5) +Encoding: UTF-8 +LazyData: true +Roxygen: list(markdown = TRUE) +RoxygenNote: 7.2.3 diff --git a/NAMESPACE b/NAMESPACE index e07dd8b3..41d143e8 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -6,7 +6,6 @@ export(type_log) export(var_names_log) export(var_ord_msg) export(xportr_df_label) -export(xportr_domain_name) export(xportr_format) export(xportr_label) export(xportr_length) @@ -37,6 +36,7 @@ importFrom(cli,cli_alert_success) importFrom(cli,cli_div) importFrom(cli,cli_h2) importFrom(cli,cli_text) +importFrom(dplyr,across) importFrom(dplyr,arrange) importFrom(dplyr,bind_cols) importFrom(dplyr,case_when) @@ -75,6 +75,7 @@ importFrom(stringr,str_replace) importFrom(stringr,str_replace_all) importFrom(tidyselect,all_of) importFrom(tidyselect,any_of) +importFrom(tidyselect,where) importFrom(tm,stemDocument) importFrom(utils,capture.output) importFrom(utils,packageVersion) diff --git a/NEWS.md b/NEWS.md index 5dfd35bf..54bb342b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,11 +2,24 @@ ## New Features and Bug Fixes +* `xportr_write()` now accepts `metadata` argument which can be used to set the dataset label to stay consistent with the other `xportr_*` functions. It is noteworthy that the dataset label set using the `xportr_df_label()` function will be retained during the `xportr_write()`. +* Exporting a new dataset `dataset_spec` that contains the Dataset Specification for ADSL. (#179) +* Added a check for character variable lengths up to 200 bytes in `xpt_validate()`(#91, #189). +* File name check is moved to strict_checks condition to allow underscores in the file name. Underscores are allowed in xpt but not per FDA requirements. (#126) * Adds argument assertions to public functions using `{checkmate}` (#175) +## Deprecation and Breaking Changes + +* The `domain` argument for xportr functions will no longer be dynamically +determined by the name of the data frame passed as the .df argument. This was +done to make the use of xportr functions more explicit. (#182) +* The `label` argument from the `xportr_write()` function is deprecated in favor of the `metadata` argument. (#179) +* The `metacore` argument, which was renamed to `metadata` in the following six xportr functions: (`xportr_df_label()`, `xportr_format()`, `xportr_label()`, `xportr_length()`, `xportr_order()`, and `xportr_type()`) in version `0.3.0` with a soft deprecation warning, has now been hard deprecated. Please update your code to use the new `metadata` argument in place of `metacore`. + ## Documentation -## Deprecation and Breaking Changes +* Created development version of the website (#187) +* Additional guidance for options added in deep dive vignette (#81) # xportr 0.3.1 diff --git a/R/data.R b/R/data.R index 96e24de2..ca83a2a6 100644 --- a/R/data.R +++ b/R/data.R @@ -56,7 +56,7 @@ #' } "adsl" -#' Example Dataset Specification +#' Example Dataset Variable Specification #' #' @format ## `var_spec` #' A data frame with 216 rows and 19 columns: @@ -82,3 +82,20 @@ #' \item{Developer Notes}{Developer Notes} #' } "var_spec" + +#' Example Dataset Specification +#' +#' @format ## `dataset_spec` +#' A data frame with 1 row and 9 columns: +#' \describe{ +#' \item{Dataset}{ Dataset} +#' \item{Description}{ Dataset description} +#' \item{Class}{ Dataset class} +#' \item{Structure}{ Logical, indicating if there's a specific structure} +#' \item{Purpose}{ Purpose of the dataset} +#' \item{Key, Variables}{ Join Key variables in the dataset} +#' \item{Repeating}{ Indicates if the dataset is repeating} +#' \item{Reference Data}{ Regerence Data} +#' \item{Comment}{ Additional comment} +#' } +"dataset_spec" diff --git a/R/df_label.R b/R/df_label.R index d63b791f..35d08318 100644 --- a/R/df_label.R +++ b/R/df_label.R @@ -44,12 +44,11 @@ xportr_df_label <- function(.df, domain = attr(.df, "_xportr.df_arg_"), metacore = deprecated()) { if (!missing(metacore)) { - lifecycle::deprecate_warn( - when = "0.3.0", + lifecycle::deprecate_stop( + when = "0.3.1.9005", what = "xportr_df_label(metacore = )", with = "xportr_df_label(metadata = )" ) - metadata <- metacore } assert_data_frame(.df) assert_string(domain, null.ok = TRUE) @@ -72,6 +71,10 @@ xportr_df_label <- function(.df, abort("Length of dataset label must be 40 characters or less.") } + if (stringr::str_detect(label, "[^[:ascii:]]")) { + abort("`label` cannot contain any non-ASCII, symbol or special characters.") + } + attr(.df, "label") <- label .df diff --git a/R/format.R b/R/format.R index 00fc692b..bf4b9ebc 100644 --- a/R/format.R +++ b/R/format.R @@ -46,12 +46,11 @@ xportr_format <- function(.df, domain = attr(.df, "_xportr.df_arg_"), metacore = deprecated()) { if (!missing(metacore)) { - lifecycle::deprecate_warn( - when = "0.3.0", + lifecycle::deprecate_stop( + when = "0.3.1.9005", what = "xportr_format(metacore = )", with = "xportr_format(metadata = )" ) - metadata <- metacore } assert_data_frame(.df) assert_string(domain, null.ok = TRUE) @@ -65,7 +64,7 @@ xportr_format <- function(.df, if (inherits(metadata, "Metacore")) metadata <- metadata$var_spec - if (domain_name %in% names(metadata)) { + if (domain_name %in% names(metadata) && !is.null(domain)) { metadata <- metadata %>% dplyr::filter(!!sym(domain_name) == domain & !is.na(!!sym(format_name))) } else { diff --git a/R/label.R b/R/label.R index 5667ae92..158b8e90 100644 --- a/R/label.R +++ b/R/label.R @@ -62,12 +62,11 @@ xportr_label <- function(.df, verbose = getOption("xportr.label_verbose", "none"), metacore = deprecated()) { if (!missing(metacore)) { - lifecycle::deprecate_warn( - when = "0.3.0", + lifecycle::deprecate_stop( + when = "0.3.1.9005", what = "xportr_label(metacore = )", with = "xportr_label(metadata = )" ) - metadata <- metacore } assert_data_frame(.df) assert_string(domain, null.ok = TRUE) @@ -82,7 +81,7 @@ xportr_label <- function(.df, if (inherits(metadata, "Metacore")) metadata <- metadata$var_spec - if (domain_name %in% names(metadata)) { + if (domain_name %in% names(metadata) && !is.null(domain)) { metadata <- metadata %>% dplyr::filter(!!sym(domain_name) == domain) } else { diff --git a/R/length.R b/R/length.R index 5202345d..eb8c635e 100644 --- a/R/length.R +++ b/R/length.R @@ -9,8 +9,8 @@ #' @param metadata A data frame containing variable level metadata. See #' 'Metadata' section for details. #' @param domain Appropriate CDSIC dataset name, e.g. ADAE, DM. Used to subset -#' the metadata object. If none is passed, then name of the dataset passed as -#' .df will be used. +#' the metadata object. If none is passed, then [xportr_metadata()] must be +#' called before hand to set the domain as an attribute of `.df`. #' @param verbose The action this function takes when an action is taken on the #' dataset or function validation finds an issue. See 'Messaging' section for #' details. Options are 'stop', 'warn', 'message', and 'none' @@ -69,12 +69,11 @@ xportr_length <- function(.df, verbose = getOption("xportr.length_verbose", "none"), metacore = deprecated()) { if (!missing(metacore)) { - lifecycle::deprecate_warn( - when = "0.3.0", + lifecycle::deprecate_stop( + when = "0.3.1.9005", what = "xportr_length(metacore = )", with = "xportr_length(metadata = )" ) - metadata <- metacore } assert_data_frame(.df) assert_string(domain, null.ok = TRUE) @@ -89,7 +88,7 @@ xportr_length <- function(.df, if (inherits(metadata, "Metacore")) metadata <- metadata$var_spec - if (domain_name %in% names(metadata)) { + if (domain_name %in% names(metadata) && !is.null(domain)) { metadata <- metadata %>% filter(!!sym(domain_name) == domain) } else { diff --git a/R/metadata.R b/R/metadata.R index 6be201d7..d19b60a8 100644 --- a/R/metadata.R +++ b/R/metadata.R @@ -1,9 +1,10 @@ #' Set variable specifications and domain #' -#' Sets metadata for a dataset in a way that can be accessed by other xportr -#' functions. If used at the start of an xportr pipeline, it removes the need to -#' set metadata and domain at each step individually. For details on the format -#' of the metadata, see the 'Metadata' section for each function in question. +#' Sets metadata and/or domain for a dataset in a way that can be accessed by +#' other xportr functions. If used at the start of an xportr pipeline, it +#' removes the need to set metadata and domain at each step individually. For +#' details on the format of the metadata, see the 'Metadata' section for each +#' function in question. #' #' @inheritParams xportr_length #' @@ -35,36 +36,21 @@ #' library(magrittr) #' #' adlb %>% -#' xportr_domain_name("adlb") %>% #' xportr_metadata(metadata, "test") %>% #' xportr_type() %>% #' xportr_order() #' } -xportr_metadata <- function(.df, metadata, domain = attr(.df, "_xportr.df_arg_")) { +xportr_metadata <- function(.df, metadata = NULL, domain = NULL) { assert_data_frame(.df) - assert_metadata(metadata, include_fun_message = FALSE) + if (is.null(metadata) && is.null(domain)) { + stop("Assertion failed on `metadata` and `domain`: Must provide either `metadata` or `domain` argument") + } + assert_metadata(metadata, include_fun_message = FALSE, null.ok = TRUE) assert_string(domain, null.ok = TRUE) + + ## Common section to detect domain from argument or attribute if (!is.null(domain)) .df <- xportr_domain_name(.df, domain) structure(.df, "_xportr.df_metadata_" = metadata) } - - -#' Update Metadata Domain Name -#' -#' Similar to `xportr_metadata`, but just added the domain and not the metadata. -#' -#' @inheritParams xportr_length -#' -#' @return `.df` dataset with domain argument set -#' @export -#' -#' @rdname metadata -xportr_domain_name <- function(.df, domain) { - assert_data_frame(.df) - assert_string(domain) - attr(.df, "_xportr.df_arg_") <- domain - - .df -} diff --git a/R/order.R b/R/order.R index e9cf0406..686c8db3 100644 --- a/R/order.R +++ b/R/order.R @@ -65,12 +65,11 @@ xportr_order <- function(.df, verbose = getOption("xportr.order_verbose", "none"), metacore = deprecated()) { if (!missing(metacore)) { - lifecycle::deprecate_warn( - when = "0.3.0", + lifecycle::deprecate_stop( + when = "0.3.1.9005", what = "xportr_order(metacore = )", with = "xportr_order(metadata = )" ) - metadata <- metacore } assert_data_frame(.df) assert_string(domain, null.ok = TRUE) @@ -85,7 +84,7 @@ xportr_order <- function(.df, if (inherits(metadata, "Metacore")) metadata <- metadata$ds_vars - if (domain_name %in% names(metadata)) { + if (domain_name %in% names(metadata) && !is.null(domain)) { metadata <- metadata %>% dplyr::filter(!!sym(domain_name) == domain & !is.na(!!sym(order_name))) } else { diff --git a/R/support-test.R b/R/support-test.R index b81fba3d..d223a6d6 100644 --- a/R/support-test.R +++ b/R/support-test.R @@ -180,7 +180,7 @@ multiple_vars_in_spec_helper2 <- function(FUN) { local_cli_theme() adsl %>% - xportr_domain_name("adsl") %>% + xportr_metadata(domain = "adsl") %>% FUN(metadata) %>% testthat::expect_no_message(message = "There are multiple specs for the same variable name") } diff --git a/R/type.R b/R/type.R index a54064a5..b7943c0f 100644 --- a/R/type.R +++ b/R/type.R @@ -82,12 +82,11 @@ xportr_type <- function(.df, verbose = getOption("xportr.type_verbose", "none"), metacore = deprecated()) { if (!missing(metacore)) { - lifecycle::deprecate_warn( - when = "0.3.0", + lifecycle::deprecate_stop( + when = "0.3.1.9005", what = "xportr_type(metacore = )", with = "xportr_type(metadata = )" ) - metadata <- metacore } assert_data_frame(.df) assert_string(domain, null.ok = TRUE) @@ -106,7 +105,7 @@ xportr_type <- function(.df, if (inherits(metadata, "Metacore")) metadata <- metadata$var_spec - if (domain_name %in% names(metadata)) { + if (domain_name %in% names(metadata) && !is.null(domain)) { metadata <- metadata %>% filter(!!sym(domain_name) == domain) } diff --git a/R/utils-xportr.R b/R/utils-xportr.R index 9c9edace..b59c1d78 100644 --- a/R/utils-xportr.R +++ b/R/utils-xportr.R @@ -304,6 +304,18 @@ xpt_validate <- function(data) { glue("{fmt_fmts(names(chk_formats))} must have a valid format.") ) } + + # 4.0 max length of Character variables <= 200 bytes + max_nchar <- data %>% + summarize(across(where(is.character), ~ max(nchar(., type = "bytes")))) + nchar_gt_200 <- max_nchar[which(max_nchar > 200)] + if (length(nchar_gt_200) > 0) { + err_cnd <- c( + err_cnd, + glue("Length of {names(nchar_gt_200)} must be 200 bytes or less.") + ) + } + return(err_cnd) } diff --git a/R/write.R b/R/write.R index 561ec512..eab886d4 100644 --- a/R/write.R +++ b/R/write.R @@ -7,10 +7,12 @@ #' @param .df A data frame to write. #' @param path Path where transport file will be written. File name sans will be #' used as `xpt` name. -#' @param label Dataset label. It must be <=40 characters. +#' @param label `r lifecycle::badge("deprecated")` Previously used to to set the Dataset label. +#' Use the `metadata` argument to set the dataset label. #' @param strict_checks If TRUE, xpt validation will report errors and not write #' out the dataset. If FALSE, xpt validation will report warnings and continue #' with writing out the dataset. Defaults to FALSE +#' @inheritParams xportr_length #' #' @details #' * Variable and dataset labels are stored in the "label" attribute. @@ -32,39 +34,58 @@ #' Param = c("param1", "param2", "param3") #' ) #' +#' var_spec <- data.frame(dataset = "adsl", label = "Subject-Level Analysis Dataset") #' xportr_write(adsl, #' path = paste0(tempdir(), "/adsl.xpt"), -#' label = "Subject-Level Analysis", +#' domain = "adsl", +#' metadata = var_spec, #' strict_checks = FALSE #' ) #' -xportr_write <- function(.df, path, label = NULL, strict_checks = FALSE) { +xportr_write <- function(.df, + path, + metadata = NULL, + domain = NULL, + strict_checks = FALSE, + label = deprecated()) { assert_data_frame(.df) assert_string(path) - assert_string(label, null.ok = TRUE, max.chars = 40) + assert_metadata(metadata) assert_logical(strict_checks) + assert_string(label, null.ok = TRUE, max.chars = 40) path <- normalizePath(path, mustWork = FALSE) name <- tools::file_path_sans_ext(basename(path)) - if (nchar(name) > 8) { - assert("File name must be 8 characters or less.", .var.name = "path") - } + ## Common section to detect domain from argument or attribute - if (stringr::str_detect(name, "[^a-zA-Z0-9]")) { - assert("File name must not contain any non-ASCII, symbol or underscore characters.", .var.name = "path") - } + domain <- get_domain(.df, domain) + if (!is.null(domain)) attr(.df, "_xportr.df_arg_") <- domain - if (!is.null(label)) { - if (stringr::str_detect(label, "[^[:ascii:]]")) { - assert("Must not contain any non-ASCII, symbol or special characters.", .var.name = "label") - } + ## End of common section - attr(.df, "label") <- label + if (!missing(label)) { + lifecycle::deprecate_warn( + when = "0.3.2", + what = "xportr_write(label = )", + with = "xportr_write(metadata = )" + ) + metadata <- data.frame(dataset = domain, label = label) + } + if (!is.null(metadata)) { + .df <- xportr_df_label(.df, metadata = metadata, domain = domain) } + if (nchar(name) > 8) { + assert(".df file name must be 8 characters or less.", .var.name = "path") + } + checks <- xpt_validate(.df) + + if (stringr::str_detect(name, "[^a-zA-Z0-9]")) { + checks <- c(checks, "`.df` cannot contain any non-ASCII, symbol or underscore characters.") + } if (length(checks) > 0) { if (!strict_checks) { diff --git a/R/xportr-package.R b/R/xportr-package.R index 54205767..e84c9457 100644 --- a/R/xportr-package.R +++ b/R/xportr-package.R @@ -95,11 +95,11 @@ #' @import rlang haven #' @importFrom dplyr left_join bind_cols filter select rename rename_with n #' everything arrange group_by summarize mutate ungroup case_when distinct -#' tribble if_else +#' tribble if_else across #' @importFrom glue glue glue_collapse #' @importFrom cli cli_alert_info cli_h2 cli_alert_success cli_div cli_text #' cli_alert_danger -#' @importFrom tidyselect all_of any_of +#' @importFrom tidyselect all_of any_of where #' @importFrom utils capture.output str tail packageVersion #' @importFrom stringr str_detect str_extract str_replace str_replace_all #' @importFrom readr parse_number diff --git a/README.Rmd b/README.Rmd index 385a2e01..2f422c1c 100644 --- a/README.Rmd +++ b/README.Rmd @@ -19,6 +19,7 @@ library(fontawesome) # xportr +[](https://RValidationHub.slack.com) [![R build status](https://github.com/atorus-research/xportr/workflows/R-CMD-check/badge.svg)](https://github.com/atorus-research/xportr/actions?workflow=R-CMD-check) [](https://app.codecov.io/gh/atorus-research/xportr) [](https://github.com/atorus-research/xportr/blob/master/LICENSE) @@ -121,19 +122,23 @@ spec_path <- system.file(paste0("specs/", "ADaM_admiral_spec.xlsx"), package = " var_spec <- readxl::read_xlsx(spec_path, sheet = "Variables") %>% dplyr::rename(type = "Data Type") %>% rlang::set_names(tolower) +dataset_spec <- readxl::read_xlsx(spec_path, sheet = "Datasets") %>% + dplyr::rename(label = "Description") %>% + rlang::set_names(tolower) ``` Each `xportr_` function has been written in a way to take in a part of the specification file and apply that piece to the dataset. Setting `verbose = "warn"` will send appropriate warning message to the console. We have suppressed the warning for the sake of brevity. ```{r, warning = FALSE, message=FALSE, eval=TRUE} adsl %>% - xportr_domain_name("ADSL") %>% - xportr_type(var_spec, verbose = "warn") %>% - xportr_length(var_spec, verbose = "warn") %>% - xportr_label(var_spec, verbose = "warn") %>% - xportr_order(var_spec, verbose = "warn") %>% - xportr_format(var_spec) %>% - xportr_write("adsl.xpt", label = "Subject-Level Analysis Dataset") + xportr_metadata(var_spec, "ADSL") %>% + xportr_type(verbose = "warn") %>% + xportr_length(verbose = "warn") %>% + xportr_label(verbose = "warn") %>% + xportr_order(verbose = "warn") %>% + xportr_format() %>% + xportr_df_label(dataset_spec, "ADSL") %>% + xportr_write("adsl.xpt") ``` The `xportr_metadata()` function can reduce duplication by setting the variable specification and domain explicitly at the top of a pipeline. If you would like to use the `verbose` argument, you will need to set in each function call. @@ -146,7 +151,8 @@ adsl %>% xportr_label() %>% xportr_order() %>% xportr_format() %>% - xportr_write("adsl.xpt", label = "Subject-Level Analysis Dataset") + xportr_df_label(dataset_spec) %>% + xportr_write("adsl.xpt") ``` That's it! We now have a xpt file created in R with all appropriate types, lengths, labels, ordering and formats. Please check out the [Get Started](https://atorus-research.github.io/xportr/articles/xportr.html) for more information and detailed walk through of each `xportr_` function. diff --git a/README.md b/README.md index c1c5bd6f..bebb06c8 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,7 @@ +[](https://RValidationHub.slack.com) [![R build status](https://github.com/atorus-research/xportr/workflows/R-CMD-check/badge.svg)](https://github.com/atorus-research/xportr/actions?workflow=R-CMD-check) [](https://app.codecov.io/gh/atorus-research/xportr) @@ -125,6 +126,9 @@ spec_path <- system.file(paste0("specs/", "ADaM_admiral_spec.xlsx"), package = " var_spec <- readxl::read_xlsx(spec_path, sheet = "Variables") %>% dplyr::rename(type = "Data Type") %>% rlang::set_names(tolower) +dataset_spec <- readxl::read_xlsx(spec_path, sheet = "Datasets") %>% + dplyr::rename(label = "Description") %>% + rlang::set_names(tolower) ``` Each `xportr_` function has been written in a way to take in a part of @@ -134,13 +138,14 @@ We have suppressed the warning for the sake of brevity. ``` r adsl %>% - xportr_domain_name("ADSL") %>% - xportr_type(var_spec, verbose = "warn") %>% - xportr_length(var_spec, verbose = "warn") %>% - xportr_label(var_spec, verbose = "warn") %>% - xportr_order(var_spec, verbose = "warn") %>% - xportr_format(var_spec) %>% - xportr_write("adsl.xpt", label = "Subject-Level Analysis Dataset") + xportr_metadata(var_spec, "ADSL") %>% + xportr_type(verbose = "warn") %>% + xportr_length(verbose = "warn") %>% + xportr_label(verbose = "warn") %>% + xportr_order(verbose = "warn") %>% + xportr_format() %>% + xportr_df_label(dataset_spec, "ADSL") %>% + xportr_write("adsl.xpt") ``` The `xportr_metadata()` function can reduce duplication by setting the @@ -156,7 +161,8 @@ adsl %>% xportr_label() %>% xportr_order() %>% xportr_format() %>% - xportr_write("adsl.xpt", label = "Subject-Level Analysis Dataset") + xportr_df_label(dataset_spec) %>% + xportr_write("adsl.xpt") ``` That’s it! We now have a xpt file created in R with all appropriate diff --git a/_pkgdown.yml b/_pkgdown.yml index 28abfbf0..dbeae1cc 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -48,6 +48,7 @@ reference: - contents: - adsl - var_spec + - dataset_spec articles: - title: ~ diff --git a/data/dataset_spec.rda b/data/dataset_spec.rda new file mode 100644 index 00000000..be9c31c4 Binary files /dev/null and b/data/dataset_spec.rda differ diff --git a/man/dataset_spec.Rd b/man/dataset_spec.Rd new file mode 100644 index 00000000..7ab0d370 --- /dev/null +++ b/man/dataset_spec.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{dataset_spec} +\alias{dataset_spec} +\title{Example Dataset Specification} +\format{ +\subsection{\code{dataset_spec}}{ + +A data frame with 1 row and 9 columns: +\describe{ +\item{Dataset}{\if{html}{\out{}} Dataset} +\item{Description}{\if{html}{\out{}} Dataset description} +\item{Class}{\if{html}{\out{}} Dataset class} +\item{Structure}{\if{html}{\out{}} Logical, indicating if there's a specific structure} +\item{Purpose}{\if{html}{\out{}} Purpose of the dataset} +\item{Key, Variables}{\if{html}{\out{}} Join Key variables in the dataset} +\item{Repeating}{\if{html}{\out{}} Indicates if the dataset is repeating} +\item{Reference Data}{\if{html}{\out{}} Regerence Data} +\item{Comment}{\if{html}{\out{}} Additional comment} +} +} +} +\usage{ +dataset_spec +} +\description{ +Example Dataset Specification +} +\keyword{datasets} diff --git a/man/metadata.Rd b/man/metadata.Rd index 52b4ca6b..658fe0a4 100644 --- a/man/metadata.Rd +++ b/man/metadata.Rd @@ -2,12 +2,9 @@ % Please edit documentation in R/metadata.R \name{xportr_metadata} \alias{xportr_metadata} -\alias{xportr_domain_name} \title{Set variable specifications and domain} \usage{ -xportr_metadata(.df, metadata, domain = attr(.df, "_xportr.df_arg_")) - -xportr_domain_name(.df, domain) +xportr_metadata(.df, metadata = NULL, domain = NULL) } \arguments{ \item{.df}{A data frame of CDISC standard.} @@ -16,21 +13,18 @@ xportr_domain_name(.df, domain) 'Metadata' section for details.} \item{domain}{Appropriate CDSIC dataset name, e.g. ADAE, DM. Used to subset -the metadata object. If none is passed, then name of the dataset passed as -.df will be used.} +the metadata object. If none is passed, then \code{\link[=xportr_metadata]{xportr_metadata()}} must be +called before hand to set the domain as an attribute of \code{.df}.} } \value{ \code{.df} dataset with metadata and domain attributes set - -\code{.df} dataset with domain argument set } \description{ -Sets metadata for a dataset in a way that can be accessed by other xportr -functions. If used at the start of an xportr pipeline, it removes the need to -set metadata and domain at each step individually. For details on the format -of the metadata, see the 'Metadata' section for each function in question. - -Similar to \code{xportr_metadata}, but just added the domain and not the metadata. +Sets metadata and/or domain for a dataset in a way that can be accessed by +other xportr functions. If used at the start of an xportr pipeline, it +removes the need to set metadata and domain at each step individually. For +details on the format of the metadata, see the 'Metadata' section for each +function in question. } \examples{ @@ -55,7 +49,6 @@ if (rlang::is_installed("magrittr")) { library(magrittr) adlb \%>\% - xportr_domain_name("adlb") \%>\% xportr_metadata(metadata, "test") \%>\% xportr_type() \%>\% xportr_order() diff --git a/man/var_spec.Rd b/man/var_spec.Rd index 1b688c9c..5460c33d 100644 --- a/man/var_spec.Rd +++ b/man/var_spec.Rd @@ -3,7 +3,7 @@ \docType{data} \name{var_spec} \alias{var_spec} -\title{Example Dataset Specification} +\title{Example Dataset Variable Specification} \format{ \subsection{\code{var_spec}}{ @@ -35,6 +35,6 @@ A data frame with 216 rows and 19 columns: var_spec } \description{ -Example Dataset Specification +Example Dataset Variable Specification } \keyword{datasets} diff --git a/man/xportr_df_label.Rd b/man/xportr_df_label.Rd index f5d9833e..7285571b 100644 --- a/man/xportr_df_label.Rd +++ b/man/xportr_df_label.Rd @@ -18,8 +18,8 @@ xportr_df_label( details.} \item{domain}{Appropriate CDSIC dataset name, e.g. ADAE, DM. Used to subset -the metadata object. If none is passed, then name of the dataset passed as -.df will be used.} +the metadata object. If none is passed, then \code{\link[=xportr_metadata]{xportr_metadata()}} must be +called before hand to set the domain as an attribute of \code{.df}.} \item{metacore}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} Previously used to pass metadata now renamed with \code{metadata}} diff --git a/man/xportr_format.Rd b/man/xportr_format.Rd index ad0f24b2..05076f1e 100644 --- a/man/xportr_format.Rd +++ b/man/xportr_format.Rd @@ -18,8 +18,8 @@ xportr_format( 'Metadata' section for details.} \item{domain}{Appropriate CDSIC dataset name, e.g. ADAE, DM. Used to subset -the metadata object. If none is passed, then name of the dataset passed as -.df will be used.} +the metadata object. If none is passed, then \code{\link[=xportr_metadata]{xportr_metadata()}} must be +called before hand to set the domain as an attribute of \code{.df}.} \item{metacore}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} Previously used to pass metadata now renamed with \code{metadata}} diff --git a/man/xportr_label.Rd b/man/xportr_label.Rd index fc19a966..881d646f 100644 --- a/man/xportr_label.Rd +++ b/man/xportr_label.Rd @@ -19,8 +19,8 @@ xportr_label( 'Metadata' section for details.} \item{domain}{Appropriate CDSIC dataset name, e.g. ADAE, DM. Used to subset -the metadata object. If none is passed, then name of the dataset passed as -.df will be used.} +the metadata object. If none is passed, then \code{\link[=xportr_metadata]{xportr_metadata()}} must be +called before hand to set the domain as an attribute of \code{.df}.} \item{verbose}{The action this function takes when an action is taken on the dataset or function validation finds an issue. See 'Messaging' section for diff --git a/man/xportr_length.Rd b/man/xportr_length.Rd index f7674540..ceecd8d0 100644 --- a/man/xportr_length.Rd +++ b/man/xportr_length.Rd @@ -19,8 +19,8 @@ xportr_length( 'Metadata' section for details.} \item{domain}{Appropriate CDSIC dataset name, e.g. ADAE, DM. Used to subset -the metadata object. If none is passed, then name of the dataset passed as -.df will be used.} +the metadata object. If none is passed, then \code{\link[=xportr_metadata]{xportr_metadata()}} must be +called before hand to set the domain as an attribute of \code{.df}.} \item{verbose}{The action this function takes when an action is taken on the dataset or function validation finds an issue. See 'Messaging' section for diff --git a/man/xportr_order.Rd b/man/xportr_order.Rd index fbfb3213..53c1e6df 100644 --- a/man/xportr_order.Rd +++ b/man/xportr_order.Rd @@ -19,8 +19,8 @@ xportr_order( 'Metadata' section for details.} \item{domain}{Appropriate CDSIC dataset name, e.g. ADAE, DM. Used to subset -the metadata object. If none is passed, then name of the dataset passed as -.df will be used.} +the metadata object. If none is passed, then \code{\link[=xportr_metadata]{xportr_metadata()}} must be +called before hand to set the domain as an attribute of \code{.df}.} \item{verbose}{The action this function takes when an action is taken on the dataset or function validation finds an issue. See 'Messaging' section for diff --git a/man/xportr_type.Rd b/man/xportr_type.Rd index e4b68e8c..168494e8 100644 --- a/man/xportr_type.Rd +++ b/man/xportr_type.Rd @@ -19,8 +19,8 @@ xportr_type( 'Metadata' section for details.} \item{domain}{Appropriate CDSIC dataset name, e.g. ADAE, DM. Used to subset -the metadata object. If none is passed, then name of the dataset passed as -.df will be used.} +the metadata object. If none is passed, then \code{\link[=xportr_metadata]{xportr_metadata()}} must be +called before hand to set the domain as an attribute of \code{.df}.} \item{verbose}{The action this function takes when an action is taken on the dataset or function validation finds an issue. See 'Messaging' section for diff --git a/man/xportr_write.Rd b/man/xportr_write.Rd index f1b89fc9..31c91c1e 100644 --- a/man/xportr_write.Rd +++ b/man/xportr_write.Rd @@ -4,7 +4,14 @@ \alias{xportr_write} \title{Write xpt v5 transport file} \usage{ -xportr_write(.df, path, label = NULL, strict_checks = FALSE) +xportr_write( + .df, + path, + metadata = NULL, + domain = NULL, + strict_checks = FALSE, + label = deprecated() +) } \arguments{ \item{.df}{A data frame to write.} @@ -12,11 +19,19 @@ xportr_write(.df, path, label = NULL, strict_checks = FALSE) \item{path}{Path where transport file will be written. File name sans will be used as \code{xpt} name.} -\item{label}{Dataset label. It must be <=40 characters.} +\item{metadata}{A data frame containing variable level metadata. See +'Metadata' section for details.} + +\item{domain}{Appropriate CDSIC dataset name, e.g. ADAE, DM. Used to subset +the metadata object. If none is passed, then \code{\link[=xportr_metadata]{xportr_metadata()}} must be +called before hand to set the domain as an attribute of \code{.df}.} \item{strict_checks}{If TRUE, xpt validation will report errors and not write out the dataset. If FALSE, xpt validation will report warnings and continue with writing out the dataset. Defaults to FALSE} + +\item{label}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} Previously used to to set the Dataset label. +Use the \code{metadata} argument to set the dataset label.} } \value{ A data frame. \code{xportr_write()} returns the input data invisibly. @@ -42,9 +57,11 @@ adsl <- data.frame( Param = c("param1", "param2", "param3") ) +var_spec <- data.frame(dataset = "adsl", label = "Subject-Level Analysis Dataset") xportr_write(adsl, path = paste0(tempdir(), "/adsl.xpt"), - label = "Subject-Level Analysis", + domain = "adsl", + metadata = var_spec, strict_checks = FALSE ) diff --git a/tests/testthat/test-depreciation.R b/tests/testthat/test-depreciation.R index eb63cafe..2679ecc9 100644 --- a/tests/testthat/test-depreciation.R +++ b/tests/testthat/test-depreciation.R @@ -1,16 +1,12 @@ -test_that("xportr_df_label: deprecated metacore argument still works and gives warning", { +test_that("xportr_df_label: deprecated metacore gives an error", { withr::local_options(lifecycle_verbosity = "quiet") df <- data.frame(x = "a", y = "b") df_meta <- data.frame(dataset = "df", label = "Label") - df_spec_labeled_df <- xportr_df_label(df, metacore = df_meta, domain = "df") - - expect_equal(attr(df_spec_labeled_df, "label"), "Label") - xportr_df_label(df, metacore = df_meta, domain = "df") %>% - lifecycle::expect_deprecated("Please use the `metadata` argument instead.") + expect_error(xportr_df_label(df, metacore = df_meta)) }) -test_that("xportr_format: deprecated metacore argument still works and gives warning", { +test_that("xportr_format: deprecated metacore gives an error", { withr::local_options(lifecycle_verbosity = "quiet") df <- data.frame(x = 1, y = 2) df_meta <- data.frame( @@ -19,33 +15,19 @@ test_that("xportr_format: deprecated metacore argument still works and gives war format = "date9." ) - formatted_df <- xportr_format(df, metacore = df_meta, domain = "df") - - expect_equal(attr(formatted_df$x, "format.sas"), "DATE9.") - xportr_format(df, metacore = df_meta, domain = "df") %>% - lifecycle::expect_deprecated("Please use the `metadata` argument instead.") + expect_error(xportr_format(df, metacore = df_meta)) }) -test_that("xportr_label: deprecated metacore argument still works and gives warning", { +test_that("xportr_label: using the deprecated metacore argument gives an error", { withr::local_options(lifecycle_verbosity = "quiet") df <- data.frame(x = "a", y = "b") df_meta <- data.frame(dataset = "df", variable = "x", label = "foo") - df_labeled_df <- suppressMessages( - xportr_label(df, metacore = df_meta, domain = "df") - ) - - expect_equal(attr(df_labeled_df$x, "label"), "foo") - - # Note that only the deprecated message should be caught (others are ignored) - suppressMessages( - xportr_label(df, metacore = df_meta, domain = "df") %>% - lifecycle::expect_deprecated("Please use the `metadata` argument instead.") - ) + expect_error(xportr_label(df, metacore = df_meta)) }) -test_that("xportr_length: deprecated metacore argument still works and gives warning", { +test_that("xportr_length: using the deprecated metacore argument gives an error", { withr::local_options(lifecycle_verbosity = "quiet") df <- data.frame(x = "a", y = "b") df_meta <- data.frame( @@ -55,15 +37,10 @@ test_that("xportr_length: deprecated metacore argument still works and gives war length = c(1, 2) ) - df_with_width <- xportr_length(df, metacore = df_meta, domain = "df") - - expect_equal(c(x = 1, y = 2), map_dbl(df_with_width, attr, "width")) - - xportr_length(df, metacore = df_meta, domain = "df") %>% - lifecycle::expect_deprecated("Please use the `metadata` argument instead.") + expect_error(xportr_length(df, metacore = df_meta)) }) -test_that("xportr_order: deprecated metacore argument still works and gives warning", { +test_that("xportr_order: using the deprecated metacore argument gives an error", { withr::local_options(lifecycle_verbosity = "quiet") df <- data.frame(c = 1:5, a = "a", d = 5:1, b = LETTERS[1:5]) @@ -73,20 +50,10 @@ test_that("xportr_order: deprecated metacore argument still works and gives warn order = 1:4 ) - ordered_df <- suppressMessages( - xportr_order(df, metacore = df_meta, domain = "DOMAIN") - ) - - expect_equal(names(ordered_df), df_meta$variable) - - # Note that only the deprecated message should be caught (others are ignored) - suppressMessages( - xportr_order(df, metacore = df_meta, domain = "df") %>% - lifecycle::expect_deprecated("Please use the `metadata` argument instead.") - ) + expect_error(xportr_order(df, metacore = df_meta, domain = "DOMAIN")) }) -test_that("xportr_type: deprecated metacore argument still works and gives warning", { +test_that("xportr_type: using the deprecated metacore argument gives an error", { withr::local_options(lifecycle_verbosity = "quiet") df <- data.frame( Subj = as.character(c(123, 456, 789, "", NA, NA_integer_)), @@ -101,13 +68,5 @@ test_that("xportr_type: deprecated metacore argument still works and gives warni format = NA ) - df2 <- suppressMessages( - xportr_type(df, metacore = df_meta, domain = "df") - ) - - # Note that only the deprecated message should be caught (others are ignored) - suppressMessages( - xportr_type(df, metacore = df_meta, domain = "df") %>% - lifecycle::expect_deprecated("Please use the `metadata` argument instead.") - ) + expect_error(xportr_type(df, metacore = df_meta)) }) diff --git a/tests/testthat/test-format.R b/tests/testthat/test-format.R index 6b39c7e6..63b4ff92 100644 --- a/tests/testthat/test-format.R +++ b/tests/testthat/test-format.R @@ -19,3 +19,18 @@ test_that("xportr_format: Gets warning when metadata has multiple rows with same # Checks that message doesn't appear when xportr.domain_name is valid multiple_vars_in_spec_helper2(xportr_format) }) + +test_that("xportr_format: Works as expected with only one domain in metadata", { + adsl <- data.frame( + USUBJID = c(1001, 1002, 1003), + BRTHDT = c(1, 1, 2) + ) + + metadata <- data.frame( + dataset = c("adsl", "adsl"), + variable = c("USUBJID", "BRTHDT"), + format = c(NA, "DATE9.") + ) + + expect_silent(xportr_format(adsl, metadata)) +}) diff --git a/tests/testthat/test-label.R b/tests/testthat/test-label.R index 6d1b0349..bb0a9e3b 100644 --- a/tests/testthat/test-label.R +++ b/tests/testthat/test-label.R @@ -21,3 +21,19 @@ test_that("xportr_label: Gets warning when metadata has multiple rows with same # Checks that message doesn't appear when xportr.domain_name is valid multiple_vars_in_spec_helper2(xportr_label) }) + + +test_that("xportr_label: Works as expected with only one domain in metadata", { + adsl <- data.frame( + USUBJID = c(1001, 1002, 1003), + BRTHDT = c(1, 1, 2) + ) + + metadata <- data.frame( + dataset = c("adsl", "adsl"), + variable = c("USUBJID", "BRTHDT"), + label = c("Hello", "Hello2") + ) + + expect_silent(xportr_label(adsl, metadata)) +}) diff --git a/tests/testthat/test-length.R b/tests/testthat/test-length.R index ab4ea152..481b145a 100644 --- a/tests/testthat/test-length.R +++ b/tests/testthat/test-length.R @@ -14,7 +14,7 @@ test_that("xportr_length: Accepts valid domain names in metadata object", { # Test minimal call with valid data and without domain adsl %>% - xportr_domain_name("adsl") %>% + xportr_metadata(domain = "adsl") %>% xportr_length(metadata) %>% expect_silent() %>% expect_attr_width(metadata$length) @@ -150,7 +150,6 @@ test_that("xportr_length: Metacore instance can be used", { }) test_that("xportr_length: Domain not in character format", { - skip_if_not_installed("haven") skip_if_not_installed("readxl") require(haven, quietly = TRUE) @@ -193,3 +192,19 @@ test_that("xportr_length: Gets warning when metadata has multiple rows with same # Checks that message doesn't appear when xportr.domain_name is valid multiple_vars_in_spec_helper2(xportr_length) }) + + +test_that("xportr_length: Works as expected with only one domain in metadata", { + adsl <- data.frame( + USUBJID = c(1001, 1002, 1003), + BRTHDT = c(1, 1, 2) + ) + + metadata <- data.frame( + dataset = c("adsl", "adsl"), + variable = c("USUBJID", "BRTHDT"), + length = c(1, 1) + ) + + expect_silent(xportr_length(adsl, metadata)) +}) diff --git a/tests/testthat/test-metadata.R b/tests/testthat/test-metadata.R index 9f6d4169..e10ce09e 100644 --- a/tests/testthat/test-metadata.R +++ b/tests/testthat/test-metadata.R @@ -178,7 +178,7 @@ test_that("xportr_df_label: Correctly applies label when data is piped", { df_meta <- data.frame(dataset = "df", label = "Label") df_spec_labeled_df <- df %>% - xportr_domain_name("df") %>% + xportr_metadata(domain = "df") %>% xportr_df_label(df_meta) %>% xportr_df_label(df_meta) @@ -549,7 +549,6 @@ test_that("xportr_length: Expect error if domain is not a character", { # start test_that("xportr_metadata: Check metadata interaction with other functions", { skip_if_not_installed("admiral") - adsl <- admiral::admiral_adsl var_spec <- @@ -607,4 +606,48 @@ test_that("xportr_metadata: Check metadata interaction with other functions", { ) ) }) + +test_that("xportr_metadata: must throw error if both metadata and domain are null", { + expect_error( + xportr_metadata(data.frame(), metadata = NULL, domain = NULL), + "Must provide either metadata or domain argument" + ) +}) + +test_that("xportr_*: Domain is kept in between calls", { + # Divert all messages to tempfile, instead of printing them + # note: be aware as this should only be used in tests that don't track + # messages + withr::local_message_sink(tempfile()) + + adsl <- minimal_table(30) + + metadata <- minimal_metadata( + dataset = TRUE, length = TRUE, label = TRUE, type = TRUE, format = TRUE, + order = TRUE + ) + + df2 <- adsl %>% + xportr_metadata(domain = "adsl") %>% + xportr_type(metadata) + + df3 <- df2 %>% + xportr_label(metadata) %>% + xportr_length(metadata) %>% + xportr_order(metadata) %>% + xportr_format(metadata) + + expect_equal(attr(df3, "_xportr.df_arg_"), "adsl") + + df4 <- adsl %>% + xportr_type(metadata, domain = "adsl") + + df5 <- df4 %>% + xportr_label(metadata) %>% + xportr_length(metadata) %>% + xportr_order(metadata) %>% + xportr_format(metadata) + + expect_equal(attr(df5, "_xportr.df_arg_"), "adsl") +}) # end diff --git a/tests/testthat/test-order.R b/tests/testthat/test-order.R index c5bde736..9f7a08f6 100644 --- a/tests/testthat/test-order.R +++ b/tests/testthat/test-order.R @@ -21,7 +21,7 @@ test_that("xportr_order: Variable are ordered correctly when data is piped", { ordered_df <- suppressMessages( df %>% - xportr_domain_name("df") %>% + xportr_metadata(domain = "df") %>% xportr_order(df_meta) %>% xportr_order(df_meta) ) @@ -111,7 +111,6 @@ test_that("xportr_order: error when metadata is not set", { }) test_that("xportr_order: Variable ordering messaging is correct", { - skip_if_not_installed("haven") skip_if_not_installed("readxl") require(haven, quietly = TRUE) @@ -170,3 +169,19 @@ test_that("xportr_order: Gets warning when metadata has multiple rows with same expect_message("All variables in specification file are in dataset") %>% expect_message("All variables in dataset are ordered") }) + + +test_that("xportr_order: Works as expected with only one domain in metadata", { + adsl <- data.frame( + USUBJID = c(1001, 1002, 1003), + BRTHDT = c(1, 1, 2) + ) + + metadata <- data.frame( + dataset = c("adsl", "adsl"), + variable = c("USUBJID", "BRTHDT"), + order = c(1, 2) + ) + + expect_equal(xportr_order(adsl, metadata), adsl) +}) diff --git a/tests/testthat/test-pipe.R b/tests/testthat/test-pipe.R deleted file mode 100644 index 90876763..00000000 --- a/tests/testthat/test-pipe.R +++ /dev/null @@ -1,202 +0,0 @@ -test_that("xportr_*: Domain is kept in between calls", { - # Divert all messages to tempfile, instead of printing them - # note: be aware as this should only be used in tests that don't track - # messages - withr::local_message_sink(tempfile()) - - adsl <- minimal_table(30) - - metadata <- minimal_metadata( - dataset = TRUE, length = TRUE, label = TRUE, type = TRUE, format = TRUE, - order = TRUE - ) - - df2 <- adsl %>% - xportr_domain_name("adsl") %>% - xportr_type(metadata) - - df3 <- df2 %>% - xportr_label(metadata) %>% - xportr_length(metadata) %>% - xportr_order(metadata) %>% - xportr_format(metadata) - - expect_equal(attr(df3, "_xportr.df_arg_"), "adsl") - - df4 <- adsl %>% - xportr_type(metadata, domain = "adsl") - - df5 <- df4 %>% - xportr_label(metadata) %>% - xportr_length(metadata) %>% - xportr_order(metadata) %>% - xportr_format(metadata) - - expect_equal(attr(df5, "_xportr.df_arg_"), "adsl") -}) - -test_that("xportr_*: Can use magrittr pipe and aquire domain from call", { - # Divert all messages to tempfile, instead of printing them - # note: be aware as this should only be used in tests that don't track - # messages - withr::local_message_sink(tempfile()) - - adsl <- minimal_table(30) - - metadata <- minimal_metadata( - dataset = TRUE, length = TRUE, label = TRUE, type = TRUE, format = TRUE, - order = TRUE - ) - - non_standard_name <- adsl - result <- non_standard_name %>% - xportr_domain_name("non_standard_name") %>% - xportr_type(metadata) %>% - xportr_label(metadata) %>% - xportr_length(metadata) %>% - xportr_order(metadata) %>% - xportr_format(metadata) %>% - xportr_df_label(metadata) - - expect_equal(attr(result, "_xportr.df_arg_"), "non_standard_name") - - # Different sequence call by moving first and last around - result2 <- non_standard_name %>% - xportr_domain_name("non_standard_name") %>% - xportr_label(metadata) %>% - xportr_length(metadata) %>% - xportr_order(metadata) %>% - xportr_df_label(metadata) %>% - xportr_type(metadata) %>% - xportr_format(metadata) - - expect_equal(attr(result2, "_xportr.df_arg_"), "non_standard_name") -}) - -test_that("xportr_*: Can use magrittr pipe and aquire domain from call (metadata)", { - # Divert all messages to tempfile, instead of printing them - # note: be aware as this should only be used in tests that don't track - # messages - withr::local_message_sink(tempfile()) - - adsl <- minimal_table(30) - - metadata <- minimal_metadata( - dataset = TRUE, length = TRUE, label = TRUE, type = TRUE, format = TRUE, - order = TRUE - ) - - non_standard_name <- adsl - result <- non_standard_name %>% - xportr_domain_name("non_standard_name") %>% - xportr_metadata(metadata) %>% - xportr_type() %>% - xportr_label() %>% - xportr_length() %>% - xportr_order() %>% - xportr_format() %>% - xportr_df_label() - - expect_equal(attr(result, "_xportr.df_arg_"), "non_standard_name") - - # Different sequence call by moving first and last around - result2 <- non_standard_name %>% - xportr_domain_name("non_standard_name") %>% - xportr_metadata(metadata) %>% - xportr_label() %>% - xportr_length() %>% - xportr_order() %>% - xportr_df_label() %>% - xportr_type() %>% - xportr_format() - - expect_equal(attr(result2, "_xportr.df_arg_"), "non_standard_name") -}) - -test_that("xportr_*: Can use R native pipe (R>4.1) and aquire domain from call", { - skip_if( - compareVersion(glue("{R.version$major}.{R.version$minor}"), "4.1.0") < 0, - "R Version doesn't support native pipe (<4.1)" - ) - - # Divert all messages to tempfile, instead of printing them - # note: be aware as this should only be used in tests that don't track - # messages - withr::local_message_sink(tempfile()) - - adsl <- minimal_table(30) - - metadata <- minimal_metadata( - dataset = TRUE, length = TRUE, label = TRUE, type = TRUE, format = TRUE, - order = TRUE - ) - - non_standard_name_native <- adsl - result <- non_standard_name_native |> - xportr_domain_name("non_standard_name_native") |> - xportr_type(metadata) |> - xportr_label(metadata) |> - xportr_length(metadata) |> - xportr_order(metadata) |> - xportr_format(metadata) |> - xportr_df_label(metadata) - - expect_equal(attr(result, "_xportr.df_arg_"), "non_standard_name_native") - - # Different sequence call by moving first and last around - result2 <- non_standard_name_native |> - xportr_domain_name("non_standard_name_native") |> - xportr_label(metadata) |> - xportr_length(metadata) |> - xportr_order(metadata) |> - xportr_df_label(metadata) |> - xportr_type(metadata) |> - xportr_format(metadata) - - expect_equal(attr(result2, "_xportr.df_arg_"), "non_standard_name_native") -}) - -test_that("xportr_*: Can use R native pipe (R>4.1) and aquire domain from call (metadata)", { - skip_if( - compareVersion(glue("{R.version$major}.{R.version$minor}"), "4.1.0") < 0, - "R Version doesn't support native pipe (<4.1)" - ) - - # Divert all messages to tempfile, instead of printing them - # note: be aware as this should only be used in tests that don't track - # messages - withr::local_message_sink(tempfile()) - - adsl <- minimal_table(30) - - metadata <- minimal_metadata( - dataset = TRUE, length = TRUE, label = TRUE, type = TRUE, format = TRUE, - order = TRUE - ) - - non_standard_name_native <- adsl - result <- non_standard_name_native |> - xportr_domain_name("non_standard_name_native") |> - xportr_metadata(metadata) |> - xportr_type() |> - xportr_label() |> - xportr_length() |> - xportr_order() |> - xportr_format() |> - xportr_df_label() - - expect_equal(attr(result, "_xportr.df_arg_"), "non_standard_name_native") - - # Different sequence call by moving first and last around - result2 <- non_standard_name_native |> - xportr_domain_name("non_standard_name_native") |> - xportr_metadata(metadata) |> - xportr_label() |> - xportr_length() |> - xportr_order() |> - xportr_df_label() |> - xportr_type() |> - xportr_format() - - expect_equal(attr(result2, "_xportr.df_arg_"), "non_standard_name_native") -}) diff --git a/tests/testthat/test-type.R b/tests/testthat/test-type.R index 3a3e10d7..9d9580b3 100644 --- a/tests/testthat/test-type.R +++ b/tests/testthat/test-type.R @@ -155,14 +155,14 @@ test_that("xportr_type: Variables retain column attributes, besides class", { withr::local_message_sink(tempfile()) df_type_label <- adsl %>% - xportr_domain_name("adsl") %>% + xportr_metadata(domain = "adsl") %>% xportr_type(metadata) %>% xportr_label(metadata) %>% xportr_length(metadata) %>% xportr_format(metadata) df_label_type <- adsl %>% - xportr_domain_name("adsl") %>% + xportr_metadata(domain = "adsl") %>% xportr_label(metadata) %>% xportr_length(metadata) %>% xportr_format(metadata) %>% @@ -284,3 +284,40 @@ test_that("xportr_type: Gets warning when metadata has multiple rows with same v # Checks that message doesn't appear when xportr.domain_name is valid multiple_vars_in_spec_helper2(xportr_type) }) + +test_that("xportr_type: Drops factor levels", { + metadata <- data.frame( + dataset = "test", + variable = c("Subj", "Param", "Val", "NotUsed"), + type = c("numeric", "character", "numeric", "character"), + format = NA + ) + + .df <- data.frame( + Subj = as.character(123, 456, 789), + Different = c("a", "b", "c"), + Val = factor(c("1", "2", "3")), + Param = c("param1", "param2", "param3") + ) + + df2 <- xportr_type(.df, metadata, "test") + + expect_null(attributes(df2$Val)) +}) + + +test_that("xportr_type: Works as expected with only one domain in metadata", { + adsl <- data.frame( + USUBJID = c(1001, 1002, 1003), + BRTHDT = c(1, 1, 2) + ) + + metadata <- data.frame( + dataset = c("adsl", "adsl"), + variable = c("USUBJID", "BRTHDT"), + type = c("numeric", "numeric"), + format = c(NA, "DATE9.") + ) + + expect_equal(xportr_type(adsl, metadata), adsl) +}) diff --git a/tests/testthat/test-utils-xportr.R b/tests/testthat/test-utils-xportr.R index 4167b698..41f6adb8 100644 --- a/tests/testthat/test-utils-xportr.R +++ b/tests/testthat/test-utils-xportr.R @@ -111,3 +111,19 @@ test_that("xpt_validate: Get error message when the label contains non-ASCII, sy "Label 'A=fooçbar' cannot contain any non-ASCII, symbol or special characters." ) }) + +test_that("xpt_validate: Get error message when the length of a character variable is > 200 bytes ", { + df <- data.frame(A = paste(rep("A", 201), collapse = "")) + expect_equal( + xpt_validate(df), + "Length of A must be 200 bytes or less." + ) +}) + +test_that("xpt_validate: Get error message when the length of a non-ASCII character variable is > 200 bytes", { + df <- data.frame(A = paste(rep("一", 67), collapse = "")) + expect_equal( + xpt_validate(df), + "Length of A must be 200 bytes or less." + ) +}) diff --git a/tests/testthat/test-write.R b/tests/testthat/test-write.R index ba165e3c..d53c7eb0 100644 --- a/tests/testthat/test-write.R +++ b/tests/testthat/test-write.R @@ -10,13 +10,56 @@ test_that("xportr_write: exported data can be saved to a file", { expect_equal(read_xpt(tmp), data_to_save) }) -test_that("xportr_write: exported data can be saved to a file with a label", { +test_that("xportr_write: exported data can still be saved to a file with a label", { tmpdir <- tempdir() tmp <- file.path(tmpdir, "xyz.xpt") on.exit(unlink(tmpdir)) - xportr_write(data_to_save, path = tmp, label = "Lorem ipsum dolor sit amet") + suppressWarnings( + xportr_write(data_to_save, + path = tmp, + label = "Lorem ipsum dolor sit amet", + domain = "data_to_save" + ) + ) + expect_output(str(read_xpt(tmp)), "Lorem ipsum dolor sit amet") +}) + +test_that("xportr_write: exported data can be saved to a file with a metadata", { + tmpdir <- tempdir() + tmp <- file.path(tmpdir, "xyz.xpt") + + on.exit(unlink(tmpdir)) + + xportr_write( + data_to_save, + path = tmp, + domain = "data_to_save", + metadata = data.frame( + dataset = "data_to_save", + label = "Lorem ipsum dolor sit amet" + ) + ) + expect_output(str(read_xpt(tmp)), "Lorem ipsum dolor sit amet") +}) + +test_that("xportr_write: exported data can be saved to a file with a existing metadata", { + tmpdir <- tempdir() + tmp <- file.path(tmpdir, "xyz.xpt") + + on.exit(unlink(tmpdir)) + + df <- xportr_df_label( + data_to_save, + domain = "data_to_save", + data.frame( + dataset = "data_to_save", + label = "Lorem ipsum dolor sit amet" + ) + ) + + xportr_write(df, path = tmp, domain = "data_to_save") expect_output(str(read_xpt(tmp)), "Lorem ipsum dolor sit amet") }) @@ -26,7 +69,16 @@ test_that("xportr_write: expect error when invalid multibyte string is passed in on.exit(unlink(tmpdir)) - expect_error(xportr_write(data_to_save, tmp, label = "Lorizzle ipsizzle dolizzl\xe7 pizzle")) + expect_error( + xportr_write( + data_to_save, + tmp, + metadata = data.frame( + dataset = "data_to_save", + label = "Lorizzle ipsizzle dolizzl\xe7 pizzle" + ) + ) + ) }) test_that("xportr_write: expect error when file name is over 8 characters long", { @@ -35,7 +87,7 @@ test_that("xportr_write: expect error when file name is over 8 characters long", on.exit(unlink(tmpdir)) - expect_error(xportr_write(data_to_save, tmp, label = "asdf")) + expect_error(xportr_write(data_to_save, tmp)) }) test_that("xportr_write: expect error when file name contains non-ASCII symbols or special characters", { @@ -44,45 +96,80 @@ test_that("xportr_write: expect error when file name contains non-ASCII symbols on.exit(unlink(tmpdir)) - expect_error(xportr_write(data_to_save, tmp, label = "asdf")) + expect_error(xportr_write(data_to_save, tmp)) }) -test_that("xportr_write: expect error when label contains non-ASCII symbols or special characters", { +test_that("xportr_write: expect warning when file name contains underscore and strict_checks = FALSE", { tmpdir <- tempdir() - tmp <- file.path(tmpdir, "xyz.xpt") + tmp <- file.path(tmpdir, "test_.xpt") on.exit(unlink(tmpdir)) - expect_error(xportr_write(data_to_save, tmp, label = "çtestç")) + expect_warning(xportr_write(data_to_save, tmp, strict_checks = FALSE)) }) -test_that("xportr_write: expect error when label is over 40 characters", { +test_that("xportr_write: expect error when label contains non-ASCII symbols or special characters", { tmpdir <- tempdir() tmp <- file.path(tmpdir, "xyz.xpt") on.exit(unlink(tmpdir)) - expect_error(xportr_write(data_to_save, tmp, label = paste(rep("a", 41), collapse = ""))) + expect_error( + xportr_write( + data_to_save, + tmp, + expect_error( + xportr_write( + data_to_save, + domain = "data_to_save", + tmp, + metadata = data.frame( + dataset = "data_to_save", + label = "çtestç" + ) + ) + ) + ) + ) }) -test_that("xportr_write: expect error when an xpt validation fails with strict_checks set to TRUE", { +test_that("xportr_write: expect error when label is over 40 characters", { tmpdir <- tempdir() tmp <- file.path(tmpdir, "xyz.xpt") - attr(data_to_save$X, "format.sas") <- "foo" on.exit(unlink(tmpdir)) - expect_error(xportr_write(data_to_save, tmp, label = "label", strict_checks = TRUE)) + expect_error( + xportr_write( + data_to_save, + domain = "data_to_save", + tmp, + metadata = data.frame( + dataset = "data_to_save", + label = paste(rep("a", 41), collapse = "") + ) + ) + ) }) -test_that("xportr_write: expect warning when an xpt validation fails with strict_checks set to FALSE", { +test_that("xportr_write: expect error when an xpt validation fails with strict_checks set to TRUE", { tmpdir <- tempdir() tmp <- file.path(tmpdir, "xyz.xpt") attr(data_to_save$X, "format.sas") <- "foo" on.exit(unlink(tmpdir)) - expect_warning(xportr_write(data_to_save, tmp, label = "label", strict_checks = FALSE)) + expect_error( + xportr_write( + data_to_save, tmp, + domain = "data_to_save", + metadata = data.frame( + dataset = "data_to_save", + label = "label" + ), + strict_checks = TRUE + ) + ) }) test_that("xportr_write: expect warning when an xpt validation fails with strict_checks set to FALSE", { @@ -92,9 +179,20 @@ test_that("xportr_write: expect warning when an xpt validation fails with strict on.exit(unlink(tmpdir)) - expect_warning(xportr_write(data_to_save, tmp, label = "label", strict_checks = FALSE)) + expect_warning( + xportr_write( + data_to_save, tmp, + domain = "data_to_save", + metadata = data.frame( + dataset = "data_to_save", + label = "label" + ), + strict_checks = FALSE + ) + ) }) + test_that("xportr_write: Capture errors by haven and report them as such", { tmpdir <- tempdir() tmp <- file.path(tmpdir, "xyz.xpt") @@ -102,8 +200,19 @@ test_that("xportr_write: Capture errors by haven and report them as such", { on.exit(unlink(tmpdir)) + expect_error( - suppressWarnings(xportr_write(data_to_save, tmp, label = "label", strict_checks = FALSE)), + suppressWarnings( + xportr_write( + data_to_save, tmp, + domain = "data_to_save", + metadata = data.frame( + dataset = "data_to_save", + label = "label" + ), + strict_checks = FALSE + ) + ), "Error reported by haven" ) }) diff --git a/vignettes/deepdive.Rmd b/vignettes/deepdive.Rmd index 8f1ccac0..f55b1f91 100644 --- a/vignettes/deepdive.Rmd +++ b/vignettes/deepdive.Rmd @@ -143,20 +143,19 @@ options( One final note on `options()`. 4 of the core `{xportr}` functions have the ability to set messaging as `"none", "message", "warn", "stop"`. Setting each of these in all your calls can be a bit repetitive. You can use `options()` to set these at a higher level and avoid this repetition. ```{r, eval = FALSE} -# Default +# Default verbose is set to `none` options( xportr.format_verbose = "none", xportr.label_verbose = "none", xportr.length_verbose = "none", - xportr.type_verbose = "none", + xportr.type_verbose = "none" ) -# Will send Warning Message to Console options( - xportr.format_verbose = "warn", - xportr.label_verbose = "warn", - xportr.length_verbose = "warn", - xportr.type_verbose = "warn", + xportr.format_verbose = "none", # Disables any messaging, keeping the console output clean + xportr.label_verbose = "message", # Sends a standard message to the console + xportr.length_verbose = "warn", # Sends a warning message to the console + xportr.type_verbose = "stop" # Stops execution and sends an error message to the console ) ``` @@ -171,7 +170,8 @@ adsl %>% xportr_label(var_spec, "ADSL", "message") %>% xportr_order(var_spec, "ADSL", "message") %>% xportr_format(var_spec, "ADSL") %>% - xportr_write("adsl.xpt", label = "Subject-Level Analysis Dataset") + xportr_df_label(dataset_spec, "ADSL") %>% + xportr_write("adsl.xpt") ``` To help reduce these repetitive calls, we have created `xportr_metadata()`. A user can just **set** the _metadata object_ and the Domain name in the first call, and this will be passed on to the other functions. Much cleaner! @@ -185,7 +185,8 @@ adsl %>% xportr_label() %>% xportr_order() %>% xportr_format() %>% - xportr_write("adsl.xpt", label = "Subject-Level Analysis Dataset") + xportr_df_label(dataset_spec) %>% + xportr_write("adsl.xpt") ``` @@ -198,11 +199,16 @@ For the next six sections, we are going to explore the Warnings and Errors messa ### Setting up our metadata object First, let's read in the specification file and call it `var_spec`. Note that we are not using `options()` here. We will do some slight manipulation to the column names by doing all lower case, and changing `Data Type` to `type` and making the Order column numeric. You can also use `options()` for this step as well. The `var_spec` object has five dataset specification files stacked on top of each other. We will make use of the `ADSL` subset of `var_spec`. You can make use of the Search field above the dataset column to subset the specification file for `ADSL` +Similarly, we can read the Dataset spec file and call it `dataset_spec`. ```{r} var_spec <- var_spec %>% rename(type = "Data Type") %>% set_names(tolower) + +dataset_spec <- dataset_spec %>% + rename(label = "Description") %>% + set_names(tolower) ``` ```{r, echo = FALSE} @@ -279,7 +285,7 @@ glimpse(adsl_type_glimpse) Note that `xportr_type(verbose = "warn")` was set so the function has provided feedback, which would show up in the console, on which variables were converted as a warning message. However, you can set `verbose = "stop"` so that the types are not applied if the data does not match what is in the specification file. Using `verbose = "stop"` will instantly stop the processing of this function and not create the object. A user will need to alter the variables in their R script before using `xportr_type()` ```{r, echo = TRUE, error = TRUE} -adsl_type <- xportr_type(.df = adsl, metadata = var_spec, domain = "ADSL", verbose = "stop") +adsl_type <- xportr_type(.df = adsl_fct, metadata = var_spec, domain = "ADSL", verbose = "stop") ``` ## `xportr_length()` @@ -400,7 +406,8 @@ At the time of `{xportr} v0.3.0` we have not implemented any warnings or error m Finally, we want to write out an `xpt` dataset with all our metadata applied. -We will make use of `xportr_metadata()` to reduce repetitive metadata and domain specifications. We will use default option for verbose, which is just `message` and so not set anything for `verbose`. In `xportr_write()` we will specify the path, which will just be our current working directory, set the dataset label and toggle the `strict_checks` to be `FALSE`. +We will make use of `xportr_metadata()` to reduce repetitive metadata and domain specifications. We will use default option for verbose, which is just `message` and so not set anything for `verbose`. In `xportr_write()` we will specify the path, which will just be our current working directory, set the dataset label and toggle the `strict_checks` to be `FALSE`. +It is also note worthy that you can set the dataset label using the `xportr_df_label` and a `dataset_spec` which will be used by the `xportr_write()` ```{r, echo = TRUE, error = TRUE} adsl %>% @@ -410,7 +417,8 @@ adsl %>% xportr_label() %>% xportr_order() %>% xportr_format() %>% - xportr_write(path = "adsl.xpt", label = "Subject-Level Analysis Dataset", strict_checks = FALSE) + xportr_df_label(dataset_spec) %>% + xportr_write(path = "adsl.xpt", strict_checks = FALSE) ``` Success! We have applied types, lengths, labels, ordering and formats to our dataset. Note the messages written out to the console. Remember the `TRTDUR` and `DCREASCD` and how these are not present in the metadata, but in the dataset. This impacts the messaging for lengths and labels where `{xportr}` is printing out some feedback to us on the two issues. 5 types are coerced, as well as 36 variables re-ordered. Note that `strict_checks` was set to `FALSE`. @@ -419,7 +427,7 @@ The next two examples showcase the `strict_checks = TRUE` option in `xportr_writ ```{r, echo = TRUE, error = TRUE} adsl %>% - xportr_write(path = "adsl.xpt", label = "Subject-Level Analysis Dataset", strict_checks = TRUE) + xportr_write(path = "adsl.xpt", metadata = dataset_spec, domain = "ADSL", strict_checks = TRUE) ``` @@ -439,7 +447,8 @@ adsl %>% xportr_label() %>% xportr_type() %>% xportr_format() %>% - xportr_write(path = "adsl.xpt", label = "Subject-Level Analysis Dataset", strict_checks = TRUE) + xportr_df_label(dataset_spec) %>% + xportr_write(path = "adsl.xpt", strict_checks = TRUE) ``` diff --git a/vignettes/xportr.Rmd b/vignettes/xportr.Rmd index 1c6acdb0..2e39f386 100644 --- a/vignettes/xportr.Rmd +++ b/vignettes/xportr.Rmd @@ -278,7 +278,7 @@ adsl %>% xportr_label(var_spec, "ADSL", "message") %>% xportr_order(var_spec, "ADSL", "message") %>% xportr_format(var_spec, "ADSL") %>% - xportr_write("adsl.xpt", label = "Subject-Level Analysis Dataset") + xportr_write("adsl.xpt") ``` That's it! We now have a `xpt` file created in R with all appropriate types, lengths, labels, ordering and formats from our specification file. If you are interested in exploring more of the custom