Merge pull request #185 from atorus-research/179_fetch_dataset_label_…

…from_metadata Extend `xportr_write` to accept `metadata` and deprecate `label`
atorus-research · Dec 7, 2023 · ef3170b · ef3170b
2 parents 40a1dd4 + 8437f07
commit ef3170b
Show file tree

Hide file tree

Showing 14 changed files with 249 additions and 54 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -1,10 +1,13 @@
-# xportr (development version)
+# xportr 0.3.1.9001
 
 ## New Features and Bug Fixes
-
-## Documentation
+* `xportr_write()` now accepts a `metadata` argument, which can be used to set the dataset label, for consistency with the other `xportr_*` functions. Note that a dataset label previously set with `xportr_df_label()` is retained by `xportr_write()`.
+* Exporting a new dataset `dataset_spec` that contains the Dataset Specification for ADSL.
 
 ## Deprecation and Breaking Changes
+* The `label` argument from the `xportr_write()` function is deprecated in favor of the `metadata` argument.
+
+## Documentation
 
 # xportr 0.3.1
 

diff --git a/R/data.R b/R/data.R
@@ -56,7 +56,7 @@
 #' }
 "adsl"
 
-#' Example Dataset Specification
+#' Example Dataset Variable Specification
 #'
 #' @format ## `var_spec`
 #' A data frame with 216 rows and 19 columns:
@@ -82,3 +82,20 @@
 #'   \item{Developer Notes}{Developer Notes}
 #' }
 "var_spec"
+
+#' Example Dataset Specification
+#'
+#' @format ## `dataset_spec`
+#' A data frame with 1 row and 9 columns:
+#' \describe{
+#'   \item{Dataset}{<chr> Dataset}
+#'   \item{Description}{<chr> Dataset description}
+#'   \item{Class}{<chr> Dataset class}
+#'   \item{Structure}{<lgl> Logical, indicating if there's a specific structure}
+#'   \item{Purpose}{<chr> Purpose of the dataset}
+#'   \item{Key, Variables}{<chr> Join Key variables in the dataset}
+#'   \item{Repeating}{<chr> Indicates if the dataset is repeating}
+#'   \item{Reference Data}{<lgl> Reference Data}
+#'   \item{Comment}{<chr> Additional comment}
+#' }
+"dataset_spec"
diff --git a/R/df_label.R b/R/df_label.R
@@ -83,6 +83,10 @@ xportr_df_label <- function(.df,
     abort("Length of dataset label must be 40 characters or less.")
   }
 
+  if (stringr::str_detect(label, "[^[:ascii:]]")) {
+    abort("`label` cannot contain any non-ASCII, symbol or special characters.")
+  }
+
   attr(.df, "label") <- label
 
   .df

diff --git a/R/write.R b/R/write.R
@@ -7,10 +7,12 @@
 #' @param .df A data frame to write.
 #' @param path Path where transport file will be written. File name sans
 #'   extension will be used as the `xpt` name.
-#' @param label Dataset label. It must be <=40 characters.
+#' @param label `r lifecycle::badge("deprecated")` Previously used to set the dataset label.
+#' Use the `metadata` argument to set the dataset label.
 #' @param strict_checks If TRUE, xpt validation will report errors and not write
 #'   out the dataset. If FALSE, xpt validation will report warnings and continue
 #'   with writing out the dataset. Defaults to FALSE
+#' @inheritParams xportr_length
 #'
 #' @details
 #'   * Variable and dataset labels are stored in the "label" attribute.
@@ -32,17 +34,43 @@
 #'   Param = c("param1", "param2", "param3")
 #' )
 #'
+#' var_spec <- data.frame(dataset = "adsl", label = "Subject-Level Analysis Dataset")
 #' xportr_write(adsl,
 #'   path = paste0(tempdir(), "/adsl.xpt"),
-#'   label = "Subject-Level Analysis",
+#'   metadata = var_spec,
 #'   strict_checks = FALSE
 #' )
 #'
-xportr_write <- function(.df, path, label = NULL, strict_checks = FALSE) {
+xportr_write <- function(.df,
+                         path,
+                         metadata = NULL,
+                         domain = NULL,
+                         strict_checks = FALSE,
+                         label = deprecated()) {
   path <- normalizePath(path, mustWork = FALSE)
 
   name <- tools::file_path_sans_ext(basename(path))
 
+  ## Common section to detect domain from argument or pipes
+
+  df_arg <- tryCatch(as_name(enexpr(.df)), error = function(err) NULL)
+  domain <- get_domain(.df, df_arg, domain)
+  if (!is.null(domain)) attr(.df, "_xportr.df_arg_") <- domain
+
+  ## End of common section
+
+  if (!missing(label)) {
+    lifecycle::deprecate_warn(
+      when = "0.3.2",
+      what = "xportr_write(label = )",
+      with = "xportr_write(metadata = )"
+    )
+    metadata <- data.frame(dataset = domain, label = label)
+  }
+  if (!is.null(metadata)) {
+    .df <- xportr_df_label(.df, metadata = metadata, domain = domain)
+  }
+
   if (nchar(name) > 8) {
     abort("`.df` file name must be 8 characters or less.")
   }
@@ -51,18 +79,6 @@ xportr_write <- function(.df, path, label = NULL, strict_checks = FALSE) {
     abort("`.df` cannot contain any non-ASCII, symbol or underscore characters.")
   }
 
-  if (!is.null(label)) {
-    if (nchar(label) > 40) {
-      abort("`label` must be 40 characters or less.")
-    }
-
-    if (stringr::str_detect(label, "[^[:ascii:]]")) {
-      abort("`label` cannot contain any non-ASCII, symbol or special characters.")
-    }
-
-    attr(.df, "label") <- label
-  }
-
   checks <- xpt_validate(.df)
 
   if (length(checks) > 0) {

diff --git a/README.Rmd b/README.Rmd
@@ -19,6 +19,7 @@ library(fontawesome)
 # xportr <img src="man/figures/logo.png" align="right" alt="" width="120" />
 
 <!-- badges: start -->
+[<img src="https://img.shields.io/badge/Slack-RValidationHub-blue?style=flat&logo=slack">](https://RValidationHub.slack.com)
 [![R build status](https://github.com/atorus-research/xportr/workflows/R-CMD-check/badge.svg)](https://github.com/atorus-research/xportr/actions?workflow=R-CMD-check)
 [<img src="https://img.shields.io/codecov/c/gh/atorus-research/xportr">](https://app.codecov.io/gh/atorus-research/xportr)
 [<img src="https://img.shields.io/badge/License-MIT-blue.svg">](https://github.com/atorus-research/xportr/blob/master/LICENSE)
@@ -121,6 +122,9 @@ spec_path <- system.file(paste0("specs/", "ADaM_admiral_spec.xlsx"), package = "
 var_spec <- readxl::read_xlsx(spec_path, sheet = "Variables") %>%
   dplyr::rename(type = "Data Type") %>%
   rlang::set_names(tolower)
+dataset_spec <- readxl::read_xlsx(spec_path, sheet = "Datasets") %>%
+  dplyr::rename(label = "Description") %>%
+  rlang::set_names(tolower)
 ```
 
 Each `xportr_` function has been written in a way to take in a part of the specification file and apply that piece to the dataset. Setting `verbose = "warn"` will send the appropriate warning messages to the console. We have suppressed the warnings for the sake of brevity.
@@ -132,7 +136,8 @@ adsl %>%
   xportr_label(var_spec, "ADSL", verbose = "warn") %>%
   xportr_order(var_spec, "ADSL", verbose = "warn") %>%
   xportr_format(var_spec, "ADSL") %>%
-  xportr_write("adsl.xpt", label = "Subject-Level Analysis Dataset")
+  xportr_df_label(dataset_spec, "ADSL") %>%
+  xportr_write("adsl.xpt")
 ```
 
 The `xportr_metadata()` function can reduce duplication by setting the variable specification and domain explicitly at the top of a pipeline. If you would like to use the `verbose` argument, you will need to set it in each function call.
@@ -145,7 +150,8 @@ adsl %>%
   xportr_label() %>%
   xportr_order() %>%
   xportr_format() %>%
-  xportr_write("adsl.xpt", label = "Subject-Level Analysis Dataset")
+  xportr_df_label(dataset_spec) %>%
+  xportr_write("adsl.xpt")
 ```
 
 That's it!  We now have a xpt file created in R with all appropriate types, lengths, labels, ordering and formats.  Please check out the [Get Started](https://atorus-research.github.io/xportr/articles/xportr.html) for more information and detailed walk through of each `xportr_` function.

diff --git a/README.md b/README.md
@@ -126,6 +126,9 @@ spec_path <- system.file(paste0("specs/", "ADaM_admiral_spec.xlsx"), package = "
 var_spec <- readxl::read_xlsx(spec_path, sheet = "Variables") %>%
   dplyr::rename(type = "Data Type") %>%
   rlang::set_names(tolower)
+dataset_spec <- readxl::read_xlsx(spec_path, sheet = "Datasets") %>%
+  dplyr::rename(label = "Description") %>%
+  rlang::set_names(tolower)
 ```
 
 Each `xportr_` function has been written in a way to take in a part of
@@ -140,7 +143,8 @@ adsl %>%
   xportr_label(var_spec, "ADSL", verbose = "warn") %>%
   xportr_order(var_spec, "ADSL", verbose = "warn") %>%
   xportr_format(var_spec, "ADSL") %>%
-  xportr_write("adsl.xpt", label = "Subject-Level Analysis Dataset")
+  xportr_df_label(dataset_spec, "ADSL") %>%
+  xportr_write("adsl.xpt")
 ```
 
 The `xportr_metadata()` function can reduce duplication by setting the
@@ -156,7 +160,8 @@ adsl %>%
   xportr_label() %>%
   xportr_order() %>%
   xportr_format() %>%
-  xportr_write("adsl.xpt", label = "Subject-Level Analysis Dataset")
+  xportr_df_label(dataset_spec) %>%
+  xportr_write("adsl.xpt")
 ```
 
 That’s it! We now have a xpt file created in R with all appropriate

diff --git a/_pkgdown.yml b/_pkgdown.yml
@@ -48,6 +48,7 @@ reference:
   - contents:
       - adsl
       - var_spec
+      - dataset_spec
 
 articles:
   - title: ~

diff --git a/data/dataset_spec.rda b/data/dataset_spec.rda
diff --git a/man/dataset_spec.Rd b/man/dataset_spec.Rd
diff --git a/man/var_spec.Rd b/man/var_spec.Rd
diff --git a/man/xportr_write.Rd b/man/xportr_write.Rd