Column metadata arguments #50

Open · wants to merge 1 commit into base: dev
85 changes: 85 additions & 0 deletions R/column_metadata.R
@@ -0,0 +1,85 @@
#' Verify that the item metadata supplied is in the appropriate format
#'
#' This function performs the following checks and consolidates them into a single error message:
#' - Columns missing that must be present
#' - Columns present that are not permissible
#' - Columns with NAs that must be fully populated
#' - Columns that should be character or integer but aren't
#' - Whether values in the type column are within the permissible list per
#'   the schema
#' @param items Data frame of item (column) metadata
#'
#' @return NULL invisibly; raises an error describing any issues found
#' @noRd
validate_dataset_columns <- function(items) {
required_cols <- c("OID", "name", "label", "type")
all_cols <- c("OID", "name", "label", "type", "displayFormat", "length", "keySequence")

# Check for missing or extraneous columns
missing_cols <- setdiff(required_cols, names(items))
err_missing_cols <- sprintf("Column `%s` is missing and must be present", missing_cols)
additional_cols <- setdiff(names(items), all_cols)
err_additional_cols <- sprintf("Column `%s` is not a permissible column", additional_cols)

# Check for NAs in required columns
any_nas <- vapply(items[intersect(required_cols, names(items))], function(X) any(is.na(X)), FUN.VALUE = TRUE)
has_nas <- names(any_nas)[any_nas]
err_nas <- sprintf("Column `%s` must not have NA values", has_nas)

# Check columns that should be character
char_cols <- intersect(c("OID", "name", "label", "type", "displayFormat"), names(items))
are_char_cols <- vapply(items[char_cols], is.character, FUN.VALUE=TRUE)
not_char_cols <- names(are_char_cols)[!are_char_cols]
err_char_cols <- sprintf("Column `%s` must be of type character", not_char_cols)

# Check columns that should be integers
int_cols <- intersect(c("length", "keySequence"), names(items))
are_int_cols <- vapply(items[int_cols], is.integer, FUN.VALUE=TRUE)
not_int_cols <- names(are_int_cols)[!are_int_cols]
err_int_cols <- sprintf("Column `%s` must be of type integer", not_int_cols)

# Check that type values are within the permissible list
err_type_vars <- character()
if ('type' %in% names(items)) {
bad_types <- !(items$type %in% c("string", "integer", "float", "double", "decimal", "boolean"))
bad_type_vars <- items$name[bad_types]
bad_type_vals <- items$type[bad_types]
err_type_vars <- sprintf(
paste("Variable %s has an invalid type value of %s.",
"Must be one of string, integer, float, double, decimal, boolean"),
bad_type_vars, bad_type_vals
)
}

all_errs <- c(err_missing_cols, err_additional_cols, err_nas, err_char_cols, err_int_cols, err_type_vars)

if (length(all_errs) > 0) {
msg_prep <- paste0("\n\t", all_errs)
err_msg <- paste0(c("Issues found in items data:", msg_prep), collapse = "")
stop(err_msg, call.=FALSE)
}
}
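# Illustrative only (not part of this PR): a minimal sketch of the item
# metadata data frame that validate_dataset_columns() expects. The OID, name,
# and label values below are invented for illustration.
#
#   items <- data.frame(
#     OID    = c("IT.IRIS.SEPALLENGTH", "IT.IRIS.SPECIES"),
#     name   = c("Sepal.Length", "Species"),
#     label  = c("Sepal Length", "Species"),
#     type   = c("float", "string"),
#     length = c(8L, 10L),
#     stringsAsFactors = FALSE
#   )
#   validate_dataset_columns(items)      # passes silently
#   validate_dataset_columns(items[-2])  # errors: `name` is missing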


set_column_metadata <- function(x, columns) {
# Check items before moving any further
validate_dataset_columns(columns)

# Attach in the variable metadata
if (!("ITEMGROUPDATASEQ" %in% columns$OID)) {
igds_row <- data.frame(
OID = "ITEMGROUPDATASEQ",
name = "ITEMGROUPDATASEQ",
label = "Record Identifier",
type = "integer"
)

# Match up columns and fill
igds_row[setdiff(names(columns), names(igds_row))] <- NA
columns[setdiff(names(igds_row), names(columns))] <- NA

columns <- rbind(igds_row, columns)
}

columns_converted <- df_to_list_rows(columns)
}
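# Illustrative only (not part of this PR): when the supplied metadata has no
# ITEMGROUPDATASEQ entry, set_column_metadata() prepends the record identifier
# row and fills the schema fields it lacks (e.g. length, keySequence) with NA
# before converting to row-wise lists. Using the `items` sketch above and any
# datasetjson object `ds` (both names invented here):
#
#   column_meta <- set_column_metadata(ds, items)
#   # first element corresponds to:
#   #   OID = "ITEMGROUPDATASEQ", name = "ITEMGROUPDATASEQ",
#   #   label = "Record Identifier", type = "integer", length = NA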
21 changes: 14 additions & 7 deletions R/datasetjson.R
@@ -28,6 +28,7 @@
#' @param ref_data Boolean value that is set to "true" when the dataset contains
#' reference data (not subject data). The default value is "false".
#' @param version The DatasetJSON version to use. Currently only 1.1.0 is supported.
#' @param columns Variable level metadata for the Dataset JSON object
#'
#' @return dataset_json object pertaining to the specific Dataset JSON version
#' specified
@@ -47,7 +48,8 @@
#' metadata_ref = "some/define.xml",
#' item_oid = "IG.IRIS",
#' name = "IRIS",
#' dataset_label = "Iris"
#' dataset_label = "Iris",
#' columns = iris_items
#' )
#'
#' # Attach attributes directly
@@ -62,14 +64,15 @@
#' ds_json_updated <- set_item_oid(ds_json_updated, "IG.IRIS")
#' ds_json_updated <- set_dataset_name(ds_json_updated, "IRIS")
#' ds_json_updated <- set_dataset_label(ds_json_updated, "Iris")
dataset_json <- function(.data, file_oid = NULL, last_modified=NULL,
#' ds_json_updated <- set_columns(ds_json_updated, iris_items)
dataset_json <- function(.data, file_oid=NULL, last_modified=NULL,
originator=NULL, sys=NULL, sys_version = NULL,
study=NULL, metadata_version=NULL, metadata_ref=NULL,
item_oid=NULL, name=NULL, dataset_label=NULL, ref_data=FALSE,
version="1.1.0") {
columns=NULL, version="1.1.0") {
new_dataset_json(.data, file_oid, last_modified, originator, sys, sys_version, study,
metadata_version, metadata_ref, item_oid, name, dataset_label, ref_data,
version)
columns, version)
}

#' Create a base Dataset JSON Container
@@ -83,7 +86,7 @@ dataset_json <- function(.data, file_oid = NULL, last_modified=NULL,
#' @noRd
new_dataset_json <- function(.data, file_oid, last_modified, originator, sys, sys_version, study,
metadata_version, metadata_ref, item_oid, name, dataset_label,
ref_data, version) {
ref_data, columns, version) {

if (!(version %in% c("1.1.0"))) {
stop("Unsupported version specified - currently only version 1.1.0 is supported", call.=FALSE)
@@ -97,7 +100,7 @@ new_dataset_json <- function(.data, file_oid, last_modified, originator, sys, sy
# Extract the function and call it to return the base structure
funcs[[version]](.data, file_oid, last_modified, originator, sys, sys_version, study,
metadata_version, metadata_ref, item_oid, name, dataset_label,
ref_data)
ref_data, columns)
}

#' Dataset JSON v1.1.0 Generator
@@ -106,7 +109,7 @@ new_dataset_json <- function(.data, file_oid, last_modified, originator, sys, sy
#' @noRd
new_dataset_json_v1_1_0 <- function(.data, file_oid, last_modified, originator, sys, sys_version,
study, metadata_version, metadata_ref, item_oid, name,
dataset_label, ref_data) {
dataset_label, ref_data, columns) {

if (!inherits(.data, 'data.frame')) {
stop("datasetjson objects must inherit from a data.frame", call.=FALSE)
@@ -130,6 +133,10 @@ new_dataset_json_v1_1_0 <- function(.data, file_oid, last_modified, originator,
attr(.data, 'name') <- name
attr(.data, 'label') <- dataset_label
attr(.data, 'isReferenceData') <- ref_data
if (!is.null(columns)) {
validate_dataset_columns(columns)
}
attr(.data, 'columns') <- columns

structure(
.data,
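For reference, a minimal end-to-end sketch of the new columns argument (not part of this diff). The iris_items data frame below is an invented stand-in for the item metadata referenced in the roxygen examples, and in practice the file- and source-level metadata shown there would typically also be supplied.

library(datasetjson)

# Invented item metadata describing the iris columns
iris_items <- data.frame(
  OID   = c("IT.IRIS.SEPALLENGTH", "IT.IRIS.SEPALWIDTH",
            "IT.IRIS.PETALLENGTH", "IT.IRIS.PETALWIDTH", "IT.IRIS.SPECIES"),
  name  = c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width", "Species"),
  label = c("Sepal Length", "Sepal Width", "Petal Length", "Petal Width", "Species"),
  type  = c("float", "float", "float", "float", "string"),
  stringsAsFactors = FALSE
)

# Pass the metadata at construction time; it is validated by
# validate_dataset_columns() before being attached as the 'columns' attribute
ds_json <- dataset_json(
  iris,
  item_oid = "IG.IRIS",
  name = "IRIS",
  dataset_label = "Iris",
  columns = iris_items
)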