diff --git a/R/column_metadata.R b/R/column_metadata.R new file mode 100644 index 0000000..29f66eb --- /dev/null +++ b/R/column_metadata.R @@ -0,0 +1,85 @@ +#' Verify that the item metadata supplied is the appropriate format +#' +#' This function does the following checks and consolidates to a single error message: +#' - Columns missing that must be present +#' - Columns present that are not permissible +#' - Columns with NAs that must be fully populated +#' - Columns columns that should be character or integer but aren't +#' - Within the type column, if the values are within the permissible list per +#' the schema +#' @param items +#' +#' @return Error Check +#' @noRd +validate_dataset_columns <- function(items) { + required_cols <- c("OID", "name", "label", "type") + all_cols <- c("OID", "name", "label", "type", "displayFormat", "length", "keySequence") + + # Check for missing or extraneous columns + missing_cols <- setdiff(required_cols, names(items)) + err_missing_cols <- sprintf("Column `%s` is missing and must be present", missing_cols) + additional_cols <- setdiff(names(items), all_cols) + err_additional_cols <- sprintf("Column `%s` is not a permissible column", additional_cols) + + # Check for for NAs in required columns + any_nas <- vapply(items[intersect(required_cols, names(items))], function(X) any(is.na(X)), FUN.VALUE = TRUE) + has_nas <- names(any_nas)[any_nas] + err_nas <- sprintf("Column `%s` must not have NA values", has_nas) + + # Check columns that should be character + char_cols <- intersect(c("OID", "name", "label", "type", "displayFormat"), names(items)) + are_char_cols <- vapply(items[char_cols], is.character, FUN.VALUE=TRUE) + not_char_cols <- names(are_char_cols)[!are_char_cols] + err_char_cols <- sprintf("Column `%s` must be of type character", not_char_cols) + + # Check columns that should be integers + int_cols <- intersect(c("length", "keySequence"), names(items)) + are_int_cols <- vapply(items[int_cols], is.integer, FUN.VALUE=TRUE) + not_int_cols <- names(are_int_cols)[!are_int_cols] + err_int_cols <- sprintf("Column `%s` must be of type integer", not_int_cols) + + # Check that type values are within the permissible list + err_type_vars <- character() + if ('type' %in% names(items)) { + bad_types <- !(items$type %in% c("string", "integer", "float", "double", "decimal", "boolean")) + bad_type_vars <- items$name[bad_types] + bad_type_vals <- items$type[bad_types] + err_type_vars <- sprintf( + paste("Variable %s has an invalid type value of %s.", + "Must be one of string, integer, float, double, decimal, boolean"), + bad_type_vars, bad_type_vals + ) + } + + all_errs <- c(err_missing_cols, err_additional_cols, err_nas, err_char_cols, err_int_cols, err_type_vars) + + if (length(all_errs) > 0) { + msg_prep <- paste0("\n\t", all_errs) + err_msg <- paste0(c("Error: Issues found in items data:", msg_prep)) + stop(err_msg, call.=FALSE) + } +} + + +set_column_metadata <- function(x, columns) { + # Check items before moving any further + validate_dataset_columns(columns) + + # Attach in the variable metadata + if (!("ITEMGROUPDATASEQ" %in% columns$OID)) { + igds_row <- data.frame( + OID = "ITEMGROUPDATASEQ", + name = "ITEMGROUPDATASEQ", + label = "Record Identifier", + type = "integer" + ) + + # Match up columns and fill + igds_row[setdiff(names(columns), names(igds_row))] <- NA + columns[setdiff(names(igds_row), names(columns))] <- NA + + columns <- rbind(igds_row, columns) + } + + columns_converted <- df_to_list_rows(columns) +} \ No newline at end of file diff --git a/R/datasetjson.R b/R/datasetjson.R index ec453cc..69a4d46 100644 --- a/R/datasetjson.R +++ b/R/datasetjson.R @@ -28,6 +28,7 @@ #' @param ref_data Boolean value that is set to "true" when the dataset contains #' reference data (not subject data). The default value is "false". #' @param version The DatasetJSON version to use. Currently only 1.1.0 is supported. +#' @param columns Variable level metadata for the Dataset JSON object #' #' @return dataset_json object pertaining to the specific Dataset JSON version #' specific @@ -47,7 +48,8 @@ #' metadata_ref = "some/define.xml", #' item_oid = "IG.IRIS", #' name = "IRIS", -#' dataset_label = "Iris" +#' dataset_label = "Iris", +#' columns = iris_items #' ) #' #' # Attach attributes directly @@ -62,14 +64,15 @@ #' ds_json_updated <- set_item_oid(ds_json_updated, "IG.IRIS") #' ds_json_updated <- set_dataset_name(ds_json_updated, "IRIS") #' ds_json_updated <- set_dataset_label(ds_json_updated, "Iris") -dataset_json <- function(.data, file_oid = NULL, last_modified=NULL, +#' ds_json_updates <- set_columns(ds_json_updated, iris_items) +dataset_json <- function(.data, file_oid=NULL, last_modified=NULL, originator=NULL, sys=NULL, sys_version = NULL, study=NULL, metadata_version=NULL,metadata_ref=NULL, item_oid=NULL, name=NULL, dataset_label=NULL, ref_data=FALSE, - version="1.1.0") { + columns=NULL, version="1.1.0") { new_dataset_json(.data, file_oid, last_modified, originator, sys, sys_version, study, metadata_version, metadata_ref, item_oid, name, dataset_label, ref_data, - version) + columns, version) } #' Create a base Dataset JSON Container @@ -83,7 +86,7 @@ dataset_json <- function(.data, file_oid = NULL, last_modified=NULL, #' @noRd new_dataset_json <- function(.data, file_oid, last_modified, originator, sys, sys_version, study, metadata_version, metadata_ref, item_oid, name, dataset_label, - ref_data, version) { + ref_data, columns, version) { if (!(version %in% c("1.1.0"))) { stop("Unsupported version specified - currently only version 1.1.0 is supported", call.=FALSE) @@ -97,7 +100,7 @@ new_dataset_json <- function(.data, file_oid, last_modified, originator, sys, sy # Extract the function and call it to return the base structure funcs[[version]](.data, file_oid, last_modified, originator, sys, sys_version, study, metadata_version, metadata_ref, item_oid, name, dataset_label, - ref_data) + ref_data, columns) } #' Dataset JSON v1.1.0 Generator @@ -106,7 +109,7 @@ new_dataset_json <- function(.data, file_oid, last_modified, originator, sys, sy #' @noRd new_dataset_json_v1_1_0 <- function(.data, file_oid, last_modified, originator, sys, sys_version, study, metadata_version, metadata_ref, item_oid, name, - dataset_label, ref_data) { + dataset_label, ref_data, columns) { if (!inherits(.data, 'data.frame')) { stop("datasetjson objects must inherit from a data.frame", call.=FALSE) @@ -130,6 +133,10 @@ new_dataset_json_v1_1_0 <- function(.data, file_oid, last_modified, originator, attr(.data, 'name') <- name attr(.data, 'label') <- dataset_label attr(.data, 'isReferenceData') <- ref_data + if (!is.null(columns)) { + validate_dataset_columns(columns) + } + attr(.data, 'columns') <- columns structure( .data,