diff --git a/DESCRIPTION b/DESCRIPTION index 292e4a28..abae0884 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -57,7 +57,8 @@ Imports: stringr (>= 1.4.0), assertthat, pillar, - cli + cli, + styler Suggests: knitr, htmltools, @@ -67,7 +68,8 @@ Suggests: spelling, testthat (>= 3.1.7), DT, - readr + readr, + withr VignetteBuilder: knitr Config/testthat/edition: 3 Config/testthat/parallel: true diff --git a/NAMESPACE b/NAMESPACE index 2dc0c0f3..24f1c45d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -17,14 +17,18 @@ export(derive_seq) export(derive_study_day) export(domain_example) export(fmt_cmp) +export(generate_code) export(generate_oak_id_vars) export(hardcode_ct) export(hardcode_no_ct) +export(is_character) +export(is_numeric) export(oak_id_vars) export(problems) export(read_ct_spec) export(read_ct_spec_example) export(read_domain_example) +export(read_spec) export(sbj_vars) importFrom(dplyr,mutate) importFrom(pillar,ctl_new_rowid_pillar) diff --git a/R/generate_code.R b/R/generate_code.R new file mode 100644 index 00000000..e86c50a1 --- /dev/null +++ b/R/generate_code.R @@ -0,0 +1,459 @@ +# TODO Things I changed in specs +# - Made sure that target_sdtm_domain is not missing +# - Added raw_fmt for dates +# - Added raw_unk for dates +# - The algorithm for CMENRTPT in spec is hardcode_ct, but in the program assign_ct +# is used. I kept the algorithm as hardcode_ct in the spec. +# - I consolidated target_hardcoded_value and target_term_value into target_value +# - In the spec we have cm_raw_data and I used it in the template as well +# - Changed is.numeric/is.character into is_numeric/is_character +# - I did not generate id_vars since the default values was enough, certainly we +# can add it later when the customized id_vars are needed. +# - Some extra manipulations done in get_domain_spec() +# - Moved VSTESTCD before qualifiers, e.g. VSSTAT, VSPOS so that topic is created first +# - Changed mapping_is_dataset to TRUE for VSTPT, VSDTC +# - Added a new column topic, showing to which topic the mapping belongs to +# - Some code list codes were populated in target_sdtm_variable_controlled_terms_or_format +# I moved them under target_sdtm_variable_codelist_code, e.g. VSPOS, VSLOC + +#' Generate the code for the mapping SDTM specification +#' +#' One can use the option `width` to control the width of the code. A width of +#' twenty will almost always place every parameter on a separate line. This is +#' useful for debugging and understanding the code. The higher the width, the +#' more parameters will be placed on a single line and code will be shorter. +#' See the examples for more details. +#' +#' @param spec The specification data frame. +#' @param domain The SDTM domain to generate the code for. +#' @param out_dir The directory to save the code file. Default is the current +#' directory. +#' +#' @return Side effect: the code is generated and saved to a file. +#' @export +#' +#' @examples +#' \dontrun{ +#' # VS domain ---- +#' spec <- read_spec("vs_sdtm_oak_spec.csv") +#' domain <- "vs" +#' +#' spec <- spec |> +#' dplyr::filter( +#' !is.na(target_sdtm_variable), +#' !is.na(mapping_algorithm), +#' !mapping_is_dataset %in% c("TRUE") +#' ) +#' +#' old_width <- options(width = 20) +#' generate_code(spec, domain) +#' # Restore original width +#' options(width = old_width$width) +#' +#' # CM domain ---- +#' +#' spec <- read_spec("cm_sdtm_oak_spec_cdash.csv") +#' +#' spec <- spec |> +#' dplyr::filter( +#' !is.na(target_sdtm_variable), +#' !is.na(mapping_algorithm), +#' !mapping_is_dataset %in% c("TRUE") +#' ) +#' +#' domain <- "cm" +#' generate_code(spec, domain) +#' +#' # One can use option width to control the width of the code +#' # Twenty will almost always place every parameter on a separate line +#' old_width <- options(width = 20) +#' generate_code(spec, domain) +#' # Restore original width +#' options(width = old_width$width) +#' } +#' +generate_code <- function(spec, domain, out_dir = ".") { + admiraldev::assert_data_frame(spec, required_vars = rlang::syms(expected_columns())) + admiraldev::assert_character_scalar(domain) + + spec_domain <- get_domain_spec(spec, domain) + + topics <- unique(spec_domain$topic) + + styled_code <- purrr::map( + topics, + generate_one_topic_code, + domain = domain, + spec = spec_domain + ) |> + style_the_code(domain) + + file_name <- paste0(domain, "_sdtm_oak_code.R") + writeLines(styled_code, file.path(out_dir, file_name)) +} + +#' Style the code +#' +#' This function styles the code using the styler package and adds the necessary +#' templates to the code (e.g. cm_template_prefix, cm_template_suffix). +#' +#' @param code_by_topics A list of character vectors. +#' @inheritParams generate_code +#' +#' @return The styled code as a string. +#' @keywords internal +#' +style_the_code <- function(code_by_topics, domain) { + admiraldev::assert_list_of(code_by_topics, "character") + + prefix_f <- paste0(domain, "_template_prefix") + suffix_f <- paste0(domain, "_template_suffix") + + assertthat::assert_that(exists(prefix_f), msg = paste0("The function ", prefix_f, " does not exist.")) + assertthat::assert_that(exists(suffix_f), msg = paste0("The function ", suffix_f, " does not exist.")) + + prefix <- do.call(prefix_f, list()) + suffix <- do.call(suffix_f, list()) + + multiple_topics <- !identical(length(code_by_topics), 1L) + + if (multiple_topics) { + code_by_topics <- code_by_topics |> + purrr::map(remove_last_pipe) + } + + code_by_topics |> + unlist() |> + append(prefix, after = 0L) |> + append(suffix) |> + styler::style_text() +} + +#' Generate the code for one topic +#' +#' @param topic The topic to generate the code for. +#' @param domain The SDTM domain. +#' @param spec The specification data frame. +#' +#' @return The code for the topic as a string. +#' @keywords internal +#' +generate_one_topic_code <- function(topic, domain, spec) { + admiraldev::assert_character_scalar(topic) + admiraldev::assert_character_scalar(domain) + admiraldev::assert_data_frame(spec, required_vars = rlang::syms(expected_columns_min())) + + spec_topic <- spec |> + dplyr::filter(topic %in% {{ topic }}) + + domain_topic <- paste(domain, topic, sep = "_") |> + tolower() + + map_topic <- paste0("\n\n# Map topic ", domain_topic, " ----\n") + assign_to_domain_topic <- paste0(domain_topic, " <-") + + # Generate the code for each variable row in spec + spec_topic |> + dplyr::rowwise() |> + dplyr::mutate( + algorithm_code = list(generate_one_var_code(dplyr::pick(dplyr::everything()))), + .keep = "none" + ) |> + unlist() |> + append(assign_to_domain_topic, after = 0L) |> + append(map_topic, after = 0L) +} + +#' Generate the code for one variable +#' +#' @param spec The specification data frame. +#' @param last_var Logical indicating if this is the last variable in the domain. +#' +#' @return The code for the variable as a string. +#' @keywords internal +#' +generate_one_var_code <- function(spec) { + admiraldev::assert_data_frame(spec, required_vars = rlang::syms(expected_columns_min())) + assertthat::assert_that(identical(nrow(spec), 1L)) + + raw_dat_value <- rlang::parse_expr(spec$raw_dataset) + is_cond_add <- stringr::str_starts(spec$raw_dataset, stringr::fixed("condition_add")) + + # We want name-spaced functions to be used in the code, e.g. sdtm.oak::condition_add + if (is_cond_add) { + raw_dat_value <- paste0("sdtm.oak::", spec$raw_dataset) |> rlang::parse_expr() + } + + # We want name-spaced functions to be used in the code, e.g. sdtm.oak::hardcode_ct + function_name <- paste0("sdtm.oak::", spec$mapping_algorithm) |> + rlang::parse_expr() + + args <- list( + raw_dat = raw_dat_value, + raw_var = spec$raw_variable, + tgt_var = spec$target_sdtm_variable, + tgt_val = spec$target_value, + ct_spec = rlang::parse_expr("study_ct"), + ct_clst = spec$target_sdtm_variable_codelist_code, + raw_fmt = spec$raw_fmt, + raw_unk = parse_into_c_call(spec$raw_unk) + ) + + # If the ct_clst is missing, then we must remove ct_spec + if (is.na(args$ct_clst)) { + args$ct_spec <- NA + } + + # Remove the arguments that are missing + args <- purrr::discard(args, \(x) is.vector(x) && anyNA(x)) + + + # Generate the function call + generated_call <- rlang::call2( + function_name, + !!!args + ) + + rlang::expr_deparse(generated_call) |> + add_pipe() +} + +#' This function converts comma separated string into a character vector +#' +#' @param string A string with comma separated values. +#' +#' @return A character vector. +#' +#' @keywords internal +#' +#' @examples +#' \dontrun{ +#' str_in <- "a, b, c" +#' parse_into_c_call("a, b, c") +#' +#' str_in <- NA_character_ +#' parse_into_c_call(str_in) +#' } +#' +parse_into_c_call <- function(str_in) { + admiraldev::assert_character_scalar(str_in) + + str_out <- str_in |> + stringr::str_split(stringr::fixed(",")) |> + unlist() |> + stringr::str_trim() + + if (all(is.na(str_out))) { + return(NA) + } + + rlang::call2("c", !!!str_out) +} + +#' Add a pipe operator to the last element of a character vector +#' +#' @param code_block A character vector. +#' +#' @return The character vector with a pipe operator added to the last element. +#' @keywords internal +#' +add_pipe <- function(code_block) { + admiraldev::assert_character_vector(code_block) + + i <- length(code_block) + + # Add pipe operator to the last element of code block + code_block[i] <- paste0(code_block[i], " %>%") + code_block +} + +#' Remove the pipe operator from the last element of a character vector +#' +#' @param code_blocks A character vector. +#' +#' @return The character vector with the pipe operator removed from the last element. +#' @keywords internal +#' +remove_last_pipe <- function(code_blocks) { + admiraldev::assert_character_vector(code_blocks) + + len_code_block <- length(code_blocks) + + # The last code block should not have a pipe operator + code_blocks[len_code_block] <- code_blocks[len_code_block] |> + stringr::str_remove(stringr::fixed("%>%")) + + code_blocks +} + +#' Get the specification for a domain and modify it +#' +#' @param spec The specification data frame. +#' @param domain The SDTM domain to get the specification for. +#' +#' @return A tibble with the specification for the domain. +#' @keywords internal +#' +get_domain_spec <- function(spec, domain) { + admiraldev::assert_data_frame(spec, required_vars = rlang::syms(expected_columns_min())) + admiraldev::assert_character_scalar(domain) + + # For now assuming that there is only one topic and the topic is the first one + + spec |> + dplyr::filter(tolower(target_sdtm_domain) %in% tolower(domain)) |> + dplyr::select(dplyr::all_of(expected_columns_min())) |> + # For now swapping entity_sub_algorithm with mapping_algorithm since the + # algorithms like assign_no_ct are the mapping_algorithm and they are populated + # in the entity_sub_algorithm + dplyr::mutate( + entity_sub_algorithm_temp = dplyr::if_else( + mapping_algorithm %in% "condition_add", + mapping_algorithm, + entity_sub_algorithm + ), + mapping_algorithm = dplyr::if_else( + mapping_algorithm %in% "condition_add", + entity_sub_algorithm, + mapping_algorithm + ), + entity_sub_algorithm = entity_sub_algorithm_temp + ) |> + dplyr::select(-entity_sub_algorithm_temp) |> + # Need to use the condition_add_raw_dat (if not missing) instead of raw_dataset + dplyr::mutate( + raw_dataset = dplyr::if_else( + entity_sub_algorithm %in% "condition_add" & !is.na(condition_add_raw_dat), + condition_add_raw_dat, + raw_dataset + ) + ) +} + +#' Check if a variable is character +#' +#' @param var_in The variable to check. +#' +#' @return Logical indicating if the variable is character. +#' @export +#' +is_numeric <- function(var_in) { + grepl(r"{^-?\d*(\.\d+)?(e[+-]?\d+)?$}", var_in) +} + +#' Check if a variable is character +#' +#' @param var_in The variable to check. +#' +#' @return Logical indicating if the variable is character. +#' @export +#' +is_character <- function(var_in) { + grepl("[^0-9eE.-]", var_in) +} + +#' Read the specification file +#' +#' @param file The path to the specification file. +#' +#' @returns A tibble with the specification. +#' @export +#' +#' @examples +#' \dontrun{ +#' file <- "cm_sdtm_oak_spec_cdash.csv" +#' observed <- read_spec(file) +#' } +#' +read_spec <- function(file) { + admiraldev::assert_character_scalar(file) + + spec <- utils::read.csv(file = file, na.strings = c("NA", ""), colClasses = "character") |> + tibble::as_tibble() + + admiraldev::assert_data_frame(spec, required_vars = rlang::syms(expected_columns())) + + return(spec) +} + +#' Expected columns in the specification for the domain +#' +#' @keywords internal +#' @noRd +expected_columns_min <- function() { + c( + "raw_dataset", + "raw_variable", + "target_sdtm_variable", + "topic", + "mapping_algorithm", + "entity_sub_algorithm", + "condition_add_raw_dat", + "target_sdtm_variable_codelist_code", + "raw_data_format", + "raw_fmt", + "raw_unk", + "target_term_value", + "target_value" + ) +} + +#' Expected columns in the specification file +#' +#' @keywords internal +#' @noRd +expected_columns <- function() { + c( + "study_number", + "raw_dataset", + "raw_dataset_label", + "raw_variable", + "raw_variable_label", + "raw_variable_ordinal", + "raw_variable_type", + "raw_data_format", + "raw_fmt", + "raw_unk", + "study_specific", + "annotation_ordinal", + "mapping_is_dataset", + "annotation_text", + "target_sdtm_domain", + "target_sdtm_variable", + "target_sdtm_variable_role", + "target_sdtm_variable_codelist_code", + "target_sdtm_variable_controlled_terms_or_format", + "target_sdtm_variable_ordinal", + "origin", + "mapping_algorithm", + "entity_sub_algorithm", + "target_hardcoded_value", + "target_term_value", + "target_term_code", + "condition_ordinal", + "condition_group_ordinal", + "condition_add_raw_dat", + "condition_add_tgt_dat", + "condition_left_raw_dataset", + "condition_left_raw_variable", + "condition_left_sdtm_domain", + "condition_left_sdtm_variable", + "condition_operator", + "condition_right_text_value", + "condition_right_sdtm_domain", + "condition_right_sdtm_variable", + "condition_right_raw_dataset", + "condition_right_raw_variable", + "condition_next_logical_operator", + "merge_type", + "merge_left", + "merge_right", + "merge_condition", + "unduplicate_keys", + "groupby_keys", + "target_resource_raw_dataset", + "target_resource_raw_variable", + "target_value", + "topic" + ) +} diff --git a/R/generate_code_tempates.R b/R/generate_code_tempates.R new file mode 100644 index 00000000..17188912 --- /dev/null +++ b/R/generate_code_tempates.R @@ -0,0 +1,184 @@ +#' The template suffix for the cm code +#' +#' @noRd +#' @keywords internal +cm_template_prefix <- function() { + stringr::str_glue(' +library(sdtm.oak) +library(dplyr) + + +# Read CT Specification +study_ct <- read.csv("./datasets/sdtm_ct.csv") + +# Read in raw data +cm_raw_data <- read.csv("./datasets/cm_raw_data_cdash.csv") + +cm_raw_data <- admiral::convert_blanks_to_na(cm_raw_data) + +# derive oak_id_vars +cm_raw_data <- cm_raw_data %>% + sdtm.oak::generate_oak_id_vars( + pat_var = "PATNUM", + raw_src = "cm_raw_data" + ) + +# Read in DM domain to derive study day +dm <- read.csv("./datasets/dm.csv") + +dm <- admiral::convert_blanks_to_na(dm) +') +} + + +#' The template suffix for the cm code +#' +#' @noRd +#' @keywords internal +cm_template_suffix <- function() { + stringr::str_glue(' +dplyr::mutate( + STUDYID = "test_study", + DOMAIN = "CM", + CMCAT = "GENERAL CONMED", + USUBJID = paste0("test_study", "-", cm_raw_data$PATNUM) +) %>% +sdtm.oak::derive_seq(tgt_var = "CMSEQ", + rec_vars= c("USUBJID", "CMTRT")) %>% +sdtm.oak::derive_study_day( + sdtm_in = ., + dm_domain = dm, + tgdt = "CMENDTC", + refdt = "RFXSTDTC", + study_day_var = "CMENDY" +) %>% +sdtm.oak::derive_study_day( + sdtm_in = ., + dm_domain = dm, + tgdt = "CMSTDTC", + refdt = "RFXSTDTC", + study_day_var = "CMSTDY" +) %>% +dplyr::select("STUDYID", "DOMAIN", "USUBJID", "CMSEQ", "CMTRT", "CMCAT", "CMINDC", + "CMDOSE", "CMDOSTXT", "CMDOSU", "CMDOSFRM", "CMDOSFRQ", "CMROUTE", + "CMSTDTC", "CMENDTC","CMSTDY", "CMENDY", "CMENRTPT", "CMENTPT") +') +} + +#' The template suffix for the vs code +#' +#' @noRd +#' @keywords internal +vs_template_prefix <- function() { + stringr::str_glue(' +library(sdtm.oak) +library(dplyr) + + +# Read Specification + +# Read CT Specification +study_ct <- read.csv("./datasets/sdtm_ct.csv") + +# Read in raw data +vitals_raw_data <- read.csv("./datasets/vitals_raw_data.csv", + stringsAsFactors = FALSE) + +vitals_raw_data <- admiral::convert_blanks_to_na(vitals_raw_data) + + +# derive oak_id_vars +vitals_raw_data <- vitals_raw_data %>% + sdtm.oak::generate_oak_id_vars( + pat_var = "PATNUM", + raw_src = "vitals_raw_data" + ) + +# Read in DM domain to derive study day +dm <- read.csv("./datasets/dm.csv") + +dm <- admiral::convert_blanks_to_na(dm) +') +} + +#' The template suffix for the vs code +#' +#' @noRd +#' @keywords internal +vs_template_suffix <- function() { + stringr::str_glue(' +# Combine all the topic variables into a single data frame. ---- +vs_combined <- dplyr::bind_rows( + vs_asmntdn, vs_sys_bp, vs_dia_bp, vs_pulse, vs_temp, + vs_resprt, vs_oxy_sat +) %>% + dplyr::filter(!is.na(.data$VSTESTCD)) + +# Map qualifiers common to all topic variables ---- + +vs <- vs_combined %>% + # Map VSDTC using sdtm.oak::assign_ct algorithm + sdtm.oak::assign_datetime( + raw_dat = vitals_raw_data, + raw_var = c("VTLD", "VTLTM"), + tgt_var = "VSDTC", + raw_fmt = c(list(c("d-m-y", "dd-mmm-yyyy")), "H:M") + ) %>% + # Map VSTPT from TMPTC using sdtm.oak::assign_ct + sdtm.oak::assign_ct( + raw_dat = vitals_raw_data, + raw_var = "TMPTC", + tgt_var = "VSTPT", + ct_spec = study_ct, + ct_clst = "TPT", + id_vars = oak_id_vars() + ) %>% + # Map VSTPTNUM from TMPTC using sdtm.oak::assign_ct + sdtm.oak::assign_ct( + raw_dat = vitals_raw_data, + raw_var = "TMPTC", + tgt_var = "VSTPTNUM", + ct_spec = study_ct, + ct_clst = "TPTNUM", + id_vars = oak_id_vars() + ) %>% + # Map VISIT from VISIT_NAME using sdtm.oak::assign_ct + sdtm.oak::assign_ct( + raw_dat = vitals_raw_data, + raw_var = "VISIT_NAME", + tgt_var = "VISIT", + ct_spec = study_ct, + ct_clst = "VISIT", + id_vars = oak_id_vars() + ) %>% + # Map VISITNUM from VISIT_NAME using sdtm.oak::assign_ct + sdtm.oak::assign_ct( + raw_dat = vitals_raw_data, + raw_var = "VISIT_NAME", + tgt_var = "VISITNUM", + ct_spec = study_ct, + ct_clst = "VISITNUM", + id_vars = oak_id_vars() + ) %>% + dplyr::mutate( + STUDYID = "test_study", + DOMAIN = "VS", + VSCAT = "VITAL SIGNS", + USUBJID = paste0("test_study", "-", .data$patient_number) + ) %>% + sdtm.oak::derive_seq(tgt_var = "VSSEQ", + rec_vars= c("USUBJID", "VISITNUM", "VSTPTNUM", "VSTESTCD")) %>% + # A bug in sdtm.oak::derive_study_day V0.1 that clears the time values in VSDTC + sdtm.oak::derive_study_day( + sdtm_in = ., + dm_domain = dm, + tgdt = "VSDTC", + refdt = "RFXSTDTC", + study_day_var = "VSDY" + ) %>% + dplyr::select("STUDYID", "DOMAIN", "USUBJID", "VSSEQ", + "VSTESTCD", "VSTEST", "VSCAT", "VSPOS", + "VSORRES", "VSORRESU", "VSLOC", "VSLAT", + "VISIT", "VISITNUM", "VSDY", "VSTPT", "VSTPTNUM", "VSDTC" ) +') +} diff --git a/R/globals.R b/R/globals.R index 9a2998a0..5297f336 100644 --- a/R/globals.R +++ b/R/globals.R @@ -1,4 +1,6 @@ utils::globalVariables(c( "USUBJID", "VISIT", "dom_dt", "dom_tm", "ref_dt", - "ref_tm" + "ref_tm", "target_sdtm_domain", "target_sdtm_variable", + "mapping_algorithm", "entity_sub_algorithm", "entity_sub_algorithm", + "entity_sub_algorithm_temp", "condition_add_raw_dat", "raw_dataset" )) diff --git a/man/add_pipe.Rd b/man/add_pipe.Rd new file mode 100644 index 00000000..c9b1b58d --- /dev/null +++ b/man/add_pipe.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/generate_code.R +\name{add_pipe} +\alias{add_pipe} +\title{Add a pipe operator to the last element of a character vector} +\usage{ +add_pipe(code_block) +} +\arguments{ +\item{code_block}{A character vector.} +} +\value{ +The character vector with a pipe operator added to the last element. +} +\description{ +Add a pipe operator to the last element of a character vector +} +\keyword{internal} diff --git a/man/generate_code.Rd b/man/generate_code.Rd new file mode 100644 index 00000000..0729539b --- /dev/null +++ b/man/generate_code.Rd @@ -0,0 +1,67 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/generate_code.R +\name{generate_code} +\alias{generate_code} +\title{Generate the code for the mapping SDTM specification} +\usage{ +generate_code(spec, domain, out_dir = ".") +} +\arguments{ +\item{spec}{The specification data frame.} + +\item{domain}{The SDTM domain to generate the code for.} + +\item{out_dir}{The directory to save the code file. Default is the current +directory.} +} +\value{ +Side effect: the code is generated and saved to a file. +} +\description{ +One can use the option \code{width} to control the width of the code. A width of +twenty will almost always place every parameter on a separate line. This is +useful for debugging and understanding the code. The higher the width, the +more parameters will be placed on a single line and code will be shorter. +See the examples for more details. +} +\examples{ +\dontrun{ +# VS domain ---- +spec <- read_spec("vs_sdtm_oak_spec.csv") +domain <- "vs" + +spec <- spec |> + dplyr::filter( + !is.na(target_sdtm_variable), + !is.na(mapping_algorithm), + !mapping_is_dataset \%in\% c("TRUE") + ) + +old_width <- options(width = 20) +generate_code(spec, domain) +# Restore original width +options(width = old_width$width) + +# CM domain ---- + +spec <- read_spec("cm_sdtm_oak_spec_cdash.csv") + +spec <- spec |> + dplyr::filter( + !is.na(target_sdtm_variable), + !is.na(mapping_algorithm), + !mapping_is_dataset \%in\% c("TRUE") + ) + +domain <- "cm" +generate_code(spec, domain) + +# One can use option width to control the width of the code +# Twenty will almost always place every parameter on a separate line +old_width <- options(width = 20) +generate_code(spec, domain) +# Restore original width +options(width = old_width$width) +} + +} diff --git a/man/generate_one_topic_code.Rd b/man/generate_one_topic_code.Rd new file mode 100644 index 00000000..06e3eae7 --- /dev/null +++ b/man/generate_one_topic_code.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/generate_code.R +\name{generate_one_topic_code} +\alias{generate_one_topic_code} +\title{Generate the code for one topic} +\usage{ +generate_one_topic_code(topic, domain, spec) +} +\arguments{ +\item{topic}{The topic to generate the code for.} + +\item{domain}{The SDTM domain.} + +\item{spec}{The specification data frame.} +} +\value{ +The code for the topic as a string. +} +\description{ +Generate the code for one topic +} +\keyword{internal} diff --git a/man/generate_one_var_code.Rd b/man/generate_one_var_code.Rd new file mode 100644 index 00000000..ce6fcdca --- /dev/null +++ b/man/generate_one_var_code.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/generate_code.R +\name{generate_one_var_code} +\alias{generate_one_var_code} +\title{Generate the code for one variable} +\usage{ +generate_one_var_code(spec) +} +\arguments{ +\item{spec}{The specification data frame.} + +\item{last_var}{Logical indicating if this is the last variable in the domain.} +} +\value{ +The code for the variable as a string. +} +\description{ +Generate the code for one variable +} +\keyword{internal} diff --git a/man/get_domain_spec.Rd b/man/get_domain_spec.Rd new file mode 100644 index 00000000..382c9650 --- /dev/null +++ b/man/get_domain_spec.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/generate_code.R +\name{get_domain_spec} +\alias{get_domain_spec} +\title{Get the specification for a domain and modify it} +\usage{ +get_domain_spec(spec, domain) +} +\arguments{ +\item{spec}{The specification data frame.} + +\item{domain}{The SDTM domain to get the specification for.} +} +\value{ +A tibble with the specification for the domain. +} +\description{ +Get the specification for a domain and modify it +} +\keyword{internal} diff --git a/man/is_character.Rd b/man/is_character.Rd new file mode 100644 index 00000000..3f805162 --- /dev/null +++ b/man/is_character.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/generate_code.R +\name{is_character} +\alias{is_character} +\title{Check if a variable is character} +\usage{ +is_character(var_in) +} +\arguments{ +\item{var_in}{The variable to check.} +} +\value{ +Logical indicating if the variable is character. +} +\description{ +Check if a variable is character +} diff --git a/man/is_numeric.Rd b/man/is_numeric.Rd new file mode 100644 index 00000000..18bd359a --- /dev/null +++ b/man/is_numeric.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/generate_code.R +\name{is_numeric} +\alias{is_numeric} +\title{Check if a variable is character} +\usage{ +is_numeric(var_in) +} +\arguments{ +\item{var_in}{The variable to check.} +} +\value{ +Logical indicating if the variable is character. +} +\description{ +Check if a variable is character +} diff --git a/man/parse_into_c_call.Rd b/man/parse_into_c_call.Rd new file mode 100644 index 00000000..77340daa --- /dev/null +++ b/man/parse_into_c_call.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/generate_code.R +\name{parse_into_c_call} +\alias{parse_into_c_call} +\title{This function converts comma separated string into a character vector} +\usage{ +parse_into_c_call(str_in) +} +\arguments{ +\item{string}{A string with comma separated values.} +} +\value{ +A character vector. +} +\description{ +This function converts comma separated string into a character vector +} +\examples{ +\dontrun{ +str_in <- "a, b, c" +parse_into_c_call("a, b, c") + +str_in <- NA_character_ +parse_into_c_call(str_in) +} + +} +\keyword{internal} diff --git a/man/read_spec.Rd b/man/read_spec.Rd new file mode 100644 index 00000000..3f4948ad --- /dev/null +++ b/man/read_spec.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/generate_code.R +\name{read_spec} +\alias{read_spec} +\title{Read the specification file} +\usage{ +read_spec(file) +} +\arguments{ +\item{file}{The path to the specification file.} +} +\value{ +A tibble with the specification. +} +\description{ +Read the specification file +} +\examples{ +\dontrun{ +file <- "cm_sdtm_oak_spec_cdash.csv" +observed <- read_spec(file) +} + +} diff --git a/man/remove_last_pipe.Rd b/man/remove_last_pipe.Rd new file mode 100644 index 00000000..a7538d02 --- /dev/null +++ b/man/remove_last_pipe.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/generate_code.R +\name{remove_last_pipe} +\alias{remove_last_pipe} +\title{Remove the pipe operator from the last element of a character vector} +\usage{ +remove_last_pipe(code_blocks) +} +\arguments{ +\item{code_blocks}{A character vector.} +} +\value{ +The character vector with the pipe operator removed from the last element. +} +\description{ +Remove the pipe operator from the last element of a character vector +} +\keyword{internal} diff --git a/man/style_the_code.Rd b/man/style_the_code.Rd new file mode 100644 index 00000000..0e62604f --- /dev/null +++ b/man/style_the_code.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/generate_code.R +\name{style_the_code} +\alias{style_the_code} +\title{Style the code} +\usage{ +style_the_code(code_by_topics, domain) +} +\arguments{ +\item{code_by_topics}{A list of character vectors.} + +\item{domain}{The SDTM domain to generate the code for.} +} +\value{ +The styled code as a string. +} +\description{ +This function styles the code using the styler package and adds the necessary +templates to the code (e.g. cm_template_prefix, cm_template_suffix). +} +\keyword{internal} diff --git a/renv.lock b/renv.lock index cd6da0f0..38cb4ead 100644 --- a/renv.lock +++ b/renv.lock @@ -33,7 +33,7 @@ "Package": "R.cache", "Version": "0.16.0", "Source": "Repository", - "Repository": "CRAN", + "Repository": "repos", "Requirements": [ "R", "R.methodsS3", @@ -48,7 +48,7 @@ "Package": "R.methodsS3", "Version": "1.8.2", "Source": "Repository", - "Repository": "CRAN", + "Repository": "repos", "Requirements": [ "R", "utils" @@ -57,22 +57,22 @@ }, "R.oo": { "Package": "R.oo", - "Version": "1.27.0", + "Version": "1.25.0", "Source": "Repository", - "Repository": "CRAN", + "Repository": "repos", "Requirements": [ "R", "R.methodsS3", "methods", "utils" ], - "Hash": "6ac79ff194202248cf946fe3a5d6d498" + "Hash": "a0900a114f4f0194cf4aa8cd4a700681" }, "R.utils": { "Package": "R.utils", - "Version": "2.12.3", + "Version": "2.12.2", "Source": "Repository", - "Repository": "CRAN", + "Repository": "repos", "Requirements": [ "R", "R.methodsS3", @@ -81,7 +81,7 @@ "tools", "utils" ], - "Hash": "3dc2829b790254bfba21e60965787651" + "Hash": "325f01db13da12c04d8f6e7be36ff514" }, "R6": { "Package": "R6", diff --git a/renv/profiles/4.2/renv.lock b/renv/profiles/4.2/renv.lock index 210a2de0..b6386152 100644 --- a/renv/profiles/4.2/renv.lock +++ b/renv/profiles/4.2/renv.lock @@ -33,7 +33,7 @@ "Package": "R.cache", "Version": "0.16.0", "Source": "Repository", - "Repository": "CRAN", + "Repository": "repos", "Requirements": [ "R", "R.methodsS3", @@ -48,7 +48,7 @@ "Package": "R.methodsS3", "Version": "1.8.2", "Source": "Repository", - "Repository": "CRAN", + "Repository": "repos", "Requirements": [ "R", "utils" @@ -57,22 +57,22 @@ }, "R.oo": { "Package": "R.oo", - "Version": "1.27.0", + "Version": "1.25.0", "Source": "Repository", - "Repository": "CRAN", + "Repository": "repos", "Requirements": [ "R", "R.methodsS3", "methods", "utils" ], - "Hash": "6ac79ff194202248cf946fe3a5d6d498" + "Hash": "a0900a114f4f0194cf4aa8cd4a700681" }, "R.utils": { "Package": "R.utils", - "Version": "2.12.3", + "Version": "2.12.2", "Source": "Repository", - "Repository": "CRAN", + "Repository": "repos", "Requirements": [ "R", "R.methodsS3", @@ -81,7 +81,7 @@ "tools", "utils" ], - "Hash": "3dc2829b790254bfba21e60965787651" + "Hash": "325f01db13da12c04d8f6e7be36ff514" }, "R6": { "Package": "R6", diff --git a/renv/profiles/4.3/renv.lock b/renv/profiles/4.3/renv.lock index cd6da0f0..38cb4ead 100644 --- a/renv/profiles/4.3/renv.lock +++ b/renv/profiles/4.3/renv.lock @@ -33,7 +33,7 @@ "Package": "R.cache", "Version": "0.16.0", "Source": "Repository", - "Repository": "CRAN", + "Repository": "repos", "Requirements": [ "R", "R.methodsS3", @@ -48,7 +48,7 @@ "Package": "R.methodsS3", "Version": "1.8.2", "Source": "Repository", - "Repository": "CRAN", + "Repository": "repos", "Requirements": [ "R", "utils" @@ -57,22 +57,22 @@ }, "R.oo": { "Package": "R.oo", - "Version": "1.27.0", + "Version": "1.25.0", "Source": "Repository", - "Repository": "CRAN", + "Repository": "repos", "Requirements": [ "R", "R.methodsS3", "methods", "utils" ], - "Hash": "6ac79ff194202248cf946fe3a5d6d498" + "Hash": "a0900a114f4f0194cf4aa8cd4a700681" }, "R.utils": { "Package": "R.utils", - "Version": "2.12.3", + "Version": "2.12.2", "Source": "Repository", - "Repository": "CRAN", + "Repository": "repos", "Requirements": [ "R", "R.methodsS3", @@ -81,7 +81,7 @@ "tools", "utils" ], - "Hash": "3dc2829b790254bfba21e60965787651" + "Hash": "325f01db13da12c04d8f6e7be36ff514" }, "R6": { "Package": "R6", diff --git a/tests/testthat/test-generate_code.R b/tests/testthat/test-generate_code.R new file mode 100644 index 00000000..4d4cd6c0 --- /dev/null +++ b/tests/testthat/test-generate_code.R @@ -0,0 +1,78 @@ +test_that("generate_code works for one topic domain", { + # nolint start + spec <- tibble::tribble( + ~study_number, ~raw_dataset, ~raw_dataset_label, ~raw_variable, ~raw_variable_label, ~raw_variable_ordinal, ~raw_variable_type, ~raw_data_format, ~raw_fmt, ~raw_unk, ~study_specific, ~annotation_ordinal, ~mapping_is_dataset, ~annotation_text, ~target_sdtm_domain, ~target_sdtm_variable, ~target_sdtm_variable_role, ~topic, ~target_sdtm_variable_codelist_code, ~target_sdtm_variable_controlled_terms_or_format, ~target_sdtm_variable_ordinal, ~origin, ~mapping_algorithm, ~entity_sub_algorithm, ~target_hardcoded_value, ~target_term_value, ~target_value, ~target_term_code, ~condition_ordinal, ~condition_group_ordinal, ~condition_add_raw_dat, ~condition_add_tgt_dat, ~condition_left_raw_dataset, ~condition_left_raw_variable, ~condition_left_sdtm_domain, ~condition_left_sdtm_variable, ~condition_operator, ~condition_right_text_value, ~condition_right_sdtm_domain, ~condition_right_sdtm_variable, ~condition_right_raw_dataset, ~condition_right_raw_variable, ~condition_next_logical_operator, ~merge_type, ~merge_left, ~merge_right, ~merge_condition, ~unduplicate_keys, ~groupby_keys, ~target_resource_raw_dataset, ~target_resource_raw_variable, + "lp_study", "cm_raw_data", "Concomitant Medications", "IT.CMTRT", "var label", "3", "Text", "$200", NA, NA, "FALSE", "1", "FALSE", "CM.CMTRT", "CM", "CMTRT", "Topic Variable", "CMTRT", NA, NA, "10", "CRF", "assign_no_ct", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + "lp_study", "cm_raw_data", "Concomitant Medications", "IT.CMINDC", "var label", "4", "Text", "$100", NA, NA, "FALSE", "1", "FALSE", "CM.CMINDC", "CM", "CMINDC", "Record Qualifier", "CMTRT", NA, NA, "19", "CRF", "assign_no_ct", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA + ) + # nolint end + + # Convert all NA to NA_character_ + spec <- spec |> + dplyr::mutate(dplyr::across( + .cols = dplyr::everything(), + .fns = \(x) dplyr::if_else(is.na(x), NA_character_, x) + )) + + domain <- "cm" + + temp_dir <- tempdir() + out_dir <- file.path(temp_dir, "data/generate_code") + unlink(out_dir, recursive = TRUE, force = TRUE) + dir.create(out_dir, showWarnings = FALSE, recursive = TRUE) + + withr::with_options(list(width = 20L), { + generate_code(spec, domain, out_dir) + observed <- readLines(file.path(out_dir, paste0(domain, "_sdtm_oak_code.R"))) + + expect_gt(length(observed), 10L) + # From prefix + expect_true(grepl("generate_oak_id_vars", observed, fixed = TRUE) |> any()) + # From generator + expect_true(grepl("assign_no_ct", observed, fixed = TRUE) |> any()) + # From suffix + expect_true(grepl("dplyr::select", observed, fixed = TRUE) |> any()) + }) +}) + +test_that("generate_code works for multiple topics domain", { + # nolint start + spec <- tibble::tribble( + ~study_number, ~raw_dataset, ~raw_dataset_label, ~raw_variable, ~raw_variable_label, ~raw_variable_ordinal, ~raw_variable_type, ~raw_data_format, ~raw_fmt, ~raw_unk, ~study_specific, ~annotation_ordinal, ~mapping_is_dataset, ~annotation_text, ~target_sdtm_domain, ~target_sdtm_variable, ~target_sdtm_variable_role, ~topic, ~target_sdtm_variable_codelist_code, ~target_sdtm_variable_controlled_terms_or_format, ~target_sdtm_variable_ordinal, ~origin, ~mapping_algorithm, ~entity_sub_algorithm, ~target_hardcoded_value, ~target_term_value, ~target_value, ~target_term_code, ~condition_ordinal, ~condition_group_ordinal, ~condition_add_raw_dat, ~condition_add_tgt_dat, ~condition_left_raw_dataset, ~condition_left_raw_variable, ~condition_left_sdtm_domain, ~condition_left_sdtm_variable, ~condition_operator, ~condition_right_text_value, ~condition_right_sdtm_domain, ~condition_right_sdtm_variable, ~condition_right_raw_dataset, ~condition_right_raw_variable, ~condition_next_logical_operator, ~merge_type, ~merge_left, ~merge_right, ~merge_condition, ~unduplicate_keys, ~groupby_keys, ~target_resource_raw_dataset, ~target_resource_raw_variable, + "lp_study", "vitals_raw_data", "Vital Signs", "ASMNTDN", "Assessment not done", "1", "DropDownList", "1", NA, NA, "FALSE", "1", "FALSE", "If No then VS.VSSTAT = 'NOT DONE' when VS.VSTESTCD = 'VSALL'", "VS", "VSTESTCD", "Topic Variable", "ASMNTDN", "C66741", "(VSTESTCD)", "7", "Assigned", "condition_add", "hardcode_ct", NA, "VSALL", "VSALL", "V00224", "1.3", "0", "condition_add(vitals_raw_data, ASMNTDN == \"Yes\")", NA, "vitals_raw_data", "ASMNTDN", NA, NA, "equal_to", "Yes", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + "lp_study", "vitals_raw_data", "Vital Signs", "ASMNTDN", "Assessment not done", "1", "DropDownList", "1", NA, NA, "FALSE", "2", "FALSE", "If No then VS.VSTEST = 'Vital Signs'", "VS", "VSTEST", "Synonym Qualifier", "ASMNTDN", "C67153", "(VSTEST)", "8", "Assigned", "condition_add", "hardcode_ct", NA, "Vital Signs", "Vital Signs", "V00224", "2.1", "0", "condition_add(vitals_raw_data, ASMNTDN == \"Yes\")", NA, "vitals_raw_data", "ASMNTDN", NA, NA, "equal_to", "Yes", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + "lp_study", "vitals_raw_data", "Vital Signs", "ASMNTDN", "Assessment not done", "1", "DropDownList", "1", NA, NA, "FALSE", "1", "FALSE", "If No then VS.VSSTAT = 'NOT DONE' when VS.VSTESTCD = 'VSALL'", "VS", "VSSTAT", "Record Qualifier", "ASMNTDN", "C66789", "(ND)", "17", "Assigned", "condition_add", "hardcode_ct", NA, "NOT DONE", "NOT DONE", "C49484", "1.2", "0", "condition_add(vitals_raw_data, ASMNTDN == \"Yes\")", NA, "vitals_raw_data", "ASMNTDN", NA, NA, "equal_to", "Yes", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + "lp_study", "vitals_raw_data", "Vital Signs", "SYS_BP", "Systolic blood pressure", "6", "Text", "3", NA, NA, "FALSE", "1", "FALSE", "VS.VSORRES when VS.VSTESTCD = 'SYSBP'", "VS", "VSTESTCD", "Topic Variable", "SYS_BP", "C66741", "(VSTESTCD)", "7", "Assigned", "hardcode_ct", NA, NA, "SYSBP", "SYSBP", "C25298", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + "lp_study", "vitals_raw_data", "Vital Signs", "SYS_BP", "Systolic blood pressure", "6", "Text", "3", NA, NA, "FALSE", "2", "FALSE", "VS.VSTEST = 'Systolic Blood Pressure'", "VS", "VSTEST", "Synonym Qualifier", "SYS_BP", "C67153", "(VSTEST)", "8", "Assigned", "hardcode_ct", NA, NA, "Systolic Blood Pressure", "Systolic Blood Pressure", "C25298", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + "lp_study", "vitals_raw_data", "Vital Signs", "SYS_BP", "Systolic blood pressure", "6", "Text", "3", NA, NA, "FALSE", "1", "FALSE", "VS.VSORRES when VS.VSTESTCD = 'SYSBP'", "VS", "VSORRES", "Result Qualifier", "SYS_BP", NA, NA, "12", "CRF", "assign_no_ct", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + "lp_study", "vitals_raw_data", "Vital Signs", "SYS_BP", "Systolic blood pressure", "6", "Text", "3", NA, NA, "FALSE", "3", "FALSE", "VS.VSORRESU = ", "VS", "VSORRESU", "Variable Qualifier", "SYS_BP", "C66770", "(VSRESU)", "13", "Assigned", "hardcode_ct", NA, NA, "mmHg", "mmHg", "C49670", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA + ) + # nolint end + + # Convert all NA to NA_character_ + spec <- spec |> + dplyr::mutate(dplyr::across( + .cols = dplyr::everything(), + .fns = \(x) dplyr::if_else(is.na(x), NA_character_, x) + )) + + domain <- "vs" + + temp_dir <- tempdir() + out_dir <- file.path(temp_dir, "data/generate_code") + unlink(out_dir, recursive = TRUE, force = TRUE) + dir.create(out_dir, showWarnings = FALSE, recursive = TRUE) + + withr::with_options(list(width = 20L), { + generate_code(spec, domain, out_dir) + observed <- readLines(file.path(out_dir, paste0(domain, "_sdtm_oak_code.R"))) + + expect_gt(length(observed), 100L) + # From prefix + expect_true(grepl("generate_oak_id_vars", observed, fixed = TRUE) |> any()) + # From generator + expect_true(grepl("assign_no_ct", observed, fixed = TRUE) |> any()) + # From suffix + expect_true(grepl("dplyr::select", observed, fixed = TRUE) |> any()) + }) +})