From 1294c41572bd61827f0e9a4ef76ef167d3828ffb Mon Sep 17 00:00:00 2001 From: Edgar Manukyan Date: Mon, 25 Nov 2024 02:20:20 +0000 Subject: [PATCH] #107 poc of two vars --- DESCRIPTION | 3 +- NAMESPACE | 2 + R/generate_code.R | 171 ++++++++++++++++++++++++++++ man/expected_columns.Rd | 16 +++ man/generate_code.Rd | 31 +++++ man/generate_one_var_code.Rd | 20 ++++ man/read_spec.Rd | 25 ++++ tests/testthat/test-generate_code.R | 25 ++++ 8 files changed, 292 insertions(+), 1 deletion(-) create mode 100644 R/generate_code.R create mode 100644 man/expected_columns.Rd create mode 100644 man/generate_code.Rd create mode 100644 man/generate_one_var_code.Rd create mode 100644 man/read_spec.Rd create mode 100644 tests/testthat/test-generate_code.R diff --git a/DESCRIPTION b/DESCRIPTION index 292e4a28..01cbaa8a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -57,7 +57,8 @@ Imports: stringr (>= 1.4.0), assertthat, pillar, - cli + cli, + styler Suggests: knitr, htmltools, diff --git a/NAMESPACE b/NAMESPACE index 2dc0c0f3..68d7440d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -17,6 +17,7 @@ export(derive_seq) export(derive_study_day) export(domain_example) export(fmt_cmp) +export(generate_code) export(generate_oak_id_vars) export(hardcode_ct) export(hardcode_no_ct) @@ -25,6 +26,7 @@ export(problems) export(read_ct_spec) export(read_ct_spec_example) export(read_domain_example) +export(read_spec) export(sbj_vars) importFrom(dplyr,mutate) importFrom(pillar,ctl_new_rowid_pillar) diff --git a/R/generate_code.R b/R/generate_code.R new file mode 100644 index 00000000..fb6504e0 --- /dev/null +++ b/R/generate_code.R @@ -0,0 +1,171 @@ +#' Generate the code for the mapping SDTM specification +#' +#' @param spec The specification data frame. +#' @param domain The SDTM domain to generate the code for. +#' @param out_dir The directory to save the code file. Default is the current +#' directory. +#' +#' @return Side effect: the code is generated and saved to a file. +#' @export +#' +#' @examples +#' +#' \dontrun{ +#' spec <- read_spec("cm_sdtm_oak_spec_cdash.csv") +#' domain <- "cm" +#' generate_code(spec, domain) +#' } +#' +generate_code <- function(spec, domain, out_dir = ".") { + + admiraldev::assert_data_frame(spec, required_vars = rlang::parse_exprs(expected_columns)) + admiraldev::assert_character_scalar(domain) + + # For now assuming that there is only one topic and the topic is the first one + + spec_domain <- spec |> + dplyr::filter(tolower(target_sdtm_domain) %in% tolower(domain)) |> + # TODO + # Doing only few variables + dplyr::filter(target_sdtm_variable %in% c("CMTRT", "CMINDC")) |> + + dplyr::select( + raw_dataset, + raw_variable, + target_sdtm_variable, + mapping_algorithm + ) + + n_rows <- nrow(spec_domain) + + # Generate the code for each variable row in spec_domain + code_blocks <- purrr::map( + seq_len(n_rows), + \(row) generate_one_var_code( + spec_domain[row, ], + last_var = identical(row, n_rows) + ) + ) |> + unlist() + + styled_code <- styler::style_text(code_blocks) + + # Save the code to a file + file_name <- paste0(domain, "_sdtm_oak_code.R") + writeLines(styled_code, file.path(out_dir, file_name)) +} + +#' Generate the code for one variable +#' +#' @param spec_var The specification for one variable. +#' @param last_var Logical indicating if this is the last variable in the domain. +#' +#' @return The code for the variable as a string. +#' @keywords internal +#' +generate_one_var_code <- function(spec_var, last_var = FALSE) { + + admiraldev::assert_data_frame(spec_var) + + # Generate the function call + generated_call <- with(spec_var, { + rlang::call2( + mapping_algorithm, + raw_dat = rlang::sym(raw_dataset), + raw_var = raw_variable, + tgt_var = target_sdtm_variable + ) + }) + + # Convert the call to code as a string. Intentionally limiting the width to 20 + # characters to force each parameter to be on a separate line. + raw_code <- rlang::expr_deparse(generated_call, width = 20) + + # Add the pipe operator if this is not the last variable + if (!last_var) { + raw_code[length(raw_code)] <- paste0(raw_code[length(raw_code)], " %>%") + } + + return(raw_code) +} + + +#' Read the specification file +#' +#' @param file The path to the specification file. +#' +#' @returns A tibble with the specification. +#' @export +#' +#' @examples +#' +#' \dontrun{ +#' file <- "cm_sdtm_oak_spec_cdash.csv" +#' observed <- read_spec(file) +#' } +#' +read_spec <- function(file) { + + admiraldev::assert_character_scalar(file) + + spec <- utils::read.csv(file = file, na.strings = c("NA", ""), colClasses = "character") |> + tibble::as_tibble() + + admiraldev::assert_data_frame(spec, required_vars = rlang::parse_exprs(expected_columns)) + + return(spec) +} + +#' Expected columns in the specification file +#' +#' @keywords internal +#' +expected_columns <- c( + "study_number", + "raw_dataset", + "raw_dataset_label", + "raw_variable", + "raw_variable_label", + "raw_variable_ordinal", + "raw_variable_type", + "raw_data_format", + "study_specific", + "annotation_ordinal", + "mapping_is_dataset", + "annotation_text", + "target_sdtm_domain", + "target_sdtm_variable", + "target_sdtm_variable_role", + "target_sdtm_variable_codelist_code", + "target_sdtm_variable_controlled_terms_or_format", + "target_sdtm_variable_ordinal", + "origin", + "mapping_algorithm", + "entity_sub_algorithm", + "target_hardcoded_value", + "target_term_value", + "target_term_code", + "condition_ordinal", + "condition_group_ordinal", + "condition_add_raw_dat", + "condition_add_tgt_dat", + "condition_left_raw_dataset", + "condition_left_raw_variable", + "condition_left_sdtm_domain", + "condition_left_sdtm_variable", + "condition_operator", + "condition_right_text_value", + "condition_right_sdtm_domain", + "condition_right_sdtm_variable", + "condition_right_raw_dataset", + "condition_right_raw_variable", + "condition_next_logical_operator", + "merge_type", + "merge_left", + "merge_right", + "merge_condition", + "unduplicate_keys", + "groupby_keys", + "target_resource_raw_dataset", + "target_resource_raw_variable" +) diff --git a/man/expected_columns.Rd b/man/expected_columns.Rd new file mode 100644 index 00000000..f4bdb453 --- /dev/null +++ b/man/expected_columns.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/generate_code.R +\docType{data} +\name{expected_columns} +\alias{expected_columns} +\title{Expected columns in the specification file} +\format{ +An object of class \code{character} of length 47. +} +\usage{ +expected_columns +} +\description{ +Expected columns in the specification file +} +\keyword{internal} diff --git a/man/generate_code.Rd b/man/generate_code.Rd new file mode 100644 index 00000000..6363f523 --- /dev/null +++ b/man/generate_code.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/generate_code.R +\name{generate_code} +\alias{generate_code} +\title{Generate the code for the mapping SDTM specification} +\usage{ +generate_code(spec, domain, out_dir = ".") +} +\arguments{ +\item{spec}{The specification data frame.} + +\item{domain}{The SDTM domain to generate the code for.} + +\item{out_dir}{The directory to save the code file. Default is the current +directory.} +} +\value{ +Side effect: the code is generated and saved to a file. +} +\description{ +Generate the code for the mapping SDTM specification +} +\examples{ + +\dontrun{ +spec <- read_spec("cm_sdtm_oak_spec_cdash.csv") +domain <- "cm" +generate_code(spec, domain) +} + +} diff --git a/man/generate_one_var_code.Rd b/man/generate_one_var_code.Rd new file mode 100644 index 00000000..ce38a1f9 --- /dev/null +++ b/man/generate_one_var_code.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/generate_code.R +\name{generate_one_var_code} +\alias{generate_one_var_code} +\title{Generate the code for one variable} +\usage{ +generate_one_var_code(spec_var, last_var = FALSE) +} +\arguments{ +\item{spec_var}{The specification for one variable.} + +\item{last_var}{Logical indicating if this is the last variable in the domain.} +} +\value{ +The code for the variable as a string. +} +\description{ +Generate the code for one variable +} +\keyword{internal} diff --git a/man/read_spec.Rd b/man/read_spec.Rd new file mode 100644 index 00000000..4dd18f75 --- /dev/null +++ b/man/read_spec.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/generate_code.R +\name{read_spec} +\alias{read_spec} +\title{Read the specification file} +\usage{ +read_spec(file) +} +\arguments{ +\item{file}{The path to the specification file.} +} +\value{ +A tibble with the specification. +} +\description{ +Read the specification file +} +\examples{ + +\dontrun{ +file <- "cm_sdtm_oak_spec_cdash.csv" +observed <- read_spec(file) +} + +} diff --git a/tests/testthat/test-generate_code.R b/tests/testthat/test-generate_code.R new file mode 100644 index 00000000..6b4372ba --- /dev/null +++ b/tests/testthat/test-generate_code.R @@ -0,0 +1,25 @@ +test_that("generate_code works", { + + # nolint start + spec <- tibble::tribble( + ~study_number, ~raw_dataset, ~raw_dataset_label, ~raw_variable, ~raw_variable_label, ~raw_variable_ordinal, ~raw_variable_type, ~raw_data_format, ~study_specific, ~annotation_ordinal, ~mapping_is_dataset, ~annotation_text, ~target_sdtm_domain, ~target_sdtm_variable, ~target_sdtm_variable_role, ~target_sdtm_variable_codelist_code, ~target_sdtm_variable_controlled_terms_or_format, ~target_sdtm_variable_ordinal, ~origin, ~mapping_algorithm, ~entity_sub_algorithm, ~target_hardcoded_value, ~target_term_value, ~target_term_code, ~condition_ordinal, ~condition_group_ordinal, ~condition_add_raw_dat, ~condition_add_tgt_dat, ~condition_left_raw_dataset, ~condition_left_raw_variable, ~condition_left_sdtm_domain, ~condition_left_sdtm_variable, ~condition_operator, ~condition_right_text_value, ~condition_right_sdtm_domain, ~condition_right_sdtm_variable, ~condition_right_raw_dataset, ~condition_right_raw_variable, ~condition_next_logical_operator, ~merge_type, ~merge_left, ~merge_right, ~merge_condition, ~unduplicate_keys, ~groupby_keys, ~target_resource_raw_dataset, ~target_resource_raw_variable, + "lp_study", "cm_raw_data", "Concomitant Medications", "IT.CMTRT", "var label", "3", "Text", "$200", "FALSE", "1", "FALSE", "CM.CMTRT", "CM", "CMTRT", "Topic Variable", NA, NA, "10", "CRF", "assign_no_ct", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + "lp_study", "cm_raw_data", "Concomitant Medications", "IT.CMINDC", "var label", "4", "Text", "$100", "FALSE", "1", "FALSE", "CM.CMINDC", "CM", "CMINDC", "Record Qualifier", NA, NA, "19", "CRF", "assign_no_ct", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA + ) + # nolint end + + domain <- "cm" + + temp_dir <- tempdir() + out_dir <- file.path(temp_dir, "data/generate_code") + unlink(out_dir, recursive = TRUE, force = TRUE) + dir.create(out_dir, showWarnings = FALSE, recursive = TRUE) + + generate_code(spec, domain, out_dir) + + + observed <- readLines(file.path(out_dir, paste0(domain, "_sdtm_oak_code.R"))) + + expect_true(identical(length(observed), 10L)) + expect_true(grepl("CMTRT", observed[3])) +})