From 1294c41572bd61827f0e9a4ef76ef167d3828ffb Mon Sep 17 00:00:00 2001 From: Edgar Manukyan Date: Mon, 25 Nov 2024 02:20:20 +0000 Subject: [PATCH 01/35] #107 poc of two vars --- DESCRIPTION | 3 +- NAMESPACE | 2 + R/generate_code.R | 171 ++++++++++++++++++++++++++++ man/expected_columns.Rd | 16 +++ man/generate_code.Rd | 31 +++++ man/generate_one_var_code.Rd | 20 ++++ man/read_spec.Rd | 25 ++++ tests/testthat/test-generate_code.R | 25 ++++ 8 files changed, 292 insertions(+), 1 deletion(-) create mode 100644 R/generate_code.R create mode 100644 man/expected_columns.Rd create mode 100644 man/generate_code.Rd create mode 100644 man/generate_one_var_code.Rd create mode 100644 man/read_spec.Rd create mode 100644 tests/testthat/test-generate_code.R diff --git a/DESCRIPTION b/DESCRIPTION index 292e4a28..01cbaa8a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -57,7 +57,8 @@ Imports: stringr (>= 1.4.0), assertthat, pillar, - cli + cli, + styler Suggests: knitr, htmltools, diff --git a/NAMESPACE b/NAMESPACE index 2dc0c0f3..68d7440d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -17,6 +17,7 @@ export(derive_seq) export(derive_study_day) export(domain_example) export(fmt_cmp) +export(generate_code) export(generate_oak_id_vars) export(hardcode_ct) export(hardcode_no_ct) @@ -25,6 +26,7 @@ export(problems) export(read_ct_spec) export(read_ct_spec_example) export(read_domain_example) +export(read_spec) export(sbj_vars) importFrom(dplyr,mutate) importFrom(pillar,ctl_new_rowid_pillar) diff --git a/R/generate_code.R b/R/generate_code.R new file mode 100644 index 00000000..fb6504e0 --- /dev/null +++ b/R/generate_code.R @@ -0,0 +1,171 @@ +#' Generate the code for the mapping SDTM specification +#' +#' @param spec The specification data frame. +#' @param domain The SDTM domain to generate the code for. +#' @param out_dir The directory to save the code file. Default is the current +#' directory. +#' +#' @return Side effect: the code is generated and saved to a file. +#' @export +#' +#' @examples +#' +#' \dontrun{ +#' spec <- read_spec("cm_sdtm_oak_spec_cdash.csv") +#' domain <- "cm" +#' generate_code(spec, domain) +#' } +#' +generate_code <- function(spec, domain, out_dir = ".") { + + admiraldev::assert_data_frame(spec, required_vars = rlang::parse_exprs(expected_columns)) + admiraldev::assert_character_scalar(domain) + + # For now assuming that there is only one topic and the topic is the first one + + spec_domain <- spec |> + dplyr::filter(tolower(target_sdtm_domain) %in% tolower(domain)) |> + # TODO + # Doing only few variables + dplyr::filter(target_sdtm_variable %in% c("CMTRT", "CMINDC")) |> + + dplyr::select( + raw_dataset, + raw_variable, + target_sdtm_variable, + mapping_algorithm + ) + + n_rows <- nrow(spec_domain) + + # Generate the code for each variable row in spec_domain + code_blocks <- purrr::map( + seq_len(n_rows), + \(row) generate_one_var_code( + spec_domain[row, ], + last_var = identical(row, n_rows) + ) + ) |> + unlist() + + styled_code <- styler::style_text(code_blocks) + + # Save the code to a file + file_name <- paste0(domain, "_sdtm_oak_code.R") + writeLines(styled_code, file.path(out_dir, file_name)) +} + +#' Generate the code for one variable +#' +#' @param spec_var The specification for one variable. +#' @param last_var Logical indicating if this is the last variable in the domain. +#' +#' @return The code for the variable as a string. +#' @keywords internal +#' +generate_one_var_code <- function(spec_var, last_var = FALSE) { + + admiraldev::assert_data_frame(spec_var) + + # Generate the function call + generated_call <- with(spec_var, { + rlang::call2( + mapping_algorithm, + raw_dat = rlang::sym(raw_dataset), + raw_var = raw_variable, + tgt_var = target_sdtm_variable + ) + }) + + # Convert the call to code as a string. Intentionally limiting the width to 20 + # characters to force each parameter to be on a separate line. + raw_code <- rlang::expr_deparse(generated_call, width = 20) + + # Add the pipe operator if this is not the last variable + if (!last_var) { + raw_code[length(raw_code)] <- paste0(raw_code[length(raw_code)], " %>%") + } + + return(raw_code) +} + + +#' Read the specification file +#' +#' @param file The path to the specification file. +#' +#' @returns A tibble with the specification. +#' @export +#' +#' @examples +#' +#' \dontrun{ +#' file <- "cm_sdtm_oak_spec_cdash.csv" +#' observed <- read_spec(file) +#' } +#' +read_spec <- function(file) { + + admiraldev::assert_character_scalar(file) + + spec <- utils::read.csv(file = file, na.strings = c("NA", ""), colClasses = "character") |> + tibble::as_tibble() + + admiraldev::assert_data_frame(spec, required_vars = rlang::parse_exprs(expected_columns)) + + return(spec) +} + +#' Expected columns in the specification file +#' +#' @keywords internal +#' +expected_columns <- c( + "study_number", + "raw_dataset", + "raw_dataset_label", + "raw_variable", + "raw_variable_label", + "raw_variable_ordinal", + "raw_variable_type", + "raw_data_format", + "study_specific", + "annotation_ordinal", + "mapping_is_dataset", + "annotation_text", + "target_sdtm_domain", + "target_sdtm_variable", + "target_sdtm_variable_role", + "target_sdtm_variable_codelist_code", + "target_sdtm_variable_controlled_terms_or_format", + "target_sdtm_variable_ordinal", + "origin", + "mapping_algorithm", + "entity_sub_algorithm", + "target_hardcoded_value", + "target_term_value", + "target_term_code", + "condition_ordinal", + "condition_group_ordinal", + "condition_add_raw_dat", + "condition_add_tgt_dat", + "condition_left_raw_dataset", + "condition_left_raw_variable", + "condition_left_sdtm_domain", + "condition_left_sdtm_variable", + "condition_operator", + "condition_right_text_value", + "condition_right_sdtm_domain", + "condition_right_sdtm_variable", + "condition_right_raw_dataset", + "condition_right_raw_variable", + "condition_next_logical_operator", + "merge_type", + "merge_left", + "merge_right", + "merge_condition", + "unduplicate_keys", + "groupby_keys", + "target_resource_raw_dataset", + "target_resource_raw_variable" +) diff --git a/man/expected_columns.Rd b/man/expected_columns.Rd new file mode 100644 index 00000000..f4bdb453 --- /dev/null +++ b/man/expected_columns.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/generate_code.R +\docType{data} +\name{expected_columns} +\alias{expected_columns} +\title{Expected columns in the specification file} +\format{ +An object of class \code{character} of length 47. +} +\usage{ +expected_columns +} +\description{ +Expected columns in the specification file +} +\keyword{internal} diff --git a/man/generate_code.Rd b/man/generate_code.Rd new file mode 100644 index 00000000..6363f523 --- /dev/null +++ b/man/generate_code.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/generate_code.R +\name{generate_code} +\alias{generate_code} +\title{Generate the code for the mapping SDTM specification} +\usage{ +generate_code(spec, domain, out_dir = ".") +} +\arguments{ +\item{spec}{The specification data frame.} + +\item{domain}{The SDTM domain to generate the code for.} + +\item{out_dir}{The directory to save the code file. Default is the current +directory.} +} +\value{ +Side effect: the code is generated and saved to a file. +} +\description{ +Generate the code for the mapping SDTM specification +} +\examples{ + +\dontrun{ +spec <- read_spec("cm_sdtm_oak_spec_cdash.csv") +domain <- "cm" +generate_code(spec, domain) +} + +} diff --git a/man/generate_one_var_code.Rd b/man/generate_one_var_code.Rd new file mode 100644 index 00000000..ce38a1f9 --- /dev/null +++ b/man/generate_one_var_code.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/generate_code.R +\name{generate_one_var_code} +\alias{generate_one_var_code} +\title{Generate the code for one variable} +\usage{ +generate_one_var_code(spec_var, last_var = FALSE) +} +\arguments{ +\item{spec_var}{The specification for one variable.} + +\item{last_var}{Logical indicating if this is the last variable in the domain.} +} +\value{ +The code for the variable as a string. +} +\description{ +Generate the code for one variable +} +\keyword{internal} diff --git a/man/read_spec.Rd b/man/read_spec.Rd new file mode 100644 index 00000000..4dd18f75 --- /dev/null +++ b/man/read_spec.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/generate_code.R +\name{read_spec} +\alias{read_spec} +\title{Read the specification file} +\usage{ +read_spec(file) +} +\arguments{ +\item{file}{The path to the specification file.} +} +\value{ +A tibble with the specification. +} +\description{ +Read the specification file +} +\examples{ + +\dontrun{ +file <- "cm_sdtm_oak_spec_cdash.csv" +observed <- read_spec(file) +} + +} diff --git a/tests/testthat/test-generate_code.R b/tests/testthat/test-generate_code.R new file mode 100644 index 00000000..6b4372ba --- /dev/null +++ b/tests/testthat/test-generate_code.R @@ -0,0 +1,25 @@ +test_that("generate_code works", { + + # nolint start + spec <- tibble::tribble( + ~study_number, ~raw_dataset, ~raw_dataset_label, ~raw_variable, ~raw_variable_label, ~raw_variable_ordinal, ~raw_variable_type, ~raw_data_format, ~study_specific, ~annotation_ordinal, ~mapping_is_dataset, ~annotation_text, ~target_sdtm_domain, ~target_sdtm_variable, ~target_sdtm_variable_role, ~target_sdtm_variable_codelist_code, ~target_sdtm_variable_controlled_terms_or_format, ~target_sdtm_variable_ordinal, ~origin, ~mapping_algorithm, ~entity_sub_algorithm, ~target_hardcoded_value, ~target_term_value, ~target_term_code, ~condition_ordinal, ~condition_group_ordinal, ~condition_add_raw_dat, ~condition_add_tgt_dat, ~condition_left_raw_dataset, ~condition_left_raw_variable, ~condition_left_sdtm_domain, ~condition_left_sdtm_variable, ~condition_operator, ~condition_right_text_value, ~condition_right_sdtm_domain, ~condition_right_sdtm_variable, ~condition_right_raw_dataset, ~condition_right_raw_variable, ~condition_next_logical_operator, ~merge_type, ~merge_left, ~merge_right, ~merge_condition, ~unduplicate_keys, ~groupby_keys, ~target_resource_raw_dataset, ~target_resource_raw_variable, + "lp_study", "cm_raw_data", "Concomitant Medications", "IT.CMTRT", "var label", "3", "Text", "$200", "FALSE", "1", "FALSE", "CM.CMTRT", "CM", "CMTRT", "Topic Variable", NA, NA, "10", "CRF", "assign_no_ct", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + "lp_study", "cm_raw_data", "Concomitant Medications", "IT.CMINDC", "var label", "4", "Text", "$100", "FALSE", "1", "FALSE", "CM.CMINDC", "CM", "CMINDC", "Record Qualifier", NA, NA, "19", "CRF", "assign_no_ct", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA + ) + # nolint end + + domain <- "cm" + + temp_dir <- tempdir() + out_dir <- file.path(temp_dir, "data/generate_code") + unlink(out_dir, recursive = TRUE, force = TRUE) + dir.create(out_dir, showWarnings = FALSE, recursive = TRUE) + + generate_code(spec, domain, out_dir) + + + observed <- readLines(file.path(out_dir, paste0(domain, "_sdtm_oak_code.R"))) + + expect_true(identical(length(observed), 10L)) + expect_true(grepl("CMTRT", observed[3])) +}) From 9e3c9a7ee8d52a6a12bccd4bff0b6f4c35f583f8 Mon Sep 17 00:00:00 2001 From: edgar-manukyan Date: Mon, 25 Nov 2024 02:25:46 +0000 Subject: [PATCH 02/35] Automatic renv profile update. --- renv/profiles/4.2/renv.lock | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/renv/profiles/4.2/renv.lock b/renv/profiles/4.2/renv.lock index 210a2de0..b6386152 100644 --- a/renv/profiles/4.2/renv.lock +++ b/renv/profiles/4.2/renv.lock @@ -33,7 +33,7 @@ "Package": "R.cache", "Version": "0.16.0", "Source": "Repository", - "Repository": "CRAN", + "Repository": "repos", "Requirements": [ "R", "R.methodsS3", @@ -48,7 +48,7 @@ "Package": "R.methodsS3", "Version": "1.8.2", "Source": "Repository", - "Repository": "CRAN", + "Repository": "repos", "Requirements": [ "R", "utils" @@ -57,22 +57,22 @@ }, "R.oo": { "Package": "R.oo", - "Version": "1.27.0", + "Version": "1.25.0", "Source": "Repository", - "Repository": "CRAN", + "Repository": "repos", "Requirements": [ "R", "R.methodsS3", "methods", "utils" ], - "Hash": "6ac79ff194202248cf946fe3a5d6d498" + "Hash": "a0900a114f4f0194cf4aa8cd4a700681" }, "R.utils": { "Package": "R.utils", - "Version": "2.12.3", + "Version": "2.12.2", "Source": "Repository", - "Repository": "CRAN", + "Repository": "repos", "Requirements": [ "R", "R.methodsS3", @@ -81,7 +81,7 @@ "tools", "utils" ], - "Hash": "3dc2829b790254bfba21e60965787651" + "Hash": "325f01db13da12c04d8f6e7be36ff514" }, "R6": { "Package": "R6", From 3a1a85b55675560499419fe261a81cc963425dc9 Mon Sep 17 00:00:00 2001 From: edgar-manukyan Date: Mon, 25 Nov 2024 02:30:41 +0000 Subject: [PATCH 03/35] Automatic renv profile update. --- renv.lock | 16 ++++++++-------- renv/profiles/4.3/renv.lock | 16 ++++++++-------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/renv.lock b/renv.lock index cd6da0f0..38cb4ead 100644 --- a/renv.lock +++ b/renv.lock @@ -33,7 +33,7 @@ "Package": "R.cache", "Version": "0.16.0", "Source": "Repository", - "Repository": "CRAN", + "Repository": "repos", "Requirements": [ "R", "R.methodsS3", @@ -48,7 +48,7 @@ "Package": "R.methodsS3", "Version": "1.8.2", "Source": "Repository", - "Repository": "CRAN", + "Repository": "repos", "Requirements": [ "R", "utils" @@ -57,22 +57,22 @@ }, "R.oo": { "Package": "R.oo", - "Version": "1.27.0", + "Version": "1.25.0", "Source": "Repository", - "Repository": "CRAN", + "Repository": "repos", "Requirements": [ "R", "R.methodsS3", "methods", "utils" ], - "Hash": "6ac79ff194202248cf946fe3a5d6d498" + "Hash": "a0900a114f4f0194cf4aa8cd4a700681" }, "R.utils": { "Package": "R.utils", - "Version": "2.12.3", + "Version": "2.12.2", "Source": "Repository", - "Repository": "CRAN", + "Repository": "repos", "Requirements": [ "R", "R.methodsS3", @@ -81,7 +81,7 @@ "tools", "utils" ], - "Hash": "3dc2829b790254bfba21e60965787651" + "Hash": "325f01db13da12c04d8f6e7be36ff514" }, "R6": { "Package": "R6", diff --git a/renv/profiles/4.3/renv.lock b/renv/profiles/4.3/renv.lock index cd6da0f0..38cb4ead 100644 --- a/renv/profiles/4.3/renv.lock +++ b/renv/profiles/4.3/renv.lock @@ -33,7 +33,7 @@ "Package": "R.cache", "Version": "0.16.0", "Source": "Repository", - "Repository": "CRAN", + "Repository": "repos", "Requirements": [ "R", "R.methodsS3", @@ -48,7 +48,7 @@ "Package": "R.methodsS3", "Version": "1.8.2", "Source": "Repository", - "Repository": "CRAN", + "Repository": "repos", "Requirements": [ "R", "utils" @@ -57,22 +57,22 @@ }, "R.oo": { "Package": "R.oo", - "Version": "1.27.0", + "Version": "1.25.0", "Source": "Repository", - "Repository": "CRAN", + "Repository": "repos", "Requirements": [ "R", "R.methodsS3", "methods", "utils" ], - "Hash": "6ac79ff194202248cf946fe3a5d6d498" + "Hash": "a0900a114f4f0194cf4aa8cd4a700681" }, "R.utils": { "Package": "R.utils", - "Version": "2.12.3", + "Version": "2.12.2", "Source": "Repository", - "Repository": "CRAN", + "Repository": "repos", "Requirements": [ "R", "R.methodsS3", @@ -81,7 +81,7 @@ "tools", "utils" ], - "Hash": "3dc2829b790254bfba21e60965787651" + "Hash": "325f01db13da12c04d8f6e7be36ff514" }, "R6": { "Package": "R6", From dfbf1b6f1167fb04d6e84805798411443178661e Mon Sep 17 00:00:00 2001 From: Edgar Manukyan Date: Wed, 27 Nov 2024 08:16:44 -0500 Subject: [PATCH 04/35] #107 swap sub with algo --- R/generate_code.R | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/R/generate_code.R b/R/generate_code.R index fb6504e0..fada9498 100644 --- a/R/generate_code.R +++ b/R/generate_code.R @@ -27,15 +27,41 @@ generate_code <- function(spec, domain, out_dir = ".") { dplyr::filter(tolower(target_sdtm_domain) %in% tolower(domain)) |> # TODO # Doing only few variables - dplyr::filter(target_sdtm_variable %in% c("CMTRT", "CMINDC")) |> + dplyr::filter(target_sdtm_variable %in% c( + "CMTRT", + "CMINDC", + "CMDOSE" + )) |> dplyr::select( raw_dataset, raw_variable, target_sdtm_variable, - mapping_algorithm + mapping_algorithm, + entity_sub_algorithm, + condition_add_raw_dat, ) + # For now swapping entity_sub_algorithm with mapping_algorithm since the + # algorithms like assign_no_ct are the mapping_algorithm and they are populated + # in the entity_sub_algorithm + spec_domain <- spec_domain |> + dplyr::mutate( + entity_sub_algorithm_temp = dplyr::if_else( + mapping_algorithm %in% "condition_add", + mapping_algorithm, + entity_sub_algorithm, + ), + mapping_algorithm = dplyr::if_else( + mapping_algorithm %in% "condition_add", + entity_sub_algorithm, + mapping_algorithm, + ), + entity_sub_algorithm = entity_sub_algorithm_temp + ) |> + dplyr::select(-entity_sub_algorithm_temp) + + n_rows <- nrow(spec_domain) # Generate the code for each variable row in spec_domain From ed15f38385a99c028507ac2853ecf10c0301dd97 Mon Sep 17 00:00:00 2001 From: Edgar Manukyan Date: Wed, 27 Nov 2024 19:13:49 -0500 Subject: [PATCH 05/35] #107 support add_cond and ct --- R/generate_code.R | 52 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 39 insertions(+), 13 deletions(-) diff --git a/R/generate_code.R b/R/generate_code.R index fada9498..c5f0060b 100644 --- a/R/generate_code.R +++ b/R/generate_code.R @@ -1,3 +1,12 @@ +#' Temporary vector to control the vars we generate and the order +tgt_vars <- c( + "CMTRT", + "CMINDC", + "CMDOSE", + "CMDOSTXT", + "CMDOSU" +) + #' Generate the code for the mapping SDTM specification #' #' @param spec The specification data frame. @@ -27,11 +36,7 @@ generate_code <- function(spec, domain, out_dir = ".") { dplyr::filter(tolower(target_sdtm_domain) %in% tolower(domain)) |> # TODO # Doing only few variables - dplyr::filter(target_sdtm_variable %in% c( - "CMTRT", - "CMINDC", - "CMDOSE" - )) |> + dplyr::filter(target_sdtm_variable %in% tgt_vars) |> dplyr::select( raw_dataset, @@ -40,6 +45,7 @@ generate_code <- function(spec, domain, out_dir = ".") { mapping_algorithm, entity_sub_algorithm, condition_add_raw_dat, + target_sdtm_variable_codelist_code, ) # For now swapping entity_sub_algorithm with mapping_algorithm since the @@ -92,16 +98,36 @@ generate_code <- function(spec, domain, out_dir = ".") { generate_one_var_code <- function(spec_var, last_var = FALSE) { admiraldev::assert_data_frame(spec_var) + assertthat::assert_that(identical(nrow(spec_var), 1L)) - # Generate the function call - generated_call <- with(spec_var, { - rlang::call2( - mapping_algorithm, - raw_dat = rlang::sym(raw_dataset), - raw_var = raw_variable, - tgt_var = target_sdtm_variable + # Need to use the condition_add_raw_dat (if not missing) instead of raw_dataset + spec_var <- spec_var |> + dplyr::mutate( + raw_dataset = dplyr::if_else( + entity_sub_algorithm %in% "condition_add" & !is.na(condition_add_raw_dat), + condition_add_raw_dat, + raw_dataset + ) ) - }) + + args <- list( + raw_dat = rlang::parse_expr(spec_var$raw_dataset), + raw_var = spec_var$raw_variable, + tgt_var = spec_var$target_sdtm_variable + ) + + is_ct <- spec_var$mapping_algorithm %in% c("assign_ct") + + if (is_ct) { + args$ct_spec <- rlang::parse_expr("study_ct") + args$ct_clst <- spec_var$target_sdtm_variable_codelist_code + } + + # Generate the function call + generated_call <- rlang::call2( + spec_var$mapping_algorithm, + !!!args + ) # Convert the call to code as a string. Intentionally limiting the width to 20 # characters to force each parameter to be on a separate line. From 920a227ee401cf38b05151f136871b06a0fe2c9a Mon Sep 17 00:00:00 2001 From: Edgar Manukyan Date: Sun, 1 Dec 2024 19:08:27 +0000 Subject: [PATCH 06/35] #107 remove hard-coded width --- R/generate_code.R | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/R/generate_code.R b/R/generate_code.R index c5f0060b..ad7b5de6 100644 --- a/R/generate_code.R +++ b/R/generate_code.R @@ -23,6 +23,11 @@ tgt_vars <- c( #' spec <- read_spec("cm_sdtm_oak_spec_cdash.csv") #' domain <- "cm" #' generate_code(spec, domain) +#' +#' # One can use option width to control the width of the code +#' # Twenty will almost always place every parameter on a separate line +#' options(width = 20) +#' generate_code(spec, domain) #' } #' generate_code <- function(spec, domain, out_dir = ".") { @@ -131,7 +136,7 @@ generate_one_var_code <- function(spec_var, last_var = FALSE) { # Convert the call to code as a string. Intentionally limiting the width to 20 # characters to force each parameter to be on a separate line. - raw_code <- rlang::expr_deparse(generated_call, width = 20) + raw_code <- rlang::expr_deparse(generated_call) # Add the pipe operator if this is not the last variable if (!last_var) { From dd1d61fe729b5f5130b1f930454d6b64b9492c63 Mon Sep 17 00:00:00 2001 From: Edgar Manukyan Date: Sun, 1 Dec 2024 19:24:59 +0000 Subject: [PATCH 07/35] #107 ref --- R/generate_code.R | 134 ++++++++++++++++++++++++++-------------------- 1 file changed, 76 insertions(+), 58 deletions(-) diff --git a/R/generate_code.R b/R/generate_code.R index ad7b5de6..1e0636ab 100644 --- a/R/generate_code.R +++ b/R/generate_code.R @@ -1,12 +1,3 @@ -#' Temporary vector to control the vars we generate and the order -tgt_vars <- c( - "CMTRT", - "CMINDC", - "CMDOSE", - "CMDOSTXT", - "CMDOSU" -) - #' Generate the code for the mapping SDTM specification #' #' @param spec The specification data frame. @@ -32,46 +23,10 @@ tgt_vars <- c( #' generate_code <- function(spec, domain, out_dir = ".") { - admiraldev::assert_data_frame(spec, required_vars = rlang::parse_exprs(expected_columns)) + admiraldev::assert_data_frame(spec, required_vars = rlang::syms(expected_columns)) admiraldev::assert_character_scalar(domain) - # For now assuming that there is only one topic and the topic is the first one - - spec_domain <- spec |> - dplyr::filter(tolower(target_sdtm_domain) %in% tolower(domain)) |> - # TODO - # Doing only few variables - dplyr::filter(target_sdtm_variable %in% tgt_vars) |> - - dplyr::select( - raw_dataset, - raw_variable, - target_sdtm_variable, - mapping_algorithm, - entity_sub_algorithm, - condition_add_raw_dat, - target_sdtm_variable_codelist_code, - ) - - # For now swapping entity_sub_algorithm with mapping_algorithm since the - # algorithms like assign_no_ct are the mapping_algorithm and they are populated - # in the entity_sub_algorithm - spec_domain <- spec_domain |> - dplyr::mutate( - entity_sub_algorithm_temp = dplyr::if_else( - mapping_algorithm %in% "condition_add", - mapping_algorithm, - entity_sub_algorithm, - ), - mapping_algorithm = dplyr::if_else( - mapping_algorithm %in% "condition_add", - entity_sub_algorithm, - mapping_algorithm, - ), - entity_sub_algorithm = entity_sub_algorithm_temp - ) |> - dplyr::select(-entity_sub_algorithm_temp) - + spec_domain <- get_domain_spec(spec, domain) n_rows <- nrow(spec_domain) @@ -105,16 +60,6 @@ generate_one_var_code <- function(spec_var, last_var = FALSE) { admiraldev::assert_data_frame(spec_var) assertthat::assert_that(identical(nrow(spec_var), 1L)) - # Need to use the condition_add_raw_dat (if not missing) instead of raw_dataset - spec_var <- spec_var |> - dplyr::mutate( - raw_dataset = dplyr::if_else( - entity_sub_algorithm %in% "condition_add" & !is.na(condition_add_raw_dat), - condition_add_raw_dat, - raw_dataset - ) - ) - args <- list( raw_dat = rlang::parse_expr(spec_var$raw_dataset), raw_var = spec_var$raw_variable, @@ -146,6 +91,79 @@ generate_one_var_code <- function(spec_var, last_var = FALSE) { return(raw_code) } +#' Get the specification for a domain and modify it +#' +#' @param spec The specification data frame. +#' @param domain The SDTM domain to get the specification for. +#' +#' @return +#' @export +#' +#' @examples +get_domain_spec <- function(spec, domain) { + + expected_columns <- c( + "raw_dataset", + "raw_variable", + "target_sdtm_variable", + "mapping_algorithm", + "entity_sub_algorithm", + "condition_add_raw_dat", + "target_sdtm_variable_codelist_code" + ) + + admiraldev::assert_data_frame(spec, required_vars = rlang::syms(expected_columns)) + admiraldev::assert_character_scalar(domain) + + # TODO + # Temporary vector to control the vars we generate and the order + tgt_vars <- c( + "CMTRT", + "CMINDC", + "CMDOSE", + "CMDOSTXT", + "CMDOSU" + ) + + # For now assuming that there is only one topic and the topic is the first one + + spec |> + dplyr::filter(tolower(target_sdtm_domain) %in% tolower(domain)) |> + # TODO + # Doing only few variables + dplyr::filter(target_sdtm_variable %in% tgt_vars) |> + + dplyr::select(dplyr::all_of(expected_columns)) |> + + # For now swapping entity_sub_algorithm with mapping_algorithm since the + # algorithms like assign_no_ct are the mapping_algorithm and they are populated + # in the entity_sub_algorithm + dplyr::mutate( + entity_sub_algorithm_temp = dplyr::if_else( + mapping_algorithm %in% "condition_add", + mapping_algorithm, + entity_sub_algorithm, + ), + mapping_algorithm = dplyr::if_else( + mapping_algorithm %in% "condition_add", + entity_sub_algorithm, + mapping_algorithm, + ), + entity_sub_algorithm = entity_sub_algorithm_temp + ) |> + dplyr::select(-entity_sub_algorithm_temp) |> + + # Need to use the condition_add_raw_dat (if not missing) instead of raw_dataset + dplyr::mutate( + raw_dataset = dplyr::if_else( + entity_sub_algorithm %in% "condition_add" & !is.na(condition_add_raw_dat), + condition_add_raw_dat, + raw_dataset + ) + ) + +} + #' Read the specification file #' @@ -168,7 +186,7 @@ read_spec <- function(file) { spec <- utils::read.csv(file = file, na.strings = c("NA", ""), colClasses = "character") |> tibble::as_tibble() - admiraldev::assert_data_frame(spec, required_vars = rlang::parse_exprs(expected_columns)) + admiraldev::assert_data_frame(spec, required_vars = rlang::syms(expected_columns)) return(spec) } From eb81f9a07c70343a836fed9334f5567e6d75efc6 Mon Sep 17 00:00:00 2001 From: Edgar Manukyan Date: Sun, 1 Dec 2024 20:29:22 +0000 Subject: [PATCH 08/35] #107 adjust test --- R/generate_code.R | 76 ++++++++++++++++++++--------- man/add_pipe.Rd | 18 +++++++ man/generate_code.Rd | 11 ++++- man/generate_one_var_code.Rd | 2 +- man/get_domain_spec.Rd | 17 +++++++ man/remove_last_pipe.Rd | 18 +++++++ tests/testthat/test-generate_code.R | 12 ++--- 7 files changed, 124 insertions(+), 30 deletions(-) create mode 100644 man/add_pipe.Rd create mode 100644 man/get_domain_spec.Rd create mode 100644 man/remove_last_pipe.Rd diff --git a/R/generate_code.R b/R/generate_code.R index 1e0636ab..ef816846 100644 --- a/R/generate_code.R +++ b/R/generate_code.R @@ -1,5 +1,11 @@ #' Generate the code for the mapping SDTM specification #' +#' One can use the option `width` to control the width of the code. A width of +#' twenty will almost always place every parameter on a separate line. This is +#' useful for debugging and understanding the code. The higher the width, the +#' more parameters will be placed on a single line and code will be shorter. +#' See the examples for more details. +#' #' @param spec The specification data frame. #' @param domain The SDTM domain to generate the code for. #' @param out_dir The directory to save the code file. Default is the current @@ -28,25 +34,21 @@ generate_code <- function(spec, domain, out_dir = ".") { spec_domain <- get_domain_spec(spec, domain) - n_rows <- nrow(spec_domain) - # Generate the code for each variable row in spec_domain - code_blocks <- purrr::map( - seq_len(n_rows), - \(row) generate_one_var_code( - spec_domain[row, ], - last_var = identical(row, n_rows) - ) + styled_code <- purrr::map( + seq_len(nrow(spec_domain)), + \(row) generate_one_var_code(spec_domain[row, ]) ) |> - unlist() - - styled_code <- styler::style_text(code_blocks) + unlist() |> + remove_last_pipe() |> + styler::style_text() # Save the code to a file file_name <- paste0(domain, "_sdtm_oak_code.R") writeLines(styled_code, file.path(out_dir, file_name)) } + #' Generate the code for one variable #' #' @param spec_var The specification for one variable. @@ -55,7 +57,7 @@ generate_code <- function(spec, domain, out_dir = ".") { #' @return The code for the variable as a string. #' @keywords internal #' -generate_one_var_code <- function(spec_var, last_var = FALSE) { +generate_one_var_code <- function(spec_var) { admiraldev::assert_data_frame(spec_var) assertthat::assert_that(identical(nrow(spec_var), 1L)) @@ -79,16 +81,47 @@ generate_one_var_code <- function(spec_var, last_var = FALSE) { !!!args ) - # Convert the call to code as a string. Intentionally limiting the width to 20 - # characters to force each parameter to be on a separate line. - raw_code <- rlang::expr_deparse(generated_call) + rlang::expr_deparse(generated_call) |> + add_pipe() +} - # Add the pipe operator if this is not the last variable - if (!last_var) { - raw_code[length(raw_code)] <- paste0(raw_code[length(raw_code)], " %>%") - } - return(raw_code) +#' Add a pipe operator to the last element of a character vector +#' +#' @param code_block A character vector. +#' +#' @return The character vector with a pipe operator added to the last element. +#' @keywords internal +#' +add_pipe <- function(code_block) { + + admiraldev::assert_character_vector(code_block) + + i <- length(code_block) + + # Add pipe operator to the last element of code block + code_block[i] <- paste0(code_block[i], " %>%") + code_block +} + +#' Remove the pipe operator from the last element of a character vector +#' +#' @param code_blocks A character vector. +#' +#' @return The character vector with the pipe operator removed from the last element. +#' @keywords internal +#' +remove_last_pipe <- function(code_blocks) { + + admiraldev::assert_character_vector(code_blocks) + + len_code_block <- length(code_blocks) + + # The last code block should not have a pipe operator + code_blocks[len_code_block] <- code_blocks[len_code_block] |> + stringr::str_remove("%>%") + + code_blocks } #' Get the specification for a domain and modify it @@ -97,7 +130,7 @@ generate_one_var_code <- function(spec_var, last_var = FALSE) { #' @param domain The SDTM domain to get the specification for. #' #' @return -#' @export +#' @keywords internal #' #' @examples get_domain_spec <- function(spec, domain) { @@ -161,7 +194,6 @@ get_domain_spec <- function(spec, domain) { raw_dataset ) ) - } diff --git a/man/add_pipe.Rd b/man/add_pipe.Rd new file mode 100644 index 00000000..c9b1b58d --- /dev/null +++ b/man/add_pipe.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/generate_code.R +\name{add_pipe} +\alias{add_pipe} +\title{Add a pipe operator to the last element of a character vector} +\usage{ +add_pipe(code_block) +} +\arguments{ +\item{code_block}{A character vector.} +} +\value{ +The character vector with a pipe operator added to the last element. +} +\description{ +Add a pipe operator to the last element of a character vector +} +\keyword{internal} diff --git a/man/generate_code.Rd b/man/generate_code.Rd index 6363f523..6efd45e0 100644 --- a/man/generate_code.Rd +++ b/man/generate_code.Rd @@ -18,7 +18,11 @@ directory.} Side effect: the code is generated and saved to a file. } \description{ -Generate the code for the mapping SDTM specification +One can use the option \code{width} to control the width of the code. A width of +twenty will almost always place every parameter on a separate line. This is +useful for debugging and understanding the code. The higher the width, the +more parameters will be placed on a single line and code will be shorter. +See the examples for more details. } \examples{ @@ -26,6 +30,11 @@ Generate the code for the mapping SDTM specification spec <- read_spec("cm_sdtm_oak_spec_cdash.csv") domain <- "cm" generate_code(spec, domain) + +# One can use option width to control the width of the code +# Twenty will almost always place every parameter on a separate line +options(width = 20) +generate_code(spec, domain) } } diff --git a/man/generate_one_var_code.Rd b/man/generate_one_var_code.Rd index ce38a1f9..f23d6ace 100644 --- a/man/generate_one_var_code.Rd +++ b/man/generate_one_var_code.Rd @@ -4,7 +4,7 @@ \alias{generate_one_var_code} \title{Generate the code for one variable} \usage{ -generate_one_var_code(spec_var, last_var = FALSE) +generate_one_var_code(spec_var) } \arguments{ \item{spec_var}{The specification for one variable.} diff --git a/man/get_domain_spec.Rd b/man/get_domain_spec.Rd new file mode 100644 index 00000000..976589a6 --- /dev/null +++ b/man/get_domain_spec.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/generate_code.R +\name{get_domain_spec} +\alias{get_domain_spec} +\title{Get the specification for a domain and modify it} +\usage{ +get_domain_spec(spec, domain) +} +\arguments{ +\item{spec}{The specification data frame.} + +\item{domain}{The SDTM domain to get the specification for.} +} +\description{ +Get the specification for a domain and modify it +} +\keyword{internal} diff --git a/man/remove_last_pipe.Rd b/man/remove_last_pipe.Rd new file mode 100644 index 00000000..a7538d02 --- /dev/null +++ b/man/remove_last_pipe.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/generate_code.R +\name{remove_last_pipe} +\alias{remove_last_pipe} +\title{Remove the pipe operator from the last element of a character vector} +\usage{ +remove_last_pipe(code_blocks) +} +\arguments{ +\item{code_blocks}{A character vector.} +} +\value{ +The character vector with the pipe operator removed from the last element. +} +\description{ +Remove the pipe operator from the last element of a character vector +} +\keyword{internal} diff --git a/tests/testthat/test-generate_code.R b/tests/testthat/test-generate_code.R index 6b4372ba..59e05428 100644 --- a/tests/testthat/test-generate_code.R +++ b/tests/testthat/test-generate_code.R @@ -15,11 +15,11 @@ test_that("generate_code works", { unlink(out_dir, recursive = TRUE, force = TRUE) dir.create(out_dir, showWarnings = FALSE, recursive = TRUE) - generate_code(spec, domain, out_dir) + withr::with_options(list(width = 20), { + generate_code(spec, domain, out_dir) + observed <- readLines(file.path(out_dir, paste0(domain, "_sdtm_oak_code.R"))) - - observed <- readLines(file.path(out_dir, paste0(domain, "_sdtm_oak_code.R"))) - - expect_true(identical(length(observed), 10L)) - expect_true(grepl("CMTRT", observed[3])) + expect_true(identical(length(observed), 10L)) + expect_true(grepl("CMTRT", observed[3])) + }) }) From 66b6ae0d20898496f84ea196afd5591983bb698b Mon Sep 17 00:00:00 2001 From: Edgar Manukyan Date: Mon, 2 Dec 2024 00:53:16 +0000 Subject: [PATCH 09/35] #107 working cm --- R/generate_code.R | 160 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 142 insertions(+), 18 deletions(-) diff --git a/R/generate_code.R b/R/generate_code.R index ef816846..9e6c060e 100644 --- a/R/generate_code.R +++ b/R/generate_code.R @@ -1,3 +1,88 @@ +# TODO Things I changed in specs +# - Made sure that target_sdtm_domain is not missing +# - Added raw_fmt for dates +# - Added raw_unk for dates +# - The algorithm for CMENRTPT in spec is hardcode_ct, but in the program assign_ct +# is used. I kept the algorithm as hardcode_ct in the spec. +# - I consolidated target_hardcoded_value and target_term_value into target_value +# - In the spec we have cm_raw_data and I used it in the template as well +# - Changed is.numeric/is.character into is_numeric/is_character +# - I did not generate id_vars since the default values was enough, certainly we +# can add it later when the customized id_vars are needed. + + +# TODO +# Temporary vector to control the vars we generate. For now we assume that the +# order in the specs is the one we want to generate the code for. +tgt_vars <- c( + "CMTRT", + "CMINDC", + "CMDOSE", + "CMDOSTXT", + "CMDOSU", + "CMDOSFRM", + "CMDOSFRQ", + "CMROUTE", + "CMSTDTC", + "CMENRTPT", + "CMENTPT", + "CMENDTC" +) + +#' The template suffix for the cm code +#' +#' @keywords internal +cm_template_prefix <- stringr::str_glue(' +library(sdtm.oak) +library(dplyr) + + +# Read CT Specification +study_ct <- read.csv("./datasets/sdtm_ct.csv") + +# Read in raw data +cm_raw_data <- read.csv("./datasets/cm_raw_data_cdash.csv") + +cm_raw_data <- admiral::convert_blanks_to_na(cm_raw_data) + +# derive oak_id_vars +cm_raw_data <- cm_raw_data %>% + generate_oak_id_vars( + pat_var = "PATNUM", + raw_src = "cm_raw_data" + ) + +# Read in DM domain to derive study day +dm <- read.csv("./datasets/dm.csv") + +dm <- admiral::convert_blanks_to_na(dm) + +cm <- +') + +#' Check if a variable is character +#' +#' @param var_in The variable to check. +#' +#' @return Logical indicating if the variable is character. +#' @export +#' +is_numeric <- function(var_in) { + grepl("^-?\\d*(\\.\\d+)?(e[+-]?\\d+)?$", var_in) +} + +#' Check if a variable is character +#' +#' @param var_in The variable to check. +#' +#' @return +#' @export +#' +is_character <- function(var_in) { + grepl("[^0-9eE.-]", var_in) +} + + #' Generate the code for the mapping SDTM specification #' #' One can use the option `width` to control the width of the code. A width of @@ -41,6 +126,7 @@ generate_code <- function(spec, domain, out_dir = ".") { ) |> unlist() |> remove_last_pipe() |> + append(cm_template_prefix, after = 0) |> styler::style_text() # Save the code to a file @@ -65,16 +151,22 @@ generate_one_var_code <- function(spec_var) { args <- list( raw_dat = rlang::parse_expr(spec_var$raw_dataset), raw_var = spec_var$raw_variable, - tgt_var = spec_var$target_sdtm_variable + tgt_var = spec_var$target_sdtm_variable, + tgt_val = spec_var$target_value, + ct_spec = rlang::parse_expr("study_ct"), + ct_clst = spec_var$target_sdtm_variable_codelist_code, + raw_fmt = spec_var$raw_fmt, + raw_unk = parse_into_c_call(spec_var$raw_unk) ) - is_ct <- spec_var$mapping_algorithm %in% c("assign_ct") - - if (is_ct) { - args$ct_spec <- rlang::parse_expr("study_ct") - args$ct_clst <- spec_var$target_sdtm_variable_codelist_code + # If the ct_clst is missing, then we must remove ct_spec + if (is.na(args$ct_clst)) { + args$ct_spec <- NA } + # Remove the arguments that are missing + args <- purrr::discard(args, \(x) is.vector(x) && any(is.na(x))) + # Generate the function call generated_call <- rlang::call2( spec_var$mapping_algorithm, @@ -85,6 +177,40 @@ generate_one_var_code <- function(spec_var) { add_pipe() } +#' This function converts comma separated string into a character vector +#' +#' @param string A string with comma separated values. +#' +#' @return A character vector. +#' +#' @keywords internal +#' +#' @examples +#' +#' \dontrun{ +#' str_in <- "a, b, c" +#' parse_into_c_call("a, b, c") +#' +#' str_in <- NA_character_ +#' parse_into_c_call(str_in) +#' } +#' +parse_into_c_call <- function(str_in) { + + admiraldev::assert_character_scalar(str_in) + + str_out <- str_in |> + stringr::str_split(",") |> + unlist() |> + stringr::str_trim() + + if (all(is.na(str_out))) { + return(NA) + } + + rlang::call2("c", !!!str_out) +} + #' Add a pipe operator to the last element of a character vector #' @@ -142,22 +268,17 @@ get_domain_spec <- function(spec, domain) { "mapping_algorithm", "entity_sub_algorithm", "condition_add_raw_dat", - "target_sdtm_variable_codelist_code" + "target_sdtm_variable_codelist_code", + "raw_data_format", + "raw_fmt", + "raw_unk", + "target_term_value", + "target_value" ) admiraldev::assert_data_frame(spec, required_vars = rlang::syms(expected_columns)) admiraldev::assert_character_scalar(domain) - # TODO - # Temporary vector to control the vars we generate and the order - tgt_vars <- c( - "CMTRT", - "CMINDC", - "CMDOSE", - "CMDOSTXT", - "CMDOSU" - ) - # For now assuming that there is only one topic and the topic is the first one spec |> @@ -236,6 +357,8 @@ expected_columns <- c( "raw_variable_ordinal", "raw_variable_type", "raw_data_format", + "raw_fmt", + "raw_unk", "study_specific", "annotation_ordinal", "mapping_is_dataset", @@ -274,5 +397,6 @@ expected_columns <- c( "unduplicate_keys", "groupby_keys", "target_resource_raw_dataset", - "target_resource_raw_variable" + "target_resource_raw_variable", + "target_value" ) From 11e9ebc639703a3b89ddaf515fb1b226b94b0175 Mon Sep 17 00:00:00 2001 From: Edgar Manukyan Date: Mon, 2 Dec 2024 01:11:21 +0000 Subject: [PATCH 10/35] #107 adjust tests --- tests/testthat/test-generate_code.R | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/tests/testthat/test-generate_code.R b/tests/testthat/test-generate_code.R index 59e05428..0aff91d9 100644 --- a/tests/testthat/test-generate_code.R +++ b/tests/testthat/test-generate_code.R @@ -2,12 +2,21 @@ test_that("generate_code works", { # nolint start spec <- tibble::tribble( - ~study_number, ~raw_dataset, ~raw_dataset_label, ~raw_variable, ~raw_variable_label, ~raw_variable_ordinal, ~raw_variable_type, ~raw_data_format, ~study_specific, ~annotation_ordinal, ~mapping_is_dataset, ~annotation_text, ~target_sdtm_domain, ~target_sdtm_variable, ~target_sdtm_variable_role, ~target_sdtm_variable_codelist_code, ~target_sdtm_variable_controlled_terms_or_format, ~target_sdtm_variable_ordinal, ~origin, ~mapping_algorithm, ~entity_sub_algorithm, ~target_hardcoded_value, ~target_term_value, ~target_term_code, ~condition_ordinal, ~condition_group_ordinal, ~condition_add_raw_dat, ~condition_add_tgt_dat, ~condition_left_raw_dataset, ~condition_left_raw_variable, ~condition_left_sdtm_domain, ~condition_left_sdtm_variable, ~condition_operator, ~condition_right_text_value, ~condition_right_sdtm_domain, ~condition_right_sdtm_variable, ~condition_right_raw_dataset, ~condition_right_raw_variable, ~condition_next_logical_operator, ~merge_type, ~merge_left, ~merge_right, ~merge_condition, ~unduplicate_keys, ~groupby_keys, ~target_resource_raw_dataset, ~target_resource_raw_variable, - "lp_study", "cm_raw_data", "Concomitant Medications", "IT.CMTRT", "var label", "3", "Text", "$200", "FALSE", "1", "FALSE", "CM.CMTRT", "CM", "CMTRT", "Topic Variable", NA, NA, "10", "CRF", "assign_no_ct", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, - "lp_study", "cm_raw_data", "Concomitant Medications", "IT.CMINDC", "var label", "4", "Text", "$100", "FALSE", "1", "FALSE", "CM.CMINDC", "CM", "CMINDC", "Record Qualifier", NA, NA, "19", "CRF", "assign_no_ct", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA - ) + ~study_number, ~raw_dataset, ~raw_dataset_label, ~raw_variable, ~raw_variable_label, ~raw_variable_ordinal, ~raw_variable_type, ~raw_data_format, ~raw_fmt, ~raw_unk, ~study_specific, ~annotation_ordinal, ~mapping_is_dataset, ~annotation_text, ~target_sdtm_domain, ~target_sdtm_variable, ~target_sdtm_variable_role, ~target_sdtm_variable_codelist_code, ~target_sdtm_variable_controlled_terms_or_format, ~target_sdtm_variable_ordinal, ~origin, ~mapping_algorithm, ~entity_sub_algorithm, ~target_hardcoded_value, ~target_term_value, ~target_value, ~target_term_code, ~condition_ordinal, ~condition_group_ordinal, ~condition_add_raw_dat, ~condition_add_tgt_dat, ~condition_left_raw_dataset, ~condition_left_raw_variable, ~condition_left_sdtm_domain, ~condition_left_sdtm_variable, ~condition_operator, ~condition_right_text_value, ~condition_right_sdtm_domain, ~condition_right_sdtm_variable, ~condition_right_raw_dataset, ~condition_right_raw_variable, ~condition_next_logical_operator, ~merge_type, ~merge_left, ~merge_right, ~merge_condition, ~unduplicate_keys, ~groupby_keys, ~target_resource_raw_dataset, ~target_resource_raw_variable, + "lp_study", "cm_raw_data", "Concomitant Medications", "IT.CMTRT", "var label", "3", "Text", "$200", NA, NA, "FALSE", "1", "FALSE", "CM.CMTRT", "CM", "CMTRT", "Topic Variable", NA, NA, "10", "CRF", "assign_no_ct", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + "lp_study", "cm_raw_data", "Concomitant Medications", "IT.CMINDC", "var label", "4", "Text", "$100", NA, NA, "FALSE", "1", "FALSE", "CM.CMINDC", "CM", "CMINDC", "Record Qualifier", NA, NA, "19", "CRF", "assign_no_ct", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA + ) # nolint end + # Convert all NA to NA_character_ + spec <- spec |> + dplyr::mutate( + dplyr::across( + .cols = dplyr::everything(), + .fns = ~dplyr::if_else(is.na(.x), NA_character_, .x) + ) + ) + domain <- "cm" temp_dir <- tempdir() @@ -19,7 +28,8 @@ test_that("generate_code works", { generate_code(spec, domain, out_dir) observed <- readLines(file.path(out_dir, paste0(domain, "_sdtm_oak_code.R"))) - expect_true(identical(length(observed), 10L)) - expect_true(grepl("CMTRT", observed[3])) + expect_true(length(observed) > 10L) + expect_true(grepl("generate_oak_id_vars", observed) |> any()) + expect_true(grepl("assign_no_ct", observed) |> any()) }) }) From 26345f2ba32c8bcdce41c51e46031e46251e3456 Mon Sep 17 00:00:00 2001 From: Edgar Manukyan Date: Mon, 2 Dec 2024 01:18:46 +0000 Subject: [PATCH 11/35] #107 lintr --- R/generate_code.R | 12 ++++++------ tests/testthat/test-generate_code.R | 18 ++++++++---------- 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/R/generate_code.R b/R/generate_code.R index 9e6c060e..183ae3ec 100644 --- a/R/generate_code.R +++ b/R/generate_code.R @@ -126,7 +126,7 @@ generate_code <- function(spec, domain, out_dir = ".") { ) |> unlist() |> remove_last_pipe() |> - append(cm_template_prefix, after = 0) |> + append(cm_template_prefix, after = 0L) |> styler::style_text() # Save the code to a file @@ -165,7 +165,7 @@ generate_one_var_code <- function(spec_var) { } # Remove the arguments that are missing - args <- purrr::discard(args, \(x) is.vector(x) && any(is.na(x))) + args <- purrr::discard(args, \(x) is.vector(x) && anyNA(x)) # Generate the function call generated_call <- rlang::call2( @@ -200,7 +200,7 @@ parse_into_c_call <- function(str_in) { admiraldev::assert_character_scalar(str_in) str_out <- str_in |> - stringr::str_split(",") |> + stringr::str_split(stringr::fixed(",")) |> unlist() |> stringr::str_trim() @@ -245,7 +245,7 @@ remove_last_pipe <- function(code_blocks) { # The last code block should not have a pipe operator code_blocks[len_code_block] <- code_blocks[len_code_block] |> - stringr::str_remove("%>%") + stringr::str_remove(stringr::fixed("%>%")) code_blocks } @@ -296,12 +296,12 @@ get_domain_spec <- function(spec, domain) { entity_sub_algorithm_temp = dplyr::if_else( mapping_algorithm %in% "condition_add", mapping_algorithm, - entity_sub_algorithm, + entity_sub_algorithm ), mapping_algorithm = dplyr::if_else( mapping_algorithm %in% "condition_add", entity_sub_algorithm, - mapping_algorithm, + mapping_algorithm ), entity_sub_algorithm = entity_sub_algorithm_temp ) |> diff --git a/tests/testthat/test-generate_code.R b/tests/testthat/test-generate_code.R index 0aff91d9..98cd64fb 100644 --- a/tests/testthat/test-generate_code.R +++ b/tests/testthat/test-generate_code.R @@ -10,12 +10,10 @@ test_that("generate_code works", { # Convert all NA to NA_character_ spec <- spec |> - dplyr::mutate( - dplyr::across( - .cols = dplyr::everything(), - .fns = ~dplyr::if_else(is.na(.x), NA_character_, .x) - ) - ) + dplyr::mutate(dplyr::across( + .cols = dplyr::everything(), + .fns = ~ dplyr::if_else(is.na(.x), NA_character_, .x) + )) domain <- "cm" @@ -24,12 +22,12 @@ test_that("generate_code works", { unlink(out_dir, recursive = TRUE, force = TRUE) dir.create(out_dir, showWarnings = FALSE, recursive = TRUE) - withr::with_options(list(width = 20), { + withr::with_options(list(width = 20L), { generate_code(spec, domain, out_dir) observed <- readLines(file.path(out_dir, paste0(domain, "_sdtm_oak_code.R"))) - expect_true(length(observed) > 10L) - expect_true(grepl("generate_oak_id_vars", observed) |> any()) - expect_true(grepl("assign_no_ct", observed) |> any()) + expect_gt(length(observed), 10L) + expect_true(grepl("generate_oak_id_vars", observed, fixed = TRUE) |> any()) + expect_true(grepl("assign_no_ct", observed, fixed = TRUE) |> any()) }) }) From 6e308e1c81b28c5e2d7531932edde6fb7de5e5a7 Mon Sep 17 00:00:00 2001 From: Edgar Manukyan Date: Mon, 2 Dec 2024 01:24:53 +0000 Subject: [PATCH 12/35] #107 styler --- R/generate_code.R | 13 ------------- tests/testthat/test-generate_code.R | 3 +-- 2 files changed, 1 insertion(+), 15 deletions(-) diff --git a/R/generate_code.R b/R/generate_code.R index 183ae3ec..ad6a3d5e 100644 --- a/R/generate_code.R +++ b/R/generate_code.R @@ -100,7 +100,6 @@ is_character <- function(var_in) { #' @export #' #' @examples -#' #' \dontrun{ #' spec <- read_spec("cm_sdtm_oak_spec_cdash.csv") #' domain <- "cm" @@ -113,7 +112,6 @@ is_character <- function(var_in) { #' } #' generate_code <- function(spec, domain, out_dir = ".") { - admiraldev::assert_data_frame(spec, required_vars = rlang::syms(expected_columns)) admiraldev::assert_character_scalar(domain) @@ -144,7 +142,6 @@ generate_code <- function(spec, domain, out_dir = ".") { #' @keywords internal #' generate_one_var_code <- function(spec_var) { - admiraldev::assert_data_frame(spec_var) assertthat::assert_that(identical(nrow(spec_var), 1L)) @@ -186,7 +183,6 @@ generate_one_var_code <- function(spec_var) { #' @keywords internal #' #' @examples -#' #' \dontrun{ #' str_in <- "a, b, c" #' parse_into_c_call("a, b, c") @@ -196,7 +192,6 @@ generate_one_var_code <- function(spec_var) { #' } #' parse_into_c_call <- function(str_in) { - admiraldev::assert_character_scalar(str_in) str_out <- str_in |> @@ -220,7 +215,6 @@ parse_into_c_call <- function(str_in) { #' @keywords internal #' add_pipe <- function(code_block) { - admiraldev::assert_character_vector(code_block) i <- length(code_block) @@ -238,7 +232,6 @@ add_pipe <- function(code_block) { #' @keywords internal #' remove_last_pipe <- function(code_blocks) { - admiraldev::assert_character_vector(code_blocks) len_code_block <- length(code_blocks) @@ -260,7 +253,6 @@ remove_last_pipe <- function(code_blocks) { #' #' @examples get_domain_spec <- function(spec, domain) { - expected_columns <- c( "raw_dataset", "raw_variable", @@ -286,9 +278,7 @@ get_domain_spec <- function(spec, domain) { # TODO # Doing only few variables dplyr::filter(target_sdtm_variable %in% tgt_vars) |> - dplyr::select(dplyr::all_of(expected_columns)) |> - # For now swapping entity_sub_algorithm with mapping_algorithm since the # algorithms like assign_no_ct are the mapping_algorithm and they are populated # in the entity_sub_algorithm @@ -306,7 +296,6 @@ get_domain_spec <- function(spec, domain) { entity_sub_algorithm = entity_sub_algorithm_temp ) |> dplyr::select(-entity_sub_algorithm_temp) |> - # Need to use the condition_add_raw_dat (if not missing) instead of raw_dataset dplyr::mutate( raw_dataset = dplyr::if_else( @@ -326,14 +315,12 @@ get_domain_spec <- function(spec, domain) { #' @export #' #' @examples -#' #' \dontrun{ #' file <- "cm_sdtm_oak_spec_cdash.csv" #' observed <- read_spec(file) #' } #' read_spec <- function(file) { - admiraldev::assert_character_scalar(file) spec <- utils::read.csv(file = file, na.strings = c("NA", ""), colClasses = "character") |> diff --git a/tests/testthat/test-generate_code.R b/tests/testthat/test-generate_code.R index 98cd64fb..31de2c29 100644 --- a/tests/testthat/test-generate_code.R +++ b/tests/testthat/test-generate_code.R @@ -1,5 +1,4 @@ test_that("generate_code works", { - # nolint start spec <- tibble::tribble( ~study_number, ~raw_dataset, ~raw_dataset_label, ~raw_variable, ~raw_variable_label, ~raw_variable_ordinal, ~raw_variable_type, ~raw_data_format, ~raw_fmt, ~raw_unk, ~study_specific, ~annotation_ordinal, ~mapping_is_dataset, ~annotation_text, ~target_sdtm_domain, ~target_sdtm_variable, ~target_sdtm_variable_role, ~target_sdtm_variable_codelist_code, ~target_sdtm_variable_controlled_terms_or_format, ~target_sdtm_variable_ordinal, ~origin, ~mapping_algorithm, ~entity_sub_algorithm, ~target_hardcoded_value, ~target_term_value, ~target_value, ~target_term_code, ~condition_ordinal, ~condition_group_ordinal, ~condition_add_raw_dat, ~condition_add_tgt_dat, ~condition_left_raw_dataset, ~condition_left_raw_variable, ~condition_left_sdtm_domain, ~condition_left_sdtm_variable, ~condition_operator, ~condition_right_text_value, ~condition_right_sdtm_domain, ~condition_right_sdtm_variable, ~condition_right_raw_dataset, ~condition_right_raw_variable, ~condition_next_logical_operator, ~merge_type, ~merge_left, ~merge_right, ~merge_condition, ~unduplicate_keys, ~groupby_keys, ~target_resource_raw_dataset, ~target_resource_raw_variable, @@ -24,7 +23,7 @@ test_that("generate_code works", { withr::with_options(list(width = 20L), { generate_code(spec, domain, out_dir) - observed <- readLines(file.path(out_dir, paste0(domain, "_sdtm_oak_code.R"))) + observed <- readLines(file.path(out_dir, paste0(domain, "_sdtm_oak_code.R"))) expect_gt(length(observed), 10L) expect_true(grepl("generate_oak_id_vars", observed, fixed = TRUE) |> any()) From 091dc954c3f397fa0150a38f070ad0c068eaa508 Mon Sep 17 00:00:00 2001 From: Edgar Manukyan Date: Mon, 2 Dec 2024 01:32:33 +0000 Subject: [PATCH 13/35] #107 cmd checks --- DESCRIPTION | 3 ++- NAMESPACE | 2 ++ R/generate_code.R | 5 ++--- R/globals.R | 4 +++- man/cm_template_prefix.Rd | 16 ++++++++++++++++ man/expected_columns.Rd | 2 +- man/generate_code.Rd | 1 - man/get_domain_spec.Rd | 3 +++ man/is_character.Rd | 17 +++++++++++++++++ man/is_numeric.Rd | 17 +++++++++++++++++ man/parse_into_c_call.Rd | 28 ++++++++++++++++++++++++++++ man/read_spec.Rd | 1 - 12 files changed, 91 insertions(+), 8 deletions(-) create mode 100644 man/cm_template_prefix.Rd create mode 100644 man/is_character.Rd create mode 100644 man/is_numeric.Rd create mode 100644 man/parse_into_c_call.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 01cbaa8a..abae0884 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -68,7 +68,8 @@ Suggests: spelling, testthat (>= 3.1.7), DT, - readr + readr, + withr VignetteBuilder: knitr Config/testthat/edition: 3 Config/testthat/parallel: true diff --git a/NAMESPACE b/NAMESPACE index 68d7440d..24f1c45d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -21,6 +21,8 @@ export(generate_code) export(generate_oak_id_vars) export(hardcode_ct) export(hardcode_no_ct) +export(is_character) +export(is_numeric) export(oak_id_vars) export(problems) export(read_ct_spec) diff --git a/R/generate_code.R b/R/generate_code.R index ad6a3d5e..9af3ecf0 100644 --- a/R/generate_code.R +++ b/R/generate_code.R @@ -75,7 +75,7 @@ is_numeric <- function(var_in) { #' #' @param var_in The variable to check. #' -#' @return +#' @return Logical indicating if the variable is character. #' @export #' is_character <- function(var_in) { @@ -248,10 +248,9 @@ remove_last_pipe <- function(code_blocks) { #' @param spec The specification data frame. #' @param domain The SDTM domain to get the specification for. #' -#' @return +#' @return A tibble with the specification for the domain. #' @keywords internal #' -#' @examples get_domain_spec <- function(spec, domain) { expected_columns <- c( "raw_dataset", diff --git a/R/globals.R b/R/globals.R index 9a2998a0..5297f336 100644 --- a/R/globals.R +++ b/R/globals.R @@ -1,4 +1,6 @@ utils::globalVariables(c( "USUBJID", "VISIT", "dom_dt", "dom_tm", "ref_dt", - "ref_tm" + "ref_tm", "target_sdtm_domain", "target_sdtm_variable", + "mapping_algorithm", "entity_sub_algorithm", "entity_sub_algorithm", + "entity_sub_algorithm_temp", "condition_add_raw_dat", "raw_dataset" )) diff --git a/man/cm_template_prefix.Rd b/man/cm_template_prefix.Rd new file mode 100644 index 00000000..eb244d88 --- /dev/null +++ b/man/cm_template_prefix.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/generate_code.R +\docType{data} +\name{cm_template_prefix} +\alias{cm_template_prefix} +\title{The template suffix for the cm code} +\format{ +An object of class \code{glue} (inherits from \code{character}) of length 1. +} +\usage{ +cm_template_prefix +} +\description{ +The template suffix for the cm code +} +\keyword{internal} diff --git a/man/expected_columns.Rd b/man/expected_columns.Rd index f4bdb453..781739d2 100644 --- a/man/expected_columns.Rd +++ b/man/expected_columns.Rd @@ -5,7 +5,7 @@ \alias{expected_columns} \title{Expected columns in the specification file} \format{ -An object of class \code{character} of length 47. +An object of class \code{character} of length 50. } \usage{ expected_columns diff --git a/man/generate_code.Rd b/man/generate_code.Rd index 6efd45e0..ca651ce6 100644 --- a/man/generate_code.Rd +++ b/man/generate_code.Rd @@ -25,7 +25,6 @@ more parameters will be placed on a single line and code will be shorter. See the examples for more details. } \examples{ - \dontrun{ spec <- read_spec("cm_sdtm_oak_spec_cdash.csv") domain <- "cm" diff --git a/man/get_domain_spec.Rd b/man/get_domain_spec.Rd index 976589a6..382c9650 100644 --- a/man/get_domain_spec.Rd +++ b/man/get_domain_spec.Rd @@ -11,6 +11,9 @@ get_domain_spec(spec, domain) \item{domain}{The SDTM domain to get the specification for.} } +\value{ +A tibble with the specification for the domain. +} \description{ Get the specification for a domain and modify it } diff --git a/man/is_character.Rd b/man/is_character.Rd new file mode 100644 index 00000000..3f805162 --- /dev/null +++ b/man/is_character.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/generate_code.R +\name{is_character} +\alias{is_character} +\title{Check if a variable is character} +\usage{ +is_character(var_in) +} +\arguments{ +\item{var_in}{The variable to check.} +} +\value{ +Logical indicating if the variable is character. +} +\description{ +Check if a variable is character +} diff --git a/man/is_numeric.Rd b/man/is_numeric.Rd new file mode 100644 index 00000000..18bd359a --- /dev/null +++ b/man/is_numeric.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/generate_code.R +\name{is_numeric} +\alias{is_numeric} +\title{Check if a variable is character} +\usage{ +is_numeric(var_in) +} +\arguments{ +\item{var_in}{The variable to check.} +} +\value{ +Logical indicating if the variable is character. +} +\description{ +Check if a variable is character +} diff --git a/man/parse_into_c_call.Rd b/man/parse_into_c_call.Rd new file mode 100644 index 00000000..77340daa --- /dev/null +++ b/man/parse_into_c_call.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/generate_code.R +\name{parse_into_c_call} +\alias{parse_into_c_call} +\title{This function converts comma separated string into a character vector} +\usage{ +parse_into_c_call(str_in) +} +\arguments{ +\item{string}{A string with comma separated values.} +} +\value{ +A character vector. +} +\description{ +This function converts comma separated string into a character vector +} +\examples{ +\dontrun{ +str_in <- "a, b, c" +parse_into_c_call("a, b, c") + +str_in <- NA_character_ +parse_into_c_call(str_in) +} + +} +\keyword{internal} diff --git a/man/read_spec.Rd b/man/read_spec.Rd index 4dd18f75..3f4948ad 100644 --- a/man/read_spec.Rd +++ b/man/read_spec.Rd @@ -16,7 +16,6 @@ A tibble with the specification. Read the specification file } \examples{ - \dontrun{ file <- "cm_sdtm_oak_spec_cdash.csv" observed <- read_spec(file) From a187e186289e51b2495c27bab75a1040a2ef4d8b Mon Sep 17 00:00:00 2001 From: Edgar Manukyan Date: Mon, 2 Dec 2024 01:42:22 +0000 Subject: [PATCH 14/35] #107 add suffix --- R/generate_code.R | 36 ++++++++++++++++++++++++++++++++++-- man/generate_code.Rd | 5 ++++- 2 files changed, 38 insertions(+), 3 deletions(-) diff --git a/R/generate_code.R b/R/generate_code.R index 9af3ecf0..e0f8cc58 100644 --- a/R/generate_code.R +++ b/R/generate_code.R @@ -60,6 +60,34 @@ dm <- admiral::convert_blanks_to_na(dm) cm <- ') +cm_template_suffix <- stringr::str_glue(' +dplyr::mutate( + STUDYID = "test_study", + DOMAIN = "CM", + CMCAT = "GENERAL CONMED", + USUBJID = paste0("test_study", "-", cm_raw_data$PATNUM) +) %>% +derive_seq(tgt_var = "CMSEQ", + rec_vars= c("USUBJID", "CMTRT")) %>% +derive_study_day( + sdtm_in = ., + dm_domain = dm, + tgdt = "CMENDTC", + refdt = "RFXSTDTC", + study_day_var = "CMENDY" +) %>% +derive_study_day( + sdtm_in = ., + dm_domain = dm, + tgdt = "CMSTDTC", + refdt = "RFXSTDTC", + study_day_var = "CMSTDY" +) %>% +dplyr::select("STUDYID", "DOMAIN", "USUBJID", "CMSEQ", "CMTRT", "CMCAT", "CMINDC", + "CMDOSE", "CMDOSTXT", "CMDOSU", "CMDOSFRM", "CMDOSFRQ", "CMROUTE", + "CMSTDTC", "CMENDTC","CMSTDY", "CMENDY", "CMENRTPT", "CMENTPT") +') + #' Check if a variable is character #' #' @param var_in The variable to check. @@ -107,8 +135,11 @@ is_character <- function(var_in) { #' #' # One can use option width to control the width of the code #' # Twenty will almost always place every parameter on a separate line -#' options(width = 20) +#' spec <- read_spec("cm_sdtm_oak_spec_cdash.csv") +#' domain <- "cm" +#' old_width <- options(width = 20) #' generate_code(spec, domain) +#' options(width = old_width) #' } #' generate_code <- function(spec, domain, out_dir = ".") { @@ -123,8 +154,9 @@ generate_code <- function(spec, domain, out_dir = ".") { \(row) generate_one_var_code(spec_domain[row, ]) ) |> unlist() |> - remove_last_pipe() |> + # remove_last_pipe() |> append(cm_template_prefix, after = 0L) |> + append(cm_template_suffix) |> styler::style_text() # Save the code to a file diff --git a/man/generate_code.Rd b/man/generate_code.Rd index ca651ce6..48184b2c 100644 --- a/man/generate_code.Rd +++ b/man/generate_code.Rd @@ -32,8 +32,11 @@ generate_code(spec, domain) # One can use option width to control the width of the code # Twenty will almost always place every parameter on a separate line -options(width = 20) +spec <- read_spec("cm_sdtm_oak_spec_cdash.csv") +domain <- "cm" +old_width <- options(width = 20) generate_code(spec, domain) +options(width = old_width) } } From 18a69d6b21d281fb6a80dd7ccf0ddd0d79b5f34e Mon Sep 17 00:00:00 2001 From: Edgar Manukyan Date: Mon, 2 Dec 2024 01:57:30 +0000 Subject: [PATCH 15/35] #107 docs --- R/generate_code.R | 109 ++++++++++++++++++++++--------------------- man/generate_code.Rd | 3 +- 2 files changed, 57 insertions(+), 55 deletions(-) diff --git a/R/generate_code.R b/R/generate_code.R index e0f8cc58..aabf40a6 100644 --- a/R/generate_code.R +++ b/R/generate_code.R @@ -29,6 +29,61 @@ tgt_vars <- c( "CMENDTC" ) +#' Generate the code for the mapping SDTM specification +#' +#' One can use the option `width` to control the width of the code. A width of +#' twenty will almost always place every parameter on a separate line. This is +#' useful for debugging and understanding the code. The higher the width, the +#' more parameters will be placed on a single line and code will be shorter. +#' See the examples for more details. +#' +#' @param spec The specification data frame. +#' @param domain The SDTM domain to generate the code for. +#' @param out_dir The directory to save the code file. Default is the current +#' directory. +#' +#' @return Side effect: the code is generated and saved to a file. +#' @export +#' +#' @examples +#' \dontrun{ +#' spec <- read_spec("cm_sdtm_oak_spec_cdash.csv") +#' domain <- "cm" +#' generate_code(spec, domain) +#' +#' # One can use option width to control the width of the code +#' # Twenty will almost always place every parameter on a separate line +#' spec <- read_spec("cm_sdtm_oak_spec_cdash.csv") +#' domain <- "cm" +#' old_width <- options(width = 20) +#' generate_code(spec, domain) +#' # Restore original width +#' options(width = old_width$width) +#' } +#' +generate_code <- function(spec, domain, out_dir = ".") { + admiraldev::assert_data_frame(spec, required_vars = rlang::syms(expected_columns)) + admiraldev::assert_character_scalar(domain) + + spec_domain <- get_domain_spec(spec, domain) + + # Generate the code for each variable row in spec_domain + styled_code <- purrr::map( + seq_len(nrow(spec_domain)), + \(row) generate_one_var_code(spec_domain[row, ]) + ) |> + unlist() |> + # remove_last_pipe() |> + append(cm_template_prefix, after = 0L) |> + append(cm_template_suffix) |> + styler::style_text() + + # Save the code to a file + file_name <- paste0(domain, "_sdtm_oak_code.R") + writeLines(styled_code, file.path(out_dir, file_name)) +} + + #' The template suffix for the cm code #' #' @keywords internal @@ -111,60 +166,6 @@ is_character <- function(var_in) { } -#' Generate the code for the mapping SDTM specification -#' -#' One can use the option `width` to control the width of the code. A width of -#' twenty will almost always place every parameter on a separate line. This is -#' useful for debugging and understanding the code. The higher the width, the -#' more parameters will be placed on a single line and code will be shorter. -#' See the examples for more details. -#' -#' @param spec The specification data frame. -#' @param domain The SDTM domain to generate the code for. -#' @param out_dir The directory to save the code file. Default is the current -#' directory. -#' -#' @return Side effect: the code is generated and saved to a file. -#' @export -#' -#' @examples -#' \dontrun{ -#' spec <- read_spec("cm_sdtm_oak_spec_cdash.csv") -#' domain <- "cm" -#' generate_code(spec, domain) -#' -#' # One can use option width to control the width of the code -#' # Twenty will almost always place every parameter on a separate line -#' spec <- read_spec("cm_sdtm_oak_spec_cdash.csv") -#' domain <- "cm" -#' old_width <- options(width = 20) -#' generate_code(spec, domain) -#' options(width = old_width) -#' } -#' -generate_code <- function(spec, domain, out_dir = ".") { - admiraldev::assert_data_frame(spec, required_vars = rlang::syms(expected_columns)) - admiraldev::assert_character_scalar(domain) - - spec_domain <- get_domain_spec(spec, domain) - - # Generate the code for each variable row in spec_domain - styled_code <- purrr::map( - seq_len(nrow(spec_domain)), - \(row) generate_one_var_code(spec_domain[row, ]) - ) |> - unlist() |> - # remove_last_pipe() |> - append(cm_template_prefix, after = 0L) |> - append(cm_template_suffix) |> - styler::style_text() - - # Save the code to a file - file_name <- paste0(domain, "_sdtm_oak_code.R") - writeLines(styled_code, file.path(out_dir, file_name)) -} - - #' Generate the code for one variable #' #' @param spec_var The specification for one variable. diff --git a/man/generate_code.Rd b/man/generate_code.Rd index 48184b2c..b26ed666 100644 --- a/man/generate_code.Rd +++ b/man/generate_code.Rd @@ -36,7 +36,8 @@ spec <- read_spec("cm_sdtm_oak_spec_cdash.csv") domain <- "cm" old_width <- options(width = 20) generate_code(spec, domain) -options(width = old_width) +# Restore original width +options(width = old_width$width) } } From eecd827c3724c9c79fbf0b624dab92d7f345d2a6 Mon Sep 17 00:00:00 2001 From: Edgar Manukyan Date: Tue, 3 Dec 2024 08:02:32 -0500 Subject: [PATCH 16/35] #107 filter spec before generating --- R/generate_code.R | 31 ++++++++----------------------- 1 file changed, 8 insertions(+), 23 deletions(-) diff --git a/R/generate_code.R b/R/generate_code.R index aabf40a6..3243f7c2 100644 --- a/R/generate_code.R +++ b/R/generate_code.R @@ -11,24 +11,6 @@ # can add it later when the customized id_vars are needed. -# TODO -# Temporary vector to control the vars we generate. For now we assume that the -# order in the specs is the one we want to generate the code for. -tgt_vars <- c( - "CMTRT", - "CMINDC", - "CMDOSE", - "CMDOSTXT", - "CMDOSU", - "CMDOSFRM", - "CMDOSFRQ", - "CMROUTE", - "CMSTDTC", - "CMENRTPT", - "CMENTPT", - "CMENDTC" -) - #' Generate the code for the mapping SDTM specification #' #' One can use the option `width` to control the width of the code. A width of @@ -48,13 +30,19 @@ tgt_vars <- c( #' @examples #' \dontrun{ #' spec <- read_spec("cm_sdtm_oak_spec_cdash.csv") +#' +#' spec <- spec |> +#' dplyr::filter( +#' !is.na(target_sdtm_variable), +#' !is.na(mapping_algorithm), +#' !target_sdtm_variable %in% c("DOMAIN") +#' ) +#' #' domain <- "cm" #' generate_code(spec, domain) #' #' # One can use option width to control the width of the code #' # Twenty will almost always place every parameter on a separate line -#' spec <- read_spec("cm_sdtm_oak_spec_cdash.csv") -#' domain <- "cm" #' old_width <- options(width = 20) #' generate_code(spec, domain) #' # Restore original width @@ -307,9 +295,6 @@ get_domain_spec <- function(spec, domain) { spec |> dplyr::filter(tolower(target_sdtm_domain) %in% tolower(domain)) |> - # TODO - # Doing only few variables - dplyr::filter(target_sdtm_variable %in% tgt_vars) |> dplyr::select(dplyr::all_of(expected_columns)) |> # For now swapping entity_sub_algorithm with mapping_algorithm since the # algorithms like assign_no_ct are the mapping_algorithm and they are populated From 7e7e462add74daf8bfcaca23d0b8484f033f8221 Mon Sep 17 00:00:00 2001 From: Edgar Manukyan Date: Wed, 4 Dec 2024 12:39:31 -0500 Subject: [PATCH 17/35] #107 ref --- R/generate_code.R | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/R/generate_code.R b/R/generate_code.R index 3243f7c2..1c2c3ff6 100644 --- a/R/generate_code.R +++ b/R/generate_code.R @@ -56,12 +56,13 @@ generate_code <- function(spec, domain, out_dir = ".") { spec_domain <- get_domain_spec(spec, domain) # Generate the code for each variable row in spec_domain - styled_code <- purrr::map( - seq_len(nrow(spec_domain)), - \(row) generate_one_var_code(spec_domain[row, ]) - ) |> + styled_code <- spec_domain |> + dplyr::rowwise() |> + dplyr::mutate( + algorithm_code = list(generate_one_var_code(dplyr::pick(dplyr::everything()))), + .keep = "none" + ) |> unlist() |> - # remove_last_pipe() |> append(cm_template_prefix, after = 0L) |> append(cm_template_suffix) |> styler::style_text() @@ -139,7 +140,7 @@ dplyr::select("STUDYID", "DOMAIN", "USUBJID", "CMSEQ", "CMTRT", "CMCAT", "CMINDC #' @export #' is_numeric <- function(var_in) { - grepl("^-?\\d*(\\.\\d+)?(e[+-]?\\d+)?$", var_in) + grepl(r"{^-?\d*(\.\d+)?(e[+-]?\d+)?$}", var_in) } #' Check if a variable is character From a39dfae9dc1d516b2f0e27d6d9cfcbb8936ad259 Mon Sep 17 00:00:00 2001 From: Edgar Manukyan Date: Sat, 7 Dec 2024 13:42:08 -0500 Subject: [PATCH 18/35] #107 vs spec read --- R/generate_code.R | 18 ++++++++++++++++++ man/generate_code.Rd | 24 ++++++++++++++++++++++-- 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/R/generate_code.R b/R/generate_code.R index 1c2c3ff6..e4c88d05 100644 --- a/R/generate_code.R +++ b/R/generate_code.R @@ -9,6 +9,10 @@ # - Changed is.numeric/is.character into is_numeric/is_character # - I did not generate id_vars since the default values was enough, certainly we # can add it later when the customized id_vars are needed. +# - Some extra manipulations done in get_domain_spec() +# - Moved VSTESTCD before qualifired, e.g. VSSTAT, VSPOS so that topic is created first +# - Changed mapping_is_dataset to TRUE for VSTPT, VSDTC +# - Added a new column topic, showing to which topic the mapping belongs to #' Generate the code for the mapping SDTM specification @@ -29,6 +33,20 @@ #' #' @examples #' \dontrun{ +#' # VS domain ---- +#' spec <- read_spec("vs_sdtm_oak_spec.csv") +#' domain <- "vs" +#' +#' spec <- spec |> +#' dplyr::filter( +#' !is.na(target_sdtm_variable), +#' !is.na(mapping_algorithm), +#' !target_sdtm_variable %in% c("DOMAIN"), +#' !mapping_is_dataset %in% c("TRUE") +#' ) +#' +#' # CM domain ---- +#' #' spec <- read_spec("cm_sdtm_oak_spec_cdash.csv") #' #' spec <- spec |> diff --git a/man/generate_code.Rd b/man/generate_code.Rd index b26ed666..e264182a 100644 --- a/man/generate_code.Rd +++ b/man/generate_code.Rd @@ -26,14 +26,34 @@ See the examples for more details. } \examples{ \dontrun{ +# VS domain ---- +spec <- read_spec("vs_sdtm_oak_spec.csv") +domain <- "vs" + +spec <- spec |> + dplyr::filter( + !is.na(target_sdtm_variable), + !is.na(mapping_algorithm), + !target_sdtm_variable \%in\% c("DOMAIN"), + !mapping_is_dataset \%in\% c("TRUE") + ) + +# CM domain ---- + spec <- read_spec("cm_sdtm_oak_spec_cdash.csv") + +spec <- spec |> + dplyr::filter( + !is.na(target_sdtm_variable), + !is.na(mapping_algorithm), + !target_sdtm_variable \%in\% c("DOMAIN") + ) + domain <- "cm" generate_code(spec, domain) # One can use option width to control the width of the code # Twenty will almost always place every parameter on a separate line -spec <- read_spec("cm_sdtm_oak_spec_cdash.csv") -domain <- "cm" old_width <- options(width = 20) generate_code(spec, domain) # Restore original width From 9e603d3d99e692b03b690969e8628d756fd27856 Mon Sep 17 00:00:00 2001 From: Edgar Manukyan Date: Sun, 8 Dec 2024 00:45:33 +0000 Subject: [PATCH 19/35] #107 move tempates to sep file --- R/generate_code_tempates.R | 62 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 R/generate_code_tempates.R diff --git a/R/generate_code_tempates.R b/R/generate_code_tempates.R new file mode 100644 index 00000000..a6fdf937 --- /dev/null +++ b/R/generate_code_tempates.R @@ -0,0 +1,62 @@ + +#' The template suffix for the cm code +#' +#' @noRd +#' @keywords internal +cm_template_prefix <- stringr::str_glue(' +library(sdtm.oak) +library(dplyr) + + +# Read CT Specification +study_ct <- read.csv("./datasets/sdtm_ct.csv") + +# Read in raw data +cm_raw_data <- read.csv("./datasets/cm_raw_data_cdash.csv") + +cm_raw_data <- admiral::convert_blanks_to_na(cm_raw_data) + +# derive oak_id_vars +cm_raw_data <- cm_raw_data %>% + generate_oak_id_vars( + pat_var = "PATNUM", + raw_src = "cm_raw_data" + ) + +# Read in DM domain to derive study day +dm <- read.csv("./datasets/dm.csv") + +dm <- admiral::convert_blanks_to_na(dm) +') + +#' The template suffix for the cm code +#' +#' @noRd +#' @keywords internal +cm_template_suffix <- stringr::str_glue(' +dplyr::mutate( + STUDYID = "test_study", + DOMAIN = "CM", + CMCAT = "GENERAL CONMED", + USUBJID = paste0("test_study", "-", cm_raw_data$PATNUM) +) %>% +derive_seq(tgt_var = "CMSEQ", + rec_vars= c("USUBJID", "CMTRT")) %>% +derive_study_day( + sdtm_in = ., + dm_domain = dm, + tgdt = "CMENDTC", + refdt = "RFXSTDTC", + study_day_var = "CMENDY" +) %>% +derive_study_day( + sdtm_in = ., + dm_domain = dm, + tgdt = "CMSTDTC", + refdt = "RFXSTDTC", + study_day_var = "CMSTDY" +) %>% +dplyr::select("STUDYID", "DOMAIN", "USUBJID", "CMSEQ", "CMTRT", "CMCAT", "CMINDC", + "CMDOSE", "CMDOSTXT", "CMDOSU", "CMDOSFRM", "CMDOSFRQ", "CMROUTE", + "CMSTDTC", "CMENDTC","CMSTDY", "CMENDY", "CMENRTPT", "CMENTPT") +') From 12490579bd8aede94d1891e7d6ddf04c297b04c7 Mon Sep 17 00:00:00 2001 From: Edgar Manukyan Date: Sun, 8 Dec 2024 00:45:58 +0000 Subject: [PATCH 20/35] #107 try per one topic --- R/generate_code.R | 111 ++++++++++++++++------------------------------ 1 file changed, 37 insertions(+), 74 deletions(-) diff --git a/R/generate_code.R b/R/generate_code.R index e4c88d05..509ecbb6 100644 --- a/R/generate_code.R +++ b/R/generate_code.R @@ -41,7 +41,6 @@ #' dplyr::filter( #' !is.na(target_sdtm_variable), #' !is.na(mapping_algorithm), -#' !target_sdtm_variable %in% c("DOMAIN"), #' !mapping_is_dataset %in% c("TRUE") #' ) #' @@ -53,7 +52,7 @@ #' dplyr::filter( #' !is.na(target_sdtm_variable), #' !is.na(mapping_algorithm), -#' !target_sdtm_variable %in% c("DOMAIN") +#' !mapping_is_dataset %in% c("TRUE") #' ) #' #' domain <- "cm" @@ -73,83 +72,45 @@ generate_code <- function(spec, domain, out_dir = ".") { spec_domain <- get_domain_spec(spec, domain) - # Generate the code for each variable row in spec_domain - styled_code <- spec_domain |> - dplyr::rowwise() |> - dplyr::mutate( - algorithm_code = list(generate_one_var_code(dplyr::pick(dplyr::everything()))), - .keep = "none" - ) |> - unlist() |> - append(cm_template_prefix, after = 0L) |> - append(cm_template_suffix) |> - styler::style_text() + topics <- unique(spec_domain$topic) + + # Do by topic + code_by_topics <- purrr::map(topics, \(topic) { + spec_domain_topic <- spec_domain |> + dplyr::filter(topic %in% topic) + + domain_topic <- paste(domain, topic, sep = "_") |> + tolower() + + map_topic <- paste0("\n\n# Map topic ", domain_topic, " ----\n") + + # Generate the code for each variable row in spec_domain + spec_domain_topic |> + dplyr::rowwise() |> + dplyr::mutate( + algorithm_code = list(generate_one_var_code(dplyr::pick(dplyr::everything()))), + .keep = "none" + ) |> + unlist() |> + append(paste0(domain_topic, " <-"), after = 0L) |> + append(map_topic, after = 0L) + }) + + one_topic <- identical(length(code_by_topics), 1L) + + styled_code <- if (one_topic) { + code_by_topics |> + unlist() |> + append(cm_template_prefix, after = 0L) |> + append(cm_template_suffix) |> + styler::style_text() + } # Save the code to a file file_name <- paste0(domain, "_sdtm_oak_code.R") writeLines(styled_code, file.path(out_dir, file_name)) } - -#' The template suffix for the cm code -#' -#' @keywords internal -cm_template_prefix <- stringr::str_glue(' -library(sdtm.oak) -library(dplyr) - - -# Read CT Specification -study_ct <- read.csv("./datasets/sdtm_ct.csv") - -# Read in raw data -cm_raw_data <- read.csv("./datasets/cm_raw_data_cdash.csv") - -cm_raw_data <- admiral::convert_blanks_to_na(cm_raw_data) - -# derive oak_id_vars -cm_raw_data <- cm_raw_data %>% - generate_oak_id_vars( - pat_var = "PATNUM", - raw_src = "cm_raw_data" - ) - -# Read in DM domain to derive study day -dm <- read.csv("./datasets/dm.csv") - -dm <- admiral::convert_blanks_to_na(dm) - -cm <- -') - -cm_template_suffix <- stringr::str_glue(' -dplyr::mutate( - STUDYID = "test_study", - DOMAIN = "CM", - CMCAT = "GENERAL CONMED", - USUBJID = paste0("test_study", "-", cm_raw_data$PATNUM) -) %>% -derive_seq(tgt_var = "CMSEQ", - rec_vars= c("USUBJID", "CMTRT")) %>% -derive_study_day( - sdtm_in = ., - dm_domain = dm, - tgdt = "CMENDTC", - refdt = "RFXSTDTC", - study_day_var = "CMENDY" -) %>% -derive_study_day( - sdtm_in = ., - dm_domain = dm, - tgdt = "CMSTDTC", - refdt = "RFXSTDTC", - study_day_var = "CMSTDY" -) %>% -dplyr::select("STUDYID", "DOMAIN", "USUBJID", "CMSEQ", "CMTRT", "CMCAT", "CMINDC", - "CMDOSE", "CMDOSTXT", "CMDOSU", "CMDOSFRM", "CMDOSFRQ", "CMROUTE", - "CMSTDTC", "CMENDTC","CMSTDY", "CMENDY", "CMENRTPT", "CMENTPT") -') - #' Check if a variable is character #' #' @param var_in The variable to check. @@ -296,6 +257,7 @@ get_domain_spec <- function(spec, domain) { "raw_dataset", "raw_variable", "target_sdtm_variable", + "topic", "mapping_algorithm", "entity_sub_algorithm", "condition_add_raw_dat", @@ -421,5 +383,6 @@ expected_columns <- c( "groupby_keys", "target_resource_raw_dataset", "target_resource_raw_variable", - "target_value" + "target_value", + "topic" ) From 938de20ece54d94a29956eb070db26e7569d7d3e Mon Sep 17 00:00:00 2001 From: Edgar Manukyan Date: Sun, 8 Dec 2024 00:46:08 +0000 Subject: [PATCH 21/35] #107 adjust test --- tests/testthat/test-generate_code.R | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/testthat/test-generate_code.R b/tests/testthat/test-generate_code.R index 31de2c29..0d513cc1 100644 --- a/tests/testthat/test-generate_code.R +++ b/tests/testthat/test-generate_code.R @@ -1,9 +1,9 @@ test_that("generate_code works", { # nolint start spec <- tibble::tribble( - ~study_number, ~raw_dataset, ~raw_dataset_label, ~raw_variable, ~raw_variable_label, ~raw_variable_ordinal, ~raw_variable_type, ~raw_data_format, ~raw_fmt, ~raw_unk, ~study_specific, ~annotation_ordinal, ~mapping_is_dataset, ~annotation_text, ~target_sdtm_domain, ~target_sdtm_variable, ~target_sdtm_variable_role, ~target_sdtm_variable_codelist_code, ~target_sdtm_variable_controlled_terms_or_format, ~target_sdtm_variable_ordinal, ~origin, ~mapping_algorithm, ~entity_sub_algorithm, ~target_hardcoded_value, ~target_term_value, ~target_value, ~target_term_code, ~condition_ordinal, ~condition_group_ordinal, ~condition_add_raw_dat, ~condition_add_tgt_dat, ~condition_left_raw_dataset, ~condition_left_raw_variable, ~condition_left_sdtm_domain, ~condition_left_sdtm_variable, ~condition_operator, ~condition_right_text_value, ~condition_right_sdtm_domain, ~condition_right_sdtm_variable, ~condition_right_raw_dataset, ~condition_right_raw_variable, ~condition_next_logical_operator, ~merge_type, ~merge_left, ~merge_right, ~merge_condition, ~unduplicate_keys, ~groupby_keys, ~target_resource_raw_dataset, ~target_resource_raw_variable, - "lp_study", "cm_raw_data", "Concomitant Medications", "IT.CMTRT", "var label", "3", "Text", "$200", NA, NA, "FALSE", "1", "FALSE", "CM.CMTRT", "CM", "CMTRT", "Topic Variable", NA, NA, "10", "CRF", "assign_no_ct", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, - "lp_study", "cm_raw_data", "Concomitant Medications", "IT.CMINDC", "var label", "4", "Text", "$100", NA, NA, "FALSE", "1", "FALSE", "CM.CMINDC", "CM", "CMINDC", "Record Qualifier", NA, NA, "19", "CRF", "assign_no_ct", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA + ~study_number, ~raw_dataset, ~raw_dataset_label, ~raw_variable, ~raw_variable_label, ~raw_variable_ordinal, ~raw_variable_type, ~raw_data_format, ~raw_fmt, ~raw_unk, ~study_specific, ~annotation_ordinal, ~mapping_is_dataset, ~annotation_text, ~target_sdtm_domain, ~target_sdtm_variable, ~target_sdtm_variable_role, ~topic, ~target_sdtm_variable_codelist_code, ~target_sdtm_variable_controlled_terms_or_format, ~target_sdtm_variable_ordinal, ~origin, ~mapping_algorithm, ~entity_sub_algorithm, ~target_hardcoded_value, ~target_term_value, ~target_value, ~target_term_code, ~condition_ordinal, ~condition_group_ordinal, ~condition_add_raw_dat, ~condition_add_tgt_dat, ~condition_left_raw_dataset, ~condition_left_raw_variable, ~condition_left_sdtm_domain, ~condition_left_sdtm_variable, ~condition_operator, ~condition_right_text_value, ~condition_right_sdtm_domain, ~condition_right_sdtm_variable, ~condition_right_raw_dataset, ~condition_right_raw_variable, ~condition_next_logical_operator, ~merge_type, ~merge_left, ~merge_right, ~merge_condition, ~unduplicate_keys, ~groupby_keys, ~target_resource_raw_dataset, ~target_resource_raw_variable, + "lp_study", "cm_raw_data", "Concomitant Medications", "IT.CMTRT", "var label", "3", "Text", "$200", NA, NA, "FALSE", "1", "FALSE", "CM.CMTRT", "CM", "CMTRT", "Topic Variable", "CMTRT", NA, NA, "10", "CRF", "assign_no_ct", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + "lp_study", "cm_raw_data", "Concomitant Medications", "IT.CMINDC", "var label", "4", "Text", "$100", NA, NA, "FALSE", "1", "FALSE", "CM.CMINDC", "CM", "CMINDC", "Record Qualifier", "CMTRT", NA, NA, "19", "CRF", "assign_no_ct", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA ) # nolint end @@ -26,7 +26,11 @@ test_that("generate_code works", { observed <- readLines(file.path(out_dir, paste0(domain, "_sdtm_oak_code.R"))) expect_gt(length(observed), 10L) + # From prefix expect_true(grepl("generate_oak_id_vars", observed, fixed = TRUE) |> any()) + # From generator expect_true(grepl("assign_no_ct", observed, fixed = TRUE) |> any()) + # From suffix + expect_true(grepl("dplyr::select", observed, fixed = TRUE) |> any()) }) }) From 8bd5f6e416f3f5b087efca1f9d5090f26accb799 Mon Sep 17 00:00:00 2001 From: Edgar Manukyan Date: Sun, 8 Dec 2024 01:13:24 +0000 Subject: [PATCH 22/35] #107 add multiple topics, vs templates --- R/generate_code.R | 16 +++++- R/generate_code_tempates.R | 113 +++++++++++++++++++++++++++++++++++++ 2 files changed, 127 insertions(+), 2 deletions(-) diff --git a/R/generate_code.R b/R/generate_code.R index 509ecbb6..b5e258c3 100644 --- a/R/generate_code.R +++ b/R/generate_code.R @@ -44,6 +44,11 @@ #' !mapping_is_dataset %in% c("TRUE") #' ) #' +#' old_width <- options(width = 20) +#' generate_code(spec, domain) +#' # Restore original width +#' options(width = old_width$width) +#' #' # CM domain ---- #' #' spec <- read_spec("cm_sdtm_oak_spec_cdash.csv") @@ -74,10 +79,10 @@ generate_code <- function(spec, domain, out_dir = ".") { topics <- unique(spec_domain$topic) - # Do by topic code_by_topics <- purrr::map(topics, \(topic) { + spec_domain_topic <- spec_domain |> - dplyr::filter(topic %in% topic) + dplyr::filter(topic %in% {{topic}}) domain_topic <- paste(domain, topic, sep = "_") |> tolower() @@ -104,6 +109,13 @@ generate_code <- function(spec, domain, out_dir = ".") { append(cm_template_prefix, after = 0L) |> append(cm_template_suffix) |> styler::style_text() + } else { + code_by_topics |> + purrr::map(remove_last_pipe) |> + unlist() |> + append(vs_template_prefix, after = 0L) |> + append(vs_template_suffix) |> + styler::style_text() } # Save the code to a file diff --git a/R/generate_code_tempates.R b/R/generate_code_tempates.R index a6fdf937..00961250 100644 --- a/R/generate_code_tempates.R +++ b/R/generate_code_tempates.R @@ -60,3 +60,116 @@ dplyr::select("STUDYID", "DOMAIN", "USUBJID", "CMSEQ", "CMTRT", "CMCAT", "CMINDC "CMDOSE", "CMDOSTXT", "CMDOSU", "CMDOSFRM", "CMDOSFRQ", "CMROUTE", "CMSTDTC", "CMENDTC","CMSTDY", "CMENDY", "CMENRTPT", "CMENTPT") ') + +#' The template suffix for the vs code +#' +#' @noRd +#' @keywords internal +vs_template_prefix <- stringr::str_glue(' +library(sdtm.oak) +library(dplyr) + + +# Read Specification + +# Read CT Specification +study_ct <- read.csv("./datasets/sdtm_ct.csv") + +# Read in raw data +vitals_raw_data <- read.csv("./datasets/vitals_raw_data.csv", + stringsAsFactors = FALSE) + +vitals_raw_data <- admiral::convert_blanks_to_na(vitals_raw_data) + + +# derive oak_id_vars +vitals_raw_data <- vitals_raw_data %>% + generate_oak_id_vars( + pat_var = "PATNUM", + raw_src = "vitals_raw_data" + ) + +# Read in DM domain to derive study day +dm <- read.csv("./datasets/dm.csv") + +dm <- admiral::convert_blanks_to_na(dm) +') + +#' The template suffix for the vs code +#' +#' @noRd +#' @keywords internal +vs_template_suffix <- stringr::str_glue(' +# Combine all the topic variables into a single data frame. ---- +vs_combined <- dplyr::bind_rows( + vs_asmntdn, vs_sys_bp, vs_dia_bp, vs_pulse, vs_temp, + vs_resprt, vs_oxy_sat +) + +# Map qualifiers common to all topic variables ---- + +vs <- vs_combined %>% + # Map VSDTC using assign_ct algorithm + assign_datetime( + raw_dat = vitals_raw, + raw_var = c("VTLD", "VTLTM"), + tgt_var = "VSDTC", + raw_fmt = c(list(c("d-m-y", "dd-mmm-yyyy")), "H:M") + ) %>% + # Map VSTPT from TMPTC using assign_ct + assign_ct( + raw_dat = vitals_raw, + raw_var = "TMPTC", + tgt_var = "VSTPT", + ct_spec = study_ct, + ct_clst = "TPT", + id_vars = oak_id_vars() + ) %>% + # Map VSTPTNUM from TMPTC using assign_ct + assign_ct( + raw_dat = vitals_raw, + raw_var = "TMPTC", + tgt_var = "VSTPTNUM", + ct_spec = study_ct, + ct_clst = "TPTNUM", + id_vars = oak_id_vars() + ) %>% + # Map VISIT from VISIT_NAME using assign_ct + assign_ct( + raw_dat = vitals_raw, + raw_var = "VISIT_NAME", + tgt_var = "VISIT", + ct_spec = study_ct, + ct_clst = "VISIT", + id_vars = oak_id_vars() + ) %>% + # Map VISITNUM from VISIT_NAME using assign_ct + assign_ct( + raw_dat = vitals_raw, + raw_var = "VISIT_NAME", + tgt_var = "VISITNUM", + ct_spec = study_ct, + ct_clst = "VISITNUM", + id_vars = oak_id_vars() + ) %>% + dplyr::mutate( + STUDYID = "test_study", + DOMAIN = "VS", + VSCAT = "VITAL SIGNS", + USUBJID = paste0("test_study", "-", .data$patient_number) + ) %>% + derive_seq(tgt_var = "VSSEQ", + rec_vars= c("USUBJID", "VISITNUM", "VSTPTNUM", "VSTESTCD")) %>% + # A bug in derive_study_day V0.1 that clears the time values in VSDTC + derive_study_day( + sdtm_in = ., + dm_domain = dm, + tgdt = "VSDTC", + refdt = "RFXSTDTC", + study_day_var = "VSDY" + ) %>% + dplyr::select("STUDYID", "DOMAIN", "USUBJID", "VSSEQ", + "VSTESTCD", "VSTEST", "VSCAT", "VSPOS", + "VSORRES", "VSORRESU", "VSLOC", "VSLAT", + "VISIT", "VISITNUM", "VSDY", "VSTPT", "VSTPTNUM", "VSDTC" ) +') From e8bedf2a1e84b27a7034939659c6a6099f23f61c Mon Sep 17 00:00:00 2001 From: Edgar Manukyan Date: Sun, 8 Dec 2024 01:51:35 +0000 Subject: [PATCH 23/35] #107 change code list code --- R/generate_code.R | 3 ++- R/generate_code_tempates.R | 10 +++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/R/generate_code.R b/R/generate_code.R index b5e258c3..4c42ce07 100644 --- a/R/generate_code.R +++ b/R/generate_code.R @@ -13,7 +13,8 @@ # - Moved VSTESTCD before qualifired, e.g. VSSTAT, VSPOS so that topic is created first # - Changed mapping_is_dataset to TRUE for VSTPT, VSDTC # - Added a new column topic, showing to which topic the mapping belongs to - +# - Some code list codes were populated in target_sdtm_variable_controlled_terms_or_format +# I move them under target_sdtm_variable_codelist_code, e.g. VSPOS, VSLOC #' Generate the code for the mapping SDTM specification #' diff --git a/R/generate_code_tempates.R b/R/generate_code_tempates.R index 00961250..664166ae 100644 --- a/R/generate_code_tempates.R +++ b/R/generate_code_tempates.R @@ -111,14 +111,14 @@ vs_combined <- dplyr::bind_rows( vs <- vs_combined %>% # Map VSDTC using assign_ct algorithm assign_datetime( - raw_dat = vitals_raw, + raw_dat = vitals_raw_data, raw_var = c("VTLD", "VTLTM"), tgt_var = "VSDTC", raw_fmt = c(list(c("d-m-y", "dd-mmm-yyyy")), "H:M") ) %>% # Map VSTPT from TMPTC using assign_ct assign_ct( - raw_dat = vitals_raw, + raw_dat = vitals_raw_data, raw_var = "TMPTC", tgt_var = "VSTPT", ct_spec = study_ct, @@ -127,7 +127,7 @@ vs <- vs_combined %>% ) %>% # Map VSTPTNUM from TMPTC using assign_ct assign_ct( - raw_dat = vitals_raw, + raw_dat = vitals_raw_data, raw_var = "TMPTC", tgt_var = "VSTPTNUM", ct_spec = study_ct, @@ -136,7 +136,7 @@ vs <- vs_combined %>% ) %>% # Map VISIT from VISIT_NAME using assign_ct assign_ct( - raw_dat = vitals_raw, + raw_dat = vitals_raw_data, raw_var = "VISIT_NAME", tgt_var = "VISIT", ct_spec = study_ct, @@ -145,7 +145,7 @@ vs <- vs_combined %>% ) %>% # Map VISITNUM from VISIT_NAME using assign_ct assign_ct( - raw_dat = vitals_raw, + raw_dat = vitals_raw_data, raw_var = "VISIT_NAME", tgt_var = "VISITNUM", ct_spec = study_ct, From ef9dfc2f3d724bbaa5cf869144f8ff439330275e Mon Sep 17 00:00:00 2001 From: Edgar Manukyan Date: Sun, 8 Dec 2024 01:54:15 +0000 Subject: [PATCH 24/35] #107 drop empty testcd in the template --- R/generate_code_tempates.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/generate_code_tempates.R b/R/generate_code_tempates.R index 664166ae..4be634cb 100644 --- a/R/generate_code_tempates.R +++ b/R/generate_code_tempates.R @@ -104,7 +104,8 @@ vs_template_suffix <- stringr::str_glue(' vs_combined <- dplyr::bind_rows( vs_asmntdn, vs_sys_bp, vs_dia_bp, vs_pulse, vs_temp, vs_resprt, vs_oxy_sat -) +) %>% + dplyr::filter(!is.na(.data$VSTESTCD)) # Map qualifiers common to all topic variables ---- From ea2dfee4e8327f4abfeae1b16dba6e016e157b1f Mon Sep 17 00:00:00 2001 From: Edgar Manukyan Date: Mon, 9 Dec 2024 01:19:30 +0000 Subject: [PATCH 25/35] #107 styler --- R/generate_code.R | 3 +-- R/generate_code_tempates.R | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/R/generate_code.R b/R/generate_code.R index 4c42ce07..dedd1597 100644 --- a/R/generate_code.R +++ b/R/generate_code.R @@ -81,9 +81,8 @@ generate_code <- function(spec, domain, out_dir = ".") { topics <- unique(spec_domain$topic) code_by_topics <- purrr::map(topics, \(topic) { - spec_domain_topic <- spec_domain |> - dplyr::filter(topic %in% {{topic}}) + dplyr::filter(topic %in% {{ topic }}) domain_topic <- paste(domain, topic, sep = "_") |> tolower() diff --git a/R/generate_code_tempates.R b/R/generate_code_tempates.R index 4be634cb..eedfca81 100644 --- a/R/generate_code_tempates.R +++ b/R/generate_code_tempates.R @@ -1,4 +1,3 @@ - #' The template suffix for the cm code #' #' @noRd From 2bfb2b3f4e2cecfd709afead54f94de7cb773ccc Mon Sep 17 00:00:00 2001 From: Edgar Manukyan Date: Mon, 9 Dec 2024 02:04:28 +0000 Subject: [PATCH 26/35] #107 refactor, add test for multiple topics --- R/generate_code.R | 274 +++++++++++++++------------- man/cm_template_prefix.Rd | 16 -- man/expected_columns.Rd | 16 -- man/generate_code.Rd | 8 +- man/generate_one_topic_code.Rd | 22 +++ man/generate_one_var_code.Rd | 4 +- tests/testthat/test-generate_code.R | 47 ++++- 7 files changed, 225 insertions(+), 162 deletions(-) delete mode 100644 man/cm_template_prefix.Rd delete mode 100644 man/expected_columns.Rd create mode 100644 man/generate_one_topic_code.Rd diff --git a/R/generate_code.R b/R/generate_code.R index dedd1597..31ab81e9 100644 --- a/R/generate_code.R +++ b/R/generate_code.R @@ -14,7 +14,7 @@ # - Changed mapping_is_dataset to TRUE for VSTPT, VSDTC # - Added a new column topic, showing to which topic the mapping belongs to # - Some code list codes were populated in target_sdtm_variable_controlled_terms_or_format -# I move them under target_sdtm_variable_codelist_code, e.g. VSPOS, VSLOC +# I moved them under target_sdtm_variable_codelist_code, e.g. VSPOS, VSLOC #' Generate the code for the mapping SDTM specification #' @@ -73,33 +73,19 @@ #' } #' generate_code <- function(spec, domain, out_dir = ".") { - admiraldev::assert_data_frame(spec, required_vars = rlang::syms(expected_columns)) + admiraldev::assert_data_frame(spec, required_vars = rlang::syms(expected_columns())) admiraldev::assert_character_scalar(domain) spec_domain <- get_domain_spec(spec, domain) topics <- unique(spec_domain$topic) - code_by_topics <- purrr::map(topics, \(topic) { - spec_domain_topic <- spec_domain |> - dplyr::filter(topic %in% {{ topic }}) - - domain_topic <- paste(domain, topic, sep = "_") |> - tolower() - - map_topic <- paste0("\n\n# Map topic ", domain_topic, " ----\n") - - # Generate the code for each variable row in spec_domain - spec_domain_topic |> - dplyr::rowwise() |> - dplyr::mutate( - algorithm_code = list(generate_one_var_code(dplyr::pick(dplyr::everything()))), - .keep = "none" - ) |> - unlist() |> - append(paste0(domain_topic, " <-"), after = 0L) |> - append(map_topic, after = 0L) - }) + code_by_topics <- purrr::map( + topics, + generate_one_topic_code, + domain = domain, + spec = spec_domain + ) one_topic <- identical(length(code_by_topics), 1L) @@ -123,50 +109,61 @@ generate_code <- function(spec, domain, out_dir = ".") { writeLines(styled_code, file.path(out_dir, file_name)) } -#' Check if a variable is character +#' Generate the code for one topic #' -#' @param var_in The variable to check. +#' @param topic The topic to generate the code for. +#' @param domain The SDTM domain. +#' @param spec The specification data frame. #' -#' @return Logical indicating if the variable is character. -#' @export +#' @return The code for the topic as a string. +#' @keywords internal #' -is_numeric <- function(var_in) { - grepl(r"{^-?\d*(\.\d+)?(e[+-]?\d+)?$}", var_in) -} +generate_one_topic_code <- function(topic, domain, spec) { + admiraldev::assert_character_scalar(topic) + admiraldev::assert_character_scalar(domain) + admiraldev::assert_data_frame(spec, required_vars = rlang::syms(expected_columns_min())) -#' Check if a variable is character -#' -#' @param var_in The variable to check. -#' -#' @return Logical indicating if the variable is character. -#' @export -#' -is_character <- function(var_in) { - grepl("[^0-9eE.-]", var_in) -} + spec_topic <- spec |> + dplyr::filter(topic %in% {{ topic }}) + domain_topic <- paste(domain, topic, sep = "_") |> + tolower() + + map_topic <- paste0("\n\n# Map topic ", domain_topic, " ----\n") + + # Generate the code for each variable row in spec + spec_topic |> + dplyr::rowwise() |> + dplyr::mutate( + algorithm_code = list(generate_one_var_code(dplyr::pick(dplyr::everything()))), + .keep = "none" + ) |> + unlist() |> + append(paste0(domain_topic, " <-"), after = 0L) |> + append(map_topic, after = 0L) +} #' Generate the code for one variable #' -#' @param spec_var The specification for one variable. +#' @param spec The specification data frame. #' @param last_var Logical indicating if this is the last variable in the domain. #' #' @return The code for the variable as a string. #' @keywords internal #' -generate_one_var_code <- function(spec_var) { - admiraldev::assert_data_frame(spec_var) - assertthat::assert_that(identical(nrow(spec_var), 1L)) +generate_one_var_code <- function(spec) { + admiraldev::assert_data_frame(spec, required_vars = rlang::syms(expected_columns_min())) + assertthat::assert_that(identical(nrow(spec), 1L)) args <- list( - raw_dat = rlang::parse_expr(spec_var$raw_dataset), - raw_var = spec_var$raw_variable, - tgt_var = spec_var$target_sdtm_variable, - tgt_val = spec_var$target_value, + raw_dat = rlang::parse_expr(spec$raw_dataset), + raw_var = spec$raw_variable, + tgt_var = spec$target_sdtm_variable, + tgt_val = spec$target_value, ct_spec = rlang::parse_expr("study_ct"), - ct_clst = spec_var$target_sdtm_variable_codelist_code, - raw_fmt = spec_var$raw_fmt, - raw_unk = parse_into_c_call(spec_var$raw_unk) + ct_clst = spec$target_sdtm_variable_codelist_code, + raw_fmt = spec$raw_fmt, + raw_unk = parse_into_c_call(spec$raw_unk) ) # If the ct_clst is missing, then we must remove ct_spec @@ -179,7 +176,7 @@ generate_one_var_code <- function(spec_var) { # Generate the function call generated_call <- rlang::call2( - spec_var$mapping_algorithm, + spec$mapping_algorithm, !!!args ) @@ -219,7 +216,6 @@ parse_into_c_call <- function(str_in) { rlang::call2("c", !!!str_out) } - #' Add a pipe operator to the last element of a character vector #' #' @param code_block A character vector. @@ -265,30 +261,15 @@ remove_last_pipe <- function(code_blocks) { #' @keywords internal #' get_domain_spec <- function(spec, domain) { - expected_columns <- c( - "raw_dataset", - "raw_variable", - "target_sdtm_variable", - "topic", - "mapping_algorithm", - "entity_sub_algorithm", - "condition_add_raw_dat", - "target_sdtm_variable_codelist_code", - "raw_data_format", - "raw_fmt", - "raw_unk", - "target_term_value", - "target_value" - ) - admiraldev::assert_data_frame(spec, required_vars = rlang::syms(expected_columns)) + admiraldev::assert_data_frame(spec, required_vars = rlang::syms(expected_columns_min())) admiraldev::assert_character_scalar(domain) # For now assuming that there is only one topic and the topic is the first one spec |> dplyr::filter(tolower(target_sdtm_domain) %in% tolower(domain)) |> - dplyr::select(dplyr::all_of(expected_columns)) |> + dplyr::select(dplyr::all_of(expected_columns_min())) |> # For now swapping entity_sub_algorithm with mapping_algorithm since the # algorithms like assign_no_ct are the mapping_algorithm and they are populated # in the entity_sub_algorithm @@ -316,6 +297,27 @@ get_domain_spec <- function(spec, domain) { ) } +#' Check if a variable is character +#' +#' @param var_in The variable to check. +#' +#' @return Logical indicating if the variable is character. +#' @export +#' +is_numeric <- function(var_in) { + grepl(r"{^-?\d*(\.\d+)?(e[+-]?\d+)?$}", var_in) +} + +#' Check if a variable is character +#' +#' @param var_in The variable to check. +#' +#' @return Logical indicating if the variable is character. +#' @export +#' +is_character <- function(var_in) { + grepl("[^0-9eE.-]", var_in) +} #' Read the specification file #' @@ -336,65 +338,89 @@ read_spec <- function(file) { spec <- utils::read.csv(file = file, na.strings = c("NA", ""), colClasses = "character") |> tibble::as_tibble() - admiraldev::assert_data_frame(spec, required_vars = rlang::syms(expected_columns)) + admiraldev::assert_data_frame(spec, required_vars = rlang::syms(expected_columns())) return(spec) } -#' Expected columns in the specification file +#' Expected columns in the specification for the domain #' #' @keywords internal +#' @noRd +expected_columns_min <- function() { + c( + "raw_dataset", + "raw_variable", + "target_sdtm_variable", + "topic", + "mapping_algorithm", + "entity_sub_algorithm", + "condition_add_raw_dat", + "target_sdtm_variable_codelist_code", + "raw_data_format", + "raw_fmt", + "raw_unk", + "target_term_value", + "target_value" + ) +} + +#' Expected columns in the specification file #' -expected_columns <- c( - "study_number", - "raw_dataset", - "raw_dataset_label", - "raw_variable", - "raw_variable_label", - "raw_variable_ordinal", - "raw_variable_type", - "raw_data_format", - "raw_fmt", - "raw_unk", - "study_specific", - "annotation_ordinal", - "mapping_is_dataset", - "annotation_text", - "target_sdtm_domain", - "target_sdtm_variable", - "target_sdtm_variable_role", - "target_sdtm_variable_codelist_code", - "target_sdtm_variable_controlled_terms_or_format", - "target_sdtm_variable_ordinal", - "origin", - "mapping_algorithm", - "entity_sub_algorithm", - "target_hardcoded_value", - "target_term_value", - "target_term_code", - "condition_ordinal", - "condition_group_ordinal", - "condition_add_raw_dat", - "condition_add_tgt_dat", - "condition_left_raw_dataset", - "condition_left_raw_variable", - "condition_left_sdtm_domain", - "condition_left_sdtm_variable", - "condition_operator", - "condition_right_text_value", - "condition_right_sdtm_domain", - "condition_right_sdtm_variable", - "condition_right_raw_dataset", - "condition_right_raw_variable", - "condition_next_logical_operator", - "merge_type", - "merge_left", - "merge_right", - "merge_condition", - "unduplicate_keys", - "groupby_keys", - "target_resource_raw_dataset", - "target_resource_raw_variable", - "target_value", - "topic" -) +#' @keywords internal +#' @noRd +expected_columns <- function() { + c( + "study_number", + "raw_dataset", + "raw_dataset_label", + "raw_variable", + "raw_variable_label", + "raw_variable_ordinal", + "raw_variable_type", + "raw_data_format", + "raw_fmt", + "raw_unk", + "study_specific", + "annotation_ordinal", + "mapping_is_dataset", + "annotation_text", + "target_sdtm_domain", + "target_sdtm_variable", + "target_sdtm_variable_role", + "target_sdtm_variable_codelist_code", + "target_sdtm_variable_controlled_terms_or_format", + "target_sdtm_variable_ordinal", + "origin", + "mapping_algorithm", + "entity_sub_algorithm", + "target_hardcoded_value", + "target_term_value", + "target_term_code", + "condition_ordinal", + "condition_group_ordinal", + "condition_add_raw_dat", + "condition_add_tgt_dat", + "condition_left_raw_dataset", + "condition_left_raw_variable", + "condition_left_sdtm_domain", + "condition_left_sdtm_variable", + "condition_operator", + "condition_right_text_value", + "condition_right_sdtm_domain", + "condition_right_sdtm_variable", + "condition_right_raw_dataset", + "condition_right_raw_variable", + "condition_next_logical_operator", + "merge_type", + "merge_left", + "merge_right", + "merge_condition", + "unduplicate_keys", + "groupby_keys", + "target_resource_raw_dataset", + "target_resource_raw_variable", + "target_value", + "topic" + ) +} diff --git a/man/cm_template_prefix.Rd b/man/cm_template_prefix.Rd deleted file mode 100644 index eb244d88..00000000 --- a/man/cm_template_prefix.Rd +++ /dev/null @@ -1,16 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/generate_code.R -\docType{data} -\name{cm_template_prefix} -\alias{cm_template_prefix} -\title{The template suffix for the cm code} -\format{ -An object of class \code{glue} (inherits from \code{character}) of length 1. -} -\usage{ -cm_template_prefix -} -\description{ -The template suffix for the cm code -} -\keyword{internal} diff --git a/man/expected_columns.Rd b/man/expected_columns.Rd deleted file mode 100644 index 781739d2..00000000 --- a/man/expected_columns.Rd +++ /dev/null @@ -1,16 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/generate_code.R -\docType{data} -\name{expected_columns} -\alias{expected_columns} -\title{Expected columns in the specification file} -\format{ -An object of class \code{character} of length 50. -} -\usage{ -expected_columns -} -\description{ -Expected columns in the specification file -} -\keyword{internal} diff --git a/man/generate_code.Rd b/man/generate_code.Rd index e264182a..0729539b 100644 --- a/man/generate_code.Rd +++ b/man/generate_code.Rd @@ -34,10 +34,14 @@ spec <- spec |> dplyr::filter( !is.na(target_sdtm_variable), !is.na(mapping_algorithm), - !target_sdtm_variable \%in\% c("DOMAIN"), !mapping_is_dataset \%in\% c("TRUE") ) +old_width <- options(width = 20) +generate_code(spec, domain) +# Restore original width +options(width = old_width$width) + # CM domain ---- spec <- read_spec("cm_sdtm_oak_spec_cdash.csv") @@ -46,7 +50,7 @@ spec <- spec |> dplyr::filter( !is.na(target_sdtm_variable), !is.na(mapping_algorithm), - !target_sdtm_variable \%in\% c("DOMAIN") + !mapping_is_dataset \%in\% c("TRUE") ) domain <- "cm" diff --git a/man/generate_one_topic_code.Rd b/man/generate_one_topic_code.Rd new file mode 100644 index 00000000..06e3eae7 --- /dev/null +++ b/man/generate_one_topic_code.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/generate_code.R +\name{generate_one_topic_code} +\alias{generate_one_topic_code} +\title{Generate the code for one topic} +\usage{ +generate_one_topic_code(topic, domain, spec) +} +\arguments{ +\item{topic}{The topic to generate the code for.} + +\item{domain}{The SDTM domain.} + +\item{spec}{The specification data frame.} +} +\value{ +The code for the topic as a string. +} +\description{ +Generate the code for one topic +} +\keyword{internal} diff --git a/man/generate_one_var_code.Rd b/man/generate_one_var_code.Rd index f23d6ace..ce6fcdca 100644 --- a/man/generate_one_var_code.Rd +++ b/man/generate_one_var_code.Rd @@ -4,10 +4,10 @@ \alias{generate_one_var_code} \title{Generate the code for one variable} \usage{ -generate_one_var_code(spec_var) +generate_one_var_code(spec) } \arguments{ -\item{spec_var}{The specification for one variable.} +\item{spec}{The specification data frame.} \item{last_var}{Logical indicating if this is the last variable in the domain.} } diff --git a/tests/testthat/test-generate_code.R b/tests/testthat/test-generate_code.R index 0d513cc1..2e1a608b 100644 --- a/tests/testthat/test-generate_code.R +++ b/tests/testthat/test-generate_code.R @@ -1,4 +1,4 @@ -test_that("generate_code works", { +test_that("generate_code works for one topic domain", { # nolint start spec <- tibble::tribble( ~study_number, ~raw_dataset, ~raw_dataset_label, ~raw_variable, ~raw_variable_label, ~raw_variable_ordinal, ~raw_variable_type, ~raw_data_format, ~raw_fmt, ~raw_unk, ~study_specific, ~annotation_ordinal, ~mapping_is_dataset, ~annotation_text, ~target_sdtm_domain, ~target_sdtm_variable, ~target_sdtm_variable_role, ~topic, ~target_sdtm_variable_codelist_code, ~target_sdtm_variable_controlled_terms_or_format, ~target_sdtm_variable_ordinal, ~origin, ~mapping_algorithm, ~entity_sub_algorithm, ~target_hardcoded_value, ~target_term_value, ~target_value, ~target_term_code, ~condition_ordinal, ~condition_group_ordinal, ~condition_add_raw_dat, ~condition_add_tgt_dat, ~condition_left_raw_dataset, ~condition_left_raw_variable, ~condition_left_sdtm_domain, ~condition_left_sdtm_variable, ~condition_operator, ~condition_right_text_value, ~condition_right_sdtm_domain, ~condition_right_sdtm_variable, ~condition_right_raw_dataset, ~condition_right_raw_variable, ~condition_next_logical_operator, ~merge_type, ~merge_left, ~merge_right, ~merge_condition, ~unduplicate_keys, ~groupby_keys, ~target_resource_raw_dataset, ~target_resource_raw_variable, @@ -11,7 +11,7 @@ test_that("generate_code works", { spec <- spec |> dplyr::mutate(dplyr::across( .cols = dplyr::everything(), - .fns = ~ dplyr::if_else(is.na(.x), NA_character_, .x) + .fns = \(x) dplyr::if_else(is.na(x), NA_character_, x) )) domain <- "cm" @@ -34,3 +34,46 @@ test_that("generate_code works", { expect_true(grepl("dplyr::select", observed, fixed = TRUE) |> any()) }) }) + +test_that("generate_code works for multiple topics domain", { + + # nolint start + spec <- tibble::tribble( + ~study_number, ~raw_dataset, ~raw_dataset_label, ~raw_variable, ~raw_variable_label, ~raw_variable_ordinal, ~raw_variable_type, ~raw_data_format, ~raw_fmt, ~raw_unk, ~study_specific, ~annotation_ordinal, ~mapping_is_dataset, ~annotation_text, ~target_sdtm_domain, ~target_sdtm_variable, ~target_sdtm_variable_role, ~topic, ~target_sdtm_variable_codelist_code, ~target_sdtm_variable_controlled_terms_or_format, ~target_sdtm_variable_ordinal, ~origin, ~mapping_algorithm, ~entity_sub_algorithm, ~target_hardcoded_value, ~target_term_value, ~target_value, ~target_term_code, ~condition_ordinal, ~condition_group_ordinal, ~condition_add_raw_dat, ~condition_add_tgt_dat, ~condition_left_raw_dataset, ~condition_left_raw_variable, ~condition_left_sdtm_domain, ~condition_left_sdtm_variable, ~condition_operator, ~condition_right_text_value, ~condition_right_sdtm_domain, ~condition_right_sdtm_variable, ~condition_right_raw_dataset, ~condition_right_raw_variable, ~condition_next_logical_operator, ~merge_type, ~merge_left, ~merge_right, ~merge_condition, ~unduplicate_keys, ~groupby_keys, ~target_resource_raw_dataset, ~target_resource_raw_variable, + "lp_study", "vitals_raw_data", "Vital Signs", "ASMNTDN", "Assessment not done", "1", "DropDownList", "1", NA, NA, "FALSE", "1", "FALSE", "If No then VS.VSSTAT = 'NOT DONE' when VS.VSTESTCD = 'VSALL'", "VS", "VSTESTCD", "Topic Variable", "ASMNTDN", "C66741", "(VSTESTCD)", "7", "Assigned", "condition_add", "hardcode_ct", NA, "VSALL", "VSALL", "V00224", "1.3", "0", "condition_add(vitals_raw_data, ASMNTDN == \"Yes\")", NA, "vitals_raw_data", "ASMNTDN", NA, NA, "equal_to", "Yes", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + "lp_study", "vitals_raw_data", "Vital Signs", "ASMNTDN", "Assessment not done", "1", "DropDownList", "1", NA, NA, "FALSE", "2", "FALSE", "If No then VS.VSTEST = 'Vital Signs'", "VS", "VSTEST", "Synonym Qualifier", "ASMNTDN", "C67153", "(VSTEST)", "8", "Assigned", "condition_add", "hardcode_ct", NA, "Vital Signs", "Vital Signs", "V00224", "2.1", "0", "condition_add(vitals_raw_data, ASMNTDN == \"Yes\")", NA, "vitals_raw_data", "ASMNTDN", NA, NA, "equal_to", "Yes", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + "lp_study", "vitals_raw_data", "Vital Signs", "ASMNTDN", "Assessment not done", "1", "DropDownList", "1", NA, NA, "FALSE", "1", "FALSE", "If No then VS.VSSTAT = 'NOT DONE' when VS.VSTESTCD = 'VSALL'", "VS", "VSSTAT", "Record Qualifier", "ASMNTDN", "C66789", "(ND)", "17", "Assigned", "condition_add", "hardcode_ct", NA, "NOT DONE", "NOT DONE", "C49484", "1.2", "0", "condition_add(vitals_raw_data, ASMNTDN == \"Yes\")", NA, "vitals_raw_data", "ASMNTDN", NA, NA, "equal_to", "Yes", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + "lp_study", "vitals_raw_data", "Vital Signs", "SYS_BP", "Systolic blood pressure", "6", "Text", "3", NA, NA, "FALSE", "1", "FALSE", "VS.VSORRES when VS.VSTESTCD = 'SYSBP'", "VS", "VSTESTCD", "Topic Variable", "SYS_BP", "C66741", "(VSTESTCD)", "7", "Assigned", "hardcode_ct", NA, NA, "SYSBP", "SYSBP", "C25298", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + "lp_study", "vitals_raw_data", "Vital Signs", "SYS_BP", "Systolic blood pressure", "6", "Text", "3", NA, NA, "FALSE", "2", "FALSE", "VS.VSTEST = 'Systolic Blood Pressure'", "VS", "VSTEST", "Synonym Qualifier", "SYS_BP", "C67153", "(VSTEST)", "8", "Assigned", "hardcode_ct", NA, NA, "Systolic Blood Pressure", "Systolic Blood Pressure", "C25298", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + "lp_study", "vitals_raw_data", "Vital Signs", "SYS_BP", "Systolic blood pressure", "6", "Text", "3", NA, NA, "FALSE", "1", "FALSE", "VS.VSORRES when VS.VSTESTCD = 'SYSBP'", "VS", "VSORRES", "Result Qualifier", "SYS_BP", NA, NA, "12", "CRF", "assign_no_ct", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + "lp_study", "vitals_raw_data", "Vital Signs", "SYS_BP", "Systolic blood pressure", "6", "Text", "3", NA, NA, "FALSE", "3", "FALSE", "VS.VSORRESU = ", "VS", "VSORRESU", "Variable Qualifier", "SYS_BP", "C66770", "(VSRESU)", "13", "Assigned", "hardcode_ct", NA, NA, "mmHg", "mmHg", "C49670", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA + ) + # nolint end + + # Convert all NA to NA_character_ + spec <- spec |> + dplyr::mutate(dplyr::across( + .cols = dplyr::everything(), + .fns = \(x) dplyr::if_else(is.na(x), NA_character_, x) + )) + + domain <- "vs" + + temp_dir <- tempdir() + out_dir <- file.path(temp_dir, "data/generate_code") + unlink(out_dir, recursive = TRUE, force = TRUE) + dir.create(out_dir, showWarnings = FALSE, recursive = TRUE) + + withr::with_options(list(width = 20L), { + generate_code(spec, domain, out_dir) + observed <- readLines(file.path(out_dir, paste0(domain, "_sdtm_oak_code.R"))) + + expect_gt(length(observed), 100L) + # From prefix + expect_true(grepl("generate_oak_id_vars", observed, fixed = TRUE) |> any()) + # From generator + expect_true(grepl("assign_no_ct", observed, fixed = TRUE) |> any()) + # From suffix + expect_true(grepl("dplyr::select", observed, fixed = TRUE) |> any()) + }) +}) From c465b228de2801e8450f143dde3a4402869c12da Mon Sep 17 00:00:00 2001 From: Edgar Manukyan Date: Mon, 9 Dec 2024 02:12:18 +0000 Subject: [PATCH 27/35] #107 refactor --- R/generate_code.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/generate_code.R b/R/generate_code.R index 31ab81e9..70adf214 100644 --- a/R/generate_code.R +++ b/R/generate_code.R @@ -130,6 +130,7 @@ generate_one_topic_code <- function(topic, domain, spec) { tolower() map_topic <- paste0("\n\n# Map topic ", domain_topic, " ----\n") + assign_to_domain_topic <- paste0(domain_topic, " <-") # Generate the code for each variable row in spec spec_topic |> @@ -139,7 +140,7 @@ generate_one_topic_code <- function(topic, domain, spec) { .keep = "none" ) |> unlist() |> - append(paste0(domain_topic, " <-"), after = 0L) |> + append(assign_to_domain_topic, after = 0L) |> append(map_topic, after = 0L) } From c444060cb1270dcd1414e693554f8933ebd6010a Mon Sep 17 00:00:00 2001 From: Edgar Manukyan Date: Mon, 9 Dec 2024 02:13:48 +0000 Subject: [PATCH 28/35] #107 todo --- R/generate_code.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/R/generate_code.R b/R/generate_code.R index 70adf214..442bccd0 100644 --- a/R/generate_code.R +++ b/R/generate_code.R @@ -89,6 +89,9 @@ generate_code <- function(spec, domain, out_dir = ".") { one_topic <- identical(length(code_by_topics), 1L) + # TODO + # - refactor into function + # - dynamically select the templates based on domain styled_code <- if (one_topic) { code_by_topics |> unlist() |> From 9532f87455414eca2dac176e99a83f4a89823f99 Mon Sep 17 00:00:00 2001 From: Edgar Manukyan Date: Wed, 11 Dec 2024 02:57:50 +0000 Subject: [PATCH 29/35] #107 refactor --- R/generate_code.R | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/R/generate_code.R b/R/generate_code.R index 442bccd0..b9e151de 100644 --- a/R/generate_code.R +++ b/R/generate_code.R @@ -80,36 +80,42 @@ generate_code <- function(spec, domain, out_dir = ".") { topics <- unique(spec_domain$topic) - code_by_topics <- purrr::map( + styled_code <- purrr::map( topics, generate_one_topic_code, domain = domain, spec = spec_domain - ) + ) |> + style_the_code() + + file_name <- paste0(domain, "_sdtm_oak_code.R") + writeLines(styled_code, file.path(out_dir, file_name)) +} + +style_the_code <- function(code_by_topics) { + + admiraldev::assert_list_of(code_by_topics, "character") one_topic <- identical(length(code_by_topics), 1L) # TODO - # - refactor into function # - dynamically select the templates based on domain - styled_code <- if (one_topic) { - code_by_topics |> + if (one_topic) { + styled_code <- code_by_topics |> unlist() |> append(cm_template_prefix, after = 0L) |> append(cm_template_suffix) |> styler::style_text() - } else { - code_by_topics |> - purrr::map(remove_last_pipe) |> - unlist() |> - append(vs_template_prefix, after = 0L) |> - append(vs_template_suffix) |> - styler::style_text() + + return(styled_code) } - # Save the code to a file - file_name <- paste0(domain, "_sdtm_oak_code.R") - writeLines(styled_code, file.path(out_dir, file_name)) + code_by_topics |> + purrr::map(remove_last_pipe) |> + unlist() |> + append(vs_template_prefix, after = 0L) |> + append(vs_template_suffix) |> + styler::style_text() } #' Generate the code for one topic From aed2012e944a8634049211bd707d9d2a32697198 Mon Sep 17 00:00:00 2001 From: Edgar Manukyan Date: Wed, 11 Dec 2024 03:32:46 +0000 Subject: [PATCH 30/35] #107 refactor, dynamic templates --- R/generate_code.R | 42 ++++++++++++++++++++++++-------------- R/generate_code_tempates.R | 17 +++++++++++---- man/style_the_code.Rd | 21 +++++++++++++++++++ 3 files changed, 61 insertions(+), 19 deletions(-) create mode 100644 man/style_the_code.Rd diff --git a/R/generate_code.R b/R/generate_code.R index b9e151de..cc731b40 100644 --- a/R/generate_code.R +++ b/R/generate_code.R @@ -86,35 +86,47 @@ generate_code <- function(spec, domain, out_dir = ".") { domain = domain, spec = spec_domain ) |> - style_the_code() + style_the_code(domain) file_name <- paste0(domain, "_sdtm_oak_code.R") writeLines(styled_code, file.path(out_dir, file_name)) } -style_the_code <- function(code_by_topics) { +#' Style the code +#' +#' This function styles the code using the styler package and adds the necessary +#' templates to the code (e.g. cm_template_prefix, cm_template_suffix). +#' +#' @param code_by_topics A list of character vectors. +#' @inheritParams generate_code +#' +#' @return The styled code as a string. +#' @keywords internal +#' +style_the_code <- function(code_by_topics, domain) { admiraldev::assert_list_of(code_by_topics, "character") - one_topic <- identical(length(code_by_topics), 1L) + prefix_f <- paste0(domain, "_template_prefix") + suffix_f <- paste0(domain, "_template_suffix") + + assertthat::assert_that(exists(prefix_f), msg = paste0("The function ", prefix_f, " does not exist.")) + assertthat::assert_that(exists(suffix_f), msg = paste0("The function ", suffix_f, " does not exist.")) + + prefix <- do.call(prefix_f, list()) + suffix <- do.call(suffix_f, list()) - # TODO - # - dynamically select the templates based on domain - if (one_topic) { - styled_code <- code_by_topics |> - unlist() |> - append(cm_template_prefix, after = 0L) |> - append(cm_template_suffix) |> - styler::style_text() + multiple_topics <- !identical(length(code_by_topics), 1L) - return(styled_code) + if (multiple_topics) { + code_by_topics <- code_by_topics |> + purrr::map(remove_last_pipe) } code_by_topics |> - purrr::map(remove_last_pipe) |> unlist() |> - append(vs_template_prefix, after = 0L) |> - append(vs_template_suffix) |> + append(prefix, after = 0L) |> + append(suffix) |> styler::style_text() } diff --git a/R/generate_code_tempates.R b/R/generate_code_tempates.R index eedfca81..bcfc6267 100644 --- a/R/generate_code_tempates.R +++ b/R/generate_code_tempates.R @@ -2,7 +2,8 @@ #' #' @noRd #' @keywords internal -cm_template_prefix <- stringr::str_glue(' +cm_template_prefix <- function() { + stringr::str_glue(' library(sdtm.oak) library(dplyr) @@ -27,12 +28,15 @@ dm <- read.csv("./datasets/dm.csv") dm <- admiral::convert_blanks_to_na(dm) ') +} + #' The template suffix for the cm code #' #' @noRd #' @keywords internal -cm_template_suffix <- stringr::str_glue(' +cm_template_suffix <- function() { + stringr::str_glue(' dplyr::mutate( STUDYID = "test_study", DOMAIN = "CM", @@ -59,12 +63,14 @@ dplyr::select("STUDYID", "DOMAIN", "USUBJID", "CMSEQ", "CMTRT", "CMCAT", "CMINDC "CMDOSE", "CMDOSTXT", "CMDOSU", "CMDOSFRM", "CMDOSFRQ", "CMROUTE", "CMSTDTC", "CMENDTC","CMSTDY", "CMENDY", "CMENRTPT", "CMENTPT") ') +} #' The template suffix for the vs code #' #' @noRd #' @keywords internal -vs_template_prefix <- stringr::str_glue(' +vs_template_prefix <- function() { + stringr::str_glue(' library(sdtm.oak) library(dplyr) @@ -93,12 +99,14 @@ dm <- read.csv("./datasets/dm.csv") dm <- admiral::convert_blanks_to_na(dm) ') +} #' The template suffix for the vs code #' #' @noRd #' @keywords internal -vs_template_suffix <- stringr::str_glue(' +vs_template_suffix <- function() { + stringr::str_glue(' # Combine all the topic variables into a single data frame. ---- vs_combined <- dplyr::bind_rows( vs_asmntdn, vs_sys_bp, vs_dia_bp, vs_pulse, vs_temp, @@ -173,3 +181,4 @@ vs <- vs_combined %>% "VSORRES", "VSORRESU", "VSLOC", "VSLAT", "VISIT", "VISITNUM", "VSDY", "VSTPT", "VSTPTNUM", "VSDTC" ) ') +} diff --git a/man/style_the_code.Rd b/man/style_the_code.Rd new file mode 100644 index 00000000..0e62604f --- /dev/null +++ b/man/style_the_code.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/generate_code.R +\name{style_the_code} +\alias{style_the_code} +\title{Style the code} +\usage{ +style_the_code(code_by_topics, domain) +} +\arguments{ +\item{code_by_topics}{A list of character vectors.} + +\item{domain}{The SDTM domain to generate the code for.} +} +\value{ +The styled code as a string. +} +\description{ +This function styles the code using the styler package and adds the necessary +templates to the code (e.g. cm_template_prefix, cm_template_suffix). +} +\keyword{internal} From 54a1dc8fa92cb1bba209baa8c5f47d4ee7305e75 Mon Sep 17 00:00:00 2001 From: Edgar Manukyan Date: Wed, 11 Dec 2024 03:37:35 +0000 Subject: [PATCH 31/35] #107 namespace in the templates --- R/generate_code_tempates.R | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/R/generate_code_tempates.R b/R/generate_code_tempates.R index bcfc6267..17188912 100644 --- a/R/generate_code_tempates.R +++ b/R/generate_code_tempates.R @@ -18,7 +18,7 @@ cm_raw_data <- admiral::convert_blanks_to_na(cm_raw_data) # derive oak_id_vars cm_raw_data <- cm_raw_data %>% - generate_oak_id_vars( + sdtm.oak::generate_oak_id_vars( pat_var = "PATNUM", raw_src = "cm_raw_data" ) @@ -43,16 +43,16 @@ dplyr::mutate( CMCAT = "GENERAL CONMED", USUBJID = paste0("test_study", "-", cm_raw_data$PATNUM) ) %>% -derive_seq(tgt_var = "CMSEQ", +sdtm.oak::derive_seq(tgt_var = "CMSEQ", rec_vars= c("USUBJID", "CMTRT")) %>% -derive_study_day( +sdtm.oak::derive_study_day( sdtm_in = ., dm_domain = dm, tgdt = "CMENDTC", refdt = "RFXSTDTC", study_day_var = "CMENDY" ) %>% -derive_study_day( +sdtm.oak::derive_study_day( sdtm_in = ., dm_domain = dm, tgdt = "CMSTDTC", @@ -89,7 +89,7 @@ vitals_raw_data <- admiral::convert_blanks_to_na(vitals_raw_data) # derive oak_id_vars vitals_raw_data <- vitals_raw_data %>% - generate_oak_id_vars( + sdtm.oak::generate_oak_id_vars( pat_var = "PATNUM", raw_src = "vitals_raw_data" ) @@ -117,15 +117,15 @@ vs_combined <- dplyr::bind_rows( # Map qualifiers common to all topic variables ---- vs <- vs_combined %>% - # Map VSDTC using assign_ct algorithm - assign_datetime( + # Map VSDTC using sdtm.oak::assign_ct algorithm + sdtm.oak::assign_datetime( raw_dat = vitals_raw_data, raw_var = c("VTLD", "VTLTM"), tgt_var = "VSDTC", raw_fmt = c(list(c("d-m-y", "dd-mmm-yyyy")), "H:M") ) %>% - # Map VSTPT from TMPTC using assign_ct - assign_ct( + # Map VSTPT from TMPTC using sdtm.oak::assign_ct + sdtm.oak::assign_ct( raw_dat = vitals_raw_data, raw_var = "TMPTC", tgt_var = "VSTPT", @@ -133,8 +133,8 @@ vs <- vs_combined %>% ct_clst = "TPT", id_vars = oak_id_vars() ) %>% - # Map VSTPTNUM from TMPTC using assign_ct - assign_ct( + # Map VSTPTNUM from TMPTC using sdtm.oak::assign_ct + sdtm.oak::assign_ct( raw_dat = vitals_raw_data, raw_var = "TMPTC", tgt_var = "VSTPTNUM", @@ -142,8 +142,8 @@ vs <- vs_combined %>% ct_clst = "TPTNUM", id_vars = oak_id_vars() ) %>% - # Map VISIT from VISIT_NAME using assign_ct - assign_ct( + # Map VISIT from VISIT_NAME using sdtm.oak::assign_ct + sdtm.oak::assign_ct( raw_dat = vitals_raw_data, raw_var = "VISIT_NAME", tgt_var = "VISIT", @@ -151,8 +151,8 @@ vs <- vs_combined %>% ct_clst = "VISIT", id_vars = oak_id_vars() ) %>% - # Map VISITNUM from VISIT_NAME using assign_ct - assign_ct( + # Map VISITNUM from VISIT_NAME using sdtm.oak::assign_ct + sdtm.oak::assign_ct( raw_dat = vitals_raw_data, raw_var = "VISIT_NAME", tgt_var = "VISITNUM", @@ -166,10 +166,10 @@ vs <- vs_combined %>% VSCAT = "VITAL SIGNS", USUBJID = paste0("test_study", "-", .data$patient_number) ) %>% - derive_seq(tgt_var = "VSSEQ", + sdtm.oak::derive_seq(tgt_var = "VSSEQ", rec_vars= c("USUBJID", "VISITNUM", "VSTPTNUM", "VSTESTCD")) %>% - # A bug in derive_study_day V0.1 that clears the time values in VSDTC - derive_study_day( + # A bug in sdtm.oak::derive_study_day V0.1 that clears the time values in VSDTC + sdtm.oak::derive_study_day( sdtm_in = ., dm_domain = dm, tgdt = "VSDTC", From 59c00724107da8545c295cc240675ddd3dd93c05 Mon Sep 17 00:00:00 2001 From: Edgar Manukyan Date: Wed, 11 Dec 2024 04:12:35 +0000 Subject: [PATCH 32/35] #107 name-space dynamic code --- R/generate_code.R | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/R/generate_code.R b/R/generate_code.R index cc731b40..8325c2a2 100644 --- a/R/generate_code.R +++ b/R/generate_code.R @@ -177,8 +177,20 @@ generate_one_var_code <- function(spec) { admiraldev::assert_data_frame(spec, required_vars = rlang::syms(expected_columns_min())) assertthat::assert_that(identical(nrow(spec), 1L)) + raw_dat_value <- rlang::parse_expr(spec$raw_dataset) + is_cond_add <- stringr::str_starts(spec$raw_dataset, stringr::fixed("condition_add")) + + # We want name-spaced functions to be used in the code, e.g. sdtm.oak::condition_add + if (is_cond_add) { + raw_dat_value <- paste0("sdtm.oak::", spec$raw_dataset) |> rlang::parse_expr() + } + + # We want name-spaced functions to be used in the code, e.g. sdtm.oak::hardcode_ct + function_name <- paste0("sdtm.oak::", spec$mapping_algorithm) |> + rlang::parse_expr() + args <- list( - raw_dat = rlang::parse_expr(spec$raw_dataset), + raw_dat = raw_dat_value, raw_var = spec$raw_variable, tgt_var = spec$target_sdtm_variable, tgt_val = spec$target_value, @@ -196,9 +208,10 @@ generate_one_var_code <- function(spec) { # Remove the arguments that are missing args <- purrr::discard(args, \(x) is.vector(x) && anyNA(x)) + # Generate the function call generated_call <- rlang::call2( - spec$mapping_algorithm, + function_name, !!!args ) From 4ba5d3f734092791e0f7ff7c2b0f82ab5943e074 Mon Sep 17 00:00:00 2001 From: Edgar Manukyan Date: Wed, 11 Dec 2024 04:14:34 +0000 Subject: [PATCH 33/35] #107 style code --- R/generate_code.R | 2 -- tests/testthat/test-generate_code.R | 3 +-- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/R/generate_code.R b/R/generate_code.R index 8325c2a2..0e66367c 100644 --- a/R/generate_code.R +++ b/R/generate_code.R @@ -104,7 +104,6 @@ generate_code <- function(spec, domain, out_dir = ".") { #' @keywords internal #' style_the_code <- function(code_by_topics, domain) { - admiraldev::assert_list_of(code_by_topics, "character") prefix_f <- paste0(domain, "_template_prefix") @@ -296,7 +295,6 @@ remove_last_pipe <- function(code_blocks) { #' @keywords internal #' get_domain_spec <- function(spec, domain) { - admiraldev::assert_data_frame(spec, required_vars = rlang::syms(expected_columns_min())) admiraldev::assert_character_scalar(domain) diff --git a/tests/testthat/test-generate_code.R b/tests/testthat/test-generate_code.R index 2e1a608b..cc9ef66c 100644 --- a/tests/testthat/test-generate_code.R +++ b/tests/testthat/test-generate_code.R @@ -36,7 +36,6 @@ test_that("generate_code works for one topic domain", { }) test_that("generate_code works for multiple topics domain", { - # nolint start spec <- tibble::tribble( ~study_number, ~raw_dataset, ~raw_dataset_label, ~raw_variable, ~raw_variable_label, ~raw_variable_ordinal, ~raw_variable_type, ~raw_data_format, ~raw_fmt, ~raw_unk, ~study_specific, ~annotation_ordinal, ~mapping_is_dataset, ~annotation_text, ~target_sdtm_domain, ~target_sdtm_variable, ~target_sdtm_variable_role, ~topic, ~target_sdtm_variable_codelist_code, ~target_sdtm_variable_controlled_terms_or_format, ~target_sdtm_variable_ordinal, ~origin, ~mapping_algorithm, ~entity_sub_algorithm, ~target_hardcoded_value, ~target_term_value, ~target_value, ~target_term_code, ~condition_ordinal, ~condition_group_ordinal, ~condition_add_raw_dat, ~condition_add_tgt_dat, ~condition_left_raw_dataset, ~condition_left_raw_variable, ~condition_left_sdtm_domain, ~condition_left_sdtm_variable, ~condition_operator, ~condition_right_text_value, ~condition_right_sdtm_domain, ~condition_right_sdtm_variable, ~condition_right_raw_dataset, ~condition_right_raw_variable, ~condition_next_logical_operator, ~merge_type, ~merge_left, ~merge_right, ~merge_condition, ~unduplicate_keys, ~groupby_keys, ~target_resource_raw_dataset, ~target_resource_raw_variable, @@ -47,7 +46,7 @@ test_that("generate_code works for multiple topics domain", { "lp_study", "vitals_raw_data", "Vital Signs", "SYS_BP", "Systolic blood pressure", "6", "Text", "3", NA, NA, "FALSE", "2", "FALSE", "VS.VSTEST = 'Systolic Blood Pressure'", "VS", "VSTEST", "Synonym Qualifier", "SYS_BP", "C67153", "(VSTEST)", "8", "Assigned", "hardcode_ct", NA, NA, "Systolic Blood Pressure", "Systolic Blood Pressure", "C25298", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "lp_study", "vitals_raw_data", "Vital Signs", "SYS_BP", "Systolic blood pressure", "6", "Text", "3", NA, NA, "FALSE", "1", "FALSE", "VS.VSORRES when VS.VSTESTCD = 'SYSBP'", "VS", "VSORRES", "Result Qualifier", "SYS_BP", NA, NA, "12", "CRF", "assign_no_ct", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "lp_study", "vitals_raw_data", "Vital Signs", "SYS_BP", "Systolic blood pressure", "6", "Text", "3", NA, NA, "FALSE", "3", "FALSE", "VS.VSORRESU = ", "VS", "VSORRESU", "Variable Qualifier", "SYS_BP", "C66770", "(VSRESU)", "13", "Assigned", "hardcode_ct", NA, NA, "mmHg", "mmHg", "C49670", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA - ) + ) # nolint end # Convert all NA to NA_character_ From d816f1c3361354603f32c84837eb8f1e5dd02c1a Mon Sep 17 00:00:00 2001 From: Edgar Manukyan Date: Wed, 11 Dec 2024 04:20:45 +0000 Subject: [PATCH 34/35] #107 styler --- tests/testthat/test-generate_code.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-generate_code.R b/tests/testthat/test-generate_code.R index cc9ef66c..4d4cd6c0 100644 --- a/tests/testthat/test-generate_code.R +++ b/tests/testthat/test-generate_code.R @@ -2,8 +2,8 @@ test_that("generate_code works for one topic domain", { # nolint start spec <- tibble::tribble( ~study_number, ~raw_dataset, ~raw_dataset_label, ~raw_variable, ~raw_variable_label, ~raw_variable_ordinal, ~raw_variable_type, ~raw_data_format, ~raw_fmt, ~raw_unk, ~study_specific, ~annotation_ordinal, ~mapping_is_dataset, ~annotation_text, ~target_sdtm_domain, ~target_sdtm_variable, ~target_sdtm_variable_role, ~topic, ~target_sdtm_variable_codelist_code, ~target_sdtm_variable_controlled_terms_or_format, ~target_sdtm_variable_ordinal, ~origin, ~mapping_algorithm, ~entity_sub_algorithm, ~target_hardcoded_value, ~target_term_value, ~target_value, ~target_term_code, ~condition_ordinal, ~condition_group_ordinal, ~condition_add_raw_dat, ~condition_add_tgt_dat, ~condition_left_raw_dataset, ~condition_left_raw_variable, ~condition_left_sdtm_domain, ~condition_left_sdtm_variable, ~condition_operator, ~condition_right_text_value, ~condition_right_sdtm_domain, ~condition_right_sdtm_variable, ~condition_right_raw_dataset, ~condition_right_raw_variable, ~condition_next_logical_operator, ~merge_type, ~merge_left, ~merge_right, ~merge_condition, ~unduplicate_keys, ~groupby_keys, ~target_resource_raw_dataset, ~target_resource_raw_variable, - "lp_study", "cm_raw_data", "Concomitant Medications", "IT.CMTRT", "var label", "3", "Text", "$200", NA, NA, "FALSE", "1", "FALSE", "CM.CMTRT", "CM", "CMTRT", "Topic Variable", "CMTRT", NA, NA, "10", "CRF", "assign_no_ct", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, - "lp_study", "cm_raw_data", "Concomitant Medications", "IT.CMINDC", "var label", "4", "Text", "$100", NA, NA, "FALSE", "1", "FALSE", "CM.CMINDC", "CM", "CMINDC", "Record Qualifier", "CMTRT", NA, NA, "19", "CRF", "assign_no_ct", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA + "lp_study", "cm_raw_data", "Concomitant Medications", "IT.CMTRT", "var label", "3", "Text", "$200", NA, NA, "FALSE", "1", "FALSE", "CM.CMTRT", "CM", "CMTRT", "Topic Variable", "CMTRT", NA, NA, "10", "CRF", "assign_no_ct", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + "lp_study", "cm_raw_data", "Concomitant Medications", "IT.CMINDC", "var label", "4", "Text", "$100", NA, NA, "FALSE", "1", "FALSE", "CM.CMINDC", "CM", "CMINDC", "Record Qualifier", "CMTRT", NA, NA, "19", "CRF", "assign_no_ct", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA ) # nolint end From d1f6db6f5a7dc7ed58edca94202ef8f922121991 Mon Sep 17 00:00:00 2001 From: Edgar Manukyan Date: Wed, 11 Dec 2024 16:37:02 +0000 Subject: [PATCH 35/35] #107 comment alignment --- R/generate_code.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/generate_code.R b/R/generate_code.R index 0e66367c..e86c50a1 100644 --- a/R/generate_code.R +++ b/R/generate_code.R @@ -10,11 +10,11 @@ # - I did not generate id_vars since the default values was enough, certainly we # can add it later when the customized id_vars are needed. # - Some extra manipulations done in get_domain_spec() -# - Moved VSTESTCD before qualifired, e.g. VSSTAT, VSPOS so that topic is created first +# - Moved VSTESTCD before qualifiers, e.g. VSSTAT, VSPOS so that topic is created first # - Changed mapping_is_dataset to TRUE for VSTPT, VSDTC # - Added a new column topic, showing to which topic the mapping belongs to # - Some code list codes were populated in target_sdtm_variable_controlled_terms_or_format -# I moved them under target_sdtm_variable_codelist_code, e.g. VSPOS, VSLOC +# I moved them under target_sdtm_variable_codelist_code, e.g. VSPOS, VSLOC #' Generate the code for the mapping SDTM specification #'