Skip to content

Commit

Permalink
#107 poc of two vars
Browse files Browse the repository at this point in the history
  • Loading branch information
edgar-manukyan committed Nov 25, 2024
1 parent 0b6ade6 commit 1294c41
Show file tree
Hide file tree
Showing 8 changed files with 292 additions and 1 deletion.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,8 @@ Imports:
stringr (>= 1.4.0),
assertthat,
pillar,
cli
cli,
styler
Suggests:
knitr,
htmltools,
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ export(derive_seq)
export(derive_study_day)
export(domain_example)
export(fmt_cmp)
export(generate_code)
export(generate_oak_id_vars)
export(hardcode_ct)
export(hardcode_no_ct)
Expand All @@ -25,6 +26,7 @@ export(problems)
export(read_ct_spec)
export(read_ct_spec_example)
export(read_domain_example)
export(read_spec)
export(sbj_vars)
importFrom(dplyr,mutate)
importFrom(pillar,ctl_new_rowid_pillar)
Expand Down
171 changes: 171 additions & 0 deletions R/generate_code.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
#' Generate the code for the mapping SDTM specification
#'
#' @param spec The specification data frame.
#' @param domain The SDTM domain to generate the code for.
#' @param out_dir The directory to save the code file. Default is the current
#' directory.
#'
#' @return Side effect: the code is generated and saved to a file.
#' @export
#'
#' @examples
#'
#' \dontrun{
#' spec <- read_spec("cm_sdtm_oak_spec_cdash.csv")
#' domain <- "cm"
#' generate_code(spec, domain)
#' }
#'
generate_code <- function(spec, domain, out_dir = ".") {

admiraldev::assert_data_frame(spec, required_vars = rlang::parse_exprs(expected_columns))
admiraldev::assert_character_scalar(domain)

# For now assuming that there is only one topic and the topic is the first one

spec_domain <- spec |>
dplyr::filter(tolower(target_sdtm_domain) %in% tolower(domain)) |>
# TODO
# Doing only few variables
dplyr::filter(target_sdtm_variable %in% c("CMTRT", "CMINDC")) |>

dplyr::select(
raw_dataset,
raw_variable,
target_sdtm_variable,
mapping_algorithm
)

n_rows <- nrow(spec_domain)

# Generate the code for each variable row in spec_domain
code_blocks <- purrr::map(
seq_len(n_rows),
\(row) generate_one_var_code(
spec_domain[row, ],
last_var = identical(row, n_rows)
)
) |>
unlist()

styled_code <- styler::style_text(code_blocks)

# Save the code to a file
file_name <- paste0(domain, "_sdtm_oak_code.R")
writeLines(styled_code, file.path(out_dir, file_name))
}

#' Generate the code for one variable
#'
#' @param spec_var The specification for one variable.
#' @param last_var Logical indicating if this is the last variable in the domain.
#'
#' @return The code for the variable as a string.
#' @keywords internal
#'
generate_one_var_code <- function(spec_var, last_var = FALSE) {

admiraldev::assert_data_frame(spec_var)

# Generate the function call
generated_call <- with(spec_var, {
rlang::call2(
mapping_algorithm,
raw_dat = rlang::sym(raw_dataset),
raw_var = raw_variable,
tgt_var = target_sdtm_variable
)
})

# Convert the call to code as a string. Intentionally limiting the width to 20
# characters to force each parameter to be on a separate line.
raw_code <- rlang::expr_deparse(generated_call, width = 20)

# Add the pipe operator if this is not the last variable
if (!last_var) {
raw_code[length(raw_code)] <- paste0(raw_code[length(raw_code)], " %>%")
}

return(raw_code)
}


#' Read the specification file
#'
#' @param file The path to the specification file.
#'
#' @returns A tibble with the specification.
#' @export
#'
#' @examples
#'
#' \dontrun{
#' file <- "cm_sdtm_oak_spec_cdash.csv"
#' observed <- read_spec(file)
#' }
#'
read_spec <- function(file) {

admiraldev::assert_character_scalar(file)

spec <- utils::read.csv(file = file, na.strings = c("NA", ""), colClasses = "character") |>
tibble::as_tibble()

admiraldev::assert_data_frame(spec, required_vars = rlang::parse_exprs(expected_columns))

return(spec)
}

#' Expected columns in the specification file
#'
#' @keywords internal
#'
expected_columns <- c(
"study_number",
"raw_dataset",
"raw_dataset_label",
"raw_variable",
"raw_variable_label",
"raw_variable_ordinal",
"raw_variable_type",
"raw_data_format",
"study_specific",
"annotation_ordinal",
"mapping_is_dataset",
"annotation_text",
"target_sdtm_domain",
"target_sdtm_variable",
"target_sdtm_variable_role",
"target_sdtm_variable_codelist_code",
"target_sdtm_variable_controlled_terms_or_format",
"target_sdtm_variable_ordinal",
"origin",
"mapping_algorithm",
"entity_sub_algorithm",
"target_hardcoded_value",
"target_term_value",
"target_term_code",
"condition_ordinal",
"condition_group_ordinal",
"condition_add_raw_dat",
"condition_add_tgt_dat",
"condition_left_raw_dataset",
"condition_left_raw_variable",
"condition_left_sdtm_domain",
"condition_left_sdtm_variable",
"condition_operator",
"condition_right_text_value",
"condition_right_sdtm_domain",
"condition_right_sdtm_variable",
"condition_right_raw_dataset",
"condition_right_raw_variable",
"condition_next_logical_operator",
"merge_type",
"merge_left",
"merge_right",
"merge_condition",
"unduplicate_keys",
"groupby_keys",
"target_resource_raw_dataset",
"target_resource_raw_variable"
)
16 changes: 16 additions & 0 deletions man/expected_columns.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

31 changes: 31 additions & 0 deletions man/generate_code.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 20 additions & 0 deletions man/generate_one_var_code.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

25 changes: 25 additions & 0 deletions man/read_spec.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

25 changes: 25 additions & 0 deletions tests/testthat/test-generate_code.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
test_that("generate_code works", {

# nolint start
spec <- tibble::tribble(
~study_number, ~raw_dataset, ~raw_dataset_label, ~raw_variable, ~raw_variable_label, ~raw_variable_ordinal, ~raw_variable_type, ~raw_data_format, ~study_specific, ~annotation_ordinal, ~mapping_is_dataset, ~annotation_text, ~target_sdtm_domain, ~target_sdtm_variable, ~target_sdtm_variable_role, ~target_sdtm_variable_codelist_code, ~target_sdtm_variable_controlled_terms_or_format, ~target_sdtm_variable_ordinal, ~origin, ~mapping_algorithm, ~entity_sub_algorithm, ~target_hardcoded_value, ~target_term_value, ~target_term_code, ~condition_ordinal, ~condition_group_ordinal, ~condition_add_raw_dat, ~condition_add_tgt_dat, ~condition_left_raw_dataset, ~condition_left_raw_variable, ~condition_left_sdtm_domain, ~condition_left_sdtm_variable, ~condition_operator, ~condition_right_text_value, ~condition_right_sdtm_domain, ~condition_right_sdtm_variable, ~condition_right_raw_dataset, ~condition_right_raw_variable, ~condition_next_logical_operator, ~merge_type, ~merge_left, ~merge_right, ~merge_condition, ~unduplicate_keys, ~groupby_keys, ~target_resource_raw_dataset, ~target_resource_raw_variable,
"lp_study", "cm_raw_data", "Concomitant Medications", "IT.CMTRT", "var label", "3", "Text", "$200", "FALSE", "1", "FALSE", "CM.CMTRT", "CM", "CMTRT", "Topic Variable", NA, NA, "10", "CRF", "assign_no_ct", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
"lp_study", "cm_raw_data", "Concomitant Medications", "IT.CMINDC", "var label", "4", "Text", "$100", "FALSE", "1", "FALSE", "CM.CMINDC", "CM", "CMINDC", "Record Qualifier", NA, NA, "19", "CRF", "assign_no_ct", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
)
# nolint end

domain <- "cm"

temp_dir <- tempdir()
out_dir <- file.path(temp_dir, "data/generate_code")
unlink(out_dir, recursive = TRUE, force = TRUE)
dir.create(out_dir, showWarnings = FALSE, recursive = TRUE)

generate_code(spec, domain, out_dir)


observed <- readLines(file.path(out_dir, paste0(domain, "_sdtm_oak_code.R")))

expect_true(identical(length(observed), 10L))
expect_true(grepl("CMTRT", observed[3]))
})

0 comments on commit 1294c41

Please sign in to comment.