Skip to content

Commit

Permalink
Merge pull request #144 from SafetyGraphics/stack_events
Browse files Browse the repository at this point in the history
new stack_events function + tests
  • Loading branch information
jwildfire authored Jun 16, 2023
2 parents 3e4b20e + 1997fa6 commit cb2bb82
Show file tree
Hide file tree
Showing 5 changed files with 236 additions and 0 deletions.
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,10 @@ import(Tplyr)
import(dplyr)
import(ggplot2)
import(htmlwidgets)
import(purrr)
import(rlang)
import(shiny)
import(tidyr)
importFrom(huxtable,as_hux)
importFrom(huxtable,set_align)
importFrom(huxtable,set_bold)
Expand Down
85 changes: 85 additions & 0 deletions R/stack_events.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
#' Combine Event Domains
#'
#' @param data safetyGraphics data object
#' @param settings safetyGraphics settings object
#' @param domains list of safetyGraphics domains. Adverse events ('aes'), concominate medications ('cm') and exposure ('ex') included by default.
#' @return combined dataset with stacked AE and CM data

#'
#' @examples
#' stack_events(
#' )

stack_events <- function(data, settings, domains=c("aes","cm", "ex")){

all_events <- domains %>% map(function(domain){
# check it exists in the data
if(!domain %in% names(data)){
message(paste0("'",domain, "' data not found in data object and will be skipped in stacked event data."))
return(NULL)

}else{
domain_data <- data[[domain]]
domain_settings<- settings[[domain]]
return(standardize_events(domain_data, domain_settings, domain=domain))
}

}) %>% bind_rows

return(all_events)
}

#' Create a standardized event data set
#'
#' Create an event data set with a standard set of hard-coded column names using standard safetyGraphics settings and data. The settings for each specified domain should contain valid mappings for ID ("id_col"), event start date ("stdy_col") and event end date ("endy_col"). Missing start day and end day values are extrapolated to NA. All other columns specified in settings are collapsed into a single "details" column. The final standardized data contains the following columns: "id", "domain", "stdy", "endy", "details".
#'
#' @param data safetyGraphics data object
#' @param settings safetyGraphics settings object.
#' @param domains list of safetyGraphics domains. Adverse events ('aes'), concominate medications ('cm') and exposure ('ex') included by default.
#' @return combined dataset with stacked AE and CM data
#'
#' @import purrr
#' @import tidyr

standardize_events <- function(data, settings, domain=""){
# stop if id_col doesn't exist
stopifnot("id_col" %in% names(settings))
stopifnot(settings$id_col %in% names(data))

# add stdy_col & endy_col if missing
if(!"stdy_col" %in% names(settings)){
settings$stdy_col <- 'stdy'
data$stdy<-NA
}else{
stopifnot(settings$stdy_col %in% names(data))
}

if(!"endy_col" %in% names(settings)){
settings$endy_col <- 'endy'
data$endy<-NA
}else{
stopifnot(settings$endy_col %in% names(data))
}

# make a details object with all other columns in settings
cols<-as.character(settings)
details <- data %>%
select(all_of(cols)) %>%
select(-settings[["id_col"]]) %>%
select(-settings[["stdy_col"]]) %>%
select(-settings[["endy_col"]]) %>%
imap( ~ paste(.y, .x, sep=": ")) %>%
as_tibble %>%
unite("details", sep="\n")

# get id, start day and end day
event_data <- data %>% select(
id = settings[["id_col"]],
stdy = settings[["stdy_col"]],
endy = settings[["endy_col"]]
) %>%
bind_cols(details) %>%
mutate(domain = domain)

return(event_data)
}
28 changes: 28 additions & 0 deletions man/stack_events.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

21 changes: 21 additions & 0 deletions man/standardize_events.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

100 changes: 100 additions & 0 deletions tests/testthat/test_stack_events.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
# Test data

settings <- list(
aes = list(
id_col = "USUBJID",
bodsys_col = "AEBODSYS",
term_col = "AEDECOD",
term_col = "AETERM",
severity_col = "AESEV",
stdy_col = "AESTDY",
endy_col = "AEENDY"
),
cm = list(
id_col = "USUBJID",
cmtrt_col = "CMTRT",
stdy_col = "CMSTDY",
endy_col = "CMENDY",
class_col = "CMCLAS",
desc_col = "CMINDC"
)
)
data <- list(aes=safetyData::sdtm_ae, cm=safetyData::sdtm_cm)


# standardize_events tests

test_that('stops if id_col is missing in settings', {
settings2 <- settings$aes
settings2$id_col <- NULL
expect_error(standardize_events(data$aes, settings2))
})

test_that('stops if id_col in settings is not found in the data', {
settings2$id_col <- "Notanidcol"
expect_error(standardize_events(data$aes, settings2))
})

test_that('stops if stdy_col in settings is not found in the data', {
settings3 <- settings$aes
settings3$stdy_col <- "notacol"
expect_error(standardize_events(data$aes, settings3))
})

test_that('adds col of NA if stdy_col is missing in settings', {
settings3$stdy_col <- NULL
df<-standardize_events(data$aes, settings3)
expect_true(all(is.na(df$stdy)))
})

test_that('stops if endy_col in settings is not found in the data', {
settings4 <- settings$aes
settings4$endy_col <- "notacol"
expect_error(standardize_events(data$aes, settings4))
})

test_that('adds col of NA if stdy_col is missing in settings', {
settings4$endy_col <- NULL
df<-standardize_events(data$aes, settings4)
expect_true(all(is.na(df$endy)))
})

aedf<-standardize_events(data$aes, settings$aes,domain="aes")
test_that('returns a data.frame with the expected names', {
expect_true(is.data.frame(aedf))
expect_setequal(names(aedf), c("id","domain","stdy","endy","details"))
})

test_that('ae output matches raw data', {
expect_equal(aedf$id, data$aes$USUBJID)
expect_equal(aedf$stdy, data$aes$AESTDY)
expect_equal(aedf$endy, data$aes$AEENDY)
})

test_that('ae domain is set correctly',{
expect_true(all(aedf$domain=="aes"))
})


cmdf<-standardize_events(data$cm, settings$cm, domain="cm")
test_that('cm returns a data.frame with the expected names', {
expect_true(is.data.frame(cmdf))
expect_setequal(names(cmdf), c("id","domain","stdy","endy","details"))
})

#stack events tests

all<-stack_events(data,settings,domains=c("aes","cm"))
test_that('returns a dataframe with the right rows/cols', {
expect_true(is.data.frame(all))
expect_setequal(names(all), c("id","domain","stdy","endy","details"))
expect_equal(nrow(all), nrow(data$aes)+nrow(data$cm))
})

all2<-stack_events(data,settings,domains=c("aes","cm","some","other","domains"))
test_that('ignores invalid domains', {
expect_message(stack_events(data,settings,domains=c("aes","cm","some","other","domains")))
expect_true(is.data.frame(all2))
expect_setequal(names(all2), c("id","domain","stdy","endy","details"))
expect_equal(nrow(all2), nrow(data$aes)+nrow(data$cm))
})

0 comments on commit cb2bb82

Please sign in to comment.