new updates for recalib,no program level, new data

pph-collective · Feb 4, 2022 · b24d60c · b24d60c
1 parent f36f58d
commit b24d60c
Show file tree

Hide file tree

Showing 15 changed files with 751 additions and 225 deletions.
diff --git a/Inputs/MasterTable.xlsx b/Inputs/MasterTable.xlsx
diff --git a/analyze_calibration.R b/analyze_calibration.R
@@ -30,7 +30,7 @@ source("data_input.R")
 
 calibration_results <- NULL
 
-for (batch.ind in 1:10) {
+for (batch.ind in 1:5) {
   temp_results <- readRDS(paste0("calibration/CalibrationOutputs", batch.ind, ".rds"))
   calibration_results <- rbind(calibration_results, temp_results)
 }
@@ -67,7 +67,7 @@ for (ss in 1:nrow(calibration_results)) {
 
 # sort by goodness of fit and select top 100 fits
 sorted.mx <- calibration_results[order(calibration_results[, "gof"], decreasing = F), ]
-cal_sample <- 50
+cal_sample <- 40
 calibration_results_subset <- sorted.mx[1:cal_sample, ]
 write.xlsx(calibration_results,
   file = paste0("calibration/Calibrated_results.xlsx"),

diff --git a/calibrate.R b/calibrate.R
@@ -20,7 +20,6 @@ rm(list = ls())
 # Authors: Xiao Zang, PhD, Sam Bessey, MS
 #
 # People, Place and Health Collective, Department of Epidemiology, Brown University
-#
 
 rm(list = ls())
 print("Loading required packages")
@@ -38,17 +37,20 @@ args <- arg_parser("arguments")
 args <- add_argument(args, "--seed", help = "seed for latin hypercube sampling", default = 5112021)
 args <- add_argument(args, "--outfile", help = "file to store outputs", default = "OverdoseDeath_RIV1_0.csv")
 args <- add_argument(args, "--params", help = "name of the file with calibration params", default = "Inputs/CalibrationSampleData1.rds")
+args <- add_argument(args, "--ppl", help = "file with initial ppl info", default = "Inputs/init_pop.rds")
 args <- add_argument(args, "--cores", help = "number of cores for parallel processing", default = 4)
 args <- add_argument(args, "--index", help = "index of batch", default = 1)
-args <- add_argument(args, "--size", help = "batch size", default = 1000)
+args <- add_argument(args, "--size", help = "batch size", default = 10000)
 
 argv <- parse_args(args)
 seed <- as.integer(argv$seed)
 cores <- as.integer(argv$cores)
-batch.ind <- 4
+batch.ind <- 1
 batch.size <- as.integer(argv$size)
+# init_ppl <- readRDS(argv$ppl)
 # note: outfile, params not used
 
+
 # make and register the cluster given the provided number of cores
 c1 <- makeCluster(cores, outfile = "")
 registerDoParallel(c1)
@@ -70,21 +72,9 @@ if (file.exists(paste0("calibration/prep_calibration_data/Calib_par_table.rds"))
   Calibration.data.ls <- readRDS(paste0("calibration/prep_calibration_data/CalibrationSampleData", batch.ind, ".rds"))
 } else {
   ## Specify the number of calibration random parameter sets
-  sample.size <- 10000 # total number of calibration samples
+  sample.size <- 1000000 # total number of calibration samples
   source("prep_calibration_data.R")
   Calibration.data.ls <- readRDS(paste0("calibration/prep_calibration_data/CalibrationSampleData", batch.ind, ".rds"))
-  # # load calibration parameter bounds and values
-  # CalibPar <- read.xlsx("Inputs/MasterTable.xlsx", "CalibPar")
-  # parRange <- data.frame(min = CalibPar$lower, max = CalibPar$upper)
-  # row.names(parRange) <- CalibPar$par
-  # 
-  # # create and save calibration parameters using latin hypercube sampling with a set seed
-  # set.seed(seed)
-  # calib.par <- Latinhyper(parRange, sample.size) %>% data.frame()
-  # saveRDS(calib.par, paste0("Inputs/Calib_par_table.rds")) # save sampled calibration parameter values
-  # 
-  # Calibration.data.ls <- readRDS(paste0("Inputs/CalibrationSampleData", batch.ind, ".rds"))
-
 }
 
 # generate stepwise seeds for calibration starting at initial seed
@@ -107,40 +97,33 @@ colnames(calibration_temp) <- c("od.death16", "od.death17", "od.death18", "od.de
                                    "fx.death16", "fx.death17", "fx.death18", "fx.death19",
                                    "ed.visit16", "ed.visit17", "ed.visit18", "ed.visit19")  
 v.region <- Calibration.data.ls[[1]]$v.region # load vector for regions (required by simulation)
-
-for (jj in 1:length(Calibration.data.ls)){
-  Calibration.data.ls[[jj]]$NxDataPharm$pe  <- 0 # ATTN: This is temporary, used to manually remove pharamacy naloxone
-}
 # parallel calibration simulation
 # TODO: look into apply functions for parallel
 
-for (ss in 1:length(Calibration.data.ls)){
-  print(paste0("Batch: ", batch.ind, "; simulation: ", ss))
+## Run model calibration with parallel operation
+calibration_temp <- foreach(ss = 1:length(Calibration.data.ls), .combine = rbind, .packages = c("dplyr", "abind"), .errorhandling = "remove") %dopar% {
   yr_start <- 2016 # simulation first year
-  yr_end <- 2020 # simulation last year
-  ppl_info <- c(
-    "sex", "race", "age", "residence", "curr.state",
-    "OU.state", "init.age", "init.state", "ever.od", "fx"
-  ) # information for each model individual
+  yr_end <- 2019 # simulation last year
   agent_states <- c("preb", "il.lr", "il.hr", "inact", "NODU", "relap", "dead") # vector for state names
   v.oustate <- c("preb", "il.lr", "il.hr") # vector for active opioid use state names
   num_states <- length(agent_states) # number of states
   num_years <- yr_end - yr_start + 1
   timesteps <- 12 * num_years # number of time cycles (in month)
   num_regions <- length(v.region) # number of regions
-
+  discount.rate <- 0.03 # discounting of costs by 3%
+
   # OUTPUT matrices and vectors
   v.od <- rep(0, times = timesteps) # count of overdose events at each time step
   v.oddeath <- rep(0, times = timesteps) # count of overdose deaths at each time step
+  v.oddeath.w <- rep(0, times = timesteps) # count of overdose deaths that were witnessed at each time step
   m.oddeath <- matrix(0, nrow = timesteps, ncol = num_regions)
   colnames(m.oddeath) <- v.region
   v.odpriv <- rep(0, times = timesteps) # count of overdose events occurred at private setting at each time step
   v.odpubl <- rep(0, times = timesteps) # count of overdose events occurred at public setting at each time step
   v.deathpriv <- rep(0, times = timesteps) # count of overdose deaths occurred at private setting at each time step
   v.deathpubl <- rep(0, times = timesteps) # count of overdose deaths occurred at public setting at each time step
-  v.nlxused <- rep(0, times = timesteps) # count of overdose deaths occurred at public setting at each time step
-  v.str <- c("SQ", "Expand100") # store the strategy names
-  d.c <- 0.03 # discounting of costs by 3%
+  v.nlxused <- rep(0, times = timesteps) # count of naloxone kits used at each time step
+  v.str <- c("SQ", "expand", "program") # store the strategy names
   cost.item <- c("TotalCost", "NxCost")
   cost.matrix <- matrix(0, nrow = timesteps, ncol = length(cost.item))
   colnames(cost.matrix) <- cost.item
@@ -149,57 +132,17 @@ for (ss in 1:length(Calibration.data.ls)){
   m.oddeath.st <- rep(0, times = timesteps) # count of overdose deaths among stimulant users at each time step
   m.EDvisits <- rep(0, times = timesteps) # count of opioid overdose-related ED visits at each time step
   m.oddeath.hr <- rep(0, times = timesteps) # count of overdose deaths among high-risk opioid users (inject heroin) at each time step
-
+  m.oddeath.preb <- m.oddeath.il.lr <- m.oddeath.il.hr <- m.nlx.mn.OEND <- m.nlx.mn.Pharm <- rep(0, times = timesteps)   # count of overdose deaths stratified by risk group each time step
+
   ## Initialize the study population - people who are at risk of opioid overdose
   ppl_info <- c("sex", "race", "age", "residence", "curr.state", "OU.state", "init.age", "init.state", "ever.od", "fx")
   init_ppl <- readRDS(paste0("Inputs/init_pop.rds"))
-  
+
   outcomes <- parallel.fun(calib.seed = calib.seed.vt[ss], params = Calibration.data.ls[[ss]])
-  calibration_temp[ss, ] <- outcomes
+  outcomes
 }
-# calibration_temp <- foreach(ss = 1:length(Calibration.data.ls), .combine = rbind, .packages = c("dplyr", "abind"), .errorhandling = "remove") %dopar% {
-#   yr_start <- 2016 # simulation first year
-#   yr_end <- 2020 # simulation last year
-#   ppl_info <- c(
-#     "sex", "race", "age", "residence", "curr.state",
-#     "OU.state", "init.age", "init.state", "ever.od", "fx"
-#   ) # information for each model individual
-#   agent_states <- c("preb", "il.lr", "il.hr", "inact", "NODU", "relap", "dead") # vector for state names
-#   v.oustate <- c("preb", "il.lr", "il.hr") # vector for active opioid use state names
-#   num_states <- length(agent_states) # number of states
-#   num_years <- yr_end - yr_start + 1
-#   timesteps <- 12 * num_years # number of time cycles (in month)
-#   num_regions <- length(v.region) # number of regions
-# 
-#   # OUTPUT matrices and vectors
-#   v.od <- rep(0, times = timesteps) # count of overdose events at each time step
-#   v.oddeath <- rep(0, times = timesteps) # count of overdose deaths at each time step
-#   m.oddeath <- matrix(0, nrow = timesteps, ncol = num_regions)
-#   colnames(m.oddeath) <- v.region
-#   v.odpriv <- rep(0, times = timesteps) # count of overdose events occurred at private setting at each time step
-#   v.odpubl <- rep(0, times = timesteps) # count of overdose events occurred at public setting at each time step
-#   v.deathpriv <- rep(0, times = timesteps) # count of overdose deaths occurred at private setting at each time step
-#   v.deathpubl <- rep(0, times = timesteps) # count of overdose deaths occurred at public setting at each time step
-#   v.nlxused <- rep(0, times = timesteps) # count of overdose deaths occurred at public setting at each time step
-#   v.str <- c("SQ", "Expand100") # store the strategy names
-#   d.c <- 0.03 # discounting of costs by 3%
-#   cost.item <- c("TotalCost", "NxCost")
-#   cost.matrix <- matrix(0, nrow = timesteps, ncol = length(cost.item))
-#   colnames(cost.matrix) <- cost.item
-#   m.oddeath.fx <- rep(0, times = timesteps) # count of overdose deaths with fentanyl present at each time step
-#   m.oddeath.op <- rep(0, times = timesteps) # count of overdose deaths among opioid users at each time step
-#   m.oddeath.st <- rep(0, times = timesteps) # count of overdose deaths among stimulant users at each time step
-#   m.EDvisits <- rep(0, times = timesteps) # count of opioid overdose-related ED visits at each time step
-#   m.oddeath.hr <- rep(0, times = timesteps) # count of overdose deaths among high-risk opioid users (inject heroin) at each time step
-# 
-#   ## Initialize the study population - people who are at risk of opioid overdose
-#   ppl_info <- c("sex", "race", "age", "residence", "curr.state", "OU.state", "init.age", "init.state", "ever.od", "fx")
-#   init_ppl <- readRDS(paste0("Inputs/init_pop.rds"))
-# 
-#   outcomes <- parallel.fun(calib.seed = calib.seed.vt[ss], params = Calibration.data.ls[[ss]])
-# }
+stopCluster(c1)   #optional: stop clustering
 
-calibration_results[, 3:14] <- calibration_temp #  calibration results
-saveRDS(calibration_results, paste0("calibration/CalibrationOutputs", batch.ind, ".rds")) # save calibration_results table to an rds, will combine all 10 tables/bacthes in a subsequent process
 
-# stopCluster(c1)   #optional: stop clustering
+calibration_results[, 3:14] <- calibration_temp #  calibration results
+saveRDS(calibration_results, paste0("calibration/CalibrationOutputs", batch.ind, ".rds")) # save calibration_results table to an rds, will combine all 10 tables/bacthes in a subsequent process
diff --git a/calibration/CalibratedData.rds b/calibration/CalibratedData.rds
diff --git a/calibration/CalibratedSeed.rds b/calibration/CalibratedSeed.rds
diff --git a/calibration/Calibrated_results.xlsx b/calibration/Calibrated_results.xlsx