diff --git a/CITATION.cff b/CITATION.cff index d0503012..07942726 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -104,26 +104,6 @@ references: email: adam.kucharski@lshtm.ac.uk orcid: https://orcid.org/0000-0001-8814-9421 year: '2024' -- type: software - title: bpmodels - abstract: 'bpmodels: Simulating and Analysing Transmission Chain Statistics using - Branching Process Models' - notes: Imports - url: https://epiverse-trace.github.io/bpmodels/ - authors: - - family-names: Azam - given-names: James M. - email: james.azam@lshtm.ac.uk - orcid: https://orcid.org/0000-0001-5782-7330 - - family-names: Finger - given-names: Flavio - email: flavio.finger@epicentre.msf.org - orcid: https://orcid.org/0000-0002-8613-5170 - - family-names: Funk - given-names: Sebastian - email: sebastian.funk@lshtm.ac.uk - orcid: https://orcid.org/0000-0002-2842-3406 - year: '2024' - type: software title: randomNames abstract: 'randomNames: Generate Random Given and Surnames' diff --git a/DESCRIPTION b/DESCRIPTION index 1bebd8a1..1d2c17af 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -27,7 +27,6 @@ Imports: stats, checkmate, epiparameter, - bpmodels, randomNames, rlang Suggests: @@ -40,8 +39,7 @@ Suggests: spelling, testthat (>= 3.0.0) Remotes: - epiverse-trace/epiparameter, - epiverse-trace/bpmodels + epiverse-trace/epiparameter Config/testthat/edition: 3 Config/Needs/website: epiverse-trace/epiversetheme diff --git a/R/add_cols.R b/R/add_cols.R index dbcb6f8c..543e0cfb 100644 --- a/R/add_cols.R +++ b/R/add_cols.R @@ -81,13 +81,17 @@ NULL .add_hospitalisation <- function(.data, onset_to_hosp, hosp_risk) { - .data$hospitalisation <- .data$time + onset_to_hosp(nrow(.data)) + infected_idx <- .data$infected == "infected" + num_infected <- sum(infected_idx) + .data$hospitalisation <- NA_real_ + .data$hospitalisation[infected_idx] <- .data$time[infected_idx] + + onset_to_hosp(num_infected) if (is.numeric(hosp_risk)) { pop_sample <- sample( - seq_len(nrow(.data)), + which(infected_idx), replace = FALSE, - size = (1 - hosp_risk) * nrow(.data) + size = (1 - hosp_risk) * num_infected ) .data$hospitalisation[pop_sample] <- NA } else { @@ -113,14 +117,18 @@ NULL onset_to_death, hosp_death_risk, non_hosp_death_risk) { - .data$deaths <- .data$time + onset_to_death(nrow(.data)) + infected_idx <- .data$infected == "infected" + num_infected <- sum(infected_idx) + .data$deaths <- NA_real_ + .data$deaths[infected_idx] <- .data$time[infected_idx] + + onset_to_death(num_infected) apply_death_risk <- function(.data, risk, hosp = TRUE) { if (is.numeric(risk)) { pop_sample <- sample( - seq_len(nrow(.data)), + which(infected_idx), replace = FALSE, - size = (1 - risk) * nrow(.data) + size = (1 - risk) * num_infected ) .data$deaths[pop_sample] <- NA } else { @@ -176,14 +184,7 @@ NULL #' @name .add_info .add_names <- function(.data) { - # create sample of names so there are no duplicates - .data$case_name <- randomNames::randomNames( - which.names = "both", - name.sep = " ", - name.order = "first.last", - gender = .data$gender, - sample.with.replacement = FALSE - ) + .data$case_name <- .sample_names(.data = .data) # left join corresponding names to infectors preserving column and row order infector_names <- data.frame(id = .data$id, infector_name = .data$case_name) diff --git a/R/checkers.R b/R/checkers.R index 35fb10fc..13881222 100644 --- a/R/checkers.R +++ b/R/checkers.R @@ -114,13 +114,13 @@ #' called for its side-effects, which will error if the input is invalid. #' @keywords internal .check_sim_input <- function(sim_type = c("linelist", "contacts", "outbreak"), # nolint cyclocomp_linter - R, - serial_interval, + contact_distribution, + contact_interval, + prob_infect, outbreak_start_date, min_outbreak_size, onset_to_hosp = NULL, onset_to_death = NULL, - contact_distribution = NULL, add_names = NULL, add_ct = NULL, case_type_probs = NULL, @@ -131,8 +131,9 @@ population_age = NULL) { sim_type <- match.arg(sim_type) - checkmate::assert_number(R, lower = 0) - .check_func_req_args(serial_interval) + checkmate::assert_number(prob_infect, lower = 0, upper = 1) + .check_func_req_args(contact_distribution) + .check_func_req_args(contact_interval) checkmate::assert_date(outbreak_start_date) checkmate::assert_integerish(min_outbreak_size, lower = 1) @@ -169,7 +170,6 @@ } if (sim_type == "contacts" || sim_type == "outbreak") { - .check_func_req_args(contact_distribution) checkmate::assert_numeric(contact_tracing_status_probs, len = 3) checkmate::assert_names( names(contact_tracing_status_probs), diff --git a/R/create_config.R b/R/create_config.R index 75694b62..22209cf8 100644 --- a/R/create_config.R +++ b/R/create_config.R @@ -12,6 +12,7 @@ #' * `first_contact_distribution_params = c(lambda = 3)` #' * `ct_distribution = "norm"` #' * `ct_distribution_params = c(mean = 25, sd = 2)` +#' * `network = "adjusted"` #' #' These parameters do not warrant their own arguments in #' [sim_linelist()] as they rarely need to be changed from their default @@ -19,6 +20,13 @@ #' arguments to accommodate these and the `config` argument keeps the function #' signature simpler and more readable. #' +#' The `network` option controls whether to sample contacts from a adjusted or +#' unadjusted contact distribution. Adjusted (default) sampling uses +#' \eqn{q(n) \sim (n + 1)p(n + 1)} where \eqn{p(n)} is the probability +#' density function of a distribution, e.g., Poisson or Negative binomial. +#' Unadjusted (`network = "unadjusted"`) instead samples contacts directly from +#' a probability distribution \eqn{p(n)}. +#' #' @param ... <[`dynamic-dots`][rlang::dyn-dots]> Named elements to replace #' default settings. Only if names match exactly are elements replaced, #' otherwise the function errors. @@ -42,7 +50,8 @@ create_config <- function(...) { first_contact_distribution = "pois", first_contact_distribution_params = c(lambda = 3), ct_distribution = "norm", - ct_distribution_params = c(mean = 25, sd = 2) + ct_distribution_params = c(mean = 25, sd = 2), + network = "adjusted" ) # capture dynamic dots diff --git a/R/sim_contacts.R b/R/sim_contacts.R index ecacee21..3f109fb4 100644 --- a/R/sim_contacts.R +++ b/R/sim_contacts.R @@ -1,9 +1,6 @@ #' Simulate contacts for an infectious disease outbreak #' #' @inheritParams sim_linelist -#' @param contact_distribution An `` object or anonymous function for -#' the contact distribution. This is similar to the offspring distribution, -#' but describes the heterogeneity of contacts that are not infected. #' @param contact_tracing_status_probs A named `numeric` vector with the #' probability of each contact tracing status. The names of the vector must #' be `"under_followup"`, `"lost_to_followup"`, `"unknown"`. Values of each @@ -16,28 +13,28 @@ #' #' @examples #' # load data required to simulate contacts -#' serial_interval <- epiparameter::epidist( +#' contact_distribution <- epiparameter::epidist( #' disease = "COVID-19", -#' epi_dist = "serial interval", -#' prob_distribution = "gamma", -#' prob_distribution_params = c(shape = 1, scale = 1) +#' epi_dist = "contact distribution", +#' prob_distribution = "pois", +#' prob_distribution_params = c(mean = 2) #' ) #' -#' contact_distribution <- epiparameter::epidist( +#' contact_interval <- epiparameter::epidist( #' disease = "COVID-19", -#' epi_dist = "contact_distribution", -#' prob_distribution = "pois", -#' prob_distribution_params = c(l = 5) +#' epi_dist = "contact interval", +#' prob_distribution = "gamma", +#' prob_distribution_params = c(shape = 1, scale = 1) #' ) #' #' contacts <- sim_contacts( -#' R = 1.1, -#' serial_interval = serial_interval, -#' contact_distribution = contact_distribution +#' contact_distribution = contact_distribution, +#' contact_interval = contact_interval, +#' prob_infect = 0.5 #' ) -sim_contacts <- function(R, - serial_interval, - contact_distribution, +sim_contacts <- function(contact_distribution, + contact_interval, + prob_infect, outbreak_start_date = as.Date("2023-01-01"), min_outbreak_size = 10, population_age = c(1, 90), @@ -50,29 +47,28 @@ sim_contacts <- function(R, # check and convert distribution to func if needed before .check_sim_input() stopifnot( "Input delay distributions need to be either functions or " = - inherits(serial_interval, c("function", "epidist")) && - inherits(contact_distribution, c("function", "epidist")) + inherits(contact_interval, c("function", "epidist")) ) - serial_interval <- as.function(serial_interval, func_type = "generate") contact_distribution <- as.function( - contact_distribution, - func_type = "generate" + contact_distribution, func_type = "density" ) + contact_interval <- as.function(contact_interval, func_type = "generate") .check_sim_input( sim_type = "contacts", - R = R, - serial_interval = serial_interval, + contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = prob_infect, outbreak_start_date = outbreak_start_date, min_outbreak_size = min_outbreak_size, - contact_distribution = contact_distribution, contact_tracing_status_probs = contact_tracing_status_probs, population_age = population_age ) chain <- .sim_bp_linelist( - R = R, - serial_interval = serial_interval, + contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = prob_infect, outbreak_start_date = outbreak_start_date, min_outbreak_size = min_outbreak_size, population_age = population_age, @@ -83,11 +79,7 @@ sim_contacts <- function(R, contacts <- .sim_contacts_tbl( .data = chain, - outbreak_start_date = outbreak_start_date, - contact_distribution = contact_distribution, - population_age = population_age, - contact_tracing_status_probs = contact_tracing_status_probs, - config = config + contact_tracing_status_probs = contact_tracing_status_probs ) # return line list diff --git a/R/sim_contacts_tbl.R b/R/sim_contacts_tbl.R index 5de90b5a..5ab11402 100644 --- a/R/sim_contacts_tbl.R +++ b/R/sim_contacts_tbl.R @@ -6,128 +6,41 @@ #' @return A contacts `` #' @keywords internal .sim_contacts_tbl <- function(.data, - outbreak_start_date, - contact_distribution, - population_age, - contact_tracing_status_probs, - config) { + contact_tracing_status_probs) { if (!"infector_name" %in% colnames(.data)) { .data <- .add_names(.data = .data) } - contact_investigation <- subset( + contacts_tbl <- subset( .data, select = c( - "infector", "infector_name", "case_name", "age", "gender", + "infector_name", "case_name", "age", "gender", "date_first_contact", "date_last_contact" ) ) - colnames(contact_investigation) <- c( - "infector", "from", "to", "cnt_age", "cnt_gender", - "date_first_contact", "date_last_contact" + colnames(contacts_tbl) <- c( + "from", "to", "age", "gender", "date_first_contact", + "date_last_contact" ) - contact_investigation <- contact_investigation[-1, ] - contact_investigation$was_case <- "Y" - - other_contacts <- subset(.data, select = c("infector", "infector_time")) - other_contacts <- other_contacts[-1, ] - other_contacts$from <- contact_investigation$from - other_contacts <- subset( - other_contacts, - subset = !duplicated(other_contacts$infector) - ) - - # sample contact distribution for non-infected contacts - contact_freq <- contact_distribution(nrow(other_contacts)) - - # Multiply row by times specified in frequency column V - other_contacts <- - other_contacts[rep(row.names(other_contacts), contact_freq), ] - - # add random age and gender - if (is.data.frame(population_age)) { - age_groups <- apply(population_age, MARGIN = 1, function(x) x[1]:x[2]) - sample_weight <- rep(population_age$proportion, times = lengths(age_groups)) - # normalise for vector length - sample_weight <- sample_weight / - rep(lengths(age_groups), times = lengths(age_groups)) - other_contacts$cnt_age <- sample( - unlist(age_groups), - size = nrow(other_contacts), - replace = TRUE, - prob = sample_weight - ) - } else { - other_contacts$cnt_age <- sample( - population_age[1]:population_age[2], - size = nrow(other_contacts), - replace = TRUE - ) - } - other_contacts$cnt_gender <- sample( - c("m", "f"), - size = nrow(other_contacts), - replace = TRUE - ) - - # Add corresponding names acc to the gender V - other_contacts$to <- randomNames::randomNames( - which.names = "both", - name.sep = " ", - name.order = "first.last", - gender = other_contacts$cnt_gender, - sample.with.replacement = FALSE + contacts_tbl$was_case <- ifelse( + test = .data$infected == "infected", + yes = "Y", + no = "N" ) - # add contact dates - other_contacts <- .add_date_contact( - .data = other_contacts, - contact_type = "last", - distribution = config$last_contact_distribution, - config$last_contact_distribution_params, - outbreak_start_date = outbreak_start_date - ) - other_contacts <- .add_date_contact( - .data = other_contacts, - contact_type = "first", - distribution = config$first_contact_distribution, - config$first_contact_distribution_params - ) - - other_contacts <- subset( - other_contacts, - select = c( - "infector", "from", "to", "cnt_age", "cnt_gender", - "date_first_contact", "date_last_contact" - ) - ) - other_contacts$was_case <- "N" - - # merge both datasets and keep all the values from both - contact_investigation <- rbind(contact_investigation, other_contacts) - contact_investigation <- - contact_investigation[order(contact_investigation$infector), ] - row.names(contact_investigation) <- NULL - # add contact tracing status - pick_N <- which(contact_investigation$was_case == "N") + pick_N <- which(contacts_tbl$was_case == "N") status_type <- sample( names(contact_tracing_status_probs), size = length(pick_N), replace = TRUE, prob = contact_tracing_status_probs ) - contact_investigation$status <- NA - contact_investigation$status[pick_N] <- status_type - contact_investigation$status <- ifelse( - test = is.na(contact_investigation$status), - yes = "case", - no = contact_investigation$status - ) + contacts_tbl$status <- "case" + contacts_tbl$status[pick_N] <- status_type - # remove infector col - contact_investigation$infector <- NULL + contacts_tbl <- contacts_tbl[-1, ] # return contacts - contact_investigation + contacts_tbl } diff --git a/R/sim_linelist.R b/R/sim_linelist.R index a774e415..9ee1ec51 100644 --- a/R/sim_linelist.R +++ b/R/sim_linelist.R @@ -20,10 +20,17 @@ #' * `proportion`: a column with the proportion of the population that are in #' that age group. Proportions must sum to one. #' -#' -#' @param R A single `numeric` for the reproduction number. -#' @param serial_interval An `` object or anonymous function for -#' the serial interval. +#' @param contact_distribution An `` object or anonymous function for +#' the contact distribution. This is any discrete density function that +#' produces non-negative integers (including zero, \eqn{\mathbb{N}_0}) for the +#' number of contacts per infection. +#' @param contact_interval An `` object or anonymous function for +#' the contact interval. This is analogous to the serial interval or generation +#' time, but defines the time interval between an individual being +#' infected/infectious (in the simulation the latency period is assumed to be +#' zero) and having contact with another susceptible individual. +#' @param prob_infect A single `numeric` for the probability of a secondary +#' contact being infected by an infected primary contact. #' @param onset_to_hosp An `` object or anonymous function for #' the onset to hospitalisation delay distribution. #' @param onset_to_death An `` object or anonymous function for @@ -70,9 +77,16 @@ #' #' @examples #' # load data required to simulate line list -#' serial_interval <- epiparameter::epidist( +#' contact_distribution <- epiparameter::epidist( #' disease = "COVID-19", -#' epi_dist = "serial interval", +#' epi_dist = "contact distribution", +#' prob_distribution = "pois", +#' prob_distribution_params = c(mean = 2) +#' ) +#' +#' contact_interval <- epiparameter::epidist( +#' disease = "COVID-19", +#' epi_dist = "contact interval", #' prob_distribution = "gamma", #' prob_distribution_params = c(shape = 1, scale = 1) #' ) @@ -92,8 +106,9 @@ #' ) #' # example with single hospitalisation risk for entire population #' linelist <- sim_linelist( -#' R = 1.1, -#' serial_interval = serial_interval, +#' contact_distribution = contact_distribution, +#' contact_interval = contact_interval, +#' prob_infect = 0.5, #' onset_to_hosp = onset_to_hosp, #' onset_to_death = onset_to_death, #' hosp_risk = 0.5 @@ -109,15 +124,17 @@ #' risk = c(0.1, 0.05, 0.2) #' ) #' linelist <- sim_linelist( -#' R = 1.1, -#' serial_interval = serial_interval, +#' contact_distribution = contact_distribution, +#' contact_interval = contact_interval, +#' prob_infect = 0.5, #' onset_to_hosp = onset_to_hosp, #' onset_to_death = onset_to_death, #' hosp_risk = age_dep_hosp_risk #' ) #' head(linelist) -sim_linelist <- function(R, - serial_interval, +sim_linelist <- function(contact_distribution, + contact_interval, + prob_infect, onset_to_hosp, onset_to_death, hosp_risk = 0.2, @@ -137,18 +154,23 @@ sim_linelist <- function(R, # check and convert distribution to func if needed before .check_sim_input() stopifnot( "Input delay distributions need to be either functions or " = - inherits(serial_interval, c("function", "epidist")) && + inherits(contact_distribution, c("function", "epidist")) && + inherits(contact_interval, c("function", "epidist")) && inherits(onset_to_hosp, c("function", "epidist")) && inherits(onset_to_death, c("function", "epidist")) ) - serial_interval <- as.function(serial_interval, func_type = "generate") + contact_distribution <- as.function( + contact_distribution, func_type = "density" + ) + contact_interval <- as.function(contact_interval, func_type = "generate") onset_to_hosp <- as.function(onset_to_hosp, func_type = "generate") onset_to_death <- as.function(onset_to_death, func_type = "generate") .check_sim_input( sim_type = "linelist", - R = R, - serial_interval = serial_interval, + contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = prob_infect, outbreak_start_date = outbreak_start_date, min_outbreak_size = min_outbreak_size, onset_to_hosp = onset_to_hosp, @@ -191,8 +213,9 @@ sim_linelist <- function(R, } chain <- .sim_bp_linelist( - R = R, - serial_interval = serial_interval, + contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = prob_infect, outbreak_start_date = outbreak_start_date, min_outbreak_size = min_outbreak_size, population_age = population_age, @@ -213,6 +236,7 @@ sim_linelist <- function(R, config = config ) + linelist$chain <- linelist$chain[linelist$chain$infected == "infected", ] chain <- linelist$chain[, linelist$cols] # return line list diff --git a/R/sim_network_bp.R b/R/sim_network_bp.R new file mode 100644 index 00000000..46532a69 --- /dev/null +++ b/R/sim_network_bp.R @@ -0,0 +1,124 @@ +#' Simulate a random network branching process model with a probability of +#' infection for each contact +#' +#' @description +#' Simulate a branching process on a infinite network where the contact +#' distribution provides a function to sample the number of contacts of each +#' individual in the simulation. Each contact is then infected with the +#' probability of infection. The time between each infection is determined +#' by the contact interval function. +#' +#' @details +#' The contact distribution sampled takes the network effect +#' \eqn{q(n) \sim (n + 1)p(n + 1)} where \eqn{p(n)} is the probability +#' density function of a distribution, e.g., Poisson or Negative binomial. +#' That is to say, the probability of having choosing a contact at random by +#' following up a contact chooses individuals with a probability proportional +#' to their number of contacts. The plus one is because one of the contacts +#' was "used" to infect the person. +#' +#' @inheritParams sim_linelist +#' +#' @return A `` with the contact and transmission chain data. +#' @keywords internal +.sim_network_bp <- function(contact_distribution, + contact_interval, + prob_infect, + config) { + if (is.null(config$network) || + !config$network %in% c("adjusted", "unadjusted")) { + stop("Network incorrectly specified, check config", call. = FALSE) + } + + # initialise data object + ancestor <- vector(mode = "integer", 1e5) + generation <- vector(mode = "integer", 1e5) + infected <- vector(mode = "integer", 1e5) + time <- vector(mode = "double", 1e5) + + # store initial individual + ancestor[1] <- NA_integer_ + generation[1] <- 1L + # 1 is infected, 0 is non-infected contact + infected[1] <- 1L + time[1] <- 0 + + # initialise counters + next_gen_size <- 1L + chain_generation <- 1L + chain_size <- 1L + chain_length <- 1L + ancestor_idx <- 1L + prev_ancestors <- 1L + + # run loop until no more individuals are sampled + while (next_gen_size > 0) { + if (config$network == "adjusted") { + # sample contact distribution (excess degree distribution) + q <- contact_distribution(0:1e4 + 1) * (0:1e4 + 1) + q <- q / sum(q) + } else { + q <- contact_distribution(0:1e4) + } + contacts <- sample(0:1e4, size = next_gen_size, replace = TRUE, prob = q) + + # add contacts if sampled + if (sum(contacts) > 0L) { + chain_size <- chain_size + sum(contacts) + chain_length <- chain_length + any(contacts >= 1L) + chain_generation <- chain_generation + 1L + + for (i in seq_along(contacts)) { + + if (contacts[i] > 0) { + + # vec index can be which.min of either generation or ancestor vec + vec_idx <- + which.min(generation):(which.min(generation) + contacts[i] - 1) + + # grow vectors if idx exceeds length + if (max(vec_idx) > length(generation)) { + ancestor <- c(ancestor, vector(mode = "integer", 1e5)) + generation <- c(generation, vector(mode = "integer", 1e5)) + infected <- c(infected, vector(mode = "integer", 1e5)) + time <- c(time, vector(mode = "double", 1e5)) + } + + generation[vec_idx] <- chain_generation + ancestor[vec_idx] <- ancestor_idx[i] + + # sample infected contacts + infect <- stats::rbinom(n = contacts[i], size = 1, prob = prob_infect) + infected[vec_idx] <- as.numeric(infect) + + # add delay time, removing first element of ancestor time as it is NA + time[vec_idx] <- contact_interval(length(vec_idx)) + + time[ancestor == ancestor_idx[i]][-1] + } + } + ancestor_idx <- setdiff(which(infected == 1), prev_ancestors) + prev_ancestors <- c(prev_ancestors, ancestor_idx) + next_gen_size <- length(ancestor_idx) + } else { + next_gen_size <- 0L + } + } + + ancestor <- ancestor[ancestor != 0] + generation <- generation[generation != 0] + + # remove unused IDs + id <- seq_along(generation) + infected <- infected[seq_along(generation)] + infected <- ifelse(test = infected, yes = "infected", no = "contact") + time <- time[seq_along(generation)] + + # return chain as + data.frame( + id = id, + ancestor = ancestor, + generation = generation, + infected = infected, + time = time + ) +} diff --git a/R/sim_outbreak.R b/R/sim_outbreak.R index 19d9cafd..494beccd 100644 --- a/R/sim_outbreak.R +++ b/R/sim_outbreak.R @@ -18,7 +18,14 @@ #' #' @examples #' # load data required to simulate outbreak data -#' serial_interval <- epiparameter::epidist( +#' contact_distribution <- epiparameter::epidist( +#' disease = "COVID-19", +#' epi_dist = "contact distribution", +#' prob_distribution = "pois", +#' prob_distribution_params = c(mean = 2) +#' ) +#' +#' contact_interval <- epiparameter::epidist( #' disease = "COVID-19", #' epi_dist = "serial interval", #' prob_distribution = "gamma", @@ -39,25 +46,18 @@ #' single_epidist = TRUE #' ) #' -#' contact_distribution <- epiparameter::epidist( -#' disease = "COVID-19", -#' epi_dist = "contact_distribution", -#' prob_distribution = "pois", -#' prob_distribution_params = c(l = 5) -#' ) -#' #' outbreak <- sim_outbreak( -#' R = 1.1, -#' serial_interval = serial_interval, +#' contact_distribution = contact_distribution, +#' contact_interval = contact_interval, +#' prob_infect = 0.5, #' onset_to_hosp = onset_to_hosp, -#' onset_to_death = onset_to_death, -#' contact_distribution = contact_distribution +#' onset_to_death = onset_to_death #' ) -sim_outbreak <- function(R, - serial_interval, +sim_outbreak <- function(contact_distribution, + contact_interval, + prob_infect, onset_to_hosp, onset_to_death, - contact_distribution, hosp_risk = 0.2, hosp_death_risk = 0.5, non_hosp_death_risk = 0.05, @@ -80,28 +80,27 @@ sim_outbreak <- function(R, # check and convert distribution to func if needed before .check_sim_input() stopifnot( "Input delay distributions need to be either functions or " = - inherits(serial_interval, c("function", "epidist")) && + inherits(contact_distribution, c("function", "epidist")) && + inherits(contact_interval, c("function", "epidist")) && inherits(onset_to_hosp, c("function", "epidist")) && - inherits(onset_to_death, c("function", "epidist")) && - inherits(contact_distribution, c("function", "epidist")) + inherits(onset_to_death, c("function", "epidist")) ) - serial_interval <- as.function(serial_interval, func_type = "generate") - onset_to_hosp <- as.function(onset_to_hosp, func_type = "generate") - onset_to_death <- as.function(onset_to_death, func_type = "generate") contact_distribution <- as.function( - contact_distribution, - func_type = "generate" + contact_distribution, func_type = "density" ) + contact_interval <- as.function(contact_interval, func_type = "generate") + onset_to_hosp <- as.function(onset_to_hosp, func_type = "generate") + onset_to_death <- as.function(onset_to_death, func_type = "generate") .check_sim_input( sim_type = "outbreak", - R = R, - serial_interval = serial_interval, + contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = prob_infect, outbreak_start_date = outbreak_start_date, min_outbreak_size = min_outbreak_size, onset_to_hosp = onset_to_hosp, onset_to_death = onset_to_death, - contact_distribution = contact_distribution, add_names = add_names, add_ct = add_ct, case_type_probs = case_type_probs, @@ -141,8 +140,9 @@ sim_outbreak <- function(R, } chain <- .sim_bp_linelist( - R = R, - serial_interval = serial_interval, + contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = prob_infect, outbreak_start_date = outbreak_start_date, min_outbreak_size = min_outbreak_size, population_age = population_age, @@ -165,13 +165,10 @@ sim_outbreak <- function(R, contacts <- .sim_contacts_tbl( .data = linelist$chain, - outbreak_start_date = outbreak_start_date, - contact_distribution = contact_distribution, - population_age = population_age, - contact_tracing_status_probs = contact_tracing_status_probs, - config = config + contact_tracing_status_probs = contact_tracing_status_probs ) + linelist$chain <- linelist$chain[linelist$chain$infected == "infected", ] chain <- linelist$chain[, linelist$cols] # return outbreak data diff --git a/R/sim_utils.R b/R/sim_utils.R index ece171ce..d4386628 100644 --- a/R/sim_utils.R +++ b/R/sim_utils.R @@ -14,28 +14,26 @@ NULL #' @name .sim_utils -.sim_bp_linelist <- function(R, - serial_interval, +.sim_bp_linelist <- function(contact_distribution, + contact_interval, + prob_infect, outbreak_start_date, min_outbreak_size, population_age, config) { - chain_size <- 0 + outbreak_size <- 0 max_iter <- 0L # condition on a minimum chain size - while (chain_size < min_outbreak_size) { - chain <- bpmodels::chain_sim( - n = 1, - offspring = "pois", - stat = "size", - serial = serial_interval, - lambda = R, - tree = TRUE, - infinite = 1000 + while (outbreak_size < min_outbreak_size) { + chain <- .sim_network_bp( + contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = prob_infect, + config = config ) - chain_size <- max(chain$id) + outbreak_size <- sum(chain$infected == "infected") max_iter <- max_iter + 1L - if (max_iter >= 1e4) { + if (max_iter >= 1e3) { stop( "Exceeded maximum number of iterations for simulating outbreak. \n", "Change input parameters or min_outbreak_size.", @@ -138,9 +136,9 @@ NULL } # add confirmed, probable, suspected case types - chain$case_type <- sample( + chain$case_type[chain$infected == "infected"] <- sample( x = names(case_type_probs), - size = nrow(chain), + size = sum(chain$infected == "infected"), replace = TRUE, prob = case_type_probs ) @@ -161,3 +159,73 @@ NULL cols = linelist_cols ) } + +#' Sample names using [randomNames::randomNames()] +#' +#' @description +#' Sample names for specified genders by sampling with replacement to avoid +#' exhausting number of name when `sample.with.replacement = FALSE`. The +#' duplicated names during sampling need to be removed to ensure each +#' individual has a unique name. In order to have enough unique names, more +#' names than required are sampled from [randomNames()], and the level of +#' oversampling is determined by the `buffer_factor` argument. A +#' `buffer_factor` too high and the more names are sampled which takes longer, +#' a `buffer_factor` too low and not enough unique names are sampled and +#' the `.sample_names()` function will need to loop until it has enough +#' unique names. +#' +#' @inheritParams .add_date +#' @param buffer_factor A single `numeric` determining the level of +#' oversampling (or buffer) when creating a vector of unique names from +#' [randomNames()]. +#' +#' @return A `character` vector. +#' @keywords internal +.sample_names <- function(.data, + buffer_factor = 1.5) { + m_idx <- .data$gender == "m" + f_idx <- .data$gender == "f" + num_m <- sum(m_idx) + num_f <- sum(f_idx) + num_sample_m <- ceiling(num_m * buffer_factor) + num_sample_f <- ceiling(num_f * buffer_factor) + + # create sample of names so there are no duplicates + names_m <- character(0) + while (length(names_m) < num_m) { + names_m <- unique( + randomNames::randomNames( + which.names = "both", + name.sep = " ", + name.order = "first.last", + gender = rep("M", num_sample_m), + sample.with.replacement = TRUE + ) + ) + } + + names_f <- character(0) + while (length(names_f) < num_f) { + names_f <- unique( + randomNames::randomNames( + which.names = "both", + name.sep = " ", + name.order = "first.last", + gender = rep("F", num_sample_f), + sample.with.replacement = TRUE + ) + ) + } + + # subset to use required number of names + names_m <- names_m[1:num_m] + names_f <- names_f[1:num_f] + + # order names with gender codes from .data + names_mf <- vector(mode = "character", length = nrow(.data)) + names_mf[m_idx] <- names_m + names_mf[f_idx] <- names_f + + # return vector of names + names_mf +} diff --git a/README.Rmd b/README.Rmd index 0337402a..d416edc0 100644 --- a/README.Rmd +++ b/README.Rmd @@ -50,13 +50,21 @@ library(simulist) library(epiparameter) ``` -The line list simulation requires that we define a serial interval, onset-to-hospitalisation delay, and onset-to-death delay. We can load these from the library of epidemiological parameters in the `{epiparameter}` R package if available, or if these are not in the database yet (such as the serial interval for COVID-19) we can define them ourselves. +The line list simulation requires that we define a contact distribution, contact interval, onset-to-hospitalisation delay, and onset-to-death delay. We can load these from the library of epidemiological parameters in the `{epiparameter}` R package if available, or if these are not in the database yet (such as the contact interval for COVID-19) we can define them ourselves. ```{r create-epidists} -# create COVID-19 serial interval -serial_interval <- epiparameter::epidist( +# create COVID-19 contact distribution +contact_distribution <- epiparameter::epidist( + disease = "COVID-19", + epi_dist = "contact distribution", + prob_distribution = "pois", + prob_distribution_params = c(mean = 2) +) + +# create COVID-19 contact interval +contact_interval <- epiparameter::epidist( disease = "COVID-19", - epi_dist = "serial interval", + epi_dist = "contact interval", prob_distribution = "gamma", prob_distribution_params = c(shape = 1, scale = 1) ) @@ -76,12 +84,14 @@ onset_to_death <- epiparameter::epidist_db( ) ``` -To simulate a line list for COVID-19 with an assumed reproduction number (`R`) of 1.1 we use the `sim_linelist()` function. Using a reproduction number greater than one means we will likely get a reasonably sized outbreak (10 - 1000 cases, varying due to the stochastic simulation). _Do not set the reproduction number too high (e.g. >5) as the outbreak can become extremely large_. +To simulate a line list for COVID-19 with an Poisson contact distribution with a mean number of contacts of 2 and a probability of infection per contact of 0.5, we use the `sim_linelist()` function. The mean number of contacts and probability of infection determine the outbreak reproduction number, if the resulting reproduction number is around one it means we will likely get a reasonably sized outbreak (10 - 1,000 cases, varying due to the stochastic simulation). _Take care when setting the mean number of contacts and the probability of infection, as this can lead to the outbreak becoming extremely large_. ```{r sim-linelist} +set.seed(1) linelist <- sim_linelist( - R = 1.1, - serial_interval = serial_interval, + contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = 0.5, onset_to_hosp = onset_to_hosp, onset_to_death = onset_to_death ) @@ -92,8 +102,9 @@ In this example, the line list is simulated using the default values (see `?sim_ ```{r sim-linelist-diff-args} linelist <- sim_linelist( - R = 1.1, - serial_interval = serial_interval, + contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = 0.5, onset_to_hosp = onset_to_hosp, onset_to_death = onset_to_death, hosp_risk = 0.01, @@ -102,20 +113,13 @@ linelist <- sim_linelist( head(linelist) ``` -To simulate a table of contacts of cases (i.e. to reflect a contact tracing dataset) we can use the same serial interval defined for the example above. We additionally need a contact distribution, which represents the probability that each person that infected an individual, also had a given number of contacts that did not become infected. +To simulate a table of contacts of cases (i.e. to reflect a contact tracing dataset) we can use the same parameters defined for the example above. ```{r, sim-contacts} -contact_distribution <- epiparameter::epidist( - disease = "COVID-19", - epi_dist = "contact_distribution", - prob_distribution = "pois", - prob_distribution_params = c(l = 5) -) - contacts <- sim_contacts( - R = 1.1, - serial_interval = serial_interval, - contact_distribution = contact_distribution + contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = 0.5 ) head(contacts) ``` @@ -124,11 +128,11 @@ If both the line list and contacts table are required, they can be jointly simul ```{r, sim-outbreak} outbreak <- sim_outbreak( - R = 1.1, - serial_interval = serial_interval, + contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = 0.5, onset_to_hosp = onset_to_hosp, - onset_to_death = onset_to_death, - contact_distribution = contact_distribution + onset_to_death = onset_to_death ) head(outbreak$linelist) head(outbreak$contacts) diff --git a/README.md b/README.md index 9b7d579a..6bfe6bbe 100644 --- a/README.md +++ b/README.md @@ -50,18 +50,27 @@ library(simulist) library(epiparameter) ``` -The line list simulation requires that we define a serial interval, -onset-to-hospitalisation delay, and onset-to-death delay. We can load -these from the library of epidemiological parameters in the -`{epiparameter}` R package if available, or if these are not in the -database yet (such as the serial interval for COVID-19) we can define -them ourselves. +The line list simulation requires that we define a contact distribution, +contact interval, onset-to-hospitalisation delay, and onset-to-death +delay. We can load these from the library of epidemiological parameters +in the `{epiparameter}` R package if available, or if these are not in +the database yet (such as the contact interval for COVID-19) we can +define them ourselves. ``` r -# create COVID-19 serial interval -serial_interval <- epiparameter::epidist( +# create COVID-19 contact distribution +contact_distribution <- epiparameter::epidist( + disease = "COVID-19", + epi_dist = "contact distribution", + prob_distribution = "pois", + prob_distribution_params = c(mean = 2) +) +#> Citation cannot be created as author, year, journal or title is missing + +# create COVID-19 contact interval +contact_interval <- epiparameter::epidist( disease = "COVID-19", - epi_dist = "serial interval", + epi_dist = "contact interval", prob_distribution = "gamma", prob_distribution_params = c(shape = 1, scale = 1) ) @@ -96,35 +105,40 @@ onset_to_death <- epiparameter::epidist_db( #> To retrieve the short citation use the 'get_citation' function ``` -To simulate a line list for COVID-19 with an assumed reproduction number -(`R`) of 1.1 we use the `sim_linelist()` function. Using a reproduction -number greater than one means we will likely get a reasonably sized -outbreak (10 - 1000 cases, varying due to the stochastic simulation). -*Do not set the reproduction number too high (e.g. \>5) as the outbreak -can become extremely large*. +To simulate a line list for COVID-19 with an Poisson contact +distribution with a mean number of contacts of 2 and a probability of +infection per contact of 0.5, we use the `sim_linelist()` function. The +mean number of contacts and probability of infection determine the +outbreak reproduction number, if the resulting reproduction number is +around one it means we will likely get a reasonably sized outbreak (10 - +1,000 cases, varying due to the stochastic simulation). *Take care when +setting the mean number of contacts and the probability of infection, as +this can lead to the outbreak becoming extremely large*. ``` r +set.seed(1) linelist <- sim_linelist( - R = 1.1, - serial_interval = serial_interval, + contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = 0.5, onset_to_hosp = onset_to_hosp, onset_to_death = onset_to_death ) head(linelist) -#> id case_name case_type gender age date_onset date_admission -#> 1 1 Paul Reynolds confirmed m 5 2023-01-01 -#> 2 2 Kayla Martin suspected f 6 2023-01-01 -#> 3 3 Christian Oberai confirmed m 59 2023-01-02 -#> 4 4 Alexander Anderson confirmed m 90 2023-01-01 -#> 5 5 Matthew Bodine probable m 38 2023-01-02 -#> 6 6 Randa al-Mansur probable f 5 2023-01-02 2023-01-11 +#> id case_name case_type gender age date_onset date_admission +#> 1 1 Sabeeha el-Hannan probable f 28 2023-01-01 +#> 2 2 Jaedyn Robbins confirmed f 62 2023-01-02 2023-01-02 +#> 5 5 Young Vu confirmed m 42 2023-01-01 +#> 6 6 Alyssa Gloyd probable f 60 2023-01-01 +#> 8 8 Sebastian Boden probable m 28 2023-01-02 2023-01-03 +#> 9 9 Sierra Hernandez suspected f 78 2023-01-01 #> date_death date_first_contact date_last_contact ct_value -#> 1 25.4 -#> 2 2022-12-31 2023-01-02 NA -#> 3 2023-01-01 2023-01-03 25.4 -#> 4 2022-12-29 2023-01-05 25.4 -#> 5 2022-12-29 2023-01-04 NA -#> 6 2023-01-04 2023-01-06 NA +#> 1 NA +#> 2 2023-01-02 2023-01-05 25.1 +#> 5 2023-01-03 2023-01-04 25.1 +#> 6 2023-01-02 2023-01-04 NA +#> 8 2023-01-01 2023-01-03 NA +#> 9 2022-12-29 2023-01-04 NA ``` In this example, the line list is simulated using the default values @@ -135,65 +149,56 @@ modify either of these, we can specify them in the function. ``` r linelist <- sim_linelist( - R = 1.1, - serial_interval = serial_interval, + contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = 0.5, onset_to_hosp = onset_to_hosp, onset_to_death = onset_to_death, hosp_risk = 0.01, outbreak_start_date = as.Date("2019-12-01") ) head(linelist) -#> id case_name case_type gender age date_onset date_admission date_death -#> 1 1 Sabrina Jones confirmed f 90 2019-12-01 -#> 2 2 Ariel Aragon probable f 81 2019-12-01 -#> 3 3 Shandon Roberts confirmed m 44 2019-12-03 -#> 4 4 Nathan Martinez confirmed m 43 2019-12-01 -#> 5 5 Maria Downs suspected f 62 2019-12-03 -#> 6 6 Andre Turner suspected m 49 2019-12-01 -#> date_first_contact date_last_contact ct_value -#> 1 24.5 -#> 2 2019-12-03 2019-12-06 NA -#> 3 2019-12-02 2019-12-04 24.5 -#> 4 2019-12-01 2019-12-04 24.5 -#> 5 2019-12-06 2019-12-08 NA -#> 6 2019-12-02 2019-12-05 NA +#> id case_name case_type gender age date_onset date_admission +#> 1 1 Aaren-Matthew Deguzman probable m 65 2019-12-01 +#> 2 2 Thaamira el-Yusuf confirmed f 61 2019-12-01 +#> 3 3 Hector Hickman suspected m 56 2019-12-01 +#> 4 4 Zuhriyaa al-Saleem probable f 36 2019-12-01 +#> 5 5 Chisa Xiong confirmed f 20 2019-12-01 +#> 6 6 Tre-Shawn Williams confirmed m 85 2019-12-02 2019-12-06 +#> date_death date_first_contact date_last_contact ct_value +#> 1 NA +#> 2 2019-11-26 2019-12-06 25.3 +#> 3 2019-11-28 2019-12-03 NA +#> 4 2019-11-28 2019-12-02 NA +#> 5 2019-11-28 2019-12-02 25.3 +#> 6 2019-12-01 2019-12-04 25.3 ``` To simulate a table of contacts of cases (i.e. to reflect a contact -tracing dataset) we can use the same serial interval defined for the -example above. We additionally need a contact distribution, which -represents the probability that each person that infected an individual, -also had a given number of contacts that did not become infected. +tracing dataset) we can use the same parameters defined for the example +above. ``` r -contact_distribution <- epiparameter::epidist( - disease = "COVID-19", - epi_dist = "contact_distribution", - prob_distribution = "pois", - prob_distribution_params = c(l = 5) -) -#> Citation cannot be created as author, year, journal or title is missing - contacts <- sim_contacts( - R = 1.1, - serial_interval = serial_interval, - contact_distribution = contact_distribution + contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = 0.5 ) head(contacts) -#> from to cnt_age cnt_gender date_first_contact -#> 1 Waseema el-Huda Haasini Dinh 62 f 2022-12-30 -#> 2 Waseema el-Huda Jahlisha Eckhart 6 f 2023-01-02 -#> 3 Waseema el-Huda Sofya Brabson 1 f 2022-12-27 -#> 4 Waseema el-Huda Eoin Acosta 67 m 2022-12-30 -#> 5 Waseema el-Huda Mandeep Cha 37 m 2023-01-03 -#> 6 Waseema el-Huda Brianna Dock 21 f 2023-01-03 +#> from to age gender date_first_contact +#> 2 James Padilla Miranda Blanco 69 f 2022-12-31 +#> 3 James Padilla Allan Bunge 83 m 2023-01-05 +#> 4 Allan Bunge Nicholas Rabia 18 m 2023-01-02 +#> 5 Allan Bunge Unais al-Shabazz 85 m 2023-01-04 +#> 6 Nicholas Rabia Apiluck Chong 84 m 2022-12-30 +#> 7 Nicholas Rabia Rachel Tyler 90 f 2023-01-02 #> date_last_contact was_case status -#> 1 2023-01-04 Y case -#> 2 2023-01-04 Y case -#> 3 2023-01-01 N under_followup -#> 4 2023-01-03 N under_followup -#> 5 2023-01-08 N under_followup -#> 6 2023-01-05 N unknown +#> 2 2023-01-03 N unknown +#> 3 2023-01-05 Y case +#> 4 2023-01-05 Y case +#> 5 2023-01-06 Y case +#> 6 2023-01-02 N under_followup +#> 7 2023-01-04 Y case ``` If both the line list and contacts table are required, they can be @@ -204,42 +209,42 @@ the same default settings as the other functions). ``` r outbreak <- sim_outbreak( - R = 1.1, - serial_interval = serial_interval, + contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = 0.5, onset_to_hosp = onset_to_hosp, - onset_to_death = onset_to_death, - contact_distribution = contact_distribution + onset_to_death = onset_to_death ) head(outbreak$linelist) -#> id case_name case_type gender age date_onset date_admission -#> 1 1 Uqbah al-Sultana confirmed m 41 2023-01-01 -#> 2 2 Ulises Mora-Rojas probable m 41 2023-01-01 -#> 3 3 Katherine Galambos probable f 41 2023-01-02 2023-01-03 -#> 4 4 Annie Doak probable f 63 2023-01-01 -#> 5 5 Erika Yin confirmed f 89 2023-01-02 2023-01-02 -#> 6 6 Casey Borcherding confirmed f 47 2023-01-01 -#> date_death date_first_contact date_last_contact ct_value -#> 1 26 -#> 2 2022-12-31 2023-01-04 NA -#> 3 2023-01-01 2023-01-02 NA -#> 4 2022-12-27 2023-01-02 NA -#> 5 2022-12-31 2023-01-05 26 -#> 6 2023-01-04 2023-01-05 26 +#> id case_name case_type gender age date_onset date_admission +#> 1 1 Katherin Trancoso confirmed f 2 2023-01-01 +#> 4 4 E-Shaw Allen confirmed f 47 2023-01-01 +#> 5 5 Madison Moltrer suspected f 8 2023-01-01 +#> 8 8 Christopher Richter confirmed m 18 2023-01-02 2023-01-08 +#> 11 11 Elias Mckenzie confirmed m 4 2023-01-01 +#> 12 12 Elaine Nguyen confirmed f 60 2023-01-02 +#> date_death date_first_contact date_last_contact ct_value +#> 1 24.5 +#> 4 2023-01-02 2023-01-06 24.5 +#> 5 2022-12-31 2023-01-03 NA +#> 8 2023-01-02 2023-01-03 24.5 +#> 11 2023-01-04 2023-01-06 24.5 +#> 12 2023-01-07 2023-01-09 24.5 head(outbreak$contacts) -#> from to cnt_age cnt_gender date_first_contact -#> 1 Uqbah al-Sultana Ulises Mora-Rojas 41 m 2022-12-31 -#> 2 Uqbah al-Sultana Katherine Galambos 41 f 2023-01-01 -#> 3 Uqbah al-Sultana Alexander Foster 2 m 2023-01-06 -#> 4 Uqbah al-Sultana Daniel Ordaz-Lopez 2 m 2023-01-03 -#> 5 Uqbah al-Sultana Intisaar al-Saah 74 f 2023-01-02 -#> 6 Uqbah al-Sultana Alondra Juarez 23 f 2022-12-31 -#> date_last_contact was_case status -#> 1 2023-01-04 Y case -#> 2 2023-01-02 Y case -#> 3 2023-01-08 N under_followup -#> 4 2023-01-03 N under_followup -#> 5 2023-01-04 N under_followup -#> 6 2023-01-07 N under_followup +#> from to age gender date_first_contact +#> 2 Katherin Trancoso David Mcafee 46 m 2023-01-02 +#> 3 Katherin Trancoso Firdaus al-Hamidi 68 f 2022-12-30 +#> 4 Katherin Trancoso E-Shaw Allen 47 f 2023-01-02 +#> 5 Katherin Trancoso Madison Moltrer 8 f 2022-12-31 +#> 6 E-Shaw Allen Tyko Pahang 24 m 2023-01-05 +#> 7 Madison Moltrer Bryan Rodriguez 67 m 2022-12-31 +#> date_last_contact was_case status +#> 2 2023-01-02 N lost_to_followup +#> 3 2023-01-02 N under_followup +#> 4 2023-01-06 Y case +#> 5 2023-01-03 Y case +#> 6 2023-01-05 N lost_to_followup +#> 7 2023-01-02 N under_followup ``` ## Help diff --git a/inst/WORDLIST b/inst/WORDLIST index 6016c8e5..5e8a9579 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -1,6 +1,5 @@ apyramid bookdown -bpmodels CMD codecov Codecov diff --git a/man/create_config.Rd b/man/create_config.Rd index 6a1903eb..ae79b539 100644 --- a/man/create_config.Rd +++ b/man/create_config.Rd @@ -31,6 +31,7 @@ Accepted arguments and their defaults are: \item \code{first_contact_distribution_params = c(lambda = 3)} \item \code{ct_distribution = "norm"} \item \code{ct_distribution_params = c(mean = 25, sd = 2)} +\item \code{network = "adjusted"} } These parameters do not warrant their own arguments in @@ -38,6 +39,13 @@ These parameters do not warrant their own arguments in setting. Therefore it is not worth increasing the number of \code{\link[=sim_linelist]{sim_linelist()}} arguments to accommodate these and the \code{config} argument keeps the function signature simpler and more readable. + +The \code{network} option controls whether to sample contacts from a adjusted or +unadjusted contact distribution. Adjusted (default) sampling uses +\eqn{q(n) \sim (n + 1)p(n + 1)} where \eqn{p(n)} is the probability +density function of a distribution, e.g., Poisson or Negative binomial. +Unadjusted (\code{network = "unadjusted"}) instead samples contacts directly from +a probability distribution \eqn{p(n)}. } \examples{ # example with default configuration diff --git a/man/dot-check_sim_input.Rd b/man/dot-check_sim_input.Rd index cdc3889b..525ac581 100644 --- a/man/dot-check_sim_input.Rd +++ b/man/dot-check_sim_input.Rd @@ -6,13 +6,13 @@ \usage{ .check_sim_input( sim_type = c("linelist", "contacts", "outbreak"), - R, - serial_interval, + contact_distribution, + contact_interval, + prob_infect, outbreak_start_date, min_outbreak_size, onset_to_hosp = NULL, onset_to_death = NULL, - contact_distribution = NULL, add_names = NULL, add_ct = NULL, case_type_probs = NULL, @@ -27,10 +27,19 @@ \item{sim_type}{A \code{character} string specifying which simulation function this function is being called within.} -\item{R}{A single \code{numeric} for the reproduction number.} +\item{contact_distribution}{An \verb{} object or anonymous function for +the contact distribution. This is any discrete density function that +produces non-negative integers (including zero, \eqn{\mathbb{N}_0}) for the +number of contacts per infection.} + +\item{contact_interval}{An \verb{} object or anonymous function for +the contact interval. This is analogous to the serial interval or generation +time, but defines the time interval between an individual being +infected/infectious (in the simulation the latency period is assumed to be +zero) and having contact with another susceptible individual.} -\item{serial_interval}{An \verb{} object or anonymous function for -the serial interval.} +\item{prob_infect}{A single \code{numeric} for the probability of a secondary +contact being infected by an infected primary contact.} \item{outbreak_start_date}{A \code{date} for the start of the outbreak.} @@ -44,10 +53,6 @@ the onset to hospitalisation delay distribution.} \item{onset_to_death}{An \verb{} object or anonymous function for the onset to death delay distribution.} -\item{contact_distribution}{An \verb{} object or anonymous function for -the contact distribution. This is similar to the offspring distribution, -but describes the heterogeneity of contacts that are not infected.} - \item{add_names}{A \code{logical} boolean for whether to add names to each row of the line list. Default is \code{TRUE}.} diff --git a/man/dot-sample_names.Rd b/man/dot-sample_names.Rd new file mode 100644 index 00000000..ad064aa1 --- /dev/null +++ b/man/dot-sample_names.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sim_utils.R +\name{.sample_names} +\alias{.sample_names} +\title{Sample names using \code{\link[randomNames:randomNames]{randomNames::randomNames()}}} +\usage{ +.sample_names(.data, buffer_factor = 1.5) +} +\arguments{ +\item{.data}{A \verb{} containing the infectious history from a +branching process simulation.} + +\item{buffer_factor}{A single \code{numeric} determining the level of +oversampling (or buffer) when creating a vector of unique names from +\code{\link[=randomNames]{randomNames()}}.} +} +\value{ +A \code{character} vector. +} +\description{ +Sample names for specified genders by sampling with replacement to avoid +exhausting number of name when \code{sample.with.replacement = FALSE}. The +duplicated names during sampling need to be removed to ensure each +individual has a unique name. In order to have enough unique names, more +names than required are sampled from \code{\link[=randomNames]{randomNames()}}, and the level of +oversampling is determined by the \code{buffer_factor} argument. A +\code{buffer_factor} too high and the more names are sampled which takes longer, +a \code{buffer_factor} too low and not enough unique names are sampled and +the \code{.sample_names()} function will need to loop until it has enough +unique names. +} +\keyword{internal} diff --git a/man/dot-sim_contacts_tbl.Rd b/man/dot-sim_contacts_tbl.Rd index 1bdf9974..5968a010 100644 --- a/man/dot-sim_contacts_tbl.Rd +++ b/man/dot-sim_contacts_tbl.Rd @@ -4,40 +4,16 @@ \alias{.sim_contacts_tbl} \title{Create contacts table} \usage{ -.sim_contacts_tbl( - .data, - outbreak_start_date, - contact_distribution, - population_age, - contact_tracing_status_probs, - config -) +.sim_contacts_tbl(.data, contact_tracing_status_probs) } \arguments{ \item{.data}{A \verb{} containing the infectious history from a branching process simulation.} -\item{outbreak_start_date}{A \code{date} for the start of the outbreak.} - -\item{contact_distribution}{An \verb{} object or anonymous function for -the contact distribution. This is similar to the offspring distribution, -but describes the heterogeneity of contacts that are not infected.} - -\item{population_age}{Either a \code{numeric} vector with two elements or a -\verb{} with age structure in the population. Use a \code{numeric} vector -to specific the age range of the population, the first element is the lower -bound for the age range, and and the second is the upper bound for the age -range (both inclusive, i.e. [lower, upper]). The \verb{} with -age groups and the proportion of the population in that group. See details -and examples for more information.} - \item{contact_tracing_status_probs}{A named \code{numeric} vector with the probability of each contact tracing status. The names of the vector must be \code{"under_followup"}, \code{"lost_to_followup"}, \code{"unknown"}. Values of each contact tracing status must sum to one.} - -\item{config}{A list of settings to adjust the randomly sampled delays and -Ct values (if \code{add_ct = TRUE}). See \code{\link[=create_config]{create_config()}} for more information.} } \value{ A contacts \verb{} diff --git a/man/dot-sim_network_bp.Rd b/man/dot-sim_network_bp.Rd new file mode 100644 index 00000000..6719bdf3 --- /dev/null +++ b/man/dot-sim_network_bp.Rd @@ -0,0 +1,47 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sim_network_bp.R +\name{.sim_network_bp} +\alias{.sim_network_bp} +\title{Simulate a random network branching process model with a probability of +infection for each contact} +\usage{ +.sim_network_bp(contact_distribution, contact_interval, prob_infect, config) +} +\arguments{ +\item{contact_distribution}{An \verb{} object or anonymous function for +the contact distribution. This is any discrete density function that +produces non-negative integers (including zero, \eqn{\mathbb{N}_0}) for the +number of contacts per infection.} + +\item{contact_interval}{An \verb{} object or anonymous function for +the contact interval. This is analogous to the serial interval or generation +time, but defines the time interval between an individual being +infected/infectious (in the simulation the latency period is assumed to be +zero) and having contact with another susceptible individual.} + +\item{prob_infect}{A single \code{numeric} for the probability of a secondary +contact being infected by an infected primary contact.} + +\item{config}{A list of settings to adjust the randomly sampled delays and +Ct values (if \code{add_ct = TRUE}). See \code{\link[=create_config]{create_config()}} for more information.} +} +\value{ +A \verb{} with the contact and transmission chain data. +} +\description{ +Simulate a branching process on a infinite network where the contact +distribution provides a function to sample the number of contacts of each +individual in the simulation. Each contact is then infected with the +probability of infection. The time between each infection is determined +by the contact interval function. +} +\details{ +The contact distribution sampled takes the network effect +\eqn{q(n) \sim (n + 1)p(n + 1)} where \eqn{p(n)} is the probability +density function of a distribution, e.g., Poisson or Negative binomial. +That is to say, the probability of having choosing a contact at random by +following up a contact chooses individuals with a probability proportional +to their number of contacts. The plus one is because one of the contacts +was "used" to infect the person. +} +\keyword{internal} diff --git a/man/dot-sim_utils.Rd b/man/dot-sim_utils.Rd index decb0283..d38dbdd6 100644 --- a/man/dot-sim_utils.Rd +++ b/man/dot-sim_utils.Rd @@ -8,8 +8,9 @@ within \pkg{simulist}} \usage{ .sim_bp_linelist( - R, - serial_interval, + contact_distribution, + contact_interval, + prob_infect, outbreak_start_date, min_outbreak_size, population_age, @@ -31,10 +32,19 @@ within \pkg{simulist}} ) } \arguments{ -\item{R}{A single \code{numeric} for the reproduction number.} +\item{contact_distribution}{An \verb{} object or anonymous function for +the contact distribution. This is any discrete density function that +produces non-negative integers (including zero, \eqn{\mathbb{N}_0}) for the +number of contacts per infection.} -\item{serial_interval}{An \verb{} object or anonymous function for -the serial interval.} +\item{contact_interval}{An \verb{} object or anonymous function for +the contact interval. This is analogous to the serial interval or generation +time, but defines the time interval between an individual being +infected/infectious (in the simulation the latency period is assumed to be +zero) and having contact with another susceptible individual.} + +\item{prob_infect}{A single \code{numeric} for the probability of a secondary +contact being infected by an infected primary contact.} \item{outbreak_start_date}{A \code{date} for the start of the outbreak.} diff --git a/man/sim_contacts.Rd b/man/sim_contacts.Rd index 1fcdeb0f..a467e6b7 100644 --- a/man/sim_contacts.Rd +++ b/man/sim_contacts.Rd @@ -5,9 +5,9 @@ \title{Simulate contacts for an infectious disease outbreak} \usage{ sim_contacts( - R, - serial_interval, contact_distribution, + contact_interval, + prob_infect, outbreak_start_date = as.Date("2023-01-01"), min_outbreak_size = 10, population_age = c(1, 90), @@ -17,14 +17,19 @@ sim_contacts( ) } \arguments{ -\item{R}{A single \code{numeric} for the reproduction number.} +\item{contact_distribution}{An \verb{} object or anonymous function for +the contact distribution. This is any discrete density function that +produces non-negative integers (including zero, \eqn{\mathbb{N}_0}) for the +number of contacts per infection.} -\item{serial_interval}{An \verb{} object or anonymous function for -the serial interval.} +\item{contact_interval}{An \verb{} object or anonymous function for +the contact interval. This is analogous to the serial interval or generation +time, but defines the time interval between an individual being +infected/infectious (in the simulation the latency period is assumed to be +zero) and having contact with another susceptible individual.} -\item{contact_distribution}{An \verb{} object or anonymous function for -the contact distribution. This is similar to the offspring distribution, -but describes the heterogeneity of contacts that are not infected.} +\item{prob_infect}{A single \code{numeric} for the probability of a secondary +contact being infected by an infected primary contact.} \item{outbreak_start_date}{A \code{date} for the start of the outbreak.} @@ -56,24 +61,24 @@ Simulate contacts for an infectious disease outbreak } \examples{ # load data required to simulate contacts -serial_interval <- epiparameter::epidist( +contact_distribution <- epiparameter::epidist( disease = "COVID-19", - epi_dist = "serial interval", - prob_distribution = "gamma", - prob_distribution_params = c(shape = 1, scale = 1) + epi_dist = "contact distribution", + prob_distribution = "pois", + prob_distribution_params = c(mean = 2) ) -contact_distribution <- epiparameter::epidist( +contact_interval <- epiparameter::epidist( disease = "COVID-19", - epi_dist = "contact_distribution", - prob_distribution = "pois", - prob_distribution_params = c(l = 5) + epi_dist = "contact interval", + prob_distribution = "gamma", + prob_distribution_params = c(shape = 1, scale = 1) ) contacts <- sim_contacts( - R = 1.1, - serial_interval = serial_interval, - contact_distribution = contact_distribution + contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = 0.5 ) } \author{ diff --git a/man/sim_linelist.Rd b/man/sim_linelist.Rd index 6e95aec5..b8ce42f2 100644 --- a/man/sim_linelist.Rd +++ b/man/sim_linelist.Rd @@ -5,8 +5,9 @@ \title{Simulate a line list} \usage{ sim_linelist( - R, - serial_interval, + contact_distribution, + contact_interval, + prob_infect, onset_to_hosp, onset_to_death, hosp_risk = 0.2, @@ -22,10 +23,19 @@ sim_linelist( ) } \arguments{ -\item{R}{A single \code{numeric} for the reproduction number.} +\item{contact_distribution}{An \verb{} object or anonymous function for +the contact distribution. This is any discrete density function that +produces non-negative integers (including zero, \eqn{\mathbb{N}_0}) for the +number of contacts per infection.} -\item{serial_interval}{An \verb{} object or anonymous function for -the serial interval.} +\item{contact_interval}{An \verb{} object or anonymous function for +the contact interval. This is analogous to the serial interval or generation +time, but defines the time interval between an individual being +infected/infectious (in the simulation the latency period is assumed to be +zero) and having contact with another susceptible individual.} + +\item{prob_infect}{A single \code{numeric} for the probability of a secondary +contact being infected by an infected primary contact.} \item{onset_to_hosp}{An \verb{} object or anonymous function for the onset to hospitalisation delay distribution.} @@ -108,9 +118,16 @@ that age group. Proportions must sum to one. } \examples{ # load data required to simulate line list -serial_interval <- epiparameter::epidist( +contact_distribution <- epiparameter::epidist( + disease = "COVID-19", + epi_dist = "contact distribution", + prob_distribution = "pois", + prob_distribution_params = c(mean = 2) +) + +contact_interval <- epiparameter::epidist( disease = "COVID-19", - epi_dist = "serial interval", + epi_dist = "contact interval", prob_distribution = "gamma", prob_distribution_params = c(shape = 1, scale = 1) ) @@ -130,8 +147,9 @@ onset_to_death <- epiparameter::epidist_db( ) # example with single hospitalisation risk for entire population linelist <- sim_linelist( - R = 1.1, - serial_interval = serial_interval, + contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = 0.5, onset_to_hosp = onset_to_hosp, onset_to_death = onset_to_death, hosp_risk = 0.5 @@ -147,8 +165,9 @@ age_dep_hosp_risk <- data.frame( risk = c(0.1, 0.05, 0.2) ) linelist <- sim_linelist( - R = 1.1, - serial_interval = serial_interval, + contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = 0.5, onset_to_hosp = onset_to_hosp, onset_to_death = onset_to_death, hosp_risk = age_dep_hosp_risk diff --git a/man/sim_outbreak.Rd b/man/sim_outbreak.Rd index 7ad98942..f238571a 100644 --- a/man/sim_outbreak.Rd +++ b/man/sim_outbreak.Rd @@ -5,11 +5,11 @@ \title{Simulate a line list and a contacts table} \usage{ sim_outbreak( - R, - serial_interval, + contact_distribution, + contact_interval, + prob_infect, onset_to_hosp, onset_to_death, - contact_distribution, hosp_risk = 0.2, hosp_death_risk = 0.5, non_hosp_death_risk = 0.05, @@ -25,10 +25,19 @@ sim_outbreak( ) } \arguments{ -\item{R}{A single \code{numeric} for the reproduction number.} +\item{contact_distribution}{An \verb{} object or anonymous function for +the contact distribution. This is any discrete density function that +produces non-negative integers (including zero, \eqn{\mathbb{N}_0}) for the +number of contacts per infection.} + +\item{contact_interval}{An \verb{} object or anonymous function for +the contact interval. This is analogous to the serial interval or generation +time, but defines the time interval between an individual being +infected/infectious (in the simulation the latency period is assumed to be +zero) and having contact with another susceptible individual.} -\item{serial_interval}{An \verb{} object or anonymous function for -the serial interval.} +\item{prob_infect}{A single \code{numeric} for the probability of a secondary +contact being infected by an infected primary contact.} \item{onset_to_hosp}{An \verb{} object or anonymous function for the onset to hospitalisation delay distribution.} @@ -36,10 +45,6 @@ the onset to hospitalisation delay distribution.} \item{onset_to_death}{An \verb{} object or anonymous function for the onset to death delay distribution.} -\item{contact_distribution}{An \verb{} object or anonymous function for -the contact distribution. This is similar to the offspring distribution, -but describes the heterogeneity of contacts that are not infected.} - \item{hosp_risk}{Either a single \code{numeric} for the hospitalisation risk of everyone in the population, or a \verb{} with age specific hospitalisation risks Default is 20\% hospitalisation (\code{0.2}) for the entire @@ -125,7 +130,14 @@ that age group. Proportions must sum to one. } \examples{ # load data required to simulate outbreak data -serial_interval <- epiparameter::epidist( +contact_distribution <- epiparameter::epidist( + disease = "COVID-19", + epi_dist = "contact distribution", + prob_distribution = "pois", + prob_distribution_params = c(mean = 2) +) + +contact_interval <- epiparameter::epidist( disease = "COVID-19", epi_dist = "serial interval", prob_distribution = "gamma", @@ -146,19 +158,12 @@ onset_to_death <- epiparameter::epidist_db( single_epidist = TRUE ) -contact_distribution <- epiparameter::epidist( - disease = "COVID-19", - epi_dist = "contact_distribution", - prob_distribution = "pois", - prob_distribution_params = c(l = 5) -) - outbreak <- sim_outbreak( - R = 1.1, - serial_interval = serial_interval, + contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = 0.5, onset_to_hosp = onset_to_hosp, - onset_to_death = onset_to_death, - contact_distribution = contact_distribution + onset_to_death = onset_to_death ) } \author{ diff --git a/tests/testthat/_snaps/sim_network_bp.md b/tests/testthat/_snaps/sim_network_bp.md new file mode 100644 index 00000000..89a5fc03 --- /dev/null +++ b/tests/testthat/_snaps/sim_network_bp.md @@ -0,0 +1,45 @@ +# .sim_network_bp works as expected + + Code + .sim_network_bp(contact_distribution = contact_distribution, contact_interval = contact_interval, + prob_infect = 0.5, config = create_config()) + Output + id ancestor generation infected time + 1 1 NA 1 infected 0.00000000 + 2 2 1 2 contact 1.88240160 + 3 3 1 2 infected 1.80451250 + 4 4 3 3 infected 0.05896314 + 5 5 3 3 contact 1.15835525 + 6 6 3 3 contact 0.99001994 + 7 7 4 4 infected 2.14036129 + 8 8 7 5 contact 0.46988251 + 9 9 7 5 contact 0.69425308 + 10 10 7 5 contact 0.06819735 + +# .sim_network_bp works as expected with no contacts + + Code + .sim_network_bp(contact_distribution = contact_distribution, contact_interval = contact_interval, + prob_infect = 0.5, config = create_config()) + Output + id ancestor generation infected time + 1 1 NA 1 infected 0 + +# .sim_network_bp works as expected with unadjusted network + + Code + .sim_network_bp(contact_distribution = contact_distribution, contact_interval = contact_interval, + prob_infect = 0.5, config = create_config(network = "unadjusted")) + Output + id ancestor generation infected time + 1 1 NA 1 infected 0.00000000 + 2 2 1 2 contact 1.88240160 + 3 3 1 2 infected 1.80451250 + 4 4 3 3 infected 0.05896314 + 5 5 3 3 contact 1.15835525 + 6 6 3 3 contact 0.99001994 + 7 7 4 4 infected 2.14036129 + 8 8 7 5 contact 0.46988251 + 9 9 7 5 contact 0.69425308 + 10 10 7 5 contact 0.06819735 + diff --git a/tests/testthat/test-checkers.R b/tests/testthat/test-checkers.R index 9506b6be..9d9e1e83 100644 --- a/tests/testthat/test-checkers.R +++ b/tests/testthat/test-checkers.R @@ -135,9 +135,16 @@ test_that(".check_age_df fails as expected", { }) suppressMessages({ - serial_interval <- as.function(epiparameter::epidist( + contact_distribution <- as.function(epiparameter::epidist( + disease = "COVID-19", + epi_dist = "contact distribution", + prob_distribution = "pois", + prob_distribution_params = c(mean = 2) + )) + + contact_interval <- as.function(epiparameter::epidist( disease = "COVID-19", - epi_dist = "serial interval", + epi_dist = "contact interval", prob_distribution = "gamma", prob_distribution_params = c(shape = 1, scale = 1) )) @@ -155,25 +162,18 @@ suppressMessages({ epi_dist = "onset to death", single_epidist = TRUE )) - - contact_distribution <- as.function(epiparameter::epidist( - disease = "COVID-19", - epi_dist = "contact_distribution", - prob_distribution = "pois", - prob_distribution_params = c(l = 5) - )) }) test_that(".check_sim_input works as expected", { chk <- .check_sim_input( sim_type = "outbreak", - R = 1, - serial_interval = serial_interval, + contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = 0.5, outbreak_start_date = as.Date("2023-01-01"), min_outbreak_size = 10, onset_to_hosp = onset_to_hosp, onset_to_death = onset_to_death, - contact_distribution = contact_distribution, add_names = TRUE, add_ct = FALSE, case_type_probs = c( @@ -196,8 +196,9 @@ test_that(".check_sim_input works as expected", { chk <- .check_sim_input( sim_type = "linelist", - R = 1, - serial_interval = serial_interval, + contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = 0.5, outbreak_start_date = as.Date("2023-01-01"), min_outbreak_size = 10, onset_to_hosp = onset_to_hosp, @@ -219,11 +220,11 @@ test_that(".check_sim_input works as expected", { chk <- .check_sim_input( sim_type = "contacts", - R = 1, - serial_interval = serial_interval, + contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = 0.5, outbreak_start_date = as.Date("2023-01-01"), min_outbreak_size = 10, - contact_distribution = contact_distribution, contact_tracing_status_probs = c( under_followup = 0.7, lost_to_followup = 0.2, @@ -241,18 +242,19 @@ test_that(".check_sim_input fails as expected", { regexp = "(arg)*(should be one of)*(linelist)*(contacts)*(outbreak)" ) expect_error( - .check_sim_input(sim_type = "outbreak", R = -1), - regexp = "(Assertion on)*(R)*(failed)" + .check_sim_input(sim_type = "outbreak", contact_distribution = list(), prob_infect = 0.5), + regexp = "(Assertion on)*(contact_distribution)*(failed)" ) expect_error( - .check_sim_input(sim_type = "outbreak", R = 1, serial_interval = list()), - regexp = "(Assertion on)*(serial_interval)*(failed)" + .check_sim_input(sim_type = "outbreak", contact_distribution = contact_distribution, contact_interval = list(), prob_infect = 0.5), + regexp = "(Assertion on)*(contact_interval)*(failed)" ) expect_error( .check_sim_input( sim_type = "outbreak", - R = 1, - serial_interval = serial_interval, + contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = 0.5, outbreak_start_date = "01-01-2023" ), regexp = "(Assertion on)*(outbreak_start_date)*(failed)" @@ -260,8 +262,9 @@ test_that(".check_sim_input fails as expected", { expect_error( .check_sim_input( sim_type = "outbreak", - R = 1, - serial_interval = serial_interval, + contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = 0.5, outbreak_start_date = as.Date("2023-01-01"), min_outbreak_size = "10" ), diff --git a/tests/testthat/test-create_config.R b/tests/testthat/test-create_config.R index 1163b447..6d82b971 100644 --- a/tests/testthat/test-create_config.R +++ b/tests/testthat/test-create_config.R @@ -1,13 +1,13 @@ test_that("create_config works as expected with defaults", { config <- create_config() expect_type(config, type = "list") - expect_length(config, 6) + expect_length(config, 7) expect_named( config, c( "last_contact_distribution", "last_contact_distribution_params", "first_contact_distribution", "first_contact_distribution_params", - "ct_distribution", "ct_distribution_params" + "ct_distribution", "ct_distribution_params", "network" ) ) }) @@ -15,13 +15,13 @@ test_that("create_config works as expected with defaults", { test_that("create_config works as expected modifying element", { config <- create_config(last_contact_distribution = "geom") expect_type(config, type = "list") - expect_length(config, 6) + expect_length(config, 7) expect_named( config, c( "last_contact_distribution", "last_contact_distribution_params", "first_contact_distribution", "first_contact_distribution_params", - "ct_distribution", "ct_distribution_params" + "ct_distribution", "ct_distribution_params", "network" ) ) expect_identical(config$last_contact_distribution, "geom") @@ -35,13 +35,13 @@ test_that("create_config works as expected with spliced list", { ) ) expect_type(config, type = "list") - expect_length(config, 6) + expect_length(config, 7) expect_named( config, c( "last_contact_distribution", "last_contact_distribution_params", "first_contact_distribution", "first_contact_distribution_params", - "ct_distribution", "ct_distribution_params" + "ct_distribution", "ct_distribution_params", "network" ) ) expect_identical(config$ct_distribution, "lnorm") @@ -55,13 +55,13 @@ test_that("create_config works as expected with spliced list", { ) ) expect_type(config, type = "list") - expect_length(config, 6) + expect_length(config, 7) expect_named( config, c( "last_contact_distribution", "last_contact_distribution_params", "first_contact_distribution", "first_contact_distribution_params", - "ct_distribution", "ct_distribution_params" + "ct_distribution", "ct_distribution_params", "network" ) ) expect_identical(config$last_contact_distribution, "geom") diff --git a/tests/testthat/test-sim_contacts.R b/tests/testthat/test-sim_contacts.R index 861d1ff1..ebc07c83 100644 --- a/tests/testthat/test-sim_contacts.R +++ b/tests/testthat/test-sim_contacts.R @@ -1,33 +1,33 @@ suppressMessages({ - serial_interval <- epiparameter::epidist( + contact_distribution <- epiparameter::epidist( disease = "COVID-19", - epi_dist = "serial interval", - prob_distribution = "gamma", - prob_distribution_params = c(shape = 1, scale = 1) + epi_dist = "contact distribution", + prob_distribution = "pois", + prob_distribution_params = c(mean = 2) ) - contact_distribution <- epiparameter::epidist( + contact_interval <- epiparameter::epidist( disease = "COVID-19", - epi_dist = "contact_distribution", - prob_distribution = "pois", - prob_distribution_params = c(l = 5) + epi_dist = "contact interval", + prob_distribution = "gamma", + prob_distribution_params = c(shape = 1, scale = 1) ) }) test_that("sim_contacts works as expected", { set.seed(1) contacts <- sim_contacts( - R = 1.1, - serial_interval = serial_interval, - contact_distribution = contact_distribution + contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = 0.5 ) expect_s3_class(contacts, class = "data.frame") - expect_identical(dim(contacts), c(170L, 8L)) + expect_identical(dim(contacts), c(35L, 8L)) expect_identical( colnames(contacts), c( - "from", "to", "cnt_age", "cnt_gender", "date_first_contact", + "from", "to", "age", "gender", "date_first_contact", "date_last_contact", "was_case", "status" ) ) @@ -36,9 +36,9 @@ test_that("sim_contacts works as expected", { test_that("sim_contacts works as expected with modified config", { set.seed(1) contacts <- sim_contacts( - R = 1.1, - serial_interval = serial_interval, contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = 0.5, config = create_config( last_contact_distribution = "geom", last_contact_distribution_params = c(prob = 0.5) @@ -46,11 +46,11 @@ test_that("sim_contacts works as expected with modified config", { ) expect_s3_class(contacts, class = "data.frame") - expect_identical(dim(contacts), c(178L, 8L)) + expect_identical(dim(contacts), c(35L, 8L)) expect_identical( colnames(contacts), c( - "from", "to", "cnt_age", "cnt_gender", "date_first_contact", + "from", "to", "age", "gender", "date_first_contact", "date_last_contact", "was_case", "status" ) ) @@ -59,20 +59,20 @@ test_that("sim_contacts works as expected with modified config", { test_that("sim_contacts works as expected with modified config parameters", { set.seed(1) contacts <- sim_contacts( - R = 1.1, - serial_interval = serial_interval, contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = 0.5, config = create_config( last_contact_distribution_params = c(lambda = 5) ) ) expect_s3_class(contacts, class = "data.frame") - expect_identical(dim(contacts), c(170L, 8L)) # TODO check why nrow is diff + expect_identical(dim(contacts), c(35L, 8L)) expect_identical( colnames(contacts), c( - "from", "to", "cnt_age", "cnt_gender", "date_first_contact", + "from", "to", "age", "gender", "date_first_contact", "date_last_contact", "was_case", "status" ) ) @@ -81,9 +81,9 @@ test_that("sim_contacts works as expected with modified config parameters", { test_that("sim_contacts fails as expected with modified config", { expect_error( sim_contacts( - R = 1.1, - serial_interval = serial_interval, contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = 0.5, config = create_config( last_contact_distribution = "geom" ) @@ -95,11 +95,11 @@ test_that("sim_contacts fails as expected with modified config", { test_that("sim_contacts fails as expected with empty config", { expect_error( sim_contacts( - R = 1.1, - serial_interval = serial_interval, contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = 0.5, config = list() ), - regexp = "Distribution parameters are missing, check config" + regexp = "Network incorrectly specified, check config" ) }) diff --git a/tests/testthat/test-sim_linelist.R b/tests/testthat/test-sim_linelist.R index 9b5c6919..03498286 100644 --- a/tests/testthat/test-sim_linelist.R +++ b/tests/testthat/test-sim_linelist.R @@ -1,7 +1,14 @@ suppressMessages({ - serial_interval <- epiparameter::epidist( + contact_distribution <- epiparameter::epidist( disease = "COVID-19", - epi_dist = "serial interval", + epi_dist = "contact distribution", + prob_distribution = "pois", + prob_distribution_params = c(mean = 2) + ) + + contact_interval <- epiparameter::epidist( + disease = "COVID-19", + epi_dist = "contact interval", prob_distribution = "gamma", prob_distribution_params = c(shape = 1, scale = 1) ) @@ -24,14 +31,15 @@ suppressMessages({ test_that("sim_linelist works as expected", { set.seed(1) linelist <- sim_linelist( - R = 1.1, - serial_interval = serial_interval, + contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = 0.5, onset_to_hosp = onset_to_hosp, onset_to_death = onset_to_death ) expect_s3_class(linelist, class = "data.frame") - expect_identical(dim(linelist), c(42L, 11L)) + expect_identical(dim(linelist), c(17L, 11L)) expect_identical( colnames(linelist), c( @@ -57,8 +65,9 @@ test_that("sim_linelist works as expected with age-strat risks", { ) set.seed(1) linelist <- sim_linelist( - R = 1.1, - serial_interval = serial_interval, + contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = 0.5, onset_to_hosp = onset_to_hosp, onset_to_death = onset_to_death, hosp_risk = age_dep_hosp_risk, @@ -67,7 +76,7 @@ test_that("sim_linelist works as expected with age-strat risks", { ) expect_s3_class(linelist, class = "data.frame") - expect_identical(dim(linelist), c(42L, 11L)) + expect_identical(dim(linelist), c(17L, 11L)) expect_identical( colnames(linelist), c( @@ -81,15 +90,16 @@ test_that("sim_linelist works as expected with age-strat risks", { test_that("sim_linelist works as expected without Ct", { set.seed(1) linelist <- sim_linelist( - R = 1.1, - serial_interval = serial_interval, + contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = 0.5, onset_to_hosp = onset_to_hosp, onset_to_death = onset_to_death, add_ct = FALSE ) expect_s3_class(linelist, class = "data.frame") - expect_identical(dim(linelist), c(42L, 10L)) + expect_identical(dim(linelist), c(17L, 10L)) expect_identical( colnames(linelist), c( @@ -103,15 +113,16 @@ test_that("sim_linelist works as expected without Ct", { test_that("sim_linelist works as expected with anonymous", { set.seed(1) linelist <- sim_linelist( - R = 1.1, - serial_interval = serial_interval, + contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = 0.5, onset_to_hosp = onset_to_hosp, onset_to_death = onset_to_death, add_names = FALSE ) expect_s3_class(linelist, class = "data.frame") - expect_identical(dim(linelist), c(42L, 10L)) + expect_identical(dim(linelist), c(17L, 10L)) expect_identical( colnames(linelist), c( @@ -130,15 +141,16 @@ test_that("sim_linelist works as expected with age structure", { ) set.seed(1) linelist <- sim_linelist( - R = 1.1, - serial_interval = serial_interval, + contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = 0.5, onset_to_hosp = onset_to_hosp, onset_to_death = onset_to_death, population_age = age_struct ) expect_s3_class(linelist, class = "data.frame") - expect_identical(dim(linelist), c(42L, 11L)) + expect_identical(dim(linelist), c(17L, 11L)) expect_identical( colnames(linelist), c( @@ -161,8 +173,9 @@ test_that("sim_linelist works as expected with age-strat risks & age struct", { ) set.seed(1) linelist <- sim_linelist( - R = 1.1, - serial_interval = serial_interval, + contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = 0.5, onset_to_hosp = onset_to_hosp, onset_to_death = onset_to_death, hosp_risk = age_dep_hosp_risk, @@ -170,7 +183,7 @@ test_that("sim_linelist works as expected with age-strat risks & age struct", { ) expect_s3_class(linelist, class = "data.frame") - expect_identical(dim(linelist), c(42L, 11L)) + expect_identical(dim(linelist), c(17L, 11L)) expect_identical( colnames(linelist), c( @@ -187,10 +200,11 @@ test_that("sim_linelist gives expected proportion of ages with age struct", { proportion = c(0.2, 0.5, 0.3), stringsAsFactors = FALSE ) - set.seed(1) + set.seed(3) linelist <- sim_linelist( - R = 1.5, - serial_interval = serial_interval, + contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = 0.5, onset_to_hosp = onset_to_hosp, onset_to_death = onset_to_death, population_age = age_struct @@ -218,8 +232,9 @@ test_that("sim_linelist gives expected proportion of ages with age struct", { test_that("sim_linelist works as expected with modified config", { set.seed(1) linelist <- sim_linelist( - R = 1.1, - serial_interval = serial_interval, + contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = 0.5, onset_to_hosp = onset_to_hosp, onset_to_death = onset_to_death, config = create_config( @@ -229,7 +244,7 @@ test_that("sim_linelist works as expected with modified config", { ) expect_s3_class(linelist, class = "data.frame") - expect_identical(dim(linelist), c(42L, 11L)) + expect_identical(dim(linelist), c(17L, 11L)) expect_identical( colnames(linelist), c( @@ -243,8 +258,9 @@ test_that("sim_linelist works as expected with modified config", { test_that("sim_linelist works as expected with modified config parameters", { set.seed(1) linelist <- sim_linelist( - R = 1.1, - serial_interval = serial_interval, + contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = 0.5, onset_to_hosp = onset_to_hosp, onset_to_death = onset_to_death, config = create_config( @@ -253,7 +269,7 @@ test_that("sim_linelist works as expected with modified config parameters", { ) expect_s3_class(linelist, class = "data.frame") - expect_identical(dim(linelist), c(42L, 11L)) + expect_identical(dim(linelist), c(17L, 11L)) expect_identical( colnames(linelist), c( @@ -267,8 +283,9 @@ test_that("sim_linelist works as expected with modified config parameters", { test_that("sim_linelist fails as expected with modified config", { expect_error( sim_linelist( - R = 1.1, - serial_interval = serial_interval, + contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = 0.5, onset_to_hosp = onset_to_hosp, onset_to_death = onset_to_death, config = create_config( @@ -280,8 +297,9 @@ test_that("sim_linelist fails as expected with modified config", { expect_error( sim_linelist( - R = 1.1, - serial_interval = serial_interval, + contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = 0.5, onset_to_hosp = onset_to_hosp, onset_to_death = onset_to_death, add_ct = TRUE, @@ -296,22 +314,24 @@ test_that("sim_linelist fails as expected with modified config", { test_that("sim_linelist fails as expected with empty config", { expect_error( sim_linelist( - R = 1.1, - serial_interval = serial_interval, + contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = 0.5, onset_to_hosp = onset_to_hosp, onset_to_death = onset_to_death, config = list() ), - regexp = "Distribution parameters are missing, check config" + regexp = "Network incorrectly specified, check config" ) }) -test_that("sim_linelist fails as expected exceeding max iter for bpmodel", { +test_that("sim_linelist fails as expected exceeding max iter for bp model", { set.seed(1) expect_error( sim_linelist( - R = 0.2, - serial_interval = serial_interval, + contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = 0.1, onset_to_hosp = onset_to_hosp, onset_to_death = onset_to_death ), diff --git a/tests/testthat/test-sim_network_bp.R b/tests/testthat/test-sim_network_bp.R new file mode 100644 index 00000000..164de9e2 --- /dev/null +++ b/tests/testthat/test-sim_network_bp.R @@ -0,0 +1,65 @@ +suppressMessages({ + contact_distribution <- as.function( + epiparameter::epidist( + disease = "COVID-19", + epi_dist = "contact distribution", + prob_distribution = "pois", + prob_distribution_params = c(mean = 2) + ) + ) + + contact_interval <- as.function( + epiparameter::epidist( + disease = "COVID-19", + epi_dist = "contact interval", + prob_distribution = "gamma", + prob_distribution_params = c(shape = 1, scale = 1) + ), func_type = "generate" + ) +}) + +test_that(".sim_network_bp works as expected", { + set.seed(1) + expect_snapshot( + .sim_network_bp( + contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = 0.5, + config = create_config() + ) + ) +}) + +test_that(".sim_network_bp works as expected with no contacts", { + suppressMessages( + contact_distribution <- as.function( + epiparameter::epidist( + disease = "COVID-19", + epi_dist = "contact distribution", + prob_distribution = "pois", + prob_distribution_params = c(mean = 1) + ) + ) + ) + set.seed(1) + expect_snapshot( + .sim_network_bp( + contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = 0.5, + config = create_config() + ) + ) +}) + +test_that(".sim_network_bp works as expected with unadjusted network", { + set.seed(1) + expect_snapshot( + .sim_network_bp( + contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = 0.5, + config = create_config(network = "unadjusted") + ) + ) +}) \ No newline at end of file diff --git a/tests/testthat/test-sim_outbreak.R b/tests/testthat/test-sim_outbreak.R index 90e1c99c..30bcc74e 100644 --- a/tests/testthat/test-sim_outbreak.R +++ b/tests/testthat/test-sim_outbreak.R @@ -1,7 +1,14 @@ suppressMessages({ - serial_interval <- epiparameter::epidist( + contact_distribution <- epiparameter::epidist( + disease = "COVID-19", + epi_dist = "contact distribution", + prob_distribution = "pois", + prob_distribution_params = c(mean = 2) + ) + + contact_interval <- epiparameter::epidist( disease = "COVID-19", - epi_dist = "serial interval", + epi_dist = "contact interval", prob_distribution = "gamma", prob_distribution_params = c(shape = 1, scale = 1) ) @@ -19,30 +26,23 @@ suppressMessages({ epi_dist = "onset to death", single_epidist = TRUE ) - - contact_distribution <- epiparameter::epidist( - disease = "COVID-19", - epi_dist = "contact_distribution", - prob_distribution = "pois", - prob_distribution_params = c(l = 5) - ) }) test_that("sim_outbreak works as expected", { set.seed(1) outbreak <- sim_outbreak( - R = 1.1, - serial_interval = serial_interval, + contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = 0.5, onset_to_hosp = onset_to_hosp, - onset_to_death = onset_to_death, - contact_distribution = contact_distribution + onset_to_death = onset_to_death ) expect_type(outbreak, type = "list") expect_s3_class(outbreak$linelist, class = "data.frame") expect_s3_class(outbreak$contacts, class = "data.frame") - expect_identical(dim(outbreak$linelist), c(42L, 11L)) - expect_identical(dim(outbreak$contacts), c(172L, 8L)) + expect_identical(dim(outbreak$linelist), c(17L, 11L)) + expect_identical(dim(outbreak$contacts), c(35L, 8L)) expect_identical( colnames(outbreak$linelist), c( @@ -54,7 +54,7 @@ test_that("sim_outbreak works as expected", { expect_identical( colnames(outbreak$contacts), c( - "from", "to", "cnt_age", "cnt_gender", "date_first_contact", + "from", "to", "age", "gender", "date_first_contact", "date_last_contact", "was_case", "status" ) ) @@ -63,19 +63,19 @@ test_that("sim_outbreak works as expected", { test_that("sim_outbreak works as expected with add_names = FALSE", { set.seed(1) outbreak <- sim_outbreak( - R = 1.1, - serial_interval = serial_interval, + contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = 0.5, onset_to_hosp = onset_to_hosp, onset_to_death = onset_to_death, - contact_distribution = contact_distribution, add_names = FALSE ) expect_type(outbreak, type = "list") expect_s3_class(outbreak$linelist, class = "data.frame") expect_s3_class(outbreak$contacts, class = "data.frame") - expect_identical(dim(outbreak$linelist), c(42L, 10L)) - expect_identical(dim(outbreak$contacts), c(170L, 8L)) + expect_identical(dim(outbreak$linelist), c(17L, 10L)) + expect_identical(dim(outbreak$contacts), c(35L, 8L)) expect_identical( colnames(outbreak$linelist), c( @@ -87,7 +87,7 @@ test_that("sim_outbreak works as expected with add_names = FALSE", { expect_identical( colnames(outbreak$contacts), c( - "from", "to", "cnt_age", "cnt_gender", "date_first_contact", + "from", "to", "age", "gender", "date_first_contact", "date_last_contact", "was_case", "status" ) ) @@ -108,11 +108,11 @@ test_that("sim_outbreak works as expected with age-strat risks", { ) set.seed(1) outbreak <- sim_outbreak( - R = 1.1, - serial_interval = serial_interval, + contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = 0.5, onset_to_hosp = onset_to_hosp, onset_to_death = onset_to_death, - contact_distribution = contact_distribution, hosp_risk = age_dep_hosp_risk, hosp_death_risk = age_dep_hosp_death_risk, non_hosp_death_risk = age_dep_non_hosp_death_risk @@ -121,8 +121,8 @@ test_that("sim_outbreak works as expected with age-strat risks", { expect_type(outbreak, type = "list") expect_s3_class(outbreak$linelist, class = "data.frame") expect_s3_class(outbreak$contacts, class = "data.frame") - expect_identical(dim(outbreak$linelist), c(42L, 11L)) - expect_identical(dim(outbreak$contacts), c(173L, 8L)) + expect_identical(dim(outbreak$linelist), c(17L, 11L)) + expect_identical(dim(outbreak$contacts), c(35L, 8L)) expect_identical( colnames(outbreak$linelist), c( @@ -134,7 +134,7 @@ test_that("sim_outbreak works as expected with age-strat risks", { expect_identical( colnames(outbreak$contacts), c( - "from", "to", "cnt_age", "cnt_gender", "date_first_contact", + "from", "to", "age", "gender", "date_first_contact", "date_last_contact", "was_case", "status" ) ) @@ -148,19 +148,19 @@ test_that("sim_outbreak works as expected with age structure", { ) set.seed(1) outbreak <- sim_outbreak( - R = 1.1, - serial_interval = serial_interval, + contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = 0.5, onset_to_hosp = onset_to_hosp, onset_to_death = onset_to_death, - contact_distribution = contact_distribution, population_age = age_struct ) expect_type(outbreak, type = "list") expect_s3_class(outbreak$linelist, class = "data.frame") expect_s3_class(outbreak$contacts, class = "data.frame") - expect_identical(dim(outbreak$linelist), c(42L, 11L)) - expect_identical(dim(outbreak$contacts), c(175L, 8L)) + expect_identical(dim(outbreak$linelist), c(17L, 11L)) + expect_identical(dim(outbreak$contacts), c(35L, 8L)) expect_identical( colnames(outbreak$linelist), c( @@ -172,7 +172,7 @@ test_that("sim_outbreak works as expected with age structure", { expect_identical( colnames(outbreak$contacts), c( - "from", "to", "cnt_age", "cnt_gender", "date_first_contact", + "from", "to", "age", "gender", "date_first_contact", "date_last_contact", "was_case", "status" ) ) diff --git a/vignettes/age-strat-risks.Rmd b/vignettes/age-strat-risks.Rmd index 12336b91..cc3939e7 100644 --- a/vignettes/age-strat-risks.Rmd +++ b/vignettes/age-strat-risks.Rmd @@ -44,9 +44,16 @@ library(epiparameter) Here is an example that uses the default hospitalisation and death risks (inside and outside of hospital). First we load the requested delay distributions using the {epiparameter} package. ```{r, read-delay-dists} -serial_interval <- epidist( +contact_distribution <- epidist( disease = "COVID-19", - epi_dist = "serial interval", + epi_dist = "contact distribution", + prob_distribution = "pois", + prob_distribution_params = c(mean = 2) +) + +contact_interval <- epidist( + disease = "COVID-19", + epi_dist = "contact interval", prob_distribution = "gamma", prob_distribution_params = c(shape = 1, scale = 1) ) @@ -82,8 +89,9 @@ Simulate a line list with population-wide default risks: ```{r, sim-linelist} linelist <- sim_linelist( - R = 1.1, - serial_interval = serial_interval, + contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = 0.5, onset_to_hosp = onset_to_hosp, onset_to_death = onset_to_death ) @@ -96,8 +104,9 @@ We can run another simulation and change the hospitalisation and death risks, in ```{r, sim-linelist-diff-risks} linelist <- sim_linelist( - R = 1.1, - serial_interval = serial_interval, + contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = 0.5, onset_to_hosp = onset_to_hosp, onset_to_death = onset_to_death, hosp_risk = 0.4, @@ -130,8 +139,9 @@ age_dep_hosp_risk ```{r, sim-age-strat-linelist} linelist <- sim_linelist( - R = 1.1, - serial_interval = serial_interval, + contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = 0.5, onset_to_hosp = onset_to_hosp, onset_to_death = onset_to_death, hosp_risk = age_dep_hosp_risk @@ -158,8 +168,9 @@ age_dep_hosp_risk <- data.frame( age_dep_hosp_risk linelist <- sim_linelist( - R = 1.1, - serial_interval = serial_interval, + contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = 0.5, onset_to_hosp = onset_to_hosp, onset_to_death = onset_to_death, hosp_risk = age_dep_hosp_risk @@ -175,8 +186,9 @@ age_dep_hosp_risk <- data.frame( ) linelist <- sim_linelist( - R = 1.1, - serial_interval = serial_interval, + contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = 0.5, onset_to_hosp = onset_to_hosp, onset_to_death = onset_to_death, hosp_risk = age_dep_hosp_risk, @@ -198,8 +210,9 @@ age_dep_hosp_death_risk <- data.frame( age_dep_hosp_death_risk linelist <- sim_linelist( - R = 1.1, - serial_interval = serial_interval, + contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = 0.5, onset_to_hosp = onset_to_hosp, onset_to_death = onset_to_death, hosp_death_risk = age_dep_hosp_death_risk @@ -214,8 +227,9 @@ age_dep_non_hosp_death_risk <- data.frame( age_dep_non_hosp_death_risk linelist <- sim_linelist( - R = 1.1, - serial_interval = serial_interval, + contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = 0.5, onset_to_hosp = onset_to_hosp, onset_to_death = onset_to_death, non_hosp_death_risk = age_dep_non_hosp_death_risk @@ -239,8 +253,9 @@ age_dep_non_hosp_death_risk <- data.frame( ) linelist <- sim_linelist( - R = 1.1, - serial_interval = serial_interval, + contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = 0.5, onset_to_hosp = onset_to_hosp, onset_to_death = onset_to_death, hosp_risk = age_dep_hosp_risk, diff --git a/vignettes/age-struct-pop.Rmd b/vignettes/age-struct-pop.Rmd index 0b5f9b9b..ab407c63 100644 --- a/vignettes/age-struct-pop.Rmd +++ b/vignettes/age-struct-pop.Rmd @@ -22,9 +22,9 @@ knitr::opts_chunk$set( If you are unfamiliar with the {simulist} package or the `sim_linelist()` function [Get Started vignette](simulist.html) is a great place to start. ::: -This vignette describes how to simulate a line list with either a uniform or non-uniform distribution of ages within the population. This population age structure may either describe the population of a region as a whole, or specifically which age groups are more likely to become infected than others, for instance because they have more social contacts. +This vignette describes how to simulate a line list with either a uniform or non-uniform distribution of ages within the population. This population age structure may either describe the population of a region as a whole, or specifically which age groups are more likely to become infected than others. -The {simulist} package uses a [branching process](https://epiverse-trace.github.io/bpmodels/reference/chain_sim.html) which is independent of population age structure to simulate the line list, and then {simulist} _paints_ the demographic information onto the infected individuals (and in the case of `sim_outbreak()` or `sim_contacts()` the contacts of infected individuals). In other words, once the cases in the line list have been simulated the ages are assigned to each individual post hoc, specified by the age range, and age structure, if supplied. +The {simulist} package uses a branching process (using a random network model of contacts), which is independent of population age structure to simulate the line list, and then {simulist} _paints_ the demographic information onto the infected individuals (and in the case of `sim_outbreak()` or `sim_contacts()` the contacts of infected individuals). In other words, once the cases in the line list have been simulated the ages are assigned to each individual post hoc, specified by the age range, and age structure, if supplied. The age range and age structure for the simulation functions (`sim_*()`) in {simulist} is controlled by the `population_age` argument. The default age structure in {simulist} is a uniform structure between a lower and upper age range. The `sim_linelist()` function (and other `sim_*()` functions) can accept a `` instead of the `numeric` vector to specify the age structure for specified age groups. This feature can be especially useful when wanting to simulate an outbreak in a region with a heavily non-uniform age structure, [for example younger populations such as Nigeria, or older populations such as Japan](https://ourworldindata.org/age-structure). @@ -34,12 +34,19 @@ library(epiparameter) library(ggplot2) ``` -First we load the requested delay distributions using the {epiparameter} package. The onset-to-hospitalisation and onset-to-death delay distributions are extracted from the {epiparameter} database, and the serial interval is manually defined as it is not yet available from the {epiparameter} database. +First we load the requested delay distributions using the {epiparameter} package. The onset-to-hospitalisation and onset-to-death delay distributions are extracted from the {epiparameter} database, and the contact distribution and the contact interval are manually defined as they are not yet available from the {epiparameter} database. ```{r, read-delay-dists} -serial_interval <- epidist( +contact_distribution <- epidist( disease = "COVID-19", - epi_dist = "serial interval", + epi_dist = "contact distribution", + prob_distribution = "pois", + prob_distribution_params = c(mean = 2) +) + +contact_interval <- epidist( + disease = "COVID-19", + epi_dist = "contact interval", prob_distribution = "gamma", prob_distribution_params = c(shape = 1, scale = 1) ) @@ -67,17 +74,21 @@ set.seed(1) ## Uniform population age -By default `sim_linelist()` simulates individuals ages assuming a uniform distribution between 1 and 90. To change this age range, a vector of two numbers can be supplied to the `population_age` argument. Here we simulated an outbreak in a population with a population ranging from 5 to 75 (inclusive, `[5,75]`). +By default `sim_linelist()` simulates individuals ages assuming a uniform distribution between 1 and 90. To change this age range, a vector of two numbers can be supplied to the `population_age` argument. Here we simulate an outbreak in a population with a population ranging from 5 to 75 (inclusive, `[5,75]`). + +***Note***: all ages are assumed to be in the unit of years and need to be integers (or at least ["integerish"](https://rlang.r-lib.org/reference/is_integerish.html) if stored as double). -***Note***: all ages are assumed to be in the unit of years and need to be integers (or at least ["integerish"](https://rlang.r-lib.org/reference/is_integerish.html) if stored as double) +All simulations in this vignette condition the simulation to have a minimum outbreak size (`min_outbreak_size`) of 100 cases to clearly visualise the distribution of ages. ```{r sim-linelist-age-range} linelist <- sim_linelist( - R = 1.3, - serial_interval = serial_interval, + contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = 0.45, onset_to_hosp = onset_to_hosp, onset_to_death = onset_to_death, - population_age = c(5, 75) + population_age = c(5, 75), + min_outbreak_size = 100 ) head(linelist) ``` @@ -116,11 +127,13 @@ age_struct ```{r, sim-age-struct-linelist} linelist <- sim_linelist( - R = 1.3, - serial_interval = serial_interval, + contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = 0.45, onset_to_hosp = onset_to_hosp, onset_to_death = onset_to_death, - population_age = age_struct + population_age = age_struct, + min_outbreak_size = 100 ) head(linelist) @@ -167,11 +180,13 @@ age_struct ```{r, sim-age-struct-linelist-young} linelist <- sim_linelist( - R = 1.3, - serial_interval = serial_interval, + contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = 0.45, onset_to_hosp = onset_to_hosp, onset_to_death = onset_to_death, - population_age = age_struct + population_age = age_struct, + min_outbreak_size = 100 ) head(linelist) diff --git a/vignettes/design-principles.Rmd b/vignettes/design-principles.Rmd index 1cfe0e61..9db296f0 100644 --- a/vignettes/design-principles.Rmd +++ b/vignettes/design-principles.Rmd @@ -30,7 +30,7 @@ The simulation functions either return a `` or a `list` of `` that defines the age-stratification in `hosp_risk`, `hosp_death_risk` and `non_hosp_death_risk` arguments gives the lower bound of the age groups. The upper bound of the age groups is derived from the next lower bound, but the upper bound oldest age group is defined by the upper age given to the `population_age` argument. This interaction of arguments is not ideal, as it can be more difficult to understand for users (as outlined in [The Tidy Design book](https://design.tidyverse.org/independent-meaning.html#whats-the-problem)), however, the interaction does not change the interpretation of each argument which would be more convoluted. This design decision was taken when we changed the structure of the `` accepted as input to the `*_risk` arguments from having two columns for a lower and upper age limit, to a single column of lower age bounds. This change was made in [pull request #14](https://github.com/epiverse-trace/simulist/pull/14) and follows the design of [{socialmixr}](https://github.com/epiforecasts/socialmixr/blob/main/R/contact_matrix.r) for defining age bounds. The newer structure is judged to be preferred as it prevents input errors by the user when the age bounds are overlapping or non-contiguous (i.e. missing age groups). -- The column names of the contact relationships (edges of the network) are called `from` and `to`. Names of contacts table match {epicontacts} `` objects. If the column names of the two contacts provided to `epicontacts::make_epicontacts()` arguments `from` and `to` are not `from` and `to` they will be silently renamed in the resulting `` object. By making these column names `from` and `to` when output from `sim_contacts()` or `sim_outbreak()` it prevents any confusion when used with {epicontacts}. This names are also preferred as they are usefully descriptive. +- The column names of the contact relationships (edges of the network) are called `from` and `to`. Names of the contacts table match {epicontacts} `` objects. If the column names of the two contacts provided to `epicontacts::make_epicontacts()` arguments `from` and `to` are not `from` and `to` they will be silently renamed in the resulting `` object. By making these column names `from` and `to` when output from `sim_contacts()` or `sim_outbreak()` it prevents any confusion when used with {epicontacts}. This naming is also preferred as they are usefully descriptive. - The [Visualising simulated data vignette](vis-linelist.html) contains interactive data visualisation when rendered to the web. This enforces some limitations. The vignette uses `output: rmarkdown::html_document()` instead of `output: bookdown::html_vignette2` and does not contain `pkgdown: as_is: true` in the yaml metadata, in order for the interactive figures to render and operate correctly. This means that the vignette figures will not automatically be numbered and start with "Figure _x_" (where _x_ is replaced by a number). Instead, it was decided for this vignette that this information would be manually written, and then manually updated if the number or order of the figures changed. This is not an ideal solution and automation is preferred, but on balance, it was decided that the addition of interactive visualisation from {epicontacts} outweighed the downside of manual figure labelling. @@ -40,6 +40,8 @@ The simulation functions either return a `` or a `list` of ``) with case information for each infected individual. +The main function in the {simulist} package is `sim_linelist()`. This functions takes in arguments that control the dynamics of the outbreak, such as the contact interval (i.e. time delay between contacts), and outputs a line list table (``) with case information for each infected individual. For this introduction we will simulate a line list for the early stages of a COVID-19 (SARS-CoV-2) outbreak. This will require two R packages: {simulist}, to produce the line list, and {epiparameter} to provide epidemiological parameters, such as onset-to-death delays. @@ -32,11 +32,18 @@ library(epiparameter) First we load in some data that is required for the line list simulation. Data on epidemiological parameters and distributions are read from the {epiparameter} R package. ```{r read-epidist} -# get serial interval from {epiparameter} database +# create contact distribution (not available from {epiparameter} database) +contact_distribution <- epidist( + disease = "COVID-19", + epi_dist = "contact distribution", + prob_distribution = "pois", + prob_distribution_params = c(mean = 2) +) -serial_interval <- epidist( +# create contact interval (not available from {epiparameter} database) +contact_interval <- epidist( disease = "COVID-19", - epi_dist = "serial interval", + epi_dist = "contact interval", prob_distribution = "gamma", prob_distribution_params = c(shape = 1, scale = 1) ) @@ -56,12 +63,19 @@ onset_to_death <- epidist_db( ) ``` -The reproduction number (`R`) is the first argument in `sim_linelist()` and with the serial interval will control the growth rate of the simulated epidemic. Here we set it as 1.1. The minimum requirements to simulate a line list are the reproduction number (`R`), the serial interval, onset-to-hospitalisation delay and onset-to-death delay. +The seed is set to ensure the output of the vignette is consistent. When using {simulist}, setting the seed is not required unless you need to simulate the same line list multiple times. + +```{r, set-seed} +set.seed(123) +``` + +The first argument in `sim_linelist()` is the contact distribution (`contact_distribution`), which here we specify as Poisson distribution with a mean (average) number of contacts of 2, and with the contact interval and probability of infection per contact (`prob_infect`) will control the growth rate of the simulated epidemic. Here we set the probability of infection as 0.5 (on average half of contacts become infected). The minimum requirements to simulate a line list are the contact distribution, the contact interval, probability of infection, onset-to-hospitalisation delay and onset-to-death delay. ```{r, sim-linelist} linelist <- sim_linelist( - R = 1.1, - serial_interval = serial_interval, + contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = 0.5, onset_to_hosp = onset_to_hosp, onset_to_death = onset_to_death ) @@ -70,7 +84,7 @@ head(linelist) ## Case type -During an infectious disease outbreak it may not be possible to confirm every infection as a case. A confirmed case is typically defined via a diagnostic test. There are several reasons why a case may not be confirmed, including limited testing capacity and mild or non-specific early symptoms, especially in fast growing epidemics. We therefore include two other categories for cases: probable and suspected. For example, probable cases may those that show clinical evidence for the disease but have not, or cannot, be confirmed by a diagnostic test. Suspected cases are those that are possibly infected but do not show clear clinical or epidemiological evidence, nor has a diagnostic test been performed. Hence the distribution of suspected/probable/confirmed will depend on the pathogen characteristics, outbreak-specific definitions, and resources available. +During an infectious disease outbreak it may not be possible to confirm every infection as a case. A confirmed case is typically defined via a diagnostic test. There are several reasons why a case may not be confirmed, including limited testing capacity and mild or non-specific early symptoms, especially in fast growing epidemics. We therefore include two other categories for cases: probable and suspected. For example, probable cases may be those that show clinical evidence for the disease but have not, or cannot, be confirmed by a diagnostic test. Suspected cases are those that are possibly infected but do not show clear clinical or epidemiological evidence, nor has a diagnostic test been performed. Hence the distribution of suspected/probable/confirmed will depend on the pathogen characteristics, outbreak-specific definitions, and resources available. The line list output from the {simulist} simulation contains a column (`case_type`) with the type of case. @@ -78,8 +92,9 @@ The line list output from the {simulist} simulation contains a column (`case_typ ```{r, sim-linelist-default-case-type} linelist <- sim_linelist( - R = 1.1, - serial_interval = serial_interval, + contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = 0.5, onset_to_hosp = onset_to_hosp, onset_to_death = onset_to_death ) @@ -90,8 +105,9 @@ To alter these probabilities, supply a named vector to the `sim_linelist()` argu ```{r, sim-linelist-mod-case-type} linelist <- sim_linelist( - R = 1.1, - serial_interval = serial_interval, + contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = 0.5, onset_to_hosp = onset_to_hosp, onset_to_death = onset_to_death, case_type_probs = c(suspected = 0.05, probable = 0.05, confirmed = 0.9) @@ -105,7 +121,7 @@ The way {simulist} assigns case types is by pasting case types onto existing cas ## Conditioning simulation on outbreak size -The reproduction number has a strong influence on the size of an outbreak. However, the {simulist} package generates line list data using a stochastic algorithm, so even when $R < 1$ it can produce a substantial outbreak by chance, or an $R >> 1$ will sometimes not produce a vast epidemic in one simulation (i.e. one replicate) due to the stochasticity. +The reproduction number has a strong influence on the size of an outbreak. For {simulist}, the reproduction number is determined by the mean number of contacts and the probability of infection. However, the {simulist} package generates line list data using a stochastic algorithm, so even when $R < 1$ it can produce a substantial outbreak by chance, or an $R >> 1$ will sometimes not produce a vast epidemic in one simulation (i.e. one replicate) due to the stochasticity. When requiring a minimum outbreak size we can specify the `min_outbreak_size` argument in `sim_linelist()`. By default this is set to 10. This means that the simulation will not return a line list until the conditioning has been met. In other words, the simulation will resimulate a branching process model until an outbreak infects at least 10 people. @@ -113,8 +129,9 @@ When requiring a line list that represents a large outbreak, such as the COVID-1 ```{r, sim-large-linelist} linelist <- sim_linelist( - R = 1.1, - serial_interval = serial_interval, + contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = 0.5, onset_to_hosp = onset_to_hosp, onset_to_death = onset_to_death, min_outbreak_size = 250 @@ -122,7 +139,7 @@ linelist <- sim_linelist( head(linelist) ``` -The amount of time the simulation takes can be determined by the reproduction number (`R`) and the minimum outbreak size (`min_outbreak_size`). If the `min_outbreak_size` is large, for example hundreds or thousands of cases, and the reproduction number is below one, it will take many branching process simulations until finding one that produces a sufficiently large epidemic. +The amount of time the simulation takes can be determined by the mean of the contact distribution (`contact_distribution`), the probability of infection (`prob_infect`) and the minimum outbreak size (`min_outbreak_size`). If the `min_outbreak_size` is large, for example hundreds or thousands of cases, and the mean number of contacts and probability of infection mean the reproduction number is below one, it will take many branching process simulations until finding one that produces a sufficiently large epidemic. ## Anonymous line list @@ -130,8 +147,9 @@ By default `sim_linelist()` provides the name of each individual in the line lis ```{r sim-anon-linelist} linelist <- sim_linelist( - R = 1.1, - serial_interval = serial_interval, + contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = 0.5, onset_to_hosp = onset_to_hosp, onset_to_death = onset_to_death, add_names = FALSE @@ -151,21 +169,13 @@ For an overview of how a line list can be simulated with age-stratified (or age- ## Simulate contacts table -To simulate a contacts table, the `sim_contacts()` function can be used. This requires a contacts distribution (i.e. a distribution describing the variability in number of contacts for different individuals within the population). +To simulate a contacts table, the `sim_contacts()` function can be used. This requires the same arguments as `sim_linelist()`, but does not require the onset-to-hospitalisation delay and onset-to-death delays. ```{r, sim-contacts} -contact_distribution <- epiparameter::epidist( - disease = "COVID-19", - epi_dist = "contact_distribution", - prob_distribution = "pois", - prob_distribution_params = c(l = 5) -) - contacts <- sim_contacts( - R = 1.1, - serial_interval = serial_interval, - contact_distribution = contact_distribution -) + contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = 0.5) head(contacts) ``` @@ -173,15 +183,15 @@ head(contacts) To produce both a line list and a contacts table for the same outbreak, the `sim_linelist()` and `sim_contacts()` cannot be used separately due to the stochastic algorithm, meaning the data in the line list will be discordant with the contacts table. -In order to simulate a line list and a contacts table of the same outbreak the `sim_outbreak()` function is required. This will simulate a single outbreak and return a line list and a contacts table. The inputs of `sim_outbreak()` are a combination of the inputs required for `sim_linelist()` and `sim_contacts()`. +In order to simulate a line list and a contacts table of the same outbreak the `sim_outbreak()` function is required. This will simulate a single outbreak and return a line list and a contacts table. The inputs of `sim_outbreak()` are the same as the inputs required for `sim_linelist()`. ```{r, sim-outbreak} outbreak <- sim_outbreak( - R = 1.1, - serial_interval = serial_interval, + contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = 0.5, onset_to_hosp = onset_to_hosp, - onset_to_death = onset_to_death, - contact_distribution = contact_distribution + onset_to_death = onset_to_death ) head(outbreak$linelist) head(outbreak$contacts) @@ -189,20 +199,34 @@ head(outbreak$contacts) `sim_outbreak()` has the same features as `sim_linelist()` and `sim_contacts()`, this includes simulating with age-stratified risks of hospitalisation and death, the probability of case types or contact tracing status can be modified. +::: {.alert .alert-info} + +_Advanced_ + +The `sim_*()` functions, by default, use an excess degree distribution to account for a network effect when sampling the number of contacts in the simulation model ($q(n) \sim (n + 1)p(n + 1)$ where $p(n)$ is the probability density function of a distribution, e.g., Poisson or Negative binomial, within the `.sim_network_bp()` internal function). This network effect can be turned off by using the `config` argument in any `sim_*()` function and setting `network = "unadjusted"` (`create_config(network = "unadjusted")`) which will instead sample from a probability distribution $p(n)$. + +::: + ## Using functions for distributions instead of `` It is possible to use an [anonymous function](https://en.wikipedia.org/wiki/Anonymous_function) instead of an `` object when specifying the parameters of the delay and contact distributions. We recommend using the `` objects but here outline the alternative approach. ```{r, sim-outbreak-anon-func} outbreak <- sim_outbreak( - R = 1.1, - serial_interval = function(x) rgamma(n = x, shape = 2, scale = 2), + contact_distribution = function(x) dpois(x = x, lambda = 2), + contact_interval = function(x) rgamma(n = x, shape = 2, scale = 2), + prob_infect = 0.5, onset_to_hosp = function(x) rlnorm(n = x, meanlog = 1.5, sdlog = 0.5), - onset_to_death = function(x) rweibull(n = x, shape = 0.5, scale = 0.2), - contact_distribution = function(x) rnbinom(n = x, mu = 5, size = 0.5) + onset_to_death = function(x) rweibull(n = x, shape = 0.5, scale = 0.2) ) head(outbreak$linelist) head(outbreak$contacts) ``` +::: {.alert .alert-warning} + +The `contact_distribution` requires a density function instead of a random number generation function (i.e. `dpois()` or `dnbinom()` instead of `rpois()` or `rnbinom()`). This is due to the branching process simulation adjusting the sampling of contacts to take into account the random network effect. + +::: + The same approach of using anonymous functions can be used in `sim_linelist()` and `sim_contacts()`. diff --git a/vignettes/vis-linelist.Rmd b/vignettes/vis-linelist.Rmd index 6414c372..f6b9fc9d 100644 --- a/vignettes/vis-linelist.Rmd +++ b/vignettes/vis-linelist.Rmd @@ -28,11 +28,18 @@ library(epicontacts) First we load the required delay distributions using the {epiparameter} package. ```{r, read-delay-dists} -serial_interval <- epidist( +contact_distribution <- epidist( disease = "COVID-19", - epi_dist = "serial interval", + epi_dist = "contact distribution", + prob_distribution = "pois", + prob_distribution_params = c(mean = 3) +) + +contact_interval <- epidist( + disease = "COVID-19", + epi_dist = "contact interval", prob_distribution = "gamma", - prob_distribution_params = c(shape = 1, scale = 1) + prob_distribution_params = c(shape = 3, scale = 2) ) # get onset to hospital admission from {epiparameter} database @@ -53,17 +60,19 @@ onset_to_death <- epidist_db( Setting the seed ensures we have the same output each time the vignette is rendered. When using {simulist}, setting the seed is not required unless you need to simulate the same line list multiple times. ```{r, set-seed} -set.seed(1) +set.seed(123) ``` Using a simple line list simulation with the factory default settings: ```{r sim-linelist} linelist <- sim_linelist( - R = 1.3, - serial_interval = serial_interval, + contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = 0.33, onset_to_hosp = onset_to_hosp, - onset_to_death = onset_to_death + onset_to_death = onset_to_death, + min_outbreak_size = 500 ) ``` @@ -160,27 +169,25 @@ Additionally, {epicontacts} provides access to network plotting from JavaScript ::: -The {epicontacts} function `make_epicontacts()` requires both the line list and contacts table, so we will run the `sim_outbreak()` function to produce both. We will use the same epidemiological delay distributions that we used to simulate a line list above. We need an extra distribution to simulate an outbreak, a contact distribution (see the documentation `?sim_outbreak` for more detail). We create this distribution using the {epiparameter} package. +The {epicontacts} function `make_epicontacts()` requires both the line list and contacts table, so we will run the `sim_outbreak()` function to produce both. We will use the same epidemiological delay distributions that we used to simulate a line list above, but reduce the mean number of contacts in the contact distribution to 2. -```{r, create-contact-dist} -contact_distribution <- epiparameter::epidist( +```{r, contact-distribution} +contact_distribution <- epidist( disease = "COVID-19", - epi_dist = "contact_distribution", + epi_dist = "contact distribution", prob_distribution = "pois", - prob_distribution_params = c(l = 5) + prob_distribution_params = c(mean = 2) ) ``` -Now we can simulate an outbreak: - ```{r, sim-outbreak} set.seed(1) outbreak <- sim_outbreak( - R = 1.1, - serial_interval = serial_interval, + contact_distribution = contact_distribution, + contact_interval = contact_interval, + prob_infect = 0.5, onset_to_hosp = onset_to_hosp, - onset_to_death = onset_to_death, - contact_distribution = contact_distribution + onset_to_death = onset_to_death ) head(outbreak$linelist) head(outbreak$contacts)